Commit 9093d04e authored by gaojiuli

init

parent 2123a11c
...@@ -55,4 +55,4 @@ string = """ ...@@ -55,4 +55,4 @@ string = """
""" """
print(Tomd(string).markdown) print(Tomd(string).markdown)
print(Tomd('<h1>title</h1>').markdown) # print(Tomd('<h1>title</h1>').markdown)
...@@ -23,21 +23,20 @@ MARKDOWN = { ...@@ -23,21 +23,20 @@ MARKDOWN = {
'inline_p_with_out_class': ('', '') 'inline_p_with_out_class': ('', '')
} }
BlOCK_ELEMENTS = { BlOCK_ELEMENTS = (
'h1': '<h1.*?>(.*?)</h1>', ('h1', '<h1.*?>(.*?)</h1>'),
'h2': '<h2.*?>(.*?)</h2>', ('h2', '<h2.*?>(.*?)</h2>'),
'h3': '<h3.*?>(.*?)</h3>', ('h3', '<h3.*?>(.*?)</h3>'),
'h4': '<h4.*?>(.*?)</h4>', ('h4', '<h4.*?>(.*?)</h4>'),
'h5': '<h5.*?>(.*?)</h5>', ('h5', '<h5.*?>(.*?)</h5>'),
'h6': '<h6.*?>(.*?)</h6>', ('h6', '<h6.*?>(.*?)</h6>'),
'p': '<p\s.*?>(.*?)</p>', ('blockquote', '<blockquote.*?>(.*?)</blockquote>'),
'p_with_out_class': '<p>(.*?)</p>', # conflict with <pre> ('ul', '<ul.*?>(.*?)</ul>'),
'blockquote': '<blockquote.*?>(.*?)</blockquote>', ('ol', '<ol.*?>(.*?)</ol>'),
'ul': '<ul.*?>(.*?)</ul>', ('block_code', '<pre.*?><code.*?>(.*?)</code></pre>'),
'ol': '<ol.*?>(.*?)</ol>', ('p', '<p\s.*?>(.*?)</p>'),
'block_code': '<pre.*?><code.*?>(.*?)</code></pre>', ('p_with_out_class', '<p>(.*?)</p>'), # conflict with <pre>
)
}
INLINE_ELEMENTS = { INLINE_ELEMENTS = {
'inline_p': '<p\s.*?>(.*?)</p>', 'inline_p': '<p\s.*?>(.*?)</p>',
...@@ -52,20 +51,20 @@ INLINE_ELEMENTS = { ...@@ -52,20 +51,20 @@ INLINE_ELEMENTS = {
'em': '<em.*?>(.*?)</em>' 'em': '<em.*?>(.*?)</em>'
} }
## pos < max_pos
DELETE_ELEMENTS = ['<span.*?>', '</span>', '<div.*?>', '</div>'] DELETE_ELEMENTS = ['<span.*?>', '</span>', '<div.*?>', '</div>']
class Element: class Element:
def __init__(self, pos, content, tag): def __init__(self, start_pos, end_pos, content, tag, is_block=False):
self.pos = pos self.start_pos = start_pos
self.end_pos = end_pos
self.content = content self.content = content
self._elements = [] self._elements = []
self.is_block = is_block
self.tag = tag self.tag = tag
self._result = None self._result = None
if tag in BlOCK_ELEMENTS: if self.is_block:
self.parse_inline() self.parse_inline()
def __str__(self): def __str__(self):
...@@ -89,13 +88,21 @@ class Tomd: ...@@ -89,13 +88,21 @@ class Tomd:
self._markdown = re.sub(element, '', self._markdown) self._markdown = re.sub(element, '', self._markdown)
def parse_block(self): def parse_block(self):
for tag, pattern in BlOCK_ELEMENTS:
for tag, pattern in BlOCK_ELEMENTS.items():
for m in re.finditer(pattern, self.html, re.I | re.S | re.M): for m in re.finditer(pattern, self.html, re.I | re.S | re.M):
element = Element(pos=m.start(), content=''.join(m.groups()), tag=tag) element = Element(start_pos=m.start(),
end_pos=m.end(),
content=''.join(m.groups()),
tag=tag,
is_block=True)
can_append = True
for e in self._elements:
if e.start_pos < m.start() and e.end_pos > m.end():
can_append = False
if can_append:
self._elements.append(element) self._elements.append(element)
self._elements.sort(key=lambda element: element.pos) self._elements.sort(key=lambda element: element.start_pos)
self._markdown = ''.join([str(e) for e in self._elements]) self._markdown = ''.join([str(e) for e in self._elements])
@property @property
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment