Commit 786673c3 authored by gaojiuli

Add function api tomd.convert

parent 725e7a50
...@@ -20,9 +20,11 @@ Convert HTML to Markdown. ...@@ -20,9 +20,11 @@ Convert HTML to Markdown.
Input Input
```python ```python
from tomd import Tomd import tomd
Tomd("<h1>h1</h1>").markdown tomd.Tomd('<h1>h1</h1>').markdown
# or
tomd.convert('<h1>h1</h1>')
``` ```
Output Output
......
from tomd import Tomd from tomd import Tomd
import tomd
string = """ string = """
<h1>h1</h1> <h1>h1</h1>
<h2>h2</h2> <h2>h2</h2>
...@@ -52,3 +52,4 @@ string = """ ...@@ -52,3 +52,4 @@ string = """
""" """
print(Tomd(string).markdown) print(Tomd(string).markdown)
print(tomd.convert(string))
import re import re
__all__ = ['Tomd', 'convert']
MARKDOWN = { MARKDOWN = {
'h1': ('\n# ', '\n'), 'h1': ('\n# ', '\n'),
'h2': ('\n## ', '\n'), 'h2': ('\n## ', '\n'),
...@@ -45,7 +47,10 @@ BlOCK_ELEMENTS = { ...@@ -45,7 +47,10 @@ BlOCK_ELEMENTS = {
'p': '<p\s.*?>(.*?)</p>', 'p': '<p\s.*?>(.*?)</p>',
'p_with_out_class': '<p>(.*?)</p>', 'p_with_out_class': '<p>(.*?)</p>',
'thead': '<thead.*?>(.*?)</thead>', 'thead': '<thead.*?>(.*?)</thead>',
'tr': '<tr>(.*?)</tr>' 'tr': '<tr>(.*?)</tr>',
'b': '<b>(.*?)</b>',
'i': '<i>(.*?)</i>',
'del': '<del>(.*?)</del>'
} }
INLINE_ELEMENTS = { INLINE_ELEMENTS = {
...@@ -111,34 +116,41 @@ class Element: ...@@ -111,34 +116,41 @@ class Element:
class Tomd: class Tomd:
def __init__(self, html): def __init__(self, html='', options=None):
self.html = html self.html = html
self._elements = [] self.options = options
self._markdown = None self._markdown = ''
self.parse_block()
for index, element in enumerate(DELETE_ELEMENTS):
self._markdown = re.sub(element, '', self._markdown)
def parse_block(self): def convert(self, html, options=None):
elements = []
for tag, pattern in BlOCK_ELEMENTS.items(): for tag, pattern in BlOCK_ELEMENTS.items():
for m in re.finditer(pattern, self.html, re.I | re.S | re.M): for m in re.finditer(pattern, html, re.I | re.S | re.M):
element = Element(start_pos=m.start(), element = Element(start_pos=m.start(),
end_pos=m.end(), end_pos=m.end(),
content=''.join(m.groups()), content=''.join(m.groups()),
tag=tag, tag=tag,
is_block=True) is_block=True)
can_append = True can_append = True
for e in self._elements: for e in elements:
if e.start_pos < m.start() and e.end_pos > m.end(): if e.start_pos < m.start() and e.end_pos > m.end():
can_append = False can_append = False
elif e.start_pos > m.start() and e.end_pos < m.end(): elif e.start_pos > m.start() and e.end_pos < m.end():
self._elements.remove(e) elements.remove(e)
if can_append: if can_append:
self._elements.append(element) elements.append(element)
self._elements.sort(key=lambda element: element.start_pos) elements.sort(key=lambda element: element.start_pos)
self._markdown = ''.join([str(e) for e in self._elements]) self._markdown = ''.join([str(e) for e in elements])
for index, element in enumerate(DELETE_ELEMENTS):
self._markdown = re.sub(element, '', self._markdown)
return self._markdown
@property @property
def markdown(self): def markdown(self):
self.convert(self.html, self.options)
return self._markdown return self._markdown
_inst = Tomd()
convert = _inst.convert
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment