Commit e6ac1409 authored by gaojiuli

init

parent 421b4df2
from setuptools import find_packages, setup
# Distribution metadata for the ``tomd`` package (HTML -> Markdown converter).
setup(
    # --- identity -------------------------------------------------------
    name='tomd',
    version='0.1.1',
    description='Convert HTML to Markdown.',
    url="https://github.com/gaojiuli/tomd",
    # --- authorship -----------------------------------------------------
    author='Gaojiuli',
    author_email='gaojiuli@aaronsw.com',
    # --- licensing and index classifiers --------------------------------
    license="GNU GPL 3",
    classifiers=[
        "Development Status :: 5 - Production/Stable",
        "Intended Audience :: Developers",
        "License :: OSI Approved :: GNU General Public License (GPL)",
        "Programming Language :: Python :: 2.7",
        "Programming Language :: Python :: 3.5",
    ],
    # --- contents -------------------------------------------------------
    # Ship the single top-level ``tomd`` module plus any packages that
    # ``find_packages()`` discovers next to this file.
    py_modules=['tomd'],
    packages=find_packages(),
    include_package_data=True,
    zip_safe=False,
)
from tomd import Tomd
# Sample HTML document used to exercise the converter: a rendered README
# containing headings, paragraphs, links, an unordered list, inline code and
# syntax-highlighted <pre><code> blocks with nested <span> markup.
string = """
<div class="markdown_body"><p>XData</p>
<p>Github: <a href="https://github.com/gaojiuli/xdata" rel="nofollow">https://github.com/gaojiuli/xdata</a></p>
<p>一款非常实用的数据验证工具, 通常用于请求数据的验证.</p>
<h2>Features</h2>
<ul>
<li>验证数据一步到位</li>
<li>容易扩展,容易自定义数据类型以及验证方式</li>
<li>无第三方依赖</li>
</ul>
<h2>Required</h2>
<ul>
<li>python &gt;= 3.5</li>
</ul>
<h2>Installation</h2>
<p><code>pip install xdata</code></p>
<h2>Usage</h2>
<h3>ValidatedData</h3>
<pre><code class="hljs python"><span class="hljs-keyword">from</span> xdata <span class="hljs-keyword">import</span> *
<span class="hljs-class"><span class="hljs-keyword">class</span> <span class="hljs-title">UserSchema</span><span class="hljs-params">(Schema)</span>:</span>
telephone = Str(length=<span class="hljs-number">11</span>, required=<span class="hljs-keyword">True</span>)
password = Str(min_length=<span class="hljs-number">8</span>,max_length=<span class="hljs-number">16</span>, required=<span class="hljs-keyword">True</span>)
request_data = {
<span class="hljs-string">'telephone'</span>:<span class="hljs-string">'18180050000'</span>,
<span class="hljs-string">'password'</span>:<span class="hljs-string">'idonotknow'</span>
}
schema = UserSchema(request_data)
<span class="hljs-keyword">if</span> schema.valid:
print(schema.validated_data) <span class="hljs-comment"># {'telephone': '18180050000', 'password': 'idonotknow'}</span>
</code></pre>
<h3>Errors</h3>
<pre><code class="hljs python"><span class="hljs-keyword">from</span> xdata <span class="hljs-keyword">import</span> *
<span class="hljs-class"><span class="hljs-keyword">class</span> <span class="hljs-title">UserSchema</span><span class="hljs-params">(Schema)</span>:</span>
telephone = Str(length=<span class="hljs-number">11</span>, required=<span class="hljs-keyword">True</span>)
password = Str(min_length=<span class="hljs-number">8</span>, max_length=<span class="hljs-number">16</span>, required=<span class="hljs-keyword">True</span>)
request_data = {}
schema = UserSchema(request_data)
<span class="hljs-keyword">if</span> <span class="hljs-keyword">not</span> schema.valid:
print(schema.errors) <span class="hljs-comment"># {'telephone': 'telephone is required', 'password': 'password is required'}</span>
</code></pre>
<h3>DataTypes</h3>
<pre><code class="hljs lisp">from xdata import *
DataType(<span class="hljs-name">required=True</span>,default='<span class="hljs-number">11</span>',choices=[])
Str(<span class="hljs-name">length=11</span>, max_length=12,min_length=10,regex=<span class="hljs-string">""</span>)
Int(<span class="hljs-name">max=10000</span>,min=12)
Bool(<span class="hljs-name">max=10000</span>,min=12)
Decimal(<span class="hljs-name">left=5</span>,right=2)
DateTime(<span class="hljs-name">max_datetime=</span>'<span class="hljs-number">2001</span><span class="hljs-number">-01</span><span class="hljs-number">-01</span> <span class="hljs-number">00</span>:<span class="hljs-number">00</span>:<span class="hljs-number">00</span>', min_datetime='<span class="hljs-number">2000</span><span class="hljs-number">-01</span><span class="hljs-number">-01</span> <span class="hljs-number">00</span>:<span class="hljs-number">00</span>:<span class="hljs-number">00</span>')
Date(<span class="hljs-name">max_date=</span>'<span class="hljs-number">2001</span><span class="hljs-number">-01</span><span class="hljs-number">-01</span>', min_date='<span class="hljs-number">2000</span><span class="hljs-number">-01</span><span class="hljs-number">-01</span>')
Time(<span class="hljs-name">max_time=</span>'<span class="hljs-number">06</span>:<span class="hljs-number">00</span>:<span class="hljs-number">00</span>', min_time='<span class="hljs-number">05</span>:<span class="hljs-number">00</span>:<span class="hljs-number">00</span>')
</code></pre>
<h2>Test</h2>
<p><code>coverage run --source=xdata -m pytest &amp;&amp; coverage report</code></p>
<p>Github: <a href="https://github.com/gaojiuli/xdata" rel="nofollow">https://github.com/gaojiuli/xdata</a></p>
<p>欢迎有兴趣的朋友一起参与进来</p>
</div>
"""
# Run the converter over the sample document; the instance is not kept.
Tomd(string)
import re
# Markdown wrappers for every supported tag, as ``(prefix, suffix)`` pairs
# that are placed around the text extracted from the matching HTML element.
#
# Keys are the tag names used by the block/inline pattern tables;
# ``p_with_out_class`` and ``block_code`` are pseudo-tags for ``<p>`` without
# attributes and ``<pre><code>`` respectively.
MARKDOWN = {
    'h1': ('\n# ', '\n'),
    'h2': ('\n## ', '\n'),
    'h3': ('\n### ', '\n'),
    'h4': ('\n#### ', '\n'),
    'h5': ('\n##### ', '\n'),
    'h6': ('\n###### ', '\n'),
    'p': ('\n', '\n'),
    'p_with_out_class': ('\n', '\n'),
    'code': ('`', '`'),
    'ul': ('\n', '\n'),
    'ol': ('\n', '\n'),
    # A bullet list item is "* item"; the previous "*. " prefix is not a
    # list marker in Markdown and rendered as literal text.
    'li': ('* ', ''),
    'blockquote': ('> ', '\n'),
    # <em> is emphasis, written with single asterisks; double asterisks
    # would produce strong emphasis (<strong>) instead.
    'em': ('*', '*'),
    'a': ('[](', ')'),
    'img': ('![](', ')'),
    'block_code': ('\n```\n', '\n```\n'),
    # Spans carry no Markdown meaning; only their text is kept.
    'span': ('', ''),
}
# Regular expressions for block-level elements, keyed by the tag name used
# to look up the Markdown wrapper.  Each pattern has exactly one capture
# group: the element's inner HTML.
#
# Written as raw strings so that regex escapes such as ``\s`` reach the
# ``re`` module unchanged instead of being (invalid) string escapes.
BlOCK_ELEMENTS = {
    'h1': r'<h1.*?>(.*?)</h1>',
    'h2': r'<h2.*?>(.*?)</h2>',
    'h3': r'<h3.*?>(.*?)</h3>',
    'h4': r'<h4.*?>(.*?)</h4>',
    'h5': r'<h5.*?>(.*?)</h5>',
    'h6': r'<h6.*?>(.*?)</h6>',
    # <p ...> with attributes; the mandatory whitespace keeps this from
    # matching <pre>.
    'p': r'<p\s.*?>(.*?)</p>',
    # Bare <p> without attributes.
    'p_with_out_class': r'<p>(.*?)</p>',
    'blockquote': r'<blockquote.*?>(.*?)</blockquote>',
    'ul': r'<ul.*?>(.*?)</ul>',
    # Fenced code: a <code> element directly inside <pre>.
    'block_code': r'<pre.*?><code.*?>(.*?)</code></pre>',
}
# Regular expressions for inline elements, applied to the inner HTML of a
# block element.  Each pattern has exactly one capture group whose text is
# wrapped with the Markdown prefix/suffix registered for the same key.
# Iteration order is significant: patterns are applied top to bottom.
INLINE_ELEMENTS = {
    'code': r'<code.*?>(.*?)</code>',
    'span': r'<span.*?>(.*?)</span>',
    'ol': r'<ol.*?>(.*?)</ol>',
    'li': r'<li.*?>(.*?)</li>',
    # <img> is a void element: it has no closing tag and no inner text, so
    # the useful payload is the ``src`` attribute.  Single or double quotes
    # and a self-closing slash are accepted; a stray ``</img>`` directly
    # after the tag is consumed as well.
    'img': r'<img\b[^>]*?\ssrc=["\']([^"\']*)["\'][^>]*>(?:\s*</img>)?',
    'a': r'<a.*?>(.*?)</a>',
    'em': r'<em.*?>(.*?)</em>',
}
class Element:
    """One block-level HTML fragment and its Markdown rendering.

    Attributes are set directly from the constructor arguments:

    * ``pos`` -- value used by callers to order elements (the match offset
      in the source document when created by ``Tomd.parse_block``).
    * ``content`` -- inner HTML of the element; rewritten in place by
      ``parse_inline`` when ``tag`` is a block-level tag.
    * ``tag`` -- key into ``MARKDOWN`` selecting the prefix/suffix pair.
    * ``_result`` -- last string produced by ``__str__`` (``None`` until
      the element has been rendered once).
    """

    def __init__(self, pos, content, tag):
        self.pos = pos
        self.content = content
        self._elements = []
        self.tag = tag
        self._result = None
        # Only block-level elements have their inline markup converted.
        if tag in BlOCK_ELEMENTS:
            self.parse_inline()

    def __str__(self):
        """Return the content wrapped in the Markdown markers for ``tag``.

        The rendered string is also stored on ``_result``.
        """
        wrapper = MARKDOWN.get(self.tag)
        self._result = '{}{}{}'.format(wrapper[0], self.content, wrapper[1])
        return self._result

    def parse_inline(self):
        """Replace inline HTML elements in ``content`` with Markdown.

        Every pattern in ``INLINE_ELEMENTS`` is applied in table order.
        Each pattern is re-applied until it no longer matches, because a
        single pass over nested same-name tags (for example the nested
        ``<span>`` elements emitted by syntax highlighters) consumes only
        the outer opening tag and the first closing tag, leaving the rest
        behind as raw HTML.
        """
        for tag, pattern in INLINE_ELEMENTS.items():
            prefix, suffix = MARKDOWN.get(tag)

            # A callable replacement inserts the wrapper text verbatim, so
            # no replacement-template escaping (``\g<1>``) is involved.
            def wrap(match, prefix=prefix, suffix=suffix):
                return '{}{}{}'.format(prefix, match.group(1) or '', suffix)

            # Terminates: every substitution removes at least one tag pair
            # and the wrappers never introduce new tags.
            replaced = 1
            while replaced:
                self.content, replaced = re.subn(pattern, wrap, self.content)
class Tomd:
    """Convert an HTML document to Markdown.

    The conversion runs in the constructor; the result is available through
    the ``markdown`` property and is also printed to standard output.
    """

    def __init__(self, html):
        """Parse ``html`` (a string) and print the resulting Markdown."""
        self.html = html
        self._elements = []
        self._markdown = None
        self.parse_block()
        # Kept for existing script-style usage that instantiates the class
        # only for its printed output.
        print(self._markdown)

    def parse_block(self):
        """Extract block-level elements and render them in document order.

        Every pattern in ``BlOCK_ELEMENTS`` is searched case-insensitively
        with ``.`` matching newlines.  Matches become ``Element`` objects,
        which are ordered by their start offset and concatenated into
        ``_markdown``.  The element list is rebuilt from scratch on each
        call, so calling this method again does not duplicate output.
        """
        flags = re.I | re.S | re.M
        found = [
            Element(pos=m.start(), content=''.join(m.groups()), tag=tag)
            for tag, pattern in BlOCK_ELEMENTS.items()
            for m in re.finditer(pattern, self.html, flags)
        ]
        # sorted() is stable: elements starting at the same offset keep the
        # order of the pattern table.
        self._elements = sorted(found, key=lambda element: element.pos)
        self._markdown = ''.join(str(e) for e in self._elements)

    @property
    def markdown(self):
        """The Markdown text produced by the last ``parse_block`` call."""
        return self._markdown
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment