Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
T
tomd
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Registry
Registry
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Jobs
Commits
Open sidebar
Чумбаев Максим
tomd
Commits
3540279d
Commit
3540279d
authored
Jul 07, 2017
by
Andrew Xia
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
tomd support for evernote checkbox
parent
d1f207f5
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
135 additions
and
11 deletions
+135
-11
ex.md
ex.md
+101
-0
tomd.py
tomd.py
+34
-11
No files found.
ex.md
0 → 100644
View file @
3540279d
# h1
## h2
### h3
#### h4
##### h5
###### h6
paragraph
[
link
](
https://github.com
)
![
img
](
https://github.com
)
-
1
-
2
-
3
1.
1
1.
2
1.
3
> blockquote
`inline code`
```
block code
```
~~del~~
**bold**
*italic*
***bold italic**
*
**em**
**strong**
---
|
th1|th
2
|
-----
-
|
td|td
|
td|td
#
h1
#
# h2
#
## h3
#
### h4
#
#### h5
#
##### h6
p
aragraph
[
link](https://github.com)
!
[img](https://github.com)
-
1
-
2
-
3
1
. 1
1
. 2
1
. 3
>
blockquote
`
inline code`
`
``
b
lock code
`
``
~
~del~~
*
*bold**
*
italic*
*
**bold italic***
*
*em**
*
*strong**
-
--
|th1|th2
|------
|td|td
|td|td
tomd.py
View file @
3540279d
import
re
import
re
,
os
__all__
=
[
'Tomd'
,
'convert'
]
__all__
=
[
'Tomd'
,
'convert'
]
...
@@ -56,7 +56,7 @@ BlOCK_ELEMENTS = {
...
@@ -56,7 +56,7 @@ BlOCK_ELEMENTS = {
'table'
:
'<table.*?>(.*?)</table>'
,
#assume that table must be around tr
'table'
:
'<table.*?>(.*?)</table>'
,
#assume that table must be around tr
# evernote
# evernote
'e_p'
:
'<div
style="font-size
.*?>(.*?)</div>'
#div for paragraph ?
'e_p'
:
'<div.*?>(.*?)</div>'
#div for paragraph ?
}
}
INLINE_ELEMENTS
=
{
INLINE_ELEMENTS
=
{
...
@@ -80,23 +80,24 @@ INLINE_ELEMENTS = {
...
@@ -80,23 +80,24 @@ INLINE_ELEMENTS = {
'tbody'
:
'<tbody.*?>((.|
\n
)*)</tbody>'
,
'tbody'
:
'<tbody.*?>((.|
\n
)*)</tbody>'
,
}
}
DELETE_ELEMENTS
=
[
'<span.*?>'
,
'</span>'
,
'<div.*?>'
,
'</div>'
]
DELETE_ELEMENTS
=
[
'<span.*?>'
,
'</span>'
,
'<div.*?>'
,
'</div>'
,
'<br clear="none"/>'
]
class
Element
:
class
Element
:
def
__init__
(
self
,
start_pos
,
end_pos
,
content
,
tag
,
is_block
=
False
):
def
__init__
(
self
,
start_pos
,
end_pos
,
content
,
tag
,
folder
,
is_block
=
False
):
self
.
start_pos
=
start_pos
self
.
start_pos
=
start_pos
self
.
end_pos
=
end_pos
self
.
end_pos
=
end_pos
self
.
content
=
content
self
.
content
=
content
self
.
_elements
=
[]
self
.
_elements
=
[]
self
.
is_block
=
is_block
self
.
is_block
=
is_block
self
.
tag
=
tag
self
.
tag
=
tag
self
.
folder
=
folder
self
.
_result
=
None
self
.
_result
=
None
if
self
.
is_block
:
if
self
.
is_block
:
print
"parsing tag:"
,
self
.
tag
,
", content: "
,
repr
(
self
.
content
)
#
print "parsing tag:", self.tag, ", content: ", repr(self.content)
self
.
parse_inline
()
self
.
parse_inline
()
print
"parsed:"
,
self
.
tag
,
", content: "
,
self
.
content
if
self
.
tag
!=
'table'
:
print
"parsed:"
,
self
.
tag
,
self
.
folder
,
", content: "
,
repr
(
self
.
content
)
def
__str__
(
self
):
def
__str__
(
self
):
wrapper
=
MARKDOWN
.
get
(
self
.
tag
)
wrapper
=
MARKDOWN
.
get
(
self
.
tag
)
...
@@ -105,6 +106,19 @@ class Element:
...
@@ -105,6 +106,19 @@ class Element:
def
parse_inline
(
self
):
def
parse_inline
(
self
):
self
.
content
=
self
.
content
.
replace
(
'
\r
'
,
''
)
#windows \r character
self
.
content
=
self
.
content
.
replace
(
'
\r
'
,
''
)
#windows \r character
self
.
content
=
self
.
content
.
replace
(
'"'
,
'
\"
'
)
#html quote mark
for
m
in
re
.
finditer
(
"<img(.*?)en_todo.*?>"
,
self
.
content
):
#remove img and change to [ ] and [x]
#evernote specific parsing
imgSrc
=
re
.
search
(
'src=".*?"'
,
m
.
group
())
imgLoc
=
imgSrc
.
group
()[
5
:
-
1
]
#remove source and " "
imgLoc
=
imgLoc
.
replace
(
'
\\
'
,
'/'
)
#\\ folder slash rotate
if
os
.
stat
(
self
.
folder
+
"/"
+
imgLoc
)
.
st_size
<
250
:
self
.
content
=
self
.
content
.
replace
(
m
.
group
(),
"[ ] "
)
else
:
self
.
content
=
self
.
content
.
replace
(
m
.
group
(),
"[x] "
)
# print self.content
if
"e_"
in
self
.
tag
:
#evernote-specific parsing
if
"e_"
in
self
.
tag
:
#evernote-specific parsing
# if self.content != re.sub(BlOCK_ELEMENTS['table'], '\g<1>', self.content):
# if self.content != re.sub(BlOCK_ELEMENTS['table'], '\g<1>', self.content):
...
@@ -114,7 +128,7 @@ class Element:
...
@@ -114,7 +128,7 @@ class Element:
inner
=
Element
(
start_pos
=
m
.
start
(),
inner
=
Element
(
start_pos
=
m
.
start
(),
end_pos
=
m
.
end
(),
end_pos
=
m
.
end
(),
content
=
''
.
join
(
m
.
groups
()),
content
=
''
.
join
(
m
.
groups
()),
tag
=
'table'
,
tag
=
'table'
,
folder
=
self
.
folder
,
is_block
=
True
)
is_block
=
True
)
self
.
content
=
inner
.
content
self
.
content
=
inner
.
content
return
#no need for further parsing ?
return
#no need for further parsing ?
...
@@ -154,6 +168,10 @@ class Element:
...
@@ -154,6 +168,10 @@ class Element:
else
:
else
:
wrapper
=
MARKDOWN
.
get
(
tag
)
wrapper
=
MARKDOWN
.
get
(
tag
)
self
.
content
=
re
.
sub
(
pattern
,
'{}
\
g<1>{}'
.
format
(
wrapper
[
0
],
wrapper
[
1
]),
self
.
content
)
self
.
content
=
re
.
sub
(
pattern
,
'{}
\
g<1>{}'
.
format
(
wrapper
[
0
],
wrapper
[
1
]),
self
.
content
)
# if self.tag == "e_p" and self.content[-2:] != '\n': #div, add new line if not there
# self.content += '\n'
def
construct_table
(
self
):
def
construct_table
(
self
):
# this function, after self.content has gained | for table entries,
# this function, after self.content has gained | for table entries,
# adds the |---| in markdown to create a proper table
# adds the |---| in markdown to create a proper table
...
@@ -173,12 +191,16 @@ class Element:
...
@@ -173,12 +191,16 @@ class Element:
class
Tomd
:
class
Tomd
:
def
__init__
(
self
,
html
=
''
,
options
=
None
):
def
__init__
(
self
,
html
=
''
,
folder
=
''
,
file
=
''
,
options
=
None
):
self
.
html
=
html
#actual data
self
.
html
=
html
#actual data
self
.
folder
=
folder
self
.
file
=
file
self
.
options
=
options
# haven't been implemented yet
self
.
options
=
options
# haven't been implemented yet
self
.
_markdown
=
''
self
.
_markdown
=
''
def
convert
(
self
,
html
,
options
=
None
):
def
convert
(
self
,
html
=
""
,
options
=
None
):
if
html
==
""
:
html
=
self
.
html
#main function here
#main function here
elements
=
[]
elements
=
[]
for
tag
,
pattern
in
BlOCK_ELEMENTS
.
items
():
for
tag
,
pattern
in
BlOCK_ELEMENTS
.
items
():
...
@@ -186,11 +208,12 @@ class Tomd:
...
@@ -186,11 +208,12 @@ class Tomd:
for
m
in
re
.
finditer
(
pattern
,
html
,
re
.
I
|
re
.
S
|
re
.
M
):
for
m
in
re
.
finditer
(
pattern
,
html
,
re
.
I
|
re
.
S
|
re
.
M
):
# now m contains the pattern without the tag
# now m contains the pattern without the tag
# if tag == "e_p":
# if tag == "e_p":
print
"found"
,
tag
,
m
.
groups
(),
"start"
,
m
.
start
(),
"end"
,
m
.
end
()
print
"found"
,
tag
,
m
.
groups
(),
"start"
,
m
.
start
(),
"end"
,
m
.
end
()
,
self
.
folder
element
=
Element
(
start_pos
=
m
.
start
(),
element
=
Element
(
start_pos
=
m
.
start
(),
end_pos
=
m
.
end
(),
end_pos
=
m
.
end
(),
content
=
''
.
join
(
m
.
groups
()),
content
=
''
.
join
(
m
.
groups
()),
tag
=
tag
,
tag
=
tag
,
folder
=
self
.
folder
,
is_block
=
True
)
is_block
=
True
)
can_append
=
True
can_append
=
True
for
e
in
elements
:
for
e
in
elements
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment