Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -101,13 +101,22 @@ def md_to_dom(markdown_text: str) -> list[dict[str, str | list | dict | None]]:
|
|
101 |
return {'tag': element.tag, 'attrs': {'src': element.get('src')}}
|
102 |
|
103 |
def parse_children(element) -> list[str | dict[str, str | list | dict | None]]:
|
104 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
105 |
|
106 |
def parse_element(element) -> dict[str, str | list | dict | None]:
    """Convert one parsed HTML element into its dictionary representation.

    The element's tag selects a specialised handler (headings, lists, links,
    media). Any other tag falls back to a generic node holding the tag name
    and the element's parsed children.
    """
    def generic_node(node):
        # Fallback used for every tag without a dedicated handler.
        return {'tag': node.tag, 'children': parse_children(node)}

    dispatch = {
        'h1': handle_heading,
        'h2': handle_heading,
        'h3': handle_heading,
        'h4': handle_heading,
        'h5': handle_heading,
        'h6': handle_heading,
        'ul': handle_list,
        'ol': handle_list,
        'a': handle_link,
        'img': handle_media,
        'iframe': handle_media,
    }
    convert = dispatch.get(element.tag, generic_node)
    return convert(element)
|
110 |
-
|
111 |
html_content = markdown(markdown_text, extensions=['extra', 'sane_lists'])
|
112 |
tree = html.fromstring(html_content)
|
113 |
return [parse_element(element) for element in tree.body]
|
|
|
101 |
return {'tag': element.tag, 'attrs': {'src': element.get('src')}}
|
102 |
|
103 |
def parse_children(element) -> list[str | dict[str, str | list | dict | None]]:
|
104 |
+
children = []
|
105 |
+
for child in element.iterchildren():
|
106 |
+
if child.tag:
|
107 |
+
children.append(parse_element(child))
|
108 |
+
elif isinstance(child, str):
|
109 |
+
children.append(child.strip())
|
110 |
+
if element.text and element.text.strip():
|
111 |
+
children.insert(0, element.text.strip())
|
112 |
+
if element.tail and element.tail.strip():
|
113 |
+
children.append(element.tail.strip())
|
114 |
+
return children
|
115 |
|
116 |
def parse_element(element) -> dict[str, str | list | dict | None]:
    """Convert one parsed HTML element into its dictionary representation.

    The element's tag selects a specialised handler (headings, lists, links,
    media). Any other tag falls back to a generic node holding the tag name
    and the element's parsed children.
    """
    def generic_node(node):
        # Fallback used for every tag without a dedicated handler.
        return {'tag': node.tag, 'children': parse_children(node)}

    dispatch = {
        'h1': handle_heading,
        'h2': handle_heading,
        'h3': handle_heading,
        'h4': handle_heading,
        'h5': handle_heading,
        'h6': handle_heading,
        'ul': handle_list,
        'ol': handle_list,
        'a': handle_link,
        'img': handle_media,
        'iframe': handle_media,
    }
    convert = dispatch.get(element.tag, generic_node)
    return convert(element)
|
|
|
120 |
html_content = markdown(markdown_text, extensions=['extra', 'sane_lists'])
|
121 |
tree = html.fromstring(html_content)
|
122 |
return [parse_element(element) for element in tree.body]
|