oxipng

Sleeping

getapi committed on Sep 11, 2024

Commit

0422eb5

verified ·

1 Parent(s): 5cf78a4

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -101,13 +101,22 @@ def md_to_dom(markdown_text: str) -> list[dict[str, str | list | dict | None]]:
         return {'tag': element.tag, 'attrs': {'src': element.get('src')}}
     def parse_children(element) -> list[str | dict[str, str | list | dict | None]]:
-        return [parse_element(child) if child.tag else child.strip() for child in element.iterchildren() if child.tag or (isinstance(child, str) and child.strip())]
     def parse_element(element) -> dict[str, str | list | dict | None]:
         handlers = {'h1': handle_heading, 'h2': handle_heading, 'h3': handle_heading, 'h4': handle_heading, 'h5': handle_heading, 'h6': handle_heading, 'ul': handle_list, 'ol': handle_list, 'a': handle_link, 'img': handle_media, 'iframe': handle_media}
         handler = handlers.get(element.tag, lambda e: {'tag': e.tag, 'children': parse_children(e)})
         return handler(element)
     html_content = markdown(markdown_text, extensions=['extra', 'sane_lists'])
     tree = html.fromstring(html_content)
     return [parse_element(element) for element in tree.body]

         return {'tag': element.tag, 'attrs': {'src': element.get('src')}}
     def parse_children(element) -> list[str | dict[str, str | list | dict | None]]:
+        children = []
+        for child in element.iterchildren():
+            if child.tag:
+                children.append(parse_element(child))
+            elif isinstance(child, str):
+                children.append(child.strip())
+        if element.text and element.text.strip():
+            children.insert(0, element.text.strip())
+        if element.tail and element.tail.strip():
+            children.append(element.tail.strip())
+        return children
     def parse_element(element) -> dict[str, str | list | dict | None]:
         handlers = {'h1': handle_heading, 'h2': handle_heading, 'h3': handle_heading, 'h4': handle_heading, 'h5': handle_heading, 'h6': handle_heading, 'ul': handle_list, 'ol': handle_list, 'a': handle_link, 'img': handle_media, 'iframe': handle_media}
         handler = handlers.get(element.tag, lambda e: {'tag': e.tag, 'children': parse_children(e)})
         return handler(element)
     html_content = markdown(markdown_text, extensions=['extra', 'sane_lists'])
     tree = html.fromstring(html_content)
     return [parse_element(element) for element in tree.body]