chore: comment out tempfile/requests imports and cookie loading; drop file_extension local
Browse files
- docsifer/service.py +11 -14
docsifer/service.py
CHANGED
@@ -2,12 +2,12 @@ from __future__ import annotations
|
|
2 |
|
3 |
import asyncio
|
4 |
import logging
|
5 |
-
import tempfile
|
6 |
|
7 |
-
import requests.cookies
|
8 |
import magic
|
9 |
import mimetypes
|
10 |
-
import requests
|
11 |
from pathlib import Path
|
12 |
from typing import Optional, Dict, Tuple, Any
|
13 |
from scuid import scuid
|
@@ -130,8 +130,6 @@ class DocsiferService:
|
|
130 |
Returns:
|
131 |
A tuple containing a dictionary with keys "filename" and "markdown", and the token count.
|
132 |
"""
|
133 |
-
file_extension = None
|
134 |
-
|
135 |
if source.startswith("http"):
|
136 |
filename = f"{scuid()}.html"
|
137 |
else:
|
@@ -165,7 +163,6 @@ class DocsiferService:
|
|
165 |
if cleanup and guessed_ext.lower() in (".html", ".htm"):
|
166 |
self._maybe_cleanup_html(tmp_path)
|
167 |
|
168 |
-
file_extension = guessed_ext.lstrip(".")
|
169 |
filename = new_filename
|
170 |
source = tmp_path
|
171 |
|
@@ -176,16 +173,16 @@ class DocsiferService:
|
|
176 |
md_converter = self._basic_markitdown
|
177 |
|
178 |
# Load cookies if provided in the HTTP config.
|
179 |
-
if http_config:
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
|
187 |
try:
|
188 |
-
result_obj = md_converter.convert(source
|
189 |
print("result_obj:\n", result_obj.text_content)
|
190 |
except Exception as e:
|
191 |
logger.error("MarkItDown conversion failed: %s", e)
|
|
|
2 |
|
3 |
import asyncio
|
4 |
import logging
|
5 |
+
# import tempfile
|
6 |
|
7 |
+
# import requests.cookies
|
8 |
import magic
|
9 |
import mimetypes
|
10 |
+
# import requests
|
11 |
from pathlib import Path
|
12 |
from typing import Optional, Dict, Tuple, Any
|
13 |
from scuid import scuid
|
|
|
130 |
Returns:
|
131 |
A tuple containing a dictionary with keys "filename" and "markdown", and the token count.
|
132 |
"""
|
|
|
|
|
133 |
if source.startswith("http"):
|
134 |
filename = f"{scuid()}.html"
|
135 |
else:
|
|
|
163 |
if cleanup and guessed_ext.lower() in (".html", ".htm"):
|
164 |
self._maybe_cleanup_html(tmp_path)
|
165 |
|
|
|
166 |
filename = new_filename
|
167 |
source = tmp_path
|
168 |
|
|
|
173 |
md_converter = self._basic_markitdown
|
174 |
|
175 |
# Load cookies if provided in the HTTP config.
|
176 |
+
# if http_config:
|
177 |
+
# if "cookies" in http_config:
|
178 |
+
# requests.cookies.cookiejar_from_dict(
|
179 |
+
# http_config["cookies"],
|
180 |
+
# requests.cookies.RequestsCookieJar,
|
181 |
+
# overwrite=True,
|
182 |
+
# )
|
183 |
|
184 |
try:
|
185 |
+
result_obj = md_converter.convert(source)
|
186 |
print("result_obj:\n", result_obj.text_content)
|
187 |
except Exception as e:
|
188 |
logger.error("MarkItDown conversion failed: %s", e)
|