Spaces:
Sleeping
Sleeping
Update main.py
Browse files
main.py
CHANGED
@@ -6,7 +6,7 @@ import pypandoc
|
|
6 |
import shutil
|
7 |
import logging
|
8 |
import tempfile
|
9 |
-
|
10 |
|
11 |
# Initialize the logger
|
12 |
logging.basicConfig(level=logging.DEBUG)
|
@@ -52,12 +52,15 @@ async def convert_file_to_txt(
|
|
52 |
unique_id = uuid.uuid4().hex
|
53 |
output_filename = os.path.join(tempfile.gettempdir(), f"{base_filename}_{unique_id}.txt")
|
54 |
|
55 |
-
# PDF to text conversion
|
56 |
if ext == '.pdf':
|
57 |
-
text =
|
58 |
-
with open(
|
|
|
|
|
|
|
59 |
f.write(text)
|
60 |
-
logging.debug(f"PDF conversion successful: {output_filename}")
|
61 |
|
62 |
# Other file formats to text conversion using Pandoc
|
63 |
else:
|
|
|
6 |
import shutil
|
7 |
import logging
|
8 |
import tempfile
|
9 |
+
import fitz # PyMuPDF imported here
|
10 |
|
11 |
# Initialize the logger
|
12 |
logging.basicConfig(level=logging.DEBUG)
|
|
|
52 |
unique_id = uuid.uuid4().hex
|
53 |
output_filename = os.path.join(tempfile.gettempdir(), f"{base_filename}_{unique_id}.txt")
|
54 |
|
55 |
+
# PDF to text conversion using PyMuPDF
|
56 |
if ext == '.pdf':
|
57 |
+
text = ""
|
58 |
+
with fitz.open(input_filename) as doc:
|
59 |
+
for page in doc:
|
60 |
+
text += page.get_text()
|
61 |
+
with open(output_filename, "w", encoding="utf-8") as f:
|
62 |
f.write(text)
|
63 |
+
logging.debug(f"PDF conversion successful with PyMuPDF: {output_filename}")
|
64 |
|
65 |
# Other file formats to text conversion using Pandoc
|
66 |
else:
|