Spaces:

Sk4467
/

fastapiapp

Sleeping

Sk4467 committed on Mar 17, 2024

Commit

d42007c

verified ·

1 Parent(s): 5a893a5

Update file_processing.py

Files changed (1) hide show

file_processing.py CHANGED Viewed

@@ -28,13 +28,22 @@ from typing import List
 import fitz  # PyMuPDF
 import pandas as pd
 import docx
 from langchain.docstore.document import Document
 def read_pdf(file_path: str) -> str:
     """Return the text of every page of the PDF at ``file_path``, concatenated.

     Args:
         file_path: Path of the PDF file, passed straight to ``fitz.open``.

     Returns:
         The ``page.get_text()`` output of each page, joined with no separator.
     """
     # The context manager closes the document once the text is collected;
     # the earlier version never closed it and left the file handle open.
     with fitz.open(file_path) as doc:
         # A single join replaces the repeated ``text += ...`` concatenation.
         return "".join(page.get_text() for page in doc)
 def read_docx(file_path: str) -> str:
     doc = docx.Document(file_path)

 import fitz  # PyMuPDF
 import pandas as pd
 import docx
+import tempfile
 from langchain.docstore.document import Document
 def read_pdf(file_path: str) -> str:
     """Return the text of every page of the PDF at ``file_path``, concatenated.

     Args:
         file_path: Path of the PDF file, passed straight to ``fitz.open``.

     Returns:
         The ``page.get_text()`` output of each page, joined with no separator.
     """
     # The earlier body copied ``file.file.read()`` into a temporary file, but
     # no ``file`` name exists in this function, so every call raised
     # NameError. The function is given a path, so open that path directly;
     # no temporary copy is needed.
     # NOTE(review): if callers actually pass an upload object rather than a
     # path, they should save it to disk first -- confirm against the caller.
     with fitz.open(file_path) as doc:
         # The context manager closes the document once the text is collected.
         return "".join(page.get_text() for page in doc)
 def read_docx(file_path: str) -> str:
     doc = docx.Document(file_path)