Spaces:

Sk4467
/

fastapiapp

Sleeping

Sk4467 committed on Mar 17, 2024

Commit

6c96db3

verified ·

1 Parent(s): 8967645

Update file_processing.py

Files changed (1) hide show

file_processing.py CHANGED Viewed

@@ -30,20 +30,15 @@ import pandas as pd
 import docx
 import tempfile
 from langchain.docstore.document import Document
 def read_pdf(file_path: str) -> str:
-    with tempfile.NamedTemporaryFile(suffix=".pdf") as temp_pdf:
-        # Write the uploaded file's content to the temporary file
-        temp_pdf.write(file.file.read())
-        temp_pdf.seek(0)  # Go to the start of the file
-        # Open the PDF with fitz
-        doc = fitz.open(temp_pdf.name)
-        text = ""
-        for page in doc:
-            text += page.get_text()
-        # No need to delete the temporary file - it's done automatically
-        return text
 def read_docx(file_path: str) -> str:
     doc = docx.Document(file_path)

 import docx
 import tempfile
 from langchain.docstore.document import Document
 def read_pdf(file_path: str) -> str:
+    # Open the PDF with fitz
+    doc = fitz.open(file_path)
+    text = ""
+    for page in doc:
+        text += page.get_text()
+    return text
 def read_docx(file_path: str) -> str:
     doc = docx.Document(file_path)