Spaces:

Sk4467
/

fastapiapp

Sleeping

Sk4467 committed on Mar 17, 2024

Commit

8967645

verified ·

1 Parent(s): d076fbf

Update file_processing.py

Files changed (1) hide show

file_processing.py CHANGED Viewed

@@ -60,16 +60,18 @@ def read_txt(file_path: str) -> str:
     with open(file_path, 'r', encoding='utf-8') as file:
         return file.read()
-async def load_documents(file: UploadFile)->List[Document]:
     temp_file_path = f"temp_{file.filename}"
     try:
         # Save the uploaded file to a temporary file
         with open(temp_file_path, "wb") as temp_file:
-            temp_file.write(await file.read())
         content = ""
         if file.filename.endswith('.pdf'):
-            content = read_pdf(temp_file_path)
         elif file.filename.endswith('.docx'):
             content = read_docx(temp_file_path)
         elif file.filename.endswith('.csv'):
@@ -79,19 +81,18 @@ async def load_documents(file: UploadFile)->List[Document]:
         else:
             raise ValueError("Unsupported file format")
     except Exception as e:
-        # Handle general errors - log or adjust as necessary for your application
         print(f"Error processing document: {e}")
         content = "Error processing document."
     finally:
-        # Cleanup: remove the temporary file
         if os.path.exists(temp_file_path):
-            os.remove(temp_file_path)
     metadata = {'source': file.filename}
     document = Document(page_content=content, metadata=metadata)
     return [document]
 from langchain.text_splitter import CharacterTextSplitter
 def chunk_documents(documents, chunk_size=1000, chunk_overlap=200):

     with open(file_path, 'r', encoding='utf-8') as file:
         return file.read()
+async def load_documents(file: UploadFile) -> List[Document]:
     temp_file_path = f"temp_{file.filename}"
     try:
         # Save the uploaded file to a temporary file
         with open(temp_file_path, "wb") as temp_file:
+            contents = await file.read()  # Read the content of the uploaded file
+            temp_file.write(contents)  # Write the content to the temporary file
+        # Now you can pass temp_file_path to your read functions
         content = ""
         if file.filename.endswith('.pdf'):
+            content = read_pdf(temp_file_path)  # Pass the path, not the file object
         elif file.filename.endswith('.docx'):
             content = read_docx(temp_file_path)
         elif file.filename.endswith('.csv'):
         else:
             raise ValueError("Unsupported file format")
     except Exception as e:
         print(f"Error processing document: {e}")
         content = "Error processing document."
     finally:
         if os.path.exists(temp_file_path):
+            os.remove(temp_file_path)  # Clean up the temporary file
     metadata = {'source': file.filename}
     document = Document(page_content=content, metadata=metadata)
     return [document]
 from langchain.text_splitter import CharacterTextSplitter
 def chunk_documents(documents, chunk_size=1000, chunk_overlap=200):