Spaces:

NaimaAqeel
/

Chatbot

Build error

NaimaAqeel committed on Jun 7, 2024

Commit

57a1273

verified ·

1 Parent(s): 13c64a5

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -20,7 +20,7 @@ nltk.download('punkt')
 def extract_text_from_pdf(pdf_file):
     text = ""
     try:
-        pdf_reader = PyPDF2.PdfReader(io.BytesIO(pdf_file))
         for page in pdf_reader.pages:
             text += page.extract_text()
     except Exception as e:
@@ -31,7 +31,7 @@ def extract_text_from_pdf(pdf_file):
 def extract_text_from_docx(docx_file):
     text = ""
     try:
-        doc = Document(io.BytesIO(docx_file))
         text = "\n".join([para.text for para in doc.paragraphs])
     except Exception as e:
         print(f"Error extracting text from DOCX: {e}")
@@ -73,15 +73,16 @@ def upload_files(files):
     global faiss_index
     try:
         for file in files:
-            file_data = file.read()
             if file.name.endswith('.pdf'):
-                text = extract_text_from_pdf(file_data)
             elif file.name.endswith('.docx'):
-                text = extract_text_from_docx(file_data)
             else:
                 return {"error": "Unsupported file format"}
-            # Preprocess text
             sentences = preprocess_text(text)
             # Encode sentences and add to FAISS index

 def extract_text_from_pdf(pdf_file):
     text = ""
     try:
+        pdf_reader = PyPDF2.PdfReader(pdf_file)
         for page in pdf_reader.pages:
             text += page.extract_text()
     except Exception as e:
 def extract_text_from_docx(docx_file):
     text = ""
     try:
+        doc = Document(docx_file)
         text = "\n".join([para.text for para in doc.paragraphs])
     except Exception as e:
         print(f"Error extracting text from DOCX: {e}")
     global faiss_index
     try:
         for file in files:
+            # Access the actual file content
+            file_content = file.read()
             if file.name.endswith('.pdf'):
+                text = extract_text_from_pdf(io.BytesIO(file_content))
             elif file.name.endswith('.docx'):
+                text = extract_text_from_docx(io.BytesIO(file_content))
             else:
                 return {"error": "Unsupported file format"}
+            # Preprocess text (same as before)
             sentences = preprocess_text(text)
             # Encode sentences and add to FAISS index