Spaces:

NaimaAqeel
/

Chatbot

Runtime error

NaimaAqeel committed on Jun 4, 2024

Commit

0632240

verified ·

1 Parent(s): 3f3bafc

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -13,11 +13,19 @@ from langchain_community.embeddings import HuggingFaceEmbeddings
 from nltk.tokenize import sent_tokenize  # Import for sentence segmentation
 from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
-# Function to extract text from a PDF file (same as before)
 def extract_text_from_pdf(pdf_path):
-    # ...
-# Function to extract text from a Word document (fixed indentation)
 def extract_text_from_docx(docx_path):
     """Extracts text from a Word document."""
     text = ""

 from nltk.tokenize import sent_tokenize  # Import for sentence segmentation
 from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
+# Function to extract text from a PDF file
 def extract_text_from_pdf(pdf_path):
+    text = ""
+    try:
+        doc = fitz.open(pdf_path)
+        for page_num in range(len(doc)):
+            page = doc.load_page(page_num)
+            text += page.get_text()
+    except Exception as e:
+        print(f"Error extracting text from PDF: {e}")
+    return text
+# Function to extract text from a Word document
 def extract_text_from_docx(docx_path):
     """Extracts text from a Word document."""
     text = ""