NaimaAqeel committed on
Commit
b28c6a7
·
verified ·
1 Parent(s): 8c06cc2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -5
app.py CHANGED
@@ -13,15 +13,23 @@ import pickle
13
  # Function to extract text from a PDF file
14
  def extract_text_from_pdf(pdf_path):
15
  text = ""
16
- doc = fitz.open(pdf_path)
17
- for page in doc:
18
- text += page.get_text()
 
 
 
 
19
  return text
20
 
21
  # Function to extract text from a Word document
22
  def extract_text_from_docx(docx_path):
23
- doc = Document(docx_path)
24
- text = "\n".join([para.text for para in doc.paragraphs])
 
 
 
 
25
  return text
26
 
27
  # Initialize the embedding model
 
13
  # Function to extract text from a PDF file
14
  def extract_text_from_pdf(pdf_path):
15
  text = ""
16
+ try:
17
+ doc = fitz.open(pdf_path)
18
+ for page_num in range(len(doc)):
19
+ page = doc.load_page(page_num)
20
+ text += page.get_text()
21
+ except Exception as e:
22
+ print(f"Error extracting text from PDF: {e}")
23
  return text
24
 
25
  # Function to extract text from a Word document
26
  def extract_text_from_docx(docx_path):
27
+ text = ""
28
+ try:
29
+ doc = Document(docx_path)
30
+ text = "\n".join([para.text for para in doc.paragraphs])
31
+ except Exception as e:
32
+ print(f"Error extracting text from DOCX: {e}")
33
  return text
34
 
35
  # Initialize the embedding model