NaimaAqeel committed on
Commit
57a1273
·
verified ·
1 Parent(s): 13c64a5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -6
app.py CHANGED
@@ -20,7 +20,7 @@ nltk.download('punkt')
20
  def extract_text_from_pdf(pdf_file):
21
  text = ""
22
  try:
23
- pdf_reader = PyPDF2.PdfReader(io.BytesIO(pdf_file))
24
  for page in pdf_reader.pages:
25
  text += page.extract_text()
26
  except Exception as e:
@@ -31,7 +31,7 @@ def extract_text_from_pdf(pdf_file):
31
  def extract_text_from_docx(docx_file):
32
  text = ""
33
  try:
34
- doc = Document(io.BytesIO(docx_file))
35
  text = "\n".join([para.text for para in doc.paragraphs])
36
  except Exception as e:
37
  print(f"Error extracting text from DOCX: {e}")
@@ -73,15 +73,16 @@ def upload_files(files):
73
  global faiss_index
74
  try:
75
  for file in files:
76
- file_data = file.read()
 
77
  if file.name.endswith('.pdf'):
78
- text = extract_text_from_pdf(file_data)
79
  elif file.name.endswith('.docx'):
80
- text = extract_text_from_docx(file_data)
81
  else:
82
  return {"error": "Unsupported file format"}
83
 
84
- # Preprocess text
85
  sentences = preprocess_text(text)
86
 
87
  # Encode sentences and add to FAISS index
 
20
  def extract_text_from_pdf(pdf_file):
21
  text = ""
22
  try:
23
+ pdf_reader = PyPDF2.PdfReader(pdf_file)
24
  for page in pdf_reader.pages:
25
  text += page.extract_text()
26
  except Exception as e:
 
31
  def extract_text_from_docx(docx_file):
32
  text = ""
33
  try:
34
+ doc = Document(docx_file)
35
  text = "\n".join([para.text for para in doc.paragraphs])
36
  except Exception as e:
37
  print(f"Error extracting text from DOCX: {e}")
 
73
  global faiss_index
74
  try:
75
  for file in files:
76
+ # Access the actual file content
77
+ file_content = file.read()
78
  if file.name.endswith('.pdf'):
79
+ text = extract_text_from_pdf(io.BytesIO(file_content))
80
  elif file.name.endswith('.docx'):
81
+ text = extract_text_from_docx(io.BytesIO(file_content))
82
  else:
83
  return {"error": "Unsupported file format"}
84
 
85
+ # Preprocess text (same as before)
86
  sentences = preprocess_text(text)
87
 
88
  # Encode sentences and add to FAISS index