NaimaAqeel committed on
Commit
e4b3db1
·
verified ·
1 Parent(s): ebb7c59

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -6
app.py CHANGED
@@ -17,10 +17,10 @@ import nltk
17
  nltk.download('punkt')
18
 
19
  # Function to extract text from a PDF file
20
- def extract_text_from_pdf(pdf_file):
21
  text = ""
22
  try:
23
- pdf_reader = PyPDF2.PdfReader(io.BytesIO(pdf_file))
24
  for page in pdf_reader.pages:
25
  text += page.extract_text()
26
  except Exception as e:
@@ -28,10 +28,10 @@ def extract_text_from_pdf(pdf_file):
28
  return text
29
 
30
  # Function to extract text from a Word document
31
- def extract_text_from_docx(docx_file):
32
  text = ""
33
  try:
34
- doc = Document(io.BytesIO(docx_file))
35
  text = "\n".join([para.text for para in doc.paragraphs])
36
  except Exception as e:
37
  print(f"Error extracting text from DOCX: {e}")
@@ -74,9 +74,9 @@ def upload_files(files):
74
  try:
75
  for file in files:
76
  if file.name.endswith('.pdf'):
77
- text = extract_text_from_pdf(file.file.read())
78
  elif file.name.endswith('.docx'):
79
- text = extract_text_from_docx(file.file.read())
80
  else:
81
  return {"error": "Unsupported file format"}
82
 
 
17
  nltk.download('punkt')
18
 
19
  # Function to extract text from a PDF file
20
+ def extract_text_from_pdf(pdf_data):
21
  text = ""
22
  try:
23
+ pdf_reader = PyPDF2.PdfReader(io.BytesIO(pdf_data))
24
  for page in pdf_reader.pages:
25
  text += page.extract_text()
26
  except Exception as e:
 
28
  return text
29
 
30
  # Function to extract text from a Word document
31
+ def extract_text_from_docx(docx_data):
32
  text = ""
33
  try:
34
+ doc = Document(io.BytesIO(docx_data))
35
  text = "\n".join([para.text for para in doc.paragraphs])
36
  except Exception as e:
37
  print(f"Error extracting text from DOCX: {e}")
 
74
  try:
75
  for file in files:
76
  if file.name.endswith('.pdf'):
77
+ text = extract_text_from_pdf(file.data)
78
  elif file.name.endswith('.docx'):
79
+ text = extract_text_from_docx(file.data)
80
  else:
81
  return {"error": "Unsupported file format"}
82