Spaces:
Build error
Update app.py
Browse files
app.py
CHANGED
@@ -20,7 +20,7 @@ nltk.download('punkt')
|
|
20 |
def extract_text_from_pdf(pdf_file):
|
21 |
text = ""
|
22 |
try:
|
23 |
-
pdf_reader = PyPDF2.PdfReader(
|
24 |
for page in pdf_reader.pages:
|
25 |
text += page.extract_text()
|
26 |
except Exception as e:
|
@@ -31,7 +31,7 @@ def extract_text_from_pdf(pdf_file):
|
|
31 |
def extract_text_from_docx(docx_file):
|
32 |
text = ""
|
33 |
try:
|
34 |
-
doc = Document(
|
35 |
text = "\n".join([para.text for para in doc.paragraphs])
|
36 |
except Exception as e:
|
37 |
print(f"Error extracting text from DOCX: {e}")
|
@@ -73,15 +73,16 @@ def upload_files(files):
|
|
73 |
global faiss_index
|
74 |
try:
|
75 |
for file in files:
|
76 |
-
|
|
|
77 |
if file.name.endswith('.pdf'):
|
78 |
-
text = extract_text_from_pdf(
|
79 |
elif file.name.endswith('.docx'):
|
80 |
-
text = extract_text_from_docx(
|
81 |
else:
|
82 |
return {"error": "Unsupported file format"}
|
83 |
|
84 |
-
# Preprocess text
|
85 |
sentences = preprocess_text(text)
|
86 |
|
87 |
# Encode sentences and add to FAISS index
|
|
|
20 |
def extract_text_from_pdf(pdf_file):
|
21 |
text = ""
|
22 |
try:
|
23 |
+
pdf_reader = PyPDF2.PdfReader(pdf_file)
|
24 |
for page in pdf_reader.pages:
|
25 |
text += page.extract_text()
|
26 |
except Exception as e:
|
|
|
31 |
def extract_text_from_docx(docx_file):
|
32 |
text = ""
|
33 |
try:
|
34 |
+
doc = Document(docx_file)
|
35 |
text = "\n".join([para.text for para in doc.paragraphs])
|
36 |
except Exception as e:
|
37 |
print(f"Error extracting text from DOCX: {e}")
|
|
|
73 |
global faiss_index
|
74 |
try:
|
75 |
for file in files:
|
76 |
+
# Access the actual file content
|
77 |
+
file_content = file.read()
|
78 |
if file.name.endswith('.pdf'):
|
79 |
+
text = extract_text_from_pdf(io.BytesIO(file_content))
|
80 |
elif file.name.endswith('.docx'):
|
81 |
+
text = extract_text_from_docx(io.BytesIO(file_content))
|
82 |
else:
|
83 |
return {"error": "Unsupported file format"}
|
84 |
|
85 |
+
# Preprocess text (same as before)
|
86 |
sentences = preprocess_text(text)
|
87 |
|
88 |
# Encode sentences and add to FAISS index
|