Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -20,7 +20,7 @@ nltk.download('punkt')
|
|
20 |
def extract_text_from_pdf(pdf_file):
|
21 |
text = ""
|
22 |
try:
|
23 |
-
pdf_reader = PyPDF2.PdfReader(pdf_file)
|
24 |
for page in pdf_reader.pages:
|
25 |
text += page.extract_text()
|
26 |
except Exception as e:
|
@@ -31,7 +31,7 @@ def extract_text_from_pdf(pdf_file):
|
|
31 |
def extract_text_from_docx(docx_file):
|
32 |
text = ""
|
33 |
try:
|
34 |
-
doc = Document(docx_file)
|
35 |
text = "\n".join([para.text for para in doc.paragraphs])
|
36 |
except Exception as e:
|
37 |
print(f"Error extracting text from DOCX: {e}")
|
@@ -73,16 +73,14 @@ def upload_files(files):
|
|
73 |
global faiss_index
|
74 |
try:
|
75 |
for file in files:
|
76 |
-
# Access the actual file content
|
77 |
-
file_content = file.read()
|
78 |
if file.name.endswith('.pdf'):
|
79 |
-
text = extract_text_from_pdf(file_content)
|
80 |
elif file.name.endswith('.docx'):
|
81 |
-
text = extract_text_from_docx(file_content)
|
82 |
else:
|
83 |
return {"error": "Unsupported file format"}
|
84 |
|
85 |
-
# Preprocess text
|
86 |
sentences = preprocess_text(text)
|
87 |
|
88 |
# Encode sentences and add to FAISS index
|
|
|
20 |
def extract_text_from_pdf(pdf_file):
|
21 |
text = ""
|
22 |
try:
|
23 |
+
pdf_reader = PyPDF2.PdfReader(io.BytesIO(pdf_file))
|
24 |
for page in pdf_reader.pages:
|
25 |
text += page.extract_text()
|
26 |
except Exception as e:
|
|
|
31 |
def extract_text_from_docx(docx_file):
|
32 |
text = ""
|
33 |
try:
|
34 |
+
doc = Document(io.BytesIO(docx_file))
|
35 |
text = "\n".join([para.text for para in doc.paragraphs])
|
36 |
except Exception as e:
|
37 |
print(f"Error extracting text from DOCX: {e}")
|
|
|
73 |
global faiss_index
|
74 |
try:
|
75 |
for file in files:
|
|
|
|
|
76 |
if file.name.endswith('.pdf'):
|
77 |
+
text = extract_text_from_pdf(file.file.read())
|
78 |
elif file.name.endswith('.docx'):
|
79 |
+
text = extract_text_from_docx(file.file.read())
|
80 |
else:
|
81 |
return {"error": "Unsupported file format"}
|
82 |
|
83 |
+
# Preprocess text
|
84 |
sentences = preprocess_text(text)
|
85 |
|
86 |
# Encode sentences and add to FAISS index
|