Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -19,22 +19,39 @@ text_chunks = []
|
|
19 |
def extract_text_from_pdf(pdf_file):
|
20 |
"""Extract text from a PDF file."""
|
21 |
text = ""
|
22 |
-
|
23 |
-
|
24 |
-
|
|
|
|
|
|
|
25 |
return text
|
26 |
|
27 |
-
def index_text_chunks(
|
28 |
"""Split text into chunks, generate embeddings, and index them."""
|
29 |
global text_chunks, index
|
30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
embeddings = embedding_model.encode(text_chunks)
|
|
|
|
|
32 |
index = faiss.IndexFlatL2(dimension)
|
33 |
index.add(np.array(embeddings))
|
34 |
-
|
|
|
35 |
|
36 |
def answer_question(question):
|
37 |
"""Retrieve relevant chunks and generate an answer."""
|
|
|
|
|
38 |
if not text_chunks:
|
39 |
return "Please upload a paper first."
|
40 |
|
@@ -56,7 +73,7 @@ with gr.Blocks() as demo:
|
|
56 |
gr.Markdown("Upload a PDF of your research paper and ask questions about it.")
|
57 |
|
58 |
with gr.Row():
|
59 |
-
pdf_input = gr.File(label="Upload PDF")
|
60 |
upload_status = gr.Textbox(label="Upload Status", interactive=False)
|
61 |
|
62 |
with gr.Row():
|
|
|
19 |
def extract_text_from_pdf(pdf_file):
    """Extract the text of every page of a PDF.

    Args:
        pdf_file: The uploaded file, passed straight to ``pdfplumber.open``.
            ``None`` (nothing uploaded) is tolerated.

    Returns:
        The non-empty page texts joined by blank lines, or ``""`` when there
        is no file, the file cannot be read, or no page yields any text.
        Failures are logged rather than returned, so the result is always
        either real document text or empty and callers can test it with a
        plain truthiness check.
    """
    import logging

    if pdf_file is None:
        return ""

    try:
        with pdfplumber.open(pdf_file) as pdf:
            # extract_text() may give None for a page without text.
            page_texts = [page.extract_text() or "" for page in pdf.pages]
    except Exception:
        # Boundary with a third-party parser: record the traceback, but never
        # hand the error message back as if it were the paper's content.
        logging.getLogger(__name__).exception("Error extracting text from PDF")
        return ""

    # A blank line between pages keeps the last word of one page from fusing
    # with the first word of the next, and gives a splitter that breaks on
    # blank lines a boundary at every page break.
    return "\n\n".join(page_text for page_text in page_texts if page_text)
|
29 |
|
30 |
+
def index_text_chunks(pdf_file):
    """Extract a PDF's text, split it into chunks, embed them and index them.

    On success the module-level ``text_chunks`` and ``index`` are replaced
    together; on any early return they are left untouched, so a previously
    indexed paper stays usable.

    Args:
        pdf_file: The uploaded file, forwarded to ``extract_text_from_pdf``.
            ``None`` (nothing uploaded) is tolerated.

    Returns:
        A human-readable status message for the upload.
    """
    global text_chunks, index

    no_text_message = "No text extracted from the PDF. Please upload a valid PDF file."
    if pdf_file is None:
        return no_text_message

    # Extract text from the uploaded PDF
    text = extract_text_from_pdf(pdf_file)
    if not text:
        return no_text_message
    if text.startswith("Error extracting text:"):
        # The extractor may report a failure as a message string; show it as
        # the status instead of indexing it as if it were the paper.
        return text

    # Split text into chunks (e.g., paragraphs)
    chunks = [chunk for chunk in text.split("\n\n") if chunk.strip()]
    if not chunks:
        # Whitespace-only text: nothing to embed or index.
        return no_text_message

    # Generate embeddings for the chunks; FAISS works on float32 matrices.
    embeddings = np.asarray(embedding_model.encode(chunks), dtype="float32")

    # Build the FAISS index
    new_index = faiss.IndexFlatL2(dimension)
    new_index.add(embeddings)

    # Publish both globals only after everything succeeded, so the chunk list
    # and the index can never describe different documents.
    text_chunks, index = chunks, new_index

    return f"Paper uploaded and indexed successfully! Found {len(text_chunks)} chunks."
|
50 |
|
51 |
def answer_question(question):
|
52 |
"""Retrieve relevant chunks and generate an answer."""
|
53 |
+
global text_chunks, index
|
54 |
+
|
55 |
if not text_chunks:
|
56 |
return "Please upload a paper first."
|
57 |
|
|
|
73 |
gr.Markdown("Upload a PDF of your research paper and ask questions about it.")
|
74 |
|
75 |
with gr.Row():
|
76 |
+
pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
|
77 |
upload_status = gr.Textbox(label="Upload Status", interactive=False)
|
78 |
|
79 |
with gr.Row():
|