Spaces:

thesnak
/

ai-search-assistant

Sleeping

App Files Files Community

thesnak committed on Jan 6

Commit

bb13b3d

verified ·

1 Parent(s): 3e07fd9

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -7

app.py CHANGED Viewed

@@ -19,22 +19,39 @@ text_chunks = []
 def extract_text_from_pdf(pdf_file):
     """Extract the text of every page of a PDF.

     Args:
         pdf_file: Whatever ``pdfplumber.open`` accepts for the PDF to read.

     Returns:
         The text of all pages concatenated in page order. Pages that
         yield no text contribute nothing to the result.
     """
     with pdfplumber.open(pdf_file) as pdf:
         # A page without extractable text can yield None instead of a
         # string; fall back to "" so the concatenation never raises.
         return "".join(page.extract_text() or "" for page in pdf.pages)
def index_text_chunks(text):
    """Split text into paragraph chunks, embed them, and rebuild the index.

    Args:
        text: Document text; paragraphs are separated by blank lines.

    Returns:
        A status message describing whether indexing succeeded.

    Side effects:
        On success, replaces the module-level ``text_chunks`` and ``index``.
        Both are left untouched when there is nothing to index.
    """
    global text_chunks, index

    # Drop blank paragraphs so empty strings are never embedded or retrieved.
    chunks = [chunk for chunk in (text or "").split("\n\n") if chunk.strip()]
    if not chunks:
        return "No text found to index. Please upload a valid PDF file."

    embeddings = embedding_model.encode(chunks)

    # Build the new index completely before publishing it, so a failure
    # cannot leave text_chunks and index describing different documents.
    new_index = faiss.IndexFlatL2(dimension)
    # FAISS indexes operate on float32 matrices; coerce defensively.
    new_index.add(np.asarray(embeddings, dtype="float32"))

    text_chunks, index = chunks, new_index
    return "Paper uploaded and indexed successfully!"
 def answer_question(question):
     """Retrieve relevant chunks and generate an answer."""
     if not text_chunks:
         return "Please upload a paper first."
@@ -56,7 +73,7 @@ with gr.Blocks() as demo:
     gr.Markdown("Upload a PDF of your research paper and ask questions about it.")
     with gr.Row():
-        pdf_input = gr.File(label="Upload PDF")
         upload_status = gr.Textbox(label="Upload Status", interactive=False)
     with gr.Row():

 def extract_text_from_pdf(pdf_file):
     """Extract the text of every page of a PDF.

     Args:
         pdf_file: Whatever ``pdfplumber.open`` accepts for the PDF to read.

     Returns:
         The text of all pages concatenated in page order, or ``""`` when
         the file cannot be opened or parsed. A failure is logged rather
         than returned, so an error message is never mistaken for document
         text by callers that only test the result for emptiness.
     """
     import logging  # local import: the file's import block is not visible here

     try:
         with pdfplumber.open(pdf_file) as pdf:
             # A page without extractable text can yield None; use "" instead.
             return "".join(page.extract_text() or "" for page in pdf.pages)
     except Exception:
         # Broad on purpose: this sits at the UI boundary and must not crash
         # the app on a malformed upload. The traceback goes to the log.
         logging.getLogger(__name__).exception("Error extracting text from PDF")
         return ""
def index_text_chunks(pdf_file):
    """Extract a PDF's text, split it into chunks, embed them, and index them.

    Args:
        pdf_file: The uploaded PDF, passed straight to ``extract_text_from_pdf``.

    Returns:
        A status message: the success text with the chunk count, a notice
        that no text could be extracted, or the extractor's error message.

    Side effects:
        On success, replaces the module-level ``text_chunks`` and ``index``.
        Both are left untouched when extraction fails or yields no chunks.
    """
    global text_chunks, index

    text = extract_text_from_pdf(pdf_file)
    if not text:
        return "No text extracted from the PDF. Please upload a valid PDF file."
    # The extractor can report a failure in-band as a message with this
    # prefix; show it to the user instead of indexing it as paper content.
    if text.startswith("Error extracting text:"):
        return text

    # Split into paragraphs, dropping blank ones. Whitespace-only text
    # passes the check above but produces no chunks, so check again.
    chunks = [chunk for chunk in text.split("\n\n") if chunk.strip()]
    if not chunks:
        return "No text extracted from the PDF. Please upload a valid PDF file."

    embeddings = embedding_model.encode(chunks)

    # Build the new index completely before publishing it, so a failure
    # cannot leave text_chunks and index describing different documents.
    new_index = faiss.IndexFlatL2(dimension)
    # FAISS indexes operate on float32 matrices; coerce defensively.
    new_index.add(np.asarray(embeddings, dtype="float32"))

    text_chunks, index = chunks, new_index
    return f"Paper uploaded and indexed successfully! Found {len(text_chunks)} chunks."
 def answer_question(question):
     """Retrieve relevant chunks and generate an answer."""
+    global text_chunks, index
     if not text_chunks:
         return "Please upload a paper first."
     gr.Markdown("Upload a PDF of your research paper and ask questions about it.")
     with gr.Row():
+        pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
         upload_status = gr.Textbox(label="Upload Status", interactive=False)
     with gr.Row():