thesnak committed on
Commit
e10d69e
·
verified ·
1 Parent(s): abc712f

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +83 -0
app.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Third-party dependencies: UI, PDF parsing, embeddings, vector search, QA.
import gradio as gr
import pdfplumber
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
from transformers import pipeline

# Sentence embedder used to vectorize paper chunks and user questions.
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
# Extractive QA model that answers from the retrieved context.
qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2")

# Flat L2 FAISS index sized to the embedder's output dimension.
dimension = 384  # all-MiniLM-L6-v2 emits 384-dim vectors
index = faiss.IndexFlatL2(dimension)

# Paragraph chunks of the currently loaded paper; rows parallel the index.
text_chunks = []
def extract_text_from_pdf(pdf_file):
    """Extract the full text of a PDF.

    Args:
        pdf_file: Path or file-like object accepted by ``pdfplumber.open``.

    Returns:
        str: Text of all pages joined with newlines. Pages with no
        extractable text (e.g. scanned images) contribute an empty string
        instead of raising — ``page.extract_text()`` returns ``None`` for
        such pages, which crashed the original ``+=`` concatenation.
    """
    with pdfplumber.open(pdf_file) as pdf:
        # join() avoids quadratic string concatenation; the newline
        # separator keeps the last word of a page from fusing with the
        # first word of the next.
        return "\n".join(page.extract_text() or "" for page in pdf.pages)
def index_text_chunks(text):
    """Split *text* into paragraph chunks, embed them, and (re)build the index.

    Args:
        text: Full text of the paper (paragraphs separated by blank lines).

    Returns:
        str: A status message for the UI.
    """
    global text_chunks, index
    # Drop empty / whitespace-only fragments produced by split("\n\n");
    # they would otherwise add meaningless vectors to the index.
    text_chunks = [chunk.strip() for chunk in text.split("\n\n") if chunk.strip()]
    if not text_chunks:
        # Nothing to embed — avoid calling FAISS with an empty array.
        return "No extractable text found in the paper."
    embeddings = embedding_model.encode(text_chunks)
    # Rebuild from scratch so a re-upload replaces the previous paper.
    index = faiss.IndexFlatL2(dimension)
    # FAISS requires contiguous float32 input.
    index.add(np.asarray(embeddings, dtype=np.float32))
    return "Paper uploaded and indexed successfully!"
def answer_question(question):
    """Retrieve the most relevant chunks and extract an answer.

    Args:
        question: Natural-language question about the indexed paper.

    Returns:
        str: The extracted answer, or a prompt to upload a paper first.
    """
    if not text_chunks:
        return "Please upload a paper first."

    # Embed the question with the same model used for the chunks.
    question_embedding = embedding_model.encode([question])

    # Never request more neighbours than the index holds: FAISS pads the
    # result with -1 indices, which would silently select text_chunks[-1].
    k = min(2, index.ntotal)
    _, indices = index.search(np.asarray(question_embedding, dtype=np.float32), k=k)
    relevant_chunks = [text_chunks[i] for i in indices[0] if i >= 0]

    # Feed the concatenated top chunks to the extractive QA model.
    context = " ".join(relevant_chunks)
    result = qa_pipeline(question=question, context=context)
    return result['answer']
# Gradio Interface
def _upload_and_index(pdf_file):
    """Upload-button handler: extract the PDF's text, then index it.

    The original wiring passed the raw uploaded file object straight to
    index_text_chunks, which expects a string — extract_text_from_pdf was
    never called, so every upload crashed. This wrapper chains the steps.
    """
    if pdf_file is None:
        return "Please select a PDF file first."
    return index_text_chunks(extract_text_from_pdf(pdf_file))

with gr.Blocks() as demo:
    gr.Markdown("# Chat with Your Paper 📄")
    gr.Markdown("Upload a PDF of your research paper and ask questions about it.")

    with gr.Row():
        pdf_input = gr.File(label="Upload PDF")
        upload_status = gr.Textbox(label="Upload Status", interactive=False)

    with gr.Row():
        question_input = gr.Textbox(label="Ask a Question", placeholder="What is the main contribution of this paper?")
        answer_output = gr.Textbox(label="Answer", interactive=False)

    # Buttons
    upload_button = gr.Button("Upload and Index Paper")
    ask_button = gr.Button("Ask Question")

    # Define actions: extraction + indexing on upload, QA on ask.
    upload_button.click(
        fn=_upload_and_index,
        inputs=pdf_input,
        outputs=upload_status
    )
    ask_button.click(
        fn=answer_question,
        inputs=question_input,
        outputs=answer_output
    )

# Launch the app
demo.launch()