thesnak committed
Commit c58cb45 · verified · 1 Parent(s): bb13b3d

Update app.py

Files changed (1)
  1. app.py +13 -6
app.py CHANGED
@@ -6,11 +6,11 @@ import numpy as np
 from transformers import pipeline
 
 # Load models
-embedding_model = SentenceTransformer('all-MiniLM-L6-v2')  # For embedding text chunks
-qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2")  # For QA
+embedding_model = SentenceTransformer('all-mpnet-base-v2')  # Better embedding model
+qa_pipeline = pipeline("question-answering", model="deepset/roberta-large-squad2")  # Larger QA model
 
 # Initialize FAISS index
-dimension = 384  # Dimension of the embedding model
+dimension = 768  # Dimension of the embedding model
 index = faiss.IndexFlatL2(dimension)
 
 # Store text chunks and their embeddings
@@ -58,14 +58,21 @@ def answer_question(question):
     # Embed the question
     question_embedding = embedding_model.encode([question])
 
-    # Retrieve top-k relevant chunks
-    distances, indices = index.search(question_embedding, k=2)
+    # Retrieve top-k relevant chunks (increase k for more context)
+    k = 5  # Retrieve more chunks for better context
+    distances, indices = index.search(question_embedding, k=k)
     relevant_chunks = [text_chunks[i] for i in indices[0]]
 
     # Use the QA model to generate an answer
     context = " ".join(relevant_chunks)
     result = qa_pipeline(question=question, context=context)
-    return result['answer']
+
+    # Post-process the answer
+    answer = result['answer']
+    if answer.strip() == "":
+        return "The paper does not provide enough information to answer this question."
+
+    return answer
 
 # Gradio Interface
 with gr.Blocks() as demo:
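Note (minimal sketch, not part of this commit): the two spots this change touches can be hardened further. The hardcoded dimension (384 → 768 here) can be read off the model itself, and k=5 can exceed the number of indexed chunks, in which case FAISS pads the result with -1 ids and text_chunks[-1] silently returns the wrong chunk. search_chunks below is a hypothetical helper illustrating both guards, assuming the same faiss and sentence-transformers APIs already used in app.py.

import faiss
from sentence_transformers import SentenceTransformer

embedding_model = SentenceTransformer('all-mpnet-base-v2')

# Derive the dimension from the model instead of hardcoding 384 / 768,
# so swapping embedding models cannot silently mismatch the index.
dimension = embedding_model.get_sentence_embedding_dimension()
index = faiss.IndexFlatL2(dimension)

def search_chunks(question, text_chunks, k=5):
    # Hypothetical helper; mirrors the retrieval step in answer_question.
    if index.ntotal == 0:
        return []
    question_embedding = embedding_model.encode([question])
    k = min(k, index.ntotal)  # never request more neighbors than are stored
    distances, indices = index.search(question_embedding, k)
    # Drop the -1 padding ids FAISS returns when fewer than k hits exist.
    return [text_chunks[i] for i in indices[0] if i != -1]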