Spaces:

charulp2499
/

RAG

Runtime error

App Files Files Community

charulp2499 committed on Jan 22

Commit

9e2f9ad

verified ·

1 Parent(s): 26d3a5a

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -37

app.py CHANGED Viewed

@@ -1,62 +1,62 @@
 from haystack.nodes import DensePassageRetriever
 from haystack.document_stores import FAISSDocumentStore
-from haystack.pipelines import ExtractiveQAPipeline
 from transformers import pipeline
 import gradio as gr
-from haystack.utils import convert_files_to_docs
-# Step 1: Set up Document Store
-# Create a FAISS document store for efficient retrieval
-document_store = FAISSDocumentStore(embedding_dim=768, faiss_index_factory_str="Flat")
 # Step 2: Upload and Process PDF Documents
 def upload_and_process_pdf(file):
-    # Convert PDF file to documents
-    docs = convert_files_to_docs(dir_path=".", file_paths=[file.name])
-    document_store.write_documents(docs)
-    document_store.update_embeddings(retriever)
     return "Document uploaded and processed successfully."
-# Step 3: Set up Retriever
-retriever = DensePassageRetriever(
-    document_store=document_store,
-    query_embedding_model="facebook/dpr-question_encoder-single-nq-base",
-    passage_embedding_model="facebook/dpr-ctx_encoder-single-nq-base",
-)
-# Step 4: Set up Generator (using FLAN-T5)
-generator = pipeline("text2text-generation", model="google/flan-t5-base")
-# Step 5: Build the Retrieval-Augmented Generation Function
 def rag_system(query):
-    # Retrieve relevant documents
-    retrieved_docs = retriever.retrieve(query, top_k=2)
-    context = " ".join([doc.content for doc in retrieved_docs])
-    # Generate answer using the context
-    input_text = f"Question: {query}\nContext: {context}"
-    answer = generator(input_text, max_length=100, do_sample=True)[0]['generated_text']
-    # Return results
-    return {
-        "Question": query,
-        "Answer": answer,
-        "Context": context
-    }
-# Step 6: Create Gradio Interface
 def query_rag(question):
-    result = rag_system(question)
-    return result["Answer"], result["Context"]
 def upload_document(file):
-    message = upload_and_process_pdf(file)
-    return message
 interface = gr.Blocks()
 with interface:
-    gr.Markdown("# RAG System with PDF Upload")
     with gr.Tab("Ask a Question"):
         question = gr.Textbox(label="Enter your question")
         answer = gr.Textbox(label="Generated Answer")
@@ -69,6 +69,6 @@ with interface:
         upload_output = gr.Textbox(label="Upload Status")
         upload_button.click(upload_document, inputs=file_upload, outputs=upload_output)
-# Step 7: Launch the Interface
 if __name__ == "__main__":
     interface.launch()

 from haystack.nodes import DensePassageRetriever
 from haystack.document_stores import FAISSDocumentStore
+from haystack.pipelines import RetrievalQA
 from transformers import pipeline
+from langchain.document_loaders import PyPDFLoader
+from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.vectorstores import FAISS
 import gradio as gr
+# Step 1: Initialize Document Store and Vector Store
+document_store = None  # Placeholder for FAISS document store
+vector_store = None
 # Step 2: Upload and Process PDF Documents
 def upload_and_process_pdf(file):
+    global vector_store
+    # Load PDF documents using PyPDFLoader
+    loader = PyPDFLoader(file.name)
+    docs = loader.load()
+    # Generate embeddings and create a vector store
+    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
+    vector_store = FAISS.from_documents(docs, embeddings)
     return "Document uploaded and processed successfully."
+# Step 3: Set up Generator (using FLAN-T5)
+generator_pipeline = pipeline("text2text-generation", model="google/flan-t5-base")
+def generate_answer(context, query):
+    input_text = f"Question: {query}\nContext: {context}"
+    answer = generator_pipeline(input_text, max_length=100, do_sample=True)[0]['generated_text']
+    return answer
+# Step 4: Build the Retrieval-Augmented Generation Function
 def rag_system(query):
+    global vector_store
+    if vector_store is None:
+        return "No documents uploaded. Please upload a document first.", ""
+    retriever = vector_store.as_retriever()
+    results = retriever.get_relevant_documents(query)
+    context = " ".join([doc.page_content for doc in results[:2]])  # Use top 2 documents
+    # Generate the answer
+    answer = generate_answer(context, query)
+    return answer, context
+# Step 5: Create Gradio Interface
 def query_rag(question):
+    answer, context = rag_system(question)
+    return answer, context
 def upload_document(file):
+    return upload_and_process_pdf(file)
 interface = gr.Blocks()
 with interface:
+    gr.Markdown("# RAG System with PDF Upload (LangChain Integration)")
     with gr.Tab("Ask a Question"):
         question = gr.Textbox(label="Enter your question")
         answer = gr.Textbox(label="Generated Answer")
         upload_output = gr.Textbox(label="Upload Status")
         upload_button.click(upload_document, inputs=file_upload, outputs=upload_output)
+# Step 6: Launch the Interface
 if __name__ == "__main__":
     interface.launch()