charulp2499 committed on
Commit
9e2f9ad
·
verified ·
1 Parent(s): 26d3a5a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -37
app.py CHANGED
@@ -1,62 +1,62 @@
1
  from haystack.nodes import DensePassageRetriever
2
  from haystack.document_stores import FAISSDocumentStore
3
- from haystack.pipelines import ExtractiveQAPipeline
4
  from transformers import pipeline
 
 
 
5
  import gradio as gr
6
- from haystack.utils import convert_files_to_docs
7
 
8
- # Step 1: Set up Document Store
9
- # Create a FAISS document store for efficient retrieval
10
- document_store = FAISSDocumentStore(embedding_dim=768, faiss_index_factory_str="Flat")
11
 
12
  # Step 2: Upload and Process PDF Documents
13
  def upload_and_process_pdf(file):
14
- # Convert PDF file to documents
15
- docs = convert_files_to_docs(dir_path=".", file_paths=[file.name])
16
- document_store.write_documents(docs)
17
- document_store.update_embeddings(retriever)
 
 
 
 
18
  return "Document uploaded and processed successfully."
19
 
20
- # Step 3: Set up Retriever
21
- retriever = DensePassageRetriever(
22
- document_store=document_store,
23
- query_embedding_model="facebook/dpr-question_encoder-single-nq-base",
24
- passage_embedding_model="facebook/dpr-ctx_encoder-single-nq-base",
25
- )
26
 
27
- # Step 4: Set up Generator (using FLAN-T5)
28
- generator = pipeline("text2text-generation", model="google/flan-t5-base")
 
 
29
 
30
- # Step 5: Build the Retrieval-Augmented Generation Function
31
  def rag_system(query):
32
- # Retrieve relevant documents
33
- retrieved_docs = retriever.retrieve(query, top_k=2)
34
- context = " ".join([doc.content for doc in retrieved_docs])
35
 
36
- # Generate answer using the context
37
- input_text = f"Question: {query}\nContext: {context}"
38
- answer = generator(input_text, max_length=100, do_sample=True)[0]['generated_text']
39
 
40
- # Return results
41
- return {
42
- "Question": query,
43
- "Answer": answer,
44
- "Context": context
45
- }
46
 
47
- # Step 6: Create Gradio Interface
48
  def query_rag(question):
49
- result = rag_system(question)
50
- return result["Answer"], result["Context"]
51
 
52
  def upload_document(file):
53
- message = upload_and_process_pdf(file)
54
- return message
55
 
56
  interface = gr.Blocks()
57
 
58
  with interface:
59
- gr.Markdown("# RAG System with PDF Upload")
60
  with gr.Tab("Ask a Question"):
61
  question = gr.Textbox(label="Enter your question")
62
  answer = gr.Textbox(label="Generated Answer")
@@ -69,6 +69,6 @@ with interface:
69
  upload_output = gr.Textbox(label="Upload Status")
70
  upload_button.click(upload_document, inputs=file_upload, outputs=upload_output)
71
 
72
- # Step 7: Launch the Interface
73
  if __name__ == "__main__":
74
  interface.launch()
 
1
  from haystack.nodes import DensePassageRetriever
2
  from haystack.document_stores import FAISSDocumentStore
3
+ from haystack.pipelines import RetrievalQA
4
  from transformers import pipeline
5
+ from langchain.document_loaders import PyPDFLoader
6
+ from langchain.embeddings import HuggingFaceEmbeddings
7
+ from langchain.vectorstores import FAISS
8
  import gradio as gr
 
9
 
10
+ # Step 1: Initialize Document Store and Vector Store
11
+ document_store = None # Placeholder for FAISS document store
12
+ vector_store = None
13
 
14
  # Step 2: Upload and Process PDF Documents
15
  def upload_and_process_pdf(file):
16
+ global vector_store
17
+ # Load PDF documents using PyPDFLoader
18
+ loader = PyPDFLoader(file.name)
19
+ docs = loader.load()
20
+
21
+ # Generate embeddings and create a vector store
22
+ embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
23
+ vector_store = FAISS.from_documents(docs, embeddings)
24
  return "Document uploaded and processed successfully."
25
 
26
+ # Step 3: Set up Generator (using FLAN-T5)
27
+ generator_pipeline = pipeline("text2text-generation", model="google/flan-t5-base")
 
 
 
 
28
 
29
+ def generate_answer(context, query):
30
+ input_text = f"Question: {query}\nContext: {context}"
31
+ answer = generator_pipeline(input_text, max_length=100, do_sample=True)[0]['generated_text']
32
+ return answer
33
 
34
+ # Step 4: Build the Retrieval-Augmented Generation Function
35
  def rag_system(query):
36
+ global vector_store
37
+ if vector_store is None:
38
+ return "No documents uploaded. Please upload a document first.", ""
39
 
40
+ retriever = vector_store.as_retriever()
41
+ results = retriever.get_relevant_documents(query)
42
+ context = " ".join([doc.page_content for doc in results[:2]]) # Use top 2 documents
43
 
44
+ # Generate the answer
45
+ answer = generate_answer(context, query)
46
+ return answer, context
 
 
 
47
 
48
+ # Step 5: Create Gradio Interface
49
  def query_rag(question):
50
+ answer, context = rag_system(question)
51
+ return answer, context
52
 
53
  def upload_document(file):
54
+ return upload_and_process_pdf(file)
 
55
 
56
  interface = gr.Blocks()
57
 
58
  with interface:
59
+ gr.Markdown("# RAG System with PDF Upload (LangChain Integration)")
60
  with gr.Tab("Ask a Question"):
61
  question = gr.Textbox(label="Enter your question")
62
  answer = gr.Textbox(label="Generated Answer")
 
69
  upload_output = gr.Textbox(label="Upload Status")
70
  upload_button.click(upload_document, inputs=file_upload, outputs=upload_output)
71
 
72
+ # Step 6: Launch the Interface
73
  if __name__ == "__main__":
74
  interface.launch()