charulp2499 committed on
Commit
54adfc6
·
verified ·
1 Parent(s): f7b0da6

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -0
app.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from haystack.nodes import DensePassageRetriever
2
+ from haystack.document_stores import FAISSDocumentStore
3
+ from haystack.pipelines import ExtractiveQAPipeline
4
+ from transformers import pipeline
5
+ import gradio as gr
6
+ from haystack.utils import convert_files_to_docs
7
+
8
+ # Step 1: Set up Document Store
9
+ # Create a FAISS document store for efficient retrieval
10
# Flat FAISS index; 768 dimensions to match the DPR encoders configured below.
document_store = FAISSDocumentStore(
    faiss_index_factory_str="Flat",
    embedding_dim=768,
)
11
+
12
+ # Step 2: Upload and Process PDF Documents
13
def upload_and_process_pdf(file):
    """Index an uploaded PDF into the document store.

    Args:
        file: The value delivered by the Gradio ``File`` component. This is
            either a filesystem path string or a temp-file wrapper exposing
            the path as ``.name``; ``None`` when nothing was uploaded.

    Returns:
        A human-readable status message for the UI.
    """
    import shutil
    import tempfile
    from pathlib import Path

    if file is None:
        return "Please upload a PDF file before processing."

    # Accept both Gradio conventions: plain path string or object with `.name`.
    source_path = Path(file if isinstance(file, str) else file.name)

    # convert_files_to_docs works on a directory, not on individual files.
    # Staging the upload in a private temp directory ensures that only this
    # file is converted (and not everything in the working directory).
    with tempfile.TemporaryDirectory() as staging_dir:
        shutil.copy(source_path, Path(staging_dir) / source_path.name)
        docs = convert_files_to_docs(dir_path=staging_dir)

    if not docs:
        return "No text could be extracted from the uploaded file."

    document_store.write_documents(docs)
    # `retriever` is a module-level object created after this function is
    # defined; it is resolved at call time.
    document_store.update_embeddings(retriever)
    return "Document uploaded and processed successfully."
19
+
20
+ # Step 3: Set up Retriever
21
# DPR uses separate encoders for questions and for passages.
_dpr_encoders = {
    "query_embedding_model": "facebook/dpr-question_encoder-single-nq-base",
    "passage_embedding_model": "facebook/dpr-ctx_encoder-single-nq-base",
}
retriever = DensePassageRetriever(document_store=document_store, **_dpr_encoders)
26
+
27
+ # Step 4: Set up Generator (using FLAN-T5)
28
# Text-to-text generation pipeline backed by FLAN-T5 base.
generator = pipeline(
    task="text2text-generation",
    model="google/flan-t5-base",
)
29
+
30
+ # Step 5: Build the Retrieval-Augmented Generation Function
31
def rag_system(query):
    """Answer a question using retrieved passages as context.

    Retrieves the top two passages for the query, joins their text into a
    single context string, and asks the generator to answer from it.

    Args:
        query: The user's question. ``None`` or a blank string is handled
            without calling the retriever or the generator.

    Returns:
        A dict with the keys ``"Question"``, ``"Answer"`` and ``"Context"``.
    """
    question = (query or "").strip()
    if not question:
        # Nothing to search for; skip the retriever and the generator.
        return {
            "Question": query,
            "Answer": "Please enter a question.",
            "Context": "",
        }

    # Retrieve relevant documents
    retrieved_docs = retriever.retrieve(question, top_k=2)
    context = " ".join(doc.content for doc in retrieved_docs)

    if not context.strip():
        # Without any retrieved text the generator would answer from the
        # question alone, which defeats the purpose of retrieval.
        return {
            "Question": query,
            "Answer": "No relevant documents were found. Please upload a PDF first.",
            "Context": "",
        }

    # Generate answer using the context
    input_text = f"Question: {question}\nContext: {context}"
    answer = generator(input_text, max_length=100, do_sample=True)[0]["generated_text"]

    return {
        "Question": query,
        "Answer": answer,
        "Context": context,
    }
46
+
47
+ # Step 6: Create Gradio Interface
48
def query_rag(question):
    """Run the RAG pipeline and return ``(answer, context)`` for the UI."""
    outcome = rag_system(question)
    generated, supporting = outcome["Answer"], outcome["Context"]
    return generated, supporting
51
+
52
def upload_document(file):
    """Process the uploaded file and return the resulting status message."""
    return upload_and_process_pdf(file)
55
+
56
# Two-tab UI: one tab for asking questions, one for uploading PDFs.
with gr.Blocks() as interface:
    gr.Markdown("# RAG System with PDF Upload")

    with gr.Tab("Ask a Question"):
        question = gr.Textbox(label="Enter your question")
        answer = gr.Textbox(label="Generated Answer")
        context = gr.Textbox(label="Context")
        query_button = gr.Button("Get Answer")
        query_button.click(
            fn=query_rag,
            inputs=question,
            outputs=[answer, context],
        )

    with gr.Tab("Upload Document"):
        file_upload = gr.File(label="Upload PDF", file_types=[".pdf"])
        upload_button = gr.Button("Upload and Process")
        upload_output = gr.Textbox(label="Upload Status")
        upload_button.click(
            fn=upload_document,
            inputs=file_upload,
            outputs=upload_output,
        )

# Step 7: Launch the Interface (only when executed as a script)
if __name__ == "__main__":
    interface.launch()