# RAG / app.py
# charulp2499 — "Update app.py" (34ba555, verified)
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from transformers import pipeline
import gradio as gr
# Step 1: Initialize Vector Store
# Module-level handle to the FAISS index built from the uploaded PDF.
# It stays None until upload_and_process_pdf() has built an index (each upload
# replaces the previous one); rag_system() checks for None to tell the user
# that nothing has been uploaded yet.
vector_store = None
# Step 2: Upload and Process PDF Documents
def upload_and_process_pdf(file):
    """Index an uploaded PDF so it can be queried by the RAG pipeline.

    Loads the PDF with ``PyPDFLoader``, embeds its documents with the
    ``sentence-transformers/all-mpnet-base-v2`` model and stores the
    resulting FAISS index in the module-level ``vector_store``, replacing
    any previously indexed document.

    Args:
        file: The value delivered by the upload widget: a path string, an
            object whose ``name`` attribute is the path of the uploaded
            file, or ``None`` when nothing was selected.

    Returns:
        A human-readable status message for the UI.
    """
    global vector_store

    # Pressing "Upload and Process" without choosing a file yields None;
    # report it instead of failing on ``file.name``.
    if file is None:
        return "No file selected. Please choose a PDF to upload."

    # Accept both a plain path string and a tempfile-like wrapper object.
    pdf_path = file if isinstance(file, str) else file.name

    # Load PDF documents using PyPDFLoader
    docs = PyPDFLoader(pdf_path).load()

    # Documents without any text (e.g. scanned images) add nothing to
    # retrieval, and an empty document list cannot be indexed at all.
    docs = [doc for doc in docs if doc.page_content.strip()]
    if not docs:
        # The previous index (if any) is intentionally left untouched.
        return "No extractable text was found in the PDF. Please upload a different document."

    # Generate embeddings and create a vector store
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
    vector_store = FAISS.from_documents(docs, embeddings)
    return "Document uploaded and processed successfully."
# Step 3: Set up Generator (using FLAN-T5)
# Created once at import time; generate_answer() reuses this same pipeline
# object for every request.
generator_pipeline = pipeline(
    task="text2text-generation",
    model="google/flan-t5-base",
)
def generate_answer(context, query):
    """Generate an answer to *query* grounded in *context*.

    Builds a ``Question: ... / Context: ...`` prompt and runs it through the
    module-level ``generator_pipeline``.

    Args:
        context: Retrieved passage text to ground the answer in.
        query: The user's question.

    Returns:
        The ``generated_text`` of the first pipeline result.
    """
    input_text = f"Question: {query}\nContext: {context}"
    # Retrieved PDF pages can easily be longer than the model's input window.
    # truncation=True lets the tokenizer cut the prompt to the model's maximum
    # length; the question comes first in the prompt, so only the tail of the
    # context is dropped.
    outputs = generator_pipeline(
        input_text,
        max_length=100,
        do_sample=True,
        truncation=True,
    )
    return outputs[0]["generated_text"]
# Step 4: Build the Retrieval-Augmented Generation Function
def rag_system(query):
    """Answer *query* using the indexed document as context.

    Retrieves the two most similar documents from the module-level
    ``vector_store``, joins their text into one context string and passes it
    with the question to ``generate_answer``.

    Args:
        query: The user's question.

    Returns:
        A ``(answer, context)`` tuple. When the question is blank or no
        document has been indexed yet, ``answer`` is an explanatory message
        and ``context`` is an empty string.
    """
    # Reject blank input up front rather than embedding an empty string and
    # asking the generator to answer nothing.
    if not query or not query.strip():
        return "Please enter a question.", ""

    if vector_store is None:
        return "No documents uploaded. Please upload a document first.", ""

    # Only the top 2 documents are used, so ask the retriever for exactly 2
    # instead of fetching its default number and discarding the rest.
    retriever = vector_store.as_retriever(search_kwargs={"k": 2})
    results = retriever.get_relevant_documents(query)
    context = " ".join(doc.page_content for doc in results)

    # Generate the answer
    answer = generate_answer(context, query)
    return answer, context
# Step 5: Create Gradio Interface
def query_rag(question):
    """UI callback: run the RAG pipeline and return ``(answer, context)``."""
    generated, retrieved = rag_system(question)
    return generated, retrieved
def upload_document(file):
    """UI callback: index the uploaded file and return the status message."""
    status = upload_and_process_pdf(file)
    return status
# Two-tab UI: one tab to query the indexed document, one to upload a PDF.
with gr.Blocks() as interface:
    gr.Markdown("# RAG System with PDF Upload (LangChain Integration)")

    # Tab 1: question in, generated answer and retrieved context out.
    with gr.Tab("Ask a Question"):
        question = gr.Textbox(label="Enter your question")
        answer = gr.Textbox(label="Generated Answer")
        context = gr.Textbox(label="Context")
        query_button = gr.Button("Get Answer")
        query_button.click(
            fn=query_rag,
            inputs=question,
            outputs=[answer, context],
        )

    # Tab 2: PDF upload; the status textbox shows the callback's message.
    with gr.Tab("Upload Document"):
        file_upload = gr.File(label="Upload PDF", file_types=[".pdf"])
        upload_button = gr.Button("Upload and Process")
        upload_output = gr.Textbox(label="Upload Status")
        upload_button.click(
            fn=upload_document,
            inputs=file_upload,
            outputs=upload_output,
        )

# Step 6: Launch the Interface
if __name__ == "__main__":
    interface.launch()