"""Gradio demo: retrieval-augmented question answering over an uploaded PDF.

A PDF is loaded with LangChain's ``PyPDFLoader``, embedded into an in-memory
FAISS vector store, and questions are answered by a FLAN-T5 text2text
pipeline that is prompted with the top retrieved passages.
"""

from functools import lru_cache

# NOTE(review): the three haystack names below are not referenced anywhere in
# this file; confirm they are still needed (and that ``RetrievalQA`` is really
# exported by ``haystack.pipelines``) before relying on them.
from haystack.nodes import DensePassageRetriever
from haystack.document_stores import FAISSDocumentStore
from haystack.pipelines import RetrievalQA
from transformers import pipeline
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
import gradio as gr

# Step 1: Initialize Document Store and Vector Store
document_store = None  # Placeholder for FAISS document store
vector_store = None  # Set by upload_and_process_pdf(); None until a PDF is processed

EMBEDDING_MODEL_NAME = "sentence-transformers/all-mpnet-base-v2"


@lru_cache(maxsize=1)
def _get_embeddings():
    """Return the shared embedding model, constructing it on first use only."""
    return HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)


# Step 2: Upload and Process PDF Documents
def upload_and_process_pdf(file):
    """Load a PDF, embed its documents and store them in the global vector store.

    Args:
        file: The value produced by the Gradio ``File`` component. May be
            ``None`` (nothing selected), a filesystem path string, or an
            object exposing the path as ``.name``.

    Returns:
        A human-readable status message for the "Upload Status" textbox.
    """
    global vector_store

    if file is None:
        # Clicking the button without selecting a file passes None.
        return "No file selected. Please choose a PDF to upload."

    # Accept both a plain path string and a file-like wrapper with ``.name``.
    pdf_path = file if isinstance(file, str) else file.name

    # Load PDF documents using PyPDFLoader
    docs = PyPDFLoader(pdf_path).load()
    if not docs:
        # Nothing to index; keep any previously built vector store intact.
        return "No content could be read from the uploaded PDF."

    # Generate embeddings and create a vector store
    vector_store = FAISS.from_documents(docs, _get_embeddings())
    return "Document uploaded and processed successfully."


# Step 3: Set up Generator (using FLAN-T5)
generator_pipeline = pipeline("text2text-generation", model="google/flan-t5-base")


def generate_answer(context, query):
    """Generate an answer to ``query`` given the retrieved ``context``.

    Args:
        context: Text passed to the model as supporting context.
        query: The user's question.

    Returns:
        The ``generated_text`` of the first pipeline output. Sampling is
        enabled, so repeated calls may return different answers.
    """
    input_text = f"Question: {query}\nContext: {context}"
    outputs = generator_pipeline(input_text, max_length=100, do_sample=True)
    return outputs[0]['generated_text']


# Step 4: Build the Retrieval-Augmented Generation Function
def rag_system(query):
    """Answer ``query`` using passages retrieved from the uploaded document.

    Args:
        query: The user's question.

    Returns:
        An ``(answer, context)`` tuple of strings. When no document has been
        uploaded or the question is blank, ``answer`` holds an explanatory
        message and ``context`` is the empty string.
    """
    if vector_store is None:
        return "No documents uploaded. Please upload a document first.", ""
    if not query or not query.strip():
        # Avoid running retrieval and generation on an empty prompt.
        return "Please enter a question.", ""

    retriever = vector_store.as_retriever()
    results = retriever.get_relevant_documents(query)
    # Use top 2 documents
    context = " ".join(doc.page_content for doc in results[:2])

    # Generate the answer
    answer = generate_answer(context, query)
    return answer, context


# Step 5: Create Gradio Interface
def query_rag(question):
    """Gradio callback: return ``(answer, context)`` for the asked question."""
    answer, context = rag_system(question)
    return answer, context


def upload_document(file):
    """Gradio callback: process the uploaded PDF and return a status message."""
    return upload_and_process_pdf(file)


interface = gr.Blocks()
with interface:
    gr.Markdown("# RAG System with PDF Upload (LangChain Integration)")

    with gr.Tab("Ask a Question"):
        question = gr.Textbox(label="Enter your question")
        answer = gr.Textbox(label="Generated Answer")
        context = gr.Textbox(label="Context")
        query_button = gr.Button("Get Answer")
        query_button.click(query_rag, inputs=question, outputs=[answer, context])

    with gr.Tab("Upload Document"):
        file_upload = gr.File(label="Upload PDF", file_types=[".pdf"])
        upload_button = gr.Button("Upload and Process")
        upload_output = gr.Textbox(label="Upload Status")
        upload_button.click(upload_document, inputs=file_upload, outputs=upload_output)

# Step 6: Launch the Interface
if __name__ == "__main__":
    interface.launch()