arjunanand13 committed
Commit 5ead45e
1 Parent(s): 82add23

Create app.py

Files changed (1)
  1. app.py +305 -0
app.py ADDED
@@ -0,0 +1,305 @@
+ import gradio as gr
+ import os
+ from typing import List, Dict
+ import ragas
+ from ragas.metrics import (
+     context_relevancy,
+     faithfulness,
+     answer_relevancy,
+     context_recall
+ )
+ from datasets import Dataset, load_dataset
+ from langchain.text_splitter import (
+     RecursiveCharacterTextSplitter,
+     CharacterTextSplitter
+ )
+ # Semantic chunking lives in langchain_experimental, not langchain.text_splitter
+ from langchain_experimental.text_splitter import SemanticChunker
+ from langchain_community.vectorstores import FAISS, Chroma, Qdrant
+ from langchain_community.document_loaders import PyPDFLoader
+ from langchain.chains import ConversationalRetrievalChain
+ from langchain_community.embeddings import HuggingFaceEmbeddings
+ from langchain_community.llms import HuggingFaceEndpoint
+ from langchain.memory import ConversationBufferMemory
+ import torch
+
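+ # Rough dependency set for this Space (an assumption; the commit pins nothing):
+ #   pip install gradio langchain langchain-community langchain-experimental \
+ #       ragas datasets faiss-cpu chromadb qdrant-client pypdf sentence-transformers torch
+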
+ # Constants
+ list_llm = ["meta-llama/Meta-Llama-3-8B-Instruct", "mistralai/Mistral-7B-Instruct-v0.2"]
+ list_llm_simple = [os.path.basename(llm) for llm in list_llm]
+ api_token = os.getenv("HF_TOKEN")
+
+ # Text splitting strategies
+ def get_text_splitter(strategy: str, chunk_size: int = 1024, chunk_overlap: int = 64):
+     splitters = {
+         "recursive": RecursiveCharacterTextSplitter(
+             chunk_size=chunk_size,
+             chunk_overlap=chunk_overlap
+         ),
+         "fixed": CharacterTextSplitter(
+             chunk_size=chunk_size,
+             chunk_overlap=chunk_overlap
+         ),
+         # SemanticChunker splits at embedding-similarity breakpoints, so it takes
+         # an embeddings object rather than chunk_size/chunk_overlap
+         "semantic": SemanticChunker(HuggingFaceEmbeddings())
+     }
+     return splitters.get(strategy)
+
+ # Load and split PDF documents
+ def load_doc(list_file_path: List[str], splitting_strategy: str = "recursive"):
+     loaders = [PyPDFLoader(x) for x in list_file_path]
+     pages = []
+     for loader in loaders:
+         pages.extend(loader.load())
+
+     text_splitter = get_text_splitter(splitting_strategy)
+     doc_splits = text_splitter.split_documents(pages)
+     return doc_splits
+
+ # Vector database creation functions
+ def create_faiss_db(splits, embeddings):
+     return FAISS.from_documents(splits, embeddings)
+
+ def create_chroma_db(splits, embeddings):
+     return Chroma.from_documents(splits, embeddings)
+
+ def create_qdrant_db(splits, embeddings):
+     return Qdrant.from_documents(
+         splits,
+         embeddings,
+         location=":memory:",
+         collection_name="pdf_docs"
+     )
+
+ def create_db(splits, db_choice: str = "faiss"):
+     embeddings = HuggingFaceEmbeddings()
+     db_creators = {
+         "faiss": create_faiss_db,
+         "chroma": create_chroma_db,
+         "qdrant": create_qdrant_db
+     }
+     return db_creators[db_choice](splits, embeddings)
+
+ # Evaluation functions
+ def load_evaluation_dataset():
+     # Load the FiQA example dataset published by the RAGAS project
+     # (a config name such as "ragas_eval" may be required depending on the dataset version)
+     dataset = load_dataset("explodinggradients/fiqa", split="test")
+     return dataset
+
+ def evaluate_rag_pipeline(qa_chain, dataset):
+     # Sample a few examples for evaluation
+     eval_samples = dataset.select(range(5))
+
+     # Collect question/answer/context records; ragas scores a datasets.Dataset
+     # in one pass rather than exposing per-sample .score() calls
+     records = {
+         "question": [],
+         "answer": [],
+         "contexts": [],
+         "ground_truth": []
+     }
+
+     for sample in eval_samples:
+         question = sample["question"]
+
+         # Get response from the chain
+         response = qa_chain.invoke({
+             "question": question,
+             "chat_history": []
+         })
+
+         records["question"].append(question)
+         records["answer"].append(response["answer"])
+         records["contexts"].append(
+             [doc.page_content for doc in response["source_documents"]]
+         )
+         records["ground_truth"].append(sample["answer"])
+
+     # Evaluate using RAGAS metrics; column names follow the ragas 0.1.x schema
+     result = ragas.evaluate(
+         Dataset.from_dict(records),
+         metrics=[context_relevancy, faithfulness, answer_relevancy, context_recall]
+     )
+
+     # The result behaves like a mapping of metric name -> aggregate score
+     return dict(result)
+
+ # Initialize langchain LLM chain
+ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, progress=gr.Progress()):
+     # Both supported models use the same endpoint configuration
+     llm = HuggingFaceEndpoint(
+         repo_id=llm_model,
+         huggingfacehub_api_token=api_token,
+         temperature=temperature,
+         max_new_tokens=max_tokens,
+         top_k=top_k,
+     )
+
+     memory = ConversationBufferMemory(
+         memory_key="chat_history",
+         output_key='answer',
+         return_messages=True
+     )
+
+     retriever = vector_db.as_retriever()
+     qa_chain = ConversationalRetrievalChain.from_llm(
+         llm,
+         retriever=retriever,
+         chain_type="stuff",
+         memory=memory,
+         return_source_documents=True,
+         verbose=False,
+     )
+     return qa_chain
+
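+ # NOTE: `initialize_LLM` is wired to qachain_btn below but was not defined in
+ # this commit; this is a minimal sketch that maps the Radio's index output back
+ # to a model repo id and reports a status string for the progress textbox.
+ def initialize_LLM(llm_option, llm_temperature, max_tokens, top_k, vector_db, progress=gr.Progress()):
+     llm_name = list_llm[llm_option]
+     qa_chain = initialize_llmchain(llm_name, llm_temperature, max_tokens, top_k, vector_db, progress)
+     return qa_chain, "QA chain initialized. Chatbot is ready!"
+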
+ # Initialize database with chunking strategy and vector DB choice
+ def initialize_database(list_file_obj, splitting_strategy, db_choice, progress=gr.Progress()):
+     list_file_path = [x.name for x in list_file_obj if x is not None]
+     doc_splits = load_doc(list_file_path, splitting_strategy)
+     vector_db = create_db(doc_splits, db_choice)
+     return vector_db, f"Database created using {splitting_strategy} splitting and {db_choice} vector database!"
+
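+ # NOTE: `conversation` and `format_chat_history` are wired to the chat events in
+ # demo() below but were not defined in this commit; this minimal sketch
+ # reconstructs the usual template behaviour: send the question through the
+ # chain, then surface the answer plus up to three source passages with pages.
+ def format_chat_history(message, chat_history):
+     formatted_chat_history = []
+     for user_message, bot_message in chat_history:
+         formatted_chat_history.append(f"User: {user_message}")
+         formatted_chat_history.append(f"Assistant: {bot_message}")
+     return formatted_chat_history
+
+ def conversation(qa_chain, message, history):
+     formatted_chat_history = format_chat_history(message, history)
+     response = qa_chain.invoke({
+         "question": message,
+         "chat_history": formatted_chat_history
+     })
+     response_answer = response["answer"]
+     if "Helpful Answer:" in response_answer:
+         response_answer = response_answer.split("Helpful Answer:")[-1]
+
+     # Pad to three references so the fixed set of Gradio outputs is always filled
+     sources = response["source_documents"][:3]
+     texts = [doc.page_content.strip() for doc in sources] + [""] * (3 - len(sources))
+     pages = [doc.metadata.get("page", 0) + 1 for doc in sources] + [0] * (3 - len(sources))
+
+     new_history = history + [(message, response_answer)]
+     return (qa_chain, gr.update(value=""), new_history,
+             texts[0], pages[0], texts[1], pages[1], texts[2], pages[2])
+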
+ def demo():
+     with gr.Blocks(theme=gr.themes.Default(primary_hue="red", secondary_hue="pink", neutral_hue="sky")) as demo:
+         vector_db = gr.State()
+         qa_chain = gr.State()
+
+         gr.HTML("<center><h1>Enhanced RAG PDF Chatbot</h1></center>")
+         gr.Markdown("""<b>Query your PDF documents with advanced RAG capabilities!</b>""")
+
+         with gr.Row():
+             with gr.Column(scale=86):
+                 gr.Markdown("<b>Step 1 - Configure and Initialize RAG Pipeline</b>")
+                 with gr.Row():
+                     document = gr.Files(height=300, file_count="multiple", file_types=["pdf"], interactive=True, label="Upload PDF documents")
+
+                 with gr.Row():
+                     splitting_strategy = gr.Radio(
+                         ["recursive", "fixed", "semantic"],
+                         label="Text Splitting Strategy",
+                         value="recursive"
+                     )
+                     db_choice = gr.Radio(
+                         ["faiss", "chroma", "qdrant"],
+                         label="Vector Database",
+                         value="faiss"
+                     )
+
+                 with gr.Row():
+                     db_btn = gr.Button("Create vector database")
+                     evaluate_btn = gr.Button("Evaluate RAG Pipeline")
+
+                 with gr.Row():
+                     db_progress = gr.Textbox(value="Not initialized", show_label=False)
+                     evaluation_results = gr.JSON(label="Evaluation Results")
+
+                 gr.Markdown("<b>Select Large Language Model (LLM) and input parameters</b>")
+                 with gr.Row():
+                     llm_btn = gr.Radio(list_llm_simple, label="Available LLMs", value=list_llm_simple[0], type="index")
+
+                 with gr.Row():
+                     with gr.Accordion("LLM input parameters", open=False):
+                         slider_temperature = gr.Slider(minimum=0.01, maximum=1.0, value=0.5, step=0.1, label="Temperature")
+                         slider_maxtokens = gr.Slider(minimum=128, maximum=9192, value=4096, step=128, label="Max New Tokens")
+                         slider_topk = gr.Slider(minimum=1, maximum=10, value=3, step=1, label="top-k")
+
+                 with gr.Row():
+                     qachain_btn = gr.Button("Initialize Question Answering Chatbot")
+                     llm_progress = gr.Textbox(value="Not initialized", show_label=False)
+
+             with gr.Column(scale=200):
+                 gr.Markdown("<b>Step 2 - Chat with your Document</b>")
+                 chatbot = gr.Chatbot(height=505)
+
+                 with gr.Accordion("Relevant context from the source document", open=False):
+                     with gr.Row():
+                         doc_source1 = gr.Textbox(label="Reference 1", lines=2, container=True, scale=20)
+                         source1_page = gr.Number(label="Page", scale=1)
+                     with gr.Row():
+                         doc_source2 = gr.Textbox(label="Reference 2", lines=2, container=True, scale=20)
+                         source2_page = gr.Number(label="Page", scale=1)
+                     with gr.Row():
+                         doc_source3 = gr.Textbox(label="Reference 3", lines=2, container=True, scale=20)
+                         source3_page = gr.Number(label="Page", scale=1)
+
+                 with gr.Row():
+                     msg = gr.Textbox(placeholder="Ask a question", container=True)
+                 with gr.Row():
+                     submit_btn = gr.Button("Submit")
+                     clear_btn = gr.ClearButton([msg, chatbot], value="Clear")
+
+         # Event handlers
+         db_btn.click(
+             initialize_database,
+             inputs=[document, splitting_strategy, db_choice],
+             outputs=[vector_db, db_progress]
+         )
+
+         evaluate_btn.click(
+             lambda chain: evaluate_rag_pipeline(chain, load_evaluation_dataset()) if chain else None,
+             inputs=[qa_chain],
+             outputs=[evaluation_results]
+         )
+
+         qachain_btn.click(
+             initialize_LLM,
+             inputs=[llm_btn, slider_temperature, slider_maxtokens, slider_topk, vector_db],
+             outputs=[qa_chain, llm_progress]
+         ).then(
+             lambda: [None, "", 0, "", 0, "", 0],
+             inputs=None,
+             outputs=[chatbot, doc_source1, source1_page, doc_source2, source2_page, doc_source3, source3_page],
+             queue=False
+         )
+
+         # Chatbot event handlers
+         msg.submit(
+             conversation,
+             inputs=[qa_chain, msg, chatbot],
+             outputs=[qa_chain, msg, chatbot, doc_source1, source1_page, doc_source2, source2_page, doc_source3, source3_page],
+             queue=False
+         )
+
+         submit_btn.click(
+             conversation,
+             inputs=[qa_chain, msg, chatbot],
+             outputs=[qa_chain, msg, chatbot, doc_source1, source1_page, doc_source2, source2_page, doc_source3, source3_page],
+             queue=False
+         )
+
+         clear_btn.click(
+             lambda: [None, "", 0, "", 0, "", 0],
+             inputs=None,
+             outputs=[chatbot, doc_source1, source1_page, doc_source2, source2_page, doc_source3, source3_page],
+             queue=False
+         )
+
+     demo.queue().launch(debug=True)
+
+ if __name__ == "__main__":
+     demo()