arjunanand13 committed
Commit 1596101 · Parent(s): c64a83f
Update app.py
app.py CHANGED
@@ -1,12 +1,12 @@
 import gradio as gr
 import os
 from typing import List, Dict
-import
+from ragas import evaluate
 from ragas.metrics import (
-    context_recall,
-    context_relevancy,
-    faithfulness,
-    answer_relevancy
+    ContextRecall,
+    ContextRelevancy,
+    Faithfulness,
+    AnswerRelevancy
 )
 from datasets import load_dataset
 from langchain.text_splitter import (
@@ -81,7 +81,7 @@ def create_db(splits, db_choice: str = "faiss"):
     }
     return db_creators[db_choice](splits, embeddings)
 
-# Evaluation functions
+# Updated evaluation functions
 def load_evaluation_dataset():
     # Load example dataset from RAGAS
     dataset = load_dataset("explodinggradients/fiqa", split="test")
@@ -91,16 +91,10 @@ def evaluate_rag_pipeline(qa_chain, dataset):
     # Sample a few examples for evaluation
     eval_samples = dataset.select(range(5))
 
-    # Initialize results dictionary
-    results = {
-        "faithfulness": [],
-        "answer_relevancy": [],
-        "context_recall": []
-    }
-
+    # Prepare data for RAGAS evaluation
+    eval_data = []
     for sample in eval_samples:
         question = sample["question"]
-        ground_truth = sample["answer"]
 
         # Get response from the chain
         response = qa_chain.invoke({
@@ -108,40 +102,34 @@ def evaluate_rag_pipeline(qa_chain, dataset):
             "chat_history": []
         })
 
-        # Calculate metrics for this sample
-        metrics = {
-            "context_relevancy": context_relevancy.score(
-                question=question,
-                answer=response["answer"],
-                contexts=response["source_documents"]
-            ),
-            "faithfulness": faithfulness.score(
-                question=question,
-                answer=response["answer"],
-                contexts=response["source_documents"]
-            ),
-            "answer_relevancy": answer_relevancy.score(
-                question=question,
-                answer=response["answer"]
-            ),
-            "context_recall": context_recall.score(
-                question=question,
-                answer=response["answer"],
-                contexts=response["source_documents"],
-                ground_truth=ground_truth
-            )
-        }
-
-        for metric, score in metrics.items():
-            results[metric].append(score)
+        eval_data.append({
+            "question": question,
+            "answer": response["answer"],
+            "ground_truth": sample["answer"],
+            "contexts": [doc.page_content for doc in response["source_documents"]]
+        })
 
-    # Calculate average scores
-    return {
-        metric: sum(scores) / len(scores)
-        for metric, scores in results.items()
-    }
+    # Initialize RAGAS metrics
+    metrics = [
+        ContextRecall(),
+        ContextRelevancy(),
+        Faithfulness(),
+        AnswerRelevancy()
+    ]
+
+    # Run evaluation
+    results = evaluate(
+        eval_data,
+        metrics=metrics
+    )
 
-
+    # Convert results to dictionary
+    return {
+        "context_recall": float(results["context_recall"]),
+        "context_relevancy": float(results["context_relevancy"]),
+        "faithfulness": float(results["faithfulness"]),
+        "answer_relevancy": float(results["answer_relevancy"])
+    }
 
 # Initialize langchain LLM chain
 def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, progress=gr.Progress()):
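Note: the rewritten evaluate_rag_pipeline batches all samples into eval_data and makes a single evaluate() call instead of scoring each metric per sample. A minimal standalone sketch of that flow follows; the record is made up, and it assumes a RAGAS release where evaluate() accepts a datasets.Dataset with these column names (running it also needs the LLM credentials RAGAS uses internally).

# Standalone sketch of the new evaluation path (hypothetical data;
# assumes evaluate() accepts a datasets.Dataset built from these records).
from datasets import Dataset
from ragas import evaluate
from ragas.metrics import ContextRecall, ContextRelevancy, Faithfulness, AnswerRelevancy

eval_data = [{
    "question": "What is a stock split?",
    "answer": "A stock split gives existing holders additional shares.",
    "ground_truth": "A stock split divides each existing share into several new shares.",
    "contexts": ["A stock split is a corporate action that increases the share count."],
}]

results = evaluate(
    Dataset.from_list(eval_data),
    metrics=[ContextRecall(), ContextRelevancy(), Faithfulness(), AnswerRelevancy()],
)
print(float(results["faithfulness"]))  # dict-style access, as the diff assumes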
@@ -186,6 +174,39 @@ def initialize_database(list_file_obj, splitting_strategy, db_choice, progress=g
     vector_db = create_db(doc_splits, db_choice)
     return vector_db, f"Database created using {splitting_strategy} splitting and {db_choice} vector database!"
 
+# Formatting chat history
+def format_chat_history(message, chat_history):
+    formatted_chat_history = []
+    for user_message, bot_message in chat_history:
+        formatted_chat_history.append(f"User: {user_message}")
+        formatted_chat_history.append(f"Assistant: {bot_message}")
+    return formatted_chat_history
+
+# Conversation function
+def conversation(qa_chain, message, history):
+    formatted_chat_history = format_chat_history(message, history)
+    response = qa_chain.invoke({
+        "question": message,
+        "chat_history": formatted_chat_history
+    })
+
+    response_answer = response["answer"]
+    if response_answer.find("Helpful Answer:") != -1:
+        response_answer = response_answer.split("Helpful Answer:")[-1]
+
+    response_sources = response["source_documents"]
+    response_source1 = response_sources[0].page_content.strip()
+    response_source2 = response_sources[1].page_content.strip()
+    response_source3 = response_sources[2].page_content.strip()
+
+    response_source1_page = response_sources[0].metadata["page"] + 1
+    response_source2_page = response_sources[1].metadata["page"] + 1
+    response_source3_page = response_sources[2].metadata["page"] + 1
+
+    new_history = history + [(message, response_answer)]
+
+    return qa_chain, gr.update(value=""), new_history, response_source1, response_source1_page, response_source2, response_source2_page, response_source3, response_source3_page
+
 def demo():
     with gr.Blocks(theme=gr.themes.Default(primary_hue="red", secondary_hue="pink", neutral_hue="sky")) as demo:
         vector_db = gr.State()
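For illustration, format_chat_history only flattens prior turns into "User:"/"Assistant:" strings; the current message argument is accepted but not appended (hypothetical values):

history = [("What is RAG?", "Retrieval-augmented generation grounds answers in retrieved text.")]
format_chat_history("a follow-up question", history)
# -> ["User: What is RAG?",
#     "Assistant: Retrieval-augmented generation grounds answers in retrieved text."]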
@@ -279,7 +300,6 @@ def demo():
                 queue=False
             )
 
-            # Chatbot event handlers remain the same
             msg.submit(conversation,
                 inputs=[qa_chain, msg, chatbot],
                 outputs=[qa_chain, msg, chatbot, doc_source1, source1_page, doc_source2, source2_page, doc_source3, source3_page],
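The msg.submit wiring works because conversation() returns exactly one value per component listed in outputs, in the same order, with gr.update(value="") clearing the textbox. A stripped-down sketch of the same pattern (component names here are stand-ins, not the app's):

import gradio as gr

with gr.Blocks() as sketch:
    state = gr.State()
    chatbot = gr.Chatbot(value=[])
    msg = gr.Textbox()

    def respond(state, message, history):
        reply = f"echo: {message}"  # stand-in for qa_chain.invoke(...)
        # One return value per output component, in order; the gr.update
        # clears the textbox, mirroring conversation() in the diff above.
        return state, gr.update(value=""), history + [(message, reply)]

    msg.submit(respond, inputs=[state, msg, chatbot], outputs=[state, msg, chatbot])

sketch.launch()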