File size: 5,153 Bytes
b008f13
85fe3dc
b008f13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85fe3dc
 
 
 
 
 
 
 
 
 
b008f13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85fe3dc
 
b008f13
 
85fe3dc
 
b008f13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85fe3dc
b008f13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85fe3dc
b008f13
85fe3dc
b008f13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85fe3dc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fdeb668
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
import os
from dotenv import load_dotenv
import nltk
from typing import List
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper
from ragas.testset import TestsetGenerator
from ragas.metrics import (
    LLMContextRecall,
    Faithfulness,
    FactualCorrectness,
    ResponseRelevancy,
    ContextEntityRecall,
    NoiseSensitivity
)
from ragas import evaluate, RunConfig, EvaluationDataset

# Load environment variables (load_dotenv() comes from python-dotenv)
load_dotenv()

# Fail fast with a readable message when a required credential is missing.
# Re-assigning os.environ[name] = os.getenv(name) is a no-op when the variable
# is set and raises "TypeError: str expected, not NoneType" when it is not,
# so each variable is validated explicitly instead.
# NOTE(review): RAGAS_APP_TOKEN is presumably consumed by dataset.upload()
# further down in this script - confirm against the ragas documentation.
for required_var in ("OPENAI_API_KEY", "RAGAS_APP_TOKEN"):
    if not os.getenv(required_var):
        raise ValueError(f"{required_var} not found in environment variables")

from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams
from langchain.prompts import ChatPromptTemplate

# Source pages for the evaluation corpus (same as app.py)
urls = [
    "https://www.timeout.com/london/things-to-do-in-london-this-weekend",
    "https://www.timeout.com/london/london-events-in-march",
]

# Fetch the pages as LangChain documents
loader = WebBaseLoader(urls)
docs = loader.load()

# Wrap the LangChain chat and embedding models for use by the RAGAS generator
generator_chat_model = ChatOpenAI(model="gpt-4")
generator_llm = LangchainLLMWrapper(generator_chat_model)
generator_embedding_model = OpenAIEmbeddings()
generator_embeddings = LangchainEmbeddingsWrapper(generator_embedding_model)

# Build a synthetic test set of 10 samples from the loaded documents
generator = TestsetGenerator(
    llm=generator_llm,
    embedding_model=generator_embeddings,
)
dataset = generator.generate_with_langchain_docs(docs, testset_size=10)

# Upload the test set and show whatever upload() returns
upload_result = dataset.upload()
print(upload_result)

# List the generated questions, numbered from 1
print("\nGenerated Test Questions:")
for position, generated_row in enumerate(dataset, start=1):
    print(f"{position}. {generated_row.eval_sample.user_input}")

# Build the RAG pipeline under test: splitter -> vector store -> retriever,
# plus the prompt template and the answering model.

# Chunk the loaded pages into overlapping pieces
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
split_documents = text_splitter.split_documents(docs)

# Embedding model and an in-memory Qdrant instance
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
client = QdrantClient(":memory:")

# NOTE(review): size=1536 is expected to equal the output dimension of the
# embedding model above - re-check it if the model is ever changed.
collection = "london_events"
vector_settings = VectorParams(size=1536, distance=Distance.COSINE)
client.create_collection(
    collection_name=collection,
    vectors_config=vector_settings,
)

vector_store = QdrantVectorStore(
    client=client,
    collection_name=collection,
    embedding=embeddings,
)

# Index the chunks and expose a retriever returning the top 5 matches
vector_store.add_documents(documents=split_documents)
retriever = vector_store.as_retriever(search_kwargs={"k": 5})

# Prompt template with {question} and {context} placeholders
RAG_PROMPT = """
You are a helpful assistant who answers questions about events and activities in London.
Answer based only on the provided context. If you cannot find the answer, say so.

Question: {question}

Context: {context}

Answer:"""

rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)

# Chat model that produces the answers being evaluated
llm = ChatOpenAI(model="gpt-4o-mini")

# Run every generated question through the RAG pipeline and record the
# model's answer plus the retrieved chunk texts on the sample itself.
for test_row in dataset:
    sample = test_row.eval_sample
    question = sample.user_input

    # Retrieve relevant documents. invoke() is the supported retriever entry
    # point; get_relevant_documents() is deprecated in LangChain.
    retrieved_docs = retriever.invoke(question)

    # Extract the chunk texts once; they feed both the prompt context and
    # the retrieved_contexts field stored below.
    contexts = [doc.page_content for doc in retrieved_docs]

    # Format context and generate response
    context = "\n\n".join(contexts)
    messages = rag_prompt.format_messages(question=question, context=context)
    response = llm.invoke(messages)

    # Store results in dataset
    sample.response = response.content
    sample.retrieved_contexts = contexts

# Round-trip the annotated test set through pandas to get an EvaluationDataset
testset_frame = dataset.to_pandas()
evaluation_dataset = EvaluationDataset.from_pandas(testset_frame)


# Judge model handed to evaluate() for the metrics
evaluator_chat_model = ChatOpenAI(model="gpt-4o-mini")
evaluator_llm = LangchainLLMWrapper(evaluator_chat_model)

# Run configuration with timeout=360 (presumably seconds - confirm in ragas docs)
custom_run_config = RunConfig(timeout=360)

# Every metric to score, instantiated in the order they are reported
evaluation_metrics = [
    LLMContextRecall(),
    Faithfulness(),
    FactualCorrectness(),
    ResponseRelevancy(),
    ContextEntityRecall(),
    NoiseSensitivity(),
]

# Run evaluation with all metrics
result = evaluate(
    dataset=evaluation_dataset,
    metrics=evaluation_metrics,
    llm=evaluator_llm,
    run_config=custom_run_config,
)

# Show the raw evaluation result and its type for debugging
print("RAW RESULT: ", result)
print("Type of result: ", type(result))

# Convert to pandas DataFrame for better formatting
df = result.to_pandas()
print("\nEvaluation Results as DataFrame:")
print(df)

# Make sure the output directory exists; otherwise the writes below raise
# FileNotFoundError and the (expensive) evaluation results are lost.
os.makedirs("docs", exist_ok=True)

# Try to save as markdown; fall back to CSV when 'tabulate' is not installed
print("Attempting to save as markdown...")
try:
    import tabulate  # noqa: F401 - imported only to verify it is installed
    df.to_markdown("docs/evaluation_results.md", index=False)
except ImportError as e:
    # Print detailed error message
    print(f"Import Error: {e}")
    print("Note: Install 'tabulate' package for markdown output. Falling back to CSV format.")
    df.to_csv("docs/evaluation_results.csv", index=False)
else:
    print("Successfully saved as markdown!")

# Save test questions as a numbered markdown list.
# The directory is created first so open() cannot fail with FileNotFoundError,
# and the encoding is pinned to UTF-8 because the generated questions may
# contain non-ASCII characters that the platform default codec cannot encode.
os.makedirs("docs", exist_ok=True)
with open("docs/test_questions.md", "w", encoding="utf-8") as f:
    f.write("# Test Questions\n\n")
    for number, test_row in enumerate(dataset, start=1):
        f.write(f"{number}. {test_row.eval_sample.user_input}\n")