# RAG evaluation script: generates a synthetic RAGAS test set from live web
# pages, runs each question through the RAG pipeline, and scores the results.
import os
from dotenv import load_dotenv
import nltk
from typing import List
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper
from ragas.testset import TestsetGenerator
from ragas.metrics import (
LLMContextRecall,
Faithfulness,
FactualCorrectness,
ResponseRelevancy,
ContextEntityRecall,
NoiseSensitivity
)
from ragas import evaluate, RunConfig, EvaluationDataset
# Load environment variables from a local .env file (if present).
load_dotenv()

# Fail fast when the OpenAI key is missing: every downstream call needs it.
# (load_dotenv already placed it in os.environ, so no re-assignment needed.)
if not os.getenv("OPENAI_API_KEY"):
    raise ValueError("OPENAI_API_KEY not found in environment variables")

# RAGAS_APP_TOKEN is optional (only used by dataset.upload()). Assigning
# None into os.environ raises TypeError, so only set it when it exists.
ragas_app_token = os.getenv("RAGAS_APP_TOKEN")
if ragas_app_token:
    os.environ["RAGAS_APP_TOKEN"] = ragas_app_token
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams
from langchain.prompts import ChatPromptTemplate
# Pages to evaluate against (mirrors the URL list in app.py).
source_urls = [
    "https://www.timeout.com/london/things-to-do-in-london-this-weekend",
    "https://www.timeout.com/london/london-events-in-march",
]

# Fetch and parse the pages into LangChain Document objects.
docs = WebBaseLoader(source_urls).load()
# Wrap the LangChain models so RAGAS can drive them.
generator_llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4"))
generator_embeddings = LangchainEmbeddingsWrapper(OpenAIEmbeddings())

# Synthesize a test set of questions from the loaded pages and push it to
# the RAGAS app (upload() returns a shareable URL, which we print).
generator = TestsetGenerator(llm=generator_llm, embedding_model=generator_embeddings)
dataset = generator.generate_with_langchain_docs(docs, testset_size=10)
print(dataset.upload())

# Show the questions that were generated.
print("\nGenerated Test Questions:")
for question_no, test_row in enumerate(dataset, start=1):
    print(f"{question_no}. {test_row.eval_sample.user_input}")
# --- RAG pipeline under test ---

# Chunk the documents for retrieval.
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
split_documents = splitter.split_documents(docs)

# In-memory Qdrant collection sized for text-embedding-3-small (1536 dims,
# cosine similarity).
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
client = QdrantClient(":memory:")
client.create_collection(
    collection_name="london_events",
    vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
)
vector_store = QdrantVectorStore(
    client=client,
    collection_name="london_events",
    embedding=embeddings,
)

# Index the chunks and expose a top-5 retriever.
vector_store.add_documents(documents=split_documents)
retriever = vector_store.as_retriever(search_kwargs={"k": 5})
# Create RAG prompt: constrains the assistant to answer only from the
# retrieved context; {question} and {context} are filled per test question.
RAG_PROMPT = """
You are a helpful assistant who answers questions about events and activities in London.
Answer based only on the provided context. If you cannot find the answer, say so.
Question: {question}
Context: {context}
Answer:"""
rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)
# Answering model for the pipeline under evaluation (distinct from the
# gpt-4 generator model used to create the test set above).
llm = ChatOpenAI(model="gpt-4o-mini")
# Answer every generated question with the RAG pipeline, and record the
# response plus the retrieved contexts back onto each sample so RAGAS can
# score faithfulness/recall against them.
for test_row in dataset:
    question = test_row.eval_sample.user_input

    # Retrieve the top-k chunks. Retrievers are Runnables in LangChain;
    # .invoke() replaces the deprecated get_relevant_documents().
    retrieved_docs = retriever.invoke(question)

    # Build the prompt from the retrieved context and generate an answer.
    context = "\n\n".join(doc.page_content for doc in retrieved_docs)
    messages = rag_prompt.format_messages(question=question, context=context)
    response = llm.invoke(messages)

    # Attach results to the sample for evaluation.
    test_row.eval_sample.response = response.content
    test_row.eval_sample.retrieved_contexts = [doc.page_content for doc in retrieved_docs]
# Round-trip through pandas to convert the testset into a RAGAS
# EvaluationDataset carrying question, response, and retrieved contexts.
evaluation_dataset = EvaluationDataset.from_pandas(dataset.to_pandas())

# Judge model used to score the pipeline's answers.
evaluator_llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4o-mini"))

# Generous per-call timeout: each metric issues several LLM calls.
custom_run_config = RunConfig(timeout=360)

# Score retrieval quality (context recall, entity recall, noise
# sensitivity) and generation quality (faithfulness, factual correctness,
# response relevancy) in one pass.
metrics = [
    LLMContextRecall(),
    Faithfulness(),
    FactualCorrectness(),
    ResponseRelevancy(),
    ContextEntityRecall(),
    NoiseSensitivity(),
]
result = evaluate(
    dataset=evaluation_dataset,
    metrics=metrics,
    llm=evaluator_llm,
    run_config=custom_run_config,
)
print("RAW RESULT: ", result)
print("Type of result: ", type(result))

# Convert to pandas DataFrame for better formatting
df = result.to_pandas()
print("\nEvaluation Results as DataFrame:")
print(df)

# Ensure the output directory exists before writing either format;
# otherwise to_markdown/to_csv raise FileNotFoundError on a fresh checkout.
os.makedirs("docs", exist_ok=True)

try:
    # df.to_markdown requires the optional 'tabulate' package; import it
    # explicitly so a missing dependency is caught here, not deep in pandas.
    print("Attempting to save as markdown...")
    import tabulate  # noqa: F401 -- presence check only

    df.to_markdown("docs/evaluation_results.md", index=False)
    print("Successfully saved as markdown!")
except ImportError as e:
    # Print detailed error message and fall back to a format with no
    # optional dependencies.
    print(f"Import Error: {e}")
    print("Note: Install 'tabulate' package for markdown output. Falling back to CSV format.")
    df.to_csv("docs/evaluation_results.csv", index=False)
# Persist the generated questions for review. Create docs/ first so open()
# cannot fail with FileNotFoundError on a fresh checkout; write UTF-8
# explicitly so the output does not depend on the locale encoding.
os.makedirs("docs", exist_ok=True)
with open("docs/test_questions.md", "w", encoding="utf-8") as f:
    f.write("# Test Questions\n\n")
    for i, test_row in enumerate(dataset):
        f.write(f"{i+1}. {test_row.eval_sample.user_input}\n")