# Synthetic data generation using Ragas framework

> Python packages are installed from the `requirements.txt` file into a virtual environment.

In [1]:
!python --version

Python 3.10.12


In [2]:
!pip install -qU langsmith==0.1.125 \
  langchain-core==0.2.41 \
  langchain-community \
  langchain-qdrant==0.1.4 \
  langchain-experimental \
  langchain-openai \
  langchain_huggingface \
  PyMuPDF==1.24.10 \
  ragas==0.1.18 \
  protobuf==3.20.3 \
  pyarrow==14.0.1 \
  fsspec==2024.6.1


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.4/50.4 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m290.2/290.2 kB[0m [31m10.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m397.0/397.0 kB[0m [31m23.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.5/3.5 MB[0m [31m94.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m185.7/185.7 kB[0m [31m15.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m38.0/38.0 MB[0m [31m27.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m15.9/15.9 MB[0m [31m110.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.3/2.3 MB[0m [31m82.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [17]:
import os
import getpass
from uuid import uuid4

# Configure credentials and endpoints through environment variables.
# Secrets are prompted for with getpass so they are never written into the notebook source.
# Presumably the LANGCHAIN_* variables are picked up by LangSmith tracing — TODO confirm.
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_API_KEY"] = getpass.getpass("LangChain API Key:")

os.environ["LANGCHAIN_PROJECT"] = "AIM-SDG-MidTerm - AI Safety"
os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API Key:")

# QDRANT_API_KEY and QDRANT_URL are read back later when the Qdrant client is created.
os.environ["QDRANT_API_KEY"] = getpass.getpass("Enter Your Qdrant API Key: ")
os.environ["QDRANT_URL"] = getpass.getpass("Enter Your Qdrant URL: ")


LangChain API Key:··········
OpenAI API Key:··········
Enter Your Qdrant API Key: ··········
Enter Your Qdrant URL: ··········


In [4]:
from langchain_experimental.text_splitter import SemanticChunker
from enum import Enum
from typing import List
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_core.documents import Document
import asyncio

class PDFLoaderWrapper():
    class LoaderType(str, Enum):
        PYMUPDF = "pymupdf"

    def __init__(self, file_path: str | List[str] , loader_type: LoaderType = LoaderType.PYMUPDF):
        self.file_path = file_path if isinstance(file_path, list) else [file_path]
        self.loader_type = loader_type

    async def aload(self) -> List[Document]:
        all_docs = []
        for file_path in self.file_path:
            if self.loader_type == self.LoaderType.PYMUPDF:
                try:
                    loader = PyMuPDFLoader(file_path)
                    docs = await loader.aload()
                    all_docs.extend(docs)
                except Exception as e:
                    print(f"Error loading file {file_path}: {e}")
                    continue
        return all_docs




# Source PDFs, referenced by URL.
BOR_FILE_PATH = "https://www.whitehouse.gov/wp-content/uploads/2022/10/Blueprint-for-an-AI-Bill-of-Rights.pdf"
NIST_FILE_PATH = "https://nvlpubs.nist.gov/nistpubs/ai/NIST.AI.600-1.pdf"
# SMALL_DOC is defined but currently excluded from the list below (its entry is commented out).
SMALL_DOC = "https://arxiv.org/pdf/1908.10084"
documents_to_preload = [
    BOR_FILE_PATH,
    NIST_FILE_PATH
    # SMALL_DOC
]

pdf_loader = PDFLoaderWrapper(
    documents_to_preload, PDFLoaderWrapper.LoaderType.PYMUPDF
)
# Top-level `await`: valid in a notebook cell, not in a plain Python script.
# `documents` is the flat list of everything loaded from both PDFs.
documents = await pdf_loader.aload()



In [5]:
# Set up everything needed for synthetic test-set generation:
# embedding model, semantic splitter, generator/critic LLMs and the Ragas generator.
print ("Importing packages")
import os
from ragas.testset.generator import TestsetGenerator
from ragas.testset.evolutions import simple, reasoning, multi_context
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
# NOTE(review): this rebinds `Document`, which an earlier cell imported from
# langchain_core.documents; from here on the name refers to the ragas class.
from ragas.testset.docstore import Document, DocumentStore,InMemoryDocumentStore
from ragas.testset.extractor import KeyphraseExtractor
from langchain_experimental.text_splitter import SemanticChunker
from langchain_huggingface import HuggingFaceEmbeddings
from pydantic import BaseModel

# The Hugging Face token is read from the Colab secret store.
# google.colab is presumably only importable inside Colab — TODO confirm for other runtimes.
from google.colab import userdata
os.environ["HF_TOKEN"]=userdata.get('HF_TOKEN')

print ("Packages import complete")
print ("Getting the Embedding model from Huggingface")
# Using the best-performing embedding model from Hugging Face to generate a quality dataset.
# Need GPU
model_name = "Snowflake/snowflake-arctic-embed-l"
embedding_model = HuggingFaceEmbeddings(model_name=model_name)
print ("Embedding model loaded")

print ("Splitting the documents into semantic chunks")
# Percentile breakpoints with amount=90.
text_splitter = SemanticChunker(embedding_model, breakpoint_threshold_type="percentile",breakpoint_threshold_amount=90)
# NOTE(review): `chunked_docs` is not referenced again in the visible notebook;
# the generator is later fed the unchunked `documents`.
chunked_docs = text_splitter.split_documents(documents)

print ("Creating the document store for ragas and loading LLM models")
# gpt-4o-mini is passed as the generator LLM and gpt-4o as the critic LLM below.
generator_llm = ChatOpenAI(model="gpt-4o-mini")
critic_llm = ChatOpenAI(model="gpt-4o")

# Disabled: custom docstore that would make Ragas use the SemanticChunker above.
# keyphrase_extractor = KeyphraseExtractor(llm=generator_llm)
# docstore = InMemoryDocumentStore(splitter=text_splitter,extractor=keyphrase_extractor, embeddings=embedding_model)

print ("Creating the testset generator")
generator = TestsetGenerator.from_langchain( # Default uses TokenTextSplitter
    generator_llm=generator_llm,
    critic_llm=critic_llm,
    embeddings=embedding_model,
    # docstore=docstore # Document store uses SemanticChunker
)

# Share of generated questions per evolution type; the weights sum to 1.0.
distributions = {
    simple: 0.5,
    multi_context: 0.3,
    reasoning: 0.2
}

Importing packages
Packages import complete
Getting the Embedding model from Huggingface


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/252 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/84.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/107 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/704 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.34G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.38k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/712k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/695 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/297 [00:00<?, ?B/s]

Embedding model loaded
Splitting the documents into semantic chunks
Creating the document store for ragas and loading LLM models
Creating the testset generator


In [6]:
!pip install nest-asyncio
import nest_asyncio
nest_asyncio.apply()



In [7]:
# Generate the synthetic test set.
# `test_size` scales with len(documents), i.e. with the number of Document objects
# the loader returned. Presumably that is one per PDF page rather than one per PDF,
# which would make this "tests per page" — TODO confirm.
tests_per_doc = 2
test_size = tests_per_doc * len(documents)
# NOTE(review): BaseModel is not referenced in this cell.
from pydantic import BaseModel
# The raw `documents` are passed here, not `chunked_docs`.
testset = generator.generate_with_langchain_docs(
    documents,
    test_size,
    distributions,
    with_debugging_logs=True
) # Default  RunConfig(max_retries=15, max_wait=90)

embedding nodes:   0%|          | 0/284 [00:00<?, ?it/s]



Generating:   0%|          | 0/274 [00:00<?, ?it/s]

[ragas.testset.filters.DEBUG] context scoring: {'clarity': 1, 'depth': 2, 'structure': 1, 'relevance': 2, 'score': 1.5}
[ragas.testset.evolutions.DEBUG] keyphrases in merged node: ['Synthetic training data', 'Model collapse', 'Environmental impact', 'GAI systems', 'Carbon capture programs']
[ragas.testset.filters.DEBUG] context scoring: {'clarity': 2, 'depth': 3, 'structure': 2, 'relevance': 3, 'score': 2.5}
[ragas.testset.evolutions.DEBUG] keyphrases in merged node: ['Algorithmic discrimination', 'Equitable design', 'Automated systems', 'Legal protections', 'Proactive equity assessments']
[ragas.testset.filters.DEBUG] context scoring: {'clarity': 1, 'depth': 1, 'structure': 1, 'relevance': 1, 'score': 1.0}
[ragas.testset.evolutions.INFO] retrying evolution: 0 times
[ragas.testset.filters.DEBUG] context scoring: {'clarity': 2, 'depth': 3, 'structure': 2, 'relevance': 3, 'score': 2.5}
[ragas.testset.evolutions.DEBUG] keyphrases in merged node: ['Automated systems', 'Ongoing monitoring',

In [8]:
testset.to_pandas()

Unnamed: 0,question,contexts,ground_truth,evolution_type,metadata,episode_done
0,What actions did the OSTP take to engage with ...,[APPENDIX\nLisa Feldman Barrett \nMadeline Owe...,OSTP engaged with stakeholders regarding the u...,simple,[{'source': 'https://www.whitehouse.gov/wp-con...,True
1,What are the potential issues associated with ...,"[ \n \n \n \nHUMAN ALTERNATIVES, \nCONSIDERAT...",The potential issues associated with automated...,simple,[{'source': 'https://www.whitehouse.gov/wp-con...,True
2,What role does synthetic content detection pla...,[ \n51 \ngeneral public participants. For exam...,Synthetic content detection plays a crucial ro...,simple,[{'source': 'https://nvlpubs.nist.gov/nistpubs...,True
3,What role does risk management play in the imp...,[ \n50 \nParticipatory Engagement Methods \nOn...,The answer to given question is not present in...,simple,[{'source': 'https://nvlpubs.nist.gov/nistpubs...,True
4,What concerns arise from companies using surve...,[ \n \n \n \nDATA PRIVACY \nWHY THIS PRINCIPL...,Concerns arise from companies using surveillan...,simple,[{'source': 'https://www.whitehouse.gov/wp-con...,True
...,...,...,...,...,...,...
269,What drives extra data protections in health a...,[ \n \n \n \nDATA PRIVACY \nEXTRA PROTECTIONS ...,Extra data protections in health and finance a...,reasoning,[{'source': 'https://www.whitehouse.gov/wp-con...,True
270,What insights did OSTP seek from experts in AI...,[ \n \n \n \n \nSECTION TITLE\nAPPENDIX\nListe...,OSTP sought insights and analysis on the risks...,reasoning,[{'source': 'https://www.whitehouse.gov/wp-con...,True
271,What key elements ensure clarity in docs about...,[ \nYou should know that an automated system i...,Key elements that ensure clarity in documentat...,reasoning,[{'source': 'https://www.whitehouse.gov/wp-con...,True
272,What biases to note for pre-deployment measure...,[ \n38 \nMEASURE 2.13: Eﬀectiveness of the emp...,The context mentions documenting biases or sta...,reasoning,[{'source': 'https://nvlpubs.nist.gov/nistpubs...,True


In [42]:
testset_df = testset.to_pandas()
testset_df

Unnamed: 0,question,contexts,ground_truth,evolution_type,metadata,episode_done
0,What actions did the OSTP take to engage with ...,[APPENDIX\nLisa Feldman Barrett \nMadeline Owe...,OSTP engaged with stakeholders regarding the u...,simple,[{'source': 'https://www.whitehouse.gov/wp-con...,True
1,What are the potential issues associated with ...,"[ \n \n \n \nHUMAN ALTERNATIVES, \nCONSIDERAT...",The potential issues associated with automated...,simple,[{'source': 'https://www.whitehouse.gov/wp-con...,True
2,What role does synthetic content detection pla...,[ \n51 \ngeneral public participants. For exam...,Synthetic content detection plays a crucial ro...,simple,[{'source': 'https://nvlpubs.nist.gov/nistpubs...,True
3,What role does risk management play in the imp...,[ \n50 \nParticipatory Engagement Methods \nOn...,The answer to given question is not present in...,simple,[{'source': 'https://nvlpubs.nist.gov/nistpubs...,True
4,What concerns arise from companies using surve...,[ \n \n \n \nDATA PRIVACY \nWHY THIS PRINCIPL...,Concerns arise from companies using surveillan...,simple,[{'source': 'https://www.whitehouse.gov/wp-con...,True
...,...,...,...,...,...,...
269,What drives extra data protections in health a...,[ \n \n \n \nDATA PRIVACY \nEXTRA PROTECTIONS ...,Extra data protections in health and finance a...,reasoning,[{'source': 'https://www.whitehouse.gov/wp-con...,True
270,What insights did OSTP seek from experts in AI...,[ \n \n \n \n \nSECTION TITLE\nAPPENDIX\nListe...,OSTP sought insights and analysis on the risks...,reasoning,[{'source': 'https://www.whitehouse.gov/wp-con...,True
271,What key elements ensure clarity in docs about...,[ \nYou should know that an automated system i...,Key elements that ensure clarity in documentat...,reasoning,[{'source': 'https://www.whitehouse.gov/wp-con...,True
272,What biases to note for pre-deployment measure...,[ \n38 \nMEASURE 2.13: Eﬀectiveness of the emp...,The context mentions documenting biases or sta...,reasoning,[{'source': 'https://nvlpubs.nist.gov/nistpubs...,True


In [69]:
testset_df.to_csv('ai-safety-sdg.csv', index=False)

In [43]:
test_questions = testset_df["question"].values.tolist()
test_groundtruths = testset_df["ground_truth"].values.tolist()

# Create RAG chain to generate answers for the above questions in the dataset

> Note that we are using Qdrant Cloud, where the PDF document is already processed and saved for us to consume. For the RAG pipeline we use the same embedding model that was originally used to populate the Qdrant vector store.

In [32]:
from langchain_qdrant import QdrantVectorStore
from langchain_core.documents import Document
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams

# Connect to the existing Qdrant collection and expose it as a retriever.
# `dimension` is only used by the commented-out create_collection call below.
dimension = 1024
# NOTE(review): the recorded retriever output in this notebook shows
# '_collection_name': 'ai-safety-sr-arctic-embed-l-semantic', which differs from the
# name set here — that output looks stale from an earlier run; confirm which
# collection the evaluation actually used.
collection_name = "ai-safety-sr-arctic-embed-l-recursive"
qdrant_server = os.environ["QDRANT_URL"]
qdrant_client = QdrantClient(url=qdrant_server,api_key=os.environ["QDRANT_API_KEY"])

# Disabled: collection creation (the collection is expected to exist already).
# qdrant_client.create_collection(
#     collection_name=collection_name,
#     vectors_config=VectorParams(size=dimension, distance=Distance.COSINE),
# )

# Queries are embedded with the same `embedding_model` created earlier in the notebook.
vector_store = QdrantVectorStore(
    client=qdrant_client,
    collection_name=collection_name,
    embedding=embedding_model,
)

# Presumably returns at most k=10 documents whose similarity score is >= 0.8 — TODO confirm.
# NOTE(review): the evaluation table later shows many rows with empty `contexts`;
# this threshold may be one cause — verify.
retriever = vector_store.as_retriever(search_type="similarity_score_threshold",
                search_kwargs={'k':10,'score_threshold': 0.8})

In [29]:
retriever.invoke("What steps can organizations take to minimize bias in AI models?")

[Document(metadata={'source': 'https://nvlpubs.nist.gov/nistpubs/ai/NIST.AI.600-1.pdf', 'file_path': 'https://nvlpubs.nist.gov/nistpubs/ai/NIST.AI.600-1.pdf', 'page': 44, 'total_pages': 64, 'format': 'PDF 1.6', 'title': 'Artificial Intelligence Risk Management Framework: Generative Artificial Intelligence Profile', 'author': 'National Institute of Standards and Technology', 'subject': '', 'keywords': '', 'creator': 'Acrobat PDFMaker 24 for Word', 'producer': 'Adobe PDF Library 24.2.159', 'creationDate': "D:20240805141702-04'00'", 'modDate': "D:20240805143048-04'00'", 'trapped': '', '_id': 'b6779e22-20c4-44d3-8741-c06cc2bb380c', '_collection_name': 'ai-safety-sr-arctic-embed-l-semantic'}, page_content='Human-AI Conﬁguration \n'),
 Document(metadata={'source': 'https://nvlpubs.nist.gov/nistpubs/ai/NIST.AI.600-1.pdf', 'file_path': 'https://nvlpubs.nist.gov/nistpubs/ai/NIST.AI.600-1.pdf', 'page': 33, 'total_pages': 64, 'format': 'PDF 1.6', 'title': 'Artificial Intelligence Risk Management 

In [53]:
from langchain.retrievers.contextual_compression import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor
from langchain_openai import ChatOpenAI

async def get_contextual_compressed_retriever(retriver):
    """Wrap a retriever with LLM-based contextual compression.

    The compressor is an ``LLMChainExtractor`` built from a ``gpt-4o``
    ``ChatOpenAI`` model (temperature 0, max_tokens 1500).

    Declared ``async`` although nothing is awaited inside; callers still
    have to ``await`` the result.

    Args:
        retriver: The base retriever to wrap.

    Returns:
        A ``ContextualCompressionRetriever`` combining the base retriever
        with the extractor.
    """
    extractor = LLMChainExtractor.from_llm(
        ChatOpenAI(temperature=0, model_name="gpt-4o", max_tokens=1500)
    )
    # Pair the base retriever with the extractor.
    return ContextualCompressionRetriever(
        base_compressor=extractor,
        base_retriever=retriver,
    )

In [54]:
contextual_compressed_retriever = await get_contextual_compressed_retriever(retriever)

In [20]:
from langchain.prompts import ChatPromptTemplate

# Prompt for the simple (history-free) RAG chain.
# The template exposes two placeholders, {context} and {question}, and tells the
# model to answer "I don't know" when the context does not contain the answer.
RAG_PROMPT = """\
Given a provided context and question, you must answer the question based only on context.

If you cannot answer the question based on the context - you must say "I don't know".

Context: {context}
Question: {question}
"""

rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)

In [21]:
from langchain_openai import ChatOpenAI

# Using the same model used in the app.
chat_model_name = "gpt-4o"
llm = ChatOpenAI(model=chat_model_name,temperature=0)

In [55]:
from operator import itemgetter
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from langchain.schema import StrOutputParser

# Simple RAG chain. The input is a dict with a "question" key.
# The question is sent to the compressed retriever to build "context" and is also
# passed through unchanged; both feed the prompt, then the LLM, then a string parser.
ai_safety_rag_chain = (
    {"context": itemgetter("question") | contextual_compressed_retriever, "question": itemgetter("question")}
    | rag_prompt | llm | StrOutputParser()
)

In [56]:
ai_safety_rag_chain.invoke({"question" : "How can companies ensure AI does not violate data privacy laws?"})

'Companies can ensure AI does not violate data privacy laws by implementing the following measures:\n\n1. **Built-in Protections**: Incorporate built-in protections to guard against abusive data practices.\n2. **User Agency**: Provide users with control over how their data is used.\n3. **Privacy by Design**: Make design choices that protect user privacy by default.\n4. **Reasonable Data Collection**: Ensure that data collection conforms to reasonable expectations and only collect data strictly necessary for the specific context.\n5. **User Permission and Respect**: Seek user permission and respect their decisions regarding the collection, use, access, transfer, and deletion of their data.\n6. **Alternative Safeguards**: Use alternative privacy by design safeguards where user permission is not possible.\n7. **Avoid Obfuscation**: Avoid user experience and design decisions that obfuscate user choice or burden users with privacy-invasive defaults.\n\nThese measures ensure that the systems

In [37]:
ai_safety_rag_chain.invoke({"question" :"What are the implications of using GAI systems for organizations in terms of risk management and compliance?"})

"I don't know."

In [66]:
# Rag chain used in the app
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.prompts import MessagesPlaceholder
from langchain.prompts import ChatPromptTemplate
from langchain.chains.history_aware_retriever import create_history_aware_retriever
from langchain.chains.retrieval import create_retrieval_chain
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_community.chat_message_histories import ChatMessageHistory

def create_history_aware_retriever_self(chat_model, retriever):
    """Build a retriever that first rewrites the question using the chat history.

    Args:
        chat_model: Chat model used to reformulate the latest user question.
        retriever: Retriever queried with the reformulated question.

    Returns:
        The result of ``create_history_aware_retriever`` for the given model,
        retriever and the contextualisation prompt assembled here.
    """
    system_instruction = (
        "Given a chat history and the latest user question which might reference context in the chat history, "
        "formulate a standalone question which can be understood without the chat history. Do NOT answer the question, "
        "just reformulate it if needed and otherwise return it as is."
    )
    # System instruction, then the prior turns, then the new user input.
    prompt_messages = [
        ("system", system_instruction),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
    rewrite_prompt = ChatPromptTemplate.from_messages(prompt_messages)
    return create_history_aware_retriever(chat_model, retriever, rewrite_prompt)

def create_qa_chain(chat_model):
    """Build the question-answering chain that stuffs retrieved documents into the prompt.

    Args:
        chat_model: Chat model that produces the final answer.

    Returns:
        The result of ``create_stuff_documents_chain`` for ``chat_model`` and the
        QA prompt (system instructions + chat history + user input).
    """
    # NOTE(review): adjacent fragments are joined without a separating space
    # ("...given context.Use the following...", "...the question.If you don't...").
    # The text is left untouched because it is runtime prompt content that is
    # meant to mirror the app.
    system_text = (
        "You are an helpful assistant named 'Shield' and your task is to answer any questions related to AI Safety for the given context."
        "Use the following pieces of retrieved context to answer the question."
        # Disabled instructions, kept for reference:
        # "If any questions asked outside AI Safety context, just say that you are a specialist in AI Safety and can't answer that."
        # f"When introducing you, just say that you are an AI assistant powered by embedding model {embedding_model_name} and chat model {chat_model_name} and your knowledge is limited to 'Blueprint for an AI Bill of Rights' and 'NIST AI Standards' documents."
        "If you don't know the answer, just say that you don't know.\n\n"
        "{context}"
    )
    prompt_messages = [
        ("system", system_text),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
    answer_prompt = ChatPromptTemplate.from_messages(prompt_messages)
    return create_stuff_documents_chain(chat_model, answer_prompt)

def create_rag_chain(chat_model, retriever):
    """Compose the history-aware retriever and the QA chain into one retrieval chain.

    Args:
        chat_model: Chat model shared by question reformulation and answering.
        retriever: Base retriever to make history-aware.

    Returns:
        The result of ``create_retrieval_chain`` over the two components.
    """
    return create_retrieval_chain(
        create_history_aware_retriever_self(chat_model, retriever),
        create_qa_chain(chat_model),
    )

def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the chat history for ``session_id``, creating an empty one on first use.

    Reads and writes the module-level ``store`` mapping, which must be bound
    before this function is called.

    Args:
        session_id: Key identifying the conversation.

    Returns:
        The ``ChatMessageHistory`` stored under ``session_id``.
    """
    try:
        return store[session_id]
    except KeyError:
        # First request for this session: register a fresh history.
        history = store[session_id] = ChatMessageHistory()
        return history

# Build the history-aware RAG chain on top of the compressed retriever.
history_ai_safety_rag_chain = create_rag_chain(llm, contextual_compressed_retriever)

# Wrap it so the chat history is looked up per session via get_session_history.
# The keys name the input field ("input"), the prompt placeholder that receives the
# history ("chat_history"), and the output field recorded as the reply ("answer").
conversational_rag_chain = RunnableWithMessageHistory(
        history_ai_safety_rag_chain,
        get_session_history,
        input_messages_key="input",
        history_messages_key="chat_history",
        output_messages_key="answer",
    )

# Ragas Evaluation

In [68]:
import time
import uuid

# Run every synthetic question through the conversational chain and collect,
# in question order, the generated answer and the text of the retrieved contexts.
answers = []
contexts = []

for question in test_questions:
  # Rebinding the module-level `store` (read by get_session_history) on every
  # iteration, together with a fresh session id, gives each question an empty chat history.
  store = {}
  session_id = str(uuid.uuid4())

  response = conversational_rag_chain.invoke({"input" : question},  config={"configurable": {"session_id": session_id}})
  # Disabled pause between calls:
  # time.sleep(1)
  answers.append(response["answer"])
  # Keep only the page text of each retrieved document.
  contexts.append([context.page_content for context in response["context"]])

In [70]:
from datasets import Dataset

# Assemble the evaluation dataset. The four lists are aligned by position:
# entry i of each belongs to the same test question.
response_dataset = Dataset.from_dict({
    "question" : test_questions,
    "answer" : answers,
    "contexts" : contexts,
    "ground_truth" : test_groundtruths
})

In [71]:
response_dataset[0]

{'question': 'What actions did the OSTP take to engage with stakeholders regarding the use of artificial intelligence and biometric technologies?',
 'answer': 'The Office of Science and Technology Policy (OSTP) took the following actions to engage with stakeholders regarding the use of artificial intelligence and biometric technologies:\n\n1. **Soliciting Public Comments**: OSTP created an email address (ai-equity@ostp.eop.gov) to solicit comments from the public on the use of artificial intelligence and other data-driven technologies in their lives.\n\n2. **Request for Information (RFI)**: OSTP issued a Request for Information (RFI) on the use and governance of biometric technologies. The purpose of this RFI was to understand the extent and variety of biometric technologies in past, current, or planned use; the domains in which these technologies are being used; the entities making use of them; current principles, practices, or policies governing their use; and the stakeholders that a

In [72]:
from ragas import evaluate
from ragas.metrics import (
    faithfulness,
    answer_relevancy,
    answer_correctness,
    context_recall,
    context_precision,
)

# Ragas metrics to compute for every row of the evaluation dataset.
metrics = [
    faithfulness,
    answer_relevancy,
    context_recall,
    context_precision,
    answer_correctness,
]

In [73]:
results = evaluate(response_dataset, metrics)

Evaluating:   0%|          | 0/1370 [00:00<?, ?it/s]

In [74]:
results_df = results.to_pandas()
results_df

  block_group = [InMemoryTable(cls._concat_blocks(list(block_group), axis=axis))]
  table = cls._concat_blocks(blocks, axis=0)


Unnamed: 0,question,contexts,answer,ground_truth,faithfulness,answer_relevancy,context_recall,context_precision,answer_correctness
0,What actions did the OSTP take to engage with ...,[• OSTP created an email address (ai-equity@os...,The Office of Science and Technology Policy (O...,OSTP engaged with stakeholders regarding the u...,1.000000,0.971421,0.666667,1.0,0.832265
1,What are the potential issues associated with ...,[],Automated performance evaluation systems in th...,The potential issues associated with automated...,1.000000,0.988479,0.000000,0.0,0.289107
2,What role does synthetic content detection pla...,[],Synthetic content detection plays a crucial ro...,Synthetic content detection plays a crucial ro...,0.944444,1.000000,0.000000,0.0,0.308811
3,What role does risk management play in the imp...,[risk identification and management assessment...,Risk management plays a crucial role in the im...,The answer to given question is not present in...,0.233333,1.000000,0.000000,0.0,0.920685
4,What concerns arise from companies using surve...,[Companies use surveillance software to track ...,The use of surveillance software by companies ...,Concerns arise from companies using surveillan...,0.565217,0.989812,1.000000,1.0,0.612930
...,...,...,...,...,...,...,...,...,...
269,What drives extra data protections in health a...,[],Extra data protections in health and finance a...,Extra data protections in health and finance a...,0.935484,0.982460,0.000000,0.0,0.759702
270,What insights did OSTP seek from experts in AI...,[],The Office of Science and Technology Policy (O...,OSTP sought insights and analysis on the risks...,0.000000,0.910677,0.000000,0.0,0.688606
271,What key elements ensure clarity in docs about...,"[Designers, developers, and deployers of autom...",To ensure clarity in documentation about an au...,Key elements that ensure clarity in documentat...,0.962963,0.949962,1.000000,1.0,0.510159
272,What biases to note for pre-deployment measure...,[],In the context of pre-deployment measurement e...,The context mentions documenting biases or sta...,0.000000,0.951481,0.000000,0.0,0.884850




In [75]:
results_df.to_csv('ai-safety-ragas-evaluation-result.csv', index=False)