File size: 1,990 Bytes
e7ac61a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# %%
# Environment setup cell: call python-dotenv's load_dotenv() before any other
# cell runs.
# NOTE(review): presumably this supplies API settings such as OPENAI_BASE_URL
# (read via os.getenv in the disabled QA-generation code below) from a local
# .env file — confirm against the project's environment setup.
from dotenv import load_dotenv

load_dotenv()

# Disabled one-time data-preparation cells. Everything between the triple
# quotes is a bare string expression (it is not the first statement in the
# file, so it is not the module docstring); none of it executes.
# If re-enabled, the code inside would:
#   1. read the source book parquet with pandas,
#   2. wrap each row's "text" value in a langchain Document,
#   3. convert the documents to an AutoRAG corpus frame and write it to
#      ./data/corpus.parquet,
#   4. generate QA pairs through a llama-index OpenAI client (model "gpt-4o")
#      and write them to ./data/qa.parquet.
# The benchmark cell further down passes both of those parquet paths to
# Evaluator, so presumably the files must already exist on disk while this
# section stays disabled — verify before running on a fresh checkout.
"""
# %%
import pandas as pd

df = pd.read_parquet(
    "../raw_data/dale_carnegie/how_to_win_friends_and_influence_people.parquet"
)
df.head()

# %%
from langchain.schema import Document

documents = []
for index, row in df.iterrows():
    doc = Document(page_content=row["text"])
    documents.append(doc)

documents

# %%
from autorag.utils import cast_corpus_dataset
from autorag.data.corpus import langchain_documents_to_parquet

corpus_df = langchain_documents_to_parquet(documents)
corpus_df = cast_corpus_dataset(corpus_df)
corpus_df.to_parquet("./data/corpus.parquet")


# %%
import nest_asyncio

nest_asyncio.apply()

import os
from llama_index.llms.openai import OpenAI
from autorag.data.qacreation import generate_qa_llama_index, make_single_content_qa

llm = OpenAI(
    api_base=os.getenv("OPENAI_BASE_URL"),
    model="gpt-4o",
)

qa_df = make_single_content_qa(
    corpus_df,
    content_size=49,
    qa_creation_func=generate_qa_llama_index,
    llm=llm,
    question_num_per_content=1,
)
qa_df.to_parquet("./data/qa.parquet")
"""

# %%
# Benchmark cell: register each candidate HuggingFace embedding model in
# AutoRAG's embedding_models mapping, then start one evaluation trial.
import nest_asyncio

# Applied before the AutoRAG imports, keeping the original cell's ordering.
nest_asyncio.apply()

import autorag as ag
from autorag.evaluator import Evaluator
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# (registry key, HuggingFace model id) pairs, registered in this order.
# NOTE(review): the keys are presumably referenced by name from
# ./config/config_small.yaml — confirm before renaming any of them.
for registry_key, hf_model_id in (
    ("huggingface_baai_llm_embedder", "BAAI/llm-embedder"),
    ("huggingface_baai_bge_large_en", "BAAI/bge-large-en-v1.5"),
    ("huggingface_baai_bge_base_en", "BAAI/bge-base-en-v1.5"),
    ("huggingface_baai_bge_small_en", "BAAI/bge-small-en-v1.5"),
    ("huggingface_baai_bge_m3", "BAAI/bge-m3"),
):
    ag.embedding_models[registry_key] = HuggingFaceEmbedding(hf_model_id)

# The evaluator is pointed at the prepared QA/corpus parquet files and at the
# ./benchmark project directory.
evaluator = Evaluator(
    project_dir="./benchmark",
    corpus_data_path="./data/corpus.parquet",
    qa_data_path="./data/qa.parquet",
)
evaluator.start_trial("./config/config_small.yaml")

# %%