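"""Entry point for the RAG evaluation pipeline.

Loads the dataset, chunks and embeds the documents into a vector store,
initializes the generation LLM, runs the metrics for a single sample
question, and then computes RMSE and AUC-ROC over the entire dataset.
"""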
import logging
from data.load_dataset import load_data
from generator.compute_rmse_auc_roc_metrics import compute_rmse_auc_roc_metrics
from retriever.chunk_documents import chunk_documents
from retriever.embed_documents import embed_documents
from generator.generate_metrics import generate_metrics
from generator.initialize_llm import initialize_llm

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

def main():
    logging.info("Starting the RAG pipeline")

    # Load the dataset
    dataset = load_data()
    logging.info("Dataset loaded")

    # Chunk the dataset
    documents = chunk_documents(dataset)
    logging.info("Documents chunked")

    # Embed the documents
    vector_store = embed_documents(documents)
    logging.info("Documents embedded")
    
    # Initialize the generation LLM
    llm = initialize_llm()
    logging.info("LLM initialized")

    # Pick a single sample question (row 43, chosen arbitrarily) for a quick sanity check
    row_num = 43
    sample_question = dataset[row_num]['question']

    # Generate and log metrics for the sample question
    generate_metrics(llm, vector_store, sample_question)

    # Compute RMSE and AUC-ROC across the entire dataset
    compute_rmse_auc_roc_metrics(llm, dataset, vector_store, dataset.num_rows)

    logging.info("RAG pipeline finished")

if __name__ == "__main__":
    main()