Spaces:
Sleeping
Sleeping
File size: 1,340 Bytes
f7c2fa3 026aeba e384879 026aeba e384879 026aeba bd69eee 026aeba bd69eee 026aeba bd69eee 026aeba bd69eee 026aeba bd69eee 79dcf63 e384879 79dcf63 bd69eee e384879 026aeba e384879 f7c2fa3 e384879 026aeba |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 |
import logging
from data.load_dataset import load_data
from generator.compute_rmse_auc_roc_metrics import compute_rmse_auc_roc_metrics
from retriever.chunk_documents import chunk_documents
from retriever.embed_documents import embed_documents
from generator.generate_metrics import generate_metrics
from generator.initialize_llm import initialize_llm
# Route INFO-and-above records through the root logger, prefixing each
# message with its timestamp and severity level.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
def main(sample_row: int = 43) -> None:
    """Run the RAG pipeline end to end and report evaluation metrics.

    The steps run in order: load the dataset, chunk it into documents,
    embed the documents into a vector store, initialize the generation
    LLM, generate metrics for one sample question, and finally compute
    RMSE and AUC-ROC, passing ``dataset.num_rows`` as the row count.

    Args:
        sample_row: Index of the dataset row whose ``'question'`` field is
            used for the single-question metrics run. Defaults to 43, the
            value that was previously hard-coded.
    """
    logging.info("Starting the RAG pipeline")

    # Load the dataset
    dataset = load_data()
    logging.info("Dataset loaded")

    # Chunk the dataset
    documents = chunk_documents(dataset)
    logging.info("Documents chunked")

    # Embed the documents
    vector_store = embed_documents(documents)
    logging.info("Documents embedded")

    # Initialize the Generation LLM
    llm = initialize_llm()
    logging.info("LLM initialized")

    # Sample question. A dataset with too few rows must not abort the whole
    # run before the full evaluation below, so an out-of-range index only
    # skips this step.
    # NOTE(review): assumes the object returned by load_data() raises
    # IndexError for an out-of-range integer index - confirm.
    try:
        sample_question = dataset[sample_row]['question']
    except IndexError:
        logging.warning(
            "Sample row %d is out of range; skipping sample metrics",
            sample_row,
        )
    else:
        # Call generate_metrics for the sample question
        generate_metrics(llm, vector_store, sample_question)

    # Compute RMSE and AUC-ROC for the entire dataset
    compute_rmse_auc_roc_metrics(llm, dataset, vector_store, dataset.num_rows)

    logging.info("Finished!!!")
# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()