from llmgaurdrails.custom_models.groundedness_checker.pdf_data_chunker import process_pdf
from llmgaurdrails.custom_models.groundedness_checker.llm_based_qa_generator import LLMBasedQAGenerator
from llmgaurdrails.custom_models.groundedness_checker.grounding_classifier import GroundingTrainer
from llmgaurdrails.custom_models.groundedness_checker.simple_qa_generator import SimpleQAGenerator
from llmgaurdrails.custom_models.groundedness_checker.evaluate_groundedness_model import evaluate, get_eval_data


def main():
    """Train the groundedness classifier on PDF-derived QA data, then evaluate it.

    Steps, in order:
      1. Chunk every training PDF with ``process_pdf``.
      2. Generate a QA dataset from the chunks with ``LLMBasedQAGenerator``.
      3. Train a ``GroundingTrainer`` on that dataset.
      4. Build an evaluation dataset from the evaluation PDFs and pass it to
         ``evaluate``.
    """
    # Replace with your PDF files.
    # Raw strings keep the Windows backslashes literal; the non-raw form relies
    # on invalid escape sequences (e.g. "\S", "\P"), which newer Python versions
    # warn about.
    training_pdf_paths = [
        r"D:\Sasidhar\Projects\cba\data\CreditCard.pdf",
        r"D:\Sasidhar\Projects\cba\data\home_insurance_pds.pdf",
    ]
    eval_pdf_paths = [
        r"D:\Sasidhar\Projects\llm_gaurdrails\llmgaurdrails\data\PrivateBanking.pdf",
    ]

    # Chunk *each* training PDF (previously only the first path was processed,
    # once per loop iteration) and flatten the per-file chunks into one list.
    chunks_flattened = [
        chunk
        for path in training_pdf_paths
        for chunk in process_pdf(path)
    ]

    # Generate the QA dataset.
    # NOTE(review): persist_dataset=True presumably saves the generated dataset
    # somewhere - confirm against LLMBasedQAGenerator.generate_dataset.
    qa_generator = LLMBasedQAGenerator()
    dataset = qa_generator.generate_dataset(chunks_flattened, persist_dataset=True)

    trainer = GroundingTrainer()
    trainer.train(dataset)

    # Evaluate on the evaluation PDFs rather than on the training dataset.
    # NOTE(review): assumes evaluate() accepts the object returned by
    # get_eval_data() - confirm against evaluate_groundedness_model.
    eval_dataset = get_eval_data(eval_pdf_paths=eval_pdf_paths)
    evaluate(eval_dataset)


if __name__ == "__main__":
    main()

# Previously recorded results. NOTE: these numbers are NOT test-set results;
# they were obtained by mistakenly evaluating on the training dataset.
# Re-run this script and replace them before the presentation.
#   Accuracy:  0.8952380952380953
#   Precision: 0.8738738738738738
#   Recall:    0.9238095238095239
#   F1 Score:  0.8981481481481481