# hakim
# module updated
# a637525
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from datasets import load_dataset, load_from_disk, load_metric
import torch
import pandas as pd
from tqdm import tqdm
from src.textsummarizer.entity.config_entity import ModelEvaluationConfig
import mlflow
import dagshub
import json
class ModelEvaluation:
    """Score a seq2seq summarization model with ROUGE and log the results to MLflow."""

    def __init__(self, config: "ModelEvaluationConfig"):
        """Store the evaluation configuration.

        Args:
            config: Object providing ``tokenizer_path``, ``model_path``,
                ``data_path``, ``all_params`` and ``metric_file_name``
                (the attributes read by :meth:`evaluate`).
        """
        self.config = config

    @staticmethod
    def _clean_summary(text):
        """Return *text* with the PEGASUS newline marker ``<n>`` replaced by a space.

        Replacing the empty string instead would insert a space between every
        character of the summary and ruin the n-gram overlap that ROUGE measures.
        """
        return text.replace("<n>", " ")

    def generate_batch_sized_chunks(self, list_of_elements, batch_size):
        """Yield successive ``batch_size``-sized slices of ``list_of_elements``.

        The last slice is shorter when the length is not a multiple of
        ``batch_size``; an empty input yields nothing.

        Args:
            list_of_elements: Any sequence supporting ``len()`` and slicing.
            batch_size: Number of elements per chunk; must be at least 1.

        Raises:
            ValueError: If ``batch_size`` is smaller than 1 (raised when the
                generator is first iterated).
        """
        if batch_size < 1:
            raise ValueError(f"batch_size must be >= 1, got {batch_size}")
        for start in range(0, len(list_of_elements), batch_size):
            yield list_of_elements[start : start + batch_size]

    def calculate_metric_on_test_ds(self, dataset, metric, model, tokenizer,
                                    batch_size=16,
                                    device="cuda" if torch.cuda.is_available() else "cpu",
                                    column_text="article",
                                    column_summary="highlights"):
        """Generate summaries for ``dataset`` batch by batch and score them.

        Args:
            dataset: Mapping from column name to a sequence of strings.
            metric: Object exposing ``add_batch(predictions=, references=)``
                and ``compute()``.
            model: Seq2seq model exposing ``generate``; expected to already
                live on ``device``.
            tokenizer: Tokenizer matching ``model``.
            batch_size: Number of examples generated per forward pass.
            device: Torch device the tokenized inputs are moved to.
            column_text: Name of the column holding the source documents.
            column_summary: Name of the column holding the reference summaries.

        Returns:
            Whatever ``metric.compute()`` returns after all batches were added.
        """
        article_batches = list(
            self.generate_batch_sized_chunks(dataset[column_text], batch_size))
        target_batches = list(
            self.generate_batch_sized_chunks(dataset[column_summary], batch_size))

        for article_batch, target_batch in tqdm(
                zip(article_batches, target_batches), total=len(article_batches)):
            inputs = tokenizer(article_batch, max_length=1024, truncation=True,
                               padding="max_length", return_tensors="pt")

            summaries = model.generate(
                input_ids=inputs["input_ids"].to(device),
                attention_mask=inputs["attention_mask"].to(device),
                length_penalty=0.8, num_beams=8, max_length=128)

            # Decode the generated token ids, then strip the newline marker so
            # predictions are plain text like the references.
            decoded_summaries = [
                self._clean_summary(
                    tokenizer.decode(s, skip_special_tokens=True,
                                     clean_up_tokenization_spaces=True))
                for s in summaries
            ]

            metric.add_batch(predictions=decoded_summaries, references=target_batch)

        score = metric.compute()
        return score

    def evaluate(self):
        """Run ROUGE evaluation on the first 10 test examples and log to MLflow.

        Loads the tokenizer, model and dataset from the configured paths,
        computes ROUGE-1/2/L/Lsum, logs parameters and metrics to the MLflow
        tracking server, writes the scores to ``config.metric_file_name`` as
        JSON and attaches that file to the run as an artifact.
        """
        # Set up MLflow tracking against the DagsHub-hosted server.
        dagshub.init(repo_owner='azizulhakim8291', repo_name='text-summarization', mlflow=True)
        mlflow.set_tracking_uri("https://dagshub.com/azizulhakim8291/text-summarization.mlflow")
        mlflow.set_experiment("text-summarization-evaluation")

        device = "cuda" if torch.cuda.is_available() else "cpu"
        tokenizer = AutoTokenizer.from_pretrained(self.config.tokenizer_path)
        model_pegasus = AutoModelForSeq2SeqLM.from_pretrained(self.config.model_path).to(device)

        dataset_samsum_pt = load_from_disk(self.config.data_path)

        rouge_names = ["rouge1", "rouge2", "rougeL", "rougeLsum"]
        rouge_metric = load_metric('rouge')

        with mlflow.start_run():
            mlflow.log_param("model_name", "pegasus")
            mlflow.log_param("dataset", "samsum")

            # Pass the device explicitly so the inputs land on the same device
            # the model was moved to above.
            score = self.calculate_metric_on_test_ds(
                dataset_samsum_pt['test'][0:10], rouge_metric, model_pegasus, tokenizer,
                batch_size=2, device=device,
                column_text='dialogue', column_summary='summary'
            )

            # Keep only the F-measure of the mid aggregate for each ROUGE variant.
            rouge_dict = {rn: score[rn].mid.fmeasure for rn in rouge_names}

            mlflow.log_params(self.config.all_params)

            # Log metrics to MLflow
            for rouge_name, rouge_score in rouge_dict.items():
                mlflow.log_metric(rouge_name, rouge_score)

            # Save results as JSON
            with open(self.config.metric_file_name, 'w', encoding="utf-8") as f:
                json.dump(rouge_dict, f, indent=4)

            # Log the JSON file as an artifact
            mlflow.log_artifact(self.config.metric_file_name)