from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from datasets import load_from_disk, load_metric  # note: load_metric is deprecated and requires datasets < 3.0; newer code uses the `evaluate` library
import torch
from tqdm import tqdm
from src.textsummarizer.entity.config_entity import ModelEvaluationConfig
import mlflow
import dagshub
import json


class ModelEvaluation:
    def __init__(self, config: ModelEvaluationConfig):
        self.config = config

    def generate_batch_sized_chunks(self, list_of_elements, batch_size):
        """Split the dataset into smaller batches that we can process simultaneously.
        Yield successive batch-sized chunks from list_of_elements."""
        for i in range(0, len(list_of_elements), batch_size):
            yield list_of_elements[i : i + batch_size]
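
    # For example, chunking [1, 2, 3, 4, 5] with batch_size=2 yields
    # [1, 2], [3, 4], [5]; the last chunk may be smaller than batch_size.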

    def calculate_metric_on_test_ds(self, dataset, metric, model, tokenizer,
                                    batch_size=16,
                                    device="cuda" if torch.cuda.is_available() else "cpu",
                                    column_text="article",
                                    column_summary="highlights"):
        article_batches = list(self.generate_batch_sized_chunks(dataset[column_text], batch_size))
        target_batches = list(self.generate_batch_sized_chunks(dataset[column_summary], batch_size))

        for article_batch, target_batch in tqdm(
                zip(article_batches, target_batches), total=len(article_batches)):
            inputs = tokenizer(article_batch, max_length=1024, truncation=True,
                               padding="max_length", return_tensors="pt")

            summaries = model.generate(input_ids=inputs["input_ids"].to(device),
                                       attention_mask=inputs["attention_mask"].to(device),
                                       length_penalty=0.8, num_beams=8, max_length=128)

            decoded_summaries = [tokenizer.decode(s, skip_special_tokens=True,
                                                  clean_up_tokenization_spaces=True)
                                 for s in summaries]
            # Pegasus marks line breaks with the "<n>" token; replace it with a space.
            decoded_summaries = [d.replace("<n>", " ") for d in decoded_summaries]

            metric.add_batch(predictions=decoded_summaries, references=target_batch)

        # Aggregate the ROUGE scores accumulated over all batches.
        score = metric.compute()
        return score

    def evaluate(self):
        # Set up MLflow tracking on DagsHub
        dagshub.init(repo_owner='azizulhakim8291', repo_name='text-summarization', mlflow=True)
        mlflow.set_tracking_uri("https://dagshub.com/azizulhakim8291/text-summarization.mlflow")
        mlflow.set_experiment("text-summarization-evaluation")

        device = "cuda" if torch.cuda.is_available() else "cpu"
        tokenizer = AutoTokenizer.from_pretrained(self.config.tokenizer_path)
        model_pegasus = AutoModelForSeq2SeqLM.from_pretrained(self.config.model_path).to(device)
        dataset_samsum_pt = load_from_disk(self.config.data_path)

        rouge_names = ["rouge1", "rouge2", "rougeL", "rougeLsum"]
        rouge_metric = load_metric('rouge')

        with mlflow.start_run():
            mlflow.log_param("model_name", "pegasus")
            mlflow.log_param("dataset", "samsum")

            # Evaluate on the first 10 test samples to keep the run fast.
            score = self.calculate_metric_on_test_ds(
                dataset_samsum_pt['test'][0:10], rouge_metric, model_pegasus, tokenizer,
                batch_size=2, column_text='dialogue', column_summary='summary'
            )

            # load_metric('rouge') returns AggregateScore objects; keep the mid (median) F1.
            rouge_dict = dict((rn, score[rn].mid.fmeasure) for rn in rouge_names)

            mlflow.log_params(self.config.all_params)

            # Log metrics to MLflow
            for rouge_name, rouge_score in rouge_dict.items():
                mlflow.log_metric(rouge_name, rouge_score)

            # Save results as JSON
            with open(self.config.metric_file_name, 'w') as f:
                json.dump(rouge_dict, f, indent=4)

            # Log the JSON file as an artifact
            mlflow.log_artifact(self.config.metric_file_name)
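

# Usage sketch (hypothetical): the surrounding pipeline presumably builds
# ModelEvaluationConfig from its YAML config via a configuration manager.
# The field names below are the ones this class actually reads; the paths
# and parameter values are illustrative assumptions, not the repo's real ones.
if __name__ == "__main__":
    config = ModelEvaluationConfig(
        model_path="artifacts/model_trainer/pegasus-samsum-model",    # assumed path
        tokenizer_path="artifacts/model_trainer/tokenizer",           # assumed path
        data_path="artifacts/data_transformation/samsum_dataset",     # assumed path
        metric_file_name="artifacts/model_evaluation/metrics.json",   # assumed path
        all_params={"num_train_epochs": 1},                           # assumed params to log
    )
    ModelEvaluation(config).evaluate()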