{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import os\n", "%pwd\n", "os.chdir(\"../\")\n", "\n", "%pwd" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from dataclasses import dataclass\n", "from pathlib import Path\n", "\n", "\n", "@dataclass(frozen=True)\n", "class ModelEvaluationConfig:\n", "    root_dir: str\n", "    data_path: str\n", "    model_path: str\n", "    tokenizer_path: str\n", "    metric_file_name: str" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from box import ConfigBox\n", "from pathlib import Path\n", "from src.TextSummarizer.constants import file_path\n", "from src.TextSummarizer.utils.general import read_yaml, create_directories\n", "\n", "\n", "class ConfigurationManager:\n", "\n", "    def __init__(self) -> None:\n", "        self.config: ConfigBox = read_yaml(Path(file_path.CONFIG_FILE_PATH))\n", "        self.params: ConfigBox = read_yaml(Path(file_path.PARAMS_FILE_PATH))\n", "\n", "        create_directories(path_to_directories=[self.config.artifacts_root])\n", "\n", "    def get_model_evaluation_config(self) -> ModelEvaluationConfig:\n", "        config = self.config.model_evaluation\n", "\n", "        create_directories(path_to_directories=[config.root_dir])\n", "\n", "        model_evaluation_config = ModelEvaluationConfig(\n", "            root_dir=config.root_dir,\n", "            data_path=config.data_path,\n", "            model_path=config.model_path,\n", "            tokenizer_path=config.tokenizer_path,\n", "            metric_file_name=config.metric_file_name\n", "        )\n", "\n", "        return model_evaluation_config" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from transformers import AutoModelForSeq2SeqLM, AutoTokenizer\n", "from datasets import load_from_disk, load_metric\n", "import torch\n", "import pandas as pd\n", "from tqdm import tqdm\n", "\n", "\n", "class ModelEvaluation:\n", "\n", "    def __init__(self, config: ModelEvaluationConfig):\n", "        self.config = config\n", "\n", "    def generate_batch_sized_chunks(self, list_of_elements, batch_size):\n", "        \"\"\"Split the dataset into smaller batches that can be processed simultaneously:\n", "        yield successive batch-sized chunks from list_of_elements.\"\"\"\n", "        for i in range(0, len(list_of_elements), batch_size):\n", "            yield list_of_elements[i : i + batch_size]\n", "\n", "    def calculate_metric_on_test_ds(self, dataset, metric, model, tokenizer,\n", "                                    batch_size=16,\n", "                                    device=\"cuda\" if torch.cuda.is_available() else \"cpu\",\n", "                                    column_text=\"article\",\n", "                                    column_summary=\"highlights\"):\n", "        article_batches = list(self.generate_batch_sized_chunks(dataset[column_text], batch_size))\n", "        target_batches = list(self.generate_batch_sized_chunks(dataset[column_summary], batch_size))\n", "\n", "        for article_batch, target_batch in tqdm(\n", "                zip(article_batches, target_batches), total=len(article_batches)):\n", "\n", "            inputs = tokenizer(article_batch, max_length=1024, truncation=True,\n", "                               padding=\"max_length\", return_tensors=\"pt\")\n", "\n", "            # length_penalty=0.8 discourages the model from generating sequences that are too long.\n", "            summaries = model.generate(input_ids=inputs[\"input_ids\"].to(device),\n", "                                       attention_mask=inputs[\"attention_mask\"].to(device),\n", "                                       length_penalty=0.8, num_beams=8, max_length=128)\n", "\n", "            # Decode the generated texts, replace the \"<n>\" token,\n", "            # and add the decoded texts with the references to the metric.\n", "            decoded_summaries = [tokenizer.decode(s, skip_special_tokens=True,\n", "                                                  clean_up_tokenization_spaces=True)\n", "                                 for s in summaries]\n", "            decoded_summaries = [d.replace(\"<n>\", \" \") for d in decoded_summaries]\n", "\n", "            metric.add_batch(predictions=decoded_summaries, references=target_batch)\n", "\n", "        # Finally compute and return the ROUGE scores.\n", "        score = metric.compute()\n", "        return score\n", "\n", "    def evaluate(self):\n", "        device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", "        tokenizer = AutoTokenizer.from_pretrained(self.config.tokenizer_path)\n", "        model_pegasus = AutoModelForSeq2SeqLM.from_pretrained(self.config.model_path).to(device)\n", "\n", "        # Load the tokenized dataset from disk.\n", "        dataset_samsum_pt = load_from_disk(self.config.data_path)\n", "\n", "        rouge_names = [\"rouge1\", \"rouge2\", \"rougeL\", \"rougeLsum\"]\n", "        rouge_metric = load_metric(\"rouge\")\n", "\n", "        # Evaluate on a small slice of the test split to keep the run fast.\n", "        score = self.calculate_metric_on_test_ds(\n", "            dataset_samsum_pt[\"test\"][0:10], rouge_metric, model_pegasus, tokenizer,\n", "            batch_size=2, column_text=\"dialogue\", column_summary=\"summary\"\n", "        )\n", "\n", "        rouge_dict = {rn: score[rn].mid.fmeasure for rn in rouge_names}\n", "\n", "        df = pd.DataFrame(rouge_dict, index=[\"pegasus\"])\n", "        df.to_csv(self.config.metric_file_name, index=False)" ] },
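{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Optional sanity check (illustrative sketch, not part of the pipeline): confirm that\n", "# generate_batch_sized_chunks yields batch-sized slices the way calculate_metric_on_test_ds\n", "# expects. The config values and the toy list below are placeholders invented for this check;\n", "# the real paths come from ConfigurationManager in the next cell.\n", "_demo_config = ModelEvaluationConfig(\n", "    root_dir=\"placeholder\", data_path=\"placeholder\", model_path=\"placeholder\",\n", "    tokenizer_path=\"placeholder\", metric_file_name=\"placeholder\"\n", ")\n", "_demo = ModelEvaluation(config=_demo_config)\n", "\n", "toy_dialogues = [f\"dialogue {i}\" for i in range(5)]\n", "print(list(_demo.generate_batch_sized_chunks(toy_dialogues, batch_size=2)))\n", "# Expected output: [['dialogue 0', 'dialogue 1'], ['dialogue 2', 'dialogue 3'], ['dialogue 4']]" ] },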
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "try:\n", "    config = ConfigurationManager()\n", "    model_evaluation_config = config.get_model_evaluation_config()\n", "    model_evaluation = ModelEvaluation(config=model_evaluation_config)\n", "    model_evaluation.evaluate()\n", "except Exception as e:\n", "    raise e" ] } ], "metadata": { "language_info": { "name": "python" } }, "nbformat": 4, "nbformat_minor": 2 }