diff --git "a/FineTuneAndEvaluationscores.ipynb" "b/FineTuneAndEvaluationscores.ipynb" new file mode 100644--- /dev/null +++ "b/FineTuneAndEvaluationscores.ipynb" @@ -0,0 +1,5086 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "0gF2VQ_f96of", + "outputId": "fa2f63cc-d47d-45f2-9c07-f418c7c8f5f5" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", + "google-colab 1.0.0 requires pandas==2.2.2, but you have pandas 2.2.3 which is incompatible.\n", + "torchvision 0.21.0+cu124 requires torch==2.6.0, but you have torch 2.4.1 which is incompatible.\n", + "torchaudio 2.6.0+cu124 requires torch==2.6.0, but you have torch 2.4.1 which is incompatible.\n", + "gcsfs 2025.3.2 requires fsspec==2025.3.2, but you have fsspec 2024.6.1 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0m" + ] + } + ], + "source": [
+ "# Pinned environment for this notebook.\n",
+ "# The rouge_score specifier is quoted on purpose: an unquoted '>=' is parsed by the shell as an\n",
+ "# output redirection, which installs an unpinned rouge_score and writes pip's stdout to a file named '=0.1.2'.\n",
+ "%pip install -q torch==2.4.1 transformers==4.44.2 datasets==3.0.1 nltk==3.9.1 pandas==2.2.3 matplotlib==3.8.4 evaluate==0.4.5 \"rouge_score>=0.1.2\" sentence-transformers==2.7.0"
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "WSRNnxX7_N3f", + "outputId": "f4cd0003-bb29-47d8-8a18-2c9fb88eb863" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m91.2/91.2 kB\u001b[0m \u001b[31m8.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m821.2/821.2 MB\u001b[0m \u001b[31m1.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m393.1/393.1 MB\u001b[0m \u001b[31m1.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m8.9/8.9 MB\u001b[0m \u001b[31m121.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m23.7/23.7 MB\u001b[0m \u001b[31m95.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m897.7/897.7 kB\u001b[0m \u001b[31m60.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m571.0/571.0 MB\u001b[0m \u001b[31m1.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m200.2/200.2 MB\u001b[0m \u001b[31m6.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m64.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.3/56.3 MB\u001b[0m \u001b[31m12.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m158.2/158.2 MB\u001b[0m \u001b[31m7.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m216.6/216.6 MB\u001b[0m \u001b[31m5.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m156.8/156.8 MB\u001b[0m \u001b[31m6.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m201.3/201.3 MB\u001b[0m \u001b[31m6.0 MB/s\u001b[0m eta 
\u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m19.7/19.7 MB\u001b[0m \u001b[31m96.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m89.3/89.3 kB\u001b[0m \u001b[31m8.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m155.7/155.7 MB\u001b[0m \u001b[31m6.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.5/7.5 MB\u001b[0m \u001b[31m122.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.5/3.5 MB\u001b[0m \u001b[31m100.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12.4/12.4 MB\u001b[0m \u001b[31m115.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.3/6.3 MB\u001b[0m \u001b[31m116.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", + "bigframes 2.8.0 requires gcsfs!=2025.5.0,>=2023.3.0, which is not installed.\n", + "google-colab 1.0.0 requires pandas==2.2.2, but you have pandas 2.3.1 which is incompatible.\n", + "fastai 2.7.19 requires torch<2.7,>=1.10, but you have torch 2.7.1 which is incompatible.\n", + "dask-cudf-cu12 25.2.2 requires pandas<2.2.4dev0,>=2.0, but you have pandas 2.3.1 which is incompatible.\n", + "cudf-cu12 25.2.1 requires pandas<2.2.4dev0,>=2.0, but you have pandas 2.3.1 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0m" + ] + } + ], + "source": [
+ "# Remove the packages whose pinned versions conflict with the preinstalled environment.\n",
+ "# %pip (rather than !pip) guarantees the command targets the running kernel's interpreter.\n",
+ "%pip uninstall -y torch torchvision torchaudio pandas fsspec gcsfs -q\n",
+ "# Reinstall WITHOUT version pins: pip resolves the latest releases, so results depend on the install date.\n",
+ "%pip install torch torchvision torchaudio pandas transformers datasets nltk matplotlib evaluate rouge_score sentence-transformers -q"
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "xnr7STny-GO1", + "outputId": "66f22759-c2b7-43ac-a98e-b7682c21123f" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--2025-07-13 18:15:03-- https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v1.1.json\n", + "Resolving rajpurkar.github.io (rajpurkar.github.io)... 185.199.108.153, 185.199.109.153, 185.199.110.153, ...\n", + "Connecting to rajpurkar.github.io (rajpurkar.github.io)|185.199.108.153|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 30288272 (29M) [application/json]\n", + "Saving to: ‘train-v1.1.json’\n", + "\n", + "train-v1.1.json 100%[===================>] 28.88M --.-KB/s in 0.08s \n", + "\n", + "2025-07-13 18:15:06 (345 MB/s) - ‘train-v1.1.json’ saved [30288272/30288272]\n", + "\n" + ] + } + ], + "source": [
+ "# Fetch the SQuAD v1.1 training split into the working directory (overwrites any existing copy).\n",
+ "!wget --output-document=train-v1.1.json https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v1.1.json"
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ZGvxD7Aq_7r3", + "outputId": "aa48116f-e19c-47c9-8f4a-5cc7b3c13e6b" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sample data: {'context': 'Architecturally, the school has a Catholic character. Atop the Main Building\\'s gold dome is a golden statue of the Virgin Mary. Immediately in front of the Main Building and facing it, is a copper statue of Christ with arms upraised with the legend \"Venite Ad Me Omnes\". Next to the Main Building is the Basilica of the Sacred Heart. Immediately behind the basilica is the Grotto, a Marian place of prayer and reflection. It is a replica of the grotto at Lourdes, France where the Virgin Mary reputedly appeared to Saint Bernadette Soubirous in 1858. 
At the end of the main drive (and in a direct line that connects through 3 statues and the Gold Dome), is a simple, modern stone statue of Mary.', 'qas': [{'answers': [{'answer_start': 515, 'text': 'Saint Bernadette Soubirous'}], 'question': 'To whom did the Virgin Mary allegedly appear in 1858 in Lourdes France?', 'id': '5733be284776f41900661182'}, {'answers': [{'answer_start': 188, 'text': 'a copper statue of Christ'}], 'question': 'What is in front of the Notre Dame Main Building?', 'id': '5733be284776f4190066117f'}, {'answers': [{'answer_start': 279, 'text': 'the Main Building'}], 'question': 'The Basilica of the Sacred heart at Notre Dame is beside to which structure?', 'id': '5733be284776f41900661180'}, {'answers': [{'answer_start': 381, 'text': 'a Marian place of prayer and reflection'}], 'question': 'What is the Grotto at Notre Dame?', 'id': '5733be284776f41900661181'}, {'answers': [{'answer_start': 92, 'text': 'a golden statue of the Virgin Mary'}], 'question': 'What sits on top of the Main Building at Notre Dame?', 'id': '5733be284776f4190066117e'}]}\n" + ] + } + ], + "source": [
+ "import json\n",
+ "\n",
+ "# Parse the downloaded SQuAD dump into nested dicts/lists.\n",
+ "with open('train-v1.1.json', encoding='utf-8') as squad_file:\n",
+ "    squad_data = json.load(squad_file)\n",
+ "\n",
+ "# Show one paragraph so the context / qas layout is visible before flattening.\n",
+ "print(\"Sample data:\", squad_data['data'][0]['paragraphs'][0])"
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "4jUus8rH_-zw", + "outputId": "d9c06937-6bc4-4ba1-f232-398c59585f1b" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Train size: 80 | Eval size: 20\n", + "First train example: {'context': \"In 1919 Father James Burns became president of Notre Dame, and in three years he produced an academic revolution that brought the school up to national standards by adopting the elective system and moving away from the university's traditional scholastic and classical 
emphasis. By contrast, the Jesuit colleges, bastions of academic conservatism, were reluctant to move to a system of electives. Their graduates were shut out of Harvard Law School for that reason. Notre Dame continued to grow over the years, adding more colleges, programs, and sports teams. By 1921, with the addition of the College of Commerce, Notre Dame had grown from a small college to a university with five colleges and a professional law school. The university continued to expand and add new residence halls and buildings with each subsequent president.\", 'question': 'Which college did Notre Dame add in 1921?', 'answer': 'College of Commerce'}\n" + ] + } + ], + "source": [
+ "import pandas as pd\n",
+ "from datasets import Dataset, Features, Value\n",
+ "\n",
+ "# Flatten SQuAD into one record per question, keeping only the first reference answer.\n",
+ "# Records in which any of the three fields is empty are dropped.\n",
+ "data = []\n",
+ "for article in squad_data['data']:\n",
+ "    for paragraph in article['paragraphs']:\n",
+ "        context = paragraph['context'].strip()\n",
+ "        for qa in paragraph['qas']:\n",
+ "            answers = qa['answers']\n",
+ "            record = {\n",
+ "                \"context\": context,\n",
+ "                \"question\": qa['question'].strip(),\n",
+ "                \"answer\": answers[0]['text'].strip() if answers else \"\",\n",
+ "            }\n",
+ "            if all(record.values()):\n",
+ "                data.append(record)\n",
+ "\n",
+ "# Keep only the first 100 records so the notebook runs quickly\n",
+ "data = data[:100]\n",
+ "\n",
+ "# Every column is a plain string; declare that explicitly when building the Dataset\n",
+ "df = pd.DataFrame(data)\n",
+ "features = Features({column: Value(\"string\") for column in (\"context\", \"question\", \"answer\")})\n",
+ "dataset = Dataset.from_pandas(df, features=features)\n",
+ "\n",
+ "# Seeded 80/20 split so the train/eval membership is reproducible\n",
+ "train_test_split = dataset.train_test_split(test_size=0.2, seed=42)\n",
+ "train_dataset, eval_dataset = train_test_split[\"train\"], train_test_split[\"test\"]\n",
+ "\n",
+ "print(f\"Train size: {len(train_dataset)} | Eval size: {len(eval_dataset)}\")\n",
+ "print(\"First train example:\", train_dataset[0])"
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"metadata": { + "id": "Wv0WZbb5C9jE" + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000, + "referenced_widgets": [ + "81a01ed73ad14c0bb242e92fcee3691d", + "6c78eed341cc4b24989af6ab2978d68d", + "5f6f845632a146d58278487c851d94ca", + "43643ae87f094acaacb88aa65e23b7da", + "4ee01fa61c584fb891b2cecd3c5b68f9", + "e79873a3ce284af0b1cebc1f386db911", + "33f6bfbdba6542ca9767180f762ca167", + "5eb911a19df549478f4738a2d59421cc", + "272d26b935df4a66bcd567935be2a8e0", + "960769ad013f469db9e875d72f685c06", + "cb84cd598dce48ea959fcf2589423154", + "453ccc0077d341a7b11cb485b2593da5", + "a8c772a788dd46f88516694b2e2d8c5e", + "8b957ad3c1414d2697fa053434c57c7a", + "3112299bc9f746e08ceea5b06bedcdd1", + "30404b359d6f4eae88c88dda321fdf3e", + "6c725916bffe47248bfb56ccab26fff2", + "a87e71276e7c41868048650764e7c89c", + "19cf035ddf7e4c6ea3feaee497397b78", + "bc064df8f4ad43a39b6643b6dc91a08c", + "ad1ea7e2aff7420d9db6915d900cbcfb", + "faf0cadc32d547e5a15d7ecbb157a0c7" + ] + }, + "id": "Duks_ZMrDBJu", + "outputId": "5b931687-284a-4765-d80e-287e4db9e916" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33mWARNING: Skipping gcsfs as it is not installed.\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", + "bigframes 2.8.0 requires gcsfs!=2025.5.0,>=2023.3.0, which is not installed.\n", + "google-colab 1.0.0 requires pandas==2.2.2, but you have pandas 2.3.1 which is incompatible.\n", + "fastai 2.7.19 requires torch<2.7,>=1.10, but you have torch 2.7.1 which is incompatible.\n", + "dask-cudf-cu12 25.2.2 requires pandas<2.2.4dev0,>=2.0, but you have pandas 2.3.1 which is incompatible.\n", + "cudf-cu12 25.2.1 requires pandas<2.2.4dev0,>=2.0, but you have pandas 2.3.1 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0m" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[nltk_data] Downloading package punkt to /root/nltk_data...\n", + "[nltk_data] Package punkt is already up-to-date!\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Torch version: 2.7.1+cu126\n", + "GPU available: True\n", + "--2025-07-13 18:30:27-- https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v1.1.json\n", + "Resolving rajpurkar.github.io (rajpurkar.github.io)... 185.199.108.153, 185.199.109.153, 185.199.110.153, ...\n", + "Connecting to rajpurkar.github.io (rajpurkar.github.io)|185.199.108.153|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 30288272 (29M) [application/json]\n", + "Saving to: ‘train-v1.1.json’\n", + "\n", + "train-v1.1.json 100%[===================>] 28.88M --.-KB/s in 0.07s \n", + "\n", + "2025-07-13 18:30:27 (443 MB/s) - ‘train-v1.1.json’ saved [30288272/30288272]\n", + "\n", + "Sample data: {'context': 'Architecturally, the school has a Catholic character. Atop the Main Building\\'s gold dome is a golden statue of the Virgin Mary. Immediately in front of the Main Building and facing it, is a copper statue of Christ with arms upraised with the legend \"Venite Ad Me Omnes\". Next to the Main Building is the Basilica of the Sacred Heart. 
Immediately behind the basilica is the Grotto, a Marian place of prayer and reflection. It is a replica of the grotto at Lourdes, France where the Virgin Mary reputedly appeared to Saint Bernadette Soubirous in 1858. At the end of the main drive (and in a direct line that connects through 3 statues and the Gold Dome), is a simple, modern stone statue of Mary.', 'qas': [{'answers': [{'answer_start': 515, 'text': 'Saint Bernadette Soubirous'}], 'question': 'To whom did the Virgin Mary allegedly appear in 1858 in Lourdes France?', 'id': '5733be284776f41900661182'}, {'answers': [{'answer_start': 188, 'text': 'a copper statue of Christ'}], 'question': 'What is in front of the Notre Dame Main Building?', 'id': '5733be284776f4190066117f'}, {'answers': [{'answer_start': 279, 'text': 'the Main Building'}], 'question': 'The Basilica of the Sacred heart at Notre Dame is beside to which structure?', 'id': '5733be284776f41900661180'}, {'answers': [{'answer_start': 381, 'text': 'a Marian place of prayer and reflection'}], 'question': 'What is the Grotto at Notre Dame?', 'id': '5733be284776f41900661181'}, {'answers': [{'answer_start': 92, 'text': 'a golden statue of the Virgin Mary'}], 'question': 'What sits on top of the Main Building at Notre Dame?', 'id': '5733be284776f4190066117e'}]}\n", + "Train size: 80 | Eval size: 20\n", + "First train example: {'context': \"In 1919 Father James Burns became president of Notre Dame, and in three years he produced an academic revolution that brought the school up to national standards by adopting the elective system and moving away from the university's traditional scholastic and classical emphasis. By contrast, the Jesuit colleges, bastions of academic conservatism, were reluctant to move to a system of electives. Their graduates were shut out of Harvard Law School for that reason. Notre Dame continued to grow over the years, adding more colleges, programs, and sports teams. 
By 1921, with the addition of the College of Commerce, Notre Dame had grown from a small college to a university with five colleges and a professional law school. The university continued to expand and add new residence halls and buildings with each subsequent president.\", 'question': 'Which college did Notre Dame add in 1921?', 'answer': 'College of Commerce'}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.11/dist-packages/transformers/tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "81a01ed73ad14c0bb242e92fcee3691d", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Map: 0%| | 0/80 [00:00\n", + " \n", + " \n", + " [120/120 01:00, Epoch 3/3]\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
EpochTraining LossValidation LossRouge1Rougel
16.2775005.7507670.3286130.298354
23.7331003.0378110.3602490.324975
33.0880002.2233820.4088790.372439

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight', 'lm_head.weight'].\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Running final evaluation...\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "

\n", + " \n", + " \n", + " [10/10 00:00]\n", + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Final Evaluation Results:\n", + " eval_loss: 2.223381519317627\n", + " eval_rouge1: 0.4088792920468986\n", + " eval_rougeL: 0.3724393817985359\n", + " eval_runtime: 1.9014\n", + " eval_samples_per_second: 10.519\n", + " eval_steps_per_second: 5.259\n", + " epoch: 3.0\n", + "Context: The Lobund Institute grew out of pioneering research in germ-free-life which began in 1928. This are...\n", + "Answer: The Lobund Institute\n", + "Generated Question: Lobund was the first research organization to answer whether animal life was possible without bacteria?\n", + "Reference Question: Work on a germ-free-life ended up in the creation of which Notre Dame institute?\n" + ] + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model and tokenizer saved!\n" + ] + } + ], + "source": [
+ "# Step 1: Install dependencies (restart the runtime afterwards if these packages were already imported)\n",
+ "%pip uninstall -y torch torchvision torchaudio pandas fsspec gcsfs -q\n",
+ "%pip install torch torchvision torchaudio pandas transformers datasets nltk matplotlib evaluate rouge_score sentence-transformers -q\n",
+ "\n",
+ "import json\n",
+ "\n",
+ "import evaluate\n",
+ "import matplotlib.pyplot as plt\n",
+ "import nltk\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "import torch\n",
+ "from datasets import Dataset, Features, Value\n",
+ "from transformers import (\n",
+ "    GenerationConfig,\n",
+ "    T5ForConditionalGeneration,\n",
+ "    T5Tokenizer,\n",
+ "    Trainer,\n",
+ "    TrainingArguments,\n",
+ ")\n",
+ "\n",
+ "nltk.download('punkt')\n",
+ "\n",
+ "# Tunables for this run\n",
+ "MAX_SAMPLES = 100        # number of flattened SQuAD records kept\n",
+ "MAX_INPUT_LENGTH = 256   # encoder tokens kept from the prompt\n",
+ "MAX_TARGET_LENGTH = 32   # decoder tokens kept from the reference question\n",
+ "IGNORE_INDEX = -100      # label value that the cross-entropy loss skips\n",
+ "\n",
+ "# Verify setup\n",
+ "print(f\"Torch version: {torch.__version__}\")\n",
+ "print(f\"GPU available: {torch.cuda.is_available()}\")\n",
+ "\n",
+ "# Step 2: Download and load dataset\n",
+ "!wget https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v1.1.json -O train-v1.1.json\n",
+ "with open('train-v1.1.json', 'r', encoding='utf-8') as f:\n",
+ "    squad_data = json.load(f)\n",
+ "print(\"Sample data:\", squad_data['data'][0]['paragraphs'][0])\n",
+ "\n",
+ "# Step 3: Clean and prepare dataset\n",
+ "# One record per question, using the first reference answer; rows with an empty field are skipped.\n",
+ "data = []\n",
+ "for article in squad_data['data']:\n",
+ "    for paragraph in article['paragraphs']:\n",
+ "        context = paragraph['context'].strip()\n",
+ "        for qa in paragraph['qas']:\n",
+ "            question = qa['question'].strip()\n",
+ "            answer = qa['answers'][0]['text'].strip() if qa['answers'] else \"\"\n",
+ "            if context and question and answer:\n",
+ "                data.append({\"context\": context, \"question\": question, \"answer\": answer})\n",
+ "\n",
+ "data = data[:MAX_SAMPLES]\n",
+ "df = pd.DataFrame(data)\n",
+ "features = Features({\n",
+ "    \"context\": Value(\"string\"),\n",
+ "    \"question\": Value(\"string\"),\n",
+ "    \"answer\": Value(\"string\")\n",
+ "})\n",
+ "dataset = Dataset.from_pandas(df, features=features)\n",
+ "train_test_split = dataset.train_test_split(test_size=0.2, seed=42)\n",
+ "train_dataset = train_test_split[\"train\"]\n",
+ "eval_dataset = train_test_split[\"test\"]\n",
+ "print(f\"Train size: {len(train_dataset)} | Eval size: {len(eval_dataset)}\")\n",
+ "print(\"First train example:\", train_dataset[0])\n",
+ "\n",
+ "# Step 4: Fine-tune the model\n",
+ "model_name = \"valhalla/t5-small-qg-hl\"\n",
+ "tokenizer = T5Tokenizer.from_pretrained(model_name)\n",
+ "model = T5ForConditionalGeneration.from_pretrained(model_name)\n",
+ "\n",
+ "\n",
+ "def build_prompt(context, answer):\n",
+ "    \"\"\"Return the encoder input text for one (context, answer) pair.\n",
+ "\n",
+ "    Shared by training and inference so both always use the same format.\n",
+ "    NOTE(review): the checkpoint name ends in '-hl', which may mean it expects\n",
+ "    highlighted answers in the context - confirm this plain format is intended.\n",
+ "    \"\"\"\n",
+ "    return f\"generate question: {context} {answer}\"\n",
+ "\n",
+ "\n",
+ "def preprocess(examples):\n",
+ "    \"\"\"Tokenize a batch into fixed-length encoder inputs and decoder labels.\n",
+ "\n",
+ "    Args:\n",
+ "        examples: batch dict with 'context', 'answer' and 'question' lists.\n",
+ "\n",
+ "    Returns:\n",
+ "        The tokenizer output for the prompts plus a 'labels' entry in which\n",
+ "        padding positions are set to IGNORE_INDEX, so the loss is computed\n",
+ "        on real question tokens only.\n",
+ "    \"\"\"\n",
+ "    prompts = [build_prompt(ctx, ans) for ctx, ans in zip(examples['context'], examples['answer'])]\n",
+ "    model_inputs = tokenizer(prompts, max_length=MAX_INPUT_LENGTH, truncation=True, padding=\"max_length\")\n",
+ "    target_ids = tokenizer(examples['question'], max_length=MAX_TARGET_LENGTH, truncation=True, padding=\"max_length\")[\"input_ids\"]\n",
+ "    pad_id = tokenizer.pad_token_id\n",
+ "    model_inputs[\"labels\"] = [\n",
+ "        [token if token != pad_id else IGNORE_INDEX for token in sequence]\n",
+ "        for sequence in target_ids\n",
+ "    ]\n",
+ "    return model_inputs\n",
+ "\n",
+ "\n",
+ "tokenized_train_dataset = train_dataset.map(preprocess, remove_columns=train_dataset.column_names, batched=True)\n",
+ "tokenized_eval_dataset = eval_dataset.map(preprocess, remove_columns=eval_dataset.column_names, batched=True)\n",
+ "\n",
+ "tokenized_train_dataset = tokenized_train_dataset.with_format(\"torch\")\n",
+ "tokenized_eval_dataset = tokenized_eval_dataset.with_format(\"torch\")\n",
+ "\n",
+ "training_args = TrainingArguments(\n",
+ "    output_dir=\"./qg-finetuned\",\n",
+ "    per_device_train_batch_size=2,\n",
+ "    per_device_eval_batch_size=2,\n",
+ "    num_train_epochs=3,\n",
+ "    eval_strategy=\"epoch\",\n",
+ "    learning_rate=2e-5,\n",
+ "    logging_dir=\"./logs\",\n",
+ "    logging_steps=10,\n",
+ "    save_strategy=\"epoch\",\n",
+ "    save_total_limit=1,\n",
+ "    fp16=torch.cuda.is_available(),  # mixed precision requires a CUDA device\n",
+ "    report_to=\"none\",\n",
+ "    load_best_model_at_end=True,\n",
+ "    metric_for_best_model=\"eval_loss\",\n",
+ "    greater_is_better=False\n",
+ ")\n",
+ "\n",
+ "# Load the metric once instead of on every evaluation pass.\n",
+ "rouge = evaluate.load(\"rouge\")\n",
+ "\n",
+ "\n",
+ "def compute_metrics(eval_pred):\n",
+ "    \"\"\"Return ROUGE-1 and ROUGE-L between predicted and reference questions.\n",
+ "\n",
+ "    Predictions are the argmax of the logits produced during evaluation\n",
+ "    (teacher forcing), not free-running generations, so treat the scores as\n",
+ "    a training-progress signal rather than a final quality measure.\n",
+ "    \"\"\"\n",
+ "    predictions, labels = eval_pred\n",
+ "    # The model may return a tuple of outputs; the first element holds the logits.\n",
+ "    predictions = predictions[0] if isinstance(predictions, tuple) else predictions\n",
+ "    predictions = np.argmax(predictions, axis=-1) if predictions.ndim == 3 else predictions\n",
+ "\n",
+ "    # Positions excluded from the loss have no reference token: blank them in both\n",
+ "    # arrays so they decode to nothing (IGNORE_INDEX itself is not a valid token id).\n",
+ "    pad_id = tokenizer.pad_token_id\n",
+ "    ignored = labels == IGNORE_INDEX\n",
+ "    predictions = np.where(ignored, pad_id, predictions)\n",
+ "    labels = np.where(ignored, pad_id, labels)\n",
+ "\n",
+ "    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)\n",
+ "    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)\n",
+ "\n",
+ "    rouge_score = rouge.compute(predictions=decoded_preds, references=decoded_labels)\n",
+ "    return {\n",
+ "        \"rouge1\": rouge_score[\"rouge1\"],\n",
+ "        \"rougeL\": rouge_score[\"rougeL\"]\n",
+ "    }\n",
+ "\n",
+ "\n",
+ "trainer = Trainer(\n",
+ "    model=model,\n",
+ "    args=training_args,\n",
+ "    train_dataset=tokenized_train_dataset,\n",
+ "    eval_dataset=tokenized_eval_dataset,\n",
+ "    compute_metrics=compute_metrics\n",
+ ")\n",
+ "\n",
+ "print(\"Fine-tuning started...\")\n",
+ "trainer.train()\n",
+ "print(\"Running final evaluation...\")\n",
+ "results = trainer.evaluate()\n",
+ "print(\"Final Evaluation Results:\")\n",
+ "for metric, score in results.items():\n",
+ "    print(f\" {metric}: {score}\")\n",
+ "\n",
+ "# Step 5: Generate and evaluate sample questions\n",
+ "model.eval()\n",
+ "sample = eval_dataset[0]\n",
+ "# Place the inputs on whatever device the model currently lives on.\n",
+ "inputs = tokenizer(build_prompt(sample['context'], sample['answer']), max_length=MAX_INPUT_LENGTH, truncation=True, padding=\"max_length\", return_tensors=\"pt\").to(model.device)\n",
+ "\n",
+ "generation_config = GenerationConfig(early_stopping=True, num_beams=5, max_length=128)\n",
+ "outputs = model.generate(**inputs, generation_config=generation_config)\n",
+ "generated_question = tokenizer.decode(outputs[0], skip_special_tokens=True)\n",
+ "\n",
+ "print(f\"Context: {sample['context'][:100]}...\")\n",
+ "print(f\"Answer: {sample['answer']}\")\n",
+ "print(f\"Generated Question: {generated_question}\")\n",
+ "print(f\"Reference Question: {sample['question']}\")\n",
+ "\n",
+ "# Step 6: Plot evaluation scores\n",
+ "# Only the evaluation entries of the log carry ROUGE values; filter once and reuse.\n",
+ "log_history = trainer.state.log_history\n",
+ "eval_logs = [entry for entry in log_history if 'eval_rouge1' in entry]\n",
+ "epochs = [entry['epoch'] for entry in eval_logs]\n",
+ "rouge1_scores = [entry['eval_rouge1'] for entry in eval_logs]\n",
+ "rougeL_scores = [entry['eval_rougeL'] for entry in eval_logs]\n",
+ "\n",
+ "plt.figure(figsize=(10, 5))\n",
+ "plt.plot(epochs, rouge1_scores, label='ROUGE-1')\n",
+ "plt.plot(epochs, rougeL_scores, label='ROUGE-L')\n",
+ "plt.xlabel('Epoch')\n",
+ "plt.ylabel('Score')\n",
+ "plt.title('Evaluation Scores Over Epochs')\n",
+ "plt.legend()\n",
+ "plt.grid(True)\n",
+ "plt.show()\n",
+ "\n",
+ "# Step 7: Save the model\n",
+ "model.save_pretrained(\"./qg-finetuned/final\")\n",
+ "tokenizer.save_pretrained(\"./qg-finetuned/final\")\n",
+ "print(\"Model and tokenizer saved!\")"
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "PLtSpaNOFLxP" + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000, + "referenced_widgets": [ + "a84e34de79c140abb75f26d9f9ca64c3", + "7cd6276e65a34c4381cc3f141f7210ab", + "dd4a25ac0fe34b3cb1509487b58ffd04", + "58a2acc544464cbfa2cbde86ffcada43", + "a2795646ea33406284fdb0a68a772ce7", + "39e7157554244ec884531f94e513811e", + "352154f2bbf54326b36fc0404752b8c9", + "e4cff4262d46424297c8308e97d0be53", + "f5eb532c16ad4a2eb6b189f9b583258a", + "23a5d020f62447dab80bd7424adf14da", + 
"348bcd5d97c4439b805f3a306cd2dddd", + "efe1f351b01440acb9259b254b91eabc", + "f70e2f3ca60b4c37b509c3a25363a577", + "aefde5f88d1f4f12b49b123758b265fb", + "5b8ae4261cf8451192fea749e4134a56", + "298a54de277c44c19797ee16216f700d", + "24dc071b0e5a46e3a38656ac6ac756fa", + "10cdcf806dbd4875ab5597791d8c284f", + "5e5250d5fa8249148ad503d7570a5e27", + "7907da1db61c44f18d93b73fa0b6a019", + "a5f26730a83543abace4fff07987c3f7", + "d82a4b978f2b4ee7a1cd9f81850ebab9" + ] + }, + "id": "i2p85bpiFO1X", + "outputId": "dd52cfe3-5d0f-44bb-d3a0-4a162c5720c9" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33mWARNING: Skipping gcsfs as it is not installed.\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", + "bigframes 2.8.0 requires gcsfs!=2025.5.0,>=2023.3.0, which is not installed.\n", + "google-colab 1.0.0 requires pandas==2.2.2, but you have pandas 2.3.1 which is incompatible.\n", + "fastai 2.7.19 requires torch<2.7,>=1.10, but you have torch 2.7.1 which is incompatible.\n", + "dask-cudf-cu12 25.2.2 requires pandas<2.2.4dev0,>=2.0, but you have pandas 2.3.1 which is incompatible.\n", + "cudf-cu12 25.2.1 requires pandas<2.2.4dev0,>=2.0, but you have pandas 2.3.1 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0m" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[nltk_data] Downloading package punkt to /root/nltk_data...\n", + "[nltk_data] Package punkt is already up-to-date!\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Torch version: 2.7.1+cu126\n", + "GPU available: True\n", + "--2025-07-13 18:40:38-- https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v1.1.json\n", + "Resolving rajpurkar.github.io (rajpurkar.github.io)... 
185.199.108.153, 185.199.109.153, 185.199.110.153, ...\n", + "Connecting to rajpurkar.github.io (rajpurkar.github.io)|185.199.108.153|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 30288272 (29M) [application/json]\n", + "Saving to: ‘train-v1.1.json’\n", + "\n", + "train-v1.1.json 100%[===================>] 28.88M --.-KB/s in 0.08s \n", + "\n", + "2025-07-13 18:40:39 (360 MB/s) - ‘train-v1.1.json’ saved [30288272/30288272]\n", + "\n", + "Sample data: {'context': 'Architecturally, the school has a Catholic character. Atop the Main Building\\'s gold dome is a golden statue of the Virgin Mary. Immediately in front of the Main Building and facing it, is a copper statue of Christ with arms upraised with the legend \"Venite Ad Me Omnes\". Next to the Main Building is the Basilica of the Sacred Heart. Immediately behind the basilica is the Grotto, a Marian place of prayer and reflection. It is a replica of the grotto at Lourdes, France where the Virgin Mary reputedly appeared to Saint Bernadette Soubirous in 1858. 
At the end of the main drive (and in a direct line that connects through 3 statues and the Gold Dome), is a simple, modern stone statue of Mary.', 'qas': [{'answers': [{'answer_start': 515, 'text': 'Saint Bernadette Soubirous'}], 'question': 'To whom did the Virgin Mary allegedly appear in 1858 in Lourdes France?', 'id': '5733be284776f41900661182'}, {'answers': [{'answer_start': 188, 'text': 'a copper statue of Christ'}], 'question': 'What is in front of the Notre Dame Main Building?', 'id': '5733be284776f4190066117f'}, {'answers': [{'answer_start': 279, 'text': 'the Main Building'}], 'question': 'The Basilica of the Sacred heart at Notre Dame is beside to which structure?', 'id': '5733be284776f41900661180'}, {'answers': [{'answer_start': 381, 'text': 'a Marian place of prayer and reflection'}], 'question': 'What is the Grotto at Notre Dame?', 'id': '5733be284776f41900661181'}, {'answers': [{'answer_start': 92, 'text': 'a golden statue of the Virgin Mary'}], 'question': 'What sits on top of the Main Building at Notre Dame?', 'id': '5733be284776f4190066117e'}]}\n", + "Train size: 640 | Eval size: 160\n", + "First train example: {'context': 'The University of Notre Dame du Lac (or simply Notre Dame /ˌnoʊtərˈdeɪm/ NOH-tər-DAYM) is a Catholic research university located adjacent to South Bend, Indiana, in the United States. In French, Notre Dame du Lac means \"Our Lady of the Lake\" and refers to the university\\'s patron saint, the Virgin Mary. 
The main campus covers 1,250 acres in a suburban setting and it contains a number of recognizable landmarks, such as the Golden Dome, the \"Word of Life\" mural (commonly known as Touchdown Jesus), and the Basilica.', 'question': 'The school known as Notre Dame is known by a more lengthy name, what is it?', 'answer': 'University of Notre Dame du'}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.11/dist-packages/transformers/tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "a84e34de79c140abb75f26d9f9ca64c3", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Map: 0%| | 0/640 [00:00\n", + " \n", + " \n", + " [320/320 01:02, Epoch 2/2]\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
EpochTraining LossValidation LossRouge1Rougel
11.3599001.1235630.4682390.447952
21.1854001.0516120.4745660.454544

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight', 'lm_head.weight'].\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Running final evaluation...\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "

\n", + " \n", + " \n", + " [40/40 00:01]\n", + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Final Evaluation Results:\n", + " eval_loss: 1.0516119003295898\n", + " eval_rouge1: 0.47456620661431465\n", + " eval_rougeL: 0.45454379559304303\n", + " eval_runtime: 4.061\n", + " eval_samples_per_second: 39.399\n", + " eval_steps_per_second: 9.85\n", + " epoch: 2.0\n", + "Context: In 1919 Father James Burns became president of Notre Dame, and in three years he produced an academi...\n", + "Answer: three years\n", + "Generated Question: What was the name of the president of Notre Dame in 1919?\n", + "Reference Question: Over how many years did the change to national standards undertaken at Notre Dame in the early 20th century take place?\n" + ] + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model and tokenizer saved!\n" + ] + } + ], + "source": [ + "# Install dependencies\n", + "!pip uninstall -y torch torchvision torchaudio pandas fsspec gcsfs -q\n", + "!pip install torch torchvision torchaudio pandas transformers datasets nltk matplotlib evaluate rouge_score sentence-transformers -q\n", + "# Restart runtime after installation\n", + "\n", + "import json\n", + "import pandas as pd\n", + "from datasets import Dataset, Features, Value\n", + "from transformers import T5Tokenizer, T5ForConditionalGeneration, TrainingArguments, Trainer\n", + "import evaluate\n", + "import matplotlib.pyplot as plt\n", + "import torch\n", + "import nltk\n", + "import numpy as np # Added missing import\n", + "nltk.download('punkt')\n", + "\n", + "# Verify setup\n", + "print(f\"Torch version: {torch.__version__}\")\n", + "print(f\"GPU available: {torch.cuda.is_available()}\")\n", + "\n", + "# Step 2: Download and load dataset\n", + "!wget https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v1.1.json -O train-v1.1.json\n", + "with open('train-v1.1.json', 'r', encoding='utf-8') as f:\n", + " squad_data = json.load(f)\n", + "print(\"Sample data:\", squad_data['data'][0]['paragraphs'][0])\n", + "\n", + "# Step 3: Clean and prepare dataset\n", + "data = []\n", + "for article in squad_data['data']:\n", + " for paragraph in article['paragraphs']:\n", + " context = paragraph['context'].strip()\n", + " for qa in paragraph['qas']:\n", + " question = qa['question'].strip()\n", + " answer = qa['answers'][0]['text'].strip() if qa['answers'] else \"\"\n", + " if context and question and answer:\n", + " data.append({\"context\": context, \"question\": question, \"answer\": answer})\n", + "\n", + "data = data[:800]\n", + "df = pd.DataFrame(data)\n", + "features = Features({\n", + " \"context\": Value(\"string\"),\n", + " \"question\": Value(\"string\"),\n", 
+ " \"answer\": Value(\"string\")\n", + "})\n", + "dataset = Dataset.from_pandas(df, features=features)\n", + "train_test_split = dataset.train_test_split(test_size=0.2, seed=42)\n", + "train_dataset = train_test_split[\"train\"]\n", + "eval_dataset = train_test_split[\"test\"]\n", + "print(f\"Train size: {len(train_dataset)} | Eval size: {len(eval_dataset)}\")\n", + "print(\"First train example:\", train_dataset[0])\n", + "\n", + "# Step 4: Fine-tune the model\n", + "model_name = \"valhalla/t5-small-qg-hl\"\n", + "tokenizer = T5Tokenizer.from_pretrained(model_name)\n", + "model = T5ForConditionalGeneration.from_pretrained(model_name)\n", + "\n", + "def preprocess(examples):\n", + " inputs = [f\"generate question: {ctx} {ans}\" for ctx, ans in zip(examples['context'], examples['answer'])]\n", + " targets = examples['question']\n", + " model_inputs = tokenizer(inputs, max_length=256, truncation=True, padding=\"max_length\", return_tensors=None)\n", + " labels = tokenizer(targets, max_length=32, truncation=True, padding=\"max_length\")[\"input_ids\"]\n", + " model_inputs[\"labels\"] = labels\n", + " return model_inputs\n", + "\n", + "tokenized_train_dataset = train_dataset.map(preprocess, remove_columns=train_dataset.column_names, batched=True)\n", + "tokenized_eval_dataset = eval_dataset.map(preprocess, remove_columns=eval_dataset.column_names, batched=True)\n", + "\n", + "tokenized_train_dataset = tokenized_train_dataset.with_format(\"torch\")\n", + "tokenized_eval_dataset = tokenized_eval_dataset.with_format(\"torch\")\n", + "\n", + "training_args = TrainingArguments(\n", + " output_dir=\"./qg-finetuned\",\n", + " per_device_train_batch_size=4,\n", + " per_device_eval_batch_size=4,\n", + " num_train_epochs=2,\n", + " eval_strategy=\"epoch\",\n", + " learning_rate=2e-5,\n", + " logging_dir=\"./logs\",\n", + " logging_steps=10,\n", + " save_strategy=\"epoch\",\n", + " save_total_limit=1,\n", + " fp16=True,\n", + " report_to=\"none\",\n", + " 
load_best_model_at_end=True,\n", + " metric_for_best_model=\"eval_loss\",\n", + " greater_is_better=False\n", + ")\n", + "\n", + "\n", + "def compute_metrics(eval_pred):\n", + " predictions, labels = eval_pred\n", + " predictions = predictions[0] if isinstance(predictions, tuple) else predictions\n", + " predictions = np.argmax(predictions, axis=-1) if predictions.ndim == 3 else predictions\n", + " labels = np.argmax(labels, axis=-1) if labels.ndim == 3 else labels\n", + "\n", + " def decode_sequences(sequences):\n", + " return [tokenizer.decode(seq, skip_special_tokens=True) for seq in sequences]\n", + "\n", + " decoded_preds = decode_sequences(predictions)\n", + " decoded_labels = decode_sequences(labels)\n", + "\n", + " rouge = evaluate.load(\"rouge\")\n", + " rouge_score = rouge.compute(predictions=decoded_preds, references=decoded_labels)\n", + "\n", + " return {\n", + " \"rouge1\": rouge_score[\"rouge1\"],\n", + " \"rougeL\": rouge_score[\"rougeL\"]\n", + " }\n", + "\n", + "trainer = Trainer(\n", + " model=model,\n", + " args=training_args,\n", + " train_dataset=tokenized_train_dataset,\n", + " eval_dataset=tokenized_eval_dataset,\n", + " compute_metrics=compute_metrics\n", + ")\n", + "\n", + "print(\"Fine-tuning started...\")\n", + "trainer.train()\n", + "print(\"Running final evaluation...\")\n", + "results = trainer.evaluate()\n", + "print(\"Final Evaluation Results:\")\n", + "for metric, score in results.items():\n", + " print(f\" {metric}: {score}\")\n", + "\n", + "# Step 5: Generate and evaluate sample questions\n", + "from transformers import GenerationConfig\n", + "model.eval()\n", + "sample = eval_dataset[0]\n", + "inputs = tokenizer(f\"generate question: {sample['context']} {sample['answer']}\", max_length=256, truncation=True, padding=\"max_length\", return_tensors=\"pt\").to(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", + "\n", + "generation_config = GenerationConfig(early_stopping=True, num_beams=5, max_length=128) # Adjusted\n", + 
"outputs = model.generate(**inputs, generation_config=generation_config)\n", + "generated_question = tokenizer.decode(outputs[0], skip_special_tokens=True)\n", + "\n", + "print(f\"Context: {sample['context'][:100]}...\")\n", + "print(f\"Answer: {sample['answer']}\")\n", + "print(f\"Generated Question: {generated_question}\")\n", + "print(f\"Reference Question: {sample['question']}\")\n", + "\n", + "# Step 6: Plot evaluation scores\n", + "log_history = trainer.state.log_history\n", + "epochs = [entry['epoch'] for entry in log_history if 'eval_rouge1' in entry]\n", + "rouge1_scores = [entry['eval_rouge1'] for entry in log_history if 'eval_rouge1' in entry]\n", + "rougeL_scores = [entry['eval_rougeL'] for entry in log_history if 'eval_rougeL' in entry]\n", + "\n", + "plt.figure(figsize=(10, 5))\n", + "plt.plot(epochs, rouge1_scores, label='ROUGE-1')\n", + "plt.plot(epochs, rougeL_scores, label='ROUGE-L')\n", + "plt.xlabel('Epoch')\n", + "plt.ylabel('Score')\n", + "plt.title('Evaluation Scores Over Epochs')\n", + "plt.legend()\n", + "plt.grid(True)\n", + "plt.show()\n", + "\n", + "# Step 7: Save the model\n", + "model.save_pretrained(\"./qg-finetuned/final\")\n", + "tokenizer.save_pretrained(\"./qg-finetuned/final\")\n", + "print(\"Model and tokenizer saved!\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "68nRksRUH2D-" + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000, + "referenced_widgets": [ + "83fcc7f94cd0425e893ac1e68213b9c6", + "214ef7b83c4d441b8b9474dcb6ea3aff", + "b000c107ddf24b168ba733937efd32a3", + "d6fcecfa42234b06ad3c8dc47a265001", + "cf1e3fbde7e34642a0dbe181858ea8f2", + "d11db59116f5450695f6aaf944617cf6", + "27e4b36842a749d781467a4593fbd453", + "3cf1c4746da54c88916b90772f434a4f", + "c4ef9e9b3c2043f0a08dacd0088871b7", + "cb440926360a4ba39cc34d1eae9a429d", + 
"d6ce4f75dd824911971c19a1602d4e34", + "e9bd52917d3744a6bde159c367b352cb", + "9de9cf92a62f495887c6495e55ce0738", + "53e2edf5cacf4eeabe24b61e7c8cc34e", + "1eb163a3ed314ca19f97a0ec060aebb6", + "352ef36e791348eb9d4d6021f073c8a5", + "be5b828dd1444c4694e67b545a858f59", + "5d7724fa0d4e4e18a0fabf6344a3503a", + "5970fce117894ccaab6e598417968d43", + "d1359916a9fd4ff993f142f18b04209e", + "11af3884716d44f3b2ed29804a2766a9", + "50f01d34d3664802ab2b56e07ea6d630" + ] + }, + "id": "waaltAGLH4h2", + "outputId": "76774308-9670-4755-805f-f84c5045e12b" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33mWARNING: Skipping gcsfs as it is not installed.\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", + "bigframes 2.8.0 requires gcsfs!=2025.5.0,>=2023.3.0, which is not installed.\n", + "google-colab 1.0.0 requires pandas==2.2.2, but you have pandas 2.3.1 which is incompatible.\n", + "fastai 2.7.19 requires torch<2.7,>=1.10, but you have torch 2.7.1 which is incompatible.\n", + "dask-cudf-cu12 25.2.2 requires pandas<2.2.4dev0,>=2.0, but you have pandas 2.3.1 which is incompatible.\n", + "cudf-cu12 25.2.1 requires pandas<2.2.4dev0,>=2.0, but you have pandas 2.3.1 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0m" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[nltk_data] Downloading package punkt to /root/nltk_data...\n", + "[nltk_data] Package punkt is already up-to-date!\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Torch version: 2.7.1+cu126\n", + "GPU available: True\n", + "--2025-07-13 18:53:28-- https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v1.1.json\n", + "Resolving rajpurkar.github.io (rajpurkar.github.io)... 
185.199.108.153, 185.199.109.153, 185.199.110.153, ...\n", + "Connecting to rajpurkar.github.io (rajpurkar.github.io)|185.199.108.153|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 30288272 (29M) [application/json]\n", + "Saving to: ‘train-v1.1.json’\n", + "\n", + "train-v1.1.json 100%[===================>] 28.88M --.-KB/s in 0.08s \n", + "\n", + "2025-07-13 18:53:29 (356 MB/s) - ‘train-v1.1.json’ saved [30288272/30288272]\n", + "\n", + "Sample data: {'context': 'Architecturally, the school has a Catholic character. Atop the Main Building\\'s gold dome is a golden statue of the Virgin Mary. Immediately in front of the Main Building and facing it, is a copper statue of Christ with arms upraised with the legend \"Venite Ad Me Omnes\". Next to the Main Building is the Basilica of the Sacred Heart. Immediately behind the basilica is the Grotto, a Marian place of prayer and reflection. It is a replica of the grotto at Lourdes, France where the Virgin Mary reputedly appeared to Saint Bernadette Soubirous in 1858. 
At the end of the main drive (and in a direct line that connects through 3 statues and the Gold Dome), is a simple, modern stone statue of Mary.', 'qas': [{'answers': [{'answer_start': 515, 'text': 'Saint Bernadette Soubirous'}], 'question': 'To whom did the Virgin Mary allegedly appear in 1858 in Lourdes France?', 'id': '5733be284776f41900661182'}, {'answers': [{'answer_start': 188, 'text': 'a copper statue of Christ'}], 'question': 'What is in front of the Notre Dame Main Building?', 'id': '5733be284776f4190066117f'}, {'answers': [{'answer_start': 279, 'text': 'the Main Building'}], 'question': 'The Basilica of the Sacred heart at Notre Dame is beside to which structure?', 'id': '5733be284776f41900661180'}, {'answers': [{'answer_start': 381, 'text': 'a Marian place of prayer and reflection'}], 'question': 'What is the Grotto at Notre Dame?', 'id': '5733be284776f41900661181'}, {'answers': [{'answer_start': 92, 'text': 'a golden statue of the Virgin Mary'}], 'question': 'What sits on top of the Main Building at Notre Dame?', 'id': '5733be284776f4190066117e'}]}\n", + "Train size: 640 | Eval size: 160\n", + "First train example: {'context': 'The University of Notre Dame du Lac (or simply Notre Dame /ˌnoʊtərˈdeɪm/ NOH-tər-DAYM) is a Catholic research university located adjacent to South Bend, Indiana, in the United States. In French, Notre Dame du Lac means \"Our Lady of the Lake\" and refers to the university\\'s patron saint, the Virgin Mary. 
The main campus covers 1,250 acres in a suburban setting and it contains a number of recognizable landmarks, such as the Golden Dome, the \"Word of Life\" mural (commonly known as Touchdown Jesus), and the Basilica.', 'question': 'The school known as Notre Dame is known by a more lengthy name, what is it?', 'answer': 'University of Notre Dame du'}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.11/dist-packages/transformers/tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "83fcc7f94cd0425e893ac1e68213b9c6", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Map: 0%| | 0/640 [00:00\n", + " \n", + " \n", + " [320/320 00:56, Epoch 2/2]\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
EpochTraining LossValidation LossRouge1Rougel
11.2955000.9407060.5337820.524730
21.0051000.8833010.5431430.535329

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight', 'lm_head.weight'].\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Running final evaluation...\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "

\n", + " \n", + " \n", + " [40/40 00:01]\n", + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Final Evaluation Results:\n", + " eval_loss: 0.8833014369010925\n", + " eval_rouge1: 0.5431428752464913\n", + " eval_rougeL: 0.5353286026815901\n", + " eval_runtime: 4.2109\n", + " eval_samples_per_second: 37.996\n", + " eval_steps_per_second: 9.499\n", + " epoch: 2.0\n", + "Context: In 1919 Father James Burns became president of Notre Dame, and in three years he produced an academi...\n", + "Answer: three years\n", + "Generated Question: What did Father James Burns do to Notre Dame?\n", + "Reference Question: Over how many years did the change to national standards undertaken at Notre Dame in the early 20th century take place?\n" + ] + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model and tokenizer saved!\n" + ] + } + ], + "source": [ + "# Install dependencies\n", + "!pip uninstall -y torch torchvision torchaudio pandas fsspec gcsfs -q\n", + "!pip install torch torchvision torchaudio pandas transformers datasets nltk matplotlib evaluate rouge_score sentence-transformers -q\n", + "# Restart runtime after installation\n", + "\n", + "import json\n", + "import pandas as pd\n", + "from datasets import Dataset, Features, Value\n", + "from transformers import T5Tokenizer, T5ForConditionalGeneration, TrainingArguments, Trainer\n", + "import evaluate\n", + "import matplotlib.pyplot as plt\n", + "import torch\n", + "import nltk\n", + "import numpy as np # Added missing import\n", + "nltk.download('punkt')\n", + "\n", + "# Verify setup\n", + "print(f\"Torch version: {torch.__version__}\")\n", + "print(f\"GPU available: {torch.cuda.is_available()}\")\n", + "\n", + "# Step 2: Download and load dataset\n", + "!wget https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v1.1.json -O train-v1.1.json\n", + "with open('train-v1.1.json', 'r', encoding='utf-8') as f:\n", + " squad_data = json.load(f)\n", + "print(\"Sample data:\", squad_data['data'][0]['paragraphs'][0])\n", + "\n", + "# Step 3: Clean and prepare dataset\n", + "data = []\n", + "for article in squad_data['data']:\n", + " for paragraph in article['paragraphs']:\n", + " context = paragraph['context'].strip()\n", + " for qa in paragraph['qas']:\n", + " question = qa['question'].strip()\n", + " answer = qa['answers'][0]['text'].strip() if qa['answers'] else \"\"\n", + " if context and question and answer:\n", + " data.append({\"context\": context, \"question\": question, \"answer\": answer})\n", + "\n", + "data = data[:800]\n", + "df = pd.DataFrame(data)\n", + "features = Features({\n", + " \"context\": Value(\"string\"),\n", + " \"question\": Value(\"string\"),\n", 
+ " \"answer\": Value(\"string\")\n", + "})\n", + "dataset = Dataset.from_pandas(df, features=features)\n", + "train_test_split = dataset.train_test_split(test_size=0.2, seed=42)\n", + "train_dataset = train_test_split[\"train\"]\n", + "eval_dataset = train_test_split[\"test\"]\n", + "print(f\"Train size: {len(train_dataset)} | Eval size: {len(eval_dataset)}\")\n", + "print(\"First train example:\", train_dataset[0])\n", + "\n", + "# Step 4: Fine-tune the model\n", + "model_name = \"valhalla/t5-small-qg-hl\"\n", + "tokenizer = T5Tokenizer.from_pretrained(model_name)\n", + "model = T5ForConditionalGeneration.from_pretrained(model_name)\n", + "\n", + "def preprocess(examples):\n", + " inputs = []\n", + " for ctx, ans in zip(examples['context'], examples['answer']):\n", + " if ans in ctx:\n", + " highlighted = ctx.replace(ans, f\" {ans} \")\n", + " inputs.append(f\"generate question: {highlighted}\")\n", + " else:\n", + " inputs.append(f\"generate question: {ctx} {ans} \")\n", + " targets = examples['question']\n", + " model_inputs = tokenizer(inputs, max_length=256, truncation=True, padding=\"max_length\", return_tensors=None)\n", + " labels = tokenizer(targets, max_length=32, truncation=True, padding=\"max_length\")[\"input_ids\"]\n", + " model_inputs[\"labels\"] = labels\n", + " return model_inputs\n", + "\n", + "tokenized_train_dataset = train_dataset.map(preprocess, remove_columns=train_dataset.column_names, batched=True)\n", + "tokenized_eval_dataset = eval_dataset.map(preprocess, remove_columns=eval_dataset.column_names, batched=True)\n", + "\n", + "tokenized_train_dataset = tokenized_train_dataset.with_format(\"torch\")\n", + "tokenized_eval_dataset = tokenized_eval_dataset.with_format(\"torch\")\n", + "\n", + "training_args = TrainingArguments(\n", + " output_dir=\"./qg-finetuned\",\n", + " per_device_train_batch_size=4,\n", + " per_device_eval_batch_size=4,\n", + " num_train_epochs=2,\n", + " eval_strategy=\"epoch\",\n", + " learning_rate=2e-5,\n", + " 
logging_dir=\"./logs\",\n", + " logging_steps=10,\n", + " save_strategy=\"epoch\",\n", + " save_total_limit=1,\n", + " fp16=True,\n", + " report_to=\"none\",\n", + " load_best_model_at_end=True,\n", + " metric_for_best_model=\"eval_loss\",\n", + " greater_is_better=False\n", + ")\n", + "\n", + "\n", + "def compute_metrics(eval_pred):\n", + " predictions, labels = eval_pred\n", + " predictions = predictions[0] if isinstance(predictions, tuple) else predictions\n", + " predictions = np.argmax(predictions, axis=-1) if predictions.ndim == 3 else predictions\n", + " labels = np.argmax(labels, axis=-1) if labels.ndim == 3 else labels\n", + "\n", + " def decode_sequences(sequences):\n", + " return [tokenizer.decode(seq, skip_special_tokens=True) for seq in sequences]\n", + "\n", + " decoded_preds = decode_sequences(predictions)\n", + " decoded_labels = decode_sequences(labels)\n", + "\n", + " rouge = evaluate.load(\"rouge\")\n", + " rouge_score = rouge.compute(predictions=decoded_preds, references=decoded_labels)\n", + "\n", + " return {\n", + " \"rouge1\": rouge_score[\"rouge1\"],\n", + " \"rougeL\": rouge_score[\"rougeL\"]\n", + " }\n", + "\n", + "trainer = Trainer(\n", + " model=model,\n", + " args=training_args,\n", + " train_dataset=tokenized_train_dataset,\n", + " eval_dataset=tokenized_eval_dataset,\n", + " compute_metrics=compute_metrics\n", + ")\n", + "\n", + "print(\"Fine-tuning started...\")\n", + "trainer.train()\n", + "print(\"Running final evaluation...\")\n", + "results = trainer.evaluate()\n", + "print(\"Final Evaluation Results:\")\n", + "for metric, score in results.items():\n", + " print(f\" {metric}: {score}\")\n", + "\n", + "# Step 5: Generate and evaluate sample questions\n", + "from transformers import GenerationConfig\n", + "model.eval()\n", + "sample = eval_dataset[0]\n", + "inputs = tokenizer(f\"generate question: {sample['context']} {sample['answer']}\", max_length=256, truncation=True, padding=\"max_length\", return_tensors=\"pt\").to(\"cuda\" 
if torch.cuda.is_available() else \"cpu\")\n", + "\n", + "generation_config = GenerationConfig(early_stopping=True, num_beams=5, max_length=128) # Adjusted\n", + "outputs = model.generate(**inputs, generation_config=generation_config)\n", + "generated_question = tokenizer.decode(outputs[0], skip_special_tokens=True)\n", + "\n", + "print(f\"Context: {sample['context'][:100]}...\")\n", + "print(f\"Answer: {sample['answer']}\")\n", + "print(f\"Generated Question: {generated_question}\")\n", + "print(f\"Reference Question: {sample['question']}\")\n", + "\n", + "# Step 6: Plot evaluation scores\n", + "log_history = trainer.state.log_history\n", + "epochs = [entry['epoch'] for entry in log_history if 'eval_rouge1' in entry]\n", + "rouge1_scores = [entry['eval_rouge1'] for entry in log_history if 'eval_rouge1' in entry]\n", + "rougeL_scores = [entry['eval_rougeL'] for entry in log_history if 'eval_rougeL' in entry]\n", + "\n", + "plt.figure(figsize=(10, 5))\n", + "plt.plot(epochs, rouge1_scores, label='ROUGE-1')\n", + "plt.plot(epochs, rougeL_scores, label='ROUGE-L')\n", + "plt.xlabel('Epoch')\n", + "plt.ylabel('Score')\n", + "plt.title('Evaluation Scores Over Epochs')\n", + "plt.legend()\n", + "plt.grid(True)\n", + "plt.show()\n", + "\n", + "# Step 7: Save the model\n", + "model.save_pretrained(\"./qg-finetuned/final\")\n", + "tokenizer.save_pretrained(\"./qg-finetuned/final\")\n", + "print(\"Model and tokenizer saved!\")" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "TlKkCvw0GoVJ", + "outputId": "74e5f939-c3fa-451e-98f5-61919ffa0969" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 1%| | 1/160 [00:00<01:21, 1.94it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 1 ---\n", + "Raw token IDs: [0, 571, 307, 410, 10846, 7, 582, 2753, 13, 7711, 3, 17084, 58, 1]\n", + "Decoded Prediction: How 
long did Burns become president of Notre Dame?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 1%|▏ | 2/160 [00:01<02:41, 1.02s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 2 ---\n", + "Raw token IDs: [0, 0, 2645, 2832, 24, 493, 63, 14549, 65, 582, 3, 9, 26159, 3, 7, 994, 6083, 437, 8, 1576, 13, 2744, 1304, 11937, 16, 2129, 58, 1]\n", + "Decoded Prediction: Who wrote that Beyonce has become a crossover sex symbol since the release of Dangerously in Love?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 2%|▏ | 3/160 [00:02<02:39, 1.01s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 3 ---\n", + "Raw token IDs: [0, 493, 63, 14549, 47, 8, 166, 3850, 18, 8778, 2377, 12, 1189, 8, 1600, 962, 13, 125, 3835, 58, 1]\n", + "Decoded Prediction: Beyonce was the first African-American artist to cover the September issue of what magazine?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 2%|▎ | 4/160 [00:03<02:28, 1.05it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 4 ---\n", + "Raw token IDs: [0, 0, 363, 47, 493, 63, 14549, 31, 7, 166, 6729, 2306, 1883, 30, 1515, 14320, 3888, 58, 1]\n", + "Decoded Prediction: What was Beyonce's first solo album released on June 24, 2003?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 3%|▎ | 5/160 [00:04<02:16, 1.14it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 5 ---\n", + "Raw token IDs: [0, 0, 2645, 410, 493, 63, 106, 75, 154, 20111, 30, 1186, 6464, 2628, 58, 1]\n", + "Decoded Prediction: Who did Beyoncé marry on April 4, 2008?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 4%|▍ | 6/160 [00:05<02:00, 1.28it/s]" + ] + }, + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "\n", + "--- Sample 6 ---\n", + "Raw token IDs: [0, 363, 410, 25219, 5530, 493, 63, 106, 75, 154, 38, 58, 1]\n", + "Decoded Prediction: What did Reid describe Beyoncé as?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 4%|▍ | 7/160 [00:06<02:23, 1.07it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 7 ---\n", + "Raw token IDs: [0, 363, 19, 8, 564, 13, 8, 126, 6198, 21, 1076, 44, 8, 3137, 26, 13277, 1888, 13, 1769, 5408, 2855, 1775, 58, 1]\n", + "Decoded Prediction: What is the name of the new residence for men at the Mendoza College of Business Executive Education Department?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 5%|▌ | 8/160 [00:07<02:10, 1.17it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 8 ---\n", + "Raw token IDs: [0, 363, 19, 8, 564, 13, 8, 1193, 18301, 257, 13, 6679, 4737, 58, 1]\n", + "Decoded Prediction: What is the name of the Congregation of Holy Cross?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 6%|▌ | 9/160 [00:07<01:54, 1.32it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 9 ---\n", + "Raw token IDs: [0, 0, 2645, 410, 493, 63, 14549, 20111, 58, 1]\n", + "Decoded Prediction: Who did Beyonce marry?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 6%|▋ | 10/160 [00:08<02:19, 1.07it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 10 ---\n", + "Raw token IDs: [0, 0, 363, 19, 8, 564, 13, 8, 814, 24, 493, 63, 106, 75, 154, 3, 29029, 16, 58, 1]\n", + "Decoded Prediction: What is the name of the film that Beyoncé starred in?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 7%|▋ | 11/160 [00:09<02:19, 1.07it/s]" + ] + }, 
+ { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 11 ---\n", + "Raw token IDs: [0, 0, 2645, 410, 493, 63, 106, 75, 154, 1320, 46, 539, 2068, 21, 16, 1230, 58, 1]\n", + "Decoded Prediction: Who did Beyoncé sign an open letter for in 2015?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 8%|▊ | 12/160 [00:11<02:33, 1.04s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 12 ---\n", + "Raw token IDs: [0, 571, 231, 410, 8, 3819, 13, 7711, 3, 17084, 11, 3526, 5412, 1211, 726, 21, 70, 7117, 7, 58, 1]\n", + "Decoded Prediction: How much did the university of Notre Dame and Under Armour pay for their uniforms?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 8%|▊ | 13/160 [00:12<02:39, 1.09s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 13 ---\n", + "Raw token IDs: [0, 2840, 405, 7711, 3, 17084, 31, 7, 3, 867, 16528, 372, 5978, 58, 1]\n", + "Decoded Prediction: Where does Notre Dame's ice hockey team compete?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 9%|▉ | 14/160 [00:13<02:26, 1.00s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 14 ---\n", + "Raw token IDs: [0, 0, 363, 2324, 410, 493, 63, 106, 75, 154, 1912, 44, 8, 2464, 13074, 3, 30634, 58, 1]\n", + "Decoded Prediction: What song did Beyoncé perform at the 2009 presidential inauguration?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 9%|▉ | 15/160 [00:13<02:04, 1.16it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 15 ---\n", + "Raw token IDs: [0, 571, 186, 8167, 410, 272, 31, 16803, 1789, 16, 165, 166, 471, 58, 1]\n", + "Decoded Prediction: How many copies did B'Day sell in its first week?\n" + ] + }, + { + "name": "stderr", + 
"output_type": "stream", + "text": [ + "\r", + " 10%|█ | 16/160 [00:14<01:46, 1.35it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 16 ---\n", + "Raw token IDs: [0, 363, 19, 411, 31, 10499, 18819, 655, 63, 2501, 31, 7, 3595, 358, 58, 1]\n", + "Decoded Prediction: What is O'Shaughnessy Hall's library system?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 11%|█ | 17/160 [00:14<01:39, 1.43it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 17 ---\n", + "Raw token IDs: [0, 0, 571, 231, 410, 493, 63, 14549, 1492, 30, 160, 166, 4219, 1552, 16, 4101, 58, 1]\n", + "Decoded Prediction: How much did Beyonce spend on her first concert tour in 2007?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 11%|█▏ | 18/160 [00:16<02:11, 1.08it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 18 ---\n", + "Raw token IDs: [0, 571, 186, 12321, 410, 8, 991, 712, 96, 308, 154, 354, 85, 584, 76, 121, 1535, 30, 8, 3259, 1976, 5396, 910, 5059, 58, 1]\n", + "Decoded Prediction: How many charts did the lead single \"Déjà Vu\" reach on the Billboard Hot 100 chart?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 12%|█▏ | 19/160 [00:16<01:53, 1.24it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 19 ---\n", + "Raw token IDs: [0, 0, 363, 605, 410, 493, 63, 14549, 11, 9373, 18, 956, 2467, 16, 1718, 2038, 58, 1]\n", + "Decoded Prediction: What event did Beyonce and Jay-Z attend in July 2013?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 12%|█▎ | 20/160 [00:16<01:29, 1.56it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 20 ---\n", + "Raw token IDs: [0, 0, 363, 47, 493, 63, 14549, 31, 7, 166, 6922, 1075, 
16, 3581, 58, 1]\n", + "Decoded Prediction: What was Beyonce's first acting role in 2006?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 13%|█▎ | 21/160 [00:17<01:18, 1.76it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 21 ---\n", + "Raw token IDs: [0, 366, 410, 493, 63, 14549, 6456, 3, 9, 7102, 144, 302, 45, 160, 723, 1415, 58, 1]\n", + "Decoded Prediction: When did Beyonce announce a hiatus from her music career?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 14%|█▍ | 22/160 [00:17<01:14, 1.85it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 22 ---\n", + "Raw token IDs: [0, 0, 571, 231, 410, 493, 63, 106, 75, 154, 31, 7, 6922, 1075, 16, 37, 12070, 21149, 8690, 44, 8, 1367, 828, 4388, 58, 1]\n", + "Decoded Prediction: How much did Beyoncé's acting role in The Pink Panther gross at the box office worldwide?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 14%|█▍ | 23/160 [00:18<01:01, 2.24it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 23 ---\n", + "Raw token IDs: [0, 571, 186, 203, 47, 8, 4543, 13, 14984, 7, 8307, 58, 1]\n", + "Decoded Prediction: How many years was the Review of Politics edited?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 15%|█▌ | 24/160 [00:18<00:54, 2.47it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 24 ---\n", + "Raw token IDs: [0, 0, 363, 686, 13, 3800, 410, 493, 63, 106, 75, 154, 31, 7, 778, 3605, 560, 58, 1]\n", + "Decoded Prediction: What type of theme did Beyoncé's early songs include?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 16%|█▌ | 25/160 [00:18<00:48, 2.79it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + 
"text": [ + "\n", + "--- Sample 25 ---\n", + "Raw token IDs: [0, 363, 19, 8, 564, 13, 8, 4467, 13, 2330, 15848, 58, 1]\n", + "Decoded Prediction: What is the name of the Word of Life mural?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 16%|█▋ | 26/160 [00:19<00:49, 2.72it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 26 ---\n", + "Raw token IDs: [0, 0, 86, 125, 203, 410, 2900, 570, 493, 63, 106, 75, 154, 859, 8, 910, 167, 16569, 151, 16, 8, 296, 58, 1]\n", + "Decoded Prediction: In what years did Time list Beyoncé among the 100 most influential people in the world?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 17%|█▋ | 27/160 [00:19<00:45, 2.95it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 27 ---\n", + "Raw token IDs: [0, 571, 186, 2061, 43, 751, 8, 1076, 31, 7, 8498, 372, 58, 1]\n", + "Decoded Prediction: How many schools have won the men's basketball team?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 18%|█▊ | 28/160 [00:19<00:45, 2.88it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 28 ---\n", + "Raw token IDs: [0, 0, 363, 3295, 410, 3259, 1976, 3835, 570, 493, 63, 106, 75, 154, 30, 16, 2722, 58, 1]\n", + "Decoded Prediction: What category did Billboard magazine list Beyoncé on in 2011?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 18%|█▊ | 29/160 [00:20<00:47, 2.77it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 29 ---\n", + "Raw token IDs: [0, 0, 363, 19, 8, 564, 13, 8, 283, 5, 21313, 5, 478, 1860, 57, 8, 1888, 13, 4218, 11, 8706, 7, 58, 1]\n", + "Decoded Prediction: What is the name of the M.Div. 
program offered by the College of Arts and Letters?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 19%|█▉ | 30/160 [00:20<00:49, 2.61it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 30 ---\n", + "Raw token IDs: [0, 571, 186, 8167, 410, 493, 63, 106, 75, 154, 1789, 16, 165, 166, 471, 58, 1]\n", + "Decoded Prediction: How many copies did Beyoncé sell in its first week?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 19%|█▉ | 31/160 [00:20<00:45, 2.83it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 31 ---\n", + "Raw token IDs: [0, 0, 2645, 410, 493, 63, 14549, 2249, 16, 24692, 58, 1]\n", + "Decoded Prediction: Who did Beyonce voice in Epic?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 20%|██ | 32/160 [00:21<00:49, 2.59it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 32 ---\n", + "Raw token IDs: [0, 0, 363, 1900, 410, 216, 7289, 107, 11, 7711, 3, 17084, 1344, 3, 9, 576, 18, 994, 13073, 478, 58, 1]\n", + "Decoded Prediction: What college did Hesburgh and Notre Dame develop a co-exchange program?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 21%|██ | 33/160 [00:21<00:47, 2.67it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 33 ---\n", + "Raw token IDs: [0, 0, 363, 1552, 410, 493, 63, 106, 75, 154, 17046, 30, 16, 1740, 16, 1671, 3888, 58, 1]\n", + "Decoded Prediction: What tour did Beyoncé embark on in Europe in November 2003?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 21%|██▏ | 34/160 [00:21<00:47, 2.65it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 34 ---\n", + "Raw token IDs: [0, 0, 363, 47, 493, 63, 14549, 31, 
7, 166, 6729, 1368, 30, 8, 3259, 1976, 5396, 910, 5059, 58, 1]\n", + "Decoded Prediction: What was Beyonce's first solo record on the Billboard Hot 100 chart?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 22%|██▏ | 35/160 [00:22<00:45, 2.76it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 35 ---\n", + "Raw token IDs: [0, 0, 2645, 410, 493, 63, 106, 75, 154, 11872, 887, 12, 1716, 38, 16, 8, 350, 940, 58, 1]\n", + "Decoded Prediction: Who did Beyoncé urge women to serve as in the G7?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 22%|██▎ | 36/160 [00:22<00:50, 2.47it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 36 ---\n", + "Raw token IDs: [0, 571, 186, 10657, 7, 399, 1962, 410, 19344, 63, 31, 7, 9364, 1912, 44, 8, 2011, 9713, 3, 4, 15086, 196, 196, 985, 715, 504, 58, 1]\n", + "Decoded Prediction: How many tweets per minute did Destiny's Child perform at the Super Bowl XLVII halftime show?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 23%|██▎ | 37/160 [00:23<00:46, 2.67it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 37 ---\n", + "Raw token IDs: [0, 2840, 410, 493, 63, 106, 75, 154, 2467, 13156, 18867, 1121, 58, 1]\n", + "Decoded Prediction: Where did Beyoncé attend Parker Elementary School?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 24%|██▍ | 38/160 [00:23<00:50, 2.43it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 38 ---\n", + "Raw token IDs: [0, 0, 363, 19, 8, 564, 13, 3, 9, 2324, 16, 272, 31, 16803, 58, 1]\n", + "Decoded Prediction: What is the name of a song in B'Day?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 24%|██▍ | 39/160 [00:24<00:50, 2.40it/s]" 
+ ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 39 ---\n", + "Raw token IDs: [0, 0, 2645, 12566, 493, 63, 106, 75, 154, 31, 7, 5739, 11, 3, 2998, 1999, 38, 11562, 58, 1]\n", + "Decoded Prediction: Who highlighted Beyoncé's tone and timbre as distinctive?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 25%|██▌ | 40/160 [00:24<00:49, 2.45it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 40 ---\n", + "Raw token IDs: [0, 4073, 3819, 65, 8, 167, 216, 159, 348, 21361, 107, 725, 751, 58, 1]\n", + "Decoded Prediction: Which university has the most Heisman Trophies won?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 26%|██▌ | 41/160 [00:24<00:51, 2.32it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 41 ---\n", + "Raw token IDs: [0, 0, 363, 2324, 410, 9373, 1027, 1576, 30, 2330, 232, 715, 7, 5, 287, 58, 1]\n", + "Decoded Prediction: What song did Jay Z release on Lifeandtimes.com?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 26%|██▋ | 42/160 [00:25<00:55, 2.14it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 42 ---\n", + "Raw token IDs: [0, 0, 571, 186, 13569, 410, 493, 63, 14549, 43, 44, 8, 2722, 283, 4562, 3953, 3057, 6580, 58, 1]\n", + "Decoded Prediction: How many viewers did Beyonce have at the 2011 MTV Video Music Awards?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 27%|██▋ | 43/160 [00:25<00:51, 2.25it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 43 ---\n", + "Raw token IDs: [0, 363, 8468, 405, 59, 43, 3, 9, 6040, 8815, 42, 14829, 21913, 45, 7711, 3, 17084, 58, 1]\n", + "Decoded Prediction: What newspaper does not have a faculty advisor or editorial oversight 
from Notre Dame?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 28%|██▊ | 44/160 [00:26<00:53, 2.16it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 44 ---\n", + "Raw token IDs: [0, 0, 2645, 2832, 24, 493, 63, 14549, 65, 582, 3, 9, 26159, 3, 7, 994, 6083, 437, 8, 1576, 13, 2744, 1304, 11937, 16, 2129, 58, 1]\n", + "Decoded Prediction: Who wrote that Beyonce has become a crossover sex symbol since the release of Dangerously in Love?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 28%|██▊ | 45/160 [00:26<00:48, 2.35it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 45 ---\n", + "Raw token IDs: [0, 0, 363, 686, 13, 3, 7, 994, 3958, 65, 493, 63, 14549, 118, 3028, 38, 58, 1]\n", + "Decoded Prediction: What type of sex appeal has Beyonce been described as?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 29%|██▉ | 46/160 [00:26<00:45, 2.53it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 46 ---\n", + "Raw token IDs: [0, 0, 363, 3066, 47, 6946, 1079, 1660, 630, 4904, 107, 51, 638, 18, 23620, 127, 13, 58, 1]\n", + "Decoded Prediction: What department was Father John Augustine Zahm Co-Director of?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 29%|██▉ | 47/160 [00:27<00:43, 2.58it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 47 ---\n", + "Raw token IDs: [0, 0, 2645, 410, 493, 63, 106, 75, 154, 11872, 887, 12, 1716, 38, 16, 8, 350, 940, 58, 1]\n", + "Decoded Prediction: Who did Beyoncé urge women to serve as in the G7?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 30%|███ | 48/160 [00:27<00:41, 2.67it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + 
"\n", + "--- Sample 48 ---\n", + "Raw token IDs: [0, 0, 363, 47, 8, 564, 13, 493, 63, 14549, 31, 7, 511, 6729, 2306, 58, 1]\n", + "Decoded Prediction: What was the name of Beyonce's second solo album?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 31%|███ | 49/160 [00:28<00:41, 2.70it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 49 ---\n", + "Raw token IDs: [0, 366, 410, 493, 63, 106, 75, 154, 497, 24, 5428, 15822, 3555, 160, 12, 240, 610, 13, 160, 1415, 58, 1]\n", + "Decoded Prediction: When did Beyoncé say that Madonna inspired her to take control of her career?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 31%|███▏ | 50/160 [00:28<00:38, 2.86it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 50 ---\n", + "Raw token IDs: [0, 0, 363, 4183, 12373, 410, 493, 63, 106, 75, 154, 577, 1414, 6073, 16, 16, 3888, 58, 1]\n", + "Decoded Prediction: What musical comedy did Beyoncé play Lilly in in 2003?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 32%|███▏ | 51/160 [00:28<00:35, 3.10it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 51 ---\n", + "Raw token IDs: [0, 2645, 5105, 2691, 81, 493, 63, 106, 75, 154, 58, 1]\n", + "Decoded Prediction: Who obtained documents about Beyoncé?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 32%|███▎ | 52/160 [00:28<00:35, 3.03it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 52 ---\n", + "Raw token IDs: [0, 0, 363, 47, 493, 63, 14549, 31, 7, 4509, 712, 3, 9, 1328, 1269, 16, 8, 837, 58, 1]\n", + "Decoded Prediction: What was Beyonce's fourth single a commercial success in the US?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 33%|███▎ | 
53/160 [00:29<00:35, 2.99it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 53 ---\n", + "Raw token IDs: [0, 366, 47, 7155, 3, 19003, 1790, 58, 1]\n", + "Decoded Prediction: When was Common Sense published?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 34%|███▍ | 54/160 [00:29<00:33, 3.14it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 54 ---\n", + "Raw token IDs: [0, 0, 363, 405, 493, 63, 106, 75, 154, 114, 12, 3270, 30, 10705, 58, 1]\n", + "Decoded Prediction: What does Beyoncé like to dress onstage?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 34%|███▍ | 55/160 [00:29<00:35, 2.96it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 55 ---\n", + "Raw token IDs: [0, 2645, 47, 8, 1025, 2335, 30, 3259, 1976, 31, 7, 570, 13, 8, 2224, 460, 5396, 910, 11263, 12756, 7, 58, 1]\n", + "Decoded Prediction: Who was the third woman on Billboard's list of the Top 20 Hot 100 Songwriters?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 35%|███▌ | 56/160 [00:30<00:39, 2.66it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 56 ---\n", + "Raw token IDs: [0, 0, 2645, 410, 493, 63, 106, 75, 154, 1912, 30, 8, 1463, 96, 566, 63, 51, 29, 21, 8, 20418, 121, 58, 1]\n", + "Decoded Prediction: Who did Beyoncé perform on the track \"Hymn for the Weekend\"?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 36%|███▌ | 57/160 [00:30<00:41, 2.46it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 57 ---\n", + "Raw token IDs: [0, 571, 186, 14234, 410, 8, 991, 712, 96, 308, 154, 354, 85, 584, 76, 121, 1535, 30, 8, 3259, 1976, 5396, 910, 5059, 58, 1]\n", + "Decoded Prediction: How many albums did the 
lead single \"Déjà Vu\" reach on the Billboard Hot 100 chart?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 36%|███▋ | 58/160 [00:31<00:39, 2.61it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 58 ---\n", + "Raw token IDs: [0, 0, 571, 186, 13588, 7, 410, 493, 63, 14549, 911, 44, 8, 9065, 727, 7389, 26596, 6580, 58, 1]\n", + "Decoded Prediction: How many nominations did Beyonce receive at the 52nd Annual Grammy Awards?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 37%|███▋ | 59/160 [00:31<00:35, 2.88it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 59 ---\n", + "Raw token IDs: [0, 363, 2324, 410, 493, 63, 106, 75, 154, 1431, 21, 24692, 58, 1]\n", + "Decoded Prediction: What song did Beyoncé write for Epic?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 38%|███▊ | 60/160 [00:31<00:32, 3.12it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 60 ---\n", + "Raw token IDs: [0, 2645, 3555, 493, 63, 106, 75, 154, 58, 1]\n", + "Decoded Prediction: Who inspired Beyoncé?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 38%|███▊ | 61/160 [00:32<00:30, 3.28it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 61 ---\n", + "Raw token IDs: [0, 363, 47, 8, 2041, 585, 3135, 21, 7711, 3, 17084, 58, 1]\n", + "Decoded Prediction: What was the annual research funding for Notre Dame?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 39%|███▉ | 62/160 [00:32<00:27, 3.57it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 62 ---\n", + "Raw token IDs: [0, 2645, 1632, 8, 166, 797, 12, 1299, 3, 9, 5419, 1569, 58, 1]\n", + "Decoded Prediction: Who became the 
first American to send a wireless message?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 39%|███▉ | 63/160 [00:32<00:32, 3.00it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 63 ---\n", + "Raw token IDs: [0, 363, 13681, 19, 2425, 12, 585, 6, 1073, 11, 16866, 30, 8, 4110, 13, 9674, 4129, 11, 8, 1124, 21, 5086, 3065, 58, 1]\n", + "Decoded Prediction: What institute is dedicated to research, education and outreach on the causes of violent conflict and the conditions for sustainable peace?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 40%|████ | 64/160 [00:33<00:30, 3.10it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 64 ---\n", + "Raw token IDs: [0, 0, 2645, 751, 493, 63, 106, 75, 154, 31, 7, 2760, 21, 16182, 12559, 58, 1]\n", + "Decoded Prediction: Who won Beyoncé's award for Morning Phase?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 41%|████ | 65/160 [00:33<00:28, 3.28it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 65 ---\n", + "Raw token IDs: [0, 2645, 10626, 2765, 12, 6774, 8, 452, 3078, 3543, 58, 1]\n", + "Decoded Prediction: Who hired workers to construct the public parking garage?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 41%|████▏ | 66/160 [00:33<00:31, 2.97it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 66 ---\n", + "Raw token IDs: [0, 2645, 47, 8, 6342, 5, 37, 7512, 15, 283, 5, 216, 7289, 107, 205, 4112, 58, 1]\n", + "Decoded Prediction: Who was the Rev. Theodore M. 
Hesburgh CSC?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 42%|████▏ | 67/160 [00:34<00:31, 2.91it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 67 ---\n", + "Raw token IDs: [0, 0, 2645, 1891, 493, 63, 14549, 8, 1867, 12, 619, 280, 6, 12, 36, 3555, 57, 378, 541, 58, 1]\n", + "Decoded Prediction: Who gave Beyonce the advice to live life, to be inspired by things again?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 42%|████▎ | 68/160 [00:34<00:30, 2.97it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 68 ---\n", + "Raw token IDs: [0, 0, 366, 47, 493, 63, 14549, 31, 7, 4509, 3100, 2306, 314, 1883, 58, 1]\n", + "Decoded Prediction: When was Beyonce's fourth studio album 4 released?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 43%|████▎ | 69/160 [00:34<00:31, 2.87it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 69 ---\n", + "Raw token IDs: [0, 461, 125, 833, 47, 1449, 13, 8, 325, 3809, 17, 444, 6341, 1166, 708, 58, 1]\n", + "Decoded Prediction: On what date was construction of the LaFortune Student Center started?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 44%|████▍ | 70/160 [00:34<00:28, 3.19it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 70 ---\n", + "Raw token IDs: [0, 2645, 1632, 819, 3763, 13, 7711, 3, 17084, 16, 21402, 58, 1]\n", + "Decoded Prediction: Who became head coach of Notre Dame in 1918?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 44%|████▍ | 71/160 [00:35<00:26, 3.42it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 71 ---\n", + "Raw token IDs: [0, 363, 19, 8, 711, 740, 13, 7711, 3, 17084, 31, 
7, 3595, 58, 1]\n", + "Decoded Prediction: What is the main building of Notre Dame's library?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 45%|████▌ | 72/160 [00:35<00:29, 3.02it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 72 ---\n", + "Raw token IDs: [0, 363, 19, 8, 564, 13, 7711, 3, 17084, 58, 1]\n", + "Decoded Prediction: What is the name of Notre Dame?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 46%|████▌ | 73/160 [00:35<00:29, 2.98it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 73 ---\n", + "Raw token IDs: [0, 461, 125, 239, 56, 8, 15714, 5567, 31, 15697, 916, 58, 1]\n", + "Decoded Prediction: On what day will the Drummers' Circle continue?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 46%|████▋ | 74/160 [00:36<00:42, 2.03it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 74 ---\n", + "Raw token IDs: [0, 86, 1230, 18, 11505, 6, 125, 11003, 410, 7711, 3, 17084, 11003, 859, 96, 16557, 8278, 121, 16, 8, 412, 5, 134, 5, 3529, 3, 184, 1150, 3750, 31, 7, 1648, 1888, 7, 58, 1]\n", + "Decoded Prediction: In 2015-2016, what rank did Notre Dame rank among \"national universities\" in the U.S. 
News & World Report's Best Colleges?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 47%|████▋ | 75/160 [00:37<00:42, 1.98it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 75 ---\n", + "Raw token IDs: [0, 0, 486, 125, 605, 410, 493, 63, 106, 75, 154, 582, 8, 166, 3955, 2377, 12, 12392, 8, 30237, 1726, 58, 1]\n", + "Decoded Prediction: At what event did Beyoncé become the first female artist to headline the Pyramid stage?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 48%|████▊ | 76/160 [00:38<00:52, 1.59it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 76 ---\n", + "Raw token IDs: [0, 0, 493, 63, 14549, 11, 9373, 1027, 263, 34, 139, 8, 2846, 77, 655, 1150, 11547, 38, 8, 96, 6739, 222, 18, 2741, 29, 53, 579, 1158, 121, 21, 6018, 120, 12127, 1514, 3940, 770, 58, 1]\n", + "Decoded Prediction: Beyonce and Jay Z made it into the Guinness World Records as the \"highest-earning power couple\" for collectively earning $78 million?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 48%|████▊ | 77/160 [00:38<00:44, 1.87it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 77 ---\n", + "Raw token IDs: [0, 0, 363, 5349, 13, 3605, 410, 19344, 63, 31, 7, 9364, 1576, 2129, 18142, 58, 1]\n", + "Decoded Prediction: What genre of songs did Destiny's Child release Love Songs?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 49%|████▉ | 78/160 [00:39<00:41, 1.97it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 78 ---\n", + "Raw token IDs: [0, 0, 2645, 410, 493, 63, 106, 75, 154, 582, 8, 166, 3850, 18, 8778, 2335, 12, 43, 913, 11893, 30, 386, 381, 80, 3605, 58, 1]\n", + "Decoded Prediction: Who did Beyoncé become the first African-American 
woman to have writing credits on three number one songs?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 49%|████▉ | 79/160 [00:39<00:36, 2.20it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 79 ---\n", + "Raw token IDs: [0, 571, 186, 13588, 7, 410, 493, 63, 14549, 911, 44, 8, 9065, 727, 7389, 26596, 6580, 58, 1]\n", + "Decoded Prediction: How many nominations did Beyonce receive at the 52nd Annual Grammy Awards?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 50%|█████ | 80/160 [00:39<00:36, 2.19it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 80 ---\n", + "Raw token IDs: [0, 2645, 2681, 493, 63, 106, 75, 154, 44, 381, 80, 30, 160, 570, 13, 8, 9528, 1648, 24366, 87, 308, 663, 52, 7, 58, 1]\n", + "Decoded Prediction: Who placed Beyoncé at number one on her list of the Five Best Singer/Dancers?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 51%|█████ | 81/160 [00:40<00:32, 2.40it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 81 ---\n", + "Raw token IDs: [0, 0, 363, 19, 8, 564, 13, 411, 31, 10499, 18819, 655, 63, 2501, 58, 1]\n", + "Decoded Prediction: What is the name of O'Shaughnessy Hall?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 51%|█████▏ | 82/160 [00:40<00:28, 2.73it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 82 ---\n", + "Raw token IDs: [0, 0, 363, 47, 8, 167, 7533, 6878, 16, 283, 4562, 892, 58, 1]\n", + "Decoded Prediction: What was the most watched broadcast in MTV history?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 52%|█████▏ | 83/160 [00:40<00:29, 2.63it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- 
Sample 83 ---\n", + "Raw token IDs: [0, 363, 2760, 410, 493, 63, 106, 75, 154, 911, 45, 8, 368, 1060, 2125, 13, 1589, 3559, 343, 7, 58, 1]\n", + "Decoded Prediction: What award did Beyoncé receive from the New York Association of Black Journalists?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 52%|█████▎ | 84/160 [00:41<00:29, 2.59it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 84 ---\n", + "Raw token IDs: [0, 0, 571, 231, 410, 8513, 2621, 7, 16, 2540, 12066, 16917, 16, 4407, 58, 1]\n", + "Decoded Prediction: How much did Austin Powers in Goldmember grosse in 2002?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 53%|█████▎ | 85/160 [00:41<00:26, 2.80it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 85 ---\n", + "Raw token IDs: [0, 363, 19, 8, 564, 13, 7711, 3, 17084, 31, 7, 1449, 516, 58, 1]\n", + "Decoded Prediction: What is the name of Notre Dame's construction project?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 54%|█████▍ | 86/160 [00:41<00:24, 3.03it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 86 ---\n", + "Raw token IDs: [0, 363, 47, 493, 63, 14549, 31, 7, 166, 4219, 1552, 58, 1]\n", + "Decoded Prediction: What was Beyonce's first concert tour?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 54%|█████▍ | 87/160 [00:42<00:23, 3.05it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 87 ---\n", + "Raw token IDs: [0, 0, 2645, 1940, 8, 3567, 12, 112, 5961, 1826, 3100, 11, 2681, 135, 16, 2042, 4769, 58, 1]\n", + "Decoded Prediction: Who brought the girls to his Northern California studio and placed them in Star Search?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 
55%|█████▌ | 88/160 [00:42<00:24, 2.97it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 88 ---\n", + "Raw token IDs: [0, 363, 47, 8, 564, 13, 8, 723, 6358, 44, 325, 3809, 17, 444, 58, 1]\n", + "Decoded Prediction: What was the name of the music hall at LaFortune?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 56%|█████▌ | 89/160 [00:42<00:22, 3.10it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 89 ---\n", + "Raw token IDs: [0, 363, 3066, 13, 8, 1888, 13, 2854, 405, 59, 462, 10360, 1356, 58, 1]\n", + "Decoded Prediction: What department of the College of Science does not offer PhD programs?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 56%|█████▋ | 90/160 [00:43<00:23, 2.98it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 90 ---\n", + "Raw token IDs: [0, 0, 363, 47, 8, 564, 13, 19344, 63, 31, 7, 9364, 22, 7, 167, 1574, 2324, 44, 8, 97, 58, 1]\n", + "Decoded Prediction: What was the name of Destiny's Child’s most successful song at the time?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 57%|█████▋ | 91/160 [00:43<00:25, 2.75it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 91 ---\n", + "Raw token IDs: [0, 363, 47, 493, 63, 106, 75, 154, 31, 7, 11592, 16, 8, 5777, 11263, 12756, 13, 8, 2929, 2760, 58, 1]\n", + "Decoded Prediction: What was Beyoncé's ranking in the Pop Songwriter of the Year award?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 57%|█████▊ | 92/160 [00:43<00:25, 2.68it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 92 ---\n", + "Raw token IDs: [0, 366, 47, 8, 1657, 96, 279, 32, 32, 17, 63, 2176, 2936, 121, 974, 12, 8, 10274, 1566, 28767, 58, 
1]\n", + "Decoded Prediction: When was the term \"Bootylicious\" added to the Oxford English Dictionary?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 58%|█████▊ | 93/160 [00:44<00:25, 2.63it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 93 ---\n", + "Raw token IDs: [0, 0, 571, 186, 2456, 410, 493, 63, 106, 75, 154, 911, 45, 6800, 7, 16, 37, 30979, 3, 15291, 17, 1628, 58, 1]\n", + "Decoded Prediction: How many reviews did Beyoncé receive from critics in The Fighting Temptations?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 59%|█████▉ | 94/160 [00:44<00:25, 2.63it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 94 ---\n", + "Raw token IDs: [0, 0, 493, 63, 106, 75, 154, 47, 2616, 859, 8, 910, 167, 16569, 151, 16, 8, 296, 16, 2038, 11, 1412, 58, 1]\n", + "Decoded Prediction: Beyoncé was listed among the 100 most influential people in the world in 2013 and 2014?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 59%|█████▉ | 95/160 [00:45<00:23, 2.73it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 95 ---\n", + "Raw token IDs: [0, 0, 2840, 410, 8, 9058, 29, 43, 3, 9, 471, 18, 2961, 9058, 624, 29, 58, 1]\n", + "Decoded Prediction: Where did the Klan have a week-long Klavern?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 60%|██████ | 96/160 [00:45<00:24, 2.63it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 96 ---\n", + "Raw token IDs: [0, 571, 186, 5128, 410, 493, 63, 106, 75, 154, 17046, 30, 8, 8667, 5, 17080, 3111, 1150, 3351, 58, 1]\n", + "Decoded Prediction: How many dates did Beyoncé embark on the Mrs. 
Carter Show World Tour?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 61%|██████ | 97/160 [00:45<00:22, 2.75it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 97 ---\n", + "Raw token IDs: [0, 2840, 19, 8, 23711, 2617, 13, 8, 3, 24756, 6219, 1069, 58, 1]\n", + "Decoded Prediction: Where is the Basilica of the Sacred Heart located?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 61%|██████▏ | 98/160 [00:46<00:24, 2.56it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 98 ---\n", + "Raw token IDs: [0, 4073, 3835, 2616, 493, 63, 14549, 38, 381, 1003, 30, 70, 570, 13, 8, 96, 22481, 460, 5396, 910, 11263, 12756, 7, 121, 58, 1]\n", + "Decoded Prediction: Which magazine listed Beyonce as number 17 on their list of the \"Top 20 Hot 100 Songwriters\"?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 62%|██████▏ | 99/160 [00:46<00:23, 2.57it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 99 ---\n", + "Raw token IDs: [0, 363, 47, 8, 564, 13, 19344, 63, 31, 7, 9364, 58, 1]\n", + "Decoded Prediction: What was the name of Destiny's Child?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 62%|██████▎ | 100/160 [00:46<00:22, 2.71it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 100 ---\n", + "Raw token IDs: [0, 0, 363, 1657, 410, 8, 783, 557, 169, 12, 5530, 493, 63, 106, 75, 154, 58, 1]\n", + "Decoded Prediction: What term did the media often use to describe Beyoncé?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 63%|██████▎ | 101/160 [00:47<00:21, 2.79it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 101 ---\n", + "Raw token IDs: [0, 0, 363, 
47, 8, 564, 13, 8, 23739, 261, 57, 6248, 5, 29295, 58, 1]\n", + "Decoded Prediction: What was the name of the chapel used by Fr. Sorin?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 64%|██████▍ | 102/160 [00:47<00:19, 2.96it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 102 ---\n", + "Raw token IDs: [0, 571, 186, 203, 410, 8, 7262, 1369, 386, 1157, 10183, 7, 58, 1]\n", + "Decoded Prediction: How many years did the Irish win three national championships?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 64%|██████▍ | 103/160 [00:47<00:17, 3.19it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 103 ---\n", + "Raw token IDs: [0, 86, 125, 484, 47, 19344, 63, 31, 7, 9364, 3, 390, 58, 1]\n", + "Decoded Prediction: In what book was Destiny's Child based?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 65%|██████▌ | 104/160 [00:48<00:18, 3.02it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 104 ---\n", + "Raw token IDs: [0, 366, 19, 7711, 3, 17084, 234, 12, 8, 3699, 3, 14808, 257, 11507, 58, 1]\n", + "Decoded Prediction: When is Notre Dame home to the Global Adaptation Index?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 66%|██████▌ | 105/160 [00:48<00:19, 2.87it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 105 ---\n", + "Raw token IDs: [0, 571, 186, 481, 5526, 7711, 3, 17084, 16, 1412, 58, 1]\n", + "Decoded Prediction: How many students attended Notre Dame in 2014?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 66%|██████▋ | 106/160 [00:49<00:20, 2.64it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 106 ---\n", + "Raw token 
IDs: [0, 0, 363, 1974, 410, 493, 63, 106, 75, 154, 1912, 44, 8, 166, 1158, 31, 7, 166, 22145, 1996, 58, 1]\n", + "Decoded Prediction: What movie did Beyoncé perform at the first couple's first inaugural ball?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 67%|██████▋ | 107/160 [00:49<00:22, 2.40it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 107 ---\n", + "Raw token IDs: [0, 2645, 3977, 227, 493, 63, 106, 75, 154, 11, 9373, 18, 956, 1736, 28, 112, 384, 58, 1]\n", + "Decoded Prediction: Who died after Beyoncé and Jay-Z met with his family?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 68%|██████▊ | 108/160 [00:49<00:22, 2.36it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 108 ---\n", + "Raw token IDs: [0, 0, 363, 410, 493, 63, 106, 75, 154, 497, 5428, 15822, 3555, 160, 58, 1]\n", + "Decoded Prediction: What did Beyoncé say Madonna inspired her?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 68%|██████▊ | 109/160 [00:50<00:22, 2.23it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 109 ---\n", + "Raw token IDs: [0, 0, 366, 410, 493, 63, 14549, 1576, 2744, 1304, 11937, 16, 2129, 58, 1]\n", + "Decoded Prediction: When did Beyonce release Dangerously in Love?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 69%|██████▉ | 110/160 [00:50<00:21, 2.29it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 110 ---\n", + "Raw token IDs: [0, 0, 571, 186, 3187, 65, 493, 63, 106, 75, 154, 1916, 28, 19344, 63, 31, 7, 9364, 58, 1]\n", + "Decoded Prediction: How many records has Beyoncé sold with Destiny's Child?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 69%|██████▉ | 111/160 
[00:51<00:20, 2.37it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 111 ---\n", + "Raw token IDs: [0, 37, 8026, 49, 1166, 21, 5408, 2855, 19, 1069, 3414, 13, 8, 3137, 26, 13277, 1888, 13, 1769, 4297, 1073, 3066, 58, 1]\n", + "Decoded Prediction: The Stayer Center for Executive Education is located south of the Mendoza College of Business executive education department?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 70%|███████ | 112/160 [00:51<00:18, 2.65it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 112 ---\n", + "Raw token IDs: [0, 2645, 1192, 325, 3809, 17, 444, 1166, 58, 1]\n", + "Decoded Prediction: Who built LaFortune Center?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 71%|███████ | 113/160 [00:51<00:16, 2.91it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 113 ---\n", + "Raw token IDs: [0, 2645, 3, 28285, 26, 120, 4283, 12, 8, 16823, 3790, 16, 507, 3449, 58, 1]\n", + "Decoded Prediction: Who reputedly appeared to the Virgin Mary in 1858?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 71%|███████▏ | 114/160 [00:52<00:16, 2.82it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 114 ---\n", + "Raw token IDs: [0, 0, 2840, 410, 493, 63, 106, 75, 154, 11, 9373, 1027, 281, 12, 12, 4279, 8, 2306, 1189, 21, 160, 314, 58, 1]\n", + "Decoded Prediction: Where did Beyoncé and Jay Z go to to shoot the album cover for her 4?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 72%|███████▏ | 115/160 [00:52<00:17, 2.62it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 115 ---\n", + "Raw token IDs: [0, 0, 2645, 1012, 1601, 8, 1657, 96, 279, 32, 32, 17, 63, 2176, 2936, 
121, 12, 5530, 493, 63, 14549, 58, 1]\n", + "Decoded Prediction: Who popularized the term \"Bootylicious\" to describe Beyonce?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 72%|███████▎ | 116/160 [00:52<00:14, 2.99it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 116 ---\n", + "Raw token IDs: [0, 363, 2953, 186, 6502, 8445, 7, 12, 8079, 15, 1740, 58, 1]\n", + "Decoded Prediction: What caused many Catholic intellectuals to flee Europe?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 73%|███████▎ | 117/160 [00:53<00:12, 3.32it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 117 ---\n", + "Raw token IDs: [0, 363, 19, 8, 564, 13, 8, 2854, 2501, 58, 1]\n", + "Decoded Prediction: What is the name of the Science Hall?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 74%|███████▍ | 118/160 [00:53<00:13, 3.11it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 118 ---\n", + "Raw token IDs: [0, 363, 47, 8, 564, 13, 8, 8131, 10626, 57, 8, 896, 13, 1013, 22435, 12, 6774, 262, 8155, 1887, 7155, 7, 58, 1]\n", + "Decoded Prediction: What was the name of the contractor hired by the City of South Bend to construct Eddy Street Commons?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 74%|███████▍ | 119/160 [00:53<00:13, 3.15it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 119 ---\n", + "Raw token IDs: [0, 0, 2645, 130, 493, 63, 106, 75, 154, 31, 7, 5487, 8705, 12, 58, 1]\n", + "Decoded Prediction: Who were Beyoncé's letters addressed to?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 75%|███████▌ | 120/160 [00:54<00:15, 2.66it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + 
"text": [ + "\n", + "--- Sample 120 ---\n", + "Raw token IDs: [0, 0, 86, 125, 814, 410, 493, 63, 106, 75, 154, 1912, 28, 3, 9, 7100, 20790, 18, 3114, 77, 920, 821, 30, 19344, 63, 31, 7, 9364, 58, 1]\n", + "Decoded Prediction: In what film did Beyoncé perform with a Golden Globe-nominated performance on Destiny's Child?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 76%|███████▌ | 121/160 [00:54<00:13, 2.85it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 121 ---\n", + "Raw token IDs: [0, 366, 410, 493, 63, 14549, 428, 3879, 12, 3, 9, 3062, 58, 1]\n", + "Decoded Prediction: When did Beyonce give birth to a daughter?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 76%|███████▋ | 122/160 [00:54<00:13, 2.84it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 122 ---\n", + "Raw token IDs: [0, 0, 363, 410, 493, 63, 106, 75, 154, 2516, 13, 38, 8, 568, 113, 2139, 160, 2870, 160, 7562, 58, 1]\n", + "Decoded Prediction: What did Beyoncé speak of as the person who helped her fight her depression?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 77%|███████▋ | 123/160 [00:55<00:12, 2.90it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 123 ---\n", + "Raw token IDs: [0, 366, 47, 37, 7512, 15, 283, 5, 216, 7289, 107, 5355, 2012, 58, 1]\n", + "Decoded Prediction: When was Theodore M. 
Hesburgh Library completed?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 78%|███████▊ | 124/160 [00:55<00:12, 2.91it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 124 ---\n", + "Raw token IDs: [0, 86, 125, 215, 47, 17464, 1106, 3279, 12, 8, 2637, 13, 46, 2548, 58, 1]\n", + "Decoded Prediction: In what year was Lobund raised to the status of an Institute?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 78%|███████▊ | 125/160 [00:55<00:11, 3.04it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 125 ---\n", + "Raw token IDs: [0, 2645, 47, 8, 2753, 13, 7711, 3, 17084, 45, 25021, 12, 23744, 58, 1]\n", + "Decoded Prediction: Who was the president of Notre Dame from 1946 to 1952?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 79%|███████▉ | 126/160 [00:56<00:10, 3.24it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 126 ---\n", + "Raw token IDs: [0, 86, 125, 215, 410, 8, 7262, 1369, 8, 5088, 9713, 58, 1]\n", + "Decoded Prediction: In what year did the Irish win the Rose Bowl?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 79%|███████▉ | 127/160 [00:56<00:10, 3.27it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 127 ---\n", + "Raw token IDs: [0, 571, 186, 381, 80, 3605, 410, 493, 63, 106, 75, 154, 43, 58, 1]\n", + "Decoded Prediction: How many number one songs did Beyoncé have?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 80%|████████ | 128/160 [00:56<00:08, 3.56it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 128 ---\n", + "Raw token IDs: [0, 363, 19, 7711, 3, 17084, 31, 7, 1988, 38, 58, 1]\n", + "Decoded Prediction: What 
is Notre Dame's claim as?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 81%|████████ | 129/160 [00:56<00:08, 3.55it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 129 ---\n", + "Raw token IDs: [0, 2645, 1790, 493, 63, 106, 75, 154, 31, 7, 2772, 16, 1186, 2038, 58, 1]\n", + "Decoded Prediction: Who published Beyoncé's interview in April 2013?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 81%|████████▏ | 130/160 [00:57<00:08, 3.38it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 130 ---\n", + "Raw token IDs: [0, 37, 1022, 7628, 6455, 23, 32, 5929, 2781, 47, 3, 25125, 227, 125, 3819, 58, 1]\n", + "Decoded Prediction: The Jesuit Ratio Studiorum was modeled after what university?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 82%|████████▏ | 131/160 [00:57<00:08, 3.36it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 131 ---\n", + "Raw token IDs: [0, 0, 366, 410, 493, 63, 14549, 2467, 8, 283, 4562, 3953, 3057, 6580, 58, 1]\n", + "Decoded Prediction: When did Beyonce attend the MTV Video Music Awards?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 82%|████████▎ | 132/160 [00:57<00:09, 3.08it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 132 ---\n", + "Raw token IDs: [0, 4073, 3835, 2616, 493, 63, 106, 75, 154, 38, 8, 167, 2021, 3955, 16244, 13, 1230, 58, 1]\n", + "Decoded Prediction: Which magazine listed Beyoncé as the most powerful female musician of 2015?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 83%|████████▎ | 133/160 [00:58<00:08, 3.07it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 133 ---\n", + "Raw token 
IDs: [0, 0, 363, 5349, 13, 723, 405, 493, 63, 106, 75, 154, 31, 7, 723, 2389, 560, 58, 1]\n", + "Decoded Prediction: What genre of music does Beyoncé's music generally include?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 84%|████████▍ | 134/160 [00:58<00:08, 2.94it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 134 ---\n", + "Raw token IDs: [0, 363, 19, 8, 216, 159, 348, 21361, 107, 725, 16, 8, 1888, 10929, 2501, 13, 20758, 58, 1]\n", + "Decoded Prediction: What is the Heisman Trophies in the College Football Hall of Fame?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 84%|████████▍ | 135/160 [00:59<00:09, 2.67it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 135 ---\n", + "Raw token IDs: [0, 0, 363, 3, 288, 6015, 410, 493, 63, 106, 75, 154, 3508, 12, 227, 160, 1675, 28, 9373, 1027, 58, 1]\n", + "Decoded Prediction: What anthem did Beyoncé transition to after her relationship with Jay Z?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 85%|████████▌ | 136/160 [00:59<00:09, 2.55it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 136 ---\n", + "Raw token IDs: [0, 0, 363, 2324, 410, 493, 63, 14549, 10159, 30, 96, 345, 76, 17, 3, 9, 7888, 30, 94, 121, 58, 1]\n", + "Decoded Prediction: What song did Beyonce sing on \"Put a Ring on It\"?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 86%|████████▌ | 137/160 [00:59<00:08, 2.83it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 137 ---\n", + "Raw token IDs: [0, 363, 19, 493, 63, 14549, 31, 7, 991, 712, 58, 1]\n", + "Decoded Prediction: What is Beyonce's lead single?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 86%|████████▋ | 
138/160 [01:00<00:07, 3.09it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 138 ---\n", + "Raw token IDs: [0, 0, 363, 19, 8, 10043, 740, 30, 7711, 3, 17084, 31, 7, 4730, 58, 1]\n", + "Decoded Prediction: What is the oldest building on Notre Dame's campus?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 87%|████████▋ | 139/160 [01:00<00:08, 2.50it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 139 ---\n", + "Raw token IDs: [0, 0, 363, 2760, 410, 493, 63, 14549, 1369, 16, 4402, 44, 8, 797, 3467, 13, 2570, 2748, 277, 6, 10236, 7, 6, 11, 19816, 7, 5777, 3057, 6580, 58, 1]\n", + "Decoded Prediction: What award did Beyonce win in 2001 at the American Society of Composers, Authors, and Publishers Pop Music Awards?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 88%|████████▊ | 140/160 [01:01<00:08, 2.39it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 140 ---\n", + "Raw token IDs: [0, 571, 186, 5128, 410, 493, 63, 106, 75, 154, 17046, 30, 8, 8667, 5, 17080, 3111, 1150, 3351, 58, 1]\n", + "Decoded Prediction: How many dates did Beyoncé embark on the Mrs. 
Carter Show World Tour?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 88%|████████▊ | 141/160 [01:01<00:08, 2.31it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 141 ---\n", + "Raw token IDs: [0, 486, 125, 1246, 410, 493, 63, 106, 75, 154, 942, 325, 382, 2960, 9, 5376, 13515, 58, 1]\n", + "Decoded Prediction: At what age did Beyoncé meet LaTavia Roberson?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 89%|████████▉ | 142/160 [01:02<00:09, 1.97it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 142 ---\n", + "Raw token IDs: [0, 19344, 63, 31, 7, 9364, 1883, 70, 779, 3783, 5695, 2324, 96, 439, 1092, 53, 2900, 121, 30, 125, 814, 58, 1]\n", + "Decoded Prediction: Destiny's Child released their major label debut song \"Killing Time\" on what film?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 89%|████████▉ | 143/160 [01:02<00:08, 1.97it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 143 ---\n", + "Raw token IDs: [0, 0, 2645, 410, 493, 63, 106, 75, 154, 11872, 135, 12, 992, 30, 16, 1230, 58, 1]\n", + "Decoded Prediction: Who did Beyoncé urge them to focus on in 2015?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 90%|█████████ | 144/160 [01:03<00:07, 2.18it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 144 ---\n", + "Raw token IDs: [0, 571, 5065, 19, 325, 3809, 17, 444, 58, 1]\n", + "Decoded Prediction: How tall is LaFortune?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 91%|█████████ | 145/160 [01:03<00:06, 2.34it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 145 ---\n", + "Raw token IDs: [0, 0, 571, 186, 3187, 65, 493, 
63, 106, 75, 154, 1916, 38, 3, 9, 6729, 2377, 58, 1]\n", + "Decoded Prediction: How many records has Beyoncé sold as a solo artist?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 91%|█████████▏| 146/160 [01:03<00:06, 2.28it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 146 ---\n", + "Raw token IDs: [0, 0, 366, 410, 493, 63, 106, 75, 154, 143, 160, 5695, 30, 160, 66, 18, 89, 15, 13513, 1552, 1928, 180, 12204, 14200, 58, 1]\n", + "Decoded Prediction: When did Beyoncé make her debut on her all-female tour band Suga Mama?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 92%|█████████▏| 147/160 [01:04<00:04, 2.61it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 147 ---\n", + "Raw token IDs: [0, 571, 186, 8167, 410, 272, 31, 16803, 1789, 16, 165, 166, 471, 58, 1]\n", + "Decoded Prediction: How many copies did B'Day sell in its first week?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 92%|█████████▎| 148/160 [01:04<00:05, 2.38it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 148 ---\n", + "Raw token IDs: [0, 0, 363, 410, 493, 63, 106, 75, 154, 24861, 30, 1332, 13597, 7218, 227, 8, 8531, 2243, 5054, 30, 1826, 31, 7, 749, 4718, 505, 58, 1]\n", + "Decoded Prediction: What did Beyoncé endorse on March 26, 2013, after the Supreme Court debate on California's Proposition 8?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 93%|█████████▎| 149/160 [01:04<00:04, 2.75it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 149 ---\n", + "Raw token IDs: [0, 363, 19, 325, 3809, 17, 444, 31, 7, 2041, 1487, 58, 1]\n", + "Decoded Prediction: What is LaFortune's annual budget?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", 
+ "text": [ + "\r", + " 94%|█████████▍| 150/160 [01:05<00:03, 3.02it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 150 ---\n", + "Raw token IDs: [0, 0, 2645, 19, 9762, 15, 210, 8900, 965, 58, 1]\n", + "Decoded Prediction: Who is Mathew Knowles?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 95%|█████████▌| 152/160 [01:05<00:02, 3.58it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 151 ---\n", + "Raw token IDs: [0, 2645, 19, 8, 2630, 8374, 651, 16, 1900, 3370, 16, 7711, 3, 17084, 58, 1]\n", + "Decoded Prediction: Who is the biggest rivalry in college football in Notre Dame?\n", + "\n", + "--- Sample 152 ---\n", + "Raw token IDs: [0, 366, 47, 3, 9, 8, 1863, 3595, 2946, 58, 1]\n", + "Decoded Prediction: When was a theology library opened?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 96%|█████████▌| 153/160 [01:05<00:01, 3.50it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 153 ---\n", + "Raw token IDs: [0, 0, 2840, 47, 493, 63, 106, 75, 154, 3, 9, 6729, 343, 21, 192, 203, 58, 1]\n", + "Decoded Prediction: Where was Beyoncé a soloist for two years?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 96%|█████████▋| 154/160 [01:06<00:01, 3.74it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 154 ---\n", + "Raw token IDs: [0, 571, 186, 6116, 405, 7711, 3, 17084, 12750, 43, 58, 1]\n", + "Decoded Prediction: How many seats does Notre Dame Stadium have?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 97%|█████████▋| 155/160 [01:07<00:02, 1.84it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 155 ---\n", + "Raw token IDs: [0, 0, 486, 125, 1246, 410, 493, 63, 14549, 11, 10605, 
11768, 40, 232, 942, 325, 382, 2960, 9, 5376, 13515, 58, 1]\n", + "Decoded Prediction: At what age did Beyonce and Kelly Rowland meet LaTavia Roberson?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 98%|█████████▊| 156/160 [01:07<00:01, 2.14it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 156 ---\n", + "Raw token IDs: [0, 0, 2840, 47, 493, 63, 106, 75, 154, 3, 9, 6729, 343, 21, 192, 203, 58, 1]\n", + "Decoded Prediction: Where was Beyoncé a soloist for two years?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 98%|█████████▊| 157/160 [01:08<00:01, 2.15it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 157 ---\n", + "Raw token IDs: [0, 0, 86, 125, 5112, 410, 8, 11392, 53, 9036, 2125, 13, 1371, 4206, 493, 63, 106, 75, 154, 38, 8, 2224, 10178, 9152, 16, 1371, 58, 1]\n", + "Decoded Prediction: In what decade did the Recording Industry Association of America recognize Beyoncé as the Top Certified Artist in America?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 99%|█████████▉| 158/160 [01:08<00:00, 2.20it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 158 ---\n", + "Raw token IDs: [0, 0, 363, 47, 8, 564, 13, 8, 3, 60, 18, 21019, 13, 493, 63, 14549, 31, 7, 5093, 3605, 58, 1]\n", + "Decoded Prediction: What was the name of the re-release of Beyonce's Spanish songs?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 99%|█████████▉| 159/160 [01:08<00:00, 2.40it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 159 ---\n", + "Raw token IDs: [0, 363, 405, 493, 63, 106, 75, 154, 3115, 369, 95, 28, 383, 999, 58, 1]\n", + "Decoded Prediction: What does Beyoncé typically come up with during production?\n" + ] + }, + { + "name": 
"stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 160/160 [01:09<00:00, 2.31it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Sample 160 ---\n", + "Raw token IDs: [0, 363, 4889, 405, 7711, 3, 17084, 5978, 38, 3, 9, 1144, 13, 58, 1]\n", + "Decoded Prediction: What division does Notre Dame compete as a member of?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "from tqdm import tqdm\n", + "\n", + "decoded_preds = []\n", + "decoded_refs = []\n", + "\n", + "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", + "model.to(device)\n", + "model.eval()\n", + "\n", + "for i, sample in enumerate(tqdm(eval_dataset)):\n", + " if sample[\"answer\"] in sample[\"context\"]:\n", + " highlighted_context = sample[\"context\"].replace(sample[\"answer\"], f\" {sample['answer']} \")\n", + " else:\n", + " highlighted_context = sample[\"context\"] + f\" {sample['answer']} \"\n", + "\n", + " input_text = f\"generate question: {highlighted_context}\"\n", + " inputs = tokenizer(\n", + " input_text,\n", + " return_tensors=\"pt\",\n", + " truncation=True,\n", + " padding=\"max_length\",\n", + " max_length=256\n", + " ).to(device)\n", + "\n", + " output_ids = model.generate(\n", + " **inputs,\n", + " max_length=64,\n", + " num_beams=4,\n", + " early_stopping=False, # <— loosen this up for now\n", + " no_repeat_ngram_size=2\n", + " )\n", + "\n", + " # ���� Debug print\n", + " print(f\"\\n--- Sample {i + 1} ---\")\n", + " print(\"Raw token IDs:\", output_ids[0].tolist())\n", + "\n", + " decoded_pred = tokenizer.decode(output_ids[0], skip_special_tokens=True)\n", + " print(\"Decoded Prediction:\", decoded_pred)\n", + "\n", + " decoded_preds.append(decoded_pred)\n", + " decoded_refs.append(sample[\"question\"])\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" 
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

# Smoothing function handed to sentence_bleu for every pair.
# NOTE(review): the recorded run still shows BLEU-1 = 0.0000 for a pair with no
# shared tokens, so smoothing does not lift zero-overlap pairs - confirm against
# the NLTK documentation if that matters for reporting.
smoothie = SmoothingFunction().method1

# Number of prediction/reference pairs printed for eyeballing. The average
# below is always computed over the whole evaluation set, not this subset.
NUM_EXAMPLES_TO_SHOW = 5

# Score EVERY prediction/reference pair. Previously only the first 5 pairs
# were scored, so the "average on eval set" was really a 5-sample mean.
# NOTE: str.split() tokenization keeps punctuation attached to words
# (e.g. "2008?" does not match "2008"), which lowers unigram overlap.
bleu_scores = [
    sentence_bleu(
        [ref.split()],
        pred.split(),
        weights=(1, 0, 0, 0),  # unigram precision only -> BLEU-1
        smoothing_function=smoothie,
    )
    for pred, ref in zip(decoded_preds, decoded_refs)
]

print("\nSample Predictions vs References with BLEU-1:")
print("-" * 50)

for i in range(min(NUM_EXAMPLES_TO_SHOW, len(bleu_scores))):
    pred = decoded_preds[i]
    ref = decoded_refs[i]
    bleu = bleu_scores[i]

    print(f"\nSample {i + 1}")
    print(f"Prediction : {pred}")
    print(f"Reference  : {ref}")
    print(f"BLEU-1     : {bleu:.4f}")

# Average BLEU-1 across all evaluated examples (0 when nothing was scored).
avg_bleu = sum(bleu_scores) / len(bleu_scores) if bleu_scores else 0
print(f"\nAverage BLEU-1 Score on Eval Set: {avg_bleu:.4f}")
"BLEU-2: 0.3313, BLEU-4: 0.2037\n", + "Sample 24\n", + "BLEU-2: 0.0645, BLEU-4: 0.0316\n", + "Sample 25\n", + "BLEU-2: 0.1157, BLEU-4: 0.0291\n", + "Sample 26\n", + "BLEU-2: 0.6708, BLEU-4: 0.5424\n", + "Sample 27\n", + "BLEU-2: 0.2652, BLEU-4: 0.1631\n", + "Sample 28\n", + "BLEU-2: 0.2705, BLEU-4: 0.0517\n", + "Sample 29\n", + "BLEU-2: 0.0354, BLEU-4: 0.0162\n", + "Sample 30\n", + "BLEU-2: 0.4472, BLEU-4: 0.1375\n", + "Sample 31\n", + "BLEU-2: 0.0296, BLEU-4: 0.0210\n", + "Sample 32\n", + "BLEU-2: 0.1257, BLEU-4: 0.0336\n", + "Sample 33\n", + "BLEU-2: 0.2248, BLEU-4: 0.0836\n", + "Sample 34\n", + "BLEU-2: 0.2853, BLEU-4: 0.0897\n", + "Sample 35\n", + "BLEU-2: 0.0302, BLEU-4: 0.0189\n", + "Sample 36\n", + "BLEU-2: 0.2739, BLEU-4: 0.0801\n", + "Sample 37\n", + "BLEU-2: 0.0000, BLEU-4: 0.0000\n", + "Sample 38\n", + "BLEU-2: 0.5270, BLEU-4: 0.4463\n", + "Sample 39\n", + "BLEU-2: 0.5274, BLEU-4: 0.4562\n", + "Sample 40\n", + "BLEU-2: 0.1093, BLEU-4: 0.0307\n", + "Sample 41\n", + "BLEU-2: 0.3918, BLEU-4: 0.3642\n", + "Sample 42\n", + "BLEU-2: 0.5547, BLEU-4: 0.4280\n", + "Sample 43\n", + "BLEU-2: 0.0331, BLEU-4: 0.0170\n", + "Sample 44\n", + "BLEU-2: 0.0332, BLEU-4: 0.0151\n", + "Sample 45\n", + "BLEU-2: 0.1491, BLEU-4: 0.0446\n", + "Sample 46\n", + "BLEU-2: 0.2388, BLEU-4: 0.0914\n", + "Sample 47\n", + "BLEU-2: 0.1907, BLEU-4: 0.0474\n", + "Sample 48\n", + "BLEU-2: 0.8165, BLEU-4: 0.5969\n", + "Sample 49\n", + "BLEU-2: 0.4396, BLEU-4: 0.3716\n", + "Sample 50\n", + "BLEU-2: 0.1353, BLEU-4: 0.0366\n", + "Sample 51\n", + "BLEU-2: 0.0301, BLEU-4: 0.0192\n", + "Sample 52\n", + "BLEU-2: 0.0522, BLEU-4: 0.0248\n", + "Sample 53\n", + "BLEU-2: 0.0523, BLEU-4: 0.0188\n", + "Sample 54\n", + "BLEU-2: 0.3280, BLEU-4: 0.0742\n", + "Sample 55\n", + "BLEU-2: 0.1195, BLEU-4: 0.0309\n", + "Sample 56\n", + "BLEU-2: 0.0426, BLEU-4: 0.0224\n", + "Sample 57\n", + "BLEU-2: 0.1118, BLEU-4: 0.0288\n", + "Sample 58\n", + "BLEU-2: 0.6396, BLEU-4: 0.4833\n", + "Sample 59\n", + "BLEU-2: 0.0598, 
BLEU-4: 0.0341\n", + "Sample 60\n", + "BLEU-2: 0.0244, BLEU-4: 0.0215\n", + "Sample 61\n", + "BLEU-2: 0.0776, BLEU-4: 0.0188\n", + "Sample 62\n", + "BLEU-2: 0.1826, BLEU-4: 0.0494\n", + "Sample 63\n", + "BLEU-2: 0.1091, BLEU-4: 0.0243\n", + "Sample 64\n", + "BLEU-2: 0.0318, BLEU-4: 0.0215\n", + "Sample 65\n", + "BLEU-2: 0.0242, BLEU-4: 0.0131\n", + "Sample 66\n", + "BLEU-2: 0.0527, BLEU-4: 0.0292\n", + "Sample 67\n", + "BLEU-2: 0.0331, BLEU-4: 0.0170\n", + "Sample 68\n", + "BLEU-2: 0.2315, BLEU-4: 0.0650\n", + "Sample 69\n", + "BLEU-2: 0.1630, BLEU-4: 0.1046\n", + "Sample 70\n", + "BLEU-2: 0.1913, BLEU-4: 0.0389\n", + "Sample 71\n", + "BLEU-2: 0.4004, BLEU-4: 0.1251\n", + "Sample 72\n", + "BLEU-2: 0.0488, BLEU-4: 0.0330\n", + "Sample 73\n", + "BLEU-2: 0.2549, BLEU-4: 0.1071\n", + "Sample 74\n", + "BLEU-2: 0.0889, BLEU-4: 0.0225\n", + "Sample 75\n", + "BLEU-2: 0.3381, BLEU-4: 0.2575\n", + "Sample 76\n", + "BLEU-2: 0.0000, BLEU-4: 0.0000\n", + "Sample 77\n", + "BLEU-2: 0.1491, BLEU-4: 0.0446\n", + "Sample 78\n", + "BLEU-2: 0.2970, BLEU-4: 0.1703\n", + "Sample 79\n", + "BLEU-2: 0.4924, BLEU-4: 0.1281\n", + "Sample 80\n", + "BLEU-2: 0.6202, BLEU-4: 0.4572\n", + "Sample 81\n", + "BLEU-2: 0.2010, BLEU-4: 0.0541\n", + "Sample 82\n", + "BLEU-2: 0.0267, BLEU-4: 0.0172\n", + "Sample 83\n", + "BLEU-2: 0.0253, BLEU-4: 0.0155\n", + "Sample 84\n", + "BLEU-2: 0.0333, BLEU-4: 0.0211\n", + "Sample 85\n", + "BLEU-2: 0.0373, BLEU-4: 0.0240\n", + "Sample 86\n", + "BLEU-2: 0.2215, BLEU-4: 0.0623\n", + "Sample 87\n", + "BLEU-2: 0.3381, BLEU-4: 0.0925\n", + "Sample 88\n", + "BLEU-2: 0.5774, BLEU-4: 0.3656\n", + "Sample 89\n", + "BLEU-2: 0.1516, BLEU-4: 0.0353\n", + "Sample 90\n", + "BLEU-2: 0.0307, BLEU-4: 0.0158\n", + "Sample 91\n", + "BLEU-2: 0.0389, BLEU-4: 0.0203\n", + "Sample 92\n", + "BLEU-2: 0.0545, BLEU-4: 0.0128\n", + "Sample 93\n", + "BLEU-2: 0.0550, BLEU-4: 0.0241\n", + "Sample 94\n", + "BLEU-2: 0.0456, BLEU-4: 0.0184\n", + "Sample 95\n", + "BLEU-2: 0.1837, BLEU-4: 0.0480\n", 
+ "Sample 96\n", + "BLEU-2: 0.6355, BLEU-4: 0.4844\n", + "Sample 97\n", + "BLEU-2: 0.2619, BLEU-4: 0.1811\n", + "Sample 98\n", + "BLEU-2: 0.2970, BLEU-4: 0.0805\n", + "Sample 99\n", + "BLEU-2: 0.0338, BLEU-4: 0.0192\n", + "Sample 100\n", + "BLEU-2: 0.0427, BLEU-4: 0.0227\n", + "Sample 101\n", + "BLEU-2: 0.2753, BLEU-4: 0.0544\n", + "Sample 102\n", + "BLEU-2: 0.2336, BLEU-4: 0.0945\n", + "Sample 103\n", + "BLEU-2: 0.0984, BLEU-4: 0.0265\n", + "Sample 104\n", + "BLEU-2: 0.4132, BLEU-4: 0.2405\n", + "Sample 105\n", + "BLEU-2: 0.5433, BLEU-4: 0.3436\n", + "Sample 106\n", + "BLEU-2: 0.4606, BLEU-4: 0.2620\n", + "Sample 107\n", + "BLEU-2: 0.1495, BLEU-4: 0.0404\n", + "Sample 108\n", + "BLEU-2: 0.0690, BLEU-4: 0.0393\n", + "Sample 109\n", + "BLEU-2: 0.2134, BLEU-4: 0.0923\n", + "Sample 110\n", + "BLEU-2: 0.4004, BLEU-4: 0.1251\n", + "Sample 111\n", + "BLEU-2: 0.0181, BLEU-4: 0.0108\n", + "Sample 112\n", + "BLEU-2: 0.0000, BLEU-4: 0.0000\n", + "Sample 113\n", + "BLEU-2: 0.2137, BLEU-4: 0.0818\n", + "Sample 114\n", + "BLEU-2: 0.0913, BLEU-4: 0.0260\n", + "Sample 115\n", + "BLEU-2: 0.0000, BLEU-4: 0.0000\n", + "Sample 116\n", + "BLEU-2: 0.2598, BLEU-4: 0.1015\n", + "Sample 117\n", + "BLEU-2: 0.1239, BLEU-4: 0.0348\n", + "Sample 118\n", + "BLEU-2: 0.1539, BLEU-4: 0.0290\n", + "Sample 119\n", + "BLEU-2: 0.0000, BLEU-4: 0.0000\n", + "Sample 120\n", + "BLEU-2: 0.0000, BLEU-4: 0.0000\n", + "Sample 121\n", + "BLEU-2: 0.5669, BLEU-4: 0.1809\n", + "Sample 122\n", + "BLEU-2: 0.1482, BLEU-4: 0.0359\n", + "Sample 123\n", + "BLEU-2: 0.2536, BLEU-4: 0.1845\n", + "Sample 124\n", + "BLEU-2: 0.3693, BLEU-4: 0.1109\n", + "Sample 125\n", + "BLEU-2: 0.1651, BLEU-4: 0.0441\n", + "Sample 126\n", + "BLEU-2: 0.5608, BLEU-4: 0.4411\n", + "Sample 127\n", + "BLEU-2: 0.0238, BLEU-4: 0.0119\n", + "Sample 128\n", + "BLEU-2: 0.1918, BLEU-4: 0.0580\n", + "Sample 129\n", + "BLEU-2: 0.0000, BLEU-4: 0.0000\n", + "Sample 130\n", + "BLEU-2: 0.0000, BLEU-4: 0.0000\n", + "Sample 131\n", + "BLEU-2: 0.0373, 
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

# --- Per-sample BLEU-2 and BLEU-4 -------------------------------------------
# Build the smoothing function here instead of relying on a `smoothie`
# variable left behind by another cell, so this cell also runs on its own.
smoothie = SmoothingFunction().method1

for i, (pred, ref) in enumerate(zip(decoded_preds, decoded_refs)):
    # Tokenize once per pair (whitespace split) and reuse for both scores.
    ref_tokens = [ref.split()]
    pred_tokens = pred.split()
    bleu2 = sentence_bleu(ref_tokens, pred_tokens, weights=(0.5, 0.5), smoothing_function=smoothie)
    bleu4 = sentence_bleu(ref_tokens, pred_tokens, weights=(0.25, 0.25, 0.25, 0.25), smoothing_function=smoothie)
    print(f"Sample {i+1}\nBLEU-2: {bleu2:.4f}, BLEU-4: {bleu4:.4f}")

# --- Sanity check -----------------------------------------------------------
# All three lengths should agree before the scores are exported.
print("Length of decoded_preds:", len(decoded_preds))
print("Length of decoded_refs:", len(decoded_refs))
print("Length of bleu_scores:", len(bleu_scores))

# --- BLEU-1 for every pair, exported to CSV ---------------------------------
smoothing = SmoothingFunction().method1
bleu_scores = [
    sentence_bleu([ref.split()], pred.split(), weights=(1, 0, 0, 0), smoothing_function=smoothing)
    for pred, ref in zip(decoded_preds, decoded_refs)
]

# One row per evaluation sample: generated question, reference question, score.
df = pd.DataFrame({
    "Prediction": decoded_preds,
    "Reference": decoded_refs,
    "BLEU-1": bleu_scores
})
df.to_csv("question_generation_bleu_scores.csv", index=False)
# Preview of the exported file: read the CSV back from disk and print its
# first rows to confirm the export round-trips.
import pandas as pd

df_check = pd.read_csv("question_generation_bleu_scores.csv")
first_rows = df_check.head()
print(first_rows)
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Plot ROUGE-1 and ROUGE-L scores over epochs\n", + "plt.figure(figsize=(10, 5))\n", + "plt.plot(epochs, rouge1_scores, marker='o', label='ROUGE-1')\n", + "plt.plot(epochs, rougeL_scores, marker='o', label='ROUGE-L')\n", + "plt.xlabel('Epoch')\n", + "plt.ylabel('Score')\n", + "plt.title('ROUGE Scores over Epochs')\n", + "plt.legend()\n", + "plt.grid(True)\n", + "plt.tight_layout()\n", + "plt.show()\n", + "\n", + "#ADD TO YOUR REPORT :\n", + "#INTERPRETATION : The line plot shows a steady increase in both ROUGE-1 and ROUGE-L scores over training epochs, indicating that the model's ability to generate relevant and coherent questions improved progressively. ROUGE-1 evaluates unigram overlap, while ROUGE-L captures longest common subsequence similarity, so their combined trend confirms enhanced syntactic and semantic alignment with reference questions." + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 407 + }, + "id": "Qd-QC_RIPyQi", + "outputId": "b1c2a017-2942-48ef-c8f0-9b4754446b84" + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "#Histogram: BLEU-1 Score Distribution\n", + "import matplotlib.pyplot as plt\n", + "\n", + "# BLEU score histogram\n", + "plt.figure(figsize=(8, 4))\n", + "plt.hist(bleu_scores, bins=10, color='skyblue', edgecolor='black')\n", + "plt.title('BLEU-1 Score Distribution')\n", + "plt.xlabel('BLEU-1 Score')\n", + "plt.ylabel('Frequency')\n", + "plt.grid(True)\n", + "plt.tight_layout()\n", + "plt.show()\n", + "\n", + "#INTERPRETATION : The BLEU-1 histogram reveals that most generated questions received lower unigram overlap scores, with only a few predictions achieving high similarity with the reference. This is expected in generative tasks, especially when multiple valid phrasings exist for a single question." + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "EcvasJfXRB7x", + "outputId": "c2ffd227-7239-4231-e87f-c02dee005b54" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Length of BLEU-1 scores: 160\n", + "Length of ROUGE-1 scores: 3\n" + ] + } + ], + "source": [ + "print(\"Length of BLEU-1 scores:\", len(bleu_scores))\n", + "print(\"Length of ROUGE-1 scores:\", len(rouge1_scores))\n" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": { + "id": "QZtLnjPJRMry" + }, + "outputs": [], + "source": [ + "import evaluate\n", + "rouge = evaluate.load(\"rouge\")\n", + "\n", + "rouge1_scores = []\n", + "rougeL_scores = []\n", + "\n", + "for pred, ref in zip(decoded_preds, decoded_refs):\n", + " result = rouge.compute(predictions=[pred], references=[ref])\n", + " rouge1_scores.append(result[\"rouge1\"])\n", + " rougeL_scores.append(result[\"rougeL\"])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "qHX-R-5xRRXN", + "outputId": 
# The scatter plot needs one BLEU-1 and one ROUGE-1 value per sample, so
# report both lengths to confirm they match.
for message, scores in (
    ("Length of BLEU-1 scores:", bleu_scores),
    ("Length of ROUGE-1 scores:", rouge1_scores),
):
    print(message, len(scores))
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "#Scatter Plot Between BLEU-1 and ROUGE-1\n", + "import matplotlib.pyplot as plt\n", + "\n", + "plt.figure(figsize=(8, 6))\n", + "plt.scatter(bleu_scores, rouge1_scores, alpha=0.6, color='purple')\n", + "plt.title('BLEU-1 vs ROUGE-1 Scores')\n", + "plt.xlabel('BLEU-1 Score')\n", + "plt.ylabel('ROUGE-1 Score')\n", + "plt.grid(True)\n", + "plt.show()\n", + "\n", + "#Interpretation : To assess the quality of the generated questions, we computed BLEU-1, ROUGE-1, and ROUGE-L scores across the evaluation set. While BLEU-1 captures exact n-gram overlap, ROUGE measures both lexical and semantic similarity more flexibly. A scatter plot comparing BLEU-1 and ROUGE-1 scores showed moderate variation, with some samples scoring high on ROUGE despite lower BLEU, suggesting semantic validity despite lexical mismatch. This highlights the limitation of using a single metric and motivates multi-metric evaluation for generative tasks." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rZ_1fFAtV60u" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "T4", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +}