dannoncaffeine
/

GPT2-124M-wikitext-v0.1

@@ -1,911 +1,1349 @@
 {
-  "nbformat": 4,
-  "nbformat_minor": 0,
-  "metadata": {
     "colab": {
-      "provenance": [],
-      "gpuType": "T4"
-    },
-    "kernelspec": {
-      "name": "python3",
-      "display_name": "Python 3"
-    },
-    "language_info": {
-      "name": "python"
-    },
-    "accelerator": "GPU",
-    "widgets": {
-      "application/vnd.jupyter.widget-state+json": {
-        "72d3f33c56a14b01bef05ea2ae98e72f": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "VBoxModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "VBoxModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "VBoxView",
-            "box_style": "",
-            "children": [
-              "IPY_MODEL_8d44cf16b34d40bd8702560ae149192c",
-              "IPY_MODEL_13af4957747d40bbb9501bc13c3c160b",
-              "IPY_MODEL_1be0b602c74f4a7182bd041aeba58112",
-              "IPY_MODEL_af225dc2d79649c2ac0e853c517a0482",
-              "IPY_MODEL_f019d8eee73f4b7b8312029f03040c45"
-            ],
-            "layout": "IPY_MODEL_d1deb908be244cfb8dd253b3cc081510"
-          }
-        },
-        "8d44cf16b34d40bd8702560ae149192c": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "HTMLModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_06e659f7bd9149e784a5cb68efbad736",
-            "placeholder": "",
-            "style": "IPY_MODEL_ba1f1326e12d437a9fd76faa226eec44",
-            "value": "<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.svg\nalt='Hugging Face'> <br> Copy a token from <a\nhref=\"https://huggingface.co/settings/tokens\" target=\"_blank\">your Hugging Face\ntokens page</a> and paste it below. <br> Immediately click login after copying\nyour token or it might be stored in plain text in this notebook file. </center>"
-          }
-        },
-        "13af4957747d40bbb9501bc13c3c160b": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "PasswordModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "PasswordModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "PasswordView",
-            "continuous_update": true,
-            "description": "Token:",
-            "description_tooltip": null,
-            "disabled": false,
-            "layout": "IPY_MODEL_65ea346ce3174bb098344531db439eac",
-            "placeholder": "",
-            "style": "IPY_MODEL_6ac7576be7204aeca73385c48e1c5d0a",
-            "value": ""
-          }
-        },
-        "1be0b602c74f4a7182bd041aeba58112": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "CheckboxModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "CheckboxModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "CheckboxView",
-            "description": "Add token as git credential?",
-            "description_tooltip": null,
-            "disabled": false,
-            "indent": true,
-            "layout": "IPY_MODEL_7fd3c4de83234008a72d9541c29ce765",
-            "style": "IPY_MODEL_b81585732ca14c89ae1b5bb25735cd62",
-            "value": true
-          }
-        },
-        "af225dc2d79649c2ac0e853c517a0482": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "ButtonModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "ButtonModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "ButtonView",
-            "button_style": "",
-            "description": "Login",
-            "disabled": false,
-            "icon": "",
-            "layout": "IPY_MODEL_a68a4415305045c0a89c60349175cc82",
-            "style": "IPY_MODEL_7316ece4ad474193bcc5b5db55632561",
-            "tooltip": ""
-          }
-        },
-        "f019d8eee73f4b7b8312029f03040c45": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "HTMLModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_c0bb9ace36d64b8296bd35d538bd4f7e",
-            "placeholder": "",
-            "style": "IPY_MODEL_03ad39c22f644e55bcffa070e3832582",
-            "value": "\n<b>Pro Tip:</b> If you don't already have one, you can create a dedicated\n'notebooks' token with 'write' access, that you can then easily reuse for all\nnotebooks. </center>"
-          }
-        },
-        "d1deb908be244cfb8dd253b3cc081510": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": "center",
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": "flex",
-            "flex": null,
-            "flex_flow": "column",
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": "50%"
-          }
-        },
-        "06e659f7bd9149e784a5cb68efbad736": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "ba1f1326e12d437a9fd76faa226eec44": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "DescriptionStyleModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "65ea346ce3174bb098344531db439eac": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "6ac7576be7204aeca73385c48e1c5d0a": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "DescriptionStyleModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "7fd3c4de83234008a72d9541c29ce765": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "b81585732ca14c89ae1b5bb25735cd62": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "DescriptionStyleModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "a68a4415305045c0a89c60349175cc82": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "7316ece4ad474193bcc5b5db55632561": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "ButtonStyleModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "ButtonStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "button_color": null,
-            "font_weight": ""
-          }
-        },
-        "c0bb9ace36d64b8296bd35d538bd4f7e": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "03ad39c22f644e55bcffa070e3832582": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "DescriptionStyleModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        }
-      }
     }
   },
-  "cells": [
     {
-      "cell_type": "markdown",
-      "source": [
-        "# 🧠 GPT-2 124M Fine-tuned on Wikitext\n",
-        "\n",
-        "This is an experiment aimed at gaining a practical understanding of 🤗 Transformers and 🤗 Datasets. It follows the guide outlined in the [Hugging Face Notebooks](https://github.com/huggingface/notebooks/blob/main/examples/language_modeling_from_scratch.ipynb) repository."
-      ],
-      "metadata": {
-        "id": "krQK4P9tWs_F"
-      }
     },
     {
-      "cell_type": "code",
-      "execution_count": 1,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "9-IVR9eMWY8v",
-        "outputId": "a2ebf62e-55b2-488e-95c8-b1ee10026308"
       },
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "Requirement already satisfied: datasets in /usr/local/lib/python3.10/dist-packages (2.15.0)\n",
-            "Requirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (4.35.2)\n",
-            "Requirement already satisfied: accelerate in /usr/local/lib/python3.10/dist-packages (0.25.0)\n",
-            "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from datasets) (1.23.5)\n",
-            "Requirement already satisfied: pyarrow>=8.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (9.0.0)\n",
-            "Requirement already satisfied: pyarrow-hotfix in /usr/local/lib/python3.10/dist-packages (from datasets) (0.6)\n",
-            "Requirement already satisfied: dill<0.3.8,>=0.3.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (0.3.7)\n",
-            "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from datasets) (1.5.3)\n",
-            "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (2.31.0)\n",
-            "Requirement already satisfied: tqdm>=4.62.1 in /usr/local/lib/python3.10/dist-packages (from datasets) (4.66.1)\n",
-            "Requirement already satisfied: xxhash in /usr/local/lib/python3.10/dist-packages (from datasets) (3.4.1)\n",
-            "Requirement already satisfied: multiprocess in /usr/local/lib/python3.10/dist-packages (from datasets) (0.70.15)\n",
-            "Requirement already satisfied: fsspec[http]<=2023.10.0,>=2023.1.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (2023.6.0)\n",
-            "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets) (3.9.1)\n",
-            "Requirement already satisfied: huggingface-hub>=0.18.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (0.19.4)\n",
-            "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from datasets) (23.2)\n",
-            "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from datasets) (6.0.1)\n",
-            "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers) (3.13.1)\n",
-            "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (2023.6.3)\n",
-            "Requirement already satisfied: tokenizers<0.19,>=0.14 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.15.0)\n",
-            "Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.4.1)\n",
-            "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from accelerate) (5.9.5)\n",
-            "Requirement already satisfied: torch>=1.10.0 in /usr/local/lib/python3.10/dist-packages (from accelerate) (2.1.0+cu118)\n",
-            "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (23.1.0)\n",
-            "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (6.0.4)\n",
-            "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.9.3)\n",
-            "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.4.0)\n",
-            "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.3.1)\n",
-            "Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (4.0.3)\n",
-            "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.18.0->datasets) (4.5.0)\n",
-            "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (3.3.2)\n",
-            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (3.6)\n",
-            "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (2.0.7)\n",
-            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (2023.11.17)\n",
-            "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (1.12)\n",
-            "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (3.2.1)\n",
-            "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (3.1.2)\n",
-            "Requirement already satisfied: triton==2.1.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (2.1.0)\n",
-            "Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2.8.2)\n",
-            "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2023.3.post1)\n",
-            "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.1->pandas->datasets) (1.16.0)\n",
-            "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.10.0->accelerate) (2.1.3)\n",
-            "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.10.0->accelerate) (1.3.0)\n"
-          ]
-        }
-      ],
-      "source": [
-        "# Install necessary packages\n",
-        "!pip install datasets transformers accelerate"
       ]
     },
     {
-      "cell_type": "code",
-      "source": [
-        "# Authenticate with HuggingFace Hub\n",
-        "from huggingface_hub import notebook_login\n",
-        "\n",
-        "notebook_login()"
-      ],
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 331,
-          "referenced_widgets": [
-            "72d3f33c56a14b01bef05ea2ae98e72f",
-            "8d44cf16b34d40bd8702560ae149192c",
-            "13af4957747d40bbb9501bc13c3c160b",
-            "1be0b602c74f4a7182bd041aeba58112",
-            "af225dc2d79649c2ac0e853c517a0482",
-            "f019d8eee73f4b7b8312029f03040c45",
-            "d1deb908be244cfb8dd253b3cc081510",
-            "06e659f7bd9149e784a5cb68efbad736",
-            "ba1f1326e12d437a9fd76faa226eec44",
-            "65ea346ce3174bb098344531db439eac",
-            "6ac7576be7204aeca73385c48e1c5d0a",
-            "7fd3c4de83234008a72d9541c29ce765",
-            "b81585732ca14c89ae1b5bb25735cd62",
-            "a68a4415305045c0a89c60349175cc82",
-            "7316ece4ad474193bcc5b5db55632561",
-            "c0bb9ace36d64b8296bd35d538bd4f7e",
-            "03ad39c22f644e55bcffa070e3832582"
-          ]
-        },
-        "id": "lpuAz9I-XsQQ",
-        "outputId": "bf420c42-7ff0-4a4c-da57-420691eaec5d"
       },
-      "execution_count": 2,
-      "outputs": [
-        {
-          "output_type": "display_data",
-          "data": {
-            "text/plain": [
-              "VBox(children=(HTML(value='<center> <img\\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…"
-            ],
-            "application/vnd.jupyter.widget-view+json": {
-              "version_major": 2,
-              "version_minor": 0,
-              "model_id": "72d3f33c56a14b01bef05ea2ae98e72f"
-            }
-          },
-          "metadata": {}
-        }
       ]
     },
     {
-      "cell_type": "code",
-      "source": [
-        "!apt install git-lfs"
-      ],
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "ka0fqf1AYD2N",
-        "outputId": "8c1b6396-cd72-45fd-9d1f-4fd9c76b0966"
       },
-      "execution_count": 3,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "Reading package lists... Done\n",
-            "Building dependency tree... Done\n",
-            "Reading state information... Done\n",
-            "git-lfs is already the newest version (3.0.2-1ubuntu0.2).\n",
-            "0 upgraded, 0 newly installed, 0 to remove and 15 not upgraded.\n"
-          ]
-        }
       ]
     },
     {
-      "cell_type": "code",
-      "source": [
-        "import transformers\n",
-        "\n",
-        "print(transformers.__version__)"
-      ],
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "evGbNxr8YIA1",
-        "outputId": "08025e50-d542-4f92-8c33-505cd2b44802"
       },
-      "execution_count": 4,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "4.35.2\n"
-          ]
-        }
       ]
     },
     {
-      "cell_type": "code",
-      "source": [
-        "# Load dataset\n",
-        "from datasets import load_dataset\n",
-        "\n",
-        "datasets = load_dataset('wikitext', 'wikitext-103-raw-v1')"
-      ],
-      "metadata": {
-        "id": "cdNnCQv4YMbT"
       },
-      "execution_count": 5,
-      "outputs": []
     },
     {
-      "cell_type": "code",
-      "source": [
-        "from transformers import AutoTokenizer\n",
-        "\n",
-        "model=\"gpt2\"\n",
-        "\n",
-        "tokenizer = AutoTokenizer.from_pretrained(model)\n",
-        "\n",
-        "def tokenize_function(examples):\n",
-        "    return tokenizer(examples[\"text\"])\n",
-        "\n",
-        "tokenized_datasets = datasets.map(tokenize_function, batched=True, num_proc=4, remove_columns=[\"text\"])"
-      ],
-      "metadata": {
-        "id": "RtYDKWCBY76H"
       },
-      "execution_count": 6,
-      "outputs": []
     },
     {
-      "cell_type": "code",
-      "source": [
-        "# block_size = tokenizer.model_max_length\n",
-        "block_size = 256\n",
-        "\n",
-        "# Function to group texts\n",
-        "def group_texts(examples):\n",
-        "    # Concatenate all texts.\n",
-        "    concatenated_examples = {k: sum(examples[k], []) for k in examples.keys()}\n",
-        "    total_length = len(concatenated_examples[list(examples.keys())[0]])\n",
-        "    # We drop the small remainder, we could add padding if the model supported it instead of this drop, you can\n",
-        "        # customize this part to your needs.\n",
-        "    total_length = (total_length // block_size) * block_size\n",
-        "    # Split by chunks of max_len.\n",
-        "    result = {\n",
-        "        k: [t[i : i + block_size] for i in range(0, total_length, block_size)]\n",
-        "        for k, t in concatenated_examples.items()\n",
-        "    }\n",
-        "    result[\"labels\"] = result[\"input_ids\"].copy()\n",
-        "    return result\n",
-        "\n",
-        "# Transform from tokenized dataset to lm dataset\n",
-        "lm_datasets = tokenized_datasets.map(\n",
-        "    group_texts,\n",
-        "    batched=True,\n",
-        "    batch_size=1000,\n",
-        "    num_proc=4,\n",
-        ")\n",
-        "\n",
-        "# Print a chunk of the dataset\n",
-        "print(tokenizer.decode(lm_datasets[\"train\"][1][\"input_ids\"]))"
-      ],
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "ar5R7hnYZyUm",
-        "outputId": "ceb702a8-8e60-4850-dfa7-ce111c21d875"
       },
-      "execution_count": 7,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "awa. A large team of writers handled the script. The game's opening theme was sung by May 'n. \n",
-            " It met with positive sales in Japan, and was praised by both Japanese and western critics. After release, it received downloadable content, along with an expanded edition in November of that year. It was also adapted into manga and an original video animation series. Due to low sales of Valkyria Chronicles II, Valkyria Chronicles III was not localized, but a fan translation compatible with the game's expanded edition was released in 2014. Media.Vision would return to the franchise with the development of Valkyria : Azure Revolution for the PlayStation 4. \n",
-            " = = Gameplay = = \n",
-            " As with previous Valkyira Chronicles games, Valkyria Chronicles III is a tactical role @-@ playing game where players take control of a military unit and take part in missions against enemy forces. Stories are told through comic book @-@ like panels with animated character portraits, with characters speaking partially through voiced speech bubbles and partially through unvoiced text. The player progresses through a series of linear missions, gradually unlocked as maps that can be freely scanned through and replayed as they are unlocked. The route to each story location on the map varies\n"
-          ]
-        }
       ]
     },
     {
-      "cell_type": "code",
-      "source": [
-        "# Load GPT2 model\n",
-        "from transformers import AutoModelForCausalLM, Trainer, TrainingArguments\n",
-        "import torch\n",
-        "\n",
-        "language_model = AutoModelForCausalLM.from_pretrained(model)\n",
-        "language_model.train()\n",
-        "\n",
-        "training_args = TrainingArguments(\n",
-        "    f\"GPT2-124M-wikitext-v0.1\",\n",
-        "    evaluation_strategy = \"epoch\",\n",
-        "    num_train_epochs=5,\n",
-        "    learning_rate=2e-5,\n",
-        "    weight_decay=0.01,\n",
-        "    push_to_hub=True\n",
-        ")"
-      ],
-      "metadata": {
-        "id": "RZhL1hiibVNK"
       },
-      "execution_count": 8,
-      "outputs": []
     },
     {
-      "cell_type": "code",
-      "source": [
-        "# Clear CUDA cache\n",
-        "torch.cuda.empty_cache()\n",
-        "\n",
-        "trainer = Trainer(\n",
-        "    model=language_model,\n",
-        "    args=training_args,\n",
-        "    train_dataset=lm_datasets[\"train\"],\n",
-        "    eval_dataset=lm_datasets[\"validation\"],\n",
-        ")\n",
-        "\n",
-        "trainer.train()"
-      ],
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 75
-        },
-        "id": "EvanxM9wfl6_",
-        "outputId": "194b1923-f8bd-40c8-b62a-a6ae136df8be"
       },
-      "execution_count": null,
-      "outputs": [
-        {
-          "output_type": "display_data",
-          "data": {
-            "text/plain": [
-              "<IPython.core.display.HTML object>"
-            ],
-            "text/html": [
-              "\n",
-              "    <div>\n",
-              "      \n",
-              "      <progress value='2797' max='287335' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
-              "      [  2797/287335 28:43 < 48:43:53, 1.62 it/s, Epoch 0.05/5]\n",
-              "    </div>\n",
-              "    <table border=\"1\" class=\"dataframe\">\n",
-              "  <thead>\n",
-              " <tr style=\"text-align: left;\">\n",
-              "      <th>Epoch</th>\n",
-              "      <th>Training Loss</th>\n",
-              "      <th>Validation Loss</th>\n",
-              "    </tr>\n",
-              "  </thead>\n",
-              "  <tbody>\n",
-              "  </tbody>\n",
-              "</table><p>"
-            ]
-          },
-          "metadata": {}
-        }
       ]
     },
     {
-      "cell_type": "code",
-      "source": [
-        "import math\n",
-        "\n",
-        "eval_results = trainer.evaluate()\n",
-        "print(f\"Perplexity: {math.exp(eval_results['eval_loss']):.2f}\")\n",
-        "\n",
-        "trainer.push_to_hub()"
       ],
-      "metadata": {
-        "id": "77GdAgpkiAOm"
       },
-      "execution_count": null,
-      "outputs": []
     }
-  ]
-}

 {
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "krQK4P9tWs_F"
+   },
+   "source": [
+    "# 🧠 GPT-2 124M Fine-tuned on Wikitext\n",
+    "\n",
+    "This is an experiment aimed at gaining a practical understanding of 🤗 Transformers and 🤗 Datasets. It follows the guide outlined in [Hugging Face Notebooks](https://github.com/huggingface/notebooks/blob/main/examples/language_modeling_from_scratch.ipynb)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
     "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "9-IVR9eMWY8v",
+    "outputId": "a2ebf62e-55b2-488e-95c8-b1ee10026308"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Collecting datasets\n",
+      "  Downloading datasets-2.15.0-py3-none-any.whl.metadata (20 kB)\n",
+      "Collecting transformers\n",
+      "  Downloading transformers-4.35.2-py3-none-any.whl.metadata (123 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m123.5/123.5 kB\u001b[0m \u001b[31m3.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n",
+      "\u001b[?25hCollecting accelerate\n",
+      "  Downloading accelerate-0.25.0-py3-none-any.whl.metadata (18 kB)\n",
+      "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from datasets) (1.24.1)\n",
+      "Collecting pyarrow>=8.0.0 (from datasets)\n",
+      "  Downloading pyarrow-14.0.1-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (3.0 kB)\n",
+      "Collecting pyarrow-hotfix (from datasets)\n",
+      "  Downloading pyarrow_hotfix-0.6-py3-none-any.whl.metadata (3.6 kB)\n",
+      "Collecting dill<0.3.8,>=0.3.0 (from datasets)\n",
+      "  Downloading dill-0.3.7-py3-none-any.whl.metadata (9.9 kB)\n",
+      "Collecting pandas (from datasets)\n",
+      "  Downloading pandas-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (18 kB)\n",
+      "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (2.31.0)\n",
+      "Collecting tqdm>=4.62.1 (from datasets)\n",
+      "  Downloading tqdm-4.66.1-py3-none-any.whl.metadata (57 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m57.6/57.6 kB\u001b[0m \u001b[31m17.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting xxhash (from datasets)\n",
+      "  Downloading xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)\n",
+      "Collecting multiprocess (from datasets)\n",
+      "  Downloading multiprocess-0.70.15-py310-none-any.whl.metadata (7.2 kB)\n",
+      "Requirement already satisfied: fsspec<=2023.10.0,>=2023.1.0 in /usr/local/lib/python3.10/dist-packages (from fsspec[http]<=2023.10.0,>=2023.1.0->datasets) (2023.4.0)\n",
+      "Collecting aiohttp (from datasets)\n",
+      "  Downloading aiohttp-3.9.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.4 kB)\n",
+      "Collecting huggingface-hub>=0.18.0 (from datasets)\n",
+      "  Downloading huggingface_hub-0.19.4-py3-none-any.whl.metadata (14 kB)\n",
+      "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from datasets) (23.2)\n",
+      "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from datasets) (6.0.1)\n",
+      "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers) (3.9.0)\n",
+      "Collecting regex!=2019.12.17 (from transformers)\n",
+      "  Downloading regex-2023.10.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (40 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m40.9/40.9 kB\u001b[0m \u001b[31m2.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting tokenizers<0.19,>=0.14 (from transformers)\n",
+      "  Downloading tokenizers-0.15.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)\n",
+      "Collecting safetensors>=0.3.1 (from transformers)\n",
+      "  Downloading safetensors-0.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.8 kB)\n",
+      "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from accelerate) (5.9.6)\n",
+      "Requirement already satisfied: torch>=1.10.0 in /usr/local/lib/python3.10/dist-packages (from accelerate) (2.1.0+cu118)\n",
+      "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (23.1.0)\n",
+      "Collecting multidict<7.0,>=4.5 (from aiohttp->datasets)\n",
+      "  Downloading multidict-6.0.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (114 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m114.5/114.5 kB\u001b[0m \u001b[31m15.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting yarl<2.0,>=1.0 (from aiohttp->datasets)\n",
+      "  Downloading yarl-1.9.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (28 kB)\n",
+      "Collecting frozenlist>=1.1.1 (from aiohttp->datasets)\n",
+      "  Downloading frozenlist-1.4.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.2 kB)\n",
+      "Collecting aiosignal>=1.1.2 (from aiohttp->datasets)\n",
+      "  Downloading aiosignal-1.3.1-py3-none-any.whl (7.6 kB)\n",
+      "Collecting async-timeout<5.0,>=4.0 (from aiohttp->datasets)\n",
+      "  Downloading async_timeout-4.0.3-py3-none-any.whl.metadata (4.2 kB)\n",
+      "INFO: pip is looking at multiple versions of huggingface-hub to determine which version is compatible with other requirements. This could take a while.\n",
+      "Collecting huggingface-hub>=0.18.0 (from datasets)\n",
+      "  Downloading huggingface_hub-0.19.3-py3-none-any.whl.metadata (14 kB)\n",
+      "  Downloading huggingface_hub-0.19.2-py3-none-any.whl.metadata (13 kB)\n",
+      "  Downloading huggingface_hub-0.19.1-py3-none-any.whl.metadata (13 kB)\n",
+      "  Downloading huggingface_hub-0.19.0-py3-none-any.whl.metadata (13 kB)\n",
+      "  Downloading huggingface_hub-0.18.0-py3-none-any.whl.metadata (13 kB)\n",
+      "Collecting fsspec[http]<=2023.10.0,>=2023.1.0 (from datasets)\n",
+      "  Downloading fsspec-2023.10.0-py3-none-any.whl.metadata (6.8 kB)\n",
+      "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.18.0->datasets) (4.4.0)\n",
+      "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (2.1.1)\n",
+      "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (3.4)\n",
+      "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (1.26.13)\n",
+      "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (2022.12.7)\n",
+      "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (1.12)\n",
+      "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (3.0)\n",
+      "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (3.1.2)\n",
+      "Requirement already satisfied: triton==2.1.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (2.1.0)\n",
+      "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2.8.2)\n",
+      "Collecting pytz>=2020.1 (from pandas->datasets)\n",
+      "  Downloading pytz-2023.3.post1-py2.py3-none-any.whl.metadata (22 kB)\n",
+      "Collecting tzdata>=2022.1 (from pandas->datasets)\n",
+      "  Downloading tzdata-2023.3-py2.py3-none-any.whl (341 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m341.8/341.8 kB\u001b[0m \u001b[31m12.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.8.2->pandas->datasets) (1.16.0)\n",
+      "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.10.0->accelerate) (2.1.2)\n",
+      "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.10.0->accelerate) (1.3.0)\n",
+      "Downloading datasets-2.15.0-py3-none-any.whl (521 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m521.2/521.2 kB\u001b[0m \u001b[31m56.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading transformers-4.35.2-py3-none-any.whl (7.9 MB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.9/7.9 MB\u001b[0m \u001b[31m98.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m0:01\u001b[0m\n",
+      "\u001b[?25hDownloading accelerate-0.25.0-py3-none-any.whl (265 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m265.7/265.7 kB\u001b[0m \u001b[31m72.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading dill-0.3.7-py3-none-any.whl (115 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.3/115.3 kB\u001b[0m \u001b[31m34.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading fsspec-2023.10.0-py3-none-any.whl (166 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m166.4/166.4 kB\u001b[0m \u001b[31m46.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading huggingface_hub-0.19.4-py3-none-any.whl (311 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m311.7/311.7 kB\u001b[0m \u001b[31m80.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading aiohttp-3.9.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m73.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading pyarrow-14.0.1-cp310-cp310-manylinux_2_28_x86_64.whl (38.0 MB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m38.0/38.0 MB\u001b[0m \u001b[31m94.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n",
+      "\u001b[?25hDownloading regex-2023.10.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (773 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m773.9/773.9 kB\u001b[0m \u001b[31m188.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading safetensors-0.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m239.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading tokenizers-0.15.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.8 MB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.8/3.8 MB\u001b[0m \u001b[31m123.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading tqdm-4.66.1-py3-none-any.whl (78 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m78.3/78.3 kB\u001b[0m \u001b[31m26.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading multiprocess-0.70.15-py310-none-any.whl (134 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m42.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading pandas-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.3 MB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12.3/12.3 MB\u001b[0m \u001b[31m111.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
+      "\u001b[?25hDownloading pyarrow_hotfix-0.6-py3-none-any.whl (7.9 kB)\n",
+      "Downloading xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m75.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading async_timeout-4.0.3-py3-none-any.whl (5.7 kB)\n",
+      "Downloading frozenlist-1.4.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (225 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m225.7/225.7 kB\u001b[0m \u001b[31m61.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading pytz-2023.3.post1-py2.py3-none-any.whl (502 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m502.5/502.5 kB\u001b[0m \u001b[31m108.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading yarl-1.9.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (300 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m300.7/300.7 kB\u001b[0m \u001b[31m79.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hInstalling collected packages: pytz, xxhash, tzdata, tqdm, safetensors, regex, pyarrow-hotfix, pyarrow, multidict, fsspec, frozenlist, dill, async-timeout, yarl, pandas, multiprocess, huggingface-hub, aiosignal, tokenizers, aiohttp, accelerate, transformers, datasets\n",
+      "  Attempting uninstall: fsspec\n",
+      "    Found existing installation: fsspec 2023.4.0\n",
+      "    Uninstalling fsspec-2023.4.0:\n",
+      "      Successfully uninstalled fsspec-2023.4.0\n",
+      "Successfully installed accelerate-0.25.0 aiohttp-3.9.1 aiosignal-1.3.1 async-timeout-4.0.3 datasets-2.15.0 dill-0.3.7 frozenlist-1.4.0 fsspec-2023.10.0 huggingface-hub-0.19.4 multidict-6.0.4 multiprocess-0.70.15 pandas-2.1.3 pyarrow-14.0.1 pyarrow-hotfix-0.6 pytz-2023.3.post1 regex-2023.10.3 safetensors-0.4.1 tokenizers-0.15.0 tqdm-4.66.1 transformers-4.35.2 tzdata-2023.3 xxhash-3.4.1 yarl-1.9.3\n",
+      "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n",
+      "\u001b[0m"
+     ]
     }
+   ],
+   "source": [
+    "# Install necessary packages\n",
+    "!pip install datasets transformers accelerate"
+   ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 331,
+     "referenced_widgets": [
+      "72d3f33c56a14b01bef05ea2ae98e72f",
+      "8d44cf16b34d40bd8702560ae149192c",
+      "13af4957747d40bbb9501bc13c3c160b",
+      "1be0b602c74f4a7182bd041aeba58112",
+      "af225dc2d79649c2ac0e853c517a0482",
+      "f019d8eee73f4b7b8312029f03040c45",
+      "d1deb908be244cfb8dd253b3cc081510",
+      "06e659f7bd9149e784a5cb68efbad736",
+      "ba1f1326e12d437a9fd76faa226eec44",
+      "65ea346ce3174bb098344531db439eac",
+      "6ac7576be7204aeca73385c48e1c5d0a",
+      "7fd3c4de83234008a72d9541c29ce765",
+      "b81585732ca14c89ae1b5bb25735cd62",
+      "a68a4415305045c0a89c60349175cc82",
+      "7316ece4ad474193bcc5b5db55632561",
+      "c0bb9ace36d64b8296bd35d538bd4f7e",
+      "03ad39c22f644e55bcffa070e3832582"
+     ]
+    },
+    "id": "lpuAz9I-XsQQ",
+    "outputId": "bf420c42-7ff0-4a4c-da57-420691eaec5d"
+   },
+   "outputs": [
     {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "e61086bda2ca40b5a189544f6a8b43a5",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "VBox(children=(HTML(value='<center> <img\\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# Authenticate with HuggingFace Hub\n",
+    "from huggingface_hub import notebook_login\n",
+    "\n",
+    "notebook_login()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "ka0fqf1AYD2N",
+    "outputId": "8c1b6396-cd72-45fd-9d1f-4fd9c76b0966"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Reading package lists... Done\n",
+      "Building dependency tree... Done\n",
+      "Reading state information... Done\n",
+      "git-lfs is already the newest version (3.4.0).\n",
+      "0 upgraded, 0 newly installed, 0 to remove and 27 not upgraded.\n"
+     ]
+    }
+   ],
+   "source": [
+    "!apt install git-lfs"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
     },
+    "id": "evGbNxr8YIA1",
+    "outputId": "08025e50-d542-4f92-8c33-505cd2b44802"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "4.35.2\n"
+     ]
+    }
+   ],
+   "source": [
+    "import transformers\n",
+    "\n",
+    "print(transformers.__version__)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {
+    "id": "cdNnCQv4YMbT"
+   },
+   "outputs": [
     {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "9bc2efdc8b104cba875de6338be8aa7d",
+       "version_major": 2,
+       "version_minor": 0
       },
+      "text/plain": [
+       "Downloading builder script:   0%|          | 0.00/8.48k [00:00<?, ?B/s]"
       ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
     },
     {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "27db41fd4b0647978b8afc17bd08edd5",
+       "version_major": 2,
+       "version_minor": 0
       },
+      "text/plain": [
+       "Downloading metadata:   0%|          | 0.00/6.84k [00:00<?, ?B/s]"
       ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
     },
     {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "9ead5c3dbc6244648b4ad55f57f5dcfd",
+       "version_major": 2,
+       "version_minor": 0
       },
+      "text/plain": [
+       "Downloading readme:   0%|          | 0.00/9.62k [00:00<?, ?B/s]"
       ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
     },
     {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "0d57037874a84ccdb853af35244752e0",
+       "version_major": 2,
+       "version_minor": 0
       },
+      "text/plain": [
+       "Downloading data:   0%|          | 0.00/192M [00:00<?, ?B/s]"
       ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
     },
     {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "bb1e8cf41b0346a3a9a652ab901e2c6f",
+       "version_major": 2,
+       "version_minor": 0
       },
+      "text/plain": [
+       "Generating test split:   0%|          | 0/4358 [00:00<?, ? examples/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
     },
     {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "e421bae84bdd476693e2f8db449fb9b5",
+       "version_major": 2,
+       "version_minor": 0
       },
+      "text/plain": [
+       "Generating train split:   0%|          | 0/1801350 [00:00<?, ? examples/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
     },
     {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "3324d8e2ad3c4db2a1c9779afa407b07",
+       "version_major": 2,
+       "version_minor": 0
       },
+      "text/plain": [
+       "Generating validation split:   0%|          | 0/3760 [00:00<?, ? examples/s]"
       ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# Load dataset\n",
+    "from datasets import load_dataset\n",
+    "\n",
+    "datasets = load_dataset('wikitext', 'wikitext-103-raw-v1')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {
+    "editable": true,
+    "id": "RtYDKWCBY76H",
+    "slideshow": {
+     "slide_type": ""
     },
+    "tags": []
+   },
+   "outputs": [
     {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "9879d982c28742faa5dd9a611623da66",
+       "version_major": 2,
+       "version_minor": 0
       },
+      "text/plain": [
+       "config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
     },
     {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "69b00a15589b47f59c7b9591d62dce7c",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "286aa0cb68904534aa1fb06dd3674c12",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "dbc0fb3079ff446f9c8a7500c549a7ee",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "a34f997d324e48628b2f9c7bd3b8cb7b",
+       "version_major": 2,
+       "version_minor": 0
       },
+      "text/plain": [
+       "Map (num_proc=4):   0%|          | 0/4358 [00:00<?, ? examples/s]"
       ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
     },
     {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "3c65ed66ddf7416b8fbce34b7a14de0c",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Map (num_proc=4):   0%|          | 0/1801350 [00:00<?, ? examples/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Token indices sequence length is longer than the specified maximum sequence length for this model (1132 > 1024). Running this sequence through the model will result in indexing errors\n",
+      "Token indices sequence length is longer than the specified maximum sequence length for this model (1063 > 1024). Running this sequence through the model will result in indexing errors\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "43a4cddf296748669540d04b6ea1ca98",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Map (num_proc=4):   0%|          | 0/3760 [00:00<?, ? examples/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "from transformers import AutoTokenizer\n",
+    "\n",
+    "model=\"gpt2\"\n",
+    "\n",
+    "tokenizer = AutoTokenizer.from_pretrained(model)\n",
+    "\n",
+    "def tokenize_function(examples):\n",
+    "    return tokenizer(examples[\"text\"])\n",
+    "\n",
+    "tokenized_datasets = datasets.map(tokenize_function, batched=True, num_proc=4, remove_columns=[\"text\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "ar5R7hnYZyUm",
+    "outputId": "ceb702a8-8e60-4850-dfa7-ce111c21d875"
+   },
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "8f7da61dac45436bae1028a5a5830ca3",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Map (num_proc=4):   0%|          | 0/4358 [00:00<?, ? examples/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "0d489a0dfd3646efb475785ccfeac718",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Map (num_proc=4):   0%|          | 0/1801350 [00:00<?, ? examples/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "0e9ac2773d894bf89177f96df1945eb9",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Map (num_proc=4):   0%|          | 0/3760 [00:00<?, ? examples/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "awa. A large team of writers handled the script. The game's opening theme was sung by May 'n. \n",
+      " It met with positive sales in Japan, and was praised by both Japanese and western critics. After release, it received downloadable content, along with an expanded edition in November of that year. It was also adapted into manga and an original video animation series. Due to low sales of Valkyria Chronicles II, Valkyria Chronicles III was not localized, but a fan translation compatible with the game's expanded edition was released in 2014. Media.Vision would return to the franchise with the development of Valkyria : Azure Revolution for the PlayStation 4. \n",
+      " = = Gameplay = = \n",
+      " As with previous Valkyira Chronicles games, Valkyria Chronicles III is a tactical role @-@ playing game where players take control of a military unit and take part in missions against enemy forces. Stories are told through comic book @-@ like panels with animated character portraits, with characters speaking partially through voiced speech bubbles and partially through unvoiced text. The player progresses through a series of linear missions, gradually unlocked as maps that can be freely scanned through and replayed as they are unlocked. The route to each story location on the map varies\n"
+     ]
+    }
+   ],
+   "source": [
+    "# block_size = tokenizer.model_max_length\n",
+    "block_size = 256\n",
+    "\n",
+    "# Function to group texts\n",
+    "def group_texts(examples):\n",
+    "    \"\"\"Concatenate a batch of tokenized examples and re-split it into fixed-size blocks.\n",
+    "\n",
+    "    Args:\n",
+    "        examples: Mapping from column name to a list of per-example lists\n",
+    "            (one inner list per row of the batch).\n",
+    "\n",
+    "    Returns:\n",
+    "        A dict with the same columns, each cut into chunks of ``block_size`` items,\n",
+    "        plus a ``labels`` column that is a shallow copy of ``input_ids``.\n",
+    "    \"\"\"\n",
+    "    from itertools import chain\n",
+    "\n",
+    "    # Flatten every column. chain.from_iterable is linear in the number of tokens,\n",
+    "    # whereas sum(lists, []) re-copies the growing accumulator on every addition.\n",
+    "    concatenated_examples = {k: list(chain.from_iterable(examples[k])) for k in examples.keys()}\n",
+    "    total_length = len(concatenated_examples[list(examples.keys())[0]])\n",
+    "    # Drop the small remainder so every chunk is exactly block_size long. Padding would be\n",
+    "    # an alternative if the model supported it; customize this part to your needs.\n",
+    "    total_length = (total_length // block_size) * block_size\n",
+    "    # Split each flattened column into consecutive chunks of block_size.\n",
+    "    result = {\n",
+    "        k: [t[i : i + block_size] for i in range(0, total_length, block_size)]\n",
+    "        for k, t in concatenated_examples.items()\n",
+    "    }\n",
+    "    # Labels start out as a copy of the inputs.\n",
+    "    result[\"labels\"] = result[\"input_ids\"].copy()\n",
+    "    return result\n",
+    "\n",
+    "# Transform from tokenized dataset to lm dataset\n",
+    "lm_datasets = tokenized_datasets.map(\n",
+    "    group_texts,\n",
+    "    batched=True,\n",
+    "    batch_size=1000,\n",
+    "    num_proc=4,\n",
+    ")\n",
+    "\n",
+    "# Print a chunk of the dataset\n",
+    "print(tokenizer.decode(lm_datasets[\"train\"][1][\"input_ids\"]))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {
+    "id": "RZhL1hiibVNK"
+   },
+   "outputs": [],
+   "source": [
+    "# Load GPT2 model\n",
+    "from transformers import AutoModelForCausalLM, Trainer, TrainingArguments\n",
+    "import torch\n",
+    "\n",
+    "language_model = AutoModelForCausalLM.from_pretrained(model)\n",
+    "language_model.train()\n",
+    "\n",
+    "# Configuration for the fine-tuning run. The first positional argument is the output\n",
+    "# directory; it is a plain string literal because there is nothing to interpolate.\n",
+    "training_args = TrainingArguments(\n",
+    "    \"GPT2-124M-wikitext-v0.1\",\n",
+    "    # NOTE(review): newer transformers releases rename this keyword to `eval_strategy`;\n",
+    "    # confirm against the installed version before upgrading.\n",
+    "    evaluation_strategy=\"epoch\",\n",
+    "    num_train_epochs=3,\n",
+    "    learning_rate=2e-5,\n",
+    "    weight_decay=0.01,\n",
+    "    push_to_hub=True,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 75
+    },
+    "editable": true,
+    "id": "EvanxM9wfl6_",
+    "outputId": "194b1923-f8bd-40c8-b62a-a6ae136df8be",
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "\n",
+       "    <div>\n",
+       "      \n",
+       "      <progress value='172401' max='172401' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
+       "      [172401/172401 4:00:07, Epoch 3/3]\n",
+       "    </div>\n",
+       "    <table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       " <tr style=\"text-align: left;\">\n",
+       "      <th>Epoch</th>\n",
+       "      <th>Training Loss</th>\n",
+       "      <th>Validation Loss</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <td>1</td>\n",
+       "      <td>3.133500</td>\n",
+       "      <td>3.036319</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>2</td>\n",
+       "      <td>3.064300</td>\n",
+       "      <td>2.996815</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>3</td>\n",
+       "      <td>3.038400</td>\n",
+       "      <td>2.984082</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table><p>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/plain": [
+       "TrainOutput(global_step=172401, training_loss=3.115963939143425, metrics={'train_runtime': 14407.4906, 'train_samples_per_second': 95.729, 'train_steps_per_second': 11.966, 'total_flos': 1.80188030435328e+17, 'train_loss': 3.115963939143425, 'epoch': 3.0})"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Clear CUDA cache\n",
+    "torch.cuda.empty_cache()\n",
+    "\n",
+    "trainer = Trainer(\n",
+    "    model=language_model,\n",
+    "    args=training_args,\n",
+    "    train_dataset=lm_datasets[\"train\"],\n",
+    "    eval_dataset=lm_datasets[\"validation\"],\n",
+    ")\n",
+    "\n",
+    "trainer.train()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {
+    "editable": true,
+    "id": "77GdAgpkiAOm",
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "\n",
+       "    <div>\n",
+       "      \n",
+       "      <progress value='121' max='121' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
+       "      [121/121 00:02]\n",
+       "    </div>\n",
+       "    "
       ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Perplexity: 19.77\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "e09e7aeb94d84691a5d4a31832d64e0d",
+       "version_major": 2,
+       "version_minor": 0
       },
+      "text/plain": [
+       "model.safetensors:   0%|          | 0.00/498M [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/plain": [
+       "'https://huggingface.co/dannoncaffeine/GPT2-124M-wikitext-v0.1/tree/main/'"
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import math\n",
+    "\n",
+    "eval_results = trainer.evaluate()\n",
+    "print(f\"Perplexity: {math.exp(eval_results['eval_loss']):.2f}\")\n",
+    "\n",
+    "trainer.push_to_hub()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "accelerator": "GPU",
+  "colab": {
+   "gpuType": "T4",
+   "provenance": []
+  },
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  },
+  "widgets": {
+   "application/vnd.jupyter.widget-state+json": {
+    "03ad39c22f644e55bcffa070e3832582": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "06e659f7bd9149e784a5cb68efbad736": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "13af4957747d40bbb9501bc13c3c160b": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "PasswordModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "PasswordModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "PasswordView",
+      "continuous_update": true,
+      "description": "Token:",
+      "description_tooltip": null,
+      "disabled": false,
+      "layout": "IPY_MODEL_65ea346ce3174bb098344531db439eac",
+      "placeholder": "",
+      "style": "IPY_MODEL_6ac7576be7204aeca73385c48e1c5d0a",
+      "value": ""
+     }
+    },
+    "1be0b602c74f4a7182bd041aeba58112": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "CheckboxModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "CheckboxModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "CheckboxView",
+      "description": "Add token as git credential?",
+      "description_tooltip": null,
+      "disabled": false,
+      "indent": true,
+      "layout": "IPY_MODEL_7fd3c4de83234008a72d9541c29ce765",
+      "style": "IPY_MODEL_b81585732ca14c89ae1b5bb25735cd62",
+      "value": true
+     }
+    },
+    "65ea346ce3174bb098344531db439eac": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "6ac7576be7204aeca73385c48e1c5d0a": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "72d3f33c56a14b01bef05ea2ae98e72f": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "VBoxModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "VBoxModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "VBoxView",
+      "box_style": "",
+      "children": [
+       "IPY_MODEL_8d44cf16b34d40bd8702560ae149192c",
+       "IPY_MODEL_13af4957747d40bbb9501bc13c3c160b",
+       "IPY_MODEL_1be0b602c74f4a7182bd041aeba58112",
+       "IPY_MODEL_af225dc2d79649c2ac0e853c517a0482",
+       "IPY_MODEL_f019d8eee73f4b7b8312029f03040c45"
+      ],
+      "layout": "IPY_MODEL_d1deb908be244cfb8dd253b3cc081510"
+     }
+    },
+    "7316ece4ad474193bcc5b5db55632561": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "ButtonStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "ButtonStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "button_color": null,
+      "font_weight": ""
+     }
+    },
+    "7fd3c4de83234008a72d9541c29ce765": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "8d44cf16b34d40bd8702560ae149192c": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_06e659f7bd9149e784a5cb68efbad736",
+      "placeholder": "",
+      "style": "IPY_MODEL_ba1f1326e12d437a9fd76faa226eec44",
+      "value": "<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.svg\nalt='Hugging Face'> <br> Copy a token from <a\nhref=\"https://huggingface.co/settings/tokens\" target=\"_blank\">your Hugging Face\ntokens page</a> and paste it below. <br> Immediately click login after copying\nyour token or it might be stored in plain text in this notebook file. </center>"
+     }
+    },
+    "a68a4415305045c0a89c60349175cc82": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "af225dc2d79649c2ac0e853c517a0482": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "ButtonModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "ButtonModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "ButtonView",
+      "button_style": "",
+      "description": "Login",
+      "disabled": false,
+      "icon": "",
+      "layout": "IPY_MODEL_a68a4415305045c0a89c60349175cc82",
+      "style": "IPY_MODEL_7316ece4ad474193bcc5b5db55632561",
+      "tooltip": ""
+     }
+    },
+    "b81585732ca14c89ae1b5bb25735cd62": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "ba1f1326e12d437a9fd76faa226eec44": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "c0bb9ace36d64b8296bd35d538bd4f7e": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "d1deb908be244cfb8dd253b3cc081510": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": "center",
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": "flex",
+      "flex": null,
+      "flex_flow": "column",
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": "50%"
+     }
+    },
+    "f019d8eee73f4b7b8312029f03040c45": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_c0bb9ace36d64b8296bd35d538bd4f7e",
+      "placeholder": "",
+      "style": "IPY_MODEL_03ad39c22f644e55bcffa070e3832582",
+      "value": "\n<b>Pro Tip:</b> If you don't already have one, you can create a dedicated\n'notebooks' token with 'write' access, that you can then easily reuse for all\nnotebooks. </center>"
+     }
     }
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}