{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "Fine-tune a language model with dataset streaming and 8-bit optimizers",
      "provenance": [],
      "collapsed_sections": [],
      "include_colab_link": true
    },
    "language_info": {
      "name": "python"
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "accelerator": "GPU",
    "widgets": {
      "application/vnd.jupyter.widget-state+json": {
        "372609dca95b4ddcb51491283df860f5": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "HBoxView",
            "_dom_classes": [],
            "_model_name": "HBoxModel",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "box_style": "",
            "layout": "IPY_MODEL_e0b881dd26d54c7c92ba9ab5923fab10",
            "_model_module": "@jupyter-widgets/controls",
            "children": [
              "IPY_MODEL_90eb62f7ec634e098db511a5995d807f",
              "IPY_MODEL_d60a799761f649378d17a044362e55b9",
              "IPY_MODEL_a76cf6149c6748a5aa74ada58921d31e"
            ]
          }
        },
        "e0b881dd26d54c7c92ba9ab5923fab10": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "90eb62f7ec634e098db511a5995d807f": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_5256dfd69e364597a29b2ad61c01ea93",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "​",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": "Downloading: ",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_1a231c5cffbb4225941d02cb5e3bb273"
          }
        },
        "d60a799761f649378d17a044362e55b9": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "ProgressView",
            "style": "IPY_MODEL_cffbeabee69446c48dfa89ac38d9f45e",
            "_dom_classes": [],
            "description": "",
            "_model_name": "FloatProgressModel",
            "bar_style": "success",
            "max": 1376,
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": 1376,
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "orientation": "horizontal",
            "min": 0,
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_0c02af4a252e40fba08c097f59926dc8"
          }
        },
        "a76cf6149c6748a5aa74ada58921d31e": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_2917b5df9cc14ec3a2fe356c12c8511e",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "​",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": " 3.29k/? [00:00<00:00, 76.9kB/s]",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_5c3abd7a7f354ac0b1cd3d89506f417a"
          }
        },
        "5256dfd69e364597a29b2ad61c01ea93": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "1a231c5cffbb4225941d02cb5e3bb273": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "cffbeabee69446c48dfa89ac38d9f45e": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "ProgressStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "bar_color": null,
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "0c02af4a252e40fba08c097f59926dc8": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "2917b5df9cc14ec3a2fe356c12c8511e": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "5c3abd7a7f354ac0b1cd3d89506f417a": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "8714bb6e944345b98b691f40adf0bf76": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "HBoxView",
            "_dom_classes": [],
            "_model_name": "HBoxModel",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "box_style": "",
            "layout": "IPY_MODEL_a2983efe78b94919891d6db909100934",
            "_model_module": "@jupyter-widgets/controls",
            "children": [
              "IPY_MODEL_b61fb9c3745f4c468954713b07f8f16f",
              "IPY_MODEL_5f05edadc8c943aa82a790e815253a48",
              "IPY_MODEL_867c7dd23eb64a1b8d02a7cf8a4ad64a"
            ]
          }
        },
        "a2983efe78b94919891d6db909100934": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "b61fb9c3745f4c468954713b07f8f16f": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_01445fea254a436fa464b6006f3abd92",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "​",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": "Downloading: ",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_74c4d6d598bf4173999a56fa849506d3"
          }
        },
        "5f05edadc8c943aa82a790e815253a48": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "ProgressView",
            "style": "IPY_MODEL_e299d23c03444805b0a359eb049cddb3",
            "_dom_classes": [],
            "description": "",
            "_model_name": "FloatProgressModel",
            "bar_style": "success",
            "max": 492167,
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": 492167,
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "orientation": "horizontal",
            "min": 0,
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_c2f4080d692a46debfb5a8000d2637d6"
          }
        },
        "867c7dd23eb64a1b8d02a7cf8a4ad64a": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_b0cbc589d149494cbb74139bedb0aafc",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "​",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": " 2.40M/? [00:00<00:00, 18.6MB/s]",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_b3fec901d0cc48a2862b900928b681f3"
          }
        },
        "01445fea254a436fa464b6006f3abd92": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "74c4d6d598bf4173999a56fa849506d3": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "e299d23c03444805b0a359eb049cddb3": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "ProgressStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "bar_color": null,
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "c2f4080d692a46debfb5a8000d2637d6": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "b0cbc589d149494cbb74139bedb0aafc": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "b3fec901d0cc48a2862b900928b681f3": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "619629ebf5fc4e3ba9ad49e7e767a37d": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "HBoxView",
            "_dom_classes": [],
            "_model_name": "HBoxModel",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "box_style": "",
            "layout": "IPY_MODEL_87a5f7bca18e4a818d6d48e15bc68845",
            "_model_module": "@jupyter-widgets/controls",
            "children": [
              "IPY_MODEL_75d276bd972d429483af64d1eb27624c",
              "IPY_MODEL_6f778a9c54f042199869eb16563bb933",
              "IPY_MODEL_275f3cf6de1f49c1ae1f09991b4ea99c"
            ]
          }
        },
        "87a5f7bca18e4a818d6d48e15bc68845": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "75d276bd972d429483af64d1eb27624c": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_670d440077b34a8e86bf2d620ab6fb6d",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "​",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": "Downloading: 100%",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_ac7455b1e5e5475cabf4dab505603a43"
          }
        },
        "6f778a9c54f042199869eb16563bb933": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "ProgressView",
            "style": "IPY_MODEL_4e82c37304d446d88e852553cbe9acb9",
            "_dom_classes": [],
            "description": "",
            "_model_name": "FloatProgressModel",
            "bar_style": "success",
            "max": 666,
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": 666,
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "orientation": "horizontal",
            "min": 0,
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_213fc01fa39b4b77ba211a9304c5ea86"
          }
        },
        "275f3cf6de1f49c1ae1f09991b4ea99c": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_a9dae405fbce41e3a46b05292313bab0",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "​",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": " 666/666 [00:00<00:00, 15.4kB/s]",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_272807a127d54f5ea3f20c0bc7262a25"
          }
        },
        "670d440077b34a8e86bf2d620ab6fb6d": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "ac7455b1e5e5475cabf4dab505603a43": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "4e82c37304d446d88e852553cbe9acb9": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "ProgressStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "bar_color": null,
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "213fc01fa39b4b77ba211a9304c5ea86": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "a9dae405fbce41e3a46b05292313bab0": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "272807a127d54f5ea3f20c0bc7262a25": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "1ce665de582c4bd392d6bc4fff9a1499": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "HBoxView",
            "_dom_classes": [],
            "_model_name": "HBoxModel",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "box_style": "",
            "layout": "IPY_MODEL_36cdf4ebc5684e9e88d80d5c98d86154",
            "_model_module": "@jupyter-widgets/controls",
            "children": [
              "IPY_MODEL_288d1670c87e469ba3fe7c0030887d19",
              "IPY_MODEL_eeb0bf36fb93410f82ab96b5414b6c50",
              "IPY_MODEL_3c8d0050e0904acf933d18316000ac8a"
            ]
          }
        },
        "36cdf4ebc5684e9e88d80d5c98d86154": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "288d1670c87e469ba3fe7c0030887d19": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_3be14001505b4dd19293365cb4a52cbc",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "​",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": "Downloading: 100%",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_4195ef4de4ba491995e28b72e4d82a5b"
          }
        },
        "eeb0bf36fb93410f82ab96b5414b6c50": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "ProgressView",
            "style": "IPY_MODEL_cafc6a76a9ac4de79b46969ebab90265",
            "_dom_classes": [],
            "description": "",
            "_model_name": "FloatProgressModel",
            "bar_style": "success",
            "max": 1042301,
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": 1042301,
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "orientation": "horizontal",
            "min": 0,
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_9e6e0d9075454e17932e147678c1cef6"
          }
        },
        "3c8d0050e0904acf933d18316000ac8a": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_7c83e50b67374fa5a2d713c036ba8e84",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "​",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": " 0.99M/0.99M [00:00<00:00, 1.42MB/s]",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_0e0d370b48c84349a7f7cc45ed5f9d09"
          }
        },
        "3be14001505b4dd19293365cb4a52cbc": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "4195ef4de4ba491995e28b72e4d82a5b": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "cafc6a76a9ac4de79b46969ebab90265": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "ProgressStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "bar_color": null,
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "9e6e0d9075454e17932e147678c1cef6": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "7c83e50b67374fa5a2d713c036ba8e84": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "0e0d370b48c84349a7f7cc45ed5f9d09": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "2222296ad8d14cd39e8af17c108ad5ed": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "HBoxView",
            "_dom_classes": [],
            "_model_name": "HBoxModel",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "box_style": "",
            "layout": "IPY_MODEL_e39daeca11584766a42f9bc6df76089c",
            "_model_module": "@jupyter-widgets/controls",
            "children": [
              "IPY_MODEL_8fad1d6738b84704a3b4d248dbcfac2b",
              "IPY_MODEL_405c42d0df19419e8563b00a3f402ae9",
              "IPY_MODEL_7e2ceabdd41446cc860e35f94e02de31"
            ]
          }
        },
        "e39daeca11584766a42f9bc6df76089c": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "8fad1d6738b84704a3b4d248dbcfac2b": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_5b2f0da9535f46d78999e11162e16666",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "​",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": "Downloading: 100%",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_d47fc8e1a9a84efcb6d9a02ba973e940"
          }
        },
        "405c42d0df19419e8563b00a3f402ae9": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "ProgressView",
            "style": "IPY_MODEL_a127db6cdeae4700834c1dc582ecb609",
            "_dom_classes": [],
            "description": "",
            "_model_name": "FloatProgressModel",
            "bar_style": "success",
            "max": 456318,
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": 456318,
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "orientation": "horizontal",
            "min": 0,
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_b9a0b5fe13134774a7560a583513c7cd"
          }
        },
        "7e2ceabdd41446cc860e35f94e02de31": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_66d58d9dab054294a81601572b143c00",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "​",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": " 446k/446k [00:00<00:00, 1.46MB/s]",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_75f479bbe938484891214f27986364eb"
          }
        },
        "5b2f0da9535f46d78999e11162e16666": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "d47fc8e1a9a84efcb6d9a02ba973e940": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "a127db6cdeae4700834c1dc582ecb609": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "ProgressStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "bar_color": null,
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "b9a0b5fe13134774a7560a583513c7cd": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "66d58d9dab054294a81601572b143c00": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "75f479bbe938484891214f27986364eb": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "05c7977a9322499bbc00e80f0d767bee": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "HBoxView",
            "_dom_classes": [],
            "_model_name": "HBoxModel",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "box_style": "",
            "layout": "IPY_MODEL_9a76a0ee943343d781caf5b30ce0c6a5",
            "_model_module": "@jupyter-widgets/controls",
            "children": [
              "IPY_MODEL_f454951d36c74e13bc3cf04b41388345",
              "IPY_MODEL_1974ea03fa42428fa162eac65f8d71e4",
              "IPY_MODEL_96cdea5bf8ba44f3a37e5e1f70cbef62"
            ]
          }
        },
        "9a76a0ee943343d781caf5b30ce0c6a5": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "f454951d36c74e13bc3cf04b41388345": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_221d95935ae249bb8fbf60007ed39e82",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "​",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": "Downloading: 100%",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_0a5ca2e268c24ae1a9e42d5c9dcadb29"
          }
        },
        "1974ea03fa42428fa162eac65f8d71e4": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "ProgressView",
            "style": "IPY_MODEL_997827c35e02494d82209fa5f1232e5f",
            "_dom_classes": [],
            "description": "",
            "_model_name": "FloatProgressModel",
            "bar_style": "success",
            "max": 1355256,
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": 1355256,
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "orientation": "horizontal",
            "min": 0,
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_aaf3281319a94b42ba21941f41d262bc"
          }
        },
        "96cdea5bf8ba44f3a37e5e1f70cbef62": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_ffa4cf74c0c84e0797de60e50e1f579c",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "​",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": " 1.29M/1.29M [00:00<00:00, 5.20MB/s]",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_d60228cfbd9f44ddbcefb20a74d99779"
          }
        },
        "221d95935ae249bb8fbf60007ed39e82": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "0a5ca2e268c24ae1a9e42d5c9dcadb29": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "997827c35e02494d82209fa5f1232e5f": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "ProgressStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "bar_color": null,
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "aaf3281319a94b42ba21941f41d262bc": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "ffa4cf74c0c84e0797de60e50e1f579c": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "d60228cfbd9f44ddbcefb20a74d99779": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "8bff97fa5703438685342e1b140234f5": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "HBoxView",
            "_dom_classes": [],
            "_model_name": "HBoxModel",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "box_style": "",
            "layout": "IPY_MODEL_542ccbd2abf941fea5312b72c51ec5b6",
            "_model_module": "@jupyter-widgets/controls",
            "children": [
              "IPY_MODEL_f71eb8c60e4b4ef6b44b9f6b90bf25c5",
              "IPY_MODEL_1355930a0edf40e5ac75198e9d901bfb",
              "IPY_MODEL_ce047a309f984458859ddfc15aad1125"
            ]
          }
        },
        "542ccbd2abf941fea5312b72c51ec5b6": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "f71eb8c60e4b4ef6b44b9f6b90bf25c5": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_c82b029db5a547f09d1b9607a5d26ecc",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "​",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": "Downloading: 100%",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_01312ccfa1254c7a8acc6a7410af711a"
          }
        },
        "1355930a0edf40e5ac75198e9d901bfb": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "ProgressView",
            "style": "IPY_MODEL_7db4a94408b94d71ab9f6fccded0f011",
            "_dom_classes": [],
            "description": "",
            "_model_name": "FloatProgressModel",
            "bar_style": "success",
            "max": 3247202234,
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": 3247202234,
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "orientation": "horizontal",
            "min": 0,
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_356eaaf643894327a0ba30049c3a2b92"
          }
        },
        "ce047a309f984458859ddfc15aad1125": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_874af3844342411096d6d8872354f531",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "​",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": " 3.02G/3.02G [02:35<00:00, 10.9MB/s]",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_fd8cb9272f3d4ff9ae673c724a6b757b"
          }
        },
        "c82b029db5a547f09d1b9607a5d26ecc": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "01312ccfa1254c7a8acc6a7410af711a": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "7db4a94408b94d71ab9f6fccded0f011": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "ProgressStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "bar_color": null,
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "356eaaf643894327a0ba30049c3a2b92": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "874af3844342411096d6d8872354f531": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "fd8cb9272f3d4ff9ae673c724a6b757b": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "6e2b38e0faf64f529e849f37ad4d4eab": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "HBoxView",
            "_dom_classes": [],
            "_model_name": "HBoxModel",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "box_style": "",
            "layout": "IPY_MODEL_ba6e9cadc3274a64ae0738fb99c55860",
            "_model_module": "@jupyter-widgets/controls",
            "children": [
              "IPY_MODEL_62edc38a7caf4feca6db6de2d767fbb3",
              "IPY_MODEL_155616f4494440b291ae4722aaeca750",
              "IPY_MODEL_2045670c97194bd68a398213552d8364"
            ]
          }
        },
        "ba6e9cadc3274a64ae0738fb99c55860": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "62edc38a7caf4feca6db6de2d767fbb3": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_1ca831c613424bea8f42c4a89f44b2b5",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "​",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": "  0%",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_b352385cb0664a8b873f5028a728c4e3"
          }
        },
        "155616f4494440b291ae4722aaeca750": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "ProgressView",
            "style": "IPY_MODEL_085e99b8131f4f8bb65f2ed723a21b48",
            "_dom_classes": [],
            "description": "",
            "_model_name": "FloatProgressModel",
            "bar_style": "",
            "max": 1000000,
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": 32,
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "orientation": "horizontal",
            "min": 0,
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_d1ec06e6abca4b35b8e22856bcc9c5ce"
          }
        },
        "2045670c97194bd68a398213552d8364": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_565f3cec67374748bdc8baf19f18963f",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "​",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": " 32/1000000 [03:13<1383:55:47,  4.98s/it]",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_358cb048c5a04a7cb2e9de91017baf01"
          }
        },
        "1ca831c613424bea8f42c4a89f44b2b5": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "b352385cb0664a8b873f5028a728c4e3": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "085e99b8131f4f8bb65f2ed723a21b48": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "ProgressStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "bar_color": null,
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "d1ec06e6abca4b35b8e22856bcc9c5ce": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "565f3cec67374748bdc8baf19f18963f": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "358cb048c5a04a7cb2e9de91017baf01": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        }
      }
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/gist/justheuristic/75f6a2a731f05a213a55cd2c8a458aaf/fine-tune-a-language-model-with-dataset-streaming-and-8-bit-optimizers.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "Kw6MQx4xFjgy"
      },
      "source": [
        "__This notebook__ explains how to fine-tune GPT-2 Large on a large dataset in colab or on your home computer.\n",
        "\n",
        "To fit this task into a colab instance, we will use two tricks:\n",
        "* streaming the [C4 dataset](https://huggingface.co/datasets/c4) using [`datasets` Streaming API](https://huggingface.co/docs/datasets/dataset_streaming.html). Without that, C4 would take up over 300GB of disk space.\n",
        "* training with 8-Bit Adam from the [`bitsandbytes` library](https://github.com/facebookresearch/bitsandbytes). Without 8-bit compression, training GPT-2 Large would not fit in GPU memory.\n",
        "\n",
        "\n",
        "\n",
        "This notebook is based on the [\"fine-tune a language model\"](https://github.com/huggingface/notebooks/blob/master/examples/language_modeling.ipynb) tutorial by [Sylvain Gugger](https://sgugger.github.io/pages/about-me.html#about-me) as well as the [pytorch language-model example](https://github.com/huggingface/transformers/blob/master/examples/pytorch/language-modeling/run_clm_no_trainer.py)."
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "-E5Z6CLC6UfJ"
      },
      "source": [
        "# Installation"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "X4cRE8IbIrIV"
      },
      "source": [
        "If you're opening this Notebook on colab, you will probably need to install 🤗 Transformers and 🤗 Datasets. Uncomment the following cell and run it."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "MOsHUjgdIrIW",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "4940dd79-5eed-4eaa-8ac4-55d6ee49a454"
      },
      "source": [
        " ! pip install datasets transformers"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Collecting datasets\n",
            "  Downloading datasets-1.16.1-py3-none-any.whl (298 kB)\n",
            "\u001b[K     |████████████████████████████████| 298 kB 5.3 MB/s \n",
            "\u001b[?25hCollecting transformers\n",
            "  Downloading transformers-4.12.5-py3-none-any.whl (3.1 MB)\n",
            "\u001b[K     |████████████████████████████████| 3.1 MB 34.1 MB/s \n",
            "\u001b[?25hCollecting xxhash\n",
            "  Downloading xxhash-2.0.2-cp37-cp37m-manylinux2010_x86_64.whl (243 kB)\n",
            "\u001b[K     |████████████████████████████████| 243 kB 52.1 MB/s \n",
            "\u001b[?25hRequirement already satisfied: pandas in /usr/local/lib/python3.7/dist-packages (from datasets) (1.1.5)\n",
            "Collecting fsspec[http]>=2021.05.0\n",
            "  Downloading fsspec-2021.11.1-py3-none-any.whl (132 kB)\n",
            "\u001b[K     |████████████████████████████████| 132 kB 38.4 MB/s \n",
            "\u001b[?25hRequirement already satisfied: packaging in /usr/local/lib/python3.7/dist-packages (from datasets) (21.3)\n",
            "Requirement already satisfied: pyarrow!=4.0.0,>=3.0.0 in /usr/local/lib/python3.7/dist-packages (from datasets) (3.0.0)\n",
            "Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from datasets) (4.8.2)\n",
            "Collecting aiohttp\n",
            "  Downloading aiohttp-3.8.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (1.1 MB)\n",
            "\u001b[K     |████████████████████████████████| 1.1 MB 36.3 MB/s \n",
            "\u001b[?25hRequirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.7/dist-packages (from datasets) (1.19.5)\n",
            "Requirement already satisfied: tqdm>=4.62.1 in /usr/local/lib/python3.7/dist-packages (from datasets) (4.62.3)\n",
            "Collecting huggingface-hub<1.0.0,>=0.1.0\n",
            "  Downloading huggingface_hub-0.2.1-py3-none-any.whl (61 kB)\n",
            "\u001b[K     |████████████████████████████████| 61 kB 395 kB/s \n",
            "\u001b[?25hRequirement already satisfied: multiprocess in /usr/local/lib/python3.7/dist-packages (from datasets) (0.70.12.2)\n",
            "Requirement already satisfied: dill in /usr/local/lib/python3.7/dist-packages (from datasets) (0.3.4)\n",
            "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.7/dist-packages (from datasets) (2.23.0)\n",
            "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0.0,>=0.1.0->datasets) (3.10.0.2)\n",
            "Requirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0.0,>=0.1.0->datasets) (3.4.0)\n",
            "Requirement already satisfied: pyyaml in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0.0,>=0.1.0->datasets) (3.13)\n",
            "Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging->datasets) (3.0.6)\n",
            "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets) (2.10)\n",
            "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets) (3.0.4)\n",
            "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets) (1.24.3)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets) (2021.10.8)\n",
            "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.7/dist-packages (from transformers) (2019.12.20)\n",
            "Collecting tokenizers<0.11,>=0.10.1\n",
            "  Downloading tokenizers-0.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3 MB)\n",
            "\u001b[K     |████████████████████████████████| 3.3 MB 33.4 MB/s \n",
            "\u001b[?25hCollecting pyyaml\n",
            "  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)\n",
            "\u001b[K     |████████████████████████████████| 596 kB 35.6 MB/s \n",
            "\u001b[?25hCollecting sacremoses\n",
            "  Downloading sacremoses-0.0.46-py3-none-any.whl (895 kB)\n",
            "\u001b[K     |████████████████████████████████| 895 kB 42.3 MB/s \n",
            "\u001b[?25hCollecting aiosignal>=1.1.2\n",
            "  Downloading aiosignal-1.2.0-py3-none-any.whl (8.2 kB)\n",
            "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets) (21.2.0)\n",
            "Collecting multidict<7.0,>=4.5\n",
            "  Downloading multidict-5.2.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (160 kB)\n",
            "\u001b[K     |████████████████████████████████| 160 kB 49.5 MB/s \n",
            "\u001b[?25hCollecting frozenlist>=1.1.1\n",
            "  Downloading frozenlist-1.2.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (192 kB)\n",
            "\u001b[K     |████████████████████████████████| 192 kB 52.2 MB/s \n",
            "\u001b[?25hRequirement already satisfied: charset-normalizer<3.0,>=2.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets) (2.0.8)\n",
            "Collecting yarl<2.0,>=1.0\n",
            "  Downloading yarl-1.7.2-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (271 kB)\n",
            "\u001b[K     |████████████████████████████████| 271 kB 55.7 MB/s \n",
            "\u001b[?25hCollecting asynctest==0.13.0\n",
            "  Downloading asynctest-0.13.0-py3-none-any.whl (26 kB)\n",
            "Collecting async-timeout<5.0,>=4.0.0a3\n",
            "  Downloading async_timeout-4.0.1-py3-none-any.whl (5.7 kB)\n",
            "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->datasets) (3.6.0)\n",
            "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas->datasets) (2.8.2)\n",
            "Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.7/dist-packages (from pandas->datasets) (2018.9)\n",
            "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.7.3->pandas->datasets) (1.15.0)\n",
            "Requirement already satisfied: click in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (7.1.2)\n",
            "Requirement already satisfied: joblib in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (1.1.0)\n",
            "Installing collected packages: multidict, frozenlist, yarl, asynctest, async-timeout, aiosignal, pyyaml, fsspec, aiohttp, xxhash, tokenizers, sacremoses, huggingface-hub, transformers, datasets\n",
            "  Attempting uninstall: pyyaml\n",
            "    Found existing installation: PyYAML 3.13\n",
            "    Uninstalling PyYAML-3.13:\n",
            "      Successfully uninstalled PyYAML-3.13\n",
            "Successfully installed aiohttp-3.8.1 aiosignal-1.2.0 async-timeout-4.0.1 asynctest-0.13.0 datasets-1.16.1 frozenlist-1.2.0 fsspec-2021.11.1 huggingface-hub-0.2.1 multidict-5.2.0 pyyaml-6.0 sacremoses-0.0.46 tokenizers-0.10.3 transformers-4.12.5 xxhash-2.0.2 yarl-1.7.2\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "Aewv5KT-JnHz"
      },
      "source": [
        "\n",
        "We are also installing bitsandbytes which depends on the CUDA version run by your colab. The installed CUDA version is displayed in the top right when calling nvidia-smi. Use this version to install the right bitsandbytes version below. We need a GPU for this, so if you have not yet a GPU loaded use Runtime-> Change runtime type -> select GPU from the dropdown."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "XprTmlZuJpXL",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "c7343f62-43ef-41f2-972d-6285793fecba"
      },
      "source": [
        "! nvidia-smi\n",
        "! pip install bitsandbytes-cuda112"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Sat Dec  4 19:08:21 2021       \n",
            "+-----------------------------------------------------------------------------+\n",
            "| NVIDIA-SMI 495.44       Driver Version: 460.32.03    CUDA Version: 11.2     |\n",
            "|-------------------------------+----------------------+----------------------+\n",
            "| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |\n",
            "| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |\n",
            "|                               |                      |               MIG M. |\n",
            "|===============================+======================+======================|\n",
            "|   0  Tesla K80           Off  | 00000000:00:04.0 Off |                    0 |\n",
            "| N/A   36C    P8    28W / 149W |      0MiB / 11441MiB |      0%      Default |\n",
            "|                               |                      |                  N/A |\n",
            "+-------------------------------+----------------------+----------------------+\n",
            "                                                                               \n",
            "+-----------------------------------------------------------------------------+\n",
            "| Processes:                                                                  |\n",
            "|  GPU   GI   CI        PID   Type   Process name                  GPU Memory |\n",
            "|        ID   ID                                                   Usage      |\n",
            "|=============================================================================|\n",
            "|  No running processes found                                                 |\n",
            "+-----------------------------------------------------------------------------+\n",
            "Collecting bitsandbytes-cuda112\n",
            "  Downloading bitsandbytes_cuda112-0.26.0-py3-none-any.whl (4.2 MB)\n",
            "\u001b[K     |████████████████████████████████| 4.2 MB 5.4 MB/s \n",
            "\u001b[?25hInstalling collected packages: bitsandbytes-cuda112\n",
            "Successfully installed bitsandbytes-cuda112-0.26.0\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "ZyC8zpEqGddx"
      },
      "source": [
        "To test the bitsandbytes installation we can run a simple update with 8-bit Adam."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "A8HODqE2E_6N",
        "outputId": "e1888813-c491-4d02-baa9-7e0e312d2a36"
      },
      "source": [
        "import bitsandbytes as bnb\n",
        "import torch\n",
        "\n",
        "p = torch.nn.Parameter(torch.rand(10,10).cuda())\n",
        "a = torch.rand(10,10).cuda()\n",
        "\n",
        "p1 = p.data.sum().item()\n",
        "\n",
        "adam = bnb.optim.Adam8bit([p])\n",
        "\n",
        "out = a*p\n",
        "loss = out.sum()\n",
        "loss.backward()\n",
        "adam.step()\n",
        "\n",
        "p2 = p.data.sum().item()\n",
        "\n",
        "assert p1 != p2\n",
        "print('SUCCESS!')\n",
        "print('Installation was successful!')"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "SUCCESS!\n",
            "Installation was successful!\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "GDkUr_zQBoZR"
      },
      "source": [
        "# Dataset Streaming"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "i4UPzShfBrgr"
      },
      "source": [
        "Pre-training often requires a huge text dataset. Some famous datasets for pre-training are [C4](https://huggingface.co/datasets/c4), its multilingual version [mC4](https://huggingface.co/datasets/mc4), as well as [OSCAR](https://huggingface.co/datasets/oscar).\n",
        "\n",
        "These datasets can be terabytes of data and require a lot of resources:\n",
        "- a good bandwidth to download all the data\n",
        "- terabytes of disk space to store the data\n",
        "- dozens of CPUs and a good infrastructure to tokenize the text dataset\n",
        "- lots of time to wait for the tokenization to happen\n",
        "\n",
        "This makes it very impractical to get your hands on a dataset for pretraining when you have limited resources."
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "6xabIliEFxyo"
      },
      "source": [
        "Dataset streaming is the solution in this case. Streaming allows to simply have access to the very small subset of the dataset that you need at any time during training. Text samples are progressively downloaded during training, and processed on-the-fly.\n",
        "\n",
        "Thanks to dataset streaming:\n",
        "- training can start directly without waiting for terabytes of data to be downloaded\n",
        "- you can use an arbitrarily large dataset, without being constrained by your disk space\n",
        "- you can process the batches of text as they arrive with a regular CPU\n",
        "- you don't waste time processing examples that are not immediately needed for training"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "9JIEFS8nISgn"
      },
      "source": [
        "Dataset streaming is available in [Hugging Face Datasets](https://github.com/huggingface/datasets) - you simply need to pass `streaming=True` when loading a dataset, and it can be used with a PyTorch data loader.\n",
        "\n",
        "![dataset streaming](https://huggingface.co/docs/datasets/_images/stream.gif \"Dataset Streaming\")\n",
        "\n",
        "Hugging face Datasets also allows you to process examples on-the-fly via `.map()` and shuffle the dataset with `.shuffle()`.\n",
        "\n",
        "Here is an example on how to load the C4 dataset in streaming mode:"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 183,
          "referenced_widgets": [
            "372609dca95b4ddcb51491283df860f5",
            "e0b881dd26d54c7c92ba9ab5923fab10",
            "90eb62f7ec634e098db511a5995d807f",
            "d60a799761f649378d17a044362e55b9",
            "a76cf6149c6748a5aa74ada58921d31e",
            "5256dfd69e364597a29b2ad61c01ea93",
            "1a231c5cffbb4225941d02cb5e3bb273",
            "cffbeabee69446c48dfa89ac38d9f45e",
            "0c02af4a252e40fba08c097f59926dc8",
            "2917b5df9cc14ec3a2fe356c12c8511e",
            "5c3abd7a7f354ac0b1cd3d89506f417a",
            "8714bb6e944345b98b691f40adf0bf76",
            "a2983efe78b94919891d6db909100934",
            "b61fb9c3745f4c468954713b07f8f16f",
            "5f05edadc8c943aa82a790e815253a48",
            "867c7dd23eb64a1b8d02a7cf8a4ad64a",
            "01445fea254a436fa464b6006f3abd92",
            "74c4d6d598bf4173999a56fa849506d3",
            "e299d23c03444805b0a359eb049cddb3",
            "c2f4080d692a46debfb5a8000d2637d6",
            "b0cbc589d149494cbb74139bedb0aafc",
            "b3fec901d0cc48a2862b900928b681f3"
          ]
        },
        "id": "N4E5ZT6rJnm7",
        "outputId": "f7c35c58-83c5-4cd4-8032-059c6da674a3"
      },
      "source": [
        "from datasets import load_dataset\n",
        "\n",
        "c4 = load_dataset(\"c4\", \"en\", streaming=True)\n",
        "\n",
        "# Let's print a few examples\n",
        "for i, example in enumerate(c4[\"train\"]):\n",
        "    print(f\"{i}: {str(example)[:200]}...\")\n",
        "    if i == 5:\n",
        "        break\n"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "372609dca95b4ddcb51491283df860f5",
              "version_minor": 0,
              "version_major": 2
            },
            "text/plain": [
              "Downloading:   0%|          | 0.00/1.38k [00:00<?, ?B/s]"
            ]
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "8714bb6e944345b98b691f40adf0bf76",
              "version_minor": 0,
              "version_major": 2
            },
            "text/plain": [
              "Downloading:   0%|          | 0.00/492k [00:00<?, ?B/s]"
            ]
          },
          "metadata": {}
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "0: {'text': 'Beginners BBQ Class Taking Place in Missoula!\\nDo you want to get better at making delicious BBQ? You will have the opportunity, put this on your calendar now. Thursday, September 22nd join ...\n",
            "1: {'text': 'Discussion in \\'Mac OS X Lion (10.7)\\' started by axboi87, Jan 20, 2012.\\nI\\'ve got a 500gb internal drive and a 240gb SSD.\\nWhen trying to restore using disk utility i\\'m given the error \"N...\n",
            "2: {'text': 'Foil plaid lycra and spandex shortall with metallic slinky insets. Attached metallic elastic belt with O-ring. Headband included. Great hip hop or jazz dance costume. Made in the USA.', 'tim...\n",
            "3: {'text': \"How many backlinks per day for new site?\\nDiscussion in 'Black Hat SEO' started by Omoplata, Dec 3, 2010.\\n1) for a newly created site, what's the max # backlinks per day I should do to be s...\n",
            "4: {'text': 'The Denver Board of Education opened the 2017-18 school year with an update on projects that include new construction, upgrades, heat mitigation and quality learning environments.\\nWe are ex...\n",
            "5: {'text': 'BANGALORE CY JUNCTION SBC to GONDIA JUNCTION G train timings, routes, stops, and complete info.\\nAs of now, 1 trains run between from BANGALORE CY JUNCTION (YPR) to GONDIA JUNCTION (G).\\nThe...\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "a3KD3WXU3l-O"
      },
      "source": [
        "# Fine-tuning a language model"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "qJzozNPFKc2x"
      },
      "source": [
        "We are fine-tuning without Trainer so we can specify dataset streaming and 8-bit optimizers. For that, we copy the main parts of the [example script for language modeling](https://github.com/huggingface/transformers/blob/master/examples/pytorch/language-modeling/run_clm_no_trainer.py). First we import boiler-plate and the go step-by-step throught the model setup and training."
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "np2mr2Le6oqy"
      },
      "source": [
        "## Preprocessing & model setup"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "ET8K4pK1Kq7O"
      },
      "source": [
        "# You can also adapt this script on your own causal language modeling task. Pointers for this are left as comments.\n",
        "\n",
        "import argparse\n",
        "import logging\n",
        "import math\n",
        "import os\n",
        "import random\n",
        "from itertools import chain\n",
        "from pathlib import Path\n",
        "\n",
        "import datasets\n",
        "import torch\n",
        "from datasets import load_dataset\n",
        "from torch.utils.data import DataLoader\n",
        "from tqdm.auto import tqdm\n",
        "\n",
        "import transformers\n",
        "from huggingface_hub import Repository\n",
        "from transformers import (\n",
        "    CONFIG_MAPPING,\n",
        "    MODEL_MAPPING,\n",
        "    AdamW,\n",
        "    AutoConfig,\n",
        "    AutoModelForCausalLM,\n",
        "    AutoTokenizer,\n",
        "    SchedulerType,\n",
        "    default_data_collator,\n",
        "    get_scheduler,\n",
        "    set_seed,\n",
        ")\n",
        "from transformers.file_utils import get_full_repo_name\n",
        "from transformers.utils.versions import require_version\n",
        "\n",
        "\n",
        "logger = logging.getLogger(__name__)\n",
        "\n",
        "require_version(\"datasets>=1.16.1\", \"To fix: pip install -r examples/pytorch/language-modeling/requirements.txt\")\n",
        "\n",
        "MODEL_CONFIG_CLASSES = list(MODEL_MAPPING.keys())\n",
        "MODEL_TYPES = tuple(conf.model_type for conf in MODEL_CONFIG_CLASSES)\n",
        "\n",
        "\n",
        "def parse_args():\n",
        "    parser = argparse.ArgumentParser(description=\"Finetune a transformers model on a causal language modeling task\")\n",
        "    parser.add_argument(\n",
        "        \"--dataset_name\",\n",
        "        type=str,\n",
        "        default=None,\n",
        "        help=\"The name of the dataset to use (via the datasets library).\",\n",
        "    )\n",
        "    parser.add_argument(\n",
        "        \"--dataset_config_name\",\n",
        "        type=str,\n",
        "        default=None,\n",
        "        help=\"The configuration name of the dataset to use (via the datasets library).\",\n",
        "    )\n",
        "    parser.add_argument(\n",
        "        \"--text_column_name\",\n",
        "        type=str,\n",
        "        default=None,\n",
        "        help=\"The name of the column containing the text data.\",\n",
        "    )\n",
        "    parser.add_argument(\n",
        "        \"--dataset_streaming\",\n",
        "        action=\"store_true\",\n",
        "        help=\"If passed, will use dataset streaming (via the datasets library)\",\n",
        "    )\n",
        "    parser.add_argument(\n",
        "        \"--model_name_or_path\",\n",
        "        type=str,\n",
        "        help=\"Path to pretrained model or model identifier from huggingface.co/models.\",\n",
        "        required=False,\n",
        "    )\n",
        "    parser.add_argument(\n",
        "        \"--config_name\",\n",
        "        type=str,\n",
        "        default=None,\n",
        "        help=\"Pretrained config name or path if not the same as model_name\",\n",
        "    )\n",
        "    parser.add_argument(\n",
        "        \"--tokenizer_name\",\n",
        "        type=str,\n",
        "        default=None,\n",
        "        help=\"Pretrained tokenizer name or path if not the same as model_name\",\n",
        "    )\n",
        "    parser.add_argument(\n",
        "        \"--use_slow_tokenizer\",\n",
        "        action=\"store_true\",\n",
        "        help=\"If passed, will use a slow tokenizer (not backed by the 🤗 Tokenizers library).\",\n",
        "    )\n",
        "    parser.add_argument(\n",
        "        \"--per_device_train_batch_size\",\n",
        "        type=int,\n",
        "        default=1,\n",
        "        help=\"Batch size (per device) for the training dataloader.\",\n",
        "    )\n",
        "    parser.add_argument(\n",
        "        \"--learning_rate\",\n",
        "        type=float,\n",
        "        default=5e-5,\n",
        "        help=\"Initial learning rate (after the potential warmup period) to use.\",\n",
        "    )\n",
        "    parser.add_argument(\"--weight_decay\", type=float, default=0.0, help=\"Weight decay to use.\")\n",
        "    parser.add_argument(\"--num_train_epochs\", type=int, default=1, help=\"Total number of training epochs to perform.\")\n",
        "    parser.add_argument(\n",
        "        \"--max_train_steps\",\n",
        "        type=int,\n",
        "        default=None,\n",
        "        help=\"Total number of training steps to perform. If provided, overrides num_train_epochs.\",\n",
        "    )\n",
        "    parser.add_argument(\n",
        "        \"--gradient_accumulation_steps\",\n",
        "        type=int,\n",
        "        default=1,\n",
        "        help=\"Number of updates steps to accumulate before performing a backward/update pass.\",\n",
        "    )\n",
        "    parser.add_argument(\n",
        "        \"--lr_scheduler_type\",\n",
        "        type=SchedulerType,\n",
        "        default=\"linear\",\n",
        "        help=\"The scheduler type to use.\",\n",
        "        choices=[\"linear\", \"cosine\", \"cosine_with_restarts\", \"polynomial\", \"constant\", \"constant_with_warmup\"],\n",
        "    )\n",
        "    parser.add_argument(\n",
        "        \"--num_warmup_steps\", type=int, default=3000, help=\"Number of steps for the warmup in the lr scheduler.\"\n",
        "    )\n",
        "    parser.add_argument(\"--output_dir\", type=str, default=None, help=\"Where to store the final model.\")\n",
        "    parser.add_argument(\"--seed\", type=int, default=None, help=\"A seed for reproducible training.\")\n",
        "    parser.add_argument(\n",
        "        \"--model_type\",\n",
        "        type=str,\n",
        "        default=None,\n",
        "        help=\"Model type to use if training from scratch.\",\n",
        "        choices=MODEL_TYPES,\n",
        "    )\n",
        "    parser.add_argument(\n",
        "        \"--block_size\",\n",
        "        type=int,\n",
        "        default=None,\n",
        "        help=\"Optional input sequence length after tokenization. The training dataset will be truncated in block of this size for training. Default to the model max input length for single sentence inputs (take into account special tokens).\",\n",
        "    )\n",
        "    parser.add_argument(\n",
        "        \"--preprocessing_num_workers\",\n",
        "        type=int,\n",
        "        default=None,\n",
        "        help=\"The number of processes to use for the preprocessing.\",\n",
        "    )\n",
        "    parser.add_argument(\n",
        "        \"--overwrite_cache\", type=bool, default=False, help=\"Overwrite the cached training and evaluation sets\"\n",
        "    )\n",
        "    parser.add_argument(\n",
        "        \"--no_keep_linebreaks\", action=\"store_true\", help=\"Do not keep line breaks when using TXT files.\"\n",
        "    )\n",
        "    parser.add_argument(\"--push_to_hub\", action=\"store_true\", help=\"Whether or not to push the model to the Hub.\")\n",
        "    parser.add_argument(\n",
        "        \"--hub_model_id\", type=str, help=\"The name of the repository to keep in sync with the local `output_dir`.\"\n",
        "    )\n",
        "    parser.add_argument(\"--hub_token\", type=str, help=\"The token to use to push to the Model Hub.\")\n",
        "    args = parser.parse_args(args=[])\n",
        "\n",
        "    if args.push_to_hub:\n",
        "        assert args.output_dir is not None, \"Need an `output_dir` to create a repo when `--push_to_hub` is passed.\"\n",
        "\n",
        "    return args\n"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "4QqgoTBcLzqq"
      },
      "source": [
        "We setup the streaming dataset, the tokenizer, and the model (GPT-2 medium)."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 1000,
          "referenced_widgets": [
            "619629ebf5fc4e3ba9ad49e7e767a37d",
            "87a5f7bca18e4a818d6d48e15bc68845",
            "75d276bd972d429483af64d1eb27624c",
            "6f778a9c54f042199869eb16563bb933",
            "275f3cf6de1f49c1ae1f09991b4ea99c",
            "670d440077b34a8e86bf2d620ab6fb6d",
            "ac7455b1e5e5475cabf4dab505603a43",
            "4e82c37304d446d88e852553cbe9acb9",
            "213fc01fa39b4b77ba211a9304c5ea86",
            "a9dae405fbce41e3a46b05292313bab0",
            "272807a127d54f5ea3f20c0bc7262a25",
            "1ce665de582c4bd392d6bc4fff9a1499",
            "36cdf4ebc5684e9e88d80d5c98d86154",
            "288d1670c87e469ba3fe7c0030887d19",
            "eeb0bf36fb93410f82ab96b5414b6c50",
            "3c8d0050e0904acf933d18316000ac8a",
            "3be14001505b4dd19293365cb4a52cbc",
            "4195ef4de4ba491995e28b72e4d82a5b",
            "cafc6a76a9ac4de79b46969ebab90265",
            "9e6e0d9075454e17932e147678c1cef6",
            "7c83e50b67374fa5a2d713c036ba8e84",
            "0e0d370b48c84349a7f7cc45ed5f9d09",
            "2222296ad8d14cd39e8af17c108ad5ed",
            "e39daeca11584766a42f9bc6df76089c",
            "8fad1d6738b84704a3b4d248dbcfac2b",
            "405c42d0df19419e8563b00a3f402ae9",
            "7e2ceabdd41446cc860e35f94e02de31",
            "5b2f0da9535f46d78999e11162e16666",
            "d47fc8e1a9a84efcb6d9a02ba973e940",
            "a127db6cdeae4700834c1dc582ecb609",
            "b9a0b5fe13134774a7560a583513c7cd",
            "66d58d9dab054294a81601572b143c00",
            "75f479bbe938484891214f27986364eb",
            "05c7977a9322499bbc00e80f0d767bee",
            "9a76a0ee943343d781caf5b30ce0c6a5",
            "f454951d36c74e13bc3cf04b41388345",
            "1974ea03fa42428fa162eac65f8d71e4",
            "96cdea5bf8ba44f3a37e5e1f70cbef62",
            "221d95935ae249bb8fbf60007ed39e82",
            "0a5ca2e268c24ae1a9e42d5c9dcadb29",
            "997827c35e02494d82209fa5f1232e5f",
            "aaf3281319a94b42ba21941f41d262bc",
            "ffa4cf74c0c84e0797de60e50e1f579c",
            "d60228cfbd9f44ddbcefb20a74d99779",
            "8bff97fa5703438685342e1b140234f5",
            "542ccbd2abf941fea5312b72c51ec5b6",
            "f71eb8c60e4b4ef6b44b9f6b90bf25c5",
            "1355930a0edf40e5ac75198e9d901bfb",
            "ce047a309f984458859ddfc15aad1125",
            "c82b029db5a547f09d1b9607a5d26ecc",
            "01312ccfa1254c7a8acc6a7410af711a",
            "7db4a94408b94d71ab9f6fccded0f011",
            "356eaaf643894327a0ba30049c3a2b92",
            "874af3844342411096d6d8872354f531",
            "fd8cb9272f3d4ff9ae673c724a6b757b"
          ]
        },
        "id": "PSzJDsewLPEi",
        "outputId": "f956fc65-1210-4db5-a2ea-fc482f357eb2"
      },
      "source": [
        "args = parse_args() # get default arguments\n",
        "\n",
        "# If passed along, set the training seed now.\n",
        "if args.seed is not None:\n",
        "    set_seed(args.seed)\n",
        "\n",
        "args.dataset_name = 'c4'\n",
        "args.dataset_streaming = True\n",
        "args.text_column_name = \"text\"\n",
        "args.model_name_or_path = 'gpt2-large'\n",
        "args.dataset_config_name = \"en\"\n",
        "args.block_size = 1024\n",
        "args.max_train_steps = 1_000_000\n",
        "args.log_loss_interval = 25\n",
        "\n",
        "\n",
        "# LOAD DATA\n",
        "raw_train_dataset = load_dataset(args.dataset_name, args.dataset_config_name, streaming=args.dataset_streaming, split=\"train\")\n",
        "\n",
        "if args.config_name:\n",
        "    config = AutoConfig.from_pretrained(args.config_name)\n",
        "elif args.model_name_or_path:\n",
        "    config = AutoConfig.from_pretrained(args.model_name_or_path)\n",
        "else:\n",
        "    config = CONFIG_MAPPING[args.model_type]()\n",
        "    logger.warning(\"You are instantiating a new config instance from scratch.\")\n",
        "\n",
        "# TOKENIZER\n",
        "if args.tokenizer_name:\n",
        "        tokenizer = AutoTokenizer.from_pretrained(args.tokenizer_name, use_fast=not args.use_slow_tokenizer)\n",
        "elif args.model_name_or_path:\n",
        "    tokenizer = AutoTokenizer.from_pretrained(args.model_name_or_path, use_fast=not args.use_slow_tokenizer)\n",
        "else:\n",
        "    raise ValueError(\n",
        "        \"You are instantiating a new tokenizer from scratch. This is not supported by this script.\"\n",
        "        \"You can do it from another script, save it, and load it from here, using --tokenizer_name.\"\n",
        "    )\n",
        "\n",
        "if args.model_name_or_path:\n",
        "    model = AutoModelForCausalLM.from_pretrained(\n",
        "        args.model_name_or_path,\n",
        "        from_tf=bool(\".ckpt\" in args.model_name_or_path),\n",
        "        config=config,\n",
        "    )\n",
        "else:\n",
        "    logger.info(\"Training new model from scratch\")\n",
        "    model = AutoModelForCausalLM.from_config(config)\n",
        "\n",
        "model.resize_token_embeddings(len(tokenizer))\n",
        "\n",
        "model.gradient_checkpointing_enable()\n",
        "model.cuda() # send model to cuda preemptively to free RAM. Yep, we're using GPU as an offload memory. O tempora! O mores!"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "619629ebf5fc4e3ba9ad49e7e767a37d",
              "version_minor": 0,
              "version_major": 2
            },
            "text/plain": [
              "Downloading:   0%|          | 0.00/666 [00:00<?, ?B/s]"
            ]
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "1ce665de582c4bd392d6bc4fff9a1499",
              "version_minor": 0,
              "version_major": 2
            },
            "text/plain": [
              "Downloading:   0%|          | 0.00/0.99M [00:00<?, ?B/s]"
            ]
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "2222296ad8d14cd39e8af17c108ad5ed",
              "version_minor": 0,
              "version_major": 2
            },
            "text/plain": [
              "Downloading:   0%|          | 0.00/446k [00:00<?, ?B/s]"
            ]
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "05c7977a9322499bbc00e80f0d767bee",
              "version_minor": 0,
              "version_major": 2
            },
            "text/plain": [
              "Downloading:   0%|          | 0.00/1.29M [00:00<?, ?B/s]"
            ]
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "8bff97fa5703438685342e1b140234f5",
              "version_minor": 0,
              "version_major": 2
            },
            "text/plain": [
              "Downloading:   0%|          | 0.00/3.02G [00:00<?, ?B/s]"
            ]
          },
          "metadata": {}
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "GPT2LMHeadModel(\n",
              "  (transformer): GPT2Model(\n",
              "    (wte): Embedding(50257, 1280)\n",
              "    (wpe): Embedding(1024, 1280)\n",
              "    (drop): Dropout(p=0.1, inplace=False)\n",
              "    (h): ModuleList(\n",
              "      (0): GPT2Block(\n",
              "        (ln_1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (attn): GPT2Attention(\n",
              "          (c_attn): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (attn_dropout): Dropout(p=0.1, inplace=False)\n",
              "          (resid_dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "        (ln_2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (mlp): GPT2MLP(\n",
              "          (c_fc): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "      )\n",
              "      (1): GPT2Block(\n",
              "        (ln_1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (attn): GPT2Attention(\n",
              "          (c_attn): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (attn_dropout): Dropout(p=0.1, inplace=False)\n",
              "          (resid_dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "        (ln_2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (mlp): GPT2MLP(\n",
              "          (c_fc): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "      )\n",
              "      (2): GPT2Block(\n",
              "        (ln_1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (attn): GPT2Attention(\n",
              "          (c_attn): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (attn_dropout): Dropout(p=0.1, inplace=False)\n",
              "          (resid_dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "        (ln_2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (mlp): GPT2MLP(\n",
              "          (c_fc): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "      )\n",
              "      (3): GPT2Block(\n",
              "        (ln_1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (attn): GPT2Attention(\n",
              "          (c_attn): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (attn_dropout): Dropout(p=0.1, inplace=False)\n",
              "          (resid_dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "        (ln_2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (mlp): GPT2MLP(\n",
              "          (c_fc): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "      )\n",
              "      (4): GPT2Block(\n",
              "        (ln_1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (attn): GPT2Attention(\n",
              "          (c_attn): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (attn_dropout): Dropout(p=0.1, inplace=False)\n",
              "          (resid_dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "        (ln_2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (mlp): GPT2MLP(\n",
              "          (c_fc): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "      )\n",
              "      (5): GPT2Block(\n",
              "        (ln_1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (attn): GPT2Attention(\n",
              "          (c_attn): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (attn_dropout): Dropout(p=0.1, inplace=False)\n",
              "          (resid_dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "        (ln_2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (mlp): GPT2MLP(\n",
              "          (c_fc): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "      )\n",
              "      (6): GPT2Block(\n",
              "        (ln_1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (attn): GPT2Attention(\n",
              "          (c_attn): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (attn_dropout): Dropout(p=0.1, inplace=False)\n",
              "          (resid_dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "        (ln_2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (mlp): GPT2MLP(\n",
              "          (c_fc): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "      )\n",
              "      (7): GPT2Block(\n",
              "        (ln_1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (attn): GPT2Attention(\n",
              "          (c_attn): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (attn_dropout): Dropout(p=0.1, inplace=False)\n",
              "          (resid_dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "        (ln_2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (mlp): GPT2MLP(\n",
              "          (c_fc): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "      )\n",
              "      (8): GPT2Block(\n",
              "        (ln_1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (attn): GPT2Attention(\n",
              "          (c_attn): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (attn_dropout): Dropout(p=0.1, inplace=False)\n",
              "          (resid_dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "        (ln_2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (mlp): GPT2MLP(\n",
              "          (c_fc): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "      )\n",
              "      (9): GPT2Block(\n",
              "        (ln_1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (attn): GPT2Attention(\n",
              "          (c_attn): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (attn_dropout): Dropout(p=0.1, inplace=False)\n",
              "          (resid_dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "        (ln_2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (mlp): GPT2MLP(\n",
              "          (c_fc): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "      )\n",
              "      (10): GPT2Block(\n",
              "        (ln_1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (attn): GPT2Attention(\n",
              "          (c_attn): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (attn_dropout): Dropout(p=0.1, inplace=False)\n",
              "          (resid_dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "        (ln_2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (mlp): GPT2MLP(\n",
              "          (c_fc): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "      )\n",
              "      (11): GPT2Block(\n",
              "        (ln_1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (attn): GPT2Attention(\n",
              "          (c_attn): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (attn_dropout): Dropout(p=0.1, inplace=False)\n",
              "          (resid_dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "        (ln_2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (mlp): GPT2MLP(\n",
              "          (c_fc): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "      )\n",
              "      (12): GPT2Block(\n",
              "        (ln_1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (attn): GPT2Attention(\n",
              "          (c_attn): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (attn_dropout): Dropout(p=0.1, inplace=False)\n",
              "          (resid_dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "        (ln_2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (mlp): GPT2MLP(\n",
              "          (c_fc): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "      )\n",
              "      (13): GPT2Block(\n",
              "        (ln_1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (attn): GPT2Attention(\n",
              "          (c_attn): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (attn_dropout): Dropout(p=0.1, inplace=False)\n",
              "          (resid_dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "        (ln_2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (mlp): GPT2MLP(\n",
              "          (c_fc): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "      )\n",
              "      (14): GPT2Block(\n",
              "        (ln_1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (attn): GPT2Attention(\n",
              "          (c_attn): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (attn_dropout): Dropout(p=0.1, inplace=False)\n",
              "          (resid_dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "        (ln_2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (mlp): GPT2MLP(\n",
              "          (c_fc): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "      )\n",
              "      (15): GPT2Block(\n",
              "        (ln_1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (attn): GPT2Attention(\n",
              "          (c_attn): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (attn_dropout): Dropout(p=0.1, inplace=False)\n",
              "          (resid_dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "        (ln_2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (mlp): GPT2MLP(\n",
              "          (c_fc): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "      )\n",
              "      (16): GPT2Block(\n",
              "        (ln_1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (attn): GPT2Attention(\n",
              "          (c_attn): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (attn_dropout): Dropout(p=0.1, inplace=False)\n",
              "          (resid_dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "        (ln_2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (mlp): GPT2MLP(\n",
              "          (c_fc): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "      )\n",
              "      (17): GPT2Block(\n",
              "        (ln_1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (attn): GPT2Attention(\n",
              "          (c_attn): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (attn_dropout): Dropout(p=0.1, inplace=False)\n",
              "          (resid_dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "        (ln_2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (mlp): GPT2MLP(\n",
              "          (c_fc): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "      )\n",
              "      (18): GPT2Block(\n",
              "        (ln_1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (attn): GPT2Attention(\n",
              "          (c_attn): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (attn_dropout): Dropout(p=0.1, inplace=False)\n",
              "          (resid_dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "        (ln_2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (mlp): GPT2MLP(\n",
              "          (c_fc): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "      )\n",
              "      (19): GPT2Block(\n",
              "        (ln_1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (attn): GPT2Attention(\n",
              "          (c_attn): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (attn_dropout): Dropout(p=0.1, inplace=False)\n",
              "          (resid_dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "        (ln_2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (mlp): GPT2MLP(\n",
              "          (c_fc): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "      )\n",
              "      (20): GPT2Block(\n",
              "        (ln_1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (attn): GPT2Attention(\n",
              "          (c_attn): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (attn_dropout): Dropout(p=0.1, inplace=False)\n",
              "          (resid_dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "        (ln_2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (mlp): GPT2MLP(\n",
              "          (c_fc): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "      )\n",
              "      (21): GPT2Block(\n",
              "        (ln_1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (attn): GPT2Attention(\n",
              "          (c_attn): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (attn_dropout): Dropout(p=0.1, inplace=False)\n",
              "          (resid_dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "        (ln_2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (mlp): GPT2MLP(\n",
              "          (c_fc): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "      )\n",
              "      (22): GPT2Block(\n",
              "        (ln_1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (attn): GPT2Attention(\n",
              "          (c_attn): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (attn_dropout): Dropout(p=0.1, inplace=False)\n",
              "          (resid_dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "        (ln_2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (mlp): GPT2MLP(\n",
              "          (c_fc): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "      )\n",
              "      (23): GPT2Block(\n",
              "        (ln_1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (attn): GPT2Attention(\n",
              "          (c_attn): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (attn_dropout): Dropout(p=0.1, inplace=False)\n",
              "          (resid_dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "        (ln_2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (mlp): GPT2MLP(\n",
              "          (c_fc): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "      )\n",
              "      (24): GPT2Block(\n",
              "        (ln_1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (attn): GPT2Attention(\n",
              "          (c_attn): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (attn_dropout): Dropout(p=0.1, inplace=False)\n",
              "          (resid_dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "        (ln_2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (mlp): GPT2MLP(\n",
              "          (c_fc): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "      )\n",
              "      (25): GPT2Block(\n",
              "        (ln_1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (attn): GPT2Attention(\n",
              "          (c_attn): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (attn_dropout): Dropout(p=0.1, inplace=False)\n",
              "          (resid_dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "        (ln_2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (mlp): GPT2MLP(\n",
              "          (c_fc): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "      )\n",
              "      (26): GPT2Block(\n",
              "        (ln_1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (attn): GPT2Attention(\n",
              "          (c_attn): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (attn_dropout): Dropout(p=0.1, inplace=False)\n",
              "          (resid_dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "        (ln_2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (mlp): GPT2MLP(\n",
              "          (c_fc): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "      )\n",
              "      (27): GPT2Block(\n",
              "        (ln_1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (attn): GPT2Attention(\n",
              "          (c_attn): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (attn_dropout): Dropout(p=0.1, inplace=False)\n",
              "          (resid_dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "        (ln_2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (mlp): GPT2MLP(\n",
              "          (c_fc): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "      )\n",
              "      (28): GPT2Block(\n",
              "        (ln_1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (attn): GPT2Attention(\n",
              "          (c_attn): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (attn_dropout): Dropout(p=0.1, inplace=False)\n",
              "          (resid_dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "        (ln_2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (mlp): GPT2MLP(\n",
              "          (c_fc): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "      )\n",
              "      (29): GPT2Block(\n",
              "        (ln_1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (attn): GPT2Attention(\n",
              "          (c_attn): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (attn_dropout): Dropout(p=0.1, inplace=False)\n",
              "          (resid_dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "        (ln_2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (mlp): GPT2MLP(\n",
              "          (c_fc): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "      )\n",
              "      (30): GPT2Block(\n",
              "        (ln_1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (attn): GPT2Attention(\n",
              "          (c_attn): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (attn_dropout): Dropout(p=0.1, inplace=False)\n",
              "          (resid_dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "        (ln_2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (mlp): GPT2MLP(\n",
              "          (c_fc): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "      )\n",
              "      (31): GPT2Block(\n",
              "        (ln_1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (attn): GPT2Attention(\n",
              "          (c_attn): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (attn_dropout): Dropout(p=0.1, inplace=False)\n",
              "          (resid_dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "        (ln_2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (mlp): GPT2MLP(\n",
              "          (c_fc): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "      )\n",
              "      (32): GPT2Block(\n",
              "        (ln_1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (attn): GPT2Attention(\n",
              "          (c_attn): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (attn_dropout): Dropout(p=0.1, inplace=False)\n",
              "          (resid_dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "        (ln_2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (mlp): GPT2MLP(\n",
              "          (c_fc): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "      )\n",
              "      (33): GPT2Block(\n",
              "        (ln_1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (attn): GPT2Attention(\n",
              "          (c_attn): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (attn_dropout): Dropout(p=0.1, inplace=False)\n",
              "          (resid_dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "        (ln_2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (mlp): GPT2MLP(\n",
              "          (c_fc): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "      )\n",
              "      (34): GPT2Block(\n",
              "        (ln_1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (attn): GPT2Attention(\n",
              "          (c_attn): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (attn_dropout): Dropout(p=0.1, inplace=False)\n",
              "          (resid_dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "        (ln_2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (mlp): GPT2MLP(\n",
              "          (c_fc): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "      )\n",
              "      (35): GPT2Block(\n",
              "        (ln_1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (attn): GPT2Attention(\n",
              "          (c_attn): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (attn_dropout): Dropout(p=0.1, inplace=False)\n",
              "          (resid_dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "        (ln_2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "        (mlp): GPT2MLP(\n",
              "          (c_fc): Conv1D()\n",
              "          (c_proj): Conv1D()\n",
              "          (dropout): Dropout(p=0.1, inplace=False)\n",
              "        )\n",
              "      )\n",
              "    )\n",
              "    (ln_f): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)\n",
              "  )\n",
              "  (lm_head): Linear(in_features=1280, out_features=50257, bias=False)\n",
              ")"
            ]
          },
          "metadata": {},
          "execution_count": 6
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "B2zzrdxL-_-3"
      },
      "source": [
        "We now preprocess the dataset into blocks of the specified size (768)."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "ThKeDte__C3A"
      },
      "source": [
        "text_column_name = args.text_column_name\n",
        "\n",
        "def tokenize_function(examples):\n",
        "    return tokenizer(examples[text_column_name])\n",
        "\n",
        "# Main data processing function that will concatenate all texts from our dataset and generate chunks of block_size.\n",
        "def group_texts(examples):\n",
        "    # Concatenate all texts.\n",
        "    concatenated_examples = {k: list(chain(*examples[k])) for k in examples.keys()}\n",
        "    total_length = len(concatenated_examples[list(examples.keys())[0]])\n",
        "    # We drop the small remainder, we could add padding if the model supported it instead of this drop, you can\n",
        "    # customize this part to your needs.\n",
        "    if total_length >= block_size:\n",
        "        total_length = (total_length // block_size) * block_size\n",
        "    # Split by chunks of max_len.\n",
        "    result = {\n",
        "        k: [t[i : i + block_size] for i in range(0, total_length, block_size)]\n",
        "        for k, t in concatenated_examples.items()\n",
        "    }\n",
        "    result[\"labels\"] = result[\"input_ids\"].copy()\n",
        "    return result\n",
        "\n",
        "tokenized_train_dataset = raw_train_dataset.shuffle(10_000, seed=42).map(tokenize_function, batched=True)\n",
        "\n",
        "if args.block_size is None:\n",
        "    block_size = tokenizer.model_max_length\n",
        "    if block_size > 1024:\n",
        "        logger.warning(\n",
        "            f\"The tokenizer picked seems to have a very large `model_max_length` ({tokenizer.model_max_length}). \"\n",
        "            \"Picking 1024 instead. You can change that default value by passing --block_size xxx.\"\n",
        "        )\n",
        "    block_size = 1024\n",
        "else:\n",
        "    if args.block_size > tokenizer.model_max_length:\n",
        "        logger.warning(\n",
        "            f\"The block_size passed ({args.block_size}) is larger than the maximum length for the model\"\n",
        "            f\"({tokenizer.model_max_length}). Using block_size={tokenizer.model_max_length}.\"\n",
        "        )\n",
        "    block_size = min(args.block_size, tokenizer.model_max_length)\n",
        "\n",
        "# Note that with `batched=True`, this map processes 1,000 texts together, so group_texts throws away a remainder\n",
        "# for each of those groups of 1,000 texts. You can adjust that batch_size here but a higher value might be slower\n",
        "# to preprocess.\n",
        "train_dataset = tokenized_train_dataset.map(group_texts, batched=True)\n",
        "train_dataset = train_dataset.shuffle(10_000, seed=42).with_format(\"torch\")\n",
        "\n",
        "# DataLoaders creation:\n",
        "train_dataloader = DataLoader(\n",
        "    train_dataset, collate_fn=default_data_collator, batch_size=args.per_device_train_batch_size\n",
        ")"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "E0Wl7_Vx8HiP"
      },
      "source": [
        "## 8-bit Optimizers"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "l3Z-4DmQ_etm"
      },
      "source": [
        "\n",
        "In this example, we fine-tune GPT-2 medium with a sequence dimension of 768 which runs out of memory. How can we fit this model on a colab GPU with 12 GB of memory? One solution is to use 8-bit optimizers.\n",
        "\n",
        "8-bit opitimizers decrease the memory footprint for training models by compressing and storing the optimizer statistics for optimizers. For Adam, there are two optimizer buffers, one for an estimate of the running mean of the gradient and one for the standard deviation. Each of the buffers has the size of the full model, as such, the Adam optimizers uses 2x more memory than the model itself. With 8-bit optimizers we reduce that from 32-bit to 8-bit thus reducing the memory due to Adam from 2x the model size to 0.5x the model size -- a reduction by 75%.\n",
        "\n",
        "8-bit optimizers work by using dynamic quantization and block-wise quantization to ensure stable training and the same performance as 32-bit optimizers while achieving the 75% reduction in memory.\n",
        "\n",
        "8-bit optimizers work as follows\n",
        "1. Chunk optimizer states into blocks\n",
        "2. Normalize each block into the range [-1, 1] by dividing by the absmax of the block\n",
        "3. Perform dynamic quantization\n",
        "4. Store 8-bit data\n",
        "\n",
        "For dequantization we reverse these steps. These steps are demonstrated by the example below:\n",
        "\n",
        " ![Schematic of 8-bit optimizers](https://timdettmers.com/wp-content/uploads/2021/12/schematic2.svg)\n",
        "\n"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "tpoSBTsR_n7n"
      },
      "source": [
        "import bitsandbytes as bnb\n",
        "#optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate) # this crashes with out-of-memory error\n",
        "optimizer = bnb.optim.Adam8bit(model.parameters(), lr=args.learning_rate)"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "q0MHy1x07_Ac"
      },
      "source": [
        "## Train the model"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "4pOO1F7b_-f9"
      },
      "source": [
        "Now we are training the model with dataset streaming and 8-bit optimizers."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 909,
          "referenced_widgets": [
            "6e2b38e0faf64f529e849f37ad4d4eab",
            "ba6e9cadc3274a64ae0738fb99c55860",
            "62edc38a7caf4feca6db6de2d767fbb3",
            "155616f4494440b291ae4722aaeca750",
            "2045670c97194bd68a398213552d8364",
            "1ca831c613424bea8f42c4a89f44b2b5",
            "b352385cb0664a8b873f5028a728c4e3",
            "085e99b8131f4f8bb65f2ed723a21b48",
            "d1ec06e6abca4b35b8e22856bcc9c5ce",
            "565f3cec67374748bdc8baf19f18963f",
            "358cb048c5a04a7cb2e9de91017baf01"
          ]
        },
        "id": "ONzFs98p__5M",
        "outputId": "ac4fb040-a3f3-41aa-cf8d-9699e621faad"
      },
      "source": [
        "lr_scheduler = get_scheduler(\n",
        "    name=args.lr_scheduler_type,\n",
        "    optimizer=optimizer,\n",
        "    num_warmup_steps=args.num_warmup_steps,\n",
        "    num_training_steps=args.max_train_steps,\n",
        ")\n",
        "# Train!\n",
        "total_batch_size = args.per_device_train_batch_size * args.gradient_accumulation_steps\n",
        "# Only show the progress bar once on each machine.\n",
        "progress_bar = tqdm(range(args.max_train_steps), disable=False)\n",
        "completed_steps = 0\n",
        "\n",
        "def get_free_mem():\n",
        "    t = torch.cuda.get_device_properties(0).total_memory\n",
        "    r = torch.cuda.memory_reserved(0)\n",
        "    a = torch.cuda.memory_allocated(0)\n",
        "    f = r - a\n",
        "    return f/1024**3, r/1024**3, a/1024**3\n",
        "\n",
        "for epoch in range(args.num_train_epochs):\n",
        "    model.train()\n",
        "    losses = []\n",
        "    for step, batch in enumerate(train_dataloader):\n",
        "        gpu_data = {}\n",
        "        for key, value in batch.items():\n",
        "            gpu_data[key] = value.cuda()\n",
        "\n",
        "        outputs = model(**gpu_data, use_cache=False)\n",
        "        loss = outputs.loss\n",
        "        losses.append(loss.item())\n",
        "        loss = loss / args.gradient_accumulation_steps\n",
        "        loss.backward()\n",
        "        if step % args.gradient_accumulation_steps == 0 or step == args.max_train_steps:\n",
        "            optimizer.step()\n",
        "            lr_scheduler.step()\n",
        "            optimizer.zero_grad()\n",
        "            progress_bar.update(1)\n",
        "            completed_steps += 1\n",
        "\n",
        "        if step % args.log_loss_interval == 0 and step > 0:\n",
        "            try:\n",
        "                perplexity = math.exp(sum(losses)/len(losses))\n",
        "            except OverflowError:\n",
        "                perplexity = float(\"inf\")\n",
        "            losses = []\n",
        "            print(f\"epoch: {epoch+1}, step: {step}, perplexity: {perplexity}\")\n",
        "            \n",
        "\n",
        "        if completed_steps >= args.max_train_steps:\n",
        "            break\n",
        "\n"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "6e2b38e0faf64f529e849f37ad4d4eab",
              "version_minor": 0,
              "version_major": 2
            },
            "text/plain": [
              "  0%|          | 0/1000000 [00:00<?, ?it/s]"
            ]
          },
          "metadata": {}
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "Token indices sequence length is longer than the specified maximum sequence length for this model (11128 > 1024). Running this sequence through the model will result in indexing errors\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "epoch: 1, step: 25, perplexity: 21.84923336079448\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
          ]
        }
      ]
    }
  ]
}