diff --git "a/xlm_roberta_large.ipynb" "b/xlm_roberta_large.ipynb"
new file mode 100644--- /dev/null
+++ "b/xlm_roberta_large.ipynb"
@@ -0,0 +1,2588 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "provenance": [],
+      "gpuType": "A100",
+      "machine_shape": "hm"
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    },
+    "accelerator": "GPU",
+    "widgets": {
+      "application/vnd.jupyter.widget-state+json": {
+        "366e5a0ac67d4e0e94da459f3e69804e": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HBoxModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_3c6cd74053f74ac18c4f5bbfb9a2fc69",
+              "IPY_MODEL_22d5df7f49b34fec91c7eb4e7e4ab33e",
+              "IPY_MODEL_25153fcf872048379de7c71420f3a581"
+            ],
+            "layout": "IPY_MODEL_a1883d8b08cc458287224bc89aeb54d1"
+          }
+        },
+        "3c6cd74053f74ac18c4f5bbfb9a2fc69": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_e03078ea896e41e7bcd922afd77b83c9",
+            "placeholder": "​",
+            "style": "IPY_MODEL_793237ce29034606b2b34bf559cd87da",
+            "value": "tokenizer_config.json: 100%"
+          }
+        },
+        "22d5df7f49b34fec91c7eb4e7e4ab33e": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "FloatProgressModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_54177c30c7974ab9ac986cb9aa17793c",
+            "max": 25,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_95a31c2e01744ccca1fd1d07e1e99d19",
+            "value": 25
+          }
+        },
+        "25153fcf872048379de7c71420f3a581": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_1d20d5f57db24eb59f4f633ee1443495",
+            "placeholder": "​",
+            "style": "IPY_MODEL_31f2258ec506441e83752bfa67d53398",
+            "value": " 25.0/25.0 [00:00&lt;00:00, 1.86kB/s]"
+          }
+        },
+        "a1883d8b08cc458287224bc89aeb54d1": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "e03078ea896e41e7bcd922afd77b83c9": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "793237ce29034606b2b34bf559cd87da": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "54177c30c7974ab9ac986cb9aa17793c": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "95a31c2e01744ccca1fd1d07e1e99d19": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "ProgressStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "1d20d5f57db24eb59f4f633ee1443495": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "31f2258ec506441e83752bfa67d53398": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "4628c887a3404cb79319e2586cbf81af": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HBoxModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_8ae15ae97e85478aaf8ff109349f419a",
+              "IPY_MODEL_adc84a2b4e54479d927ae5b253eb90c2",
+              "IPY_MODEL_549602a8d77241929793d70afa0d54b9"
+            ],
+            "layout": "IPY_MODEL_5d1d0adb88b748e4859c71019a0cf8e2"
+          }
+        },
+        "8ae15ae97e85478aaf8ff109349f419a": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_b0c34ffabd284318842c23cc4baba1cf",
+            "placeholder": "​",
+            "style": "IPY_MODEL_b30aeec96e4d4826bab3c207561b4778",
+            "value": "sentencepiece.bpe.model: 100%"
+          }
+        },
+        "adc84a2b4e54479d927ae5b253eb90c2": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "FloatProgressModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_10b114cb480141cbab6a26f9a89d2a7e",
+            "max": 5069051,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_3943a1720767453784dfaa6e9017afb2",
+            "value": 5069051
+          }
+        },
+        "549602a8d77241929793d70afa0d54b9": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_1d26be052e6d4d479a2c4c68f027a719",
+            "placeholder": "​",
+            "style": "IPY_MODEL_5c35bb1be95e4d6c9736330953e045e3",
+            "value": " 5.07M/5.07M [00:01&lt;00:00, 3.39MB/s]"
+          }
+        },
+        "5d1d0adb88b748e4859c71019a0cf8e2": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "b0c34ffabd284318842c23cc4baba1cf": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "b30aeec96e4d4826bab3c207561b4778": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "10b114cb480141cbab6a26f9a89d2a7e": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "3943a1720767453784dfaa6e9017afb2": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "ProgressStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "1d26be052e6d4d479a2c4c68f027a719": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "5c35bb1be95e4d6c9736330953e045e3": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "550652ab3d9f482ba2a5485cd84c939b": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HBoxModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_09a5d2c99fb9434ab90b3200cd51a3ae",
+              "IPY_MODEL_b4dbc8e0dbd342d19c5f652a004bc765",
+              "IPY_MODEL_4dc271194c7648c8894dd510a69c103d"
+            ],
+            "layout": "IPY_MODEL_4debd0c75c79416d917ea5641e4a8841"
+          }
+        },
+        "09a5d2c99fb9434ab90b3200cd51a3ae": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_7c644402f92b408182ab014e2ea02daa",
+            "placeholder": "​",
+            "style": "IPY_MODEL_affe4914cd6f41e39124f093e36cdb07",
+            "value": "tokenizer.json: 100%"
+          }
+        },
+        "b4dbc8e0dbd342d19c5f652a004bc765": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "FloatProgressModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_ea2d20664c5640ff87cd1b909800722c",
+            "max": 9096718,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_ca95df7382f2412b9328f96a463209a1",
+            "value": 9096718
+          }
+        },
+        "4dc271194c7648c8894dd510a69c103d": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_186e1b0766044f718d2024645c4e87c7",
+            "placeholder": "​",
+            "style": "IPY_MODEL_57d59fcaff5e466b8605b23887650cf7",
+            "value": " 9.10M/9.10M [00:01&lt;00:00, 5.30MB/s]"
+          }
+        },
+        "4debd0c75c79416d917ea5641e4a8841": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "7c644402f92b408182ab014e2ea02daa": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "affe4914cd6f41e39124f093e36cdb07": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "ea2d20664c5640ff87cd1b909800722c": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "ca95df7382f2412b9328f96a463209a1": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "ProgressStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "186e1b0766044f718d2024645c4e87c7": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "57d59fcaff5e466b8605b23887650cf7": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "ce139b88df824efea4d55e4813ee1b88": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HBoxModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_1fb3250b1b5540d8a9365435900db8b5",
+              "IPY_MODEL_675aa319a3504e22a9b1d58eff9188a2",
+              "IPY_MODEL_48e49cdb0ec8417782ed042ca84d4597"
+            ],
+            "layout": "IPY_MODEL_f15259b4926d40b5a70ee8eb5213e9f5"
+          }
+        },
+        "1fb3250b1b5540d8a9365435900db8b5": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_b0f1e42f5e4f4ac8b1c4ca12cfebabec",
+            "placeholder": "​",
+            "style": "IPY_MODEL_41ea4f253b6b44129196e0d894777c4a",
+            "value": "Map: 100%"
+          }
+        },
+        "675aa319a3504e22a9b1d58eff9188a2": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "FloatProgressModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_ccf8fe1474d540a7be7b6757119d92fd",
+            "max": 99545,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_34326871a80140679ffe40ac560192a9",
+            "value": 99545
+          }
+        },
+        "48e49cdb0ec8417782ed042ca84d4597": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_19987899825a49b19c31a7225d3ff0b8",
+            "placeholder": "​",
+            "style": "IPY_MODEL_ed46a4b1d6b647fcaa01526262b19431",
+            "value": " 99545/99545 [00:52&lt;00:00, 1964.80 examples/s]"
+          }
+        },
+        "f15259b4926d40b5a70ee8eb5213e9f5": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "b0f1e42f5e4f4ac8b1c4ca12cfebabec": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "41ea4f253b6b44129196e0d894777c4a": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "ccf8fe1474d540a7be7b6757119d92fd": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "34326871a80140679ffe40ac560192a9": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "ProgressStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "19987899825a49b19c31a7225d3ff0b8": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "ed46a4b1d6b647fcaa01526262b19431": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        }
+      }
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "code",
+      "source": [
+        "# Versions are pinned to the ones recorded in this notebook's last run so that a\n",
+        "# fresh kernel reproduces the same environment; %pip installs into the kernel's\n",
+        "# own Python environment.\n",
+        "%pip install -q transformers==4.44.2 datasets==3.1.0 seqeval==1.2.2 huggingface_hub==0.24.7\n"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "5v8KnAaD-z9t",
+        "outputId": "01e664a6-6621-4ccb-cb02-25e09af4fa9f"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Requirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (4.44.2)\n",
+            "Requirement already satisfied: datasets in /usr/local/lib/python3.10/dist-packages (3.1.0)\n",
+            "Requirement already satisfied: seqeval in /usr/local/lib/python3.10/dist-packages (1.2.2)\n",
+            "Requirement already satisfied: huggingface_hub in /usr/local/lib/python3.10/dist-packages (0.24.7)\n",
+            "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers) (3.16.1)\n",
+            "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (1.26.4)\n",
+            "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers) (24.1)\n",
+            "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (6.0.2)\n",
+            "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (2024.9.11)\n",
+            "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers) (2.32.3)\n",
+            "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.4.5)\n",
+            "Requirement already satisfied: tokenizers<0.20,>=0.19 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.19.1)\n",
+            "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers) (4.66.6)\n",
+            "Requirement already satisfied: pyarrow>=15.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (17.0.0)\n",
+            "Requirement already satisfied: dill<0.3.9,>=0.3.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (0.3.8)\n",
+            "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from datasets) (2.2.2)\n",
+            "Requirement already satisfied: xxhash in /usr/local/lib/python3.10/dist-packages (from datasets) (3.5.0)\n",
+            "Requirement already satisfied: multiprocess<0.70.17 in /usr/local/lib/python3.10/dist-packages (from datasets) (0.70.16)\n",
+            "Requirement already satisfied: fsspec<=2024.9.0,>=2023.1.0 in /usr/local/lib/python3.10/dist-packages (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets) (2024.9.0)\n",
+            "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets) (3.10.10)\n",
+            "Requirement already satisfied: scikit-learn>=0.21.3 in /usr/local/lib/python3.10/dist-packages (from seqeval) (1.5.2)\n",
+            "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface_hub) (4.12.2)\n",
+            "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (2.4.3)\n",
+            "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.3.1)\n",
+            "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (24.2.0)\n",
+            "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.5.0)\n",
+            "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (6.1.0)\n",
+            "Requirement already satisfied: yarl<2.0,>=1.12.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.17.0)\n",
+            "Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (4.0.3)\n",
+            "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.4.0)\n",
+            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.10)\n",
+            "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2.2.3)\n",
+            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2024.8.30)\n",
+            "Requirement already satisfied: scipy>=1.6.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.21.3->seqeval) (1.13.1)\n",
+            "Requirement already satisfied: joblib>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.21.3->seqeval) (1.4.2)\n",
+            "Requirement already satisfied: threadpoolctl>=3.1.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.21.3->seqeval) (3.5.0)\n",
+            "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2.8.2)\n",
+            "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2024.2)\n",
+            "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2024.2)\n",
+            "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.2->pandas->datasets) (1.16.0)\n",
+            "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.10/dist-packages (from yarl<2.0,>=1.12.0->aiohttp->datasets) (0.2.0)\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Standard library and core numerical / deep-learning imports\n",
+        "import os                 # Provides functions for interacting with the operating system\n",
+        "import warnings           # Used to handle or suppress warnings\n",
+        "import numpy as np        # Essential for numerical operations and array manipulation\n",
+        "import torch              # PyTorch library for tensor computations and model handling\n",
+        "import ast                # Used for safe evaluation of strings to Python objects (e.g., parsing tokens)\n",
+        "\n",
+        "# Hugging Face and Transformers imports\n",
+        "from datasets import load_dataset                     # Loads datasets for model training and evaluation\n",
+        "from transformers import (\n",
+        "    AutoTokenizer,                                   # Initializes a tokenizer from a pre-trained model\n",
+        "    DataCollatorForTokenClassification,              # Handles padding and formatting of token classification data\n",
+        "    TrainingArguments,                               # Defines training parameters like batch size and learning rate\n",
+        "    Trainer,                                         # High-level API for managing training and evaluation\n",
+        "    AutoModelForTokenClassification,                 # Loads a pre-trained model for token classification tasks\n",
+        "    get_linear_schedule_with_warmup,                 # Learning rate scheduler for gradual warm-up and linear decay\n",
+        "    EarlyStoppingCallback                           # Callback to stop training if validation performance plateaus\n",
+        ")\n",
+        "\n",
+        "# Hugging Face Hub\n",
+        "from huggingface_hub import login                   # Allows logging in to Hugging Face Hub to upload models\n",
+        "\n",
+        "# seqeval metrics for NER evaluation\n",
+        "from seqeval.metrics import precision_score, recall_score, f1_score, classification_report\n",
+        "# Provides precision, recall, F1-score, and classification report for evaluating NER model performance\n"
+      ],
+      "metadata": {
+        "id": "amREIFSH-z7r"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Log in to Hugging Face Hub\n",
+        "# SECURITY: access tokens must never be written into the notebook. The token is\n",
+        "# read from the HF_TOKEN environment variable; when the variable is unset, None is\n",
+        "# passed and login() asks for the token interactively instead.\n",
+        "# NOTE(review): a token was previously committed in this cell - revoke it on the Hub.\n",
+        "login(token=os.environ.get(\"HF_TOKEN\"))\n"
+      ],
+      "metadata": {
+        "id": "K7adlboI-z4p",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "88717ba2-23e2-4aff-d1cf-ca876f0f3d46"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.\n",
+            "Token is valid (permission: fineGrained).\n",
+            "Your token has been saved to /root/.cache/huggingface/token\n",
+            "Login successful\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Turn off the Weights & Biases integration so training does not emit W&B logs\n",
+        "os.environ.update({\"WANDB_DISABLED\": \"true\"})\n",
+        "\n",
+        "# Silence every Python warning to keep training and evaluation output uncluttered\n",
+        "warnings.simplefilter(\"ignore\")\n"
+      ],
+      "metadata": {
+        "id": "Qccgsjfs-zzA"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Fetch the Azerbaijani NER corpus from the Hugging Face Hub\n",
+        "dataset = load_dataset(path=\"LocalDoc/azerbaijani-ner-dataset\")\n",
+        "print(dataset)  # Show the available splits, column names and row counts\n",
+        "\n",
+        "# Preprocessing function to format tokens and NER tags correctly\n",
+        "def preprocess_example(example):\n",
+        "    \"\"\"Parse the `tokens` and `ner_tags` fields of one example into Python lists.\n",
+        "\n",
+        "    Each field may be the string form of a list literal (e.g. \"['a', 'b']\") or an\n",
+        "    already-parsed list/tuple, so re-applying the function to its own output\n",
+        "    leaves the example unchanged.\n",
+        "\n",
+        "    Args:\n",
+        "        example: mapping with `tokens` and `ner_tags` keys; `index` is used only\n",
+        "            in the log message.\n",
+        "\n",
+        "    Returns:\n",
+        "        The same mapping with `tokens` as a list and `ner_tags` as a list of ints.\n",
+        "        A malformed example is logged and both fields are set to empty lists; the\n",
+        "        row itself is kept, it is not removed from the dataset.\n",
+        "    \"\"\"\n",
+        "    try:\n",
+        "        tokens = example[\"tokens\"]\n",
+        "        tags = example[\"ner_tags\"]\n",
+        "        # Only strings are parsed: literal_eval raises on non-string input such as a list\n",
+        "        if isinstance(tokens, str):\n",
+        "            tokens = ast.literal_eval(tokens)\n",
+        "        if isinstance(tags, str):\n",
+        "            tags = ast.literal_eval(tags)\n",
+        "        # A literal that parses to something other than a sequence is malformed too\n",
+        "        if not isinstance(tokens, (list, tuple)) or not isinstance(tags, (list, tuple)):\n",
+        "            raise ValueError(\"tokens and ner_tags must be list literals\")\n",
+        "        tokens = list(tokens)\n",
+        "        tags = [int(tag) for tag in tags]\n",
+        "    except (ValueError, SyntaxError, TypeError) as e:\n",
+        "        # TypeError covers tags that int() cannot convert (e.g. None), which would\n",
+        "        # otherwise abort the whole map() call\n",
+        "        print(f\"Skipping malformed example: {example.get('index')} due to error: {e}\")\n",
+        "        tokens, tags = [], []\n",
+        "    example[\"tokens\"] = tokens\n",
+        "    example[\"ner_tags\"] = tags\n",
+        "    return example\n",
+        "\n",
+        "# Run the parser over every row so all examples share the same list-based format\n",
+        "dataset = dataset.map(function=preprocess_example)\n"
+      ],
+      "metadata": {
+        "id": "fQ6ttUM8-zwM",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "362280bb-16c3-4462-f568-6eba09915ec1"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "DatasetDict({\n",
+            "    train: Dataset({\n",
+            "        features: ['index', 'tokens', 'ner_tags'],\n",
+            "        num_rows: 99545\n",
+            "    })\n",
+            "})\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Load the pretrained xlm-roberta-large tokenizer used for multilingual NER\n",
+        "tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=\"xlm-roberta-large\")\n",
+        "\n",
+        "# Function to tokenize input and align labels with tokenized words\n",
+        "def tokenize_and_align_labels(example):\n",
+        "    # Tokenize the sentence while preserving word boundaries for correct NER tag alignment\n",
+        "    tokenized_inputs = tokenizer(\n",
+        "        example[\"tokens\"],            # List of words (tokens) in the sentence\n",
+        "        truncation=True,               # Truncate sentences longer than max_length\n",
+        "        is_split_into_words=True,      # Specify that input is a list of words\n",
+        "        padding=\"max_length\",          # Pad to maximum sequence length\n",
+        "        max_length=128,                # Set the maximum sequence length to 128 tokens\n",
+        "    )\n",
+        "\n",
+        "    labels = []                        # List to store aligned NER labels\n",
+        "    word_ids = tokenized_inputs.word_ids()  # Get word IDs for each token\n",
+        "    previous_word_idx = None           # Initialize previous word index for tracking\n",
+        "\n",
+        "    # Loop through word indices to align NER tags with subword tokens\n",
+        "    for word_idx in word_ids:\n",
+        "        if word_idx is None:\n",
+        "            labels.append(-100)        # Special tokens and padding have no word id; label them -100 (ignored in loss)\n",
+        "        elif word_idx != previous_word_idx:\n",
+        "            # First sub-token of a new word: use that word's NER tag, or -100 if the tag list is shorter than the token list\n",
+        "            labels.append(example[\"ner_tags\"][word_idx] if word_idx < len(example[\"ner_tags\"]) else -100)\n",
+        "        else:\n",
+        "            labels.append(-100)        # Label subword tokens with -100 to avoid redundant labels\n",
+        "        previous_word_idx = word_idx   # Update previous word index\n",
+        "\n",
+        "    tokenized_inputs[\"labels\"] = labels  # Add labels to tokenized inputs\n",
+        "    return tokenized_inputs\n",
+        "\n",
+        "# Apply tokenization and label alignment function to the dataset\n",
+        "tokenized_datasets = dataset.map(tokenize_and_align_labels, batched=False)\n"
+      ],
+      "metadata": {
+        "id": "-24SJijT-zth",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 145,
+          "referenced_widgets": [
+            "366e5a0ac67d4e0e94da459f3e69804e",
+            "3c6cd74053f74ac18c4f5bbfb9a2fc69",
+            "22d5df7f49b34fec91c7eb4e7e4ab33e",
+            "25153fcf872048379de7c71420f3a581",
+            "a1883d8b08cc458287224bc89aeb54d1",
+            "e03078ea896e41e7bcd922afd77b83c9",
+            "793237ce29034606b2b34bf559cd87da",
+            "54177c30c7974ab9ac986cb9aa17793c",
+            "95a31c2e01744ccca1fd1d07e1e99d19",
+            "1d20d5f57db24eb59f4f633ee1443495",
+            "31f2258ec506441e83752bfa67d53398",
+            "4628c887a3404cb79319e2586cbf81af",
+            "8ae15ae97e85478aaf8ff109349f419a",
+            "adc84a2b4e54479d927ae5b253eb90c2",
+            "549602a8d77241929793d70afa0d54b9",
+            "5d1d0adb88b748e4859c71019a0cf8e2",
+            "b0c34ffabd284318842c23cc4baba1cf",
+            "b30aeec96e4d4826bab3c207561b4778",
+            "10b114cb480141cbab6a26f9a89d2a7e",
+            "3943a1720767453784dfaa6e9017afb2",
+            "1d26be052e6d4d479a2c4c68f027a719",
+            "5c35bb1be95e4d6c9736330953e045e3",
+            "550652ab3d9f482ba2a5485cd84c939b",
+            "09a5d2c99fb9434ab90b3200cd51a3ae",
+            "b4dbc8e0dbd342d19c5f652a004bc765",
+            "4dc271194c7648c8894dd510a69c103d",
+            "4debd0c75c79416d917ea5641e4a8841",
+            "7c644402f92b408182ab014e2ea02daa",
+            "affe4914cd6f41e39124f093e36cdb07",
+            "ea2d20664c5640ff87cd1b909800722c",
+            "ca95df7382f2412b9328f96a463209a1",
+            "186e1b0766044f718d2024645c4e87c7",
+            "57d59fcaff5e466b8605b23887650cf7",
+            "ce139b88df824efea4d55e4813ee1b88",
+            "1fb3250b1b5540d8a9365435900db8b5",
+            "675aa319a3504e22a9b1d58eff9188a2",
+            "48e49cdb0ec8417782ed042ca84d4597",
+            "f15259b4926d40b5a70ee8eb5213e9f5",
+            "b0f1e42f5e4f4ac8b1c4ca12cfebabec",
+            "41ea4f253b6b44129196e0d894777c4a",
+            "ccf8fe1474d540a7be7b6757119d92fd",
+            "34326871a80140679ffe40ac560192a9",
+            "19987899825a49b19c31a7225d3ff0b8",
+            "ed46a4b1d6b647fcaa01526262b19431"
+          ]
+        },
+        "outputId": "ddc67c6c-b931-466e-8da8-90c7ead34f0d"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]"
+            ],
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "366e5a0ac67d4e0e94da459f3e69804e"
+            }
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]"
+            ],
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "4628c887a3404cb79319e2586cbf81af"
+            }
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "tokenizer.json:   0%|          | 0.00/9.10M [00:00<?, ?B/s]"
+            ],
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "550652ab3d9f482ba2a5485cd84c939b"
+            }
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "Map:   0%|          | 0/99545 [00:00<?, ? examples/s]"
+            ],
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "ce139b88df824efea4d55e4813ee1b88"
+            }
+          },
+          "metadata": {}
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Create a 90-10 split of the dataset for training and validation.\n",
+        "# A fixed seed makes the shuffle (and therefore the split) identical across reruns,\n",
+        "# so metrics from different runs are computed on the same validation rows.\n",
+        "# NOTE: this cell rebinds tokenized_datasets; rerunning it without first rerunning the\n",
+        "# tokenization cell would split the already-reduced train split a second time.\n",
+        "tokenized_datasets = tokenized_datasets[\"train\"].train_test_split(test_size=0.1, seed=42)\n",
+        "print(tokenized_datasets)  # Output structure of split datasets"
+      ],
+      "metadata": {
+        "id": "DA7mW2it-zoo",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "6c8b73c2-6192-4bd4-87fe-86856ee70625"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "DatasetDict({\n",
+            "    train: Dataset({\n",
+            "        features: ['index', 'tokens', 'ner_tags', 'input_ids', 'attention_mask', 'labels'],\n",
+            "        num_rows: 89590\n",
+            "    })\n",
+            "    test: Dataset({\n",
+            "        features: ['index', 'tokens', 'ner_tags', 'input_ids', 'attention_mask', 'labels'],\n",
+            "        num_rows: 9955\n",
+            "    })\n",
+            "})\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Define a list of entity labels for NER tagging with B- (beginning) and I- (inside) markers\n",
+        "label_list = [\n",
+        "    \"O\",                  # Outside of a named entity\n",
+        "    \"B-PERSON\", \"I-PERSON\",         # Person name (e.g., \"John\" in \"John Doe\")\n",
+        "    \"B-LOCATION\", \"I-LOCATION\",     # Geographical location (e.g., \"Paris\")\n",
+        "    \"B-ORGANISATION\", \"I-ORGANISATION\", # Organization name (e.g., \"UNICEF\")\n",
+        "    \"B-DATE\", \"I-DATE\",             # Date entity (e.g., \"2024-11-05\")\n",
+        "    \"B-TIME\", \"I-TIME\",             # Time (e.g., \"12:00 PM\")\n",
+        "    \"B-MONEY\", \"I-MONEY\",           # Monetary values (e.g., \"$20\")\n",
+        "    \"B-PERCENTAGE\", \"I-PERCENTAGE\", # Percentage values (e.g., \"20%\")\n",
+        "    \"B-FACILITY\", \"I-FACILITY\",     # Physical facilities (e.g., \"Airport\")\n",
+        "    \"B-PRODUCT\", \"I-PRODUCT\",       # Product names (e.g., \"iPhone\")\n",
+        "    \"B-EVENT\", \"I-EVENT\",           # Named events (e.g., \"Olympics\")\n",
+        "    \"B-ART\", \"I-ART\",               # Works of art (e.g., \"Mona Lisa\")\n",
+        "    \"B-LAW\", \"I-LAW\",               # Laws and legal documents (e.g., \"Article 50\")\n",
+        "    \"B-LANGUAGE\", \"I-LANGUAGE\",     # Languages (e.g., \"Azerbaijani\")\n",
+        "    \"B-GPE\", \"I-GPE\",               # Geopolitical entities (e.g., \"Europe\")\n",
+        "    \"B-NORP\", \"I-NORP\",             # Nationalities, religious groups, political groups\n",
+        "    \"B-ORDINAL\", \"I-ORDINAL\",       # Ordinal indicators (e.g., \"first\", \"second\")\n",
+        "    \"B-CARDINAL\", \"I-CARDINAL\",     # Cardinal numbers (e.g., \"three\")\n",
+        "    \"B-DISEASE\", \"I-DISEASE\",       # Diseases (e.g., \"COVID-19\")\n",
+        "    \"B-CONTACT\", \"I-CONTACT\",       # Contact info (e.g., email or phone number)\n",
+        "    \"B-ADAGE\", \"I-ADAGE\",           # Common sayings or adages\n",
+        "    \"B-QUANTITY\", \"I-QUANTITY\",     # Quantities (e.g., \"5 km\")\n",
+        "    \"B-MISCELLANEOUS\", \"I-MISCELLANEOUS\", # Miscellaneous entities not fitting other categories\n",
+        "    \"B-POSITION\", \"I-POSITION\",     # Job titles or positions (e.g., \"CEO\")\n",
+        "    \"B-PROJECT\", \"I-PROJECT\"        # Project names (e.g., \"Project Apollo\")\n",
+        "]"
+      ],
+      "metadata": {
+        "id": "-lVHfKEE-zmm"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Initialize a data collator to handle padding and formatting for token classification\n",
+        "data_collator = DataCollatorForTokenClassification(tokenizer)\n",
+        "\n",
+        "# Load a pre-trained model for token classification, adapted for NER tasks\n",
+        "model = AutoModelForTokenClassification.from_pretrained(\n",
+        "    \"xlm-roberta-large\",               # Base model (multilingual XLM-RoBERTa) for NER\n",
+        "    num_labels=len(label_list)        # Set the number of output labels to match NER categories\n",
+        ")\n"
+      ],
+      "metadata": {
+        "id": "jUfWCaen-zjr",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "5399146a-29d0-4dfd-a93b-dc22779dbbdd"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Some weights of XLMRobertaForTokenClassification were not initialized from the model checkpoint at xlm-roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']\n",
+            "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Define a function to compute evaluation metrics for the model's predictions\n",
+        "def compute_metrics(p):\n",
+        "    predictions, labels = p  # Unpack predictions and true labels from the input\n",
+        "\n",
+        "    # Convert logits to predicted label indices by taking the argmax along the last axis\n",
+        "    predictions = np.argmax(predictions, axis=2)\n",
+        "\n",
+        "    # Filter out special padding labels (-100) and convert indices to label names\n",
+        "    true_labels = [[label_list[l] for l in label if l != -100] for label in labels]\n",
+        "    true_predictions = [\n",
+        "        [label_list[p] for (p, l) in zip(prediction, label) if l != -100]\n",
+        "        for prediction, label in zip(predictions, labels)\n",
+        "    ]\n",
+        "\n",
+        "    # Print a detailed classification report for each label category\n",
+        "    print(classification_report(true_labels, true_predictions))\n",
+        "\n",
+        "    # Calculate and return key evaluation metrics\n",
+        "    return {\n",
+        "        # Precision measures the accuracy of predicted positive instances\n",
+        "        # Important in NER to ensure entity predictions are correct and reduce false positives.\n",
+        "        \"precision\": precision_score(true_labels, true_predictions),\n",
+        "\n",
+        "        # Recall measures the model's ability to capture all relevant entities\n",
+        "        # Essential in NER to ensure the model captures all entities, reducing false negatives.\n",
+        "        \"recall\": recall_score(true_labels, true_predictions),\n",
+        "\n",
+        "        # F1-score is the harmonic mean of precision and recall, balancing both metrics\n",
+        "        # Useful in NER for providing an overall performance measure, especially when precision and recall are both important.\n",
+        "        \"f1\": f1_score(true_labels, true_predictions),\n",
+        "    }"
+      ],
+      "metadata": {
+        "id": "9b7EajE_-zhS"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Set up training arguments for model training, defining essential training configurations\n",
+        "training_args = TrainingArguments(\n",
+        "    output_dir=\"./results\",               # Directory to save model checkpoints and final outputs\n",
+        "    evaluation_strategy=\"epoch\",          # Evaluate model on the validation set at the end of each epoch\n",
+        "    save_strategy=\"epoch\",                # Save model checkpoints at the end of each epoch\n",
+        "    learning_rate=2e-5,                   # Set a low learning rate to ensure stable training for fine-tuning\n",
+        "    per_device_train_batch_size=128,       # Number of examples per batch during training, balancing speed and memory\n",
+        "    per_device_eval_batch_size=128,        # Number of examples per batch during evaluation\n",
+        "    num_train_epochs=12,                   # Number of full training passes over the dataset\n",
+        "    weight_decay=0.005,                    # Regularization term to prevent overfitting by penalizing large weights\n",
+        "    fp16=True,                            # Use 16-bit floating point for faster and memory-efficient training\n",
+        "    logging_dir='./logs',                 # Directory to store training logs\n",
+        "    save_total_limit=2,                   # Cap stored checkpoints at 2 to save storage space (the best checkpoint is always retained because load_best_model_at_end=True)\n",
+        "    load_best_model_at_end=True,          # Load the best model based on metrics at the end of training\n",
+        "    metric_for_best_model=\"f1\",           # Use F1-score to determine the best model checkpoint\n",
+        "    report_to=\"none\"                      # Disable reporting to external services (useful in local runs)\n",
+        ")\n"
+      ],
+      "metadata": {
+        "id": "PmJTMpp6-zew"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Initialize the Trainer class to manage the training loop with all necessary components\n",
+        "trainer = Trainer(\n",
+        "    model=model,                         # The pre-trained model to be fine-tuned\n",
+        "    args=training_args,                  # Training configuration parameters defined in TrainingArguments\n",
+        "    train_dataset=tokenized_datasets[\"train\"],  # Tokenized training dataset\n",
+        "    eval_dataset=tokenized_datasets[\"test\"],    # Tokenized validation dataset\n",
+        "    tokenizer=tokenizer,                 # Tokenizer used for processing input text\n",
+        "    data_collator=data_collator,         # Data collator for padding and batching during training\n",
+        "    compute_metrics=compute_metrics,     # Function to calculate evaluation metrics like precision, recall, F1\n",
+        "    callbacks=[EarlyStoppingCallback(early_stopping_patience=5)] # Stop training early if the best-model metric (F1) doesn't improve for 5 consecutive evaluations (one per epoch here)\n",
+        ")\n"
+      ],
+      "metadata": {
+        "id": "WqoF7QJy-zb2"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Begin the training process and capture the training metrics\n",
+        "training_metrics = trainer.train()\n",
+        "\n",
+        "# Evaluate the model on the validation set after training\n",
+        "eval_results = trainer.evaluate()\n",
+        "\n",
+        "# Print evaluation results, including precision, recall, and F1-score\n",
+        "print(eval_results)\n"
+      ],
+      "metadata": {
+        "id": "QveYYwvA-zUR",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 1000
+        },
+        "outputId": "a432a1a6-fc14-471e-ad2f-ec25e15fcac8"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "data": {
+            "text/html": [
+              "\n",
+              "    <div>\n",
+              "      \n",
+              "      <progress value='6666' max='8400' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
+              "      [6666/8400 50:10 < 13:03, 2.21 it/s, Epoch 9.52/12]\n",
+              "    </div>\n",
+              "    <table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              " <tr style=\"text-align: left;\">\n",
+              "      <th>Epoch</th>\n",
+              "      <th>Training Loss</th>\n",
+              "      <th>Validation Loss</th>\n",
+              "      <th>Precision</th>\n",
+              "      <th>Recall</th>\n",
+              "      <th>F1</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <td>1</td>\n",
+              "      <td>0.407500</td>\n",
+              "      <td>0.253823</td>\n",
+              "      <td>0.768923</td>\n",
+              "      <td>0.721350</td>\n",
+              "      <td>0.744377</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>2</td>\n",
+              "      <td>0.255600</td>\n",
+              "      <td>0.249694</td>\n",
+              "      <td>0.783549</td>\n",
+              "      <td>0.724464</td>\n",
+              "      <td>0.752849</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>3</td>\n",
+              "      <td>0.214400</td>\n",
+              "      <td>0.248773</td>\n",
+              "      <td>0.750857</td>\n",
+              "      <td>0.748900</td>\n",
+              "      <td>0.749877</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>4</td>\n",
+              "      <td>0.193400</td>\n",
+              "      <td>0.257051</td>\n",
+              "      <td>0.768623</td>\n",
+              "      <td>0.740371</td>\n",
+              "      <td>0.754232</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>5</td>\n",
+              "      <td>0.169800</td>\n",
+              "      <td>0.275679</td>\n",
+              "      <td>0.745789</td>\n",
+              "      <td>0.753740</td>\n",
+              "      <td>0.749743</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>6</td>\n",
+              "      <td>0.152600</td>\n",
+              "      <td>0.288074</td>\n",
+              "      <td>0.783131</td>\n",
+              "      <td>0.728423</td>\n",
+              "      <td>0.754787</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>7</td>\n",
+              "      <td>0.144300</td>\n",
+              "      <td>0.303378</td>\n",
+              "      <td>0.758504</td>\n",
+              "      <td>0.738069</td>\n",
+              "      <td>0.748147</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>8</td>\n",
+              "      <td>0.126800</td>\n",
+              "      <td>0.311300</td>\n",
+              "      <td>0.745589</td>\n",
+              "      <td>0.750863</td>\n",
+              "      <td>0.748217</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>9</td>\n",
+              "      <td>0.119400</td>\n",
+              "      <td>0.331631</td>\n",
+              "      <td>0.739316</td>\n",
+              "      <td>0.749475</td>\n",
+              "      <td>0.744361</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table><p>"
+            ],
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
+            ]
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "metadata": {
+            "tags": null
+          },
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "              precision    recall  f1-score   support\n",
+            "\n",
+            "         ART       0.64      0.12      0.20      1828\n",
+            "        DATE       0.50      0.49      0.49       834\n",
+            "       EVENT       0.63      0.46      0.53        63\n",
+            "    FACILITY       0.70      0.69      0.70      1134\n",
+            "         LAW       0.64      0.49      0.56      1066\n",
+            "    LOCATION       0.78      0.80      0.79      8795\n",
+            "       MONEY       0.62      0.51      0.56       555\n",
+            "ORGANISATION       0.64      0.70      0.67       554\n",
+            "  PERCENTAGE       0.76      0.84      0.80      3502\n",
+            "      PERSON       0.89      0.81      0.85      7007\n",
+            "     PRODUCT       0.82      0.83      0.83      2624\n",
+            "        TIME       0.55      0.54      0.54      1584\n",
+            "\n",
+            "   micro avg       0.77      0.72      0.74     29546\n",
+            "   macro avg       0.68      0.61      0.63     29546\n",
+            "weighted avg       0.76      0.72      0.73     29546\n",
+            "\n",
+            "              precision    recall  f1-score   support\n",
+            "\n",
+            "         ART       0.60      0.16      0.25      1828\n",
+            "        DATE       0.56      0.47      0.51       834\n",
+            "       EVENT       0.55      0.56      0.55        63\n",
+            "    FACILITY       0.75      0.66      0.70      1134\n",
+            "         LAW       0.61      0.57      0.59      1066\n",
+            "    LOCATION       0.80      0.78      0.79      8795\n",
+            "       MONEY       0.62      0.56      0.59       555\n",
+            "ORGANISATION       0.66      0.66      0.66       554\n",
+            "  PERCENTAGE       0.78      0.84      0.81      3502\n",
+            "      PERSON       0.88      0.84      0.86      7007\n",
+            "     PRODUCT       0.81      0.86      0.83      2624\n",
+            "        TIME       0.61      0.47      0.53      1584\n",
+            "\n",
+            "   micro avg       0.78      0.72      0.75     29546\n",
+            "   macro avg       0.68      0.62      0.64     29546\n",
+            "weighted avg       0.77      0.72      0.74     29546\n",
+            "\n",
+            "              precision    recall  f1-score   support\n",
+            "\n",
+            "         ART       0.41      0.22      0.28      1828\n",
+            "        DATE       0.47      0.52      0.49       834\n",
+            "       EVENT       0.64      0.51      0.57        63\n",
+            "    FACILITY       0.71      0.70      0.71      1134\n",
+            "         LAW       0.63      0.56      0.59      1066\n",
+            "    LOCATION       0.77      0.82      0.80      8795\n",
+            "       MONEY       0.58      0.61      0.59       555\n",
+            "ORGANISATION       0.64      0.69      0.67       554\n",
+            "  PERCENTAGE       0.79      0.82      0.80      3502\n",
+            "      PERSON       0.84      0.86      0.85      7007\n",
+            "     PRODUCT       0.80      0.86      0.83      2624\n",
+            "        TIME       0.59      0.53      0.56      1584\n",
+            "\n",
+            "   micro avg       0.75      0.75      0.75     29546\n",
+            "   macro avg       0.66      0.64      0.64     29546\n",
+            "weighted avg       0.74      0.75      0.74     29546\n",
+            "\n",
+            "              precision    recall  f1-score   support\n",
+            "\n",
+            "         ART       0.40      0.18      0.25      1828\n",
+            "        DATE       0.52      0.49      0.50       834\n",
+            "       EVENT       0.67      0.51      0.58        63\n",
+            "    FACILITY       0.78      0.63      0.70      1134\n",
+            "         LAW       0.63      0.60      0.61      1066\n",
+            "    LOCATION       0.78      0.81      0.80      8795\n",
+            "       MONEY       0.63      0.50      0.56       555\n",
+            "ORGANISATION       0.64      0.66      0.65       554\n",
+            "  PERCENTAGE       0.79      0.83      0.81      3502\n",
+            "      PERSON       0.86      0.85      0.86      7007\n",
+            "     PRODUCT       0.81      0.87      0.84      2624\n",
+            "        TIME       0.59      0.53      0.56      1584\n",
+            "\n",
+            "   micro avg       0.77      0.74      0.75     29546\n",
+            "   macro avg       0.68      0.62      0.64     29546\n",
+            "weighted avg       0.75      0.74      0.74     29546\n",
+            "\n",
+            "              precision    recall  f1-score   support\n",
+            "\n",
+            "         ART       0.39      0.20      0.27      1828\n",
+            "        DATE       0.51      0.51      0.51       834\n",
+            "       EVENT       0.66      0.59      0.62        63\n",
+            "    FACILITY       0.73      0.69      0.71      1134\n",
+            "         LAW       0.57      0.63      0.60      1066\n",
+            "    LOCATION       0.76      0.82      0.79      8795\n",
+            "       MONEY       0.59      0.57      0.58       555\n",
+            "ORGANISATION       0.60      0.69      0.64       554\n",
+            "  PERCENTAGE       0.76      0.84      0.80      3502\n",
+            "      PERSON       0.86      0.84      0.85      7007\n",
+            "     PRODUCT       0.79      0.88      0.83      2624\n",
+            "        TIME       0.58      0.55      0.56      1584\n",
+            "\n",
+            "   micro avg       0.75      0.75      0.75     29546\n",
+            "   macro avg       0.65      0.65      0.65     29546\n",
+            "weighted avg       0.74      0.75      0.74     29546\n",
+            "\n",
+            "              precision    recall  f1-score   support\n",
+            "\n",
+            "         ART       0.41      0.19      0.26      1828\n",
+            "        DATE       0.53      0.49      0.51       834\n",
+            "       EVENT       0.67      0.51      0.58        63\n",
+            "    FACILITY       0.74      0.68      0.71      1134\n",
+            "         LAW       0.62      0.58      0.60      1066\n",
+            "    LOCATION       0.81      0.79      0.80      8795\n",
+            "       MONEY       0.59      0.56      0.58       555\n",
+            "ORGANISATION       0.70      0.69      0.70       554\n",
+            "  PERCENTAGE       0.80      0.82      0.81      3502\n",
+            "      PERSON       0.90      0.82      0.86      7007\n",
+            "     PRODUCT       0.83      0.84      0.84      2624\n",
+            "        TIME       0.60      0.53      0.57      1584\n",
+            "\n",
+            "   micro avg       0.78      0.73      0.75     29546\n",
+            "   macro avg       0.68      0.63      0.65     29546\n",
+            "weighted avg       0.77      0.73      0.75     29546\n",
+            "\n",
+            "              precision    recall  f1-score   support\n",
+            "\n",
+            "         ART       0.34      0.21      0.26      1828\n",
+            "        DATE       0.51      0.51      0.51       834\n",
+            "       EVENT       0.61      0.52      0.56        63\n",
+            "    FACILITY       0.74      0.67      0.70      1134\n",
+            "         LAW       0.63      0.56      0.59      1066\n",
+            "    LOCATION       0.79      0.79      0.79      8795\n",
+            "       MONEY       0.57      0.55      0.56       555\n",
+            "ORGANISATION       0.66      0.68      0.67       554\n",
+            "  PERCENTAGE       0.78      0.82      0.80      3502\n",
+            "      PERSON       0.86      0.85      0.85      7007\n",
+            "     PRODUCT       0.80      0.87      0.83      2624\n",
+            "        TIME       0.59      0.54      0.56      1584\n",
+            "\n",
+            "   micro avg       0.76      0.74      0.75     29546\n",
+            "   macro avg       0.66      0.63      0.64     29546\n",
+            "weighted avg       0.75      0.74      0.74     29546\n",
+            "\n",
+            "              precision    recall  f1-score   support\n",
+            "\n",
+            "         ART       0.33      0.22      0.26      1828\n",
+            "        DATE       0.52      0.52      0.52       834\n",
+            "       EVENT       0.58      0.52      0.55        63\n",
+            "    FACILITY       0.74      0.69      0.71      1134\n",
+            "         LAW       0.59      0.61      0.60      1066\n",
+            "    LOCATION       0.77      0.82      0.79      8795\n",
+            "       MONEY       0.56      0.59      0.57       555\n",
+            "ORGANISATION       0.65      0.68      0.66       554\n",
+            "  PERCENTAGE       0.79      0.81      0.80      3502\n",
+            "      PERSON       0.86      0.86      0.86      7007\n",
+            "     PRODUCT       0.82      0.87      0.84      2624\n",
+            "        TIME       0.57      0.56      0.56      1584\n",
+            "\n",
+            "   micro avg       0.75      0.75      0.75     29546\n",
+            "   macro avg       0.65      0.65      0.64     29546\n",
+            "weighted avg       0.74      0.75      0.74     29546\n",
+            "\n",
+            "              precision    recall  f1-score   support\n",
+            "\n",
+            "         ART       0.31      0.22      0.26      1828\n",
+            "        DATE       0.50      0.54      0.52       834\n",
+            "       EVENT       0.57      0.56      0.56        63\n",
+            "    FACILITY       0.72      0.69      0.71      1134\n",
+            "         LAW       0.57      0.63      0.60      1066\n",
+            "    LOCATION       0.77      0.81      0.79      8795\n",
+            "       MONEY       0.51      0.62      0.56       555\n",
+            "ORGANISATION       0.64      0.69      0.66       554\n",
+            "  PERCENTAGE       0.78      0.81      0.80      3502\n",
+            "      PERSON       0.86      0.84      0.85      7007\n",
+            "     PRODUCT       0.81      0.86      0.83      2624\n",
+            "        TIME       0.56      0.58      0.57      1584\n",
+            "\n",
+            "   micro avg       0.74      0.75      0.74     29546\n",
+            "   macro avg       0.63      0.65      0.64     29546\n",
+            "weighted avg       0.73      0.75      0.74     29546\n",
+            "\n"
+          ]
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
+            ],
+            "text/html": [
+              "\n",
+              "    <div>\n",
+              "      \n",
+              "      <progress value='7700' max='8400' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
+              "      [7700/8400 58:30 < 05:19, 2.19 it/s, Epoch 11/12]\n",
+              "    </div>\n",
+              "    <table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              " <tr style=\"text-align: left;\">\n",
+              "      <th>Epoch</th>\n",
+              "      <th>Training Loss</th>\n",
+              "      <th>Validation Loss</th>\n",
+              "      <th>Precision</th>\n",
+              "      <th>Recall</th>\n",
+              "      <th>F1</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <td>1</td>\n",
+              "      <td>0.407500</td>\n",
+              "      <td>0.253823</td>\n",
+              "      <td>0.768923</td>\n",
+              "      <td>0.721350</td>\n",
+              "      <td>0.744377</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>2</td>\n",
+              "      <td>0.255600</td>\n",
+              "      <td>0.249694</td>\n",
+              "      <td>0.783549</td>\n",
+              "      <td>0.724464</td>\n",
+              "      <td>0.752849</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>3</td>\n",
+              "      <td>0.214400</td>\n",
+              "      <td>0.248773</td>\n",
+              "      <td>0.750857</td>\n",
+              "      <td>0.748900</td>\n",
+              "      <td>0.749877</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>4</td>\n",
+              "      <td>0.193400</td>\n",
+              "      <td>0.257051</td>\n",
+              "      <td>0.768623</td>\n",
+              "      <td>0.740371</td>\n",
+              "      <td>0.754232</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>5</td>\n",
+              "      <td>0.169800</td>\n",
+              "      <td>0.275679</td>\n",
+              "      <td>0.745789</td>\n",
+              "      <td>0.753740</td>\n",
+              "      <td>0.749743</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>6</td>\n",
+              "      <td>0.152600</td>\n",
+              "      <td>0.288074</td>\n",
+              "      <td>0.783131</td>\n",
+              "      <td>0.728423</td>\n",
+              "      <td>0.754787</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>7</td>\n",
+              "      <td>0.144300</td>\n",
+              "      <td>0.303378</td>\n",
+              "      <td>0.758504</td>\n",
+              "      <td>0.738069</td>\n",
+              "      <td>0.748147</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>8</td>\n",
+              "      <td>0.126800</td>\n",
+              "      <td>0.311300</td>\n",
+              "      <td>0.745589</td>\n",
+              "      <td>0.750863</td>\n",
+              "      <td>0.748217</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>9</td>\n",
+              "      <td>0.119400</td>\n",
+              "      <td>0.331631</td>\n",
+              "      <td>0.739316</td>\n",
+              "      <td>0.749475</td>\n",
+              "      <td>0.744361</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>10</td>\n",
+              "      <td>0.109400</td>\n",
+              "      <td>0.344823</td>\n",
+              "      <td>0.754268</td>\n",
+              "      <td>0.737189</td>\n",
+              "      <td>0.745631</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>11</td>\n",
+              "      <td>0.102900</td>\n",
+              "      <td>0.354887</td>\n",
+              "      <td>0.751948</td>\n",
+              "      <td>0.741285</td>\n",
+              "      <td>0.746578</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table><p>"
+            ]
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "              precision    recall  f1-score   support\n",
+            "\n",
+            "         ART       0.30      0.21      0.25      1828\n",
+            "        DATE       0.52      0.52      0.52       834\n",
+            "       EVENT       0.63      0.54      0.58        63\n",
+            "    FACILITY       0.73      0.70      0.71      1134\n",
+            "         LAW       0.60      0.59      0.60      1066\n",
+            "    LOCATION       0.79      0.79      0.79      8795\n",
+            "       MONEY       0.55      0.60      0.57       555\n",
+            "ORGANISATION       0.64      0.68      0.66       554\n",
+            "  PERCENTAGE       0.78      0.82      0.80      3502\n",
+            "      PERSON       0.87      0.84      0.85      7007\n",
+            "     PRODUCT       0.83      0.84      0.83      2624\n",
+            "        TIME       0.58      0.56      0.57      1584\n",
+            "\n",
+            "   micro avg       0.75      0.74      0.75     29546\n",
+            "   macro avg       0.65      0.64      0.65     29546\n",
+            "weighted avg       0.75      0.74      0.74     29546\n",
+            "\n",
+            "              precision    recall  f1-score   support\n",
+            "\n",
+            "         ART       0.32      0.22      0.26      1828\n",
+            "        DATE       0.51      0.52      0.51       834\n",
+            "       EVENT       0.64      0.54      0.59        63\n",
+            "    FACILITY       0.73      0.69      0.71      1134\n",
+            "         LAW       0.60      0.59      0.60      1066\n",
+            "    LOCATION       0.79      0.80      0.79      8795\n",
+            "       MONEY       0.53      0.58      0.55       555\n",
+            "ORGANISATION       0.65      0.68      0.66       554\n",
+            "  PERCENTAGE       0.79      0.82      0.80      3502\n",
+            "      PERSON       0.87      0.84      0.85      7007\n",
+            "     PRODUCT       0.83      0.85      0.84      2624\n",
+            "        TIME       0.58      0.57      0.57      1584\n",
+            "\n",
+            "   micro avg       0.75      0.74      0.75     29546\n",
+            "   macro avg       0.65      0.64      0.65     29546\n",
+            "weighted avg       0.74      0.74      0.74     29546\n",
+            "\n"
+          ]
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
+            ],
+            "text/html": [
+              "\n",
+              "    <div>\n",
+              "      \n",
+              "      <progress value='78' max='78' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
+              "      [78/78 00:10]\n",
+              "    </div>\n",
+              "    "
+            ]
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "              precision    recall  f1-score   support\n",
+            "\n",
+            "         ART       0.41      0.19      0.26      1828\n",
+            "        DATE       0.53      0.49      0.51       834\n",
+            "       EVENT       0.67      0.51      0.58        63\n",
+            "    FACILITY       0.74      0.68      0.71      1134\n",
+            "         LAW       0.62      0.58      0.60      1066\n",
+            "    LOCATION       0.81      0.79      0.80      8795\n",
+            "       MONEY       0.59      0.56      0.58       555\n",
+            "ORGANISATION       0.70      0.69      0.70       554\n",
+            "  PERCENTAGE       0.80      0.82      0.81      3502\n",
+            "      PERSON       0.90      0.82      0.86      7007\n",
+            "     PRODUCT       0.83      0.84      0.84      2624\n",
+            "        TIME       0.60      0.53      0.57      1584\n",
+            "\n",
+            "   micro avg       0.78      0.73      0.75     29546\n",
+            "   macro avg       0.68      0.63      0.65     29546\n",
+            "weighted avg       0.77      0.73      0.75     29546\n",
+            "\n",
+            "{'eval_loss': 0.28807422518730164, 'eval_precision': 0.7831307765082599, 'eval_recall': 0.7284234752589183, 'eval_f1': 0.754787122115452, 'eval_runtime': 16.1047, 'eval_samples_per_second': 618.142, 'eval_steps_per_second': 4.843, 'epoch': 11.0}\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Define the directory where the trained model and tokenizer will be saved\n",
+        "save_directory = \"./xlm-roberta-large\"\n",
+        "\n",
+        "# Save the trained model to the specified directory\n",
+        "model.save_pretrained(save_directory)\n",
+        "\n",
+        "# Save the tokenizer to the same directory for compatibility with the model\n",
+        "tokenizer.save_pretrained(save_directory)\n"
+      ],
+      "metadata": {
+        "id": "7yEFe2_n-zPG",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "d8184694-0ab9-44e4-9b4e-859cd2ea6188"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "('./xlm-roberta-large/tokenizer_config.json',\n",
+              " './xlm-roberta-large/special_tokens_map.json',\n",
+              " './xlm-roberta-large/sentencepiece.bpe.model',\n",
+              " './xlm-roberta-large/added_tokens.json',\n",
+              " './xlm-roberta-large/tokenizer.json')"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 19
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from transformers import pipeline\n",
+        "\n",
+        "# Reload the tokenizer and the model from the directory they were saved to\n",
+        "tokenizer = AutoTokenizer.from_pretrained(save_directory)\n",
+        "model = AutoModelForTokenClassification.from_pretrained(save_directory)\n",
+        "\n",
+        "# Device index handed to the pipeline: 0 when CUDA is available, otherwise -1\n",
+        "if torch.cuda.is_available():\n",
+        "    device = 0\n",
+        "else:\n",
+        "    device = -1\n",
+        "\n",
+        "# Build the NER pipeline around the reloaded model/tokenizer pair\n",
+        "nlp_ner = pipeline(\n",
+        "    \"ner\",\n",
+        "    model=model,\n",
+        "    tokenizer=tokenizer,\n",
+        "    aggregation_strategy=\"simple\",\n",
+        "    device=device,\n",
+        ")\n"
+      ],
+      "metadata": {
+        "id": "zkECg3v9-zNQ"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Translate pipeline groups named \"LABEL_<index>\" into the tag stored at that index of\n",
+        "# label_list; the \"O\" tag is left out, so groups that resolve to it are never mapped.\n",
+        "# NOTE(review): presumably needed because the saved config has no id2label names - confirm.\n",
+        "label_mapping = {f\"LABEL_{i}\": label for i, label in enumerate(label_list) if label != \"O\"}\n",
+        "\n",
+        "def evaluate_model(test_texts, true_labels):\n",
+        "    \"\"\"Run `nlp_ner` over raw texts and print precision, recall, F1 and a classification report.\n",
+        "\n",
+        "    Predictions are compared with the gold tags purely by position: the k-th\n",
+        "    mapped entity predicted for a text is scored against the k-th gold tag.\n",
+        "    When the counts differ, a warning is printed and the prediction list is\n",
+        "    truncated or padded with \"O\" to the length of the gold list.\n",
+        "\n",
+        "    Args:\n",
+        "        test_texts: Iterable of input strings; each one is passed to `nlp_ner`.\n",
+        "        true_labels: One list of gold tags per text, e.g. [[\"B-PERSON\"]].\n",
+        "\n",
+        "    Raises:\n",
+        "        ValueError: If the number of texts and the number of label lists differ.\n",
+        "    \"\"\"\n",
+        "    test_texts = list(test_texts)\n",
+        "    if len(test_texts) != len(true_labels):\n",
+        "        raise ValueError(\n",
+        "            f\"Expected one label list per text, got {len(test_texts)} texts and {len(true_labels)} label lists.\"\n",
+        "        )\n",
+        "\n",
+        "    predictions = []\n",
+        "    for i, (text, gold_labels) in enumerate(zip(test_texts, true_labels)):\n",
+        "        pred_entities = nlp_ner(text)\n",
+        "        # Entity groups without an entry in label_mapping are dropped\n",
+        "        pred_labels = [\n",
+        "            label_mapping[entity[\"entity_group\"]]\n",
+        "            for entity in pred_entities\n",
+        "            if entity[\"entity_group\"] in label_mapping\n",
+        "        ]\n",
+        "        if len(pred_labels) != len(gold_labels):\n",
+        "            print(f\"Warning: Inconsistent number of entities in sample {i+1}. Adjusting predicted entities.\")\n",
+        "            # Cut surplus predictions and pad missing ones with \"O\", so a sample with too few\n",
+        "            # predictions is scored as misses instead of aborting the whole evaluation\n",
+        "            pred_labels = pred_labels[:len(gold_labels)]\n",
+        "            pred_labels += [\"O\"] * (len(gold_labels) - len(pred_labels))\n",
+        "        predictions.append(pred_labels)\n",
+        "\n",
+        "    # Every prediction list now has the same length as its gold list\n",
+        "    precision = precision_score(true_labels, predictions)\n",
+        "    recall = recall_score(true_labels, predictions)\n",
+        "    f1 = f1_score(true_labels, predictions)\n",
+        "    print(\"Precision:\", precision)\n",
+        "    print(\"Recall:\", recall)\n",
+        "    print(\"F1-Score:\", f1)\n",
+        "    print(classification_report(true_labels, predictions))\n"
+      ],
+      "metadata": {
+        "id": "SOFqXU-M_bxO"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# One sample sentence together with the tags expected for it\n",
+        "test_texts = [\n",
+        "    \"Shahla Khuduyeva və Pasha Sığorta şirkəti haqqında məlumat.\",\n",
+        "]\n",
+        "true_labels = [\n",
+        "    [\"B-PERSON\", \"B-ORGANISATION\"],\n",
+        "]\n",
+        "\n",
+        "# Score the pipeline's predictions for this sample and print the report\n",
+        "evaluate_model(test_texts, true_labels)\n"
+      ],
+      "metadata": {
+        "id": "WRCB-_66_buE",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "da8833c8-27e8-40cc-d32f-4eb11158278d"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Precision: 0.5\n",
+            "Recall: 0.5\n",
+            "F1-Score: 0.5\n",
+            "              precision    recall  f1-score   support\n",
+            "\n",
+            "    LOCATION       0.00      0.00      0.00         0\n",
+            "ORGANISATION       0.00      0.00      0.00         1\n",
+            "      PERSON       1.00      1.00      1.00         1\n",
+            "\n",
+            "   micro avg       0.50      0.50      0.50         2\n",
+            "   macro avg       0.33      0.33      0.33         2\n",
+            "weighted avg       0.50      0.50      0.50         2\n",
+            "\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [],
+      "metadata": {
+        "id": "x53zS3Vv_brU"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [],
+      "metadata": {
+        "id": "5Uoebirj_boo"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [],
+      "metadata": {
+        "id": "RKounG2l_bl5"
+      },
+      "execution_count": null,
+      "outputs": []
+    }
+  ]
+}
\ No newline at end of file