diff --git "a/space/space/space/space/space/space/notebooks/Duc_Notebook.ipynb" "b/space/space/space/space/space/space/notebooks/Duc_Notebook.ipynb"
new file mode 100644--- /dev/null
+++ "b/space/space/space/space/space/space/notebooks/Duc_Notebook.ipynb"
@@ -0,0 +1,7467 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "provenance": [],
+      "gpuType": "T4"
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    },
+    "widgets": {
+      "application/vnd.jupyter.widget-state+json": {
+        "2707f2f1d216421385cc4166127d696a": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HBoxModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_5350c7b689f14d138357f92a78479d4b",
+              "IPY_MODEL_5423cc4795f9415ebcf7eb2eb45f08b4",
+              "IPY_MODEL_f1ef72618a0b4710ac6ab5cfc86ed252"
+            ],
+            "layout": "IPY_MODEL_8eb197c462304d6fb6d15c175db315f5"
+          }
+        },
+        "5350c7b689f14d138357f92a78479d4b": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_a4178b6f78bf4f2aa6cb7ad924308970",
+            "placeholder": "​",
+            "style": "IPY_MODEL_59f7b90017364fc3ad2969061e3efba2",
+            "value": "config.json: 100%"
+          }
+        },
+        "5423cc4795f9415ebcf7eb2eb45f08b4": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "FloatProgressModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_3ca4b088872649c7856c3be691ca6224",
+            "max": 557,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_1c77b809b5ec42e7b00b512cbbc7071f",
+            "value": 557
+          }
+        },
+        "f1ef72618a0b4710ac6ab5cfc86ed252": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_c657eed438b741189da3846983d8e0a6",
+            "placeholder": "​",
+            "style": "IPY_MODEL_21f740caf6a94a468a54552961c54d63",
+            "value": " 557/557 [00:00&lt;00:00, 13.2kB/s]"
+          }
+        },
+        "8eb197c462304d6fb6d15c175db315f5": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "a4178b6f78bf4f2aa6cb7ad924308970": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "59f7b90017364fc3ad2969061e3efba2": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "3ca4b088872649c7856c3be691ca6224": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "1c77b809b5ec42e7b00b512cbbc7071f": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "ProgressStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "c657eed438b741189da3846983d8e0a6": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "21f740caf6a94a468a54552961c54d63": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "7b988f4f4c97462c9ee30aebabf4029b": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HBoxModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_8b5ccad1921342dca6cbf5adcc93e9fa",
+              "IPY_MODEL_25c32ab8424242daa414680dc5b8ea57",
+              "IPY_MODEL_71a5bbc69fe648168877b7ab6f6cd8a6"
+            ],
+            "layout": "IPY_MODEL_0434bc2965584b018978d590bcda68c6"
+          }
+        },
+        "8b5ccad1921342dca6cbf5adcc93e9fa": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_b9ba2a9d9c704dd091cf17241541c280",
+            "placeholder": "​",
+            "style": "IPY_MODEL_a75ea7ca7e384c948f07eeffa8f676b5",
+            "value": "vocab.txt: 100%"
+          }
+        },
+        "25c32ab8424242daa414680dc5b8ea57": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "FloatProgressModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_e0a24e13af474afc98fc5c93c561e880",
+            "max": 895321,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_4a1b96a5fde64fb499eeacd733b72c32",
+            "value": 895321
+          }
+        },
+        "71a5bbc69fe648168877b7ab6f6cd8a6": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_f761d67cb46a4af3b49a22209cd450a9",
+            "placeholder": "​",
+            "style": "IPY_MODEL_8125e9952f68467d8c7d55da426c9098",
+            "value": " 895k/895k [00:00&lt;00:00, 4.78MB/s]"
+          }
+        },
+        "0434bc2965584b018978d590bcda68c6": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "b9ba2a9d9c704dd091cf17241541c280": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "a75ea7ca7e384c948f07eeffa8f676b5": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "e0a24e13af474afc98fc5c93c561e880": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "4a1b96a5fde64fb499eeacd733b72c32": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "ProgressStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "f761d67cb46a4af3b49a22209cd450a9": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "8125e9952f68467d8c7d55da426c9098": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "0885e06d76f24053890d4ade7044b22e": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HBoxModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_4303d7ea0bf14661803caf8f617ce788",
+              "IPY_MODEL_cd2aec8cb6de49f095681da2b99e7660",
+              "IPY_MODEL_fe84d9c4f3124682809f6e7117b40638"
+            ],
+            "layout": "IPY_MODEL_c14214a879ca425c8955b380d73f3010"
+          }
+        },
+        "4303d7ea0bf14661803caf8f617ce788": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_2f28ad6792294553b24cbaa7dea533af",
+            "placeholder": "​",
+            "style": "IPY_MODEL_c58168f9246046728211a403540060f5",
+            "value": "bpe.codes: 100%"
+          }
+        },
+        "cd2aec8cb6de49f095681da2b99e7660": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "FloatProgressModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_64473dfca69a45438094656d2b995207",
+            "max": 1135173,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_0a782a4d3cfc4b9cbd802bedcdae3153",
+            "value": 1135173
+          }
+        },
+        "fe84d9c4f3124682809f6e7117b40638": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_dc5b47931e0340a4980ae315c6a802a5",
+            "placeholder": "​",
+            "style": "IPY_MODEL_8d431574a7a14c5fb1466fa97a33e4fb",
+            "value": " 1.14M/1.14M [00:00&lt;00:00, 8.93MB/s]"
+          }
+        },
+        "c14214a879ca425c8955b380d73f3010": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "2f28ad6792294553b24cbaa7dea533af": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "c58168f9246046728211a403540060f5": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "64473dfca69a45438094656d2b995207": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "0a782a4d3cfc4b9cbd802bedcdae3153": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "ProgressStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "dc5b47931e0340a4980ae315c6a802a5": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "8d431574a7a14c5fb1466fa97a33e4fb": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "960273e5205f49efb2be0576d2f74bca": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HBoxModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_7e3192df593248c7bfafd5b0347a2b1b",
+              "IPY_MODEL_d18a2302adaa415785ed8f8bb578b5b9",
+              "IPY_MODEL_9604f5d16db5446a83400c70071c90e7"
+            ],
+            "layout": "IPY_MODEL_337bbd72f0d4481f8a13cb8323afa241"
+          }
+        },
+        "7e3192df593248c7bfafd5b0347a2b1b": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_8b2536405b1b4c62a0988b6360379060",
+            "placeholder": "​",
+            "style": "IPY_MODEL_24ea201c035d4e5a96f6d95c146c6ca8",
+            "value": "tokenizer.json: 100%"
+          }
+        },
+        "d18a2302adaa415785ed8f8bb578b5b9": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "FloatProgressModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_51027870cc714d8db898838afc41d396",
+            "max": 3132320,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_380dca91b19d43d4b3de84afe29f3bd4",
+            "value": 3132320
+          }
+        },
+        "9604f5d16db5446a83400c70071c90e7": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_5d102b9cc45943808fadad7c06ee4352",
+            "placeholder": "​",
+            "style": "IPY_MODEL_ba6e6b0b454b471a9b529dc24bb13bdd",
+            "value": " 3.13M/3.13M [00:00&lt;00:00, 24.4MB/s]"
+          }
+        },
+        "337bbd72f0d4481f8a13cb8323afa241": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "8b2536405b1b4c62a0988b6360379060": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "24ea201c035d4e5a96f6d95c146c6ca8": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "51027870cc714d8db898838afc41d396": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "380dca91b19d43d4b3de84afe29f3bd4": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "ProgressStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "5d102b9cc45943808fadad7c06ee4352": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "ba6e6b0b454b471a9b529dc24bb13bdd": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "9c6331e2efe74bfd9292c4948beaafb5": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HBoxModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_26e942f1e9b441b1861a6ffc5b3299ed",
+              "IPY_MODEL_2d8c0bd34c104619bee375c98eb47160",
+              "IPY_MODEL_1702bb0d2e964f28bca673b1ac4550d3"
+            ],
+            "layout": "IPY_MODEL_1a128f1ccf93416a873560bd462a287e"
+          }
+        },
+        "26e942f1e9b441b1861a6ffc5b3299ed": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_6ebff4a83fe54c688224e27bd56b1d80",
+            "placeholder": "​",
+            "style": "IPY_MODEL_4cd7105d16db47ca90f66d6932beed36",
+            "value": "pytorch_model.bin: 100%"
+          }
+        },
+        "2d8c0bd34c104619bee375c98eb47160": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "FloatProgressModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_e0e19cc9d12a4f91a4b37fcc8ffd691a",
+            "max": 542923308,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_aa5bf384ac5d4aa9976fda08d2574d57",
+            "value": 542923308
+          }
+        },
+        "1702bb0d2e964f28bca673b1ac4550d3": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_92ee08ad38d541c8a0d7e151cb478ab9",
+            "placeholder": "​",
+            "style": "IPY_MODEL_871356ac545e462d8318ba3830de1ac9",
+            "value": " 543M/543M [00:03&lt;00:00, 176MB/s]"
+          }
+        },
+        "1a128f1ccf93416a873560bd462a287e": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "6ebff4a83fe54c688224e27bd56b1d80": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "4cd7105d16db47ca90f66d6932beed36": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "e0e19cc9d12a4f91a4b37fcc8ffd691a": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "aa5bf384ac5d4aa9976fda08d2574d57": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "ProgressStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "92ee08ad38d541c8a0d7e151cb478ab9": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "871356ac545e462d8318ba3830de1ac9": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "356930c123634c258b194b79654b602c": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HBoxModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_ff5fe04a8b43428f94e82affa61c8aa6",
+              "IPY_MODEL_89389fd2337f4e6fa564282157d0f9a8",
+              "IPY_MODEL_ec5b0bbf78fd4118b455040b801cd0fa"
+            ],
+            "layout": "IPY_MODEL_fe441fbf9bdd4d2099e67ed31eafce12"
+          }
+        },
+        "ff5fe04a8b43428f94e82affa61c8aa6": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_c3d75f70be8a41f0a4aaaf43b65df684",
+            "placeholder": "​",
+            "style": "IPY_MODEL_da5dfc79703041c78fd2de3ea04ae025",
+            "value": "model.safetensors: 100%"
+          }
+        },
+        "89389fd2337f4e6fa564282157d0f9a8": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "FloatProgressModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_18a9ab8c76b84ebc8a17c5854649e6ce",
+            "max": 542900336,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_d128a1638ad0472d99a3bd52b5aae3a7",
+            "value": 542900336
+          }
+        },
+        "ec5b0bbf78fd4118b455040b801cd0fa": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_06b631379c0740289420fda9a8b57892",
+            "placeholder": "​",
+            "style": "IPY_MODEL_29cbf804df244f41a57d9b83c7c2427e",
+            "value": " 543M/543M [00:05&lt;00:00, 110MB/s]"
+          }
+        },
+        "fe441fbf9bdd4d2099e67ed31eafce12": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "c3d75f70be8a41f0a4aaaf43b65df684": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "da5dfc79703041c78fd2de3ea04ae025": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "18a9ab8c76b84ebc8a17c5854649e6ce": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "d128a1638ad0472d99a3bd52b5aae3a7": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "ProgressStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "06b631379c0740289420fda9a8b57892": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "29cbf804df244f41a57d9b83c7c2427e": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        }
+      }
+    },
+    "accelerator": "GPU"
+  },
+  "cells": [
+    {
+      "cell_type": "code",
+      "source": [
+        "!pip install pytorch-crf"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "3OUdWCMb_XpJ",
+        "outputId": "593a403e-3432-428f-fd8e-93f8957d740a"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Collecting pytorch-crf\n",
+            "  Downloading pytorch_crf-0.7.2-py3-none-any.whl.metadata (2.4 kB)\n",
+            "Downloading pytorch_crf-0.7.2-py3-none-any.whl (9.5 kB)\n",
+            "Installing collected packages: pytorch-crf\n",
+            "Successfully installed pytorch-crf-0.7.2\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import wandb\n",
+        "wandb.login()"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 191
+        },
+        "id": "inx5CwCVgIvl",
+        "outputId": "f9317181-b433-468e-ecec-dc392e540e52"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "<IPython.core.display.Javascript object>"
+            ],
+            "application/javascript": [
+              "\n",
+              "        window._wandbApiKey = new Promise((resolve, reject) => {\n",
+              "            function loadScript(url) {\n",
+              "            return new Promise(function(resolve, reject) {\n",
+              "                let newScript = document.createElement(\"script\");\n",
+              "                newScript.onerror = reject;\n",
+              "                newScript.onload = resolve;\n",
+              "                document.body.appendChild(newScript);\n",
+              "                newScript.src = url;\n",
+              "            });\n",
+              "            }\n",
+              "            loadScript(\"https://cdn.jsdelivr.net/npm/postmate/build/postmate.min.js\").then(() => {\n",
+              "            const iframe = document.createElement('iframe')\n",
+              "            iframe.style.cssText = \"width:0;height:0;border:none\"\n",
+              "            document.body.appendChild(iframe)\n",
+              "            const handshake = new Postmate({\n",
+              "                container: iframe,\n",
+              "                url: 'https://wandb.ai/authorize'\n",
+              "            });\n",
+              "            const timeout = setTimeout(() => reject(\"Couldn't auto authenticate\"), 5000)\n",
+              "            handshake.then(function(child) {\n",
+              "                child.on('authorize', data => {\n",
+              "                    clearTimeout(timeout)\n",
+              "                    resolve(data)\n",
+              "                });\n",
+              "            });\n",
+              "            })\n",
+              "        });\n",
+              "    "
+            ]
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "\u001b[34m\u001b[1mwandb\u001b[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)\n",
+            "\u001b[34m\u001b[1mwandb\u001b[0m: You can find your API key in your browser here: https://wandb.ai/authorize\n",
+            "wandb: Paste an API key from your profile and hit enter:"
+          ]
+        },
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            " ··········\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m If you're specifying your api key in code, ensure this code is not shared publicly.\n",
+            "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m Consider setting the WANDB_API_KEY environment variable, or running `wandb login` from the command line.\n",
+            "\u001b[34m\u001b[1mwandb\u001b[0m: No netrc file found, creating one.\n",
+            "\u001b[34m\u001b[1mwandb\u001b[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc\n",
+            "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mlaiducaivn\u001b[0m (\u001b[33mlaiducaivn-fpt-university\u001b[0m) to \u001b[32mhttps://api.wandb.ai\u001b[0m. Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n"
+          ]
+        },
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "True"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 2
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# Data Preparation"
+      ],
+      "metadata": {
+        "id": "YY74yDYXID_a"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import pandas as pd\n",
+        "\n",
+        "splits = {'train': 'data/train-00000-of-00001-b0417886a268b83a.parquet', 'valid': 'data/valid-00000-of-00001-846411c236133ba3.parquet'}\n",
+        "df_train = pd.read_parquet(\"hf://datasets/datnth1709/VLSP2016-NER-data/\" + splits[\"train\"])\n",
+        "df_valid = pd.read_parquet(\"hf://datasets/datnth1709/VLSP2016-NER-data/\" + splits[\"valid\"])\n",
+        "df = pd.concat([df_train, df_valid]).reset_index(drop=True)"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "66m2J73nGXEV",
+        "outputId": "5a9a1457-9660-47ab-a5b7-85264c1cd34b"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "/usr/local/lib/python3.11/dist-packages/huggingface_hub/utils/_auth.py:94: UserWarning: \n",
+            "The secret `HF_TOKEN` does not exist in your Colab secrets.\n",
+            "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n",
+            "You will be able to reuse this secret in all of your notebooks.\n",
+            "Please note that authentication is recommended but still optional to access public models or datasets.\n",
+            "  warnings.warn(\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Create additional derived columns\n",
+        "def join_tokens(tokens):\n",
+        "    text = ' '.join(tokens)\n",
+        "    return text\n",
+        "\n",
+        "def reform_raw_text(tokens):\n",
+        "    text = ' '.join(tokens)\n",
+        "    return text.replace(\"_\", \" \")\n",
+        "\n",
+        "def label(x):\n",
+        "  return [id_tag[int(i)] for i in x]\n",
+        "\n",
+        "def replace_7_8(lst):\n",
+        "    return [0 if x in (7, 8) else x for x in lst]\n",
+        "\n",
+        "\n",
+        "tag_id = {'O': 0, 'B-PER': 1, 'I-PER': 2, 'B-ORG': 3, 'I-ORG': 4, 'B-LOC': 5, 'I-LOC': 6}\n",
+        "id_tag = {0: 'O', 1: 'B-PER', 2: 'I-PER', 3: 'B-ORG', 4: 'I-ORG', 5: 'B-LOC', 6: 'I-LOC'}\n",
+        "\n",
+        "\n",
+        "df['ner_tags'] = df['ner_tags'].apply(replace_7_8)\n",
+        "df['text_withseg'] = df['tokens'].apply(join_tokens)\n",
+        "df['text_raw'] = df['tokens'].apply(reform_raw_text)\n",
+        "df[\"ner_labels\"] = df.ner_tags.apply(label)\n",
+        "df.columns = ['tokens', 'id', 'seg_text', 'raw_text', 'labels']\n",
+        "df\n"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 424
+        },
+        "id": "U81OmhBeGmMM",
+        "outputId": "c8bec51d-a878-4b12-e2f1-42076572a731"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "                                                  tokens  \\\n",
+              "0                        [Không_khí, thật, náo_nhiệt, .]   \n",
+              "1      [Chị, Lãnh, và, Xăng, ra, đi, ,, mình, đứng, n...   \n",
+              "2      [Suy_tính, mãi, ,, khóc, mãi, rồi, Phúc, lấy, ...   \n",
+              "3      [Hoà, bảo, hồi, mới, qua, đâu, có, biết, nấu_n...   \n",
+              "4                         [Nhật_ký, của, thuyền_viên, .]   \n",
+              "...                                                  ...   \n",
+              "16853  [Nghe, thấy, đã, ghê_ghê, nhưng, Nhiêu, chưa, ...   \n",
+              "16854        [Nhưng, mọi, chuyện, không, dừng, ở, đó, .]   \n",
+              "16855  [Hoà, bảo, thời_gian, đầu, mặc_cảm, lắm, ,, ở,...   \n",
+              "16856  [Biết_bao, người, đã, tình_nguyện, hiến_dâng, ...   \n",
+              "16857  [Trên, đây, mới, là, “, thành_tích, ”, tiêu, t...   \n",
+              "\n",
+              "                                                      id  \\\n",
+              "0                                           [0, 0, 0, 0]   \n",
+              "1      [0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...   \n",
+              "2      [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ...   \n",
+              "3      [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, ...   \n",
+              "4                                           [0, 0, 0, 0]   \n",
+              "...                                                  ...   \n",
+              "16853  [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...   \n",
+              "16854                           [0, 0, 0, 0, 0, 0, 0, 0]   \n",
+              "16855  [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...   \n",
+              "16856      [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]   \n",
+              "16857  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...   \n",
+              "\n",
+              "                                                seg_text  \\\n",
+              "0                             Không_khí thật náo_nhiệt .   \n",
+              "1      Chị Lãnh và Xăng ra đi , mình đứng nhìn hai ch...   \n",
+              "2      Suy_tính mãi , khóc mãi rồi Phúc lấy ra tờ giấ...   \n",
+              "3      Hoà bảo hồi mới qua đâu có biết nấu_nướng gì ,...   \n",
+              "4                              Nhật_ký của thuyền_viên .   \n",
+              "...                                                  ...   \n",
+              "16853  Nghe thấy đã ghê_ghê nhưng Nhiêu chưa được tườ...   \n",
+              "16854                 Nhưng mọi chuyện không dừng ở đó .   \n",
+              "16855  Hoà bảo thời_gian đầu mặc_cảm lắm , ở trong nh...   \n",
+              "16856  Biết_bao người đã tình_nguyện hiến_dâng cả cuộ...   \n",
+              "16857  Trên đây mới là “ thành_tích ” tiêu tiền của m...   \n",
+              "\n",
+              "                                                raw_text  \\\n",
+              "0                             Không khí thật náo nhiệt .   \n",
+              "1      Chị Lãnh và Xăng ra đi , mình đứng nhìn hai ch...   \n",
+              "2      Suy tính mãi , khóc mãi rồi Phúc lấy ra tờ giấ...   \n",
+              "3      Hoà bảo hồi mới qua đâu có biết nấu nướng gì ,...   \n",
+              "4                              Nhật ký của thuyền viên .   \n",
+              "...                                                  ...   \n",
+              "16853  Nghe thấy đã ghê ghê nhưng Nhiêu chưa được tườ...   \n",
+              "16854                 Nhưng mọi chuyện không dừng ở đó .   \n",
+              "16855  Hoà bảo thời gian đầu mặc cảm lắm , ở trong nh...   \n",
+              "16856  Biết bao người đã tình nguyện hiến dâng cả cuộ...   \n",
+              "16857  Trên đây mới là “ thành tích ” tiêu tiền của m...   \n",
+              "\n",
+              "                                                  labels  \n",
+              "0                                           [O, O, O, O]  \n",
+              "1      [O, B-PER, O, B-PER, O, O, O, O, O, O, O, O, O...  \n",
+              "2      [O, O, O, O, O, O, B-PER, O, O, O, O, O, O, O,...  \n",
+              "3      [B-PER, O, O, O, O, O, O, O, O, O, O, O, O, B-...  \n",
+              "4                                           [O, O, O, O]  \n",
+              "...                                                  ...  \n",
+              "16853  [O, O, O, O, O, B-PER, O, O, O, O, O, O, O, O,...  \n",
+              "16854                           [O, O, O, O, O, O, O, O]  \n",
+              "16855  [B-PER, O, O, O, O, O, O, O, O, O, O, O, O, O,...  \n",
+              "16856      [O, O, O, O, O, O, O, O, O, O, O, O, O, O, O]  \n",
+              "16857  [O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...  \n",
+              "\n",
+              "[16858 rows x 5 columns]"
+            ],
+            "text/html": [
+              "\n",
+              "  <div id=\"df-58218936-4588-48aa-b6be-ac72e77b3ce5\" class=\"colab-df-container\">\n",
+              "    <div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>tokens</th>\n",
+              "      <th>id</th>\n",
+              "      <th>seg_text</th>\n",
+              "      <th>raw_text</th>\n",
+              "      <th>labels</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>[Không_khí, thật, náo_nhiệt, .]</td>\n",
+              "      <td>[0, 0, 0, 0]</td>\n",
+              "      <td>Không_khí thật náo_nhiệt .</td>\n",
+              "      <td>Không khí thật náo nhiệt .</td>\n",
+              "      <td>[O, O, O, O]</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>[Chị, Lãnh, và, Xăng, ra, đi, ,, mình, đứng, n...</td>\n",
+              "      <td>[0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
+              "      <td>Chị Lãnh và Xăng ra đi , mình đứng nhìn hai ch...</td>\n",
+              "      <td>Chị Lãnh và Xăng ra đi , mình đứng nhìn hai ch...</td>\n",
+              "      <td>[O, B-PER, O, B-PER, O, O, O, O, O, O, O, O, O...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2</th>\n",
+              "      <td>[Suy_tính, mãi, ,, khóc, mãi, rồi, Phúc, lấy, ...</td>\n",
+              "      <td>[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
+              "      <td>Suy_tính mãi , khóc mãi rồi Phúc lấy ra tờ giấ...</td>\n",
+              "      <td>Suy tính mãi , khóc mãi rồi Phúc lấy ra tờ giấ...</td>\n",
+              "      <td>[O, O, O, O, O, O, B-PER, O, O, O, O, O, O, O,...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>3</th>\n",
+              "      <td>[Hoà, bảo, hồi, mới, qua, đâu, có, biết, nấu_n...</td>\n",
+              "      <td>[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, ...</td>\n",
+              "      <td>Hoà bảo hồi mới qua đâu có biết nấu_nướng gì ,...</td>\n",
+              "      <td>Hoà bảo hồi mới qua đâu có biết nấu nướng gì ,...</td>\n",
+              "      <td>[B-PER, O, O, O, O, O, O, O, O, O, O, O, O, B-...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4</th>\n",
+              "      <td>[Nhật_ký, của, thuyền_viên, .]</td>\n",
+              "      <td>[0, 0, 0, 0]</td>\n",
+              "      <td>Nhật_ký của thuyền_viên .</td>\n",
+              "      <td>Nhật ký của thuyền viên .</td>\n",
+              "      <td>[O, O, O, O]</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>...</th>\n",
+              "      <td>...</td>\n",
+              "      <td>...</td>\n",
+              "      <td>...</td>\n",
+              "      <td>...</td>\n",
+              "      <td>...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>16853</th>\n",
+              "      <td>[Nghe, thấy, đã, ghê_ghê, nhưng, Nhiêu, chưa, ...</td>\n",
+              "      <td>[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...</td>\n",
+              "      <td>Nghe thấy đã ghê_ghê nhưng Nhiêu chưa được tườ...</td>\n",
+              "      <td>Nghe thấy đã ghê ghê nhưng Nhiêu chưa được tườ...</td>\n",
+              "      <td>[O, O, O, O, O, B-PER, O, O, O, O, O, O, O, O,...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>16854</th>\n",
+              "      <td>[Nhưng, mọi, chuyện, không, dừng, ở, đó, .]</td>\n",
+              "      <td>[0, 0, 0, 0, 0, 0, 0, 0]</td>\n",
+              "      <td>Nhưng mọi chuyện không dừng ở đó .</td>\n",
+              "      <td>Nhưng mọi chuyện không dừng ở đó .</td>\n",
+              "      <td>[O, O, O, O, O, O, O, O]</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>16855</th>\n",
+              "      <td>[Hoà, bảo, thời_gian, đầu, mặc_cảm, lắm, ,, ở,...</td>\n",
+              "      <td>[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
+              "      <td>Hoà bảo thời_gian đầu mặc_cảm lắm , ở trong nh...</td>\n",
+              "      <td>Hoà bảo thời gian đầu mặc cảm lắm , ở trong nh...</td>\n",
+              "      <td>[B-PER, O, O, O, O, O, O, O, O, O, O, O, O, O,...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>16856</th>\n",
+              "      <td>[Biết_bao, người, đã, tình_nguyện, hiến_dâng, ...</td>\n",
+              "      <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]</td>\n",
+              "      <td>Biết_bao người đã tình_nguyện hiến_dâng cả cuộ...</td>\n",
+              "      <td>Biết bao người đã tình nguyện hiến dâng cả cuộ...</td>\n",
+              "      <td>[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O]</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>16857</th>\n",
+              "      <td>[Trên, đây, mới, là, “, thành_tích, ”, tiêu, t...</td>\n",
+              "      <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
+              "      <td>Trên đây mới là “ thành_tích ” tiêu tiền của m...</td>\n",
+              "      <td>Trên đây mới là “ thành tích ” tiêu tiền của m...</td>\n",
+              "      <td>[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "<p>16858 rows × 5 columns</p>\n",
+              "</div>\n",
+              "    <div class=\"colab-df-buttons\">\n",
+              "\n",
+              "  <div class=\"colab-df-container\">\n",
+              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-58218936-4588-48aa-b6be-ac72e77b3ce5')\"\n",
+              "            title=\"Convert this dataframe to an interactive table.\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
+              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
+              "  </svg>\n",
+              "    </button>\n",
+              "\n",
+              "  <style>\n",
+              "    .colab-df-container {\n",
+              "      display:flex;\n",
+              "      gap: 12px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert {\n",
+              "      background-color: #E8F0FE;\n",
+              "      border: none;\n",
+              "      border-radius: 50%;\n",
+              "      cursor: pointer;\n",
+              "      display: none;\n",
+              "      fill: #1967D2;\n",
+              "      height: 32px;\n",
+              "      padding: 0 0 0 0;\n",
+              "      width: 32px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert:hover {\n",
+              "      background-color: #E2EBFA;\n",
+              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "      fill: #174EA6;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-buttons div {\n",
+              "      margin-bottom: 4px;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert {\n",
+              "      background-color: #3B4455;\n",
+              "      fill: #D2E3FC;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert:hover {\n",
+              "      background-color: #434B5C;\n",
+              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "      fill: #FFFFFF;\n",
+              "    }\n",
+              "  </style>\n",
+              "\n",
+              "    <script>\n",
+              "      const buttonEl =\n",
+              "        document.querySelector('#df-58218936-4588-48aa-b6be-ac72e77b3ce5 button.colab-df-convert');\n",
+              "      buttonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "\n",
+              "      async function convertToInteractive(key) {\n",
+              "        const element = document.querySelector('#df-58218936-4588-48aa-b6be-ac72e77b3ce5');\n",
+              "        const dataTable =\n",
+              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
+              "                                                    [key], {});\n",
+              "        if (!dataTable) return;\n",
+              "\n",
+              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
+              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
+              "          + ' to learn more about interactive tables.';\n",
+              "        element.innerHTML = '';\n",
+              "        dataTable['output_type'] = 'display_data';\n",
+              "        await google.colab.output.renderOutput(dataTable, element);\n",
+              "        const docLink = document.createElement('div');\n",
+              "        docLink.innerHTML = docLinkHtml;\n",
+              "        element.appendChild(docLink);\n",
+              "      }\n",
+              "    </script>\n",
+              "  </div>\n",
+              "\n",
+              "\n",
+              "    <div id=\"df-20de5a4b-7254-4fae-85f0-276a7bd335ae\">\n",
+              "      <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-20de5a4b-7254-4fae-85f0-276a7bd335ae')\"\n",
+              "                title=\"Suggest charts\"\n",
+              "                style=\"display:none;\">\n",
+              "\n",
+              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
+              "     width=\"24px\">\n",
+              "    <g>\n",
+              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
+              "    </g>\n",
+              "</svg>\n",
+              "      </button>\n",
+              "\n",
+              "<style>\n",
+              "  .colab-df-quickchart {\n",
+              "      --bg-color: #E8F0FE;\n",
+              "      --fill-color: #1967D2;\n",
+              "      --hover-bg-color: #E2EBFA;\n",
+              "      --hover-fill-color: #174EA6;\n",
+              "      --disabled-fill-color: #AAA;\n",
+              "      --disabled-bg-color: #DDD;\n",
+              "  }\n",
+              "\n",
+              "  [theme=dark] .colab-df-quickchart {\n",
+              "      --bg-color: #3B4455;\n",
+              "      --fill-color: #D2E3FC;\n",
+              "      --hover-bg-color: #434B5C;\n",
+              "      --hover-fill-color: #FFFFFF;\n",
+              "      --disabled-bg-color: #3B4455;\n",
+              "      --disabled-fill-color: #666;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart {\n",
+              "    background-color: var(--bg-color);\n",
+              "    border: none;\n",
+              "    border-radius: 50%;\n",
+              "    cursor: pointer;\n",
+              "    display: none;\n",
+              "    fill: var(--fill-color);\n",
+              "    height: 32px;\n",
+              "    padding: 0;\n",
+              "    width: 32px;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart:hover {\n",
+              "    background-color: var(--hover-bg-color);\n",
+              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "    fill: var(--button-hover-fill-color);\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart-complete:disabled,\n",
+              "  .colab-df-quickchart-complete:disabled:hover {\n",
+              "    background-color: var(--disabled-bg-color);\n",
+              "    fill: var(--disabled-fill-color);\n",
+              "    box-shadow: none;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-spinner {\n",
+              "    border: 2px solid var(--fill-color);\n",
+              "    border-color: transparent;\n",
+              "    border-bottom-color: var(--fill-color);\n",
+              "    animation:\n",
+              "      spin 1s steps(1) infinite;\n",
+              "  }\n",
+              "\n",
+              "  @keyframes spin {\n",
+              "    0% {\n",
+              "      border-color: transparent;\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "      border-left-color: var(--fill-color);\n",
+              "    }\n",
+              "    20% {\n",
+              "      border-color: transparent;\n",
+              "      border-left-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "    }\n",
+              "    30% {\n",
+              "      border-color: transparent;\n",
+              "      border-left-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "      border-right-color: var(--fill-color);\n",
+              "    }\n",
+              "    40% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "    }\n",
+              "    60% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "    }\n",
+              "    80% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "    }\n",
+              "    90% {\n",
+              "      border-color: transparent;\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "    }\n",
+              "  }\n",
+              "</style>\n",
+              "\n",
+              "      <script>\n",
+              "        async function quickchart(key) {\n",
+              "          const quickchartButtonEl =\n",
+              "            document.querySelector('#' + key + ' button');\n",
+              "          quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
+              "          quickchartButtonEl.classList.add('colab-df-spinner');\n",
+              "          try {\n",
+              "            const charts = await google.colab.kernel.invokeFunction(\n",
+              "                'suggestCharts', [key], {});\n",
+              "          } catch (error) {\n",
+              "            console.error('Error during call to suggestCharts:', error);\n",
+              "          }\n",
+              "          quickchartButtonEl.classList.remove('colab-df-spinner');\n",
+              "          quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
+              "        }\n",
+              "        (() => {\n",
+              "          let quickchartButtonEl =\n",
+              "            document.querySelector('#df-20de5a4b-7254-4fae-85f0-276a7bd335ae button');\n",
+              "          quickchartButtonEl.style.display =\n",
+              "            google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "        })();\n",
+              "      </script>\n",
+              "    </div>\n",
+              "\n",
+              "  <div id=\"id_14960943-8d68-4fa8-9d2e-a22dda1f04dd\">\n",
+              "    <style>\n",
+              "      .colab-df-generate {\n",
+              "        background-color: #E8F0FE;\n",
+              "        border: none;\n",
+              "        border-radius: 50%;\n",
+              "        cursor: pointer;\n",
+              "        display: none;\n",
+              "        fill: #1967D2;\n",
+              "        height: 32px;\n",
+              "        padding: 0 0 0 0;\n",
+              "        width: 32px;\n",
+              "      }\n",
+              "\n",
+              "      .colab-df-generate:hover {\n",
+              "        background-color: #E2EBFA;\n",
+              "        box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "        fill: #174EA6;\n",
+              "      }\n",
+              "\n",
+              "      [theme=dark] .colab-df-generate {\n",
+              "        background-color: #3B4455;\n",
+              "        fill: #D2E3FC;\n",
+              "      }\n",
+              "\n",
+              "      [theme=dark] .colab-df-generate:hover {\n",
+              "        background-color: #434B5C;\n",
+              "        box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "        filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "        fill: #FFFFFF;\n",
+              "      }\n",
+              "    </style>\n",
+              "    <button class=\"colab-df-generate\" onclick=\"generateWithVariable('df')\"\n",
+              "            title=\"Generate code using this dataframe.\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
+              "       width=\"24px\">\n",
+              "    <path d=\"M7,19H8.4L18.45,9,17,7.55,7,17.6ZM5,21V16.75L18.45,3.32a2,2,0,0,1,2.83,0l1.4,1.43a1.91,1.91,0,0,1,.58,1.4,1.91,1.91,0,0,1-.58,1.4L9.25,21ZM18.45,9,17,7.55Zm-12,3A5.31,5.31,0,0,0,4.9,8.1,5.31,5.31,0,0,0,1,6.5,5.31,5.31,0,0,0,4.9,4.9,5.31,5.31,0,0,0,6.5,1,5.31,5.31,0,0,0,8.1,4.9,5.31,5.31,0,0,0,12,6.5,5.46,5.46,0,0,0,6.5,12Z\"/>\n",
+              "  </svg>\n",
+              "    </button>\n",
+              "    <script>\n",
+              "      (() => {\n",
+              "      const buttonEl =\n",
+              "        document.querySelector('#id_14960943-8d68-4fa8-9d2e-a22dda1f04dd button.colab-df-generate');\n",
+              "      buttonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "\n",
+              "      buttonEl.onclick = () => {\n",
+              "        google.colab.notebook.generateWithVariable('df');\n",
+              "      }\n",
+              "      })();\n",
+              "    </script>\n",
+              "  </div>\n",
+              "\n",
+              "    </div>\n",
+              "  </div>\n"
+            ],
+            "application/vnd.google.colaboratory.intrinsic+json": {
+              "type": "dataframe",
+              "variable_name": "df",
+              "summary": "{\n  \"name\": \"df\",\n  \"rows\": 16858,\n  \"fields\": [\n    {\n      \"column\": \"tokens\",\n      \"properties\": {\n        \"dtype\": \"object\",\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"id\",\n      \"properties\": {\n        \"dtype\": \"object\",\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"seg_text\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 16787,\n        \"samples\": [\n          \"T\\u00ednh th\\u00f4ng_minh , l\\u1ea1i t\\u00f2_m\\u00f2 , anh Ki\\u1ec7m b\\u1eaft_\\u0111\\u1ea7u \\u0111i \\u0111\\u1ebfn c\\u00e1c x\\u01b0\\u1edfng c\\u01a1_kh\\u00ed \\u0111\\u1ec3 quan_s\\u00e1t c\\u00e1c lo\\u1ea1i m\\u00e1y_m\\u00f3c , r\\u1ed3i v\\u1ec1 nh\\u00e0 suy_ngh\\u0129 v\\u00e0 c\\u1ea7m b\\u00fat v\\u1ebd ph\\u00e1c_ho\\u1ea1 ra c\\u00e1i m\\u00e1y v\\u00fat g\\u1ea1o .\",\n          \"V\\u1eady th\\u00ec , h\\u1ecd c\\u1ea7n ph\\u1ea3i \\u0111\\u01b0\\u1ee3c gi\\u00fap_\\u0111\\u1ee1 , ph\\u1ea3i \\u0111\\u01b0\\u1ee3c s\\u1ed1ng \\u0111\\u00e0ng_ho\\u00e0ng , ph\\u1ea3i \\u0111\\u01b0\\u1ee3c l\\u00e0m ng\\u01b0\\u1eddi d\\u00f9 ch\\u1ec9 l\\u00e0 nh\\u1eefng ng\\u00e0y cu\\u1ed1i_c\\u00f9ng .\",\n          \"Nhi\\u1ec1u ng\\u01b0\\u1eddi th\\u00f4ng_d\\u1ecbch c\\u00f9ng th\\u1eddi v\\u1edbi Nguy\\u1ec5n Trung Hi\\u1ebfu c\\u0169ng \\u0111\\u00e3 ch\\u1ebft trong khi th\\u1ef1c_hi\\u1ec7n nhi\\u1ec7m_v\\u1ee5 t\\u1ea1i chi\\u1ebfn_tr\\u01b0\\u1eddng ho\\u1eb7c tr\\u00ean \\u0111\\u01b0\\u1eddng h\\u00e0nh_qu\\u00e2n .\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"raw_text\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 16785,\n        \"samples\": [\n          \"Trong kho\\u1ea3ng th\\u1eddi gian \\u0111\\u00f3 ch\\u1ecb 
c\\u1ed1 c\\u00f4ng t\\u1ef1 h\\u1ecdc ti\\u1ebfng Anh .\",\n          \"Sau \\u0111\\u00f3 , ch\\u00ednh b\\u00e0 Susan \\u0111\\u00e3 \\u0111\\u01b0a Mai l\\u00ean h\\u1ecdc \\u0111\\u1ea1i h\\u1ecdc , m\\u1ed7i n\\u0103m chu c\\u1ea5p cho c\\u00f4 30.000 USD .\",\n          \"T\\u1eeb r\\u1ea5t l\\u00e2u r\\u1ed3i t\\u00f4i v\\u1eabn ngh\\u0129 n\\u1ebfu nh\\u01b0 cu\\u1ed1n s\\u00e1ch \\u0111\\u01b0\\u1ee3c xu\\u1ea5t b\\u1ea3n , ho\\u1eb7c ng\\u01b0\\u1eddi ta l\\u00e0m phim v\\u1ec1 n\\u00f3 th\\u00ec t\\u00f4i s\\u1ebd d\\u00f9ng s\\u1ed1 ti\\u1ec1n b\\u00e1n s\\u00e1ch \\u0111\\u1ec3 thi\\u1ebft l\\u1eadp m\\u1ed9t s\\u1ed1 gi\\u01b0\\u1eddng b\\u1ec7nh t\\u1ea1i H\\u00e0 N\\u1ed9i .\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"labels\",\n      \"properties\": {\n        \"dtype\": \"object\",\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}"
+            }
+          },
+          "metadata": {},
+          "execution_count": 4
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# Get Embedding Vectors"
+      ],
+      "metadata": {
+        "id": "ooewb479FdqS"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import torch\n",
+        "from transformers import AutoTokenizer, AutoModel\n",
+        "from tqdm import tqdm\n",
+        "\n",
+        "# Load the PhoBERT tokenizer and model\n",
+        "tokenizer = AutoTokenizer.from_pretrained(\"vinai/phobert-base\", use_fast=False)\n",
+        "model = AutoModel.from_pretrained(\"vinai/phobert-base\")\n",
+        "model.eval()"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 920,
+          "referenced_widgets": [
+            "2707f2f1d216421385cc4166127d696a",
+            "5350c7b689f14d138357f92a78479d4b",
+            "5423cc4795f9415ebcf7eb2eb45f08b4",
+            "f1ef72618a0b4710ac6ab5cfc86ed252",
+            "8eb197c462304d6fb6d15c175db315f5",
+            "a4178b6f78bf4f2aa6cb7ad924308970",
+            "59f7b90017364fc3ad2969061e3efba2",
+            "3ca4b088872649c7856c3be691ca6224",
+            "1c77b809b5ec42e7b00b512cbbc7071f",
+            "c657eed438b741189da3846983d8e0a6",
+            "21f740caf6a94a468a54552961c54d63",
+            "7b988f4f4c97462c9ee30aebabf4029b",
+            "8b5ccad1921342dca6cbf5adcc93e9fa",
+            "25c32ab8424242daa414680dc5b8ea57",
+            "71a5bbc69fe648168877b7ab6f6cd8a6",
+            "0434bc2965584b018978d590bcda68c6",
+            "b9ba2a9d9c704dd091cf17241541c280",
+            "a75ea7ca7e384c948f07eeffa8f676b5",
+            "e0a24e13af474afc98fc5c93c561e880",
+            "4a1b96a5fde64fb499eeacd733b72c32",
+            "f761d67cb46a4af3b49a22209cd450a9",
+            "8125e9952f68467d8c7d55da426c9098",
+            "0885e06d76f24053890d4ade7044b22e",
+            "4303d7ea0bf14661803caf8f617ce788",
+            "cd2aec8cb6de49f095681da2b99e7660",
+            "fe84d9c4f3124682809f6e7117b40638",
+            "c14214a879ca425c8955b380d73f3010",
+            "2f28ad6792294553b24cbaa7dea533af",
+            "c58168f9246046728211a403540060f5",
+            "64473dfca69a45438094656d2b995207",
+            "0a782a4d3cfc4b9cbd802bedcdae3153",
+            "dc5b47931e0340a4980ae315c6a802a5",
+            "8d431574a7a14c5fb1466fa97a33e4fb",
+            "960273e5205f49efb2be0576d2f74bca",
+            "7e3192df593248c7bfafd5b0347a2b1b",
+            "d18a2302adaa415785ed8f8bb578b5b9",
+            "9604f5d16db5446a83400c70071c90e7",
+            "337bbd72f0d4481f8a13cb8323afa241",
+            "8b2536405b1b4c62a0988b6360379060",
+            "24ea201c035d4e5a96f6d95c146c6ca8",
+            "51027870cc714d8db898838afc41d396",
+            "380dca91b19d43d4b3de84afe29f3bd4",
+            "5d102b9cc45943808fadad7c06ee4352",
+            "ba6e6b0b454b471a9b529dc24bb13bdd",
+            "9c6331e2efe74bfd9292c4948beaafb5",
+            "26e942f1e9b441b1861a6ffc5b3299ed",
+            "2d8c0bd34c104619bee375c98eb47160",
+            "1702bb0d2e964f28bca673b1ac4550d3",
+            "1a128f1ccf93416a873560bd462a287e",
+            "6ebff4a83fe54c688224e27bd56b1d80",
+            "4cd7105d16db47ca90f66d6932beed36",
+            "e0e19cc9d12a4f91a4b37fcc8ffd691a",
+            "aa5bf384ac5d4aa9976fda08d2574d57",
+            "92ee08ad38d541c8a0d7e151cb478ab9",
+            "871356ac545e462d8318ba3830de1ac9",
+            "356930c123634c258b194b79654b602c",
+            "ff5fe04a8b43428f94e82affa61c8aa6",
+            "89389fd2337f4e6fa564282157d0f9a8",
+            "ec5b0bbf78fd4118b455040b801cd0fa",
+            "fe441fbf9bdd4d2099e67ed31eafce12",
+            "c3d75f70be8a41f0a4aaaf43b65df684",
+            "da5dfc79703041c78fd2de3ea04ae025",
+            "18a9ab8c76b84ebc8a17c5854649e6ce",
+            "d128a1638ad0472d99a3bd52b5aae3a7",
+            "06b631379c0740289420fda9a8b57892",
+            "29cbf804df244f41a57d9b83c7c2427e"
+          ]
+        },
+        "id": "b04c2Xq7IBac",
+        "outputId": "c8575bc2-8b3d-415c-8d67-b7cbed0343d3"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "config.json:   0%|          | 0.00/557 [00:00<?, ?B/s]"
+            ],
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "2707f2f1d216421385cc4166127d696a"
+            }
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "vocab.txt:   0%|          | 0.00/895k [00:00<?, ?B/s]"
+            ],
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "7b988f4f4c97462c9ee30aebabf4029b"
+            }
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "bpe.codes:   0%|          | 0.00/1.14M [00:00<?, ?B/s]"
+            ],
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "0885e06d76f24053890d4ade7044b22e"
+            }
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "tokenizer.json:   0%|          | 0.00/3.13M [00:00<?, ?B/s]"
+            ],
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "960273e5205f49efb2be0576d2f74bca"
+            }
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "pytorch_model.bin:   0%|          | 0.00/543M [00:00<?, ?B/s]"
+            ],
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "9c6331e2efe74bfd9292c4948beaafb5"
+            }
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "model.safetensors:   0%|          | 0.00/543M [00:00<?, ?B/s]"
+            ],
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "356930c123634c258b194b79654b602c"
+            }
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "RobertaModel(\n",
+              "  (embeddings): RobertaEmbeddings(\n",
+              "    (word_embeddings): Embedding(64001, 768, padding_idx=1)\n",
+              "    (position_embeddings): Embedding(258, 768, padding_idx=1)\n",
+              "    (token_type_embeddings): Embedding(1, 768)\n",
+              "    (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
+              "    (dropout): Dropout(p=0.1, inplace=False)\n",
+              "  )\n",
+              "  (encoder): RobertaEncoder(\n",
+              "    (layer): ModuleList(\n",
+              "      (0-11): 12 x RobertaLayer(\n",
+              "        (attention): RobertaAttention(\n",
+              "          (self): RobertaSdpaSelfAttention(\n",
+              "            (query): Linear(in_features=768, out_features=768, bias=True)\n",
+              "            (key): Linear(in_features=768, out_features=768, bias=True)\n",
+              "            (value): Linear(in_features=768, out_features=768, bias=True)\n",
+              "            (dropout): Dropout(p=0.1, inplace=False)\n",
+              "          )\n",
+              "          (output): RobertaSelfOutput(\n",
+              "            (dense): Linear(in_features=768, out_features=768, bias=True)\n",
+              "            (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
+              "            (dropout): Dropout(p=0.1, inplace=False)\n",
+              "          )\n",
+              "        )\n",
+              "        (intermediate): RobertaIntermediate(\n",
+              "          (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
+              "          (intermediate_act_fn): GELUActivation()\n",
+              "        )\n",
+              "        (output): RobertaOutput(\n",
+              "          (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
+              "          (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
+              "          (dropout): Dropout(p=0.1, inplace=False)\n",
+              "        )\n",
+              "      )\n",
+              "    )\n",
+              "  )\n",
+              "  (pooler): RobertaPooler(\n",
+              "    (dense): Linear(in_features=768, out_features=768, bias=True)\n",
+              "    (activation): Tanh()\n",
+              "  )\n",
+              ")"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 5
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Merge the embedding vectors of sub-word pieces split by PhoBERT's BPE tokenizer (a piece ending in @@ continues into the next piece) into one mean vector per word\n",
+        "def group_embeddings(tokens, embeddings):\n",
+        "    word_embeddings = []\n",
+        "    current_vecs = []\n",
+        "\n",
+        "    for token, emb in zip(tokens, embeddings):\n",
+        "        if token in [\"<s>\", \"</s>\"]:\n",
+        "            continue\n",
+        "\n",
+        "        if token.endswith(\"@@\"):\n",
+        "            current_vecs.append(emb)\n",
+        "        else:\n",
+        "            current_vecs.append(emb)\n",
+        "            word_emb = torch.mean(torch.stack(current_vecs), dim=0)\n",
+        "            word_embeddings.append(word_emb)\n",
+        "            current_vecs = []\n",
+        "\n",
+        "    if current_vecs:  # Trong trường hợp sót lại cuối câu\n",
+        "        word_emb = torch.mean(torch.stack(current_vecs), dim=0)\n",
+        "        word_embeddings.append(word_emb)\n",
+        "\n",
+        "    return word_embeddings"
+      ],
+      "metadata": {
+        "id": "z-JZZ2VrJiQ6"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
+        "model.to(device)\n",
+        "\n",
+        "all_embeddings = []  # list of [seq_len_i, 768] tensors\n",
+        "all_labels = [] # list of [seq_len_i,] tensors\n",
+        "len_em = []\n",
+        "\n",
+        "# count = 0\n",
+        "\n",
+        "for i, row in tqdm(df.iterrows(), total=len(df)):\n",
+        "\n",
+        "    # count += 1\n",
+        "    # if count == 500:\n",
+        "    #   break\n",
+        "\n",
+        "    # Read the fields of the current row\n",
+        "    sentence = row['seg_text']\n",
+        "    gold_labels = row[\"id\"]\n",
+        "\n",
+        "    # Tokenize the sentence into sub-word ids with PhoBERT's BPE tokenizer\n",
+        "    input_ids = tokenizer.encode(sentence, return_tensors=\"pt\").to(device)\n",
+        "\n",
+        "    tokens = tokenizer.convert_ids_to_tokens(input_ids[0].cpu())\n",
+        "\n",
+        "    # Run the model without gradients and keep the last hidden state as the token embeddings\n",
+        "    with torch.no_grad():\n",
+        "        outputs = model(input_ids)\n",
+        "        last_hidden_state = outputs.last_hidden_state.squeeze(0).cpu()\n",
+        "\n",
+        "    # Merge the embeddings of BPE sub-word pieces back into one vector per word\n",
+        "    word_embeds = group_embeddings(tokens, last_hidden_state)\n",
+        "\n",
+        "    # Skip the row if the number of word embeddings does not match the number of labels\n",
+        "    if len(word_embeds) != len(gold_labels):\n",
+        "        print(f\"Warning: Skipping row {i} - length mismatch\")\n",
+        "        continue\n",
+        "\n",
+        "    # Append to the running lists; from here on the data is ready for training\n",
+        "    all_embeddings.append(torch.stack(word_embeds))\n",
+        "    all_labels.append(torch.tensor(gold_labels))"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 1000
+        },
+        "id": "3wpjBGK3JuwS",
+        "outputId": "6788bd6f-d9c7-498f-f5dc-0e2766656ed1"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "\n",
+            "  0%|          | 0/16858 [00:00<?, ?it/s]\u001b[A\n",
+            "  0%|          | 1/16858 [00:01<5:55:57,  1.27s/it]\u001b[A\n",
+            "  0%|          | 2/16858 [00:01<2:49:26,  1.66it/s]\u001b[A\n",
+            "  0%|          | 4/16858 [00:01<1:16:24,  3.68it/s]\u001b[A\n",
+            "  0%|          | 6/16858 [00:01<50:05,  5.61it/s]  \u001b[A\n",
+            "  0%|          | 8/16858 [00:01<37:11,  7.55it/s]\u001b[A\n",
+            "  0%|          | 10/16858 [00:01<29:31,  9.51it/s]\u001b[A\n",
+            "  0%|          | 12/16858 [00:02<26:55, 10.43it/s]\u001b[A\n",
+            "  0%|          | 14/16858 [00:02<26:48, 10.47it/s]\u001b[A\n",
+            "  0%|          | 16/16858 [00:02<25:35, 10.97it/s]\u001b[A\n",
+            "  0%|          | 19/16858 [00:02<20:47, 13.49it/s]\u001b[A\n",
+            "  0%|          | 21/16858 [00:02<20:36, 13.61it/s]\u001b[A\n",
+            "  0%|          | 23/16858 [00:02<20:29, 13.69it/s]\u001b[A\n",
+            "  0%|          | 26/16858 [00:03<18:39, 15.03it/s]\u001b[A\n",
+            "  0%|          | 29/16858 [00:03<16:51, 16.63it/s]\u001b[A\n",
+            "  0%|          | 32/16858 [00:03<15:48, 17.74it/s]\u001b[A\n",
+            "  0%|          | 34/16858 [00:03<19:59, 14.03it/s]\u001b[A\n",
+            "  0%|          | 37/16858 [00:03<17:04, 16.42it/s]\u001b[A\n",
+            "  0%|          | 39/16858 [00:03<18:10, 15.42it/s]\u001b[A\n",
+            "  0%|          | 43/16858 [00:03<14:16, 19.62it/s]\u001b[A\n",
+            "  0%|          | 49/16858 [00:04<09:50, 28.47it/s]\u001b[A\n",
+            "  0%|          | 56/16858 [00:04<07:20, 38.18it/s]\u001b[A\n",
+            "  0%|          | 63/16858 [00:04<06:12, 45.13it/s]\u001b[A\n",
+            "  0%|          | 69/16858 [00:04<06:03, 46.14it/s]\u001b[A\n",
+            "  0%|          | 74/16858 [00:04<05:58, 46.85it/s]\u001b[A\n",
+            "  0%|          | 79/16858 [00:04<06:00, 46.59it/s]\u001b[A\n",
+            "  0%|          | 84/16858 [00:04<05:59, 46.63it/s]\u001b[A\n",
+            "  1%|          | 89/16858 [00:04<06:00, 46.54it/s]\u001b[A\n",
+            "  1%|          | 94/16858 [00:04<06:07, 45.61it/s]\u001b[A\n",
+            "  1%|          | 100/16858 [00:05<05:42, 48.91it/s]\u001b[A\n",
+            "  1%|          | 107/16858 [00:05<05:09, 54.09it/s]\u001b[A\n",
+            "  1%|          | 114/16858 [00:05<04:49, 57.80it/s]\u001b[A\n",
+            "  1%|          | 121/16858 [00:05<04:34, 60.92it/s]\u001b[A\n",
+            "  1%|          | 128/16858 [00:05<04:27, 62.65it/s]\u001b[A\n",
+            "  1%|          | 135/16858 [00:05<04:29, 61.95it/s]\u001b[A\n",
+            "  1%|          | 142/16858 [00:05<04:41, 59.31it/s]\u001b[A\n",
+            "  1%|          | 148/16858 [00:05<04:48, 57.88it/s]\u001b[A\n",
+            "  1%|          | 155/16858 [00:05<04:38, 59.92it/s]\u001b[A\n",
+            "  1%|          | 162/16858 [00:06<04:34, 60.84it/s]\u001b[A\n",
+            "  1%|          | 169/16858 [00:06<04:47, 58.00it/s]\u001b[A\n",
+            "  1%|          | 175/16858 [00:06<05:00, 55.48it/s]\u001b[A\n",
+            "  1%|          | 181/16858 [00:06<05:08, 54.12it/s]\u001b[A\n",
+            "  1%|          | 187/16858 [00:06<06:56, 39.98it/s]\u001b[A\n",
+            "  1%|          | 192/16858 [00:06<06:54, 40.24it/s]\u001b[A\n",
+            "  1%|          | 197/16858 [00:06<07:28, 37.14it/s]\u001b[A\n",
+            "  1%|          | 202/16858 [00:07<08:57, 30.99it/s]\u001b[A\n",
+            "  1%|          | 206/16858 [00:07<09:27, 29.33it/s]\u001b[A\n",
+            "  1%|          | 210/16858 [00:07<10:46, 25.76it/s]\u001b[A\n",
+            "  1%|▏         | 213/16858 [00:07<11:13, 24.73it/s]\u001b[A\n",
+            "  1%|▏         | 216/16858 [00:07<11:26, 24.24it/s]\u001b[A\n",
+            "  1%|▏         | 219/16858 [00:07<11:49, 23.44it/s]\u001b[A\n",
+            "  1%|▏         | 222/16858 [00:08<11:56, 23.21it/s]\u001b[A\n",
+            "  1%|▏         | 225/16858 [00:08<12:04, 22.95it/s]\u001b[A\n",
+            "  1%|▏         | 228/16858 [00:08<12:56, 21.43it/s]\u001b[A\n",
+            "  1%|▏         | 231/16858 [00:08<12:38, 21.91it/s]\u001b[A\n",
+            "  1%|▏         | 234/16858 [00:08<12:19, 22.48it/s]\u001b[A\n",
+            "  1%|▏         | 239/16858 [00:08<09:43, 28.49it/s]\u001b[A\n",
+            "  1%|▏         | 242/16858 [00:08<09:52, 28.04it/s]\u001b[A\n",
+            "  1%|▏         | 246/16858 [00:09<10:02, 27.59it/s]\u001b[A\n",
+            "  1%|▏         | 249/16858 [00:09<10:40, 25.94it/s]\u001b[A\n",
+            "  1%|▏         | 252/16858 [00:09<11:15, 24.58it/s]\u001b[A\n",
+            "  2%|▏         | 255/16858 [00:09<11:34, 23.92it/s]\u001b[A\n",
+            "  2%|▏         | 265/16858 [00:09<06:38, 41.63it/s]\u001b[A\n",
+            "  2%|▏         | 276/16858 [00:09<04:45, 58.16it/s]\u001b[A\n",
+            "  2%|▏         | 287/16858 [00:09<03:54, 70.55it/s]\u001b[A\n",
+            "  2%|▏         | 297/16858 [00:09<03:33, 77.71it/s]\u001b[A\n",
+            "  2%|▏         | 307/16858 [00:09<03:19, 82.93it/s]\u001b[A\n",
+            "  2%|▏         | 318/16858 [00:10<03:03, 89.97it/s]\u001b[A\n",
+            "  2%|▏         | 329/16858 [00:10<02:54, 94.90it/s]\u001b[A\n",
+            "  2%|▏         | 340/16858 [00:10<02:49, 97.55it/s]\u001b[A\n",
+            "  2%|▏         | 351/16858 [00:10<02:45, 99.62it/s]\u001b[A\n",
+            "  2%|▏         | 362/16858 [00:10<02:44, 100.05it/s]\u001b[A\n",
+            "  2%|▏         | 373/16858 [00:10<02:42, 101.57it/s]\u001b[A\n",
+            "  2%|▏         | 384/16858 [00:10<02:43, 100.98it/s]\u001b[A\n",
+            "  2%|▏         | 395/16858 [00:10<02:42, 101.26it/s]\u001b[A\n",
+            "  2%|▏         | 406/16858 [00:10<02:49, 96.85it/s] \u001b[A\n",
+            "  2%|▏         | 417/16858 [00:11<02:44, 99.81it/s]\u001b[A\n",
+            "  3%|▎         | 428/16858 [00:11<02:42, 101.21it/s]\u001b[A\n",
+            "  3%|▎         | 439/16858 [00:11<02:41, 101.61it/s]\u001b[A\n",
+            "  3%|▎         | 450/16858 [00:11<02:39, 103.08it/s]\u001b[A\n",
+            "  3%|▎         | 461/16858 [00:11<02:41, 101.80it/s]\u001b[A\n",
+            "  3%|▎         | 472/16858 [00:11<02:40, 102.41it/s]\u001b[A\n",
+            "  3%|▎         | 483/16858 [00:11<02:37, 103.91it/s]\u001b[A\n",
+            "  3%|▎         | 494/16858 [00:11<02:37, 103.62it/s]\u001b[A\n",
+            "  3%|▎         | 505/16858 [00:11<02:36, 104.53it/s]\u001b[A\n",
+            "  3%|▎         | 516/16858 [00:12<02:40, 101.67it/s]\u001b[A\n",
+            "  3%|▎         | 527/16858 [00:12<02:39, 102.68it/s]\u001b[A\n",
+            "  3%|▎         | 538/16858 [00:12<02:37, 103.71it/s]\u001b[A\n",
+            "  3%|▎         | 549/16858 [00:12<02:34, 105.27it/s]\u001b[A\n",
+            "  3%|▎         | 560/16858 [00:12<02:37, 103.52it/s]\u001b[A\n",
+            "  3%|▎         | 571/16858 [00:12<02:37, 103.46it/s]\u001b[A\n",
+            "  3%|▎         | 582/16858 [00:12<02:36, 104.03it/s]\u001b[A\n",
+            "  4%|▎         | 593/16858 [00:12<02:35, 104.61it/s]\u001b[A\n",
+            "  4%|▎         | 604/16858 [00:12<02:35, 104.53it/s]\u001b[A\n",
+            "  4%|▎         | 615/16858 [00:12<02:39, 101.93it/s]\u001b[A\n",
+            "  4%|▎         | 626/16858 [00:13<02:41, 100.35it/s]\u001b[A"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 610 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "\n",
+            "  4%|▍         | 637/16858 [00:13<02:39, 101.94it/s]\u001b[A\n",
+            "  4%|▍         | 648/16858 [00:13<02:37, 103.05it/s]\u001b[A\n",
+            "  4%|▍         | 659/16858 [00:13<02:37, 102.84it/s]\u001b[A\n",
+            "  4%|▍         | 670/16858 [00:13<02:36, 103.33it/s]\u001b[A\n",
+            "  4%|▍         | 681/16858 [00:13<02:35, 104.18it/s]\u001b[A"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 659 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "\n",
+            "  4%|▍         | 692/16858 [00:13<02:33, 105.44it/s]\u001b[A\n",
+            "  4%|▍         | 703/16858 [00:13<02:32, 105.66it/s]\u001b[A\n",
+            "  4%|▍         | 714/16858 [00:13<02:55, 92.19it/s] \u001b[A\n",
+            "  4%|▍         | 724/16858 [00:14<03:11, 84.21it/s]\u001b[A\n",
+            "  4%|▍         | 733/16858 [00:14<03:15, 82.38it/s]\u001b[A\n",
+            "  4%|▍         | 742/16858 [00:14<03:19, 80.93it/s]\u001b[A\n",
+            "  4%|▍         | 751/16858 [00:14<03:18, 81.20it/s]\u001b[A\n",
+            "  5%|▍         | 760/16858 [00:14<03:20, 80.23it/s]\u001b[A\n",
+            "  5%|▍         | 769/16858 [00:14<03:24, 78.84it/s]\u001b[A\n",
+            "  5%|▍         | 777/16858 [00:14<03:27, 77.33it/s]\u001b[A\n",
+            "  5%|▍         | 786/16858 [00:14<03:24, 78.74it/s]\u001b[A\n",
+            "  5%|▍         | 795/16858 [00:15<03:22, 79.22it/s]\u001b[A\n",
+            "  5%|▍         | 803/16858 [00:15<03:28, 76.85it/s]\u001b[A\n",
+            "  5%|▍         | 811/16858 [00:15<03:31, 75.97it/s]\u001b[A\n",
+            "  5%|▍         | 820/16858 [00:15<03:25, 77.88it/s]\u001b[A\n",
+            "  5%|▍         | 829/16858 [00:15<03:23, 78.69it/s]\u001b[A\n",
+            "  5%|▍         | 838/16858 [00:15<03:18, 80.77it/s]\u001b[A\n",
+            "  5%|▌         | 847/16858 [00:15<03:20, 80.00it/s]\u001b[A\n",
+            "  5%|▌         | 856/16858 [00:15<03:31, 75.69it/s]\u001b[A\n",
+            "  5%|▌         | 864/16858 [00:15<03:34, 74.47it/s]\u001b[A\n",
+            "  5%|▌         | 872/16858 [00:16<03:46, 70.63it/s]\u001b[A\n",
+            "  5%|▌         | 880/16858 [00:16<03:51, 69.01it/s]\u001b[A\n",
+            "  5%|▌         | 887/16858 [00:16<04:01, 66.18it/s]\u001b[A\n",
+            "  5%|▌         | 894/16858 [00:16<04:09, 64.08it/s]\u001b[A\n",
+            "  5%|▌         | 905/16858 [00:16<03:34, 74.48it/s]\u001b[A\n",
+            "  5%|▌         | 915/16858 [00:16<03:16, 81.33it/s]\u001b[A\n",
+            "  5%|▌         | 926/16858 [00:16<02:59, 88.66it/s]\u001b[A\n",
+            "  6%|▌         | 937/16858 [00:16<02:49, 93.74it/s]\u001b[A\n",
+            "  6%|▌         | 948/16858 [00:16<02:45, 96.18it/s]\u001b[A\n",
+            "  6%|▌         | 959/16858 [00:17<02:40, 99.12it/s]\u001b[A\n",
+            "  6%|▌         | 970/16858 [00:17<02:38, 100.08it/s]\u001b[A\n",
+            "  6%|▌         | 981/16858 [00:17<02:43, 97.05it/s] \u001b[A\n",
+            "  6%|▌         | 992/16858 [00:17<02:41, 98.45it/s]\u001b[A\n",
+            "  6%|▌         | 1003/16858 [00:17<02:36, 101.03it/s]\u001b[A\n",
+            "  6%|▌         | 1014/16858 [00:17<02:42, 97.25it/s] \u001b[A\n",
+            "  6%|▌         | 1025/16858 [00:17<02:38, 99.89it/s]\u001b[A\n",
+            "  6%|▌         | 1036/16858 [00:17<02:37, 100.28it/s]\u001b[A\n",
+            "  6%|▌         | 1047/16858 [00:17<02:36, 101.07it/s]\u001b[A\n",
+            "  6%|▋         | 1058/16858 [00:18<02:38, 99.65it/s] \u001b[A\n",
+            "  6%|▋         | 1069/16858 [00:18<02:36, 101.09it/s]\u001b[A\n",
+            "  6%|▋         | 1080/16858 [00:18<02:38, 99.47it/s] \u001b[A\n",
+            "  6%|▋         | 1091/16858 [00:18<02:36, 100.74it/s]\u001b[A\n",
+            "  7%|▋         | 1102/16858 [00:18<02:34, 101.80it/s]\u001b[A\n",
+            "  7%|▋         | 1113/16858 [00:18<02:36, 100.59it/s]\u001b[A\n",
+            "  7%|▋         | 1124/16858 [00:18<02:34, 101.91it/s]\u001b[A\n",
+            "  7%|▋         | 1135/16858 [00:18<02:33, 102.55it/s]\u001b[A\n",
+            "  7%|▋         | 1146/16858 [00:18<02:33, 102.31it/s]\u001b[A\n",
+            "  7%|▋         | 1157/16858 [00:19<02:34, 101.80it/s]\u001b[A\n",
+            "  7%|▋         | 1168/16858 [00:19<02:32, 103.09it/s]\u001b[A\n",
+            "  7%|▋         | 1179/16858 [00:19<02:32, 103.04it/s]\u001b[A\n",
+            "  7%|▋         | 1190/16858 [00:19<02:39, 98.29it/s] \u001b[A\n",
+            "  7%|▋         | 1201/16858 [00:19<02:37, 99.47it/s]\u001b[A"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 1187 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "\n",
+            "  7%|▋         | 1212/16858 [00:19<02:35, 100.83it/s]\u001b[A\n",
+            "  7%|▋         | 1223/16858 [00:19<02:32, 102.62it/s]\u001b[A\n",
+            "  7%|▋         | 1234/16858 [00:19<02:30, 103.47it/s]\u001b[A\n",
+            "  7%|▋         | 1245/16858 [00:19<02:32, 102.66it/s]\u001b[A\n",
+            "  7%|▋         | 1256/16858 [00:19<02:32, 102.37it/s]\u001b[A\n",
+            "  8%|▊         | 1267/16858 [00:20<02:31, 103.17it/s]\u001b[A\n",
+            "  8%|▊         | 1278/16858 [00:20<02:29, 104.25it/s]\u001b[A\n",
+            "  8%|▊         | 1289/16858 [00:20<02:29, 103.84it/s]\u001b[A\n",
+            "  8%|▊         | 1300/16858 [00:20<02:38, 97.92it/s] \u001b[A\n",
+            "  8%|▊         | 1311/16858 [00:20<02:36, 99.28it/s]\u001b[A\n",
+            "  8%|▊         | 1322/16858 [00:20<02:35, 100.10it/s]\u001b[A\n",
+            "  8%|▊         | 1333/16858 [00:20<02:31, 102.67it/s]\u001b[A\n",
+            "  8%|▊         | 1344/16858 [00:20<02:32, 101.49it/s]\u001b[A\n",
+            "  8%|▊         | 1355/16858 [00:20<02:30, 102.89it/s]\u001b[A\n",
+            "  8%|▊         | 1366/16858 [00:21<02:28, 104.20it/s]\u001b[A\n",
+            "  8%|▊         | 1377/16858 [00:21<02:27, 105.23it/s]\u001b[A\n",
+            "  8%|▊         | 1388/16858 [00:21<02:26, 105.39it/s]\u001b[A\n",
+            "  8%|▊         | 1399/16858 [00:21<02:31, 101.86it/s]\u001b[A\n",
+            "  8%|▊         | 1410/16858 [00:21<02:32, 101.55it/s]\u001b[A\n",
+            "  8%|▊         | 1421/16858 [00:21<02:29, 103.00it/s]\u001b[A\n",
+            "  8%|▊         | 1432/16858 [00:21<02:30, 102.50it/s]\u001b[A\n",
+            "  9%|▊         | 1443/16858 [00:21<02:29, 103.10it/s]\u001b[A\n",
+            "  9%|▊         | 1454/16858 [00:21<02:28, 103.76it/s]\u001b[A\n",
+            "  9%|▊         | 1465/16858 [00:22<02:28, 103.95it/s]\u001b[A\n",
+            "  9%|▉         | 1476/16858 [00:22<02:28, 103.87it/s]\u001b[A\n",
+            "  9%|▉         | 1487/16858 [00:22<02:28, 103.80it/s]\u001b[A\n",
+            "  9%|▉         | 1498/16858 [00:22<02:26, 104.90it/s]\u001b[A\n",
+            "  9%|▉         | 1509/16858 [00:22<02:34, 99.51it/s] \u001b[A\n",
+            "  9%|▉         | 1520/16858 [00:22<02:31, 101.19it/s]\u001b[A\n",
+            "  9%|▉         | 1531/16858 [00:22<02:28, 103.48it/s]\u001b[A\n",
+            "  9%|▉         | 1542/16858 [00:22<02:26, 104.23it/s]\u001b[A\n",
+            "  9%|▉         | 1553/16858 [00:22<02:27, 103.68it/s]\u001b[A\n",
+            "  9%|▉         | 1564/16858 [00:22<02:29, 102.01it/s]\u001b[A\n",
+            "  9%|▉         | 1575/16858 [00:23<02:27, 103.27it/s]\u001b[A\n",
+            "  9%|▉         | 1586/16858 [00:23<02:25, 104.81it/s]\u001b[A\n",
+            "  9%|▉         | 1597/16858 [00:23<02:27, 103.77it/s]\u001b[A\n",
+            " 10%|▉         | 1608/16858 [00:23<02:29, 101.68it/s]\u001b[A\n",
+            " 10%|▉         | 1619/16858 [00:23<02:31, 100.60it/s]\u001b[A\n",
+            " 10%|▉         | 1630/16858 [00:23<02:28, 102.67it/s]\u001b[A\n",
+            " 10%|▉         | 1641/16858 [00:23<02:27, 103.40it/s]\u001b[A\n",
+            " 10%|▉         | 1652/16858 [00:23<02:27, 102.79it/s]\u001b[A\n",
+            " 10%|▉         | 1663/16858 [00:23<02:26, 103.38it/s]\u001b[A\n",
+            " 10%|▉         | 1674/16858 [00:24<02:26, 103.95it/s]\u001b[A\n",
+            " 10%|▉         | 1685/16858 [00:24<02:25, 104.56it/s]\u001b[A\n",
+            " 10%|█         | 1696/16858 [00:24<02:24, 104.59it/s]\u001b[A\n",
+            " 10%|█         | 1707/16858 [00:24<02:23, 105.91it/s]\u001b[A\n",
+            " 10%|█         | 1718/16858 [00:24<02:27, 102.30it/s]\u001b[A\n",
+            " 10%|█         | 1729/16858 [00:24<02:28, 102.11it/s]\u001b[A\n",
+            " 10%|█         | 1740/16858 [00:24<02:40, 94.31it/s] \u001b[A\n",
+            " 10%|█         | 1751/16858 [00:24<02:36, 96.82it/s]\u001b[A\n",
+            " 10%|█         | 1762/16858 [00:24<02:32, 99.12it/s]\u001b[A\n",
+            " 11%|█         | 1773/16858 [00:25<02:30, 100.11it/s]\u001b[A\n",
+            " 11%|█         | 1784/16858 [00:25<02:26, 102.62it/s]\u001b[A\n",
+            " 11%|█         | 1795/16858 [00:25<02:26, 102.66it/s]\u001b[A\n",
+            " 11%|█         | 1806/16858 [00:25<02:25, 103.33it/s]\u001b[A\n",
+            " 11%|█         | 1817/16858 [00:25<02:24, 103.99it/s]\u001b[A\n",
+            " 11%|█         | 1828/16858 [00:25<02:27, 101.75it/s]\u001b[A\n",
+            " 11%|█         | 1839/16858 [00:25<02:28, 101.07it/s]\u001b[A\n",
+            " 11%|█         | 1850/16858 [00:25<02:26, 102.47it/s]\u001b[A\n",
+            " 11%|█         | 1861/16858 [00:25<02:25, 103.40it/s]\u001b[A\n",
+            " 11%|█         | 1872/16858 [00:25<02:25, 102.78it/s]\u001b[A\n",
+            " 11%|█         | 1883/16858 [00:26<02:23, 104.06it/s]\u001b[A\n",
+            " 11%|█         | 1894/16858 [00:26<02:22, 104.95it/s]\u001b[A\n",
+            " 11%|█▏        | 1905/16858 [00:26<02:22, 105.17it/s]\u001b[A\n",
+            " 11%|█▏        | 1916/16858 [00:26<02:21, 105.60it/s]\u001b[A\n",
+            " 11%|█▏        | 1927/16858 [00:26<02:44, 90.96it/s] \u001b[A\n",
+            " 11%|█▏        | 1937/16858 [00:26<02:55, 85.10it/s]\u001b[A\n",
+            " 12%|█▏        | 1946/16858 [00:26<03:03, 81.40it/s]\u001b[A\n",
+            " 12%|█▏        | 1955/16858 [00:26<03:05, 80.46it/s]\u001b[A\n",
+            " 12%|█▏        | 1964/16858 [00:27<03:02, 81.78it/s]\u001b[A\n",
+            " 12%|█▏        | 1973/16858 [00:27<03:07, 79.33it/s]\u001b[A\n",
+            " 12%|█▏        | 1982/16858 [00:27<03:13, 77.03it/s]\u001b[A\n",
+            " 12%|█▏        | 1990/16858 [00:27<03:20, 74.19it/s]\u001b[A\n",
+            " 12%|█▏        | 1998/16858 [00:27<03:22, 73.23it/s]\u001b[A\n",
+            " 12%|█▏        | 2006/16858 [00:27<03:32, 69.97it/s]\u001b[A\n",
+            " 12%|█▏        | 2014/16858 [00:27<03:29, 70.95it/s]\u001b[A\n",
+            " 12%|█▏        | 2022/16858 [00:27<03:28, 71.20it/s]\u001b[A\n",
+            " 12%|█▏        | 2030/16858 [00:27<03:26, 71.78it/s]\u001b[A\n",
+            " 12%|█▏        | 2038/16858 [00:28<03:22, 73.17it/s]\u001b[A\n",
+            " 12%|█▏        | 2047/16858 [00:28<03:14, 76.27it/s]\u001b[A\n",
+            " 12%|█▏        | 2056/16858 [00:28<03:09, 78.14it/s]\u001b[A\n",
+            " 12%|█▏        | 2064/16858 [00:28<03:18, 74.50it/s]\u001b[A\n",
+            " 12%|█▏        | 2072/16858 [00:28<03:21, 73.49it/s]\u001b[A\n",
+            " 12%|█▏        | 2080/16858 [00:28<03:23, 72.64it/s]\u001b[A\n",
+            " 12%|█▏        | 2088/16858 [00:28<03:32, 69.57it/s]\u001b[A\n",
+            " 12%|█▏        | 2095/16858 [00:28<03:36, 68.26it/s]\u001b[A\n",
+            " 12%|█▏        | 2102/16858 [00:28<03:44, 65.76it/s]\u001b[A\n",
+            " 13%|█▎        | 2109/16858 [00:29<03:42, 66.18it/s]\u001b[A\n",
+            " 13%|█▎        | 2120/16858 [00:29<03:10, 77.48it/s]\u001b[A\n",
+            " 13%|█▎        | 2131/16858 [00:29<02:53, 84.99it/s]\u001b[A\n",
+            " 13%|█▎        | 2142/16858 [00:29<02:42, 90.45it/s]\u001b[A\n",
+            " 13%|█▎        | 2153/16858 [00:29<02:36, 93.91it/s]\u001b[A\n",
+            " 13%|█▎        | 2164/16858 [00:29<02:33, 95.91it/s]\u001b[A\n",
+            " 13%|█▎        | 2174/16858 [00:29<02:37, 93.28it/s]\u001b[A\n",
+            " 13%|█▎        | 2184/16858 [00:29<02:35, 94.12it/s]\u001b[A\n",
+            " 13%|█▎        | 2195/16858 [00:29<02:29, 97.82it/s]\u001b[A\n",
+            " 13%|█��        | 2206/16858 [00:30<02:26, 100.27it/s]\u001b[A"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 2190 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "\n",
+            " 13%|█▎        | 2217/16858 [00:30<02:26, 100.20it/s]\u001b[A\n",
+            " 13%|█▎        | 2228/16858 [00:30<02:23, 102.21it/s]\u001b[A\n",
+            " 13%|█▎        | 2239/16858 [00:30<02:20, 103.91it/s]\u001b[A\n",
+            " 13%|█▎        | 2250/16858 [00:30<02:21, 102.96it/s]\u001b[A\n",
+            " 13%|█▎        | 2261/16858 [00:30<02:19, 104.73it/s]\u001b[A\n",
+            " 13%|█▎        | 2272/16858 [00:30<02:19, 104.39it/s]\u001b[A\n",
+            " 14%|█▎        | 2283/16858 [00:30<02:24, 100.82it/s]\u001b[A\n",
+            " 14%|█▎        | 2294/16858 [00:30<02:24, 100.88it/s]\u001b[A\n",
+            " 14%|█▎        | 2305/16858 [00:31<02:22, 102.22it/s]\u001b[A\n",
+            " 14%|█▎        | 2316/16858 [00:31<02:21, 102.61it/s]\u001b[A\n",
+            " 14%|█▍        | 2327/16858 [00:31<02:23, 101.23it/s]\u001b[A\n",
+            " 14%|█▍        | 2338/16858 [00:31<02:23, 101.38it/s]\u001b[A\n",
+            " 14%|█▍        | 2349/16858 [00:31<02:25, 99.57it/s] \u001b[A\n",
+            " 14%|█▍        | 2359/16858 [00:31<02:26, 98.86it/s]\u001b[A\n",
+            " 14%|█▍        | 2370/16858 [00:31<02:25, 99.63it/s]\u001b[A\n",
+            " 14%|█▍        | 2380/16858 [00:31<02:27, 97.97it/s]\u001b[A\n",
+            " 14%|█▍        | 2390/16858 [00:31<02:26, 98.47it/s]\u001b[A\n",
+            " 14%|█▍        | 2400/16858 [00:31<02:26, 98.54it/s]\u001b[A\n",
+            " 14%|█▍        | 2411/16858 [00:32<02:23, 100.54it/s]\u001b[A\n",
+            " 14%|█▍        | 2422/16858 [00:32<02:21, 101.85it/s]\u001b[A\n",
+            " 14%|█▍        | 2433/16858 [00:32<02:20, 102.58it/s]\u001b[A\n",
+            " 14%|█▍        | 2444/16858 [00:32<02:22, 101.33it/s]\u001b[A\n",
+            " 15%|█▍        | 2455/16858 [00:32<02:25, 98.80it/s] \u001b[A\n",
+            " 15%|█▍        | 2466/16858 [00:32<02:23, 99.99it/s]\u001b[A\n",
+            " 15%|█▍        | 2477/16858 [00:32<02:20, 102.22it/s]\u001b[A\n",
+            " 15%|█▍        | 2488/16858 [00:32<02:25, 98.92it/s] \u001b[A\n",
+            " 15%|█▍        | 2499/16858 [00:32<02:23, 100.26it/s]\u001b[A\n",
+            " 15%|█▍        | 2510/16858 [00:33<02:21, 101.59it/s]\u001b[A\n",
+            " 15%|█▍        | 2521/16858 [00:33<02:20, 102.24it/s]\u001b[A"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 2507 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "\n",
+            " 15%|█▌        | 2532/16858 [00:33<02:19, 102.63it/s]\u001b[A\n",
+            " 15%|█▌        | 2543/16858 [00:33<02:18, 103.06it/s]\u001b[A\n",
+            " 15%|█▌        | 2554/16858 [00:33<02:16, 104.86it/s]\u001b[A\n",
+            " 15%|█▌        | 2565/16858 [00:33<02:17, 103.90it/s]\u001b[A\n",
+            " 15%|█▌        | 2576/16858 [00:33<02:17, 103.72it/s]\u001b[A\n",
+            " 15%|█▌        | 2587/16858 [00:33<02:16, 104.47it/s]\u001b[A\n",
+            " 15%|█▌        | 2598/16858 [00:33<02:22, 99.79it/s] \u001b[A\n",
+            " 15%|█▌        | 2609/16858 [00:34<02:21, 100.68it/s]\u001b[A\n",
+            " 16%|█▌        | 2620/16858 [00:34<02:19, 102.42it/s]\u001b[A\n",
+            " 16%|█▌        | 2631/16858 [00:34<02:17, 103.79it/s]\u001b[A\n",
+            " 16%|█▌        | 2642/16858 [00:34<02:17, 103.43it/s]\u001b[A\n",
+            " 16%|█▌        | 2653/16858 [00:34<02:18, 102.92it/s]\u001b[A\n",
+            " 16%|█▌        | 2664/16858 [00:34<02:18, 102.31it/s]\u001b[A\n",
+            " 16%|█▌        | 2675/16858 [00:34<02:17, 103.07it/s]\u001b[A\n",
+            " 16%|█▌        | 2686/16858 [00:34<02:16, 104.12it/s]\u001b[A\n",
+            " 16%|█▌        | 2697/16858 [00:34<02:22, 99.60it/s] \u001b[A\n",
+            " 16%|█▌        | 2708/16858 [00:34<02:19, 101.57it/s]\u001b[A\n",
+            " 16%|█▌        | 2719/16858 [00:35<02:18, 102.33it/s]\u001b[A\n",
+            " 16%|█▌        | 2730/16858 [00:35<02:16, 103.47it/s]\u001b[A\n",
+            " 16%|█▋        | 2741/16858 [00:35<02:15, 104.19it/s]\u001b[A\n",
+            " 16%|█▋        | 2752/16858 [00:35<02:15, 104.17it/s]\u001b[A\n",
+            " 16%|█▋        | 2763/16858 [00:35<02:14, 104.74it/s]\u001b[A\n",
+            " 16%|█▋        | 2774/16858 [00:35<02:17, 102.61it/s]\u001b[A\n",
+            " 17%|█▋        | 2785/16858 [00:35<02:16, 103.38it/s]\u001b[A\n",
+            " 17%|█▋        | 2796/16858 [00:35<02:14, 104.17it/s]\u001b[A\n",
+            " 17%|█▋        | 2807/16858 [00:35<02:20, 100.21it/s]\u001b[A\n",
+            " 17%|█▋        | 2818/16858 [00:36<02:18, 101.52it/s]\u001b[A\n",
+            " 17%|█▋        | 2829/16858 [00:36<02:17, 102.25it/s]\u001b[A\n",
+            " 17%|█▋        | 2840/16858 [00:36<02:15, 103.20it/s]\u001b[A\n",
+            " 17%|█▋        | 2851/16858 [00:36<02:14, 104.21it/s]\u001b[A\n",
+            " 17%|█▋        | 2862/16858 [00:36<02:13, 104.55it/s]\u001b[A\n",
+            " 17%|█▋        | 2873/16858 [00:36<02:15, 103.07it/s]\u001b[A\n",
+            " 17%|█▋        | 2884/16858 [00:36<02:15, 103.26it/s]\u001b[A\n",
+            " 17%|█▋        | 2895/16858 [00:36<02:14, 104.04it/s]\u001b[A\n",
+            " 17%|█▋        | 2906/16858 [00:36<02:14, 103.78it/s]\u001b[A\n",
+            " 17%|█▋        | 2917/16858 [00:37<02:19, 99.72it/s] \u001b[A\n",
+            " 17%|█▋        | 2928/16858 [00:37<02:16, 102.29it/s]\u001b[A\n",
+            " 17%|█▋        | 2939/16858 [00:37<02:16, 102.06it/s]\u001b[A\n",
+            " 17%|█▋        | 2950/16858 [00:37<02:14, 103.77it/s]\u001b[A\n",
+            " 18%|█▊        | 2961/16858 [00:37<02:15, 102.50it/s]\u001b[A\n",
+            " 18%|█▊        | 2972/16858 [00:37<02:15, 102.61it/s]\u001b[A\n",
+            " 18%|█▊        | 2983/16858 [00:37<02:13, 103.74it/s]\u001b[A\n",
+            " 18%|█▊        | 2994/16858 [00:37<02:12, 104.83it/s]\u001b[A\n",
+            " 18%|█▊        | 3005/16858 [00:37<02:13, 104.12it/s]\u001b[A\n",
+            " 18%|█▊        | 3016/16858 [00:37<02:19, 99.25it/s] \u001b[A\n",
+            " 18%|█▊        | 3027/16858 [00:38<02:16, 101.52it/s]\u001b[A\n",
+            " 18%|█▊        | 3038/16858 [00:38<02:14, 102.65it/s]\u001b[A\n",
+            " 18%|█▊        | 3049/16858 [00:38<02:17, 100.48it/s]\u001b[A\n",
+            " 18%|█▊        | 3060/16858 [00:38<02:17, 100.26it/s]\u001b[A\n",
+            " 18%|█▊        | 3071/16858 [00:38<02:14, 102.65it/s]\u001b[A\n",
+            " 18%|█▊        | 3082/16858 [00:38<02:13, 103.47it/s]\u001b[A\n",
+            " 18%|█▊        | 3093/16858 [00:38<02:11, 104.33it/s]\u001b[A\n",
+            " 18%|█▊        | 3104/16858 [00:38<02:12, 104.07it/s]\u001b[A\n",
+            " 18%|█▊        | 3115/16858 [00:38<02:11, 104.37it/s]\u001b[A\n",
+            " 19%|█▊        | 3126/16858 [00:39<02:15, 101.62it/s]\u001b[A\n",
+            " 19%|█▊        | 3137/16858 [00:39<02:33, 89.45it/s] \u001b[A\n",
+            " 19%|█▊        | 3147/16858 [00:39<02:43, 83.97it/s]\u001b[A\n",
+            " 19%|█▊        | 3156/16858 [00:39<02:47, 81.83it/s]\u001b[A\n",
+            " 19%|█▉        | 3165/16858 [00:39<02:51, 80.02it/s]\u001b[A\n",
+            " 19%|█▉        | 3174/16858 [00:39<02:51, 79.96it/s]\u001b[A\n",
+            " 19%|█▉        | 3183/16858 [00:39<02:54, 78.24it/s]\u001b[A\n",
+            " 19%|█▉        | 3191/16858 [00:39<02:57, 76.89it/s]\u001b[A\n",
+            " 19%|█▉        | 3199/16858 [00:40<03:00, 75.79it/s]\u001b[A\n",
+            " 19%|█▉        | 3207/16858 [00:40<03:00, 75.65it/s]\u001b[A\n",
+            " 19%|█▉        | 3215/16858 [00:40<02:58, 76.27it/s]\u001b[A\n",
+            " 19%|█▉        | 3223/16858 [00:40<03:00, 75.40it/s]\u001b[A\n",
+            " 19%|█▉        | 3231/16858 [00:40<03:01, 75.07it/s]\u001b[A\n",
+            " 19%|█▉        | 3240/16858 [00:40<02:56, 77.24it/s]\u001b[A\n",
+            " 19%|█▉        | 3249/16858 [00:40<02:51, 79.36it/s]\u001b[A\n",
+            " 19%|█▉        | 3258/16858 [00:40<02:48, 80.80it/s]\u001b[A\n",
+            " 19%|█▉        | 3267/16858 [00:40<02:53, 78.22it/s]\u001b[A\n",
+            " 19%|█▉        | 3275/16858 [00:41<03:03, 74.14it/s]\u001b[A\n",
+            " 19%|█▉        | 3283/16858 [00:41<03:09, 71.71it/s]\u001b[A\n",
+            " 20%|█▉        | 3291/16858 [00:41<03:17, 68.75it/s]\u001b[A\n",
+            " 20%|█▉        | 3298/16858 [00:41<03:18, 68.23it/s]\u001b[A\n",
+            " 20%|█▉        | 3305/16858 [00:41<03:25, 66.09it/s]\u001b[A\n",
+            " 20%|█▉        | 3312/16858 [00:41<03:31, 64.19it/s]\u001b[A\n",
+            " 20%|█▉        | 3323/16858 [00:41<02:59, 75.43it/s]\u001b[A\n",
+            " 20%|█▉        | 3334/16858 [00:41<02:42, 83.37it/s]\u001b[A\n",
+            " 20%|█▉        | 3345/16858 [00:41<02:31, 89.29it/s]\u001b[A\n",
+            " 20%|█▉        | 3356/16858 [00:42<02:24, 93.30it/s]\u001b[A\n",
+            " 20%|█▉        | 3366/16858 [00:42<02:24, 93.22it/s]\u001b[A"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 3348 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "\n",
+            " 20%|██        | 3377/16858 [00:42<02:20, 96.17it/s]\u001b[A\n",
+            " 20%|██        | 3388/16858 [00:42<02:17, 98.05it/s]\u001b[A\n",
+            " 20%|██        | 3399/16858 [00:42<02:14, 100.14it/s]\u001b[A\n",
+            " 20%|██        | 3410/16858 [00:42<02:15, 99.32it/s] \u001b[A\n",
+            " 20%|██        | 3421/16858 [00:42<02:12, 101.57it/s]\u001b[A\n",
+            " 20%|██        | 3432/16858 [00:42<02:11, 102.33it/s]\u001b[A\n",
+            " 20%|██        | 3443/16858 [00:42<02:11, 101.93it/s]\u001b[A\n",
+            " 20%|██        | 3454/16858 [00:43<02:11, 101.93it/s]\u001b[A\n",
+            " 21%|██        | 3466/16858 [00:43<02:08, 104.27it/s]\u001b[A\n",
+            " 21%|██        | 3477/16858 [00:43<02:12, 101.26it/s]\u001b[A\n",
+            " 21%|██        | 3488/16858 [00:43<02:10, 102.48it/s]\u001b[A\n",
+            " 21%|██        | 3499/16858 [00:43<02:09, 102.94it/s]\u001b[A\n",
+            " 21%|██        | 3510/16858 [00:43<02:08, 103.76it/s]\u001b[A\n",
+            " 21%|██        | 3521/16858 [00:43<02:10, 102.43it/s]\u001b[A\n",
+            " 21%|██        | 3532/16858 [00:43<02:08, 103.95it/s]\u001b[A\n",
+            " 21%|██        | 3543/16858 [00:43<02:07, 104.77it/s]\u001b[A\n",
+            " 21%|██        | 3554/16858 [00:43<02:10, 102.07it/s]\u001b[A\n",
+            " 21%|██        | 3565/16858 [00:44<02:08, 103.07it/s]\u001b[A\n",
+            " 21%|██        | 3576/16858 [00:44<02:09, 102.42it/s]\u001b[A\n",
+            " 21%|██▏       | 3587/16858 [00:44<02:11, 100.92it/s]\u001b[A\n",
+            " 21%|██▏       | 3599/16858 [00:44<02:07, 104.21it/s]\u001b[A\n",
+            " 21%|██▏       | 3610/16858 [00:44<02:08, 102.85it/s]\u001b[A"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 3589 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "\n",
+            " 21%|██▏       | 3621/16858 [00:44<02:10, 101.13it/s]\u001b[A\n",
+            " 22%|██▏       | 3632/16858 [00:44<02:11, 100.55it/s]\u001b[A"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 3611 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "\n",
+            " 22%|██▏       | 3643/16858 [00:44<02:12, 99.46it/s] \u001b[A\n",
+            " 22%|██▏       | 3654/16858 [00:44<02:11, 100.15it/s]\u001b[A\n",
+            " 22%|██▏       | 3665/16858 [00:45<02:09, 101.79it/s]\u001b[A\n",
+            " 22%|██▏       | 3676/16858 [00:45<02:07, 103.03it/s]\u001b[A\n",
+            " 22%|██▏       | 3687/16858 [00:45<02:12, 99.50it/s] \u001b[A\n",
+            " 22%|██▏       | 3698/16858 [00:45<02:10, 101.02it/s]\u001b[A\n",
+            " 22%|██▏       | 3709/16858 [00:45<02:09, 101.70it/s]\u001b[A\n",
+            " 22%|██▏       | 3720/16858 [00:45<02:09, 101.42it/s]\u001b[A\n",
+            " 22%|██▏       | 3731/16858 [00:45<02:07, 103.05it/s]\u001b[A\n",
+            " 22%|██▏       | 3742/16858 [00:45<02:08, 102.39it/s]\u001b[A\n",
+            " 22%|██▏       | 3753/16858 [00:45<02:07, 102.65it/s]\u001b[A"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 3739 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "\n",
+            " 22%|██▏       | 3764/16858 [00:46<02:06, 103.59it/s]\u001b[A\n",
+            " 22%|██▏       | 3775/16858 [00:46<02:06, 103.70it/s]\u001b[A\n",
+            " 22%|██▏       | 3786/16858 [00:46<02:05, 104.47it/s]\u001b[A\n",
+            " 23%|██▎       | 3797/16858 [00:46<02:08, 101.65it/s]\u001b[A\n",
+            " 23%|██▎       | 3808/16858 [00:46<02:06, 103.34it/s]\u001b[A\n",
+            " 23%|██▎       | 3819/16858 [00:46<02:07, 102.51it/s]\u001b[A\n",
+            " 23%|██▎       | 3830/16858 [00:46<02:06, 102.81it/s]\u001b[A\n",
+            " 23%|██▎       | 3841/16858 [00:46<02:05, 104.01it/s]\u001b[A\n",
+            " 23%|██▎       | 3852/16858 [00:46<02:04, 104.42it/s]\u001b[A\n",
+            " 23%|██▎       | 3863/16858 [00:46<02:04, 104.37it/s]\u001b[A\n",
+            " 23%|██▎       | 3874/16858 [00:47<02:03, 104.98it/s]\u001b[A\n",
+            " 23%|██▎       | 3885/16858 [00:47<02:03, 104.73it/s]\u001b[A\n",
+            " 23%|██▎       | 3896/16858 [00:47<02:09, 100.12it/s]\u001b[A\n",
+            " 23%|██▎       | 3907/16858 [00:47<02:08, 100.66it/s]\u001b[A\n",
+            " 23%|██▎       | 3918/16858 [00:47<02:06, 102.18it/s]\u001b[A\n",
+            " 23%|██▎       | 3929/16858 [00:47<02:04, 103.48it/s]\u001b[A\n",
+            " 23%|██▎       | 3941/16858 [00:47<02:01, 106.03it/s]\u001b[A\n",
+            " 23%|██▎       | 3952/16858 [00:47<02:03, 104.86it/s]\u001b[A"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 3932 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "\n",
+            " 24%|██▎       | 3963/16858 [00:47<02:03, 104.19it/s]\u001b[A\n",
+            " 24%|██▎       | 3974/16858 [00:48<02:05, 102.64it/s]\u001b[A\n",
+            " 24%|██▎       | 3985/16858 [00:48<02:04, 103.10it/s]\u001b[A\n",
+            " 24%|██▎       | 3996/16858 [00:48<02:05, 102.19it/s]\u001b[A\n",
+            " 24%|██▍       | 4007/16858 [00:48<02:11, 97.96it/s] \u001b[A\n",
+            " 24%|██▍       | 4017/16858 [00:48<02:13, 96.49it/s]\u001b[A\n",
+            " 24%|██▍       | 4028/16858 [00:48<02:09, 99.08it/s]\u001b[A\n",
+            " 24%|██▍       | 4039/16858 [00:48<02:06, 101.06it/s]\u001b[A\n",
+            " 24%|██▍       | 4050/16858 [00:48<02:07, 100.63it/s]\u001b[A"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 4036 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "\n",
+            " 24%|██▍       | 4061/16858 [00:48<02:06, 101.12it/s]\u001b[A\n",
+            " 24%|██▍       | 4072/16858 [00:49<02:04, 102.64it/s]\u001b[A\n",
+            " 24%|██▍       | 4083/16858 [00:49<02:03, 103.23it/s]\u001b[A\n",
+            " 24%|██▍       | 4094/16858 [00:49<02:04, 102.65it/s]\u001b[A\n",
+            " 24%|██▍       | 4105/16858 [00:49<02:09, 98.74it/s] \u001b[A\n",
+            " 24%|██▍       | 4116/16858 [00:49<02:07, 99.69it/s]\u001b[A\n",
+            " 24%|██▍       | 4127/16858 [00:49<02:05, 101.35it/s]\u001b[A\n",
+            " 25%|██▍       | 4138/16858 [00:49<02:03, 103.00it/s]\u001b[A\n",
+            " 25%|██▍       | 4149/16858 [00:49<02:02, 103.97it/s]\u001b[A\n",
+            " 25%|██▍       | 4160/16858 [00:49<02:04, 102.22it/s]\u001b[A\n",
+            " 25%|██▍       | 4171/16858 [00:50<02:05, 100.91it/s]\u001b[A\n",
+            " 25%|██▍       | 4182/16858 [00:50<02:03, 102.58it/s]\u001b[A\n",
+            " 25%|██▍       | 4193/16858 [00:50<02:05, 101.12it/s]\u001b[A\n",
+            " 25%|██▍       | 4204/16858 [00:50<02:04, 101.86it/s]\u001b[A\n",
+            " 25%|██▌       | 4215/16858 [00:50<02:11, 96.33it/s] \u001b[A\n",
+            " 25%|██▌       | 4226/16858 [00:50<02:07, 98.70it/s]\u001b[A\n",
+            " 25%|██▌       | 4237/16858 [00:50<02:06, 99.63it/s]\u001b[A\n",
+            " 25%|██▌       | 4248/16858 [00:50<02:04, 101.23it/s]\u001b[A\n",
+            " 25%|██▌       | 4259/16858 [00:50<02:05, 100.25it/s]\u001b[A\n",
+            " 25%|██▌       | 4270/16858 [00:51<02:04, 101.17it/s]\u001b[A\n",
+            " 25%|██▌       | 4281/16858 [00:51<02:03, 102.06it/s]\u001b[A\n",
+            " 25%|██▌       | 4292/16858 [00:51<02:01, 103.18it/s]\u001b[A\n",
+            " 26%|██▌       | 4303/16858 [00:51<02:01, 103.63it/s]\u001b[A\n",
+            " 26%|██▌       | 4314/16858 [00:51<02:07, 98.55it/s] \u001b[A\n",
+            " 26%|██▌       | 4325/16858 [00:51<02:05, 99.90it/s]\u001b[A\n",
+            " 26%|██▌       | 4336/16858 [00:51<02:13, 93.47it/s]\u001b[A\n",
+            " 26%|██▌       | 4346/16858 [00:51<02:25, 86.24it/s]\u001b[A\n",
+            " 26%|██▌       | 4355/16858 [00:51<02:31, 82.70it/s]\u001b[A\n",
+            " 26%|██▌       | 4364/16858 [00:52<02:33, 81.39it/s]\u001b[A\n",
+            " 26%|██▌       | 4373/16858 [00:52<02:35, 80.11it/s]\u001b[A\n",
+            " 26%|██▌       | 4382/16858 [00:52<02:34, 80.49it/s]\u001b[A\n",
+            " 26%|██▌       | 4391/16858 [00:52<02:38, 78.87it/s]\u001b[A\n",
+            " 26%|██▌       | 4399/16858 [00:52<02:43, 76.11it/s]\u001b[A\n",
+            " 26%|██▌       | 4407/16858 [00:52<02:42, 76.72it/s]\u001b[A\n",
+            " 26%|██▌       | 4415/16858 [00:52<02:40, 77.47it/s]\u001b[A\n",
+            " 26%|██▌       | 4423/16858 [00:52<02:39, 77.87it/s]\u001b[A\n",
+            " 26%|██▋       | 4432/16858 [00:52<02:37, 78.94it/s]\u001b[A\n",
+            " 26%|██▋       | 4440/16858 [00:53<02:43, 75.74it/s]\u001b[A\n",
+            " 26%|██▋       | 4449/16858 [00:53<02:40, 77.40it/s]\u001b[A\n",
+            " 26%|██▋       | 4458/16858 [00:53<02:36, 79.10it/s]\u001b[A\n",
+            " 26%|██▋       | 4467/16858 [00:53<02:34, 80.42it/s]\u001b[A\n",
+            " 27%|██▋       | 4476/16858 [00:53<02:35, 79.75it/s]\u001b[A\n",
+            " 27%|██▋       | 4484/16858 [00:53<02:52, 71.69it/s]\u001b[A\n",
+            " 27%|██▋       | 4492/16858 [00:53<02:49, 73.13it/s]\u001b[A\n",
+            " 27%|██▋       | 4500/16858 [00:53<03:00, 68.48it/s]\u001b[A\n",
+            " 27%|██▋       | 4507/16858 [00:53<03:03, 67.49it/s]\u001b[A\n",
+            " 27%|██▋       | 4514/16858 [00:54<03:06, 66.18it/s]\u001b[A\n",
+            " 27%|██▋       | 4521/16858 [00:54<03:08, 65.61it/s]\u001b[A\n",
+            " 27%|██▋       | 4532/16858 [00:54<02:40, 76.78it/s]\u001b[A\n",
+            " 27%|██▋       | 4543/16858 [00:54<02:25, 84.47it/s]\u001b[A\n",
+            " 27%|██▋       | 4554/16858 [00:54<02:16, 90.12it/s]\u001b[A\n",
+            " 27%|██▋       | 4564/16858 [00:54<02:14, 91.31it/s]\u001b[A\n",
+            " 27%|██▋       | 4575/16858 [00:54<02:09, 94.91it/s]\u001b[A\n",
+            " 27%|██▋       | 4585/16858 [00:54<02:19, 88.25it/s]\u001b[A\n",
+            " 27%|██▋       | 4596/16858 [00:54<02:12, 92.76it/s]\u001b[A\n",
+            " 27%|██▋       | 4607/16858 [00:55<02:07, 96.43it/s]\u001b[A\n",
+            " 27%|██▋       | 4618/16858 [00:55<02:03, 98.91it/s]\u001b[A\n",
+            " 27%|██▋       | 4628/16858 [00:55<02:04, 98.45it/s]\u001b[A\n",
+            " 28%|██▊       | 4639/16858 [00:55<02:02, 99.37it/s]\u001b[A"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 4624 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "\n",
+            " 28%|██▊       | 4650/16858 [00:55<02:00, 101.04it/s]\u001b[A\n",
+            " 28%|██▊       | 4661/16858 [00:55<01:58, 103.02it/s]\u001b[A\n",
+            " 28%|██▊       | 4672/16858 [00:55<02:02, 99.72it/s] \u001b[A\n",
+            " 28%|██▊       | 4683/16858 [00:55<02:00, 100.75it/s]\u001b[A\n",
+            " 28%|██▊       | 4694/16858 [00:55<02:01, 100.37it/s]\u001b[A\n",
+            " 28%|██▊       | 4705/16858 [00:56<02:02, 99.44it/s] \u001b[A\n",
+            " 28%|██▊       | 4716/16858 [00:56<01:59, 101.25it/s]\u001b[A\n",
+            " 28%|██▊       | 4727/16858 [00:56<01:59, 101.91it/s]\u001b[A\n",
+            " 28%|██▊       | 4738/16858 [00:56<01:59, 101.27it/s]\u001b[A\n",
+            " 28%|██▊       | 4749/16858 [00:56<01:59, 101.36it/s]\u001b[A\n",
+            " 28%|██▊       | 4760/16858 [00:56<01:57, 102.61it/s]\u001b[A\n",
+            " 28%|██▊       | 4771/16858 [00:56<02:01, 99.15it/s] \u001b[A"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 4759 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "\n",
+            " 28%|██▊       | 4782/16858 [00:56<02:00, 100.55it/s]\u001b[A\n",
+            " 28%|██▊       | 4793/16858 [00:56<02:00, 100.33it/s]\u001b[A\n",
+            " 28%|██▊       | 4804/16858 [00:57<01:59, 101.08it/s]\u001b[A\n",
+            " 29%|██▊       | 4815/16858 [00:57<01:58, 101.84it/s]\u001b[A\n",
+            " 29%|██▊       | 4826/16858 [00:57<01:56, 102.97it/s]\u001b[A\n",
+            " 29%|██▊       | 4837/16858 [00:57<01:56, 103.23it/s]\u001b[A\n",
+            " 29%|██▉       | 4848/16858 [00:57<01:54, 104.69it/s]\u001b[A\n",
+            " 29%|██▉       | 4859/16858 [00:57<01:54, 104.74it/s]\u001b[A\n",
+            " 29%|██▉       | 4870/16858 [00:57<01:56, 103.18it/s]\u001b[A\n",
+            " 29%|██▉       | 4881/16858 [00:57<02:00, 99.59it/s] \u001b[A\n",
+            " 29%|██▉       | 4892/16858 [00:57<01:58, 100.82it/s]\u001b[A\n",
+            " 29%|██▉       | 4903/16858 [00:57<01:57, 101.36it/s]\u001b[A\n",
+            " 29%|██▉       | 4914/16858 [00:58<01:56, 102.38it/s]\u001b[A\n",
+            " 29%|██▉       | 4925/16858 [00:58<01:55, 103.42it/s]\u001b[A\n",
+            " 29%|██▉       | 4936/16858 [00:58<01:55, 103.23it/s]\u001b[A\n",
+            " 29%|██▉       | 4947/16858 [00:58<01:54, 104.09it/s]\u001b[A\n",
+            " 29%|██▉       | 4958/16858 [00:58<01:53, 104.61it/s]\u001b[A\n",
+            " 29%|██▉       | 4969/16858 [00:58<01:53, 104.32it/s]\u001b[A\n",
+            " 30%|██▉       | 4980/16858 [00:58<01:57, 101.47it/s]\u001b[A\n",
+            " 30%|██▉       | 4991/16858 [00:58<02:00, 98.80it/s] \u001b[A\n",
+            " 30%|██▉       | 5002/16858 [00:58<01:57, 101.15it/s]\u001b[A\n",
+            " 30%|██▉       | 5013/16858 [00:59<01:57, 101.11it/s]\u001b[A\n",
+            " 30%|██▉       | 5024/16858 [00:59<01:55, 102.18it/s]\u001b[A\n",
+            " 30%|██▉       | 5035/16858 [00:59<01:56, 101.88it/s]\u001b[A\n",
+            " 30%|██▉       | 5046/16858 [00:59<01:54, 103.41it/s]\u001b[A\n",
+            " 30%|██▉       | 5057/16858 [00:59<01:55, 102.13it/s]\u001b[A\n",
+            " 30%|███       | 5068/16858 [00:59<01:53, 103.57it/s]\u001b[A\n",
+            " 30%|███       | 5079/16858 [00:59<01:53, 103.53it/s]\u001b[A\n",
+            " 30%|███       | 5090/16858 [00:59<01:57, 100.13it/s]\u001b[A\n",
+            " 30%|███       | 5101/16858 [00:59<01:55, 101.43it/s]\u001b[A\n",
+            " 30%|███       | 5112/16858 [01:00<01:55, 101.26it/s]\u001b[A\n",
+            " 30%|███       | 5123/16858 [01:00<01:55, 102.00it/s]\u001b[A\n",
+            " 30%|███       | 5134/16858 [01:00<01:54, 102.19it/s]\u001b[A\n",
+            " 31%|███       | 5146/16858 [01:00<01:51, 104.65it/s]\u001b[A\n",
+            " 31%|███       | 5157/16858 [01:00<01:53, 103.53it/s]\u001b[A\n",
+            " 31%|███       | 5168/16858 [01:00<01:52, 103.84it/s]\u001b[A\n",
+            " 31%|███       | 5179/16858 [01:00<01:52, 104.16it/s]\u001b[A\n",
+            " 31%|███       | 5190/16858 [01:00<01:51, 104.62it/s]\u001b[A\n",
+            " 31%|███       | 5201/16858 [01:00<01:57, 98.90it/s] \u001b[A\n",
+            " 31%|███       | 5212/16858 [01:01<01:55, 100.58it/s]\u001b[A\n",
+            " 31%|███       | 5223/16858 [01:01<01:54, 101.26it/s]\u001b[A\n",
+            " 31%|███       | 5234/16858 [01:01<01:53, 102.29it/s]\u001b[A\n",
+            " 31%|███       | 5245/16858 [01:01<01:51, 104.02it/s]\u001b[A\n",
+            " 31%|███       | 5256/16858 [01:01<01:51, 104.10it/s]\u001b[A\n",
+            " 31%|███       | 5267/16858 [01:01<01:52, 102.99it/s]\u001b[A\n",
+            " 31%|███▏      | 5278/16858 [01:01<01:51, 103.68it/s]\u001b[A\n",
+            " 31%|███▏      | 5289/16858 [01:01<01:50, 104.32it/s]\u001b[A\n",
+            " 31%|███▏      | 5300/16858 [01:01<01:54, 101.16it/s]\u001b[A\n",
+            " 32%|███▏      | 5311/16858 [01:01<01:52, 102.38it/s]\u001b[A\n",
+            " 32%|███▏      | 5322/16858 [01:02<01:53, 101.50it/s]\u001b[A\n",
+            " 32%|███▏      | 5333/16858 [01:02<01:51, 102.90it/s]\u001b[A\n",
+            " 32%|███▏      | 5344/16858 [01:02<01:52, 102.71it/s]\u001b[A\n",
+            " 32%|███▏      | 5355/16858 [01:02<01:53, 101.67it/s]\u001b[A\n",
+            " 32%|███▏      | 5366/16858 [01:02<01:51, 102.77it/s]\u001b[A\n",
+            " 32%|███▏      | 5377/16858 [01:02<01:51, 103.00it/s]\u001b[A\n",
+            " 32%|███▏      | 5388/16858 [01:02<01:50, 104.00it/s]\u001b[A\n",
+            " 32%|███▏      | 5399/16858 [01:02<01:49, 104.42it/s]\u001b[A\n",
+            " 32%|███▏      | 5410/16858 [01:02<01:54, 99.88it/s] \u001b[A\n",
+            " 32%|███▏      | 5421/16858 [01:03<01:53, 101.05it/s]\u001b[A\n",
+            " 32%|███▏      | 5432/16858 [01:03<01:52, 101.83it/s]\u001b[A\n",
+            " 32%|███▏      | 5443/16858 [01:03<02:29, 76.32it/s] \u001b[A\n",
+            " 32%|███▏      | 5452/16858 [01:03<03:08, 60.50it/s]\u001b[A\n",
+            " 32%|███▏      | 5460/16858 [01:03<04:19, 43.87it/s]\u001b[A\n",
+            " 32%|███▏      | 5466/16858 [01:04<04:51, 39.03it/s]\u001b[A\n",
+            " 32%|███▏      | 5471/16858 [01:04<06:19, 29.99it/s]\u001b[A\n",
+            " 32%|███▏      | 5475/16858 [01:04<07:51, 24.13it/s]\u001b[A\n",
+            " 33%|███▎      | 5479/16858 [01:05<09:28, 20.02it/s]\u001b[A\n",
+            " 33%|███▎      | 5482/16858 [01:05<10:29, 18.08it/s]\u001b[A\n",
+            " 33%|███▎      | 5485/16858 [01:05<11:33, 16.39it/s]\u001b[A\n",
+            " 33%|███▎      | 5487/16858 [01:05<12:10, 15.57it/s]\u001b[A\n",
+            " 33%|███▎      | 5489/16858 [01:05<13:04, 14.49it/s]\u001b[A\n",
+            " 33%|███▎      | 5491/16858 [01:06<13:34, 13.96it/s]\u001b[A\n",
+            " 33%|███▎      | 5493/16858 [01:06<13:22, 14.16it/s]\u001b[A\n",
+            " 33%|███▎      | 5495/16858 [01:06<13:48, 13.71it/s]\u001b[A\n",
+            " 33%|███▎      | 5497/16858 [01:06<13:22, 14.15it/s]\u001b[A\n",
+            " 33%|███▎      | 5499/16858 [01:06<12:36, 15.02it/s]\u001b[A\n",
+            " 33%|███▎      | 5501/16858 [01:06<13:12, 14.32it/s]\u001b[A\n",
+            " 33%|███▎      | 5503/16858 [01:06<13:11, 14.35it/s]\u001b[A\n",
+            " 33%|███▎      | 5505/16858 [01:07<13:18, 14.22it/s]\u001b[A\n",
+            " 33%|███▎      | 5507/16858 [01:07<13:19, 14.19it/s]\u001b[A\n",
+            " 33%|███▎      | 5509/16858 [01:07<13:04, 14.47it/s]\u001b[A\n",
+            " 33%|███▎      | 5512/16858 [01:07<10:44, 17.61it/s]\u001b[A\n",
+            " 33%|███▎      | 5515/16858 [01:07<10:01, 18.84it/s]\u001b[A\n",
+            " 33%|███▎      | 5519/16858 [01:07<08:38, 21.87it/s]\u001b[A\n",
+            " 33%|███▎      | 5522/16858 [01:07<08:44, 21.62it/s]\u001b[A\n",
+            " 33%|███▎      | 5525/16858 [01:08<08:39, 21.81it/s]\u001b[A\n",
+            " 33%|███▎      | 5530/16858 [01:08<07:13, 26.13it/s]\u001b[A\n",
+            " 33%|███▎      | 5535/16858 [01:08<06:10, 30.59it/s]\u001b[A\n",
+            " 33%|███▎      | 5539/16858 [01:08<06:36, 28.57it/s]\u001b[A\n",
+            " 33%|███▎      | 5542/16858 [01:08<07:25, 25.39it/s]\u001b[A\n",
+            " 33%|███▎      | 5545/16858 [01:08<07:39, 24.60it/s]\u001b[A\n",
+            " 33%|███▎      | 5548/16858 [01:08<07:44, 24.36it/s]\u001b[A\n",
+            " 33%|███▎      | 5551/16858 [01:09<08:26, 22.33it/s]\u001b[A\n",
+            " 33%|███▎      | 5554/16858 [01:09<08:19, 22.61it/s]\u001b[A\n",
+            " 33%|███▎      | 5557/16858 [01:09<09:09, 20.55it/s]\u001b[A\n",
+            " 33%|███▎      | 5560/16858 [01:09<08:55, 21.08it/s]\u001b[A\n",
+            " 33%|███▎      | 5563/16858 [01:09<08:53, 21.16it/s]\u001b[A\n",
+            " 33%|███▎      | 5566/16858 [01:09<08:36, 21.86it/s]\u001b[A\n",
+            " 33%|███▎      | 5569/16858 [01:09<08:39, 21.71it/s]\u001b[A\n",
+            " 33%|███▎      | 5573/16858 [01:10<07:36, 24.73it/s]\u001b[A\n",
+            " 33%|███▎      | 5578/16858 [01:10<06:06, 30.76it/s]\u001b[A\n",
+            " 33%|███▎      | 5582/16858 [01:10<06:02, 31.14it/s]\u001b[A\n",
+            " 33%|███▎      | 5586/16858 [01:10<06:25, 29.28it/s]\u001b[A\n",
+            " 33%|███▎      | 5590/16858 [01:10<07:40, 24.49it/s]\u001b[A\n",
+            " 33%|███▎      | 5593/16858 [01:10<08:45, 21.44it/s]\u001b[A\n",
+            " 33%|███▎      | 5596/16858 [01:11<10:20, 18.15it/s]\u001b[A\n",
+            " 33%|███▎      | 5599/16858 [01:11<11:09, 16.81it/s]\u001b[A\n",
+            " 33%|███▎      | 5601/16858 [01:11<12:10, 15.40it/s]\u001b[A\n",
+            " 33%|███▎      | 5603/16858 [01:11<12:44, 14.73it/s]\u001b[A\n",
+            " 33%|███▎      | 5605/16858 [01:11<12:49, 14.61it/s]\u001b[A\n",
+            " 33%|███▎      | 5607/16858 [01:11<13:08, 14.26it/s]\u001b[A\n",
+            " 33%|███▎      | 5609/16858 [01:12<13:49, 13.56it/s]\u001b[A\n",
+            " 33%|███▎      | 5611/16858 [01:12<14:28, 12.95it/s]\u001b[A\n",
+            " 33%|███▎      | 5613/16858 [01:12<14:36, 12.83it/s]\u001b[A\n",
+            " 33%|███▎      | 5615/16858 [01:12<15:29, 12.09it/s]\u001b[A\n",
+            " 33%|███▎      | 5617/16858 [01:12<15:24, 12.16it/s]\u001b[A\n",
+            " 33%|███▎      | 5619/16858 [01:12<15:45, 11.89it/s]\u001b[A\n",
+            " 33%|███▎      | 5621/16858 [01:13<16:20, 11.46it/s]\u001b[A\n",
+            " 33%|███▎      | 5623/16858 [01:13<16:00, 11.70it/s]\u001b[A\n",
+            " 33%|███▎      | 5625/16858 [01:13<14:17, 13.10it/s]\u001b[A\n",
+            " 33%|███▎      | 5627/16858 [01:13<14:38, 12.79it/s]\u001b[A\n",
+            " 33%|███▎      | 5630/16858 [01:13<11:39, 16.05it/s]\u001b[A\n",
+            " 33%|███▎      | 5634/16858 [01:13<08:49, 21.18it/s]\u001b[A\n",
+            " 33%|███▎      | 5637/16858 [01:13<08:19, 22.48it/s]\u001b[A\n",
+            " 33%|███▎      | 5640/16858 [01:13<07:41, 24.33it/s]\u001b[A\n",
+            " 33%|███▎      | 5643/16858 [01:14<07:36, 24.56it/s]\u001b[A\n",
+            " 33%|███▎      | 5646/16858 [01:14<07:42, 24.24it/s]\u001b[A\n",
+            " 34%|███▎      | 5649/16858 [01:14<07:39, 24.39it/s]\u001b[A\n",
+            " 34%|███▎      | 5652/16858 [01:14<07:27, 25.04it/s]\u001b[A\n",
+            " 34%|███▎      | 5656/16858 [01:14<06:35, 28.35it/s]\u001b[A\n",
+            " 34%|███▎      | 5659/16858 [01:14<07:44, 24.09it/s]\u001b[A\n",
+            " 34%|███▎      | 5662/16858 [01:14<07:41, 24.26it/s]\u001b[A\n",
+            " 34%|███▎      | 5665/16858 [01:14<07:54, 23.59it/s]\u001b[A\n",
+            " 34%|███▎      | 5668/16858 [01:15<08:04, 23.09it/s]\u001b[A\n",
+            " 34%|███▎      | 5671/16858 [01:15<07:32, 24.71it/s]\u001b[A\n",
+            " 34%|███▎      | 5682/16858 [01:15<04:03, 45.99it/s]\u001b[A\n",
+            " 34%|███▍      | 5692/16858 [01:15<03:07, 59.65it/s]\u001b[A\n",
+            " 34%|���██▍      | 5703/16858 [01:15<02:35, 71.66it/s]\u001b[A\n",
+            " 34%|███▍      | 5713/16858 [01:15<02:23, 77.88it/s]\u001b[A\n",
+            " 34%|███▍      | 5724/16858 [01:15<02:11, 84.90it/s]\u001b[A\n",
+            " 34%|███▍      | 5735/16858 [01:15<02:02, 90.50it/s]\u001b[A\n",
+            " 34%|███▍      | 5746/16858 [01:15<01:58, 93.74it/s]\u001b[A\n",
+            " 34%|███▍      | 5757/16858 [01:16<01:54, 97.00it/s]\u001b[A\n",
+            " 34%|███▍      | 5767/16858 [01:16<01:53, 97.81it/s]\u001b[A\n",
+            " 34%|███▍      | 5778/16858 [01:16<01:50, 100.23it/s]\u001b[A\n",
+            " 34%|███▍      | 5789/16858 [01:16<01:48, 102.16it/s]\u001b[A\n",
+            " 34%|███▍      | 5800/16858 [01:16<01:48, 101.91it/s]\u001b[A\n",
+            " 34%|███▍      | 5811/16858 [01:16<01:48, 101.53it/s]\u001b[A\n",
+            " 35%|███▍      | 5822/16858 [01:16<01:52, 97.81it/s] \u001b[A\n",
+            " 35%|███▍      | 5833/16858 [01:16<01:51, 99.12it/s]\u001b[A\n",
+            " 35%|███▍      | 5844/16858 [01:16<01:49, 100.16it/s]\u001b[A\n",
+            " 35%|███▍      | 5855/16858 [01:17<01:47, 102.20it/s]\u001b[A\n",
+            " 35%|███▍      | 5866/16858 [01:17<01:47, 102.49it/s]\u001b[A\n",
+            " 35%|███▍      | 5877/16858 [01:17<01:45, 103.68it/s]\u001b[A\n",
+            " 35%|███▍      | 5888/16858 [01:17<01:45, 103.69it/s]\u001b[A\n",
+            " 35%|███▍      | 5899/16858 [01:17<02:04, 87.84it/s] \u001b[A\n",
+            " 35%|███▌      | 5909/16858 [01:17<02:12, 82.40it/s]\u001b[A\n",
+            " 35%|███▌      | 5918/16858 [01:17<02:22, 76.81it/s]\u001b[A\n",
+            " 35%|███▌      | 5926/16858 [01:17<02:21, 77.20it/s]\u001b[A\n",
+            " 35%|███▌      | 5934/16858 [01:18<02:22, 76.72it/s]\u001b[A\n",
+            " 35%|███▌      | 5942/16858 [01:18<02:22, 76.61it/s]\u001b[A\n",
+            " 35%|███▌      | 5950/16858 [01:18<02:23, 75.98it/s]\u001b[A\n",
+            " 35%|███▌      | 5958/16858 [01:18<02:24, 75.42it/s]\u001b[A\n",
+            " 35%|███▌      | 5966/16858 [01:18<02:24, 75.20it/s]\u001b[A\n",
+            " 35%|███▌      | 5974/16858 [01:18<02:26, 74.50it/s]\u001b[A\n",
+            " 35%|███▌      | 5982/16858 [01:18<02:23, 75.90it/s]\u001b[A\n",
+            " 36%|███▌      | 5990/16858 [01:18<02:22, 76.18it/s]\u001b[A\n",
+            " 36%|███▌      | 5998/16858 [01:18<02:28, 73.20it/s]\u001b[A\n",
+            " 36%|███▌      | 6006/16858 [01:18<02:25, 74.45it/s]\u001b[A\n",
+            " 36%|███▌      | 6015/16858 [01:19<02:21, 76.38it/s]\u001b[A\n",
+            " 36%|███▌      | 6023/16858 [01:19<02:21, 76.74it/s]\u001b[A\n",
+            " 36%|███▌      | 6031/16858 [01:19<02:30, 72.03it/s]\u001b[A\n",
+            " 36%|███▌      | 6039/16858 [01:19<02:31, 71.26it/s]\u001b[A\n",
+            " 36%|███▌      | 6047/16858 [01:19<02:30, 71.65it/s]\u001b[A\n",
+            " 36%|███▌      | 6055/16858 [01:19<02:34, 69.85it/s]\u001b[A\n",
+            " 36%|███▌      | 6063/16858 [01:19<02:35, 69.43it/s]\u001b[A\n",
+            " 36%|███▌      | 6070/16858 [01:19<02:46, 64.91it/s]\u001b[A\n",
+            " 36%|███▌      | 6077/16858 [01:20<02:49, 63.51it/s]\u001b[A\n",
+            " 36%|███▌      | 6087/16858 [01:20<02:27, 73.13it/s]\u001b[A\n",
+            " 36%|███▌      | 6098/16858 [01:20<02:11, 81.74it/s]\u001b[A\n",
+            " 36%|███▌      | 6109/16858 [01:20<02:01, 88.70it/s]\u001b[A\n",
+            " 36%|███▋      | 6119/16858 [01:20<01:57, 91.44it/s]\u001b[A\n",
+            " 36%|███▋      | 6129/16858 [01:20<01:54, 93.38it/s]\u001b[A\n",
+            " 36%|███▋      | 6140/16858 [01:20<01:51, 96.53it/s]\u001b[A\n",
+            " 36%|███▋      | 6151/16858 [01:20<01:47, 99.46it/s]\u001b[A\n",
+            " 37%|███▋      | 6162/16858 [01:20<01:46, 100.74it/s]\u001b[A\n",
+            " 37%|███▋      | 6173/16858 [01:20<01:48, 98.55it/s] \u001b[A"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 6158 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "\n",
+            " 37%|███▋      | 6183/16858 [01:21<01:49, 97.80it/s]\u001b[A\n",
+            " 37%|███▋      | 6194/16858 [01:21<01:46, 99.82it/s]\u001b[A\n",
+            " 37%|███▋      | 6205/16858 [01:21<01:46, 100.47it/s]\u001b[A\n",
+            " 37%|███▋      | 6216/16858 [01:21<01:44, 101.62it/s]\u001b[A\n",
+            " 37%|███▋      | 6228/16858 [01:21<01:41, 104.35it/s]\u001b[A"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 6215 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "\n",
+            " 37%|███▋      | 6239/16858 [01:21<01:41, 104.38it/s]\u001b[A\n",
+            " 37%|███▋      | 6251/16858 [01:21<01:39, 106.62it/s]\u001b[A\n",
+            " 37%|███▋      | 6263/16858 [01:21<01:38, 107.42it/s]\u001b[A\n",
+            " 37%|███▋      | 6274/16858 [01:21<01:41, 104.13it/s]\u001b[A\n",
+            " 37%|███▋      | 6285/16858 [01:22<01:44, 100.73it/s]\u001b[A\n",
+            " 37%|███▋      | 6296/16858 [01:22<01:44, 101.37it/s]\u001b[A"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 6284 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "\n",
+            " 37%|███▋      | 6307/16858 [01:22<01:44, 100.63it/s]\u001b[A\n",
+            " 37%|███▋      | 6318/16858 [01:22<01:43, 101.46it/s]\u001b[A\n",
+            " 38%|███▊      | 6329/16858 [01:22<01:42, 102.56it/s]\u001b[A\n",
+            " 38%|███▊      | 6340/16858 [01:22<01:42, 102.72it/s]\u001b[A\n",
+            " 38%|███▊      | 6351/16858 [01:22<01:41, 103.60it/s]\u001b[A\n",
+            " 38%|███▊      | 6362/16858 [01:22<01:41, 103.85it/s]\u001b[A\n",
+            " 38%|███▊      | 6373/16858 [01:22<01:42, 101.85it/s]\u001b[A\n",
+            " 38%|███▊      | 6384/16858 [01:23<01:46, 98.40it/s] \u001b[A\n",
+            " 38%|███▊      | 6394/16858 [01:23<01:49, 95.75it/s]\u001b[A\n",
+            " 38%|███▊      | 6405/16858 [01:23<01:47, 97.54it/s]\u001b[A\n",
+            " 38%|███▊      | 6416/16858 [01:23<01:44, 100.15it/s]\u001b[A\n",
+            " 38%|███▊      | 6427/16858 [01:23<01:42, 101.41it/s]\u001b[A\n",
+            " 38%|███▊      | 6438/16858 [01:23<01:42, 101.70it/s]\u001b[A\n",
+            " 38%|███▊      | 6449/16858 [01:23<01:42, 101.64it/s]\u001b[A\n",
+            " 38%|███▊      | 6460/16858 [01:23<01:40, 103.32it/s]\u001b[A\n",
+            " 38%|███▊      | 6471/16858 [01:23<01:39, 104.29it/s]\u001b[A\n",
+            " 38%|███▊      | 6482/16858 [01:23<01:41, 102.57it/s]\u001b[A\n",
+            " 39%|███▊      | 6493/16858 [01:24<01:45, 98.64it/s] \u001b[A"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 6479 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "\n",
+            " 39%|███▊      | 6503/16858 [01:24<01:44, 98.70it/s]\u001b[A\n",
+            " 39%|███▊      | 6514/16858 [01:24<01:43, 100.21it/s]\u001b[A\n",
+            " 39%|███▊      | 6525/16858 [01:24<01:41, 102.09it/s]\u001b[A\n",
+            " 39%|███▉      | 6536/16858 [01:24<01:42, 101.18it/s]\u001b[A\n",
+            " 39%|███▉      | 6547/16858 [01:24<01:40, 102.96it/s]\u001b[A\n",
+            " 39%|███▉      | 6558/16858 [01:24<01:39, 104.02it/s]\u001b[A\n",
+            " 39%|███▉      | 6569/16858 [01:24<01:39, 103.54it/s]\u001b[A\n",
+            " 39%|███▉      | 6580/16858 [01:24<01:46, 96.09it/s] \u001b[A\n",
+            " 39%|███▉      | 6590/16858 [01:25<01:49, 93.39it/s]\u001b[A\n",
+            " 39%|███▉      | 6601/16858 [01:25<01:47, 95.62it/s]\u001b[A\n",
+            " 39%|███▉      | 6612/16858 [01:25<01:44, 98.41it/s]\u001b[A\n",
+            " 39%|███▉      | 6623/16858 [01:25<01:43, 99.11it/s]\u001b[A\n",
+            " 39%|███▉      | 6634/16858 [01:25<01:41, 100.25it/s]\u001b[A\n",
+            " 39%|███▉      | 6645/16858 [01:25<01:41, 100.92it/s]\u001b[A\n",
+            " 39%|███▉      | 6656/16858 [01:25<01:40, 101.41it/s]\u001b[A\n",
+            " 40%|███▉      | 6667/16858 [01:25<01:39, 101.97it/s]\u001b[A\n",
+            " 40%|███▉      | 6678/16858 [01:25<01:39, 102.12it/s]\u001b[A\n",
+            " 40%|███▉      | 6689/16858 [01:26<01:38, 103.67it/s]\u001b[A"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 6678 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "\n",
+            " 40%|███▉      | 6700/16858 [01:26<01:43, 98.10it/s] \u001b[A\n",
+            " 40%|███▉      | 6711/16858 [01:26<01:41, 100.25it/s]\u001b[A\n",
+            " 40%|███▉      | 6723/16858 [01:26<01:38, 102.76it/s]\u001b[A\n",
+            " 40%|███▉      | 6734/16858 [01:26<01:38, 102.74it/s]\u001b[A\n",
+            " 40%|████      | 6745/16858 [01:26<01:38, 102.98it/s]\u001b[A\n",
+            " 40%|████      | 6756/16858 [01:26<01:38, 102.92it/s]\u001b[A\n",
+            " 40%|████      | 6767/16858 [01:26<01:39, 101.57it/s]\u001b[A\n",
+            " 40%|████      | 6778/16858 [01:26<01:38, 101.82it/s]\u001b[A\n",
+            " 40%|████      | 6789/16858 [01:27<01:37, 103.31it/s]\u001b[A\n",
+            " 40%|████      | 6800/16858 [01:27<01:41, 99.33it/s] \u001b[A\n",
+            " 40%|████      | 6811/16858 [01:27<01:39, 100.74it/s]\u001b[A\n",
+            " 40%|████      | 6822/16858 [01:27<01:38, 102.10it/s]\u001b[A\n",
+            " 41%|████      | 6833/16858 [01:27<01:37, 102.52it/s]\u001b[A\n",
+            " 41%|████      | 6844/16858 [01:27<01:37, 102.97it/s]\u001b[A\n",
+            " 41%|████      | 6855/16858 [01:27<01:36, 103.96it/s]\u001b[A\n",
+            " 41%|████      | 6866/16858 [01:27<01:37, 102.16it/s]\u001b[A\n",
+            " 41%|████      | 6877/16858 [01:27<01:39, 100.67it/s]\u001b[A"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 6865 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "\n",
+            " 41%|████      | 6888/16858 [01:28<01:39, 100.56it/s]\u001b[A\n",
+            " 41%|████      | 6899/16858 [01:28<01:37, 101.86it/s]\u001b[A\n",
+            " 41%|████      | 6910/16858 [01:28<01:42, 97.04it/s] \u001b[A\n",
+            " 41%|████      | 6921/16858 [01:28<01:40, 98.83it/s]\u001b[A\n",
+            " 41%|████      | 6932/16858 [01:28<01:38, 100.90it/s]\u001b[A\n",
+            " 41%|████      | 6943/16858 [01:28<01:38, 101.17it/s]\u001b[A"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 6925 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "\n",
+            " 41%|████▏     | 6954/16858 [01:28<01:37, 101.42it/s]\u001b[A\n",
+            " 41%|████▏     | 6965/16858 [01:28<01:35, 103.55it/s]\u001b[A\n",
+            " 41%|████▏     | 6976/16858 [01:28<01:35, 103.27it/s]\u001b[A\n",
+            " 41%|████▏     | 6987/16858 [01:28<01:34, 104.06it/s]\u001b[A\n",
+            " 42%|████▏     | 6998/16858 [01:29<01:34, 104.54it/s]\u001b[A\n",
+            " 42%|████▏     | 7009/16858 [01:29<01:38, 100.30it/s]\u001b[A\n",
+            " 42%|████▏     | 7020/16858 [01:29<01:36, 101.92it/s]\u001b[A\n",
+            " 42%|████▏     | 7031/16858 [01:29<01:35, 102.92it/s]\u001b[A\n",
+            " 42%|████▏     | 7042/16858 [01:29<01:35, 102.90it/s]\u001b[A\n",
+            " 42%|████▏     | 7053/16858 [01:29<01:36, 101.56it/s]\u001b[A\n",
+            " 42%|████▏     | 7064/16858 [01:29<01:36, 101.86it/s]\u001b[A\n",
+            " 42%|████▏     | 7075/16858 [01:29<01:35, 102.63it/s]\u001b[A\n",
+            " 42%|████▏     | 7086/16858 [01:29<01:33, 103.98it/s]\u001b[A\n",
+            " 42%|████▏     | 7097/16858 [01:30<01:40, 96.73it/s] \u001b[A\n",
+            " 42%|████▏     | 7107/16858 [01:30<01:56, 83.40it/s]\u001b[A\n",
+            " 42%|████▏     | 7116/16858 [01:30<02:03, 78.79it/s]\u001b[A\n",
+            " 42%|████▏     | 7125/16858 [01:30<02:05, 77.85it/s]\u001b[A\n",
+            " 42%|████▏     | 7133/16858 [01:30<02:05, 77.33it/s]\u001b[A\n",
+            " 42%|████▏     | 7141/16858 [01:30<02:07, 76.38it/s]\u001b[A\n",
+            " 42%|████▏     | 7149/16858 [01:30<02:09, 75.09it/s]\u001b[A\n",
+            " 42%|████▏     | 7157/16858 [01:30<02:08, 75.39it/s]\u001b[A\n",
+            " 43%|████▎     | 7165/16858 [01:31<02:09, 75.03it/s]\u001b[A\n",
+            " 43%|████▎     | 7173/16858 [01:31<02:06, 76.40it/s]\u001b[A\n",
+            " 43%|████▎     | 7181/16858 [01:31<02:08, 75.03it/s]\u001b[A\n",
+            " 43%|████▎     | 7189/16858 [01:31<02:12, 72.97it/s]\u001b[A\n",
+            " 43%|████▎     | 7197/16858 [01:31<02:09, 74.54it/s]\u001b[A\n",
+            " 43%|████▎     | 7205/16858 [01:31<02:10, 74.23it/s]\u001b[A\n",
+            " 43%|████▎     | 7213/16858 [01:31<02:08, 75.34it/s]\u001b[A\n",
+            " 43%|████▎     | 7221/16858 [01:31<02:07, 75.47it/s]\u001b[A\n",
+            " 43%|████▎     | 7229/16858 [01:31<02:09, 74.41it/s]\u001b[A\n",
+            " 43%|████▎     | 7237/16858 [01:32<02:13, 71.95it/s]\u001b[A\n",
+            " 43%|████▎     | 7245/16858 [01:32<02:11, 72.84it/s]\u001b[A\n",
+            " 43%|████▎     | 7253/16858 [01:32<02:17, 69.94it/s]\u001b[A\n",
+            " 43%|████▎     | 7261/16858 [01:32<02:19, 68.59it/s]\u001b[A\n",
+            " 43%|████▎     | 7268/16858 [01:32<02:26, 65.49it/s]\u001b[A\n",
+            " 43%|████▎     | 7275/16858 [01:32<02:30, 63.80it/s]\u001b[A\n",
+            " 43%|████▎     | 7284/16858 [01:32<02:15, 70.73it/s]\u001b[A\n",
+            " 43%|████▎     | 7295/16858 [01:32<01:59, 80.10it/s]\u001b[A\n",
+            " 43%|████▎     | 7306/16858 [01:32<01:50, 86.50it/s]\u001b[A\n",
+            " 43%|████▎     | 7317/16858 [01:33<01:43, 91.77it/s]\u001b[A\n",
+            " 43%|████▎     | 7328/16858 [01:33<01:39, 95.65it/s]\u001b[A\n",
+            " 44%|████▎     | 7339/16858 [01:33<01:37, 97.29it/s]\u001b[A\n",
+            " 44%|████▎     | 7349/16858 [01:33<01:38, 96.29it/s]\u001b[A\n",
+            " 44%|████▎     | 7359/16858 [01:33<01:39, 95.35it/s]\u001b[A\n",
+            " 44%|████▎     | 7369/16858 [01:33<01:38, 96.60it/s]\u001b[A\n",
+            " 44%|████▍     | 7380/16858 [01:33<01:35, 99.05it/s]\u001b[A\n",
+            " 44%|████▍     | 7391/16858 [01:33<01:33, 101.34it/s]\u001b[A\n",
+            " 44%|████▍     | 7402/16858 [01:33<01:32, 102.18it/s]\u001b[A\n",
+            " 44%|████▍     | 7413/16858 [01:33<01:33, 100.79it/s]\u001b[A\n",
+            " 44%|████▍     | 7424/16858 [01:34<01:32, 101.57it/s]\u001b[A\n",
+            " 44%|████▍     | 7435/16858 [01:34<01:33, 100.89it/s]\u001b[A\n",
+            " 44%|████▍     | 7446/16858 [01:34<01:31, 102.39it/s]\u001b[A\n",
+            " 44%|████▍     | 7457/16858 [01:34<01:30, 103.78it/s]\u001b[A\n",
+            " 44%|████▍     | 7468/16858 [01:34<01:33, 100.22it/s]\u001b[A\n",
+            " 44%|████▍     | 7479/16858 [01:34<01:33, 99.94it/s] \u001b[A\n",
+            " 44%|████▍     | 7490/16858 [01:34<01:32, 101.19it/s]\u001b[A\n",
+            " 44%|████▍     | 7501/16858 [01:34<01:31, 101.72it/s]\u001b[A\n",
+            " 45%|████▍     | 7512/16858 [01:34<01:32, 101.34it/s]\u001b[A\n",
+            " 45%|████▍     | 7523/16858 [01:35<01:31, 101.75it/s]\u001b[A\n",
+            " 45%|████▍     | 7534/16858 [01:35<01:30, 103.16it/s]\u001b[A\n",
+            " 45%|████▍     | 7545/16858 [01:35<01:32, 100.48it/s]\u001b[A\n",
+            " 45%|████▍     | 7556/16858 [01:35<01:30, 102.86it/s]\u001b[A\n",
+            " 45%|████▍     | 7567/16858 [01:35<01:32, 100.22it/s]\u001b[A\n",
+            " 45%|████▍     | 7578/16858 [01:35<01:32, 100.43it/s]\u001b[A\n",
+            " 45%|████▌     | 7589/16858 [01:35<01:30, 102.16it/s]\u001b[A\n",
+            " 45%|████▌     | 7600/16858 [01:35<01:29, 103.48it/s]\u001b[A\n",
+            " 45%|████▌     | 7611/16858 [01:35<01:29, 103.66it/s]\u001b[A\n",
+            " 45%|████▌     | 7622/16858 [01:36<01:28, 103.90it/s]\u001b[A\n",
+            " 45%|████▌     | 7633/16858 [01:36<01:28, 103.99it/s]\u001b[A\n",
+            " 45%|████▌     | 7644/16858 [01:36<01:30, 102.09it/s]\u001b[A\n",
+            " 45%|████▌     | 7655/16858 [01:36<01:30, 102.25it/s]\u001b[A\n",
+            " 45%|████▌     | 7666/16858 [01:36<01:29, 102.31it/s]\u001b[A\n",
+            " 46%|████▌     | 7677/16858 [01:36<01:32, 99.35it/s] \u001b[A\n",
+            " 46%|████▌     | 7688/16858 [01:36<01:31, 100.75it/s]\u001b[A\n",
+            " 46%|████▌     | 7699/16858 [01:36<01:28, 102.97it/s]\u001b[A\n",
+            " 46%|████▌     | 7710/16858 [01:36<01:28, 103.74it/s]\u001b[A\n",
+            " 46%|████▌     | 7721/16858 [01:36<01:28, 103.64it/s]\u001b[A\n",
+            " 46%|████▌     | 7732/16858 [01:37<01:28, 103.52it/s]\u001b[A\n",
+            " 46%|████▌     | 7743/16858 [01:37<01:28, 103.03it/s]\u001b[A\n",
+            " 46%|████▌     | 7754/16858 [01:37<01:28, 102.60it/s]\u001b[A\n",
+            " 46%|████▌     | 7765/16858 [01:37<01:29, 101.49it/s]\u001b[A\n",
+            " 46%|████▌     | 7776/16858 [01:37<01:29, 101.91it/s]\u001b[A"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 7755 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "\n",
+            " 46%|████▌     | 7787/16858 [01:37<01:32, 98.14it/s] \u001b[A\n",
+            " 46%|████▋     | 7798/16858 [01:37<01:30, 100.31it/s]\u001b[A\n",
+            " 46%|████▋     | 7809/16858 [01:37<01:29, 101.14it/s]\u001b[A\n",
+            " 46%|████▋     | 7820/16858 [01:37<01:30, 100.14it/s]\u001b[A\n",
+            " 46%|████▋     | 7831/16858 [01:38<01:29, 100.32it/s]\u001b[A\n",
+            " 47%|████▋     | 7842/16858 [01:38<01:29, 100.35it/s]\u001b[A\n",
+            " 47%|████▋     | 7853/16858 [01:38<01:28, 101.89it/s]\u001b[A\n",
+            " 47%|████▋     | 7864/16858 [01:38<01:29, 101.05it/s]\u001b[A\n",
+            " 47%|████▋     | 7875/16858 [01:38<01:27, 102.48it/s]\u001b[A\n",
+            " 47%|████▋     | 7886/16858 [01:38<01:30, 99.01it/s] \u001b[A\n",
+            " 47%|████▋     | 7897/16858 [01:38<01:29, 99.63it/s]\u001b[A\n",
+            " 47%|████▋     | 7908/16858 [01:38<01:29, 100.08it/s]\u001b[A\n",
+            " 47%|████▋     | 7919/16858 [01:38<01:27, 101.58it/s]\u001b[A\n",
+            " 47%|████▋     | 7930/16858 [01:39<01:27, 102.50it/s]\u001b[A\n",
+            " 47%|████▋     | 7941/16858 [01:39<01:25, 103.76it/s]\u001b[A\n",
+            " 47%|████▋     | 7952/16858 [01:39<01:26, 103.13it/s]\u001b[A\n",
+            " 47%|████▋     | 7963/16858 [01:39<01:25, 104.32it/s]\u001b[A\n",
+            " 47%|████▋     | 7974/16858 [01:39<01:26, 103.21it/s]\u001b[A\n",
+            " 47%|████▋     | 7985/16858 [01:39<01:26, 102.87it/s]\u001b[A\n",
+            " 47%|████▋     | 7996/16858 [01:39<01:29, 98.74it/s] \u001b[A\n",
+            " 47%|████▋     | 8007/16858 [01:39<01:28, 99.67it/s]\u001b[A\n",
+            " 48%|████▊     | 8018/16858 [01:39<01:27, 101.28it/s]\u001b[A\n",
+            " 48%|████▊     | 8029/16858 [01:40<01:27, 100.76it/s]\u001b[A\n",
+            " 48%|████▊     | 8040/16858 [01:40<01:27, 101.08it/s]\u001b[A\n",
+            " 48%|████▊     | 8051/16858 [01:40<01:26, 101.91it/s]\u001b[A\n",
+            " 48%|████▊     | 8062/16858 [01:40<01:24, 103.53it/s]\u001b[A\n",
+            " 48%|████▊     | 8073/16858 [01:40<01:26, 101.92it/s]\u001b[A\n",
+            " 48%|████▊     | 8084/16858 [01:40<01:25, 102.62it/s]\u001b[A\n",
+            " 48%|████▊     | 8095/16858 [01:40<01:28, 98.68it/s] \u001b[A\n",
+            " 48%|████▊     | 8106/16858 [01:40<01:26, 101.40it/s]\u001b[A\n",
+            " 48%|████▊     | 8117/16858 [01:40<01:25, 102.15it/s]\u001b[A\n",
+            " 48%|████▊     | 8128/16858 [01:41<01:25, 101.85it/s]\u001b[A\n",
+            " 48%|████▊     | 8139/16858 [01:41<01:26, 100.73it/s]\u001b[A\n",
+            " 48%|████▊     | 8150/16858 [01:41<01:26, 100.63it/s]\u001b[A\n",
+            " 48%|████▊     | 8161/16858 [01:41<01:24, 102.59it/s]\u001b[A\n",
+            " 48%|████▊     | 8172/16858 [01:41<01:25, 101.97it/s]\u001b[A\n",
+            " 49%|████▊     | 8183/16858 [01:41<01:25, 102.04it/s]\u001b[A\n",
+            " 49%|████▊     | 8194/16858 [01:41<01:24, 102.16it/s]\u001b[A\n",
+            " 49%|████▊     | 8205/16858 [01:41<01:26, 100.21it/s]\u001b[A\n",
+            " 49%|████▊     | 8216/16858 [01:41<01:26, 100.14it/s]\u001b[A\n",
+            " 49%|████▉     | 8227/16858 [01:41<01:26, 100.35it/s]\u001b[A\n",
+            " 49%|████▉     | 8238/16858 [01:42<01:25, 100.76it/s]\u001b[A\n",
+            " 49%|████▉     | 8249/16858 [01:42<01:23, 103.02it/s]\u001b[A\n",
+            " 49%|████▉     | 8260/16858 [01:42<01:23, 103.38it/s]\u001b[A\n",
+            " 49%|████▉     | 8271/16858 [01:42<01:23, 103.26it/s]\u001b[A\n",
+            " 49%|████▉     | 8283/16858 [01:42<01:21, 105.27it/s]\u001b[A\n",
+            " 49%|████▉     | 8294/16858 [01:42<01:20, 105.79it/s]\u001b[A\n",
+            " 49%|████▉     | 8305/16858 [01:42<01:33, 91.03it/s] \u001b[A\n",
+            " 49%|████▉     | 8315/16858 [01:42<01:40, 85.06it/s]\u001b[A\n",
+            " 49%|████▉     | 8324/16858 [01:43<01:43, 82.39it/s]\u001b[A\n",
+            " 49%|████▉     | 8333/16858 [01:43<01:45, 80.74it/s]\u001b[A\n",
+            " 49%|████▉     | 8342/16858 [01:43<01:45, 80.83it/s]\u001b[A\n",
+            " 50%|████▉     | 8351/16858 [01:43<01:47, 79.00it/s]\u001b[A\n",
+            " 50%|████▉     | 8359/16858 [01:43<01:48, 78.49it/s]\u001b[A\n",
+            " 50%|████▉     | 8367/16858 [01:43<01:48, 78.40it/s]\u001b[A\n",
+            " 50%|████▉     | 8375/16858 [01:43<01:51, 75.86it/s]\u001b[A\n",
+            " 50%|████▉     | 8383/16858 [01:43<01:55, 73.68it/s]\u001b[A\n",
+            " 50%|████▉     | 8391/16858 [01:43<01:54, 73.68it/s]\u001b[A\n",
+            " 50%|████▉     | 8399/16858 [01:44<01:55, 73.40it/s]\u001b[A\n",
+            " 50%|████▉     | 8407/16858 [01:44<01:53, 74.20it/s]\u001b[A\n",
+            " 50%|████▉     | 8415/16858 [01:44<01:52, 75.13it/s]\u001b[A\n",
+            " 50%|████▉     | 8424/16858 [01:44<01:49, 76.94it/s]\u001b[A\n",
+            " 50%|█████     | 8433/16858 [01:44<01:45, 79.55it/s]\u001b[A\n",
+            " 50%|█████     | 8441/16858 [01:44<01:50, 76.45it/s]\u001b[A\n",
+            " 50%|█████     | 8449/16858 [01:44<01:55, 72.56it/s]\u001b[A\n",
+            " 50%|█████     | 8457/16858 [01:44<01:57, 71.21it/s]\u001b[A\n",
+            " 50%|█████     | 8465/16858 [01:44<02:07, 65.66it/s]\u001b[A\n",
+            " 50%|█████     | 8472/16858 [01:45<02:06, 66.16it/s]\u001b[A\n",
+            " 50%|█████     | 8479/16858 [01:45<02:12, 63.18it/s]\u001b[A\n",
+            " 50%|█████     | 8487/16858 [01:45<02:06, 66.12it/s]\u001b[A\n",
+            " 50%|█████     | 8498/16858 [01:45<01:47, 77.55it/s]\u001b[A\n",
+            " 50%|█████     | 8509/16858 [01:45<01:37, 85.80it/s]\u001b[A\n",
+            " 51%|█████     | 8519/16858 [01:45<01:33, 89.47it/s]\u001b[A\n",
+            " 51%|█████     | 8530/16858 [01:45<01:28, 94.17it/s]\u001b[A\n",
+            " 51%|█████     | 8541/16858 [01:45<01:25, 97.61it/s]\u001b[A\n",
+            " 51%|█████     | 8551/16858 [01:45<01:27, 94.79it/s]\u001b[A\n",
+            " 51%|█████     | 8562/16858 [01:46<01:25, 97.37it/s]\u001b[A"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 8546 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "\n",
+            " 51%|█████     | 8572/16858 [01:46<01:25, 97.37it/s]\u001b[A\n",
+            " 51%|█████     | 8583/16858 [01:46<01:23, 99.06it/s]\u001b[A\n",
+            " 51%|█████     | 8594/16858 [01:46<01:22, 100.40it/s]\u001b[A\n",
+            " 51%|█████     | 8605/16858 [01:46<01:21, 101.15it/s]\u001b[A\n",
+            " 51%|█████     | 8616/16858 [01:46<01:20, 102.18it/s]\u001b[A\n",
+            " 51%|█████     | 8627/16858 [01:46<01:21, 101.18it/s]\u001b[A\n",
+            " 51%|█████     | 8638/16858 [01:46<01:19, 102.95it/s]\u001b[A\n",
+            " 51%|█████▏    | 8649/16858 [01:46<01:20, 102.56it/s]\u001b[A\n",
+            " 51%|█████▏    | 8660/16858 [01:47<01:22, 99.93it/s] \u001b[A\n",
+            " 51%|█████▏    | 8671/16858 [01:47<01:20, 102.15it/s]\u001b[A\n",
+            " 52%|█████▏    | 8682/16858 [01:47<01:19, 102.96it/s]\u001b[A\n",
+            " 52%|█████▏    | 8693/16858 [01:47<01:19, 102.76it/s]\u001b[A\n",
+            " 52%|█████▏    | 8704/16858 [01:47<01:19, 102.83it/s]\u001b[A\n",
+            " 52%|█████▏    | 8715/16858 [01:47<01:18, 103.22it/s]\u001b[A\n",
+            " 52%|█████▏    | 8726/16858 [01:47<01:18, 103.84it/s]\u001b[A\n",
+            " 52%|█████▏    | 8737/16858 [01:47<01:17, 104.69it/s]\u001b[A\n",
+            " 52%|█████▏    | 8748/16858 [01:47<01:17, 104.51it/s]\u001b[A\n",
+            " 52%|█████▏    | 8759/16858 [01:47<01:17, 104.78it/s]\u001b[A\n",
+            " 52%|█████▏    | 8770/16858 [01:48<01:20, 100.35it/s]\u001b[A\n",
+            " 52%|█████▏    | 8781/16858 [01:48<01:19, 100.97it/s]\u001b[A\n",
+            " 52%|█████▏    | 8792/16858 [01:48<01:20, 100.23it/s]\u001b[A\n",
+            " 52%|█████▏    | 8803/16858 [01:48<01:19, 101.22it/s]\u001b[A\n",
+            " 52%|█████▏    | 8814/16858 [01:48<01:18, 102.91it/s]\u001b[A\n",
+            " 52%|█████▏    | 8825/16858 [01:48<01:16, 104.65it/s]\u001b[A\n",
+            " 52%|█████▏    | 8836/16858 [01:48<01:17, 103.82it/s]\u001b[A\n",
+            " 52%|█████▏    | 8847/16858 [01:48<01:15, 105.41it/s]\u001b[A\n",
+            " 53%|█████▎    | 8858/16858 [01:48<01:15, 105.77it/s]\u001b[A\n",
+            " 53%|█████▎    | 8869/16858 [01:49<01:18, 101.38it/s]\u001b[A\n",
+            " 53%|█████▎    | 8880/16858 [01:49<01:17, 102.93it/s]\u001b[A\n",
+            " 53%|█████▎    | 8891/16858 [01:49<01:18, 101.71it/s]\u001b[A\n",
+            " 53%|█████▎    | 8902/16858 [01:49<01:20, 99.41it/s] \u001b[A\n",
+            " 53%|█████▎    | 8913/16858 [01:49<01:18, 101.46it/s]\u001b[A\n",
+            " 53%|█████▎    | 8924/16858 [01:49<01:17, 102.44it/s]\u001b[A\n",
+            " 53%|█████▎    | 8935/16858 [01:49<01:17, 101.67it/s]\u001b[A\n",
+            " 53%|█████▎    | 8946/16858 [01:49<01:16, 103.12it/s]\u001b[A"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 8924 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "\n",
+            " 53%|█████▎    | 8957/16858 [01:49<01:17, 102.09it/s]\u001b[A\n",
+            " 53%|█████▎    | 8968/16858 [01:50<01:18, 100.44it/s]\u001b[A\n",
+            " 53%|█████▎    | 8979/16858 [01:50<01:20, 98.37it/s] \u001b[A\n",
+            " 53%|█████▎    | 8989/16858 [01:50<01:19, 98.50it/s]\u001b[A\n",
+            " 53%|█████▎    | 9000/16858 [01:50<01:18, 99.89it/s]\u001b[A\n",
+            " 53%|█████▎    | 9011/16858 [01:50<01:16, 102.01it/s]\u001b[A\n",
+            " 54%|█████▎    | 9022/16858 [01:50<01:15, 103.11it/s]\u001b[A\n",
+            " 54%|█████▎    | 9033/16858 [01:50<01:16, 102.65it/s]\u001b[A\n",
+            " 54%|█████▎    | 9044/16858 [01:50<01:14, 104.44it/s]\u001b[A\n",
+            " 54%|█████▎    | 9055/16858 [01:50<01:14, 104.05it/s]\u001b[A\n",
+            " 54%|█████▍    | 9066/16858 [01:50<01:15, 103.81it/s]\u001b[A\n",
+            " 54%|█████▍    | 9077/16858 [01:51<01:15, 102.82it/s]\u001b[A\n",
+            " 54%|█████▍    | 9088/16858 [01:51<01:18, 99.19it/s] \u001b[A\n",
+            " 54%|█████▍    | 9098/16858 [01:51<01:19, 97.98it/s]\u001b[A\n",
+            " 54%|█████▍    | 9109/16858 [01:51<01:17, 100.64it/s]\u001b[A\n",
+            " 54%|█████▍    | 9120/16858 [01:51<01:15, 102.82it/s]\u001b[A\n",
+            " 54%|█████▍    | 9131/16858 [01:51<01:15, 101.90it/s]\u001b[A\n",
+            " 54%|█████▍    | 9142/16858 [01:51<01:16, 100.51it/s]\u001b[A\n",
+            " 54%|█████▍    | 9153/16858 [01:51<01:16, 100.77it/s]\u001b[A\n",
+            " 54%|█████▍    | 9164/16858 [01:51<01:15, 101.65it/s]\u001b[A\n",
+            " 54%|█████▍    | 9175/16858 [01:52<01:14, 102.52it/s]\u001b[A\n",
+            " 54%|█████▍    | 9186/16858 [01:52<01:17, 98.56it/s] \u001b[A\n",
+            " 55%|█████▍    | 9196/16858 [01:52<01:17, 98.95it/s]\u001b[A\n",
+            " 55%|█████▍    | 9207/16858 [01:52<01:16, 99.63it/s]\u001b[A\n",
+            " 55%|█████▍    | 9218/16858 [01:52<01:15, 100.98it/s]\u001b[A\n",
+            " 55%|█████▍    | 9229/16858 [01:52<01:16, 100.18it/s]\u001b[A\n",
+            " 55%|█████▍    | 9240/16858 [01:52<01:15, 101.49it/s]\u001b[A\n",
+            " 55%|█████▍    | 9251/16858 [01:52<01:13, 102.80it/s]\u001b[A\n",
+            " 55%|█████▍    | 9262/16858 [01:52<01:14, 102.16it/s]\u001b[A\n",
+            " 55%|█████▌    | 9273/16858 [01:53<01:14, 101.89it/s]\u001b[A\n",
+            " 55%|█████▌    | 9284/16858 [01:53<01:13, 102.76it/s]\u001b[A\n",
+            " 55%|█████▌    | 9295/16858 [01:53<01:18, 96.82it/s] \u001b[A\n",
+            " 55%|█████▌    | 9305/16858 [01:53<01:18, 96.82it/s]\u001b[A\n",
+            " 55%|█████▌    | 9315/16858 [01:53<01:18, 96.39it/s]\u001b[A\n",
+            " 55%|█████▌    | 9326/16858 [01:53<01:16, 98.85it/s]\u001b[A\n",
+            " 55%|█████▌    | 9337/16858 [01:53<01:15, 99.70it/s]\u001b[A\n",
+            " 55%|█████▌    | 9348/16858 [01:53<01:13, 101.95it/s]\u001b[A\n",
+            " 56%|█████▌    | 9359/16858 [01:53<01:14, 101.21it/s]\u001b[A\n",
+            " 56%|█████▌    | 9370/16858 [01:53<01:15, 99.82it/s] \u001b[A\n",
+            " 56%|█████▌    | 9381/16858 [01:54<01:13, 101.83it/s]\u001b[A\n",
+            " 56%|█████▌    | 9392/16858 [01:54<01:16, 98.14it/s] \u001b[A\n",
+            " 56%|█████▌    | 9402/16858 [01:54<01:15, 98.58it/s]\u001b[A\n",
+            " 56%|█████▌    | 9413/16858 [01:54<01:14, 99.78it/s]\u001b[A\n",
+            " 56%|█████▌    | 9424/16858 [01:54<01:13, 101.32it/s]\u001b[A\n",
+            " 56%|█████▌    | 9435/16858 [01:54<01:12, 102.49it/s]\u001b[A\n",
+            " 56%|█████▌    | 9446/16858 [01:54<01:11, 103.39it/s]\u001b[A\n",
+            " 56%|█████▌    | 9457/16858 [01:54<01:11, 103.43it/s]\u001b[A"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 9445 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "\n",
+            " 56%|█████▌    | 9468/16858 [01:54<01:12, 101.83it/s]\u001b[A\n",
+            " 56%|█████▌    | 9479/16858 [01:55<01:17, 94.73it/s] \u001b[A\n",
+            " 56%|█████▋    | 9489/16858 [01:55<01:16, 96.11it/s]\u001b[A\n",
+            " 56%|█████▋    | 9499/16858 [01:55<01:24, 87.02it/s]\u001b[A\n",
+            " 56%|█████▋    | 9508/16858 [01:55<01:30, 81.16it/s]\u001b[A\n",
+            " 56%|█████▋    | 9517/16858 [01:55<01:33, 78.79it/s]\u001b[A\n",
+            " 57%|█████▋    | 9526/16858 [01:55<01:34, 77.52it/s]\u001b[A\n",
+            " 57%|█████▋    | 9534/16858 [01:55<01:34, 77.79it/s]\u001b[A\n",
+            " 57%|█████▋    | 9542/16858 [01:55<01:34, 77.36it/s]\u001b[A\n",
+            " 57%|█████▋    | 9550/16858 [01:56<01:34, 77.54it/s]\u001b[A\n",
+            " 57%|█████▋    | 9558/16858 [01:56<01:34, 77.64it/s]\u001b[A\n",
+            " 57%|█████▋    | 9566/16858 [01:56<01:35, 76.15it/s]\u001b[A\n",
+            " 57%|█████▋    | 9574/16858 [01:56<01:41, 72.04it/s]\u001b[A\n",
+            " 57%|█████▋    | 9583/16858 [01:56<01:37, 74.33it/s]\u001b[A\n",
+            " 57%|█████▋    | 9591/16858 [01:56<01:36, 75.68it/s]\u001b[A\n",
+            " 57%|█████▋    | 9599/16858 [01:56<01:36, 75.61it/s]\u001b[A\n",
+            " 57%|█████▋    | 9608/16858 [01:56<01:32, 78.03it/s]\u001b[A\n",
+            " 57%|█████▋    | 9616/16858 [01:56<01:33, 77.60it/s]\u001b[A\n",
+            " 57%|█████▋    | 9624/16858 [01:56<01:32, 78.03it/s]\u001b[A\n",
+            " 57%|█████▋    | 9633/16858 [01:57<01:30, 79.52it/s]\u001b[A\n",
+            " 57%|█████▋    | 9641/16858 [01:57<01:39, 72.47it/s]\u001b[A\n",
+            " 57%|█████▋    | 9649/16858 [01:57<01:45, 68.13it/s]\u001b[A\n",
+            " 57%|█████▋    | 9657/16858 [01:57<01:42, 70.17it/s]\u001b[A\n",
+            " 57%|█████▋    | 9665/16858 [01:57<01:45, 68.39it/s]\u001b[A\n",
+            " 57%|█████▋    | 9672/16858 [01:57<01:49, 65.83it/s]\u001b[A\n",
+            " 57%|█████▋    | 9679/16858 [01:57<01:54, 62.53it/s]\u001b[A\n",
+            " 57%|█████▋    | 9688/16858 [01:57<01:43, 69.35it/s]\u001b[A\n",
+            " 58%|█████▊    | 9699/16858 [01:58<01:31, 78.63it/s]\u001b[A\n",
+            " 58%|█████▊    | 9710/16858 [01:58<01:23, 85.36it/s]\u001b[A\n",
+            " 58%|█████▊    | 9721/16858 [01:58<01:19, 90.33it/s]\u001b[A\n",
+            " 58%|█████▊    | 9731/16858 [01:58<01:19, 89.12it/s]\u001b[A\n",
+            " 58%|█████▊    | 9742/16858 [01:58<01:16, 93.51it/s]\u001b[A\n",
+            " 58%|█████▊    | 9753/16858 [01:58<01:13, 96.37it/s]\u001b[A\n",
+            " 58%|█████▊    | 9764/16858 [01:58<01:11, 99.12it/s]\u001b[A\n",
+            " 58%|█████▊    | 9775/16858 [01:58<01:10, 100.42it/s]\u001b[A\n",
+            " 58%|█████▊    | 9786/16858 [01:58<01:09, 101.30it/s]\u001b[A\n",
+            " 58%|█████▊    | 9797/16858 [01:59<01:08, 102.59it/s]\u001b[A\n",
+            " 58%|█████▊    | 9808/16858 [01:59<01:08, 102.22it/s]\u001b[A\n",
+            " 58%|█████▊    | 9819/16858 [01:59<01:08, 102.51it/s]\u001b[A\n",
+            " 58%|█████▊    | 9830/16858 [01:59<01:08, 102.23it/s]\u001b[A\n",
+            " 58%|█████▊    | 9841/16858 [01:59<01:10, 99.04it/s] \u001b[A\n",
+            " 58%|█████▊    | 9852/16858 [01:59<01:09, 100.80it/s]\u001b[A\n",
+            " 59%|█████▊    | 9863/16858 [01:59<01:08, 102.13it/s]\u001b[A\n",
+            " 59%|█████▊    | 9874/16858 [01:59<01:08, 102.57it/s]\u001b[A\n",
+            " 59%|█████▊    | 9885/16858 [01:59<01:09, 101.05it/s]\u001b[A"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 9868 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "\n",
+            " 59%|█████▊    | 9896/16858 [01:59<01:08, 101.14it/s]\u001b[A\n",
+            " 59%|█████▉    | 9907/16858 [02:00<01:08, 101.55it/s]\u001b[A\n",
+            " 59%|█████▉    | 9918/16858 [02:00<01:07, 102.55it/s]\u001b[A\n",
+            " 59%|█████▉    | 9929/16858 [02:00<01:07, 102.39it/s]\u001b[A\n",
+            " 59%|█████▉    | 9940/16858 [02:00<01:10, 98.66it/s] \u001b[A\n",
+            " 59%|█████▉    | 9951/16858 [02:00<01:08, 100.16it/s]\u001b[A\n",
+            " 59%|█████▉    | 9962/16858 [02:00<01:07, 102.16it/s]\u001b[A\n",
+            " 59%|█████▉    | 9973/16858 [02:00<01:06, 103.93it/s]\u001b[A\n",
+            " 59%|█████▉    | 9984/16858 [02:00<01:06, 103.11it/s]\u001b[A\n",
+            " 59%|█████▉    | 9995/16858 [02:00<01:05, 104.25it/s]\u001b[A\n",
+            " 59%|█████▉    | 10006/16858 [02:01<01:06, 103.47it/s]\u001b[A\n",
+            " 59%|█████▉    | 10017/16858 [02:01<01:05, 103.80it/s]\u001b[A\n",
+            " 59%|█████▉    | 10028/16858 [02:01<01:06, 102.53it/s]\u001b[A\n",
+            " 60%|█████▉    | 10039/16858 [02:01<01:06, 102.97it/s]\u001b[A\n",
+            " 60%|█████▉    | 10050/16858 [02:01<01:09, 97.57it/s] \u001b[A\n",
+            " 60%|█████▉    | 10061/16858 [02:01<01:07, 100.29it/s]\u001b[A\n",
+            " 60%|█████▉    | 10072/16858 [02:01<01:07, 101.17it/s]\u001b[A\n",
+            " 60%|█████▉    | 10083/16858 [02:01<01:07, 101.08it/s]\u001b[A\n",
+            " 60%|█████▉    | 10094/16858 [02:01<01:06, 101.90it/s]\u001b[A\n",
+            " 60%|█████▉    | 10105/16858 [02:02<01:05, 103.13it/s]\u001b[A\n",
+            " 60%|██████    | 10116/16858 [02:02<01:05, 103.55it/s]\u001b[A\n",
+            " 60%|██████    | 10127/16858 [02:02<01:05, 103.20it/s]\u001b[A\n",
+            " 60%|██████    | 10138/16858 [02:02<01:04, 104.61it/s]\u001b[A\n",
+            " 60%|██████    | 10149/16858 [02:02<01:05, 101.94it/s]\u001b[A\n",
+            " 60%|██████    | 10160/16858 [02:02<01:07, 99.84it/s] \u001b[A\n",
+            " 60%|██████    | 10171/16858 [02:02<01:06, 99.92it/s]\u001b[A\n",
+            " 60%|██████    | 10182/16858 [02:02<01:05, 101.57it/s]\u001b[A\n",
+            " 60%|██████    | 10193/16858 [02:02<01:05, 101.24it/s]\u001b[A\n",
+            " 61%|██████    | 10204/16858 [02:03<01:04, 102.74it/s]\u001b[A\n",
+            " 61%|██████    | 10215/16858 [02:03<01:04, 103.20it/s]\u001b[A\n",
+            " 61%|██████    | 10226/16858 [02:03<01:04, 103.12it/s]\u001b[A\n",
+            " 61%|██████    | 10237/16858 [02:03<01:04, 103.05it/s]\u001b[A\n",
+            " 61%|██████    | 10248/16858 [02:03<01:06, 99.91it/s] \u001b[A\n",
+            " 61%|██████    | 10259/16858 [02:03<01:08, 96.29it/s]\u001b[A\n",
+            " 61%|██████    | 10269/16858 [02:03<01:08, 96.42it/s]\u001b[A"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 10257 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "\n",
+            " 61%|██████    | 10279/16858 [02:03<01:07, 97.33it/s]\u001b[A\n",
+            " 61%|██████    | 10290/16858 [02:03<01:06, 98.59it/s]\u001b[A\n",
+            " 61%|██████    | 10301/16858 [02:03<01:04, 101.03it/s]\u001b[A\n",
+            " 61%|██████    | 10312/16858 [02:04<01:04, 101.76it/s]\u001b[A\n",
+            " 61%|██████    | 10323/16858 [02:04<01:03, 102.88it/s]\u001b[A\n",
+            " 61%|██████▏   | 10334/16858 [02:04<01:03, 103.07it/s]\u001b[A\n",
+            " 61%|██████▏   | 10345/16858 [02:04<01:05, 99.42it/s] \u001b[A\n",
+            " 61%|██████▏   | 10356/16858 [02:04<01:06, 98.47it/s]\u001b[A\n",
+            " 61%|██████▏   | 10366/16858 [02:04<01:06, 97.20it/s]\u001b[A\n",
+            " 62%|██████▏   | 10377/16858 [02:04<01:05, 98.98it/s]\u001b[A\n",
+            " 62%|██████▏   | 10388/16858 [02:04<01:04, 100.56it/s]\u001b[A\n",
+            " 62%|██████▏   | 10399/16858 [02:04<01:04, 100.38it/s]\u001b[A\n",
+            " 62%|██████▏   | 10410/16858 [02:05<01:03, 101.38it/s]\u001b[A\n",
+            " 62%|██████▏   | 10421/16858 [02:05<01:03, 101.97it/s]\u001b[A\n",
+            " 62%|██████▏   | 10432/16858 [02:05<01:03, 100.84it/s]\u001b[A\n",
+            " 62%|██████▏   | 10443/16858 [02:05<01:03, 100.92it/s]\u001b[A\n",
+            " 62%|██████▏   | 10454/16858 [02:05<01:02, 101.77it/s]\u001b[A\n",
+            " 62%|██████▏   | 10465/16858 [02:05<01:05, 98.11it/s] \u001b[A\n",
+            " 62%|██████▏   | 10476/16858 [02:05<01:04, 99.49it/s]\u001b[A\n",
+            " 62%|██████▏   | 10486/16858 [02:05<01:03, 99.57it/s]\u001b[A\n",
+            " 62%|██████▏   | 10497/16858 [02:05<01:03, 99.96it/s]\u001b[A\n",
+            " 62%|██████▏   | 10508/16858 [02:06<01:02, 101.84it/s]\u001b[A\n",
+            " 62%|██████▏   | 10519/16858 [02:06<01:02, 101.40it/s]\u001b[A\n",
+            " 62%|██████▏   | 10530/16858 [02:06<01:02, 100.50it/s]\u001b[A\n",
+            " 63%|█��████▎   | 10541/16858 [02:06<01:04, 98.54it/s] \u001b[A\n",
+            " 63%|██████▎   | 10552/16858 [02:06<01:03, 99.85it/s]\u001b[A\n",
+            " 63%|██████▎   | 10563/16858 [02:06<01:03, 99.84it/s]\u001b[A\n",
+            " 63%|██████▎   | 10573/16858 [02:06<01:04, 96.72it/s]\u001b[A\n",
+            " 63%|██████▎   | 10584/16858 [02:06<01:03, 99.01it/s]\u001b[A\n",
+            " 63%|██████▎   | 10595/16858 [02:06<01:02, 99.89it/s]\u001b[A\n",
+            " 63%|██████▎   | 10606/16858 [02:07<01:01, 101.81it/s]\u001b[A\n",
+            " 63%|██████▎   | 10617/16858 [02:07<01:01, 101.60it/s]\u001b[A\n",
+            " 63%|██████▎   | 10628/16858 [02:07<01:00, 102.30it/s]\u001b[A\n",
+            " 63%|██████▎   | 10639/16858 [02:07<01:01, 101.82it/s]\u001b[A\n",
+            " 63%|██████▎   | 10650/16858 [02:07<01:00, 102.92it/s]\u001b[A\n",
+            " 63%|██████▎   | 10661/16858 [02:07<01:00, 103.26it/s]\u001b[A\n",
+            " 63%|██████▎   | 10672/16858 [02:07<01:03, 97.40it/s] \u001b[A\n",
+            " 63%|██████▎   | 10683/16858 [02:07<01:01, 100.06it/s]\u001b[A\n",
+            " 63%|██████▎   | 10694/16858 [02:07<01:02, 98.34it/s] \u001b[A\n",
+            " 63%|██████▎   | 10704/16858 [02:08<01:11, 86.51it/s]\u001b[A\n",
+            " 64%|██████▎   | 10713/16858 [02:08<01:15, 81.87it/s]\u001b[A\n",
+            " 64%|██████▎   | 10722/16858 [02:08<01:16, 80.20it/s]\u001b[A\n",
+            " 64%|██████▎   | 10731/16858 [02:08<01:17, 79.46it/s]\u001b[A\n",
+            " 64%|██████▎   | 10740/16858 [02:08<01:16, 80.14it/s]\u001b[A\n",
+            " 64%|██████▍   | 10749/16858 [02:08<01:18, 78.31it/s]\u001b[A\n",
+            " 64%|██████▍   | 10757/16858 [02:08<01:20, 75.88it/s]\u001b[A\n",
+            " 64%|██████▍   | 10765/16858 [02:08<01:21, 75.00it/s]\u001b[A\n",
+            " 64%|██████▍   | 10773/16858 [02:08<01:20, 75.53it/s]\u001b[A\n",
+            " 64%|██████▍   | 10781/16858 [02:09<01:19, 76.64it/s]\u001b[A\n",
+            " 64%|██████▍   | 10789/16858 [02:09<01:19, 76.67it/s]\u001b[A\n",
+            " 64%|██████▍   | 10797/16858 [02:09<01:19, 76.27it/s]\u001b[A\n",
+            " 64%|██████▍   | 10806/16858 [02:09<01:17, 77.88it/s]\u001b[A\n",
+            " 64%|██████▍   | 10815/16858 [02:09<01:16, 79.33it/s]\u001b[A\n",
+            " 64%|██████▍   | 10823/16858 [02:09<01:16, 78.85it/s]\u001b[A\n",
+            " 64%|██████▍   | 10832/16858 [02:09<01:14, 80.80it/s]\u001b[A\n",
+            " 64%|██████▍   | 10841/16858 [02:09<01:18, 76.20it/s]\u001b[A\n",
+            " 64%|██████▍   | 10849/16858 [02:09<01:21, 73.61it/s]\u001b[A\n",
+            " 64%|██████▍   | 10857/16858 [02:10<01:21, 73.74it/s]\u001b[A\n",
+            " 64%|██████▍   | 10865/16858 [02:10<01:23, 72.12it/s]\u001b[A\n",
+            " 64%|██████▍   | 10873/16858 [02:10<01:25, 70.13it/s]\u001b[A\n",
+            " 65%|██████▍   | 10881/16858 [02:10<01:28, 67.34it/s]\u001b[A\n",
+            " 65%|██████▍   | 10888/16858 [02:10<01:33, 64.17it/s]\u001b[A\n",
+            " 65%|██████▍   | 10898/16858 [02:10<01:22, 72.15it/s]\u001b[A\n",
+            " 65%|██████▍   | 10909/16858 [02:10<01:12, 82.21it/s]\u001b[A\n",
+            " 65%|██████▍   | 10918/16858 [02:10<01:11, 83.56it/s]\u001b[A\n",
+            " 65%|██████▍   | 10928/16858 [02:10<01:07, 87.96it/s]\u001b[A\n",
+            " 65%|██████▍   | 10938/16858 [02:11<01:04, 91.31it/s]\u001b[A\n",
+            " 65%|██████▍   | 10949/16858 [02:11<01:02, 95.27it/s]\u001b[A\n",
+            " 65%|██████▌   | 10960/16858 [02:11<01:00, 97.47it/s]\u001b[A\n",
+            " 65%|██████▌   | 10970/16858 [02:11<01:01, 96.14it/s]\u001b[A\n",
+            " 65%|██████▌   | 10981/16858 [02:11<00:59, 98.24it/s]\u001b[A\n",
+            " 65%|██████▌   | 10992/16858 [02:11<00:58, 99.85it/s]\u001b[A\n",
+            " 65%|██████▌   | 11003/16858 [02:11<00:57, 101.43it/s]\u001b[A\n",
+            " 65%|██████▌   | 11014/16858 [02:11<00:57, 102.05it/s]\u001b[A\n",
+            " 65%|██████▌   | 11025/16858 [02:11<01:00, 97.11it/s] \u001b[A\n",
+            " 65%|██████▌   | 11036/16858 [02:12<00:58, 99.73it/s]\u001b[A\n",
+            " 66%|██████▌   | 11047/16858 [02:12<00:57, 100.99it/s]\u001b[A\n",
+            " 66%|██████▌   | 11058/16858 [02:12<00:57, 101.69it/s]\u001b[A\n",
+            " 66%|██████▌   | 11069/16858 [02:12<00:57, 101.21it/s]\u001b[A\n",
+            " 66%|██████▌   | 11080/16858 [02:12<00:57, 101.10it/s]\u001b[A\n",
+            " 66%|██████▌   | 11091/16858 [02:12<00:56, 101.86it/s]\u001b[A\n",
+            " 66%|██████▌   | 11102/16858 [02:12<00:56, 101.47it/s]\u001b[A\n",
+            " 66%|██████▌   | 11113/16858 [02:12<00:56, 101.88it/s]\u001b[A\n",
+            " 66%|██████▌   | 11124/16858 [02:12<00:57, 99.27it/s] \u001b[A"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 11107 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "\n",
+            " 66%|██████▌   | 11134/16858 [02:13<00:58, 98.52it/s]\u001b[A\n",
+            " 66%|██████▌   | 11145/16858 [02:13<00:57, 100.02it/s]\u001b[A\n",
+            " 66%|██████▌   | 11156/16858 [02:13<00:56, 100.74it/s]\u001b[A\n",
+            " 66%|██████▌   | 11167/16858 [02:13<00:56, 101.12it/s]\u001b[A\n",
+            " 66%|██████▋   | 11178/16858 [02:13<00:56, 100.82it/s]\u001b[A\n",
+            " 66%|██████▋   | 11189/16858 [02:13<00:55, 102.19it/s]\u001b[A\n",
+            " 66%|██████▋   | 11200/16858 [02:13<00:55, 101.22it/s]\u001b[A\n",
+            " 67%|██████▋   | 11211/16858 [02:13<00:55, 101.84it/s]\u001b[A\n",
+            " 67%|██████▋   | 11222/16858 [02:13<00:55, 102.35it/s]\u001b[A\n",
+            " 67%|██████▋   | 11233/16858 [02:14<00:57, 97.71it/s] \u001b[A\n",
+            " 67%|██████▋   | 11244/16858 [02:14<00:56, 99.23it/s]\u001b[A\n",
+            " 67%|██████▋   | 11255/16858 [02:14<00:55, 100.93it/s]\u001b[A\n",
+            " 67%|██████▋   | 11266/16858 [02:14<00:54, 102.13it/s]\u001b[A\n",
+            " 67%|██████▋   | 11277/16858 [02:14<00:54, 101.87it/s]\u001b[A\n",
+            " 67%|██████▋   | 11288/16858 [02:14<00:54, 102.64it/s]\u001b[A\n",
+            " 67%|██████▋   | 11299/16858 [02:14<00:54, 102.10it/s]\u001b[A\n",
+            " 67%|██████▋   | 11310/16858 [02:14<00:53, 103.46it/s]\u001b[A\n",
+            " 67%|██████▋   | 11321/16858 [02:14<00:54, 101.65it/s]\u001b[A\n",
+            " 67%|██████▋   | 11332/16858 [02:14<00:56, 97.40it/s] \u001b[A\n",
+            " 67%|██████▋   | 11343/16858 [02:15<00:55, 99.00it/s]\u001b[A\n",
+            " 67%|██████▋   | 11354/16858 [02:15<00:54, 100.36it/s]\u001b[A\n",
+            " 67%|██████▋   | 11365/16858 [02:15<00:53, 101.75it/s]\u001b[A\n",
+            " 67%|██████▋   | 11376/16858 [02:15<00:53, 101.64it/s]\u001b[A\n",
+            " 68%|██████▊   | 11387/16858 [02:15<00:53, 102.21it/s]\u001b[A\n",
+            " 68%|██████▊   | 11398/16858 [02:15<00:53, 102.81it/s]\u001b[A\n",
+            " 68%|██████▊   | 11409/16858 [02:15<00:52, 103.18it/s]\u001b[A\n",
+            " 68%|██████▊   | 11420/16858 [02:15<00:52, 103.09it/s]\u001b[A\n",
+            " 68%|██████▊   | 11431/16858 [02:15<00:52, 104.15it/s]\u001b[A\n",
+            " 68%|██████▊   | 11442/16858 [02:16<00:53, 100.88it/s]\u001b[A\n",
+            " 68%|██████▊   | 11453/16858 [02:16<00:53, 101.88it/s]\u001b[A\n",
+            " 68%|██████▊   | 11464/16858 [02:16<00:53, 101.60it/s]\u001b[A\n",
+            " 68%|██████▊   | 11475/16858 [02:16<00:53, 100.13it/s]\u001b[A\n",
+            " 68%|██████▊   | 11486/16858 [02:16<00:52, 102.01it/s]\u001b[A\n",
+            " 68%|██████▊   | 11497/16858 [02:16<00:52, 102.05it/s]\u001b[A\n",
+            " 68%|██████▊   | 11508/16858 [02:16<00:52, 101.71it/s]\u001b[A\n",
+            " 68%|██████▊   | 11519/16858 [02:16<00:51, 103.38it/s]\u001b[A\n",
+            " 68%|██████▊   | 11530/16858 [02:16<00:51, 103.10it/s]\u001b[A\n",
+            " 68%|██████▊   | 11541/16858 [02:17<00:54, 98.30it/s] \u001b[A\n",
+            " 69%|██████▊   | 11552/16858 [02:17<00:53, 99.33it/s]\u001b[A\n",
+            " 69%|██████▊   | 11563/16858 [02:17<00:52, 100.39it/s]\u001b[A"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 11544 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "\n",
+            " 69%|██████▊   | 11574/16858 [02:17<00:51, 102.50it/s]\u001b[A\n",
+            " 69%|██████▊   | 11585/16858 [02:17<00:51, 102.55it/s]\u001b[A\n",
+            " 69%|██████▉   | 11596/16858 [02:17<00:51, 101.90it/s]\u001b[A\n",
+            " 69%|██████▉   | 11607/16858 [02:17<00:50, 103.74it/s]\u001b[A"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 11595 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "\n",
+            " 69%|██████▉   | 11618/16858 [02:17<00:50, 104.01it/s]\u001b[A\n",
+            " 69%|██████▉   | 11629/16858 [02:17<00:50, 103.48it/s]\u001b[A\n",
+            " 69%|██████▉   | 11640/16858 [02:18<00:50, 103.02it/s]\u001b[A\n",
+            " 69%|██████▉   | 11651/16858 [02:18<00:52, 98.52it/s] \u001b[A\n",
+            " 69%|██████▉   | 11662/16858 [02:18<00:52, 99.62it/s]\u001b[A\n",
+            " 69%|██████▉   | 11672/16858 [02:18<00:52, 98.93it/s]\u001b[A\n",
+            " 69%|██████▉   | 11682/16858 [02:18<00:52, 98.34it/s]\u001b[A\n",
+            " 69%|██████▉   | 11692/16858 [02:18<00:52, 98.74it/s]\u001b[A\n",
+            " 69%|██████▉   | 11703/16858 [02:18<00:50, 101.51it/s]\u001b[A\n",
+            " 69%|██████▉   | 11714/16858 [02:18<00:50, 102.10it/s]\u001b[A\n",
+            " 70%|██████▉   | 11725/16858 [02:18<00:50, 102.62it/s]\u001b[A\n",
+            " 70%|██████▉   | 11736/16858 [02:18<00:49, 104.02it/s]\u001b[A\n",
+            " 70%|██████▉   | 11747/16858 [02:19<00:48, 104.34it/s]\u001b[A\n",
+            " 70%|██████▉   | 11758/16858 [02:19<00:50, 100.27it/s]\u001b[A\n",
+            " 70%|██████▉   | 11769/16858 [02:19<00:50, 100.89it/s]\u001b[A\n",
+            " 70%|██████▉   | 11780/16858 [02:19<00:50, 100.11it/s]\u001b[A"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 11764 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "\n",
+            " 70%|██████▉   | 11791/16858 [02:19<00:51, 97.76it/s] \u001b[A\n",
+            " 70%|███████   | 11802/16858 [02:19<00:50, 99.91it/s]\u001b[A\n",
+            " 70%|███████   | 11813/16858 [02:19<00:49, 101.53it/s]\u001b[A\n",
+            " 70%|███████   | 11824/16858 [02:19<00:49, 102.69it/s]\u001b[A\n",
+            " 70%|███████   | 11835/16858 [02:19<00:48, 103.27it/s]\u001b[A\n",
+            " 70%|███████   | 11846/16858 [02:20<00:48, 103.92it/s]\u001b[A\n",
+            " 70%|███████   | 11857/16858 [02:20<00:49, 100.63it/s]\u001b[A\n",
+            " 70%|███████   | 11868/16858 [02:20<00:49, 100.87it/s]\u001b[A\n",
+            " 70%|███████   | 11879/16858 [02:20<00:49, 101.58it/s]\u001b[A\n",
+            " 71%|███████   | 11890/16858 [02:20<00:49, 100.27it/s]\u001b[A\n",
+            " 71%|███████   | 11901/16858 [02:20<00:48, 101.56it/s]\u001b[A\n",
+            " 71%|███████   | 11912/16858 [02:20<00:56, 87.92it/s] \u001b[A\n",
+            " 71%|███████   | 11922/16858 [02:20<01:00, 82.17it/s]\u001b[A\n",
+            " 71%|███████   | 11931/16858 [02:21<01:01, 80.60it/s]\u001b[A\n",
+            " 71%|███████   | 11940/16858 [02:21<01:02, 78.42it/s]\u001b[A\n",
+            " 71%|███████   | 11948/16858 [02:21<01:03, 77.25it/s]\u001b[A\n",
+            " 71%|███████   | 11957/16858 [02:21<01:02, 77.81it/s]\u001b[A\n",
+            " 71%|███████   | 11965/16858 [02:21<01:04, 76.08it/s]\u001b[A\n",
+            " 71%|███████   | 11973/16858 [02:21<01:04, 76.13it/s]\u001b[A\n",
+            " 71%|███████   | 11981/16858 [02:21<01:04, 76.18it/s]\u001b[A\n",
+            " 71%|███████   | 11989/16858 [02:21<01:04, 75.93it/s]\u001b[A\n",
+            " 71%|███████   | 11997/16858 [02:21<01:04, 75.88it/s]\u001b[A\n",
+            " 71%|███████   | 12005/16858 [02:21<01:03, 75.89it/s]\u001b[A\n",
+            " 71%|███████▏  | 12014/16858 [02:22<01:02, 77.87it/s]\u001b[A\n",
+            " 71%|███████▏  | 12022/16858 [02:22<01:03, 76.18it/s]\u001b[A\n",
+            " 71%|███████▏  | 12031/16858 [02:22<01:01, 78.30it/s]\u001b[A\n",
+            " 71%|███████▏  | 12039/16858 [02:22<01:01, 78.10it/s]\u001b[A\n",
+            " 71%|███████▏  | 12047/16858 [02:22<01:02, 77.17it/s]\u001b[A\n",
+            " 72%|███████▏  | 12055/16858 [02:22<01:04, 74.66it/s]\u001b[A"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 12047 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "\n",
+            " 72%|███████▏  | 12063/16858 [02:22<01:05, 73.71it/s]\u001b[A\n",
+            " 72%|███████▏  | 12071/16858 [02:22<01:09, 68.76it/s]\u001b[A\n",
+            " 72%|███████▏  | 12078/16858 [02:23<01:10, 68.03it/s]\u001b[A\n",
+            " 72%|███████▏  | 12085/16858 [02:23<01:13, 64.98it/s]\u001b[A"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 12073 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "\n",
+            " 72%|███████▏  | 12092/16858 [02:23<01:18, 60.79it/s]\u001b[A\n",
+            " 72%|███████▏  | 12102/16858 [02:23<01:08, 69.92it/s]\u001b[A\n",
+            " 72%|███████▏  | 12113/16858 [02:23<00:59, 79.36it/s]\u001b[A\n",
+            " 72%|███████▏  | 12124/16858 [02:23<00:55, 85.86it/s]\u001b[A\n",
+            " 72%|███████▏  | 12135/16858 [02:23<00:52, 90.72it/s]\u001b[A\n",
+            " 72%|███████▏  | 12146/16858 [02:23<00:49, 95.31it/s]\u001b[A\n",
+            " 72%|███████▏  | 12157/16858 [02:23<00:48, 97.01it/s]\u001b[A\n",
+            " 72%|███████▏  | 12168/16858 [02:24<00:47, 98.81it/s]\u001b[A\n",
+            " 72%|███████▏  | 12179/16858 [02:24<00:46, 100.79it/s]\u001b[A\n",
+            " 72%|███████▏  | 12190/16858 [02:24<00:46, 99.64it/s] \u001b[A\n",
+            " 72%|███████▏  | 12200/16858 [02:24<00:48, 96.48it/s]\u001b[A\n",
+            " 72%|███████▏  | 12211/16858 [02:24<00:46, 99.19it/s]\u001b[A\n",
+            " 72%|███████▏  | 12222/16858 [02:24<00:46, 99.94it/s]\u001b[A\n",
+            " 73%|███████▎  | 12233/16858 [02:24<00:45, 101.22it/s]\u001b[A\n",
+            " 73%|███████▎  | 12244/16858 [02:24<00:45, 101.26it/s]\u001b[A\n",
+            " 73%|███████▎  | 12255/16858 [02:24<00:45, 100.98it/s]\u001b[A\n",
+            " 73%|███████▎  | 12266/16858 [02:24<00:45, 101.37it/s]\u001b[A\n",
+            " 73%|███████▎  | 12277/16858 [02:25<00:44, 102.06it/s]\u001b[A\n",
+            " 73%|███████▎  | 12288/16858 [02:25<00:47, 95.45it/s] \u001b[A\n",
+            " 73%|███████▎  | 12298/16858 [02:25<00:49, 92.66it/s]\u001b[A\n",
+            " 73%|███████▎  | 12309/16858 [02:25<00:46, 96.92it/s]\u001b[A\n",
+            " 73%|███████▎  | 12319/16858 [02:25<00:46, 96.72it/s]\u001b[A\n",
+            " 73%|███████▎  | 12330/16858 [02:25<00:45, 99.66it/s]\u001b[A\n",
+            " 73%|███████▎  | 12341/16858 [02:25<00:45, 99.79it/s]\u001b[A\n",
+            " 73%|███████▎  | 12352/16858 [02:25<00:44, 100.34it/s]\u001b[A\n",
+            " 73%|███████▎  | 12363/16858 [02:25<00:43, 102.52it/s]\u001b[A\n",
+            " 73%|███████▎  | 12374/16858 [02:26<00:43, 102.89it/s]\u001b[A\n",
+            " 73%|███████▎  | 12385/16858 [02:26<00:43, 102.90it/s]\u001b[A\n",
+            " 74%|███████▎  | 12396/16858 [02:26<00:43, 102.84it/s]\u001b[A\n",
+            " 74%|███████▎  | 12407/16858 [02:26<00:45, 98.06it/s] \u001b[A\n",
+            " 74%|███████▎  | 12418/16858 [02:26<00:44, 99.87it/s]\u001b[A\n",
+            " 74%|███████▎  | 12429/16858 [02:26<00:43, 101.53it/s]\u001b[A\n",
+            " 74%|███████▍  | 12440/16858 [02:26<00:43, 101.52it/s]\u001b[A\n",
+            " 74%|███████▍  | 12451/16858 [02:26<00:43, 101.23it/s]\u001b[A\n",
+            " 74%|███████▍  | 12462/16858 [02:26<00:43, 100.19it/s]\u001b[A\n",
+            " 74%|███████▍  | 12473/16858 [02:27<00:43, 100.47it/s]\u001b[A\n",
+            " 74%|███████▍  | 12484/16858 [02:27<00:42, 102.08it/s]\u001b[A\n",
+            " 74%|███████▍  | 12495/16858 [02:27<00:42, 102.30it/s]\u001b[A\n",
+            " 74%|███████▍  | 12506/16858 [02:27<00:43, 100.17it/s]\u001b[A\n",
+            " 74%|███████▍  | 12517/16858 [02:27<00:43, 99.11it/s] \u001b[A\n",
+            " 74%|███████▍  | 12528/16858 [02:27<00:43, 100.05it/s]\u001b[A\n",
+            " 74%|███████▍  | 12539/16858 [02:27<00:42, 100.92it/s]\u001b[A\n",
+            " 74%|█���█████▍  | 12550/16858 [02:27<00:42, 100.72it/s]\u001b[A\n",
+            " 75%|███████▍  | 12561/16858 [02:27<00:43, 98.38it/s] \u001b[A\n",
+            " 75%|███████▍  | 12572/16858 [02:28<00:42, 100.18it/s]\u001b[A\n",
+            " 75%|███████▍  | 12583/16858 [02:28<00:42, 100.24it/s]\u001b[A\n",
+            " 75%|███████▍  | 12594/16858 [02:28<00:42, 100.86it/s]\u001b[A\n",
+            " 75%|███████▍  | 12605/16858 [02:28<00:41, 102.57it/s]\u001b[A\n",
+            " 75%|███████▍  | 12616/16858 [02:28<00:44, 96.21it/s] \u001b[A\n",
+            " 75%|███████▍  | 12627/16858 [02:28<00:43, 97.86it/s]\u001b[A\n",
+            " 75%|███████▍  | 12638/16858 [02:28<00:42, 99.27it/s]\u001b[A\n",
+            " 75%|███████▌  | 12649/16858 [02:28<00:41, 101.11it/s]\u001b[A\n",
+            " 75%|███████▌  | 12660/16858 [02:28<00:41, 101.75it/s]\u001b[A\n",
+            " 75%|███████▌  | 12671/16858 [02:29<00:40, 102.58it/s]\u001b[A\n",
+            " 75%|███████▌  | 12682/16858 [02:29<00:41, 101.37it/s]\u001b[A\n",
+            " 75%|███████▌  | 12693/16858 [02:29<00:40, 101.68it/s]\u001b[A\n",
+            " 75%|███████▌  | 12704/16858 [02:29<00:40, 102.30it/s]\u001b[A\n",
+            " 75%|███████▌  | 12715/16858 [02:29<00:42, 97.97it/s] \u001b[A\n",
+            " 75%|███████▌  | 12726/16858 [02:29<00:41, 99.17it/s]\u001b[A\n",
+            " 76%|███████▌  | 12737/16858 [02:29<00:41, 100.35it/s]\u001b[A\n",
+            " 76%|███████▌  | 12748/16858 [02:29<00:41, 98.80it/s] \u001b[A\n",
+            " 76%|███████▌  | 12759/16858 [02:29<00:41, 99.73it/s]\u001b[A\n",
+            " 76%|███████▌  | 12769/16858 [02:30<00:41, 99.40it/s]\u001b[A"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 12750 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "\n",
+            " 76%|███████▌  | 12780/16858 [02:30<00:40, 100.49it/s]\u001b[A\n",
+            " 76%|███████▌  | 12791/16858 [02:30<00:40, 100.98it/s]\u001b[A\n",
+            " 76%|███████▌  | 12802/16858 [02:30<00:39, 102.62it/s]\u001b[A\n",
+            " 76%|███████▌  | 12813/16858 [02:30<00:39, 102.16it/s]\u001b[A\n",
+            " 76%|███████▌  | 12824/16858 [02:30<00:41, 98.14it/s] \u001b[A\n",
+            " 76%|███████▌  | 12835/16858 [02:30<00:39, 100.90it/s]\u001b[A\n",
+            " 76%|███████▌  | 12846/16858 [02:30<00:38, 102.87it/s]\u001b[A\n",
+            " 76%|███████▋  | 12857/16858 [02:30<00:39, 102.58it/s]\u001b[A\n",
+            " 76%|███████▋  | 12868/16858 [02:30<00:38, 103.13it/s]\u001b[A\n",
+            " 76%|███████▋  | 12879/16858 [02:31<00:38, 103.03it/s]\u001b[A\n",
+            " 76%|███████▋  | 12890/16858 [02:31<00:38, 104.08it/s]\u001b[A\n",
+            " 77%|███████▋  | 12901/16858 [02:31<00:38, 103.33it/s]\u001b[A\n",
+            " 77%|███████▋  | 12912/16858 [02:31<00:38, 101.55it/s]\u001b[A\n",
+            " 77%|███████▋  | 12923/16858 [02:31<00:40, 97.63it/s] \u001b[A\n",
+            " 77%|███████▋  | 12934/16858 [02:31<00:39, 99.71it/s]\u001b[A\n",
+            " 77%|███████▋  | 12945/16858 [02:31<00:38, 101.16it/s]\u001b[A\n",
+            " 77%|███████▋  | 12956/16858 [02:31<00:38, 102.29it/s]\u001b[A\n",
+            " 77%|███████▋  | 12967/16858 [02:31<00:37, 102.67it/s]\u001b[A\n",
+            " 77%|███████▋  | 12978/16858 [02:32<00:38, 102.08it/s]\u001b[A\n",
+            " 77%|███████▋  | 12989/16858 [02:32<00:38, 101.25it/s]\u001b[A\n",
+            " 77%|███████▋  | 13000/16858 [02:32<00:37, 102.39it/s]\u001b[A\n",
+            " 77%|███████▋  | 13011/16858 [02:32<00:37, 101.73it/s]\u001b[A\n",
+            " 77%|███████▋  | 13022/16858 [02:32<00:37, 102.04it/s]\u001b[A\n",
+            " 77%|███████▋  | 13033/16858 [02:32<00:38, 99.72it/s] \u001b[A\n",
+            " 77%|███████▋  | 13044/16858 [02:32<00:37, 101.53it/s]\u001b[A\n",
+            " 77%|███████▋  | 13055/16858 [02:32<00:37, 102.62it/s]\u001b[A\n",
+            " 78%|███████▊  | 13066/16858 [02:32<00:37, 100.90it/s]\u001b[A\n",
+            " 78%|███████▊  | 13077/16858 [02:33<00:37, 101.50it/s]\u001b[A\n",
+            " 78%|███████▊  | 13088/16858 [02:33<00:36, 102.41it/s]\u001b[A\n",
+            " 78%|███████▊  | 13099/16858 [02:33<00:37, 99.03it/s] \u001b[A\n",
+            " 78%|███████▊  | 13109/16858 [02:33<00:43, 86.30it/s]\u001b[A\n",
+            " 78%|███████▊  | 13118/16858 [02:33<00:46, 80.40it/s]\u001b[A\n",
+            " 78%|███████▊  | 13127/16858 [02:33<00:48, 76.29it/s]\u001b[A\n",
+            " 78%|███████▊  | 13135/16858 [02:33<00:49, 74.58it/s]\u001b[A\n",
+            " 78%|███████▊  | 13143/16858 [02:33<00:49, 74.50it/s]\u001b[A\n",
+            " 78%|███████▊  | 13151/16858 [02:34<00:49, 74.87it/s]\u001b[A\n",
+            " 78%|███████▊  | 13159/16858 [02:34<00:49, 74.08it/s]\u001b[A\n",
+            " 78%|███████▊  | 13167/16858 [02:34<00:50, 72.97it/s]\u001b[A"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 13158 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "\n",
+            " 78%|███████▊  | 13175/16858 [02:34<00:51, 72.18it/s]\u001b[A\n",
+            " 78%|███████▊  | 13183/16858 [02:34<00:50, 73.03it/s]\u001b[A\n",
+            " 78%|███████▊  | 13191/16858 [02:34<00:49, 73.60it/s]\u001b[A\n",
+            " 78%|███████▊  | 13199/16858 [02:34<00:52, 69.13it/s]\u001b[A\n",
+            " 78%|███████▊  | 13207/16858 [02:34<00:51, 70.84it/s]\u001b[A\n",
+            " 78%|███████▊  | 13216/16858 [02:34<00:49, 74.05it/s]\u001b[A\n",
+            " 78%|███████▊  | 13224/16858 [02:35<00:48, 75.52it/s]\u001b[A\n",
+            " 78%|███████▊  | 13233/16858 [02:35<00:46, 77.23it/s]\u001b[A\n",
+            " 79%|███████▊  | 13241/16858 [02:35<00:48, 74.29it/s]\u001b[A\n",
+            " 79%|███████▊  | 13249/16858 [02:35<00:50, 71.41it/s]\u001b[A\n",
+            " 79%|███████▊  | 13257/16858 [02:35<00:51, 70.02it/s]\u001b[A\n",
+            " 79%|███████▊  | 13265/16858 [02:35<00:53, 67.04it/s]\u001b[A\n",
+            " 79%|███████▊  | 13272/16858 [02:35<00:56, 63.77it/s]\u001b[A\n",
+            " 79%|███████▉  | 13279/16858 [02:35<00:55, 63.95it/s]\u001b[A\n",
+            " 79%|███████▉  | 13286/16858 [02:35<00:56, 62.92it/s]\u001b[A\n",
+            " 79%|███████▉  | 13297/16858 [02:36<00:47, 74.88it/s]\u001b[A\n",
+            " 79%|███████▉  | 13308/16858 [02:36<00:42, 83.34it/s]\u001b[A\n",
+            " 79%|███████▉  | 13318/16858 [02:36<00:40, 87.91it/s]\u001b[A\n",
+            " 79%|███████▉  | 13328/16858 [02:36<00:38, 90.54it/s]\u001b[A\n",
+            " 79%|███████▉  | 13338/16858 [02:36<00:37, 93.22it/s]\u001b[A\n",
+            " 79%|███████▉  | 13348/16858 [02:36<00:37, 94.55it/s]\u001b[A\n",
+            " 79%|███████▉  | 13359/16858 [02:36<00:35, 97.22it/s]\u001b[A\n",
+            " 79%|███████▉  | 13369/16858 [02:36<00:36, 94.71it/s]\u001b[A\n",
+            " 79%|███████▉  | 13379/16858 [02:36<00:36, 95.48it/s]\u001b[A\n",
+            " 79%|███████▉  | 13390/16858 [02:37<00:35, 98.06it/s]\u001b[A\n",
+            " 79%|███████▉  | 13401/16858 [02:37<00:34, 99.95it/s]\u001b[A\n",
+            " 80%|███████▉  | 13412/16858 [02:37<00:34, 99.07it/s]\u001b[A\n",
+            " 80%|███████▉  | 13423/16858 [02:37<00:34, 100.09it/s]\u001b[A\n",
+            " 80%|███████▉  | 13434/16858 [02:37<00:33, 102.74it/s]\u001b[A\n",
+            " 80%|███████▉  | 13445/16858 [02:37<00:33, 103.34it/s]\u001b[A\n",
+            " 80%|███████▉  | 13456/16858 [02:37<00:32, 103.13it/s]\u001b[A\n",
+            " 80%|███████▉  | 13467/16858 [02:37<00:33, 99.93it/s] \u001b[A\n",
+            " 80%|███████▉  | 13478/16858 [02:37<00:34, 98.96it/s]\u001b[A\n",
+            " 80%|████████  | 13489/16858 [02:37<00:33, 99.71it/s]\u001b[A\n",
+            " 80%|████████  | 13499/16858 [02:38<00:34, 98.72it/s]\u001b[A\n",
+            " 80%|████████  | 13509/16858 [02:38<00:33, 99.04it/s]\u001b[A\n",
+            " 80%|████████  | 13520/16858 [02:38<00:33, 100.38it/s]\u001b[A\n",
+            " 80%|████████  | 13531/16858 [02:38<00:32, 102.04it/s]\u001b[A\n",
+            " 80%|████████  | 13542/16858 [02:38<00:31, 103.91it/s]\u001b[A\n",
+            " 80%|████████  | 13553/16858 [02:38<00:32, 102.27it/s]\u001b[A\n",
+            " 80%|████████  | 13564/16858 [02:38<00:32, 102.67it/s]\u001b[A\n",
+            " 81%|████████  | 13575/16858 [02:38<00:33, 98.63it/s] \u001b[A\n",
+            " 81%|████████  | 13586/16858 [02:38<00:32, 99.79it/s]\u001b[A\n",
+            " 81%|████████  | 13597/16858 [02:39<00:32, 100.19it/s]\u001b[A\n",
+            " 81%|████████  | 13608/16858 [02:39<00:32, 99.82it/s] \u001b[A\n",
+            " 81%|████████  | 13618/16858 [02:39<00:32, 99.79it/s]\u001b[A\n",
+            " 81%|████████  | 13629/16858 [02:39<00:32, 100.60it/s]\u001b[A\n",
+            " 81%|████████  | 13640/16858 [02:39<00:31, 101.98it/s]\u001b[A\n",
+            " 81%|████████  | 13651/16858 [02:39<00:32, 98.37it/s] \u001b[A\n",
+            " 81%|████████  | 13662/16858 [02:39<00:32, 99.87it/s]\u001b[A\n",
+            " 81%|████████  | 13673/16858 [02:39<00:32, 99.09it/s]\u001b[A\n",
+            " 81%|████████  | 13683/16858 [02:39<00:33, 96.14it/s]\u001b[A\n",
+            " 81%|████████  | 13694/16858 [02:40<00:32, 96.13it/s]\u001b[A\n",
+            " 81%|████████▏ | 13704/16858 [02:40<00:33, 95.40it/s]\u001b[A\n",
+            " 81%|████████▏ | 13715/16858 [02:40<00:32, 97.38it/s]\u001b[A\n",
+            " 81%|████████▏ | 13726/16858 [02:40<00:31, 98.72it/s]\u001b[A\n",
+            " 81%|████████▏ | 13737/16858 [02:40<00:31, 99.77it/s]\u001b[A\n",
+            " 82%|████████▏ | 13747/16858 [02:40<00:31, 98.68it/s]\u001b[A\n",
+            " 82%|████████▏ | 13758/16858 [02:40<00:30, 100.70it/s]\u001b[A\n",
+            " 82%|████████▏ | 13769/16858 [02:40<00:30, 101.64it/s]\u001b[A\n",
+            " 82%|████████▏ | 13780/16858 [02:40<00:31, 98.09it/s] \u001b[A\n",
+            " 82%|████████▏ | 13791/16858 [02:41<00:30, 100.37it/s]\u001b[A\n",
+            " 82%|████████▏ | 13802/16858 [02:41<00:30, 99.90it/s] \u001b[A\n",
+            " 82%|████████▏ | 13813/16858 [02:41<00:30, 99.94it/s]\u001b[A\n",
+            " 82%|████████▏ | 13824/16858 [02:41<00:29, 101.72it/s]\u001b[A\n",
+            " 82%|████████▏ | 13835/16858 [02:41<00:30, 99.43it/s] \u001b[A\n",
+            " 82%|████████▏ | 13846/16858 [02:41<00:30, 100.03it/s]\u001b[A\n",
+            " 82%|████████▏ | 13857/16858 [02:41<00:29, 101.23it/s]\u001b[A\n",
+            " 82%|████████▏ | 13868/16858 [02:41<00:29, 100.45it/s]\u001b[A\n",
+            " 82%|████████▏ | 13879/16858 [02:41<00:30, 98.23it/s] \u001b[A"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 13866 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "\n",
+            " 82%|████████▏ | 13889/16858 [02:42<00:30, 96.60it/s]\u001b[A\n",
+            " 82%|████████▏ | 13900/16858 [02:42<00:30, 98.43it/s]\u001b[A\n",
+            " 83%|████████▎ | 13911/16858 [02:42<00:29, 99.26it/s]\u001b[A\n",
+            " 83%|████████▎ | 13922/16858 [02:42<00:29, 100.55it/s]\u001b[A\n",
+            " 83%|████████▎ | 13933/16858 [02:42<00:29, 100.65it/s]\u001b[A\n",
+            " 83%|████████▎ | 13944/16858 [02:42<00:28, 101.98it/s]\u001b[A\n",
+            " 83%|████████▎ | 13955/16858 [02:42<00:28, 102.56it/s]\u001b[A\n",
+            " 83%|████████▎ | 13966/16858 [02:42<00:27, 103.58it/s]\u001b[A\n",
+            " 83%|████████▎ | 13977/16858 [02:42<00:27, 103.08it/s]\u001b[A\n",
+            " 83%|████████▎ | 13988/16858 [02:42<00:28, 99.37it/s] \u001b[A\n",
+            " 83%|████████▎ | 13999/16858 [02:43<00:28, 100.64it/s]\u001b[A\n",
+            " 83%|████████▎ | 14010/16858 [02:43<00:28, 99.59it/s] \u001b[A\n",
+            " 83%|████████▎ | 14021/16858 [02:43<00:28, 100.06it/s]\u001b[A\n",
+            " 83%|████████▎ | 14032/16858 [02:43<00:28, 100.06it/s]\u001b[A\n",
+            " 83%|████████▎ | 14043/16858 [02:43<00:28, 100.38it/s]\u001b[A\n",
+            " 83%|████████▎ | 14054/16858 [02:43<00:27, 100.42it/s]\u001b[A"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 14033 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "\n",
+            " 83%|���███████▎ | 14065/16858 [02:43<00:28, 99.52it/s] \u001b[A\n",
+            " 83%|████████▎ | 14075/16858 [02:43<00:28, 99.32it/s]\u001b[A\n",
+            " 84%|████████▎ | 14085/16858 [02:43<00:28, 97.56it/s]\u001b[A\n",
+            " 84%|████████▎ | 14095/16858 [02:44<00:28, 97.21it/s]\u001b[A\n",
+            " 84%|████████▎ | 14105/16858 [02:44<00:28, 98.00it/s]\u001b[A\n",
+            " 84%|████████▎ | 14116/16858 [02:44<00:27, 99.05it/s]\u001b[A\n",
+            " 84%|████████▍ | 14127/16858 [02:44<00:27, 100.07it/s]\u001b[A\n",
+            " 84%|████████▍ | 14138/16858 [02:44<00:27, 99.92it/s] \u001b[A\n",
+            " 84%|████████▍ | 14149/16858 [02:44<00:26, 100.60it/s]\u001b[A"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 14128 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "\n",
+            " 84%|████████▍ | 14160/16858 [02:44<00:26, 100.10it/s]\u001b[A\n",
+            " 84%|████████▍ | 14171/16858 [02:44<00:26, 100.57it/s]\u001b[A\n",
+            " 84%|████████▍ | 14182/16858 [02:44<00:26, 101.46it/s]\u001b[A\n",
+            " 84%|████████▍ | 14193/16858 [02:45<00:27, 97.61it/s] \u001b[A\n",
+            " 84%|████████▍ | 14203/16858 [02:45<00:27, 97.72it/s]\u001b[A\n",
+            " 84%|████████▍ | 14214/16858 [02:45<00:26, 99.45it/s]\u001b[A\n",
+            " 84%|████████▍ | 14225/16858 [02:45<00:25, 101.43it/s]\u001b[A\n",
+            " 84%|████████▍ | 14236/16858 [02:45<00:26, 100.48it/s]\u001b[A\n",
+            " 85%|████████▍ | 14247/16858 [02:45<00:25, 101.48it/s]\u001b[A\n",
+            " 85%|████████▍ | 14258/16858 [02:45<00:25, 100.98it/s]\u001b[A\n",
+            " 85%|████████▍ | 14269/16858 [02:45<00:25, 100.20it/s]\u001b[A\n",
+            " 85%|████████▍ | 14280/16858 [02:45<00:25, 100.07it/s]\u001b[A\n",
+            " 85%|████████▍ | 14291/16858 [02:46<00:29, 87.43it/s] \u001b[A\n",
+            " 85%|████████▍ | 14301/16858 [02:46<00:31, 81.65it/s]\u001b[A\n",
+            " 85%|████████▍ | 14310/16858 [02:46<00:32, 79.62it/s]\u001b[A\n",
+            " 85%|████████▍ | 14319/16858 [02:46<00:32, 77.23it/s]\u001b[A\n",
+            " 85%|████████▍ | 14327/16858 [02:46<00:32, 76.87it/s]\u001b[A\n",
+            " 85%|████████▌ | 14335/16858 [02:46<00:33, 76.30it/s]\u001b[A\n",
+            " 85%|████████▌ | 14343/16858 [02:46<00:33, 74.21it/s]\u001b[A\n",
+            " 85%|████████▌ | 14351/16858 [02:46<00:33, 74.34it/s]\u001b[A\n",
+            " 85%|████████▌ | 14359/16858 [02:46<00:33, 74.90it/s]\u001b[A\n",
+            " 85%|████████▌ | 14367/16858 [02:47<00:34, 71.99it/s]\u001b[A\n",
+            " 85%|████████▌ | 14375/16858 [02:47<00:34, 72.64it/s]\u001b[A\n",
+            " 85%|████████▌ | 14383/16858 [02:47<00:34, 71.34it/s]\u001b[A\n",
+            " 85%|████████▌ | 14391/16858 [02:47<00:34, 70.61it/s]\u001b[A\n",
+            " 85%|████████▌ | 14400/16858 [02:47<00:33, 74.15it/s]\u001b[A"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 14386 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "\n",
+            " 85%|████████▌ | 14408/16858 [02:47<00:33, 73.57it/s]\u001b[A\n",
+            " 86%|████████▌ | 14416/16858 [02:47<00:32, 74.19it/s]\u001b[A\n",
+            " 86%|████████▌ | 14424/16858 [02:47<00:33, 73.04it/s]\u001b[A\n",
+            " 86%|████████▌ | 14432/16858 [02:48<00:34, 69.69it/s]\u001b[A\n",
+            " 86%|████████▌ | 14440/16858 [02:48<00:35, 68.80it/s]\u001b[A\n",
+            " 86%|████████▌ | 14447/16858 [02:48<00:37, 64.49it/s]\u001b[A\n",
+            " 86%|████████▌ | 14454/16858 [02:48<00:36, 65.27it/s]\u001b[A\n",
+            " 86%|████████▌ | 14461/16858 [02:48<00:37, 63.63it/s]\u001b[A\n",
+            " 86%|████████▌ | 14468/16858 [02:48<00:39, 60.89it/s]\u001b[A\n",
+            " 86%|████████▌ | 14479/16858 [02:48<00:32, 72.52it/s]\u001b[A\n",
+            " 86%|████████▌ | 14489/16858 [02:48<00:29, 79.54it/s]\u001b[A\n",
+            " 86%|████████▌ | 14499/16858 [02:48<00:27, 84.63it/s]\u001b[A\n",
+            " 86%|████████▌ | 14510/16858 [02:49<00:26, 90.12it/s]\u001b[A\n",
+            " 86%|████████▌ | 14521/16858 [02:49<00:24, 94.20it/s]\u001b[A\n",
+            " 86%|████████▌ | 14531/16858 [02:49<00:25, 92.24it/s]\u001b[A\n",
+            " 86%|████████▋ | 14542/16858 [02:49<00:24, 95.29it/s]\u001b[A\n",
+            " 86%|████████▋ | 14553/16858 [02:49<00:23, 97.96it/s]\u001b[A\n",
+            " 86%|████████▋ | 14563/16858 [02:49<00:23, 97.85it/s]\u001b[A\n",
+            " 86%|████████▋ | 14573/16858 [02:49<00:23, 98.09it/s]\u001b[A\n",
+            " 87%|████████▋ | 14583/16858 [02:49<00:23, 98.10it/s]\u001b[A\n",
+            " 87%|████████▋ | 14593/16858 [02:49<00:23, 98.03it/s]\u001b[A\n",
+            " 87%|████████▋ | 14603/16858 [02:49<00:23, 96.64it/s]\u001b[A\n",
+            " 87%|████████▋ | 14613/16858 [02:50<00:23, 97.37it/s]\u001b[A\n",
+            " 87%|████████▋ | 14623/16858 [02:50<00:22, 97.25it/s]\u001b[A\n",
+            " 87%|████████▋ | 14633/16858 [02:50<00:24, 90.78it/s]\u001b[A\n",
+            " 87%|████████▋ | 14643/16858 [02:50<00:24, 89.93it/s]\u001b[A\n",
+            " 87%|████████▋ | 14653/16858 [02:50<00:23, 92.24it/s]\u001b[A\n",
+            " 87%|████████▋ | 14663/16858 [02:50<00:23, 94.16it/s]\u001b[A\n",
+            " 87%|████████▋ | 14674/16858 [02:50<00:22, 97.10it/s]\u001b[A\n",
+            " 87%|████████▋ | 14684/16858 [02:50<00:22, 97.07it/s]\u001b[A\n",
+            " 87%|████████▋ | 14694/16858 [02:50<00:22, 95.66it/s]\u001b[A\n",
+            " 87%|████████▋ | 14704/16858 [02:51<00:22, 96.81it/s]\u001b[A\n",
+            " 87%|████████▋ | 14715/16858 [02:51<00:21, 98.59it/s]\u001b[A\n",
+            " 87%|████████▋ | 14725/16858 [02:51<00:21, 98.88it/s]\u001b[A\n",
+            " 87%|████████▋ | 14735/16858 [02:51<00:22, 94.08it/s]\u001b[A\n",
+            " 87%|████████▋ | 14746/16858 [02:51<00:21, 97.78it/s]\u001b[A\n",
+            " 88%|████████▊ | 14756/16858 [02:51<00:21, 97.82it/s]\u001b[A\n",
+            " 88%|████████▊ | 14766/16858 [02:51<00:21, 97.36it/s]\u001b[A\n",
+            " 88%|████████▊ | 14777/16858 [02:51<00:21, 98.63it/s]\u001b[A\n",
+            " 88%|████████▊ | 14787/16858 [02:51<00:20, 98.69it/s]\u001b[A\n",
+            " 88%|████████▊ | 14798/16858 [02:51<00:20, 99.89it/s]\u001b[A\n",
+            " 88%|████████▊ | 14809/16858 [02:52<00:20, 101.16it/s]\u001b[A\n",
+            " 88%|████████▊ | 14820/16858 [02:52<00:20, 99.21it/s] \u001b[A\n",
+            " 88%|████████▊ | 14830/16858 [02:52<00:21, 96.20it/s]\u001b[A\n",
+            " 88%|████████▊ | 14841/16858 [02:52<00:20, 98.69it/s]\u001b[A\n",
+            " 88%|████████▊ | 14851/16858 [02:52<00:20, 98.78it/s]\u001b[A\n",
+            " 88%|████████▊ | 14861/16858 [02:52<00:20, 98.37it/s]\u001b[A\n",
+            " 88%|████████▊ | 14872/16858 [02:52<00:20, 98.80it/s]\u001b[A\n",
+            " 88%|████████▊ | 14882/16858 [02:52<00:20, 98.37it/s]\u001b[A\n",
+            " 88%|████████▊ | 14893/16858 [02:52<00:19, 100.19it/s]\u001b[A\n",
+            " 88%|████████▊ | 14904/16858 [02:53<00:19, 98.85it/s] \u001b[A\n",
+            " 88%|████████▊ | 14915/16858 [02:53<00:19, 99.70it/s]\u001b[A\n",
+            " 89%|████████▊ | 14926/16858 [02:53<00:19, 100.99it/s]\u001b[A\n",
+            " 89%|████████▊ | 14937/16858 [02:53<00:19, 98.32it/s] \u001b[A\n",
+            " 89%|████████▊ | 14948/16858 [02:53<00:19, 100.09it/s]\u001b[A\n",
+            " 89%|████████▊ | 14959/16858 [02:53<00:18, 100.24it/s]\u001b[A\n",
+            " 89%|████████▉ | 14970/16858 [02:53<00:18, 100.26it/s]\u001b[A\n",
+            " 89%|████████▉ | 14981/16858 [02:53<00:18, 100.70it/s]\u001b[A\n",
+            " 89%|████████▉ | 14992/16858 [02:53<00:18, 101.85it/s]\u001b[A\n",
+            " 89%|████████▉ | 15003/16858 [02:54<00:18, 102.00it/s]\u001b[A\n",
+            " 89%|████████▉ | 15014/16858 [02:54<00:17, 102.54it/s]\u001b[A\n",
+            " 89%|████████▉ | 15025/16858 [02:54<00:18, 101.64it/s]\u001b[A\n",
+            " 89%|████████▉ | 15036/16858 [02:54<00:18, 97.13it/s] \u001b[A\n",
+            " 89%|████████▉ | 15047/16858 [02:54<00:18, 99.13it/s]\u001b[A\n",
+            " 89%|████████▉ | 15057/16858 [02:54<00:18, 98.18it/s]\u001b[A\n",
+            " 89%|████████▉ | 15068/16858 [02:54<00:17, 99.51it/s]\u001b[A\n",
+            " 89%|████████▉ | 15079/16858 [02:54<00:17, 99.67it/s]\u001b[A\n",
+            " 90%|████████▉ | 15089/16858 [02:54<00:17, 98.90it/s]\u001b[A"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 15078 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "\n",
+            " 90%|████████▉ | 15100/16858 [02:55<00:17, 100.11it/s]\u001b[A\n",
+            " 90%|████████▉ | 15111/16858 [02:55<00:17, 99.48it/s] \u001b[A\n",
+            " 90%|████████▉ | 15122/16858 [02:55<00:17, 100.20it/s]\u001b[A\n",
+            " 90%|████████▉ | 15133/16858 [02:55<00:18, 93.28it/s] \u001b[A\n",
+            " 90%|████████▉ | 15143/16858 [02:55<00:18, 90.66it/s]\u001b[A\n",
+            " 90%|████████▉ | 15154/16858 [02:55<00:18, 94.23it/s]\u001b[A\n",
+            " 90%|████████▉ | 15164/16858 [02:55<00:17, 94.94it/s]\u001b[A\n",
+            " 90%|█████████ | 15174/16858 [02:55<00:17, 95.64it/s]\u001b[A\n",
+            " 90%|█████████ | 15185/16858 [02:55<00:17, 98.40it/s]\u001b[A\n",
+            " 90%|█████████ | 15195/16858 [02:56<00:16, 98.04it/s]\u001b[A\n",
+            " 90%|█████████ | 15206/16858 [02:56<00:16, 99.46it/s]\u001b[A\n",
+            " 90%|█████████ | 15216/16858 [02:56<00:16, 99.41it/s]\u001b[A\n",
+            " 90%|█████████ | 15227/16858 [02:56<00:16, 100.18it/s]\u001b[A\n",
+            " 90%|█████████ | 15238/16858 [02:56<00:16, 96.21it/s] \u001b[A\n",
+            " 90%|█████████ | 15248/16858 [02:56<00:16, 97.21it/s]\u001b[A\n",
+            " 91%|█████████ | 15259/16858 [02:56<00:16, 98.01it/s]\u001b[A\n",
+            " 91%|█████████ | 15270/16858 [02:56<00:15, 99.54it/s]\u001b[A\n",
+            " 91%|█████████ | 15281/16858 [02:56<00:15, 100.76it/s]\u001b[A\n",
+            " 91%|█████████ | 15292/16858 [02:57<00:15, 99.19it/s] \u001b[A\n",
+            " 91%|█████████ | 15303/16858 [02:57<00:15, 100.50it/s]\u001b[A\n",
+            " 91%|█████████ | 15314/16858 [02:57<00:15, 100.26it/s]\u001b[A\n",
+            " 91%|█████████ | 15325/16858 [02:57<00:15, 100.82it/s]\u001b[A\n",
+            " 91%|█████████ | 15336/16858 [02:57<00:15, 99.47it/s] \u001b[A"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 15324 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "\n",
+            " 91%|█████████ | 15346/16858 [02:57<00:15, 95.91it/s]\u001b[A\n",
+            " 91%|█████████ | 15356/16858 [02:57<00:15, 96.68it/s]\u001b[A\n",
+            " 91%|█████████ | 15367/16858 [02:57<00:15, 98.49it/s]\u001b[A\n",
+            " 91%|█████████ | 15377/16858 [02:57<00:15, 97.54it/s]\u001b[A\n",
+            " 91%|█████████▏| 15388/16858 [02:57<00:14, 98.98it/s]\u001b[A\n",
+            " 91%|█████████▏| 15399/16858 [02:58<00:14, 100.05it/s]\u001b[A\n",
+            " 91%|█████████▏| 15410/16858 [02:58<00:14, 101.41it/s]\u001b[A\n",
+            " 91%|█████████▏| 15421/16858 [02:58<00:14, 101.26it/s]\u001b[A\n",
+            " 92%|█████████▏| 15432/16858 [02:58<00:14, 100.82it/s]\u001b[A\n",
+            " 92%|█████████▏| 15443/16858 [02:58<00:14, 97.61it/s] \u001b[A\n",
+            " 92%|█████████▏| 15453/16858 [02:58<00:14, 94.86it/s]\u001b[A"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 15434 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "\n",
+            " 92%|█████████▏| 15463/16858 [02:58<00:16, 83.16it/s]\u001b[A\n",
+            " 92%|█████████▏| 15472/16858 [02:58<00:18, 76.92it/s]\u001b[A\n",
+            " 92%|█████████▏| 15480/16858 [02:59<00:18, 76.43it/s]\u001b[A\n",
+            " 92%|█████████▏| 15488/16858 [02:59<00:18, 74.00it/s]\u001b[A\n",
+            " 92%|█████████▏| 15496/16858 [02:59<00:18, 73.33it/s]\u001b[A\n",
+            " 92%|█████████▏| 15504/16858 [02:59<00:18, 74.79it/s]\u001b[A\n",
+            " 92%|█████████▏| 15512/16858 [02:59<00:17, 75.66it/s]\u001b[A\n",
+            " 92%|█████████▏| 15520/16858 [02:59<00:18, 71.48it/s]\u001b[A"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 15507 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "\n",
+            " 92%|█████████▏| 15528/16858 [02:59<00:18, 70.91it/s]\u001b[A\n",
+            " 92%|█████████▏| 15536/16858 [02:59<00:18, 73.21it/s]\u001b[A\n",
+            " 92%|█████████▏| 15544/16858 [02:59<00:17, 74.99it/s]\u001b[A\n",
+            " 92%|█████████▏| 15552/16858 [03:00<00:18, 71.85it/s]\u001b[A\n",
+            " 92%|█████████▏| 15560/16858 [03:00<00:18, 70.84it/s]\u001b[A\n",
+            " 92%|█████████▏| 15568/16858 [03:00<00:17, 73.12it/s]\u001b[A\n",
+            " 92%|█████████▏| 15577/16858 [03:00<00:16, 76.28it/s]\u001b[A\n",
+            " 92%|█████████▏| 15586/16858 [03:00<00:16, 78.01it/s]\u001b[A\n",
+            " 93%|█████████▎| 15594/16858 [03:00<00:16, 78.21it/s]\u001b[A\n",
+            " 93%|█████████▎| 15602/16858 [03:00<00:18, 67.98it/s]\u001b[A\n",
+            " 93%|█████████▎| 15610/16858 [03:00<00:18, 67.13it/s]\u001b[A\n",
+            " 93%|█████████▎| 15617/16858 [03:00<00:19, 65.09it/s]\u001b[A\n",
+            " 93%|█████████▎| 15624/16858 [03:01<00:19, 63.05it/s]\u001b[A\n",
+            " 93%|█████████▎| 15631/16858 [03:01<00:19, 62.43it/s]\u001b[A\n",
+            " 93%|█████████▎| 15638/16858 [03:01<00:20, 59.79it/s]\u001b[A\n",
+            " 93%|█████████▎| 15646/16858 [03:01<00:18, 64.16it/s]\u001b[A\n",
+            " 93%|█████████▎| 15656/16858 [03:01<00:16, 72.61it/s]\u001b[A\n",
+            " 93%|█████████▎| 15666/16858 [03:01<00:15, 78.21it/s]\u001b[A\n",
+            " 93%|█████████▎| 15675/16858 [03:01<00:14, 79.44it/s]\u001b[A\n",
+            " 93%|█████████▎| 15685/16858 [03:01<00:14, 83.41it/s]\u001b[A\n",
+            " 93%|█████████▎| 15695/16858 [03:01<00:13, 87.82it/s]\u001b[A\n",
+            " 93%|█████████▎| 15706/16858 [03:02<00:12, 91.83it/s]\u001b[A\n",
+            " 93%|█████████▎| 15716/16858 [03:02<00:12, 93.44it/s]\u001b[A\n",
+            " 93%|█████████▎| 15726/16858 [03:02<00:11, 95.08it/s]\u001b[A\n",
+            " 93%|█████████▎| 15737/16858 [03:02<00:11, 98.06it/s]\u001b[A\n",
+            " 93%|█████████▎| 15748/16858 [03:02<00:11, 100.21it/s]\u001b[A\n",
+            " 93%|█████████▎| 15759/16858 [03:02<00:11, 98.10it/s] \u001b[A\n",
+            " 94%|█████████▎| 15769/16858 [03:02<00:11, 97.43it/s]\u001b[A\n",
+            " 94%|█████████▎| 15779/16858 [03:02<00:11, 92.90it/s]\u001b[A\n",
+            " 94%|█████████▎| 15789/16858 [03:02<00:11, 94.08it/s]\u001b[A\n",
+            " 94%|█████████▎| 15799/16858 [03:03<00:11, 95.33it/s]\u001b[A\n",
+            " 94%|█████████▍| 15810/16858 [03:03<00:10, 97.88it/s]\u001b[A\n",
+            " 94%|█████████▍| 15820/16858 [03:03<00:10, 97.73it/s]\u001b[A\n",
+            " 94%|█████████▍| 15830/16858 [03:03<00:10, 98.04it/s]\u001b[A\n",
+            " 94%|█████████▍| 15840/16858 [03:03<00:10, 98.52it/s]\u001b[A\n",
+            " 94%|█████████▍| 15850/16858 [03:03<00:10, 98.39it/s]\u001b[A\n",
+            " 94%|█████████▍| 15861/16858 [03:03<00:09, 99.92it/s]\u001b[A\n",
+            " 94%|█████████▍| 15871/16858 [03:03<00:09, 99.47it/s]\u001b[A\n",
+            " 94%|█████████▍| 15881/16858 [03:03<00:10, 94.13it/s]\u001b[A\n",
+            " 94%|█████████▍| 15891/16858 [03:03<00:10, 95.50it/s]\u001b[A\n",
+            " 94%|█████████▍| 15902/16858 [03:04<00:09, 98.75it/s]\u001b[A\n",
+            " 94%|█████████▍| 15912/16858 [03:04<00:09, 99.11it/s]\u001b[A\n",
+            " 94%|█████████▍| 15923/16858 [03:04<00:09, 99.49it/s]\u001b[A\n",
+            " 95%|█████████▍| 15933/16858 [03:04<00:09, 99.35it/s]\u001b[A\n",
+            " 95%|█████████▍| 15943/16858 [03:04<00:09, 98.88it/s]\u001b[A\n",
+            " 95%|█████████▍| 15953/16858 [03:04<00:09, 98.93it/s]\u001b[A\n",
+            " 95%|█████████▍| 15964/16858 [03:04<00:08, 100.69it/s]\u001b[A\n",
+            " 95%|█████████▍| 15975/16858 [03:04<00:08, 98.98it/s] \u001b[A\n",
+            " 95%|█████████▍| 15985/16858 [03:04<00:08, 97.65it/s]\u001b[A\n",
+            " 95%|█████████▍| 15996/16858 [03:05<00:08, 99.24it/s]\u001b[A\n",
+            " 95%|█████████▍| 16007/16858 [03:05<00:08, 100.27it/s]\u001b[A\n",
+            " 95%|█████████▌| 16018/16858 [03:05<00:08, 99.46it/s] \u001b[A\n",
+            " 95%|█████████▌| 16029/16858 [03:05<00:08, 100.05it/s]\u001b[A\n",
+            " 95%|█████████▌| 16040/16858 [03:05<00:08, 100.06it/s]\u001b[A\n",
+            " 95%|█████████▌| 16051/16858 [03:05<00:08, 100.68it/s]\u001b[A\n",
+            " 95%|█████████▌| 16062/16858 [03:05<00:07, 101.75it/s]\u001b[A\n",
+            " 95%|█████████▌| 16073/16858 [03:05<00:07, 101.06it/s]\u001b[A\n",
+            " 95%|█████████▌| 16084/16858 [03:05<00:07, 97.80it/s] \u001b[A\n",
+            " 95%|█████████▌| 16094/16858 [03:06<00:07, 97.89it/s]\u001b[A\n",
+            " 96%|█████████▌| 16105/16858 [03:06<00:07, 99.10it/s]\u001b[A\n",
+            " 96%|█████████▌| 16115/16858 [03:06<00:07, 98.92it/s]\u001b[A\n",
+            " 96%|█████████▌| 16125/16858 [03:06<00:07, 98.39it/s]\u001b[A\n",
+            " 96%|█████████▌| 16135/16858 [03:06<00:07, 98.45it/s]\u001b[A\n",
+            " 96%|█████████▌| 16145/16858 [03:06<00:07, 98.81it/s]\u001b[A\n",
+            " 96%|█████████▌| 16155/16858 [03:06<00:07, 99.05it/s]\u001b[A\n",
+            " 96%|█████████▌| 16165/16858 [03:06<00:07, 98.91it/s]\u001b[A\n",
+            " 96%|█████████▌| 16175/16858 [03:06<00:06, 98.59it/s]\u001b[A\n",
+            " 96%|█████████▌| 16185/16858 [03:06<00:07, 93.23it/s]\u001b[A\n",
+            " 96%|█████████▌| 16195/16858 [03:07<00:06, 94.94it/s]\u001b[A\n",
+            " 96%|█████████▌| 16206/16858 [03:07<00:06, 97.20it/s]\u001b[A\n",
+            " 96%|█████████▌| 16217/16858 [03:07<00:06, 98.71it/s]\u001b[A\n",
+            " 96%|█████████▋| 16228/16858 [03:07<00:06, 99.37it/s]\u001b[A\n",
+            " 96%|█████████▋| 16239/16858 [03:07<00:06, 101.42it/s]\u001b[A\n",
+            " 96%|█████████▋| 16250/16858 [03:07<00:05, 102.21it/s]\u001b[A\n",
+            " 96%|█████████▋| 16261/16858 [03:07<00:05, 103.41it/s]\u001b[A\n",
+            " 97%|█████████▋| 16272/16858 [03:07<00:05, 102.94it/s]\u001b[A\n",
+            " 97%|█████████▋| 16283/16858 [03:07<00:05, 102.25it/s]\u001b[A\n",
+            " 97%|█████████▋| 16294/16858 [03:08<00:05, 99.14it/s] \u001b[A\n",
+            " 97%|█████████▋| 16305/16858 [03:08<00:05, 101.25it/s]\u001b[A\n",
+            " 97%|█████████▋| 16316/16858 [03:08<00:05, 101.84it/s]\u001b[A\n",
+            " 97%|█████████▋| 16327/16858 [03:08<00:05, 101.89it/s]\u001b[A\n",
+            " 97%|█████████▋| 16338/16858 [03:08<00:05, 102.79it/s]\u001b[A\n",
+            " 97%|█████████▋| 16349/16858 [03:08<00:04, 103.60it/s]\u001b[A\n",
+            " 97%|█████████▋| 16360/16858 [03:08<00:04, 101.82it/s]\u001b[A\n",
+            " 97%|█████████▋| 16371/16858 [03:08<00:04, 100.83it/s]\u001b[A\n",
+            " 97%|█████████▋| 16382/16858 [03:08<00:04, 100.88it/s]\u001b[A\n",
+            " 97%|█████████▋| 16393/16858 [03:09<00:04, 97.39it/s] \u001b[A\n",
+            " 97%|█████████▋| 16403/16858 [03:09<00:04, 97.44it/s]\u001b[A\n",
+            " 97%|█████████▋| 16413/16858 [03:09<00:04, 97.15it/s]\u001b[A\n",
+            " 97%|█████████▋| 16423/16858 [03:09<00:04, 97.45it/s]\u001b[A\n",
+            " 97%|█████████▋| 16433/16858 [03:09<00:04, 97.25it/s]\u001b[A\n",
+            " 98%|█████████▊| 16443/16858 [03:09<00:04, 97.75it/s]\u001b[A\n",
+            " 98%|█████████▊| 16454/16858 [03:09<00:04, 98.77it/s]\u001b[A\n",
+            " 98%|█████████▊| 16465/16858 [03:09<00:03, 100.69it/s]\u001b[A\n",
+            " 98%|█████████▊| 16476/16858 [03:09<00:03, 99.89it/s] \u001b[A\n",
+            " 98%|█████████▊| 16486/16858 [03:09<00:03, 99.41it/s]\u001b[A\n",
+            " 98%|█████████▊| 16496/16858 [03:10<00:03, 95.42it/s]\u001b[A\n",
+            " 98%|█████████▊| 16506/16858 [03:10<00:03, 96.36it/s]\u001b[A\n",
+            " 98%|█████████▊| 16516/16858 [03:10<00:03, 96.60it/s]\u001b[A\n",
+            " 98%|█████████▊| 16527/16858 [03:10<00:03, 97.78it/s]\u001b[A\n",
+            " 98%|█████████▊| 16538/16858 [03:10<00:03, 98.82it/s]\u001b[A\n",
+            " 98%|█████████▊| 16548/16858 [03:10<00:03, 97.50it/s]\u001b[A\n",
+            " 98%|█████████▊| 16559/16858 [03:10<00:03, 99.35it/s]\u001b[A\n",
+            " 98%|█████████▊| 16569/16858 [03:10<00:02, 98.94it/s]\u001b[A\n",
+            " 98%|█████████▊| 16579/16858 [03:10<00:02, 95.53it/s]\u001b[A\n",
+            " 98%|█████████▊| 16589/16858 [03:11<00:02, 95.30it/s]\u001b[A\n",
+            " 98%|█████████▊| 16599/16858 [03:11<00:02, 94.89it/s]\u001b[A\n",
+            " 99%|█████████▊| 16609/16858 [03:11<00:02, 95.60it/s]\u001b[A\n",
+            " 99%|█████████▊| 16620/16858 [03:11<00:02, 97.55it/s]\u001b[A\n",
+            " 99%|█████████▊| 16630/16858 [03:11<00:02, 94.22it/s]\u001b[A\n",
+            " 99%|█████████▊| 16640/16858 [03:11<00:02, 83.20it/s]\u001b[A\n",
+            " 99%|█████████▉| 16649/16858 [03:11<00:02, 79.75it/s]\u001b[A\n",
+            " 99%|█████████▉| 16658/16858 [03:11<00:02, 79.18it/s]\u001b[A\n",
+            " 99%|█████████▉| 16667/16858 [03:11<00:02, 78.42it/s]\u001b[A\n",
+            " 99%|█████████▉| 16675/16858 [03:12<00:02, 78.05it/s]\u001b[A\n",
+            " 99%|█████████▉| 16683/16858 [03:12<00:02, 76.55it/s]\u001b[A\n",
+            " 99%|█████████▉| 16691/16858 [03:12<00:02, 76.18it/s]\u001b[A\n",
+            " 99%|█████████▉| 16699/16858 [03:12<00:02, 74.84it/s]\u001b[A\n",
+            " 99%|█████████▉| 16707/16858 [03:12<00:01, 76.14it/s]\u001b[A\n",
+            " 99%|█████████▉| 16715/16858 [03:12<00:01, 76.87it/s]\u001b[A\n",
+            " 99%|█████████▉| 16723/16858 [03:12<00:01, 77.37it/s]\u001b[A\n",
+            " 99%|█████████▉| 16731/16858 [03:12<00:01, 77.66it/s]\u001b[A\n",
+            " 99%|█████████▉| 16739/16858 [03:12<00:01, 76.43it/s]\u001b[A\n",
+            " 99%|█████████▉| 16747/16858 [03:13<00:01, 76.89it/s]\u001b[A\n",
+            " 99%|█████████▉| 16756/16858 [03:13<00:01, 78.43it/s]\u001b[A\n",
+            " 99%|█████████▉| 16764/16858 [03:13<00:01, 72.93it/s]\u001b[A\n",
+            " 99%|█████████▉| 16773/16858 [03:13<00:01, 75.64it/s]\u001b[A\n",
+            "100%|█████████▉| 16781/16858 [03:13<00:01, 70.33it/s]\u001b[A\n",
+            "100%|█████████▉| 16789/16858 [03:13<00:01, 68.22it/s]\u001b[A\n",
+            "100%|█████████▉| 16796/16858 [03:13<00:00, 68.36it/s]\u001b[A\n",
+            "100%|█████████▉| 16803/16858 [03:13<00:00, 65.60it/s]\u001b[A\n",
+            "100%|█████████▉| 16810/16858 [03:13<00:00, 65.14it/s]\u001b[A\n",
+            "100%|█████████▉| 16817/16858 [03:14<00:00, 63.34it/s]\u001b[A\n",
+            "100%|█████████▉| 16824/16858 [03:14<00:00, 64.26it/s]\u001b[A\n",
+            "100%|█████████▉| 16832/16858 [03:14<00:00, 68.41it/s]\u001b[A\n",
+            "100%|█████████▉| 16842/16858 [03:14<00:00, 76.61it/s]\u001b[A\n",
+            "100%|██████████| 16858/16858 [03:14<00:00, 86.65it/s]"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 16853 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# Train CRF with PyTorch"
+      ],
+      "metadata": {
+        "id": "DiCxlUcHQ9NJ"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from torch.utils.data import Dataset, DataLoader\n",
+        "\n",
+        "class NERDataset(Dataset):\n",
+        "    def __init__(self, embeddings, labels):\n",
+        "        self.embeddings = embeddings\n",
+        "        self.labels = labels\n",
+        "\n",
+        "    def __len__(self):\n",
+        "        return len(self.embeddings)\n",
+        "\n",
+        "    def __getitem__(self, idx):\n",
+        "        return self.embeddings[idx], self.labels[idx]\n",
+        "\n",
+        "def collate_fn(batch):\n",
+        "    embeddings, labels = zip(*batch)\n",
+        "    lengths = [len(x) for x in embeddings]\n",
+        "\n",
+        "    # Padding\n",
+        "    max_len = max(lengths)\n",
+        "    padded_embs = torch.stack([\n",
+        "        torch.cat([e, torch.zeros(max_len - len(e), e.size(1))]) for e in embeddings\n",
+        "    ])\n",
+        "    padded_labels = torch.stack([\n",
+        "        torch.cat([l, torch.full((max_len - len(l),), -1)]) for l in labels\n",
+        "    ])\n",
+        "    return padded_embs, padded_labels, lengths\n"
+      ],
+      "metadata": {
+        "id": "yr-VQlkdQ_lI"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from torchcrf import CRF\n",
+        "import torch.nn as nn\n",
+        "\n",
+        "class CRFTagger(nn.Module):\n",
+        "    def __init__(self, input_dim, num_tags):\n",
+        "        super().__init__()\n",
+        "        self.hidden2tag = nn.Linear(input_dim, num_tags)\n",
+        "        self.crf = CRF(num_tags, batch_first=True)\n",
+        "\n",
+        "    def forward(self, x, labels, mask):\n",
+        "        emissions = self.hidden2tag(x)\n",
+        "        loss = -self.crf(emissions, labels, mask=mask, reduction='mean')\n",
+        "        return loss\n",
+        "\n",
+        "    def decode(self, x, mask):\n",
+        "        emissions = self.hidden2tag(x)\n",
+        "        return self.crf.decode(emissions, mask)\n"
+      ],
+      "metadata": {
+        "id": "sceTmVnXRDlo"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from sklearn.metrics import precision_recall_fscore_support\n",
+        "\n",
+        "def evaluate_crf(model, dataloader):\n",
+        "    model.eval()\n",
+        "    all_preds = []\n",
+        "    all_true = []\n",
+        "\n",
+        "    with torch.no_grad():\n",
+        "        for x, y, lengths in dataloader:\n",
+        "            mask = (y != -1)\n",
+        "            preds = model.decode(x, mask)\n",
+        "\n",
+            "            # Flatten the results into token-level lists\n",
+        "            for pred_seq, true_seq, m in zip(preds, y, mask):\n",
+        "                true_seq = true_seq[m].tolist()\n",
+        "                all_preds.extend(pred_seq)\n",
+        "                all_true.extend(true_seq)\n",
+        "\n",
+        "    precision, recall, f1, _ = precision_recall_fscore_support(\n",
+        "        all_true, all_preds, average='macro', zero_division=0\n",
+        "    )\n",
+        "    print(f\"Precision: {precision:.4f} - Recall: {recall:.4f} - F1: {f1:.4f}\")\n"
+      ],
+      "metadata": {
+        "id": "l0ZSIjWWRbxR"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from torch.nn.utils.rnn import pad_sequence\n",
+        "\n",
+        "# Number of labels\n",
+        "num_tags = max(label.max().item() for label in all_labels) + 1\n",
+        "\n",
+        "# Create the DataLoader\n",
+        "dataset = NERDataset(all_embeddings, all_labels)\n",
+        "loader = DataLoader(dataset, batch_size=16, shuffle=True, collate_fn=collate_fn)\n",
+        "\n",
+        "# Model\n",
+        "model = CRFTagger(input_dim=768, num_tags=num_tags)\n",
+        "optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)\n",
+        "\n",
+        "# Training loop\n",
+        "for epoch in range(10):\n",
+        "    model.train()\n",
+        "    total_loss = 0\n",
+        "    for x, y, lengths in loader:\n",
+        "        mask = (y != -1)\n",
+        "        loss = model(x, y, mask)\n",
+        "        total_loss += loss.item()\n",
+        "        optimizer.zero_grad()\n",
+        "        loss.backward()\n",
+        "        optimizer.step()\n",
+        "    print(f\"Epoch {epoch+1} - Loss: {total_loss:.4f}\")\n",
+        "    evaluate_crf(model, loader)\n"
+      ],
+      "metadata": {
+        "id": "52HaYPuPRII3",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "8f5463c3-0ab1-44d6-c4ef-d4ef3154373e"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Epoch 1 - Loss: 2298.3390\n",
+            "Precision: 0.8966 - Recall: 0.8211 - F1: 0.8544\n",
+            "Epoch 2 - Loss: 683.3343\n",
+            "Precision: 0.9166 - Recall: 0.8875 - F1: 0.9015\n",
+            "Epoch 3 - Loss: 499.8040\n",
+            "Precision: 0.9242 - Recall: 0.9110 - F1: 0.9172\n",
+            "Epoch 4 - Loss: 410.5207\n",
+            "Precision: 0.9279 - Recall: 0.9250 - F1: 0.9263\n",
+            "Epoch 5 - Loss: 353.9285\n",
+            "Precision: 0.9338 - Recall: 0.9359 - F1: 0.9348\n",
+            "Epoch 6 - Loss: 313.9812\n",
+            "Precision: 0.9372 - Recall: 0.9414 - F1: 0.9392\n",
+            "Epoch 7 - Loss: 285.1696\n",
+            "Precision: 0.9384 - Recall: 0.9420 - F1: 0.9396\n",
+            "Epoch 8 - Loss: 266.1924\n",
+            "Precision: 0.9556 - Recall: 0.9402 - F1: 0.9473\n",
+            "Epoch 9 - Loss: 247.7737\n",
+            "Precision: 0.9590 - Recall: 0.9373 - F1: 0.9475\n",
+            "Epoch 10 - Loss: 233.8348\n",
+            "Precision: 0.9591 - Recall: 0.9446 - F1: 0.9517\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import torch\n",
+        "import torch.nn as nn\n",
+        "from torch.utils.data import Dataset, DataLoader\n",
+        "from torchcrf import CRF\n",
+        "from torch.nn.utils.rnn import pad_sequence\n",
+        "from sklearn.metrics import precision_recall_fscore_support, classification_report, accuracy_score\n",
+        "from sklearn.model_selection import train_test_split\n",
+        "from tqdm import tqdm\n",
+        "import wandb\n",
+        "import os\n",
+        "\n",
+        "# Initialize Weights & Biases\n",
+        "wandb.init(\n",
+        "    project=\"NER\",\n",
+        "    name=\"CRF_VLSP2016\",\n",
+        "    config={\n",
+        "        \"epochs\": 20,\n",
+        "        \"batch_size\": 16,\n",
+        "        \"learning_rate\": 1e-3,\n",
+        "        \"input_dim\": 768,\n",
+        "        \"test_size\": 0.2\n",
+        "    }\n",
+        ")\n",
+        "config = wandb.config\n",
+        "\n",
+        "# Prepare train/test split\n",
+        "train_embs, test_embs, train_labels, test_labels = train_test_split(\n",
+        "    all_embeddings, all_labels,\n",
+        "    test_size=config.test_size,\n",
+        "    random_state=42\n",
+        ")\n",
+        "\n",
+        "class NERDataset(Dataset):\n",
+        "    def __init__(self, embeddings, labels):\n",
+        "        self.embeddings = embeddings\n",
+        "        self.labels = labels\n",
+        "\n",
+        "    def __len__(self):\n",
+        "        return len(self.embeddings)\n",
+        "\n",
+        "    def __getitem__(self, idx):\n",
+        "        return self.embeddings[idx], self.labels[idx]\n",
+        "\n",
+        "\n",
+        "def collate_fn(batch):\n",
+        "    embeddings, labels = zip(*batch)\n",
+        "    lengths = [e.size(0) for e in embeddings]\n",
+        "    max_len = max(lengths)\n",
+        "\n",
+        "    padded_embs = torch.stack([\n",
+        "        torch.cat([e, torch.zeros(max_len - e.size(0), e.size(1))]) for e in embeddings\n",
+        "    ])\n",
+        "    padded_labels = torch.stack([\n",
+        "        torch.cat([l, torch.full((max_len - l.size(0),), -1, dtype=torch.long)]) for l in labels\n",
+        "    ])\n",
+        "    return padded_embs, padded_labels, lengths\n",
+        "\n",
+        "# Create DataLoaders\n",
+        "train_dataset = NERDataset(train_embs, train_labels)\n",
+        "test_dataset = NERDataset(test_embs, test_labels)\n",
+        "train_loader = DataLoader(train_dataset, batch_size=config.batch_size, shuffle=True, collate_fn=collate_fn)\n",
+        "test_loader = DataLoader(test_dataset, batch_size=config.batch_size, shuffle=False, collate_fn=collate_fn)\n",
+        "\n",
+        "# Model setup\n",
+        "num_tags = max(label.max().item() for label in all_labels) + 1\n",
+        "class CRFTagger(nn.Module):\n",
+        "    def __init__(self, input_dim, num_tags):\n",
+        "        super().__init__()\n",
+        "        self.hidden2tag = nn.Linear(input_dim, num_tags)\n",
+        "        self.crf = CRF(num_tags, batch_first=True)\n",
+        "\n",
+        "    def forward(self, x, labels, mask):\n",
+        "        emissions = self.hidden2tag(x)\n",
+        "        return -self.crf(emissions, labels, mask=mask, reduction='mean')\n",
+        "\n",
+        "    def decode(self, x, mask):\n",
+        "        emissions = self.hidden2tag(x)\n",
+        "        return self.crf.decode(emissions, mask)\n",
+        "\n",
+        "model = CRFTagger(input_dim=config.input_dim, num_tags=num_tags)\n",
+        "optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)\n",
+        "\n",
+        "# Watch model parameters and gradients\n",
+        "wandb.watch(model, log=\"all\")\n",
+        "\n",
+        "# Create checkpoint directory\n",
+        "os.makedirs(\"checkpoints\", exist_ok=True)\n",
+        "best_f1, best_acc = 0.0, 0.0\n",
+        "\n",
+        "# Evaluation function with progress bar\n",
+        "def evaluate(model, loader, split_name=\"Eval\"):\n",
+        "    \"\"\"Decode `loader` and return (macro precision, macro recall, macro F1, accuracy) over non-padded tokens.\"\"\"\n",
+        "    model.eval()\n",
+        "    gold, predicted = [], []\n",
+        "    with torch.no_grad():\n",
+        "        for feats, tags, _ in tqdm(loader, desc=split_name):\n",
+        "            keep = tags != -1  # padded label positions are -1\n",
+        "            decoded = model.decode(feats, keep)\n",
+        "            for row, path in enumerate(decoded):\n",
+        "                gold += tags[row][keep[row]].tolist()\n",
+        "                predicted += path\n",
+        "    # Macro averaging weights every tag class equally, regardless of its frequency.\n",
+        "    macro_p, macro_r, macro_f1, _ = precision_recall_fscore_support(\n",
+        "        gold, predicted, average='macro', zero_division=0)\n",
+        "    return macro_p, macro_r, macro_f1, accuracy_score(gold, predicted)\n",
+        "\n",
+        "# Training loop\n",
+        "for epoch in range(1, config.epochs + 1):\n",
+        "    # --- optimisation pass over the training batches ---\n",
+        "    model.train()\n",
+        "    total_loss = 0.0\n",
+        "    train_bar = tqdm(train_loader, desc=f\"Train Epoch {epoch}/{config.epochs}\")\n",
+        "    for batch_idx, (x, y, lengths) in enumerate(train_bar, start=1):\n",
+        "        mask = (y != -1)  # True on real tokens, False on padding\n",
+        "        optimizer.zero_grad()\n",
+        "        loss = model(x, y, mask)\n",
+        "        loss.backward()\n",
+        "        optimizer.step()\n",
+        "\n",
+        "        batch_loss = loss.item()\n",
+        "        total_loss += batch_loss\n",
+        "        train_bar.set_postfix(batch_loss=batch_loss, avg_loss=total_loss / batch_idx)\n",
+        "\n",
+        "    # --- per-epoch metrics on both splits ---\n",
+        "    avg_train_loss = total_loss / len(train_loader)\n",
+        "    train_precision, train_recall, train_f1, train_acc = evaluate(model, train_loader, split_name=\"Train Eval\")\n",
+        "    test_precision, test_recall, test_f1, test_acc = evaluate(model, test_loader, split_name=\"Test Eval\")\n",
+        "\n",
+        "    print(f\"Epoch {epoch}: loss={avg_train_loss:.4f}, train_f1={train_f1:.4f}, train_acc={train_acc:.4f}, test_f1={test_f1:.4f}, test_acc={test_acc:.4f}\")\n",
+        "    wandb.log({\n",
+        "        \"epoch\": epoch, \"train_loss\": avg_train_loss,\n",
+        "        \"train_precision\": train_precision, \"train_recall\": train_recall,\n",
+        "        \"train_f1\": train_f1, \"train_acc\": train_acc,\n",
+        "        \"test_precision\": test_precision, \"test_recall\": test_recall,\n",
+        "        \"test_f1\": test_f1, \"test_acc\": test_acc,\n",
+        "    })\n",
+        "\n",
+        "    # Checkpoint whenever either test metric sets a new high; each best is tracked on its own,\n",
+        "    # so a saved epoch may improve accuracy without improving F1 (or vice versa).\n",
+        "    f1_improved = test_f1 > best_f1\n",
+        "    acc_improved = test_acc > best_acc\n",
+        "    if f1_improved or acc_improved:\n",
+        "        best_f1 = max(test_f1, best_f1)\n",
+        "        best_acc = max(test_acc, best_acc)\n",
+        "        ckpt_path = f\"checkpoints/best_epoch_{epoch}.pt\"\n",
+        "        torch.save(model.state_dict(), ckpt_path)\n",
+        "        wandb.save(ckpt_path)\n",
+        "        print(f\"Saved improved model to {ckpt_path}\")\n",
+        "\n",
+        "# Finish W&B run\n",
+        "wandb.finish()\n",
+        "\n"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 1000
+        },
+        "id": "RU_M57LePTb0",
+        "outputId": "149d92fe-7a3f-47e7-c463-178d80588eb0"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
+            ],
+            "text/html": [
+              "Tracking run with wandb version 0.19.11"
+            ]
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
+            ],
+            "text/html": [
+              "Run data is saved locally in <code>/content/wandb/run-20250605_133906-tjmjkx7n</code>"
+            ]
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
+            ],
+            "text/html": [
+              "Syncing run <strong><a href='https://wandb.ai/laiducaivn-fpt-university/NER/runs/tjmjkx7n' target=\"_blank\">CRF_VLSP2016</a></strong> to <a href='https://wandb.ai/laiducaivn-fpt-university/NER' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/developer-guide' target=\"_blank\">docs</a>)<br>"
+            ]
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
+            ],
+            "text/html": [
+              " View project at <a href='https://wandb.ai/laiducaivn-fpt-university/NER' target=\"_blank\">https://wandb.ai/laiducaivn-fpt-university/NER</a>"
+            ]
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
+            ],
+            "text/html": [
+              " View run at <a href='https://wandb.ai/laiducaivn-fpt-university/NER/runs/tjmjkx7n' target=\"_blank\">https://wandb.ai/laiducaivn-fpt-university/NER/runs/tjmjkx7n</a>"
+            ]
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Train Epoch 1/20: 100%|██████████| 841/841 [00:25<00:00, 32.42it/s, avg_loss=2.55, batch_loss=0.525]\n",
+            "Train Eval: 100%|██████████| 841/841 [00:06<00:00, 137.51it/s]\n",
+            "Test Eval: 100%|██████████| 211/211 [00:01<00:00, 160.88it/s]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Epoch 1: loss=2.5528, train_f1=0.8316, train_acc=0.9869, test_f1=0.8319, test_acc=0.9869\n",
+            "Saved improved model to checkpoints/best_epoch_1.pt\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Train Epoch 2/20: 100%|██████████| 841/841 [00:25<00:00, 32.82it/s, avg_loss=0.758, batch_loss=0.0907]\n",
+            "Train Eval: 100%|██████████| 841/841 [00:05<00:00, 161.70it/s]\n",
+            "Test Eval: 100%|██████████| 211/211 [00:01<00:00, 125.73it/s]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Epoch 2: loss=0.7581, train_f1=0.8833, train_acc=0.9907, test_f1=0.8744, test_acc=0.9903\n",
+            "Saved improved model to checkpoints/best_epoch_2.pt\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Train Epoch 3/20: 100%|██████████| 841/841 [00:36<00:00, 23.06it/s, avg_loss=0.549, batch_loss=0.127]\n",
+            "Train Eval: 100%|██████████| 841/841 [00:05<00:00, 160.90it/s]\n",
+            "Test Eval: 100%|██████████| 211/211 [00:01<00:00, 115.40it/s]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Epoch 3: loss=0.5486, train_f1=0.9070, train_acc=0.9922, test_f1=0.8914, test_acc=0.9913\n",
+            "Saved improved model to checkpoints/best_epoch_3.pt\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Train Epoch 4/20: 100%|██████████| 841/841 [00:27<00:00, 31.09it/s, avg_loss=0.448, batch_loss=0.71]\n",
+            "Train Eval: 100%|██████████| 841/841 [00:05<00:00, 153.29it/s]\n",
+            "Test Eval: 100%|██████████| 211/211 [00:01<00:00, 161.35it/s]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Epoch 4: loss=0.4482, train_f1=0.9209, train_acc=0.9933, test_f1=0.8992, test_acc=0.9919\n",
+            "Saved improved model to checkpoints/best_epoch_4.pt\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Train Epoch 5/20: 100%|██████████| 841/841 [00:25<00:00, 32.91it/s, avg_loss=0.384, batch_loss=0.176]\n",
+            "Train Eval: 100%|██████████| 841/841 [00:05<00:00, 154.70it/s]\n",
+            "Test Eval: 100%|██████████| 211/211 [00:01<00:00, 113.95it/s]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Epoch 5: loss=0.3838, train_f1=0.9206, train_acc=0.9937, test_f1=0.8946, test_acc=0.9921\n",
+            "Saved improved model to checkpoints/best_epoch_5.pt\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Train Epoch 6/20: 100%|██████████| 841/841 [00:25<00:00, 33.20it/s, avg_loss=0.338, batch_loss=0.529]\n",
+            "Train Eval: 100%|██████████| 841/841 [00:05<00:00, 150.44it/s]\n",
+            "Test Eval: 100%|██████████| 211/211 [00:01<00:00, 166.03it/s]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Epoch 6: loss=0.3382, train_f1=0.9342, train_acc=0.9944, test_f1=0.9047, test_acc=0.9925\n",
+            "Saved improved model to checkpoints/best_epoch_6.pt\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Train Epoch 7/20: 100%|██████████| 841/841 [00:25<00:00, 32.74it/s, avg_loss=0.303, batch_loss=0.344]\n",
+            "Train Eval: 100%|██████████| 841/841 [00:05<00:00, 158.03it/s]\n",
+            "Test Eval: 100%|██████████| 211/211 [00:01<00:00, 112.37it/s]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Epoch 7: loss=0.3029, train_f1=0.9399, train_acc=0.9949, test_f1=0.9110, test_acc=0.9929\n",
+            "Saved improved model to checkpoints/best_epoch_7.pt\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Train Epoch 8/20: 100%|██████████| 841/841 [00:25<00:00, 33.26it/s, avg_loss=0.28, batch_loss=0.0176]\n",
+            "Train Eval: 100%|██████████| 841/841 [00:05<00:00, 148.56it/s]\n",
+            "Test Eval: 100%|██████████| 211/211 [00:01<00:00, 161.91it/s]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Epoch 8: loss=0.2798, train_f1=0.9449, train_acc=0.9953, test_f1=0.9110, test_acc=0.9930\n",
+            "Saved improved model to checkpoints/best_epoch_8.pt\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Train Epoch 9/20: 100%|██████████| 841/841 [00:26<00:00, 31.90it/s, avg_loss=0.257, batch_loss=0.113]\n",
+            "Train Eval: 100%|██████████| 841/841 [00:05<00:00, 149.53it/s]\n",
+            "Test Eval: 100%|██████████| 211/211 [00:01<00:00, 118.68it/s]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Epoch 9: loss=0.2575, train_f1=0.9497, train_acc=0.9957, test_f1=0.9092, test_acc=0.9930\n",
+            "Saved improved model to checkpoints/best_epoch_9.pt\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Train Epoch 10/20: 100%|██████████| 841/841 [00:26<00:00, 31.27it/s, avg_loss=0.242, batch_loss=0.335]\n",
+            "Train Eval: 100%|██████████| 841/841 [00:05<00:00, 154.94it/s]\n",
+            "Test Eval: 100%|██████████| 211/211 [00:01<00:00, 159.02it/s]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Epoch 10: loss=0.2419, train_f1=0.9499, train_acc=0.9958, test_f1=0.9010, test_acc=0.9926\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Train Epoch 11/20: 100%|██████████| 841/841 [00:26<00:00, 31.36it/s, avg_loss=0.228, batch_loss=0.639]\n",
+            "Train Eval: 100%|██████████| 841/841 [00:06<00:00, 131.67it/s]\n",
+            "Test Eval: 100%|██████████| 211/211 [00:01<00:00, 158.27it/s]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Epoch 11: loss=0.2276, train_f1=0.9527, train_acc=0.9960, test_f1=0.9130, test_acc=0.9931\n",
+            "Saved improved model to checkpoints/best_epoch_11.pt\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Train Epoch 12/20: 100%|██████████| 841/841 [00:28<00:00, 29.31it/s, avg_loss=0.216, batch_loss=0.529]\n",
+            "Train Eval: 100%|██████████| 841/841 [00:05<00:00, 156.81it/s]\n",
+            "Test Eval: 100%|██████████| 211/211 [00:01<00:00, 147.29it/s]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Epoch 12: loss=0.2157, train_f1=0.9546, train_acc=0.9960, test_f1=0.9110, test_acc=0.9932\n",
+            "Saved improved model to checkpoints/best_epoch_12.pt\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Train Epoch 13/20: 100%|██████████| 841/841 [00:27<00:00, 30.55it/s, avg_loss=0.206, batch_loss=0.502]\n",
+            "Train Eval: 100%|██████████| 841/841 [00:06<00:00, 138.67it/s]\n",
+            "Test Eval: 100%|██████████| 211/211 [00:01<00:00, 163.15it/s]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Epoch 13: loss=0.2059, train_f1=0.9593, train_acc=0.9965, test_f1=0.9129, test_acc=0.9933\n",
+            "Saved improved model to checkpoints/best_epoch_13.pt\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Train Epoch 14/20: 100%|██████████| 841/841 [00:26<00:00, 32.00it/s, avg_loss=0.198, batch_loss=0.413]\n",
+            "Train Eval: 100%|██████████| 841/841 [00:05<00:00, 154.97it/s]\n",
+            "Test Eval: 100%|██████████| 211/211 [00:01<00:00, 110.08it/s]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Epoch 14: loss=0.1975, train_f1=0.9612, train_acc=0.9966, test_f1=0.9102, test_acc=0.9930\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Train Epoch 15/20: 100%|██████████| 841/841 [00:27<00:00, 30.12it/s, avg_loss=0.191, batch_loss=0.0384]\n",
+            "Train Eval: 100%|██████████| 841/841 [00:05<00:00, 151.24it/s]\n",
+            "Test Eval: 100%|██████████| 211/211 [00:01<00:00, 151.00it/s]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Epoch 15: loss=0.1905, train_f1=0.9603, train_acc=0.9966, test_f1=0.9030, test_acc=0.9927\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Train Epoch 16/20: 100%|██████████| 841/841 [00:27<00:00, 30.24it/s, avg_loss=0.184, batch_loss=0.219]\n",
+            "Train Eval: 100%|██████████| 841/841 [00:06<00:00, 132.65it/s]\n",
+            "Test Eval: 100%|██████████| 211/211 [00:01<00:00, 159.54it/s]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Epoch 16: loss=0.1836, train_f1=0.9649, train_acc=0.9969, test_f1=0.9028, test_acc=0.9926\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Train Epoch 17/20: 100%|██████████| 841/841 [00:27<00:00, 30.78it/s, avg_loss=0.178, batch_loss=0.0707]\n",
+            "Train Eval: 100%|██████████| 841/841 [00:05<00:00, 158.34it/s]\n",
+            "Test Eval: 100%|██████████| 211/211 [00:01<00:00, 113.24it/s]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Epoch 17: loss=0.1777, train_f1=0.9607, train_acc=0.9967, test_f1=0.9092, test_acc=0.9931\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Train Epoch 18/20: 100%|██████████| 841/841 [00:27<00:00, 30.48it/s, avg_loss=0.173, batch_loss=0.557]\n",
+            "Train Eval: 100%|██████████| 841/841 [00:05<00:00, 151.59it/s]\n",
+            "Test Eval: 100%|██████████| 211/211 [00:01<00:00, 162.60it/s]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Epoch 18: loss=0.1728, train_f1=0.9607, train_acc=0.9968, test_f1=0.9039, test_acc=0.9928\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Train Epoch 19/20: 100%|██████████| 841/841 [00:27<00:00, 30.22it/s, avg_loss=0.168, batch_loss=0.0108]\n",
+            "Train Eval: 100%|██████████| 841/841 [00:06<00:00, 136.29it/s]\n",
+            "Test Eval: 100%|██████████| 211/211 [00:01<00:00, 161.68it/s]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Epoch 19: loss=0.1682, train_f1=0.9664, train_acc=0.9969, test_f1=0.9116, test_acc=0.9929\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Train Epoch 20/20: 100%|██████████| 841/841 [00:26<00:00, 31.60it/s, avg_loss=0.163, batch_loss=0.181]\n",
+            "Train Eval: 100%|██████████| 841/841 [00:05<00:00, 160.70it/s]\n",
+            "Test Eval: 100%|██████████| 211/211 [00:01<00:00, 164.59it/s]"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Epoch 20: loss=0.1626, train_f1=0.9647, train_acc=0.9969, test_f1=0.9044, test_acc=0.9928\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "\n"
+          ]
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
+            ],
+            "text/html": []
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
+            ],
+            "text/html": [
+              "<br>    <style><br>        .wandb-row {<br>            display: flex;<br>            flex-direction: row;<br>            flex-wrap: wrap;<br>            justify-content: flex-start;<br>            width: 100%;<br>        }<br>        .wandb-col {<br>            display: flex;<br>            flex-direction: column;<br>            flex-basis: 100%;<br>            flex: 1;<br>            padding: 10px;<br>        }<br>    </style><br><div class=\"wandb-row\"><div class=\"wandb-col\"><h3>Run history:</h3><br/><table class=\"wandb\"><tr><td>epoch</td><td>▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██</td></tr><tr><td>test_acc</td><td>▁▅▆▆▇▇███▇████▇▇█▇█▇</td></tr><tr><td>test_f1</td><td>▁▅▆▇▆▇███▇████▇▇█▇█▇</td></tr><tr><td>test_precision</td><td>▁▄▅▄▆▇▆▆▇▆▆█▇▆▆▅██▇█</td></tr><tr><td>test_recall</td><td>▁▄▆▇▆▇██▇▇█▇▇█▇▇▇▇▇▇</td></tr><tr><td>train_acc</td><td>▁▄▅▅▆▆▇▇▇▇▇▇████████</td></tr><tr><td>train_f1</td><td>▁▄▅▆▆▆▇▇▇▇▇▇████████</td></tr><tr><td>train_loss</td><td>█▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁</td></tr><tr><td>train_precision</td><td>▁▃▄▃▅▅▅▅▆▆▅▇▇▇▇▇▇█▇█</td></tr><tr><td>train_recall</td><td>▁▄▅▆▆▆▇▇▇▇█▇█████▇██</td></tr></table><br/></div><div class=\"wandb-col\"><h3>Run summary:</h3><br/><table class=\"wandb\"><tr><td>epoch</td><td>20</td></tr><tr><td>test_acc</td><td>0.99285</td></tr><tr><td>test_f1</td><td>0.90442</td></tr><tr><td>test_precision</td><td>0.9205</td></tr><tr><td>test_recall</td><td>0.88994</td></tr><tr><td>train_acc</td><td>0.99693</td></tr><tr><td>train_f1</td><td>0.96475</td></tr><tr><td>train_loss</td><td>0.16259</td></tr><tr><td>train_precision</td><td>0.97877</td></tr><tr><td>train_recall</td><td>0.95181</td></tr></table><br/></div></div>"
+            ]
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
+            ],
+            "text/html": [
+              " View run <strong style=\"color:#cdcd00\">CRF_VLSP2016</strong> at: <a href='https://wandb.ai/laiducaivn-fpt-university/NER/runs/tjmjkx7n' target=\"_blank\">https://wandb.ai/laiducaivn-fpt-university/NER/runs/tjmjkx7n</a><br> View project at: <a href='https://wandb.ai/laiducaivn-fpt-university/NER' target=\"_blank\">https://wandb.ai/laiducaivn-fpt-university/NER</a><br>Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 12 other file(s)"
+            ]
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
+            ],
+            "text/html": [
+              "Find logs at: <code>./wandb/run-20250605_133906-tjmjkx7n/logs</code>"
+            ]
+          },
+          "metadata": {}
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Per-class report on the test split, using `model` with whatever weights it currently holds;\n",
+        "# no checkpoint is reloaded in this cell.\n",
+        "model.eval()\n",
+        "all_preds, all_true = [], []\n",
+        "\n",
+        "with torch.no_grad():\n",
+        "    for x, y, lengths in tqdm(test_loader, desc=\"Generating classification report\"):\n",
+        "        mask = (y != -1)  # drop padded positions\n",
+        "        preds = model.decode(x, mask)\n",
+        "        for row, pred_seq in enumerate(preds):\n",
+        "            all_preds += pred_seq\n",
+        "            all_true += y[row][mask[row]].tolist()\n",
+        "\n",
+        "# digits=4 prints every score with four decimals.\n",
+        "report = classification_report(all_true, all_preds, digits=4)\n",
+        "print(\"Classification Report:\\n\", report)\n"
+      ],
+      "metadata": {
+        "id": "CBwl-uTjaA1y",
+        "outputId": "7597a9ab-bd18-4530-e6d6-e335a974f01a",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        }
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Generating classification report: 100%|██████████| 211/211 [00:02<00:00, 101.37it/s]"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Classification Report:\n",
+            "               precision    recall  f1-score   support\n",
+            "\n",
+            "           0     0.9968    0.9983    0.9976     68476\n",
+            "           1     0.9903    0.9754    0.9828      1464\n",
+            "           2     0.9941    0.9781    0.9860       686\n",
+            "           3     0.8384    0.7471    0.7901       257\n",
+            "           4     0.8560    0.7605    0.8054       430\n",
+            "           5     0.9066    0.9073    0.9070      1241\n",
+            "           6     0.8613    0.8628    0.8620       554\n",
+            "\n",
+            "    accuracy                         0.9928     73108\n",
+            "   macro avg     0.9205    0.8899    0.9044     73108\n",
+            "weighted avg     0.9927    0.9928    0.9927     73108\n",
+            "\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from google.colab import drive\n",
+        "drive.mount('/content/drive')"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "L1bDKxlyZRAy",
+        "outputId": "cf258765-6629-4d34-bf0c-431ba6575950"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Mounted at /content/drive\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import shutil\n",
+        "shutil.copy('/content/checkpoints/best_epoch_13.pt', '/content/drive/My Drive')"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 35
+        },
+        "id": "q4qCaBbrZcTZ",
+        "outputId": "57eff61e-f5ca-4597-e499-ea8b71d603a9"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "'/content/drive/My Drive/best_epoch_13.pt'"
+            ],
+            "application/vnd.google.colaboratory.intrinsic+json": {
+              "type": "string"
+            }
+          },
+          "metadata": {},
+          "execution_count": 13
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# Train/Valid/Test"
+      ],
+      "metadata": {
+        "id": "T0LAYLnU8ONv"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import torch\n",
+        "import torch.nn as nn\n",
+        "from torch.utils.data import Dataset, DataLoader\n",
+        "from torchcrf import CRF\n",
+        "from torch.nn.utils.rnn import pad_sequence\n",
+        "from sklearn.metrics import precision_recall_fscore_support, classification_report, accuracy_score\n",
+        "from sklearn.model_selection import train_test_split\n",
+        "from tqdm import tqdm\n",
+        "import wandb\n",
+        "import os\n",
+        "\n",
+        "# Initialize Weights & Biases\n",
+        "wandb.init(\n",
+        "    project=\"NER\",\n",
+        "    name=\"CRF_VLSP2016\",\n",
+        "    config={\n",
+        "        \"epochs\": 20,\n",
+        "        \"batch_size\": 16,\n",
+        "        \"learning_rate\": 1e-3,\n",
+        "        # train/val/test ratios\n",
+        "        \"train_ratio\": 0.70,\n",
+        "        \"val_ratio\": 0.15,\n",
+        "        \"test_ratio\": 0.15\n",
+        "    }\n",
+        ")\n",
+        "config = wandb.config\n",
+        "\n",
+        "# Create splits: first separate out test, then train/val\n",
+        "emb_train_val, emb_test, lbl_train_val, lbl_test = train_test_split(\n",
+        "    all_embeddings, all_labels,\n",
+        "    test_size=config.test_ratio,\n",
+        "    random_state=42\n",
+        ")\n",
+        "# Compute validation size relative to remaining (val_ratio / (train_ratio + val_ratio))\n",
+        "val_relative = config.val_ratio / (config.train_ratio + config.val_ratio)\n",
+        "emb_train, emb_val, lbl_train, lbl_val = train_test_split(\n",
+        "    emb_train_val, lbl_train_val,\n",
+        "    test_size=val_relative,\n",
+        "    random_state=42\n",
+        ")\n",
+        "\n",
+        "class NERDataset(Dataset):\n",
+        "    \"\"\"Index-aligned pairing of `embeddings[i]` with `labels[i]`.\"\"\"\n",
+        "    def __init__(self, embeddings, labels):\n",
+        "        self.embeddings, self.labels = embeddings, labels\n",
+        "\n",
+        "    def __len__(self):\n",
+        "        return len(self.embeddings)\n",
+        "\n",
+        "    def __getitem__(self, idx):\n",
+        "        return self.embeddings[idx], self.labels[idx]\n",
+        "\n",
+        "\n",
+        "def collate_fn(batch):\n",
+        "    \"\"\"Pad a batch of (embeddings, labels) pairs to the longest sequence in the batch.\n",
+        "\n",
+        "    Returns (padded_embs, padded_labels, lengths); padded embedding rows are zeros and\n",
+        "    padded label positions are -1, the sentinel the train/eval code masks on (y != -1).\n",
+        "    \"\"\"\n",
+        "    embeddings, labels = zip(*batch)\n",
+        "    lengths = [e.size(0) for e in embeddings]\n",
+        "    # pad_sequence allocates the padding with the inputs' own dtype and device.\n",
+        "    padded_embs = pad_sequence(list(embeddings), batch_first=True)\n",
+        "    padded_labels = pad_sequence(list(labels), batch_first=True, padding_value=-1).long()\n",
+        "    return padded_embs, padded_labels, lengths\n",
+        "\n",
+        "# Create DataLoaders\n",
+        "datasets = dict(\n",
+        "    train=NERDataset(emb_train, lbl_train),\n",
+        "    val=NERDataset(emb_val, lbl_val),\n",
+        "    test=NERDataset(emb_test, lbl_test),\n",
+        ")\n",
+        "# Only the training split is shuffled; validation and test batches keep a fixed order.\n",
+        "loaders = {\n",
+        "    name: DataLoader(split_ds, batch_size=config.batch_size, shuffle=(name == 'train'), collate_fn=collate_fn)\n",
+        "    for name, split_ds in datasets.items()\n",
+        "}\n",
+        "\n",
+        "# Model setup\n",
+        "num_tags = max(label.max().item() for label in all_labels) + 1\n",
+        "class CRFTagger(nn.Module):\n",
+        "    def __init__(self, input_dim, num_tags):\n",
+        "        super().__init__()\n",
+        "        self.hidden2tag = nn.Linear(input_dim, num_tags)  # per-token emission scores\n",
+        "        self.crf = CRF(num_tags, batch_first=True)\n",
+        "\n",
+        "    def forward(self, x, labels, mask):\n",
+        "        # Padded positions are labelled -1; clamp them to a valid tag id so -1 is never used as an index (mask still excludes them).\n",
+        "        return -self.crf(self.hidden2tag(x), labels.clamp(min=0), mask=mask, reduction='mean')\n",
+        "\n",
+        "    def decode(self, x, mask):\n",
+        "        # Returns the CRF's best tag sequence for each batch row, restricted to unmasked positions.\n",
+        "        return self.crf.decode(self.hidden2tag(x), mask)\n",
+        "\n",
+        "model = CRFTagger(input_dim=emb_train[0].size(1), num_tags=num_tags)\n",
+        "optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)\n",
+        "\n",
+        "# Watch model parameters and gradients\n",
+        "wandb.watch(model, log=\"all\")\n",
+        "\n",
+        "# Create checkpoint directory\n",
+        "os.makedirs(\"checkpoints\", exist_ok=True)\n",
+        "best_val_f1 = 0.0\n",
+        "\n",
+        "# Evaluation helper\n",
+        "def evaluate(model, loader):\n",
+        "    \"\"\"Decode `loader` and return (macro precision, macro recall, macro F1, accuracy) over non-padded tokens.\"\"\"\n",
+        "    model.eval()\n",
+        "    gold, predicted = [], []\n",
+        "    with torch.no_grad():\n",
+        "        for feats, tags, _ in loader:\n",
+        "            keep = tags != -1  # padded label positions are -1\n",
+        "            decoded = model.decode(feats, keep)\n",
+        "            for row, path in enumerate(decoded):\n",
+        "                gold += tags[row][keep[row]].tolist()\n",
+        "                predicted += path\n",
+        "    macro_p, macro_r, macro_f1, _ = precision_recall_fscore_support(\n",
+        "        gold, predicted, average='macro', zero_division=0)\n",
+        "    return macro_p, macro_r, macro_f1, accuracy_score(gold, predicted)\n",
+        "\n",
+        "# Training loop\n",
+        "for epoch in range(1, config.epochs + 1):\n",
+        "    model.train()\n",
+        "    total_loss = 0.0\n",
+        "    train_bar = tqdm(loaders['train'], desc=f\"Train Epoch {epoch}/{config.epochs}\")\n",
+        "    for batch_idx, (x, y, _) in enumerate(train_bar, start=1):\n",
+        "        mask = (y != -1)\n",
+        "        loss = model(x, y, mask)\n",
+        "        optimizer.zero_grad()\n",
+        "        loss.backward()\n",
+        "        optimizer.step()\n",
+        "\n",
+        "        total_loss += loss.item()\n",
+        "        train_bar.set_postfix(batch_loss=loss.item(), avg_loss=total_loss / batch_idx)\n",
+        "\n",
+        "    avg_train_loss = total_loss / len(loaders['train'])\n",
+        "    train_precision, train_recall, train_f1, train_acc = evaluate(model, loaders['train'])\n",
+        "    val_precision, val_recall, val_f1, val_acc = evaluate(model, loaders['val'])\n",
+        "\n",
+        "    # Print & log metrics for train and val\n",
+        "    print(f\"Epoch {epoch}: train_loss={avg_train_loss:.4f}, train_f1={train_f1:.4f}, val_f1={val_f1:.4f}\")\n",
+        "    wandb.log({\n",
+        "        \"epoch\": epoch,\n",
+        "        \"train_loss\": avg_train_loss,\n",
+        "        \"train_precision\": train_precision,\n",
+        "        \"train_recall\": train_recall,\n",
+        "        \"train_f1\": train_f1,\n",
+        "        \"train_acc\": train_acc,\n",
+        "        \"val_precision\": val_precision,\n",
+        "        \"val_recall\": val_recall,\n",
+        "        \"val_f1\": val_f1,\n",
+        "        \"val_acc\": val_acc\n",
+        "    })\n",
+        "\n",
+        "    # Save best model based on val_f1\n",
+        "    if val_f1 > best_val_f1:\n",
+        "        best_val_f1 = val_f1\n",
+        "        ckpt_path = f\"checkpoints/best_epoch_{epoch}.pt\"\n",
+        "        torch.save(model.state_dict(), ckpt_path)\n",
+        "        wandb.save(ckpt_path)\n",
+        "        print(f\"Saved improved model to {ckpt_path}\")\n",
+        "\n",
+        "# Final evaluation on test set\n",
+        "print(\"Evaluating on test set...\")\n",
+        "# Restore the best-validation checkpoint so the test metrics describe the\n",
+        "# selected model rather than whatever weights the last epoch left behind.\n",
+        "# best_val_f1 only moves off 0.0 when a checkpoint was written to ckpt_path,\n",
+        "# so the guard also protects against ckpt_path being undefined.\n",
+        "if best_val_f1 > 0.0:\n",
+        "    model.load_state_dict(torch.load(ckpt_path))\n",
+        "test_preds, test_true = [], []\n",
+        "model.eval()\n",
+        "with torch.no_grad():\n",
+        "    for x, y, _ in loaders['test']:\n",
+        "        # Positions labelled -1 are padding and are excluded from decoding/metrics\n",
+        "        mask = (y != -1)\n",
+        "        preds = model.decode(x, mask)\n",
+        "        for pred_seq, true_seq, m in zip(preds, y, mask):\n",
+        "            test_true.extend(true_seq[m].tolist())\n",
+        "            test_preds.extend(pred_seq)\n",
+        "\n",
+        "# Classification report\n",
+        "report_dict = classification_report(test_true, test_preds, output_dict=True)\n",
+        "print(classification_report(test_true, test_preds))\n",
+        "\n",
+        "# Log classification report table to wandb\n",
+        "columns = [\"label\", \"precision\", \"recall\", \"f1-score\", \"support\"]\n",
+        "rows = []\n",
+        "for label, metrics in report_dict.items():\n",
+        "    if label not in [\"accuracy\", \"macro avg\", \"weighted avg\"]:\n",
+        "        rows.append([label, metrics['precision'], metrics['recall'], metrics['f1-score'], metrics['support']])\n",
+        "# Add overall averages\n",
+        "rows.append([\"macro avg\", report_dict['macro avg']['precision'], report_dict['macro avg']['recall'], report_dict['macro avg']['f1-score'], report_dict['macro avg']['support']])\n",
+        "rows.append([\"weighted avg\", report_dict['weighted avg']['precision'], report_dict['weighted avg']['recall'], report_dict['weighted avg']['f1-score'], report_dict['weighted avg']['support']])\n",
+        "\n",
+        "table = wandb.Table(columns=columns, data=rows)\n",
+        "wandb.log({\"test_classification\": table})\n",
+        "\n",
+        "# Finish W&B run\n",
+        "wandb.finish()\n"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 1000
+        },
+        "id": "28KlV4cy8SAL",
+        "outputId": "d4700801-e21c-4559-ff6a-50ebd3643cc4"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "data": {
+            "text/html": [
+              "Tracking run with wandb version 0.19.11"
+            ],
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
+            ]
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "text/html": [
+              "Run data is saved locally in <code>/content/wandb/run-20250606_015838-r3oj54fe</code>"
+            ],
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
+            ]
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "text/html": [
+              "Syncing run <strong><a href='https://wandb.ai/laiducaivn-fpt-university/NER/runs/r3oj54fe' target=\"_blank\">CRF_VLSP2016</a></strong> to <a href='https://wandb.ai/laiducaivn-fpt-university/NER' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/developer-guide' target=\"_blank\">docs</a>)<br>"
+            ],
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
+            ]
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "text/html": [
+              " View project at <a href='https://wandb.ai/laiducaivn-fpt-university/NER' target=\"_blank\">https://wandb.ai/laiducaivn-fpt-university/NER</a>"
+            ],
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
+            ]
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "text/html": [
+              " View run at <a href='https://wandb.ai/laiducaivn-fpt-university/NER/runs/r3oj54fe' target=\"_blank\">https://wandb.ai/laiducaivn-fpt-university/NER/runs/r3oj54fe</a>"
+            ],
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
+            ]
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Train Epoch 1/20: 100%|██████████| 736/736 [00:18<00:00, 39.34it/s, avg_loss=2.91, batch_loss=1.26]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Epoch 1: train_loss=2.9090, train_f1=0.8125, val_f1=0.8168\n",
+            "Saved improved model to checkpoints/best_epoch_1.pt\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Train Epoch 2/20: 100%|██████████| 736/736 [00:20<00:00, 35.77it/s, avg_loss=0.835, batch_loss=0.186]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Epoch 2: train_loss=0.8350, train_f1=0.8793, val_f1=0.8784\n",
+            "Saved improved model to checkpoints/best_epoch_2.pt\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Train Epoch 3/20: 100%|██████████| 736/736 [00:19<00:00, 37.89it/s, avg_loss=0.6, batch_loss=0.803]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Epoch 3: train_loss=0.6004, train_f1=0.8985, val_f1=0.8891\n",
+            "Saved improved model to checkpoints/best_epoch_3.pt\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Train Epoch 4/20: 100%|██████████| 736/736 [00:19<00:00, 37.87it/s, avg_loss=0.485, batch_loss=0.377]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Epoch 4: train_loss=0.4847, train_f1=0.9165, val_f1=0.9112\n",
+            "Saved improved model to checkpoints/best_epoch_4.pt\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Train Epoch 5/20: 100%|██████████| 736/736 [00:19<00:00, 38.52it/s, avg_loss=0.413, batch_loss=0.0734]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Epoch 5: train_loss=0.4129, train_f1=0.9088, val_f1=0.8904\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Train Epoch 6/20: 100%|██████████| 736/736 [00:19<00:00, 37.70it/s, avg_loss=0.365, batch_loss=0.779]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Epoch 6: train_loss=0.3645, train_f1=0.9327, val_f1=0.9151\n",
+            "Saved improved model to checkpoints/best_epoch_6.pt\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Train Epoch 7/20: 100%|██████████| 736/736 [00:19<00:00, 38.16it/s, avg_loss=0.33, batch_loss=1.44]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Epoch 7: train_loss=0.3297, train_f1=0.9382, val_f1=0.9241\n",
+            "Saved improved model to checkpoints/best_epoch_7.pt\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Train Epoch 8/20: 100%|██████████| 736/736 [00:19<00:00, 37.06it/s, avg_loss=0.295, batch_loss=0.156]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Epoch 8: train_loss=0.2948, train_f1=0.9432, val_f1=0.9167\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Train Epoch 9/20: 100%|██████████| 736/736 [00:18<00:00, 38.98it/s, avg_loss=0.276, batch_loss=0.119]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Epoch 9: train_loss=0.2757, train_f1=0.9477, val_f1=0.9247\n",
+            "Saved improved model to checkpoints/best_epoch_9.pt\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Train Epoch 10/20: 100%|██████████| 736/736 [00:18<00:00, 39.42it/s, avg_loss=0.254, batch_loss=0.141]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Epoch 10: train_loss=0.2535, train_f1=0.9496, val_f1=0.9263\n",
+            "Saved improved model to checkpoints/best_epoch_10.pt\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Train Epoch 11/20: 100%|██████████| 736/736 [00:19<00:00, 38.60it/s, avg_loss=0.238, batch_loss=0.104]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Epoch 11: train_loss=0.2382, train_f1=0.9517, val_f1=0.9217\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Train Epoch 12/20: 100%|██████████| 736/736 [00:19<00:00, 38.10it/s, avg_loss=0.226, batch_loss=0.39]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Epoch 12: train_loss=0.2255, train_f1=0.9579, val_f1=0.9239\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Train Epoch 13/20: 100%|██████████| 736/736 [00:19<00:00, 37.54it/s, avg_loss=0.214, batch_loss=0.0747]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Epoch 13: train_loss=0.2142, train_f1=0.9555, val_f1=0.9213\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Train Epoch 14/20: 100%|██████████| 736/736 [00:19<00:00, 37.30it/s, avg_loss=0.204, batch_loss=0.062]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Epoch 14: train_loss=0.2040, train_f1=0.9606, val_f1=0.9255\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Train Epoch 15/20: 100%|██████████| 736/736 [00:19<00:00, 37.20it/s, avg_loss=0.195, batch_loss=0.0167]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Epoch 15: train_loss=0.1949, train_f1=0.9634, val_f1=0.9196\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Train Epoch 16/20: 100%|██████████| 736/736 [00:19<00:00, 37.11it/s, avg_loss=0.187, batch_loss=0.333]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Epoch 16: train_loss=0.1870, train_f1=0.9638, val_f1=0.9215\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Train Epoch 17/20: 100%|██████████| 736/736 [00:19<00:00, 37.21it/s, avg_loss=0.181, batch_loss=0.0567]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Epoch 17: train_loss=0.1811, train_f1=0.9580, val_f1=0.9179\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Train Epoch 18/20: 100%|██████████| 736/736 [00:19<00:00, 36.90it/s, avg_loss=0.175, batch_loss=0.554]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Epoch 18: train_loss=0.1747, train_f1=0.9669, val_f1=0.9237\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Train Epoch 19/20: 100%|██████████| 736/736 [00:19<00:00, 37.49it/s, avg_loss=0.169, batch_loss=0.0126]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Epoch 19: train_loss=0.1689, train_f1=0.9685, val_f1=0.9231\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Train Epoch 20/20: 100%|██████████| 736/736 [00:20<00:00, 36.24it/s, avg_loss=0.164, batch_loss=0.252]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Epoch 20: train_loss=0.1635, train_f1=0.9719, val_f1=0.9237\n",
+            "Evaluating on test set...\n",
+            "              precision    recall  f1-score   support\n",
+            "\n",
+            "           0       1.00      1.00      1.00     51036\n",
+            "           1       0.99      0.98      0.99      1112\n",
+            "           2       0.98      0.99      0.99       506\n",
+            "           3       0.83      0.77      0.80       180\n",
+            "           4       0.83      0.73      0.78       291\n",
+            "           5       0.89      0.91      0.90       939\n",
+            "           6       0.86      0.85      0.85       428\n",
+            "\n",
+            "    accuracy                           0.99     54492\n",
+            "   macro avg       0.91      0.89      0.90     54492\n",
+            "weighted avg       0.99      0.99      0.99     54492\n",
+            "\n"
+          ]
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
+            ],
+            "text/html": []
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
+            ],
+            "text/html": [
+              "<br>    <style><br>        .wandb-row {<br>            display: flex;<br>            flex-direction: row;<br>            flex-wrap: wrap;<br>            justify-content: flex-start;<br>            width: 100%;<br>        }<br>        .wandb-col {<br>            display: flex;<br>            flex-direction: column;<br>            flex-basis: 100%;<br>            flex: 1;<br>            padding: 10px;<br>        }<br>    </style><br><div class=\"wandb-row\"><div class=\"wandb-col\"><h3>Run history:</h3><br/><table class=\"wandb\"><tr><td>epoch</td><td>▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██</td></tr><tr><td>train_acc</td><td>▁▄▅▅▅▆▆▇▇▇▇▇▇███▇███</td></tr><tr><td>train_f1</td><td>▁▄▅▆▅▆▇▇▇▇▇▇▇███▇███</td></tr><tr><td>train_loss</td><td>█▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁</td></tr><tr><td>train_precision</td><td>▁▃▄▃▅▆▅▆▆▇▆▇▇▇███▇▇█</td></tr><tr><td>train_recall</td><td>▁▄▅▆▅▆▇▇▇▇▇▇▇██▇▇███</td></tr><tr><td>val_acc</td><td>▁▅▆▇▆▇█▇████████████</td></tr><tr><td>val_f1</td><td>▁▅▆▇▆▇█▇████████▇███</td></tr><tr><td>val_precision</td><td>▁▅▄▃▅▇▆▇▇▇▅▆▆▇▇▇█▆▆▇</td></tr><tr><td>val_recall</td><td>▁▅▆█▆▇█▇██████▇▇▇███</td></tr></table><br/></div><div class=\"wandb-col\"><h3>Run summary:</h3><br/><table class=\"wandb\"><tr><td>epoch</td><td>20</td></tr><tr><td>train_acc</td><td>0.99748</td></tr><tr><td>train_f1</td><td>0.97193</td></tr><tr><td>train_loss</td><td>0.16354</td></tr><tr><td>train_precision</td><td>0.97333</td></tr><tr><td>train_recall</td><td>0.9706</td></tr><tr><td>val_acc</td><td>0.99327</td></tr><tr><td>val_f1</td><td>0.92372</td></tr><tr><td>val_precision</td><td>0.93356</td></tr><tr><td>val_recall</td><td>0.91553</td></tr></table><br/></div></div>"
+            ]
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
+            ],
+            "text/html": [
+              " View run <strong style=\"color:#cdcd00\">CRF_VLSP2016</strong> at: <a href='https://wandb.ai/laiducaivn-fpt-university/NER/runs/r3oj54fe' target=\"_blank\">https://wandb.ai/laiducaivn-fpt-university/NER/runs/r3oj54fe</a><br> View project at: <a href='https://wandb.ai/laiducaivn-fpt-university/NER' target=\"_blank\">https://wandb.ai/laiducaivn-fpt-university/NER</a><br>Synced 5 W&B file(s), 1 media file(s), 2 artifact file(s) and 8 other file(s)"
+            ]
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
+            ],
+            "text/html": [
+              "Find logs at: <code>./wandb/run-20250606_015838-r3oj54fe/logs</code>"
+            ]
+          },
+          "metadata": {}
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# Train CRF With Keras"
+      ],
+      "metadata": {
+        "id": "LV5FdgTTXFv3"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import numpy as np\n",
+        "from tensorflow.keras.preprocessing.sequence import pad_sequences\n",
+        "\n",
+        "X = [emb.numpy() for emb in all_embeddings]\n",
+        "y = [label.numpy() for label in all_labels]\n",
+        "\n",
+        "max_len = max(len(seq) for seq in X)\n",
+        "num_tags = max(label.max().item() for label in all_labels) + 1\n",
+        "\n",
+        "# Pad embeddings and labels on the same side ('post') so that token i of\n",
+        "# X_padded still lines up with label i of y_padded. pad_sequences defaults to\n",
+        "# padding='pre', which would shift the labels of every shorter sequence.\n",
+        "X_padded = pad_sequences(X, maxlen=max_len, dtype='float32', padding='post')\n",
+        "y_padded = pad_sequences(y, maxlen=max_len, padding='post', value=-1)\n"
+      ],
+      "metadata": {
+        "id": "l_m8_-UgHlxo"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import tensorflow as tf\n",
+        "import tensorflow_addons as tfa\n",
+        "from tensorflow.keras import layers, Model, Input\n",
+        "\n",
+        "input_dim = X_padded.shape[2]\n",
+        "\n",
+        "inputs = Input(shape=(max_len, input_dim), name=\"input_embedding\")\n",
+        "masking = layers.Masking(mask_value=0.0)(inputs)\n",
+        "dense = layers.Dense(num_tags)(masking)\n",
+        "\n",
+        "# CRF Layer\n",
+        "crf = tfa.layers.CRF(num_tags)\n",
+        "outputs = crf(dense)\n",
+        "\n",
+        "model = Model(inputs=inputs, outputs=outputs)\n",
+        "model.compile(optimizer='adam', loss=crf.loss, metrics=[crf.accuracy])\n",
+        "model.summary()\n"
+      ],
+      "metadata": {
+        "id": "kYrGkzFPXMBH"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "model.fit(\n",
+        "    X_padded, y_padded,\n",
+        "    batch_size=32,\n",
+        "    epochs=5,\n",
+        "    validation_split=0.1,\n",
+        "    verbose=1\n",
+        ")\n"
+      ],
+      "metadata": {
+        "id": "pyxVhvn3XQ5q"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "pred = model.predict(X_padded)\n",
+        "pred_labels = np.argmax(pred, axis=-1)\n",
+        "\n",
+        "from sklearn.metrics import classification_report\n",
+        "\n",
+        "y_true_flat = []\n",
+        "y_pred_flat = []\n",
+        "\n",
+        "for i in range(len(y_padded)):\n",
+        "    for j in range(max_len):\n",
+        "        if y_padded[i][j] != -1:\n",
+        "            y_true_flat.append(y_padded[i][j])\n",
+        "            y_pred_flat.append(pred_labels[i][j])\n",
+        "\n",
+        "print(classification_report(y_true_flat, y_pred_flat, digits=4))\n"
+      ],
+      "metadata": {
+        "id": "zT7BtMiVXSMc"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# Train Random Forest"
+      ],
+      "metadata": {
+        "id": "1VrZlknUb6cn"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import torch\n",
+        "import numpy as np\n",
+        "from sklearn.model_selection import train_test_split\n",
+        "\n",
+        "X_flat = []\n",
+        "y_flat = []\n",
+        "\n",
+        "for emb_seq, label_seq in zip(all_embeddings, all_labels):\n",
+        "    for emb, label in zip(emb_seq, label_seq):\n",
+        "        X_flat.append(emb.numpy())   # emb: [768]\n",
+        "        y_flat.append(label.item())  # label: int\n",
+        "\n",
+        "X_flat = np.array(X_flat)  # [N, 768]\n",
+        "y_flat = np.array(y_flat)  # [N]\n"
+      ],
+      "metadata": {
+        "id": "VK2nmLo0b8d3"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "print(X_flat.shape)\n",
+        "print(y_flat.shape)"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "GeqgiB4CtzA1",
+        "outputId": "452979ff-25be-49a9-c809-4acffd3b3c54"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "(368172, 768)\n",
+            "(368172,)\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Check the class imbalance in the data\n",
+        "unique_values, counts = np.unique(y_flat, return_counts=True)\n",
+        "\n",
+        "# Print each label value and the number of times it occurs\n",
+        "for val, count in zip(unique_values, counts):\n",
+        "    print(f\"Label {val}: {count} times\")\n"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "VeSfRzgOm6w-",
+        "outputId": "163a877f-9860-4b3a-e850-f6d8df9c6cfe"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Label 0: 344986 times\n",
+            "Label 1: 7450 times\n",
+            "Label 2: 3504 times\n",
+            "Label 3: 1204 times\n",
+            "Label 4: 2050 times\n",
+            "Label 5: 6211 times\n",
+            "Label 6: 2767 times\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "X_train, X_test, y_train, y_test = train_test_split(\n",
+        "    X_flat, y_flat, test_size=0.2, random_state=42, stratify=y_flat)\n"
+      ],
+      "metadata": {
+        "id": "AOOUix-NcERf"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import lightgbm as lgb\n",
+        "from sklearn.metrics import accuracy_score, f1_score, classification_report\n",
+        "\n",
+        "# Initialize the wandb project run\n",
+        "wandb.init(project=\"NER\", name=\"RandomForest_100Trees_VLSP2016\")\n",
+        "\n",
+        "# Create the Datasets for LightGBM\n",
+        "train_data = lgb.Dataset(X_train, label=y_train)\n",
+        "test_data = lgb.Dataset(X_test, label=y_test, reference=train_data)\n",
+        "\n",
+        "# Configure the LightGBM parameters (Random Forest mode)\n",
+        "params = {\n",
+        "    \"objective\": \"multiclass\",          # for multiclass classification\n",
+        "    \"num_class\": len(np.unique(y_train)),\n",
+        "    \"metric\": \"multi_logloss\",\n",
+        "    \"boosting_type\": \"rf\",              # random forest mode in LightGBM\n",
+        "    \"num_leaves\": 31,\n",
+        "    \"bagging_freq\": 1,\n",
+        "    \"bagging_fraction\": 0.8,\n",
+        "    \"feature_fraction\": 0.8,\n",
+        "    \"bagging_seed\": 42,\n",
+        "    \"verbose\": -1,\n",
+        "    \"seed\": 42,\n",
+        "    \"is_unbalance\": True\n",
+        "}\n",
+        "\n",
+        "\n",
+        "\n",
+        "# Train the model, with the wandb callback integrated to log metrics\n",
+        "model = lgb.train(\n",
+        "    params,\n",
+        "    train_data,\n",
+        "    num_boost_round=100,\n",
+        "    valid_sets=[train_data, test_data],\n",
+        "    valid_names=[\"train\", \"test\"],\n",
+        "    callbacks=[wandb.lightgbm.wandb_callback()]\n",
+        ")\n",
+        "\n",
+        "# Predict on the test set\n",
+        "y_pred_prob = model.predict(X_test)\n",
+        "y_pred = np.argmax(y_pred_prob, axis=1)\n",
+        "\n",
+        "# Map numeric ids back to entity label names\n",
+        "label_map = {\n",
+        "    0: 'O',\n",
+        "    1: 'B-PER',\n",
+        "    2: 'I-PER',\n",
+        "    3: 'B-ORG',\n",
+        "    4: 'I-ORG',\n",
+        "    5: 'B-LOC',\n",
+        "    6: 'I-LOC'\n",
+        "}\n",
+        "\n",
+        "# Convert y_test and y_pred to the original labels\n",
+        "y_test_labels = [label_map[i] for i in y_test]\n",
+        "y_pred_labels = [label_map[i] for i in y_pred]\n",
+        "\n",
+        "# Print the classification report with the real label names\n",
+        "print(\"\\nClassification Report (theo label gốc):\")\n",
+        "print(classification_report(y_test_labels, y_pred_labels, digits=4))\n",
+        "\n",
+        "# Build a table to log the classification report\n",
+        "report_dict = classification_report(y_test_labels, y_pred_labels, output_dict=True)\n",
+        "table = wandb.Table(columns=[\"Label\", \"Precision\", \"Recall\", \"F1-Score\", \"Support\"])\n",
+        "\n",
+        "for label, scores in report_dict.items():\n",
+        "    if isinstance(scores, dict):  # Skip rows such as 'accuracy'\n",
+        "        table.add_data(\n",
+        "            label,\n",
+        "            scores[\"precision\"],\n",
+        "            scores[\"recall\"],\n",
+        "            scores[\"f1-score\"],\n",
+        "            scores[\"support\"]\n",
+        "        )\n",
+        "\n",
+        "wandb.log({\"Classification Report\": table})\n",
+        "\n",
+        "\n",
+        "# Finish the wandb run\n",
+        "wandb.finish()\n"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 721
+        },
+        "id": "G6PUbpyPgF84",
+        "outputId": "6efc696f-1b6f-4cea-da68-c25e22bed461"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
+            ],
+            "text/html": [
+              "Tracking run with wandb version 0.19.11"
+            ]
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
+            ],
+            "text/html": [
+              "Run data is saved locally in <code>/content/wandb/run-20250605_114334-x4x6fpo4</code>"
+            ]
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
+            ],
+            "text/html": [
+              "Syncing run <strong><a href='https://wandb.ai/laiducaivn-fpt-university/NER/runs/x4x6fpo4' target=\"_blank\">RandomForest_100Trees_VLSP2016</a></strong> to <a href='https://wandb.ai/laiducaivn-fpt-university/NER' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/developer-guide' target=\"_blank\">docs</a>)<br>"
+            ]
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
+            ],
+            "text/html": [
+              " View project at <a href='https://wandb.ai/laiducaivn-fpt-university/NER' target=\"_blank\">https://wandb.ai/laiducaivn-fpt-university/NER</a>"
+            ]
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
+            ],
+            "text/html": [
+              " View run at <a href='https://wandb.ai/laiducaivn-fpt-university/NER/runs/x4x6fpo4' target=\"_blank\">https://wandb.ai/laiducaivn-fpt-university/NER/runs/x4x6fpo4</a>"
+            ]
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "\n",
+            "Classification Report (theo label gốc):\n",
+            "              precision    recall  f1-score   support\n",
+            "\n",
+            "       B-LOC     0.4461    0.6167    0.5177      1242\n",
+            "       B-ORG     0.2841    0.6224    0.3901       241\n",
+            "       B-PER     0.5859    0.8423    0.6911      1490\n",
+            "       I-LOC     0.2812    0.6401    0.3907       553\n",
+            "       I-ORG     0.2350    0.4122    0.2994       410\n",
+            "       I-PER     0.6530    0.7489    0.6977       701\n",
+            "           O     0.9914    0.9550    0.9728     68998\n",
+            "\n",
+            "    accuracy                         0.9386     73635\n",
+            "   macro avg     0.4967    0.6911    0.5657     73635\n",
+            "weighted avg     0.9589    0.9386    0.9468     73635\n",
+            "\n"
+          ]
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
+            ],
+            "text/html": []
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
+            ],
+            "text/html": [
+              "<br>    <style><br>        .wandb-row {<br>            display: flex;<br>            flex-direction: row;<br>            flex-wrap: wrap;<br>            justify-content: flex-start;<br>            width: 100%;<br>        }<br>        .wandb-col {<br>            display: flex;<br>            flex-direction: column;<br>            flex-basis: 100%;<br>            flex: 1;<br>            padding: 10px;<br>        }<br>    </style><br><div class=\"wandb-row\"><div class=\"wandb-col\"><h3>Run history:</h3><br/><table class=\"wandb\"><tr><td>iteration</td><td>▁▁▁▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇██</td></tr><tr><td>test_multi_logloss</td><td>█▆▄▄▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁</td></tr><tr><td>train_multi_logloss</td><td>█▇▅▄▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁</td></tr></table><br/></div><div class=\"wandb-col\"><h3>Run summary:</h3><br/><table class=\"wandb\"><tr><td>iteration</td><td>99</td></tr></table><br/></div></div>"
+            ]
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
+            ],
+            "text/html": [
+              " View run <strong style=\"color:#cdcd00\">RandomForest_100Trees_VLSP2016</strong> at: <a href='https://wandb.ai/laiducaivn-fpt-university/NER/runs/x4x6fpo4' target=\"_blank\">https://wandb.ai/laiducaivn-fpt-university/NER/runs/x4x6fpo4</a><br> View project at: <a href='https://wandb.ai/laiducaivn-fpt-university/NER' target=\"_blank\">https://wandb.ai/laiducaivn-fpt-university/NER</a><br>Synced 5 W&B file(s), 1 media file(s), 2 artifact file(s) and 0 other file(s)"
+            ]
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
+            ],
+            "text/html": [
+              "Find logs at: <code>./wandb/run-20250605_114334-x4x6fpo4/logs</code>"
+            ]
+          },
+          "metadata": {}
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# Lưu data"
+      ],
+      "metadata": {
+        "id": "4Ppa-bdT8r2v"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "def save_tensors(all_embeddings, all_labels, embed_path='embeddings.pt', label_path='labels.pt'):\n",
+        "    torch.save(all_embeddings, embed_path)\n",
+        "    torch.save(all_labels, label_path)\n",
+        "    print(f\"Saved embeddings to {embed_path} and labels to {label_path}\")"
+      ],
+      "metadata": {
+        "id": "s9GulKoGqx6d"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from google.colab import drive\n",
+        "import shutil\n",
+        "\n",
+        "# Call the save_tensors helper defined in the previous cell\n",
+        "save_tensors(all_embeddings, all_labels)\n",
+        "\n",
+        "# Mount Google Drive and copy the saved files to it\n",
+        "drive.mount('/content/drive')\n",
+        "shutil.copy('embeddings.pt', '/content/drive/My Drive')\n",
+        "shutil.copy('labels.pt', '/content/drive/My Drive')\n"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 70
+        },
+        "id": "AGAJZH_h8ve6",
+        "outputId": "13849039-adb8-40e8-ed20-544f65d018f8"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Saved embeddings to embeddings.pt and labels to labels.pt\n",
+            "Mounted at /content/drive\n"
+          ]
+        },
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "'/content/drive/My Drive/labels.pt'"
+            ],
+            "application/vnd.google.colaboratory.intrinsic+json": {
+              "type": "string"
+            }
+          },
+          "metadata": {},
+          "execution_count": 14
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "model.save_model('lightgbm_rf_model.txt')\n",
+        "shutil.copy('lightgbm_rf_model.txt', '/content/drive/My Drive')"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 35
+        },
+        "id": "ESWu8QI59dwl",
+        "outputId": "7eba9b3d-4c54-48ca-99eb-76771c01140e"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "'/content/drive/My Drive/lightgbm_rf_model.txt'"
+            ],
+            "application/vnd.google.colaboratory.intrinsic+json": {
+              "type": "string"
+            }
+          },
+          "metadata": {},
+          "execution_count": 16
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [],
+      "metadata": {
+        "id": "BKx8yPUE-UHS"
+      },
+      "execution_count": null,
+      "outputs": []
+    }
+  ]
+}
\ No newline at end of file