diff --git "a/novel-translation/08r2_eval-lf-py3.11.ipynb" "b/novel-translation/08r2_eval-lf-py3.11.ipynb" new file mode 100644--- /dev/null +++ "b/novel-translation/08r2_eval-lf-py3.11.ipynb" @@ -0,0 +1,5525 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "0ea8b46b-839b-445b-8043-ccdf4e920ace", + "showTitle": false, + "title": "" + } + }, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "6d394937-6c99-4a7c-9d32-7600a280032f", + "showTitle": false, + "title": "" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "workding dir: /home/inflaton/code/projects/courses/llm-finetuning\n" + ] + } + ], + "source": [ + "import os\n", + "import sys\n", + "from pathlib import Path\n", + "\n", + "workding_dir = str(Path.cwd().parent)\n", + "os.chdir(workding_dir)\n", + "sys.path.append(workding_dir)\n", + "print(\"workding dir:\", workding_dir)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "9f67ec60-2f24-411c-84eb-0dd664b44775", + "showTitle": false, + "title": "" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n" + ] + }, + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from dotenv import find_dotenv, load_dotenv\n", + "\n", + "found_dotenv = find_dotenv(\".env\")\n", + "\n", + "if len(found_dotenv) == 0:\n", + " found_dotenv = find_dotenv(\".env.example\")\n", + 
"print(f\"loading env vars from: {found_dotenv}\")\n", + "load_dotenv(found_dotenv, override=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "f1597656-8042-4878-9d3b-9ebfb8dd86dc", + "showTitle": false, + "title": "" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "('unsloth/Qwen2-0.5B-Instruct',\n", + " True,\n", + " None,\n", + " None,\n", + " 2048,\n", + " 6,\n", + " None,\n", + " 'datasets/mac/mac.tsv',\n", + " 'results/mac-results_lf-r2.csv')" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import os\n", + "\n", + "model_name = os.getenv(\"MODEL_NAME\")\n", + "token = os.getenv(\"HF_TOKEN\") or None\n", + "load_in_4bit = os.getenv(\"LOAD_IN_4BIT\") == \"true\"\n", + "local_model = os.getenv(\"LOCAL_MODEL\")\n", + "hub_model = os.getenv(\"HUB_MODEL\")\n", + "num_train_epochs = int(os.getenv(\"NUM_TRAIN_EPOCHS\") or 0)\n", + "data_path = os.getenv(\"DATA_PATH\")\n", + "results_path = os.getenv(\"RESULTS_PATH\")\n", + "\n", + "max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!\n", + "dtype = (\n", + " None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+\n", + ")\n", + "\n", + "model_name, load_in_4bit, local_model, hub_model, max_seq_length, num_train_epochs, dtype, data_path, results_path" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Fri Jul 5 20:43:31 2024 \n", + "+---------------------------------------------------------------------------------------+\n", + "| NVIDIA-SMI 545.23.07 Driver Version: 546.12 CUDA Version: 12.3 |\n", + "|-----------------------------------------+----------------------+----------------------+\n", + "| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. 
def evaluate_model_all_epochs(
    model_name,
    num_train_epochs,
    adapter_path_base,
    load_in_4bit=True,
    steps_per_epoch=560,
):
    """Evaluate a base model and each of its per-epoch LoRA checkpoints.

    Runs ``llm_toolkit/eval_lf.py`` once for the plain base model (epoch 0)
    and once per fine-tuning epoch, pointing ``ADAPTER_NAME_OR_PATH`` at the
    corresponding ``checkpoint-<step>`` directory. Configuration is passed to
    the eval script through environment variables, matching how the script
    reads its settings.

    Args:
        model_name: Hugging Face model id to evaluate (exported as MODEL_NAME).
        num_train_epochs: number of fine-tuning epochs / saved checkpoints.
        adapter_path_base: directory containing ``checkpoint-<step>`` subfolders.
        load_in_4bit: whether the eval script should quantize to 4 bit.
        steps_per_epoch: optimizer steps per epoch used to name checkpoints.
            Defaults to 560, the value the original training run produced.
            NOTE(review): confirm this matches the trainer's save interval.

    Raises:
        subprocess.CalledProcessError: if an eval run exits with a non-zero
            status (the original ``!python`` invocation ignored failures).
    """
    import subprocess
    import sys

    os.environ["MODEL_NAME"] = model_name
    os.environ["LOAD_IN_4BIT"] = "true" if load_in_4bit else "false"
    for epoch in range(num_train_epochs + 1):
        print(f"Epoch {epoch}")
        if epoch == 0:
            # Epoch 0 scores the un-adapted base model. os.unsetenv() does NOT
            # update the os.environ mapping (and is unavailable on some
            # platforms); deleting the key from os.environ is the documented
            # way to remove a variable so child processes do not inherit it.
            os.environ.pop("ADAPTER_NAME_OR_PATH", None)
        else:
            os.environ["ADAPTER_NAME_OR_PATH"] = (
                f"{adapter_path_base}/checkpoint-{steps_per_epoch * epoch}"
            )
        # subprocess.run replaces the notebook-only `!python ...` so the helper
        # also works outside IPython; check=True surfaces eval failures instead
        # of silently continuing with the next epoch.
        subprocess.run([sys.executable, "llm_toolkit/eval_lf.py"], check=True)
A clatter echoed as the bullet landed on the ground, a thunderclap sound as the metal bullets flew through the air like tiny raindrops. The clangs were so loud that it was almost deafening.\n", + "--------\n", + "step 3: Old Ge lifted his gun and lowered his eyes as he fired a round at the muzzle of his revolver. A clatter echoed as the bullet landed on the ground, a thunderclap sound as the metal bullets flew through the air like tiny raindrops. The clangs were so loud that it was almost deafening.\n", + "100%|█████████████████████████████████████| 1133/1133 [1:12:38<00:00, 3.85s/it]\n", + "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", + "42.221 GB of memory reserved.\n", + " chinese ... Qwen/Qwen2-0.5B-Instruct\n", + "0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... Old Ge lifted his gun and lowered his eyes as ...\n", + "\n", + "[1 rows x 3 columns]\n", + "{'accuracy': 0.00088261253309797, 'correct_ids': [659], 'meteor': 0.26081317536040693, 'bleu_scores': {'bleu': 0.04524849442288909, 'precisions': [0.29694860978538656, 0.07063552833078102, 0.022536982493298476, 0.008867807795421737], 'brevity_penalty': 1.0, 'length_ratio': 1.0757535607817157, 'translation_length': 32477, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.30875100352153667, 'rouge2': 0.0888448776296162, 'rougeL': 0.25554192339577875, 'rougeLsum': 0.25541634440613653}}\n", + "Epoch 1\n", + "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n", + "Adding /home/inflaton/code/projects/courses/llm-finetuning to sys.path\n", + "loading /home/inflaton/code/projects/courses/llm-finetuning/llm_toolkit/translation_utils.py\n", + "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n", + "[nltk_data] Package wordnet is already up-to-date!\n", + "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n", + "[nltk_data] Package punkt is already up-to-date!\n", + "[nltk_data] Downloading package omw-1.4 to 
/home/inflaton/nltk_data...\n", + "[nltk_data] Package omw-1.4 is already up-to-date!\n", + "Qwen/Qwen2-0.5B-Instruct llama-factory/saves/qwen2-0.5b-bf16/lora/sft/checkpoint-560 False datasets/mac/mac.tsv results/mac-results_lf-r2.csv\n", + "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", + "0.0 GB of memory reserved.\n", + "loading model: Qwen/Qwen2-0.5B-Instruct\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-05 21:57:32,624 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/vocab.json\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-05 21:57:32,624 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/merges.txt\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-05 21:57:32,624 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/tokenizer.json\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-05 21:57:32,624 >> loading file added_tokens.json from cache at None\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-05 21:57:32,624 >> loading file special_tokens_map.json from cache at None\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-05 21:57:32,624 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/tokenizer_config.json\n", + "[WARNING|logging.py:313] 2024-07-05 21:57:32,754 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", + "07/05/2024 21:57:32 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n", + "07/05/2024 21:57:32 - INFO - llamafactory.data.template - Add 
<|im_start|> to stop words.\n", + "[INFO|configuration_utils.py:733] 2024-07-05 21:57:33,154 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/config.json\n", + "[INFO|configuration_utils.py:800] 2024-07-05 21:57:33,155 >> Model config Qwen2Config {\n", + " \"_name_or_path\": \"Qwen/Qwen2-0.5B-Instruct\",\n", + " \"architectures\": [\n", + " \"Qwen2ForCausalLM\"\n", + " ],\n", + " \"attention_dropout\": 0.0,\n", + " \"bos_token_id\": 151643,\n", + " \"eos_token_id\": 151645,\n", + " \"hidden_act\": \"silu\",\n", + " \"hidden_size\": 896,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 4864,\n", + " \"max_position_embeddings\": 32768,\n", + " \"max_window_layers\": 24,\n", + " \"model_type\": \"qwen2\",\n", + " \"num_attention_heads\": 14,\n", + " \"num_hidden_layers\": 24,\n", + " \"num_key_value_heads\": 2,\n", + " \"rms_norm_eps\": 1e-06,\n", + " \"rope_theta\": 1000000.0,\n", + " \"sliding_window\": 32768,\n", + " \"tie_word_embeddings\": true,\n", + " \"torch_dtype\": \"bfloat16\",\n", + " \"transformers_version\": \"4.42.3\",\n", + " \"use_cache\": true,\n", + " \"use_sliding_window\": false,\n", + " \"vocab_size\": 151936\n", + "}\n", + "\n", + "07/05/2024 21:57:33 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n", + "07/05/2024 21:57:33 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n", + "[INFO|modeling_utils.py:3556] 2024-07-05 21:57:33,197 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/model.safetensors\n", + "[INFO|modeling_utils.py:1531] 2024-07-05 21:57:34,819 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n", + "[INFO|configuration_utils.py:1000] 2024-07-05 
21:57:34,821 >> Generate config GenerationConfig {\n", + " \"bos_token_id\": 151643,\n", + " \"eos_token_id\": 151645\n", + "}\n", + "\n", + "[INFO|modeling_utils.py:4364] 2024-07-05 21:58:53,028 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n", + "\n", + "[INFO|modeling_utils.py:4372] 2024-07-05 21:58:53,029 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-0.5B-Instruct.\n", + "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n", + "[INFO|configuration_utils.py:955] 2024-07-05 21:58:53,351 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/generation_config.json\n", + "[INFO|configuration_utils.py:1000] 2024-07-05 21:58:53,352 >> Generate config GenerationConfig {\n", + " \"bos_token_id\": 151643,\n", + " \"do_sample\": true,\n", + " \"eos_token_id\": [\n", + " 151645,\n", + " 151643\n", + " ],\n", + " \"pad_token_id\": 151643,\n", + " \"repetition_penalty\": 1.1,\n", + " \"temperature\": 0.7,\n", + " \"top_k\": 20,\n", + " \"top_p\": 0.8\n", + "}\n", + "\n", + "07/05/2024 21:58:53 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n", + "07/05/2024 21:58:54 - INFO - llamafactory.model.adapter - Loaded adapter(s): llama-factory/saves/qwen2-0.5b-bf16/lora/sft/checkpoint-560\n", + "07/05/2024 21:58:54 - INFO - llamafactory.model.loader - all params: 498,431,872\n", + "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. 
Max memory = 11.994 GB.\n", + "0.658 GB of memory reserved.\n", + "loading train/test data files\n", + "Map: 100%|████████████████████████| 4528/4528 [00:00<00:00, 13341.90 examples/s]\n", + "Map: 100%|█████████████████████████| 1133/1133 [00:00<00:00, 4246.31 examples/s]\n", + "DatasetDict({\n", + " train: Dataset({\n", + " features: ['chinese', 'english', 'text', 'prompt'],\n", + " num_rows: 4528\n", + " })\n", + " test: Dataset({\n", + " features: ['chinese', 'english', 'text', 'prompt'],\n", + " num_rows: 1133\n", + " })\n", + "})\n", + "Evaluating model: Qwen/Qwen2-0.5B-Instruct\n", + " 0%| | 0/1133 [00:00\n", + "--------\n", + "step 2: Old Goong cocked his gun and lowered his eyes as he pulled the trigger. The hammer clinked, and a hail of metal balls fell like ice from the sky. The sound of a crackling pine was heard above him.\n", + "--------\n", + "step 3: Old Goong cocked his gun and lowered his eyes as he pulled the trigger. The hammer clinked, and a hail of metal balls fell like ice from the sky. The sound of a crackling pine was heard above him.\n", + "100%|█████████████████████████████████████| 1133/1133 [2:01:28<00:00, 6.43s/it]\n", + "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", + "42.279 GB of memory reserved.\n", + " chinese ... Qwen/Qwen2-0.5B-Instruct_checkpoint-560\n", + "0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... 
Old Goong cocked his gun and lowered his eyes ...\n", + "\n", + "[1 rows x 4 columns]\n", + "{'accuracy': 0.0, 'correct_ids': [], 'meteor': 0.29049497377443884, 'bleu_scores': {'bleu': 0.04774897884656905, 'precisions': [0.3009629887457942, 0.07359865638964701, 0.024153239576542175, 0.009716234476545911], 'brevity_penalty': 1.0, 'length_ratio': 1.1419675389201722, 'translation_length': 34476, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.3246452473704004, 'rouge2': 0.09593992376129423, 'rougeL': 0.26493434905174373, 'rougeLsum': 0.26501903150454154}}\n", + "Epoch 2\n", + "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n", + "Adding /home/inflaton/code/projects/courses/llm-finetuning to sys.path\n", + "loading /home/inflaton/code/projects/courses/llm-finetuning/llm_toolkit/translation_utils.py\n", + "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n", + "[nltk_data] Package wordnet is already up-to-date!\n", + "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n", + "[nltk_data] Package punkt is already up-to-date!\n", + "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n", + "[nltk_data] Package omw-1.4 is already up-to-date!\n", + "Qwen/Qwen2-0.5B-Instruct llama-factory/saves/qwen2-0.5b-bf16/lora/sft/checkpoint-1120 False datasets/mac/mac.tsv results/mac-results_lf-r2.csv\n", + "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. 
Max memory = 11.994 GB.\n", + "0.0 GB of memory reserved.\n", + "loading model: Qwen/Qwen2-0.5B-Instruct\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 00:00:55,837 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/vocab.json\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 00:00:55,837 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/merges.txt\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 00:00:55,837 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/tokenizer.json\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 00:00:55,837 >> loading file added_tokens.json from cache at None\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 00:00:55,837 >> loading file special_tokens_map.json from cache at None\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 00:00:55,838 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/tokenizer_config.json\n", + "[WARNING|logging.py:313] 2024-07-06 00:00:55,965 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", + "07/06/2024 00:00:55 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n", + "07/06/2024 00:00:55 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n", + "[INFO|configuration_utils.py:733] 2024-07-06 00:00:56,623 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/config.json\n", + 
"[INFO|configuration_utils.py:800] 2024-07-06 00:00:56,623 >> Model config Qwen2Config {\n", + " \"_name_or_path\": \"Qwen/Qwen2-0.5B-Instruct\",\n", + " \"architectures\": [\n", + " \"Qwen2ForCausalLM\"\n", + " ],\n", + " \"attention_dropout\": 0.0,\n", + " \"bos_token_id\": 151643,\n", + " \"eos_token_id\": 151645,\n", + " \"hidden_act\": \"silu\",\n", + " \"hidden_size\": 896,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 4864,\n", + " \"max_position_embeddings\": 32768,\n", + " \"max_window_layers\": 24,\n", + " \"model_type\": \"qwen2\",\n", + " \"num_attention_heads\": 14,\n", + " \"num_hidden_layers\": 24,\n", + " \"num_key_value_heads\": 2,\n", + " \"rms_norm_eps\": 1e-06,\n", + " \"rope_theta\": 1000000.0,\n", + " \"sliding_window\": 32768,\n", + " \"tie_word_embeddings\": true,\n", + " \"torch_dtype\": \"bfloat16\",\n", + " \"transformers_version\": \"4.42.3\",\n", + " \"use_cache\": true,\n", + " \"use_sliding_window\": false,\n", + " \"vocab_size\": 151936\n", + "}\n", + "\n", + "07/06/2024 00:00:56 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n", + "07/06/2024 00:00:56 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n", + "[INFO|modeling_utils.py:3556] 2024-07-06 00:00:56,669 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/model.safetensors\n", + "[INFO|modeling_utils.py:1531] 2024-07-06 00:00:57,733 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n", + "[INFO|configuration_utils.py:1000] 2024-07-06 00:00:57,736 >> Generate config GenerationConfig {\n", + " \"bos_token_id\": 151643,\n", + " \"eos_token_id\": 151645\n", + "}\n", + "\n", + "[INFO|modeling_utils.py:4364] 2024-07-06 00:01:23,016 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n", + "\n", + 
"[INFO|modeling_utils.py:4372] 2024-07-06 00:01:23,016 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-0.5B-Instruct.\n", + "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n", + "[INFO|configuration_utils.py:955] 2024-07-06 00:01:23,305 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/generation_config.json\n", + "[INFO|configuration_utils.py:1000] 2024-07-06 00:01:23,305 >> Generate config GenerationConfig {\n", + " \"bos_token_id\": 151643,\n", + " \"do_sample\": true,\n", + " \"eos_token_id\": [\n", + " 151645,\n", + " 151643\n", + " ],\n", + " \"pad_token_id\": 151643,\n", + " \"repetition_penalty\": 1.1,\n", + " \"temperature\": 0.7,\n", + " \"top_k\": 20,\n", + " \"top_p\": 0.8\n", + "}\n", + "\n", + "07/06/2024 00:01:23 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n", + "07/06/2024 00:01:23 - INFO - llamafactory.model.adapter - Loaded adapter(s): llama-factory/saves/qwen2-0.5b-bf16/lora/sft/checkpoint-1120\n", + "07/06/2024 00:01:23 - INFO - llamafactory.model.loader - all params: 498,431,872\n", + "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", + "0.658 GB of memory reserved.\n", + "loading train/test data files\n", + "DatasetDict({\n", + " train: Dataset({\n", + " features: ['chinese', 'english', 'text', 'prompt'],\n", + " num_rows: 4528\n", + " })\n", + " test: Dataset({\n", + " features: ['chinese', 'english', 'text', 'prompt'],\n", + " num_rows: 1133\n", + " })\n", + "})\n", + "Evaluating model: Qwen/Qwen2-0.5B-Instruct\n", + " 0%| | 0/1133 [00:00\n", + "--------\n", + "step 2: Old Geng held his gun up, his eyes narrowed. 
The trigger crackled and a shower of gold feathers fell from his fingers as he fired his pistol. Crisp thundering sounds rang out, like a clatter of metal against stone.\n", + "--------\n", + "step 3: Old Geng held his gun up, his eyes narrowed. The trigger crackled and a shower of gold feathers fell from his fingers as he fired his pistol. Crisp thundering sounds rang out, like a clatter of metal against stone.\n", + "100%|███████████████████████████████████████| 1133/1133 [29:02<00:00, 1.54s/it]\n", + "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", + "6.992 GB of memory reserved.\n", + " chinese ... Qwen/Qwen2-0.5B-Instruct_checkpoint-1120\n", + "0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... Old Geng held his gun up, his eyes narrowed. T...\n", + "\n", + "[1 rows x 5 columns]\n", + "{'accuracy': 0.00088261253309797, 'correct_ids': [659], 'meteor': 0.31049607994218004, 'bleu_scores': {'bleu': 0.06668675115525585, 'precisions': [0.3681131155533886, 0.1027518944200718, 0.037731568998109644, 0.017020772450833267], 'brevity_penalty': 0.9498953137120124, 'length_ratio': 0.9511096389532958, 'translation_length': 28714, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.3479658441183239, 'rouge2': 0.11472239367740286, 'rougeL': 0.2868370512147882, 'rougeLsum': 0.2867332633148184}}\n", + "Epoch 3\n", + "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n", + "Adding /home/inflaton/code/projects/courses/llm-finetuning to sys.path\n", + "loading /home/inflaton/code/projects/courses/llm-finetuning/llm_toolkit/translation_utils.py\n", + "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n", + "[nltk_data] Package wordnet is already up-to-date!\n", + "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n", + "[nltk_data] Package punkt is already up-to-date!\n", + "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n", + "[nltk_data] Package omw-1.4 
is already up-to-date!\n", + "Qwen/Qwen2-0.5B-Instruct llama-factory/saves/qwen2-0.5b-bf16/lora/sft/checkpoint-1680 False datasets/mac/mac.tsv results/mac-results_lf-r2.csv\n", + "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", + "0.0 GB of memory reserved.\n", + "loading model: Qwen/Qwen2-0.5B-Instruct\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 00:30:45,885 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/vocab.json\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 00:30:45,885 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/merges.txt\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 00:30:45,885 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/tokenizer.json\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 00:30:45,885 >> loading file added_tokens.json from cache at None\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 00:30:45,885 >> loading file special_tokens_map.json from cache at None\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 00:30:45,885 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/tokenizer_config.json\n", + "[WARNING|logging.py:313] 2024-07-06 00:30:46,030 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", + "07/06/2024 00:30:46 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n", + "07/06/2024 00:30:46 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n", + 
"[INFO|configuration_utils.py:733] 2024-07-06 00:30:46,401 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/config.json\n", + "[INFO|configuration_utils.py:800] 2024-07-06 00:30:46,402 >> Model config Qwen2Config {\n", + " \"_name_or_path\": \"Qwen/Qwen2-0.5B-Instruct\",\n", + " \"architectures\": [\n", + " \"Qwen2ForCausalLM\"\n", + " ],\n", + " \"attention_dropout\": 0.0,\n", + " \"bos_token_id\": 151643,\n", + " \"eos_token_id\": 151645,\n", + " \"hidden_act\": \"silu\",\n", + " \"hidden_size\": 896,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 4864,\n", + " \"max_position_embeddings\": 32768,\n", + " \"max_window_layers\": 24,\n", + " \"model_type\": \"qwen2\",\n", + " \"num_attention_heads\": 14,\n", + " \"num_hidden_layers\": 24,\n", + " \"num_key_value_heads\": 2,\n", + " \"rms_norm_eps\": 1e-06,\n", + " \"rope_theta\": 1000000.0,\n", + " \"sliding_window\": 32768,\n", + " \"tie_word_embeddings\": true,\n", + " \"torch_dtype\": \"bfloat16\",\n", + " \"transformers_version\": \"4.42.3\",\n", + " \"use_cache\": true,\n", + " \"use_sliding_window\": false,\n", + " \"vocab_size\": 151936\n", + "}\n", + "\n", + "07/06/2024 00:30:46 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n", + "07/06/2024 00:30:46 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n", + "[INFO|modeling_utils.py:3556] 2024-07-06 00:30:46,448 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/model.safetensors\n", + "[INFO|modeling_utils.py:1531] 2024-07-06 00:30:47,453 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n", + "[INFO|configuration_utils.py:1000] 2024-07-06 00:30:47,456 >> Generate config 
GenerationConfig {\n", + " \"bos_token_id\": 151643,\n", + " \"eos_token_id\": 151645\n", + "}\n", + "\n", + "[INFO|modeling_utils.py:4364] 2024-07-06 00:31:12,438 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n", + "\n", + "[INFO|modeling_utils.py:4372] 2024-07-06 00:31:12,439 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-0.5B-Instruct.\n", + "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n", + "[INFO|configuration_utils.py:955] 2024-07-06 00:31:12,728 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/generation_config.json\n", + "[INFO|configuration_utils.py:1000] 2024-07-06 00:31:12,729 >> Generate config GenerationConfig {\n", + " \"bos_token_id\": 151643,\n", + " \"do_sample\": true,\n", + " \"eos_token_id\": [\n", + " 151645,\n", + " 151643\n", + " ],\n", + " \"pad_token_id\": 151643,\n", + " \"repetition_penalty\": 1.1,\n", + " \"temperature\": 0.7,\n", + " \"top_k\": 20,\n", + " \"top_p\": 0.8\n", + "}\n", + "\n", + "07/06/2024 00:31:12 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n", + "07/06/2024 00:31:13 - INFO - llamafactory.model.adapter - Loaded adapter(s): llama-factory/saves/qwen2-0.5b-bf16/lora/sft/checkpoint-1680\n", + "07/06/2024 00:31:13 - INFO - llamafactory.model.loader - all params: 498,431,872\n", + "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. 
Max memory = 11.994 GB.\n", + "0.658 GB of memory reserved.\n", + "loading train/test data files\n", + "DatasetDict({\n", + " train: Dataset({\n", + " features: ['chinese', 'english', 'text', 'prompt'],\n", + " num_rows: 4528\n", + " })\n", + " test: Dataset({\n", + " features: ['chinese', 'english', 'text', 'prompt'],\n", + " num_rows: 1133\n", + " })\n", + "})\n", + "Evaluating model: Qwen/Qwen2-0.5B-Instruct\n", + " 0%| | 0/1133 [00:00\n", + "--------\n", + "step 2: Old Geng took his pistol from his holster and raised a pair of triangular eyes. The barrel started firing, and the hammer clicked. A shower of gold sparrows fell down like hailstones on the ground, cracking and clattering as though they were being thrown by wind.\n", + "--------\n", + "step 3: Old Geng took his pistol from his holster and raised a pair of triangular eyes. The barrel started firing, and the hammer clicked. A shower of gold sparrows fell down like hailstones on the ground, cracking and clattering as though they were being thrown by wind.\n", + "100%|███████████████████████████████████████| 1133/1133 [31:24<00:00, 1.66s/it]\n", + "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", + "5.885 GB of memory reserved.\n", + " chinese ... Qwen/Qwen2-0.5B-Instruct_checkpoint-1680\n", + "0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... 
Old Geng took his pistol from his holster and ...\n", + "\n", + "[1 rows x 6 columns]\n", + "{'accuracy': 0.0, 'correct_ids': [], 'meteor': 0.3229970135854747, 'bleu_scores': {'bleu': 0.06974645118974397, 'precisions': [0.3523769402767426, 0.10115723608961852, 0.038224802406099184, 0.017367558711086837], 'brevity_penalty': 1.0, 'length_ratio': 1.0221596555150712, 'translation_length': 30859, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.35019397498390753, 'rouge2': 0.1213649003504492, 'rougeL': 0.2888414547712551, 'rougeLsum': 0.28908710512654723}}\n", + "Epoch 4\n", + "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n", + "Adding /home/inflaton/code/projects/courses/llm-finetuning to sys.path\n", + "loading /home/inflaton/code/projects/courses/llm-finetuning/llm_toolkit/translation_utils.py\n", + "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n", + "[nltk_data] Package wordnet is already up-to-date!\n", + "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n", + "[nltk_data] Package punkt is already up-to-date!\n", + "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n", + "[nltk_data] Package omw-1.4 is already up-to-date!\n", + "Qwen/Qwen2-0.5B-Instruct llama-factory/saves/qwen2-0.5b-bf16/lora/sft/checkpoint-2240 False datasets/mac/mac.tsv results/mac-results_lf-r2.csv\n", + "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. 
Max memory = 11.994 GB.\n", + "0.0 GB of memory reserved.\n", + "loading model: Qwen/Qwen2-0.5B-Instruct\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 01:02:58,401 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/vocab.json\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 01:02:58,401 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/merges.txt\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 01:02:58,401 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/tokenizer.json\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 01:02:58,401 >> loading file added_tokens.json from cache at None\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 01:02:58,401 >> loading file special_tokens_map.json from cache at None\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 01:02:58,401 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/tokenizer_config.json\n", + "[WARNING|logging.py:313] 2024-07-06 01:02:58,564 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", + "07/06/2024 01:02:58 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n", + "07/06/2024 01:02:58 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n", + "[INFO|configuration_utils.py:733] 2024-07-06 01:02:58,870 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/config.json\n", + 
"[INFO|configuration_utils.py:800] 2024-07-06 01:02:58,871 >> Model config Qwen2Config {\n", + " \"_name_or_path\": \"Qwen/Qwen2-0.5B-Instruct\",\n", + " \"architectures\": [\n", + " \"Qwen2ForCausalLM\"\n", + " ],\n", + " \"attention_dropout\": 0.0,\n", + " \"bos_token_id\": 151643,\n", + " \"eos_token_id\": 151645,\n", + " \"hidden_act\": \"silu\",\n", + " \"hidden_size\": 896,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 4864,\n", + " \"max_position_embeddings\": 32768,\n", + " \"max_window_layers\": 24,\n", + " \"model_type\": \"qwen2\",\n", + " \"num_attention_heads\": 14,\n", + " \"num_hidden_layers\": 24,\n", + " \"num_key_value_heads\": 2,\n", + " \"rms_norm_eps\": 1e-06,\n", + " \"rope_theta\": 1000000.0,\n", + " \"sliding_window\": 32768,\n", + " \"tie_word_embeddings\": true,\n", + " \"torch_dtype\": \"bfloat16\",\n", + " \"transformers_version\": \"4.42.3\",\n", + " \"use_cache\": true,\n", + " \"use_sliding_window\": false,\n", + " \"vocab_size\": 151936\n", + "}\n", + "\n", + "07/06/2024 01:02:58 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n", + "07/06/2024 01:02:58 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n", + "[INFO|modeling_utils.py:3556] 2024-07-06 01:02:58,913 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/model.safetensors\n", + "[INFO|modeling_utils.py:1531] 2024-07-06 01:02:59,768 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n", + "[INFO|configuration_utils.py:1000] 2024-07-06 01:02:59,771 >> Generate config GenerationConfig {\n", + " \"bos_token_id\": 151643,\n", + " \"eos_token_id\": 151645\n", + "}\n", + "\n", + "[INFO|modeling_utils.py:4364] 2024-07-06 01:03:25,366 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n", + "\n", + 
"[INFO|modeling_utils.py:4372] 2024-07-06 01:03:25,366 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-0.5B-Instruct.\n", + "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n", + "[INFO|configuration_utils.py:955] 2024-07-06 01:03:25,684 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/generation_config.json\n", + "[INFO|configuration_utils.py:1000] 2024-07-06 01:03:25,684 >> Generate config GenerationConfig {\n", + " \"bos_token_id\": 151643,\n", + " \"do_sample\": true,\n", + " \"eos_token_id\": [\n", + " 151645,\n", + " 151643\n", + " ],\n", + " \"pad_token_id\": 151643,\n", + " \"repetition_penalty\": 1.1,\n", + " \"temperature\": 0.7,\n", + " \"top_k\": 20,\n", + " \"top_p\": 0.8\n", + "}\n", + "\n", + "07/06/2024 01:03:25 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n", + "07/06/2024 01:03:26 - INFO - llamafactory.model.adapter - Loaded adapter(s): llama-factory/saves/qwen2-0.5b-bf16/lora/sft/checkpoint-2240\n", + "07/06/2024 01:03:26 - INFO - llamafactory.model.loader - all params: 498,431,872\n", + "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", + "0.658 GB of memory reserved.\n", + "loading train/test data files\n", + "DatasetDict({\n", + " train: Dataset({\n", + " features: ['chinese', 'english', 'text', 'prompt'],\n", + " num_rows: 4528\n", + " })\n", + " test: Dataset({\n", + " features: ['chinese', 'english', 'text', 'prompt'],\n", + " num_rows: 1133\n", + " })\n", + "})\n", + "Evaluating model: Qwen/Qwen2-0.5B-Instruct\n", + " 0%| | 0/1133 [00:00\n", + "--------\n", + "step 2: Old Geng raised his rifle, narrowed his eyes, and fired. 
A shot rang out as the bullet flew down from a distant windowpane, hammering the golden sparrows of ice and snow, cracking the leaves on the ground like a hailstorm.\n", + "--------\n", + "step 3: Old Geng raised his rifle, narrowed his eyes, and fired. A shot rang out as the bullet flew down from a distant windowpane, hammering the golden sparrows of ice and snow, cracking the leaves on the ground like a hailstorm.\n", + "100%|███████████████████████████████████████| 1133/1133 [30:51<00:00, 1.63s/it]\n", + "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", + "7.842 GB of memory reserved.\n", + " chinese ... Qwen/Qwen2-0.5B-Instruct_checkpoint-2240\n", + "0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... Old Geng raised his rifle, narrowed his eyes, ...\n", + "\n", + "[1 rows x 7 columns]\n", + "{'accuracy': 0.00176522506619594, 'correct_ids': [147, 555], 'meteor': 0.3162958975982396, 'bleu_scores': {'bleu': 0.07169822392654703, 'precisions': [0.3574799511567275, 0.10412095447065277, 0.03987872302479044, 0.017803382270953353], 'brevity_penalty': 1.0, 'length_ratio': 1.0036767141437561, 'translation_length': 30301, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.34455286773137617, 'rouge2': 0.12246702123907623, 'rougeL': 0.28572160041791167, 'rougeLsum': 0.28540154083752656}}\n", + "Epoch 5\n", + "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n", + "Adding /home/inflaton/code/projects/courses/llm-finetuning to sys.path\n", + "loading /home/inflaton/code/projects/courses/llm-finetuning/llm_toolkit/translation_utils.py\n", + "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n", + "[nltk_data] Package wordnet is already up-to-date!\n", + "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n", + "[nltk_data] Package punkt is already up-to-date!\n", + "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n", + "[nltk_data] Package omw-1.4 is 
already up-to-date!\n", + "Qwen/Qwen2-0.5B-Instruct llama-factory/saves/qwen2-0.5b-bf16/lora/sft/checkpoint-2800 False datasets/mac/mac.tsv results/mac-results_lf-r2.csv\n", + "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", + "0.0 GB of memory reserved.\n", + "loading model: Qwen/Qwen2-0.5B-Instruct\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 01:34:37,013 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/vocab.json\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 01:34:37,013 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/merges.txt\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 01:34:37,013 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/tokenizer.json\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 01:34:37,013 >> loading file added_tokens.json from cache at None\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 01:34:37,013 >> loading file special_tokens_map.json from cache at None\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 01:34:37,013 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/tokenizer_config.json\n", + "[WARNING|logging.py:313] 2024-07-06 01:34:37,140 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", + "07/06/2024 01:34:37 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n", + "07/06/2024 01:34:37 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n", + "[INFO|configuration_utils.py:733] 
2024-07-06 01:34:37,533 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/config.json\n", + "[INFO|configuration_utils.py:800] 2024-07-06 01:34:37,534 >> Model config Qwen2Config {\n", + " \"_name_or_path\": \"Qwen/Qwen2-0.5B-Instruct\",\n", + " \"architectures\": [\n", + " \"Qwen2ForCausalLM\"\n", + " ],\n", + " \"attention_dropout\": 0.0,\n", + " \"bos_token_id\": 151643,\n", + " \"eos_token_id\": 151645,\n", + " \"hidden_act\": \"silu\",\n", + " \"hidden_size\": 896,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 4864,\n", + " \"max_position_embeddings\": 32768,\n", + " \"max_window_layers\": 24,\n", + " \"model_type\": \"qwen2\",\n", + " \"num_attention_heads\": 14,\n", + " \"num_hidden_layers\": 24,\n", + " \"num_key_value_heads\": 2,\n", + " \"rms_norm_eps\": 1e-06,\n", + " \"rope_theta\": 1000000.0,\n", + " \"sliding_window\": 32768,\n", + " \"tie_word_embeddings\": true,\n", + " \"torch_dtype\": \"bfloat16\",\n", + " \"transformers_version\": \"4.42.3\",\n", + " \"use_cache\": true,\n", + " \"use_sliding_window\": false,\n", + " \"vocab_size\": 151936\n", + "}\n", + "\n", + "07/06/2024 01:34:37 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n", + "07/06/2024 01:34:37 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n", + "[INFO|modeling_utils.py:3556] 2024-07-06 01:34:37,582 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/model.safetensors\n", + "[INFO|modeling_utils.py:1531] 2024-07-06 01:34:38,570 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n", + "[INFO|configuration_utils.py:1000] 2024-07-06 01:34:38,572 >> Generate config GenerationConfig {\n", + " \"bos_token_id\": 
151643,\n", + " \"eos_token_id\": 151645\n", + "}\n", + "\n", + "[INFO|modeling_utils.py:4364] 2024-07-06 01:35:04,788 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n", + "\n", + "[INFO|modeling_utils.py:4372] 2024-07-06 01:35:04,788 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-0.5B-Instruct.\n", + "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n", + "[INFO|configuration_utils.py:955] 2024-07-06 01:35:05,254 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/generation_config.json\n", + "[INFO|configuration_utils.py:1000] 2024-07-06 01:35:05,254 >> Generate config GenerationConfig {\n", + " \"bos_token_id\": 151643,\n", + " \"do_sample\": true,\n", + " \"eos_token_id\": [\n", + " 151645,\n", + " 151643\n", + " ],\n", + " \"pad_token_id\": 151643,\n", + " \"repetition_penalty\": 1.1,\n", + " \"temperature\": 0.7,\n", + " \"top_k\": 20,\n", + " \"top_p\": 0.8\n", + "}\n", + "\n", + "07/06/2024 01:35:05 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n", + "07/06/2024 01:35:05 - INFO - llamafactory.model.adapter - Loaded adapter(s): llama-factory/saves/qwen2-0.5b-bf16/lora/sft/checkpoint-2800\n", + "07/06/2024 01:35:05 - INFO - llamafactory.model.loader - all params: 498,431,872\n", + "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. 
Max memory = 11.994 GB.\n", + "0.658 GB of memory reserved.\n", + "loading train/test data files\n", + "DatasetDict({\n", + " train: Dataset({\n", + " features: ['chinese', 'english', 'text', 'prompt'],\n", + " num_rows: 4528\n", + " })\n", + " test: Dataset({\n", + " features: ['chinese', 'english', 'text', 'prompt'],\n", + " num_rows: 1133\n", + " })\n", + "})\n", + "Evaluating model: Qwen/Qwen2-0.5B-Instruct\n", + " 0%| | 0/1133 [00:00\n", + "--------\n", + "step 2: Old Geng raised his rifle,眯着眼睛,the trigger clicked, a hail of bullets flying down from his fingertips like ice martins screeching with delight. Then came the sound of sand flying, crunching as it flew through the air.\n", + "--------\n", + "step 3: Old Geng raised his rifle,眯着眼睛,the trigger clicked, a hail of bullets flying down from his fingertips like ice martins screeching with delight. Then came the sound of sand flying, crunching as it flew through the air.\n", + "100%|█████████████████████████████████████| 1133/1133 [1:14:52<00:00, 3.97s/it]\n", + "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", + "42.277 GB of memory reserved.\n", + " chinese ... Qwen/Qwen2-0.5B-Instruct_checkpoint-2800\n", + "0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... 
Old Geng raised his rifle,眯着眼睛,the trigger cli...\n", + "\n", + "[1 rows x 8 columns]\n", + "{'accuracy': 0.00176522506619594, 'correct_ids': [147, 364], 'meteor': 0.31283536857877553, 'bleu_scores': {'bleu': 0.060313705990244354, 'precisions': [0.3079815657779411, 0.08660474354480664, 0.03277549540840986, 0.01513729905548258], 'brevity_penalty': 1.0, 'length_ratio': 1.1715468698244451, 'translation_length': 35369, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.34381816601376697, 'rouge2': 0.11947696823377901, 'rougeL': 0.28276793110866827, 'rougeLsum': 0.2825445910163455}}\n", + "Epoch 6\n", + "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n", + "Adding /home/inflaton/code/projects/courses/llm-finetuning to sys.path\n", + "loading /home/inflaton/code/projects/courses/llm-finetuning/llm_toolkit/translation_utils.py\n", + "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n", + "[nltk_data] Package wordnet is already up-to-date!\n", + "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n", + "[nltk_data] Package punkt is already up-to-date!\n", + "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n", + "[nltk_data] Package omw-1.4 is already up-to-date!\n", + "Qwen/Qwen2-0.5B-Instruct llama-factory/saves/qwen2-0.5b-bf16/lora/sft/checkpoint-3360 False datasets/mac/mac.tsv results/mac-results_lf-r2.csv\n", + "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. 
Max memory = 11.994 GB.\n", + "0.0 GB of memory reserved.\n", + "loading model: Qwen/Qwen2-0.5B-Instruct\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 02:50:21,290 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/vocab.json\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 02:50:21,290 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/merges.txt\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 02:50:21,290 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/tokenizer.json\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 02:50:21,290 >> loading file added_tokens.json from cache at None\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 02:50:21,290 >> loading file special_tokens_map.json from cache at None\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 02:50:21,290 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/tokenizer_config.json\n", + "[WARNING|logging.py:313] 2024-07-06 02:50:21,413 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", + "07/06/2024 02:50:21 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n", + "07/06/2024 02:50:21 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n", + "[INFO|configuration_utils.py:733] 2024-07-06 02:50:21,722 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/config.json\n", + 
"[INFO|configuration_utils.py:800] 2024-07-06 02:50:21,723 >> Model config Qwen2Config {\n", + " \"_name_or_path\": \"Qwen/Qwen2-0.5B-Instruct\",\n", + " \"architectures\": [\n", + " \"Qwen2ForCausalLM\"\n", + " ],\n", + " \"attention_dropout\": 0.0,\n", + " \"bos_token_id\": 151643,\n", + " \"eos_token_id\": 151645,\n", + " \"hidden_act\": \"silu\",\n", + " \"hidden_size\": 896,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 4864,\n", + " \"max_position_embeddings\": 32768,\n", + " \"max_window_layers\": 24,\n", + " \"model_type\": \"qwen2\",\n", + " \"num_attention_heads\": 14,\n", + " \"num_hidden_layers\": 24,\n", + " \"num_key_value_heads\": 2,\n", + " \"rms_norm_eps\": 1e-06,\n", + " \"rope_theta\": 1000000.0,\n", + " \"sliding_window\": 32768,\n", + " \"tie_word_embeddings\": true,\n", + " \"torch_dtype\": \"bfloat16\",\n", + " \"transformers_version\": \"4.42.3\",\n", + " \"use_cache\": true,\n", + " \"use_sliding_window\": false,\n", + " \"vocab_size\": 151936\n", + "}\n", + "\n", + "07/06/2024 02:50:21 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n", + "07/06/2024 02:50:21 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n", + "[INFO|modeling_utils.py:3556] 2024-07-06 02:50:21,766 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/model.safetensors\n", + "[INFO|modeling_utils.py:1531] 2024-07-06 02:50:22,690 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n", + "[INFO|configuration_utils.py:1000] 2024-07-06 02:50:22,693 >> Generate config GenerationConfig {\n", + " \"bos_token_id\": 151643,\n", + " \"eos_token_id\": 151645\n", + "}\n", + "\n", + "[INFO|modeling_utils.py:4364] 2024-07-06 02:50:46,315 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n", + "\n", + 
"[INFO|modeling_utils.py:4372] 2024-07-06 02:50:46,315 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-0.5B-Instruct.\n", + "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n", + "[INFO|configuration_utils.py:955] 2024-07-06 02:50:46,596 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/generation_config.json\n", + "[INFO|configuration_utils.py:1000] 2024-07-06 02:50:46,597 >> Generate config GenerationConfig {\n", + " \"bos_token_id\": 151643,\n", + " \"do_sample\": true,\n", + " \"eos_token_id\": [\n", + " 151645,\n", + " 151643\n", + " ],\n", + " \"pad_token_id\": 151643,\n", + " \"repetition_penalty\": 1.1,\n", + " \"temperature\": 0.7,\n", + " \"top_k\": 20,\n", + " \"top_p\": 0.8\n", + "}\n", + "\n", + "07/06/2024 02:50:46 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n", + "07/06/2024 02:50:47 - INFO - llamafactory.model.adapter - Loaded adapter(s): llama-factory/saves/qwen2-0.5b-bf16/lora/sft/checkpoint-3360\n", + "07/06/2024 02:50:47 - INFO - llamafactory.model.loader - all params: 498,431,872\n", + "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", + "0.658 GB of memory reserved.\n", + "loading train/test data files\n", + "DatasetDict({\n", + " train: Dataset({\n", + " features: ['chinese', 'english', 'text', 'prompt'],\n", + " num_rows: 4528\n", + " })\n", + " test: Dataset({\n", + " features: ['chinese', 'english', 'text', 'prompt'],\n", + " num_rows: 1133\n", + " })\n", + "})\n", + "Evaluating model: Qwen/Qwen2-0.5B-Instruct\n", + " 0%| | 0/1133 [00:00\n", + "--------\n", + "step 2: Old Geng held his gun at someone's head, his eyes a bit crooked as he cocked it. 
The shotgun fired, a deafening bang! followed by ricochet after ricochet of gold-winged sparrows raining down ice pellets on leaves, cracking, cracking. The sandalstick of bullets snapping through the air like a clattering metal drum.\n", + "--------\n", + "step 3: Old Geng held his gun at someone's head, his eyes a bit crooked as he cocked it. The shotgun fired, a deafening bang! followed by ricochet after ricochet of gold-winged sparrows raining down ice pellets on leaves, cracking, cracking. The sandalstick of bullets snapping through the air like a clattering metal drum.\n", + "100%|█████████████████████████████████████| 1133/1133 [1:09:23<00:00, 3.67s/it]\n", + "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", + "42.277 GB of memory reserved.\n", + " chinese ... Qwen/Qwen2-0.5B-Instruct_checkpoint-3360\n", + "0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... Old Geng held his gun at someone's head, his e...\n", + "\n", + "[1 rows x 9 columns]\n", + "{'accuracy': 0.00176522506619594, 'correct_ids': [147, 364], 'meteor': 0.31152790941615477, 'bleu_scores': {'bleu': 0.061326135354300634, 'precisions': [0.31536558706782486, 0.08687895816608687, 0.03332205162918064, 0.015492508766337265], 'brevity_penalty': 1.0, 'length_ratio': 1.1515733686651208, 'translation_length': 34766, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.34370850889188637, 'rouge2': 0.11732037461970812, 'rougeL': 0.28244417413163836, 'rougeLsum': 0.2820705045841321}}\n", + "CPU times: user 7min 17s, sys: 2min 39s, total: 9min 57s\n", + "Wall time: 7h 16min 45s\n" + ] + } + ], + "source": [ + "%%time\n", + "\n", + "evaluate_model_all_epochs(\"Qwen/Qwen2-0.5B-Instruct\", num_train_epochs, \"llama-factory/saves/qwen2-0.5b-bf16/lora/sft\", load_in_4bit=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "\n", + "evaluate_model_all_epochs(\"unsloth/qwen2-0.5b-instruct-bnb-4bit\" 
, num_train_epochs, \"llama-factory/saves/qwen2-0.5b/lora/sft\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 0\n", + "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n", + "Adding /home/inflaton/code/projects/courses/llm-finetuning to sys.path\n", + "loading /home/inflaton/code/projects/courses/llm-finetuning/llm_toolkit/translation_utils.py\n", + "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n", + "[nltk_data] Package wordnet is already up-to-date!\n", + "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n", + "[nltk_data] Package punkt is already up-to-date!\n", + "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n", + "[nltk_data] Package omw-1.4 is already up-to-date!\n", + "Qwen/Qwen2-1.5B-Instruct None False datasets/mac/mac.tsv results/mac-results_lf-r2.csv\n", + "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", + "0.0 GB of memory reserved.\n", + "loading model: Qwen/Qwen2-1.5B-Instruct\n", + "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", + "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. 
Max memory = 11.994 GB.\n", + "1.697 GB of memory reserved.\n", + "loading train/test data files\n", + "Map: 100%|████████████████████████| 4528/4528 [00:00<00:00, 12597.96 examples/s]\n", + "Map: 100%|████████████████████████| 1133/1133 [00:00<00:00, 13985.25 examples/s]\n", + "DatasetDict({\n", + " train: Dataset({\n", + " features: ['chinese', 'english', 'text', 'prompt'],\n", + " num_rows: 4528\n", + " })\n", + " test: Dataset({\n", + " features: ['chinese', 'english', 'text', 'prompt'],\n", + " num_rows: 1133\n", + " })\n", + "})\n", + "Evaluating model: Qwen/Qwen2-1.5B-Instruct\n", + " 0%| | 0/1133 [00:00\n", + "--------\n", + "step 2: Old耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞迸着,嚓嚓有声。\n", + "--------\n", + "step 3: Old耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞迸着,嚓嚓有声。\n", + "100%|█���█████████████████████████████████████| 1133/1133 [44:18<00:00, 2.35s/it]\n", + "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", + "8.055 GB of memory reserved.\n", + " chinese ... Qwen/Qwen2-1.5B-Instruct\n", + "0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... 
Old耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝...\n", + "\n", + "[1 rows x 10 columns]\n", + "{'accuracy': 0.0, 'correct_ids': [], 'meteor': 0.31463018790549185, 'bleu_scores': {'bleu': 0.07617500476658332, 'precisions': [0.4025838638817529, 0.12186115214180207, 0.04992072848489776, 0.022578171594104114], 'brevity_penalty': 0.8833639224944743, 'length_ratio': 0.8896654521364691, 'translation_length': 26859, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.37119834139701413, 'rouge2': 0.13281280032592238, 'rougeL': 0.3150444223537181, 'rougeLsum': 0.31529082897505967}}\n", + "Epoch 1\n", + "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n", + "Adding /home/inflaton/code/projects/courses/llm-finetuning to sys.path\n", + "loading /home/inflaton/code/projects/courses/llm-finetuning/llm_toolkit/translation_utils.py\n", + "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n", + "[nltk_data] Package wordnet is already up-to-date!\n", + "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n", + "[nltk_data] Package punkt is already up-to-date!\n", + "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n", + "[nltk_data] Package omw-1.4 is already up-to-date!\n", + "Qwen/Qwen2-1.5B-Instruct llama-factory/saves/qwen2-1.5b-bf16/lora/sft/checkpoint-560 False datasets/mac/mac.tsv results/mac-results_lf-r2.csv\n", + "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. 
Max memory = 11.994 GB.\n", + "0.0 GB of memory reserved.\n", + "loading model: Qwen/Qwen2-1.5B-Instruct\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 04:46:07,209 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/vocab.json\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 04:46:07,209 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/merges.txt\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 04:46:07,209 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/tokenizer.json\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 04:46:07,209 >> loading file added_tokens.json from cache at None\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 04:46:07,209 >> loading file special_tokens_map.json from cache at None\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 04:46:07,209 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/tokenizer_config.json\n", + "[WARNING|logging.py:313] 2024-07-06 04:46:07,379 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", + "07/06/2024 04:46:07 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n", + "07/06/2024 04:46:07 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n", + "[INFO|configuration_utils.py:733] 2024-07-06 04:46:07,705 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/config.json\n", + 
"[INFO|configuration_utils.py:800] 2024-07-06 04:46:07,706 >> Model config Qwen2Config {\n", + " \"_name_or_path\": \"Qwen/Qwen2-1.5B-Instruct\",\n", + " \"architectures\": [\n", + " \"Qwen2ForCausalLM\"\n", + " ],\n", + " \"attention_dropout\": 0.0,\n", + " \"bos_token_id\": 151643,\n", + " \"eos_token_id\": 151645,\n", + " \"hidden_act\": \"silu\",\n", + " \"hidden_size\": 1536,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 8960,\n", + " \"max_position_embeddings\": 32768,\n", + " \"max_window_layers\": 28,\n", + " \"model_type\": \"qwen2\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 28,\n", + " \"num_key_value_heads\": 2,\n", + " \"rms_norm_eps\": 1e-06,\n", + " \"rope_theta\": 1000000.0,\n", + " \"sliding_window\": 32768,\n", + " \"tie_word_embeddings\": true,\n", + " \"torch_dtype\": \"bfloat16\",\n", + " \"transformers_version\": \"4.42.3\",\n", + " \"use_cache\": true,\n", + " \"use_sliding_window\": false,\n", + " \"vocab_size\": 151936\n", + "}\n", + "\n", + "07/06/2024 04:46:07 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n", + "07/06/2024 04:46:07 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n", + "[INFO|modeling_utils.py:3556] 2024-07-06 04:46:07,824 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/model.safetensors\n", + "[INFO|modeling_utils.py:1531] 2024-07-06 04:46:10,135 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n", + "[INFO|configuration_utils.py:1000] 2024-07-06 04:46:10,139 >> Generate config GenerationConfig {\n", + " \"bos_token_id\": 151643,\n", + " \"eos_token_id\": 151645\n", + "}\n", + "\n", + "[INFO|modeling_utils.py:4364] 2024-07-06 04:47:37,722 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n", + "\n", + 
"[INFO|modeling_utils.py:4372] 2024-07-06 04:47:37,722 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-1.5B-Instruct.\n", + "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n", + "[INFO|configuration_utils.py:955] 2024-07-06 04:47:37,994 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/generation_config.json\n", + "[INFO|configuration_utils.py:1000] 2024-07-06 04:47:37,994 >> Generate config GenerationConfig {\n", + " \"bos_token_id\": 151643,\n", + " \"do_sample\": true,\n", + " \"eos_token_id\": [\n", + " 151645,\n", + " 151643\n", + " ],\n", + " \"pad_token_id\": 151643,\n", + " \"repetition_penalty\": 1.1,\n", + " \"temperature\": 0.7,\n", + " \"top_k\": 20,\n", + " \"top_p\": 0.8\n", + "}\n", + "\n", + "07/06/2024 04:47:38 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n", + "07/06/2024 04:47:38 - INFO - llamafactory.model.adapter - Loaded adapter(s): llama-factory/saves/qwen2-1.5b-bf16/lora/sft/checkpoint-560\n", + "07/06/2024 04:47:38 - INFO - llamafactory.model.loader - all params: 1,552,946,688\n", + "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. 
Max memory = 11.994 GB.\n", + "1.689 GB of memory reserved.\n", + "loading train/test data files\n", + "Map: 100%|████████████████████████| 4528/4528 [00:00<00:00, 28781.97 examples/s]\n", + "Map: 100%|█████████████████████████| 1133/1133 [00:00<00:00, 3422.61 examples/s]\n", + "DatasetDict({\n", + " train: Dataset({\n", + " features: ['chinese', 'english', 'text', 'prompt'],\n", + " num_rows: 4528\n", + " })\n", + " test: Dataset({\n", + " features: ['chinese', 'english', 'text', 'prompt'],\n", + " num_rows: 1133\n", + " })\n", + "})\n", + "Evaluating model: Qwen/Qwen2-1.5B-Instruct\n", + " 0%| | 0/1133 [00:00\n", + "--------\n", + "step 2: Old Geng took up his rifle and squinted through the raindrops with one eye. He squeezed the trigger and a hail of gold sparrows came down from the sky, striking the birches. The sand flew through the air, making a sound like popping popcorn.\n", + "--------\n", + "step 3: Old Geng took up his rifle and squinted through the raindrops with one eye. He squeezed the trigger and a hail of gold sparrows came down from the sky, striking the birches. The sand flew through the air, making a sound like popping popcorn.\n", + "100%|███████████████████████████████████████| 1133/1133 [41:14<00:00, 2.18s/it]\n", + "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", + "8.17 GB of memory reserved.\n", + " chinese ... Qwen/Qwen2-1.5B-Instruct_checkpoint-560\n", + "0 老耿端起枪,眯缝起一只三角眼,一���扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... 
Old Geng took up his rifle and squinted throug...\n", + "\n", + "[1 rows x 11 columns]\n", + "{'accuracy': 0.00176522506619594, 'correct_ids': [77, 419], 'meteor': 0.3601984358327376, 'bleu_scores': {'bleu': 0.09136211453920208, 'precisions': [0.4177844546982867, 0.13815985684461676, 0.058223709815521135, 0.02853537409427664], 'brevity_penalty': 0.9232339020413112, 'length_ratio': 0.9260351109638953, 'translation_length': 27957, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.39835530538706004, 'rouge2': 0.153852117821579, 'rougeL': 0.3364598612963231, 'rougeLsum': 0.3364542798589155}}\n", + "Epoch 2\n", + "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n", + "Adding /home/inflaton/code/projects/courses/llm-finetuning to sys.path\n", + "loading /home/inflaton/code/projects/courses/llm-finetuning/llm_toolkit/translation_utils.py\n", + "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n", + "[nltk_data] Package wordnet is already up-to-date!\n", + "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n", + "[nltk_data] Package punkt is already up-to-date!\n", + "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n", + "[nltk_data] Package omw-1.4 is already up-to-date!\n", + "Qwen/Qwen2-1.5B-Instruct llama-factory/saves/qwen2-1.5b-bf16/lora/sft/checkpoint-1120 False datasets/mac/mac.tsv results/mac-results_lf-r2.csv\n", + "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. 
Max memory = 11.994 GB.\n", + "0.0 GB of memory reserved.\n", + "loading model: Qwen/Qwen2-1.5B-Instruct\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 05:29:13,243 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/vocab.json\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 05:29:13,243 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/merges.txt\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 05:29:13,243 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/tokenizer.json\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 05:29:13,243 >> loading file added_tokens.json from cache at None\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 05:29:13,243 >> loading file special_tokens_map.json from cache at None\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 05:29:13,243 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/tokenizer_config.json\n", + "[WARNING|logging.py:313] 2024-07-06 05:29:13,391 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", + "07/06/2024 05:29:13 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n", + "07/06/2024 05:29:13 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n", + "[INFO|configuration_utils.py:733] 2024-07-06 05:29:13,932 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/config.json\n", + 
"[INFO|configuration_utils.py:800] 2024-07-06 05:29:13,932 >> Model config Qwen2Config {\n", + " \"_name_or_path\": \"Qwen/Qwen2-1.5B-Instruct\",\n", + " \"architectures\": [\n", + " \"Qwen2ForCausalLM\"\n", + " ],\n", + " \"attention_dropout\": 0.0,\n", + " \"bos_token_id\": 151643,\n", + " \"eos_token_id\": 151645,\n", + " \"hidden_act\": \"silu\",\n", + " \"hidden_size\": 1536,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 8960,\n", + " \"max_position_embeddings\": 32768,\n", + " \"max_window_layers\": 28,\n", + " \"model_type\": \"qwen2\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 28,\n", + " \"num_key_value_heads\": 2,\n", + " \"rms_norm_eps\": 1e-06,\n", + " \"rope_theta\": 1000000.0,\n", + " \"sliding_window\": 32768,\n", + " \"tie_word_embeddings\": true,\n", + " \"torch_dtype\": \"bfloat16\",\n", + " \"transformers_version\": \"4.42.3\",\n", + " \"use_cache\": true,\n", + " \"use_sliding_window\": false,\n", + " \"vocab_size\": 151936\n", + "}\n", + "\n", + "07/06/2024 05:29:13 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n", + "07/06/2024 05:29:13 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n", + "[INFO|modeling_utils.py:3556] 2024-07-06 05:29:13,983 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/model.safetensors\n", + "[INFO|modeling_utils.py:1531] 2024-07-06 05:29:15,487 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n", + "[INFO|configuration_utils.py:1000] 2024-07-06 05:29:15,490 >> Generate config GenerationConfig {\n", + " \"bos_token_id\": 151643,\n", + " \"eos_token_id\": 151645\n", + "}\n", + "\n", + "[INFO|modeling_utils.py:4364] 2024-07-06 05:30:36,733 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n", + "\n", + 
"[INFO|modeling_utils.py:4372] 2024-07-06 05:30:36,733 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-1.5B-Instruct.\n", + "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n", + "[INFO|configuration_utils.py:955] 2024-07-06 05:30:37,009 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/generation_config.json\n", + "[INFO|configuration_utils.py:1000] 2024-07-06 05:30:37,009 >> Generate config GenerationConfig {\n", + " \"bos_token_id\": 151643,\n", + " \"do_sample\": true,\n", + " \"eos_token_id\": [\n", + " 151645,\n", + " 151643\n", + " ],\n", + " \"pad_token_id\": 151643,\n", + " \"repetition_penalty\": 1.1,\n", + " \"temperature\": 0.7,\n", + " \"top_k\": 20,\n", + " \"top_p\": 0.8\n", + "}\n", + "\n", + "07/06/2024 05:30:37 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n", + "07/06/2024 05:30:37 - INFO - llamafactory.model.adapter - Loaded adapter(s): llama-factory/saves/qwen2-1.5b-bf16/lora/sft/checkpoint-1120\n", + "07/06/2024 05:30:37 - INFO - llamafactory.model.loader - all params: 1,552,946,688\n", + "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. 
Max memory = 11.994 GB.\n", + "1.689 GB of memory reserved.\n", + "loading train/test data files\n", + "DatasetDict({\n", + " train: Dataset({\n", + " features: ['chinese', 'english', 'text', 'prompt'],\n", + " num_rows: 4528\n", + " })\n", + " test: Dataset({\n", + " features: ['chinese', 'english', 'text', 'prompt'],\n", + " num_rows: 1133\n", + " })\n", + "})\n", + "Evaluating model: Qwen/Qwen2-1.5B-Instruct\n", + " 0%| | 0/1133 [00:00\n", + "--------\n", + "step 2: Old Geng raised his gun, squinted at it through the rain, cocked it, fired, and a hail of golden sparrows fell down on the ground, iron sand whizzing between the branches of the willow trees, making a sound like broken glass.\n", + "--------\n", + "step 3: Old Geng raised his gun, squinted at it through the rain, cocked it, fired, and a hail of golden sparrows fell down on the ground, iron sand whizzing between the branches of the willow trees, making a sound like broken glass.\n", + "100%|███████████████████████████████████████| 1133/1133 [42:38<00:00, 2.26s/it]\n", + "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", + "6.896 GB of memory reserved.\n", + " chinese ... Qwen/Qwen2-1.5B-Instruct_checkpoint-1120\n", + "0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... 
Old Geng raised his gun, squinted at it throug...\n", + "\n", + "[1 rows x 12 columns]\n", + "{'accuracy': 0.00353045013239188, 'correct_ids': [77, 170, 240, 738], 'meteor': 0.3715145486123948, 'bleu_scores': {'bleu': 0.09990739895468834, 'precisions': [0.41975395678381333, 0.14803935967102363, 0.06454702164336334, 0.0321108263933376], 'brevity_penalty': 0.9378269974960846, 'length_ratio': 0.9396820139118913, 'translation_length': 28369, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.407656988217189, 'rouge2': 0.16985073716420096, 'rougeL': 0.34850770823763794, 'rougeLsum': 0.34839716484833966}}\n", + "Epoch 3\n", + "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n", + "Adding /home/inflaton/code/projects/courses/llm-finetuning to sys.path\n", + "loading /home/inflaton/code/projects/courses/llm-finetuning/llm_toolkit/translation_utils.py\n", + "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n", + "[nltk_data] Package wordnet is already up-to-date!\n", + "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n", + "[nltk_data] Package punkt is already up-to-date!\n", + "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n", + "[nltk_data] Package omw-1.4 is already up-to-date!\n", + "Qwen/Qwen2-1.5B-Instruct llama-factory/saves/qwen2-1.5b-bf16/lora/sft/checkpoint-1680 False datasets/mac/mac.tsv results/mac-results_lf-r2.csv\n", + "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. 
Max memory = 11.994 GB.\n", + "0.0 GB of memory reserved.\n", + "loading model: Qwen/Qwen2-1.5B-Instruct\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 06:13:37,150 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/vocab.json\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 06:13:37,150 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/merges.txt\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 06:13:37,150 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/tokenizer.json\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 06:13:37,150 >> loading file added_tokens.json from cache at None\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 06:13:37,150 >> loading file special_tokens_map.json from cache at None\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 06:13:37,150 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/tokenizer_config.json\n", + "[WARNING|logging.py:313] 2024-07-06 06:13:37,283 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", + "07/06/2024 06:13:37 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n", + "07/06/2024 06:13:37 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n", + "[INFO|configuration_utils.py:733] 2024-07-06 06:13:37,654 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/config.json\n", + 
"[INFO|configuration_utils.py:800] 2024-07-06 06:13:37,655 >> Model config Qwen2Config {\n", + " \"_name_or_path\": \"Qwen/Qwen2-1.5B-Instruct\",\n", + " \"architectures\": [\n", + " \"Qwen2ForCausalLM\"\n", + " ],\n", + " \"attention_dropout\": 0.0,\n", + " \"bos_token_id\": 151643,\n", + " \"eos_token_id\": 151645,\n", + " \"hidden_act\": \"silu\",\n", + " \"hidden_size\": 1536,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 8960,\n", + " \"max_position_embeddings\": 32768,\n", + " \"max_window_layers\": 28,\n", + " \"model_type\": \"qwen2\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 28,\n", + " \"num_key_value_heads\": 2,\n", + " \"rms_norm_eps\": 1e-06,\n", + " \"rope_theta\": 1000000.0,\n", + " \"sliding_window\": 32768,\n", + " \"tie_word_embeddings\": true,\n", + " \"torch_dtype\": \"bfloat16\",\n", + " \"transformers_version\": \"4.42.3\",\n", + " \"use_cache\": true,\n", + " \"use_sliding_window\": false,\n", + " \"vocab_size\": 151936\n", + "}\n", + "\n", + "07/06/2024 06:13:37 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n", + "07/06/2024 06:13:37 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n", + "[INFO|modeling_utils.py:3556] 2024-07-06 06:13:37,796 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/model.safetensors\n", + "[INFO|modeling_utils.py:1531] 2024-07-06 06:13:39,400 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n", + "[INFO|configuration_utils.py:1000] 2024-07-06 06:13:39,403 >> Generate config GenerationConfig {\n", + " \"bos_token_id\": 151643,\n", + " \"eos_token_id\": 151645\n", + "}\n", + "\n", + "[INFO|modeling_utils.py:4364] 2024-07-06 06:14:58,257 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n", + "\n", + 
"[INFO|modeling_utils.py:4372] 2024-07-06 06:14:58,257 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-1.5B-Instruct.\n", + "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n", + "[INFO|configuration_utils.py:955] 2024-07-06 06:14:58,530 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/generation_config.json\n", + "[INFO|configuration_utils.py:1000] 2024-07-06 06:14:58,530 >> Generate config GenerationConfig {\n", + " \"bos_token_id\": 151643,\n", + " \"do_sample\": true,\n", + " \"eos_token_id\": [\n", + " 151645,\n", + " 151643\n", + " ],\n", + " \"pad_token_id\": 151643,\n", + " \"repetition_penalty\": 1.1,\n", + " \"temperature\": 0.7,\n", + " \"top_k\": 20,\n", + " \"top_p\": 0.8\n", + "}\n", + "\n", + "07/06/2024 06:14:58 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n", + "07/06/2024 06:14:59 - INFO - llamafactory.model.adapter - Loaded adapter(s): llama-factory/saves/qwen2-1.5b-bf16/lora/sft/checkpoint-1680\n", + "07/06/2024 06:14:59 - INFO - llamafactory.model.loader - all params: 1,552,946,688\n", + "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. 
Max memory = 11.994 GB.\n", + "1.689 GB of memory reserved.\n", + "loading train/test data files\n", + "DatasetDict({\n", + " train: Dataset({\n", + " features: ['chinese', 'english', 'text', 'prompt'],\n", + " num_rows: 4528\n", + " })\n", + " test: Dataset({\n", + " features: ['chinese', 'english', 'text', 'prompt'],\n", + " num_rows: 1133\n", + " })\n", + "})\n", + "Evaluating model: Qwen/Qwen2-1.5B-Instruct\n", + " 0%| | 0/1133 [00:00\n", + "--------\n", + "step 2: Old Geng took a step forward with his gun raised and fired; a hail of golden sparrows rained down on the ground, while iron pellets flew through the treetops, crackling as they went.\n", + "--------\n", + "step 3: Old Geng took a step forward with his gun raised and fired; a hail of golden sparrows rained down on the ground, while iron pellets flew through the treetops, crackling as they went.\n", + "100%|█████████████████████████████████████| 1133/1133 [4:04:10<00:00, 12.93s/it]\n", + "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", + "42.271 GB of memory reserved.\n", + " chinese ... Qwen/Qwen2-1.5B-Instruct_checkpoint-1680\n", + "0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... 
Old Geng took a step forward with his gun rais...\n", + "\n", + "[1 rows x 13 columns]\n", + "{'accuracy': 0.00176522506619594, 'correct_ids': [147, 381], 'meteor': 0.3720444789500285, 'bleu_scores': {'bleu': 0.09123009255208808, 'precisions': [0.3641282748922381, 0.1247070745455697, 0.05514681731590357, 0.027662187052322804], 'brevity_penalty': 1.0, 'length_ratio': 1.083504471679364, 'translation_length': 32711, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.40494632360158267, 'rouge2': 0.16629962834305784, 'rougeL': 0.3426174581521877, 'rougeLsum': 0.34263145168084297}}\n", + "Epoch 4\n", + "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n", + "Adding /home/inflaton/code/projects/courses/llm-finetuning to sys.path\n", + "loading /home/inflaton/code/projects/courses/llm-finetuning/llm_toolkit/translation_utils.py\n", + "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n", + "[nltk_data] Package wordnet is already up-to-date!\n", + "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n", + "[nltk_data] Package punkt is already up-to-date!\n", + "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n", + "[nltk_data] Package omw-1.4 is already up-to-date!\n", + "Qwen/Qwen2-1.5B-Instruct llama-factory/saves/qwen2-1.5b-bf16/lora/sft/checkpoint-2240 False datasets/mac/mac.tsv results/mac-results_lf-r2.csv\n", + "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. 
Max memory = 11.994 GB.\n", + "0.0 GB of memory reserved.\n", + "loading model: Qwen/Qwen2-1.5B-Instruct\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 10:19:42,798 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/vocab.json\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 10:19:42,798 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/merges.txt\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 10:19:42,798 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/tokenizer.json\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 10:19:42,798 >> loading file added_tokens.json from cache at None\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 10:19:42,798 >> loading file special_tokens_map.json from cache at None\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 10:19:42,798 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/tokenizer_config.json\n", + "[WARNING|logging.py:313] 2024-07-06 10:19:42,959 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", + "07/06/2024 10:19:42 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n", + "07/06/2024 10:19:42 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n", + "[INFO|configuration_utils.py:733] 2024-07-06 10:19:43,246 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/config.json\n", + 
"[INFO|configuration_utils.py:800] 2024-07-06 10:19:43,246 >> Model config Qwen2Config {\n", + " \"_name_or_path\": \"Qwen/Qwen2-1.5B-Instruct\",\n", + " \"architectures\": [\n", + " \"Qwen2ForCausalLM\"\n", + " ],\n", + " \"attention_dropout\": 0.0,\n", + " \"bos_token_id\": 151643,\n", + " \"eos_token_id\": 151645,\n", + " \"hidden_act\": \"silu\",\n", + " \"hidden_size\": 1536,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 8960,\n", + " \"max_position_embeddings\": 32768,\n", + " \"max_window_layers\": 28,\n", + " \"model_type\": \"qwen2\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 28,\n", + " \"num_key_value_heads\": 2,\n", + " \"rms_norm_eps\": 1e-06,\n", + " \"rope_theta\": 1000000.0,\n", + " \"sliding_window\": 32768,\n", + " \"tie_word_embeddings\": true,\n", + " \"torch_dtype\": \"bfloat16\",\n", + " \"transformers_version\": \"4.42.3\",\n", + " \"use_cache\": true,\n", + " \"use_sliding_window\": false,\n", + " \"vocab_size\": 151936\n", + "}\n", + "\n", + "07/06/2024 10:19:43 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n", + "07/06/2024 10:19:43 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n", + "[INFO|modeling_utils.py:3556] 2024-07-06 10:19:43,306 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/model.safetensors\n", + "[INFO|modeling_utils.py:1531] 2024-07-06 10:19:44,980 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n", + "[INFO|configuration_utils.py:1000] 2024-07-06 10:19:44,983 >> Generate config GenerationConfig {\n", + " \"bos_token_id\": 151643,\n", + " \"eos_token_id\": 151645\n", + "}\n", + "\n", + "[INFO|modeling_utils.py:4364] 2024-07-06 10:21:08,496 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n", + "\n", + 
"[INFO|modeling_utils.py:4372] 2024-07-06 10:21:08,496 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-1.5B-Instruct.\n", + "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n", + "[INFO|configuration_utils.py:955] 2024-07-06 10:21:08,782 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/generation_config.json\n", + "[INFO|configuration_utils.py:1000] 2024-07-06 10:21:08,782 >> Generate config GenerationConfig {\n", + " \"bos_token_id\": 151643,\n", + " \"do_sample\": true,\n", + " \"eos_token_id\": [\n", + " 151645,\n", + " 151643\n", + " ],\n", + " \"pad_token_id\": 151643,\n", + " \"repetition_penalty\": 1.1,\n", + " \"temperature\": 0.7,\n", + " \"top_k\": 20,\n", + " \"top_p\": 0.8\n", + "}\n", + "\n", + "07/06/2024 10:21:09 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n", + "07/06/2024 10:21:09 - INFO - llamafactory.model.adapter - Loaded adapter(s): llama-factory/saves/qwen2-1.5b-bf16/lora/sft/checkpoint-2240\n", + "07/06/2024 10:21:09 - INFO - llamafactory.model.loader - all params: 1,552,946,688\n", + "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. 
Max memory = 11.994 GB.\n", + "1.689 GB of memory reserved.\n", + "loading train/test data files\n", + "DatasetDict({\n", + " train: Dataset({\n", + " features: ['chinese', 'english', 'text', 'prompt'],\n", + " num_rows: 4528\n", + " })\n", + " test: Dataset({\n", + " features: ['chinese', 'english', 'text', 'prompt'],\n", + " num_rows: 1133\n", + " })\n", + "})\n", + "Evaluating model: Qwen/Qwen2-1.5B-Instruct\n", + " 0%| | 0/1133 [00:00\n", + "--------\n", + "step 2: Old Geng took up his weapon with a squinted look, raised the butt of the rifle to his lips, squeezed the trigger, and it went off like a bolt from the blue sky: hailstones of lead rain down in all directions, striking branches and scattering sparks.\n", + "--------\n", + "step 3: Old Geng took up his weapon with a squinted look, raised the butt of the rifle to his lips, squeezed the trigger, and it went off like a bolt from the blue sky: hailstones of lead rain down in all directions, striking branches and scattering sparks.\n", + "100%|███████████████████████████████████████| 1133/1133 [45:00<00:00, 2.38s/it]\n", + "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", + "7.518 GB of memory reserved.\n", + " chinese ... Qwen/Qwen2-1.5B-Instruct_checkpoint-2240\n", + "0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... 
Old Geng took up his weapon with a squinted lo...\n", + "\n", + "[1 rows x 14 columns]\n", + "{'accuracy': 0.00088261253309797, 'correct_ids': [147], 'meteor': 0.36344098311977724, 'bleu_scores': {'bleu': 0.09957396873764404, 'precisions': [0.4061541642259054, 0.13941480206540446, 0.06297652862909255, 0.03238771608069614], 'brevity_penalty': 0.9605193264852147, 'length_ratio': 0.9612785690626036, 'translation_length': 29021, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.3947630372166354, 'rouge2': 0.1607957226567645, 'rougeL': 0.33285208707455316, 'rougeLsum': 0.33265174295694383}}\n", + "Epoch 5\n", + "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n", + "Adding /home/inflaton/code/projects/courses/llm-finetuning to sys.path\n", + "loading /home/inflaton/code/projects/courses/llm-finetuning/llm_toolkit/translation_utils.py\n", + "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n", + "[nltk_data] Package wordnet is already up-to-date!\n", + "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n", + "[nltk_data] Package punkt is already up-to-date!\n", + "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n", + "[nltk_data] Package omw-1.4 is already up-to-date!\n", + "Qwen/Qwen2-1.5B-Instruct llama-factory/saves/qwen2-1.5b-bf16/lora/sft/checkpoint-2800 False datasets/mac/mac.tsv results/mac-results_lf-r2.csv\n", + "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. 
Max memory = 11.994 GB.\n", + "0.0 GB of memory reserved.\n", + "loading model: Qwen/Qwen2-1.5B-Instruct\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 11:06:31,391 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/vocab.json\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 11:06:31,391 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/merges.txt\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 11:06:31,391 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/tokenizer.json\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 11:06:31,391 >> loading file added_tokens.json from cache at None\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 11:06:31,391 >> loading file special_tokens_map.json from cache at None\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 11:06:31,391 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/tokenizer_config.json\n", + "[WARNING|logging.py:313] 2024-07-06 11:06:31,614 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", + "07/06/2024 11:06:31 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n", + "07/06/2024 11:06:31 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n", + "[INFO|configuration_utils.py:733] 2024-07-06 11:06:31,925 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/config.json\n", + 
"[INFO|configuration_utils.py:800] 2024-07-06 11:06:31,925 >> Model config Qwen2Config {\n", + " \"_name_or_path\": \"Qwen/Qwen2-1.5B-Instruct\",\n", + " \"architectures\": [\n", + " \"Qwen2ForCausalLM\"\n", + " ],\n", + " \"attention_dropout\": 0.0,\n", + " \"bos_token_id\": 151643,\n", + " \"eos_token_id\": 151645,\n", + " \"hidden_act\": \"silu\",\n", + " \"hidden_size\": 1536,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 8960,\n", + " \"max_position_embeddings\": 32768,\n", + " \"max_window_layers\": 28,\n", + " \"model_type\": \"qwen2\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 28,\n", + " \"num_key_value_heads\": 2,\n", + " \"rms_norm_eps\": 1e-06,\n", + " \"rope_theta\": 1000000.0,\n", + " \"sliding_window\": 32768,\n", + " \"tie_word_embeddings\": true,\n", + " \"torch_dtype\": \"bfloat16\",\n", + " \"transformers_version\": \"4.42.3\",\n", + " \"use_cache\": true,\n", + " \"use_sliding_window\": false,\n", + " \"vocab_size\": 151936\n", + "}\n", + "\n", + "07/06/2024 11:06:31 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n", + "07/06/2024 11:06:31 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n", + "[INFO|modeling_utils.py:3556] 2024-07-06 11:06:31,974 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/model.safetensors\n", + "[INFO|modeling_utils.py:1531] 2024-07-06 11:06:33,333 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n", + "[INFO|configuration_utils.py:1000] 2024-07-06 11:06:33,337 >> Generate config GenerationConfig {\n", + " \"bos_token_id\": 151643,\n", + " \"eos_token_id\": 151645\n", + "}\n", + "\n", + "[INFO|modeling_utils.py:4364] 2024-07-06 11:07:51,767 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n", + "\n", + 
"[INFO|modeling_utils.py:4372] 2024-07-06 11:07:51,767 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-1.5B-Instruct.\n", + "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n", + "[INFO|configuration_utils.py:955] 2024-07-06 11:07:52,244 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/generation_config.json\n", + "[INFO|configuration_utils.py:1000] 2024-07-06 11:07:52,244 >> Generate config GenerationConfig {\n", + " \"bos_token_id\": 151643,\n", + " \"do_sample\": true,\n", + " \"eos_token_id\": [\n", + " 151645,\n", + " 151643\n", + " ],\n", + " \"pad_token_id\": 151643,\n", + " \"repetition_penalty\": 1.1,\n", + " \"temperature\": 0.7,\n", + " \"top_k\": 20,\n", + " \"top_p\": 0.8\n", + "}\n", + "\n", + "07/06/2024 11:07:52 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n", + "07/06/2024 11:07:53 - INFO - llamafactory.model.adapter - Loaded adapter(s): llama-factory/saves/qwen2-1.5b-bf16/lora/sft/checkpoint-2800\n", + "07/06/2024 11:07:53 - INFO - llamafactory.model.loader - all params: 1,552,946,688\n", + "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", + "1.689 GB of memory reserved.\n", + "loading train/test data files\n", + "DatasetDict({\n", + " train: Dataset({\n", + " features: ['chinese', 'english', 'text', 'prompt'],\n", + " num_rows: 4528\n", + " })\n", + " test: Dataset({\n", + " features: ['chinese', 'english', 'text', 'prompt'],\n", + " num_rows: 1133\n", + " })\n", + "})\n", + "Evaluating model: Qwen/Qwen2-1.5B-Instruct\n", + " 0%| | 0/1133 [00:00\n", + "--------\n", + "step 2: Old Geng took a step forward, raised his pistol, and squeezed the trigger. 
The bullets flew like hailstones; tin pellets whistled through the air as they exploded among the willow twigs.\n", + "--------\n", + "step 3: Old Geng took a step forward, raised his pistol, and squeezed the trigger. The bullets flew like hailstones; tin pellets whistled through the air as they exploded among the willow twigs.\n", + "100%|███████████████████████████████████████| 1133/1133 [44:57<00:00, 2.38s/it]\n", + "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", + "8.508 GB of memory reserved.\n", + " chinese ... Qwen/Qwen2-1.5B-Instruct_checkpoint-2800\n", + "0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... Old Geng took a step forward, raised his pisto...\n", + "\n", + "[1 rows x 15 columns]\n", + "{'accuracy': 0.00088261253309797, 'correct_ids': [147], 'meteor': 0.3590942297170212, 'bleu_scores': {'bleu': 0.09721160968740532, 'precisions': [0.3943767957312902, 0.13539479770842971, 0.06072964555835682, 0.031379377055523315], 'brevity_penalty': 0.9678956444152034, 'length_ratio': 0.9684001324942034, 'translation_length': 29236, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.3854727479634057, 'rouge2': 0.1555832972124876, 'rougeL': 0.3279226642764544, 'rougeLsum': 0.32816076982382436}}\n", + "Epoch 6\n", + "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n", + "Adding /home/inflaton/code/projects/courses/llm-finetuning to sys.path\n", + "loading /home/inflaton/code/projects/courses/llm-finetuning/llm_toolkit/translation_utils.py\n", + "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n", + "[nltk_data] Package wordnet is already up-to-date!\n", + "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n", + "[nltk_data] Package punkt is already up-to-date!\n", + "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n", + "[nltk_data] Package omw-1.4 is already up-to-date!\n", + "Qwen/Qwen2-1.5B-Instruct 
llama-factory/saves/qwen2-1.5b-bf16/lora/sft/checkpoint-3360 False datasets/mac/mac.tsv results/mac-results_lf-r2.csv\n", + "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", + "0.0 GB of memory reserved.\n", + "loading model: Qwen/Qwen2-1.5B-Instruct\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 11:53:12,406 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/vocab.json\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 11:53:12,406 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/merges.txt\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 11:53:12,406 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/tokenizer.json\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 11:53:12,406 >> loading file added_tokens.json from cache at None\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 11:53:12,406 >> loading file special_tokens_map.json from cache at None\n", + "[INFO|tokenization_utils_base.py:2161] 2024-07-06 11:53:12,406 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/tokenizer_config.json\n", + "[WARNING|logging.py:313] 2024-07-06 11:53:12,531 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", + "07/06/2024 11:53:12 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n", + "07/06/2024 11:53:12 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n", + "[INFO|configuration_utils.py:733] 2024-07-06 11:53:12,846 >> loading configuration 
file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/config.json\n", + "[INFO|configuration_utils.py:800] 2024-07-06 11:53:12,847 >> Model config Qwen2Config {\n", + " \"_name_or_path\": \"Qwen/Qwen2-1.5B-Instruct\",\n", + " \"architectures\": [\n", + " \"Qwen2ForCausalLM\"\n", + " ],\n", + " \"attention_dropout\": 0.0,\n", + " \"bos_token_id\": 151643,\n", + " \"eos_token_id\": 151645,\n", + " \"hidden_act\": \"silu\",\n", + " \"hidden_size\": 1536,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 8960,\n", + " \"max_position_embeddings\": 32768,\n", + " \"max_window_layers\": 28,\n", + " \"model_type\": \"qwen2\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 28,\n", + " \"num_key_value_heads\": 2,\n", + " \"rms_norm_eps\": 1e-06,\n", + " \"rope_theta\": 1000000.0,\n", + " \"sliding_window\": 32768,\n", + " \"tie_word_embeddings\": true,\n", + " \"torch_dtype\": \"bfloat16\",\n", + " \"transformers_version\": \"4.42.3\",\n", + " \"use_cache\": true,\n", + " \"use_sliding_window\": false,\n", + " \"vocab_size\": 151936\n", + "}\n", + "\n", + "07/06/2024 11:53:12 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n", + "07/06/2024 11:53:12 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n", + "[INFO|modeling_utils.py:3556] 2024-07-06 11:53:12,906 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/model.safetensors\n", + "[INFO|modeling_utils.py:1531] 2024-07-06 11:53:13,748 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n", + "[INFO|configuration_utils.py:1000] 2024-07-06 11:53:13,752 >> Generate config GenerationConfig {\n", + " \"bos_token_id\": 151643,\n", + " \"eos_token_id\": 151645\n", + 
"}\n", + "\n", + "[INFO|modeling_utils.py:4364] 2024-07-06 11:54:39,244 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n", + "\n", + "[INFO|modeling_utils.py:4372] 2024-07-06 11:54:39,244 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-1.5B-Instruct.\n", + "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n", + "[INFO|configuration_utils.py:955] 2024-07-06 11:54:39,612 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/generation_config.json\n", + "[INFO|configuration_utils.py:1000] 2024-07-06 11:54:39,612 >> Generate config GenerationConfig {\n", + " \"bos_token_id\": 151643,\n", + " \"do_sample\": true,\n", + " \"eos_token_id\": [\n", + " 151645,\n", + " 151643\n", + " ],\n", + " \"pad_token_id\": 151643,\n", + " \"repetition_penalty\": 1.1,\n", + " \"temperature\": 0.7,\n", + " \"top_k\": 20,\n", + " \"top_p\": 0.8\n", + "}\n", + "\n", + "07/06/2024 11:54:40 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n", + "07/06/2024 11:54:40 - INFO - llamafactory.model.adapter - Loaded adapter(s): llama-factory/saves/qwen2-1.5b-bf16/lora/sft/checkpoint-3360\n", + "07/06/2024 11:54:40 - INFO - llamafactory.model.loader - all params: 1,552,946,688\n", + "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. 
Max memory = 11.994 GB.\n", + "1.689 GB of memory reserved.\n", + "loading train/test data files\n", + "DatasetDict({\n", + " train: Dataset({\n", + " features: ['chinese', 'english', 'text', 'prompt'],\n", + " num_rows: 4528\n", + " })\n", + " test: Dataset({\n", + " features: ['chinese', 'english', 'text', 'prompt'],\n", + " num_rows: 1133\n", + " })\n", + "})\n", + "Evaluating model: Qwen/Qwen2-1.5B-Instruct\n", + " 0%| | 0/1133 [00:00\n", + "--------\n", + "step 2: Old Geng took a step forward, lifted his pistol, and squeezed the trigger. A shower of gold-bird pellets flew from the barrel, along with a dozen or so pieces of metal-shrapnel that exploded among the willows.\n", + "--------\n", + "step 3: Old Geng took a step forward, lifted his pistol, and squeezed the trigger. A shower of gold-bird pellets flew from the barrel, along with a dozen or so pieces of metal-shrapnel that exploded among the willows.\n", + "100%|███████████████████████████████████████| 1133/1133 [46:06<00:00, 2.44s/it]\n", + "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", + "7.359 GB of memory reserved.\n", + " chinese ... Qwen/Qwen2-1.5B-Instruct_checkpoint-3360\n", + "0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... 
Old Geng took a step forward, lifted his pisto...\n", + "\n", + "[1 rows x 16 columns]\n", + "{'accuracy': 0.00088261253309797, 'correct_ids': [199], 'meteor': 0.3486547719340687, 'bleu_scores': {'bleu': 0.0936397309436105, 'precisions': [0.3901145886779545, 0.12895879296847199, 0.05791835069895065, 0.03006965944272446], 'brevity_penalty': 0.9678614572628684, 'length_ratio': 0.9683670089433587, 'translation_length': 29235, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.3811393635601209, 'rouge2': 0.1492043203557439, 'rougeL': 0.3217330341685408, 'rougeLsum': 0.32214344841265247}}\n", + "CPU times: user 8min 37s, sys: 2min 58s, total: 11min 35s\n", + "Wall time: 8h 40min 35s\n" + ] + } + ], + "source": [ + "%%time\n", + "\n", + "evaluate_model_all_epochs(\"Qwen/Qwen2-1.5B-Instruct\", num_train_epochs, \"llama-factory/saves/qwen2-1.5b-bf16/lora/sft\", load_in_4bit=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "\n", + "evaluate_model_all_epochs(\"unsloth/qwen2-1.5b-instruct-bnb-4bit\" , num_train_epochs, \"llama-factory/saves/qwen2-1.5b/lora/sft\")" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "application/vnd.databricks.v1+notebook": { + "dashboards": [], + "environmentMetadata": null, + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 4 + }, + "notebookName": "07_MAC_+_Qwen2-7B-Instructi_Unsloth_train", + "widgets": {} + }, + "colab": { + "gpuType": "T4", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "036fc5746f43416db18c19ad8fd36677": { + "model_module": 
"@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "06e806c82c7b4cbea31c5358dd9c3434": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "087b76a8b7514269b1f0ab29b062e444": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_a069d2ab23824f29aa320ac256e2cfe9", + "placeholder": "​", + "style": "IPY_MODEL_06e806c82c7b4cbea31c5358dd9c3434", + "value": "Map (num_proc=2): 100%" + } + }, + "09b76013aa9e45efb6deb23a7a0d0925": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + 
"description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_dea41c5260884aa6879b5e1d1697b14f", + "placeholder": "​", + "style": "IPY_MODEL_89965917796a4f81b899fdc7685f33df", + "value": "config.json: 100%" + } + }, + "0a92c56bfa134ef583220d7ef0b13e17": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "0c34be936c8145d3ab41282f30a70713": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "0f8b6bfe16894500838793f2491d403f": { + "model_module": 
"@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "177c78fce95d4b4ab33057c5a048d693": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "1f44c9ce1adf470cbb19784493ed209f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": 
"@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_0c34be936c8145d3ab41282f30a70713", + "placeholder": "​", + "style": "IPY_MODEL_0a92c56bfa134ef583220d7ef0b13e17", + "value": "model.safetensors: 100%" + } + }, + "201b59ccd9f845e197029b57e424aefc": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "2157f01726d748f8a9ae4a00664430da": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "21db8a77b00d4a4e82fdfa608657531f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + 
"grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "26e4202cca81496a90d15a0dd4ca9cf1": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_ba90fdb8822d47dab7ba203bee297f37", + "IPY_MODEL_61560ff6a36b44f4a9dfdae5c52791d4", + "IPY_MODEL_95fbe66647904c06a20f640630d6dc0e" + ], + "layout": "IPY_MODEL_57182a263d324a3dbf1471c74290a0d5" + } + }, + "27155728b6b84cb199c91c940095d0a8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_6b91feeed5464877991ac2c207aebe7c", + "IPY_MODEL_cca8113c54c0495daedce1327bf9c68b", + "IPY_MODEL_2e63a29e2f7247bba5beede9a568c99f" + ], + "layout": "IPY_MODEL_5c9d781c28944f3eb86e2a6d44efdf18" + } + }, + "271ddaa553a042d09b6db7b450643d8f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + 
"model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "2a58d04b428c46f4b3dbadd3bc6cd529": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "2d18ddf6482c4d97829ac0e5a7b9868f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": 
"1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_9f679ad3ec7f4fe8ad0510ffb57bc2ab", + "IPY_MODEL_f2df530d22c74977b249dd9fb5f4829b", + "IPY_MODEL_89b2ef0dbfea47ab8e6f8d659e3351d1" + ], + "layout": "IPY_MODEL_3056b148aa9f4e6e8aa3b61d26886255" + } + }, + "2e5087c76f98437cb5dc729230358cba": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "2e63a29e2f7247bba5beede9a568c99f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + 
"description_tooltip": null, + "layout": "IPY_MODEL_b993eaec6b224440bf80c0958c6fb536", + "placeholder": "​", + "style": "IPY_MODEL_de868e26e7154f62aa86223a539ad421", + "value": " 464/464 [00:00<00:00, 27.1kB/s]" + } + }, + "2f6c70dd266c4816bfad3fd3d192929a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "30307300bc4e4baf96560e30969a82b6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_e36a3f9eff0e4cf68834d66b0213ae96", + "placeholder": "​", + "style": "IPY_MODEL_a0037bdccf254159becde630bee3d1db", + "value": "generation_config.json: 100%" + } + }, + "3056b148aa9f4e6e8aa3b61d26886255": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + 
"grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "30cdc32298134cb0be4d41615b9e5774": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "3572201bd4d74a58b7a665f9bdfdcdba": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + 
"model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "35b0e8c26d6640e9bd0ed7b242a423d8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_2e5087c76f98437cb5dc729230358cba", + "max": 51760, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_036fc5746f43416db18c19ad8fd36677", + "value": 51760 + } + }, + "36166c7bcb854b34aca1f41a5d6ea50b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "370692d819df41828b48c4ad446f977b": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": 
null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "39b29a75374b45c0a22506010be2b84e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_30cdc32298134cb0be4d41615b9e5774", + "max": 1179, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_47928317548c454bba6358ab132e8dee", + "value": 1179 + } + }, + "3cf2dd993b5e4d3daecf61e4bab5a404": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": 
"", + "children": [ + "IPY_MODEL_087b76a8b7514269b1f0ab29b062e444", + "IPY_MODEL_35b0e8c26d6640e9bd0ed7b242a423d8", + "IPY_MODEL_54ad89e05fd74576b9b8b5b5a10eaf8d" + ], + "layout": "IPY_MODEL_a41dc44766444a998bec2d777f249d23" + } + }, + "43dec2ede91341f5af60eb522e18e984": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4463edd481c1467f914c7dcd6c6e6ffc": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + 
"display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "47928317548c454bba6358ab132e8dee": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "49277aeeac16434a865a4d12308b1abc": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + 
"grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4ae7e449e4ea4c729b5f34607c18ebae": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4b2061b8a73c43ffb0c2f83daf0d0183": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + 
"_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4c4c88d4c701450692fa0f6b0c5764b0": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": 
null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4c666f4ace3943f8b80ecd20e7503236": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "4ccedf0d93094e63b57a0f8a434fba06": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_4463edd481c1467f914c7dcd6c6e6ffc", + "max": 44307561, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_6d3b9a05db0b4dadb638c686faa0c40a", + "value": 44307561 + } + }, + "4dcf6ff672d24983a1877a8431709aa9": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_5807d5fb827d490fb3bc698f801ffff5", + "placeholder": "​", + "style": 
"IPY_MODEL_c4f2b06a82fd4987b8b659524a7b503b", + "value": "Generating train split: 100%" + } + }, + "4ea63adfce694725bdba878aef709dd3": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5234566b1bfc4655b8d582ea5b46ed9f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "54ad89e05fd74576b9b8b5b5a10eaf8d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + 
"_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_fdb1941405ed4e4aa06019933892deb3", + "placeholder": "​", + "style": "IPY_MODEL_668d5377ca56426a99753867e6e24862", + "value": " 51760/51760 [01:02<00:00, 1131.51 examples/s]" + } + }, + "56aee4853b7740e6a977254f5d1fa66d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "57182a263d324a3dbf1471c74290a0d5": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": 
null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5807d5fb827d490fb3bc698f801ffff5": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5c9d781c28944f3eb86e2a6d44efdf18": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + 
"display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5f40db8173dd4d76b6ef5ed6d9ec8b6e": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + 
"61560ff6a36b44f4a9dfdae5c52791d4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_db19fc8d37db4e45a5790a876836d8c4", + "max": 11610, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_36166c7bcb854b34aca1f41a5d6ea50b", + "value": 11610 + } + }, + "6578fd7acdb54c4c93528ea431fd0144": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_370692d819df41828b48c4ad446f977b", + "placeholder": "​", + "style": "IPY_MODEL_a0bf9160eb2647409b3200270914b90f", + "value": " 50.6k/50.6k [00:00<00:00, 2.71MB/s]" + } + }, + "668d5377ca56426a99753867e6e24862": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "697f027529b54ee9956bae78a11e0611": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": 
"1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "69ac12aec0714318bf2c83d4f4e745f5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "6b2012c3f88547af8884a9ea90e3164b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_938f45f1b3e24118b815d96ae34ba86a", + "placeholder": "​", + "style": "IPY_MODEL_9367047a800747f79c6b225d92397846", + "value": " 44.3M/44.3M [00:01<00:00, 31.0MB/s]" + } + }, + "6b91feeed5464877991ac2c207aebe7c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": 
"IPY_MODEL_4b2061b8a73c43ffb0c2f83daf0d0183", + "placeholder": "​", + "style": "IPY_MODEL_69ac12aec0714318bf2c83d4f4e745f5", + "value": "special_tokens_map.json: 100%" + } + }, + "6d3b9a05db0b4dadb638c686faa0c40a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "6dbbedeca9314e66ae50e44ffa31a414": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "6e34619b45934040b6092e6fb01ea7fe": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + 
"justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "71ce208e20d6483abb9ed923510c86d7": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d69dc491b3ab44d7852b21873ed7bb7f", + "placeholder": "​", + "style": "IPY_MODEL_f401d53bf28e44eb906bce6c05412662", + "value": " 51760/51760 [00:01<00:00, 45512.81 examples/s]" + } + }, + "7358cdad832342c983e31efb8754ab78": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": 
null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "73e352a3404f4c7dad0737f57d29e92f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_988a0e8c1f89446086858da0a891a79c", + "IPY_MODEL_4ccedf0d93094e63b57a0f8a434fba06", + "IPY_MODEL_6b2012c3f88547af8884a9ea90e3164b" + ], + "layout": "IPY_MODEL_7e29cb8dd4df4d5b94407cd8fd3f2011" + } + }, + "74501720ac7e4dbb911a4a99b3633bc6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "78e5400bff924a92a4cc61c4ff18b182": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": 
"IPY_MODEL_b9b313fd861948f5aba25b24b1518d30", + "placeholder": "​", + "style": "IPY_MODEL_4c666f4ace3943f8b80ecd20e7503236", + "value": " 1.18k/1.18k [00:00<00:00, 31.3kB/s]" + } + }, + "7975adbc2ec5489ea7fa0167e620d85c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_6e34619b45934040b6092e6fb01ea7fe", + "max": 51760, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_271ddaa553a042d09b6db7b450643d8f", + "value": 51760 + } + }, + "7e29cb8dd4df4d5b94407cd8fd3f2011": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, 
+ "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "810ff6c0e17d4fa09a30fef27eacff90": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "89965917796a4f81b899fdc7685f33df": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "89b2ef0dbfea47ab8e6f8d659e3351d1": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_b8908fa0df3743ecb9d12983a739104f", + "placeholder": "​", + "style": "IPY_MODEL_177c78fce95d4b4ab33057c5a048d693", + "value": " 9.09M/9.09M [00:00<00:00, 32.6MB/s]" + } + }, + "8b3505352a5a42bf910428c40ce40465": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": 
"@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_49277aeeac16434a865a4d12308b1abc", + "placeholder": "​", + "style": "IPY_MODEL_2157f01726d748f8a9ae4a00664430da", + "value": " 5.70G/5.70G [01:02<00:00, 30.1MB/s]" + } + }, + "8fc142b628fb40568730234de1cafde2": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_4ae7e449e4ea4c729b5f34607c18ebae", + "max": 172, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_3572201bd4d74a58b7a665f9bdfdcdba", + "value": 172 + } + }, + "9367047a800747f79c6b225d92397846": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "938f45f1b3e24118b815d96ae34ba86a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": 
"@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "95fbe66647904c06a20f640630d6dc0e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_b0a370dc20654b279b9680692e34418e", + "placeholder": "​", + "style": "IPY_MODEL_cfeb365ddf7548d58b2557f22737fcf5", + "value": " 11.6k/11.6k [00:00<00:00, 716kB/s]" + } + }, + "988a0e8c1f89446086858da0a891a79c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + 
"_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_ad2be500fc164c0f86f33e914ef8e6a0", + "placeholder": "​", + "style": "IPY_MODEL_5234566b1bfc4655b8d582ea5b46ed9f", + "value": "Downloading data: 100%" + } + }, + "98c58f23f4d549518832cb2d18f796e8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_09b76013aa9e45efb6deb23a7a0d0925", + "IPY_MODEL_39b29a75374b45c0a22506010be2b84e", + "IPY_MODEL_78e5400bff924a92a4cc61c4ff18b182" + ], + "layout": "IPY_MODEL_2a58d04b428c46f4b3dbadd3bc6cd529" + } + }, + "99fdbb0300c14c139d1937c646f0cfe7": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_7358cdad832342c983e31efb8754ab78", + "placeholder": "​", + "style": "IPY_MODEL_e9adf418296e436fb48bb9f78885598b", + "value": " 51760/51760 [00:01<00:00, 38665.95 examples/s]" + } + }, + "9f679ad3ec7f4fe8ad0510ffb57bc2ab": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + 
"_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_4ea63adfce694725bdba878aef709dd3", + "placeholder": "​", + "style": "IPY_MODEL_74501720ac7e4dbb911a4a99b3633bc6", + "value": "tokenizer.json: 100%" + } + }, + "a0037bdccf254159becde630bee3d1db": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "a069d2ab23824f29aa320ac256e2cfe9": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": 
null, + "visibility": null, + "width": null + } + }, + "a0bf9160eb2647409b3200270914b90f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "a41dc44766444a998bec2d777f249d23": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a8464a4c711e4e00aafdfc919b60d07e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + 
"_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_fb995c740590427b882572c81d4e848c", + "placeholder": "​", + "style": "IPY_MODEL_201b59ccd9f845e197029b57e424aefc", + "value": " 172/172 [00:00<00:00, 12.0kB/s]" + } + }, + "a9f0cc51fc3d4d7b874c32dcf1c5bdf2": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ad2be500fc164c0f86f33e914ef8e6a0": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": 
"@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b0240cd9a4554b29ae11f8051984a1c6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_edaf890370314a218f138015faa0b05d", + "placeholder": "​", + "style": "IPY_MODEL_697f027529b54ee9956bae78a11e0611", + "value": "Map: 100%" + } + }, + "b0a370dc20654b279b9680692e34418e": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + 
"align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b518dcee69074b87be73957cd810e7ed": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d891f8d0b1fc462f8008d02bb2a15692", + "placeholder": "​", + "style": "IPY_MODEL_cced8fd7e998472794f3f3e3018956a5", + "value": "tokenizer_config.json: 100%" + } + }, + "b8908fa0df3743ecb9d12983a739104f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": 
null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b993eaec6b224440bf80c0958c6fb536": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + 
}, + "b9b313fd861948f5aba25b24b1518d30": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ba90fdb8822d47dab7ba203bee297f37": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_0f8b6bfe16894500838793f2491d403f", + "placeholder": "​", + "style": "IPY_MODEL_bb19f6c747754682a514373a3a0535ba", + "value": "Downloading readme: 100%" + } + }, + "bb19f6c747754682a514373a3a0535ba": { + "model_module": 
"@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "bc883d4cf13e4f8b8a4fe5f410cb6efd": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_e9159e03e61f4f56978ece9c3bca49b2", + "max": 51760, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_810ff6c0e17d4fa09a30fef27eacff90", + "value": 51760 + } + }, + "c161d94df0f04feba9542237e0856c22": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + 
"justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c22f71b1f85843209d7e5321506b9cb9": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_1f44c9ce1adf470cbb19784493ed209f", + "IPY_MODEL_f1addc4479d849879e743cf9089e6540", + "IPY_MODEL_8b3505352a5a42bf910428c40ce40465" + ], + "layout": "IPY_MODEL_4c4c88d4c701450692fa0f6b0c5764b0" + } + }, + "c4f2b06a82fd4987b8b659524a7b503b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "cca8113c54c0495daedce1327bf9c68b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": 
"ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_e02f9b7849c64531835eb77b860d1c93", + "max": 464, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_56aee4853b7740e6a977254f5d1fa66d", + "value": 464 + } + }, + "cced8fd7e998472794f3f3e3018956a5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "cf245afeb1c04f29a24d291608c3d157": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_b518dcee69074b87be73957cd810e7ed", + "IPY_MODEL_e29104486d594b2992d7285e0ef77371", + "IPY_MODEL_6578fd7acdb54c4c93528ea431fd0144" + ], + "layout": "IPY_MODEL_d35db8148a354c56aaac56dbae22536f" + } + }, + "cfe8cae0e22b495bafa221a63d13b283": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": 
null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "cfeb365ddf7548d58b2557f22737fcf5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "d1b47d39450d4019ae85c9b2f943eeaf": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_4dcf6ff672d24983a1877a8431709aa9", + "IPY_MODEL_7975adbc2ec5489ea7fa0167e620d85c", + "IPY_MODEL_71ce208e20d6483abb9ed923510c86d7" + ], + "layout": "IPY_MODEL_cfe8cae0e22b495bafa221a63d13b283" + } + }, + "d35db8148a354c56aaac56dbae22536f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + 
"_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d69dc491b3ab44d7852b21873ed7bb7f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + 
"margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d891f8d0b1fc462f8008d02bb2a15692": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d8e5318cead340c4adbeaccc05d39225": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", 
+ "bar_color": null, + "description_width": "" + } + }, + "daf4cd890b35422683d22fd30bc71e83": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_b0240cd9a4554b29ae11f8051984a1c6", + "IPY_MODEL_bc883d4cf13e4f8b8a4fe5f410cb6efd", + "IPY_MODEL_99fdbb0300c14c139d1937c646f0cfe7" + ], + "layout": "IPY_MODEL_c161d94df0f04feba9542237e0856c22" + } + }, + "db19fc8d37db4e45a5790a876836d8c4": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + 
"de868e26e7154f62aa86223a539ad421": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "dea41c5260884aa6879b5e1d1697b14f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e02f9b7849c64531835eb77b860d1c93": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": 
null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e29104486d594b2992d7285e0ef77371": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_a9f0cc51fc3d4d7b874c32dcf1c5bdf2", + "max": 50641, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_2f6c70dd266c4816bfad3fd3d192929a", + "value": 50641 + } + }, + "e36a3f9eff0e4cf68834d66b0213ae96": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + 
"_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e9159e03e61f4f56978ece9c3bca49b2": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + 
"object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e9adf418296e436fb48bb9f78885598b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "edaf890370314a218f138015faa0b05d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f1addc4479d849879e743cf9089e6540": { + "model_module": 
"@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_43dec2ede91341f5af60eb522e18e984", + "max": 5702746405, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_d8e5318cead340c4adbeaccc05d39225", + "value": 5702746405 + } + }, + "f2df530d22c74977b249dd9fb5f4829b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_21db8a77b00d4a4e82fdfa608657531f", + "max": 9085698, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_6dbbedeca9314e66ae50e44ffa31a414", + "value": 9085698 + } + }, + "f401d53bf28e44eb906bce6c05412662": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "fb995c740590427b882572c81d4e848c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": 
"1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "fce7a61c25ec4390af43d92b7c473a45": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_30307300bc4e4baf96560e30969a82b6", + "IPY_MODEL_8fc142b628fb40568730234de1cafde2", + "IPY_MODEL_a8464a4c711e4e00aafdfc919b60d07e" + ], + "layout": "IPY_MODEL_5f40db8173dd4d76b6ef5ed6d9ec8b6e" + } + }, + "fdb1941405ed4e4aa06019933892deb3": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": 
"@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + } + } + } + }, + "nbformat": 4, + "nbformat_minor": 0 +}