diff --git "a/finnish_training_script.ipynb" "b/finnish_training_script.ipynb"
new file mode 100644--- /dev/null
+++ "b/finnish_training_script.ipynb"
@@ -0,0 +1,2727 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# HuggingFace challenge - Debugger notebook\n",
+ "Run this notebook to verify your library versions, check the GPU configuration, and run a quick training job."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "id": "T2utsYSKszvv"
+ },
+ "outputs": [],
+ "source": [
+ "import platform\n",
+ "import multiprocessing\n",
+ "\n",
+ "import torch\n",
+ "import transformers\n",
+ "import datasets\n",
+ "\n",
+ "import soundfile"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Print main information"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "5P6I-W9ts-kR",
+ "outputId": "939bd550-1486-46a6-8371-e82ada0f448c"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Platform: Linux-5.11.0-37-generic-x86_64-with-glibc2.10\n",
+ "CPU cores: 60\n",
+ "Python version: 3.8.8\n",
+ "PyTorch version: 1.10.1+cu102\n",
+ "GPU is visible: True\n",
+ "Transformers version: 4.16.0.dev0\n",
+ "Datasets version: 1.17.1.dev0\n",
+ "soundfile version: 0.10.3\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Build the environment report first, then emit it one line at a time.\n",
+ "# Order mirrors the import cell: host, interpreter, PyTorch/GPU, HF libraries, audio backend.\n",
+ "report_lines = [\n",
+ "    f\"Platform: {platform.platform()}\",\n",
+ "    f\"CPU cores: {multiprocessing.cpu_count()}\",\n",
+ "    f\"Python version: {platform.python_version()}\",\n",
+ "    f\"PyTorch version: {torch.__version__}\",\n",
+ "    f\"GPU is visible: {torch.cuda.is_available()}\",\n",
+ "    f\"Transformers version: {transformers.__version__}\",\n",
+ "    f\"Datasets version: {datasets.__version__}\",\n",
+ "    f\"soundfile version: {soundfile.__version__}\",\n",
+ "]\n",
+ "\n",
+ "for report_line in report_lines:\n",
+ "    print(report_line)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Check your GPU information (if any)\n",
+ "If you launched an AI Training job with GPU resources, they should be listed below (Tesla V100S 32GB).\n",
+ "The driver and CUDA versions are shown in the header of the `nvidia-smi` output."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "YT7fRnKctggU",
+ "outputId": "f355a3e0-20da-489f-bd1f-5e508e792a68"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Thu Jan 27 02:55:35 2022 \n",
+ "+-----------------------------------------------------------------------------+\n",
+ "| NVIDIA-SMI 470.57.02 Driver Version: 470.57.02 CUDA Version: 11.4 |\n",
+ "|-------------------------------+----------------------+----------------------+\n",
+ "| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\n",
+ "| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n",
+ "| | | MIG M. |\n",
+ "|===============================+======================+======================|\n",
+ "| 0 Tesla V100S-PCI... Off | 00000000:00:06.0 Off | 0 |\n",
+ "| N/A 34C P0 25W / 250W | 4MiB / 32510MiB | 0% Default |\n",
+ "| | | N/A |\n",
+ "+-------------------------------+----------------------+----------------------+\n",
+ " \n",
+ "+-----------------------------------------------------------------------------+\n",
+ "| Processes: |\n",
+ "| GPU GI CI PID Type Process name GPU Memory |\n",
+ "| ID ID Usage |\n",
+ "|=============================================================================|\n",
+ "| No running processes found |\n",
+ "+-----------------------------------------------------------------------------+\n"
+ ]
+ }
+ ],
+ "source": [
+ "!nvidia-smi"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "2fa897b4afc049229144599af9e3f807",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "VBox(children=(HTML(value='
\\n] 29.64K --.-KB/s in 0.001s \n",
+ "\n",
+ "2022-01-22 15:01:09 (20.1 MB/s) - ‘run_speech_recognition_ctc.py’ saved [30348/30348]\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "!wget -O run_speech_recognition_ctc.py https://raw.githubusercontent.com/huggingface/transformers/master/examples/pytorch/speech-recognition/run_speech_recognition_ctc.py"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# \t--learning_rate=\"7.5e-5\" \\\n",
+ "# 84.5"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 44,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "Mz4bubhxxsad",
+ "outputId": "23398525-cc19-43c2-9fec-497e06214f29"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "01/27/2022 03:05:04 - WARNING - __main__ - Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: True\n",
+ "01/27/2022 03:05:04 - INFO - __main__ - Training/evaluation parameters TrainingArguments(\n",
+ "_n_gpu=1,\n",
+ "adafactor=False,\n",
+ "adam_beta1=0.9,\n",
+ "adam_beta2=0.999,\n",
+ "adam_epsilon=1e-08,\n",
+ "bf16=False,\n",
+ "bf16_full_eval=False,\n",
+ "dataloader_drop_last=False,\n",
+ "dataloader_num_workers=0,\n",
+ "dataloader_pin_memory=True,\n",
+ "ddp_bucket_cap_mb=None,\n",
+ "ddp_find_unused_parameters=None,\n",
+ "debug=[],\n",
+ "deepspeed=None,\n",
+ "disable_tqdm=False,\n",
+ "do_eval=True,\n",
+ "do_predict=False,\n",
+ "do_train=True,\n",
+ "eval_accumulation_steps=None,\n",
+ "eval_steps=500,\n",
+ "evaluation_strategy=IntervalStrategy.STEPS,\n",
+ "fp16=True,\n",
+ "fp16_backend=auto,\n",
+ "fp16_full_eval=False,\n",
+ "fp16_opt_level=O1,\n",
+ "gradient_accumulation_steps=1,\n",
+ "gradient_checkpointing=True,\n",
+ "greater_is_better=None,\n",
+ "group_by_length=True,\n",
+ "half_precision_backend=auto,\n",
+ "hub_model_id=None,\n",
+ "hub_strategy=HubStrategy.EVERY_SAVE,\n",
+ "hub_token=,\n",
+ "ignore_data_skip=False,\n",
+ "label_names=None,\n",
+ "label_smoothing_factor=0.0,\n",
+ "learning_rate=7e-05,\n",
+ "length_column_name=input_length,\n",
+ "load_best_model_at_end=False,\n",
+ "local_rank=-1,\n",
+ "log_level=-1,\n",
+ "log_level_replica=-1,\n",
+ "log_on_each_node=True,\n",
+ "logging_dir=./wav2vec2-large-xls-r-300m-finnish/runs/Jan27_03-05-04_job-8be8b741-e32e-4579-bbec-1e00d9824b4f,\n",
+ "logging_first_step=False,\n",
+ "logging_nan_inf_filter=True,\n",
+ "logging_steps=100,\n",
+ "logging_strategy=IntervalStrategy.STEPS,\n",
+ "lr_scheduler_type=SchedulerType.LINEAR,\n",
+ "max_grad_norm=1.0,\n",
+ "max_steps=-1,\n",
+ "metric_for_best_model=None,\n",
+ "mp_parameters=,\n",
+ "no_cuda=False,\n",
+ "num_train_epochs=70.0,\n",
+ "optim=OptimizerNames.ADAMW_HF,\n",
+ "output_dir=./wav2vec2-large-xls-r-300m-finnish,\n",
+ "overwrite_output_dir=True,\n",
+ "past_index=-1,\n",
+ "per_device_eval_batch_size=32,\n",
+ "per_device_train_batch_size=32,\n",
+ "prediction_loss_only=False,\n",
+ "push_to_hub=True,\n",
+ "push_to_hub_model_id=None,\n",
+ "push_to_hub_organization=None,\n",
+ "push_to_hub_token=,\n",
+ "remove_unused_columns=True,\n",
+ "report_to=[],\n",
+ "resume_from_checkpoint=None,\n",
+ "run_name=./wav2vec2-large-xls-r-300m-finnish,\n",
+ "save_on_each_node=False,\n",
+ "save_steps=500,\n",
+ "save_strategy=IntervalStrategy.STEPS,\n",
+ "save_total_limit=2,\n",
+ "seed=42,\n",
+ "sharded_ddp=[],\n",
+ "skip_memory_metrics=True,\n",
+ "tf32=None,\n",
+ "tpu_metrics_debug=False,\n",
+ "tpu_num_cores=None,\n",
+ "use_legacy_prediction_loop=False,\n",
+ "warmup_ratio=0.0,\n",
+ "warmup_steps=500,\n",
+ "weight_decay=0.0,\n",
+ "xpu_backend=None,\n",
+ ")\n",
+ "01/27/2022 03:05:06 - WARNING - datasets.builder - Reusing dataset common_voice (/workspace/.cache/huggingface/datasets/mozilla-foundation___common_voice/fi/7.0.0/33e08856cfa0d0665e837bcad73ffd920a0bc713ce8c5fffb55dbdf1c084d5ba)\n",
+ "01/27/2022 03:05:09 - WARNING - datasets.builder - Reusing dataset common_voice (/workspace/.cache/huggingface/datasets/mozilla-foundation___common_voice/fi/7.0.0/33e08856cfa0d0665e837bcad73ffd920a0bc713ce8c5fffb55dbdf1c084d5ba)\n",
+ "01/27/2022 03:05:09 - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /workspace/.cache/huggingface/datasets/mozilla-foundation___common_voice/fi/7.0.0/33e08856cfa0d0665e837bcad73ffd920a0bc713ce8c5fffb55dbdf1c084d5ba/cache-02a235731bb40486.arrow\n",
+ "01/27/2022 03:05:09 - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /workspace/.cache/huggingface/datasets/mozilla-foundation___common_voice/fi/7.0.0/33e08856cfa0d0665e837bcad73ffd920a0bc713ce8c5fffb55dbdf1c084d5ba/cache-4a027b259934c0ca.arrow\n",
+ "loading configuration file https://huggingface.co/facebook/wav2vec2-xls-r-300m/resolve/main/config.json from cache at /workspace/.cache/huggingface/transformers/dabc27df63e37bd2a7a221c7774e35f36a280fbdf917cf54cadfc7df8c786f6f.a3e4c3c967d9985881e0ae550a5f6f668f897db5ab2e0802f9b97973b15970e6\n",
+ "Model config Wav2Vec2Config {\n",
+ " \"_name_or_path\": \"facebook/wav2vec2-xls-r-300m\",\n",
+ " \"activation_dropout\": 0.0,\n",
+ " \"adapter_kernel_size\": 3,\n",
+ " \"adapter_stride\": 2,\n",
+ " \"add_adapter\": false,\n",
+ " \"apply_spec_augment\": true,\n",
+ " \"architectures\": [\n",
+ " \"Wav2Vec2ForPreTraining\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.1,\n",
+ " \"bos_token_id\": 1,\n",
+ " \"classifier_proj_size\": 256,\n",
+ " \"codevector_dim\": 768,\n",
+ " \"contrastive_logits_temperature\": 0.1,\n",
+ " \"conv_bias\": true,\n",
+ " \"conv_dim\": [\n",
+ " 512,\n",
+ " 512,\n",
+ " 512,\n",
+ " 512,\n",
+ " 512,\n",
+ " 512,\n",
+ " 512\n",
+ " ],\n",
+ " \"conv_kernel\": [\n",
+ " 10,\n",
+ " 3,\n",
+ " 3,\n",
+ " 3,\n",
+ " 3,\n",
+ " 2,\n",
+ " 2\n",
+ " ],\n",
+ " \"conv_stride\": [\n",
+ " 5,\n",
+ " 2,\n",
+ " 2,\n",
+ " 2,\n",
+ " 2,\n",
+ " 2,\n",
+ " 2\n",
+ " ],\n",
+ " \"ctc_loss_reduction\": \"sum\",\n",
+ " \"ctc_zero_infinity\": false,\n",
+ " \"diversity_loss_weight\": 0.1,\n",
+ " \"do_stable_layer_norm\": true,\n",
+ " \"eos_token_id\": 2,\n",
+ " \"feat_extract_activation\": \"gelu\",\n",
+ " \"feat_extract_dropout\": 0.0,\n",
+ " \"feat_extract_norm\": \"layer\",\n",
+ " \"feat_proj_dropout\": 0.1,\n",
+ " \"feat_quantizer_dropout\": 0.0,\n",
+ " \"final_dropout\": 0.0,\n",
+ " \"gradient_checkpointing\": false,\n",
+ " \"hidden_act\": \"gelu\",\n",
+ " \"hidden_dropout\": 0.1,\n",
+ " \"hidden_size\": 1024,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 4096,\n",
+ " \"layer_norm_eps\": 1e-05,\n",
+ " \"layerdrop\": 0.1,\n",
+ " \"mask_feature_length\": 10,\n",
+ " \"mask_feature_min_masks\": 0,\n",
+ " \"mask_feature_prob\": 0.0,\n",
+ " \"mask_time_length\": 10,\n",
+ " \"mask_time_min_masks\": 2,\n",
+ " \"mask_time_prob\": 0.075,\n",
+ " \"model_type\": \"wav2vec2\",\n",
+ " \"num_adapter_layers\": 3,\n",
+ " \"num_attention_heads\": 16,\n",
+ " \"num_codevector_groups\": 2,\n",
+ " \"num_codevectors_per_group\": 320,\n",
+ " \"num_conv_pos_embedding_groups\": 16,\n",
+ " \"num_conv_pos_embeddings\": 128,\n",
+ " \"num_feat_extract_layers\": 7,\n",
+ " \"num_hidden_layers\": 24,\n",
+ " \"num_negatives\": 100,\n",
+ " \"output_hidden_size\": 1024,\n",
+ " \"pad_token_id\": 0,\n",
+ " \"proj_codevector_dim\": 768,\n",
+ " \"tdnn_dilation\": [\n",
+ " 1,\n",
+ " 2,\n",
+ " 3,\n",
+ " 1,\n",
+ " 1\n",
+ " ],\n",
+ " \"tdnn_dim\": [\n",
+ " 512,\n",
+ " 512,\n",
+ " 512,\n",
+ " 512,\n",
+ " 1500\n",
+ " ],\n",
+ " \"tdnn_kernel\": [\n",
+ " 5,\n",
+ " 3,\n",
+ " 3,\n",
+ " 1,\n",
+ " 1\n",
+ " ],\n",
+ " \"torch_dtype\": \"float32\",\n",
+ " \"transformers_version\": \"4.16.0.dev0\",\n",
+ " \"use_weighted_layer_sum\": false,\n",
+ " \"vocab_size\": 32,\n",
+ " \"xvector_output_dim\": 512\n",
+ "}\n",
+ "\n",
+ "100%|█████████████████████████████████████████████| 1/1 [00:00<00:00, 4.60ba/s]\n",
+ "100%|█████████████████████████████████████████████| 1/1 [00:00<00:00, 22.77ba/s]\n",
+ "Didn't find file ./wav2vec2-large-xls-r-300m-finnish/tokenizer_config.json. We won't load it.\n",
+ "Didn't find file ./wav2vec2-large-xls-r-300m-finnish/added_tokens.json. We won't load it.\n",
+ "Didn't find file ./wav2vec2-large-xls-r-300m-finnish/special_tokens_map.json. We won't load it.\n",
+ "Didn't find file ./wav2vec2-large-xls-r-300m-finnish/tokenizer.json. We won't load it.\n",
+ "loading file ./wav2vec2-large-xls-r-300m-finnish/vocab.json\n",
+ "loading file None\n",
+ "loading file None\n",
+ "loading file None\n",
+ "loading file None\n",
+ "file ./wav2vec2-large-xls-r-300m-finnish/config.json not found\n",
+ "Adding to the vocabulary\n",
+ "Adding to the vocabulary\n",
+ "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "loading configuration file https://huggingface.co/facebook/wav2vec2-xls-r-300m/resolve/main/config.json from cache at /workspace/.cache/huggingface/transformers/dabc27df63e37bd2a7a221c7774e35f36a280fbdf917cf54cadfc7df8c786f6f.a3e4c3c967d9985881e0ae550a5f6f668f897db5ab2e0802f9b97973b15970e6\n",
+ "Model config Wav2Vec2Config {\n",
+ " \"_name_or_path\": \"facebook/wav2vec2-xls-r-300m\",\n",
+ " \"activation_dropout\": 0.0,\n",
+ " \"adapter_kernel_size\": 3,\n",
+ " \"adapter_stride\": 2,\n",
+ " \"add_adapter\": false,\n",
+ " \"apply_spec_augment\": true,\n",
+ " \"architectures\": [\n",
+ " \"Wav2Vec2ForPreTraining\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.1,\n",
+ " \"bos_token_id\": 1,\n",
+ " \"classifier_proj_size\": 256,\n",
+ " \"codevector_dim\": 768,\n",
+ " \"contrastive_logits_temperature\": 0.1,\n",
+ " \"conv_bias\": true,\n",
+ " \"conv_dim\": [\n",
+ " 512,\n",
+ " 512,\n",
+ " 512,\n",
+ " 512,\n",
+ " 512,\n",
+ " 512,\n",
+ " 512\n",
+ " ],\n",
+ " \"conv_kernel\": [\n",
+ " 10,\n",
+ " 3,\n",
+ " 3,\n",
+ " 3,\n",
+ " 3,\n",
+ " 2,\n",
+ " 2\n",
+ " ],\n",
+ " \"conv_stride\": [\n",
+ " 5,\n",
+ " 2,\n",
+ " 2,\n",
+ " 2,\n",
+ " 2,\n",
+ " 2,\n",
+ " 2\n",
+ " ],\n",
+ " \"ctc_loss_reduction\": \"sum\",\n",
+ " \"ctc_zero_infinity\": false,\n",
+ " \"diversity_loss_weight\": 0.1,\n",
+ " \"do_stable_layer_norm\": true,\n",
+ " \"eos_token_id\": 2,\n",
+ " \"feat_extract_activation\": \"gelu\",\n",
+ " \"feat_extract_dropout\": 0.0,\n",
+ " \"feat_extract_norm\": \"layer\",\n",
+ " \"feat_proj_dropout\": 0.1,\n",
+ " \"feat_quantizer_dropout\": 0.0,\n",
+ " \"final_dropout\": 0.0,\n",
+ " \"gradient_checkpointing\": false,\n",
+ " \"hidden_act\": \"gelu\",\n",
+ " \"hidden_dropout\": 0.1,\n",
+ " \"hidden_size\": 1024,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 4096,\n",
+ " \"layer_norm_eps\": 1e-05,\n",
+ " \"layerdrop\": 0.1,\n",
+ " \"mask_feature_length\": 10,\n",
+ " \"mask_feature_min_masks\": 0,\n",
+ " \"mask_feature_prob\": 0.0,\n",
+ " \"mask_time_length\": 10,\n",
+ " \"mask_time_min_masks\": 2,\n",
+ " \"mask_time_prob\": 0.075,\n",
+ " \"model_type\": \"wav2vec2\",\n",
+ " \"num_adapter_layers\": 3,\n",
+ " \"num_attention_heads\": 16,\n",
+ " \"num_codevector_groups\": 2,\n",
+ " \"num_codevectors_per_group\": 320,\n",
+ " \"num_conv_pos_embedding_groups\": 16,\n",
+ " \"num_conv_pos_embeddings\": 128,\n",
+ " \"num_feat_extract_layers\": 7,\n",
+ " \"num_hidden_layers\": 24,\n",
+ " \"num_negatives\": 100,\n",
+ " \"output_hidden_size\": 1024,\n",
+ " \"pad_token_id\": 0,\n",
+ " \"proj_codevector_dim\": 768,\n",
+ " \"tdnn_dilation\": [\n",
+ " 1,\n",
+ " 2,\n",
+ " 3,\n",
+ " 1,\n",
+ " 1\n",
+ " ],\n",
+ " \"tdnn_dim\": [\n",
+ " 512,\n",
+ " 512,\n",
+ " 512,\n",
+ " 512,\n",
+ " 1500\n",
+ " ],\n",
+ " \"tdnn_kernel\": [\n",
+ " 5,\n",
+ " 3,\n",
+ " 3,\n",
+ " 1,\n",
+ " 1\n",
+ " ],\n",
+ " \"torch_dtype\": \"float32\",\n",
+ " \"transformers_version\": \"4.16.0.dev0\",\n",
+ " \"use_weighted_layer_sum\": false,\n",
+ " \"vocab_size\": 32,\n",
+ " \"xvector_output_dim\": 512\n",
+ "}\n",
+ "\n",
+ "loading feature extractor configuration file https://huggingface.co/facebook/wav2vec2-xls-r-300m/resolve/main/preprocessor_config.json from cache at /workspace/.cache/huggingface/transformers/6fb028b95b394059e7d3b367bbca2382b576c66aebe896f04d2cd34e1b575f5b.d4484dc1c81456a2461485e7168b04347a7b9a4e3b1ef3aba723323b33e12326\n",
+ "Feature extractor Wav2Vec2FeatureExtractor {\n",
+ " \"do_normalize\": true,\n",
+ " \"feature_extractor_type\": \"Wav2Vec2FeatureExtractor\",\n",
+ " \"feature_size\": 1,\n",
+ " \"padding_side\": \"right\",\n",
+ " \"padding_value\": 0,\n",
+ " \"return_attention_mask\": true,\n",
+ " \"sampling_rate\": 16000\n",
+ "}\n",
+ "\n",
+ "loading weights file https://huggingface.co/facebook/wav2vec2-xls-r-300m/resolve/main/pytorch_model.bin from cache at /workspace/.cache/huggingface/transformers/1e6a6507f3b689035cd4b247e2a37c154e27f39143f31357a49b4e38baeccc36.1edb32803799e27ed554eb7dd935f6745b1a0b17b0ea256442fe24db6eb546cd\n",
+ "Some weights of the model checkpoint at facebook/wav2vec2-xls-r-300m were not used when initializing Wav2Vec2ForCTC: ['quantizer.codevectors', 'project_hid.bias', 'project_q.weight', 'quantizer.weight_proj.weight', 'quantizer.weight_proj.bias', 'project_q.bias', 'project_hid.weight']\n",
+ "- This IS expected if you are initializing Wav2Vec2ForCTC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
+ "- This IS NOT expected if you are initializing Wav2Vec2ForCTC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
+ "Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-xls-r-300m and are newly initialized: ['lm_head.weight', 'lm_head.bias']\n",
+ "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
+ "preprocess datasets: 100%|█████████████████| 3627/3627 [00:26<00:00, 138.44ex/s]\n",
+ "preprocess datasets: 100%|█████████████████| 1599/1599 [00:11<00:00, 133.52ex/s]\n",
+ "100%|████████████████████████████████████████████| 4/4 [00:00<00:00, 343.45ba/s]\n",
+ "100%|████████████████████████████████████████████| 2/2 [00:00<00:00, 491.54ba/s]\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-finnish/preprocessor_config.json\n",
+ "tokenizer config file saved in ./wav2vec2-large-xls-r-300m-finnish/tokenizer_config.json\n",
+ "Special tokens file saved in ./wav2vec2-large-xls-r-300m-finnish/special_tokens_map.json\n",
+ "added tokens file saved in ./wav2vec2-large-xls-r-300m-finnish/added_tokens.json\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-finnish/config.json\n",
+ "loading feature extractor configuration file ./wav2vec2-large-xls-r-300m-finnish/preprocessor_config.json\n",
+ "loading configuration file ./wav2vec2-large-xls-r-300m-finnish/config.json\n",
+ "Model config Wav2Vec2Config {\n",
+ " \"_name_or_path\": \"./wav2vec2-large-xls-r-300m-finnish\",\n",
+ " \"activation_dropout\": 0.1,\n",
+ " \"adapter_kernel_size\": 3,\n",
+ " \"adapter_stride\": 2,\n",
+ " \"add_adapter\": false,\n",
+ " \"apply_spec_augment\": true,\n",
+ " \"architectures\": [\n",
+ " \"Wav2Vec2ForPreTraining\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 1,\n",
+ " \"classifier_proj_size\": 256,\n",
+ " \"codevector_dim\": 768,\n",
+ " \"contrastive_logits_temperature\": 0.1,\n",
+ " \"conv_bias\": true,\n",
+ " \"conv_dim\": [\n",
+ " 512,\n",
+ " 512,\n",
+ " 512,\n",
+ " 512,\n",
+ " 512,\n",
+ " 512,\n",
+ " 512\n",
+ " ],\n",
+ " \"conv_kernel\": [\n",
+ " 10,\n",
+ " 3,\n",
+ " 3,\n",
+ " 3,\n",
+ " 3,\n",
+ " 2,\n",
+ " 2\n",
+ " ],\n",
+ " \"conv_stride\": [\n",
+ " 5,\n",
+ " 2,\n",
+ " 2,\n",
+ " 2,\n",
+ " 2,\n",
+ " 2,\n",
+ " 2\n",
+ " ],\n",
+ " \"ctc_loss_reduction\": \"mean\",\n",
+ " \"ctc_zero_infinity\": false,\n",
+ " \"diversity_loss_weight\": 0.1,\n",
+ " \"do_stable_layer_norm\": true,\n",
+ " \"eos_token_id\": 2,\n",
+ " \"feat_extract_activation\": \"gelu\",\n",
+ " \"feat_extract_dropout\": 0.0,\n",
+ " \"feat_extract_norm\": \"layer\",\n",
+ " \"feat_proj_dropout\": 0.0,\n",
+ " \"feat_quantizer_dropout\": 0.0,\n",
+ " \"final_dropout\": 0.0,\n",
+ " \"hidden_act\": \"gelu\",\n",
+ " \"hidden_dropout\": 0.0,\n",
+ " \"hidden_size\": 1024,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 4096,\n",
+ " \"layer_norm_eps\": 1e-05,\n",
+ " \"layerdrop\": 0.0,\n",
+ " \"mask_feature_length\": 64,\n",
+ " \"mask_feature_min_masks\": 0,\n",
+ " \"mask_feature_prob\": 0.25,\n",
+ " \"mask_time_length\": 10,\n",
+ " \"mask_time_min_masks\": 2,\n",
+ " \"mask_time_prob\": 0.75,\n",
+ " \"model_type\": \"wav2vec2\",\n",
+ " \"num_adapter_layers\": 3,\n",
+ " \"num_attention_heads\": 16,\n",
+ " \"num_codevector_groups\": 2,\n",
+ " \"num_codevectors_per_group\": 320,\n",
+ " \"num_conv_pos_embedding_groups\": 16,\n",
+ " \"num_conv_pos_embeddings\": 128,\n",
+ " \"num_feat_extract_layers\": 7,\n",
+ " \"num_hidden_layers\": 24,\n",
+ " \"num_negatives\": 100,\n",
+ " \"output_hidden_size\": 1024,\n",
+ " \"pad_token_id\": 32,\n",
+ " \"proj_codevector_dim\": 768,\n",
+ " \"tdnn_dilation\": [\n",
+ " 1,\n",
+ " 2,\n",
+ " 3,\n",
+ " 1,\n",
+ " 1\n",
+ " ],\n",
+ " \"tdnn_dim\": [\n",
+ " 512,\n",
+ " 512,\n",
+ " 512,\n",
+ " 512,\n",
+ " 1500\n",
+ " ],\n",
+ " \"tdnn_kernel\": [\n",
+ " 5,\n",
+ " 3,\n",
+ " 3,\n",
+ " 1,\n",
+ " 1\n",
+ " ],\n",
+ " \"torch_dtype\": \"float32\",\n",
+ " \"transformers_version\": \"4.16.0.dev0\",\n",
+ " \"use_weighted_layer_sum\": false,\n",
+ " \"vocab_size\": 35,\n",
+ " \"xvector_output_dim\": 512\n",
+ "}\n",
+ "\n",
+ "loading feature extractor configuration file ./wav2vec2-large-xls-r-300m-finnish/preprocessor_config.json\n",
+ "Feature extractor Wav2Vec2FeatureExtractor {\n",
+ " \"do_normalize\": true,\n",
+ " \"feature_extractor_type\": \"Wav2Vec2FeatureExtractor\",\n",
+ " \"feature_size\": 1,\n",
+ " \"padding_side\": \"right\",\n",
+ " \"padding_value\": 0,\n",
+ " \"return_attention_mask\": true,\n",
+ " \"sampling_rate\": 16000\n",
+ "}\n",
+ "\n",
+ "Didn't find file ./wav2vec2-large-xls-r-300m-finnish/tokenizer.json. We won't load it.\n",
+ "loading file ./wav2vec2-large-xls-r-300m-finnish/vocab.json\n",
+ "loading file ./wav2vec2-large-xls-r-300m-finnish/tokenizer_config.json\n",
+ "loading file ./wav2vec2-large-xls-r-300m-finnish/added_tokens.json\n",
+ "loading file ./wav2vec2-large-xls-r-300m-finnish/special_tokens_map.json\n",
+ "loading file None\n",
+ "Adding to the vocabulary\n",
+ "Adding to the vocabulary\n",
+ "Cloning https://huggingface.co/infinitejoy/wav2vec2-large-xls-r-300m-finnish into local empty directory.\n",
+ "01/27/2022 03:06:06 - WARNING - huggingface_hub.repository - Cloning https://huggingface.co/infinitejoy/wav2vec2-large-xls-r-300m-finnish into local empty directory.\n",
+ "Using amp half precision backend\n",
+ "The following columns in the training set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+ "/opt/conda/lib/python3.8/site-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use thePyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
+ " warnings.warn(\n",
+ "***** Running training *****\n",
+ " Num examples = 3627\n",
+ " Num Epochs = 70\n",
+ " Instantaneous batch size per device = 32\n",
+ " Total train batch size (w. parallel, distributed & accumulation) = 32\n",
+ " Gradient Accumulation steps = 1\n",
+ " Total optimization steps = 7980\n",
+ "{'loss': 8.2074, 'learning_rate': 1.3719999999999999e-05, 'epoch': 0.88} \n",
+ "{'loss': 3.7205, 'learning_rate': 2.772e-05, 'epoch': 1.75} \n",
+ "{'loss': 3.1583, 'learning_rate': 4.1719999999999994e-05, 'epoch': 2.63} \n",
+ "{'loss': 2.9766, 'learning_rate': 5.5719999999999995e-05, 'epoch': 3.51} \n",
+ "{'loss': 2.9032, 'learning_rate': 6.971999999999999e-05, 'epoch': 4.39} \n",
+ " 6%|██▍ | 500/7980 [10:07<2:27:07, 1.18s/it]The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 1599\n",
+ " Batch size = 32\n",
+ "\n",
+ " 0%| | 0/50 [00:00, ?it/s]\u001b[A\n",
+ " 4%|█▊ | 2/50 [00:01<00:34, 1.40it/s]\u001b[A\n",
+ " 6%|██▋ | 3/50 [00:02<00:45, 1.04it/s]\u001b[A\n",
+ " 8%|███▌ | 4/50 [00:04<00:52, 1.15s/it]\u001b[A\n",
+ " 10%|████▍ | 5/50 [00:05<00:56, 1.26s/it]\u001b[A\n",
+ " 12%|█████▎ | 6/50 [00:07<00:59, 1.35s/it]\u001b[A\n",
+ " 14%|██████▏ | 7/50 [00:08<00:52, 1.22s/it]\u001b[A\n",
+ " 16%|███████ | 8/50 [00:09<00:52, 1.25s/it]\u001b[A\n",
+ " 18%|███████▉ | 9/50 [00:10<00:54, 1.32s/it]\u001b[A\n",
+ " 20%|████████▌ | 10/50 [00:12<00:56, 1.42s/it]\u001b[A\n",
+ " 22%|█████████▍ | 11/50 [00:14<01:01, 1.57s/it]\u001b[A\n",
+ " 24%|██████████▎ | 12/50 [00:16<01:01, 1.61s/it]\u001b[A\n",
+ " 26%|███████████▏ | 13/50 [00:17<00:55, 1.50s/it]\u001b[A\n",
+ " 28%|████████████ | 14/50 [00:18<00:48, 1.35s/it]\u001b[A\n",
+ " 30%|████████████▉ | 15/50 [00:19<00:43, 1.24s/it]\u001b[A\n",
+ " 32%|█████████████▊ | 16/50 [00:20<00:39, 1.16s/it]\u001b[A\n",
+ " 34%|██████████████▌ | 17/50 [00:21<00:35, 1.09s/it]\u001b[A\n",
+ " 36%|███████████████▍ | 18/50 [00:22<00:33, 1.06s/it]\u001b[A\n",
+ " 38%|████████████████▎ | 19/50 [00:23<00:31, 1.02s/it]\u001b[A\n",
+ " 40%|█████████████████▏ | 20/50 [00:24<00:30, 1.02s/it]\u001b[A\n",
+ " 42%|██████████████████ | 21/50 [00:25<00:28, 1.01it/s]\u001b[A\n",
+ " 44%|██████████████████▉ | 22/50 [00:26<00:27, 1.00it/s]\u001b[A\n",
+ " 46%|███████████████████▊ | 23/50 [00:27<00:25, 1.06it/s]\u001b[A\n",
+ " 48%|████████████████████▋ | 24/50 [00:28<00:25, 1.04it/s]\u001b[A\n",
+ " 50%|█████████████████████▌ | 25/50 [00:29<00:25, 1.02s/it]\u001b[A\n",
+ " 52%|██████████████████████▎ | 26/50 [00:30<00:24, 1.02s/it]\u001b[A\n",
+ " 54%|███████████████████████▏ | 27/50 [00:31<00:23, 1.03s/it]\u001b[A\n",
+ " 56%|████████████████████████ | 28/50 [00:32<00:22, 1.00s/it]\u001b[A\n",
+ " 58%|████████████████████████▉ | 29/50 [00:33<00:20, 1.05it/s]\u001b[A\n",
+ " 60%|█████████████████████████▊ | 30/50 [00:33<00:18, 1.05it/s]\u001b[A\n",
+ " 62%|██████████████████████████▋ | 31/50 [00:34<00:18, 1.04it/s]\u001b[A\n",
+ " 64%|███████████████████████████▌ | 32/50 [00:36<00:17, 1.01it/s]\u001b[A\n",
+ " 66%|████████████████████████████▍ | 33/50 [00:36<00:15, 1.08it/s]\u001b[A\n",
+ " 68%|█████████████████████████████▏ | 34/50 [00:37<00:13, 1.22it/s]\u001b[A\n",
+ " 70%|██████████████████████████████ | 35/50 [00:38<00:13, 1.15it/s]\u001b[A\n",
+ " 72%|██████████████████████████████▉ | 36/50 [00:39<00:12, 1.15it/s]\u001b[A\n",
+ " 74%|███████████████████████████████▊ | 37/50 [00:40<00:11, 1.09it/s]\u001b[A\n",
+ " 76%|████████████████████████████████▋ | 38/50 [00:41<00:11, 1.09it/s]\u001b[A\n",
+ " 78%|█████████████████████████████████▌ | 39/50 [00:42<00:10, 1.09it/s]\u001b[A\n",
+ " 80%|██████████████████████████████████▍ | 40/50 [00:43<00:09, 1.10it/s]\u001b[A\n",
+ " 82%|███████████████████████████████████▎ | 41/50 [00:43<00:08, 1.12it/s]\u001b[A\n",
+ " 84%|████████████████████████████████████ | 42/50 [00:44<00:07, 1.10it/s]\u001b[A\n",
+ " 86%|████████████████████████████████████▉ | 43/50 [00:45<00:06, 1.06it/s]\u001b[A\n",
+ " 88%|█████████████████████████████████████▊ | 44/50 [00:46<00:05, 1.02it/s]\u001b[A\n",
+ " 90%|██████████████████████████████████████▋ | 45/50 [00:47<00:04, 1.02it/s]\u001b[A\n",
+ " 92%|███████████████████████████████████████▌ | 46/50 [00:48<00:03, 1.01it/s]\u001b[A\n",
+ " 94%|████████████████████████████████████████▍ | 47/50 [00:49<00:02, 1.01it/s]\u001b[A\n",
+ " 96%|█████████████████████████████████████████▎ | 48/50 [00:50<00:01, 1.04it/s]\u001b[A\n",
+ " 98%|██████████████████████████████████████████▏| 49/50 [00:51<00:00, 1.08it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 2.8768463134765625, 'eval_wer': 1.0, 'eval_runtime': 54.8752, 'eval_samples_per_second': 29.139, 'eval_steps_per_second': 0.911, 'epoch': 4.39}\n",
+ " 6%|██▍ | 500/7980 [11:02<2:27:07, 1.18s/it]\n",
+ "100%|███████████████████████████████████████████| 50/50 [00:53<00:00, 1.05it/s]\u001b[A\n",
+ " \u001b[ASaving model checkpoint to ./wav2vec2-large-xls-r-300m-finnish/checkpoint-500\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-finnish/checkpoint-500/config.json\n",
+ "Model weights saved in ./wav2vec2-large-xls-r-300m-finnish/checkpoint-500/pytorch_model.bin\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-finnish/checkpoint-500/preprocessor_config.json\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-finnish/preprocessor_config.json\n",
+ "{'loss': 2.7766, 'learning_rate': 6.81470588235294e-05, 'epoch': 6.14} \n",
+ "{'loss': 2.3776, 'learning_rate': 6.721122994652407e-05, 'epoch': 7.02} \n",
+ "{'loss': 1.8024, 'learning_rate': 6.627540106951871e-05, 'epoch': 7.89} \n",
+ "{'loss': 1.5724, 'learning_rate': 6.533957219251336e-05, 'epoch': 8.77} \n",
+ " 13%|████▋ | 1000/7980 [22:16<3:29:05, 1.80s/it]The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 1599\n",
+ " Batch size = 32\n",
+ "\n",
+ " 0%| | 0/50 [00:00, ?it/s]\u001b[A\n",
+ " 4%|█▊ | 2/50 [00:01<00:33, 1.43it/s]\u001b[A\n",
+ " 6%|██▋ | 3/50 [00:02<00:44, 1.05it/s]\u001b[A\n",
+ " 8%|███▌ | 4/50 [00:04<00:50, 1.10s/it]\u001b[A\n",
+ " 10%|████▍ | 5/50 [00:05<00:54, 1.22s/it]\u001b[A\n",
+ " 12%|█████▎ | 6/50 [00:06<00:57, 1.30s/it]\u001b[A\n",
+ " 14%|██████▏ | 7/50 [00:07<00:50, 1.17s/it]\u001b[A\n",
+ " 16%|███████ | 8/50 [00:09<00:50, 1.21s/it]\u001b[A\n",
+ " 18%|███████▉ | 9/50 [00:10<00:53, 1.30s/it]\u001b[A\n",
+ " 20%|████████▌ | 10/50 [00:12<00:55, 1.39s/it]\u001b[A\n",
+ " 22%|█████████▍ | 11/50 [00:14<01:00, 1.56s/it]\u001b[A\n",
+ " 24%|██████████▎ | 12/50 [00:15<01:00, 1.59s/it]\u001b[A\n",
+ " 26%|███████████▏ | 13/50 [00:17<00:55, 1.49s/it]\u001b[A\n",
+ " 28%|████████████ | 14/50 [00:18<00:48, 1.35s/it]\u001b[A\n",
+ " 30%|████████████▉ | 15/50 [00:19<00:43, 1.24s/it]\u001b[A\n",
+ " 32%|█████████████▊ | 16/50 [00:20<00:39, 1.15s/it]\u001b[A\n",
+ " 34%|██████████████▌ | 17/50 [00:20<00:35, 1.08s/it]\u001b[A\n",
+ " 36%|███████████████▍ | 18/50 [00:22<00:34, 1.07s/it]\u001b[A\n",
+ " 38%|████████████████▎ | 19/50 [00:22<00:31, 1.03s/it]\u001b[A\n",
+ " 40%|█████████████████▏ | 20/50 [00:23<00:30, 1.03s/it]\u001b[A\n",
+ " 42%|██████████████████ | 21/50 [00:24<00:28, 1.01it/s]\u001b[A\n",
+ " 44%|██████████████████▉ | 22/50 [00:25<00:28, 1.02s/it]\u001b[A\n",
+ " 46%|███████████████████▊ | 23/50 [00:26<00:26, 1.02it/s]\u001b[A\n",
+ " 48%|████████████████████▋ | 24/50 [00:27<00:26, 1.02s/it]\u001b[A\n",
+ " 50%|█████████████████████▌ | 25/50 [00:29<00:26, 1.05s/it]\u001b[A\n",
+ " 52%|██████████████████████▎ | 26/50 [00:30<00:25, 1.06s/it]\u001b[A\n",
+ " 54%|███████████████████████▏ | 27/50 [00:31<00:24, 1.05s/it]\u001b[A\n",
+ " 56%|████████████████████████ | 28/50 [00:32<00:22, 1.00s/it]\u001b[A\n",
+ " 58%|████████████████████████▉ | 29/50 [00:32<00:19, 1.06it/s]\u001b[A\n",
+ " 60%|█████████████████████████▊ | 30/50 [00:33<00:18, 1.07it/s]\u001b[A\n",
+ " 62%|██████████████████████████▋ | 31/50 [00:34<00:18, 1.05it/s]\u001b[A\n",
+ " 64%|███████████████████████████▌ | 32/50 [00:35<00:17, 1.01it/s]\u001b[A\n",
+ " 66%|████████████████████████████▍ | 33/50 [00:36<00:15, 1.09it/s]\u001b[A\n",
+ " 68%|█████████████████████████████▏ | 34/50 [00:37<00:12, 1.25it/s]\u001b[A\n",
+ " 70%|██████████████████████████████ | 35/50 [00:38<00:12, 1.17it/s]\u001b[A\n",
+ " 72%|██████████████████████████████▉ | 36/50 [00:39<00:11, 1.17it/s]\u001b[A\n",
+ " 74%|███████████████████████████████▊ | 37/50 [00:39<00:11, 1.13it/s]\u001b[A\n",
+ " 76%|████████████████████████████████▋ | 38/50 [00:40<00:10, 1.13it/s]\u001b[A\n",
+ " 78%|█████████████████████████████████▌ | 39/50 [00:41<00:09, 1.13it/s]\u001b[A\n",
+ " 80%|██████████████████████████████████▍ | 40/50 [00:42<00:08, 1.14it/s]\u001b[A\n",
+ " 82%|███████████████████████████████████▎ | 41/50 [00:43<00:07, 1.15it/s]\u001b[A\n",
+ " 84%|████████████████████████████████████ | 42/50 [00:44<00:07, 1.12it/s]\u001b[A\n",
+ " 86%|████████████████████████████████████▉ | 43/50 [00:45<00:06, 1.08it/s]\u001b[A\n",
+ " 88%|█████████████████████████████████████▊ | 44/50 [00:46<00:05, 1.03it/s]\u001b[A\n",
+ " 90%|██████████████████████████████████████▋ | 45/50 [00:47<00:04, 1.03it/s]\u001b[A\n",
+ " 92%|███████████████████████████████████████�� | 46/50 [00:48<00:03, 1.04it/s]\u001b[A\n",
+ " 94%|████████████████████████████████████████▍ | 47/50 [00:49<00:02, 1.03it/s]\u001b[A\n",
+ " 96%|█████████████████████████████████████████▎ | 48/50 [00:50<00:01, 1.05it/s]\u001b[A\n",
+ " 98%|██████████████████████████████████████████▏| 49/50 [00:51<00:00, 1.08it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 0.5638473033905029, 'eval_wer': 0.6437733562266438, 'eval_runtime': 54.3288, 'eval_samples_per_second': 29.432, 'eval_steps_per_second': 0.92, 'epoch': 8.77}\n",
+ " 13%|████▋ | 1000/7980 [23:10<3:29:05, 1.80s/it]\n",
+ "100%|███████████████████████████████████████████| 50/50 [00:53<00:00, 1.08it/s]\u001b[A\n",
+ " \u001b[ASaving model checkpoint to ./wav2vec2-large-xls-r-300m-finnish/checkpoint-1000\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-finnish/checkpoint-1000/config.json\n",
+ "Model weights saved in ./wav2vec2-large-xls-r-300m-finnish/checkpoint-1000/pytorch_model.bin\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-finnish/checkpoint-1000/preprocessor_config.json\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-finnish/preprocessor_config.json\n",
+ "{'loss': 1.4498, 'learning_rate': 6.440374331550802e-05, 'epoch': 9.65} \n",
+ "{'loss': 1.2806, 'learning_rate': 6.253208556149732e-05, 'epoch': 11.4} \n",
+ "{'loss': 1.2595, 'learning_rate': 6.159625668449198e-05, 'epoch': 12.28} \n",
+ "{'loss': 1.1818, 'learning_rate': 6.0660427807486626e-05, 'epoch': 13.16} \n",
+ " 19%|██████▉ | 1500/7980 [35:29<1:56:44, 1.08s/it]The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 1599\n",
+ " Batch size = 32\n",
+ "\n",
+ " 0%| | 0/50 [00:00, ?it/s]\u001b[A\n",
+ " 4%|█▊ | 2/50 [00:01<00:33, 1.44it/s]\u001b[A\n",
+ " 6%|██▋ | 3/50 [00:02<00:44, 1.05it/s]\u001b[A\n",
+ " 8%|███▌ | 4/50 [00:04<00:50, 1.10s/it]\u001b[A\n",
+ " 10%|████▍ | 5/50 [00:05<00:55, 1.22s/it]\u001b[A\n",
+ " 12%|█████▎ | 6/50 [00:06<00:57, 1.31s/it]\u001b[A\n",
+ " 14%|██████▏ | 7/50 [00:07<00:50, 1.18s/it]\u001b[A\n",
+ " 16%|███████ | 8/50 [00:09<00:51, 1.22s/it]\u001b[A\n",
+ " 18%|███████▉ | 9/50 [00:10<00:53, 1.30s/it]\u001b[A\n",
+ " 20%|████████▌ | 10/50 [00:12<00:56, 1.42s/it]\u001b[A\n",
+ " 22%|█████████▍ | 11/50 [00:14<01:01, 1.58s/it]\u001b[A\n",
+ " 24%|██████████▎ | 12/50 [00:15<01:01, 1.61s/it]\u001b[A\n",
+ " 26%|███████████▏ | 13/50 [00:17<00:55, 1.50s/it]\u001b[A\n",
+ " 28%|████████████ | 14/50 [00:18<00:48, 1.35s/it]\u001b[A\n",
+ " 30%|████████████▉ | 15/50 [00:19<00:43, 1.23s/it]\u001b[A\n",
+ " 32%|█████████████▊ | 16/50 [00:20<00:38, 1.14s/it]\u001b[A\n",
+ " 34%|██████████████▌ | 17/50 [00:21<00:35, 1.06s/it]\u001b[A\n",
+ " 36%|███████████████▍ | 18/50 [00:22<00:33, 1.05s/it]\u001b[A\n",
+ " 38%|████████████████▎ | 19/50 [00:22<00:31, 1.02s/it]\u001b[A\n",
+ " 40%|█████████████████▏ | 20/50 [00:23<00:30, 1.02s/it]\u001b[A\n",
+ " 42%|██████████████████ | 21/50 [00:24<00:28, 1.02it/s]\u001b[A\n",
+ " 44%|████████████████��█▉ | 22/50 [00:25<00:27, 1.00it/s]\u001b[A\n",
+ " 46%|███████████████████▊ | 23/50 [00:26<00:25, 1.06it/s]\u001b[A\n",
+ " 48%|████████████████████▋ | 24/50 [00:27<00:25, 1.03it/s]\u001b[A\n",
+ " 50%|█████████████████████▌ | 25/50 [00:28<00:25, 1.03s/it]\u001b[A\n",
+ " 52%|██████████████████████▎ | 26/50 [00:30<00:25, 1.07s/it]\u001b[A\n",
+ " 54%|███████████████████████▏ | 27/50 [00:31<00:24, 1.06s/it]\u001b[A\n",
+ " 56%|████████████████████████ | 28/50 [00:32<00:22, 1.01s/it]\u001b[A\n",
+ " 58%|████████████████████████▉ | 29/50 [00:32<00:20, 1.04it/s]\u001b[A\n",
+ " 60%|█████████████████████████▊ | 30/50 [00:33<00:19, 1.05it/s]\u001b[A\n",
+ " 62%|██████████████████████████▋ | 31/50 [00:34<00:18, 1.03it/s]\u001b[A\n",
+ " 64%|███████████████████████████▌ | 32/50 [00:35<00:18, 1.02s/it]\u001b[A\n",
+ " 66%|████████████████████████████▍ | 33/50 [00:36<00:16, 1.04it/s]\u001b[A\n",
+ " 68%|█████████████████████████████▏ | 34/50 [00:37<00:13, 1.18it/s]\u001b[A\n",
+ " 70%|██████████████████████████████ | 35/50 [00:38<00:13, 1.10it/s]\u001b[A\n",
+ " 72%|██████████████████████████████▉ | 36/50 [00:39<00:12, 1.10it/s]\u001b[A\n",
+ " 74%|███████████████████████████████▊ | 37/50 [00:40<00:12, 1.06it/s]\u001b[A\n",
+ " 76%|████████████████████████████████▋ | 38/50 [00:41<00:11, 1.06it/s]\u001b[A\n",
+ " 78%|█████████████████████████████████▌ | 39/50 [00:42<00:10, 1.05it/s]\u001b[A\n",
+ " 80%|██████████████████████████████████▍ | 40/50 [00:43<00:09, 1.07it/s]\u001b[A\n",
+ " 82%|███████████████████████████████████▎ | 41/50 [00:44<00:08, 1.10it/s]\u001b[A\n",
+ " 84%|████████████████████████████████████ | 42/50 [00:44<00:07, 1.08it/s]\u001b[A\n",
+ " 86%|████████████████████████████████████▉ | 43/50 [00:45<00:06, 1.05it/s]\u001b[A\n",
+ " 88%|█████████████████████████████████████▊ | 44/50 [00:47<00:05, 1.02it/s]\u001b[A\n",
+ " 90%|██████████████████████████████████████▋ | 45/50 [00:48<00:04, 1.02it/s]\u001b[A\n",
+ " 92%|███████████████████████████████████████▌ | 46/50 [00:48<00:03, 1.03it/s]\u001b[A\n",
+ " 94%|████████████████████████████████████████▍ | 47/50 [00:49<00:02, 1.03it/s]\u001b[A\n",
+ " 96%|█████████████████████████████████████████▎ | 48/50 [00:50<00:01, 1.06it/s]\u001b[A\n",
+ " 98%|██████████████████████████████████████████▏| 49/50 [00:51<00:00, 1.08it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 0.3338354229927063, 'eval_wer': 0.4759115240884759, 'eval_runtime': 54.9878, 'eval_samples_per_second': 29.079, 'eval_steps_per_second': 0.909, 'epoch': 13.16}\n",
+ " 19%|██████▉ | 1500/7980 [36:24<1:56:44, 1.08s/it]\n",
+ "100%|███████████████████████████████████████████| 50/50 [00:53<00:00, 1.07it/s]\u001b[A\n",
+ " \u001b[ASaving model checkpoint to ./wav2vec2-large-xls-r-300m-finnish/checkpoint-1500\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-finnish/checkpoint-1500/config.json\n",
+ "Model weights saved in ./wav2vec2-large-xls-r-300m-finnish/checkpoint-1500/pytorch_model.bin\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-finnish/checkpoint-1500/preprocessor_config.json\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-finnish/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-large-xls-r-300m-finnish/checkpoint-500] due to args.save_total_limit\n",
+ "{'loss': 1.1818, 'learning_rate': 5.972459893048127e-05, 'epoch': 14.04} \n",
+ "{'loss': 1.133, 'learning_rate': 5.8788770053475934e-05, 'epoch': 14.91} \n",
+ "{'loss': 1.1239, 'learning_rate': 5.785294117647058e-05, 'epoch': 15.79} \n",
+ "{'loss': 1.0991, 'learning_rate': 5.6917112299465236e-05, 'epoch': 16.67} \n",
+ "{'loss': 1.0798, 'learning_rate': 5.5981283422459884e-05, 'epoch': 17.54} \n",
+ " 25%|█████████▎ | 2000/7980 [47:57<2:35:35, 1.56s/it]The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 1599\n",
+ " Batch size = 32\n",
+ "\n",
+ " 0%| | 0/50 [00:00, ?it/s]\u001b[A\n",
+ " 4%|█▊ | 2/50 [00:01<00:31, 1.55it/s]\u001b[A\n",
+ " 6%|██▋ | 3/50 [00:02<00:41, 1.13it/s]\u001b[A\n",
+ " 8%|███▌ | 4/50 [00:03<00:46, 1.02s/it]\u001b[A\n",
+ " 10%|████▍ | 5/50 [00:05<00:51, 1.14s/it]\u001b[A\n",
+ " 12%|█████▎ | 6/50 [00:06<00:53, 1.22s/it]\u001b[A\n",
+ " 14%|██████▏ | 7/50 [00:07<00:47, 1.11s/it]\u001b[A\n",
+ " 16%|███████ | 8/50 [00:08<00:48, 1.15s/it]\u001b[A\n",
+ " 18%|███████▉ | 9/50 [00:09<00:49, 1.22s/it]\u001b[A\n",
+ " 20%|████████▌ | 10/50 [00:11<00:51, 1.29s/it]\u001b[A\n",
+ " 22%|█████████▍ | 11/50 [00:13<00:55, 1.43s/it]\u001b[A\n",
+ " 24%|██████████▎ | 12/50 [00:14<00:55, 1.46s/it]\u001b[A\n",
+ " 26%|███████████▏ | 13/50 [00:15<00:50, 1.37s/it]\u001b[A\n",
+ " 28%|████████████ | 14/50 [00:16<00:44, 1.23s/it]\u001b[A\n",
+ " 30%|████████████▉ | 15/50 [00:17<00:39, 1.13s/it]\u001b[A\n",
+ " 32%|█████████████▊ | 16/50 [00:18<00:35, 1.05s/it]\u001b[A\n",
+ " 34%|██████████████▌ | 17/50 [00:19<00:32, 1.02it/s]\u001b[A\n",
+ " 36%|███████████████▍ | 18/50 [00:20<00:30, 1.03it/s]\u001b[A\n",
+ " 38%|████████████████▎ | 19/50 [00:21<00:28, 1.07it/s]\u001b[A\n",
+ " 40%|█████████████████▏ | 20/50 [00:22<00:28, 1.07it/s]\u001b[A\n",
+ " 42%|██████████████████ | 21/50 [00:22<00:26, 1.11it/s]\u001b[A\n",
+ " 44%|██████████████████▉ | 22/50 [00:23<00:25, 1.10it/s]\u001b[A\n",
+ " 46%|███████████████████▊ | 23/50 [00:24<00:23, 1.16it/s]\u001b[A\n",
+ " 48%|████████████████████▋ | 24/50 [00:25<00:22, 1.13it/s]\u001b[A\n",
+ " 50%|█████████████████████▌ | 25/50 [00:26<00:23, 1.06it/s]\u001b[A\n",
+ " 52%|██████████████████████▎ | 26/50 [00:27<00:22, 1.06it/s]\u001b[A\n",
+ " 54%|███████████████████████▏ | 27/50 [00:28<00:21, 1.05it/s]\u001b[A\n",
+ " 56%|████████████████████████ | 28/50 [00:29<00:20, 1.10it/s]\u001b[A\n",
+ " 58%|██████████████���█████████▉ | 29/50 [00:30<00:18, 1.15it/s]\u001b[A\n",
+ " 60%|█████████████████████████▊ | 30/50 [00:30<00:17, 1.16it/s]\u001b[A\n",
+ " 62%|██████████████████████████▋ | 31/50 [00:31<00:16, 1.14it/s]\u001b[A\n",
+ " 64%|███████████████████████████▌ | 32/50 [00:32<00:16, 1.10it/s]\u001b[A\n",
+ " 66%|████████████████████████████▍ | 33/50 [00:33<00:14, 1.18it/s]\u001b[A\n",
+ " 68%|█████████████████████████████▏ | 34/50 [00:34<00:11, 1.35it/s]\u001b[A\n",
+ " 70%|██████████████████████████████ | 35/50 [00:34<00:11, 1.26it/s]\u001b[A\n",
+ " 72%|██████████████████████████████▉ | 36/50 [00:35<00:11, 1.26it/s]\u001b[A\n",
+ " 74%|███████████████████████████████▊ | 37/50 [00:36<00:10, 1.22it/s]\u001b[A\n",
+ " 76%|████████████████████████████████▋ | 38/50 [00:37<00:09, 1.22it/s]\u001b[A\n",
+ " 78%|█████████████████████████████████▌ | 39/50 [00:38<00:09, 1.21it/s]\u001b[A\n",
+ " 80%|██████████████████████████████████▍ | 40/50 [00:39<00:08, 1.24it/s]\u001b[A\n",
+ " 82%|███████████████████████████████████▎ | 41/50 [00:39<00:07, 1.26it/s]\u001b[A\n",
+ " 84%|████████████████████████████████████ | 42/50 [00:40<00:06, 1.22it/s]\u001b[A\n",
+ " 86%|████████████████████████████████████▉ | 43/50 [00:41<00:05, 1.17it/s]\u001b[A\n",
+ " 88%|█████████████████████████████████████▊ | 44/50 [00:42<00:05, 1.13it/s]\u001b[A\n",
+ " 90%|██████████████████████████████████████▋ | 45/50 [00:43<00:04, 1.12it/s]\u001b[A\n",
+ " 92%|███████████████████████████████████████▌ | 46/50 [00:44<00:03, 1.13it/s]\u001b[A\n",
+ " 94%|████████████████████████████████████████▍ | 47/50 [00:45<00:02, 1.13it/s]\u001b[A\n",
+ " 96%|█████████████████████████████████████████▎ | 48/50 [00:46<00:01, 1.15it/s]\u001b[A\n",
+ " 98%|██████████████████████████████████████████▏| 49/50 [00:46<00:00, 1.18it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 0.2876473665237427, 'eval_wer': 0.40864559135440864, 'eval_runtime': 49.892, 'eval_samples_per_second': 32.049, 'eval_steps_per_second': 1.002, 'epoch': 17.54}\n",
+ " 25%|█████████▎ | 2000/7980 [48:47<2:35:35, 1.56s/it]\n",
+ "100%|███████████████████████████████████████████| 50/50 [00:48<00:00, 1.18it/s]\u001b[A\n",
+ " \u001b[ASaving model checkpoint to ./wav2vec2-large-xls-r-300m-finnish/checkpoint-2000\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-finnish/checkpoint-2000/config.json\n",
+ "Model weights saved in ./wav2vec2-large-xls-r-300m-finnish/checkpoint-2000/pytorch_model.bin\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-finnish/checkpoint-2000/preprocessor_config.json\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-finnish/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-large-xls-r-300m-finnish/checkpoint-1000] due to args.save_total_limit\n",
+ "{'loss': 1.0756, 'learning_rate': 5.5045454545454545e-05, 'epoch': 18.42} \n",
+ "{'loss': 1.0653, 'learning_rate': 5.410962566844919e-05, 'epoch': 19.3} \n",
+ "{'loss': 1.0472, 'learning_rate': 5.317379679144385e-05, 'epoch': 20.18} \n",
+ "{'loss': 1.03, 'learning_rate': 5.2237967914438494e-05, 'epoch': 21.05} \n",
+ "{'loss': 1.0296, 'learning_rate': 5.1302139037433155e-05, 'epoch': 21.93} \n",
+ " 31%|██████████▉ | 2500/7980 [1:00:08<1:19:38, 1.15it/s]The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 1599\n",
+ " Batch size = 32\n",
+ "\n",
+ " 0%| | 0/50 [00:00, ?it/s]\u001b[A\n",
+ " 4%|█▊ | 2/50 [00:01<00:30, 1.56it/s]\u001b[A\n",
+ " 6%|██▋ | 3/50 [00:02<00:41, 1.14it/s]\u001b[A\n",
+ " 8%|███▌ | 4/50 [00:03<00:46, 1.02s/it]\u001b[A\n",
+ " 10%|████▍ | 5/50 [00:05<00:50, 1.13s/it]\u001b[A\n",
+ " 12%|█████▎ | 6/50 [00:06<00:53, 1.21s/it]\u001b[A\n",
+ " 14%|██████▏ | 7/50 [00:07<00:47, 1.09s/it]\u001b[A\n",
+ " 16%|███████ | 8/50 [00:08<00:47, 1.13s/it]\u001b[A\n",
+ " 18%|███████▉ | 9/50 [00:09<00:49, 1.20s/it]\u001b[A\n",
+ " 20%|████████▌ | 10/50 [00:11<00:51, 1.29s/it]\u001b[A\n",
+ " 22%|█████████▍ | 11/50 [00:13<00:55, 1.42s/it]\u001b[A\n",
+ " 24%|██████████▎ | 12/50 [00:14<00:55, 1.46s/it]\u001b[A\n",
+ " 26%|███████████▏ | 13/50 [00:15<00:50, 1.37s/it]\u001b[A\n",
+ " 28%|████████████ | 14/50 [00:16<00:44, 1.24s/it]\u001b[A\n",
+ " 30%|████████████▉ | 15/50 [00:17<00:39, 1.14s/it]\u001b[A\n",
+ " 32%|█████████████▊ | 16/50 [00:18<00:35, 1.05s/it]\u001b[A\n",
+ " 34%|██████████████▌ | 17/50 [00:19<00:32, 1.01it/s]\u001b[A\n",
+ " 36%|███████████████▍ | 18/50 [00:20<00:31, 1.03it/s]\u001b[A\n",
+ " 38%|████████████████▎ | 19/50 [00:21<00:29, 1.06it/s]\u001b[A\n",
+ " 40%|█████████████████▏ | 20/50 [00:22<00:28, 1.06it/s]\u001b[A\n",
+ " 42%|██████████████████ | 21/50 [00:22<00:26, 1.10it/s]\u001b[A\n",
+ " 44%|██████████████████▉ | 22/50 [00:23<00:25, 1.08it/s]\u001b[A\n",
+ " 46%|███████████████████▊ | 23/50 [00:24<00:23, 1.15it/s]\u001b[A\n",
+ " 48%|████████████████████▋ | 24/50 [00:25<00:23, 1.12it/s]\u001b[A\n",
+ " 50%|█████████████████████▌ | 25/50 [00:26<00:23, 1.07it/s]\u001b[A\n",
+ " 52%|██████████████████████▎ | 26/50 [00:27<00:22, 1.06it/s]\u001b[A\n",
+ " 54%|███████████████████████▏ | 27/50 [00:28<00:21, 1.06it/s]\u001b[A\n",
+ " 56%|████████████████████████ | 28/50 [00:29<00:19, 1.10it/s]\u001b[A\n",
+ " 58%|████████████████████████▉ | 29/50 [00:30<00:18, 1.15it/s]\u001b[A\n",
+ " 60%|█████████████████████████▊ | 30/50 [00:30<00:17, 1.16it/s]\u001b[A\n",
+ " 62%|██████████████████████████▋ | 31/50 [00:31<00:16, 1.15it/s]\u001b[A\n",
+ " 64%|███████████████████████████▌ | 32/50 [00:32<00:16, 1.10it/s]\u001b[A\n",
+ " 66%|████████████████████████████▍ | 33/50 [00:33<00:14, 1.18it/s]\u001b[A\n",
+ " 68%|█████████████████████████████▏ | 34/50 [00:34<00:11, 1.36it/s]\u001b[A\n",
+ " 70%|██████████████████████████████ | 35/50 [00:34<00:11, 1.26it/s]\u001b[A\n",
+ " 72%|██████████████████████████████▉ | 36/50 [00:35<00:11, 1.26it/s]\u001b[A\n",
+ " 74%|███████████████████████████████▊ | 37/50 [00:36<00:10, 1.21it/s]\u001b[A\n",
+ " 76%|████████████████████████████████▋ | 38/50 [00:37<00:09, 1.22it/s]\u001b[A\n",
+ " 78%|█████████████████████████████████▌ | 39/50 [00:38<00:09, 1.22it/s]\u001b[A\n",
+ " 80%|██████████████████████████████████▍ | 40/50 [00:39<00:07, 1.25it/s]\u001b[A\n",
+ " 82%|███████████████████████████████████▎ | 41/50 [00:39<00:07, 1.27it/s]\u001b[A\n",
+ " 84%|████████████████████████████████████ | 42/50 [00:40<00:06, 1.23it/s]\u001b[A\n",
+ " 86%|████████████████████████████████████▉ | 43/50 [00:41<00:05, 1.18it/s]\u001b[A\n",
+ " 88%|█████████████████████████████████████▊ | 44/50 [00:42<00:05, 1.14it/s]\u001b[A\n",
+ " 90%|██████████████████████████████████████▋ | 45/50 [00:43<00:04, 1.13it/s]\u001b[A\n",
+ " 92%|███████████████████████████████████████▌ | 46/50 [00:44<00:03, 1.14it/s]\u001b[A\n",
+ " 94%|████████████████████████████████████████▍ | 47/50 [00:45<00:02, 1.13it/s]\u001b[A\n",
+ " 96%|█████████████████████████████████████████▎ | 48/50 [00:46<00:01, 1.15it/s]\u001b[A\n",
+ " 98%|██████████████████████████████████████████▏| 49/50 [00:46<00:00, 1.18it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 0.2693549394607544, 'eval_wer': 0.4248055751944248, 'eval_runtime': 49.7815, 'eval_samples_per_second': 32.12, 'eval_steps_per_second': 1.004, 'epoch': 21.93}\n",
+ " 31%|██████████▉ | 2500/7980 [1:00:58<1:19:38, 1.15it/s]\n",
+ "100%|███████████████████████████████████████████| 50/50 [00:48<00:00, 1.17it/s]\u001b[A\n",
+ " \u001b[ASaving model checkpoint to ./wav2vec2-large-xls-r-300m-finnish/checkpoint-2500\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-finnish/checkpoint-2500/config.json\n",
+ "Model weights saved in ./wav2vec2-large-xls-r-300m-finnish/checkpoint-2500/pytorch_model.bin\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-finnish/checkpoint-2500/preprocessor_config.json\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-finnish/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-large-xls-r-300m-finnish/checkpoint-1500] due to args.save_total_limit\n",
+ "{'loss': 1.0139, 'learning_rate': 4.943048128342246e-05, 'epoch': 23.68} \n",
+ "{'loss': 1.0081, 'learning_rate': 4.8494652406417105e-05, 'epoch': 24.56} \n",
+ "{'loss': 0.994, 'learning_rate': 4.755882352941176e-05, 'epoch': 25.44} \n",
+ "{'loss': 1.0014, 'learning_rate': 4.6622994652406414e-05, 'epoch': 26.32} \n",
+ " 38%|█████████████▏ | 3000/7980 [1:12:13<2:03:03, 1.48s/it]The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 1599\n",
+ " Batch size = 32\n",
+ "\n",
+ " 0%| | 0/50 [00:00, ?it/s]\u001b[A\n",
+ " 4%|█▊ | 2/50 [00:01<00:30, 1.55it/s]\u001b[A\n",
+ " 6%|██▋ | 3/50 [00:02<00:41, 1.13it/s]\u001b[A\n",
+ " 8%|███▌ | 4/50 [00:03<00:46, 1.02s/it]\u001b[A\n",
+ " 10%|████▍ | 5/50 [00:05<00:51, 1.13s/it]\u001b[A\n",
+ " 12%|█████▎ | 6/50 [00:06<00:53, 1.21s/it]\u001b[A\n",
+ " 14%|██████▏ | 7/50 [00:07<00:46, 1.09s/it]\u001b[A\n",
+ " 16%|███████ | 8/50 [00:08<00:47, 1.12s/it]\u001b[A\n",
+ " 18%|███████▉ | 9/50 [00:09<00:48, 1.19s/it]\u001b[A\n",
+ " 20%|████████▌ | 10/50 [00:11<00:50, 1.27s/it]\u001b[A\n",
+ " 22%|█████████▍ | 11/50 [00:13<00:55, 1.41s/it]\u001b[A\n",
+ " 24%|██████████▎ | 12/50 [00:14<00:55, 1.46s/it]\u001b[A\n",
+ " 26%|███████████▏ | 13/50 [00:15<00:50, 1.36s/it]\u001b[A\n",
+ " 28%|████████████ | 14/50 [00:16<00:44, 1.23s/it]\u001b[A\n",
+ " 30%|████████████▉ | 15/50 [00:17<00:39, 1.13s/it]\u001b[A\n",
+ " 32%|█████████████▊ | 16/50 [00:18<00:35, 1.04s/it]\u001b[A\n",
+ " 34%|██████████████▌ | 17/50 [00:19<00:32, 1.02it/s]\u001b[A\n",
+ " 36%|███████████████▍ | 18/50 [00:20<00:30, 1.04it/s]\u001b[A\n",
+ " 38%|████████████████▎ | 19/50 [00:20<00:28, 1.08it/s]\u001b[A\n",
+ " 40%|█████████████████▏ | 20/50 [00:21<00:27, 1.07it/s]\u001b[A\n",
+ " 42%|██████████████████ | 21/50 [00:22<00:25, 1.12it/s]\u001b[A\n",
+ " 44%|██████████████████▉ | 22/50 [00:23<00:25, 1.09it/s]\u001b[A\n",
+ " 46%|███████████████████▊ | 23/50 [00:24<00:23, 1.16it/s]\u001b[A\n",
+ " 48%|████████████████████▋ | 24/50 [00:25<00:23, 1.13it/s]\u001b[A\n",
+ " 50%|█████████████████████▌ | 25/50 [00:26<00:23, 1.07it/s]\u001b[A\n",
+ " 52%|██████████████████████▎ | 26/50 [00:27<00:22, 1.06it/s]\u001b[A\n",
+ " 54%|███████████████████████▏ | 27/50 [00:28<00:21, 1.05it/s]\u001b[A\n",
+ " 56%|████████████████████████ | 28/50 [00:29<00:19, 1.10it/s]\u001b[A\n",
+ " 58%|████████████████████████▉ | 29/50 [00:29<00:18, 1.16it/s]\u001b[A\n",
+ " 60%|█████████████████████████▊ | 30/50 [00:30<00:17, 1.16it/s]\u001b[A\n",
+ " 62%|██████████████████████████▋ | 31/50 [00:31<00:16, 1.15it/s]\u001b[A\n",
+ " 64%|███████████████████████████▌ | 32/50 [00:32<00:16, 1.11it/s]\u001b[A\n",
+ " 66%|████████████████████████████▍ | 33/50 [00:33<00:14, 1.19it/s]\u001b[A\n",
+ " 68%|█████████████████████████████▏ | 34/50 [00:33<00:11, 1.36it/s]\u001b[A\n",
+ " 70%|██████████████████████████████ | 35/50 [00:34<00:11, 1.27it/s]\u001b[A\n",
+ " 72%|██████████████████████████████▉ | 36/50 [00:35<00:11, 1.27it/s]\u001b[A\n",
+ " 74%|███████████████████████████████▊ | 37/50 [00:36<00:10, 1.22it/s]\u001b[A\n",
+ " 76%|████████████████████████████████▋ | 38/50 [00:37<00:09, 1.23it/s]\u001b[A\n",
+ " 78%|█████████████████████████████████▌ | 39/50 [00:38<00:09, 1.22it/s]\u001b[A\n",
+ " 80%|██████████████████████████████████▍ | 40/50 [00:38<00:08, 1.25it/s]\u001b[A\n",
+ " 82%|███████████████████████████████████▎ | 41/50 [00:39<00:07, 1.26it/s]\u001b[A\n",
+ " 84%|████████████████████████████████████ | 42/50 [00:40<00:06, 1.23it/s]\u001b[A\n",
+ " 86%|████████████████████████████████████▉ | 43/50 [00:41<00:05, 1.18it/s]\u001b[A\n",
+ " 88%|█████████████████████████████████████▊ | 44/50 [00:42<00:05, 1.14it/s]\u001b[A\n",
+ " 90%|██████████████████████████████████████▋ | 45/50 [00:43<00:04, 1.13it/s]\u001b[A\n",
+ " 92%|███████████████████████████████████████▌ | 46/50 [00:44<00:03, 1.14it/s]\u001b[A\n",
+ " 94%|████████████████████████████████████████▍ | 47/50 [00:45<00:02, 1.11it/s]\u001b[A\n",
+ " 96%|█████████████████████████████████████████▎ | 48/50 [00:45<00:01, 1.14it/s]\u001b[A\n",
+ " 98%|██████████████████████████████████████████▏| 49/50 [00:46<00:00, 1.17it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 0.26258525252342224, 'eval_wer': 0.3732956267043733, 'eval_runtime': 49.6202, 'eval_samples_per_second': 32.225, 'eval_steps_per_second': 1.008, 'epoch': 26.32}\n",
+ " 38%|█████████████▏ | 3000/7980 [1:13:03<2:03:03, 1.48s/it]\n",
+ "100%|███████████████████████████████████████████| 50/50 [00:48<00:00, 1.18it/s]\u001b[A\n",
+ " \u001b[ASaving model checkpoint to ./wav2vec2-large-xls-r-300m-finnish/checkpoint-3000\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-finnish/checkpoint-3000/config.json\n",
+ "Model weights saved in ./wav2vec2-large-xls-r-300m-finnish/checkpoint-3000/pytorch_model.bin\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-finnish/checkpoint-3000/preprocessor_config.json\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-finnish/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-large-xls-r-300m-finnish/checkpoint-2000] due to args.save_total_limit\n",
+ "{'loss': 0.991, 'learning_rate': 4.569652406417112e-05, 'epoch': 27.19} \n",
+ "{'loss': 0.9786, 'learning_rate': 4.4760695187165766e-05, 'epoch': 28.07} \n",
+ "{'loss': 0.9689, 'learning_rate': 4.382486631016043e-05, 'epoch': 28.95} \n",
+ "{'loss': 0.9631, 'learning_rate': 4.2889037433155075e-05, 'epoch': 29.82} \n",
+ "{'loss': 0.9616, 'learning_rate': 4.195320855614973e-05, 'epoch': 30.7} \n",
+ " 44%|████████████████▏ | 3500/7980 [1:24:16<59:17, 1.26it/s]The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 1599\n",
+ " Batch size = 32\n",
+ "\n",
+ " 0%| | 0/50 [00:00, ?it/s]\u001b[A\n",
+ " 4%|█▊ | 2/50 [00:01<00:30, 1.58it/s]\u001b[A\n",
+ " 6%|██▋ | 3/50 [00:02<00:41, 1.14it/s]\u001b[A\n",
+ " 8%|███▌ | 4/50 [00:03<00:46, 1.01s/it]\u001b[A\n",
+ " 10%|████▍ | 5/50 [00:05<00:50, 1.13s/it]\u001b[A\n",
+ " 12%|█████▎ | 6/50 [00:06<00:53, 1.21s/it]\u001b[A\n",
+ " 14%|██████▏ | 7/50 [00:07<00:46, 1.09s/it]\u001b[A\n",
+ " 16%|███████ | 8/50 [00:08<00:47, 1.13s/it]\u001b[A\n",
+ " 18%|███████▉ | 9/50 [00:09<00:49, 1.20s/it]\u001b[A\n",
+ " 20%|████████▌ | 10/50 [00:11<00:51, 1.28s/it]\u001b[A\n",
+ " 22%|█████████▍ | 11/50 [00:13<00:55, 1.41s/it]\u001b[A\n",
+ " 24%|██████████▎ | 12/50 [00:14<00:55, 1.46s/it]\u001b[A\n",
+ " 26%|███████████▏ | 13/50 [00:15<00:50, 1.37s/it]\u001b[A\n",
+ " 28%|████████████ | 14/50 [00:16<00:44, 1.23s/it]\u001b[A\n",
+ " 30%|████████████▉ | 15/50 [00:17<00:39, 1.13s/it]\u001b[A\n",
+ " 32%|█████████████▊ | 16/50 [00:18<00:35, 1.04s/it]\u001b[A\n",
+ " 34%|██████████████▌ | 17/50 [00:19<00:32, 1.02it/s]\u001b[A\n",
+ " 36%|███████████████▍ | 18/50 [00:20<00:30, 1.04it/s]\u001b[A\n",
+ " 38%|████████████████▎ | 19/50 [00:21<00:28, 1.07it/s]\u001b[A\n",
+ " 40%|█████████████████▏ | 20/50 [00:21<00:28, 1.07it/s]\u001b[A\n",
+ " 42%|██████████████████ | 21/50 [00:22<00:26, 1.11it/s]\u001b[A\n",
+ " 44%|██████████████████▉ | 22/50 [00:23<00:25, 1.09it/s]\u001b[A\n",
+ " 46%|███████████████████▊ | 23/50 [00:24<00:23, 1.16it/s]\u001b[A\n",
+ " 48%|████████████████████▋ | 24/50 [00:25<00:22, 1.13it/s]\u001b[A\n",
+ " 50%|█████████████████████▌ | 25/50 [00:26<00:23, 1.08it/s]\u001b[A\n",
+ " 52%|██████████████████████▎ | 26/50 [00:27<00:22, 1.07it/s]\u001b[A\n",
+ " 54%|███████████████████████▏ | 27/50 [00:28<00:21, 1.06it/s]\u001b[A\n",
+ " 56%|████████████████████████ | 28/50 [00:29<00:19, 1.11it/s]\u001b[A\n",
+ " 58%|████████████████████████▉ | 29/50 [00:29<00:18, 1.17it/s]\u001b[A\n",
+ " 60%|█████████████████████████▊ | 30/50 [00:30<00:17, 1.17it/s]\u001b[A\n",
+ " 62%|██████████████████████████▋ | 31/50 [00:31<00:16, 1.16it/s]\u001b[A\n",
+ " 64%|███████████████████████████▌ | 32/50 [00:32<00:16, 1.10it/s]\u001b[A\n",
+ " 66%|████████████████████████████▍ | 33/50 [00:33<00:14, 1.17it/s]\u001b[A\n",
+ " 68%|█████████████████████████████▏ | 34/50 [00:33<00:11, 1.34it/s]\u001b[A\n",
+ " 70%|██████████████████████████████ | 35/50 [00:34<00:11, 1.26it/s]\u001b[A\n",
+ " 72%|██████████████████████████████▉ | 36/50 [00:35<00:11, 1.26it/s]\u001b[A\n",
+ " 74%|███████████████████████████████▊ | 37/50 [00:36<00:10, 1.22it/s]\u001b[A\n",
+ " 76%|████████████████████████████████▋ | 38/50 [00:37<00:09, 1.22it/s]\u001b[A\n",
+ " 78%|█████████████████████████████████▌ | 39/50 [00:38<00:09, 1.21it/s]\u001b[A\n",
+ " 80%|██████████████████████████████████▍ | 40/50 [00:38<00:08, 1.24it/s]\u001b[A\n",
+ " 82%|███████████████████████████████████▎ | 41/50 [00:39<00:07, 1.26it/s]\u001b[A\n",
+ " 84%|████████████████████████████████████ | 42/50 [00:40<00:06, 1.23it/s]\u001b[A\n",
+ " 86%|████████████████████████████████████▉ | 43/50 [00:41<00:05, 1.17it/s]\u001b[A\n",
+ " 88%|█████████████████████████████████████▊ | 44/50 [00:42<00:05, 1.14it/s]\u001b[A\n",
+ " 90%|██████████████████████████████████████▋ | 45/50 [00:43<00:04, 1.14it/s]\u001b[A\n",
+ " 92%|███████████████████████████████████████▌ | 46/50 [00:44<00:03, 1.14it/s]\u001b[A\n",
+ " 94%|████████████████████████████████████���███▍ | 47/50 [00:45<00:02, 1.13it/s]\u001b[A\n",
+ " 96%|█████████████████████████████████████████▎ | 48/50 [00:45<00:01, 1.15it/s]\u001b[A\n",
+ " 98%|██████████████████████████████████████████▏| 49/50 [00:46<00:00, 1.18it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 0.23906062543392181, 'eval_wer': 0.32936067063932933, 'eval_runtime': 49.6357, 'eval_samples_per_second': 32.215, 'eval_steps_per_second': 1.007, 'epoch': 30.7}\n",
+ " 44%|████████████████▏ | 3500/7980 [1:25:06<59:17, 1.26it/s]\n",
+ "100%|███████████████████████████████████████████| 50/50 [00:48<00:00, 1.18it/s]\u001b[A\n",
+ " \u001b[ASaving model checkpoint to ./wav2vec2-large-xls-r-300m-finnish/checkpoint-3500\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-finnish/checkpoint-3500/config.json\n",
+ "Model weights saved in ./wav2vec2-large-xls-r-300m-finnish/checkpoint-3500/pytorch_model.bin\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-finnish/checkpoint-3500/preprocessor_config.json\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-finnish/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-large-xls-r-300m-finnish/checkpoint-2500] due to args.save_total_limit\n",
+ "{'loss': 0.9375, 'learning_rate': 4.101737967914438e-05, 'epoch': 31.58} \n",
+ "{'loss': 0.9629, 'learning_rate': 4.008155080213904e-05, 'epoch': 32.46} \n",
+ "{'loss': 0.946, 'learning_rate': 3.9145721925133686e-05, 'epoch': 33.33} \n",
+ "{'loss': 0.9511, 'learning_rate': 3.820989304812834e-05, 'epoch': 34.21} \n",
+ "{'loss': 0.9303, 'learning_rate': 3.727406417112299e-05, 'epoch': 35.09} \n",
+ " 50%|█████████████████▌ | 4000/7980 [1:36:24<1:34:07, 1.42s/it]The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 1599\n",
+ " Batch size = 32\n",
+ "\n",
+ " 0%| | 0/50 [00:00, ?it/s]\u001b[A\n",
+ " 4%|█▊ | 2/50 [00:01<00:31, 1.54it/s]\u001b[A\n",
+ " 6%|██▋ | 3/50 [00:02<00:41, 1.12it/s]\u001b[A\n",
+ " 8%|███▌ | 4/50 [00:03<00:48, 1.05s/it]\u001b[A\n",
+ " 10%|████▍ | 5/50 [00:05<00:52, 1.17s/it]\u001b[A\n",
+ " 12%|█████▎ | 6/50 [00:06<00:54, 1.24s/it]\u001b[A\n",
+ " 14%|██████▏ | 7/50 [00:07<00:47, 1.11s/it]\u001b[A\n",
+ " 16%|███████ | 8/50 [00:08<00:48, 1.15s/it]\u001b[A\n",
+ " 18%|███████▉ | 9/50 [00:10<00:49, 1.21s/it]\u001b[A\n",
+ " 20%|████████▌ | 10/50 [00:11<00:51, 1.30s/it]\u001b[A\n",
+ " 22%|█████████▍ | 11/50 [00:13<00:55, 1.43s/it]\u001b[A\n",
+ " 24%|██████████▎ | 12/50 [00:14<00:55, 1.47s/it]\u001b[A\n",
+ " 26%|███████████▏ | 13/50 [00:15<00:51, 1.38s/it]\u001b[A\n",
+ " 28%|████████████ | 14/50 [00:16<00:44, 1.24s/it]\u001b[A\n",
+ " 30%|████████████▉ | 15/50 [00:17<00:40, 1.14s/it]\u001b[A\n",
+ " 32%|█████████████▊ | 16/50 [00:18<00:35, 1.06s/it]\u001b[A\n",
+ " 34%|██████████████▌ | 17/50 [00:19<00:32, 1.01it/s]\u001b[A\n",
+ " 36%|███████████████▍ | 18/50 [00:20<00:31, 1.03it/s]\u001b[A\n",
+ " 38%|████████████████▎ | 19/50 [00:21<00:29, 1.06it/s]\u001b[A\n",
+ " 40%|█████████████████▏ | 20/50 [00:22<00:28, 1.06it/s]\u001b[A\n",
+ " 42%|██████████████████ | 21/50 [00:23<00:26, 1.10it/s]\u001b[A\n",
+ " 44%|██████████████████▉ | 22/50 [00:24<00:25, 1.09it/s]\u001b[A\n",
+ " 46%|███████████████████▊ | 23/50 [00:24<00:23, 1.15it/s]\u001b[A\n",
+ " 48%|████████████████████▋ | 24/50 [00:25<00:23, 1.13it/s]\u001b[A\n",
+ " 50%|█████████████████████▌ | 25/50 [00:26<00:23, 1.07it/s]\u001b[A\n",
+ " 52%|██████████████████████▎ | 26/50 [00:27<00:22, 1.06it/s]\u001b[A\n",
+ " 54%|███████████████████████▏ | 27/50 [00:28<00:21, 1.05it/s]\u001b[A\n",
+ " 56%|████████████████████████ | 28/50 [00:29<00:20, 1.10it/s]\u001b[A\n",
+ " 58%|████████████████████████▉ | 29/50 [00:30<00:18, 1.15it/s]\u001b[A\n",
+ " 60%|█████████████████████████▊ | 30/50 [00:31<00:17, 1.16it/s]\u001b[A\n",
+ " 62%|██████████████████████████▋ | 31/50 [00:32<00:16, 1.15it/s]\u001b[A\n",
+ " 64%|███████████████████████████▌ | 32/50 [00:33<00:16, 1.10it/s]\u001b[A\n",
+ " 66%|████████████████████████████▍ | 33/50 [00:33<00:14, 1.19it/s]\u001b[A\n",
+ " 68%|█████████████████████████████▏ | 34/50 [00:34<00:11, 1.36it/s]\u001b[A\n",
+ " 70%|██████████████████████████████ | 35/50 [00:35<00:11, 1.26it/s]\u001b[A\n",
+ " 72%|██████████████████████████████▉ | 36/50 [00:35<00:11, 1.26it/s]\u001b[A\n",
+ " 74%|███████████████████████████████▊ | 37/50 [00:36<00:10, 1.21it/s]\u001b[A\n",
+ " 76%|████████████████████████████████▋ | 38/50 [00:37<00:09, 1.21it/s]\u001b[A\n",
+ " 78%|█████████████████████████████████▌ | 39/50 [00:38<00:09, 1.21it/s]\u001b[A\n",
+ " 80%|██████████████████████████████████▍ | 40/50 [00:39<00:08, 1.23it/s]\u001b[A\n",
+ " 82%|███████████████████████████████████▎ | 41/50 [00:40<00:07, 1.26it/s]\u001b[A\n",
+ " 84%|████████████████████████████████████ | 42/50 [00:40<00:06, 1.22it/s]\u001b[A\n",
+ " 86%|████████████████████████████████████▉ | 43/50 [00:41<00:06, 1.16it/s]\u001b[A\n",
+ " 88%|█████████████████████████████████████▊ | 44/50 [00:42<00:05, 1.12it/s]\u001b[A\n",
+ " 90%|██████████████████████████████████████▋ | 45/50 [00:43<00:04, 1.12it/s]\u001b[A\n",
+ " 92%|███████████████████████████████████████▌ | 46/50 [00:44<00:03, 1.13it/s]\u001b[A\n",
+ " 94%|████████████████████████████████████████▍ | 47/50 [00:45<00:02, 1.12it/s]\u001b[A\n",
+ " 96%|█████████████████████████████████████████▎ | 48/50 [00:46<00:01, 1.14it/s]\u001b[A\n",
+ " 98%|██████████████████████████████████████████▏| 49/50 [00:47<00:00, 1.18it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 0.23517830669879913, 'eval_wer': 0.3217856782143218, 'eval_runtime': 50.0462, 'eval_samples_per_second': 31.95, 'eval_steps_per_second': 0.999, 'epoch': 35.09}\n",
+ " 50%|█████████████████▌ | 4000/7980 [1:37:14<1:34:07, 1.42s/it]\n",
+ "100%|████████████████████████████████��██████████| 50/50 [00:49<00:00, 1.17it/s]\u001b[A\n",
+ " \u001b[ASaving model checkpoint to ./wav2vec2-large-xls-r-300m-finnish/checkpoint-4000\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-finnish/checkpoint-4000/config.json\n",
+ "Model weights saved in ./wav2vec2-large-xls-r-300m-finnish/checkpoint-4000/pytorch_model.bin\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-finnish/checkpoint-4000/preprocessor_config.json\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-finnish/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-large-xls-r-300m-finnish/checkpoint-3000] due to args.save_total_limit\n",
+ "{'loss': 0.9236, 'learning_rate': 3.633823529411765e-05, 'epoch': 35.96} \n",
+ "{'loss': 0.9278, 'learning_rate': 3.5402406417112296e-05, 'epoch': 36.84} \n",
+ "{'loss': 0.9218, 'learning_rate': 3.446657754010695e-05, 'epoch': 37.72} \n",
+ "{'loss': 0.922, 'learning_rate': 3.35307486631016e-05, 'epoch': 38.6} \n",
+ "{'loss': 0.9248, 'learning_rate': 3.259491978609625e-05, 'epoch': 39.47} \n",
+ " 56%|████████████████████▊ | 4500/7980 [1:48:24<42:07, 1.38it/s]The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 1599\n",
+ " Batch size = 32\n",
+ "\n",
+ " 0%| | 0/50 [00:00, ?it/s]\u001b[A\n",
+ " 4%|█▊ | 2/50 [00:01<00:30, 1.58it/s]\u001b[A\n",
+ " 6%|██▋ | 3/50 [00:02<00:40, 1.15it/s]\u001b[A\n",
+ " 8%|███▌ | 4/50 [00:03<00:46, 1.01s/it]\u001b[A\n",
+ " 10%|████▍ | 5/50 [00:05<00:50, 1.13s/it]\u001b[A\n",
+ " 12%|█████▎ | 6/50 [00:06<00:52, 1.20s/it]\u001b[A\n",
+ " 14%|██████▏ | 7/50 [00:07<00:46, 1.08s/it]\u001b[A\n",
+ " 16%|███████ | 8/50 [00:08<00:47, 1.12s/it]\u001b[A\n",
+ " 18%|███████▉ | 9/50 [00:09<00:48, 1.19s/it]\u001b[A\n",
+ " 20%|████████▌ | 10/50 [00:11<00:50, 1.27s/it]\u001b[A\n",
+ " 22%|█████████▍ | 11/50 [00:12<00:54, 1.40s/it]\u001b[A\n",
+ " 24%|██████████▎ | 12/50 [00:14<00:54, 1.44s/it]\u001b[A\n",
+ " 26%|███████████▏ | 13/50 [00:15<00:49, 1.35s/it]\u001b[A\n",
+ " 28%|████████████ | 14/50 [00:16<00:43, 1.21s/it]\u001b[A\n",
+ " 30%|████████████▉ | 15/50 [00:17<00:39, 1.11s/it]\u001b[A\n",
+ " 32%|█████████████▊ | 16/50 [00:18<00:34, 1.03s/it]\u001b[A\n",
+ " 34%|██████████████▌ | 17/50 [00:19<00:31, 1.03it/s]\u001b[A\n",
+ " 36%|███████████████▍ | 18/50 [00:19<00:30, 1.05it/s]\u001b[A\n",
+ " 38%|████████████████▎ | 19/50 [00:20<00:28, 1.08it/s]\u001b[A\n",
+ " 40%|█████████████████▏ | 20/50 [00:21<00:27, 1.08it/s]\u001b[A\n",
+ " 42%|██████████████████ | 21/50 [00:22<00:25, 1.12it/s]\u001b[A\n",
+ " 44%|██████████████████▉ | 22/50 [00:23<00:25, 1.10it/s]\u001b[A\n",
+ " 46%|███████████████████▊ | 23/50 [00:24<00:23, 1.16it/s]\u001b[A\n",
+ " 48%|████████████████████▋ | 24/50 [00:25<00:23, 1.13it/s]\u001b[A\n",
+ " 50%|█████████████████████▌ | 25/50 [00:26<00:23, 1.07it/s]\u001b[A\n",
+ " 52%|██████████████████████▎ | 26/50 [00:27<00:22, 1.06it/s]\u001b[A\n",
+ " 54%|███████████████████████▏ | 27/50 [00:28<00:21, 1.06it/s]\u001b[A\n",
+ " 56%|████████████████████████ | 28/50 [00:28<00:19, 1.11it/s]\u001b[A\n",
+ " 58%|████████████████████████▉ | 29/50 [00:29<00:18, 1.16it/s]\u001b[A\n",
+ " 60%|█████████████████████████▊ | 30/50 [00:30<00:17, 1.17it/s]\u001b[A\n",
+ " 62%|██████████████████████████▋ | 31/50 [00:31<00:16, 1.15it/s]\u001b[A\n",
+ " 64%|███████████████████████████▌ | 32/50 [00:32<00:16, 1.11it/s]\u001b[A\n",
+ " 66%|████████████████████████████▍ | 33/50 [00:33<00:14, 1.20it/s]\u001b[A\n",
+ " 68%|█████████████████████████████▏ | 34/50 [00:33<00:11, 1.38it/s]\u001b[A\n",
+ " 70%|██████████████████████████████ | 35/50 [00:34<00:11, 1.29it/s]\u001b[A\n",
+ " 72%|██████████████████████████████▉ | 36/50 [00:35<00:10, 1.28it/s]\u001b[A\n",
+ " 74%|███████████████████████████████▊ | 37/50 [00:36<00:10, 1.24it/s]\u001b[A\n",
+ " 76%|████████████████████████████████▋ | 38/50 [00:36<00:09, 1.24it/s]\u001b[A\n",
+ " 78%|█████████████████████████████████▌ | 39/50 [00:37<00:08, 1.24it/s]\u001b[A\n",
+ " 80%|██████████████████████████████████▍ | 40/50 [00:38<00:07, 1.27it/s]\u001b[A\n",
+ " 82%|███████████████████████████████████▎ | 41/50 [00:39<00:07, 1.28it/s]\u001b[A\n",
+ " 84%|████████████████████████████████████ | 42/50 [00:40<00:06, 1.25it/s]\u001b[A\n",
+ " 86%|████████████████████████████████████▉ | 43/50 [00:41<00:05, 1.20it/s]\u001b[A\n",
+ " 88%|█████████████████████████████████████▊ | 44/50 [00:41<00:05, 1.16it/s]\u001b[A\n",
+ " 90%|██████████████████████████████████████▋ | 45/50 [00:42<00:04, 1.15it/s]\u001b[A\n",
+ " 92%|███████████████████████████████████████▌ | 46/50 [00:43<00:03, 1.15it/s]\u001b[A\n",
+ " 94%|████████████████████████████████████████▍ | 47/50 [00:44<00:02, 1.15it/s]\u001b[A\n",
+ " 96%|█████████████████████████████████████████▎ | 48/50 [00:45<00:01, 1.17it/s]\u001b[A\n",
+ " 98%|██████████████████████████████████████████▏| 49/50 [00:46<00:00, 1.20it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 0.23506982624530792, 'eval_wer': 0.3206746793253207, 'eval_runtime': 49.0987, 'eval_samples_per_second': 32.567, 'eval_steps_per_second': 1.018, 'epoch': 39.47}\n",
+ " 56%|████████████████████▊ | 4500/7980 [1:49:13<42:07, 1.38it/s]\n",
+ "100%|███████████████████████████████████████████| 50/50 [00:48<00:00, 1.20it/s]\u001b[A\n",
+ " \u001b[ASaving model checkpoint to ./wav2vec2-large-xls-r-300m-finnish/checkpoint-4500\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-finnish/checkpoint-4500/config.json\n",
+ "Model weights saved in ./wav2vec2-large-xls-r-300m-finnish/checkpoint-4500/pytorch_model.bin\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-finnish/checkpoint-4500/preprocessor_config.json\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-finnish/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-large-xls-r-300m-finnish/checkpoint-3500] due to args.save_total_limit\n",
+ "{'loss': 0.8909, 'learning_rate': 3.165909090909091e-05, 'epoch': 40.35} \n",
+ "{'loss': 0.912, 'learning_rate': 3.072326203208556e-05, 'epoch': 41.23} \n",
+ "{'loss': 0.8901, 'learning_rate': 2.9787433155080212e-05, 'epoch': 42.11} \n",
+ "{'loss': 0.9007, 'learning_rate': 2.8851604278074863e-05, 'epoch': 42.98} \n",
+ "{'loss': 0.8837, 'learning_rate': 2.7915775401069517e-05, 'epoch': 43.86} \n",
+ " 63%|███████████████████████▏ | 5000/7980 [2:00:31<57:57, 1.17s/it]The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 1599\n",
+ " Batch size = 32\n",
+ "\n",
+ " 0%| | 0/50 [00:00, ?it/s]\u001b[A\n",
+ " 4%|█▊ | 2/50 [00:01<00:30, 1.56it/s]\u001b[A\n",
+ " 6%|██▋ | 3/50 [00:02<00:41, 1.14it/s]\u001b[A\n",
+ " 8%|███▌ | 4/50 [00:03<00:46, 1.01s/it]\u001b[A\n",
+ " 10%|████▍ | 5/50 [00:05<00:50, 1.13s/it]\u001b[A\n",
+ " 12%|█████▎ | 6/50 [00:06<00:53, 1.21s/it]\u001b[A\n",
+ " 14%|██████▏ | 7/50 [00:07<00:46, 1.08s/it]\u001b[A\n",
+ " 16%|███████ | 8/50 [00:08<00:47, 1.12s/it]\u001b[A\n",
+ " 18%|███████▉ | 9/50 [00:09<00:48, 1.19s/it]\u001b[A\n",
+ " 20%|████████▌ | 10/50 [00:11<00:50, 1.27s/it]\u001b[A\n",
+ " 22%|█████████▍ | 11/50 [00:12<00:54, 1.40s/it]\u001b[A\n",
+ " 24%|██████████▎ | 12/50 [00:14<00:54, 1.44s/it]\u001b[A\n",
+ " 26%|███████████▏ | 13/50 [00:15<00:49, 1.35s/it]\u001b[A\n",
+ " 28%|████████████ | 14/50 [00:16<00:43, 1.22s/it]\u001b[A\n",
+ " 30%|████████████▉ | 15/50 [00:17<00:39, 1.12s/it]\u001b[A\n",
+ " 32%|█████████████▊ | 16/50 [00:18<00:35, 1.03s/it]\u001b[A\n",
+ " 34%|██████████████▌ | 17/50 [00:19<00:31, 1.03it/s]\u001b[A\n",
+ " 36%|███████████████▍ | 18/50 [00:19<00:30, 1.05it/s]\u001b[A\n",
+ " 38%|████████████████▎ | 19/50 [00:20<00:28, 1.09it/s]\u001b[A\n",
+ " 40%|█████████████████▏ | 20/50 [00:21<00:27, 1.08it/s]\u001b[A\n",
+ " 42%|██████████████████ | 21/50 [00:22<00:25, 1.12it/s]\u001b[A\n",
+ " 44%|██████████████████▉ | 22/50 [00:23<00:25, 1.10it/s]\u001b[A\n",
+ " 46%|███████████████████▊ | 23/50 [00:24<00:22, 1.18it/s]\u001b[A\n",
+ " 48%|████████████████████▋ | 24/50 [00:25<00:22, 1.15it/s]\u001b[A\n",
+ " 50%|█████████████████████▌ | 25/50 [00:26<00:22, 1.09it/s]\u001b[A\n",
+ " 52%|██████████████████████▎ | 26/50 [00:27<00:22, 1.07it/s]\u001b[A\n",
+ " 54%|███████████████████████▏ | 27/50 [00:28<00:21, 1.07it/s]\u001b[A\n",
+ " 56%|████████████████████████ | 28/50 [00:28<00:19, 1.12it/s]\u001b[A\n",
+ " 58%|████████████████████████▉ | 29/50 [00:29<00:17, 1.17it/s]\u001b[A\n",
+ " 60%|█████████████████████████▊ | 30/50 [00:30<00:17, 1.17it/s]\u001b[A\n",
+ " 62%|██████████████████████████▋ | 31/50 [00:31<00:16, 1.16it/s]\u001b[A\n",
+ " 64%|███████████████████████████▌ | 32/50 [00:32<00:16, 1.12it/s]\u001b[A\n",
+ " 66%|████████████████████████████▍ | 33/50 [00:33<00:14, 1.20it/s]\u001b[A\n",
+ " 68%|█████████████████████████████▏ | 34/50 [00:33<00:11, 1.37it/s]\u001b[A\n",
+ " 70%|██████████████████████████████ | 35/50 [00:34<00:11, 1.28it/s]\u001b[A\n",
+ " 72%|██████████████████████████████▉ | 36/50 [00:35<00:10, 1.28it/s]\u001b[A\n",
+ " 74%|███████████████████████████████▊ | 37/50 [00:36<00:10, 1.23it/s]\u001b[A\n",
+ " 76%|████████████████████████████████▋ | 38/50 [00:36<00:09, 1.24it/s]\u001b[A\n",
+ " 78%|█████████████████████████████████▌ | 39/50 [00:37<00:08, 1.23it/s]\u001b[A\n",
+ " 80%|██████████████████████████████████▍ | 40/50 [00:38<00:07, 1.25it/s]\u001b[A\n",
+ " 82%|███████████████████████████████████▎ | 41/50 [00:39<00:07, 1.26it/s]\u001b[A\n",
+ " 84%|████████████████████████████████████ | 42/50 [00:40<00:06, 1.23it/s]\u001b[A\n",
+ " 86%|████████████████████████████████████▉ | 43/50 [00:41<00:05, 1.18it/s]\u001b[A\n",
+ " 88%|█████████████████████████████████████▊ | 44/50 [00:41<00:05, 1.14it/s]\u001b[A\n",
+ " 90%|██████████████████████████████████████▋ | 45/50 [00:42<00:04, 1.13it/s]\u001b[A\n",
+ " 92%|███████████████████████████████████████▌ | 46/50 [00:43<00:03, 1.14it/s]\u001b[A\n",
+ " 94%|████████████████████████████████████████▍ | 47/50 [00:44<00:02, 1.13it/s]\u001b[A\n",
+ " 96%|█████████████████████████████████████████▎ | 48/50 [00:45<00:01, 1.16it/s]\u001b[A\n",
+ " 98%|██████████████████████████████████████████▏| 49/50 [00:46<00:00, 1.19it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 0.23407745361328125, 'eval_wer': 0.31027168972831026, 'eval_runtime': 49.1749, 'eval_samples_per_second': 32.517, 'eval_steps_per_second': 1.017, 'epoch': 43.86}\n",
+ " 63%|███████████████████████▏ | 5000/7980 [2:01:20<57:57, 1.17s/it]\n",
+ "100%|███████████████████████████████████████████| 50/50 [00:48<00:00, 1.19it/s]\u001b[A\n",
+ " \u001b[ASaving model checkpoint to ./wav2vec2-large-xls-r-300m-finnish/checkpoint-5000\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-finnish/checkpoint-5000/config.json\n",
+ "Model weights saved in ./wav2vec2-large-xls-r-300m-finnish/checkpoint-5000/pytorch_model.bin\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-finnish/checkpoint-5000/preprocessor_config.json\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-finnish/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-large-xls-r-300m-finnish/checkpoint-4000] due to args.save_total_limit\n",
+ "{'loss': 0.8891, 'learning_rate': 2.6979946524064168e-05, 'epoch': 44.74} \n",
+ "{'loss': 0.882, 'learning_rate': 2.6044117647058823e-05, 'epoch': 45.61} \n",
+ "{'loss': 0.8921, 'learning_rate': 2.5108288770053474e-05, 'epoch': 46.49} \n",
+ "{'loss': 0.8794, 'learning_rate': 2.4172459893048128e-05, 'epoch': 47.37} \n",
+ "{'loss': 0.8887, 'learning_rate': 2.323663101604278e-05, 'epoch': 48.25} \n",
+ " 69%|█████████████████████████▌ | 5500/7980 [2:12:30<27:31, 1.50it/s]The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 1599\n",
+ " Batch size = 32\n",
+ "\n",
+ " 0%| | 0/50 [00:00, ?it/s]\u001b[A\n",
+ " 4%|█▊ | 2/50 [00:01<00:30, 1.55it/s]\u001b[A\n",
+ " 6%|██▋ | 3/50 [00:02<00:41, 1.15it/s]\u001b[A\n",
+ " 8%|███▌ | 4/50 [00:03<00:46, 1.01s/it]\u001b[A\n",
+ " 10%|████▍ | 5/50 [00:05<00:50, 1.13s/it]\u001b[A\n",
+ " 12%|█████▎ | 6/50 [00:06<00:52, 1.20s/it]\u001b[A\n",
+ " 14%|██████▏ | 7/50 [00:07<00:46, 1.08s/it]\u001b[A\n",
+ " 16%|███████ | 8/50 [00:08<00:47, 1.12s/it]\u001b[A\n",
+ " 18%|███████▉ | 9/50 [00:09<00:48, 1.19s/it]\u001b[A\n",
+ " 20%|████████▌ | 10/50 [00:11<00:50, 1.27s/it]\u001b[A\n",
+ " 22%|█████████▍ | 11/50 [00:12<00:54, 1.40s/it]\u001b[A\n",
+ " 24%|██████████▎ | 12/50 [00:14<00:54, 1.45s/it]\u001b[A\n",
+ " 26%|███████████▏ | 13/50 [00:15<00:50, 1.35s/it]\u001b[A\n",
+ " 28%|████████████ | 14/50 [00:16<00:44, 1.22s/it]\u001b[A\n",
+ " 30%|████████████▉ | 15/50 [00:17<00:39, 1.12s/it]\u001b[A\n",
+ " 32%|█████████████▊ | 16/50 [00:18<00:35, 1.03s/it]\u001b[A\n",
+ " 34%|██████████████▌ | 17/50 [00:19<00:31, 1.03it/s]\u001b[A\n",
+ " 36%|███████████████▍ | 18/50 [00:19<00:30, 1.06it/s]\u001b[A\n",
+ " 38%|████████████████▎ | 19/50 [00:20<00:28, 1.09it/s]\u001b[A\n",
+ " 40%|█████████████████▏ | 20/50 [00:21<00:27, 1.09it/s]\u001b[A\n",
+ " 42%|██████████████████ | 21/50 [00:22<00:25, 1.13it/s]\u001b[A\n",
+ " 44%|██████████████████▉ | 22/50 [00:23<00:25, 1.11it/s]\u001b[A\n",
+ " 46%|███████████████████▊ | 23/50 [00:24<00:22, 1.19it/s]\u001b[A\n",
+ " 48%|████████████████████▋ | 24/50 [00:25<00:22, 1.15it/s]\u001b[A\n",
+ " 50%|█████████████████████▌ | 25/50 [00:26<00:22, 1.09it/s]\u001b[A\n",
+ " 52%|██████████████████████▎ | 26/50 [00:27<00:22, 1.08it/s]\u001b[A\n",
+ " 54%|███████████████████████▏ | 27/50 [00:28<00:21, 1.07it/s]\u001b[A\n",
+ " 56%|████████████████████████ | 28/50 [00:28<00:19, 1.12it/s]\u001b[A\n",
+ " 58%|████████████████████████▉ | 29/50 [00:29<00:17, 1.18it/s]\u001b[A\n",
+ " 60%|█████████████████████████▊ | 30/50 [00:30<00:16, 1.19it/s]\u001b[A\n",
+ " 62%|██████████████████████████▋ | 31/50 [00:31<00:16, 1.18it/s]\u001b[A\n",
+ " 64%|███████████████████████████▌ | 32/50 [00:32<00:15, 1.13it/s]\u001b[A\n",
+ " 66%|████████████████████████████▍ | 33/50 [00:32<00:14, 1.21it/s]\u001b[A\n",
+ " 68%|█████████████████████████████▏ | 34/50 [00:33<00:11, 1.38it/s]\u001b[A\n",
+ " 70%|██████████████████████████████ | 35/50 [00:34<00:11, 1.29it/s]\u001b[A\n",
+ " 72%|██████████████████████████████▉ | 36/50 [00:35<00:10, 1.29it/s]\u001b[A\n",
+ " 74%|███████████████████████████████▊ | 37/50 [00:35<00:10, 1.25it/s]\u001b[A\n",
+ " 76%|████████████████████████████████▋ | 38/50 [00:36<00:09, 1.25it/s]\u001b[A\n",
+ " 78%|█████████████████████████████████▌ | 39/50 [00:37<00:08, 1.24it/s]\u001b[A\n",
+ " 80%|██████████��███████████████████████▍ | 40/50 [00:38<00:07, 1.27it/s]\u001b[A\n",
+ " 82%|███████████████████████████████████▎ | 41/50 [00:39<00:07, 1.28it/s]\u001b[A\n",
+ " 84%|████████████████████████████████████ | 42/50 [00:39<00:06, 1.25it/s]\u001b[A\n",
+ " 86%|████████████████████████████████████▉ | 43/50 [00:40<00:05, 1.19it/s]\u001b[A\n",
+ " 88%|█████████████████████████████████████▊ | 44/50 [00:41<00:05, 1.15it/s]\u001b[A\n",
+ " 90%|██████████████████████████████████████▋ | 45/50 [00:42<00:04, 1.15it/s]\u001b[A\n",
+ " 92%|███████████████████████████████████████▌ | 46/50 [00:43<00:03, 1.16it/s]\u001b[A\n",
+ " 94%|████████████████████████████████████████▍ | 47/50 [00:44<00:02, 1.15it/s]\u001b[A\n",
+ " 96%|█████████████████████████████████████████▎ | 48/50 [00:45<00:01, 1.18it/s]\u001b[A\n",
+ " 98%|██████████████████████████████████████████▏| 49/50 [00:45<00:00, 1.21it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 0.23113244771957397, 'eval_wer': 0.3114836885163115, 'eval_runtime': 48.9674, 'eval_samples_per_second': 32.654, 'eval_steps_per_second': 1.021, 'epoch': 48.25}\n",
+ " 69%|█████████████████████████▌ | 5500/7980 [2:13:19<27:31, 1.50it/s]\n",
+ "100%|███████████████████████████████████████████| 50/50 [00:47<00:00, 1.20it/s]\u001b[A\n",
+ " \u001b[ASaving model checkpoint to ./wav2vec2-large-xls-r-300m-finnish/checkpoint-5500\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-finnish/checkpoint-5500/config.json\n",
+ "Model weights saved in ./wav2vec2-large-xls-r-300m-finnish/checkpoint-5500/pytorch_model.bin\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-finnish/checkpoint-5500/preprocessor_config.json\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-finnish/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-large-xls-r-300m-finnish/checkpoint-4500] due to args.save_total_limit\n",
+ "{'loss': 0.8553, 'learning_rate': 2.2300802139037433e-05, 'epoch': 49.12} \n",
+ "{'loss': 0.8751, 'learning_rate': 2.1364973262032084e-05, 'epoch': 50.0} \n",
+ "{'loss': 0.8557, 'learning_rate': 2.042914438502674e-05, 'epoch': 50.88} \n",
+ "{'loss': 0.8626, 'learning_rate': 1.949331550802139e-05, 'epoch': 51.75} \n",
+ "{'loss': 0.8529, 'learning_rate': 1.8557486631016044e-05, 'epoch': 52.63} \n",
+ " 75%|███████████████████████████▊ | 6000/7980 [2:24:37<36:03, 1.09s/it]The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 1599\n",
+ " Batch size = 32\n",
+ "\n",
+ " 0%| | 0/50 [00:00, ?it/s]\u001b[A\n",
+ " 4%|█▊ | 2/50 [00:01<00:30, 1.55it/s]\u001b[A\n",
+ " 6%|██▋ | 3/50 [00:02<00:41, 1.14it/s]\u001b[A\n",
+ " 8%|███▌ | 4/50 [00:03<00:46, 1.01s/it]\u001b[A\n",
+ " 10%|████▍ | 5/50 [00:05<00:50, 1.13s/it]\u001b[A\n",
+ " 12%|█████▎ | 6/50 [00:06<00:53, 1.21s/it]\u001b[A\n",
+ " 14%|██████▏ | 7/50 [00:07<00:46, 1.08s/it]\u001b[A\n",
+ " 16%|███████ | 8/50 [00:08<00:47, 1.12s/it]\u001b[A\n",
+ " 18%|███████▉ | 9/50 [00:09<00:48, 1.19s/it]\u001b[A\n",
+ " 20%|████████▌ | 10/50 [00:11<00:50, 1.27s/it]\u001b[A\n",
+ " 22%|█████████▍ | 11/50 [00:13<00:55, 1.42s/it]\u001b[A\n",
+ " 24%|██████████▎ | 12/50 [00:14<00:55, 1.46s/it]\u001b[A\n",
+ " 26%|███████████▏ | 13/50 [00:15<00:50, 1.37s/it]\u001b[A\n",
+ " 28%|████████████ | 14/50 [00:16<00:44, 1.23s/it]\u001b[A\n",
+ " 30%|████████████▉ | 15/50 [00:17<00:39, 1.13s/it]\u001b[A\n",
+ " 32%|█████████████▊ | 16/50 [00:18<00:35, 1.04s/it]\u001b[A\n",
+ " 34%|██████████████▌ | 17/50 [00:19<00:32, 1.02it/s]\u001b[A\n",
+ " 36%|███████████████▍ | 18/50 [00:20<00:30, 1.04it/s]\u001b[A\n",
+ " 38%|████████████████▎ | 19/50 [00:20<00:28, 1.08it/s]\u001b[A\n",
+ " 40%|█████████████████▏ | 20/50 [00:21<00:28, 1.06it/s]\u001b[A\n",
+ " 42%|██████████████████ | 21/50 [00:22<00:26, 1.11it/s]\u001b[A\n",
+ " 44%|██████████████████▉ | 22/50 [00:23<00:25, 1.10it/s]\u001b[A\n",
+ " 46%|███████████████████▊ | 23/50 [00:24<00:23, 1.17it/s]\u001b[A\n",
+ " 48%|████████████████████▋ | 24/50 [00:25<00:22, 1.14it/s]\u001b[A\n",
+ " 50%|█████████████████████▌ | 25/50 [00:26<00:23, 1.08it/s]\u001b[A\n",
+ " 52%|██████████████████████▎ | 26/50 [00:27<00:22, 1.07it/s]\u001b[A\n",
+ " 54%|███████████████████████▏ | 27/50 [00:28<00:21, 1.07it/s]\u001b[A\n",
+ " 56%|████████████████████████ | 28/50 [00:29<00:19, 1.12it/s]\u001b[A\n",
+ " 58%|████████████████████████▉ | 29/50 [00:29<00:17, 1.17it/s]\u001b[A\n",
+ " 60%|█████████████████████████▊ | 30/50 [00:30<00:16, 1.18it/s]\u001b[A\n",
+ " 62%|██████████████████████████▋ | 31/50 [00:31<00:16, 1.17it/s]\u001b[A\n",
+ " 64%|███████████████████████████▌ | 32/50 [00:32<00:16, 1.12it/s]\u001b[A\n",
+ " 66%|████████████████████████████▍ | 33/50 [00:33<00:14, 1.20it/s]\u001b[A\n",
+ " 68%|█████████████████████████████▏ | 34/50 [00:33<00:11, 1.37it/s]\u001b[A\n",
+ " 70%|██████████████████████████████ | 35/50 [00:34<00:11, 1.28it/s]\u001b[A\n",
+ " 72%|██████████████████████████████▉ | 36/50 [00:35<00:10, 1.28it/s]\u001b[A\n",
+ " 74%|███████████████████████████████▊ | 37/50 [00:36<00:10, 1.24it/s]\u001b[A\n",
+ " 76%|████████████████████████████████▋ | 38/50 [00:37<00:09, 1.24it/s]\u001b[A\n",
+ " 78%|█████████████████████████████████▌ | 39/50 [00:37<00:08, 1.23it/s]\u001b[A\n",
+ " 80%|██████████████████████████████████▍ | 40/50 [00:38<00:07, 1.26it/s]\u001b[A\n",
+ " 82%|███████████████████████████████████▎ | 41/50 [00:39<00:07, 1.27it/s]\u001b[A\n",
+ " 84%|████████████████████████████████████ | 42/50 [00:40<00:06, 1.24it/s]\u001b[A\n",
+ " 86%|████████████████████████████████████▉ | 43/50 [00:41<00:05, 1.19it/s]\u001b[A\n",
+ " 88%|█████████████████████████████████████▊ | 44/50 [00:42<00:05, 1.15it/s]\u001b[A\n",
+ " 90%|██████████████████████████████████████▋ | 45/50 [00:42<00:04, 1.14it/s]\u001b[A\n",
+ " 92%|███████████████████████████████████████▌ | 46/50 [00:43<00:03, 1.15it/s]\u001b[A\n",
+ " 94%|████████████████████████████████████████▍ | 47/50 [00:44<00:02, 1.14it/s]\u001b[A\n",
+ " 96%|█████████████████████████████████████████▎ | 48/50 [00:45<00:01, 1.16it/s]\u001b[A\n",
+ " 98%|██████████████████████████████████████████▏| 49/50 [00:46<00:00, 1.18it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 0.22297005355358124, 'eval_wer': 0.3000706999293001, 'eval_runtime': 49.2944, 'eval_samples_per_second': 32.438, 'eval_steps_per_second': 1.014, 'epoch': 52.63}\n",
+ " 75%|███████████████████████████▊ | 6000/7980 [2:25:27<36:03, 1.09s/it]\n",
+ "100%|███████████████████████████████████████████| 50/50 [00:48<00:00, 1.19it/s]\u001b[A\n",
+ " \u001b[ASaving model checkpoint to ./wav2vec2-large-xls-r-300m-finnish/checkpoint-6000\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-finnish/checkpoint-6000/config.json\n",
+ "Model weights saved in ./wav2vec2-large-xls-r-300m-finnish/checkpoint-6000/pytorch_model.bin\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-finnish/checkpoint-6000/preprocessor_config.json\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-finnish/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-large-xls-r-300m-finnish/checkpoint-5000] due to args.save_total_limit\n",
+ "{'loss': 0.8487, 'learning_rate': 1.762165775401069e-05, 'epoch': 53.51} \n",
+ "{'loss': 0.8585, 'learning_rate': 1.6685828877005346e-05, 'epoch': 54.39} \n",
+ "{'loss': 0.8412, 'learning_rate': 1.575e-05, 'epoch': 55.26} \n",
+ "{'loss': 0.8457, 'learning_rate': 1.4814171122994651e-05, 'epoch': 56.14} \n",
+ "{'loss': 0.8404, 'learning_rate': 1.3878342245989304e-05, 'epoch': 57.02} \n",
+ " 81%|██████████████████████████████▏ | 6500/7980 [2:36:34<39:13, 1.59s/it]The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 1599\n",
+ " Batch size = 32\n",
+ "\n",
+ " 0%| | 0/50 [00:00, ?it/s]\u001b[A\n",
+ " 4%|█▊ | 2/50 [00:01<00:30, 1.60it/s]\u001b[A\n",
+ " 6%|██▋ | 3/50 [00:02<00:40, 1.16it/s]\u001b[A\n",
+ " 8%|███▌ | 4/50 [00:03<00:45, 1.01it/s]\u001b[A\n",
+ " 10%|████▍ | 5/50 [00:05<00:50, 1.12s/it]\u001b[A\n",
+ " 12%|█████▎ | 6/50 [00:06<00:52, 1.19s/it]\u001b[A\n",
+ " 14%|██████▏ | 7/50 [00:07<00:45, 1.07s/it]\u001b[A\n",
+ " 16%|███████ | 8/50 [00:08<00:46, 1.11s/it]\u001b[A\n",
+ " 18%|███████▉ | 9/50 [00:09<00:48, 1.17s/it]\u001b[A\n",
+ " 20%|████████▌ | 10/50 [00:11<00:50, 1.25s/it]\u001b[A\n",
+ " 22%|█████████▍ | 11/50 [00:12<00:54, 1.39s/it]\u001b[A\n",
+ " 24%|██████████▎ | 12/50 [00:14<00:54, 1.43s/it]\u001b[A\n",
+ " 26%|███████████▏ | 13/50 [00:15<00:49, 1.34s/it]\u001b[A\n",
+ " 28%|████████████ | 14/50 [00:16<00:43, 1.20s/it]\u001b[A\n",
+ " 30%|████████████▉ | 15/50 [00:17<00:38, 1.11s/it]\u001b[A\n",
+ " 32%|█████████████▊ | 16/50 [00:18<00:34, 1.02s/it]\u001b[A\n",
+ " 34%|██████████████▌ | 17/50 [00:18<00:31, 1.05it/s]\u001b[A\n",
+ " 36%|███████████████▍ | 18/50 [00:19<00:30, 1.06it/s]\u001b[A\n",
+ " 38%|████████████████▎ | 19/50 [00:20<00:28, 1.10it/s]\u001b[A\n",
+ " 40%|█████████████████▏ | 20/50 [00:21<00:27, 1.10it/s]\u001b[A\n",
+ " 42%|██████████████████ | 21/50 [00:22<00:25, 1.14it/s]\u001b[A\n",
+ " 44%|██████████████████▉ | 22/50 [00:23<00:24, 1.12it/s]\u001b[A\n",
+ " 46%|███████████████████▊ | 23/50 [00:23<00:22, 1.19it/s]\u001b[A\n",
+ " 48%|████████████████████▋ | 24/50 [00:24<00:22, 1.16it/s]\u001b[A\n",
+ " 50%|█████████████████████▌ | 25/50 [00:25<00:22, 1.10it/s]\u001b[A\n",
+ " 52%|██████████████████████▎ | 26/50 [00:26<00:22, 1.08it/s]\u001b[A\n",
+ " 54%|███████████████████████▏ | 27/50 [00:27<00:21, 1.08it/s]\u001b[A\n",
+ " 56%|████████████████████████ | 28/50 [00:28<00:19, 1.13it/s]\u001b[A\n",
+ " 58%|████████████████████████▉ | 29/50 [00:29<00:17, 1.18it/s]\u001b[A\n",
+ " 60%|█████████████████████████▊ | 30/50 [00:30<00:16, 1.19it/s]\u001b[A\n",
+ " 62%|██████████████████████████▋ | 31/50 [00:31<00:16, 1.18it/s]\u001b[A\n",
+ " 64%|███████████████████████████▌ | 32/50 [00:31<00:15, 1.13it/s]\u001b[A\n",
+ " 66%|████████████████████████████▍ | 33/50 [00:32<00:13, 1.22it/s]\u001b[A\n",
+ " 68%|█████████████████████████████▏ | 34/50 [00:33<00:11, 1.37it/s]\u001b[A\n",
+ " 70%|██████████████████████████████ | 35/50 [00:34<00:11, 1.27it/s]\u001b[A\n",
+ " 72%|██████████████████████████████▉ | 36/50 [00:34<00:10, 1.28it/s]\u001b[A\n",
+ " 74%|███████████████████████████████▊ | 37/50 [00:35<00:10, 1.24it/s]\u001b[A\n",
+ " 76%|████████████████████████████████▋ | 38/50 [00:36<00:09, 1.25it/s]\u001b[A\n",
+ " 78%|█████████████████████████████████▌ | 39/50 [00:37<00:08, 1.24it/s]\u001b[A\n",
+ " 80%|██████████████████████████████████▍ | 40/50 [00:38<00:07, 1.27it/s]\u001b[A\n",
+ " 82%|███████████████████████████████████▎ | 41/50 [00:38<00:06, 1.29it/s]\u001b[A\n",
+ " 84%|████████████████████████████████████ | 42/50 [00:39<00:06, 1.26it/s]\u001b[A\n",
+ " 86%|████████████████████████████████████▉ | 43/50 [00:40<00:05, 1.20it/s]\u001b[A\n",
+ " 88%|█████████████████████████████████████▊ | 44/50 [00:41<00:05, 1.17it/s]\u001b[A\n",
+ " 90%|██████████████████████████████████████▋ | 45/50 [00:42<00:04, 1.16it/s]\u001b[A\n",
+ " 92%|███████████████████████████████████████▌ | 46/50 [00:43<00:03, 1.16it/s]\u001b[A\n",
+ " 94%|████████████████████████████████████████▍ | 47/50 [00:44<00:02, 1.13it/s]\u001b[A\n",
+ " 96%|█████████████████████████████████████████▎ | 48/50 [00:44<00:01, 1.17it/s]\u001b[A\n",
+ " 98%|██████████████████████████████████████████▏| 49/50 [00:45<00:00, 1.20it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 0.22788779437541962, 'eval_wer': 0.3054236945763054, 'eval_runtime': 48.6265, 'eval_samples_per_second': 32.883, 'eval_steps_per_second': 1.028, 'epoch': 57.02}\n",
+ " 81%|██████████████████████████████▏ | 6500/7980 [2:37:22<39:13, 1.59s/it]\n",
+ "100%|███████████████████████████████████████████| 50/50 [00:47<00:00, 1.20it/s]\u001b[A\n",
+ " \u001b[ASaving model checkpoint to ./wav2vec2-large-xls-r-300m-finnish/checkpoint-6500\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-finnish/checkpoint-6500/config.json\n",
+ "Model weights saved in ./wav2vec2-large-xls-r-300m-finnish/checkpoint-6500/pytorch_model.bin\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-finnish/checkpoint-6500/preprocessor_config.json\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-finnish/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-large-xls-r-300m-finnish/checkpoint-5500] due to args.save_total_limit\n",
+ "{'loss': 0.8342, 'learning_rate': 1.2942513368983956e-05, 'epoch': 57.89} \n",
+ "{'loss': 0.844, 'learning_rate': 1.2006684491978609e-05, 'epoch': 58.77} \n",
+ "{'loss': 0.8276, 'learning_rate': 1.107085561497326e-05, 'epoch': 59.65} \n",
+ "{'loss': 0.8264, 'learning_rate': 1.0144385026737967e-05, 'epoch': 60.53} \n",
+ "{'loss': 0.8242, 'learning_rate': 9.20855614973262e-06, 'epoch': 61.4} \n",
+ " 88%|████████████████████████████████▍ | 7000/7980 [2:48:32<16:12, 1.01it/s]The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 1599\n",
+ " Batch size = 32\n",
+ "\n",
+ " 0%| | 0/50 [00:00, ?it/s]\u001b[A\n",
+ " 4%|█▊ | 2/50 [00:01<00:30, 1.58it/s]\u001b[A\n",
+ " 6%|██▋ | 3/50 [00:02<00:40, 1.15it/s]\u001b[A\n",
+ " 8%|███▌ | 4/50 [00:03<00:46, 1.01s/it]\u001b[A\n",
+ " 10%|████▍ | 5/50 [00:05<00:50, 1.12s/it]\u001b[A\n",
+ " 12%|█████▎ | 6/50 [00:06<00:52, 1.20s/it]\u001b[A\n",
+ " 14%|██████▏ | 7/50 [00:07<00:46, 1.07s/it]\u001b[A\n",
+ " 16%|███████ | 8/50 [00:08<00:46, 1.11s/it]\u001b[A\n",
+ " 18%|███████▉ | 9/50 [00:09<00:48, 1.18s/it]\u001b[A\n",
+ " 20%|████████▌ | 10/50 [00:11<00:50, 1.25s/it]\u001b[A\n",
+ " 22%|█████████▍ | 11/50 [00:12<00:54, 1.38s/it]\u001b[A\n",
+ " 24%|██████████▎ | 12/50 [00:14<00:54, 1.42s/it]\u001b[A\n",
+ " 26%|███████████▏ | 13/50 [00:15<00:49, 1.33s/it]\u001b[A\n",
+ " 28%|████████████ | 14/50 [00:16<00:43, 1.20s/it]\u001b[A\n",
+ " 30%|████████████▉ | 15/50 [00:17<00:38, 1.10s/it]\u001b[A\n",
+ " 32%|█████████████▊ | 16/50 [00:18<00:34, 1.02s/it]\u001b[A\n",
+ " 34%|██████████████▌ | 17/50 [00:18<00:31, 1.05it/s]\u001b[A\n",
+ " 36%|███████████████▍ | 18/50 [00:19<00:30, 1.07it/s]\u001b[A\n",
+ " 38%|████████████████▎ | 19/50 [00:20<00:28, 1.10it/s]\u001b[A\n",
+ " 40%|█████████████████▏ | 20/50 [00:21<00:27, 1.09it/s]\u001b[A\n",
+ " 42%|██████████████████ | 21/50 [00:22<00:25, 1.14it/s]\u001b[A\n",
+ " 44%|██████████████████▉ | 22/50 [00:23<00:24, 1.12it/s]\u001b[A\n",
+ " 46%|███████████████████▊ | 23/50 [00:23<00:22, 1.19it/s]\u001b[A\n",
+ " 48%|████████████████████▋ | 24/50 [00:24<00:22, 1.16it/s]\u001b[A\n",
+ " 50%|███████��█████████████▌ | 25/50 [00:25<00:22, 1.10it/s]\u001b[A\n",
+ " 52%|██████████████████████▎ | 26/50 [00:26<00:22, 1.08it/s]\u001b[A\n",
+ " 54%|███████████████████████▏ | 27/50 [00:27<00:21, 1.08it/s]\u001b[A\n",
+ " 56%|████████████████████████ | 28/50 [00:28<00:19, 1.13it/s]\u001b[A\n",
+ " 58%|████████████████████████▉ | 29/50 [00:29<00:17, 1.19it/s]\u001b[A\n",
+ " 60%|█████████████████████████▊ | 30/50 [00:30<00:16, 1.20it/s]\u001b[A\n",
+ " 62%|██████████████████████████▋ | 31/50 [00:30<00:16, 1.19it/s]\u001b[A\n",
+ " 64%|███████████████████████████▌ | 32/50 [00:31<00:15, 1.14it/s]\u001b[A\n",
+ " 66%|████████████████████████████▍ | 33/50 [00:32<00:13, 1.22it/s]\u001b[A\n",
+ " 68%|█████████████████████████████▏ | 34/50 [00:33<00:11, 1.40it/s]\u001b[A\n",
+ " 70%|██████████████████████████████ | 35/50 [00:33<00:11, 1.30it/s]\u001b[A\n",
+ " 72%|██████████████████████████████▉ | 36/50 [00:34<00:10, 1.31it/s]\u001b[A\n",
+ " 74%|███████████████████████████████▊ | 37/50 [00:35<00:10, 1.26it/s]\u001b[A\n",
+ " 76%|████████████████████████████████▋ | 38/50 [00:36<00:09, 1.26it/s]\u001b[A\n",
+ " 78%|█████████████████████████████████▌ | 39/50 [00:37<00:08, 1.26it/s]\u001b[A\n",
+ " 80%|██████████████████████████████████▍ | 40/50 [00:37<00:07, 1.29it/s]\u001b[A\n",
+ " 82%|███████████████████████████████████▎ | 41/50 [00:38<00:06, 1.30it/s]\u001b[A\n",
+ " 84%|████████████████████████████████████ | 42/50 [00:39<00:06, 1.27it/s]\u001b[A\n",
+ " 86%|████████████████████████████████████▉ | 43/50 [00:40<00:05, 1.22it/s]\u001b[A\n",
+ " 88%|█████████████████████████████████████▊ | 44/50 [00:41<00:05, 1.17it/s]\u001b[A\n",
+ " 90%|██████████████████████████████████████▋ | 45/50 [00:42<00:04, 1.16it/s]\u001b[A\n",
+ " 92%|███████████████████████████████████████▌ | 46/50 [00:43<00:03, 1.18it/s]\u001b[A\n",
+ " 94%|████████████████████████████████████████▍ | 47/50 [00:43<00:02, 1.16it/s]\u001b[A\n",
+ " 96%|█████████████████████████████████████████▎ | 48/50 [00:44<00:01, 1.19it/s]\u001b[A\n",
+ " 98%|██████████████████████████████████████████▏| 49/50 [00:45<00:00, 1.22it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 0.2298082411289215, 'eval_wer': 0.30057569942430057, 'eval_runtime': 48.4266, 'eval_samples_per_second': 33.019, 'eval_steps_per_second': 1.032, 'epoch': 61.4}\n",
+ " 88%|████████████████████████████████▍ | 7000/7980 [2:49:21<16:12, 1.01it/s]\n",
+ "100%|███████████████████████████████████████████| 50/50 [00:47<00:00, 1.21it/s]\u001b[A\n",
+ " \u001b[ASaving model checkpoint to ./wav2vec2-large-xls-r-300m-finnish/checkpoint-7000\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-finnish/checkpoint-7000/config.json\n",
+ "Model weights saved in ./wav2vec2-large-xls-r-300m-finnish/checkpoint-7000/pytorch_model.bin\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-finnish/checkpoint-7000/preprocessor_config.json\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-finnish/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-large-xls-r-300m-finnish/checkpoint-6000] due to args.save_total_limit\n",
+ "{'loss': 0.8291, 'learning_rate': 8.272727272727272e-06, 'epoch': 62.28} \n",
+ "{'loss': 0.8213, 'learning_rate': 7.336898395721925e-06, 'epoch': 63.16} \n",
+ "{'loss': 0.8139, 'learning_rate': 6.401069518716577e-06, 'epoch': 64.04} \n",
+ "{'loss': 0.8177, 'learning_rate': 5.46524064171123e-06, 'epoch': 64.91} \n",
+ "{'loss': 0.8288, 'learning_rate': 4.529411764705883e-06, 'epoch': 65.79} \n",
+ " 94%|██████████████████████████████████▊ | 7500/7980 [3:00:30<12:55, 1.62s/it]The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 1599\n",
+ " Batch size = 32\n",
+ "\n",
+ " 0%| | 0/50 [00:00, ?it/s]\u001b[A\n",
+ " 4%|█▊ | 2/50 [00:01<00:30, 1.57it/s]\u001b[A\n",
+ " 6%|██▋ | 3/50 [00:02<00:40, 1.15it/s]\u001b[A\n",
+ " 8%|███▌ | 4/50 [00:03<00:46, 1.01s/it]\u001b[A\n",
+ " 10%|████▍ | 5/50 [00:05<00:50, 1.12s/it]\u001b[A\n",
+ " 12%|█████▎ | 6/50 [00:06<00:52, 1.20s/it]\u001b[A\n",
+ " 14%|██████▏ | 7/50 [00:07<00:46, 1.08s/it]\u001b[A\n",
+ " 16%|███████ | 8/50 [00:08<00:46, 1.12s/it]\u001b[A\n",
+ " 18%|███████▉ | 9/50 [00:09<00:48, 1.19s/it]\u001b[A\n",
+ " 20%|████████▌ | 10/50 [00:11<00:50, 1.27s/it]\u001b[A\n",
+ " 22%|█████████▍ | 11/50 [00:12<00:54, 1.40s/it]\u001b[A\n",
+ " 24%|██████████▎ | 12/50 [00:14<00:54, 1.44s/it]\u001b[A\n",
+ " 26%|███████████▏ | 13/50 [00:15<00:49, 1.35s/it]\u001b[A\n",
+ " 28%|████████████ | 14/50 [00:16<00:44, 1.22s/it]\u001b[A\n",
+ " 30%|████████████▉ | 15/50 [00:17<00:39, 1.12s/it]\u001b[A\n",
+ " 32%|█████████████▊ | 16/50 [00:18<00:35, 1.03s/it]\u001b[A\n",
+ " 34%|██████████████▌ | 17/50 [00:19<00:31, 1.03it/s]\u001b[A\n",
+ " 36%|███████████████▍ | 18/50 [00:19<00:30, 1.06it/s]\u001b[A\n",
+ " 38%|████████████████▎ | 19/50 [00:20<00:28, 1.09it/s]\u001b[A\n",
+ " 40%|█████████████████▏ | 20/50 [00:21<00:27, 1.08it/s]\u001b[A\n",
+ " 42%|██████████████████ | 21/50 [00:22<00:25, 1.13it/s]\u001b[A\n",
+ " 44%|██████████████████▉ | 22/50 [00:23<00:25, 1.10it/s]\u001b[A\n",
+ " 46%|███████████████████▊ | 23/50 [00:24<00:22, 1.18it/s]\u001b[A\n",
+ " 48%|████████████████████▋ | 24/50 [00:25<00:22, 1.15it/s]\u001b[A\n",
+ " 50%|█████████████████████▌ | 25/50 [00:26<00:22, 1.09it/s]\u001b[A\n",
+ " 52%|██████████████████████▎ | 26/50 [00:27<00:22, 1.08it/s]\u001b[A\n",
+ " 54%|███████████████████████▏ | 27/50 [00:28<00:21, 1.07it/s]\u001b[A\n",
+ " 56%|████████████████████████ | 28/50 [00:28<00:19, 1.12it/s]\u001b[A\n",
+ " 58%|████████████████████████▉ | 29/50 [00:29<00:17, 1.18it/s]\u001b[A\n",
+ " 60%|█████████████████████████▊ | 30/50 [00:30<00:16, 1.19it/s]\u001b[A\n",
+ " 62%|██████████��███████████████▋ | 31/50 [00:31<00:16, 1.17it/s]\u001b[A\n",
+ " 64%|███████████████████████████▌ | 32/50 [00:32<00:15, 1.13it/s]\u001b[A\n",
+ " 66%|████████████████████████████▍ | 33/50 [00:32<00:14, 1.21it/s]\u001b[A\n",
+ " 68%|█████████████████████████████▏ | 34/50 [00:33<00:11, 1.39it/s]\u001b[A\n",
+ " 70%|██████████████████████████████ | 35/50 [00:34<00:11, 1.29it/s]\u001b[A\n",
+ " 72%|██████████████████████████████▉ | 36/50 [00:35<00:10, 1.29it/s]\u001b[A\n",
+ " 74%|███████████████████████████████▊ | 37/50 [00:35<00:10, 1.25it/s]\u001b[A\n",
+ " 76%|████████████████████████████████▋ | 38/50 [00:36<00:09, 1.25it/s]\u001b[A\n",
+ " 78%|█████████████████████████████████▌ | 39/50 [00:37<00:08, 1.24it/s]\u001b[A\n",
+ " 80%|██████████████████████████████████▍ | 40/50 [00:38<00:07, 1.26it/s]\u001b[A\n",
+ " 82%|███████████████████████████████████▎ | 41/50 [00:39<00:07, 1.28it/s]\u001b[A\n",
+ " 84%|████████████████████████████████████ | 42/50 [00:39<00:06, 1.25it/s]\u001b[A\n",
+ " 86%|████████████████████████████████████▉ | 43/50 [00:40<00:05, 1.19it/s]\u001b[A\n",
+ " 88%|█████████████████████████████████████▊ | 44/50 [00:41<00:05, 1.15it/s]\u001b[A\n",
+ " 90%|██████████████████████████████████████▋ | 45/50 [00:42<00:04, 1.15it/s]\u001b[A\n",
+ " 92%|███████████████████████████████████████▌ | 46/50 [00:43<00:03, 1.16it/s]\u001b[A\n",
+ " 94%|████████████████████████████████████████▍ | 47/50 [00:44<00:02, 1.15it/s]\u001b[A\n",
+ " 96%|█████████████████████████████████████████▎ | 48/50 [00:45<00:01, 1.18it/s]\u001b[A\n",
+ " 98%|██████████████████████████████████████████▏| 49/50 [00:45<00:00, 1.21it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 0.2333229035139084, 'eval_wer': 0.2996667003332997, 'eval_runtime': 48.8974, 'eval_samples_per_second': 32.701, 'eval_steps_per_second': 1.023, 'epoch': 65.79}\n",
+ " 94%|██████████████████████████████████▊ | 7500/7980 [3:01:19<12:55, 1.62s/it]\n",
+ "100%|███████████████████████████████████████████| 50/50 [00:47<00:00, 1.21it/s]\u001b[A\n",
+ " \u001b[ASaving model checkpoint to ./wav2vec2-large-xls-r-300m-finnish/checkpoint-7500\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-finnish/checkpoint-7500/config.json\n",
+ "Model weights saved in ./wav2vec2-large-xls-r-300m-finnish/checkpoint-7500/pytorch_model.bin\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-finnish/checkpoint-7500/preprocessor_config.json\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-finnish/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-large-xls-r-300m-finnish/checkpoint-6500] due to args.save_total_limit\n",
+ "{'loss': 0.8184, 'learning_rate': 3.5935828877005344e-06, 'epoch': 66.67} \n",
+ "{'loss': 0.8178, 'learning_rate': 2.657754010695187e-06, 'epoch': 67.54} \n",
+ "{'loss': 0.8214, 'learning_rate': 1.7219251336898393e-06, 'epoch': 68.42} \n",
+ "{'loss': 0.8133, 'learning_rate': 7.860962566844919e-07, 'epoch': 69.3} \n",
+ "100%|█████████████████████████████████████| 7980/7980 [3:12:04<00:00, 1.28it/s]\n",
+ "\n",
+ "Training completed. Do not forget to share your model on huggingface.co/models =)\n",
+ "\n",
+ "\n",
+ "{'train_runtime': 11524.5807, 'train_samples_per_second': 22.03, 'train_steps_per_second': 0.692, 'train_loss': 1.2354308326740313, 'epoch': 70.0}\n",
+ "100%|█████████████████████████████████████| 7980/7980 [3:12:04<00:00, 1.44s/it]\n",
+ "Saving model checkpoint to ./wav2vec2-large-xls-r-300m-finnish\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-finnish/config.json\n",
+ "Model weights saved in ./wav2vec2-large-xls-r-300m-finnish/pytorch_model.bin\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-finnish/preprocessor_config.json\n",
+ "***** train metrics *****\n",
+ " epoch = 70.0\n",
+ " train_loss = 1.2354\n",
+ " train_runtime = 3:12:04.58\n",
+ " train_samples = 3627\n",
+ " train_samples_per_second = 22.03\n",
+ " train_steps_per_second = 0.692\n",
+ "01/27/2022 06:18:18 - INFO - __main__ - *** Evaluate ***\n",
+ "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 1599\n",
+ " Batch size = 32\n",
+ "100%|███████████████████████████████████████████| 50/50 [00:47<00:00, 1.04it/s]\n",
+ "***** eval metrics *****\n",
+ " epoch = 70.0\n",
+ " eval_loss = 0.2307\n",
+ " eval_runtime = 0:00:48.91\n",
+ " eval_samples = 1599\n",
+ " eval_samples_per_second = 32.69\n",
+ " eval_steps_per_second = 1.022\n",
+ " eval_wer = 0.2984\n",
+ "Saving model checkpoint to ./wav2vec2-large-xls-r-300m-finnish\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-finnish/config.json\n",
+ "Model weights saved in ./wav2vec2-large-xls-r-300m-finnish/pytorch_model.bin\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-finnish/preprocessor_config.json\n",
+ "Upload file pytorch_model.bin: 100%|██████▉| 1.17G/1.18G [01:23<00:00, 4.10MB/s]To https://huggingface.co/infinitejoy/wav2vec2-large-xls-r-300m-finnish\n",
+ " 423d9bc..236f82b main -> main\n",
+ "\n",
+ "01/27/2022 06:21:58 - WARNING - huggingface_hub.repository - To https://huggingface.co/infinitejoy/wav2vec2-large-xls-r-300m-finnish\n",
+ " 423d9bc..236f82b main -> main\n",
+ "\n",
+ "Upload file pytorch_model.bin: 100%|███████| 1.18G/1.18G [01:25<00:00, 14.7MB/s]\n",
+ "Dropping the following result as it does not have all the necessary fields:\n",
+ "{'dataset': {'name': 'MOZILLA-FOUNDATION/COMMON_VOICE_7_0 - FI', 'type': 'common_voice', 'args': 'Config: fi, Training split: train+validation, Eval split: test'}}\n",
+ "To https://huggingface.co/infinitejoy/wav2vec2-large-xls-r-300m-finnish\n",
+ " 236f82b..310dc81 main -> main\n",
+ "\n",
+ "01/27/2022 06:22:07 - WARNING - huggingface_hub.repository - To https://huggingface.co/infinitejoy/wav2vec2-large-xls-r-300m-finnish\n",
+ " 236f82b..310dc81 main -> main\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "!python run_speech_recognition_ctc.py \\\n",
+ "\t--dataset_name=\"mozilla-foundation/common_voice_7_0\" \\\n",
+ "\t--model_name_or_path=\"facebook/wav2vec2-xls-r-300m\" \\\n",
+ "\t--dataset_config_name=\"fi\" \\\n",
+ "\t--output_dir=\"./wav2vec2-large-xls-r-300m-finnish\" \\\n",
+ "\t--overwrite_output_dir \\\n",
+ "\t--num_train_epochs=\"70\" \\\n",
+ "\t--per_device_train_batch_size=\"32\" \\\n",
+ "\t--per_device_eval_batch_size=\"32\" \\\n",
+ "\t--gradient_accumulation_steps=\"1\" \\\n",
+ "\t--learning_rate=\"7e-5\" \\\n",
+ "\t--warmup_steps=\"500\" \\\n",
+ "\t--length_column_name=\"input_length\" \\\n",
+ "\t--evaluation_strategy=\"steps\" \\\n",
+ "\t--text_column_name=\"sentence\" \\\n",
+ "\t--chars_to_ignore , ? . ! \\- \\; \\: \\\" “ % ‘ ” � — ’ … – \\\n",
+ "\t--save_steps=\"500\" \\\n",
+ "\t--eval_steps=\"500\" \\\n",
+ "\t--logging_steps=\"100\" \\\n",
+ "\t--layerdrop=\"0.0\" \\\n",
+ "\t--activation_dropout=\"0.1\" \\\n",
+ "\t--save_total_limit=\"2\" \\\n",
+ "\t--freeze_feature_encoder \\\n",
+ "\t--feat_proj_dropout=\"0.0\" \\\n",
+ "\t--mask_time_prob=\"0.75\" \\\n",
+ "\t--mask_time_length=\"10\" \\\n",
+ "\t--mask_feature_prob=\"0.25\" \\\n",
+ "\t--mask_feature_length=\"64\" \\\n",
+ "\t--gradient_checkpointing \\\n",
+ "\t--use_auth_token \\\n",
+ "\t--fp16 \\\n",
+ "\t--group_by_length \\\n",
+ "\t--do_train --do_eval \\\n",
+ " --push_to_hub"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# !rm -rf wav2vec2-large-xls-r-300m-bashkir"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!ls -ltr"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Filesystem Size Used Avail Use% Mounted on\n",
+ "overlay 3.5T 1.2T 2.2T 34% /\n",
+ "tmpfs 64M 0 64M 0% /dev\n",
+ "tmpfs 87G 0 87G 0% /sys/fs/cgroup\n",
+ "tmpfs 87G 0 87G 0% /dev/shm\n",
+ "/dev/md0 3.5T 1.2T 2.2T 34% /etc/group\n",
+ "tmpfs 87G 12K 87G 1% /proc/driver/nvidia\n",
+ "/dev/vda1 49G 6.5G 42G 14% /usr/bin/nvidia-smi\n",
+ "udev 87G 0 87G 0% /dev/nvidia0\n",
+ "tmpfs 87G 0 87G 0% /proc/acpi\n",
+ "tmpfs 87G 0 87G 0% /proc/scsi\n",
+ "tmpfs 87G 0 87G 0% /sys/firmware\n"
+ ]
+ }
+ ],
+ "source": [
+ "!df -h"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Reusing dataset common_voice (/workspace/.cache/huggingface/datasets/mozilla-foundation___common_voice/fi/7.0.0/33e08856cfa0d0665e837bcad73ffd920a0bc713ce8c5fffb55dbdf1c084d5ba)\n",
+ "Reusing dataset common_voice (/workspace/.cache/huggingface/datasets/mozilla-foundation___common_voice/fi/7.0.0/33e08856cfa0d0665e837bcad73ffd920a0bc713ce8c5fffb55dbdf1c084d5ba)\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "3627\n"
+ ]
+ }
+ ],
+ "source": [
+ "from datasets import load_dataset, load_metric, Audio\n",
+ "\n",
+ "common_voice_train = load_dataset(\"mozilla-foundation/common_voice_7_0\", \"fi\", use_auth_token=True, split=\"train+validation\")\n",
+ "common_voice_test = load_dataset(\"mozilla-foundation/common_voice_7_0\", \"fi\", use_auth_token=True, split=\"test\")\n",
+ "\n",
+ "print(len(common_voice_train))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "7934.0625"
+ ]
+ },
+ "execution_count": 28,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "len(common_voice_train) * 70 / 32"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "common_voice_train = common_voice_train.remove_columns([\"accent\", \"age\", \"client_id\", \"down_votes\", \"gender\", \"locale\", \"segment\", \"up_votes\"])\n",
+ "common_voice_test = common_voice_test.remove_columns([\"accent\", \"age\", \"client_id\", \"down_votes\", \"gender\", \"locale\", \"segment\", \"up_votes\"])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from datasets import ClassLabel\n",
+ "import random\n",
+ "import pandas as pd\n",
+ "from IPython.display import display, HTML\n",
+ "\n",
+ "def show_random_elements(dataset, num_examples=10):\n",
+ "    \"\"\"Render `num_examples` distinct, randomly chosen rows of `dataset` as an HTML table.\n",
+ "\n",
+ "    Args:\n",
+ "        dataset: sized object that can be indexed with a list of row positions.\n",
+ "        num_examples: number of distinct rows to display.\n",
+ "\n",
+ "    Raises:\n",
+ "        AssertionError: if `num_examples` is larger than `len(dataset)`.\n",
+ "    \"\"\"\n",
+ "    assert num_examples <= len(dataset), \"Can't pick more elements than there are in the dataset.\"\n",
+ "    # random.sample draws without replacement, so no retry loop is needed to avoid duplicates.\n",
+ "    picks = random.sample(range(len(dataset)), num_examples)\n",
+ "\n",
+ "    df = pd.DataFrame(dataset[picks])\n",
+ "    display(HTML(df.to_html()))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " \n",
+ " | \n",
+ " sentence | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Äänestimme mietintöä vastaan edellä esitettyjen voimakkaiden mielipiteidemme vuoksi. | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " \"Aikaa lähtövalmisteluihi on tunti\"\".\" | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Ja sen jälkeen lähtee jono rannalle. | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Huokaisin helpotuksesta. | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Lämpö, joka jatkui ja jatkui. | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " Hän varmasti tiesi, mitä olin aikeissa tehdä. | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " Ei näy, mistä siihen pääsisi ylös. | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " Äänestän sen vuoksi tämän tärkeän mietinnön puolesta. | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " Porsaasta johtuivat ajatukset Kaisan taloon. | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " Aivan oikein. | \n",
+ "
\n",
+ " \n",
+ "
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "show_random_elements(common_voice_train.remove_columns([\"path\", \"audio\"]), num_examples=10)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import re\n",
+ "# Raw string: the backslashes are regex escapes, not Python string escapes, so this\n",
+ "# avoids the invalid-escape-sequence warning that a plain string literal triggers.\n",
+ "chars_to_remove_regex = r'[\\,\\?\\.\\!\\-\\;\\:\\\"\\“\\%\\‘\\”\\�\\—\\’\\…\\–]'\n",
+ "\n",
+ "def remove_special_characters(batch):\n",
+ "    \"\"\"Strip every character matched by `chars_to_remove_regex` from `batch[\"sentence\"]` and lowercase it.\n",
+ "\n",
+ "    The batch is modified in place and also returned.\n",
+ "    \"\"\"\n",
+ "    batch[\"sentence\"] = re.sub(chars_to_remove_regex, '', batch[\"sentence\"]).lower()\n",
+ "    return batch"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Loading cached processed dataset at /workspace/.cache/huggingface/datasets/mozilla-foundation___common_voice/fi/7.0.0/33e08856cfa0d0665e837bcad73ffd920a0bc713ce8c5fffb55dbdf1c084d5ba/cache-079dfa6e4746ae78.arrow\n",
+ "Loading cached processed dataset at /workspace/.cache/huggingface/datasets/mozilla-foundation___common_voice/fi/7.0.0/33e08856cfa0d0665e837bcad73ffd920a0bc713ce8c5fffb55dbdf1c084d5ba/cache-474e82849d2d9c95.arrow\n"
+ ]
+ }
+ ],
+ "source": [
+ "common_voice_train = common_voice_train.map(remove_special_characters)\n",
+ "common_voice_test = common_voice_test.map(remove_special_characters)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 34,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# start_with_ar = common_voice_train.filter(lambda example: \"'\" in example['sentence'])\n",
+ "# start_with_ar[0]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# start_with_ar[1]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def replace_hatted_characters(batch):\n",
+ "    \"\"\"Pass-through hook for folding circumflexed vowels; returns `batch` untouched.\n",
+ "\n",
+ "    The substitutions of â, î, ô and û by a, i, o and u on `batch[\"sentence\"]` are\n",
+ "    switched off here, so mapping this function over a dataset changes nothing.\n",
+ "    \"\"\"\n",
+ "    return batch"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Loading cached processed dataset at /workspace/.cache/huggingface/datasets/mozilla-foundation___common_voice/fi/7.0.0/33e08856cfa0d0665e837bcad73ffd920a0bc713ce8c5fffb55dbdf1c084d5ba/cache-7b7878ba6acb4302.arrow\n",
+ "Loading cached processed dataset at /workspace/.cache/huggingface/datasets/mozilla-foundation___common_voice/fi/7.0.0/33e08856cfa0d0665e837bcad73ffd920a0bc713ce8c5fffb55dbdf1c084d5ba/cache-93e7f03d125d9e56.arrow\n"
+ ]
+ }
+ ],
+ "source": [
+ "common_voice_train = common_voice_train.map(replace_hatted_characters)\n",
+ "common_voice_test = common_voice_test.map(replace_hatted_characters)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def extract_all_chars(batch):\n",
+ "    \"\"\"Collect the distinct characters used across a batch of sentences.\n",
+ "\n",
+ "    Returns a dict with two single-element lists: `vocab` wraps the list of unique\n",
+ "    characters (in no particular order) and `all_text` wraps the sentences joined by spaces.\n",
+ "    \"\"\"\n",
+ "    joined = \" \".join(batch[\"sentence\"])\n",
+ "    unique_chars = [*set(joined)]\n",
+ "    return dict(vocab=[unique_chars], all_text=[joined])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "705e09df2c5644d9b4ddab3d367b44d6",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/1 [00:00, ?ba/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "b26ee857a54b4899bfb02a18895bdfb4",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/1 [00:00, ?ba/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "vocab_train = common_voice_train.map(extract_all_chars, batched=True, batch_size=-1, keep_in_memory=True, remove_columns=common_voice_train.column_names)\n",
+ "vocab_test = common_voice_test.map(extract_all_chars, batched=True, batch_size=-1, keep_in_memory=True, remove_columns=common_voice_test.column_names)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "vocab_list = list(set(vocab_train[\"vocab\"][0]) | set(vocab_test[\"vocab\"][0]))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{' ': 0,\n",
+ " \"'\": 1,\n",
+ " 'a': 2,\n",
+ " 'b': 3,\n",
+ " 'c': 4,\n",
+ " 'd': 5,\n",
+ " 'e': 6,\n",
+ " 'f': 7,\n",
+ " 'g': 8,\n",
+ " 'h': 9,\n",
+ " 'i': 10,\n",
+ " 'j': 11,\n",
+ " 'k': 12,\n",
+ " 'l': 13,\n",
+ " 'm': 14,\n",
+ " 'n': 15,\n",
+ " 'o': 16,\n",
+ " 'p': 17,\n",
+ " 'q': 18,\n",
+ " 'r': 19,\n",
+ " 's': 20,\n",
+ " 't': 21,\n",
+ " 'u': 22,\n",
+ " 'v': 23,\n",
+ " 'w': 24,\n",
+ " 'x': 25,\n",
+ " 'y': 26,\n",
+ " 'z': 27,\n",
+ " 'ä': 28,\n",
+ " 'å': 29,\n",
+ " 'é': 30,\n",
+ " 'ö': 31}"
+ ]
+ },
+ "execution_count": 41,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "vocab_dict = {v: k for k, v in enumerate(sorted(vocab_list))}\n",
+ "vocab_dict"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 42,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "file ./config.json not found\n",
+ "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "34\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/opt/conda/lib/python3.8/site-packages/huggingface_hub/hf_api.py:1001: FutureWarning: `create_repo` now takes `token` as an optional positional argument. Be sure to adapt your code!\n",
+ " warnings.warn(\n",
+ "Cloning https://huggingface.co/infinitejoy/wav2vec2-large-xls-r-300m-finnish into local empty directory.\n",
+ "To https://huggingface.co/infinitejoy/wav2vec2-large-xls-r-300m-finnish\n",
+ " 9ce4a79..212cf89 main -> main\n",
+ "\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "'https://huggingface.co/infinitejoy/wav2vec2-large-xls-r-300m-finnish/commit/212cf89ca0491548a79a7ba213ca8a9e91b5303e'"
+ ]
+ },
+ "execution_count": 42,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Safe to re-run: the guards below leave an already-converted vocabulary untouched.\n",
+ "# (A plain `del vocab_dict[\" \"]` raises KeyError on a second run, and re-assigning\n",
+ "# len(vocab_dict) to both special tokens would then give them the same id.)\n",
+ "if \" \" in vocab_dict:\n",
+ "    # The tokenizer below is configured with \"|\" as its word delimiter, so the space entry is renamed.\n",
+ "    vocab_dict[\"|\"] = vocab_dict.pop(\" \")\n",
+ "\n",
+ "# setdefault adds each special token only once, with the next free id.\n",
+ "vocab_dict.setdefault(\"[UNK]\", len(vocab_dict))\n",
+ "vocab_dict.setdefault(\"[PAD]\", len(vocab_dict))\n",
+ "print(len(vocab_dict))\n",
+ "\n",
+ "import json\n",
+ "with open('./vocab.json', 'w') as vocab_file:\n",
+ "    json.dump(vocab_dict, vocab_file)\n",
+ "\n",
+ "from transformers import Wav2Vec2CTCTokenizer\n",
+ "\n",
+ "tokenizer = Wav2Vec2CTCTokenizer.from_pretrained(\"./\", unk_token=\"[UNK]\", pad_token=\"[PAD]\", word_delimiter_token=\"|\")\n",
+ "\n",
+ "repo_name = \"wav2vec2-large-xls-r-300m-finnish\"\n",
+ "\n",
+ "tokenizer.push_to_hub(repo_name)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 43,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "--2022-01-27 02:33:09-- https://raw.githubusercontent.com/huggingface/transformers/master/examples/research_projects/robust-speech-event/eval.py\n",
+ "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.109.133, 185.199.108.133, ...\n",
+ "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.\n",
+ "HTTP request sent, awaiting response... 200 OK\n",
+ "Length: 4421 (4.3K) [text/plain]\n",
+ "Saving to: ‘eval.py’\n",
+ "\n",
+ "eval.py 100%[===================>] 4.32K --.-KB/s in 0s \n",
+ "\n",
+ "2022-01-27 02:33:09 (13.9 MB/s) - ‘eval.py’ saved [4421/4421]\n",
+ "\n",
+ "total 1232608\n",
+ "-rw-r--r-- 1 ovh ovh 435 Jan 26 15:14 vocab.json\n",
+ "-rw-r--r-- 1 ovh ovh 293 Jan 26 15:14 tokenizer_config.json\n",
+ "-rw-r--r-- 1 ovh ovh 502 Jan 26 15:14 special_tokens_map.json\n",
+ "-rw-r--r-- 1 ovh ovh 23 Jan 26 15:14 added_tokens.json\n",
+ "drwxr-xr-x 2 ovh ovh 4096 Jan 26 17:34 checkpoint-5000\n",
+ "drwxr-xr-x 2 ovh ovh 4096 Jan 26 17:49 checkpoint-5500\n",
+ "-rw-r--r-- 1 ovh ovh 195 Jan 26 17:55 train_results.json\n",
+ "-rw-r--r-- 1 ovh ovh 10133 Jan 26 17:55 trainer_state.json\n",
+ "-rw-r--r-- 1 ovh ovh 222 Jan 26 17:55 eval_results.json\n",
+ "-rw-r--r-- 1 ovh ovh 395 Jan 26 17:55 all_results.json\n",
+ "-rw-r--r-- 1 ovh ovh 2033 Jan 26 17:55 config.json\n",
+ "-rw-r--r-- 1 ovh ovh 1262112241 Jan 26 17:55 pytorch_model.bin\n",
+ "-rw-r--r-- 1 ovh ovh 3055 Jan 26 17:55 training_args.bin\n",
+ "-rw-r--r-- 1 ovh ovh 212 Jan 26 17:55 preprocessor_config.json\n",
+ "-rw-r--r-- 1 ovh ovh 2182 Jan 26 17:57 README.md\n",
+ "-rw-r--r-- 1 ovh ovh 4421 Jan 27 02:33 eval.py\n"
+ ]
+ }
+ ],
+ "source": [
+ "!wget -O eval.py https://raw.githubusercontent.com/huggingface/transformers/master/examples/research_projects/robust-speech-event/eval.py\n",
+ "# Copy the evaluation script into the Finnish model directory (the training run's --output_dir).\n",
+ "!cp eval.py wav2vec2-large-xls-r-300m-finnish\n",
+ "!ls -ltr wav2vec2-large-xls-r-300m-finnish"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Reusing dataset common_voice (/workspace/.cache/huggingface/datasets/mozilla-foundation___common_voice/cv/7.0.0/33e08856cfa0d0665e837bcad73ffd920a0bc713ce8c5fffb55dbdf1c084d5ba)\n",
+ " 74%|██████████████████████████████▍ | 601/810 [05:33<01:52, 1.85ex/s]"
+ ]
+ }
+ ],
+ "source": [
+ "# Evaluate the Finnish model on the Common Voice 7 \"fi\" test split. The script is run from\n",
+ "# the notebook directory (where wget saved it) while the working directory is the model folder.\n",
+ "!cd wav2vec2-large-xls-r-300m-finnish && python ../eval.py \\\n",
+ "    --model_id ./ --dataset mozilla-foundation/common_voice_7_0 --config fi --split test --log_outputs"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "24592b0be30e4eafb1949cf09d1c4fb4",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Downloading: 0%| | 0.00/260 [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "f9bf2ab0d2fa4d3f9235cc6d1ab772f1",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Downloading: 0%| | 0.00/574 [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "b0791474a34043da8057e06741472ade",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Downloading: 0%| | 0.00/23.0 [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "1ccbd582d616458b87c76ac8dc5b6b36",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Downloading: 0%| | 0.00/309 [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# from transformers import AutoModelForCTC, Wav2Vec2Processor\n",
+ "\n",
+ "# model = AutoModelForCTC.from_pretrained(\"infinitejoy/wav2vec2-large-xls-r-300m-bashkir\")\n",
+ "# processor = Wav2Vec2Processor.from_pretrained(\"infinitejoy/wav2vec2-large-xls-r-300m-bashkir\")\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "013fabff2ea243a0a728a79b8f54ae09",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Downloading: 0%| | 0.00/1.99k [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "a8d9ca6d024f46f58301bfbcc475e41a",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Downloading: 0%| | 0.00/1.18G [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "b336e2647c05466d87a11dfa326e30d6",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Downloading: 0%| | 0.00/212 [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "8e6962320ad944439261482617be4869",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Downloading: 0%| | 0.00/260 [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "99de2ef750aa49fd986965d66853a5ea",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Downloading: 0%| | 0.00/520 [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "765670f93e5f4c2e849c98d53e616f38",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Downloading: 0%| | 0.00/23.0 [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "812abafc8f6b49e3a498718d034a379b",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Downloading: 0%| | 0.00/309 [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "ename": "AssertionError",
+ "evalue": "55",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mAssertionError\u001b[0m Traceback (most recent call last)",
+ "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0mlogits\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput_values\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlogits\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 12\u001b[0;31m \u001b[0;32massert\u001b[0m \u001b[0mlogits\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m32\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlogits\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+ "\u001b[0;31mAssertionError\u001b[0m: 55"
+ ]
+ }
+ ],
+ "source": [
+ "# from transformers import AutoModelForCTC, AutoProcessor\n",
+ "# from datasets import load_dataset\n",
+ "\n",
+ "# model = AutoModelForCTC.from_pretrained(\"infinitejoy/wav2vec2-large-xls-r-300m-bashkir\")\n",
+ "# processor = AutoProcessor.from_pretrained(\"infinitejoy/wav2vec2-large-xls-r-300m-bashkir\")\n",
+ "\n",
+ "# input_values = processor(common_voice_test[0][\"audio\"][\"array\"], return_tensors=\"pt\", sampling_rate=16_000).input_values\n",
+ "# # input_values = input_values.to(\"cuda\")\n",
+ "\n",
+ "# logits = model(input_values).logits\n",
+ "\n",
+ "# assert logits.shape[-1] == 32, logits.shape[-1]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 51,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Reusing dataset common_voice (/workspace/.cache/huggingface/datasets/mozilla-foundation___common_voice/fi/7.0.0/33e08856cfa0d0665e837bcad73ffd920a0bc713ce8c5fffb55dbdf1c084d5ba)\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "76613aaa9bd3471f9cdc2a3771250713",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/10 [00:00, ?ex/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['lhsz mxs sdjhrhlld', \"zzmdrszlld szlzm utnjrh snhrhm jthm l''s'kntcdm u'khnjtms' nm 'rh'm nr'ks' drhsszmxs\", \"qtod'sjn qdllhhm u'h ds\", \"zzmdrshm mzhm nkkdm lhdshmmém otnkdrs'\", \"jhhsnr dssz stkhssd i' nodshssd ldhkkd uhhr'tsdmmd\", \"j'sr'gchm xkér\", 'szlz nm gxuhm szqjdz sdgszuz', \"nr'jdxgshém nr'jj''s q'gnhss'u's k'hsnjrdm q'jdms'lhrdm i' xkkzohcnm\", \"zzmdrszm ozzsérk'trdkl'm otnkdrs' u'hjj' rhhmz dh ldmmzjzzm qhhsszuzm ohsjzkkd\", \"chdfn j''sth l''g'm ltssdh gzm u'hjtss'mts jtnkkddks'\"]\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "'Mitä nyt tekisimme?'"
+ ]
+ },
+ "execution_count": 51,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from datasets import Audio, Dataset, load_dataset, load_metric\n",
+ "from transformers import AutoFeatureExtractor, pipeline\n",
+ "\n",
+ "dataset = load_dataset(\"mozilla-foundation/common_voice_7_0\", \"fi\", use_auth_token=True, split=\"train+validation\")\n",
+ "\n",
+ "# for testing: only process the first ten examples\n",
+ "dataset = dataset.select(range(10))\n",
+ "\n",
+ "repo_name = 'infinitejoy/wav2vec2-large-xls-r-300m-finnish'\n",
+ "\n",
+ "# load feature extractor\n",
+ "feature_extractor = AutoFeatureExtractor.from_pretrained(repo_name)\n",
+ "# feature_extractor = processor_with_lm.feature_extractor\n",
+ "sampling_rate = feature_extractor.sampling_rate\n",
+ "\n",
+ "# resample audio\n",
+ "dataset = dataset.cast_column(\"audio\", Audio(sampling_rate=sampling_rate))\n",
+ "\n",
+ "# load eval pipeline\n",
+ "asr = pipeline(\"automatic-speech-recognition\", model=repo_name, feature_extractor=feature_extractor)\n",
+ "\n",
+ "# map function: transcribe one example's audio and keep its reference sentence\n",
+ "def map_to_pred(batch):\n",
+ " prediction = asr(\n",
+ " batch[\"audio\"][\"array\"])\n",
+ "\n",
+ " batch[\"prediction\"] = prediction[\"text\"].replace('[UNK]', '')\n",
+ " batch[\"target\"] = batch[\"sentence\"]\n",
+ " return batch\n",
+ "\n",
+ "# run inference on all examples\n",
+ "result = dataset.map(map_to_pred, remove_columns=dataset.column_names)\n",
+ "print(result[\"prediction\"])\n",
+ "\n",
+ "result[0]['target']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 49,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "\"qtod'sjn qdllhhm u'h ds\""
+ ]
+ },
+ "execution_count": 49,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "result[2]['prediction'].replace('[UNK]', '')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 50,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'Rupeatko remmiin, vai et?'"
+ ]
+ },
+ "execution_count": 50,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "result[2]['target'].replace('[UNK]', '')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "accelerator": "GPU",
+ "colab": {
+ "authorship_tag": "ABX9TyM3OaMlm9YQtKpl28c8gBBd",
+ "include_colab_link": true,
+ "name": "DebugOVHTransformers.ipynb",
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.8"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}