Commit 20c1366
Josh Cole committed
Parent(s): 6e2f9e3

initial commit
.gitignore
ADDED
@@ -0,0 +1 @@
+.ipynb_checkpoints/
Generate.ipynb
ADDED
@@ -0,0 +1,562 @@
+{
+"cells": [
+{
+"cell_type": "code",
+"execution_count": 1,
+"id": "5205c0d3-2272-4a43-9345-9553af479fe6",
+"metadata": {},
+"outputs": [
+{
+"data": {
+"application/vnd.jupyter.widget-view+json": {
+"model_id": "50bf0f78f5f044dd8be6b181b2cb0949",
+"version_major": 2,
+"version_minor": 0
+},
+"text/plain": [
+"VBox(children=(HTML(value='<center> <img\\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…"
+]
+},
+"metadata": {},
+"output_type": "display_data"
+}
+],
+"source": [
+"from huggingface_hub import notebook_login\n",
+"notebook_login()"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 3,
+"id": "38bdf299-f60d-43ea-9230-df1be861e406",
+"metadata": {},
+"outputs": [
+{
+"name": "stderr",
+"output_type": "stream",
+"text": [
+"Using custom data configuration sharpcoder--bjorn_training-8c32a3534606a113\n",
+"Reusing dataset parquet (/home/sharpcoder/.cache/huggingface/datasets/sharpcoder___parquet/sharpcoder--bjorn_training-8c32a3534606a113/0.0.0/7328ef7ee03eaf3f86ae40594d46a1cec86161704e02dd19f232d81eee72ade8)\n"
+]
+},
+{
+"data": {
+"application/vnd.jupyter.widget-view+json": {
+"model_id": "c495fe2f4a44499fb32751d60ac1488e",
+"version_major": 2,
+"version_minor": 0
+},
+"text/plain": [
+" 0%| | 0/1 [00:00<?, ?it/s]"
+]
+},
+"metadata": {},
+"output_type": "display_data"
+}
+],
+"source": [
+"from datasets import load_dataset, load_metric\n",
+"ds = load_dataset(\"sharpcoder/bjorn_training\")"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 38,
+"id": "75b32151-eb53-4476-8c1f-7e6da72e173e",
+"metadata": {},
+"outputs": [
+{
+"data": {
+"application/vnd.jupyter.widget-view+json": {
+"model_id": "0f019d1f864b4b56af5c828588fd89bf",
+"version_major": 2,
+"version_minor": 0
+},
+"text/plain": [
+" 0%| | 0/1 [00:00<?, ?ba/s]"
+]
+},
+"metadata": {},
+"output_type": "display_data"
+}
+],
+"source": [
+"def extract_all_chars(batch):\n",
+" all_text = \" \".join(batch[\"text\"])\n",
+" vocab = list(set(all_text))\n",
+" return {\"vocab\": [vocab], \"all_text\": [all_text]}\n",
+"\n",
+"vocabs = ds.map(extract_all_chars, batched=True, batch_size=-1, keep_in_memory=True, remove_columns=ds.column_names[\"train\"])\n",
+"vocab_list = list(set(vocabs[\"train\"][\"vocab\"][0]) | set(vocabs[\"train\"][\"vocab\"][0]))\n",
+"vocab_dict = {v: k for k, v in enumerate(vocab_list)}\n",
+"vocab_dict[\"|\"] = vocab_dict[\" \"]\n",
+"del vocab_dict[\" \"]\n",
+"vocab_dict[\"[UNK]\"] = len(vocab_dict)\n",
+"vocab_dict[\"[PAD]\"] = len(vocab_dict)\n",
+"len(vocab_dict)\n",
+"import json\n",
+"with open('vocab.json', 'w') as vocab_file:\n",
+" json.dump(vocab_dict, vocab_file)"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 39,
+"id": "d214872e-d4b1-4aa7-be07-8a1591961968",
+"metadata": {},
+"outputs": [],
+"source": [
+"from transformers import Wav2Vec2CTCTokenizer\n",
+"from transformers import Wav2Vec2FeatureExtractor\n",
+"from transformers import Wav2Vec2Processor\n",
+"\n",
+"tokenizer = Wav2Vec2CTCTokenizer(\"./vocab.json\", unk_token=\"[UNK]\", pad_token=\"[PAD]\", word_delimiter_token=\"|\")\n",
+"feature_extractor = Wav2Vec2FeatureExtractor(feature_size=1, sampling_rate=16000, padding_value=0.0, do_normalize=True, return_attention_mask=False)\n",
+"processor = Wav2Vec2Processor(feature_extractor=feature_extractor, tokenizer=tokenizer)"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 40,
+"id": "e906c45f-6971-43c3-ad0a-b13363100bdf",
+"metadata": {},
+"outputs": [],
+"source": [
+"def prepare_dataset(batch):\n",
+" audio = batch[\"audio\"]\n",
+"\n",
+" # batched output is \"un-batched\" to ensure mapping is correct\n",
+" batch[\"input_values\"] = processor(audio[\"array\"], sampling_rate=audio[\"sample_rate\"]).input_values[0]\n",
+" batch[\"input_length\"] = len(batch[\"input_values\"])\n",
+" \n",
+" with processor.as_target_processor():\n",
+" batch[\"labels\"] = processor(batch[\"text\"]).input_ids\n",
+" return batch"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 41,
+"id": "8c083db6-eab5-4f25-9a08-eab50d2d30ac",
+"metadata": {},
+"outputs": [
+{
+"name": "stderr",
+"output_type": "stream",
+"text": [
+"num_proc must be <= 1. Reducing num_proc to 1 for dataset of size 1.\n"
+]
+},
+{
+"data": {
+"application/vnd.jupyter.widget-view+json": {
+"model_id": "3b36aee8ffc44253a8381da4d0f4c362",
+"version_major": 2,
+"version_minor": 0
+},
+"text/plain": [
+" 0%| | 0/1 [00:00<?, ?ex/s]"
+]
+},
+"metadata": {},
+"output_type": "display_data"
+}
+],
+"source": [
+"ds_prepared = ds.map(prepare_dataset, remove_columns=ds.column_names[\"train\"], num_proc=4)"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 42,
+"id": "50c9a6ad-9e79-4a1c-a5ce-6e1f73a96e4d",
+"metadata": {},
+"outputs": [],
+"source": [
+"import torch\n",
+"\n",
+"from dataclasses import dataclass, field\n",
+"from typing import Any, Dict, List, Optional, Union\n",
+"\n",
+"@dataclass\n",
+"class DataCollatorCTCWithPadding:\n",
+" \"\"\"\n",
+" Data collator that will dynamically pad the inputs received.\n",
+" Args:\n",
+" processor (:class:`~transformers.Wav2Vec2Processor`)\n",
+" The processor used for proccessing the data.\n",
+" padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`):\n",
+" Select a strategy to pad the returned sequences (according to the model's padding side and padding index)\n",
+" among:\n",
+" * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single\n",
+" sequence if provided).\n",
+" * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the\n",
+" maximum acceptable input length for the model if that argument is not provided.\n",
+" * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of\n",
+" different lengths).\n",
+" \"\"\"\n",
+"\n",
+" processor: Wav2Vec2Processor\n",
+" padding: Union[bool, str] = True\n",
+"\n",
+" def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:\n",
+" # split inputs and labels since they have to be of different lenghts and need\n",
+" # different padding methods\n",
+" input_features = [{\"input_values\": feature[\"input_values\"]} for feature in features]\n",
+" label_features = [{\"input_ids\": feature[\"labels\"]} for feature in features]\n",
+"\n",
+" batch = self.processor.pad(\n",
+" input_features,\n",
+" padding=self.padding,\n",
+" return_tensors=\"pt\",\n",
+" )\n",
+" with self.processor.as_target_processor():\n",
+" labels_batch = self.processor.pad(\n",
+" label_features,\n",
+" padding=self.padding,\n",
+" return_tensors=\"pt\",\n",
+" )\n",
+"\n",
+" # replace padding with -100 to ignore loss correctly\n",
+" labels = labels_batch[\"input_ids\"].masked_fill(labels_batch.attention_mask.ne(1), -100)\n",
+"\n",
+" batch[\"labels\"] = labels\n",
+"\n",
+" return batch\n",
+" \n",
+"def compute_metrics(pred):\n",
+" pred_logits = pred.predictions\n",
+" pred_ids = np.argmax(pred_logits, axis=-1)\n",
+"\n",
+" pred.label_ids[pred.label_ids == -100] = processor.tokenizer.pad_token_id\n",
+"\n",
+" pred_str = processor.batch_decode(pred_ids)\n",
+" # we do not want to group tokens when computing the metrics\n",
+" label_str = processor.batch_decode(pred.label_ids, group_tokens=False)\n",
+"\n",
+" wer = wer_metric.compute(predictions=pred_str, references=label_str)\n",
+"\n",
+" return {\"wer\": wer}"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 43,
+"id": "1025ffdf-cb83-4895-89ab-a98bc3fab642",
+"metadata": {},
+"outputs": [],
+"source": [
+"data_collator = DataCollatorCTCWithPadding(processor=processor, padding=True)\n",
+"wer_metric = load_metric(\"wer\")"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 44,
+"id": "71351cf4-6d00-40ae-89cc-cedb87073625",
+"metadata": {},
+"outputs": [
+{
+"name": "stderr",
+"output_type": "stream",
+"text": [
+"loading configuration file https://huggingface.co/facebook/wav2vec2-base/resolve/main/config.json from cache at /home/sharpcoder/.cache/huggingface/transformers/c7746642f045322fd01afa31271dd490e677ea11999e68660a92619ec7c892b4.ce1f96bfaf3d7475cb8187b9668c7f19437ade45fb9ceb78d2b06a2cec198015\n",
+"/home/sharpcoder/.local/lib/python3.10/site-packages/transformers/configuration_utils.py:336: UserWarning: Passing `gradient_checkpointing` to a config initialization is deprecated and will be removed in v5 Transformers. Using `model.gradient_checkpointing_enable()` instead, or if you are using the `Trainer` API, pass `gradient_checkpointing=True` in your `TrainingArguments`.\n",
+" warnings.warn(\n",
+"Model config Wav2Vec2Config {\n",
+" \"activation_dropout\": 0.0,\n",
+" \"apply_spec_augment\": true,\n",
+" \"architectures\": [\n",
+" \"Wav2Vec2ForPreTraining\"\n",
+" ],\n",
+" \"attention_dropout\": 0.1,\n",
+" \"bos_token_id\": 1,\n",
+" \"classifier_proj_size\": 256,\n",
+" \"codevector_dim\": 256,\n",
+" \"contrastive_logits_temperature\": 0.1,\n",
+" \"conv_bias\": false,\n",
+" \"conv_dim\": [\n",
+" 512,\n",
+" 512,\n",
+" 512,\n",
+" 512,\n",
+" 512,\n",
+" 512,\n",
+" 512\n",
+" ],\n",
+" \"conv_kernel\": [\n",
+" 10,\n",
+" 3,\n",
+" 3,\n",
+" 3,\n",
+" 3,\n",
+" 2,\n",
+" 2\n",
+" ],\n",
+" \"conv_stride\": [\n",
+" 5,\n",
+" 2,\n",
+" 2,\n",
+" 2,\n",
+" 2,\n",
+" 2,\n",
+" 2\n",
+" ],\n",
+" \"ctc_loss_reduction\": \"mean\",\n",
+" \"ctc_zero_infinity\": false,\n",
+" \"diversity_loss_weight\": 0.1,\n",
+" \"do_stable_layer_norm\": false,\n",
+" \"eos_token_id\": 2,\n",
+" \"feat_extract_activation\": \"gelu\",\n",
+" \"feat_extract_norm\": \"group\",\n",
+" \"feat_proj_dropout\": 0.1,\n",
+" \"feat_quantizer_dropout\": 0.0,\n",
+" \"final_dropout\": 0.0,\n",
+" \"freeze_feat_extract_train\": true,\n",
+" \"gradient_checkpointing\": true,\n",
+" \"hidden_act\": \"gelu\",\n",
+" \"hidden_dropout\": 0.1,\n",
+" \"hidden_size\": 768,\n",
+" \"initializer_range\": 0.02,\n",
+" \"intermediate_size\": 3072,\n",
+" \"layer_norm_eps\": 1e-05,\n",
+" \"layerdrop\": 0.0,\n",
+" \"mask_channel_length\": 10,\n",
+" \"mask_channel_min_space\": 1,\n",
+" \"mask_channel_other\": 0.0,\n",
+" \"mask_channel_prob\": 0.0,\n",
+" \"mask_channel_selection\": \"static\",\n",
+" \"mask_feature_length\": 10,\n",
+" \"mask_feature_prob\": 0.0,\n",
+" \"mask_time_length\": 10,\n",
+" \"mask_time_min_space\": 1,\n",
+" \"mask_time_other\": 0.0,\n",
+" \"mask_time_prob\": 0.05,\n",
+" \"mask_time_selection\": \"static\",\n",
+" \"model_type\": \"wav2vec2\",\n",
+" \"no_mask_channel_overlap\": false,\n",
+" \"no_mask_time_overlap\": false,\n",
+" \"num_attention_heads\": 12,\n",
+" \"num_codevector_groups\": 2,\n",
+" \"num_codevectors_per_group\": 320,\n",
+" \"num_conv_pos_embedding_groups\": 16,\n",
+" \"num_conv_pos_embeddings\": 128,\n",
+" \"num_feat_extract_layers\": 7,\n",
+" \"num_hidden_layers\": 12,\n",
+" \"num_negatives\": 100,\n",
+" \"pad_token_id\": 19,\n",
+" \"proj_codevector_dim\": 256,\n",
+" \"transformers_version\": \"4.11.3\",\n",
+" \"use_weighted_layer_sum\": false,\n",
+" \"vocab_size\": 32\n",
+"}\n",
+"\n",
+"loading weights file https://huggingface.co/facebook/wav2vec2-base/resolve/main/pytorch_model.bin from cache at /home/sharpcoder/.cache/huggingface/transformers/ef45231897ce572a660ebc5a63d3702f1a6041c4c5fb78cbec330708531939b3.fcae05302a685f7904c551c8ea571e8bc2a2c4a1777ea81ad66e47f7883a650a\n",
+"Some weights of the model checkpoint at facebook/wav2vec2-base were not used when initializing Wav2Vec2ForCTC: ['project_q.bias', 'project_hid.bias', 'quantizer.codevectors', 'project_q.weight', 'quantizer.weight_proj.weight', 'quantizer.weight_proj.bias', 'project_hid.weight']\n",
+"- This IS expected if you are initializing Wav2Vec2ForCTC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
+"- This IS NOT expected if you are initializing Wav2Vec2ForCTC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
+"Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base and are newly initialized: ['lm_head.bias', 'lm_head.weight']\n",
+"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
+]
+}
+],
+"source": [
+"from transformers import Wav2Vec2ForCTC\n",
+"\n",
+"model = Wav2Vec2ForCTC.from_pretrained(\n",
+" \"facebook/wav2vec2-base\",\n",
+" ctc_loss_reduction=\"mean\", \n",
+" pad_token_id=processor.tokenizer.pad_token_id,\n",
+")"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 45,
+"id": "208eac7d-9fdd-4c82-b46f-25c1a1f246ee",
+"metadata": {},
+"outputs": [
+{
+"name": "stderr",
+"output_type": "stream",
+"text": [
+"PyTorch: setting up devices\n",
+"The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).\n"
+]
+}
+],
+"source": [
+"from transformers import TrainingArguments\n",
+"from transformers import Trainer\n",
+"\n",
+"training_args = TrainingArguments(\n",
+" output_dir=\"sharpcoder/wav2vec2_bjorn\",\n",
+" group_by_length=True,\n",
+" per_device_train_batch_size=8,\n",
+" evaluation_strategy=\"steps\",\n",
+" num_train_epochs=30,\n",
+" fp16=False,\n",
+" gradient_checkpointing=True,\n",
+" save_steps=500,\n",
+" eval_steps=500,\n",
+" logging_steps=500,\n",
+" learning_rate=1e-4,\n",
+" weight_decay=0.005,\n",
+" warmup_steps=1000,\n",
+" save_total_limit=2,\n",
+")\n",
+"\n",
+"trainer = Trainer(\n",
+" model=model,\n",
+" data_collator=data_collator,\n",
+" args=training_args,\n",
+" compute_metrics=compute_metrics,\n",
+" train_dataset=ds_prepared[\"train\"],\n",
+" eval_dataset=ds_prepared[\"train\"],\n",
+" tokenizer=processor.feature_extractor,\n",
+")"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 46,
+"id": "d58f6b8c-441c-4fa9-a308-e687948875e1",
+"metadata": {},
+"outputs": [
+{
+"name": "stderr",
+"output_type": "stream",
+"text": [
+"The following columns in the training set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+"***** Running training *****\n",
+" Num examples = 1\n",
+" Num Epochs = 30\n",
+" Instantaneous batch size per device = 8\n",
+" Total train batch size (w. parallel, distributed & accumulation) = 8\n",
+" Gradient Accumulation steps = 1\n",
+" Total optimization steps = 30\n",
+"/home/sharpcoder/.local/lib/python3.10/site-packages/transformers/feature_extraction_utils.py:158: UserWarning: Creating a tensor from a list of numpy.ndarrays is extremely slow. Please consider converting the list to a single numpy.ndarray with numpy.array() before converting to a tensor. (Triggered internally at ../torch/csrc/utils/tensor_new.cpp:210.)\n",
+" tensor = as_tensor(value)\n",
+"/home/sharpcoder/.local/lib/python3.10/site-packages/transformers/models/wav2vec2/modeling_wav2vec2.py:882: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').\n",
+" return (input_length - kernel_size) // stride + 1\n",
+"/home/sharpcoder/.local/lib/python3.10/site-packages/torch/autocast_mode.py:162: UserWarning: User provided device_type of 'cuda', but CUDA is not available. Disabling\n",
+" warnings.warn('User provided device_type of \\'cuda\\', but CUDA is not available. Disabling')\n"
+]
+},
+{
+"data": {
+"text/html": [
+"\n",
+" <div>\n",
+" \n",
+" <progress value='30' max='30' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
+" [30/30 00:29, Epoch 30/30]\n",
+" </div>\n",
+" <table border=\"1\" class=\"dataframe\">\n",
+" <thead>\n",
+" <tr style=\"text-align: left;\">\n",
+" <th>Step</th>\n",
+" <th>Training Loss</th>\n",
+" <th>Validation Loss</th>\n",
+" </tr>\n",
+" </thead>\n",
+" <tbody>\n",
+" </tbody>\n",
+"</table><p>"
+],
+"text/plain": [
+"<IPython.core.display.HTML object>"
+]
+},
+"metadata": {},
+"output_type": "display_data"
+},
+{
+"name": "stderr",
+"output_type": "stream",
+"text": [
+"\n",
+"\n",
+"Training completed. Do not forget to share your model on huggingface.co/models =)\n",
+"\n",
+"\n"
+]
+},
+{
+"data": {
+"text/plain": [
+"TrainOutput(global_step=30, training_loss=7.301008097330729, metrics={'train_runtime': 30.6312, 'train_samples_per_second': 0.979, 'train_steps_per_second': 0.979, 'total_flos': 943749864316800.0, 'train_loss': 7.301008097330729, 'epoch': 30.0})"
+]
+},
+"execution_count": 46,
+"metadata": {},
+"output_type": "execute_result"
+}
+],
+"source": [
+"trainer.train()"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 47,
+"id": "70866f1f-3745-4e68-acd5-f50b6eff348b",
+"metadata": {},
+"outputs": [
+{
+"name": "stderr",
+"output_type": "stream",
+"text": [
+"Saving model checkpoint to sharpcoder/wav2vec2_bjorn\n",
+"Configuration saved in sharpcoder/wav2vec2_bjorn/config.json\n",
+"Model weights saved in sharpcoder/wav2vec2_bjorn/pytorch_model.bin\n",
+"Configuration saved in sharpcoder/wav2vec2_bjorn/preprocessor_config.json\n"
+]
+},
+{
+"ename": "AttributeError",
+"evalue": "'Trainer' object has no attribute 'repo'",
+"output_type": "error",
+"traceback": [
+"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
+"Input \u001b[0;32mIn [47]\u001b[0m, in \u001b[0;36m<cell line: 1>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mtrainer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpush_to_hub\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
+"File \u001b[0;32m~/.local/lib/python3.10/site-packages/transformers/trainer.py:2677\u001b[0m, in \u001b[0;36mTrainer.push_to_hub\u001b[0;34m(self, commit_message, blocking, **kwargs)\u001b[0m\n\u001b[1;32m 2674\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mis_world_process_zero():\n\u001b[1;32m 2675\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m\n\u001b[0;32m-> 2677\u001b[0m git_head_commit_url \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrepo\u001b[49m\u001b[38;5;241m.\u001b[39mpush_to_hub(commit_message\u001b[38;5;241m=\u001b[39mcommit_message, blocking\u001b[38;5;241m=\u001b[39mblocking)\n\u001b[1;32m 2678\u001b[0m \u001b[38;5;66;03m# push separately the model card to be independant from the rest of the model\u001b[39;00m\n\u001b[1;32m 2679\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mshould_save:\n",
+"\u001b[0;31mAttributeError\u001b[0m: 'Trainer' object has no attribute 'repo'"
+]
+}
+],
+"source": []
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"id": "333d43cf-add3-4d78-bbca-b44c638519fe",
+"metadata": {},
+"outputs": [],
+"source": []
+}
+],
+"metadata": {
+"kernelspec": {
+"display_name": "Python 3 (ipykernel)",
+"language": "python",
+"name": "python3"
+},
+"language_info": {
+"codemirror_mode": {
+"name": "ipython",
+"version": 3
+},
+"file_extension": ".py",
+"mimetype": "text/x-python",
+"name": "python",
+"nbconvert_exporter": "python",
+"pygments_lexer": "ipython3",
+"version": "3.10.4"
+}
+},
+"nbformat": 4,
+"nbformat_minor": 5
+}
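
Editor's note (not part of the commit): the last executed cell above ends in AttributeError: 'Trainer' object has no attribute 'repo' when trainer.push_to_hub() runs. In this transformers release the Trainer only sets up its Hub repo when push_to_hub=True is passed to TrainingArguments, which the notebook does not do; a minimal sketch of that assumed fix, with the other arguments kept as in the notebook:

    from transformers import TrainingArguments

    # Sketch only: enable the Hub integration up front so trainer.repo exists
    # before trainer.push_to_hub() is called after training.
    training_args = TrainingArguments(
        output_dir="sharpcoder/wav2vec2_bjorn",
        push_to_hub=True,  # assumed fix: lets the Trainer create/clone the Hub repo
        # ... remaining arguments exactly as in the notebook cell ...
    )

    # Alternative (also an assumption, not shown in the commit): push the saved
    # model directly with model.push_to_hub("wav2vec2_bjorn").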
sharpcoder/wav2vec2_bjorn/config.json
ADDED
@@ -0,0 +1,88 @@
+{
+"_name_or_path": "facebook/wav2vec2-base",
+"activation_dropout": 0.0,
+"apply_spec_augment": true,
+"architectures": [
+"Wav2Vec2ForCTC"
+],
+"attention_dropout": 0.1,
+"bos_token_id": 1,
+"classifier_proj_size": 256,
+"codevector_dim": 256,
+"contrastive_logits_temperature": 0.1,
+"conv_bias": false,
+"conv_dim": [
+512,
+512,
+512,
+512,
+512,
+512,
+512
+],
+"conv_kernel": [
+10,
+3,
+3,
+3,
+3,
+2,
+2
+],
+"conv_stride": [
+5,
+2,
+2,
+2,
+2,
+2,
+2
+],
+"ctc_loss_reduction": "mean",
+"ctc_zero_infinity": false,
+"diversity_loss_weight": 0.1,
+"do_stable_layer_norm": false,
+"eos_token_id": 2,
+"feat_extract_activation": "gelu",
+"feat_extract_norm": "group",
+"feat_proj_dropout": 0.1,
+"feat_quantizer_dropout": 0.0,
+"final_dropout": 0.0,
+"freeze_feat_extract_train": true,
+"hidden_act": "gelu",
+"hidden_dropout": 0.1,
+"hidden_size": 768,
+"initializer_range": 0.02,
+"intermediate_size": 3072,
+"layer_norm_eps": 1e-05,
+"layerdrop": 0.0,
+"mask_channel_length": 10,
+"mask_channel_min_space": 1,
+"mask_channel_other": 0.0,
+"mask_channel_prob": 0.0,
+"mask_channel_selection": "static",
+"mask_feature_length": 10,
+"mask_feature_prob": 0.0,
+"mask_time_length": 10,
+"mask_time_min_space": 1,
+"mask_time_other": 0.0,
+"mask_time_prob": 0.05,
+"mask_time_selection": "static",
+"model_type": "wav2vec2",
+"no_mask_channel_overlap": false,
+"no_mask_time_overlap": false,
+"num_attention_heads": 12,
+"num_codevector_groups": 2,
+"num_codevectors_per_group": 320,
+"num_conv_pos_embedding_groups": 16,
+"num_conv_pos_embeddings": 128,
+"num_feat_extract_layers": 7,
+"num_hidden_layers": 12,
+"num_negatives": 100,
+"pad_token_id": 19,
+"proj_codevector_dim": 256,
+"torch_dtype": "float32",
+"transformers_version": "4.11.3",
+"use_weighted_layer_sum": false,
+"vocab_size": 32
+}
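
Editor's note (not part of the commit): the saved config keeps "vocab_size": 32 from the facebook/wav2vec2-base checkpoint, while the vocab.json added at the end of this commit defines only 20 tokens. A small sketch of sizing the CTC head to the tokenizer when loading the model; this is an illustration of a common recipe, not something the notebook does, and processor refers to the object built earlier in the notebook:

    from transformers import Wav2Vec2ForCTC

    model = Wav2Vec2ForCTC.from_pretrained(
        "facebook/wav2vec2-base",
        ctc_loss_reduction="mean",
        pad_token_id=processor.tokenizer.pad_token_id,
        vocab_size=len(processor.tokenizer),  # 20 with this vocab.json, so lm_head matches the tokenizer
    )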
sharpcoder/wav2vec2_bjorn/preprocessor_config.json
ADDED
@@ -0,0 +1,9 @@
+{
+"do_normalize": true,
+"feature_extractor_type": "Wav2Vec2FeatureExtractor",
+"feature_size": 1,
+"padding_side": "right",
+"padding_value": 0.0,
+"return_attention_mask": false,
+"sampling_rate": 16000
+}
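
Editor's note (not part of the commit): the saved feature extractor expects 16 kHz audio ("sampling_rate": 16000), the rate also passed to Wav2Vec2FeatureExtractor in the notebook. If the dataset's audio column uses the datasets Audio feature (an assumption; the commit does not show the dataset schema), it can be decoded at that rate before prepare_dataset runs, roughly:

    from datasets import Audio, load_dataset

    ds = load_dataset("sharpcoder/bjorn_training")
    # Sketch: resample lazily at decode time so processor() always sees 16 kHz audio.
    ds = ds.cast_column("audio", Audio(sampling_rate=16_000))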
sharpcoder/wav2vec2_bjorn/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:873bf552da3de5ce2fc1efbe234017f06cf7b9b70812d408585136c69486cb81
+size 377667031
sharpcoder/wav2vec2_bjorn/training_args.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2b61aecf82c993254e7b0fbeb1c240469688a2bf27cd91d288ef05824cd7c911
+size 2799
vocab.json
ADDED
@@ -0,0 +1 @@
+{"w": 0, "y": 1, "m": 2, "i": 3, "e": 4, "s": 5, "r": 6, "p": 7, "n": 8, "a": 9, "h": 10, ".": 11, "j": 12, "b": 13, "d": 14, "l": 15, "o": 16, "|": 17, "[UNK]": 18, "[PAD]": 19}
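
Editor's note (not part of the commit): together with vocab.json at the repository root, the files under sharpcoder/wav2vec2_bjorn/ are enough to reload the fine-tuned checkpoint. A rough usage sketch, assuming the paths from this commit and a 16 kHz mono float array waveform supplied by the caller:

    import torch
    from transformers import (Wav2Vec2CTCTokenizer, Wav2Vec2FeatureExtractor,
                              Wav2Vec2ForCTC, Wav2Vec2Processor)

    # Model weights and config come from the saved checkpoint directory.
    model = Wav2Vec2ForCTC.from_pretrained("sharpcoder/wav2vec2_bjorn")

    # The tokenizer files were not saved into that directory, so rebuild the
    # processor from the root-level vocab.json, as the notebook does.
    tokenizer = Wav2Vec2CTCTokenizer("./vocab.json", unk_token="[UNK]",
                                     pad_token="[PAD]", word_delimiter_token="|")
    feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained("sharpcoder/wav2vec2_bjorn")
    processor = Wav2Vec2Processor(feature_extractor=feature_extractor, tokenizer=tokenizer)

    inputs = processor(waveform, sampling_rate=16_000, return_tensors="pt")
    with torch.no_grad():
        logits = model(inputs.input_values).logits  # (batch, time, vocab_size)
    pred_ids = torch.argmax(logits, dim=-1)
    print(processor.batch_decode(pred_ids))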