diff --git "a/hf_speech_recognition.ipynb" "b/hf_speech_recognition.ipynb" new file mode 100644--- /dev/null +++ "b/hf_speech_recognition.ipynb" @@ -0,0 +1,8788 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "gpuType": "T4" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + }, + "accelerator": "GPU", + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "6d694232151e4a89af146f862ebf3d34": { + "model_module": "@jupyter-widgets/controls", + "model_name": "VBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "VBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "VBoxView", + "box_style": "", + "children": [], + "layout": "IPY_MODEL_cd3507cca4c446e2b524ead6e3e45a1c" + } + }, + "ac848d0c414a4200ac2ceac8514cc1eb": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_adc44359086c47d0934f0bffe860e56d", + "placeholder": "", + "style": "IPY_MODEL_1cce610ea8cd49ff87f62fbc7c434e7d", + "value": "
+            "Step | Training Loss | Validation Loss | Wer Ortho | Wer\n",
+            "---|---|---|---|---\n",
+            "500 | 0.000600 | 0.651501 | 0.332690 | 0.328413"
+ ]
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50359]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.\n",
+ "The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
+ "Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.\n",
+ "/usr/local/lib/python3.10/dist-packages/transformers/modeling_utils.py:2817: UserWarning: Moving the following attributes in the config to the generation config: {'max_length': 448, 'suppress_tokens': [1, 2, 7, 8, 9, 10, 14, 25, 26, 27, 28, 29, 31, 58, 59, 60, 61, 62, 63, 90, 91, 92, 93, 359, 503, 522, 542, 873, 893, 902, 918, 922, 931, 1350, 1853, 1982, 2460, 2627, 3246, 3253, 3268, 3536, 3846, 3961, 4183, 4667, 6585, 6647, 7273, 9061, 9383, 10428, 10929, 11938, 12033, 12331, 12562, 13793, 14157, 14635, 15265, 15618, 16553, 16604, 18362, 18956, 20075, 21675, 22520, 26130, 26161, 26435, 28279, 29464, 31650, 32302, 32470, 36865, 42863, 47425, 49870, 50254, 50258, 50358, 50359, 50360, 50361, 50362], 'begin_suppress_tokens': [220, 50257]}. You are seeing this warning because you've set generation parameters in the model config, as opposed to in the generation config.\n",
+ " warnings.warn(\n",
+ "There were missing keys in the checkpoint model loaded: ['proj_out.weight'].\n"
+ ]
+ },
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "TrainOutput(global_step=500, training_loss=0.2877341524623334, metrics={'train_runtime': 1153.318, 'train_samples_per_second': 6.937, 'train_steps_per_second': 0.434, 'total_flos': 1.9109178630144e+17, 'train_loss': 0.2877341524623334, 'epoch': 17.24137931034483})"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 27
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+            "# Metadata forwarded to the auto-generated model card when pushing to the Hub\n",
+            "kwargs = {\n",
+            "    \"dataset_tags\": \"PolyAI/minds14\",\n",
+            "    \"finetuned_from\": \"openai/whisper-tiny\",\n",
+            "    \"tasks\": \"automatic-speech-recognition\",\n",
+            "}\n",
+            "\n",
+            "trainer.push_to_hub(**kwargs)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 71
+ },
+ "id": "5gnigMKhGtvc",
+ "outputId": "bc8eb34a-4b3d-4bdc-b88c-6f306ac5b191"
+ },
+ "execution_count": 28,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "CommitInfo(commit_url='https://huggingface.co/ahk-d/whisper-tiny/commit/3a0a084cbb4d87e79434a2fe353e0e405e0dffed', commit_message='End of training', commit_description='', oid='3a0a084cbb4d87e79434a2fe353e0e405e0dffed', pr_url=None, repo_url=RepoUrl('https://huggingface.co/ahk-d/whisper-tiny', endpoint='https://huggingface.co', repo_type='model', repo_id='ahk-d/whisper-tiny'), pr_revision=None, pr_num=None)"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "string"
+ }
+ },
+ "metadata": {},
+ "execution_count": 28
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+            "from transformers import pipeline\n",
+            "\n",
+            "# Load the fine-tuned checkpoint from the Hub into an ASR pipeline\n",
+            "pipe = pipeline(\"automatic-speech-recognition\", model=\"ahk-d/whisper-tiny\")\n",
+            "\n",
+            "# Upload an audio file from the local machine and keep its filename\n",
+            "from google.colab import files\n",
+            "uploaded = files.upload()\n",
+            "\n",
+            "audio_file = list(uploaded.keys())[0]\n"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 92
+ },
+ "id": "fKaGHSqjI3Eu",
+ "outputId": "42d47476-a1b7-4da7-ff64-af606f5f70c1"
+ },
+ "execution_count": 35,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.\n"
+ ]
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "