Josh Cole committed
Commit b493b18 · 1 Parent(s): fea66a8
Files changed (4)
  1. Generate.ipynb +22 -32
  2. config.json +7 -15
  3. pytorch_model.bin +1 -1
  4. training_args.bin +1 -1
Generate.ipynb CHANGED
@@ -242,7 +242,7 @@
  },
  {
  "cell_type": "code",
- "execution_count": 27,
+ "execution_count": 49,
  "id": "1025ffdf-cb83-4895-89ab-a98bc3fab642",
  "metadata": {},
  "outputs": [],
@@ -253,7 +253,7 @@
  },
  {
  "cell_type": "code",
- "execution_count": 35,
+ "execution_count": 50,
  "id": "71351cf4-6d00-40ae-89cc-cedb87073625",
  "metadata": {},
  "outputs": [
@@ -261,14 +261,13 @@
  "name": "stderr",
  "output_type": "stream",
  "text": [
- "loading configuration file https://huggingface.co/facebook/wav2vec2-base/resolve/main/config.json from cache at /home/sharpcoder/.cache/huggingface/transformers/c7746642f045322fd01afa31271dd490e677ea11999e68660a92619ec7c892b4.ce1f96bfaf3d7475cb8187b9668c7f19437ade45fb9ceb78d2b06a2cec198015\n",
- "/home/sharpcoder/.local/lib/python3.10/site-packages/transformers/configuration_utils.py:336: UserWarning: Passing `gradient_checkpointing` to a config initialization is deprecated and will be removed in v5 Transformers. Using `model.gradient_checkpointing_enable()` instead, or if you are using the `Trainer` API, pass `gradient_checkpointing=True` in your `TrainingArguments`.\n",
- " warnings.warn(\n",
+ "loading configuration file https://huggingface.co/facebook/wav2vec2-base-960h/resolve/main/config.json from cache at /home/sharpcoder/.cache/huggingface/transformers/cbb3014bb9f03ead9b94f4a791ff8e777465307670e85079d35e28cbc5d88727.0e2d739358c9b58747bd19db5f9f4320dacabbeb1e6282f5cc1069c5c55a82d2\n",
  "Model config Wav2Vec2Config {\n",
- " \"activation_dropout\": 0.0,\n",
+ " \"_name_or_path\": \"facebook/wav2vec2-base-960h\",\n",
+ " \"activation_dropout\": 0.1,\n",
  " \"apply_spec_augment\": true,\n",
  " \"architectures\": [\n",
- " \"Wav2Vec2ForPreTraining\"\n",
+ " \"Wav2Vec2ForCTC\"\n",
  " ],\n",
  " \"attention_dropout\": 0.1,\n",
  " \"bos_token_id\": 1,\n",
@@ -309,34 +308,25 @@
  " \"do_stable_layer_norm\": false,\n",
  " \"eos_token_id\": 2,\n",
  " \"feat_extract_activation\": \"gelu\",\n",
+ " \"feat_extract_dropout\": 0.0,\n",
  " \"feat_extract_norm\": \"group\",\n",
  " \"feat_proj_dropout\": 0.1,\n",
  " \"feat_quantizer_dropout\": 0.0,\n",
- " \"final_dropout\": 0.0,\n",
- " \"freeze_feat_extract_train\": true,\n",
- " \"gradient_checkpointing\": true,\n",
+ " \"final_dropout\": 0.1,\n",
+ " \"gradient_checkpointing\": false,\n",
  " \"hidden_act\": \"gelu\",\n",
  " \"hidden_dropout\": 0.1,\n",
+ " \"hidden_dropout_prob\": 0.1,\n",
  " \"hidden_size\": 768,\n",
  " \"initializer_range\": 0.02,\n",
  " \"intermediate_size\": 3072,\n",
  " \"layer_norm_eps\": 1e-05,\n",
- " \"layerdrop\": 0.0,\n",
- " \"mask_channel_length\": 10,\n",
- " \"mask_channel_min_space\": 1,\n",
- " \"mask_channel_other\": 0.0,\n",
- " \"mask_channel_prob\": 0.0,\n",
- " \"mask_channel_selection\": \"static\",\n",
+ " \"layerdrop\": 0.1,\n",
  " \"mask_feature_length\": 10,\n",
  " \"mask_feature_prob\": 0.0,\n",
  " \"mask_time_length\": 10,\n",
- " \"mask_time_min_space\": 1,\n",
- " \"mask_time_other\": 0.0,\n",
  " \"mask_time_prob\": 0.05,\n",
- " \"mask_time_selection\": \"static\",\n",
  " \"model_type\": \"wav2vec2\",\n",
- " \"no_mask_channel_overlap\": false,\n",
- " \"no_mask_time_overlap\": false,\n",
  " \"num_attention_heads\": 12,\n",
  " \"num_codevector_groups\": 2,\n",
  " \"num_codevectors_per_group\": 320,\n",
@@ -352,11 +342,10 @@
  " \"vocab_size\": 32\n",
  "}\n",
  "\n",
- "loading weights file https://huggingface.co/facebook/wav2vec2-base/resolve/main/pytorch_model.bin from cache at /home/sharpcoder/.cache/huggingface/transformers/ef45231897ce572a660ebc5a63d3702f1a6041c4c5fb78cbec330708531939b3.fcae05302a685f7904c551c8ea571e8bc2a2c4a1777ea81ad66e47f7883a650a\n",
- "Some weights of the model checkpoint at facebook/wav2vec2-base were not used when initializing Wav2Vec2ForCTC: ['project_hid.bias', 'quantizer.weight_proj.bias', 'project_q.weight', 'project_hid.weight', 'quantizer.weight_proj.weight', 'quantizer.codevectors', 'project_q.bias']\n",
- "- This IS expected if you are initializing Wav2Vec2ForCTC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
- "- This IS NOT expected if you are initializing Wav2Vec2ForCTC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
- "Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base and are newly initialized: ['lm_head.weight', 'lm_head.bias']\n",
+ "loading weights file https://huggingface.co/facebook/wav2vec2-base-960h/resolve/main/pytorch_model.bin from cache at /home/sharpcoder/.cache/huggingface/transformers/4cb133d3cf3e58e8a4e088b1fc826611a3bcf3d98b20a0bb49ce8cd5362411b7.beeaccfa4baf44ba6123c23938d8a17f48344361a5e7041782e537dfd78a2037\n",
+ "All model checkpoint weights were used when initializing Wav2Vec2ForCTC.\n",
+ "\n",
+ "Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']\n",
  "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
  ]
  }
@@ -365,7 +354,8 @@
  "from transformers import Wav2Vec2ForCTC\n",
  "\n",
  "model = Wav2Vec2ForCTC.from_pretrained(\n",
- " \"facebook/wav2vec2-base\",\n",
+ " #\"facebook/wav2vec2-base\",\n",
+ " \"facebook/wav2vec2-base-960h\",\n",
  " ctc_loss_reduction=\"mean\", \n",
  " pad_token_id=processor.tokenizer.pad_token_id,\n",
  ")"
@@ -373,7 +363,7 @@
  },
  {
  "cell_type": "code",
- "execution_count": 45,
+ "execution_count": 51,
  "id": "208eac7d-9fdd-4c82-b46f-25c1a1f246ee",
  "metadata": {},
  "outputs": [
@@ -395,7 +385,7 @@
  " group_by_length=True,\n",
  " per_device_train_batch_size=8,\n",
  " evaluation_strategy=\"steps\",\n",
- " num_train_epochs=3,\n",
+ " num_train_epochs=30,\n",
  " fp16=False,\n",
  " gradient_checkpointing=True,\n",
  " save_steps=500,\n",
@@ -420,7 +410,7 @@
  },
  {
  "cell_type": "code",
- "execution_count": 46,
+ "execution_count": 52,
  "id": "d58f6b8c-441c-4fa9-a308-e687948875e1",
  "metadata": {},
  "outputs": [
@@ -480,10 +470,10 @@
  {
  "data": {
  "text/plain": [
- "TrainOutput(global_step=3, training_loss=10.471563975016275, metrics={'train_runtime': 3.8966, 'train_samples_per_second': 0.77, 'train_steps_per_second': 0.77, 'total_flos': 94374986431680.0, 'train_loss': 10.471563975016275, 'epoch': 3.0})"
+ "TrainOutput(global_step=3, training_loss=15.702210744222006, metrics={'train_runtime': 3.157, 'train_samples_per_second': 0.95, 'train_steps_per_second': 0.95, 'total_flos': 94374986431680.0, 'train_loss': 15.702210744222006, 'epoch': 3.0})"
  ]
  },
- "execution_count": 46,
+ "execution_count": 52,
  "metadata": {},
  "output_type": "execute_result"
  }
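For reference, the changed notebook cells boil down to the following Python. This is a minimal sketch of the state after this commit; `processor`, the datasets, and the real `output_dir` come from earlier cells that are not part of this diff, so the `output_dir` value below is illustrative only.

from transformers import Wav2Vec2ForCTC, TrainingArguments

# Base checkpoint switched from facebook/wav2vec2-base to facebook/wav2vec2-base-960h.
model = Wav2Vec2ForCTC.from_pretrained(
    #"facebook/wav2vec2-base",
    "facebook/wav2vec2-base-960h",
    ctc_loss_reduction="mean",
    pad_token_id=processor.tokenizer.pad_token_id,  # `processor` is defined in an earlier cell
)

# Epoch count raised from 3 to 30; the other arguments are unchanged context from the diff.
training_args = TrainingArguments(
    output_dir="wav2vec2-asr",  # illustrative; the real value sits outside this diff
    group_by_length=True,
    per_device_train_batch_size=8,
    evaluation_strategy="steps",
    num_train_epochs=30,
    fp16=False,
    gradient_checkpointing=True,
    save_steps=500,
)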
config.json CHANGED
@@ -1,6 +1,6 @@
  {
- "_name_or_path": "facebook/wav2vec2-base",
- "activation_dropout": 0.0,
+ "_name_or_path": "facebook/wav2vec2-base-960h",
+ "activation_dropout": 0.1,
  "apply_spec_augment": true,
  "architectures": [
  "Wav2Vec2ForCTC"
@@ -44,33 +44,25 @@
  "do_stable_layer_norm": false,
  "eos_token_id": 2,
  "feat_extract_activation": "gelu",
+ "feat_extract_dropout": 0.0,
  "feat_extract_norm": "group",
  "feat_proj_dropout": 0.1,
  "feat_quantizer_dropout": 0.0,
- "final_dropout": 0.0,
- "freeze_feat_extract_train": true,
+ "final_dropout": 0.1,
+ "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout": 0.1,
+ "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
- "layerdrop": 0.0,
- "mask_channel_length": 10,
- "mask_channel_min_space": 1,
- "mask_channel_other": 0.0,
- "mask_channel_prob": 0.0,
- "mask_channel_selection": "static",
+ "layerdrop": 0.1,
  "mask_feature_length": 10,
  "mask_feature_prob": 0.0,
  "mask_time_length": 10,
- "mask_time_min_space": 1,
- "mask_time_other": 0.0,
  "mask_time_prob": 0.05,
- "mask_time_selection": "static",
  "model_type": "wav2vec2",
- "no_mask_channel_overlap": false,
- "no_mask_time_overlap": false,
  "num_attention_heads": 12,
  "num_codevector_groups": 2,
  "num_codevectors_per_group": 320,
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ea220cc133930f98791c7b7a1d76d68b159241b625a40a783d4e05d2c93c11d7
+ oid sha256:4d3f3abcf77f71881019078ae17cf773e46b424e4176401072a817530aabafac
  size 377667031
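The pointer update means the weights themselves changed even though the byte size stayed the same. A sketch of inspecting the new checkpoint, assuming `git lfs pull` has replaced the pointer with the real file:

import torch

state_dict = torch.load("pytorch_model.bin", map_location="cpu")
print(len(state_dict), "tensors")      # full Wav2Vec2ForCTC state dict
print("lm_head.weight" in state_dict)  # the CTC head saved by the Trainer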
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:19c7738f5655571cd7c062b8a732e09ad439c7c98c6a054da91449f8906026bf
+ oid sha256:ed19b832c7db582771504df1e4a7dc89ac95ce233c3914ed7c2c37ff4ea55f88
  size 2735
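Both .bin files are git-lfs pointers, so only the recorded SHA-256 oid changes in this commit. A hypothetical check that a fetched file matches its pointer:

import hashlib

def lfs_oid(path: str) -> str:
    # git-lfs pointers record the SHA-256 of the full file contents
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest()

print(lfs_oid("training_args.bin"))  # should start with ed19b832 after this commit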