TheRamsay committed on
Commit
67d74eb
·
verified ·
1 Parent(s): ac7a8b7

Training in progress, step 2000

Browse files
Files changed (3) hide show
  1. config.json +16 -18
  2. model.safetensors +2 -2
  3. training_args.bin +2 -2
config.json CHANGED
@@ -2,7 +2,7 @@
2
  "architectures": [
3
  "SpeechEncoderDecoderModel"
4
  ],
5
- "bos_token_id": 0,
6
  "decoder": {
7
  "_attn_implementation_autoset": false,
8
  "_name_or_path": "fav-kky/gpt2-small-cs",
@@ -14,11 +14,11 @@
14
  "attn_pdrop": 0.1,
15
  "bad_words_ids": null,
16
  "begin_suppress_tokens": null,
17
- "bos_token_id": 0,
18
  "chunk_size_feed_forward": 0,
19
  "cross_attention_hidden_size": null,
20
- "decoder_start_token_id": 0,
21
- "decoder_vocab_size": 50000,
22
  "diversity_penalty": 0.0,
23
  "do_sample": false,
24
  "early_stopping": false,
@@ -26,8 +26,8 @@
26
  "encoder_ctc_loss_reduction": "mean",
27
  "encoder_layerdrop": 0.0,
28
  "encoder_no_repeat_ngram_size": 0,
29
- "encoder_pad_token_id": 0,
30
- "encoder_vocab_size": 50000,
31
  "eos_token_id": 0,
32
  "exponential_decay_length_penalty": null,
33
  "finetuning_task": null,
@@ -47,7 +47,6 @@
47
  },
48
  "layer_norm_epsilon": 1e-05,
49
  "length_penalty": 1.0,
50
- "mask_token_id": null,
51
  "max_length": 20,
52
  "min_length": 0,
53
  "model_type": "gpt2",
@@ -64,7 +63,7 @@
64
  "output_attentions": false,
65
  "output_hidden_states": false,
66
  "output_scores": false,
67
- "pad_token_id": 0,
68
  "prefix": null,
69
  "problem_type": null,
70
  "pruned_heads": {},
@@ -101,9 +100,9 @@
101
  "typical_p": 1.0,
102
  "use_bfloat16": false,
103
  "use_cache": true,
104
- "vocab_size": 50000
105
  },
106
- "decoder_start_token_id": 0,
107
  "encoder": {
108
  "_attn_implementation_autoset": false,
109
  "_name_or_path": "fav-kky/wav2vec2-base-cs-80k-ClTRUS",
@@ -120,7 +119,7 @@
120
  "attention_dropout": 0.1,
121
  "bad_words_ids": null,
122
  "begin_suppress_tokens": null,
123
- "bos_token_id": 0,
124
  "chunk_size_feed_forward": 0,
125
  "classifier_proj_size": 256,
126
  "codevector_dim": 256,
@@ -156,8 +155,8 @@
156
  "cross_attention_hidden_size": null,
157
  "ctc_loss_reduction": "sum",
158
  "ctc_zero_infinity": false,
159
- "decoder_start_token_id": null,
160
- "decoder_vocab_size": 50000,
161
  "diversity_loss_weight": 0.1,
162
  "diversity_penalty": 0.0,
163
  "do_sample": false,
@@ -166,8 +165,8 @@
166
  "encoder_ctc_loss_reduction": "mean",
167
  "encoder_layerdrop": 0.0,
168
  "encoder_no_repeat_ngram_size": 0,
169
- "encoder_pad_token_id": 0,
170
- "encoder_vocab_size": 50000,
171
  "eos_token_id": 0,
172
  "exponential_decay_length_penalty": null,
173
  "feat_extract_activation": "gelu",
@@ -202,7 +201,6 @@
202
  "mask_time_length": 10,
203
  "mask_time_min_masks": 2,
204
  "mask_time_prob": 0.05,
205
- "mask_token_id": null,
206
  "max_length": 20,
207
  "min_length": 0,
208
  "model_type": "wav2vec2",
@@ -223,7 +221,7 @@
223
  "output_hidden_size": 768,
224
  "output_hidden_states": false,
225
  "output_scores": false,
226
- "pad_token_id": 0,
227
  "prefix": null,
228
  "problem_type": null,
229
  "proj_codevector_dim": 256,
@@ -274,7 +272,7 @@
274
  "eos_token_id": 0,
275
  "is_encoder_decoder": true,
276
  "model_type": "speech-encoder-decoder",
277
- "pad_token_id": 0,
278
  "tie_word_embeddings": false,
279
  "torch_dtype": "float32",
280
  "transformers_version": "4.49.0"
 
2
  "architectures": [
3
  "SpeechEncoderDecoderModel"
4
  ],
5
+ "bos_token_id": 50000,
6
  "decoder": {
7
  "_attn_implementation_autoset": false,
8
  "_name_or_path": "fav-kky/gpt2-small-cs",
 
14
  "attn_pdrop": 0.1,
15
  "bad_words_ids": null,
16
  "begin_suppress_tokens": null,
17
+ "bos_token_id": 50000,
18
  "chunk_size_feed_forward": 0,
19
  "cross_attention_hidden_size": null,
20
+ "decoder_start_token_id": 50000,
21
+ "decoder_vocab_size": 50002,
22
  "diversity_penalty": 0.0,
23
  "do_sample": false,
24
  "early_stopping": false,
 
26
  "encoder_ctc_loss_reduction": "mean",
27
  "encoder_layerdrop": 0.0,
28
  "encoder_no_repeat_ngram_size": 0,
29
+ "encoder_pad_token_id": 50001,
30
+ "encoder_vocab_size": 50002,
31
  "eos_token_id": 0,
32
  "exponential_decay_length_penalty": null,
33
  "finetuning_task": null,
 
47
  },
48
  "layer_norm_epsilon": 1e-05,
49
  "length_penalty": 1.0,
 
50
  "max_length": 20,
51
  "min_length": 0,
52
  "model_type": "gpt2",
 
63
  "output_attentions": false,
64
  "output_hidden_states": false,
65
  "output_scores": false,
66
+ "pad_token_id": 50001,
67
  "prefix": null,
68
  "problem_type": null,
69
  "pruned_heads": {},
 
100
  "typical_p": 1.0,
101
  "use_bfloat16": false,
102
  "use_cache": true,
103
+ "vocab_size": 50002
104
  },
105
+ "decoder_start_token_id": 50000,
106
  "encoder": {
107
  "_attn_implementation_autoset": false,
108
  "_name_or_path": "fav-kky/wav2vec2-base-cs-80k-ClTRUS",
 
119
  "attention_dropout": 0.1,
120
  "bad_words_ids": null,
121
  "begin_suppress_tokens": null,
122
+ "bos_token_id": 50000,
123
  "chunk_size_feed_forward": 0,
124
  "classifier_proj_size": 256,
125
  "codevector_dim": 256,
 
155
  "cross_attention_hidden_size": null,
156
  "ctc_loss_reduction": "sum",
157
  "ctc_zero_infinity": false,
158
+ "decoder_start_token_id": 50000,
159
+ "decoder_vocab_size": 50002,
160
  "diversity_loss_weight": 0.1,
161
  "diversity_penalty": 0.0,
162
  "do_sample": false,
 
165
  "encoder_ctc_loss_reduction": "mean",
166
  "encoder_layerdrop": 0.0,
167
  "encoder_no_repeat_ngram_size": 0,
168
+ "encoder_pad_token_id": 50001,
169
+ "encoder_vocab_size": 50002,
170
  "eos_token_id": 0,
171
  "exponential_decay_length_penalty": null,
172
  "feat_extract_activation": "gelu",
 
201
  "mask_time_length": 10,
202
  "mask_time_min_masks": 2,
203
  "mask_time_prob": 0.05,
 
204
  "max_length": 20,
205
  "min_length": 0,
206
  "model_type": "wav2vec2",
 
221
  "output_hidden_size": 768,
222
  "output_hidden_states": false,
223
  "output_scores": false,
224
+ "pad_token_id": 50001,
225
  "prefix": null,
226
  "problem_type": null,
227
  "proj_codevector_dim": 256,
 
272
  "eos_token_id": 0,
273
  "is_encoder_decoder": true,
274
  "model_type": "speech-encoder-decoder",
275
+ "pad_token_id": 50001,
276
  "tie_word_embeddings": false,
277
  "torch_dtype": "float32",
278
  "transformers_version": "4.49.0"
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3bbfd1fb052347e73354c43b34b1bd6d3169a552e75a529e636b5c4449cba80b
3
- size 550862672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6c11deb910a326616126aebcc1e8e2afa1dbb2f0eb892c3da257f2ae759d7cb
3
+ size 550866768
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b2424ec6d2f7b258ccbd71da6253ef1133bb223b4d24ea4e546834750eebba3d
3
- size 7800
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11fa44b3b8da62cb0d948e4fe6159afce332e4a75a1f94e121b8483ac5e2e1fd
3
+ size 5624