nouamanetazi HF staff committed on
Commit
2ee5f57
1 Parent(s): 63c4466

Model save

Browse files
all_results.json CHANGED
@@ -7,9 +7,9 @@
7
  "eval_steps_per_second": 0.334,
8
  "eval_wer": 1.0,
9
  "total_flos": 1.3476444758728704e+17,
10
- "train_loss": 19.624227905273436,
11
- "train_runtime": 37.1321,
12
  "train_samples": 128,
13
- "train_samples_per_second": 17.236,
14
- "train_steps_per_second": 0.269
15
  }
 
7
  "eval_steps_per_second": 0.334,
8
  "eval_wer": 1.0,
9
  "total_flos": 1.3476444758728704e+17,
10
+ "train_loss": 19.56191415786743,
11
+ "train_runtime": 92.9692,
12
  "train_samples": 128,
13
+ "train_samples_per_second": 6.884,
14
+ "train_steps_per_second": 0.108
15
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:03187d4fdffd6bfd6668c712b8b34435c8b8fd1d873425a1d3179c6faaa1434c
3
  size 1262067185
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ea93def4f37206b423024351165f6f76fb31abde5bd5c8aa0c1691f50e4ca3b
3
  size 1262067185
special_tokens_map.json CHANGED
@@ -1 +1 @@
1
- {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 5.0,
3
  "total_flos": 1.3476444758728704e+17,
4
- "train_loss": 19.624227905273436,
5
- "train_runtime": 37.1321,
6
  "train_samples": 128,
7
- "train_samples_per_second": 17.236,
8
- "train_steps_per_second": 0.269
9
  }
 
1
  {
2
  "epoch": 5.0,
3
  "total_flos": 1.3476444758728704e+17,
4
+ "train_loss": 19.56191415786743,
5
+ "train_runtime": 92.9692,
6
  "train_samples": 128,
7
+ "train_samples_per_second": 6.884,
8
+ "train_steps_per_second": 0.108
9
  }
trainer_state.json CHANGED
@@ -7,14 +7,164 @@
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  {
11
  "epoch": 5.0,
12
  "step": 10,
13
  "total_flos": 1.3476444758728704e+17,
14
- "train_loss": 19.624227905273436,
15
- "train_runtime": 37.1321,
16
- "train_samples_per_second": 17.236,
17
- "train_steps_per_second": 0.269
18
  }
19
  ],
20
  "max_steps": 10,
 
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
+ {
11
+ "epoch": 0.5,
12
+ "learning_rate": 7.5e-05,
13
+ "loss": 22.283,
14
+ "step": 1
15
+ },
16
+ {
17
+ "epoch": 0.5,
18
+ "eval_loss": 38.53455352783203,
19
+ "eval_runtime": 5.6686,
20
+ "eval_samples_per_second": 22.581,
21
+ "eval_steps_per_second": 0.353,
22
+ "eval_wer": 1.0,
23
+ "step": 1
24
+ },
25
+ {
26
+ "epoch": 1.0,
27
+ "learning_rate": 7.5e-05,
28
+ "loss": 22.622,
29
+ "step": 2
30
+ },
31
+ {
32
+ "epoch": 1.0,
33
+ "eval_loss": 38.53455352783203,
34
+ "eval_runtime": 5.4852,
35
+ "eval_samples_per_second": 23.336,
36
+ "eval_steps_per_second": 0.365,
37
+ "eval_wer": 1.0,
38
+ "step": 2
39
+ },
40
+ {
41
+ "epoch": 1.5,
42
+ "learning_rate": 6.75e-05,
43
+ "loss": 23.0656,
44
+ "step": 3
45
+ },
46
+ {
47
+ "epoch": 1.5,
48
+ "eval_loss": 37.73825454711914,
49
+ "eval_runtime": 5.4328,
50
+ "eval_samples_per_second": 23.561,
51
+ "eval_steps_per_second": 0.368,
52
+ "eval_wer": 1.0,
53
+ "step": 3
54
+ },
55
+ {
56
+ "epoch": 2.0,
57
+ "learning_rate": 6.75e-05,
58
+ "loss": 20.8353,
59
+ "step": 4
60
+ },
61
+ {
62
+ "epoch": 2.0,
63
+ "eval_loss": 37.73825454711914,
64
+ "eval_runtime": 5.7103,
65
+ "eval_samples_per_second": 22.416,
66
+ "eval_steps_per_second": 0.35,
67
+ "eval_wer": 1.0,
68
+ "step": 4
69
+ },
70
+ {
71
+ "epoch": 2.5,
72
+ "learning_rate": 5.9999999999999995e-05,
73
+ "loss": 21.4977,
74
+ "step": 5
75
+ },
76
+ {
77
+ "epoch": 2.5,
78
+ "eval_loss": 36.794700622558594,
79
+ "eval_runtime": 5.4096,
80
+ "eval_samples_per_second": 23.661,
81
+ "eval_steps_per_second": 0.37,
82
+ "eval_wer": 1.0,
83
+ "step": 5
84
+ },
85
+ {
86
+ "epoch": 3.0,
87
+ "learning_rate": 5.2499999999999995e-05,
88
+ "loss": 20.1685,
89
+ "step": 6
90
+ },
91
+ {
92
+ "epoch": 3.0,
93
+ "eval_loss": 35.34020233154297,
94
+ "eval_runtime": 5.6574,
95
+ "eval_samples_per_second": 22.625,
96
+ "eval_steps_per_second": 0.354,
97
+ "eval_wer": 1.0,
98
+ "step": 6
99
+ },
100
+ {
101
+ "epoch": 3.5,
102
+ "learning_rate": 4.4999999999999996e-05,
103
+ "loss": 19.3566,
104
+ "step": 7
105
+ },
106
+ {
107
+ "epoch": 3.5,
108
+ "eval_loss": 33.572959899902344,
109
+ "eval_runtime": 5.6556,
110
+ "eval_samples_per_second": 22.632,
111
+ "eval_steps_per_second": 0.354,
112
+ "eval_wer": 1.0,
113
+ "step": 7
114
+ },
115
+ {
116
+ "epoch": 4.0,
117
+ "learning_rate": 3.75e-05,
118
+ "loss": 15.8992,
119
+ "step": 8
120
+ },
121
+ {
122
+ "epoch": 4.0,
123
+ "eval_loss": 30.588499069213867,
124
+ "eval_runtime": 5.3488,
125
+ "eval_samples_per_second": 23.93,
126
+ "eval_steps_per_second": 0.374,
127
+ "eval_wer": 1.0,
128
+ "step": 8
129
+ },
130
+ {
131
+ "epoch": 4.5,
132
+ "learning_rate": 2.9999999999999997e-05,
133
+ "loss": 15.0607,
134
+ "step": 9
135
+ },
136
+ {
137
+ "epoch": 4.5,
138
+ "eval_loss": 27.82217788696289,
139
+ "eval_runtime": 5.6203,
140
+ "eval_samples_per_second": 22.774,
141
+ "eval_steps_per_second": 0.356,
142
+ "eval_wer": 1.0,
143
+ "step": 9
144
+ },
145
+ {
146
+ "epoch": 5.0,
147
+ "learning_rate": 2.2499999999999998e-05,
148
+ "loss": 14.8305,
149
+ "step": 10
150
+ },
151
+ {
152
+ "epoch": 5.0,
153
+ "eval_loss": 25.9698429107666,
154
+ "eval_runtime": 5.6638,
155
+ "eval_samples_per_second": 22.599,
156
+ "eval_steps_per_second": 0.353,
157
+ "eval_wer": 1.0,
158
+ "step": 10
159
+ },
160
  {
161
  "epoch": 5.0,
162
  "step": 10,
163
  "total_flos": 1.3476444758728704e+17,
164
+ "train_loss": 19.56191415786743,
165
+ "train_runtime": 92.9692,
166
+ "train_samples_per_second": 6.884,
167
+ "train_steps_per_second": 0.108
168
  }
169
  ],
170
  "max_steps": 10,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:75f86ca385bb82aeecbcfec2b42d4f24ce3dfaaa81f2eaa0da96c75d5dc881f6
3
  size 3055
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:205b839c6738065c780db23902fe5de7ab8a5a6745c332ab60e9b1d434f7d4e6
3
  size 3055