cloudwalkerw committed on
Commit
6e27689
·
1 Parent(s): d01e104

End of training

Browse files
Files changed (5) hide show
  1. README.md +2 -1
  2. all_results.json +12 -0
  3. eval_results.json +8 -0
  4. train_results.json +7 -0
  5. trainer_state.json +613 -0
README.md CHANGED
@@ -1,6 +1,7 @@
1
  ---
2
  base_model: microsoft/wavlm-base
3
  tags:
 
4
  - generated_from_trainer
5
  metrics:
6
  - f1
@@ -16,7 +17,7 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  This model is a fine-tuned version of [microsoft/wavlm-base](https://huggingface.co/microsoft/wavlm-base) on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
- - Loss: 0.3326
20
  - F1: 0.9459
21
 
22
  ## Model description
 
1
  ---
2
  base_model: microsoft/wavlm-base
3
  tags:
4
+ - audio-classification
5
  - generated_from_trainer
6
  metrics:
7
  - f1
 
17
 
18
  This model is a fine-tuned version of [microsoft/wavlm-base](https://huggingface.co/microsoft/wavlm-base) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
+ - Loss: 0.3325
21
  - F1: 0.9459
22
 
23
  ## Model description
all_results.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 9.98,
3
+ "eval_f1": 0.9459482392872296,
4
+ "eval_loss": 0.3324863016605377,
5
+ "eval_runtime": 496.607,
6
+ "eval_samples_per_second": 50.027,
7
+ "eval_steps_per_second": 25.014,
8
+ "train_loss": 0.32019265878080116,
9
+ "train_runtime": 22716.823,
10
+ "train_samples_per_second": 11.172,
11
+ "train_steps_per_second": 0.174
12
+ }
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 9.98,
3
+ "eval_f1": 0.9459482392872296,
4
+ "eval_loss": 0.3324863016605377,
5
+ "eval_runtime": 496.607,
6
+ "eval_samples_per_second": 50.027,
7
+ "eval_steps_per_second": 25.014
8
+ }
train_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 9.98,
3
+ "train_loss": 0.32019265878080116,
4
+ "train_runtime": 22716.823,
5
+ "train_samples_per_second": 11.172,
6
+ "train_steps_per_second": 0.174
7
+ }
trainer_state.json ADDED
@@ -0,0 +1,613 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.06379657983779907,
3
+ "best_model_checkpoint": "/home/cloudwalker/ASVmodel/wavlm-base_4/checkpoint-200",
4
+ "epoch": 9.98109640831758,
5
+ "eval_steps": 100,
6
+ "global_step": 3960,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.25,
13
+ "learning_rate": 7.575757575757576e-05,
14
+ "loss": 0.3784,
15
+ "step": 100
16
+ },
17
+ {
18
+ "epoch": 0.25,
19
+ "eval_f1": 0.9905683736634776,
20
+ "eval_loss": 0.07838701456785202,
21
+ "eval_runtime": 494.4493,
22
+ "eval_samples_per_second": 50.246,
23
+ "eval_steps_per_second": 25.123,
24
+ "step": 100
25
+ },
26
+ {
27
+ "epoch": 0.5,
28
+ "learning_rate": 0.00015151515151515152,
29
+ "loss": 0.1125,
30
+ "step": 200
31
+ },
32
+ {
33
+ "epoch": 0.5,
34
+ "eval_f1": 0.9925361354770335,
35
+ "eval_loss": 0.06379657983779907,
36
+ "eval_runtime": 495.4697,
37
+ "eval_samples_per_second": 50.142,
38
+ "eval_steps_per_second": 25.071,
39
+ "step": 200
40
+ },
41
+ {
42
+ "epoch": 0.76,
43
+ "learning_rate": 0.00022727272727272725,
44
+ "loss": 0.1158,
45
+ "step": 300
46
+ },
47
+ {
48
+ "epoch": 0.76,
49
+ "eval_f1": 0.9773465624725612,
50
+ "eval_loss": 0.1715879738330841,
51
+ "eval_runtime": 495.6685,
52
+ "eval_samples_per_second": 50.122,
53
+ "eval_steps_per_second": 25.061,
54
+ "step": 300
55
+ },
56
+ {
57
+ "epoch": 1.01,
58
+ "learning_rate": 0.00029966329966329963,
59
+ "loss": 0.327,
60
+ "step": 400
61
+ },
62
+ {
63
+ "epoch": 1.01,
64
+ "eval_f1": 0.9459482392872296,
65
+ "eval_loss": 0.3308480978012085,
66
+ "eval_runtime": 495.7404,
67
+ "eval_samples_per_second": 50.115,
68
+ "eval_steps_per_second": 25.057,
69
+ "step": 400
70
+ },
71
+ {
72
+ "epoch": 1.26,
73
+ "learning_rate": 0.00029124579124579125,
74
+ "loss": 0.3346,
75
+ "step": 500
76
+ },
77
+ {
78
+ "epoch": 1.26,
79
+ "eval_f1": 0.9459482392872296,
80
+ "eval_loss": 0.34493646025657654,
81
+ "eval_runtime": 495.7377,
82
+ "eval_samples_per_second": 50.115,
83
+ "eval_steps_per_second": 25.058,
84
+ "step": 500
85
+ },
86
+ {
87
+ "epoch": 1.51,
88
+ "learning_rate": 0.0002828282828282828,
89
+ "loss": 0.3345,
90
+ "step": 600
91
+ },
92
+ {
93
+ "epoch": 1.51,
94
+ "eval_f1": 0.9459482392872296,
95
+ "eval_loss": 0.3316415250301361,
96
+ "eval_runtime": 495.8008,
97
+ "eval_samples_per_second": 50.109,
98
+ "eval_steps_per_second": 25.054,
99
+ "step": 600
100
+ },
101
+ {
102
+ "epoch": 1.76,
103
+ "learning_rate": 0.0002744107744107744,
104
+ "loss": 0.3313,
105
+ "step": 700
106
+ },
107
+ {
108
+ "epoch": 1.76,
109
+ "eval_f1": 0.9459482392872296,
110
+ "eval_loss": 0.3320470452308655,
111
+ "eval_runtime": 495.8537,
112
+ "eval_samples_per_second": 50.103,
113
+ "eval_steps_per_second": 25.052,
114
+ "step": 700
115
+ },
116
+ {
117
+ "epoch": 2.02,
118
+ "learning_rate": 0.00026599326599326595,
119
+ "loss": 0.3249,
120
+ "step": 800
121
+ },
122
+ {
123
+ "epoch": 2.02,
124
+ "eval_f1": 0.9459482392872296,
125
+ "eval_loss": 0.33273717761039734,
126
+ "eval_runtime": 496.067,
127
+ "eval_samples_per_second": 50.082,
128
+ "eval_steps_per_second": 25.041,
129
+ "step": 800
130
+ },
131
+ {
132
+ "epoch": 2.27,
133
+ "learning_rate": 0.00025757575757575756,
134
+ "loss": 0.3403,
135
+ "step": 900
136
+ },
137
+ {
138
+ "epoch": 2.27,
139
+ "eval_f1": 0.9459482392872296,
140
+ "eval_loss": 0.3314824402332306,
141
+ "eval_runtime": 496.0647,
142
+ "eval_samples_per_second": 50.082,
143
+ "eval_steps_per_second": 25.041,
144
+ "step": 900
145
+ },
146
+ {
147
+ "epoch": 2.52,
148
+ "learning_rate": 0.00024915824915824913,
149
+ "loss": 0.3345,
150
+ "step": 1000
151
+ },
152
+ {
153
+ "epoch": 2.52,
154
+ "eval_f1": 0.9459482392872296,
155
+ "eval_loss": 0.3381609320640564,
156
+ "eval_runtime": 496.3675,
157
+ "eval_samples_per_second": 50.052,
158
+ "eval_steps_per_second": 25.026,
159
+ "step": 1000
160
+ },
161
+ {
162
+ "epoch": 2.77,
163
+ "learning_rate": 0.00024074074074074072,
164
+ "loss": 0.3174,
165
+ "step": 1100
166
+ },
167
+ {
168
+ "epoch": 2.77,
169
+ "eval_f1": 0.9459482392872296,
170
+ "eval_loss": 0.33755871653556824,
171
+ "eval_runtime": 496.1396,
172
+ "eval_samples_per_second": 50.075,
173
+ "eval_steps_per_second": 25.037,
174
+ "step": 1100
175
+ },
176
+ {
177
+ "epoch": 3.02,
178
+ "learning_rate": 0.0002323232323232323,
179
+ "loss": 0.3274,
180
+ "step": 1200
181
+ },
182
+ {
183
+ "epoch": 3.02,
184
+ "eval_f1": 0.9459482392872296,
185
+ "eval_loss": 0.3353538513183594,
186
+ "eval_runtime": 496.3912,
187
+ "eval_samples_per_second": 50.049,
188
+ "eval_steps_per_second": 25.025,
189
+ "step": 1200
190
+ },
191
+ {
192
+ "epoch": 3.28,
193
+ "learning_rate": 0.0002239057239057239,
194
+ "loss": 0.3296,
195
+ "step": 1300
196
+ },
197
+ {
198
+ "epoch": 3.28,
199
+ "eval_f1": 0.9459482392872296,
200
+ "eval_loss": 0.33069172501564026,
201
+ "eval_runtime": 496.1466,
202
+ "eval_samples_per_second": 50.074,
203
+ "eval_steps_per_second": 25.037,
204
+ "step": 1300
205
+ },
206
+ {
207
+ "epoch": 3.53,
208
+ "learning_rate": 0.00021548821548821544,
209
+ "loss": 0.3175,
210
+ "step": 1400
211
+ },
212
+ {
213
+ "epoch": 3.53,
214
+ "eval_f1": 0.9459482392872296,
215
+ "eval_loss": 0.3341147303581238,
216
+ "eval_runtime": 496.5334,
217
+ "eval_samples_per_second": 50.035,
218
+ "eval_steps_per_second": 25.017,
219
+ "step": 1400
220
+ },
221
+ {
222
+ "epoch": 3.78,
223
+ "learning_rate": 0.00020707070707070703,
224
+ "loss": 0.3416,
225
+ "step": 1500
226
+ },
227
+ {
228
+ "epoch": 3.78,
229
+ "eval_f1": 0.9459482392872296,
230
+ "eval_loss": 0.3344402611255646,
231
+ "eval_runtime": 496.2158,
232
+ "eval_samples_per_second": 50.067,
233
+ "eval_steps_per_second": 25.033,
234
+ "step": 1500
235
+ },
236
+ {
237
+ "epoch": 4.03,
238
+ "learning_rate": 0.00019865319865319862,
239
+ "loss": 0.3412,
240
+ "step": 1600
241
+ },
242
+ {
243
+ "epoch": 4.03,
244
+ "eval_f1": 0.9459482392872296,
245
+ "eval_loss": 0.3307790160179138,
246
+ "eval_runtime": 496.5097,
247
+ "eval_samples_per_second": 50.037,
248
+ "eval_steps_per_second": 25.019,
249
+ "step": 1600
250
+ },
251
+ {
252
+ "epoch": 4.28,
253
+ "learning_rate": 0.00019023569023569022,
254
+ "loss": 0.3293,
255
+ "step": 1700
256
+ },
257
+ {
258
+ "epoch": 4.28,
259
+ "eval_f1": 0.9459482392872296,
260
+ "eval_loss": 0.3313847482204437,
261
+ "eval_runtime": 496.395,
262
+ "eval_samples_per_second": 50.049,
263
+ "eval_steps_per_second": 25.024,
264
+ "step": 1700
265
+ },
266
+ {
267
+ "epoch": 4.54,
268
+ "learning_rate": 0.0001818181818181818,
269
+ "loss": 0.3346,
270
+ "step": 1800
271
+ },
272
+ {
273
+ "epoch": 4.54,
274
+ "eval_f1": 0.9459482392872296,
275
+ "eval_loss": 0.33080053329467773,
276
+ "eval_runtime": 496.4252,
277
+ "eval_samples_per_second": 50.046,
278
+ "eval_steps_per_second": 25.023,
279
+ "step": 1800
280
+ },
281
+ {
282
+ "epoch": 4.79,
283
+ "learning_rate": 0.0001734006734006734,
284
+ "loss": 0.3279,
285
+ "step": 1900
286
+ },
287
+ {
288
+ "epoch": 4.79,
289
+ "eval_f1": 0.9459482392872296,
290
+ "eval_loss": 0.3317018449306488,
291
+ "eval_runtime": 496.5163,
292
+ "eval_samples_per_second": 50.037,
293
+ "eval_steps_per_second": 25.018,
294
+ "step": 1900
295
+ },
296
+ {
297
+ "epoch": 5.04,
298
+ "learning_rate": 0.000164983164983165,
299
+ "loss": 0.3246,
300
+ "step": 2000
301
+ },
302
+ {
303
+ "epoch": 5.04,
304
+ "eval_f1": 0.9459482392872296,
305
+ "eval_loss": 0.33184757828712463,
306
+ "eval_runtime": 496.5592,
307
+ "eval_samples_per_second": 50.032,
308
+ "eval_steps_per_second": 25.016,
309
+ "step": 2000
310
+ },
311
+ {
312
+ "epoch": 5.29,
313
+ "learning_rate": 0.00015656565656565653,
314
+ "loss": 0.3373,
315
+ "step": 2100
316
+ },
317
+ {
318
+ "epoch": 5.29,
319
+ "eval_f1": 0.9459482392872296,
320
+ "eval_loss": 0.33109256625175476,
321
+ "eval_runtime": 496.5149,
322
+ "eval_samples_per_second": 50.037,
323
+ "eval_steps_per_second": 25.018,
324
+ "step": 2100
325
+ },
326
+ {
327
+ "epoch": 5.55,
328
+ "learning_rate": 0.00014814814814814812,
329
+ "loss": 0.3262,
330
+ "step": 2200
331
+ },
332
+ {
333
+ "epoch": 5.55,
334
+ "eval_f1": 0.9459482392872296,
335
+ "eval_loss": 0.33351102471351624,
336
+ "eval_runtime": 496.6308,
337
+ "eval_samples_per_second": 50.025,
338
+ "eval_steps_per_second": 25.013,
339
+ "step": 2200
340
+ },
341
+ {
342
+ "epoch": 5.8,
343
+ "learning_rate": 0.0001397306397306397,
344
+ "loss": 0.3279,
345
+ "step": 2300
346
+ },
347
+ {
348
+ "epoch": 5.8,
349
+ "eval_f1": 0.9459482392872296,
350
+ "eval_loss": 0.3326103985309601,
351
+ "eval_runtime": 496.8717,
352
+ "eval_samples_per_second": 50.001,
353
+ "eval_steps_per_second": 25.0,
354
+ "step": 2300
355
+ },
356
+ {
357
+ "epoch": 6.05,
358
+ "learning_rate": 0.0001313131313131313,
359
+ "loss": 0.3298,
360
+ "step": 2400
361
+ },
362
+ {
363
+ "epoch": 6.05,
364
+ "eval_f1": 0.9459482392872296,
365
+ "eval_loss": 0.3322770297527313,
366
+ "eval_runtime": 496.7239,
367
+ "eval_samples_per_second": 50.016,
368
+ "eval_steps_per_second": 25.008,
369
+ "step": 2400
370
+ },
371
+ {
372
+ "epoch": 6.3,
373
+ "learning_rate": 0.0001228956228956229,
374
+ "loss": 0.3397,
375
+ "step": 2500
376
+ },
377
+ {
378
+ "epoch": 6.3,
379
+ "eval_f1": 0.9459482392872296,
380
+ "eval_loss": 0.3311246633529663,
381
+ "eval_runtime": 496.772,
382
+ "eval_samples_per_second": 50.011,
383
+ "eval_steps_per_second": 25.005,
384
+ "step": 2500
385
+ },
386
+ {
387
+ "epoch": 6.55,
388
+ "learning_rate": 0.00011447811447811446,
389
+ "loss": 0.3312,
390
+ "step": 2600
391
+ },
392
+ {
393
+ "epoch": 6.55,
394
+ "eval_f1": 0.9459482392872296,
395
+ "eval_loss": 0.33860132098197937,
396
+ "eval_runtime": 496.8085,
397
+ "eval_samples_per_second": 50.007,
398
+ "eval_steps_per_second": 25.004,
399
+ "step": 2600
400
+ },
401
+ {
402
+ "epoch": 6.81,
403
+ "learning_rate": 0.00010606060606060605,
404
+ "loss": 0.3291,
405
+ "step": 2700
406
+ },
407
+ {
408
+ "epoch": 6.81,
409
+ "eval_f1": 0.9459482392872296,
410
+ "eval_loss": 0.3316628634929657,
411
+ "eval_runtime": 496.6873,
412
+ "eval_samples_per_second": 50.019,
413
+ "eval_steps_per_second": 25.01,
414
+ "step": 2700
415
+ },
416
+ {
417
+ "epoch": 7.06,
418
+ "learning_rate": 9.764309764309764e-05,
419
+ "loss": 0.3146,
420
+ "step": 2800
421
+ },
422
+ {
423
+ "epoch": 7.06,
424
+ "eval_f1": 0.9459482392872296,
425
+ "eval_loss": 0.33230647444725037,
426
+ "eval_runtime": 496.6634,
427
+ "eval_samples_per_second": 50.022,
428
+ "eval_steps_per_second": 25.011,
429
+ "step": 2800
430
+ },
431
+ {
432
+ "epoch": 7.31,
433
+ "learning_rate": 8.92255892255892e-05,
434
+ "loss": 0.3296,
435
+ "step": 2900
436
+ },
437
+ {
438
+ "epoch": 7.31,
439
+ "eval_f1": 0.9459482392872296,
440
+ "eval_loss": 0.3313485085964203,
441
+ "eval_runtime": 496.6247,
442
+ "eval_samples_per_second": 50.026,
443
+ "eval_steps_per_second": 25.013,
444
+ "step": 2900
445
+ },
446
+ {
447
+ "epoch": 7.56,
448
+ "learning_rate": 8.08080808080808e-05,
449
+ "loss": 0.3367,
450
+ "step": 3000
451
+ },
452
+ {
453
+ "epoch": 7.56,
454
+ "eval_f1": 0.9459482392872296,
455
+ "eval_loss": 0.33174964785575867,
456
+ "eval_runtime": 496.735,
457
+ "eval_samples_per_second": 50.015,
458
+ "eval_steps_per_second": 25.007,
459
+ "step": 3000
460
+ },
461
+ {
462
+ "epoch": 7.81,
463
+ "learning_rate": 7.239057239057239e-05,
464
+ "loss": 0.3232,
465
+ "step": 3100
466
+ },
467
+ {
468
+ "epoch": 7.81,
469
+ "eval_f1": 0.9459482392872296,
470
+ "eval_loss": 0.3318324387073517,
471
+ "eval_runtime": 496.5843,
472
+ "eval_samples_per_second": 50.03,
473
+ "eval_steps_per_second": 25.015,
474
+ "step": 3100
475
+ },
476
+ {
477
+ "epoch": 8.07,
478
+ "learning_rate": 6.397306397306397e-05,
479
+ "loss": 0.3314,
480
+ "step": 3200
481
+ },
482
+ {
483
+ "epoch": 8.07,
484
+ "eval_f1": 0.9459482392872296,
485
+ "eval_loss": 0.3325050175189972,
486
+ "eval_runtime": 496.7125,
487
+ "eval_samples_per_second": 50.017,
488
+ "eval_steps_per_second": 25.008,
489
+ "step": 3200
490
+ },
491
+ {
492
+ "epoch": 8.32,
493
+ "learning_rate": 5.5555555555555545e-05,
494
+ "loss": 0.3201,
495
+ "step": 3300
496
+ },
497
+ {
498
+ "epoch": 8.32,
499
+ "eval_f1": 0.9459482392872296,
500
+ "eval_loss": 0.33225715160369873,
501
+ "eval_runtime": 496.6425,
502
+ "eval_samples_per_second": 50.024,
503
+ "eval_steps_per_second": 25.012,
504
+ "step": 3300
505
+ },
506
+ {
507
+ "epoch": 8.57,
508
+ "learning_rate": 4.7138047138047136e-05,
509
+ "loss": 0.3301,
510
+ "step": 3400
511
+ },
512
+ {
513
+ "epoch": 8.57,
514
+ "eval_f1": 0.9459482392872296,
515
+ "eval_loss": 0.3346656262874603,
516
+ "eval_runtime": 496.595,
517
+ "eval_samples_per_second": 50.029,
518
+ "eval_steps_per_second": 25.014,
519
+ "step": 3400
520
+ },
521
+ {
522
+ "epoch": 8.82,
523
+ "learning_rate": 3.8720538720538714e-05,
524
+ "loss": 0.3268,
525
+ "step": 3500
526
+ },
527
+ {
528
+ "epoch": 8.82,
529
+ "eval_f1": 0.9459482392872296,
530
+ "eval_loss": 0.33254188299179077,
531
+ "eval_runtime": 496.5109,
532
+ "eval_samples_per_second": 50.037,
533
+ "eval_steps_per_second": 25.019,
534
+ "step": 3500
535
+ },
536
+ {
537
+ "epoch": 9.07,
538
+ "learning_rate": 3.03030303030303e-05,
539
+ "loss": 0.3361,
540
+ "step": 3600
541
+ },
542
+ {
543
+ "epoch": 9.07,
544
+ "eval_f1": 0.9459482392872296,
545
+ "eval_loss": 0.33214762806892395,
546
+ "eval_runtime": 496.8406,
547
+ "eval_samples_per_second": 50.004,
548
+ "eval_steps_per_second": 25.002,
549
+ "step": 3600
550
+ },
551
+ {
552
+ "epoch": 9.33,
553
+ "learning_rate": 2.1885521885521884e-05,
554
+ "loss": 0.3395,
555
+ "step": 3700
556
+ },
557
+ {
558
+ "epoch": 9.33,
559
+ "eval_f1": 0.9459482392872296,
560
+ "eval_loss": 0.3312663733959198,
561
+ "eval_runtime": 496.7467,
562
+ "eval_samples_per_second": 50.013,
563
+ "eval_steps_per_second": 25.007,
564
+ "step": 3700
565
+ },
566
+ {
567
+ "epoch": 9.58,
568
+ "learning_rate": 1.3468013468013465e-05,
569
+ "loss": 0.3231,
570
+ "step": 3800
571
+ },
572
+ {
573
+ "epoch": 9.58,
574
+ "eval_f1": 0.9459482392872296,
575
+ "eval_loss": 0.3319169878959656,
576
+ "eval_runtime": 496.7702,
577
+ "eval_samples_per_second": 50.011,
578
+ "eval_steps_per_second": 25.006,
579
+ "step": 3800
580
+ },
581
+ {
582
+ "epoch": 9.83,
583
+ "learning_rate": 5.05050505050505e-06,
584
+ "loss": 0.3197,
585
+ "step": 3900
586
+ },
587
+ {
588
+ "epoch": 9.83,
589
+ "eval_f1": 0.9459482392872296,
590
+ "eval_loss": 0.3325766921043396,
591
+ "eval_runtime": 496.6934,
592
+ "eval_samples_per_second": 50.019,
593
+ "eval_steps_per_second": 25.009,
594
+ "step": 3900
595
+ },
596
+ {
597
+ "epoch": 9.98,
598
+ "step": 3960,
599
+ "total_flos": 1.506420070651024e+19,
600
+ "train_loss": 0.32019265878080116,
601
+ "train_runtime": 22716.823,
602
+ "train_samples_per_second": 11.172,
603
+ "train_steps_per_second": 0.174
604
+ }
605
+ ],
606
+ "logging_steps": 100,
607
+ "max_steps": 3960,
608
+ "num_train_epochs": 10,
609
+ "save_steps": 100,
610
+ "total_flos": 1.506420070651024e+19,
611
+ "trial_name": null,
612
+ "trial_params": null
613
+ }