File size: 12,418 Bytes
3a8809c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
{
  "best_metric": 0.8169879527109274,
  "best_model_checkpoint": "/home/bram/shares/predict/trained/dutch/hebban-reviews/xlm-roberta-base/checkpoint-11000",
  "epoch": 3.9447731755424065,
  "global_step": 12000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.16,
      "learning_rate": 4.794166666666667e-05,
      "loss": 0.913,
      "step": 500
    },
    {
      "epoch": 0.16,
      "eval_accuracy": 0.7665803747534516,
      "eval_f1": 0.7649545454029377,
      "eval_loss": 0.7253644466400146,
      "eval_precision": 0.7641565688780922,
      "eval_recall": 0.7665803747534516,
      "eval_runtime": 24.3431,
      "eval_samples_per_second": 666.472,
      "eval_steps_per_second": 27.77,
      "step": 500
    },
    {
      "epoch": 0.33,
      "learning_rate": 4.5858333333333334e-05,
      "loss": 0.7582,
      "step": 1000
    },
    {
      "epoch": 0.33,
      "eval_accuracy": 0.6934787968441815,
      "eval_f1": 0.7165538695299023,
      "eval_loss": 0.7026467323303223,
      "eval_precision": 0.7706797791371907,
      "eval_recall": 0.6934787968441815,
      "eval_runtime": 24.1169,
      "eval_samples_per_second": 672.723,
      "eval_steps_per_second": 28.03,
      "step": 1000
    },
    {
      "epoch": 0.49,
      "learning_rate": 4.3775e-05,
      "loss": 0.6847,
      "step": 1500
    },
    {
      "epoch": 0.49,
      "eval_accuracy": 0.7233727810650887,
      "eval_f1": 0.7442026185547411,
      "eval_loss": 0.6611877083778381,
      "eval_precision": 0.8038626962859567,
      "eval_recall": 0.7233727810650887,
      "eval_runtime": 24.1771,
      "eval_samples_per_second": 671.047,
      "eval_steps_per_second": 27.96,
      "step": 1500
    },
    {
      "epoch": 0.66,
      "learning_rate": 4.1691666666666666e-05,
      "loss": 0.6532,
      "step": 2000
    },
    {
      "epoch": 0.66,
      "eval_accuracy": 0.8156434911242604,
      "eval_f1": 0.8093687626348003,
      "eval_loss": 0.6557860374450684,
      "eval_precision": 0.8059505966835585,
      "eval_recall": 0.8156434911242604,
      "eval_runtime": 24.3135,
      "eval_samples_per_second": 667.283,
      "eval_steps_per_second": 27.803,
      "step": 2000
    },
    {
      "epoch": 0.82,
      "learning_rate": 3.960833333333334e-05,
      "loss": 0.6281,
      "step": 2500
    },
    {
      "epoch": 0.82,
      "eval_accuracy": 0.7695389546351085,
      "eval_f1": 0.7795959943899675,
      "eval_loss": 0.616236686706543,
      "eval_precision": 0.7973850176423627,
      "eval_recall": 0.7695389546351085,
      "eval_runtime": 24.3023,
      "eval_samples_per_second": 667.59,
      "eval_steps_per_second": 27.816,
      "step": 2500
    },
    {
      "epoch": 0.99,
      "learning_rate": 3.7525e-05,
      "loss": 0.6281,
      "step": 3000
    },
    {
      "epoch": 0.99,
      "eval_accuracy": 0.8015902366863905,
      "eval_f1": 0.803170220169187,
      "eval_loss": 0.5991469621658325,
      "eval_precision": 0.806066258666526,
      "eval_recall": 0.8015902366863905,
      "eval_runtime": 24.3429,
      "eval_samples_per_second": 666.477,
      "eval_steps_per_second": 27.77,
      "step": 3000
    },
    {
      "epoch": 1.15,
      "learning_rate": 3.544583333333333e-05,
      "loss": 0.5668,
      "step": 3500
    },
    {
      "epoch": 1.15,
      "eval_accuracy": 0.7874753451676528,
      "eval_f1": 0.7971578305557919,
      "eval_loss": 0.5982191562652588,
      "eval_precision": 0.8156521173286116,
      "eval_recall": 0.7874753451676528,
      "eval_runtime": 24.3367,
      "eval_samples_per_second": 666.646,
      "eval_steps_per_second": 27.777,
      "step": 3500
    },
    {
      "epoch": 1.31,
      "learning_rate": 3.3362500000000005e-05,
      "loss": 0.567,
      "step": 4000
    },
    {
      "epoch": 1.31,
      "eval_accuracy": 0.8007889546351085,
      "eval_f1": 0.8041408078539543,
      "eval_loss": 0.6023094654083252,
      "eval_precision": 0.8088979038333125,
      "eval_recall": 0.8007889546351085,
      "eval_runtime": 24.2641,
      "eval_samples_per_second": 668.643,
      "eval_steps_per_second": 27.86,
      "step": 4000
    },
    {
      "epoch": 1.48,
      "learning_rate": 3.127916666666667e-05,
      "loss": 0.5704,
      "step": 4500
    },
    {
      "epoch": 1.48,
      "eval_accuracy": 0.7429117357001972,
      "eval_f1": 0.7619810076683907,
      "eval_loss": 0.6065093278884888,
      "eval_precision": 0.8107750610152921,
      "eval_recall": 0.7429117357001972,
      "eval_runtime": 24.3353,
      "eval_samples_per_second": 666.686,
      "eval_steps_per_second": 27.779,
      "step": 4500
    },
    {
      "epoch": 1.64,
      "learning_rate": 2.9195833333333333e-05,
      "loss": 0.5596,
      "step": 5000
    },
    {
      "epoch": 1.64,
      "eval_accuracy": 0.8072608481262328,
      "eval_f1": 0.8104579115041036,
      "eval_loss": 0.5900022983551025,
      "eval_precision": 0.8144869336926288,
      "eval_recall": 0.8072608481262328,
      "eval_runtime": 24.3429,
      "eval_samples_per_second": 666.477,
      "eval_steps_per_second": 27.77,
      "step": 5000
    },
    {
      "epoch": 1.81,
      "learning_rate": 2.7116666666666667e-05,
      "loss": 0.5495,
      "step": 5500
    },
    {
      "epoch": 1.81,
      "eval_accuracy": 0.810034516765286,
      "eval_f1": 0.8141305380075001,
      "eval_loss": 0.613106906414032,
      "eval_precision": 0.8219044900382881,
      "eval_recall": 0.810034516765286,
      "eval_runtime": 24.3418,
      "eval_samples_per_second": 666.509,
      "eval_steps_per_second": 27.771,
      "step": 5500
    },
    {
      "epoch": 1.97,
      "learning_rate": 2.5033333333333336e-05,
      "loss": 0.5449,
      "step": 6000
    },
    {
      "epoch": 1.97,
      "eval_accuracy": 0.8124383629191322,
      "eval_f1": 0.8140798132169556,
      "eval_loss": 0.6060279011726379,
      "eval_precision": 0.816286158402022,
      "eval_recall": 0.8124383629191322,
      "eval_runtime": 24.332,
      "eval_samples_per_second": 666.777,
      "eval_steps_per_second": 27.782,
      "step": 6000
    },
    {
      "epoch": 2.14,
      "learning_rate": 2.2950000000000002e-05,
      "loss": 0.4898,
      "step": 6500
    },
    {
      "epoch": 2.14,
      "eval_accuracy": 0.7848865877712031,
      "eval_f1": 0.7964804879952159,
      "eval_loss": 0.6215521693229675,
      "eval_precision": 0.820125366434727,
      "eval_recall": 0.7848865877712031,
      "eval_runtime": 24.3196,
      "eval_samples_per_second": 667.117,
      "eval_steps_per_second": 27.797,
      "step": 6500
    },
    {
      "epoch": 2.3,
      "learning_rate": 2.0866666666666668e-05,
      "loss": 0.4837,
      "step": 7000
    },
    {
      "epoch": 2.3,
      "eval_accuracy": 0.7318786982248521,
      "eval_f1": 0.7528823670992008,
      "eval_loss": 0.6411539912223816,
      "eval_precision": 0.8100101192694165,
      "eval_recall": 0.7318786982248521,
      "eval_runtime": 24.3327,
      "eval_samples_per_second": 666.758,
      "eval_steps_per_second": 27.782,
      "step": 7000
    },
    {
      "epoch": 2.47,
      "learning_rate": 1.87875e-05,
      "loss": 0.4671,
      "step": 7500
    },
    {
      "epoch": 2.47,
      "eval_accuracy": 0.803870808678501,
      "eval_f1": 0.810953536758238,
      "eval_loss": 0.6316830515861511,
      "eval_precision": 0.8241951591967538,
      "eval_recall": 0.803870808678501,
      "eval_runtime": 24.3331,
      "eval_samples_per_second": 666.747,
      "eval_steps_per_second": 27.781,
      "step": 7500
    },
    {
      "epoch": 2.63,
      "learning_rate": 1.670416666666667e-05,
      "loss": 0.4791,
      "step": 8000
    },
    {
      "epoch": 2.63,
      "eval_accuracy": 0.8032544378698225,
      "eval_f1": 0.8091355876498971,
      "eval_loss": 0.5908682942390442,
      "eval_precision": 0.8179762946015251,
      "eval_recall": 0.8032544378698225,
      "eval_runtime": 24.3361,
      "eval_samples_per_second": 666.664,
      "eval_steps_per_second": 27.778,
      "step": 8000
    },
    {
      "epoch": 2.79,
      "learning_rate": 1.4620833333333334e-05,
      "loss": 0.4739,
      "step": 8500
    },
    {
      "epoch": 2.79,
      "eval_accuracy": 0.8067061143984221,
      "eval_f1": 0.8120892015198095,
      "eval_loss": 0.6165759563446045,
      "eval_precision": 0.8199753861011193,
      "eval_recall": 0.8067061143984221,
      "eval_runtime": 24.1747,
      "eval_samples_per_second": 671.116,
      "eval_steps_per_second": 27.963,
      "step": 8500
    },
    {
      "epoch": 2.96,
      "learning_rate": 1.25375e-05,
      "loss": 0.4587,
      "step": 9000
    },
    {
      "epoch": 2.96,
      "eval_accuracy": 0.8041173570019724,
      "eval_f1": 0.8104680056938384,
      "eval_loss": 0.5887444019317627,
      "eval_precision": 0.820271897699825,
      "eval_recall": 0.8041173570019724,
      "eval_runtime": 24.1945,
      "eval_samples_per_second": 670.564,
      "eval_steps_per_second": 27.94,
      "step": 9000
    },
    {
      "epoch": 3.12,
      "learning_rate": 1.0454166666666667e-05,
      "loss": 0.4147,
      "step": 9500
    },
    {
      "epoch": 3.12,
      "eval_accuracy": 0.780448717948718,
      "eval_f1": 0.7927207824065717,
      "eval_loss": 0.6190515160560608,
      "eval_precision": 0.8178157106781372,
      "eval_recall": 0.780448717948718,
      "eval_runtime": 24.1866,
      "eval_samples_per_second": 670.784,
      "eval_steps_per_second": 27.949,
      "step": 9500
    },
    {
      "epoch": 3.29,
      "learning_rate": 8.370833333333333e-06,
      "loss": 0.3861,
      "step": 10000
    },
    {
      "epoch": 3.29,
      "eval_accuracy": 0.7917899408284024,
      "eval_f1": 0.8013705698417323,
      "eval_loss": 0.6606641411781311,
      "eval_precision": 0.8190127006775076,
      "eval_recall": 0.7917899408284024,
      "eval_runtime": 24.3416,
      "eval_samples_per_second": 666.514,
      "eval_steps_per_second": 27.771,
      "step": 10000
    },
    {
      "epoch": 3.45,
      "learning_rate": 6.2875e-06,
      "loss": 0.3897,
      "step": 10500
    },
    {
      "epoch": 3.45,
      "eval_accuracy": 0.788646449704142,
      "eval_f1": 0.7987527519476123,
      "eval_loss": 0.6613931059837341,
      "eval_precision": 0.8177063830689373,
      "eval_recall": 0.788646449704142,
      "eval_runtime": 24.342,
      "eval_samples_per_second": 666.503,
      "eval_steps_per_second": 27.771,
      "step": 10500
    },
    {
      "epoch": 3.62,
      "learning_rate": 4.204166666666667e-06,
      "loss": 0.3877,
      "step": 11000
    },
    {
      "epoch": 3.62,
      "eval_accuracy": 0.8135478303747534,
      "eval_f1": 0.8169879527109274,
      "eval_loss": 0.6640126705169678,
      "eval_precision": 0.8215474627622835,
      "eval_recall": 0.8135478303747534,
      "eval_runtime": 24.3344,
      "eval_samples_per_second": 666.71,
      "eval_steps_per_second": 27.78,
      "step": 11000
    },
    {
      "epoch": 3.78,
      "learning_rate": 2.1208333333333335e-06,
      "loss": 0.3795,
      "step": 11500
    },
    {
      "epoch": 3.78,
      "eval_accuracy": 0.8053500986193294,
      "eval_f1": 0.8113411583628731,
      "eval_loss": 0.6599082350730896,
      "eval_precision": 0.8205901740056264,
      "eval_recall": 0.8053500986193294,
      "eval_runtime": 24.3271,
      "eval_samples_per_second": 666.91,
      "eval_steps_per_second": 27.788,
      "step": 11500
    },
    {
      "epoch": 3.94,
      "learning_rate": 3.7500000000000005e-08,
      "loss": 0.3863,
      "step": 12000
    },
    {
      "epoch": 3.94,
      "eval_accuracy": 0.8009122287968442,
      "eval_f1": 0.8081790831347971,
      "eval_loss": 0.6572125554084778,
      "eval_precision": 0.8200806840462704,
      "eval_recall": 0.8009122287968442,
      "eval_runtime": 24.3366,
      "eval_samples_per_second": 666.651,
      "eval_steps_per_second": 27.777,
      "step": 12000
    },
    {
      "epoch": 3.94,
      "step": 12000,
      "total_flos": 1.4962346001065574e+17,
      "train_loss": 0.5341533139546712,
      "train_runtime": 3936.6668,
      "train_samples_per_second": 146.317,
      "train_steps_per_second": 3.048
    }
  ],
  "max_steps": 12000,
  "num_train_epochs": 4,
  "total_flos": 1.4962346001065574e+17,
  "trial_name": null,
  "trial_params": null
}