File size: 12,473 Bytes
162e8ec
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
{
  "best_metric": 0.2544600938967136,
  "best_model_checkpoint": "/content/our_data/checkpoint-10500",
  "epoch": 10.0,
  "eval_steps": 500,
  "global_step": 12410,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.4,
      "learning_rate": 1.91941982272361e-05,
      "loss": 2.0809,
      "step": 500
    },
    {
      "epoch": 0.4,
      "eval_accuracy": 0.5297831784608054,
      "eval_f1": 0.013054830287206266,
      "eval_loss": 2.0593647956848145,
      "eval_precision": 0.5,
      "eval_recall": 0.006613756613756613,
      "eval_runtime": 1.3043,
      "eval_samples_per_second": 233.075,
      "eval_steps_per_second": 116.537,
      "step": 500
    },
    {
      "epoch": 0.81,
      "learning_rate": 1.83883964544722e-05,
      "loss": 1.8682,
      "step": 1000
    },
    {
      "epoch": 0.81,
      "eval_accuracy": 0.5528949249463903,
      "eval_f1": 0.09097688292319164,
      "eval_loss": 1.8005567789077759,
      "eval_precision": 0.10427350427350428,
      "eval_recall": 0.08068783068783068,
      "eval_runtime": 1.4327,
      "eval_samples_per_second": 212.189,
      "eval_steps_per_second": 106.095,
      "step": 1000
    },
    {
      "epoch": 1.21,
      "learning_rate": 1.75825946817083e-05,
      "loss": 1.6332,
      "step": 1500
    },
    {
      "epoch": 1.21,
      "eval_accuracy": 0.574815344293543,
      "eval_f1": 0.14125,
      "eval_loss": 1.8355692625045776,
      "eval_precision": 0.1338862559241706,
      "eval_recall": 0.14947089947089948,
      "eval_runtime": 1.8642,
      "eval_samples_per_second": 163.074,
      "eval_steps_per_second": 81.537,
      "step": 1500
    },
    {
      "epoch": 1.61,
      "learning_rate": 1.67767929089444e-05,
      "loss": 1.468,
      "step": 2000
    },
    {
      "epoch": 1.61,
      "eval_accuracy": 0.5891112699547296,
      "eval_f1": 0.15114235500878734,
      "eval_loss": 1.6260936260223389,
      "eval_precision": 0.13564668769716087,
      "eval_recall": 0.17063492063492064,
      "eval_runtime": 1.9126,
      "eval_samples_per_second": 158.942,
      "eval_steps_per_second": 79.471,
      "step": 2000
    },
    {
      "epoch": 2.01,
      "learning_rate": 1.59709911361805e-05,
      "loss": 1.401,
      "step": 2500
    },
    {
      "epoch": 2.01,
      "eval_accuracy": 0.5986418870621872,
      "eval_f1": 0.16253968253968254,
      "eval_loss": 1.694327473640442,
      "eval_precision": 0.1562881562881563,
      "eval_recall": 0.1693121693121693,
      "eval_runtime": 1.3338,
      "eval_samples_per_second": 227.927,
      "eval_steps_per_second": 113.964,
      "step": 2500
    },
    {
      "epoch": 2.42,
      "learning_rate": 1.5165189363416601e-05,
      "loss": 1.1878,
      "step": 3000
    },
    {
      "epoch": 2.42,
      "eval_accuracy": 0.5975696926375983,
      "eval_f1": 0.16076058772687984,
      "eval_loss": 1.6739833354949951,
      "eval_precision": 0.11938382541720154,
      "eval_recall": 0.24603174603174602,
      "eval_runtime": 1.305,
      "eval_samples_per_second": 232.956,
      "eval_steps_per_second": 116.478,
      "step": 3000
    },
    {
      "epoch": 2.82,
      "learning_rate": 1.4359387590652701e-05,
      "loss": 1.1182,
      "step": 3500
    },
    {
      "epoch": 2.82,
      "eval_accuracy": 0.622706695258518,
      "eval_f1": 0.18434203220433093,
      "eval_loss": 1.6201189756393433,
      "eval_precision": 0.15885167464114833,
      "eval_recall": 0.21957671957671956,
      "eval_runtime": 1.2763,
      "eval_samples_per_second": 238.191,
      "eval_steps_per_second": 119.095,
      "step": 3500
    },
    {
      "epoch": 3.22,
      "learning_rate": 1.35535858178888e-05,
      "loss": 0.9677,
      "step": 4000
    },
    {
      "epoch": 3.22,
      "eval_accuracy": 0.6175839885632595,
      "eval_f1": 0.1704312114989733,
      "eval_loss": 1.6241066455841064,
      "eval_precision": 0.13926174496644295,
      "eval_recall": 0.21957671957671956,
      "eval_runtime": 1.3014,
      "eval_samples_per_second": 233.592,
      "eval_steps_per_second": 116.796,
      "step": 4000
    },
    {
      "epoch": 3.63,
      "learning_rate": 1.27477840451249e-05,
      "loss": 0.9055,
      "step": 4500
    },
    {
      "epoch": 3.63,
      "eval_accuracy": 0.6157969978556112,
      "eval_f1": 0.17582417582417587,
      "eval_loss": 1.5932097434997559,
      "eval_precision": 0.1316655694535879,
      "eval_recall": 0.26455026455026454,
      "eval_runtime": 1.5381,
      "eval_samples_per_second": 197.65,
      "eval_steps_per_second": 98.825,
      "step": 4500
    },
    {
      "epoch": 4.03,
      "learning_rate": 1.1941982272361e-05,
      "loss": 0.8772,
      "step": 5000
    },
    {
      "epoch": 4.03,
      "eval_accuracy": 0.6254467476769121,
      "eval_f1": 0.20804710500490678,
      "eval_loss": 1.5797325372695923,
      "eval_precision": 0.16536661466458658,
      "eval_recall": 0.2804232804232804,
      "eval_runtime": 1.8044,
      "eval_samples_per_second": 168.477,
      "eval_steps_per_second": 84.239,
      "step": 5000
    },
    {
      "epoch": 4.43,
      "learning_rate": 1.11361804995971e-05,
      "loss": 0.7224,
      "step": 5500
    },
    {
      "epoch": 4.43,
      "eval_accuracy": 0.6412913986180605,
      "eval_f1": 0.20699172033118673,
      "eval_loss": 1.5723158121109009,
      "eval_precision": 0.15867418899858957,
      "eval_recall": 0.2976190476190476,
      "eval_runtime": 1.8259,
      "eval_samples_per_second": 166.496,
      "eval_steps_per_second": 83.248,
      "step": 5500
    },
    {
      "epoch": 4.83,
      "learning_rate": 1.0330378726833199e-05,
      "loss": 0.7498,
      "step": 6000
    },
    {
      "epoch": 4.83,
      "eval_accuracy": 0.6496306885870861,
      "eval_f1": 0.22154779969650987,
      "eval_loss": 1.595717191696167,
      "eval_precision": 0.17936117936117937,
      "eval_recall": 0.2896825396825397,
      "eval_runtime": 1.7138,
      "eval_samples_per_second": 177.388,
      "eval_steps_per_second": 88.694,
      "step": 6000
    },
    {
      "epoch": 5.24,
      "learning_rate": 9.5245769540693e-06,
      "loss": 0.6632,
      "step": 6500
    },
    {
      "epoch": 5.24,
      "eval_accuracy": 0.6427209911841791,
      "eval_f1": 0.22222222222222224,
      "eval_loss": 1.6824833154678345,
      "eval_precision": 0.1863799283154122,
      "eval_recall": 0.2751322751322751,
      "eval_runtime": 1.3164,
      "eval_samples_per_second": 230.932,
      "eval_steps_per_second": 115.466,
      "step": 6500
    },
    {
      "epoch": 5.64,
      "learning_rate": 8.7187751813054e-06,
      "loss": 0.6139,
      "step": 7000
    },
    {
      "epoch": 5.64,
      "eval_accuracy": 0.6508220157255182,
      "eval_f1": 0.23450735621934907,
      "eval_loss": 1.5827279090881348,
      "eval_precision": 0.1768661735036987,
      "eval_recall": 0.3478835978835979,
      "eval_runtime": 1.3076,
      "eval_samples_per_second": 232.479,
      "eval_steps_per_second": 116.239,
      "step": 7000
    },
    {
      "epoch": 6.04,
      "learning_rate": 7.9129734085415e-06,
      "loss": 0.6212,
      "step": 7500
    },
    {
      "epoch": 6.04,
      "eval_accuracy": 0.6526090064331665,
      "eval_f1": 0.23380154055278662,
      "eval_loss": 1.5536507368087769,
      "eval_precision": 0.17780840799448655,
      "eval_recall": 0.3412698412698413,
      "eval_runtime": 1.9253,
      "eval_samples_per_second": 157.894,
      "eval_steps_per_second": 78.947,
      "step": 7500
    },
    {
      "epoch": 6.45,
      "learning_rate": 7.107171635777599e-06,
      "loss": 0.5379,
      "step": 8000
    },
    {
      "epoch": 6.45,
      "eval_accuracy": 0.6535620681439123,
      "eval_f1": 0.23245002324500233,
      "eval_loss": 1.5670047998428345,
      "eval_precision": 0.17921146953405018,
      "eval_recall": 0.3306878306878307,
      "eval_runtime": 1.8494,
      "eval_samples_per_second": 164.376,
      "eval_steps_per_second": 82.188,
      "step": 8000
    },
    {
      "epoch": 6.85,
      "learning_rate": 6.301369863013699e-06,
      "loss": 0.5376,
      "step": 8500
    },
    {
      "epoch": 6.85,
      "eval_accuracy": 0.6529664045746962,
      "eval_f1": 0.2388059701492537,
      "eval_loss": 1.6112617254257202,
      "eval_precision": 0.1844380403458213,
      "eval_recall": 0.3386243386243386,
      "eval_runtime": 1.9275,
      "eval_samples_per_second": 157.714,
      "eval_steps_per_second": 78.857,
      "step": 8500
    },
    {
      "epoch": 7.25,
      "learning_rate": 5.495568090249799e-06,
      "loss": 0.5,
      "step": 9000
    },
    {
      "epoch": 7.25,
      "eval_accuracy": 0.6599952346914463,
      "eval_f1": 0.22989593188268687,
      "eval_loss": 1.6431697607040405,
      "eval_precision": 0.17893961708394698,
      "eval_recall": 0.32142857142857145,
      "eval_runtime": 1.9076,
      "eval_samples_per_second": 159.365,
      "eval_steps_per_second": 79.682,
      "step": 9000
    },
    {
      "epoch": 7.66,
      "learning_rate": 4.689766317485899e-06,
      "loss": 0.4928,
      "step": 9500
    },
    {
      "epoch": 7.66,
      "eval_accuracy": 0.660948296402192,
      "eval_f1": 0.2414772727272727,
      "eval_loss": 1.6421875953674316,
      "eval_precision": 0.18805309734513273,
      "eval_recall": 0.3373015873015873,
      "eval_runtime": 1.2935,
      "eval_samples_per_second": 235.02,
      "eval_steps_per_second": 117.51,
      "step": 9500
    },
    {
      "epoch": 8.06,
      "learning_rate": 3.883964544721999e-06,
      "loss": 0.4877,
      "step": 10000
    },
    {
      "epoch": 8.06,
      "eval_accuracy": 0.6653562068143912,
      "eval_f1": 0.254,
      "eval_loss": 1.6850905418395996,
      "eval_precision": 0.20418006430868169,
      "eval_recall": 0.335978835978836,
      "eval_runtime": 1.4405,
      "eval_samples_per_second": 211.033,
      "eval_steps_per_second": 105.516,
      "step": 10000
    },
    {
      "epoch": 8.46,
      "learning_rate": 3.0781627719580986e-06,
      "loss": 0.4339,
      "step": 10500
    },
    {
      "epoch": 8.46,
      "eval_accuracy": 0.6636883488205861,
      "eval_f1": 0.2544600938967136,
      "eval_loss": 1.6376055479049683,
      "eval_precision": 0.19723435225618632,
      "eval_recall": 0.3584656084656085,
      "eval_runtime": 1.8779,
      "eval_samples_per_second": 161.879,
      "eval_steps_per_second": 80.94,
      "step": 10500
    },
    {
      "epoch": 8.86,
      "learning_rate": 2.2723609991941985e-06,
      "loss": 0.4303,
      "step": 11000
    },
    {
      "epoch": 8.86,
      "eval_accuracy": 0.660352632832976,
      "eval_f1": 0.2396694214876033,
      "eval_loss": 1.6363922357559204,
      "eval_precision": 0.18354430379746836,
      "eval_recall": 0.34523809523809523,
      "eval_runtime": 1.8659,
      "eval_samples_per_second": 162.922,
      "eval_steps_per_second": 81.461,
      "step": 11000
    },
    {
      "epoch": 9.27,
      "learning_rate": 1.4665592264302982e-06,
      "loss": 0.4509,
      "step": 11500
    },
    {
      "epoch": 9.27,
      "eval_accuracy": 0.6664284012389802,
      "eval_f1": 0.2508507535245503,
      "eval_loss": 1.644798994064331,
      "eval_precision": 0.19830899308224442,
      "eval_recall": 0.3412698412698413,
      "eval_runtime": 1.3158,
      "eval_samples_per_second": 231.043,
      "eval_steps_per_second": 115.521,
      "step": 11500
    },
    {
      "epoch": 9.67,
      "learning_rate": 6.607574536663981e-07,
      "loss": 0.4114,
      "step": 12000
    },
    {
      "epoch": 9.67,
      "eval_accuracy": 0.6658327376697641,
      "eval_f1": 0.2510658455708195,
      "eval_loss": 1.6494354009628296,
      "eval_precision": 0.19557195571955718,
      "eval_recall": 0.3505291005291005,
      "eval_runtime": 1.3234,
      "eval_samples_per_second": 229.715,
      "eval_steps_per_second": 114.857,
      "step": 12000
    },
    {
      "epoch": 10.0,
      "step": 12410,
      "total_flos": 256497375844554.0,
      "train_loss": 0.8660164156044615,
      "train_runtime": 798.2036,
      "train_samples_per_second": 31.082,
      "train_steps_per_second": 15.547
    }
  ],
  "logging_steps": 500,
  "max_steps": 12410,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 500,
  "total_flos": 256497375844554.0,
  "trial_name": null,
  "trial_params": null
}