File size: 10,132 Bytes
aac3138
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
{
  "best_metric": 0.18285594880580902,
  "best_model_checkpoint": "autotrain-6doma-5m8vf/checkpoint-1107",
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 1107,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.06775067750677506,
      "grad_norm": 16.566085815429688,
      "learning_rate": 9.90990990990991e-06,
      "loss": 1.5667,
      "step": 25
    },
    {
      "epoch": 0.13550135501355012,
      "grad_norm": 25.27834129333496,
      "learning_rate": 2.117117117117117e-05,
      "loss": 1.2958,
      "step": 50
    },
    {
      "epoch": 0.2032520325203252,
      "grad_norm": 14.882951736450195,
      "learning_rate": 3.2432432432432436e-05,
      "loss": 1.0021,
      "step": 75
    },
    {
      "epoch": 0.27100271002710025,
      "grad_norm": 12.05569839477539,
      "learning_rate": 4.369369369369369e-05,
      "loss": 0.7635,
      "step": 100
    },
    {
      "epoch": 0.33875338753387535,
      "grad_norm": 15.701233863830566,
      "learning_rate": 4.944779116465864e-05,
      "loss": 0.572,
      "step": 125
    },
    {
      "epoch": 0.4065040650406504,
      "grad_norm": 45.54197692871094,
      "learning_rate": 4.8242971887550205e-05,
      "loss": 0.4778,
      "step": 150
    },
    {
      "epoch": 0.4742547425474255,
      "grad_norm": 20.10284996032715,
      "learning_rate": 4.698795180722892e-05,
      "loss": 0.4213,
      "step": 175
    },
    {
      "epoch": 0.5420054200542005,
      "grad_norm": 52.795291900634766,
      "learning_rate": 4.573293172690764e-05,
      "loss": 0.4171,
      "step": 200
    },
    {
      "epoch": 0.6097560975609756,
      "grad_norm": 32.24135208129883,
      "learning_rate": 4.447791164658635e-05,
      "loss": 0.4031,
      "step": 225
    },
    {
      "epoch": 0.6775067750677507,
      "grad_norm": 48.17521286010742,
      "learning_rate": 4.3222891566265064e-05,
      "loss": 0.3206,
      "step": 250
    },
    {
      "epoch": 0.7452574525745257,
      "grad_norm": 27.5257511138916,
      "learning_rate": 4.196787148594378e-05,
      "loss": 0.3616,
      "step": 275
    },
    {
      "epoch": 0.8130081300813008,
      "grad_norm": 15.912370681762695,
      "learning_rate": 4.071285140562249e-05,
      "loss": 0.508,
      "step": 300
    },
    {
      "epoch": 0.8807588075880759,
      "grad_norm": 12.763589859008789,
      "learning_rate": 3.9508032128514064e-05,
      "loss": 0.3685,
      "step": 325
    },
    {
      "epoch": 0.948509485094851,
      "grad_norm": 20.36044692993164,
      "learning_rate": 3.8253012048192774e-05,
      "loss": 0.393,
      "step": 350
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.9189005768578216,
      "eval_f1_macro": 0.8913236764060113,
      "eval_f1_micro": 0.9189005768578216,
      "eval_f1_weighted": 0.9195807716070247,
      "eval_loss": 0.2322985827922821,
      "eval_precision_macro": 0.8992556342366311,
      "eval_precision_micro": 0.9189005768578216,
      "eval_precision_weighted": 0.9235420876186199,
      "eval_recall_macro": 0.8887207219589304,
      "eval_recall_micro": 0.9189005768578216,
      "eval_recall_weighted": 0.9189005768578216,
      "eval_runtime": 19.8492,
      "eval_samples_per_second": 148.469,
      "eval_steps_per_second": 9.32,
      "step": 369
    },
    {
      "epoch": 1.016260162601626,
      "grad_norm": 14.446638107299805,
      "learning_rate": 3.699799196787149e-05,
      "loss": 0.2934,
      "step": 375
    },
    {
      "epoch": 1.084010840108401,
      "grad_norm": 29.589651107788086,
      "learning_rate": 3.57429718875502e-05,
      "loss": 0.3801,
      "step": 400
    },
    {
      "epoch": 1.151761517615176,
      "grad_norm": 23.76276397705078,
      "learning_rate": 3.4487951807228916e-05,
      "loss": 0.2276,
      "step": 425
    },
    {
      "epoch": 1.2195121951219512,
      "grad_norm": 33.06072998046875,
      "learning_rate": 3.323293172690763e-05,
      "loss": 0.2623,
      "step": 450
    },
    {
      "epoch": 1.2872628726287263,
      "grad_norm": 31.562694549560547,
      "learning_rate": 3.197791164658634e-05,
      "loss": 0.3324,
      "step": 475
    },
    {
      "epoch": 1.3550135501355014,
      "grad_norm": 25.050046920776367,
      "learning_rate": 3.072289156626506e-05,
      "loss": 0.3613,
      "step": 500
    },
    {
      "epoch": 1.4227642276422765,
      "grad_norm": 5.65738582611084,
      "learning_rate": 2.9467871485943778e-05,
      "loss": 0.3689,
      "step": 525
    },
    {
      "epoch": 1.4905149051490514,
      "grad_norm": 30.50360870361328,
      "learning_rate": 2.821285140562249e-05,
      "loss": 0.2128,
      "step": 550
    },
    {
      "epoch": 1.5582655826558267,
      "grad_norm": 31.306838989257812,
      "learning_rate": 2.6957831325301207e-05,
      "loss": 0.3329,
      "step": 575
    },
    {
      "epoch": 1.6260162601626016,
      "grad_norm": 5.569540023803711,
      "learning_rate": 2.570281124497992e-05,
      "loss": 0.3934,
      "step": 600
    },
    {
      "epoch": 1.6937669376693767,
      "grad_norm": 79.83793640136719,
      "learning_rate": 2.4447791164658633e-05,
      "loss": 0.3329,
      "step": 625
    },
    {
      "epoch": 1.7615176151761518,
      "grad_norm": 11.711432456970215,
      "learning_rate": 2.319277108433735e-05,
      "loss": 0.3065,
      "step": 650
    },
    {
      "epoch": 1.8292682926829267,
      "grad_norm": 27.71021842956543,
      "learning_rate": 2.1937751004016066e-05,
      "loss": 0.3361,
      "step": 675
    },
    {
      "epoch": 1.897018970189702,
      "grad_norm": 24.346481323242188,
      "learning_rate": 2.068273092369478e-05,
      "loss": 0.3967,
      "step": 700
    },
    {
      "epoch": 1.9647696476964769,
      "grad_norm": 7.5306549072265625,
      "learning_rate": 1.9427710843373495e-05,
      "loss": 0.3304,
      "step": 725
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.9284017645062775,
      "eval_f1_macro": 0.903605865288441,
      "eval_f1_micro": 0.9284017645062775,
      "eval_f1_weighted": 0.9277100982185731,
      "eval_loss": 0.2046061009168625,
      "eval_precision_macro": 0.9095877174004062,
      "eval_precision_micro": 0.9284017645062775,
      "eval_precision_weighted": 0.9281331331487362,
      "eval_recall_macro": 0.8989112570392443,
      "eval_recall_micro": 0.9284017645062775,
      "eval_recall_weighted": 0.9284017645062775,
      "eval_runtime": 19.9432,
      "eval_samples_per_second": 147.769,
      "eval_steps_per_second": 9.276,
      "step": 738
    },
    {
      "epoch": 2.032520325203252,
      "grad_norm": 28.1395206451416,
      "learning_rate": 1.822289156626506e-05,
      "loss": 0.332,
      "step": 750
    },
    {
      "epoch": 2.100271002710027,
      "grad_norm": 7.682183265686035,
      "learning_rate": 1.6967871485943776e-05,
      "loss": 0.2995,
      "step": 775
    },
    {
      "epoch": 2.168021680216802,
      "grad_norm": 23.640390396118164,
      "learning_rate": 1.5712851405622492e-05,
      "loss": 0.3089,
      "step": 800
    },
    {
      "epoch": 2.2357723577235773,
      "grad_norm": 3.6244945526123047,
      "learning_rate": 1.4457831325301205e-05,
      "loss": 0.2557,
      "step": 825
    },
    {
      "epoch": 2.303523035230352,
      "grad_norm": 109.68293762207031,
      "learning_rate": 1.3202811244979921e-05,
      "loss": 0.3734,
      "step": 850
    },
    {
      "epoch": 2.3712737127371275,
      "grad_norm": 28.19609832763672,
      "learning_rate": 1.1947791164658636e-05,
      "loss": 0.3144,
      "step": 875
    },
    {
      "epoch": 2.4390243902439024,
      "grad_norm": 17.588850021362305,
      "learning_rate": 1.069277108433735e-05,
      "loss": 0.4061,
      "step": 900
    },
    {
      "epoch": 2.5067750677506773,
      "grad_norm": 21.384654998779297,
      "learning_rate": 9.437751004016063e-06,
      "loss": 0.2626,
      "step": 925
    },
    {
      "epoch": 2.5745257452574526,
      "grad_norm": 0.35269397497177124,
      "learning_rate": 8.18273092369478e-06,
      "loss": 0.2822,
      "step": 950
    },
    {
      "epoch": 2.642276422764228,
      "grad_norm": 21.37306785583496,
      "learning_rate": 6.927710843373494e-06,
      "loss": 0.4436,
      "step": 975
    },
    {
      "epoch": 2.710027100271003,
      "grad_norm": 17.97796630859375,
      "learning_rate": 5.672690763052209e-06,
      "loss": 0.2517,
      "step": 1000
    },
    {
      "epoch": 2.7777777777777777,
      "grad_norm": 9.19117259979248,
      "learning_rate": 4.417670682730924e-06,
      "loss": 0.2395,
      "step": 1025
    },
    {
      "epoch": 2.845528455284553,
      "grad_norm": 64.22978210449219,
      "learning_rate": 3.1626506024096387e-06,
      "loss": 0.3387,
      "step": 1050
    },
    {
      "epoch": 2.913279132791328,
      "grad_norm": 1.4875394105911255,
      "learning_rate": 1.9076305220883537e-06,
      "loss": 0.2559,
      "step": 1075
    },
    {
      "epoch": 2.9810298102981028,
      "grad_norm": 0.5281310677528381,
      "learning_rate": 6.526104417670682e-07,
      "loss": 0.3605,
      "step": 1100
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.9365456396335257,
      "eval_f1_macro": 0.9148614413559308,
      "eval_f1_micro": 0.9365456396335257,
      "eval_f1_weighted": 0.9364564915178187,
      "eval_loss": 0.18285594880580902,
      "eval_precision_macro": 0.9159613318061458,
      "eval_precision_micro": 0.9365456396335257,
      "eval_precision_weighted": 0.9365041505044936,
      "eval_recall_macro": 0.9139276800740521,
      "eval_recall_micro": 0.9365456396335257,
      "eval_recall_weighted": 0.9365456396335257,
      "eval_runtime": 19.7723,
      "eval_samples_per_second": 149.047,
      "eval_steps_per_second": 9.357,
      "step": 1107
    }
  ],
  "logging_steps": 25,
  "max_steps": 1107,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "total_flos": 2.225421168402862e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}