File size: 9,305 Bytes
a8f05d6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
{
  "best_metric": 0.8807787895202637,
  "best_model_checkpoint": "org_org_a/org_aug_a/checkpoint-400",
  "epoch": 0.24615384615384617,
  "eval_steps": 25,
  "global_step": 400,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.015384615384615385,
      "grad_norm": 99.7617416381836,
      "learning_rate": 9.375e-05,
      "loss": 2.2195,
      "step": 25
    },
    {
      "epoch": 0.015384615384615385,
      "eval_f1_macro": 0.5521299126846324,
      "eval_f1_micro": 0.5691194856224325,
      "eval_f1_weighted": 0.569423288164649,
      "eval_loss": 1.520703673362732,
      "eval_runtime": 1286.7359,
      "eval_samples_per_second": 8.703,
      "eval_steps_per_second": 0.272,
      "step": 25
    },
    {
      "epoch": 0.03076923076923077,
      "grad_norm": 40.433555603027344,
      "learning_rate": 8.75e-05,
      "loss": 1.4371,
      "step": 50
    },
    {
      "epoch": 0.03076923076923077,
      "eval_f1_macro": 0.5857301862067549,
      "eval_f1_micro": 0.6089480264332917,
      "eval_f1_weighted": 0.6051732662908408,
      "eval_loss": 1.2746953964233398,
      "eval_runtime": 1302.368,
      "eval_samples_per_second": 8.598,
      "eval_steps_per_second": 0.269,
      "step": 50
    },
    {
      "epoch": 0.046153846153846156,
      "grad_norm": 50.680335998535156,
      "learning_rate": 8.125000000000001e-05,
      "loss": 1.2556,
      "step": 75
    },
    {
      "epoch": 0.046153846153846156,
      "eval_f1_macro": 0.6036315452406847,
      "eval_f1_micro": 0.6303804250759064,
      "eval_f1_weighted": 0.6240449220829647,
      "eval_loss": 1.1545159816741943,
      "eval_runtime": 1308.8563,
      "eval_samples_per_second": 8.556,
      "eval_steps_per_second": 0.267,
      "step": 75
    },
    {
      "epoch": 0.06153846153846154,
      "grad_norm": 24.990671157836914,
      "learning_rate": 7.500000000000001e-05,
      "loss": 1.2415,
      "step": 100
    },
    {
      "epoch": 0.06153846153846154,
      "eval_f1_macro": 0.6131831448419165,
      "eval_f1_micro": 0.6319878549741025,
      "eval_f1_weighted": 0.6300913896752308,
      "eval_loss": 1.0690622329711914,
      "eval_runtime": 1302.0106,
      "eval_samples_per_second": 8.601,
      "eval_steps_per_second": 0.269,
      "step": 100
    },
    {
      "epoch": 0.07692307692307693,
      "grad_norm": 20.941816329956055,
      "learning_rate": 6.875e-05,
      "loss": 0.9864,
      "step": 125
    },
    {
      "epoch": 0.07692307692307693,
      "eval_f1_macro": 0.627800207605976,
      "eval_f1_micro": 0.6399357028040722,
      "eval_f1_weighted": 0.6411240450638264,
      "eval_loss": 1.0263742208480835,
      "eval_runtime": 1310.3949,
      "eval_samples_per_second": 8.546,
      "eval_steps_per_second": 0.267,
      "step": 125
    },
    {
      "epoch": 0.09230769230769231,
      "grad_norm": 17.243314743041992,
      "learning_rate": 6.25e-05,
      "loss": 1.0647,
      "step": 150
    },
    {
      "epoch": 0.09230769230769231,
      "eval_f1_macro": 0.6265827051287185,
      "eval_f1_micro": 0.6510091087694231,
      "eval_f1_weighted": 0.6455165794591529,
      "eval_loss": 0.9917964339256287,
      "eval_runtime": 1292.6103,
      "eval_samples_per_second": 8.663,
      "eval_steps_per_second": 0.271,
      "step": 150
    },
    {
      "epoch": 0.1076923076923077,
      "grad_norm": 20.114173889160156,
      "learning_rate": 5.6250000000000005e-05,
      "loss": 0.9849,
      "step": 175
    },
    {
      "epoch": 0.1076923076923077,
      "eval_f1_macro": 0.6317476736951155,
      "eval_f1_micro": 0.6576174316842294,
      "eval_f1_weighted": 0.6510976948325254,
      "eval_loss": 0.9679338932037354,
      "eval_runtime": 1305.0812,
      "eval_samples_per_second": 8.58,
      "eval_steps_per_second": 0.268,
      "step": 175
    },
    {
      "epoch": 0.12307692307692308,
      "grad_norm": 39.2221565246582,
      "learning_rate": 5e-05,
      "loss": 1.0067,
      "step": 200
    },
    {
      "epoch": 0.12307692307692308,
      "eval_f1_macro": 0.6383959350585475,
      "eval_f1_micro": 0.6501160921593142,
      "eval_f1_weighted": 0.6513020604373679,
      "eval_loss": 0.9382981061935425,
      "eval_runtime": 1283.0843,
      "eval_samples_per_second": 8.727,
      "eval_steps_per_second": 0.273,
      "step": 200
    },
    {
      "epoch": 0.13846153846153847,
      "grad_norm": 26.992185592651367,
      "learning_rate": 4.375e-05,
      "loss": 0.8928,
      "step": 225
    },
    {
      "epoch": 0.13846153846153847,
      "eval_f1_macro": 0.640450740779414,
      "eval_f1_micro": 0.6619932130737631,
      "eval_f1_weighted": 0.6578984928748007,
      "eval_loss": 0.9242791533470154,
      "eval_runtime": 1276.3685,
      "eval_samples_per_second": 8.773,
      "eval_steps_per_second": 0.274,
      "step": 225
    },
    {
      "epoch": 0.15384615384615385,
      "grad_norm": 71.61570739746094,
      "learning_rate": 3.7500000000000003e-05,
      "loss": 0.9858,
      "step": 250
    },
    {
      "epoch": 0.15384615384615385,
      "eval_f1_macro": 0.640469116958249,
      "eval_f1_micro": 0.6627076263618503,
      "eval_f1_weighted": 0.6581960436641718,
      "eval_loss": 0.9131789803504944,
      "eval_runtime": 1285.3671,
      "eval_samples_per_second": 8.712,
      "eval_steps_per_second": 0.272,
      "step": 250
    },
    {
      "epoch": 0.16923076923076924,
      "grad_norm": 51.381019592285156,
      "learning_rate": 3.125e-05,
      "loss": 0.9085,
      "step": 275
    },
    {
      "epoch": 0.16923076923076924,
      "eval_f1_macro": 0.6446344740224741,
      "eval_f1_micro": 0.6575281300232184,
      "eval_f1_weighted": 0.6580540316041209,
      "eval_loss": 0.9010853171348572,
      "eval_runtime": 1307.6049,
      "eval_samples_per_second": 8.564,
      "eval_steps_per_second": 0.268,
      "step": 275
    },
    {
      "epoch": 0.18461538461538463,
      "grad_norm": 25.47317886352539,
      "learning_rate": 2.5e-05,
      "loss": 1.0059,
      "step": 300
    },
    {
      "epoch": 0.18461538461538463,
      "eval_f1_macro": 0.6435798595814134,
      "eval_f1_micro": 0.6686015359885694,
      "eval_f1_weighted": 0.662301841928527,
      "eval_loss": 0.9018191695213318,
      "eval_runtime": 1330.8319,
      "eval_samples_per_second": 8.414,
      "eval_steps_per_second": 0.263,
      "step": 300
    },
    {
      "epoch": 0.2,
      "grad_norm": 16.64508628845215,
      "learning_rate": 1.8750000000000002e-05,
      "loss": 0.8939,
      "step": 325
    },
    {
      "epoch": 0.2,
      "eval_f1_macro": 0.6448244249080287,
      "eval_f1_micro": 0.6681550276835149,
      "eval_f1_weighted": 0.6628710932570111,
      "eval_loss": 0.892798125743866,
      "eval_runtime": 1305.0822,
      "eval_samples_per_second": 8.58,
      "eval_steps_per_second": 0.268,
      "step": 325
    },
    {
      "epoch": 0.2153846153846154,
      "grad_norm": 32.9316520690918,
      "learning_rate": 1.25e-05,
      "loss": 0.864,
      "step": 350
    },
    {
      "epoch": 0.2153846153846154,
      "eval_f1_macro": 0.6477608498148076,
      "eval_f1_micro": 0.6621718163957849,
      "eval_f1_weighted": 0.6618855058014609,
      "eval_loss": 0.8832775950431824,
      "eval_runtime": 1344.9811,
      "eval_samples_per_second": 8.326,
      "eval_steps_per_second": 0.26,
      "step": 350
    },
    {
      "epoch": 0.23076923076923078,
      "grad_norm": 63.84526824951172,
      "learning_rate": 6.25e-06,
      "loss": 0.9499,
      "step": 375
    },
    {
      "epoch": 0.23076923076923078,
      "eval_f1_macro": 0.646307301425064,
      "eval_f1_micro": 0.6585104482943382,
      "eval_f1_weighted": 0.659331813949001,
      "eval_loss": 0.8836826682090759,
      "eval_runtime": 1287.5574,
      "eval_samples_per_second": 8.697,
      "eval_steps_per_second": 0.272,
      "step": 375
    },
    {
      "epoch": 0.24615384615384617,
      "grad_norm": 21.614879608154297,
      "learning_rate": 0.0,
      "loss": 0.9721,
      "step": 400
    },
    {
      "epoch": 0.24615384615384617,
      "eval_f1_macro": 0.6475943444675745,
      "eval_f1_micro": 0.6614574031076978,
      "eval_f1_weighted": 0.6614511612983146,
      "eval_loss": 0.8807787895202637,
      "eval_runtime": 1313.0891,
      "eval_samples_per_second": 8.528,
      "eval_steps_per_second": 0.267,
      "step": 400
    },
    {
      "epoch": 0.24615384615384617,
      "step": 400,
      "total_flos": 1.690284412871639e+17,
      "train_loss": 1.104334650039673,
      "train_runtime": 22485.3021,
      "train_samples_per_second": 0.569,
      "train_steps_per_second": 0.018
    },
    {
      "epoch": 0.24615384615384617,
      "eval_f1_macro": 0.6475943444675745,
      "eval_f1_micro": 0.6614574031076978,
      "eval_f1_weighted": 0.6614511612983146,
      "eval_loss": 0.8807787895202637,
      "eval_runtime": 1299.8097,
      "eval_samples_per_second": 8.615,
      "eval_steps_per_second": 0.269,
      "step": 400
    }
  ],
  "logging_steps": 25,
  "max_steps": 400,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 25,
  "total_flos": 1.690284412871639e+17,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}