lombardata committed on
Commit
c2dc5e3
1 Parent(s): 1fa23ae

🍻 cheers

Browse files
README.md CHANGED
@@ -1,7 +1,11 @@
1
  ---
 
 
2
  license: apache-2.0
3
  base_model: microsoft/resnet-50
4
  tags:
 
 
5
  - generated_from_trainer
6
  metrics:
7
  - accuracy
@@ -15,13 +19,13 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # resnet-50-linearhead-2024_03_12-with_data_aug_batch-size32_epochs93_freeze
17
 
18
- This model is a fine-tuned version of [microsoft/resnet-50](https://huggingface.co/microsoft/resnet-50) on the None dataset.
19
  It achieves the following results on the evaluation set:
20
- - Loss: 0.1546
21
- - F1 Micro: 0.7452
22
- - F1 Macro: 0.6223
23
- - Roc Auc: 0.8208
24
- - Accuracy: 0.4050
25
  - Learning Rate: 1e-05
26
 
27
  ## Model description
 
1
  ---
2
+ language:
3
+ - eng
4
  license: apache-2.0
5
  base_model: microsoft/resnet-50
6
  tags:
7
+ - multilabel-image-classification
8
+ - multilabel
9
  - generated_from_trainer
10
  metrics:
11
  - accuracy
 
19
 
20
  # resnet-50-linearhead-2024_03_12-with_data_aug_batch-size32_epochs93_freeze
21
 
22
+ This model is a fine-tuned version of [microsoft/resnet-50](https://huggingface.co/microsoft/resnet-50) on the multilabel_complete_dataset dataset.
23
  It achieves the following results on the evaluation set:
24
+ - Loss: 0.1518
25
+ - F1 Micro: 0.7545
26
+ - F1 Macro: 0.6309
27
+ - Roc Auc: 0.8276
28
+ - Accuracy: 0.4069
29
  - Learning Rate: 1e-05
30
 
31
  ## Model description
all_results.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 46.0,
3
+ "eval_accuracy": 0.40687478077867417,
4
+ "eval_f1_macro": 0.6308665357852737,
5
+ "eval_f1_micro": 0.7544523735650016,
6
+ "eval_loss": 0.15177054703235626,
7
+ "eval_roc_auc": 0.8275818899442217,
8
+ "eval_runtime": 368.9147,
9
+ "eval_samples_per_second": 7.728,
10
+ "eval_steps_per_second": 0.244,
11
+ "learning_rate": 1e-05,
12
+ "train_loss": 0.1827572128274107,
13
+ "train_runtime": 70769.3212,
14
+ "train_samples_per_second": 11.517,
15
+ "train_steps_per_second": 0.36
16
+ }
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "microsoft/resnet-50",
3
  "architectures": [
4
  "ResNetForImageClassification"
5
  ],
 
1
  {
2
+ "_name_or_path": "microsoft/resnet-502024_03_12",
3
  "architectures": [
4
  "ResNetForImageClassification"
5
  ],
eval_results.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 46.0,
3
+ "eval_accuracy": 0.40687478077867417,
4
+ "eval_f1_macro": 0.6308665357852737,
5
+ "eval_f1_micro": 0.7544523735650016,
6
+ "eval_loss": 0.15177054703235626,
7
+ "eval_roc_auc": 0.8275818899442217,
8
+ "eval_runtime": 368.9147,
9
+ "eval_samples_per_second": 7.728,
10
+ "eval_steps_per_second": 0.244,
11
+ "learning_rate": 1e-05
12
+ }
runs/Mar12_11-48-01_datavisu4/events.out.tfevents.1710311640.datavisu4.62295.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1a62462d2e0eea6e5dfabdb1d9f3e70881b3db0f6c29348f071465f035653cb
3
+ size 624
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 46.0,
3
+ "learning_rate": 1e-05,
4
+ "train_loss": 0.1827572128274107,
5
+ "train_runtime": 70769.3212,
6
+ "train_samples_per_second": 11.517,
7
+ "train_steps_per_second": 0.36
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,779 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.15303632616996765,
3
+ "best_model_checkpoint": "/home1/datawork/mcontini/models/multilabel/huggingface/resnet-50-linearhead-2024_03_12-with_data_aug_batch-size32_epochs93_freeze/checkpoint-9864",
4
+ "epoch": 46.0,
5
+ "eval_steps": 500,
6
+ "global_step": 12604,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "eval_accuracy": 0.1951983298538622,
14
+ "eval_f1_macro": 0.2834407731254271,
15
+ "eval_f1_micro": 0.5838964773544213,
16
+ "eval_loss": 0.22367511689662933,
17
+ "eval_roc_auc": 0.71756579506086,
18
+ "eval_runtime": 377.3279,
19
+ "eval_samples_per_second": 7.617,
20
+ "eval_steps_per_second": 0.239,
21
+ "learning_rate": 0.001,
22
+ "step": 274
23
+ },
24
+ {
25
+ "epoch": 1.82,
26
+ "learning_rate": 0.001,
27
+ "loss": 0.2683,
28
+ "step": 500
29
+ },
30
+ {
31
+ "epoch": 2.0,
32
+ "eval_accuracy": 0.30549756437021575,
33
+ "eval_f1_macro": 0.4548607736351322,
34
+ "eval_f1_micro": 0.67727182342414,
35
+ "eval_loss": 0.18945501744747162,
36
+ "eval_roc_auc": 0.7743387387654568,
37
+ "eval_runtime": 387.1639,
38
+ "eval_samples_per_second": 7.423,
39
+ "eval_steps_per_second": 0.232,
40
+ "learning_rate": 0.001,
41
+ "step": 548
42
+ },
43
+ {
44
+ "epoch": 3.0,
45
+ "eval_accuracy": 0.35386221294363257,
46
+ "eval_f1_macro": 0.5202190109990653,
47
+ "eval_f1_micro": 0.7020794195763426,
48
+ "eval_loss": 0.17864234745502472,
49
+ "eval_roc_auc": 0.7911168944948984,
50
+ "eval_runtime": 391.2444,
51
+ "eval_samples_per_second": 7.346,
52
+ "eval_steps_per_second": 0.23,
53
+ "learning_rate": 0.001,
54
+ "step": 822
55
+ },
56
+ {
57
+ "epoch": 3.65,
58
+ "learning_rate": 0.001,
59
+ "loss": 0.2058,
60
+ "step": 1000
61
+ },
62
+ {
63
+ "epoch": 4.0,
64
+ "eval_accuracy": 0.36673625608907445,
65
+ "eval_f1_macro": 0.5666424404021909,
66
+ "eval_f1_micro": 0.7198384654215043,
67
+ "eval_loss": 0.17152228951454163,
68
+ "eval_roc_auc": 0.8057974071888091,
69
+ "eval_runtime": 384.9412,
70
+ "eval_samples_per_second": 7.466,
71
+ "eval_steps_per_second": 0.234,
72
+ "learning_rate": 0.001,
73
+ "step": 1096
74
+ },
75
+ {
76
+ "epoch": 5.0,
77
+ "eval_accuracy": 0.3768267223382046,
78
+ "eval_f1_macro": 0.5718368144674506,
79
+ "eval_f1_micro": 0.7220207418718584,
80
+ "eval_loss": 0.16623608767986298,
81
+ "eval_roc_auc": 0.8049842310066343,
82
+ "eval_runtime": 384.6479,
83
+ "eval_samples_per_second": 7.472,
84
+ "eval_steps_per_second": 0.234,
85
+ "learning_rate": 0.001,
86
+ "step": 1370
87
+ },
88
+ {
89
+ "epoch": 5.47,
90
+ "learning_rate": 0.001,
91
+ "loss": 0.1916,
92
+ "step": 1500
93
+ },
94
+ {
95
+ "epoch": 6.0,
96
+ "eval_accuracy": 0.37961029923451634,
97
+ "eval_f1_macro": 0.5721081100676888,
98
+ "eval_f1_micro": 0.715496603041087,
99
+ "eval_loss": 0.16475693881511688,
100
+ "eval_roc_auc": 0.7979565909429278,
101
+ "eval_runtime": 379.9116,
102
+ "eval_samples_per_second": 7.565,
103
+ "eval_steps_per_second": 0.237,
104
+ "learning_rate": 0.001,
105
+ "step": 1644
106
+ },
107
+ {
108
+ "epoch": 7.0,
109
+ "eval_accuracy": 0.38100208768267224,
110
+ "eval_f1_macro": 0.5973146797781492,
111
+ "eval_f1_micro": 0.7280701754385965,
112
+ "eval_loss": 0.16181063652038574,
113
+ "eval_roc_auc": 0.808226921681461,
114
+ "eval_runtime": 378.5774,
115
+ "eval_samples_per_second": 7.592,
116
+ "eval_steps_per_second": 0.238,
117
+ "learning_rate": 0.001,
118
+ "step": 1918
119
+ },
120
+ {
121
+ "epoch": 7.3,
122
+ "learning_rate": 0.001,
123
+ "loss": 0.1858,
124
+ "step": 2000
125
+ },
126
+ {
127
+ "epoch": 8.0,
128
+ "eval_accuracy": 0.38552540013917885,
129
+ "eval_f1_macro": 0.6061015606132648,
130
+ "eval_f1_micro": 0.737494522005885,
131
+ "eval_loss": 0.15984570980072021,
132
+ "eval_roc_auc": 0.8165736033401827,
133
+ "eval_runtime": 373.9089,
134
+ "eval_samples_per_second": 7.686,
135
+ "eval_steps_per_second": 0.241,
136
+ "learning_rate": 0.001,
137
+ "step": 2192
138
+ },
139
+ {
140
+ "epoch": 9.0,
141
+ "eval_accuracy": 0.39109255393180237,
142
+ "eval_f1_macro": 0.6209440717973264,
143
+ "eval_f1_micro": 0.7440332279461905,
144
+ "eval_loss": 0.15988709032535553,
145
+ "eval_roc_auc": 0.8223085810562136,
146
+ "eval_runtime": 382.5296,
147
+ "eval_samples_per_second": 7.513,
148
+ "eval_steps_per_second": 0.235,
149
+ "learning_rate": 0.001,
150
+ "step": 2466
151
+ },
152
+ {
153
+ "epoch": 9.12,
154
+ "learning_rate": 0.001,
155
+ "loss": 0.1839,
156
+ "step": 2500
157
+ },
158
+ {
159
+ "epoch": 10.0,
160
+ "eval_accuracy": 0.394919972164231,
161
+ "eval_f1_macro": 0.6047260524666048,
162
+ "eval_f1_micro": 0.7382172771596449,
163
+ "eval_loss": 0.15841498970985413,
164
+ "eval_roc_auc": 0.8173181533125079,
165
+ "eval_runtime": 384.4811,
166
+ "eval_samples_per_second": 7.475,
167
+ "eval_steps_per_second": 0.234,
168
+ "learning_rate": 0.001,
169
+ "step": 2740
170
+ },
171
+ {
172
+ "epoch": 10.95,
173
+ "learning_rate": 0.001,
174
+ "loss": 0.1815,
175
+ "step": 3000
176
+ },
177
+ {
178
+ "epoch": 11.0,
179
+ "eval_accuracy": 0.3959638135003479,
180
+ "eval_f1_macro": 0.6067885073789792,
181
+ "eval_f1_micro": 0.7413944173238203,
182
+ "eval_loss": 0.15687702596187592,
183
+ "eval_roc_auc": 0.8186288501697149,
184
+ "eval_runtime": 378.7669,
185
+ "eval_samples_per_second": 7.588,
186
+ "eval_steps_per_second": 0.238,
187
+ "learning_rate": 0.001,
188
+ "step": 3014
189
+ },
190
+ {
191
+ "epoch": 12.0,
192
+ "eval_accuracy": 0.39631176061238693,
193
+ "eval_f1_macro": 0.5952620618269702,
194
+ "eval_f1_micro": 0.7257172262961533,
195
+ "eval_loss": 0.15848152339458466,
196
+ "eval_roc_auc": 0.8043142106532862,
197
+ "eval_runtime": 379.0677,
198
+ "eval_samples_per_second": 7.582,
199
+ "eval_steps_per_second": 0.237,
200
+ "learning_rate": 0.001,
201
+ "step": 3288
202
+ },
203
+ {
204
+ "epoch": 12.77,
205
+ "learning_rate": 0.001,
206
+ "loss": 0.1807,
207
+ "step": 3500
208
+ },
209
+ {
210
+ "epoch": 13.0,
211
+ "eval_accuracy": 0.3966597077244259,
212
+ "eval_f1_macro": 0.6286378372829055,
213
+ "eval_f1_micro": 0.7513671162960261,
214
+ "eval_loss": 0.1580551415681839,
215
+ "eval_roc_auc": 0.8311449619256275,
216
+ "eval_runtime": 381.6295,
217
+ "eval_samples_per_second": 7.531,
218
+ "eval_steps_per_second": 0.236,
219
+ "learning_rate": 0.001,
220
+ "step": 3562
221
+ },
222
+ {
223
+ "epoch": 14.0,
224
+ "eval_accuracy": 0.4022268615170494,
225
+ "eval_f1_macro": 0.6230252873319017,
226
+ "eval_f1_micro": 0.7452783300198808,
227
+ "eval_loss": 0.15653984248638153,
228
+ "eval_roc_auc": 0.8224100131959987,
229
+ "eval_runtime": 376.7587,
230
+ "eval_samples_per_second": 7.628,
231
+ "eval_steps_per_second": 0.239,
232
+ "learning_rate": 0.001,
233
+ "step": 3836
234
+ },
235
+ {
236
+ "epoch": 14.6,
237
+ "learning_rate": 0.001,
238
+ "loss": 0.1795,
239
+ "step": 4000
240
+ },
241
+ {
242
+ "epoch": 15.0,
243
+ "eval_accuracy": 0.3990953375086987,
244
+ "eval_f1_macro": 0.6252632316687944,
245
+ "eval_f1_micro": 0.750355574794385,
246
+ "eval_loss": 0.1549140363931656,
247
+ "eval_roc_auc": 0.8261687325810277,
248
+ "eval_runtime": 378.8075,
249
+ "eval_samples_per_second": 7.587,
250
+ "eval_steps_per_second": 0.238,
251
+ "learning_rate": 0.001,
252
+ "step": 4110
253
+ },
254
+ {
255
+ "epoch": 16.0,
256
+ "eval_accuracy": 0.3938761308281141,
257
+ "eval_f1_macro": 0.6290292313985378,
258
+ "eval_f1_micro": 0.7446265030216187,
259
+ "eval_loss": 0.15725594758987427,
260
+ "eval_roc_auc": 0.8213875460516582,
261
+ "eval_runtime": 387.9526,
262
+ "eval_samples_per_second": 7.408,
263
+ "eval_steps_per_second": 0.232,
264
+ "learning_rate": 0.001,
265
+ "step": 4384
266
+ },
267
+ {
268
+ "epoch": 16.42,
269
+ "learning_rate": 0.001,
270
+ "loss": 0.178,
271
+ "step": 4500
272
+ },
273
+ {
274
+ "epoch": 17.0,
275
+ "eval_accuracy": 0.4025748086290884,
276
+ "eval_f1_macro": 0.6286656277196884,
277
+ "eval_f1_micro": 0.7519389388157086,
278
+ "eval_loss": 0.15514959394931793,
279
+ "eval_roc_auc": 0.8281482042533274,
280
+ "eval_runtime": 380.4468,
281
+ "eval_samples_per_second": 7.554,
282
+ "eval_steps_per_second": 0.237,
283
+ "learning_rate": 0.001,
284
+ "step": 4658
285
+ },
286
+ {
287
+ "epoch": 18.0,
288
+ "eval_accuracy": 0.3914405010438413,
289
+ "eval_f1_macro": 0.6155101525089491,
290
+ "eval_f1_micro": 0.7430460271922166,
291
+ "eval_loss": 0.15699204802513123,
292
+ "eval_roc_auc": 0.8203220599321907,
293
+ "eval_runtime": 379.1064,
294
+ "eval_samples_per_second": 7.581,
295
+ "eval_steps_per_second": 0.237,
296
+ "learning_rate": 0.001,
297
+ "step": 4932
298
+ },
299
+ {
300
+ "epoch": 18.25,
301
+ "learning_rate": 0.001,
302
+ "loss": 0.1764,
303
+ "step": 5000
304
+ },
305
+ {
306
+ "epoch": 19.0,
307
+ "eval_accuracy": 0.3990953375086987,
308
+ "eval_f1_macro": 0.6286877926596727,
309
+ "eval_f1_micro": 0.7480261112838048,
310
+ "eval_loss": 0.15578077733516693,
311
+ "eval_roc_auc": 0.8236456074129308,
312
+ "eval_runtime": 380.5374,
313
+ "eval_samples_per_second": 7.552,
314
+ "eval_steps_per_second": 0.237,
315
+ "learning_rate": 0.001,
316
+ "step": 5206
317
+ },
318
+ {
319
+ "epoch": 20.0,
320
+ "eval_accuracy": 0.4001391788448156,
321
+ "eval_f1_macro": 0.608545078452072,
322
+ "eval_f1_micro": 0.7402785655763534,
323
+ "eval_loss": 0.15735512971878052,
324
+ "eval_roc_auc": 0.8163778880079953,
325
+ "eval_runtime": 371.4966,
326
+ "eval_samples_per_second": 7.736,
327
+ "eval_steps_per_second": 0.242,
328
+ "learning_rate": 0.001,
329
+ "step": 5480
330
+ },
331
+ {
332
+ "epoch": 20.07,
333
+ "learning_rate": 0.001,
334
+ "loss": 0.1775,
335
+ "step": 5500
336
+ },
337
+ {
338
+ "epoch": 21.0,
339
+ "eval_accuracy": 0.40292275574112735,
340
+ "eval_f1_macro": 0.6245521933830414,
341
+ "eval_f1_micro": 0.753183153770813,
342
+ "eval_loss": 0.15607939660549164,
343
+ "eval_roc_auc": 0.8301931385420087,
344
+ "eval_runtime": 382.1339,
345
+ "eval_samples_per_second": 7.521,
346
+ "eval_steps_per_second": 0.236,
347
+ "learning_rate": 0.001,
348
+ "step": 5754
349
+ },
350
+ {
351
+ "epoch": 21.9,
352
+ "learning_rate": 0.0001,
353
+ "loss": 0.177,
354
+ "step": 6000
355
+ },
356
+ {
357
+ "epoch": 22.0,
358
+ "eval_accuracy": 0.39735560194850383,
359
+ "eval_f1_macro": 0.6430549553924351,
360
+ "eval_f1_micro": 0.7596049620619053,
361
+ "eval_loss": 0.1545252650976181,
362
+ "eval_roc_auc": 0.8377894663149374,
363
+ "eval_runtime": 374.7337,
364
+ "eval_samples_per_second": 7.669,
365
+ "eval_steps_per_second": 0.24,
366
+ "learning_rate": 0.0001,
367
+ "step": 6028
368
+ },
369
+ {
370
+ "epoch": 23.0,
371
+ "eval_accuracy": 0.4025748086290884,
372
+ "eval_f1_macro": 0.6291738798521619,
373
+ "eval_f1_micro": 0.7472035794183445,
374
+ "eval_loss": 0.1555616706609726,
375
+ "eval_roc_auc": 0.8233330160035968,
376
+ "eval_runtime": 372.3825,
377
+ "eval_samples_per_second": 7.718,
378
+ "eval_steps_per_second": 0.242,
379
+ "learning_rate": 0.0001,
380
+ "step": 6302
381
+ },
382
+ {
383
+ "epoch": 23.72,
384
+ "learning_rate": 0.0001,
385
+ "loss": 0.1762,
386
+ "step": 6500
387
+ },
388
+ {
389
+ "epoch": 24.0,
390
+ "eval_accuracy": 0.39944328462073764,
391
+ "eval_f1_macro": 0.6343316351234961,
392
+ "eval_f1_micro": 0.752788906009245,
393
+ "eval_loss": 0.1547899842262268,
394
+ "eval_roc_auc": 0.8282598991861336,
395
+ "eval_runtime": 369.1278,
396
+ "eval_samples_per_second": 7.786,
397
+ "eval_steps_per_second": 0.244,
398
+ "learning_rate": 0.0001,
399
+ "step": 6576
400
+ },
401
+ {
402
+ "epoch": 25.0,
403
+ "eval_accuracy": 0.39944328462073764,
404
+ "eval_f1_macro": 0.6224788217146914,
405
+ "eval_f1_micro": 0.7467564870259481,
406
+ "eval_loss": 0.15538595616817474,
407
+ "eval_roc_auc": 0.8221815416817054,
408
+ "eval_runtime": 366.9271,
409
+ "eval_samples_per_second": 7.833,
410
+ "eval_steps_per_second": 0.245,
411
+ "learning_rate": 0.0001,
412
+ "step": 6850
413
+ },
414
+ {
415
+ "epoch": 25.55,
416
+ "learning_rate": 0.0001,
417
+ "loss": 0.1759,
418
+ "step": 7000
419
+ },
420
+ {
421
+ "epoch": 26.0,
422
+ "eval_accuracy": 0.3977035490605428,
423
+ "eval_f1_macro": 0.632595682527143,
424
+ "eval_f1_micro": 0.7529426189308485,
425
+ "eval_loss": 0.1547958254814148,
426
+ "eval_roc_auc": 0.8296944220765509,
427
+ "eval_runtime": 367.9424,
428
+ "eval_samples_per_second": 7.811,
429
+ "eval_steps_per_second": 0.245,
430
+ "learning_rate": 0.0001,
431
+ "step": 7124
432
+ },
433
+ {
434
+ "epoch": 27.0,
435
+ "eval_accuracy": 0.3970076548364649,
436
+ "eval_f1_macro": 0.6352159754424679,
437
+ "eval_f1_micro": 0.7516299667855824,
438
+ "eval_loss": 0.1552300751209259,
439
+ "eval_roc_auc": 0.8281751565219652,
440
+ "eval_runtime": 367.5636,
441
+ "eval_samples_per_second": 7.819,
442
+ "eval_steps_per_second": 0.245,
443
+ "learning_rate": 0.0001,
444
+ "step": 7398
445
+ },
446
+ {
447
+ "epoch": 27.37,
448
+ "learning_rate": 0.0001,
449
+ "loss": 0.1752,
450
+ "step": 7500
451
+ },
452
+ {
453
+ "epoch": 28.0,
454
+ "eval_accuracy": 0.4091858037578288,
455
+ "eval_f1_macro": 0.6327633387519097,
456
+ "eval_f1_micro": 0.7523139577934099,
457
+ "eval_loss": 0.1543245166540146,
458
+ "eval_roc_auc": 0.8277194590316342,
459
+ "eval_runtime": 363.6815,
460
+ "eval_samples_per_second": 7.903,
461
+ "eval_steps_per_second": 0.247,
462
+ "learning_rate": 0.0001,
463
+ "step": 7672
464
+ },
465
+ {
466
+ "epoch": 29.0,
467
+ "eval_accuracy": 0.40187891440501045,
468
+ "eval_f1_macro": 0.6311913970379929,
469
+ "eval_f1_micro": 0.7505715168365771,
470
+ "eval_loss": 0.15445660054683685,
471
+ "eval_roc_auc": 0.8264967181798933,
472
+ "eval_runtime": 370.9747,
473
+ "eval_samples_per_second": 7.747,
474
+ "eval_steps_per_second": 0.243,
475
+ "learning_rate": 0.0001,
476
+ "step": 7946
477
+ },
478
+ {
479
+ "epoch": 29.2,
480
+ "learning_rate": 0.0001,
481
+ "loss": 0.1757,
482
+ "step": 8000
483
+ },
484
+ {
485
+ "epoch": 30.0,
486
+ "eval_accuracy": 0.40396659707724425,
487
+ "eval_f1_macro": 0.6394187347167111,
488
+ "eval_f1_micro": 0.7554370872962985,
489
+ "eval_loss": 0.15497015416622162,
490
+ "eval_roc_auc": 0.834040386953802,
491
+ "eval_runtime": 370.7394,
492
+ "eval_samples_per_second": 7.752,
493
+ "eval_steps_per_second": 0.243,
494
+ "learning_rate": 0.0001,
495
+ "step": 8220
496
+ },
497
+ {
498
+ "epoch": 31.0,
499
+ "eval_accuracy": 0.4022268615170494,
500
+ "eval_f1_macro": 0.6345022832926616,
501
+ "eval_f1_micro": 0.7511534912334668,
502
+ "eval_loss": 0.15540161728858948,
503
+ "eval_roc_auc": 0.8278824679295457,
504
+ "eval_runtime": 373.8253,
505
+ "eval_samples_per_second": 7.688,
506
+ "eval_steps_per_second": 0.241,
507
+ "learning_rate": 0.0001,
508
+ "step": 8494
509
+ },
510
+ {
511
+ "epoch": 31.02,
512
+ "learning_rate": 0.0001,
513
+ "loss": 0.1758,
514
+ "step": 8500
515
+ },
516
+ {
517
+ "epoch": 32.0,
518
+ "eval_accuracy": 0.4032707028531663,
519
+ "eval_f1_macro": 0.6301888508291651,
520
+ "eval_f1_micro": 0.7513253606213783,
521
+ "eval_loss": 0.15450650453567505,
522
+ "eval_roc_auc": 0.8274588541043629,
523
+ "eval_runtime": 366.8538,
524
+ "eval_samples_per_second": 7.834,
525
+ "eval_steps_per_second": 0.245,
526
+ "learning_rate": 0.0001,
527
+ "step": 8768
528
+ },
529
+ {
530
+ "epoch": 32.85,
531
+ "learning_rate": 0.0001,
532
+ "loss": 0.1755,
533
+ "step": 9000
534
+ },
535
+ {
536
+ "epoch": 33.0,
537
+ "eval_accuracy": 0.3977035490605428,
538
+ "eval_f1_macro": 0.6261364069131695,
539
+ "eval_f1_micro": 0.7456298600311042,
540
+ "eval_loss": 0.15546494722366333,
541
+ "eval_roc_auc": 0.8222675298841294,
542
+ "eval_runtime": 372.6997,
543
+ "eval_samples_per_second": 7.711,
544
+ "eval_steps_per_second": 0.241,
545
+ "learning_rate": 0.0001,
546
+ "step": 9042
547
+ },
548
+ {
549
+ "epoch": 34.0,
550
+ "eval_accuracy": 0.41092553931802367,
551
+ "eval_f1_macro": 0.6307477868895636,
552
+ "eval_f1_micro": 0.7514572739675058,
553
+ "eval_loss": 0.15329033136367798,
554
+ "eval_roc_auc": 0.8260358892930517,
555
+ "eval_runtime": 370.3661,
556
+ "eval_samples_per_second": 7.76,
557
+ "eval_steps_per_second": 0.243,
558
+ "learning_rate": 0.0001,
559
+ "step": 9316
560
+ },
561
+ {
562
+ "epoch": 34.67,
563
+ "learning_rate": 0.0001,
564
+ "loss": 0.1752,
565
+ "step": 9500
566
+ },
567
+ {
568
+ "epoch": 35.0,
569
+ "eval_accuracy": 0.40535838552540016,
570
+ "eval_f1_macro": 0.6325032556817062,
571
+ "eval_f1_micro": 0.7506499938095826,
572
+ "eval_loss": 0.15512260794639587,
573
+ "eval_roc_auc": 0.8260564148790938,
574
+ "eval_runtime": 369.1819,
575
+ "eval_samples_per_second": 7.785,
576
+ "eval_steps_per_second": 0.244,
577
+ "learning_rate": 0.0001,
578
+ "step": 9590
579
+ },
580
+ {
581
+ "epoch": 36.0,
582
+ "eval_accuracy": 0.4025748086290884,
583
+ "eval_f1_macro": 0.6299014754104307,
584
+ "eval_f1_micro": 0.7539002281556392,
585
+ "eval_loss": 0.15303632616996765,
586
+ "eval_roc_auc": 0.8287079244556137,
587
+ "eval_runtime": 369.3754,
588
+ "eval_samples_per_second": 7.781,
589
+ "eval_steps_per_second": 0.244,
590
+ "learning_rate": 0.0001,
591
+ "step": 9864
592
+ },
593
+ {
594
+ "epoch": 36.5,
595
+ "learning_rate": 0.0001,
596
+ "loss": 0.1752,
597
+ "step": 10000
598
+ },
599
+ {
600
+ "epoch": 37.0,
601
+ "eval_accuracy": 0.4036186499652053,
602
+ "eval_f1_macro": 0.6270201439545436,
603
+ "eval_f1_micro": 0.7464025415810128,
604
+ "eval_loss": 0.1545841097831726,
605
+ "eval_roc_auc": 0.8223240249935749,
606
+ "eval_runtime": 381.9233,
607
+ "eval_samples_per_second": 7.525,
608
+ "eval_steps_per_second": 0.236,
609
+ "learning_rate": 0.0001,
610
+ "step": 10138
611
+ },
612
+ {
613
+ "epoch": 38.0,
614
+ "eval_accuracy": 0.3987473903966597,
615
+ "eval_f1_macro": 0.6363851637994423,
616
+ "eval_f1_micro": 0.7538583541755627,
617
+ "eval_loss": 0.1548989862203598,
618
+ "eval_roc_auc": 0.8314331916384938,
619
+ "eval_runtime": 372.9722,
620
+ "eval_samples_per_second": 7.706,
621
+ "eval_steps_per_second": 0.241,
622
+ "learning_rate": 0.0001,
623
+ "step": 10412
624
+ },
625
+ {
626
+ "epoch": 38.32,
627
+ "learning_rate": 0.0001,
628
+ "loss": 0.1763,
629
+ "step": 10500
630
+ },
631
+ {
632
+ "epoch": 39.0,
633
+ "eval_accuracy": 0.3977035490605428,
634
+ "eval_f1_macro": 0.6420887435658987,
635
+ "eval_f1_micro": 0.7579290763003684,
636
+ "eval_loss": 0.15474249422550201,
637
+ "eval_roc_auc": 0.8360712223390366,
638
+ "eval_runtime": 367.6796,
639
+ "eval_samples_per_second": 7.817,
640
+ "eval_steps_per_second": 0.245,
641
+ "learning_rate": 0.0001,
642
+ "step": 10686
643
+ },
644
+ {
645
+ "epoch": 40.0,
646
+ "eval_accuracy": 0.40048712595685454,
647
+ "eval_f1_macro": 0.6344591092641424,
648
+ "eval_f1_micro": 0.7539234919077978,
649
+ "eval_loss": 0.15435411036014557,
650
+ "eval_roc_auc": 0.8302027785659021,
651
+ "eval_runtime": 366.3255,
652
+ "eval_samples_per_second": 7.845,
653
+ "eval_steps_per_second": 0.246,
654
+ "learning_rate": 0.0001,
655
+ "step": 10960
656
+ },
657
+ {
658
+ "epoch": 40.15,
659
+ "learning_rate": 0.0001,
660
+ "loss": 0.176,
661
+ "step": 11000
662
+ },
663
+ {
664
+ "epoch": 41.0,
665
+ "eval_accuracy": 0.40153096729297144,
666
+ "eval_f1_macro": 0.6347263815695223,
667
+ "eval_f1_micro": 0.7535591556210112,
668
+ "eval_loss": 0.15571445226669312,
669
+ "eval_roc_auc": 0.8297643933203154,
670
+ "eval_runtime": 368.1721,
671
+ "eval_samples_per_second": 7.806,
672
+ "eval_steps_per_second": 0.244,
673
+ "learning_rate": 0.0001,
674
+ "step": 11234
675
+ },
676
+ {
677
+ "epoch": 41.97,
678
+ "learning_rate": 0.0001,
679
+ "loss": 0.1758,
680
+ "step": 11500
681
+ },
682
+ {
683
+ "epoch": 42.0,
684
+ "eval_accuracy": 0.3959638135003479,
685
+ "eval_f1_macro": 0.627716108835686,
686
+ "eval_f1_micro": 0.7474281438992456,
687
+ "eval_loss": 0.15399669110774994,
688
+ "eval_roc_auc": 0.8226314850063985,
689
+ "eval_runtime": 371.1239,
690
+ "eval_samples_per_second": 7.744,
691
+ "eval_steps_per_second": 0.243,
692
+ "learning_rate": 0.0001,
693
+ "step": 11508
694
+ },
695
+ {
696
+ "epoch": 43.0,
697
+ "eval_accuracy": 0.3970076548364649,
698
+ "eval_f1_macro": 0.6383655319176039,
699
+ "eval_f1_micro": 0.7577960704199963,
700
+ "eval_loss": 0.1547509878873825,
701
+ "eval_roc_auc": 0.83741716645486,
702
+ "eval_runtime": 365.888,
703
+ "eval_samples_per_second": 7.855,
704
+ "eval_steps_per_second": 0.246,
705
+ "learning_rate": 1e-05,
706
+ "step": 11782
707
+ },
708
+ {
709
+ "epoch": 43.8,
710
+ "learning_rate": 1e-05,
711
+ "loss": 0.1764,
712
+ "step": 12000
713
+ },
714
+ {
715
+ "epoch": 44.0,
716
+ "eval_accuracy": 0.4011830201809325,
717
+ "eval_f1_macro": 0.6398260147869291,
718
+ "eval_f1_micro": 0.758185787048266,
719
+ "eval_loss": 0.15433941781520844,
720
+ "eval_roc_auc": 0.8352458155607984,
721
+ "eval_runtime": 377.6162,
722
+ "eval_samples_per_second": 7.611,
723
+ "eval_steps_per_second": 0.238,
724
+ "learning_rate": 1e-05,
725
+ "step": 12056
726
+ },
727
+ {
728
+ "epoch": 45.0,
729
+ "eval_accuracy": 0.3990953375086987,
730
+ "eval_f1_macro": 0.6206082021561579,
731
+ "eval_f1_micro": 0.7447583176396736,
732
+ "eval_loss": 0.1543821096420288,
733
+ "eval_roc_auc": 0.8196012986350353,
734
+ "eval_runtime": 369.6762,
735
+ "eval_samples_per_second": 7.774,
736
+ "eval_steps_per_second": 0.243,
737
+ "learning_rate": 1e-05,
738
+ "step": 12330
739
+ },
740
+ {
741
+ "epoch": 45.62,
742
+ "learning_rate": 1e-05,
743
+ "loss": 0.1746,
744
+ "step": 12500
745
+ },
746
+ {
747
+ "epoch": 46.0,
748
+ "eval_accuracy": 0.40501043841336115,
749
+ "eval_f1_macro": 0.6223405225541471,
750
+ "eval_f1_micro": 0.7452157598499062,
751
+ "eval_loss": 0.15461167693138123,
752
+ "eval_roc_auc": 0.8207527729569265,
753
+ "eval_runtime": 369.5577,
754
+ "eval_samples_per_second": 7.777,
755
+ "eval_steps_per_second": 0.244,
756
+ "learning_rate": 1e-05,
757
+ "step": 12604
758
+ },
759
+ {
760
+ "epoch": 46.0,
761
+ "learning_rate": 1e-05,
762
+ "step": 12604,
763
+ "total_flos": 4.586865882212175e+19,
764
+ "train_loss": 0.1827572128274107,
765
+ "train_runtime": 70769.3212,
766
+ "train_samples_per_second": 11.517,
767
+ "train_steps_per_second": 0.36
768
+ }
769
+ ],
770
+ "logging_steps": 500,
771
+ "max_steps": 25482,
772
+ "num_input_tokens_seen": 0,
773
+ "num_train_epochs": 93,
774
+ "save_steps": 500,
775
+ "total_flos": 4.586865882212175e+19,
776
+ "train_batch_size": 32,
777
+ "trial_name": null,
778
+ "trial_params": null
779
+ }