DiTo97 committed on
Commit
a4ce5cd
1 Parent(s): de4eb28

End of training

Browse files
README.md CHANGED
@@ -2,6 +2,8 @@
2
  license: apache-2.0
3
  base_model: microsoft/swinv2-base-patch4-window8-256
4
  tags:
 
 
5
  - generated_from_trainer
6
  datasets:
7
  - generator
@@ -15,11 +17,11 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # swinv2-base-panorama-IQA
17
 
18
- This model is a fine-tuned version of [microsoft/swinv2-base-patch4-window8-256](https://huggingface.co/microsoft/swinv2-base-patch4-window8-256) on the generator dataset.
19
  It achieves the following results on the evaluation set:
20
- - Loss: 0.0262
21
- - Srocc: 0.1319
22
- - Lcc: 0.2258
23
 
24
  ## Model description
25
 
 
2
  license: apache-2.0
3
  base_model: microsoft/swinv2-base-patch4-window8-256
4
  tags:
5
+ - image-classification
6
+ - vision
7
  - generated_from_trainer
8
  datasets:
9
  - generator
 
17
 
18
  # swinv2-base-panorama-IQA
19
 
20
+ This model is a fine-tuned version of [microsoft/swinv2-base-patch4-window8-256](https://huggingface.co/microsoft/swinv2-base-patch4-window8-256) on the isiqa-2019-hf dataset.
21
  It achieves the following results on the evaluation set:
22
+ - Loss: 0.0246
23
+ - Srocc: 0.0896
24
+ - Lcc: 0.1773
25
 
26
  ## Model description
27
 
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
- "epoch": 32.857142857142854,
3
- "eval_LCC": 0.23080971037643985,
4
- "eval_SROCC": 0.15515006002400963,
5
- "eval_loss": 0.025447649881243706,
6
- "eval_runtime": 36.1026,
7
- "eval_samples_per_second": 1.385,
8
- "eval_steps_per_second": 0.055,
9
- "total_flos": 2.895570431785304e+18,
10
- "train_loss": 0.030544885701459388,
11
- "train_runtime": 6073.1055,
12
- "train_samples_per_second": 1.762,
13
- "train_steps_per_second": 0.025
14
  }
 
1
  {
2
+ "epoch": 26.0,
3
+ "eval_LCC": 0.17734737426317984,
4
+ "eval_SROCC": 0.08955582232893158,
5
+ "eval_loss": 0.02460244856774807,
6
+ "eval_runtime": 37.0604,
7
+ "eval_samples_per_second": 1.349,
8
+ "eval_steps_per_second": 0.054,
9
+ "total_flos": 2.2813585220126638e+18,
10
+ "train_loss": 0.041763259517540646,
11
+ "train_runtime": 5114.8643,
12
+ "train_samples_per_second": 2.092,
13
+ "train_steps_per_second": 0.029
14
  }
runs/Aug03_06-48-53_c63f2be4bc15/events.out.tfevents.1722672904.c63f2be4bc15.53.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce4e32340872f601f5fdd65c3c7719c29ddb9e89a6c2b185ad60f84756caa62d
3
+ size 448
test_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 32.857142857142854,
3
- "eval_LCC": 0.23080971037643985,
4
- "eval_SROCC": 0.15515006002400963,
5
- "eval_loss": 0.025447649881243706,
6
- "eval_runtime": 36.1026,
7
- "eval_samples_per_second": 1.385,
8
- "eval_steps_per_second": 0.055
9
  }
 
1
  {
2
+ "epoch": 26.0,
3
+ "eval_LCC": 0.17734737426317984,
4
+ "eval_SROCC": 0.08955582232893158,
5
+ "eval_loss": 0.02460244856774807,
6
+ "eval_runtime": 37.0604,
7
+ "eval_samples_per_second": 1.349,
8
+ "eval_steps_per_second": 0.054
9
  }
trainer_state.json CHANGED
@@ -1,428 +1,344 @@
1
  {
2
- "best_metric": 0.025447649881243706,
3
- "best_model_checkpoint": "/kaggle/working/output/checkpoint-98",
4
- "epoch": 32.857142857142854,
5
  "eval_steps": 500,
6
- "global_step": 115,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.8571428571428571,
13
- "eval_LCC": -0.13879293215538938,
14
- "eval_SROCC": -0.15851140456182472,
15
- "eval_loss": 0.26562368869781494,
16
- "eval_runtime": 38.0061,
17
- "eval_samples_per_second": 1.316,
18
- "eval_steps_per_second": 0.053,
19
  "step": 3
20
  },
21
  {
22
  "epoch": 2.0,
23
- "eval_LCC": -0.12769050797605289,
24
- "eval_SROCC": -0.18098439375750297,
25
- "eval_loss": 0.06580950319766998,
26
- "eval_runtime": 35.9745,
27
- "eval_samples_per_second": 1.39,
28
- "eval_steps_per_second": 0.056,
29
  "step": 7
30
  },
31
  {
32
  "epoch": 2.857142857142857,
33
- "grad_norm": 6.6160054206848145,
34
  "learning_rate": 1.3333333333333333e-05,
35
- "loss": 0.2144,
36
  "step": 10
37
  },
38
  {
39
  "epoch": 2.857142857142857,
40
- "eval_LCC": -0.11849463789687457,
41
- "eval_SROCC": -0.16840336134453782,
42
- "eval_loss": 0.1341191679239273,
43
- "eval_runtime": 35.7755,
44
- "eval_samples_per_second": 1.398,
45
- "eval_steps_per_second": 0.056,
46
  "step": 10
47
  },
48
  {
49
  "epoch": 4.0,
50
- "eval_LCC": -0.13792803325263353,
51
- "eval_SROCC": -0.23822328931572626,
52
- "eval_loss": 0.05623332038521767,
53
- "eval_runtime": 36.0015,
54
- "eval_samples_per_second": 1.389,
55
- "eval_steps_per_second": 0.056,
56
  "step": 14
57
  },
58
  {
59
  "epoch": 4.857142857142857,
60
- "eval_LCC": -0.14276549217386758,
61
- "eval_SROCC": -0.1657142857142857,
62
- "eval_loss": 0.061644963920116425,
63
- "eval_runtime": 35.7987,
64
- "eval_samples_per_second": 1.397,
65
- "eval_steps_per_second": 0.056,
66
  "step": 17
67
  },
68
  {
69
  "epoch": 5.714285714285714,
70
- "grad_norm": 1.571254014968872,
71
  "learning_rate": 1.925925925925926e-05,
72
- "loss": 0.0575,
73
  "step": 20
74
  },
75
  {
76
  "epoch": 6.0,
77
- "eval_LCC": -0.1185043581646345,
78
- "eval_SROCC": -0.15006002400960383,
79
- "eval_loss": 0.05936155468225479,
80
- "eval_runtime": 36.1077,
81
- "eval_samples_per_second": 1.385,
82
- "eval_steps_per_second": 0.055,
83
  "step": 21
84
  },
85
  {
86
  "epoch": 6.857142857142857,
87
- "eval_LCC": -0.11221146661280773,
88
- "eval_SROCC": -0.12038415366146459,
89
- "eval_loss": 0.05129233002662659,
90
- "eval_runtime": 35.8427,
91
- "eval_samples_per_second": 1.395,
92
- "eval_steps_per_second": 0.056,
93
  "step": 24
94
  },
95
  {
96
  "epoch": 8.0,
97
- "eval_LCC": -0.09867320013209922,
98
- "eval_SROCC": -0.06929171668667466,
99
- "eval_loss": 0.041676923632621765,
100
- "eval_runtime": 36.0348,
101
- "eval_samples_per_second": 1.388,
102
- "eval_steps_per_second": 0.056,
103
  "step": 28
104
  },
105
  {
106
  "epoch": 8.571428571428571,
107
- "grad_norm": 0.800278902053833,
108
  "learning_rate": 1.7777777777777777e-05,
109
- "loss": 0.0201,
110
  "step": 30
111
  },
112
  {
113
  "epoch": 8.857142857142858,
114
- "eval_LCC": -0.08130033550198683,
115
- "eval_SROCC": -0.07140456182472989,
116
- "eval_loss": 0.04058969393372536,
117
- "eval_runtime": 36.0799,
118
- "eval_samples_per_second": 1.386,
119
- "eval_steps_per_second": 0.055,
120
  "step": 31
121
  },
122
  {
123
  "epoch": 10.0,
124
- "eval_LCC": -0.0384540491268753,
125
- "eval_SROCC": -0.02357743097238895,
126
- "eval_loss": 0.034295279532670975,
127
- "eval_runtime": 36.1741,
128
- "eval_samples_per_second": 1.382,
129
- "eval_steps_per_second": 0.055,
130
  "step": 35
131
  },
132
  {
133
  "epoch": 10.857142857142858,
134
- "eval_LCC": 0.0010666390633979736,
135
- "eval_SROCC": 0.00744297719087635,
136
- "eval_loss": 0.031487837433815,
137
- "eval_runtime": 36.0877,
138
- "eval_samples_per_second": 1.386,
139
- "eval_steps_per_second": 0.055,
140
  "step": 38
141
  },
142
  {
143
  "epoch": 11.428571428571429,
144
- "grad_norm": 0.3433558940887451,
145
  "learning_rate": 1.6296296296296297e-05,
146
- "loss": 0.0142,
147
  "step": 40
148
  },
149
  {
150
  "epoch": 12.0,
151
- "eval_LCC": 0.03256796518969046,
152
- "eval_SROCC": 0.02175270108043217,
153
- "eval_loss": 0.03087497688829899,
154
- "eval_runtime": 35.9382,
155
- "eval_samples_per_second": 1.391,
156
- "eval_steps_per_second": 0.056,
157
  "step": 42
158
  },
159
  {
160
  "epoch": 12.857142857142858,
161
- "eval_LCC": 0.04658237209190174,
162
- "eval_SROCC": 0.03807923169267707,
163
- "eval_loss": 0.030963044613599777,
164
- "eval_runtime": 36.1523,
165
- "eval_samples_per_second": 1.383,
166
- "eval_steps_per_second": 0.055,
167
  "step": 45
168
  },
169
  {
170
  "epoch": 14.0,
171
- "eval_LCC": 0.06813425647249395,
172
- "eval_SROCC": 0.05354141656662665,
173
- "eval_loss": 0.029918596148490906,
174
- "eval_runtime": 35.716,
175
- "eval_samples_per_second": 1.4,
176
- "eval_steps_per_second": 0.056,
177
  "step": 49
178
  },
179
  {
180
  "epoch": 14.285714285714286,
181
- "grad_norm": 0.8214556574821472,
182
  "learning_rate": 1.4814814814814815e-05,
183
- "loss": 0.0097,
184
  "step": 50
185
  },
186
  {
187
  "epoch": 14.857142857142858,
188
- "eval_LCC": 0.07492776273224191,
189
- "eval_SROCC": 0.06036014405762304,
190
- "eval_loss": 0.03141804039478302,
191
- "eval_runtime": 36.0969,
192
- "eval_samples_per_second": 1.385,
193
- "eval_steps_per_second": 0.055,
194
  "step": 52
195
  },
196
  {
197
  "epoch": 16.0,
198
- "eval_LCC": 0.10494559746287975,
199
- "eval_SROCC": 0.07851140456182472,
200
- "eval_loss": 0.02881774678826332,
201
- "eval_runtime": 35.9818,
202
- "eval_samples_per_second": 1.39,
203
- "eval_steps_per_second": 0.056,
204
  "step": 56
205
  },
206
  {
207
  "epoch": 16.857142857142858,
208
- "eval_LCC": 0.12693485096467225,
209
- "eval_SROCC": 0.09435774309723889,
210
- "eval_loss": 0.0282765943557024,
211
- "eval_runtime": 36.0917,
212
- "eval_samples_per_second": 1.385,
213
- "eval_steps_per_second": 0.055,
214
  "step": 59
215
  },
216
  {
217
  "epoch": 17.142857142857142,
218
- "grad_norm": 0.5231106877326965,
219
  "learning_rate": 1.3333333333333333e-05,
220
- "loss": 0.0083,
221
  "step": 60
222
  },
223
  {
224
  "epoch": 18.0,
225
- "eval_LCC": 0.14480085260022996,
226
- "eval_SROCC": 0.1022328931572629,
227
- "eval_loss": 0.0297652930021286,
228
- "eval_runtime": 35.9054,
229
- "eval_samples_per_second": 1.393,
230
- "eval_steps_per_second": 0.056,
231
  "step": 63
232
  },
233
  {
234
  "epoch": 18.857142857142858,
235
- "eval_LCC": 0.16514532692258366,
236
- "eval_SROCC": 0.11183673469387755,
237
- "eval_loss": 0.02742738462984562,
238
- "eval_runtime": 35.7423,
239
- "eval_samples_per_second": 1.399,
240
- "eval_steps_per_second": 0.056,
241
  "step": 66
242
  },
243
  {
244
  "epoch": 20.0,
245
- "grad_norm": 0.43896809220314026,
246
  "learning_rate": 1.1851851851851852e-05,
247
- "loss": 0.0063,
248
  "step": 70
249
  },
250
  {
251
  "epoch": 20.0,
252
- "eval_LCC": 0.17032522192761249,
253
- "eval_SROCC": 0.12240096038415366,
254
- "eval_loss": 0.028603849932551384,
255
- "eval_runtime": 36.0386,
256
- "eval_samples_per_second": 1.387,
257
- "eval_steps_per_second": 0.055,
258
  "step": 70
259
  },
260
  {
261
  "epoch": 20.857142857142858,
262
- "eval_LCC": 0.1833462608756279,
263
- "eval_SROCC": 0.13709483793517407,
264
- "eval_loss": 0.028336353600025177,
265
- "eval_runtime": 35.9704,
266
- "eval_samples_per_second": 1.39,
267
- "eval_steps_per_second": 0.056,
268
  "step": 73
269
  },
270
  {
271
  "epoch": 22.0,
272
- "eval_LCC": 0.19435351727111327,
273
- "eval_SROCC": 0.13171668667466988,
274
- "eval_loss": 0.028169861063361168,
275
- "eval_runtime": 36.261,
276
- "eval_samples_per_second": 1.379,
277
- "eval_steps_per_second": 0.055,
278
  "step": 77
279
  },
280
  {
281
  "epoch": 22.857142857142858,
282
- "grad_norm": 0.3503696024417877,
283
  "learning_rate": 1.037037037037037e-05,
284
- "loss": 0.0059,
285
  "step": 80
286
  },
287
  {
288
  "epoch": 22.857142857142858,
289
- "eval_LCC": 0.20354883288709705,
290
- "eval_SROCC": 0.13815126050420168,
291
- "eval_loss": 0.027723778039216995,
292
- "eval_runtime": 36.0717,
293
- "eval_samples_per_second": 1.386,
294
- "eval_steps_per_second": 0.055,
295
  "step": 80
296
  },
297
  {
298
  "epoch": 24.0,
299
- "eval_LCC": 0.21464098120344227,
300
- "eval_SROCC": 0.1479471788715486,
301
- "eval_loss": 0.027026496827602386,
302
- "eval_runtime": 36.0759,
303
- "eval_samples_per_second": 1.386,
304
- "eval_steps_per_second": 0.055,
305
  "step": 84
306
  },
307
  {
308
  "epoch": 24.857142857142858,
309
- "eval_LCC": 0.21974470798595805,
310
- "eval_SROCC": 0.1499639855942377,
311
- "eval_loss": 0.026259804144501686,
312
- "eval_runtime": 35.9282,
313
- "eval_samples_per_second": 1.392,
314
- "eval_steps_per_second": 0.056,
315
  "step": 87
316
  },
317
  {
318
  "epoch": 25.714285714285715,
319
- "grad_norm": 0.24314194917678833,
320
  "learning_rate": 8.888888888888888e-06,
321
- "loss": 0.0046,
322
  "step": 90
323
  },
324
  {
325
  "epoch": 26.0,
326
- "eval_LCC": 0.21990605203497599,
327
- "eval_SROCC": 0.13642256902761105,
328
- "eval_loss": 0.026854444295167923,
329
- "eval_runtime": 36.0574,
330
- "eval_samples_per_second": 1.387,
331
- "eval_steps_per_second": 0.055,
332
  "step": 91
333
  },
334
  {
335
- "epoch": 26.857142857142858,
336
- "eval_LCC": 0.22513542090356625,
337
- "eval_SROCC": 0.1406482593037215,
338
- "eval_loss": 0.025891508907079697,
339
- "eval_runtime": 36.0239,
340
- "eval_samples_per_second": 1.388,
341
- "eval_steps_per_second": 0.056,
342
- "step": 94
343
- },
344
- {
345
- "epoch": 28.0,
346
- "eval_LCC": 0.23080971037643985,
347
- "eval_SROCC": 0.15515006002400963,
348
- "eval_loss": 0.025447649881243706,
349
- "eval_runtime": 36.1194,
350
- "eval_samples_per_second": 1.384,
351
- "eval_steps_per_second": 0.055,
352
- "step": 98
353
- },
354
- {
355
- "epoch": 28.571428571428573,
356
- "grad_norm": 0.2996827960014343,
357
- "learning_rate": 7.4074074074074075e-06,
358
- "loss": 0.0039,
359
- "step": 100
360
- },
361
- {
362
- "epoch": 28.857142857142858,
363
- "eval_LCC": 0.22614882286734456,
364
- "eval_SROCC": 0.14804321728691477,
365
- "eval_loss": 0.026662170886993408,
366
- "eval_runtime": 35.7837,
367
- "eval_samples_per_second": 1.397,
368
- "eval_steps_per_second": 0.056,
369
- "step": 101
370
- },
371
- {
372
- "epoch": 30.0,
373
- "eval_LCC": 0.22468420274739098,
374
- "eval_SROCC": 0.14890756302521005,
375
- "eval_loss": 0.02701684460043907,
376
- "eval_runtime": 35.9924,
377
- "eval_samples_per_second": 1.389,
378
- "eval_steps_per_second": 0.056,
379
- "step": 105
380
- },
381
- {
382
- "epoch": 30.857142857142858,
383
- "eval_LCC": 0.2319344817291397,
384
- "eval_SROCC": 0.15764705882352942,
385
- "eval_loss": 0.026096588000655174,
386
- "eval_runtime": 35.7006,
387
- "eval_samples_per_second": 1.401,
388
- "eval_steps_per_second": 0.056,
389
- "step": 108
390
- },
391
- {
392
- "epoch": 31.428571428571427,
393
- "grad_norm": 0.31328070163726807,
394
- "learning_rate": 5.925925925925926e-06,
395
- "loss": 0.0041,
396
- "step": 110
397
- },
398
- {
399
- "epoch": 32.0,
400
- "eval_LCC": 0.23109019043125076,
401
- "eval_SROCC": 0.1630252100840336,
402
- "eval_loss": 0.026817049831151962,
403
- "eval_runtime": 36.031,
404
- "eval_samples_per_second": 1.388,
405
- "eval_steps_per_second": 0.056,
406
- "step": 112
407
- },
408
- {
409
- "epoch": 32.857142857142854,
410
- "eval_LCC": 0.22573394181226267,
411
- "eval_SROCC": 0.1569747899159664,
412
- "eval_loss": 0.028198465704917908,
413
- "eval_runtime": 35.9924,
414
- "eval_samples_per_second": 1.389,
415
- "eval_steps_per_second": 0.056,
416
- "step": 115
417
- },
418
- {
419
- "epoch": 32.857142857142854,
420
- "step": 115,
421
- "total_flos": 2.895570431785304e+18,
422
- "train_loss": 0.030544885701459388,
423
- "train_runtime": 6073.1055,
424
- "train_samples_per_second": 1.762,
425
- "train_steps_per_second": 0.025
426
  }
427
  ],
428
  "logging_steps": 10,
@@ -451,7 +367,7 @@
451
  "attributes": {}
452
  }
453
  },
454
- "total_flos": 2.895570431785304e+18,
455
  "train_batch_size": 16,
456
  "trial_name": null,
457
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.02460244856774807,
3
+ "best_model_checkpoint": "/kaggle/working/output/checkpoint-73",
4
+ "epoch": 26.0,
5
  "eval_steps": 500,
6
+ "global_step": 91,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.8571428571428571,
13
+ "eval_LCC": -0.13996786173466005,
14
+ "eval_SROCC": -0.1660984393757503,
15
+ "eval_loss": 0.2684723138809204,
16
+ "eval_runtime": 39.3373,
17
+ "eval_samples_per_second": 1.271,
18
+ "eval_steps_per_second": 0.051,
19
  "step": 3
20
  },
21
  {
22
  "epoch": 2.0,
23
+ "eval_LCC": -0.13191836249511346,
24
+ "eval_SROCC": -0.20710684273709484,
25
+ "eval_loss": 0.06745556741952896,
26
+ "eval_runtime": 37.0134,
27
+ "eval_samples_per_second": 1.351,
28
+ "eval_steps_per_second": 0.054,
29
  "step": 7
30
  },
31
  {
32
  "epoch": 2.857142857142857,
33
+ "grad_norm": 5.974637508392334,
34
  "learning_rate": 1.3333333333333333e-05,
35
+ "loss": 0.223,
36
  "step": 10
37
  },
38
  {
39
  "epoch": 2.857142857142857,
40
+ "eval_LCC": -0.11444761651756143,
41
+ "eval_SROCC": -0.19721488595438177,
42
+ "eval_loss": 0.13801459968090057,
43
+ "eval_runtime": 37.041,
44
+ "eval_samples_per_second": 1.35,
45
+ "eval_steps_per_second": 0.054,
46
  "step": 10
47
  },
48
  {
49
  "epoch": 4.0,
50
+ "eval_LCC": -0.11619739343449043,
51
+ "eval_SROCC": -0.23620648259303723,
52
+ "eval_loss": 0.0638759583234787,
53
+ "eval_runtime": 37.3454,
54
+ "eval_samples_per_second": 1.339,
55
+ "eval_steps_per_second": 0.054,
56
  "step": 14
57
  },
58
  {
59
  "epoch": 4.857142857142857,
60
+ "eval_LCC": -0.1097281268596262,
61
+ "eval_SROCC": -0.17599039615846337,
62
+ "eval_loss": 0.06009223312139511,
63
+ "eval_runtime": 39.2582,
64
+ "eval_samples_per_second": 1.274,
65
+ "eval_steps_per_second": 0.051,
66
  "step": 17
67
  },
68
  {
69
  "epoch": 5.714285714285714,
70
+ "grad_norm": 1.5656846761703491,
71
  "learning_rate": 1.925925925925926e-05,
72
+ "loss": 0.0607,
73
  "step": 20
74
  },
75
  {
76
  "epoch": 6.0,
77
+ "eval_LCC": -0.08523254844178266,
78
+ "eval_SROCC": -0.12902761104441776,
79
+ "eval_loss": 0.06266126781702042,
80
+ "eval_runtime": 37.4159,
81
+ "eval_samples_per_second": 1.336,
82
+ "eval_steps_per_second": 0.053,
83
  "step": 21
84
  },
85
  {
86
  "epoch": 6.857142857142857,
87
+ "eval_LCC": -0.07908973191513438,
88
+ "eval_SROCC": -0.10501800720288115,
89
+ "eval_loss": 0.054282378405332565,
90
+ "eval_runtime": 39.9947,
91
+ "eval_samples_per_second": 1.25,
92
+ "eval_steps_per_second": 0.05,
93
  "step": 24
94
  },
95
  {
96
  "epoch": 8.0,
97
+ "eval_LCC": -0.07022943984845728,
98
+ "eval_SROCC": -0.0683313325330132,
99
+ "eval_loss": 0.04083505645394325,
100
+ "eval_runtime": 39.4734,
101
+ "eval_samples_per_second": 1.267,
102
+ "eval_steps_per_second": 0.051,
103
  "step": 28
104
  },
105
  {
106
  "epoch": 8.571428571428571,
107
+ "grad_norm": 0.6326273679733276,
108
  "learning_rate": 1.7777777777777777e-05,
109
+ "loss": 0.0212,
110
  "step": 30
111
  },
112
  {
113
  "epoch": 8.857142857142858,
114
+ "eval_LCC": -0.05666279490414187,
115
+ "eval_SROCC": -0.06919567827130851,
116
+ "eval_loss": 0.04194454103708267,
117
+ "eval_runtime": 37.518,
118
+ "eval_samples_per_second": 1.333,
119
+ "eval_steps_per_second": 0.053,
120
  "step": 31
121
  },
122
  {
123
  "epoch": 10.0,
124
+ "eval_LCC": -0.02743218726796948,
125
+ "eval_SROCC": -0.037022809123649456,
126
+ "eval_loss": 0.03434378281235695,
127
+ "eval_runtime": 37.3074,
128
+ "eval_samples_per_second": 1.34,
129
+ "eval_steps_per_second": 0.054,
130
  "step": 35
131
  },
132
  {
133
  "epoch": 10.857142857142858,
134
+ "eval_LCC": -0.0012650189550020947,
135
+ "eval_SROCC": -0.033949579831932766,
136
+ "eval_loss": 0.03074028715491295,
137
+ "eval_runtime": 39.2094,
138
+ "eval_samples_per_second": 1.275,
139
+ "eval_steps_per_second": 0.051,
140
  "step": 38
141
  },
142
  {
143
  "epoch": 11.428571428571429,
144
+ "grad_norm": 0.3264749348163605,
145
  "learning_rate": 1.6296296296296297e-05,
146
+ "loss": 0.0168,
147
  "step": 40
148
  },
149
  {
150
  "epoch": 12.0,
151
+ "eval_LCC": 0.02330881609272888,
152
+ "eval_SROCC": -0.02809123649459784,
153
+ "eval_loss": 0.029941115528345108,
154
+ "eval_runtime": 39.1241,
155
+ "eval_samples_per_second": 1.278,
156
+ "eval_steps_per_second": 0.051,
157
  "step": 42
158
  },
159
  {
160
  "epoch": 12.857142857142858,
161
+ "eval_LCC": 0.03261216335612809,
162
+ "eval_SROCC": -0.042785114045618244,
163
+ "eval_loss": 0.03004513680934906,
164
+ "eval_runtime": 36.9998,
165
+ "eval_samples_per_second": 1.351,
166
+ "eval_steps_per_second": 0.054,
167
  "step": 45
168
  },
169
  {
170
  "epoch": 14.0,
171
+ "eval_LCC": 0.051745647526359385,
172
+ "eval_SROCC": -0.02376950780312125,
173
+ "eval_loss": 0.028606927022337914,
174
+ "eval_runtime": 36.9029,
175
+ "eval_samples_per_second": 1.355,
176
+ "eval_steps_per_second": 0.054,
177
  "step": 49
178
  },
179
  {
180
  "epoch": 14.285714285714286,
181
+ "grad_norm": 0.6258419156074524,
182
  "learning_rate": 1.4814814814814815e-05,
183
+ "loss": 0.0143,
184
  "step": 50
185
  },
186
  {
187
  "epoch": 14.857142857142858,
188
+ "eval_LCC": 0.06012754354341758,
189
+ "eval_SROCC": -0.018583433373349337,
190
+ "eval_loss": 0.028338493779301643,
191
+ "eval_runtime": 39.2004,
192
+ "eval_samples_per_second": 1.275,
193
+ "eval_steps_per_second": 0.051,
194
  "step": 52
195
  },
196
  {
197
  "epoch": 16.0,
198
+ "eval_LCC": 0.08678963760193395,
199
+ "eval_SROCC": -0.0024489795918367346,
200
+ "eval_loss": 0.027331581339240074,
201
+ "eval_runtime": 39.1787,
202
+ "eval_samples_per_second": 1.276,
203
+ "eval_steps_per_second": 0.051,
204
  "step": 56
205
  },
206
  {
207
  "epoch": 16.857142857142858,
208
+ "eval_LCC": 0.11189936135943072,
209
+ "eval_SROCC": 0.028283313325330132,
210
+ "eval_loss": 0.02574434131383896,
211
+ "eval_runtime": 37.6775,
212
+ "eval_samples_per_second": 1.327,
213
+ "eval_steps_per_second": 0.053,
214
  "step": 59
215
  },
216
  {
217
  "epoch": 17.142857142857142,
218
+ "grad_norm": 0.2967870831489563,
219
  "learning_rate": 1.3333333333333333e-05,
220
+ "loss": 0.013,
221
  "step": 60
222
  },
223
  {
224
  "epoch": 18.0,
225
+ "eval_LCC": 0.140408573006196,
226
+ "eval_SROCC": 0.05421368547418968,
227
+ "eval_loss": 0.024704232811927795,
228
+ "eval_runtime": 39.5048,
229
+ "eval_samples_per_second": 1.266,
230
+ "eval_steps_per_second": 0.051,
231
  "step": 63
232
  },
233
  {
234
  "epoch": 18.857142857142858,
235
+ "eval_LCC": 0.15329553575685126,
236
+ "eval_SROCC": 0.07025210084033613,
237
+ "eval_loss": 0.0247227493673563,
238
+ "eval_runtime": 37.5863,
239
+ "eval_samples_per_second": 1.33,
240
+ "eval_steps_per_second": 0.053,
241
  "step": 66
242
  },
243
  {
244
  "epoch": 20.0,
245
+ "grad_norm": 0.35133129358291626,
246
  "learning_rate": 1.1851851851851852e-05,
247
+ "loss": 0.0111,
248
  "step": 70
249
  },
250
  {
251
  "epoch": 20.0,
252
+ "eval_LCC": 0.16704999475534,
253
+ "eval_SROCC": 0.08004801920768306,
254
+ "eval_loss": 0.02460792474448681,
255
+ "eval_runtime": 39.6008,
256
+ "eval_samples_per_second": 1.263,
257
+ "eval_steps_per_second": 0.051,
258
  "step": 70
259
  },
260
  {
261
  "epoch": 20.857142857142858,
262
+ "eval_LCC": 0.17734737426317984,
263
+ "eval_SROCC": 0.08955582232893158,
264
+ "eval_loss": 0.02460244856774807,
265
+ "eval_runtime": 39.3838,
266
+ "eval_samples_per_second": 1.27,
267
+ "eval_steps_per_second": 0.051,
268
  "step": 73
269
  },
270
  {
271
  "epoch": 22.0,
272
+ "eval_LCC": 0.183477067706457,
273
+ "eval_SROCC": 0.09983193277310923,
274
+ "eval_loss": 0.025651078671216965,
275
+ "eval_runtime": 37.2011,
276
+ "eval_samples_per_second": 1.344,
277
+ "eval_steps_per_second": 0.054,
278
  "step": 77
279
  },
280
  {
281
  "epoch": 22.857142857142858,
282
+ "grad_norm": 0.3938016891479492,
283
  "learning_rate": 1.037037037037037e-05,
284
+ "loss": 0.0104,
285
  "step": 80
286
  },
287
  {
288
  "epoch": 22.857142857142858,
289
+ "eval_LCC": 0.1943336496302965,
290
+ "eval_SROCC": 0.10165666266506602,
291
+ "eval_loss": 0.02549559995532036,
292
+ "eval_runtime": 39.3404,
293
+ "eval_samples_per_second": 1.271,
294
+ "eval_steps_per_second": 0.051,
295
  "step": 80
296
  },
297
  {
298
  "epoch": 24.0,
299
+ "eval_LCC": 0.20850983626278138,
300
+ "eval_SROCC": 0.11490996398559422,
301
+ "eval_loss": 0.02545199543237686,
302
+ "eval_runtime": 39.5182,
303
+ "eval_samples_per_second": 1.265,
304
+ "eval_steps_per_second": 0.051,
305
  "step": 84
306
  },
307
  {
308
  "epoch": 24.857142857142858,
309
+ "eval_LCC": 0.21549093905447098,
310
+ "eval_SROCC": 0.12451380552220888,
311
+ "eval_loss": 0.025542089715600014,
312
+ "eval_runtime": 37.4091,
313
+ "eval_samples_per_second": 1.337,
314
+ "eval_steps_per_second": 0.053,
315
  "step": 87
316
  },
317
  {
318
  "epoch": 25.714285714285715,
319
+ "grad_norm": 0.5060675740242004,
320
  "learning_rate": 8.888888888888888e-06,
321
+ "loss": 0.0088,
322
  "step": 90
323
  },
324
  {
325
  "epoch": 26.0,
326
+ "eval_LCC": 0.22575900814493188,
327
+ "eval_SROCC": 0.13190876350540215,
328
+ "eval_loss": 0.026240630075335503,
329
+ "eval_runtime": 38.9035,
330
+ "eval_samples_per_second": 1.285,
331
+ "eval_steps_per_second": 0.051,
332
  "step": 91
333
  },
334
  {
335
+ "epoch": 26.0,
336
+ "step": 91,
337
+ "total_flos": 2.2813585220126638e+18,
338
+ "train_loss": 0.041763259517540646,
339
+ "train_runtime": 5114.8643,
340
+ "train_samples_per_second": 2.092,
341
+ "train_steps_per_second": 0.029
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
342
  }
343
  ],
344
  "logging_steps": 10,
 
367
  "attributes": {}
368
  }
369
  },
370
+ "total_flos": 2.2813585220126638e+18,
371
  "train_batch_size": 16,
372
  "trial_name": null,
373
  "trial_params": null
training_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 32.857142857142854,
3
- "total_flos": 2.895570431785304e+18,
4
- "train_loss": 0.030544885701459388,
5
- "train_runtime": 6073.1055,
6
- "train_samples_per_second": 1.762,
7
- "train_steps_per_second": 0.025
8
  }
 
1
  {
2
+ "epoch": 26.0,
3
+ "total_flos": 2.2813585220126638e+18,
4
+ "train_loss": 0.041763259517540646,
5
+ "train_runtime": 5114.8643,
6
+ "train_samples_per_second": 2.092,
7
+ "train_steps_per_second": 0.029
8
  }