ben81828 committed
Commit d1c4b8a
1 Parent(s): a753bd1

Training in progress, step 50, checkpoint

last-checkpoint/adapter_config.json CHANGED
@@ -19,7 +19,7 @@
  "r": 8,
  "rank_pattern": {},
  "revision": null,
- "target_modules": "^(?!.*patch_embed).*(?:fc2|q_proj|up_proj|v_proj|gate_proj|k_proj|qkv|down_proj|fc1|o_proj|proj).*",
+ "target_modules": "^(?!.*patch_embed).*(?:k_proj|gate_proj|up_proj|fc2|v_proj|down_proj|proj|fc1|qkv|q_proj|o_proj).*",
  "task_type": "CAUSAL_LM",
  "use_dora": false,
  "use_rslora": false
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:30c7ee3fff01ccc34d52689d6b0a0f69c7bba11fef8f035de5c213e712b0aa8c
+ oid sha256:72a1dc75a4969cd4402c49224cdbeb8259d9c8645a8db3aac5f2374a463b3078
  size 29034840
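
Together, the adapter_config.json and adapter_model.safetensors files above describe the LoRA adapter itself; the config change is only a reordering of the alternatives inside the target_modules regex, so the same modules are matched. A minimal sketch of how such a regex-valued target_modules is declared, assuming the Hugging Face peft library (nothing below is taken from the actual training code):

# Minimal sketch, assuming the peft library. When target_modules is a single string,
# peft treats it as a regular expression matched against each module's full name,
# so reordering the alternation does not change which layers receive LoRA adapters.
from peft import LoraConfig

lora_config = LoraConfig(
    r=8,
    task_type="CAUSAL_LM",
    use_dora=False,
    use_rslora=False,
    # Regex copied from the new adapter_config.json: target the attention/MLP
    # projections while excluding the vision patch embedding.
    target_modules=r"^(?!.*patch_embed).*(?:k_proj|gate_proj|up_proj|fc2|v_proj|down_proj|proj|fc1|qkv|q_proj|o_proj).*",
)
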
last-checkpoint/global_step50/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:4b2c56e7d91776c788318ba1e3b4016e2c73a01c7eccc74e51c263c488772df8
+ oid sha256:d7c505d6abf258ee9c48764b56a1b54a9b537c8f7f6620c954d636e30695ebf3
  size 43429616
last-checkpoint/global_step50/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:4549aa2e4e283d76baddab756d68e8bfc02b90868be4144af49f348c3e65b3cd
+ oid sha256:7e429059273aae561b31750be91a7e0b5527171d6cd29798def53c3a20a71ad9
  size 43429616
last-checkpoint/global_step50/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ef092d3f2bcaca1e1fc6d77a5185ee9d6fccf6a0af053e70a9aaa421aab376ed
+ oid sha256:633189312a6d347dd04c28f96f91ca6de9203641fb44b01657e3fffafb5d6511
  size 43429616
last-checkpoint/global_step50/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:c85f4e66f1f6e2702a641255c370d870b9241283a1714fcbcfa1f337918d2822
+ oid sha256:9c3fafe9c1cf642ec6773405177d559dc9380e81a5ab08e42d2baefebc08e60d
  size 43429616
last-checkpoint/latest CHANGED
@@ -1 +1 @@
- global_step350
+ global_step50
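
The latest file is DeepSpeed's pointer to the current checkpoint tag (now global_step50, previously global_step350), and the bf16_zero_pp_rank_*_optim_states.pt files above are the per-rank ZeRO shards saved under that tag. A hedged sketch of consolidating those shards offline, assuming DeepSpeed is installed and using its zero_to_fp32 utility:

# Minimal sketch, assuming DeepSpeed is installed. With the default tag=None the
# helper reads the `latest` file to pick the tag (global_step50 here) and merges
# the per-rank ZeRO shards into a single fp32 state dict usable with torch.
from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint

state_dict = get_fp32_state_dict_from_zero_checkpoint("last-checkpoint")
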
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ee97cd82dba4d425fdd8dfdb88d4a43d0d4b1979b5c81ab4a24914fb00d4f332
+ oid sha256:bbe0d720c4c75a6a04213fa3b64bacbe794718a53e2b56ebb67a1a795014dfad
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:91dad95440fb85dc4a31745642117165c1a72173b2e389679ea8c0b2b6fcd7e2
+ oid sha256:72452d3138d0ca2ff89429e3294a834ae7a68e8596fc757735ca56ae52509d57
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:98698326b023c2af02c94f18726ce52c7f7a6fe290734dd7edbe99bc807fcfa0
+ oid sha256:f36e306fb8ebcf53a167bfd6c9af74db410a269ada1e619e3e816f5269543b9d
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:708e7c6b5bf8a327e688779ebc08830ce249928bcb1ff5c82b1b1d0bf6d2660b
+ oid sha256:bb47ce0c6f815a6f8302b0e3819b4c2315ca71dae3138d97fdceb765cdd0a039
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:d599041d87ad45b8804f766f89f2a165a0fb27c19b2b7170a0970b76e6a885f5
+ oid sha256:7e54bce6b4aaf0a5592cd43962528d57d1fed20da37bdb85e37a6f6789f8da9f
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,640 +1,106 @@
1
  {
2
- "best_metric": 0.830344021320343,
3
- "best_model_checkpoint": "saves/CADICA_qwenvl_stenosis_classily/lora/sft/checkpoint-350",
4
- "epoch": 0.9722222222222222,
5
  "eval_steps": 50,
6
- "global_step": 350,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.013888888888888888,
13
- "grad_norm": 20.155857548048633,
14
  "learning_rate": 6.944444444444445e-06,
15
- "loss": 2.8851,
16
  "num_input_tokens_seen": 77944,
17
  "step": 5
18
  },
19
  {
20
- "epoch": 0.027777777777777776,
21
- "grad_norm": 21.665571432342183,
22
  "learning_rate": 1.388888888888889e-05,
23
- "loss": 2.8741,
24
- "num_input_tokens_seen": 155872,
25
  "step": 10
26
  },
27
  {
28
- "epoch": 0.041666666666666664,
29
- "grad_norm": 17.590222014530585,
30
  "learning_rate": 2.0833333333333336e-05,
31
- "loss": 2.3696,
32
  "num_input_tokens_seen": 233896,
33
  "step": 15
34
  },
35
  {
36
- "epoch": 0.05555555555555555,
37
- "grad_norm": 6.32118188455302,
38
  "learning_rate": 2.777777777777778e-05,
39
- "loss": 1.383,
40
- "num_input_tokens_seen": 311912,
41
  "step": 20
42
  },
43
  {
44
- "epoch": 0.06944444444444445,
45
- "grad_norm": 4.561362427555974,
46
  "learning_rate": 3.472222222222222e-05,
47
- "loss": 0.948,
48
- "num_input_tokens_seen": 389912,
49
  "step": 25
50
  },
51
  {
52
- "epoch": 0.08333333333333333,
53
- "grad_norm": 2.1863542292953797,
54
  "learning_rate": 4.166666666666667e-05,
55
- "loss": 0.9201,
56
- "num_input_tokens_seen": 467848,
57
  "step": 30
58
  },
59
  {
60
- "epoch": 0.09722222222222222,
61
- "grad_norm": 1.3024950703221743,
62
  "learning_rate": 4.8611111111111115e-05,
63
- "loss": 0.9244,
64
- "num_input_tokens_seen": 545864,
65
  "step": 35
66
  },
67
  {
68
- "epoch": 0.1111111111111111,
69
- "grad_norm": 0.8165033262251763,
70
  "learning_rate": 5.555555555555556e-05,
71
- "loss": 0.9158,
72
- "num_input_tokens_seen": 623832,
73
  "step": 40
74
  },
75
  {
76
- "epoch": 0.125,
77
- "grad_norm": 0.8426328521707785,
78
  "learning_rate": 6.25e-05,
79
- "loss": 0.9065,
80
- "num_input_tokens_seen": 701864,
81
  "step": 45
82
  },
83
  {
84
- "epoch": 0.1388888888888889,
85
- "grad_norm": 0.7883178352462868,
86
  "learning_rate": 6.944444444444444e-05,
87
- "loss": 0.9013,
88
- "num_input_tokens_seen": 779840,
89
  "step": 50
90
  },
91
  {
92
- "epoch": 0.1388888888888889,
93
- "eval_loss": 0.9147798418998718,
94
- "eval_runtime": 77.1567,
95
- "eval_samples_per_second": 1.892,
96
- "eval_steps_per_second": 0.48,
97
- "num_input_tokens_seen": 779840,
98
  "step": 50
99
- },
100
- {
101
- "epoch": 0.1527777777777778,
102
- "grad_norm": 0.679315264214148,
103
- "learning_rate": 7.638888888888889e-05,
104
- "loss": 0.9113,
105
- "num_input_tokens_seen": 857840,
106
- "step": 55
107
- },
108
- {
109
- "epoch": 0.16666666666666666,
110
- "grad_norm": 0.6447084432724877,
111
- "learning_rate": 8.333333333333334e-05,
112
- "loss": 0.8999,
113
- "num_input_tokens_seen": 935880,
114
- "step": 60
115
- },
116
- {
117
- "epoch": 0.18055555555555555,
118
- "grad_norm": 0.4717419386009716,
119
- "learning_rate": 9.027777777777779e-05,
120
- "loss": 0.9027,
121
- "num_input_tokens_seen": 1013824,
122
- "step": 65
123
- },
124
- {
125
- "epoch": 0.19444444444444445,
126
- "grad_norm": 0.21169543073461422,
127
- "learning_rate": 9.722222222222223e-05,
128
- "loss": 0.8994,
129
- "num_input_tokens_seen": 1091808,
130
- "step": 70
131
- },
132
- {
133
- "epoch": 0.20833333333333334,
134
- "grad_norm": 0.40266651181937524,
135
- "learning_rate": 9.999471159635539e-05,
136
- "loss": 0.9049,
137
- "num_input_tokens_seen": 1169744,
138
- "step": 75
139
- },
140
- {
141
- "epoch": 0.2222222222222222,
142
- "grad_norm": 0.5050973334018934,
143
- "learning_rate": 9.996239762521151e-05,
144
- "loss": 0.9054,
145
- "num_input_tokens_seen": 1247784,
146
- "step": 80
147
- },
148
- {
149
- "epoch": 0.2361111111111111,
150
- "grad_norm": 0.5873934366128214,
151
- "learning_rate": 9.990072664903717e-05,
152
- "loss": 0.8997,
153
- "num_input_tokens_seen": 1325712,
154
- "step": 85
155
- },
156
- {
157
- "epoch": 0.25,
158
- "grad_norm": 0.4823533866978058,
159
- "learning_rate": 9.980973490458728e-05,
160
- "loss": 0.9019,
161
- "num_input_tokens_seen": 1403680,
162
- "step": 90
163
- },
164
- {
165
- "epoch": 0.2638888888888889,
166
- "grad_norm": 0.36744550192138214,
167
- "learning_rate": 9.968947585697214e-05,
168
- "loss": 0.9014,
169
- "num_input_tokens_seen": 1481584,
170
- "step": 95
171
- },
172
- {
173
- "epoch": 0.2777777777777778,
174
- "grad_norm": 0.5064142218129948,
175
- "learning_rate": 9.954002016824227e-05,
176
- "loss": 0.9068,
177
- "num_input_tokens_seen": 1559560,
178
- "step": 100
179
- },
180
- {
181
- "epoch": 0.2777777777777778,
182
- "eval_loss": 0.8995540738105774,
183
- "eval_runtime": 47.1233,
184
- "eval_samples_per_second": 3.098,
185
- "eval_steps_per_second": 0.785,
186
- "num_input_tokens_seen": 1559560,
187
- "step": 100
188
- },
189
- {
190
- "epoch": 0.2916666666666667,
191
- "grad_norm": 0.7063850677986605,
192
- "learning_rate": 9.936145565586871e-05,
193
- "loss": 0.9044,
194
- "num_input_tokens_seen": 1637512,
195
- "step": 105
196
- },
197
- {
198
- "epoch": 0.3055555555555556,
199
- "grad_norm": 0.4612572104309768,
200
- "learning_rate": 9.915388724114301e-05,
201
- "loss": 0.8938,
202
- "num_input_tokens_seen": 1715496,
203
- "step": 110
204
- },
205
- {
206
- "epoch": 0.3194444444444444,
207
- "grad_norm": 0.28644492571570335,
208
- "learning_rate": 9.891743688752738e-05,
209
- "loss": 0.8986,
210
- "num_input_tokens_seen": 1793520,
211
- "step": 115
212
- },
213
- {
214
- "epoch": 0.3333333333333333,
215
- "grad_norm": 0.40522057279685425,
216
- "learning_rate": 9.865224352899119e-05,
217
- "loss": 0.8954,
218
- "num_input_tokens_seen": 1871496,
219
- "step": 120
220
- },
221
- {
222
- "epoch": 0.3472222222222222,
223
- "grad_norm": 0.36152742532038146,
224
- "learning_rate": 9.835846298837584e-05,
225
- "loss": 0.9014,
226
- "num_input_tokens_seen": 1949496,
227
- "step": 125
228
- },
229
- {
230
- "epoch": 0.3611111111111111,
231
- "grad_norm": 0.4355502300592313,
232
- "learning_rate": 9.803626788583603e-05,
233
- "loss": 0.9069,
234
- "num_input_tokens_seen": 2027472,
235
- "step": 130
236
- },
237
- {
238
- "epoch": 0.375,
239
- "grad_norm": 0.26465732155670246,
240
- "learning_rate": 9.768584753741134e-05,
241
- "loss": 0.9036,
242
- "num_input_tokens_seen": 2105488,
243
- "step": 135
244
- },
245
- {
246
- "epoch": 0.3888888888888889,
247
- "grad_norm": 0.4321544586407889,
248
- "learning_rate": 9.730740784378753e-05,
249
- "loss": 0.8974,
250
- "num_input_tokens_seen": 2183488,
251
- "step": 140
252
- },
253
- {
254
- "epoch": 0.4027777777777778,
255
- "grad_norm": 0.34548611738400625,
256
- "learning_rate": 9.69011711693129e-05,
257
- "loss": 0.8999,
258
- "num_input_tokens_seen": 2261472,
259
- "step": 145
260
- },
261
- {
262
- "epoch": 0.4166666666666667,
263
- "grad_norm": 0.39871875335322654,
264
- "learning_rate": 9.646737621134112e-05,
265
- "loss": 0.908,
266
- "num_input_tokens_seen": 2339512,
267
- "step": 150
268
- },
269
- {
270
- "epoch": 0.4166666666666667,
271
- "eval_loss": 0.8949049115180969,
272
- "eval_runtime": 46.7248,
273
- "eval_samples_per_second": 3.125,
274
- "eval_steps_per_second": 0.792,
275
- "num_input_tokens_seen": 2339512,
276
- "step": 150
277
- },
278
- {
279
- "epoch": 0.4305555555555556,
280
- "grad_norm": 0.4541004672886896,
281
- "learning_rate": 9.600627785997696e-05,
282
- "loss": 0.8971,
283
- "num_input_tokens_seen": 2417488,
284
- "step": 155
285
- },
286
- {
287
- "epoch": 0.4444444444444444,
288
- "grad_norm": 0.4613539033606643,
289
- "learning_rate": 9.551814704830734e-05,
290
- "loss": 0.8952,
291
- "num_input_tokens_seen": 2495464,
292
- "step": 160
293
- },
294
- {
295
- "epoch": 0.4583333333333333,
296
- "grad_norm": 0.37123415305068785,
297
- "learning_rate": 9.500327059320606e-05,
298
- "loss": 0.8972,
299
- "num_input_tokens_seen": 2573408,
300
- "step": 165
301
- },
302
- {
303
- "epoch": 0.4722222222222222,
304
- "grad_norm": 0.38977847466547694,
305
- "learning_rate": 9.446195102680531e-05,
306
- "loss": 0.8979,
307
- "num_input_tokens_seen": 2651368,
308
- "step": 170
309
- },
310
- {
311
- "epoch": 0.4861111111111111,
312
- "grad_norm": 0.30747699940882145,
313
- "learning_rate": 9.389450641873323e-05,
314
- "loss": 0.8908,
315
- "num_input_tokens_seen": 2729352,
316
- "step": 175
317
- },
318
- {
319
- "epoch": 0.5,
320
- "grad_norm": 0.33529998689248586,
321
- "learning_rate": 9.330127018922194e-05,
322
- "loss": 0.8997,
323
- "num_input_tokens_seen": 2807320,
324
- "step": 180
325
- },
326
- {
327
- "epoch": 0.5138888888888888,
328
- "grad_norm": 0.5426765737063947,
329
- "learning_rate": 9.268259091319582e-05,
330
- "loss": 0.9024,
331
- "num_input_tokens_seen": 2885368,
332
- "step": 185
333
- },
334
- {
335
- "epoch": 0.5277777777777778,
336
- "grad_norm": 0.4593898499279331,
337
- "learning_rate": 9.203883211545517e-05,
338
- "loss": 0.9002,
339
- "num_input_tokens_seen": 2963376,
340
- "step": 190
341
- },
342
- {
343
- "epoch": 0.5416666666666666,
344
- "grad_norm": 0.27583660078148825,
345
- "learning_rate": 9.137037205707552e-05,
346
- "loss": 0.9,
347
- "num_input_tokens_seen": 3041376,
348
- "step": 195
349
- },
350
- {
351
- "epoch": 0.5555555555555556,
352
- "grad_norm": 0.26593205655845514,
353
- "learning_rate": 9.067760351314838e-05,
354
- "loss": 0.8989,
355
- "num_input_tokens_seen": 3119304,
356
- "step": 200
357
- },
358
- {
359
- "epoch": 0.5555555555555556,
360
- "eval_loss": 0.8992709517478943,
361
- "eval_runtime": 46.6014,
362
- "eval_samples_per_second": 3.133,
363
- "eval_steps_per_second": 0.794,
364
- "num_input_tokens_seen": 3119304,
365
- "step": 200
366
- },
367
- {
368
- "epoch": 0.5694444444444444,
369
- "grad_norm": 0.5150976787458876,
370
- "learning_rate": 8.996093354199349e-05,
371
- "loss": 0.8994,
372
- "num_input_tokens_seen": 3197320,
373
- "step": 205
374
- },
375
- {
376
- "epoch": 0.5833333333333334,
377
- "grad_norm": 0.5612526379811666,
378
- "learning_rate": 8.922078324597879e-05,
379
- "loss": 0.9036,
380
- "num_input_tokens_seen": 3275288,
381
- "step": 210
382
- },
383
- {
384
- "epoch": 0.5972222222222222,
385
- "grad_norm": 0.6484992865693107,
386
- "learning_rate": 8.845758752408826e-05,
387
- "loss": 0.8988,
388
- "num_input_tokens_seen": 3353240,
389
- "step": 215
390
- },
391
- {
392
- "epoch": 0.6111111111111112,
393
- "grad_norm": 0.4709224015616086,
394
- "learning_rate": 8.767179481638303e-05,
395
- "loss": 0.9015,
396
- "num_input_tokens_seen": 3431248,
397
- "step": 220
398
- },
399
- {
400
- "epoch": 0.625,
401
- "grad_norm": 0.6046638090352222,
402
- "learning_rate": 8.68638668405062e-05,
403
- "loss": 0.9047,
404
- "num_input_tokens_seen": 3509192,
405
- "step": 225
406
- },
407
- {
408
- "epoch": 0.6388888888888888,
409
- "grad_norm": 0.4688289739018474,
410
- "learning_rate": 8.603427832038574e-05,
411
- "loss": 0.901,
412
- "num_input_tokens_seen": 3587152,
413
- "step": 230
414
- },
415
- {
416
- "epoch": 0.6527777777777778,
417
- "grad_norm": 0.5904442121458054,
418
- "learning_rate": 8.518351670729529e-05,
419
- "loss": 0.8898,
420
- "num_input_tokens_seen": 3665128,
421
- "step": 235
422
- },
423
- {
424
- "epoch": 0.6666666666666666,
425
- "grad_norm": 0.6120652373129304,
426
- "learning_rate": 8.43120818934367e-05,
427
- "loss": 0.8896,
428
- "num_input_tokens_seen": 3743128,
429
- "step": 240
430
- },
431
- {
432
- "epoch": 0.6805555555555556,
433
- "grad_norm": 1.313292312803758,
434
- "learning_rate": 8.342048591821212e-05,
435
- "loss": 0.8993,
436
- "num_input_tokens_seen": 3821088,
437
- "step": 245
438
- },
439
- {
440
- "epoch": 0.6944444444444444,
441
- "grad_norm": 0.9735602658006927,
442
- "learning_rate": 8.250925266735918e-05,
443
- "loss": 0.8907,
444
- "num_input_tokens_seen": 3899064,
445
- "step": 250
446
- },
447
- {
448
- "epoch": 0.6944444444444444,
449
- "eval_loss": 0.8925368785858154,
450
- "eval_runtime": 46.6124,
451
- "eval_samples_per_second": 3.132,
452
- "eval_steps_per_second": 0.794,
453
- "num_input_tokens_seen": 3899064,
454
- "step": 250
455
- },
456
- {
457
- "epoch": 0.7083333333333334,
458
- "grad_norm": 1.2050807227161144,
459
- "learning_rate": 8.157891756512488e-05,
460
- "loss": 0.878,
461
- "num_input_tokens_seen": 3977048,
462
- "step": 255
463
- },
464
- {
465
- "epoch": 0.7222222222222222,
466
- "grad_norm": 1.9771414914416532,
467
- "learning_rate": 8.063002725966015e-05,
468
- "loss": 0.8881,
469
- "num_input_tokens_seen": 4055080,
470
- "step": 260
471
- },
472
- {
473
- "epoch": 0.7361111111111112,
474
- "grad_norm": 1.605957774581815,
475
- "learning_rate": 7.966313930181912e-05,
476
- "loss": 0.8755,
477
- "num_input_tokens_seen": 4133008,
478
- "step": 265
479
- },
480
- {
481
- "epoch": 0.75,
482
- "grad_norm": 9.19186897524454,
483
- "learning_rate": 7.86788218175523e-05,
484
- "loss": 0.8558,
485
- "num_input_tokens_seen": 4210992,
486
- "step": 270
487
- },
488
- {
489
- "epoch": 0.7638888888888888,
490
- "grad_norm": 4.328337833890427,
491
- "learning_rate": 7.767765317408613e-05,
492
- "loss": 0.9057,
493
- "num_input_tokens_seen": 4288976,
494
- "step": 275
495
- },
496
- {
497
- "epoch": 0.7777777777777778,
498
- "grad_norm": 3.1206913180819185,
499
- "learning_rate": 7.666022164008457e-05,
500
- "loss": 0.845,
501
- "num_input_tokens_seen": 4366920,
502
- "step": 280
503
- },
504
- {
505
- "epoch": 0.7916666666666666,
506
- "grad_norm": 3.6462009524262773,
507
- "learning_rate": 7.562712503999327e-05,
508
- "loss": 0.8524,
509
- "num_input_tokens_seen": 4444912,
510
- "step": 285
511
- },
512
- {
513
- "epoch": 0.8055555555555556,
514
- "grad_norm": 2.1379581270956285,
515
- "learning_rate": 7.457897040276853e-05,
516
- "loss": 0.8694,
517
- "num_input_tokens_seen": 4522912,
518
- "step": 290
519
- },
520
- {
521
- "epoch": 0.8194444444444444,
522
- "grad_norm": 3.553029913178992,
523
- "learning_rate": 7.351637360519813e-05,
524
- "loss": 0.8605,
525
- "num_input_tokens_seen": 4600848,
526
- "step": 295
527
- },
528
- {
529
- "epoch": 0.8333333333333334,
530
- "grad_norm": 3.332905153564647,
531
- "learning_rate": 7.243995901002312e-05,
532
- "loss": 0.8739,
533
- "num_input_tokens_seen": 4678776,
534
- "step": 300
535
- },
536
- {
537
- "epoch": 0.8333333333333334,
538
- "eval_loss": 0.8601770997047424,
539
- "eval_runtime": 46.9817,
540
- "eval_samples_per_second": 3.108,
541
- "eval_steps_per_second": 0.788,
542
- "num_input_tokens_seen": 4678776,
543
- "step": 300
544
- },
545
- {
546
- "epoch": 0.8472222222222222,
547
- "grad_norm": 3.824074793385472,
548
- "learning_rate": 7.135035909907358e-05,
549
- "loss": 0.8838,
550
- "num_input_tokens_seen": 4756744,
551
- "step": 305
552
- },
553
- {
554
- "epoch": 0.8611111111111112,
555
- "grad_norm": 1.8566349178683659,
556
- "learning_rate": 7.024821410163368e-05,
557
- "loss": 0.8843,
558
- "num_input_tokens_seen": 4834648,
559
- "step": 310
560
- },
561
- {
562
- "epoch": 0.875,
563
- "grad_norm": 1.8800583465943999,
564
- "learning_rate": 6.91341716182545e-05,
565
- "loss": 0.8637,
566
- "num_input_tokens_seen": 4912632,
567
- "step": 315
568
- },
569
- {
570
- "epoch": 0.8888888888888888,
571
- "grad_norm": 2.339387141795884,
572
- "learning_rate": 6.800888624023553e-05,
573
- "loss": 0.8714,
574
- "num_input_tokens_seen": 4990632,
575
- "step": 320
576
- },
577
- {
578
- "epoch": 0.9027777777777778,
579
- "grad_norm": 1.650009361432866,
580
- "learning_rate": 6.687301916499871e-05,
581
- "loss": 0.8581,
582
- "num_input_tokens_seen": 5068632,
583
- "step": 325
584
- },
585
- {
586
- "epoch": 0.9166666666666666,
587
- "grad_norm": 2.8384085546182294,
588
- "learning_rate": 6.572723780758069e-05,
589
- "loss": 0.8655,
590
- "num_input_tokens_seen": 5146648,
591
- "step": 330
592
- },
593
- {
594
- "epoch": 0.9305555555555556,
595
- "grad_norm": 6.227674605881871,
596
- "learning_rate": 6.457221540847176e-05,
597
- "loss": 0.8721,
598
- "num_input_tokens_seen": 5224624,
599
- "step": 335
600
- },
601
- {
602
- "epoch": 0.9444444444444444,
603
- "grad_norm": 3.498647817420285,
604
- "learning_rate": 6.340863063803188e-05,
605
- "loss": 0.8563,
606
- "num_input_tokens_seen": 5302600,
607
- "step": 340
608
- },
609
- {
610
- "epoch": 0.9583333333333334,
611
- "grad_norm": 4.211929847119524,
612
- "learning_rate": 6.22371671977162e-05,
613
- "loss": 0.8634,
614
- "num_input_tokens_seen": 5380600,
615
- "step": 345
616
- },
617
- {
618
- "epoch": 0.9722222222222222,
619
- "grad_norm": 3.696407127177043,
620
- "learning_rate": 6.105851341834439e-05,
621
- "loss": 0.8526,
622
- "num_input_tokens_seen": 5458520,
623
- "step": 350
624
- },
625
- {
626
- "epoch": 0.9722222222222222,
627
- "eval_loss": 0.830344021320343,
628
- "eval_runtime": 46.3813,
629
- "eval_samples_per_second": 3.148,
630
- "eval_steps_per_second": 0.798,
631
- "num_input_tokens_seen": 5458520,
632
- "step": 350
633
  }
634
  ],
635
  "logging_steps": 5,
636
- "max_steps": 720,
637
- "num_input_tokens_seen": 5458520,
638
  "num_train_epochs": 2,
639
  "save_steps": 50,
640
  "stateful_callbacks": {
@@ -649,7 +115,7 @@
649
  "attributes": {}
650
  }
651
  },
652
- "total_flos": 369059492790272.0,
653
  "train_batch_size": 1,
654
  "trial_name": null,
655
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.9039102792739868,
3
+ "best_model_checkpoint": "saves/CADICA_qwenvl_stenosis_classily/lora/sft/checkpoint-50",
4
+ "epoch": 0.13961605584642234,
5
  "eval_steps": 50,
6
+ "global_step": 50,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.013961605584642234,
13
+ "grad_norm": 21.25276507868793,
14
  "learning_rate": 6.944444444444445e-06,
15
+ "loss": 2.9908,
16
  "num_input_tokens_seen": 77944,
17
  "step": 5
18
  },
19
  {
20
+ "epoch": 0.027923211169284468,
21
+ "grad_norm": 21.89043285054519,
22
  "learning_rate": 1.388888888888889e-05,
23
+ "loss": 3.0071,
24
+ "num_input_tokens_seen": 155896,
25
  "step": 10
26
  },
27
  {
28
+ "epoch": 0.041884816753926704,
29
+ "grad_norm": 16.65776874449816,
30
  "learning_rate": 2.0833333333333336e-05,
31
+ "loss": 2.354,
32
  "num_input_tokens_seen": 233896,
33
  "step": 15
34
  },
35
  {
36
+ "epoch": 0.055846422338568937,
37
+ "grad_norm": 3.772799389266845,
38
  "learning_rate": 2.777777777777778e-05,
39
+ "loss": 1.2959,
40
+ "num_input_tokens_seen": 311840,
41
  "step": 20
42
  },
43
  {
44
+ "epoch": 0.06980802792321117,
45
+ "grad_norm": 2.5936011954385334,
46
  "learning_rate": 3.472222222222222e-05,
47
+ "loss": 1.0206,
48
+ "num_input_tokens_seen": 389816,
49
  "step": 25
50
  },
51
  {
52
+ "epoch": 0.08376963350785341,
53
+ "grad_norm": 1.380523901017673,
54
  "learning_rate": 4.166666666666667e-05,
55
+ "loss": 0.9285,
56
+ "num_input_tokens_seen": 467808,
57
  "step": 30
58
  },
59
  {
60
+ "epoch": 0.09773123909249563,
61
+ "grad_norm": 0.9535971270874376,
62
  "learning_rate": 4.8611111111111115e-05,
63
+ "loss": 0.9052,
64
+ "num_input_tokens_seen": 545776,
65
  "step": 35
66
  },
67
  {
68
+ "epoch": 0.11169284467713787,
69
+ "grad_norm": 0.7487685762175865,
70
  "learning_rate": 5.555555555555556e-05,
71
+ "loss": 0.929,
72
+ "num_input_tokens_seen": 623744,
73
  "step": 40
74
  },
75
  {
76
+ "epoch": 0.1256544502617801,
77
+ "grad_norm": 0.9517829869317949,
78
  "learning_rate": 6.25e-05,
79
+ "loss": 0.9076,
80
+ "num_input_tokens_seen": 701720,
81
  "step": 45
82
  },
83
  {
84
+ "epoch": 0.13961605584642234,
85
+ "grad_norm": 0.5105376471286923,
86
  "learning_rate": 6.944444444444444e-05,
87
+ "loss": 0.9039,
88
+ "num_input_tokens_seen": 779728,
89
  "step": 50
90
  },
91
  {
92
+ "epoch": 0.13961605584642234,
93
+ "eval_loss": 0.9039102792739868,
94
+ "eval_runtime": 74.9579,
95
+ "eval_samples_per_second": 1.948,
96
+ "eval_steps_per_second": 0.494,
97
+ "num_input_tokens_seen": 779728,
98
  "step": 50
99
  }
100
  ],
101
  "logging_steps": 5,
102
+ "max_steps": 716,
103
+ "num_input_tokens_seen": 779728,
104
  "num_train_epochs": 2,
105
  "save_steps": 50,
106
  "stateful_callbacks": {
 
115
  "attributes": {}
116
  }
117
  },
118
+ "total_flos": 52663320313856.0,
119
  "train_batch_size": 1,
120
  "trial_name": null,
121
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:9ae1f2f88db7ed48f6bd7b609c8d1a0011f4c2a12f13011280474cb269b33af9
+ oid sha256:6b0af6075e02b8e93a59ce938db15a9c8a754a7d6b6c53d6278e322b07db3808
  size 7352
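
According to the new trainer_state.json, this run now records checkpoint-50 as the best checkpoint (eval_loss 0.9039 at step 50 of 716), replacing checkpoint-350 from the previous run. A minimal sketch of inspecting that state and resuming from the checkpoint directory, assuming a transformers Trainer named trainer has been constructed exactly as in the original training script (which is not part of this commit):

# Minimal sketch. The JSON fields below are the ones shown in the diff above;
# `trainer` is assumed to be a transformers.Trainer wired up with the same model,
# LoRA config, and DeepSpeed settings as the original run (not shown here).
import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print(state["best_metric"])            # 0.9039102792739868 (eval_loss at step 50)
print(state["best_model_checkpoint"])  # .../checkpoint-50
print(state["global_step"], "/", state["max_steps"])  # 50 / 716

# Resuming restores the optimizer shards, RNG states, scheduler, and step counter,
# so training continues from step 50 instead of restarting.
trainer.train(resume_from_checkpoint="last-checkpoint")
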