jeffrey03 committed on
Commit
0a1fb31
·
verified ·
1 Parent(s): 89e7817

Training in progress, step 1000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6ba78013670a5e8754e3e538080ba8af3888f00be0641ecd712b9a49e93870c1
3
  size 109086416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:213054c4927c6f3bca1e8082d827c430bb295745b32b92115688f61434f7f075
3
  size 109086416
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2b192ffa3c570887b69f25ba42412c7594e70565f65b802562fded9f8bbdb794
3
  size 218260730
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:102d67e35bfbcc4b9e815d03b2ae65a3860e14c952cb6a160cddaed2bd9e62df
3
  size 218260730
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d72ed5d4a41f5335cd07414271c9444686910c8d74653e496881abb75037bae2
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca646238b9defc5bf2a095179cb788aa143627991f51a70cd39966e3562a2aab
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1d5e804c4c0039d75698286d90173db37204c5c7e59b000f6920a7c9798abfca
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:199e6824b18b1d9debbd05233cf73ed2bcbb72beea16b10948ab69d09fd868c5
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 1.6910121440887451,
3
  "best_model_checkpoint": "Pricer-FineTune-OpenSource-2024-10-23_08.48.15/checkpoint-750",
4
- "epoch": 1.2,
5
  "eval_steps": 50,
6
- "global_step": 750,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -232,6 +232,81 @@
232
  "eval_samples_per_second": 20.085,
233
  "eval_steps_per_second": 5.021,
234
  "step": 750
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
235
  }
236
  ],
237
  "logging_steps": 50,
@@ -251,7 +326,7 @@
251
  "attributes": {}
252
  }
253
  },
254
- "total_flos": 1.927851979580375e+17,
255
  "train_batch_size": 16,
256
  "trial_name": null,
257
  "trial_params": null
 
1
  {
2
  "best_metric": 1.6910121440887451,
3
  "best_model_checkpoint": "Pricer-FineTune-OpenSource-2024-10-23_08.48.15/checkpoint-750",
4
+ "epoch": 1.6,
5
  "eval_steps": 50,
6
+ "global_step": 1000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
232
  "eval_samples_per_second": 20.085,
233
  "eval_steps_per_second": 5.021,
234
  "step": 750
235
+ },
236
+ {
237
+ "epoch": 1.28,
238
+ "grad_norm": 1.9570444822311401,
239
+ "learning_rate": 6.414690945243768e-05,
240
+ "loss": 1.6109,
241
+ "step": 800
242
+ },
243
+ {
244
+ "epoch": 1.28,
245
+ "eval_loss": 1.7327255010604858,
246
+ "eval_runtime": 4.9795,
247
+ "eval_samples_per_second": 20.082,
248
+ "eval_steps_per_second": 5.021,
249
+ "step": 800
250
+ },
251
+ {
252
+ "epoch": 1.3599999999999999,
253
+ "grad_norm": 1.9022583961486816,
254
+ "learning_rate": 5.9955696203559285e-05,
255
+ "loss": 1.615,
256
+ "step": 850
257
+ },
258
+ {
259
+ "epoch": 1.3599999999999999,
260
+ "eval_loss": 1.7244207859039307,
261
+ "eval_runtime": 4.9869,
262
+ "eval_samples_per_second": 20.052,
263
+ "eval_steps_per_second": 5.013,
264
+ "step": 850
265
+ },
266
+ {
267
+ "epoch": 1.44,
268
+ "grad_norm": 1.445749044418335,
269
+ "learning_rate": 5.5690206112115884e-05,
270
+ "loss": 1.6122,
271
+ "step": 900
272
+ },
273
+ {
274
+ "epoch": 1.44,
275
+ "eval_loss": 1.689263939857483,
276
+ "eval_runtime": 4.9757,
277
+ "eval_samples_per_second": 20.098,
278
+ "eval_steps_per_second": 5.024,
279
+ "step": 900
280
+ },
281
+ {
282
+ "epoch": 1.52,
283
+ "grad_norm": 2.5496785640716553,
284
+ "learning_rate": 5.1382262882799395e-05,
285
+ "loss": 1.6248,
286
+ "step": 950
287
+ },
288
+ {
289
+ "epoch": 1.52,
290
+ "eval_loss": 1.6721502542495728,
291
+ "eval_runtime": 4.9817,
292
+ "eval_samples_per_second": 20.073,
293
+ "eval_steps_per_second": 5.018,
294
+ "step": 950
295
+ },
296
+ {
297
+ "epoch": 1.6,
298
+ "grad_norm": 1.7256929874420166,
299
+ "learning_rate": 4.706400695204749e-05,
300
+ "loss": 1.5938,
301
+ "step": 1000
302
+ },
303
+ {
304
+ "epoch": 1.6,
305
+ "eval_loss": 1.698430061340332,
306
+ "eval_runtime": 4.9786,
307
+ "eval_samples_per_second": 20.086,
308
+ "eval_steps_per_second": 5.021,
309
+ "step": 1000
310
  }
311
  ],
312
  "logging_steps": 50,
 
326
  "attributes": {}
327
  }
328
  },
329
+ "total_flos": 2.5704331516064563e+17,
330
  "train_batch_size": 16,
331
  "trial_name": null,
332
  "trial_params": null