dixedus commited on
Commit
858ce6d
·
verified ·
1 Parent(s): f036aa0

Training in progress, step 500, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e5ef3beb9448d72cca91d94dfe6a496c55f1eeb8999d0eaa8713ad4c2713c966
3
  size 144748392
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e1ddf20e43ee498cc289f2bad036aa9eb0970219206ea831c5581b208af2226
3
  size 144748392
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:18cd7e8f7810a66c037ca385abba7a1212abce264c65faa9cb6e38bdb05aedd3
3
  size 73877972
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:800b02184e3f2fd61d964ff1a7bb0ea20318449052b1c3837ad59923d32c1d68
3
  size 73877972
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:38c3ff7e6b44015b9baf9a104f355f5f950f6a24cd408cba2777c3c7df222047
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f7e4d7d2b0c8aa3af21365f6f0926784e1d68c844ac2fbc1cc56728a1f7c21d
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fe0a47cc675a5d4ba5fe8c0d42564476e1fe842799977ab67bf2a8317adef53f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b782a524e5b57eb023365370accae538ac5e68454bafa53a6dd8b2c51cead56
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.511861264705658,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-400",
4
- "epoch": 0.08089387734465847,
5
  "eval_steps": 100,
6
- "global_step": 400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -327,6 +327,84 @@
327
  "eval_samples_per_second": 25.263,
328
  "eval_steps_per_second": 6.316,
329
  "step": 400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
330
  }
331
  ],
332
  "logging_steps": 10,
@@ -355,7 +433,7 @@
355
  "attributes": {}
356
  }
357
  },
358
- "total_flos": 1.8399454524604416e+17,
359
  "train_batch_size": 8,
360
  "trial_name": null,
361
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.5024861097335815,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-500",
4
+ "epoch": 0.10111734668082309,
5
  "eval_steps": 100,
6
+ "global_step": 500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
327
  "eval_samples_per_second": 25.263,
328
  "eval_steps_per_second": 6.316,
329
  "step": 400
330
+ },
331
+ {
332
+ "epoch": 0.08291622427827494,
333
+ "grad_norm": 0.16395606100559235,
334
+ "learning_rate": 4.695790918802576e-05,
335
+ "loss": 0.6875,
336
+ "step": 410
337
+ },
338
+ {
339
+ "epoch": 0.0849385712118914,
340
+ "grad_norm": 0.53562331199646,
341
+ "learning_rate": 4.252125897855932e-05,
342
+ "loss": 0.5775,
343
+ "step": 420
344
+ },
345
+ {
346
+ "epoch": 0.08696091814550787,
347
+ "grad_norm": 0.15994961559772491,
348
+ "learning_rate": 3.824753850538082e-05,
349
+ "loss": 0.5303,
350
+ "step": 430
351
+ },
352
+ {
353
+ "epoch": 0.08898326507912432,
354
+ "grad_norm": 0.2935360074043274,
355
+ "learning_rate": 3.414886209349615e-05,
356
+ "loss": 0.5177,
357
+ "step": 440
358
+ },
359
+ {
360
+ "epoch": 0.09100561201274078,
361
+ "grad_norm": 0.4813726544380188,
362
+ "learning_rate": 3.0236847886501542e-05,
363
+ "loss": 0.2539,
364
+ "step": 450
365
+ },
366
+ {
367
+ "epoch": 0.09302795894635725,
368
+ "grad_norm": 0.15252335369586945,
369
+ "learning_rate": 2.6522584913693294e-05,
370
+ "loss": 0.6666,
371
+ "step": 460
372
+ },
373
+ {
374
+ "epoch": 0.0950503058799737,
375
+ "grad_norm": 0.3821258246898651,
376
+ "learning_rate": 2.301660165700936e-05,
377
+ "loss": 0.5569,
378
+ "step": 470
379
+ },
380
+ {
381
+ "epoch": 0.09707265281359018,
382
+ "grad_norm": 0.14530214667320251,
383
+ "learning_rate": 1.9728836206903656e-05,
384
+ "loss": 0.5426,
385
+ "step": 480
386
+ },
387
+ {
388
+ "epoch": 0.09909499974720663,
389
+ "grad_norm": 0.28957322239875793,
390
+ "learning_rate": 1.6668608091748495e-05,
391
+ "loss": 0.5386,
392
+ "step": 490
393
+ },
394
+ {
395
+ "epoch": 0.10111734668082309,
396
+ "grad_norm": 0.19388361275196075,
397
+ "learning_rate": 1.3844591860619383e-05,
398
+ "loss": 0.2322,
399
+ "step": 500
400
+ },
401
+ {
402
+ "epoch": 0.10111734668082309,
403
+ "eval_loss": 0.5024861097335815,
404
+ "eval_runtime": 330.0244,
405
+ "eval_samples_per_second": 25.234,
406
+ "eval_steps_per_second": 6.309,
407
+ "step": 500
408
  }
409
  ],
410
  "logging_steps": 10,
 
433
  "attributes": {}
434
  }
435
  },
436
+ "total_flos": 2.2973010092752896e+17,
437
  "train_batch_size": 8,
438
  "trial_name": null,
439
  "trial_params": null