dixedus commited on
Commit
f7117eb
·
verified ·
1 Parent(s): 246630a

Training in progress, step 600, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3e1ddf20e43ee498cc289f2bad036aa9eb0970219206ea831c5581b208af2226
3
  size 144748392
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:460ce77b418e7f64403d43dffff8d5aee922c9451c1fc9b58a1888cce55f8bbc
3
  size 144748392
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:800b02184e3f2fd61d964ff1a7bb0ea20318449052b1c3837ad59923d32c1d68
3
  size 73877972
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:988d0134d663890b0eeb99458d71ca96be9fc17c1ae772234f2651d71911dbe5
3
  size 73877972
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f7e4d7d2b0c8aa3af21365f6f0926784e1d68c844ac2fbc1cc56728a1f7c21d
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:edf20a3d34af62b8eb2d0f4ec84aff799c8013a6c9543f2e58bba92b16054d64
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5b782a524e5b57eb023365370accae538ac5e68454bafa53a6dd8b2c51cead56
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ad54995b081fae25638228c5d9c8f38ca277e5c5ad00bc3e49897b543f84405
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.5024861097335815,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-500",
4
- "epoch": 0.10111734668082309,
5
  "eval_steps": 100,
6
- "global_step": 500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -405,6 +405,84 @@
405
  "eval_samples_per_second": 25.234,
406
  "eval_steps_per_second": 6.309,
407
  "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
408
  }
409
  ],
410
  "logging_steps": 10,
@@ -428,12 +506,12 @@
428
  "should_evaluate": false,
429
  "should_log": false,
430
  "should_save": true,
431
- "should_training_stop": false
432
  },
433
  "attributes": {}
434
  }
435
  },
436
- "total_flos": 2.2973010092752896e+17,
437
  "train_batch_size": 8,
438
  "trial_name": null,
439
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.5015363097190857,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-600",
4
+ "epoch": 0.12134081601698772,
5
  "eval_steps": 100,
6
+ "global_step": 600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
405
  "eval_samples_per_second": 25.234,
406
  "eval_steps_per_second": 6.309,
407
  "step": 500
408
+ },
409
+ {
410
+ "epoch": 0.10313969361443956,
411
+ "grad_norm": 0.16003485023975372,
412
+ "learning_rate": 1.1264792494342857e-05,
413
+ "loss": 0.6579,
414
+ "step": 510
415
+ },
416
+ {
417
+ "epoch": 0.10516204054805602,
418
+ "grad_norm": 0.03965291753411293,
419
+ "learning_rate": 8.936522714508678e-06,
420
+ "loss": 0.5382,
421
+ "step": 520
422
+ },
423
+ {
424
+ "epoch": 0.10718438748167249,
425
+ "grad_norm": 0.15766242146492004,
426
+ "learning_rate": 6.866382254766157e-06,
427
+ "loss": 0.5313,
428
+ "step": 530
429
+ },
430
+ {
431
+ "epoch": 0.10920673441528894,
432
+ "grad_norm": 0.24531126022338867,
433
+ "learning_rate": 5.060239153161872e-06,
434
+ "loss": 0.4857,
435
+ "step": 540
436
+ },
437
+ {
438
+ "epoch": 0.1112290813489054,
439
+ "grad_norm": 0.3989189863204956,
440
+ "learning_rate": 3.5232131185484076e-06,
441
+ "loss": 0.219,
442
+ "step": 550
443
+ },
444
+ {
445
+ "epoch": 0.11325142828252187,
446
+ "grad_norm": 0.14650988578796387,
447
+ "learning_rate": 2.259661018213333e-06,
448
+ "loss": 0.6528,
449
+ "step": 560
450
+ },
451
+ {
452
+ "epoch": 0.11527377521613832,
453
+ "grad_norm": 0.07405520975589752,
454
+ "learning_rate": 1.2731645278655445e-06,
455
+ "loss": 0.565,
456
+ "step": 570
457
+ },
458
+ {
459
+ "epoch": 0.1172961221497548,
460
+ "grad_norm": 0.15201494097709656,
461
+ "learning_rate": 5.665199789862907e-07,
462
+ "loss": 0.5497,
463
+ "step": 580
464
+ },
465
+ {
466
+ "epoch": 0.11931846908337125,
467
+ "grad_norm": 0.27944013476371765,
468
+ "learning_rate": 1.4173043232380557e-07,
469
+ "loss": 0.5393,
470
+ "step": 590
471
+ },
472
+ {
473
+ "epoch": 0.12134081601698772,
474
+ "grad_norm": 0.40690985321998596,
475
+ "learning_rate": 0.0,
476
+ "loss": 0.1843,
477
+ "step": 600
478
+ },
479
+ {
480
+ "epoch": 0.12134081601698772,
481
+ "eval_loss": 0.5015363097190857,
482
+ "eval_runtime": 329.9939,
483
+ "eval_samples_per_second": 25.237,
484
+ "eval_steps_per_second": 6.309,
485
+ "step": 600
486
  }
487
  ],
488
  "logging_steps": 10,
 
506
  "should_evaluate": false,
507
  "should_log": false,
508
  "should_save": true,
509
+ "should_training_stop": true
510
  },
511
  "attributes": {}
512
  }
513
  },
514
+ "total_flos": 2.7570850026749952e+17,
515
  "train_batch_size": 8,
516
  "trial_name": null,
517
  "trial_params": null