besimray committed
Commit 4572532 · verified · 1 Parent(s): dc5346c

Training in progress, step 90, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:05a0608a0d108d197d5820e3b216e8d7a09ddb76226408ab8f49c2438e9fb16d
+oid sha256:91a3b29856143aecc3a1fd76b949ca0dd86fb486460a8825bb04011135aea6da
 size 90207248
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:761ab0ea9c61d6e62dfbe008243dbfa326d67c392160d21b091f12b6a9a11a62
+oid sha256:efdd3a8541cf52ef664e6919d0834665f7aee03a1cc90078eecd7f50255b8cf0
 size 46057082
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:597c452dbb6d2e2055a44445aa9e337aa4085eacd11fcbc7213a5eb6f6055867
+oid sha256:63c9cc0f3c04b2c00adeda1f5d60d230e61e99e3404c96ea1e859f8d4e96fa90
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7803647d52fbd7429a283dc695ba7cf653ff890c06d5c50f67d0a09610438889
+oid sha256:23b27ab0ae2b9af6f3d4c84cdaf8b0fc887acf71f8f726b270a3bce2845000a9
 size 1064
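
The four files above are stored through Git LFS, so the commit only rewrites their pointer files: each pointer records the spec version, the sha256 oid of the binary payload, and its size in bytes. After downloading a checkpoint, the oid can be recomputed locally as an integrity check. The sketch below is a minimal example, assuming the binaries have already been pulled into last-checkpoint/; the helper name and the standalone-script form are illustrative, not part of this repo.

```python
import hashlib
from pathlib import Path

def sha256_of(path: Path, chunk_size: int = 1 << 20) -> str:
    """Stream the file through sha256, which is how Git LFS derives its oid."""
    digest = hashlib.sha256()
    with path.open("rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# New oid for adapter_model.safetensors, taken from the diff above.
expected_oid = "91a3b29856143aecc3a1fd76b949ca0dd86fb486460a8825bb04011135aea6da"
local = Path("last-checkpoint/adapter_model.safetensors")  # assumed local download path
print("match:", sha256_of(local) == expected_oid)
```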
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 1.602448582649231,
   "best_model_checkpoint": "miner_id_24/checkpoint-80",
-  "epoch": 0.022590893046240734,
+  "epoch": 0.025414754677020826,
   "eval_steps": 10,
-  "global_step": 80,
+  "global_step": 90,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -639,6 +639,84 @@
       "eval_samples_per_second": 5.584,
       "eval_steps_per_second": 5.584,
       "step": 80
+    },
+    {
+      "epoch": 0.022873279209318743,
+      "grad_norm": 1.699086308479309,
+      "learning_rate": 0.0001898168561213419,
+      "loss": 1.3892,
+      "step": 81
+    },
+    {
+      "epoch": 0.02315566537239675,
+      "grad_norm": 1.2091821432113647,
+      "learning_rate": 0.0001895331334332753,
+      "loss": 1.5162,
+      "step": 82
+    },
+    {
+      "epoch": 0.023438051535474763,
+      "grad_norm": 1.6631978750228882,
+      "learning_rate": 0.0001892457303887706,
+      "loss": 0.8076,
+      "step": 83
+    },
+    {
+      "epoch": 0.02372043769855277,
+      "grad_norm": 1.577644944190979,
+      "learning_rate": 0.0001889546588018412,
+      "loss": 1.4393,
+      "step": 84
+    },
+    {
+      "epoch": 0.02400282386163078,
+      "grad_norm": 1.207412838935852,
+      "learning_rate": 0.00018865993063730004,
+      "loss": 2.1015,
+      "step": 85
+    },
+    {
+      "epoch": 0.02428521002470879,
+      "grad_norm": 2.7810745239257812,
+      "learning_rate": 0.00018836155801026753,
+      "loss": 1.4547,
+      "step": 86
+    },
+    {
+      "epoch": 0.024567596187786797,
+      "grad_norm": 2.054161787033081,
+      "learning_rate": 0.0001880595531856738,
+      "loss": 1.3083,
+      "step": 87
+    },
+    {
+      "epoch": 0.02484998235086481,
+      "grad_norm": 3.753908634185791,
+      "learning_rate": 0.00018775392857775432,
+      "loss": 2.8305,
+      "step": 88
+    },
+    {
+      "epoch": 0.025132368513942818,
+      "grad_norm": 4.611723899841309,
+      "learning_rate": 0.00018744469674953956,
+      "loss": 1.7302,
+      "step": 89
+    },
+    {
+      "epoch": 0.025414754677020826,
+      "grad_norm": 2.27549409866333,
+      "learning_rate": 0.00018713187041233896,
+      "loss": 1.8115,
+      "step": 90
+    },
+    {
+      "epoch": 0.025414754677020826,
+      "eval_loss": 1.6072229146957397,
+      "eval_runtime": 133.6033,
+      "eval_samples_per_second": 5.584,
+      "eval_steps_per_second": 5.584,
+      "step": 90
     }
   ],
   "logging_steps": 1,
@@ -653,7 +731,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 0
+        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
@@ -667,7 +745,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 7830428681502720.0,
+  "total_flos": 8809232266690560.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null