besimray committed on
Commit c028a75 · verified · 1 Parent(s): ecb0ceb

Training in progress, step 110, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e54408fb2a33f5c91595c277d43af9ca7442d78555f6734133f909076eb5ee0b
+oid sha256:4d47375795bb95c7810811251ed890d09b08485d56f154652a654c75ae9c485c
 size 90207248
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ef3fcc665ceb8166c03b90dbe8b812cc65aef67e5c4040511438f0bcec2d036b
+oid sha256:45dee7d2b556b85d291e06e42e7c8f845e737ce91e3c96b980ce976a2402877c
 size 46057082
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8e023d57ffc9febc8fefa58b1faee1161f6172e0c816bc8ad1dc30d22145a166
+oid sha256:4d010434ee16c14958906b5bf42c930dd1be27075db7b352777346c25649d79d
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d5c84ec0ff3c8c6aa13b25568668096db118f67ce80a9fa015a625446606f15d
+oid sha256:398198b060b9edcfe93ff59de4a929b40cbc42323ec0afb0426f8d7b821a61c1
 size 1064
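
Each pointer file above follows the standard Git LFS pointer format: a spec version line, an oid sha256: line holding the hash of the real payload, and the payload size in bytes (the sizes are unchanged in this commit; only the hashes move). As a minimal sketch, assuming the checkpoint has already been downloaded to a local last-checkpoint/ directory (a path this commit does not guarantee), one can check that a downloaded blob matches the new pointer:

import hashlib
from pathlib import Path

# oid copied from the "+" side of the adapter_model.safetensors pointer above
EXPECTED_OID = "4d47375795bb95c7810811251ed890d09b08485d56f154652a654c75ae9c485c"

def sha256_of(path: Path) -> str:
    # Stream in 1 MiB chunks so large checkpoint files need not fit in memory.
    digest = hashlib.sha256()
    with path.open("rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest()

blob = Path("last-checkpoint/adapter_model.safetensors")  # assumed local download path
print("matches pointer:", sha256_of(blob) == EXPECTED_OID)
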
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 1.6011497974395752,
   "best_model_checkpoint": "miner_id_24/checkpoint-100",
-  "epoch": 0.028238616307800918,
+  "epoch": 0.03106247793858101,
   "eval_steps": 10,
-  "global_step": 100,
+  "global_step": 110,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -795,6 +795,84 @@
       "eval_samples_per_second": 5.575,
       "eval_steps_per_second": 5.575,
       "step": 100
+    },
+    {
+      "epoch": 0.028521002470878926,
+      "grad_norm": 1.1203055381774902,
+      "learning_rate": 0.00018345732537213027,
+      "loss": 1.7475,
+      "step": 101
+    },
+    {
+      "epoch": 0.028803388633956935,
+      "grad_norm": 1.274515986442566,
+      "learning_rate": 0.00018310240965131041,
+      "loss": 2.6023,
+      "step": 102
+    },
+    {
+      "epoch": 0.029085774797034947,
+      "grad_norm": 2.5792765617370605,
+      "learning_rate": 0.00018274407791591966,
+      "loss": 1.1908,
+      "step": 103
+    },
+    {
+      "epoch": 0.029368160960112955,
+      "grad_norm": 1.466035008430481,
+      "learning_rate": 0.00018238234489557215,
+      "loss": 0.7359,
+      "step": 104
+    },
+    {
+      "epoch": 0.029650547123190964,
+      "grad_norm": 3.4681172370910645,
+      "learning_rate": 0.0001820172254596956,
+      "loss": 1.8144,
+      "step": 105
+    },
+    {
+      "epoch": 0.029932933286268972,
+      "grad_norm": 4.0510993003845215,
+      "learning_rate": 0.00018164873461691986,
+      "loss": 0.7832,
+      "step": 106
+    },
+    {
+      "epoch": 0.03021531944934698,
+      "grad_norm": 5.226031303405762,
+      "learning_rate": 0.00018127688751446027,
+      "loss": 1.7575,
+      "step": 107
+    },
+    {
+      "epoch": 0.030497705612424993,
+      "grad_norm": 1.0487242937088013,
+      "learning_rate": 0.00018090169943749476,
+      "loss": 2.077,
+      "step": 108
+    },
+    {
+      "epoch": 0.030780091775503,
+      "grad_norm": 1.5338118076324463,
+      "learning_rate": 0.0001805231858085356,
+      "loss": 1.5191,
+      "step": 109
+    },
+    {
+      "epoch": 0.03106247793858101,
+      "grad_norm": 1.2566704750061035,
+      "learning_rate": 0.00018014136218679567,
+      "loss": 1.756,
+      "step": 110
+    },
+    {
+      "epoch": 0.03106247793858101,
+      "eval_loss": 1.6045676469802856,
+      "eval_runtime": 133.6566,
+      "eval_samples_per_second": 5.581,
+      "eval_steps_per_second": 5.581,
+      "step": 110
     }
   ],
   "logging_steps": 1,
@@ -809,7 +887,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 0
+        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
@@ -823,7 +901,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 9788035851878400.0,
+  "total_flos": 1.076683943706624e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null