mika5883 commited on
Commit
523f96d
1 Parent(s): 708d5d7

Training in progress, step 65000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dfbbbdf5e0f7e1e75984b8a87f3e00820c6720f8b58391005c381702a07c4655
3
  size 891644712
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd99d22f631d655bafa0ef921825bcac2b2df42c1345f4f9a951d158bf615583
3
  size 891644712
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ec27daf70b494d2ff8c64e8643c93445ac61cf57d9539ca709da5a21408ca147
3
  size 1783444357
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:724bb64220f2c161eb3405e2f9cd8e66607b6d55bd8fd225e7c8e74e6dbcc62b
3
  size 1783444357
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bb6e27605cfbdbbe56fb86eafa8a4f9c8afa89179aa209cea4a2aad4a0e0629b
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87df364b8a2ed1fef782c858fba27ed776a184876aa3b66d329ff8aa75bd7a24
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1cbd243928a750e8ab80bcce9d0d6e45db7cd8c8e143e8c4373b48e856fd52e7
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1452c3e81c52ae774720bef3063ad9b20552f8d2905fb135042cced0dd4241c8
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.3904,
5
  "eval_steps": 500,
6
- "global_step": 61000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -861,6 +861,62 @@
861
  "learning_rate": 4.024352e-05,
862
  "loss": 0.3265,
863
  "step": 61000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
864
  }
865
  ],
866
  "logging_steps": 500,
@@ -880,7 +936,7 @@
880
  "attributes": {}
881
  }
882
  },
883
- "total_flos": 2.9717145059328e+17,
884
  "train_batch_size": 64,
885
  "trial_name": null,
886
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.416,
5
  "eval_steps": 500,
6
+ "global_step": 65000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
861
  "learning_rate": 4.024352e-05,
862
  "loss": 0.3265,
863
  "step": 61000
864
+ },
865
+ {
866
+ "epoch": 0.3936,
867
+ "grad_norm": 0.6397412419319153,
868
+ "learning_rate": 4.0163520000000004e-05,
869
+ "loss": 0.3268,
870
+ "step": 61500
871
+ },
872
+ {
873
+ "epoch": 0.3968,
874
+ "grad_norm": 0.7801005840301514,
875
+ "learning_rate": 4.0083520000000005e-05,
876
+ "loss": 0.3314,
877
+ "step": 62000
878
+ },
879
+ {
880
+ "epoch": 0.4,
881
+ "grad_norm": 0.6966884136199951,
882
+ "learning_rate": 4.000352e-05,
883
+ "loss": 0.3263,
884
+ "step": 62500
885
+ },
886
+ {
887
+ "epoch": 0.4032,
888
+ "grad_norm": 0.7413304448127747,
889
+ "learning_rate": 3.9923520000000006e-05,
890
+ "loss": 0.3284,
891
+ "step": 63000
892
+ },
893
+ {
894
+ "epoch": 0.4064,
895
+ "grad_norm": 0.7089780569076538,
896
+ "learning_rate": 3.984352e-05,
897
+ "loss": 0.3252,
898
+ "step": 63500
899
+ },
900
+ {
901
+ "epoch": 0.4096,
902
+ "grad_norm": 0.6669878959655762,
903
+ "learning_rate": 3.976352e-05,
904
+ "loss": 0.3239,
905
+ "step": 64000
906
+ },
907
+ {
908
+ "epoch": 0.4128,
909
+ "grad_norm": 0.7352403998374939,
910
+ "learning_rate": 3.968368e-05,
911
+ "loss": 0.3226,
912
+ "step": 64500
913
+ },
914
+ {
915
+ "epoch": 0.416,
916
+ "grad_norm": 0.6916635036468506,
917
+ "learning_rate": 3.9603840000000005e-05,
918
+ "loss": 0.3234,
919
+ "step": 65000
920
  }
921
  ],
922
  "logging_steps": 500,
 
936
  "attributes": {}
937
  }
938
  },
939
+ "total_flos": 3.166581030912e+17,
940
  "train_batch_size": 64,
941
  "trial_name": null,
942
  "trial_params": null