besimray committed
Commit: 1e5b00b
1 Parent(s): 3fdca4e

Training in progress, step 130, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:021782730cbc0dd9442791d4bbeb76ef95bd8476b8c416818e4a2325ee96f06e
+oid sha256:06cb5c99fa6a855d28a17bf1c0bb727f661d2305e6ccf1eedacf257af1918190
 size 90207248
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7b0d83b35fe5deee471bfe28a6ca6ef3c994bdeaa39d567accad5569e3a08da0
+oid sha256:f4da30ad3ecaf80f0132f8c30fcf3db794bf59cdbedc0a77a47030a55fbe18e8
 size 46057082
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:98bbd4b4faf598bcf20005208e29928176693d773d6281a511c45efeae497be2
+oid sha256:ad4c4d2d769c6f52183fdfe62140ef02b36aa1e936b1d8050f51672d3d58fb1e
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7e549a35cd7e532c378c88126565a201f68fd1d73868bbbba082980ce1de2c27
+oid sha256:3511d75105f53c278279e3dade6f856082c8693b0424c0bf567bdcf23028dd2b
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 1.6035598516464233,
   "best_model_checkpoint": "miner_id_24/checkpoint-100",
-  "epoch": 0.0338863395693611,
+  "epoch": 0.03671020120014119,
   "eval_steps": 10,
-  "global_step": 120,
+  "global_step": 130,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -951,6 +951,84 @@
       "eval_samples_per_second": 5.554,
       "eval_steps_per_second": 5.554,
       "step": 120
+    },
+    {
+      "epoch": 0.03416872573243911,
+      "grad_norm": 1.1370112895965576,
+      "learning_rate": 0.00017572742764761055,
+      "loss": 1.9316,
+      "step": 121
+    },
+    {
+      "epoch": 0.03445111189551712,
+      "grad_norm": 1.037766456604004,
+      "learning_rate": 0.00017530714660036112,
+      "loss": 1.3929,
+      "step": 122
+    },
+    {
+      "epoch": 0.03473349805859513,
+      "grad_norm": 0.7955436706542969,
+      "learning_rate": 0.00017488376997127283,
+      "loss": 2.3555,
+      "step": 123
+    },
+    {
+      "epoch": 0.035015884221673135,
+      "grad_norm": 3.0241856575012207,
+      "learning_rate": 0.0001744573151637007,
+      "loss": 1.9409,
+      "step": 124
+    },
+    {
+      "epoch": 0.03529827038475115,
+      "grad_norm": 1.1123616695404053,
+      "learning_rate": 0.00017402779970753155,
+      "loss": 2.8273,
+      "step": 125
+    },
+    {
+      "epoch": 0.03558065654782916,
+      "grad_norm": 1.9221562147140503,
+      "learning_rate": 0.0001735952412584635,
+      "loss": 2.0479,
+      "step": 126
+    },
+    {
+      "epoch": 0.035863042710907164,
+      "grad_norm": 1.6352111101150513,
+      "learning_rate": 0.00017315965759728014,
+      "loss": 1.9576,
+      "step": 127
+    },
+    {
+      "epoch": 0.036145428873985176,
+      "grad_norm": 1.4023115634918213,
+      "learning_rate": 0.00017272106662911973,
+      "loss": 1.6282,
+      "step": 128
+    },
+    {
+      "epoch": 0.03642781503706318,
+      "grad_norm": 0.36453551054000854,
+      "learning_rate": 0.00017227948638273916,
+      "loss": 0.2846,
+      "step": 129
+    },
+    {
+      "epoch": 0.03671020120014119,
+      "grad_norm": 1.499108076095581,
+      "learning_rate": 0.00017183493500977278,
+      "loss": 2.0915,
+      "step": 130
+    },
+    {
+      "epoch": 0.03671020120014119,
+      "eval_loss": 1.625510811805725,
+      "eval_runtime": 134.291,
+      "eval_samples_per_second": 5.555,
+      "eval_steps_per_second": 5.555,
+      "step": 130
     }
   ],
   "logging_steps": 1,
@@ -965,7 +1043,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 2
+        "early_stopping_patience_counter": 3
       }
     },
     "TrainerControl": {
@@ -974,12 +1052,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.174564302225408e+16,
+  "total_flos": 1.272444660744192e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null