mika5883 committed on
Commit 7e9e46b
1 Parent(s): 0f59454

Training in progress, step 75000, checkpoint

last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:579a71fbcf796ed2320b1bfdbc71fc5176b97eb09c34e49378b4d90296c31eb8
+oid sha256:62c522b64bf739d00b582ffd0a643c3efac07a2c50ad5c8217960816597aa1b7
 size 891644712
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2cee76ad81de6f6dc048b803ad246fcece9bfd6318e54fcb58440f7a3d549a71
+oid sha256:2ea6b08f3a21ebd27e904f6c480a2e53e645065339857b33fb47d39a33ab84a4
 size 1783444357
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2fd0d34d79fa86fd679ccbb446dbdf9eb6966c7696206cc4f31e110f5764bfd7
+oid sha256:9a111aa87150022a7015d18e06b4152718ab7403244bfc3590d03281fc35af0e
 size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:65145316c0b4fb3c5a98c06838fed3bfdbe12f66538cde46484d28c82940a1a3
+oid sha256:6d655df9252eb0df8fffa1d13ade85fbe45fc35430c05ca667845bcc4d9ed179
 size 627
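
Each of the four files above is stored through Git LFS, so the diff only touches the three-line pointer (spec version, sha256 object id, payload size) rather than the binary payload itself. Below is a minimal sketch of checking a locally pulled payload against its pointer; the path and helper name are placeholders for illustration, not anything recorded in this commit.

import hashlib
import os

def verify_lfs_pointer(pointer_text: str, payload_path: str) -> bool:
    """Compare a local file against the oid/size fields of its Git LFS pointer."""
    fields = dict(line.split(" ", 1) for line in pointer_text.strip().splitlines())
    expected_oid = fields["oid"].split(":", 1)[1]   # "sha256:<hex>" -> "<hex>"
    expected_size = int(fields["size"])
    if os.path.getsize(payload_path) != expected_size:
        return False
    sha = hashlib.sha256()
    with open(payload_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
            sha.update(chunk)
    return sha.hexdigest() == expected_oid

# Example with the new model.safetensors pointer from this commit
# (assumes `git lfs pull` has replaced the pointer with the real 891,644,712-byte file):
pointer = (
    "version https://git-lfs.github.com/spec/v1\n"
    "oid sha256:62c522b64bf739d00b582ffd0a643c3efac07a2c50ad5c8217960816597aa1b7\n"
    "size 891644712"
)
# verify_lfs_pointer(pointer, "last-checkpoint/model.safetensors")  # -> True if intact
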
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.4288,
+  "epoch": 0.48,
   "eval_steps": 500,
-  "global_step": 67000,
+  "global_step": 75000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -945,6 +945,118 @@
       "learning_rate": 3.928384e-05,
       "loss": 0.3291,
       "step": 67000
+    },
+    {
+      "epoch": 0.432,
+      "grad_norm": 0.6616542339324951,
+      "learning_rate": 3.920384e-05,
+      "loss": 0.3266,
+      "step": 67500
+    },
+    {
+      "epoch": 0.4352,
+      "grad_norm": 0.5957266092300415,
+      "learning_rate": 3.912384e-05,
+      "loss": 0.32,
+      "step": 68000
+    },
+    {
+      "epoch": 0.4384,
+      "grad_norm": 0.6576407551765442,
+      "learning_rate": 3.904384e-05,
+      "loss": 0.3246,
+      "step": 68500
+    },
+    {
+      "epoch": 0.4416,
+      "grad_norm": 0.6852056384086609,
+      "learning_rate": 3.896416e-05,
+      "loss": 0.3268,
+      "step": 69000
+    },
+    {
+      "epoch": 0.4448,
+      "grad_norm": 0.780893087387085,
+      "learning_rate": 3.888416e-05,
+      "loss": 0.3229,
+      "step": 69500
+    },
+    {
+      "epoch": 0.448,
+      "grad_norm": 0.6741476655006409,
+      "learning_rate": 3.880416e-05,
+      "loss": 0.3188,
+      "step": 70000
+    },
+    {
+      "epoch": 0.4512,
+      "grad_norm": 0.5919800400733948,
+      "learning_rate": 3.872416e-05,
+      "loss": 0.3208,
+      "step": 70500
+    },
+    {
+      "epoch": 0.4544,
+      "grad_norm": 0.6476633548736572,
+      "learning_rate": 3.864416e-05,
+      "loss": 0.322,
+      "step": 71000
+    },
+    {
+      "epoch": 0.4576,
+      "grad_norm": 0.5667979717254639,
+      "learning_rate": 3.8564159999999996e-05,
+      "loss": 0.3151,
+      "step": 71500
+    },
+    {
+      "epoch": 0.4608,
+      "grad_norm": 0.6126554608345032,
+      "learning_rate": 3.8484160000000004e-05,
+      "loss": 0.3185,
+      "step": 72000
+    },
+    {
+      "epoch": 0.464,
+      "grad_norm": 0.7995546460151672,
+      "learning_rate": 3.840416e-05,
+      "loss": 0.3174,
+      "step": 72500
+    },
+    {
+      "epoch": 0.4672,
+      "grad_norm": 0.5964981317520142,
+      "learning_rate": 3.8324160000000005e-05,
+      "loss": 0.3187,
+      "step": 73000
+    },
+    {
+      "epoch": 0.4704,
+      "grad_norm": 0.7718212008476257,
+      "learning_rate": 3.824416e-05,
+      "loss": 0.3156,
+      "step": 73500
+    },
+    {
+      "epoch": 0.4736,
+      "grad_norm": 0.7086686491966248,
+      "learning_rate": 3.8164320000000005e-05,
+      "loss": 0.3189,
+      "step": 74000
+    },
+    {
+      "epoch": 0.4768,
+      "grad_norm": 0.7988029718399048,
+      "learning_rate": 3.808432e-05,
+      "loss": 0.3151,
+      "step": 74500
+    },
+    {
+      "epoch": 0.48,
+      "grad_norm": 0.6092699766159058,
+      "learning_rate": 3.8004320000000006e-05,
+      "loss": 0.3153,
+      "step": 75000
     }
   ],
   "logging_steps": 500,
@@ -964,7 +1076,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.2640142934016e+17,
+  "total_flos": 3.65374734336e+17,
   "train_batch_size": 64,
   "trial_name": null,
   "trial_params": null