ProgramInNonsense committed · Commit 9dbeebc · verified · 1 Parent(s): 86aa022

Training in progress, step 1350, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c183e3feecff9557142a855b8a1c1f80dc472645ed46c1ba04b79f019729e890
+oid sha256:d7fe2ade1fae4960c22687660aa14a21fe52eebf9909776d0d18bd4b70c1bb93
 size 71587936
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1454d56ab95019e30e7a49fc394817b5626081f2105359032d711a3fe55d444d
+oid sha256:b9cc8d8fc1bc3c1cebdc993c69347b59c877a733f0496e5509b158f092ae1dfa
 size 143272698
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c9280cad9f00210eea6704ca4f491000410f7eb6d01a6bdbdfaaae8655962fef
+oid sha256:4f3451332b5c2cceb627ad5aef7cde0dc7d05ed215f747deb0c67d2971516852
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5970f260f172c657eeef27e6aeae7a30e4709084d1893dbe88e6ca5b72a8ae32
+oid sha256:05089b5f122589ed831ae5ec00bfa1d74500e2dfd86f0ab8693de9fdc4ff1bb1
 size 1256
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 1.3738386631011963,
   "best_model_checkpoint": "./output/checkpoint-450",
-  "epoch": 2.3121387283236996,
+  "epoch": 2.601156069364162,
   "eval_steps": 150,
-  "global_step": 1200,
+  "global_step": 1350,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -911,6 +911,119 @@
       "eval_samples_per_second": 7.934,
       "eval_steps_per_second": 7.934,
       "step": 1200
+    },
+    {
+      "epoch": 2.33140655105973,
+      "grad_norm": 2.493377685546875,
+      "learning_rate": 0.00010982964227975658,
+      "loss": 0.9833,
+      "step": 1210
+    },
+    {
+      "epoch": 2.350674373795761,
+      "grad_norm": 2.9550833702087402,
+      "learning_rate": 0.00010956696662522569,
+      "loss": 1.1317,
+      "step": 1220
+    },
+    {
+      "epoch": 2.3699421965317917,
+      "grad_norm": 3.639575481414795,
+      "learning_rate": 0.00010930235623204551,
+      "loss": 1.1061,
+      "step": 1230
+    },
+    {
+      "epoch": 2.3892100192678227,
+      "grad_norm": 1.6815850734710693,
+      "learning_rate": 0.00010903582197731294,
+      "loss": 0.8695,
+      "step": 1240
+    },
+    {
+      "epoch": 2.4084778420038537,
+      "grad_norm": 2.584642171859741,
+      "learning_rate": 0.00010876737481720722,
+      "loss": 0.9503,
+      "step": 1250
+    },
+    {
+      "epoch": 2.4277456647398843,
+      "grad_norm": 3.372694253921509,
+      "learning_rate": 0.0001084970257865397,
+      "loss": 0.9139,
+      "step": 1260
+    },
+    {
+      "epoch": 2.4470134874759153,
+      "grad_norm": 2.441466808319092,
+      "learning_rate": 0.00010822478599830008,
+      "loss": 0.9879,
+      "step": 1270
+    },
+    {
+      "epoch": 2.4662813102119463,
+      "grad_norm": 2.6547255516052246,
+      "learning_rate": 0.00010795066664319983,
+      "loss": 0.9459,
+      "step": 1280
+    },
+    {
+      "epoch": 2.485549132947977,
+      "grad_norm": 2.409785270690918,
+      "learning_rate": 0.00010767467898921197,
+      "loss": 1.018,
+      "step": 1290
+    },
+    {
+      "epoch": 2.504816955684008,
+      "grad_norm": 2.16044020652771,
+      "learning_rate": 0.00010739683438110797,
+      "loss": 1.0421,
+      "step": 1300
+    },
+    {
+      "epoch": 2.5240847784200384,
+      "grad_norm": 2.5198447704315186,
+      "learning_rate": 0.00010711714423999145,
+      "loss": 0.9385,
+      "step": 1310
+    },
+    {
+      "epoch": 2.5433526011560694,
+      "grad_norm": 4.743651866912842,
+      "learning_rate": 0.00010683562006282861,
+      "loss": 1.1201,
+      "step": 1320
+    },
+    {
+      "epoch": 2.5626204238921,
+      "grad_norm": 3.1418023109436035,
+      "learning_rate": 0.00010655227342197574,
+      "loss": 1.0317,
+      "step": 1330
+    },
+    {
+      "epoch": 2.581888246628131,
+      "grad_norm": 1.665715217590332,
+      "learning_rate": 0.00010626711596470343,
+      "loss": 0.9744,
+      "step": 1340
+    },
+    {
+      "epoch": 2.601156069364162,
+      "grad_norm": 3.1245648860931396,
+      "learning_rate": 0.0001059801594127179,
+      "loss": 1.0085,
+      "step": 1350
+    },
+    {
+      "epoch": 2.601156069364162,
+      "eval_loss": 1.4368656873703003,
+      "eval_runtime": 28.9097,
+      "eval_samples_per_second": 7.99,
+      "eval_steps_per_second": 7.99,
+      "step": 1350
     }
   ],
   "logging_steps": 10,
@@ -930,7 +1043,7 @@
         "attributes": {}
       }
     },
-  "total_flos": 2.017629700003492e+17,
+  "total_flos": 2.2681627831868006e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null