Training in progress, step 90, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +73 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3b120c235acc603d7e0a88df6eb116c05a31d43ac85e97d14eca60636aa4e9e1
 size 101752088

 version https://git-lfs.github.com/spec/v1
+oid sha256:3bcb30213c268b08c544b2c7db8161f621672109ce6ac437fbf66b545997526d
 size 101752088

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3c28a74ee961f8b24fb2735619f4f437d34485673c89b57deafa93df5086d70b
 size 52046596

 version https://git-lfs.github.com/spec/v1
+oid sha256:f0645116a7c0984cadcde7eccc2e07e2cf29f24691185e9c2bb5c66a51d9adad
 size 52046596

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2cd11dbeb5727fc345d367ecbecf218eaa06a9e3b7752506d05b1cc38c858f04
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:0580e74248926dc2cb618fc9ab207371e56a1f66f91fbcaaa73e7d0342f25366
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:49b8a1dbbf2c2a0b7fde326d57c34bd6c5e5d17e0aaf8b19016c1f721c049db1
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:cb97b24f34ff3e53eec5be9cf35c1a7161c58dbc2fed7dda160fb3eb64e5f353
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.01006985965133111,
   "eval_steps": 25,
-  "global_step": 80,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -599,6 +599,76 @@
       "learning_rate": 2.339555568810221e-05,
       "loss": 0.0003,
       "step": 80
     }
   ],
   "logging_steps": 1,
@@ -618,7 +688,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5.26830721302528e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.011328592107747499,
   "eval_steps": 25,
+  "global_step": 90,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 2.339555568810221e-05,
       "loss": 0.0003,
       "step": 80
+    },
+    {
+      "epoch": 0.010195732896972749,
+      "grad_norm": 5.916388511657715,
+      "learning_rate": 2.119892463932781e-05,
+      "loss": 1.0992,
+      "step": 81
+    },
+    {
+      "epoch": 0.010321606142614388,
+      "grad_norm": 0.010422502644360065,
+      "learning_rate": 1.9098300562505266e-05,
+      "loss": 0.0005,
+      "step": 82
+    },
+    {
+      "epoch": 0.010447479388256025,
+      "grad_norm": 3.361562728881836,
+      "learning_rate": 1.7096242744495837e-05,
+      "loss": 0.1324,
+      "step": 83
+    },
+    {
+      "epoch": 0.010573352633897665,
+      "grad_norm": 0.04859397932887077,
+      "learning_rate": 1.5195190384357404e-05,
+      "loss": 0.0013,
+      "step": 84
+    },
+    {
+      "epoch": 0.010699225879539304,
+      "grad_norm": 0.023048996925354004,
+      "learning_rate": 1.339745962155613e-05,
+      "loss": 0.0011,
+      "step": 85
+    },
+    {
+      "epoch": 0.010825099125180943,
+      "grad_norm": 0.02878495492041111,
+      "learning_rate": 1.1705240714107302e-05,
+      "loss": 0.0012,
+      "step": 86
+    },
+    {
+      "epoch": 0.010950972370822582,
+      "grad_norm": 0.03888264298439026,
+      "learning_rate": 1.0120595370083318e-05,
+      "loss": 0.0033,
+      "step": 87
+    },
+    {
+      "epoch": 0.01107684561646422,
+      "grad_norm": 3.1203830242156982,
+      "learning_rate": 8.645454235739903e-06,
+      "loss": 0.1787,
+      "step": 88
+    },
+    {
+      "epoch": 0.01120271886210586,
+      "grad_norm": 3.7907369136810303,
+      "learning_rate": 7.281614543321269e-06,
+      "loss": 0.3365,
+      "step": 89
+    },
+    {
+      "epoch": 0.011328592107747499,
+      "grad_norm": 2.6835732460021973,
+      "learning_rate": 6.030737921409169e-06,
+      "loss": 0.0975,
+      "step": 90
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 5.92684561465344e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null