Training in progress, step 63, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +111 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b0752891ff9a06875105d7fdf9d01ed664e7e9b822573eb280f2cfd89af4be07
 size 50624

 version https://git-lfs.github.com/spec/v1
+oid sha256:e3fdfbbadacc757e67777153358d07deb6fcd022edaac4a8f31fdca7413bf30d
 size 50624

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b848d6987b9344c713a0234a5034b53fc7b94c24626d49be2ed6437595c6f67e
 size 118090

 version https://git-lfs.github.com/spec/v1
+oid sha256:23dbfac893b0513a48ee0fcc19bd5a9933862addb52295331762e417d13a5c7c
 size 118090

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6d9d0917e79b66907d60a1ed5973717fa0784cbbd4084c120130e72992abe081
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:4ce2385d0b72feeba90e46a4879daa6d2c384d48acb74d18c1c80eded380d609
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4dd76e4cfbed647f5393d42b2fd1c0eead588dbf173cae704b7e26883a38f902
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:badf8dd7fd843c03f3dfa379f5a62cbdac918ff18b84343f94ed7c04cd128a3f
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 2.380952380952381,
   "eval_steps": 6,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -429,6 +429,113 @@
       "learning_rate": 1.4124637147783432e-05,
       "loss": 10.3489,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -443,12 +550,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2595508912128.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 3.0,
   "eval_steps": 6,
+  "global_step": 63,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.4124637147783432e-05,
       "loss": 10.3489,
       "step": 50
+    },
+    {
+      "epoch": 2.4285714285714284,
+      "grad_norm": 0.2930206060409546,
+      "learning_rate": 1.2124437891918993e-05,
+      "loss": 10.3489,
+      "step": 51
+    },
+    {
+      "epoch": 2.4761904761904763,
+      "grad_norm": 0.29116538166999817,
+      "learning_rate": 1.0257277929332332e-05,
+      "loss": 10.3495,
+      "step": 52
+    },
+    {
+      "epoch": 2.5238095238095237,
+      "grad_norm": 0.2917405664920807,
+      "learning_rate": 8.529715727489912e-06,
+      "loss": 10.3501,
+      "step": 53
+    },
+    {
+      "epoch": 2.571428571428571,
+      "grad_norm": 0.30513086915016174,
+      "learning_rate": 6.947819411632223e-06,
+      "loss": 10.348,
+      "step": 54
+    },
+    {
+      "epoch": 2.571428571428571,
+      "eval_loss": 10.34866714477539,
+      "eval_runtime": 0.0726,
+      "eval_samples_per_second": 247.859,
+      "eval_steps_per_second": 41.31,
+      "step": 54
+    },
+    {
+      "epoch": 2.619047619047619,
+      "grad_norm": 0.29732662439346313,
+      "learning_rate": 5.51714545026264e-06,
+      "loss": 10.3491,
+      "step": 55
+    },
+    {
+      "epoch": 2.6666666666666665,
+      "grad_norm": 0.2938980758190155,
+      "learning_rate": 4.242719137849077e-06,
+      "loss": 10.3488,
+      "step": 56
+    },
+    {
+      "epoch": 2.7142857142857144,
+      "grad_norm": 0.2909833490848541,
+      "learning_rate": 3.1290169432939553e-06,
+      "loss": 10.3491,
+      "step": 57
+    },
+    {
+      "epoch": 2.761904761904762,
+      "grad_norm": 0.2944483160972595,
+      "learning_rate": 2.179950786173879e-06,
+      "loss": 10.3492,
+      "step": 58
+    },
+    {
+      "epoch": 2.8095238095238093,
+      "grad_norm": 0.29118263721466064,
+      "learning_rate": 1.3988542959794627e-06,
+      "loss": 10.3491,
+      "step": 59
+    },
+    {
+      "epoch": 2.857142857142857,
+      "grad_norm": 0.282505601644516,
+      "learning_rate": 7.884711026201585e-07,
+      "loss": 10.3495,
+      "step": 60
+    },
+    {
+      "epoch": 2.857142857142857,
+      "eval_loss": 10.34833812713623,
+      "eval_runtime": 0.0639,
+      "eval_samples_per_second": 281.843,
+      "eval_steps_per_second": 46.974,
+      "step": 60
+    },
+    {
+      "epoch": 2.9047619047619047,
+      "grad_norm": 0.2876209318637848,
+      "learning_rate": 3.5094519932415417e-07,
+      "loss": 10.3495,
+      "step": 61
+    },
+    {
+      "epoch": 2.9523809523809526,
+      "grad_norm": 0.29724815487861633,
+      "learning_rate": 8.781341178393244e-08,
+      "loss": 10.3485,
+      "step": 62
+    },
+    {
+      "epoch": 3.0,
+      "grad_norm": 0.2917063534259796,
+      "learning_rate": 0.0,
+      "loss": 10.3492,
+      "step": 63
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 3265634009088.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null