Training in progress, step 80, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +55 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:159277feec320a7592284f9e3696651e378c75cb98a9616d5fc9a41efc0eec40
 size 183784

 version https://git-lfs.github.com/spec/v1
+oid sha256:b8537667eb2c1590a1b2a1c34f2de9292e65cc7ace043f57a94bbce298c07cee
 size 183784

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3c71a9e87aa3c595eecb7f30d889af53933360e76b6456ae393643c7d90dcacb
 size 236760

 version https://git-lfs.github.com/spec/v1
+oid sha256:1ff737ce4bd3480df082e35a2ceac05f083a433be3eb3fc3fc4b55760a97bcda
 size 236760

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:99ae411404197432466fcb959effa7956b7aabac6b0ee3018ce67d44282b87b1
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:db3e559c298d4a2aa71fc2be31e00e408588329761236aec5fe912fd29d2384d
 size 15024

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0e0c13bbed523a6d7bec142d7a3836e9629b2dc23935ee4a5086689a05f762e6
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:a7417d3df19f914d5ff3de50e1ce3e883c2e459b2dba528703683d4a328a4e0f
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:29316b978407a35ab6f860f3a2bcf442e67f1f9bd92ef1016961e6d3aa0c3d14
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:9c3395b638fca0417167bebc767ae35bf6c7bb1a639f0851b624a26e9c08419e
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:45e9c496f59967ab95befd85ccf8b9fef5104a06d33cdbe2714ed501882c6167
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:2f14ae5014776de6223329b6dc8f9efc58d39f88f71b06e9dacbef3ff996556e
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bcaaab9b4e9d97f524192da2cde7b8ea63f0956124955e5031658e7310a4fbcd
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:aab4ba69e83f65129bdf14a330e6b14308f6903214fa4aa747256ae7ff2c663e
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.3793103448275863,
   "eval_steps": 16,
-  "global_step": 40,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -59,6 +59,58 @@
       "learning_rate": 8.945702546981969e-05,
       "loss": 11.9214,
       "step": 40
     }
   ],
   "logging_steps": 10,
@@ -78,7 +130,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 647500922880.0,
   "train_batch_size": 7,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 2.7586206896551726,
   "eval_steps": 16,
+  "global_step": 80,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 8.945702546981969e-05,
       "loss": 11.9214,
       "step": 40
+    },
+    {
+      "epoch": 1.6551724137931034,
+      "eval_loss": 11.91930103302002,
+      "eval_runtime": 0.8997,
+      "eval_samples_per_second": 167.834,
+      "eval_steps_per_second": 6.669,
+      "step": 48
+    },
+    {
+      "epoch": 1.7241379310344827,
+      "grad_norm": 0.024184564128518105,
+      "learning_rate": 8.231496189304704e-05,
+      "loss": 11.9204,
+      "step": 50
+    },
+    {
+      "epoch": 2.0689655172413794,
+      "grad_norm": 0.029018325731158257,
+      "learning_rate": 7.379736965185368e-05,
+      "loss": 11.9198,
+      "step": 60
+    },
+    {
+      "epoch": 2.206896551724138,
+      "eval_loss": 11.917800903320312,
+      "eval_runtime": 0.8209,
+      "eval_samples_per_second": 183.936,
+      "eval_steps_per_second": 7.309,
+      "step": 64
+    },
+    {
+      "epoch": 2.413793103448276,
+      "grad_norm": 0.025336025282740593,
+      "learning_rate": 6.426681121245527e-05,
+      "loss": 11.919,
+      "step": 70
+    },
+    {
+      "epoch": 2.7586206896551726,
+      "grad_norm": 0.027750149369239807,
+      "learning_rate": 5.4128967273616625e-05,
+      "loss": 11.9188,
+      "step": 80
+    },
+    {
+      "epoch": 2.7586206896551726,
+      "eval_loss": 11.916128158569336,
+      "eval_runtime": 0.8846,
+      "eval_samples_per_second": 170.691,
+      "eval_steps_per_second": 6.782,
+      "step": 80
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 1295001845760.0,
   "train_batch_size": 7,
   "trial_name": null,
   "trial_params": null