Training in progress, step 2109, checkpoint

Browse files

Files changed (16) hide show

last-checkpoint/optimizer_0/.metadata +0 -0
last-checkpoint/optimizer_0/__0_0.distcp +1 -1
last-checkpoint/optimizer_0/__1_0.distcp +1 -1
last-checkpoint/optimizer_0/__2_0.distcp +1 -1
last-checkpoint/optimizer_0/__3_0.distcp +1 -1
last-checkpoint/pytorch_model_fsdp_0/.metadata +0 -0
last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp +1 -1
last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp +1 -1
last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp +1 -1
last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +67 -4

last-checkpoint/optimizer_0/.metadata CHANGED Viewed

Binary files a/last-checkpoint/optimizer_0/.metadata and b/last-checkpoint/optimizer_0/.metadata differ

last-checkpoint/optimizer_0/__0_0.distcp CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:050e4db55e69664bf6d9c834522ec2206b36b64c8d2f6ed4d5d17b4cf9da2f4e
 size 13934748

 version https://git-lfs.github.com/spec/v1
+oid sha256:37ffda79ba118623656f8db97819ae6e85518aa69d76fdc791db100043e48be3
 size 13934748

last-checkpoint/optimizer_0/__1_0.distcp CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:adb3a674abc7da7a23279462f8cae294d8ecdec98362fed586fc3bccef1a61d4
 size 13999412

 version https://git-lfs.github.com/spec/v1
+oid sha256:ed8e07a5506949e411645ce87a449f3891f4b957ab0ddc49de3f672874f62723
 size 13999412

last-checkpoint/optimizer_0/__2_0.distcp CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d1e641d2b2e349a4c213409e52cf62d25bc236ac15c9791b7bc804909f7f92c3
 size 13990904

 version https://git-lfs.github.com/spec/v1
+oid sha256:b5b48b159ad925162a1fb4fdb564c7822ef0b8faee2ca4553b90be22401cfc27
 size 13990904

last-checkpoint/optimizer_0/__3_0.distcp CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:866ec72c28b8ea1e8a4c76f5ed42b739d69875ec24137c268880795bd767ba9b
 size 13990904

 version https://git-lfs.github.com/spec/v1
+oid sha256:74d8401662575808efa6d4f055be82b79a2163808e88edd675b8c2d69ffec243
 size 13990904

last-checkpoint/pytorch_model_fsdp_0/.metadata CHANGED Viewed

Binary files a/last-checkpoint/pytorch_model_fsdp_0/.metadata and b/last-checkpoint/pytorch_model_fsdp_0/.metadata differ

last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4f5d80504b530d1236d869d6a0431889ce3c16ca369fab9ca79aef572e1e676f
 size 6966784

 version https://git-lfs.github.com/spec/v1
+oid sha256:130e8e526dc3be989ce9e1461bf9ba00c6c3268dd82397cdb981f3011ad5d2d8
 size 6966784

last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1b0148021e09d7b7a64e41765bf2c33e45d25853d9e709eca7c135e74bee54b7
 size 6966784

 version https://git-lfs.github.com/spec/v1
+oid sha256:ab35ef8e239aeb1a02ce2664693e6a54a69dc22deb0f8cc4e9d4aadcd8208b19
 size 6966784

last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cda280986df81c923c89a0a8a61df0a1484f3b11f668604be6beb240af22c140
 size 6966784

 version https://git-lfs.github.com/spec/v1
+oid sha256:5c02ee9352c0cf1e8394f869f2bc7131d250afe60e955f4919648fd5da62223c
 size 6966784

last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8bb78205b0b38be64245705e3d63c368f26e81d439c05fe7f4f6ee459319648f
 size 6966784

 version https://git-lfs.github.com/spec/v1
+oid sha256:6df05302d7fd7f51f68396fc2bb6ad042d2a59fc7d68c901ad21562911ed44e8
 size 6966784

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:68bc9217b6e9ab217f22aba698dbeddd344df01c6c8d3bf496373786b4d6b46f
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:fa3642bf088423aa8179accacc32785bc88d1c8451ac1c553aa395bd19b6282d
 size 14960

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:72a20a9702c2689058ab5d5d2baeb8c7227e34d68571334f92805043bd9e18eb
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:85938c29325af8d2e4e3e20d49a2776c8cdfd754b3f6d2de7d434efbe1e18dc2
 size 14960

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8533b05acf81e2a8c388c137bc99083b4a5fc4f3554fc80f7b0497d2e0eca05f
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:c76ba06042e26108cca2c1434deb9e836ac081642cac834e23811c5b667f8b38
 size 14960

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f4e0b82e92d540a47961438b15ece197574d010671ffe40e6c7ee07f5dac4307
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:53cd2af488fd8da1bd97f5b13b847fe755bce635dec1434cf52aae85b3dad708
 size 14960

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5ac1b330f53ae14ab4a2bb829af8af4d5e4c909474cfca651cf822672c87529f
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:b2e5684d7b341caf8fde096d7c7ea6b392a41b77ac783f1df1a641a78a0c87fb
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.9954965631666272,
   "eval_steps": 20,
-  "global_step": 2100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -15979,6 +15979,69 @@
       "eval_samples_per_second": 5.418,
       "eval_steps_per_second": 0.18,
       "step": 2100
     }
   ],
   "logging_steps": 1,
@@ -15993,12 +16056,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 5.6093907946255155e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.9997629770087698,
   "eval_steps": 20,
+  "global_step": 2109,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 5.418,
       "eval_steps_per_second": 0.18,
       "step": 2100
+    },
+    {
+      "epoch": 0.9959706091490874,
+      "grad_norm": 4.999462127685547,
+      "learning_rate": 8.766999093690054e-10,
+      "loss": 0.139,
+      "step": 2101
+    },
+    {
+      "epoch": 0.9964446551315478,
+      "grad_norm": 7.40416145324707,
+      "learning_rate": 6.71225666831754e-10,
+      "loss": 0.1862,
+      "step": 2102
+    },
+    {
+      "epoch": 0.996918701114008,
+      "grad_norm": 3.0805890560150146,
+      "learning_rate": 4.931468515556593e-10,
+      "loss": 0.1179,
+      "step": 2103
+    },
+    {
+      "epoch": 0.9973927470964684,
+      "grad_norm": 3.0441319942474365,
+      "learning_rate": 3.4246395142822906e-10,
+      "loss": 0.1261,
+      "step": 2104
+    },
+    {
+      "epoch": 0.9978667930789287,
+      "grad_norm": 3.2093586921691895,
+      "learning_rate": 2.1917737927812377e-10,
+      "loss": 0.0813,
+      "step": 2105
+    },
+    {
+      "epoch": 0.9983408390613889,
+      "grad_norm": 5.661715507507324,
+      "learning_rate": 1.2328747287848609e-10,
+      "loss": 0.1345,
+      "step": 2106
+    },
+    {
+      "epoch": 0.9988148850438493,
+      "grad_norm": 3.4387176036834717,
+      "learning_rate": 5.4794494941390333e-11,
+      "loss": 0.1216,
+      "step": 2107
+    },
+    {
+      "epoch": 0.9992889310263096,
+      "grad_norm": 3.1428146362304688,
+      "learning_rate": 1.3698633117842365e-11,
+      "loss": 0.1275,
+      "step": 2108
+    },
+    {
+      "epoch": 0.9997629770087698,
+      "grad_norm": 8.101336479187012,
+      "learning_rate": 0.0,
+      "loss": 0.2897,
+      "step": 2109
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 5.63300984822956e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null