Training in progress, step 2115, checkpoint

Browse files

Files changed (16) hide show

last-checkpoint/optimizer_0/.metadata +0 -0
last-checkpoint/optimizer_0/__0_0.distcp +1 -1
last-checkpoint/optimizer_0/__1_0.distcp +1 -1
last-checkpoint/optimizer_0/__2_0.distcp +1 -1
last-checkpoint/optimizer_0/__3_0.distcp +1 -1
last-checkpoint/pytorch_model_fsdp_0/.metadata +0 -0
last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp +1 -1
last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp +1 -1
last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp +1 -1
last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +109 -4

last-checkpoint/optimizer_0/.metadata CHANGED Viewed

Binary files a/last-checkpoint/optimizer_0/.metadata and b/last-checkpoint/optimizer_0/.metadata differ

last-checkpoint/optimizer_0/__0_0.distcp CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c626d88cecaf6f37244c24626ee31bda254de73e335860f886b2be28c4358d97
 size 13934748

 version https://git-lfs.github.com/spec/v1
+oid sha256:b3463bb2fcbde477a6451746c5e93c2c02d428df311895e645e51bd4d7bc5770
 size 13934748

last-checkpoint/optimizer_0/__1_0.distcp CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b98f82f21939a42caf63b3e60fa8693d044a5bada470fb4c47ca564bc1aa2906
 size 13999412

 version https://git-lfs.github.com/spec/v1
+oid sha256:45759926af3c5a130310d7c3435a2592683cc6f827fa760589170a59285976df
 size 13999412

last-checkpoint/optimizer_0/__2_0.distcp CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d1bccfb3da16edb9ca2352f991e7e2c84949c2cebb82bdfe6dff4edb7588812b
 size 13990904

 version https://git-lfs.github.com/spec/v1
+oid sha256:ac0546bb14dfa550d667edb17bad381998c91ad46b19cf4fa9c242b50a816ed5
 size 13990904

last-checkpoint/optimizer_0/__3_0.distcp CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2f1b5e474c9b591c523f4c4558a63e2fdd86f92990aa17d39609578b1c9d025a
 size 13990904

 version https://git-lfs.github.com/spec/v1
+oid sha256:94c0f6244ae9eafb7bfc7053255ee1f03da545471ca58c905e513373711ac6d1
 size 13990904

last-checkpoint/pytorch_model_fsdp_0/.metadata CHANGED Viewed

Binary files a/last-checkpoint/pytorch_model_fsdp_0/.metadata and b/last-checkpoint/pytorch_model_fsdp_0/.metadata differ

last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:84eef1c8a398e669a09b130c39c3f146f2a1df5c8f58186431773f03716ad0dd
 size 6966784

 version https://git-lfs.github.com/spec/v1
+oid sha256:d9e31bdf1204fd1d0f71a72f790c89994b7b42e355866f78e398dcd1fd72ff59
 size 6966784

last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:85c2c1bfcfbe43cb98961bcf7bbee9910700d60cc94ea9e559cdcc0bfcaf1d3a
 size 6966784

 version https://git-lfs.github.com/spec/v1
+oid sha256:7637349be59f99c83b895a14ee2abc08edf7040f91572c826bfc5e240c6cf3cc
 size 6966784

last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:65404a56baaeb38eea09621cc68aa2f31f268f0657702a26eb129038b9b80d1b
 size 6966784

 version https://git-lfs.github.com/spec/v1
+oid sha256:96722daeaa044ab72310631ca9e6cb3421c010477ca15702bf14dc71a0459b49
 size 6966784

last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f3c2b908498addeec6c50ef933c786ada650e8ffdacabaf686c730cc90d5e9dd
 size 6966784

 version https://git-lfs.github.com/spec/v1
+oid sha256:36ecb8f00470b147597cb0cacfa91635a0005a8aa8e917d79b71eed2692f187e
 size 6966784

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:75d7eee0983d654dc4f4d9d0aeab1c0cc99847a413b7ee9122cbe6f31278739d
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:6408f4b0aa195908f564453cafd5739860df4366aa11fbce643000cf71fb361a
 size 14960

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f6b89b5ae016f3558d6cf4489eb242de8fea1141c77af78593bebef95e5e45eb
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:e2de12b682debc58a16157122005ee34693196684c691e90572cca7e76466246
 size 14960

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9375cbe9615de32a9bfeb48c97d58f16a884f450ceae1c1433fd9c53f512214c
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:1bb54c6b25889889c49cdbd9de91a22d239105af3881dd38ba49b92d0a971950
 size 14960

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:13c4476d4d3e749b45bb7cf5bd672971013f9e7d9039dbfad26020d82e32caff
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:671cb48e85bbaaa47ed0fd861991ab59a348f2bf0b24bf261a2744e7f24b2809
 size 14960

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:20531ddcffa25460cb7198bef6ec4382015b394eaa7700ad1ffe8c13cee7ce9f
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:4afcb9cd9602dc2e00eeaf78dfe6d4b6f74f46f4affaac94377d375b2a776c3d
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.9929078014184397,
   "eval_steps": 20,
-  "global_step": 2100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -15979,6 +15979,111 @@
       "eval_samples_per_second": 5.753,
       "eval_steps_per_second": 0.188,
       "step": 2100
     }
   ],
   "logging_steps": 1,
@@ -15993,12 +16098,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 5.377331196550185e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.0,
   "eval_steps": 20,
+  "global_step": 2115,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 5.753,
       "eval_steps_per_second": 0.188,
       "step": 2100
+    },
+    {
+      "epoch": 0.9933806146572104,
+      "grad_norm": 5.17624044418335,
+      "learning_rate": 2.6707234039302642e-09,
+      "loss": 0.2462,
+      "step": 2101
+    },
+    {
+      "epoch": 0.9938534278959811,
+      "grad_norm": 6.951145648956299,
+      "learning_rate": 2.302831750020662e-09,
+      "loss": 0.2213,
+      "step": 2102
+    },
+    {
+      "epoch": 0.9943262411347518,
+      "grad_norm": 4.699036598205566,
+      "learning_rate": 1.962187306674412e-09,
+      "loss": 0.2068,
+      "step": 2103
+    },
+    {
+      "epoch": 0.9947990543735225,
+      "grad_norm": 5.011316776275635,
+      "learning_rate": 1.6487910022666698e-09,
+      "loss": 0.154,
+      "step": 2104
+    },
+    {
+      "epoch": 0.9952718676122931,
+      "grad_norm": 5.612926483154297,
+      "learning_rate": 1.3626436909131014e-09,
+      "loss": 0.2245,
+      "step": 2105
+    },
+    {
+      "epoch": 0.9957446808510638,
+      "grad_norm": 6.049012184143066,
+      "learning_rate": 1.1037461524643355e-09,
+      "loss": 0.2118,
+      "step": 2106
+    },
+    {
+      "epoch": 0.9962174940898345,
+      "grad_norm": 6.17867374420166,
+      "learning_rate": 8.720990925059625e-10,
+      "loss": 0.281,
+      "step": 2107
+    },
+    {
+      "epoch": 0.9966903073286052,
+      "grad_norm": 5.029500484466553,
+      "learning_rate": 6.677031423574232e-10,
+      "loss": 0.2281,
+      "step": 2108
+    },
+    {
+      "epoch": 0.9971631205673759,
+      "grad_norm": 6.733471393585205,
+      "learning_rate": 4.905588590686794e-10,
+      "loss": 0.2776,
+      "step": 2109
+    },
+    {
+      "epoch": 0.9976359338061466,
+      "grad_norm": 9.003266334533691,
+      "learning_rate": 3.4066672541910317e-10,
+      "loss": 0.3067,
+      "step": 2110
+    },
+    {
+      "epoch": 0.9981087470449173,
+      "grad_norm": 6.541497230529785,
+      "learning_rate": 2.180271499185871e-10,
+      "loss": 0.2418,
+      "step": 2111
+    },
+    {
+      "epoch": 0.9985815602836879,
+      "grad_norm": 3.8773562908172607,
+      "learning_rate": 1.2264046680088294e-10,
+      "loss": 0.1889,
+      "step": 2112
+    },
+    {
+      "epoch": 0.9990543735224586,
+      "grad_norm": 5.770922660827637,
+      "learning_rate": 5.4506936030263026e-11,
+      "loss": 0.2226,
+      "step": 2113
+    },
+    {
+      "epoch": 0.9995271867612293,
+      "grad_norm": 4.714475631713867,
+      "learning_rate": 1.3626743291528244e-11,
+      "loss": 0.1988,
+      "step": 2114
+    },
+    {
+      "epoch": 1.0,
+      "grad_norm": 9.07297134399414,
+      "learning_rate": 0.0,
+      "loss": 0.2536,
+      "step": 2115
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 5.4142614901253734e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null