Training in progress, step 808, checkpoint

Browse files

Files changed (16) hide show

last-checkpoint/optimizer_0/.metadata +0 -0
last-checkpoint/optimizer_0/__0_0.distcp +1 -1
last-checkpoint/optimizer_0/__1_0.distcp +1 -1
last-checkpoint/optimizer_0/__2_0.distcp +1 -1
last-checkpoint/optimizer_0/__3_0.distcp +1 -1
last-checkpoint/pytorch_model_fsdp_0/.metadata +0 -0
last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp +1 -1
last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp +1 -1
last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp +1 -1
last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +60 -4

last-checkpoint/optimizer_0/.metadata CHANGED Viewed

Binary files a/last-checkpoint/optimizer_0/.metadata and b/last-checkpoint/optimizer_0/.metadata differ

last-checkpoint/optimizer_0/__0_0.distcp CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:690953a5f1eaa9653131c0bd57c0088778a2758f6f1caff67bc9dc9bd0a61e4e
 size 13934748

 version https://git-lfs.github.com/spec/v1
+oid sha256:6d79a8a24bad92ed6eddb4f1a5ae34147ccedd0938830a31dd4b49d4107b25fe
 size 13934748

last-checkpoint/optimizer_0/__1_0.distcp CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:961a2146c9b2e620d54c67901bd3fa375b0dd4b3895406d10bf2578c5745f6ec
 size 13999412

 version https://git-lfs.github.com/spec/v1
+oid sha256:19d5894029ff5fb8c3f198f3d0dc4721910b9e651dcf6775ef035747ababd49d
 size 13999412

last-checkpoint/optimizer_0/__2_0.distcp CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d6e7156894345a02a9422f425abf2ddc88061246f26736af17b7bd221ea2b39c
 size 13990904

 version https://git-lfs.github.com/spec/v1
+oid sha256:72f89593cdfdb1d0d63e29c7655dd6926cfa8d7de6f64ba99861afa048925281
 size 13990904

last-checkpoint/optimizer_0/__3_0.distcp CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:afb3878b0710bfa44676e56faea2bf2e81a1c4bc0b34feb980fc568965e9b5bf
 size 13990904

 version https://git-lfs.github.com/spec/v1
+oid sha256:1de76ba031b2df70290abcc273c0ca7c76e5324bf347df6f17f34aee7dc63d1c
 size 13990904

last-checkpoint/pytorch_model_fsdp_0/.metadata CHANGED Viewed

Binary files a/last-checkpoint/pytorch_model_fsdp_0/.metadata and b/last-checkpoint/pytorch_model_fsdp_0/.metadata differ

last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d17c8c706eb940a9d1d5520139b6ffe5a44eb3dd61f2326391f6febfc3a6784f
 size 6966784

 version https://git-lfs.github.com/spec/v1
+oid sha256:26f35c31737247f2cdd166bfc3866830e294dca3791e7c58a4a4c7da0bde1664
 size 6966784

last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2101a5cc6a3e4ee7d9bcd0e452c1b7b0e0ae6c681cf45975f0c718f88b871286
 size 6966784

 version https://git-lfs.github.com/spec/v1
+oid sha256:ee274d658c2cd7d90c2cb7a6e847197c382e7b20d51d4d940ab01b8f68c15df2
 size 6966784

last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:03595f74c1fc252124eeec1b051b356f6d7bf67292b9806a2f8674da24f88c10
 size 6966784

 version https://git-lfs.github.com/spec/v1
+oid sha256:43d339baab952c823a939ba4aaa601da58042e6b41d474cb9dd88a0b1db4d424
 size 6966784

last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b14a06466128fc4e8a81dd91b128a90acb8f65c2f692c8d576ff2e0ea82b7812
 size 6966784

 version https://git-lfs.github.com/spec/v1
+oid sha256:8564d765df3996b99c746bc714c38fa581f05f6f7bd8afb7323b7cee52b46db5
 size 6966784

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:037251a482261d469d05ca212322fd7a8b0d86b1e9774acdb0dd0d30ed213fb4
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:c5267e6651ba6650aad8d4187f305b173ad364e0765707ff4829b4e5b285e5d5
 size 14960

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:971deb5c5f50d793d58f55fcc23e2e2e3d0e3e15819213e61585330da863b511
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:6eb6e8a7a5edcac0f188611ba47c80f2c5826e8fdf1f9517cb0c11df9afe649c
 size 14960

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6969e1b0dfca6ead716e7d65a7048792f64786424a159e6ca34b0c6558b3ebd0
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:81b06ae580a53262ce36ae07c3fb3c1ed4571ad7e635f838d2bd9d98ff1de4da
 size 14960

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:26ac0fcb20e2693146b5db8fdd7935ee7136850497cc891b3eff53fc7c189e4b
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:c23b8d462da5c4f7f12b2321b9990502056ba1dfc5c41ddf990e0cf27e4f862b
 size 14960

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2ff85c01ce8c147a5568f0e98607017d923cd6d903d76df137d87352cddf54da
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:df74225d229428c183e5117a04f191bf9faa39263780150f0dc259da5480c864
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.9900990099009901,
   "eval_steps": 20,
-  "global_step": 800,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -6099,6 +6099,62 @@
       "eval_samples_per_second": 5.734,
       "eval_steps_per_second": 0.187,
       "step": 800
     }
   ],
   "logging_steps": 1,
@@ -6113,12 +6169,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.4540790520807424e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.0,
   "eval_steps": 20,
+  "global_step": 808,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 5.734,
       "eval_steps_per_second": 0.187,
       "step": 800
+    },
+    {
+      "epoch": 0.9913366336633663,
+      "grad_norm": 5.27916955947876,
+      "learning_rate": 4.574713411816811e-09,
+      "loss": 0.1858,
+      "step": 801
+    },
+    {
+      "epoch": 0.9925742574257426,
+      "grad_norm": 5.716527462005615,
+      "learning_rate": 3.361081929664778e-09,
+      "loss": 0.1604,
+      "step": 802
+    },
+    {
+      "epoch": 0.9938118811881188,
+      "grad_norm": 5.259273529052734,
+      "learning_rate": 2.3341246279806606e-09,
+      "loss": 0.1598,
+      "step": 803
+    },
+    {
+      "epoch": 0.995049504950495,
+      "grad_norm": 4.770205974578857,
+      "learning_rate": 1.493860683851045e-09,
+      "loss": 0.1981,
+      "step": 804
+    },
+    {
+      "epoch": 0.9962871287128713,
+      "grad_norm": 3.5331854820251465,
+      "learning_rate": 8.403057881067877e-10,
+      "loss": 0.1671,
+      "step": 805
+    },
+    {
+      "epoch": 0.9975247524752475,
+      "grad_norm": 4.290172576904297,
+      "learning_rate": 3.7347214503435927e-10,
+      "loss": 0.2307,
+      "step": 806
+    },
+    {
+      "epoch": 0.9987623762376238,
+      "grad_norm": 4.1493964195251465,
+      "learning_rate": 9.33684721426964e-11,
+      "loss": 0.1106,
+      "step": 807
+    },
+    {
+      "epoch": 1.0,
+      "grad_norm": 3.5634615421295166,
+      "learning_rate": 0.0,
+      "loss": 0.1396,
+      "step": 808
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 2.4786605370559693e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null