Training in progress, step 200, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +82 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8b0598e8f46fd69c79fac25f48397dcbd244fc9b902a026534d3b40f1551d546
 size 59933632

 version https://git-lfs.github.com/spec/v1
+oid sha256:afe2562f9e1f4b9b281322dcf662c4c0068093d223fa6a28c49b8f5a9b226634
 size 59933632

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:65afaff6b38ac06e34340d257956eaa3ebc419b19a0e3e8bb67355d15ffad52f
 size 31822948

 version https://git-lfs.github.com/spec/v1
+oid sha256:2d71b6993e1f1a30331cbc897038d3b7b9e14d8cdb6405bc9c4e2d6d663a84bd
 size 31822948

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:885f1b5cbad3f6b5a7e8fb3b2b5ec5099503cb1e4888b5c2a60c07652eaa53e9
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:daa01d70103eca816c9caea1a70073abac5d90396152ded5e143afb7a52f19a7
 size 14960

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ac282a99480d19ea41c6c9f1dc26a3baa62e38a13d7fd98643fa6ee293d07e8c
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:0505112d632d53e82cee2329f76d7e94211d84a59d996f55df86ae6ae9ee311d
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:56da720b7acf310b42ccc93d154a0559f94e394aeb119ac7a170200f57bc5e52
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:f5075b808795dcb97e18546dfba7f70c21f9bfd20646d2420afc8cbdf2dce6f4
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a9f1b326ec0f899cfedefef459c82bd147fba05526a8891fae55e26b25185912
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:82a0cca12e9c673614bade46294a1fd83d99628233a79145e759f07bebf4d367
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d8ce05761f46e7cf72fb17a02e3a0ca15c9d25ce3babf590eeb40568923b8bac
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.010876658690450293,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -249,6 +249,84 @@
       "eval_samples_per_second": 72.271,
       "eval_steps_per_second": 9.036,
       "step": 150
     }
   ],
   "logging_steps": 5,
@@ -263,12 +341,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 9.115140238606336e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.014502211587267059,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 72.271,
       "eval_steps_per_second": 9.036,
       "step": 150
+    },
+    {
+      "epoch": 0.01123921398013197,
+      "grad_norm": 1.0315732955932617,
+      "learning_rate": 1.3213804466343421e-05,
+      "loss": 1.4924,
+      "step": 155
+    },
+    {
+      "epoch": 0.011601769269813647,
+      "grad_norm": 0.6600778698921204,
+      "learning_rate": 1.0542974530180327e-05,
+      "loss": 1.4561,
+      "step": 160
+    },
+    {
+      "epoch": 0.011964324559495323,
+      "grad_norm": 0.6818755269050598,
+      "learning_rate": 8.141676086873572e-06,
+      "loss": 1.3286,
+      "step": 165
+    },
+    {
+      "epoch": 0.012326879849177,
+      "grad_norm": 0.6849693655967712,
+      "learning_rate": 6.026312439675552e-06,
+      "loss": 1.4,
+      "step": 170
+    },
+    {
+      "epoch": 0.012689435138858675,
+      "grad_norm": 0.7216675281524658,
+      "learning_rate": 4.2113336672471245e-06,
+      "loss": 1.482,
+      "step": 175
+    },
+    {
+      "epoch": 0.013051990428540352,
+      "grad_norm": 0.7677770256996155,
+      "learning_rate": 2.7091379149682685e-06,
+      "loss": 1.314,
+      "step": 180
+    },
+    {
+      "epoch": 0.013414545718222029,
+      "grad_norm": 0.7985463738441467,
+      "learning_rate": 1.5299867030334814e-06,
+      "loss": 1.3757,
+      "step": 185
+    },
+    {
+      "epoch": 0.013777101007903705,
+      "grad_norm": 0.7273080945014954,
+      "learning_rate": 6.819348298638839e-07,
+      "loss": 1.3603,
+      "step": 190
+    },
+    {
+      "epoch": 0.014139656297585382,
+      "grad_norm": 0.6467849612236023,
+      "learning_rate": 1.7077534966650766e-07,
+      "loss": 1.3336,
+      "step": 195
+    },
+    {
+      "epoch": 0.014502211587267059,
+      "grad_norm": 0.721644401550293,
+      "learning_rate": 0.0,
+      "loss": 1.4137,
+      "step": 200
+    },
+    {
+      "epoch": 0.014502211587267059,
+      "eval_loss": 1.3191771507263184,
+      "eval_runtime": 322.0672,
+      "eval_samples_per_second": 72.118,
+      "eval_steps_per_second": 9.017,
+      "step": 200
     }
   ],
   "logging_steps": 5,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.2166089718890496e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null