Training in progress, epoch 3, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +46 -4

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a8bf5052cabb23e9dcf452aa143aa34ae321d54f843f8247ad8e3927c4c149c3
 size 600177236

 version https://git-lfs.github.com/spec/v1
+oid sha256:139fec45154ca33906c2903e3974cf5bb7b5d00a3eef28beee8d1f2ed03c7c7d
 size 600177236

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3b57597d0067e7abffddcf181ec99b30a24399213ec61ed08eda3db8aebfeb0c
 size 1200001786

 version https://git-lfs.github.com/spec/v1
+oid sha256:3202d3b24f8b5a2935b12e801a57e12580102cf9a946ee725dcc7abf8cbb966b
 size 1200001786

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:45bb083c87006f454b2f544360e3b1b61397e295b76720295d40e74907c141a1
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:4b03b11193656c1a3d536bab7a0679a12f7b4ef3b480229b7a017067ad82448d
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fbaf1fe07c08cae80261838683ae5bba41ac45905af686e74e6d05f19f13430d
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:0e60debad2e0820d2d64238ae4a209b8d40fdcf351db5f0394509952de535c45
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.12813186645507812,
   "best_model_checkpoint": "./vit5_qqp/checkpoint-6570",
-  "epoch": 2.0,
   "eval_steps": 500,
-  "global_step": 6570,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -114,6 +114,48 @@
       "eval_samples_per_second": 193.121,
       "eval_steps_per_second": 5.371,
       "step": 6570
     }
   ],
   "logging_steps": 500,
@@ -128,12 +170,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.40801080983552e+16,
   "train_batch_size": 36,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.12813186645507812,
   "best_model_checkpoint": "./vit5_qqp/checkpoint-6570",
+  "epoch": 3.0,
   "eval_steps": 500,
+  "global_step": 9855,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 193.121,
       "eval_steps_per_second": 5.371,
       "step": 6570
+    },
+    {
+      "epoch": 2.13089802130898,
+      "grad_norm": 0.4530036449432373,
+      "learning_rate": 1.4485032978183663e-05,
+      "loss": 0.1129,
+      "step": 7000
+    },
+    {
+      "epoch": 2.2831050228310503,
+      "grad_norm": 0.3842392861843109,
+      "learning_rate": 1.1948249619482495e-05,
+      "loss": 0.1092,
+      "step": 7500
+    },
+    {
+      "epoch": 2.43531202435312,
+      "grad_norm": 0.4166797697544098,
+      "learning_rate": 9.411466260781329e-06,
+      "loss": 0.1086,
+      "step": 8000
+    },
+    {
+      "epoch": 2.5875190258751903,
+      "grad_norm": 0.5072170495986938,
+      "learning_rate": 6.874682902080162e-06,
+      "loss": 0.1075,
+      "step": 8500
+    },
+    {
+      "epoch": 2.73972602739726,
+      "grad_norm": 0.4664016664028168,
+      "learning_rate": 4.337899543378996e-06,
+      "loss": 0.1089,
+      "step": 9000
+    },
+    {
+      "epoch": 2.8919330289193304,
+      "grad_norm": 0.48263782262802124,
+      "learning_rate": 1.8011161846778284e-06,
+      "loss": 0.1069,
+      "step": 9500
     }
   ],
   "logging_steps": 500,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 2.11201621475328e+16,
   "train_batch_size": 36,
   "trial_name": null,
   "trial_params": null