Training in progress, step 40, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +73 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b768e3a4d249cd9bb3425b75641f3e3d66c2119e027046042a354d0900705a9c
 size 101752088

 version https://git-lfs.github.com/spec/v1
+oid sha256:4b011a4c60534b932db042fd736d34e87af082176c079c6271cdbf3136a38ac8
 size 101752088

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d8bc270d065791693bfe545dd959cf593a80072a2fe6e4cb2f28f6520970ef91
 size 52046596

 version https://git-lfs.github.com/spec/v1
+oid sha256:6bb1754679627bc12d4f937b9e502f31a2534b4e172c3cc865c7c598bca6d2e4
 size 52046596

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f4c6a3932b0c6757b2a554606edacf63dde2370212156fc61645da06ea61feaa
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:15979b93c5d9f4120bbd488b10f50e9f5e39387c20523984d041d80836523182
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:624cde959d3a917007c76687e7ed04f5f5ce5a570abfa20dd466a4e55f6684fa
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:288483c81220cbf22f51f35045c4ef691c80220055a03dc4880f485f05c71ede
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.003776197369249166,
   "eval_steps": 25,
-  "global_step": 30,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -233,6 +233,76 @@
       "learning_rate": 0.0001766044443118978,
       "loss": 0.0424,
       "step": 30
     }
   ],
   "logging_steps": 1,
@@ -252,7 +322,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.97561520488448e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.005034929825665555,
   "eval_steps": 25,
+  "global_step": 40,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.0001766044443118978,
       "loss": 0.0424,
       "step": 30
+    },
+    {
+      "epoch": 0.003902070614890805,
+      "grad_norm": 0.22993697226047516,
+      "learning_rate": 0.00017431448254773944,
+      "loss": 0.0041,
+      "step": 31
+    },
+    {
+      "epoch": 0.004027943860532444,
+      "grad_norm": 1.7465827465057373,
+      "learning_rate": 0.0001719339800338651,
+      "loss": 0.0228,
+      "step": 32
+    },
+    {
+      "epoch": 0.004153817106174082,
+      "grad_norm": 0.28992122411727905,
+      "learning_rate": 0.00016946583704589973,
+      "loss": 0.0067,
+      "step": 33
+    },
+    {
+      "epoch": 0.004279690351815722,
+      "grad_norm": 0.9798206090927124,
+      "learning_rate": 0.00016691306063588583,
+      "loss": 0.0262,
+      "step": 34
+    },
+    {
+      "epoch": 0.004405563597457361,
+      "grad_norm": 0.009784302674233913,
+      "learning_rate": 0.00016427876096865394,
+      "loss": 0.0007,
+      "step": 35
+    },
+    {
+      "epoch": 0.004531436843098999,
+      "grad_norm": 2.536167860031128,
+      "learning_rate": 0.0001615661475325658,
+      "loss": 1.0487,
+      "step": 36
+    },
+    {
+      "epoch": 0.0046573100887406385,
+      "grad_norm": 0.8000788688659668,
+      "learning_rate": 0.00015877852522924732,
+      "loss": 0.0149,
+      "step": 37
+    },
+    {
+      "epoch": 0.004783183334382277,
+      "grad_norm": 0.0,
+      "learning_rate": 0.0001559192903470747,
+      "loss": 0.0,
+      "step": 38
+    },
+    {
+      "epoch": 0.004909056580023916,
+      "grad_norm": 0.015558776445686817,
+      "learning_rate": 0.0001529919264233205,
+      "loss": 0.0014,
+      "step": 39
+    },
+    {
+      "epoch": 0.005034929825665555,
+      "grad_norm": 3.2701315879821777,
+      "learning_rate": 0.00015000000000000001,
+      "loss": 0.2162,
+      "step": 40
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 2.63415360651264e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null