ProgramInNonsense committed on
Commit ffab012 · verified · 1 Parent(s): 18e8735

Training in progress, step 4950, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8999735af70410295de19f30e22966f6dd3004beff68125e830fdb7575c32b33
+oid sha256:6d82577e4315a0245aa3888fca2f71a8260373eb445fbad147c6fd76cb585817
 size 335604696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a8286c1a06c14ad0ab5875b0a8c24a687654a9093c97b325c38cb13df4b32b10
+oid sha256:d986a4ba297308f83f2bc0a91fbe53d870040131c1b3ee9e78cb05739dfc0c9f
 size 671467026
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:43f70beeab3045e8185d11bdb74b1a7618728d482bc5259a57ebd6fa0bd44177
+oid sha256:68a2c94f0c11b9467b4681897c349d42ab25acb9747f60932ec6d730da9a44df
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aee07848921afadf2e29f9d8e13f23709a7903803a3baa073dfe984ab9de3b14
+oid sha256:5c9be9776f3509ee5ee5abd9b7b0086172f78ba2db59e74bb85fa6d84d248249
 size 1256
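
Note on the four binary files above: they are tracked with Git LFS, so each diff only replaces the pointer file, where the "oid sha256:" line is the content hash of the new blob and "size" is its byte count. As a minimal sketch (assuming the checkpoint files have been pulled locally under the same last-checkpoint/ paths; the helper name sha256_of is just illustrative), a downloaded file can be checked against its pointer like this:

    import hashlib
    import os

    def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
        """Stream the file so large checkpoints (hundreds of MB) are not read into memory at once."""
        digest = hashlib.sha256()
        with open(path, "rb") as f:
            for chunk in iter(lambda: f.read(chunk_size), b""):
                digest.update(chunk)
        return digest.hexdigest()

    # Expected values taken from the updated LFS pointers in this commit.
    expected = {
        "last-checkpoint/adapter_model.safetensors": (
            "6d82577e4315a0245aa3888fca2f71a8260373eb445fbad147c6fd76cb585817", 335604696),
        "last-checkpoint/optimizer.pt": (
            "d986a4ba297308f83f2bc0a91fbe53d870040131c1b3ee9e78cb05739dfc0c9f", 671467026),
    }

    for path, (oid, size) in expected.items():
        ok = os.path.getsize(path) == size and sha256_of(path) == oid
        print(f"{path}: {'OK' if ok else 'MISMATCH'}")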
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 1.5389729738235474,
   "best_model_checkpoint": "./output/checkpoint-4800",
-  "epoch": 0.11384389156369329,
+  "epoch": 0.1174015131750587,
   "eval_steps": 150,
-  "global_step": 4800,
+  "global_step": 4950,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3623,6 +3623,119 @@
       "eval_samples_per_second": 9.502,
       "eval_steps_per_second": 9.502,
       "step": 4800
+    },
+    {
+      "epoch": 0.11408106633778431,
+      "grad_norm": 18.068140029907227,
+      "learning_rate": 4.631564202709354e-08,
+      "loss": 1.7176,
+      "step": 4810
+    },
+    {
+      "epoch": 0.11431824111187534,
+      "grad_norm": 12.530498504638672,
+      "learning_rate": 4.1573879615262184e-08,
+      "loss": 1.5955,
+      "step": 4820
+    },
+    {
+      "epoch": 0.11455541588596636,
+      "grad_norm": 14.457528114318848,
+      "learning_rate": 3.708732128449785e-08,
+      "loss": 1.5592,
+      "step": 4830
+    },
+    {
+      "epoch": 0.1147925906600574,
+      "grad_norm": 9.580428123474121,
+      "learning_rate": 3.2856151459641216e-08,
+      "loss": 1.527,
+      "step": 4840
+    },
+    {
+      "epoch": 0.11502976543414842,
+      "grad_norm": 31.357723236083984,
+      "learning_rate": 2.8880544067511063e-08,
+      "loss": 1.6266,
+      "step": 4850
+    },
+    {
+      "epoch": 0.11526694020823945,
+      "grad_norm": 14.516955375671387,
+      "learning_rate": 2.5160662529755823e-08,
+      "loss": 1.4625,
+      "step": 4860
+    },
+    {
+      "epoch": 0.11550411498233049,
+      "grad_norm": 17.862333297729492,
+      "learning_rate": 2.169665975613605e-08,
+      "loss": 1.6717,
+      "step": 4870
+    },
+    {
+      "epoch": 0.1157412897564215,
+      "grad_norm": 11.661571502685547,
+      "learning_rate": 1.8488678138238458e-08,
+      "loss": 1.5904,
+      "step": 4880
+    },
+    {
+      "epoch": 0.11597846453051254,
+      "grad_norm": 19.821514129638672,
+      "learning_rate": 1.5536849543621583e-08,
+      "loss": 1.633,
+      "step": 4890
+    },
+    {
+      "epoch": 0.11621563930460356,
+      "grad_norm": 13.340200424194336,
+      "learning_rate": 1.2841295310397906e-08,
+      "loss": 1.4345,
+      "step": 4900
+    },
+    {
+      "epoch": 0.1164528140786946,
+      "grad_norm": 14.068325996398926,
+      "learning_rate": 1.0402126242244764e-08,
+      "loss": 1.5872,
+      "step": 4910
+    },
+    {
+      "epoch": 0.11668998885278561,
+      "grad_norm": 13.935022354125977,
+      "learning_rate": 8.219442603847605e-09,
+      "loss": 1.5762,
+      "step": 4920
+    },
+    {
+      "epoch": 0.11692716362687665,
+      "grad_norm": 10.617624282836914,
+      "learning_rate": 6.293334116783817e-09,
+      "loss": 1.5457,
+      "step": 4930
+    },
+    {
+      "epoch": 0.11716433840096767,
+      "grad_norm": 11.610488891601562,
+      "learning_rate": 4.623879955827082e-09,
+      "loss": 1.4991,
+      "step": 4940
+    },
+    {
+      "epoch": 0.1174015131750587,
+      "grad_norm": 11.387807846069336,
+      "learning_rate": 3.211148745700665e-09,
+      "loss": 1.5349,
+      "step": 4950
+    },
+    {
+      "epoch": 0.1174015131750587,
+      "eval_loss": 1.5390245914459229,
+      "eval_runtime": 53.2681,
+      "eval_samples_per_second": 9.405,
+      "eval_steps_per_second": 9.405,
+      "step": 4950
     }
   ],
   "logging_steps": 10,
@@ -3642,7 +3755,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.6346743679687066e+17,
+  "total_flos": 3.7387334060399e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null