Training in progress, epoch 76, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +103 -4

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:eaef454aca5a90004a1f7fa735006aaf07a09b80a0c6a02c48a5c9954a9c62a8
 size 559424792

 version https://git-lfs.github.com/spec/v1
+oid sha256:a4af379246575accc58f6abfac00aee925ce6b65883c8632448c83ee50ddfd07
 size 559424792

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6db55a8256108f51c16b7c04a0dda48b0840ae7fedf6f98128494c32336c189e
 size 1118926970

 version https://git-lfs.github.com/spec/v1
+oid sha256:930083a4131ce65cdc8b356487bf1b813c159fa19cc01194a70d602dd9b94b36
 size 1118926970

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:95d7a84ac41b5d0b0849140ed4b55c3d3d4144a4583408c6d56207542a111683
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:914809037b3f88a1a2608685ab6ce1391a78e990c3ce33067466cc03d6a8480d
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e895d61aa04b2cc52d453ad8ef1da3d7dbc854daaaeb69662e0cdbedb748f6ab
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:f90a182f176031c61cd117d508af79c4ff26bf3b95484e9bbe4017a087414d71
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 12.716951370239258,
   "best_model_checkpoint": "/kaggle/working/output/checkpoint-88740",
-  "epoch": 75.0,
   "eval_steps": 500,
-  "global_step": 97875,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -7453,6 +7453,105 @@
       "eval_samples_per_second": 29.556,
       "eval_steps_per_second": 3.714,
       "step": 97875
     }
   ],
   "logging_steps": 100,
@@ -7467,7 +7566,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 7
       }
     },
     "TrainerControl": {
@@ -7481,7 +7580,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.560071444651981e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 12.716951370239258,
   "best_model_checkpoint": "/kaggle/working/output/checkpoint-88740",
+  "epoch": 76.0,
   "eval_steps": 500,
+  "global_step": 99180,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 29.556,
       "eval_steps_per_second": 3.714,
       "step": 97875
+    },
+    {
+      "epoch": 75.01915708812261,
+      "grad_norm": 0.9783554673194885,
+      "learning_rate": 3.1317049808429124e-06,
+      "loss": 11.7455,
+      "step": 97900
+    },
+    {
+      "epoch": 75.09578544061303,
+      "grad_norm": 1.4434301853179932,
+      "learning_rate": 3.0838122605363985e-06,
+      "loss": 11.99,
+      "step": 98000
+    },
+    {
+      "epoch": 75.17241379310344,
+      "grad_norm": 1.2560200691223145,
+      "learning_rate": 3.035919540229885e-06,
+      "loss": 11.8445,
+      "step": 98100
+    },
+    {
+      "epoch": 75.24904214559388,
+      "grad_norm": 1.123687982559204,
+      "learning_rate": 2.988026819923372e-06,
+      "loss": 11.8894,
+      "step": 98200
+    },
+    {
+      "epoch": 75.32567049808429,
+      "grad_norm": 1.2393250465393066,
+      "learning_rate": 2.9401340996168583e-06,
+      "loss": 11.7591,
+      "step": 98300
+    },
+    {
+      "epoch": 75.40229885057471,
+      "grad_norm": 2.023070812225342,
+      "learning_rate": 2.892241379310345e-06,
+      "loss": 11.7083,
+      "step": 98400
+    },
+    {
+      "epoch": 75.47892720306514,
+      "grad_norm": 1.7746585607528687,
+      "learning_rate": 2.8443486590038316e-06,
+      "loss": 12.0237,
+      "step": 98500
+    },
+    {
+      "epoch": 75.55555555555556,
+      "grad_norm": 1.6215800046920776,
+      "learning_rate": 2.796455938697318e-06,
+      "loss": 11.8271,
+      "step": 98600
+    },
+    {
+      "epoch": 75.63218390804597,
+      "grad_norm": 2.3727614879608154,
+      "learning_rate": 2.7490421455938698e-06,
+      "loss": 11.9133,
+      "step": 98700
+    },
+    {
+      "epoch": 75.7088122605364,
+      "grad_norm": 1.562569260597229,
+      "learning_rate": 2.7011494252873562e-06,
+      "loss": 11.8886,
+      "step": 98800
+    },
+    {
+      "epoch": 75.78544061302682,
+      "grad_norm": 0.8996521830558777,
+      "learning_rate": 2.653256704980843e-06,
+      "loss": 11.6606,
+      "step": 98900
+    },
+    {
+      "epoch": 75.86206896551724,
+      "grad_norm": 1.6331411600112915,
+      "learning_rate": 2.6053639846743296e-06,
+      "loss": 12.057,
+      "step": 99000
+    },
+    {
+      "epoch": 75.93869731800767,
+      "grad_norm": 1.2690104246139526,
+      "learning_rate": 2.5574712643678165e-06,
+      "loss": 11.9791,
+      "step": 99100
+    },
+    {
+      "epoch": 76.0,
+      "eval_loss": 12.717323303222656,
+      "eval_runtime": 44.1546,
+      "eval_samples_per_second": 29.555,
+      "eval_steps_per_second": 3.714,
+      "step": 99180
     }
   ],
   "logging_steps": 100,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 8
       }
     },
     "TrainerControl": {
       "attributes": {}
     }
   },
+  "total_flos": 4.620861214306099e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null