Training in progress, step 4000, checkpoint
checkpoint-4000/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:b7774ea055ba209b0622ca46eba2a3108ce70c38abba893c84e6fb6ddc434c85
 size 51007160
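model.safetensors holds the checkpoint's weight tensors. A minimal sketch for inspecting them locally, assuming the repository has been cloned and the LFS blob pulled (so the file is the real 51 MB payload rather than the pointer shown above):

# Sketch: inspect the weights in checkpoint-4000/model.safetensors.
from safetensors.torch import load_file

state_dict = load_file("checkpoint-4000/model.safetensors")
total_params = sum(t.numel() for t in state_dict.values())
print(f"{len(state_dict)} tensors, {total_params:,} parameters")
for name, tensor in list(state_dict.items())[:5]:
    print(name, tuple(tensor.shape), tensor.dtype)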
checkpoint-4000/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:775ec6fece3677d36b35721bcf6caa91f79fc3b10f92eb9821f9b349fbfd6160
 size 102078202
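optimizer.pt is the optimizer state dict the Trainer saves alongside the weights so a run can resume exactly where it left off. A rough sketch for peeking at it; the layout below is the standard PyTorch optimizer state_dict ("state" and "param_groups"), and the exact contents depend on which optimizer the run used:

# Sketch: inspect the optimizer state saved at this checkpoint.
# Requires the real LFS blob, not the pointer file.
import torch

opt_state = torch.load("checkpoint-4000/optimizer.pt", map_location="cpu")
print(opt_state.keys())                    # typically dict_keys(['state', 'param_groups'])
print(opt_state["param_groups"][0].get("lr"))  # current learning rate, if stored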
checkpoint-4000/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:d441e627a17e600f38f437f3b8bc81e1298392560003a20a62b75cbab2905880
 size 14308
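Each binary file in this commit (model.safetensors, optimizer.pt, rng_state.pth, training_args.bin) is tracked with Git LFS, so the diff only shows the pointer: the spec version, the SHA-256 of the blob, and its size in bytes. A small sketch, assuming a locally downloaded copy, for checking that a blob matches its pointer; the oid and size below are the new values for rng_state.pth:

# Sketch: verify a downloaded blob against the Git LFS pointer shown in the diff.
import hashlib, os

path = "checkpoint-4000/rng_state.pth"
expected_oid = "d441e627a17e600f38f437f3b8bc81e1298392560003a20a62b75cbab2905880"
expected_size = 14308

with open(path, "rb") as f:
    digest = hashlib.sha256(f.read()).hexdigest()
assert os.path.getsize(path) == expected_size, "size mismatch"
assert digest == expected_oid, "sha256 mismatch"
print("pointer matches blob")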
checkpoint-4000/tokenizer.json CHANGED
The diff for this file is too large to render; see the raw diff.
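tokenizer.json is the full fast-tokenizer definition, which is why its diff is too large to render inline. It can be loaded straight from the checkpoint directory; a minimal sketch, assuming the directory also contains the usual tokenizer_config.json saved with a Trainer checkpoint:

# Sketch: load the tokenizer saved with this checkpoint.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("checkpoint-4000")
# Example input only; the training path (de_clm/childes_30) suggests German CHILDES data.
print(tokenizer("ein kleines Beispiel"))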
checkpoint-4000/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
 {
-  "best_metric": 5.
-  "best_model_checkpoint": "/home/p318482/babyLM_controlled/models_trained/
-  "epoch":
+  "best_metric": 5.884151935577393,
+  "best_model_checkpoint": "/home/p318482/babyLM_controlled/models_trained/de_clm/childes_30/checkpoint-4000",
+  "epoch": 4.1928721174004195,
   "eval_steps": 2000,
   "global_step": 4000,
   "is_hyper_param_search": false,
@@ -9,33 +9,33 @@
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch":
-      "eval_loss":
-      "eval_runtime": 0.
-      "eval_samples_per_second":
-      "eval_steps_per_second":
+      "epoch": 2.0964360587002098,
+      "eval_loss": 7.102903366088867,
+      "eval_runtime": 0.9708,
+      "eval_samples_per_second": 1416.286,
+      "eval_steps_per_second": 88.582,
       "step": 2000
     },
     {
-      "epoch":
-      "grad_norm": 1.
+      "epoch": 4.1928721174004195,
+      "grad_norm": 1.3964662551879883,
       "learning_rate": 1e-05,
-      "loss": 6.
+      "loss": 6.9987,
       "step": 4000
     },
     {
-      "epoch":
-      "eval_loss": 5.
-      "eval_runtime": 0.
-      "eval_samples_per_second":
-      "eval_steps_per_second": 89.
+      "epoch": 4.1928721174004195,
+      "eval_loss": 5.884151935577393,
+      "eval_runtime": 0.966,
+      "eval_samples_per_second": 1423.408,
+      "eval_steps_per_second": 89.028,
       "step": 4000
     }
   ],
   "logging_steps": 4000,
   "max_steps": 100000,
   "num_input_tokens_seen": 0,
-  "num_train_epochs":
+  "num_train_epochs": 105,
   "save_steps": 4000,
   "stateful_callbacks": {
     "TrainerControl": {
@@ -49,7 +49,7 @@
       "attributes": {}
     }
   },
-  "total_flos":
+  "total_flos": 1034264024727552.0,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null
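trainer_state.json is where the Trainer records evaluation history and the best checkpoint: here eval_loss improved from about 7.10 at step 2000 to 5.88 at step 4000, so best_model_checkpoint now points at checkpoint-4000. A sketch for reading the state and resuming the run; the Trainer setup referenced in the last comment is hypothetical and would have to mirror the original training script, which is not part of this commit:

# Sketch: inspect the logged history for this checkpoint.
import json

with open("checkpoint-4000/trainer_state.json") as f:
    state = json.load(f)

print(state["best_metric"], state["best_model_checkpoint"])
for entry in state["log_history"]:
    if "eval_loss" in entry:
        print(f'step {entry["step"]}: eval_loss {entry["eval_loss"]:.3f}')

# Resuming picks the optimizer, scheduler, and RNG state back up from the same
# directory; `trainer` stands for a Trainer built with the same model, data,
# and TrainingArguments as the original run (hypothetical).
# trainer.train(resume_from_checkpoint="checkpoint-4000")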
checkpoint-4000/training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:0c779122676b54107edc62ae0b9293c062733193c5f82a36a2bc097bca192814
 size 5368
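training_args.bin is a pickled TrainingArguments object, so it is a quick way to recover the exact hyperparameters of the run. A sketch; on recent PyTorch versions unpickling an arbitrary class requires weights_only=False, which should only be used on files you trust:

# Sketch: recover the TrainingArguments that produced this checkpoint.
import torch

args = torch.load("checkpoint-4000/training_args.bin", weights_only=False)
print(args.learning_rate, args.per_device_train_batch_size, args.max_steps)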