Pushed the IDEFICS2 model fine-tuned on some archaeological context sheets, just to figure out the workflow.

Browse files

Files changed (12) hide show

checkpoint-25/adapter_model.safetensors +1 -1
checkpoint-25/optimizer.pt +1 -1
checkpoint-25/rng_state.pth +1 -1
checkpoint-25/scheduler.pt +1 -1
checkpoint-25/trainer_state.json +32 -32
checkpoint-25/training_args.bin +1 -1
checkpoint-30/adapter_model.safetensors +1 -1
checkpoint-30/optimizer.pt +1 -1
checkpoint-30/rng_state.pth +1 -1
checkpoint-30/scheduler.pt +1 -1
checkpoint-30/trainer_state.json +41 -41
checkpoint-30/training_args.bin +1 -1

checkpoint-25/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2026e8fb25685ae0ce63663d0819759dcdb86aab842b9ca5c35477497fd3e84d
 size 93378688

 version https://git-lfs.github.com/spec/v1
+oid sha256:e6d7b334eb490c2c309e20faf9e676ac140b812c2f25f174d784ba4737482e21
 size 93378688

checkpoint-25/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5ab9629bd5cd5aaeb0813c30b4d3f7ec9e9a91827407a2e5649058963578af73
 size 48071944

 version https://git-lfs.github.com/spec/v1
+oid sha256:c33271db3bb9fb15c420653e5e95f4ac97a7864bcb7a240b24143fe9ce831704
 size 48071944

checkpoint-25/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:18be2d7d7016a23e80a9378e2335a532bc2e5e3415b4b79017b671f73a12d199
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:6e9cf1e2b80a52e7a77a9580bf6adc6f13f28f3310ef491aeef3c52cc69312f2
 size 14244

checkpoint-25/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4d59010514a15a6e9216aca151abe98fd87d0bf6c46500f7d6d4fb4979b3ac25
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:dd1ca147bfe22a3195c5bd942d2592aec616b2c4d98403ef43122a3fa147216c
 size 1064

checkpoint-25/trainer_state.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.1218026796589525,
   "eval_steps": 10,
   "global_step": 25,
   "is_hyper_param_search": false,
@@ -9,54 +9,54 @@
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 0.024360535931790498,
-      "grad_norm": 46.74486541748047,
-      "learning_rate": 0.00018,
-      "loss": 3.1014,
       "step": 5
     },
     {
-      "epoch": 0.048721071863580996,
-      "grad_norm": 17.46522331237793,
-      "learning_rate": 0.00014666666666666666,
-      "loss": 1.4103,
       "step": 10
     },
     {
-      "epoch": 0.048721071863580996,
-      "eval_loss": 1.4243313074111938,
-      "eval_runtime": 216.3894,
-      "eval_samples_per_second": 3.022,
-      "eval_steps_per_second": 1.511,
       "step": 10
     },
     {
-      "epoch": 0.0730816077953715,
-      "grad_norm": 8.364654541015625,
-      "learning_rate": 0.00011333333333333334,
-      "loss": 1.3099,
       "step": 15
     },
     {
-      "epoch": 0.09744214372716199,
-      "grad_norm": 9.115853309631348,
-      "learning_rate": 8.666666666666667e-05,
-      "loss": 1.4179,
       "step": 20
     },
     {
-      "epoch": 0.09744214372716199,
-      "eval_loss": 1.2988728284835815,
-      "eval_runtime": 216.8511,
-      "eval_samples_per_second": 3.016,
-      "eval_steps_per_second": 1.508,
       "step": 20
     },
     {
-      "epoch": 0.1218026796589525,
-      "grad_norm": 8.558521270751953,
-      "learning_rate": 5.333333333333333e-05,
-      "loss": 1.2332,
       "step": 25
     }
   ],
@@ -77,7 +77,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2232558297100032.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.11841326228537596,
   "eval_steps": 10,
   "global_step": 25,
   "is_hyper_param_search": false,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 0.023682652457075192,
+      "grad_norm": 38.363624572753906,
+      "learning_rate": 9e-05,
+      "loss": 3.9738,
       "step": 5
     },
     {
+      "epoch": 0.047365304914150384,
+      "grad_norm": Infinity,
+      "learning_rate": 8.666666666666667e-05,
+      "loss": 3.5925,
       "step": 10
     },
     {
+      "epoch": 0.047365304914150384,
+      "eval_loss": 2.8427393436431885,
+      "eval_runtime": 244.1843,
+      "eval_samples_per_second": 2.756,
+      "eval_steps_per_second": 1.38,
       "step": 10
     },
     {
+      "epoch": 0.07104795737122557,
+      "grad_norm": 12.08838939666748,
+      "learning_rate": 7.333333333333333e-05,
+      "loss": 2.7399,
       "step": 15
     },
     {
+      "epoch": 0.09473060982830077,
+      "grad_norm": 11.703186988830566,
+      "learning_rate": 5.666666666666667e-05,
+      "loss": 1.3988,
       "step": 20
     },
     {
+      "epoch": 0.09473060982830077,
+      "eval_loss": 1.3483482599258423,
+      "eval_runtime": 247.0633,
+      "eval_samples_per_second": 2.724,
+      "eval_steps_per_second": 1.364,
       "step": 20
     },
     {
+      "epoch": 0.11841326228537596,
+      "grad_norm": 21.7695369720459,
+      "learning_rate": 4e-05,
+      "loss": 1.3289,
       "step": 25
     }
   ],
       "attributes": {}
     }
   },
+  "total_flos": 2246384959161024.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

checkpoint-25/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b113e4a74aec7cc7121c45daccc8f966abb0afcc818bbe1d6baa1983309930d6
 size 5304

 version https://git-lfs.github.com/spec/v1
+oid sha256:cc637676303d4f40e3a45fd2a9af293c99f68beb0db90cfaa8062315c931a25a
 size 5304

checkpoint-30/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2b93f3d85939af6263d6c0f8ed3656ea2e40eba29c3edd099553346d1a4deb24
 size 93378688

 version https://git-lfs.github.com/spec/v1
+oid sha256:d8ebef7d5a7347009f334d030bc17a34a72c0a046597547a996fa3e252966e27
 size 93378688

checkpoint-30/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7635015371d214eb4eafab7816036fac14b3792e35c8d346db33fe39732541dc
 size 48071944

 version https://git-lfs.github.com/spec/v1
+oid sha256:9e246e143a47a3be4c37648d013008ce5b18ceaccabdfa487e854f6a2668ced1
 size 48071944

checkpoint-30/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:339b32163eec5c9832623026af39db671dcf74c09cb1e1c4ef25f5057cc414a0
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:b98cbc14b559a2c3eabfc23b1ceaf69d5c3ce0cefe144a24e4fcfe9f6f18a81b
 size 14244

checkpoint-30/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:00d1c54fc5f2a267711c02343ef8760b45d4d54a34fbe0c53c5131a83d91cbc6
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:e9241253195e2761992c7c0b60b7dd0fd1941182b7ded242213ead4f9c0f8c82
 size 1064

checkpoint-30/trainer_state.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.146163215590743,
   "eval_steps": 10,
   "global_step": 30,
   "is_hyper_param_search": false,
@@ -9,69 +9,69 @@
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 0.024360535931790498,
-      "grad_norm": 46.74486541748047,
-      "learning_rate": 0.00018,
-      "loss": 3.1014,
       "step": 5
     },
     {
-      "epoch": 0.048721071863580996,
-      "grad_norm": 17.46522331237793,
-      "learning_rate": 0.00014666666666666666,
-      "loss": 1.4103,
       "step": 10
     },
     {
-      "epoch": 0.048721071863580996,
-      "eval_loss": 1.4243313074111938,
-      "eval_runtime": 216.3894,
-      "eval_samples_per_second": 3.022,
-      "eval_steps_per_second": 1.511,
       "step": 10
     },
     {
-      "epoch": 0.0730816077953715,
-      "grad_norm": 8.364654541015625,
-      "learning_rate": 0.00011333333333333334,
-      "loss": 1.3099,
       "step": 15
     },
     {
-      "epoch": 0.09744214372716199,
-      "grad_norm": 9.115853309631348,
-      "learning_rate": 8.666666666666667e-05,
-      "loss": 1.4179,
       "step": 20
     },
     {
-      "epoch": 0.09744214372716199,
-      "eval_loss": 1.2988728284835815,
-      "eval_runtime": 216.8511,
-      "eval_samples_per_second": 3.016,
-      "eval_steps_per_second": 1.508,
       "step": 20
     },
     {
-      "epoch": 0.1218026796589525,
-      "grad_norm": 8.558521270751953,
-      "learning_rate": 5.333333333333333e-05,
-      "loss": 1.2332,
       "step": 25
     },
     {
-      "epoch": 0.146163215590743,
-      "grad_norm": 8.526884078979492,
-      "learning_rate": 2e-05,
-      "loss": 1.2218,
       "step": 30
     },
     {
-      "epoch": 0.146163215590743,
-      "eval_loss": 1.2392737865447998,
-      "eval_runtime": 216.8293,
-      "eval_samples_per_second": 3.016,
-      "eval_steps_per_second": 1.508,
       "step": 30
     }
   ],
@@ -92,7 +92,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2664865299381120.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.14209591474245115,
   "eval_steps": 10,
   "global_step": 30,
   "is_hyper_param_search": false,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 0.023682652457075192,
+      "grad_norm": 38.363624572753906,
+      "learning_rate": 9e-05,
+      "loss": 3.9738,
       "step": 5
     },
     {
+      "epoch": 0.047365304914150384,
+      "grad_norm": Infinity,
+      "learning_rate": 8.666666666666667e-05,
+      "loss": 3.5925,
       "step": 10
     },
     {
+      "epoch": 0.047365304914150384,
+      "eval_loss": 2.8427393436431885,
+      "eval_runtime": 244.1843,
+      "eval_samples_per_second": 2.756,
+      "eval_steps_per_second": 1.38,
       "step": 10
     },
     {
+      "epoch": 0.07104795737122557,
+      "grad_norm": 12.08838939666748,
+      "learning_rate": 7.333333333333333e-05,
+      "loss": 2.7399,
       "step": 15
     },
     {
+      "epoch": 0.09473060982830077,
+      "grad_norm": 11.703186988830566,
+      "learning_rate": 5.666666666666667e-05,
+      "loss": 1.3988,
       "step": 20
     },
     {
+      "epoch": 0.09473060982830077,
+      "eval_loss": 1.3483482599258423,
+      "eval_runtime": 247.0633,
+      "eval_samples_per_second": 2.724,
+      "eval_steps_per_second": 1.364,
       "step": 20
     },
     {
+      "epoch": 0.11841326228537596,
+      "grad_norm": 21.7695369720459,
+      "learning_rate": 4e-05,
+      "loss": 1.3289,
       "step": 25
     },
     {
+      "epoch": 0.14209591474245115,
+      "grad_norm": 10.536179542541504,
+      "learning_rate": 2.3333333333333336e-05,
+      "loss": 1.2495,
       "step": 30
     },
     {
+      "epoch": 0.14209591474245115,
+      "eval_loss": 1.2752900123596191,
+      "eval_runtime": 247.8091,
+      "eval_samples_per_second": 2.716,
+      "eval_steps_per_second": 1.36,
       "step": 30
     }
   ],
       "attributes": {}
     }
   },
+  "total_flos": 2689832869002048.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

checkpoint-30/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b113e4a74aec7cc7121c45daccc8f966abb0afcc818bbe1d6baa1983309930d6
 size 5304

 version https://git-lfs.github.com/spec/v1
+oid sha256:cc637676303d4f40e3a45fd2a9af293c99f68beb0db90cfaa8062315c931a25a
 size 5304