Pushed the IDEFICS2 model fine-tuned on some archaeological context sheets, just to figure out the workflow.

Browse files

Files changed (13) hide show

checkpoint-25/adapter_model.safetensors +1 -1
checkpoint-25/optimizer.pt +1 -1
checkpoint-25/scheduler.pt +1 -1
checkpoint-25/trainer_state.json +26 -26
checkpoint-25/training_args.bin +1 -1
checkpoint-30/adapter_config.json +29 -0
checkpoint-30/adapter_model.safetensors +3 -0
checkpoint-30/generation_config.json +18 -0
checkpoint-30/optimizer.pt +3 -0
checkpoint-30/rng_state.pth +3 -0
checkpoint-30/scheduler.pt +3 -0
checkpoint-30/trainer_state.json +99 -0
checkpoint-30/training_args.bin +3 -0

checkpoint-25/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:be9e155a00a091ac429c9d7a002d77384488d3de0efeaae002cad2bbc9c46207
 size 93378688

 version https://git-lfs.github.com/spec/v1
+oid sha256:f3b8a5cd7034273b7b144bcbfa022ca6b5184d724c03e821d8683fffa686153d
 size 93378688

checkpoint-25/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b5cec25c51806ee812922158a4f65e6cf97e564598eb8475b8f6b140257657cd
 size 48071944

 version https://git-lfs.github.com/spec/v1
+oid sha256:009570f31da8c655af3b904517b62088a73c25662cda765d5a13ba70c08c0037
 size 48071944

checkpoint-25/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a05716f05747fb967268b6b65bee49602693de25bf95ae9c915a7eff48a89265
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:58d35264248af70bc04142557841b4f17281f57af4edd5637dfec599681b99ca
 size 1064

checkpoint-25/trainer_state.json CHANGED Viewed

@@ -10,60 +10,60 @@
   "log_history": [
     {
       "epoch": 0.7017543859649122,
-      "grad_norm": 86.28084564208984,
-      "learning_rate": 0.00017600000000000002,
-      "loss": 3.9501,
       "step": 5
     },
     {
       "epoch": 1.280701754385965,
-      "grad_norm": 109.0323486328125,
-      "learning_rate": 0.000144,
-      "loss": 2.0948,
       "step": 10
     },
     {
       "epoch": 1.280701754385965,
-      "eval_loss": 0.6712124943733215,
-      "eval_runtime": 31.8555,
-      "eval_samples_per_second": 0.722,
-      "eval_steps_per_second": 0.377,
       "step": 10
     },
     {
       "epoch": 1.9824561403508771,
-      "grad_norm": 51.63697052001953,
-      "learning_rate": 0.00010400000000000001,
-      "loss": 2.2882,
       "step": 15
     },
     {
       "epoch": 2.56140350877193,
-      "grad_norm": 53.5184326171875,
-      "learning_rate": 6.400000000000001e-05,
-      "loss": 1.3999,
       "step": 20
     },
     {
       "epoch": 2.56140350877193,
-      "eval_loss": 0.5531365871429443,
-      "eval_runtime": 31.7285,
-      "eval_samples_per_second": 0.725,
-      "eval_steps_per_second": 0.378,
       "step": 20
     },
     {
       "epoch": 3.1403508771929824,
-      "grad_norm": 20.981565475463867,
-      "learning_rate": 3.2000000000000005e-05,
-      "loss": 1.3415,
       "step": 25
     }
   ],
   "logging_steps": 5,
-  "max_steps": 25,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 4,
   "save_steps": 25,
   "stateful_callbacks": {
     "TrainerControl": {
@@ -72,7 +72,7 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": true
       },
       "attributes": {}
     }

   "log_history": [
     {
       "epoch": 0.7017543859649122,
+      "grad_norm": 44.80005645751953,
+      "learning_rate": 0.00017333333333333334,
+      "loss": 2.0998,
       "step": 5
     },
     {
       "epoch": 1.280701754385965,
+      "grad_norm": 901.4852294921875,
+      "learning_rate": 0.00016,
+      "loss": 1.986,
       "step": 10
     },
     {
       "epoch": 1.280701754385965,
+      "eval_loss": 0.7075792551040649,
+      "eval_runtime": 31.4718,
+      "eval_samples_per_second": 0.731,
+      "eval_steps_per_second": 0.381,
       "step": 10
     },
     {
       "epoch": 1.9824561403508771,
+      "grad_norm": 42.54853439331055,
+      "learning_rate": 0.00012666666666666666,
+      "loss": 2.1037,
       "step": 15
     },
     {
       "epoch": 2.56140350877193,
+      "grad_norm": 84.47293853759766,
+      "learning_rate": 9.333333333333334e-05,
+      "loss": 1.2837,
       "step": 20
     },
     {
       "epoch": 2.56140350877193,
+      "eval_loss": 0.6040331125259399,
+      "eval_runtime": 31.5373,
+      "eval_samples_per_second": 0.729,
+      "eval_steps_per_second": 0.381,
       "step": 20
     },
     {
       "epoch": 3.1403508771929824,
+      "grad_norm": 52.28364181518555,
+      "learning_rate": 6e-05,
+      "loss": 1.1705,
       "step": 25
     }
   ],
   "logging_steps": 5,
+  "max_steps": 30,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
   "save_steps": 25,
   "stateful_callbacks": {
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": false
       },
       "attributes": {}
     }

checkpoint-25/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c4f668871e21e05e5e1625fecbdb54559b75b1aff22294454ac78ae0c7e83537
 size 5304

 version https://git-lfs.github.com/spec/v1
+oid sha256:5d18e7524883970e68cdada6591481ad12d536248a71cc627706abc935569009
 size 5304

checkpoint-30/adapter_config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "HuggingFaceM4/idefics2-8b",
+  "bias": "none",
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": false,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 8,
+  "lora_bias": false,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": ".*(text_model|modality_projection|perceiver_resampler).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$",
+  "task_type": null,
+  "use_dora": false,
+  "use_rslora": false
+}

checkpoint-30/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c8cdf564080619dd5da832a2e6dddc7ddeb9b49dcfdb9471eb1888410a03eac5
+size 93378688

checkpoint-30/generation_config.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+  "_from_model_config": true,
+  "bad_words_ids": [
+    [
+      32000
+    ],
+    [
+      32001
+    ]
+  ],
+  "bos_token_id": 1,
+  "eos_token_id": [
+    2,
+    32002
+  ],
+  "pad_token_id": 0,
+  "transformers_version": "4.48.0.dev0"
+}

checkpoint-30/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a59487f60933ab4844b5854bfc6f13394aa385968d6d81c9c53278ef2d2d9ea5
+size 48071944

checkpoint-30/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6dcba77c57f5eab0141fde363e0cf9700ae7efb305f3d3dfc25572c16c1f4e0b
+size 14244

checkpoint-30/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:174c77dd52478115dc3752b25cb54f830116ea3fe612fb9bdb81ac01b6d182be
+size 1064

checkpoint-30/trainer_state.json ADDED Viewed

	@@ -0,0 +1,99 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.8421052631578947,
+  "eval_steps": 10,
+  "global_step": 30,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.7017543859649122,
+      "grad_norm": 44.80005645751953,
+      "learning_rate": 0.00017333333333333334,
+      "loss": 2.0998,
+      "step": 5
+    },
+    {
+      "epoch": 1.280701754385965,
+      "grad_norm": 901.4852294921875,
+      "learning_rate": 0.00016,
+      "loss": 1.986,
+      "step": 10
+    },
+    {
+      "epoch": 1.280701754385965,
+      "eval_loss": 0.7075792551040649,
+      "eval_runtime": 31.4718,
+      "eval_samples_per_second": 0.731,
+      "eval_steps_per_second": 0.381,
+      "step": 10
+    },
+    {
+      "epoch": 1.9824561403508771,
+      "grad_norm": 42.54853439331055,
+      "learning_rate": 0.00012666666666666666,
+      "loss": 2.1037,
+      "step": 15
+    },
+    {
+      "epoch": 2.56140350877193,
+      "grad_norm": 84.47293853759766,
+      "learning_rate": 9.333333333333334e-05,
+      "loss": 1.2837,
+      "step": 20
+    },
+    {
+      "epoch": 2.56140350877193,
+      "eval_loss": 0.6040331125259399,
+      "eval_runtime": 31.5373,
+      "eval_samples_per_second": 0.729,
+      "eval_steps_per_second": 0.381,
+      "step": 20
+    },
+    {
+      "epoch": 3.1403508771929824,
+      "grad_norm": 52.28364181518555,
+      "learning_rate": 6e-05,
+      "loss": 1.1705,
+      "step": 25
+    },
+    {
+      "epoch": 3.8421052631578947,
+      "grad_norm": 16.730497360229492,
+      "learning_rate": 2.6666666666666667e-05,
+      "loss": 1.2169,
+      "step": 30
+    },
+    {
+      "epoch": 3.8421052631578947,
+      "eval_loss": 0.5927651524543762,
+      "eval_runtime": 31.4582,
+      "eval_samples_per_second": 0.731,
+      "eval_steps_per_second": 0.381,
+      "step": 30
+    }
+  ],
+  "logging_steps": 5,
+  "max_steps": 30,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 25,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2043610494331296.0,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-30/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5d18e7524883970e68cdada6591481ad12d536248a71cc627706abc935569009
+size 5304