sgraham committed on
Commit f38ac8c · verified · 1 Parent(s): efa31ee

Pushed the IDEFICS2 model fine-tuned on some archaeological context sheets, just to figure out the workflow.

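For reference, a minimal sketch of one way a Trainer checkpoint folder like the ones below can be pushed to the Hub with huggingface_hub; the repo id and local path are placeholders, not values taken from this commit:

from huggingface_hub import HfApi

# Placeholder repo id and local checkpoint path -- adjust to the actual repo.
api = HfApi()
api.upload_folder(
    folder_path="output/checkpoint-25",        # local Trainer checkpoint dir
    path_in_repo="checkpoint-25",              # keep the same layout on the Hub
    repo_id="user/idefics2-context-sheets",    # hypothetical repo id
    repo_type="model",
    commit_message="Push fine-tuning checkpoint",
)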
checkpoint-25/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:2026e8fb25685ae0ce63663d0819759dcdb86aab842b9ca5c35477497fd3e84d
+ oid sha256:e6d7b334eb490c2c309e20faf9e676ac140b812c2f25f174d784ba4737482e21
  size 93378688
checkpoint-25/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:5ab9629bd5cd5aaeb0813c30b4d3f7ec9e9a91827407a2e5649058963578af73
+ oid sha256:c33271db3bb9fb15c420653e5e95f4ac97a7864bcb7a240b24143fe9ce831704
  size 48071944
checkpoint-25/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:18be2d7d7016a23e80a9378e2335a532bc2e5e3415b4b79017b671f73a12d199
+ oid sha256:6e9cf1e2b80a52e7a77a9580bf6adc6f13f28f3310ef491aeef3c52cc69312f2
  size 14244
checkpoint-25/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:4d59010514a15a6e9216aca151abe98fd87d0bf6c46500f7d6d4fb4979b3ac25
+ oid sha256:dd1ca147bfe22a3195c5bd942d2592aec616b2c4d98403ef43122a3fa147216c
  size 1064
checkpoint-25/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.1218026796589525,
+ "epoch": 0.11841326228537596,
  "eval_steps": 10,
  "global_step": 25,
  "is_hyper_param_search": false,
@@ -9,54 +9,54 @@
  "is_world_process_zero": true,
  "log_history": [
  {
- "epoch": 0.024360535931790498,
- "grad_norm": 46.74486541748047,
- "learning_rate": 0.00018,
- "loss": 3.1014,
+ "epoch": 0.023682652457075192,
+ "grad_norm": 38.363624572753906,
+ "learning_rate": 9e-05,
+ "loss": 3.9738,
  "step": 5
  },
  {
- "epoch": 0.048721071863580996,
- "grad_norm": 17.46522331237793,
- "learning_rate": 0.00014666666666666666,
- "loss": 1.4103,
+ "epoch": 0.047365304914150384,
+ "grad_norm": Infinity,
+ "learning_rate": 8.666666666666667e-05,
+ "loss": 3.5925,
  "step": 10
  },
  {
- "epoch": 0.048721071863580996,
- "eval_loss": 1.4243313074111938,
- "eval_runtime": 216.3894,
- "eval_samples_per_second": 3.022,
- "eval_steps_per_second": 1.511,
+ "epoch": 0.047365304914150384,
+ "eval_loss": 2.8427393436431885,
+ "eval_runtime": 244.1843,
+ "eval_samples_per_second": 2.756,
+ "eval_steps_per_second": 1.38,
  "step": 10
  },
  {
- "epoch": 0.0730816077953715,
- "grad_norm": 8.364654541015625,
- "learning_rate": 0.00011333333333333334,
- "loss": 1.3099,
+ "epoch": 0.07104795737122557,
+ "grad_norm": 12.08838939666748,
+ "learning_rate": 7.333333333333333e-05,
+ "loss": 2.7399,
  "step": 15
  },
  {
- "epoch": 0.09744214372716199,
- "grad_norm": 9.115853309631348,
- "learning_rate": 8.666666666666667e-05,
- "loss": 1.4179,
+ "epoch": 0.09473060982830077,
+ "grad_norm": 11.703186988830566,
+ "learning_rate": 5.666666666666667e-05,
+ "loss": 1.3988,
  "step": 20
  },
  {
- "epoch": 0.09744214372716199,
- "eval_loss": 1.2988728284835815,
- "eval_runtime": 216.8511,
- "eval_samples_per_second": 3.016,
- "eval_steps_per_second": 1.508,
+ "epoch": 0.09473060982830077,
+ "eval_loss": 1.3483482599258423,
+ "eval_runtime": 247.0633,
+ "eval_samples_per_second": 2.724,
+ "eval_steps_per_second": 1.364,
  "step": 20
  },
  {
- "epoch": 0.1218026796589525,
- "grad_norm": 8.558521270751953,
- "learning_rate": 5.333333333333333e-05,
- "loss": 1.2332,
+ "epoch": 0.11841326228537596,
+ "grad_norm": 21.7695369720459,
+ "learning_rate": 4e-05,
+ "loss": 1.3289,
  "step": 25
  }
  ],
@@ -77,7 +77,7 @@
  "attributes": {}
  }
  },
- "total_flos": 2232558297100032.0,
+ "total_flos": 2246384959161024.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
checkpoint-25/training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:b113e4a74aec7cc7121c45daccc8f966abb0afcc818bbe1d6baa1983309930d6
+ oid sha256:cc637676303d4f40e3a45fd2a9af293c99f68beb0db90cfaa8062315c931a25a
  size 5304
checkpoint-30/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:2b93f3d85939af6263d6c0f8ed3656ea2e40eba29c3edd099553346d1a4deb24
+ oid sha256:d8ebef7d5a7347009f334d030bc17a34a72c0a046597547a996fa3e252966e27
  size 93378688
checkpoint-30/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:7635015371d214eb4eafab7816036fac14b3792e35c8d346db33fe39732541dc
+ oid sha256:9e246e143a47a3be4c37648d013008ce5b18ceaccabdfa487e854f6a2668ced1
  size 48071944
checkpoint-30/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:339b32163eec5c9832623026af39db671dcf74c09cb1e1c4ef25f5057cc414a0
+ oid sha256:b98cbc14b559a2c3eabfc23b1ceaf69d5c3ce0cefe144a24e4fcfe9f6f18a81b
  size 14244
checkpoint-30/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:00d1c54fc5f2a267711c02343ef8760b45d4d54a34fbe0c53c5131a83d91cbc6
+ oid sha256:e9241253195e2761992c7c0b60b7dd0fd1941182b7ded242213ead4f9c0f8c82
  size 1064
checkpoint-30/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.146163215590743,
+ "epoch": 0.14209591474245115,
  "eval_steps": 10,
  "global_step": 30,
  "is_hyper_param_search": false,
@@ -9,69 +9,69 @@
  "is_world_process_zero": true,
  "log_history": [
  {
- "epoch": 0.024360535931790498,
- "grad_norm": 46.74486541748047,
- "learning_rate": 0.00018,
- "loss": 3.1014,
+ "epoch": 0.023682652457075192,
+ "grad_norm": 38.363624572753906,
+ "learning_rate": 9e-05,
+ "loss": 3.9738,
  "step": 5
  },
  {
- "epoch": 0.048721071863580996,
- "grad_norm": 17.46522331237793,
- "learning_rate": 0.00014666666666666666,
- "loss": 1.4103,
+ "epoch": 0.047365304914150384,
+ "grad_norm": Infinity,
+ "learning_rate": 8.666666666666667e-05,
+ "loss": 3.5925,
  "step": 10
  },
  {
- "epoch": 0.048721071863580996,
- "eval_loss": 1.4243313074111938,
- "eval_runtime": 216.3894,
- "eval_samples_per_second": 3.022,
- "eval_steps_per_second": 1.511,
+ "epoch": 0.047365304914150384,
+ "eval_loss": 2.8427393436431885,
+ "eval_runtime": 244.1843,
+ "eval_samples_per_second": 2.756,
+ "eval_steps_per_second": 1.38,
  "step": 10
  },
  {
- "epoch": 0.0730816077953715,
- "grad_norm": 8.364654541015625,
- "learning_rate": 0.00011333333333333334,
- "loss": 1.3099,
+ "epoch": 0.07104795737122557,
+ "grad_norm": 12.08838939666748,
+ "learning_rate": 7.333333333333333e-05,
+ "loss": 2.7399,
  "step": 15
  },
  {
- "epoch": 0.09744214372716199,
- "grad_norm": 9.115853309631348,
- "learning_rate": 8.666666666666667e-05,
- "loss": 1.4179,
+ "epoch": 0.09473060982830077,
+ "grad_norm": 11.703186988830566,
+ "learning_rate": 5.666666666666667e-05,
+ "loss": 1.3988,
  "step": 20
  },
  {
- "epoch": 0.09744214372716199,
- "eval_loss": 1.2988728284835815,
- "eval_runtime": 216.8511,
- "eval_samples_per_second": 3.016,
- "eval_steps_per_second": 1.508,
+ "epoch": 0.09473060982830077,
+ "eval_loss": 1.3483482599258423,
+ "eval_runtime": 247.0633,
+ "eval_samples_per_second": 2.724,
+ "eval_steps_per_second": 1.364,
  "step": 20
  },
  {
- "epoch": 0.1218026796589525,
- "grad_norm": 8.558521270751953,
- "learning_rate": 5.333333333333333e-05,
- "loss": 1.2332,
+ "epoch": 0.11841326228537596,
+ "grad_norm": 21.7695369720459,
+ "learning_rate": 4e-05,
+ "loss": 1.3289,
  "step": 25
  },
  {
- "epoch": 0.146163215590743,
- "grad_norm": 8.526884078979492,
- "learning_rate": 2e-05,
- "loss": 1.2218,
+ "epoch": 0.14209591474245115,
+ "grad_norm": 10.536179542541504,
+ "learning_rate": 2.3333333333333336e-05,
+ "loss": 1.2495,
  "step": 30
  },
  {
- "epoch": 0.146163215590743,
- "eval_loss": 1.2392737865447998,
- "eval_runtime": 216.8293,
- "eval_samples_per_second": 3.016,
- "eval_steps_per_second": 1.508,
+ "epoch": 0.14209591474245115,
+ "eval_loss": 1.2752900123596191,
+ "eval_runtime": 247.8091,
+ "eval_samples_per_second": 2.716,
+ "eval_steps_per_second": 1.36,
  "step": 30
  }
  ],
@@ -92,7 +92,7 @@
  "attributes": {}
  }
  },
- "total_flos": 2664865299381120.0,
+ "total_flos": 2689832869002048.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
checkpoint-30/training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:b113e4a74aec7cc7121c45daccc8f966abb0afcc818bbe1d6baa1983309930d6
+ oid sha256:cc637676303d4f40e3a45fd2a9af293c99f68beb0db90cfaa8062315c931a25a
  size 5304
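Since each checkpoint stores an adapter_model.safetensors (a PEFT adapter) rather than full model weights, loading it back for inference would look roughly like the sketch below; the base model id is an assumption (idefics2-8b) and the checkpoint path is a placeholder:

import torch
from transformers import AutoProcessor, Idefics2ForConditionalGeneration
from peft import PeftModel

# Assumption: the adapter was trained from the idefics2-8b base model.
base = Idefics2ForConditionalGeneration.from_pretrained(
    "HuggingFaceM4/idefics2-8b", torch_dtype=torch.float16
)
processor = AutoProcessor.from_pretrained("HuggingFaceM4/idefics2-8b")

# Attach the fine-tuned adapter from a locally downloaded checkpoint folder.
model = PeftModel.from_pretrained(base, "checkpoint-25")
model.eval()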