sgraham committed
Commit 8ab1e3d · verified · 1 parent: bc66c7a

Pushing complete fine-tuned model with all necessary files

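The commit updates the LoRA adapter weights (adapter_model.safetensors) plus the intermediate checkpoints. As a rough sketch only, an adapter pushed like this is normally loaded on top of its base model with peft; the repo id and base model name below are placeholders, not values recorded in this commit.

```python
# Minimal sketch: load a pushed LoRA adapter with transformers + peft.
# "user/adapter-repo" and "base-model-name" are placeholders (assumptions),
# not taken from this commit.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

ADAPTER_REPO = "user/adapter-repo"   # hypothetical adapter repo id
BASE_MODEL = "base-model-name"       # hypothetical base model

base = AutoModelForCausalLM.from_pretrained(BASE_MODEL)
tokenizer = AutoTokenizer.from_pretrained(ADAPTER_REPO)
model = PeftModel.from_pretrained(base, ADAPTER_REPO)  # applies the adapter weights
model.eval()
```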
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:d8ebef7d5a7347009f334d030bc17a34a72c0a046597547a996fa3e252966e27
+ oid sha256:f3f16fb1332d29600738b4376134403effff4be038f8b6bddb7948e9138d25e8
  size 93378688
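The binary files in this commit are stored as Git LFS pointers, so the diff only shows the new object id and size. As a sketch (not part of the commit), a locally downloaded copy can be checked against the pointer by hashing it; the file path is assumed.

```python
# Sketch: verify a downloaded LFS object against its pointer's oid and size.
# The local path is a placeholder; oid and size come from the diff above.
import hashlib
import os

path = "adapter_model.safetensors"
expected_oid = "f3f16fb1332d29600738b4376134403effff4be038f8b6bddb7948e9138d25e8"
expected_size = 93378688

sha = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        sha.update(chunk)

assert os.path.getsize(path) == expected_size, "size mismatch"
assert sha.hexdigest() == expected_oid, "sha256 mismatch"
print("local file matches the LFS pointer")
```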
checkpoint-25/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:e6d7b334eb490c2c309e20faf9e676ac140b812c2f25f174d784ba4737482e21
+ oid sha256:c414724e6efbe25f59394136d833c6e589761346a36a2d9e3ddbe5b4eb9bc807
  size 93378688
checkpoint-25/generation_config.json CHANGED
@@ -14,5 +14,5 @@
  32002
  ],
  "pad_token_id": 0,
- "transformers_version": "4.48.0.dev0"
+ "transformers_version": "4.49.0.dev0"
  }
checkpoint-25/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:c33271db3bb9fb15c420653e5e95f4ac97a7864bcb7a240b24143fe9ce831704
+ oid sha256:62b39f752a8785b1af9a576a5a1e90728102a2e2ea1c05e6362d39529aa253f7
  size 48071944
checkpoint-25/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:6e9cf1e2b80a52e7a77a9580bf6adc6f13f28f3310ef491aeef3c52cc69312f2
+ oid sha256:efc28ad80cea8f243bc60a65f57b083b27baee89443e3fcd78e3b1a5fd06f6a3
  size 14244
checkpoint-25/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:dd1ca147bfe22a3195c5bd942d2592aec616b2c4d98403ef43122a3fa147216c
+ oid sha256:9c016ebe9cc98685d69c3b36179d5db608efbd4f844e87281ef2d749531cd8d2
  size 1064
checkpoint-25/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.11841326228537596,
+ "epoch": 0.12195121951219512,
  "eval_steps": 10,
  "global_step": 25,
  "is_hyper_param_search": false,
@@ -9,54 +9,54 @@
  "is_world_process_zero": true,
  "log_history": [
  {
- "epoch": 0.023682652457075192,
- "grad_norm": 38.363624572753906,
- "learning_rate": 9e-05,
- "loss": 3.9738,
+ "epoch": 0.024390243902439025,
+ "grad_norm": 8.752623558044434,
+ "learning_rate": 8.333333333333334e-05,
+ "loss": 0.5615,
  "step": 5
  },
  {
- "epoch": 0.047365304914150384,
- "grad_norm": Infinity,
- "learning_rate": 8.666666666666667e-05,
- "loss": 3.5925,
+ "epoch": 0.04878048780487805,
+ "grad_norm": 11.641535758972168,
+ "learning_rate": 6.666666666666667e-05,
+ "loss": 0.8518,
  "step": 10
  },
  {
- "epoch": 0.047365304914150384,
- "eval_loss": 2.8427393436431885,
- "eval_runtime": 244.1843,
- "eval_samples_per_second": 2.756,
- "eval_steps_per_second": 1.38,
+ "epoch": 0.04878048780487805,
+ "eval_loss": 1.3595986366271973,
+ "eval_runtime": 233.8268,
+ "eval_samples_per_second": 2.793,
+ "eval_steps_per_second": 1.398,
  "step": 10
  },
  {
- "epoch": 0.07104795737122557,
- "grad_norm": 12.08838939666748,
- "learning_rate": 7.333333333333333e-05,
- "loss": 2.7399,
+ "epoch": 0.07317073170731707,
+ "grad_norm": 10.912981033325195,
+ "learning_rate": 5e-05,
+ "loss": 0.8933,
  "step": 15
  },
  {
- "epoch": 0.09473060982830077,
- "grad_norm": 11.703186988830566,
- "learning_rate": 5.666666666666667e-05,
- "loss": 1.3988,
+ "epoch": 0.0975609756097561,
+ "grad_norm": 10.059850692749023,
+ "learning_rate": 3.3333333333333335e-05,
+ "loss": 0.8969,
  "step": 20
  },
  {
- "epoch": 0.09473060982830077,
- "eval_loss": 1.3483482599258423,
- "eval_runtime": 247.0633,
- "eval_samples_per_second": 2.724,
- "eval_steps_per_second": 1.364,
+ "epoch": 0.0975609756097561,
+ "eval_loss": 1.281290888786316,
+ "eval_runtime": 234.7195,
+ "eval_samples_per_second": 2.782,
+ "eval_steps_per_second": 1.393,
  "step": 20
  },
  {
- "epoch": 0.11841326228537596,
- "grad_norm": 21.7695369720459,
- "learning_rate": 4e-05,
- "loss": 1.3289,
+ "epoch": 0.12195121951219512,
+ "grad_norm": 9.400066375732422,
+ "learning_rate": 1.6666666666666667e-05,
+ "loss": 0.8338,
  "step": 25
  }
  ],
@@ -77,7 +77,7 @@
  "attributes": {}
  }
  },
- "total_flos": 2246384959161024.0,
+ "total_flos": 2183717354136384.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
checkpoint-25/training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:cc637676303d4f40e3a45fd2a9af293c99f68beb0db90cfaa8062315c931a25a
+ oid sha256:4eed6c37f072298a1664e5513f470ad3c4130aa13a3822fa4fd447a25eec7a3f
  size 5304
checkpoint-30/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:d8ebef7d5a7347009f334d030bc17a34a72c0a046597547a996fa3e252966e27
+ oid sha256:f3f16fb1332d29600738b4376134403effff4be038f8b6bddb7948e9138d25e8
  size 93378688
checkpoint-30/generation_config.json CHANGED
@@ -14,5 +14,5 @@
  32002
  ],
  "pad_token_id": 0,
- "transformers_version": "4.48.0.dev0"
+ "transformers_version": "4.49.0.dev0"
  }
checkpoint-30/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:9e246e143a47a3be4c37648d013008ce5b18ceaccabdfa487e854f6a2668ced1
+ oid sha256:e93c12b54990119a04354f6bbc24075b3c635809ff27f85b65441ec577efddff
  size 48071944
checkpoint-30/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:b98cbc14b559a2c3eabfc23b1ceaf69d5c3ce0cefe144a24e4fcfe9f6f18a81b
+ oid sha256:04248c42bbebc8c793e7835eb43553821faa507ba4b4f25d2dcfb0d342fe243e
  size 14244
checkpoint-30/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:e9241253195e2761992c7c0b60b7dd0fd1941182b7ded242213ead4f9c0f8c82
+ oid sha256:f0e9cc72c20ddd925ef39b6005e82a4d8730b1dde32cfcd070d74c83a8a3564a
  size 1064
checkpoint-30/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.14209591474245115,
+ "epoch": 0.14634146341463414,
  "eval_steps": 10,
  "global_step": 30,
  "is_hyper_param_search": false,
@@ -9,69 +9,69 @@
  "is_world_process_zero": true,
  "log_history": [
  {
- "epoch": 0.023682652457075192,
- "grad_norm": 38.363624572753906,
- "learning_rate": 9e-05,
- "loss": 3.9738,
+ "epoch": 0.024390243902439025,
+ "grad_norm": 8.752623558044434,
+ "learning_rate": 8.333333333333334e-05,
+ "loss": 0.5615,
  "step": 5
  },
  {
- "epoch": 0.047365304914150384,
- "grad_norm": Infinity,
- "learning_rate": 8.666666666666667e-05,
- "loss": 3.5925,
+ "epoch": 0.04878048780487805,
+ "grad_norm": 11.641535758972168,
+ "learning_rate": 6.666666666666667e-05,
+ "loss": 0.8518,
  "step": 10
  },
  {
- "epoch": 0.047365304914150384,
- "eval_loss": 2.8427393436431885,
- "eval_runtime": 244.1843,
- "eval_samples_per_second": 2.756,
- "eval_steps_per_second": 1.38,
+ "epoch": 0.04878048780487805,
+ "eval_loss": 1.3595986366271973,
+ "eval_runtime": 233.8268,
+ "eval_samples_per_second": 2.793,
+ "eval_steps_per_second": 1.398,
  "step": 10
  },
  {
- "epoch": 0.07104795737122557,
- "grad_norm": 12.08838939666748,
- "learning_rate": 7.333333333333333e-05,
- "loss": 2.7399,
+ "epoch": 0.07317073170731707,
+ "grad_norm": 10.912981033325195,
+ "learning_rate": 5e-05,
+ "loss": 0.8933,
  "step": 15
  },
  {
- "epoch": 0.09473060982830077,
- "grad_norm": 11.703186988830566,
- "learning_rate": 5.666666666666667e-05,
- "loss": 1.3988,
+ "epoch": 0.0975609756097561,
+ "grad_norm": 10.059850692749023,
+ "learning_rate": 3.3333333333333335e-05,
+ "loss": 0.8969,
  "step": 20
  },
  {
- "epoch": 0.09473060982830077,
- "eval_loss": 1.3483482599258423,
- "eval_runtime": 247.0633,
- "eval_samples_per_second": 2.724,
- "eval_steps_per_second": 1.364,
+ "epoch": 0.0975609756097561,
+ "eval_loss": 1.281290888786316,
+ "eval_runtime": 234.7195,
+ "eval_samples_per_second": 2.782,
+ "eval_steps_per_second": 1.393,
  "step": 20
  },
  {
- "epoch": 0.11841326228537596,
- "grad_norm": 21.7695369720459,
- "learning_rate": 4e-05,
- "loss": 1.3289,
+ "epoch": 0.12195121951219512,
+ "grad_norm": 9.400066375732422,
+ "learning_rate": 1.6666666666666667e-05,
+ "loss": 0.8338,
  "step": 25
  },
  {
- "epoch": 0.14209591474245115,
- "grad_norm": 10.536179542541504,
- "learning_rate": 2.3333333333333336e-05,
- "loss": 1.2495,
+ "epoch": 0.14634146341463414,
+ "grad_norm": 8.542190551757812,
+ "learning_rate": 0.0,
+ "loss": 0.7171,
  "step": 30
  },
  {
- "epoch": 0.14209591474245115,
- "eval_loss": 1.2752900123596191,
- "eval_runtime": 247.8091,
- "eval_samples_per_second": 2.716,
- "eval_steps_per_second": 1.36,
+ "epoch": 0.14634146341463414,
+ "eval_loss": 1.2695448398590088,
+ "eval_runtime": 234.1517,
+ "eval_samples_per_second": 2.789,
+ "eval_steps_per_second": 1.397,
  "step": 30
  }
  ],
@@ -92,7 +92,7 @@
  "attributes": {}
  }
  },
- "total_flos": 2689832869002048.0,
+ "total_flos": 2620003251974592.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
checkpoint-30/training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:cc637676303d4f40e3a45fd2a9af293c99f68beb0db90cfaa8062315c931a25a
+ oid sha256:4eed6c37f072298a1664e5513f470ad3c4130aa13a3822fa4fd447a25eec7a3f
  size 5304
generation_config.json CHANGED
@@ -14,5 +14,5 @@
  32002
  ],
  "pad_token_id": 0,
- "transformers_version": "4.48.0.dev0"
+ "transformers_version": "4.49.0.dev0"
  }