Ahatsham committed on
Commit
268fc09
·
verified ·
1 Parent(s): 9243e0e

Model save

Browse files
README.md CHANGED
@@ -18,9 +18,9 @@ should probably proofread and complete it, then remove this comment. -->
18
 
19
  This model is a fine-tuned version of [meta-llama/Meta-Llama-3-8B](https://huggingface.co/meta-llama/Meta-Llama-3-8B) on an unspecified dataset.
20
  It achieves the following results on the evaluation set:
21
- - Loss: 0.7220
22
- - Balanced Accuracy: 0.6118
23
- - Accuracy: 0.6615
24
 
25
  ## Model description
26
 
@@ -51,23 +51,25 @@ The following hyperparameters were used during training:
51
 
52
  | Training Loss | Epoch | Step | Validation Loss | Balanced Accuracy | Accuracy |
53
  |:-------------:|:-----:|:----:|:---------------:|:-----------------:|:--------:|
54
- | No log | 1.0 | 96 | 0.6740 | 0.5680 | 0.5260 |
55
- | No log | 2.0 | 192 | 0.6547 | 0.6243 | 0.6823 |
56
- | No log | 3.0 | 288 | 0.6300 | 0.6719 | 0.6927 |
57
- | No log | 4.0 | 384 | 0.6242 | 0.6735 | 0.6823 |
58
- | No log | 5.0 | 480 | 0.6103 | 0.6707 | 0.6979 |
59
- | 0.6274 | 6.0 | 576 | 0.6159 | 0.6777 | 0.6927 |
60
- | 0.6274 | 7.0 | 672 | 0.6264 | 0.6713 | 0.7083 |
61
- | 0.6274 | 8.0 | 768 | 0.6304 | 0.6551 | 0.6875 |
62
- | 0.6274 | 9.0 | 864 | 0.6457 | 0.6630 | 0.6667 |
63
- | 0.6274 | 10.0 | 960 | 0.6843 | 0.6164 | 0.6771 |
64
- | 0.5593 | 11.0 | 1056 | 0.6747 | 0.6418 | 0.6927 |
65
- | 0.5593 | 12.0 | 1152 | 0.6588 | 0.6484 | 0.6823 |
66
- | 0.5593 | 13.0 | 1248 | 0.6911 | 0.6269 | 0.6771 |
67
- | 0.5593 | 14.0 | 1344 | 0.7006 | 0.6409 | 0.6875 |
68
- | 0.5593 | 15.0 | 1440 | 0.6882 | 0.6518 | 0.6771 |
69
- | 0.4936 | 16.0 | 1536 | 0.7000 | 0.6439 | 0.6823 |
70
- | 0.4936 | 17.0 | 1632 | 0.7220 | 0.6118 | 0.6615 |
 
 
71
 
72
 
73
  ### Framework versions
 
18
 
19
  This model is a fine-tuned version of [meta-llama/Meta-Llama-3-8B](https://huggingface.co/meta-llama/Meta-Llama-3-8B) on an unspecified dataset.
20
  It achieves the following results on the evaluation set:
21
+ - Loss: 0.8755
22
+ - Balanced Accuracy: 0.7620
23
+ - Accuracy: 0.7703
24
 
25
  ## Model description
26
 
 
51
 
52
  | Training Loss | Epoch | Step | Validation Loss | Balanced Accuracy | Accuracy |
53
  |:-------------:|:-----:|:----:|:---------------:|:-----------------:|:--------:|
54
+ | No log | 1.0 | 105 | 0.6539 | 0.6583 | 0.6699 |
55
+ | No log | 2.0 | 210 | 0.6195 | 0.6923 | 0.7033 |
56
+ | No log | 3.0 | 315 | 0.6555 | 0.6903 | 0.6651 |
57
+ | No log | 4.0 | 420 | 0.5576 | 0.7073 | 0.7129 |
58
+ | 0.6494 | 5.0 | 525 | 0.5759 | 0.7387 | 0.6794 |
59
+ | 0.6494 | 6.0 | 630 | 0.6838 | 0.7417 | 0.7129 |
60
+ | 0.6494 | 7.0 | 735 | 0.5210 | 0.7229 | 0.7225 |
61
+ | 0.6494 | 8.0 | 840 | 0.5587 | 0.7535 | 0.7273 |
62
+ | 0.6494 | 9.0 | 945 | 0.5491 | 0.7622 | 0.7703 |
63
+ | 0.4708 | 10.0 | 1050 | 0.5130 | 0.7211 | 0.7273 |
64
+ | 0.4708 | 11.0 | 1155 | 0.5937 | 0.7195 | 0.7273 |
65
+ | 0.4708 | 12.0 | 1260 | 0.5614 | 0.7424 | 0.7368 |
66
+ | 0.4708 | 13.0 | 1365 | 0.6149 | 0.7461 | 0.7416 |
67
+ | 0.4708 | 14.0 | 1470 | 0.6935 | 0.7225 | 0.7321 |
68
+ | 0.3052 | 15.0 | 1575 | 0.6925 | 0.7180 | 0.7273 |
69
+ | 0.3052 | 16.0 | 1680 | 0.7422 | 0.7544 | 0.7608 |
70
+ | 0.3052 | 17.0 | 1785 | 0.8234 | 0.7522 | 0.7608 |
71
+ | 0.3052 | 18.0 | 1890 | 0.8475 | 0.7620 | 0.7703 |
72
+ | 0.3052 | 19.0 | 1995 | 0.8755 | 0.7620 | 0.7703 |
73
 
74
 
75
  ### Framework versions
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e131b03255cc50af798f9cbbb008566f82070f514a62220b2483e15de1629898
3
  size 54593240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b57159f3a36ea7f6ae4743b74b4bcf2968dec71a1413e56ecc4a88bb07b6d986
3
  size 54593240
all_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 17.0,
3
- "total_flos": 2.0631847563185357e+17,
4
- "train_loss": 0.5514274087606692,
5
- "train_runtime": 18758.2394,
6
- "train_samples": 768,
7
- "train_samples_per_second": 0.819,
8
- "train_steps_per_second": 0.102
9
  }
 
1
  {
2
+ "epoch": 19.0,
3
+ "total_flos": 5.3616756937324954e+17,
4
+ "train_loss": 0.3997560008725427,
5
+ "train_runtime": 41381.9003,
6
+ "train_samples": 834,
7
+ "train_samples_per_second": 0.403,
8
+ "train_steps_per_second": 0.051
9
  }
runs/Feb26_17-02-37_LCEEE-HAL/events.out.tfevents.1740610958.LCEEE-HAL.3015176.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:809f1ec918751facfafc0d1e0b9507adb6c25d5e4c3bdada38bcedb3333d3ade
3
- size 13200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0d2609274043f638268f5bf157c1e882479e88236146504d3a778546c73ca55
3
+ size 13938
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bd751a4191592ee601080b532d487e2444024e3205900acca92b7c94791e29dd
3
- size 17210060
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00224c78475a2ff2e27556796824b19aed9bd853b1925d36728fd05fd9d7693b
3
+ size 17210230
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 17.0,
3
- "total_flos": 2.0631847563185357e+17,
4
- "train_loss": 0.5514274087606692,
5
- "train_runtime": 18758.2394,
6
- "train_samples": 768,
7
- "train_samples_per_second": 0.819,
8
- "train_steps_per_second": 0.102
9
  }
 
1
  {
2
+ "epoch": 19.0,
3
+ "total_flos": 5.3616756937324954e+17,
4
+ "train_loss": 0.3997560008725427,
5
+ "train_runtime": 41381.9003,
6
+ "train_samples": 834,
7
+ "train_samples_per_second": 0.403,
8
+ "train_steps_per_second": 0.051
9
  }
trainer_state.json CHANGED
@@ -1,216 +1,236 @@
1
  {
2
- "best_metric": 0.7083333333333334,
3
- "best_model_checkpoint": "Output_llama3_80-20/checkpoint-672",
4
- "epoch": 17.0,
5
  "eval_steps": 500,
6
- "global_step": 1632,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.5260416666666666,
14
- "eval_balanced_accuracy": 0.568034188034188,
15
- "eval_loss": 0.6740227341651917,
16
- "eval_runtime": 86.2229,
17
- "eval_samples_per_second": 2.227,
18
- "eval_steps_per_second": 0.278,
19
- "step": 96
20
  },
21
  {
22
  "epoch": 2.0,
23
- "eval_accuracy": 0.6822916666666666,
24
- "eval_balanced_accuracy": 0.6242846661775495,
25
- "eval_loss": 0.6547484993934631,
26
- "eval_runtime": 85.2498,
27
- "eval_samples_per_second": 2.252,
28
- "eval_steps_per_second": 0.282,
29
- "step": 192
30
  },
31
  {
32
  "epoch": 3.0,
33
- "eval_accuracy": 0.6927083333333334,
34
- "eval_balanced_accuracy": 0.671880181275561,
35
- "eval_loss": 0.6299501061439514,
36
- "eval_runtime": 84.0275,
37
- "eval_samples_per_second": 2.285,
38
- "eval_steps_per_second": 0.286,
39
- "step": 288
40
  },
41
  {
42
  "epoch": 4.0,
43
- "eval_accuracy": 0.6822916666666666,
44
- "eval_balanced_accuracy": 0.6735393319551735,
45
- "eval_loss": 0.6242036819458008,
46
- "eval_runtime": 84.0436,
47
- "eval_samples_per_second": 2.285,
48
- "eval_steps_per_second": 0.286,
49
- "step": 384
50
  },
51
  {
52
- "epoch": 5.0,
53
- "eval_accuracy": 0.6979166666666666,
54
- "eval_balanced_accuracy": 0.6707152496626181,
55
- "eval_loss": 0.6103450059890747,
56
- "eval_runtime": 84.9699,
57
- "eval_samples_per_second": 2.26,
58
- "eval_steps_per_second": 0.282,
59
- "step": 480
60
- },
61
- {
62
- "epoch": 5.208333333333333,
63
- "grad_norm": 43.049320220947266,
64
- "learning_rate": 7.395833333333335e-06,
65
- "loss": 0.6274,
66
  "step": 500
67
  },
 
 
 
 
 
 
 
 
 
 
68
  {
69
  "epoch": 6.0,
70
- "eval_accuracy": 0.6927083333333334,
71
- "eval_balanced_accuracy": 0.6776683087027915,
72
- "eval_loss": 0.6159375309944153,
73
- "eval_runtime": 85.0216,
74
- "eval_samples_per_second": 2.258,
75
- "eval_steps_per_second": 0.282,
76
- "step": 576
77
  },
78
  {
79
  "epoch": 7.0,
80
- "eval_accuracy": 0.7083333333333334,
81
- "eval_balanced_accuracy": 0.6712523719165084,
82
- "eval_loss": 0.6264284253120422,
83
- "eval_runtime": 85.8317,
84
- "eval_samples_per_second": 2.237,
85
- "eval_steps_per_second": 0.28,
86
- "step": 672
87
  },
88
  {
89
  "epoch": 8.0,
90
- "eval_accuracy": 0.6875,
91
- "eval_balanced_accuracy": 0.6550618415025194,
92
- "eval_loss": 0.6303848624229431,
93
- "eval_runtime": 83.7759,
94
- "eval_samples_per_second": 2.292,
95
- "eval_steps_per_second": 0.286,
96
- "step": 768
97
  },
98
  {
99
  "epoch": 9.0,
100
- "eval_accuracy": 0.6666666666666666,
101
- "eval_balanced_accuracy": 0.6630481980026053,
102
- "eval_loss": 0.6457447409629822,
103
- "eval_runtime": 84.2188,
104
- "eval_samples_per_second": 2.28,
105
- "eval_steps_per_second": 0.285,
106
- "step": 864
107
  },
108
  {
109
- "epoch": 10.0,
110
- "eval_accuracy": 0.6770833333333334,
111
- "eval_balanced_accuracy": 0.6164383561643836,
112
- "eval_loss": 0.6843230128288269,
113
- "eval_runtime": 84.6517,
114
- "eval_samples_per_second": 2.268,
115
- "eval_steps_per_second": 0.284,
116
- "step": 960
117
- },
118
- {
119
- "epoch": 10.416666666666666,
120
- "grad_norm": 21.24892234802246,
121
- "learning_rate": 4.791666666666668e-06,
122
- "loss": 0.5593,
123
  "step": 1000
124
  },
 
 
 
 
 
 
 
 
 
 
125
  {
126
  "epoch": 11.0,
127
- "eval_accuracy": 0.6927083333333334,
128
- "eval_balanced_accuracy": 0.6418487851228452,
129
- "eval_loss": 0.6747085452079773,
130
- "eval_runtime": 83.9693,
131
- "eval_samples_per_second": 2.287,
132
- "eval_steps_per_second": 0.286,
133
- "step": 1056
134
  },
135
  {
136
  "epoch": 12.0,
137
- "eval_accuracy": 0.6822916666666666,
138
- "eval_balanced_accuracy": 0.6483826407275239,
139
- "eval_loss": 0.6588295102119446,
140
- "eval_runtime": 84.8037,
141
- "eval_samples_per_second": 2.264,
142
- "eval_steps_per_second": 0.283,
143
- "step": 1152
144
  },
145
  {
146
  "epoch": 13.0,
147
- "eval_accuracy": 0.6770833333333334,
148
- "eval_balanced_accuracy": 0.6268656716417911,
149
- "eval_loss": 0.6911186575889587,
150
- "eval_runtime": 85.1959,
151
- "eval_samples_per_second": 2.254,
152
- "eval_steps_per_second": 0.282,
153
- "step": 1248
154
  },
155
  {
156
  "epoch": 14.0,
157
- "eval_accuracy": 0.6875,
158
- "eval_balanced_accuracy": 0.6409090909090909,
159
- "eval_loss": 0.7006358504295349,
160
- "eval_runtime": 84.6723,
161
- "eval_samples_per_second": 2.268,
162
- "eval_steps_per_second": 0.283,
163
- "step": 1344
164
  },
165
  {
166
- "epoch": 15.0,
167
- "eval_accuracy": 0.6770833333333334,
168
- "eval_balanced_accuracy": 0.6517857142857143,
169
- "eval_loss": 0.6882149577140808,
170
- "eval_runtime": 83.7055,
171
- "eval_samples_per_second": 2.294,
172
- "eval_steps_per_second": 0.287,
173
- "step": 1440
174
- },
175
- {
176
- "epoch": 15.625,
177
- "grad_norm": 77.84115600585938,
178
- "learning_rate": 2.1875000000000002e-06,
179
- "loss": 0.4936,
180
  "step": 1500
181
  },
 
 
 
 
 
 
 
 
 
 
182
  {
183
  "epoch": 16.0,
184
- "eval_accuracy": 0.6822916666666666,
185
- "eval_balanced_accuracy": 0.6438670908448215,
186
- "eval_loss": 0.6999824047088623,
187
- "eval_runtime": 84.378,
188
- "eval_samples_per_second": 2.275,
189
- "eval_steps_per_second": 0.284,
190
- "step": 1536
191
  },
192
  {
193
  "epoch": 17.0,
194
- "eval_accuracy": 0.6614583333333334,
195
- "eval_balanced_accuracy": 0.6117507195595044,
196
- "eval_loss": 0.7219934463500977,
197
- "eval_runtime": 84.9653,
198
- "eval_samples_per_second": 2.26,
199
- "eval_steps_per_second": 0.282,
200
- "step": 1632
201
  },
202
  {
203
- "epoch": 17.0,
204
- "step": 1632,
205
- "total_flos": 2.0631847563185357e+17,
206
- "train_loss": 0.5514274087606692,
207
- "train_runtime": 18758.2394,
208
- "train_samples_per_second": 0.819,
209
- "train_steps_per_second": 0.102
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
210
  }
211
  ],
212
  "logging_steps": 500,
213
- "max_steps": 1920,
214
  "num_input_tokens_seen": 0,
215
  "num_train_epochs": 20,
216
  "save_steps": 500,
@@ -235,7 +255,7 @@
235
  "attributes": {}
236
  }
237
  },
238
- "total_flos": 2.0631847563185357e+17,
239
  "train_batch_size": 8,
240
  "trial_name": null,
241
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.7703349282296651,
3
+ "best_model_checkpoint": "Output_llama3_80-20/checkpoint-945",
4
+ "epoch": 19.0,
5
  "eval_steps": 500,
6
+ "global_step": 1995,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.6698564593301436,
14
+ "eval_balanced_accuracy": 0.6583002457931557,
15
+ "eval_loss": 0.6538845300674438,
16
+ "eval_runtime": 197.5626,
17
+ "eval_samples_per_second": 1.058,
18
+ "eval_steps_per_second": 0.137,
19
+ "step": 105
20
  },
21
  {
22
  "epoch": 2.0,
23
+ "eval_accuracy": 0.7033492822966507,
24
+ "eval_balanced_accuracy": 0.6922929814496079,
25
+ "eval_loss": 0.619515597820282,
26
+ "eval_runtime": 199.2963,
27
+ "eval_samples_per_second": 1.049,
28
+ "eval_steps_per_second": 0.135,
29
+ "step": 210
30
  },
31
  {
32
  "epoch": 3.0,
33
+ "eval_accuracy": 0.6650717703349283,
34
+ "eval_balanced_accuracy": 0.6903409090909092,
35
+ "eval_loss": 0.6554874777793884,
36
+ "eval_runtime": 197.6424,
37
+ "eval_samples_per_second": 1.057,
38
+ "eval_steps_per_second": 0.137,
39
+ "step": 315
40
  },
41
  {
42
  "epoch": 4.0,
43
+ "eval_accuracy": 0.7129186602870813,
44
+ "eval_balanced_accuracy": 0.7072901325478644,
45
+ "eval_loss": 0.5575785040855408,
46
+ "eval_runtime": 196.6661,
47
+ "eval_samples_per_second": 1.063,
48
+ "eval_steps_per_second": 0.137,
49
+ "step": 420
50
  },
51
  {
52
+ "epoch": 4.761904761904762,
53
+ "grad_norm": 12.616178512573242,
54
+ "learning_rate": 7.61904761904762e-06,
55
+ "loss": 0.6494,
 
 
 
 
 
 
 
 
 
 
56
  "step": 500
57
  },
58
+ {
59
+ "epoch": 5.0,
60
+ "eval_accuracy": 0.6794258373205742,
61
+ "eval_balanced_accuracy": 0.7386645962732918,
62
+ "eval_loss": 0.5758755803108215,
63
+ "eval_runtime": 196.7588,
64
+ "eval_samples_per_second": 1.062,
65
+ "eval_steps_per_second": 0.137,
66
+ "step": 525
67
+ },
68
  {
69
  "epoch": 6.0,
70
+ "eval_accuracy": 0.7129186602870813,
71
+ "eval_balanced_accuracy": 0.7417343793779769,
72
+ "eval_loss": 0.6838177442550659,
73
+ "eval_runtime": 199.6172,
74
+ "eval_samples_per_second": 1.047,
75
+ "eval_steps_per_second": 0.135,
76
+ "step": 630
77
  },
78
  {
79
  "epoch": 7.0,
80
+ "eval_accuracy": 0.722488038277512,
81
+ "eval_balanced_accuracy": 0.7229395604395604,
82
+ "eval_loss": 0.5210055708885193,
83
+ "eval_runtime": 197.9194,
84
+ "eval_samples_per_second": 1.056,
85
+ "eval_steps_per_second": 0.136,
86
+ "step": 735
87
  },
88
  {
89
  "epoch": 8.0,
90
+ "eval_accuracy": 0.7272727272727273,
91
+ "eval_balanced_accuracy": 0.7535104364326376,
92
+ "eval_loss": 0.558698296546936,
93
+ "eval_runtime": 147.8334,
94
+ "eval_samples_per_second": 1.414,
95
+ "eval_steps_per_second": 0.183,
96
+ "step": 840
97
  },
98
  {
99
  "epoch": 9.0,
100
+ "eval_accuracy": 0.7703349282296651,
101
+ "eval_balanced_accuracy": 0.7622394339261809,
102
+ "eval_loss": 0.5491181015968323,
103
+ "eval_runtime": 147.6758,
104
+ "eval_samples_per_second": 1.415,
105
+ "eval_steps_per_second": 0.183,
106
+ "step": 945
107
  },
108
  {
109
+ "epoch": 9.523809523809524,
110
+ "grad_norm": 23.234663009643555,
111
+ "learning_rate": 5.2380952380952384e-06,
112
+ "loss": 0.4708,
 
 
 
 
 
 
 
 
 
 
113
  "step": 1000
114
  },
115
+ {
116
+ "epoch": 10.0,
117
+ "eval_accuracy": 0.7272727272727273,
118
+ "eval_balanced_accuracy": 0.7211467551622419,
119
+ "eval_loss": 0.5130271911621094,
120
+ "eval_runtime": 147.7065,
121
+ "eval_samples_per_second": 1.415,
122
+ "eval_steps_per_second": 0.183,
123
+ "step": 1050
124
+ },
125
  {
126
  "epoch": 11.0,
127
+ "eval_accuracy": 0.7272727272727273,
128
+ "eval_balanced_accuracy": 0.7194849959448499,
129
+ "eval_loss": 0.5936519503593445,
130
+ "eval_runtime": 147.7154,
131
+ "eval_samples_per_second": 1.415,
132
+ "eval_steps_per_second": 0.183,
133
+ "step": 1155
134
  },
135
  {
136
  "epoch": 12.0,
137
+ "eval_accuracy": 0.7368421052631579,
138
+ "eval_balanced_accuracy": 0.7424242424242424,
139
+ "eval_loss": 0.5613722205162048,
140
+ "eval_runtime": 147.7519,
141
+ "eval_samples_per_second": 1.415,
142
+ "eval_steps_per_second": 0.183,
143
+ "step": 1260
144
  },
145
  {
146
  "epoch": 13.0,
147
+ "eval_accuracy": 0.7416267942583732,
148
+ "eval_balanced_accuracy": 0.7461009174311927,
149
+ "eval_loss": 0.6149305701255798,
150
+ "eval_runtime": 147.8796,
151
+ "eval_samples_per_second": 1.413,
152
+ "eval_steps_per_second": 0.183,
153
+ "step": 1365
154
  },
155
  {
156
  "epoch": 14.0,
157
+ "eval_accuracy": 0.7320574162679426,
158
+ "eval_balanced_accuracy": 0.722541382667965,
159
+ "eval_loss": 0.6935343146324158,
160
+ "eval_runtime": 147.7695,
161
+ "eval_samples_per_second": 1.414,
162
+ "eval_steps_per_second": 0.183,
163
+ "step": 1470
164
  },
165
  {
166
+ "epoch": 14.285714285714286,
167
+ "grad_norm": 22.915342330932617,
168
+ "learning_rate": 2.8571428571428573e-06,
169
+ "loss": 0.3052,
 
 
 
 
 
 
 
 
 
 
170
  "step": 1500
171
  },
172
+ {
173
+ "epoch": 15.0,
174
+ "eval_accuracy": 0.7272727272727273,
175
+ "eval_balanced_accuracy": 0.7179752066115702,
176
+ "eval_loss": 0.6924564242362976,
177
+ "eval_runtime": 147.6901,
178
+ "eval_samples_per_second": 1.415,
179
+ "eval_steps_per_second": 0.183,
180
+ "step": 1575
181
+ },
182
  {
183
  "epoch": 16.0,
184
+ "eval_accuracy": 0.7607655502392344,
185
+ "eval_balanced_accuracy": 0.7543859649122807,
186
+ "eval_loss": 0.7421520352363586,
187
+ "eval_runtime": 147.7078,
188
+ "eval_samples_per_second": 1.415,
189
+ "eval_steps_per_second": 0.183,
190
+ "step": 1680
191
  },
192
  {
193
  "epoch": 17.0,
194
+ "eval_accuracy": 0.7607655502392344,
195
+ "eval_balanced_accuracy": 0.7522470835723847,
196
+ "eval_loss": 0.823431134223938,
197
+ "eval_runtime": 147.7864,
198
+ "eval_samples_per_second": 1.414,
199
+ "eval_steps_per_second": 0.183,
200
+ "step": 1785
201
  },
202
  {
203
+ "epoch": 18.0,
204
+ "eval_accuracy": 0.7703349282296651,
205
+ "eval_balanced_accuracy": 0.7620493358633775,
206
+ "eval_loss": 0.847453773021698,
207
+ "eval_runtime": 147.8029,
208
+ "eval_samples_per_second": 1.414,
209
+ "eval_steps_per_second": 0.183,
210
+ "step": 1890
211
+ },
212
+ {
213
+ "epoch": 19.0,
214
+ "eval_accuracy": 0.7703349282296651,
215
+ "eval_balanced_accuracy": 0.7620493358633775,
216
+ "eval_loss": 0.8755035996437073,
217
+ "eval_runtime": 147.7297,
218
+ "eval_samples_per_second": 1.415,
219
+ "eval_steps_per_second": 0.183,
220
+ "step": 1995
221
+ },
222
+ {
223
+ "epoch": 19.0,
224
+ "step": 1995,
225
+ "total_flos": 5.3616756937324954e+17,
226
+ "train_loss": 0.3997560008725427,
227
+ "train_runtime": 41381.9003,
228
+ "train_samples_per_second": 0.403,
229
+ "train_steps_per_second": 0.051
230
  }
231
  ],
232
  "logging_steps": 500,
233
+ "max_steps": 2100,
234
  "num_input_tokens_seen": 0,
235
  "num_train_epochs": 20,
236
  "save_steps": 500,
 
255
  "attributes": {}
256
  }
257
  },
258
+ "total_flos": 5.3616756937324954e+17,
259
  "train_batch_size": 8,
260
  "trial_name": null,
261
  "trial_params": null