ChiefTheLord committed on
Commit
c6ca8ec
verified
1 Parent(s): 6ccdddc

Upload folder using huggingface_hub

Browse files
checkpoints/checkpoint-627/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9cf40f90ba338ac76a4b1d0f364a0e2fe020fa8a92b8e200ec46bdf4c1feca08
3
+ size 78947288
checkpoints/checkpoint-627/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:758d0ff0f7cf462b7572a23f74da4a78eb5c4b9f57b98fbc70094461f763adaa
3
+ size 157878906
checkpoints/checkpoint-627/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ec28ea0c416565eeac14a0e9c944f185ac250f4ed4bd15c84ff77ed78ba9301
3
+ size 14244
checkpoints/checkpoint-627/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f90aca6451b193db5c59b14756ae1f8f33b96ce8badec4b2be33c8f4d1ed57d8
3
+ size 1064
checkpoints/checkpoint-627/trainer_state.json ADDED
@@ -0,0 +1,316 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 11.0,
5
+ "eval_steps": 64,
6
+ "global_step": 627,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "eval_accuracy": {
14
+ "accuracy": 0.6990740740740741
15
+ },
16
+ "eval_f1": {
17
+ "f1": 0.5246378412207559
18
+ },
19
+ "eval_loss": 1.4924211502075195,
20
+ "eval_precision": {
21
+ "precision": 0.4763165636952045
22
+ },
23
+ "eval_recall": {
24
+ "recall": 0.59375
25
+ },
26
+ "eval_runtime": 7.4612,
27
+ "eval_samples_per_second": 28.95,
28
+ "eval_steps_per_second": 7.237,
29
+ "step": 57
30
+ },
31
+ {
32
+ "epoch": 1.1228070175438596,
33
+ "grad_norm": 12.269176483154297,
34
+ "learning_rate": 1.1496350364963505e-06,
35
+ "loss": 1.5298,
36
+ "step": 64
37
+ },
38
+ {
39
+ "epoch": 2.0,
40
+ "eval_accuracy": {
41
+ "accuracy": 0.7129629629629629
42
+ },
43
+ "eval_f1": {
44
+ "f1": 0.53001124288253
45
+ },
46
+ "eval_loss": 1.3766342401504517,
47
+ "eval_precision": {
48
+ "precision": 0.4774156660949114
49
+ },
50
+ "eval_recall": {
51
+ "recall": 0.6041666666666666
52
+ },
53
+ "eval_runtime": 7.1848,
54
+ "eval_samples_per_second": 30.063,
55
+ "eval_steps_per_second": 7.516,
56
+ "step": 114
57
+ },
58
+ {
59
+ "epoch": 2.245614035087719,
60
+ "grad_norm": 11.047577857971191,
61
+ "learning_rate": 2.3175182481751825e-06,
62
+ "loss": 1.3875,
63
+ "step": 128
64
+ },
65
+ {
66
+ "epoch": 3.0,
67
+ "eval_accuracy": {
68
+ "accuracy": 0.7962962962962963
69
+ },
70
+ "eval_f1": {
71
+ "f1": 0.7359516439448823
72
+ },
73
+ "eval_loss": 1.3143658638000488,
74
+ "eval_precision": {
75
+ "precision": 0.8463221219578889
76
+ },
77
+ "eval_recall": {
78
+ "recall": 0.7291666666666666
79
+ },
80
+ "eval_runtime": 6.953,
81
+ "eval_samples_per_second": 31.066,
82
+ "eval_steps_per_second": 7.766,
83
+ "step": 171
84
+ },
85
+ {
86
+ "epoch": 3.3684210526315788,
87
+ "grad_norm": 10.464760780334473,
88
+ "learning_rate": 3.4854014598540146e-06,
89
+ "loss": 1.292,
90
+ "step": 192
91
+ },
92
+ {
93
+ "epoch": 4.0,
94
+ "eval_accuracy": {
95
+ "accuracy": 0.7824074074074074
96
+ },
97
+ "eval_f1": {
98
+ "f1": 0.7084821399163013
99
+ },
100
+ "eval_loss": 1.2212026119232178,
101
+ "eval_precision": {
102
+ "precision": 0.8394571333995365
103
+ },
104
+ "eval_recall": {
105
+ "recall": 0.7083333333333334
106
+ },
107
+ "eval_runtime": 7.4938,
108
+ "eval_samples_per_second": 28.824,
109
+ "eval_steps_per_second": 7.206,
110
+ "step": 228
111
+ },
112
+ {
113
+ "epoch": 4.491228070175438,
114
+ "grad_norm": 12.949272155761719,
115
+ "learning_rate": 4.653284671532847e-06,
116
+ "loss": 1.2461,
117
+ "step": 256
118
+ },
119
+ {
120
+ "epoch": 5.0,
121
+ "eval_accuracy": {
122
+ "accuracy": 0.8055555555555556
123
+ },
124
+ "eval_f1": {
125
+ "f1": 0.7569049164117302
126
+ },
127
+ "eval_loss": 1.1568979024887085,
128
+ "eval_precision": {
129
+ "precision": 0.8381390145435089
130
+ },
131
+ "eval_recall": {
132
+ "recall": 0.7465277777777778
133
+ },
134
+ "eval_runtime": 7.129,
135
+ "eval_samples_per_second": 30.299,
136
+ "eval_steps_per_second": 7.575,
137
+ "step": 285
138
+ },
139
+ {
140
+ "epoch": 5.614035087719298,
141
+ "grad_norm": 10.92992877960205,
142
+ "learning_rate": 5.821167883211679e-06,
143
+ "loss": 1.1103,
144
+ "step": 320
145
+ },
146
+ {
147
+ "epoch": 6.0,
148
+ "eval_accuracy": {
149
+ "accuracy": 0.7777777777777778
150
+ },
151
+ "eval_f1": {
152
+ "f1": 0.7295260138258725
153
+ },
154
+ "eval_loss": 1.1286191940307617,
155
+ "eval_precision": {
156
+ "precision": 0.8194070080862534
157
+ },
158
+ "eval_recall": {
159
+ "recall": 0.7800925925925926
160
+ },
161
+ "eval_runtime": 7.0035,
162
+ "eval_samples_per_second": 30.842,
163
+ "eval_steps_per_second": 7.71,
164
+ "step": 342
165
+ },
166
+ {
167
+ "epoch": 6.7368421052631575,
168
+ "grad_norm": 21.803577423095703,
169
+ "learning_rate": 6.989051094890511e-06,
170
+ "loss": 1.0695,
171
+ "step": 384
172
+ },
173
+ {
174
+ "epoch": 7.0,
175
+ "eval_accuracy": {
176
+ "accuracy": 0.6574074074074074
177
+ },
178
+ "eval_f1": {
179
+ "f1": 0.5127626686719305
180
+ },
181
+ "eval_loss": 1.287270188331604,
182
+ "eval_precision": {
183
+ "precision": 0.441052029731275
184
+ },
185
+ "eval_recall": {
186
+ "recall": 0.6597222222222222
187
+ },
188
+ "eval_runtime": 7.1142,
189
+ "eval_samples_per_second": 30.362,
190
+ "eval_steps_per_second": 7.591,
191
+ "step": 399
192
+ },
193
+ {
194
+ "epoch": 7.859649122807017,
195
+ "grad_norm": 6.482203960418701,
196
+ "learning_rate": 8.156934306569345e-06,
197
+ "loss": 0.9602,
198
+ "step": 448
199
+ },
200
+ {
201
+ "epoch": 8.0,
202
+ "eval_accuracy": {
203
+ "accuracy": 0.8796296296296297
204
+ },
205
+ "eval_f1": {
206
+ "f1": 0.8678067806780678
207
+ },
208
+ "eval_loss": 0.871719241142273,
209
+ "eval_precision": {
210
+ "precision": 0.8869953337390951
211
+ },
212
+ "eval_recall": {
213
+ "recall": 0.8819444444444443
214
+ },
215
+ "eval_runtime": 7.2624,
216
+ "eval_samples_per_second": 29.742,
217
+ "eval_steps_per_second": 7.436,
218
+ "step": 456
219
+ },
220
+ {
221
+ "epoch": 8.982456140350877,
222
+ "grad_norm": 24.75027847290039,
223
+ "learning_rate": 9.306569343065694e-06,
224
+ "loss": 0.907,
225
+ "step": 512
226
+ },
227
+ {
228
+ "epoch": 9.0,
229
+ "eval_accuracy": {
230
+ "accuracy": 0.8796296296296297
231
+ },
232
+ "eval_f1": {
233
+ "f1": 0.8678067806780678
234
+ },
235
+ "eval_loss": 0.7951932549476624,
236
+ "eval_precision": {
237
+ "precision": 0.8869953337390951
238
+ },
239
+ "eval_recall": {
240
+ "recall": 0.8819444444444443
241
+ },
242
+ "eval_runtime": 7.9299,
243
+ "eval_samples_per_second": 27.239,
244
+ "eval_steps_per_second": 6.81,
245
+ "step": 513
246
+ },
247
+ {
248
+ "epoch": 10.0,
249
+ "eval_accuracy": {
250
+ "accuracy": 0.8611111111111112
251
+ },
252
+ "eval_f1": {
253
+ "f1": 0.8535169178557792
254
+ },
255
+ "eval_loss": 0.8125227689743042,
256
+ "eval_precision": {
257
+ "precision": 0.8559314456035767
258
+ },
259
+ "eval_recall": {
260
+ "recall": 0.8692129629629629
261
+ },
262
+ "eval_runtime": 7.3185,
263
+ "eval_samples_per_second": 29.514,
264
+ "eval_steps_per_second": 7.379,
265
+ "step": 570
266
+ },
267
+ {
268
+ "epoch": 10.105263157894736,
269
+ "grad_norm": 15.228086471557617,
270
+ "learning_rate": 9.682926829268292e-06,
271
+ "loss": 0.8145,
272
+ "step": 576
273
+ },
274
+ {
275
+ "epoch": 11.0,
276
+ "eval_accuracy": {
277
+ "accuracy": 0.8796296296296297
278
+ },
279
+ "eval_f1": {
280
+ "f1": 0.8678067806780678
281
+ },
282
+ "eval_loss": 0.807636559009552,
283
+ "eval_precision": {
284
+ "precision": 0.8869953337390951
285
+ },
286
+ "eval_recall": {
287
+ "recall": 0.8819444444444443
288
+ },
289
+ "eval_runtime": 7.4338,
290
+ "eval_samples_per_second": 29.057,
291
+ "eval_steps_per_second": 7.264,
292
+ "step": 627
293
+ }
294
+ ],
295
+ "logging_steps": 64,
296
+ "max_steps": 1368,
297
+ "num_input_tokens_seen": 0,
298
+ "num_train_epochs": 24,
299
+ "save_steps": 64,
300
+ "stateful_callbacks": {
301
+ "TrainerControl": {
302
+ "args": {
303
+ "should_epoch_stop": false,
304
+ "should_evaluate": false,
305
+ "should_log": false,
306
+ "should_save": true,
307
+ "should_training_stop": false
308
+ },
309
+ "attributes": {}
310
+ }
311
+ },
312
+ "total_flos": 0.0,
313
+ "train_batch_size": 16,
314
+ "trial_name": null,
315
+ "trial_params": null
316
+ }
checkpoints/checkpoint-627/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4229f73f210e46319463b8ac3691f8a1d0602065fd6dccda4819f6160cb9b1e0
3
+ size 5176