ChiefTheLord committed on
Commit
955e8b1
verified
1 Parent(s): c6ca8ec

Delete checkpoints/checkpoint-627

Browse files
checkpoints/checkpoint-627/model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:9cf40f90ba338ac76a4b1d0f364a0e2fe020fa8a92b8e200ec46bdf4c1feca08
3
- size 78947288
 
 
 
 
checkpoints/checkpoint-627/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:758d0ff0f7cf462b7572a23f74da4a78eb5c4b9f57b98fbc70094461f763adaa
3
- size 157878906
 
 
 
 
checkpoints/checkpoint-627/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:2ec28ea0c416565eeac14a0e9c944f185ac250f4ed4bd15c84ff77ed78ba9301
3
- size 14244
 
 
 
 
checkpoints/checkpoint-627/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f90aca6451b193db5c59b14756ae1f8f33b96ce8badec4b2be33c8f4d1ed57d8
3
- size 1064
 
 
 
 
checkpoints/checkpoint-627/trainer_state.json DELETED
@@ -1,316 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 11.0,
5
- "eval_steps": 64,
6
- "global_step": 627,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 1.0,
13
- "eval_accuracy": {
14
- "accuracy": 0.6990740740740741
15
- },
16
- "eval_f1": {
17
- "f1": 0.5246378412207559
18
- },
19
- "eval_loss": 1.4924211502075195,
20
- "eval_precision": {
21
- "precision": 0.4763165636952045
22
- },
23
- "eval_recall": {
24
- "recall": 0.59375
25
- },
26
- "eval_runtime": 7.4612,
27
- "eval_samples_per_second": 28.95,
28
- "eval_steps_per_second": 7.237,
29
- "step": 57
30
- },
31
- {
32
- "epoch": 1.1228070175438596,
33
- "grad_norm": 12.269176483154297,
34
- "learning_rate": 1.1496350364963505e-06,
35
- "loss": 1.5298,
36
- "step": 64
37
- },
38
- {
39
- "epoch": 2.0,
40
- "eval_accuracy": {
41
- "accuracy": 0.7129629629629629
42
- },
43
- "eval_f1": {
44
- "f1": 0.53001124288253
45
- },
46
- "eval_loss": 1.3766342401504517,
47
- "eval_precision": {
48
- "precision": 0.4774156660949114
49
- },
50
- "eval_recall": {
51
- "recall": 0.6041666666666666
52
- },
53
- "eval_runtime": 7.1848,
54
- "eval_samples_per_second": 30.063,
55
- "eval_steps_per_second": 7.516,
56
- "step": 114
57
- },
58
- {
59
- "epoch": 2.245614035087719,
60
- "grad_norm": 11.047577857971191,
61
- "learning_rate": 2.3175182481751825e-06,
62
- "loss": 1.3875,
63
- "step": 128
64
- },
65
- {
66
- "epoch": 3.0,
67
- "eval_accuracy": {
68
- "accuracy": 0.7962962962962963
69
- },
70
- "eval_f1": {
71
- "f1": 0.7359516439448823
72
- },
73
- "eval_loss": 1.3143658638000488,
74
- "eval_precision": {
75
- "precision": 0.8463221219578889
76
- },
77
- "eval_recall": {
78
- "recall": 0.7291666666666666
79
- },
80
- "eval_runtime": 6.953,
81
- "eval_samples_per_second": 31.066,
82
- "eval_steps_per_second": 7.766,
83
- "step": 171
84
- },
85
- {
86
- "epoch": 3.3684210526315788,
87
- "grad_norm": 10.464760780334473,
88
- "learning_rate": 3.4854014598540146e-06,
89
- "loss": 1.292,
90
- "step": 192
91
- },
92
- {
93
- "epoch": 4.0,
94
- "eval_accuracy": {
95
- "accuracy": 0.7824074074074074
96
- },
97
- "eval_f1": {
98
- "f1": 0.7084821399163013
99
- },
100
- "eval_loss": 1.2212026119232178,
101
- "eval_precision": {
102
- "precision": 0.8394571333995365
103
- },
104
- "eval_recall": {
105
- "recall": 0.7083333333333334
106
- },
107
- "eval_runtime": 7.4938,
108
- "eval_samples_per_second": 28.824,
109
- "eval_steps_per_second": 7.206,
110
- "step": 228
111
- },
112
- {
113
- "epoch": 4.491228070175438,
114
- "grad_norm": 12.949272155761719,
115
- "learning_rate": 4.653284671532847e-06,
116
- "loss": 1.2461,
117
- "step": 256
118
- },
119
- {
120
- "epoch": 5.0,
121
- "eval_accuracy": {
122
- "accuracy": 0.8055555555555556
123
- },
124
- "eval_f1": {
125
- "f1": 0.7569049164117302
126
- },
127
- "eval_loss": 1.1568979024887085,
128
- "eval_precision": {
129
- "precision": 0.8381390145435089
130
- },
131
- "eval_recall": {
132
- "recall": 0.7465277777777778
133
- },
134
- "eval_runtime": 7.129,
135
- "eval_samples_per_second": 30.299,
136
- "eval_steps_per_second": 7.575,
137
- "step": 285
138
- },
139
- {
140
- "epoch": 5.614035087719298,
141
- "grad_norm": 10.92992877960205,
142
- "learning_rate": 5.821167883211679e-06,
143
- "loss": 1.1103,
144
- "step": 320
145
- },
146
- {
147
- "epoch": 6.0,
148
- "eval_accuracy": {
149
- "accuracy": 0.7777777777777778
150
- },
151
- "eval_f1": {
152
- "f1": 0.7295260138258725
153
- },
154
- "eval_loss": 1.1286191940307617,
155
- "eval_precision": {
156
- "precision": 0.8194070080862534
157
- },
158
- "eval_recall": {
159
- "recall": 0.7800925925925926
160
- },
161
- "eval_runtime": 7.0035,
162
- "eval_samples_per_second": 30.842,
163
- "eval_steps_per_second": 7.71,
164
- "step": 342
165
- },
166
- {
167
- "epoch": 6.7368421052631575,
168
- "grad_norm": 21.803577423095703,
169
- "learning_rate": 6.989051094890511e-06,
170
- "loss": 1.0695,
171
- "step": 384
172
- },
173
- {
174
- "epoch": 7.0,
175
- "eval_accuracy": {
176
- "accuracy": 0.6574074074074074
177
- },
178
- "eval_f1": {
179
- "f1": 0.5127626686719305
180
- },
181
- "eval_loss": 1.287270188331604,
182
- "eval_precision": {
183
- "precision": 0.441052029731275
184
- },
185
- "eval_recall": {
186
- "recall": 0.6597222222222222
187
- },
188
- "eval_runtime": 7.1142,
189
- "eval_samples_per_second": 30.362,
190
- "eval_steps_per_second": 7.591,
191
- "step": 399
192
- },
193
- {
194
- "epoch": 7.859649122807017,
195
- "grad_norm": 6.482203960418701,
196
- "learning_rate": 8.156934306569345e-06,
197
- "loss": 0.9602,
198
- "step": 448
199
- },
200
- {
201
- "epoch": 8.0,
202
- "eval_accuracy": {
203
- "accuracy": 0.8796296296296297
204
- },
205
- "eval_f1": {
206
- "f1": 0.8678067806780678
207
- },
208
- "eval_loss": 0.871719241142273,
209
- "eval_precision": {
210
- "precision": 0.8869953337390951
211
- },
212
- "eval_recall": {
213
- "recall": 0.8819444444444443
214
- },
215
- "eval_runtime": 7.2624,
216
- "eval_samples_per_second": 29.742,
217
- "eval_steps_per_second": 7.436,
218
- "step": 456
219
- },
220
- {
221
- "epoch": 8.982456140350877,
222
- "grad_norm": 24.75027847290039,
223
- "learning_rate": 9.306569343065694e-06,
224
- "loss": 0.907,
225
- "step": 512
226
- },
227
- {
228
- "epoch": 9.0,
229
- "eval_accuracy": {
230
- "accuracy": 0.8796296296296297
231
- },
232
- "eval_f1": {
233
- "f1": 0.8678067806780678
234
- },
235
- "eval_loss": 0.7951932549476624,
236
- "eval_precision": {
237
- "precision": 0.8869953337390951
238
- },
239
- "eval_recall": {
240
- "recall": 0.8819444444444443
241
- },
242
- "eval_runtime": 7.9299,
243
- "eval_samples_per_second": 27.239,
244
- "eval_steps_per_second": 6.81,
245
- "step": 513
246
- },
247
- {
248
- "epoch": 10.0,
249
- "eval_accuracy": {
250
- "accuracy": 0.8611111111111112
251
- },
252
- "eval_f1": {
253
- "f1": 0.8535169178557792
254
- },
255
- "eval_loss": 0.8125227689743042,
256
- "eval_precision": {
257
- "precision": 0.8559314456035767
258
- },
259
- "eval_recall": {
260
- "recall": 0.8692129629629629
261
- },
262
- "eval_runtime": 7.3185,
263
- "eval_samples_per_second": 29.514,
264
- "eval_steps_per_second": 7.379,
265
- "step": 570
266
- },
267
- {
268
- "epoch": 10.105263157894736,
269
- "grad_norm": 15.228086471557617,
270
- "learning_rate": 9.682926829268292e-06,
271
- "loss": 0.8145,
272
- "step": 576
273
- },
274
- {
275
- "epoch": 11.0,
276
- "eval_accuracy": {
277
- "accuracy": 0.8796296296296297
278
- },
279
- "eval_f1": {
280
- "f1": 0.8678067806780678
281
- },
282
- "eval_loss": 0.807636559009552,
283
- "eval_precision": {
284
- "precision": 0.8869953337390951
285
- },
286
- "eval_recall": {
287
- "recall": 0.8819444444444443
288
- },
289
- "eval_runtime": 7.4338,
290
- "eval_samples_per_second": 29.057,
291
- "eval_steps_per_second": 7.264,
292
- "step": 627
293
- }
294
- ],
295
- "logging_steps": 64,
296
- "max_steps": 1368,
297
- "num_input_tokens_seen": 0,
298
- "num_train_epochs": 24,
299
- "save_steps": 64,
300
- "stateful_callbacks": {
301
- "TrainerControl": {
302
- "args": {
303
- "should_epoch_stop": false,
304
- "should_evaluate": false,
305
- "should_log": false,
306
- "should_save": true,
307
- "should_training_stop": false
308
- },
309
- "attributes": {}
310
- }
311
- },
312
- "total_flos": 0.0,
313
- "train_batch_size": 16,
314
- "trial_name": null,
315
- "trial_params": null
316
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoints/checkpoint-627/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:4229f73f210e46319463b8ac3691f8a1d0602065fd6dccda4819f6160cb9b1e0
3
- size 5176