huhu233 commited on
Commit
1496fe4
1 Parent(s): f3f0455

Delete checkpoint-157500_sTrain

Browse files
checkpoint-157500_sTrain/config.json DELETED
@@ -1,64 +0,0 @@
1
- {
2
- "_name_or_path": "trans_model",
3
- "activation_dropout": 0.0,
4
- "activation_function": "swish",
5
- "add_bias_logits": false,
6
- "add_final_layer_norm": false,
7
- "architectures": [
8
- "MarianMTModel"
9
- ],
10
- "attention_dropout": 0.0,
11
- "bad_words_ids": [
12
- [
13
- 65000
14
- ]
15
- ],
16
- "bos_token_id": 0,
17
- "classif_dropout": 0.0,
18
- "classifier_dropout": 0.0,
19
- "d_model": 512,
20
- "decoder_attention_heads": 8,
21
- "decoder_ffn_dim": 2048,
22
- "decoder_layerdrop": 0.0,
23
- "decoder_layers": 6,
24
- "decoder_start_token_id": 65000,
25
- "decoder_vocab_size": 65001,
26
- "do_blenderbot_90_layernorm": false,
27
- "dropout": 0.1,
28
- "encoder_attention_heads": 8,
29
- "encoder_ffn_dim": 2048,
30
- "encoder_layerdrop": 0.0,
31
- "encoder_layers": 6,
32
- "eos_token_id": 0,
33
- "extra_pos_embeddings": 0,
34
- "force_bos_token_to_be_generated": false,
35
- "forced_eos_token_id": 0,
36
- "gradient_checkpointing": false,
37
- "id2label": {
38
- "0": "LABEL_0",
39
- "1": "LABEL_1",
40
- "2": "LABEL_2"
41
- },
42
- "init_std": 0.02,
43
- "is_encoder_decoder": true,
44
- "label2id": {
45
- "LABEL_0": 0,
46
- "LABEL_1": 1,
47
- "LABEL_2": 2
48
- },
49
- "max_length": 512,
50
- "max_position_embeddings": 512,
51
- "model_type": "marian",
52
- "normalize_before": false,
53
- "normalize_embedding": false,
54
- "num_beams": 4,
55
- "num_hidden_layers": 6,
56
- "pad_token_id": 65000,
57
- "scale_embedding": true,
58
- "share_encoder_decoder_embeddings": true,
59
- "static_position_embeddings": true,
60
- "torch_dtype": "float32",
61
- "transformers_version": "4.30.2",
62
- "use_cache": true,
63
- "vocab_size": 65001
64
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-157500_sTrain/generation_config.json DELETED
@@ -1,16 +0,0 @@
1
- {
2
- "_from_model_config": true,
3
- "bad_words_ids": [
4
- [
5
- 65000
6
- ]
7
- ],
8
- "bos_token_id": 0,
9
- "decoder_start_token_id": 65000,
10
- "eos_token_id": 0,
11
- "forced_eos_token_id": 0,
12
- "max_length": 512,
13
- "num_beams": 4,
14
- "pad_token_id": 65000,
15
- "transformers_version": "4.30.2"
16
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-157500_sTrain/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:68b975db1caf46413da27809a7786e61eb2f3c4e9c2647f16de1c82a4281fea9
3
- size 619501061
 
 
 
 
checkpoint-157500_sTrain/pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:072ccbd6ec6adf71e38319f0c08fa691c51e79def9360c13ee9ad6d4e195a30d
3
- size 310022533
 
 
 
 
checkpoint-157500_sTrain/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:ba189c7848e4096e993e8ab804f5bb42f8755bcda7a33135ed67461afe47ac38
3
- size 14511
 
 
 
 
checkpoint-157500_sTrain/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:519e73e22f5a8054abe5b4756ca77bf6ddeb528bd80ca85e2b1e31b233f1adac
3
- size 627
 
 
 
 
checkpoint-157500_sTrain/source.spm DELETED
Binary file (806 kB)
 
checkpoint-157500_sTrain/special_tokens_map.json DELETED
@@ -1,5 +0,0 @@
1
- {
2
- "eos_token": "</s>",
3
- "pad_token": "<pad>",
4
- "unk_token": "<unk>"
5
- }
 
 
 
 
 
 
checkpoint-157500_sTrain/target.spm DELETED
Binary file (805 kB)
 
checkpoint-157500_sTrain/tokenizer_config.json DELETED
@@ -1,12 +0,0 @@
1
- {
2
- "clean_up_tokenization_spaces": true,
3
- "eos_token": "</s>",
4
- "model_max_length": 512,
5
- "pad_token": "<pad>",
6
- "separate_vocabs": false,
7
- "source_lang": "eng",
8
- "sp_model_kwargs": {},
9
- "target_lang": "zho",
10
- "tokenizer_class": "MarianTokenizer",
11
- "unk_token": "<unk>"
12
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-157500_sTrain/trainer_state.json DELETED
@@ -1,1978 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 9.968985378821444,
5
- "global_step": 157500,
6
- "is_hyper_param_search": false,
7
- "is_local_process_zero": true,
8
- "is_world_process_zero": true,
9
- "log_history": [
10
- {
11
- "epoch": 0.03,
12
- "learning_rate": 1.9936704854737643e-05,
13
- "loss": 2.1885,
14
- "step": 500
15
- },
16
- {
17
- "epoch": 0.06,
18
- "learning_rate": 1.9873409709475284e-05,
19
- "loss": 2.1503,
20
- "step": 1000
21
- },
22
- {
23
- "epoch": 0.09,
24
- "learning_rate": 1.9810114564212926e-05,
25
- "loss": 2.1207,
26
- "step": 1500
27
- },
28
- {
29
- "epoch": 0.13,
30
- "learning_rate": 1.974681941895057e-05,
31
- "loss": 2.093,
32
- "step": 2000
33
- },
34
- {
35
- "epoch": 0.16,
36
- "learning_rate": 1.968352427368821e-05,
37
- "loss": 2.0938,
38
- "step": 2500
39
- },
40
- {
41
- "epoch": 0.19,
42
- "learning_rate": 1.9620229128425853e-05,
43
- "loss": 2.0842,
44
- "step": 3000
45
- },
46
- {
47
- "epoch": 0.22,
48
- "learning_rate": 1.9556933983163494e-05,
49
- "loss": 2.0991,
50
- "step": 3500
51
- },
52
- {
53
- "epoch": 0.25,
54
- "learning_rate": 1.9493638837901136e-05,
55
- "loss": 2.0715,
56
- "step": 4000
57
- },
58
- {
59
- "epoch": 0.28,
60
- "learning_rate": 1.9430343692638777e-05,
61
- "loss": 2.073,
62
- "step": 4500
63
- },
64
- {
65
- "epoch": 0.32,
66
- "learning_rate": 1.936704854737642e-05,
67
- "loss": 2.08,
68
- "step": 5000
69
- },
70
- {
71
- "epoch": 0.35,
72
- "learning_rate": 1.930375340211406e-05,
73
- "loss": 2.0494,
74
- "step": 5500
75
- },
76
- {
77
- "epoch": 0.38,
78
- "learning_rate": 1.92404582568517e-05,
79
- "loss": 2.0616,
80
- "step": 6000
81
- },
82
- {
83
- "epoch": 0.41,
84
- "learning_rate": 1.9177163111589342e-05,
85
- "loss": 2.0571,
86
- "step": 6500
87
- },
88
- {
89
- "epoch": 0.44,
90
- "learning_rate": 1.9113867966326984e-05,
91
- "loss": 2.0561,
92
- "step": 7000
93
- },
94
- {
95
- "epoch": 0.47,
96
- "learning_rate": 1.9050572821064625e-05,
97
- "loss": 2.0658,
98
- "step": 7500
99
- },
100
- {
101
- "epoch": 0.51,
102
- "learning_rate": 1.8987277675802266e-05,
103
- "loss": 2.0275,
104
- "step": 8000
105
- },
106
- {
107
- "epoch": 0.54,
108
- "learning_rate": 1.8923982530539908e-05,
109
- "loss": 2.0335,
110
- "step": 8500
111
- },
112
- {
113
- "epoch": 0.57,
114
- "learning_rate": 1.8860687385277552e-05,
115
- "loss": 2.0312,
116
- "step": 9000
117
- },
118
- {
119
- "epoch": 0.6,
120
- "learning_rate": 1.8797392240015194e-05,
121
- "loss": 2.0138,
122
- "step": 9500
123
- },
124
- {
125
- "epoch": 0.63,
126
- "learning_rate": 1.8734097094752835e-05,
127
- "loss": 2.0005,
128
- "step": 10000
129
- },
130
- {
131
- "epoch": 0.66,
132
- "learning_rate": 1.8670801949490476e-05,
133
- "loss": 2.0248,
134
- "step": 10500
135
- },
136
- {
137
- "epoch": 0.7,
138
- "learning_rate": 1.8607506804228118e-05,
139
- "loss": 2.0152,
140
- "step": 11000
141
- },
142
- {
143
- "epoch": 0.73,
144
- "learning_rate": 1.854421165896576e-05,
145
- "loss": 2.0168,
146
- "step": 11500
147
- },
148
- {
149
- "epoch": 0.76,
150
- "learning_rate": 1.84809165137034e-05,
151
- "loss": 2.0196,
152
- "step": 12000
153
- },
154
- {
155
- "epoch": 0.79,
156
- "learning_rate": 1.841762136844104e-05,
157
- "loss": 2.0003,
158
- "step": 12500
159
- },
160
- {
161
- "epoch": 0.82,
162
- "learning_rate": 1.8354326223178683e-05,
163
- "loss": 2.0267,
164
- "step": 13000
165
- },
166
- {
167
- "epoch": 0.85,
168
- "learning_rate": 1.8291031077916324e-05,
169
- "loss": 2.0022,
170
- "step": 13500
171
- },
172
- {
173
- "epoch": 0.89,
174
- "learning_rate": 1.8227735932653966e-05,
175
- "loss": 2.0102,
176
- "step": 14000
177
- },
178
- {
179
- "epoch": 0.92,
180
- "learning_rate": 1.8164440787391607e-05,
181
- "loss": 2.0,
182
- "step": 14500
183
- },
184
- {
185
- "epoch": 0.95,
186
- "learning_rate": 1.810114564212925e-05,
187
- "loss": 1.9751,
188
- "step": 15000
189
- },
190
- {
191
- "epoch": 0.98,
192
- "learning_rate": 1.803785049686689e-05,
193
- "loss": 1.9912,
194
- "step": 15500
195
- },
196
- {
197
- "epoch": 1.0,
198
- "eval_loss": 1.708198070526123,
199
- "eval_runtime": 629.4341,
200
- "eval_samples_per_second": 401.594,
201
- "eval_steps_per_second": 25.1,
202
- "step": 15799
203
- },
204
- {
205
- "epoch": 1.01,
206
- "learning_rate": 1.797455535160453e-05,
207
- "loss": 1.9373,
208
- "step": 16000
209
- },
210
- {
211
- "epoch": 1.04,
212
- "learning_rate": 1.7911260206342176e-05,
213
- "loss": 1.8308,
214
- "step": 16500
215
- },
216
- {
217
- "epoch": 1.08,
218
- "learning_rate": 1.7847965061079817e-05,
219
- "loss": 1.8442,
220
- "step": 17000
221
- },
222
- {
223
- "epoch": 1.11,
224
- "learning_rate": 1.778466991581746e-05,
225
- "loss": 1.8558,
226
- "step": 17500
227
- },
228
- {
229
- "epoch": 1.14,
230
- "learning_rate": 1.77213747705551e-05,
231
- "loss": 1.8564,
232
- "step": 18000
233
- },
234
- {
235
- "epoch": 1.17,
236
- "learning_rate": 1.765807962529274e-05,
237
- "loss": 1.8578,
238
- "step": 18500
239
- },
240
- {
241
- "epoch": 1.2,
242
- "learning_rate": 1.7594784480030382e-05,
243
- "loss": 1.8312,
244
- "step": 19000
245
- },
246
- {
247
- "epoch": 1.23,
248
- "learning_rate": 1.7531489334768024e-05,
249
- "loss": 1.8784,
250
- "step": 19500
251
- },
252
- {
253
- "epoch": 1.27,
254
- "learning_rate": 1.746819418950567e-05,
255
- "loss": 1.8497,
256
- "step": 20000
257
- },
258
- {
259
- "epoch": 1.3,
260
- "learning_rate": 1.740489904424331e-05,
261
- "loss": 1.8528,
262
- "step": 20500
263
- },
264
- {
265
- "epoch": 1.33,
266
- "learning_rate": 1.734160389898095e-05,
267
- "loss": 1.8645,
268
- "step": 21000
269
- },
270
- {
271
- "epoch": 1.36,
272
- "learning_rate": 1.7278308753718592e-05,
273
- "loss": 1.8563,
274
- "step": 21500
275
- },
276
- {
277
- "epoch": 1.39,
278
- "learning_rate": 1.7215013608456234e-05,
279
- "loss": 1.8616,
280
- "step": 22000
281
- },
282
- {
283
- "epoch": 1.42,
284
- "learning_rate": 1.7151718463193875e-05,
285
- "loss": 1.8699,
286
- "step": 22500
287
- },
288
- {
289
- "epoch": 1.46,
290
- "learning_rate": 1.7088423317931516e-05,
291
- "loss": 1.8583,
292
- "step": 23000
293
- },
294
- {
295
- "epoch": 1.49,
296
- "learning_rate": 1.7025128172669158e-05,
297
- "loss": 1.868,
298
- "step": 23500
299
- },
300
- {
301
- "epoch": 1.52,
302
- "learning_rate": 1.69618330274068e-05,
303
- "loss": 1.8534,
304
- "step": 24000
305
- },
306
- {
307
- "epoch": 1.55,
308
- "learning_rate": 1.689853788214444e-05,
309
- "loss": 1.8557,
310
- "step": 24500
311
- },
312
- {
313
- "epoch": 1.58,
314
- "learning_rate": 1.683524273688208e-05,
315
- "loss": 1.8709,
316
- "step": 25000
317
- },
318
- {
319
- "epoch": 1.61,
320
- "learning_rate": 1.6771947591619723e-05,
321
- "loss": 1.8544,
322
- "step": 25500
323
- },
324
- {
325
- "epoch": 1.65,
326
- "learning_rate": 1.6708652446357364e-05,
327
- "loss": 1.8803,
328
- "step": 26000
329
- },
330
- {
331
- "epoch": 1.68,
332
- "learning_rate": 1.6645357301095006e-05,
333
- "loss": 1.8573,
334
- "step": 26500
335
- },
336
- {
337
- "epoch": 1.71,
338
- "learning_rate": 1.658206215583265e-05,
339
- "loss": 1.8668,
340
- "step": 27000
341
- },
342
- {
343
- "epoch": 1.74,
344
- "learning_rate": 1.6518767010570292e-05,
345
- "loss": 1.8592,
346
- "step": 27500
347
- },
348
- {
349
- "epoch": 1.77,
350
- "learning_rate": 1.6455471865307933e-05,
351
- "loss": 1.8551,
352
- "step": 28000
353
- },
354
- {
355
- "epoch": 1.8,
356
- "learning_rate": 1.6392176720045574e-05,
357
- "loss": 1.8504,
358
- "step": 28500
359
- },
360
- {
361
- "epoch": 1.84,
362
- "learning_rate": 1.6328881574783216e-05,
363
- "loss": 1.8578,
364
- "step": 29000
365
- },
366
- {
367
- "epoch": 1.87,
368
- "learning_rate": 1.6265586429520857e-05,
369
- "loss": 1.8614,
370
- "step": 29500
371
- },
372
- {
373
- "epoch": 1.9,
374
- "learning_rate": 1.62022912842585e-05,
375
- "loss": 1.8592,
376
- "step": 30000
377
- },
378
- {
379
- "epoch": 1.93,
380
- "learning_rate": 1.613899613899614e-05,
381
- "loss": 1.854,
382
- "step": 30500
383
- },
384
- {
385
- "epoch": 1.96,
386
- "learning_rate": 1.607570099373378e-05,
387
- "loss": 1.8536,
388
- "step": 31000
389
- },
390
- {
391
- "epoch": 1.99,
392
- "learning_rate": 1.6012405848471422e-05,
393
- "loss": 1.8687,
394
- "step": 31500
395
- },
396
- {
397
- "epoch": 2.0,
398
- "eval_loss": 1.5787432193756104,
399
- "eval_runtime": 629.6856,
400
- "eval_samples_per_second": 401.434,
401
- "eval_steps_per_second": 25.09,
402
- "step": 31598
403
- },
404
- {
405
- "epoch": 2.03,
406
- "learning_rate": 1.5949110703209064e-05,
407
- "loss": 1.7515,
408
- "step": 32000
409
- },
410
- {
411
- "epoch": 2.06,
412
- "learning_rate": 1.5885815557946705e-05,
413
- "loss": 1.7233,
414
- "step": 32500
415
- },
416
- {
417
- "epoch": 2.09,
418
- "learning_rate": 1.5822520412684346e-05,
419
- "loss": 1.754,
420
- "step": 33000
421
- },
422
- {
423
- "epoch": 2.12,
424
- "learning_rate": 1.5759225267421988e-05,
425
- "loss": 1.7302,
426
- "step": 33500
427
- },
428
- {
429
- "epoch": 2.15,
430
- "learning_rate": 1.5695930122159632e-05,
431
- "loss": 1.7369,
432
- "step": 34000
433
- },
434
- {
435
- "epoch": 2.18,
436
- "learning_rate": 1.5632634976897274e-05,
437
- "loss": 1.7294,
438
- "step": 34500
439
- },
440
- {
441
- "epoch": 2.22,
442
- "learning_rate": 1.5569339831634915e-05,
443
- "loss": 1.7317,
444
- "step": 35000
445
- },
446
- {
447
- "epoch": 2.25,
448
- "learning_rate": 1.5506044686372556e-05,
449
- "loss": 1.7457,
450
- "step": 35500
451
- },
452
- {
453
- "epoch": 2.28,
454
- "learning_rate": 1.5442749541110198e-05,
455
- "loss": 1.758,
456
- "step": 36000
457
- },
458
- {
459
- "epoch": 2.31,
460
- "learning_rate": 1.537945439584784e-05,
461
- "loss": 1.7442,
462
- "step": 36500
463
- },
464
- {
465
- "epoch": 2.34,
466
- "learning_rate": 1.531615925058548e-05,
467
- "loss": 1.7449,
468
- "step": 37000
469
- },
470
- {
471
- "epoch": 2.37,
472
- "learning_rate": 1.5252864105323122e-05,
473
- "loss": 1.7502,
474
- "step": 37500
475
- },
476
- {
477
- "epoch": 2.41,
478
- "learning_rate": 1.5189568960060765e-05,
479
- "loss": 1.7529,
480
- "step": 38000
481
- },
482
- {
483
- "epoch": 2.44,
484
- "learning_rate": 1.5126273814798406e-05,
485
- "loss": 1.7675,
486
- "step": 38500
487
- },
488
- {
489
- "epoch": 2.47,
490
- "learning_rate": 1.5062978669536047e-05,
491
- "loss": 1.7537,
492
- "step": 39000
493
- },
494
- {
495
- "epoch": 2.5,
496
- "learning_rate": 1.4999683524273689e-05,
497
- "loss": 1.7546,
498
- "step": 39500
499
- },
500
- {
501
- "epoch": 2.53,
502
- "learning_rate": 1.493638837901133e-05,
503
- "loss": 1.7409,
504
- "step": 40000
505
- },
506
- {
507
- "epoch": 2.56,
508
- "learning_rate": 1.4873093233748971e-05,
509
- "loss": 1.7599,
510
- "step": 40500
511
- },
512
- {
513
- "epoch": 2.6,
514
- "learning_rate": 1.4809798088486613e-05,
515
- "loss": 1.7467,
516
- "step": 41000
517
- },
518
- {
519
- "epoch": 2.63,
520
- "learning_rate": 1.4746502943224257e-05,
521
- "loss": 1.7426,
522
- "step": 41500
523
- },
524
- {
525
- "epoch": 2.66,
526
- "learning_rate": 1.4683207797961899e-05,
527
- "loss": 1.7421,
528
- "step": 42000
529
- },
530
- {
531
- "epoch": 2.69,
532
- "learning_rate": 1.461991265269954e-05,
533
- "loss": 1.7572,
534
- "step": 42500
535
- },
536
- {
537
- "epoch": 2.72,
538
- "learning_rate": 1.4556617507437181e-05,
539
- "loss": 1.7489,
540
- "step": 43000
541
- },
542
- {
543
- "epoch": 2.75,
544
- "learning_rate": 1.4493322362174823e-05,
545
- "loss": 1.7482,
546
- "step": 43500
547
- },
548
- {
549
- "epoch": 2.78,
550
- "learning_rate": 1.4430027216912464e-05,
551
- "loss": 1.7578,
552
- "step": 44000
553
- },
554
- {
555
- "epoch": 2.82,
556
- "learning_rate": 1.4366732071650105e-05,
557
- "loss": 1.7608,
558
- "step": 44500
559
- },
560
- {
561
- "epoch": 2.85,
562
- "learning_rate": 1.4303436926387748e-05,
563
- "loss": 1.7623,
564
- "step": 45000
565
- },
566
- {
567
- "epoch": 2.88,
568
- "learning_rate": 1.424014178112539e-05,
569
- "loss": 1.7534,
570
- "step": 45500
571
- },
572
- {
573
- "epoch": 2.91,
574
- "learning_rate": 1.4176846635863031e-05,
575
- "loss": 1.7513,
576
- "step": 46000
577
- },
578
- {
579
- "epoch": 2.94,
580
- "learning_rate": 1.4113551490600672e-05,
581
- "loss": 1.7539,
582
- "step": 46500
583
- },
584
- {
585
- "epoch": 2.97,
586
- "learning_rate": 1.4050256345338314e-05,
587
- "loss": 1.7529,
588
- "step": 47000
589
- },
590
- {
591
- "epoch": 3.0,
592
- "eval_loss": 1.4882566928863525,
593
- "eval_runtime": 671.3515,
594
- "eval_samples_per_second": 376.52,
595
- "eval_steps_per_second": 23.533,
596
- "step": 47397
597
- },
598
- {
599
- "epoch": 3.01,
600
- "learning_rate": 1.3986961200075955e-05,
601
- "loss": 1.7233,
602
- "step": 47500
603
- },
604
- {
605
- "epoch": 3.04,
606
- "learning_rate": 1.3923666054813596e-05,
607
- "loss": 1.6255,
608
- "step": 48000
609
- },
610
- {
611
- "epoch": 3.07,
612
- "learning_rate": 1.386037090955124e-05,
613
- "loss": 1.6566,
614
- "step": 48500
615
- },
616
- {
617
- "epoch": 3.1,
618
- "learning_rate": 1.379707576428888e-05,
619
- "loss": 1.6442,
620
- "step": 49000
621
- },
622
- {
623
- "epoch": 3.13,
624
- "learning_rate": 1.3733780619026522e-05,
625
- "loss": 1.6439,
626
- "step": 49500
627
- },
628
- {
629
- "epoch": 3.16,
630
- "learning_rate": 1.3670485473764163e-05,
631
- "loss": 1.6438,
632
- "step": 50000
633
- },
634
- {
635
- "epoch": 3.2,
636
- "learning_rate": 1.3607190328501805e-05,
637
- "loss": 1.6527,
638
- "step": 50500
639
- },
640
- {
641
- "epoch": 3.23,
642
- "learning_rate": 1.3543895183239446e-05,
643
- "loss": 1.6426,
644
- "step": 51000
645
- },
646
- {
647
- "epoch": 3.26,
648
- "learning_rate": 1.3480600037977087e-05,
649
- "loss": 1.6802,
650
- "step": 51500
651
- },
652
- {
653
- "epoch": 3.29,
654
- "learning_rate": 1.341730489271473e-05,
655
- "loss": 1.6568,
656
- "step": 52000
657
- },
658
- {
659
- "epoch": 3.32,
660
- "learning_rate": 1.3354009747452372e-05,
661
- "loss": 1.6657,
662
- "step": 52500
663
- },
664
- {
665
- "epoch": 3.35,
666
- "learning_rate": 1.3290714602190013e-05,
667
- "loss": 1.6734,
668
- "step": 53000
669
- },
670
- {
671
- "epoch": 3.39,
672
- "learning_rate": 1.3227419456927654e-05,
673
- "loss": 1.655,
674
- "step": 53500
675
- },
676
- {
677
- "epoch": 3.42,
678
- "learning_rate": 1.3164124311665296e-05,
679
- "loss": 1.6831,
680
- "step": 54000
681
- },
682
- {
683
- "epoch": 3.45,
684
- "learning_rate": 1.3100829166402937e-05,
685
- "loss": 1.6532,
686
- "step": 54500
687
- },
688
- {
689
- "epoch": 3.48,
690
- "learning_rate": 1.3037534021140578e-05,
691
- "loss": 1.6649,
692
- "step": 55000
693
- },
694
- {
695
- "epoch": 3.51,
696
- "learning_rate": 1.2974238875878221e-05,
697
- "loss": 1.6643,
698
- "step": 55500
699
- },
700
- {
701
- "epoch": 3.54,
702
- "learning_rate": 1.2910943730615863e-05,
703
- "loss": 1.6749,
704
- "step": 56000
705
- },
706
- {
707
- "epoch": 3.58,
708
- "learning_rate": 1.2847648585353504e-05,
709
- "loss": 1.6802,
710
- "step": 56500
711
- },
712
- {
713
- "epoch": 3.61,
714
- "learning_rate": 1.2784353440091145e-05,
715
- "loss": 1.6753,
716
- "step": 57000
717
- },
718
- {
719
- "epoch": 3.64,
720
- "learning_rate": 1.2721058294828787e-05,
721
- "loss": 1.6759,
722
- "step": 57500
723
- },
724
- {
725
- "epoch": 3.67,
726
- "learning_rate": 1.2657763149566428e-05,
727
- "loss": 1.6756,
728
- "step": 58000
729
- },
730
- {
731
- "epoch": 3.7,
732
- "learning_rate": 1.259446800430407e-05,
733
- "loss": 1.6733,
734
- "step": 58500
735
- },
736
- {
737
- "epoch": 3.73,
738
- "learning_rate": 1.253117285904171e-05,
739
- "loss": 1.671,
740
- "step": 59000
741
- },
742
- {
743
- "epoch": 3.77,
744
- "learning_rate": 1.2467877713779355e-05,
745
- "loss": 1.6697,
746
- "step": 59500
747
- },
748
- {
749
- "epoch": 3.8,
750
- "learning_rate": 1.2404582568516997e-05,
751
- "loss": 1.668,
752
- "step": 60000
753
- },
754
- {
755
- "epoch": 3.83,
756
- "learning_rate": 1.2341287423254638e-05,
757
- "loss": 1.6689,
758
- "step": 60500
759
- },
760
- {
761
- "epoch": 3.86,
762
- "learning_rate": 1.227799227799228e-05,
763
- "loss": 1.6874,
764
- "step": 61000
765
- },
766
- {
767
- "epoch": 3.89,
768
- "learning_rate": 1.221469713272992e-05,
769
- "loss": 1.6926,
770
- "step": 61500
771
- },
772
- {
773
- "epoch": 3.92,
774
- "learning_rate": 1.2151401987467562e-05,
775
- "loss": 1.6819,
776
- "step": 62000
777
- },
778
- {
779
- "epoch": 3.96,
780
- "learning_rate": 1.2088106842205203e-05,
781
- "loss": 1.6599,
782
- "step": 62500
783
- },
784
- {
785
- "epoch": 3.99,
786
- "learning_rate": 1.2024811696942846e-05,
787
- "loss": 1.6886,
788
- "step": 63000
789
- },
790
- {
791
- "epoch": 4.0,
792
- "eval_loss": 1.417983055114746,
793
- "eval_runtime": 634.8433,
794
- "eval_samples_per_second": 398.172,
795
- "eval_steps_per_second": 24.886,
796
- "step": 63196
797
- },
798
- {
799
- "epoch": 4.02,
800
- "learning_rate": 1.1961516551680488e-05,
801
- "loss": 1.6122,
802
- "step": 63500
803
- },
804
- {
805
- "epoch": 4.05,
806
- "learning_rate": 1.1898221406418129e-05,
807
- "loss": 1.578,
808
- "step": 64000
809
- },
810
- {
811
- "epoch": 4.08,
812
- "learning_rate": 1.183492626115577e-05,
813
- "loss": 1.5662,
814
- "step": 64500
815
- },
816
- {
817
- "epoch": 4.11,
818
- "learning_rate": 1.1771631115893412e-05,
819
- "loss": 1.5732,
820
- "step": 65000
821
- },
822
- {
823
- "epoch": 4.15,
824
- "learning_rate": 1.1708335970631053e-05,
825
- "loss": 1.5726,
826
- "step": 65500
827
- },
828
- {
829
- "epoch": 4.18,
830
- "learning_rate": 1.1645040825368694e-05,
831
- "loss": 1.5868,
832
- "step": 66000
833
- },
834
- {
835
- "epoch": 4.21,
836
- "learning_rate": 1.1581745680106337e-05,
837
- "loss": 1.5781,
838
- "step": 66500
839
- },
840
- {
841
- "epoch": 4.24,
842
- "learning_rate": 1.1518450534843979e-05,
843
- "loss": 1.5965,
844
- "step": 67000
845
- },
846
- {
847
- "epoch": 4.27,
848
- "learning_rate": 1.145515538958162e-05,
849
- "loss": 1.5934,
850
- "step": 67500
851
- },
852
- {
853
- "epoch": 4.3,
854
- "learning_rate": 1.1391860244319261e-05,
855
- "loss": 1.5791,
856
- "step": 68000
857
- },
858
- {
859
- "epoch": 4.34,
860
- "learning_rate": 1.1328565099056903e-05,
861
- "loss": 1.6037,
862
- "step": 68500
863
- },
864
- {
865
- "epoch": 4.37,
866
- "learning_rate": 1.1265269953794544e-05,
867
- "loss": 1.6046,
868
- "step": 69000
869
- },
870
- {
871
- "epoch": 4.4,
872
- "learning_rate": 1.1201974808532185e-05,
873
- "loss": 1.5903,
874
- "step": 69500
875
- },
876
- {
877
- "epoch": 4.43,
878
- "learning_rate": 1.1138679663269828e-05,
879
- "loss": 1.5837,
880
- "step": 70000
881
- },
882
- {
883
- "epoch": 4.46,
884
- "learning_rate": 1.107538451800747e-05,
885
- "loss": 1.6162,
886
- "step": 70500
887
- },
888
- {
889
- "epoch": 4.49,
890
- "learning_rate": 1.1012089372745111e-05,
891
- "loss": 1.5988,
892
- "step": 71000
893
- },
894
- {
895
- "epoch": 4.53,
896
- "learning_rate": 1.0948794227482752e-05,
897
- "loss": 1.6082,
898
- "step": 71500
899
- },
900
- {
901
- "epoch": 4.56,
902
- "learning_rate": 1.0885499082220394e-05,
903
- "loss": 1.5832,
904
- "step": 72000
905
- },
906
- {
907
- "epoch": 4.59,
908
- "learning_rate": 1.0822203936958035e-05,
909
- "loss": 1.6153,
910
- "step": 72500
911
- },
912
- {
913
- "epoch": 4.62,
914
- "learning_rate": 1.0758908791695676e-05,
915
- "loss": 1.6178,
916
- "step": 73000
917
- },
918
- {
919
- "epoch": 4.65,
920
- "learning_rate": 1.0695613646433321e-05,
921
- "loss": 1.5981,
922
- "step": 73500
923
- },
924
- {
925
- "epoch": 4.68,
926
- "learning_rate": 1.0632318501170963e-05,
927
- "loss": 1.6135,
928
- "step": 74000
929
- },
930
- {
931
- "epoch": 4.72,
932
- "learning_rate": 1.0569023355908604e-05,
933
- "loss": 1.6122,
934
- "step": 74500
935
- },
936
- {
937
- "epoch": 4.75,
938
- "learning_rate": 1.0505728210646245e-05,
939
- "loss": 1.5929,
940
- "step": 75000
941
- },
942
- {
943
- "epoch": 4.78,
944
- "learning_rate": 1.0442433065383886e-05,
945
- "loss": 1.6069,
946
- "step": 75500
947
- },
948
- {
949
- "epoch": 4.81,
950
- "learning_rate": 1.0379137920121528e-05,
951
- "loss": 1.6025,
952
- "step": 76000
953
- },
954
- {
955
- "epoch": 4.84,
956
- "learning_rate": 1.0315842774859167e-05,
957
- "loss": 1.6284,
958
- "step": 76500
959
- },
960
- {
961
- "epoch": 4.87,
962
- "learning_rate": 1.0252547629596812e-05,
963
- "loss": 1.6134,
964
- "step": 77000
965
- },
966
- {
967
- "epoch": 4.91,
968
- "learning_rate": 1.0189252484334454e-05,
969
- "loss": 1.6092,
970
- "step": 77500
971
- },
972
- {
973
- "epoch": 4.94,
974
- "learning_rate": 1.0125957339072095e-05,
975
- "loss": 1.6194,
976
- "step": 78000
977
- },
978
- {
979
- "epoch": 4.97,
980
- "learning_rate": 1.0062662193809736e-05,
981
- "loss": 1.6227,
982
- "step": 78500
983
- },
984
- {
985
- "epoch": 5.0,
986
- "eval_loss": 1.3593807220458984,
987
- "eval_runtime": 634.5713,
988
- "eval_samples_per_second": 398.343,
989
- "eval_steps_per_second": 24.897,
990
- "step": 78995
991
- },
992
- {
993
- "epoch": 5.0,
994
- "learning_rate": 9.999367048547378e-06,
995
- "loss": 1.6451,
996
- "step": 79000
997
- },
998
- {
999
- "epoch": 5.03,
1000
- "learning_rate": 9.936071903285019e-06,
1001
- "loss": 1.5186,
1002
- "step": 79500
1003
- },
1004
- {
1005
- "epoch": 5.06,
1006
- "learning_rate": 9.87277675802266e-06,
1007
- "loss": 1.5124,
1008
- "step": 80000
1009
- },
1010
- {
1011
- "epoch": 5.1,
1012
- "learning_rate": 9.809481612760301e-06,
1013
- "loss": 1.5223,
1014
- "step": 80500
1015
- },
1016
- {
1017
- "epoch": 5.13,
1018
- "learning_rate": 9.746186467497943e-06,
1019
- "loss": 1.5234,
1020
- "step": 81000
1021
- },
1022
- {
1023
- "epoch": 5.16,
1024
- "learning_rate": 9.682891322235586e-06,
1025
- "loss": 1.5298,
1026
- "step": 81500
1027
- },
1028
- {
1029
- "epoch": 5.19,
1030
- "learning_rate": 9.619596176973227e-06,
1031
- "loss": 1.5259,
1032
- "step": 82000
1033
- },
1034
- {
1035
- "epoch": 5.22,
1036
- "learning_rate": 9.556301031710869e-06,
1037
- "loss": 1.5463,
1038
- "step": 82500
1039
- },
1040
- {
1041
- "epoch": 5.25,
1042
- "learning_rate": 9.49300588644851e-06,
1043
- "loss": 1.5367,
1044
- "step": 83000
1045
- },
1046
- {
1047
- "epoch": 5.29,
1048
- "learning_rate": 9.429710741186153e-06,
1049
- "loss": 1.543,
1050
- "step": 83500
1051
- },
1052
- {
1053
- "epoch": 5.32,
1054
- "learning_rate": 9.366415595923794e-06,
1055
- "loss": 1.5379,
1056
- "step": 84000
1057
- },
1058
- {
1059
- "epoch": 5.35,
1060
- "learning_rate": 9.303120450661436e-06,
1061
- "loss": 1.5215,
1062
- "step": 84500
1063
- },
1064
- {
1065
- "epoch": 5.38,
1066
- "learning_rate": 9.239825305399077e-06,
1067
- "loss": 1.5339,
1068
- "step": 85000
1069
- },
1070
- {
1071
- "epoch": 5.41,
1072
- "learning_rate": 9.176530160136718e-06,
1073
- "loss": 1.5588,
1074
- "step": 85500
1075
- },
1076
- {
1077
- "epoch": 5.44,
1078
- "learning_rate": 9.11323501487436e-06,
1079
- "loss": 1.5522,
1080
- "step": 86000
1081
- },
1082
- {
1083
- "epoch": 5.48,
1084
- "learning_rate": 9.049939869612e-06,
1085
- "loss": 1.5516,
1086
- "step": 86500
1087
- },
1088
- {
1089
- "epoch": 5.51,
1090
- "learning_rate": 8.986644724349644e-06,
1091
- "loss": 1.5503,
1092
- "step": 87000
1093
- },
1094
- {
1095
- "epoch": 5.54,
1096
- "learning_rate": 8.923349579087285e-06,
1097
- "loss": 1.5459,
1098
- "step": 87500
1099
- },
1100
- {
1101
- "epoch": 5.57,
1102
- "learning_rate": 8.860054433824927e-06,
1103
- "loss": 1.5437,
1104
- "step": 88000
1105
- },
1106
- {
1107
- "epoch": 5.6,
1108
- "learning_rate": 8.796759288562568e-06,
1109
- "loss": 1.5452,
1110
- "step": 88500
1111
- },
1112
- {
1113
- "epoch": 5.63,
1114
- "learning_rate": 8.73346414330021e-06,
1115
- "loss": 1.5434,
1116
- "step": 89000
1117
- },
1118
- {
1119
- "epoch": 5.66,
1120
- "learning_rate": 8.67016899803785e-06,
1121
- "loss": 1.5633,
1122
- "step": 89500
1123
- },
1124
- {
1125
- "epoch": 5.7,
1126
- "learning_rate": 8.606873852775492e-06,
1127
- "loss": 1.5535,
1128
- "step": 90000
1129
- },
1130
- {
1131
- "epoch": 5.73,
1132
- "learning_rate": 8.543578707513135e-06,
1133
- "loss": 1.5692,
1134
- "step": 90500
1135
- },
1136
- {
1137
- "epoch": 5.76,
1138
- "learning_rate": 8.480283562250776e-06,
1139
- "loss": 1.5609,
1140
- "step": 91000
1141
- },
1142
- {
1143
- "epoch": 5.79,
1144
- "learning_rate": 8.416988416988418e-06,
1145
- "loss": 1.5529,
1146
- "step": 91500
1147
- },
1148
- {
1149
- "epoch": 5.82,
1150
- "learning_rate": 8.353693271726059e-06,
1151
- "loss": 1.5602,
1152
- "step": 92000
1153
- },
1154
- {
1155
- "epoch": 5.85,
1156
- "learning_rate": 8.290398126463702e-06,
1157
- "loss": 1.5547,
1158
- "step": 92500
1159
- },
1160
- {
1161
- "epoch": 5.89,
1162
- "learning_rate": 8.227102981201343e-06,
1163
- "loss": 1.5557,
1164
- "step": 93000
1165
- },
1166
- {
1167
- "epoch": 5.92,
1168
- "learning_rate": 8.163807835938985e-06,
1169
- "loss": 1.5488,
1170
- "step": 93500
1171
- },
1172
- {
1173
- "epoch": 5.95,
1174
- "learning_rate": 8.100512690676626e-06,
1175
- "loss": 1.5736,
1176
- "step": 94000
1177
- },
1178
- {
1179
- "epoch": 5.98,
1180
- "learning_rate": 8.037217545414267e-06,
1181
- "loss": 1.559,
1182
- "step": 94500
1183
- },
1184
- {
1185
- "epoch": 6.0,
1186
- "eval_loss": 1.3149573802947998,
1187
- "eval_runtime": 678.6783,
1188
- "eval_samples_per_second": 372.455,
1189
- "eval_steps_per_second": 23.279,
1190
- "step": 94794
1191
- },
1192
- {
1193
- "epoch": 6.01,
1194
- "learning_rate": 7.973922400151909e-06,
1195
- "loss": 1.5248,
1196
- "step": 95000
1197
- },
1198
- {
1199
- "epoch": 6.04,
1200
- "learning_rate": 7.91062725488955e-06,
1201
- "loss": 1.4873,
1202
- "step": 95500
1203
- },
1204
- {
1205
- "epoch": 6.08,
1206
- "learning_rate": 7.847332109627193e-06,
1207
- "loss": 1.4885,
1208
- "step": 96000
1209
- },
1210
- {
1211
- "epoch": 6.11,
1212
- "learning_rate": 7.784036964364834e-06,
1213
- "loss": 1.4882,
1214
- "step": 96500
1215
- },
1216
- {
1217
- "epoch": 6.14,
1218
- "learning_rate": 7.720741819102476e-06,
1219
- "loss": 1.499,
1220
- "step": 97000
1221
- },
1222
- {
1223
- "epoch": 6.17,
1224
- "learning_rate": 7.657446673840117e-06,
1225
- "loss": 1.493,
1226
- "step": 97500
1227
- },
1228
- {
1229
- "epoch": 6.2,
1230
- "learning_rate": 7.594151528577759e-06,
1231
- "loss": 1.4864,
1232
- "step": 98000
1233
- },
1234
- {
1235
- "epoch": 6.23,
1236
- "learning_rate": 7.5308563833154e-06,
1237
- "loss": 1.4889,
1238
- "step": 98500
1239
- },
1240
- {
1241
- "epoch": 6.27,
1242
- "learning_rate": 7.467561238053042e-06,
1243
- "loss": 1.5047,
1244
- "step": 99000
1245
- },
1246
- {
1247
- "epoch": 6.3,
1248
- "learning_rate": 7.404266092790684e-06,
1249
- "loss": 1.4828,
1250
- "step": 99500
1251
- },
1252
- {
1253
- "epoch": 6.33,
1254
- "learning_rate": 7.340970947528325e-06,
1255
- "loss": 1.4884,
1256
- "step": 100000
1257
- },
1258
- {
1259
- "epoch": 6.36,
1260
- "learning_rate": 7.2776758022659665e-06,
1261
- "loss": 1.4981,
1262
- "step": 100500
1263
- },
1264
- {
1265
- "epoch": 6.39,
1266
- "learning_rate": 7.214380657003608e-06,
1267
- "loss": 1.494,
1268
- "step": 101000
1269
- },
1270
- {
1271
- "epoch": 6.42,
1272
- "learning_rate": 7.15108551174125e-06,
1273
- "loss": 1.4798,
1274
- "step": 101500
1275
- },
1276
- {
1277
- "epoch": 6.46,
1278
- "learning_rate": 7.087790366478891e-06,
1279
- "loss": 1.498,
1280
- "step": 102000
1281
- },
1282
- {
1283
- "epoch": 6.49,
1284
- "learning_rate": 7.024495221216533e-06,
1285
- "loss": 1.496,
1286
- "step": 102500
1287
- },
1288
- {
1289
- "epoch": 6.52,
1290
- "learning_rate": 6.961200075954176e-06,
1291
- "loss": 1.5097,
1292
- "step": 103000
1293
- },
1294
- {
1295
- "epoch": 6.55,
1296
- "learning_rate": 6.897904930691817e-06,
1297
- "loss": 1.5032,
1298
- "step": 103500
1299
- },
1300
- {
1301
- "epoch": 6.58,
1302
- "learning_rate": 6.8346097854294576e-06,
1303
- "loss": 1.5001,
1304
- "step": 104000
1305
- },
1306
- {
1307
- "epoch": 6.61,
1308
- "learning_rate": 6.771314640167099e-06,
1309
- "loss": 1.5097,
1310
- "step": 104500
1311
- },
1312
- {
1313
- "epoch": 6.65,
1314
- "learning_rate": 6.708019494904742e-06,
1315
- "loss": 1.5065,
1316
- "step": 105000
1317
- },
1318
- {
1319
- "epoch": 6.68,
1320
- "learning_rate": 6.644724349642383e-06,
1321
- "loss": 1.4961,
1322
- "step": 105500
1323
- },
1324
- {
1325
- "epoch": 6.71,
1326
- "learning_rate": 6.5814292043800246e-06,
1327
- "loss": 1.5092,
1328
- "step": 106000
1329
- },
1330
- {
1331
- "epoch": 6.74,
1332
- "learning_rate": 6.518134059117667e-06,
1333
- "loss": 1.5079,
1334
- "step": 106500
1335
- },
1336
- {
1337
- "epoch": 6.77,
1338
- "learning_rate": 6.454838913855308e-06,
1339
- "loss": 1.513,
1340
- "step": 107000
1341
- },
1342
- {
1343
- "epoch": 6.8,
1344
- "learning_rate": 6.391543768592949e-06,
1345
- "loss": 1.5076,
1346
- "step": 107500
1347
- },
1348
- {
1349
- "epoch": 6.84,
1350
- "learning_rate": 6.328248623330591e-06,
1351
- "loss": 1.5123,
1352
- "step": 108000
1353
- },
1354
- {
1355
- "epoch": 6.87,
1356
- "learning_rate": 6.264953478068233e-06,
1357
- "loss": 1.5117,
1358
- "step": 108500
1359
- },
1360
- {
1361
- "epoch": 6.9,
1362
- "learning_rate": 6.201658332805874e-06,
1363
- "loss": 1.5056,
1364
- "step": 109000
1365
- },
1366
- {
1367
- "epoch": 6.93,
1368
- "learning_rate": 6.1383631875435156e-06,
1369
- "loss": 1.517,
1370
- "step": 109500
1371
- },
1372
- {
1373
- "epoch": 6.96,
1374
- "learning_rate": 6.075068042281157e-06,
1375
- "loss": 1.515,
1376
- "step": 110000
1377
- },
1378
- {
1379
- "epoch": 6.99,
1380
- "learning_rate": 6.011772897018799e-06,
1381
- "loss": 1.5193,
1382
- "step": 110500
1383
- },
1384
- {
1385
- "epoch": 7.0,
1386
- "eval_loss": 1.2794440984725952,
1387
- "eval_runtime": 637.2277,
1388
- "eval_samples_per_second": 396.682,
1389
- "eval_steps_per_second": 24.793,
1390
- "step": 110593
1391
- },
1392
- {
1393
- "epoch": 7.03,
1394
- "learning_rate": 5.94847775175644e-06,
1395
- "loss": 1.4557,
1396
- "step": 111000
1397
- },
1398
- {
1399
- "epoch": 7.06,
1400
- "learning_rate": 5.885182606494082e-06,
1401
- "loss": 1.4395,
1402
- "step": 111500
1403
- },
1404
- {
1405
- "epoch": 7.09,
1406
- "learning_rate": 5.821887461231725e-06,
1407
- "loss": 1.4518,
1408
- "step": 112000
1409
- },
1410
- {
1411
- "epoch": 7.12,
1412
- "learning_rate": 5.758592315969366e-06,
1413
- "loss": 1.4513,
1414
- "step": 112500
1415
- },
1416
- {
1417
- "epoch": 7.15,
1418
- "learning_rate": 5.695297170707007e-06,
1419
- "loss": 1.454,
1420
- "step": 113000
1421
- },
1422
- {
1423
- "epoch": 7.18,
1424
- "learning_rate": 5.632002025444649e-06,
1425
- "loss": 1.4597,
1426
- "step": 113500
1427
- },
1428
- {
1429
- "epoch": 7.22,
1430
- "learning_rate": 5.568706880182291e-06,
1431
- "loss": 1.4383,
1432
- "step": 114000
1433
- },
1434
- {
1435
- "epoch": 7.25,
1436
- "learning_rate": 5.505411734919932e-06,
1437
- "loss": 1.4529,
1438
- "step": 114500
1439
- },
1440
- {
1441
- "epoch": 7.28,
1442
- "learning_rate": 5.442116589657574e-06,
1443
- "loss": 1.4706,
1444
- "step": 115000
1445
- },
1446
- {
1447
- "epoch": 7.31,
1448
- "learning_rate": 5.378821444395216e-06,
1449
- "loss": 1.4576,
1450
- "step": 115500
1451
- },
1452
- {
1453
- "epoch": 7.34,
1454
- "learning_rate": 5.315526299132857e-06,
1455
- "loss": 1.4681,
1456
- "step": 116000
1457
- },
1458
- {
1459
- "epoch": 7.37,
1460
- "learning_rate": 5.252231153870498e-06,
1461
- "loss": 1.4537,
1462
- "step": 116500
1463
- },
1464
- {
1465
- "epoch": 7.41,
1466
- "learning_rate": 5.18893600860814e-06,
1467
- "loss": 1.4583,
1468
- "step": 117000
1469
- },
1470
- {
1471
- "epoch": 7.44,
1472
- "learning_rate": 5.125640863345782e-06,
1473
- "loss": 1.4645,
1474
- "step": 117500
1475
- },
1476
- {
1477
- "epoch": 7.47,
1478
- "learning_rate": 5.062345718083423e-06,
1479
- "loss": 1.455,
1480
- "step": 118000
1481
- },
1482
- {
1483
- "epoch": 7.5,
1484
- "learning_rate": 4.999050572821065e-06,
1485
- "loss": 1.4821,
1486
- "step": 118500
1487
- },
1488
- {
1489
- "epoch": 7.53,
1490
- "learning_rate": 4.935755427558707e-06,
1491
- "loss": 1.4605,
1492
- "step": 119000
1493
- },
1494
- {
1495
- "epoch": 7.56,
1496
- "learning_rate": 4.872460282296348e-06,
1497
- "loss": 1.4621,
1498
- "step": 119500
1499
- },
1500
- {
1501
- "epoch": 7.6,
1502
- "learning_rate": 4.8091651370339894e-06,
1503
- "loss": 1.4601,
1504
- "step": 120000
1505
- },
1506
- {
1507
- "epoch": 7.63,
1508
- "learning_rate": 4.745869991771632e-06,
1509
- "loss": 1.4648,
1510
- "step": 120500
1511
- },
1512
- {
1513
- "epoch": 7.66,
1514
- "learning_rate": 4.682574846509273e-06,
1515
- "loss": 1.4723,
1516
- "step": 121000
1517
- },
1518
- {
1519
- "epoch": 7.69,
1520
- "learning_rate": 4.619279701246915e-06,
1521
- "loss": 1.4733,
1522
- "step": 121500
1523
- },
1524
- {
1525
- "epoch": 7.72,
1526
- "learning_rate": 4.5559845559845564e-06,
1527
- "loss": 1.4723,
1528
- "step": 122000
1529
- },
1530
- {
1531
- "epoch": 7.75,
1532
- "learning_rate": 4.492689410722198e-06,
1533
- "loss": 1.4788,
1534
- "step": 122500
1535
- },
1536
- {
1537
- "epoch": 7.79,
1538
- "learning_rate": 4.42939426545984e-06,
1539
- "loss": 1.4665,
1540
- "step": 123000
1541
- },
1542
- {
1543
- "epoch": 7.82,
1544
- "learning_rate": 4.366099120197481e-06,
1545
- "loss": 1.4699,
1546
- "step": 123500
1547
- },
1548
- {
1549
- "epoch": 7.85,
1550
- "learning_rate": 4.3028039749351235e-06,
1551
- "loss": 1.4908,
1552
- "step": 124000
1553
- },
1554
- {
1555
- "epoch": 7.88,
1556
- "learning_rate": 4.239508829672764e-06,
1557
- "loss": 1.4712,
1558
- "step": 124500
1559
- },
1560
- {
1561
- "epoch": 7.91,
1562
- "learning_rate": 4.176213684410406e-06,
1563
- "loss": 1.4722,
1564
- "step": 125000
1565
- },
1566
- {
1567
- "epoch": 7.94,
1568
- "learning_rate": 4.1129185391480474e-06,
1569
- "loss": 1.4856,
1570
- "step": 125500
1571
- },
1572
- {
1573
- "epoch": 7.98,
1574
- "learning_rate": 4.04962339388569e-06,
1575
- "loss": 1.4793,
1576
- "step": 126000
1577
- },
1578
- {
1579
- "epoch": 8.0,
1580
- "eval_loss": 1.2516653537750244,
1581
- "eval_runtime": 654.6089,
1582
- "eval_samples_per_second": 386.15,
1583
- "eval_steps_per_second": 24.135,
1584
- "step": 126392
1585
- },
1586
- {
1587
- "epoch": 8.01,
1588
- "learning_rate": 3.986328248623331e-06,
1589
- "loss": 1.4563,
1590
- "step": 126500
1591
- },
1592
- {
1593
- "epoch": 8.04,
1594
- "learning_rate": 3.923033103360972e-06,
1595
- "loss": 1.4263,
1596
- "step": 127000
1597
- },
1598
- {
1599
- "epoch": 8.07,
1600
- "learning_rate": 3.8597379580986145e-06,
1601
- "loss": 1.4301,
1602
- "step": 127500
1603
- },
1604
- {
1605
- "epoch": 8.1,
1606
- "learning_rate": 3.7964428128362558e-06,
1607
- "loss": 1.43,
1608
- "step": 128000
1609
- },
1610
- {
1611
- "epoch": 8.13,
1612
- "learning_rate": 3.7331476675738975e-06,
1613
- "loss": 1.4355,
1614
- "step": 128500
1615
- },
1616
- {
1617
- "epoch": 8.17,
1618
- "learning_rate": 3.669852522311539e-06,
1619
- "loss": 1.4384,
1620
- "step": 129000
1621
- },
1622
- {
1623
- "epoch": 8.2,
1624
- "learning_rate": 3.6065573770491806e-06,
1625
- "loss": 1.4398,
1626
- "step": 129500
1627
- },
1628
- {
1629
- "epoch": 8.23,
1630
- "learning_rate": 3.543262231786822e-06,
1631
- "loss": 1.425,
1632
- "step": 130000
1633
- },
1634
- {
1635
- "epoch": 8.26,
1636
- "learning_rate": 3.4799670865244637e-06,
1637
- "loss": 1.423,
1638
- "step": 130500
1639
- },
1640
- {
1641
- "epoch": 8.29,
1642
- "learning_rate": 3.416671941262105e-06,
1643
- "loss": 1.4278,
1644
- "step": 131000
1645
- },
1646
- {
1647
- "epoch": 8.32,
1648
- "learning_rate": 3.3533767959997472e-06,
1649
- "loss": 1.4368,
1650
- "step": 131500
1651
- },
1652
- {
1653
- "epoch": 8.35,
1654
- "learning_rate": 3.290081650737389e-06,
1655
- "loss": 1.4351,
1656
- "step": 132000
1657
- },
1658
- {
1659
- "epoch": 8.39,
1660
- "learning_rate": 3.2267865054750303e-06,
1661
- "loss": 1.4351,
1662
- "step": 132500
1663
- },
1664
- {
1665
- "epoch": 8.42,
1666
- "learning_rate": 3.163491360212672e-06,
1667
- "loss": 1.4299,
1668
- "step": 133000
1669
- },
1670
- {
1671
- "epoch": 8.45,
1672
- "learning_rate": 3.1001962149503134e-06,
1673
- "loss": 1.4265,
1674
- "step": 133500
1675
- },
1676
- {
1677
- "epoch": 8.48,
1678
- "learning_rate": 3.036901069687955e-06,
1679
- "loss": 1.4468,
1680
- "step": 134000
1681
- },
1682
- {
1683
- "epoch": 8.51,
1684
- "learning_rate": 2.9736059244255965e-06,
1685
- "loss": 1.4389,
1686
- "step": 134500
1687
- },
1688
- {
1689
- "epoch": 8.54,
1690
- "learning_rate": 2.9103107791632386e-06,
1691
- "loss": 1.4199,
1692
- "step": 135000
1693
- },
1694
- {
1695
- "epoch": 8.58,
1696
- "learning_rate": 2.84701563390088e-06,
1697
- "loss": 1.4361,
1698
- "step": 135500
1699
- },
1700
- {
1701
- "epoch": 8.61,
1702
- "learning_rate": 2.7837204886385217e-06,
1703
- "loss": 1.4401,
1704
- "step": 136000
1705
- },
1706
- {
1707
- "epoch": 8.64,
1708
- "learning_rate": 2.7204253433761635e-06,
1709
- "loss": 1.4423,
1710
- "step": 136500
1711
- },
1712
- {
1713
- "epoch": 8.67,
1714
- "learning_rate": 2.657130198113805e-06,
1715
- "loss": 1.4266,
1716
- "step": 137000
1717
- },
1718
- {
1719
- "epoch": 8.7,
1720
- "learning_rate": 2.5938350528514466e-06,
1721
- "loss": 1.4406,
1722
- "step": 137500
1723
- },
1724
- {
1725
- "epoch": 8.73,
1726
- "learning_rate": 2.530539907589088e-06,
1727
- "loss": 1.441,
1728
- "step": 138000
1729
- },
1730
- {
1731
- "epoch": 8.77,
1732
- "learning_rate": 2.4672447623267296e-06,
1733
- "loss": 1.4551,
1734
- "step": 138500
1735
- },
1736
- {
1737
- "epoch": 8.8,
1738
- "learning_rate": 2.4039496170643714e-06,
1739
- "loss": 1.4452,
1740
- "step": 139000
1741
- },
1742
- {
1743
- "epoch": 8.83,
1744
- "learning_rate": 2.340654471802013e-06,
1745
- "loss": 1.4392,
1746
- "step": 139500
1747
- },
1748
- {
1749
- "epoch": 8.86,
1750
- "learning_rate": 2.2773593265396545e-06,
1751
- "loss": 1.4361,
1752
- "step": 140000
1753
- },
1754
- {
1755
- "epoch": 8.89,
1756
- "learning_rate": 2.2140641812772962e-06,
1757
- "loss": 1.4313,
1758
- "step": 140500
1759
- },
1760
- {
1761
- "epoch": 8.92,
1762
- "learning_rate": 2.1507690360149376e-06,
1763
- "loss": 1.4323,
1764
- "step": 141000
1765
- },
1766
- {
1767
- "epoch": 8.96,
1768
- "learning_rate": 2.0874738907525793e-06,
1769
- "loss": 1.4266,
1770
- "step": 141500
1771
- },
1772
- {
1773
- "epoch": 8.99,
1774
- "learning_rate": 2.024178745490221e-06,
1775
- "loss": 1.4354,
1776
- "step": 142000
1777
- },
1778
- {
1779
- "epoch": 9.0,
1780
- "eval_loss": 1.2341375350952148,
1781
- "eval_runtime": 642.6304,
1782
- "eval_samples_per_second": 393.347,
1783
- "eval_steps_per_second": 24.585,
1784
- "step": 142191
1785
- },
1786
- {
1787
- "epoch": 9.02,
1788
- "learning_rate": 1.960883600227863e-06,
1789
- "loss": 1.4034,
1790
- "step": 142500
1791
- },
1792
- {
1793
- "epoch": 9.05,
1794
- "learning_rate": 1.8975884549655044e-06,
1795
- "loss": 1.3966,
1796
- "step": 143000
1797
- },
1798
- {
1799
- "epoch": 9.08,
1800
- "learning_rate": 1.834293309703146e-06,
1801
- "loss": 1.3921,
1802
- "step": 143500
1803
- },
1804
- {
1805
- "epoch": 9.11,
1806
- "learning_rate": 1.7709981644407874e-06,
1807
- "loss": 1.396,
1808
- "step": 144000
1809
- },
1810
- {
1811
- "epoch": 9.15,
1812
- "learning_rate": 1.7077030191784292e-06,
1813
- "loss": 1.411,
1814
- "step": 144500
1815
- },
1816
- {
1817
- "epoch": 9.18,
1818
- "learning_rate": 1.6444078739160707e-06,
1819
- "loss": 1.406,
1820
- "step": 145000
1821
- },
1822
- {
1823
- "epoch": 9.21,
1824
- "learning_rate": 1.5811127286537123e-06,
1825
- "loss": 1.407,
1826
- "step": 145500
1827
- },
1828
- {
1829
- "epoch": 9.24,
1830
- "learning_rate": 1.5178175833913538e-06,
1831
- "loss": 1.4182,
1832
- "step": 146000
1833
- },
1834
- {
1835
- "epoch": 9.27,
1836
- "learning_rate": 1.4545224381289958e-06,
1837
- "loss": 1.4116,
1838
- "step": 146500
1839
- },
1840
- {
1841
- "epoch": 9.3,
1842
- "learning_rate": 1.3912272928666373e-06,
1843
- "loss": 1.4166,
1844
- "step": 147000
1845
- },
1846
- {
1847
- "epoch": 9.34,
1848
- "learning_rate": 1.3279321476042789e-06,
1849
- "loss": 1.4063,
1850
- "step": 147500
1851
- },
1852
- {
1853
- "epoch": 9.37,
1854
- "learning_rate": 1.2646370023419204e-06,
1855
- "loss": 1.4025,
1856
- "step": 148000
1857
- },
1858
- {
1859
- "epoch": 9.4,
1860
- "learning_rate": 1.2013418570795622e-06,
1861
- "loss": 1.4061,
1862
- "step": 148500
1863
- },
1864
- {
1865
- "epoch": 9.43,
1866
- "learning_rate": 1.1380467118172037e-06,
1867
- "loss": 1.4066,
1868
- "step": 149000
1869
- },
1870
- {
1871
- "epoch": 9.46,
1872
- "learning_rate": 1.0747515665548455e-06,
1873
- "loss": 1.4152,
1874
- "step": 149500
1875
- },
1876
- {
1877
- "epoch": 9.49,
1878
- "learning_rate": 1.011456421292487e-06,
1879
- "loss": 1.417,
1880
- "step": 150000
1881
- },
1882
- {
1883
- "epoch": 9.53,
1884
- "learning_rate": 9.481612760301285e-07,
1885
- "loss": 1.411,
1886
- "step": 150500
1887
- },
1888
- {
1889
- "epoch": 9.56,
1890
- "learning_rate": 8.848661307677701e-07,
1891
- "loss": 1.4162,
1892
- "step": 151000
1893
- },
1894
- {
1895
- "epoch": 9.59,
1896
- "learning_rate": 8.215709855054118e-07,
1897
- "loss": 1.4195,
1898
- "step": 151500
1899
- },
1900
- {
1901
- "epoch": 9.62,
1902
- "learning_rate": 7.582758402430535e-07,
1903
- "loss": 1.4226,
1904
- "step": 152000
1905
- },
1906
- {
1907
- "epoch": 9.65,
1908
- "learning_rate": 6.94980694980695e-07,
1909
- "loss": 1.4239,
1910
- "step": 152500
1911
- },
1912
- {
1913
- "epoch": 9.68,
1914
- "learning_rate": 6.316855497183366e-07,
1915
- "loss": 1.4078,
1916
- "step": 153000
1917
- },
1918
- {
1919
- "epoch": 9.72,
1920
- "learning_rate": 5.683904044559782e-07,
1921
- "loss": 1.4101,
1922
- "step": 153500
1923
- },
1924
- {
1925
- "epoch": 9.75,
1926
- "learning_rate": 5.050952591936199e-07,
1927
- "loss": 1.416,
1928
- "step": 154000
1929
- },
1930
- {
1931
- "epoch": 9.78,
1932
- "learning_rate": 4.418001139312615e-07,
1933
- "loss": 1.4182,
1934
- "step": 154500
1935
- },
1936
- {
1937
- "epoch": 9.81,
1938
- "learning_rate": 3.785049686689031e-07,
1939
- "loss": 1.4196,
1940
- "step": 155000
1941
- },
1942
- {
1943
- "epoch": 9.84,
1944
- "learning_rate": 3.1520982340654476e-07,
1945
- "loss": 1.4132,
1946
- "step": 155500
1947
- },
1948
- {
1949
- "epoch": 9.87,
1950
- "learning_rate": 2.5191467814418635e-07,
1951
- "loss": 1.4138,
1952
- "step": 156000
1953
- },
1954
- {
1955
- "epoch": 9.91,
1956
- "learning_rate": 1.88619532881828e-07,
1957
- "loss": 1.4333,
1958
- "step": 156500
1959
- },
1960
- {
1961
- "epoch": 9.94,
1962
- "learning_rate": 1.253243876194696e-07,
1963
- "loss": 1.413,
1964
- "step": 157000
1965
- },
1966
- {
1967
- "epoch": 9.97,
1968
- "learning_rate": 6.202924235711122e-08,
1969
- "loss": 1.4116,
1970
- "step": 157500
1971
- }
1972
- ],
1973
- "max_steps": 157990,
1974
- "num_train_epochs": 10,
1975
- "total_flos": 3.820454731815322e+16,
1976
- "trial_name": null,
1977
- "trial_params": null
1978
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-157500_sTrain/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a7d624b7f61620cf3bd86485c3abbf851a0ba124edd2a7b3aec3c4a0d5076e32
3
- size 4091
 
 
 
 
checkpoint-157500_sTrain/vocab.json DELETED
The diff for this file is too large to render. See raw diff