chaojiang06 commited on
Commit
4b4876e
1 Parent(s): 869f209

Upload 15 files

Browse files
README.md ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - generated_from_trainer
4
+ model-index:
5
+ - name: tst-translation355
6
+ results: []
7
+ ---
8
+
9
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
10
+ should probably proofread and complete it, then remove this comment. -->
11
+
12
+ # tst-translation355
13
+
14
+ This model is a fine-tuned version of [t5-large](https://huggingface.co/t5-large) on an unknown dataset.
15
+
16
+ ## Model description
17
+
18
+ More information needed
19
+
20
+ ## Intended uses & limitations
21
+
22
+ More information needed
23
+
24
+ ## Training and evaluation data
25
+
26
+ More information needed
27
+
28
+ ## Training procedure
29
+
30
+ ### Training hyperparameters
31
+
32
+ The following hyperparameters were used during training:
33
+ - learning_rate: 5e-05
34
+ - train_batch_size: 12
35
+ - eval_batch_size: 12
36
+ - seed: 42
37
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
38
+ - lr_scheduler_type: linear
39
+ - num_epochs: 10.0
40
+
41
+ ### Framework versions
42
+
43
+ - Transformers 4.17.0
44
+ - Pytorch 1.11.0+cu113
45
+ - Datasets 1.17.0
46
+ - Tokenizers 0.11.6
added_tokens.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"[MATH]": 32100, "[CITATION]": 32103, "[EQUATION]": 32101, "[REF]": 32102}
all_results.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 10.0,
3
+ "eval_accuracy": 0.7930232558139535,
4
+ "eval_loss": 0.13093818724155426,
5
+ "eval_runtime": 29.0938,
6
+ "eval_samples": 430,
7
+ "eval_samples_per_second": 14.78,
8
+ "eval_steps_per_second": 1.237,
9
+ "predict_accuracy": 0.3494363929146538,
10
+ "predict_loss": 0.5730153322219849,
11
+ "predict_runtime": 37.0085,
12
+ "predict_samples": 621,
13
+ "predict_samples_per_second": 16.78,
14
+ "predict_steps_per_second": 1.405,
15
+ "train_loss": 0.2194767295746576,
16
+ "train_runtime": 810.0029,
17
+ "train_samples": 1254,
18
+ "train_samples_per_second": 15.481,
19
+ "train_steps_per_second": 1.296
20
+ }
config.json ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "t5-large",
3
+ "architectures": [
4
+ "T5ForConditionalGeneration"
5
+ ],
6
+ "d_ff": 4096,
7
+ "d_kv": 64,
8
+ "d_model": 1024,
9
+ "decoder_start_token_id": 0,
10
+ "dropout_rate": 0.1,
11
+ "eos_token_id": 1,
12
+ "feed_forward_proj": "relu",
13
+ "initializer_factor": 1.0,
14
+ "is_encoder_decoder": true,
15
+ "layer_norm_epsilon": 1e-06,
16
+ "model_type": "t5",
17
+ "n_positions": 512,
18
+ "num_decoder_layers": 24,
19
+ "num_heads": 16,
20
+ "num_layers": 24,
21
+ "output_past": true,
22
+ "pad_token_id": 0,
23
+ "relative_attention_num_buckets": 32,
24
+ "task_specific_params": {
25
+ "summarization": {
26
+ "early_stopping": true,
27
+ "length_penalty": 2.0,
28
+ "max_length": 200,
29
+ "min_length": 30,
30
+ "no_repeat_ngram_size": 3,
31
+ "num_beams": 4,
32
+ "prefix": "summarize: "
33
+ },
34
+ "translation_en_to_de": {
35
+ "early_stopping": true,
36
+ "max_length": 300,
37
+ "num_beams": 4,
38
+ "prefix": "translate English to German: "
39
+ },
40
+ "translation_en_to_fr": {
41
+ "early_stopping": true,
42
+ "max_length": 300,
43
+ "num_beams": 4,
44
+ "prefix": "translate English to French: "
45
+ },
46
+ "translation_en_to_ro": {
47
+ "early_stopping": true,
48
+ "max_length": 300,
49
+ "num_beams": 4,
50
+ "prefix": "translate English to Romanian: "
51
+ }
52
+ },
53
+ "torch_dtype": "float32",
54
+ "transformers_version": "4.17.0",
55
+ "use_cache": true,
56
+ "vocab_size": 32104
57
+ }
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_accuracy": 0.7930232558139535,
3
+ "eval_loss": 0.13093818724155426,
4
+ "eval_runtime": 26.7221,
5
+ "eval_samples": 430,
6
+ "eval_samples_per_second": 16.092,
7
+ "eval_steps_per_second": 1.347
8
+ }
generated_predictions.txt ADDED
@@ -0,0 +1,430 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Format
2
+ Content
3
+ Improve-grammar-Typo
4
+ Content
5
+ Content
6
+ Lang-accurate-spefific
7
+ Content
8
+ Format
9
+ Format
10
+ Lang-accurate-spefific
11
+ Format
12
+ Improve-grammar-Typo
13
+ Lang-accurate-spefific
14
+ Content
15
+ Format
16
+ Content
17
+ Improve-grammar-Typo
18
+ Lang-improve-readability-Simplify
19
+ Lang-improve-readability-Simplify
20
+ Format
21
+ Content
22
+ Improve-grammar-Typo
23
+ Content
24
+ Improve-grammar-Typo
25
+ Improve-grammar-Typo
26
+ Format
27
+ Improve-grammar-Typo
28
+ Lang-professional-Improve-style
29
+ Content
30
+ Lang-accurate-spefific
31
+ Format
32
+ Improve-grammar-Typo
33
+ Format
34
+ Lang-accurate-spefific
35
+ Content
36
+ Format
37
+ Improve-grammar-Typo
38
+ Format
39
+ Format
40
+ Lang-improve-readability-Simplify
41
+ Lang-accurate-spefific
42
+ Lang-accurate-spefific
43
+ Content
44
+ Content
45
+ Format
46
+ Content
47
+ Format
48
+ Format
49
+ Format
50
+ Format
51
+ Lang-accurate-spefific
52
+ Format
53
+ Improve-grammar-Typo
54
+ Content
55
+ Format
56
+ Lang-improve-readability-Simplify
57
+ Improve-grammar-Typo
58
+ Lang-improve-readability-Simplify
59
+ Content
60
+ Lang-improve-readability-Simplify
61
+ Format
62
+ Content
63
+ Content
64
+ Content
65
+ Lang-accurate-spefific
66
+ Lang-professional-Improve-style
67
+ Lang-accurate-spefific
68
+ Format
69
+ Lang-accurate-spefific
70
+ Lang-improve-readability-Simplify
71
+ Lang-improve-readability-Simplify
72
+ Content
73
+ Lang-professional-Improve-style
74
+ Improve-grammar-Typo
75
+ Improve-grammar-Typo
76
+ Content
77
+ Improve-grammar-Typo
78
+ Format
79
+ Format
80
+ Improve-grammar-Typo
81
+ Format
82
+ Content
83
+ Content
84
+ Lang-accurate-spefific
85
+ Improve-grammar-Typo
86
+ Format
87
+ Lang-improve-readability-Simplify
88
+ Content
89
+ Format
90
+ Format
91
+ Lang-accurate-spefific
92
+ Improve-grammar-Typo
93
+ Content
94
+ Lang-accurate-spefific
95
+ Format
96
+ Format
97
+ Content
98
+ Lang-accurate-spefific
99
+ Improve-grammar-Typo
100
+ Content
101
+ Format
102
+ Content
103
+ Content
104
+ Lang-professional-Improve-style
105
+ Content
106
+ Content
107
+ Content
108
+ Lang-professional-Improve-style
109
+ Lang-professional-Improve-style
110
+ Format
111
+ Format
112
+ Format
113
+ Format
114
+ Format
115
+ Lang-professional-Improve-style
116
+ Content
117
+ Improve-grammar-Typo
118
+ Improve-grammar-Typo
119
+ Format
120
+ Format
121
+ Improve-grammar-Typo
122
+ Content
123
+ Content
124
+ Improve-grammar-Typo
125
+ Improve-grammar-Typo
126
+ Improve-grammar-Typo
127
+ Improve-grammar-Typo
128
+ Content
129
+ Improve-grammar-Typo
130
+ Content
131
+ Improve-grammar-Typo
132
+ Format
133
+ Format
134
+ Content
135
+ Content
136
+ Content
137
+ Content
138
+ Content
139
+ Improve-grammar-Typo
140
+ Content
141
+ Content
142
+ Lang-professional-Improve-style
143
+ Improve-grammar-Typo
144
+ Content
145
+ Format
146
+ Lang-professional-Improve-style
147
+ Improve-grammar-Typo
148
+ Improve-grammar-Typo
149
+ Content
150
+ Content
151
+ Lang-accurate-spefific
152
+ Content
153
+ Lang-improve-readability-Simplify
154
+ Content
155
+ Improve-grammar-Typo
156
+ Improve-grammar-Typo
157
+ Improve-grammar-Typo
158
+ Content
159
+ Content
160
+ Format
161
+ Improve-grammar-Typo
162
+ Format
163
+ Lang-accurate-spefific
164
+ Content
165
+ Content
166
+ Content
167
+ Improve-grammar-Typo
168
+ Content
169
+ Improve-grammar-Typo
170
+ Format
171
+ Content
172
+ Lang-accurate-spefific
173
+ Format
174
+ Format
175
+ Format
176
+ Lang-accurate-spefific
177
+ Format
178
+ Content
179
+ Content
180
+ Content
181
+ Content
182
+ Content
183
+ Improve-grammar-Typo
184
+ Format
185
+ Lang-accurate-spefific
186
+ Content
187
+ Improve-grammar-Typo
188
+ Format
189
+ Content
190
+ Content
191
+ Lang-accurate-spefific
192
+ Content
193
+ Format
194
+ Format
195
+ Format
196
+ Lang-accurate-spefific
197
+ Lang-accurate-spefific
198
+ Improve-grammar-Typo
199
+ Content
200
+ Format
201
+ Lang-professional-Improve-style
202
+ Content
203
+ Lang-accurate-spefific
204
+ Lang-improve-readability-Simplify
205
+ Format
206
+ Content
207
+ Format
208
+ Improve-grammar-Typo
209
+ Format
210
+ Lang-professional-Improve-style
211
+ Content
212
+ Content
213
+ Improve-grammar-Typo
214
+ Improve-grammar-Typo
215
+ Format
216
+ Improve-grammar-Typo
217
+ Content
218
+ Content
219
+ Lang-improve-readability-Simplify
220
+ Improve-grammar-Typo
221
+ Format
222
+ Content
223
+ Lang-professional-Improve-style
224
+ Format
225
+ Format
226
+ Improve-grammar-Typo
227
+ Lang-professional-Improve-style
228
+ Improve-grammar-Typo
229
+ Improve-grammar-Typo
230
+ Lang-professional-Improve-style
231
+ Format
232
+ Content
233
+ Lang-accurate-spefific
234
+ Content
235
+ Lang-professional-Improve-style
236
+ Lang-accurate-spefific
237
+ Content
238
+ Content
239
+ Content
240
+ Content
241
+ Lang-professional-Improve-style
242
+ Content
243
+ Format
244
+ Lang-accurate-spefific
245
+ Lang-improve-readability-Simplify
246
+ Lang-improve-readability-Simplify
247
+ Lang-improve-readability-Simplify
248
+ Lang-accurate-spefific
249
+ Content
250
+ Lang-accurate-spefific
251
+ Format
252
+ Improve-grammar-Typo
253
+ Lang-accurate-spefific
254
+ Lang-professional-Improve-style
255
+ Content
256
+ Content
257
+ Lang-accurate-spefific
258
+ Improve-grammar-Typo
259
+ Lang-accurate-spefific
260
+ Format
261
+ Improve-grammar-Typo
262
+ Lang-accurate-spefific
263
+ Improve-grammar-Typo
264
+ Improve-grammar-Typo
265
+ Improve-grammar-Typo
266
+ Improve-grammar-Typo
267
+ Content
268
+ Lang-improve-readability-Simplify
269
+ Improve-grammar-Typo
270
+ Improve-grammar-Typo
271
+ Content
272
+ Improve-grammar-Typo
273
+ Format
274
+ Lang-improve-readability-Simplify
275
+ Lang-accurate-spefific
276
+ Format
277
+ Lang-improve-readability-Simplify
278
+ Lang-professional-Improve-style
279
+ Content
280
+ Format
281
+ Content
282
+ Lang-professional-Improve-style
283
+ Format
284
+ Lang-improve-readability-Simplify
285
+ Content
286
+ Improve-grammar-Typo
287
+ Improve-grammar-Typo
288
+ Lang-accurate-spefific
289
+ Format
290
+ Content
291
+ Improve-grammar-Typo
292
+ Content
293
+ Content
294
+ Improve-grammar-Typo
295
+ Improve-grammar-Typo
296
+ Format
297
+ Content
298
+ Content
299
+ Improve-grammar-Typo
300
+ Lang-professional-Improve-style
301
+ Lang-accurate-spefific
302
+ Improve-grammar-Typo
303
+ Improve-grammar-Typo
304
+ Lang-accurate-spefific
305
+ Lang-accurate-spefific
306
+ Improve-grammar-Typo
307
+ Content
308
+ Content
309
+ Lang-accurate-spefific
310
+ Content
311
+ Content
312
+ Format
313
+ Improve-grammar-Typo
314
+ Format
315
+ Content
316
+ Format
317
+ Format
318
+ Lang-improve-readability-Simplify
319
+ Content
320
+ Improve-grammar-Typo
321
+ Lang-accurate-spefific
322
+ Improve-grammar-Typo
323
+ Improve-grammar-Typo
324
+ Improve-grammar-Typo
325
+ Format
326
+ Content
327
+ Lang-professional-Improve-style
328
+ Lang-improve-readability-Simplify
329
+ Lang-improve-readability-Simplify
330
+ Lang-professional-Improve-style
331
+ Lang-accurate-spefific
332
+ Improve-grammar-Typo
333
+ Content
334
+ Improve-grammar-Typo
335
+ Improve-grammar-Typo
336
+ Content
337
+ Content
338
+ Lang-accurate-spefific
339
+ Format
340
+ Content
341
+ Improve-grammar-Typo
342
+ Improve-grammar-Typo
343
+ Content
344
+ Content
345
+ Lang-accurate-spefific
346
+ Content
347
+ Content
348
+ Lang-accurate-spefific
349
+ Improve-grammar-Typo
350
+ Improve-grammar-Typo
351
+ Improve-grammar-Typo
352
+ Improve-grammar-Typo
353
+ Content
354
+ Format
355
+ Format
356
+ Lang-accurate-spefific
357
+ Lang-professional-Improve-style
358
+ Lang-accurate-spefific
359
+ Content
360
+ Content
361
+ Content
362
+ Content
363
+ Lang-professional-Improve-style
364
+ Content
365
+ Lang-accurate-spefific
366
+ Content
367
+ Format
368
+ Improve-grammar-Typo
369
+ Improve-grammar-Typo
370
+ Content
371
+ Format
372
+ Lang-accurate-spefific
373
+ Improve-grammar-Typo
374
+ Improve-grammar-Typo
375
+ Lang-professional-Improve-style
376
+ Content
377
+ Content
378
+ Improve-grammar-Typo
379
+ Improve-grammar-Typo
380
+ Lang-improve-readability-Simplify
381
+ Lang-professional-Improve-style
382
+ Improve-grammar-Typo
383
+ Improve-grammar-Typo
384
+ Format
385
+ Content
386
+ Content
387
+ Format
388
+ Format
389
+ Content
390
+ Format
391
+ Improve-grammar-Typo
392
+ Lang-accurate-spefific
393
+ Improve-grammar-Typo
394
+ Improve-grammar-Typo
395
+ Lang-accurate-spefific
396
+ Improve-grammar-Typo
397
+ Content
398
+ Improve-grammar-Typo
399
+ Content
400
+ Improve-grammar-Typo
401
+ Content
402
+ Lang-professional-Improve-style
403
+ Lang-professional-Improve-style
404
+ Improve-grammar-Typo
405
+ Improve-grammar-Typo
406
+ Lang-accurate-spefific
407
+ Lang-accurate-spefific
408
+ Improve-grammar-Typo
409
+ Format
410
+ Format
411
+ Lang-professional-Improve-style
412
+ Format
413
+ Improve-grammar-Typo
414
+ Improve-grammar-Typo
415
+ Improve-grammar-Typo
416
+ Improve-grammar-Typo
417
+ Lang-professional-Improve-style
418
+ Improve-grammar-Typo
419
+ Lang-improve-readability-Simplify
420
+ Lang-professional-Improve-style
421
+ Lang-professional-Improve-style
422
+ Improve-grammar-Typo
423
+ Improve-grammar-Typo
424
+ Improve-grammar-Typo
425
+ Content
426
+ Content
427
+ Format
428
+ Lang-accurate-spefific
429
+ Format
430
+ Format
predict_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "predict_accuracy": 0.3801089918256131,
3
+ "predict_loss": 0.49549296498298645,
4
+ "predict_runtime": 47.4917,
5
+ "predict_samples": 734,
6
+ "predict_samples_per_second": 15.455,
7
+ "predict_steps_per_second": 1.305
8
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9116891d7a4ae2a64484f44e91c7d9ef82c598ca565dcff42f610d18907d63fe
3
+ size 2950806407
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>", "additional_special_tokens": ["<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "<extra_id_4>", "<extra_id_5>", "<extra_id_6>", "<extra_id_7>", "<extra_id_8>", "<extra_id_9>", "<extra_id_10>", "<extra_id_11>", "<extra_id_12>", "<extra_id_13>", "<extra_id_14>", "<extra_id_15>", "<extra_id_16>", "<extra_id_17>", "<extra_id_18>", "<extra_id_19>", "<extra_id_20>", "<extra_id_21>", "<extra_id_22>", "<extra_id_23>", "<extra_id_24>", "<extra_id_25>", "<extra_id_26>", "<extra_id_27>", "<extra_id_28>", "<extra_id_29>", "<extra_id_30>", "<extra_id_31>", "<extra_id_32>", "<extra_id_33>", "<extra_id_34>", "<extra_id_35>", "<extra_id_36>", "<extra_id_37>", "<extra_id_38>", "<extra_id_39>", "<extra_id_40>", "<extra_id_41>", "<extra_id_42>", "<extra_id_43>", "<extra_id_44>", "<extra_id_45>", "<extra_id_46>", "<extra_id_47>", "<extra_id_48>", "<extra_id_49>", "<extra_id_50>", "<extra_id_51>", "<extra_id_52>", "<extra_id_53>", "<extra_id_54>", "<extra_id_55>", "<extra_id_56>", "<extra_id_57>", "<extra_id_58>", "<extra_id_59>", "<extra_id_60>", "<extra_id_61>", "<extra_id_62>", "<extra_id_63>", "<extra_id_64>", "<extra_id_65>", "<extra_id_66>", "<extra_id_67>", "<extra_id_68>", "<extra_id_69>", "<extra_id_70>", "<extra_id_71>", "<extra_id_72>", "<extra_id_73>", "<extra_id_74>", "<extra_id_75>", "<extra_id_76>", "<extra_id_77>", "<extra_id_78>", "<extra_id_79>", "<extra_id_80>", "<extra_id_81>", "<extra_id_82>", "<extra_id_83>", "<extra_id_84>", "<extra_id_85>", "<extra_id_86>", "<extra_id_87>", "<extra_id_88>", "<extra_id_89>", "<extra_id_90>", "<extra_id_91>", "<extra_id_92>", "<extra_id_93>", "<extra_id_94>", "<extra_id_95>", "<extra_id_96>", "<extra_id_97>", "<extra_id_98>", "<extra_id_99>"]}
spiece.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
3
+ size 791656
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>", "extra_ids": 100, "additional_special_tokens": ["<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "<extra_id_4>", "<extra_id_5>", "<extra_id_6>", "<extra_id_7>", "<extra_id_8>", "<extra_id_9>", "<extra_id_10>", "<extra_id_11>", "<extra_id_12>", "<extra_id_13>", "<extra_id_14>", "<extra_id_15>", "<extra_id_16>", "<extra_id_17>", "<extra_id_18>", "<extra_id_19>", "<extra_id_20>", "<extra_id_21>", "<extra_id_22>", "<extra_id_23>", "<extra_id_24>", "<extra_id_25>", "<extra_id_26>", "<extra_id_27>", "<extra_id_28>", "<extra_id_29>", "<extra_id_30>", "<extra_id_31>", "<extra_id_32>", "<extra_id_33>", "<extra_id_34>", "<extra_id_35>", "<extra_id_36>", "<extra_id_37>", "<extra_id_38>", "<extra_id_39>", "<extra_id_40>", "<extra_id_41>", "<extra_id_42>", "<extra_id_43>", "<extra_id_44>", "<extra_id_45>", "<extra_id_46>", "<extra_id_47>", "<extra_id_48>", "<extra_id_49>", "<extra_id_50>", "<extra_id_51>", "<extra_id_52>", "<extra_id_53>", "<extra_id_54>", "<extra_id_55>", "<extra_id_56>", "<extra_id_57>", "<extra_id_58>", "<extra_id_59>", "<extra_id_60>", "<extra_id_61>", "<extra_id_62>", "<extra_id_63>", "<extra_id_64>", "<extra_id_65>", "<extra_id_66>", "<extra_id_67>", "<extra_id_68>", "<extra_id_69>", "<extra_id_70>", "<extra_id_71>", "<extra_id_72>", "<extra_id_73>", "<extra_id_74>", "<extra_id_75>", "<extra_id_76>", "<extra_id_77>", "<extra_id_78>", "<extra_id_79>", "<extra_id_80>", "<extra_id_81>", "<extra_id_82>", "<extra_id_83>", "<extra_id_84>", "<extra_id_85>", "<extra_id_86>", "<extra_id_87>", "<extra_id_88>", "<extra_id_89>", "<extra_id_90>", "<extra_id_91>", "<extra_id_92>", "<extra_id_93>", "<extra_id_94>", "<extra_id_95>", "<extra_id_96>", "<extra_id_97>", "<extra_id_98>", "<extra_id_99>"], "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "t5-large", "tokenizer_class": "T5Tokenizer"}
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 10.0,
3
+ "train_loss": 0.2194767295746576,
4
+ "train_runtime": 810.0029,
5
+ "train_samples": 1254,
6
+ "train_samples_per_second": 15.481,
7
+ "train_steps_per_second": 1.296
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.7785388127853882,
3
+ "best_model_checkpoint": "tmp/tst-translation355/checkpoint-840",
4
+ "epoch": 10.0,
5
+ "global_step": 1050,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 1.0,
12
+ "eval_accuracy": 0.2054794520547945,
13
+ "eval_loss": 0.2730105221271515,
14
+ "eval_runtime": 13.9745,
15
+ "eval_samples_per_second": 31.343,
16
+ "eval_steps_per_second": 2.648,
17
+ "step": 105
18
+ },
19
+ {
20
+ "epoch": 2.0,
21
+ "eval_accuracy": 0.4132420091324201,
22
+ "eval_loss": 0.2166765183210373,
23
+ "eval_runtime": 13.9525,
24
+ "eval_samples_per_second": 31.392,
25
+ "eval_steps_per_second": 2.652,
26
+ "step": 210
27
+ },
28
+ {
29
+ "epoch": 3.0,
30
+ "eval_accuracy": 0.6598173515981736,
31
+ "eval_loss": 0.16653937101364136,
32
+ "eval_runtime": 16.3239,
33
+ "eval_samples_per_second": 26.832,
34
+ "eval_steps_per_second": 2.267,
35
+ "step": 315
36
+ },
37
+ {
38
+ "epoch": 4.0,
39
+ "eval_accuracy": 0.6940639269406392,
40
+ "eval_loss": 0.1442675143480301,
41
+ "eval_runtime": 13.707,
42
+ "eval_samples_per_second": 31.954,
43
+ "eval_steps_per_second": 2.699,
44
+ "step": 420
45
+ },
46
+ {
47
+ "epoch": 4.76,
48
+ "learning_rate": 2.6190476190476192e-05,
49
+ "loss": 0.3779,
50
+ "step": 500
51
+ },
52
+ {
53
+ "epoch": 5.0,
54
+ "eval_accuracy": 0.7146118721461188,
55
+ "eval_loss": 0.1320880502462387,
56
+ "eval_runtime": 14.2394,
57
+ "eval_samples_per_second": 30.76,
58
+ "eval_steps_per_second": 2.598,
59
+ "step": 525
60
+ },
61
+ {
62
+ "epoch": 6.0,
63
+ "eval_accuracy": 0.7579908675799086,
64
+ "eval_loss": 0.11835604161024094,
65
+ "eval_runtime": 14.2589,
66
+ "eval_samples_per_second": 30.718,
67
+ "eval_steps_per_second": 2.595,
68
+ "step": 630
69
+ },
70
+ {
71
+ "epoch": 7.0,
72
+ "eval_accuracy": 0.7579908675799086,
73
+ "eval_loss": 0.119329072535038,
74
+ "eval_runtime": 14.1651,
75
+ "eval_samples_per_second": 30.921,
76
+ "eval_steps_per_second": 2.612,
77
+ "step": 735
78
+ },
79
+ {
80
+ "epoch": 8.0,
81
+ "eval_accuracy": 0.7785388127853882,
82
+ "eval_loss": 0.14183764159679413,
83
+ "eval_runtime": 14.017,
84
+ "eval_samples_per_second": 31.248,
85
+ "eval_steps_per_second": 2.64,
86
+ "step": 840
87
+ },
88
+ {
89
+ "epoch": 9.0,
90
+ "eval_accuracy": 0.7602739726027398,
91
+ "eval_loss": 0.1450866013765335,
92
+ "eval_runtime": 14.0858,
93
+ "eval_samples_per_second": 31.095,
94
+ "eval_steps_per_second": 2.627,
95
+ "step": 945
96
+ },
97
+ {
98
+ "epoch": 9.52,
99
+ "learning_rate": 2.3809523809523808e-06,
100
+ "loss": 0.0787,
101
+ "step": 1000
102
+ },
103
+ {
104
+ "epoch": 10.0,
105
+ "eval_accuracy": 0.7625570776255708,
106
+ "eval_loss": 0.148654505610466,
107
+ "eval_runtime": 15.3383,
108
+ "eval_samples_per_second": 28.556,
109
+ "eval_steps_per_second": 2.412,
110
+ "step": 1050
111
+ },
112
+ {
113
+ "epoch": 10.0,
114
+ "step": 1050,
115
+ "total_flos": 8482350845952000.0,
116
+ "train_loss": 0.2194767295746576,
117
+ "train_runtime": 810.0029,
118
+ "train_samples_per_second": 15.481,
119
+ "train_steps_per_second": 1.296
120
+ }
121
+ ],
122
+ "max_steps": 1050,
123
+ "num_train_epochs": 10,
124
+ "total_flos": 8482350845952000.0,
125
+ "trial_name": null,
126
+ "trial_params": null
127
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19cd7cd0292532c882f8a4ddb3ddc1781dcc7be5521bda178a7645a82bd3bae1
3
+ size 3183