learn3r committed on
Commit
901c470
·
1 Parent(s): 3508f06

End of training

Browse files
Files changed (5) hide show
  1. README.md +20 -8
  2. all_results.json +18 -0
  3. eval_results.json +13 -0
  4. train_results.json +8 -0
  5. trainer_state.json +578 -0
README.md CHANGED
@@ -3,11 +3,23 @@ license: apache-2.0
3
  base_model: google/long-t5-tglobal-xl
4
  tags:
5
  - generated_from_trainer
 
 
6
  metrics:
7
  - rouge
8
  model-index:
9
  - name: longt5_xl_summ_screen_bp_10
10
- results: []
 
 
 
 
 
 
 
 
 
 
11
  ---
12
 
13
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -15,14 +27,14 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # longt5_xl_summ_screen_bp_10
17
 
18
- This model is a fine-tuned version of [google/long-t5-tglobal-xl](https://huggingface.co/google/long-t5-tglobal-xl) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
- - Loss: 1.5811
21
- - Rouge1: 35.4657
22
- - Rouge2: 12.0036
23
- - Rougel: 24.7787
24
- - Rougelsum: 34.3775
25
- - Gen Len: 284.0266
26
 
27
  ## Model description
28
 
 
3
  base_model: google/long-t5-tglobal-xl
4
  tags:
5
  - generated_from_trainer
6
+ datasets:
7
+ - learn3r/summ_screen_fd_bp
8
  metrics:
9
  - rouge
10
  model-index:
11
  - name: longt5_xl_summ_screen_bp_10
12
+ results:
13
+ - task:
14
+ name: Summarization
15
+ type: summarization
16
+ dataset:
17
+ name: learn3r/summ_screen_fd_bp
18
+ type: learn3r/summ_screen_fd_bp
19
+ metrics:
20
+ - name: Rouge1
21
+ type: rouge
22
+ value: 22.9554
23
  ---
24
 
25
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
27
 
28
  # longt5_xl_summ_screen_bp_10
29
 
30
+ This model is a fine-tuned version of [google/long-t5-tglobal-xl](https://huggingface.co/google/long-t5-tglobal-xl) on the learn3r/summ_screen_fd_bp dataset.
31
  It achieves the following results on the evaluation set:
32
+ - Loss: 1.3323
33
+ - Rouge1: 22.9554
34
+ - Rouge2: 6.4509
35
+ - Rougel: 19.7437
36
+ - Rougelsum: 20.923
37
+ - Gen Len: 497.2456
38
 
39
  ## Model description
40
 
all_results.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 9.74,
3
+ "eval_gen_len": 497.2455621301775,
4
+ "eval_loss": 1.3323031663894653,
5
+ "eval_rouge1": 22.9554,
6
+ "eval_rouge2": 6.4509,
7
+ "eval_rougeL": 19.7437,
8
+ "eval_rougeLsum": 20.923,
9
+ "eval_runtime": 1805.1059,
10
+ "eval_samples": 338,
11
+ "eval_samples_per_second": 0.187,
12
+ "eval_steps_per_second": 0.024,
13
+ "train_loss": 1.2823251613548823,
14
+ "train_runtime": 53785.754,
15
+ "train_samples": 3673,
16
+ "train_samples_per_second": 0.683,
17
+ "train_steps_per_second": 0.003
18
+ }
eval_results.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 9.74,
3
+ "eval_gen_len": 497.2455621301775,
4
+ "eval_loss": 1.3323031663894653,
5
+ "eval_rouge1": 22.9554,
6
+ "eval_rouge2": 6.4509,
7
+ "eval_rougeL": 19.7437,
8
+ "eval_rougeLsum": 20.923,
9
+ "eval_runtime": 1805.1059,
10
+ "eval_samples": 338,
11
+ "eval_samples_per_second": 0.187,
12
+ "eval_steps_per_second": 0.024
13
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 9.74,
3
+ "train_loss": 1.2823251613548823,
4
+ "train_runtime": 53785.754,
5
+ "train_samples": 3673,
6
+ "train_samples_per_second": 0.683,
7
+ "train_steps_per_second": 0.003
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,578 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 1.3323031663894653,
3
+ "best_model_checkpoint": "longt5_xl_summ_screen_bp_10/checkpoint-57",
4
+ "epoch": 9.73913043478261,
5
+ "eval_steps": 500,
6
+ "global_step": 140,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.14,
13
+ "learning_rate": 0.001,
14
+ "loss": 2.8751,
15
+ "step": 2
16
+ },
17
+ {
18
+ "epoch": 0.28,
19
+ "learning_rate": 0.001,
20
+ "loss": 3.5717,
21
+ "step": 4
22
+ },
23
+ {
24
+ "epoch": 0.42,
25
+ "learning_rate": 0.001,
26
+ "loss": 2.8585,
27
+ "step": 6
28
+ },
29
+ {
30
+ "epoch": 0.56,
31
+ "learning_rate": 0.001,
32
+ "loss": 2.5104,
33
+ "step": 8
34
+ },
35
+ {
36
+ "epoch": 0.7,
37
+ "learning_rate": 0.001,
38
+ "loss": 3.2659,
39
+ "step": 10
40
+ },
41
+ {
42
+ "epoch": 0.83,
43
+ "learning_rate": 0.001,
44
+ "loss": 3.4634,
45
+ "step": 12
46
+ },
47
+ {
48
+ "epoch": 0.97,
49
+ "learning_rate": 0.001,
50
+ "loss": 2.4559,
51
+ "step": 14
52
+ },
53
+ {
54
+ "epoch": 0.97,
55
+ "eval_gen_len": 511.0,
56
+ "eval_loss": 2.0707387924194336,
57
+ "eval_rouge1": 11.7833,
58
+ "eval_rouge2": 1.6011,
59
+ "eval_rougeL": 11.1858,
60
+ "eval_rougeLsum": 10.3025,
61
+ "eval_runtime": 1812.8885,
62
+ "eval_samples_per_second": 0.186,
63
+ "eval_steps_per_second": 0.024,
64
+ "step": 14
65
+ },
66
+ {
67
+ "epoch": 1.11,
68
+ "learning_rate": 0.001,
69
+ "loss": 2.1517,
70
+ "step": 16
71
+ },
72
+ {
73
+ "epoch": 1.25,
74
+ "learning_rate": 0.001,
75
+ "loss": 2.2029,
76
+ "step": 18
77
+ },
78
+ {
79
+ "epoch": 1.39,
80
+ "learning_rate": 0.001,
81
+ "loss": 2.1161,
82
+ "step": 20
83
+ },
84
+ {
85
+ "epoch": 1.53,
86
+ "learning_rate": 0.001,
87
+ "loss": 1.9513,
88
+ "step": 22
89
+ },
90
+ {
91
+ "epoch": 1.67,
92
+ "learning_rate": 0.001,
93
+ "loss": 1.7095,
94
+ "step": 24
95
+ },
96
+ {
97
+ "epoch": 1.81,
98
+ "learning_rate": 0.001,
99
+ "loss": 1.6535,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 1.95,
104
+ "learning_rate": 0.001,
105
+ "loss": 1.6238,
106
+ "step": 28
107
+ },
108
+ {
109
+ "epoch": 1.95,
110
+ "eval_gen_len": 511.0,
111
+ "eval_loss": 1.5286704301834106,
112
+ "eval_rouge1": 19.0489,
113
+ "eval_rouge2": 4.687,
114
+ "eval_rougeL": 16.6504,
115
+ "eval_rougeLsum": 17.1808,
116
+ "eval_runtime": 1807.4269,
117
+ "eval_samples_per_second": 0.187,
118
+ "eval_steps_per_second": 0.024,
119
+ "step": 28
120
+ },
121
+ {
122
+ "epoch": 2.09,
123
+ "learning_rate": 0.001,
124
+ "loss": 1.5804,
125
+ "step": 30
126
+ },
127
+ {
128
+ "epoch": 2.23,
129
+ "learning_rate": 0.001,
130
+ "loss": 1.511,
131
+ "step": 32
132
+ },
133
+ {
134
+ "epoch": 2.37,
135
+ "learning_rate": 0.001,
136
+ "loss": 1.4961,
137
+ "step": 34
138
+ },
139
+ {
140
+ "epoch": 2.5,
141
+ "learning_rate": 0.001,
142
+ "loss": 1.4334,
143
+ "step": 36
144
+ },
145
+ {
146
+ "epoch": 2.64,
147
+ "learning_rate": 0.001,
148
+ "loss": 1.3994,
149
+ "step": 38
150
+ },
151
+ {
152
+ "epoch": 2.78,
153
+ "learning_rate": 0.001,
154
+ "loss": 1.4018,
155
+ "step": 40
156
+ },
157
+ {
158
+ "epoch": 2.92,
159
+ "learning_rate": 0.001,
160
+ "loss": 1.3964,
161
+ "step": 42
162
+ },
163
+ {
164
+ "epoch": 2.99,
165
+ "eval_gen_len": 511.0,
166
+ "eval_loss": 1.3520147800445557,
167
+ "eval_rouge1": 21.9994,
168
+ "eval_rouge2": 5.8519,
169
+ "eval_rougeL": 18.9231,
170
+ "eval_rougeLsum": 19.958,
171
+ "eval_runtime": 1809.4299,
172
+ "eval_samples_per_second": 0.187,
173
+ "eval_steps_per_second": 0.024,
174
+ "step": 43
175
+ },
176
+ {
177
+ "epoch": 3.06,
178
+ "learning_rate": 0.001,
179
+ "loss": 1.3428,
180
+ "step": 44
181
+ },
182
+ {
183
+ "epoch": 3.2,
184
+ "learning_rate": 0.001,
185
+ "loss": 1.3034,
186
+ "step": 46
187
+ },
188
+ {
189
+ "epoch": 3.34,
190
+ "learning_rate": 0.001,
191
+ "loss": 1.4137,
192
+ "step": 48
193
+ },
194
+ {
195
+ "epoch": 3.48,
196
+ "learning_rate": 0.001,
197
+ "loss": 1.4083,
198
+ "step": 50
199
+ },
200
+ {
201
+ "epoch": 3.62,
202
+ "learning_rate": 0.001,
203
+ "loss": 1.3075,
204
+ "step": 52
205
+ },
206
+ {
207
+ "epoch": 3.76,
208
+ "learning_rate": 0.001,
209
+ "loss": 1.2527,
210
+ "step": 54
211
+ },
212
+ {
213
+ "epoch": 3.9,
214
+ "learning_rate": 0.001,
215
+ "loss": 1.2538,
216
+ "step": 56
217
+ },
218
+ {
219
+ "epoch": 3.97,
220
+ "eval_gen_len": 497.2455621301775,
221
+ "eval_loss": 1.3323031663894653,
222
+ "eval_rouge1": 22.9554,
223
+ "eval_rouge2": 6.4509,
224
+ "eval_rougeL": 19.7437,
225
+ "eval_rougeLsum": 20.923,
226
+ "eval_runtime": 1810.7532,
227
+ "eval_samples_per_second": 0.187,
228
+ "eval_steps_per_second": 0.024,
229
+ "step": 57
230
+ },
231
+ {
232
+ "epoch": 4.03,
233
+ "learning_rate": 0.001,
234
+ "loss": 1.2028,
235
+ "step": 58
236
+ },
237
+ {
238
+ "epoch": 4.17,
239
+ "learning_rate": 0.001,
240
+ "loss": 1.0981,
241
+ "step": 60
242
+ },
243
+ {
244
+ "epoch": 4.31,
245
+ "learning_rate": 0.001,
246
+ "loss": 1.1033,
247
+ "step": 62
248
+ },
249
+ {
250
+ "epoch": 4.45,
251
+ "learning_rate": 0.001,
252
+ "loss": 1.1303,
253
+ "step": 64
254
+ },
255
+ {
256
+ "epoch": 4.59,
257
+ "learning_rate": 0.001,
258
+ "loss": 1.1675,
259
+ "step": 66
260
+ },
261
+ {
262
+ "epoch": 4.73,
263
+ "learning_rate": 0.001,
264
+ "loss": 1.3701,
265
+ "step": 68
266
+ },
267
+ {
268
+ "epoch": 4.87,
269
+ "learning_rate": 0.001,
270
+ "loss": 1.277,
271
+ "step": 70
272
+ },
273
+ {
274
+ "epoch": 4.94,
275
+ "eval_gen_len": 507.2278106508876,
276
+ "eval_loss": 1.5462373495101929,
277
+ "eval_rouge1": 14.6326,
278
+ "eval_rouge2": 3.6509,
279
+ "eval_rougeL": 12.4805,
280
+ "eval_rougeLsum": 13.5001,
281
+ "eval_runtime": 1806.4311,
282
+ "eval_samples_per_second": 0.187,
283
+ "eval_steps_per_second": 0.024,
284
+ "step": 71
285
+ },
286
+ {
287
+ "epoch": 5.01,
288
+ "learning_rate": 0.001,
289
+ "loss": 1.3884,
290
+ "step": 72
291
+ },
292
+ {
293
+ "epoch": 5.15,
294
+ "learning_rate": 0.001,
295
+ "loss": 1.0428,
296
+ "step": 74
297
+ },
298
+ {
299
+ "epoch": 5.29,
300
+ "learning_rate": 0.001,
301
+ "loss": 1.0266,
302
+ "step": 76
303
+ },
304
+ {
305
+ "epoch": 5.43,
306
+ "learning_rate": 0.001,
307
+ "loss": 1.0247,
308
+ "step": 78
309
+ },
310
+ {
311
+ "epoch": 5.57,
312
+ "learning_rate": 0.001,
313
+ "loss": 0.9732,
314
+ "step": 80
315
+ },
316
+ {
317
+ "epoch": 5.7,
318
+ "learning_rate": 0.001,
319
+ "loss": 1.0042,
320
+ "step": 82
321
+ },
322
+ {
323
+ "epoch": 5.84,
324
+ "learning_rate": 0.001,
325
+ "loss": 1.0099,
326
+ "step": 84
327
+ },
328
+ {
329
+ "epoch": 5.98,
330
+ "learning_rate": 0.001,
331
+ "loss": 1.0071,
332
+ "step": 86
333
+ },
334
+ {
335
+ "epoch": 5.98,
336
+ "eval_gen_len": 429.7721893491124,
337
+ "eval_loss": 1.3604055643081665,
338
+ "eval_rouge1": 29.5352,
339
+ "eval_rouge2": 9.9544,
340
+ "eval_rougeL": 22.1073,
341
+ "eval_rougeLsum": 28.1204,
342
+ "eval_runtime": 1808.4033,
343
+ "eval_samples_per_second": 0.187,
344
+ "eval_steps_per_second": 0.024,
345
+ "step": 86
346
+ },
347
+ {
348
+ "epoch": 6.12,
349
+ "learning_rate": 0.001,
350
+ "loss": 0.8375,
351
+ "step": 88
352
+ },
353
+ {
354
+ "epoch": 6.26,
355
+ "learning_rate": 0.001,
356
+ "loss": 0.8301,
357
+ "step": 90
358
+ },
359
+ {
360
+ "epoch": 6.4,
361
+ "learning_rate": 0.001,
362
+ "loss": 0.8551,
363
+ "step": 92
364
+ },
365
+ {
366
+ "epoch": 6.54,
367
+ "learning_rate": 0.001,
368
+ "loss": 0.823,
369
+ "step": 94
370
+ },
371
+ {
372
+ "epoch": 6.68,
373
+ "learning_rate": 0.001,
374
+ "loss": 0.8783,
375
+ "step": 96
376
+ },
377
+ {
378
+ "epoch": 6.82,
379
+ "learning_rate": 0.001,
380
+ "loss": 0.885,
381
+ "step": 98
382
+ },
383
+ {
384
+ "epoch": 6.96,
385
+ "learning_rate": 0.001,
386
+ "loss": 0.8685,
387
+ "step": 100
388
+ },
389
+ {
390
+ "epoch": 6.96,
391
+ "eval_gen_len": 451.78402366863907,
392
+ "eval_loss": 1.4360722303390503,
393
+ "eval_rouge1": 31.0337,
394
+ "eval_rouge2": 10.6724,
395
+ "eval_rougeL": 22.3815,
396
+ "eval_rougeLsum": 29.6325,
397
+ "eval_runtime": 1808.8854,
398
+ "eval_samples_per_second": 0.187,
399
+ "eval_steps_per_second": 0.024,
400
+ "step": 100
401
+ },
402
+ {
403
+ "epoch": 7.1,
404
+ "learning_rate": 0.001,
405
+ "loss": 0.7653,
406
+ "step": 102
407
+ },
408
+ {
409
+ "epoch": 7.23,
410
+ "learning_rate": 0.001,
411
+ "loss": 0.7402,
412
+ "step": 104
413
+ },
414
+ {
415
+ "epoch": 7.37,
416
+ "learning_rate": 0.001,
417
+ "loss": 0.7582,
418
+ "step": 106
419
+ },
420
+ {
421
+ "epoch": 7.51,
422
+ "learning_rate": 0.001,
423
+ "loss": 0.7518,
424
+ "step": 108
425
+ },
426
+ {
427
+ "epoch": 7.65,
428
+ "learning_rate": 0.001,
429
+ "loss": 0.7486,
430
+ "step": 110
431
+ },
432
+ {
433
+ "epoch": 7.79,
434
+ "learning_rate": 0.001,
435
+ "loss": 0.7645,
436
+ "step": 112
437
+ },
438
+ {
439
+ "epoch": 7.93,
440
+ "learning_rate": 0.001,
441
+ "loss": 0.7498,
442
+ "step": 114
443
+ },
444
+ {
445
+ "epoch": 8.0,
446
+ "eval_gen_len": 473.896449704142,
447
+ "eval_loss": 1.530242681503296,
448
+ "eval_rouge1": 28.433,
449
+ "eval_rouge2": 8.4887,
450
+ "eval_rougeL": 21.3588,
451
+ "eval_rougeLsum": 26.6817,
452
+ "eval_runtime": 1807.993,
453
+ "eval_samples_per_second": 0.187,
454
+ "eval_steps_per_second": 0.024,
455
+ "step": 115
456
+ },
457
+ {
458
+ "epoch": 8.07,
459
+ "learning_rate": 0.001,
460
+ "loss": 0.6877,
461
+ "step": 116
462
+ },
463
+ {
464
+ "epoch": 8.21,
465
+ "learning_rate": 0.001,
466
+ "loss": 0.6278,
467
+ "step": 118
468
+ },
469
+ {
470
+ "epoch": 8.35,
471
+ "learning_rate": 0.001,
472
+ "loss": 0.6602,
473
+ "step": 120
474
+ },
475
+ {
476
+ "epoch": 8.49,
477
+ "learning_rate": 0.001,
478
+ "loss": 0.6408,
479
+ "step": 122
480
+ },
481
+ {
482
+ "epoch": 8.63,
483
+ "learning_rate": 0.001,
484
+ "loss": 0.6514,
485
+ "step": 124
486
+ },
487
+ {
488
+ "epoch": 8.77,
489
+ "learning_rate": 0.001,
490
+ "loss": 0.6434,
491
+ "step": 126
492
+ },
493
+ {
494
+ "epoch": 8.9,
495
+ "learning_rate": 0.001,
496
+ "loss": 0.6226,
497
+ "step": 128
498
+ },
499
+ {
500
+ "epoch": 8.97,
501
+ "eval_gen_len": 358.76627218934914,
502
+ "eval_loss": 1.628932237625122,
503
+ "eval_rouge1": 37.251,
504
+ "eval_rouge2": 12.8214,
505
+ "eval_rougeL": 24.8704,
506
+ "eval_rougeLsum": 36.0027,
507
+ "eval_runtime": 1807.901,
508
+ "eval_samples_per_second": 0.187,
509
+ "eval_steps_per_second": 0.024,
510
+ "step": 129
511
+ },
512
+ {
513
+ "epoch": 9.04,
514
+ "learning_rate": 0.001,
515
+ "loss": 0.5826,
516
+ "step": 130
517
+ },
518
+ {
519
+ "epoch": 9.18,
520
+ "learning_rate": 0.001,
521
+ "loss": 0.5105,
522
+ "step": 132
523
+ },
524
+ {
525
+ "epoch": 9.32,
526
+ "learning_rate": 0.001,
527
+ "loss": 0.5395,
528
+ "step": 134
529
+ },
530
+ {
531
+ "epoch": 9.46,
532
+ "learning_rate": 0.001,
533
+ "loss": 0.5103,
534
+ "step": 136
535
+ },
536
+ {
537
+ "epoch": 9.6,
538
+ "learning_rate": 0.001,
539
+ "loss": 0.5377,
540
+ "step": 138
541
+ },
542
+ {
543
+ "epoch": 9.74,
544
+ "learning_rate": 0.001,
545
+ "loss": 0.5558,
546
+ "step": 140
547
+ },
548
+ {
549
+ "epoch": 9.74,
550
+ "eval_gen_len": 284.0266272189349,
551
+ "eval_loss": 1.5811121463775635,
552
+ "eval_rouge1": 35.4657,
553
+ "eval_rouge2": 12.0036,
554
+ "eval_rougeL": 24.7787,
555
+ "eval_rougeLsum": 34.3775,
556
+ "eval_runtime": 1740.8347,
557
+ "eval_samples_per_second": 0.194,
558
+ "eval_steps_per_second": 0.025,
559
+ "step": 140
560
+ },
561
+ {
562
+ "epoch": 9.74,
563
+ "step": 140,
564
+ "total_flos": 2.447850236380324e+18,
565
+ "train_loss": 1.2823251613548823,
566
+ "train_runtime": 53785.754,
567
+ "train_samples_per_second": 0.683,
568
+ "train_steps_per_second": 0.003
569
+ }
570
+ ],
571
+ "logging_steps": 2,
572
+ "max_steps": 140,
573
+ "num_train_epochs": 10,
574
+ "save_steps": 500,
575
+ "total_flos": 2.447850236380324e+18,
576
+ "trial_name": null,
577
+ "trial_params": null
578
+ }