|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"global_step": 35889, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0009860681545877568, |
|
"loss": 6.095, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0009721363091755134, |
|
"loss": 2.9616, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0009582044637632701, |
|
"loss": 2.666, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0009442726183510269, |
|
"loss": 2.5328, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0009303407729387835, |
|
"loss": 2.497, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0009164089275265401, |
|
"loss": 2.4232, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0009024770821142969, |
|
"loss": 2.4132, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0008885452367020536, |
|
"loss": 2.3761, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0008746133912898102, |
|
"loss": 2.3447, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.000860681545877567, |
|
"loss": 2.2711, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0008467497004653236, |
|
"loss": 2.2602, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0008328178550530803, |
|
"loss": 2.2645, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0008188860096408371, |
|
"loss": 2.2384, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0008049541642285937, |
|
"loss": 2.2474, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_gen_len": 91.9, |
|
"eval_loss": 1.914839744567871, |
|
"eval_rouge1": 31.9324, |
|
"eval_rouge2": 8.9596, |
|
"eval_rougeL": 18.5991, |
|
"eval_rougeLsum": 26.8462, |
|
"eval_runtime": 20609.4374, |
|
"eval_samples_per_second": 0.649, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0007910223188163504, |
|
"loss": 2.2039, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0007770904734041071, |
|
"loss": 2.189, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0007631586279918639, |
|
"loss": 2.1961, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0007492267825796205, |
|
"loss": 2.1764, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0007352949371673772, |
|
"loss": 2.1777, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0007213630917551339, |
|
"loss": 2.1412, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0007074312463428905, |
|
"loss": 2.1669, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0006934994009306474, |
|
"loss": 2.1426, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.000679567555518404, |
|
"loss": 2.1428, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0006656357101061606, |
|
"loss": 2.1108, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0006517038646939174, |
|
"loss": 2.0903, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.000637772019281674, |
|
"loss": 2.1057, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0006238401738694308, |
|
"loss": 2.0874, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0006099083284571875, |
|
"loss": 2.0877, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_gen_len": 99.3, |
|
"eval_loss": 1.8038697242736816, |
|
"eval_rouge1": 31.4652, |
|
"eval_rouge2": 8.6423, |
|
"eval_rougeL": 18.1808, |
|
"eval_rougeLsum": 26.2653, |
|
"eval_runtime": 21262.825, |
|
"eval_samples_per_second": 0.629, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0005959764830449441, |
|
"loss": 2.0903, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0005820446376327009, |
|
"loss": 2.0628, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0005681127922204575, |
|
"loss": 2.0909, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0005541809468082142, |
|
"loss": 2.0258, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.000540249101395971, |
|
"loss": 2.0156, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0005263172559837276, |
|
"loss": 2.0282, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0005123854105714843, |
|
"loss": 2.0559, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.000498453565159241, |
|
"loss": 2.0603, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0004845217197469977, |
|
"loss": 2.0077, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0004705898743347544, |
|
"loss": 2.0119, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00045665802892251106, |
|
"loss": 1.9855, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0004427261835102678, |
|
"loss": 2.0089, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0004287943380980245, |
|
"loss": 2.0029, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00041486249268578117, |
|
"loss": 1.9773, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_gen_len": 101.6, |
|
"eval_loss": 1.73397696018219, |
|
"eval_rouge1": 31.1574, |
|
"eval_rouge2": 8.645, |
|
"eval_rougeL": 18.096, |
|
"eval_rougeLsum": 25.9822, |
|
"eval_runtime": 21624.4917, |
|
"eval_samples_per_second": 0.618, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00040093064727353785, |
|
"loss": 1.9632, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00038699880186129454, |
|
"loss": 1.9768, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0003730669564490513, |
|
"loss": 1.9755, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00035913511103680796, |
|
"loss": 2.0125, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.0003452032656245646, |
|
"loss": 1.9427, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.0003312714202123213, |
|
"loss": 1.9776, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.000317339574800078, |
|
"loss": 1.9966, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00030340772938783475, |
|
"loss": 1.9814, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00028947588397559143, |
|
"loss": 1.9585, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00027554403856334806, |
|
"loss": 1.9668, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.0002616121931511048, |
|
"loss": 1.9564, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.0002476803477388615, |
|
"loss": 1.9231, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00023374850232661817, |
|
"loss": 1.9078, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00021981665691437488, |
|
"loss": 1.9032, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_gen_len": 106.8, |
|
"eval_loss": 1.6808093786239624, |
|
"eval_rouge1": 31.4328, |
|
"eval_rouge2": 8.6241, |
|
"eval_rougeL": 18.0718, |
|
"eval_rougeLsum": 26.0718, |
|
"eval_runtime": 21979.649, |
|
"eval_samples_per_second": 0.608, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00020588481150213156, |
|
"loss": 1.931, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.00019195296608988827, |
|
"loss": 1.9416, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00017802112067764498, |
|
"loss": 1.963, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.00016408927526540167, |
|
"loss": 1.9066, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.00015015742985315835, |
|
"loss": 1.9583, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.00013622558444091503, |
|
"loss": 1.8948, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.00012229373902867174, |
|
"loss": 1.9424, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.00010836189361642844, |
|
"loss": 1.9082, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 9.443004820418513e-05, |
|
"loss": 1.9195, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 8.049820279194182e-05, |
|
"loss": 1.8821, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 6.656635737969852e-05, |
|
"loss": 1.9194, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 5.263451196745521e-05, |
|
"loss": 1.902, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.87026665552119e-05, |
|
"loss": 1.9154, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 2.4770821142968598e-05, |
|
"loss": 1.9181, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_gen_len": 107.9, |
|
"eval_loss": 1.6517904996871948, |
|
"eval_rouge1": 31.4185, |
|
"eval_rouge2": 8.601, |
|
"eval_rougeL": 17.9686, |
|
"eval_rougeLsum": 26.0844, |
|
"eval_runtime": 21981.6496, |
|
"eval_samples_per_second": 0.608, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.0838975730725292e-05, |
|
"loss": 1.9275, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 35889, |
|
"total_flos": 1069022126774016000, |
|
"train_runtime": 160064.9582, |
|
"train_samples_per_second": 0.224 |
|
} |
|
], |
|
"max_steps": 35889, |
|
"num_train_epochs": 1, |
|
"total_flos": 1069022126774016000, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|