{
  "best_metric": 29.1144,
  "best_model_checkpoint": "./ko-en_mbartLarge_exp20p_linear_alpha/checkpoint-60000",
  "epoch": 8.81772827474185,
  "eval_steps": 4000,
  "global_step": 76000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.06, "learning_rate": 5.4920234365935726e-05, "loss": 2.3766, "step": 500 },
    { "epoch": 0.12, "learning_rate": 5.484046873187145e-05, "loss": 1.7994, "step": 1000 },
    { "epoch": 0.17, "learning_rate": 5.476070309780717e-05, "loss": 1.6333, "step": 1500 },
    { "epoch": 0.23, "learning_rate": 5.4680937463742896e-05, "loss": 1.554, "step": 2000 },
    { "epoch": 0.29, "learning_rate": 5.460117182967862e-05, "loss": 1.5092, "step": 2500 },
    { "epoch": 0.35, "learning_rate": 5.4521406195614344e-05, "loss": 1.4709, "step": 3000 },
    { "epoch": 0.41, "learning_rate": 5.444164056155007e-05, "loss": 1.4354, "step": 3500 },
    { "epoch": 0.46, "learning_rate": 5.4361874927485784e-05, "loss": 1.404, "step": 4000 },
    { "epoch": 0.46, "eval_bleu": 22.5375, "eval_gen_len": 18.6852, "eval_loss": 1.373841404914856, "eval_runtime": 1144.9051, "eval_samples_per_second": 15.055, "eval_steps_per_second": 0.942, "step": 4000 },
    { "epoch": 0.52, "learning_rate": 5.4282109293421515e-05, "loss": 1.3753, "step": 4500 },
    { "epoch": 0.58, "learning_rate": 5.420234365935724e-05, "loss": 1.3399, "step": 5000 },
    { "epoch": 0.64, "learning_rate": 5.412257802529296e-05, "loss": 1.3272, "step": 5500 },
    { "epoch": 0.7, "learning_rate": 5.404281239122868e-05, "loss": 1.3226, "step": 6000 },
    { "epoch": 0.75, "learning_rate": 5.39630467571644e-05, "loss": 1.2978, "step": 6500 },
    { "epoch": 0.81, "learning_rate": 5.388328112310013e-05, "loss": 1.269, "step": 7000 },
    { "epoch": 0.87, "learning_rate": 5.3803515489035856e-05, "loss": 1.2787, "step": 7500 },
    { "epoch": 0.93, "learning_rate": 5.372374985497158e-05, "loss": 1.2629, "step": 8000 },
    { "epoch": 0.93, "eval_bleu": 25.3741, "eval_gen_len": 18.7797, "eval_loss": 1.245756983757019, "eval_runtime": 1156.1784, "eval_samples_per_second": 14.909, "eval_steps_per_second": 0.932, "step": 8000 },
    { "epoch": 0.99, "learning_rate": 5.36439842209073e-05, "loss": 1.2572, "step": 8500 },
    { "epoch": 1.04, "learning_rate": 5.356421858684302e-05, "loss": 1.2035, "step": 9000 },
    { "epoch": 1.1, "learning_rate": 5.348445295277875e-05, "loss": 1.1945, "step": 9500 },
    { "epoch": 1.16, "learning_rate": 5.3404687318714474e-05, "loss": 1.179, "step": 10000 },
    { "epoch": 1.22, "learning_rate": 5.332492168465019e-05, "loss": 1.2032, "step": 10500 },
    { "epoch": 1.28, "learning_rate": 5.3245156050585915e-05, "loss": 1.192, "step": 11000 },
    { "epoch": 1.33, "learning_rate": 5.316539041652164e-05, "loss": 1.1795, "step": 11500 },
    { "epoch": 1.39, "learning_rate": 5.308562478245736e-05, "loss": 1.1951, "step": 12000 },
    { "epoch": 1.39, "eval_bleu": 26.1281, "eval_gen_len": 18.6597, "eval_loss": 1.2066867351531982, "eval_runtime": 1158.2083, "eval_samples_per_second": 14.882, "eval_steps_per_second": 0.931, "step": 12000 },
    { "epoch": 1.45, "learning_rate": 5.300585914839309e-05, "loss": 1.1704, "step": 12500 },
    { "epoch": 1.51, "learning_rate": 5.292609351432881e-05, "loss": 1.1547, "step": 13000 },
    { "epoch": 1.57, "learning_rate": 5.284632788026453e-05, "loss": 1.1419, "step": 13500 },
    { "epoch": 1.62, "learning_rate": 5.276656224620026e-05, "loss": 1.1396, "step": 14000 },
    { "epoch": 1.68, "learning_rate": 5.268679661213598e-05, "loss": 1.1549, "step": 14500 },
    { "epoch": 1.74, "learning_rate": 5.2607030978071704e-05, "loss": 1.1255, "step": 15000 },
    { "epoch": 1.8, "learning_rate": 5.252726534400743e-05, "loss": 1.1395, "step": 15500 },
    { "epoch": 1.86, "learning_rate": 5.244749970994315e-05, "loss": 1.1317, "step": 16000 },
    { "epoch": 1.86, "eval_bleu": 26.5384, "eval_gen_len": 19.2055, "eval_loss": 1.1767752170562744, "eval_runtime": 1178.3467, "eval_samples_per_second": 14.628, "eval_steps_per_second": 0.915, "step": 16000 },
    { "epoch": 1.91, "learning_rate": 5.2367734075878875e-05, "loss": 1.1557, "step": 16500 },
    { "epoch": 1.97, "learning_rate": 5.22879684418146e-05, "loss": 1.1478, "step": 17000 },
    { "epoch": 2.03, "learning_rate": 5.2208202807750315e-05, "loss": 1.0771, "step": 17500 },
    { "epoch": 2.09, "learning_rate": 5.2128437173686046e-05, "loss": 1.0135, "step": 18000 },
    { "epoch": 2.15, "learning_rate": 5.204867153962177e-05, "loss": 1.0028, "step": 18500 },
    { "epoch": 2.2, "learning_rate": 5.196890590555749e-05, "loss": 0.9894, "step": 19000 },
    { "epoch": 2.26, "learning_rate": 5.1889140271493216e-05, "loss": 1.0178, "step": 19500 },
    { "epoch": 2.32, "learning_rate": 5.180937463742893e-05, "loss": 0.9906, "step": 20000 },
    { "epoch": 2.32, "eval_bleu": 28.2459, "eval_gen_len": 18.7269, "eval_loss": 1.1362603902816772, "eval_runtime": 1157.7765, "eval_samples_per_second": 14.888, "eval_steps_per_second": 0.931, "step": 20000 },
    { "epoch": 2.38, "learning_rate": 5.1729609003364664e-05, "loss": 1.0083, "step": 20500 },
    { "epoch": 2.44, "learning_rate": 5.164984336930039e-05, "loss": 0.9965, "step": 21000 },
    { "epoch": 2.49, "learning_rate": 5.157007773523611e-05, "loss": 0.9992, "step": 21500 },
    { "epoch": 2.55, "learning_rate": 5.149031210117183e-05, "loss": 1.0042, "step": 22000 },
    { "epoch": 2.61, "learning_rate": 5.141054646710755e-05, "loss": 0.9998, "step": 22500 },
    { "epoch": 2.67, "learning_rate": 5.1330780833043275e-05, "loss": 1.0076, "step": 23000 },
    { "epoch": 2.73, "learning_rate": 5.1251015198979005e-05, "loss": 1.0047, "step": 23500 },
    { "epoch": 2.78, "learning_rate": 5.117124956491473e-05, "loss": 0.9894, "step": 24000 },
    { "epoch": 2.78, "eval_bleu": 28.5124, "eval_gen_len": 18.6882, "eval_loss": 1.1238752603530884, "eval_runtime": 1152.8912, "eval_samples_per_second": 14.951, "eval_steps_per_second": 0.935, "step": 24000 },
    { "epoch": 2.84, "learning_rate": 5.1091483930850446e-05, "loss": 1.0115, "step": 24500 },
    { "epoch": 2.9, "learning_rate": 5.101171829678617e-05, "loss": 0.9958, "step": 25000 },
    { "epoch": 2.96, "learning_rate": 5.093195266272189e-05, "loss": 1.0044, "step": 25500 },
    { "epoch": 3.02, "learning_rate": 5.0852187028657624e-05, "loss": 0.9668, "step": 26000 },
    { "epoch": 3.07, "learning_rate": 5.077242139459334e-05, "loss": 0.8751, "step": 26500 },
    { "epoch": 3.13, "learning_rate": 5.0692655760529064e-05, "loss": 0.8916, "step": 27000 },
    { "epoch": 3.19, "learning_rate": 5.061289012646479e-05, "loss": 0.8861, "step": 27500 },
    { "epoch": 3.25, "learning_rate": 5.053312449240051e-05, "loss": 0.8965, "step": 28000 },
    { "epoch": 3.25, "eval_bleu": 28.5335, "eval_gen_len": 18.4917, "eval_loss": 1.1277672052383423, "eval_runtime": 1142.9887, "eval_samples_per_second": 15.081, "eval_steps_per_second": 0.943, "step": 28000 },
    { "epoch": 3.31, "learning_rate": 5.045335885833624e-05, "loss": 0.8898, "step": 28500 },
    { "epoch": 3.36, "learning_rate": 5.037359322427196e-05, "loss": 0.8982, "step": 29000 },
    { "epoch": 3.42, "learning_rate": 5.029382759020768e-05, "loss": 0.896, "step": 29500 },
    { "epoch": 3.48, "learning_rate": 5.0214061956143406e-05, "loss": 0.8889, "step": 30000 },
    { "epoch": 3.54, "learning_rate": 5.013429632207913e-05, "loss": 0.9056, "step": 30500 },
    { "epoch": 3.6, "learning_rate": 5.005453068801485e-05, "loss": 0.8867, "step": 31000 },
    { "epoch": 3.65, "learning_rate": 4.997476505395058e-05, "loss": 0.911, "step": 31500 },
    { "epoch": 3.71, "learning_rate": 4.98949994198863e-05, "loss": 0.9138, "step": 32000 },
    { "epoch": 3.71, "eval_bleu": 28.8189, "eval_gen_len": 18.7873, "eval_loss": 1.1216284036636353, "eval_runtime": 1156.9444, "eval_samples_per_second": 14.899, "eval_steps_per_second": 0.932, "step": 32000 },
    { "epoch": 3.77, "learning_rate": 4.9815233785822024e-05, "loss": 0.9023, "step": 32500 },
    { "epoch": 3.83, "learning_rate": 4.973546815175775e-05, "loss": 0.9127, "step": 33000 },
    { "epoch": 3.89, "learning_rate": 4.9655702517693464e-05, "loss": 0.9168, "step": 33500 },
    { "epoch": 3.94, "learning_rate": 4.9575936883629195e-05, "loss": 0.8978, "step": 34000 },
    { "epoch": 4.0, "learning_rate": 4.949617124956492e-05, "loss": 0.9229, "step": 34500 },
    { "epoch": 4.06, "learning_rate": 4.941640561550064e-05, "loss": 0.828, "step": 35000 },
    { "epoch": 4.12, "learning_rate": 4.9336639981436366e-05, "loss": 0.8326, "step": 35500 },
    { "epoch": 4.18, "learning_rate": 4.925687434737208e-05, "loss": 0.8272, "step": 36000 },
    { "epoch": 4.18, "eval_bleu": 28.332, "eval_gen_len": 18.6516, "eval_loss": 1.1468099355697632, "eval_runtime": 1152.7736, "eval_samples_per_second": 14.953, "eval_steps_per_second": 0.935, "step": 36000 },
    { "epoch": 4.23, "learning_rate": 4.9177108713307806e-05, "loss": 0.8259, "step": 36500 },
    { "epoch": 4.29, "learning_rate": 4.9097343079243536e-05, "loss": 0.8243, "step": 37000 },
    { "epoch": 4.35, "learning_rate": 4.901757744517926e-05, "loss": 0.8664, "step": 37500 },
    { "epoch": 4.41, "learning_rate": 4.893781181111498e-05, "loss": 0.8893, "step": 38000 },
    { "epoch": 4.47, "learning_rate": 4.88580461770507e-05, "loss": 0.8958, "step": 38500 },
    { "epoch": 4.52, "learning_rate": 4.8778280542986424e-05, "loss": 0.8924, "step": 39000 },
    { "epoch": 4.58, "learning_rate": 4.8698514908922155e-05, "loss": 0.885, "step": 39500 },
    { "epoch": 4.64, "learning_rate": 4.861874927485788e-05, "loss": 0.8753, "step": 40000 },
    { "epoch": 4.64, "eval_bleu": 28.2695, "eval_gen_len": 18.4919, "eval_loss": 1.1344704627990723, "eval_runtime": 1148.1227, "eval_samples_per_second": 15.013, "eval_steps_per_second": 0.939, "step": 40000 },
    { "epoch": 4.7, "learning_rate": 4.8538983640793595e-05, "loss": 0.8749, "step": 40500 },
    { "epoch": 4.76, "learning_rate": 4.845921800672932e-05, "loss": 0.8621, "step": 41000 },
    { "epoch": 4.81, "learning_rate": 4.837945237266504e-05, "loss": 0.8616, "step": 41500 },
    { "epoch": 4.87, "learning_rate": 4.8299686738600766e-05, "loss": 0.8551, "step": 42000 },
    { "epoch": 4.93, "learning_rate": 4.821992110453649e-05, "loss": 0.8504, "step": 42500 },
    { "epoch": 4.99, "learning_rate": 4.814015547047221e-05, "loss": 0.8459, "step": 43000 },
    { "epoch": 5.05, "learning_rate": 4.806038983640794e-05, "loss": 0.7255, "step": 43500 },
    { "epoch": 5.11, "learning_rate": 4.798062420234366e-05, "loss": 0.6855, "step": 44000 },
    { "epoch": 5.11, "eval_bleu": 28.7913, "eval_gen_len": 18.7596, "eval_loss": 1.154221773147583, "eval_runtime": 1168.8964, "eval_samples_per_second": 14.746, "eval_steps_per_second": 0.922, "step": 44000 },
    { "epoch": 5.16, "learning_rate": 4.7900858568279384e-05, "loss": 0.7002, "step": 44500 },
    { "epoch": 5.22, "learning_rate": 4.782109293421511e-05, "loss": 0.6982, "step": 45000 },
    { "epoch": 5.28, "learning_rate": 4.774132730015083e-05, "loss": 0.6976, "step": 45500 },
    { "epoch": 5.34, "learning_rate": 4.7661561666086555e-05, "loss": 0.7028, "step": 46000 },
    { "epoch": 5.4, "learning_rate": 4.758179603202228e-05, "loss": 0.7138, "step": 46500 },
    { "epoch": 5.45, "learning_rate": 4.7502030397958e-05, "loss": 0.7121, "step": 47000 },
    { "epoch": 5.51, "learning_rate": 4.7422264763893726e-05, "loss": 0.7043, "step": 47500 },
    { "epoch": 5.57, "learning_rate": 4.734249912982945e-05, "loss": 0.7088, "step": 48000 },
    { "epoch": 5.57, "eval_bleu": 29.0865, "eval_gen_len": 18.6626, "eval_loss": 1.153067708015442, "eval_runtime": 1149.2413, "eval_samples_per_second": 14.999, "eval_steps_per_second": 0.938, "step": 48000 },
    { "epoch": 5.63, "learning_rate": 4.726273349576517e-05, "loss": 0.7116, "step": 48500 },
    { "epoch": 5.69, "learning_rate": 4.71829678617009e-05, "loss": 0.7292, "step": 49000 },
    { "epoch": 5.74, "learning_rate": 4.710320222763662e-05, "loss": 0.7289, "step": 49500 },
    { "epoch": 5.8, "learning_rate": 4.702343659357234e-05, "loss": 0.7514, "step": 50000 },
    { "epoch": 5.86, "learning_rate": 4.694367095950807e-05, "loss": 0.7545, "step": 50500 },
    { "epoch": 5.92, "learning_rate": 4.686390532544379e-05, "loss": 0.7362, "step": 51000 },
    { "epoch": 5.98, "learning_rate": 4.6784139691379515e-05, "loss": 0.7413, "step": 51500 },
    { "epoch": 6.03, "learning_rate": 4.670437405731523e-05, "loss": 0.6738, "step": 52000 },
    { "epoch": 6.03, "eval_bleu": 28.0235, "eval_gen_len": 18.4243, "eval_loss": 1.190636396408081, "eval_runtime": 1139.2873, "eval_samples_per_second": 15.13, "eval_steps_per_second": 0.946, "step": 52000 },
    { "epoch": 6.09, "learning_rate": 4.6624608423250955e-05, "loss": 0.6467, "step": 52500 },
    { "epoch": 6.15, "learning_rate": 4.6544842789186686e-05, "loss": 0.6508, "step": 53000 },
    { "epoch": 6.21, "learning_rate": 4.646507715512241e-05, "loss": 0.6742, "step": 53500 },
    { "epoch": 6.27, "learning_rate": 4.638531152105813e-05, "loss": 0.6609, "step": 54000 },
    { "epoch": 6.32, "learning_rate": 4.630554588699385e-05, "loss": 0.6464, "step": 54500 },
    { "epoch": 6.38, "learning_rate": 4.622578025292957e-05, "loss": 0.6362, "step": 55000 },
    { "epoch": 6.44, "learning_rate": 4.61460146188653e-05, "loss": 0.6552, "step": 55500 },
    { "epoch": 6.5, "learning_rate": 4.606624898480103e-05, "loss": 0.6763, "step": 56000 },
    { "epoch": 6.5, "eval_bleu": 28.1501, "eval_gen_len": 18.6932, "eval_loss": 1.1940782070159912, "eval_runtime": 1154.4139, "eval_samples_per_second": 14.931, "eval_steps_per_second": 0.934, "step": 56000 },
    { "epoch": 6.56, "learning_rate": 4.5986483350736744e-05, "loss": 0.6775, "step": 56500 },
    { "epoch": 6.61, "learning_rate": 4.590671771667247e-05, "loss": 0.6765, "step": 57000 },
    { "epoch": 6.67, "learning_rate": 4.582695208260819e-05, "loss": 0.681, "step": 57500 },
    { "epoch": 6.73, "learning_rate": 4.5747186448543915e-05, "loss": 0.6747, "step": 58000 },
    { "epoch": 6.79, "learning_rate": 4.5667420814479645e-05, "loss": 0.6612, "step": 58500 },
    { "epoch": 6.85, "learning_rate": 4.558765518041536e-05, "loss": 0.6662, "step": 59000 },
    { "epoch": 6.9, "learning_rate": 4.5507889546351086e-05, "loss": 0.6653, "step": 59500 },
    { "epoch": 6.96, "learning_rate": 4.542812391228681e-05, "loss": 0.6594, "step": 60000 },
    { "epoch": 6.96, "eval_bleu": 29.1144, "eval_gen_len": 18.5459, "eval_loss": 1.1682050228118896, "eval_runtime": 1139.7821, "eval_samples_per_second": 15.123, "eval_steps_per_second": 0.946, "step": 60000 },
    { "epoch": 7.02, "learning_rate": 4.534835827822253e-05, "loss": 0.6333, "step": 60500 },
    { "epoch": 7.08, "learning_rate": 4.526859264415826e-05, "loss": 0.5491, "step": 61000 },
    { "epoch": 7.14, "learning_rate": 4.518882701009398e-05, "loss": 0.5476, "step": 61500 },
    { "epoch": 7.19, "learning_rate": 4.5109061376029704e-05, "loss": 0.5469, "step": 62000 },
    { "epoch": 7.25, "learning_rate": 4.502929574196543e-05, "loss": 0.5708, "step": 62500 },
    { "epoch": 7.31, "learning_rate": 4.494953010790115e-05, "loss": 0.5785, "step": 63000 },
    { "epoch": 7.37, "learning_rate": 4.486976447383687e-05, "loss": 0.5803, "step": 63500 },
    { "epoch": 7.43, "learning_rate": 4.47899988397726e-05, "loss": 0.5971, "step": 64000 },
    { "epoch": 7.43, "eval_bleu": 27.9464, "eval_gen_len": 18.4482, "eval_loss": 1.2448896169662476, "eval_runtime": 1136.4568, "eval_samples_per_second": 15.167, "eval_steps_per_second": 0.949, "step": 64000 },
    { "epoch": 7.48, "learning_rate": 4.471023320570832e-05, "loss": 0.5985, "step": 64500 },
    { "epoch": 7.54, "learning_rate": 4.4630467571644046e-05, "loss": 0.6032, "step": 65000 },
    { "epoch": 7.6, "learning_rate": 4.455070193757977e-05, "loss": 0.6079, "step": 65500 },
    { "epoch": 7.66, "learning_rate": 4.4470936303515486e-05, "loss": 0.6104, "step": 66000 },
    { "epoch": 7.72, "learning_rate": 4.439117066945122e-05, "loss": 0.6063, "step": 66500 },
    { "epoch": 7.77, "learning_rate": 4.431140503538694e-05, "loss": 0.607, "step": 67000 },
    { "epoch": 7.83, "learning_rate": 4.4231639401322664e-05, "loss": 0.5891, "step": 67500 },
    { "epoch": 7.89, "learning_rate": 4.415187376725838e-05, "loss": 0.5935, "step": 68000 },
    { "epoch": 7.89, "eval_bleu": 28.6034, "eval_gen_len": 18.5967, "eval_loss": 1.2156028747558594, "eval_runtime": 1146.9398, "eval_samples_per_second": 15.029, "eval_steps_per_second": 0.94, "step": 68000 },
    { "epoch": 7.95, "learning_rate": 4.4072108133194104e-05, "loss": 0.5981, "step": 68500 },
    { "epoch": 8.01, "learning_rate": 4.399234249912983e-05, "loss": 0.5881, "step": 69000 },
    { "epoch": 8.06, "learning_rate": 4.391257686506556e-05, "loss": 0.5013, "step": 69500 },
    { "epoch": 8.12, "learning_rate": 4.383281123100128e-05, "loss": 0.5109, "step": 70000 },
    { "epoch": 8.18, "learning_rate": 4.3753045596937e-05, "loss": 0.5187, "step": 70500 },
    { "epoch": 8.24, "learning_rate": 4.367327996287272e-05, "loss": 0.5198, "step": 71000 },
    { "epoch": 8.3, "learning_rate": 4.3593514328808446e-05, "loss": 0.5175, "step": 71500 },
    { "epoch": 8.35, "learning_rate": 4.3513748694744177e-05, "loss": 0.5383, "step": 72000 },
    { "epoch": 8.35, "eval_bleu": 27.891, "eval_gen_len": 18.6539, "eval_loss": 1.2927179336547852, "eval_runtime": 1152.5208, "eval_samples_per_second": 14.956, "eval_steps_per_second": 0.935, "step": 72000 },
    { "epoch": 8.41, "learning_rate": 4.343398306067989e-05, "loss": 0.5285, "step": 72500 },
    { "epoch": 8.47, "learning_rate": 4.335421742661562e-05, "loss": 0.5346, "step": 73000 },
    { "epoch": 8.53, "learning_rate": 4.327445179255134e-05, "loss": 0.5601, "step": 73500 },
    { "epoch": 8.59, "learning_rate": 4.3194686158487064e-05, "loss": 0.5869, "step": 74000 },
    { "epoch": 8.64, "learning_rate": 4.311492052442279e-05, "loss": 0.5873, "step": 74500 },
    { "epoch": 8.7, "learning_rate": 4.303515489035851e-05, "loss": 0.5924, "step": 75000 },
    { "epoch": 8.76, "learning_rate": 4.2955389256294235e-05, "loss": 0.601, "step": 75500 },
    { "epoch": 8.82, "learning_rate": 4.287562362222996e-05, "loss": 0.6022, "step": 76000 },
    { "epoch": 8.82, "eval_bleu": 27.7624, "eval_gen_len": 18.5558, "eval_loss": 1.283076286315918, "eval_runtime": 1144.2558, "eval_samples_per_second": 15.064, "eval_steps_per_second": 0.942, "step": 76000 },
    { "epoch": 8.82, "step": 76000, "total_flos": 2.6353338197706015e+18, "train_loss": 0.8847039678473222, "train_runtime": 77092.0129, "train_samples_per_second": 71.551, "train_steps_per_second": 4.472 }
  ],
  "logging_steps": 500,
  "max_steps": 344760,
  "num_train_epochs": 40,
  "save_steps": 4000,
  "total_flos": 2.6353338197706015e+18,
  "trial_name": null,
  "trial_params": null
}