|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"global_step": 29745, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.9159522608841824e-05, |
|
"loss": 1.6853, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.8319045217683645e-05, |
|
"loss": 1.2089, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.7478567826525466e-05, |
|
"loss": 1.0874, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.6638090435367294e-05, |
|
"loss": 1.0041, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.5797613044209115e-05, |
|
"loss": 0.9324, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.4957135653050936e-05, |
|
"loss": 0.901, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.411665826189276e-05, |
|
"loss": 0.8671, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.327618087073458e-05, |
|
"loss": 0.8286, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.24357034795764e-05, |
|
"loss": 0.8044, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.159522608841822e-05, |
|
"loss": 0.7759, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.075474869726005e-05, |
|
"loss": 0.7616, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.991427130610187e-05, |
|
"loss": 0.7501, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.907379391494369e-05, |
|
"loss": 0.7303, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.823331652378551e-05, |
|
"loss": 0.7281, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.7392839132627334e-05, |
|
"loss": 0.6958, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.6552361741469155e-05, |
|
"loss": 0.7032, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.5711884350310976e-05, |
|
"loss": 0.6656, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.4871406959152804e-05, |
|
"loss": 0.6875, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.4030929567994625e-05, |
|
"loss": 0.655, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.3190452176836446e-05, |
|
"loss": 0.6567, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.234997478567827e-05, |
|
"loss": 0.6277, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.150949739452009e-05, |
|
"loss": 0.6331, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.066902000336191e-05, |
|
"loss": 0.6342, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 2.9828542612203734e-05, |
|
"loss": 0.6065, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 2.898806522104556e-05, |
|
"loss": 0.6116, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 2.814758782988738e-05, |
|
"loss": 0.611, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 2.73071104387292e-05, |
|
"loss": 0.6014, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.6466633047571022e-05, |
|
"loss": 0.5935, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.5626155656412843e-05, |
|
"loss": 0.5846, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.4785678265254668e-05, |
|
"loss": 0.5844, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.394520087409649e-05, |
|
"loss": 0.5863, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.310472348293831e-05, |
|
"loss": 0.5635, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.226424609178013e-05, |
|
"loss": 0.5627, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.1423768700621956e-05, |
|
"loss": 0.568, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.0583291309463777e-05, |
|
"loss": 0.5658, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 1.97428139183056e-05, |
|
"loss": 0.5505, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.890233652714742e-05, |
|
"loss": 0.5476, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.8061859135989244e-05, |
|
"loss": 0.5423, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.7221381744831065e-05, |
|
"loss": 0.5451, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.6380904353672887e-05, |
|
"loss": 0.5473, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.5540426962514708e-05, |
|
"loss": 0.5387, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.469994957135653e-05, |
|
"loss": 0.5282, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.3859472180198355e-05, |
|
"loss": 0.5138, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 1.3018994789040176e-05, |
|
"loss": 0.5181, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.2178517397881997e-05, |
|
"loss": 0.5124, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 1.133804000672382e-05, |
|
"loss": 0.507, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.0497562615565642e-05, |
|
"loss": 0.5033, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 9.657085224407464e-06, |
|
"loss": 0.5061, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 8.816607833249287e-06, |
|
"loss": 0.4931, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 7.976130442091108e-06, |
|
"loss": 0.4979, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 7.1356530509329304e-06, |
|
"loss": 0.4922, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 6.295175659774752e-06, |
|
"loss": 0.4972, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 5.4546982686165745e-06, |
|
"loss": 0.4821, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 4.6142208774583965e-06, |
|
"loss": 0.488, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 3.7737434863002185e-06, |
|
"loss": 0.4838, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 2.933266095142041e-06, |
|
"loss": 0.4894, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.092788703983863e-06, |
|
"loss": 0.4894, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.252311312825685e-06, |
|
"loss": 0.486, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 4.1183392166750716e-07, |
|
"loss": 0.4803, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 29745, |
|
"total_flos": 2.7743143438424474e+17, |
|
"train_loss": 0.6477808796030912, |
|
"train_runtime": 32967.4964, |
|
"train_samples_per_second": 9.023, |
|
"train_steps_per_second": 0.902 |
|
} |
|
], |
|
"max_steps": 29745, |
|
"num_train_epochs": 3, |
|
"total_flos": 2.7743143438424474e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|