|
{ |
|
"best_metric": 0.8353737113402062, |
|
"best_model_checkpoint": "bert_trainer/run-6/checkpoint-2500", |
|
"epoch": 3.2216494845360826, |
|
"eval_steps": 250, |
|
"global_step": 2500, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 5.985683583902487e-07, |
|
"loss": 1.498, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 1.4082, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 1.2917, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 1.2083, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 1.1513, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 1.1182, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 1.0869, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 1.0574, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.9843, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 1.0196, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_accuracy": 0.6401417525773195, |
|
"eval_loss": 0.9714874029159546, |
|
"eval_runtime": 53.062, |
|
"eval_samples_per_second": 58.498, |
|
"eval_steps_per_second": 3.656, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.9886, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.9861, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.9746, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.9386, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.9046, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.9213, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.892, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.8545, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.87, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.8463, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_accuracy": 0.6978092783505154, |
|
"eval_loss": 0.810293436050415, |
|
"eval_runtime": 53.0534, |
|
"eval_samples_per_second": 58.507, |
|
"eval_steps_per_second": 3.657, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.8516, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.8086, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.7894, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.7754, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.7558, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.7155, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.7151, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.758, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.6958, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.68, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_accuracy": 0.7503221649484536, |
|
"eval_loss": 0.6983952522277832, |
|
"eval_runtime": 57.8438, |
|
"eval_samples_per_second": 53.662, |
|
"eval_steps_per_second": 3.354, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.6553, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.6829, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.7116, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.6283, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.6559, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.6118, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.6675, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.6426, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.6344, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.6754, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"eval_accuracy": 0.7970360824742269, |
|
"eval_loss": 0.6213943362236023, |
|
"eval_runtime": 57.838, |
|
"eval_samples_per_second": 53.667, |
|
"eval_steps_per_second": 3.354, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.6535, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.64, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.6442, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.5873, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.587, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.5789, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.605, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.6078, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.6431, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.5824, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"eval_accuracy": 0.803479381443299, |
|
"eval_loss": 0.5681772232055664, |
|
"eval_runtime": 57.809, |
|
"eval_samples_per_second": 53.694, |
|
"eval_steps_per_second": 3.356, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.648, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.5709, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.5504, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.5699, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.5575, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.5644, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.587, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.5094, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.5724, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.5652, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"eval_accuracy": 0.8128221649484536, |
|
"eval_loss": 0.5369997024536133, |
|
"eval_runtime": 57.8152, |
|
"eval_samples_per_second": 53.688, |
|
"eval_steps_per_second": 3.356, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.5197, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.5499, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.58, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.5348, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.4741, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.5582, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.5219, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.4802, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.5147, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.4912, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"eval_accuracy": 0.8234536082474226, |
|
"eval_loss": 0.5059388279914856, |
|
"eval_runtime": 57.8239, |
|
"eval_samples_per_second": 53.68, |
|
"eval_steps_per_second": 3.355, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.5133, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.4629, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.4696, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.4623, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.4649, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.5314, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.4974, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.464, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.5332, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.4694, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"eval_accuracy": 0.8286082474226805, |
|
"eval_loss": 0.48415493965148926, |
|
"eval_runtime": 57.8531, |
|
"eval_samples_per_second": 53.653, |
|
"eval_steps_per_second": 3.353, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.5191, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.4969, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.4499, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.5144, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.482, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.4193, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.4928, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.4515, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.4775, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.5456, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"eval_accuracy": 0.833118556701031, |
|
"eval_loss": 0.4647773802280426, |
|
"eval_runtime": 57.8252, |
|
"eval_samples_per_second": 53.679, |
|
"eval_steps_per_second": 3.355, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.5226, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.4159, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.4486, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.3866, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.4634, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.4179, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.4446, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.4606, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.4261, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 1.1971367167804974e-06, |
|
"loss": 0.3907, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"eval_accuracy": 0.8353737113402062, |
|
"eval_loss": 0.45589184761047363, |
|
"eval_runtime": 57.8334, |
|
"eval_samples_per_second": 53.671, |
|
"eval_steps_per_second": 3.354, |
|
"step": 2500 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 3000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 250, |
|
"total_flos": 1.051683215428608e+16, |
|
"trial_name": null, |
|
"trial_params": { |
|
"learning_rate": 1.1971367167804974e-06, |
|
"per_device_eval_batch_size": 16, |
|
"per_device_train_batch_size": 16 |
|
} |
|
} |
|
|