|
{ |
|
"best_metric": 0.18517187237739563, |
|
"best_model_checkpoint": "./results/checkpoint-16500", |
|
"epoch": 2.9333333333333336, |
|
"global_step": 16500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3388, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.9248120300751884e-05, |
|
"loss": 0.2803, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.849624060150376e-05, |
|
"loss": 0.2595, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_loss": 0.24845312535762787, |
|
"eval_runtime": 152.0435, |
|
"eval_samples_per_second": 59.194, |
|
"eval_steps_per_second": 0.927, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.774436090225564e-05, |
|
"loss": 0.2445, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.699248120300752e-05, |
|
"loss": 0.2357, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.62406015037594e-05, |
|
"loss": 0.2308, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_loss": 0.2250121533870697, |
|
"eval_runtime": 151.8556, |
|
"eval_samples_per_second": 59.267, |
|
"eval_steps_per_second": 0.929, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.548872180451128e-05, |
|
"loss": 0.2239, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.473684210526316e-05, |
|
"loss": 0.2204, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.398496240601504e-05, |
|
"loss": 0.2152, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 0.21273697912693024, |
|
"eval_runtime": 152.3413, |
|
"eval_samples_per_second": 59.078, |
|
"eval_steps_per_second": 0.926, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 4.323308270676692e-05, |
|
"loss": 0.213, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.24812030075188e-05, |
|
"loss": 0.2085, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 4.172932330827068e-05, |
|
"loss": 0.1893, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"eval_loss": 0.20545659959316254, |
|
"eval_runtime": 151.7962, |
|
"eval_samples_per_second": 59.29, |
|
"eval_steps_per_second": 0.929, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 4.097744360902256e-05, |
|
"loss": 0.1851, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 4.022556390977444e-05, |
|
"loss": 0.1827, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 3.9473684210526316e-05, |
|
"loss": 0.1823, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"eval_loss": 0.20052604377269745, |
|
"eval_runtime": 151.8901, |
|
"eval_samples_per_second": 59.253, |
|
"eval_steps_per_second": 0.928, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 3.87218045112782e-05, |
|
"loss": 0.1791, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 3.796992481203008e-05, |
|
"loss": 0.1771, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 3.721804511278196e-05, |
|
"loss": 0.1759, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_loss": 0.19474565982818604, |
|
"eval_runtime": 151.9186, |
|
"eval_samples_per_second": 59.242, |
|
"eval_steps_per_second": 0.928, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 3.6466165413533835e-05, |
|
"loss": 0.1761, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 3.571428571428572e-05, |
|
"loss": 0.1759, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 3.49624060150376e-05, |
|
"loss": 0.1727, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"eval_loss": 0.19019705057144165, |
|
"eval_runtime": 151.9085, |
|
"eval_samples_per_second": 59.246, |
|
"eval_steps_per_second": 0.928, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 3.421052631578947e-05, |
|
"loss": 0.1724, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 3.3458646616541355e-05, |
|
"loss": 0.1592, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 3.2706766917293236e-05, |
|
"loss": 0.148, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"eval_loss": 0.19001474976539612, |
|
"eval_runtime": 152.6517, |
|
"eval_samples_per_second": 58.958, |
|
"eval_steps_per_second": 0.924, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 3.195488721804512e-05, |
|
"loss": 0.1477, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 3.120300751879699e-05, |
|
"loss": 0.1469, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 3.0451127819548874e-05, |
|
"loss": 0.1488, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_loss": 0.18920138478279114, |
|
"eval_runtime": 151.8531, |
|
"eval_samples_per_second": 59.268, |
|
"eval_steps_per_second": 0.929, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 2.9699248120300755e-05, |
|
"loss": 0.1486, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 2.8947368421052634e-05, |
|
"loss": 0.1471, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 2.8195488721804515e-05, |
|
"loss": 0.147, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"eval_loss": 0.186552956700325, |
|
"eval_runtime": 151.8712, |
|
"eval_samples_per_second": 59.261, |
|
"eval_steps_per_second": 0.928, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 2.7443609022556393e-05, |
|
"loss": 0.147, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 2.6691729323308275e-05, |
|
"loss": 0.1461, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 2.5939849624060153e-05, |
|
"loss": 0.1453, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"eval_loss": 0.18517187237739563, |
|
"eval_runtime": 151.8569, |
|
"eval_samples_per_second": 59.266, |
|
"eval_steps_per_second": 0.929, |
|
"step": 16500 |
|
} |
|
], |
|
"max_steps": 33750, |
|
"num_train_epochs": 6, |
|
"total_flos": 2.612547588980736e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|