|
{ |
|
"best_metric": 0.15806905925273895, |
|
"best_model_checkpoint": "./results/checkpoint-975", |
|
"epoch": 14.285714285714286, |
|
"eval_steps": 25, |
|
"global_step": 1000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0002, |
|
"loss": 2.1731, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_loss": 1.4641685485839844, |
|
"eval_runtime": 2.4526, |
|
"eval_samples_per_second": 22.833, |
|
"eval_steps_per_second": 2.854, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3444, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_loss": 1.161795735359192, |
|
"eval_runtime": 2.4547, |
|
"eval_samples_per_second": 22.813, |
|
"eval_steps_per_second": 2.852, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0438, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"eval_loss": 0.938343346118927, |
|
"eval_runtime": 2.4539, |
|
"eval_samples_per_second": 22.821, |
|
"eval_steps_per_second": 2.853, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9378, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"eval_loss": 0.8406057357788086, |
|
"eval_runtime": 2.4541, |
|
"eval_samples_per_second": 22.819, |
|
"eval_steps_per_second": 2.852, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8852, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"eval_loss": 0.7779091000556946, |
|
"eval_runtime": 2.4548, |
|
"eval_samples_per_second": 22.812, |
|
"eval_steps_per_second": 2.852, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8243, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"eval_loss": 0.7215237021446228, |
|
"eval_runtime": 2.4543, |
|
"eval_samples_per_second": 22.817, |
|
"eval_steps_per_second": 2.852, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7581, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"eval_loss": 0.6108285188674927, |
|
"eval_runtime": 2.4539, |
|
"eval_samples_per_second": 22.821, |
|
"eval_steps_per_second": 2.853, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6965, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"eval_loss": 0.5861143469810486, |
|
"eval_runtime": 2.4542, |
|
"eval_samples_per_second": 22.818, |
|
"eval_steps_per_second": 2.852, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6161, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"eval_loss": 0.5066039562225342, |
|
"eval_runtime": 2.4563, |
|
"eval_samples_per_second": 22.798, |
|
"eval_steps_per_second": 2.85, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5444, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"eval_loss": 0.45327526330947876, |
|
"eval_runtime": 2.4538, |
|
"eval_samples_per_second": 22.822, |
|
"eval_steps_per_second": 2.853, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5739, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"eval_loss": 0.4038705825805664, |
|
"eval_runtime": 2.4543, |
|
"eval_samples_per_second": 22.817, |
|
"eval_steps_per_second": 2.852, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4352, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"eval_loss": 0.3711726665496826, |
|
"eval_runtime": 2.4536, |
|
"eval_samples_per_second": 22.824, |
|
"eval_steps_per_second": 2.853, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4281, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"eval_loss": 0.3348071277141571, |
|
"eval_runtime": 2.4574, |
|
"eval_samples_per_second": 22.789, |
|
"eval_steps_per_second": 2.849, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4371, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 0.2986474633216858, |
|
"eval_runtime": 2.4531, |
|
"eval_samples_per_second": 22.829, |
|
"eval_steps_per_second": 2.854, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 5.36, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3143, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 5.36, |
|
"eval_loss": 0.29203733801841736, |
|
"eval_runtime": 2.454, |
|
"eval_samples_per_second": 22.82, |
|
"eval_steps_per_second": 2.853, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 5.71, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3315, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 5.71, |
|
"eval_loss": 0.26739758253097534, |
|
"eval_runtime": 2.4554, |
|
"eval_samples_per_second": 22.807, |
|
"eval_steps_per_second": 2.851, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3224, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"eval_loss": 0.2381574958562851, |
|
"eval_runtime": 2.4593, |
|
"eval_samples_per_second": 22.771, |
|
"eval_steps_per_second": 2.846, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 6.43, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2582, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 6.43, |
|
"eval_loss": 0.2326308786869049, |
|
"eval_runtime": 2.4554, |
|
"eval_samples_per_second": 22.807, |
|
"eval_steps_per_second": 2.851, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 6.79, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2889, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 6.79, |
|
"eval_loss": 0.22920013964176178, |
|
"eval_runtime": 2.4566, |
|
"eval_samples_per_second": 22.796, |
|
"eval_steps_per_second": 2.85, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2766, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"eval_loss": 0.22648800909519196, |
|
"eval_runtime": 2.4544, |
|
"eval_samples_per_second": 22.816, |
|
"eval_steps_per_second": 2.852, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2476, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"eval_loss": 0.19777119159698486, |
|
"eval_runtime": 2.4535, |
|
"eval_samples_per_second": 22.824, |
|
"eval_steps_per_second": 2.853, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 7.86, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2383, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 7.86, |
|
"eval_loss": 0.1977979987859726, |
|
"eval_runtime": 2.4538, |
|
"eval_samples_per_second": 22.821, |
|
"eval_steps_per_second": 2.853, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 8.21, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2318, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 8.21, |
|
"eval_loss": 0.19671748578548431, |
|
"eval_runtime": 2.4545, |
|
"eval_samples_per_second": 22.815, |
|
"eval_steps_per_second": 2.852, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 8.57, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2159, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 8.57, |
|
"eval_loss": 0.1927504688501358, |
|
"eval_runtime": 2.4546, |
|
"eval_samples_per_second": 22.815, |
|
"eval_steps_per_second": 2.852, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 8.93, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2332, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 8.93, |
|
"eval_loss": 0.18296389281749725, |
|
"eval_runtime": 2.4543, |
|
"eval_samples_per_second": 22.817, |
|
"eval_steps_per_second": 2.852, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 9.29, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2088, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 9.29, |
|
"eval_loss": 0.19335438311100006, |
|
"eval_runtime": 2.4553, |
|
"eval_samples_per_second": 22.808, |
|
"eval_steps_per_second": 2.851, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 9.64, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2072, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 9.64, |
|
"eval_loss": 0.18041561543941498, |
|
"eval_runtime": 2.4547, |
|
"eval_samples_per_second": 22.813, |
|
"eval_steps_per_second": 2.852, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2233, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 0.18061913549900055, |
|
"eval_runtime": 2.454, |
|
"eval_samples_per_second": 22.82, |
|
"eval_steps_per_second": 2.853, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 10.36, |
|
"learning_rate": 0.0002, |
|
"loss": 0.1796, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 10.36, |
|
"eval_loss": 0.17700645327568054, |
|
"eval_runtime": 2.4539, |
|
"eval_samples_per_second": 22.821, |
|
"eval_steps_per_second": 2.853, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 10.71, |
|
"learning_rate": 0.0002, |
|
"loss": 0.215, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 10.71, |
|
"eval_loss": 0.18451173603534698, |
|
"eval_runtime": 2.4555, |
|
"eval_samples_per_second": 22.806, |
|
"eval_steps_per_second": 2.851, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 11.07, |
|
"learning_rate": 0.0002, |
|
"loss": 0.1894, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 11.07, |
|
"eval_loss": 0.17726834118366241, |
|
"eval_runtime": 2.4547, |
|
"eval_samples_per_second": 22.813, |
|
"eval_steps_per_second": 2.852, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 11.43, |
|
"learning_rate": 0.0002, |
|
"loss": 0.1899, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 11.43, |
|
"eval_loss": 0.17982231080532074, |
|
"eval_runtime": 2.4541, |
|
"eval_samples_per_second": 22.819, |
|
"eval_steps_per_second": 2.852, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 11.79, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2009, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 11.79, |
|
"eval_loss": 0.1710078865289688, |
|
"eval_runtime": 2.4543, |
|
"eval_samples_per_second": 22.817, |
|
"eval_steps_per_second": 2.852, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 12.14, |
|
"learning_rate": 0.0002, |
|
"loss": 0.1859, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 12.14, |
|
"eval_loss": 0.1884012222290039, |
|
"eval_runtime": 2.4546, |
|
"eval_samples_per_second": 22.814, |
|
"eval_steps_per_second": 2.852, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"learning_rate": 0.0002, |
|
"loss": 0.1854, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"eval_loss": 0.16743424534797668, |
|
"eval_runtime": 2.4539, |
|
"eval_samples_per_second": 22.821, |
|
"eval_steps_per_second": 2.853, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 12.86, |
|
"learning_rate": 0.0002, |
|
"loss": 0.191, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 12.86, |
|
"eval_loss": 0.16949959099292755, |
|
"eval_runtime": 2.4543, |
|
"eval_samples_per_second": 22.817, |
|
"eval_steps_per_second": 2.852, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 13.21, |
|
"learning_rate": 0.0002, |
|
"loss": 0.1912, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 13.21, |
|
"eval_loss": 0.15851029753684998, |
|
"eval_runtime": 2.4557, |
|
"eval_samples_per_second": 22.804, |
|
"eval_steps_per_second": 2.851, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 13.57, |
|
"learning_rate": 0.0002, |
|
"loss": 0.1763, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 13.57, |
|
"eval_loss": 0.17656584084033966, |
|
"eval_runtime": 2.4541, |
|
"eval_samples_per_second": 22.819, |
|
"eval_steps_per_second": 2.852, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 13.93, |
|
"learning_rate": 0.0002, |
|
"loss": 0.1953, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 13.93, |
|
"eval_loss": 0.15806905925273895, |
|
"eval_runtime": 2.455, |
|
"eval_samples_per_second": 22.811, |
|
"eval_steps_per_second": 2.851, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 14.29, |
|
"learning_rate": 0.0002, |
|
"loss": 0.1732, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 14.29, |
|
"eval_loss": 0.18219564855098724, |
|
"eval_runtime": 2.4533, |
|
"eval_samples_per_second": 22.826, |
|
"eval_steps_per_second": 2.853, |
|
"step": 1000 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 1000, |
|
"num_train_epochs": 15, |
|
"save_steps": 25, |
|
"total_flos": 239052193136640.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|