|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 25.0,
  "global_step": 31250,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.4,
      "learning_rate": 1.9811272141706926e-05,
      "loss": 5.5901,
      "step": 500
    },
    {
      "epoch": 0.8,
      "learning_rate": 1.9489855072463772e-05,
      "loss": 3.3533,
      "step": 1000
    },
    {
      "epoch": 1.2,
      "learning_rate": 1.916779388083736e-05,
      "loss": 2.693,
      "step": 1500
    },
    {
      "epoch": 1.6,
      "learning_rate": 1.8847665056360712e-05,
      "loss": 2.1327,
      "step": 2000
    },
    {
      "epoch": 2.0,
      "learning_rate": 1.8525603864734302e-05,
      "loss": 1.8894,
      "step": 2500
    },
    {
      "epoch": 2.4,
      "learning_rate": 1.8204186795491144e-05,
      "loss": 1.7199,
      "step": 3000
    },
    {
      "epoch": 2.8,
      "learning_rate": 1.788276972624799e-05,
      "loss": 1.6236,
      "step": 3500
    },
    {
      "epoch": 3.2,
      "learning_rate": 1.7561996779388084e-05,
      "loss": 1.5586,
      "step": 4000
    },
    {
      "epoch": 3.6,
      "learning_rate": 1.724057971014493e-05,
      "loss": 1.4624,
      "step": 4500
    },
    {
      "epoch": 4.0,
      "learning_rate": 1.6919162640901772e-05,
      "loss": 1.4674,
      "step": 5000
    },
    {
      "epoch": 4.4,
      "learning_rate": 1.6636392914653787e-05,
      "loss": 1.452,
      "step": 5500
    },
    {
      "epoch": 4.8,
      "learning_rate": 1.6371014492753626e-05,
      "loss": 1.4946,
      "step": 6000
    },
    {
      "epoch": 5.2,
      "learning_rate": 1.6048953301127216e-05,
      "loss": 0.0,
      "step": 6500
    },
    {
      "epoch": 5.6,
      "learning_rate": 1.5726892109500806e-05,
      "loss": 0.0,
      "step": 7000
    },
    {
      "epoch": 6.0,
      "learning_rate": 1.54048309178744e-05,
      "loss": 0.0,
      "step": 7500
    },
    {
      "epoch": 6.4,
      "learning_rate": 1.5082769726247988e-05,
      "loss": 0.0,
      "step": 8000
    },
    {
      "epoch": 6.8,
      "learning_rate": 1.476070853462158e-05,
      "loss": 0.0,
      "step": 8500
    },
    {
      "epoch": 7.2,
      "learning_rate": 1.4438647342995172e-05,
      "loss": 0.0,
      "step": 9000
    },
    {
      "epoch": 7.6,
      "learning_rate": 1.411658615136876e-05,
      "loss": 0.0,
      "step": 9500
    },
    {
      "epoch": 8.0,
      "learning_rate": 1.3794524959742352e-05,
      "loss": 0.0,
      "step": 10000
    },
    {
      "epoch": 8.4,
      "learning_rate": 1.3472463768115942e-05,
      "loss": 0.0,
      "step": 10500
    },
    {
      "epoch": 8.8,
      "learning_rate": 1.3150402576489534e-05,
      "loss": 0.0,
      "step": 11000
    },
    {
      "epoch": 9.2,
      "learning_rate": 1.2828341384863126e-05,
      "loss": 0.0,
      "step": 11500
    },
    {
      "epoch": 9.6,
      "learning_rate": 1.2506280193236716e-05,
      "loss": 0.0,
      "step": 12000
    },
    {
      "epoch": 10.0,
      "learning_rate": 1.2184219001610308e-05,
      "loss": 0.0,
      "step": 12500
    },
    {
      "epoch": 10.4,
      "learning_rate": 1.18621578099839e-05,
      "loss": 0.0,
      "step": 13000
    },
    {
      "epoch": 10.8,
      "learning_rate": 1.1540096618357488e-05,
      "loss": 0.0,
      "step": 13500
    },
    {
      "epoch": 11.2,
      "learning_rate": 1.121803542673108e-05,
      "loss": 0.0,
      "step": 14000
    },
    {
      "epoch": 11.6,
      "learning_rate": 1.0895974235104671e-05,
      "loss": 0.0,
      "step": 14500
    },
    {
      "epoch": 12.0,
      "learning_rate": 1.0573913043478262e-05,
      "loss": 0.0,
      "step": 15000
    },
    {
      "epoch": 12.4,
      "learning_rate": 1.0251851851851853e-05,
      "loss": 0.0,
      "step": 15500
    },
    {
      "epoch": 12.8,
      "learning_rate": 9.929790660225444e-06,
      "loss": 0.0,
      "step": 16000
    },
    {
      "epoch": 13.2,
      "learning_rate": 9.607729468599034e-06,
      "loss": 0.0,
      "step": 16500
    },
    {
      "epoch": 13.6,
      "learning_rate": 9.285668276972625e-06,
      "loss": 0.0,
      "step": 17000
    },
    {
      "epoch": 14.0,
      "learning_rate": 8.963607085346217e-06,
      "loss": 0.0,
      "step": 17500
    },
    {
      "epoch": 14.4,
      "learning_rate": 8.641545893719807e-06,
      "loss": 0.0,
      "step": 18000
    },
    {
      "epoch": 14.8,
      "learning_rate": 8.3194847020934e-06,
      "loss": 0.0,
      "step": 18500
    },
    {
      "epoch": 15.2,
      "learning_rate": 7.99742351046699e-06,
      "loss": 0.0,
      "step": 19000
    },
    {
      "epoch": 15.6,
      "learning_rate": 7.67536231884058e-06,
      "loss": 0.0,
      "step": 19500
    },
    {
      "epoch": 16.0,
      "learning_rate": 7.3533011272141705e-06,
      "loss": 0.0,
      "step": 20000
    },
    {
      "epoch": 16.4,
      "learning_rate": 7.031239935587762e-06,
      "loss": 0.0,
      "step": 20500
    },
    {
      "epoch": 16.8,
      "learning_rate": 6.709178743961353e-06,
      "loss": 0.0,
      "step": 21000
    },
    {
      "epoch": 17.2,
      "learning_rate": 6.3871175523349435e-06,
      "loss": 0.0,
      "step": 21500
    },
    {
      "epoch": 17.6,
      "learning_rate": 6.065056360708535e-06,
      "loss": 0.0,
      "step": 22000
    },
    {
      "epoch": 18.0,
      "learning_rate": 5.742995169082126e-06,
      "loss": 0.0,
      "step": 22500
    },
    {
      "epoch": 18.4,
      "learning_rate": 5.420933977455716e-06,
      "loss": 0.0,
      "step": 23000
    },
    {
      "epoch": 18.8,
      "learning_rate": 5.098872785829307e-06,
      "loss": 0.0,
      "step": 23500
    },
    {
      "epoch": 19.2,
      "learning_rate": 4.776811594202899e-06,
      "loss": 0.0,
      "step": 24000
    },
    {
      "epoch": 19.6,
      "learning_rate": 4.45475040257649e-06,
      "loss": 0.0,
      "step": 24500
    },
    {
      "epoch": 20.0,
      "learning_rate": 4.132689210950081e-06,
      "loss": 0.0,
      "step": 25000
    },
    {
      "epoch": 20.4,
      "learning_rate": 3.8106280193236717e-06,
      "loss": 0.0,
      "step": 25500
    },
    {
      "epoch": 20.8,
      "learning_rate": 3.4885668276972627e-06,
      "loss": 0.0,
      "step": 26000
    },
    {
      "epoch": 21.2,
      "learning_rate": 3.1665056360708537e-06,
      "loss": 0.0,
      "step": 26500
    },
    {
      "epoch": 21.6,
      "learning_rate": 2.8444444444444446e-06,
      "loss": 0.0,
      "step": 27000
    },
    {
      "epoch": 22.0,
      "learning_rate": 2.522383252818036e-06,
      "loss": 0.0,
      "step": 27500
    },
    {
      "epoch": 22.4,
      "learning_rate": 2.2003220611916266e-06,
      "loss": 0.0,
      "step": 28000
    },
    {
      "epoch": 22.8,
      "learning_rate": 1.8782608695652174e-06,
      "loss": 0.0,
      "step": 28500
    },
    {
      "epoch": 23.2,
      "learning_rate": 1.5561996779388086e-06,
      "loss": 0.0,
      "step": 29000
    },
    {
      "epoch": 23.6,
      "learning_rate": 1.2341384863123995e-06,
      "loss": 0.0,
      "step": 29500
    },
    {
      "epoch": 24.0,
      "learning_rate": 9.120772946859904e-07,
      "loss": 0.0,
      "step": 30000
    },
    {
      "epoch": 24.4,
      "learning_rate": 5.900161030595814e-07,
      "loss": 0.0,
      "step": 30500
    },
    {
      "epoch": 24.8,
      "learning_rate": 2.679549114331723e-07,
      "loss": 0.0,
      "step": 31000
    },
    {
      "epoch": 25.0,
      "step": 31250,
      "total_flos": 2.3280874488575558e+19,
      "train_loss": 0.42298770703125,
      "train_runtime": 13299.417,
      "train_samples_per_second": 18.798,
      "train_steps_per_second": 2.35
    }
  ],
  "max_steps": 31250,
  "num_train_epochs": 25,
  "total_flos": 2.3280874488575558e+19,
  "trial_name": null,
  "trial_params": null
}
|
|