fpadovani's picture
Training in progress, step 52000, checkpoint
a7035c8 verified
{
"best_metric": 4.583011150360107,
"best_model_checkpoint": "/home/p318482/babyLM_controlled/models_trained/de_clm/childes_30/checkpoint-32000",
"epoch": 54.507337526205454,
"eval_steps": 2000,
"global_step": 52000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 2.0964360587002098,
"eval_loss": 7.102903366088867,
"eval_runtime": 0.9708,
"eval_samples_per_second": 1416.286,
"eval_steps_per_second": 88.582,
"step": 2000
},
{
"epoch": 4.1928721174004195,
"grad_norm": 1.3964662551879883,
"learning_rate": 1e-05,
"loss": 6.9987,
"step": 4000
},
{
"epoch": 4.1928721174004195,
"eval_loss": 5.884151935577393,
"eval_runtime": 0.966,
"eval_samples_per_second": 1423.408,
"eval_steps_per_second": 89.028,
"step": 4000
},
{
"epoch": 6.289308176100629,
"eval_loss": 5.54873514175415,
"eval_runtime": 0.9657,
"eval_samples_per_second": 1423.84,
"eval_steps_per_second": 89.055,
"step": 6000
},
{
"epoch": 8.385744234800839,
"grad_norm": 2.7172107696533203,
"learning_rate": 2e-05,
"loss": 5.2204,
"step": 8000
},
{
"epoch": 8.385744234800839,
"eval_loss": 5.2793288230896,
"eval_runtime": 0.9644,
"eval_samples_per_second": 1425.779,
"eval_steps_per_second": 89.176,
"step": 8000
},
{
"epoch": 10.482180293501049,
"eval_loss": 5.10486364364624,
"eval_runtime": 0.9641,
"eval_samples_per_second": 1426.204,
"eval_steps_per_second": 89.203,
"step": 10000
},
{
"epoch": 12.578616352201259,
"grad_norm": 2.500443458557129,
"learning_rate": 2.99925e-05,
"loss": 4.7358,
"step": 12000
},
{
"epoch": 12.578616352201259,
"eval_loss": 4.983631134033203,
"eval_runtime": 0.9644,
"eval_samples_per_second": 1425.809,
"eval_steps_per_second": 89.178,
"step": 12000
},
{
"epoch": 14.675052410901468,
"eval_loss": 4.882917404174805,
"eval_runtime": 0.9686,
"eval_samples_per_second": 1419.612,
"eval_steps_per_second": 88.79,
"step": 14000
},
{
"epoch": 16.771488469601678,
"grad_norm": 2.400749444961548,
"learning_rate": 3.999e-05,
"loss": 4.4216,
"step": 16000
},
{
"epoch": 16.771488469601678,
"eval_loss": 4.802889823913574,
"eval_runtime": 0.9763,
"eval_samples_per_second": 1408.393,
"eval_steps_per_second": 88.089,
"step": 16000
},
{
"epoch": 18.867924528301888,
"eval_loss": 4.74226188659668,
"eval_runtime": 0.976,
"eval_samples_per_second": 1408.882,
"eval_steps_per_second": 88.119,
"step": 18000
},
{
"epoch": 20.964360587002098,
"grad_norm": 2.2613022327423096,
"learning_rate": 4.9985e-05,
"loss": 4.1842,
"step": 20000
},
{
"epoch": 20.964360587002098,
"eval_loss": 4.690371513366699,
"eval_runtime": 0.9669,
"eval_samples_per_second": 1422.037,
"eval_steps_per_second": 88.942,
"step": 20000
},
{
"epoch": 23.060796645702307,
"eval_loss": 4.645771503448486,
"eval_runtime": 0.966,
"eval_samples_per_second": 1423.404,
"eval_steps_per_second": 89.027,
"step": 22000
},
{
"epoch": 25.157232704402517,
"grad_norm": 2.2588465213775635,
"learning_rate": 5.9980000000000005e-05,
"loss": 3.9858,
"step": 24000
},
{
"epoch": 25.157232704402517,
"eval_loss": 4.623382568359375,
"eval_runtime": 0.9666,
"eval_samples_per_second": 1422.538,
"eval_steps_per_second": 88.973,
"step": 24000
},
{
"epoch": 27.253668763102727,
"eval_loss": 4.6056084632873535,
"eval_runtime": 0.9825,
"eval_samples_per_second": 1399.422,
"eval_steps_per_second": 87.527,
"step": 26000
},
{
"epoch": 29.350104821802937,
"grad_norm": 2.1845760345458984,
"learning_rate": 6.997500000000001e-05,
"loss": 3.8189,
"step": 28000
},
{
"epoch": 29.350104821802937,
"eval_loss": 4.590851783752441,
"eval_runtime": 1.0367,
"eval_samples_per_second": 1326.293,
"eval_steps_per_second": 82.954,
"step": 28000
},
{
"epoch": 31.446540880503143,
"eval_loss": 4.586838245391846,
"eval_runtime": 0.9705,
"eval_samples_per_second": 1416.743,
"eval_steps_per_second": 88.611,
"step": 30000
},
{
"epoch": 33.542976939203356,
"grad_norm": 2.2118289470672607,
"learning_rate": 7.997e-05,
"loss": 3.6763,
"step": 32000
},
{
"epoch": 33.542976939203356,
"eval_loss": 4.583011150360107,
"eval_runtime": 0.9706,
"eval_samples_per_second": 1416.604,
"eval_steps_per_second": 88.602,
"step": 32000
},
{
"epoch": 35.63941299790356,
"eval_loss": 4.57816743850708,
"eval_runtime": 0.9657,
"eval_samples_per_second": 1423.818,
"eval_steps_per_second": 89.053,
"step": 34000
},
{
"epoch": 37.735849056603776,
"grad_norm": 2.189404010772705,
"learning_rate": 8.9965e-05,
"loss": 3.5493,
"step": 36000
},
{
"epoch": 37.735849056603776,
"eval_loss": 4.585381031036377,
"eval_runtime": 0.9726,
"eval_samples_per_second": 1413.783,
"eval_steps_per_second": 88.426,
"step": 36000
},
{
"epoch": 39.83228511530398,
"eval_loss": 4.596414566040039,
"eval_runtime": 0.971,
"eval_samples_per_second": 1416.025,
"eval_steps_per_second": 88.566,
"step": 38000
},
{
"epoch": 41.928721174004195,
"grad_norm": 2.1832594871520996,
"learning_rate": 9.996000000000001e-05,
"loss": 3.4327,
"step": 40000
},
{
"epoch": 41.928721174004195,
"eval_loss": 4.610367774963379,
"eval_runtime": 0.9687,
"eval_samples_per_second": 1419.473,
"eval_steps_per_second": 88.782,
"step": 40000
},
{
"epoch": 44.0251572327044,
"eval_loss": 4.636893272399902,
"eval_runtime": 0.9651,
"eval_samples_per_second": 1424.742,
"eval_steps_per_second": 89.111,
"step": 42000
},
{
"epoch": 46.121593291404615,
"grad_norm": 2.1686387062072754,
"learning_rate": 9.336333333333334e-05,
"loss": 3.3112,
"step": 44000
},
{
"epoch": 46.121593291404615,
"eval_loss": 4.66969108581543,
"eval_runtime": 0.9642,
"eval_samples_per_second": 1426.082,
"eval_steps_per_second": 89.195,
"step": 44000
},
{
"epoch": 48.21802935010482,
"eval_loss": 4.69525146484375,
"eval_runtime": 0.966,
"eval_samples_per_second": 1423.408,
"eval_steps_per_second": 89.028,
"step": 46000
},
{
"epoch": 50.314465408805034,
"grad_norm": 2.3040497303009033,
"learning_rate": 8.67e-05,
"loss": 3.1908,
"step": 48000
},
{
"epoch": 50.314465408805034,
"eval_loss": 4.727965831756592,
"eval_runtime": 0.9829,
"eval_samples_per_second": 1398.863,
"eval_steps_per_second": 87.493,
"step": 48000
},
{
"epoch": 52.41090146750524,
"eval_loss": 4.762918472290039,
"eval_runtime": 1.0048,
"eval_samples_per_second": 1368.397,
"eval_steps_per_second": 85.587,
"step": 50000
},
{
"epoch": 54.507337526205454,
"grad_norm": 2.491647720336914,
"learning_rate": 8.003666666666667e-05,
"loss": 3.0857,
"step": 52000
},
{
"epoch": 54.507337526205454,
"eval_loss": 4.792750835418701,
"eval_runtime": 0.9654,
"eval_samples_per_second": 1424.262,
"eval_steps_per_second": 89.081,
"step": 52000
}
],
"logging_steps": 4000,
"max_steps": 100000,
"num_input_tokens_seen": 0,
"num_train_epochs": 105,
"save_steps": 4000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.3445443431727104e+16,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}