Phi-2_PT_QA_2_v2 / trainer_state.json
vsvasconcelos's picture
Upload 14 files
d8a0037 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.603550295857988,
"eval_steps": 5,
"global_step": 110,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.11834319526627218,
"grad_norm": 0.0,
"learning_rate": 1.6666666666666667e-05,
"loss": 2.6428,
"step": 5
},
{
"epoch": 0.11834319526627218,
"eval_loss": 2.5045294761657715,
"eval_runtime": 267.5284,
"eval_samples_per_second": 0.606,
"eval_steps_per_second": 0.153,
"step": 5
},
{
"epoch": 0.23668639053254437,
"grad_norm": 0.0,
"learning_rate": 1.992981096013517e-05,
"loss": 2.6411,
"step": 10
},
{
"epoch": 0.23668639053254437,
"eval_loss": 2.5045294761657715,
"eval_runtime": 267.7203,
"eval_samples_per_second": 0.605,
"eval_steps_per_second": 0.153,
"step": 10
},
{
"epoch": 0.35502958579881655,
"grad_norm": 0.0,
"learning_rate": 1.964635581908359e-05,
"loss": 2.6552,
"step": 15
},
{
"epoch": 0.35502958579881655,
"eval_loss": 2.5045294761657715,
"eval_runtime": 267.7242,
"eval_samples_per_second": 0.605,
"eval_steps_per_second": 0.153,
"step": 15
},
{
"epoch": 0.47337278106508873,
"grad_norm": 0.0,
"learning_rate": 1.9151456172430186e-05,
"loss": 2.6293,
"step": 20
},
{
"epoch": 0.47337278106508873,
"eval_loss": 2.5045294761657715,
"eval_runtime": 267.967,
"eval_samples_per_second": 0.605,
"eval_steps_per_second": 0.153,
"step": 20
},
{
"epoch": 0.591715976331361,
"grad_norm": 0.0,
"learning_rate": 1.845596003501826e-05,
"loss": 2.6225,
"step": 25
},
{
"epoch": 0.591715976331361,
"eval_loss": 2.5045294761657715,
"eval_runtime": 268.4624,
"eval_samples_per_second": 0.603,
"eval_steps_per_second": 0.153,
"step": 25
},
{
"epoch": 0.7100591715976331,
"grad_norm": 0.0,
"learning_rate": 1.7575112421616203e-05,
"loss": 2.6365,
"step": 30
},
{
"epoch": 0.7100591715976331,
"eval_loss": 2.5045294761657715,
"eval_runtime": 267.8379,
"eval_samples_per_second": 0.605,
"eval_steps_per_second": 0.153,
"step": 30
},
{
"epoch": 0.8284023668639053,
"grad_norm": 0.0,
"learning_rate": 1.6528221181905217e-05,
"loss": 2.6304,
"step": 35
},
{
"epoch": 0.8284023668639053,
"eval_loss": 2.5045294761657715,
"eval_runtime": 267.5669,
"eval_samples_per_second": 0.605,
"eval_steps_per_second": 0.153,
"step": 35
},
{
"epoch": 0.9467455621301775,
"grad_norm": 0.0,
"learning_rate": 1.533823377964791e-05,
"loss": 2.6426,
"step": 40
},
{
"epoch": 0.9467455621301775,
"eval_loss": 2.5045294761657715,
"eval_runtime": 267.4996,
"eval_samples_per_second": 0.606,
"eval_steps_per_second": 0.153,
"step": 40
},
{
"epoch": 1.0650887573964498,
"grad_norm": 0.0,
"learning_rate": 1.4031234292879726e-05,
"loss": 2.6404,
"step": 45
},
{
"epoch": 1.0650887573964498,
"eval_loss": 2.5045294761657715,
"eval_runtime": 267.7725,
"eval_samples_per_second": 0.605,
"eval_steps_per_second": 0.153,
"step": 45
},
{
"epoch": 1.183431952662722,
"grad_norm": 0.0,
"learning_rate": 1.2635871660690677e-05,
"loss": 2.6214,
"step": 50
},
{
"epoch": 1.183431952662722,
"eval_loss": 2.5045294761657715,
"eval_runtime": 267.7285,
"eval_samples_per_second": 0.605,
"eval_steps_per_second": 0.153,
"step": 50
},
{
"epoch": 1.301775147928994,
"grad_norm": 0.0,
"learning_rate": 1.1182731709213658e-05,
"loss": 2.636,
"step": 55
},
{
"epoch": 1.301775147928994,
"eval_loss": 2.5045294761657715,
"eval_runtime": 267.906,
"eval_samples_per_second": 0.605,
"eval_steps_per_second": 0.153,
"step": 55
},
{
"epoch": 1.4201183431952662,
"grad_norm": 0.0,
"learning_rate": 9.703666721774403e-06,
"loss": 2.6574,
"step": 60
},
{
"epoch": 1.4201183431952662,
"eval_loss": 2.5045294761657715,
"eval_runtime": 267.4678,
"eval_samples_per_second": 0.606,
"eval_steps_per_second": 0.153,
"step": 60
},
{
"epoch": 1.5384615384615383,
"grad_norm": 0.0,
"learning_rate": 8.231097248774273e-06,
"loss": 2.6211,
"step": 65
},
{
"epoch": 1.5384615384615383,
"eval_loss": 2.5045294761657715,
"eval_runtime": 267.8967,
"eval_samples_per_second": 0.605,
"eval_steps_per_second": 0.153,
"step": 65
},
{
"epoch": 1.6568047337278107,
"grad_norm": 0.0,
"learning_rate": 6.797301461371626e-06,
"loss": 2.6171,
"step": 70
},
{
"epoch": 1.6568047337278107,
"eval_loss": 2.5045294761657715,
"eval_runtime": 267.422,
"eval_samples_per_second": 0.606,
"eval_steps_per_second": 0.153,
"step": 70
},
{
"epoch": 1.7751479289940828,
"grad_norm": 0.0,
"learning_rate": 5.43370762606287e-06,
"loss": 2.6375,
"step": 75
},
{
"epoch": 1.7751479289940828,
"eval_loss": 2.5045294761657715,
"eval_runtime": 268.3368,
"eval_samples_per_second": 0.604,
"eval_steps_per_second": 0.153,
"step": 75
},
{
"epoch": 1.893491124260355,
"grad_norm": 0.0,
"learning_rate": 4.170205208855281e-06,
"loss": 2.6548,
"step": 80
},
{
"epoch": 1.893491124260355,
"eval_loss": 2.5045294761657715,
"eval_runtime": 267.6946,
"eval_samples_per_second": 0.605,
"eval_steps_per_second": 0.153,
"step": 80
},
{
"epoch": 2.0118343195266273,
"grad_norm": 0.0,
"learning_rate": 3.0344897093700333e-06,
"loss": 2.6684,
"step": 85
},
{
"epoch": 2.0118343195266273,
"eval_loss": 2.5045294761657715,
"eval_runtime": 267.6543,
"eval_samples_per_second": 0.605,
"eval_steps_per_second": 0.153,
"step": 85
},
{
"epoch": 2.1301775147928996,
"grad_norm": 0.0,
"learning_rate": 2.0514555858664663e-06,
"loss": 2.6305,
"step": 90
},
{
"epoch": 2.1301775147928996,
"eval_loss": 2.5045294761657715,
"eval_runtime": 267.8601,
"eval_samples_per_second": 0.605,
"eval_steps_per_second": 0.153,
"step": 90
},
{
"epoch": 2.2485207100591715,
"grad_norm": 0.0,
"learning_rate": 1.2426505780436326e-06,
"loss": 2.6421,
"step": 95
},
{
"epoch": 2.2485207100591715,
"eval_loss": 2.5045294761657715,
"eval_runtime": 267.7705,
"eval_samples_per_second": 0.605,
"eval_steps_per_second": 0.153,
"step": 95
},
{
"epoch": 2.366863905325444,
"grad_norm": 0.0,
"learning_rate": 6.258033886587911e-07,
"loss": 2.6469,
"step": 100
},
{
"epoch": 2.366863905325444,
"eval_loss": 2.5045294761657715,
"eval_runtime": 267.5145,
"eval_samples_per_second": 0.606,
"eval_steps_per_second": 0.153,
"step": 100
},
{
"epoch": 2.485207100591716,
"grad_norm": 0.0,
"learning_rate": 2.1443507700495968e-07,
"loss": 2.6275,
"step": 105
},
{
"epoch": 2.485207100591716,
"eval_loss": 2.5045294761657715,
"eval_runtime": 267.7646,
"eval_samples_per_second": 0.605,
"eval_steps_per_second": 0.153,
"step": 105
},
{
"epoch": 2.603550295857988,
"grad_norm": 0.0,
"learning_rate": 1.7562682356786488e-08,
"loss": 2.6352,
"step": 110
},
{
"epoch": 2.603550295857988,
"eval_loss": 2.5045294761657715,
"eval_runtime": 267.6496,
"eval_samples_per_second": 0.605,
"eval_steps_per_second": 0.153,
"step": 110
}
],
"logging_steps": 5,
"max_steps": 112,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 5,
"total_flos": 5.736198700007424e+16,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}