{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.1533632286995517,
  "eval_steps": 500,
  "global_step": 2736,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 1.9999999844947046e-05,
      "loss": 1.7024,
      "step": 1
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.999961237011484e-05,
      "loss": 1.1507,
      "step": 50
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.9998449510510744e-05,
      "loss": 1.0928,
      "step": 100
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.999651151133954e-05,
      "loss": 1.0793,
      "step": 150
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.999379852284651e-05,
      "loss": 1.0867,
      "step": 200
    },
    {
      "epoch": 0.03,
      "learning_rate": 1.999031075535873e-05,
      "loss": 1.0857,
      "step": 250
    },
    {
      "epoch": 0.03,
      "learning_rate": 1.9986048479268788e-05,
      "loss": 1.0721,
      "step": 300
    },
    {
      "epoch": 0.04,
      "learning_rate": 1.99810120250138e-05,
      "loss": 1.0923,
      "step": 350
    },
    {
      "epoch": 0.04,
      "learning_rate": 1.9975201783049804e-05,
      "loss": 1.0836,
      "step": 400
    },
    {
      "epoch": 0.05,
      "learning_rate": 1.9968618203821487e-05,
      "loss": 1.0769,
      "step": 450
    },
    {
      "epoch": 0.06,
      "learning_rate": 1.9961261797727256e-05,
      "loss": 1.0574,
      "step": 500
    },
    {
      "epoch": 0.06,
      "learning_rate": 1.9953133135079686e-05,
      "loss": 1.042,
      "step": 550
    },
    {
      "epoch": 0.07,
      "learning_rate": 1.9944232846061284e-05,
      "loss": 1.0554,
      "step": 600
    },
    {
      "epoch": 0.07,
      "learning_rate": 1.993456162067566e-05,
      "loss": 1.0735,
      "step": 650
    },
    {
      "epoch": 0.08,
      "learning_rate": 1.992412020869401e-05,
      "loss": 1.0785,
      "step": 700
    },
    {
      "epoch": 0.08,
      "learning_rate": 1.9912909419596993e-05,
      "loss": 1.0654,
      "step": 750
    },
    {
      "epoch": 0.09,
      "learning_rate": 1.9900930122511993e-05,
      "loss": 1.0606,
      "step": 800
    },
    {
      "epoch": 0.1,
      "learning_rate": 1.988818324614572e-05,
      "loss": 1.0664,
      "step": 850
    },
    {
      "epoch": 0.1,
      "learning_rate": 1.9874669778712215e-05,
      "loss": 1.0604,
      "step": 900
    },
    {
      "epoch": 0.11,
      "learning_rate": 1.9860390767856244e-05,
      "loss": 1.0674,
      "step": 950
    },
    {
      "epoch": 0.11,
      "learning_rate": 1.984534732057208e-05,
      "loss": 1.042,
      "step": 1000
    },
    {
      "epoch": 0.12,
      "learning_rate": 1.9829540603117667e-05,
      "loss": 1.0452,
      "step": 1050
    },
    {
      "epoch": 0.12,
      "learning_rate": 1.9812971840924222e-05,
      "loss": 1.0577,
      "step": 1100
    },
    {
      "epoch": 0.13,
      "learning_rate": 1.979564231850122e-05,
      "loss": 1.0471,
      "step": 1150
    },
    {
      "epoch": 0.13,
      "learning_rate": 1.977755337933682e-05,
      "loss": 1.0704,
      "step": 1200
    },
    {
      "epoch": 0.14,
      "learning_rate": 1.9758706425793702e-05,
      "loss": 1.0282,
      "step": 1250
    },
    {
      "epoch": 0.15,
      "learning_rate": 1.973910291900036e-05,
      "loss": 1.0515,
      "step": 1300
    },
    {
      "epoch": 0.15,
      "learning_rate": 1.97187443787378e-05,
      "loss": 1.0548,
      "step": 1350
    },
    {
      "epoch": 0.15,
      "eval_loss": 1.0247304439544678,
      "eval_runtime": 4.5889,
      "eval_samples_per_second": 108.959,
      "eval_steps_per_second": 13.729,
      "step": 1368
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.9697632383321755e-05,
      "loss": 0.9636,
      "step": 1400
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.96757685694803e-05,
      "loss": 0.9026,
      "step": 1450
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.965315463222695e-05,
      "loss": 0.8808,
      "step": 1500
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.9629792324729302e-05,
      "loss": 0.8712,
      "step": 1550
    },
    {
      "epoch": 1.03,
      "learning_rate": 1.960568345817306e-05,
      "loss": 0.8967,
      "step": 1600
    },
    {
      "epoch": 1.03,
      "learning_rate": 1.9580829901621666e-05,
      "loss": 0.8676,
      "step": 1650
    },
    {
      "epoch": 1.04,
      "learning_rate": 1.9555233581871366e-05,
      "loss": 0.8723,
      "step": 1700
    },
    {
      "epoch": 1.04,
      "learning_rate": 1.9528896483301866e-05,
      "loss": 0.9122,
      "step": 1750
    },
    {
      "epoch": 1.05,
      "learning_rate": 1.9501820647722458e-05,
      "loss": 0.8687,
      "step": 1800
    },
    {
      "epoch": 1.05,
      "learning_rate": 1.947400817421375e-05,
      "loss": 0.8726,
      "step": 1850
    },
    {
      "epoch": 1.06,
      "learning_rate": 1.944546121896493e-05,
      "loss": 0.8505,
      "step": 1900
    },
    {
      "epoch": 1.07,
      "learning_rate": 1.9416181995106585e-05,
      "loss": 0.8458,
      "step": 1950
    },
    {
      "epoch": 1.07,
      "learning_rate": 1.9386172772539162e-05,
      "loss": 0.8721,
      "step": 2000
    },
    {
      "epoch": 1.08,
      "learning_rate": 1.9355435877756957e-05,
      "loss": 0.8676,
      "step": 2050
    },
    {
      "epoch": 1.08,
      "learning_rate": 1.9323973693667762e-05,
      "loss": 0.8826,
      "step": 2100
    },
    {
      "epoch": 1.09,
      "learning_rate": 1.929178865940815e-05,
      "loss": 0.8607,
      "step": 2150
    },
    {
      "epoch": 1.09,
      "learning_rate": 1.925888327015434e-05,
      "loss": 0.8561,
      "step": 2200
    },
    {
      "epoch": 1.1,
      "learning_rate": 1.9225260076928783e-05,
      "loss": 0.8687,
      "step": 2250
    },
    {
      "epoch": 1.1,
      "learning_rate": 1.919092168640239e-05,
      "loss": 0.874,
      "step": 2300
    },
    {
      "epoch": 1.11,
      "learning_rate": 1.915587076069243e-05,
      "loss": 0.8563,
      "step": 2350
    },
    {
      "epoch": 1.12,
      "learning_rate": 1.9120110017156172e-05,
      "loss": 0.8445,
      "step": 2400
    },
    {
      "epoch": 1.12,
      "learning_rate": 1.908364222818019e-05,
      "loss": 0.8646,
      "step": 2450
    },
    {
      "epoch": 1.13,
      "learning_rate": 1.9046470220965457e-05,
      "loss": 0.8479,
      "step": 2500
    },
    {
      "epoch": 1.13,
      "learning_rate": 1.9008596877308157e-05,
      "loss": 0.8788,
      "step": 2550
    },
    {
      "epoch": 1.14,
      "learning_rate": 1.8970025133376252e-05,
      "loss": 0.9,
      "step": 2600
    },
    {
      "epoch": 1.14,
      "learning_rate": 1.893075797948188e-05,
      "loss": 0.8791,
      "step": 2650
    },
    {
      "epoch": 1.15,
      "learning_rate": 1.889079845984951e-05,
      "loss": 0.9254,
      "step": 2700
    },
    {
      "epoch": 1.15,
      "eval_loss": 1.0676991939544678,
      "eval_runtime": 4.5191,
      "eval_samples_per_second": 110.641,
      "eval_steps_per_second": 13.941,
      "step": 2736
    },
    {
      "epoch": 1.15,
      "step": 2736,
      "total_flos": 572810393026560.0,
      "train_loss": 0.9719247023264567,
      "train_runtime": 13352.0365,
      "train_samples_per_second": 42.755,
      "train_steps_per_second": 1.336
    }
  ],
  "logging_steps": 50,
  "max_steps": 17840,
  "num_train_epochs": 2,
  "save_steps": 500,
  "total_flos": 572810393026560.0,
  "trial_name": null,
  "trial_params": null
}