|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 9.998657718120805, |
|
"global_step": 3720, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 6.904632152588556e-06, |
|
"loss": 0.6417, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_accuracy": 0.653323233127594, |
|
"eval_loss": 0.6283483505249023, |
|
"eval_runtime": 8.5429, |
|
"eval_samples_per_second": 154.983, |
|
"eval_steps_per_second": 9.716, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 6.713896457765667e-06, |
|
"loss": 0.5105, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_accuracy": 0.791540801525116, |
|
"eval_loss": 0.45882758498191833, |
|
"eval_runtime": 8.3778, |
|
"eval_samples_per_second": 158.036, |
|
"eval_steps_per_second": 9.907, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 6.523160762942779e-06, |
|
"loss": 0.4554, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_accuracy": 0.7968277931213379, |
|
"eval_loss": 0.44999730587005615, |
|
"eval_runtime": 8.4819, |
|
"eval_samples_per_second": 156.097, |
|
"eval_steps_per_second": 9.786, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 6.332425068119891e-06, |
|
"loss": 0.4212, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"eval_accuracy": 0.7938066720962524, |
|
"eval_loss": 0.47725316882133484, |
|
"eval_runtime": 8.3161, |
|
"eval_samples_per_second": 159.209, |
|
"eval_steps_per_second": 9.981, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 6.141689373297002e-06, |
|
"loss": 0.4054, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"eval_accuracy": 0.7983383536338806, |
|
"eval_loss": 0.43111804127693176, |
|
"eval_runtime": 8.5652, |
|
"eval_samples_per_second": 154.579, |
|
"eval_steps_per_second": 9.69, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 5.950953678474114e-06, |
|
"loss": 0.3922, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"eval_accuracy": 0.7998489141464233, |
|
"eval_loss": 0.4587886333465576, |
|
"eval_runtime": 8.2972, |
|
"eval_samples_per_second": 159.573, |
|
"eval_steps_per_second": 10.003, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 5.760217983651226e-06, |
|
"loss": 0.3776, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"eval_accuracy": 0.8066465258598328, |
|
"eval_loss": 0.43665462732315063, |
|
"eval_runtime": 8.2982, |
|
"eval_samples_per_second": 159.553, |
|
"eval_steps_per_second": 10.002, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 5.569482288828338e-06, |
|
"loss": 0.3535, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"eval_accuracy": 0.8074018359184265, |
|
"eval_loss": 0.4674627184867859, |
|
"eval_runtime": 8.7016, |
|
"eval_samples_per_second": 152.155, |
|
"eval_steps_per_second": 9.538, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 5.37874659400545e-06, |
|
"loss": 0.33, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"eval_accuracy": 0.8021147847175598, |
|
"eval_loss": 0.48743265867233276, |
|
"eval_runtime": 8.4057, |
|
"eval_samples_per_second": 157.512, |
|
"eval_steps_per_second": 9.874, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 5.188010899182561e-06, |
|
"loss": 0.3113, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"eval_accuracy": 0.8043806552886963, |
|
"eval_loss": 0.49486756324768066, |
|
"eval_runtime": 8.2742, |
|
"eval_samples_per_second": 160.015, |
|
"eval_steps_per_second": 10.031, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 4.997275204359673e-06, |
|
"loss": 0.3203, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"eval_accuracy": 0.805891215801239, |
|
"eval_loss": 0.45497822761535645, |
|
"eval_runtime": 8.7885, |
|
"eval_samples_per_second": 150.651, |
|
"eval_steps_per_second": 9.444, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 4.806539509536785e-06, |
|
"loss": 0.248, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"eval_accuracy": 0.8036254048347473, |
|
"eval_loss": 0.4857841432094574, |
|
"eval_runtime": 8.5367, |
|
"eval_samples_per_second": 155.095, |
|
"eval_steps_per_second": 9.723, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 4.615803814713896e-06, |
|
"loss": 0.2478, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"eval_accuracy": 0.8028700947761536, |
|
"eval_loss": 0.5299096703529358, |
|
"eval_runtime": 8.4072, |
|
"eval_samples_per_second": 157.485, |
|
"eval_steps_per_second": 9.873, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 4.425068119891008e-06, |
|
"loss": 0.2371, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"eval_accuracy": 0.7990936636924744, |
|
"eval_loss": 0.5012686252593994, |
|
"eval_runtime": 8.2774, |
|
"eval_samples_per_second": 159.953, |
|
"eval_steps_per_second": 10.027, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 4.2343324250681206e-06, |
|
"loss": 0.2388, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"eval_accuracy": 0.8021147847175598, |
|
"eval_loss": 0.5519642233848572, |
|
"eval_runtime": 8.5777, |
|
"eval_samples_per_second": 154.355, |
|
"eval_steps_per_second": 9.676, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 4.043596730245232e-06, |
|
"loss": 0.1744, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"eval_accuracy": 0.791540801525116, |
|
"eval_loss": 0.6686806082725525, |
|
"eval_runtime": 8.5418, |
|
"eval_samples_per_second": 155.002, |
|
"eval_steps_per_second": 9.717, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 3.8528610354223435e-06, |
|
"loss": 0.1788, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"eval_accuracy": 0.768882155418396, |
|
"eval_loss": 0.756027340888977, |
|
"eval_runtime": 8.5142, |
|
"eval_samples_per_second": 155.506, |
|
"eval_steps_per_second": 9.748, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"learning_rate": 3.6621253405994546e-06, |
|
"loss": 0.1652, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"eval_accuracy": 0.7832326292991638, |
|
"eval_loss": 0.6984805464744568, |
|
"eval_runtime": 8.5496, |
|
"eval_samples_per_second": 154.861, |
|
"eval_steps_per_second": 9.708, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"learning_rate": 3.4713896457765665e-06, |
|
"loss": 0.1596, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"eval_accuracy": 0.791540801525116, |
|
"eval_loss": 0.7190905213356018, |
|
"eval_runtime": 8.5367, |
|
"eval_samples_per_second": 155.094, |
|
"eval_steps_per_second": 9.723, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 5.38, |
|
"learning_rate": 3.2806539509536784e-06, |
|
"loss": 0.1214, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 5.38, |
|
"eval_accuracy": 0.7892749309539795, |
|
"eval_loss": 0.909650981426239, |
|
"eval_runtime": 8.5312, |
|
"eval_samples_per_second": 155.195, |
|
"eval_steps_per_second": 9.729, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 5.64, |
|
"learning_rate": 3.0899182561307904e-06, |
|
"loss": 0.1432, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 5.64, |
|
"eval_accuracy": 0.7787008881568909, |
|
"eval_loss": 0.9183990359306335, |
|
"eval_runtime": 8.556, |
|
"eval_samples_per_second": 154.745, |
|
"eval_steps_per_second": 9.701, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 5.91, |
|
"learning_rate": 2.899182561307902e-06, |
|
"loss": 0.1145, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 5.91, |
|
"eval_accuracy": 0.7877643704414368, |
|
"eval_loss": 0.962020993232727, |
|
"eval_runtime": 8.5378, |
|
"eval_samples_per_second": 155.075, |
|
"eval_steps_per_second": 9.721, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 6.18, |
|
"learning_rate": 2.7084468664850138e-06, |
|
"loss": 0.1069, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 6.18, |
|
"eval_accuracy": 0.7892749309539795, |
|
"eval_loss": 0.9488775134086609, |
|
"eval_runtime": 8.3193, |
|
"eval_samples_per_second": 159.148, |
|
"eval_steps_per_second": 9.977, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"learning_rate": 2.5177111716621253e-06, |
|
"loss": 0.1012, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"eval_accuracy": 0.7817220687866211, |
|
"eval_loss": 1.0107486248016357, |
|
"eval_runtime": 8.2713, |
|
"eval_samples_per_second": 160.071, |
|
"eval_steps_per_second": 10.035, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 6.72, |
|
"learning_rate": 2.326975476839237e-06, |
|
"loss": 0.0942, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 6.72, |
|
"eval_accuracy": 0.7885196208953857, |
|
"eval_loss": 1.002103567123413, |
|
"eval_runtime": 8.5971, |
|
"eval_samples_per_second": 154.005, |
|
"eval_steps_per_second": 9.654, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"learning_rate": 2.1362397820163487e-06, |
|
"loss": 0.087, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"eval_accuracy": 0.791540801525116, |
|
"eval_loss": 1.1089540719985962, |
|
"eval_runtime": 8.5285, |
|
"eval_samples_per_second": 155.244, |
|
"eval_steps_per_second": 9.732, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 7.26, |
|
"learning_rate": 1.9455040871934606e-06, |
|
"loss": 0.0598, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 7.26, |
|
"eval_accuracy": 0.7794561982154846, |
|
"eval_loss": 1.1735244989395142, |
|
"eval_runtime": 8.6408, |
|
"eval_samples_per_second": 153.227, |
|
"eval_steps_per_second": 9.606, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 7.53, |
|
"learning_rate": 1.754768392370572e-06, |
|
"loss": 0.0742, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 7.53, |
|
"eval_accuracy": 0.7817220687866211, |
|
"eval_loss": 1.1433196067810059, |
|
"eval_runtime": 8.3802, |
|
"eval_samples_per_second": 157.992, |
|
"eval_steps_per_second": 9.904, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 7.79, |
|
"learning_rate": 1.5640326975476838e-06, |
|
"loss": 0.073, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 7.79, |
|
"eval_accuracy": 0.7953172326087952, |
|
"eval_loss": 1.1342711448669434, |
|
"eval_runtime": 8.549, |
|
"eval_samples_per_second": 154.873, |
|
"eval_steps_per_second": 9.709, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"learning_rate": 1.3732970027247957e-06, |
|
"loss": 0.0553, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"eval_accuracy": 0.7839879393577576, |
|
"eval_loss": 1.2258013486862183, |
|
"eval_runtime": 8.4899, |
|
"eval_samples_per_second": 155.95, |
|
"eval_steps_per_second": 9.776, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"learning_rate": 1.1825613079019074e-06, |
|
"loss": 0.0474, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"eval_accuracy": 0.7817220687866211, |
|
"eval_loss": 1.2460782527923584, |
|
"eval_runtime": 8.5462, |
|
"eval_samples_per_second": 154.923, |
|
"eval_steps_per_second": 9.712, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 8.6, |
|
"learning_rate": 9.91825613079019e-07, |
|
"loss": 0.0515, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 8.6, |
|
"eval_accuracy": 0.7824773192405701, |
|
"eval_loss": 1.2996242046356201, |
|
"eval_runtime": 8.5173, |
|
"eval_samples_per_second": 155.448, |
|
"eval_steps_per_second": 9.745, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 8.87, |
|
"learning_rate": 8.010899182561308e-07, |
|
"loss": 0.0551, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 8.87, |
|
"eval_accuracy": 0.7854984998703003, |
|
"eval_loss": 1.281937837600708, |
|
"eval_runtime": 8.2769, |
|
"eval_samples_per_second": 159.964, |
|
"eval_steps_per_second": 10.028, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 9.14, |
|
"learning_rate": 6.103542234332425e-07, |
|
"loss": 0.0541, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 9.14, |
|
"eval_accuracy": 0.7854984998703003, |
|
"eval_loss": 1.2807722091674805, |
|
"eval_runtime": 8.5739, |
|
"eval_samples_per_second": 154.423, |
|
"eval_steps_per_second": 9.681, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 9.41, |
|
"learning_rate": 4.196185286103542e-07, |
|
"loss": 0.0465, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 9.41, |
|
"eval_accuracy": 0.7817220687866211, |
|
"eval_loss": 1.3397763967514038, |
|
"eval_runtime": 8.503, |
|
"eval_samples_per_second": 155.71, |
|
"eval_steps_per_second": 9.761, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 9.68, |
|
"learning_rate": 2.288828337874659e-07, |
|
"loss": 0.0407, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 9.68, |
|
"eval_accuracy": 0.7824773192405701, |
|
"eval_loss": 1.3231298923492432, |
|
"eval_runtime": 8.2453, |
|
"eval_samples_per_second": 160.577, |
|
"eval_steps_per_second": 10.066, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 9.94, |
|
"learning_rate": 3.8147138964577657e-08, |
|
"loss": 0.0343, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 9.94, |
|
"eval_accuracy": 0.7824773192405701, |
|
"eval_loss": 1.3330293893814087, |
|
"eval_runtime": 8.5211, |
|
"eval_samples_per_second": 155.378, |
|
"eval_steps_per_second": 9.74, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 3720, |
|
"total_flos": 5.551925384610202e+16, |
|
"train_loss": 0.20273424893297176, |
|
"train_runtime": 2723.3449, |
|
"train_samples_per_second": 43.755, |
|
"train_steps_per_second": 1.366 |
|
} |
|
], |
|
"max_steps": 3720, |
|
"num_train_epochs": 10, |
|
"total_flos": 5.551925384610202e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|