|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.9928292046936114, |
|
"global_step": 49000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.9696846089836677e-05, |
|
"loss": 0.7465, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_accuracy": 0.7886907794192562, |
|
"eval_loss": 0.5396007299423218, |
|
"eval_runtime": 51.0387, |
|
"eval_samples_per_second": 192.305, |
|
"eval_steps_per_second": 6.015, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.9370523258659796e-05, |
|
"loss": 0.584, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_accuracy": 0.8102903718797758, |
|
"eval_loss": 0.4883033335208893, |
|
"eval_runtime": 46.5382, |
|
"eval_samples_per_second": 210.902, |
|
"eval_steps_per_second": 6.597, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.9044200427482912e-05, |
|
"loss": 0.5483, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_accuracy": 0.8145695364238411, |
|
"eval_loss": 0.47581160068511963, |
|
"eval_runtime": 46.7271, |
|
"eval_samples_per_second": 210.049, |
|
"eval_steps_per_second": 6.57, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.8717877596306028e-05, |
|
"loss": 0.5228, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_accuracy": 0.8207845134997452, |
|
"eval_loss": 0.45740216970443726, |
|
"eval_runtime": 45.9826, |
|
"eval_samples_per_second": 213.45, |
|
"eval_steps_per_second": 6.676, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.8391554765129143e-05, |
|
"loss": 0.5012, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_accuracy": 0.8290371879775853, |
|
"eval_loss": 0.4382418990135193, |
|
"eval_runtime": 46.0661, |
|
"eval_samples_per_second": 213.063, |
|
"eval_steps_per_second": 6.664, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.8065231933952262e-05, |
|
"loss": 0.4915, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_accuracy": 0.8333163525216505, |
|
"eval_loss": 0.4435715079307556, |
|
"eval_runtime": 47.0204, |
|
"eval_samples_per_second": 208.739, |
|
"eval_steps_per_second": 6.529, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1.7738909102775378e-05, |
|
"loss": 0.4858, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_accuracy": 0.8344370860927153, |
|
"eval_loss": 0.43664801120758057, |
|
"eval_runtime": 46.4417, |
|
"eval_samples_per_second": 211.34, |
|
"eval_steps_per_second": 6.61, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.7412586271598494e-05, |
|
"loss": 0.4766, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_accuracy": 0.8361691288843607, |
|
"eval_loss": 0.42201751470565796, |
|
"eval_runtime": 47.0497, |
|
"eval_samples_per_second": 208.609, |
|
"eval_steps_per_second": 6.525, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.708626344042161e-05, |
|
"loss": 0.4643, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_accuracy": 0.8390219052470708, |
|
"eval_loss": 0.4428311586380005, |
|
"eval_runtime": 46.1174, |
|
"eval_samples_per_second": 212.826, |
|
"eval_steps_per_second": 6.657, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.675994060924473e-05, |
|
"loss": 0.4593, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_accuracy": 0.8371879775853286, |
|
"eval_loss": 0.43534818291664124, |
|
"eval_runtime": 50.18, |
|
"eval_samples_per_second": 195.596, |
|
"eval_steps_per_second": 6.118, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.6433617778067844e-05, |
|
"loss": 0.46, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_accuracy": 0.8424859908303617, |
|
"eval_loss": 0.4115181863307953, |
|
"eval_runtime": 53.0256, |
|
"eval_samples_per_second": 185.099, |
|
"eval_steps_per_second": 5.79, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.610729494689096e-05, |
|
"loss": 0.4537, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_accuracy": 0.8446255731023943, |
|
"eval_loss": 0.4107950031757355, |
|
"eval_runtime": 46.5601, |
|
"eval_samples_per_second": 210.803, |
|
"eval_steps_per_second": 6.594, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 1.5780972115714076e-05, |
|
"loss": 0.3982, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"eval_accuracy": 0.8461538461538461, |
|
"eval_loss": 0.4335910677909851, |
|
"eval_runtime": 47.058, |
|
"eval_samples_per_second": 208.572, |
|
"eval_steps_per_second": 6.524, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 1.545464928453719e-05, |
|
"loss": 0.3781, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"eval_accuracy": 0.8492103922567499, |
|
"eval_loss": 0.4207431674003601, |
|
"eval_runtime": 46.2518, |
|
"eval_samples_per_second": 212.208, |
|
"eval_steps_per_second": 6.638, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 1.512832645336031e-05, |
|
"loss": 0.3871, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"eval_accuracy": 0.8451349974528782, |
|
"eval_loss": 0.4314253032207489, |
|
"eval_runtime": 46.2026, |
|
"eval_samples_per_second": 212.434, |
|
"eval_steps_per_second": 6.645, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 1.4802003622183428e-05, |
|
"loss": 0.3789, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_accuracy": 0.8504330106979113, |
|
"eval_loss": 0.3989640772342682, |
|
"eval_runtime": 46.611, |
|
"eval_samples_per_second": 210.572, |
|
"eval_steps_per_second": 6.586, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 1.4475680791006544e-05, |
|
"loss": 0.3848, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"eval_accuracy": 0.8482934284258787, |
|
"eval_loss": 0.4063809812068939, |
|
"eval_runtime": 46.0252, |
|
"eval_samples_per_second": 213.253, |
|
"eval_steps_per_second": 6.67, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 1.4149357959829661e-05, |
|
"loss": 0.3771, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"eval_accuracy": 0.8446255731023943, |
|
"eval_loss": 0.43143340945243835, |
|
"eval_runtime": 46.514, |
|
"eval_samples_per_second": 211.012, |
|
"eval_steps_per_second": 6.6, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 1.3823035128652777e-05, |
|
"loss": 0.3754, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"eval_accuracy": 0.8511462047885889, |
|
"eval_loss": 0.4237792193889618, |
|
"eval_runtime": 47.005, |
|
"eval_samples_per_second": 208.808, |
|
"eval_steps_per_second": 6.531, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 1.3496712297475893e-05, |
|
"loss": 0.378, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"eval_accuracy": 0.8496179317371371, |
|
"eval_loss": 0.4224611818790436, |
|
"eval_runtime": 46.124, |
|
"eval_samples_per_second": 212.796, |
|
"eval_steps_per_second": 6.656, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 1.3170389466299012e-05, |
|
"loss": 0.3747, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"eval_accuracy": 0.8504330106979113, |
|
"eval_loss": 0.39967572689056396, |
|
"eval_runtime": 45.9739, |
|
"eval_samples_per_second": 213.49, |
|
"eval_steps_per_second": 6.678, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 1.2844066635122128e-05, |
|
"loss": 0.3715, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"eval_accuracy": 0.8466632705043301, |
|
"eval_loss": 0.42271146178245544, |
|
"eval_runtime": 46.4472, |
|
"eval_samples_per_second": 211.315, |
|
"eval_steps_per_second": 6.61, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.2517743803945243e-05, |
|
"loss": 0.369, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"eval_accuracy": 0.8543046357615894, |
|
"eval_loss": 0.3938461244106293, |
|
"eval_runtime": 47.3838, |
|
"eval_samples_per_second": 207.138, |
|
"eval_steps_per_second": 6.479, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.2191420972768359e-05, |
|
"loss": 0.3645, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"eval_accuracy": 0.8500254712175241, |
|
"eval_loss": 0.41426172852516174, |
|
"eval_runtime": 45.9093, |
|
"eval_samples_per_second": 213.791, |
|
"eval_steps_per_second": 6.687, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.1865098141591478e-05, |
|
"loss": 0.3413, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"eval_accuracy": 0.8531839021905248, |
|
"eval_loss": 0.43976131081581116, |
|
"eval_runtime": 46.649, |
|
"eval_samples_per_second": 210.401, |
|
"eval_steps_per_second": 6.581, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.1538775310414594e-05, |
|
"loss": 0.3014, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"eval_accuracy": 0.8570555272542028, |
|
"eval_loss": 0.4210617244243622, |
|
"eval_runtime": 46.539, |
|
"eval_samples_per_second": 210.898, |
|
"eval_steps_per_second": 6.597, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.121245247923771e-05, |
|
"loss": 0.2941, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"eval_accuracy": 0.8554253693326541, |
|
"eval_loss": 0.43391153216362, |
|
"eval_runtime": 46.5124, |
|
"eval_samples_per_second": 211.019, |
|
"eval_steps_per_second": 6.6, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.0886129648060829e-05, |
|
"loss": 0.2979, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"eval_accuracy": 0.8607233825776872, |
|
"eval_loss": 0.42420682311058044, |
|
"eval_runtime": 147.2489, |
|
"eval_samples_per_second": 66.656, |
|
"eval_steps_per_second": 2.085, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 1.0559806816883944e-05, |
|
"loss": 0.3056, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"eval_accuracy": 0.8541008660213958, |
|
"eval_loss": 0.4425140619277954, |
|
"eval_runtime": 46.1358, |
|
"eval_samples_per_second": 212.741, |
|
"eval_steps_per_second": 6.654, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.023348398570706e-05, |
|
"loss": 0.2953, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"eval_accuracy": 0.8592969943963321, |
|
"eval_loss": 0.41613587737083435, |
|
"eval_runtime": 47.1744, |
|
"eval_samples_per_second": 208.058, |
|
"eval_steps_per_second": 6.508, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 9.907161154530178e-06, |
|
"loss": 0.2935, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"eval_accuracy": 0.8588894549159449, |
|
"eval_loss": 0.4042932987213135, |
|
"eval_runtime": 46.2425, |
|
"eval_samples_per_second": 212.251, |
|
"eval_steps_per_second": 6.639, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 9.580838323353295e-06, |
|
"loss": 0.3018, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"eval_accuracy": 0.8497198166072338, |
|
"eval_loss": 0.4361600875854492, |
|
"eval_runtime": 45.7305, |
|
"eval_samples_per_second": 214.627, |
|
"eval_steps_per_second": 6.713, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 9.25451549217641e-06, |
|
"loss": 0.3046, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"eval_accuracy": 0.8528782475802343, |
|
"eval_loss": 0.43105748295783997, |
|
"eval_runtime": 47.0154, |
|
"eval_samples_per_second": 208.761, |
|
"eval_steps_per_second": 6.53, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 8.928192660999528e-06, |
|
"loss": 0.2982, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"eval_accuracy": 0.8610290371879776, |
|
"eval_loss": 0.40264639258384705, |
|
"eval_runtime": 46.7153, |
|
"eval_samples_per_second": 210.103, |
|
"eval_steps_per_second": 6.572, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 8.601869829822644e-06, |
|
"loss": 0.2983, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"eval_accuracy": 0.856444218033622, |
|
"eval_loss": 0.40915054082870483, |
|
"eval_runtime": 46.1785, |
|
"eval_samples_per_second": 212.545, |
|
"eval_steps_per_second": 6.648, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 8.275546998645761e-06, |
|
"loss": 0.2987, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"eval_accuracy": 0.8551197147223637, |
|
"eval_loss": 0.41256535053253174, |
|
"eval_runtime": 46.0319, |
|
"eval_samples_per_second": 213.222, |
|
"eval_steps_per_second": 6.669, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 7.949224167468877e-06, |
|
"loss": 0.2893, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"eval_accuracy": 0.8574630667345899, |
|
"eval_loss": 0.45308130979537964, |
|
"eval_runtime": 46.1413, |
|
"eval_samples_per_second": 212.716, |
|
"eval_steps_per_second": 6.653, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 7.622901336291994e-06, |
|
"loss": 0.2413, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"eval_accuracy": 0.8611309220580744, |
|
"eval_loss": 0.44799548387527466, |
|
"eval_runtime": 45.9836, |
|
"eval_samples_per_second": 213.446, |
|
"eval_steps_per_second": 6.676, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 7.296578505115111e-06, |
|
"loss": 0.2376, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"eval_accuracy": 0.8607233825776872, |
|
"eval_loss": 0.4475202262401581, |
|
"eval_runtime": 45.5836, |
|
"eval_samples_per_second": 215.319, |
|
"eval_steps_per_second": 6.735, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 6.9702556739382275e-06, |
|
"loss": 0.2399, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"eval_accuracy": 0.8579724910850739, |
|
"eval_loss": 0.4466216266155243, |
|
"eval_runtime": 48.0904, |
|
"eval_samples_per_second": 204.095, |
|
"eval_steps_per_second": 6.384, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 6.643932842761345e-06, |
|
"loss": 0.2411, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"eval_accuracy": 0.8579724910850739, |
|
"eval_loss": 0.44625383615493774, |
|
"eval_runtime": 46.3779, |
|
"eval_samples_per_second": 211.631, |
|
"eval_steps_per_second": 6.62, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 6.317610011584461e-06, |
|
"loss": 0.2461, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"eval_accuracy": 0.8574630667345899, |
|
"eval_loss": 0.45237523317337036, |
|
"eval_runtime": 46.2979, |
|
"eval_samples_per_second": 211.997, |
|
"eval_steps_per_second": 6.631, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 5.991287180407578e-06, |
|
"loss": 0.2433, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"eval_accuracy": 0.8600101884870097, |
|
"eval_loss": 0.4503074884414673, |
|
"eval_runtime": 46.049, |
|
"eval_samples_per_second": 213.143, |
|
"eval_steps_per_second": 6.667, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 5.664964349230695e-06, |
|
"loss": 0.2427, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"eval_accuracy": 0.8616403464085584, |
|
"eval_loss": 0.4240187704563141, |
|
"eval_runtime": 46.0142, |
|
"eval_samples_per_second": 213.304, |
|
"eval_steps_per_second": 6.672, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 5.338641518053811e-06, |
|
"loss": 0.2354, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"eval_accuracy": 0.8609271523178808, |
|
"eval_loss": 0.42988237738609314, |
|
"eval_runtime": 46.9338, |
|
"eval_samples_per_second": 209.125, |
|
"eval_steps_per_second": 6.541, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 5.012318686876928e-06, |
|
"loss": 0.2401, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"eval_accuracy": 0.8600101884870097, |
|
"eval_loss": 0.4482932388782501, |
|
"eval_runtime": 46.836, |
|
"eval_samples_per_second": 209.561, |
|
"eval_steps_per_second": 6.555, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 4.685995855700044e-06, |
|
"loss": 0.2396, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"eval_accuracy": 0.8579724910850739, |
|
"eval_loss": 0.4487365782260895, |
|
"eval_runtime": 47.4992, |
|
"eval_samples_per_second": 206.635, |
|
"eval_steps_per_second": 6.463, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 4.359673024523161e-06, |
|
"loss": 0.2327, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"eval_accuracy": 0.8613346917982679, |
|
"eval_loss": 0.4441225230693817, |
|
"eval_runtime": 46.3347, |
|
"eval_samples_per_second": 211.828, |
|
"eval_steps_per_second": 6.626, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 4.0333501933462775e-06, |
|
"loss": 0.2336, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"eval_accuracy": 0.8644931227712684, |
|
"eval_loss": 0.4395711421966553, |
|
"eval_runtime": 47.7718, |
|
"eval_samples_per_second": 205.456, |
|
"eval_steps_per_second": 6.426, |
|
"step": 49000 |
|
} |
|
], |
|
"max_steps": 61360, |
|
"num_train_epochs": 5, |
|
"total_flos": 1.2022186898599373e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|