|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.136986301369863, |
|
"global_step": 390, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4e-05, |
|
"loss": 1.3902, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_loss": 1.3901418447494507, |
|
"eval_runtime": 108.6518, |
|
"eval_samples_per_second": 1.555, |
|
"eval_steps_per_second": 0.396, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 8e-05, |
|
"loss": 1.3304, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_loss": 1.3154278993606567, |
|
"eval_runtime": 108.7121, |
|
"eval_samples_per_second": 1.555, |
|
"eval_steps_per_second": 0.396, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00012, |
|
"loss": 1.2138, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_loss": 1.2570539712905884, |
|
"eval_runtime": 108.5686, |
|
"eval_samples_per_second": 1.557, |
|
"eval_steps_per_second": 0.396, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00016, |
|
"loss": 1.2281, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 1.237758994102478, |
|
"eval_runtime": 108.8506, |
|
"eval_samples_per_second": 1.553, |
|
"eval_steps_per_second": 0.395, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2155, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_loss": 1.2247029542922974, |
|
"eval_runtime": 108.5608, |
|
"eval_samples_per_second": 1.557, |
|
"eval_steps_per_second": 0.396, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00019994532573409262, |
|
"loss": 1.1823, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_loss": 1.2180429697036743, |
|
"eval_runtime": 108.7983, |
|
"eval_samples_per_second": 1.553, |
|
"eval_steps_per_second": 0.395, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00019978136272187747, |
|
"loss": 1.2289, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_loss": 1.2121814489364624, |
|
"eval_runtime": 108.4495, |
|
"eval_samples_per_second": 1.558, |
|
"eval_steps_per_second": 0.396, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00019950829025450114, |
|
"loss": 1.1623, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_loss": 1.2073330879211426, |
|
"eval_runtime": 108.7996, |
|
"eval_samples_per_second": 1.553, |
|
"eval_steps_per_second": 0.395, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00019912640693269752, |
|
"loss": 1.2067, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_loss": 1.204315423965454, |
|
"eval_runtime": 108.8942, |
|
"eval_samples_per_second": 1.552, |
|
"eval_steps_per_second": 0.395, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00019863613034027224, |
|
"loss": 1.1499, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_loss": 1.2012346982955933, |
|
"eval_runtime": 108.7573, |
|
"eval_samples_per_second": 1.554, |
|
"eval_steps_per_second": 0.395, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00019803799658748094, |
|
"loss": 1.2184, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_loss": 1.200089454650879, |
|
"eval_runtime": 108.5135, |
|
"eval_samples_per_second": 1.557, |
|
"eval_steps_per_second": 0.396, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0001973326597248006, |
|
"loss": 1.1758, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_loss": 1.1969162225723267, |
|
"eval_runtime": 108.562, |
|
"eval_samples_per_second": 1.557, |
|
"eval_steps_per_second": 0.396, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00019652089102773488, |
|
"loss": 1.1931, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_loss": 1.1960800886154175, |
|
"eval_runtime": 108.6308, |
|
"eval_samples_per_second": 1.556, |
|
"eval_steps_per_second": 0.396, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00019560357815343577, |
|
"loss": 1.1334, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_loss": 1.1931264400482178, |
|
"eval_runtime": 108.6207, |
|
"eval_samples_per_second": 1.556, |
|
"eval_steps_per_second": 0.396, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00019458172417006347, |
|
"loss": 1.1321, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_loss": 1.1918461322784424, |
|
"eval_runtime": 108.4723, |
|
"eval_samples_per_second": 1.558, |
|
"eval_steps_per_second": 0.396, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.0001934564464599461, |
|
"loss": 1.181, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_loss": 1.1898659467697144, |
|
"eval_runtime": 108.5621, |
|
"eval_samples_per_second": 1.557, |
|
"eval_steps_per_second": 0.396, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.00019222897549773848, |
|
"loss": 1.2347, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_loss": 1.1882331371307373, |
|
"eval_runtime": 108.6542, |
|
"eval_samples_per_second": 1.555, |
|
"eval_steps_per_second": 0.396, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.00019090065350491626, |
|
"loss": 1.0926, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_loss": 1.1871733665466309, |
|
"eval_runtime": 108.5395, |
|
"eval_samples_per_second": 1.557, |
|
"eval_steps_per_second": 0.396, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.00018947293298207635, |
|
"loss": 1.1425, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"eval_loss": 1.1867098808288574, |
|
"eval_runtime": 108.4877, |
|
"eval_samples_per_second": 1.558, |
|
"eval_steps_per_second": 0.396, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.0001879473751206489, |
|
"loss": 1.128, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_loss": 1.1867098808288574, |
|
"eval_runtime": 108.7001, |
|
"eval_samples_per_second": 1.555, |
|
"eval_steps_per_second": 0.396, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.00018632564809575742, |
|
"loss": 1.1237, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"eval_loss": 1.1867098808288574, |
|
"eval_runtime": 108.5929, |
|
"eval_samples_per_second": 1.556, |
|
"eval_steps_per_second": 0.396, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.00018460952524209355, |
|
"loss": 1.0939, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"eval_loss": 1.1867098808288574, |
|
"eval_runtime": 108.8988, |
|
"eval_samples_per_second": 1.552, |
|
"eval_steps_per_second": 0.395, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.00018280088311480201, |
|
"loss": 1.093, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"eval_loss": 1.1867098808288574, |
|
"eval_runtime": 108.817, |
|
"eval_samples_per_second": 1.553, |
|
"eval_steps_per_second": 0.395, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.00018090169943749476, |
|
"loss": 1.0988, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"eval_loss": 1.1867098808288574, |
|
"eval_runtime": 108.6088, |
|
"eval_samples_per_second": 1.556, |
|
"eval_steps_per_second": 0.396, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.00017891405093963938, |
|
"loss": 1.1054, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"eval_loss": 1.1867098808288574, |
|
"eval_runtime": 108.4026, |
|
"eval_samples_per_second": 1.559, |
|
"eval_steps_per_second": 0.397, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.00017684011108568592, |
|
"loss": 1.1051, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"eval_loss": 1.1867098808288574, |
|
"eval_runtime": 108.6345, |
|
"eval_samples_per_second": 1.556, |
|
"eval_steps_per_second": 0.396, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.0001746821476984154, |
|
"loss": 1.1373, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"eval_loss": 1.1867098808288574, |
|
"eval_runtime": 108.873, |
|
"eval_samples_per_second": 1.552, |
|
"eval_steps_per_second": 0.395, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.00017244252047910892, |
|
"loss": 1.119, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"eval_loss": 1.1867098808288574, |
|
"eval_runtime": 108.5863, |
|
"eval_samples_per_second": 1.556, |
|
"eval_steps_per_second": 0.396, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 0.00017012367842724887, |
|
"loss": 1.1399, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"eval_loss": 1.1867098808288574, |
|
"eval_runtime": 108.7422, |
|
"eval_samples_per_second": 1.554, |
|
"eval_steps_per_second": 0.395, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 0.00016772815716257412, |
|
"loss": 1.0744, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"eval_loss": 1.1867098808288574, |
|
"eval_runtime": 108.7961, |
|
"eval_samples_per_second": 1.553, |
|
"eval_steps_per_second": 0.395, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 0.00016525857615241687, |
|
"loss": 1.0797, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"eval_loss": 1.1867098808288574, |
|
"eval_runtime": 108.4529, |
|
"eval_samples_per_second": 1.558, |
|
"eval_steps_per_second": 0.396, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.0001627176358473537, |
|
"loss": 1.1281, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_loss": 1.1867098808288574, |
|
"eval_runtime": 108.354, |
|
"eval_samples_per_second": 1.56, |
|
"eval_steps_per_second": 0.397, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 0.00016010811472830252, |
|
"loss": 1.073, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"eval_loss": 1.1867098808288574, |
|
"eval_runtime": 108.3772, |
|
"eval_samples_per_second": 1.559, |
|
"eval_steps_per_second": 0.397, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 0.00015743286626829437, |
|
"loss": 1.1053, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"eval_loss": 1.1867098808288574, |
|
"eval_runtime": 108.3166, |
|
"eval_samples_per_second": 1.56, |
|
"eval_steps_per_second": 0.397, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 0.00015469481581224272, |
|
"loss": 1.1402, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"eval_loss": 1.1867098808288574, |
|
"eval_runtime": 108.4029, |
|
"eval_samples_per_second": 1.559, |
|
"eval_steps_per_second": 0.397, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.00015189695737812152, |
|
"loss": 1.1627, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_loss": 1.1867098808288574, |
|
"eval_runtime": 108.4859, |
|
"eval_samples_per_second": 1.558, |
|
"eval_steps_per_second": 0.396, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 0.00014904235038305083, |
|
"loss": 1.1064, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"eval_loss": 1.1901580095291138, |
|
"eval_runtime": 108.5309, |
|
"eval_samples_per_second": 1.557, |
|
"eval_steps_per_second": 0.396, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 0.0001461341162978688, |
|
"loss": 1.0264, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"eval_loss": 1.187896966934204, |
|
"eval_runtime": 108.3764, |
|
"eval_samples_per_second": 1.559, |
|
"eval_steps_per_second": 0.397, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 0.00014317543523384928, |
|
"loss": 1.1384, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"eval_loss": 1.1920827627182007, |
|
"eval_runtime": 108.625, |
|
"eval_samples_per_second": 1.556, |
|
"eval_steps_per_second": 0.396, |
|
"step": 390 |
|
} |
|
], |
|
"max_steps": 1000, |
|
"num_train_epochs": 6, |
|
"total_flos": 6.601561671204864e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|