|
{ |
|
"best_metric": 1.0, |
|
"best_model_checkpoint": "bit-50-Pharyngitis\\checkpoint-36", |
|
"epoch": 95.23809523809524, |
|
"global_step": 500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.95, |
|
"eval_accuracy": 0.7027027027027027, |
|
"eval_loss": 0.6438681483268738, |
|
"eval_runtime": 0.5883, |
|
"eval_samples_per_second": 62.898, |
|
"eval_steps_per_second": 5.1, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1e-05, |
|
"loss": 0.6378, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"eval_accuracy": 0.8918918918918919, |
|
"eval_loss": 0.4361162483692169, |
|
"eval_runtime": 0.3774, |
|
"eval_samples_per_second": 98.042, |
|
"eval_steps_per_second": 7.949, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"eval_accuracy": 0.972972972972973, |
|
"eval_loss": 0.2754928469657898, |
|
"eval_runtime": 0.3737, |
|
"eval_samples_per_second": 99.02, |
|
"eval_steps_per_second": 8.029, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 2e-05, |
|
"loss": 0.4237, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.918918918918919, |
|
"eval_loss": 0.21373549103736877, |
|
"eval_runtime": 0.4065, |
|
"eval_samples_per_second": 91.026, |
|
"eval_steps_per_second": 7.381, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"eval_accuracy": 0.9459459459459459, |
|
"eval_loss": 0.1247631162405014, |
|
"eval_runtime": 0.3977, |
|
"eval_samples_per_second": 93.032, |
|
"eval_steps_per_second": 7.543, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 5.71, |
|
"learning_rate": 3e-05, |
|
"loss": 0.2592, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 5.9, |
|
"eval_accuracy": 0.972972972972973, |
|
"eval_loss": 0.08167269825935364, |
|
"eval_runtime": 0.3971, |
|
"eval_samples_per_second": 93.166, |
|
"eval_steps_per_second": 7.554, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 6.86, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.06954346597194672, |
|
"eval_runtime": 0.3842, |
|
"eval_samples_per_second": 96.305, |
|
"eval_steps_per_second": 7.809, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 7.62, |
|
"learning_rate": 4e-05, |
|
"loss": 0.1775, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.918918918918919, |
|
"eval_loss": 0.23191875219345093, |
|
"eval_runtime": 0.3999, |
|
"eval_samples_per_second": 92.518, |
|
"eval_steps_per_second": 7.501, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 8.95, |
|
"eval_accuracy": 0.972972972972973, |
|
"eval_loss": 0.052055422216653824, |
|
"eval_runtime": 0.4011, |
|
"eval_samples_per_second": 92.257, |
|
"eval_steps_per_second": 7.48, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 9.52, |
|
"learning_rate": 5e-05, |
|
"loss": 0.1805, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 9.9, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.047812674194574356, |
|
"eval_runtime": 0.3975, |
|
"eval_samples_per_second": 93.076, |
|
"eval_steps_per_second": 7.547, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 10.86, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.04274846613407135, |
|
"eval_runtime": 0.4052, |
|
"eval_samples_per_second": 91.313, |
|
"eval_steps_per_second": 7.404, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 11.43, |
|
"learning_rate": 4.888888888888889e-05, |
|
"loss": 0.171, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8918918918918919, |
|
"eval_loss": 0.1753544956445694, |
|
"eval_runtime": 0.4139, |
|
"eval_samples_per_second": 89.385, |
|
"eval_steps_per_second": 7.247, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 12.95, |
|
"eval_accuracy": 0.9459459459459459, |
|
"eval_loss": 0.06655322760343552, |
|
"eval_runtime": 0.3887, |
|
"eval_samples_per_second": 95.191, |
|
"eval_steps_per_second": 7.718, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 13.33, |
|
"learning_rate": 4.7777777777777784e-05, |
|
"loss": 0.089, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 13.9, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.02494250051677227, |
|
"eval_runtime": 0.4123, |
|
"eval_samples_per_second": 89.751, |
|
"eval_steps_per_second": 7.277, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 14.86, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.019040122628211975, |
|
"eval_runtime": 0.4396, |
|
"eval_samples_per_second": 84.171, |
|
"eval_steps_per_second": 6.825, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 15.24, |
|
"learning_rate": 4.666666666666667e-05, |
|
"loss": 0.1093, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.9459459459459459, |
|
"eval_loss": 0.06111358851194382, |
|
"eval_runtime": 0.4317, |
|
"eval_samples_per_second": 85.699, |
|
"eval_steps_per_second": 6.949, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 16.95, |
|
"eval_accuracy": 0.918918918918919, |
|
"eval_loss": 0.16683633625507355, |
|
"eval_runtime": 0.394, |
|
"eval_samples_per_second": 93.907, |
|
"eval_steps_per_second": 7.614, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 17.14, |
|
"learning_rate": 4.555555555555556e-05, |
|
"loss": 0.1025, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 17.9, |
|
"eval_accuracy": 0.972972972972973, |
|
"eval_loss": 0.05101567134261131, |
|
"eval_runtime": 0.3847, |
|
"eval_samples_per_second": 96.177, |
|
"eval_steps_per_second": 7.798, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 18.86, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.010445931926369667, |
|
"eval_runtime": 0.3987, |
|
"eval_samples_per_second": 92.805, |
|
"eval_steps_per_second": 7.525, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 19.05, |
|
"learning_rate": 4.4444444444444447e-05, |
|
"loss": 0.12, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8918918918918919, |
|
"eval_loss": 0.18418262898921967, |
|
"eval_runtime": 0.391, |
|
"eval_samples_per_second": 94.632, |
|
"eval_steps_per_second": 7.673, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 20.95, |
|
"learning_rate": 4.3333333333333334e-05, |
|
"loss": 0.0996, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 20.95, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.011865493841469288, |
|
"eval_runtime": 0.3899, |
|
"eval_samples_per_second": 94.901, |
|
"eval_steps_per_second": 7.695, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 21.9, |
|
"eval_accuracy": 0.972972972972973, |
|
"eval_loss": 0.03581492602825165, |
|
"eval_runtime": 0.4047, |
|
"eval_samples_per_second": 91.415, |
|
"eval_steps_per_second": 7.412, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 22.86, |
|
"learning_rate": 4.222222222222222e-05, |
|
"loss": 0.0933, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 22.86, |
|
"eval_accuracy": 0.9459459459459459, |
|
"eval_loss": 0.11507910490036011, |
|
"eval_runtime": 0.4091, |
|
"eval_samples_per_second": 90.451, |
|
"eval_steps_per_second": 7.334, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.972972972972973, |
|
"eval_loss": 0.08254072815179825, |
|
"eval_runtime": 0.3864, |
|
"eval_samples_per_second": 95.759, |
|
"eval_steps_per_second": 7.764, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 24.76, |
|
"learning_rate": 4.111111111111111e-05, |
|
"loss": 0.1118, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 24.95, |
|
"eval_accuracy": 0.972972972972973, |
|
"eval_loss": 0.08668244630098343, |
|
"eval_runtime": 0.3898, |
|
"eval_samples_per_second": 94.932, |
|
"eval_steps_per_second": 7.697, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 25.9, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.009897518903017044, |
|
"eval_runtime": 0.4036, |
|
"eval_samples_per_second": 91.68, |
|
"eval_steps_per_second": 7.434, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 26.67, |
|
"learning_rate": 4e-05, |
|
"loss": 0.0471, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 26.86, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.019395073875784874, |
|
"eval_runtime": 0.3954, |
|
"eval_samples_per_second": 93.584, |
|
"eval_steps_per_second": 7.588, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.0031759734265506268, |
|
"eval_runtime": 0.3912, |
|
"eval_samples_per_second": 94.57, |
|
"eval_steps_per_second": 7.668, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 28.57, |
|
"learning_rate": 3.888888888888889e-05, |
|
"loss": 0.0686, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 28.95, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.0057038371451199055, |
|
"eval_runtime": 0.3906, |
|
"eval_samples_per_second": 94.728, |
|
"eval_steps_per_second": 7.681, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 29.9, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.0014269119128584862, |
|
"eval_runtime": 0.3905, |
|
"eval_samples_per_second": 94.751, |
|
"eval_steps_per_second": 7.682, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 30.48, |
|
"learning_rate": 3.777777777777778e-05, |
|
"loss": 0.0692, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 30.86, |
|
"eval_accuracy": 0.918918918918919, |
|
"eval_loss": 0.34027108550071716, |
|
"eval_runtime": 0.4009, |
|
"eval_samples_per_second": 92.295, |
|
"eval_steps_per_second": 7.483, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.011479969136416912, |
|
"eval_runtime": 0.4005, |
|
"eval_samples_per_second": 92.395, |
|
"eval_steps_per_second": 7.491, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 32.38, |
|
"learning_rate": 3.6666666666666666e-05, |
|
"loss": 0.0912, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 32.95, |
|
"eval_accuracy": 0.9459459459459459, |
|
"eval_loss": 0.0989471897482872, |
|
"eval_runtime": 0.4052, |
|
"eval_samples_per_second": 91.323, |
|
"eval_steps_per_second": 7.405, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 33.9, |
|
"eval_accuracy": 0.9459459459459459, |
|
"eval_loss": 0.10641559958457947, |
|
"eval_runtime": 0.3889, |
|
"eval_samples_per_second": 95.131, |
|
"eval_steps_per_second": 7.713, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 34.29, |
|
"learning_rate": 3.555555555555556e-05, |
|
"loss": 0.0994, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 34.86, |
|
"eval_accuracy": 0.972972972972973, |
|
"eval_loss": 0.09466935694217682, |
|
"eval_runtime": 0.3912, |
|
"eval_samples_per_second": 94.569, |
|
"eval_steps_per_second": 7.668, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.9459459459459459, |
|
"eval_loss": 0.10136424005031586, |
|
"eval_runtime": 0.4255, |
|
"eval_samples_per_second": 86.966, |
|
"eval_steps_per_second": 7.051, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 36.19, |
|
"learning_rate": 3.444444444444445e-05, |
|
"loss": 0.0561, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 36.95, |
|
"eval_accuracy": 0.972972972972973, |
|
"eval_loss": 0.0519096776843071, |
|
"eval_runtime": 0.4107, |
|
"eval_samples_per_second": 90.096, |
|
"eval_steps_per_second": 7.305, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 37.9, |
|
"eval_accuracy": 0.918918918918919, |
|
"eval_loss": 0.16669504344463348, |
|
"eval_runtime": 0.4065, |
|
"eval_samples_per_second": 91.027, |
|
"eval_steps_per_second": 7.381, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 38.1, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.0516, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 38.86, |
|
"eval_accuracy": 0.918918918918919, |
|
"eval_loss": 0.3604719936847687, |
|
"eval_runtime": 0.3971, |
|
"eval_samples_per_second": 93.183, |
|
"eval_steps_per_second": 7.555, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 3.222222222222223e-05, |
|
"loss": 0.0535, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.972972972972973, |
|
"eval_loss": 0.044684119522571564, |
|
"eval_runtime": 0.3775, |
|
"eval_samples_per_second": 98.002, |
|
"eval_steps_per_second": 7.946, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 40.95, |
|
"eval_accuracy": 0.918918918918919, |
|
"eval_loss": 0.1711174100637436, |
|
"eval_runtime": 0.3876, |
|
"eval_samples_per_second": 95.468, |
|
"eval_steps_per_second": 7.741, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 41.9, |
|
"learning_rate": 3.111111111111111e-05, |
|
"loss": 0.0475, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 41.9, |
|
"eval_accuracy": 0.9459459459459459, |
|
"eval_loss": 0.10387804359197617, |
|
"eval_runtime": 0.3953, |
|
"eval_samples_per_second": 93.597, |
|
"eval_steps_per_second": 7.589, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 42.86, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.029212407767772675, |
|
"eval_runtime": 0.3915, |
|
"eval_samples_per_second": 94.5, |
|
"eval_steps_per_second": 7.662, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 43.81, |
|
"learning_rate": 3e-05, |
|
"loss": 0.0504, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.014002898707985878, |
|
"eval_runtime": 0.3889, |
|
"eval_samples_per_second": 95.145, |
|
"eval_steps_per_second": 7.714, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 44.95, |
|
"eval_accuracy": 0.8918918918918919, |
|
"eval_loss": 0.27451202273368835, |
|
"eval_runtime": 0.4255, |
|
"eval_samples_per_second": 86.962, |
|
"eval_steps_per_second": 7.051, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 45.71, |
|
"learning_rate": 2.8888888888888888e-05, |
|
"loss": 0.0432, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 45.9, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.002892308635637164, |
|
"eval_runtime": 0.3957, |
|
"eval_samples_per_second": 93.507, |
|
"eval_steps_per_second": 7.582, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 46.86, |
|
"eval_accuracy": 0.8648648648648649, |
|
"eval_loss": 0.4316161274909973, |
|
"eval_runtime": 0.4242, |
|
"eval_samples_per_second": 87.218, |
|
"eval_steps_per_second": 7.072, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 47.62, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.0992, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.015067849308252335, |
|
"eval_runtime": 0.3902, |
|
"eval_samples_per_second": 94.825, |
|
"eval_steps_per_second": 7.688, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 48.95, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.00749516487121582, |
|
"eval_runtime": 0.3915, |
|
"eval_samples_per_second": 94.506, |
|
"eval_steps_per_second": 7.663, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 49.52, |
|
"learning_rate": 2.6666666666666667e-05, |
|
"loss": 0.1531, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 49.9, |
|
"eval_accuracy": 0.8648648648648649, |
|
"eval_loss": 0.3624305725097656, |
|
"eval_runtime": 0.4028, |
|
"eval_samples_per_second": 91.854, |
|
"eval_steps_per_second": 7.448, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 50.86, |
|
"eval_accuracy": 0.972972972972973, |
|
"eval_loss": 0.036536574363708496, |
|
"eval_runtime": 0.3932, |
|
"eval_samples_per_second": 94.091, |
|
"eval_steps_per_second": 7.629, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 51.43, |
|
"learning_rate": 2.5555555555555554e-05, |
|
"loss": 0.0622, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.972972972972973, |
|
"eval_loss": 0.20016101002693176, |
|
"eval_runtime": 0.4002, |
|
"eval_samples_per_second": 92.444, |
|
"eval_steps_per_second": 7.495, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 52.95, |
|
"eval_accuracy": 0.972972972972973, |
|
"eval_loss": 0.12544459104537964, |
|
"eval_runtime": 0.4119, |
|
"eval_samples_per_second": 89.837, |
|
"eval_steps_per_second": 7.284, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 53.33, |
|
"learning_rate": 2.4444444444444445e-05, |
|
"loss": 0.0432, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 53.9, |
|
"eval_accuracy": 0.9459459459459459, |
|
"eval_loss": 0.0909779816865921, |
|
"eval_runtime": 0.3955, |
|
"eval_samples_per_second": 93.545, |
|
"eval_steps_per_second": 7.585, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 54.86, |
|
"eval_accuracy": 0.8918918918918919, |
|
"eval_loss": 0.3727685809135437, |
|
"eval_runtime": 0.3993, |
|
"eval_samples_per_second": 92.669, |
|
"eval_steps_per_second": 7.514, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 55.24, |
|
"learning_rate": 2.3333333333333336e-05, |
|
"loss": 0.0531, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.9459459459459459, |
|
"eval_loss": 0.10307420045137405, |
|
"eval_runtime": 0.3958, |
|
"eval_samples_per_second": 93.478, |
|
"eval_steps_per_second": 7.579, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 56.95, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.002981973346322775, |
|
"eval_runtime": 0.3986, |
|
"eval_samples_per_second": 92.827, |
|
"eval_steps_per_second": 7.526, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 57.14, |
|
"learning_rate": 2.2222222222222223e-05, |
|
"loss": 0.0731, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 57.9, |
|
"eval_accuracy": 0.9459459459459459, |
|
"eval_loss": 0.2001199871301651, |
|
"eval_runtime": 0.4153, |
|
"eval_samples_per_second": 89.082, |
|
"eval_steps_per_second": 7.223, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 58.86, |
|
"eval_accuracy": 0.918918918918919, |
|
"eval_loss": 0.2389511615037918, |
|
"eval_runtime": 0.3911, |
|
"eval_samples_per_second": 94.615, |
|
"eval_steps_per_second": 7.672, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 59.05, |
|
"learning_rate": 2.111111111111111e-05, |
|
"loss": 0.0529, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.9459459459459459, |
|
"eval_loss": 0.13383528590202332, |
|
"eval_runtime": 0.3898, |
|
"eval_samples_per_second": 94.919, |
|
"eval_steps_per_second": 7.696, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 60.95, |
|
"learning_rate": 2e-05, |
|
"loss": 0.0203, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 60.95, |
|
"eval_accuracy": 0.9459459459459459, |
|
"eval_loss": 0.22631436586380005, |
|
"eval_runtime": 0.3946, |
|
"eval_samples_per_second": 93.757, |
|
"eval_steps_per_second": 7.602, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 61.9, |
|
"eval_accuracy": 0.9459459459459459, |
|
"eval_loss": 0.16640439629554749, |
|
"eval_runtime": 0.3813, |
|
"eval_samples_per_second": 97.045, |
|
"eval_steps_per_second": 7.868, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 62.86, |
|
"learning_rate": 1.888888888888889e-05, |
|
"loss": 0.0345, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 62.86, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.006216500885784626, |
|
"eval_runtime": 0.3909, |
|
"eval_samples_per_second": 94.661, |
|
"eval_steps_per_second": 7.675, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.0056242975406348705, |
|
"eval_runtime": 0.4048, |
|
"eval_samples_per_second": 91.408, |
|
"eval_steps_per_second": 7.411, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 64.76, |
|
"learning_rate": 1.777777777777778e-05, |
|
"loss": 0.0595, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 64.95, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.022064007818698883, |
|
"eval_runtime": 0.3988, |
|
"eval_samples_per_second": 92.779, |
|
"eval_steps_per_second": 7.523, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 65.9, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.01646520011126995, |
|
"eval_runtime": 0.4012, |
|
"eval_samples_per_second": 92.225, |
|
"eval_steps_per_second": 7.478, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 66.67, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 0.0278, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 66.86, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.01749444752931595, |
|
"eval_runtime": 0.404, |
|
"eval_samples_per_second": 91.579, |
|
"eval_steps_per_second": 7.425, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.017633311450481415, |
|
"eval_runtime": 0.3933, |
|
"eval_samples_per_second": 94.074, |
|
"eval_steps_per_second": 7.628, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 68.57, |
|
"learning_rate": 1.5555555555555555e-05, |
|
"loss": 0.1035, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 68.95, |
|
"eval_accuracy": 0.918918918918919, |
|
"eval_loss": 0.15788349509239197, |
|
"eval_runtime": 0.3978, |
|
"eval_samples_per_second": 93.022, |
|
"eval_steps_per_second": 7.542, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 69.9, |
|
"eval_accuracy": 0.972972972972973, |
|
"eval_loss": 0.06550092250108719, |
|
"eval_runtime": 0.3834, |
|
"eval_samples_per_second": 96.515, |
|
"eval_steps_per_second": 7.826, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 70.48, |
|
"learning_rate": 1.4444444444444444e-05, |
|
"loss": 0.0466, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 70.86, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.00981216412037611, |
|
"eval_runtime": 0.4023, |
|
"eval_samples_per_second": 91.96, |
|
"eval_steps_per_second": 7.456, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.9459459459459459, |
|
"eval_loss": 0.0761876329779625, |
|
"eval_runtime": 0.3968, |
|
"eval_samples_per_second": 93.24, |
|
"eval_steps_per_second": 7.56, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 72.38, |
|
"learning_rate": 1.3333333333333333e-05, |
|
"loss": 0.0719, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 72.95, |
|
"eval_accuracy": 0.9459459459459459, |
|
"eval_loss": 0.26816752552986145, |
|
"eval_runtime": 0.4037, |
|
"eval_samples_per_second": 91.659, |
|
"eval_steps_per_second": 7.432, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 73.9, |
|
"eval_accuracy": 0.972972972972973, |
|
"eval_loss": 0.03098950907588005, |
|
"eval_runtime": 0.3999, |
|
"eval_samples_per_second": 92.524, |
|
"eval_steps_per_second": 7.502, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 74.29, |
|
"learning_rate": 1.2222222222222222e-05, |
|
"loss": 0.0144, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 74.86, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.0038737300783395767, |
|
"eval_runtime": 0.4143, |
|
"eval_samples_per_second": 89.311, |
|
"eval_steps_per_second": 7.241, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.972972972972973, |
|
"eval_loss": 0.055370986461639404, |
|
"eval_runtime": 0.387, |
|
"eval_samples_per_second": 95.603, |
|
"eval_steps_per_second": 7.752, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 76.19, |
|
"learning_rate": 1.1111111111111112e-05, |
|
"loss": 0.0613, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 76.95, |
|
"eval_accuracy": 0.972972972972973, |
|
"eval_loss": 0.05363788455724716, |
|
"eval_runtime": 0.3953, |
|
"eval_samples_per_second": 93.59, |
|
"eval_steps_per_second": 7.588, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 77.9, |
|
"eval_accuracy": 0.972972972972973, |
|
"eval_loss": 0.03543579578399658, |
|
"eval_runtime": 0.4021, |
|
"eval_samples_per_second": 92.01, |
|
"eval_steps_per_second": 7.46, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 78.1, |
|
"learning_rate": 1e-05, |
|
"loss": 0.0307, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 78.86, |
|
"eval_accuracy": 0.972972972972973, |
|
"eval_loss": 0.02702740766108036, |
|
"eval_runtime": 0.4203, |
|
"eval_samples_per_second": 88.032, |
|
"eval_steps_per_second": 7.138, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"learning_rate": 8.88888888888889e-06, |
|
"loss": 0.0253, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.005909389816224575, |
|
"eval_runtime": 0.3856, |
|
"eval_samples_per_second": 95.942, |
|
"eval_steps_per_second": 7.779, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 80.95, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.01845603436231613, |
|
"eval_runtime": 0.3966, |
|
"eval_samples_per_second": 93.298, |
|
"eval_steps_per_second": 7.565, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 81.9, |
|
"learning_rate": 7.777777777777777e-06, |
|
"loss": 0.0311, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 81.9, |
|
"eval_accuracy": 0.972972972972973, |
|
"eval_loss": 0.02441835217177868, |
|
"eval_runtime": 0.4215, |
|
"eval_samples_per_second": 87.778, |
|
"eval_steps_per_second": 7.117, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 82.86, |
|
"eval_accuracy": 0.972972972972973, |
|
"eval_loss": 0.030215006321668625, |
|
"eval_runtime": 0.4009, |
|
"eval_samples_per_second": 92.283, |
|
"eval_steps_per_second": 7.482, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 83.81, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 0.0189, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.00978150311857462, |
|
"eval_runtime": 0.4039, |
|
"eval_samples_per_second": 91.607, |
|
"eval_steps_per_second": 7.428, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 84.95, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.004685988184064627, |
|
"eval_runtime": 0.3893, |
|
"eval_samples_per_second": 95.039, |
|
"eval_steps_per_second": 7.706, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 85.71, |
|
"learning_rate": 5.555555555555556e-06, |
|
"loss": 0.0235, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 85.9, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.010029388591647148, |
|
"eval_runtime": 0.4017, |
|
"eval_samples_per_second": 92.114, |
|
"eval_steps_per_second": 7.469, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 86.86, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.01385235320776701, |
|
"eval_runtime": 0.409, |
|
"eval_samples_per_second": 90.473, |
|
"eval_steps_per_second": 7.336, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 87.62, |
|
"learning_rate": 4.444444444444445e-06, |
|
"loss": 0.014, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.013605994172394276, |
|
"eval_runtime": 0.4002, |
|
"eval_samples_per_second": 92.454, |
|
"eval_steps_per_second": 7.496, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 88.95, |
|
"eval_accuracy": 0.972972972972973, |
|
"eval_loss": 0.030411923304200172, |
|
"eval_runtime": 0.3929, |
|
"eval_samples_per_second": 94.163, |
|
"eval_steps_per_second": 7.635, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 89.52, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"loss": 0.0197, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 89.9, |
|
"eval_accuracy": 0.972972972972973, |
|
"eval_loss": 0.04400445148348808, |
|
"eval_runtime": 0.4094, |
|
"eval_samples_per_second": 90.379, |
|
"eval_steps_per_second": 7.328, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 90.86, |
|
"eval_accuracy": 0.972972972972973, |
|
"eval_loss": 0.059370577335357666, |
|
"eval_runtime": 0.4037, |
|
"eval_samples_per_second": 91.649, |
|
"eval_steps_per_second": 7.431, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 91.43, |
|
"learning_rate": 2.2222222222222225e-06, |
|
"loss": 0.0309, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_accuracy": 0.972972972972973, |
|
"eval_loss": 0.048442043364048004, |
|
"eval_runtime": 0.3924, |
|
"eval_samples_per_second": 94.293, |
|
"eval_steps_per_second": 7.645, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 92.95, |
|
"eval_accuracy": 0.972972972972973, |
|
"eval_loss": 0.031032495200634003, |
|
"eval_runtime": 0.3937, |
|
"eval_samples_per_second": 93.991, |
|
"eval_steps_per_second": 7.621, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 93.33, |
|
"learning_rate": 1.1111111111111112e-06, |
|
"loss": 0.0197, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 93.9, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.019568899646401405, |
|
"eval_runtime": 0.4189, |
|
"eval_samples_per_second": 88.319, |
|
"eval_steps_per_second": 7.161, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 94.86, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.014562019146978855, |
|
"eval_runtime": 0.3925, |
|
"eval_samples_per_second": 94.275, |
|
"eval_steps_per_second": 7.644, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 95.24, |
|
"learning_rate": 0.0, |
|
"loss": 0.0106, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 95.24, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.014045949093997478, |
|
"eval_runtime": 0.394, |
|
"eval_samples_per_second": 93.898, |
|
"eval_steps_per_second": 7.613, |
|
"step": 500 |
|
} |
|
], |
|
"max_steps": 500, |
|
"num_train_epochs": 100, |
|
"total_flos": 2.628508796024832e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|