|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9992697108389406, |
|
"eval_steps": 500, |
|
"global_step": 358500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.993031591974624e-05, |
|
"loss": 0.6149, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.986063183949248e-05, |
|
"loss": 0.4578, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.9790947759238714e-05, |
|
"loss": 0.4321, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.972126367898496e-05, |
|
"loss": 0.4103, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9651579598731195e-05, |
|
"loss": 0.4003, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.958189551847743e-05, |
|
"loss": 0.3793, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.951221143822367e-05, |
|
"loss": 0.3678, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.944252735796991e-05, |
|
"loss": 0.386, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9372843277716144e-05, |
|
"loss": 0.3704, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.930315919746239e-05, |
|
"loss": 0.3623, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.9233475117208625e-05, |
|
"loss": 0.3508, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.916379103695486e-05, |
|
"loss": 0.3505, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.90941069567011e-05, |
|
"loss": 0.3406, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.902442287644734e-05, |
|
"loss": 0.337, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.895473879619358e-05, |
|
"loss": 0.342, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.888505471593982e-05, |
|
"loss": 0.3322, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.881537063568606e-05, |
|
"loss": 0.3352, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.87456865554323e-05, |
|
"loss": 0.3293, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.8676002475178536e-05, |
|
"loss": 0.3249, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.860631839492477e-05, |
|
"loss": 0.3265, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.853663431467101e-05, |
|
"loss": 0.3125, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.846695023441725e-05, |
|
"loss": 0.3207, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.8397266154163485e-05, |
|
"loss": 0.3271, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.832758207390973e-05, |
|
"loss": 0.3187, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.8257897993655966e-05, |
|
"loss": 0.3163, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.81882139134022e-05, |
|
"loss": 0.3197, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.811852983314844e-05, |
|
"loss": 0.3132, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.804884575289468e-05, |
|
"loss": 0.306, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.7979161672640915e-05, |
|
"loss": 0.3053, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.790947759238716e-05, |
|
"loss": 0.3011, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.7839793512133396e-05, |
|
"loss": 0.3033, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.777010943187963e-05, |
|
"loss": 0.3103, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.770042535162587e-05, |
|
"loss": 0.3059, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.763074127137211e-05, |
|
"loss": 0.298, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.7561057191118345e-05, |
|
"loss": 0.2967, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.749137311086458e-05, |
|
"loss": 0.2844, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.7421689030610826e-05, |
|
"loss": 0.3001, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.735200495035706e-05, |
|
"loss": 0.3088, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.72823208701033e-05, |
|
"loss": 0.3079, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.721263678984954e-05, |
|
"loss": 0.2908, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.714295270959578e-05, |
|
"loss": 0.2879, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.707326862934202e-05, |
|
"loss": 0.2891, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.7003584549088256e-05, |
|
"loss": 0.3005, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.69339004688345e-05, |
|
"loss": 0.3008, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.686421638858074e-05, |
|
"loss": 0.2914, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.6794532308326974e-05, |
|
"loss": 0.2864, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.672484822807321e-05, |
|
"loss": 0.2885, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.665516414781945e-05, |
|
"loss": 0.289, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.6585480067565685e-05, |
|
"loss": 0.283, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.651579598731193e-05, |
|
"loss": 0.2917, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.6446111907058167e-05, |
|
"loss": 0.2895, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.6376427826804404e-05, |
|
"loss": 0.276, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.630674374655064e-05, |
|
"loss": 0.2847, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.623705966629688e-05, |
|
"loss": 0.2774, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.6167375586043115e-05, |
|
"loss": 0.283, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.609769150578935e-05, |
|
"loss": 0.2859, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.6028007425535596e-05, |
|
"loss": 0.2793, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.5958323345281834e-05, |
|
"loss": 0.2769, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.588863926502807e-05, |
|
"loss": 0.2763, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.581895518477431e-05, |
|
"loss": 0.2886, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.5749271104520545e-05, |
|
"loss": 0.2721, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.567958702426678e-05, |
|
"loss": 0.2805, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.560990294401302e-05, |
|
"loss": 0.2779, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.5540218863759264e-05, |
|
"loss": 0.2833, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.54705347835055e-05, |
|
"loss": 0.2881, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.540085070325174e-05, |
|
"loss": 0.2802, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.5331166622997975e-05, |
|
"loss": 0.2683, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.526148254274422e-05, |
|
"loss": 0.2732, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.5191798462490456e-05, |
|
"loss": 0.2773, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.5122114382236693e-05, |
|
"loss": 0.2791, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.505243030198294e-05, |
|
"loss": 0.2842, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.4982746221729175e-05, |
|
"loss": 0.278, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.491306214147541e-05, |
|
"loss": 0.2757, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.484337806122165e-05, |
|
"loss": 0.2741, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.4773693980967886e-05, |
|
"loss": 0.2708, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.470400990071412e-05, |
|
"loss": 0.2725, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.463432582046037e-05, |
|
"loss": 0.2735, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.4564641740206604e-05, |
|
"loss": 0.2735, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.449495765995284e-05, |
|
"loss": 0.2704, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.442527357969908e-05, |
|
"loss": 0.2757, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.4355589499445316e-05, |
|
"loss": 0.2851, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.428590541919155e-05, |
|
"loss": 0.2681, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.421622133893779e-05, |
|
"loss": 0.27, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.4146537258684034e-05, |
|
"loss": 0.2701, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.407685317843027e-05, |
|
"loss": 0.2648, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.400716909817651e-05, |
|
"loss": 0.2698, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.3937485017922746e-05, |
|
"loss": 0.2693, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.386780093766898e-05, |
|
"loss": 0.2665, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.379811685741522e-05, |
|
"loss": 0.2688, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.3728432777161464e-05, |
|
"loss": 0.2689, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.36587486969077e-05, |
|
"loss": 0.273, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.358906461665394e-05, |
|
"loss": 0.2717, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.3519380536400176e-05, |
|
"loss": 0.2691, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.344969645614641e-05, |
|
"loss": 0.2596, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.338001237589266e-05, |
|
"loss": 0.2639, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.3310328295638894e-05, |
|
"loss": 0.2724, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.324064421538513e-05, |
|
"loss": 0.2572, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.3170960135131375e-05, |
|
"loss": 0.2657, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.310127605487761e-05, |
|
"loss": 0.2681, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.303159197462385e-05, |
|
"loss": 0.2654, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.296190789437009e-05, |
|
"loss": 0.2655, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.2892223814116324e-05, |
|
"loss": 0.2658, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.282253973386256e-05, |
|
"loss": 0.2582, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.2752855653608805e-05, |
|
"loss": 0.2664, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.268317157335504e-05, |
|
"loss": 0.2508, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.261348749310128e-05, |
|
"loss": 0.2569, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.254380341284752e-05, |
|
"loss": 0.2592, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.2474119332593754e-05, |
|
"loss": 0.2645, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.240443525233999e-05, |
|
"loss": 0.2597, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.2334751172086235e-05, |
|
"loss": 0.2545, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.226506709183247e-05, |
|
"loss": 0.2652, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.219538301157871e-05, |
|
"loss": 0.2677, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.2125698931324947e-05, |
|
"loss": 0.2671, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.2056014851071184e-05, |
|
"loss": 0.2601, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.198633077081742e-05, |
|
"loss": 0.2728, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.191664669056366e-05, |
|
"loss": 0.2628, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.18469626103099e-05, |
|
"loss": 0.2628, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.177727853005614e-05, |
|
"loss": 0.2508, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.1707594449802376e-05, |
|
"loss": 0.2661, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.1637910369548614e-05, |
|
"loss": 0.2575, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.156822628929485e-05, |
|
"loss": 0.2537, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.149854220904109e-05, |
|
"loss": 0.2495, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.142885812878733e-05, |
|
"loss": 0.2564, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.135917404853357e-05, |
|
"loss": 0.2515, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.128948996827981e-05, |
|
"loss": 0.2582, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.121980588802605e-05, |
|
"loss": 0.266, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.115012180777229e-05, |
|
"loss": 0.2492, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.1080437727518525e-05, |
|
"loss": 0.2677, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.101075364726476e-05, |
|
"loss": 0.257, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.0941069567011006e-05, |
|
"loss": 0.2586, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.087138548675724e-05, |
|
"loss": 0.2571, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.080170140650348e-05, |
|
"loss": 0.2549, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.073201732624972e-05, |
|
"loss": 0.258, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.0662333245995955e-05, |
|
"loss": 0.2594, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.059264916574219e-05, |
|
"loss": 0.2564, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.052296508548843e-05, |
|
"loss": 0.2646, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.045328100523467e-05, |
|
"loss": 0.2577, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.038359692498091e-05, |
|
"loss": 0.2567, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.031391284472715e-05, |
|
"loss": 0.2518, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.0244228764473384e-05, |
|
"loss": 0.2541, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.017454468421962e-05, |
|
"loss": 0.2504, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.010486060396586e-05, |
|
"loss": 0.2595, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.00351765237121e-05, |
|
"loss": 0.2618, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.996549244345834e-05, |
|
"loss": 0.2604, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.989580836320458e-05, |
|
"loss": 0.2525, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.9826124282950814e-05, |
|
"loss": 0.2447, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.975644020269705e-05, |
|
"loss": 0.2535, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.968675612244329e-05, |
|
"loss": 0.256, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.9617072042189526e-05, |
|
"loss": 0.2475, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.954738796193577e-05, |
|
"loss": 0.2591, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.947770388168201e-05, |
|
"loss": 0.2464, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.9408019801428244e-05, |
|
"loss": 0.2535, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.933833572117449e-05, |
|
"loss": 0.2514, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.9268651640920725e-05, |
|
"loss": 0.2484, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.919896756066696e-05, |
|
"loss": 0.2483, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.91292834804132e-05, |
|
"loss": 0.2559, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.9059599400159444e-05, |
|
"loss": 0.2526, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.898991531990568e-05, |
|
"loss": 0.2537, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.892023123965192e-05, |
|
"loss": 0.2586, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.8850547159398155e-05, |
|
"loss": 0.2407, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.878086307914439e-05, |
|
"loss": 0.2518, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.871117899889063e-05, |
|
"loss": 0.254, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.8641494918636874e-05, |
|
"loss": 0.2517, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.857181083838311e-05, |
|
"loss": 0.248, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.850212675812935e-05, |
|
"loss": 0.2483, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.8432442677875585e-05, |
|
"loss": 0.2465, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.836275859762182e-05, |
|
"loss": 0.243, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.829307451736806e-05, |
|
"loss": 0.2518, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.82233904371143e-05, |
|
"loss": 0.2548, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.815370635686054e-05, |
|
"loss": 0.2492, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.808402227660678e-05, |
|
"loss": 0.2444, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.8014338196353015e-05, |
|
"loss": 0.2436, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.794465411609925e-05, |
|
"loss": 0.2494, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.787497003584549e-05, |
|
"loss": 0.2595, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.7805285955591727e-05, |
|
"loss": 0.2507, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 3.7735601875337964e-05, |
|
"loss": 0.2388, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 3.766591779508421e-05, |
|
"loss": 0.2455, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 3.7596233714830445e-05, |
|
"loss": 0.2364, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 3.752654963457668e-05, |
|
"loss": 0.2433, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 3.7456865554322926e-05, |
|
"loss": 0.2519, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 3.738718147406916e-05, |
|
"loss": 0.2495, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 3.73174973938154e-05, |
|
"loss": 0.2405, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3.7247813313561644e-05, |
|
"loss": 0.2431, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3.717812923330788e-05, |
|
"loss": 0.2474, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3.710844515305412e-05, |
|
"loss": 0.2468, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3.7038761072800356e-05, |
|
"loss": 0.2464, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3.696907699254659e-05, |
|
"loss": 0.2464, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3.689939291229283e-05, |
|
"loss": 0.2424, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3.682970883203907e-05, |
|
"loss": 0.2379, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3.676002475178531e-05, |
|
"loss": 0.2442, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 3.669034067153155e-05, |
|
"loss": 0.246, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 3.6620656591277786e-05, |
|
"loss": 0.2403, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 3.655097251102402e-05, |
|
"loss": 0.2429, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 3.648128843077026e-05, |
|
"loss": 0.2482, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 3.64116043505165e-05, |
|
"loss": 0.2432, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 3.6341920270262735e-05, |
|
"loss": 0.2374, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 3.627223619000898e-05, |
|
"loss": 0.2539, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.6202552109755216e-05, |
|
"loss": 0.2496, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.613286802950145e-05, |
|
"loss": 0.2447, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.606318394924769e-05, |
|
"loss": 0.2396, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.599349986899393e-05, |
|
"loss": 0.2472, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.5923815788740164e-05, |
|
"loss": 0.2431, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.585413170848641e-05, |
|
"loss": 0.2414, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.5784447628232646e-05, |
|
"loss": 0.2398, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.571476354797888e-05, |
|
"loss": 0.2415, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.564507946772512e-05, |
|
"loss": 0.2428, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.5575395387471364e-05, |
|
"loss": 0.2341, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.55057113072176e-05, |
|
"loss": 0.2461, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.543602722696384e-05, |
|
"loss": 0.2481, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.536634314671008e-05, |
|
"loss": 0.2409, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.529665906645632e-05, |
|
"loss": 0.2388, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.522697498620256e-05, |
|
"loss": 0.2427, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.5157290905948794e-05, |
|
"loss": 0.238, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.508760682569503e-05, |
|
"loss": 0.2453, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.501792274544127e-05, |
|
"loss": 0.245, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.4948238665187505e-05, |
|
"loss": 0.2395, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.487855458493375e-05, |
|
"loss": 0.2369, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.4808870504679986e-05, |
|
"loss": 0.2369, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.4739186424426224e-05, |
|
"loss": 0.2476, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.466950234417246e-05, |
|
"loss": 0.238, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.45998182639187e-05, |
|
"loss": 0.2417, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.4530134183664935e-05, |
|
"loss": 0.2433, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.446045010341118e-05, |
|
"loss": 0.2431, |
|
"step": 111500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.4390766023157416e-05, |
|
"loss": 0.2434, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.4321081942903654e-05, |
|
"loss": 0.2459, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.425139786264989e-05, |
|
"loss": 0.2464, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.418171378239613e-05, |
|
"loss": 0.2357, |
|
"step": 113500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.4112029702142365e-05, |
|
"loss": 0.2394, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.40423456218886e-05, |
|
"loss": 0.2404, |
|
"step": 114500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.3972661541634846e-05, |
|
"loss": 0.2333, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.3902977461381083e-05, |
|
"loss": 0.2323, |
|
"step": 115500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.383329338112732e-05, |
|
"loss": 0.2562, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.376360930087356e-05, |
|
"loss": 0.238, |
|
"step": 116500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.36939252206198e-05, |
|
"loss": 0.2409, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.362424114036604e-05, |
|
"loss": 0.2435, |
|
"step": 117500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.3554557060112276e-05, |
|
"loss": 0.2377, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.348487297985852e-05, |
|
"loss": 0.2287, |
|
"step": 118500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.341518889960476e-05, |
|
"loss": 0.2427, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.3345504819350994e-05, |
|
"loss": 0.2438, |
|
"step": 119500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.327582073909723e-05, |
|
"loss": 0.2447, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.320613665884347e-05, |
|
"loss": 0.2375, |
|
"step": 120500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.3136452578589706e-05, |
|
"loss": 0.2399, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.306676849833595e-05, |
|
"loss": 0.2416, |
|
"step": 121500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.299708441808219e-05, |
|
"loss": 0.2405, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.2927400337828424e-05, |
|
"loss": 0.2322, |
|
"step": 122500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.285771625757466e-05, |
|
"loss": 0.2337, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.27880321773209e-05, |
|
"loss": 0.2391, |
|
"step": 123500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.2718348097067136e-05, |
|
"loss": 0.239, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.264866401681337e-05, |
|
"loss": 0.2363, |
|
"step": 124500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.257897993655962e-05, |
|
"loss": 0.239, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.2509295856305854e-05, |
|
"loss": 0.243, |
|
"step": 125500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.243961177605209e-05, |
|
"loss": 0.2338, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.236992769579833e-05, |
|
"loss": 0.2386, |
|
"step": 126500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.2300243615544566e-05, |
|
"loss": 0.2339, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.22305595352908e-05, |
|
"loss": 0.2372, |
|
"step": 127500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.216087545503705e-05, |
|
"loss": 0.2368, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.2091191374783284e-05, |
|
"loss": 0.2344, |
|
"step": 128500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.202150729452952e-05, |
|
"loss": 0.2402, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.195182321427576e-05, |
|
"loss": 0.2352, |
|
"step": 129500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.1882139134021996e-05, |
|
"loss": 0.2358, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.181245505376824e-05, |
|
"loss": 0.2431, |
|
"step": 130500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.174277097351448e-05, |
|
"loss": 0.2312, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.1673086893260714e-05, |
|
"loss": 0.2384, |
|
"step": 131500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.160340281300696e-05, |
|
"loss": 0.2352, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.1533718732753195e-05, |
|
"loss": 0.236, |
|
"step": 132500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.146403465249943e-05, |
|
"loss": 0.2491, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.139435057224567e-05, |
|
"loss": 0.2431, |
|
"step": 133500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.132466649199191e-05, |
|
"loss": 0.2351, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.1254982411738144e-05, |
|
"loss": 0.2347, |
|
"step": 134500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.118529833148439e-05, |
|
"loss": 0.2349, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.1115614251230625e-05, |
|
"loss": 0.2296, |
|
"step": 135500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.104593017097686e-05, |
|
"loss": 0.2408, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.09762460907231e-05, |
|
"loss": 0.2287, |
|
"step": 136500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.0906562010469337e-05, |
|
"loss": 0.2327, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.0836877930215574e-05, |
|
"loss": 0.2329, |
|
"step": 137500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.076719384996182e-05, |
|
"loss": 0.2401, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.0697509769708055e-05, |
|
"loss": 0.2366, |
|
"step": 138500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.062782568945429e-05, |
|
"loss": 0.2351, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.055814160920053e-05, |
|
"loss": 0.236, |
|
"step": 139500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.0488457528946766e-05, |
|
"loss": 0.2282, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.0418773448693007e-05, |
|
"loss": 0.2339, |
|
"step": 140500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.0349089368439244e-05, |
|
"loss": 0.2336, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.0279405288185485e-05, |
|
"loss": 0.2384, |
|
"step": 141500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.0209721207931725e-05, |
|
"loss": 0.2247, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.0140037127677963e-05, |
|
"loss": 0.2345, |
|
"step": 142500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.00703530474242e-05, |
|
"loss": 0.2296, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.0000668967170437e-05, |
|
"loss": 0.2319, |
|
"step": 143500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.9930984886916674e-05, |
|
"loss": 0.2454, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.986130080666291e-05, |
|
"loss": 0.234, |
|
"step": 144500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.9791616726409155e-05, |
|
"loss": 0.2305, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.9721932646155392e-05, |
|
"loss": 0.2346, |
|
"step": 145500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.965224856590163e-05, |
|
"loss": 0.2336, |
|
"step": 146000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.9582564485647867e-05, |
|
"loss": 0.2312, |
|
"step": 146500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.9512880405394104e-05, |
|
"loss": 0.2349, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.9443196325140345e-05, |
|
"loss": 0.2343, |
|
"step": 147500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.9373512244886585e-05, |
|
"loss": 0.23, |
|
"step": 148000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.9303828164632822e-05, |
|
"loss": 0.2221, |
|
"step": 148500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.9234144084379063e-05, |
|
"loss": 0.2368, |
|
"step": 149000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.91644600041253e-05, |
|
"loss": 0.2262, |
|
"step": 149500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.9094775923871537e-05, |
|
"loss": 0.2276, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.9025091843617774e-05, |
|
"loss": 0.2368, |
|
"step": 150500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.895540776336401e-05, |
|
"loss": 0.2373, |
|
"step": 151000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.8885723683110256e-05, |
|
"loss": 0.2397, |
|
"step": 151500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.8816039602856493e-05, |
|
"loss": 0.2328, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.874635552260273e-05, |
|
"loss": 0.2284, |
|
"step": 152500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.8676671442348967e-05, |
|
"loss": 0.229, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.8606987362095204e-05, |
|
"loss": 0.228, |
|
"step": 153500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.853730328184144e-05, |
|
"loss": 0.2335, |
|
"step": 154000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.8467619201587682e-05, |
|
"loss": 0.2281, |
|
"step": 154500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.8397935121333923e-05, |
|
"loss": 0.2329, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.8328251041080163e-05, |
|
"loss": 0.2294, |
|
"step": 155500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.82585669608264e-05, |
|
"loss": 0.2292, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.8188882880572638e-05, |
|
"loss": 0.23, |
|
"step": 156500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.8119198800318875e-05, |
|
"loss": 0.2255, |
|
"step": 157000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.8049514720065112e-05, |
|
"loss": 0.235, |
|
"step": 157500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.7979830639811356e-05, |
|
"loss": 0.2368, |
|
"step": 158000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.7910146559557593e-05, |
|
"loss": 0.2277, |
|
"step": 158500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.784046247930383e-05, |
|
"loss": 0.2381, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.7770778399050068e-05, |
|
"loss": 0.2302, |
|
"step": 159500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.7701094318796305e-05, |
|
"loss": 0.225, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.7631410238542542e-05, |
|
"loss": 0.227, |
|
"step": 160500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.7561726158288782e-05, |
|
"loss": 0.2362, |
|
"step": 161000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.7492042078035023e-05, |
|
"loss": 0.2283, |
|
"step": 161500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.742235799778126e-05, |
|
"loss": 0.2201, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.73526739175275e-05, |
|
"loss": 0.2327, |
|
"step": 162500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.7282989837273738e-05, |
|
"loss": 0.2236, |
|
"step": 163000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.7213305757019975e-05, |
|
"loss": 0.2362, |
|
"step": 163500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.7143621676766212e-05, |
|
"loss": 0.2282, |
|
"step": 164000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.707393759651245e-05, |
|
"loss": 0.2388, |
|
"step": 164500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.7004253516258693e-05, |
|
"loss": 0.2274, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.693456943600493e-05, |
|
"loss": 0.2294, |
|
"step": 165500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.6864885355751168e-05, |
|
"loss": 0.2206, |
|
"step": 166000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.6795201275497405e-05, |
|
"loss": 0.2253, |
|
"step": 166500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.6725517195243642e-05, |
|
"loss": 0.2387, |
|
"step": 167000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.665583311498988e-05, |
|
"loss": 0.232, |
|
"step": 167500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.6586149034736123e-05, |
|
"loss": 0.2296, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.651646495448236e-05, |
|
"loss": 0.2168, |
|
"step": 168500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.6446780874228598e-05, |
|
"loss": 0.2217, |
|
"step": 169000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.637709679397484e-05, |
|
"loss": 0.2333, |
|
"step": 169500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.6307412713721076e-05, |
|
"loss": 0.2269, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.6237728633467313e-05, |
|
"loss": 0.2326, |
|
"step": 170500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.616804455321355e-05, |
|
"loss": 0.2262, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.6098360472959794e-05, |
|
"loss": 0.2245, |
|
"step": 171500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.602867639270603e-05, |
|
"loss": 0.2269, |
|
"step": 172000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.5958992312452268e-05, |
|
"loss": 0.2313, |
|
"step": 172500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.5889308232198505e-05, |
|
"loss": 0.2239, |
|
"step": 173000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.5819624151944743e-05, |
|
"loss": 0.2328, |
|
"step": 173500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.574994007169098e-05, |
|
"loss": 0.2284, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.568025599143722e-05, |
|
"loss": 0.2314, |
|
"step": 174500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.561057191118346e-05, |
|
"loss": 0.2284, |
|
"step": 175000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.5540887830929698e-05, |
|
"loss": 0.216, |
|
"step": 175500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.547120375067594e-05, |
|
"loss": 0.2232, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.5401519670422176e-05, |
|
"loss": 0.2267, |
|
"step": 176500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.5331835590168413e-05, |
|
"loss": 0.227, |
|
"step": 177000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.526215150991465e-05, |
|
"loss": 0.2336, |
|
"step": 177500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.5192467429660894e-05, |
|
"loss": 0.2279, |
|
"step": 178000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.512278334940713e-05, |
|
"loss": 0.2236, |
|
"step": 178500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.505309926915337e-05, |
|
"loss": 0.2309, |
|
"step": 179000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.4983415188899606e-05, |
|
"loss": 0.2229, |
|
"step": 179500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.4913731108645843e-05, |
|
"loss": 0.2181, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.4844047028392083e-05, |
|
"loss": 0.2312, |
|
"step": 180500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.477436294813832e-05, |
|
"loss": 0.2264, |
|
"step": 181000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.4704678867884558e-05, |
|
"loss": 0.2313, |
|
"step": 181500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.46349947876308e-05, |
|
"loss": 0.2267, |
|
"step": 182000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.4565310707377036e-05, |
|
"loss": 0.234, |
|
"step": 182500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.4495626627123276e-05, |
|
"loss": 0.2303, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.4425942546869513e-05, |
|
"loss": 0.2201, |
|
"step": 183500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.4356258466615754e-05, |
|
"loss": 0.23, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.428657438636199e-05, |
|
"loss": 0.2192, |
|
"step": 184500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.421689030610823e-05, |
|
"loss": 0.2319, |
|
"step": 185000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.414720622585447e-05, |
|
"loss": 0.226, |
|
"step": 185500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.4077522145600706e-05, |
|
"loss": 0.232, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.4007838065346943e-05, |
|
"loss": 0.2327, |
|
"step": 186500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.3938153985093184e-05, |
|
"loss": 0.2231, |
|
"step": 187000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.386846990483942e-05, |
|
"loss": 0.22, |
|
"step": 187500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.3798785824585658e-05, |
|
"loss": 0.2243, |
|
"step": 188000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.3729101744331895e-05, |
|
"loss": 0.2287, |
|
"step": 188500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.3659417664078136e-05, |
|
"loss": 0.2227, |
|
"step": 189000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.3589733583824377e-05, |
|
"loss": 0.2321, |
|
"step": 189500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.3520049503570614e-05, |
|
"loss": 0.22, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.3450365423316854e-05, |
|
"loss": 0.2247, |
|
"step": 190500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.338068134306309e-05, |
|
"loss": 0.2251, |
|
"step": 191000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.331099726280933e-05, |
|
"loss": 0.2182, |
|
"step": 191500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.324131318255557e-05, |
|
"loss": 0.2244, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.3171629102301806e-05, |
|
"loss": 0.2236, |
|
"step": 192500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.3101945022048044e-05, |
|
"loss": 0.2228, |
|
"step": 193000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.303226094179428e-05, |
|
"loss": 0.216, |
|
"step": 193500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.296257686154052e-05, |
|
"loss": 0.2137, |
|
"step": 194000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.289289278128676e-05, |
|
"loss": 0.2207, |
|
"step": 194500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.2823208701032996e-05, |
|
"loss": 0.2285, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.2753524620779236e-05, |
|
"loss": 0.2209, |
|
"step": 195500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.2683840540525473e-05, |
|
"loss": 0.2313, |
|
"step": 196000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.2614156460271714e-05, |
|
"loss": 0.2304, |
|
"step": 196500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.2544472380017955e-05, |
|
"loss": 0.2236, |
|
"step": 197000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.2474788299764192e-05, |
|
"loss": 0.2211, |
|
"step": 197500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.240510421951043e-05, |
|
"loss": 0.2294, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.2335420139256666e-05, |
|
"loss": 0.221, |
|
"step": 198500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.2265736059002907e-05, |
|
"loss": 0.2227, |
|
"step": 199000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.2196051978749144e-05, |
|
"loss": 0.2204, |
|
"step": 199500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.212636789849538e-05, |
|
"loss": 0.2248, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.2056683818241622e-05, |
|
"loss": 0.2206, |
|
"step": 200500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.198699973798786e-05, |
|
"loss": 0.2322, |
|
"step": 201000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.1917315657734096e-05, |
|
"loss": 0.2211, |
|
"step": 201500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.1847631577480337e-05, |
|
"loss": 0.2307, |
|
"step": 202000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.1777947497226574e-05, |
|
"loss": 0.2143, |
|
"step": 202500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.1708263416972814e-05, |
|
"loss": 0.2201, |
|
"step": 203000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.163857933671905e-05, |
|
"loss": 0.2269, |
|
"step": 203500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.1568895256465292e-05, |
|
"loss": 0.2222, |
|
"step": 204000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.149921117621153e-05, |
|
"loss": 0.2158, |
|
"step": 204500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.1429527095957767e-05, |
|
"loss": 0.216, |
|
"step": 205000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.1359843015704007e-05, |
|
"loss": 0.2322, |
|
"step": 205500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.1290158935450244e-05, |
|
"loss": 0.2167, |
|
"step": 206000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.122047485519648e-05, |
|
"loss": 0.2231, |
|
"step": 206500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.1150790774942722e-05, |
|
"loss": 0.2242, |
|
"step": 207000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.108110669468896e-05, |
|
"loss": 0.237, |
|
"step": 207500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.1011422614435196e-05, |
|
"loss": 0.221, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.0941738534181434e-05, |
|
"loss": 0.2251, |
|
"step": 208500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.0872054453927674e-05, |
|
"loss": 0.2272, |
|
"step": 209000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.080237037367391e-05, |
|
"loss": 0.2235, |
|
"step": 209500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.0732686293420152e-05, |
|
"loss": 0.2179, |
|
"step": 210000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.0663002213166392e-05, |
|
"loss": 0.2276, |
|
"step": 210500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.059331813291263e-05, |
|
"loss": 0.2215, |
|
"step": 211000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.0523634052658867e-05, |
|
"loss": 0.2266, |
|
"step": 211500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.0453949972405107e-05, |
|
"loss": 0.2185, |
|
"step": 212000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.0384265892151345e-05, |
|
"loss": 0.2255, |
|
"step": 212500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.0314581811897582e-05, |
|
"loss": 0.2307, |
|
"step": 213000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.024489773164382e-05, |
|
"loss": 0.2201, |
|
"step": 213500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.017521365139006e-05, |
|
"loss": 0.2208, |
|
"step": 214000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0105529571136297e-05, |
|
"loss": 0.2177, |
|
"step": 214500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0035845490882534e-05, |
|
"loss": 0.2136, |
|
"step": 215000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.9966161410628775e-05, |
|
"loss": 0.2254, |
|
"step": 215500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.9896477330375012e-05, |
|
"loss": 0.2208, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.982679325012125e-05, |
|
"loss": 0.2213, |
|
"step": 216500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.975710916986749e-05, |
|
"loss": 0.2253, |
|
"step": 217000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.968742508961373e-05, |
|
"loss": 0.2172, |
|
"step": 217500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9617741009359967e-05, |
|
"loss": 0.2342, |
|
"step": 218000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9548056929106204e-05, |
|
"loss": 0.2177, |
|
"step": 218500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9478372848852445e-05, |
|
"loss": 0.2263, |
|
"step": 219000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9408688768598682e-05, |
|
"loss": 0.2334, |
|
"step": 219500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.933900468834492e-05, |
|
"loss": 0.2152, |
|
"step": 220000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.926932060809116e-05, |
|
"loss": 0.2154, |
|
"step": 220500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.9199636527837397e-05, |
|
"loss": 0.2123, |
|
"step": 221000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.9129952447583634e-05, |
|
"loss": 0.2204, |
|
"step": 221500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.9060268367329875e-05, |
|
"loss": 0.2167, |
|
"step": 222000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8990584287076112e-05, |
|
"loss": 0.2234, |
|
"step": 222500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.892090020682235e-05, |
|
"loss": 0.2244, |
|
"step": 223000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.885121612656859e-05, |
|
"loss": 0.2227, |
|
"step": 223500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8781532046314827e-05, |
|
"loss": 0.2177, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8711847966061068e-05, |
|
"loss": 0.2212, |
|
"step": 224500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8642163885807305e-05, |
|
"loss": 0.2218, |
|
"step": 225000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8572479805553545e-05, |
|
"loss": 0.2278, |
|
"step": 225500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8502795725299782e-05, |
|
"loss": 0.2233, |
|
"step": 226000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.843311164504602e-05, |
|
"loss": 0.2242, |
|
"step": 226500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.836342756479226e-05, |
|
"loss": 0.2268, |
|
"step": 227000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8293743484538497e-05, |
|
"loss": 0.2189, |
|
"step": 227500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.8224059404284735e-05, |
|
"loss": 0.2257, |
|
"step": 228000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.8154375324030972e-05, |
|
"loss": 0.2134, |
|
"step": 228500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.8084691243777212e-05, |
|
"loss": 0.2147, |
|
"step": 229000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.801500716352345e-05, |
|
"loss": 0.2204, |
|
"step": 229500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7945323083269687e-05, |
|
"loss": 0.215, |
|
"step": 230000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7875639003015927e-05, |
|
"loss": 0.2182, |
|
"step": 230500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7805954922762168e-05, |
|
"loss": 0.2259, |
|
"step": 231000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.7736270842508405e-05, |
|
"loss": 0.2207, |
|
"step": 231500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.7666586762254646e-05, |
|
"loss": 0.2197, |
|
"step": 232000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.7596902682000883e-05, |
|
"loss": 0.2229, |
|
"step": 232500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.752721860174712e-05, |
|
"loss": 0.2196, |
|
"step": 233000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.7457534521493357e-05, |
|
"loss": 0.217, |
|
"step": 233500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.7387850441239598e-05, |
|
"loss": 0.2143, |
|
"step": 234000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.7318166360985835e-05, |
|
"loss": 0.2177, |
|
"step": 234500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.7248482280732072e-05, |
|
"loss": 0.2198, |
|
"step": 235000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.7178798200478313e-05, |
|
"loss": 0.2264, |
|
"step": 235500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.710911412022455e-05, |
|
"loss": 0.2157, |
|
"step": 236000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.7039430039970787e-05, |
|
"loss": 0.2275, |
|
"step": 236500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.6969745959717028e-05, |
|
"loss": 0.2197, |
|
"step": 237000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.6900061879463265e-05, |
|
"loss": 0.2203, |
|
"step": 237500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.6830377799209505e-05, |
|
"loss": 0.2242, |
|
"step": 238000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.6760693718955743e-05, |
|
"loss": 0.2214, |
|
"step": 238500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.6691009638701983e-05, |
|
"loss": 0.2226, |
|
"step": 239000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.662132555844822e-05, |
|
"loss": 0.2247, |
|
"step": 239500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.6551641478194458e-05, |
|
"loss": 0.2177, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.6481957397940698e-05, |
|
"loss": 0.215, |
|
"step": 240500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.6412273317686935e-05, |
|
"loss": 0.211, |
|
"step": 241000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.6342589237433172e-05, |
|
"loss": 0.2142, |
|
"step": 241500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.6272905157179413e-05, |
|
"loss": 0.2119, |
|
"step": 242000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.620322107692565e-05, |
|
"loss": 0.2178, |
|
"step": 242500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.6133536996671887e-05, |
|
"loss": 0.2145, |
|
"step": 243000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.6063852916418125e-05, |
|
"loss": 0.2108, |
|
"step": 243500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.5994168836164365e-05, |
|
"loss": 0.2207, |
|
"step": 244000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.5924484755910606e-05, |
|
"loss": 0.2127, |
|
"step": 244500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.5854800675656843e-05, |
|
"loss": 0.2249, |
|
"step": 245000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.5785116595403084e-05, |
|
"loss": 0.2257, |
|
"step": 245500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.571543251514932e-05, |
|
"loss": 0.2178, |
|
"step": 246000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.5645748434895558e-05, |
|
"loss": 0.2237, |
|
"step": 246500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.55760643546418e-05, |
|
"loss": 0.2141, |
|
"step": 247000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.5506380274388036e-05, |
|
"loss": 0.2165, |
|
"step": 247500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.5436696194134273e-05, |
|
"loss": 0.2096, |
|
"step": 248000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.5367012113880513e-05, |
|
"loss": 0.2096, |
|
"step": 248500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.529732803362675e-05, |
|
"loss": 0.2258, |
|
"step": 249000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.522764395337299e-05, |
|
"loss": 0.2141, |
|
"step": 249500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.5157959873119227e-05, |
|
"loss": 0.2116, |
|
"step": 250000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.5088275792865467e-05, |
|
"loss": 0.2153, |
|
"step": 250500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.5018591712611704e-05, |
|
"loss": 0.218, |
|
"step": 251000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.4948907632357942e-05, |
|
"loss": 0.2172, |
|
"step": 251500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.4879223552104182e-05, |
|
"loss": 0.2281, |
|
"step": 252000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.480953947185042e-05, |
|
"loss": 0.2171, |
|
"step": 252500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.4739855391596658e-05, |
|
"loss": 0.2155, |
|
"step": 253000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.4670171311342897e-05, |
|
"loss": 0.2148, |
|
"step": 253500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.4600487231089136e-05, |
|
"loss": 0.2189, |
|
"step": 254000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.4530803150835373e-05, |
|
"loss": 0.2221, |
|
"step": 254500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.446111907058161e-05, |
|
"loss": 0.2223, |
|
"step": 255000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.4391434990327851e-05, |
|
"loss": 0.2191, |
|
"step": 255500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.4321750910074088e-05, |
|
"loss": 0.2195, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.4252066829820327e-05, |
|
"loss": 0.2182, |
|
"step": 256500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.4182382749566568e-05, |
|
"loss": 0.2169, |
|
"step": 257000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.4112698669312805e-05, |
|
"loss": 0.2239, |
|
"step": 257500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.4043014589059042e-05, |
|
"loss": 0.2209, |
|
"step": 258000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.3973330508805283e-05, |
|
"loss": 0.2043, |
|
"step": 258500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.390364642855152e-05, |
|
"loss": 0.2211, |
|
"step": 259000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.3833962348297757e-05, |
|
"loss": 0.2159, |
|
"step": 259500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.3764278268043996e-05, |
|
"loss": 0.2114, |
|
"step": 260000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.3694594187790236e-05, |
|
"loss": 0.2229, |
|
"step": 260500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.3624910107536474e-05, |
|
"loss": 0.2158, |
|
"step": 261000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.355522602728271e-05, |
|
"loss": 0.2145, |
|
"step": 261500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.3485541947028951e-05, |
|
"loss": 0.2132, |
|
"step": 262000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.3415857866775188e-05, |
|
"loss": 0.2184, |
|
"step": 262500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.3346173786521426e-05, |
|
"loss": 0.2164, |
|
"step": 263000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.3276489706267666e-05, |
|
"loss": 0.2111, |
|
"step": 263500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.3206805626013905e-05, |
|
"loss": 0.2128, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.3137121545760142e-05, |
|
"loss": 0.2203, |
|
"step": 264500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.306743746550638e-05, |
|
"loss": 0.2163, |
|
"step": 265000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.299775338525262e-05, |
|
"loss": 0.2169, |
|
"step": 265500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.2928069304998857e-05, |
|
"loss": 0.2157, |
|
"step": 266000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.2858385224745096e-05, |
|
"loss": 0.2246, |
|
"step": 266500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.2788701144491335e-05, |
|
"loss": 0.2204, |
|
"step": 267000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.2719017064237574e-05, |
|
"loss": 0.2137, |
|
"step": 267500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.2649332983983811e-05, |
|
"loss": 0.2195, |
|
"step": 268000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.2579648903730052e-05, |
|
"loss": 0.2189, |
|
"step": 268500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.2509964823476289e-05, |
|
"loss": 0.208, |
|
"step": 269000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.2440280743222526e-05, |
|
"loss": 0.2109, |
|
"step": 269500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.2370596662968765e-05, |
|
"loss": 0.2249, |
|
"step": 270000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.2300912582715004e-05, |
|
"loss": 0.2159, |
|
"step": 270500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.2231228502461243e-05, |
|
"loss": 0.2114, |
|
"step": 271000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.2161544422207482e-05, |
|
"loss": 0.2095, |
|
"step": 271500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.2091860341953719e-05, |
|
"loss": 0.218, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.2022176261699958e-05, |
|
"loss": 0.2208, |
|
"step": 272500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.1952492181446195e-05, |
|
"loss": 0.2098, |
|
"step": 273000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.1882808101192434e-05, |
|
"loss": 0.2073, |
|
"step": 273500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.1813124020938674e-05, |
|
"loss": 0.2084, |
|
"step": 274000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.1743439940684911e-05, |
|
"loss": 0.2071, |
|
"step": 274500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.167375586043115e-05, |
|
"loss": 0.2207, |
|
"step": 275000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.1604071780177387e-05, |
|
"loss": 0.2186, |
|
"step": 275500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.1534387699923626e-05, |
|
"loss": 0.2132, |
|
"step": 276000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.1464703619669865e-05, |
|
"loss": 0.2107, |
|
"step": 276500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.1395019539416104e-05, |
|
"loss": 0.2157, |
|
"step": 277000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.1325335459162343e-05, |
|
"loss": 0.2138, |
|
"step": 277500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.1255651378908582e-05, |
|
"loss": 0.2157, |
|
"step": 278000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.1185967298654819e-05, |
|
"loss": 0.2101, |
|
"step": 278500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.1116283218401058e-05, |
|
"loss": 0.2177, |
|
"step": 279000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.1046599138147295e-05, |
|
"loss": 0.2142, |
|
"step": 279500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.0976915057893534e-05, |
|
"loss": 0.2122, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.0907230977639773e-05, |
|
"loss": 0.2212, |
|
"step": 280500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.0837546897386012e-05, |
|
"loss": 0.2096, |
|
"step": 281000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.076786281713225e-05, |
|
"loss": 0.2091, |
|
"step": 281500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.0698178736878488e-05, |
|
"loss": 0.2124, |
|
"step": 282000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.0628494656624727e-05, |
|
"loss": 0.2175, |
|
"step": 282500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.0558810576370966e-05, |
|
"loss": 0.2124, |
|
"step": 283000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.0489126496117203e-05, |
|
"loss": 0.2046, |
|
"step": 283500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.0419442415863442e-05, |
|
"loss": 0.2072, |
|
"step": 284000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.034975833560968e-05, |
|
"loss": 0.2181, |
|
"step": 284500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.028007425535592e-05, |
|
"loss": 0.22, |
|
"step": 285000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.0210390175102158e-05, |
|
"loss": 0.2087, |
|
"step": 285500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.0140706094848395e-05, |
|
"loss": 0.2074, |
|
"step": 286000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.0071022014594634e-05, |
|
"loss": 0.211, |
|
"step": 286500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.0001337934340872e-05, |
|
"loss": 0.2156, |
|
"step": 287000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 9.931653854087112e-06, |
|
"loss": 0.2081, |
|
"step": 287500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 9.861969773833351e-06, |
|
"loss": 0.2158, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 9.792285693579588e-06, |
|
"loss": 0.2212, |
|
"step": 288500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.722601613325827e-06, |
|
"loss": 0.2113, |
|
"step": 289000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.652917533072064e-06, |
|
"loss": 0.2052, |
|
"step": 289500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.583233452818303e-06, |
|
"loss": 0.2151, |
|
"step": 290000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.513549372564542e-06, |
|
"loss": 0.2147, |
|
"step": 290500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.44386529231078e-06, |
|
"loss": 0.2114, |
|
"step": 291000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.37418121205702e-06, |
|
"loss": 0.2175, |
|
"step": 291500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.304497131803257e-06, |
|
"loss": 0.2145, |
|
"step": 292000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 9.234813051549496e-06, |
|
"loss": 0.212, |
|
"step": 292500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 9.165128971295735e-06, |
|
"loss": 0.2117, |
|
"step": 293000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 9.095444891041972e-06, |
|
"loss": 0.2065, |
|
"step": 293500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 9.02576081078821e-06, |
|
"loss": 0.2192, |
|
"step": 294000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 8.95607673053445e-06, |
|
"loss": 0.2074, |
|
"step": 294500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 8.886392650280688e-06, |
|
"loss": 0.2155, |
|
"step": 295000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 8.816708570026927e-06, |
|
"loss": 0.2099, |
|
"step": 295500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 8.747024489773165e-06, |
|
"loss": 0.2192, |
|
"step": 296000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 8.677340409519403e-06, |
|
"loss": 0.2117, |
|
"step": 296500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 8.60765632926564e-06, |
|
"loss": 0.2118, |
|
"step": 297000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 8.53797224901188e-06, |
|
"loss": 0.2074, |
|
"step": 297500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 8.468288168758118e-06, |
|
"loss": 0.2071, |
|
"step": 298000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 8.398604088504357e-06, |
|
"loss": 0.2113, |
|
"step": 298500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 8.328920008250596e-06, |
|
"loss": 0.2072, |
|
"step": 299000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 8.259235927996833e-06, |
|
"loss": 0.2048, |
|
"step": 299500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 8.189551847743072e-06, |
|
"loss": 0.2104, |
|
"step": 300000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 8.119867767489311e-06, |
|
"loss": 0.218, |
|
"step": 300500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 8.050183687235548e-06, |
|
"loss": 0.2168, |
|
"step": 301000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 7.980499606981789e-06, |
|
"loss": 0.2217, |
|
"step": 301500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 7.910815526728026e-06, |
|
"loss": 0.2126, |
|
"step": 302000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 7.841131446474265e-06, |
|
"loss": 0.2084, |
|
"step": 302500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 7.771447366220504e-06, |
|
"loss": 0.2098, |
|
"step": 303000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 7.701763285966741e-06, |
|
"loss": 0.2131, |
|
"step": 303500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 7.63207920571298e-06, |
|
"loss": 0.2078, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 7.562395125459218e-06, |
|
"loss": 0.2164, |
|
"step": 304500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 7.492711045205457e-06, |
|
"loss": 0.2114, |
|
"step": 305000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 7.423026964951696e-06, |
|
"loss": 0.206, |
|
"step": 305500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 7.353342884697934e-06, |
|
"loss": 0.2108, |
|
"step": 306000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 7.2836588044441725e-06, |
|
"loss": 0.2156, |
|
"step": 306500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.2139747241904106e-06, |
|
"loss": 0.2124, |
|
"step": 307000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.1442906439366494e-06, |
|
"loss": 0.2197, |
|
"step": 307500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.074606563682888e-06, |
|
"loss": 0.2127, |
|
"step": 308000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.0049224834291255e-06, |
|
"loss": 0.2113, |
|
"step": 308500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 6.935238403175364e-06, |
|
"loss": 0.2093, |
|
"step": 309000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 6.865554322921602e-06, |
|
"loss": 0.2191, |
|
"step": 309500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 6.795870242667841e-06, |
|
"loss": 0.2023, |
|
"step": 310000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 6.72618616241408e-06, |
|
"loss": 0.2092, |
|
"step": 310500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 6.656502082160318e-06, |
|
"loss": 0.2088, |
|
"step": 311000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 6.586818001906557e-06, |
|
"loss": 0.2077, |
|
"step": 311500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 6.517133921652794e-06, |
|
"loss": 0.2075, |
|
"step": 312000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 6.447449841399034e-06, |
|
"loss": 0.2123, |
|
"step": 312500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 6.377765761145273e-06, |
|
"loss": 0.2137, |
|
"step": 313000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 6.30808168089151e-06, |
|
"loss": 0.2154, |
|
"step": 313500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.238397600637749e-06, |
|
"loss": 0.2137, |
|
"step": 314000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.168713520383988e-06, |
|
"loss": 0.2106, |
|
"step": 314500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.099029440130226e-06, |
|
"loss": 0.2101, |
|
"step": 315000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.029345359876464e-06, |
|
"loss": 0.2154, |
|
"step": 315500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 5.959661279622703e-06, |
|
"loss": 0.2122, |
|
"step": 316000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 5.889977199368942e-06, |
|
"loss": 0.2119, |
|
"step": 316500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 5.82029311911518e-06, |
|
"loss": 0.2069, |
|
"step": 317000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 5.750609038861418e-06, |
|
"loss": 0.2166, |
|
"step": 317500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5.6809249586076566e-06, |
|
"loss": 0.2135, |
|
"step": 318000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5.611240878353895e-06, |
|
"loss": 0.216, |
|
"step": 318500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5.5415567981001335e-06, |
|
"loss": 0.2078, |
|
"step": 319000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5.471872717846372e-06, |
|
"loss": 0.2033, |
|
"step": 319500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5.40218863759261e-06, |
|
"loss": 0.218, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5.3325045573388484e-06, |
|
"loss": 0.2128, |
|
"step": 320500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5.262820477085087e-06, |
|
"loss": 0.2115, |
|
"step": 321000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.193136396831326e-06, |
|
"loss": 0.2094, |
|
"step": 321500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.123452316577564e-06, |
|
"loss": 0.2096, |
|
"step": 322000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.053768236323802e-06, |
|
"loss": 0.2107, |
|
"step": 322500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.984084156070041e-06, |
|
"loss": 0.2123, |
|
"step": 323000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.91440007581628e-06, |
|
"loss": 0.2089, |
|
"step": 323500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.844715995562518e-06, |
|
"loss": 0.2103, |
|
"step": 324000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.775031915308757e-06, |
|
"loss": 0.2089, |
|
"step": 324500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.705347835054995e-06, |
|
"loss": 0.2041, |
|
"step": 325000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.635663754801233e-06, |
|
"loss": 0.2033, |
|
"step": 325500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.565979674547472e-06, |
|
"loss": 0.2161, |
|
"step": 326000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.496295594293711e-06, |
|
"loss": 0.2082, |
|
"step": 326500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.426611514039949e-06, |
|
"loss": 0.2064, |
|
"step": 327000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.356927433786187e-06, |
|
"loss": 0.2146, |
|
"step": 327500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.287243353532426e-06, |
|
"loss": 0.2149, |
|
"step": 328000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.2175592732786646e-06, |
|
"loss": 0.2031, |
|
"step": 328500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.147875193024903e-06, |
|
"loss": 0.2051, |
|
"step": 329000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.078191112771141e-06, |
|
"loss": 0.2138, |
|
"step": 329500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.0085070325173795e-06, |
|
"loss": 0.2039, |
|
"step": 330000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.9388229522636175e-06, |
|
"loss": 0.2155, |
|
"step": 330500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.869138872009856e-06, |
|
"loss": 0.2039, |
|
"step": 331000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.799454791756095e-06, |
|
"loss": 0.2121, |
|
"step": 331500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.7297707115023333e-06, |
|
"loss": 0.2129, |
|
"step": 332000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.6600866312485714e-06, |
|
"loss": 0.2153, |
|
"step": 332500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.59040255099481e-06, |
|
"loss": 0.2072, |
|
"step": 333000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.5207184707410487e-06, |
|
"loss": 0.2059, |
|
"step": 333500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.451034390487287e-06, |
|
"loss": 0.2111, |
|
"step": 334000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.3813503102335256e-06, |
|
"loss": 0.2161, |
|
"step": 334500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.3116662299797636e-06, |
|
"loss": 0.2153, |
|
"step": 335000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.241982149726002e-06, |
|
"loss": 0.2092, |
|
"step": 335500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.172298069472241e-06, |
|
"loss": 0.2151, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.1026139892184794e-06, |
|
"loss": 0.2036, |
|
"step": 336500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.032929908964718e-06, |
|
"loss": 0.214, |
|
"step": 337000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.963245828710956e-06, |
|
"loss": 0.214, |
|
"step": 337500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.8935617484571948e-06, |
|
"loss": 0.2063, |
|
"step": 338000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.823877668203433e-06, |
|
"loss": 0.2064, |
|
"step": 338500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.7541935879496717e-06, |
|
"loss": 0.2051, |
|
"step": 339000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.68450950769591e-06, |
|
"loss": 0.2069, |
|
"step": 339500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.614825427442148e-06, |
|
"loss": 0.2058, |
|
"step": 340000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.545141347188387e-06, |
|
"loss": 0.2122, |
|
"step": 340500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.475457266934625e-06, |
|
"loss": 0.202, |
|
"step": 341000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.405773186680864e-06, |
|
"loss": 0.2069, |
|
"step": 341500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.336089106427102e-06, |
|
"loss": 0.2081, |
|
"step": 342000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.2664050261733405e-06, |
|
"loss": 0.2072, |
|
"step": 342500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.1967209459195793e-06, |
|
"loss": 0.2092, |
|
"step": 343000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.1270368656658174e-06, |
|
"loss": 0.2121, |
|
"step": 343500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.0573527854120563e-06, |
|
"loss": 0.2068, |
|
"step": 344000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.9876687051582943e-06, |
|
"loss": 0.201, |
|
"step": 344500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.9179846249045327e-06, |
|
"loss": 0.2171, |
|
"step": 345000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.8483005446507714e-06, |
|
"loss": 0.2043, |
|
"step": 345500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.7786164643970096e-06, |
|
"loss": 0.2038, |
|
"step": 346000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.7089323841432483e-06, |
|
"loss": 0.2107, |
|
"step": 346500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.6392483038894866e-06, |
|
"loss": 0.2078, |
|
"step": 347000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.5695642236357252e-06, |
|
"loss": 0.209, |
|
"step": 347500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.4998801433819637e-06, |
|
"loss": 0.2118, |
|
"step": 348000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.4301960631282021e-06, |
|
"loss": 0.2247, |
|
"step": 348500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.3605119828744404e-06, |
|
"loss": 0.2094, |
|
"step": 349000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.2908279026206788e-06, |
|
"loss": 0.2154, |
|
"step": 349500 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.2211438223669173e-06, |
|
"loss": 0.2062, |
|
"step": 350000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.151459742113156e-06, |
|
"loss": 0.2105, |
|
"step": 350500 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.0817756618593944e-06, |
|
"loss": 0.2177, |
|
"step": 351000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.0120915816056329e-06, |
|
"loss": 0.2062, |
|
"step": 351500 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 9.424075013518711e-07, |
|
"loss": 0.205, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 8.727234210981097e-07, |
|
"loss": 0.2032, |
|
"step": 352500 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 8.030393408443481e-07, |
|
"loss": 0.2169, |
|
"step": 353000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 7.333552605905866e-07, |
|
"loss": 0.2153, |
|
"step": 353500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 6.636711803368249e-07, |
|
"loss": 0.2046, |
|
"step": 354000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 5.939871000830635e-07, |
|
"loss": 0.2068, |
|
"step": 354500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 5.243030198293018e-07, |
|
"loss": 0.2188, |
|
"step": 355000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.5461893957554035e-07, |
|
"loss": 0.2139, |
|
"step": 355500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.849348593217788e-07, |
|
"loss": 0.2085, |
|
"step": 356000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.1525077906801726e-07, |
|
"loss": 0.1994, |
|
"step": 356500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.455666988142557e-07, |
|
"loss": 0.216, |
|
"step": 357000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.7588261856049415e-07, |
|
"loss": 0.2123, |
|
"step": 357500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.061985383067326e-07, |
|
"loss": 0.2063, |
|
"step": 358000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.651445805297105e-08, |
|
"loss": 0.2098, |
|
"step": 358500 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 358762, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"total_flos": 1.26740447502336e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|