{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9992697108389406, "eval_steps": 500, "global_step": 358500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.993031591974624e-05, "loss": 0.6149, "step": 500 }, { "epoch": 0.0, "learning_rate": 4.986063183949248e-05, "loss": 0.4578, "step": 1000 }, { "epoch": 0.0, "learning_rate": 4.9790947759238714e-05, "loss": 0.4321, "step": 1500 }, { "epoch": 0.01, "learning_rate": 4.972126367898496e-05, "loss": 0.4103, "step": 2000 }, { "epoch": 0.01, "learning_rate": 4.9651579598731195e-05, "loss": 0.4003, "step": 2500 }, { "epoch": 0.01, "learning_rate": 4.958189551847743e-05, "loss": 0.3793, "step": 3000 }, { "epoch": 0.01, "learning_rate": 4.951221143822367e-05, "loss": 0.3678, "step": 3500 }, { "epoch": 0.01, "learning_rate": 4.944252735796991e-05, "loss": 0.386, "step": 4000 }, { "epoch": 0.01, "learning_rate": 4.9372843277716144e-05, "loss": 0.3704, "step": 4500 }, { "epoch": 0.01, "learning_rate": 4.930315919746239e-05, "loss": 0.3623, "step": 5000 }, { "epoch": 0.02, "learning_rate": 4.9233475117208625e-05, "loss": 0.3508, "step": 5500 }, { "epoch": 0.02, "learning_rate": 4.916379103695486e-05, "loss": 0.3505, "step": 6000 }, { "epoch": 0.02, "learning_rate": 4.90941069567011e-05, "loss": 0.3406, "step": 6500 }, { "epoch": 0.02, "learning_rate": 4.902442287644734e-05, "loss": 0.337, "step": 7000 }, { "epoch": 0.02, "learning_rate": 4.895473879619358e-05, "loss": 0.342, "step": 7500 }, { "epoch": 0.02, "learning_rate": 4.888505471593982e-05, "loss": 0.3322, "step": 8000 }, { "epoch": 0.02, "learning_rate": 4.881537063568606e-05, "loss": 0.3352, "step": 8500 }, { "epoch": 0.03, "learning_rate": 4.87456865554323e-05, "loss": 0.3293, "step": 9000 }, { "epoch": 0.03, "learning_rate": 4.8676002475178536e-05, "loss": 0.3249, "step": 9500 }, { "epoch": 0.03, "learning_rate": 4.860631839492477e-05, "loss": 0.3265, "step": 10000 }, { "epoch": 0.03, "learning_rate": 4.853663431467101e-05, "loss": 0.3125, "step": 10500 }, { "epoch": 0.03, "learning_rate": 4.846695023441725e-05, "loss": 0.3207, "step": 11000 }, { "epoch": 0.03, "learning_rate": 4.8397266154163485e-05, "loss": 0.3271, "step": 11500 }, { "epoch": 0.03, "learning_rate": 4.832758207390973e-05, "loss": 0.3187, "step": 12000 }, { "epoch": 0.03, "learning_rate": 4.8257897993655966e-05, "loss": 0.3163, "step": 12500 }, { "epoch": 0.04, "learning_rate": 4.81882139134022e-05, "loss": 0.3197, "step": 13000 }, { "epoch": 0.04, "learning_rate": 4.811852983314844e-05, "loss": 0.3132, "step": 13500 }, { "epoch": 0.04, "learning_rate": 4.804884575289468e-05, "loss": 0.306, "step": 14000 }, { "epoch": 0.04, "learning_rate": 4.7979161672640915e-05, "loss": 0.3053, "step": 14500 }, { "epoch": 0.04, "learning_rate": 4.790947759238716e-05, "loss": 0.3011, "step": 15000 }, { "epoch": 0.04, "learning_rate": 4.7839793512133396e-05, "loss": 0.3033, "step": 15500 }, { "epoch": 0.04, "learning_rate": 4.777010943187963e-05, "loss": 0.3103, "step": 16000 }, { "epoch": 0.05, "learning_rate": 4.770042535162587e-05, "loss": 0.3059, "step": 16500 }, { "epoch": 0.05, "learning_rate": 4.763074127137211e-05, "loss": 0.298, "step": 17000 }, { "epoch": 0.05, "learning_rate": 4.7561057191118345e-05, "loss": 0.2967, "step": 17500 }, { "epoch": 0.05, "learning_rate": 4.749137311086458e-05, "loss": 0.2844, "step": 18000 }, { "epoch": 0.05, "learning_rate": 4.7421689030610826e-05, "loss": 0.3001, "step": 18500 }, { "epoch": 0.05, "learning_rate": 4.735200495035706e-05, "loss": 0.3088, "step": 19000 }, { "epoch": 0.05, "learning_rate": 4.72823208701033e-05, "loss": 0.3079, "step": 19500 }, { "epoch": 0.06, "learning_rate": 4.721263678984954e-05, "loss": 0.2908, "step": 20000 }, { "epoch": 0.06, "learning_rate": 4.714295270959578e-05, "loss": 0.2879, "step": 20500 }, { "epoch": 0.06, "learning_rate": 4.707326862934202e-05, "loss": 0.2891, "step": 21000 }, { "epoch": 0.06, "learning_rate": 4.7003584549088256e-05, "loss": 0.3005, "step": 21500 }, { "epoch": 0.06, "learning_rate": 4.69339004688345e-05, "loss": 0.3008, "step": 22000 }, { "epoch": 0.06, "learning_rate": 4.686421638858074e-05, "loss": 0.2914, "step": 22500 }, { "epoch": 0.06, "learning_rate": 4.6794532308326974e-05, "loss": 0.2864, "step": 23000 }, { "epoch": 0.07, "learning_rate": 4.672484822807321e-05, "loss": 0.2885, "step": 23500 }, { "epoch": 0.07, "learning_rate": 4.665516414781945e-05, "loss": 0.289, "step": 24000 }, { "epoch": 0.07, "learning_rate": 4.6585480067565685e-05, "loss": 0.283, "step": 24500 }, { "epoch": 0.07, "learning_rate": 4.651579598731193e-05, "loss": 0.2917, "step": 25000 }, { "epoch": 0.07, "learning_rate": 4.6446111907058167e-05, "loss": 0.2895, "step": 25500 }, { "epoch": 0.07, "learning_rate": 4.6376427826804404e-05, "loss": 0.276, "step": 26000 }, { "epoch": 0.07, "learning_rate": 4.630674374655064e-05, "loss": 0.2847, "step": 26500 }, { "epoch": 0.08, "learning_rate": 4.623705966629688e-05, "loss": 0.2774, "step": 27000 }, { "epoch": 0.08, "learning_rate": 4.6167375586043115e-05, "loss": 0.283, "step": 27500 }, { "epoch": 0.08, "learning_rate": 4.609769150578935e-05, "loss": 0.2859, "step": 28000 }, { "epoch": 0.08, "learning_rate": 4.6028007425535596e-05, "loss": 0.2793, "step": 28500 }, { "epoch": 0.08, "learning_rate": 4.5958323345281834e-05, "loss": 0.2769, "step": 29000 }, { "epoch": 0.08, "learning_rate": 4.588863926502807e-05, "loss": 0.2763, "step": 29500 }, { "epoch": 0.08, "learning_rate": 4.581895518477431e-05, "loss": 0.2886, "step": 30000 }, { "epoch": 0.09, "learning_rate": 4.5749271104520545e-05, "loss": 0.2721, "step": 30500 }, { "epoch": 0.09, "learning_rate": 4.567958702426678e-05, "loss": 0.2805, "step": 31000 }, { "epoch": 0.09, "learning_rate": 4.560990294401302e-05, "loss": 0.2779, "step": 31500 }, { "epoch": 0.09, "learning_rate": 4.5540218863759264e-05, "loss": 0.2833, "step": 32000 }, { "epoch": 0.09, "learning_rate": 4.54705347835055e-05, "loss": 0.2881, "step": 32500 }, { "epoch": 0.09, "learning_rate": 4.540085070325174e-05, "loss": 0.2802, "step": 33000 }, { "epoch": 0.09, "learning_rate": 4.5331166622997975e-05, "loss": 0.2683, "step": 33500 }, { "epoch": 0.09, "learning_rate": 4.526148254274422e-05, "loss": 0.2732, "step": 34000 }, { "epoch": 0.1, "learning_rate": 4.5191798462490456e-05, "loss": 0.2773, "step": 34500 }, { "epoch": 0.1, "learning_rate": 4.5122114382236693e-05, "loss": 0.2791, "step": 35000 }, { "epoch": 0.1, "learning_rate": 4.505243030198294e-05, "loss": 0.2842, "step": 35500 }, { "epoch": 0.1, "learning_rate": 4.4982746221729175e-05, "loss": 0.278, "step": 36000 }, { "epoch": 0.1, "learning_rate": 4.491306214147541e-05, "loss": 0.2757, "step": 36500 }, { "epoch": 0.1, "learning_rate": 4.484337806122165e-05, "loss": 0.2741, "step": 37000 }, { "epoch": 0.1, "learning_rate": 4.4773693980967886e-05, "loss": 0.2708, "step": 37500 }, { "epoch": 0.11, "learning_rate": 4.470400990071412e-05, "loss": 0.2725, "step": 38000 }, { "epoch": 0.11, "learning_rate": 4.463432582046037e-05, "loss": 0.2735, "step": 38500 }, { "epoch": 0.11, "learning_rate": 4.4564641740206604e-05, "loss": 0.2735, "step": 39000 }, { "epoch": 0.11, "learning_rate": 4.449495765995284e-05, "loss": 0.2704, "step": 39500 }, { "epoch": 0.11, "learning_rate": 4.442527357969908e-05, "loss": 0.2757, "step": 40000 }, { "epoch": 0.11, "learning_rate": 4.4355589499445316e-05, "loss": 0.2851, "step": 40500 }, { "epoch": 0.11, "learning_rate": 4.428590541919155e-05, "loss": 0.2681, "step": 41000 }, { "epoch": 0.12, "learning_rate": 4.421622133893779e-05, "loss": 0.27, "step": 41500 }, { "epoch": 0.12, "learning_rate": 4.4146537258684034e-05, "loss": 0.2701, "step": 42000 }, { "epoch": 0.12, "learning_rate": 4.407685317843027e-05, "loss": 0.2648, "step": 42500 }, { "epoch": 0.12, "learning_rate": 4.400716909817651e-05, "loss": 0.2698, "step": 43000 }, { "epoch": 0.12, "learning_rate": 4.3937485017922746e-05, "loss": 0.2693, "step": 43500 }, { "epoch": 0.12, "learning_rate": 4.386780093766898e-05, "loss": 0.2665, "step": 44000 }, { "epoch": 0.12, "learning_rate": 4.379811685741522e-05, "loss": 0.2688, "step": 44500 }, { "epoch": 0.13, "learning_rate": 4.3728432777161464e-05, "loss": 0.2689, "step": 45000 }, { "epoch": 0.13, "learning_rate": 4.36587486969077e-05, "loss": 0.273, "step": 45500 }, { "epoch": 0.13, "learning_rate": 4.358906461665394e-05, "loss": 0.2717, "step": 46000 }, { "epoch": 0.13, "learning_rate": 4.3519380536400176e-05, "loss": 0.2691, "step": 46500 }, { "epoch": 0.13, "learning_rate": 4.344969645614641e-05, "loss": 0.2596, "step": 47000 }, { "epoch": 0.13, "learning_rate": 4.338001237589266e-05, "loss": 0.2639, "step": 47500 }, { "epoch": 0.13, "learning_rate": 4.3310328295638894e-05, "loss": 0.2724, "step": 48000 }, { "epoch": 0.14, "learning_rate": 4.324064421538513e-05, "loss": 0.2572, "step": 48500 }, { "epoch": 0.14, "learning_rate": 4.3170960135131375e-05, "loss": 0.2657, "step": 49000 }, { "epoch": 0.14, "learning_rate": 4.310127605487761e-05, "loss": 0.2681, "step": 49500 }, { "epoch": 0.14, "learning_rate": 4.303159197462385e-05, "loss": 0.2654, "step": 50000 }, { "epoch": 0.14, "learning_rate": 4.296190789437009e-05, "loss": 0.2655, "step": 50500 }, { "epoch": 0.14, "learning_rate": 4.2892223814116324e-05, "loss": 0.2658, "step": 51000 }, { "epoch": 0.14, "learning_rate": 4.282253973386256e-05, "loss": 0.2582, "step": 51500 }, { "epoch": 0.14, "learning_rate": 4.2752855653608805e-05, "loss": 0.2664, "step": 52000 }, { "epoch": 0.15, "learning_rate": 4.268317157335504e-05, "loss": 0.2508, "step": 52500 }, { "epoch": 0.15, "learning_rate": 4.261348749310128e-05, "loss": 0.2569, "step": 53000 }, { "epoch": 0.15, "learning_rate": 4.254380341284752e-05, "loss": 0.2592, "step": 53500 }, { "epoch": 0.15, "learning_rate": 4.2474119332593754e-05, "loss": 0.2645, "step": 54000 }, { "epoch": 0.15, "learning_rate": 4.240443525233999e-05, "loss": 0.2597, "step": 54500 }, { "epoch": 0.15, "learning_rate": 4.2334751172086235e-05, "loss": 0.2545, "step": 55000 }, { "epoch": 0.15, "learning_rate": 4.226506709183247e-05, "loss": 0.2652, "step": 55500 }, { "epoch": 0.16, "learning_rate": 4.219538301157871e-05, "loss": 0.2677, "step": 56000 }, { "epoch": 0.16, "learning_rate": 4.2125698931324947e-05, "loss": 0.2671, "step": 56500 }, { "epoch": 0.16, "learning_rate": 4.2056014851071184e-05, "loss": 0.2601, "step": 57000 }, { "epoch": 0.16, "learning_rate": 4.198633077081742e-05, "loss": 0.2728, "step": 57500 }, { "epoch": 0.16, "learning_rate": 4.191664669056366e-05, "loss": 0.2628, "step": 58000 }, { "epoch": 0.16, "learning_rate": 4.18469626103099e-05, "loss": 0.2628, "step": 58500 }, { "epoch": 0.16, "learning_rate": 4.177727853005614e-05, "loss": 0.2508, "step": 59000 }, { "epoch": 0.17, "learning_rate": 4.1707594449802376e-05, "loss": 0.2661, "step": 59500 }, { "epoch": 0.17, "learning_rate": 4.1637910369548614e-05, "loss": 0.2575, "step": 60000 }, { "epoch": 0.17, "learning_rate": 4.156822628929485e-05, "loss": 0.2537, "step": 60500 }, { "epoch": 0.17, "learning_rate": 4.149854220904109e-05, "loss": 0.2495, "step": 61000 }, { "epoch": 0.17, "learning_rate": 4.142885812878733e-05, "loss": 0.2564, "step": 61500 }, { "epoch": 0.17, "learning_rate": 4.135917404853357e-05, "loss": 0.2515, "step": 62000 }, { "epoch": 0.17, "learning_rate": 4.128948996827981e-05, "loss": 0.2582, "step": 62500 }, { "epoch": 0.18, "learning_rate": 4.121980588802605e-05, "loss": 0.266, "step": 63000 }, { "epoch": 0.18, "learning_rate": 4.115012180777229e-05, "loss": 0.2492, "step": 63500 }, { "epoch": 0.18, "learning_rate": 4.1080437727518525e-05, "loss": 0.2677, "step": 64000 }, { "epoch": 0.18, "learning_rate": 4.101075364726476e-05, "loss": 0.257, "step": 64500 }, { "epoch": 0.18, "learning_rate": 4.0941069567011006e-05, "loss": 0.2586, "step": 65000 }, { "epoch": 0.18, "learning_rate": 4.087138548675724e-05, "loss": 0.2571, "step": 65500 }, { "epoch": 0.18, "learning_rate": 4.080170140650348e-05, "loss": 0.2549, "step": 66000 }, { "epoch": 0.19, "learning_rate": 4.073201732624972e-05, "loss": 0.258, "step": 66500 }, { "epoch": 0.19, "learning_rate": 4.0662333245995955e-05, "loss": 0.2594, "step": 67000 }, { "epoch": 0.19, "learning_rate": 4.059264916574219e-05, "loss": 0.2564, "step": 67500 }, { "epoch": 0.19, "learning_rate": 4.052296508548843e-05, "loss": 0.2646, "step": 68000 }, { "epoch": 0.19, "learning_rate": 4.045328100523467e-05, "loss": 0.2577, "step": 68500 }, { "epoch": 0.19, "learning_rate": 4.038359692498091e-05, "loss": 0.2567, "step": 69000 }, { "epoch": 0.19, "learning_rate": 4.031391284472715e-05, "loss": 0.2518, "step": 69500 }, { "epoch": 0.2, "learning_rate": 4.0244228764473384e-05, "loss": 0.2541, "step": 70000 }, { "epoch": 0.2, "learning_rate": 4.017454468421962e-05, "loss": 0.2504, "step": 70500 }, { "epoch": 0.2, "learning_rate": 4.010486060396586e-05, "loss": 0.2595, "step": 71000 }, { "epoch": 0.2, "learning_rate": 4.00351765237121e-05, "loss": 0.2618, "step": 71500 }, { "epoch": 0.2, "learning_rate": 3.996549244345834e-05, "loss": 0.2604, "step": 72000 }, { "epoch": 0.2, "learning_rate": 3.989580836320458e-05, "loss": 0.2525, "step": 72500 }, { "epoch": 0.2, "learning_rate": 3.9826124282950814e-05, "loss": 0.2447, "step": 73000 }, { "epoch": 0.2, "learning_rate": 3.975644020269705e-05, "loss": 0.2535, "step": 73500 }, { "epoch": 0.21, "learning_rate": 3.968675612244329e-05, "loss": 0.256, "step": 74000 }, { "epoch": 0.21, "learning_rate": 3.9617072042189526e-05, "loss": 0.2475, "step": 74500 }, { "epoch": 0.21, "learning_rate": 3.954738796193577e-05, "loss": 0.2591, "step": 75000 }, { "epoch": 0.21, "learning_rate": 3.947770388168201e-05, "loss": 0.2464, "step": 75500 }, { "epoch": 0.21, "learning_rate": 3.9408019801428244e-05, "loss": 0.2535, "step": 76000 }, { "epoch": 0.21, "learning_rate": 3.933833572117449e-05, "loss": 0.2514, "step": 76500 }, { "epoch": 0.21, "learning_rate": 3.9268651640920725e-05, "loss": 0.2484, "step": 77000 }, { "epoch": 0.22, "learning_rate": 3.919896756066696e-05, "loss": 0.2483, "step": 77500 }, { "epoch": 0.22, "learning_rate": 3.91292834804132e-05, "loss": 0.2559, "step": 78000 }, { "epoch": 0.22, "learning_rate": 3.9059599400159444e-05, "loss": 0.2526, "step": 78500 }, { "epoch": 0.22, "learning_rate": 3.898991531990568e-05, "loss": 0.2537, "step": 79000 }, { "epoch": 0.22, "learning_rate": 3.892023123965192e-05, "loss": 0.2586, "step": 79500 }, { "epoch": 0.22, "learning_rate": 3.8850547159398155e-05, "loss": 0.2407, "step": 80000 }, { "epoch": 0.22, "learning_rate": 3.878086307914439e-05, "loss": 0.2518, "step": 80500 }, { "epoch": 0.23, "learning_rate": 3.871117899889063e-05, "loss": 0.254, "step": 81000 }, { "epoch": 0.23, "learning_rate": 3.8641494918636874e-05, "loss": 0.2517, "step": 81500 }, { "epoch": 0.23, "learning_rate": 3.857181083838311e-05, "loss": 0.248, "step": 82000 }, { "epoch": 0.23, "learning_rate": 3.850212675812935e-05, "loss": 0.2483, "step": 82500 }, { "epoch": 0.23, "learning_rate": 3.8432442677875585e-05, "loss": 0.2465, "step": 83000 }, { "epoch": 0.23, "learning_rate": 3.836275859762182e-05, "loss": 0.243, "step": 83500 }, { "epoch": 0.23, "learning_rate": 3.829307451736806e-05, "loss": 0.2518, "step": 84000 }, { "epoch": 0.24, "learning_rate": 3.82233904371143e-05, "loss": 0.2548, "step": 84500 }, { "epoch": 0.24, "learning_rate": 3.815370635686054e-05, "loss": 0.2492, "step": 85000 }, { "epoch": 0.24, "learning_rate": 3.808402227660678e-05, "loss": 0.2444, "step": 85500 }, { "epoch": 0.24, "learning_rate": 3.8014338196353015e-05, "loss": 0.2436, "step": 86000 }, { "epoch": 0.24, "learning_rate": 3.794465411609925e-05, "loss": 0.2494, "step": 86500 }, { "epoch": 0.24, "learning_rate": 3.787497003584549e-05, "loss": 0.2595, "step": 87000 }, { "epoch": 0.24, "learning_rate": 3.7805285955591727e-05, "loss": 0.2507, "step": 87500 }, { "epoch": 0.25, "learning_rate": 3.7735601875337964e-05, "loss": 0.2388, "step": 88000 }, { "epoch": 0.25, "learning_rate": 3.766591779508421e-05, "loss": 0.2455, "step": 88500 }, { "epoch": 0.25, "learning_rate": 3.7596233714830445e-05, "loss": 0.2364, "step": 89000 }, { "epoch": 0.25, "learning_rate": 3.752654963457668e-05, "loss": 0.2433, "step": 89500 }, { "epoch": 0.25, "learning_rate": 3.7456865554322926e-05, "loss": 0.2519, "step": 90000 }, { "epoch": 0.25, "learning_rate": 3.738718147406916e-05, "loss": 0.2495, "step": 90500 }, { "epoch": 0.25, "learning_rate": 3.73174973938154e-05, "loss": 0.2405, "step": 91000 }, { "epoch": 0.26, "learning_rate": 3.7247813313561644e-05, "loss": 0.2431, "step": 91500 }, { "epoch": 0.26, "learning_rate": 3.717812923330788e-05, "loss": 0.2474, "step": 92000 }, { "epoch": 0.26, "learning_rate": 3.710844515305412e-05, "loss": 0.2468, "step": 92500 }, { "epoch": 0.26, "learning_rate": 3.7038761072800356e-05, "loss": 0.2464, "step": 93000 }, { "epoch": 0.26, "learning_rate": 3.696907699254659e-05, "loss": 0.2464, "step": 93500 }, { "epoch": 0.26, "learning_rate": 3.689939291229283e-05, "loss": 0.2424, "step": 94000 }, { "epoch": 0.26, "learning_rate": 3.682970883203907e-05, "loss": 0.2379, "step": 94500 }, { "epoch": 0.26, "learning_rate": 3.676002475178531e-05, "loss": 0.2442, "step": 95000 }, { "epoch": 0.27, "learning_rate": 3.669034067153155e-05, "loss": 0.246, "step": 95500 }, { "epoch": 0.27, "learning_rate": 3.6620656591277786e-05, "loss": 0.2403, "step": 96000 }, { "epoch": 0.27, "learning_rate": 3.655097251102402e-05, "loss": 0.2429, "step": 96500 }, { "epoch": 0.27, "learning_rate": 3.648128843077026e-05, "loss": 0.2482, "step": 97000 }, { "epoch": 0.27, "learning_rate": 3.64116043505165e-05, "loss": 0.2432, "step": 97500 }, { "epoch": 0.27, "learning_rate": 3.6341920270262735e-05, "loss": 0.2374, "step": 98000 }, { "epoch": 0.27, "learning_rate": 3.627223619000898e-05, "loss": 0.2539, "step": 98500 }, { "epoch": 0.28, "learning_rate": 3.6202552109755216e-05, "loss": 0.2496, "step": 99000 }, { "epoch": 0.28, "learning_rate": 3.613286802950145e-05, "loss": 0.2447, "step": 99500 }, { "epoch": 0.28, "learning_rate": 3.606318394924769e-05, "loss": 0.2396, "step": 100000 }, { "epoch": 0.28, "learning_rate": 3.599349986899393e-05, "loss": 0.2472, "step": 100500 }, { "epoch": 0.28, "learning_rate": 3.5923815788740164e-05, "loss": 0.2431, "step": 101000 }, { "epoch": 0.28, "learning_rate": 3.585413170848641e-05, "loss": 0.2414, "step": 101500 }, { "epoch": 0.28, "learning_rate": 3.5784447628232646e-05, "loss": 0.2398, "step": 102000 }, { "epoch": 0.29, "learning_rate": 3.571476354797888e-05, "loss": 0.2415, "step": 102500 }, { "epoch": 0.29, "learning_rate": 3.564507946772512e-05, "loss": 0.2428, "step": 103000 }, { "epoch": 0.29, "learning_rate": 3.5575395387471364e-05, "loss": 0.2341, "step": 103500 }, { "epoch": 0.29, "learning_rate": 3.55057113072176e-05, "loss": 0.2461, "step": 104000 }, { "epoch": 0.29, "learning_rate": 3.543602722696384e-05, "loss": 0.2481, "step": 104500 }, { "epoch": 0.29, "learning_rate": 3.536634314671008e-05, "loss": 0.2409, "step": 105000 }, { "epoch": 0.29, "learning_rate": 3.529665906645632e-05, "loss": 0.2388, "step": 105500 }, { "epoch": 0.3, "learning_rate": 3.522697498620256e-05, "loss": 0.2427, "step": 106000 }, { "epoch": 0.3, "learning_rate": 3.5157290905948794e-05, "loss": 0.238, "step": 106500 }, { "epoch": 0.3, "learning_rate": 3.508760682569503e-05, "loss": 0.2453, "step": 107000 }, { "epoch": 0.3, "learning_rate": 3.501792274544127e-05, "loss": 0.245, "step": 107500 }, { "epoch": 0.3, "learning_rate": 3.4948238665187505e-05, "loss": 0.2395, "step": 108000 }, { "epoch": 0.3, "learning_rate": 3.487855458493375e-05, "loss": 0.2369, "step": 108500 }, { "epoch": 0.3, "learning_rate": 3.4808870504679986e-05, "loss": 0.2369, "step": 109000 }, { "epoch": 0.31, "learning_rate": 3.4739186424426224e-05, "loss": 0.2476, "step": 109500 }, { "epoch": 0.31, "learning_rate": 3.466950234417246e-05, "loss": 0.238, "step": 110000 }, { "epoch": 0.31, "learning_rate": 3.45998182639187e-05, "loss": 0.2417, "step": 110500 }, { "epoch": 0.31, "learning_rate": 3.4530134183664935e-05, "loss": 0.2433, "step": 111000 }, { "epoch": 0.31, "learning_rate": 3.446045010341118e-05, "loss": 0.2431, "step": 111500 }, { "epoch": 0.31, "learning_rate": 3.4390766023157416e-05, "loss": 0.2434, "step": 112000 }, { "epoch": 0.31, "learning_rate": 3.4321081942903654e-05, "loss": 0.2459, "step": 112500 }, { "epoch": 0.31, "learning_rate": 3.425139786264989e-05, "loss": 0.2464, "step": 113000 }, { "epoch": 0.32, "learning_rate": 3.418171378239613e-05, "loss": 0.2357, "step": 113500 }, { "epoch": 0.32, "learning_rate": 3.4112029702142365e-05, "loss": 0.2394, "step": 114000 }, { "epoch": 0.32, "learning_rate": 3.40423456218886e-05, "loss": 0.2404, "step": 114500 }, { "epoch": 0.32, "learning_rate": 3.3972661541634846e-05, "loss": 0.2333, "step": 115000 }, { "epoch": 0.32, "learning_rate": 3.3902977461381083e-05, "loss": 0.2323, "step": 115500 }, { "epoch": 0.32, "learning_rate": 3.383329338112732e-05, "loss": 0.2562, "step": 116000 }, { "epoch": 0.32, "learning_rate": 3.376360930087356e-05, "loss": 0.238, "step": 116500 }, { "epoch": 0.33, "learning_rate": 3.36939252206198e-05, "loss": 0.2409, "step": 117000 }, { "epoch": 0.33, "learning_rate": 3.362424114036604e-05, "loss": 0.2435, "step": 117500 }, { "epoch": 0.33, "learning_rate": 3.3554557060112276e-05, "loss": 0.2377, "step": 118000 }, { "epoch": 0.33, "learning_rate": 3.348487297985852e-05, "loss": 0.2287, "step": 118500 }, { "epoch": 0.33, "learning_rate": 3.341518889960476e-05, "loss": 0.2427, "step": 119000 }, { "epoch": 0.33, "learning_rate": 3.3345504819350994e-05, "loss": 0.2438, "step": 119500 }, { "epoch": 0.33, "learning_rate": 3.327582073909723e-05, "loss": 0.2447, "step": 120000 }, { "epoch": 0.34, "learning_rate": 3.320613665884347e-05, "loss": 0.2375, "step": 120500 }, { "epoch": 0.34, "learning_rate": 3.3136452578589706e-05, "loss": 0.2399, "step": 121000 }, { "epoch": 0.34, "learning_rate": 3.306676849833595e-05, "loss": 0.2416, "step": 121500 }, { "epoch": 0.34, "learning_rate": 3.299708441808219e-05, "loss": 0.2405, "step": 122000 }, { "epoch": 0.34, "learning_rate": 3.2927400337828424e-05, "loss": 0.2322, "step": 122500 }, { "epoch": 0.34, "learning_rate": 3.285771625757466e-05, "loss": 0.2337, "step": 123000 }, { "epoch": 0.34, "learning_rate": 3.27880321773209e-05, "loss": 0.2391, "step": 123500 }, { "epoch": 0.35, "learning_rate": 3.2718348097067136e-05, "loss": 0.239, "step": 124000 }, { "epoch": 0.35, "learning_rate": 3.264866401681337e-05, "loss": 0.2363, "step": 124500 }, { "epoch": 0.35, "learning_rate": 3.257897993655962e-05, "loss": 0.239, "step": 125000 }, { "epoch": 0.35, "learning_rate": 3.2509295856305854e-05, "loss": 0.243, "step": 125500 }, { "epoch": 0.35, "learning_rate": 3.243961177605209e-05, "loss": 0.2338, "step": 126000 }, { "epoch": 0.35, "learning_rate": 3.236992769579833e-05, "loss": 0.2386, "step": 126500 }, { "epoch": 0.35, "learning_rate": 3.2300243615544566e-05, "loss": 0.2339, "step": 127000 }, { "epoch": 0.36, "learning_rate": 3.22305595352908e-05, "loss": 0.2372, "step": 127500 }, { "epoch": 0.36, "learning_rate": 3.216087545503705e-05, "loss": 0.2368, "step": 128000 }, { "epoch": 0.36, "learning_rate": 3.2091191374783284e-05, "loss": 0.2344, "step": 128500 }, { "epoch": 0.36, "learning_rate": 3.202150729452952e-05, "loss": 0.2402, "step": 129000 }, { "epoch": 0.36, "learning_rate": 3.195182321427576e-05, "loss": 0.2352, "step": 129500 }, { "epoch": 0.36, "learning_rate": 3.1882139134021996e-05, "loss": 0.2358, "step": 130000 }, { "epoch": 0.36, "learning_rate": 3.181245505376824e-05, "loss": 0.2431, "step": 130500 }, { "epoch": 0.37, "learning_rate": 3.174277097351448e-05, "loss": 0.2312, "step": 131000 }, { "epoch": 0.37, "learning_rate": 3.1673086893260714e-05, "loss": 0.2384, "step": 131500 }, { "epoch": 0.37, "learning_rate": 3.160340281300696e-05, "loss": 0.2352, "step": 132000 }, { "epoch": 0.37, "learning_rate": 3.1533718732753195e-05, "loss": 0.236, "step": 132500 }, { "epoch": 0.37, "learning_rate": 3.146403465249943e-05, "loss": 0.2491, "step": 133000 }, { "epoch": 0.37, "learning_rate": 3.139435057224567e-05, "loss": 0.2431, "step": 133500 }, { "epoch": 0.37, "learning_rate": 3.132466649199191e-05, "loss": 0.2351, "step": 134000 }, { "epoch": 0.37, "learning_rate": 3.1254982411738144e-05, "loss": 0.2347, "step": 134500 }, { "epoch": 0.38, "learning_rate": 3.118529833148439e-05, "loss": 0.2349, "step": 135000 }, { "epoch": 0.38, "learning_rate": 3.1115614251230625e-05, "loss": 0.2296, "step": 135500 }, { "epoch": 0.38, "learning_rate": 3.104593017097686e-05, "loss": 0.2408, "step": 136000 }, { "epoch": 0.38, "learning_rate": 3.09762460907231e-05, "loss": 0.2287, "step": 136500 }, { "epoch": 0.38, "learning_rate": 3.0906562010469337e-05, "loss": 0.2327, "step": 137000 }, { "epoch": 0.38, "learning_rate": 3.0836877930215574e-05, "loss": 0.2329, "step": 137500 }, { "epoch": 0.38, "learning_rate": 3.076719384996182e-05, "loss": 0.2401, "step": 138000 }, { "epoch": 0.39, "learning_rate": 3.0697509769708055e-05, "loss": 0.2366, "step": 138500 }, { "epoch": 0.39, "learning_rate": 3.062782568945429e-05, "loss": 0.2351, "step": 139000 }, { "epoch": 0.39, "learning_rate": 3.055814160920053e-05, "loss": 0.236, "step": 139500 }, { "epoch": 0.39, "learning_rate": 3.0488457528946766e-05, "loss": 0.2282, "step": 140000 }, { "epoch": 0.39, "learning_rate": 3.0418773448693007e-05, "loss": 0.2339, "step": 140500 }, { "epoch": 0.39, "learning_rate": 3.0349089368439244e-05, "loss": 0.2336, "step": 141000 }, { "epoch": 0.39, "learning_rate": 3.0279405288185485e-05, "loss": 0.2384, "step": 141500 }, { "epoch": 0.4, "learning_rate": 3.0209721207931725e-05, "loss": 0.2247, "step": 142000 }, { "epoch": 0.4, "learning_rate": 3.0140037127677963e-05, "loss": 0.2345, "step": 142500 }, { "epoch": 0.4, "learning_rate": 3.00703530474242e-05, "loss": 0.2296, "step": 143000 }, { "epoch": 0.4, "learning_rate": 3.0000668967170437e-05, "loss": 0.2319, "step": 143500 }, { "epoch": 0.4, "learning_rate": 2.9930984886916674e-05, "loss": 0.2454, "step": 144000 }, { "epoch": 0.4, "learning_rate": 2.986130080666291e-05, "loss": 0.234, "step": 144500 }, { "epoch": 0.4, "learning_rate": 2.9791616726409155e-05, "loss": 0.2305, "step": 145000 }, { "epoch": 0.41, "learning_rate": 2.9721932646155392e-05, "loss": 0.2346, "step": 145500 }, { "epoch": 0.41, "learning_rate": 2.965224856590163e-05, "loss": 0.2336, "step": 146000 }, { "epoch": 0.41, "learning_rate": 2.9582564485647867e-05, "loss": 0.2312, "step": 146500 }, { "epoch": 0.41, "learning_rate": 2.9512880405394104e-05, "loss": 0.2349, "step": 147000 }, { "epoch": 0.41, "learning_rate": 2.9443196325140345e-05, "loss": 0.2343, "step": 147500 }, { "epoch": 0.41, "learning_rate": 2.9373512244886585e-05, "loss": 0.23, "step": 148000 }, { "epoch": 0.41, "learning_rate": 2.9303828164632822e-05, "loss": 0.2221, "step": 148500 }, { "epoch": 0.42, "learning_rate": 2.9234144084379063e-05, "loss": 0.2368, "step": 149000 }, { "epoch": 0.42, "learning_rate": 2.91644600041253e-05, "loss": 0.2262, "step": 149500 }, { "epoch": 0.42, "learning_rate": 2.9094775923871537e-05, "loss": 0.2276, "step": 150000 }, { "epoch": 0.42, "learning_rate": 2.9025091843617774e-05, "loss": 0.2368, "step": 150500 }, { "epoch": 0.42, "learning_rate": 2.895540776336401e-05, "loss": 0.2373, "step": 151000 }, { "epoch": 0.42, "learning_rate": 2.8885723683110256e-05, "loss": 0.2397, "step": 151500 }, { "epoch": 0.42, "learning_rate": 2.8816039602856493e-05, "loss": 0.2328, "step": 152000 }, { "epoch": 0.43, "learning_rate": 2.874635552260273e-05, "loss": 0.2284, "step": 152500 }, { "epoch": 0.43, "learning_rate": 2.8676671442348967e-05, "loss": 0.229, "step": 153000 }, { "epoch": 0.43, "learning_rate": 2.8606987362095204e-05, "loss": 0.228, "step": 153500 }, { "epoch": 0.43, "learning_rate": 2.853730328184144e-05, "loss": 0.2335, "step": 154000 }, { "epoch": 0.43, "learning_rate": 2.8467619201587682e-05, "loss": 0.2281, "step": 154500 }, { "epoch": 0.43, "learning_rate": 2.8397935121333923e-05, "loss": 0.2329, "step": 155000 }, { "epoch": 0.43, "learning_rate": 2.8328251041080163e-05, "loss": 0.2294, "step": 155500 }, { "epoch": 0.43, "learning_rate": 2.82585669608264e-05, "loss": 0.2292, "step": 156000 }, { "epoch": 0.44, "learning_rate": 2.8188882880572638e-05, "loss": 0.23, "step": 156500 }, { "epoch": 0.44, "learning_rate": 2.8119198800318875e-05, "loss": 0.2255, "step": 157000 }, { "epoch": 0.44, "learning_rate": 2.8049514720065112e-05, "loss": 0.235, "step": 157500 }, { "epoch": 0.44, "learning_rate": 2.7979830639811356e-05, "loss": 0.2368, "step": 158000 }, { "epoch": 0.44, "learning_rate": 2.7910146559557593e-05, "loss": 0.2277, "step": 158500 }, { "epoch": 0.44, "learning_rate": 2.784046247930383e-05, "loss": 0.2381, "step": 159000 }, { "epoch": 0.44, "learning_rate": 2.7770778399050068e-05, "loss": 0.2302, "step": 159500 }, { "epoch": 0.45, "learning_rate": 2.7701094318796305e-05, "loss": 0.225, "step": 160000 }, { "epoch": 0.45, "learning_rate": 2.7631410238542542e-05, "loss": 0.227, "step": 160500 }, { "epoch": 0.45, "learning_rate": 2.7561726158288782e-05, "loss": 0.2362, "step": 161000 }, { "epoch": 0.45, "learning_rate": 2.7492042078035023e-05, "loss": 0.2283, "step": 161500 }, { "epoch": 0.45, "learning_rate": 2.742235799778126e-05, "loss": 0.2201, "step": 162000 }, { "epoch": 0.45, "learning_rate": 2.73526739175275e-05, "loss": 0.2327, "step": 162500 }, { "epoch": 0.45, "learning_rate": 2.7282989837273738e-05, "loss": 0.2236, "step": 163000 }, { "epoch": 0.46, "learning_rate": 2.7213305757019975e-05, "loss": 0.2362, "step": 163500 }, { "epoch": 0.46, "learning_rate": 2.7143621676766212e-05, "loss": 0.2282, "step": 164000 }, { "epoch": 0.46, "learning_rate": 2.707393759651245e-05, "loss": 0.2388, "step": 164500 }, { "epoch": 0.46, "learning_rate": 2.7004253516258693e-05, "loss": 0.2274, "step": 165000 }, { "epoch": 0.46, "learning_rate": 2.693456943600493e-05, "loss": 0.2294, "step": 165500 }, { "epoch": 0.46, "learning_rate": 2.6864885355751168e-05, "loss": 0.2206, "step": 166000 }, { "epoch": 0.46, "learning_rate": 2.6795201275497405e-05, "loss": 0.2253, "step": 166500 }, { "epoch": 0.47, "learning_rate": 2.6725517195243642e-05, "loss": 0.2387, "step": 167000 }, { "epoch": 0.47, "learning_rate": 2.665583311498988e-05, "loss": 0.232, "step": 167500 }, { "epoch": 0.47, "learning_rate": 2.6586149034736123e-05, "loss": 0.2296, "step": 168000 }, { "epoch": 0.47, "learning_rate": 2.651646495448236e-05, "loss": 0.2168, "step": 168500 }, { "epoch": 0.47, "learning_rate": 2.6446780874228598e-05, "loss": 0.2217, "step": 169000 }, { "epoch": 0.47, "learning_rate": 2.637709679397484e-05, "loss": 0.2333, "step": 169500 }, { "epoch": 0.47, "learning_rate": 2.6307412713721076e-05, "loss": 0.2269, "step": 170000 }, { "epoch": 0.48, "learning_rate": 2.6237728633467313e-05, "loss": 0.2326, "step": 170500 }, { "epoch": 0.48, "learning_rate": 2.616804455321355e-05, "loss": 0.2262, "step": 171000 }, { "epoch": 0.48, "learning_rate": 2.6098360472959794e-05, "loss": 0.2245, "step": 171500 }, { "epoch": 0.48, "learning_rate": 2.602867639270603e-05, "loss": 0.2269, "step": 172000 }, { "epoch": 0.48, "learning_rate": 2.5958992312452268e-05, "loss": 0.2313, "step": 172500 }, { "epoch": 0.48, "learning_rate": 2.5889308232198505e-05, "loss": 0.2239, "step": 173000 }, { "epoch": 0.48, "learning_rate": 2.5819624151944743e-05, "loss": 0.2328, "step": 173500 }, { "epoch": 0.49, "learning_rate": 2.574994007169098e-05, "loss": 0.2284, "step": 174000 }, { "epoch": 0.49, "learning_rate": 2.568025599143722e-05, "loss": 0.2314, "step": 174500 }, { "epoch": 0.49, "learning_rate": 2.561057191118346e-05, "loss": 0.2284, "step": 175000 }, { "epoch": 0.49, "learning_rate": 2.5540887830929698e-05, "loss": 0.216, "step": 175500 }, { "epoch": 0.49, "learning_rate": 2.547120375067594e-05, "loss": 0.2232, "step": 176000 }, { "epoch": 0.49, "learning_rate": 2.5401519670422176e-05, "loss": 0.2267, "step": 176500 }, { "epoch": 0.49, "learning_rate": 2.5331835590168413e-05, "loss": 0.227, "step": 177000 }, { "epoch": 0.49, "learning_rate": 2.526215150991465e-05, "loss": 0.2336, "step": 177500 }, { "epoch": 0.5, "learning_rate": 2.5192467429660894e-05, "loss": 0.2279, "step": 178000 }, { "epoch": 0.5, "learning_rate": 2.512278334940713e-05, "loss": 0.2236, "step": 178500 }, { "epoch": 0.5, "learning_rate": 2.505309926915337e-05, "loss": 0.2309, "step": 179000 }, { "epoch": 0.5, "learning_rate": 2.4983415188899606e-05, "loss": 0.2229, "step": 179500 }, { "epoch": 0.5, "learning_rate": 2.4913731108645843e-05, "loss": 0.2181, "step": 180000 }, { "epoch": 0.5, "learning_rate": 2.4844047028392083e-05, "loss": 0.2312, "step": 180500 }, { "epoch": 0.5, "learning_rate": 2.477436294813832e-05, "loss": 0.2264, "step": 181000 }, { "epoch": 0.51, "learning_rate": 2.4704678867884558e-05, "loss": 0.2313, "step": 181500 }, { "epoch": 0.51, "learning_rate": 2.46349947876308e-05, "loss": 0.2267, "step": 182000 }, { "epoch": 0.51, "learning_rate": 2.4565310707377036e-05, "loss": 0.234, "step": 182500 }, { "epoch": 0.51, "learning_rate": 2.4495626627123276e-05, "loss": 0.2303, "step": 183000 }, { "epoch": 0.51, "learning_rate": 2.4425942546869513e-05, "loss": 0.2201, "step": 183500 }, { "epoch": 0.51, "learning_rate": 2.4356258466615754e-05, "loss": 0.23, "step": 184000 }, { "epoch": 0.51, "learning_rate": 2.428657438636199e-05, "loss": 0.2192, "step": 184500 }, { "epoch": 0.52, "learning_rate": 2.421689030610823e-05, "loss": 0.2319, "step": 185000 }, { "epoch": 0.52, "learning_rate": 2.414720622585447e-05, "loss": 0.226, "step": 185500 }, { "epoch": 0.52, "learning_rate": 2.4077522145600706e-05, "loss": 0.232, "step": 186000 }, { "epoch": 0.52, "learning_rate": 2.4007838065346943e-05, "loss": 0.2327, "step": 186500 }, { "epoch": 0.52, "learning_rate": 2.3938153985093184e-05, "loss": 0.2231, "step": 187000 }, { "epoch": 0.52, "learning_rate": 2.386846990483942e-05, "loss": 0.22, "step": 187500 }, { "epoch": 0.52, "learning_rate": 2.3798785824585658e-05, "loss": 0.2243, "step": 188000 }, { "epoch": 0.53, "learning_rate": 2.3729101744331895e-05, "loss": 0.2287, "step": 188500 }, { "epoch": 0.53, "learning_rate": 2.3659417664078136e-05, "loss": 0.2227, "step": 189000 }, { "epoch": 0.53, "learning_rate": 2.3589733583824377e-05, "loss": 0.2321, "step": 189500 }, { "epoch": 0.53, "learning_rate": 2.3520049503570614e-05, "loss": 0.22, "step": 190000 }, { "epoch": 0.53, "learning_rate": 2.3450365423316854e-05, "loss": 0.2247, "step": 190500 }, { "epoch": 0.53, "learning_rate": 2.338068134306309e-05, "loss": 0.2251, "step": 191000 }, { "epoch": 0.53, "learning_rate": 2.331099726280933e-05, "loss": 0.2182, "step": 191500 }, { "epoch": 0.54, "learning_rate": 2.324131318255557e-05, "loss": 0.2244, "step": 192000 }, { "epoch": 0.54, "learning_rate": 2.3171629102301806e-05, "loss": 0.2236, "step": 192500 }, { "epoch": 0.54, "learning_rate": 2.3101945022048044e-05, "loss": 0.2228, "step": 193000 }, { "epoch": 0.54, "learning_rate": 2.303226094179428e-05, "loss": 0.216, "step": 193500 }, { "epoch": 0.54, "learning_rate": 2.296257686154052e-05, "loss": 0.2137, "step": 194000 }, { "epoch": 0.54, "learning_rate": 2.289289278128676e-05, "loss": 0.2207, "step": 194500 }, { "epoch": 0.54, "learning_rate": 2.2823208701032996e-05, "loss": 0.2285, "step": 195000 }, { "epoch": 0.54, "learning_rate": 2.2753524620779236e-05, "loss": 0.2209, "step": 195500 }, { "epoch": 0.55, "learning_rate": 2.2683840540525473e-05, "loss": 0.2313, "step": 196000 }, { "epoch": 0.55, "learning_rate": 2.2614156460271714e-05, "loss": 0.2304, "step": 196500 }, { "epoch": 0.55, "learning_rate": 2.2544472380017955e-05, "loss": 0.2236, "step": 197000 }, { "epoch": 0.55, "learning_rate": 2.2474788299764192e-05, "loss": 0.2211, "step": 197500 }, { "epoch": 0.55, "learning_rate": 2.240510421951043e-05, "loss": 0.2294, "step": 198000 }, { "epoch": 0.55, "learning_rate": 2.2335420139256666e-05, "loss": 0.221, "step": 198500 }, { "epoch": 0.55, "learning_rate": 2.2265736059002907e-05, "loss": 0.2227, "step": 199000 }, { "epoch": 0.56, "learning_rate": 2.2196051978749144e-05, "loss": 0.2204, "step": 199500 }, { "epoch": 0.56, "learning_rate": 2.212636789849538e-05, "loss": 0.2248, "step": 200000 }, { "epoch": 0.56, "learning_rate": 2.2056683818241622e-05, "loss": 0.2206, "step": 200500 }, { "epoch": 0.56, "learning_rate": 2.198699973798786e-05, "loss": 0.2322, "step": 201000 }, { "epoch": 0.56, "learning_rate": 2.1917315657734096e-05, "loss": 0.2211, "step": 201500 }, { "epoch": 0.56, "learning_rate": 2.1847631577480337e-05, "loss": 0.2307, "step": 202000 }, { "epoch": 0.56, "learning_rate": 2.1777947497226574e-05, "loss": 0.2143, "step": 202500 }, { "epoch": 0.57, "learning_rate": 2.1708263416972814e-05, "loss": 0.2201, "step": 203000 }, { "epoch": 0.57, "learning_rate": 2.163857933671905e-05, "loss": 0.2269, "step": 203500 }, { "epoch": 0.57, "learning_rate": 2.1568895256465292e-05, "loss": 0.2222, "step": 204000 }, { "epoch": 0.57, "learning_rate": 2.149921117621153e-05, "loss": 0.2158, "step": 204500 }, { "epoch": 0.57, "learning_rate": 2.1429527095957767e-05, "loss": 0.216, "step": 205000 }, { "epoch": 0.57, "learning_rate": 2.1359843015704007e-05, "loss": 0.2322, "step": 205500 }, { "epoch": 0.57, "learning_rate": 2.1290158935450244e-05, "loss": 0.2167, "step": 206000 }, { "epoch": 0.58, "learning_rate": 2.122047485519648e-05, "loss": 0.2231, "step": 206500 }, { "epoch": 0.58, "learning_rate": 2.1150790774942722e-05, "loss": 0.2242, "step": 207000 }, { "epoch": 0.58, "learning_rate": 2.108110669468896e-05, "loss": 0.237, "step": 207500 }, { "epoch": 0.58, "learning_rate": 2.1011422614435196e-05, "loss": 0.221, "step": 208000 }, { "epoch": 0.58, "learning_rate": 2.0941738534181434e-05, "loss": 0.2251, "step": 208500 }, { "epoch": 0.58, "learning_rate": 2.0872054453927674e-05, "loss": 0.2272, "step": 209000 }, { "epoch": 0.58, "learning_rate": 2.080237037367391e-05, "loss": 0.2235, "step": 209500 }, { "epoch": 0.59, "learning_rate": 2.0732686293420152e-05, "loss": 0.2179, "step": 210000 }, { "epoch": 0.59, "learning_rate": 2.0663002213166392e-05, "loss": 0.2276, "step": 210500 }, { "epoch": 0.59, "learning_rate": 2.059331813291263e-05, "loss": 0.2215, "step": 211000 }, { "epoch": 0.59, "learning_rate": 2.0523634052658867e-05, "loss": 0.2266, "step": 211500 }, { "epoch": 0.59, "learning_rate": 2.0453949972405107e-05, "loss": 0.2185, "step": 212000 }, { "epoch": 0.59, "learning_rate": 2.0384265892151345e-05, "loss": 0.2255, "step": 212500 }, { "epoch": 0.59, "learning_rate": 2.0314581811897582e-05, "loss": 0.2307, "step": 213000 }, { "epoch": 0.6, "learning_rate": 2.024489773164382e-05, "loss": 0.2201, "step": 213500 }, { "epoch": 0.6, "learning_rate": 2.017521365139006e-05, "loss": 0.2208, "step": 214000 }, { "epoch": 0.6, "learning_rate": 2.0105529571136297e-05, "loss": 0.2177, "step": 214500 }, { "epoch": 0.6, "learning_rate": 2.0035845490882534e-05, "loss": 0.2136, "step": 215000 }, { "epoch": 0.6, "learning_rate": 1.9966161410628775e-05, "loss": 0.2254, "step": 215500 }, { "epoch": 0.6, "learning_rate": 1.9896477330375012e-05, "loss": 0.2208, "step": 216000 }, { "epoch": 0.6, "learning_rate": 1.982679325012125e-05, "loss": 0.2213, "step": 216500 }, { "epoch": 0.6, "learning_rate": 1.975710916986749e-05, "loss": 0.2253, "step": 217000 }, { "epoch": 0.61, "learning_rate": 1.968742508961373e-05, "loss": 0.2172, "step": 217500 }, { "epoch": 0.61, "learning_rate": 1.9617741009359967e-05, "loss": 0.2342, "step": 218000 }, { "epoch": 0.61, "learning_rate": 1.9548056929106204e-05, "loss": 0.2177, "step": 218500 }, { "epoch": 0.61, "learning_rate": 1.9478372848852445e-05, "loss": 0.2263, "step": 219000 }, { "epoch": 0.61, "learning_rate": 1.9408688768598682e-05, "loss": 0.2334, "step": 219500 }, { "epoch": 0.61, "learning_rate": 1.933900468834492e-05, "loss": 0.2152, "step": 220000 }, { "epoch": 0.61, "learning_rate": 1.926932060809116e-05, "loss": 0.2154, "step": 220500 }, { "epoch": 0.62, "learning_rate": 1.9199636527837397e-05, "loss": 0.2123, "step": 221000 }, { "epoch": 0.62, "learning_rate": 1.9129952447583634e-05, "loss": 0.2204, "step": 221500 }, { "epoch": 0.62, "learning_rate": 1.9060268367329875e-05, "loss": 0.2167, "step": 222000 }, { "epoch": 0.62, "learning_rate": 1.8990584287076112e-05, "loss": 0.2234, "step": 222500 }, { "epoch": 0.62, "learning_rate": 1.892090020682235e-05, "loss": 0.2244, "step": 223000 }, { "epoch": 0.62, "learning_rate": 1.885121612656859e-05, "loss": 0.2227, "step": 223500 }, { "epoch": 0.62, "learning_rate": 1.8781532046314827e-05, "loss": 0.2177, "step": 224000 }, { "epoch": 0.63, "learning_rate": 1.8711847966061068e-05, "loss": 0.2212, "step": 224500 }, { "epoch": 0.63, "learning_rate": 1.8642163885807305e-05, "loss": 0.2218, "step": 225000 }, { "epoch": 0.63, "learning_rate": 1.8572479805553545e-05, "loss": 0.2278, "step": 225500 }, { "epoch": 0.63, "learning_rate": 1.8502795725299782e-05, "loss": 0.2233, "step": 226000 }, { "epoch": 0.63, "learning_rate": 1.843311164504602e-05, "loss": 0.2242, "step": 226500 }, { "epoch": 0.63, "learning_rate": 1.836342756479226e-05, "loss": 0.2268, "step": 227000 }, { "epoch": 0.63, "learning_rate": 1.8293743484538497e-05, "loss": 0.2189, "step": 227500 }, { "epoch": 0.64, "learning_rate": 1.8224059404284735e-05, "loss": 0.2257, "step": 228000 }, { "epoch": 0.64, "learning_rate": 1.8154375324030972e-05, "loss": 0.2134, "step": 228500 }, { "epoch": 0.64, "learning_rate": 1.8084691243777212e-05, "loss": 0.2147, "step": 229000 }, { "epoch": 0.64, "learning_rate": 1.801500716352345e-05, "loss": 0.2204, "step": 229500 }, { "epoch": 0.64, "learning_rate": 1.7945323083269687e-05, "loss": 0.215, "step": 230000 }, { "epoch": 0.64, "learning_rate": 1.7875639003015927e-05, "loss": 0.2182, "step": 230500 }, { "epoch": 0.64, "learning_rate": 1.7805954922762168e-05, "loss": 0.2259, "step": 231000 }, { "epoch": 0.65, "learning_rate": 1.7736270842508405e-05, "loss": 0.2207, "step": 231500 }, { "epoch": 0.65, "learning_rate": 1.7666586762254646e-05, "loss": 0.2197, "step": 232000 }, { "epoch": 0.65, "learning_rate": 1.7596902682000883e-05, "loss": 0.2229, "step": 232500 }, { "epoch": 0.65, "learning_rate": 1.752721860174712e-05, "loss": 0.2196, "step": 233000 }, { "epoch": 0.65, "learning_rate": 1.7457534521493357e-05, "loss": 0.217, "step": 233500 }, { "epoch": 0.65, "learning_rate": 1.7387850441239598e-05, "loss": 0.2143, "step": 234000 }, { "epoch": 0.65, "learning_rate": 1.7318166360985835e-05, "loss": 0.2177, "step": 234500 }, { "epoch": 0.66, "learning_rate": 1.7248482280732072e-05, "loss": 0.2198, "step": 235000 }, { "epoch": 0.66, "learning_rate": 1.7178798200478313e-05, "loss": 0.2264, "step": 235500 }, { "epoch": 0.66, "learning_rate": 1.710911412022455e-05, "loss": 0.2157, "step": 236000 }, { "epoch": 0.66, "learning_rate": 1.7039430039970787e-05, "loss": 0.2275, "step": 236500 }, { "epoch": 0.66, "learning_rate": 1.6969745959717028e-05, "loss": 0.2197, "step": 237000 }, { "epoch": 0.66, "learning_rate": 1.6900061879463265e-05, "loss": 0.2203, "step": 237500 }, { "epoch": 0.66, "learning_rate": 1.6830377799209505e-05, "loss": 0.2242, "step": 238000 }, { "epoch": 0.66, "learning_rate": 1.6760693718955743e-05, "loss": 0.2214, "step": 238500 }, { "epoch": 0.67, "learning_rate": 1.6691009638701983e-05, "loss": 0.2226, "step": 239000 }, { "epoch": 0.67, "learning_rate": 1.662132555844822e-05, "loss": 0.2247, "step": 239500 }, { "epoch": 0.67, "learning_rate": 1.6551641478194458e-05, "loss": 0.2177, "step": 240000 }, { "epoch": 0.67, "learning_rate": 1.6481957397940698e-05, "loss": 0.215, "step": 240500 }, { "epoch": 0.67, "learning_rate": 1.6412273317686935e-05, "loss": 0.211, "step": 241000 }, { "epoch": 0.67, "learning_rate": 1.6342589237433172e-05, "loss": 0.2142, "step": 241500 }, { "epoch": 0.67, "learning_rate": 1.6272905157179413e-05, "loss": 0.2119, "step": 242000 }, { "epoch": 0.68, "learning_rate": 1.620322107692565e-05, "loss": 0.2178, "step": 242500 }, { "epoch": 0.68, "learning_rate": 1.6133536996671887e-05, "loss": 0.2145, "step": 243000 }, { "epoch": 0.68, "learning_rate": 1.6063852916418125e-05, "loss": 0.2108, "step": 243500 }, { "epoch": 0.68, "learning_rate": 1.5994168836164365e-05, "loss": 0.2207, "step": 244000 }, { "epoch": 0.68, "learning_rate": 1.5924484755910606e-05, "loss": 0.2127, "step": 244500 }, { "epoch": 0.68, "learning_rate": 1.5854800675656843e-05, "loss": 0.2249, "step": 245000 }, { "epoch": 0.68, "learning_rate": 1.5785116595403084e-05, "loss": 0.2257, "step": 245500 }, { "epoch": 0.69, "learning_rate": 1.571543251514932e-05, "loss": 0.2178, "step": 246000 }, { "epoch": 0.69, "learning_rate": 1.5645748434895558e-05, "loss": 0.2237, "step": 246500 }, { "epoch": 0.69, "learning_rate": 1.55760643546418e-05, "loss": 0.2141, "step": 247000 }, { "epoch": 0.69, "learning_rate": 1.5506380274388036e-05, "loss": 0.2165, "step": 247500 }, { "epoch": 0.69, "learning_rate": 1.5436696194134273e-05, "loss": 0.2096, "step": 248000 }, { "epoch": 0.69, "learning_rate": 1.5367012113880513e-05, "loss": 0.2096, "step": 248500 }, { "epoch": 0.69, "learning_rate": 1.529732803362675e-05, "loss": 0.2258, "step": 249000 }, { "epoch": 0.7, "learning_rate": 1.522764395337299e-05, "loss": 0.2141, "step": 249500 }, { "epoch": 0.7, "learning_rate": 1.5157959873119227e-05, "loss": 0.2116, "step": 250000 }, { "epoch": 0.7, "learning_rate": 1.5088275792865467e-05, "loss": 0.2153, "step": 250500 }, { "epoch": 0.7, "learning_rate": 1.5018591712611704e-05, "loss": 0.218, "step": 251000 }, { "epoch": 0.7, "learning_rate": 1.4948907632357942e-05, "loss": 0.2172, "step": 251500 }, { "epoch": 0.7, "learning_rate": 1.4879223552104182e-05, "loss": 0.2281, "step": 252000 }, { "epoch": 0.7, "learning_rate": 1.480953947185042e-05, "loss": 0.2171, "step": 252500 }, { "epoch": 0.71, "learning_rate": 1.4739855391596658e-05, "loss": 0.2155, "step": 253000 }, { "epoch": 0.71, "learning_rate": 1.4670171311342897e-05, "loss": 0.2148, "step": 253500 }, { "epoch": 0.71, "learning_rate": 1.4600487231089136e-05, "loss": 0.2189, "step": 254000 }, { "epoch": 0.71, "learning_rate": 1.4530803150835373e-05, "loss": 0.2221, "step": 254500 }, { "epoch": 0.71, "learning_rate": 1.446111907058161e-05, "loss": 0.2223, "step": 255000 }, { "epoch": 0.71, "learning_rate": 1.4391434990327851e-05, "loss": 0.2191, "step": 255500 }, { "epoch": 0.71, "learning_rate": 1.4321750910074088e-05, "loss": 0.2195, "step": 256000 }, { "epoch": 0.71, "learning_rate": 1.4252066829820327e-05, "loss": 0.2182, "step": 256500 }, { "epoch": 0.72, "learning_rate": 1.4182382749566568e-05, "loss": 0.2169, "step": 257000 }, { "epoch": 0.72, "learning_rate": 1.4112698669312805e-05, "loss": 0.2239, "step": 257500 }, { "epoch": 0.72, "learning_rate": 1.4043014589059042e-05, "loss": 0.2209, "step": 258000 }, { "epoch": 0.72, "learning_rate": 1.3973330508805283e-05, "loss": 0.2043, "step": 258500 }, { "epoch": 0.72, "learning_rate": 1.390364642855152e-05, "loss": 0.2211, "step": 259000 }, { "epoch": 0.72, "learning_rate": 1.3833962348297757e-05, "loss": 0.2159, "step": 259500 }, { "epoch": 0.72, "learning_rate": 1.3764278268043996e-05, "loss": 0.2114, "step": 260000 }, { "epoch": 0.73, "learning_rate": 1.3694594187790236e-05, "loss": 0.2229, "step": 260500 }, { "epoch": 0.73, "learning_rate": 1.3624910107536474e-05, "loss": 0.2158, "step": 261000 }, { "epoch": 0.73, "learning_rate": 1.355522602728271e-05, "loss": 0.2145, "step": 261500 }, { "epoch": 0.73, "learning_rate": 1.3485541947028951e-05, "loss": 0.2132, "step": 262000 }, { "epoch": 0.73, "learning_rate": 1.3415857866775188e-05, "loss": 0.2184, "step": 262500 }, { "epoch": 0.73, "learning_rate": 1.3346173786521426e-05, "loss": 0.2164, "step": 263000 }, { "epoch": 0.73, "learning_rate": 1.3276489706267666e-05, "loss": 0.2111, "step": 263500 }, { "epoch": 0.74, "learning_rate": 1.3206805626013905e-05, "loss": 0.2128, "step": 264000 }, { "epoch": 0.74, "learning_rate": 1.3137121545760142e-05, "loss": 0.2203, "step": 264500 }, { "epoch": 0.74, "learning_rate": 1.306743746550638e-05, "loss": 0.2163, "step": 265000 }, { "epoch": 0.74, "learning_rate": 1.299775338525262e-05, "loss": 0.2169, "step": 265500 }, { "epoch": 0.74, "learning_rate": 1.2928069304998857e-05, "loss": 0.2157, "step": 266000 }, { "epoch": 0.74, "learning_rate": 1.2858385224745096e-05, "loss": 0.2246, "step": 266500 }, { "epoch": 0.74, "learning_rate": 1.2788701144491335e-05, "loss": 0.2204, "step": 267000 }, { "epoch": 0.75, "learning_rate": 1.2719017064237574e-05, "loss": 0.2137, "step": 267500 }, { "epoch": 0.75, "learning_rate": 1.2649332983983811e-05, "loss": 0.2195, "step": 268000 }, { "epoch": 0.75, "learning_rate": 1.2579648903730052e-05, "loss": 0.2189, "step": 268500 }, { "epoch": 0.75, "learning_rate": 1.2509964823476289e-05, "loss": 0.208, "step": 269000 }, { "epoch": 0.75, "learning_rate": 1.2440280743222526e-05, "loss": 0.2109, "step": 269500 }, { "epoch": 0.75, "learning_rate": 1.2370596662968765e-05, "loss": 0.2249, "step": 270000 }, { "epoch": 0.75, "learning_rate": 1.2300912582715004e-05, "loss": 0.2159, "step": 270500 }, { "epoch": 0.76, "learning_rate": 1.2231228502461243e-05, "loss": 0.2114, "step": 271000 }, { "epoch": 0.76, "learning_rate": 1.2161544422207482e-05, "loss": 0.2095, "step": 271500 }, { "epoch": 0.76, "learning_rate": 1.2091860341953719e-05, "loss": 0.218, "step": 272000 }, { "epoch": 0.76, "learning_rate": 1.2022176261699958e-05, "loss": 0.2208, "step": 272500 }, { "epoch": 0.76, "learning_rate": 1.1952492181446195e-05, "loss": 0.2098, "step": 273000 }, { "epoch": 0.76, "learning_rate": 1.1882808101192434e-05, "loss": 0.2073, "step": 273500 }, { "epoch": 0.76, "learning_rate": 1.1813124020938674e-05, "loss": 0.2084, "step": 274000 }, { "epoch": 0.77, "learning_rate": 1.1743439940684911e-05, "loss": 0.2071, "step": 274500 }, { "epoch": 0.77, "learning_rate": 1.167375586043115e-05, "loss": 0.2207, "step": 275000 }, { "epoch": 0.77, "learning_rate": 1.1604071780177387e-05, "loss": 0.2186, "step": 275500 }, { "epoch": 0.77, "learning_rate": 1.1534387699923626e-05, "loss": 0.2132, "step": 276000 }, { "epoch": 0.77, "learning_rate": 1.1464703619669865e-05, "loss": 0.2107, "step": 276500 }, { "epoch": 0.77, "learning_rate": 1.1395019539416104e-05, "loss": 0.2157, "step": 277000 }, { "epoch": 0.77, "learning_rate": 1.1325335459162343e-05, "loss": 0.2138, "step": 277500 }, { "epoch": 0.77, "learning_rate": 1.1255651378908582e-05, "loss": 0.2157, "step": 278000 }, { "epoch": 0.78, "learning_rate": 1.1185967298654819e-05, "loss": 0.2101, "step": 278500 }, { "epoch": 0.78, "learning_rate": 1.1116283218401058e-05, "loss": 0.2177, "step": 279000 }, { "epoch": 0.78, "learning_rate": 1.1046599138147295e-05, "loss": 0.2142, "step": 279500 }, { "epoch": 0.78, "learning_rate": 1.0976915057893534e-05, "loss": 0.2122, "step": 280000 }, { "epoch": 0.78, "learning_rate": 1.0907230977639773e-05, "loss": 0.2212, "step": 280500 }, { "epoch": 0.78, "learning_rate": 1.0837546897386012e-05, "loss": 0.2096, "step": 281000 }, { "epoch": 0.78, "learning_rate": 1.076786281713225e-05, "loss": 0.2091, "step": 281500 }, { "epoch": 0.79, "learning_rate": 1.0698178736878488e-05, "loss": 0.2124, "step": 282000 }, { "epoch": 0.79, "learning_rate": 1.0628494656624727e-05, "loss": 0.2175, "step": 282500 }, { "epoch": 0.79, "learning_rate": 1.0558810576370966e-05, "loss": 0.2124, "step": 283000 }, { "epoch": 0.79, "learning_rate": 1.0489126496117203e-05, "loss": 0.2046, "step": 283500 }, { "epoch": 0.79, "learning_rate": 1.0419442415863442e-05, "loss": 0.2072, "step": 284000 }, { "epoch": 0.79, "learning_rate": 1.034975833560968e-05, "loss": 0.2181, "step": 284500 }, { "epoch": 0.79, "learning_rate": 1.028007425535592e-05, "loss": 0.22, "step": 285000 }, { "epoch": 0.8, "learning_rate": 1.0210390175102158e-05, "loss": 0.2087, "step": 285500 }, { "epoch": 0.8, "learning_rate": 1.0140706094848395e-05, "loss": 0.2074, "step": 286000 }, { "epoch": 0.8, "learning_rate": 1.0071022014594634e-05, "loss": 0.211, "step": 286500 }, { "epoch": 0.8, "learning_rate": 1.0001337934340872e-05, "loss": 0.2156, "step": 287000 }, { "epoch": 0.8, "learning_rate": 9.931653854087112e-06, "loss": 0.2081, "step": 287500 }, { "epoch": 0.8, "learning_rate": 9.861969773833351e-06, "loss": 0.2158, "step": 288000 }, { "epoch": 0.8, "learning_rate": 9.792285693579588e-06, "loss": 0.2212, "step": 288500 }, { "epoch": 0.81, "learning_rate": 9.722601613325827e-06, "loss": 0.2113, "step": 289000 }, { "epoch": 0.81, "learning_rate": 9.652917533072064e-06, "loss": 0.2052, "step": 289500 }, { "epoch": 0.81, "learning_rate": 9.583233452818303e-06, "loss": 0.2151, "step": 290000 }, { "epoch": 0.81, "learning_rate": 9.513549372564542e-06, "loss": 0.2147, "step": 290500 }, { "epoch": 0.81, "learning_rate": 9.44386529231078e-06, "loss": 0.2114, "step": 291000 }, { "epoch": 0.81, "learning_rate": 9.37418121205702e-06, "loss": 0.2175, "step": 291500 }, { "epoch": 0.81, "learning_rate": 9.304497131803257e-06, "loss": 0.2145, "step": 292000 }, { "epoch": 0.82, "learning_rate": 9.234813051549496e-06, "loss": 0.212, "step": 292500 }, { "epoch": 0.82, "learning_rate": 9.165128971295735e-06, "loss": 0.2117, "step": 293000 }, { "epoch": 0.82, "learning_rate": 9.095444891041972e-06, "loss": 0.2065, "step": 293500 }, { "epoch": 0.82, "learning_rate": 9.02576081078821e-06, "loss": 0.2192, "step": 294000 }, { "epoch": 0.82, "learning_rate": 8.95607673053445e-06, "loss": 0.2074, "step": 294500 }, { "epoch": 0.82, "learning_rate": 8.886392650280688e-06, "loss": 0.2155, "step": 295000 }, { "epoch": 0.82, "learning_rate": 8.816708570026927e-06, "loss": 0.2099, "step": 295500 }, { "epoch": 0.83, "learning_rate": 8.747024489773165e-06, "loss": 0.2192, "step": 296000 }, { "epoch": 0.83, "learning_rate": 8.677340409519403e-06, "loss": 0.2117, "step": 296500 }, { "epoch": 0.83, "learning_rate": 8.60765632926564e-06, "loss": 0.2118, "step": 297000 }, { "epoch": 0.83, "learning_rate": 8.53797224901188e-06, "loss": 0.2074, "step": 297500 }, { "epoch": 0.83, "learning_rate": 8.468288168758118e-06, "loss": 0.2071, "step": 298000 }, { "epoch": 0.83, "learning_rate": 8.398604088504357e-06, "loss": 0.2113, "step": 298500 }, { "epoch": 0.83, "learning_rate": 8.328920008250596e-06, "loss": 0.2072, "step": 299000 }, { "epoch": 0.83, "learning_rate": 8.259235927996833e-06, "loss": 0.2048, "step": 299500 }, { "epoch": 0.84, "learning_rate": 8.189551847743072e-06, "loss": 0.2104, "step": 300000 }, { "epoch": 0.84, "learning_rate": 8.119867767489311e-06, "loss": 0.218, "step": 300500 }, { "epoch": 0.84, "learning_rate": 8.050183687235548e-06, "loss": 0.2168, "step": 301000 }, { "epoch": 0.84, "learning_rate": 7.980499606981789e-06, "loss": 0.2217, "step": 301500 }, { "epoch": 0.84, "learning_rate": 7.910815526728026e-06, "loss": 0.2126, "step": 302000 }, { "epoch": 0.84, "learning_rate": 7.841131446474265e-06, "loss": 0.2084, "step": 302500 }, { "epoch": 0.84, "learning_rate": 7.771447366220504e-06, "loss": 0.2098, "step": 303000 }, { "epoch": 0.85, "learning_rate": 7.701763285966741e-06, "loss": 0.2131, "step": 303500 }, { "epoch": 0.85, "learning_rate": 7.63207920571298e-06, "loss": 0.2078, "step": 304000 }, { "epoch": 0.85, "learning_rate": 7.562395125459218e-06, "loss": 0.2164, "step": 304500 }, { "epoch": 0.85, "learning_rate": 7.492711045205457e-06, "loss": 0.2114, "step": 305000 }, { "epoch": 0.85, "learning_rate": 7.423026964951696e-06, "loss": 0.206, "step": 305500 }, { "epoch": 0.85, "learning_rate": 7.353342884697934e-06, "loss": 0.2108, "step": 306000 }, { "epoch": 0.85, "learning_rate": 7.2836588044441725e-06, "loss": 0.2156, "step": 306500 }, { "epoch": 0.86, "learning_rate": 7.2139747241904106e-06, "loss": 0.2124, "step": 307000 }, { "epoch": 0.86, "learning_rate": 7.1442906439366494e-06, "loss": 0.2197, "step": 307500 }, { "epoch": 0.86, "learning_rate": 7.074606563682888e-06, "loss": 0.2127, "step": 308000 }, { "epoch": 0.86, "learning_rate": 7.0049224834291255e-06, "loss": 0.2113, "step": 308500 }, { "epoch": 0.86, "learning_rate": 6.935238403175364e-06, "loss": 0.2093, "step": 309000 }, { "epoch": 0.86, "learning_rate": 6.865554322921602e-06, "loss": 0.2191, "step": 309500 }, { "epoch": 0.86, "learning_rate": 6.795870242667841e-06, "loss": 0.2023, "step": 310000 }, { "epoch": 0.87, "learning_rate": 6.72618616241408e-06, "loss": 0.2092, "step": 310500 }, { "epoch": 0.87, "learning_rate": 6.656502082160318e-06, "loss": 0.2088, "step": 311000 }, { "epoch": 0.87, "learning_rate": 6.586818001906557e-06, "loss": 0.2077, "step": 311500 }, { "epoch": 0.87, "learning_rate": 6.517133921652794e-06, "loss": 0.2075, "step": 312000 }, { "epoch": 0.87, "learning_rate": 6.447449841399034e-06, "loss": 0.2123, "step": 312500 }, { "epoch": 0.87, "learning_rate": 6.377765761145273e-06, "loss": 0.2137, "step": 313000 }, { "epoch": 0.87, "learning_rate": 6.30808168089151e-06, "loss": 0.2154, "step": 313500 }, { "epoch": 0.88, "learning_rate": 6.238397600637749e-06, "loss": 0.2137, "step": 314000 }, { "epoch": 0.88, "learning_rate": 6.168713520383988e-06, "loss": 0.2106, "step": 314500 }, { "epoch": 0.88, "learning_rate": 6.099029440130226e-06, "loss": 0.2101, "step": 315000 }, { "epoch": 0.88, "learning_rate": 6.029345359876464e-06, "loss": 0.2154, "step": 315500 }, { "epoch": 0.88, "learning_rate": 5.959661279622703e-06, "loss": 0.2122, "step": 316000 }, { "epoch": 0.88, "learning_rate": 5.889977199368942e-06, "loss": 0.2119, "step": 316500 }, { "epoch": 0.88, "learning_rate": 5.82029311911518e-06, "loss": 0.2069, "step": 317000 }, { "epoch": 0.88, "learning_rate": 5.750609038861418e-06, "loss": 0.2166, "step": 317500 }, { "epoch": 0.89, "learning_rate": 5.6809249586076566e-06, "loss": 0.2135, "step": 318000 }, { "epoch": 0.89, "learning_rate": 5.611240878353895e-06, "loss": 0.216, "step": 318500 }, { "epoch": 0.89, "learning_rate": 5.5415567981001335e-06, "loss": 0.2078, "step": 319000 }, { "epoch": 0.89, "learning_rate": 5.471872717846372e-06, "loss": 0.2033, "step": 319500 }, { "epoch": 0.89, "learning_rate": 5.40218863759261e-06, "loss": 0.218, "step": 320000 }, { "epoch": 0.89, "learning_rate": 5.3325045573388484e-06, "loss": 0.2128, "step": 320500 }, { "epoch": 0.89, "learning_rate": 5.262820477085087e-06, "loss": 0.2115, "step": 321000 }, { "epoch": 0.9, "learning_rate": 5.193136396831326e-06, "loss": 0.2094, "step": 321500 }, { "epoch": 0.9, "learning_rate": 5.123452316577564e-06, "loss": 0.2096, "step": 322000 }, { "epoch": 0.9, "learning_rate": 5.053768236323802e-06, "loss": 0.2107, "step": 322500 }, { "epoch": 0.9, "learning_rate": 4.984084156070041e-06, "loss": 0.2123, "step": 323000 }, { "epoch": 0.9, "learning_rate": 4.91440007581628e-06, "loss": 0.2089, "step": 323500 }, { "epoch": 0.9, "learning_rate": 4.844715995562518e-06, "loss": 0.2103, "step": 324000 }, { "epoch": 0.9, "learning_rate": 4.775031915308757e-06, "loss": 0.2089, "step": 324500 }, { "epoch": 0.91, "learning_rate": 4.705347835054995e-06, "loss": 0.2041, "step": 325000 }, { "epoch": 0.91, "learning_rate": 4.635663754801233e-06, "loss": 0.2033, "step": 325500 }, { "epoch": 0.91, "learning_rate": 4.565979674547472e-06, "loss": 0.2161, "step": 326000 }, { "epoch": 0.91, "learning_rate": 4.496295594293711e-06, "loss": 0.2082, "step": 326500 }, { "epoch": 0.91, "learning_rate": 4.426611514039949e-06, "loss": 0.2064, "step": 327000 }, { "epoch": 0.91, "learning_rate": 4.356927433786187e-06, "loss": 0.2146, "step": 327500 }, { "epoch": 0.91, "learning_rate": 4.287243353532426e-06, "loss": 0.2149, "step": 328000 }, { "epoch": 0.92, "learning_rate": 4.2175592732786646e-06, "loss": 0.2031, "step": 328500 }, { "epoch": 0.92, "learning_rate": 4.147875193024903e-06, "loss": 0.2051, "step": 329000 }, { "epoch": 0.92, "learning_rate": 4.078191112771141e-06, "loss": 0.2138, "step": 329500 }, { "epoch": 0.92, "learning_rate": 4.0085070325173795e-06, "loss": 0.2039, "step": 330000 }, { "epoch": 0.92, "learning_rate": 3.9388229522636175e-06, "loss": 0.2155, "step": 330500 }, { "epoch": 0.92, "learning_rate": 3.869138872009856e-06, "loss": 0.2039, "step": 331000 }, { "epoch": 0.92, "learning_rate": 3.799454791756095e-06, "loss": 0.2121, "step": 331500 }, { "epoch": 0.93, "learning_rate": 3.7297707115023333e-06, "loss": 0.2129, "step": 332000 }, { "epoch": 0.93, "learning_rate": 3.6600866312485714e-06, "loss": 0.2153, "step": 332500 }, { "epoch": 0.93, "learning_rate": 3.59040255099481e-06, "loss": 0.2072, "step": 333000 }, { "epoch": 0.93, "learning_rate": 3.5207184707410487e-06, "loss": 0.2059, "step": 333500 }, { "epoch": 0.93, "learning_rate": 3.451034390487287e-06, "loss": 0.2111, "step": 334000 }, { "epoch": 0.93, "learning_rate": 3.3813503102335256e-06, "loss": 0.2161, "step": 334500 }, { "epoch": 0.93, "learning_rate": 3.3116662299797636e-06, "loss": 0.2153, "step": 335000 }, { "epoch": 0.94, "learning_rate": 3.241982149726002e-06, "loss": 0.2092, "step": 335500 }, { "epoch": 0.94, "learning_rate": 3.172298069472241e-06, "loss": 0.2151, "step": 336000 }, { "epoch": 0.94, "learning_rate": 3.1026139892184794e-06, "loss": 0.2036, "step": 336500 }, { "epoch": 0.94, "learning_rate": 3.032929908964718e-06, "loss": 0.214, "step": 337000 }, { "epoch": 0.94, "learning_rate": 2.963245828710956e-06, "loss": 0.214, "step": 337500 }, { "epoch": 0.94, "learning_rate": 2.8935617484571948e-06, "loss": 0.2063, "step": 338000 }, { "epoch": 0.94, "learning_rate": 2.823877668203433e-06, "loss": 0.2064, "step": 338500 }, { "epoch": 0.94, "learning_rate": 2.7541935879496717e-06, "loss": 0.2051, "step": 339000 }, { "epoch": 0.95, "learning_rate": 2.68450950769591e-06, "loss": 0.2069, "step": 339500 }, { "epoch": 0.95, "learning_rate": 2.614825427442148e-06, "loss": 0.2058, "step": 340000 }, { "epoch": 0.95, "learning_rate": 2.545141347188387e-06, "loss": 0.2122, "step": 340500 }, { "epoch": 0.95, "learning_rate": 2.475457266934625e-06, "loss": 0.202, "step": 341000 }, { "epoch": 0.95, "learning_rate": 2.405773186680864e-06, "loss": 0.2069, "step": 341500 }, { "epoch": 0.95, "learning_rate": 2.336089106427102e-06, "loss": 0.2081, "step": 342000 }, { "epoch": 0.95, "learning_rate": 2.2664050261733405e-06, "loss": 0.2072, "step": 342500 }, { "epoch": 0.96, "learning_rate": 2.1967209459195793e-06, "loss": 0.2092, "step": 343000 }, { "epoch": 0.96, "learning_rate": 2.1270368656658174e-06, "loss": 0.2121, "step": 343500 }, { "epoch": 0.96, "learning_rate": 2.0573527854120563e-06, "loss": 0.2068, "step": 344000 }, { "epoch": 0.96, "learning_rate": 1.9876687051582943e-06, "loss": 0.201, "step": 344500 }, { "epoch": 0.96, "learning_rate": 1.9179846249045327e-06, "loss": 0.2171, "step": 345000 }, { "epoch": 0.96, "learning_rate": 1.8483005446507714e-06, "loss": 0.2043, "step": 345500 }, { "epoch": 0.96, "learning_rate": 1.7786164643970096e-06, "loss": 0.2038, "step": 346000 }, { "epoch": 0.97, "learning_rate": 1.7089323841432483e-06, "loss": 0.2107, "step": 346500 }, { "epoch": 0.97, "learning_rate": 1.6392483038894866e-06, "loss": 0.2078, "step": 347000 }, { "epoch": 0.97, "learning_rate": 1.5695642236357252e-06, "loss": 0.209, "step": 347500 }, { "epoch": 0.97, "learning_rate": 1.4998801433819637e-06, "loss": 0.2118, "step": 348000 }, { "epoch": 0.97, "learning_rate": 1.4301960631282021e-06, "loss": 0.2247, "step": 348500 }, { "epoch": 0.97, "learning_rate": 1.3605119828744404e-06, "loss": 0.2094, "step": 349000 }, { "epoch": 0.97, "learning_rate": 1.2908279026206788e-06, "loss": 0.2154, "step": 349500 }, { "epoch": 0.98, "learning_rate": 1.2211438223669173e-06, "loss": 0.2062, "step": 350000 }, { "epoch": 0.98, "learning_rate": 1.151459742113156e-06, "loss": 0.2105, "step": 350500 }, { "epoch": 0.98, "learning_rate": 1.0817756618593944e-06, "loss": 0.2177, "step": 351000 }, { "epoch": 0.98, "learning_rate": 1.0120915816056329e-06, "loss": 0.2062, "step": 351500 }, { "epoch": 0.98, "learning_rate": 9.424075013518711e-07, "loss": 0.205, "step": 352000 }, { "epoch": 0.98, "learning_rate": 8.727234210981097e-07, "loss": 0.2032, "step": 352500 }, { "epoch": 0.98, "learning_rate": 8.030393408443481e-07, "loss": 0.2169, "step": 353000 }, { "epoch": 0.99, "learning_rate": 7.333552605905866e-07, "loss": 0.2153, "step": 353500 }, { "epoch": 0.99, "learning_rate": 6.636711803368249e-07, "loss": 0.2046, "step": 354000 }, { "epoch": 0.99, "learning_rate": 5.939871000830635e-07, "loss": 0.2068, "step": 354500 }, { "epoch": 0.99, "learning_rate": 5.243030198293018e-07, "loss": 0.2188, "step": 355000 }, { "epoch": 0.99, "learning_rate": 4.5461893957554035e-07, "loss": 0.2139, "step": 355500 }, { "epoch": 0.99, "learning_rate": 3.849348593217788e-07, "loss": 0.2085, "step": 356000 }, { "epoch": 0.99, "learning_rate": 3.1525077906801726e-07, "loss": 0.1994, "step": 356500 }, { "epoch": 1.0, "learning_rate": 2.455666988142557e-07, "loss": 0.216, "step": 357000 }, { "epoch": 1.0, "learning_rate": 1.7588261856049415e-07, "loss": 0.2123, "step": 357500 }, { "epoch": 1.0, "learning_rate": 1.061985383067326e-07, "loss": 0.2063, "step": 358000 }, { "epoch": 1.0, "learning_rate": 3.651445805297105e-08, "loss": 0.2098, "step": 358500 } ], "logging_steps": 500, "max_steps": 358762, "num_train_epochs": 1, "save_steps": 500, "total_flos": 1.26740447502336e+17, "trial_name": null, "trial_params": null }