diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,15010 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.18433058885567705, + "global_step": 1249500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9998148198352554e-05, + "loss": 2.5633, + "step": 500 + }, + { + "epoch": 0.0, + "learning_rate": 4.99962963967051e-05, + "loss": 2.4025, + "step": 1000 + }, + { + "epoch": 0.0, + "learning_rate": 4.999444459505765e-05, + "loss": 2.4857, + "step": 1500 + }, + { + "epoch": 0.0, + "learning_rate": 4.9992592793410196e-05, + "loss": 2.4036, + "step": 2000 + }, + { + "epoch": 0.0, + "learning_rate": 4.999074099176275e-05, + "loss": 2.4167, + "step": 2500 + }, + { + "epoch": 0.0, + "learning_rate": 4.998888919011529e-05, + "loss": 2.4199, + "step": 3000 + }, + { + "epoch": 0.0, + "learning_rate": 4.998703738846784e-05, + "loss": 2.3777, + "step": 3500 + }, + { + "epoch": 0.0, + "learning_rate": 4.998518558682039e-05, + "loss": 2.3574, + "step": 4000 + }, + { + "epoch": 0.0, + "learning_rate": 4.9983333785172935e-05, + "loss": 2.3653, + "step": 4500 + }, + { + "epoch": 0.0, + "learning_rate": 4.998148198352549e-05, + "loss": 2.3203, + "step": 5000 + }, + { + "epoch": 0.0, + "learning_rate": 4.997963018187803e-05, + "loss": 2.3002, + "step": 5500 + }, + { + "epoch": 0.0, + "learning_rate": 4.9977778380230584e-05, + "loss": 2.2327, + "step": 6000 + }, + { + "epoch": 0.0, + "learning_rate": 4.997592657858313e-05, + "loss": 2.3823, + "step": 6500 + }, + { + "epoch": 0.0, + "learning_rate": 4.997407477693568e-05, + "loss": 2.3095, + "step": 7000 + }, + { + "epoch": 0.0, + "learning_rate": 4.997222297528823e-05, + "loss": 2.3012, + "step": 7500 + }, + { + "epoch": 0.0, + "learning_rate": 4.997037117364078e-05, + "loss": 2.2502, + "step": 8000 + }, + { + "epoch": 0.0, + "learning_rate": 4.996851937199332e-05, + "loss": 2.3248, + "step": 8500 + }, + { + "epoch": 0.0, + "learning_rate": 4.996666757034587e-05, + "loss": 2.3523, + "step": 9000 + }, + { + "epoch": 0.0, + "learning_rate": 4.996481576869842e-05, + "loss": 2.2897, + "step": 9500 + }, + { + "epoch": 0.0, + "learning_rate": 4.996296396705097e-05, + "loss": 2.2968, + "step": 10000 + }, + { + "epoch": 0.0, + "learning_rate": 4.996111169011484e-05, + "loss": 2.2047, + "step": 10500 + }, + { + "epoch": 0.0, + "learning_rate": 4.995925986583459e-05, + "loss": 2.3323, + "step": 11000 + }, + { + "epoch": 0.0, + "learning_rate": 4.995740804155435e-05, + "loss": 2.2564, + "step": 11500 + }, + { + "epoch": 0.0, + "learning_rate": 4.99555562172741e-05, + "loss": 2.3009, + "step": 12000 + }, + { + "epoch": 0.0, + "learning_rate": 4.995370439299386e-05, + "loss": 2.2419, + "step": 12500 + }, + { + "epoch": 0.0, + "learning_rate": 4.995185256871361e-05, + "loss": 2.2346, + "step": 13000 + }, + { + "epoch": 0.0, + "learning_rate": 4.995000074443336e-05, + "loss": 2.304, + "step": 13500 + }, + { + "epoch": 0.0, + "learning_rate": 4.9948148920153116e-05, + "loss": 2.223, + "step": 14000 + }, + { + "epoch": 0.0, + "learning_rate": 4.994629709587287e-05, + "loss": 2.2424, + "step": 14500 + }, + { + "epoch": 0.0, + "learning_rate": 4.994444527159263e-05, + "loss": 2.2479, + "step": 15000 + }, + { + "epoch": 0.0, + "learning_rate": 4.994259344731238e-05, + "loss": 2.2607, + "step": 15500 + }, + { + "epoch": 0.0, + "learning_rate": 4.9940741623032137e-05, + "loss": 2.2446, + "step": 16000 + }, + { + "epoch": 0.0, + "learning_rate": 4.993888979875189e-05, + "loss": 2.1891, + "step": 16500 + }, + { + "epoch": 0.0, + "learning_rate": 4.993703797447164e-05, + "loss": 2.2201, + "step": 17000 + }, + { + "epoch": 0.0, + "learning_rate": 4.9935186150191394e-05, + "loss": 2.2409, + "step": 17500 + }, + { + "epoch": 0.0, + "learning_rate": 4.993333432591115e-05, + "loss": 2.2319, + "step": 18000 + }, + { + "epoch": 0.0, + "learning_rate": 4.993148250163091e-05, + "loss": 2.2554, + "step": 18500 + }, + { + "epoch": 0.0, + "learning_rate": 4.992963067735066e-05, + "loss": 2.2191, + "step": 19000 + }, + { + "epoch": 0.0, + "learning_rate": 4.9927778853070415e-05, + "loss": 2.1847, + "step": 19500 + }, + { + "epoch": 0.0, + "learning_rate": 4.9925927028790165e-05, + "loss": 2.1872, + "step": 20000 + }, + { + "epoch": 0.0, + "learning_rate": 4.9924075204509915e-05, + "loss": 2.2293, + "step": 20500 + }, + { + "epoch": 0.0, + "learning_rate": 4.992222338022968e-05, + "loss": 2.2537, + "step": 21000 + }, + { + "epoch": 0.0, + "learning_rate": 4.992037155594943e-05, + "loss": 2.2332, + "step": 21500 + }, + { + "epoch": 0.0, + "learning_rate": 4.9918519731669186e-05, + "loss": 2.2963, + "step": 22000 + }, + { + "epoch": 0.0, + "learning_rate": 4.9916667907388936e-05, + "loss": 2.2041, + "step": 22500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9914816083108686e-05, + "loss": 2.1912, + "step": 23000 + }, + { + "epoch": 0.01, + "learning_rate": 4.991296425882844e-05, + "loss": 2.1984, + "step": 23500 + }, + { + "epoch": 0.01, + "learning_rate": 4.99111124345482e-05, + "loss": 2.2336, + "step": 24000 + }, + { + "epoch": 0.01, + "learning_rate": 4.990926061026796e-05, + "loss": 2.1302, + "step": 24500 + }, + { + "epoch": 0.01, + "learning_rate": 4.990740878598771e-05, + "loss": 2.2171, + "step": 25000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9905556961707464e-05, + "loss": 2.1807, + "step": 25500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9903705137427215e-05, + "loss": 2.1588, + "step": 26000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9901853313146965e-05, + "loss": 2.1912, + "step": 26500 + }, + { + "epoch": 0.01, + "learning_rate": 4.990000148886672e-05, + "loss": 2.2455, + "step": 27000 + }, + { + "epoch": 0.01, + "learning_rate": 4.989814966458648e-05, + "loss": 2.1608, + "step": 27500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9896297840306236e-05, + "loss": 2.1814, + "step": 28000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9894446016025986e-05, + "loss": 2.0651, + "step": 28500 + }, + { + "epoch": 0.01, + "learning_rate": 4.989259419174574e-05, + "loss": 2.1137, + "step": 29000 + }, + { + "epoch": 0.01, + "learning_rate": 4.989074236746549e-05, + "loss": 2.2112, + "step": 29500 + }, + { + "epoch": 0.01, + "learning_rate": 4.988889054318524e-05, + "loss": 2.1663, + "step": 30000 + }, + { + "epoch": 0.01, + "learning_rate": 4.988703871890501e-05, + "loss": 2.1839, + "step": 30500 + }, + { + "epoch": 0.01, + "learning_rate": 4.988518689462476e-05, + "loss": 2.1793, + "step": 31000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9883335070344514e-05, + "loss": 2.1851, + "step": 31500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9881483246064264e-05, + "loss": 2.1889, + "step": 32000 + }, + { + "epoch": 0.01, + "learning_rate": 4.987963142178402e-05, + "loss": 2.0989, + "step": 32500 + }, + { + "epoch": 0.01, + "learning_rate": 4.987777959750377e-05, + "loss": 2.102, + "step": 33000 + }, + { + "epoch": 0.01, + "learning_rate": 4.987592777322353e-05, + "loss": 2.0911, + "step": 33500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9874075948943285e-05, + "loss": 2.2145, + "step": 34000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9872224124663035e-05, + "loss": 2.1246, + "step": 34500 + }, + { + "epoch": 0.01, + "learning_rate": 4.987037230038279e-05, + "loss": 2.1473, + "step": 35000 + }, + { + "epoch": 0.01, + "learning_rate": 4.986852047610254e-05, + "loss": 2.1508, + "step": 35500 + }, + { + "epoch": 0.01, + "learning_rate": 4.986666865182229e-05, + "loss": 2.1553, + "step": 36000 + }, + { + "epoch": 0.01, + "learning_rate": 4.986481682754205e-05, + "loss": 2.1734, + "step": 36500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9862965003261806e-05, + "loss": 2.2153, + "step": 37000 + }, + { + "epoch": 0.01, + "learning_rate": 4.986111317898156e-05, + "loss": 2.1443, + "step": 37500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9859261354701314e-05, + "loss": 2.2392, + "step": 38000 + }, + { + "epoch": 0.01, + "learning_rate": 4.985740953042107e-05, + "loss": 2.117, + "step": 38500 + }, + { + "epoch": 0.01, + "learning_rate": 4.985555770614082e-05, + "loss": 2.1762, + "step": 39000 + }, + { + "epoch": 0.01, + "learning_rate": 4.985370588186057e-05, + "loss": 2.1363, + "step": 39500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9851854057580335e-05, + "loss": 2.1806, + "step": 40000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9850002233300085e-05, + "loss": 2.1484, + "step": 40500 + }, + { + "epoch": 0.01, + "learning_rate": 4.984815040901984e-05, + "loss": 2.1418, + "step": 41000 + }, + { + "epoch": 0.01, + "learning_rate": 4.984629858473959e-05, + "loss": 2.1191, + "step": 41500 + }, + { + "epoch": 0.01, + "learning_rate": 4.984444676045935e-05, + "loss": 2.2352, + "step": 42000 + }, + { + "epoch": 0.01, + "learning_rate": 4.98425949361791e-05, + "loss": 2.1433, + "step": 42500 + }, + { + "epoch": 0.01, + "learning_rate": 4.984074311189885e-05, + "loss": 2.1621, + "step": 43000 + }, + { + "epoch": 0.01, + "learning_rate": 4.983889128761861e-05, + "loss": 2.0863, + "step": 43500 + }, + { + "epoch": 0.01, + "learning_rate": 4.983703946333836e-05, + "loss": 2.201, + "step": 44000 + }, + { + "epoch": 0.01, + "learning_rate": 4.983518763905812e-05, + "loss": 2.1628, + "step": 44500 + }, + { + "epoch": 0.01, + "learning_rate": 4.983333581477787e-05, + "loss": 2.1656, + "step": 45000 + }, + { + "epoch": 0.01, + "learning_rate": 4.983148399049763e-05, + "loss": 2.154, + "step": 45500 + }, + { + "epoch": 0.01, + "learning_rate": 4.982963216621738e-05, + "loss": 2.1882, + "step": 46000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9827780341937134e-05, + "loss": 2.1192, + "step": 46500 + }, + { + "epoch": 0.01, + "learning_rate": 4.982592851765689e-05, + "loss": 2.0503, + "step": 47000 + }, + { + "epoch": 0.01, + "learning_rate": 4.982407669337664e-05, + "loss": 2.1834, + "step": 47500 + }, + { + "epoch": 0.01, + "learning_rate": 4.98222248690964e-05, + "loss": 2.1964, + "step": 48000 + }, + { + "epoch": 0.01, + "learning_rate": 4.982037304481615e-05, + "loss": 2.1844, + "step": 48500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9818521220535905e-05, + "loss": 2.15, + "step": 49000 + }, + { + "epoch": 0.01, + "learning_rate": 4.981666939625566e-05, + "loss": 2.1419, + "step": 49500 + }, + { + "epoch": 0.01, + "learning_rate": 4.981481757197541e-05, + "loss": 2.1146, + "step": 50000 + }, + { + "epoch": 0.01, + "learning_rate": 4.981296574769517e-05, + "loss": 2.1196, + "step": 50500 + }, + { + "epoch": 0.01, + "learning_rate": 4.981111392341492e-05, + "loss": 2.0982, + "step": 51000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9809262099134677e-05, + "loss": 2.1951, + "step": 51500 + }, + { + "epoch": 0.01, + "learning_rate": 4.980741027485443e-05, + "loss": 2.0885, + "step": 52000 + }, + { + "epoch": 0.01, + "learning_rate": 4.980555845057418e-05, + "loss": 2.0822, + "step": 52500 + }, + { + "epoch": 0.01, + "learning_rate": 4.980370662629394e-05, + "loss": 2.107, + "step": 53000 + }, + { + "epoch": 0.01, + "learning_rate": 4.980185480201369e-05, + "loss": 2.0801, + "step": 53500 + }, + { + "epoch": 0.01, + "learning_rate": 4.980000297773345e-05, + "loss": 2.1561, + "step": 54000 + }, + { + "epoch": 0.01, + "learning_rate": 4.97981511534532e-05, + "loss": 2.0842, + "step": 54500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9796299329172955e-05, + "loss": 2.0803, + "step": 55000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9794447504892705e-05, + "loss": 2.2168, + "step": 55500 + }, + { + "epoch": 0.01, + "learning_rate": 4.979259568061246e-05, + "loss": 2.114, + "step": 56000 + }, + { + "epoch": 0.01, + "learning_rate": 4.979074385633222e-05, + "loss": 2.1965, + "step": 56500 + }, + { + "epoch": 0.01, + "learning_rate": 4.978889203205197e-05, + "loss": 2.1872, + "step": 57000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9787040207771726e-05, + "loss": 2.1307, + "step": 57500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9785188383491476e-05, + "loss": 2.1543, + "step": 58000 + }, + { + "epoch": 0.01, + "learning_rate": 4.978333655921123e-05, + "loss": 2.1638, + "step": 58500 + }, + { + "epoch": 0.01, + "learning_rate": 4.978148473493099e-05, + "loss": 2.152, + "step": 59000 + }, + { + "epoch": 0.01, + "learning_rate": 4.977963291065074e-05, + "loss": 2.185, + "step": 59500 + }, + { + "epoch": 0.01, + "learning_rate": 4.97777810863705e-05, + "loss": 2.083, + "step": 60000 + }, + { + "epoch": 0.01, + "learning_rate": 4.977592926209025e-05, + "loss": 2.1583, + "step": 60500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9774077437810004e-05, + "loss": 2.1324, + "step": 61000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9772225613529754e-05, + "loss": 2.1022, + "step": 61500 + }, + { + "epoch": 0.01, + "learning_rate": 4.977037378924951e-05, + "loss": 2.1016, + "step": 62000 + }, + { + "epoch": 0.01, + "learning_rate": 4.976852196496927e-05, + "loss": 2.2154, + "step": 62500 + }, + { + "epoch": 0.01, + "learning_rate": 4.976667014068902e-05, + "loss": 2.1377, + "step": 63000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9764818316408775e-05, + "loss": 2.1611, + "step": 63500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9762966492128526e-05, + "loss": 2.1962, + "step": 64000 + }, + { + "epoch": 0.01, + "learning_rate": 4.976111466784828e-05, + "loss": 2.1179, + "step": 64500 + }, + { + "epoch": 0.01, + "learning_rate": 4.975926284356803e-05, + "loss": 2.136, + "step": 65000 + }, + { + "epoch": 0.01, + "learning_rate": 4.975741101928779e-05, + "loss": 2.0581, + "step": 65500 + }, + { + "epoch": 0.01, + "learning_rate": 4.975555919500755e-05, + "loss": 2.1673, + "step": 66000 + }, + { + "epoch": 0.01, + "learning_rate": 4.97537073707273e-05, + "loss": 2.1709, + "step": 66500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9751855546447054e-05, + "loss": 2.1291, + "step": 67000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9750003722166804e-05, + "loss": 2.1081, + "step": 67500 + }, + { + "epoch": 0.02, + "learning_rate": 4.974815189788656e-05, + "loss": 2.0995, + "step": 68000 + }, + { + "epoch": 0.02, + "learning_rate": 4.974630007360631e-05, + "loss": 2.1093, + "step": 68500 + }, + { + "epoch": 0.02, + "learning_rate": 4.974444824932607e-05, + "loss": 2.0995, + "step": 69000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9742596425045825e-05, + "loss": 2.3111, + "step": 69500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9740744600765575e-05, + "loss": 2.2349, + "step": 70000 + }, + { + "epoch": 0.02, + "learning_rate": 4.973889277648533e-05, + "loss": 2.0986, + "step": 70500 + }, + { + "epoch": 0.02, + "learning_rate": 4.973704095220508e-05, + "loss": 2.1087, + "step": 71000 + }, + { + "epoch": 0.02, + "learning_rate": 4.973518912792484e-05, + "loss": 2.1279, + "step": 71500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9733337303644596e-05, + "loss": 2.1361, + "step": 72000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9731485479364346e-05, + "loss": 2.1142, + "step": 72500 + }, + { + "epoch": 0.02, + "learning_rate": 4.97296336550841e-05, + "loss": 2.1393, + "step": 73000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9727781830803853e-05, + "loss": 2.0751, + "step": 73500 + }, + { + "epoch": 0.02, + "learning_rate": 4.972593000652361e-05, + "loss": 2.1786, + "step": 74000 + }, + { + "epoch": 0.02, + "learning_rate": 4.972407818224336e-05, + "loss": 2.1415, + "step": 74500 + }, + { + "epoch": 0.02, + "learning_rate": 4.972222635796312e-05, + "loss": 2.094, + "step": 75000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9720374533682874e-05, + "loss": 2.158, + "step": 75500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9718522709402625e-05, + "loss": 2.0468, + "step": 76000 + }, + { + "epoch": 0.02, + "learning_rate": 4.971667088512238e-05, + "loss": 2.082, + "step": 76500 + }, + { + "epoch": 0.02, + "learning_rate": 4.971481906084213e-05, + "loss": 2.1283, + "step": 77000 + }, + { + "epoch": 0.02, + "learning_rate": 4.971296723656189e-05, + "loss": 2.1468, + "step": 77500 + }, + { + "epoch": 0.02, + "learning_rate": 4.971111541228164e-05, + "loss": 2.1154, + "step": 78000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9709263588001396e-05, + "loss": 2.1036, + "step": 78500 + }, + { + "epoch": 0.02, + "learning_rate": 4.970741176372115e-05, + "loss": 2.1031, + "step": 79000 + }, + { + "epoch": 0.02, + "learning_rate": 4.97055599394409e-05, + "loss": 2.0857, + "step": 79500 + }, + { + "epoch": 0.02, + "learning_rate": 4.970370811516066e-05, + "loss": 2.1558, + "step": 80000 + }, + { + "epoch": 0.02, + "learning_rate": 4.970185629088041e-05, + "loss": 2.0955, + "step": 80500 + }, + { + "epoch": 0.02, + "learning_rate": 4.970000446660017e-05, + "loss": 2.0567, + "step": 81000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9698152642319924e-05, + "loss": 2.1498, + "step": 81500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9696300818039674e-05, + "loss": 2.1512, + "step": 82000 + }, + { + "epoch": 0.02, + "learning_rate": 4.969444899375943e-05, + "loss": 2.098, + "step": 82500 + }, + { + "epoch": 0.02, + "learning_rate": 4.969259716947918e-05, + "loss": 2.1491, + "step": 83000 + }, + { + "epoch": 0.02, + "learning_rate": 4.969074534519894e-05, + "loss": 2.1296, + "step": 83500 + }, + { + "epoch": 0.02, + "learning_rate": 4.968889352091869e-05, + "loss": 2.064, + "step": 84000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9687041696638445e-05, + "loss": 2.148, + "step": 84500 + }, + { + "epoch": 0.02, + "learning_rate": 4.96851898723582e-05, + "loss": 2.1478, + "step": 85000 + }, + { + "epoch": 0.02, + "learning_rate": 4.968333804807795e-05, + "loss": 2.1299, + "step": 85500 + }, + { + "epoch": 0.02, + "learning_rate": 4.968148622379771e-05, + "loss": 2.1889, + "step": 86000 + }, + { + "epoch": 0.02, + "learning_rate": 4.967963439951746e-05, + "loss": 2.0996, + "step": 86500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9677782575237216e-05, + "loss": 2.1575, + "step": 87000 + }, + { + "epoch": 0.02, + "learning_rate": 4.967593075095697e-05, + "loss": 2.12, + "step": 87500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9674078926676724e-05, + "loss": 2.094, + "step": 88000 + }, + { + "epoch": 0.02, + "learning_rate": 4.967222710239648e-05, + "loss": 2.1591, + "step": 88500 + }, + { + "epoch": 0.02, + "learning_rate": 4.967037527811623e-05, + "loss": 2.1463, + "step": 89000 + }, + { + "epoch": 0.02, + "learning_rate": 4.966852345383599e-05, + "loss": 2.0924, + "step": 89500 + }, + { + "epoch": 0.02, + "learning_rate": 4.966667162955574e-05, + "loss": 2.1121, + "step": 90000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9664819805275495e-05, + "loss": 2.1229, + "step": 90500 + }, + { + "epoch": 0.02, + "learning_rate": 4.966296798099525e-05, + "loss": 2.1183, + "step": 91000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9661116156715e-05, + "loss": 2.0884, + "step": 91500 + }, + { + "epoch": 0.02, + "learning_rate": 4.965926433243476e-05, + "loss": 2.1724, + "step": 92000 + }, + { + "epoch": 0.02, + "learning_rate": 4.965741250815451e-05, + "loss": 2.1064, + "step": 92500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9655560683874266e-05, + "loss": 2.1236, + "step": 93000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9653708859594016e-05, + "loss": 2.159, + "step": 93500 + }, + { + "epoch": 0.02, + "learning_rate": 4.965185703531377e-05, + "loss": 2.0801, + "step": 94000 + }, + { + "epoch": 0.02, + "learning_rate": 4.965000521103353e-05, + "loss": 2.1024, + "step": 94500 + }, + { + "epoch": 0.02, + "learning_rate": 4.964815338675328e-05, + "loss": 2.1097, + "step": 95000 + }, + { + "epoch": 0.02, + "learning_rate": 4.964630156247304e-05, + "loss": 2.1249, + "step": 95500 + }, + { + "epoch": 0.02, + "learning_rate": 4.964444973819279e-05, + "loss": 2.0557, + "step": 96000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9642597913912544e-05, + "loss": 2.0868, + "step": 96500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9640746089632294e-05, + "loss": 2.1668, + "step": 97000 + }, + { + "epoch": 0.02, + "learning_rate": 4.963889426535205e-05, + "loss": 2.1173, + "step": 97500 + }, + { + "epoch": 0.02, + "learning_rate": 4.963704244107181e-05, + "loss": 2.1173, + "step": 98000 + }, + { + "epoch": 0.02, + "learning_rate": 4.963519061679156e-05, + "loss": 2.0945, + "step": 98500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9633338792511315e-05, + "loss": 2.0548, + "step": 99000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9631486968231066e-05, + "loss": 2.0306, + "step": 99500 + }, + { + "epoch": 0.02, + "learning_rate": 4.962963514395082e-05, + "loss": 2.0759, + "step": 100000 + }, + { + "epoch": 0.02, + "learning_rate": 4.962778331967058e-05, + "loss": 2.1732, + "step": 100500 + }, + { + "epoch": 0.02, + "learning_rate": 4.962593149539033e-05, + "loss": 2.1157, + "step": 101000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9624079671110087e-05, + "loss": 2.148, + "step": 101500 + }, + { + "epoch": 0.02, + "learning_rate": 4.962222784682984e-05, + "loss": 2.1079, + "step": 102000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9620376022549594e-05, + "loss": 2.127, + "step": 102500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9618524198269344e-05, + "loss": 2.1, + "step": 103000 + }, + { + "epoch": 0.02, + "learning_rate": 4.96166723739891e-05, + "loss": 2.071, + "step": 103500 + }, + { + "epoch": 0.02, + "learning_rate": 4.961482054970886e-05, + "loss": 2.1219, + "step": 104000 + }, + { + "epoch": 0.02, + "learning_rate": 4.961296872542861e-05, + "loss": 2.1071, + "step": 104500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9611116901148365e-05, + "loss": 2.0968, + "step": 105000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9609265076868115e-05, + "loss": 2.1153, + "step": 105500 + }, + { + "epoch": 0.02, + "learning_rate": 4.960741325258787e-05, + "loss": 2.0603, + "step": 106000 + }, + { + "epoch": 0.02, + "learning_rate": 4.960556142830762e-05, + "loss": 2.1243, + "step": 106500 + }, + { + "epoch": 0.02, + "learning_rate": 4.960370960402738e-05, + "loss": 2.1658, + "step": 107000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9601857779747136e-05, + "loss": 2.0951, + "step": 107500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9600005955466886e-05, + "loss": 2.1128, + "step": 108000 + }, + { + "epoch": 0.02, + "learning_rate": 4.959815413118664e-05, + "loss": 2.1391, + "step": 108500 + }, + { + "epoch": 0.02, + "learning_rate": 4.959630230690639e-05, + "loss": 2.1156, + "step": 109000 + }, + { + "epoch": 0.02, + "learning_rate": 4.959445048262615e-05, + "loss": 2.0878, + "step": 109500 + }, + { + "epoch": 0.02, + "learning_rate": 4.959259865834591e-05, + "loss": 2.1156, + "step": 110000 + }, + { + "epoch": 0.02, + "learning_rate": 4.959074683406566e-05, + "loss": 2.1387, + "step": 110500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9588895009785414e-05, + "loss": 2.1429, + "step": 111000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9587043185505165e-05, + "loss": 2.1445, + "step": 111500 + }, + { + "epoch": 0.02, + "learning_rate": 4.958519136122492e-05, + "loss": 2.1135, + "step": 112000 + }, + { + "epoch": 0.02, + "learning_rate": 4.958333953694467e-05, + "loss": 2.1144, + "step": 112500 + }, + { + "epoch": 0.03, + "learning_rate": 4.958148771266443e-05, + "loss": 2.1433, + "step": 113000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9579635888384186e-05, + "loss": 2.1211, + "step": 113500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9577784064103936e-05, + "loss": 2.1933, + "step": 114000 + }, + { + "epoch": 0.03, + "learning_rate": 4.957593223982369e-05, + "loss": 2.1649, + "step": 114500 + }, + { + "epoch": 0.03, + "learning_rate": 4.957408041554344e-05, + "loss": 2.0976, + "step": 115000 + }, + { + "epoch": 0.03, + "learning_rate": 4.95722285912632e-05, + "loss": 2.116, + "step": 115500 + }, + { + "epoch": 0.03, + "learning_rate": 4.957037676698295e-05, + "loss": 2.0661, + "step": 116000 + }, + { + "epoch": 0.03, + "learning_rate": 4.956852494270271e-05, + "loss": 2.1022, + "step": 116500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9566673118422464e-05, + "loss": 2.1366, + "step": 117000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9564821294142214e-05, + "loss": 2.1154, + "step": 117500 + }, + { + "epoch": 0.03, + "learning_rate": 4.956296946986197e-05, + "loss": 2.0719, + "step": 118000 + }, + { + "epoch": 0.03, + "learning_rate": 4.956111764558172e-05, + "loss": 2.1059, + "step": 118500 + }, + { + "epoch": 0.03, + "learning_rate": 4.955926582130148e-05, + "loss": 2.0687, + "step": 119000 + }, + { + "epoch": 0.03, + "learning_rate": 4.955741399702123e-05, + "loss": 2.1175, + "step": 119500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9555562172740985e-05, + "loss": 2.1016, + "step": 120000 + }, + { + "epoch": 0.03, + "learning_rate": 4.955371034846074e-05, + "loss": 2.091, + "step": 120500 + }, + { + "epoch": 0.03, + "learning_rate": 4.955185852418049e-05, + "loss": 2.1525, + "step": 121000 + }, + { + "epoch": 0.03, + "learning_rate": 4.955000669990025e-05, + "loss": 2.1353, + "step": 121500 + }, + { + "epoch": 0.03, + "learning_rate": 4.954815487562e-05, + "loss": 2.1404, + "step": 122000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9546303051339756e-05, + "loss": 2.1124, + "step": 122500 + }, + { + "epoch": 0.03, + "learning_rate": 4.954445122705951e-05, + "loss": 2.0695, + "step": 123000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9542599402779263e-05, + "loss": 2.0679, + "step": 123500 + }, + { + "epoch": 0.03, + "learning_rate": 4.954074757849902e-05, + "loss": 2.0789, + "step": 124000 + }, + { + "epoch": 0.03, + "learning_rate": 4.953889575421877e-05, + "loss": 2.1837, + "step": 124500 + }, + { + "epoch": 0.03, + "learning_rate": 4.953704392993853e-05, + "loss": 2.0892, + "step": 125000 + }, + { + "epoch": 0.03, + "learning_rate": 4.953519210565828e-05, + "loss": 2.1429, + "step": 125500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9533340281378035e-05, + "loss": 2.1141, + "step": 126000 + }, + { + "epoch": 0.03, + "learning_rate": 4.953148845709779e-05, + "loss": 2.1018, + "step": 126500 + }, + { + "epoch": 0.03, + "learning_rate": 4.952963663281754e-05, + "loss": 2.0873, + "step": 127000 + }, + { + "epoch": 0.03, + "learning_rate": 4.95277848085373e-05, + "loss": 2.0736, + "step": 127500 + }, + { + "epoch": 0.03, + "learning_rate": 4.952593298425705e-05, + "loss": 2.0752, + "step": 128000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9524081159976806e-05, + "loss": 2.102, + "step": 128500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9522229335696556e-05, + "loss": 2.132, + "step": 129000 + }, + { + "epoch": 0.03, + "learning_rate": 4.952037751141631e-05, + "loss": 2.1201, + "step": 129500 + }, + { + "epoch": 0.03, + "learning_rate": 4.951852568713607e-05, + "loss": 2.1781, + "step": 130000 + }, + { + "epoch": 0.03, + "learning_rate": 4.951667386285582e-05, + "loss": 2.0607, + "step": 130500 + }, + { + "epoch": 0.03, + "learning_rate": 4.951482203857558e-05, + "loss": 2.1332, + "step": 131000 + }, + { + "epoch": 0.03, + "learning_rate": 4.951297021429533e-05, + "loss": 2.0999, + "step": 131500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9511118390015084e-05, + "loss": 2.1018, + "step": 132000 + }, + { + "epoch": 0.03, + "learning_rate": 4.950926656573484e-05, + "loss": 2.0752, + "step": 132500 + }, + { + "epoch": 0.03, + "learning_rate": 4.950741474145459e-05, + "loss": 2.1125, + "step": 133000 + }, + { + "epoch": 0.03, + "learning_rate": 4.950556291717435e-05, + "loss": 2.0988, + "step": 133500 + }, + { + "epoch": 0.03, + "learning_rate": 4.95037110928941e-05, + "loss": 2.0444, + "step": 134000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9501859268613855e-05, + "loss": 2.0619, + "step": 134500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9500007444333606e-05, + "loss": 2.1532, + "step": 135000 + }, + { + "epoch": 0.03, + "learning_rate": 4.949815562005336e-05, + "loss": 2.1091, + "step": 135500 + }, + { + "epoch": 0.03, + "learning_rate": 4.949630379577312e-05, + "loss": 2.0799, + "step": 136000 + }, + { + "epoch": 0.03, + "learning_rate": 4.949445197149287e-05, + "loss": 2.0619, + "step": 136500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9492600147212627e-05, + "loss": 2.0687, + "step": 137000 + }, + { + "epoch": 0.03, + "learning_rate": 4.949074832293238e-05, + "loss": 2.133, + "step": 137500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9488896498652134e-05, + "loss": 2.0766, + "step": 138000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9487044674371884e-05, + "loss": 2.1147, + "step": 138500 + }, + { + "epoch": 0.03, + "learning_rate": 4.948519285009164e-05, + "loss": 2.1444, + "step": 139000 + }, + { + "epoch": 0.03, + "learning_rate": 4.94833410258114e-05, + "loss": 2.138, + "step": 139500 + }, + { + "epoch": 0.03, + "learning_rate": 4.948148920153115e-05, + "loss": 2.0898, + "step": 140000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9479637377250905e-05, + "loss": 2.0965, + "step": 140500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9477785552970655e-05, + "loss": 2.0402, + "step": 141000 + }, + { + "epoch": 0.03, + "learning_rate": 4.947593372869041e-05, + "loss": 2.0965, + "step": 141500 + }, + { + "epoch": 0.03, + "learning_rate": 4.947408190441017e-05, + "loss": 2.1203, + "step": 142000 + }, + { + "epoch": 0.03, + "learning_rate": 4.947223008012992e-05, + "loss": 2.0735, + "step": 142500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9470378255849676e-05, + "loss": 2.0774, + "step": 143000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9468526431569426e-05, + "loss": 2.0433, + "step": 143500 + }, + { + "epoch": 0.03, + "learning_rate": 4.946667460728918e-05, + "loss": 2.0941, + "step": 144000 + }, + { + "epoch": 0.03, + "learning_rate": 4.946482278300893e-05, + "loss": 2.0883, + "step": 144500 + }, + { + "epoch": 0.03, + "learning_rate": 4.946297095872869e-05, + "loss": 2.0986, + "step": 145000 + }, + { + "epoch": 0.03, + "learning_rate": 4.946111913444845e-05, + "loss": 2.1054, + "step": 145500 + }, + { + "epoch": 0.03, + "learning_rate": 4.94592673101682e-05, + "loss": 2.095, + "step": 146000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9457415485887954e-05, + "loss": 2.048, + "step": 146500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9455563661607704e-05, + "loss": 2.1582, + "step": 147000 + }, + { + "epoch": 0.03, + "learning_rate": 4.945371183732746e-05, + "loss": 2.0933, + "step": 147500 + }, + { + "epoch": 0.03, + "learning_rate": 4.945186001304721e-05, + "loss": 2.0986, + "step": 148000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9450008188766975e-05, + "loss": 2.0879, + "step": 148500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9448156364486725e-05, + "loss": 2.1448, + "step": 149000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9446304540206476e-05, + "loss": 2.0606, + "step": 149500 + }, + { + "epoch": 0.03, + "learning_rate": 4.944445271592623e-05, + "loss": 2.1036, + "step": 150000 + }, + { + "epoch": 0.03, + "learning_rate": 4.944260089164598e-05, + "loss": 2.0317, + "step": 150500 + }, + { + "epoch": 0.03, + "learning_rate": 4.944074906736574e-05, + "loss": 2.1222, + "step": 151000 + }, + { + "epoch": 0.03, + "learning_rate": 4.94388972430855e-05, + "loss": 2.0757, + "step": 151500 + }, + { + "epoch": 0.03, + "learning_rate": 4.943704541880525e-05, + "loss": 2.066, + "step": 152000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9435193594525004e-05, + "loss": 2.1251, + "step": 152500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9433341770244754e-05, + "loss": 2.0925, + "step": 153000 + }, + { + "epoch": 0.03, + "learning_rate": 4.943148994596451e-05, + "loss": 2.0619, + "step": 153500 + }, + { + "epoch": 0.03, + "learning_rate": 4.942963812168426e-05, + "loss": 2.0371, + "step": 154000 + }, + { + "epoch": 0.03, + "learning_rate": 4.942778629740402e-05, + "loss": 2.1089, + "step": 154500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9425934473123775e-05, + "loss": 2.0807, + "step": 155000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9424082648843525e-05, + "loss": 2.0697, + "step": 155500 + }, + { + "epoch": 0.03, + "learning_rate": 4.942223082456328e-05, + "loss": 2.0802, + "step": 156000 + }, + { + "epoch": 0.03, + "learning_rate": 4.942037900028303e-05, + "loss": 2.0421, + "step": 156500 + }, + { + "epoch": 0.03, + "learning_rate": 4.941852717600279e-05, + "loss": 2.0534, + "step": 157000 + }, + { + "epoch": 0.03, + "learning_rate": 4.941667535172254e-05, + "loss": 2.0713, + "step": 157500 + }, + { + "epoch": 0.04, + "learning_rate": 4.94148235274423e-05, + "loss": 2.0668, + "step": 158000 + }, + { + "epoch": 0.04, + "learning_rate": 4.941297170316205e-05, + "loss": 2.0462, + "step": 158500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9411119878881803e-05, + "loss": 2.0347, + "step": 159000 + }, + { + "epoch": 0.04, + "learning_rate": 4.940926805460156e-05, + "loss": 2.1034, + "step": 159500 + }, + { + "epoch": 0.04, + "learning_rate": 4.940741623032131e-05, + "loss": 2.0372, + "step": 160000 + }, + { + "epoch": 0.04, + "learning_rate": 4.940556440604107e-05, + "loss": 2.0649, + "step": 160500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9403712581760824e-05, + "loss": 2.104, + "step": 161000 + }, + { + "epoch": 0.04, + "learning_rate": 4.940186075748058e-05, + "loss": 2.1171, + "step": 161500 + }, + { + "epoch": 0.04, + "learning_rate": 4.940000893320033e-05, + "loss": 2.0524, + "step": 162000 + }, + { + "epoch": 0.04, + "learning_rate": 4.939815710892008e-05, + "loss": 2.0812, + "step": 162500 + }, + { + "epoch": 0.04, + "learning_rate": 4.939630528463984e-05, + "loss": 2.0633, + "step": 163000 + }, + { + "epoch": 0.04, + "learning_rate": 4.939445346035959e-05, + "loss": 2.0875, + "step": 163500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9392601636079346e-05, + "loss": 2.0515, + "step": 164000 + }, + { + "epoch": 0.04, + "learning_rate": 4.93907498117991e-05, + "loss": 2.0684, + "step": 164500 + }, + { + "epoch": 0.04, + "learning_rate": 4.938889798751885e-05, + "loss": 2.1043, + "step": 165000 + }, + { + "epoch": 0.04, + "learning_rate": 4.938704616323861e-05, + "loss": 2.0895, + "step": 165500 + }, + { + "epoch": 0.04, + "learning_rate": 4.938519433895836e-05, + "loss": 2.1472, + "step": 166000 + }, + { + "epoch": 0.04, + "learning_rate": 4.938334251467812e-05, + "loss": 2.1056, + "step": 166500 + }, + { + "epoch": 0.04, + "learning_rate": 4.938149069039787e-05, + "loss": 2.1839, + "step": 167000 + }, + { + "epoch": 0.04, + "learning_rate": 4.937963886611763e-05, + "loss": 2.1721, + "step": 167500 + }, + { + "epoch": 0.04, + "learning_rate": 4.937778704183738e-05, + "loss": 2.0804, + "step": 168000 + }, + { + "epoch": 0.04, + "learning_rate": 4.937593521755713e-05, + "loss": 2.1136, + "step": 168500 + }, + { + "epoch": 0.04, + "learning_rate": 4.937408339327689e-05, + "loss": 2.1256, + "step": 169000 + }, + { + "epoch": 0.04, + "learning_rate": 4.937223156899664e-05, + "loss": 2.1167, + "step": 169500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9370379744716395e-05, + "loss": 2.0798, + "step": 170000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9368527920436145e-05, + "loss": 2.0936, + "step": 170500 + }, + { + "epoch": 0.04, + "learning_rate": 4.936667609615591e-05, + "loss": 2.0949, + "step": 171000 + }, + { + "epoch": 0.04, + "learning_rate": 4.936482427187566e-05, + "loss": 2.0507, + "step": 171500 + }, + { + "epoch": 0.04, + "learning_rate": 4.936297244759541e-05, + "loss": 2.1179, + "step": 172000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9361120623315166e-05, + "loss": 2.122, + "step": 172500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9359268799034917e-05, + "loss": 2.0264, + "step": 173000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9357416974754674e-05, + "loss": 2.0497, + "step": 173500 + }, + { + "epoch": 0.04, + "learning_rate": 4.935556515047443e-05, + "loss": 2.0554, + "step": 174000 + }, + { + "epoch": 0.04, + "learning_rate": 4.935371332619419e-05, + "loss": 2.1613, + "step": 174500 + }, + { + "epoch": 0.04, + "learning_rate": 4.935186150191394e-05, + "loss": 2.0881, + "step": 175000 + }, + { + "epoch": 0.04, + "learning_rate": 4.935000967763369e-05, + "loss": 2.037, + "step": 175500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9348157853353445e-05, + "loss": 2.0376, + "step": 176000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9346306029073195e-05, + "loss": 2.044, + "step": 176500 + }, + { + "epoch": 0.04, + "learning_rate": 4.934445420479296e-05, + "loss": 2.0583, + "step": 177000 + }, + { + "epoch": 0.04, + "learning_rate": 4.934260238051271e-05, + "loss": 2.1067, + "step": 177500 + }, + { + "epoch": 0.04, + "learning_rate": 4.934075055623246e-05, + "loss": 2.0444, + "step": 178000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9338898731952216e-05, + "loss": 2.0586, + "step": 178500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9337046907671966e-05, + "loss": 2.0813, + "step": 179000 + }, + { + "epoch": 0.04, + "learning_rate": 4.933519508339172e-05, + "loss": 2.077, + "step": 179500 + }, + { + "epoch": 0.04, + "learning_rate": 4.933334325911147e-05, + "loss": 2.0301, + "step": 180000 + }, + { + "epoch": 0.04, + "learning_rate": 4.933149143483124e-05, + "loss": 2.0865, + "step": 180500 + }, + { + "epoch": 0.04, + "learning_rate": 4.932963961055099e-05, + "loss": 2.1051, + "step": 181000 + }, + { + "epoch": 0.04, + "learning_rate": 4.932778778627074e-05, + "loss": 2.1441, + "step": 181500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9325935961990494e-05, + "loss": 2.0732, + "step": 182000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9324084137710244e-05, + "loss": 2.1316, + "step": 182500 + }, + { + "epoch": 0.04, + "learning_rate": 4.932223231343e-05, + "loss": 2.1063, + "step": 183000 + }, + { + "epoch": 0.04, + "learning_rate": 4.932038048914976e-05, + "loss": 2.1785, + "step": 183500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9318528664869515e-05, + "loss": 2.1108, + "step": 184000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9316676840589265e-05, + "loss": 2.0622, + "step": 184500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9314825016309016e-05, + "loss": 2.0539, + "step": 185000 + }, + { + "epoch": 0.04, + "learning_rate": 4.931297319202877e-05, + "loss": 2.0998, + "step": 185500 + }, + { + "epoch": 0.04, + "learning_rate": 4.931112136774852e-05, + "loss": 2.0329, + "step": 186000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9309269543468286e-05, + "loss": 2.1002, + "step": 186500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9307417719188037e-05, + "loss": 2.1119, + "step": 187000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9305565894907794e-05, + "loss": 2.1018, + "step": 187500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9303714070627544e-05, + "loss": 2.0742, + "step": 188000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9301862246347294e-05, + "loss": 2.0729, + "step": 188500 + }, + { + "epoch": 0.04, + "learning_rate": 4.930001042206705e-05, + "loss": 2.0995, + "step": 189000 + }, + { + "epoch": 0.04, + "learning_rate": 4.92981585977868e-05, + "loss": 2.1262, + "step": 189500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9296306773506565e-05, + "loss": 2.1104, + "step": 190000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9294454949226315e-05, + "loss": 2.0692, + "step": 190500 + }, + { + "epoch": 0.04, + "learning_rate": 4.929260312494607e-05, + "loss": 2.0884, + "step": 191000 + }, + { + "epoch": 0.04, + "learning_rate": 4.929075130066582e-05, + "loss": 2.047, + "step": 191500 + }, + { + "epoch": 0.04, + "learning_rate": 4.928889947638557e-05, + "loss": 2.0389, + "step": 192000 + }, + { + "epoch": 0.04, + "learning_rate": 4.928704765210533e-05, + "loss": 2.091, + "step": 192500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9285195827825086e-05, + "loss": 2.0903, + "step": 193000 + }, + { + "epoch": 0.04, + "learning_rate": 4.928334400354484e-05, + "loss": 2.0842, + "step": 193500 + }, + { + "epoch": 0.04, + "learning_rate": 4.928149217926459e-05, + "loss": 2.11, + "step": 194000 + }, + { + "epoch": 0.04, + "learning_rate": 4.927964035498434e-05, + "loss": 2.1346, + "step": 194500 + }, + { + "epoch": 0.04, + "learning_rate": 4.92777885307041e-05, + "loss": 2.0544, + "step": 195000 + }, + { + "epoch": 0.04, + "learning_rate": 4.927593670642385e-05, + "loss": 2.1018, + "step": 195500 + }, + { + "epoch": 0.04, + "learning_rate": 4.927408488214361e-05, + "loss": 2.1567, + "step": 196000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9272233057863364e-05, + "loss": 2.038, + "step": 196500 + }, + { + "epoch": 0.04, + "learning_rate": 4.927038123358312e-05, + "loss": 2.1098, + "step": 197000 + }, + { + "epoch": 0.04, + "learning_rate": 4.926852940930287e-05, + "loss": 2.0882, + "step": 197500 + }, + { + "epoch": 0.04, + "learning_rate": 4.926667758502262e-05, + "loss": 2.1377, + "step": 198000 + }, + { + "epoch": 0.04, + "learning_rate": 4.926482576074238e-05, + "loss": 2.0804, + "step": 198500 + }, + { + "epoch": 0.04, + "learning_rate": 4.926297393646213e-05, + "loss": 2.0966, + "step": 199000 + }, + { + "epoch": 0.04, + "learning_rate": 4.926112211218189e-05, + "loss": 2.0618, + "step": 199500 + }, + { + "epoch": 0.04, + "learning_rate": 4.925927028790164e-05, + "loss": 2.0894, + "step": 200000 + }, + { + "epoch": 0.04, + "learning_rate": 4.92574184636214e-05, + "loss": 2.1082, + "step": 200500 + }, + { + "epoch": 0.04, + "learning_rate": 4.925556663934115e-05, + "loss": 2.0714, + "step": 201000 + }, + { + "epoch": 0.04, + "learning_rate": 4.92537148150609e-05, + "loss": 2.0899, + "step": 201500 + }, + { + "epoch": 0.04, + "learning_rate": 4.925186299078066e-05, + "loss": 2.1127, + "step": 202000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9250011166500414e-05, + "loss": 2.0379, + "step": 202500 + }, + { + "epoch": 0.05, + "learning_rate": 4.924815934222017e-05, + "loss": 2.1386, + "step": 203000 + }, + { + "epoch": 0.05, + "learning_rate": 4.924630751793992e-05, + "loss": 2.1004, + "step": 203500 + }, + { + "epoch": 0.05, + "learning_rate": 4.924445569365968e-05, + "loss": 2.1193, + "step": 204000 + }, + { + "epoch": 0.05, + "learning_rate": 4.924260386937943e-05, + "loss": 2.0352, + "step": 204500 + }, + { + "epoch": 0.05, + "learning_rate": 4.924075204509918e-05, + "loss": 1.9945, + "step": 205000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9238900220818935e-05, + "loss": 2.048, + "step": 205500 + }, + { + "epoch": 0.05, + "learning_rate": 4.923704839653869e-05, + "loss": 2.1062, + "step": 206000 + }, + { + "epoch": 0.05, + "learning_rate": 4.923519657225845e-05, + "loss": 2.1527, + "step": 206500 + }, + { + "epoch": 0.05, + "learning_rate": 4.92333447479782e-05, + "loss": 2.0849, + "step": 207000 + }, + { + "epoch": 0.05, + "learning_rate": 4.923149292369795e-05, + "loss": 2.1391, + "step": 207500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9229641099417706e-05, + "loss": 2.1056, + "step": 208000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9227789275137457e-05, + "loss": 2.0838, + "step": 208500 + }, + { + "epoch": 0.05, + "learning_rate": 4.922593745085722e-05, + "loss": 2.0591, + "step": 209000 + }, + { + "epoch": 0.05, + "learning_rate": 4.922408562657697e-05, + "loss": 2.0663, + "step": 209500 + }, + { + "epoch": 0.05, + "learning_rate": 4.922223380229673e-05, + "loss": 2.0937, + "step": 210000 + }, + { + "epoch": 0.05, + "learning_rate": 4.922038197801648e-05, + "loss": 2.069, + "step": 210500 + }, + { + "epoch": 0.05, + "learning_rate": 4.921853015373623e-05, + "loss": 2.1053, + "step": 211000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9216678329455985e-05, + "loss": 2.1391, + "step": 211500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9214826505175735e-05, + "loss": 2.1385, + "step": 212000 + }, + { + "epoch": 0.05, + "learning_rate": 4.92129746808955e-05, + "loss": 2.0814, + "step": 212500 + }, + { + "epoch": 0.05, + "learning_rate": 4.921112285661525e-05, + "loss": 2.0214, + "step": 213000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9209271032335006e-05, + "loss": 2.0682, + "step": 213500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9207419208054756e-05, + "loss": 2.0033, + "step": 214000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9205567383774506e-05, + "loss": 2.1075, + "step": 214500 + }, + { + "epoch": 0.05, + "learning_rate": 4.920371555949426e-05, + "loss": 2.1303, + "step": 215000 + }, + { + "epoch": 0.05, + "learning_rate": 4.920186373521402e-05, + "loss": 2.0722, + "step": 215500 + }, + { + "epoch": 0.05, + "learning_rate": 4.920001191093378e-05, + "loss": 2.0589, + "step": 216000 + }, + { + "epoch": 0.05, + "learning_rate": 4.919816008665353e-05, + "loss": 2.1009, + "step": 216500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9196308262373284e-05, + "loss": 2.0362, + "step": 217000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9194456438093034e-05, + "loss": 2.0887, + "step": 217500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9192604613812784e-05, + "loss": 2.0315, + "step": 218000 + }, + { + "epoch": 0.05, + "learning_rate": 4.919075278953255e-05, + "loss": 2.0048, + "step": 218500 + }, + { + "epoch": 0.05, + "learning_rate": 4.91889009652523e-05, + "loss": 2.0886, + "step": 219000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9187049140972055e-05, + "loss": 2.1068, + "step": 219500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9185197316691805e-05, + "loss": 2.0436, + "step": 220000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9183345492411555e-05, + "loss": 2.1176, + "step": 220500 + }, + { + "epoch": 0.05, + "learning_rate": 4.918149366813131e-05, + "loss": 2.0312, + "step": 221000 + }, + { + "epoch": 0.05, + "learning_rate": 4.917964184385106e-05, + "loss": 2.0521, + "step": 221500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9177790019570826e-05, + "loss": 2.1194, + "step": 222000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9175938195290577e-05, + "loss": 2.1554, + "step": 222500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9174086371010333e-05, + "loss": 2.0491, + "step": 223000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9172234546730084e-05, + "loss": 2.1222, + "step": 223500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9170382722449834e-05, + "loss": 2.1292, + "step": 224000 + }, + { + "epoch": 0.05, + "learning_rate": 4.916853089816959e-05, + "loss": 2.1109, + "step": 224500 + }, + { + "epoch": 0.05, + "learning_rate": 4.916667907388935e-05, + "loss": 2.1183, + "step": 225000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9164827249609105e-05, + "loss": 2.0613, + "step": 225500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9162975425328855e-05, + "loss": 2.1023, + "step": 226000 + }, + { + "epoch": 0.05, + "learning_rate": 4.916112360104861e-05, + "loss": 2.1123, + "step": 226500 + }, + { + "epoch": 0.05, + "learning_rate": 4.915927177676836e-05, + "loss": 2.0664, + "step": 227000 + }, + { + "epoch": 0.05, + "learning_rate": 4.915741995248811e-05, + "loss": 2.0816, + "step": 227500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9155568128207876e-05, + "loss": 2.0749, + "step": 228000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9153716303927626e-05, + "loss": 2.0996, + "step": 228500 + }, + { + "epoch": 0.05, + "learning_rate": 4.915186447964738e-05, + "loss": 2.0893, + "step": 229000 + }, + { + "epoch": 0.05, + "learning_rate": 4.915001265536713e-05, + "loss": 2.1056, + "step": 229500 + }, + { + "epoch": 0.05, + "learning_rate": 4.914816083108689e-05, + "loss": 2.0729, + "step": 230000 + }, + { + "epoch": 0.05, + "learning_rate": 4.914630900680664e-05, + "loss": 2.0735, + "step": 230500 + }, + { + "epoch": 0.05, + "learning_rate": 4.914445718252639e-05, + "loss": 2.1005, + "step": 231000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9142605358246154e-05, + "loss": 2.144, + "step": 231500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9140753533965904e-05, + "loss": 2.1338, + "step": 232000 + }, + { + "epoch": 0.05, + "learning_rate": 4.913890170968566e-05, + "loss": 2.0836, + "step": 232500 + }, + { + "epoch": 0.05, + "learning_rate": 4.913704988540541e-05, + "loss": 2.1221, + "step": 233000 + }, + { + "epoch": 0.05, + "learning_rate": 4.913519806112517e-05, + "loss": 2.0788, + "step": 233500 + }, + { + "epoch": 0.05, + "learning_rate": 4.913334623684492e-05, + "loss": 2.0521, + "step": 234000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9131494412564675e-05, + "loss": 2.1534, + "step": 234500 + }, + { + "epoch": 0.05, + "learning_rate": 4.912964258828443e-05, + "loss": 2.1214, + "step": 235000 + }, + { + "epoch": 0.05, + "learning_rate": 4.912779076400418e-05, + "loss": 2.1256, + "step": 235500 + }, + { + "epoch": 0.05, + "learning_rate": 4.912593893972394e-05, + "loss": 2.1058, + "step": 236000 + }, + { + "epoch": 0.05, + "learning_rate": 4.912408711544369e-05, + "loss": 2.023, + "step": 236500 + }, + { + "epoch": 0.05, + "learning_rate": 4.912223529116344e-05, + "loss": 2.092, + "step": 237000 + }, + { + "epoch": 0.05, + "learning_rate": 4.91203834668832e-05, + "loss": 2.0712, + "step": 237500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9118531642602954e-05, + "loss": 2.0955, + "step": 238000 + }, + { + "epoch": 0.05, + "learning_rate": 4.911667981832271e-05, + "loss": 2.088, + "step": 238500 + }, + { + "epoch": 0.05, + "learning_rate": 4.911482799404246e-05, + "loss": 2.0495, + "step": 239000 + }, + { + "epoch": 0.05, + "learning_rate": 4.911297616976222e-05, + "loss": 2.0736, + "step": 239500 + }, + { + "epoch": 0.05, + "learning_rate": 4.911112434548197e-05, + "loss": 2.0218, + "step": 240000 + }, + { + "epoch": 0.05, + "learning_rate": 4.910927252120172e-05, + "loss": 2.1898, + "step": 240500 + }, + { + "epoch": 0.05, + "learning_rate": 4.910742069692148e-05, + "loss": 2.1209, + "step": 241000 + }, + { + "epoch": 0.05, + "learning_rate": 4.910556887264123e-05, + "loss": 2.1103, + "step": 241500 + }, + { + "epoch": 0.05, + "learning_rate": 4.910371704836099e-05, + "loss": 2.1283, + "step": 242000 + }, + { + "epoch": 0.05, + "learning_rate": 4.910186522408074e-05, + "loss": 2.1047, + "step": 242500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9100013399800496e-05, + "loss": 2.0582, + "step": 243000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9098161575520246e-05, + "loss": 2.051, + "step": 243500 + }, + { + "epoch": 0.05, + "learning_rate": 4.909630975124e-05, + "loss": 2.1267, + "step": 244000 + }, + { + "epoch": 0.05, + "learning_rate": 4.909445792695976e-05, + "loss": 2.0628, + "step": 244500 + }, + { + "epoch": 0.05, + "learning_rate": 4.909260610267951e-05, + "loss": 2.0915, + "step": 245000 + }, + { + "epoch": 0.05, + "learning_rate": 4.909075427839927e-05, + "loss": 2.0771, + "step": 245500 + }, + { + "epoch": 0.05, + "learning_rate": 4.908890245411902e-05, + "loss": 2.0893, + "step": 246000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9087050629838774e-05, + "loss": 2.0784, + "step": 246500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9085198805558525e-05, + "loss": 2.0892, + "step": 247000 + }, + { + "epoch": 0.05, + "learning_rate": 4.908334698127828e-05, + "loss": 2.0572, + "step": 247500 + }, + { + "epoch": 0.06, + "learning_rate": 4.908149515699804e-05, + "loss": 2.0948, + "step": 248000 + }, + { + "epoch": 0.06, + "learning_rate": 4.907964333271779e-05, + "loss": 2.1058, + "step": 248500 + }, + { + "epoch": 0.06, + "learning_rate": 4.9077791508437546e-05, + "loss": 2.0597, + "step": 249000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9075939684157296e-05, + "loss": 2.0909, + "step": 249500 + }, + { + "epoch": 0.06, + "learning_rate": 4.9074087859877046e-05, + "loss": 2.0595, + "step": 250000 + }, + { + "epoch": 0.06, + "learning_rate": 4.907223603559681e-05, + "loss": 2.0492, + "step": 250500 + }, + { + "epoch": 0.06, + "learning_rate": 4.907038421131656e-05, + "loss": 2.0546, + "step": 251000 + }, + { + "epoch": 0.06, + "learning_rate": 4.906853238703632e-05, + "loss": 2.1159, + "step": 251500 + }, + { + "epoch": 0.06, + "learning_rate": 4.906668056275607e-05, + "loss": 2.1385, + "step": 252000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9064828738475824e-05, + "loss": 2.0528, + "step": 252500 + }, + { + "epoch": 0.06, + "learning_rate": 4.9062976914195574e-05, + "loss": 2.0894, + "step": 253000 + }, + { + "epoch": 0.06, + "learning_rate": 4.906112508991533e-05, + "loss": 2.0223, + "step": 253500 + }, + { + "epoch": 0.06, + "learning_rate": 4.905927326563509e-05, + "loss": 2.0927, + "step": 254000 + }, + { + "epoch": 0.06, + "learning_rate": 4.905742144135484e-05, + "loss": 2.0777, + "step": 254500 + }, + { + "epoch": 0.06, + "learning_rate": 4.9055569617074595e-05, + "loss": 2.0878, + "step": 255000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9053717792794345e-05, + "loss": 2.1174, + "step": 255500 + }, + { + "epoch": 0.06, + "learning_rate": 4.90518659685141e-05, + "loss": 2.0705, + "step": 256000 + }, + { + "epoch": 0.06, + "learning_rate": 4.905001414423385e-05, + "loss": 2.0456, + "step": 256500 + }, + { + "epoch": 0.06, + "learning_rate": 4.904816231995361e-05, + "loss": 2.0628, + "step": 257000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9046310495673366e-05, + "loss": 2.1184, + "step": 257500 + }, + { + "epoch": 0.06, + "learning_rate": 4.9044458671393116e-05, + "loss": 2.1294, + "step": 258000 + }, + { + "epoch": 0.06, + "learning_rate": 4.904260684711287e-05, + "loss": 2.078, + "step": 258500 + }, + { + "epoch": 0.06, + "learning_rate": 4.9040755022832624e-05, + "loss": 2.0868, + "step": 259000 + }, + { + "epoch": 0.06, + "learning_rate": 4.903890319855238e-05, + "loss": 2.0613, + "step": 259500 + }, + { + "epoch": 0.06, + "learning_rate": 4.903705137427214e-05, + "loss": 2.0518, + "step": 260000 + }, + { + "epoch": 0.06, + "learning_rate": 4.903519954999189e-05, + "loss": 2.0432, + "step": 260500 + }, + { + "epoch": 0.06, + "learning_rate": 4.9033347725711645e-05, + "loss": 2.0214, + "step": 261000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9031495901431395e-05, + "loss": 2.0357, + "step": 261500 + }, + { + "epoch": 0.06, + "learning_rate": 4.902964407715115e-05, + "loss": 2.0896, + "step": 262000 + }, + { + "epoch": 0.06, + "learning_rate": 4.90277922528709e-05, + "loss": 2.0427, + "step": 262500 + }, + { + "epoch": 0.06, + "learning_rate": 4.902594042859065e-05, + "loss": 2.0874, + "step": 263000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9024088604310416e-05, + "loss": 2.0956, + "step": 263500 + }, + { + "epoch": 0.06, + "learning_rate": 4.9022236780030166e-05, + "loss": 2.089, + "step": 264000 + }, + { + "epoch": 0.06, + "learning_rate": 4.902038495574992e-05, + "loss": 2.1201, + "step": 264500 + }, + { + "epoch": 0.06, + "learning_rate": 4.901853313146967e-05, + "loss": 2.0422, + "step": 265000 + }, + { + "epoch": 0.06, + "learning_rate": 4.901668130718943e-05, + "loss": 2.0893, + "step": 265500 + }, + { + "epoch": 0.06, + "learning_rate": 4.901482948290918e-05, + "loss": 2.0884, + "step": 266000 + }, + { + "epoch": 0.06, + "learning_rate": 4.901297765862894e-05, + "loss": 2.0242, + "step": 266500 + }, + { + "epoch": 0.06, + "learning_rate": 4.9011125834348694e-05, + "loss": 2.0888, + "step": 267000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9009274010068444e-05, + "loss": 2.108, + "step": 267500 + }, + { + "epoch": 0.06, + "learning_rate": 4.90074221857882e-05, + "loss": 2.0993, + "step": 268000 + }, + { + "epoch": 0.06, + "learning_rate": 4.900557036150795e-05, + "loss": 2.0372, + "step": 268500 + }, + { + "epoch": 0.06, + "learning_rate": 4.900371853722771e-05, + "loss": 2.0594, + "step": 269000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9001866712947465e-05, + "loss": 1.9995, + "step": 269500 + }, + { + "epoch": 0.06, + "learning_rate": 4.9000014888667215e-05, + "loss": 2.1141, + "step": 270000 + }, + { + "epoch": 0.06, + "learning_rate": 4.899816306438697e-05, + "loss": 2.0906, + "step": 270500 + }, + { + "epoch": 0.06, + "learning_rate": 4.899631124010672e-05, + "loss": 2.0663, + "step": 271000 + }, + { + "epoch": 0.06, + "learning_rate": 4.899445941582648e-05, + "loss": 2.0266, + "step": 271500 + }, + { + "epoch": 0.06, + "learning_rate": 4.899260759154623e-05, + "loss": 2.0186, + "step": 272000 + }, + { + "epoch": 0.06, + "learning_rate": 4.8990755767265987e-05, + "loss": 2.0313, + "step": 272500 + }, + { + "epoch": 0.06, + "learning_rate": 4.8988903942985744e-05, + "loss": 2.0928, + "step": 273000 + }, + { + "epoch": 0.06, + "learning_rate": 4.8987052118705494e-05, + "loss": 2.0998, + "step": 273500 + }, + { + "epoch": 0.06, + "learning_rate": 4.898520029442525e-05, + "loss": 2.0823, + "step": 274000 + }, + { + "epoch": 0.06, + "learning_rate": 4.8983348470145e-05, + "loss": 2.1264, + "step": 274500 + }, + { + "epoch": 0.06, + "learning_rate": 4.898149664586476e-05, + "loss": 2.1021, + "step": 275000 + }, + { + "epoch": 0.06, + "learning_rate": 4.897964482158451e-05, + "loss": 2.1273, + "step": 275500 + }, + { + "epoch": 0.06, + "learning_rate": 4.8977792997304265e-05, + "loss": 2.0928, + "step": 276000 + }, + { + "epoch": 0.06, + "learning_rate": 4.897594117302402e-05, + "loss": 2.0685, + "step": 276500 + }, + { + "epoch": 0.06, + "learning_rate": 4.897408934874377e-05, + "loss": 2.0811, + "step": 277000 + }, + { + "epoch": 0.06, + "learning_rate": 4.897223752446353e-05, + "loss": 2.081, + "step": 277500 + }, + { + "epoch": 0.06, + "learning_rate": 4.897038570018328e-05, + "loss": 2.0601, + "step": 278000 + }, + { + "epoch": 0.06, + "learning_rate": 4.8968533875903036e-05, + "loss": 2.1053, + "step": 278500 + }, + { + "epoch": 0.06, + "learning_rate": 4.896668205162279e-05, + "loss": 2.1081, + "step": 279000 + }, + { + "epoch": 0.06, + "learning_rate": 4.896483022734254e-05, + "loss": 2.098, + "step": 279500 + }, + { + "epoch": 0.06, + "learning_rate": 4.89629784030623e-05, + "loss": 2.1769, + "step": 280000 + }, + { + "epoch": 0.06, + "learning_rate": 4.896112657878205e-05, + "loss": 2.083, + "step": 280500 + }, + { + "epoch": 0.06, + "learning_rate": 4.895927475450181e-05, + "loss": 2.0933, + "step": 281000 + }, + { + "epoch": 0.06, + "learning_rate": 4.895742293022156e-05, + "loss": 2.1113, + "step": 281500 + }, + { + "epoch": 0.06, + "learning_rate": 4.8955571105941314e-05, + "loss": 2.1, + "step": 282000 + }, + { + "epoch": 0.06, + "learning_rate": 4.895371928166107e-05, + "loss": 2.062, + "step": 282500 + }, + { + "epoch": 0.06, + "learning_rate": 4.895186745738082e-05, + "loss": 2.1055, + "step": 283000 + }, + { + "epoch": 0.06, + "learning_rate": 4.895001563310058e-05, + "loss": 2.018, + "step": 283500 + }, + { + "epoch": 0.06, + "learning_rate": 4.894816380882033e-05, + "loss": 2.0741, + "step": 284000 + }, + { + "epoch": 0.06, + "learning_rate": 4.8946311984540086e-05, + "loss": 2.1163, + "step": 284500 + }, + { + "epoch": 0.06, + "learning_rate": 4.8944460160259836e-05, + "loss": 2.0718, + "step": 285000 + }, + { + "epoch": 0.06, + "learning_rate": 4.894260833597959e-05, + "loss": 2.0572, + "step": 285500 + }, + { + "epoch": 0.06, + "learning_rate": 4.894075651169935e-05, + "loss": 2.0786, + "step": 286000 + }, + { + "epoch": 0.06, + "learning_rate": 4.89389046874191e-05, + "loss": 2.1539, + "step": 286500 + }, + { + "epoch": 0.06, + "learning_rate": 4.893705286313886e-05, + "loss": 2.0523, + "step": 287000 + }, + { + "epoch": 0.06, + "learning_rate": 4.893520103885861e-05, + "loss": 2.0635, + "step": 287500 + }, + { + "epoch": 0.06, + "learning_rate": 4.8933349214578364e-05, + "loss": 2.0962, + "step": 288000 + }, + { + "epoch": 0.06, + "learning_rate": 4.8931497390298114e-05, + "loss": 2.1501, + "step": 288500 + }, + { + "epoch": 0.06, + "learning_rate": 4.892964556601787e-05, + "loss": 2.0864, + "step": 289000 + }, + { + "epoch": 0.06, + "learning_rate": 4.892779374173763e-05, + "loss": 2.0495, + "step": 289500 + }, + { + "epoch": 0.06, + "learning_rate": 4.892594191745738e-05, + "loss": 2.0418, + "step": 290000 + }, + { + "epoch": 0.06, + "learning_rate": 4.8924090093177135e-05, + "loss": 2.1161, + "step": 290500 + }, + { + "epoch": 0.06, + "learning_rate": 4.8922238268896885e-05, + "loss": 2.0743, + "step": 291000 + }, + { + "epoch": 0.06, + "learning_rate": 4.892038644461664e-05, + "loss": 2.1056, + "step": 291500 + }, + { + "epoch": 0.06, + "learning_rate": 4.89185346203364e-05, + "loss": 2.0449, + "step": 292000 + }, + { + "epoch": 0.06, + "learning_rate": 4.891668279605615e-05, + "loss": 2.1395, + "step": 292500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8914830971775906e-05, + "loss": 2.1422, + "step": 293000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8912979147495656e-05, + "loss": 2.0949, + "step": 293500 + }, + { + "epoch": 0.07, + "learning_rate": 4.891112732321541e-05, + "loss": 2.0853, + "step": 294000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8909275498935163e-05, + "loss": 2.0809, + "step": 294500 + }, + { + "epoch": 0.07, + "learning_rate": 4.890742367465492e-05, + "loss": 2.0908, + "step": 295000 + }, + { + "epoch": 0.07, + "learning_rate": 4.890557185037468e-05, + "loss": 1.9894, + "step": 295500 + }, + { + "epoch": 0.07, + "learning_rate": 4.890372002609443e-05, + "loss": 2.1431, + "step": 296000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8901868201814184e-05, + "loss": 2.0502, + "step": 296500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8900016377533935e-05, + "loss": 2.0828, + "step": 297000 + }, + { + "epoch": 0.07, + "learning_rate": 4.889816455325369e-05, + "loss": 1.9749, + "step": 297500 + }, + { + "epoch": 0.07, + "learning_rate": 4.889631272897344e-05, + "loss": 2.091, + "step": 298000 + }, + { + "epoch": 0.07, + "learning_rate": 4.88944609046932e-05, + "loss": 2.0509, + "step": 298500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8892609080412956e-05, + "loss": 2.0141, + "step": 299000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8890757256132706e-05, + "loss": 2.1212, + "step": 299500 + }, + { + "epoch": 0.07, + "learning_rate": 4.888890543185246e-05, + "loss": 2.1005, + "step": 300000 + }, + { + "epoch": 0.07, + "learning_rate": 4.888705360757221e-05, + "loss": 2.1135, + "step": 300500 + }, + { + "epoch": 0.07, + "learning_rate": 4.888520178329197e-05, + "loss": 2.0971, + "step": 301000 + }, + { + "epoch": 0.07, + "learning_rate": 4.888334995901173e-05, + "loss": 2.0434, + "step": 301500 + }, + { + "epoch": 0.07, + "learning_rate": 4.888149813473148e-05, + "loss": 2.0563, + "step": 302000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8879646310451234e-05, + "loss": 2.0624, + "step": 302500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8877794486170984e-05, + "loss": 2.0478, + "step": 303000 + }, + { + "epoch": 0.07, + "learning_rate": 4.887594266189074e-05, + "loss": 2.058, + "step": 303500 + }, + { + "epoch": 0.07, + "learning_rate": 4.887409083761049e-05, + "loss": 2.0822, + "step": 304000 + }, + { + "epoch": 0.07, + "learning_rate": 4.887223901333025e-05, + "loss": 2.0673, + "step": 304500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8870387189050005e-05, + "loss": 2.0657, + "step": 305000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8868535364769755e-05, + "loss": 2.0843, + "step": 305500 + }, + { + "epoch": 0.07, + "learning_rate": 4.886668354048951e-05, + "loss": 2.068, + "step": 306000 + }, + { + "epoch": 0.07, + "learning_rate": 4.886483171620926e-05, + "loss": 2.0716, + "step": 306500 + }, + { + "epoch": 0.07, + "learning_rate": 4.886297989192902e-05, + "loss": 2.0293, + "step": 307000 + }, + { + "epoch": 0.07, + "learning_rate": 4.886112806764877e-05, + "loss": 2.0616, + "step": 307500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8859276243368526e-05, + "loss": 2.0941, + "step": 308000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8857424419088283e-05, + "loss": 1.9884, + "step": 308500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8855572594808034e-05, + "loss": 2.0897, + "step": 309000 + }, + { + "epoch": 0.07, + "learning_rate": 4.885372077052779e-05, + "loss": 2.0541, + "step": 309500 + }, + { + "epoch": 0.07, + "learning_rate": 4.885186894624754e-05, + "loss": 1.9981, + "step": 310000 + }, + { + "epoch": 0.07, + "learning_rate": 4.88500171219673e-05, + "loss": 2.0618, + "step": 310500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8848165297687055e-05, + "loss": 2.0483, + "step": 311000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8846313473406805e-05, + "loss": 2.1106, + "step": 311500 + }, + { + "epoch": 0.07, + "learning_rate": 4.884446164912656e-05, + "loss": 2.0483, + "step": 312000 + }, + { + "epoch": 0.07, + "learning_rate": 4.884260982484631e-05, + "loss": 2.0277, + "step": 312500 + }, + { + "epoch": 0.07, + "learning_rate": 4.884075800056607e-05, + "loss": 1.9978, + "step": 313000 + }, + { + "epoch": 0.07, + "learning_rate": 4.883890617628582e-05, + "loss": 2.0311, + "step": 313500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8837054352005576e-05, + "loss": 2.059, + "step": 314000 + }, + { + "epoch": 0.07, + "learning_rate": 4.883520252772533e-05, + "loss": 2.0999, + "step": 314500 + }, + { + "epoch": 0.07, + "learning_rate": 4.883335070344508e-05, + "loss": 2.0588, + "step": 315000 + }, + { + "epoch": 0.07, + "learning_rate": 4.883149887916484e-05, + "loss": 2.0898, + "step": 315500 + }, + { + "epoch": 0.07, + "learning_rate": 4.882964705488459e-05, + "loss": 2.1084, + "step": 316000 + }, + { + "epoch": 0.07, + "learning_rate": 4.882779523060435e-05, + "loss": 2.1465, + "step": 316500 + }, + { + "epoch": 0.07, + "learning_rate": 4.88259434063241e-05, + "loss": 2.105, + "step": 317000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8824091582043854e-05, + "loss": 2.0882, + "step": 317500 + }, + { + "epoch": 0.07, + "learning_rate": 4.882223975776361e-05, + "loss": 2.0737, + "step": 318000 + }, + { + "epoch": 0.07, + "learning_rate": 4.882038793348336e-05, + "loss": 2.1274, + "step": 318500 + }, + { + "epoch": 0.07, + "learning_rate": 4.881853610920312e-05, + "loss": 2.0419, + "step": 319000 + }, + { + "epoch": 0.07, + "learning_rate": 4.881668428492287e-05, + "loss": 2.0014, + "step": 319500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8814832460642625e-05, + "loss": 2.0236, + "step": 320000 + }, + { + "epoch": 0.07, + "learning_rate": 4.881298063636238e-05, + "loss": 2.1488, + "step": 320500 + }, + { + "epoch": 0.07, + "learning_rate": 4.881112881208213e-05, + "loss": 2.0795, + "step": 321000 + }, + { + "epoch": 0.07, + "learning_rate": 4.880927698780189e-05, + "loss": 2.0669, + "step": 321500 + }, + { + "epoch": 0.07, + "learning_rate": 4.880742516352164e-05, + "loss": 2.1564, + "step": 322000 + }, + { + "epoch": 0.07, + "learning_rate": 4.88055733392414e-05, + "loss": 2.0505, + "step": 322500 + }, + { + "epoch": 0.07, + "learning_rate": 4.880372151496115e-05, + "loss": 2.0582, + "step": 323000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8801869690680904e-05, + "loss": 2.1198, + "step": 323500 + }, + { + "epoch": 0.07, + "learning_rate": 4.880001786640066e-05, + "loss": 2.0779, + "step": 324000 + }, + { + "epoch": 0.07, + "learning_rate": 4.879816604212041e-05, + "loss": 2.098, + "step": 324500 + }, + { + "epoch": 0.07, + "learning_rate": 4.879631421784017e-05, + "loss": 2.068, + "step": 325000 + }, + { + "epoch": 0.07, + "learning_rate": 4.879446239355992e-05, + "loss": 2.0426, + "step": 325500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8792610569279675e-05, + "loss": 2.0374, + "step": 326000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8790758744999425e-05, + "loss": 2.0615, + "step": 326500 + }, + { + "epoch": 0.07, + "learning_rate": 4.878890692071918e-05, + "loss": 2.1002, + "step": 327000 + }, + { + "epoch": 0.07, + "learning_rate": 4.878705509643894e-05, + "loss": 2.1018, + "step": 327500 + }, + { + "epoch": 0.07, + "learning_rate": 4.878520327215869e-05, + "loss": 2.0349, + "step": 328000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8783351447878446e-05, + "loss": 2.0651, + "step": 328500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8781499623598196e-05, + "loss": 1.9743, + "step": 329000 + }, + { + "epoch": 0.07, + "learning_rate": 4.877964779931795e-05, + "loss": 2.0576, + "step": 329500 + }, + { + "epoch": 0.07, + "learning_rate": 4.877779597503771e-05, + "loss": 2.0568, + "step": 330000 + }, + { + "epoch": 0.07, + "learning_rate": 4.877594415075746e-05, + "loss": 2.0538, + "step": 330500 + }, + { + "epoch": 0.07, + "learning_rate": 4.877409232647722e-05, + "loss": 2.0727, + "step": 331000 + }, + { + "epoch": 0.07, + "learning_rate": 4.877224050219697e-05, + "loss": 2.1174, + "step": 331500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8770388677916724e-05, + "loss": 2.1466, + "step": 332000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8768536853636475e-05, + "loss": 2.1125, + "step": 332500 + }, + { + "epoch": 0.07, + "learning_rate": 4.876668502935623e-05, + "loss": 2.0779, + "step": 333000 + }, + { + "epoch": 0.07, + "learning_rate": 4.876483320507599e-05, + "loss": 2.065, + "step": 333500 + }, + { + "epoch": 0.07, + "learning_rate": 4.876298138079574e-05, + "loss": 2.0613, + "step": 334000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8761129556515496e-05, + "loss": 2.1109, + "step": 334500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8759277732235246e-05, + "loss": 2.0144, + "step": 335000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8757425907955e-05, + "loss": 2.1222, + "step": 335500 + }, + { + "epoch": 0.07, + "learning_rate": 4.875557408367475e-05, + "loss": 2.0914, + "step": 336000 + }, + { + "epoch": 0.07, + "learning_rate": 4.875372225939451e-05, + "loss": 2.0208, + "step": 336500 + }, + { + "epoch": 0.07, + "learning_rate": 4.875187043511427e-05, + "loss": 2.0179, + "step": 337000 + }, + { + "epoch": 0.07, + "learning_rate": 4.875001861083402e-05, + "loss": 2.0414, + "step": 337500 + }, + { + "epoch": 0.08, + "learning_rate": 4.8748166786553774e-05, + "loss": 2.1039, + "step": 338000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8746314962273524e-05, + "loss": 2.0596, + "step": 338500 + }, + { + "epoch": 0.08, + "learning_rate": 4.874446313799328e-05, + "loss": 2.0743, + "step": 339000 + }, + { + "epoch": 0.08, + "learning_rate": 4.874261131371303e-05, + "loss": 2.0327, + "step": 339500 + }, + { + "epoch": 0.08, + "learning_rate": 4.874075948943279e-05, + "loss": 2.0576, + "step": 340000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8738907665152545e-05, + "loss": 2.0559, + "step": 340500 + }, + { + "epoch": 0.08, + "learning_rate": 4.8737055840872295e-05, + "loss": 2.042, + "step": 341000 + }, + { + "epoch": 0.08, + "learning_rate": 4.873520401659205e-05, + "loss": 2.0752, + "step": 341500 + }, + { + "epoch": 0.08, + "learning_rate": 4.87333521923118e-05, + "loss": 2.0516, + "step": 342000 + }, + { + "epoch": 0.08, + "learning_rate": 4.873150036803156e-05, + "loss": 2.1037, + "step": 342500 + }, + { + "epoch": 0.08, + "learning_rate": 4.8729648543751316e-05, + "loss": 2.1216, + "step": 343000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8727796719471066e-05, + "loss": 2.0398, + "step": 343500 + }, + { + "epoch": 0.08, + "learning_rate": 4.872594489519082e-05, + "loss": 2.0446, + "step": 344000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8724093070910574e-05, + "loss": 2.0327, + "step": 344500 + }, + { + "epoch": 0.08, + "learning_rate": 4.872224124663033e-05, + "loss": 2.0941, + "step": 345000 + }, + { + "epoch": 0.08, + "learning_rate": 4.872038942235008e-05, + "loss": 1.995, + "step": 345500 + }, + { + "epoch": 0.08, + "learning_rate": 4.8718537598069844e-05, + "loss": 2.0341, + "step": 346000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8716685773789595e-05, + "loss": 1.9983, + "step": 346500 + }, + { + "epoch": 0.08, + "learning_rate": 4.8714833949509345e-05, + "loss": 2.0519, + "step": 347000 + }, + { + "epoch": 0.08, + "learning_rate": 4.87129821252291e-05, + "loss": 2.0699, + "step": 347500 + }, + { + "epoch": 0.08, + "learning_rate": 4.871113030094885e-05, + "loss": 2.0541, + "step": 348000 + }, + { + "epoch": 0.08, + "learning_rate": 4.870927847666861e-05, + "loss": 2.1049, + "step": 348500 + }, + { + "epoch": 0.08, + "learning_rate": 4.870742665238836e-05, + "loss": 2.0521, + "step": 349000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8705574828108116e-05, + "loss": 2.0555, + "step": 349500 + }, + { + "epoch": 0.08, + "learning_rate": 4.870372300382787e-05, + "loss": 2.0386, + "step": 350000 + }, + { + "epoch": 0.08, + "learning_rate": 4.870187117954762e-05, + "loss": 2.0703, + "step": 350500 + }, + { + "epoch": 0.08, + "learning_rate": 4.870001935526738e-05, + "loss": 2.1147, + "step": 351000 + }, + { + "epoch": 0.08, + "learning_rate": 4.869816753098713e-05, + "loss": 2.025, + "step": 351500 + }, + { + "epoch": 0.08, + "learning_rate": 4.869631570670689e-05, + "loss": 2.1052, + "step": 352000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8694463882426644e-05, + "loss": 2.1232, + "step": 352500 + }, + { + "epoch": 0.08, + "learning_rate": 4.8692612058146394e-05, + "loss": 2.0589, + "step": 353000 + }, + { + "epoch": 0.08, + "learning_rate": 4.869076023386615e-05, + "loss": 2.0957, + "step": 353500 + }, + { + "epoch": 0.08, + "learning_rate": 4.86889084095859e-05, + "loss": 2.1189, + "step": 354000 + }, + { + "epoch": 0.08, + "learning_rate": 4.868705658530566e-05, + "loss": 2.0901, + "step": 354500 + }, + { + "epoch": 0.08, + "learning_rate": 4.868520476102541e-05, + "loss": 2.0272, + "step": 355000 + }, + { + "epoch": 0.08, + "learning_rate": 4.868335293674517e-05, + "loss": 2.0451, + "step": 355500 + }, + { + "epoch": 0.08, + "learning_rate": 4.868150111246492e-05, + "loss": 2.0311, + "step": 356000 + }, + { + "epoch": 0.08, + "learning_rate": 4.867964928818467e-05, + "loss": 2.0617, + "step": 356500 + }, + { + "epoch": 0.0, + "learning_rate": 4.867779746390443e-05, + "loss": 2.0693, + "step": 357000 + }, + { + "epoch": 0.0, + "learning_rate": 4.867594563962418e-05, + "loss": 2.1253, + "step": 357500 + }, + { + "epoch": 0.0, + "learning_rate": 4.8674093815343937e-05, + "loss": 2.0701, + "step": 358000 + }, + { + "epoch": 0.0, + "learning_rate": 4.867224199106369e-05, + "loss": 2.0521, + "step": 358500 + }, + { + "epoch": 0.0, + "learning_rate": 4.867039016678345e-05, + "loss": 2.0298, + "step": 359000 + }, + { + "epoch": 0.0, + "learning_rate": 4.86685383425032e-05, + "loss": 2.0321, + "step": 359500 + }, + { + "epoch": 0.0, + "learning_rate": 4.866668651822295e-05, + "loss": 2.0373, + "step": 360000 + }, + { + "epoch": 0.0, + "learning_rate": 4.866483469394271e-05, + "loss": 2.0453, + "step": 360500 + }, + { + "epoch": 0.0, + "learning_rate": 4.866298286966246e-05, + "loss": 2.0423, + "step": 361000 + }, + { + "epoch": 0.0, + "learning_rate": 4.8661131045382215e-05, + "loss": 2.0792, + "step": 361500 + }, + { + "epoch": 0.0, + "learning_rate": 4.865927922110197e-05, + "loss": 2.1337, + "step": 362000 + }, + { + "epoch": 0.0, + "learning_rate": 4.865742739682172e-05, + "loss": 1.9964, + "step": 362500 + }, + { + "epoch": 0.0, + "learning_rate": 4.865557557254148e-05, + "loss": 2.0639, + "step": 363000 + }, + { + "epoch": 0.0, + "learning_rate": 4.865372374826123e-05, + "loss": 2.0159, + "step": 363500 + }, + { + "epoch": 0.0, + "learning_rate": 4.8651871923980986e-05, + "loss": 2.0956, + "step": 364000 + }, + { + "epoch": 0.0, + "learning_rate": 4.8650020099700736e-05, + "loss": 2.0309, + "step": 364500 + }, + { + "epoch": 0.0, + "learning_rate": 4.864816827542049e-05, + "loss": 2.0339, + "step": 365000 + }, + { + "epoch": 0.0, + "learning_rate": 4.864631645114025e-05, + "loss": 2.0868, + "step": 365500 + }, + { + "epoch": 0.0, + "learning_rate": 4.864446462686e-05, + "loss": 1.9837, + "step": 366000 + }, + { + "epoch": 0.0, + "learning_rate": 4.864261280257976e-05, + "loss": 2.0658, + "step": 366500 + }, + { + "epoch": 0.0, + "learning_rate": 4.864076097829951e-05, + "loss": 1.9948, + "step": 367000 + }, + { + "epoch": 0.0, + "learning_rate": 4.8638909154019264e-05, + "loss": 2.0789, + "step": 367500 + }, + { + "epoch": 0.0, + "learning_rate": 4.8637057329739015e-05, + "loss": 2.0611, + "step": 368000 + }, + { + "epoch": 0.0, + "learning_rate": 4.863520550545878e-05, + "loss": 2.0874, + "step": 368500 + }, + { + "epoch": 0.0, + "learning_rate": 4.863335368117853e-05, + "loss": 2.0583, + "step": 369000 + }, + { + "epoch": 0.0, + "learning_rate": 4.863150185689828e-05, + "loss": 2.0309, + "step": 369500 + }, + { + "epoch": 0.0, + "learning_rate": 4.8629650032618036e-05, + "loss": 2.0677, + "step": 370000 + }, + { + "epoch": 0.0, + "learning_rate": 4.8627798208337786e-05, + "loss": 1.9741, + "step": 370500 + }, + { + "epoch": 0.0, + "learning_rate": 4.862594638405754e-05, + "loss": 2.0508, + "step": 371000 + }, + { + "epoch": 0.0, + "learning_rate": 4.86240945597773e-05, + "loss": 2.0375, + "step": 371500 + }, + { + "epoch": 0.0, + "learning_rate": 4.8622242735497057e-05, + "loss": 2.0502, + "step": 372000 + }, + { + "epoch": 0.0, + "learning_rate": 4.862039091121681e-05, + "loss": 2.0519, + "step": 372500 + }, + { + "epoch": 0.0, + "learning_rate": 4.861853908693656e-05, + "loss": 1.9897, + "step": 373000 + }, + { + "epoch": 0.0, + "learning_rate": 4.8616687262656314e-05, + "loss": 2.0186, + "step": 373500 + }, + { + "epoch": 0.0, + "learning_rate": 4.8614835438376064e-05, + "loss": 2.031, + "step": 374000 + }, + { + "epoch": 0.0, + "learning_rate": 4.861298361409582e-05, + "loss": 2.0009, + "step": 374500 + }, + { + "epoch": 0.0, + "learning_rate": 4.861113178981558e-05, + "loss": 2.0465, + "step": 375000 + }, + { + "epoch": 0.0, + "learning_rate": 4.8609279965535335e-05, + "loss": 2.0583, + "step": 375500 + }, + { + "epoch": 0.0, + "learning_rate": 4.8607428141255085e-05, + "loss": 1.9984, + "step": 376000 + }, + { + "epoch": 0.0, + "learning_rate": 4.8605576316974835e-05, + "loss": 2.0159, + "step": 376500 + }, + { + "epoch": 0.0, + "learning_rate": 4.860372449269459e-05, + "loss": 2.0457, + "step": 377000 + }, + { + "epoch": 0.0, + "learning_rate": 4.860187266841434e-05, + "loss": 2.0298, + "step": 377500 + }, + { + "epoch": 0.0, + "learning_rate": 4.8600020844134106e-05, + "loss": 2.027, + "step": 378000 + }, + { + "epoch": 0.0, + "learning_rate": 4.8598169019853856e-05, + "loss": 2.1008, + "step": 378500 + }, + { + "epoch": 0.0, + "learning_rate": 4.8596317195573606e-05, + "loss": 2.0358, + "step": 379000 + }, + { + "epoch": 0.01, + "learning_rate": 4.859446537129336e-05, + "loss": 2.0131, + "step": 379500 + }, + { + "epoch": 0.01, + "learning_rate": 4.8592613547013113e-05, + "loss": 2.0221, + "step": 380000 + }, + { + "epoch": 0.01, + "learning_rate": 4.859076172273287e-05, + "loss": 2.0612, + "step": 380500 + }, + { + "epoch": 0.01, + "learning_rate": 4.858890989845263e-05, + "loss": 1.9445, + "step": 381000 + }, + { + "epoch": 0.01, + "learning_rate": 4.8587058074172384e-05, + "loss": 2.0688, + "step": 381500 + }, + { + "epoch": 0.01, + "learning_rate": 4.8585206249892134e-05, + "loss": 2.0329, + "step": 382000 + }, + { + "epoch": 0.01, + "learning_rate": 4.8583354425611885e-05, + "loss": 1.9824, + "step": 382500 + }, + { + "epoch": 0.01, + "learning_rate": 4.858150260133164e-05, + "loss": 2.0359, + "step": 383000 + }, + { + "epoch": 0.01, + "learning_rate": 4.857965077705139e-05, + "loss": 2.0619, + "step": 383500 + }, + { + "epoch": 0.01, + "learning_rate": 4.857779895277115e-05, + "loss": 1.9951, + "step": 384000 + }, + { + "epoch": 0.01, + "learning_rate": 4.8575947128490906e-05, + "loss": 2.0312, + "step": 384500 + }, + { + "epoch": 0.01, + "learning_rate": 4.857409530421066e-05, + "loss": 1.8873, + "step": 385000 + }, + { + "epoch": 0.01, + "learning_rate": 4.857224347993041e-05, + "loss": 1.9459, + "step": 385500 + }, + { + "epoch": 0.01, + "learning_rate": 4.857039165565016e-05, + "loss": 2.0782, + "step": 386000 + }, + { + "epoch": 0.01, + "learning_rate": 4.856853983136992e-05, + "loss": 1.974, + "step": 386500 + }, + { + "epoch": 0.01, + "learning_rate": 4.856668800708967e-05, + "loss": 2.0018, + "step": 387000 + }, + { + "epoch": 0.01, + "learning_rate": 4.8564836182809434e-05, + "loss": 2.0126, + "step": 387500 + }, + { + "epoch": 0.01, + "learning_rate": 4.8562984358529184e-05, + "loss": 1.999, + "step": 388000 + }, + { + "epoch": 0.0, + "learning_rate": 4.856113253424894e-05, + "loss": 1.5514, + "step": 388500 + }, + { + "epoch": 0.0, + "learning_rate": 4.855928070996869e-05, + "loss": 1.5872, + "step": 389000 + }, + { + "epoch": 0.0, + "learning_rate": 4.855742888568844e-05, + "loss": 1.5182, + "step": 389500 + }, + { + "epoch": 0.0, + "learning_rate": 4.85555770614082e-05, + "loss": 1.4703, + "step": 390000 + }, + { + "epoch": 0.0, + "learning_rate": 4.855372523712795e-05, + "loss": 1.454, + "step": 390500 + }, + { + "epoch": 0.0, + "learning_rate": 4.855187341284771e-05, + "loss": 1.4345, + "step": 391000 + }, + { + "epoch": 0.0, + "learning_rate": 4.855002158856746e-05, + "loss": 1.433, + "step": 391500 + }, + { + "epoch": 0.0, + "learning_rate": 4.854816976428721e-05, + "loss": 1.4309, + "step": 392000 + }, + { + "epoch": 0.0, + "learning_rate": 4.854631794000697e-05, + "loss": 1.3785, + "step": 392500 + }, + { + "epoch": 0.0, + "learning_rate": 4.854446611572672e-05, + "loss": 1.4492, + "step": 393000 + }, + { + "epoch": 0.0, + "learning_rate": 4.8542614291446476e-05, + "loss": 1.4585, + "step": 393500 + }, + { + "epoch": 0.0, + "learning_rate": 4.8540762467166233e-05, + "loss": 1.3263, + "step": 394000 + }, + { + "epoch": 0.0, + "learning_rate": 4.853891064288599e-05, + "loss": 1.411, + "step": 394500 + }, + { + "epoch": 0.0, + "learning_rate": 4.853705881860574e-05, + "loss": 1.3492, + "step": 395000 + }, + { + "epoch": 0.0, + "learning_rate": 4.853520699432549e-05, + "loss": 1.4024, + "step": 395500 + }, + { + "epoch": 0.0, + "learning_rate": 4.853335517004525e-05, + "loss": 1.3816, + "step": 396000 + }, + { + "epoch": 0.0, + "learning_rate": 4.8531503345765e-05, + "loss": 1.3385, + "step": 396500 + }, + { + "epoch": 0.0, + "learning_rate": 4.852965152148476e-05, + "loss": 1.4043, + "step": 397000 + }, + { + "epoch": 0.0, + "learning_rate": 4.852779969720451e-05, + "loss": 1.3078, + "step": 397500 + }, + { + "epoch": 0.0, + "learning_rate": 4.852594787292427e-05, + "loss": 1.3505, + "step": 398000 + }, + { + "epoch": 0.0, + "learning_rate": 4.852409604864402e-05, + "loss": 1.3205, + "step": 398500 + }, + { + "epoch": 0.0, + "learning_rate": 4.852224422436377e-05, + "loss": 1.3744, + "step": 399000 + }, + { + "epoch": 0.0, + "learning_rate": 4.8520392400083526e-05, + "loss": 1.3697, + "step": 399500 + }, + { + "epoch": 0.0, + "learning_rate": 4.8518540575803276e-05, + "loss": 1.3913, + "step": 400000 + }, + { + "epoch": 0.0, + "learning_rate": 4.851668875152304e-05, + "loss": 1.3664, + "step": 400500 + }, + { + "epoch": 0.0, + "learning_rate": 4.851483692724279e-05, + "loss": 1.3093, + "step": 401000 + }, + { + "epoch": 0.0, + "learning_rate": 4.851298510296255e-05, + "loss": 1.3701, + "step": 401500 + }, + { + "epoch": 0.0, + "learning_rate": 4.85111332786823e-05, + "loss": 1.2745, + "step": 402000 + }, + { + "epoch": 0.0, + "learning_rate": 4.850928145440205e-05, + "loss": 1.3443, + "step": 402500 + }, + { + "epoch": 0.0, + "learning_rate": 4.8507429630121804e-05, + "loss": 1.3313, + "step": 403000 + }, + { + "epoch": 0.0, + "learning_rate": 4.850557780584156e-05, + "loss": 1.3441, + "step": 403500 + }, + { + "epoch": 0.0, + "learning_rate": 4.850372598156132e-05, + "loss": 1.3171, + "step": 404000 + }, + { + "epoch": 0.0, + "learning_rate": 4.850187415728107e-05, + "loss": 1.2825, + "step": 404500 + }, + { + "epoch": 0.0, + "learning_rate": 4.850002233300082e-05, + "loss": 1.317, + "step": 405000 + }, + { + "epoch": 0.0, + "learning_rate": 4.8498170508720575e-05, + "loss": 1.313, + "step": 405500 + }, + { + "epoch": 0.0, + "learning_rate": 4.8496318684440326e-05, + "loss": 1.2906, + "step": 406000 + }, + { + "epoch": 0.0, + "learning_rate": 4.849446686016009e-05, + "loss": 1.3497, + "step": 406500 + }, + { + "epoch": 0.0, + "learning_rate": 4.849261503587984e-05, + "loss": 1.352, + "step": 407000 + }, + { + "epoch": 0.0, + "learning_rate": 4.8490763211599596e-05, + "loss": 1.3175, + "step": 407500 + }, + { + "epoch": 0.0, + "learning_rate": 4.848891138731935e-05, + "loss": 1.336, + "step": 408000 + }, + { + "epoch": 0.0, + "learning_rate": 4.84870595630391e-05, + "loss": 1.3383, + "step": 408500 + }, + { + "epoch": 0.0, + "learning_rate": 4.8485207738758854e-05, + "loss": 1.3373, + "step": 409000 + }, + { + "epoch": 0.0, + "learning_rate": 4.8483355914478604e-05, + "loss": 1.3319, + "step": 409500 + }, + { + "epoch": 0.0, + "learning_rate": 4.848150409019837e-05, + "loss": 1.386, + "step": 410000 + }, + { + "epoch": 0.0, + "learning_rate": 4.847965226591812e-05, + "loss": 1.3308, + "step": 410500 + }, + { + "epoch": 0.01, + "learning_rate": 4.8477800441637875e-05, + "loss": 1.306, + "step": 411000 + }, + { + "epoch": 0.01, + "learning_rate": 4.8475948617357625e-05, + "loss": 1.3129, + "step": 411500 + }, + { + "epoch": 0.01, + "learning_rate": 4.8474096793077375e-05, + "loss": 1.3699, + "step": 412000 + }, + { + "epoch": 0.01, + "learning_rate": 4.847224496879713e-05, + "loss": 1.2743, + "step": 412500 + }, + { + "epoch": 0.01, + "learning_rate": 4.847039314451689e-05, + "loss": 1.3443, + "step": 413000 + }, + { + "epoch": 0.01, + "learning_rate": 4.8468541320236646e-05, + "loss": 1.355, + "step": 413500 + }, + { + "epoch": 0.01, + "learning_rate": 4.8466689495956396e-05, + "loss": 1.2891, + "step": 414000 + }, + { + "epoch": 0.01, + "learning_rate": 4.846483767167615e-05, + "loss": 1.3233, + "step": 414500 + }, + { + "epoch": 0.01, + "learning_rate": 4.84629858473959e-05, + "loss": 1.3748, + "step": 415000 + }, + { + "epoch": 0.01, + "learning_rate": 4.846113402311565e-05, + "loss": 1.3159, + "step": 415500 + }, + { + "epoch": 0.01, + "learning_rate": 4.845928219883541e-05, + "loss": 1.3326, + "step": 416000 + }, + { + "epoch": 0.01, + "learning_rate": 4.845743037455517e-05, + "loss": 1.2145, + "step": 416500 + }, + { + "epoch": 0.01, + "learning_rate": 4.8455578550274924e-05, + "loss": 1.2815, + "step": 417000 + }, + { + "epoch": 0.01, + "learning_rate": 4.8453726725994674e-05, + "loss": 1.4068, + "step": 417500 + }, + { + "epoch": 0.01, + "learning_rate": 4.845187490171443e-05, + "loss": 1.3169, + "step": 418000 + }, + { + "epoch": 0.01, + "learning_rate": 4.845002307743418e-05, + "loss": 1.32, + "step": 418500 + }, + { + "epoch": 0.01, + "learning_rate": 4.844817125315393e-05, + "loss": 1.3116, + "step": 419000 + }, + { + "epoch": 0.01, + "learning_rate": 4.8446319428873695e-05, + "loss": 1.3248, + "step": 419500 + }, + { + "epoch": 0.0, + "learning_rate": 4.8444467604593446e-05, + "loss": 0.9912, + "step": 420000 + }, + { + "epoch": 0.0, + "learning_rate": 4.84426157803132e-05, + "loss": 0.2414, + "step": 420500 + }, + { + "epoch": 0.0, + "learning_rate": 4.844076395603295e-05, + "loss": 1.1361, + "step": 421000 + }, + { + "epoch": 0.0, + "learning_rate": 4.84389121317527e-05, + "loss": 1.0437, + "step": 421500 + }, + { + "epoch": 0.0, + "learning_rate": 4.843706030747246e-05, + "loss": 1.0156, + "step": 422000 + }, + { + "epoch": 0.0, + "learning_rate": 4.843520848319222e-05, + "loss": 0.9771, + "step": 422500 + }, + { + "epoch": 0.0, + "learning_rate": 4.8433356658911974e-05, + "loss": 0.947, + "step": 423000 + }, + { + "epoch": 0.0, + "learning_rate": 4.8431504834631724e-05, + "loss": 0.9558, + "step": 423500 + }, + { + "epoch": 0.0, + "learning_rate": 4.842965301035148e-05, + "loss": 0.9735, + "step": 424000 + }, + { + "epoch": 0.0, + "learning_rate": 4.842780118607123e-05, + "loss": 0.9152, + "step": 424500 + }, + { + "epoch": 0.0, + "learning_rate": 4.842594936179098e-05, + "loss": 0.9708, + "step": 425000 + }, + { + "epoch": 0.0, + "learning_rate": 4.842409753751074e-05, + "loss": 0.9948, + "step": 425500 + }, + { + "epoch": 0.0, + "learning_rate": 4.8422245713230495e-05, + "loss": 0.9122, + "step": 426000 + }, + { + "epoch": 0.0, + "learning_rate": 4.842039388895025e-05, + "loss": 0.9536, + "step": 426500 + }, + { + "epoch": 0.0, + "learning_rate": 4.841854206467e-05, + "loss": 0.8994, + "step": 427000 + }, + { + "epoch": 0.0, + "learning_rate": 4.841669024038976e-05, + "loss": 0.9621, + "step": 427500 + }, + { + "epoch": 0.0, + "learning_rate": 4.841483841610951e-05, + "loss": 0.9441, + "step": 428000 + }, + { + "epoch": 0.0, + "learning_rate": 4.841298659182926e-05, + "loss": 0.885, + "step": 428500 + }, + { + "epoch": 0.0, + "learning_rate": 4.841113476754902e-05, + "loss": 0.9768, + "step": 429000 + }, + { + "epoch": 0.0, + "learning_rate": 4.840928294326877e-05, + "loss": 0.8844, + "step": 429500 + }, + { + "epoch": 0.0, + "learning_rate": 4.840743111898853e-05, + "loss": 0.9265, + "step": 430000 + }, + { + "epoch": 0.0, + "learning_rate": 4.840557929470828e-05, + "loss": 0.8965, + "step": 430500 + }, + { + "epoch": 0.0, + "learning_rate": 4.840372747042804e-05, + "loss": 0.9333, + "step": 431000 + }, + { + "epoch": 0.0, + "learning_rate": 4.840187564614779e-05, + "loss": 0.9553, + "step": 431500 + }, + { + "epoch": 0.0, + "learning_rate": 4.8400023821867545e-05, + "loss": 0.9454, + "step": 432000 + }, + { + "epoch": 0.0, + "learning_rate": 4.83981719975873e-05, + "loss": 0.9002, + "step": 432500 + }, + { + "epoch": 0.0, + "learning_rate": 4.839632017330705e-05, + "loss": 0.869, + "step": 433000 + }, + { + "epoch": 0.0, + "learning_rate": 4.839446834902681e-05, + "loss": 0.9202, + "step": 433500 + }, + { + "epoch": 0.0, + "learning_rate": 4.839261652474656e-05, + "loss": 0.881, + "step": 434000 + }, + { + "epoch": 0.0, + "learning_rate": 4.839076470046631e-05, + "loss": 0.906, + "step": 434500 + }, + { + "epoch": 0.0, + "learning_rate": 4.8388912876186066e-05, + "loss": 0.9354, + "step": 435000 + }, + { + "epoch": 0.0, + "learning_rate": 4.838706105190582e-05, + "loss": 0.9224, + "step": 435500 + }, + { + "epoch": 0.0, + "learning_rate": 4.838520922762558e-05, + "loss": 0.9339, + "step": 436000 + }, + { + "epoch": 0.0, + "learning_rate": 4.838335740334533e-05, + "loss": 0.8621, + "step": 436500 + }, + { + "epoch": 0.0, + "learning_rate": 4.838150557906509e-05, + "loss": 0.9027, + "step": 437000 + }, + { + "epoch": 0.0, + "learning_rate": 4.837965375478484e-05, + "loss": 0.8946, + "step": 437500 + }, + { + "epoch": 0.0, + "learning_rate": 4.837780193050459e-05, + "loss": 0.8645, + "step": 438000 + }, + { + "epoch": 0.0, + "learning_rate": 4.837595010622435e-05, + "loss": 0.9124, + "step": 438500 + }, + { + "epoch": 0.0, + "learning_rate": 4.83740982819441e-05, + "loss": 0.9027, + "step": 439000 + }, + { + "epoch": 0.0, + "learning_rate": 4.837224645766386e-05, + "loss": 0.9064, + "step": 439500 + }, + { + "epoch": 0.0, + "learning_rate": 4.837039463338361e-05, + "loss": 0.9091, + "step": 440000 + }, + { + "epoch": 0.0, + "learning_rate": 4.8368542809103365e-05, + "loss": 0.9159, + "step": 440500 + }, + { + "epoch": 0.0, + "learning_rate": 4.8366690984823115e-05, + "loss": 0.907, + "step": 441000 + }, + { + "epoch": 0.0, + "learning_rate": 4.8364839160542866e-05, + "loss": 0.8951, + "step": 441500 + }, + { + "epoch": 0.0, + "learning_rate": 4.836298733626263e-05, + "loss": 0.9397, + "step": 442000 + }, + { + "epoch": 0.0, + "learning_rate": 4.836113551198238e-05, + "loss": 0.8823, + "step": 442500 + }, + { + "epoch": 0.01, + "learning_rate": 4.8359283687702136e-05, + "loss": 0.8911, + "step": 443000 + }, + { + "epoch": 0.01, + "learning_rate": 4.8357431863421887e-05, + "loss": 0.8936, + "step": 443500 + }, + { + "epoch": 0.01, + "learning_rate": 4.8355580039141643e-05, + "loss": 0.9403, + "step": 444000 + }, + { + "epoch": 0.01, + "learning_rate": 4.8353728214861394e-05, + "loss": 0.8754, + "step": 444500 + }, + { + "epoch": 0.01, + "learning_rate": 4.835187639058115e-05, + "loss": 0.9071, + "step": 445000 + }, + { + "epoch": 0.01, + "learning_rate": 4.835002456630091e-05, + "loss": 0.9121, + "step": 445500 + }, + { + "epoch": 0.01, + "learning_rate": 4.834817274202066e-05, + "loss": 0.8832, + "step": 446000 + }, + { + "epoch": 0.01, + "learning_rate": 4.8346320917740415e-05, + "loss": 0.8992, + "step": 446500 + }, + { + "epoch": 0.01, + "learning_rate": 4.8344469093460165e-05, + "loss": 0.8978, + "step": 447000 + }, + { + "epoch": 0.01, + "learning_rate": 4.834261726917992e-05, + "loss": 0.8992, + "step": 447500 + }, + { + "epoch": 0.01, + "learning_rate": 4.834076544489968e-05, + "loss": 0.8995, + "step": 448000 + }, + { + "epoch": 0.01, + "learning_rate": 4.833891362061943e-05, + "loss": 0.8194, + "step": 448500 + }, + { + "epoch": 0.01, + "learning_rate": 4.8337061796339186e-05, + "loss": 0.8614, + "step": 449000 + }, + { + "epoch": 0.01, + "learning_rate": 4.8335209972058936e-05, + "loss": 0.9629, + "step": 449500 + }, + { + "epoch": 0.01, + "learning_rate": 4.833335814777869e-05, + "loss": 0.8858, + "step": 450000 + }, + { + "epoch": 0.01, + "learning_rate": 4.833150632349844e-05, + "loss": 0.8869, + "step": 450500 + }, + { + "epoch": 0.01, + "learning_rate": 4.832965449921819e-05, + "loss": 0.9027, + "step": 451000 + }, + { + "epoch": 0.01, + "learning_rate": 4.832780267493796e-05, + "loss": 0.9113, + "step": 451500 + }, + { + "epoch": 0.01, + "learning_rate": 4.832595085065771e-05, + "loss": 2.3254, + "step": 452000 + }, + { + "epoch": 0.01, + "learning_rate": 4.8324099026377464e-05, + "loss": 2.1624, + "step": 452500 + }, + { + "epoch": 0.01, + "learning_rate": 4.8322247202097214e-05, + "loss": 2.1429, + "step": 453000 + }, + { + "epoch": 0.01, + "learning_rate": 4.832039537781697e-05, + "loss": 2.1301, + "step": 453500 + }, + { + "epoch": 0.01, + "learning_rate": 4.831854355353672e-05, + "loss": 2.2401, + "step": 454000 + }, + { + "epoch": 0.01, + "learning_rate": 4.831669172925648e-05, + "loss": 2.1386, + "step": 454500 + }, + { + "epoch": 0.01, + "learning_rate": 4.8314839904976235e-05, + "loss": 2.178, + "step": 455000 + }, + { + "epoch": 0.01, + "learning_rate": 4.8312988080695985e-05, + "loss": 2.1548, + "step": 455500 + }, + { + "epoch": 0.01, + "learning_rate": 4.831113625641574e-05, + "loss": 2.1627, + "step": 456000 + }, + { + "epoch": 0.01, + "learning_rate": 4.830928443213549e-05, + "loss": 2.2119, + "step": 456500 + }, + { + "epoch": 0.01, + "learning_rate": 4.830743260785525e-05, + "loss": 2.212, + "step": 457000 + }, + { + "epoch": 0.01, + "learning_rate": 4.8305580783575007e-05, + "loss": 2.1224, + "step": 457500 + }, + { + "epoch": 0.01, + "learning_rate": 4.830372895929476e-05, + "loss": 2.2152, + "step": 458000 + }, + { + "epoch": 0.01, + "learning_rate": 4.8301877135014514e-05, + "loss": 2.1278, + "step": 458500 + }, + { + "epoch": 0.01, + "learning_rate": 4.8300025310734264e-05, + "loss": 2.1791, + "step": 459000 + }, + { + "epoch": 0.01, + "learning_rate": 4.829817348645402e-05, + "loss": 2.137, + "step": 459500 + }, + { + "epoch": 0.01, + "learning_rate": 4.829632166217377e-05, + "loss": 2.177, + "step": 460000 + }, + { + "epoch": 0.01, + "learning_rate": 4.829446983789353e-05, + "loss": 2.1272, + "step": 460500 + }, + { + "epoch": 0.01, + "learning_rate": 4.8292618013613285e-05, + "loss": 2.1177, + "step": 461000 + }, + { + "epoch": 0.01, + "learning_rate": 4.8290766189333035e-05, + "loss": 2.106, + "step": 461500 + }, + { + "epoch": 0.01, + "learning_rate": 4.828891436505279e-05, + "loss": 2.2041, + "step": 462000 + }, + { + "epoch": 0.01, + "learning_rate": 4.828706254077254e-05, + "loss": 2.1161, + "step": 462500 + }, + { + "epoch": 0.01, + "learning_rate": 4.82852107164923e-05, + "loss": 2.1244, + "step": 463000 + }, + { + "epoch": 0.01, + "learning_rate": 4.828335889221205e-05, + "loss": 2.0704, + "step": 463500 + }, + { + "epoch": 0.01, + "learning_rate": 4.8281507067931806e-05, + "loss": 2.1964, + "step": 464000 + }, + { + "epoch": 0.01, + "learning_rate": 4.827965524365156e-05, + "loss": 2.1661, + "step": 464500 + }, + { + "epoch": 0.01, + "learning_rate": 4.827780341937131e-05, + "loss": 2.1416, + "step": 465000 + }, + { + "epoch": 0.01, + "learning_rate": 4.827595159509107e-05, + "loss": 2.1467, + "step": 465500 + }, + { + "epoch": 0.01, + "learning_rate": 4.827409977081082e-05, + "loss": 2.1394, + "step": 466000 + }, + { + "epoch": 0.01, + "learning_rate": 4.827224794653058e-05, + "loss": 2.079, + "step": 466500 + }, + { + "epoch": 0.01, + "learning_rate": 4.827039612225033e-05, + "loss": 2.0361, + "step": 467000 + }, + { + "epoch": 0.01, + "learning_rate": 4.8268544297970084e-05, + "loss": 2.15, + "step": 467500 + }, + { + "epoch": 0.01, + "learning_rate": 4.826669247368984e-05, + "loss": 2.1647, + "step": 468000 + }, + { + "epoch": 0.01, + "learning_rate": 4.826484064940959e-05, + "loss": 2.1629, + "step": 468500 + }, + { + "epoch": 0.01, + "learning_rate": 4.826298882512935e-05, + "loss": 2.1334, + "step": 469000 + }, + { + "epoch": 0.01, + "learning_rate": 4.82611370008491e-05, + "loss": 2.1717, + "step": 469500 + }, + { + "epoch": 0.01, + "learning_rate": 4.8259285176568856e-05, + "loss": 2.1106, + "step": 470000 + }, + { + "epoch": 0.01, + "learning_rate": 4.825743335228861e-05, + "loss": 2.0979, + "step": 470500 + }, + { + "epoch": 0.01, + "learning_rate": 4.825558152800836e-05, + "loss": 2.0478, + "step": 471000 + }, + { + "epoch": 0.01, + "learning_rate": 4.825372970372812e-05, + "loss": 2.1778, + "step": 471500 + }, + { + "epoch": 0.01, + "learning_rate": 4.825187787944787e-05, + "loss": 2.1486, + "step": 472000 + }, + { + "epoch": 0.01, + "learning_rate": 4.825002605516763e-05, + "loss": 2.0521, + "step": 472500 + }, + { + "epoch": 0.01, + "learning_rate": 4.824817423088738e-05, + "loss": 2.0744, + "step": 473000 + }, + { + "epoch": 0.01, + "learning_rate": 4.8246322406607134e-05, + "loss": 2.027, + "step": 473500 + }, + { + "epoch": 0.01, + "learning_rate": 4.824447058232689e-05, + "loss": 2.1347, + "step": 474000 + }, + { + "epoch": 0.01, + "learning_rate": 4.824261875804664e-05, + "loss": 2.0729, + "step": 474500 + }, + { + "epoch": 0.01, + "learning_rate": 4.82407669337664e-05, + "loss": 2.0448, + "step": 475000 + }, + { + "epoch": 0.01, + "learning_rate": 4.823891510948615e-05, + "loss": 2.1762, + "step": 475500 + }, + { + "epoch": 0.01, + "learning_rate": 4.8237063285205905e-05, + "loss": 2.0928, + "step": 476000 + }, + { + "epoch": 0.01, + "learning_rate": 4.8235211460925655e-05, + "loss": 2.1397, + "step": 476500 + }, + { + "epoch": 0.01, + "learning_rate": 4.823335963664541e-05, + "loss": 2.1395, + "step": 477000 + }, + { + "epoch": 0.01, + "learning_rate": 4.823150781236517e-05, + "loss": 2.0817, + "step": 477500 + }, + { + "epoch": 0.01, + "learning_rate": 4.822965598808492e-05, + "loss": 2.1291, + "step": 478000 + }, + { + "epoch": 0.01, + "learning_rate": 4.8227804163804676e-05, + "loss": 2.1392, + "step": 478500 + }, + { + "epoch": 0.01, + "learning_rate": 4.8225952339524426e-05, + "loss": 2.1123, + "step": 479000 + }, + { + "epoch": 0.01, + "learning_rate": 4.8224100515244183e-05, + "loss": 2.1467, + "step": 479500 + }, + { + "epoch": 0.01, + "learning_rate": 4.822224869096394e-05, + "loss": 2.0869, + "step": 480000 + }, + { + "epoch": 0.01, + "learning_rate": 4.822039686668369e-05, + "loss": 2.1907, + "step": 480500 + }, + { + "epoch": 0.01, + "learning_rate": 4.821854504240345e-05, + "loss": 2.1319, + "step": 481000 + }, + { + "epoch": 0.01, + "learning_rate": 4.82166932181232e-05, + "loss": 2.0684, + "step": 481500 + }, + { + "epoch": 0.01, + "learning_rate": 4.8214841393842955e-05, + "loss": 2.0638, + "step": 482000 + }, + { + "epoch": 0.01, + "learning_rate": 4.8212989569562705e-05, + "loss": 2.17, + "step": 482500 + }, + { + "epoch": 0.01, + "learning_rate": 4.821113774528246e-05, + "loss": 2.0686, + "step": 483000 + }, + { + "epoch": 0.01, + "learning_rate": 4.820928592100222e-05, + "loss": 2.1202, + "step": 483500 + }, + { + "epoch": 0.01, + "learning_rate": 4.820743409672197e-05, + "loss": 2.1897, + "step": 484000 + }, + { + "epoch": 0.01, + "learning_rate": 4.8205582272441726e-05, + "loss": 2.0982, + "step": 484500 + }, + { + "epoch": 0.01, + "learning_rate": 4.8203730448161476e-05, + "loss": 2.1002, + "step": 485000 + }, + { + "epoch": 0.01, + "learning_rate": 4.820187862388123e-05, + "loss": 2.0226, + "step": 485500 + }, + { + "epoch": 0.01, + "learning_rate": 4.820002679960098e-05, + "loss": 2.1291, + "step": 486000 + }, + { + "epoch": 0.01, + "learning_rate": 4.819817497532074e-05, + "loss": 2.1365, + "step": 486500 + }, + { + "epoch": 0.01, + "learning_rate": 4.81963231510405e-05, + "loss": 2.0817, + "step": 487000 + }, + { + "epoch": 0.01, + "learning_rate": 4.819447132676025e-05, + "loss": 2.0787, + "step": 487500 + }, + { + "epoch": 0.02, + "learning_rate": 4.8192619502480004e-05, + "loss": 2.0578, + "step": 488000 + }, + { + "epoch": 0.02, + "learning_rate": 4.8190767678199754e-05, + "loss": 2.0518, + "step": 488500 + }, + { + "epoch": 0.02, + "learning_rate": 4.818891585391951e-05, + "loss": 2.0799, + "step": 489000 + }, + { + "epoch": 0.02, + "learning_rate": 4.818706402963927e-05, + "loss": 2.1135, + "step": 489500 + }, + { + "epoch": 0.02, + "learning_rate": 4.818521220535902e-05, + "loss": 2.1702, + "step": 490000 + }, + { + "epoch": 0.02, + "learning_rate": 4.8183360381078775e-05, + "loss": 2.0636, + "step": 490500 + }, + { + "epoch": 0.02, + "learning_rate": 4.8181508556798525e-05, + "loss": 2.0613, + "step": 491000 + }, + { + "epoch": 0.02, + "learning_rate": 4.817965673251828e-05, + "loss": 2.0866, + "step": 491500 + }, + { + "epoch": 0.02, + "learning_rate": 4.817780490823803e-05, + "loss": 2.0757, + "step": 492000 + }, + { + "epoch": 0.02, + "learning_rate": 4.817595308395779e-05, + "loss": 2.071, + "step": 492500 + }, + { + "epoch": 0.02, + "learning_rate": 4.8174101259677546e-05, + "loss": 2.0989, + "step": 493000 + }, + { + "epoch": 0.02, + "learning_rate": 4.8172249435397297e-05, + "loss": 2.0395, + "step": 493500 + }, + { + "epoch": 0.02, + "learning_rate": 4.8170397611117054e-05, + "loss": 2.121, + "step": 494000 + }, + { + "epoch": 0.02, + "learning_rate": 4.8168545786836804e-05, + "loss": 2.1112, + "step": 494500 + }, + { + "epoch": 0.02, + "learning_rate": 4.816669396255656e-05, + "loss": 2.0351, + "step": 495000 + }, + { + "epoch": 0.02, + "learning_rate": 4.816484213827631e-05, + "loss": 2.139, + "step": 495500 + }, + { + "epoch": 0.02, + "learning_rate": 4.816299031399607e-05, + "loss": 2.0029, + "step": 496000 + }, + { + "epoch": 0.02, + "learning_rate": 4.8161138489715825e-05, + "loss": 2.05, + "step": 496500 + }, + { + "epoch": 0.02, + "learning_rate": 4.8159286665435575e-05, + "loss": 2.0657, + "step": 497000 + }, + { + "epoch": 0.02, + "learning_rate": 4.815743484115533e-05, + "loss": 2.106, + "step": 497500 + }, + { + "epoch": 0.02, + "learning_rate": 4.815558301687508e-05, + "loss": 2.0543, + "step": 498000 + }, + { + "epoch": 0.02, + "learning_rate": 4.815373119259484e-05, + "loss": 2.0514, + "step": 498500 + }, + { + "epoch": 0.02, + "learning_rate": 4.8151879368314596e-05, + "loss": 2.0788, + "step": 499000 + }, + { + "epoch": 0.02, + "learning_rate": 4.8150027544034346e-05, + "loss": 2.0402, + "step": 499500 + }, + { + "epoch": 0.02, + "learning_rate": 4.81481757197541e-05, + "loss": 2.1085, + "step": 500000 + }, + { + "epoch": 0.02, + "learning_rate": 4.814632389547385e-05, + "loss": 2.0492, + "step": 500500 + }, + { + "epoch": 0.02, + "learning_rate": 4.814447207119361e-05, + "loss": 2.0417, + "step": 501000 + }, + { + "epoch": 0.02, + "learning_rate": 4.814262024691336e-05, + "loss": 2.1058, + "step": 501500 + }, + { + "epoch": 0.02, + "learning_rate": 4.814076842263312e-05, + "loss": 2.0954, + "step": 502000 + }, + { + "epoch": 0.02, + "learning_rate": 4.8138916598352874e-05, + "loss": 2.0358, + "step": 502500 + }, + { + "epoch": 0.02, + "learning_rate": 4.8137064774072624e-05, + "loss": 2.1121, + "step": 503000 + }, + { + "epoch": 0.02, + "learning_rate": 4.813521294979238e-05, + "loss": 2.064, + "step": 503500 + }, + { + "epoch": 0.02, + "learning_rate": 4.813336112551213e-05, + "loss": 2.0141, + "step": 504000 + }, + { + "epoch": 0.02, + "learning_rate": 4.813150930123189e-05, + "loss": 2.0963, + "step": 504500 + }, + { + "epoch": 0.02, + "learning_rate": 4.812965747695164e-05, + "loss": 2.092, + "step": 505000 + }, + { + "epoch": 0.02, + "learning_rate": 4.8127805652671396e-05, + "loss": 2.0702, + "step": 505500 + }, + { + "epoch": 0.02, + "learning_rate": 4.812595382839115e-05, + "loss": 2.132, + "step": 506000 + }, + { + "epoch": 0.02, + "learning_rate": 4.81241020041109e-05, + "loss": 2.0766, + "step": 506500 + }, + { + "epoch": 0.02, + "learning_rate": 4.812225017983066e-05, + "loss": 2.1155, + "step": 507000 + }, + { + "epoch": 0.02, + "learning_rate": 4.812039835555041e-05, + "loss": 2.0564, + "step": 507500 + }, + { + "epoch": 0.02, + "learning_rate": 4.811854653127017e-05, + "loss": 2.0596, + "step": 508000 + }, + { + "epoch": 0.02, + "learning_rate": 4.8116694706989924e-05, + "loss": 2.1147, + "step": 508500 + }, + { + "epoch": 0.02, + "learning_rate": 4.8114842882709674e-05, + "loss": 2.1054, + "step": 509000 + }, + { + "epoch": 0.02, + "learning_rate": 4.811299105842943e-05, + "loss": 2.0307, + "step": 509500 + }, + { + "epoch": 0.02, + "learning_rate": 4.811113923414918e-05, + "loss": 2.0596, + "step": 510000 + }, + { + "epoch": 0.02, + "learning_rate": 4.810928740986894e-05, + "loss": 2.0605, + "step": 510500 + }, + { + "epoch": 0.02, + "learning_rate": 4.810743558558869e-05, + "loss": 2.056, + "step": 511000 + }, + { + "epoch": 0.02, + "learning_rate": 4.8105583761308445e-05, + "loss": 2.0342, + "step": 511500 + }, + { + "epoch": 0.02, + "learning_rate": 4.81037319370282e-05, + "loss": 2.1145, + "step": 512000 + }, + { + "epoch": 0.02, + "learning_rate": 4.810188011274795e-05, + "loss": 2.0648, + "step": 512500 + }, + { + "epoch": 0.02, + "learning_rate": 4.810002828846771e-05, + "loss": 2.1499, + "step": 513000 + }, + { + "epoch": 0.02, + "learning_rate": 4.809817646418746e-05, + "loss": 2.1408, + "step": 513500 + }, + { + "epoch": 0.02, + "learning_rate": 4.8096324639907216e-05, + "loss": 2.0036, + "step": 514000 + }, + { + "epoch": 0.02, + "learning_rate": 4.8094472815626966e-05, + "loss": 2.0102, + "step": 514500 + }, + { + "epoch": 0.02, + "learning_rate": 4.809262099134672e-05, + "loss": 2.0586, + "step": 515000 + }, + { + "epoch": 0.02, + "learning_rate": 4.809076916706648e-05, + "loss": 2.094, + "step": 515500 + }, + { + "epoch": 0.02, + "learning_rate": 4.808891734278623e-05, + "loss": 2.0283, + "step": 516000 + }, + { + "epoch": 0.02, + "learning_rate": 4.808706551850599e-05, + "loss": 2.0327, + "step": 516500 + }, + { + "epoch": 0.02, + "learning_rate": 4.808521369422574e-05, + "loss": 2.1024, + "step": 517000 + }, + { + "epoch": 0.02, + "learning_rate": 4.8083361869945495e-05, + "loss": 2.0714, + "step": 517500 + }, + { + "epoch": 0.02, + "learning_rate": 4.8081510045665245e-05, + "loss": 2.0896, + "step": 518000 + }, + { + "epoch": 0.02, + "learning_rate": 4.8079658221385e-05, + "loss": 2.0503, + "step": 518500 + }, + { + "epoch": 0.02, + "learning_rate": 4.807780639710476e-05, + "loss": 2.0088, + "step": 519000 + }, + { + "epoch": 0.02, + "learning_rate": 4.807595457282451e-05, + "loss": 1.9966, + "step": 519500 + }, + { + "epoch": 0.02, + "learning_rate": 4.8074102748544266e-05, + "loss": 2.0258, + "step": 520000 + }, + { + "epoch": 0.02, + "learning_rate": 4.8072250924264016e-05, + "loss": 2.1098, + "step": 520500 + }, + { + "epoch": 0.02, + "learning_rate": 4.807039909998377e-05, + "loss": 2.0957, + "step": 521000 + }, + { + "epoch": 0.02, + "learning_rate": 4.806854727570353e-05, + "loss": 2.2702, + "step": 521500 + }, + { + "epoch": 0.02, + "learning_rate": 4.806669545142328e-05, + "loss": 2.0737, + "step": 522000 + }, + { + "epoch": 0.02, + "learning_rate": 4.806484362714304e-05, + "loss": 2.0648, + "step": 522500 + }, + { + "epoch": 0.02, + "learning_rate": 4.806299180286279e-05, + "loss": 2.0431, + "step": 523000 + }, + { + "epoch": 0.02, + "learning_rate": 4.8061139978582544e-05, + "loss": 2.0472, + "step": 523500 + }, + { + "epoch": 0.02, + "learning_rate": 4.8059288154302294e-05, + "loss": 2.0653, + "step": 524000 + }, + { + "epoch": 0.02, + "learning_rate": 4.805743633002205e-05, + "loss": 2.0622, + "step": 524500 + }, + { + "epoch": 0.02, + "learning_rate": 4.805558450574181e-05, + "loss": 2.0607, + "step": 525000 + }, + { + "epoch": 0.02, + "learning_rate": 4.805373268146156e-05, + "loss": 2.0656, + "step": 525500 + }, + { + "epoch": 0.02, + "learning_rate": 4.8051880857181315e-05, + "loss": 2.0377, + "step": 526000 + }, + { + "epoch": 0.02, + "learning_rate": 4.8050029032901065e-05, + "loss": 2.0661, + "step": 526500 + }, + { + "epoch": 0.02, + "learning_rate": 4.804817720862082e-05, + "loss": 2.086, + "step": 527000 + }, + { + "epoch": 0.02, + "learning_rate": 4.804632538434057e-05, + "loss": 2.029, + "step": 527500 + }, + { + "epoch": 0.02, + "learning_rate": 4.804447356006033e-05, + "loss": 2.0559, + "step": 528000 + }, + { + "epoch": 0.02, + "learning_rate": 4.8042621735780086e-05, + "loss": 2.0894, + "step": 528500 + }, + { + "epoch": 0.02, + "learning_rate": 4.8040769911499837e-05, + "loss": 2.0668, + "step": 529000 + }, + { + "epoch": 0.02, + "learning_rate": 4.8038918087219593e-05, + "loss": 2.0502, + "step": 529500 + }, + { + "epoch": 0.02, + "learning_rate": 4.8037066262939344e-05, + "loss": 2.0705, + "step": 530000 + }, + { + "epoch": 0.02, + "learning_rate": 4.80352144386591e-05, + "loss": 2.0961, + "step": 530500 + }, + { + "epoch": 0.02, + "learning_rate": 4.803336261437886e-05, + "loss": 2.0796, + "step": 531000 + }, + { + "epoch": 0.02, + "learning_rate": 4.803151079009861e-05, + "loss": 2.0869, + "step": 531500 + }, + { + "epoch": 0.02, + "learning_rate": 4.8029658965818365e-05, + "loss": 2.0623, + "step": 532000 + }, + { + "epoch": 0.02, + "learning_rate": 4.8027807141538115e-05, + "loss": 2.0733, + "step": 532500 + }, + { + "epoch": 0.03, + "learning_rate": 4.802595531725787e-05, + "loss": 2.1174, + "step": 533000 + }, + { + "epoch": 0.03, + "learning_rate": 4.802410349297762e-05, + "loss": 2.0586, + "step": 533500 + }, + { + "epoch": 0.03, + "learning_rate": 4.802225166869738e-05, + "loss": 2.1289, + "step": 534000 + }, + { + "epoch": 0.03, + "learning_rate": 4.8020399844417136e-05, + "loss": 2.1147, + "step": 534500 + }, + { + "epoch": 0.03, + "learning_rate": 4.8018548020136886e-05, + "loss": 2.0457, + "step": 535000 + }, + { + "epoch": 0.03, + "learning_rate": 4.801669619585664e-05, + "loss": 2.0674, + "step": 535500 + }, + { + "epoch": 0.03, + "learning_rate": 4.801484437157639e-05, + "loss": 2.011, + "step": 536000 + }, + { + "epoch": 0.03, + "learning_rate": 4.801299254729615e-05, + "loss": 2.0423, + "step": 536500 + }, + { + "epoch": 0.03, + "learning_rate": 4.80111407230159e-05, + "loss": 2.0702, + "step": 537000 + }, + { + "epoch": 0.03, + "learning_rate": 4.800928889873566e-05, + "loss": 2.0714, + "step": 537500 + }, + { + "epoch": 0.03, + "learning_rate": 4.8007437074455414e-05, + "loss": 2.0333, + "step": 538000 + }, + { + "epoch": 0.03, + "learning_rate": 4.8005585250175164e-05, + "loss": 2.0615, + "step": 538500 + }, + { + "epoch": 0.03, + "learning_rate": 4.800373342589492e-05, + "loss": 2.0282, + "step": 539000 + }, + { + "epoch": 0.03, + "learning_rate": 4.800188160161467e-05, + "loss": 2.0782, + "step": 539500 + }, + { + "epoch": 0.03, + "learning_rate": 4.800002977733443e-05, + "loss": 2.0383, + "step": 540000 + }, + { + "epoch": 0.03, + "learning_rate": 4.7998177953054185e-05, + "loss": 2.0591, + "step": 540500 + }, + { + "epoch": 0.03, + "learning_rate": 4.7996326128773935e-05, + "loss": 2.0856, + "step": 541000 + }, + { + "epoch": 0.03, + "learning_rate": 4.799447430449369e-05, + "loss": 2.0869, + "step": 541500 + }, + { + "epoch": 0.03, + "learning_rate": 4.799262248021344e-05, + "loss": 2.0858, + "step": 542000 + }, + { + "epoch": 0.03, + "learning_rate": 4.79907706559332e-05, + "loss": 2.0886, + "step": 542500 + }, + { + "epoch": 0.03, + "learning_rate": 4.798891883165295e-05, + "loss": 2.0282, + "step": 543000 + }, + { + "epoch": 0.03, + "learning_rate": 4.798706700737271e-05, + "loss": 2.0458, + "step": 543500 + }, + { + "epoch": 0.03, + "learning_rate": 4.7985215183092464e-05, + "loss": 1.9995, + "step": 544000 + }, + { + "epoch": 0.03, + "learning_rate": 4.7983363358812214e-05, + "loss": 2.1449, + "step": 544500 + }, + { + "epoch": 0.03, + "learning_rate": 4.798151153453197e-05, + "loss": 2.0537, + "step": 545000 + }, + { + "epoch": 0.03, + "learning_rate": 4.797965971025172e-05, + "loss": 2.089, + "step": 545500 + }, + { + "epoch": 0.03, + "learning_rate": 4.797780788597148e-05, + "loss": 2.0663, + "step": 546000 + }, + { + "epoch": 0.03, + "learning_rate": 4.797595606169123e-05, + "loss": 2.0505, + "step": 546500 + }, + { + "epoch": 0.03, + "learning_rate": 4.797410423741099e-05, + "loss": 2.0557, + "step": 547000 + }, + { + "epoch": 0.03, + "learning_rate": 4.797225241313074e-05, + "loss": 2.0147, + "step": 547500 + }, + { + "epoch": 0.03, + "learning_rate": 4.797040058885049e-05, + "loss": 2.0047, + "step": 548000 + }, + { + "epoch": 0.03, + "learning_rate": 4.796854876457025e-05, + "loss": 2.0574, + "step": 548500 + }, + { + "epoch": 0.03, + "learning_rate": 4.796669694029e-05, + "loss": 2.0718, + "step": 549000 + }, + { + "epoch": 0.03, + "learning_rate": 4.7964845116009756e-05, + "loss": 2.0654, + "step": 549500 + }, + { + "epoch": 0.03, + "learning_rate": 4.796299329172951e-05, + "loss": 2.1151, + "step": 550000 + }, + { + "epoch": 0.03, + "learning_rate": 4.796114146744926e-05, + "loss": 2.0348, + "step": 550500 + }, + { + "epoch": 0.03, + "learning_rate": 4.795928964316902e-05, + "loss": 2.0774, + "step": 551000 + }, + { + "epoch": 0.03, + "learning_rate": 4.795743781888877e-05, + "loss": 2.0468, + "step": 551500 + }, + { + "epoch": 0.03, + "learning_rate": 4.795558599460853e-05, + "loss": 2.0486, + "step": 552000 + }, + { + "epoch": 0.03, + "learning_rate": 4.795373417032828e-05, + "loss": 2.0349, + "step": 552500 + }, + { + "epoch": 0.03, + "learning_rate": 4.7951882346048034e-05, + "loss": 2.0806, + "step": 553000 + }, + { + "epoch": 0.03, + "learning_rate": 4.795003052176779e-05, + "loss": 2.0708, + "step": 553500 + }, + { + "epoch": 0.03, + "learning_rate": 4.794817869748754e-05, + "loss": 1.9739, + "step": 554000 + }, + { + "epoch": 0.03, + "learning_rate": 4.79463268732073e-05, + "loss": 2.0079, + "step": 554500 + }, + { + "epoch": 0.03, + "learning_rate": 4.794447504892705e-05, + "loss": 2.0891, + "step": 555000 + }, + { + "epoch": 0.03, + "learning_rate": 4.7942623224646806e-05, + "loss": 2.0833, + "step": 555500 + }, + { + "epoch": 0.03, + "learning_rate": 4.7940771400366556e-05, + "loss": 2.0084, + "step": 556000 + }, + { + "epoch": 0.03, + "learning_rate": 4.793891957608632e-05, + "loss": 2.0073, + "step": 556500 + }, + { + "epoch": 0.03, + "learning_rate": 4.793706775180607e-05, + "loss": 2.0094, + "step": 557000 + }, + { + "epoch": 0.03, + "learning_rate": 4.793521592752582e-05, + "loss": 2.0845, + "step": 557500 + }, + { + "epoch": 0.03, + "learning_rate": 4.793336410324558e-05, + "loss": 2.019, + "step": 558000 + }, + { + "epoch": 0.03, + "learning_rate": 4.793151227896533e-05, + "loss": 2.0751, + "step": 558500 + }, + { + "epoch": 0.03, + "learning_rate": 4.7929660454685084e-05, + "loss": 2.0965, + "step": 559000 + }, + { + "epoch": 0.03, + "learning_rate": 4.792780863040484e-05, + "loss": 2.0792, + "step": 559500 + }, + { + "epoch": 0.03, + "learning_rate": 4.79259568061246e-05, + "loss": 2.0228, + "step": 560000 + }, + { + "epoch": 0.03, + "learning_rate": 4.792410498184435e-05, + "loss": 2.0399, + "step": 560500 + }, + { + "epoch": 0.03, + "learning_rate": 4.79222531575641e-05, + "loss": 2.003, + "step": 561000 + }, + { + "epoch": 0.03, + "learning_rate": 4.7920401333283855e-05, + "loss": 2.0385, + "step": 561500 + }, + { + "epoch": 0.03, + "learning_rate": 4.7918549509003605e-05, + "loss": 2.0809, + "step": 562000 + }, + { + "epoch": 0.03, + "learning_rate": 4.791669768472336e-05, + "loss": 2.0491, + "step": 562500 + }, + { + "epoch": 0.03, + "learning_rate": 4.791484586044312e-05, + "loss": 2.0152, + "step": 563000 + }, + { + "epoch": 0.03, + "learning_rate": 4.791299403616287e-05, + "loss": 1.9715, + "step": 563500 + }, + { + "epoch": 0.03, + "learning_rate": 4.7911142211882626e-05, + "loss": 2.0302, + "step": 564000 + }, + { + "epoch": 0.03, + "learning_rate": 4.7909290387602376e-05, + "loss": 2.0566, + "step": 564500 + }, + { + "epoch": 0.03, + "learning_rate": 4.7907438563322133e-05, + "loss": 2.0745, + "step": 565000 + }, + { + "epoch": 0.03, + "learning_rate": 4.7905586739041884e-05, + "loss": 2.045, + "step": 565500 + }, + { + "epoch": 0.03, + "learning_rate": 4.790373491476165e-05, + "loss": 2.0369, + "step": 566000 + }, + { + "epoch": 0.03, + "learning_rate": 4.79018830904814e-05, + "loss": 1.9952, + "step": 566500 + }, + { + "epoch": 0.03, + "learning_rate": 4.790003126620115e-05, + "loss": 2.1119, + "step": 567000 + }, + { + "epoch": 0.03, + "learning_rate": 4.7898179441920905e-05, + "loss": 2.046, + "step": 567500 + }, + { + "epoch": 0.03, + "learning_rate": 4.7896327617640655e-05, + "loss": 2.0293, + "step": 568000 + }, + { + "epoch": 0.03, + "learning_rate": 4.789447579336041e-05, + "loss": 2.0215, + "step": 568500 + }, + { + "epoch": 0.03, + "learning_rate": 4.789262396908016e-05, + "loss": 2.0929, + "step": 569000 + }, + { + "epoch": 0.03, + "learning_rate": 4.7890772144799926e-05, + "loss": 2.012, + "step": 569500 + }, + { + "epoch": 0.03, + "learning_rate": 4.7888920320519676e-05, + "loss": 2.0962, + "step": 570000 + }, + { + "epoch": 0.03, + "learning_rate": 4.7887068496239426e-05, + "loss": 1.9749, + "step": 570500 + }, + { + "epoch": 0.03, + "learning_rate": 4.788521667195918e-05, + "loss": 2.0525, + "step": 571000 + }, + { + "epoch": 0.03, + "learning_rate": 4.788336484767893e-05, + "loss": 2.0129, + "step": 571500 + }, + { + "epoch": 0.03, + "learning_rate": 4.788151302339869e-05, + "loss": 2.025, + "step": 572000 + }, + { + "epoch": 0.03, + "learning_rate": 4.787966119911845e-05, + "loss": 2.0897, + "step": 572500 + }, + { + "epoch": 0.03, + "learning_rate": 4.7877809374838204e-05, + "loss": 2.0451, + "step": 573000 + }, + { + "epoch": 0.03, + "learning_rate": 4.7875957550557954e-05, + "loss": 1.9899, + "step": 573500 + }, + { + "epoch": 0.03, + "learning_rate": 4.7874105726277704e-05, + "loss": 2.0091, + "step": 574000 + }, + { + "epoch": 0.03, + "learning_rate": 4.787225390199746e-05, + "loss": 2.0697, + "step": 574500 + }, + { + "epoch": 0.03, + "learning_rate": 4.787040207771721e-05, + "loss": 2.0429, + "step": 575000 + }, + { + "epoch": 0.03, + "learning_rate": 4.7868550253436975e-05, + "loss": 2.0134, + "step": 575500 + }, + { + "epoch": 0.03, + "learning_rate": 4.7866698429156725e-05, + "loss": 2.0047, + "step": 576000 + }, + { + "epoch": 0.03, + "learning_rate": 4.7864846604876475e-05, + "loss": 1.9744, + "step": 576500 + }, + { + "epoch": 0.03, + "learning_rate": 4.786299478059623e-05, + "loss": 2.0071, + "step": 577000 + }, + { + "epoch": 0.03, + "learning_rate": 4.786114295631598e-05, + "loss": 2.0124, + "step": 577500 + }, + { + "epoch": 0.04, + "learning_rate": 4.785929113203574e-05, + "loss": 2.0256, + "step": 578000 + }, + { + "epoch": 0.04, + "learning_rate": 4.785743930775549e-05, + "loss": 1.9891, + "step": 578500 + }, + { + "epoch": 0.04, + "learning_rate": 4.785558748347525e-05, + "loss": 2.004, + "step": 579000 + }, + { + "epoch": 0.04, + "learning_rate": 4.7853735659195004e-05, + "loss": 2.0468, + "step": 579500 + }, + { + "epoch": 0.04, + "learning_rate": 4.7851883834914754e-05, + "loss": 1.9861, + "step": 580000 + }, + { + "epoch": 0.04, + "learning_rate": 4.785003201063451e-05, + "loss": 2.022, + "step": 580500 + }, + { + "epoch": 0.04, + "learning_rate": 4.784818018635426e-05, + "loss": 2.0494, + "step": 581000 + }, + { + "epoch": 0.04, + "learning_rate": 4.784632836207402e-05, + "loss": 2.0752, + "step": 581500 + }, + { + "epoch": 0.04, + "learning_rate": 4.7844476537793775e-05, + "loss": 2.0183, + "step": 582000 + }, + { + "epoch": 0.04, + "learning_rate": 4.784262471351353e-05, + "loss": 2.0207, + "step": 582500 + }, + { + "epoch": 0.04, + "learning_rate": 4.784077288923328e-05, + "loss": 2.0097, + "step": 583000 + }, + { + "epoch": 0.04, + "learning_rate": 4.783892106495303e-05, + "loss": 2.0166, + "step": 583500 + }, + { + "epoch": 0.04, + "learning_rate": 4.783706924067279e-05, + "loss": 1.9941, + "step": 584000 + }, + { + "epoch": 0.04, + "learning_rate": 4.783521741639254e-05, + "loss": 2.0063, + "step": 584500 + }, + { + "epoch": 0.04, + "learning_rate": 4.7833365592112296e-05, + "loss": 2.0428, + "step": 585000 + }, + { + "epoch": 0.04, + "learning_rate": 4.783151376783205e-05, + "loss": 2.0325, + "step": 585500 + }, + { + "epoch": 0.04, + "learning_rate": 4.782966194355181e-05, + "loss": 2.0902, + "step": 586000 + }, + { + "epoch": 0.04, + "learning_rate": 4.782781011927156e-05, + "loss": 2.0336, + "step": 586500 + }, + { + "epoch": 0.04, + "learning_rate": 4.782595829499131e-05, + "loss": 2.0924, + "step": 587000 + }, + { + "epoch": 0.04, + "learning_rate": 4.782410647071107e-05, + "loss": 2.1075, + "step": 587500 + }, + { + "epoch": 0.04, + "learning_rate": 4.782225464643082e-05, + "loss": 2.033, + "step": 588000 + }, + { + "epoch": 0.04, + "learning_rate": 4.782040282215058e-05, + "loss": 2.0679, + "step": 588500 + }, + { + "epoch": 0.04, + "learning_rate": 4.781855099787033e-05, + "loss": 2.0452, + "step": 589000 + }, + { + "epoch": 0.04, + "learning_rate": 4.781669917359009e-05, + "loss": 2.017, + "step": 589500 + }, + { + "epoch": 0.04, + "learning_rate": 4.781484734930984e-05, + "loss": 2.0043, + "step": 590000 + }, + { + "epoch": 0.04, + "learning_rate": 4.781299552502959e-05, + "loss": 2.0344, + "step": 590500 + }, + { + "epoch": 0.04, + "learning_rate": 4.7811143700749346e-05, + "loss": 2.0212, + "step": 591000 + }, + { + "epoch": 0.04, + "learning_rate": 4.78092918764691e-05, + "loss": 1.9791, + "step": 591500 + }, + { + "epoch": 0.04, + "learning_rate": 4.780744005218886e-05, + "loss": 2.0494, + "step": 592000 + }, + { + "epoch": 0.04, + "learning_rate": 4.780558822790861e-05, + "loss": 2.0608, + "step": 592500 + }, + { + "epoch": 0.04, + "learning_rate": 4.780373640362836e-05, + "loss": 1.9911, + "step": 593000 + }, + { + "epoch": 0.04, + "learning_rate": 4.780188457934812e-05, + "loss": 1.9922, + "step": 593500 + }, + { + "epoch": 0.04, + "learning_rate": 4.780003275506787e-05, + "loss": 2.0017, + "step": 594000 + }, + { + "epoch": 0.04, + "learning_rate": 4.7798180930787624e-05, + "loss": 2.1381, + "step": 594500 + }, + { + "epoch": 0.04, + "learning_rate": 4.779632910650738e-05, + "loss": 2.0281, + "step": 595000 + }, + { + "epoch": 0.04, + "learning_rate": 4.779447728222714e-05, + "loss": 1.9434, + "step": 595500 + }, + { + "epoch": 0.04, + "learning_rate": 4.779262545794689e-05, + "loss": 1.9834, + "step": 596000 + }, + { + "epoch": 0.04, + "learning_rate": 4.779077363366664e-05, + "loss": 1.9982, + "step": 596500 + }, + { + "epoch": 0.04, + "learning_rate": 4.7788921809386395e-05, + "loss": 2.035, + "step": 597000 + }, + { + "epoch": 0.04, + "learning_rate": 4.7787069985106145e-05, + "loss": 2.0292, + "step": 597500 + }, + { + "epoch": 0.04, + "learning_rate": 4.778521816082591e-05, + "loss": 2.019, + "step": 598000 + }, + { + "epoch": 0.04, + "learning_rate": 4.778336633654566e-05, + "loss": 2.0064, + "step": 598500 + }, + { + "epoch": 0.04, + "learning_rate": 4.7781514512265416e-05, + "loss": 2.041, + "step": 599000 + }, + { + "epoch": 0.04, + "learning_rate": 4.7779662687985166e-05, + "loss": 2.0466, + "step": 599500 + }, + { + "epoch": 0.04, + "learning_rate": 4.7777810863704916e-05, + "loss": 1.9773, + "step": 600000 + }, + { + "epoch": 0.04, + "learning_rate": 4.777595903942467e-05, + "loss": 2.0342, + "step": 600500 + }, + { + "epoch": 0.04, + "learning_rate": 4.777410721514443e-05, + "loss": 2.0407, + "step": 601000 + }, + { + "epoch": 0.04, + "learning_rate": 4.777225539086419e-05, + "loss": 2.0732, + "step": 601500 + }, + { + "epoch": 0.04, + "learning_rate": 4.777040356658394e-05, + "loss": 2.0047, + "step": 602000 + }, + { + "epoch": 0.04, + "learning_rate": 4.7768551742303694e-05, + "loss": 2.0766, + "step": 602500 + }, + { + "epoch": 0.04, + "learning_rate": 4.7766699918023445e-05, + "loss": 2.0555, + "step": 603000 + }, + { + "epoch": 0.04, + "learning_rate": 4.7764848093743195e-05, + "loss": 2.1317, + "step": 603500 + }, + { + "epoch": 0.04, + "learning_rate": 4.776299626946295e-05, + "loss": 2.0506, + "step": 604000 + }, + { + "epoch": 0.04, + "learning_rate": 4.776114444518271e-05, + "loss": 2.0278, + "step": 604500 + }, + { + "epoch": 0.04, + "learning_rate": 4.7759292620902466e-05, + "loss": 2.0139, + "step": 605000 + }, + { + "epoch": 0.04, + "learning_rate": 4.7757440796622216e-05, + "loss": 2.0501, + "step": 605500 + }, + { + "epoch": 0.04, + "learning_rate": 4.7755588972341966e-05, + "loss": 2.0084, + "step": 606000 + }, + { + "epoch": 0.04, + "learning_rate": 4.775373714806172e-05, + "loss": 2.0657, + "step": 606500 + }, + { + "epoch": 0.04, + "learning_rate": 4.775188532378147e-05, + "loss": 2.0552, + "step": 607000 + }, + { + "epoch": 0.04, + "learning_rate": 4.775003349950124e-05, + "loss": 2.0163, + "step": 607500 + }, + { + "epoch": 0.04, + "learning_rate": 4.774818167522099e-05, + "loss": 2.0418, + "step": 608000 + }, + { + "epoch": 0.04, + "learning_rate": 4.7746329850940744e-05, + "loss": 2.0153, + "step": 608500 + }, + { + "epoch": 0.04, + "learning_rate": 4.7744478026660494e-05, + "loss": 2.0354, + "step": 609000 + }, + { + "epoch": 0.04, + "learning_rate": 4.7742626202380244e-05, + "loss": 2.0825, + "step": 609500 + }, + { + "epoch": 0.04, + "learning_rate": 4.77407743781e-05, + "loss": 2.0415, + "step": 610000 + }, + { + "epoch": 0.04, + "learning_rate": 4.773892255381975e-05, + "loss": 2.0278, + "step": 610500 + }, + { + "epoch": 0.04, + "learning_rate": 4.7737070729539515e-05, + "loss": 2.0174, + "step": 611000 + }, + { + "epoch": 0.04, + "learning_rate": 4.7735218905259265e-05, + "loss": 1.9517, + "step": 611500 + }, + { + "epoch": 0.04, + "learning_rate": 4.773336708097902e-05, + "loss": 1.9906, + "step": 612000 + }, + { + "epoch": 0.04, + "learning_rate": 4.773151525669877e-05, + "loss": 2.0513, + "step": 612500 + }, + { + "epoch": 0.04, + "learning_rate": 4.772966343241852e-05, + "loss": 2.0269, + "step": 613000 + }, + { + "epoch": 0.04, + "learning_rate": 4.772781160813828e-05, + "loss": 2.0175, + "step": 613500 + }, + { + "epoch": 0.04, + "learning_rate": 4.7725959783858036e-05, + "loss": 2.0251, + "step": 614000 + }, + { + "epoch": 0.04, + "learning_rate": 4.772410795957779e-05, + "loss": 2.0621, + "step": 614500 + }, + { + "epoch": 0.04, + "learning_rate": 4.7722256135297543e-05, + "loss": 1.9919, + "step": 615000 + }, + { + "epoch": 0.04, + "learning_rate": 4.77204043110173e-05, + "loss": 2.043, + "step": 615500 + }, + { + "epoch": 0.04, + "learning_rate": 4.771855248673705e-05, + "loss": 2.081, + "step": 616000 + }, + { + "epoch": 0.04, + "learning_rate": 4.77167006624568e-05, + "loss": 1.9483, + "step": 616500 + }, + { + "epoch": 0.04, + "learning_rate": 4.7714848838176564e-05, + "loss": 2.0136, + "step": 617000 + }, + { + "epoch": 0.04, + "learning_rate": 4.7712997013896315e-05, + "loss": 2.0351, + "step": 617500 + }, + { + "epoch": 0.04, + "learning_rate": 4.771114518961607e-05, + "loss": 2.0785, + "step": 618000 + }, + { + "epoch": 0.04, + "learning_rate": 4.770929336533582e-05, + "loss": 2.0317, + "step": 618500 + }, + { + "epoch": 0.04, + "learning_rate": 4.770744154105557e-05, + "loss": 2.0219, + "step": 619000 + }, + { + "epoch": 0.04, + "learning_rate": 4.770558971677533e-05, + "loss": 1.9949, + "step": 619500 + }, + { + "epoch": 0.04, + "learning_rate": 4.770373789249508e-05, + "loss": 2.0794, + "step": 620000 + }, + { + "epoch": 0.04, + "learning_rate": 4.770188606821484e-05, + "loss": 2.0506, + "step": 620500 + }, + { + "epoch": 0.04, + "learning_rate": 4.770003424393459e-05, + "loss": 2.0202, + "step": 621000 + }, + { + "epoch": 0.04, + "learning_rate": 4.769818241965435e-05, + "loss": 2.0131, + "step": 621500 + }, + { + "epoch": 0.04, + "learning_rate": 4.76963305953741e-05, + "loss": 2.0382, + "step": 622000 + }, + { + "epoch": 0.04, + "learning_rate": 4.769447877109385e-05, + "loss": 1.9874, + "step": 622500 + }, + { + "epoch": 0.05, + "learning_rate": 4.769262694681361e-05, + "loss": 2.0775, + "step": 623000 + }, + { + "epoch": 0.05, + "learning_rate": 4.7690775122533364e-05, + "loss": 2.0465, + "step": 623500 + }, + { + "epoch": 0.05, + "learning_rate": 4.768892329825312e-05, + "loss": 2.0227, + "step": 624000 + }, + { + "epoch": 0.05, + "learning_rate": 4.768707147397287e-05, + "loss": 1.9616, + "step": 624500 + }, + { + "epoch": 0.05, + "learning_rate": 4.768521964969263e-05, + "loss": 1.9414, + "step": 625000 + }, + { + "epoch": 0.05, + "learning_rate": 4.768336782541238e-05, + "loss": 2.0177, + "step": 625500 + }, + { + "epoch": 0.05, + "learning_rate": 4.768151600113213e-05, + "loss": 2.1022, + "step": 626000 + }, + { + "epoch": 0.05, + "learning_rate": 4.767966417685189e-05, + "loss": 2.0752, + "step": 626500 + }, + { + "epoch": 0.05, + "learning_rate": 4.767781235257164e-05, + "loss": 2.0178, + "step": 627000 + }, + { + "epoch": 0.05, + "learning_rate": 4.76759605282914e-05, + "loss": 2.0748, + "step": 627500 + }, + { + "epoch": 0.05, + "learning_rate": 4.767410870401115e-05, + "loss": 2.0444, + "step": 628000 + }, + { + "epoch": 0.05, + "learning_rate": 4.7672256879730906e-05, + "loss": 2.0182, + "step": 628500 + }, + { + "epoch": 0.05, + "learning_rate": 4.767040505545066e-05, + "loss": 1.9869, + "step": 629000 + }, + { + "epoch": 0.05, + "learning_rate": 4.766855323117041e-05, + "loss": 2.0076, + "step": 629500 + }, + { + "epoch": 0.05, + "learning_rate": 4.766670140689017e-05, + "loss": 2.0308, + "step": 630000 + }, + { + "epoch": 0.05, + "learning_rate": 4.766484958260992e-05, + "loss": 2.0345, + "step": 630500 + }, + { + "epoch": 0.05, + "learning_rate": 4.766299775832968e-05, + "loss": 2.0501, + "step": 631000 + }, + { + "epoch": 0.05, + "learning_rate": 4.766114593404943e-05, + "loss": 2.0931, + "step": 631500 + }, + { + "epoch": 0.05, + "learning_rate": 4.7659294109769185e-05, + "loss": 2.0988, + "step": 632000 + }, + { + "epoch": 0.05, + "learning_rate": 4.7657442285488935e-05, + "loss": 2.0448, + "step": 632500 + }, + { + "epoch": 0.05, + "learning_rate": 4.765559046120869e-05, + "loss": 1.9611, + "step": 633000 + }, + { + "epoch": 0.05, + "learning_rate": 4.765373863692845e-05, + "loss": 2.0259, + "step": 633500 + }, + { + "epoch": 0.05, + "learning_rate": 4.76518868126482e-05, + "loss": 1.9305, + "step": 634000 + }, + { + "epoch": 0.05, + "learning_rate": 4.7650034988367956e-05, + "loss": 2.0294, + "step": 634500 + }, + { + "epoch": 0.05, + "learning_rate": 4.7648183164087706e-05, + "loss": 2.0582, + "step": 635000 + }, + { + "epoch": 0.05, + "learning_rate": 4.7646331339807456e-05, + "loss": 2.0443, + "step": 635500 + }, + { + "epoch": 0.05, + "learning_rate": 4.764447951552721e-05, + "loss": 2.0119, + "step": 636000 + }, + { + "epoch": 0.05, + "learning_rate": 4.764262769124697e-05, + "loss": 2.0091, + "step": 636500 + }, + { + "epoch": 0.05, + "learning_rate": 4.764077586696673e-05, + "loss": 1.9696, + "step": 637000 + }, + { + "epoch": 0.05, + "learning_rate": 4.763892404268648e-05, + "loss": 2.0121, + "step": 637500 + }, + { + "epoch": 0.05, + "learning_rate": 4.7637072218406234e-05, + "loss": 1.9961, + "step": 638000 + }, + { + "epoch": 0.05, + "learning_rate": 4.7635220394125984e-05, + "loss": 1.9558, + "step": 638500 + }, + { + "epoch": 0.05, + "learning_rate": 4.7633368569845735e-05, + "loss": 2.021, + "step": 639000 + }, + { + "epoch": 0.05, + "learning_rate": 4.76315167455655e-05, + "loss": 2.0348, + "step": 639500 + }, + { + "epoch": 0.05, + "learning_rate": 4.762966492128525e-05, + "loss": 2.019, + "step": 640000 + }, + { + "epoch": 0.05, + "learning_rate": 4.7627813097005005e-05, + "loss": 2.0637, + "step": 640500 + }, + { + "epoch": 0.05, + "learning_rate": 4.7625961272724756e-05, + "loss": 1.9666, + "step": 641000 + }, + { + "epoch": 0.05, + "learning_rate": 4.762410944844451e-05, + "loss": 2.0059, + "step": 641500 + }, + { + "epoch": 0.05, + "learning_rate": 4.762225762416426e-05, + "loss": 2.0479, + "step": 642000 + }, + { + "epoch": 0.05, + "learning_rate": 4.762040579988402e-05, + "loss": 2.0932, + "step": 642500 + }, + { + "epoch": 0.05, + "learning_rate": 4.761855397560378e-05, + "loss": 1.9762, + "step": 643000 + }, + { + "epoch": 0.05, + "learning_rate": 4.761670215132353e-05, + "loss": 2.0326, + "step": 643500 + }, + { + "epoch": 0.05, + "learning_rate": 4.7614850327043284e-05, + "loss": 2.0609, + "step": 644000 + }, + { + "epoch": 0.05, + "learning_rate": 4.7612998502763034e-05, + "loss": 2.0542, + "step": 644500 + }, + { + "epoch": 0.05, + "learning_rate": 4.761114667848279e-05, + "loss": 2.0496, + "step": 645000 + }, + { + "epoch": 0.05, + "learning_rate": 4.760929485420254e-05, + "loss": 2.0059, + "step": 645500 + }, + { + "epoch": 0.05, + "learning_rate": 4.76074430299223e-05, + "loss": 2.0539, + "step": 646000 + }, + { + "epoch": 0.05, + "learning_rate": 4.7605591205642055e-05, + "loss": 2.0313, + "step": 646500 + }, + { + "epoch": 0.05, + "learning_rate": 4.7603739381361805e-05, + "loss": 2.0165, + "step": 647000 + }, + { + "epoch": 0.05, + "learning_rate": 4.760188755708156e-05, + "loss": 2.014, + "step": 647500 + }, + { + "epoch": 0.05, + "learning_rate": 4.760003573280131e-05, + "loss": 2.0117, + "step": 648000 + }, + { + "epoch": 0.05, + "learning_rate": 4.759818390852106e-05, + "loss": 2.0378, + "step": 648500 + }, + { + "epoch": 0.05, + "learning_rate": 4.7596332084240826e-05, + "loss": 2.0072, + "step": 649000 + }, + { + "epoch": 0.05, + "learning_rate": 4.7594480259960576e-05, + "loss": 2.0517, + "step": 649500 + }, + { + "epoch": 0.05, + "learning_rate": 4.759262843568033e-05, + "loss": 2.0016, + "step": 650000 + }, + { + "epoch": 0.05, + "learning_rate": 4.7590776611400083e-05, + "loss": 2.018, + "step": 650500 + }, + { + "epoch": 0.05, + "learning_rate": 4.758892478711984e-05, + "loss": 2.0559, + "step": 651000 + }, + { + "epoch": 0.05, + "learning_rate": 4.758707296283959e-05, + "loss": 2.0775, + "step": 651500 + }, + { + "epoch": 0.05, + "learning_rate": 4.758522113855935e-05, + "loss": 2.0608, + "step": 652000 + }, + { + "epoch": 0.05, + "learning_rate": 4.7583369314279104e-05, + "loss": 2.0145, + "step": 652500 + }, + { + "epoch": 0.05, + "learning_rate": 4.7581517489998855e-05, + "loss": 2.0347, + "step": 653000 + }, + { + "epoch": 0.05, + "learning_rate": 4.757966566571861e-05, + "loss": 2.0073, + "step": 653500 + }, + { + "epoch": 0.05, + "learning_rate": 4.757781384143836e-05, + "loss": 1.9822, + "step": 654000 + }, + { + "epoch": 0.05, + "learning_rate": 4.757596201715812e-05, + "loss": 2.0566, + "step": 654500 + }, + { + "epoch": 0.05, + "learning_rate": 4.757411019287787e-05, + "loss": 2.0311, + "step": 655000 + }, + { + "epoch": 0.05, + "learning_rate": 4.7572258368597626e-05, + "loss": 2.0328, + "step": 655500 + }, + { + "epoch": 0.05, + "learning_rate": 4.757040654431738e-05, + "loss": 2.0276, + "step": 656000 + }, + { + "epoch": 0.05, + "learning_rate": 4.756855472003713e-05, + "loss": 1.9355, + "step": 656500 + }, + { + "epoch": 0.05, + "learning_rate": 4.756670289575689e-05, + "loss": 2.0245, + "step": 657000 + }, + { + "epoch": 0.05, + "learning_rate": 4.756485107147664e-05, + "loss": 2.0109, + "step": 657500 + }, + { + "epoch": 0.05, + "learning_rate": 4.75629992471964e-05, + "loss": 2.0427, + "step": 658000 + }, + { + "epoch": 0.05, + "learning_rate": 4.7561147422916154e-05, + "loss": 2.0446, + "step": 658500 + }, + { + "epoch": 0.05, + "learning_rate": 4.7559295598635904e-05, + "loss": 1.9809, + "step": 659000 + }, + { + "epoch": 0.05, + "learning_rate": 4.755744377435566e-05, + "loss": 2.0091, + "step": 659500 + }, + { + "epoch": 0.05, + "learning_rate": 4.755559195007541e-05, + "loss": 1.9967, + "step": 660000 + }, + { + "epoch": 0.05, + "learning_rate": 4.755374012579517e-05, + "loss": 2.1323, + "step": 660500 + }, + { + "epoch": 0.05, + "learning_rate": 4.755188830151492e-05, + "loss": 2.0253, + "step": 661000 + }, + { + "epoch": 0.05, + "learning_rate": 4.755003647723467e-05, + "loss": 2.0328, + "step": 661500 + }, + { + "epoch": 0.05, + "learning_rate": 4.754818465295443e-05, + "loss": 2.0563, + "step": 662000 + }, + { + "epoch": 0.05, + "learning_rate": 4.754633282867418e-05, + "loss": 2.0115, + "step": 662500 + }, + { + "epoch": 0.05, + "learning_rate": 4.754448100439394e-05, + "loss": 1.9977, + "step": 663000 + }, + { + "epoch": 0.05, + "learning_rate": 4.754262918011369e-05, + "loss": 1.9745, + "step": 663500 + }, + { + "epoch": 0.05, + "learning_rate": 4.7540777355833446e-05, + "loss": 2.043, + "step": 664000 + }, + { + "epoch": 0.05, + "learning_rate": 4.7538925531553197e-05, + "loss": 2.0064, + "step": 664500 + }, + { + "epoch": 0.05, + "learning_rate": 4.7537073707272954e-05, + "loss": 2.0269, + "step": 665000 + }, + { + "epoch": 0.05, + "learning_rate": 4.753522188299271e-05, + "loss": 1.9855, + "step": 665500 + }, + { + "epoch": 0.05, + "learning_rate": 4.753337005871246e-05, + "loss": 2.0638, + "step": 666000 + }, + { + "epoch": 0.05, + "learning_rate": 4.753151823443222e-05, + "loss": 2.0233, + "step": 666500 + }, + { + "epoch": 0.05, + "learning_rate": 4.752966641015197e-05, + "loss": 2.0178, + "step": 667000 + }, + { + "epoch": 0.05, + "learning_rate": 4.7527814585871725e-05, + "loss": 2.0099, + "step": 667500 + }, + { + "epoch": 0.06, + "learning_rate": 4.752596276159148e-05, + "loss": 2.0214, + "step": 668000 + }, + { + "epoch": 0.06, + "learning_rate": 4.752411093731123e-05, + "loss": 2.0505, + "step": 668500 + }, + { + "epoch": 0.06, + "learning_rate": 4.752225911303099e-05, + "loss": 2.0089, + "step": 669000 + }, + { + "epoch": 0.06, + "learning_rate": 4.752040728875074e-05, + "loss": 2.0446, + "step": 669500 + }, + { + "epoch": 0.06, + "learning_rate": 4.7518555464470496e-05, + "loss": 1.991, + "step": 670000 + }, + { + "epoch": 0.06, + "learning_rate": 4.7516703640190246e-05, + "loss": 1.974, + "step": 670500 + }, + { + "epoch": 0.06, + "learning_rate": 4.751485181591e-05, + "loss": 1.9898, + "step": 671000 + }, + { + "epoch": 0.06, + "learning_rate": 4.751299999162976e-05, + "loss": 2.0428, + "step": 671500 + }, + { + "epoch": 0.06, + "learning_rate": 4.751114816734951e-05, + "loss": 2.0752, + "step": 672000 + }, + { + "epoch": 0.06, + "learning_rate": 4.750929634306927e-05, + "loss": 1.9674, + "step": 672500 + }, + { + "epoch": 0.06, + "learning_rate": 4.750744451878902e-05, + "loss": 2.0534, + "step": 673000 + }, + { + "epoch": 0.06, + "learning_rate": 4.7505592694508774e-05, + "loss": 1.9736, + "step": 673500 + }, + { + "epoch": 0.06, + "learning_rate": 4.7503740870228524e-05, + "loss": 2.0287, + "step": 674000 + }, + { + "epoch": 0.06, + "learning_rate": 4.750188904594828e-05, + "loss": 2.0177, + "step": 674500 + }, + { + "epoch": 0.06, + "learning_rate": 4.750003722166804e-05, + "loss": 2.0582, + "step": 675000 + }, + { + "epoch": 0.06, + "learning_rate": 4.749818539738779e-05, + "loss": 2.0747, + "step": 675500 + }, + { + "epoch": 0.06, + "learning_rate": 4.7496333573107545e-05, + "loss": 2.0076, + "step": 676000 + }, + { + "epoch": 0.06, + "learning_rate": 4.7494481748827296e-05, + "loss": 1.9876, + "step": 676500 + }, + { + "epoch": 0.06, + "learning_rate": 4.749262992454705e-05, + "loss": 1.9917, + "step": 677000 + }, + { + "epoch": 0.06, + "learning_rate": 4.749077810026681e-05, + "loss": 2.0341, + "step": 677500 + }, + { + "epoch": 0.06, + "learning_rate": 4.748892627598656e-05, + "loss": 2.0301, + "step": 678000 + }, + { + "epoch": 0.06, + "learning_rate": 4.7487074451706317e-05, + "loss": 2.0224, + "step": 678500 + }, + { + "epoch": 0.06, + "learning_rate": 4.748522262742607e-05, + "loss": 2.03, + "step": 679000 + }, + { + "epoch": 0.06, + "learning_rate": 4.7483370803145824e-05, + "loss": 1.9859, + "step": 679500 + }, + { + "epoch": 0.06, + "learning_rate": 4.7481518978865574e-05, + "loss": 1.9941, + "step": 680000 + }, + { + "epoch": 0.06, + "learning_rate": 4.747966715458533e-05, + "loss": 1.9824, + "step": 680500 + }, + { + "epoch": 0.06, + "learning_rate": 4.747781533030509e-05, + "loss": 1.98, + "step": 681000 + }, + { + "epoch": 0.06, + "learning_rate": 4.747596350602484e-05, + "loss": 1.9798, + "step": 681500 + }, + { + "epoch": 0.06, + "learning_rate": 4.7474111681744595e-05, + "loss": 2.0378, + "step": 682000 + }, + { + "epoch": 0.06, + "learning_rate": 4.7472259857464345e-05, + "loss": 1.9793, + "step": 682500 + }, + { + "epoch": 0.06, + "learning_rate": 4.74704080331841e-05, + "loss": 2.0431, + "step": 683000 + }, + { + "epoch": 0.06, + "learning_rate": 4.746855620890385e-05, + "loss": 2.0299, + "step": 683500 + }, + { + "epoch": 0.06, + "learning_rate": 4.746670438462361e-05, + "loss": 2.0331, + "step": 684000 + }, + { + "epoch": 0.06, + "learning_rate": 4.7464852560343366e-05, + "loss": 2.0741, + "step": 684500 + }, + { + "epoch": 0.06, + "learning_rate": 4.7463000736063116e-05, + "loss": 1.9826, + "step": 685000 + }, + { + "epoch": 0.06, + "learning_rate": 4.746114891178287e-05, + "loss": 2.0529, + "step": 685500 + }, + { + "epoch": 0.06, + "learning_rate": 4.745929708750262e-05, + "loss": 2.0303, + "step": 686000 + }, + { + "epoch": 0.06, + "learning_rate": 4.745744526322238e-05, + "loss": 1.9617, + "step": 686500 + }, + { + "epoch": 0.06, + "learning_rate": 4.745559343894213e-05, + "loss": 2.0406, + "step": 687000 + }, + { + "epoch": 0.06, + "learning_rate": 4.745374161466189e-05, + "loss": 2.0287, + "step": 687500 + }, + { + "epoch": 0.06, + "learning_rate": 4.7451889790381644e-05, + "loss": 2.0281, + "step": 688000 + }, + { + "epoch": 0.06, + "learning_rate": 4.7450037966101394e-05, + "loss": 1.9655, + "step": 688500 + }, + { + "epoch": 0.06, + "learning_rate": 4.744818614182115e-05, + "loss": 1.9935, + "step": 689000 + }, + { + "epoch": 0.06, + "learning_rate": 4.74463343175409e-05, + "loss": 1.9347, + "step": 689500 + }, + { + "epoch": 0.06, + "learning_rate": 4.744448249326066e-05, + "loss": 2.0455, + "step": 690000 + }, + { + "epoch": 0.06, + "learning_rate": 4.7442630668980416e-05, + "loss": 2.0223, + "step": 690500 + }, + { + "epoch": 0.06, + "learning_rate": 4.7440778844700166e-05, + "loss": 1.9986, + "step": 691000 + }, + { + "epoch": 0.06, + "learning_rate": 4.743892702041992e-05, + "loss": 1.9804, + "step": 691500 + }, + { + "epoch": 0.06, + "learning_rate": 4.743707519613967e-05, + "loss": 1.9666, + "step": 692000 + }, + { + "epoch": 0.06, + "learning_rate": 4.743522337185943e-05, + "loss": 2.0033, + "step": 692500 + }, + { + "epoch": 0.06, + "learning_rate": 4.743337154757918e-05, + "loss": 2.0306, + "step": 693000 + }, + { + "epoch": 0.06, + "learning_rate": 4.743151972329894e-05, + "loss": 2.0487, + "step": 693500 + }, + { + "epoch": 0.06, + "learning_rate": 4.7429667899018694e-05, + "loss": 2.014, + "step": 694000 + }, + { + "epoch": 0.06, + "learning_rate": 4.7427816074738444e-05, + "loss": 2.1064, + "step": 694500 + }, + { + "epoch": 0.06, + "learning_rate": 4.74259642504582e-05, + "loss": 2.0571, + "step": 695000 + }, + { + "epoch": 0.06, + "learning_rate": 4.742411242617795e-05, + "loss": 2.0911, + "step": 695500 + }, + { + "epoch": 0.06, + "learning_rate": 4.742226060189771e-05, + "loss": 2.04, + "step": 696000 + }, + { + "epoch": 0.06, + "learning_rate": 4.742040877761746e-05, + "loss": 2.0139, + "step": 696500 + }, + { + "epoch": 0.06, + "learning_rate": 4.7418556953337215e-05, + "loss": 1.9906, + "step": 697000 + }, + { + "epoch": 0.06, + "learning_rate": 4.741670512905697e-05, + "loss": 2.0458, + "step": 697500 + }, + { + "epoch": 0.06, + "learning_rate": 4.741485330477672e-05, + "loss": 2.0134, + "step": 698000 + }, + { + "epoch": 0.06, + "learning_rate": 4.741300148049648e-05, + "loss": 2.0296, + "step": 698500 + }, + { + "epoch": 0.06, + "learning_rate": 4.741114965621623e-05, + "loss": 2.0438, + "step": 699000 + }, + { + "epoch": 0.06, + "learning_rate": 4.7409297831935986e-05, + "loss": 2.0439, + "step": 699500 + }, + { + "epoch": 0.06, + "learning_rate": 4.740744600765574e-05, + "loss": 2.12, + "step": 700000 + }, + { + "epoch": 0.06, + "learning_rate": 4.7405594183375493e-05, + "loss": 2.0337, + "step": 700500 + }, + { + "epoch": 0.06, + "learning_rate": 4.740374235909525e-05, + "loss": 2.0302, + "step": 701000 + }, + { + "epoch": 0.06, + "learning_rate": 4.7401890534815e-05, + "loss": 2.0418, + "step": 701500 + }, + { + "epoch": 0.06, + "learning_rate": 4.740003871053476e-05, + "loss": 2.0212, + "step": 702000 + }, + { + "epoch": 0.06, + "learning_rate": 4.739818688625451e-05, + "loss": 1.9915, + "step": 702500 + }, + { + "epoch": 0.06, + "learning_rate": 4.7396335061974265e-05, + "loss": 2.0892, + "step": 703000 + }, + { + "epoch": 0.06, + "learning_rate": 4.739448323769402e-05, + "loss": 1.9904, + "step": 703500 + }, + { + "epoch": 0.06, + "learning_rate": 4.739263141341377e-05, + "loss": 2.1306, + "step": 704000 + }, + { + "epoch": 0.06, + "learning_rate": 4.739077958913353e-05, + "loss": 2.1016, + "step": 704500 + }, + { + "epoch": 0.06, + "learning_rate": 4.738892776485328e-05, + "loss": 2.0034, + "step": 705000 + }, + { + "epoch": 0.06, + "learning_rate": 4.7387075940573036e-05, + "loss": 1.9966, + "step": 705500 + }, + { + "epoch": 0.06, + "learning_rate": 4.7385224116292786e-05, + "loss": 2.0254, + "step": 706000 + }, + { + "epoch": 0.06, + "learning_rate": 4.738337229201254e-05, + "loss": 2.0833, + "step": 706500 + }, + { + "epoch": 0.06, + "learning_rate": 4.73815204677323e-05, + "loss": 2.0133, + "step": 707000 + }, + { + "epoch": 0.06, + "learning_rate": 4.737966864345205e-05, + "loss": 1.9995, + "step": 707500 + }, + { + "epoch": 0.06, + "learning_rate": 4.737781681917181e-05, + "loss": 2.0442, + "step": 708000 + }, + { + "epoch": 0.06, + "learning_rate": 4.737596499489156e-05, + "loss": 2.071, + "step": 708500 + }, + { + "epoch": 0.06, + "learning_rate": 4.7374113170611314e-05, + "loss": 2.0073, + "step": 709000 + }, + { + "epoch": 0.06, + "learning_rate": 4.737226134633107e-05, + "loss": 1.9698, + "step": 709500 + }, + { + "epoch": 0.06, + "learning_rate": 4.737040952205082e-05, + "loss": 1.9787, + "step": 710000 + }, + { + "epoch": 0.06, + "learning_rate": 4.736855769777058e-05, + "loss": 2.0202, + "step": 710500 + }, + { + "epoch": 0.06, + "learning_rate": 4.736670587349033e-05, + "loss": 2.0006, + "step": 711000 + }, + { + "epoch": 0.06, + "learning_rate": 4.7364854049210085e-05, + "loss": 2.0398, + "step": 711500 + }, + { + "epoch": 0.06, + "learning_rate": 4.7363002224929835e-05, + "loss": 1.9721, + "step": 712000 + }, + { + "epoch": 0.06, + "learning_rate": 4.736115040064959e-05, + "loss": 2.0857, + "step": 712500 + }, + { + "epoch": 0.07, + "learning_rate": 4.735929857636935e-05, + "loss": 2.0813, + "step": 713000 + }, + { + "epoch": 0.07, + "learning_rate": 4.73574467520891e-05, + "loss": 2.0385, + "step": 713500 + }, + { + "epoch": 0.07, + "learning_rate": 4.7355594927808856e-05, + "loss": 2.0053, + "step": 714000 + }, + { + "epoch": 0.07, + "learning_rate": 4.735374310352861e-05, + "loss": 2.0042, + "step": 714500 + }, + { + "epoch": 0.07, + "learning_rate": 4.7351891279248364e-05, + "loss": 2.027, + "step": 715000 + }, + { + "epoch": 0.07, + "learning_rate": 4.7350039454968114e-05, + "loss": 1.933, + "step": 715500 + }, + { + "epoch": 0.07, + "learning_rate": 4.734818763068787e-05, + "loss": 2.0986, + "step": 716000 + }, + { + "epoch": 0.07, + "learning_rate": 4.734633580640763e-05, + "loss": 1.9804, + "step": 716500 + }, + { + "epoch": 0.07, + "learning_rate": 4.734448398212738e-05, + "loss": 2.0332, + "step": 717000 + }, + { + "epoch": 0.07, + "learning_rate": 4.7342632157847135e-05, + "loss": 1.9209, + "step": 717500 + }, + { + "epoch": 0.07, + "learning_rate": 4.7340780333566885e-05, + "loss": 2.0463, + "step": 718000 + }, + { + "epoch": 0.07, + "learning_rate": 4.733892850928664e-05, + "loss": 2.003, + "step": 718500 + }, + { + "epoch": 0.07, + "learning_rate": 4.73370766850064e-05, + "loss": 1.9589, + "step": 719000 + }, + { + "epoch": 0.07, + "learning_rate": 4.733522486072615e-05, + "loss": 2.066, + "step": 719500 + }, + { + "epoch": 0.07, + "learning_rate": 4.7333373036445906e-05, + "loss": 2.0514, + "step": 720000 + }, + { + "epoch": 0.07, + "learning_rate": 4.7331521212165656e-05, + "loss": 2.042, + "step": 720500 + }, + { + "epoch": 0.07, + "learning_rate": 4.732966938788541e-05, + "loss": 2.0349, + "step": 721000 + }, + { + "epoch": 0.07, + "learning_rate": 4.732781756360516e-05, + "loss": 1.9522, + "step": 721500 + }, + { + "epoch": 0.07, + "learning_rate": 4.732596573932492e-05, + "loss": 2.0051, + "step": 722000 + }, + { + "epoch": 0.07, + "learning_rate": 4.732411391504468e-05, + "loss": 1.9622, + "step": 722500 + }, + { + "epoch": 0.07, + "learning_rate": 4.732226209076443e-05, + "loss": 1.9827, + "step": 723000 + }, + { + "epoch": 0.07, + "learning_rate": 4.7320410266484184e-05, + "loss": 2.0155, + "step": 723500 + }, + { + "epoch": 0.07, + "learning_rate": 4.7318558442203934e-05, + "loss": 2.016, + "step": 724000 + }, + { + "epoch": 0.07, + "learning_rate": 4.731670661792369e-05, + "loss": 1.9934, + "step": 724500 + }, + { + "epoch": 0.07, + "learning_rate": 4.731485479364344e-05, + "loss": 2.0207, + "step": 725000 + }, + { + "epoch": 0.07, + "learning_rate": 4.73130029693632e-05, + "loss": 2.0133, + "step": 725500 + }, + { + "epoch": 0.07, + "learning_rate": 4.7311151145082955e-05, + "loss": 2.0223, + "step": 726000 + }, + { + "epoch": 0.07, + "learning_rate": 4.7309299320802706e-05, + "loss": 2.0113, + "step": 726500 + }, + { + "epoch": 0.07, + "learning_rate": 4.730744749652246e-05, + "loss": 1.9765, + "step": 727000 + }, + { + "epoch": 0.07, + "learning_rate": 4.730559567224221e-05, + "loss": 2.0156, + "step": 727500 + }, + { + "epoch": 0.07, + "learning_rate": 4.730374384796197e-05, + "loss": 2.0305, + "step": 728000 + }, + { + "epoch": 0.07, + "learning_rate": 4.730189202368173e-05, + "loss": 1.9378, + "step": 728500 + }, + { + "epoch": 0.07, + "learning_rate": 4.730004019940148e-05, + "loss": 2.0449, + "step": 729000 + }, + { + "epoch": 0.07, + "learning_rate": 4.7298188375121234e-05, + "loss": 1.9851, + "step": 729500 + }, + { + "epoch": 0.07, + "learning_rate": 4.7296336550840984e-05, + "loss": 1.9278, + "step": 730000 + }, + { + "epoch": 0.07, + "learning_rate": 4.729448472656074e-05, + "loss": 1.9945, + "step": 730500 + }, + { + "epoch": 0.07, + "learning_rate": 4.729263290228049e-05, + "loss": 1.9841, + "step": 731000 + }, + { + "epoch": 0.07, + "learning_rate": 4.729078107800025e-05, + "loss": 2.0696, + "step": 731500 + }, + { + "epoch": 0.07, + "learning_rate": 4.7288929253720005e-05, + "loss": 1.9924, + "step": 732000 + }, + { + "epoch": 0.07, + "learning_rate": 4.7287077429439755e-05, + "loss": 1.954, + "step": 732500 + }, + { + "epoch": 0.07, + "learning_rate": 4.728522560515951e-05, + "loss": 1.9555, + "step": 733000 + }, + { + "epoch": 0.07, + "learning_rate": 4.728337378087926e-05, + "loss": 1.9455, + "step": 733500 + }, + { + "epoch": 0.07, + "learning_rate": 4.728152195659902e-05, + "loss": 2.0125, + "step": 734000 + }, + { + "epoch": 0.07, + "learning_rate": 4.727967013231877e-05, + "loss": 2.0331, + "step": 734500 + }, + { + "epoch": 0.07, + "learning_rate": 4.7277818308038526e-05, + "loss": 1.9452, + "step": 735000 + }, + { + "epoch": 0.07, + "learning_rate": 4.727596648375828e-05, + "loss": 2.0267, + "step": 735500 + }, + { + "epoch": 0.07, + "learning_rate": 4.727411465947803e-05, + "loss": 2.0673, + "step": 736000 + }, + { + "epoch": 0.07, + "learning_rate": 4.727226283519779e-05, + "loss": 2.0823, + "step": 736500 + }, + { + "epoch": 0.07, + "learning_rate": 4.727041101091754e-05, + "loss": 2.0305, + "step": 737000 + }, + { + "epoch": 0.07, + "learning_rate": 4.72685591866373e-05, + "loss": 2.025, + "step": 737500 + }, + { + "epoch": 0.07, + "learning_rate": 4.726670736235705e-05, + "loss": 2.0237, + "step": 738000 + }, + { + "epoch": 0.07, + "learning_rate": 4.7264855538076805e-05, + "loss": 2.0442, + "step": 738500 + }, + { + "epoch": 0.07, + "learning_rate": 4.726300371379656e-05, + "loss": 1.998, + "step": 739000 + }, + { + "epoch": 0.07, + "learning_rate": 4.726115188951631e-05, + "loss": 1.9642, + "step": 739500 + }, + { + "epoch": 0.07, + "learning_rate": 4.725930006523607e-05, + "loss": 1.9426, + "step": 740000 + }, + { + "epoch": 0.07, + "learning_rate": 4.725744824095582e-05, + "loss": 2.0887, + "step": 740500 + }, + { + "epoch": 0.07, + "learning_rate": 4.7255596416675576e-05, + "loss": 2.0098, + "step": 741000 + }, + { + "epoch": 0.07, + "learning_rate": 4.725374459239533e-05, + "loss": 1.9978, + "step": 741500 + }, + { + "epoch": 0.07, + "learning_rate": 4.725189276811508e-05, + "loss": 2.0871, + "step": 742000 + }, + { + "epoch": 0.07, + "learning_rate": 4.725004094383484e-05, + "loss": 1.9859, + "step": 742500 + }, + { + "epoch": 0.07, + "learning_rate": 4.724818911955459e-05, + "loss": 2.0032, + "step": 743000 + }, + { + "epoch": 0.07, + "learning_rate": 4.724633729527435e-05, + "loss": 2.057, + "step": 743500 + }, + { + "epoch": 0.07, + "learning_rate": 4.72444854709941e-05, + "loss": 2.0079, + "step": 744000 + }, + { + "epoch": 0.07, + "learning_rate": 4.724263364671386e-05, + "loss": 2.0698, + "step": 744500 + }, + { + "epoch": 0.07, + "learning_rate": 4.724078182243361e-05, + "loss": 1.9859, + "step": 745000 + }, + { + "epoch": 0.07, + "learning_rate": 4.723892999815336e-05, + "loss": 1.9798, + "step": 745500 + }, + { + "epoch": 0.07, + "learning_rate": 4.723707817387312e-05, + "loss": 1.9868, + "step": 746000 + }, + { + "epoch": 0.07, + "learning_rate": 4.723522634959287e-05, + "loss": 1.9978, + "step": 746500 + }, + { + "epoch": 0.07, + "learning_rate": 4.7233374525312625e-05, + "loss": 2.0636, + "step": 747000 + }, + { + "epoch": 0.07, + "learning_rate": 4.7231522701032375e-05, + "loss": 2.0346, + "step": 747500 + }, + { + "epoch": 0.07, + "learning_rate": 4.722967087675213e-05, + "loss": 1.9638, + "step": 748000 + }, + { + "epoch": 0.07, + "learning_rate": 4.722781905247189e-05, + "loss": 1.9988, + "step": 748500 + }, + { + "epoch": 0.07, + "learning_rate": 4.722596722819164e-05, + "loss": 1.9167, + "step": 749000 + }, + { + "epoch": 0.07, + "learning_rate": 4.7224115403911396e-05, + "loss": 1.9686, + "step": 749500 + }, + { + "epoch": 0.07, + "learning_rate": 4.7222263579631147e-05, + "loss": 2.0086, + "step": 750000 + }, + { + "epoch": 0.07, + "learning_rate": 4.7220411755350904e-05, + "loss": 1.9991, + "step": 750500 + }, + { + "epoch": 0.07, + "learning_rate": 4.721855993107066e-05, + "loss": 2.0061, + "step": 751000 + }, + { + "epoch": 0.07, + "learning_rate": 4.721670810679041e-05, + "loss": 2.0566, + "step": 751500 + }, + { + "epoch": 0.07, + "learning_rate": 4.721485628251017e-05, + "loss": 2.0546, + "step": 752000 + }, + { + "epoch": 0.07, + "learning_rate": 4.721300445822992e-05, + "loss": 2.0482, + "step": 752500 + }, + { + "epoch": 0.07, + "learning_rate": 4.7211152633949675e-05, + "loss": 2.0236, + "step": 753000 + }, + { + "epoch": 0.07, + "learning_rate": 4.7209300809669425e-05, + "loss": 1.9986, + "step": 753500 + }, + { + "epoch": 0.07, + "learning_rate": 4.720744898538919e-05, + "loss": 2.0081, + "step": 754000 + }, + { + "epoch": 0.07, + "learning_rate": 4.720559716110894e-05, + "loss": 2.0366, + "step": 754500 + }, + { + "epoch": 0.07, + "learning_rate": 4.720374533682869e-05, + "loss": 1.9745, + "step": 755000 + }, + { + "epoch": 0.07, + "learning_rate": 4.7201893512548446e-05, + "loss": 2.0764, + "step": 755500 + }, + { + "epoch": 0.07, + "learning_rate": 4.7200041688268196e-05, + "loss": 2.0012, + "step": 756000 + }, + { + "epoch": 0.07, + "learning_rate": 4.719818986398795e-05, + "loss": 1.9298, + "step": 756500 + }, + { + "epoch": 0.07, + "learning_rate": 4.71963380397077e-05, + "loss": 1.9376, + "step": 757000 + }, + { + "epoch": 0.07, + "learning_rate": 4.719448621542747e-05, + "loss": 1.9849, + "step": 757500 + }, + { + "epoch": 0.08, + "learning_rate": 4.719263439114722e-05, + "loss": 2.0167, + "step": 758000 + }, + { + "epoch": 0.08, + "learning_rate": 4.719078256686697e-05, + "loss": 1.9952, + "step": 758500 + }, + { + "epoch": 0.08, + "learning_rate": 4.7188930742586724e-05, + "loss": 2.0323, + "step": 759000 + }, + { + "epoch": 0.08, + "learning_rate": 4.7187078918306474e-05, + "loss": 1.9649, + "step": 759500 + }, + { + "epoch": 0.08, + "learning_rate": 4.718522709402623e-05, + "loss": 1.9798, + "step": 760000 + }, + { + "epoch": 0.08, + "learning_rate": 4.718337526974599e-05, + "loss": 1.9652, + "step": 760500 + }, + { + "epoch": 0.08, + "learning_rate": 4.718152344546574e-05, + "loss": 1.9687, + "step": 761000 + }, + { + "epoch": 0.08, + "learning_rate": 4.7179671621185495e-05, + "loss": 2.0004, + "step": 761500 + }, + { + "epoch": 0.08, + "learning_rate": 4.7177819796905246e-05, + "loss": 1.9736, + "step": 762000 + }, + { + "epoch": 0.08, + "learning_rate": 4.7175967972625e-05, + "loss": 2.0267, + "step": 762500 + }, + { + "epoch": 0.08, + "learning_rate": 4.717411614834475e-05, + "loss": 2.0713, + "step": 763000 + }, + { + "epoch": 0.08, + "learning_rate": 4.717226432406451e-05, + "loss": 1.9763, + "step": 763500 + }, + { + "epoch": 0.08, + "learning_rate": 4.7170412499784267e-05, + "loss": 1.9805, + "step": 764000 + }, + { + "epoch": 0.08, + "learning_rate": 4.716856067550402e-05, + "loss": 1.9565, + "step": 764500 + }, + { + "epoch": 0.08, + "learning_rate": 4.7166708851223774e-05, + "loss": 2.0273, + "step": 765000 + }, + { + "epoch": 0.08, + "learning_rate": 4.7164857026943524e-05, + "loss": 1.9555, + "step": 765500 + }, + { + "epoch": 0.08, + "learning_rate": 4.716300520266328e-05, + "loss": 1.9901, + "step": 766000 + }, + { + "epoch": 0.08, + "learning_rate": 4.716115337838303e-05, + "loss": 1.9322, + "step": 766500 + }, + { + "epoch": 0.08, + "learning_rate": 4.7159301554102795e-05, + "loss": 1.9818, + "step": 767000 + }, + { + "epoch": 0.08, + "learning_rate": 4.7157449729822545e-05, + "loss": 1.991, + "step": 767500 + }, + { + "epoch": 0.08, + "learning_rate": 4.7155597905542295e-05, + "loss": 1.9674, + "step": 768000 + }, + { + "epoch": 0.08, + "learning_rate": 4.715374608126205e-05, + "loss": 2.051, + "step": 768500 + }, + { + "epoch": 0.08, + "learning_rate": 4.71518942569818e-05, + "loss": 1.978, + "step": 769000 + }, + { + "epoch": 0.08, + "learning_rate": 4.715004243270156e-05, + "loss": 1.9921, + "step": 769500 + }, + { + "epoch": 0.08, + "learning_rate": 4.7148190608421316e-05, + "loss": 1.9828, + "step": 770000 + }, + { + "epoch": 0.08, + "learning_rate": 4.714633878414107e-05, + "loss": 1.9944, + "step": 770500 + }, + { + "epoch": 0.08, + "learning_rate": 4.714448695986082e-05, + "loss": 2.0489, + "step": 771000 + }, + { + "epoch": 0.08, + "learning_rate": 4.714263513558057e-05, + "loss": 1.9693, + "step": 771500 + }, + { + "epoch": 0.08, + "learning_rate": 4.714078331130033e-05, + "loss": 2.0434, + "step": 772000 + }, + { + "epoch": 0.08, + "learning_rate": 4.713893148702008e-05, + "loss": 2.0486, + "step": 772500 + }, + { + "epoch": 0.08, + "learning_rate": 4.713707966273984e-05, + "loss": 1.9808, + "step": 773000 + }, + { + "epoch": 0.08, + "learning_rate": 4.7135227838459594e-05, + "loss": 2.0136, + "step": 773500 + }, + { + "epoch": 0.08, + "learning_rate": 4.713337601417935e-05, + "loss": 2.0469, + "step": 774000 + }, + { + "epoch": 0.08, + "learning_rate": 4.71315241898991e-05, + "loss": 2.0264, + "step": 774500 + }, + { + "epoch": 0.08, + "learning_rate": 4.712967236561885e-05, + "loss": 1.9639, + "step": 775000 + }, + { + "epoch": 0.08, + "learning_rate": 4.712782054133861e-05, + "loss": 1.9759, + "step": 775500 + }, + { + "epoch": 0.08, + "learning_rate": 4.712596871705836e-05, + "loss": 2.0106, + "step": 776000 + }, + { + "epoch": 0.08, + "learning_rate": 4.712411689277812e-05, + "loss": 2.008, + "step": 776500 + }, + { + "epoch": 0.08, + "learning_rate": 4.712226506849787e-05, + "loss": 2.0776, + "step": 777000 + }, + { + "epoch": 0.08, + "learning_rate": 4.712041324421762e-05, + "loss": 2.1293, + "step": 777500 + }, + { + "epoch": 0.08, + "learning_rate": 4.711856141993738e-05, + "loss": 2.0102, + "step": 778000 + }, + { + "epoch": 0.08, + "learning_rate": 4.711670959565713e-05, + "loss": 2.0072, + "step": 778500 + }, + { + "epoch": 0.08, + "learning_rate": 4.711485777137689e-05, + "loss": 2.1087, + "step": 779000 + }, + { + "epoch": 0.08, + "learning_rate": 4.7113005947096644e-05, + "loss": 1.9805, + "step": 779500 + }, + { + "epoch": 0.08, + "learning_rate": 4.71111541228164e-05, + "loss": 2.0418, + "step": 780000 + }, + { + "epoch": 0.08, + "learning_rate": 4.710930229853615e-05, + "loss": 2.0132, + "step": 780500 + }, + { + "epoch": 0.08, + "learning_rate": 4.71074504742559e-05, + "loss": 2.0929, + "step": 781000 + }, + { + "epoch": 0.08, + "learning_rate": 4.710559864997566e-05, + "loss": 2.0832, + "step": 781500 + }, + { + "epoch": 0.08, + "learning_rate": 4.710374682569541e-05, + "loss": 2.0905, + "step": 782000 + }, + { + "epoch": 0.08, + "learning_rate": 4.7101895001415165e-05, + "loss": 1.9694, + "step": 782500 + }, + { + "epoch": 0.08, + "learning_rate": 4.710004317713492e-05, + "loss": 2.0284, + "step": 783000 + }, + { + "epoch": 0.08, + "learning_rate": 4.709819135285468e-05, + "loss": 2.0593, + "step": 783500 + }, + { + "epoch": 0.08, + "learning_rate": 4.709633952857443e-05, + "loss": 2.0327, + "step": 784000 + }, + { + "epoch": 0.08, + "learning_rate": 4.709448770429418e-05, + "loss": 2.0788, + "step": 784500 + }, + { + "epoch": 0.08, + "learning_rate": 4.7092635880013936e-05, + "loss": 2.0705, + "step": 785000 + }, + { + "epoch": 0.08, + "learning_rate": 4.7090784055733686e-05, + "loss": 2.0316, + "step": 785500 + }, + { + "epoch": 0.08, + "learning_rate": 4.708893223145345e-05, + "loss": 2.0512, + "step": 786000 + }, + { + "epoch": 0.08, + "learning_rate": 4.70870804071732e-05, + "loss": 2.1205, + "step": 786500 + }, + { + "epoch": 0.08, + "learning_rate": 4.708522858289296e-05, + "loss": 2.0344, + "step": 787000 + }, + { + "epoch": 0.08, + "learning_rate": 4.708337675861271e-05, + "loss": 2.0452, + "step": 787500 + }, + { + "epoch": 0.08, + "learning_rate": 4.708152493433246e-05, + "loss": 2.0642, + "step": 788000 + }, + { + "epoch": 0.08, + "learning_rate": 4.7079673110052215e-05, + "loss": 2.0969, + "step": 788500 + }, + { + "epoch": 0.08, + "learning_rate": 4.7077821285771965e-05, + "loss": 2.0998, + "step": 789000 + }, + { + "epoch": 0.08, + "learning_rate": 4.707596946149173e-05, + "loss": 2.0191, + "step": 789500 + }, + { + "epoch": 0.08, + "learning_rate": 4.707411763721148e-05, + "loss": 2.0373, + "step": 790000 + }, + { + "epoch": 0.08, + "learning_rate": 4.707226581293123e-05, + "loss": 2.0556, + "step": 790500 + }, + { + "epoch": 0.08, + "learning_rate": 4.7070413988650986e-05, + "loss": 2.0802, + "step": 791000 + }, + { + "epoch": 0.08, + "learning_rate": 4.7068562164370736e-05, + "loss": 2.0375, + "step": 791500 + }, + { + "epoch": 0.08, + "learning_rate": 4.706671034009049e-05, + "loss": 2.0806, + "step": 792000 + }, + { + "epoch": 0.08, + "learning_rate": 4.706485851581025e-05, + "loss": 2.1036, + "step": 792500 + }, + { + "epoch": 0.08, + "learning_rate": 4.706300669153001e-05, + "loss": 2.0088, + "step": 793000 + }, + { + "epoch": 0.08, + "learning_rate": 4.706115486724976e-05, + "loss": 2.0511, + "step": 793500 + }, + { + "epoch": 0.08, + "learning_rate": 4.705930304296951e-05, + "loss": 2.0661, + "step": 794000 + }, + { + "epoch": 0.08, + "learning_rate": 4.7057451218689264e-05, + "loss": 2.0636, + "step": 794500 + }, + { + "epoch": 0.08, + "learning_rate": 4.7055599394409014e-05, + "loss": 2.0155, + "step": 795000 + }, + { + "epoch": 0.08, + "learning_rate": 4.705374757012878e-05, + "loss": 2.0469, + "step": 795500 + }, + { + "epoch": 0.08, + "learning_rate": 4.705189574584853e-05, + "loss": 2.0243, + "step": 796000 + }, + { + "epoch": 0.08, + "learning_rate": 4.7050043921568285e-05, + "loss": 1.9999, + "step": 796500 + }, + { + "epoch": 0.08, + "learning_rate": 4.7048192097288035e-05, + "loss": 2.0995, + "step": 797000 + }, + { + "epoch": 0.08, + "learning_rate": 4.7046340273007785e-05, + "loss": 1.9903, + "step": 797500 + }, + { + "epoch": 0.08, + "learning_rate": 4.704448844872754e-05, + "loss": 2.0851, + "step": 798000 + }, + { + "epoch": 0.08, + "learning_rate": 4.704263662444729e-05, + "loss": 2.0155, + "step": 798500 + }, + { + "epoch": 0.08, + "learning_rate": 4.7040784800167056e-05, + "loss": 2.0831, + "step": 799000 + }, + { + "epoch": 0.08, + "learning_rate": 4.7038932975886806e-05, + "loss": 2.0779, + "step": 799500 + }, + { + "epoch": 0.08, + "learning_rate": 4.7037081151606563e-05, + "loss": 2.0864, + "step": 800000 + }, + { + "epoch": 0.08, + "learning_rate": 4.7035229327326314e-05, + "loss": 2.0051, + "step": 800500 + }, + { + "epoch": 0.08, + "learning_rate": 4.7033377503046064e-05, + "loss": 2.0414, + "step": 801000 + }, + { + "epoch": 0.08, + "learning_rate": 4.703152567876582e-05, + "loss": 2.027, + "step": 801500 + }, + { + "epoch": 0.08, + "learning_rate": 4.702967385448558e-05, + "loss": 2.0134, + "step": 802000 + }, + { + "epoch": 0.08, + "learning_rate": 4.7027822030205335e-05, + "loss": 2.0272, + "step": 802500 + }, + { + "epoch": 0.09, + "learning_rate": 4.7025970205925085e-05, + "loss": 2.0365, + "step": 803000 + }, + { + "epoch": 0.09, + "learning_rate": 4.7024118381644835e-05, + "loss": 2.086, + "step": 803500 + }, + { + "epoch": 0.09, + "learning_rate": 4.702226655736459e-05, + "loss": 2.0551, + "step": 804000 + }, + { + "epoch": 0.09, + "learning_rate": 4.702041473308434e-05, + "loss": 2.1284, + "step": 804500 + }, + { + "epoch": 0.09, + "learning_rate": 4.7018562908804106e-05, + "loss": 2.0559, + "step": 805000 + }, + { + "epoch": 0.09, + "learning_rate": 4.7016711084523856e-05, + "loss": 2.0499, + "step": 805500 + }, + { + "epoch": 0.09, + "learning_rate": 4.701485926024361e-05, + "loss": 2.0513, + "step": 806000 + }, + { + "epoch": 0.09, + "learning_rate": 4.701300743596336e-05, + "loss": 2.0434, + "step": 806500 + }, + { + "epoch": 0.09, + "learning_rate": 4.701115561168311e-05, + "loss": 2.1082, + "step": 807000 + }, + { + "epoch": 0.09, + "learning_rate": 4.700930378740287e-05, + "loss": 2.0744, + "step": 807500 + }, + { + "epoch": 0.09, + "learning_rate": 4.700745196312262e-05, + "loss": 2.0394, + "step": 808000 + }, + { + "epoch": 0.09, + "learning_rate": 4.7005600138842384e-05, + "loss": 2.0527, + "step": 808500 + }, + { + "epoch": 0.09, + "learning_rate": 4.7003748314562134e-05, + "loss": 2.0863, + "step": 809000 + }, + { + "epoch": 0.09, + "learning_rate": 4.700189649028189e-05, + "loss": 2.0699, + "step": 809500 + }, + { + "epoch": 0.09, + "learning_rate": 4.700004466600164e-05, + "loss": 2.017, + "step": 810000 + }, + { + "epoch": 0.09, + "learning_rate": 4.699819284172139e-05, + "loss": 2.06, + "step": 810500 + }, + { + "epoch": 0.09, + "learning_rate": 4.699634101744115e-05, + "loss": 2.1166, + "step": 811000 + }, + { + "epoch": 0.09, + "learning_rate": 4.6994489193160905e-05, + "loss": 2.1231, + "step": 811500 + }, + { + "epoch": 0.09, + "learning_rate": 4.699263736888066e-05, + "loss": 2.0806, + "step": 812000 + }, + { + "epoch": 0.09, + "learning_rate": 4.699078554460041e-05, + "loss": 2.0696, + "step": 812500 + }, + { + "epoch": 0.09, + "learning_rate": 4.698893372032017e-05, + "loss": 2.054, + "step": 813000 + }, + { + "epoch": 0.09, + "learning_rate": 4.698708189603992e-05, + "loss": 1.9506, + "step": 813500 + }, + { + "epoch": 0.09, + "learning_rate": 4.698523007175967e-05, + "loss": 2.1082, + "step": 814000 + }, + { + "epoch": 0.09, + "learning_rate": 4.698337824747943e-05, + "loss": 2.0141, + "step": 814500 + }, + { + "epoch": 0.09, + "learning_rate": 4.6981526423199184e-05, + "loss": 2.1588, + "step": 815000 + }, + { + "epoch": 0.09, + "learning_rate": 4.697967459891894e-05, + "loss": 2.0799, + "step": 815500 + }, + { + "epoch": 0.09, + "learning_rate": 4.697782277463869e-05, + "loss": 2.0682, + "step": 816000 + }, + { + "epoch": 0.09, + "learning_rate": 4.697597095035845e-05, + "loss": 2.0533, + "step": 816500 + }, + { + "epoch": 0.09, + "learning_rate": 4.69741191260782e-05, + "loss": 2.0551, + "step": 817000 + }, + { + "epoch": 0.09, + "learning_rate": 4.697226730179795e-05, + "loss": 2.094, + "step": 817500 + }, + { + "epoch": 0.09, + "learning_rate": 4.697041547751771e-05, + "loss": 2.0754, + "step": 818000 + }, + { + "epoch": 0.09, + "learning_rate": 4.696856365323746e-05, + "loss": 2.0422, + "step": 818500 + }, + { + "epoch": 0.09, + "learning_rate": 4.696671182895722e-05, + "loss": 2.0126, + "step": 819000 + }, + { + "epoch": 0.09, + "learning_rate": 4.696486000467697e-05, + "loss": 2.1076, + "step": 819500 + }, + { + "epoch": 0.09, + "learning_rate": 4.696300818039672e-05, + "loss": 2.0516, + "step": 820000 + }, + { + "epoch": 0.09, + "learning_rate": 4.6961156356116476e-05, + "loss": 2.0432, + "step": 820500 + }, + { + "epoch": 0.09, + "learning_rate": 4.695930453183623e-05, + "loss": 2.0257, + "step": 821000 + }, + { + "epoch": 0.09, + "learning_rate": 4.695745270755599e-05, + "loss": 2.0407, + "step": 821500 + }, + { + "epoch": 0.09, + "learning_rate": 4.695560088327574e-05, + "loss": 2.074, + "step": 822000 + }, + { + "epoch": 0.09, + "learning_rate": 4.69537490589955e-05, + "loss": 2.1457, + "step": 822500 + }, + { + "epoch": 0.09, + "learning_rate": 4.695189723471525e-05, + "loss": 2.0851, + "step": 823000 + }, + { + "epoch": 0.09, + "learning_rate": 4.6950045410435e-05, + "loss": 2.1232, + "step": 823500 + }, + { + "epoch": 0.09, + "learning_rate": 4.6948193586154755e-05, + "loss": 2.0155, + "step": 824000 + }, + { + "epoch": 0.09, + "learning_rate": 4.694634176187451e-05, + "loss": 2.0658, + "step": 824500 + }, + { + "epoch": 0.09, + "learning_rate": 4.694448993759427e-05, + "loss": 2.0657, + "step": 825000 + }, + { + "epoch": 0.09, + "learning_rate": 4.694263811331402e-05, + "loss": 2.0418, + "step": 825500 + }, + { + "epoch": 0.09, + "learning_rate": 4.6940786289033776e-05, + "loss": 2.0449, + "step": 826000 + }, + { + "epoch": 0.09, + "learning_rate": 4.6938934464753526e-05, + "loss": 2.0938, + "step": 826500 + }, + { + "epoch": 0.09, + "learning_rate": 4.6937082640473276e-05, + "loss": 2.0486, + "step": 827000 + }, + { + "epoch": 0.09, + "learning_rate": 4.693523081619304e-05, + "loss": 2.0815, + "step": 827500 + }, + { + "epoch": 0.09, + "learning_rate": 4.693337899191279e-05, + "loss": 2.0487, + "step": 828000 + }, + { + "epoch": 0.09, + "learning_rate": 4.693152716763255e-05, + "loss": 2.0896, + "step": 828500 + }, + { + "epoch": 0.09, + "learning_rate": 4.69296753433523e-05, + "loss": 1.9657, + "step": 829000 + }, + { + "epoch": 0.09, + "learning_rate": 4.6927823519072054e-05, + "loss": 2.0729, + "step": 829500 + }, + { + "epoch": 0.09, + "learning_rate": 4.6925971694791804e-05, + "loss": 2.0866, + "step": 830000 + }, + { + "epoch": 0.09, + "learning_rate": 4.692411987051156e-05, + "loss": 2.055, + "step": 830500 + }, + { + "epoch": 0.09, + "learning_rate": 4.692226804623132e-05, + "loss": 1.9861, + "step": 831000 + }, + { + "epoch": 0.09, + "learning_rate": 4.692041622195107e-05, + "loss": 2.0604, + "step": 831500 + }, + { + "epoch": 0.09, + "learning_rate": 4.6918564397670825e-05, + "loss": 2.0937, + "step": 832000 + }, + { + "epoch": 0.09, + "learning_rate": 4.6916712573390575e-05, + "loss": 2.0143, + "step": 832500 + }, + { + "epoch": 0.09, + "learning_rate": 4.6914860749110325e-05, + "loss": 2.0175, + "step": 833000 + }, + { + "epoch": 0.09, + "learning_rate": 4.691300892483008e-05, + "loss": 2.0605, + "step": 833500 + }, + { + "epoch": 0.09, + "learning_rate": 4.691115710054984e-05, + "loss": 2.093, + "step": 834000 + }, + { + "epoch": 0.09, + "learning_rate": 4.6909305276269596e-05, + "loss": 2.103, + "step": 834500 + }, + { + "epoch": 0.09, + "learning_rate": 4.6907453451989346e-05, + "loss": 2.0349, + "step": 835000 + }, + { + "epoch": 0.09, + "learning_rate": 4.69056016277091e-05, + "loss": 2.0606, + "step": 835500 + }, + { + "epoch": 0.09, + "learning_rate": 4.6903749803428854e-05, + "loss": 2.062, + "step": 836000 + }, + { + "epoch": 0.09, + "learning_rate": 4.6901897979148604e-05, + "loss": 2.008, + "step": 836500 + }, + { + "epoch": 0.09, + "learning_rate": 4.690004615486837e-05, + "loss": 2.0876, + "step": 837000 + }, + { + "epoch": 0.09, + "learning_rate": 4.689819433058812e-05, + "loss": 2.051, + "step": 837500 + }, + { + "epoch": 0.09, + "learning_rate": 4.6896342506307875e-05, + "loss": 2.0967, + "step": 838000 + }, + { + "epoch": 0.09, + "learning_rate": 4.6894490682027625e-05, + "loss": 2.0893, + "step": 838500 + }, + { + "epoch": 0.09, + "learning_rate": 4.689263885774738e-05, + "loss": 2.0889, + "step": 839000 + }, + { + "epoch": 0.09, + "learning_rate": 4.689078703346713e-05, + "loss": 2.0079, + "step": 839500 + }, + { + "epoch": 0.09, + "learning_rate": 4.688893520918688e-05, + "loss": 2.0384, + "step": 840000 + }, + { + "epoch": 0.09, + "learning_rate": 4.6887083384906646e-05, + "loss": 2.1172, + "step": 840500 + }, + { + "epoch": 0.09, + "learning_rate": 4.6885231560626396e-05, + "loss": 2.0889, + "step": 841000 + }, + { + "epoch": 0.09, + "learning_rate": 4.688337973634615e-05, + "loss": 1.9858, + "step": 841500 + }, + { + "epoch": 0.09, + "learning_rate": 4.68815279120659e-05, + "loss": 1.9447, + "step": 842000 + }, + { + "epoch": 0.09, + "learning_rate": 4.687967608778566e-05, + "loss": 2.0305, + "step": 842500 + }, + { + "epoch": 0.09, + "learning_rate": 4.687782426350541e-05, + "loss": 2.0814, + "step": 843000 + }, + { + "epoch": 0.09, + "learning_rate": 4.687597243922517e-05, + "loss": 2.055, + "step": 843500 + }, + { + "epoch": 0.09, + "learning_rate": 4.6874120614944924e-05, + "loss": 2.0451, + "step": 844000 + }, + { + "epoch": 0.09, + "learning_rate": 4.6872268790664674e-05, + "loss": 2.0967, + "step": 844500 + }, + { + "epoch": 0.09, + "learning_rate": 4.687041696638443e-05, + "loss": 2.0854, + "step": 845000 + }, + { + "epoch": 0.09, + "learning_rate": 4.686856514210418e-05, + "loss": 2.0563, + "step": 845500 + }, + { + "epoch": 0.09, + "learning_rate": 4.686671331782393e-05, + "loss": 2.0294, + "step": 846000 + }, + { + "epoch": 0.09, + "learning_rate": 4.6864861493543695e-05, + "loss": 2.0123, + "step": 846500 + }, + { + "epoch": 0.09, + "learning_rate": 4.6863009669263445e-05, + "loss": 2.0806, + "step": 847000 + }, + { + "epoch": 0.09, + "learning_rate": 4.68611578449832e-05, + "loss": 2.0407, + "step": 847500 + }, + { + "epoch": 0.1, + "learning_rate": 4.685930602070295e-05, + "loss": 2.0924, + "step": 848000 + }, + { + "epoch": 0.1, + "learning_rate": 4.685745419642271e-05, + "loss": 2.1218, + "step": 848500 + }, + { + "epoch": 0.1, + "learning_rate": 4.685560237214246e-05, + "loss": 2.0243, + "step": 849000 + }, + { + "epoch": 0.1, + "learning_rate": 4.685375054786221e-05, + "loss": 2.1099, + "step": 849500 + }, + { + "epoch": 0.1, + "learning_rate": 4.6851898723581973e-05, + "loss": 2.0467, + "step": 850000 + }, + { + "epoch": 0.1, + "learning_rate": 4.6850046899301724e-05, + "loss": 2.0296, + "step": 850500 + }, + { + "epoch": 0.1, + "learning_rate": 4.684819507502148e-05, + "loss": 2.0885, + "step": 851000 + }, + { + "epoch": 0.1, + "learning_rate": 4.684634325074123e-05, + "loss": 2.0313, + "step": 851500 + }, + { + "epoch": 0.1, + "learning_rate": 4.684449142646099e-05, + "loss": 2.0465, + "step": 852000 + }, + { + "epoch": 0.1, + "learning_rate": 4.684263960218074e-05, + "loss": 2.0468, + "step": 852500 + }, + { + "epoch": 0.1, + "learning_rate": 4.6840787777900495e-05, + "loss": 2.0439, + "step": 853000 + }, + { + "epoch": 0.1, + "learning_rate": 4.683893595362025e-05, + "loss": 2.0157, + "step": 853500 + }, + { + "epoch": 0.1, + "learning_rate": 4.683708412934e-05, + "loss": 2.028, + "step": 854000 + }, + { + "epoch": 0.1, + "learning_rate": 4.683523230505976e-05, + "loss": 2.0658, + "step": 854500 + }, + { + "epoch": 0.1, + "learning_rate": 4.683338048077951e-05, + "loss": 2.083, + "step": 855000 + }, + { + "epoch": 0.1, + "learning_rate": 4.6831528656499266e-05, + "loss": 2.1119, + "step": 855500 + }, + { + "epoch": 0.1, + "learning_rate": 4.682967683221902e-05, + "loss": 2.1112, + "step": 856000 + }, + { + "epoch": 0.1, + "learning_rate": 4.682782500793877e-05, + "loss": 2.052, + "step": 856500 + }, + { + "epoch": 0.1, + "learning_rate": 4.682597318365853e-05, + "loss": 1.993, + "step": 857000 + }, + { + "epoch": 0.1, + "learning_rate": 4.682412135937828e-05, + "loss": 2.0158, + "step": 857500 + }, + { + "epoch": 0.1, + "learning_rate": 4.682226953509804e-05, + "loss": 1.9846, + "step": 858000 + }, + { + "epoch": 0.1, + "learning_rate": 4.682041771081779e-05, + "loss": 2.0556, + "step": 858500 + }, + { + "epoch": 0.1, + "learning_rate": 4.6818565886537544e-05, + "loss": 2.0791, + "step": 859000 + }, + { + "epoch": 0.1, + "learning_rate": 4.68167140622573e-05, + "loss": 2.0545, + "step": 859500 + }, + { + "epoch": 0.1, + "learning_rate": 4.681486223797705e-05, + "loss": 2.0347, + "step": 860000 + }, + { + "epoch": 0.1, + "learning_rate": 4.681301041369681e-05, + "loss": 2.0501, + "step": 860500 + }, + { + "epoch": 0.1, + "learning_rate": 4.681115858941656e-05, + "loss": 2.0396, + "step": 861000 + }, + { + "epoch": 0.1, + "learning_rate": 4.6809306765136315e-05, + "loss": 2.04, + "step": 861500 + }, + { + "epoch": 0.1, + "learning_rate": 4.6807454940856066e-05, + "loss": 2.0529, + "step": 862000 + }, + { + "epoch": 0.1, + "learning_rate": 4.680560311657582e-05, + "loss": 2.0652, + "step": 862500 + }, + { + "epoch": 0.1, + "learning_rate": 4.680375129229558e-05, + "loss": 2.1191, + "step": 863000 + }, + { + "epoch": 0.1, + "learning_rate": 4.680189946801533e-05, + "loss": 2.0805, + "step": 863500 + }, + { + "epoch": 0.1, + "learning_rate": 4.680004764373509e-05, + "loss": 2.0712, + "step": 864000 + }, + { + "epoch": 0.1, + "learning_rate": 4.679819581945484e-05, + "loss": 2.0654, + "step": 864500 + }, + { + "epoch": 0.1, + "learning_rate": 4.6796343995174594e-05, + "loss": 2.027, + "step": 865000 + }, + { + "epoch": 0.1, + "learning_rate": 4.6794492170894344e-05, + "loss": 2.0291, + "step": 865500 + }, + { + "epoch": 0.1, + "learning_rate": 4.67926403466141e-05, + "loss": 2.007, + "step": 866000 + }, + { + "epoch": 0.1, + "learning_rate": 4.679078852233386e-05, + "loss": 2.0595, + "step": 866500 + }, + { + "epoch": 0.1, + "learning_rate": 4.678893669805361e-05, + "loss": 2.0951, + "step": 867000 + }, + { + "epoch": 0.1, + "learning_rate": 4.6787084873773365e-05, + "loss": 2.0557, + "step": 867500 + }, + { + "epoch": 0.1, + "learning_rate": 4.6785233049493115e-05, + "loss": 2.0327, + "step": 868000 + }, + { + "epoch": 0.1, + "learning_rate": 4.678338122521287e-05, + "loss": 2.0578, + "step": 868500 + }, + { + "epoch": 0.1, + "learning_rate": 4.678152940093263e-05, + "loss": 2.0355, + "step": 869000 + }, + { + "epoch": 0.1, + "learning_rate": 4.677967757665238e-05, + "loss": 2.0443, + "step": 869500 + }, + { + "epoch": 0.1, + "learning_rate": 4.6777825752372136e-05, + "loss": 2.1054, + "step": 870000 + }, + { + "epoch": 0.1, + "learning_rate": 4.6775973928091886e-05, + "loss": 2.0842, + "step": 870500 + }, + { + "epoch": 0.1, + "learning_rate": 4.677412210381164e-05, + "loss": 2.0282, + "step": 871000 + }, + { + "epoch": 0.1, + "learning_rate": 4.6772270279531393e-05, + "loss": 2.1102, + "step": 871500 + }, + { + "epoch": 0.1, + "learning_rate": 4.677041845525115e-05, + "loss": 2.1004, + "step": 872000 + }, + { + "epoch": 0.1, + "learning_rate": 4.676856663097091e-05, + "loss": 2.043, + "step": 872500 + }, + { + "epoch": 0.1, + "learning_rate": 4.676671480669066e-05, + "loss": 2.0572, + "step": 873000 + }, + { + "epoch": 0.1, + "learning_rate": 4.6764862982410414e-05, + "loss": 2.0051, + "step": 873500 + }, + { + "epoch": 0.1, + "learning_rate": 4.6763011158130165e-05, + "loss": 2.0985, + "step": 874000 + }, + { + "epoch": 0.1, + "learning_rate": 4.676115933384992e-05, + "loss": 2.0952, + "step": 874500 + }, + { + "epoch": 0.1, + "learning_rate": 4.675930750956967e-05, + "loss": 2.0283, + "step": 875000 + }, + { + "epoch": 0.1, + "learning_rate": 4.675745568528943e-05, + "loss": 2.0579, + "step": 875500 + }, + { + "epoch": 0.1, + "learning_rate": 4.6755603861009186e-05, + "loss": 2.0514, + "step": 876000 + }, + { + "epoch": 0.1, + "learning_rate": 4.6753752036728936e-05, + "loss": 2.1338, + "step": 876500 + }, + { + "epoch": 0.1, + "learning_rate": 4.675190021244869e-05, + "loss": 2.1004, + "step": 877000 + }, + { + "epoch": 0.1, + "learning_rate": 4.675004838816844e-05, + "loss": 2.0008, + "step": 877500 + }, + { + "epoch": 0.1, + "learning_rate": 4.67481965638882e-05, + "loss": 2.0826, + "step": 878000 + }, + { + "epoch": 0.1, + "learning_rate": 4.674634473960796e-05, + "loss": 2.0655, + "step": 878500 + }, + { + "epoch": 0.1, + "learning_rate": 4.674449291532771e-05, + "loss": 2.0472, + "step": 879000 + }, + { + "epoch": 0.1, + "learning_rate": 4.6742641091047464e-05, + "loss": 1.9664, + "step": 879500 + }, + { + "epoch": 0.1, + "learning_rate": 4.6740789266767214e-05, + "loss": 2.0571, + "step": 880000 + }, + { + "epoch": 0.1, + "learning_rate": 4.673893744248697e-05, + "loss": 2.0605, + "step": 880500 + }, + { + "epoch": 0.1, + "learning_rate": 4.673708561820672e-05, + "loss": 2.0526, + "step": 881000 + }, + { + "epoch": 0.1, + "learning_rate": 4.673523379392648e-05, + "loss": 2.0348, + "step": 881500 + }, + { + "epoch": 0.1, + "learning_rate": 4.6733381969646235e-05, + "loss": 2.0136, + "step": 882000 + }, + { + "epoch": 0.1, + "learning_rate": 4.6731530145365985e-05, + "loss": 2.1102, + "step": 882500 + }, + { + "epoch": 0.1, + "learning_rate": 4.672967832108574e-05, + "loss": 2.0325, + "step": 883000 + }, + { + "epoch": 0.1, + "learning_rate": 4.672782649680549e-05, + "loss": 2.0209, + "step": 883500 + }, + { + "epoch": 0.1, + "learning_rate": 4.672597467252525e-05, + "loss": 2.078, + "step": 884000 + }, + { + "epoch": 0.1, + "learning_rate": 4.6724122848245e-05, + "loss": 2.0908, + "step": 884500 + }, + { + "epoch": 0.1, + "learning_rate": 4.6722271023964756e-05, + "loss": 1.9744, + "step": 885000 + }, + { + "epoch": 0.1, + "learning_rate": 4.6720419199684513e-05, + "loss": 2.0464, + "step": 885500 + }, + { + "epoch": 0.1, + "learning_rate": 4.6718567375404264e-05, + "loss": 2.0477, + "step": 886000 + }, + { + "epoch": 0.1, + "learning_rate": 4.671671555112402e-05, + "loss": 2.0451, + "step": 886500 + }, + { + "epoch": 0.1, + "learning_rate": 4.671486372684377e-05, + "loss": 1.9843, + "step": 887000 + }, + { + "epoch": 0.1, + "learning_rate": 4.671301190256353e-05, + "loss": 2.012, + "step": 887500 + }, + { + "epoch": 0.1, + "learning_rate": 4.6711160078283285e-05, + "loss": 2.0533, + "step": 888000 + }, + { + "epoch": 0.1, + "learning_rate": 4.6709308254003035e-05, + "loss": 2.0422, + "step": 888500 + }, + { + "epoch": 0.1, + "learning_rate": 4.670745642972279e-05, + "loss": 2.0168, + "step": 889000 + }, + { + "epoch": 0.1, + "learning_rate": 4.670560460544254e-05, + "loss": 2.0118, + "step": 889500 + }, + { + "epoch": 0.1, + "learning_rate": 4.67037527811623e-05, + "loss": 2.0773, + "step": 890000 + }, + { + "epoch": 0.1, + "learning_rate": 4.670190095688205e-05, + "loss": 2.133, + "step": 890500 + }, + { + "epoch": 0.1, + "learning_rate": 4.6700049132601806e-05, + "loss": 2.0453, + "step": 891000 + }, + { + "epoch": 0.1, + "learning_rate": 4.669819730832156e-05, + "loss": 2.0374, + "step": 891500 + }, + { + "epoch": 0.1, + "learning_rate": 4.669634548404131e-05, + "loss": 2.0671, + "step": 892000 + }, + { + "epoch": 0.1, + "learning_rate": 4.669449365976107e-05, + "loss": 2.0917, + "step": 892500 + }, + { + "epoch": 0.11, + "learning_rate": 4.669264183548082e-05, + "loss": 2.0369, + "step": 893000 + }, + { + "epoch": 0.11, + "learning_rate": 4.669079001120058e-05, + "loss": 2.0395, + "step": 893500 + }, + { + "epoch": 0.11, + "learning_rate": 4.668893818692033e-05, + "loss": 2.056, + "step": 894000 + }, + { + "epoch": 0.11, + "learning_rate": 4.6687086362640084e-05, + "loss": 2.047, + "step": 894500 + }, + { + "epoch": 0.11, + "learning_rate": 4.668523453835984e-05, + "loss": 2.0453, + "step": 895000 + }, + { + "epoch": 0.11, + "learning_rate": 4.668338271407959e-05, + "loss": 2.0838, + "step": 895500 + }, + { + "epoch": 0.11, + "learning_rate": 4.668153088979935e-05, + "loss": 2.0307, + "step": 896000 + }, + { + "epoch": 0.11, + "learning_rate": 4.66796790655191e-05, + "loss": 2.01, + "step": 896500 + }, + { + "epoch": 0.11, + "learning_rate": 4.6677827241238855e-05, + "loss": 2.0041, + "step": 897000 + }, + { + "epoch": 0.11, + "learning_rate": 4.667597541695861e-05, + "loss": 2.0933, + "step": 897500 + }, + { + "epoch": 0.11, + "learning_rate": 4.667412359267836e-05, + "loss": 2.1238, + "step": 898000 + }, + { + "epoch": 0.11, + "learning_rate": 4.667227176839812e-05, + "loss": 2.0307, + "step": 898500 + }, + { + "epoch": 0.11, + "learning_rate": 4.667041994411787e-05, + "loss": 2.0666, + "step": 899000 + }, + { + "epoch": 0.11, + "learning_rate": 4.6668568119837627e-05, + "loss": 2.1073, + "step": 899500 + }, + { + "epoch": 0.11, + "learning_rate": 4.666671629555738e-05, + "loss": 2.044, + "step": 900000 + }, + { + "epoch": 0.11, + "learning_rate": 4.6664864471277134e-05, + "loss": 2.1487, + "step": 900500 + }, + { + "epoch": 0.11, + "learning_rate": 4.666301264699689e-05, + "loss": 2.008, + "step": 901000 + }, + { + "epoch": 0.11, + "learning_rate": 4.666116082271664e-05, + "loss": 2.0978, + "step": 901500 + }, + { + "epoch": 0.11, + "learning_rate": 4.66593089984364e-05, + "loss": 2.0229, + "step": 902000 + }, + { + "epoch": 0.11, + "learning_rate": 4.665745717415615e-05, + "loss": 2.0439, + "step": 902500 + }, + { + "epoch": 0.11, + "learning_rate": 4.6655605349875905e-05, + "loss": 1.985, + "step": 903000 + }, + { + "epoch": 0.11, + "learning_rate": 4.6653753525595655e-05, + "loss": 2.0497, + "step": 903500 + }, + { + "epoch": 0.11, + "learning_rate": 4.665190170131541e-05, + "loss": 2.0531, + "step": 904000 + }, + { + "epoch": 0.11, + "learning_rate": 4.665004987703517e-05, + "loss": 2.059, + "step": 904500 + }, + { + "epoch": 0.11, + "learning_rate": 4.664819805275492e-05, + "loss": 2.056, + "step": 905000 + }, + { + "epoch": 0.11, + "learning_rate": 4.6646346228474676e-05, + "loss": 1.9899, + "step": 905500 + }, + { + "epoch": 0.11, + "learning_rate": 4.6644494404194426e-05, + "loss": 1.9882, + "step": 906000 + }, + { + "epoch": 0.11, + "learning_rate": 4.664264257991418e-05, + "loss": 2.0522, + "step": 906500 + }, + { + "epoch": 0.11, + "learning_rate": 4.664079075563394e-05, + "loss": 2.0068, + "step": 907000 + }, + { + "epoch": 0.11, + "learning_rate": 4.663893893135369e-05, + "loss": 2.0886, + "step": 907500 + }, + { + "epoch": 0.11, + "learning_rate": 4.663708710707345e-05, + "loss": 2.1016, + "step": 908000 + }, + { + "epoch": 0.11, + "learning_rate": 4.66352352827932e-05, + "loss": 2.0114, + "step": 908500 + }, + { + "epoch": 0.11, + "learning_rate": 4.6633383458512954e-05, + "loss": 1.9802, + "step": 909000 + }, + { + "epoch": 0.11, + "learning_rate": 4.6631531634232705e-05, + "loss": 2.0844, + "step": 909500 + }, + { + "epoch": 0.11, + "learning_rate": 4.662967980995246e-05, + "loss": 2.0211, + "step": 910000 + }, + { + "epoch": 0.11, + "learning_rate": 4.662782798567222e-05, + "loss": 2.0286, + "step": 910500 + }, + { + "epoch": 0.11, + "learning_rate": 4.662597616139197e-05, + "loss": 2.0299, + "step": 911000 + }, + { + "epoch": 0.11, + "learning_rate": 4.6624124337111726e-05, + "loss": 2.0454, + "step": 911500 + }, + { + "epoch": 0.11, + "learning_rate": 4.6622272512831476e-05, + "loss": 2.0932, + "step": 912000 + }, + { + "epoch": 0.11, + "learning_rate": 4.662042068855123e-05, + "loss": 2.0509, + "step": 912500 + }, + { + "epoch": 0.11, + "learning_rate": 4.661856886427098e-05, + "loss": 2.0061, + "step": 913000 + }, + { + "epoch": 0.11, + "learning_rate": 4.661671703999074e-05, + "loss": 1.9975, + "step": 913500 + }, + { + "epoch": 0.11, + "learning_rate": 4.66148652157105e-05, + "loss": 1.991, + "step": 914000 + }, + { + "epoch": 0.11, + "learning_rate": 4.661301339143025e-05, + "loss": 2.1047, + "step": 914500 + }, + { + "epoch": 0.11, + "learning_rate": 4.6611161567150004e-05, + "loss": 2.0214, + "step": 915000 + }, + { + "epoch": 0.11, + "learning_rate": 4.6609309742869754e-05, + "loss": 2.0069, + "step": 915500 + }, + { + "epoch": 0.11, + "learning_rate": 4.660745791858951e-05, + "loss": 2.0557, + "step": 916000 + }, + { + "epoch": 0.11, + "learning_rate": 4.660560609430926e-05, + "loss": 2.0242, + "step": 916500 + }, + { + "epoch": 0.11, + "learning_rate": 4.660375427002902e-05, + "loss": 2.0493, + "step": 917000 + }, + { + "epoch": 0.11, + "learning_rate": 4.6601902445748775e-05, + "loss": 1.9199, + "step": 917500 + }, + { + "epoch": 0.11, + "learning_rate": 4.6600050621468525e-05, + "loss": 2.0903, + "step": 918000 + }, + { + "epoch": 0.11, + "learning_rate": 4.659819879718828e-05, + "loss": 2.0927, + "step": 918500 + }, + { + "epoch": 0.11, + "learning_rate": 4.659634697290803e-05, + "loss": 2.022, + "step": 919000 + }, + { + "epoch": 0.11, + "learning_rate": 4.659449514862779e-05, + "loss": 2.0474, + "step": 919500 + }, + { + "epoch": 0.11, + "learning_rate": 4.6592643324347546e-05, + "loss": 2.0682, + "step": 920000 + }, + { + "epoch": 0.11, + "learning_rate": 4.6590791500067296e-05, + "loss": 1.9685, + "step": 920500 + }, + { + "epoch": 0.11, + "learning_rate": 4.658893967578705e-05, + "loss": 2.0483, + "step": 921000 + }, + { + "epoch": 0.11, + "learning_rate": 4.6587087851506803e-05, + "loss": 2.0165, + "step": 921500 + }, + { + "epoch": 0.11, + "learning_rate": 4.658523602722656e-05, + "loss": 2.0368, + "step": 922000 + }, + { + "epoch": 0.11, + "learning_rate": 4.658338420294631e-05, + "loss": 2.0493, + "step": 922500 + }, + { + "epoch": 0.11, + "learning_rate": 4.658153237866607e-05, + "loss": 1.9734, + "step": 923000 + }, + { + "epoch": 0.11, + "learning_rate": 4.6579680554385825e-05, + "loss": 2.058, + "step": 923500 + }, + { + "epoch": 0.11, + "learning_rate": 4.6577828730105575e-05, + "loss": 2.0363, + "step": 924000 + }, + { + "epoch": 0.11, + "learning_rate": 4.657597690582533e-05, + "loss": 2.0874, + "step": 924500 + }, + { + "epoch": 0.11, + "learning_rate": 4.657412508154508e-05, + "loss": 2.0214, + "step": 925000 + }, + { + "epoch": 0.11, + "learning_rate": 4.657227325726484e-05, + "loss": 2.0202, + "step": 925500 + }, + { + "epoch": 0.11, + "learning_rate": 4.657042143298459e-05, + "loss": 2.0941, + "step": 926000 + }, + { + "epoch": 0.11, + "learning_rate": 4.6568569608704346e-05, + "loss": 2.1147, + "step": 926500 + }, + { + "epoch": 0.11, + "learning_rate": 4.65667177844241e-05, + "loss": 2.013, + "step": 927000 + }, + { + "epoch": 0.11, + "learning_rate": 4.656486596014385e-05, + "loss": 2.0582, + "step": 927500 + }, + { + "epoch": 0.11, + "learning_rate": 4.656301413586361e-05, + "loss": 1.9939, + "step": 928000 + }, + { + "epoch": 0.11, + "learning_rate": 4.656116231158336e-05, + "loss": 2.0514, + "step": 928500 + }, + { + "epoch": 0.11, + "learning_rate": 4.655931048730312e-05, + "loss": 2.0541, + "step": 929000 + }, + { + "epoch": 0.11, + "learning_rate": 4.6557458663022874e-05, + "loss": 2.0357, + "step": 929500 + }, + { + "epoch": 0.11, + "learning_rate": 4.6555606838742624e-05, + "loss": 2.0714, + "step": 930000 + }, + { + "epoch": 0.11, + "learning_rate": 4.655375501446238e-05, + "loss": 2.0783, + "step": 930500 + }, + { + "epoch": 0.11, + "learning_rate": 4.655190319018213e-05, + "loss": 1.9661, + "step": 931000 + }, + { + "epoch": 0.11, + "learning_rate": 4.655005136590189e-05, + "loss": 2.0796, + "step": 931500 + }, + { + "epoch": 0.11, + "learning_rate": 4.654819954162164e-05, + "loss": 2.115, + "step": 932000 + }, + { + "epoch": 0.11, + "learning_rate": 4.6546347717341395e-05, + "loss": 2.0506, + "step": 932500 + }, + { + "epoch": 0.11, + "learning_rate": 4.654449589306115e-05, + "loss": 2.1206, + "step": 933000 + }, + { + "epoch": 0.11, + "learning_rate": 4.65426440687809e-05, + "loss": 2.0115, + "step": 933500 + }, + { + "epoch": 0.11, + "learning_rate": 4.654079224450066e-05, + "loss": 1.9922, + "step": 934000 + }, + { + "epoch": 0.11, + "learning_rate": 4.653894042022041e-05, + "loss": 1.9956, + "step": 934500 + }, + { + "epoch": 0.11, + "learning_rate": 4.6537088595940167e-05, + "loss": 2.0415, + "step": 935000 + }, + { + "epoch": 0.11, + "learning_rate": 4.653523677165992e-05, + "loss": 2.0342, + "step": 935500 + }, + { + "epoch": 0.11, + "learning_rate": 4.6533384947379674e-05, + "loss": 2.0324, + "step": 936000 + }, + { + "epoch": 0.11, + "learning_rate": 4.653153312309943e-05, + "loss": 2.0401, + "step": 936500 + }, + { + "epoch": 0.11, + "learning_rate": 4.652968129881918e-05, + "loss": 2.0224, + "step": 937000 + }, + { + "epoch": 0.11, + "learning_rate": 4.652782947453894e-05, + "loss": 2.0999, + "step": 937500 + }, + { + "epoch": 0.12, + "learning_rate": 4.652597765025869e-05, + "loss": 2.0523, + "step": 938000 + }, + { + "epoch": 0.12, + "learning_rate": 4.6524125825978445e-05, + "loss": 2.0315, + "step": 938500 + }, + { + "epoch": 0.12, + "learning_rate": 4.65222740016982e-05, + "loss": 1.9957, + "step": 939000 + }, + { + "epoch": 0.12, + "learning_rate": 4.652042217741795e-05, + "loss": 2.073, + "step": 939500 + }, + { + "epoch": 0.12, + "learning_rate": 4.651857035313771e-05, + "loss": 1.9975, + "step": 940000 + }, + { + "epoch": 0.12, + "learning_rate": 4.651671852885746e-05, + "loss": 1.9802, + "step": 940500 + }, + { + "epoch": 0.12, + "learning_rate": 4.6514866704577216e-05, + "loss": 2.0256, + "step": 941000 + }, + { + "epoch": 0.12, + "learning_rate": 4.6513014880296966e-05, + "loss": 2.0479, + "step": 941500 + }, + { + "epoch": 0.12, + "learning_rate": 4.651116305601672e-05, + "loss": 2.0764, + "step": 942000 + }, + { + "epoch": 0.12, + "learning_rate": 4.650931123173648e-05, + "loss": 1.9871, + "step": 942500 + }, + { + "epoch": 0.12, + "learning_rate": 4.650745940745623e-05, + "loss": 2.0974, + "step": 943000 + }, + { + "epoch": 0.12, + "learning_rate": 4.650560758317599e-05, + "loss": 1.9827, + "step": 943500 + }, + { + "epoch": 0.12, + "learning_rate": 4.650375575889574e-05, + "loss": 2.076, + "step": 944000 + }, + { + "epoch": 0.12, + "learning_rate": 4.6501903934615494e-05, + "loss": 2.0276, + "step": 944500 + }, + { + "epoch": 0.12, + "learning_rate": 4.6500052110335244e-05, + "loss": 2.0284, + "step": 945000 + }, + { + "epoch": 0.12, + "learning_rate": 4.6498200286055e-05, + "loss": 2.0503, + "step": 945500 + }, + { + "epoch": 0.12, + "learning_rate": 4.649634846177476e-05, + "loss": 2.117, + "step": 946000 + }, + { + "epoch": 0.12, + "learning_rate": 4.649449663749451e-05, + "loss": 2.0124, + "step": 946500 + }, + { + "epoch": 0.12, + "learning_rate": 4.6492644813214265e-05, + "loss": 1.9898, + "step": 947000 + }, + { + "epoch": 0.12, + "learning_rate": 4.6490792988934016e-05, + "loss": 2.0763, + "step": 947500 + }, + { + "epoch": 0.12, + "learning_rate": 4.648894116465377e-05, + "loss": 2.0705, + "step": 948000 + }, + { + "epoch": 0.12, + "learning_rate": 4.648708934037353e-05, + "loss": 2.0323, + "step": 948500 + }, + { + "epoch": 0.12, + "learning_rate": 4.648523751609328e-05, + "loss": 2.0466, + "step": 949000 + }, + { + "epoch": 0.12, + "learning_rate": 4.648338569181304e-05, + "loss": 2.001, + "step": 949500 + }, + { + "epoch": 0.12, + "learning_rate": 4.648153386753279e-05, + "loss": 2.0487, + "step": 950000 + }, + { + "epoch": 0.12, + "learning_rate": 4.6479682043252544e-05, + "loss": 2.086, + "step": 950500 + }, + { + "epoch": 0.12, + "learning_rate": 4.6477830218972294e-05, + "loss": 1.9899, + "step": 951000 + }, + { + "epoch": 0.12, + "learning_rate": 4.647597839469205e-05, + "loss": 2.0867, + "step": 951500 + }, + { + "epoch": 0.12, + "learning_rate": 4.647412657041181e-05, + "loss": 2.0617, + "step": 952000 + }, + { + "epoch": 0.12, + "learning_rate": 4.647227474613156e-05, + "loss": 1.9912, + "step": 952500 + }, + { + "epoch": 0.12, + "learning_rate": 4.6470422921851315e-05, + "loss": 2.0193, + "step": 953000 + }, + { + "epoch": 0.12, + "learning_rate": 4.6468571097571065e-05, + "loss": 2.0106, + "step": 953500 + }, + { + "epoch": 0.12, + "learning_rate": 4.646671927329082e-05, + "loss": 2.0216, + "step": 954000 + }, + { + "epoch": 0.12, + "learning_rate": 4.646486744901057e-05, + "loss": 2.0525, + "step": 954500 + }, + { + "epoch": 0.12, + "learning_rate": 4.6463015624730336e-05, + "loss": 2.0356, + "step": 955000 + }, + { + "epoch": 0.12, + "learning_rate": 4.6461163800450086e-05, + "loss": 2.1147, + "step": 955500 + }, + { + "epoch": 0.12, + "learning_rate": 4.6459311976169836e-05, + "loss": 2.0354, + "step": 956000 + }, + { + "epoch": 0.12, + "learning_rate": 4.645746015188959e-05, + "loss": 2.0831, + "step": 956500 + }, + { + "epoch": 0.12, + "learning_rate": 4.6455608327609343e-05, + "loss": 2.0503, + "step": 957000 + }, + { + "epoch": 0.12, + "learning_rate": 4.64537565033291e-05, + "loss": 1.9702, + "step": 957500 + }, + { + "epoch": 0.12, + "learning_rate": 4.645190467904886e-05, + "loss": 1.9823, + "step": 958000 + }, + { + "epoch": 0.12, + "learning_rate": 4.6450052854768614e-05, + "loss": 2.0705, + "step": 958500 + }, + { + "epoch": 0.12, + "learning_rate": 4.6448201030488364e-05, + "loss": 2.0601, + "step": 959000 + }, + { + "epoch": 0.12, + "learning_rate": 4.6446349206208115e-05, + "loss": 2.0164, + "step": 959500 + }, + { + "epoch": 0.12, + "learning_rate": 4.644449738192787e-05, + "loss": 2.1266, + "step": 960000 + }, + { + "epoch": 0.12, + "learning_rate": 4.644264555764762e-05, + "loss": 2.0423, + "step": 960500 + }, + { + "epoch": 0.12, + "learning_rate": 4.644079373336738e-05, + "loss": 2.0309, + "step": 961000 + }, + { + "epoch": 0.12, + "learning_rate": 4.6438941909087136e-05, + "loss": 2.0068, + "step": 961500 + }, + { + "epoch": 0.12, + "learning_rate": 4.6437090084806886e-05, + "loss": 2.0237, + "step": 962000 + }, + { + "epoch": 0.12, + "learning_rate": 4.643523826052664e-05, + "loss": 2.0418, + "step": 962500 + }, + { + "epoch": 0.12, + "learning_rate": 4.643338643624639e-05, + "loss": 2.0352, + "step": 963000 + }, + { + "epoch": 0.12, + "learning_rate": 4.643153461196615e-05, + "loss": 2.0765, + "step": 963500 + }, + { + "epoch": 0.12, + "learning_rate": 4.64296827876859e-05, + "loss": 2.092, + "step": 964000 + }, + { + "epoch": 0.12, + "learning_rate": 4.6427830963405664e-05, + "loss": 2.0081, + "step": 964500 + }, + { + "epoch": 0.12, + "learning_rate": 4.6425979139125414e-05, + "loss": 2.01, + "step": 965000 + }, + { + "epoch": 0.12, + "learning_rate": 4.6424127314845164e-05, + "loss": 2.0473, + "step": 965500 + }, + { + "epoch": 0.12, + "learning_rate": 4.642227549056492e-05, + "loss": 2.0386, + "step": 966000 + }, + { + "epoch": 0.12, + "learning_rate": 4.642042366628467e-05, + "loss": 2.0354, + "step": 966500 + }, + { + "epoch": 0.12, + "learning_rate": 4.641857184200443e-05, + "loss": 2.0138, + "step": 967000 + }, + { + "epoch": 0.12, + "learning_rate": 4.641672001772418e-05, + "loss": 2.0468, + "step": 967500 + }, + { + "epoch": 0.12, + "learning_rate": 4.641486819344394e-05, + "loss": 1.9797, + "step": 968000 + }, + { + "epoch": 0.12, + "learning_rate": 4.641301636916369e-05, + "loss": 2.1049, + "step": 968500 + }, + { + "epoch": 0.12, + "learning_rate": 4.641116454488344e-05, + "loss": 2.0446, + "step": 969000 + }, + { + "epoch": 0.12, + "learning_rate": 4.64093127206032e-05, + "loss": 2.0752, + "step": 969500 + }, + { + "epoch": 0.12, + "learning_rate": 4.640746089632295e-05, + "loss": 2.0608, + "step": 970000 + }, + { + "epoch": 0.12, + "learning_rate": 4.6405609072042706e-05, + "loss": 2.0546, + "step": 970500 + }, + { + "epoch": 0.12, + "learning_rate": 4.6403757247762463e-05, + "loss": 2.0337, + "step": 971000 + }, + { + "epoch": 0.12, + "learning_rate": 4.640190542348222e-05, + "loss": 2.0205, + "step": 971500 + }, + { + "epoch": 0.12, + "learning_rate": 4.640005359920197e-05, + "loss": 2.1151, + "step": 972000 + }, + { + "epoch": 0.12, + "learning_rate": 4.639820177492172e-05, + "loss": 2.0644, + "step": 972500 + }, + { + "epoch": 0.12, + "learning_rate": 4.639634995064148e-05, + "loss": 2.0661, + "step": 973000 + }, + { + "epoch": 0.12, + "learning_rate": 4.639449812636123e-05, + "loss": 1.9976, + "step": 973500 + }, + { + "epoch": 0.12, + "learning_rate": 4.639264630208099e-05, + "loss": 2.0132, + "step": 974000 + }, + { + "epoch": 0.12, + "learning_rate": 4.639079447780074e-05, + "loss": 2.0034, + "step": 974500 + }, + { + "epoch": 0.12, + "learning_rate": 4.638894265352049e-05, + "loss": 1.9839, + "step": 975000 + }, + { + "epoch": 0.12, + "learning_rate": 4.638709082924025e-05, + "loss": 2.1059, + "step": 975500 + }, + { + "epoch": 0.12, + "learning_rate": 4.638523900496e-05, + "loss": 1.9894, + "step": 976000 + }, + { + "epoch": 0.12, + "learning_rate": 4.6383387180679756e-05, + "loss": 2.047, + "step": 976500 + }, + { + "epoch": 0.12, + "learning_rate": 4.6381535356399506e-05, + "loss": 1.9589, + "step": 977000 + }, + { + "epoch": 0.12, + "learning_rate": 4.637968353211927e-05, + "loss": 2.0083, + "step": 977500 + }, + { + "epoch": 0.12, + "learning_rate": 4.637783170783902e-05, + "loss": 2.0254, + "step": 978000 + }, + { + "epoch": 0.12, + "learning_rate": 4.637597988355877e-05, + "loss": 2.0772, + "step": 978500 + }, + { + "epoch": 0.12, + "learning_rate": 4.637412805927853e-05, + "loss": 2.0772, + "step": 979000 + }, + { + "epoch": 0.12, + "learning_rate": 4.637227623499828e-05, + "loss": 2.0938, + "step": 979500 + }, + { + "epoch": 0.12, + "learning_rate": 4.6370424410718034e-05, + "loss": 2.0982, + "step": 980000 + }, + { + "epoch": 0.12, + "learning_rate": 4.636857258643779e-05, + "loss": 2.0493, + "step": 980500 + }, + { + "epoch": 0.12, + "learning_rate": 4.636672076215755e-05, + "loss": 2.0379, + "step": 981000 + }, + { + "epoch": 0.12, + "learning_rate": 4.63648689378773e-05, + "loss": 2.0638, + "step": 981500 + }, + { + "epoch": 0.12, + "learning_rate": 4.636301711359705e-05, + "loss": 2.1105, + "step": 982000 + }, + { + "epoch": 0.12, + "learning_rate": 4.6361165289316805e-05, + "loss": 2.0282, + "step": 982500 + }, + { + "epoch": 0.13, + "learning_rate": 4.6359313465036556e-05, + "loss": 1.988, + "step": 983000 + }, + { + "epoch": 0.13, + "learning_rate": 4.635746164075631e-05, + "loss": 2.0554, + "step": 983500 + }, + { + "epoch": 0.13, + "learning_rate": 4.635560981647607e-05, + "loss": 2.1075, + "step": 984000 + }, + { + "epoch": 0.13, + "learning_rate": 4.6353757992195826e-05, + "loss": 2.0504, + "step": 984500 + }, + { + "epoch": 0.13, + "learning_rate": 4.6351906167915577e-05, + "loss": 2.0456, + "step": 985000 + }, + { + "epoch": 0.13, + "learning_rate": 4.635005434363533e-05, + "loss": 2.0108, + "step": 985500 + }, + { + "epoch": 0.13, + "learning_rate": 4.6348202519355084e-05, + "loss": 2.0372, + "step": 986000 + }, + { + "epoch": 0.13, + "learning_rate": 4.6346350695074834e-05, + "loss": 2.0026, + "step": 986500 + }, + { + "epoch": 0.13, + "learning_rate": 4.63444988707946e-05, + "loss": 1.9993, + "step": 987000 + }, + { + "epoch": 0.13, + "learning_rate": 4.634264704651435e-05, + "loss": 2.0701, + "step": 987500 + }, + { + "epoch": 0.13, + "learning_rate": 4.63407952222341e-05, + "loss": 2.0102, + "step": 988000 + }, + { + "epoch": 0.13, + "learning_rate": 4.6338943397953855e-05, + "loss": 1.982, + "step": 988500 + }, + { + "epoch": 0.13, + "learning_rate": 4.6337091573673605e-05, + "loss": 2.0209, + "step": 989000 + }, + { + "epoch": 0.13, + "learning_rate": 4.633523974939336e-05, + "loss": 2.0488, + "step": 989500 + }, + { + "epoch": 0.13, + "learning_rate": 4.633338792511312e-05, + "loss": 2.0136, + "step": 990000 + }, + { + "epoch": 0.13, + "learning_rate": 4.6331536100832876e-05, + "loss": 2.1251, + "step": 990500 + }, + { + "epoch": 0.13, + "learning_rate": 4.6329684276552626e-05, + "loss": 2.0328, + "step": 991000 + }, + { + "epoch": 0.13, + "learning_rate": 4.6327832452272376e-05, + "loss": 1.9825, + "step": 991500 + }, + { + "epoch": 0.13, + "learning_rate": 4.632598062799213e-05, + "loss": 2.06, + "step": 992000 + }, + { + "epoch": 0.13, + "learning_rate": 4.632412880371188e-05, + "loss": 2.0039, + "step": 992500 + }, + { + "epoch": 0.13, + "learning_rate": 4.632227697943164e-05, + "loss": 1.9938, + "step": 993000 + }, + { + "epoch": 0.13, + "learning_rate": 4.63204251551514e-05, + "loss": 2.0274, + "step": 993500 + }, + { + "epoch": 0.13, + "learning_rate": 4.6318573330871154e-05, + "loss": 2.0711, + "step": 994000 + }, + { + "epoch": 0.13, + "learning_rate": 4.6316721506590904e-05, + "loss": 1.9769, + "step": 994500 + }, + { + "epoch": 0.13, + "learning_rate": 4.6314869682310655e-05, + "loss": 1.9745, + "step": 995000 + }, + { + "epoch": 0.13, + "learning_rate": 4.631301785803041e-05, + "loss": 1.992, + "step": 995500 + }, + { + "epoch": 0.13, + "learning_rate": 4.631116603375016e-05, + "loss": 2.0302, + "step": 996000 + }, + { + "epoch": 0.13, + "learning_rate": 4.6309314209469925e-05, + "loss": 2.0624, + "step": 996500 + }, + { + "epoch": 0.13, + "learning_rate": 4.6307462385189676e-05, + "loss": 1.9892, + "step": 997000 + }, + { + "epoch": 0.13, + "learning_rate": 4.630561056090943e-05, + "loss": 2.0339, + "step": 997500 + }, + { + "epoch": 0.13, + "learning_rate": 4.630375873662918e-05, + "loss": 2.0717, + "step": 998000 + }, + { + "epoch": 0.13, + "learning_rate": 4.630190691234893e-05, + "loss": 2.0768, + "step": 998500 + }, + { + "epoch": 0.13, + "learning_rate": 4.630005508806869e-05, + "loss": 2.0493, + "step": 999000 + }, + { + "epoch": 0.13, + "learning_rate": 4.629820326378845e-05, + "loss": 2.0204, + "step": 999500 + }, + { + "epoch": 0.13, + "learning_rate": 4.6296351439508204e-05, + "loss": 2.0824, + "step": 1000000 + }, + { + "epoch": 0.13, + "learning_rate": 4.6294499615227954e-05, + "loss": 2.1043, + "step": 1000500 + }, + { + "epoch": 0.13, + "learning_rate": 4.629264779094771e-05, + "loss": 2.0029, + "step": 1001000 + }, + { + "epoch": 0.13, + "learning_rate": 4.629079596666746e-05, + "loss": 2.0412, + "step": 1001500 + }, + { + "epoch": 0.13, + "learning_rate": 4.628894414238721e-05, + "loss": 2.0522, + "step": 1002000 + }, + { + "epoch": 0.13, + "learning_rate": 4.628709231810697e-05, + "loss": 2.0814, + "step": 1002500 + }, + { + "epoch": 0.13, + "learning_rate": 4.6285240493826725e-05, + "loss": 2.1019, + "step": 1003000 + }, + { + "epoch": 0.13, + "learning_rate": 4.628338866954648e-05, + "loss": 2.0045, + "step": 1003500 + }, + { + "epoch": 0.13, + "learning_rate": 4.628153684526623e-05, + "loss": 1.9963, + "step": 1004000 + }, + { + "epoch": 0.13, + "learning_rate": 4.627968502098598e-05, + "loss": 2.0839, + "step": 1004500 + }, + { + "epoch": 0.13, + "learning_rate": 4.627783319670574e-05, + "loss": 2.0057, + "step": 1005000 + }, + { + "epoch": 0.13, + "learning_rate": 4.627598137242549e-05, + "loss": 1.9783, + "step": 1005500 + }, + { + "epoch": 0.13, + "learning_rate": 4.627412954814525e-05, + "loss": 2.0617, + "step": 1006000 + }, + { + "epoch": 0.13, + "learning_rate": 4.6272277723865e-05, + "loss": 2.0306, + "step": 1006500 + }, + { + "epoch": 0.13, + "learning_rate": 4.627042589958476e-05, + "loss": 2.0356, + "step": 1007000 + }, + { + "epoch": 0.13, + "learning_rate": 4.626857407530451e-05, + "loss": 1.9083, + "step": 1007500 + }, + { + "epoch": 0.13, + "learning_rate": 4.626672225102426e-05, + "loss": 2.0926, + "step": 1008000 + }, + { + "epoch": 0.13, + "learning_rate": 4.626487042674402e-05, + "loss": 2.1131, + "step": 1008500 + }, + { + "epoch": 0.13, + "learning_rate": 4.626301860246377e-05, + "loss": 2.0052, + "step": 1009000 + }, + { + "epoch": 0.13, + "learning_rate": 4.626116677818353e-05, + "loss": 2.1092, + "step": 1009500 + }, + { + "epoch": 0.13, + "learning_rate": 4.625931495390328e-05, + "loss": 1.9954, + "step": 1010000 + }, + { + "epoch": 0.13, + "learning_rate": 4.625746312962304e-05, + "loss": 1.9873, + "step": 1010500 + }, + { + "epoch": 0.13, + "learning_rate": 4.625561130534279e-05, + "loss": 2.0889, + "step": 1011000 + }, + { + "epoch": 0.13, + "learning_rate": 4.625375948106254e-05, + "loss": 2.0882, + "step": 1011500 + }, + { + "epoch": 0.13, + "learning_rate": 4.6251907656782296e-05, + "loss": 2.044, + "step": 1012000 + }, + { + "epoch": 0.13, + "learning_rate": 4.625005583250205e-05, + "loss": 1.9891, + "step": 1012500 + }, + { + "epoch": 0.13, + "learning_rate": 4.624820400822181e-05, + "loss": 2.0041, + "step": 1013000 + }, + { + "epoch": 0.13, + "learning_rate": 4.624635218394156e-05, + "loss": 2.0249, + "step": 1013500 + }, + { + "epoch": 0.13, + "learning_rate": 4.624450035966132e-05, + "loss": 2.0084, + "step": 1014000 + }, + { + "epoch": 0.13, + "learning_rate": 4.624264853538107e-05, + "loss": 2.0522, + "step": 1014500 + }, + { + "epoch": 0.13, + "learning_rate": 4.624079671110082e-05, + "loss": 2.0631, + "step": 1015000 + }, + { + "epoch": 0.13, + "learning_rate": 4.623894488682058e-05, + "loss": 2.1273, + "step": 1015500 + }, + { + "epoch": 0.13, + "learning_rate": 4.623709306254033e-05, + "loss": 2.0269, + "step": 1016000 + }, + { + "epoch": 0.13, + "learning_rate": 4.623524123826009e-05, + "loss": 1.9982, + "step": 1016500 + }, + { + "epoch": 0.13, + "learning_rate": 4.623338941397984e-05, + "loss": 2.0475, + "step": 1017000 + }, + { + "epoch": 0.13, + "learning_rate": 4.623153758969959e-05, + "loss": 2.0393, + "step": 1017500 + }, + { + "epoch": 0.13, + "learning_rate": 4.6229685765419345e-05, + "loss": 2.0585, + "step": 1018000 + }, + { + "epoch": 0.13, + "learning_rate": 4.6227833941139095e-05, + "loss": 2.05, + "step": 1018500 + }, + { + "epoch": 0.13, + "learning_rate": 4.622598211685886e-05, + "loss": 1.9668, + "step": 1019000 + }, + { + "epoch": 0.13, + "learning_rate": 4.622413029257861e-05, + "loss": 2.0707, + "step": 1019500 + }, + { + "epoch": 0.13, + "learning_rate": 4.6222278468298366e-05, + "loss": 2.0517, + "step": 1020000 + }, + { + "epoch": 0.13, + "learning_rate": 4.6220426644018117e-05, + "loss": 2.1031, + "step": 1020500 + }, + { + "epoch": 0.13, + "learning_rate": 4.621857481973787e-05, + "loss": 2.0798, + "step": 1021000 + }, + { + "epoch": 0.13, + "learning_rate": 4.6216722995457624e-05, + "loss": 2.0894, + "step": 1021500 + }, + { + "epoch": 0.13, + "learning_rate": 4.621487117117738e-05, + "loss": 2.0638, + "step": 1022000 + }, + { + "epoch": 0.13, + "learning_rate": 4.621301934689714e-05, + "loss": 2.0821, + "step": 1022500 + }, + { + "epoch": 0.13, + "learning_rate": 4.621116752261689e-05, + "loss": 2.0366, + "step": 1023000 + }, + { + "epoch": 0.13, + "learning_rate": 4.6209315698336645e-05, + "loss": 2.042, + "step": 1023500 + }, + { + "epoch": 0.13, + "learning_rate": 4.6207463874056395e-05, + "loss": 1.9892, + "step": 1024000 + }, + { + "epoch": 0.13, + "learning_rate": 4.6205612049776145e-05, + "loss": 2.0839, + "step": 1024500 + }, + { + "epoch": 0.13, + "learning_rate": 4.620376022549591e-05, + "loss": 2.0717, + "step": 1025000 + }, + { + "epoch": 0.13, + "learning_rate": 4.620190840121566e-05, + "loss": 2.0489, + "step": 1025500 + }, + { + "epoch": 0.13, + "learning_rate": 4.6200056576935416e-05, + "loss": 2.1028, + "step": 1026000 + }, + { + "epoch": 0.13, + "learning_rate": 4.6198204752655166e-05, + "loss": 2.0154, + "step": 1026500 + }, + { + "epoch": 0.13, + "learning_rate": 4.619635292837492e-05, + "loss": 2.1096, + "step": 1027000 + }, + { + "epoch": 0.13, + "learning_rate": 4.619450110409467e-05, + "loss": 2.0815, + "step": 1027500 + }, + { + "epoch": 0.14, + "learning_rate": 4.619264927981442e-05, + "loss": 2.0281, + "step": 1028000 + }, + { + "epoch": 0.14, + "learning_rate": 4.619079745553419e-05, + "loss": 2.1375, + "step": 1028500 + }, + { + "epoch": 0.14, + "learning_rate": 4.618894563125394e-05, + "loss": 2.0621, + "step": 1029000 + }, + { + "epoch": 0.14, + "learning_rate": 4.6187093806973694e-05, + "loss": 1.9994, + "step": 1029500 + }, + { + "epoch": 0.14, + "learning_rate": 4.6185241982693444e-05, + "loss": 2.0178, + "step": 1030000 + }, + { + "epoch": 0.14, + "learning_rate": 4.6183390158413194e-05, + "loss": 2.0268, + "step": 1030500 + }, + { + "epoch": 0.14, + "learning_rate": 4.618153833413295e-05, + "loss": 2.0808, + "step": 1031000 + }, + { + "epoch": 0.14, + "learning_rate": 4.617968650985271e-05, + "loss": 2.0078, + "step": 1031500 + }, + { + "epoch": 0.14, + "learning_rate": 4.6177834685572465e-05, + "loss": 2.0562, + "step": 1032000 + }, + { + "epoch": 0.14, + "learning_rate": 4.6175982861292215e-05, + "loss": 2.0047, + "step": 1032500 + }, + { + "epoch": 0.14, + "learning_rate": 4.617413103701197e-05, + "loss": 2.0494, + "step": 1033000 + }, + { + "epoch": 0.14, + "learning_rate": 4.617227921273172e-05, + "loss": 2.0175, + "step": 1033500 + }, + { + "epoch": 0.14, + "learning_rate": 4.617042738845147e-05, + "loss": 2.0532, + "step": 1034000 + }, + { + "epoch": 0.14, + "learning_rate": 4.616857556417123e-05, + "loss": 2.085, + "step": 1034500 + }, + { + "epoch": 0.14, + "learning_rate": 4.616672373989099e-05, + "loss": 2.0665, + "step": 1035000 + }, + { + "epoch": 0.14, + "learning_rate": 4.6164871915610744e-05, + "loss": 1.9793, + "step": 1035500 + }, + { + "epoch": 0.14, + "learning_rate": 4.6163020091330494e-05, + "loss": 2.0211, + "step": 1036000 + }, + { + "epoch": 0.14, + "learning_rate": 4.616116826705025e-05, + "loss": 2.0065, + "step": 1036500 + }, + { + "epoch": 0.14, + "learning_rate": 4.615931644277e-05, + "loss": 2.0266, + "step": 1037000 + }, + { + "epoch": 0.14, + "learning_rate": 4.615746461848975e-05, + "loss": 2.049, + "step": 1037500 + }, + { + "epoch": 0.14, + "learning_rate": 4.6155612794209515e-05, + "loss": 2.0853, + "step": 1038000 + }, + { + "epoch": 0.14, + "learning_rate": 4.6153760969929265e-05, + "loss": 2.0657, + "step": 1038500 + }, + { + "epoch": 0.14, + "learning_rate": 4.615190914564902e-05, + "loss": 2.0305, + "step": 1039000 + }, + { + "epoch": 0.14, + "learning_rate": 4.615005732136877e-05, + "loss": 2.0104, + "step": 1039500 + }, + { + "epoch": 0.14, + "learning_rate": 4.614820549708853e-05, + "loss": 2.0452, + "step": 1040000 + }, + { + "epoch": 0.14, + "learning_rate": 4.614635367280828e-05, + "loss": 2.0034, + "step": 1040500 + }, + { + "epoch": 0.14, + "learning_rate": 4.6144501848528036e-05, + "loss": 2.0114, + "step": 1041000 + }, + { + "epoch": 0.14, + "learning_rate": 4.614265002424779e-05, + "loss": 2.0384, + "step": 1041500 + }, + { + "epoch": 0.14, + "learning_rate": 4.614079819996754e-05, + "loss": 2.0609, + "step": 1042000 + }, + { + "epoch": 0.14, + "learning_rate": 4.61389463756873e-05, + "loss": 2.0699, + "step": 1042500 + }, + { + "epoch": 0.14, + "learning_rate": 4.613709455140705e-05, + "loss": 2.0296, + "step": 1043000 + }, + { + "epoch": 0.14, + "learning_rate": 4.613524272712681e-05, + "loss": 2.0341, + "step": 1043500 + }, + { + "epoch": 0.14, + "learning_rate": 4.613339090284656e-05, + "loss": 2.0044, + "step": 1044000 + }, + { + "epoch": 0.14, + "learning_rate": 4.6131539078566314e-05, + "loss": 2.0148, + "step": 1044500 + }, + { + "epoch": 0.14, + "learning_rate": 4.612968725428607e-05, + "loss": 1.9761, + "step": 1045000 + }, + { + "epoch": 0.14, + "learning_rate": 4.612783543000582e-05, + "loss": 1.9635, + "step": 1045500 + }, + { + "epoch": 0.14, + "learning_rate": 4.612598360572558e-05, + "loss": 2.0579, + "step": 1046000 + }, + { + "epoch": 0.14, + "learning_rate": 4.612413178144533e-05, + "loss": 2.0461, + "step": 1046500 + }, + { + "epoch": 0.14, + "learning_rate": 4.612227995716508e-05, + "loss": 2.0131, + "step": 1047000 + }, + { + "epoch": 0.14, + "learning_rate": 4.612042813288484e-05, + "loss": 2.0641, + "step": 1047500 + }, + { + "epoch": 0.14, + "learning_rate": 4.611857630860459e-05, + "loss": 2.0182, + "step": 1048000 + }, + { + "epoch": 0.14, + "learning_rate": 4.611672448432435e-05, + "loss": 2.0754, + "step": 1048500 + }, + { + "epoch": 0.14, + "learning_rate": 4.61148726600441e-05, + "loss": 2.0843, + "step": 1049000 + }, + { + "epoch": 0.14, + "learning_rate": 4.611302083576386e-05, + "loss": 2.0939, + "step": 1049500 + }, + { + "epoch": 0.14, + "learning_rate": 4.611116901148361e-05, + "loss": 2.0775, + "step": 1050000 + }, + { + "epoch": 0.14, + "learning_rate": 4.6109317187203364e-05, + "loss": 2.0064, + "step": 1050500 + }, + { + "epoch": 0.14, + "learning_rate": 4.610746536292312e-05, + "loss": 1.9742, + "step": 1051000 + }, + { + "epoch": 0.14, + "learning_rate": 4.610561353864287e-05, + "loss": 2.0307, + "step": 1051500 + }, + { + "epoch": 0.14, + "learning_rate": 4.610376171436263e-05, + "loss": 2.0393, + "step": 1052000 + }, + { + "epoch": 0.14, + "learning_rate": 4.610190989008238e-05, + "loss": 2.0434, + "step": 1052500 + }, + { + "epoch": 0.14, + "learning_rate": 4.6100058065802135e-05, + "loss": 1.9794, + "step": 1053000 + }, + { + "epoch": 0.14, + "learning_rate": 4.6098206241521885e-05, + "loss": 2.0144, + "step": 1053500 + }, + { + "epoch": 0.14, + "learning_rate": 4.609635441724164e-05, + "loss": 2.1097, + "step": 1054000 + }, + { + "epoch": 0.14, + "learning_rate": 4.60945025929614e-05, + "loss": 2.0711, + "step": 1054500 + }, + { + "epoch": 0.14, + "learning_rate": 4.609265076868115e-05, + "loss": 2.0481, + "step": 1055000 + }, + { + "epoch": 0.14, + "learning_rate": 4.6090798944400906e-05, + "loss": 1.982, + "step": 1055500 + }, + { + "epoch": 0.14, + "learning_rate": 4.6088947120120656e-05, + "loss": 1.9887, + "step": 1056000 + }, + { + "epoch": 0.14, + "learning_rate": 4.608709529584041e-05, + "loss": 2.0213, + "step": 1056500 + }, + { + "epoch": 0.14, + "learning_rate": 4.608524347156017e-05, + "loss": 2.0595, + "step": 1057000 + }, + { + "epoch": 0.14, + "learning_rate": 4.608339164727992e-05, + "loss": 2.0202, + "step": 1057500 + }, + { + "epoch": 0.14, + "learning_rate": 4.608153982299968e-05, + "loss": 2.0724, + "step": 1058000 + }, + { + "epoch": 0.14, + "learning_rate": 4.607968799871943e-05, + "loss": 2.0174, + "step": 1058500 + }, + { + "epoch": 0.14, + "learning_rate": 4.6077836174439185e-05, + "loss": 2.0645, + "step": 1059000 + }, + { + "epoch": 0.14, + "learning_rate": 4.6075984350158935e-05, + "loss": 2.0285, + "step": 1059500 + }, + { + "epoch": 0.14, + "learning_rate": 4.6074132525878685e-05, + "loss": 2.0194, + "step": 1060000 + }, + { + "epoch": 0.14, + "learning_rate": 4.607228070159845e-05, + "loss": 2.0411, + "step": 1060500 + }, + { + "epoch": 0.14, + "learning_rate": 4.60704288773182e-05, + "loss": 2.0141, + "step": 1061000 + }, + { + "epoch": 0.14, + "learning_rate": 4.6068577053037956e-05, + "loss": 2.0023, + "step": 1061500 + }, + { + "epoch": 0.14, + "learning_rate": 4.6066725228757706e-05, + "loss": 2.0146, + "step": 1062000 + }, + { + "epoch": 0.14, + "learning_rate": 4.606487340447746e-05, + "loss": 2.0538, + "step": 1062500 + }, + { + "epoch": 0.14, + "learning_rate": 4.606302158019721e-05, + "loss": 2.0315, + "step": 1063000 + }, + { + "epoch": 0.14, + "learning_rate": 4.606116975591697e-05, + "loss": 2.0389, + "step": 1063500 + }, + { + "epoch": 0.14, + "learning_rate": 4.605931793163673e-05, + "loss": 2.0816, + "step": 1064000 + }, + { + "epoch": 0.14, + "learning_rate": 4.605746610735648e-05, + "loss": 2.0201, + "step": 1064500 + }, + { + "epoch": 0.14, + "learning_rate": 4.6055614283076234e-05, + "loss": 2.0379, + "step": 1065000 + }, + { + "epoch": 0.14, + "learning_rate": 4.6053762458795984e-05, + "loss": 1.9517, + "step": 1065500 + }, + { + "epoch": 0.14, + "learning_rate": 4.605191063451574e-05, + "loss": 2.0394, + "step": 1066000 + }, + { + "epoch": 0.14, + "learning_rate": 4.60500588102355e-05, + "loss": 2.0043, + "step": 1066500 + }, + { + "epoch": 0.14, + "learning_rate": 4.604820698595525e-05, + "loss": 2.0323, + "step": 1067000 + }, + { + "epoch": 0.14, + "learning_rate": 4.6046355161675005e-05, + "loss": 2.0322, + "step": 1067500 + }, + { + "epoch": 0.14, + "learning_rate": 4.6044503337394755e-05, + "loss": 1.9481, + "step": 1068000 + }, + { + "epoch": 0.14, + "learning_rate": 4.604265151311451e-05, + "loss": 2.0834, + "step": 1068500 + }, + { + "epoch": 0.14, + "learning_rate": 4.604079968883426e-05, + "loss": 2.0398, + "step": 1069000 + }, + { + "epoch": 0.14, + "learning_rate": 4.603894786455402e-05, + "loss": 2.0324, + "step": 1069500 + }, + { + "epoch": 0.14, + "learning_rate": 4.6037096040273776e-05, + "loss": 2.0576, + "step": 1070000 + }, + { + "epoch": 0.14, + "learning_rate": 4.6035244215993527e-05, + "loss": 1.9908, + "step": 1070500 + }, + { + "epoch": 0.14, + "learning_rate": 4.6033392391713284e-05, + "loss": 2.004, + "step": 1071000 + }, + { + "epoch": 0.14, + "learning_rate": 4.6031540567433034e-05, + "loss": 2.0846, + "step": 1071500 + }, + { + "epoch": 0.14, + "learning_rate": 4.602968874315279e-05, + "loss": 1.9894, + "step": 1072000 + }, + { + "epoch": 0.14, + "learning_rate": 4.602783691887254e-05, + "loss": 2.0524, + "step": 1072500 + }, + { + "epoch": 0.15, + "learning_rate": 4.60259850945923e-05, + "loss": 2.0706, + "step": 1073000 + }, + { + "epoch": 0.15, + "learning_rate": 4.6024133270312055e-05, + "loss": 1.9906, + "step": 1073500 + }, + { + "epoch": 0.15, + "learning_rate": 4.6022281446031805e-05, + "loss": 1.9503, + "step": 1074000 + }, + { + "epoch": 0.15, + "learning_rate": 4.602042962175156e-05, + "loss": 2.0304, + "step": 1074500 + }, + { + "epoch": 0.15, + "learning_rate": 4.601857779747131e-05, + "loss": 2.0718, + "step": 1075000 + }, + { + "epoch": 0.15, + "learning_rate": 4.601672597319107e-05, + "loss": 2.036, + "step": 1075500 + }, + { + "epoch": 0.15, + "learning_rate": 4.6014874148910826e-05, + "loss": 2.0283, + "step": 1076000 + }, + { + "epoch": 0.15, + "learning_rate": 4.6013022324630576e-05, + "loss": 2.0526, + "step": 1076500 + }, + { + "epoch": 0.15, + "learning_rate": 4.601117050035033e-05, + "loss": 2.0656, + "step": 1077000 + }, + { + "epoch": 0.15, + "learning_rate": 4.600931867607008e-05, + "loss": 1.9943, + "step": 1077500 + }, + { + "epoch": 0.15, + "learning_rate": 4.600746685178984e-05, + "loss": 2.0667, + "step": 1078000 + }, + { + "epoch": 0.15, + "learning_rate": 4.600561502750959e-05, + "loss": 2.0688, + "step": 1078500 + }, + { + "epoch": 0.15, + "learning_rate": 4.600376320322935e-05, + "loss": 2.0692, + "step": 1079000 + }, + { + "epoch": 0.15, + "learning_rate": 4.6001911378949104e-05, + "loss": 2.0125, + "step": 1079500 + }, + { + "epoch": 0.15, + "learning_rate": 4.6000059554668854e-05, + "loss": 2.0157, + "step": 1080000 + }, + { + "epoch": 0.15, + "learning_rate": 4.599820773038861e-05, + "loss": 2.0318, + "step": 1080500 + }, + { + "epoch": 0.15, + "learning_rate": 4.599635590610836e-05, + "loss": 2.0082, + "step": 1081000 + }, + { + "epoch": 0.15, + "learning_rate": 4.599450408182812e-05, + "loss": 2.0442, + "step": 1081500 + }, + { + "epoch": 0.15, + "learning_rate": 4.599265225754787e-05, + "loss": 1.9515, + "step": 1082000 + }, + { + "epoch": 0.15, + "learning_rate": 4.5990800433267626e-05, + "loss": 2.03, + "step": 1082500 + }, + { + "epoch": 0.15, + "learning_rate": 4.598894860898738e-05, + "loss": 2.007, + "step": 1083000 + }, + { + "epoch": 0.15, + "learning_rate": 4.598709678470713e-05, + "loss": 2.0143, + "step": 1083500 + }, + { + "epoch": 0.15, + "learning_rate": 4.598524496042689e-05, + "loss": 2.045, + "step": 1084000 + }, + { + "epoch": 0.15, + "learning_rate": 4.598339313614664e-05, + "loss": 2.0811, + "step": 1084500 + }, + { + "epoch": 0.15, + "learning_rate": 4.59815413118664e-05, + "loss": 2.0096, + "step": 1085000 + }, + { + "epoch": 0.15, + "learning_rate": 4.597968948758615e-05, + "loss": 1.973, + "step": 1085500 + }, + { + "epoch": 0.15, + "learning_rate": 4.5977837663305904e-05, + "loss": 1.9465, + "step": 1086000 + }, + { + "epoch": 0.15, + "learning_rate": 4.597598583902566e-05, + "loss": 1.9726, + "step": 1086500 + }, + { + "epoch": 0.15, + "learning_rate": 4.597413401474541e-05, + "loss": 2.0709, + "step": 1087000 + }, + { + "epoch": 0.15, + "learning_rate": 4.597228219046517e-05, + "loss": 2.0036, + "step": 1087500 + }, + { + "epoch": 0.15, + "learning_rate": 4.597043036618492e-05, + "loss": 1.9962, + "step": 1088000 + }, + { + "epoch": 0.15, + "learning_rate": 4.5968578541904675e-05, + "loss": 1.9791, + "step": 1088500 + }, + { + "epoch": 0.15, + "learning_rate": 4.596672671762443e-05, + "loss": 2.0552, + "step": 1089000 + }, + { + "epoch": 0.15, + "learning_rate": 4.596487489334418e-05, + "loss": 1.9897, + "step": 1089500 + }, + { + "epoch": 0.15, + "learning_rate": 4.596302306906394e-05, + "loss": 2.0258, + "step": 1090000 + }, + { + "epoch": 0.15, + "learning_rate": 4.596117124478369e-05, + "loss": 2.01, + "step": 1090500 + }, + { + "epoch": 0.15, + "learning_rate": 4.5959319420503446e-05, + "loss": 2.0384, + "step": 1091000 + }, + { + "epoch": 0.15, + "learning_rate": 4.5957467596223196e-05, + "loss": 2.0046, + "step": 1091500 + }, + { + "epoch": 0.15, + "learning_rate": 4.595561577194295e-05, + "loss": 2.0059, + "step": 1092000 + }, + { + "epoch": 0.15, + "learning_rate": 4.595376394766271e-05, + "loss": 2.0942, + "step": 1092500 + }, + { + "epoch": 0.15, + "learning_rate": 4.595191212338246e-05, + "loss": 2.0283, + "step": 1093000 + }, + { + "epoch": 0.15, + "learning_rate": 4.595006029910222e-05, + "loss": 2.0263, + "step": 1093500 + }, + { + "epoch": 0.15, + "learning_rate": 4.594820847482197e-05, + "loss": 2.0925, + "step": 1094000 + }, + { + "epoch": 0.15, + "learning_rate": 4.5946356650541724e-05, + "loss": 1.9677, + "step": 1094500 + }, + { + "epoch": 0.15, + "learning_rate": 4.5944504826261475e-05, + "loss": 2.0579, + "step": 1095000 + }, + { + "epoch": 0.15, + "learning_rate": 4.594265300198123e-05, + "loss": 2.0022, + "step": 1095500 + }, + { + "epoch": 0.15, + "learning_rate": 4.594080117770099e-05, + "loss": 2.0642, + "step": 1096000 + }, + { + "epoch": 0.15, + "learning_rate": 4.593894935342074e-05, + "loss": 2.0249, + "step": 1096500 + }, + { + "epoch": 0.15, + "learning_rate": 4.5937097529140496e-05, + "loss": 2.0108, + "step": 1097000 + }, + { + "epoch": 0.15, + "learning_rate": 4.5935245704860246e-05, + "loss": 2.0411, + "step": 1097500 + }, + { + "epoch": 0.15, + "learning_rate": 4.593339388058e-05, + "loss": 2.0254, + "step": 1098000 + }, + { + "epoch": 0.15, + "learning_rate": 4.593154205629976e-05, + "loss": 2.0359, + "step": 1098500 + }, + { + "epoch": 0.15, + "learning_rate": 4.592969023201951e-05, + "loss": 1.9545, + "step": 1099000 + }, + { + "epoch": 0.15, + "learning_rate": 4.592783840773927e-05, + "loss": 2.0384, + "step": 1099500 + }, + { + "epoch": 0.15, + "learning_rate": 4.592598658345902e-05, + "loss": 2.0318, + "step": 1100000 + }, + { + "epoch": 0.15, + "learning_rate": 4.5924134759178774e-05, + "loss": 2.0491, + "step": 1100500 + }, + { + "epoch": 0.15, + "learning_rate": 4.5922282934898524e-05, + "loss": 2.0369, + "step": 1101000 + }, + { + "epoch": 0.15, + "learning_rate": 4.592043111061828e-05, + "loss": 1.9809, + "step": 1101500 + }, + { + "epoch": 0.15, + "learning_rate": 4.591857928633804e-05, + "loss": 2.0405, + "step": 1102000 + }, + { + "epoch": 0.15, + "learning_rate": 4.591672746205779e-05, + "loss": 2.0773, + "step": 1102500 + }, + { + "epoch": 0.15, + "learning_rate": 4.5914875637777545e-05, + "loss": 1.9896, + "step": 1103000 + }, + { + "epoch": 0.15, + "learning_rate": 4.5913023813497295e-05, + "loss": 2.0292, + "step": 1103500 + }, + { + "epoch": 0.15, + "learning_rate": 4.591117198921705e-05, + "loss": 2.0313, + "step": 1104000 + }, + { + "epoch": 0.15, + "learning_rate": 4.59093201649368e-05, + "loss": 2.051, + "step": 1104500 + }, + { + "epoch": 0.15, + "learning_rate": 4.590746834065656e-05, + "loss": 2.0128, + "step": 1105000 + }, + { + "epoch": 0.15, + "learning_rate": 4.5905616516376316e-05, + "loss": 2.006, + "step": 1105500 + }, + { + "epoch": 0.15, + "learning_rate": 4.5903764692096066e-05, + "loss": 2.0581, + "step": 1106000 + }, + { + "epoch": 0.15, + "learning_rate": 4.5901912867815823e-05, + "loss": 1.9873, + "step": 1106500 + }, + { + "epoch": 0.15, + "learning_rate": 4.5900061043535574e-05, + "loss": 2.0416, + "step": 1107000 + }, + { + "epoch": 0.15, + "learning_rate": 4.589820921925533e-05, + "loss": 2.0782, + "step": 1107500 + }, + { + "epoch": 0.15, + "learning_rate": 4.589635739497509e-05, + "loss": 2.0184, + "step": 1108000 + }, + { + "epoch": 0.15, + "learning_rate": 4.589450557069484e-05, + "loss": 2.0644, + "step": 1108500 + }, + { + "epoch": 0.15, + "learning_rate": 4.5892653746414595e-05, + "loss": 1.9821, + "step": 1109000 + }, + { + "epoch": 0.15, + "learning_rate": 4.5890801922134345e-05, + "loss": 2.0233, + "step": 1109500 + }, + { + "epoch": 0.15, + "learning_rate": 4.58889500978541e-05, + "loss": 2.0325, + "step": 1110000 + }, + { + "epoch": 0.15, + "learning_rate": 4.588709827357385e-05, + "loss": 2.0265, + "step": 1110500 + }, + { + "epoch": 0.15, + "learning_rate": 4.588524644929361e-05, + "loss": 2.0334, + "step": 1111000 + }, + { + "epoch": 0.15, + "learning_rate": 4.5883394625013366e-05, + "loss": 2.0002, + "step": 1111500 + }, + { + "epoch": 0.15, + "learning_rate": 4.5881542800733116e-05, + "loss": 1.9944, + "step": 1112000 + }, + { + "epoch": 0.15, + "learning_rate": 4.587969097645287e-05, + "loss": 2.071, + "step": 1112500 + }, + { + "epoch": 0.15, + "learning_rate": 4.587783915217262e-05, + "loss": 1.9979, + "step": 1113000 + }, + { + "epoch": 0.15, + "learning_rate": 4.587598732789238e-05, + "loss": 2.0582, + "step": 1113500 + }, + { + "epoch": 0.15, + "learning_rate": 4.587413550361213e-05, + "loss": 1.9804, + "step": 1114000 + }, + { + "epoch": 0.15, + "learning_rate": 4.587228367933189e-05, + "loss": 2.0089, + "step": 1114500 + }, + { + "epoch": 0.15, + "learning_rate": 4.5870431855051644e-05, + "loss": 2.0825, + "step": 1115000 + }, + { + "epoch": 0.15, + "learning_rate": 4.5868580030771394e-05, + "loss": 2.0297, + "step": 1115500 + }, + { + "epoch": 0.15, + "learning_rate": 4.586672820649115e-05, + "loss": 2.0135, + "step": 1116000 + }, + { + "epoch": 0.15, + "learning_rate": 4.58648763822109e-05, + "loss": 2.0259, + "step": 1116500 + }, + { + "epoch": 0.15, + "learning_rate": 4.586302455793066e-05, + "loss": 2.095, + "step": 1117000 + }, + { + "epoch": 0.15, + "learning_rate": 4.5861172733650415e-05, + "loss": 1.9724, + "step": 1117500 + }, + { + "epoch": 0.16, + "learning_rate": 4.5859320909370165e-05, + "loss": 1.9995, + "step": 1118000 + }, + { + "epoch": 0.16, + "learning_rate": 4.585746908508992e-05, + "loss": 2.0823, + "step": 1118500 + }, + { + "epoch": 0.16, + "learning_rate": 4.585561726080967e-05, + "loss": 1.9989, + "step": 1119000 + }, + { + "epoch": 0.16, + "learning_rate": 4.585376543652943e-05, + "loss": 1.9797, + "step": 1119500 + }, + { + "epoch": 0.16, + "learning_rate": 4.585191361224918e-05, + "loss": 2.0385, + "step": 1120000 + }, + { + "epoch": 0.16, + "learning_rate": 4.585006178796894e-05, + "loss": 2.088, + "step": 1120500 + }, + { + "epoch": 0.16, + "learning_rate": 4.5848209963688694e-05, + "loss": 2.0721, + "step": 1121000 + }, + { + "epoch": 0.16, + "learning_rate": 4.5846358139408444e-05, + "loss": 1.9574, + "step": 1121500 + }, + { + "epoch": 0.16, + "learning_rate": 4.58445063151282e-05, + "loss": 2.036, + "step": 1122000 + }, + { + "epoch": 0.16, + "learning_rate": 4.584265449084795e-05, + "loss": 1.9909, + "step": 1122500 + }, + { + "epoch": 0.16, + "learning_rate": 4.584080266656771e-05, + "loss": 1.9879, + "step": 1123000 + }, + { + "epoch": 0.16, + "learning_rate": 4.583895084228746e-05, + "loss": 2.089, + "step": 1123500 + }, + { + "epoch": 0.16, + "learning_rate": 4.5837099018007215e-05, + "loss": 1.9891, + "step": 1124000 + }, + { + "epoch": 0.16, + "learning_rate": 4.583524719372697e-05, + "loss": 2.0456, + "step": 1124500 + }, + { + "epoch": 0.16, + "learning_rate": 4.583339536944672e-05, + "loss": 1.9962, + "step": 1125000 + }, + { + "epoch": 0.16, + "learning_rate": 4.583154354516648e-05, + "loss": 2.003, + "step": 1125500 + }, + { + "epoch": 0.16, + "learning_rate": 4.582969172088623e-05, + "loss": 2.0341, + "step": 1126000 + }, + { + "epoch": 0.16, + "learning_rate": 4.5827839896605986e-05, + "loss": 2.0192, + "step": 1126500 + }, + { + "epoch": 0.16, + "learning_rate": 4.582598807232574e-05, + "loss": 1.9658, + "step": 1127000 + }, + { + "epoch": 0.16, + "learning_rate": 4.582413624804549e-05, + "loss": 1.9976, + "step": 1127500 + }, + { + "epoch": 0.16, + "learning_rate": 4.582228442376525e-05, + "loss": 2.0131, + "step": 1128000 + }, + { + "epoch": 0.16, + "learning_rate": 4.5820432599485e-05, + "loss": 2.0462, + "step": 1128500 + }, + { + "epoch": 0.16, + "learning_rate": 4.581858077520476e-05, + "loss": 2.0675, + "step": 1129000 + }, + { + "epoch": 0.16, + "learning_rate": 4.581672895092451e-05, + "loss": 2.0386, + "step": 1129500 + }, + { + "epoch": 0.16, + "learning_rate": 4.5814877126644264e-05, + "loss": 2.0208, + "step": 1130000 + }, + { + "epoch": 0.16, + "learning_rate": 4.581302530236402e-05, + "loss": 1.9804, + "step": 1130500 + }, + { + "epoch": 0.16, + "learning_rate": 4.581117347808377e-05, + "loss": 1.9951, + "step": 1131000 + }, + { + "epoch": 0.16, + "learning_rate": 4.580932165380353e-05, + "loss": 1.9944, + "step": 1131500 + }, + { + "epoch": 0.16, + "learning_rate": 4.580746982952328e-05, + "loss": 1.9934, + "step": 1132000 + }, + { + "epoch": 0.16, + "learning_rate": 4.5805618005243036e-05, + "loss": 2.0437, + "step": 1132500 + }, + { + "epoch": 0.16, + "learning_rate": 4.5803766180962786e-05, + "loss": 2.0452, + "step": 1133000 + }, + { + "epoch": 0.16, + "learning_rate": 4.580191435668254e-05, + "loss": 2.0159, + "step": 1133500 + }, + { + "epoch": 0.16, + "learning_rate": 4.58000625324023e-05, + "loss": 1.9983, + "step": 1134000 + }, + { + "epoch": 0.16, + "learning_rate": 4.579821070812205e-05, + "loss": 2.0513, + "step": 1134500 + }, + { + "epoch": 0.16, + "learning_rate": 4.579635888384181e-05, + "loss": 2.0089, + "step": 1135000 + }, + { + "epoch": 0.16, + "learning_rate": 4.579450705956156e-05, + "loss": 2.0429, + "step": 1135500 + }, + { + "epoch": 0.16, + "learning_rate": 4.5792655235281314e-05, + "loss": 2.0428, + "step": 1136000 + }, + { + "epoch": 0.16, + "learning_rate": 4.5790803411001064e-05, + "loss": 2.0082, + "step": 1136500 + }, + { + "epoch": 0.16, + "learning_rate": 4.578895158672082e-05, + "loss": 2.0299, + "step": 1137000 + }, + { + "epoch": 0.16, + "learning_rate": 4.578709976244058e-05, + "loss": 1.9961, + "step": 1137500 + }, + { + "epoch": 0.16, + "learning_rate": 4.578524793816033e-05, + "loss": 2.04, + "step": 1138000 + }, + { + "epoch": 0.16, + "learning_rate": 4.5783396113880085e-05, + "loss": 2.0125, + "step": 1138500 + }, + { + "epoch": 0.16, + "learning_rate": 4.5781544289599835e-05, + "loss": 2.0359, + "step": 1139000 + }, + { + "epoch": 0.16, + "learning_rate": 4.577969246531959e-05, + "loss": 2.0513, + "step": 1139500 + }, + { + "epoch": 0.16, + "learning_rate": 4.577784064103935e-05, + "loss": 1.9749, + "step": 1140000 + }, + { + "epoch": 0.16, + "learning_rate": 4.57759888167591e-05, + "loss": 1.9861, + "step": 1140500 + }, + { + "epoch": 0.16, + "learning_rate": 4.5774136992478856e-05, + "loss": 2.0236, + "step": 1141000 + }, + { + "epoch": 0.16, + "learning_rate": 4.5772285168198606e-05, + "loss": 2.0338, + "step": 1141500 + }, + { + "epoch": 0.16, + "learning_rate": 4.577043334391836e-05, + "loss": 2.0213, + "step": 1142000 + }, + { + "epoch": 0.16, + "learning_rate": 4.5768581519638114e-05, + "loss": 2.0148, + "step": 1142500 + }, + { + "epoch": 0.16, + "learning_rate": 4.576672969535788e-05, + "loss": 1.9878, + "step": 1143000 + }, + { + "epoch": 0.16, + "learning_rate": 4.576487787107763e-05, + "loss": 1.9863, + "step": 1143500 + }, + { + "epoch": 0.16, + "learning_rate": 4.576302604679738e-05, + "loss": 2.0854, + "step": 1144000 + }, + { + "epoch": 0.16, + "learning_rate": 4.5761174222517135e-05, + "loss": 2.0522, + "step": 1144500 + }, + { + "epoch": 0.16, + "learning_rate": 4.5759322398236885e-05, + "loss": 1.9886, + "step": 1145000 + }, + { + "epoch": 0.16, + "learning_rate": 4.575747057395664e-05, + "loss": 2.0324, + "step": 1145500 + }, + { + "epoch": 0.16, + "learning_rate": 4.575561874967639e-05, + "loss": 2.0437, + "step": 1146000 + }, + { + "epoch": 0.16, + "learning_rate": 4.575376692539615e-05, + "loss": 1.9871, + "step": 1146500 + }, + { + "epoch": 0.16, + "learning_rate": 4.5751915101115906e-05, + "loss": 2.0339, + "step": 1147000 + }, + { + "epoch": 0.16, + "learning_rate": 4.5750063276835656e-05, + "loss": 1.9999, + "step": 1147500 + }, + { + "epoch": 0.16, + "learning_rate": 4.574821145255541e-05, + "loss": 2.0565, + "step": 1148000 + }, + { + "epoch": 0.16, + "learning_rate": 4.574635962827516e-05, + "loss": 1.9668, + "step": 1148500 + }, + { + "epoch": 0.16, + "learning_rate": 4.574450780399492e-05, + "loss": 1.9387, + "step": 1149000 + }, + { + "epoch": 0.16, + "learning_rate": 4.574265597971468e-05, + "loss": 2.0382, + "step": 1149500 + }, + { + "epoch": 0.16, + "learning_rate": 4.574080415543443e-05, + "loss": 2.018, + "step": 1150000 + }, + { + "epoch": 0.16, + "learning_rate": 4.5738952331154184e-05, + "loss": 2.0086, + "step": 1150500 + }, + { + "epoch": 0.16, + "learning_rate": 4.5737100506873934e-05, + "loss": 2.0138, + "step": 1151000 + }, + { + "epoch": 0.16, + "learning_rate": 4.573524868259369e-05, + "loss": 2.0495, + "step": 1151500 + }, + { + "epoch": 0.16, + "learning_rate": 4.573339685831344e-05, + "loss": 2.0011, + "step": 1152000 + }, + { + "epoch": 0.16, + "learning_rate": 4.5731545034033205e-05, + "loss": 2.0153, + "step": 1152500 + }, + { + "epoch": 0.16, + "learning_rate": 4.5729693209752955e-05, + "loss": 1.9924, + "step": 1153000 + }, + { + "epoch": 0.16, + "learning_rate": 4.5727841385472705e-05, + "loss": 2.0114, + "step": 1153500 + }, + { + "epoch": 0.16, + "learning_rate": 4.572598956119246e-05, + "loss": 1.9851, + "step": 1154000 + }, + { + "epoch": 0.16, + "learning_rate": 4.572413773691221e-05, + "loss": 2.0088, + "step": 1154500 + }, + { + "epoch": 0.16, + "learning_rate": 4.572228591263197e-05, + "loss": 2.0583, + "step": 1155000 + }, + { + "epoch": 0.16, + "learning_rate": 4.572043408835172e-05, + "loss": 2.0373, + "step": 1155500 + }, + { + "epoch": 0.16, + "learning_rate": 4.571858226407148e-05, + "loss": 2.0252, + "step": 1156000 + }, + { + "epoch": 0.16, + "learning_rate": 4.5716730439791234e-05, + "loss": 2.0274, + "step": 1156500 + }, + { + "epoch": 0.16, + "learning_rate": 4.5714878615510984e-05, + "loss": 2.0626, + "step": 1157000 + }, + { + "epoch": 0.16, + "learning_rate": 4.571302679123074e-05, + "loss": 2.0646, + "step": 1157500 + }, + { + "epoch": 0.16, + "learning_rate": 4.571117496695049e-05, + "loss": 2.0683, + "step": 1158000 + }, + { + "epoch": 0.16, + "learning_rate": 4.570932314267025e-05, + "loss": 2.0225, + "step": 1158500 + }, + { + "epoch": 0.16, + "learning_rate": 4.5707471318390005e-05, + "loss": 1.9791, + "step": 1159000 + }, + { + "epoch": 0.16, + "learning_rate": 4.5705619494109755e-05, + "loss": 2.0097, + "step": 1159500 + }, + { + "epoch": 0.16, + "learning_rate": 4.570376766982951e-05, + "loss": 2.01, + "step": 1160000 + }, + { + "epoch": 0.16, + "learning_rate": 4.570191584554926e-05, + "loss": 1.9769, + "step": 1160500 + }, + { + "epoch": 0.16, + "learning_rate": 4.570006402126902e-05, + "loss": 2.0588, + "step": 1161000 + }, + { + "epoch": 0.16, + "learning_rate": 4.569821219698877e-05, + "loss": 2.0773, + "step": 1161500 + }, + { + "epoch": 0.16, + "learning_rate": 4.5696360372708526e-05, + "loss": 2.0109, + "step": 1162000 + }, + { + "epoch": 0.16, + "learning_rate": 4.569450854842828e-05, + "loss": 2.0382, + "step": 1162500 + }, + { + "epoch": 0.17, + "learning_rate": 4.569265672414803e-05, + "loss": 2.022, + "step": 1163000 + }, + { + "epoch": 0.17, + "learning_rate": 4.569080489986779e-05, + "loss": 1.9878, + "step": 1163500 + }, + { + "epoch": 0.17, + "learning_rate": 4.568895307558754e-05, + "loss": 1.9953, + "step": 1164000 + }, + { + "epoch": 0.17, + "learning_rate": 4.56871012513073e-05, + "loss": 2.0324, + "step": 1164500 + }, + { + "epoch": 0.17, + "learning_rate": 4.568524942702705e-05, + "loss": 1.9929, + "step": 1165000 + }, + { + "epoch": 0.17, + "learning_rate": 4.568339760274681e-05, + "loss": 2.0366, + "step": 1165500 + }, + { + "epoch": 0.17, + "learning_rate": 4.568154577846656e-05, + "loss": 1.9813, + "step": 1166000 + }, + { + "epoch": 0.17, + "learning_rate": 4.567969395418631e-05, + "loss": 1.9828, + "step": 1166500 + }, + { + "epoch": 0.17, + "learning_rate": 4.567784212990607e-05, + "loss": 2.0847, + "step": 1167000 + }, + { + "epoch": 0.17, + "learning_rate": 4.567599030562582e-05, + "loss": 1.9774, + "step": 1167500 + }, + { + "epoch": 0.17, + "learning_rate": 4.5674138481345576e-05, + "loss": 2.0894, + "step": 1168000 + }, + { + "epoch": 0.17, + "learning_rate": 4.567228665706533e-05, + "loss": 1.9716, + "step": 1168500 + }, + { + "epoch": 0.17, + "learning_rate": 4.567043483278509e-05, + "loss": 2.0528, + "step": 1169000 + }, + { + "epoch": 0.17, + "learning_rate": 4.566858300850484e-05, + "loss": 1.9761, + "step": 1169500 + }, + { + "epoch": 0.17, + "learning_rate": 4.566673118422459e-05, + "loss": 2.0017, + "step": 1170000 + }, + { + "epoch": 0.17, + "learning_rate": 4.566487935994435e-05, + "loss": 2.0189, + "step": 1170500 + }, + { + "epoch": 0.17, + "learning_rate": 4.56630275356641e-05, + "loss": 1.9752, + "step": 1171000 + }, + { + "epoch": 0.17, + "learning_rate": 4.5661175711383854e-05, + "loss": 1.9397, + "step": 1171500 + }, + { + "epoch": 0.17, + "learning_rate": 4.565932388710361e-05, + "loss": 2.0487, + "step": 1172000 + }, + { + "epoch": 0.17, + "learning_rate": 4.565747206282336e-05, + "loss": 2.0155, + "step": 1172500 + }, + { + "epoch": 0.17, + "learning_rate": 4.565562023854312e-05, + "loss": 1.9776, + "step": 1173000 + }, + { + "epoch": 0.17, + "learning_rate": 4.565376841426287e-05, + "loss": 2.0014, + "step": 1173500 + }, + { + "epoch": 0.17, + "learning_rate": 4.5651916589982625e-05, + "loss": 1.9584, + "step": 1174000 + }, + { + "epoch": 0.17, + "learning_rate": 4.5650064765702375e-05, + "loss": 1.9828, + "step": 1174500 + }, + { + "epoch": 0.17, + "learning_rate": 4.564821294142214e-05, + "loss": 2.0071, + "step": 1175000 + }, + { + "epoch": 0.17, + "learning_rate": 4.564636111714189e-05, + "loss": 1.9823, + "step": 1175500 + }, + { + "epoch": 0.17, + "learning_rate": 4.564450929286164e-05, + "loss": 1.9643, + "step": 1176000 + }, + { + "epoch": 0.17, + "learning_rate": 4.5642657468581396e-05, + "loss": 2.0379, + "step": 1176500 + }, + { + "epoch": 0.17, + "learning_rate": 4.5640805644301146e-05, + "loss": 2.0049, + "step": 1177000 + }, + { + "epoch": 0.17, + "learning_rate": 4.56389538200209e-05, + "loss": 2.0408, + "step": 1177500 + }, + { + "epoch": 0.17, + "learning_rate": 4.563710199574066e-05, + "loss": 2.0529, + "step": 1178000 + }, + { + "epoch": 0.17, + "learning_rate": 4.563525017146042e-05, + "loss": 2.0234, + "step": 1178500 + }, + { + "epoch": 0.17, + "learning_rate": 4.563339834718017e-05, + "loss": 2.046, + "step": 1179000 + }, + { + "epoch": 0.17, + "learning_rate": 4.563154652289992e-05, + "loss": 2.0445, + "step": 1179500 + }, + { + "epoch": 0.17, + "learning_rate": 4.5629694698619674e-05, + "loss": 2.0227, + "step": 1180000 + }, + { + "epoch": 0.17, + "learning_rate": 4.5627842874339425e-05, + "loss": 2.0697, + "step": 1180500 + }, + { + "epoch": 0.17, + "learning_rate": 4.562599105005918e-05, + "loss": 2.0044, + "step": 1181000 + }, + { + "epoch": 0.17, + "learning_rate": 4.562413922577894e-05, + "loss": 1.9942, + "step": 1181500 + }, + { + "epoch": 0.17, + "learning_rate": 4.5622287401498695e-05, + "loss": 1.9858, + "step": 1182000 + }, + { + "epoch": 0.17, + "learning_rate": 4.5620435577218446e-05, + "loss": 2.0483, + "step": 1182500 + }, + { + "epoch": 0.17, + "learning_rate": 4.5618583752938196e-05, + "loss": 2.1029, + "step": 1183000 + }, + { + "epoch": 0.17, + "learning_rate": 4.561673192865795e-05, + "loss": 2.034, + "step": 1183500 + }, + { + "epoch": 0.17, + "learning_rate": 4.56148801043777e-05, + "loss": 2.0263, + "step": 1184000 + }, + { + "epoch": 0.17, + "learning_rate": 4.561302828009747e-05, + "loss": 2.0165, + "step": 1184500 + }, + { + "epoch": 0.17, + "learning_rate": 4.561117645581722e-05, + "loss": 2.1276, + "step": 1185000 + }, + { + "epoch": 0.17, + "learning_rate": 4.5609324631536974e-05, + "loss": 2.0112, + "step": 1185500 + }, + { + "epoch": 0.17, + "learning_rate": 4.5607472807256724e-05, + "loss": 2.0514, + "step": 1186000 + }, + { + "epoch": 0.17, + "learning_rate": 4.5605620982976474e-05, + "loss": 2.0065, + "step": 1186500 + }, + { + "epoch": 0.17, + "learning_rate": 4.560376915869623e-05, + "loss": 2.0716, + "step": 1187000 + }, + { + "epoch": 0.17, + "learning_rate": 4.560191733441598e-05, + "loss": 2.0466, + "step": 1187500 + }, + { + "epoch": 0.17, + "learning_rate": 4.5600065510135745e-05, + "loss": 2.0075, + "step": 1188000 + }, + { + "epoch": 0.17, + "learning_rate": 4.5598213685855495e-05, + "loss": 1.9578, + "step": 1188500 + }, + { + "epoch": 0.17, + "learning_rate": 4.5596361861575245e-05, + "loss": 2.063, + "step": 1189000 + }, + { + "epoch": 0.17, + "learning_rate": 4.5594510037295e-05, + "loss": 2.0231, + "step": 1189500 + }, + { + "epoch": 0.17, + "learning_rate": 4.559265821301475e-05, + "loss": 2.0299, + "step": 1190000 + }, + { + "epoch": 0.17, + "learning_rate": 4.559080638873451e-05, + "loss": 2.1112, + "step": 1190500 + }, + { + "epoch": 0.17, + "learning_rate": 4.5588954564454266e-05, + "loss": 2.0467, + "step": 1191000 + }, + { + "epoch": 0.17, + "learning_rate": 4.558710274017402e-05, + "loss": 1.9726, + "step": 1191500 + }, + { + "epoch": 0.17, + "learning_rate": 4.5585250915893773e-05, + "loss": 2.0669, + "step": 1192000 + }, + { + "epoch": 0.17, + "learning_rate": 4.5583399091613524e-05, + "loss": 1.9855, + "step": 1192500 + }, + { + "epoch": 0.17, + "learning_rate": 4.558154726733328e-05, + "loss": 2.0615, + "step": 1193000 + }, + { + "epoch": 0.17, + "learning_rate": 4.557969544305303e-05, + "loss": 2.026, + "step": 1193500 + }, + { + "epoch": 0.17, + "learning_rate": 4.5577843618772794e-05, + "loss": 1.9829, + "step": 1194000 + }, + { + "epoch": 0.17, + "learning_rate": 4.5575991794492545e-05, + "loss": 1.9632, + "step": 1194500 + }, + { + "epoch": 0.17, + "learning_rate": 4.55741399702123e-05, + "loss": 2.0398, + "step": 1195000 + }, + { + "epoch": 0.17, + "learning_rate": 4.557228814593205e-05, + "loss": 2.0325, + "step": 1195500 + }, + { + "epoch": 0.17, + "learning_rate": 4.55704363216518e-05, + "loss": 2.0186, + "step": 1196000 + }, + { + "epoch": 0.17, + "learning_rate": 4.556858449737156e-05, + "loss": 1.9944, + "step": 1196500 + }, + { + "epoch": 0.17, + "learning_rate": 4.556673267309131e-05, + "loss": 2.0366, + "step": 1197000 + }, + { + "epoch": 0.17, + "learning_rate": 4.556488084881107e-05, + "loss": 1.9559, + "step": 1197500 + }, + { + "epoch": 0.17, + "learning_rate": 4.556302902453082e-05, + "loss": 2.0433, + "step": 1198000 + }, + { + "epoch": 0.17, + "learning_rate": 4.556117720025058e-05, + "loss": 2.0049, + "step": 1198500 + }, + { + "epoch": 0.17, + "learning_rate": 4.555932537597033e-05, + "loss": 1.9842, + "step": 1199000 + }, + { + "epoch": 0.17, + "learning_rate": 4.555747355169008e-05, + "loss": 1.9653, + "step": 1199500 + }, + { + "epoch": 0.17, + "learning_rate": 4.555562172740984e-05, + "loss": 2.0843, + "step": 1200000 + }, + { + "epoch": 0.17, + "learning_rate": 4.5553769903129594e-05, + "loss": 2.0587, + "step": 1200500 + }, + { + "epoch": 0.17, + "learning_rate": 4.555191807884935e-05, + "loss": 2.0026, + "step": 1201000 + }, + { + "epoch": 0.17, + "learning_rate": 4.55500662545691e-05, + "loss": 2.0253, + "step": 1201500 + }, + { + "epoch": 0.17, + "learning_rate": 4.554821443028885e-05, + "loss": 1.9794, + "step": 1202000 + }, + { + "epoch": 0.17, + "learning_rate": 4.554636260600861e-05, + "loss": 2.0672, + "step": 1202500 + }, + { + "epoch": 0.17, + "learning_rate": 4.554451078172836e-05, + "loss": 1.9861, + "step": 1203000 + }, + { + "epoch": 0.17, + "learning_rate": 4.554265895744812e-05, + "loss": 2.0267, + "step": 1203500 + }, + { + "epoch": 0.17, + "learning_rate": 4.554080713316787e-05, + "loss": 2.0156, + "step": 1204000 + }, + { + "epoch": 0.17, + "learning_rate": 4.553895530888763e-05, + "loss": 1.9506, + "step": 1204500 + }, + { + "epoch": 0.17, + "learning_rate": 4.553710348460738e-05, + "loss": 2.0643, + "step": 1205000 + }, + { + "epoch": 0.17, + "learning_rate": 4.553525166032713e-05, + "loss": 1.9997, + "step": 1205500 + }, + { + "epoch": 0.17, + "learning_rate": 4.553339983604689e-05, + "loss": 2.0217, + "step": 1206000 + }, + { + "epoch": 0.17, + "learning_rate": 4.553154801176664e-05, + "loss": 2.061, + "step": 1206500 + }, + { + "epoch": 0.17, + "learning_rate": 4.55296961874864e-05, + "loss": 1.9712, + "step": 1207000 + }, + { + "epoch": 0.17, + "learning_rate": 4.552784436320615e-05, + "loss": 1.9848, + "step": 1207500 + }, + { + "epoch": 0.18, + "learning_rate": 4.552599253892591e-05, + "loss": 1.9508, + "step": 1208000 + }, + { + "epoch": 0.18, + "learning_rate": 4.552414071464566e-05, + "loss": 2.0733, + "step": 1208500 + }, + { + "epoch": 0.18, + "learning_rate": 4.552228889036541e-05, + "loss": 2.0766, + "step": 1209000 + }, + { + "epoch": 0.18, + "learning_rate": 4.5520437066085165e-05, + "loss": 2.0268, + "step": 1209500 + }, + { + "epoch": 0.18, + "learning_rate": 4.551858524180492e-05, + "loss": 2.0696, + "step": 1210000 + }, + { + "epoch": 0.18, + "learning_rate": 4.551673341752468e-05, + "loss": 2.0019, + "step": 1210500 + }, + { + "epoch": 0.18, + "learning_rate": 4.551488159324443e-05, + "loss": 2.0083, + "step": 1211000 + }, + { + "epoch": 0.18, + "learning_rate": 4.5513029768964186e-05, + "loss": 2.0026, + "step": 1211500 + }, + { + "epoch": 0.18, + "learning_rate": 4.5511177944683936e-05, + "loss": 2.0136, + "step": 1212000 + }, + { + "epoch": 0.18, + "learning_rate": 4.5509326120403686e-05, + "loss": 2.0361, + "step": 1212500 + }, + { + "epoch": 0.18, + "learning_rate": 4.550747429612344e-05, + "loss": 2.0245, + "step": 1213000 + }, + { + "epoch": 0.18, + "learning_rate": 4.55056224718432e-05, + "loss": 2.0176, + "step": 1213500 + }, + { + "epoch": 0.18, + "learning_rate": 4.550377064756296e-05, + "loss": 2.0951, + "step": 1214000 + }, + { + "epoch": 0.18, + "learning_rate": 4.550191882328271e-05, + "loss": 2.0029, + "step": 1214500 + }, + { + "epoch": 0.18, + "learning_rate": 4.550006699900246e-05, + "loss": 2.0388, + "step": 1215000 + }, + { + "epoch": 0.18, + "learning_rate": 4.5498215174722214e-05, + "loss": 1.9956, + "step": 1215500 + }, + { + "epoch": 0.18, + "learning_rate": 4.5496363350441965e-05, + "loss": 2.0941, + "step": 1216000 + }, + { + "epoch": 0.18, + "learning_rate": 4.549451152616173e-05, + "loss": 2.0493, + "step": 1216500 + }, + { + "epoch": 0.18, + "learning_rate": 4.549265970188148e-05, + "loss": 1.9668, + "step": 1217000 + }, + { + "epoch": 0.18, + "learning_rate": 4.5490807877601235e-05, + "loss": 2.0112, + "step": 1217500 + }, + { + "epoch": 0.18, + "learning_rate": 4.5488956053320986e-05, + "loss": 2.01, + "step": 1218000 + }, + { + "epoch": 0.18, + "learning_rate": 4.5487104229040736e-05, + "loss": 2.0454, + "step": 1218500 + }, + { + "epoch": 0.18, + "learning_rate": 4.548525240476049e-05, + "loss": 1.9673, + "step": 1219000 + }, + { + "epoch": 0.18, + "learning_rate": 4.548340058048025e-05, + "loss": 2.0913, + "step": 1219500 + }, + { + "epoch": 0.18, + "learning_rate": 4.5481548756200007e-05, + "loss": 2.0534, + "step": 1220000 + }, + { + "epoch": 0.18, + "learning_rate": 4.547969693191976e-05, + "loss": 1.996, + "step": 1220500 + }, + { + "epoch": 0.18, + "learning_rate": 4.5477845107639514e-05, + "loss": 2.0159, + "step": 1221000 + }, + { + "epoch": 0.18, + "learning_rate": 4.5475993283359264e-05, + "loss": 2.0308, + "step": 1221500 + }, + { + "epoch": 0.18, + "learning_rate": 4.5474141459079014e-05, + "loss": 2.0111, + "step": 1222000 + }, + { + "epoch": 0.18, + "learning_rate": 4.547228963479877e-05, + "loss": 1.9588, + "step": 1222500 + }, + { + "epoch": 0.18, + "learning_rate": 4.547043781051853e-05, + "loss": 1.9907, + "step": 1223000 + }, + { + "epoch": 0.18, + "learning_rate": 4.5468585986238285e-05, + "loss": 1.9381, + "step": 1223500 + }, + { + "epoch": 0.18, + "learning_rate": 4.5466734161958035e-05, + "loss": 2.0461, + "step": 1224000 + }, + { + "epoch": 0.18, + "learning_rate": 4.546488233767779e-05, + "loss": 2.0119, + "step": 1224500 + }, + { + "epoch": 0.18, + "learning_rate": 4.546303051339754e-05, + "loss": 2.0473, + "step": 1225000 + }, + { + "epoch": 0.18, + "learning_rate": 4.546117868911729e-05, + "loss": 2.0373, + "step": 1225500 + }, + { + "epoch": 0.18, + "learning_rate": 4.5459326864837056e-05, + "loss": 1.948, + "step": 1226000 + }, + { + "epoch": 0.18, + "learning_rate": 4.5457475040556806e-05, + "loss": 1.9465, + "step": 1226500 + }, + { + "epoch": 0.18, + "learning_rate": 4.545562321627656e-05, + "loss": 2.0531, + "step": 1227000 + }, + { + "epoch": 0.18, + "learning_rate": 4.545377139199631e-05, + "loss": 2.0145, + "step": 1227500 + }, + { + "epoch": 0.18, + "learning_rate": 4.545191956771607e-05, + "loss": 1.9744, + "step": 1228000 + }, + { + "epoch": 0.18, + "learning_rate": 4.545006774343582e-05, + "loss": 2.0182, + "step": 1228500 + }, + { + "epoch": 0.18, + "learning_rate": 4.544821591915558e-05, + "loss": 2.0322, + "step": 1229000 + }, + { + "epoch": 0.18, + "learning_rate": 4.5446364094875334e-05, + "loss": 2.0257, + "step": 1229500 + }, + { + "epoch": 0.18, + "learning_rate": 4.5444512270595085e-05, + "loss": 2.0792, + "step": 1230000 + }, + { + "epoch": 0.18, + "learning_rate": 4.544266044631484e-05, + "loss": 1.97, + "step": 1230500 + }, + { + "epoch": 0.18, + "learning_rate": 4.544080862203459e-05, + "loss": 1.9821, + "step": 1231000 + }, + { + "epoch": 0.18, + "learning_rate": 4.543895679775434e-05, + "loss": 2.0477, + "step": 1231500 + }, + { + "epoch": 0.18, + "learning_rate": 4.54371049734741e-05, + "loss": 1.9921, + "step": 1232000 + }, + { + "epoch": 0.18, + "learning_rate": 4.5435253149193856e-05, + "loss": 1.9702, + "step": 1232500 + }, + { + "epoch": 0.18, + "learning_rate": 4.543340132491361e-05, + "loss": 2.0535, + "step": 1233000 + }, + { + "epoch": 0.18, + "learning_rate": 4.543154950063336e-05, + "loss": 2.0334, + "step": 1233500 + }, + { + "epoch": 0.18, + "learning_rate": 4.542969767635312e-05, + "loss": 1.9955, + "step": 1234000 + }, + { + "epoch": 0.18, + "learning_rate": 4.542784585207287e-05, + "loss": 2.0129, + "step": 1234500 + }, + { + "epoch": 0.18, + "learning_rate": 4.542599402779262e-05, + "loss": 2.0197, + "step": 1235000 + }, + { + "epoch": 0.18, + "learning_rate": 4.5424142203512384e-05, + "loss": 2.0355, + "step": 1235500 + }, + { + "epoch": 0.18, + "learning_rate": 4.5422290379232134e-05, + "loss": 1.9437, + "step": 1236000 + }, + { + "epoch": 0.18, + "learning_rate": 4.542043855495189e-05, + "loss": 2.0292, + "step": 1236500 + }, + { + "epoch": 0.18, + "learning_rate": 4.541858673067164e-05, + "loss": 2.0148, + "step": 1237000 + }, + { + "epoch": 0.18, + "learning_rate": 4.54167349063914e-05, + "loss": 1.999, + "step": 1237500 + }, + { + "epoch": 0.18, + "learning_rate": 4.541488308211115e-05, + "loss": 1.9867, + "step": 1238000 + }, + { + "epoch": 0.18, + "learning_rate": 4.54130312578309e-05, + "loss": 2.0071, + "step": 1238500 + }, + { + "epoch": 0.18, + "learning_rate": 4.541117943355066e-05, + "loss": 1.9873, + "step": 1239000 + }, + { + "epoch": 0.18, + "learning_rate": 4.540932760927041e-05, + "loss": 2.0363, + "step": 1239500 + }, + { + "epoch": 0.18, + "learning_rate": 4.540747578499017e-05, + "loss": 2.0003, + "step": 1240000 + }, + { + "epoch": 0.18, + "learning_rate": 4.540562396070992e-05, + "loss": 2.0571, + "step": 1240500 + }, + { + "epoch": 0.18, + "learning_rate": 4.5403772136429676e-05, + "loss": 2.04, + "step": 1241000 + }, + { + "epoch": 0.18, + "learning_rate": 4.5401920312149427e-05, + "loss": 2.0291, + "step": 1241500 + }, + { + "epoch": 0.18, + "learning_rate": 4.5400068487869183e-05, + "loss": 2.0359, + "step": 1242000 + }, + { + "epoch": 0.18, + "learning_rate": 4.539821666358894e-05, + "loss": 1.9683, + "step": 1242500 + }, + { + "epoch": 0.18, + "learning_rate": 4.539636483930869e-05, + "loss": 2.0222, + "step": 1243000 + }, + { + "epoch": 0.18, + "learning_rate": 4.539451301502845e-05, + "loss": 1.9784, + "step": 1243500 + }, + { + "epoch": 0.18, + "learning_rate": 4.53926611907482e-05, + "loss": 2.0293, + "step": 1244000 + }, + { + "epoch": 0.18, + "learning_rate": 4.539080936646795e-05, + "loss": 2.0801, + "step": 1244500 + }, + { + "epoch": 0.18, + "learning_rate": 4.538895754218771e-05, + "loss": 2.0896, + "step": 1245000 + }, + { + "epoch": 0.18, + "learning_rate": 4.538710571790746e-05, + "loss": 2.0129, + "step": 1245500 + }, + { + "epoch": 0.18, + "learning_rate": 4.538525389362722e-05, + "loss": 1.9908, + "step": 1246000 + }, + { + "epoch": 0.18, + "learning_rate": 4.538340206934697e-05, + "loss": 2.0363, + "step": 1246500 + }, + { + "epoch": 0.18, + "learning_rate": 4.5381550245066726e-05, + "loss": 2.009, + "step": 1247000 + }, + { + "epoch": 0.18, + "learning_rate": 4.5379698420786476e-05, + "loss": 2.0021, + "step": 1247500 + }, + { + "epoch": 0.18, + "learning_rate": 4.5377846596506226e-05, + "loss": 2.0504, + "step": 1248000 + }, + { + "epoch": 0.18, + "learning_rate": 4.537599477222599e-05, + "loss": 1.942, + "step": 1248500 + }, + { + "epoch": 0.18, + "learning_rate": 4.537414294794574e-05, + "loss": 2.0592, + "step": 1249000 + }, + { + "epoch": 0.18, + "learning_rate": 4.53722911236655e-05, + "loss": 1.9323, + "step": 1249500 + } + ], + "max_steps": 13500201, + "num_train_epochs": 3, + "total_flos": 1.594877582673861e+17, + "trial_name": null, + "trial_params": null +}