{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.968985378821444, "global_step": 157500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 1.9936704854737643e-05, "loss": 2.1885, "step": 500 }, { "epoch": 0.06, "learning_rate": 1.9873409709475284e-05, "loss": 2.1503, "step": 1000 }, { "epoch": 0.09, "learning_rate": 1.9810114564212926e-05, "loss": 2.1207, "step": 1500 }, { "epoch": 0.13, "learning_rate": 1.974681941895057e-05, "loss": 2.093, "step": 2000 }, { "epoch": 0.16, "learning_rate": 1.968352427368821e-05, "loss": 2.0938, "step": 2500 }, { "epoch": 0.19, "learning_rate": 1.9620229128425853e-05, "loss": 2.0842, "step": 3000 }, { "epoch": 0.22, "learning_rate": 1.9556933983163494e-05, "loss": 2.0991, "step": 3500 }, { "epoch": 0.25, "learning_rate": 1.9493638837901136e-05, "loss": 2.0715, "step": 4000 }, { "epoch": 0.28, "learning_rate": 1.9430343692638777e-05, "loss": 2.073, "step": 4500 }, { "epoch": 0.32, "learning_rate": 1.936704854737642e-05, "loss": 2.08, "step": 5000 }, { "epoch": 0.35, "learning_rate": 1.930375340211406e-05, "loss": 2.0494, "step": 5500 }, { "epoch": 0.38, "learning_rate": 1.92404582568517e-05, "loss": 2.0616, "step": 6000 }, { "epoch": 0.41, "learning_rate": 1.9177163111589342e-05, "loss": 2.0571, "step": 6500 }, { "epoch": 0.44, "learning_rate": 1.9113867966326984e-05, "loss": 2.0561, "step": 7000 }, { "epoch": 0.47, "learning_rate": 1.9050572821064625e-05, "loss": 2.0658, "step": 7500 }, { "epoch": 0.51, "learning_rate": 1.8987277675802266e-05, "loss": 2.0275, "step": 8000 }, { "epoch": 0.54, "learning_rate": 1.8923982530539908e-05, "loss": 2.0335, "step": 8500 }, { "epoch": 0.57, "learning_rate": 1.8860687385277552e-05, "loss": 2.0312, "step": 9000 }, { "epoch": 0.6, "learning_rate": 1.8797392240015194e-05, "loss": 2.0138, "step": 9500 }, { "epoch": 0.63, "learning_rate": 1.8734097094752835e-05, "loss": 2.0005, "step": 10000 }, { "epoch": 0.66, "learning_rate": 1.8670801949490476e-05, "loss": 2.0248, "step": 10500 }, { "epoch": 0.7, "learning_rate": 1.8607506804228118e-05, "loss": 2.0152, "step": 11000 }, { "epoch": 0.73, "learning_rate": 1.854421165896576e-05, "loss": 2.0168, "step": 11500 }, { "epoch": 0.76, "learning_rate": 1.84809165137034e-05, "loss": 2.0196, "step": 12000 }, { "epoch": 0.79, "learning_rate": 1.841762136844104e-05, "loss": 2.0003, "step": 12500 }, { "epoch": 0.82, "learning_rate": 1.8354326223178683e-05, "loss": 2.0267, "step": 13000 }, { "epoch": 0.85, "learning_rate": 1.8291031077916324e-05, "loss": 2.0022, "step": 13500 }, { "epoch": 0.89, "learning_rate": 1.8227735932653966e-05, "loss": 2.0102, "step": 14000 }, { "epoch": 0.92, "learning_rate": 1.8164440787391607e-05, "loss": 2.0, "step": 14500 }, { "epoch": 0.95, "learning_rate": 1.810114564212925e-05, "loss": 1.9751, "step": 15000 }, { "epoch": 0.98, "learning_rate": 1.803785049686689e-05, "loss": 1.9912, "step": 15500 }, { "epoch": 1.0, "eval_loss": 1.708198070526123, "eval_runtime": 629.4341, "eval_samples_per_second": 401.594, "eval_steps_per_second": 25.1, "step": 15799 }, { "epoch": 1.01, "learning_rate": 1.797455535160453e-05, "loss": 1.9373, "step": 16000 }, { "epoch": 1.04, "learning_rate": 1.7911260206342176e-05, "loss": 1.8308, "step": 16500 }, { "epoch": 1.08, "learning_rate": 1.7847965061079817e-05, "loss": 1.8442, "step": 17000 }, { "epoch": 1.11, "learning_rate": 1.778466991581746e-05, "loss": 1.8558, "step": 17500 }, { "epoch": 1.14, "learning_rate": 1.77213747705551e-05, "loss": 1.8564, "step": 18000 }, { "epoch": 1.17, "learning_rate": 1.765807962529274e-05, "loss": 1.8578, "step": 18500 }, { "epoch": 1.2, "learning_rate": 1.7594784480030382e-05, "loss": 1.8312, "step": 19000 }, { "epoch": 1.23, "learning_rate": 1.7531489334768024e-05, "loss": 1.8784, "step": 19500 }, { "epoch": 1.27, "learning_rate": 1.746819418950567e-05, "loss": 1.8497, "step": 20000 }, { "epoch": 1.3, "learning_rate": 1.740489904424331e-05, "loss": 1.8528, "step": 20500 }, { "epoch": 1.33, "learning_rate": 1.734160389898095e-05, "loss": 1.8645, "step": 21000 }, { "epoch": 1.36, "learning_rate": 1.7278308753718592e-05, "loss": 1.8563, "step": 21500 }, { "epoch": 1.39, "learning_rate": 1.7215013608456234e-05, "loss": 1.8616, "step": 22000 }, { "epoch": 1.42, "learning_rate": 1.7151718463193875e-05, "loss": 1.8699, "step": 22500 }, { "epoch": 1.46, "learning_rate": 1.7088423317931516e-05, "loss": 1.8583, "step": 23000 }, { "epoch": 1.49, "learning_rate": 1.7025128172669158e-05, "loss": 1.868, "step": 23500 }, { "epoch": 1.52, "learning_rate": 1.69618330274068e-05, "loss": 1.8534, "step": 24000 }, { "epoch": 1.55, "learning_rate": 1.689853788214444e-05, "loss": 1.8557, "step": 24500 }, { "epoch": 1.58, "learning_rate": 1.683524273688208e-05, "loss": 1.8709, "step": 25000 }, { "epoch": 1.61, "learning_rate": 1.6771947591619723e-05, "loss": 1.8544, "step": 25500 }, { "epoch": 1.65, "learning_rate": 1.6708652446357364e-05, "loss": 1.8803, "step": 26000 }, { "epoch": 1.68, "learning_rate": 1.6645357301095006e-05, "loss": 1.8573, "step": 26500 }, { "epoch": 1.71, "learning_rate": 1.658206215583265e-05, "loss": 1.8668, "step": 27000 }, { "epoch": 1.74, "learning_rate": 1.6518767010570292e-05, "loss": 1.8592, "step": 27500 }, { "epoch": 1.77, "learning_rate": 1.6455471865307933e-05, "loss": 1.8551, "step": 28000 }, { "epoch": 1.8, "learning_rate": 1.6392176720045574e-05, "loss": 1.8504, "step": 28500 }, { "epoch": 1.84, "learning_rate": 1.6328881574783216e-05, "loss": 1.8578, "step": 29000 }, { "epoch": 1.87, "learning_rate": 1.6265586429520857e-05, "loss": 1.8614, "step": 29500 }, { "epoch": 1.9, "learning_rate": 1.62022912842585e-05, "loss": 1.8592, "step": 30000 }, { "epoch": 1.93, "learning_rate": 1.613899613899614e-05, "loss": 1.854, "step": 30500 }, { "epoch": 1.96, "learning_rate": 1.607570099373378e-05, "loss": 1.8536, "step": 31000 }, { "epoch": 1.99, "learning_rate": 1.6012405848471422e-05, "loss": 1.8687, "step": 31500 }, { "epoch": 2.0, "eval_loss": 1.5787432193756104, "eval_runtime": 629.6856, "eval_samples_per_second": 401.434, "eval_steps_per_second": 25.09, "step": 31598 }, { "epoch": 2.03, "learning_rate": 1.5949110703209064e-05, "loss": 1.7515, "step": 32000 }, { "epoch": 2.06, "learning_rate": 1.5885815557946705e-05, "loss": 1.7233, "step": 32500 }, { "epoch": 2.09, "learning_rate": 1.5822520412684346e-05, "loss": 1.754, "step": 33000 }, { "epoch": 2.12, "learning_rate": 1.5759225267421988e-05, "loss": 1.7302, "step": 33500 }, { "epoch": 2.15, "learning_rate": 1.5695930122159632e-05, "loss": 1.7369, "step": 34000 }, { "epoch": 2.18, "learning_rate": 1.5632634976897274e-05, "loss": 1.7294, "step": 34500 }, { "epoch": 2.22, "learning_rate": 1.5569339831634915e-05, "loss": 1.7317, "step": 35000 }, { "epoch": 2.25, "learning_rate": 1.5506044686372556e-05, "loss": 1.7457, "step": 35500 }, { "epoch": 2.28, "learning_rate": 1.5442749541110198e-05, "loss": 1.758, "step": 36000 }, { "epoch": 2.31, "learning_rate": 1.537945439584784e-05, "loss": 1.7442, "step": 36500 }, { "epoch": 2.34, "learning_rate": 1.531615925058548e-05, "loss": 1.7449, "step": 37000 }, { "epoch": 2.37, "learning_rate": 1.5252864105323122e-05, "loss": 1.7502, "step": 37500 }, { "epoch": 2.41, "learning_rate": 1.5189568960060765e-05, "loss": 1.7529, "step": 38000 }, { "epoch": 2.44, "learning_rate": 1.5126273814798406e-05, "loss": 1.7675, "step": 38500 }, { "epoch": 2.47, "learning_rate": 1.5062978669536047e-05, "loss": 1.7537, "step": 39000 }, { "epoch": 2.5, "learning_rate": 1.4999683524273689e-05, "loss": 1.7546, "step": 39500 }, { "epoch": 2.53, "learning_rate": 1.493638837901133e-05, "loss": 1.7409, "step": 40000 }, { "epoch": 2.56, "learning_rate": 1.4873093233748971e-05, "loss": 1.7599, "step": 40500 }, { "epoch": 2.6, "learning_rate": 1.4809798088486613e-05, "loss": 1.7467, "step": 41000 }, { "epoch": 2.63, "learning_rate": 1.4746502943224257e-05, "loss": 1.7426, "step": 41500 }, { "epoch": 2.66, "learning_rate": 1.4683207797961899e-05, "loss": 1.7421, "step": 42000 }, { "epoch": 2.69, "learning_rate": 1.461991265269954e-05, "loss": 1.7572, "step": 42500 }, { "epoch": 2.72, "learning_rate": 1.4556617507437181e-05, "loss": 1.7489, "step": 43000 }, { "epoch": 2.75, "learning_rate": 1.4493322362174823e-05, "loss": 1.7482, "step": 43500 }, { "epoch": 2.78, "learning_rate": 1.4430027216912464e-05, "loss": 1.7578, "step": 44000 }, { "epoch": 2.82, "learning_rate": 1.4366732071650105e-05, "loss": 1.7608, "step": 44500 }, { "epoch": 2.85, "learning_rate": 1.4303436926387748e-05, "loss": 1.7623, "step": 45000 }, { "epoch": 2.88, "learning_rate": 1.424014178112539e-05, "loss": 1.7534, "step": 45500 }, { "epoch": 2.91, "learning_rate": 1.4176846635863031e-05, "loss": 1.7513, "step": 46000 }, { "epoch": 2.94, "learning_rate": 1.4113551490600672e-05, "loss": 1.7539, "step": 46500 }, { "epoch": 2.97, "learning_rate": 1.4050256345338314e-05, "loss": 1.7529, "step": 47000 }, { "epoch": 3.0, "eval_loss": 1.4882566928863525, "eval_runtime": 671.3515, "eval_samples_per_second": 376.52, "eval_steps_per_second": 23.533, "step": 47397 }, { "epoch": 3.01, "learning_rate": 1.3986961200075955e-05, "loss": 1.7233, "step": 47500 }, { "epoch": 3.04, "learning_rate": 1.3923666054813596e-05, "loss": 1.6255, "step": 48000 }, { "epoch": 3.07, "learning_rate": 1.386037090955124e-05, "loss": 1.6566, "step": 48500 }, { "epoch": 3.1, "learning_rate": 1.379707576428888e-05, "loss": 1.6442, "step": 49000 }, { "epoch": 3.13, "learning_rate": 1.3733780619026522e-05, "loss": 1.6439, "step": 49500 }, { "epoch": 3.16, "learning_rate": 1.3670485473764163e-05, "loss": 1.6438, "step": 50000 }, { "epoch": 3.2, "learning_rate": 1.3607190328501805e-05, "loss": 1.6527, "step": 50500 }, { "epoch": 3.23, "learning_rate": 1.3543895183239446e-05, "loss": 1.6426, "step": 51000 }, { "epoch": 3.26, "learning_rate": 1.3480600037977087e-05, "loss": 1.6802, "step": 51500 }, { "epoch": 3.29, "learning_rate": 1.341730489271473e-05, "loss": 1.6568, "step": 52000 }, { "epoch": 3.32, "learning_rate": 1.3354009747452372e-05, "loss": 1.6657, "step": 52500 }, { "epoch": 3.35, "learning_rate": 1.3290714602190013e-05, "loss": 1.6734, "step": 53000 }, { "epoch": 3.39, "learning_rate": 1.3227419456927654e-05, "loss": 1.655, "step": 53500 }, { "epoch": 3.42, "learning_rate": 1.3164124311665296e-05, "loss": 1.6831, "step": 54000 }, { "epoch": 3.45, "learning_rate": 1.3100829166402937e-05, "loss": 1.6532, "step": 54500 }, { "epoch": 3.48, "learning_rate": 1.3037534021140578e-05, "loss": 1.6649, "step": 55000 }, { "epoch": 3.51, "learning_rate": 1.2974238875878221e-05, "loss": 1.6643, "step": 55500 }, { "epoch": 3.54, "learning_rate": 1.2910943730615863e-05, "loss": 1.6749, "step": 56000 }, { "epoch": 3.58, "learning_rate": 1.2847648585353504e-05, "loss": 1.6802, "step": 56500 }, { "epoch": 3.61, "learning_rate": 1.2784353440091145e-05, "loss": 1.6753, "step": 57000 }, { "epoch": 3.64, "learning_rate": 1.2721058294828787e-05, "loss": 1.6759, "step": 57500 }, { "epoch": 3.67, "learning_rate": 1.2657763149566428e-05, "loss": 1.6756, "step": 58000 }, { "epoch": 3.7, "learning_rate": 1.259446800430407e-05, "loss": 1.6733, "step": 58500 }, { "epoch": 3.73, "learning_rate": 1.253117285904171e-05, "loss": 1.671, "step": 59000 }, { "epoch": 3.77, "learning_rate": 1.2467877713779355e-05, "loss": 1.6697, "step": 59500 }, { "epoch": 3.8, "learning_rate": 1.2404582568516997e-05, "loss": 1.668, "step": 60000 }, { "epoch": 3.83, "learning_rate": 1.2341287423254638e-05, "loss": 1.6689, "step": 60500 }, { "epoch": 3.86, "learning_rate": 1.227799227799228e-05, "loss": 1.6874, "step": 61000 }, { "epoch": 3.89, "learning_rate": 1.221469713272992e-05, "loss": 1.6926, "step": 61500 }, { "epoch": 3.92, "learning_rate": 1.2151401987467562e-05, "loss": 1.6819, "step": 62000 }, { "epoch": 3.96, "learning_rate": 1.2088106842205203e-05, "loss": 1.6599, "step": 62500 }, { "epoch": 3.99, "learning_rate": 1.2024811696942846e-05, "loss": 1.6886, "step": 63000 }, { "epoch": 4.0, "eval_loss": 1.417983055114746, "eval_runtime": 634.8433, "eval_samples_per_second": 398.172, "eval_steps_per_second": 24.886, "step": 63196 }, { "epoch": 4.02, "learning_rate": 1.1961516551680488e-05, "loss": 1.6122, "step": 63500 }, { "epoch": 4.05, "learning_rate": 1.1898221406418129e-05, "loss": 1.578, "step": 64000 }, { "epoch": 4.08, "learning_rate": 1.183492626115577e-05, "loss": 1.5662, "step": 64500 }, { "epoch": 4.11, "learning_rate": 1.1771631115893412e-05, "loss": 1.5732, "step": 65000 }, { "epoch": 4.15, "learning_rate": 1.1708335970631053e-05, "loss": 1.5726, "step": 65500 }, { "epoch": 4.18, "learning_rate": 1.1645040825368694e-05, "loss": 1.5868, "step": 66000 }, { "epoch": 4.21, "learning_rate": 1.1581745680106337e-05, "loss": 1.5781, "step": 66500 }, { "epoch": 4.24, "learning_rate": 1.1518450534843979e-05, "loss": 1.5965, "step": 67000 }, { "epoch": 4.27, "learning_rate": 1.145515538958162e-05, "loss": 1.5934, "step": 67500 }, { "epoch": 4.3, "learning_rate": 1.1391860244319261e-05, "loss": 1.5791, "step": 68000 }, { "epoch": 4.34, "learning_rate": 1.1328565099056903e-05, "loss": 1.6037, "step": 68500 }, { "epoch": 4.37, "learning_rate": 1.1265269953794544e-05, "loss": 1.6046, "step": 69000 }, { "epoch": 4.4, "learning_rate": 1.1201974808532185e-05, "loss": 1.5903, "step": 69500 }, { "epoch": 4.43, "learning_rate": 1.1138679663269828e-05, "loss": 1.5837, "step": 70000 }, { "epoch": 4.46, "learning_rate": 1.107538451800747e-05, "loss": 1.6162, "step": 70500 }, { "epoch": 4.49, "learning_rate": 1.1012089372745111e-05, "loss": 1.5988, "step": 71000 }, { "epoch": 4.53, "learning_rate": 1.0948794227482752e-05, "loss": 1.6082, "step": 71500 }, { "epoch": 4.56, "learning_rate": 1.0885499082220394e-05, "loss": 1.5832, "step": 72000 }, { "epoch": 4.59, "learning_rate": 1.0822203936958035e-05, "loss": 1.6153, "step": 72500 }, { "epoch": 4.62, "learning_rate": 1.0758908791695676e-05, "loss": 1.6178, "step": 73000 }, { "epoch": 4.65, "learning_rate": 1.0695613646433321e-05, "loss": 1.5981, "step": 73500 }, { "epoch": 4.68, "learning_rate": 1.0632318501170963e-05, "loss": 1.6135, "step": 74000 }, { "epoch": 4.72, "learning_rate": 1.0569023355908604e-05, "loss": 1.6122, "step": 74500 }, { "epoch": 4.75, "learning_rate": 1.0505728210646245e-05, "loss": 1.5929, "step": 75000 }, { "epoch": 4.78, "learning_rate": 1.0442433065383886e-05, "loss": 1.6069, "step": 75500 }, { "epoch": 4.81, "learning_rate": 1.0379137920121528e-05, "loss": 1.6025, "step": 76000 }, { "epoch": 4.84, "learning_rate": 1.0315842774859167e-05, "loss": 1.6284, "step": 76500 }, { "epoch": 4.87, "learning_rate": 1.0252547629596812e-05, "loss": 1.6134, "step": 77000 }, { "epoch": 4.91, "learning_rate": 1.0189252484334454e-05, "loss": 1.6092, "step": 77500 }, { "epoch": 4.94, "learning_rate": 1.0125957339072095e-05, "loss": 1.6194, "step": 78000 }, { "epoch": 4.97, "learning_rate": 1.0062662193809736e-05, "loss": 1.6227, "step": 78500 }, { "epoch": 5.0, "eval_loss": 1.3593807220458984, "eval_runtime": 634.5713, "eval_samples_per_second": 398.343, "eval_steps_per_second": 24.897, "step": 78995 }, { "epoch": 5.0, "learning_rate": 9.999367048547378e-06, "loss": 1.6451, "step": 79000 }, { "epoch": 5.03, "learning_rate": 9.936071903285019e-06, "loss": 1.5186, "step": 79500 }, { "epoch": 5.06, "learning_rate": 9.87277675802266e-06, "loss": 1.5124, "step": 80000 }, { "epoch": 5.1, "learning_rate": 9.809481612760301e-06, "loss": 1.5223, "step": 80500 }, { "epoch": 5.13, "learning_rate": 9.746186467497943e-06, "loss": 1.5234, "step": 81000 }, { "epoch": 5.16, "learning_rate": 9.682891322235586e-06, "loss": 1.5298, "step": 81500 }, { "epoch": 5.19, "learning_rate": 9.619596176973227e-06, "loss": 1.5259, "step": 82000 }, { "epoch": 5.22, "learning_rate": 9.556301031710869e-06, "loss": 1.5463, "step": 82500 }, { "epoch": 5.25, "learning_rate": 9.49300588644851e-06, "loss": 1.5367, "step": 83000 }, { "epoch": 5.29, "learning_rate": 9.429710741186153e-06, "loss": 1.543, "step": 83500 }, { "epoch": 5.32, "learning_rate": 9.366415595923794e-06, "loss": 1.5379, "step": 84000 }, { "epoch": 5.35, "learning_rate": 9.303120450661436e-06, "loss": 1.5215, "step": 84500 }, { "epoch": 5.38, "learning_rate": 9.239825305399077e-06, "loss": 1.5339, "step": 85000 }, { "epoch": 5.41, "learning_rate": 9.176530160136718e-06, "loss": 1.5588, "step": 85500 }, { "epoch": 5.44, "learning_rate": 9.11323501487436e-06, "loss": 1.5522, "step": 86000 }, { "epoch": 5.48, "learning_rate": 9.049939869612e-06, "loss": 1.5516, "step": 86500 }, { "epoch": 5.51, "learning_rate": 8.986644724349644e-06, "loss": 1.5503, "step": 87000 }, { "epoch": 5.54, "learning_rate": 8.923349579087285e-06, "loss": 1.5459, "step": 87500 }, { "epoch": 5.57, "learning_rate": 8.860054433824927e-06, "loss": 1.5437, "step": 88000 }, { "epoch": 5.6, "learning_rate": 8.796759288562568e-06, "loss": 1.5452, "step": 88500 }, { "epoch": 5.63, "learning_rate": 8.73346414330021e-06, "loss": 1.5434, "step": 89000 }, { "epoch": 5.66, "learning_rate": 8.67016899803785e-06, "loss": 1.5633, "step": 89500 }, { "epoch": 5.7, "learning_rate": 8.606873852775492e-06, "loss": 1.5535, "step": 90000 }, { "epoch": 5.73, "learning_rate": 8.543578707513135e-06, "loss": 1.5692, "step": 90500 }, { "epoch": 5.76, "learning_rate": 8.480283562250776e-06, "loss": 1.5609, "step": 91000 }, { "epoch": 5.79, "learning_rate": 8.416988416988418e-06, "loss": 1.5529, "step": 91500 }, { "epoch": 5.82, "learning_rate": 8.353693271726059e-06, "loss": 1.5602, "step": 92000 }, { "epoch": 5.85, "learning_rate": 8.290398126463702e-06, "loss": 1.5547, "step": 92500 }, { "epoch": 5.89, "learning_rate": 8.227102981201343e-06, "loss": 1.5557, "step": 93000 }, { "epoch": 5.92, "learning_rate": 8.163807835938985e-06, "loss": 1.5488, "step": 93500 }, { "epoch": 5.95, "learning_rate": 8.100512690676626e-06, "loss": 1.5736, "step": 94000 }, { "epoch": 5.98, "learning_rate": 8.037217545414267e-06, "loss": 1.559, "step": 94500 }, { "epoch": 6.0, "eval_loss": 1.3149573802947998, "eval_runtime": 678.6783, "eval_samples_per_second": 372.455, "eval_steps_per_second": 23.279, "step": 94794 }, { "epoch": 6.01, "learning_rate": 7.973922400151909e-06, "loss": 1.5248, "step": 95000 }, { "epoch": 6.04, "learning_rate": 7.91062725488955e-06, "loss": 1.4873, "step": 95500 }, { "epoch": 6.08, "learning_rate": 7.847332109627193e-06, "loss": 1.4885, "step": 96000 }, { "epoch": 6.11, "learning_rate": 7.784036964364834e-06, "loss": 1.4882, "step": 96500 }, { "epoch": 6.14, "learning_rate": 7.720741819102476e-06, "loss": 1.499, "step": 97000 }, { "epoch": 6.17, "learning_rate": 7.657446673840117e-06, "loss": 1.493, "step": 97500 }, { "epoch": 6.2, "learning_rate": 7.594151528577759e-06, "loss": 1.4864, "step": 98000 }, { "epoch": 6.23, "learning_rate": 7.5308563833154e-06, "loss": 1.4889, "step": 98500 }, { "epoch": 6.27, "learning_rate": 7.467561238053042e-06, "loss": 1.5047, "step": 99000 }, { "epoch": 6.3, "learning_rate": 7.404266092790684e-06, "loss": 1.4828, "step": 99500 }, { "epoch": 6.33, "learning_rate": 7.340970947528325e-06, "loss": 1.4884, "step": 100000 }, { "epoch": 6.36, "learning_rate": 7.2776758022659665e-06, "loss": 1.4981, "step": 100500 }, { "epoch": 6.39, "learning_rate": 7.214380657003608e-06, "loss": 1.494, "step": 101000 }, { "epoch": 6.42, "learning_rate": 7.15108551174125e-06, "loss": 1.4798, "step": 101500 }, { "epoch": 6.46, "learning_rate": 7.087790366478891e-06, "loss": 1.498, "step": 102000 }, { "epoch": 6.49, "learning_rate": 7.024495221216533e-06, "loss": 1.496, "step": 102500 }, { "epoch": 6.52, "learning_rate": 6.961200075954176e-06, "loss": 1.5097, "step": 103000 }, { "epoch": 6.55, "learning_rate": 6.897904930691817e-06, "loss": 1.5032, "step": 103500 }, { "epoch": 6.58, "learning_rate": 6.8346097854294576e-06, "loss": 1.5001, "step": 104000 }, { "epoch": 6.61, "learning_rate": 6.771314640167099e-06, "loss": 1.5097, "step": 104500 }, { "epoch": 6.65, "learning_rate": 6.708019494904742e-06, "loss": 1.5065, "step": 105000 }, { "epoch": 6.68, "learning_rate": 6.644724349642383e-06, "loss": 1.4961, "step": 105500 }, { "epoch": 6.71, "learning_rate": 6.5814292043800246e-06, "loss": 1.5092, "step": 106000 }, { "epoch": 6.74, "learning_rate": 6.518134059117667e-06, "loss": 1.5079, "step": 106500 }, { "epoch": 6.77, "learning_rate": 6.454838913855308e-06, "loss": 1.513, "step": 107000 }, { "epoch": 6.8, "learning_rate": 6.391543768592949e-06, "loss": 1.5076, "step": 107500 }, { "epoch": 6.84, "learning_rate": 6.328248623330591e-06, "loss": 1.5123, "step": 108000 }, { "epoch": 6.87, "learning_rate": 6.264953478068233e-06, "loss": 1.5117, "step": 108500 }, { "epoch": 6.9, "learning_rate": 6.201658332805874e-06, "loss": 1.5056, "step": 109000 }, { "epoch": 6.93, "learning_rate": 6.1383631875435156e-06, "loss": 1.517, "step": 109500 }, { "epoch": 6.96, "learning_rate": 6.075068042281157e-06, "loss": 1.515, "step": 110000 }, { "epoch": 6.99, "learning_rate": 6.011772897018799e-06, "loss": 1.5193, "step": 110500 }, { "epoch": 7.0, "eval_loss": 1.2794440984725952, "eval_runtime": 637.2277, "eval_samples_per_second": 396.682, "eval_steps_per_second": 24.793, "step": 110593 }, { "epoch": 7.03, "learning_rate": 5.94847775175644e-06, "loss": 1.4557, "step": 111000 }, { "epoch": 7.06, "learning_rate": 5.885182606494082e-06, "loss": 1.4395, "step": 111500 }, { "epoch": 7.09, "learning_rate": 5.821887461231725e-06, "loss": 1.4518, "step": 112000 }, { "epoch": 7.12, "learning_rate": 5.758592315969366e-06, "loss": 1.4513, "step": 112500 }, { "epoch": 7.15, "learning_rate": 5.695297170707007e-06, "loss": 1.454, "step": 113000 }, { "epoch": 7.18, "learning_rate": 5.632002025444649e-06, "loss": 1.4597, "step": 113500 }, { "epoch": 7.22, "learning_rate": 5.568706880182291e-06, "loss": 1.4383, "step": 114000 }, { "epoch": 7.25, "learning_rate": 5.505411734919932e-06, "loss": 1.4529, "step": 114500 }, { "epoch": 7.28, "learning_rate": 5.442116589657574e-06, "loss": 1.4706, "step": 115000 }, { "epoch": 7.31, "learning_rate": 5.378821444395216e-06, "loss": 1.4576, "step": 115500 }, { "epoch": 7.34, "learning_rate": 5.315526299132857e-06, "loss": 1.4681, "step": 116000 }, { "epoch": 7.37, "learning_rate": 5.252231153870498e-06, "loss": 1.4537, "step": 116500 }, { "epoch": 7.41, "learning_rate": 5.18893600860814e-06, "loss": 1.4583, "step": 117000 }, { "epoch": 7.44, "learning_rate": 5.125640863345782e-06, "loss": 1.4645, "step": 117500 }, { "epoch": 7.47, "learning_rate": 5.062345718083423e-06, "loss": 1.455, "step": 118000 }, { "epoch": 7.5, "learning_rate": 4.999050572821065e-06, "loss": 1.4821, "step": 118500 }, { "epoch": 7.53, "learning_rate": 4.935755427558707e-06, "loss": 1.4605, "step": 119000 }, { "epoch": 7.56, "learning_rate": 4.872460282296348e-06, "loss": 1.4621, "step": 119500 }, { "epoch": 7.6, "learning_rate": 4.8091651370339894e-06, "loss": 1.4601, "step": 120000 }, { "epoch": 7.63, "learning_rate": 4.745869991771632e-06, "loss": 1.4648, "step": 120500 }, { "epoch": 7.66, "learning_rate": 4.682574846509273e-06, "loss": 1.4723, "step": 121000 }, { "epoch": 7.69, "learning_rate": 4.619279701246915e-06, "loss": 1.4733, "step": 121500 }, { "epoch": 7.72, "learning_rate": 4.5559845559845564e-06, "loss": 1.4723, "step": 122000 }, { "epoch": 7.75, "learning_rate": 4.492689410722198e-06, "loss": 1.4788, "step": 122500 }, { "epoch": 7.79, "learning_rate": 4.42939426545984e-06, "loss": 1.4665, "step": 123000 }, { "epoch": 7.82, "learning_rate": 4.366099120197481e-06, "loss": 1.4699, "step": 123500 }, { "epoch": 7.85, "learning_rate": 4.3028039749351235e-06, "loss": 1.4908, "step": 124000 }, { "epoch": 7.88, "learning_rate": 4.239508829672764e-06, "loss": 1.4712, "step": 124500 }, { "epoch": 7.91, "learning_rate": 4.176213684410406e-06, "loss": 1.4722, "step": 125000 }, { "epoch": 7.94, "learning_rate": 4.1129185391480474e-06, "loss": 1.4856, "step": 125500 }, { "epoch": 7.98, "learning_rate": 4.04962339388569e-06, "loss": 1.4793, "step": 126000 }, { "epoch": 8.0, "eval_loss": 1.2516653537750244, "eval_runtime": 654.6089, "eval_samples_per_second": 386.15, "eval_steps_per_second": 24.135, "step": 126392 }, { "epoch": 8.01, "learning_rate": 3.986328248623331e-06, "loss": 1.4563, "step": 126500 }, { "epoch": 8.04, "learning_rate": 3.923033103360972e-06, "loss": 1.4263, "step": 127000 }, { "epoch": 8.07, "learning_rate": 3.8597379580986145e-06, "loss": 1.4301, "step": 127500 }, { "epoch": 8.1, "learning_rate": 3.7964428128362558e-06, "loss": 1.43, "step": 128000 }, { "epoch": 8.13, "learning_rate": 3.7331476675738975e-06, "loss": 1.4355, "step": 128500 }, { "epoch": 8.17, "learning_rate": 3.669852522311539e-06, "loss": 1.4384, "step": 129000 }, { "epoch": 8.2, "learning_rate": 3.6065573770491806e-06, "loss": 1.4398, "step": 129500 }, { "epoch": 8.23, "learning_rate": 3.543262231786822e-06, "loss": 1.425, "step": 130000 }, { "epoch": 8.26, "learning_rate": 3.4799670865244637e-06, "loss": 1.423, "step": 130500 }, { "epoch": 8.29, "learning_rate": 3.416671941262105e-06, "loss": 1.4278, "step": 131000 }, { "epoch": 8.32, "learning_rate": 3.3533767959997472e-06, "loss": 1.4368, "step": 131500 }, { "epoch": 8.35, "learning_rate": 3.290081650737389e-06, "loss": 1.4351, "step": 132000 }, { "epoch": 8.39, "learning_rate": 3.2267865054750303e-06, "loss": 1.4351, "step": 132500 }, { "epoch": 8.42, "learning_rate": 3.163491360212672e-06, "loss": 1.4299, "step": 133000 }, { "epoch": 8.45, "learning_rate": 3.1001962149503134e-06, "loss": 1.4265, "step": 133500 }, { "epoch": 8.48, "learning_rate": 3.036901069687955e-06, "loss": 1.4468, "step": 134000 }, { "epoch": 8.51, "learning_rate": 2.9736059244255965e-06, "loss": 1.4389, "step": 134500 }, { "epoch": 8.54, "learning_rate": 2.9103107791632386e-06, "loss": 1.4199, "step": 135000 }, { "epoch": 8.58, "learning_rate": 2.84701563390088e-06, "loss": 1.4361, "step": 135500 }, { "epoch": 8.61, "learning_rate": 2.7837204886385217e-06, "loss": 1.4401, "step": 136000 }, { "epoch": 8.64, "learning_rate": 2.7204253433761635e-06, "loss": 1.4423, "step": 136500 }, { "epoch": 8.67, "learning_rate": 2.657130198113805e-06, "loss": 1.4266, "step": 137000 }, { "epoch": 8.7, "learning_rate": 2.5938350528514466e-06, "loss": 1.4406, "step": 137500 }, { "epoch": 8.73, "learning_rate": 2.530539907589088e-06, "loss": 1.441, "step": 138000 }, { "epoch": 8.77, "learning_rate": 2.4672447623267296e-06, "loss": 1.4551, "step": 138500 }, { "epoch": 8.8, "learning_rate": 2.4039496170643714e-06, "loss": 1.4452, "step": 139000 }, { "epoch": 8.83, "learning_rate": 2.340654471802013e-06, "loss": 1.4392, "step": 139500 }, { "epoch": 8.86, "learning_rate": 2.2773593265396545e-06, "loss": 1.4361, "step": 140000 }, { "epoch": 8.89, "learning_rate": 2.2140641812772962e-06, "loss": 1.4313, "step": 140500 }, { "epoch": 8.92, "learning_rate": 2.1507690360149376e-06, "loss": 1.4323, "step": 141000 }, { "epoch": 8.96, "learning_rate": 2.0874738907525793e-06, "loss": 1.4266, "step": 141500 }, { "epoch": 8.99, "learning_rate": 2.024178745490221e-06, "loss": 1.4354, "step": 142000 }, { "epoch": 9.0, "eval_loss": 1.2341375350952148, "eval_runtime": 642.6304, "eval_samples_per_second": 393.347, "eval_steps_per_second": 24.585, "step": 142191 }, { "epoch": 9.02, "learning_rate": 1.960883600227863e-06, "loss": 1.4034, "step": 142500 }, { "epoch": 9.05, "learning_rate": 1.8975884549655044e-06, "loss": 1.3966, "step": 143000 }, { "epoch": 9.08, "learning_rate": 1.834293309703146e-06, "loss": 1.3921, "step": 143500 }, { "epoch": 9.11, "learning_rate": 1.7709981644407874e-06, "loss": 1.396, "step": 144000 }, { "epoch": 9.15, "learning_rate": 1.7077030191784292e-06, "loss": 1.411, "step": 144500 }, { "epoch": 9.18, "learning_rate": 1.6444078739160707e-06, "loss": 1.406, "step": 145000 }, { "epoch": 9.21, "learning_rate": 1.5811127286537123e-06, "loss": 1.407, "step": 145500 }, { "epoch": 9.24, "learning_rate": 1.5178175833913538e-06, "loss": 1.4182, "step": 146000 }, { "epoch": 9.27, "learning_rate": 1.4545224381289958e-06, "loss": 1.4116, "step": 146500 }, { "epoch": 9.3, "learning_rate": 1.3912272928666373e-06, "loss": 1.4166, "step": 147000 }, { "epoch": 9.34, "learning_rate": 1.3279321476042789e-06, "loss": 1.4063, "step": 147500 }, { "epoch": 9.37, "learning_rate": 1.2646370023419204e-06, "loss": 1.4025, "step": 148000 }, { "epoch": 9.4, "learning_rate": 1.2013418570795622e-06, "loss": 1.4061, "step": 148500 }, { "epoch": 9.43, "learning_rate": 1.1380467118172037e-06, "loss": 1.4066, "step": 149000 }, { "epoch": 9.46, "learning_rate": 1.0747515665548455e-06, "loss": 1.4152, "step": 149500 }, { "epoch": 9.49, "learning_rate": 1.011456421292487e-06, "loss": 1.417, "step": 150000 }, { "epoch": 9.53, "learning_rate": 9.481612760301285e-07, "loss": 1.411, "step": 150500 }, { "epoch": 9.56, "learning_rate": 8.848661307677701e-07, "loss": 1.4162, "step": 151000 }, { "epoch": 9.59, "learning_rate": 8.215709855054118e-07, "loss": 1.4195, "step": 151500 }, { "epoch": 9.62, "learning_rate": 7.582758402430535e-07, "loss": 1.4226, "step": 152000 }, { "epoch": 9.65, "learning_rate": 6.94980694980695e-07, "loss": 1.4239, "step": 152500 }, { "epoch": 9.68, "learning_rate": 6.316855497183366e-07, "loss": 1.4078, "step": 153000 }, { "epoch": 9.72, "learning_rate": 5.683904044559782e-07, "loss": 1.4101, "step": 153500 }, { "epoch": 9.75, "learning_rate": 5.050952591936199e-07, "loss": 1.416, "step": 154000 }, { "epoch": 9.78, "learning_rate": 4.418001139312615e-07, "loss": 1.4182, "step": 154500 }, { "epoch": 9.81, "learning_rate": 3.785049686689031e-07, "loss": 1.4196, "step": 155000 }, { "epoch": 9.84, "learning_rate": 3.1520982340654476e-07, "loss": 1.4132, "step": 155500 }, { "epoch": 9.87, "learning_rate": 2.5191467814418635e-07, "loss": 1.4138, "step": 156000 }, { "epoch": 9.91, "learning_rate": 1.88619532881828e-07, "loss": 1.4333, "step": 156500 }, { "epoch": 9.94, "learning_rate": 1.253243876194696e-07, "loss": 1.413, "step": 157000 }, { "epoch": 9.97, "learning_rate": 6.202924235711122e-08, "loss": 1.4116, "step": 157500 } ], "max_steps": 157990, "num_train_epochs": 10, "total_flos": 3.820454731815322e+16, "trial_name": null, "trial_params": null }