|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 4.989351992698509, |
|
"global_step": 2050, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 6.097560975609757e-07, |
|
"loss": 1.2345, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.2195121951219514e-06, |
|
"loss": 1.3023, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.8292682926829268e-06, |
|
"loss": 1.2241, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.4390243902439027e-06, |
|
"loss": 1.2505, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.0487804878048782e-06, |
|
"loss": 1.1555, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.6585365853658537e-06, |
|
"loss": 1.101, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.26829268292683e-06, |
|
"loss": 0.9013, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.8780487804878055e-06, |
|
"loss": 0.8904, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 5.487804878048781e-06, |
|
"loss": 0.7205, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 6.0975609756097564e-06, |
|
"loss": 0.6704, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 6.707317073170733e-06, |
|
"loss": 0.6029, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 7.317073170731707e-06, |
|
"loss": 0.5434, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 7.926829268292683e-06, |
|
"loss": 0.5544, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 8.53658536585366e-06, |
|
"loss": 0.5272, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.146341463414634e-06, |
|
"loss": 0.504, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.756097560975611e-06, |
|
"loss": 0.505, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.0365853658536585e-05, |
|
"loss": 0.5116, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.0975609756097562e-05, |
|
"loss": 0.5009, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.1585365853658537e-05, |
|
"loss": 0.4888, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.2195121951219513e-05, |
|
"loss": 0.4531, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.2804878048780488e-05, |
|
"loss": 0.4701, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.3414634146341466e-05, |
|
"loss": 0.4221, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.4024390243902441e-05, |
|
"loss": 0.4427, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.4634146341463415e-05, |
|
"loss": 0.4266, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.524390243902439e-05, |
|
"loss": 0.4375, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.5853658536585366e-05, |
|
"loss": 0.4361, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.6463414634146345e-05, |
|
"loss": 0.4482, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.707317073170732e-05, |
|
"loss": 0.4398, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.7682926829268292e-05, |
|
"loss": 0.4464, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.8292682926829268e-05, |
|
"loss": 0.4687, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.8902439024390246e-05, |
|
"loss": 0.4459, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.9512195121951222e-05, |
|
"loss": 0.4257, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.0121951219512197e-05, |
|
"loss": 0.3982, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.073170731707317e-05, |
|
"loss": 0.4211, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.134146341463415e-05, |
|
"loss": 0.4319, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.1951219512195124e-05, |
|
"loss": 0.4641, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.25609756097561e-05, |
|
"loss": 0.4335, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.3170731707317075e-05, |
|
"loss": 0.4278, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.378048780487805e-05, |
|
"loss": 0.3997, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.4390243902439026e-05, |
|
"loss": 0.4259, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.4156, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.5609756097560977e-05, |
|
"loss": 0.4356, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.6219512195121952e-05, |
|
"loss": 0.3966, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.682926829268293e-05, |
|
"loss": 0.4271, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.7439024390243906e-05, |
|
"loss": 0.4372, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.8048780487804882e-05, |
|
"loss": 0.4174, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.8658536585365854e-05, |
|
"loss": 0.4342, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.926829268292683e-05, |
|
"loss": 0.401, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.9878048780487805e-05, |
|
"loss": 0.4027, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 3.048780487804878e-05, |
|
"loss": 0.4319, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 3.109756097560976e-05, |
|
"loss": 0.4345, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 3.170731707317073e-05, |
|
"loss": 0.4136, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 3.231707317073171e-05, |
|
"loss": 0.4233, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 3.292682926829269e-05, |
|
"loss": 0.4089, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 3.353658536585366e-05, |
|
"loss": 0.4379, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 3.414634146341464e-05, |
|
"loss": 0.3893, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 3.475609756097561e-05, |
|
"loss": 0.4188, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 3.5365853658536584e-05, |
|
"loss": 0.4106, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 3.597560975609756e-05, |
|
"loss": 0.45, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 3.6585365853658535e-05, |
|
"loss": 0.3955, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 3.7195121951219514e-05, |
|
"loss": 0.4393, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 3.780487804878049e-05, |
|
"loss": 0.4256, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 3.8414634146341465e-05, |
|
"loss": 0.4139, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 3.9024390243902444e-05, |
|
"loss": 0.4423, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 3.9634146341463416e-05, |
|
"loss": 0.4259, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.0243902439024395e-05, |
|
"loss": 0.4225, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.085365853658537e-05, |
|
"loss": 0.42, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.146341463414634e-05, |
|
"loss": 0.4104, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.207317073170732e-05, |
|
"loss": 0.4085, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.26829268292683e-05, |
|
"loss": 0.421, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.329268292682927e-05, |
|
"loss": 0.3984, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.390243902439025e-05, |
|
"loss": 0.4428, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.451219512195122e-05, |
|
"loss": 0.4468, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.51219512195122e-05, |
|
"loss": 0.4245, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.573170731707318e-05, |
|
"loss": 0.4008, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.634146341463415e-05, |
|
"loss": 0.4013, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.695121951219512e-05, |
|
"loss": 0.4276, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.75609756097561e-05, |
|
"loss": 0.4307, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.817073170731707e-05, |
|
"loss": 0.3836, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.878048780487805e-05, |
|
"loss": 0.4272, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.9390243902439024e-05, |
|
"loss": 0.4091, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3907, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.9999968146329897e-05, |
|
"loss": 0.4449, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.9999872585400745e-05, |
|
"loss": 0.3889, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.999971331745607e-05, |
|
"loss": 0.4582, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.9999490342901726e-05, |
|
"loss": 0.4386, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.9999203662305926e-05, |
|
"loss": 0.4174, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.9998853276399215e-05, |
|
"loss": 0.4124, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.9998439186074476e-05, |
|
"loss": 0.4114, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.999796139238694e-05, |
|
"loss": 0.4208, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.999741989655415e-05, |
|
"loss": 0.4266, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.999681469995601e-05, |
|
"loss": 0.3977, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.999614580413473e-05, |
|
"loss": 0.4004, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.9995413210794864e-05, |
|
"loss": 0.4481, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.9994616921803264e-05, |
|
"loss": 0.4147, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.999375693918911e-05, |
|
"loss": 0.4221, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.99928332651439e-05, |
|
"loss": 0.428, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.999184590202141e-05, |
|
"loss": 0.4283, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.999079485233775e-05, |
|
"loss": 0.4324, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.9989680118771284e-05, |
|
"loss": 0.4293, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.99885017041627e-05, |
|
"loss": 0.4466, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.998725961151493e-05, |
|
"loss": 0.4101, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.9985953843993194e-05, |
|
"loss": 0.3773, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.998458440492497e-05, |
|
"loss": 0.4226, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.9983151297800005e-05, |
|
"loss": 0.4156, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.998165452627025e-05, |
|
"loss": 0.3961, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.9980094094149945e-05, |
|
"loss": 0.4271, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.997847000541551e-05, |
|
"loss": 0.4275, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.997678226420561e-05, |
|
"loss": 0.3846, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.99750308748211e-05, |
|
"loss": 0.4237, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.997321584172504e-05, |
|
"loss": 0.4215, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.9971337169542665e-05, |
|
"loss": 0.3897, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.996939486306138e-05, |
|
"loss": 0.4016, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.996738892723075e-05, |
|
"loss": 0.4399, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.99653193671625e-05, |
|
"loss": 0.4347, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.996318618813046e-05, |
|
"loss": 0.4371, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.996098939557062e-05, |
|
"loss": 0.4298, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.995872899508103e-05, |
|
"loss": 0.4204, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.995640499242187e-05, |
|
"loss": 0.3856, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.995401739351536e-05, |
|
"loss": 0.4044, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.9951566204445834e-05, |
|
"loss": 0.4019, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.9949051431459615e-05, |
|
"loss": 0.4484, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.994647308096509e-05, |
|
"loss": 0.4149, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.9943831159532665e-05, |
|
"loss": 0.4163, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.994112567389471e-05, |
|
"loss": 0.4097, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.9938356630945616e-05, |
|
"loss": 0.4045, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.99355240377417e-05, |
|
"loss": 0.4257, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.993262790150126e-05, |
|
"loss": 0.3949, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.99296682296045e-05, |
|
"loss": 0.4253, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.992664502959351e-05, |
|
"loss": 0.3911, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.992355830917232e-05, |
|
"loss": 0.4163, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.992040807620678e-05, |
|
"loss": 0.3949, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.9917194338724614e-05, |
|
"loss": 0.4146, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.9913917104915374e-05, |
|
"loss": 0.4143, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.9910576383130414e-05, |
|
"loss": 0.4096, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.990717218188286e-05, |
|
"loss": 0.3887, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.990370450984763e-05, |
|
"loss": 0.4135, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.990017337586137e-05, |
|
"loss": 0.426, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.989657878892244e-05, |
|
"loss": 0.4379, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.9892920758190907e-05, |
|
"loss": 0.4185, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.988919929298851e-05, |
|
"loss": 0.4309, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.9885414402798624e-05, |
|
"loss": 0.4489, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.988156609726628e-05, |
|
"loss": 0.3993, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.987765438619806e-05, |
|
"loss": 0.4559, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.987367927956218e-05, |
|
"loss": 0.4005, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.986964078748837e-05, |
|
"loss": 0.3977, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.986553892026789e-05, |
|
"loss": 0.4036, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.9861373688353504e-05, |
|
"loss": 0.4411, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.9857145102359456e-05, |
|
"loss": 0.4303, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.985285317306141e-05, |
|
"loss": 0.4416, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.984849791139646e-05, |
|
"loss": 0.3917, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.984407932846311e-05, |
|
"loss": 0.3887, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.983959743552118e-05, |
|
"loss": 0.4235, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.9835052243991874e-05, |
|
"loss": 0.3951, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.983044376545767e-05, |
|
"loss": 0.3995, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.982577201166232e-05, |
|
"loss": 0.3995, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.982103699451082e-05, |
|
"loss": 0.4131, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.981623872606938e-05, |
|
"loss": 0.4159, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.981137721856541e-05, |
|
"loss": 0.4039, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.980645248438745e-05, |
|
"loss": 0.442, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.980146453608518e-05, |
|
"loss": 0.4113, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.979641338636935e-05, |
|
"loss": 0.4177, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.979129904811176e-05, |
|
"loss": 0.4017, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.9786121534345265e-05, |
|
"loss": 0.4274, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.978088085826368e-05, |
|
"loss": 0.4544, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.977557703322178e-05, |
|
"loss": 0.39, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.977021007273528e-05, |
|
"loss": 0.418, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.976477999048077e-05, |
|
"loss": 0.3923, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.97592868002957e-05, |
|
"loss": 0.4087, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.9753730516178313e-05, |
|
"loss": 0.4061, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.974811115228767e-05, |
|
"loss": 0.3747, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.9742428722943545e-05, |
|
"loss": 0.399, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.973668324262645e-05, |
|
"loss": 0.3833, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.973087472597754e-05, |
|
"loss": 0.4333, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.972500318779863e-05, |
|
"loss": 0.406, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.9719068643052135e-05, |
|
"loss": 0.39, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.9713071106860996e-05, |
|
"loss": 0.4317, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.970701059450872e-05, |
|
"loss": 0.4173, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.9700887121439244e-05, |
|
"loss": 0.3884, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.969470070325699e-05, |
|
"loss": 0.3944, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.968845135572677e-05, |
|
"loss": 0.4076, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.968213909477376e-05, |
|
"loss": 0.4195, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.967576393648344e-05, |
|
"loss": 0.4093, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.9669325897101604e-05, |
|
"loss": 0.3974, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.966282499303424e-05, |
|
"loss": 0.4025, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.965626124084759e-05, |
|
"loss": 0.4058, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.9649634657267995e-05, |
|
"loss": 0.4007, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.964294525918196e-05, |
|
"loss": 0.4218, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.963619306363602e-05, |
|
"loss": 0.4141, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.962937808783675e-05, |
|
"loss": 0.4233, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.9622500349150716e-05, |
|
"loss": 0.3931, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.961555986510442e-05, |
|
"loss": 0.4144, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.960855665338424e-05, |
|
"loss": 0.3957, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.960149073183643e-05, |
|
"loss": 0.3879, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.959436211846703e-05, |
|
"loss": 0.4152, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.958717083144182e-05, |
|
"loss": 0.4143, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.957991688908634e-05, |
|
"loss": 0.3976, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.9572600309885744e-05, |
|
"loss": 0.4072, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.956522111248483e-05, |
|
"loss": 0.3903, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.955777931568797e-05, |
|
"loss": 0.3908, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.955027493845903e-05, |
|
"loss": 0.4284, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.954270799992138e-05, |
|
"loss": 0.4072, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.953507851935779e-05, |
|
"loss": 0.43, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.952738651621043e-05, |
|
"loss": 0.4228, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.951963201008076e-05, |
|
"loss": 0.3991, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.951181502072957e-05, |
|
"loss": 0.4057, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.950393556807682e-05, |
|
"loss": 0.3987, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.949599367220168e-05, |
|
"loss": 0.4142, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.948798935334242e-05, |
|
"loss": 0.3994, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.9479922631896405e-05, |
|
"loss": 0.3989, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.947179352842001e-05, |
|
"loss": 0.4186, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.946360206362858e-05, |
|
"loss": 0.3896, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.9455348258396364e-05, |
|
"loss": 0.4122, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.944703213375648e-05, |
|
"loss": 0.4319, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.9438653710900864e-05, |
|
"loss": 0.3997, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.943021301118019e-05, |
|
"loss": 0.3924, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.942171005610385e-05, |
|
"loss": 0.3952, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.941314486733986e-05, |
|
"loss": 0.4137, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.940451746671484e-05, |
|
"loss": 0.4277, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.9395827876213936e-05, |
|
"loss": 0.4003, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.938707611798078e-05, |
|
"loss": 0.3884, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.937826221431742e-05, |
|
"loss": 0.4003, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.936938618768426e-05, |
|
"loss": 0.4183, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.936044806070004e-05, |
|
"loss": 0.4319, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.935144785614173e-05, |
|
"loss": 0.3968, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.934238559694448e-05, |
|
"loss": 0.3749, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.9333261306201595e-05, |
|
"loss": 0.4044, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.932407500716445e-05, |
|
"loss": 0.4067, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.9314826723242425e-05, |
|
"loss": 0.417, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.9305516478002865e-05, |
|
"loss": 0.4099, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.9296144295171024e-05, |
|
"loss": 0.4201, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.928671019862995e-05, |
|
"loss": 0.3848, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.92772142124205e-05, |
|
"loss": 0.3959, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.9267656360741245e-05, |
|
"loss": 0.3794, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.925803666794838e-05, |
|
"loss": 0.3956, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.924835515855572e-05, |
|
"loss": 0.423, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.92386118572346e-05, |
|
"loss": 0.4015, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.92288067888138e-05, |
|
"loss": 0.4043, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.921893997827951e-05, |
|
"loss": 0.3711, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.920901145077527e-05, |
|
"loss": 0.4248, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.919902123160187e-05, |
|
"loss": 0.4235, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.918896934621734e-05, |
|
"loss": 0.4214, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.9178855820236824e-05, |
|
"loss": 0.3827, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.916868067943256e-05, |
|
"loss": 0.3948, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.915844394973379e-05, |
|
"loss": 0.3697, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.914814565722671e-05, |
|
"loss": 0.4164, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.9137785828154393e-05, |
|
"loss": 0.3942, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.9127364488916716e-05, |
|
"loss": 0.3949, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.9116881666070327e-05, |
|
"loss": 0.3867, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.9106337386328524e-05, |
|
"loss": 0.3842, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.909573167656124e-05, |
|
"loss": 0.3975, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.9085064563794925e-05, |
|
"loss": 0.4215, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.907433607521251e-05, |
|
"loss": 0.3782, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.906354623815336e-05, |
|
"loss": 0.399, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.905269508011312e-05, |
|
"loss": 0.4041, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.904178262874374e-05, |
|
"loss": 0.3899, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.903080891185335e-05, |
|
"loss": 0.3772, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.901977395740619e-05, |
|
"loss": 0.4334, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.9008677793522584e-05, |
|
"loss": 0.383, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.899752044847881e-05, |
|
"loss": 0.4064, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.898630195070705e-05, |
|
"loss": 0.3921, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.8975022328795325e-05, |
|
"loss": 0.415, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.8963681611487445e-05, |
|
"loss": 0.4128, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.895227982768287e-05, |
|
"loss": 0.4232, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.89408170064367e-05, |
|
"loss": 0.3914, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.892929317695957e-05, |
|
"loss": 0.404, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.891770836861757e-05, |
|
"loss": 0.4274, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.8906062610932215e-05, |
|
"loss": 0.4025, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.889435593358029e-05, |
|
"loss": 0.3822, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.888258836639386e-05, |
|
"loss": 0.4048, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.8870759939360136e-05, |
|
"loss": 0.3952, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.885887068262143e-05, |
|
"loss": 0.4112, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.884692062647506e-05, |
|
"loss": 0.4039, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.8834909801373264e-05, |
|
"loss": 0.4157, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.8822838237923166e-05, |
|
"loss": 0.4066, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.881070596688664e-05, |
|
"loss": 0.387, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.8798513019180295e-05, |
|
"loss": 0.407, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.878625942587532e-05, |
|
"loss": 0.4103, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.877394521819747e-05, |
|
"loss": 0.411, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.8761570427526973e-05, |
|
"loss": 0.3986, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.874913508539844e-05, |
|
"loss": 0.3858, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.873663922350073e-05, |
|
"loss": 0.4145, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.8724082873677027e-05, |
|
"loss": 0.4027, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.871146606792455e-05, |
|
"loss": 0.393, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.8698788838394644e-05, |
|
"loss": 0.3802, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.8686051217392606e-05, |
|
"loss": 0.3923, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.867325323737765e-05, |
|
"loss": 0.3985, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.866039493096276e-05, |
|
"loss": 0.3941, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.86474763309147e-05, |
|
"loss": 0.3776, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.863449747015384e-05, |
|
"loss": 0.4265, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.862145838175413e-05, |
|
"loss": 0.4001, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.860835909894301e-05, |
|
"loss": 0.4198, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.859519965510129e-05, |
|
"loss": 0.383, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.858198008376308e-05, |
|
"loss": 0.4056, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.856870041861575e-05, |
|
"loss": 0.4108, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.8555360693499786e-05, |
|
"loss": 0.3703, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.8541960942408716e-05, |
|
"loss": 0.3799, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.852850119948904e-05, |
|
"loss": 0.3736, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.851498149904014e-05, |
|
"loss": 0.3908, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.850140187551417e-05, |
|
"loss": 0.3968, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.8487762363516024e-05, |
|
"loss": 0.3925, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.847406299780316e-05, |
|
"loss": 0.3768, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.8460303813285585e-05, |
|
"loss": 0.4419, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.844648484502575e-05, |
|
"loss": 0.3688, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.843260612823844e-05, |
|
"loss": 0.4208, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.8418667698290696e-05, |
|
"loss": 0.4063, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.840466959070174e-05, |
|
"loss": 0.3719, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.839061184114285e-05, |
|
"loss": 0.3985, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.837649448543731e-05, |
|
"loss": 0.3868, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.8362317559560274e-05, |
|
"loss": 0.3881, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.834808109963873e-05, |
|
"loss": 0.4067, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.833378514195133e-05, |
|
"loss": 0.3883, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.83194297229284e-05, |
|
"loss": 0.3996, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.830501487915174e-05, |
|
"loss": 0.4075, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.8290540647354624e-05, |
|
"loss": 0.3918, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.8276007064421635e-05, |
|
"loss": 0.4206, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.826141416738861e-05, |
|
"loss": 0.3924, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.824676199344253e-05, |
|
"loss": 0.3814, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.8232050579921445e-05, |
|
"loss": 0.3809, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.821727996431435e-05, |
|
"loss": 0.3979, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.8202450184261116e-05, |
|
"loss": 0.4201, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.8187561277552374e-05, |
|
"loss": 0.3785, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.817261328212942e-05, |
|
"loss": 0.3918, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.815760623608415e-05, |
|
"loss": 0.3789, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.8142540177658925e-05, |
|
"loss": 0.3967, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.812741514524647e-05, |
|
"loss": 0.4155, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.811223117738981e-05, |
|
"loss": 0.3727, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.8096988312782174e-05, |
|
"loss": 0.396, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.8081686590266835e-05, |
|
"loss": 0.3694, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.806632604883708e-05, |
|
"loss": 0.3919, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.8050906727636085e-05, |
|
"loss": 0.3757, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.8035428665956806e-05, |
|
"loss": 0.381, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.801989190324188e-05, |
|
"loss": 0.3915, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.800429647908354e-05, |
|
"loss": 0.3995, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.798864243322353e-05, |
|
"loss": 0.4188, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.7972929805552926e-05, |
|
"loss": 0.3832, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.795715863611212e-05, |
|
"loss": 0.3624, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.79413289650907e-05, |
|
"loss": 0.3779, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.7925440832827307e-05, |
|
"loss": 0.425, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.790949427980956e-05, |
|
"loss": 0.3829, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.7893489346673965e-05, |
|
"loss": 0.3877, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.7877426074205786e-05, |
|
"loss": 0.4043, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.786130450333897e-05, |
|
"loss": 0.3687, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.784512467515599e-05, |
|
"loss": 0.3679, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.782888663088781e-05, |
|
"loss": 0.3957, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.781259041191375e-05, |
|
"loss": 0.4215, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.7796236059761346e-05, |
|
"loss": 0.3881, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 4.777982361610629e-05, |
|
"loss": 0.3882, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 4.7763353122772305e-05, |
|
"loss": 0.386, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 4.774682462173105e-05, |
|
"loss": 0.3747, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 4.773023815510199e-05, |
|
"loss": 0.4025, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.7713593765152316e-05, |
|
"loss": 0.3759, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.7696891494296826e-05, |
|
"loss": 0.3693, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.7680131385097806e-05, |
|
"loss": 0.3718, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.766331348026493e-05, |
|
"loss": 0.3787, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.764643782265516e-05, |
|
"loss": 0.3809, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.762950445527264e-05, |
|
"loss": 0.416, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.7612513421268544e-05, |
|
"loss": 0.3663, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.7595464763941024e-05, |
|
"loss": 0.3872, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.7578358526735065e-05, |
|
"loss": 0.3923, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.756119475324237e-05, |
|
"loss": 0.3853, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.7543973487201286e-05, |
|
"loss": 0.4108, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.752669477249666e-05, |
|
"loss": 0.3972, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 4.750935865315971e-05, |
|
"loss": 0.3796, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 4.749196517336798e-05, |
|
"loss": 0.3624, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 4.747451437744515e-05, |
|
"loss": 0.3902, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 4.7457006309860976e-05, |
|
"loss": 0.4268, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.7439441015231154e-05, |
|
"loss": 0.3881, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.742181853831721e-05, |
|
"loss": 0.3927, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.740413892402639e-05, |
|
"loss": 0.4028, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.7386402217411555e-05, |
|
"loss": 0.3957, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.7368608463671013e-05, |
|
"loss": 0.3859, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.7350757708148495e-05, |
|
"loss": 0.4055, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.733284999633297e-05, |
|
"loss": 0.4085, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.731488537385853e-05, |
|
"loss": 0.3968, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.729686388650432e-05, |
|
"loss": 0.4205, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.7278785580194365e-05, |
|
"loss": 0.3751, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.7260650500997514e-05, |
|
"loss": 0.3866, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.724245869512727e-05, |
|
"loss": 0.3916, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.722421020894169e-05, |
|
"loss": 0.3858, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 4.7205905088943286e-05, |
|
"loss": 0.4032, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 4.7187543381778864e-05, |
|
"loss": 0.3772, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 4.716912513423945e-05, |
|
"loss": 0.3906, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 4.715065039326015e-05, |
|
"loss": 0.4172, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.7132119205920026e-05, |
|
"loss": 0.3682, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.7113531619441984e-05, |
|
"loss": 0.3684, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.709488768119266e-05, |
|
"loss": 0.4049, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.707618743868226e-05, |
|
"loss": 0.3852, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.705743093956452e-05, |
|
"loss": 0.4162, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.703861823163649e-05, |
|
"loss": 0.353, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.7019749362838476e-05, |
|
"loss": 0.3958, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.7000824381253905e-05, |
|
"loss": 0.406, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.6981843335109174e-05, |
|
"loss": 0.3851, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.6962806272773564e-05, |
|
"loss": 0.3828, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.69437132427591e-05, |
|
"loss": 0.4331, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.6924564293720434e-05, |
|
"loss": 0.3946, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 4.6905359474454705e-05, |
|
"loss": 0.3799, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 4.6886098833901436e-05, |
|
"loss": 0.3543, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 4.686678242114239e-05, |
|
"loss": 0.3772, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 4.684741028540146e-05, |
|
"loss": 0.4009, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.6827982476044534e-05, |
|
"loss": 0.3806, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.680849904257938e-05, |
|
"loss": 0.3781, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.678896003465549e-05, |
|
"loss": 0.4264, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.6769365502064025e-05, |
|
"loss": 0.3857, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.674971549473757e-05, |
|
"loss": 0.3797, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.6730010062750134e-05, |
|
"loss": 0.3847, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.671024925631694e-05, |
|
"loss": 0.382, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.669043312579433e-05, |
|
"loss": 0.3778, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.667056172167962e-05, |
|
"loss": 0.3837, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.665063509461097e-05, |
|
"loss": 0.3807, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.5687975287437439, |
|
"eval_runtime": 116.1454, |
|
"eval_samples_per_second": 6.561, |
|
"eval_steps_per_second": 0.413, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.6630653295367286e-05, |
|
"loss": 0.3618, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.6610616374868066e-05, |
|
"loss": 0.2856, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.659052438417326e-05, |
|
"loss": 0.2727, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.6570377374483154e-05, |
|
"loss": 0.2632, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.6550175397138253e-05, |
|
"loss": 0.2758, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.652991850361912e-05, |
|
"loss": 0.2561, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.650960674554627e-05, |
|
"loss": 0.2807, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.648924017468003e-05, |
|
"loss": 0.2686, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.64688188429204e-05, |
|
"loss": 0.2584, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.644834280230692e-05, |
|
"loss": 0.2368, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.6427812105018576e-05, |
|
"loss": 0.2642, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 4.6407226803373586e-05, |
|
"loss": 0.2476, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 4.6386586949829356e-05, |
|
"loss": 0.249, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 4.6365892596982297e-05, |
|
"loss": 0.2541, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 4.634514379756769e-05, |
|
"loss": 0.2785, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 4.632434060445956e-05, |
|
"loss": 0.2369, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 4.630348307067057e-05, |
|
"loss": 0.27, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 4.6282571249351826e-05, |
|
"loss": 0.2603, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 4.626160519379279e-05, |
|
"loss": 0.2498, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 4.624058495742114e-05, |
|
"loss": 0.2654, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 4.621951059380258e-05, |
|
"loss": 0.2316, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 4.619838215664082e-05, |
|
"loss": 0.2515, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 4.6177199699777285e-05, |
|
"loss": 0.2387, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 4.615596327719111e-05, |
|
"loss": 0.2628, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 4.613467294299892e-05, |
|
"loss": 0.2586, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 4.611332875145477e-05, |
|
"loss": 0.2698, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 4.609193075694989e-05, |
|
"loss": 0.254, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 4.607047901401267e-05, |
|
"loss": 0.2585, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 4.604897357730845e-05, |
|
"loss": 0.2311, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 4.60274145016394e-05, |
|
"loss": 0.2714, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 4.600580184194436e-05, |
|
"loss": 0.2536, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 4.598413565329875e-05, |
|
"loss": 0.2485, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 4.5962415990914375e-05, |
|
"loss": 0.2466, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 4.59406429101393e-05, |
|
"loss": 0.2465, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 4.5918816466457746e-05, |
|
"loss": 0.2478, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 4.5896936715489885e-05, |
|
"loss": 0.2733, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 4.587500371299176e-05, |
|
"loss": 0.2444, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 4.585301751485508e-05, |
|
"loss": 0.2629, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 4.583097817710716e-05, |
|
"loss": 0.2702, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 4.580888575591068e-05, |
|
"loss": 0.2694, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 4.5786740307563636e-05, |
|
"loss": 0.2578, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 4.576454188849911e-05, |
|
"loss": 0.2516, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 4.574229055528522e-05, |
|
"loss": 0.2685, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 4.5719986364624866e-05, |
|
"loss": 0.2617, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 4.569762937335569e-05, |
|
"loss": 0.2532, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 4.5675219638449876e-05, |
|
"loss": 0.2885, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 4.5652757217013995e-05, |
|
"loss": 0.2597, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 4.5630242166288895e-05, |
|
"loss": 0.266, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 4.5607674543649546e-05, |
|
"loss": 0.254, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 4.5585054406604864e-05, |
|
"loss": 0.2702, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 4.556238181279761e-05, |
|
"loss": 0.2475, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 4.5539656820004194e-05, |
|
"loss": 0.2458, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 4.551687948613459e-05, |
|
"loss": 0.2492, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 4.5494049869232125e-05, |
|
"loss": 0.269, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 4.5471168027473356e-05, |
|
"loss": 0.2646, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 4.5448234019167945e-05, |
|
"loss": 0.2459, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 4.5425247902758474e-05, |
|
"loss": 0.2762, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 4.540220973682032e-05, |
|
"loss": 0.2511, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 4.537911958006149e-05, |
|
"loss": 0.252, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 4.5355977491322485e-05, |
|
"loss": 0.2679, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 4.5332783529576146e-05, |
|
"loss": 0.2551, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 4.530953775392749e-05, |
|
"loss": 0.2731, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 4.5286240223613584e-05, |
|
"loss": 0.2612, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 4.526289099800337e-05, |
|
"loss": 0.2739, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 4.523949013659753e-05, |
|
"loss": 0.2644, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 4.521603769902835e-05, |
|
"loss": 0.2811, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 4.519253374505949e-05, |
|
"loss": 0.2624, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 4.5168978334585956e-05, |
|
"loss": 0.2552, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 4.514537152763384e-05, |
|
"loss": 0.27, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 4.5121713384360215e-05, |
|
"loss": 0.2652, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 4.5098003965052984e-05, |
|
"loss": 0.2698, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 4.507424333013069e-05, |
|
"loss": 0.2585, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 4.505043154014243e-05, |
|
"loss": 0.2573, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 4.502656865576762e-05, |
|
"loss": 0.2561, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 4.5002654737815905e-05, |
|
"loss": 0.2629, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 4.497868984722697e-05, |
|
"loss": 0.2696, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 4.4954674045070387e-05, |
|
"loss": 0.2727, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 4.493060739254548e-05, |
|
"loss": 0.2718, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 4.4906489950981126e-05, |
|
"loss": 0.2537, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 4.488232178183567e-05, |
|
"loss": 0.2565, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 4.4858102946696676e-05, |
|
"loss": 0.2554, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 4.4833833507280884e-05, |
|
"loss": 0.2904, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 4.4809513525433925e-05, |
|
"loss": 0.262, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 4.478514306313025e-05, |
|
"loss": 0.2537, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 4.476072218247297e-05, |
|
"loss": 0.2583, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 4.4736250945693655e-05, |
|
"loss": 0.2712, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 4.471172941515219e-05, |
|
"loss": 0.257, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 4.468715765333664e-05, |
|
"loss": 0.2617, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 4.466253572286308e-05, |
|
"loss": 0.2528, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 4.46378636864754e-05, |
|
"loss": 0.2711, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 4.46131416070452e-05, |
|
"loss": 0.2568, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 4.458836954757161e-05, |
|
"loss": 0.2702, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 4.4563547571181086e-05, |
|
"loss": 0.2596, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 4.4538675741127326e-05, |
|
"loss": 0.2478, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 4.451375412079106e-05, |
|
"loss": 0.2438, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 4.4488782773679885e-05, |
|
"loss": 0.2797, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 4.4463761763428125e-05, |
|
"loss": 0.2355, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 4.443869115379667e-05, |
|
"loss": 0.2718, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 4.441357100867278e-05, |
|
"loss": 0.2654, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 4.4388401392069975e-05, |
|
"loss": 0.2776, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 4.4363182368127824e-05, |
|
"loss": 0.2631, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 4.433791400111179e-05, |
|
"loss": 0.2599, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 4.4312596355413116e-05, |
|
"loss": 0.2629, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 4.428722949554857e-05, |
|
"loss": 0.25, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 4.426181348616039e-05, |
|
"loss": 0.2557, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 4.4236348392016e-05, |
|
"loss": 0.2793, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 4.421083427800795e-05, |
|
"loss": 0.2641, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 4.41852712091537e-05, |
|
"loss": 0.2696, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 4.415965925059544e-05, |
|
"loss": 0.2637, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 4.413399846759998e-05, |
|
"loss": 0.2772, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 4.4108288925558505e-05, |
|
"loss": 0.2432, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 4.40825306899865e-05, |
|
"loss": 0.2657, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 4.405672382652349e-05, |
|
"loss": 0.2635, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 4.403086840093297e-05, |
|
"loss": 0.2551, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 4.400496447910212e-05, |
|
"loss": 0.2555, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 4.397901212704176e-05, |
|
"loss": 0.2785, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 4.395301141088611e-05, |
|
"loss": 0.2866, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 4.3926962396892606e-05, |
|
"loss": 0.256, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 4.3900865151441796e-05, |
|
"loss": 0.2585, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 4.387471974103713e-05, |
|
"loss": 0.265, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 4.384852623230478e-05, |
|
"loss": 0.2445, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 4.38222846919935e-05, |
|
"loss": 0.2608, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 4.379599518697444e-05, |
|
"loss": 0.2823, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 4.3769657784240976e-05, |
|
"loss": 0.2688, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 4.3743272550908543e-05, |
|
"loss": 0.2572, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 4.371683955421447e-05, |
|
"loss": 0.2635, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 4.369035886151778e-05, |
|
"loss": 0.2713, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 4.366383054029906e-05, |
|
"loss": 0.2651, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 4.363725465816028e-05, |
|
"loss": 0.2487, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 4.3610631282824556e-05, |
|
"loss": 0.2513, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 4.3583960482136085e-05, |
|
"loss": 0.2683, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 4.3557242324059896e-05, |
|
"loss": 0.2706, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 4.3530476876681696e-05, |
|
"loss": 0.2741, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 4.350366420820771e-05, |
|
"loss": 0.238, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 4.347680438696449e-05, |
|
"loss": 0.2656, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 4.344989748139873e-05, |
|
"loss": 0.2534, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 4.342294356007715e-05, |
|
"loss": 0.2832, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 4.339594269168624e-05, |
|
"loss": 0.259, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 4.3368894945032146e-05, |
|
"loss": 0.2734, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 4.334180038904046e-05, |
|
"loss": 0.2805, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 4.331465909275608e-05, |
|
"loss": 0.2837, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 4.3287471125342996e-05, |
|
"loss": 0.2763, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 4.326023655608411e-05, |
|
"loss": 0.2678, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 4.323295545438112e-05, |
|
"loss": 0.2583, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 4.3205627889754286e-05, |
|
"loss": 0.2657, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 4.317825393184226e-05, |
|
"loss": 0.2653, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 4.315083365040192e-05, |
|
"loss": 0.2566, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 4.31233671153082e-05, |
|
"loss": 0.2443, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 4.309585439655389e-05, |
|
"loss": 0.262, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 4.306829556424948e-05, |
|
"loss": 0.2639, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 4.304069068862296e-05, |
|
"loss": 0.2558, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 4.301303984001967e-05, |
|
"loss": 0.2791, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 4.298534308890209e-05, |
|
"loss": 0.2564, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 4.295760050584966e-05, |
|
"loss": 0.2749, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 4.2929812161558636e-05, |
|
"loss": 0.2839, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 4.290197812684188e-05, |
|
"loss": 0.2432, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 4.2874098472628675e-05, |
|
"loss": 0.2716, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 4.284617326996458e-05, |
|
"loss": 0.2628, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 4.28182025900112e-05, |
|
"loss": 0.2797, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 4.279018650404604e-05, |
|
"loss": 0.2704, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 4.276212508346232e-05, |
|
"loss": 0.2701, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 4.273401839976877e-05, |
|
"loss": 0.2713, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 4.270586652458948e-05, |
|
"loss": 0.249, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 4.267766952966369e-05, |
|
"loss": 0.2755, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 4.264942748684563e-05, |
|
"loss": 0.2882, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 4.2621140468104295e-05, |
|
"loss": 0.2686, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 4.2592808545523335e-05, |
|
"loss": 0.2901, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 4.256443179130081e-05, |
|
"loss": 0.251, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 4.2536010277748996e-05, |
|
"loss": 0.2522, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 4.250754407729428e-05, |
|
"loss": 0.2773, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 4.2479033262476884e-05, |
|
"loss": 0.2574, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 4.245047790595075e-05, |
|
"loss": 0.2835, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 4.242187808048329e-05, |
|
"loss": 0.2646, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 4.239323385895527e-05, |
|
"loss": 0.2786, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 4.2364545314360585e-05, |
|
"loss": 0.282, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 4.233581251980604e-05, |
|
"loss": 0.2655, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 4.2307035548511265e-05, |
|
"loss": 0.259, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 4.227821447380842e-05, |
|
"loss": 0.2476, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 4.224934936914206e-05, |
|
"loss": 0.2628, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 4.222044030806894e-05, |
|
"loss": 0.2629, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 4.2191487364257854e-05, |
|
"loss": 0.2776, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 4.216249061148939e-05, |
|
"loss": 0.2549, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 4.21334501236558e-05, |
|
"loss": 0.2513, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 4.210436597476076e-05, |
|
"loss": 0.2596, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 4.207523823891923e-05, |
|
"loss": 0.2767, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 4.2046066990357235e-05, |
|
"loss": 0.2735, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 4.201685230341168e-05, |
|
"loss": 0.2487, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 4.198759425253014e-05, |
|
"loss": 0.2558, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 4.195829291227076e-05, |
|
"loss": 0.2773, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 4.192894835730193e-05, |
|
"loss": 0.2716, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 4.1899560662402206e-05, |
|
"loss": 0.2724, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 4.1870129902460056e-05, |
|
"loss": 0.27, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 4.18406561524737e-05, |
|
"loss": 0.2594, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 4.18111394875509e-05, |
|
"loss": 0.2581, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 4.178157998290879e-05, |
|
"loss": 0.265, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 4.175197771387368e-05, |
|
"loss": 0.2653, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 4.172233275588082e-05, |
|
"loss": 0.2808, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 4.169264518447428e-05, |
|
"loss": 0.27, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 4.16629150753067e-05, |
|
"loss": 0.2522, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 4.163314250413913e-05, |
|
"loss": 0.253, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 4.160332754684084e-05, |
|
"loss": 0.2572, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 4.157347027938907e-05, |
|
"loss": 0.2799, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 4.1543570777868924e-05, |
|
"loss": 0.2816, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 4.151362911847309e-05, |
|
"loss": 0.2859, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 4.148364537750172e-05, |
|
"loss": 0.2601, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 4.1453619631362195e-05, |
|
"loss": 0.2725, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 4.142355195656892e-05, |
|
"loss": 0.2669, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 4.1393442429743166e-05, |
|
"loss": 0.2955, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 4.1363291127612845e-05, |
|
"loss": 0.2655, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 4.1333098127012326e-05, |
|
"loss": 0.2545, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 4.130286350488224e-05, |
|
"loss": 0.2724, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 4.127258733826929e-05, |
|
"loss": 0.2633, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 4.124226970432602e-05, |
|
"loss": 0.2643, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 4.121191068031067e-05, |
|
"loss": 0.2817, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 4.118151034358696e-05, |
|
"loss": 0.263, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 4.1151068771623866e-05, |
|
"loss": 0.2869, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 4.112058604199544e-05, |
|
"loss": 0.2666, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 4.109006223238064e-05, |
|
"loss": 0.2692, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 4.1059497420563094e-05, |
|
"loss": 0.2615, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 4.102889168443091e-05, |
|
"loss": 0.2571, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 4.099824510197649e-05, |
|
"loss": 0.2914, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 4.0967557751296336e-05, |
|
"loss": 0.2808, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 4.093682971059081e-05, |
|
"loss": 0.2658, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 4.0906061058163995e-05, |
|
"loss": 0.2727, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 4.087525187242345e-05, |
|
"loss": 0.2541, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 4.0844402231880016e-05, |
|
"loss": 0.2676, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 4.0813512215147654e-05, |
|
"loss": 0.2555, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 4.078258190094318e-05, |
|
"loss": 0.2597, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 4.075161136808612e-05, |
|
"loss": 0.2589, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 4.0720600695498486e-05, |
|
"loss": 0.2852, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 4.068954996220457e-05, |
|
"loss": 0.2557, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 4.0658459247330766e-05, |
|
"loss": 0.2697, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 4.062732863010534e-05, |
|
"loss": 0.2678, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 4.0596158189858255e-05, |
|
"loss": 0.2631, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 4.0564948006020934e-05, |
|
"loss": 0.2559, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 4.0533698158126085e-05, |
|
"loss": 0.2833, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 4.050240872580749e-05, |
|
"loss": 0.2542, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 4.047107978879985e-05, |
|
"loss": 0.28, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 4.043971142693844e-05, |
|
"loss": 0.2607, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 4.040830372015909e-05, |
|
"loss": 0.278, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 4.037685674849786e-05, |
|
"loss": 0.2569, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 4.034537059209085e-05, |
|
"loss": 0.2844, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 4.0313845331174036e-05, |
|
"loss": 0.2639, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 4.0282281046083045e-05, |
|
"loss": 0.2735, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 4.025067781725294e-05, |
|
"loss": 0.2713, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 4.021903572521802e-05, |
|
"loss": 0.2515, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 4.0187354850611636e-05, |
|
"loss": 0.2651, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 4.015563527416595e-05, |
|
"loss": 0.2788, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 4.012387707671177e-05, |
|
"loss": 0.2753, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 4.00920803391783e-05, |
|
"loss": 0.2589, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 4.0060245142592944e-05, |
|
"loss": 0.2748, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 4.002837156808116e-05, |
|
"loss": 0.2559, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 3.999645969686616e-05, |
|
"loss": 0.2563, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 3.996450961026876e-05, |
|
"loss": 0.251, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 3.9932521389707155e-05, |
|
"loss": 0.2661, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 3.990049511669675e-05, |
|
"loss": 0.2563, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 3.986843087284986e-05, |
|
"loss": 0.2754, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 3.9836328739875615e-05, |
|
"loss": 0.2591, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 3.980418879957967e-05, |
|
"loss": 0.2764, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 3.977201113386402e-05, |
|
"loss": 0.2801, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 3.9739795824726804e-05, |
|
"loss": 0.2768, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 3.9707542954262115e-05, |
|
"loss": 0.2933, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 3.96752526046597e-05, |
|
"loss": 0.2757, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 3.964292485820487e-05, |
|
"loss": 0.2557, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 3.9610559797278216e-05, |
|
"loss": 0.2624, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 3.957815750435542e-05, |
|
"loss": 0.2618, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 3.954571806200702e-05, |
|
"loss": 0.2689, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 3.951324155289825e-05, |
|
"loss": 0.2581, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 3.9480728059788796e-05, |
|
"loss": 0.2589, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 3.9448177665532574e-05, |
|
"loss": 0.2733, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 3.941559045307755e-05, |
|
"loss": 0.2653, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 3.938296650546552e-05, |
|
"loss": 0.2799, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 3.935030590583186e-05, |
|
"loss": 0.2583, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 3.931760873740539e-05, |
|
"loss": 0.271, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 3.9284875083508076e-05, |
|
"loss": 0.2534, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 3.9252105027554887e-05, |
|
"loss": 0.2576, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 3.9219298653053546e-05, |
|
"loss": 0.2464, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 3.918645604360433e-05, |
|
"loss": 0.2738, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 3.915357728289985e-05, |
|
"loss": 0.2593, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 3.9120662454724836e-05, |
|
"loss": 0.2795, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 3.908771164295595e-05, |
|
"loss": 0.2759, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 3.905472493156151e-05, |
|
"loss": 0.2606, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 3.9021702404601366e-05, |
|
"loss": 0.2867, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 3.8988644146226606e-05, |
|
"loss": 0.2693, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 3.8955550240679364e-05, |
|
"loss": 0.2601, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 3.8922420772292644e-05, |
|
"loss": 0.2574, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 3.888925582549006e-05, |
|
"loss": 0.2737, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 3.8856055484785625e-05, |
|
"loss": 0.2752, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 3.882281983478355e-05, |
|
"loss": 0.2807, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 3.878954896017804e-05, |
|
"loss": 0.2779, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 3.875624294575305e-05, |
|
"loss": 0.2837, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 3.872290187638208e-05, |
|
"loss": 0.268, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 3.8689525837027975e-05, |
|
"loss": 0.2621, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 3.865611491274267e-05, |
|
"loss": 0.2694, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 3.8622669188667015e-05, |
|
"loss": 0.2759, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 3.858918875003053e-05, |
|
"loss": 0.2643, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 3.8555673682151215e-05, |
|
"loss": 0.2663, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 3.852212407043528e-05, |
|
"loss": 0.2871, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 3.8488540000377016e-05, |
|
"loss": 0.2718, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 3.8454921557558476e-05, |
|
"loss": 0.2712, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 3.842126882764933e-05, |
|
"loss": 0.2579, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 3.8387581896406606e-05, |
|
"loss": 0.2695, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 3.835386084967451e-05, |
|
"loss": 0.2619, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 3.8320105773384144e-05, |
|
"loss": 0.2744, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 3.828631675355338e-05, |
|
"loss": 0.2606, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 3.8252493876286546e-05, |
|
"loss": 0.2703, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 3.8218637227774276e-05, |
|
"loss": 0.2657, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 3.818474689429323e-05, |
|
"loss": 0.2827, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 3.8150822962205956e-05, |
|
"loss": 0.263, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 3.8116865517960585e-05, |
|
"loss": 0.2702, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 3.808287464809063e-05, |
|
"loss": 0.2659, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 3.8048850439214844e-05, |
|
"loss": 0.2564, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 3.801479297803687e-05, |
|
"loss": 0.2758, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 3.7980702351345146e-05, |
|
"loss": 0.2742, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 3.7946578646012574e-05, |
|
"loss": 0.2741, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 3.791242194899639e-05, |
|
"loss": 0.2695, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 3.7878232347337875e-05, |
|
"loss": 0.2749, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 3.784400992816219e-05, |
|
"loss": 0.2679, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 3.78097547786781e-05, |
|
"loss": 0.2617, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 3.777546698617776e-05, |
|
"loss": 0.2756, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 3.774114663803657e-05, |
|
"loss": 0.2704, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 3.7706793821712826e-05, |
|
"loss": 0.2742, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 3.76724086247476e-05, |
|
"loss": 0.2686, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 3.763799113476447e-05, |
|
"loss": 0.2548, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 3.7603541439469315e-05, |
|
"loss": 0.2788, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 3.756905962665005e-05, |
|
"loss": 0.2525, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 3.753454578417648e-05, |
|
"loss": 0.2758, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.2523, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 3.746542236215341e-05, |
|
"loss": 0.2652, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 3.743081295875069e-05, |
|
"loss": 0.2821, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 3.7396171877986764e-05, |
|
"loss": 0.2833, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 3.7361499208137254e-05, |
|
"loss": 0.2846, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 3.732679503755833e-05, |
|
"loss": 0.2651, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 3.72920594546864e-05, |
|
"loss": 0.2594, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 3.725729254803791e-05, |
|
"loss": 0.2776, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 3.722249440620917e-05, |
|
"loss": 0.2637, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 3.718766511787606e-05, |
|
"loss": 0.2872, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 3.715280477179382e-05, |
|
"loss": 0.2563, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 3.7117913456796854e-05, |
|
"loss": 0.2727, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 3.708299126179847e-05, |
|
"loss": 0.2601, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 3.7048038275790694e-05, |
|
"loss": 0.2784, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 3.701305458784397e-05, |
|
"loss": 0.2644, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 3.697804028710703e-05, |
|
"loss": 0.2705, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 3.694299546280657e-05, |
|
"loss": 0.2628, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 3.690792020424712e-05, |
|
"loss": 0.2715, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 3.687281460081071e-05, |
|
"loss": 0.2728, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 3.683767874195674e-05, |
|
"loss": 0.2767, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 3.680251271722169e-05, |
|
"loss": 0.2652, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 3.676731661621893e-05, |
|
"loss": 0.2726, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 3.673209052863843e-05, |
|
"loss": 0.2687, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 3.6696834544246625e-05, |
|
"loss": 0.2561, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 3.666154875288611e-05, |
|
"loss": 0.2781, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 3.662623324447544e-05, |
|
"loss": 0.2867, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 3.65908881090089e-05, |
|
"loss": 0.2711, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 3.655551343655628e-05, |
|
"loss": 0.2668, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 3.652010931726262e-05, |
|
"loss": 0.2522, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 3.648467584134802e-05, |
|
"loss": 0.2658, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 3.6449213099107373e-05, |
|
"loss": 0.2757, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 3.641372118091017e-05, |
|
"loss": 0.2865, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 3.6378200177200224e-05, |
|
"loss": 0.2468, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 3.634265017849549e-05, |
|
"loss": 0.2828, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 3.63070712753878e-05, |
|
"loss": 0.2562, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 3.6271463558542645e-05, |
|
"loss": 0.2701, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 3.623582711869895e-05, |
|
"loss": 0.2851, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 3.620016204666882e-05, |
|
"loss": 0.2844, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 3.616446843333733e-05, |
|
"loss": 0.284, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 3.612874636966228e-05, |
|
"loss": 0.2673, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 3.6092995946673994e-05, |
|
"loss": 0.2512, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 3.6057217255475034e-05, |
|
"loss": 0.2598, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 3.602141038724001e-05, |
|
"loss": 0.2664, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 3.598557543321535e-05, |
|
"loss": 0.2745, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 3.5949712484719014e-05, |
|
"loss": 0.2582, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 3.5913821633140336e-05, |
|
"loss": 0.2668, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 3.5877902969939755e-05, |
|
"loss": 0.2593, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 3.584195658664855e-05, |
|
"loss": 0.2607, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 3.580598257486867e-05, |
|
"loss": 0.2493, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 3.5769981026272475e-05, |
|
"loss": 0.272, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 3.573395203260245e-05, |
|
"loss": 0.2687, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 3.569789568567107e-05, |
|
"loss": 0.2735, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 3.56618120773605e-05, |
|
"loss": 0.254, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 3.5625701299622336e-05, |
|
"loss": 0.2665, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 3.558956344447748e-05, |
|
"loss": 0.2654, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 3.555339860401578e-05, |
|
"loss": 0.2718, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 3.551720687039585e-05, |
|
"loss": 0.2475, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 3.5480988335844886e-05, |
|
"loss": 0.269, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 3.544474309265834e-05, |
|
"loss": 0.2577, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 3.5408471233199716e-05, |
|
"loss": 0.2848, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 3.5372172849900374e-05, |
|
"loss": 0.2677, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 3.533584803525926e-05, |
|
"loss": 0.2583, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 3.529949688184265e-05, |
|
"loss": 0.2596, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 3.526311948228397e-05, |
|
"loss": 0.2552, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 3.5226715929283506e-05, |
|
"loss": 0.2709, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 3.519028631560819e-05, |
|
"loss": 0.2602, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 3.51538307340914e-05, |
|
"loss": 0.2537, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 3.511734927763265e-05, |
|
"loss": 0.274, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 3.508084203919739e-05, |
|
"loss": 0.2442, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 3.5044309111816796e-05, |
|
"loss": 0.2676, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 3.50077505885875e-05, |
|
"loss": 0.2875, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 3.4971166562671324e-05, |
|
"loss": 0.2686, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 3.493455712729514e-05, |
|
"loss": 0.2753, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 3.4897922375750514e-05, |
|
"loss": 0.2703, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 3.4861262401393566e-05, |
|
"loss": 0.2661, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 3.482457729764466e-05, |
|
"loss": 0.2644, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 3.478786715798823e-05, |
|
"loss": 0.3001, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 3.475113207597247e-05, |
|
"loss": 0.269, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 3.4714372145209166e-05, |
|
"loss": 0.2618, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 3.467758745937342e-05, |
|
"loss": 0.2592, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 3.46407781122034e-05, |
|
"loss": 0.2805, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 3.460394419750013e-05, |
|
"loss": 0.2432, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 3.456708580912725e-05, |
|
"loss": 0.2524, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 3.4530203041010745e-05, |
|
"loss": 0.2529, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.5900537967681885, |
|
"eval_runtime": 116.0624, |
|
"eval_samples_per_second": 6.565, |
|
"eval_steps_per_second": 0.414, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 3.449329598713874e-05, |
|
"loss": 0.2271, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 3.445636474156125e-05, |
|
"loss": 0.1519, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 3.4419409398389935e-05, |
|
"loss": 0.1477, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 3.438243005179784e-05, |
|
"loss": 0.1407, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 3.434542679601922e-05, |
|
"loss": 0.1235, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 3.4308399725349226e-05, |
|
"loss": 0.1323, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 3.42713489341437e-05, |
|
"loss": 0.1445, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 3.423427451681895e-05, |
|
"loss": 0.1257, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 3.419717656785146e-05, |
|
"loss": 0.1249, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 3.416005518177771e-05, |
|
"loss": 0.1279, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 3.4122910453193885e-05, |
|
"loss": 0.1278, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 3.408574247675566e-05, |
|
"loss": 0.1207, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 3.4048551347177945e-05, |
|
"loss": 0.1184, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 3.401133715923467e-05, |
|
"loss": 0.1308, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 3.3974100007758514e-05, |
|
"loss": 0.1291, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 3.3936839987640664e-05, |
|
"loss": 0.1129, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 3.389955719383058e-05, |
|
"loss": 0.1264, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 3.3862251721335794e-05, |
|
"loss": 0.1149, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 3.382492366522158e-05, |
|
"loss": 0.1234, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 3.378757312061079e-05, |
|
"loss": 0.1245, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 3.375020018268359e-05, |
|
"loss": 0.1154, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 3.371280494667719e-05, |
|
"loss": 0.1231, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 3.367538750788563e-05, |
|
"loss": 0.1224, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 3.363794796165953e-05, |
|
"loss": 0.1196, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 3.360048640340585e-05, |
|
"loss": 0.1189, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 3.3563002928587627e-05, |
|
"loss": 0.1173, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 3.352549763272379e-05, |
|
"loss": 0.1248, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 3.348797061138881e-05, |
|
"loss": 0.1255, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 3.3450421960212566e-05, |
|
"loss": 0.1178, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 3.3412851774880064e-05, |
|
"loss": 0.121, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 3.337526015113115e-05, |
|
"loss": 0.1186, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 3.3337647184760315e-05, |
|
"loss": 0.1191, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 3.3300012971616467e-05, |
|
"loss": 0.1223, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 3.3262357607602596e-05, |
|
"loss": 0.1104, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 3.322468118867564e-05, |
|
"loss": 0.125, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 3.318698381084619e-05, |
|
"loss": 0.1221, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 3.314926557017821e-05, |
|
"loss": 0.1181, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 3.3111526562788864e-05, |
|
"loss": 0.1197, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 3.3073766884848234e-05, |
|
"loss": 0.1168, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 3.303598663257904e-05, |
|
"loss": 0.1186, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 3.299818590225647e-05, |
|
"loss": 0.1192, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 3.29603647902079e-05, |
|
"loss": 0.1192, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 3.2922523392812605e-05, |
|
"loss": 0.1285, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 3.2884661806501574e-05, |
|
"loss": 0.1299, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 3.284678012775727e-05, |
|
"loss": 0.1322, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 3.280887845311332e-05, |
|
"loss": 0.1174, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 3.27709568791543e-05, |
|
"loss": 0.1225, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 3.273301550251555e-05, |
|
"loss": 0.1194, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 3.269505441988281e-05, |
|
"loss": 0.1139, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 3.265707372799208e-05, |
|
"loss": 0.1294, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 3.2619073523629304e-05, |
|
"loss": 0.1244, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 3.258105390363016e-05, |
|
"loss": 0.1284, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 3.2543014964879816e-05, |
|
"loss": 0.1234, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 3.250495680431264e-05, |
|
"loss": 0.1163, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 3.246687951891201e-05, |
|
"loss": 0.1269, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 3.2428783205710026e-05, |
|
"loss": 0.1174, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 3.2390667961787275e-05, |
|
"loss": 0.1226, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 3.23525338842726e-05, |
|
"loss": 0.1174, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 3.231438107034281e-05, |
|
"loss": 0.1212, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 3.22762096172225e-05, |
|
"loss": 0.1189, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 3.223801962218372e-05, |
|
"loss": 0.1232, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 3.21998111825458e-05, |
|
"loss": 0.1271, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 3.216158439567506e-05, |
|
"loss": 0.1229, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 3.2123339358984575e-05, |
|
"loss": 0.1144, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 3.208507616993393e-05, |
|
"loss": 0.1251, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 3.2046794926028964e-05, |
|
"loss": 0.1176, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 3.200849572482153e-05, |
|
"loss": 0.1346, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 3.1970178663909236e-05, |
|
"loss": 0.1159, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 3.19318438409352e-05, |
|
"loss": 0.1366, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 3.189349135358781e-05, |
|
"loss": 0.1249, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 3.1855121299600456e-05, |
|
"loss": 0.1261, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 3.181673377675131e-05, |
|
"loss": 0.1217, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 3.1778328882863054e-05, |
|
"loss": 0.1191, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 3.173990671580263e-05, |
|
"loss": 0.1155, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 3.1701467373480995e-05, |
|
"loss": 0.1107, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 3.166301095385288e-05, |
|
"loss": 0.1177, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 3.162453755491655e-05, |
|
"loss": 0.1212, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 3.1586047274713494e-05, |
|
"loss": 0.126, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 3.154754021132827e-05, |
|
"loss": 0.1171, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 3.1509016462888174e-05, |
|
"loss": 0.1225, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 3.147047612756302e-05, |
|
"loss": 0.1315, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 3.143191930356491e-05, |
|
"loss": 0.1207, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 3.139334608914795e-05, |
|
"loss": 0.1307, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 3.135475658260801e-05, |
|
"loss": 0.1163, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 3.131615088228249e-05, |
|
"loss": 0.1162, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 3.127752908655004e-05, |
|
"loss": 0.1234, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 3.123889129383034e-05, |
|
"loss": 0.1168, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 3.1200237602583834e-05, |
|
"loss": 0.1238, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 3.116156811131148e-05, |
|
"loss": 0.1255, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 3.112288291855449e-05, |
|
"loss": 0.124, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 3.108418212289408e-05, |
|
"loss": 0.1267, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 3.104546582295126e-05, |
|
"loss": 0.124, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 3.100673411738652e-05, |
|
"loss": 0.1308, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 3.096798710489962e-05, |
|
"loss": 0.1213, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 3.092922488422933e-05, |
|
"loss": 0.1277, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 3.089044755415315e-05, |
|
"loss": 0.1201, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 3.0851655213487124e-05, |
|
"loss": 0.115, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 3.0812847961085526e-05, |
|
"loss": 0.1257, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 3.077402589584061e-05, |
|
"loss": 0.1203, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 3.0735189116682414e-05, |
|
"loss": 0.1255, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 3.0696337722578444e-05, |
|
"loss": 0.1215, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 3.065747181253346e-05, |
|
"loss": 0.1275, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 3.0618591485589224e-05, |
|
"loss": 0.1346, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 3.0579696840824206e-05, |
|
"loss": 0.1285, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 3.05407879773534e-05, |
|
"loss": 0.1261, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 3.0501864994328e-05, |
|
"loss": 0.1192, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 3.04629279909352e-05, |
|
"loss": 0.1186, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 3.0423977066397912e-05, |
|
"loss": 0.1244, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 3.0385012319974537e-05, |
|
"loss": 0.1248, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 3.034603385095868e-05, |
|
"loss": 0.1155, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 3.0307041758678932e-05, |
|
"loss": 0.1267, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 3.0268036142498596e-05, |
|
"loss": 0.1219, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 3.022901710181542e-05, |
|
"loss": 0.126, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 3.018998473606139e-05, |
|
"loss": 0.1219, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 3.0150939144702423e-05, |
|
"loss": 0.1208, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 3.011188042723816e-05, |
|
"loss": 0.1234, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 3.007280868320167e-05, |
|
"loss": 0.1252, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 3.0033724012159242e-05, |
|
"loss": 0.1185, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 2.9994626513710084e-05, |
|
"loss": 0.1194, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 2.99555162874861e-05, |
|
"loss": 0.1238, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 2.9916393433151634e-05, |
|
"loss": 0.1208, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 2.9877258050403212e-05, |
|
"loss": 0.1218, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 2.9838110238969264e-05, |
|
"loss": 0.1254, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 2.9798950098609923e-05, |
|
"loss": 0.1208, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 2.975977772911671e-05, |
|
"loss": 0.1211, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 2.9720593230312337e-05, |
|
"loss": 0.1177, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 2.9681396702050406e-05, |
|
"loss": 0.1187, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 2.964218824421518e-05, |
|
"loss": 0.125, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 2.9602967956721316e-05, |
|
"loss": 0.1174, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 2.9563735939513636e-05, |
|
"loss": 0.1167, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 2.9524492292566823e-05, |
|
"loss": 0.1175, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 2.948523711588522e-05, |
|
"loss": 0.1295, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 2.9445970509502546e-05, |
|
"loss": 0.1336, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 2.940669257348163e-05, |
|
"loss": 0.1218, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 2.9367403407914202e-05, |
|
"loss": 0.1205, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 2.932810311292058e-05, |
|
"loss": 0.1311, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 2.9288791788649462e-05, |
|
"loss": 0.1258, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 2.9249469535277636e-05, |
|
"loss": 0.1255, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 2.921013645300975e-05, |
|
"loss": 0.1263, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 2.9170792642078055e-05, |
|
"loss": 0.1219, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 2.9131438202742124e-05, |
|
"loss": 0.1224, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 2.909207323528863e-05, |
|
"loss": 0.122, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 2.9052697840031064e-05, |
|
"loss": 0.1199, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 2.9013312117309488e-05, |
|
"loss": 0.1206, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 2.8973916167490307e-05, |
|
"loss": 0.1163, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 2.8934510090965944e-05, |
|
"loss": 0.1222, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 2.889509398815467e-05, |
|
"loss": 0.114, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 2.8855667959500276e-05, |
|
"loss": 0.1218, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 2.8816232105471863e-05, |
|
"loss": 0.1257, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 2.8776786526563575e-05, |
|
"loss": 0.1216, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 2.8737331323294314e-05, |
|
"loss": 0.1285, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 2.8697866596207524e-05, |
|
"loss": 0.1285, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 2.8658392445870928e-05, |
|
"loss": 0.1286, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 2.8618908972876246e-05, |
|
"loss": 0.1267, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 2.857941627783895e-05, |
|
"loss": 0.1182, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 2.8539914461398043e-05, |
|
"loss": 0.1193, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 2.8500403624215734e-05, |
|
"loss": 0.1157, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 2.846088386697723e-05, |
|
"loss": 0.1269, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 2.8421355290390506e-05, |
|
"loss": 0.1251, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 2.838181799518595e-05, |
|
"loss": 0.1176, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 2.834227208211621e-05, |
|
"loss": 0.1238, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 2.8302717651955895e-05, |
|
"loss": 0.1226, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 2.8263154805501297e-05, |
|
"loss": 0.1294, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 2.822358364357015e-05, |
|
"loss": 0.1201, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 2.8184004267001425e-05, |
|
"loss": 0.1255, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 2.8144416776654963e-05, |
|
"loss": 0.1228, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 2.810482127341133e-05, |
|
"loss": 0.1222, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 2.8065217858171495e-05, |
|
"loss": 0.118, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 2.8025606631856578e-05, |
|
"loss": 0.1261, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 2.7985987695407616e-05, |
|
"loss": 0.1333, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 2.7946361149785306e-05, |
|
"loss": 0.1165, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 2.79067270959697e-05, |
|
"loss": 0.1295, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 2.7867085634960016e-05, |
|
"loss": 0.125, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 2.782743686777433e-05, |
|
"loss": 0.1143, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 2.778778089544935e-05, |
|
"loss": 0.1247, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 2.7748117819040127e-05, |
|
"loss": 0.1219, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 2.770844773961983e-05, |
|
"loss": 0.1319, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 2.7668770758279473e-05, |
|
"loss": 0.1291, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 2.762908697612765e-05, |
|
"loss": 0.118, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 2.7589396494290287e-05, |
|
"loss": 0.134, |
|
"step": 1001 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 2.7549699413910384e-05, |
|
"loss": 0.1295, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 2.7509995836147766e-05, |
|
"loss": 0.1227, |
|
"step": 1003 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 2.74702858621788e-05, |
|
"loss": 0.118, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 2.743056959319616e-05, |
|
"loss": 0.1226, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 2.739084713040856e-05, |
|
"loss": 0.1257, |
|
"step": 1006 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 2.7351118575040496e-05, |
|
"loss": 0.1215, |
|
"step": 1007 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 2.7311384028332e-05, |
|
"loss": 0.1232, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 2.7271643591538353e-05, |
|
"loss": 0.1208, |
|
"step": 1009 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 2.723189736592986e-05, |
|
"loss": 0.1248, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 2.719214545279158e-05, |
|
"loss": 0.119, |
|
"step": 1011 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 2.715238795342305e-05, |
|
"loss": 0.1213, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 2.711262496913805e-05, |
|
"loss": 0.122, |
|
"step": 1013 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 2.7072856601264345e-05, |
|
"loss": 0.1218, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 2.7033082951143418e-05, |
|
"loss": 0.1178, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 2.6993304120130196e-05, |
|
"loss": 0.127, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 2.6953520209592824e-05, |
|
"loss": 0.1145, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 2.69137313209124e-05, |
|
"loss": 0.1256, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 2.6873937555482663e-05, |
|
"loss": 0.1305, |
|
"step": 1019 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 2.6834139014709843e-05, |
|
"loss": 0.1268, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 2.6794335800012293e-05, |
|
"loss": 0.1235, |
|
"step": 1021 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 2.6754528012820283e-05, |
|
"loss": 0.125, |
|
"step": 1022 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 2.671471575457576e-05, |
|
"loss": 0.1309, |
|
"step": 1023 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 2.6674899126732045e-05, |
|
"loss": 0.115, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 2.663507823075358e-05, |
|
"loss": 0.1269, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 2.659525316811571e-05, |
|
"loss": 0.1276, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 2.6555424040304398e-05, |
|
"loss": 0.1118, |
|
"step": 1027 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 2.6515590948815933e-05, |
|
"loss": 0.1252, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 2.6475753995156743e-05, |
|
"loss": 0.1184, |
|
"step": 1029 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 2.643591328084309e-05, |
|
"loss": 0.1217, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 2.6396068907400784e-05, |
|
"loss": 0.1271, |
|
"step": 1031 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 2.635622097636501e-05, |
|
"loss": 0.1175, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 2.6316369589279998e-05, |
|
"loss": 0.1184, |
|
"step": 1033 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 2.6276514847698762e-05, |
|
"loss": 0.1197, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 2.623665685318291e-05, |
|
"loss": 0.1269, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 2.6196795707302302e-05, |
|
"loss": 0.1257, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 2.6156931511634834e-05, |
|
"loss": 0.1276, |
|
"step": 1037 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 2.6117064367766197e-05, |
|
"loss": 0.1322, |
|
"step": 1038 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 2.607719437728957e-05, |
|
"loss": 0.1239, |
|
"step": 1039 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 2.603732164180539e-05, |
|
"loss": 0.1169, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 2.5997446262921106e-05, |
|
"loss": 0.1144, |
|
"step": 1041 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 2.595756834225089e-05, |
|
"loss": 0.1223, |
|
"step": 1042 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 2.5917687981415373e-05, |
|
"loss": 0.1238, |
|
"step": 1043 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 2.5877805282041455e-05, |
|
"loss": 0.1201, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 2.583792034576194e-05, |
|
"loss": 0.1327, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 2.579803327421536e-05, |
|
"loss": 0.1336, |
|
"step": 1046 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 2.575814416904569e-05, |
|
"loss": 0.1177, |
|
"step": 1047 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 2.571825313190208e-05, |
|
"loss": 0.1153, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 2.5678360264438606e-05, |
|
"loss": 0.1199, |
|
"step": 1049 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 2.5638465668314006e-05, |
|
"loss": 0.1191, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 2.5598569445191418e-05, |
|
"loss": 0.1132, |
|
"step": 1051 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 2.5558671696738146e-05, |
|
"loss": 0.1266, |
|
"step": 1052 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 2.5518772524625357e-05, |
|
"loss": 0.1191, |
|
"step": 1053 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 2.5478872030527855e-05, |
|
"loss": 0.1148, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 2.5438970316123822e-05, |
|
"loss": 0.1224, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 2.539906748309454e-05, |
|
"loss": 0.1136, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 2.535916363312414e-05, |
|
"loss": 0.1199, |
|
"step": 1057 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 2.5319258867899348e-05, |
|
"loss": 0.1241, |
|
"step": 1058 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 2.5279353289109227e-05, |
|
"loss": 0.1202, |
|
"step": 1059 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 2.5239446998444898e-05, |
|
"loss": 0.1247, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 2.5199540097599318e-05, |
|
"loss": 0.1345, |
|
"step": 1061 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 2.5159632688266982e-05, |
|
"loss": 0.1223, |
|
"step": 1062 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 2.511972487214369e-05, |
|
"loss": 0.1141, |
|
"step": 1063 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 2.5079816750926265e-05, |
|
"loss": 0.1257, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 2.5039908426312332e-05, |
|
"loss": 0.1235, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.1297, |
|
"step": 1066 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 2.4960091573687677e-05, |
|
"loss": 0.1281, |
|
"step": 1067 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 2.4920183249073744e-05, |
|
"loss": 0.1176, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 2.488027512785632e-05, |
|
"loss": 0.1204, |
|
"step": 1069 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 2.4840367311733024e-05, |
|
"loss": 0.1318, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 2.4800459902400684e-05, |
|
"loss": 0.1293, |
|
"step": 1071 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 2.4760553001555108e-05, |
|
"loss": 0.1154, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 2.472064671089078e-05, |
|
"loss": 0.1178, |
|
"step": 1073 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 2.468074113210066e-05, |
|
"loss": 0.125, |
|
"step": 1074 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 2.4640836366875873e-05, |
|
"loss": 0.1191, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 2.4600932516905466e-05, |
|
"loss": 0.1264, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 2.4561029683876184e-05, |
|
"loss": 0.1207, |
|
"step": 1077 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 2.4521127969472148e-05, |
|
"loss": 0.1253, |
|
"step": 1078 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 2.4481227475374652e-05, |
|
"loss": 0.1255, |
|
"step": 1079 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 2.4441328303261867e-05, |
|
"loss": 0.1287, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 2.440143055480859e-05, |
|
"loss": 0.1176, |
|
"step": 1081 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 2.4361534331686003e-05, |
|
"loss": 0.1223, |
|
"step": 1082 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 2.4321639735561403e-05, |
|
"loss": 0.1321, |
|
"step": 1083 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 2.4281746868097926e-05, |
|
"loss": 0.1268, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 2.4241855830954316e-05, |
|
"loss": 0.1229, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 2.420196672578465e-05, |
|
"loss": 0.118, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 2.4162079654238073e-05, |
|
"loss": 0.135, |
|
"step": 1087 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 2.412219471795855e-05, |
|
"loss": 0.1135, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 2.4082312018584626e-05, |
|
"loss": 0.1158, |
|
"step": 1089 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 2.4042431657749117e-05, |
|
"loss": 0.125, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 2.40025537370789e-05, |
|
"loss": 0.1245, |
|
"step": 1091 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 2.3962678358194614e-05, |
|
"loss": 0.1259, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 2.3922805622710438e-05, |
|
"loss": 0.1157, |
|
"step": 1093 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 2.3882935632233805e-05, |
|
"loss": 0.1228, |
|
"step": 1094 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 2.3843068488365168e-05, |
|
"loss": 0.1255, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 2.3803204292697704e-05, |
|
"loss": 0.1207, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 2.3763343146817096e-05, |
|
"loss": 0.1201, |
|
"step": 1097 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 2.372348515230124e-05, |
|
"loss": 0.1203, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 2.368363041072001e-05, |
|
"loss": 0.1234, |
|
"step": 1099 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 2.364377902363499e-05, |
|
"loss": 0.1252, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 2.3603931092599215e-05, |
|
"loss": 0.1239, |
|
"step": 1101 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 2.356408671915692e-05, |
|
"loss": 0.1148, |
|
"step": 1102 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 2.3524246004843263e-05, |
|
"loss": 0.12, |
|
"step": 1103 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 2.3484409051184076e-05, |
|
"loss": 0.12, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 2.3444575959695614e-05, |
|
"loss": 0.1235, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 2.340474683188429e-05, |
|
"loss": 0.121, |
|
"step": 1106 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 2.3364921769246423e-05, |
|
"loss": 0.1218, |
|
"step": 1107 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 2.332510087326796e-05, |
|
"loss": 0.1246, |
|
"step": 1108 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 2.3285284245424244e-05, |
|
"loss": 0.1243, |
|
"step": 1109 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 2.324547198717972e-05, |
|
"loss": 0.1206, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 2.3205664199987716e-05, |
|
"loss": 0.1172, |
|
"step": 1111 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 2.316586098529017e-05, |
|
"loss": 0.111, |
|
"step": 1112 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 2.3126062444517336e-05, |
|
"loss": 0.1272, |
|
"step": 1113 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 2.3086268679087607e-05, |
|
"loss": 0.1196, |
|
"step": 1114 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 2.3046479790407178e-05, |
|
"loss": 0.126, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 2.3006695879869807e-05, |
|
"loss": 0.1232, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 2.2966917048856588e-05, |
|
"loss": 0.115, |
|
"step": 1117 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 2.292714339873566e-05, |
|
"loss": 0.1258, |
|
"step": 1118 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 2.288737503086195e-05, |
|
"loss": 0.1246, |
|
"step": 1119 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 2.284761204657696e-05, |
|
"loss": 0.1262, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 2.280785454720843e-05, |
|
"loss": 0.1126, |
|
"step": 1121 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 2.2768102634070147e-05, |
|
"loss": 0.1244, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 2.2728356408461653e-05, |
|
"loss": 0.1271, |
|
"step": 1123 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 2.268861597166801e-05, |
|
"loss": 0.1243, |
|
"step": 1124 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 2.26488814249595e-05, |
|
"loss": 0.1275, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 2.2609152869591446e-05, |
|
"loss": 0.1219, |
|
"step": 1126 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 2.2569430406803846e-05, |
|
"loss": 0.1455, |
|
"step": 1127 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 2.2529714137821206e-05, |
|
"loss": 0.12, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 2.249000416385224e-05, |
|
"loss": 0.1142, |
|
"step": 1129 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 2.2450300586089622e-05, |
|
"loss": 0.1285, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 2.2410603505709715e-05, |
|
"loss": 0.1238, |
|
"step": 1131 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 2.2370913023872355e-05, |
|
"loss": 0.1238, |
|
"step": 1132 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 2.233122924172053e-05, |
|
"loss": 0.1251, |
|
"step": 1133 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 2.229155226038017e-05, |
|
"loss": 0.1346, |
|
"step": 1134 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 2.2251882180959875e-05, |
|
"loss": 0.1255, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 2.2212219104550665e-05, |
|
"loss": 0.1289, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 2.217256313222567e-05, |
|
"loss": 0.1172, |
|
"step": 1137 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 2.2132914365039993e-05, |
|
"loss": 0.1186, |
|
"step": 1138 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 2.2093272904030307e-05, |
|
"loss": 0.1089, |
|
"step": 1139 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 2.2053638850214704e-05, |
|
"loss": 0.1273, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 2.201401230459239e-05, |
|
"loss": 0.1231, |
|
"step": 1141 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 2.197439336814343e-05, |
|
"loss": 0.1146, |
|
"step": 1142 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 2.1934782141828504e-05, |
|
"loss": 0.1198, |
|
"step": 1143 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 2.1895178726588674e-05, |
|
"loss": 0.1205, |
|
"step": 1144 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 2.185558322334504e-05, |
|
"loss": 0.1223, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 2.1815995732998584e-05, |
|
"loss": 0.1212, |
|
"step": 1146 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 2.1776416356429856e-05, |
|
"loss": 0.122, |
|
"step": 1147 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 2.173684519449872e-05, |
|
"loss": 0.1261, |
|
"step": 1148 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 2.169728234804411e-05, |
|
"loss": 0.1241, |
|
"step": 1149 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 2.165772791788379e-05, |
|
"loss": 0.1185, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 2.1618182004814054e-05, |
|
"loss": 0.1188, |
|
"step": 1151 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 2.1578644709609503e-05, |
|
"loss": 0.1063, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 2.1539116133022773e-05, |
|
"loss": 0.1121, |
|
"step": 1153 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 2.1499596375784282e-05, |
|
"loss": 0.1195, |
|
"step": 1154 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 2.146008553860197e-05, |
|
"loss": 0.1131, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 2.142058372216105e-05, |
|
"loss": 0.1156, |
|
"step": 1156 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 2.138109102712376e-05, |
|
"loss": 0.1224, |
|
"step": 1157 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 2.1341607554129074e-05, |
|
"loss": 0.1217, |
|
"step": 1158 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 2.1302133403792482e-05, |
|
"loss": 0.1213, |
|
"step": 1159 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 2.1262668676705695e-05, |
|
"loss": 0.1266, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 2.1223213473436438e-05, |
|
"loss": 0.1141, |
|
"step": 1161 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 2.1183767894528136e-05, |
|
"loss": 0.1194, |
|
"step": 1162 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 2.1144332040499726e-05, |
|
"loss": 0.1157, |
|
"step": 1163 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 2.1104906011845334e-05, |
|
"loss": 0.1156, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 2.1065489909034065e-05, |
|
"loss": 0.1224, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 2.1026083832509702e-05, |
|
"loss": 0.1194, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 2.0986687882690515e-05, |
|
"loss": 0.11, |
|
"step": 1167 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 2.094730215996894e-05, |
|
"loss": 0.115, |
|
"step": 1168 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.090792676471137e-05, |
|
"loss": 0.1267, |
|
"step": 1169 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.0868561797257878e-05, |
|
"loss": 0.131, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.082920735792195e-05, |
|
"loss": 0.123, |
|
"step": 1171 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.0789863546990253e-05, |
|
"loss": 0.123, |
|
"step": 1172 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.0750530464722373e-05, |
|
"loss": 0.126, |
|
"step": 1173 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.071120821135054e-05, |
|
"loss": 0.1151, |
|
"step": 1174 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.0671896887079418e-05, |
|
"loss": 0.1242, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.0632596592085804e-05, |
|
"loss": 0.1138, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.0593307426518373e-05, |
|
"loss": 0.1184, |
|
"step": 1177 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.0554029490497463e-05, |
|
"loss": 0.1216, |
|
"step": 1178 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.0514762884114784e-05, |
|
"loss": 0.1114, |
|
"step": 1179 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.047550770743318e-05, |
|
"loss": 0.1239, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.0436264060486366e-05, |
|
"loss": 0.122, |
|
"step": 1181 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.0397032043278687e-05, |
|
"loss": 0.1221, |
|
"step": 1182 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.035781175578483e-05, |
|
"loss": 0.1218, |
|
"step": 1183 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.03186032979496e-05, |
|
"loss": 0.1251, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.0279406769687666e-05, |
|
"loss": 0.1135, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 2.0240222270883288e-05, |
|
"loss": 0.1229, |
|
"step": 1186 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 2.020104990139008e-05, |
|
"loss": 0.1183, |
|
"step": 1187 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 2.016188976103074e-05, |
|
"loss": 0.1207, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 2.0122741949596797e-05, |
|
"loss": 0.1142, |
|
"step": 1189 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 2.008360656684837e-05, |
|
"loss": 0.1243, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 2.0044483712513908e-05, |
|
"loss": 0.1127, |
|
"step": 1191 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 2.000537348628993e-05, |
|
"loss": 0.113, |
|
"step": 1192 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.9966275987840764e-05, |
|
"loss": 0.1221, |
|
"step": 1193 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.9927191316798332e-05, |
|
"loss": 0.121, |
|
"step": 1194 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.9888119572761845e-05, |
|
"loss": 0.1184, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.984906085529758e-05, |
|
"loss": 0.1143, |
|
"step": 1196 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.9810015263938624e-05, |
|
"loss": 0.1155, |
|
"step": 1197 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.977098289818459e-05, |
|
"loss": 0.1211, |
|
"step": 1198 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.973196385750141e-05, |
|
"loss": 0.1397, |
|
"step": 1199 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.969295824132107e-05, |
|
"loss": 0.1072, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.965396614904132e-05, |
|
"loss": 0.1223, |
|
"step": 1201 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.961498768002547e-05, |
|
"loss": 0.1206, |
|
"step": 1202 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.9576022933602097e-05, |
|
"loss": 0.1168, |
|
"step": 1203 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.9537072009064814e-05, |
|
"loss": 0.116, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.949813500567201e-05, |
|
"loss": 0.1186, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.9459212022646606e-05, |
|
"loss": 0.1121, |
|
"step": 1206 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.9420303159175796e-05, |
|
"loss": 0.1251, |
|
"step": 1207 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.9381408514410782e-05, |
|
"loss": 0.1256, |
|
"step": 1208 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.9342528187466548e-05, |
|
"loss": 0.1354, |
|
"step": 1209 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.9303662277421568e-05, |
|
"loss": 0.1258, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 1.9264810883317592e-05, |
|
"loss": 0.1149, |
|
"step": 1211 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 1.922597410415939e-05, |
|
"loss": 0.1202, |
|
"step": 1212 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 1.918715203891448e-05, |
|
"loss": 0.1244, |
|
"step": 1213 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 1.9148344786512878e-05, |
|
"loss": 0.1198, |
|
"step": 1214 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 1.9109552445846854e-05, |
|
"loss": 0.1153, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 1.907077511577068e-05, |
|
"loss": 0.1194, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 1.9032012895100383e-05, |
|
"loss": 0.1181, |
|
"step": 1217 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 1.8993265882613482e-05, |
|
"loss": 0.1173, |
|
"step": 1218 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 1.8954534177048744e-05, |
|
"loss": 0.1196, |
|
"step": 1219 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 1.8915817877105926e-05, |
|
"loss": 0.1218, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 1.8877117081445524e-05, |
|
"loss": 0.117, |
|
"step": 1221 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 1.8838431888688527e-05, |
|
"loss": 0.1167, |
|
"step": 1222 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 1.8799762397416158e-05, |
|
"loss": 0.1194, |
|
"step": 1223 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 1.8761108706169655e-05, |
|
"loss": 0.1177, |
|
"step": 1224 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 1.872247091344996e-05, |
|
"loss": 0.1223, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 1.8683849117717518e-05, |
|
"loss": 0.1231, |
|
"step": 1226 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 1.8645243417391995e-05, |
|
"loss": 0.1212, |
|
"step": 1227 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 1.8606653910852056e-05, |
|
"loss": 0.1163, |
|
"step": 1228 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 1.856808069643509e-05, |
|
"loss": 0.1265, |
|
"step": 1229 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 1.852952387243698e-05, |
|
"loss": 0.1148, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 1.849098353711183e-05, |
|
"loss": 0.12, |
|
"step": 1231 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 1.8452459788671738e-05, |
|
"loss": 0.1195, |
|
"step": 1232 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 0.7090210318565369, |
|
"eval_runtime": 116.3629, |
|
"eval_samples_per_second": 6.548, |
|
"eval_steps_per_second": 0.413, |
|
"step": 1232 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 1.841395272528651e-05, |
|
"loss": 0.0877, |
|
"step": 1233 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 1.8375462445083464e-05, |
|
"loss": 0.0432, |
|
"step": 1234 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 1.8336989046147128e-05, |
|
"loss": 0.0427, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 1.8298532626519007e-05, |
|
"loss": 0.0441, |
|
"step": 1236 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 1.826009328419737e-05, |
|
"loss": 0.0398, |
|
"step": 1237 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 1.822167111713695e-05, |
|
"loss": 0.0429, |
|
"step": 1238 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 1.818326622324869e-05, |
|
"loss": 0.0366, |
|
"step": 1239 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 1.814487870039955e-05, |
|
"loss": 0.034, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 1.81065086464122e-05, |
|
"loss": 0.0367, |
|
"step": 1241 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 1.80681561590648e-05, |
|
"loss": 0.0336, |
|
"step": 1242 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 1.802982133609077e-05, |
|
"loss": 0.0367, |
|
"step": 1243 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 1.7991504275178473e-05, |
|
"loss": 0.0373, |
|
"step": 1244 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 1.7953205073971035e-05, |
|
"loss": 0.0351, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 1.7914923830066074e-05, |
|
"loss": 0.0341, |
|
"step": 1246 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 1.7876660641015437e-05, |
|
"loss": 0.0392, |
|
"step": 1247 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 1.7838415604324943e-05, |
|
"loss": 0.0373, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 1.7800188817454208e-05, |
|
"loss": 0.037, |
|
"step": 1249 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 1.7761980377816287e-05, |
|
"loss": 0.0337, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 1.772379038277751e-05, |
|
"loss": 0.0368, |
|
"step": 1251 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 1.7685618929657194e-05, |
|
"loss": 0.0413, |
|
"step": 1252 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 1.764746611572742e-05, |
|
"loss": 0.0331, |
|
"step": 1253 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 1.7609332038212728e-05, |
|
"loss": 0.0329, |
|
"step": 1254 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 1.7571216794289984e-05, |
|
"loss": 0.0317, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 1.7533120481088e-05, |
|
"loss": 0.035, |
|
"step": 1256 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 1.7495043195687368e-05, |
|
"loss": 0.0352, |
|
"step": 1257 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 1.7456985035120193e-05, |
|
"loss": 0.0373, |
|
"step": 1258 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 1.741894609636985e-05, |
|
"loss": 0.0337, |
|
"step": 1259 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 1.7380926476370702e-05, |
|
"loss": 0.0368, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 1.734292627200793e-05, |
|
"loss": 0.0369, |
|
"step": 1261 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 1.7304945580117193e-05, |
|
"loss": 0.0316, |
|
"step": 1262 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 1.7266984497484458e-05, |
|
"loss": 0.0343, |
|
"step": 1263 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 1.7229043120845708e-05, |
|
"loss": 0.035, |
|
"step": 1264 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 1.7191121546886697e-05, |
|
"loss": 0.0375, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 1.7153219872242727e-05, |
|
"loss": 0.0331, |
|
"step": 1266 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 1.711533819349842e-05, |
|
"loss": 0.0323, |
|
"step": 1267 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 1.7077476607187397e-05, |
|
"loss": 0.0329, |
|
"step": 1268 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 1.7039635209792105e-05, |
|
"loss": 0.0306, |
|
"step": 1269 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 1.7001814097743528e-05, |
|
"loss": 0.0312, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 1.6964013367420966e-05, |
|
"loss": 0.0314, |
|
"step": 1271 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 1.692623311515178e-05, |
|
"loss": 0.0333, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 1.6888473437211132e-05, |
|
"loss": 0.0365, |
|
"step": 1273 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 1.685073442982179e-05, |
|
"loss": 0.03, |
|
"step": 1274 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 1.6813016189153814e-05, |
|
"loss": 0.0329, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 1.6775318811324364e-05, |
|
"loss": 0.0369, |
|
"step": 1276 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 1.6737642392397414e-05, |
|
"loss": 0.0331, |
|
"step": 1277 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 1.6699987028383546e-05, |
|
"loss": 0.037, |
|
"step": 1278 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 1.6662352815239678e-05, |
|
"loss": 0.0362, |
|
"step": 1279 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 1.6624739848868854e-05, |
|
"loss": 0.0323, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 1.6587148225119935e-05, |
|
"loss": 0.0339, |
|
"step": 1281 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 1.6549578039787436e-05, |
|
"loss": 0.0306, |
|
"step": 1282 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 1.65120293886112e-05, |
|
"loss": 0.0311, |
|
"step": 1283 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 1.6474502367276222e-05, |
|
"loss": 0.0361, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 1.643699707141237e-05, |
|
"loss": 0.0342, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 1.6399513596594158e-05, |
|
"loss": 0.0307, |
|
"step": 1286 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 1.6362052038340475e-05, |
|
"loss": 0.0322, |
|
"step": 1287 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 1.6324612492114378e-05, |
|
"loss": 0.0339, |
|
"step": 1288 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 1.6287195053322816e-05, |
|
"loss": 0.0331, |
|
"step": 1289 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 1.6249799817316415e-05, |
|
"loss": 0.0387, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 1.6212426879389205e-05, |
|
"loss": 0.035, |
|
"step": 1291 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 1.617507633477842e-05, |
|
"loss": 0.0325, |
|
"step": 1292 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 1.6137748278664215e-05, |
|
"loss": 0.0334, |
|
"step": 1293 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 1.6100442806169422e-05, |
|
"loss": 0.0318, |
|
"step": 1294 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 1.6063160012359345e-05, |
|
"loss": 0.0325, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 1.602589999224149e-05, |
|
"loss": 0.0322, |
|
"step": 1296 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 1.598866284076532e-05, |
|
"loss": 0.0326, |
|
"step": 1297 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 1.5951448652822047e-05, |
|
"loss": 0.0341, |
|
"step": 1298 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 1.5914257523244347e-05, |
|
"loss": 0.0321, |
|
"step": 1299 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 1.5877089546806125e-05, |
|
"loss": 0.0318, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 1.5839944818222295e-05, |
|
"loss": 0.0323, |
|
"step": 1301 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 1.5802823432148546e-05, |
|
"loss": 0.0355, |
|
"step": 1302 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 1.5765725483181053e-05, |
|
"loss": 0.0349, |
|
"step": 1303 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 1.5728651065856297e-05, |
|
"loss": 0.0316, |
|
"step": 1304 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 1.569160027465078e-05, |
|
"loss": 0.029, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 1.5654573203980784e-05, |
|
"loss": 0.0323, |
|
"step": 1306 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 1.561756994820216e-05, |
|
"loss": 0.0314, |
|
"step": 1307 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 1.5580590601610074e-05, |
|
"loss": 0.0322, |
|
"step": 1308 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 1.5543635258438745e-05, |
|
"loss": 0.0319, |
|
"step": 1309 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 1.5506704012861256e-05, |
|
"loss": 0.0331, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 1.546979695898926e-05, |
|
"loss": 0.0312, |
|
"step": 1311 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 1.5432914190872757e-05, |
|
"loss": 0.0378, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 1.5396055802499875e-05, |
|
"loss": 0.0304, |
|
"step": 1313 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 1.5359221887796616e-05, |
|
"loss": 0.0327, |
|
"step": 1314 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 1.5322412540626592e-05, |
|
"loss": 0.0338, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 1.528562785479084e-05, |
|
"loss": 0.0297, |
|
"step": 1316 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 1.5248867924027534e-05, |
|
"loss": 0.0319, |
|
"step": 1317 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 1.5212132842011779e-05, |
|
"loss": 0.0336, |
|
"step": 1318 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 1.5175422702355343e-05, |
|
"loss": 0.032, |
|
"step": 1319 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 1.5138737598606448e-05, |
|
"loss": 0.0357, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 1.5102077624249497e-05, |
|
"loss": 0.0327, |
|
"step": 1321 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 1.506544287270487e-05, |
|
"loss": 0.0356, |
|
"step": 1322 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 1.5028833437328682e-05, |
|
"loss": 0.0289, |
|
"step": 1323 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 1.4992249411412513e-05, |
|
"loss": 0.0334, |
|
"step": 1324 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 1.4955690888183205e-05, |
|
"loss": 0.0309, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 1.4919157960802618e-05, |
|
"loss": 0.0363, |
|
"step": 1326 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 1.4882650722367364e-05, |
|
"loss": 0.0311, |
|
"step": 1327 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 1.4846169265908603e-05, |
|
"loss": 0.0286, |
|
"step": 1328 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 1.4809713684391807e-05, |
|
"loss": 0.0344, |
|
"step": 1329 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 1.4773284070716503e-05, |
|
"loss": 0.0339, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 1.4736880517716039e-05, |
|
"loss": 0.0309, |
|
"step": 1331 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 1.470050311815736e-05, |
|
"loss": 0.0345, |
|
"step": 1332 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 1.4664151964740752e-05, |
|
"loss": 0.0308, |
|
"step": 1333 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 1.4627827150099627e-05, |
|
"loss": 0.0331, |
|
"step": 1334 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 1.4591528766800283e-05, |
|
"loss": 0.0328, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 1.4555256907341667e-05, |
|
"loss": 0.0301, |
|
"step": 1336 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 1.4519011664155118e-05, |
|
"loss": 0.0303, |
|
"step": 1337 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 1.4482793129604148e-05, |
|
"loss": 0.0367, |
|
"step": 1338 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 1.4446601395984233e-05, |
|
"loss": 0.0353, |
|
"step": 1339 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 1.4410436555522522e-05, |
|
"loss": 0.0292, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 1.4374298700377665e-05, |
|
"loss": 0.0312, |
|
"step": 1341 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 1.4338187922639507e-05, |
|
"loss": 0.0334, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 1.4302104314328935e-05, |
|
"loss": 0.0326, |
|
"step": 1343 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 1.426604796739755e-05, |
|
"loss": 0.032, |
|
"step": 1344 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 1.4230018973727535e-05, |
|
"loss": 0.0307, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 1.4194017425131323e-05, |
|
"loss": 0.03, |
|
"step": 1346 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 1.4158043413351455e-05, |
|
"loss": 0.0322, |
|
"step": 1347 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 1.4122097030060249e-05, |
|
"loss": 0.0335, |
|
"step": 1348 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 1.408617836685967e-05, |
|
"loss": 0.0306, |
|
"step": 1349 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 1.405028751528099e-05, |
|
"loss": 0.0299, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 1.4014424566784661e-05, |
|
"loss": 0.0294, |
|
"step": 1351 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 1.397858961275999e-05, |
|
"loss": 0.0326, |
|
"step": 1352 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 1.3942782744524973e-05, |
|
"loss": 0.0332, |
|
"step": 1353 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 1.3907004053326006e-05, |
|
"loss": 0.0316, |
|
"step": 1354 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 1.3871253630337722e-05, |
|
"loss": 0.0325, |
|
"step": 1355 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 1.3835531566662673e-05, |
|
"loss": 0.0337, |
|
"step": 1356 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 1.379983795333119e-05, |
|
"loss": 0.0306, |
|
"step": 1357 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 1.3764172881301062e-05, |
|
"loss": 0.0366, |
|
"step": 1358 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 1.3728536441457357e-05, |
|
"loss": 0.0309, |
|
"step": 1359 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 1.3692928724612203e-05, |
|
"loss": 0.0266, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 1.3657349821504517e-05, |
|
"loss": 0.0304, |
|
"step": 1361 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 1.3621799822799788e-05, |
|
"loss": 0.0308, |
|
"step": 1362 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 1.3586278819089837e-05, |
|
"loss": 0.0344, |
|
"step": 1363 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 1.3550786900892634e-05, |
|
"loss": 0.0316, |
|
"step": 1364 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 1.3515324158651981e-05, |
|
"loss": 0.0356, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 1.3479890682737379e-05, |
|
"loss": 0.0307, |
|
"step": 1366 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 1.3444486563443723e-05, |
|
"loss": 0.0291, |
|
"step": 1367 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 1.3409111890991105e-05, |
|
"loss": 0.0316, |
|
"step": 1368 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 1.3373766755524564e-05, |
|
"loss": 0.0318, |
|
"step": 1369 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 1.3338451247113897e-05, |
|
"loss": 0.0308, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 1.330316545575338e-05, |
|
"loss": 0.0324, |
|
"step": 1371 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 1.3267909471361572e-05, |
|
"loss": 0.0329, |
|
"step": 1372 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 1.323268338378108e-05, |
|
"loss": 0.0317, |
|
"step": 1373 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 1.3197487282778315e-05, |
|
"loss": 0.0336, |
|
"step": 1374 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 1.3162321258043261e-05, |
|
"loss": 0.0318, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 1.3127185399189295e-05, |
|
"loss": 0.0363, |
|
"step": 1376 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 1.3092079795752887e-05, |
|
"loss": 0.0336, |
|
"step": 1377 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 1.3057004537193423e-05, |
|
"loss": 0.0299, |
|
"step": 1378 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 1.3021959712892979e-05, |
|
"loss": 0.0321, |
|
"step": 1379 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 1.2986945412156038e-05, |
|
"loss": 0.0309, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 1.2951961724209317e-05, |
|
"loss": 0.0324, |
|
"step": 1381 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 1.2917008738201537e-05, |
|
"loss": 0.0296, |
|
"step": 1382 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 1.2882086543203154e-05, |
|
"loss": 0.031, |
|
"step": 1383 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 1.284719522820618e-05, |
|
"loss": 0.0281, |
|
"step": 1384 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 1.2812334882123944e-05, |
|
"loss": 0.0302, |
|
"step": 1385 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 1.2777505593790834e-05, |
|
"loss": 0.0317, |
|
"step": 1386 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 1.2742707451962088e-05, |
|
"loss": 0.0308, |
|
"step": 1387 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 1.2707940545313613e-05, |
|
"loss": 0.0312, |
|
"step": 1388 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 1.2673204962441671e-05, |
|
"loss": 0.0294, |
|
"step": 1389 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 1.263850079186274e-05, |
|
"loss": 0.0312, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 1.2603828122013246e-05, |
|
"loss": 0.0329, |
|
"step": 1391 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 1.2569187041249315e-05, |
|
"loss": 0.0285, |
|
"step": 1392 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 1.253457763784659e-05, |
|
"loss": 0.0304, |
|
"step": 1393 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 1.2500000000000006e-05, |
|
"loss": 0.0295, |
|
"step": 1394 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 1.246545421582353e-05, |
|
"loss": 0.0317, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 1.2430940373349945e-05, |
|
"loss": 0.0315, |
|
"step": 1396 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 1.2396458560530694e-05, |
|
"loss": 0.0333, |
|
"step": 1397 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 1.2362008865235536e-05, |
|
"loss": 0.0306, |
|
"step": 1398 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 1.2327591375252403e-05, |
|
"loss": 0.0301, |
|
"step": 1399 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 1.2293206178287184e-05, |
|
"loss": 0.0311, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 1.2258853361963448e-05, |
|
"loss": 0.0328, |
|
"step": 1401 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 1.2224533013822238e-05, |
|
"loss": 0.0303, |
|
"step": 1402 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 1.2190245221321912e-05, |
|
"loss": 0.0355, |
|
"step": 1403 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 1.2155990071837817e-05, |
|
"loss": 0.0314, |
|
"step": 1404 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 1.2121767652662123e-05, |
|
"loss": 0.0331, |
|
"step": 1405 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 1.2087578051003617e-05, |
|
"loss": 0.0345, |
|
"step": 1406 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 1.2053421353987437e-05, |
|
"loss": 0.0276, |
|
"step": 1407 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 1.2019297648654857e-05, |
|
"loss": 0.0291, |
|
"step": 1408 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 1.198520702196313e-05, |
|
"loss": 0.0306, |
|
"step": 1409 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 1.1951149560785167e-05, |
|
"loss": 0.0319, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 1.191712535190937e-05, |
|
"loss": 0.0301, |
|
"step": 1411 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 1.188313448203943e-05, |
|
"loss": 0.0246, |
|
"step": 1412 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 1.1849177037794051e-05, |
|
"loss": 0.0307, |
|
"step": 1413 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 1.181525310570677e-05, |
|
"loss": 0.0311, |
|
"step": 1414 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 1.178136277222573e-05, |
|
"loss": 0.0298, |
|
"step": 1415 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 1.1747506123713458e-05, |
|
"loss": 0.0332, |
|
"step": 1416 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 1.1713683246446622e-05, |
|
"loss": 0.0277, |
|
"step": 1417 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 1.1679894226615862e-05, |
|
"loss": 0.0343, |
|
"step": 1418 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 1.1646139150325507e-05, |
|
"loss": 0.0324, |
|
"step": 1419 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 1.16124181035934e-05, |
|
"loss": 0.0311, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 1.157873117235067e-05, |
|
"loss": 0.0322, |
|
"step": 1421 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 1.1545078442441526e-05, |
|
"loss": 0.0311, |
|
"step": 1422 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 1.1511459999622981e-05, |
|
"loss": 0.033, |
|
"step": 1423 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 1.147787592956472e-05, |
|
"loss": 0.0335, |
|
"step": 1424 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 1.14443263178488e-05, |
|
"loss": 0.0307, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 1.1410811249969475e-05, |
|
"loss": 0.0314, |
|
"step": 1426 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 1.1377330811332988e-05, |
|
"loss": 0.0313, |
|
"step": 1427 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 1.1343885087257337e-05, |
|
"loss": 0.03, |
|
"step": 1428 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 1.1310474162972026e-05, |
|
"loss": 0.0284, |
|
"step": 1429 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 1.1277098123617922e-05, |
|
"loss": 0.032, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 1.124375705424696e-05, |
|
"loss": 0.0383, |
|
"step": 1431 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 1.1210451039821965e-05, |
|
"loss": 0.0352, |
|
"step": 1432 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 1.117718016521645e-05, |
|
"loss": 0.0317, |
|
"step": 1433 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 1.1143944515214386e-05, |
|
"loss": 0.0316, |
|
"step": 1434 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 1.1110744174509952e-05, |
|
"loss": 0.0285, |
|
"step": 1435 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 1.1077579227707357e-05, |
|
"loss": 0.0301, |
|
"step": 1436 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 1.104444975932064e-05, |
|
"loss": 0.0307, |
|
"step": 1437 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 1.10113558537734e-05, |
|
"loss": 0.0309, |
|
"step": 1438 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 1.0978297595398632e-05, |
|
"loss": 0.0328, |
|
"step": 1439 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 1.094527506843849e-05, |
|
"loss": 0.0277, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 1.0912288357044062e-05, |
|
"loss": 0.03, |
|
"step": 1441 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 1.0879337545275165e-05, |
|
"loss": 0.0302, |
|
"step": 1442 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 1.084642271710016e-05, |
|
"loss": 0.0309, |
|
"step": 1443 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 1.0813543956395675e-05, |
|
"loss": 0.0292, |
|
"step": 1444 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 1.0780701346946453e-05, |
|
"loss": 0.0297, |
|
"step": 1445 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 1.074789497244512e-05, |
|
"loss": 0.0303, |
|
"step": 1446 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 1.0715124916491937e-05, |
|
"loss": 0.0309, |
|
"step": 1447 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 1.0682391262594618e-05, |
|
"loss": 0.0278, |
|
"step": 1448 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 1.0649694094168147e-05, |
|
"loss": 0.0285, |
|
"step": 1449 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 1.0617033494534486e-05, |
|
"loss": 0.0329, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 1.0584409546922445e-05, |
|
"loss": 0.0299, |
|
"step": 1451 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 1.0551822334467429e-05, |
|
"loss": 0.0333, |
|
"step": 1452 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 1.0519271940211215e-05, |
|
"loss": 0.0296, |
|
"step": 1453 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 1.0486758447101751e-05, |
|
"loss": 0.0322, |
|
"step": 1454 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 1.0454281937992989e-05, |
|
"loss": 0.031, |
|
"step": 1455 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 1.0421842495644587e-05, |
|
"loss": 0.0293, |
|
"step": 1456 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 1.0389440202721778e-05, |
|
"loss": 0.0341, |
|
"step": 1457 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 1.035707514179513e-05, |
|
"loss": 0.0311, |
|
"step": 1458 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 1.0324747395340309e-05, |
|
"loss": 0.0335, |
|
"step": 1459 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 1.0292457045737895e-05, |
|
"loss": 0.0347, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 1.02602041752732e-05, |
|
"loss": 0.0323, |
|
"step": 1461 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 1.0227988866135996e-05, |
|
"loss": 0.0282, |
|
"step": 1462 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 1.0195811200420333e-05, |
|
"loss": 0.0285, |
|
"step": 1463 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 1.0163671260124385e-05, |
|
"loss": 0.0313, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 1.0131569127150142e-05, |
|
"loss": 0.0255, |
|
"step": 1465 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 1.0099504883303254e-05, |
|
"loss": 0.0303, |
|
"step": 1466 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 1.0067478610292847e-05, |
|
"loss": 0.0282, |
|
"step": 1467 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 1.0035490389731255e-05, |
|
"loss": 0.0268, |
|
"step": 1468 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 1.0003540303133843e-05, |
|
"loss": 0.0289, |
|
"step": 1469 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 9.971628431918845e-06, |
|
"loss": 0.0339, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 9.939754857407063e-06, |
|
"loss": 0.0288, |
|
"step": 1471 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 9.90791966082171e-06, |
|
"loss": 0.0332, |
|
"step": 1472 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 9.876122923288239e-06, |
|
"loss": 0.0286, |
|
"step": 1473 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 9.844364725834057e-06, |
|
"loss": 0.0265, |
|
"step": 1474 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 9.812645149388363e-06, |
|
"loss": 0.0325, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 9.780964274781984e-06, |
|
"loss": 0.0295, |
|
"step": 1476 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 9.749322182747072e-06, |
|
"loss": 0.0308, |
|
"step": 1477 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 9.71771895391696e-06, |
|
"loss": 0.0317, |
|
"step": 1478 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 9.686154668825973e-06, |
|
"loss": 0.0275, |
|
"step": 1479 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 9.654629407909163e-06, |
|
"loss": 0.0283, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 9.623143251502148e-06, |
|
"loss": 0.0287, |
|
"step": 1481 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 9.591696279840906e-06, |
|
"loss": 0.0304, |
|
"step": 1482 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 9.560288573061563e-06, |
|
"loss": 0.037, |
|
"step": 1483 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 9.52892021120016e-06, |
|
"loss": 0.0308, |
|
"step": 1484 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 9.497591274192508e-06, |
|
"loss": 0.0303, |
|
"step": 1485 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 9.46630184187393e-06, |
|
"loss": 0.0295, |
|
"step": 1486 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 9.435051993979077e-06, |
|
"loss": 0.0314, |
|
"step": 1487 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 9.403841810141747e-06, |
|
"loss": 0.0323, |
|
"step": 1488 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 9.372671369894661e-06, |
|
"loss": 0.0294, |
|
"step": 1489 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 9.341540752669235e-06, |
|
"loss": 0.0316, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 9.310450037795435e-06, |
|
"loss": 0.0279, |
|
"step": 1491 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 9.279399304501526e-06, |
|
"loss": 0.0302, |
|
"step": 1492 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 9.248388631913887e-06, |
|
"loss": 0.0302, |
|
"step": 1493 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 9.21741809905682e-06, |
|
"loss": 0.0298, |
|
"step": 1494 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 9.186487784852349e-06, |
|
"loss": 0.029, |
|
"step": 1495 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 9.155597768119978e-06, |
|
"loss": 0.0307, |
|
"step": 1496 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 9.124748127576552e-06, |
|
"loss": 0.0299, |
|
"step": 1497 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 9.09393894183601e-06, |
|
"loss": 0.0307, |
|
"step": 1498 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 9.063170289409192e-06, |
|
"loss": 0.0293, |
|
"step": 1499 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 9.032442248703666e-06, |
|
"loss": 0.0323, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 9.001754898023512e-06, |
|
"loss": 0.0289, |
|
"step": 1501 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 8.971108315569094e-06, |
|
"loss": 0.0323, |
|
"step": 1502 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 8.940502579436913e-06, |
|
"loss": 0.0276, |
|
"step": 1503 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 8.90993776761937e-06, |
|
"loss": 0.0294, |
|
"step": 1504 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 8.879413958004566e-06, |
|
"loss": 0.036, |
|
"step": 1505 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 8.848931228376136e-06, |
|
"loss": 0.0305, |
|
"step": 1506 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 8.818489656413043e-06, |
|
"loss": 0.0326, |
|
"step": 1507 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 8.788089319689324e-06, |
|
"loss": 0.0323, |
|
"step": 1508 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 8.757730295673985e-06, |
|
"loss": 0.0318, |
|
"step": 1509 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 8.727412661730724e-06, |
|
"loss": 0.0285, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 8.697136495117763e-06, |
|
"loss": 0.0315, |
|
"step": 1511 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 8.666901872987676e-06, |
|
"loss": 0.0302, |
|
"step": 1512 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 8.63670887238716e-06, |
|
"loss": 0.0298, |
|
"step": 1513 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 8.606557570256843e-06, |
|
"loss": 0.0332, |
|
"step": 1514 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 8.576448043431082e-06, |
|
"loss": 0.0327, |
|
"step": 1515 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 8.546380368637812e-06, |
|
"loss": 0.0314, |
|
"step": 1516 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 8.51635462249828e-06, |
|
"loss": 0.0287, |
|
"step": 1517 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 8.486370881526917e-06, |
|
"loss": 0.0282, |
|
"step": 1518 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 8.456429222131082e-06, |
|
"loss": 0.0327, |
|
"step": 1519 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 8.426529720610934e-06, |
|
"loss": 0.0285, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 8.396672453159163e-06, |
|
"loss": 0.0258, |
|
"step": 1521 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 8.36685749586087e-06, |
|
"loss": 0.0292, |
|
"step": 1522 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 8.337084924693303e-06, |
|
"loss": 0.0266, |
|
"step": 1523 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 8.307354815525731e-06, |
|
"loss": 0.0299, |
|
"step": 1524 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 8.277667244119187e-06, |
|
"loss": 0.0293, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 8.24802228612633e-06, |
|
"loss": 0.0298, |
|
"step": 1526 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"learning_rate": 8.218420017091208e-06, |
|
"loss": 0.0274, |
|
"step": 1527 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"learning_rate": 8.188860512449107e-06, |
|
"loss": 0.0281, |
|
"step": 1528 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"learning_rate": 8.159343847526308e-06, |
|
"loss": 0.026, |
|
"step": 1529 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"learning_rate": 8.129870097539951e-06, |
|
"loss": 0.0282, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 8.100439337597798e-06, |
|
"loss": 0.0328, |
|
"step": 1531 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 8.071051642698074e-06, |
|
"loss": 0.0301, |
|
"step": 1532 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 8.041707087729244e-06, |
|
"loss": 0.0283, |
|
"step": 1533 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 8.012405747469862e-06, |
|
"loss": 0.032, |
|
"step": 1534 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 7.983147696588339e-06, |
|
"loss": 0.0303, |
|
"step": 1535 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 7.953933009642773e-06, |
|
"loss": 0.0317, |
|
"step": 1536 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 7.924761761080768e-06, |
|
"loss": 0.029, |
|
"step": 1537 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 7.895634025239243e-06, |
|
"loss": 0.034, |
|
"step": 1538 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 7.866549876344201e-06, |
|
"loss": 0.0298, |
|
"step": 1539 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 7.837509388510611e-06, |
|
"loss": 0.03, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 7.808512635742157e-06, |
|
"loss": 0.027, |
|
"step": 1541 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 7.779559691931066e-06, |
|
"loss": 0.0287, |
|
"step": 1542 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 7.750650630857947e-06, |
|
"loss": 0.0319, |
|
"step": 1543 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 7.721785526191588e-06, |
|
"loss": 0.0297, |
|
"step": 1544 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 7.692964451488734e-06, |
|
"loss": 0.0292, |
|
"step": 1545 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 7.66418748019396e-06, |
|
"loss": 0.0264, |
|
"step": 1546 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 7.63545468563943e-06, |
|
"loss": 0.0296, |
|
"step": 1547 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 7.606766141044733e-06, |
|
"loss": 0.0279, |
|
"step": 1548 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 7.578121919516712e-06, |
|
"loss": 0.0273, |
|
"step": 1549 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 7.54952209404926e-06, |
|
"loss": 0.0302, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 7.520966737523116e-06, |
|
"loss": 0.024, |
|
"step": 1551 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 7.4924559227057265e-06, |
|
"loss": 0.033, |
|
"step": 1552 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 7.463989722251014e-06, |
|
"loss": 0.0283, |
|
"step": 1553 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 7.435568208699203e-06, |
|
"loss": 0.0268, |
|
"step": 1554 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 7.407191454476667e-06, |
|
"loss": 0.0264, |
|
"step": 1555 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 7.37885953189571e-06, |
|
"loss": 0.0283, |
|
"step": 1556 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 7.350572513154377e-06, |
|
"loss": 0.0288, |
|
"step": 1557 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 7.3223304703363135e-06, |
|
"loss": 0.0291, |
|
"step": 1558 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 7.294133475410528e-06, |
|
"loss": 0.0288, |
|
"step": 1559 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 7.265981600231234e-06, |
|
"loss": 0.029, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 7.23787491653769e-06, |
|
"loss": 0.0321, |
|
"step": 1561 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 7.209813495953963e-06, |
|
"loss": 0.0284, |
|
"step": 1562 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 7.181797409988802e-06, |
|
"loss": 0.0291, |
|
"step": 1563 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 7.153826730035423e-06, |
|
"loss": 0.0289, |
|
"step": 1564 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 7.125901527371329e-06, |
|
"loss": 0.0286, |
|
"step": 1565 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 7.0980218731581255e-06, |
|
"loss": 0.0292, |
|
"step": 1566 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 7.070187838441369e-06, |
|
"loss": 0.0299, |
|
"step": 1567 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 7.042399494150342e-06, |
|
"loss": 0.0293, |
|
"step": 1568 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 7.0146569110979086e-06, |
|
"loss": 0.0291, |
|
"step": 1569 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 6.986960159980327e-06, |
|
"loss": 0.0306, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 6.959309311377038e-06, |
|
"loss": 0.0302, |
|
"step": 1571 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 6.931704435750522e-06, |
|
"loss": 0.0352, |
|
"step": 1572 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 6.904145603446116e-06, |
|
"loss": 0.0274, |
|
"step": 1573 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 6.876632884691803e-06, |
|
"loss": 0.0321, |
|
"step": 1574 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 6.849166349598079e-06, |
|
"loss": 0.0282, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 6.821746068157741e-06, |
|
"loss": 0.0289, |
|
"step": 1576 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 6.794372110245717e-06, |
|
"loss": 0.029, |
|
"step": 1577 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 6.767044545618878e-06, |
|
"loss": 0.0286, |
|
"step": 1578 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 6.739763443915895e-06, |
|
"loss": 0.0288, |
|
"step": 1579 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 6.712528874657012e-06, |
|
"loss": 0.0274, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 6.685340907243915e-06, |
|
"loss": 0.0312, |
|
"step": 1581 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 6.658199610959537e-06, |
|
"loss": 0.032, |
|
"step": 1582 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 6.6311050549678595e-06, |
|
"loss": 0.0284, |
|
"step": 1583 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 6.604057308313763e-06, |
|
"loss": 0.0303, |
|
"step": 1584 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 6.577056439922857e-06, |
|
"loss": 0.0278, |
|
"step": 1585 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 6.55010251860127e-06, |
|
"loss": 0.0302, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 6.523195613035521e-06, |
|
"loss": 0.0285, |
|
"step": 1587 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 6.496335791792293e-06, |
|
"loss": 0.0279, |
|
"step": 1588 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 6.469523123318308e-06, |
|
"loss": 0.0302, |
|
"step": 1589 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 6.442757675940109e-06, |
|
"loss": 0.0319, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 6.4160395178639196e-06, |
|
"loss": 0.0297, |
|
"step": 1591 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 6.389368717175448e-06, |
|
"loss": 0.0272, |
|
"step": 1592 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 6.362745341839729e-06, |
|
"loss": 0.0292, |
|
"step": 1593 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 6.336169459700933e-06, |
|
"loss": 0.027, |
|
"step": 1594 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 6.309641138482222e-06, |
|
"loss": 0.0302, |
|
"step": 1595 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 6.283160445785532e-06, |
|
"loss": 0.0258, |
|
"step": 1596 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 6.25672744909146e-06, |
|
"loss": 0.028, |
|
"step": 1597 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 6.230342215759028e-06, |
|
"loss": 0.0255, |
|
"step": 1598 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 6.204004813025568e-06, |
|
"loss": 0.0309, |
|
"step": 1599 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 6.177715308006505e-06, |
|
"loss": 0.0329, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 6.151473767695229e-06, |
|
"loss": 0.0294, |
|
"step": 1601 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 6.125280258962873e-06, |
|
"loss": 0.0273, |
|
"step": 1602 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 6.099134848558208e-06, |
|
"loss": 0.0266, |
|
"step": 1603 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 6.073037603107404e-06, |
|
"loss": 0.0281, |
|
"step": 1604 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 6.0469885891139e-06, |
|
"loss": 0.0286, |
|
"step": 1605 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 6.020987872958236e-06, |
|
"loss": 0.0251, |
|
"step": 1606 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 5.995035520897882e-06, |
|
"loss": 0.0266, |
|
"step": 1607 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 5.969131599067044e-06, |
|
"loss": 0.0288, |
|
"step": 1608 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 5.943276173476509e-06, |
|
"loss": 0.0282, |
|
"step": 1609 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 5.91746931001351e-06, |
|
"loss": 0.0272, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 5.891711074441495e-06, |
|
"loss": 0.0275, |
|
"step": 1611 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 5.866001532400023e-06, |
|
"loss": 0.0284, |
|
"step": 1612 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 5.84034074940456e-06, |
|
"loss": 0.029, |
|
"step": 1613 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 5.814728790846308e-06, |
|
"loss": 0.0275, |
|
"step": 1614 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 5.789165721992052e-06, |
|
"loss": 0.0286, |
|
"step": 1615 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 5.763651607984008e-06, |
|
"loss": 0.0252, |
|
"step": 1616 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 5.738186513839619e-06, |
|
"loss": 0.0278, |
|
"step": 1617 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 5.712770504451426e-06, |
|
"loss": 0.0271, |
|
"step": 1618 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 5.687403644586891e-06, |
|
"loss": 0.0298, |
|
"step": 1619 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 5.662085998888214e-06, |
|
"loss": 0.0299, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 5.636817631872185e-06, |
|
"loss": 0.0254, |
|
"step": 1621 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 5.611598607930032e-06, |
|
"loss": 0.027, |
|
"step": 1622 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 5.586428991327223e-06, |
|
"loss": 0.0297, |
|
"step": 1623 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 5.561308846203333e-06, |
|
"loss": 0.0311, |
|
"step": 1624 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 5.5362382365718775e-06, |
|
"loss": 0.0297, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 5.511217226320125e-06, |
|
"loss": 0.0296, |
|
"step": 1626 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 5.486245879208945e-06, |
|
"loss": 0.0279, |
|
"step": 1627 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 5.46132425887268e-06, |
|
"loss": 0.0267, |
|
"step": 1628 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 5.436452428818919e-06, |
|
"loss": 0.0269, |
|
"step": 1629 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 5.411630452428395e-06, |
|
"loss": 0.0305, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 5.386858392954799e-06, |
|
"loss": 0.026, |
|
"step": 1631 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 5.362136313524607e-06, |
|
"loss": 0.0271, |
|
"step": 1632 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 5.337464277136925e-06, |
|
"loss": 0.0284, |
|
"step": 1633 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 5.3128423466633634e-06, |
|
"loss": 0.0275, |
|
"step": 1634 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 5.288270584847813e-06, |
|
"loss": 0.0271, |
|
"step": 1635 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 5.263749054306347e-06, |
|
"loss": 0.028, |
|
"step": 1636 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 5.23927781752703e-06, |
|
"loss": 0.0257, |
|
"step": 1637 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 5.214856936869752e-06, |
|
"loss": 0.0269, |
|
"step": 1638 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 5.1904864745660835e-06, |
|
"loss": 0.0233, |
|
"step": 1639 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 5.166166492719124e-06, |
|
"loss": 0.0302, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 5.141897053303327e-06, |
|
"loss": 0.0278, |
|
"step": 1641 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 5.117678218164338e-06, |
|
"loss": 0.0327, |
|
"step": 1642 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 5.0935100490188795e-06, |
|
"loss": 0.0293, |
|
"step": 1643 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 0.9072719812393188, |
|
"eval_runtime": 116.1375, |
|
"eval_samples_per_second": 6.561, |
|
"eval_steps_per_second": 0.413, |
|
"step": 1643 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 5.0693926074545315e-06, |
|
"loss": 0.0203, |
|
"step": 1644 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 5.045325954929614e-06, |
|
"loss": 0.0074, |
|
"step": 1645 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 5.0213101527730345e-06, |
|
"loss": 0.0079, |
|
"step": 1646 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 4.9973452621841e-06, |
|
"loss": 0.0079, |
|
"step": 1647 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 4.973431344232377e-06, |
|
"loss": 0.0085, |
|
"step": 1648 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 4.9495684598575735e-06, |
|
"loss": 0.0075, |
|
"step": 1649 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 4.925756669869314e-06, |
|
"loss": 0.0068, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 4.9019960349470265e-06, |
|
"loss": 0.0074, |
|
"step": 1651 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 4.878286615639791e-06, |
|
"loss": 0.0053, |
|
"step": 1652 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 4.8546284723661715e-06, |
|
"loss": 0.0074, |
|
"step": 1653 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 4.8310216654140425e-06, |
|
"loss": 0.005, |
|
"step": 1654 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 4.80746625494051e-06, |
|
"loss": 0.006, |
|
"step": 1655 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 4.7839623009716615e-06, |
|
"loss": 0.0054, |
|
"step": 1656 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 4.760509863402468e-06, |
|
"loss": 0.0068, |
|
"step": 1657 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 4.737109001996637e-06, |
|
"loss": 0.0047, |
|
"step": 1658 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 4.7137597763864286e-06, |
|
"loss": 0.0056, |
|
"step": 1659 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 4.690462246072516e-06, |
|
"loss": 0.0059, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 4.667216470423858e-06, |
|
"loss": 0.0051, |
|
"step": 1661 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 4.644022508677518e-06, |
|
"loss": 0.0063, |
|
"step": 1662 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 4.620880419938511e-06, |
|
"loss": 0.0059, |
|
"step": 1663 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 4.5977902631796855e-06, |
|
"loss": 0.0067, |
|
"step": 1664 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 4.574752097241533e-06, |
|
"loss": 0.005, |
|
"step": 1665 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 4.551765980832059e-06, |
|
"loss": 0.0048, |
|
"step": 1666 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 4.528831972526645e-06, |
|
"loss": 0.0066, |
|
"step": 1667 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 4.505950130767883e-06, |
|
"loss": 0.0045, |
|
"step": 1668 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 4.483120513865411e-06, |
|
"loss": 0.0046, |
|
"step": 1669 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 4.460343179995807e-06, |
|
"loss": 0.006, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 4.4376181872024e-06, |
|
"loss": 0.0047, |
|
"step": 1671 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 4.4149455933951396e-06, |
|
"loss": 0.0059, |
|
"step": 1672 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 4.392325456350454e-06, |
|
"loss": 0.0052, |
|
"step": 1673 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 4.369757833711105e-06, |
|
"loss": 0.0062, |
|
"step": 1674 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 4.347242782986008e-06, |
|
"loss": 0.004, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 4.324780361550129e-06, |
|
"loss": 0.006, |
|
"step": 1676 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 4.302370626644314e-06, |
|
"loss": 0.0052, |
|
"step": 1677 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 4.280013635375138e-06, |
|
"loss": 0.0048, |
|
"step": 1678 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 4.2577094447147856e-06, |
|
"loss": 0.0047, |
|
"step": 1679 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 4.235458111500889e-06, |
|
"loss": 0.0046, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 4.213259692436367e-06, |
|
"loss": 0.0047, |
|
"step": 1681 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 4.19111424408932e-06, |
|
"loss": 0.0048, |
|
"step": 1682 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 4.169021822892849e-06, |
|
"loss": 0.0045, |
|
"step": 1683 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 4.146982485144921e-06, |
|
"loss": 0.0083, |
|
"step": 1684 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 4.124996287008245e-06, |
|
"loss": 0.0056, |
|
"step": 1685 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 4.103063284510117e-06, |
|
"loss": 0.0061, |
|
"step": 1686 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 4.081183533542262e-06, |
|
"loss": 0.0058, |
|
"step": 1687 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 4.059357089860702e-06, |
|
"loss": 0.0057, |
|
"step": 1688 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 4.037584009085635e-06, |
|
"loss": 0.005, |
|
"step": 1689 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 4.015864346701251e-06, |
|
"loss": 0.0049, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 3.994198158055637e-06, |
|
"loss": 0.0048, |
|
"step": 1691 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 3.972585498360606e-06, |
|
"loss": 0.0067, |
|
"step": 1692 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 3.951026422691556e-06, |
|
"loss": 0.0054, |
|
"step": 1693 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 3.929520985987334e-06, |
|
"loss": 0.0043, |
|
"step": 1694 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 3.908069243050122e-06, |
|
"loss": 0.0051, |
|
"step": 1695 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 3.886671248545243e-06, |
|
"loss": 0.0045, |
|
"step": 1696 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 3.865327057001078e-06, |
|
"loss": 0.0054, |
|
"step": 1697 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 3.8440367228088995e-06, |
|
"loss": 0.0051, |
|
"step": 1698 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 3.8228003002227255e-06, |
|
"loss": 0.0048, |
|
"step": 1699 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 3.801617843359187e-06, |
|
"loss": 0.0048, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 3.7804894061974183e-06, |
|
"loss": 0.0059, |
|
"step": 1701 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 3.7594150425788675e-06, |
|
"loss": 0.0057, |
|
"step": 1702 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 3.738394806207207e-06, |
|
"loss": 0.0057, |
|
"step": 1703 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 3.7174287506481776e-06, |
|
"loss": 0.0046, |
|
"step": 1704 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 3.6965169293294357e-06, |
|
"loss": 0.0039, |
|
"step": 1705 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 3.67565939554044e-06, |
|
"loss": 0.0045, |
|
"step": 1706 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 3.654856202432319e-06, |
|
"loss": 0.0069, |
|
"step": 1707 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 3.6341074030177114e-06, |
|
"loss": 0.0053, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 3.6134130501706417e-06, |
|
"loss": 0.0061, |
|
"step": 1709 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 3.592773196626417e-06, |
|
"loss": 0.0049, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 3.5721878949814323e-06, |
|
"loss": 0.0051, |
|
"step": 1711 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 3.5516571976930786e-06, |
|
"loss": 0.0053, |
|
"step": 1712 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 3.531181157079605e-06, |
|
"loss": 0.0045, |
|
"step": 1713 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 3.5107598253199758e-06, |
|
"loss": 0.0048, |
|
"step": 1714 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 3.4903932544537276e-06, |
|
"loss": 0.0044, |
|
"step": 1715 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 3.470081496380881e-06, |
|
"loss": 0.0047, |
|
"step": 1716 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 3.4498246028617536e-06, |
|
"loss": 0.0041, |
|
"step": 1717 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 3.4296226255168485e-06, |
|
"loss": 0.0053, |
|
"step": 1718 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 3.409475615826746e-06, |
|
"loss": 0.0057, |
|
"step": 1719 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 3.3893836251319422e-06, |
|
"loss": 0.0044, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 3.3693467046327117e-06, |
|
"loss": 0.005, |
|
"step": 1721 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 3.3493649053890326e-06, |
|
"loss": 0.0045, |
|
"step": 1722 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 3.32943827832039e-06, |
|
"loss": 0.0049, |
|
"step": 1723 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 3.309566874205672e-06, |
|
"loss": 0.0052, |
|
"step": 1724 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 3.289750743683062e-06, |
|
"loss": 0.0046, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 3.2699899372498733e-06, |
|
"loss": 0.0048, |
|
"step": 1726 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 3.2502845052624354e-06, |
|
"loss": 0.0055, |
|
"step": 1727 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"learning_rate": 3.230634497935983e-06, |
|
"loss": 0.0042, |
|
"step": 1728 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"learning_rate": 3.211039965344512e-06, |
|
"loss": 0.0037, |
|
"step": 1729 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"learning_rate": 3.1915009574206262e-06, |
|
"loss": 0.0051, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"learning_rate": 3.17201752395547e-06, |
|
"loss": 0.0051, |
|
"step": 1731 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"learning_rate": 3.1525897145985472e-06, |
|
"loss": 0.0041, |
|
"step": 1732 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"learning_rate": 3.133217578857611e-06, |
|
"loss": 0.0048, |
|
"step": 1733 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"learning_rate": 3.113901166098562e-06, |
|
"loss": 0.0053, |
|
"step": 1734 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"learning_rate": 3.0946405255452947e-06, |
|
"loss": 0.0044, |
|
"step": 1735 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"learning_rate": 3.075435706279567e-06, |
|
"loss": 0.0044, |
|
"step": 1736 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"learning_rate": 3.0562867572409034e-06, |
|
"loss": 0.0051, |
|
"step": 1737 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"learning_rate": 3.037193727226445e-06, |
|
"loss": 0.0046, |
|
"step": 1738 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"learning_rate": 3.018156664890834e-06, |
|
"loss": 0.0039, |
|
"step": 1739 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"learning_rate": 2.9991756187461e-06, |
|
"loss": 0.0054, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 2.9802506371615246e-06, |
|
"loss": 0.0043, |
|
"step": 1741 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 2.961381768363511e-06, |
|
"loss": 0.0049, |
|
"step": 1742 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 2.942569060435482e-06, |
|
"loss": 0.0047, |
|
"step": 1743 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 2.9238125613177403e-06, |
|
"loss": 0.0047, |
|
"step": 1744 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 2.905112318807346e-06, |
|
"loss": 0.0047, |
|
"step": 1745 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 2.8864683805580133e-06, |
|
"loss": 0.0054, |
|
"step": 1746 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 2.8678807940799744e-06, |
|
"loss": 0.0037, |
|
"step": 1747 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 2.8493496067398483e-06, |
|
"loss": 0.0062, |
|
"step": 1748 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"learning_rate": 2.8308748657605522e-06, |
|
"loss": 0.0042, |
|
"step": 1749 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"learning_rate": 2.812456618221143e-06, |
|
"loss": 0.006, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"learning_rate": 2.794094911056719e-06, |
|
"loss": 0.0043, |
|
"step": 1751 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"learning_rate": 2.775789791058306e-06, |
|
"loss": 0.0049, |
|
"step": 1752 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 2.757541304872732e-06, |
|
"loss": 0.0052, |
|
"step": 1753 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 2.7393494990024834e-06, |
|
"loss": 0.0052, |
|
"step": 1754 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 2.7212144198056374e-06, |
|
"loss": 0.0065, |
|
"step": 1755 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 2.7031361134956913e-06, |
|
"loss": 0.0049, |
|
"step": 1756 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"learning_rate": 2.6851146261414747e-06, |
|
"loss": 0.0063, |
|
"step": 1757 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"learning_rate": 2.667150003667032e-06, |
|
"loss": 0.0043, |
|
"step": 1758 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"learning_rate": 2.649242291851503e-06, |
|
"loss": 0.0048, |
|
"step": 1759 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"learning_rate": 2.631391536328992e-06, |
|
"loss": 0.005, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 2.6135977825884533e-06, |
|
"loss": 0.0066, |
|
"step": 1761 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 2.595861075973613e-06, |
|
"loss": 0.0053, |
|
"step": 1762 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 2.578181461682794e-06, |
|
"loss": 0.0031, |
|
"step": 1763 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 2.5605589847688518e-06, |
|
"loss": 0.0028, |
|
"step": 1764 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 2.5429936901390284e-06, |
|
"loss": 0.004, |
|
"step": 1765 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 2.5254856225548544e-06, |
|
"loss": 0.0034, |
|
"step": 1766 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 2.508034826632022e-06, |
|
"loss": 0.0068, |
|
"step": 1767 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 2.4906413468402916e-06, |
|
"loss": 0.0049, |
|
"step": 1768 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 2.4733052275033448e-06, |
|
"loss": 0.0042, |
|
"step": 1769 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 2.4560265127987147e-06, |
|
"loss": 0.0048, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 2.4388052467576308e-06, |
|
"loss": 0.0053, |
|
"step": 1771 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 2.4216414732649432e-06, |
|
"loss": 0.0047, |
|
"step": 1772 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"learning_rate": 2.40453523605898e-06, |
|
"loss": 0.0046, |
|
"step": 1773 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"learning_rate": 2.3874865787314598e-06, |
|
"loss": 0.0043, |
|
"step": 1774 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"learning_rate": 2.3704955447273636e-06, |
|
"loss": 0.0053, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"learning_rate": 2.3535621773448395e-06, |
|
"loss": 0.005, |
|
"step": 1776 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"learning_rate": 2.3366865197350733e-06, |
|
"loss": 0.0045, |
|
"step": 1777 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 2.3198686149022013e-06, |
|
"loss": 0.006, |
|
"step": 1778 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 2.303108505703178e-06, |
|
"loss": 0.0047, |
|
"step": 1779 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 2.2864062348476905e-06, |
|
"loss": 0.0038, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 2.2697618448980217e-06, |
|
"loss": 0.0054, |
|
"step": 1781 |
|
}, |
|
{ |
|
"epoch": 4.34, |
|
"learning_rate": 2.2531753782689598e-06, |
|
"loss": 0.0052, |
|
"step": 1782 |
|
}, |
|
{ |
|
"epoch": 4.34, |
|
"learning_rate": 2.2366468772276994e-06, |
|
"loss": 0.0038, |
|
"step": 1783 |
|
}, |
|
{ |
|
"epoch": 4.34, |
|
"learning_rate": 2.2201763838937184e-06, |
|
"loss": 0.0043, |
|
"step": 1784 |
|
}, |
|
{ |
|
"epoch": 4.34, |
|
"learning_rate": 2.2037639402386566e-06, |
|
"loss": 0.0038, |
|
"step": 1785 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 2.1874095880862505e-06, |
|
"loss": 0.0052, |
|
"step": 1786 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 2.1711133691121903e-06, |
|
"loss": 0.0052, |
|
"step": 1787 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 2.1548753248440164e-06, |
|
"loss": 0.0055, |
|
"step": 1788 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 2.138695496661039e-06, |
|
"loss": 0.0044, |
|
"step": 1789 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"learning_rate": 2.122573925794219e-06, |
|
"loss": 0.0037, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"learning_rate": 2.1065106533260383e-06, |
|
"loss": 0.0063, |
|
"step": 1791 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"learning_rate": 2.0905057201904445e-06, |
|
"loss": 0.0035, |
|
"step": 1792 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"learning_rate": 2.0745591671727018e-06, |
|
"loss": 0.0048, |
|
"step": 1793 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"learning_rate": 2.0586710349093013e-06, |
|
"loss": 0.0057, |
|
"step": 1794 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"learning_rate": 2.0428413638878764e-06, |
|
"loss": 0.0055, |
|
"step": 1795 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"learning_rate": 2.027070194447081e-06, |
|
"loss": 0.0052, |
|
"step": 1796 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"learning_rate": 2.0113575667764755e-06, |
|
"loss": 0.0036, |
|
"step": 1797 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 1.995703520916456e-06, |
|
"loss": 0.0041, |
|
"step": 1798 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 1.9801080967581263e-06, |
|
"loss": 0.0054, |
|
"step": 1799 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 1.9645713340431997e-06, |
|
"loss": 0.0044, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 1.9490932723639165e-06, |
|
"loss": 0.0047, |
|
"step": 1801 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"learning_rate": 1.9336739511629233e-06, |
|
"loss": 0.0049, |
|
"step": 1802 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"learning_rate": 1.918313409733169e-06, |
|
"loss": 0.0041, |
|
"step": 1803 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"learning_rate": 1.9030116872178316e-06, |
|
"loss": 0.0044, |
|
"step": 1804 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"learning_rate": 1.8877688226101919e-06, |
|
"loss": 0.0059, |
|
"step": 1805 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 1.8725848547535368e-06, |
|
"loss": 0.0041, |
|
"step": 1806 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 1.8574598223410872e-06, |
|
"loss": 0.0061, |
|
"step": 1807 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 1.8423937639158534e-06, |
|
"loss": 0.0046, |
|
"step": 1808 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 1.82738671787058e-06, |
|
"loss": 0.0043, |
|
"step": 1809 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"learning_rate": 1.8124387224476347e-06, |
|
"loss": 0.0052, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"learning_rate": 1.7975498157388915e-06, |
|
"loss": 0.0051, |
|
"step": 1811 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"learning_rate": 1.7827200356856533e-06, |
|
"loss": 0.0046, |
|
"step": 1812 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"learning_rate": 1.7679494200785601e-06, |
|
"loss": 0.0069, |
|
"step": 1813 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"learning_rate": 1.7532380065574726e-06, |
|
"loss": 0.005, |
|
"step": 1814 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"learning_rate": 1.7385858326113918e-06, |
|
"loss": 0.004, |
|
"step": 1815 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"learning_rate": 1.7239929355783668e-06, |
|
"loss": 0.004, |
|
"step": 1816 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"learning_rate": 1.709459352645379e-06, |
|
"loss": 0.0052, |
|
"step": 1817 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"learning_rate": 1.694985120848258e-06, |
|
"loss": 0.0054, |
|
"step": 1818 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 1.6805702770716053e-06, |
|
"loss": 0.0044, |
|
"step": 1819 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 1.6662148580486702e-06, |
|
"loss": 0.0044, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 1.6519189003612767e-06, |
|
"loss": 0.0045, |
|
"step": 1821 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 1.6376824404397251e-06, |
|
"loss": 0.0045, |
|
"step": 1822 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 1.6235055145626953e-06, |
|
"loss": 0.0045, |
|
"step": 1823 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 1.6093881588571501e-06, |
|
"loss": 0.0058, |
|
"step": 1824 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 1.5953304092982624e-06, |
|
"loss": 0.0047, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 1.581332301709304e-06, |
|
"loss": 0.005, |
|
"step": 1826 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"learning_rate": 1.5673938717615605e-06, |
|
"loss": 0.0065, |
|
"step": 1827 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"learning_rate": 1.5535151549742528e-06, |
|
"loss": 0.0043, |
|
"step": 1828 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"learning_rate": 1.5396961867144206e-06, |
|
"loss": 0.0044, |
|
"step": 1829 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"learning_rate": 1.525937002196845e-06, |
|
"loss": 0.004, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"learning_rate": 1.512237636483982e-06, |
|
"loss": 0.0058, |
|
"step": 1831 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"learning_rate": 1.4985981244858254e-06, |
|
"loss": 0.0032, |
|
"step": 1832 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"learning_rate": 1.4850185009598645e-06, |
|
"loss": 0.0058, |
|
"step": 1833 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"learning_rate": 1.471498800510962e-06, |
|
"loss": 0.0051, |
|
"step": 1834 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"learning_rate": 1.4580390575912872e-06, |
|
"loss": 0.005, |
|
"step": 1835 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"learning_rate": 1.4446393065002144e-06, |
|
"loss": 0.0044, |
|
"step": 1836 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"learning_rate": 1.431299581384249e-06, |
|
"loss": 0.005, |
|
"step": 1837 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"learning_rate": 1.4180199162369207e-06, |
|
"loss": 0.0053, |
|
"step": 1838 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 1.4048003448987213e-06, |
|
"loss": 0.0044, |
|
"step": 1839 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 1.3916409010569926e-06, |
|
"loss": 0.0061, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 1.378541618245871e-06, |
|
"loss": 0.0043, |
|
"step": 1841 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 1.365502529846166e-06, |
|
"loss": 0.0038, |
|
"step": 1842 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 1.3525236690853093e-06, |
|
"loss": 0.0038, |
|
"step": 1843 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 1.3396050690372418e-06, |
|
"loss": 0.0042, |
|
"step": 1844 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 1.3267467626223606e-06, |
|
"loss": 0.0058, |
|
"step": 1845 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 1.3139487826073937e-06, |
|
"loss": 0.0041, |
|
"step": 1846 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 1.3012111616053618e-06, |
|
"loss": 0.0047, |
|
"step": 1847 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 1.288533932075453e-06, |
|
"loss": 0.0047, |
|
"step": 1848 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 1.2759171263229813e-06, |
|
"loss": 0.0043, |
|
"step": 1849 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 1.2633607764992671e-06, |
|
"loss": 0.0044, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"learning_rate": 1.250864914601571e-06, |
|
"loss": 0.0059, |
|
"step": 1851 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"learning_rate": 1.2384295724730266e-06, |
|
"loss": 0.0062, |
|
"step": 1852 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"learning_rate": 1.2260547818025326e-06, |
|
"loss": 0.0041, |
|
"step": 1853 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"learning_rate": 1.2137405741246916e-06, |
|
"loss": 0.0065, |
|
"step": 1854 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"learning_rate": 1.201486980819716e-06, |
|
"loss": 0.0047, |
|
"step": 1855 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"learning_rate": 1.1892940331133612e-06, |
|
"loss": 0.0041, |
|
"step": 1856 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"learning_rate": 1.1771617620768394e-06, |
|
"loss": 0.0051, |
|
"step": 1857 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"learning_rate": 1.1650901986267365e-06, |
|
"loss": 0.0042, |
|
"step": 1858 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"learning_rate": 1.1530793735249458e-06, |
|
"loss": 0.0048, |
|
"step": 1859 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"learning_rate": 1.1411293173785726e-06, |
|
"loss": 0.0042, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"learning_rate": 1.1292400606398635e-06, |
|
"loss": 0.0034, |
|
"step": 1861 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"learning_rate": 1.1174116336061468e-06, |
|
"loss": 0.005, |
|
"step": 1862 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"learning_rate": 1.1056440664197144e-06, |
|
"loss": 0.0053, |
|
"step": 1863 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 1.0939373890677923e-06, |
|
"loss": 0.0043, |
|
"step": 1864 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 1.0822916313824316e-06, |
|
"loss": 0.0046, |
|
"step": 1865 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 1.0707068230404404e-06, |
|
"loss": 0.0041, |
|
"step": 1866 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 1.059182993563304e-06, |
|
"loss": 0.0043, |
|
"step": 1867 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"learning_rate": 1.0477201723171377e-06, |
|
"loss": 0.0052, |
|
"step": 1868 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"learning_rate": 1.036318388512561e-06, |
|
"loss": 0.004, |
|
"step": 1869 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"learning_rate": 1.0249776712046744e-06, |
|
"loss": 0.0045, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"learning_rate": 1.0136980492929605e-06, |
|
"loss": 0.0043, |
|
"step": 1871 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 1.0024795515211988e-06, |
|
"loss": 0.0048, |
|
"step": 1872 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 9.913222064774157e-07, |
|
"loss": 0.0039, |
|
"step": 1873 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 9.802260425938099e-07, |
|
"loss": 0.0051, |
|
"step": 1874 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 9.691910881466564e-07, |
|
"loss": 0.0039, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 9.58217371256262e-07, |
|
"loss": 0.0055, |
|
"step": 1876 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 9.473049198868822e-07, |
|
"loss": 0.0047, |
|
"step": 1877 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 9.364537618466451e-07, |
|
"loss": 0.0047, |
|
"step": 1878 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 9.25663924787487e-07, |
|
"loss": 0.0053, |
|
"step": 1879 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 9.149354362050805e-07, |
|
"loss": 0.0037, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 9.042683234387645e-07, |
|
"loss": 0.0044, |
|
"step": 1881 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 8.936626136714754e-07, |
|
"loss": 0.0058, |
|
"step": 1882 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 8.831183339296751e-07, |
|
"loss": 0.0045, |
|
"step": 1883 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"learning_rate": 8.726355110832862e-07, |
|
"loss": 0.0049, |
|
"step": 1884 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"learning_rate": 8.622141718456128e-07, |
|
"loss": 0.0042, |
|
"step": 1885 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"learning_rate": 8.51854342773295e-07, |
|
"loss": 0.005, |
|
"step": 1886 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"learning_rate": 8.415560502662151e-07, |
|
"loss": 0.008, |
|
"step": 1887 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 8.313193205674391e-07, |
|
"loss": 0.0055, |
|
"step": 1888 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 8.211441797631752e-07, |
|
"loss": 0.004, |
|
"step": 1889 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 8.110306537826601e-07, |
|
"loss": 0.0051, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 8.009787683981279e-07, |
|
"loss": 0.0055, |
|
"step": 1891 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 7.909885492247359e-07, |
|
"loss": 0.0037, |
|
"step": 1892 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"learning_rate": 7.81060021720495e-07, |
|
"loss": 0.0039, |
|
"step": 1893 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"learning_rate": 7.711932111862025e-07, |
|
"loss": 0.0047, |
|
"step": 1894 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"learning_rate": 7.613881427654013e-07, |
|
"loss": 0.0039, |
|
"step": 1895 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"learning_rate": 7.516448414442739e-07, |
|
"loss": 0.0035, |
|
"step": 1896 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"learning_rate": 7.419633320516178e-07, |
|
"loss": 0.0054, |
|
"step": 1897 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"learning_rate": 7.32343639258759e-07, |
|
"loss": 0.0055, |
|
"step": 1898 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"learning_rate": 7.227857875795025e-07, |
|
"loss": 0.0049, |
|
"step": 1899 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"learning_rate": 7.13289801370054e-07, |
|
"loss": 0.0048, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"learning_rate": 7.038557048289818e-07, |
|
"loss": 0.004, |
|
"step": 1901 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"learning_rate": 6.944835219971329e-07, |
|
"loss": 0.0051, |
|
"step": 1902 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"learning_rate": 6.851732767575752e-07, |
|
"loss": 0.0037, |
|
"step": 1903 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"learning_rate": 6.759249928355554e-07, |
|
"loss": 0.0045, |
|
"step": 1904 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"learning_rate": 6.667386937984105e-07, |
|
"loss": 0.0055, |
|
"step": 1905 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"learning_rate": 6.576144030555259e-07, |
|
"loss": 0.0039, |
|
"step": 1906 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"learning_rate": 6.485521438582748e-07, |
|
"loss": 0.0044, |
|
"step": 1907 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"learning_rate": 6.395519392999621e-07, |
|
"loss": 0.0048, |
|
"step": 1908 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 6.30613812315739e-07, |
|
"loss": 0.0053, |
|
"step": 1909 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 6.217377856825885e-07, |
|
"loss": 0.0057, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 6.129238820192285e-07, |
|
"loss": 0.0045, |
|
"step": 1911 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 6.041721237860676e-07, |
|
"loss": 0.0047, |
|
"step": 1912 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"learning_rate": 5.954825332851632e-07, |
|
"loss": 0.0055, |
|
"step": 1913 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"learning_rate": 5.868551326601413e-07, |
|
"loss": 0.0037, |
|
"step": 1914 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"learning_rate": 5.782899438961487e-07, |
|
"loss": 0.0054, |
|
"step": 1915 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"learning_rate": 5.697869888198065e-07, |
|
"loss": 0.0041, |
|
"step": 1916 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 5.613462890991378e-07, |
|
"loss": 0.0047, |
|
"step": 1917 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 5.529678662435228e-07, |
|
"loss": 0.0028, |
|
"step": 1918 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 5.446517416036412e-07, |
|
"loss": 0.0041, |
|
"step": 1919 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 5.363979363714245e-07, |
|
"loss": 0.0039, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"learning_rate": 5.282064715799895e-07, |
|
"loss": 0.0046, |
|
"step": 1921 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"learning_rate": 5.20077368103597e-07, |
|
"loss": 0.0038, |
|
"step": 1922 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"learning_rate": 5.120106466575875e-07, |
|
"loss": 0.0053, |
|
"step": 1923 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"learning_rate": 5.040063277983287e-07, |
|
"loss": 0.0039, |
|
"step": 1924 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"learning_rate": 4.96064431923185e-07, |
|
"loss": 0.0053, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"learning_rate": 4.881849792704368e-07, |
|
"loss": 0.0041, |
|
"step": 1926 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"learning_rate": 4.803679899192392e-07, |
|
"loss": 0.0042, |
|
"step": 1927 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"learning_rate": 4.7261348378958016e-07, |
|
"loss": 0.0048, |
|
"step": 1928 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"learning_rate": 4.649214806422164e-07, |
|
"loss": 0.0068, |
|
"step": 1929 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 4.5729200007862683e-07, |
|
"loss": 0.0053, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 4.497250615409732e-07, |
|
"loss": 0.004, |
|
"step": 1931 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 4.4222068431203634e-07, |
|
"loss": 0.0038, |
|
"step": 1932 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 4.34778887515172e-07, |
|
"loss": 0.005, |
|
"step": 1933 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"learning_rate": 4.2739969011426074e-07, |
|
"loss": 0.0055, |
|
"step": 1934 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"learning_rate": 4.2008311091366606e-07, |
|
"loss": 0.0051, |
|
"step": 1935 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"learning_rate": 4.128291685581792e-07, |
|
"loss": 0.0055, |
|
"step": 1936 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"learning_rate": 4.0563788153297755e-07, |
|
"loss": 0.0043, |
|
"step": 1937 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 3.9850926816357157e-07, |
|
"loss": 0.004, |
|
"step": 1938 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 3.9144334661576074e-07, |
|
"loss": 0.0039, |
|
"step": 1939 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 3.8444013489558337e-07, |
|
"loss": 0.0042, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 3.774996508492834e-07, |
|
"loss": 0.0054, |
|
"step": 1941 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"learning_rate": 3.70621912163252e-07, |
|
"loss": 0.0062, |
|
"step": 1942 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"learning_rate": 3.6380693636398343e-07, |
|
"loss": 0.0051, |
|
"step": 1943 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"learning_rate": 3.570547408180441e-07, |
|
"loss": 0.0043, |
|
"step": 1944 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"learning_rate": 3.503653427320036e-07, |
|
"loss": 0.0035, |
|
"step": 1945 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 3.4373875915241493e-07, |
|
"loss": 0.0057, |
|
"step": 1946 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 3.371750069657592e-07, |
|
"loss": 0.0045, |
|
"step": 1947 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 3.306741028984012e-07, |
|
"loss": 0.0058, |
|
"step": 1948 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 3.242360635165559e-07, |
|
"loss": 0.0044, |
|
"step": 1949 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"learning_rate": 3.1786090522624156e-07, |
|
"loss": 0.0047, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"learning_rate": 3.1154864427322685e-07, |
|
"loss": 0.0034, |
|
"step": 1951 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"learning_rate": 3.052992967430085e-07, |
|
"loss": 0.0035, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"learning_rate": 2.991128785607589e-07, |
|
"loss": 0.0039, |
|
"step": 1953 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"learning_rate": 2.9298940549128964e-07, |
|
"loss": 0.0039, |
|
"step": 1954 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"learning_rate": 2.8692889313900186e-07, |
|
"loss": 0.0047, |
|
"step": 1955 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"learning_rate": 2.8093135694786667e-07, |
|
"loss": 0.0056, |
|
"step": 1956 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"learning_rate": 2.749968122013669e-07, |
|
"loss": 0.0051, |
|
"step": 1957 |
|
}, |
|
{ |
|
"epoch": 4.77, |
|
"learning_rate": 2.6912527402246367e-07, |
|
"loss": 0.0043, |
|
"step": 1958 |
|
}, |
|
{ |
|
"epoch": 4.77, |
|
"learning_rate": 2.633167573735579e-07, |
|
"loss": 0.0054, |
|
"step": 1959 |
|
}, |
|
{ |
|
"epoch": 4.77, |
|
"learning_rate": 2.575712770564592e-07, |
|
"loss": 0.0035, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 4.77, |
|
"learning_rate": 2.5188884771233656e-07, |
|
"loss": 0.0043, |
|
"step": 1961 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 2.4626948382168726e-07, |
|
"loss": 0.0038, |
|
"step": 1962 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 2.407131997043038e-07, |
|
"loss": 0.0039, |
|
"step": 1963 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 2.3522000951922417e-07, |
|
"loss": 0.0031, |
|
"step": 1964 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 2.2978992726471748e-07, |
|
"loss": 0.0045, |
|
"step": 1965 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 2.244229667782205e-07, |
|
"loss": 0.0081, |
|
"step": 1966 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"learning_rate": 2.1911914173632643e-07, |
|
"loss": 0.0046, |
|
"step": 1967 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"learning_rate": 2.1387846565474045e-07, |
|
"loss": 0.0048, |
|
"step": 1968 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"learning_rate": 2.08700951888241e-07, |
|
"loss": 0.0039, |
|
"step": 1969 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"learning_rate": 2.0358661363065746e-07, |
|
"loss": 0.0042, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 1.985354639148229e-07, |
|
"loss": 0.0051, |
|
"step": 1971 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 1.9354751561254937e-07, |
|
"loss": 0.004, |
|
"step": 1972 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 1.8862278143459144e-07, |
|
"loss": 0.0043, |
|
"step": 1973 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 1.8376127393062158e-07, |
|
"loss": 0.0051, |
|
"step": 1974 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 1.7896300548918832e-07, |
|
"loss": 0.0058, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 1.7422798833768572e-07, |
|
"loss": 0.004, |
|
"step": 1976 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 1.6955623454233128e-07, |
|
"loss": 0.0051, |
|
"step": 1977 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 1.6494775600812417e-07, |
|
"loss": 0.0048, |
|
"step": 1978 |
|
}, |
|
{ |
|
"epoch": 4.82, |
|
"learning_rate": 1.6040256447881763e-07, |
|
"loss": 0.0056, |
|
"step": 1979 |
|
}, |
|
{ |
|
"epoch": 4.82, |
|
"learning_rate": 1.559206715368966e-07, |
|
"loss": 0.0039, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 4.82, |
|
"learning_rate": 1.5150208860354176e-07, |
|
"loss": 0.0053, |
|
"step": 1981 |
|
}, |
|
{ |
|
"epoch": 4.82, |
|
"learning_rate": 1.4714682693859617e-07, |
|
"loss": 0.0039, |
|
"step": 1982 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"learning_rate": 1.428548976405486e-07, |
|
"loss": 0.0058, |
|
"step": 1983 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"learning_rate": 1.3862631164649475e-07, |
|
"loss": 0.0039, |
|
"step": 1984 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"learning_rate": 1.344610797321122e-07, |
|
"loss": 0.0048, |
|
"step": 1985 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"learning_rate": 1.3035921251163263e-07, |
|
"loss": 0.0039, |
|
"step": 1986 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"learning_rate": 1.2632072043782252e-07, |
|
"loss": 0.0032, |
|
"step": 1987 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"learning_rate": 1.223456138019413e-07, |
|
"loss": 0.0042, |
|
"step": 1988 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"learning_rate": 1.1843390273373057e-07, |
|
"loss": 0.0056, |
|
"step": 1989 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"learning_rate": 1.1458559720137762e-07, |
|
"loss": 0.0042, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 1.1080070701149359e-07, |
|
"loss": 0.004, |
|
"step": 1991 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 1.0707924180909379e-07, |
|
"loss": 0.0047, |
|
"step": 1992 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 1.0342121107755898e-07, |
|
"loss": 0.0057, |
|
"step": 1993 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 9.982662413862975e-08, |
|
"loss": 0.0046, |
|
"step": 1994 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"learning_rate": 9.629549015237049e-08, |
|
"loss": 0.0037, |
|
"step": 1995 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"learning_rate": 9.282781811714159e-08, |
|
"loss": 0.0055, |
|
"step": 1996 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"learning_rate": 8.94236168695911e-08, |
|
"loss": 0.004, |
|
"step": 1997 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"learning_rate": 8.608289508462708e-08, |
|
"loss": 0.0036, |
|
"step": 1998 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"learning_rate": 8.280566127538691e-08, |
|
"loss": 0.0047, |
|
"step": 1999 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"learning_rate": 7.959192379322077e-08, |
|
"loss": 0.0043, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"learning_rate": 7.644169082768326e-08, |
|
"loss": 0.0043, |
|
"step": 2001 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"learning_rate": 7.335497040648898e-08, |
|
"loss": 0.004, |
|
"step": 2002 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"learning_rate": 7.033177039550698e-08, |
|
"loss": 0.0037, |
|
"step": 2003 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"learning_rate": 6.73720984987386e-08, |
|
"loss": 0.0051, |
|
"step": 2004 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"learning_rate": 6.4475962258298e-08, |
|
"loss": 0.0046, |
|
"step": 2005 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"learning_rate": 6.164336905438994e-08, |
|
"loss": 0.005, |
|
"step": 2006 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"learning_rate": 5.8874326105293196e-08, |
|
"loss": 0.0045, |
|
"step": 2007 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"learning_rate": 5.616884046734383e-08, |
|
"loss": 0.0051, |
|
"step": 2008 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"learning_rate": 5.352691903491303e-08, |
|
"loss": 0.0046, |
|
"step": 2009 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"learning_rate": 5.094856854039043e-08, |
|
"loss": 0.0049, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"learning_rate": 4.8433795554173046e-08, |
|
"loss": 0.0037, |
|
"step": 2011 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 4.598260648463748e-08, |
|
"loss": 0.0039, |
|
"step": 2012 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 4.359500757813717e-08, |
|
"loss": 0.0041, |
|
"step": 2013 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 4.1271004918971847e-08, |
|
"loss": 0.004, |
|
"step": 2014 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 3.901060442938198e-08, |
|
"loss": 0.0056, |
|
"step": 2015 |
|
}, |
|
{ |
|
"epoch": 4.91, |
|
"learning_rate": 3.68138118695377e-08, |
|
"loss": 0.0049, |
|
"step": 2016 |
|
}, |
|
{ |
|
"epoch": 4.91, |
|
"learning_rate": 3.468063283750267e-08, |
|
"loss": 0.0051, |
|
"step": 2017 |
|
}, |
|
{ |
|
"epoch": 4.91, |
|
"learning_rate": 3.2611072769250795e-08, |
|
"loss": 0.0039, |
|
"step": 2018 |
|
}, |
|
{ |
|
"epoch": 4.91, |
|
"learning_rate": 3.0605136938624544e-08, |
|
"loss": 0.004, |
|
"step": 2019 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 2.866283045734053e-08, |
|
"loss": 0.0045, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 2.6784158274964498e-08, |
|
"loss": 0.0063, |
|
"step": 2021 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 2.496912517890304e-08, |
|
"loss": 0.0056, |
|
"step": 2022 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 2.3217735794392458e-08, |
|
"loss": 0.0039, |
|
"step": 2023 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"learning_rate": 2.152999458449323e-08, |
|
"loss": 0.004, |
|
"step": 2024 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"learning_rate": 1.990590585005947e-08, |
|
"loss": 0.0045, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"learning_rate": 1.834547372975004e-08, |
|
"loss": 0.0046, |
|
"step": 2026 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"learning_rate": 1.6848702200000786e-08, |
|
"loss": 0.0051, |
|
"step": 2027 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"learning_rate": 1.5415595075027324e-08, |
|
"loss": 0.0054, |
|
"step": 2028 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"learning_rate": 1.4046156006808364e-08, |
|
"loss": 0.0048, |
|
"step": 2029 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"learning_rate": 1.2740388485071863e-08, |
|
"loss": 0.0041, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"learning_rate": 1.149829583730333e-08, |
|
"loss": 0.0048, |
|
"step": 2031 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 1.03198812287153e-08, |
|
"loss": 0.0045, |
|
"step": 2032 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 9.20514766225289e-09, |
|
"loss": 0.0046, |
|
"step": 2033 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 8.154097978591014e-09, |
|
"loss": 0.0039, |
|
"step": 2034 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 7.166734856103863e-09, |
|
"loss": 0.0044, |
|
"step": 2035 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"learning_rate": 6.243060810892654e-09, |
|
"loss": 0.0043, |
|
"step": 2036 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"learning_rate": 5.3830781967412205e-09, |
|
"loss": 0.006, |
|
"step": 2037 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"learning_rate": 4.586789205140995e-09, |
|
"loss": 0.004, |
|
"step": 2038 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"learning_rate": 3.854195865271582e-09, |
|
"loss": 0.0048, |
|
"step": 2039 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"learning_rate": 3.1853000439951987e-09, |
|
"loss": 0.0054, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"learning_rate": 2.58010344585391e-09, |
|
"loss": 0.0038, |
|
"step": 2041 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"learning_rate": 2.038607613066845e-09, |
|
"loss": 0.0041, |
|
"step": 2042 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"learning_rate": 1.5608139255246512e-09, |
|
"loss": 0.0047, |
|
"step": 2043 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"learning_rate": 1.1467236007867144e-09, |
|
"loss": 0.0036, |
|
"step": 2044 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"learning_rate": 7.963376940728351e-10, |
|
"loss": 0.0051, |
|
"step": 2045 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"learning_rate": 5.096570982743298e-10, |
|
"loss": 0.0049, |
|
"step": 2046 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"learning_rate": 2.866825439346021e-10, |
|
"loss": 0.004, |
|
"step": 2047 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"learning_rate": 1.2741459925746935e-10, |
|
"loss": 0.0057, |
|
"step": 2048 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"learning_rate": 3.185367010716256e-11, |
|
"loss": 0.004, |
|
"step": 2049 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"learning_rate": 0.0, |
|
"loss": 0.0058, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"eval_loss": 1.1227930784225464, |
|
"eval_runtime": 115.4648, |
|
"eval_samples_per_second": 6.599, |
|
"eval_steps_per_second": 0.416, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"step": 2050, |
|
"total_flos": 2.0983989578550477e+19, |
|
"train_loss": 0.1699243627804354, |
|
"train_runtime": 174534.9388, |
|
"train_samples_per_second": 1.506, |
|
"train_steps_per_second": 0.012 |
|
} |
|
], |
|
"max_steps": 2050, |
|
"num_train_epochs": 5, |
|
"total_flos": 2.0983989578550477e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|