|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9058960565963444, |
|
"global_step": 301500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.99749613653868e-05, |
|
"loss": 4.5704, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.9949922730773584e-05, |
|
"loss": 3.9367, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.992488409616038e-05, |
|
"loss": 4.3169, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9899845461547165e-05, |
|
"loss": 4.4226, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.987480682693396e-05, |
|
"loss": 4.3777, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.984976819232076e-05, |
|
"loss": 4.3272, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.982472955770754e-05, |
|
"loss": 4.2964, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.979969092309434e-05, |
|
"loss": 4.2587, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9774652288481124e-05, |
|
"loss": 4.2217, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.974961365386792e-05, |
|
"loss": 4.2056, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.972457501925471e-05, |
|
"loss": 4.2108, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.96995363846415e-05, |
|
"loss": 4.135, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.96744977500283e-05, |
|
"loss": 4.1552, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.9649459115415084e-05, |
|
"loss": 4.0666, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.962442048080188e-05, |
|
"loss": 4.1164, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.959938184618867e-05, |
|
"loss": 4.0798, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.957434321157546e-05, |
|
"loss": 4.0284, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.954930457696226e-05, |
|
"loss": 4.046, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.9524265942349043e-05, |
|
"loss": 4.0595, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.949922730773584e-05, |
|
"loss": 4.0243, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.947418867312263e-05, |
|
"loss": 4.0064, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.944915003850942e-05, |
|
"loss": 3.9907, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.942411140389621e-05, |
|
"loss": 3.9488, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.9399072769283e-05, |
|
"loss": 3.95, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.93740341346698e-05, |
|
"loss": 3.9148, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.934899550005659e-05, |
|
"loss": 3.917, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.932395686544338e-05, |
|
"loss": 3.8958, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.929891823083017e-05, |
|
"loss": 3.9248, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.927387959621696e-05, |
|
"loss": 3.8752, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.924884096160376e-05, |
|
"loss": 3.8548, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.922380232699055e-05, |
|
"loss": 3.8882, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.919876369237734e-05, |
|
"loss": 3.8457, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.917372505776413e-05, |
|
"loss": 3.8472, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.914868642315092e-05, |
|
"loss": 3.8743, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.912364778853771e-05, |
|
"loss": 3.8541, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.909860915392451e-05, |
|
"loss": 3.823, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.90735705193113e-05, |
|
"loss": 3.841, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.904853188469809e-05, |
|
"loss": 3.8262, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.902349325008488e-05, |
|
"loss": 3.7779, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.899845461547167e-05, |
|
"loss": 3.7952, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.897341598085847e-05, |
|
"loss": 3.8115, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.894837734624526e-05, |
|
"loss": 3.7978, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.892333871163205e-05, |
|
"loss": 3.7757, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.889830007701884e-05, |
|
"loss": 3.7953, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.887326144240563e-05, |
|
"loss": 3.7988, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.884822280779243e-05, |
|
"loss": 3.779, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.882318417317922e-05, |
|
"loss": 3.7545, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.879814553856601e-05, |
|
"loss": 3.7502, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.87731069039528e-05, |
|
"loss": 3.762, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.874806826933959e-05, |
|
"loss": 3.7711, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.872302963472639e-05, |
|
"loss": 3.7314, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.869799100011317e-05, |
|
"loss": 3.7427, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.867295236549997e-05, |
|
"loss": 3.7024, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.864791373088676e-05, |
|
"loss": 3.7356, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.862287509627355e-05, |
|
"loss": 3.7117, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.859783646166035e-05, |
|
"loss": 3.7235, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.857279782704713e-05, |
|
"loss": 3.6791, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.854775919243393e-05, |
|
"loss": 3.696, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.852272055782072e-05, |
|
"loss": 3.6852, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.849768192320751e-05, |
|
"loss": 3.6896, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.847264328859431e-05, |
|
"loss": 3.6759, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.844760465398109e-05, |
|
"loss": 3.7012, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.842256601936789e-05, |
|
"loss": 3.6378, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.839752738475467e-05, |
|
"loss": 3.6746, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.837248875014147e-05, |
|
"loss": 3.68, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.834745011552827e-05, |
|
"loss": 3.6775, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.832241148091505e-05, |
|
"loss": 3.6471, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.829737284630185e-05, |
|
"loss": 3.662, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.827233421168863e-05, |
|
"loss": 3.6359, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.824729557707543e-05, |
|
"loss": 3.6494, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.822225694246222e-05, |
|
"loss": 3.6187, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.819721830784901e-05, |
|
"loss": 3.6115, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.817217967323581e-05, |
|
"loss": 3.6062, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.814714103862259e-05, |
|
"loss": 3.6109, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.812210240400939e-05, |
|
"loss": 3.6239, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.809706376939618e-05, |
|
"loss": 3.646, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.807202513478297e-05, |
|
"loss": 3.5898, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.804698650016977e-05, |
|
"loss": 3.6008, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.802194786555655e-05, |
|
"loss": 3.6265, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.799690923094335e-05, |
|
"loss": 3.6111, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.797187059633014e-05, |
|
"loss": 3.5766, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.794683196171693e-05, |
|
"loss": 3.5916, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.792179332710373e-05, |
|
"loss": 3.641, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.789675469249051e-05, |
|
"loss": 3.5743, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.787171605787731e-05, |
|
"loss": 3.6153, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.78466774232641e-05, |
|
"loss": 3.6073, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.782163878865089e-05, |
|
"loss": 3.5881, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.779660015403769e-05, |
|
"loss": 3.5649, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.777156151942447e-05, |
|
"loss": 3.5602, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.774652288481127e-05, |
|
"loss": 3.5707, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.772148425019806e-05, |
|
"loss": 3.5546, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.769644561558485e-05, |
|
"loss": 3.5327, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.767140698097164e-05, |
|
"loss": 3.5489, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.764636834635843e-05, |
|
"loss": 3.5559, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.762132971174523e-05, |
|
"loss": 3.5514, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.759629107713202e-05, |
|
"loss": 3.5295, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.757125244251881e-05, |
|
"loss": 3.5198, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.75462138079056e-05, |
|
"loss": 3.5231, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.752117517329239e-05, |
|
"loss": 3.5695, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.749613653867919e-05, |
|
"loss": 3.581, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.747109790406598e-05, |
|
"loss": 3.5228, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.744605926945277e-05, |
|
"loss": 3.5382, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.742102063483956e-05, |
|
"loss": 3.5384, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.739598200022635e-05, |
|
"loss": 3.4945, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.737094336561314e-05, |
|
"loss": 3.503, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.734590473099994e-05, |
|
"loss": 3.523, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.732086609638673e-05, |
|
"loss": 3.5115, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.729582746177352e-05, |
|
"loss": 3.5014, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.727078882716031e-05, |
|
"loss": 3.5253, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.72457501925471e-05, |
|
"loss": 3.4913, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.72207115579339e-05, |
|
"loss": 3.5152, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.719567292332069e-05, |
|
"loss": 3.4847, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.717063428870748e-05, |
|
"loss": 3.5086, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.714559565409427e-05, |
|
"loss": 3.5071, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.712055701948106e-05, |
|
"loss": 3.4918, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.709551838486786e-05, |
|
"loss": 3.4677, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.707047975025464e-05, |
|
"loss": 3.4594, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.704544111564144e-05, |
|
"loss": 3.5236, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.702040248102823e-05, |
|
"loss": 3.4743, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.699536384641502e-05, |
|
"loss": 3.478, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.6970325211801817e-05, |
|
"loss": 3.458, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.69452865771886e-05, |
|
"loss": 3.464, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.69202479425754e-05, |
|
"loss": 3.4501, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.689520930796219e-05, |
|
"loss": 3.4772, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.687017067334898e-05, |
|
"loss": 3.4522, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.6845132038735776e-05, |
|
"loss": 3.4415, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.682009340412256e-05, |
|
"loss": 3.4953, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.679505476950936e-05, |
|
"loss": 3.4713, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.677001613489615e-05, |
|
"loss": 3.4588, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.674497750028294e-05, |
|
"loss": 3.4765, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.671993886566973e-05, |
|
"loss": 3.4408, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.669490023105652e-05, |
|
"loss": 3.4454, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.666986159644332e-05, |
|
"loss": 3.4629, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.66448229618301e-05, |
|
"loss": 3.4751, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.66197843272169e-05, |
|
"loss": 3.446, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.659474569260369e-05, |
|
"loss": 3.4132, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.656970705799048e-05, |
|
"loss": 3.4266, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.6544668423377277e-05, |
|
"loss": 3.4197, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.651962978876406e-05, |
|
"loss": 3.444, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.649459115415086e-05, |
|
"loss": 3.4453, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.646955251953765e-05, |
|
"loss": 3.4215, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.644451388492444e-05, |
|
"loss": 3.4524, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.6419475250311236e-05, |
|
"loss": 3.4432, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.639443661569802e-05, |
|
"loss": 3.4105, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.636939798108482e-05, |
|
"loss": 3.415, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.634435934647161e-05, |
|
"loss": 3.4441, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.63193207118584e-05, |
|
"loss": 3.4117, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.6294282077245196e-05, |
|
"loss": 3.4101, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.626924344263198e-05, |
|
"loss": 3.3981, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.624420480801878e-05, |
|
"loss": 3.4246, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.621916617340557e-05, |
|
"loss": 3.423, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.619412753879236e-05, |
|
"loss": 3.4062, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.6169088904179155e-05, |
|
"loss": 3.4059, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.614405026956594e-05, |
|
"loss": 3.3988, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.6119011634952736e-05, |
|
"loss": 3.4308, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.609397300033953e-05, |
|
"loss": 3.3904, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.606893436572632e-05, |
|
"loss": 3.3847, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.6043895731113115e-05, |
|
"loss": 3.3941, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.60188570964999e-05, |
|
"loss": 3.3834, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.5993818461886696e-05, |
|
"loss": 3.4271, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.596877982727349e-05, |
|
"loss": 3.3714, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.594374119266028e-05, |
|
"loss": 3.3709, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.591870255804707e-05, |
|
"loss": 3.4005, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.589366392343386e-05, |
|
"loss": 3.3783, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.5868625288820656e-05, |
|
"loss": 3.385, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.5843586654207446e-05, |
|
"loss": 3.3812, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.581854801959424e-05, |
|
"loss": 3.362, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.579350938498103e-05, |
|
"loss": 3.3715, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.576847075036782e-05, |
|
"loss": 3.387, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.5743432115754615e-05, |
|
"loss": 3.3741, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.5718393481141406e-05, |
|
"loss": 3.3461, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.5693354846528196e-05, |
|
"loss": 3.3659, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.566831621191499e-05, |
|
"loss": 3.3693, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.564327757730178e-05, |
|
"loss": 3.3776, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.561823894268857e-05, |
|
"loss": 3.3391, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.5593200308075365e-05, |
|
"loss": 3.3516, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.5568161673462156e-05, |
|
"loss": 3.3808, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.5543123038848947e-05, |
|
"loss": 3.3933, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.551808440423574e-05, |
|
"loss": 3.3717, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.549304576962253e-05, |
|
"loss": 3.3625, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.5468007135009325e-05, |
|
"loss": 3.3391, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.5442968500396116e-05, |
|
"loss": 3.3361, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.5417929865782906e-05, |
|
"loss": 3.3201, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.53928912311697e-05, |
|
"loss": 3.3444, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.536785259655649e-05, |
|
"loss": 3.3624, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.5342813961943285e-05, |
|
"loss": 3.3289, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.531777532733007e-05, |
|
"loss": 3.342, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.5292736692716866e-05, |
|
"loss": 3.3372, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.5267698058103656e-05, |
|
"loss": 3.3458, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.524265942349045e-05, |
|
"loss": 3.386, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.521762078887724e-05, |
|
"loss": 3.336, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.519258215426403e-05, |
|
"loss": 3.3324, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.5167543519650825e-05, |
|
"loss": 3.3054, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.5142504885037616e-05, |
|
"loss": 3.3409, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.5117466250424406e-05, |
|
"loss": 3.3233, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.50924276158112e-05, |
|
"loss": 3.3296, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.506738898119799e-05, |
|
"loss": 3.3135, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.5042350346584785e-05, |
|
"loss": 3.3307, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.5017311711971576e-05, |
|
"loss": 3.3117, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.4992273077358366e-05, |
|
"loss": 3.3407, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.496723444274516e-05, |
|
"loss": 3.3376, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.494219580813195e-05, |
|
"loss": 3.3178, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.4917157173518745e-05, |
|
"loss": 3.3416, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.489211853890553e-05, |
|
"loss": 3.2907, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.4867079904292326e-05, |
|
"loss": 3.3315, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.4842041269679116e-05, |
|
"loss": 3.3199, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.481700263506591e-05, |
|
"loss": 3.3082, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.4791964000452704e-05, |
|
"loss": 3.3224, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.476692536583949e-05, |
|
"loss": 3.3023, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.4741886731226285e-05, |
|
"loss": 3.3068, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.4716848096613076e-05, |
|
"loss": 3.3125, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.4691809461999866e-05, |
|
"loss": 3.3026, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.4666770827386664e-05, |
|
"loss": 3.302, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.464173219277345e-05, |
|
"loss": 3.299, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.4616693558160245e-05, |
|
"loss": 3.2802, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.4591654923547035e-05, |
|
"loss": 3.2993, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.4566616288933826e-05, |
|
"loss": 3.2822, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.454157765432062e-05, |
|
"loss": 3.2963, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.451653901970741e-05, |
|
"loss": 3.3129, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.4491500385094204e-05, |
|
"loss": 3.3025, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.4466461750480995e-05, |
|
"loss": 3.3245, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.4441423115867786e-05, |
|
"loss": 3.2676, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.441638448125458e-05, |
|
"loss": 3.317, |
|
"step": 111500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.439134584664137e-05, |
|
"loss": 3.3008, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.4366307212028164e-05, |
|
"loss": 3.3093, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.4341268577414955e-05, |
|
"loss": 3.3066, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.4316229942801745e-05, |
|
"loss": 3.2671, |
|
"step": 113500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.4291191308188536e-05, |
|
"loss": 3.2873, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.4266152673575326e-05, |
|
"loss": 3.2857, |
|
"step": 114500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.4241114038962124e-05, |
|
"loss": 3.2992, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.4216075404348914e-05, |
|
"loss": 3.2959, |
|
"step": 115500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.4191036769735705e-05, |
|
"loss": 3.2688, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.4165998135122495e-05, |
|
"loss": 3.2958, |
|
"step": 116500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.4140959500509286e-05, |
|
"loss": 3.2933, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.411592086589608e-05, |
|
"loss": 3.2796, |
|
"step": 117500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.4090882231282874e-05, |
|
"loss": 3.2577, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.4065843596669664e-05, |
|
"loss": 3.2797, |
|
"step": 118500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.4040804962056455e-05, |
|
"loss": 3.3004, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.4015766327443246e-05, |
|
"loss": 3.2539, |
|
"step": 119500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.399072769283004e-05, |
|
"loss": 3.2525, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.3965689058216833e-05, |
|
"loss": 3.2668, |
|
"step": 120500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.3940650423603624e-05, |
|
"loss": 3.2693, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.3915611788990415e-05, |
|
"loss": 3.2792, |
|
"step": 121500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.3890573154377205e-05, |
|
"loss": 3.2649, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.3865534519763996e-05, |
|
"loss": 3.2556, |
|
"step": 122500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.3840495885150786e-05, |
|
"loss": 3.2509, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.3815457250537584e-05, |
|
"loss": 3.2616, |
|
"step": 123500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.3790418615924374e-05, |
|
"loss": 3.2654, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.3765379981311165e-05, |
|
"loss": 3.2628, |
|
"step": 124500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.3740341346697955e-05, |
|
"loss": 3.2462, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.3715302712084746e-05, |
|
"loss": 3.2517, |
|
"step": 125500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.369026407747154e-05, |
|
"loss": 3.2897, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.3665225442858334e-05, |
|
"loss": 3.2707, |
|
"step": 126500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.3640186808245124e-05, |
|
"loss": 3.269, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.3615148173631915e-05, |
|
"loss": 3.2331, |
|
"step": 127500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.3590109539018705e-05, |
|
"loss": 3.2644, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.3565070904405496e-05, |
|
"loss": 3.2406, |
|
"step": 128500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.354003226979229e-05, |
|
"loss": 3.2499, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.3514993635179084e-05, |
|
"loss": 3.2325, |
|
"step": 129500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.3489955000565875e-05, |
|
"loss": 3.2631, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.3464916365952665e-05, |
|
"loss": 3.2447, |
|
"step": 130500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.3439877731339456e-05, |
|
"loss": 3.2349, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.341483909672625e-05, |
|
"loss": 3.2372, |
|
"step": 131500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.3389800462113044e-05, |
|
"loss": 3.2456, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.3364761827499834e-05, |
|
"loss": 3.2477, |
|
"step": 132500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.3339723192886625e-05, |
|
"loss": 3.243, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.3314684558273415e-05, |
|
"loss": 3.2287, |
|
"step": 133500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.328964592366021e-05, |
|
"loss": 3.2613, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.3264607289046996e-05, |
|
"loss": 3.2242, |
|
"step": 134500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.3239568654433794e-05, |
|
"loss": 3.2481, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.3214530019820584e-05, |
|
"loss": 3.2482, |
|
"step": 135500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.3189491385207375e-05, |
|
"loss": 3.2439, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.316445275059417e-05, |
|
"loss": 3.2321, |
|
"step": 136500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.3139414115980956e-05, |
|
"loss": 3.2422, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.311437548136775e-05, |
|
"loss": 3.249, |
|
"step": 137500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.3089336846754544e-05, |
|
"loss": 3.2349, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.3064298212141334e-05, |
|
"loss": 3.218, |
|
"step": 138500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.303925957752813e-05, |
|
"loss": 3.2232, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.3014220942914916e-05, |
|
"loss": 3.2183, |
|
"step": 139500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.298918230830171e-05, |
|
"loss": 3.2547, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.2964143673688503e-05, |
|
"loss": 3.2168, |
|
"step": 140500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.2939105039075294e-05, |
|
"loss": 3.2378, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.291406640446209e-05, |
|
"loss": 3.2035, |
|
"step": 141500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.2889027769848875e-05, |
|
"loss": 3.2112, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.286398913523567e-05, |
|
"loss": 3.1962, |
|
"step": 142500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.283895050062246e-05, |
|
"loss": 3.2278, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.2813911866009254e-05, |
|
"loss": 3.2453, |
|
"step": 143500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.278887323139605e-05, |
|
"loss": 3.2265, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.2763834596782835e-05, |
|
"loss": 3.1941, |
|
"step": 144500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.273879596216963e-05, |
|
"loss": 3.2519, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.271375732755642e-05, |
|
"loss": 3.1941, |
|
"step": 145500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.268871869294321e-05, |
|
"loss": 3.2421, |
|
"step": 146000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.266368005833001e-05, |
|
"loss": 3.2396, |
|
"step": 146500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.2638641423716794e-05, |
|
"loss": 3.2021, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.261360278910359e-05, |
|
"loss": 3.2229, |
|
"step": 147500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.258856415449038e-05, |
|
"loss": 3.2251, |
|
"step": 148000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.256352551987717e-05, |
|
"loss": 3.1941, |
|
"step": 148500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.2538486885263963e-05, |
|
"loss": 3.2002, |
|
"step": 149000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.2513448250650754e-05, |
|
"loss": 3.2177, |
|
"step": 149500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.248840961603755e-05, |
|
"loss": 3.2386, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.246337098142434e-05, |
|
"loss": 3.1961, |
|
"step": 150500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.243833234681113e-05, |
|
"loss": 3.1943, |
|
"step": 151000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.241329371219792e-05, |
|
"loss": 3.1818, |
|
"step": 151500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.2388255077584714e-05, |
|
"loss": 3.2139, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.236321644297151e-05, |
|
"loss": 3.1935, |
|
"step": 152500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.2338177808358295e-05, |
|
"loss": 3.206, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.231313917374509e-05, |
|
"loss": 3.2269, |
|
"step": 153500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.228810053913188e-05, |
|
"loss": 3.2269, |
|
"step": 154000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.226306190451867e-05, |
|
"loss": 3.1957, |
|
"step": 154500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.2238023269905464e-05, |
|
"loss": 3.2306, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.2212984635292254e-05, |
|
"loss": 3.224, |
|
"step": 155500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.218794600067905e-05, |
|
"loss": 3.2183, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.216290736606584e-05, |
|
"loss": 3.2007, |
|
"step": 156500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.213786873145263e-05, |
|
"loss": 3.1717, |
|
"step": 157000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.211283009683942e-05, |
|
"loss": 3.1949, |
|
"step": 157500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.2087791462226214e-05, |
|
"loss": 3.1852, |
|
"step": 158000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.206275282761301e-05, |
|
"loss": 3.1967, |
|
"step": 158500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.20377141929998e-05, |
|
"loss": 3.1857, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.201267555838659e-05, |
|
"loss": 3.184, |
|
"step": 159500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.198763692377338e-05, |
|
"loss": 3.1921, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.1962598289160173e-05, |
|
"loss": 3.1994, |
|
"step": 160500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.193755965454697e-05, |
|
"loss": 3.1853, |
|
"step": 161000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.191252101993376e-05, |
|
"loss": 3.207, |
|
"step": 161500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.188748238532055e-05, |
|
"loss": 3.1822, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.186244375070734e-05, |
|
"loss": 3.1786, |
|
"step": 162500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.183740511609413e-05, |
|
"loss": 3.1686, |
|
"step": 163000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.1812366481480924e-05, |
|
"loss": 3.2091, |
|
"step": 163500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.178732784686772e-05, |
|
"loss": 3.1702, |
|
"step": 164000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.176228921225451e-05, |
|
"loss": 3.1915, |
|
"step": 164500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.17372505776413e-05, |
|
"loss": 3.1912, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.171221194302809e-05, |
|
"loss": 3.2141, |
|
"step": 165500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.168717330841488e-05, |
|
"loss": 3.1828, |
|
"step": 166000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.166213467380168e-05, |
|
"loss": 3.1877, |
|
"step": 166500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.163709603918847e-05, |
|
"loss": 3.1859, |
|
"step": 167000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.161205740457526e-05, |
|
"loss": 3.1695, |
|
"step": 167500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.158701876996205e-05, |
|
"loss": 3.227, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.156198013534884e-05, |
|
"loss": 3.1862, |
|
"step": 168500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.153694150073564e-05, |
|
"loss": 3.2209, |
|
"step": 169000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.1511902866122424e-05, |
|
"loss": 3.1829, |
|
"step": 169500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.148686423150922e-05, |
|
"loss": 3.1981, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.146182559689601e-05, |
|
"loss": 3.1563, |
|
"step": 170500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.14367869622828e-05, |
|
"loss": 3.1743, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.14117483276696e-05, |
|
"loss": 3.1754, |
|
"step": 171500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.1386709693056384e-05, |
|
"loss": 3.1729, |
|
"step": 172000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.136167105844318e-05, |
|
"loss": 3.1476, |
|
"step": 172500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.133663242382997e-05, |
|
"loss": 3.1561, |
|
"step": 173000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.131159378921676e-05, |
|
"loss": 3.1844, |
|
"step": 173500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.128655515460356e-05, |
|
"loss": 3.1756, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.126151651999034e-05, |
|
"loss": 3.181, |
|
"step": 174500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.123647788537714e-05, |
|
"loss": 3.1479, |
|
"step": 175000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.121143925076393e-05, |
|
"loss": 3.1654, |
|
"step": 175500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.118640061615072e-05, |
|
"loss": 3.1903, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.116136198153752e-05, |
|
"loss": 3.1761, |
|
"step": 176500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.11363233469243e-05, |
|
"loss": 3.1882, |
|
"step": 177000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.11112847123111e-05, |
|
"loss": 3.175, |
|
"step": 177500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.108624607769789e-05, |
|
"loss": 3.1517, |
|
"step": 178000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.106120744308468e-05, |
|
"loss": 3.1663, |
|
"step": 178500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.103616880847148e-05, |
|
"loss": 3.1669, |
|
"step": 179000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.101113017385826e-05, |
|
"loss": 3.1764, |
|
"step": 179500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.098609153924506e-05, |
|
"loss": 3.1554, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.096105290463185e-05, |
|
"loss": 3.1488, |
|
"step": 180500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.093601427001864e-05, |
|
"loss": 3.1657, |
|
"step": 181000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.091097563540544e-05, |
|
"loss": 3.1415, |
|
"step": 181500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.088593700079222e-05, |
|
"loss": 3.1519, |
|
"step": 182000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.086089836617902e-05, |
|
"loss": 3.1763, |
|
"step": 182500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.08358597315658e-05, |
|
"loss": 3.1373, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.08108210969526e-05, |
|
"loss": 3.1619, |
|
"step": 183500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.078578246233939e-05, |
|
"loss": 3.1682, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.076074382772618e-05, |
|
"loss": 3.1474, |
|
"step": 184500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.073570519311298e-05, |
|
"loss": 3.2058, |
|
"step": 185000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.071066655849976e-05, |
|
"loss": 3.1558, |
|
"step": 185500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.068562792388656e-05, |
|
"loss": 3.1667, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.066058928927335e-05, |
|
"loss": 3.1724, |
|
"step": 186500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.063555065466014e-05, |
|
"loss": 3.1421, |
|
"step": 187000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.061051202004694e-05, |
|
"loss": 3.1602, |
|
"step": 187500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.058547338543372e-05, |
|
"loss": 3.1505, |
|
"step": 188000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.056043475082052e-05, |
|
"loss": 3.1426, |
|
"step": 188500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.053539611620731e-05, |
|
"loss": 3.1507, |
|
"step": 189000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.05103574815941e-05, |
|
"loss": 3.1579, |
|
"step": 189500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.048531884698089e-05, |
|
"loss": 3.1455, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.046028021236768e-05, |
|
"loss": 3.1776, |
|
"step": 190500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.043524157775448e-05, |
|
"loss": 3.1343, |
|
"step": 191000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.041020294314127e-05, |
|
"loss": 3.1487, |
|
"step": 191500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.038516430852806e-05, |
|
"loss": 3.15, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.036012567391485e-05, |
|
"loss": 3.1351, |
|
"step": 192500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.033508703930164e-05, |
|
"loss": 3.1242, |
|
"step": 193000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.031004840468844e-05, |
|
"loss": 3.1655, |
|
"step": 193500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.028500977007523e-05, |
|
"loss": 3.1386, |
|
"step": 194000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.025997113546202e-05, |
|
"loss": 3.1384, |
|
"step": 194500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.023493250084881e-05, |
|
"loss": 3.1433, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.02098938662356e-05, |
|
"loss": 3.1139, |
|
"step": 195500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.018485523162239e-05, |
|
"loss": 3.1579, |
|
"step": 196000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.015981659700919e-05, |
|
"loss": 3.1283, |
|
"step": 196500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.013477796239598e-05, |
|
"loss": 3.138, |
|
"step": 197000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.010973932778277e-05, |
|
"loss": 3.1325, |
|
"step": 197500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.008470069316956e-05, |
|
"loss": 3.1043, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.005966205855635e-05, |
|
"loss": 3.1572, |
|
"step": 198500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.003462342394315e-05, |
|
"loss": 3.1539, |
|
"step": 199000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.000958478932994e-05, |
|
"loss": 3.1408, |
|
"step": 199500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.998454615471673e-05, |
|
"loss": 3.1204, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.995950752010352e-05, |
|
"loss": 3.1164, |
|
"step": 200500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.993446888549031e-05, |
|
"loss": 3.1518, |
|
"step": 201000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.990943025087711e-05, |
|
"loss": 3.1399, |
|
"step": 201500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.98843916162639e-05, |
|
"loss": 3.116, |
|
"step": 202000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.985935298165069e-05, |
|
"loss": 3.1405, |
|
"step": 202500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.983431434703748e-05, |
|
"loss": 3.116, |
|
"step": 203000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.980927571242427e-05, |
|
"loss": 3.1357, |
|
"step": 203500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.978423707781107e-05, |
|
"loss": 3.149, |
|
"step": 204000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.975919844319785e-05, |
|
"loss": 3.1418, |
|
"step": 204500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.973415980858465e-05, |
|
"loss": 3.1229, |
|
"step": 205000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.970912117397144e-05, |
|
"loss": 3.141, |
|
"step": 205500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.968408253935823e-05, |
|
"loss": 3.1376, |
|
"step": 206000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.965904390474503e-05, |
|
"loss": 3.1284, |
|
"step": 206500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.963400527013181e-05, |
|
"loss": 3.1405, |
|
"step": 207000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.960896663551861e-05, |
|
"loss": 3.1543, |
|
"step": 207500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.95839280009054e-05, |
|
"loss": 3.1693, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.955888936629219e-05, |
|
"loss": 3.116, |
|
"step": 208500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.953385073167899e-05, |
|
"loss": 3.125, |
|
"step": 209000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.950881209706577e-05, |
|
"loss": 3.1362, |
|
"step": 209500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.948377346245257e-05, |
|
"loss": 3.0917, |
|
"step": 210000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.945873482783936e-05, |
|
"loss": 3.121, |
|
"step": 210500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.943369619322615e-05, |
|
"loss": 3.0947, |
|
"step": 211000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.940865755861295e-05, |
|
"loss": 3.1065, |
|
"step": 211500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.938361892399973e-05, |
|
"loss": 3.1274, |
|
"step": 212000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.935858028938653e-05, |
|
"loss": 3.1071, |
|
"step": 212500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.933354165477331e-05, |
|
"loss": 3.1405, |
|
"step": 213000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.930850302016011e-05, |
|
"loss": 3.1544, |
|
"step": 213500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.9283464385546906e-05, |
|
"loss": 3.1427, |
|
"step": 214000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.925842575093369e-05, |
|
"loss": 3.1102, |
|
"step": 214500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.923338711632049e-05, |
|
"loss": 3.1475, |
|
"step": 215000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.920834848170727e-05, |
|
"loss": 3.125, |
|
"step": 215500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.918330984709407e-05, |
|
"loss": 3.1, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.9158271212480866e-05, |
|
"loss": 3.1193, |
|
"step": 216500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.913323257786765e-05, |
|
"loss": 3.1084, |
|
"step": 217000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.910819394325445e-05, |
|
"loss": 3.1027, |
|
"step": 217500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.908315530864123e-05, |
|
"loss": 3.1362, |
|
"step": 218000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.905811667402803e-05, |
|
"loss": 3.1049, |
|
"step": 218500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.903307803941482e-05, |
|
"loss": 3.1278, |
|
"step": 219000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.900803940480161e-05, |
|
"loss": 3.1058, |
|
"step": 219500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.8983000770188407e-05, |
|
"loss": 3.0892, |
|
"step": 220000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.895796213557519e-05, |
|
"loss": 3.1202, |
|
"step": 220500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.893292350096199e-05, |
|
"loss": 3.1286, |
|
"step": 221000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.890788486634878e-05, |
|
"loss": 3.1254, |
|
"step": 221500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.888284623173557e-05, |
|
"loss": 3.1017, |
|
"step": 222000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.8857807597122366e-05, |
|
"loss": 3.0974, |
|
"step": 222500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.883276896250915e-05, |
|
"loss": 3.0832, |
|
"step": 223000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.880773032789595e-05, |
|
"loss": 3.1195, |
|
"step": 223500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.878269169328274e-05, |
|
"loss": 3.1234, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.875765305866953e-05, |
|
"loss": 3.0852, |
|
"step": 224500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.873261442405632e-05, |
|
"loss": 3.1366, |
|
"step": 225000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.870757578944311e-05, |
|
"loss": 3.1166, |
|
"step": 225500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.868253715482991e-05, |
|
"loss": 3.1384, |
|
"step": 226000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.86574985202167e-05, |
|
"loss": 3.1121, |
|
"step": 226500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.863245988560349e-05, |
|
"loss": 3.1149, |
|
"step": 227000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.860742125099028e-05, |
|
"loss": 3.1072, |
|
"step": 227500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.858238261637707e-05, |
|
"loss": 3.0932, |
|
"step": 228000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.8557343981763866e-05, |
|
"loss": 3.0846, |
|
"step": 228500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.853230534715066e-05, |
|
"loss": 3.0819, |
|
"step": 229000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.850726671253745e-05, |
|
"loss": 3.104, |
|
"step": 229500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.848222807792424e-05, |
|
"loss": 3.1016, |
|
"step": 230000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.845718944331103e-05, |
|
"loss": 3.075, |
|
"step": 230500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.843215080869782e-05, |
|
"loss": 3.0945, |
|
"step": 231000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.840711217408462e-05, |
|
"loss": 3.0949, |
|
"step": 231500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.838207353947141e-05, |
|
"loss": 3.1097, |
|
"step": 232000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.83570349048582e-05, |
|
"loss": 3.0881, |
|
"step": 232500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.833199627024499e-05, |
|
"loss": 3.1087, |
|
"step": 233000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.830695763563178e-05, |
|
"loss": 3.1164, |
|
"step": 233500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.8281919001018576e-05, |
|
"loss": 3.1171, |
|
"step": 234000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.825688036640537e-05, |
|
"loss": 3.0979, |
|
"step": 234500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.823184173179216e-05, |
|
"loss": 3.0912, |
|
"step": 235000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.820680309717895e-05, |
|
"loss": 3.1041, |
|
"step": 235500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.818176446256574e-05, |
|
"loss": 3.0904, |
|
"step": 236000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.8156725827952536e-05, |
|
"loss": 3.0874, |
|
"step": 236500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.8131687193339326e-05, |
|
"loss": 3.0746, |
|
"step": 237000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.810664855872612e-05, |
|
"loss": 3.1111, |
|
"step": 237500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.808160992411291e-05, |
|
"loss": 3.0794, |
|
"step": 238000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.80565712894997e-05, |
|
"loss": 3.0809, |
|
"step": 238500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.8031532654886495e-05, |
|
"loss": 3.0907, |
|
"step": 239000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.800649402027328e-05, |
|
"loss": 3.0692, |
|
"step": 239500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.7981455385660077e-05, |
|
"loss": 3.1191, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.795641675104687e-05, |
|
"loss": 3.0884, |
|
"step": 240500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.793137811643366e-05, |
|
"loss": 3.0955, |
|
"step": 241000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.7906339481820455e-05, |
|
"loss": 3.1308, |
|
"step": 241500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.788130084720724e-05, |
|
"loss": 3.0821, |
|
"step": 242000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.7856262212594036e-05, |
|
"loss": 3.1044, |
|
"step": 242500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.783122357798083e-05, |
|
"loss": 3.0543, |
|
"step": 243000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.780618494336762e-05, |
|
"loss": 3.0943, |
|
"step": 243500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.7781146308754415e-05, |
|
"loss": 3.0787, |
|
"step": 244000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.77561076741412e-05, |
|
"loss": 3.0943, |
|
"step": 244500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.7731069039527996e-05, |
|
"loss": 3.0755, |
|
"step": 245000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.770603040491478e-05, |
|
"loss": 3.0912, |
|
"step": 245500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.768099177030158e-05, |
|
"loss": 3.0616, |
|
"step": 246000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.7655953135688374e-05, |
|
"loss": 3.0884, |
|
"step": 246500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.763091450107516e-05, |
|
"loss": 3.0833, |
|
"step": 247000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.7605875866461955e-05, |
|
"loss": 3.0648, |
|
"step": 247500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.758083723184874e-05, |
|
"loss": 3.1001, |
|
"step": 248000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.7555798597235537e-05, |
|
"loss": 3.0793, |
|
"step": 248500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.7530759962622334e-05, |
|
"loss": 3.0914, |
|
"step": 249000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.750572132800912e-05, |
|
"loss": 3.0715, |
|
"step": 249500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.7480682693395915e-05, |
|
"loss": 3.0778, |
|
"step": 250000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.74556440587827e-05, |
|
"loss": 3.0749, |
|
"step": 250500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.7430605424169496e-05, |
|
"loss": 3.0795, |
|
"step": 251000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.740556678955629e-05, |
|
"loss": 3.0838, |
|
"step": 251500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.738052815494308e-05, |
|
"loss": 3.0874, |
|
"step": 252000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.7355489520329875e-05, |
|
"loss": 3.1254, |
|
"step": 252500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.733045088571666e-05, |
|
"loss": 3.0686, |
|
"step": 253000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.7305412251103456e-05, |
|
"loss": 3.0645, |
|
"step": 253500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.7280373616490246e-05, |
|
"loss": 3.0608, |
|
"step": 254000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.725533498187704e-05, |
|
"loss": 3.0896, |
|
"step": 254500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.7230296347263834e-05, |
|
"loss": 3.0622, |
|
"step": 255000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.720525771265062e-05, |
|
"loss": 3.0935, |
|
"step": 255500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.7180219078037415e-05, |
|
"loss": 3.051, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.7155180443424206e-05, |
|
"loss": 3.05, |
|
"step": 256500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.7130141808810996e-05, |
|
"loss": 3.0711, |
|
"step": 257000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.7105103174197794e-05, |
|
"loss": 3.086, |
|
"step": 257500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.708006453958458e-05, |
|
"loss": 3.0711, |
|
"step": 258000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.7055025904971375e-05, |
|
"loss": 3.0573, |
|
"step": 258500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.7029987270358165e-05, |
|
"loss": 3.0937, |
|
"step": 259000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.7004948635744956e-05, |
|
"loss": 3.0803, |
|
"step": 259500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.6979910001131747e-05, |
|
"loss": 3.0894, |
|
"step": 260000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.695487136651854e-05, |
|
"loss": 3.0443, |
|
"step": 260500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.6929832731905335e-05, |
|
"loss": 3.0307, |
|
"step": 261000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.6904794097292125e-05, |
|
"loss": 3.0806, |
|
"step": 261500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.6879755462678916e-05, |
|
"loss": 3.0864, |
|
"step": 262000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.6854716828065706e-05, |
|
"loss": 3.0655, |
|
"step": 262500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.68296781934525e-05, |
|
"loss": 3.0769, |
|
"step": 263000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.6804639558839294e-05, |
|
"loss": 3.0842, |
|
"step": 263500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.6779600924226085e-05, |
|
"loss": 3.0599, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.6754562289612875e-05, |
|
"loss": 3.0444, |
|
"step": 264500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.6729523654999666e-05, |
|
"loss": 3.0919, |
|
"step": 265000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.6704485020386456e-05, |
|
"loss": 3.0803, |
|
"step": 265500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.667944638577325e-05, |
|
"loss": 3.0625, |
|
"step": 266000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.6654407751160044e-05, |
|
"loss": 3.0659, |
|
"step": 266500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.6629369116546835e-05, |
|
"loss": 3.058, |
|
"step": 267000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.6604330481933625e-05, |
|
"loss": 3.039, |
|
"step": 267500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.6579291847320416e-05, |
|
"loss": 3.0609, |
|
"step": 268000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.6554253212707207e-05, |
|
"loss": 3.0642, |
|
"step": 268500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.6529214578094004e-05, |
|
"loss": 3.0689, |
|
"step": 269000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.6504175943480794e-05, |
|
"loss": 3.0742, |
|
"step": 269500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.6479137308867585e-05, |
|
"loss": 3.0593, |
|
"step": 270000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.6454098674254376e-05, |
|
"loss": 3.0275, |
|
"step": 270500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.6429060039641166e-05, |
|
"loss": 3.0494, |
|
"step": 271000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.6404021405027963e-05, |
|
"loss": 3.0539, |
|
"step": 271500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.637898277041475e-05, |
|
"loss": 3.0219, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.6353944135801545e-05, |
|
"loss": 3.0594, |
|
"step": 272500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.6328905501188335e-05, |
|
"loss": 3.0226, |
|
"step": 273000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.6303866866575126e-05, |
|
"loss": 3.062, |
|
"step": 273500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.627882823196192e-05, |
|
"loss": 3.0278, |
|
"step": 274000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.625378959734871e-05, |
|
"loss": 3.0481, |
|
"step": 274500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.6228750962735504e-05, |
|
"loss": 3.0711, |
|
"step": 275000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.6203712328122295e-05, |
|
"loss": 3.0537, |
|
"step": 275500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.6178673693509085e-05, |
|
"loss": 3.0285, |
|
"step": 276000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.615363505889588e-05, |
|
"loss": 3.059, |
|
"step": 276500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.6128596424282666e-05, |
|
"loss": 3.0548, |
|
"step": 277000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.6103557789669464e-05, |
|
"loss": 3.0681, |
|
"step": 277500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.6078519155056254e-05, |
|
"loss": 3.0542, |
|
"step": 278000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.6053480520443045e-05, |
|
"loss": 3.0432, |
|
"step": 278500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.602844188582984e-05, |
|
"loss": 3.0262, |
|
"step": 279000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.6003403251216626e-05, |
|
"loss": 3.0477, |
|
"step": 279500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.5978364616603423e-05, |
|
"loss": 3.0659, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.595332598199021e-05, |
|
"loss": 3.0485, |
|
"step": 280500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.5928287347377005e-05, |
|
"loss": 3.0876, |
|
"step": 281000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.5903248712763795e-05, |
|
"loss": 3.0533, |
|
"step": 281500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.5878210078150586e-05, |
|
"loss": 3.0515, |
|
"step": 282000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.585317144353738e-05, |
|
"loss": 3.0581, |
|
"step": 282500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.582813280892417e-05, |
|
"loss": 3.0733, |
|
"step": 283000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.5803094174310964e-05, |
|
"loss": 3.0407, |
|
"step": 283500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.5778055539697755e-05, |
|
"loss": 3.0176, |
|
"step": 284000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.5753016905084545e-05, |
|
"loss": 3.0331, |
|
"step": 284500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.572797827047134e-05, |
|
"loss": 3.0587, |
|
"step": 285000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.5702939635858126e-05, |
|
"loss": 3.036, |
|
"step": 285500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.5677901001244924e-05, |
|
"loss": 3.0488, |
|
"step": 286000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.5652862366631714e-05, |
|
"loss": 3.0591, |
|
"step": 286500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.5627823732018505e-05, |
|
"loss": 3.0318, |
|
"step": 287000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.56027850974053e-05, |
|
"loss": 3.0268, |
|
"step": 287500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.5577746462792086e-05, |
|
"loss": 3.0655, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.555270782817888e-05, |
|
"loss": 3.0387, |
|
"step": 288500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.5527669193565674e-05, |
|
"loss": 3.0587, |
|
"step": 289000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.5502630558952464e-05, |
|
"loss": 3.0486, |
|
"step": 289500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.547759192433926e-05, |
|
"loss": 3.078, |
|
"step": 290000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.5452553289726046e-05, |
|
"loss": 3.0346, |
|
"step": 290500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.542751465511284e-05, |
|
"loss": 3.0631, |
|
"step": 291000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.5402476020499634e-05, |
|
"loss": 3.0286, |
|
"step": 291500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.5377437385886424e-05, |
|
"loss": 3.0451, |
|
"step": 292000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.5352398751273215e-05, |
|
"loss": 3.0457, |
|
"step": 292500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.5327360116660005e-05, |
|
"loss": 3.0523, |
|
"step": 293000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.53023214820468e-05, |
|
"loss": 3.0505, |
|
"step": 293500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.527728284743359e-05, |
|
"loss": 3.0391, |
|
"step": 294000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.5252244212820384e-05, |
|
"loss": 3.032, |
|
"step": 294500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.5227205578207174e-05, |
|
"loss": 3.048, |
|
"step": 295000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.5202166943593965e-05, |
|
"loss": 3.0276, |
|
"step": 295500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.517712830898076e-05, |
|
"loss": 3.0132, |
|
"step": 296000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.515208967436755e-05, |
|
"loss": 3.0059, |
|
"step": 296500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.512705103975434e-05, |
|
"loss": 3.0293, |
|
"step": 297000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.5102012405141134e-05, |
|
"loss": 3.0349, |
|
"step": 297500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.5076973770527924e-05, |
|
"loss": 3.0338, |
|
"step": 298000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.505193513591472e-05, |
|
"loss": 3.0429, |
|
"step": 298500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.502689650130151e-05, |
|
"loss": 3.017, |
|
"step": 299000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.50018578666883e-05, |
|
"loss": 3.0174, |
|
"step": 299500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.4976819232075093e-05, |
|
"loss": 3.0668, |
|
"step": 300000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.4951780597461884e-05, |
|
"loss": 3.0236, |
|
"step": 300500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.4926741962848675e-05, |
|
"loss": 3.0195, |
|
"step": 301000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.490170332823547e-05, |
|
"loss": 3.0443, |
|
"step": 301500 |
|
} |
|
], |
|
"max_steps": 998457, |
|
"num_train_epochs": 3, |
|
"total_flos": 4.46482559989836e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|