{ "best_metric": 2.225323438644409, "best_model_checkpoint": "autotrain-x906d-mvlef/checkpoint-2460", "epoch": 3.0, "eval_steps": 500, "global_step": 2460, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03048780487804878, "grad_norm": 135.88742065429688, "learning_rate": 1.016260162601626e-05, "loss": 30.3396, "step": 25 }, { "epoch": 0.06097560975609756, "grad_norm": 93.93063354492188, "learning_rate": 2.032520325203252e-05, "loss": 24.435, "step": 50 }, { "epoch": 0.09146341463414634, "grad_norm": 104.44407653808594, "learning_rate": 3.048780487804878e-05, "loss": 14.4152, "step": 75 }, { "epoch": 0.12195121951219512, "grad_norm": 15.064167976379395, "learning_rate": 4.065040650406504e-05, "loss": 8.6678, "step": 100 }, { "epoch": 0.1524390243902439, "grad_norm": 8.59422492980957, "learning_rate": 5.081300813008131e-05, "loss": 6.2719, "step": 125 }, { "epoch": 0.18292682926829268, "grad_norm": 6.402270793914795, "learning_rate": 6.097560975609756e-05, "loss": 5.391, "step": 150 }, { "epoch": 0.21341463414634146, "grad_norm": 7.405852794647217, "learning_rate": 7.113821138211383e-05, "loss": 4.9132, "step": 175 }, { "epoch": 0.24390243902439024, "grad_norm": 6.045359134674072, "learning_rate": 8.130081300813008e-05, "loss": 4.4981, "step": 200 }, { "epoch": 0.27439024390243905, "grad_norm": 4.673328876495361, "learning_rate": 9.146341463414635e-05, "loss": 4.267, "step": 225 }, { "epoch": 0.3048780487804878, "grad_norm": 5.1413774490356445, "learning_rate": 9.981933152664861e-05, "loss": 4.0875, "step": 250 }, { "epoch": 0.3353658536585366, "grad_norm": 4.102631092071533, "learning_rate": 9.869015356820235e-05, "loss": 3.9819, "step": 275 }, { "epoch": 0.36585365853658536, "grad_norm": 3.946659803390503, "learning_rate": 9.75609756097561e-05, "loss": 4.0086, "step": 300 }, { "epoch": 0.39634146341463417, "grad_norm": 5.377846717834473, "learning_rate": 9.643179765130986e-05, "loss": 3.7839, "step": 325 }, { "epoch": 0.4268292682926829, "grad_norm": 3.9124577045440674, "learning_rate": 9.530261969286361e-05, "loss": 3.6614, "step": 350 }, { "epoch": 0.4573170731707317, "grad_norm": 3.9939420223236084, "learning_rate": 9.417344173441735e-05, "loss": 3.7353, "step": 375 }, { "epoch": 0.4878048780487805, "grad_norm": 4.000846862792969, "learning_rate": 9.30442637759711e-05, "loss": 3.6826, "step": 400 }, { "epoch": 0.5182926829268293, "grad_norm": 4.859684944152832, "learning_rate": 9.191508581752484e-05, "loss": 3.5672, "step": 425 }, { "epoch": 0.5487804878048781, "grad_norm": 3.8994104862213135, "learning_rate": 9.07859078590786e-05, "loss": 3.4385, "step": 450 }, { "epoch": 0.5792682926829268, "grad_norm": 5.588184356689453, "learning_rate": 8.965672990063234e-05, "loss": 3.4864, "step": 475 }, { "epoch": 0.6097560975609756, "grad_norm": 4.631311416625977, "learning_rate": 8.852755194218609e-05, "loss": 3.4949, "step": 500 }, { "epoch": 0.6402439024390244, "grad_norm": 4.662644386291504, "learning_rate": 8.739837398373984e-05, "loss": 3.4993, "step": 525 }, { "epoch": 0.6707317073170732, "grad_norm": 4.6329874992370605, "learning_rate": 8.626919602529358e-05, "loss": 3.4153, "step": 550 }, { "epoch": 0.7012195121951219, "grad_norm": 5.302168846130371, "learning_rate": 8.514001806684734e-05, "loss": 3.4356, "step": 575 }, { "epoch": 0.7317073170731707, "grad_norm": 4.273636341094971, "learning_rate": 8.401084010840109e-05, "loss": 3.479, "step": 600 }, { "epoch": 0.7621951219512195, "grad_norm": 3.5382657051086426, "learning_rate": 8.288166214995484e-05, "loss": 3.4343, "step": 625 }, { "epoch": 0.7926829268292683, "grad_norm": 3.272901773452759, "learning_rate": 8.175248419150858e-05, "loss": 3.3167, "step": 650 }, { "epoch": 0.823170731707317, "grad_norm": 3.156431198120117, "learning_rate": 8.062330623306234e-05, "loss": 3.2939, "step": 675 }, { "epoch": 0.8536585365853658, "grad_norm": 4.288792610168457, "learning_rate": 7.949412827461609e-05, "loss": 3.2499, "step": 700 }, { "epoch": 0.8841463414634146, "grad_norm": 3.8503577709198, "learning_rate": 7.836495031616983e-05, "loss": 3.3326, "step": 725 }, { "epoch": 0.9146341463414634, "grad_norm": 3.282243251800537, "learning_rate": 7.723577235772358e-05, "loss": 3.3124, "step": 750 }, { "epoch": 0.9451219512195121, "grad_norm": 3.938842296600342, "learning_rate": 7.610659439927734e-05, "loss": 3.2749, "step": 775 }, { "epoch": 0.975609756097561, "grad_norm": 4.175856590270996, "learning_rate": 7.497741644083109e-05, "loss": 3.2124, "step": 800 }, { "epoch": 1.0, "eval_gen_len": 19.0, "eval_loss": 2.420257329940796, "eval_rouge1": 25.1635, "eval_rouge2": 23.2382, "eval_rougeL": 24.6426, "eval_rougeLsum": 25.0666, "eval_runtime": 54.8826, "eval_samples_per_second": 7.47, "eval_steps_per_second": 1.877, "step": 820 }, { "epoch": 1.0060975609756098, "grad_norm": 3.5674939155578613, "learning_rate": 7.384823848238483e-05, "loss": 3.2418, "step": 825 }, { "epoch": 1.0365853658536586, "grad_norm": 4.070037841796875, "learning_rate": 7.271906052393858e-05, "loss": 3.2318, "step": 850 }, { "epoch": 1.0670731707317074, "grad_norm": 4.293022632598877, "learning_rate": 7.158988256549232e-05, "loss": 3.1837, "step": 875 }, { "epoch": 1.0975609756097562, "grad_norm": 3.463479995727539, "learning_rate": 7.046070460704606e-05, "loss": 3.228, "step": 900 }, { "epoch": 1.1280487804878048, "grad_norm": 7.417110443115234, "learning_rate": 6.933152664859982e-05, "loss": 3.1462, "step": 925 }, { "epoch": 1.1585365853658536, "grad_norm": 5.171451091766357, "learning_rate": 6.820234869015357e-05, "loss": 3.0788, "step": 950 }, { "epoch": 1.1890243902439024, "grad_norm": 3.9318175315856934, "learning_rate": 6.707317073170732e-05, "loss": 3.207, "step": 975 }, { "epoch": 1.2195121951219512, "grad_norm": 5.252470016479492, "learning_rate": 6.594399277326106e-05, "loss": 3.0539, "step": 1000 }, { "epoch": 1.25, "grad_norm": 3.6664767265319824, "learning_rate": 6.481481481481482e-05, "loss": 3.1699, "step": 1025 }, { "epoch": 1.2804878048780488, "grad_norm": 2.665335178375244, "learning_rate": 6.368563685636857e-05, "loss": 3.0474, "step": 1050 }, { "epoch": 1.3109756097560976, "grad_norm": 3.2224156856536865, "learning_rate": 6.255645889792232e-05, "loss": 3.1378, "step": 1075 }, { "epoch": 1.3414634146341464, "grad_norm": 3.4332261085510254, "learning_rate": 6.142728093947606e-05, "loss": 3.1683, "step": 1100 }, { "epoch": 1.3719512195121952, "grad_norm": 4.648336887359619, "learning_rate": 6.0298102981029816e-05, "loss": 3.1224, "step": 1125 }, { "epoch": 1.4024390243902438, "grad_norm": 3.557978391647339, "learning_rate": 5.916892502258356e-05, "loss": 3.1106, "step": 1150 }, { "epoch": 1.4329268292682926, "grad_norm": 3.8348541259765625, "learning_rate": 5.803974706413731e-05, "loss": 3.1733, "step": 1175 }, { "epoch": 1.4634146341463414, "grad_norm": 4.132476329803467, "learning_rate": 5.6910569105691056e-05, "loss": 3.0671, "step": 1200 }, { "epoch": 1.4939024390243902, "grad_norm": 3.8571975231170654, "learning_rate": 5.578139114724481e-05, "loss": 3.1065, "step": 1225 }, { "epoch": 1.524390243902439, "grad_norm": 3.408566951751709, "learning_rate": 5.465221318879856e-05, "loss": 3.1613, "step": 1250 }, { "epoch": 1.5548780487804879, "grad_norm": 3.7324483394622803, "learning_rate": 5.35230352303523e-05, "loss": 2.9743, "step": 1275 }, { "epoch": 1.5853658536585367, "grad_norm": 3.1098973751068115, "learning_rate": 5.2393857271906056e-05, "loss": 3.1296, "step": 1300 }, { "epoch": 1.6158536585365852, "grad_norm": 5.050591468811035, "learning_rate": 5.126467931345981e-05, "loss": 2.9844, "step": 1325 }, { "epoch": 1.6463414634146343, "grad_norm": 3.30641508102417, "learning_rate": 5.013550135501355e-05, "loss": 3.0301, "step": 1350 }, { "epoch": 1.6768292682926829, "grad_norm": 4.2086358070373535, "learning_rate": 4.90063233965673e-05, "loss": 3.0881, "step": 1375 }, { "epoch": 1.7073170731707317, "grad_norm": 4.049353122711182, "learning_rate": 4.787714543812105e-05, "loss": 3.0382, "step": 1400 }, { "epoch": 1.7378048780487805, "grad_norm": 3.4281935691833496, "learning_rate": 4.6747967479674795e-05, "loss": 3.0105, "step": 1425 }, { "epoch": 1.7682926829268293, "grad_norm": 3.7564141750335693, "learning_rate": 4.561878952122855e-05, "loss": 3.0319, "step": 1450 }, { "epoch": 1.798780487804878, "grad_norm": 4.241165637969971, "learning_rate": 4.4489611562782295e-05, "loss": 3.0275, "step": 1475 }, { "epoch": 1.8292682926829267, "grad_norm": 3.024312734603882, "learning_rate": 4.336043360433605e-05, "loss": 3.004, "step": 1500 }, { "epoch": 1.8597560975609757, "grad_norm": 3.0586462020874023, "learning_rate": 4.2231255645889795e-05, "loss": 2.8741, "step": 1525 }, { "epoch": 1.8902439024390243, "grad_norm": 3.032233953475952, "learning_rate": 4.110207768744354e-05, "loss": 3.0305, "step": 1550 }, { "epoch": 1.9207317073170733, "grad_norm": 3.4631378650665283, "learning_rate": 3.9972899728997295e-05, "loss": 2.9829, "step": 1575 }, { "epoch": 1.951219512195122, "grad_norm": 3.462908983230591, "learning_rate": 3.884372177055104e-05, "loss": 2.9073, "step": 1600 }, { "epoch": 1.9817073170731707, "grad_norm": 3.844022512435913, "learning_rate": 3.771454381210479e-05, "loss": 2.943, "step": 1625 }, { "epoch": 2.0, "eval_gen_len": 17.9268, "eval_loss": 2.2693333625793457, "eval_rouge1": 24.8213, "eval_rouge2": 22.8064, "eval_rougeL": 24.126, "eval_rougeLsum": 24.7561, "eval_runtime": 54.0619, "eval_samples_per_second": 7.584, "eval_steps_per_second": 1.905, "step": 1640 }, { "epoch": 2.0121951219512195, "grad_norm": 3.2110657691955566, "learning_rate": 3.6585365853658535e-05, "loss": 2.9109, "step": 1650 }, { "epoch": 2.042682926829268, "grad_norm": 5.20732307434082, "learning_rate": 3.545618789521229e-05, "loss": 2.907, "step": 1675 }, { "epoch": 2.073170731707317, "grad_norm": 3.6366891860961914, "learning_rate": 3.4327009936766035e-05, "loss": 2.7614, "step": 1700 }, { "epoch": 2.1036585365853657, "grad_norm": 3.012146472930908, "learning_rate": 3.319783197831978e-05, "loss": 2.8866, "step": 1725 }, { "epoch": 2.1341463414634148, "grad_norm": 5.310023307800293, "learning_rate": 3.2068654019873535e-05, "loss": 2.9738, "step": 1750 }, { "epoch": 2.1646341463414633, "grad_norm": 3.908693313598633, "learning_rate": 3.093947606142728e-05, "loss": 2.989, "step": 1775 }, { "epoch": 2.1951219512195124, "grad_norm": 3.6812493801116943, "learning_rate": 2.9810298102981032e-05, "loss": 2.9844, "step": 1800 }, { "epoch": 2.225609756097561, "grad_norm": 3.1762988567352295, "learning_rate": 2.868112014453478e-05, "loss": 2.9518, "step": 1825 }, { "epoch": 2.2560975609756095, "grad_norm": 3.526785373687744, "learning_rate": 2.7551942186088532e-05, "loss": 3.0312, "step": 1850 }, { "epoch": 2.2865853658536586, "grad_norm": 3.4673197269439697, "learning_rate": 2.642276422764228e-05, "loss": 2.9413, "step": 1875 }, { "epoch": 2.317073170731707, "grad_norm": 3.7603211402893066, "learning_rate": 2.529358626919603e-05, "loss": 2.8524, "step": 1900 }, { "epoch": 2.347560975609756, "grad_norm": 3.039940357208252, "learning_rate": 2.4164408310749775e-05, "loss": 2.9403, "step": 1925 }, { "epoch": 2.3780487804878048, "grad_norm": 4.610980987548828, "learning_rate": 2.3035230352303525e-05, "loss": 2.892, "step": 1950 }, { "epoch": 2.408536585365854, "grad_norm": 3.3454501628875732, "learning_rate": 2.1906052393857275e-05, "loss": 2.905, "step": 1975 }, { "epoch": 2.4390243902439024, "grad_norm": 2.9850571155548096, "learning_rate": 2.077687443541102e-05, "loss": 2.9325, "step": 2000 }, { "epoch": 2.4695121951219514, "grad_norm": 4.394461631774902, "learning_rate": 1.9647696476964768e-05, "loss": 2.9112, "step": 2025 }, { "epoch": 2.5, "grad_norm": 4.401582717895508, "learning_rate": 1.8518518518518518e-05, "loss": 2.8956, "step": 2050 }, { "epoch": 2.5304878048780486, "grad_norm": 3.5677547454833984, "learning_rate": 1.7389340560072268e-05, "loss": 2.9706, "step": 2075 }, { "epoch": 2.5609756097560976, "grad_norm": 3.1517820358276367, "learning_rate": 1.6260162601626018e-05, "loss": 2.896, "step": 2100 }, { "epoch": 2.591463414634146, "grad_norm": 5.237459182739258, "learning_rate": 1.5130984643179767e-05, "loss": 2.9137, "step": 2125 }, { "epoch": 2.6219512195121952, "grad_norm": 3.8501670360565186, "learning_rate": 1.4001806684733515e-05, "loss": 2.9322, "step": 2150 }, { "epoch": 2.652439024390244, "grad_norm": 6.259314060211182, "learning_rate": 1.2872628726287265e-05, "loss": 2.8803, "step": 2175 }, { "epoch": 2.682926829268293, "grad_norm": 3.369128942489624, "learning_rate": 1.1743450767841012e-05, "loss": 2.9141, "step": 2200 }, { "epoch": 2.7134146341463414, "grad_norm": 3.4941694736480713, "learning_rate": 1.0614272809394762e-05, "loss": 2.9613, "step": 2225 }, { "epoch": 2.7439024390243905, "grad_norm": 4.332786560058594, "learning_rate": 9.48509485094851e-06, "loss": 2.9518, "step": 2250 }, { "epoch": 2.774390243902439, "grad_norm": 3.981058359146118, "learning_rate": 8.35591689250226e-06, "loss": 2.8848, "step": 2275 }, { "epoch": 2.8048780487804876, "grad_norm": 3.464871644973755, "learning_rate": 7.226738934056007e-06, "loss": 2.9663, "step": 2300 }, { "epoch": 2.8353658536585367, "grad_norm": 3.324209690093994, "learning_rate": 6.0975609756097564e-06, "loss": 2.8891, "step": 2325 }, { "epoch": 2.8658536585365852, "grad_norm": 3.896878480911255, "learning_rate": 4.968383017163506e-06, "loss": 2.952, "step": 2350 }, { "epoch": 2.8963414634146343, "grad_norm": 3.644209384918213, "learning_rate": 3.839205058717254e-06, "loss": 2.8857, "step": 2375 }, { "epoch": 2.926829268292683, "grad_norm": 3.039807081222534, "learning_rate": 2.710027100271003e-06, "loss": 2.8308, "step": 2400 }, { "epoch": 2.9573170731707314, "grad_norm": 3.2632334232330322, "learning_rate": 1.5808491418247518e-06, "loss": 2.9224, "step": 2425 }, { "epoch": 2.9878048780487805, "grad_norm": 4.359528064727783, "learning_rate": 4.5167118337850045e-07, "loss": 2.901, "step": 2450 }, { "epoch": 3.0, "eval_gen_len": 18.8, "eval_loss": 2.225323438644409, "eval_rouge1": 26.024, "eval_rouge2": 24.1663, "eval_rougeL": 25.432, "eval_rougeLsum": 25.9929, "eval_runtime": 55.2175, "eval_samples_per_second": 7.425, "eval_steps_per_second": 1.865, "step": 2460 } ], "logging_steps": 25, "max_steps": 2460, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.01 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 27756032163840.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }