{ "best_metric": null, "best_model_checkpoint": null, "epoch": 50.0, "global_step": 3100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_loss": 38.730377197265625, "eval_runtime": 2.8841, "eval_samples_per_second": 41.955, "eval_steps_per_second": 5.548, "step": 62 }, { "epoch": 2.0, "eval_loss": 37.18061828613281, "eval_runtime": 2.9537, "eval_samples_per_second": 40.966, "eval_steps_per_second": 5.417, "step": 124 }, { "epoch": 3.0, "eval_loss": 33.34883499145508, "eval_runtime": 3.0329, "eval_samples_per_second": 39.895, "eval_steps_per_second": 5.275, "step": 186 }, { "epoch": 4.0, "eval_loss": 28.77151870727539, "eval_runtime": 3.038, "eval_samples_per_second": 39.829, "eval_steps_per_second": 5.267, "step": 248 }, { "epoch": 5.0, "eval_loss": 24.89649200439453, "eval_runtime": 3.1068, "eval_samples_per_second": 38.947, "eval_steps_per_second": 5.15, "step": 310 }, { "epoch": 6.0, "eval_loss": 21.40663719177246, "eval_runtime": 3.1605, "eval_samples_per_second": 38.285, "eval_steps_per_second": 5.062, "step": 372 }, { "epoch": 7.0, "eval_loss": 19.166534423828125, "eval_runtime": 3.191, "eval_samples_per_second": 37.919, "eval_steps_per_second": 5.014, "step": 434 }, { "epoch": 8.0, "eval_loss": 17.308666229248047, "eval_runtime": 3.1957, "eval_samples_per_second": 37.863, "eval_steps_per_second": 5.007, "step": 496 }, { "epoch": 8.06, "learning_rate": 1.6774193548387096e-07, "loss": 29.5022, "step": 500 }, { "epoch": 9.0, "eval_loss": 15.532267570495605, "eval_runtime": 3.0171, "eval_samples_per_second": 40.104, "eval_steps_per_second": 5.303, "step": 558 }, { "epoch": 10.0, "eval_loss": 14.449665069580078, "eval_runtime": 3.0595, "eval_samples_per_second": 39.549, "eval_steps_per_second": 5.23, "step": 620 }, { "epoch": 11.0, "eval_loss": 14.162155151367188, "eval_runtime": 3.1079, "eval_samples_per_second": 38.933, "eval_steps_per_second": 5.148, "step": 682 }, { "epoch": 12.0, "eval_loss": 13.751199722290039, "eval_runtime": 3.1538, "eval_samples_per_second": 38.366, "eval_steps_per_second": 5.073, "step": 744 }, { "epoch": 13.0, "eval_loss": 13.394129753112793, "eval_runtime": 3.1936, "eval_samples_per_second": 37.888, "eval_steps_per_second": 5.01, "step": 806 }, { "epoch": 14.0, "eval_loss": 13.021451950073242, "eval_runtime": 3.2109, "eval_samples_per_second": 37.684, "eval_steps_per_second": 4.983, "step": 868 }, { "epoch": 15.0, "eval_loss": 12.83625602722168, "eval_runtime": 3.2002, "eval_samples_per_second": 37.81, "eval_steps_per_second": 5.0, "step": 930 }, { "epoch": 16.0, "eval_loss": 12.659578323364258, "eval_runtime": 3.1987, "eval_samples_per_second": 37.828, "eval_steps_per_second": 5.002, "step": 992 }, { "epoch": 16.13, "learning_rate": 1.3548387096774192e-07, "loss": 13.2409, "step": 1000 }, { "epoch": 17.0, "eval_loss": 12.591437339782715, "eval_runtime": 3.1748, "eval_samples_per_second": 38.113, "eval_steps_per_second": 5.04, "step": 1054 }, { "epoch": 18.0, "eval_loss": 12.289512634277344, "eval_runtime": 3.1814, "eval_samples_per_second": 38.033, "eval_steps_per_second": 5.029, "step": 1116 }, { "epoch": 19.0, "eval_loss": 12.269988059997559, "eval_runtime": 3.2171, "eval_samples_per_second": 37.611, "eval_steps_per_second": 4.973, "step": 1178 }, { "epoch": 20.0, "eval_loss": 12.142666816711426, "eval_runtime": 3.202, "eval_samples_per_second": 37.789, "eval_steps_per_second": 4.997, "step": 1240 }, { "epoch": 21.0, "eval_loss": 12.13444995880127, "eval_runtime": 3.1974, "eval_samples_per_second": 37.843, "eval_steps_per_second": 5.004, "step": 1302 }, { "epoch": 22.0, "eval_loss": 12.062304496765137, "eval_runtime": 3.1989, "eval_samples_per_second": 37.825, "eval_steps_per_second": 5.002, "step": 1364 }, { "epoch": 23.0, "eval_loss": 12.063033103942871, "eval_runtime": 3.2187, "eval_samples_per_second": 37.593, "eval_steps_per_second": 4.971, "step": 1426 }, { "epoch": 24.0, "eval_loss": 12.098295211791992, "eval_runtime": 3.1987, "eval_samples_per_second": 37.827, "eval_steps_per_second": 5.002, "step": 1488 }, { "epoch": 24.19, "learning_rate": 1.032258064516129e-07, "loss": 11.0242, "step": 1500 }, { "epoch": 25.0, "eval_loss": 11.790215492248535, "eval_runtime": 3.1841, "eval_samples_per_second": 38.002, "eval_steps_per_second": 5.025, "step": 1550 }, { "epoch": 26.0, "eval_loss": 11.862567901611328, "eval_runtime": 3.1927, "eval_samples_per_second": 37.899, "eval_steps_per_second": 5.011, "step": 1612 }, { "epoch": 27.0, "eval_loss": 11.915380477905273, "eval_runtime": 3.2213, "eval_samples_per_second": 37.562, "eval_steps_per_second": 4.967, "step": 1674 }, { "epoch": 28.0, "eval_loss": 11.648334503173828, "eval_runtime": 3.1958, "eval_samples_per_second": 37.862, "eval_steps_per_second": 5.007, "step": 1736 }, { "epoch": 29.0, "eval_loss": 11.861973762512207, "eval_runtime": 3.1992, "eval_samples_per_second": 37.822, "eval_steps_per_second": 5.001, "step": 1798 }, { "epoch": 30.0, "eval_loss": 11.598742485046387, "eval_runtime": 3.2, "eval_samples_per_second": 37.812, "eval_steps_per_second": 5.0, "step": 1860 }, { "epoch": 31.0, "eval_loss": 11.763324737548828, "eval_runtime": 3.1997, "eval_samples_per_second": 37.817, "eval_steps_per_second": 5.001, "step": 1922 }, { "epoch": 32.0, "eval_loss": 11.699971199035645, "eval_runtime": 3.1883, "eval_samples_per_second": 37.951, "eval_steps_per_second": 5.018, "step": 1984 }, { "epoch": 32.26, "learning_rate": 7.096774193548388e-08, "loss": 10.5931, "step": 2000 }, { "epoch": 33.0, "eval_loss": 11.6630859375, "eval_runtime": 3.1817, "eval_samples_per_second": 38.03, "eval_steps_per_second": 5.029, "step": 2046 }, { "epoch": 34.0, "eval_loss": 11.431499481201172, "eval_runtime": 3.1876, "eval_samples_per_second": 37.96, "eval_steps_per_second": 5.019, "step": 2108 }, { "epoch": 35.0, "eval_loss": 11.56187915802002, "eval_runtime": 3.1936, "eval_samples_per_second": 37.889, "eval_steps_per_second": 5.01, "step": 2170 }, { "epoch": 36.0, "eval_loss": 11.59299087524414, "eval_runtime": 3.1844, "eval_samples_per_second": 37.998, "eval_steps_per_second": 5.025, "step": 2232 }, { "epoch": 37.0, "eval_loss": 11.55374526977539, "eval_runtime": 3.2023, "eval_samples_per_second": 37.785, "eval_steps_per_second": 4.996, "step": 2294 }, { "epoch": 38.0, "eval_loss": 11.67027473449707, "eval_runtime": 3.1993, "eval_samples_per_second": 37.821, "eval_steps_per_second": 5.001, "step": 2356 }, { "epoch": 39.0, "eval_loss": 11.548821449279785, "eval_runtime": 3.1859, "eval_samples_per_second": 37.98, "eval_steps_per_second": 5.022, "step": 2418 }, { "epoch": 40.0, "eval_loss": 11.443954467773438, "eval_runtime": 3.1913, "eval_samples_per_second": 37.916, "eval_steps_per_second": 5.014, "step": 2480 }, { "epoch": 40.32, "learning_rate": 3.8709677419354835e-08, "loss": 10.4503, "step": 2500 }, { "epoch": 41.0, "eval_loss": 11.321048736572266, "eval_runtime": 3.1707, "eval_samples_per_second": 38.162, "eval_steps_per_second": 5.046, "step": 2542 }, { "epoch": 42.0, "eval_loss": 11.437300682067871, "eval_runtime": 3.1784, "eval_samples_per_second": 38.07, "eval_steps_per_second": 5.034, "step": 2604 }, { "epoch": 43.0, "eval_loss": 11.486796379089355, "eval_runtime": 3.187, "eval_samples_per_second": 37.967, "eval_steps_per_second": 5.02, "step": 2666 }, { "epoch": 44.0, "eval_loss": 11.38952350616455, "eval_runtime": 3.1899, "eval_samples_per_second": 37.932, "eval_steps_per_second": 5.016, "step": 2728 }, { "epoch": 45.0, "eval_loss": 11.409669876098633, "eval_runtime": 3.1917, "eval_samples_per_second": 37.911, "eval_steps_per_second": 5.013, "step": 2790 }, { "epoch": 46.0, "eval_loss": 11.556734085083008, "eval_runtime": 3.1934, "eval_samples_per_second": 37.891, "eval_steps_per_second": 5.01, "step": 2852 }, { "epoch": 47.0, "eval_loss": 11.35185718536377, "eval_runtime": 3.1869, "eval_samples_per_second": 37.968, "eval_steps_per_second": 5.021, "step": 2914 }, { "epoch": 48.0, "eval_loss": 11.326269149780273, "eval_runtime": 3.1877, "eval_samples_per_second": 37.959, "eval_steps_per_second": 5.019, "step": 2976 }, { "epoch": 48.39, "learning_rate": 6.451612903225806e-09, "loss": 10.4239, "step": 3000 }, { "epoch": 49.0, "eval_loss": 11.404916763305664, "eval_runtime": 3.3462, "eval_samples_per_second": 36.16, "eval_steps_per_second": 4.782, "step": 3038 }, { "epoch": 50.0, "eval_loss": 11.230060577392578, "eval_runtime": 3.1439, "eval_samples_per_second": 38.487, "eval_steps_per_second": 5.089, "step": 3100 }, { "epoch": 50.0, "step": 3100, "total_flos": 1623561609830400.0, "train_loss": 14.082046567855343, "train_runtime": 2464.2419, "train_samples_per_second": 9.983, "train_steps_per_second": 1.258 } ], "max_steps": 3100, "num_train_epochs": 50, "total_flos": 1623561609830400.0, "trial_name": null, "trial_params": null }