{ "best_metric": null, "best_model_checkpoint": null, "epoch": 49.01960784313726, "global_step": 12500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 4.9e-05, "loss": 2.0116, "step": 255 }, { "epoch": 1.0, "eval_accuracy": 0.4553, "eval_f1_macro": 0.1796, "eval_gen_len": 2.3716, "eval_loss": 0.7925581336021423, "eval_precision": 0.2144, "eval_recall": 0.2023, "eval_runtime": 18.41, "eval_samples_per_second": 264.584, "step": 255 }, { "epoch": 2.0, "learning_rate": 4.8e-05, "loss": 0.779, "step": 510 }, { "epoch": 2.0, "eval_accuracy": 0.4911, "eval_f1_macro": 0.2749, "eval_gen_len": 2.2535, "eval_loss": 0.752581775188446, "eval_precision": 0.4243, "eval_recall": 0.2719, "eval_runtime": 18.4504, "eval_samples_per_second": 264.005, "step": 510 }, { "epoch": 3.0, "learning_rate": 4.7e-05, "loss": 0.6777, "step": 765 }, { "epoch": 3.0, "eval_accuracy": 0.503, "eval_f1_macro": 0.2851, "eval_gen_len": 2.3207, "eval_loss": 0.7490188479423523, "eval_precision": 0.3735, "eval_recall": 0.288, "eval_runtime": 18.569, "eval_samples_per_second": 262.318, "step": 765 }, { "epoch": 4.0, "learning_rate": 4.600000000000001e-05, "loss": 0.5968, "step": 1020 }, { "epoch": 4.0, "eval_accuracy": 0.5013, "eval_f1_macro": 0.2994, "eval_gen_len": 2.2948, "eval_loss": 0.7779901027679443, "eval_precision": 0.3833, "eval_recall": 0.2971, "eval_runtime": 18.5932, "eval_samples_per_second": 261.978, "step": 1020 }, { "epoch": 5.0, "learning_rate": 4.5e-05, "loss": 0.5367, "step": 1275 }, { "epoch": 5.0, "eval_accuracy": 0.5044, "eval_f1_macro": 0.3052, "eval_gen_len": 2.3751, "eval_loss": 0.8049420118331909, "eval_precision": 0.3699, "eval_recall": 0.3016, "eval_runtime": 18.6267, "eval_samples_per_second": 261.506, "step": 1275 }, { "epoch": 6.0, "learning_rate": 4.4000000000000006e-05, "loss": 0.475, "step": 1530 }, { "epoch": 6.0, "eval_accuracy": 0.5005, "eval_f1_macro": 0.3111, "eval_gen_len": 2.3361, "eval_loss": 0.8648290038108826, "eval_precision": 0.3545, "eval_recall": 0.3096, "eval_runtime": 18.6251, "eval_samples_per_second": 261.529, "step": 1530 }, { "epoch": 7.0, "learning_rate": 4.3e-05, "loss": 0.4194, "step": 1785 }, { "epoch": 7.0, "eval_accuracy": 0.4868, "eval_f1_macro": 0.2957, "eval_gen_len": 2.3318, "eval_loss": 0.9633024334907532, "eval_precision": 0.3316, "eval_recall": 0.2895, "eval_runtime": 18.3899, "eval_samples_per_second": 264.873, "step": 1785 }, { "epoch": 8.0, "learning_rate": 4.2e-05, "loss": 0.3619, "step": 2040 }, { "epoch": 8.0, "eval_accuracy": 0.4925, "eval_f1_macro": 0.2992, "eval_gen_len": 2.3545, "eval_loss": 0.9422969818115234, "eval_precision": 0.3316, "eval_recall": 0.2964, "eval_runtime": 18.6364, "eval_samples_per_second": 261.37, "step": 2040 }, { "epoch": 9.0, "learning_rate": 4.1e-05, "loss": 0.3126, "step": 2295 }, { "epoch": 9.0, "eval_accuracy": 0.4718, "eval_f1_macro": 0.2899, "eval_gen_len": 2.4246, "eval_loss": 1.086911678314209, "eval_precision": 0.3039, "eval_recall": 0.2956, "eval_runtime": 18.6675, "eval_samples_per_second": 260.935, "step": 2295 }, { "epoch": 10.0, "learning_rate": 4e-05, "loss": 0.2714, "step": 2550 }, { "epoch": 10.0, "eval_accuracy": 0.4757, "eval_f1_macro": 0.2959, "eval_gen_len": 2.3593, "eval_loss": 1.1425822973251343, "eval_precision": 0.3162, "eval_recall": 0.2919, "eval_runtime": 18.4062, "eval_samples_per_second": 264.64, "step": 2550 }, { "epoch": 11.0, "learning_rate": 3.9000000000000006e-05, "loss": 0.2295, "step": 2805 }, { "epoch": 11.0, "eval_accuracy": 0.4691, "eval_f1_macro": 0.289, "eval_gen_len": 2.4087, "eval_loss": 1.189605951309204, "eval_precision": 0.304, "eval_recall": 0.2901, "eval_runtime": 18.6533, "eval_samples_per_second": 261.133, "step": 2805 }, { "epoch": 12.0, "learning_rate": 3.8e-05, "loss": 0.1945, "step": 3060 }, { "epoch": 12.0, "eval_accuracy": 0.4703, "eval_f1_macro": 0.3098, "eval_gen_len": 2.38, "eval_loss": 1.2930792570114136, "eval_precision": 0.3263, "eval_recall": 0.3061, "eval_runtime": 18.3552, "eval_samples_per_second": 265.375, "step": 3060 }, { "epoch": 13.0, "learning_rate": 3.7e-05, "loss": 0.1622, "step": 3315 }, { "epoch": 13.0, "eval_accuracy": 0.4648, "eval_f1_macro": 0.3084, "eval_gen_len": 2.4237, "eval_loss": 1.3369712829589844, "eval_precision": 0.3194, "eval_recall": 0.3082, "eval_runtime": 18.3904, "eval_samples_per_second": 264.867, "step": 3315 }, { "epoch": 14.0, "learning_rate": 3.6e-05, "loss": 0.1339, "step": 3570 }, { "epoch": 14.0, "eval_accuracy": 0.4757, "eval_f1_macro": 0.3098, "eval_gen_len": 2.3874, "eval_loss": 1.5157912969589233, "eval_precision": 0.3258, "eval_recall": 0.3087, "eval_runtime": 18.3743, "eval_samples_per_second": 265.099, "step": 3570 }, { "epoch": 15.0, "learning_rate": 3.5e-05, "loss": 0.1195, "step": 3825 }, { "epoch": 15.0, "eval_accuracy": 0.4683, "eval_f1_macro": 0.3044, "eval_gen_len": 2.403, "eval_loss": 1.5008689165115356, "eval_precision": 0.3135, "eval_recall": 0.3059, "eval_runtime": 18.3877, "eval_samples_per_second": 264.905, "step": 3825 }, { "epoch": 16.0, "learning_rate": 3.4000000000000007e-05, "loss": 0.1019, "step": 4080 }, { "epoch": 16.0, "eval_accuracy": 0.4738, "eval_f1_macro": 0.3054, "eval_gen_len": 2.3734, "eval_loss": 1.5503424406051636, "eval_precision": 0.3235, "eval_recall": 0.3035, "eval_runtime": 18.347, "eval_samples_per_second": 265.493, "step": 4080 }, { "epoch": 17.0, "learning_rate": 3.3e-05, "loss": 0.0853, "step": 4335 }, { "epoch": 17.0, "eval_accuracy": 0.4759, "eval_f1_macro": 0.305, "eval_gen_len": 2.3954, "eval_loss": 1.7289695739746094, "eval_precision": 0.3219, "eval_recall": 0.3014, "eval_runtime": 18.3899, "eval_samples_per_second": 264.873, "step": 4335 }, { "epoch": 18.0, "learning_rate": 3.2000000000000005e-05, "loss": 0.0773, "step": 4590 }, { "epoch": 18.0, "eval_accuracy": 0.473, "eval_f1_macro": 0.3045, "eval_gen_len": 2.4233, "eval_loss": 1.7796562910079956, "eval_precision": 0.321, "eval_recall": 0.2996, "eval_runtime": 18.3356, "eval_samples_per_second": 265.658, "step": 4590 }, { "epoch": 19.0, "learning_rate": 3.1e-05, "loss": 0.0681, "step": 4845 }, { "epoch": 19.0, "eval_accuracy": 0.4638, "eval_f1_macro": 0.2984, "eval_gen_len": 2.3843, "eval_loss": 1.753821611404419, "eval_precision": 0.3106, "eval_recall": 0.2996, "eval_runtime": 18.3825, "eval_samples_per_second": 264.98, "step": 4845 }, { "epoch": 20.0, "learning_rate": 3e-05, "loss": 0.0617, "step": 5100 }, { "epoch": 20.0, "eval_accuracy": 0.4638, "eval_f1_macro": 0.3047, "eval_gen_len": 2.4092, "eval_loss": 1.868014931678772, "eval_precision": 0.3154, "eval_recall": 0.3036, "eval_runtime": 18.3921, "eval_samples_per_second": 264.841, "step": 5100 }, { "epoch": 21.0, "learning_rate": 2.9e-05, "loss": 0.0537, "step": 5355 }, { "epoch": 21.0, "eval_accuracy": 0.4642, "eval_f1_macro": 0.3035, "eval_gen_len": 2.3738, "eval_loss": 1.9632675647735596, "eval_precision": 0.3201, "eval_recall": 0.2996, "eval_runtime": 18.4289, "eval_samples_per_second": 264.314, "step": 5355 }, { "epoch": 22.0, "learning_rate": 2.8000000000000003e-05, "loss": 0.0473, "step": 5610 }, { "epoch": 22.0, "eval_accuracy": 0.4726, "eval_f1_macro": 0.303, "eval_gen_len": 2.3862, "eval_loss": 1.8952040672302246, "eval_precision": 0.3214, "eval_recall": 0.2998, "eval_runtime": 18.3834, "eval_samples_per_second": 264.967, "step": 5610 }, { "epoch": 23.0, "learning_rate": 2.7000000000000002e-05, "loss": 0.0465, "step": 5865 }, { "epoch": 23.0, "eval_accuracy": 0.466, "eval_f1_macro": 0.3035, "eval_gen_len": 2.4024, "eval_loss": 1.9048091173171997, "eval_precision": 0.3173, "eval_recall": 0.3012, "eval_runtime": 18.3842, "eval_samples_per_second": 264.956, "step": 5865 }, { "epoch": 24.0, "learning_rate": 2.6000000000000002e-05, "loss": 0.0406, "step": 6120 }, { "epoch": 24.0, "eval_accuracy": 0.4634, "eval_f1_macro": 0.3068, "eval_gen_len": 2.426, "eval_loss": 2.021580696105957, "eval_precision": 0.3153, "eval_recall": 0.3044, "eval_runtime": 18.3498, "eval_samples_per_second": 265.452, "step": 6120 }, { "epoch": 25.0, "learning_rate": 2.5e-05, "loss": 0.0358, "step": 6375 }, { "epoch": 25.0, "eval_accuracy": 0.4742, "eval_f1_macro": 0.3003, "eval_gen_len": 2.3597, "eval_loss": 2.116412401199341, "eval_precision": 0.3236, "eval_recall": 0.2931, "eval_runtime": 18.3974, "eval_samples_per_second": 264.766, "step": 6375 }, { "epoch": 26.0, "learning_rate": 2.4e-05, "loss": 0.0353, "step": 6630 }, { "epoch": 26.0, "eval_accuracy": 0.4668, "eval_f1_macro": 0.3004, "eval_gen_len": 2.4484, "eval_loss": 2.0235698223114014, "eval_precision": 0.3084, "eval_recall": 0.2995, "eval_runtime": 18.3702, "eval_samples_per_second": 265.158, "step": 6630 }, { "epoch": 27.0, "learning_rate": 2.3000000000000003e-05, "loss": 0.0314, "step": 6885 }, { "epoch": 27.0, "eval_accuracy": 0.464, "eval_f1_macro": 0.3013, "eval_gen_len": 2.4204, "eval_loss": 2.124769926071167, "eval_precision": 0.3066, "eval_recall": 0.3019, "eval_runtime": 18.4132, "eval_samples_per_second": 264.538, "step": 6885 }, { "epoch": 28.0, "learning_rate": 2.2000000000000003e-05, "loss": 0.0296, "step": 7140 }, { "epoch": 28.0, "eval_accuracy": 0.4722, "eval_f1_macro": 0.2997, "eval_gen_len": 2.3732, "eval_loss": 2.124000072479248, "eval_precision": 0.3261, "eval_recall": 0.294, "eval_runtime": 18.3663, "eval_samples_per_second": 265.213, "step": 7140 }, { "epoch": 29.0, "learning_rate": 2.1e-05, "loss": 0.0274, "step": 7395 }, { "epoch": 29.0, "eval_accuracy": 0.467, "eval_f1_macro": 0.3011, "eval_gen_len": 2.3636, "eval_loss": 2.1549148559570312, "eval_precision": 0.3197, "eval_recall": 0.2963, "eval_runtime": 18.4004, "eval_samples_per_second": 264.722, "step": 7395 }, { "epoch": 30.0, "learning_rate": 2e-05, "loss": 0.0248, "step": 7650 }, { "epoch": 30.0, "eval_accuracy": 0.4697, "eval_f1_macro": 0.2982, "eval_gen_len": 2.3798, "eval_loss": 2.2189269065856934, "eval_precision": 0.3152, "eval_recall": 0.2951, "eval_runtime": 18.3622, "eval_samples_per_second": 265.273, "step": 7650 }, { "epoch": 31.0, "learning_rate": 1.9e-05, "loss": 0.0219, "step": 7905 }, { "epoch": 31.0, "eval_accuracy": 0.4736, "eval_f1_macro": 0.3048, "eval_gen_len": 2.3942, "eval_loss": 2.3595752716064453, "eval_precision": 0.3157, "eval_recall": 0.3049, "eval_runtime": 18.3521, "eval_samples_per_second": 265.419, "step": 7905 }, { "epoch": 32.0, "learning_rate": 1.8e-05, "loss": 0.0205, "step": 8160 }, { "epoch": 32.0, "eval_accuracy": 0.4705, "eval_f1_macro": 0.3013, "eval_gen_len": 2.3909, "eval_loss": 2.4317517280578613, "eval_precision": 0.3151, "eval_recall": 0.3001, "eval_runtime": 18.3589, "eval_samples_per_second": 265.321, "step": 8160 }, { "epoch": 33.0, "learning_rate": 1.7000000000000003e-05, "loss": 0.0189, "step": 8415 }, { "epoch": 33.0, "eval_accuracy": 0.4767, "eval_f1_macro": 0.3084, "eval_gen_len": 2.3751, "eval_loss": 2.4803547859191895, "eval_precision": 0.3242, "eval_recall": 0.3045, "eval_runtime": 18.4166, "eval_samples_per_second": 264.489, "step": 8415 }, { "epoch": 34.0, "learning_rate": 1.6000000000000003e-05, "loss": 0.0211, "step": 8670 }, { "epoch": 34.0, "eval_accuracy": 0.4699, "eval_f1_macro": 0.304, "eval_gen_len": 2.4102, "eval_loss": 2.371544361114502, "eval_precision": 0.3223, "eval_recall": 0.2999, "eval_runtime": 18.4037, "eval_samples_per_second": 264.675, "step": 8670 }, { "epoch": 35.0, "learning_rate": 1.5e-05, "loss": 0.0156, "step": 8925 }, { "epoch": 35.0, "eval_accuracy": 0.4753, "eval_f1_macro": 0.3014, "eval_gen_len": 2.395, "eval_loss": 2.55307674407959, "eval_precision": 0.3136, "eval_recall": 0.2973, "eval_runtime": 18.4323, "eval_samples_per_second": 264.264, "step": 8925 }, { "epoch": 36.0, "learning_rate": 1.4000000000000001e-05, "loss": 0.0179, "step": 9180 }, { "epoch": 36.0, "eval_accuracy": 0.4829, "eval_f1_macro": 0.3055, "eval_gen_len": 2.3621, "eval_loss": 2.49765682220459, "eval_precision": 0.3308, "eval_recall": 0.2991, "eval_runtime": 18.4231, "eval_samples_per_second": 264.396, "step": 9180 }, { "epoch": 37.0, "learning_rate": 1.3000000000000001e-05, "loss": 0.0155, "step": 9435 }, { "epoch": 37.0, "eval_accuracy": 0.4808, "eval_f1_macro": 0.3086, "eval_gen_len": 2.3775, "eval_loss": 2.4960439205169678, "eval_precision": 0.3245, "eval_recall": 0.3039, "eval_runtime": 18.4083, "eval_samples_per_second": 264.609, "step": 9435 }, { "epoch": 38.0, "learning_rate": 1.2e-05, "loss": 0.0154, "step": 9690 }, { "epoch": 38.0, "eval_accuracy": 0.4652, "eval_f1_macro": 0.3047, "eval_gen_len": 2.3983, "eval_loss": 2.59112811088562, "eval_precision": 0.3196, "eval_recall": 0.2998, "eval_runtime": 18.406, "eval_samples_per_second": 264.642, "step": 9690 }, { "epoch": 39.0, "learning_rate": 1.1000000000000001e-05, "loss": 0.0144, "step": 9945 }, { "epoch": 39.0, "eval_accuracy": 0.473, "eval_f1_macro": 0.3097, "eval_gen_len": 2.3634, "eval_loss": 2.646393299102783, "eval_precision": 0.3283, "eval_recall": 0.3046, "eval_runtime": 18.3793, "eval_samples_per_second": 265.027, "step": 9945 }, { "epoch": 40.0, "learning_rate": 1e-05, "loss": 0.0135, "step": 10200 }, { "epoch": 40.0, "eval_accuracy": 0.4695, "eval_f1_macro": 0.3035, "eval_gen_len": 2.3812, "eval_loss": 2.711395740509033, "eval_precision": 0.3185, "eval_recall": 0.2989, "eval_runtime": 18.3802, "eval_samples_per_second": 265.013, "step": 10200 }, { "epoch": 41.0, "learning_rate": 9e-06, "loss": 0.0132, "step": 10455 }, { "epoch": 41.0, "eval_accuracy": 0.4707, "eval_f1_macro": 0.307, "eval_gen_len": 2.4055, "eval_loss": 2.706991672515869, "eval_precision": 0.3218, "eval_recall": 0.3029, "eval_runtime": 18.4175, "eval_samples_per_second": 264.476, "step": 10455 }, { "epoch": 42.0, "learning_rate": 8.000000000000001e-06, "loss": 0.0113, "step": 10710 }, { "epoch": 42.0, "eval_accuracy": 0.4705, "eval_f1_macro": 0.3041, "eval_gen_len": 2.3833, "eval_loss": 2.7490220069885254, "eval_precision": 0.3226, "eval_recall": 0.3009, "eval_runtime": 18.3891, "eval_samples_per_second": 264.885, "step": 10710 }, { "epoch": 43.0, "learning_rate": 7.000000000000001e-06, "loss": 0.0104, "step": 10965 }, { "epoch": 43.0, "eval_accuracy": 0.4707, "eval_f1_macro": 0.3094, "eval_gen_len": 2.3868, "eval_loss": 2.8594679832458496, "eval_precision": 0.3257, "eval_recall": 0.304, "eval_runtime": 18.4068, "eval_samples_per_second": 264.631, "step": 10965 }, { "epoch": 44.0, "learning_rate": 6e-06, "loss": 0.0125, "step": 11220 }, { "epoch": 44.0, "eval_accuracy": 0.4705, "eval_f1_macro": 0.3068, "eval_gen_len": 2.3921, "eval_loss": 2.777812957763672, "eval_precision": 0.3207, "eval_recall": 0.3034, "eval_runtime": 18.3628, "eval_samples_per_second": 265.264, "step": 11220 }, { "epoch": 45.0, "learning_rate": 5e-06, "loss": 0.0094, "step": 11475 }, { "epoch": 45.0, "eval_accuracy": 0.4753, "eval_f1_macro": 0.3102, "eval_gen_len": 2.4036, "eval_loss": 2.7739901542663574, "eval_precision": 0.3301, "eval_recall": 0.3028, "eval_runtime": 18.4413, "eval_samples_per_second": 264.135, "step": 11475 }, { "epoch": 46.0, "learning_rate": 4.000000000000001e-06, "loss": 0.0094, "step": 11730 }, { "epoch": 46.0, "eval_accuracy": 0.4755, "eval_f1_macro": 0.3122, "eval_gen_len": 2.3979, "eval_loss": 2.759046792984009, "eval_precision": 0.3284, "eval_recall": 0.3064, "eval_runtime": 18.4399, "eval_samples_per_second": 264.155, "step": 11730 }, { "epoch": 47.0, "learning_rate": 3e-06, "loss": 0.0085, "step": 11985 }, { "epoch": 47.0, "eval_accuracy": 0.4759, "eval_f1_macro": 0.3092, "eval_gen_len": 2.3903, "eval_loss": 2.8234634399414062, "eval_precision": 0.3248, "eval_recall": 0.304, "eval_runtime": 18.4435, "eval_samples_per_second": 264.104, "step": 11985 }, { "epoch": 48.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.0087, "step": 12240 }, { "epoch": 48.0, "eval_accuracy": 0.474, "eval_f1_macro": 0.3097, "eval_gen_len": 2.3977, "eval_loss": 2.834430694580078, "eval_precision": 0.3247, "eval_recall": 0.3045, "eval_runtime": 18.4163, "eval_samples_per_second": 264.494, "step": 12240 }, { "epoch": 49.0, "learning_rate": 1.0000000000000002e-06, "loss": 0.009, "step": 12495 }, { "epoch": 49.0, "eval_accuracy": 0.4732, "eval_f1_macro": 0.3081, "eval_gen_len": 2.3868, "eval_loss": 2.841893196105957, "eval_precision": 0.3256, "eval_recall": 0.3018, "eval_runtime": 18.4481, "eval_samples_per_second": 264.037, "step": 12495 } ], "max_steps": 12750, "num_train_epochs": 50, "total_flos": 1.14107158131029e+17, "trial_name": null, "trial_params": null }