{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.038, "eval_steps": 1000, "global_step": 95, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 6.666666666666667e-05, "loss": 2.3598, "step": 1 }, { "epoch": 0.0, "learning_rate": 0.00013333333333333334, "loss": 2.1113, "step": 2 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0719, "step": 3 }, { "epoch": 0.0, "learning_rate": 0.00019793814432989693, "loss": 2.1789, "step": 4 }, { "epoch": 0.0, "learning_rate": 0.00019587628865979381, "loss": 1.9318, "step": 5 }, { "epoch": 0.0, "learning_rate": 0.00019381443298969073, "loss": 2.4083, "step": 6 }, { "epoch": 0.0, "learning_rate": 0.00019175257731958765, "loss": 2.6823, "step": 7 }, { "epoch": 0.0, "learning_rate": 0.00018969072164948454, "loss": 1.5946, "step": 8 }, { "epoch": 0.0, "learning_rate": 0.00018762886597938145, "loss": 2.0224, "step": 9 }, { "epoch": 0.0, "learning_rate": 0.00018556701030927837, "loss": 2.0527, "step": 10 }, { "epoch": 0.0, "learning_rate": 0.00018350515463917526, "loss": 1.8021, "step": 11 }, { "epoch": 0.0, "learning_rate": 0.00018144329896907217, "loss": 2.0241, "step": 12 }, { "epoch": 0.01, "learning_rate": 0.0001793814432989691, "loss": 1.8196, "step": 13 }, { "epoch": 0.01, "learning_rate": 0.00017731958762886598, "loss": 1.8936, "step": 14 }, { "epoch": 0.01, "learning_rate": 0.0001752577319587629, "loss": 1.797, "step": 15 }, { "epoch": 0.01, "learning_rate": 0.0001731958762886598, "loss": 1.5366, "step": 16 }, { "epoch": 0.01, "learning_rate": 0.0001711340206185567, "loss": 1.7419, "step": 17 }, { "epoch": 0.01, "learning_rate": 0.00016907216494845361, "loss": 2.5421, "step": 18 }, { "epoch": 0.01, "learning_rate": 0.00016701030927835053, "loss": 1.5245, "step": 19 }, { "epoch": 0.01, "learning_rate": 0.00016494845360824742, "loss": 1.9081, "step": 20 }, { "epoch": 0.01, "learning_rate": 0.00016288659793814434, "loss": 1.9725, "step": 21 }, { "epoch": 0.01, "learning_rate": 0.00016082474226804125, "loss": 1.8649, "step": 22 }, { "epoch": 0.01, "learning_rate": 0.00015876288659793814, "loss": 1.5654, "step": 23 }, { "epoch": 0.01, "learning_rate": 0.00015670103092783506, "loss": 1.8573, "step": 24 }, { "epoch": 0.01, "learning_rate": 0.00015463917525773197, "loss": 1.6624, "step": 25 }, { "epoch": 0.01, "learning_rate": 0.00015257731958762886, "loss": 1.95, "step": 26 }, { "epoch": 0.01, "learning_rate": 0.00015051546391752578, "loss": 1.5541, "step": 27 }, { "epoch": 0.01, "learning_rate": 0.0001484536082474227, "loss": 1.4851, "step": 28 }, { "epoch": 0.01, "learning_rate": 0.00014639175257731958, "loss": 1.6751, "step": 29 }, { "epoch": 0.01, "learning_rate": 0.0001443298969072165, "loss": 1.5637, "step": 30 }, { "epoch": 0.01, "learning_rate": 0.00014226804123711342, "loss": 1.6947, "step": 31 }, { "epoch": 0.01, "learning_rate": 0.0001402061855670103, "loss": 1.363, "step": 32 }, { "epoch": 0.01, "learning_rate": 0.00013814432989690722, "loss": 2.0642, "step": 33 }, { "epoch": 0.01, "learning_rate": 0.00013608247422680414, "loss": 1.5458, "step": 34 }, { "epoch": 0.01, "learning_rate": 0.00013402061855670103, "loss": 1.8339, "step": 35 }, { "epoch": 0.01, "learning_rate": 0.00013195876288659794, "loss": 1.6049, "step": 36 }, { "epoch": 0.01, "learning_rate": 0.00012989690721649486, "loss": 1.5555, "step": 37 }, { "epoch": 0.02, "learning_rate": 0.00012783505154639175, "loss": 1.5485, "step": 38 }, { "epoch": 0.02, "learning_rate": 0.00012577319587628866, "loss": 2.0987, "step": 39 }, { "epoch": 0.02, "learning_rate": 0.00012371134020618558, "loss": 1.4402, "step": 40 }, { "epoch": 0.02, "learning_rate": 0.00012164948453608247, "loss": 1.4069, "step": 41 }, { "epoch": 0.02, "learning_rate": 0.00011958762886597938, "loss": 1.5902, "step": 42 }, { "epoch": 0.02, "learning_rate": 0.0001175257731958763, "loss": 1.8907, "step": 43 }, { "epoch": 0.02, "learning_rate": 0.00011546391752577319, "loss": 1.6717, "step": 44 }, { "epoch": 0.02, "learning_rate": 0.0001134020618556701, "loss": 1.8157, "step": 45 }, { "epoch": 0.02, "learning_rate": 0.00011134020618556702, "loss": 1.436, "step": 46 }, { "epoch": 0.02, "learning_rate": 0.00010927835051546391, "loss": 1.8154, "step": 47 }, { "epoch": 0.02, "learning_rate": 0.00010721649484536083, "loss": 1.5157, "step": 48 }, { "epoch": 0.02, "learning_rate": 0.00010515463917525774, "loss": 1.7832, "step": 49 }, { "epoch": 0.02, "learning_rate": 0.00010309278350515463, "loss": 1.3788, "step": 50 }, { "epoch": 0.02, "learning_rate": 0.00010103092783505155, "loss": 1.7474, "step": 51 }, { "epoch": 0.02, "learning_rate": 9.896907216494846e-05, "loss": 1.6072, "step": 52 }, { "epoch": 0.02, "learning_rate": 9.690721649484537e-05, "loss": 1.5144, "step": 53 }, { "epoch": 0.02, "learning_rate": 9.484536082474227e-05, "loss": 1.3593, "step": 54 }, { "epoch": 0.02, "learning_rate": 9.278350515463918e-05, "loss": 1.4519, "step": 55 }, { "epoch": 0.02, "learning_rate": 9.072164948453609e-05, "loss": 1.6386, "step": 56 }, { "epoch": 0.02, "learning_rate": 8.865979381443299e-05, "loss": 1.6327, "step": 57 }, { "epoch": 0.02, "learning_rate": 8.65979381443299e-05, "loss": 1.5667, "step": 58 }, { "epoch": 0.02, "learning_rate": 8.453608247422681e-05, "loss": 1.8287, "step": 59 }, { "epoch": 0.02, "learning_rate": 8.247422680412371e-05, "loss": 1.7194, "step": 60 }, { "epoch": 0.02, "learning_rate": 8.041237113402063e-05, "loss": 1.7635, "step": 61 }, { "epoch": 0.02, "learning_rate": 7.835051546391753e-05, "loss": 1.5533, "step": 62 }, { "epoch": 0.03, "learning_rate": 7.628865979381443e-05, "loss": 1.4684, "step": 63 }, { "epoch": 0.03, "learning_rate": 7.422680412371135e-05, "loss": 1.3388, "step": 64 }, { "epoch": 0.03, "learning_rate": 7.216494845360825e-05, "loss": 1.4259, "step": 65 }, { "epoch": 0.03, "learning_rate": 7.010309278350515e-05, "loss": 1.8171, "step": 66 }, { "epoch": 0.03, "learning_rate": 6.804123711340207e-05, "loss": 1.4798, "step": 67 }, { "epoch": 0.03, "learning_rate": 6.597938144329897e-05, "loss": 1.5261, "step": 68 }, { "epoch": 0.03, "learning_rate": 6.391752577319587e-05, "loss": 1.5738, "step": 69 }, { "epoch": 0.03, "learning_rate": 6.185567010309279e-05, "loss": 1.593, "step": 70 }, { "epoch": 0.03, "learning_rate": 5.979381443298969e-05, "loss": 2.1242, "step": 71 }, { "epoch": 0.03, "learning_rate": 5.7731958762886594e-05, "loss": 1.8834, "step": 72 }, { "epoch": 0.03, "learning_rate": 5.567010309278351e-05, "loss": 1.3748, "step": 73 }, { "epoch": 0.03, "learning_rate": 5.360824742268041e-05, "loss": 1.3684, "step": 74 }, { "epoch": 0.03, "learning_rate": 5.1546391752577315e-05, "loss": 1.392, "step": 75 }, { "epoch": 0.03, "learning_rate": 4.948453608247423e-05, "loss": 1.8425, "step": 76 }, { "epoch": 0.03, "learning_rate": 4.7422680412371134e-05, "loss": 1.6621, "step": 77 }, { "epoch": 0.03, "learning_rate": 4.536082474226804e-05, "loss": 1.5169, "step": 78 }, { "epoch": 0.03, "learning_rate": 4.329896907216495e-05, "loss": 1.4549, "step": 79 }, { "epoch": 0.03, "learning_rate": 4.1237113402061855e-05, "loss": 1.6084, "step": 80 }, { "epoch": 0.03, "learning_rate": 3.9175257731958764e-05, "loss": 1.3467, "step": 81 }, { "epoch": 0.03, "learning_rate": 3.7113402061855674e-05, "loss": 1.6058, "step": 82 }, { "epoch": 0.03, "learning_rate": 3.5051546391752576e-05, "loss": 1.3481, "step": 83 }, { "epoch": 0.03, "learning_rate": 3.2989690721649485e-05, "loss": 1.9237, "step": 84 }, { "epoch": 0.03, "learning_rate": 3.0927835051546395e-05, "loss": 1.5566, "step": 85 }, { "epoch": 0.03, "learning_rate": 2.8865979381443297e-05, "loss": 1.5145, "step": 86 }, { "epoch": 0.03, "learning_rate": 2.6804123711340206e-05, "loss": 1.377, "step": 87 }, { "epoch": 0.04, "learning_rate": 2.4742268041237116e-05, "loss": 1.6574, "step": 88 }, { "epoch": 0.04, "learning_rate": 2.268041237113402e-05, "loss": 1.6581, "step": 89 }, { "epoch": 0.04, "learning_rate": 2.0618556701030927e-05, "loss": 1.872, "step": 90 }, { "epoch": 0.04, "learning_rate": 1.8556701030927837e-05, "loss": 1.4659, "step": 91 }, { "epoch": 0.04, "learning_rate": 1.6494845360824743e-05, "loss": 1.0273, "step": 92 }, { "epoch": 0.04, "learning_rate": 1.4432989690721649e-05, "loss": 1.5946, "step": 93 }, { "epoch": 0.04, "learning_rate": 1.2371134020618558e-05, "loss": 2.0506, "step": 94 }, { "epoch": 0.04, "learning_rate": 1.0309278350515464e-05, "loss": 1.4259, "step": 95 } ], "logging_steps": 1, "max_steps": 100, "num_train_epochs": 1, "save_steps": 5, "total_flos": 1.147472100999168e+16, "trial_name": null, "trial_params": null }