|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.038, |
|
"eval_steps": 1000, |
|
"global_step": 95, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 6.666666666666667e-05, |
|
"loss": 2.3598, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00013333333333333334, |
|
"loss": 2.1113, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0002, |
|
"loss": 2.0719, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00019793814432989693, |
|
"loss": 2.1789, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00019587628865979381, |
|
"loss": 1.9318, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00019381443298969073, |
|
"loss": 2.4083, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00019175257731958765, |
|
"loss": 2.6823, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00018969072164948454, |
|
"loss": 1.5946, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00018762886597938145, |
|
"loss": 2.0224, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00018556701030927837, |
|
"loss": 2.0527, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00018350515463917526, |
|
"loss": 1.8021, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00018144329896907217, |
|
"loss": 2.0241, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0001793814432989691, |
|
"loss": 1.8196, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00017731958762886598, |
|
"loss": 1.8936, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0001752577319587629, |
|
"loss": 1.797, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0001731958762886598, |
|
"loss": 1.5366, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0001711340206185567, |
|
"loss": 1.7419, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00016907216494845361, |
|
"loss": 2.5421, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00016701030927835053, |
|
"loss": 1.5245, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00016494845360824742, |
|
"loss": 1.9081, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00016288659793814434, |
|
"loss": 1.9725, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00016082474226804125, |
|
"loss": 1.8649, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00015876288659793814, |
|
"loss": 1.5654, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00015670103092783506, |
|
"loss": 1.8573, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00015463917525773197, |
|
"loss": 1.6624, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00015257731958762886, |
|
"loss": 1.95, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00015051546391752578, |
|
"loss": 1.5541, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0001484536082474227, |
|
"loss": 1.4851, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00014639175257731958, |
|
"loss": 1.6751, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0001443298969072165, |
|
"loss": 1.5637, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00014226804123711342, |
|
"loss": 1.6947, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0001402061855670103, |
|
"loss": 1.363, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00013814432989690722, |
|
"loss": 2.0642, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00013608247422680414, |
|
"loss": 1.5458, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00013402061855670103, |
|
"loss": 1.8339, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00013195876288659794, |
|
"loss": 1.6049, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00012989690721649486, |
|
"loss": 1.5555, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00012783505154639175, |
|
"loss": 1.5485, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00012577319587628866, |
|
"loss": 2.0987, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00012371134020618558, |
|
"loss": 1.4402, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00012164948453608247, |
|
"loss": 1.4069, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00011958762886597938, |
|
"loss": 1.5902, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0001175257731958763, |
|
"loss": 1.8907, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00011546391752577319, |
|
"loss": 1.6717, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0001134020618556701, |
|
"loss": 1.8157, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00011134020618556702, |
|
"loss": 1.436, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00010927835051546391, |
|
"loss": 1.8154, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00010721649484536083, |
|
"loss": 1.5157, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00010515463917525774, |
|
"loss": 1.7832, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00010309278350515463, |
|
"loss": 1.3788, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00010103092783505155, |
|
"loss": 1.7474, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.896907216494846e-05, |
|
"loss": 1.6072, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.690721649484537e-05, |
|
"loss": 1.5144, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.484536082474227e-05, |
|
"loss": 1.3593, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.278350515463918e-05, |
|
"loss": 1.4519, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.072164948453609e-05, |
|
"loss": 1.6386, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 8.865979381443299e-05, |
|
"loss": 1.6327, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 8.65979381443299e-05, |
|
"loss": 1.5667, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 8.453608247422681e-05, |
|
"loss": 1.8287, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 8.247422680412371e-05, |
|
"loss": 1.7194, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 8.041237113402063e-05, |
|
"loss": 1.7635, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 7.835051546391753e-05, |
|
"loss": 1.5533, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 7.628865979381443e-05, |
|
"loss": 1.4684, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 7.422680412371135e-05, |
|
"loss": 1.3388, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 7.216494845360825e-05, |
|
"loss": 1.4259, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 7.010309278350515e-05, |
|
"loss": 1.8171, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 6.804123711340207e-05, |
|
"loss": 1.4798, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 6.597938144329897e-05, |
|
"loss": 1.5261, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 6.391752577319587e-05, |
|
"loss": 1.5738, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 6.185567010309279e-05, |
|
"loss": 1.593, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 5.979381443298969e-05, |
|
"loss": 2.1242, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 5.7731958762886594e-05, |
|
"loss": 1.8834, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 5.567010309278351e-05, |
|
"loss": 1.3748, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 5.360824742268041e-05, |
|
"loss": 1.3684, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 5.1546391752577315e-05, |
|
"loss": 1.392, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.948453608247423e-05, |
|
"loss": 1.8425, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.7422680412371134e-05, |
|
"loss": 1.6621, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.536082474226804e-05, |
|
"loss": 1.5169, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.329896907216495e-05, |
|
"loss": 1.4549, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.1237113402061855e-05, |
|
"loss": 1.6084, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.9175257731958764e-05, |
|
"loss": 1.3467, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.7113402061855674e-05, |
|
"loss": 1.6058, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.5051546391752576e-05, |
|
"loss": 1.3481, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.2989690721649485e-05, |
|
"loss": 1.9237, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.0927835051546395e-05, |
|
"loss": 1.5566, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.8865979381443297e-05, |
|
"loss": 1.5145, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.6804123711340206e-05, |
|
"loss": 1.377, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.4742268041237116e-05, |
|
"loss": 1.6574, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.268041237113402e-05, |
|
"loss": 1.6581, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.0618556701030927e-05, |
|
"loss": 1.872, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.8556701030927837e-05, |
|
"loss": 1.4659, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.6494845360824743e-05, |
|
"loss": 1.0273, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.4432989690721649e-05, |
|
"loss": 1.5946, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.2371134020618558e-05, |
|
"loss": 2.0506, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.0309278350515464e-05, |
|
"loss": 1.4259, |
|
"step": 95 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 100, |
|
"num_train_epochs": 1, |
|
"save_steps": 5, |
|
"total_flos": 1.147472100999168e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|