TachyHealthResearch's picture
Training in progress, step 95, checkpoint
ef03979
raw
history blame
11.7 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.038,
"eval_steps": 1000,
"global_step": 95,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 6.666666666666667e-05,
"loss": 2.3598,
"step": 1
},
{
"epoch": 0.0,
"learning_rate": 0.00013333333333333334,
"loss": 2.1113,
"step": 2
},
{
"epoch": 0.0,
"learning_rate": 0.0002,
"loss": 2.0719,
"step": 3
},
{
"epoch": 0.0,
"learning_rate": 0.00019793814432989693,
"loss": 2.1789,
"step": 4
},
{
"epoch": 0.0,
"learning_rate": 0.00019587628865979381,
"loss": 1.9318,
"step": 5
},
{
"epoch": 0.0,
"learning_rate": 0.00019381443298969073,
"loss": 2.4083,
"step": 6
},
{
"epoch": 0.0,
"learning_rate": 0.00019175257731958765,
"loss": 2.6823,
"step": 7
},
{
"epoch": 0.0,
"learning_rate": 0.00018969072164948454,
"loss": 1.5946,
"step": 8
},
{
"epoch": 0.0,
"learning_rate": 0.00018762886597938145,
"loss": 2.0224,
"step": 9
},
{
"epoch": 0.0,
"learning_rate": 0.00018556701030927837,
"loss": 2.0527,
"step": 10
},
{
"epoch": 0.0,
"learning_rate": 0.00018350515463917526,
"loss": 1.8021,
"step": 11
},
{
"epoch": 0.0,
"learning_rate": 0.00018144329896907217,
"loss": 2.0241,
"step": 12
},
{
"epoch": 0.01,
"learning_rate": 0.0001793814432989691,
"loss": 1.8196,
"step": 13
},
{
"epoch": 0.01,
"learning_rate": 0.00017731958762886598,
"loss": 1.8936,
"step": 14
},
{
"epoch": 0.01,
"learning_rate": 0.0001752577319587629,
"loss": 1.797,
"step": 15
},
{
"epoch": 0.01,
"learning_rate": 0.0001731958762886598,
"loss": 1.5366,
"step": 16
},
{
"epoch": 0.01,
"learning_rate": 0.0001711340206185567,
"loss": 1.7419,
"step": 17
},
{
"epoch": 0.01,
"learning_rate": 0.00016907216494845361,
"loss": 2.5421,
"step": 18
},
{
"epoch": 0.01,
"learning_rate": 0.00016701030927835053,
"loss": 1.5245,
"step": 19
},
{
"epoch": 0.01,
"learning_rate": 0.00016494845360824742,
"loss": 1.9081,
"step": 20
},
{
"epoch": 0.01,
"learning_rate": 0.00016288659793814434,
"loss": 1.9725,
"step": 21
},
{
"epoch": 0.01,
"learning_rate": 0.00016082474226804125,
"loss": 1.8649,
"step": 22
},
{
"epoch": 0.01,
"learning_rate": 0.00015876288659793814,
"loss": 1.5654,
"step": 23
},
{
"epoch": 0.01,
"learning_rate": 0.00015670103092783506,
"loss": 1.8573,
"step": 24
},
{
"epoch": 0.01,
"learning_rate": 0.00015463917525773197,
"loss": 1.6624,
"step": 25
},
{
"epoch": 0.01,
"learning_rate": 0.00015257731958762886,
"loss": 1.95,
"step": 26
},
{
"epoch": 0.01,
"learning_rate": 0.00015051546391752578,
"loss": 1.5541,
"step": 27
},
{
"epoch": 0.01,
"learning_rate": 0.0001484536082474227,
"loss": 1.4851,
"step": 28
},
{
"epoch": 0.01,
"learning_rate": 0.00014639175257731958,
"loss": 1.6751,
"step": 29
},
{
"epoch": 0.01,
"learning_rate": 0.0001443298969072165,
"loss": 1.5637,
"step": 30
},
{
"epoch": 0.01,
"learning_rate": 0.00014226804123711342,
"loss": 1.6947,
"step": 31
},
{
"epoch": 0.01,
"learning_rate": 0.0001402061855670103,
"loss": 1.363,
"step": 32
},
{
"epoch": 0.01,
"learning_rate": 0.00013814432989690722,
"loss": 2.0642,
"step": 33
},
{
"epoch": 0.01,
"learning_rate": 0.00013608247422680414,
"loss": 1.5458,
"step": 34
},
{
"epoch": 0.01,
"learning_rate": 0.00013402061855670103,
"loss": 1.8339,
"step": 35
},
{
"epoch": 0.01,
"learning_rate": 0.00013195876288659794,
"loss": 1.6049,
"step": 36
},
{
"epoch": 0.01,
"learning_rate": 0.00012989690721649486,
"loss": 1.5555,
"step": 37
},
{
"epoch": 0.02,
"learning_rate": 0.00012783505154639175,
"loss": 1.5485,
"step": 38
},
{
"epoch": 0.02,
"learning_rate": 0.00012577319587628866,
"loss": 2.0987,
"step": 39
},
{
"epoch": 0.02,
"learning_rate": 0.00012371134020618558,
"loss": 1.4402,
"step": 40
},
{
"epoch": 0.02,
"learning_rate": 0.00012164948453608247,
"loss": 1.4069,
"step": 41
},
{
"epoch": 0.02,
"learning_rate": 0.00011958762886597938,
"loss": 1.5902,
"step": 42
},
{
"epoch": 0.02,
"learning_rate": 0.0001175257731958763,
"loss": 1.8907,
"step": 43
},
{
"epoch": 0.02,
"learning_rate": 0.00011546391752577319,
"loss": 1.6717,
"step": 44
},
{
"epoch": 0.02,
"learning_rate": 0.0001134020618556701,
"loss": 1.8157,
"step": 45
},
{
"epoch": 0.02,
"learning_rate": 0.00011134020618556702,
"loss": 1.436,
"step": 46
},
{
"epoch": 0.02,
"learning_rate": 0.00010927835051546391,
"loss": 1.8154,
"step": 47
},
{
"epoch": 0.02,
"learning_rate": 0.00010721649484536083,
"loss": 1.5157,
"step": 48
},
{
"epoch": 0.02,
"learning_rate": 0.00010515463917525774,
"loss": 1.7832,
"step": 49
},
{
"epoch": 0.02,
"learning_rate": 0.00010309278350515463,
"loss": 1.3788,
"step": 50
},
{
"epoch": 0.02,
"learning_rate": 0.00010103092783505155,
"loss": 1.7474,
"step": 51
},
{
"epoch": 0.02,
"learning_rate": 9.896907216494846e-05,
"loss": 1.6072,
"step": 52
},
{
"epoch": 0.02,
"learning_rate": 9.690721649484537e-05,
"loss": 1.5144,
"step": 53
},
{
"epoch": 0.02,
"learning_rate": 9.484536082474227e-05,
"loss": 1.3593,
"step": 54
},
{
"epoch": 0.02,
"learning_rate": 9.278350515463918e-05,
"loss": 1.4519,
"step": 55
},
{
"epoch": 0.02,
"learning_rate": 9.072164948453609e-05,
"loss": 1.6386,
"step": 56
},
{
"epoch": 0.02,
"learning_rate": 8.865979381443299e-05,
"loss": 1.6327,
"step": 57
},
{
"epoch": 0.02,
"learning_rate": 8.65979381443299e-05,
"loss": 1.5667,
"step": 58
},
{
"epoch": 0.02,
"learning_rate": 8.453608247422681e-05,
"loss": 1.8287,
"step": 59
},
{
"epoch": 0.02,
"learning_rate": 8.247422680412371e-05,
"loss": 1.7194,
"step": 60
},
{
"epoch": 0.02,
"learning_rate": 8.041237113402063e-05,
"loss": 1.7635,
"step": 61
},
{
"epoch": 0.02,
"learning_rate": 7.835051546391753e-05,
"loss": 1.5533,
"step": 62
},
{
"epoch": 0.03,
"learning_rate": 7.628865979381443e-05,
"loss": 1.4684,
"step": 63
},
{
"epoch": 0.03,
"learning_rate": 7.422680412371135e-05,
"loss": 1.3388,
"step": 64
},
{
"epoch": 0.03,
"learning_rate": 7.216494845360825e-05,
"loss": 1.4259,
"step": 65
},
{
"epoch": 0.03,
"learning_rate": 7.010309278350515e-05,
"loss": 1.8171,
"step": 66
},
{
"epoch": 0.03,
"learning_rate": 6.804123711340207e-05,
"loss": 1.4798,
"step": 67
},
{
"epoch": 0.03,
"learning_rate": 6.597938144329897e-05,
"loss": 1.5261,
"step": 68
},
{
"epoch": 0.03,
"learning_rate": 6.391752577319587e-05,
"loss": 1.5738,
"step": 69
},
{
"epoch": 0.03,
"learning_rate": 6.185567010309279e-05,
"loss": 1.593,
"step": 70
},
{
"epoch": 0.03,
"learning_rate": 5.979381443298969e-05,
"loss": 2.1242,
"step": 71
},
{
"epoch": 0.03,
"learning_rate": 5.7731958762886594e-05,
"loss": 1.8834,
"step": 72
},
{
"epoch": 0.03,
"learning_rate": 5.567010309278351e-05,
"loss": 1.3748,
"step": 73
},
{
"epoch": 0.03,
"learning_rate": 5.360824742268041e-05,
"loss": 1.3684,
"step": 74
},
{
"epoch": 0.03,
"learning_rate": 5.1546391752577315e-05,
"loss": 1.392,
"step": 75
},
{
"epoch": 0.03,
"learning_rate": 4.948453608247423e-05,
"loss": 1.8425,
"step": 76
},
{
"epoch": 0.03,
"learning_rate": 4.7422680412371134e-05,
"loss": 1.6621,
"step": 77
},
{
"epoch": 0.03,
"learning_rate": 4.536082474226804e-05,
"loss": 1.5169,
"step": 78
},
{
"epoch": 0.03,
"learning_rate": 4.329896907216495e-05,
"loss": 1.4549,
"step": 79
},
{
"epoch": 0.03,
"learning_rate": 4.1237113402061855e-05,
"loss": 1.6084,
"step": 80
},
{
"epoch": 0.03,
"learning_rate": 3.9175257731958764e-05,
"loss": 1.3467,
"step": 81
},
{
"epoch": 0.03,
"learning_rate": 3.7113402061855674e-05,
"loss": 1.6058,
"step": 82
},
{
"epoch": 0.03,
"learning_rate": 3.5051546391752576e-05,
"loss": 1.3481,
"step": 83
},
{
"epoch": 0.03,
"learning_rate": 3.2989690721649485e-05,
"loss": 1.9237,
"step": 84
},
{
"epoch": 0.03,
"learning_rate": 3.0927835051546395e-05,
"loss": 1.5566,
"step": 85
},
{
"epoch": 0.03,
"learning_rate": 2.8865979381443297e-05,
"loss": 1.5145,
"step": 86
},
{
"epoch": 0.03,
"learning_rate": 2.6804123711340206e-05,
"loss": 1.377,
"step": 87
},
{
"epoch": 0.04,
"learning_rate": 2.4742268041237116e-05,
"loss": 1.6574,
"step": 88
},
{
"epoch": 0.04,
"learning_rate": 2.268041237113402e-05,
"loss": 1.6581,
"step": 89
},
{
"epoch": 0.04,
"learning_rate": 2.0618556701030927e-05,
"loss": 1.872,
"step": 90
},
{
"epoch": 0.04,
"learning_rate": 1.8556701030927837e-05,
"loss": 1.4659,
"step": 91
},
{
"epoch": 0.04,
"learning_rate": 1.6494845360824743e-05,
"loss": 1.0273,
"step": 92
},
{
"epoch": 0.04,
"learning_rate": 1.4432989690721649e-05,
"loss": 1.5946,
"step": 93
},
{
"epoch": 0.04,
"learning_rate": 1.2371134020618558e-05,
"loss": 2.0506,
"step": 94
},
{
"epoch": 0.04,
"learning_rate": 1.0309278350515464e-05,
"loss": 1.4259,
"step": 95
}
],
"logging_steps": 1,
"max_steps": 100,
"num_train_epochs": 1,
"save_steps": 5,
"total_flos": 1.147472100999168e+16,
"trial_name": null,
"trial_params": null
}