{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 112,
  "global_step": 1344,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.002232142857142857,
      "grad_norm": 4.520495891571045,
      "learning_rate": 2.0000000000000002e-07,
      "loss": 0.8829,
      "step": 1
    },
    {
      "epoch": 0.002232142857142857,
      "eval_loss": 0.8741821050643921,
      "eval_runtime": 23.3187,
      "eval_samples_per_second": 3.131,
      "eval_steps_per_second": 0.429,
      "step": 1
    },
    {
      "epoch": 0.004464285714285714,
      "grad_norm": 4.709418773651123,
      "learning_rate": 4.0000000000000003e-07,
      "loss": 0.9726,
      "step": 2
    },
    {
      "epoch": 0.006696428571428571,
      "grad_norm": 4.854740619659424,
      "learning_rate": 6.000000000000001e-07,
      "loss": 0.94,
      "step": 3
    },
    {
      "epoch": 0.008928571428571428,
      "grad_norm": 4.648777008056641,
      "learning_rate": 8.000000000000001e-07,
      "loss": 0.8458,
      "step": 4
    },
    {
      "epoch": 0.011160714285714286,
      "grad_norm": 5.138184547424316,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 1.0834,
      "step": 5
    },
    {
      "epoch": 0.013392857142857142,
      "grad_norm": 4.406048774719238,
      "learning_rate": 1.2000000000000002e-06,
      "loss": 0.9389,
      "step": 6
    },
    {
      "epoch": 0.015625,
      "grad_norm": 4.439329147338867,
      "learning_rate": 1.4000000000000001e-06,
      "loss": 0.949,
      "step": 7
    },
    {
      "epoch": 0.017857142857142856,
      "grad_norm": 4.817677021026611,
      "learning_rate": 1.6000000000000001e-06,
      "loss": 0.9092,
      "step": 8
    },
    {
      "epoch": 0.020089285714285716,
      "grad_norm": 4.216228485107422,
      "learning_rate": 1.8000000000000001e-06,
      "loss": 0.9105,
      "step": 9
    },
    {
      "epoch": 0.022321428571428572,
      "grad_norm": 4.030458927154541,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 0.9263,
      "step": 10
    },
    {
      "epoch": 0.024553571428571428,
      "grad_norm": 4.106152057647705,
      "learning_rate": 2.2e-06,
      "loss": 0.991,
      "step": 11
    },
    {
      "epoch": 0.026785714285714284,
      "grad_norm": 3.9047749042510986,
      "learning_rate": 2.4000000000000003e-06,
      "loss": 0.8173,
      "step": 12
    },
    {
      "epoch": 0.029017857142857144,
      "grad_norm": 3.7009527683258057,
      "learning_rate": 2.6e-06,
      "loss": 1.0595,
      "step": 13
    },
    {
      "epoch": 0.03125,
      "grad_norm": 3.7975056171417236,
      "learning_rate": 2.8000000000000003e-06,
      "loss": 0.9012,
      "step": 14
    },
    {
      "epoch": 0.033482142857142856,
      "grad_norm": 2.969536066055298,
      "learning_rate": 3e-06,
      "loss": 0.8177,
      "step": 15
    },
    {
      "epoch": 0.03571428571428571,
      "grad_norm": 3.660879373550415,
      "learning_rate": 3.2000000000000003e-06,
      "loss": 0.915,
      "step": 16
    },
    {
      "epoch": 0.03794642857142857,
      "grad_norm": 3.0569710731506348,
      "learning_rate": 3.4000000000000005e-06,
      "loss": 0.8795,
      "step": 17
    },
    {
      "epoch": 0.04017857142857143,
      "grad_norm": 2.7441296577453613,
      "learning_rate": 3.6000000000000003e-06,
      "loss": 0.8276,
      "step": 18
    },
    {
      "epoch": 0.04241071428571429,
      "grad_norm": 2.8655402660369873,
      "learning_rate": 3.8000000000000005e-06,
      "loss": 0.7529,
      "step": 19
    },
    {
      "epoch": 0.044642857142857144,
      "grad_norm": 2.769359827041626,
      "learning_rate": 4.000000000000001e-06,
      "loss": 0.817,
      "step": 20
    },
    {
      "epoch": 0.046875,
      "grad_norm": 3.5258753299713135,
      "learning_rate": 4.2000000000000004e-06,
      "loss": 0.9505,
      "step": 21
    },
    {
      "epoch": 0.049107142857142856,
      "grad_norm": 3.55863356590271,
      "learning_rate": 4.4e-06,
      "loss": 0.8813,
      "step": 22
    },
    {
      "epoch": 0.05133928571428571,
      "grad_norm": 3.1872193813323975,
      "learning_rate": 4.600000000000001e-06,
      "loss": 0.8472,
      "step": 23
    },
    {
      "epoch": 0.05357142857142857,
      "grad_norm": 3.643343687057495,
      "learning_rate": 4.800000000000001e-06,
      "loss": 0.8751,
      "step": 24
    },
    {
      "epoch": 0.05580357142857143,
      "grad_norm": 3.154827356338501,
      "learning_rate": 5e-06,
      "loss": 0.827,
      "step": 25
    },
    {
      "epoch": 0.05803571428571429,
      "grad_norm": 2.529634714126587,
      "learning_rate": 5.2e-06,
      "loss": 0.8818,
      "step": 26
    },
    {
      "epoch": 0.060267857142857144,
      "grad_norm": 2.5746371746063232,
      "learning_rate": 5.400000000000001e-06,
      "loss": 0.885,
      "step": 27
    },
    {
      "epoch": 0.0625,
      "grad_norm": 2.452150583267212,
      "learning_rate": 5.600000000000001e-06,
      "loss": 0.975,
      "step": 28
    },
    {
      "epoch": 0.06473214285714286,
      "grad_norm": 2.5640347003936768,
      "learning_rate": 5.8e-06,
      "loss": 0.7795,
      "step": 29
    },
    {
      "epoch": 0.06696428571428571,
      "grad_norm": 2.178790330886841,
      "learning_rate": 6e-06,
      "loss": 0.7853,
      "step": 30
    },
    {
      "epoch": 0.06919642857142858,
      "grad_norm": 2.054187297821045,
      "learning_rate": 6.200000000000001e-06,
      "loss": 0.7556,
      "step": 31
    },
    {
      "epoch": 0.07142857142857142,
      "grad_norm": 2.3759331703186035,
      "learning_rate": 6.4000000000000006e-06,
      "loss": 0.8584,
      "step": 32
    },
    {
      "epoch": 0.07366071428571429,
      "grad_norm": 2.5890913009643555,
      "learning_rate": 6.600000000000001e-06,
      "loss": 0.8545,
      "step": 33
    },
    {
      "epoch": 0.07589285714285714,
      "grad_norm": 2.1318633556365967,
      "learning_rate": 6.800000000000001e-06,
      "loss": 0.8014,
      "step": 34
    },
    {
      "epoch": 0.078125,
      "grad_norm": 2.1878416538238525,
      "learning_rate": 7e-06,
      "loss": 0.8423,
      "step": 35
    },
    {
      "epoch": 0.08035714285714286,
      "grad_norm": 1.959555745124817,
      "learning_rate": 7.2000000000000005e-06,
      "loss": 0.7856,
      "step": 36
    },
    {
      "epoch": 0.08258928571428571,
      "grad_norm": 1.9672911167144775,
      "learning_rate": 7.4e-06,
      "loss": 0.7517,
      "step": 37
    },
    {
      "epoch": 0.08482142857142858,
      "grad_norm": 2.133237600326538,
      "learning_rate": 7.600000000000001e-06,
      "loss": 0.8081,
      "step": 38
    },
    {
      "epoch": 0.08705357142857142,
      "grad_norm": 2.6118452548980713,
      "learning_rate": 7.800000000000002e-06,
      "loss": 0.9733,
      "step": 39
    },
    {
      "epoch": 0.08928571428571429,
      "grad_norm": 1.9084440469741821,
      "learning_rate": 8.000000000000001e-06,
      "loss": 0.6862,
      "step": 40
    },
    {
      "epoch": 0.09151785714285714,
      "grad_norm": 2.092421531677246,
      "learning_rate": 8.2e-06,
      "loss": 0.7759,
      "step": 41
    },
    {
      "epoch": 0.09375,
      "grad_norm": 2.126476764678955,
      "learning_rate": 8.400000000000001e-06,
      "loss": 0.8206,
      "step": 42
    },
    {
      "epoch": 0.09598214285714286,
      "grad_norm": 1.9326355457305908,
      "learning_rate": 8.6e-06,
      "loss": 0.8061,
      "step": 43
    },
    {
      "epoch": 0.09821428571428571,
      "grad_norm": 2.1919474601745605,
      "learning_rate": 8.8e-06,
      "loss": 0.8693,
      "step": 44
    },
    {
      "epoch": 0.10044642857142858,
      "grad_norm": 2.066986322402954,
      "learning_rate": 9e-06,
      "loss": 0.8146,
      "step": 45
    },
    {
      "epoch": 0.10267857142857142,
      "grad_norm": 2.196195602416992,
      "learning_rate": 9.200000000000002e-06,
      "loss": 0.8237,
      "step": 46
    },
    {
      "epoch": 0.10491071428571429,
      "grad_norm": 2.36797833442688,
      "learning_rate": 9.4e-06,
      "loss": 0.8609,
      "step": 47
    },
    {
      "epoch": 0.10714285714285714,
      "grad_norm": 2.007786512374878,
      "learning_rate": 9.600000000000001e-06,
      "loss": 0.8415,
      "step": 48
    },
    {
      "epoch": 0.109375,
      "grad_norm": 2.127784013748169,
      "learning_rate": 9.800000000000001e-06,
      "loss": 0.8149,
      "step": 49
    },
    {
      "epoch": 0.11160714285714286,
      "grad_norm": 1.842410922050476,
      "learning_rate": 1e-05,
      "loss": 0.6706,
      "step": 50
    },
    {
      "epoch": 0.11383928571428571,
      "grad_norm": 2.025834321975708,
      "learning_rate": 1.02e-05,
      "loss": 0.7797,
      "step": 51
    },
    {
      "epoch": 0.11607142857142858,
      "grad_norm": 2.0152997970581055,
      "learning_rate": 1.04e-05,
      "loss": 0.7977,
      "step": 52
    },
    {
      "epoch": 0.11830357142857142,
      "grad_norm": 1.8089625835418701,
      "learning_rate": 1.0600000000000002e-05,
      "loss": 0.7222,
      "step": 53
    },
    {
      "epoch": 0.12053571428571429,
      "grad_norm": 1.9475045204162598,
      "learning_rate": 1.0800000000000002e-05,
      "loss": 0.7971,
      "step": 54
    },
    {
      "epoch": 0.12276785714285714,
      "grad_norm": 1.9405206441879272,
      "learning_rate": 1.1000000000000001e-05,
      "loss": 0.77,
      "step": 55
    },
    {
      "epoch": 0.125,
      "grad_norm": 1.7220442295074463,
      "learning_rate": 1.1200000000000001e-05,
      "loss": 0.6592,
      "step": 56
    },
    {
      "epoch": 0.12723214285714285,
      "grad_norm": 2.070206880569458,
      "learning_rate": 1.14e-05,
      "loss": 0.8843,
      "step": 57
    },
    {
      "epoch": 0.12946428571428573,
      "grad_norm": 2.2304985523223877,
      "learning_rate": 1.16e-05,
      "loss": 0.7968,
      "step": 58
    },
    {
      "epoch": 0.13169642857142858,
      "grad_norm": 2.300931215286255,
      "learning_rate": 1.18e-05,
      "loss": 0.7917,
      "step": 59
    },
    {
      "epoch": 0.13392857142857142,
      "grad_norm": 2.126228094100952,
      "learning_rate": 1.2e-05,
      "loss": 0.7965,
      "step": 60
    },
    {
      "epoch": 0.13616071428571427,
      "grad_norm": 2.0050771236419678,
      "learning_rate": 1.22e-05,
      "loss": 0.7334,
      "step": 61
    },
    {
      "epoch": 0.13839285714285715,
      "grad_norm": 2.097790241241455,
      "learning_rate": 1.2400000000000002e-05,
      "loss": 0.7254,
      "step": 62
    },
    {
      "epoch": 0.140625,
      "grad_norm": 2.2999649047851562,
      "learning_rate": 1.2600000000000001e-05,
      "loss": 0.7892,
      "step": 63
    },
    {
      "epoch": 0.14285714285714285,
      "grad_norm": 2.2662696838378906,
      "learning_rate": 1.2800000000000001e-05,
      "loss": 0.7692,
      "step": 64
    },
    {
      "epoch": 0.14508928571428573,
      "grad_norm": 1.8592685461044312,
      "learning_rate": 1.3000000000000001e-05,
      "loss": 0.7562,
      "step": 65
    },
    {
      "epoch": 0.14732142857142858,
      "grad_norm": 2.0617785453796387,
      "learning_rate": 1.3200000000000002e-05,
      "loss": 0.8464,
      "step": 66
    },
    {
      "epoch": 0.14955357142857142,
      "grad_norm": 1.990391492843628,
      "learning_rate": 1.3400000000000002e-05,
      "loss": 0.7656,
      "step": 67
    },
    {
      "epoch": 0.15178571428571427,
      "grad_norm": 2.021301031112671,
      "learning_rate": 1.3600000000000002e-05,
      "loss": 0.7506,
      "step": 68
    },
    {
      "epoch": 0.15401785714285715,
      "grad_norm": 2.1052801609039307,
      "learning_rate": 1.38e-05,
      "loss": 0.8233,
      "step": 69
    },
    {
      "epoch": 0.15625,
      "grad_norm": 2.0981056690216064,
      "learning_rate": 1.4e-05,
      "loss": 0.7702,
      "step": 70
    },
    {
      "epoch": 0.15848214285714285,
      "grad_norm": 1.9719496965408325,
      "learning_rate": 1.4200000000000001e-05,
      "loss": 0.7602,
      "step": 71
    },
    {
      "epoch": 0.16071428571428573,
      "grad_norm": 1.983307957649231,
      "learning_rate": 1.4400000000000001e-05,
      "loss": 0.7432,
      "step": 72
    },
    {
      "epoch": 0.16294642857142858,
      "grad_norm": 2.3522326946258545,
      "learning_rate": 1.46e-05,
      "loss": 0.8172,
      "step": 73
    },
    {
      "epoch": 0.16517857142857142,
      "grad_norm": 2.026918888092041,
      "learning_rate": 1.48e-05,
      "loss": 0.6841,
      "step": 74
    },
    {
      "epoch": 0.16741071428571427,
      "grad_norm": 2.1341769695281982,
      "learning_rate": 1.5000000000000002e-05,
      "loss": 0.852,
      "step": 75
    },
    {
      "epoch": 0.16964285714285715,
      "grad_norm": 1.8743571043014526,
      "learning_rate": 1.5200000000000002e-05,
      "loss": 0.7731,
      "step": 76
    },
    {
      "epoch": 0.171875,
      "grad_norm": 2.333038330078125,
      "learning_rate": 1.54e-05,
      "loss": 0.8161,
      "step": 77
    },
    {
      "epoch": 0.17410714285714285,
      "grad_norm": 2.553131103515625,
      "learning_rate": 1.5600000000000003e-05,
      "loss": 0.9338,
      "step": 78
    },
    {
      "epoch": 0.17633928571428573,
      "grad_norm": 1.8976587057113647,
      "learning_rate": 1.58e-05,
      "loss": 0.772,
      "step": 79
    },
    {
      "epoch": 0.17857142857142858,
      "grad_norm": 2.028928279876709,
      "learning_rate": 1.6000000000000003e-05,
      "loss": 0.8268,
      "step": 80
    },
    {
      "epoch": 0.18080357142857142,
      "grad_norm": 2.094634771347046,
      "learning_rate": 1.62e-05,
      "loss": 0.8385,
      "step": 81
    },
    {
      "epoch": 0.18303571428571427,
      "grad_norm": 2.3168070316314697,
      "learning_rate": 1.64e-05,
      "loss": 0.9017,
      "step": 82
    },
    {
      "epoch": 0.18526785714285715,
      "grad_norm": 2.350069522857666,
      "learning_rate": 1.66e-05,
      "loss": 0.8375,
      "step": 83
    },
    {
      "epoch": 0.1875,
      "grad_norm": 1.871971607208252,
      "learning_rate": 1.6800000000000002e-05,
      "loss": 0.7232,
      "step": 84
    },
    {
      "epoch": 0.18973214285714285,
      "grad_norm": 2.1683645248413086,
      "learning_rate": 1.7e-05,
      "loss": 0.7449,
      "step": 85
    },
    {
      "epoch": 0.19196428571428573,
      "grad_norm": 1.8138465881347656,
      "learning_rate": 1.72e-05,
      "loss": 0.6897,
      "step": 86
    },
    {
      "epoch": 0.19419642857142858,
      "grad_norm": 2.2803397178649902,
      "learning_rate": 1.7400000000000003e-05,
      "loss": 0.8541,
      "step": 87
    },
    {
      "epoch": 0.19642857142857142,
      "grad_norm": 1.8534305095672607,
      "learning_rate": 1.76e-05,
      "loss": 0.7187,
      "step": 88
    },
    {
      "epoch": 0.19866071428571427,
      "grad_norm": 2.2822651863098145,
      "learning_rate": 1.7800000000000002e-05,
      "loss": 0.7458,
      "step": 89
    },
    {
      "epoch": 0.20089285714285715,
      "grad_norm": 2.2075366973876953,
      "learning_rate": 1.8e-05,
      "loss": 0.8119,
      "step": 90
    },
    {
      "epoch": 0.203125,
      "grad_norm": 2.797893762588501,
      "learning_rate": 1.8200000000000002e-05,
      "loss": 0.9198,
      "step": 91
    },
    {
      "epoch": 0.20535714285714285,
      "grad_norm": 2.2375845909118652,
      "learning_rate": 1.8400000000000003e-05,
      "loss": 0.6985,
      "step": 92
    },
    {
      "epoch": 0.20758928571428573,
      "grad_norm": 2.1225900650024414,
      "learning_rate": 1.86e-05,
      "loss": 0.8483,
      "step": 93
    },
    {
      "epoch": 0.20982142857142858,
      "grad_norm": 1.8341416120529175,
      "learning_rate": 1.88e-05,
      "loss": 0.7703,
      "step": 94
    },
    {
      "epoch": 0.21205357142857142,
      "grad_norm": 2.27540922164917,
      "learning_rate": 1.9e-05,
      "loss": 0.8437,
      "step": 95
    },
    {
      "epoch": 0.21428571428571427,
      "grad_norm": 2.091398000717163,
      "learning_rate": 1.9200000000000003e-05,
      "loss": 0.7553,
      "step": 96
    },
    {
      "epoch": 0.21651785714285715,
      "grad_norm": 1.8585134744644165,
      "learning_rate": 1.94e-05,
      "loss": 0.6444,
      "step": 97
    },
    {
      "epoch": 0.21875,
      "grad_norm": 2.1151020526885986,
      "learning_rate": 1.9600000000000002e-05,
      "loss": 0.7303,
      "step": 98
    },
    {
      "epoch": 0.22098214285714285,
      "grad_norm": 1.9768584966659546,
      "learning_rate": 1.98e-05,
      "loss": 0.74,
      "step": 99
    },
    {
      "epoch": 0.22321428571428573,
      "grad_norm": 2.13527250289917,
      "learning_rate": 2e-05,
      "loss": 0.7134,
      "step": 100
    },
    {
      "epoch": 0.22544642857142858,
      "grad_norm": 2.309387683868408,
      "learning_rate": 1.9999968111891562e-05,
      "loss": 0.8742,
      "step": 101
    },
    {
      "epoch": 0.22767857142857142,
      "grad_norm": 1.8575270175933838,
      "learning_rate": 1.9999872447769624e-05,
      "loss": 0.7745,
      "step": 102
    },
    {
      "epoch": 0.22991071428571427,
      "grad_norm": 1.9398894309997559,
      "learning_rate": 1.9999713008244287e-05,
      "loss": 0.7618,
      "step": 103
    },
    {
      "epoch": 0.23214285714285715,
      "grad_norm": 2.5453739166259766,
      "learning_rate": 1.9999489794332404e-05,
      "loss": 0.931,
      "step": 104
    },
    {
      "epoch": 0.234375,
      "grad_norm": 2.1891441345214844,
      "learning_rate": 1.9999202807457537e-05,
      "loss": 0.8582,
      "step": 105
    },
    {
      "epoch": 0.23660714285714285,
      "grad_norm": 1.949729084968567,
      "learning_rate": 1.9998852049449998e-05,
      "loss": 0.9173,
      "step": 106
    },
    {
      "epoch": 0.23883928571428573,
      "grad_norm": 1.8219000101089478,
      "learning_rate": 1.999843752254677e-05,
      "loss": 0.7732,
      "step": 107
    },
    {
      "epoch": 0.24107142857142858,
      "grad_norm": 1.942179799079895,
      "learning_rate": 1.9997959229391567e-05,
      "loss": 0.7376,
      "step": 108
    },
    {
      "epoch": 0.24330357142857142,
      "grad_norm": 1.6319869756698608,
      "learning_rate": 1.9997417173034746e-05,
      "loss": 0.7755,
      "step": 109
    },
    {
      "epoch": 0.24553571428571427,
      "grad_norm": 1.9693115949630737,
      "learning_rate": 1.9996811356933346e-05,
      "loss": 0.7828,
      "step": 110
    },
    {
      "epoch": 0.24776785714285715,
      "grad_norm": 2.1049964427948,
      "learning_rate": 1.999614178495103e-05,
      "loss": 0.7936,
      "step": 111
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.169593572616577,
      "learning_rate": 1.9995408461358074e-05,
      "loss": 0.7894,
      "step": 112
    },
    {
      "epoch": 0.25,
      "eval_loss": 0.7251861691474915,
      "eval_runtime": 27.5365,
      "eval_samples_per_second": 2.651,
      "eval_steps_per_second": 0.363,
      "step": 112
    },
    {
      "epoch": 0.25223214285714285,
      "grad_norm": 1.8239336013793945,
      "learning_rate": 1.9994611390831342e-05,
      "loss": 0.7608,
      "step": 113
    },
    {
      "epoch": 0.2544642857142857,
      "grad_norm": 2.3521170616149902,
      "learning_rate": 1.9993750578454248e-05,
      "loss": 0.9461,
      "step": 114
    },
    {
      "epoch": 0.25669642857142855,
      "grad_norm": 2.146218776702881,
      "learning_rate": 1.9992826029716722e-05,
      "loss": 0.8203,
      "step": 115
    },
    {
      "epoch": 0.25892857142857145,
      "grad_norm": 2.1784703731536865,
      "learning_rate": 1.999183775051519e-05,
      "loss": 0.749,
      "step": 116
    },
    {
      "epoch": 0.2611607142857143,
      "grad_norm": 1.7962055206298828,
      "learning_rate": 1.9990785747152527e-05,
      "loss": 0.7431,
      "step": 117
    },
    {
      "epoch": 0.26339285714285715,
      "grad_norm": 2.184608221054077,
      "learning_rate": 1.9989670026338002e-05,
      "loss": 0.8456,
      "step": 118
    },
    {
      "epoch": 0.265625,
      "grad_norm": 2.270358085632324,
      "learning_rate": 1.9988490595187273e-05,
      "loss": 0.8213,
      "step": 119
    },
    {
      "epoch": 0.26785714285714285,
      "grad_norm": 2.243161678314209,
      "learning_rate": 1.9987247461222297e-05,
      "loss": 0.7454,
      "step": 120
    },
    {
      "epoch": 0.2700892857142857,
      "grad_norm": 2.3505022525787354,
      "learning_rate": 1.9985940632371316e-05,
      "loss": 0.853,
      "step": 121
    },
    {
      "epoch": 0.27232142857142855,
      "grad_norm": 2.321498394012451,
      "learning_rate": 1.9984570116968785e-05,
      "loss": 0.8958,
      "step": 122
    },
    {
      "epoch": 0.27455357142857145,
      "grad_norm": 2.576880693435669,
      "learning_rate": 1.9983135923755336e-05,
      "loss": 0.9688,
      "step": 123
    },
    {
      "epoch": 0.2767857142857143,
      "grad_norm": 2.2674782276153564,
      "learning_rate": 1.9981638061877714e-05,
      "loss": 0.8822,
      "step": 124
    },
    {
      "epoch": 0.27901785714285715,
      "grad_norm": 1.8983664512634277,
      "learning_rate": 1.998007654088871e-05,
      "loss": 0.6536,
      "step": 125
    },
    {
      "epoch": 0.28125,
      "grad_norm": 2.1132736206054688,
      "learning_rate": 1.9978451370747122e-05,
      "loss": 0.8452,
      "step": 126
    },
    {
      "epoch": 0.28348214285714285,
      "grad_norm": 2.033719778060913,
      "learning_rate": 1.9976762561817656e-05,
      "loss": 0.763,
      "step": 127
    },
    {
      "epoch": 0.2857142857142857,
      "grad_norm": 2.284616231918335,
      "learning_rate": 1.997501012487091e-05,
      "loss": 0.8082,
      "step": 128
    },
    {
      "epoch": 0.28794642857142855,
      "grad_norm": 1.9635744094848633,
      "learning_rate": 1.997319407108326e-05,
      "loss": 0.8587,
      "step": 129
    },
    {
      "epoch": 0.29017857142857145,
      "grad_norm": 2.307817220687866,
      "learning_rate": 1.9971314412036807e-05,
      "loss": 0.7933,
      "step": 130
    },
    {
      "epoch": 0.2924107142857143,
      "grad_norm": 2.1261589527130127,
      "learning_rate": 1.9969371159719307e-05,
      "loss": 0.8069,
      "step": 131
    },
    {
      "epoch": 0.29464285714285715,
      "grad_norm": 2.0330147743225098,
      "learning_rate": 1.996736432652409e-05,
      "loss": 0.7368,
      "step": 132
    },
    {
      "epoch": 0.296875,
      "grad_norm": 2.067072629928589,
      "learning_rate": 1.9965293925249976e-05,
      "loss": 0.7402,
      "step": 133
    },
    {
      "epoch": 0.29910714285714285,
      "grad_norm": 2.2394609451293945,
      "learning_rate": 1.9963159969101207e-05,
      "loss": 0.8081,
      "step": 134
    },
    {
      "epoch": 0.3013392857142857,
      "grad_norm": 1.8908040523529053,
      "learning_rate": 1.996096247168734e-05,
      "loss": 0.6806,
      "step": 135
    },
    {
      "epoch": 0.30357142857142855,
      "grad_norm": 2.1276235580444336,
      "learning_rate": 1.9958701447023188e-05,
      "loss": 0.8402,
      "step": 136
    },
    {
      "epoch": 0.30580357142857145,
      "grad_norm": 1.948089361190796,
      "learning_rate": 1.9956376909528704e-05,
      "loss": 0.8141,
      "step": 137
    },
    {
      "epoch": 0.3080357142857143,
      "grad_norm": 2.3023507595062256,
      "learning_rate": 1.9953988874028917e-05,
      "loss": 0.8263,
      "step": 138
    },
    {
      "epoch": 0.31026785714285715,
      "grad_norm": 2.078064441680908,
      "learning_rate": 1.995153735575381e-05,
      "loss": 0.8128,
      "step": 139
    },
    {
      "epoch": 0.3125,
      "grad_norm": 2.271723985671997,
      "learning_rate": 1.994902237033824e-05,
      "loss": 0.7636,
      "step": 140
    },
    {
      "epoch": 0.31473214285714285,
      "grad_norm": 1.9039952754974365,
      "learning_rate": 1.994644393382183e-05,
      "loss": 0.7801,
      "step": 141
    },
    {
      "epoch": 0.3169642857142857,
      "grad_norm": 2.113295078277588,
      "learning_rate": 1.9943802062648877e-05,
      "loss": 0.7634,
      "step": 142
    },
    {
      "epoch": 0.31919642857142855,
      "grad_norm": 1.9675801992416382,
      "learning_rate": 1.9941096773668232e-05,
      "loss": 0.7411,
      "step": 143
    },
    {
      "epoch": 0.32142857142857145,
      "grad_norm": 2.325932741165161,
      "learning_rate": 1.9938328084133206e-05,
      "loss": 0.8638,
      "step": 144
    },
    {
      "epoch": 0.3236607142857143,
      "grad_norm": 1.9418251514434814,
      "learning_rate": 1.9935496011701453e-05,
      "loss": 0.7443,
      "step": 145
    },
    {
      "epoch": 0.32589285714285715,
      "grad_norm": 1.611464500427246,
      "learning_rate": 1.9932600574434864e-05,
      "loss": 0.7198,
      "step": 146
    },
    {
      "epoch": 0.328125,
      "grad_norm": 2.157644748687744,
      "learning_rate": 1.9929641790799438e-05,
      "loss": 0.8276,
      "step": 147
    },
    {
      "epoch": 0.33035714285714285,
      "grad_norm": 2.295194625854492,
      "learning_rate": 1.9926619679665175e-05,
      "loss": 0.8713,
      "step": 148
    },
    {
      "epoch": 0.3325892857142857,
      "grad_norm": 2.154426097869873,
      "learning_rate": 1.992353426030596e-05,
      "loss": 0.7274,
      "step": 149
    },
    {
      "epoch": 0.33482142857142855,
      "grad_norm": 1.6973615884780884,
      "learning_rate": 1.9920385552399434e-05,
      "loss": 0.6846,
      "step": 150
    },
    {
      "epoch": 0.33705357142857145,
      "grad_norm": 1.7057573795318604,
      "learning_rate": 1.991717357602686e-05,
      "loss": 0.7335,
      "step": 151
    },
    {
      "epoch": 0.3392857142857143,
      "grad_norm": 1.9547100067138672,
      "learning_rate": 1.9913898351673006e-05,
      "loss": 0.6845,
      "step": 152
    },
    {
      "epoch": 0.34151785714285715,
      "grad_norm": 2.0757429599761963,
      "learning_rate": 1.991055990022602e-05,
      "loss": 0.7628,
      "step": 153
    },
    {
      "epoch": 0.34375,
      "grad_norm": 2.1312568187713623,
      "learning_rate": 1.990715824297728e-05,
      "loss": 0.7328,
      "step": 154
    },
    {
      "epoch": 0.34598214285714285,
      "grad_norm": 1.9267735481262207,
      "learning_rate": 1.990369340162127e-05,
      "loss": 0.8076,
      "step": 155
    },
    {
      "epoch": 0.3482142857142857,
      "grad_norm": 1.9615391492843628,
      "learning_rate": 1.9900165398255434e-05,
      "loss": 0.7789,
      "step": 156
    },
    {
      "epoch": 0.35044642857142855,
      "grad_norm": 1.7132021188735962,
      "learning_rate": 1.9896574255380045e-05,
      "loss": 0.7017,
      "step": 157
    },
    {
      "epoch": 0.35267857142857145,
      "grad_norm": 2.122762680053711,
      "learning_rate": 1.9892919995898052e-05,
      "loss": 0.7483,
      "step": 158
    },
    {
      "epoch": 0.3549107142857143,
      "grad_norm": 2.0038235187530518,
      "learning_rate": 1.988920264311494e-05,
      "loss": 0.6985,
      "step": 159
    },
    {
      "epoch": 0.35714285714285715,
      "grad_norm": 2.013420820236206,
      "learning_rate": 1.9885422220738583e-05,
      "loss": 0.6655,
      "step": 160
    },
    {
      "epoch": 0.359375,
      "grad_norm": 2.1159446239471436,
      "learning_rate": 1.988157875287908e-05,
      "loss": 0.8129,
      "step": 161
    },
    {
      "epoch": 0.36160714285714285,
      "grad_norm": 1.8331681489944458,
      "learning_rate": 1.9877672264048618e-05,
      "loss": 0.667,
      "step": 162
    },
    {
      "epoch": 0.3638392857142857,
      "grad_norm": 1.9691740274429321,
      "learning_rate": 1.98737027791613e-05,
      "loss": 0.6916,
      "step": 163
    },
    {
      "epoch": 0.36607142857142855,
      "grad_norm": 2.064512252807617,
      "learning_rate": 1.9869670323533005e-05,
      "loss": 0.8727,
      "step": 164
    },
    {
      "epoch": 0.36830357142857145,
      "grad_norm": 2.3000264167785645,
      "learning_rate": 1.9865574922881204e-05,
      "loss": 0.7485,
      "step": 165
    },
    {
      "epoch": 0.3705357142857143,
      "grad_norm": 2.070896625518799,
      "learning_rate": 1.986141660332482e-05,
      "loss": 0.8254,
      "step": 166
    },
    {
      "epoch": 0.37276785714285715,
      "grad_norm": 2.051863431930542,
      "learning_rate": 1.9857195391384038e-05,
      "loss": 0.801,
      "step": 167
    },
    {
      "epoch": 0.375,
      "grad_norm": 2.0693776607513428,
      "learning_rate": 1.9852911313980146e-05,
      "loss": 0.6922,
      "step": 168
    },
    {
      "epoch": 0.37723214285714285,
      "grad_norm": 1.5244134664535522,
      "learning_rate": 1.9848564398435374e-05,
      "loss": 0.7052,
      "step": 169
    },
    {
      "epoch": 0.3794642857142857,
      "grad_norm": 1.927579641342163,
      "learning_rate": 1.9844154672472707e-05,
      "loss": 0.7238,
      "step": 170
    },
    {
      "epoch": 0.38169642857142855,
      "grad_norm": 1.7581312656402588,
      "learning_rate": 1.9839682164215707e-05,
      "loss": 0.7498,
      "step": 171
    },
    {
      "epoch": 0.38392857142857145,
      "grad_norm": 2.004220485687256,
      "learning_rate": 1.9835146902188336e-05,
      "loss": 0.8368,
      "step": 172
    },
    {
      "epoch": 0.3861607142857143,
      "grad_norm": 1.9040734767913818,
      "learning_rate": 1.983054891531478e-05,
      "loss": 0.7625,
      "step": 173
    },
    {
      "epoch": 0.38839285714285715,
      "grad_norm": 1.787835955619812,
      "learning_rate": 1.9825888232919268e-05,
      "loss": 0.7894,
      "step": 174
    },
    {
      "epoch": 0.390625,
      "grad_norm": 1.8486220836639404,
      "learning_rate": 1.982116488472586e-05,
      "loss": 0.7794,
      "step": 175
    },
    {
      "epoch": 0.39285714285714285,
      "grad_norm": 2.068049907684326,
      "learning_rate": 1.9816378900858288e-05,
      "loss": 0.7192,
      "step": 176
    },
    {
      "epoch": 0.3950892857142857,
      "grad_norm": 1.8563698530197144,
      "learning_rate": 1.9811530311839747e-05,
      "loss": 0.8747,
      "step": 177
    },
    {
      "epoch": 0.39732142857142855,
      "grad_norm": 2.2349257469177246,
      "learning_rate": 1.98066191485927e-05,
      "loss": 0.9516,
      "step": 178
    },
    {
      "epoch": 0.39955357142857145,
      "grad_norm": 1.9404733180999756,
      "learning_rate": 1.980164544243869e-05,
      "loss": 0.7122,
      "step": 179
    },
    {
      "epoch": 0.4017857142857143,
      "grad_norm": 2.0351598262786865,
      "learning_rate": 1.9796609225098136e-05,
      "loss": 0.8076,
      "step": 180
    },
    {
      "epoch": 0.40401785714285715,
      "grad_norm": 1.7152974605560303,
      "learning_rate": 1.9791510528690125e-05,
      "loss": 0.7297,
      "step": 181
    },
    {
      "epoch": 0.40625,
      "grad_norm": 1.8740495443344116,
      "learning_rate": 1.9786349385732212e-05,
      "loss": 0.7284,
      "step": 182
    },
    {
      "epoch": 0.40848214285714285,
      "grad_norm": 1.9318393468856812,
      "learning_rate": 1.9781125829140214e-05,
      "loss": 0.6855,
      "step": 183
    },
    {
      "epoch": 0.4107142857142857,
      "grad_norm": 1.8202929496765137,
      "learning_rate": 1.9775839892228004e-05,
      "loss": 0.7345,
      "step": 184
    },
    {
      "epoch": 0.41294642857142855,
      "grad_norm": 1.782867670059204,
      "learning_rate": 1.977049160870728e-05,
      "loss": 0.744,
      "step": 185
    },
    {
      "epoch": 0.41517857142857145,
      "grad_norm": 1.9989078044891357,
      "learning_rate": 1.976508101268738e-05,
      "loss": 0.8473,
      "step": 186
    },
    {
      "epoch": 0.4174107142857143,
      "grad_norm": 2.07568359375,
      "learning_rate": 1.975960813867503e-05,
      "loss": 0.8046,
      "step": 187
    },
    {
      "epoch": 0.41964285714285715,
      "grad_norm": 1.87251615524292,
      "learning_rate": 1.9754073021574153e-05,
      "loss": 0.7159,
      "step": 188
    },
    {
      "epoch": 0.421875,
      "grad_norm": 2.1433751583099365,
      "learning_rate": 1.9748475696685637e-05,
      "loss": 0.8732,
      "step": 189
    },
    {
      "epoch": 0.42410714285714285,
      "grad_norm": 1.7815970182418823,
      "learning_rate": 1.9742816199707096e-05,
      "loss": 0.7325,
      "step": 190
    },
    {
      "epoch": 0.4263392857142857,
      "grad_norm": 2.1016180515289307,
      "learning_rate": 1.9737094566732663e-05,
      "loss": 0.8413,
      "step": 191
    },
    {
      "epoch": 0.42857142857142855,
      "grad_norm": 2.0545904636383057,
      "learning_rate": 1.9731310834252747e-05,
      "loss": 0.7327,
      "step": 192
    },
    {
      "epoch": 0.43080357142857145,
      "grad_norm": 1.8373966217041016,
      "learning_rate": 1.972546503915381e-05,
      "loss": 0.6376,
      "step": 193
    },
    {
      "epoch": 0.4330357142857143,
      "grad_norm": 1.8860907554626465,
      "learning_rate": 1.9719557218718116e-05,
      "loss": 0.6071,
      "step": 194
    },
    {
      "epoch": 0.43526785714285715,
      "grad_norm": 1.9010783433914185,
      "learning_rate": 1.9713587410623516e-05,
      "loss": 0.6556,
      "step": 195
    },
    {
      "epoch": 0.4375,
      "grad_norm": 2.155518054962158,
      "learning_rate": 1.970755565294318e-05,
      "loss": 0.8064,
      "step": 196
    },
    {
      "epoch": 0.43973214285714285,
      "grad_norm": 1.8603652715682983,
      "learning_rate": 1.970146198414538e-05,
      "loss": 0.7676,
      "step": 197
    },
    {
      "epoch": 0.4419642857142857,
      "grad_norm": 1.6968109607696533,
      "learning_rate": 1.969530644309323e-05,
      "loss": 0.6538,
      "step": 198
    },
    {
      "epoch": 0.44419642857142855,
      "grad_norm": 1.864494800567627,
      "learning_rate": 1.968908906904444e-05,
      "loss": 0.655,
      "step": 199
    },
    {
      "epoch": 0.44642857142857145,
      "grad_norm": 1.8527575731277466,
      "learning_rate": 1.9682809901651074e-05,
      "loss": 0.7734,
      "step": 200
    },
    {
      "epoch": 0.4486607142857143,
      "grad_norm": 1.9814064502716064,
      "learning_rate": 1.9676468980959284e-05,
      "loss": 0.6819,
      "step": 201
    },
    {
      "epoch": 0.45089285714285715,
      "grad_norm": 2.267021894454956,
      "learning_rate": 1.9670066347409063e-05,
      "loss": 0.8216,
      "step": 202
    },
    {
      "epoch": 0.453125,
      "grad_norm": 2.0436460971832275,
      "learning_rate": 1.9663602041833983e-05,
      "loss": 0.8168,
      "step": 203
    },
    {
      "epoch": 0.45535714285714285,
      "grad_norm": 1.9789938926696777,
      "learning_rate": 1.9657076105460945e-05,
      "loss": 0.7879,
      "step": 204
    },
    {
      "epoch": 0.4575892857142857,
      "grad_norm": 1.8295159339904785,
      "learning_rate": 1.9650488579909898e-05,
      "loss": 0.7912,
      "step": 205
    },
    {
      "epoch": 0.45982142857142855,
      "grad_norm": 2.1058108806610107,
      "learning_rate": 1.964383950719359e-05,
      "loss": 0.8244,
      "step": 206
    },
    {
      "epoch": 0.46205357142857145,
      "grad_norm": 1.6311708688735962,
      "learning_rate": 1.9637128929717294e-05,
      "loss": 0.7164,
      "step": 207
    },
    {
      "epoch": 0.4642857142857143,
      "grad_norm": 1.8252456188201904,
      "learning_rate": 1.9630356890278527e-05,
      "loss": 0.7296,
      "step": 208
    },
    {
      "epoch": 0.46651785714285715,
      "grad_norm": 2.008681297302246,
      "learning_rate": 1.96235234320668e-05,
      "loss": 0.7393,
      "step": 209
    },
    {
      "epoch": 0.46875,
      "grad_norm": 2.0544400215148926,
      "learning_rate": 1.9616628598663322e-05,
      "loss": 0.8566,
      "step": 210
    },
    {
      "epoch": 0.47098214285714285,
      "grad_norm": 1.8580057621002197,
      "learning_rate": 1.9609672434040736e-05,
      "loss": 0.7186,
      "step": 211
    },
    {
      "epoch": 0.4732142857142857,
      "grad_norm": 1.7987284660339355,
      "learning_rate": 1.9602654982562822e-05,
      "loss": 0.8183,
      "step": 212
    },
    {
      "epoch": 0.47544642857142855,
      "grad_norm": 1.8287429809570312,
      "learning_rate": 1.9595576288984233e-05,
      "loss": 0.6638,
      "step": 213
    },
    {
      "epoch": 0.47767857142857145,
      "grad_norm": 1.8274677991867065,
      "learning_rate": 1.9588436398450206e-05,
      "loss": 0.777,
      "step": 214
    },
    {
      "epoch": 0.4799107142857143,
      "grad_norm": 1.702154517173767,
      "learning_rate": 1.958123535649625e-05,
      "loss": 0.7325,
      "step": 215
    },
    {
      "epoch": 0.48214285714285715,
      "grad_norm": 1.7472929954528809,
      "learning_rate": 1.9573973209047893e-05,
      "loss": 0.7387,
      "step": 216
    },
    {
      "epoch": 0.484375,
      "grad_norm": 2.046131134033203,
      "learning_rate": 1.9566650002420363e-05,
      "loss": 0.8264,
      "step": 217
    },
    {
      "epoch": 0.48660714285714285,
      "grad_norm": 1.8448314666748047,
      "learning_rate": 1.9559265783318304e-05,
      "loss": 0.7476,
      "step": 218
    },
    {
      "epoch": 0.4888392857142857,
      "grad_norm": 1.8311007022857666,
      "learning_rate": 1.9551820598835464e-05,
      "loss": 0.7377,
      "step": 219
    },
    {
      "epoch": 0.49107142857142855,
      "grad_norm": 1.852664589881897,
      "learning_rate": 1.9544314496454423e-05,
      "loss": 0.7963,
      "step": 220
    },
    {
      "epoch": 0.49330357142857145,
      "grad_norm": 1.744728446006775,
      "learning_rate": 1.9536747524046254e-05,
      "loss": 0.8079,
      "step": 221
    },
    {
      "epoch": 0.4955357142857143,
      "grad_norm": 1.957882285118103,
      "learning_rate": 1.9529119729870253e-05,
      "loss": 0.7432,
      "step": 222
    },
    {
      "epoch": 0.49776785714285715,
      "grad_norm": 1.8669383525848389,
      "learning_rate": 1.9521431162573596e-05,
      "loss": 0.7875,
      "step": 223
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.79106605052948,
      "learning_rate": 1.9513681871191063e-05,
      "loss": 0.7095,
      "step": 224
    },
    {
      "epoch": 0.5,
      "eval_loss": 0.7226072549819946,
      "eval_runtime": 37.3023,
      "eval_samples_per_second": 1.957,
      "eval_steps_per_second": 0.268,
      "step": 224
    },
    {
      "epoch": 0.5022321428571429,
      "grad_norm": 1.7508628368377686,
      "learning_rate": 1.95058719051447e-05,
      "loss": 0.6829,
      "step": 225
    },
    {
      "epoch": 0.5044642857142857,
      "grad_norm": 1.7533146142959595,
      "learning_rate": 1.949800131424352e-05,
      "loss": 0.6786,
      "step": 226
    },
    {
      "epoch": 0.5066964285714286,
      "grad_norm": 1.9132986068725586,
      "learning_rate": 1.9490070148683166e-05,
      "loss": 0.7689,
      "step": 227
    },
    {
      "epoch": 0.5089285714285714,
      "grad_norm": 1.6711753606796265,
      "learning_rate": 1.9482078459045617e-05,
      "loss": 0.7087,
      "step": 228
    },
    {
      "epoch": 0.5111607142857143,
      "grad_norm": 2.01895809173584,
      "learning_rate": 1.947402629629885e-05,
      "loss": 0.7217,
      "step": 229
    },
    {
      "epoch": 0.5133928571428571,
      "grad_norm": 2.0448741912841797,
      "learning_rate": 1.9465913711796502e-05,
      "loss": 0.7922,
      "step": 230
    },
    {
      "epoch": 0.515625,
      "grad_norm": 2.043036460876465,
      "learning_rate": 1.9457740757277577e-05,
      "loss": 0.7573,
      "step": 231
    },
    {
      "epoch": 0.5178571428571429,
      "grad_norm": 2.070568561553955,
      "learning_rate": 1.9449507484866084e-05,
      "loss": 0.8412,
      "step": 232
    },
    {
      "epoch": 0.5200892857142857,
      "grad_norm": 1.930888295173645,
      "learning_rate": 1.944121394707072e-05,
      "loss": 0.8104,
      "step": 233
    },
    {
      "epoch": 0.5223214285714286,
      "grad_norm": 2.184985876083374,
      "learning_rate": 1.9432860196784533e-05,
      "loss": 0.9096,
      "step": 234
    },
    {
      "epoch": 0.5245535714285714,
      "grad_norm": 1.9199402332305908,
      "learning_rate": 1.9424446287284576e-05,
      "loss": 0.7141,
      "step": 235
    },
    {
      "epoch": 0.5267857142857143,
      "grad_norm": 1.6737233400344849,
      "learning_rate": 1.941597227223159e-05,
      "loss": 0.712,
      "step": 236
    },
    {
      "epoch": 0.5290178571428571,
      "grad_norm": 1.6949608325958252,
      "learning_rate": 1.940743820566963e-05,
      "loss": 0.7317,
      "step": 237
    },
    {
      "epoch": 0.53125,
      "grad_norm": 1.8811354637145996,
      "learning_rate": 1.9398844142025746e-05,
      "loss": 0.7427,
      "step": 238
    },
    {
      "epoch": 0.5334821428571429,
      "grad_norm": 2.093593120574951,
      "learning_rate": 1.9390190136109625e-05,
      "loss": 0.7851,
      "step": 239
    },
    {
      "epoch": 0.5357142857142857,
      "grad_norm": 1.7398838996887207,
      "learning_rate": 1.9381476243113243e-05,
      "loss": 0.7885,
      "step": 240
    },
    {
      "epoch": 0.5379464285714286,
      "grad_norm": 1.7277969121932983,
      "learning_rate": 1.9372702518610512e-05,
      "loss": 0.8121,
      "step": 241
    },
    {
      "epoch": 0.5401785714285714,
      "grad_norm": 2.2008254528045654,
      "learning_rate": 1.9363869018556928e-05,
      "loss": 0.8755,
      "step": 242
    },
    {
      "epoch": 0.5424107142857143,
      "grad_norm": 2.0191445350646973,
      "learning_rate": 1.9354975799289215e-05,
      "loss": 0.8049,
      "step": 243
    },
    {
      "epoch": 0.5446428571428571,
      "grad_norm": 1.8473167419433594,
      "learning_rate": 1.9346022917524958e-05,
      "loss": 0.7737,
      "step": 244
    },
    {
      "epoch": 0.546875,
      "grad_norm": 2.2000248432159424,
      "learning_rate": 1.933701043036225e-05,
      "loss": 0.79,
      "step": 245
    },
    {
      "epoch": 0.5491071428571429,
      "grad_norm": 1.9332422018051147,
      "learning_rate": 1.9327938395279325e-05,
      "loss": 0.8249,
      "step": 246
    },
    {
      "epoch": 0.5513392857142857,
      "grad_norm": 1.9443155527114868,
      "learning_rate": 1.9318806870134194e-05,
      "loss": 0.7453,
      "step": 247
    },
    {
      "epoch": 0.5535714285714286,
      "grad_norm": 1.7897255420684814,
      "learning_rate": 1.9309615913164262e-05,
      "loss": 0.7778,
      "step": 248
    },
    {
      "epoch": 0.5558035714285714,
      "grad_norm": 1.7514328956604004,
      "learning_rate": 1.9300365582985984e-05,
      "loss": 0.7577,
      "step": 249
    },
    {
      "epoch": 0.5580357142857143,
      "grad_norm": 1.7380211353302002,
      "learning_rate": 1.9291055938594464e-05,
      "loss": 0.7522,
      "step": 250
    },
    {
      "epoch": 0.5602678571428571,
      "grad_norm": 2.0598490238189697,
      "learning_rate": 1.9281687039363088e-05,
      "loss": 0.799,
      "step": 251
    },
    {
      "epoch": 0.5625,
      "grad_norm": 2.061765670776367,
      "learning_rate": 1.9272258945043154e-05,
      "loss": 0.7477,
      "step": 252
    },
    {
      "epoch": 0.5647321428571429,
      "grad_norm": 1.8268564939498901,
      "learning_rate": 1.9262771715763483e-05,
      "loss": 0.7743,
      "step": 253
    },
    {
      "epoch": 0.5669642857142857,
      "grad_norm": 2.0371830463409424,
      "learning_rate": 1.9253225412030028e-05,
      "loss": 0.883,
      "step": 254
    },
    {
      "epoch": 0.5691964285714286,
      "grad_norm": 1.8290431499481201,
      "learning_rate": 1.924362009472551e-05,
      "loss": 0.7619,
      "step": 255
    },
    {
      "epoch": 0.5714285714285714,
      "grad_norm": 1.886815071105957,
      "learning_rate": 1.9233955825109e-05,
      "loss": 0.7959,
      "step": 256
    },
    {
      "epoch": 0.5736607142857143,
      "grad_norm": 2.0654468536376953,
      "learning_rate": 1.9224232664815563e-05,
      "loss": 0.7899,
      "step": 257
    },
    {
      "epoch": 0.5758928571428571,
      "grad_norm": 1.9492045640945435,
      "learning_rate": 1.9214450675855832e-05,
      "loss": 0.809,
      "step": 258
    },
    {
      "epoch": 0.578125,
      "grad_norm": 1.9223597049713135,
      "learning_rate": 1.9204609920615635e-05,
      "loss": 0.7791,
      "step": 259
    },
    {
      "epoch": 0.5803571428571429,
      "grad_norm": 1.9948698282241821,
      "learning_rate": 1.919471046185558e-05,
      "loss": 0.8161,
      "step": 260
    },
    {
      "epoch": 0.5825892857142857,
      "grad_norm": 2.0385048389434814,
      "learning_rate": 1.9184752362710674e-05,
      "loss": 0.736,
      "step": 261
    },
    {
      "epoch": 0.5848214285714286,
      "grad_norm": 2.2720816135406494,
      "learning_rate": 1.917473568668991e-05,
      "loss": 0.7706,
      "step": 262
    },
    {
      "epoch": 0.5870535714285714,
      "grad_norm": 1.8173810243606567,
      "learning_rate": 1.9164660497675848e-05,
      "loss": 0.6735,
      "step": 263
    },
    {
      "epoch": 0.5892857142857143,
      "grad_norm": 1.7594642639160156,
      "learning_rate": 1.9154526859924242e-05,
      "loss": 0.8137,
      "step": 264
    },
    {
      "epoch": 0.5915178571428571,
      "grad_norm": 1.6718664169311523,
      "learning_rate": 1.9144334838063595e-05,
      "loss": 0.6624,
      "step": 265
    },
    {
      "epoch": 0.59375,
      "grad_norm": 1.8529527187347412,
      "learning_rate": 1.9134084497094766e-05,
      "loss": 0.789,
      "step": 266
    },
    {
      "epoch": 0.5959821428571429,
      "grad_norm": 2.0226452350616455,
      "learning_rate": 1.9123775902390555e-05,
      "loss": 0.8884,
      "step": 267
    },
    {
      "epoch": 0.5982142857142857,
      "grad_norm": 1.9775701761245728,
      "learning_rate": 1.9113409119695276e-05,
      "loss": 0.6447,
      "step": 268
    },
    {
      "epoch": 0.6004464285714286,
      "grad_norm": 1.7548224925994873,
      "learning_rate": 1.9102984215124352e-05,
      "loss": 0.6737,
      "step": 269
    },
    {
      "epoch": 0.6026785714285714,
      "grad_norm": 1.7850803136825562,
      "learning_rate": 1.9092501255163874e-05,
      "loss": 0.6363,
      "step": 270
    },
    {
      "epoch": 0.6049107142857143,
      "grad_norm": 1.8443948030471802,
      "learning_rate": 1.9081960306670198e-05,
      "loss": 0.7323,
      "step": 271
    },
    {
      "epoch": 0.6071428571428571,
      "grad_norm": 1.8395159244537354,
      "learning_rate": 1.907136143686951e-05,
      "loss": 0.788,
      "step": 272
    },
    {
      "epoch": 0.609375,
      "grad_norm": 1.7697207927703857,
      "learning_rate": 1.9060704713357382e-05,
      "loss": 0.7168,
      "step": 273
    },
    {
      "epoch": 0.6116071428571429,
      "grad_norm": 1.7455432415008545,
      "learning_rate": 1.904999020409837e-05,
      "loss": 0.7696,
      "step": 274
    },
    {
      "epoch": 0.6138392857142857,
      "grad_norm": 1.7991318702697754,
      "learning_rate": 1.9039217977425567e-05,
      "loss": 0.7197,
      "step": 275
    },
    {
      "epoch": 0.6160714285714286,
      "grad_norm": 1.7043858766555786,
      "learning_rate": 1.902838810204015e-05,
      "loss": 0.7258,
      "step": 276
    },
    {
      "epoch": 0.6183035714285714,
      "grad_norm": 1.7921115159988403,
      "learning_rate": 1.901750064701097e-05,
      "loss": 0.6635,
      "step": 277
    },
    {
      "epoch": 0.6205357142857143,
      "grad_norm": 1.8393748998641968,
      "learning_rate": 1.90065556817741e-05,
      "loss": 0.7361,
      "step": 278
    },
    {
      "epoch": 0.6227678571428571,
      "grad_norm": 1.4951876401901245,
      "learning_rate": 1.8995553276132385e-05,
      "loss": 0.6451,
      "step": 279
    },
    {
      "epoch": 0.625,
      "grad_norm": 2.0124995708465576,
      "learning_rate": 1.8984493500255e-05,
      "loss": 0.9129,
      "step": 280
    },
    {
      "epoch": 0.6272321428571429,
      "grad_norm": 1.8670498132705688,
      "learning_rate": 1.8973376424677022e-05,
      "loss": 0.7747,
      "step": 281
    },
    {
      "epoch": 0.6294642857142857,
      "grad_norm": 1.8474571704864502,
      "learning_rate": 1.8962202120298948e-05,
      "loss": 0.7649,
      "step": 282
    },
    {
      "epoch": 0.6316964285714286,
      "grad_norm": 2.081151247024536,
      "learning_rate": 1.8950970658386262e-05,
      "loss": 0.7737,
      "step": 283
    },
    {
      "epoch": 0.6339285714285714,
      "grad_norm": 1.9373351335525513,
      "learning_rate": 1.8939682110568982e-05,
      "loss": 0.7365,
      "step": 284
    },
    {
      "epoch": 0.6361607142857143,
      "grad_norm": 1.9412529468536377,
      "learning_rate": 1.8928336548841197e-05,
      "loss": 0.6813,
      "step": 285
    },
    {
      "epoch": 0.6383928571428571,
      "grad_norm": 1.8421021699905396,
      "learning_rate": 1.8916934045560603e-05,
      "loss": 0.7973,
      "step": 286
    },
    {
      "epoch": 0.640625,
      "grad_norm": 1.9929094314575195,
      "learning_rate": 1.8905474673448055e-05,
      "loss": 0.6829,
      "step": 287
    },
    {
      "epoch": 0.6428571428571429,
      "grad_norm": 1.6566269397735596,
      "learning_rate": 1.8893958505587093e-05,
      "loss": 0.6942,
      "step": 288
    },
    {
      "epoch": 0.6450892857142857,
      "grad_norm": 2.019409656524658,
      "learning_rate": 1.8882385615423477e-05,
      "loss": 0.767,
      "step": 289
    },
    {
      "epoch": 0.6473214285714286,
      "grad_norm": 1.7549775838851929,
      "learning_rate": 1.8870756076764728e-05,
      "loss": 0.7294,
      "step": 290
    },
    {
      "epoch": 0.6495535714285714,
      "grad_norm": 1.7515791654586792,
      "learning_rate": 1.8859069963779636e-05,
      "loss": 0.7496,
      "step": 291
    },
    {
      "epoch": 0.6517857142857143,
      "grad_norm": 1.8197311162948608,
      "learning_rate": 1.8847327350997814e-05,
      "loss": 0.6977,
      "step": 292
    },
    {
      "epoch": 0.6540178571428571,
      "grad_norm": 1.6706933975219727,
      "learning_rate": 1.88355283133092e-05,
      "loss": 0.7149,
      "step": 293
    },
    {
      "epoch": 0.65625,
      "grad_norm": 1.9751033782958984,
      "learning_rate": 1.8823672925963598e-05,
      "loss": 0.7942,
      "step": 294
    },
    {
      "epoch": 0.6584821428571429,
      "grad_norm": 1.8076329231262207,
      "learning_rate": 1.8811761264570177e-05,
      "loss": 0.7787,
      "step": 295
    },
    {
      "epoch": 0.6607142857142857,
      "grad_norm": 1.9289543628692627,
      "learning_rate": 1.879979340509701e-05,
      "loss": 0.7987,
      "step": 296
    },
    {
      "epoch": 0.6629464285714286,
      "grad_norm": 1.650168538093567,
      "learning_rate": 1.8787769423870583e-05,
      "loss": 0.6981,
      "step": 297
    },
    {
      "epoch": 0.6651785714285714,
      "grad_norm": 2.0561366081237793,
      "learning_rate": 1.877568939757529e-05,
      "loss": 0.7365,
      "step": 298
    },
    {
      "epoch": 0.6674107142857143,
      "grad_norm": 1.8385493755340576,
      "learning_rate": 1.8763553403252975e-05,
      "loss": 0.5859,
      "step": 299
    },
    {
      "epoch": 0.6696428571428571,
      "grad_norm": 1.7286657094955444,
      "learning_rate": 1.8751361518302413e-05,
      "loss": 0.7328,
      "step": 300
    },
    {
      "epoch": 0.671875,
      "grad_norm": 1.8077149391174316,
      "learning_rate": 1.873911382047884e-05,
      "loss": 0.8031,
      "step": 301
    },
    {
      "epoch": 0.6741071428571429,
      "grad_norm": 1.7272533178329468,
      "learning_rate": 1.8726810387893438e-05,
      "loss": 0.6546,
      "step": 302
    },
    {
      "epoch": 0.6763392857142857,
      "grad_norm": 1.9572628736495972,
      "learning_rate": 1.871445129901284e-05,
      "loss": 0.8659,
      "step": 303
    },
    {
      "epoch": 0.6785714285714286,
      "grad_norm": 1.978677749633789,
      "learning_rate": 1.8702036632658646e-05,
      "loss": 0.7295,
      "step": 304
    },
    {
      "epoch": 0.6808035714285714,
      "grad_norm": 1.81928288936615,
      "learning_rate": 1.8689566468006898e-05,
      "loss": 0.7568,
      "step": 305
    },
    {
      "epoch": 0.6830357142857143,
      "grad_norm": 1.6536098718643188,
      "learning_rate": 1.867704088458759e-05,
      "loss": 0.7303,
      "step": 306
    },
    {
      "epoch": 0.6852678571428571,
      "grad_norm": 1.989862084388733,
      "learning_rate": 1.866445996228415e-05,
      "loss": 0.7545,
      "step": 307
    },
    {
      "epoch": 0.6875,
      "grad_norm": 1.7992223501205444,
      "learning_rate": 1.8651823781332948e-05,
      "loss": 0.7724,
      "step": 308
    },
    {
      "epoch": 0.6897321428571429,
      "grad_norm": 1.92568039894104,
      "learning_rate": 1.863913242232276e-05,
      "loss": 0.7042,
      "step": 309
    },
    {
      "epoch": 0.6919642857142857,
      "grad_norm": 1.8733493089675903,
      "learning_rate": 1.8626385966194275e-05,
      "loss": 0.7978,
      "step": 310
    },
    {
      "epoch": 0.6941964285714286,
      "grad_norm": 1.7693002223968506,
      "learning_rate": 1.8613584494239568e-05,
      "loss": 0.7821,
      "step": 311
    },
    {
      "epoch": 0.6964285714285714,
      "grad_norm": 1.6573666334152222,
      "learning_rate": 1.8600728088101587e-05,
      "loss": 0.7033,
      "step": 312
    },
    {
      "epoch": 0.6986607142857143,
      "grad_norm": 2.043008327484131,
      "learning_rate": 1.858781682977362e-05,
      "loss": 0.6474,
      "step": 313
    },
    {
      "epoch": 0.7008928571428571,
      "grad_norm": 1.755703330039978,
      "learning_rate": 1.857485080159879e-05,
      "loss": 0.8343,
      "step": 314
    },
    {
      "epoch": 0.703125,
      "grad_norm": 2.0336127281188965,
      "learning_rate": 1.8561830086269524e-05,
      "loss": 0.8187,
      "step": 315
    },
    {
      "epoch": 0.7053571428571429,
      "grad_norm": 1.6667089462280273,
      "learning_rate": 1.8548754766827016e-05,
      "loss": 0.6551,
      "step": 316
    },
    {
      "epoch": 0.7075892857142857,
      "grad_norm": 1.6601287126541138,
      "learning_rate": 1.8535624926660707e-05,
      "loss": 0.762,
      "step": 317
    },
    {
      "epoch": 0.7098214285714286,
      "grad_norm": 1.8507710695266724,
      "learning_rate": 1.852244064950775e-05,
      "loss": 0.7247,
      "step": 318
    },
    {
      "epoch": 0.7120535714285714,
      "grad_norm": 1.7145278453826904,
      "learning_rate": 1.8509202019452472e-05,
      "loss": 0.6654,
      "step": 319
    },
    {
      "epoch": 0.7142857142857143,
      "grad_norm": 2.0679047107696533,
      "learning_rate": 1.8495909120925857e-05,
      "loss": 0.7885,
      "step": 320
    },
    {
      "epoch": 0.7165178571428571,
      "grad_norm": 1.8965860605239868,
      "learning_rate": 1.8482562038704975e-05,
      "loss": 0.6821,
      "step": 321
    },
    {
      "epoch": 0.71875,
      "grad_norm": 2.1363813877105713,
      "learning_rate": 1.846916085791247e-05,
      "loss": 0.8906,
      "step": 322
    },
    {
      "epoch": 0.7209821428571429,
      "grad_norm": 1.9793356657028198,
      "learning_rate": 1.8455705664016003e-05,
      "loss": 0.7592,
      "step": 323
    },
    {
      "epoch": 0.7232142857142857,
      "grad_norm": 1.793078899383545,
      "learning_rate": 1.8442196542827712e-05,
      "loss": 0.7786,
      "step": 324
    },
    {
      "epoch": 0.7254464285714286,
      "grad_norm": 2.359466314315796,
      "learning_rate": 1.8428633580503658e-05,
      "loss": 0.9417,
      "step": 325
    },
    {
      "epoch": 0.7276785714285714,
      "grad_norm": 2.090745210647583,
      "learning_rate": 1.8415016863543286e-05,
      "loss": 0.8483,
      "step": 326
    },
    {
      "epoch": 0.7299107142857143,
      "grad_norm": 2.008620500564575,
      "learning_rate": 1.8401346478788865e-05,
      "loss": 0.8119,
      "step": 327
    },
    {
      "epoch": 0.7321428571428571,
      "grad_norm": 2.027491569519043,
      "learning_rate": 1.8387622513424942e-05,
      "loss": 0.7877,
      "step": 328
    },
    {
      "epoch": 0.734375,
      "grad_norm": 1.982364296913147,
      "learning_rate": 1.8373845054977764e-05,
      "loss": 0.7336,
      "step": 329
    },
    {
      "epoch": 0.7366071428571429,
      "grad_norm": 1.8951386213302612,
      "learning_rate": 1.836001419131476e-05,
      "loss": 0.7059,
      "step": 330
    },
    {
      "epoch": 0.7388392857142857,
      "grad_norm": 1.8096810579299927,
      "learning_rate": 1.834613001064394e-05,
      "loss": 0.6819,
      "step": 331
    },
    {
      "epoch": 0.7410714285714286,
      "grad_norm": 1.936662197113037,
      "learning_rate": 1.8332192601513358e-05,
      "loss": 0.8011,
      "step": 332
    },
    {
      "epoch": 0.7433035714285714,
      "grad_norm": 1.7710180282592773,
      "learning_rate": 1.8318202052810538e-05,
      "loss": 0.7537,
      "step": 333
    },
    {
      "epoch": 0.7455357142857143,
      "grad_norm": 1.7253098487854004,
      "learning_rate": 1.8304158453761904e-05,
      "loss": 0.6547,
      "step": 334
    },
    {
      "epoch": 0.7477678571428571,
      "grad_norm": 1.9151325225830078,
      "learning_rate": 1.829006189393222e-05,
      "loss": 0.7737,
      "step": 335
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.715453028678894,
      "learning_rate": 1.827591246322401e-05,
      "loss": 0.7343,
      "step": 336
    },
    {
      "epoch": 0.75,
      "eval_loss": 0.7154207229614258,
      "eval_runtime": 43.0546,
      "eval_samples_per_second": 1.696,
      "eval_steps_per_second": 0.232,
      "step": 336
    },
{ "epoch": 0.7522321428571429, "grad_norm": 1.6917095184326172, "learning_rate": 1.8261710251876993e-05, "loss": 0.6706, "step": 337 },
{ "epoch": 0.7544642857142857, "grad_norm": 1.8422549962997437, "learning_rate": 1.8247455350467496e-05, "loss": 0.7681, "step": 338 },
{ "epoch": 0.7566964285714286, "grad_norm": 1.6443406343460083, "learning_rate": 1.8233147849907894e-05, "loss": 0.6611, "step": 339 },
{ "epoch": 0.7589285714285714, "grad_norm": 2.035898447036743, "learning_rate": 1.8218787841446003e-05, "loss": 0.7388, "step": 340 },
{ "epoch": 0.7611607142857143, "grad_norm": 1.7699940204620361, "learning_rate": 1.8204375416664536e-05, "loss": 0.7281, "step": 341 },
{ "epoch": 0.7633928571428571, "grad_norm": 1.7037780284881592, "learning_rate": 1.8189910667480476e-05, "loss": 0.6242, "step": 342 },
{ "epoch": 0.765625, "grad_norm": 1.887932538986206, "learning_rate": 1.8175393686144524e-05, "loss": 0.7796, "step": 343 },
{ "epoch": 0.7678571428571429, "grad_norm": 1.8445992469787598, "learning_rate": 1.8160824565240495e-05, "loss": 0.7814, "step": 344 },
{ "epoch": 0.7700892857142857, "grad_norm": 1.56995689868927, "learning_rate": 1.8146203397684734e-05, "loss": 0.65, "step": 345 },
{ "epoch": 0.7723214285714286, "grad_norm": 1.7116246223449707, "learning_rate": 1.8131530276725514e-05, "loss": 0.7689, "step": 346 },
{ "epoch": 0.7745535714285714, "grad_norm": 1.7758585214614868, "learning_rate": 1.811680529594245e-05, "loss": 0.7553, "step": 347 },
{ "epoch": 0.7767857142857143, "grad_norm": 1.7060250043869019, "learning_rate": 1.8102028549245894e-05, "loss": 0.7016, "step": 348 },
{ "epoch": 0.7790178571428571, "grad_norm": 1.8875247240066528, "learning_rate": 1.808720013087635e-05, "loss": 0.7161, "step": 349 },
{ "epoch": 0.78125, "grad_norm": 1.7203177213668823, "learning_rate": 1.8072320135403862e-05, "loss": 0.6379, "step": 350 },
{ "epoch": 0.7834821428571429, "grad_norm": 1.9935482740402222, "learning_rate": 1.805738865772741e-05, "loss": 0.7931, "step": 351 },
{ "epoch": 0.7857142857142857, "grad_norm": 1.6885210275650024, "learning_rate": 1.804240579307431e-05, "loss": 0.7497, "step": 352 },
{ "epoch": 0.7879464285714286, "grad_norm": 1.717721939086914, "learning_rate": 1.8027371636999605e-05, "loss": 0.6567, "step": 353 },
{ "epoch": 0.7901785714285714, "grad_norm": 2.2966055870056152, "learning_rate": 1.8012286285385456e-05, "loss": 0.9328, "step": 354 },
{ "epoch": 0.7924107142857143, "grad_norm": 2.057353973388672, "learning_rate": 1.7997149834440527e-05, "loss": 0.7644, "step": 355 },
{ "epoch": 0.7946428571428571, "grad_norm": 1.7968207597732544, "learning_rate": 1.7981962380699376e-05, "loss": 0.758, "step": 356 },
{ "epoch": 0.796875, "grad_norm": 1.8473401069641113, "learning_rate": 1.7966724021021837e-05, "loss": 0.6907, "step": 357 },
{ "epoch": 0.7991071428571429, "grad_norm": 1.7238281965255737, "learning_rate": 1.7951434852592406e-05, "loss": 0.7409, "step": 358 },
{ "epoch": 0.8013392857142857, "grad_norm": 1.6485793590545654, "learning_rate": 1.793609497291961e-05, "loss": 0.6849, "step": 359 },
{ "epoch": 0.8035714285714286, "grad_norm": 1.7897621393203735, "learning_rate": 1.79207044798354e-05, "loss": 0.9022, "step": 360 },
{ "epoch": 0.8058035714285714, "grad_norm": 2.24227237701416, "learning_rate": 1.7905263471494522e-05, "loss": 0.906, "step": 361 },
{ "epoch": 0.8080357142857143, "grad_norm": 1.8071457147598267, "learning_rate": 1.788977204637388e-05, "loss": 0.6459, "step": 362 },
{ "epoch": 0.8102678571428571, "grad_norm": 1.894426941871643, "learning_rate": 1.7874230303271932e-05, "loss": 0.9378, "step": 363 },
{ "epoch": 0.8125, "grad_norm": 1.6523385047912598, "learning_rate": 1.7858638341308026e-05, "loss": 0.7221, "step": 364 },
{ "epoch": 0.8147321428571429, "grad_norm": 1.9723589420318604, "learning_rate": 1.78429962599218e-05, "loss": 0.8942, "step": 365 },
{ "epoch": 0.8169642857142857, "grad_norm": 1.8695416450500488, "learning_rate": 1.7827304158872538e-05, "loss": 0.6494, "step": 366 },
{ "epoch": 0.8191964285714286, "grad_norm": 1.7047300338745117, "learning_rate": 1.7811562138238508e-05, "loss": 0.6725, "step": 367 },
{ "epoch": 0.8214285714285714, "grad_norm": 1.707351803779602, "learning_rate": 1.779577029841638e-05, "loss": 0.7854, "step": 368 },
{ "epoch": 0.8236607142857143, "grad_norm": 1.9581531286239624, "learning_rate": 1.7779928740120525e-05, "loss": 0.8307, "step": 369 },
{ "epoch": 0.8258928571428571, "grad_norm": 1.608521580696106, "learning_rate": 1.776403756438241e-05, "loss": 0.6982, "step": 370 },
{ "epoch": 0.828125, "grad_norm": 2.188683032989502, "learning_rate": 1.774809687254994e-05, "loss": 0.8912, "step": 371 },
{ "epoch": 0.8303571428571429, "grad_norm": 2.287449598312378, "learning_rate": 1.773210676628682e-05, "loss": 0.9133, "step": 372 },
{ "epoch": 0.8325892857142857, "grad_norm": 1.6350460052490234, "learning_rate": 1.77160673475719e-05, "loss": 0.6556, "step": 373 },
{ "epoch": 0.8348214285714286, "grad_norm": 1.6731687784194946, "learning_rate": 1.769997871869852e-05, "loss": 0.6751, "step": 374 },
{ "epoch": 0.8370535714285714, "grad_norm": 2.1170239448547363, "learning_rate": 1.768384098227387e-05, "loss": 0.797, "step": 375 },
{ "epoch": 0.8392857142857143, "grad_norm": 1.8839830160140991, "learning_rate": 1.7667654241218332e-05, "loss": 0.7856, "step": 376 },
{ "epoch": 0.8415178571428571, "grad_norm": 1.9298778772354126, "learning_rate": 1.765141859876481e-05, "loss": 0.7929, "step": 377 },
{ "epoch": 0.84375, "grad_norm": 2.2410686016082764, "learning_rate": 1.7635134158458095e-05, "loss": 0.8097, "step": 378 },
{ "epoch": 0.8459821428571429, "grad_norm": 1.8766001462936401, "learning_rate": 1.7618801024154186e-05, "loss": 0.7552, "step": 379 },
{ "epoch": 0.8482142857142857, "grad_norm": 1.960311770439148, "learning_rate": 1.7602419300019627e-05, "loss": 0.7243, "step": 380 },
{ "epoch": 0.8504464285714286, "grad_norm": 1.9935823678970337, "learning_rate": 1.758598909053087e-05, "loss": 0.7236, "step": 381 },
{ "epoch": 0.8526785714285714, "grad_norm": 1.8538720607757568, "learning_rate": 1.7569510500473566e-05, "loss": 0.742, "step": 382 },
{ "epoch": 0.8549107142857143, "grad_norm": 1.5118043422698975, "learning_rate": 1.7552983634941928e-05, "loss": 0.5574, "step": 383 },
{ "epoch": 0.8571428571428571, "grad_norm": 1.9784835577011108, "learning_rate": 1.753640859933806e-05, "loss": 0.7678, "step": 384 },
{ "epoch": 0.859375, "grad_norm": 2.0069949626922607, "learning_rate": 1.751978549937126e-05, "loss": 0.8437, "step": 385 },
{ "epoch": 0.8616071428571429, "grad_norm": 1.858752965927124, "learning_rate": 1.7503114441057374e-05, "loss": 0.7793, "step": 386 },
{ "epoch": 0.8638392857142857, "grad_norm": 1.7572165727615356, "learning_rate": 1.7486395530718104e-05, "loss": 0.8084, "step": 387 },
{ "epoch": 0.8660714285714286, "grad_norm": 1.585492491722107, "learning_rate": 1.746962887498034e-05, "loss": 0.6839, "step": 388 },
{ "epoch": 0.8683035714285714, "grad_norm": 1.8265163898468018, "learning_rate": 1.7452814580775467e-05, "loss": 0.7074, "step": 389 },
{ "epoch": 0.8705357142857143, "grad_norm": 1.6596579551696777, "learning_rate": 1.743595275533869e-05, "loss": 0.6543, "step": 390 },
{ "epoch": 0.8727678571428571, "grad_norm": 1.6813689470291138, "learning_rate": 1.7419043506208348e-05, "loss": 0.804, "step": 391 },
{ "epoch": 0.875, "grad_norm": 1.6294770240783691, "learning_rate": 1.7402086941225246e-05, "loss": 0.6623, "step": 392 },
{ "epoch": 0.8772321428571429, "grad_norm": 1.7459831237792969, "learning_rate": 1.7385083168531934e-05, "loss": 0.7403, "step": 393 },
{ "epoch": 0.8794642857142857, "grad_norm": 1.845110297203064, "learning_rate": 1.736803229657204e-05, "loss": 0.7605, "step": 394 },
{ "epoch": 0.8816964285714286, "grad_norm": 1.690212368965149, "learning_rate": 1.7350934434089583e-05, "loss": 0.6052, "step": 395 },
{ "epoch": 0.8839285714285714, "grad_norm": 1.7885936498641968, "learning_rate": 1.7333789690128252e-05, "loss": 0.844, "step": 396 },
{ "epoch": 0.8861607142857143, "grad_norm": 1.8944475650787354, "learning_rate": 1.7316598174030746e-05, "loss": 0.7689, "step": 397 },
{ "epoch": 0.8883928571428571, "grad_norm": 2.1152751445770264, "learning_rate": 1.7299359995438046e-05, "loss": 0.8567, "step": 398 },
{ "epoch": 0.890625, "grad_norm": 1.7011525630950928, "learning_rate": 1.728207526428873e-05, "loss": 0.7467, "step": 399 },
{ "epoch": 0.8928571428571429, "grad_norm": 1.963889479637146, "learning_rate": 1.7264744090818284e-05, "loss": 0.8, "step": 400 },
{ "epoch": 0.8950892857142857, "grad_norm": 1.824885606765747, "learning_rate": 1.7247366585558366e-05, "loss": 0.7644, "step": 401 },
{ "epoch": 0.8973214285714286, "grad_norm": 1.8513473272323608, "learning_rate": 1.7229942859336142e-05, "loss": 0.8467, "step": 402 },
{ "epoch": 0.8995535714285714, "grad_norm": 1.688294529914856, "learning_rate": 1.7212473023273532e-05, "loss": 0.6652, "step": 403 },
{ "epoch": 0.9017857142857143, "grad_norm": 1.7810540199279785, "learning_rate": 1.719495718878655e-05, "loss": 0.7861, "step": 404 },
{ "epoch": 0.9040178571428571, "grad_norm": 1.7689653635025024, "learning_rate": 1.7177395467584564e-05, "loss": 0.7411, "step": 405 },
{ "epoch": 0.90625, "grad_norm": 2.2160940170288086, "learning_rate": 1.7159787971669586e-05, "loss": 0.777, "step": 406 },
{ "epoch": 0.9084821428571429, "grad_norm": 2.250462770462036, "learning_rate": 1.7142134813335557e-05, "loss": 0.8158, "step": 407 },
{ "epoch": 0.9107142857142857, "grad_norm": 1.9748404026031494, "learning_rate": 1.712443610516765e-05, "loss": 0.7991, "step": 408 },
{ "epoch": 0.9129464285714286, "grad_norm": 2.171666145324707, "learning_rate": 1.7106691960041527e-05, "loss": 0.8593, "step": 409 },
{ "epoch": 0.9151785714285714, "grad_norm": 1.7081716060638428, "learning_rate": 1.7088902491122636e-05, "loss": 0.7543, "step": 410 },
{ "epoch": 0.9174107142857143, "grad_norm": 1.9155141115188599, "learning_rate": 1.7071067811865477e-05, "loss": 0.8376, "step": 411 },
{ "epoch": 0.9196428571428571, "grad_norm": 2.0130817890167236, "learning_rate": 1.7053188036012885e-05, "loss": 0.66, "step": 412 },
{ "epoch": 0.921875, "grad_norm": 1.888626217842102, "learning_rate": 1.7035263277595314e-05, "loss": 0.6913, "step": 413 },
{ "epoch": 0.9241071428571429, "grad_norm": 1.5536619424819946, "learning_rate": 1.7017293650930083e-05, "loss": 0.7703, "step": 414 },
{ "epoch": 0.9263392857142857, "grad_norm": 1.720703363418579, "learning_rate": 1.6999279270620675e-05, "loss": 0.7862, "step": 415 },
{ "epoch": 0.9285714285714286, "grad_norm": 1.6617650985717773, "learning_rate": 1.6981220251555996e-05, "loss": 0.7429, "step": 416 },
{ "epoch": 0.9308035714285714, "grad_norm": 2.0875959396362305, "learning_rate": 1.6963116708909637e-05, "loss": 0.7905, "step": 417 },
{ "epoch": 0.9330357142857143, "grad_norm": 1.4865297079086304, "learning_rate": 1.6944968758139144e-05, "loss": 0.7061, "step": 418 },
{ "epoch": 0.9352678571428571, "grad_norm": 1.7242523431777954, "learning_rate": 1.6926776514985278e-05, "loss": 0.7275, "step": 419 },
{ "epoch": 0.9375, "grad_norm": 1.7873682975769043, "learning_rate": 1.6908540095471288e-05, "loss": 0.7436, "step": 420 },
{ "epoch": 0.9397321428571429, "grad_norm": 1.887552261352539, "learning_rate": 1.6890259615902153e-05, "loss": 0.8187, "step": 421 },
{ "epoch": 0.9419642857142857, "grad_norm": 1.7543212175369263, "learning_rate": 1.6871935192863862e-05, "loss": 0.6981, "step": 422 },
{ "epoch": 0.9441964285714286, "grad_norm": 1.7617319822311401, "learning_rate": 1.6853566943222647e-05, "loss": 0.801, "step": 423 },
{ "epoch": 0.9464285714285714, "grad_norm": 1.8142614364624023, "learning_rate": 1.6835154984124266e-05, "loss": 0.7469, "step": 424 },
{ "epoch": 0.9486607142857143, "grad_norm": 1.6476774215698242, "learning_rate": 1.6816699432993212e-05, "loss": 0.8173, "step": 425 },
{ "epoch": 0.9508928571428571, "grad_norm": 2.0289416313171387, "learning_rate": 1.6798200407532025e-05, "loss": 0.9145, "step": 426 },
{ "epoch": 0.953125, "grad_norm": 1.8370226621627808, "learning_rate": 1.677965802572048e-05, "loss": 0.748, "step": 427 },
{ "epoch": 0.9553571428571429, "grad_norm": 1.7355858087539673, "learning_rate": 1.676107240581488e-05, "loss": 0.7599, "step": 428 },
{ "epoch": 0.9575892857142857, "grad_norm": 1.7881946563720703, "learning_rate": 1.674244366634727e-05, "loss": 0.7741, "step": 429 },
{ "epoch": 0.9598214285714286, "grad_norm": 2.2131218910217285, "learning_rate": 1.6723771926124704e-05, "loss": 0.9298, "step": 430 },
{ "epoch": 0.9620535714285714, "grad_norm": 1.5364829301834106, "learning_rate": 1.6705057304228488e-05, "loss": 0.6597, "step": 431 },
{ "epoch": 0.9642857142857143, "grad_norm": 1.8031491041183472, "learning_rate": 1.6686299920013388e-05, "loss": 0.7577, "step": 432 },
{ "epoch": 0.9665178571428571, "grad_norm": 1.596772313117981, "learning_rate": 1.666749989310691e-05, "loss": 0.728, "step": 433 },
{ "epoch": 0.96875, "grad_norm": 1.614382028579712, "learning_rate": 1.6648657343408517e-05, "loss": 0.7294, "step": 434 },
{ "epoch": 0.9709821428571429, "grad_norm": 1.7294843196868896, "learning_rate": 1.6629772391088855e-05, "loss": 0.7244, "step": 435 },
{ "epoch": 0.9732142857142857, "grad_norm": 1.9231197834014893, "learning_rate": 1.661084515658901e-05, "loss": 0.81, "step": 436 },
{ "epoch": 0.9754464285714286, "grad_norm": 1.6754019260406494, "learning_rate": 1.6591875760619718e-05, "loss": 0.6205, "step": 437 },
{ "epoch": 0.9776785714285714, "grad_norm": 2.166166067123413, "learning_rate": 1.6572864324160617e-05, "loss": 0.8231, "step": 438 },
{ "epoch": 0.9799107142857143, "grad_norm": 1.67359459400177, "learning_rate": 1.6553810968459455e-05, "loss": 0.6678, "step": 439 },
{ "epoch": 0.9821428571428571, "grad_norm": 1.8316197395324707, "learning_rate": 1.6534715815031325e-05, "loss": 0.7779, "step": 440 },
{ "epoch": 0.984375, "grad_norm": 1.8748825788497925, "learning_rate": 1.651557898565789e-05, "loss": 0.7911, "step": 441 },
{ "epoch": 0.9866071428571429, "grad_norm": 1.6567225456237793, "learning_rate": 1.649640060238661e-05, "loss": 0.7287, "step": 442 },
{ "epoch": 0.9888392857142857, "grad_norm": 1.8699995279312134, "learning_rate": 1.6477180787529957e-05, "loss": 0.7821, "step": 443 },
{ "epoch": 0.9910714285714286, "grad_norm": 1.7357499599456787, "learning_rate": 1.645791966366464e-05, "loss": 0.6641, "step": 444 },
{ "epoch": 0.9933035714285714, "grad_norm": 1.7094175815582275, "learning_rate": 1.6438617353630823e-05, "loss": 0.7252, "step": 445 },
{ "epoch": 0.9955357142857143, "grad_norm": 1.7703126668930054, "learning_rate": 1.6419273980531333e-05, "loss": 0.8532, "step": 446 },
{ "epoch": 0.9977678571428571, "grad_norm": 1.5840541124343872, "learning_rate": 1.6399889667730887e-05, "loss": 0.6525, "step": 447 },
{ "epoch": 1.0, "grad_norm": 1.72904634475708, "learning_rate": 1.63804645388553e-05, "loss": 0.6731, "step": 448 },
{ "epoch": 1.0, "eval_loss": 0.7064380645751953, "eval_runtime": 45.5564, "eval_samples_per_second": 1.602, "eval_steps_per_second": 0.22, "step": 448 },
{ "epoch": 1.0022321428571428, "grad_norm": 1.7170028686523438, "learning_rate": 1.6360998717790694e-05, "loss": 0.532, "step": 449 },
{ "epoch": 1.0044642857142858, "grad_norm": 2.1551966667175293, "learning_rate": 1.6341492328682703e-05, "loss": 0.6989, "step": 450 },
{ "epoch": 1.0066964285714286, "grad_norm": 1.7585958242416382, "learning_rate": 1.6321945495935717e-05, "loss": 0.6666, "step": 451 },
{ "epoch": 1.0089285714285714, "grad_norm": 1.7315536737442017, "learning_rate": 1.6302358344212025e-05, "loss": 0.5661, "step": 452 },
{ "epoch": 1.0111607142857142, "grad_norm": 1.7567814588546753, "learning_rate": 1.6282730998431072e-05, "loss": 0.6854, "step": 453 },
{ "epoch": 1.0133928571428572, "grad_norm": 1.8460400104522705, "learning_rate": 1.6263063583768652e-05, "loss": 0.5733, "step": 454 },
{ "epoch": 1.015625, "grad_norm": 1.7143034934997559, "learning_rate": 1.624335622565609e-05, "loss": 0.5911, "step": 455 },
{ "epoch": 1.0178571428571428, "grad_norm": 1.7223514318466187, "learning_rate": 1.622360904977946e-05, "loss": 0.5492, "step": 456 },
{ "epoch": 1.0200892857142858, "grad_norm": 2.3188822269439697, "learning_rate": 1.6203822182078777e-05, "loss": 0.7361, "step": 457 },
{ "epoch": 1.0223214285714286, "grad_norm": 1.8782273530960083, "learning_rate": 1.6183995748747204e-05, "loss": 0.65, "step": 458 },
{ "epoch": 1.0245535714285714, "grad_norm": 1.8567129373550415, "learning_rate": 1.6164129876230226e-05, "loss": 0.5537, "step": 459 },
{ "epoch": 1.0267857142857142, "grad_norm": 1.7850383520126343, "learning_rate": 1.6144224691224868e-05, "loss": 0.6298, "step": 460 },
{ "epoch": 1.0290178571428572, "grad_norm": 1.7569608688354492, "learning_rate": 1.6124280320678864e-05, "loss": 0.649, "step": 461 },
{ "epoch": 1.03125, "grad_norm": 2.0803897380828857, "learning_rate": 1.6104296891789867e-05, "loss": 0.744, "step": 462 },
{ "epoch": 1.0334821428571428, "grad_norm": 1.786569595336914, "learning_rate": 1.608427453200463e-05, "loss": 0.5691, "step": 463 },
{ "epoch": 1.0357142857142858, "grad_norm": 1.8641101121902466, "learning_rate": 1.606421336901818e-05, "loss": 0.6681, "step": 464 },
{ "epoch": 1.0379464285714286, "grad_norm": 2.0264892578125, "learning_rate": 1.6044113530773034e-05, "loss": 0.7279, "step": 465 },
{ "epoch": 1.0401785714285714, "grad_norm": 1.8726037740707397, "learning_rate": 1.6023975145458352e-05, "loss": 0.5828, "step": 466 },
{ "epoch": 1.0424107142857142, "grad_norm": 1.6226308345794678, "learning_rate": 1.600379834150914e-05, "loss": 0.4913, "step": 467 },
{ "epoch": 1.0446428571428572, "grad_norm": 1.677682638168335, "learning_rate": 1.5983583247605414e-05, "loss": 0.5904, "step": 468 },
{ "epoch": 1.046875, "grad_norm": 2.0546329021453857, "learning_rate": 1.5963329992671402e-05, "loss": 0.7253, "step": 469 },
{ "epoch": 1.0491071428571428, "grad_norm": 2.0313217639923096, "learning_rate": 1.5943038705874697e-05, "loss": 0.6989, "step": 470 },
{ "epoch": 1.0513392857142858, "grad_norm": 1.7999160289764404, "learning_rate": 1.5922709516625453e-05, "loss": 0.6103, "step": 471 },
{ "epoch": 1.0535714285714286, "grad_norm": 1.782199501991272, "learning_rate": 1.590234255457555e-05, "loss": 0.6372, "step": 472 },
{ "epoch": 1.0558035714285714, "grad_norm": 2.0932509899139404, "learning_rate": 1.588193794961776e-05, "loss": 0.6486, "step": 473 },
{ "epoch": 1.0580357142857142, "grad_norm": 1.6826951503753662, "learning_rate": 1.5861495831884942e-05, "loss": 0.5463, "step": 474 },
{ "epoch": 1.0602678571428572, "grad_norm": 1.6843758821487427, "learning_rate": 1.5841016331749185e-05, "loss": 0.5937, "step": 475 },
{ "epoch": 1.0625, "grad_norm": 1.8449771404266357, "learning_rate": 1.582049957982099e-05, "loss": 0.5093, "step": 476 },
{ "epoch": 1.0647321428571428, "grad_norm": 1.8449821472167969, "learning_rate": 1.5799945706948447e-05, "loss": 0.5731, "step": 477 },
{ "epoch": 1.0669642857142858, "grad_norm": 1.7647595405578613, "learning_rate": 1.5779354844216377e-05, "loss": 0.5962, "step": 478 },
{ "epoch": 1.0691964285714286, "grad_norm": 1.7663452625274658, "learning_rate": 1.5758727122945514e-05, "loss": 0.6871, "step": 479 },
{ "epoch": 1.0714285714285714, "grad_norm": 1.7700406312942505, "learning_rate": 1.5738062674691657e-05, "loss": 0.6388, "step": 480 },
{ "epoch": 1.0736607142857142, "grad_norm": 1.9175291061401367, "learning_rate": 1.5717361631244842e-05, "loss": 0.6238, "step": 481 },
{ "epoch": 1.0758928571428572, "grad_norm": 1.8494954109191895, "learning_rate": 1.5696624124628495e-05, "loss": 0.6641, "step": 482 },
{ "epoch": 1.078125, "grad_norm": 2.086261034011841, "learning_rate": 1.5675850287098585e-05, "loss": 0.7263, "step": 483 },
{ "epoch": 1.0803571428571428, "grad_norm": 1.7281228303909302, "learning_rate": 1.5655040251142787e-05, "loss": 0.6142, "step": 484 },
{ "epoch": 1.0825892857142858, "grad_norm": 1.798956036567688, "learning_rate": 1.5634194149479642e-05, "loss": 0.5619, "step": 485 },
{ "epoch": 1.0848214285714286, "grad_norm": 2.007969379425049, "learning_rate": 1.5613312115057697e-05, "loss": 0.7375, "step": 486 },
{ "epoch": 1.0870535714285714, "grad_norm": 1.869939923286438, "learning_rate": 1.559239428105467e-05, "loss": 0.6256, "step": 487 },
{ "epoch": 1.0892857142857142, "grad_norm": 2.1612086296081543, "learning_rate": 1.5571440780876588e-05, "loss": 0.6326, "step": 488 },
{ "epoch": 1.0915178571428572, "grad_norm": 1.9341946840286255, "learning_rate": 1.5550451748156957e-05, "loss": 0.619, "step": 489 },
{ "epoch": 1.09375, "grad_norm": 1.7737098932266235, "learning_rate": 1.5529427316755876e-05, "loss": 0.6141, "step": 490 },
{ "epoch": 1.0959821428571428, "grad_norm": 1.672850251197815, "learning_rate": 1.5508367620759224e-05, "loss": 0.5472, "step": 491 },
{ "epoch": 1.0982142857142858, "grad_norm": 1.985202431678772, "learning_rate": 1.548727279447777e-05, "loss": 0.6157, "step": 492 },
{ "epoch": 1.1004464285714286, "grad_norm": 1.824111819267273, "learning_rate": 1.546614297244634e-05, "loss": 0.5532, "step": 493 },
{ "epoch": 1.1026785714285714, "grad_norm": 2.178053617477417, "learning_rate": 1.5444978289422937e-05, "loss": 0.628, "step": 494 },
{ "epoch": 1.1049107142857142, "grad_norm": 1.8749269247055054, "learning_rate": 1.542377888038791e-05, "loss": 0.6572, "step": 495 },
{ "epoch": 1.1071428571428572, "grad_norm": 1.7741912603378296, "learning_rate": 1.540254488054307e-05, "loss": 0.5662, "step": 496 },
{ "epoch": 1.109375, "grad_norm": 1.8596246242523193, "learning_rate": 1.538127642531083e-05, "loss": 0.6233, "step": 497 },
{ "epoch": 1.1116071428571428, "grad_norm": 1.9491173028945923, "learning_rate": 1.5359973650333352e-05, "loss": 0.6861, "step": 498 },
{ "epoch": 1.1138392857142858, "grad_norm": 1.7947884798049927, "learning_rate": 1.533863669147168e-05, "loss": 0.6094, "step": 499 },
{ "epoch": 1.1160714285714286, "grad_norm": 1.9403222799301147, "learning_rate": 1.5317265684804865e-05, "loss": 0.6399, "step": 500 },
{ "epoch": 1.1183035714285714, "grad_norm": 1.8400847911834717, "learning_rate": 1.5295860766629098e-05, "loss": 0.6124, "step": 501 },
{ "epoch": 1.1205357142857142, "grad_norm": 1.8174333572387695, "learning_rate": 1.5274422073456853e-05, "loss": 0.6962, "step": 502 },
{ "epoch": 1.1227678571428572, "grad_norm": 1.8172571659088135, "learning_rate": 1.5252949742016005e-05, "loss": 0.5751, "step": 503 },
{ "epoch": 1.125, "grad_norm": 2.1520352363586426, "learning_rate": 1.5231443909248956e-05, "loss": 0.7679, "step": 504 },
{ "epoch": 1.1272321428571428, "grad_norm": 1.8504657745361328, "learning_rate": 1.5209904712311777e-05, "loss": 0.5691, "step": 505 },
{ "epoch": 1.1294642857142858, "grad_norm": 1.9086402654647827, "learning_rate": 1.5188332288573313e-05, "loss": 0.621, "step": 506 },
{ "epoch": 1.1316964285714286, "grad_norm": 1.8591104745864868, "learning_rate": 1.5166726775614327e-05, "loss": 0.6644, "step": 507 },
{ "epoch": 1.1339285714285714, "grad_norm": 2.2467782497406006, "learning_rate": 1.5145088311226599e-05, "loss": 0.7193, "step": 508 },
{ "epoch": 1.1361607142857142, "grad_norm": 2.0161256790161133, "learning_rate": 1.5123417033412078e-05, "loss": 0.5497, "step": 509 },
{ "epoch": 1.1383928571428572, "grad_norm": 1.9359264373779297, "learning_rate": 1.510171308038197e-05, "loss": 0.5944, "step": 510 },
{ "epoch": 1.140625, "grad_norm": 1.893852710723877, "learning_rate": 1.5079976590555876e-05, "loss": 0.6466, "step": 511 },
{ "epoch": 1.1428571428571428, "grad_norm": 1.742958426475525, "learning_rate": 1.5058207702560907e-05, "loss": 0.6072, "step": 512 },
{ "epoch": 1.1450892857142858, "grad_norm": 1.8722553253173828, "learning_rate": 1.5036406555230794e-05, "loss": 0.6441, "step": 513 },
{ "epoch": 1.1473214285714286, "grad_norm": 1.7931900024414062, "learning_rate": 1.501457328760501e-05, "loss": 0.5519, "step": 514 },
{ "epoch": 1.1495535714285714, "grad_norm": 1.7850182056427002, "learning_rate": 1.499270803892787e-05, "loss": 0.5238, "step": 515 },
{ "epoch": 1.1517857142857142, "grad_norm": 2.0565764904022217, "learning_rate": 1.4970810948647664e-05, "loss": 0.6809, "step": 516 },
{ "epoch": 1.1540178571428572, "grad_norm": 2.0107569694519043, "learning_rate": 1.4948882156415748e-05, "loss": 0.6119, "step": 517 },
{ "epoch": 1.15625, "grad_norm": 1.9191194772720337, "learning_rate": 1.4926921802085662e-05, "loss": 0.7001, "step": 518 },
{ "epoch": 1.1584821428571428, "grad_norm": 1.797013282775879, "learning_rate": 1.4904930025712236e-05, "loss": 0.5865, "step": 519 },
{ "epoch": 1.1607142857142858, "grad_norm": 1.8777704238891602, "learning_rate": 1.4882906967550708e-05, "loss": 0.6189, "step": 520 },
{ "epoch": 1.1629464285714286, "grad_norm": 1.7872058153152466, "learning_rate": 1.4860852768055804e-05, "loss": 0.6804, "step": 521 },
{ "epoch": 1.1651785714285714, "grad_norm": 1.9415029287338257, "learning_rate": 1.4838767567880865e-05, "loss": 0.5962, "step": 522 },
{ "epoch": 1.1674107142857142, "grad_norm": 1.8708178997039795, "learning_rate": 1.4816651507876946e-05, "loss": 0.6953, "step": 523 },
{ "epoch": 1.1696428571428572, "grad_norm": 1.8373233079910278, "learning_rate": 1.479450472909191e-05, "loss": 0.5464, "step": 524 },
{ "epoch": 1.171875, "grad_norm": 1.692324161529541, "learning_rate": 1.4772327372769533e-05, "loss": 0.6527, "step": 525 },
{ "epoch": 1.1741071428571428, "grad_norm": 2.331209421157837, "learning_rate": 1.4750119580348601e-05, "loss": 0.7633, "step": 526 },
{ "epoch": 1.1763392857142858, "grad_norm": 1.8351179361343384, "learning_rate": 1.4727881493462018e-05, "loss": 0.5657, "step": 527 },
{ "epoch": 1.1785714285714286, "grad_norm": 1.9545246362686157, "learning_rate": 1.4705613253935886e-05, "loss": 0.6594, "step": 528 },
{ "epoch": 1.1808035714285714, "grad_norm": 1.8347816467285156, "learning_rate": 1.4683315003788614e-05, "loss": 0.6406, "step": 529 },
{ "epoch": 1.1830357142857142, "grad_norm": 2.1158814430236816, "learning_rate": 1.4660986885230002e-05, "loss": 0.5972, "step": 530 },
{ "epoch": 1.1852678571428572, "grad_norm": 1.7203682661056519, "learning_rate": 1.463862904066035e-05, "loss": 0.6403, "step": 531 },
{ "epoch": 1.1875, "grad_norm": 1.9957817792892456, "learning_rate": 1.4616241612669523e-05, "loss": 0.5948, "step": 532 },
{ "epoch": 1.1897321428571428, "grad_norm": 1.9116469621658325, "learning_rate": 1.4593824744036078e-05, "loss": 0.6249, "step": 533 },
{ "epoch": 1.1919642857142858, "grad_norm": 1.8352584838867188, "learning_rate": 1.4571378577726317e-05, "loss": 0.5559, "step": 534 },
{ "epoch": 1.1941964285714286, "grad_norm": 1.9600341320037842, "learning_rate": 1.4548903256893392e-05, "loss": 0.6037, "step": 535 },
{ "epoch": 1.1964285714285714, "grad_norm": 2.105661153793335, "learning_rate": 1.4526398924876407e-05, "loss": 0.654, "step": 536 },
{ "epoch": 1.1986607142857142, "grad_norm": 1.9616891145706177, "learning_rate": 1.4503865725199468e-05, "loss": 0.6022, "step": 537 },
{ "epoch": 1.2008928571428572, "grad_norm": 1.8042315244674683, "learning_rate": 1.4481303801570805e-05, "loss": 0.6358, "step": 538 },
{ "epoch": 1.203125, "grad_norm": 1.7036793231964111, "learning_rate": 1.4458713297881828e-05, "loss": 0.5692, "step": 539 },
{ "epoch": 1.2053571428571428, "grad_norm": 1.8314141035079956, "learning_rate": 1.4436094358206224e-05, "loss": 0.6103, "step": 540 },
{ "epoch": 1.2075892857142858, "grad_norm": 1.6869179010391235, "learning_rate": 1.4413447126799038e-05, "loss": 0.5561, "step": 541 },
{ "epoch": 1.2098214285714286, "grad_norm": 1.5481685400009155, "learning_rate": 1.4390771748095735e-05, "loss": 0.4697, "step": 542 },
{ "epoch": 1.2120535714285714, "grad_norm": 1.904807448387146, "learning_rate": 1.436806836671131e-05, "loss": 0.5764, "step": 543 },
{ "epoch": 1.2142857142857142, "grad_norm": 1.9439524412155151, "learning_rate": 1.4345337127439333e-05, "loss": 0.6585, "step": 544 },
{ "epoch": 1.2165178571428572, "grad_norm": 1.8721519708633423, "learning_rate": 1.4322578175251058e-05, "loss": 0.685, "step": 545 },
{ "epoch": 1.21875, "grad_norm": 1.9289742708206177, "learning_rate": 1.4299791655294461e-05, "loss": 0.6364, "step": 546 },
{ "epoch": 1.2209821428571428, "grad_norm": 1.6985549926757812, "learning_rate": 1.4276977712893357e-05, "loss": 0.6419, "step": 547 },
{ "epoch": 1.2232142857142858, "grad_norm": 1.9946244955062866, "learning_rate": 1.4254136493546432e-05, "loss": 0.7154, "step": 548 },
{ "epoch": 1.2254464285714286, "grad_norm": 1.9798763990402222, "learning_rate": 1.4231268142926345e-05, "loss": 0.6713, "step": 549 },
{ "epoch": 1.2276785714285714, "grad_norm": 2.0185835361480713, "learning_rate": 1.4208372806878782e-05, "loss": 0.7014, "step": 550 },
{ "epoch": 1.2299107142857142, "grad_norm": 2.082404851913452, "learning_rate": 1.4185450631421542e-05, "loss": 0.7588, "step": 551 },
{ "epoch": 1.2321428571428572, "grad_norm": 1.8249948024749756, "learning_rate": 1.4162501762743579e-05, "loss": 0.6651, "step": 552 },
{ "epoch": 1.234375, "grad_norm": 1.8358428478240967, "learning_rate": 1.41395263472041e-05, "loss": 0.6694, "step": 553 },
{ "epoch": 1.2366071428571428, "grad_norm": 1.7472898960113525, "learning_rate": 1.4116524531331616e-05, "loss": 0.5877, "step": 554 },
{ "epoch": 1.2388392857142858, "grad_norm": 1.6928068399429321, "learning_rate": 1.4093496461823002e-05, "loss": 0.6702, "step": 555 },
{ "epoch": 1.2410714285714286, "grad_norm": 1.7535682916641235, "learning_rate": 1.4070442285542579e-05, "loss": 0.6101, "step": 556 },
{ "epoch": 1.2433035714285714, "grad_norm": 1.7997139692306519, "learning_rate": 1.4047362149521152e-05, "loss": 0.6645, "step": 557 },
{ "epoch": 1.2455357142857142, "grad_norm": 1.8536964654922485, "learning_rate": 1.402425620095511e-05, "loss": 0.5933, "step": 558 },
{ "epoch": 1.2477678571428572, "grad_norm": 1.7359586954116821, "learning_rate": 1.400112458720544e-05, "loss": 0.6334, "step": 559 },
{ "epoch": 1.25, "grad_norm": 1.6964356899261475, "learning_rate": 1.3977967455796828e-05, "loss": 0.6578, "step": 560 },
{ "epoch": 1.25, "eval_loss": 0.7179591655731201, "eval_runtime": 45.9423, "eval_samples_per_second": 1.589, "eval_steps_per_second": 0.218, "step": 560 },
{ "epoch": 1.2522321428571428, "grad_norm": 1.778124451637268, "learning_rate": 1.3954784954416703e-05, "loss": 0.6381, "step": 561 },
{ "epoch": 1.2544642857142856, "grad_norm": 1.8484911918640137, "learning_rate": 1.393157723091428e-05, "loss": 0.6636, "step": 562 },
{ "epoch": 1.2566964285714286, "grad_norm": 2.0035502910614014, "learning_rate": 1.3908344433299644e-05, "loss": 0.717, "step": 563 },
{ "epoch": 1.2589285714285714, "grad_norm": 1.7717210054397583, "learning_rate": 1.3885086709742788e-05, "loss": 0.5578, "step": 564 },
{ "epoch": 1.2611607142857144, "grad_norm": 1.9656518697738647, "learning_rate": 1.3861804208572674e-05, "loss": 0.6255, "step": 565 },
{ "epoch": 1.2633928571428572, "grad_norm": 1.973463773727417, "learning_rate": 1.3838497078276288e-05, "loss": 0.691, "step": 566 },
{ "epoch": 1.265625, "grad_norm": 1.6152547597885132, "learning_rate": 1.3815165467497686e-05, "loss": 0.6327, "step": 567 },
{ "epoch": 1.2678571428571428, "grad_norm": 1.9555470943450928, "learning_rate": 1.3791809525037057e-05, "loss": 0.6078, "step": 568 },
{ "epoch": 1.2700892857142856, "grad_norm": 1.9085325002670288, "learning_rate": 1.376842939984977e-05, "loss": 0.5348, "step": 569 },
{ "epoch": 1.2723214285714286, "grad_norm": 1.535760521888733, "learning_rate": 1.3745025241045414e-05, "loss": 0.5663, "step": 570 },
{ "epoch": 1.2745535714285714, "grad_norm": 1.951587200164795, "learning_rate": 1.372159719788686e-05, "loss": 0.6476, "step": 571 },
{ "epoch": 1.2767857142857144, "grad_norm": 2.169016122817993, "learning_rate": 1.3698145419789302e-05, "loss": 0.6918, "step": 572 },
{ "epoch": 1.2790178571428572, "grad_norm": 1.7241047620773315, "learning_rate": 1.3674670056319315e-05, "loss": 0.6644, "step": 573 },
{ "epoch": 1.28125, "grad_norm": 1.6963378190994263, "learning_rate": 1.3651171257193883e-05, "loss": 0.6071, "step": 574 },
{ "epoch": 1.2834821428571428, "grad_norm": 1.891162395477295, "learning_rate": 1.3627649172279453e-05, "loss": 0.6225, "step": 575 },
{ "epoch": 1.2857142857142856, "grad_norm": 1.9643828868865967, "learning_rate": 1.3604103951590993e-05, "loss": 0.6411, "step": 576 },
{ "epoch": 1.2879464285714286, "grad_norm": 1.9789677858352661, "learning_rate": 1.3580535745291001e-05, "loss": 0.6765, "step": 577 },
{ "epoch": 1.2901785714285714, "grad_norm": 1.958038568496704, "learning_rate": 1.3556944703688592e-05, "loss": 0.6554, "step": 578 },
{ "epoch": 1.2924107142857144, "grad_norm": 1.7940925359725952, "learning_rate": 1.3533330977238496e-05, "loss": 0.5874, "step": 579 },
{ "epoch": 1.2946428571428572, "grad_norm": 1.920786738395691, "learning_rate": 1.3509694716540135e-05, "loss": 0.5498, "step": 580 },
{ "epoch": 1.296875, "grad_norm": 1.6368259191513062, "learning_rate": 1.348603607233663e-05, "loss": 0.5471, "step": 581 },
{ "epoch": 1.2991071428571428, "grad_norm": 1.859761118888855, "learning_rate": 1.3462355195513868e-05, "loss": 0.7125, "step": 582 },
{ "epoch": 1.3013392857142856, "grad_norm": 1.9652460813522339, "learning_rate": 1.343865223709952e-05, "loss": 0.6611, "step": 583 },
{ "epoch": 1.3035714285714286, "grad_norm": 1.8966349363327026, "learning_rate": 1.341492734826209e-05, "loss": 0.6874, "step": 584 },
{ "epoch": 1.3058035714285714, "grad_norm": 1.8784470558166504, "learning_rate": 1.3391180680309945e-05, "loss": 0.5934, "step": 585 },
{ "epoch": 1.3080357142857144, "grad_norm": 2.3371737003326416, "learning_rate": 1.3367412384690346e-05, "loss": 0.7451, "step": 586 },
{ "epoch": 1.3102678571428572, "grad_norm": 1.7194281816482544, "learning_rate": 1.3343622612988492e-05, "loss": 0.6767, "step": 587 },
{ "epoch": 1.3125, "grad_norm": 1.9259989261627197, "learning_rate": 1.3319811516926541e-05, "loss": 0.6433, "step": 588 },
{ "epoch": 1.3147321428571428, "grad_norm": 1.7472665309906006, "learning_rate": 1.329597924836267e-05, "loss": 0.6128, "step": 589 },
{ "epoch": 1.3169642857142856, "grad_norm": 2.028818130493164, "learning_rate": 1.3272125959290059e-05, "loss": 0.7026, "step": 590 },
{ "epoch": 1.3191964285714286, "grad_norm": 1.8237242698669434, "learning_rate": 1.3248251801835968e-05, "loss": 0.6255, "step": 591 },
{ "epoch": 1.3214285714285714, "grad_norm": 2.014730215072632, "learning_rate": 1.3224356928260735e-05, "loss": 0.5823, "step": 592 },
{ "epoch": 1.3236607142857144, "grad_norm": 1.8209702968597412, "learning_rate": 1.3200441490956832e-05, "loss": 0.6455, "step": 593 },
{ "epoch": 1.3258928571428572, "grad_norm": 1.8454806804656982, "learning_rate": 1.317650564244787e-05, "loss": 0.6597, "step": 594 },
{ "epoch": 1.328125, "grad_norm": 1.621583104133606, "learning_rate": 1.3152549535387624e-05, "loss": 0.6227, "step": 595 },
{ "epoch": 1.3303571428571428, "grad_norm": 1.9547210931777954, "learning_rate": 1.3128573322559097e-05, "loss": 0.6325, "step": 596 },
{ "epoch": 1.3325892857142856, "grad_norm": 1.9106806516647339, "learning_rate": 1.3104577156873496e-05, "loss": 0.6069, "step": 597 },
{ "epoch": 1.3348214285714286, "grad_norm": 1.8474856615066528, "learning_rate": 1.3080561191369286e-05, "loss": 0.6753, "step": 598 },
{ "epoch": 1.3370535714285714, "grad_norm": 1.5305962562561035, "learning_rate": 1.3056525579211215e-05, "loss": 0.5475, "step": 599 },
{ "epoch": 1.3392857142857144, "grad_norm": 2.134941577911377, "learning_rate": 1.3032470473689322e-05, "loss": 0.6793, "step": 600 },
{ "epoch": 1.3415178571428572, "grad_norm": 1.8868045806884766, "learning_rate": 1.3008396028217969e-05, "loss": 0.6653, "step": 601 },
{ "epoch": 1.34375, "grad_norm": 1.9452149868011475, "learning_rate": 1.298430239633486e-05, "loss": 0.6529, "step": 602 },
{ "epoch": 1.3459821428571428, "grad_norm": 1.7577698230743408, "learning_rate": 1.296018973170007e-05, "loss": 0.6174, "step": 603 },
{ "epoch": 1.3482142857142856, "grad_norm": 1.6762840747833252, "learning_rate": 1.2936058188095045e-05, "loss": 0.5839, "step": 604 },
{ "epoch": 1.3504464285714286, "grad_norm": 1.8176460266113281, "learning_rate": 1.2911907919421647e-05, "loss": 0.6965, "step": 605 },
{ "epoch": 1.3526785714285714, "grad_norm": 1.8090909719467163, "learning_rate": 1.2887739079701147e-05, "loss": 0.6409, "step": 606 },
{ "epoch": 1.3549107142857144, "grad_norm": 1.937070369720459, "learning_rate": 1.2863551823073266e-05, "loss": 0.6219, "step": 607 },
{ "epoch": 1.3571428571428572, "grad_norm": 1.9617418050765991, "learning_rate": 1.2839346303795173e-05, "loss": 0.5834, "step": 608 },
{ "epoch": 1.359375, "grad_norm": 1.7203189134597778, "learning_rate": 1.2815122676240518e-05, "loss": 0.5439, "step": 609 },
{ "epoch": 1.3616071428571428, "grad_norm": 1.7873952388763428, "learning_rate": 1.2790881094898428e-05, "loss": 0.6598, "step": 610 },
{ "epoch": 1.3638392857142856, "grad_norm": 1.7811925411224365, "learning_rate": 1.2766621714372543e-05, "loss": 0.5395, "step": 611 },
{ "epoch": 1.3660714285714286, "grad_norm": 1.8524154424667358, "learning_rate": 1.274234468938001e-05, "loss": 0.6735, "step": 612 },
{ "epoch": 1.3683035714285714, "grad_norm": 1.8554515838623047, "learning_rate": 1.271805017475051e-05, "loss": 0.6132, "step": 613 },
{ "epoch": 1.3705357142857144, "grad_norm": 1.815579891204834, "learning_rate": 1.2693738325425272e-05, "loss": 0.6722, "step": 614 },
{ "epoch": 1.3727678571428572, "grad_norm": 2.338247299194336, "learning_rate": 1.266940929645606e-05, "loss": 0.7669, "step": 615 },
{ "epoch": 1.375, "grad_norm": 1.8510740995407104, "learning_rate": 1.2645063243004236e-05, "loss": 0.5504, "step": 616 },
{ "epoch": 1.3772321428571428, "grad_norm": 1.798982858657837, "learning_rate": 1.2620700320339705e-05, "loss": 0.604, "step": 617 },
{ "epoch": 1.3794642857142856, "grad_norm": 1.7797491550445557, "learning_rate": 1.2596320683839976e-05, "loss": 0.6598, "step": 618 },
{ "epoch": 1.3816964285714286, "grad_norm": 1.8819565773010254, "learning_rate": 1.2571924488989145e-05, "loss": 0.6177, "step": 619 },
{ "epoch": 1.3839285714285714, "grad_norm": 1.7809234857559204, "learning_rate": 1.2547511891376916e-05, "loss": 0.5357, "step": 620 },
{ "epoch": 1.3861607142857144, "grad_norm": 1.736649513244629, "learning_rate": 1.2523083046697598e-05, "loss": 0.6372, "step": 621 },
{ "epoch": 1.3883928571428572, "grad_norm": 1.9298821687698364, "learning_rate": 1.2498638110749122e-05, "loss": 0.686, "step": 622 },
{ "epoch": 1.390625, "grad_norm": 1.8866440057754517, "learning_rate": 1.2474177239432042e-05, "loss": 0.6319, "step": 623 },
{ "epoch": 1.3928571428571428, "grad_norm": 1.7113823890686035, "learning_rate": 1.2449700588748541e-05, "loss": 0.6942, "step": 624 },
{ "epoch": 1.3950892857142856, "grad_norm": 1.9925826787948608, "learning_rate": 1.2425208314801441e-05, "loss": 0.5561, "step": 625 },
{ "epoch": 1.3973214285714286, "grad_norm": 2.008894443511963, "learning_rate": 1.2400700573793191e-05, "loss": 0.6861, "step": 626 },
{ "epoch": 1.3995535714285714, "grad_norm": 1.9158005714416504, "learning_rate": 1.23761775220249e-05, "loss": 0.6071, "step": 627 },
{ "epoch": 1.4017857142857144, "grad_norm": 2.0181586742401123, "learning_rate": 1.2351639315895309e-05, "loss": 0.7582, "step": 628 },
{ "epoch": 1.4040178571428572, "grad_norm": 2.089715003967285, "learning_rate": 1.2327086111899816e-05, "loss": 0.7459, "step": 629 },
{ "epoch": 1.40625, "grad_norm": 2.0146496295928955, "learning_rate": 1.2302518066629467e-05, "loss": 0.6312, "step": 630 },
{ "epoch": 1.4084821428571428, "grad_norm": 1.7847448587417603, "learning_rate": 1.2277935336769961e-05, "loss": 0.5838, "step": 631 },
{ "epoch": 1.4107142857142856, "grad_norm": 1.8573119640350342, "learning_rate": 1.2253338079100652e-05, "loss": 0.6427, "step": 632 },
{ "epoch": 1.4129464285714286, "grad_norm": 1.8742104768753052, "learning_rate": 1.2228726450493538e-05, "loss": 0.6697, "step": 633 },
{ "epoch": 1.4151785714285714, "grad_norm": 2.0059330463409424, "learning_rate": 1.2204100607912277e-05, "loss": 0.6663, "step": 634 },
{ "epoch": 1.4174107142857144, "grad_norm": 1.8245443105697632, "learning_rate": 1.2179460708411177e-05, "loss": 0.6052, "step": 635 },
{ "epoch": 1.4196428571428572, "grad_norm": 1.759437084197998, "learning_rate": 1.2154806909134198e-05, "loss": 0.59, "step": 636 },
{ "epoch": 1.421875, "grad_norm": 1.7457926273345947, "learning_rate": 1.213013936731394e-05, "loss": 0.6548, "step": 637 },
{ "epoch": 1.4241071428571428, "grad_norm": 1.7185189723968506, "learning_rate": 1.210545824027066e-05, "loss": 0.6277, "step": 638 },
{ "epoch": 1.4263392857142856, "grad_norm": 2.0099501609802246, "learning_rate": 1.2080763685411243e-05, "loss": 0.6866, "step": 639 },
{ "epoch": 1.4285714285714286, "grad_norm": 1.6704769134521484, "learning_rate": 1.205605586022822e-05, "loss": 0.6058, "step": 640 },
{ "epoch": 1.4308035714285714, "grad_norm": 1.7891658544540405, "learning_rate": 1.2031334922298749e-05, "loss": 0.6382, "step": 641 },
{ "epoch": 1.4330357142857144, "grad_norm": 1.7908434867858887, "learning_rate": 1.2006601029283629e-05, "loss": 0.6063, "step": 642 },
{ "epoch": 1.4352678571428572, "grad_norm": 1.9139795303344727, "learning_rate": 1.1981854338926262e-05, "loss": 0.6399, "step": 643 },
{ "epoch": 1.4375, "grad_norm": 1.7342250347137451, "learning_rate": 1.1957095009051683e-05, "loss": 0.63, "step": 644 },
{ "epoch": 1.4397321428571428, "grad_norm": 1.8219507932662964, "learning_rate": 1.193232319756553e-05, "loss": 0.5838, "step": 645 },
{ "epoch": 1.4419642857142856, "grad_norm": 1.791318416595459, "learning_rate": 1.1907539062453044e-05, "loss": 0.6082, "step": 646 },
{ "epoch": 1.4441964285714286, "grad_norm": 1.9500919580459595, "learning_rate": 1.1882742761778069e-05, "loss": 0.6217, "step": 647 },
{ "epoch": 1.4464285714285714, "grad_norm": 1.8394417762756348, "learning_rate": 1.1857934453682016e-05, "loss": 0.62, "step": 648 },
{ "epoch": 1.4486607142857144, "grad_norm": 1.7951915264129639, "learning_rate": 1.1833114296382903e-05, "loss": 0.6073, "step": 649 },
{ "epoch": 1.4508928571428572, "grad_norm": 1.609543800354004, "learning_rate": 1.1808282448174295e-05, "loss": 0.5101, "step": 650 },
{ "epoch": 1.453125, "grad_norm": 1.9620007276535034, "learning_rate": 1.1783439067424329e-05, "loss": 0.6477, "step": 651 },
{ "epoch": 1.4553571428571428, "grad_norm": 1.7118951082229614, "learning_rate": 1.1758584312574693e-05, "loss": 0.5712, "step": 652 },
{ "epoch": 1.4575892857142856, "grad_norm": 1.7285962104797363, "learning_rate": 1.17337183421396e-05, "loss": 0.6041, "step": 653 },
{ "epoch": 1.4598214285714286, "grad_norm": 1.6281301975250244, "learning_rate": 1.1708841314704811e-05, "loss": 0.6508, "step": 654 },
{ "epoch": 1.4620535714285714, "grad_norm": 1.5081804990768433, "learning_rate": 1.1683953388926592e-05, "loss": 0.5559, "step": 655 },
{ "epoch": 1.4642857142857144, "grad_norm": 2.0005712509155273, "learning_rate": 1.1659054723530721e-05, "loss": 0.5777, "step": 656 },
{ "epoch": 1.4665178571428572, "grad_norm": 2.1517813205718994, "learning_rate": 1.163414547731146e-05, "loss": 0.7034, "step": 657 },
{ "epoch": 1.46875, "grad_norm": 2.2742764949798584, "learning_rate": 1.1609225809130566e-05, "loss": 0.6747, "step": 658 },
{ "epoch": 1.4709821428571428, "grad_norm": 1.6455707550048828, "learning_rate": 1.1584295877916251e-05, "loss": 0.5293, "step": 659 },
{ "epoch": 1.4732142857142856, "grad_norm": 1.7664430141448975, "learning_rate": 1.1559355842662188e-05, "loss": 0.6505, "step": 660 },
{ "epoch": 1.4754464285714286, "grad_norm": 1.776328682899475, "learning_rate": 1.1534405862426481e-05, "loss": 0.6094, "step": 661 },
{ "epoch": 1.4776785714285714, "grad_norm": 1.7001668214797974, "learning_rate": 1.150944609633067e-05, "loss": 0.6687, "step": 662 },
{ "epoch": 1.4799107142857144, "grad_norm": 1.7800358533859253, "learning_rate": 1.1484476703558698e-05, "loss": 0.656, "step": 663 },
{ "epoch": 1.4821428571428572, "grad_norm": 1.980301856994629, "learning_rate": 1.1459497843355907e-05, "loss": 0.6866, "step": 664 },
{ "epoch": 1.484375, "grad_norm": 1.8124170303344727, "learning_rate": 1.1434509675028018e-05, "loss": 0.5867, "step": 665 },
{ "epoch": 1.4866071428571428, "grad_norm": 1.5809247493743896, "learning_rate": 1.1409512357940114e-05, "loss": 0.562, "step": 666 },
{ "epoch": 1.4888392857142856, "grad_norm": 2.0136334896087646, "learning_rate": 1.138450605151563e-05, "loss": 0.628, "step": 667 },
{ "epoch": 1.4910714285714286, "grad_norm": 1.7345236539840698, "learning_rate": 1.1359490915235323e-05, "loss": 0.6533, "step": 668 },
{ "epoch": 1.4933035714285714, "grad_norm": 1.6967463493347168, "learning_rate": 1.1334467108636273e-05, "loss": 0.6514, "step": 669 },
{ "epoch": 1.4955357142857144, "grad_norm": 1.7118983268737793, "learning_rate": 1.1309434791310848e-05, "loss": 0.7126, "step": 670 },
{ "epoch": 1.4977678571428572, "grad_norm": 1.6460996866226196, "learning_rate": 1.1284394122905697e-05, "loss": 0.6425, "step": 671 },
{ "epoch": 1.5, "grad_norm": 1.8571183681488037, "learning_rate": 1.1259345263120738e-05, "loss": 0.5949, "step": 672 },
{ "epoch": 1.5, "eval_loss": 0.7134630680084229, "eval_runtime": 45.5605, "eval_samples_per_second": 1.602, "eval_steps_per_second": 0.219, "step": 672 },
{ "epoch": 1.5022321428571428, "grad_norm": 1.7399746179580688, "learning_rate": 1.1234288371708112e-05, "loss": 0.6018, "step": 673 },
{ "epoch": 1.5044642857142856, "grad_norm": 1.7217991352081299, "learning_rate": 1.1209223608471202e-05, "loss": 0.5705, "step": 674 },
{ "epoch": 1.5066964285714286, "grad_norm": 1.7552762031555176, "learning_rate": 1.1184151133263578e-05, "loss": 0.6119, "step": 675 },
{ "epoch": 1.5089285714285714, "grad_norm": 1.923822045326233, "learning_rate": 1.1159071105988012e-05, "loss": 0.569, "step": 676 },
{ "epoch": 1.5111607142857144, "grad_norm": 1.7491592168807983, "learning_rate": 1.1133983686595416e-05, "loss": 0.6234, "step": 677 },
{ "epoch": 1.5133928571428572, "grad_norm": 1.9240578413009644, "learning_rate": 1.110888903508387e-05, "loss": 0.7482, "step": 678 },
{ "epoch": 1.515625, "grad_norm": 1.783152461051941, "learning_rate": 1.1083787311497562e-05, "loss": 0.6756, "step": 679 },
{ "epoch": 1.5178571428571428, "grad_norm": 2.130394220352173, "learning_rate": 1.1058678675925796e-05, "loss": 0.5983, "step": 680 },
{ "epoch": 1.5200892857142856, "grad_norm": 2.051589012145996, "learning_rate": 1.1033563288501944e-05, "loss": 0.6002, "step": 681 },
{ "epoch": 1.5223214285714286, "grad_norm": 1.8057156801223755, "learning_rate": 1.1008441309402448e-05, "loss": 0.6485, "step": 682 },
{ "epoch": 1.5245535714285714, "grad_norm": 1.9219319820404053, "learning_rate": 1.0983312898845788e-05, "loss": 0.6524, "step": 683 },
{ "epoch": 1.5267857142857144, "grad_norm": 1.7352206707000732, "learning_rate": 1.0958178217091455e-05, "loss": 0.5449, "step": 684 },
{ "epoch": 1.5290178571428572, "grad_norm": 1.9508435726165771, "learning_rate": 1.093303742443895e-05, "loss": 0.704, "step": 685 },
{ "epoch": 1.53125, "grad_norm": 1.6700478792190552, "learning_rate": 1.0907890681226728e-05, "loss": 0.611, "step": 686 },
{ "epoch": 1.5334821428571428, "grad_norm": 1.7663685083389282, "learning_rate": 1.0882738147831209e-05, "loss": 0.5739, "step": 687 },
{ "epoch": 1.5357142857142856, "grad_norm": 1.8615878820419312, "learning_rate": 1.0857579984665733e-05, "loss": 0.6008, "step": 688 },
{ "epoch": 1.5379464285714286, "grad_norm": 1.6442456245422363, "learning_rate": 1.0832416352179549e-05, "loss": 0.638, "step": 689 },
{ "epoch": 1.5401785714285714, "grad_norm": 1.9190770387649536, "learning_rate": 1.0807247410856783e-05, "loss": 0.6204, "step": 690 },
{ "epoch": 1.5424107142857144, "grad_norm": 1.8047891855239868, "learning_rate": 1.0782073321215423e-05, "loss": 0.6699, "step": 691 },
{ "epoch": 1.5446428571428572, "grad_norm": 1.5776137113571167, "learning_rate": 1.0756894243806291e-05, "loss": 0.563, "step": 692 },
{ "epoch": 1.546875, "grad_norm": 1.6929550170898438, "learning_rate": 1.073171033921201e-05, "loss": 0.6255, "step": 693 },
{ "epoch": 1.5491071428571428, "grad_norm": 1.6478757858276367, "learning_rate": 1.0706521768046006e-05, "loss": 0.5839, "step": 694 },
{ "epoch": 1.5513392857142856, "grad_norm": 1.6278104782104492, "learning_rate": 1.0681328690951447e-05, "loss": 0.5533, "step": 695 },
{ "epoch": 1.5535714285714286, "grad_norm": 1.8348695039749146, "learning_rate": 1.0656131268600254e-05, "loss": 0.5925, "step": 696 },
{ "epoch": 1.5558035714285714, "grad_norm": 1.8064024448394775, "learning_rate": 1.0630929661692051e-05, "loss": 0.6057, "step": 697 },
{ "epoch": 1.5580357142857144, "grad_norm": 1.8319587707519531, "learning_rate": 1.0605724030953155e-05, "loss": 0.6066, "step": 698 },
{ "epoch": 1.5602678571428572, "grad_norm": 2.0297727584838867, "learning_rate": 1.0580514537135542e-05, "loss": 0.7303, "step": 699 },
{ "epoch": 1.5625, "grad_norm": 1.909903883934021, "learning_rate": 1.0555301341015832e-05, "loss": 0.7264, "step": 700 },
{ "epoch": 1.5647321428571428, "grad_norm": 1.9629833698272705, "learning_rate": 1.0530084603394239e-05, "loss": 0.6276, "step": 701 },
{ "epoch": 1.5669642857142856, "grad_norm": 1.8616989850997925, "learning_rate": 1.0504864485093588e-05, "loss": 0.6027, "step": 702 },
{ "epoch": 1.5691964285714286, "grad_norm": 1.8771177530288696, "learning_rate": 1.0479641146958249e-05, "loss": 0.5703, "step": 703 },
{ "epoch": 1.5714285714285714, "grad_norm": 1.9348293542861938, "learning_rate": 1.0454414749853126e-05, "loss": 0.5615, "step": 704 },
{ "epoch": 1.5736607142857144, "grad_norm": 2.0445797443389893, "learning_rate": 1.0429185454662638e-05, "loss": 0.7221, "step": 705 },
{ "epoch": 1.5758928571428572, "grad_norm": 1.6325020790100098, "learning_rate": 1.0403953422289687e-05, "loss": 0.6198, "step": 706 },
{ "epoch": 1.578125, "grad_norm": 1.6755043268203735, "learning_rate": 1.0378718813654633e-05, "loss": 0.6068, "step": 707 },
{ "epoch": 1.5803571428571428, "grad_norm": 2.0556771755218506, "learning_rate": 1.0353481789694258e-05, "loss": 0.6963, "step": 708 },
{ "epoch": 1.5825892857142856, "grad_norm": 2.051053524017334, "learning_rate": 1.0328242511360753e-05, "loss": 0.6156, "step": 709 },
{ "epoch": 1.5848214285714286, "grad_norm": 1.755422830581665, "learning_rate": 1.030300113962069e-05, "loss": 0.5775, "step": 710 },
{ "epoch": 1.5870535714285714, "grad_norm": 1.5062118768692017, "learning_rate": 1.0277757835453989e-05, "loss": 0.5894, "step": 711 },
{ "epoch": 1.5892857142857144, "grad_norm": 2.010497808456421, "learning_rate": 1.0252512759852891e-05, "loss": 0.5907, "step": 712 },
{ "epoch": 1.5915178571428572, "grad_norm": 1.8485591411590576, "learning_rate": 1.0227266073820939e-05, "loss": 0.6699, "step": 713 },
{ "epoch": 1.59375, "grad_norm": 1.8647955656051636, "learning_rate": 1.0202017938371947e-05, "loss": 0.7198, "step": 714 },
{ "epoch": 1.5959821428571428, "grad_norm": 1.8782999515533447, "learning_rate": 1.0176768514528967e-05, "loss": 0.5807, "step": 715 },
{ "epoch": 1.5982142857142856, "grad_norm": 1.9086512327194214, "learning_rate": 1.015151796332328e-05, "loss": 0.6106, "step": 716 },
{ "epoch": 1.6004464285714286, "grad_norm": 1.9945212602615356, "learning_rate": 1.012626644579334e-05, "loss": 0.7394, "step": 717 },
{ "epoch": 1.6026785714285714, "grad_norm": 1.789683222770691, "learning_rate": 1.010101412298378e-05, "loss": 0.579, "step": 718 },
{ "epoch": 1.6049107142857144, "grad_norm": 1.959991216659546, "learning_rate": 1.0075761155944355e-05, "loss": 0.6562, "step": 719 },
{ "epoch": 1.6071428571428572,
|
"grad_norm": 1.911706566810608, |
|
"learning_rate": 1.0050507705728943e-05, |
|
"loss": 0.5403, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.609375, |
|
"grad_norm": 1.9638135433197021, |
|
"learning_rate": 1.0025253933394487e-05, |
|
"loss": 0.6123, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 1.6116071428571428, |
|
"grad_norm": 1.973676085472107, |
|
"learning_rate": 1e-05, |
|
"loss": 0.5727, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 1.6138392857142856, |
|
"grad_norm": 2.101343870162964, |
|
"learning_rate": 9.974746066605515e-06, |
|
"loss": 0.5999, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 1.6160714285714286, |
|
"grad_norm": 1.8011658191680908, |
|
"learning_rate": 9.949492294271062e-06, |
|
"loss": 0.5714, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 1.6183035714285714, |
|
"grad_norm": 1.8372050523757935, |
|
"learning_rate": 9.924238844055646e-06, |
|
"loss": 0.6376, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 1.6205357142857144, |
|
"grad_norm": 1.8423105478286743, |
|
"learning_rate": 9.898985877016225e-06, |
|
"loss": 0.6067, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 1.6227678571428572, |
|
"grad_norm": 1.605955719947815, |
|
"learning_rate": 9.873733554206663e-06, |
|
"loss": 0.5868, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 1.625, |
|
"grad_norm": 1.84028160572052, |
|
"learning_rate": 9.848482036676725e-06, |
|
"loss": 0.6101, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 1.6272321428571428, |
|
"grad_norm": 1.82899808883667, |
|
"learning_rate": 9.823231485471034e-06, |
|
"loss": 0.6578, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 1.6294642857142856, |
|
"grad_norm": 1.6356295347213745, |
|
"learning_rate": 9.797982061628056e-06, |
|
"loss": 0.6306, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.6316964285714286, |
|
"grad_norm": 1.9734749794006348, |
|
"learning_rate": 9.772733926179066e-06, |
|
"loss": 0.6826, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 1.6339285714285714, |
|
"grad_norm": 1.6971672773361206, |
|
"learning_rate": 9.747487240147112e-06, |
|
"loss": 0.6536, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 1.6361607142857144, |
|
"grad_norm": 2.0280985832214355, |
|
"learning_rate": 9.722242164546016e-06, |
|
"loss": 0.6014, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 1.6383928571428572, |
|
"grad_norm": 2.0396358966827393, |
|
"learning_rate": 9.696998860379313e-06, |
|
"loss": 0.6495, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 1.640625, |
|
"grad_norm": 1.9824975728988647, |
|
"learning_rate": 9.67175748863925e-06, |
|
"loss": 0.6704, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 1.6428571428571428, |
|
"grad_norm": 1.9405537843704224, |
|
"learning_rate": 9.646518210305747e-06, |
|
"loss": 0.6041, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 1.6450892857142856, |
|
"grad_norm": 1.7607909440994263, |
|
"learning_rate": 9.621281186345367e-06, |
|
"loss": 0.6549, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 1.6473214285714286, |
|
"grad_norm": 1.8185755014419556, |
|
"learning_rate": 9.596046577710314e-06, |
|
"loss": 0.6028, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 1.6495535714285714, |
|
"grad_norm": 1.7550791501998901, |
|
"learning_rate": 9.570814545337362e-06, |
|
"loss": 0.6871, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 1.6517857142857144, |
|
"grad_norm": 2.181464195251465, |
|
"learning_rate": 9.545585250146879e-06, |
|
"loss": 0.789, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.6540178571428572, |
|
"grad_norm": 1.8093764781951904, |
|
"learning_rate": 9.520358853041756e-06, |
|
"loss": 0.547, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 1.65625, |
|
"grad_norm": 1.6014175415039062, |
|
"learning_rate": 9.495135514906415e-06, |
|
"loss": 0.5635, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 1.6584821428571428, |
|
"grad_norm": 1.8001309633255005, |
|
"learning_rate": 9.469915396605763e-06, |
|
"loss": 0.6576, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 1.6607142857142856, |
|
"grad_norm": 2.0399434566497803, |
|
"learning_rate": 9.44469865898417e-06, |
|
"loss": 0.6821, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 1.6629464285714286, |
|
"grad_norm": 1.7246460914611816, |
|
"learning_rate": 9.41948546286446e-06, |
|
"loss": 0.586, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 1.6651785714285714, |
|
"grad_norm": 1.621623158454895, |
|
"learning_rate": 9.394275969046845e-06, |
|
"loss": 0.5024, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 1.6674107142857144, |
|
"grad_norm": 1.9162286520004272, |
|
"learning_rate": 9.369070338307954e-06, |
|
"loss": 0.6224, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 1.6696428571428572, |
|
"grad_norm": 2.026129961013794, |
|
"learning_rate": 9.34386873139975e-06, |
|
"loss": 0.57, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 1.671875, |
|
"grad_norm": 1.8409785032272339, |
|
"learning_rate": 9.31867130904856e-06, |
|
"loss": 0.5404, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 1.6741071428571428, |
|
"grad_norm": 2.1627585887908936, |
|
"learning_rate": 9.293478231954e-06, |
|
"loss": 0.782, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.6763392857142856, |
|
"grad_norm": 1.648873209953308, |
|
"learning_rate": 9.26828966078799e-06, |
|
"loss": 0.4748, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 1.6785714285714286, |
|
"grad_norm": 2.045337200164795, |
|
"learning_rate": 9.243105756193714e-06, |
|
"loss": 0.6399, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 1.6808035714285714, |
|
"grad_norm": 1.631089448928833, |
|
"learning_rate": 9.217926678784579e-06, |
|
"loss": 0.5552, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 1.6830357142857144, |
|
"grad_norm": 1.683990478515625, |
|
"learning_rate": 9.192752589143219e-06, |
|
"loss": 0.5701, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 1.6852678571428572, |
|
"grad_norm": 1.9549455642700195, |
|
"learning_rate": 9.167583647820453e-06, |
|
"loss": 0.7436, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 1.6875, |
|
"grad_norm": 1.6943837404251099, |
|
"learning_rate": 9.14242001533427e-06, |
|
"loss": 0.5931, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 1.6897321428571428, |
|
"grad_norm": 1.7470611333847046, |
|
"learning_rate": 9.117261852168794e-06, |
|
"loss": 0.6501, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 1.6919642857142856, |
|
"grad_norm": 1.8046934604644775, |
|
"learning_rate": 9.092109318773274e-06, |
|
"loss": 0.7076, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 1.6941964285714286, |
|
"grad_norm": 1.9041763544082642, |
|
"learning_rate": 9.066962575561054e-06, |
|
"loss": 0.6717, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 1.6964285714285714, |
|
"grad_norm": 1.9243342876434326, |
|
"learning_rate": 9.041821782908544e-06, |
|
"loss": 0.6456, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.6986607142857144, |
|
"grad_norm": 1.4959568977355957, |
|
"learning_rate": 9.016687101154215e-06, |
|
"loss": 0.5338, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 1.7008928571428572, |
|
"grad_norm": 1.8079724311828613, |
|
"learning_rate": 8.991558690597553e-06, |
|
"loss": 0.6154, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 1.703125, |
|
"grad_norm": 1.9148213863372803, |
|
"learning_rate": 8.966436711498058e-06, |
|
"loss": 0.6811, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 1.7053571428571428, |
|
"grad_norm": 1.9065920114517212, |
|
"learning_rate": 8.941321324074207e-06, |
|
"loss": 0.6276, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 1.7075892857142856, |
|
"grad_norm": 1.983621597290039, |
|
"learning_rate": 8.916212688502438e-06, |
|
"loss": 0.5618, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 1.7098214285714286, |
|
"grad_norm": 1.6854325532913208, |
|
"learning_rate": 8.891110964916135e-06, |
|
"loss": 0.564, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 1.7120535714285714, |
|
"grad_norm": 1.6306523084640503, |
|
"learning_rate": 8.866016313404586e-06, |
|
"loss": 0.5719, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 1.7142857142857144, |
|
"grad_norm": 1.9582360982894897, |
|
"learning_rate": 8.840928894011995e-06, |
|
"loss": 0.6923, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 1.7165178571428572, |
|
"grad_norm": 1.624234914779663, |
|
"learning_rate": 8.815848866736424e-06, |
|
"loss": 0.5244, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 1.71875, |
|
"grad_norm": 2.052605628967285, |
|
"learning_rate": 8.790776391528803e-06, |
|
"loss": 0.6598, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.7209821428571428, |
|
"grad_norm": 2.070335626602173, |
|
"learning_rate": 8.76571162829189e-06, |
|
"loss": 0.7077, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 1.7232142857142856, |
|
"grad_norm": 1.8281620740890503, |
|
"learning_rate": 8.740654736879265e-06, |
|
"loss": 0.7506, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 1.7254464285714286, |
|
"grad_norm": 1.6640903949737549, |
|
"learning_rate": 8.715605877094304e-06, |
|
"loss": 0.5309, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 1.7276785714285714, |
|
"grad_norm": 1.8417410850524902, |
|
"learning_rate": 8.690565208689157e-06, |
|
"loss": 0.6216, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 1.7299107142857144, |
|
"grad_norm": 1.7450134754180908, |
|
"learning_rate": 8.665532891363732e-06, |
|
"loss": 0.6304, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 1.7321428571428572, |
|
"grad_norm": 1.8155803680419922, |
|
"learning_rate": 8.640509084764682e-06, |
|
"loss": 0.6779, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 1.734375, |
|
"grad_norm": 1.9008722305297852, |
|
"learning_rate": 8.615493948484375e-06, |
|
"loss": 0.7313, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 1.7366071428571428, |
|
"grad_norm": 1.5549745559692383, |
|
"learning_rate": 8.590487642059888e-06, |
|
"loss": 0.538, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 1.7388392857142856, |
|
"grad_norm": 1.857479214668274, |
|
"learning_rate": 8.565490324971983e-06, |
|
"loss": 0.5986, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 1.7410714285714286, |
|
"grad_norm": 2.1765432357788086, |
|
"learning_rate": 8.540502156644096e-06, |
|
"loss": 0.6422, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.7433035714285714, |
|
"grad_norm": 1.8333587646484375, |
|
"learning_rate": 8.515523296441304e-06, |
|
"loss": 0.6261, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 1.7455357142857144, |
|
"grad_norm": 1.5466289520263672, |
|
"learning_rate": 8.490553903669335e-06, |
|
"loss": 0.541, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 1.7477678571428572, |
|
"grad_norm": 1.7777531147003174, |
|
"learning_rate": 8.465594137573524e-06, |
|
"loss": 0.6545, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 1.5732624530792236, |
|
"learning_rate": 8.440644157337819e-06, |
|
"loss": 0.5988, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_loss": 0.709916353225708, |
|
"eval_runtime": 48.2251, |
|
"eval_samples_per_second": 1.514, |
|
"eval_steps_per_second": 0.207, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 1.7522321428571428, |
|
"grad_norm": 1.6632795333862305, |
|
"learning_rate": 8.415704122083752e-06, |
|
"loss": 0.5257, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 1.7544642857142856, |
|
"grad_norm": 1.7194244861602783, |
|
"learning_rate": 8.390774190869434e-06, |
|
"loss": 0.6742, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 1.7566964285714286, |
|
"grad_norm": 1.8653508424758911, |
|
"learning_rate": 8.365854522688543e-06, |
|
"loss": 0.7084, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 1.7589285714285714, |
|
"grad_norm": 1.814305067062378, |
|
"learning_rate": 8.340945276469282e-06, |
|
"loss": 0.6174, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 1.7611607142857144, |
|
"grad_norm": 1.7196688652038574, |
|
"learning_rate": 8.316046611073413e-06, |
|
"loss": 0.6082, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 1.7633928571428572, |
|
"grad_norm": 1.91426420211792, |
|
"learning_rate": 8.29115868529519e-06, |
|
"loss": 0.69, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.765625, |
|
"grad_norm": 1.8663746118545532, |
|
"learning_rate": 8.266281657860406e-06, |
|
"loss": 0.5621, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 1.7678571428571428, |
|
"grad_norm": 1.9205740690231323, |
|
"learning_rate": 8.24141568742531e-06, |
|
"loss": 0.6699, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 1.7700892857142856, |
|
"grad_norm": 1.7767086029052734, |
|
"learning_rate": 8.21656093257567e-06, |
|
"loss": 0.5818, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 1.7723214285714286, |
|
"grad_norm": 1.8975656032562256, |
|
"learning_rate": 8.191717551825707e-06, |
|
"loss": 0.6507, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 1.7745535714285714, |
|
"grad_norm": 1.672675371170044, |
|
"learning_rate": 8.166885703617098e-06, |
|
"loss": 0.5891, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 1.7767857142857144, |
|
"grad_norm": 1.7817902565002441, |
|
"learning_rate": 8.142065546317988e-06, |
|
"loss": 0.6545, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 1.7790178571428572, |
|
"grad_norm": 2.006059408187866, |
|
"learning_rate": 8.117257238221936e-06, |
|
"loss": 0.727, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 1.78125, |
|
"grad_norm": 1.5490721464157104, |
|
"learning_rate": 8.09246093754696e-06, |
|
"loss": 0.6469, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 1.7834821428571428, |
|
"grad_norm": 1.5985546112060547, |
|
"learning_rate": 8.067676802434472e-06, |
|
"loss": 0.5924, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 1.7857142857142856, |
|
"grad_norm": 1.8553714752197266, |
|
"learning_rate": 8.042904990948319e-06, |
|
"loss": 0.6217, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.7879464285714286, |
|
"grad_norm": 1.9970471858978271, |
|
"learning_rate": 8.01814566107374e-06, |
|
"loss": 0.6261, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 1.7901785714285714, |
|
"grad_norm": 1.6423388719558716, |
|
"learning_rate": 7.993398970716375e-06, |
|
"loss": 0.5292, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 1.7924107142857144, |
|
"grad_norm": 1.7289307117462158, |
|
"learning_rate": 7.968665077701253e-06, |
|
"loss": 0.6432, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 1.7946428571428572, |
|
"grad_norm": 1.7097069025039673, |
|
"learning_rate": 7.943944139771784e-06, |
|
"loss": 0.6175, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 1.796875, |
|
"grad_norm": 1.684017539024353, |
|
"learning_rate": 7.919236314588759e-06, |
|
"loss": 0.6137, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 1.7991071428571428, |
|
"grad_norm": 2.1642661094665527, |
|
"learning_rate": 7.894541759729344e-06, |
|
"loss": 0.6404, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 1.8013392857142856, |
|
"grad_norm": 1.771759271621704, |
|
"learning_rate": 7.869860632686059e-06, |
|
"loss": 0.6232, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 1.8035714285714286, |
|
"grad_norm": 2.106753349304199, |
|
"learning_rate": 7.845193090865807e-06, |
|
"loss": 0.6691, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 1.8058035714285714, |
|
"grad_norm": 1.966404676437378, |
|
"learning_rate": 7.820539291588825e-06, |
|
"loss": 0.6703, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 1.8080357142857144, |
|
"grad_norm": 1.7450644969940186, |
|
"learning_rate": 7.795899392087728e-06, |
|
"loss": 0.6583, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.8102678571428572, |
|
"grad_norm": 1.991557002067566, |
|
"learning_rate": 7.771273549506466e-06, |
|
"loss": 0.6434, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 1.8125, |
|
"grad_norm": 1.7553882598876953, |
|
"learning_rate": 7.746661920899351e-06, |
|
"loss": 0.5625, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 1.8147321428571428, |
|
"grad_norm": 2.1503360271453857, |
|
"learning_rate": 7.72206466323004e-06, |
|
"loss": 0.7151, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 1.8169642857142856, |
|
"grad_norm": 1.7755491733551025, |
|
"learning_rate": 7.697481933370535e-06, |
|
"loss": 0.7162, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 1.8191964285714286, |
|
"grad_norm": 1.7988295555114746, |
|
"learning_rate": 7.672913888100187e-06, |
|
"loss": 0.5866, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 1.8214285714285714, |
|
"grad_norm": 1.6645320653915405, |
|
"learning_rate": 7.648360684104695e-06, |
|
"loss": 0.6317, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 1.8236607142857144, |
|
"grad_norm": 1.955287218093872, |
|
"learning_rate": 7.623822477975105e-06, |
|
"loss": 0.6164, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 1.8258928571428572, |
|
"grad_norm": 1.6788568496704102, |
|
"learning_rate": 7.599299426206812e-06, |
|
"loss": 0.4848, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 1.828125, |
|
"grad_norm": 1.9233473539352417, |
|
"learning_rate": 7.574791685198563e-06, |
|
"loss": 0.6127, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 1.8303571428571428, |
|
"grad_norm": 1.9900723695755005, |
|
"learning_rate": 7.550299411251461e-06, |
|
"loss": 0.6527, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.8325892857142856, |
|
"grad_norm": 1.84470534324646, |
|
"learning_rate": 7.52582276056796e-06, |
|
"loss": 0.6146, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 1.8348214285714286, |
|
"grad_norm": 1.8588616847991943, |
|
"learning_rate": 7.501361889250882e-06, |
|
"loss": 0.672, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 1.8370535714285714, |
|
"grad_norm": 1.5921292304992676, |
|
"learning_rate": 7.4769169533024055e-06, |
|
"loss": 0.6028, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 1.8392857142857144, |
|
"grad_norm": 1.7890609502792358, |
|
"learning_rate": 7.452488108623089e-06, |
|
"loss": 0.5925, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 1.8415178571428572, |
|
"grad_norm": 1.5813894271850586, |
|
"learning_rate": 7.428075511010858e-06, |
|
"loss": 0.5878, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 1.84375, |
|
"grad_norm": 1.9074643850326538, |
|
"learning_rate": 7.403679316160024e-06, |
|
"loss": 0.7553, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 1.8459821428571428, |
|
"grad_norm": 1.387987494468689, |
|
"learning_rate": 7.379299679660299e-06, |
|
"loss": 0.494, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 1.8482142857142856, |
|
"grad_norm": 1.7119059562683105, |
|
"learning_rate": 7.354936756995766e-06, |
|
"loss": 0.6305, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 1.8504464285714286, |
|
"grad_norm": 1.9815932512283325, |
|
"learning_rate": 7.3305907035439404e-06, |
|
"loss": 0.6832, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 1.8526785714285714, |
|
"grad_norm": 1.9235873222351074, |
|
"learning_rate": 7.3062616745747325e-06, |
|
"loss": 0.6207, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.8549107142857144, |
|
"grad_norm": 1.837198257446289, |
|
"learning_rate": 7.281949825249495e-06, |
|
"loss": 0.6578, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 1.8571428571428572, |
|
"grad_norm": 1.723789930343628, |
|
"learning_rate": 7.257655310619996e-06, |
|
"loss": 0.6194, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 1.859375, |
|
"grad_norm": 1.7569659948349, |
|
"learning_rate": 7.233378285627459e-06, |
|
"loss": 0.5448, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 1.8616071428571428, |
|
"grad_norm": 1.8790802955627441, |
|
"learning_rate": 7.209118905101575e-06, |
|
"loss": 0.7309, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 1.8638392857142856, |
|
"grad_norm": 1.8847455978393555, |
|
"learning_rate": 7.184877323759482e-06, |
|
"loss": 0.6599, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 1.8660714285714286, |
|
"grad_norm": 1.8182835578918457, |
|
"learning_rate": 7.16065369620483e-06, |
|
"loss": 0.683, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 1.8683035714285714, |
|
"grad_norm": 1.7820547819137573, |
|
"learning_rate": 7.136448176926736e-06, |
|
"loss": 0.6397, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 1.8705357142857144, |
|
"grad_norm": 1.9385348558425903, |
|
"learning_rate": 7.112260920298859e-06, |
|
"loss": 0.7061, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 1.8727678571428572, |
|
"grad_norm": 1.8107960224151611, |
|
"learning_rate": 7.088092080578357e-06, |
|
"loss": 0.7015, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 1.875, |
|
"grad_norm": 1.7129448652267456, |
|
"learning_rate": 7.063941811904956e-06, |
|
"loss": 0.6202, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.8772321428571428, |
|
"grad_norm": 1.7366535663604736, |
|
"learning_rate": 7.039810268299934e-06, |
|
"loss": 0.5723, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 1.8794642857142856, |
|
"grad_norm": 2.027513027191162, |
|
"learning_rate": 7.015697603665141e-06, |
|
"loss": 0.6308, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 1.8816964285714286, |
|
"grad_norm": 1.9543936252593994, |
|
"learning_rate": 6.991603971782035e-06, |
|
"loss": 0.7181, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 1.8839285714285714, |
|
"grad_norm": 1.7029448747634888, |
|
"learning_rate": 6.967529526310681e-06, |
|
"loss": 0.5145, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 1.8861607142857144, |
|
"grad_norm": 1.8236180543899536, |
|
"learning_rate": 6.943474420788788e-06, |
|
"loss": 0.6421, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 1.8883928571428572, |
|
"grad_norm": 1.779215931892395, |
|
"learning_rate": 6.919438808630716e-06, |
|
"loss": 0.5887, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 1.890625, |
|
"grad_norm": 1.6859050989151, |
|
"learning_rate": 6.895422843126507e-06, |
|
"loss": 0.5996, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 1.8928571428571428, |
|
"grad_norm": 1.9645501375198364, |
|
"learning_rate": 6.871426677440907e-06, |
|
"loss": 0.6966, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 1.8950892857142856, |
|
"grad_norm": 1.8417348861694336, |
|
"learning_rate": 6.847450464612378e-06, |
|
"loss": 0.6475, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 1.8973214285714286, |
|
"grad_norm": 1.9112606048583984, |
|
"learning_rate": 6.8234943575521365e-06, |
|
"loss": 0.6719, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.8995535714285714, |
|
"grad_norm": 1.7694634199142456, |
|
"learning_rate": 6.799558509043169e-06, |
|
"loss": 0.6202, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 1.9017857142857144, |
|
"grad_norm": 1.9113940000534058, |
|
"learning_rate": 6.775643071739267e-06, |
|
"loss": 0.5926, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 1.9040178571428572, |
|
"grad_norm": 1.9161518812179565, |
|
"learning_rate": 6.751748198164036e-06, |
|
"loss": 0.8355, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 1.90625, |
|
"grad_norm": 1.9429389238357544, |
|
"learning_rate": 6.727874040709943e-06, |
|
"loss": 0.6021, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 1.9084821428571428, |
|
"grad_norm": 1.9069395065307617, |
|
"learning_rate": 6.704020751637333e-06, |
|
"loss": 0.5882, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 1.9107142857142856, |
|
"grad_norm": 1.7388685941696167, |
|
"learning_rate": 6.680188483073458e-06, |
|
"loss": 0.5105, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 1.9129464285714286, |
|
"grad_norm": 1.8594225645065308, |
|
"learning_rate": 6.6563773870115135e-06, |
|
"loss": 0.6032, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 1.9151785714285714, |
|
"grad_norm": 1.6092168092727661, |
|
"learning_rate": 6.632587615309658e-06, |
|
"loss": 0.5947, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 1.9174107142857144, |
|
"grad_norm": 1.7915990352630615, |
|
"learning_rate": 6.608819319690059e-06, |
|
"loss": 0.6142, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 1.9196428571428572, |
|
"grad_norm": 1.5304429531097412, |
|
"learning_rate": 6.585072651737911e-06, |
|
"loss": 0.5969, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.921875, |
|
"grad_norm": 1.6925547122955322, |
|
"learning_rate": 6.56134776290048e-06, |
|
"loss": 0.5597, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 1.9241071428571428, |
|
"grad_norm": 1.6392486095428467, |
|
"learning_rate": 6.537644804486136e-06, |
|
"loss": 0.6305, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 1.9263392857142856, |
|
"grad_norm": 1.886560320854187, |
|
"learning_rate": 6.513963927663372e-06, |
|
"loss": 0.6076, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 1.9285714285714286, |
|
"grad_norm": 1.7735902070999146, |
|
"learning_rate": 6.49030528345987e-06, |
|
"loss": 0.5961, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 1.9308035714285714, |
|
"grad_norm": 1.7739813327789307, |
|
"learning_rate": 6.466669022761506e-06, |
|
"loss": 0.6179, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 1.9330357142857144, |
|
"grad_norm": 1.692681908607483, |
|
"learning_rate": 6.443055296311413e-06, |
|
"loss": 0.6816, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 1.9352678571428572, |
|
"grad_norm": 1.7736715078353882, |
|
"learning_rate": 6.4194642547090016e-06, |
|
"loss": 0.6573, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 1.9375, |
|
"grad_norm": 1.8248765468597412, |
|
"learning_rate": 6.3958960484090094e-06, |
|
"loss": 0.565, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 1.9397321428571428, |
|
"grad_norm": 1.636215329170227, |
|
"learning_rate": 6.37235082772055e-06, |
|
"loss": 0.6068, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 1.9419642857142856, |
|
"grad_norm": 1.8558686971664429, |
|
"learning_rate": 6.348828742806122e-06, |
|
"loss": 0.6067, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.9441964285714286, |
|
"grad_norm": 1.7735726833343506, |
|
"learning_rate": 6.325329943680689e-06, |
|
"loss": 0.6364, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 1.9464285714285714, |
|
"grad_norm": 1.7822948694229126, |
|
"learning_rate": 6.3018545802107e-06, |
|
"loss": 0.6346, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 1.9486607142857144, |
|
"grad_norm": 1.7865424156188965, |
|
"learning_rate": 6.278402802113146e-06, |
|
"loss": 0.6141, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 1.9508928571428572, |
|
"grad_norm": 1.8328912258148193, |
|
"learning_rate": 6.25497475895459e-06, |
|
"loss": 0.6986, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 1.953125, |
|
"grad_norm": 1.8505418300628662, |
|
"learning_rate": 6.2315706001502305e-06, |
|
"loss": 0.6397, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 1.9553571428571428, |
|
"grad_norm": 1.664512276649475, |
|
"learning_rate": 6.208190474962945e-06, |
|
"loss": 0.5629, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 1.9575892857142856, |
|
"grad_norm": 1.8029053211212158, |
|
"learning_rate": 6.184834532502315e-06, |
|
"loss": 0.6978, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 1.9598214285714286, |
|
"grad_norm": 1.6065319776535034, |
|
"learning_rate": 6.161502921723719e-06, |
|
"loss": 0.5763, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 1.9620535714285714, |
|
"grad_norm": 1.9059717655181885, |
|
"learning_rate": 6.138195791427329e-06, |
|
"loss": 0.5928, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 1.9642857142857144, |
|
"grad_norm": 2.0226573944091797, |
|
"learning_rate": 6.114913290257219e-06, |
|
"loss": 0.6109, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.9665178571428572, |
|
"grad_norm": 1.741227626800537, |
|
"learning_rate": 6.091655566700359e-06, |
|
"loss": 0.6165, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 1.96875, |
|
"grad_norm": 1.747584581375122, |
|
"learning_rate": 6.068422769085722e-06, |
|
"loss": 0.5608, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 1.9709821428571428, |
|
"grad_norm": 1.6240609884262085, |
|
"learning_rate": 6.045215045583301e-06, |
|
"loss": 0.61, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 1.9732142857142856, |
|
"grad_norm": 1.6965066194534302, |
|
"learning_rate": 6.0220325442031714e-06, |
|
"loss": 0.5608, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 1.9754464285714286, |
|
"grad_norm": 1.6870455741882324, |
|
"learning_rate": 5.998875412794562e-06, |
|
"loss": 0.5619, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 1.9776785714285714, |
|
"grad_norm": 2.0198004245758057, |
|
"learning_rate": 5.975743799044894e-06, |
|
"loss": 0.6374, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 1.9799107142857144, |
|
"grad_norm": 1.584223985671997, |
|
"learning_rate": 5.952637850478852e-06, |
|
"loss": 0.6131, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 1.9821428571428572, |
|
"grad_norm": 2.044126272201538, |
|
"learning_rate": 5.929557714457425e-06, |
|
"loss": 0.7384, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 1.984375, |
|
"grad_norm": 1.8715416193008423, |
|
"learning_rate": 5.906503538176999e-06, |
|
"loss": 0.6409, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 1.9866071428571428, |
|
"grad_norm": 1.9569389820098877, |
|
"learning_rate": 5.883475468668387e-06, |
|
"loss": 0.6796, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.9888392857142856, |
|
"grad_norm": 1.5443964004516602, |
|
"learning_rate": 5.860473652795901e-06, |
|
"loss": 0.584, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 1.9910714285714286, |
|
"grad_norm": 1.6899211406707764, |
|
"learning_rate": 5.8374982372564255e-06, |
|
"loss": 0.5313, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 1.9933035714285714, |
|
"grad_norm": 1.6658509969711304, |
|
"learning_rate": 5.814549368578464e-06, |
|
"loss": 0.658, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 1.9955357142857144, |
|
"grad_norm": 1.86408531665802, |
|
"learning_rate": 5.7916271931212185e-06, |
|
"loss": 0.7255, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 1.9977678571428572, |
|
"grad_norm": 1.8427174091339111, |
|
"learning_rate": 5.768731857073657e-06, |
|
"loss": 0.6449, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 1.6087369918823242, |
|
"learning_rate": 5.745863506453569e-06, |
|
"loss": 0.4955, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.7043077349662781, |
|
"eval_runtime": 39.1667, |
|
"eval_samples_per_second": 1.864, |
|
"eval_steps_per_second": 0.255, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 2.002232142857143, |
|
"grad_norm": 1.738125205039978, |
|
"learning_rate": 5.7230222871066475e-06, |
|
"loss": 0.5085, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 2.0044642857142856, |
|
"grad_norm": 1.8423463106155396, |
|
"learning_rate": 5.700208344705537e-06, |
|
"loss": 0.5366, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 2.0066964285714284, |
|
"grad_norm": 1.9999382495880127, |
|
"learning_rate": 5.677421824748946e-06, |
|
"loss": 0.5329, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 2.0089285714285716, |
|
"grad_norm": 1.9602954387664795, |
|
"learning_rate": 5.6546628725606675e-06, |
|
"loss": 0.5518, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.0111607142857144, |
|
"grad_norm": 1.907751441001892, |
|
"learning_rate": 5.631931633288696e-06, |
|
"loss": 0.5051, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 2.013392857142857, |
|
"grad_norm": 1.5521221160888672, |
|
"learning_rate": 5.609228251904265e-06, |
|
"loss": 0.4874, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 2.015625, |
|
"grad_norm": 1.9081358909606934, |
|
"learning_rate": 5.586552873200963e-06, |
|
"loss": 0.5361, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 2.017857142857143, |
|
"grad_norm": 1.8409134149551392, |
|
"learning_rate": 5.563905641793776e-06, |
|
"loss": 0.5204, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 2.0200892857142856, |
|
"grad_norm": 1.6720854043960571, |
|
"learning_rate": 5.541286702118174e-06, |
|
"loss": 0.5107, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 2.0223214285714284, |
|
"grad_norm": 1.8406893014907837, |
|
"learning_rate": 5.518696198429201e-06, |
|
"loss": 0.5427, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 2.0245535714285716, |
|
"grad_norm": 1.728305697441101, |
|
"learning_rate": 5.496134274800533e-06, |
|
"loss": 0.4973, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 2.0267857142857144, |
|
"grad_norm": 1.7480419874191284, |
|
"learning_rate": 5.473601075123599e-06, |
|
"loss": 0.5065, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 2.029017857142857, |
|
"grad_norm": 1.8376224040985107, |
|
"learning_rate": 5.451096743106611e-06, |
|
"loss": 0.5953, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 2.03125, |
|
"grad_norm": 1.8109792470932007, |
|
"learning_rate": 5.428621422273687e-06, |
|
"loss": 0.5203, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 2.033482142857143, |
|
"grad_norm": 1.8943278789520264, |
|
"learning_rate": 5.406175255963923e-06, |
|
"loss": 0.5111, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 2.0357142857142856, |
|
"grad_norm": 1.9139760732650757, |
|
"learning_rate": 5.383758387330476e-06, |
|
"loss": 0.4957, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 2.0379464285714284, |
|
"grad_norm": 1.9172364473342896, |
|
"learning_rate": 5.3613709593396545e-06, |
|
"loss": 0.5062, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 2.0401785714285716, |
|
"grad_norm": 1.674847960472107, |
|
"learning_rate": 5.3390131147699995e-06, |
|
"loss": 0.4564, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 2.0424107142857144, |
|
"grad_norm": 1.8302925825119019, |
|
"learning_rate": 5.3166849962113886e-06, |
|
"loss": 0.4828, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 2.044642857142857, |
|
"grad_norm": 1.9897193908691406, |
|
"learning_rate": 5.294386746064115e-06, |
|
"loss": 0.4754, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 2.046875, |
|
"grad_norm": 2.0378408432006836, |
|
"learning_rate": 5.272118506537982e-06, |
|
"loss": 0.5603, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 2.049107142857143, |
|
"grad_norm": 1.819687843322754, |
|
"learning_rate": 5.249880419651403e-06, |
|
"loss": 0.5372, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 2.0513392857142856, |
|
"grad_norm": 1.8932809829711914, |
|
"learning_rate": 5.2276726272304724e-06, |
|
"loss": 0.533, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 2.0535714285714284, |
|
"grad_norm": 1.5039235353469849, |
|
"learning_rate": 5.205495270908094e-06, |
|
"loss": 0.4317, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 2.0558035714285716, |
|
"grad_norm": 1.7907001972198486, |
|
"learning_rate": 5.183348492123056e-06, |
|
"loss": 0.4999, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 2.0580357142857144, |
|
"grad_norm": 1.8389475345611572, |
|
"learning_rate": 5.16123243211914e-06, |
|
"loss": 0.506, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 2.060267857142857, |
|
"grad_norm": 1.907362699508667, |
|
"learning_rate": 5.1391472319442016e-06, |
|
"loss": 0.4987, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 2.0625, |
|
"grad_norm": 1.7584993839263916, |
|
"learning_rate": 5.117093032449297e-06, |
|
"loss": 0.5286, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 2.064732142857143, |
|
"grad_norm": 1.9483177661895752, |
|
"learning_rate": 5.0950699742877645e-06, |
|
"loss": 0.5469, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 2.0669642857142856, |
|
"grad_norm": 1.7706096172332764, |
|
"learning_rate": 5.073078197914341e-06, |
|
"loss": 0.5409, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 2.0691964285714284, |
|
"grad_norm": 1.806550145149231, |
|
"learning_rate": 5.0511178435842565e-06, |
|
"loss": 0.558, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 2.0714285714285716, |
|
"grad_norm": 1.5738914012908936, |
|
"learning_rate": 5.029189051352339e-06, |
|
"loss": 0.4856, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 2.0736607142857144, |
|
"grad_norm": 1.8106647729873657, |
|
"learning_rate": 5.007291961072133e-06, |
|
"loss": 0.4639, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 2.075892857142857, |
|
"grad_norm": 1.849599838256836, |
|
"learning_rate": 4.985426712394994e-06, |
|
"loss": 0.5315, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 2.078125, |
|
"grad_norm": 1.3779913187026978, |
|
"learning_rate": 4.963593444769207e-06, |
|
"loss": 0.4128, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 2.080357142857143, |
|
"grad_norm": 1.9234905242919922, |
|
"learning_rate": 4.941792297439098e-06, |
|
"loss": 0.4776, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 2.0825892857142856, |
|
"grad_norm": 1.9030768871307373, |
|
"learning_rate": 4.920023409444128e-06, |
|
"loss": 0.626, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 2.0848214285714284, |
|
"grad_norm": 2.104311227798462, |
|
"learning_rate": 4.898286919618034e-06, |
|
"loss": 0.5971, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 2.0870535714285716, |
|
"grad_norm": 1.6451133489608765, |
|
"learning_rate": 4.876582966587924e-06, |
|
"loss": 0.4981, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 2.0892857142857144, |
|
"grad_norm": 1.9193094968795776, |
|
"learning_rate": 4.8549116887734045e-06, |
|
"loss": 0.5093, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 2.091517857142857, |
|
"grad_norm": 2.0272116661071777, |
|
"learning_rate": 4.833273224385678e-06, |
|
"loss": 0.5768, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 2.09375, |
|
"grad_norm": 1.6840240955352783, |
|
"learning_rate": 4.811667711426686e-06, |
|
"loss": 0.4768, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 2.095982142857143, |
|
"grad_norm": 1.901715636253357, |
|
"learning_rate": 4.790095287688227e-06, |
|
"loss": 0.6362, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 2.0982142857142856, |
|
"grad_norm": 1.6791905164718628, |
|
"learning_rate": 4.7685560907510465e-06, |
|
"loss": 0.4853, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 2.1004464285714284, |
|
"grad_norm": 1.718680739402771, |
|
"learning_rate": 4.747050257984002e-06, |
|
"loss": 0.4572, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 2.1026785714285716, |
|
"grad_norm": 1.6572511196136475, |
|
"learning_rate": 4.725577926543151e-06, |
|
"loss": 0.4536, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 2.1049107142857144, |
|
"grad_norm": 1.936851143836975, |
|
"learning_rate": 4.704139233370905e-06, |
|
"loss": 0.6019, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 2.107142857142857, |
|
"grad_norm": 1.6410084962844849, |
|
"learning_rate": 4.682734315195138e-06, |
|
"loss": 0.5204, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 2.109375, |
|
"grad_norm": 1.6365997791290283, |
|
"learning_rate": 4.661363308528319e-06, |
|
"loss": 0.4437, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 2.111607142857143, |
|
"grad_norm": 1.9370850324630737, |
|
"learning_rate": 4.640026349666651e-06, |
|
"loss": 0.554, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 2.1138392857142856, |
|
"grad_norm": 1.7888548374176025, |
|
"learning_rate": 4.61872357468917e-06, |
|
"loss": 0.4938, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 2.1160714285714284, |
|
"grad_norm": 1.5971767902374268, |
|
"learning_rate": 4.5974551194569336e-06, |
|
"loss": 0.4829, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 2.1183035714285716, |
|
"grad_norm": 1.7885452508926392, |
|
"learning_rate": 4.576221119612091e-06, |
|
"loss": 0.5542, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 2.1205357142857144, |
|
"grad_norm": 1.5876049995422363, |
|
"learning_rate": 4.555021710577068e-06, |
|
"loss": 0.5482, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 2.122767857142857, |
|
"grad_norm": 1.786490797996521, |
|
"learning_rate": 4.533857027553663e-06, |
|
"loss": 0.5871, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 2.125, |
|
"grad_norm": 1.8872352838516235, |
|
"learning_rate": 4.51272720552223e-06, |
|
"loss": 0.4679, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 2.127232142857143, |
|
"grad_norm": 1.8590319156646729, |
|
"learning_rate": 4.49163237924078e-06, |
|
"loss": 0.5014, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 2.1294642857142856, |
|
"grad_norm": 1.5889908075332642, |
|
"learning_rate": 4.470572683244127e-06, |
|
"loss": 0.4523, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 2.1316964285714284, |
|
"grad_norm": 1.8147304058074951, |
|
"learning_rate": 4.449548251843048e-06, |
|
"loss": 0.4648, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 2.1339285714285716, |
|
"grad_norm": 2.0382473468780518, |
|
"learning_rate": 4.4285592191234125e-06, |
|
"loss": 0.5273, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 2.1361607142857144, |
|
"grad_norm": 1.8126921653747559, |
|
"learning_rate": 4.4076057189453325e-06, |
|
"loss": 0.5059, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 2.138392857142857, |
|
"grad_norm": 1.8667762279510498, |
|
"learning_rate": 4.386687884942307e-06, |
|
"loss": 0.5113, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 2.140625, |
|
"grad_norm": 2.065971851348877, |
|
"learning_rate": 4.365805850520362e-06, |
|
"loss": 0.5056, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 2.142857142857143, |
|
"grad_norm": 1.8023256063461304, |
|
"learning_rate": 4.344959748857215e-06, |
|
"loss": 0.5219, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 2.1450892857142856, |
|
"grad_norm": 2.643050193786621, |
|
"learning_rate": 4.324149712901417e-06, |
|
"loss": 0.5836, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 2.1473214285714284, |
|
"grad_norm": 1.5510233640670776, |
|
"learning_rate": 4.3033758753715095e-06, |
|
"loss": 0.4494, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 2.1495535714285716, |
|
"grad_norm": 1.5919033288955688, |
|
"learning_rate": 4.282638368755161e-06, |
|
"loss": 0.425, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 2.1517857142857144, |
|
"grad_norm": 1.7790838479995728, |
|
"learning_rate": 4.261937325308347e-06, |
|
"loss": 0.5493, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 2.154017857142857, |
|
"grad_norm": 1.7823125123977661, |
|
"learning_rate": 4.241272877054489e-06, |
|
"loss": 0.5211, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 2.15625, |
|
"grad_norm": 1.9036273956298828, |
|
"learning_rate": 4.2206451557836235e-06, |
|
"loss": 0.5364, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 2.158482142857143, |
|
"grad_norm": 1.658921718597412, |
|
"learning_rate": 4.200054293051556e-06, |
|
"loss": 0.5241, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 2.1607142857142856, |
|
"grad_norm": 1.6842666864395142, |
|
"learning_rate": 4.179500420179011e-06, |
|
"loss": 0.5911, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 2.1629464285714284, |
|
"grad_norm": 1.8204643726348877, |
|
"learning_rate": 4.158983668250819e-06, |
|
"loss": 0.5023, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 2.1651785714285716, |
|
"grad_norm": 1.692069172859192, |
|
"learning_rate": 4.138504168115059e-06, |
|
"loss": 0.4712, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 2.1674107142857144, |
|
"grad_norm": 1.6010735034942627, |
|
"learning_rate": 4.11806205038224e-06, |
|
"loss": 0.48, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 2.169642857142857, |
|
"grad_norm": 2.0540120601654053, |
|
"learning_rate": 4.097657445424454e-06, |
|
"loss": 0.565, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 2.171875, |
|
"grad_norm": 1.8458772897720337, |
|
"learning_rate": 4.077290483374549e-06, |
|
"loss": 0.4537, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 2.174107142857143, |
|
"grad_norm": 1.8870779275894165, |
|
"learning_rate": 4.056961294125305e-06, |
|
"loss": 0.5089, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 2.1763392857142856, |
|
"grad_norm": 1.9770042896270752, |
|
"learning_rate": 4.0366700073286005e-06, |
|
"loss": 0.548, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 2.1785714285714284, |
|
"grad_norm": 1.8909940719604492, |
|
"learning_rate": 4.016416752394591e-06, |
|
"loss": 0.6115, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 2.1808035714285716, |
|
"grad_norm": 1.887465000152588, |
|
"learning_rate": 3.996201658490866e-06, |
|
"loss": 0.5199, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 2.1830357142857144, |
|
"grad_norm": 1.808553695678711, |
|
"learning_rate": 3.9760248545416465e-06, |
|
"loss": 0.4737, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 2.185267857142857, |
|
"grad_norm": 1.8134347200393677, |
|
"learning_rate": 3.955886469226967e-06, |
|
"loss": 0.4988, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 2.1875, |
|
"grad_norm": 1.8288437128067017, |
|
"learning_rate": 3.935786630981819e-06, |
|
"loss": 0.4873, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 2.189732142857143, |
|
"grad_norm": 1.7920408248901367, |
|
"learning_rate": 3.915725467995375e-06, |
|
"loss": 0.5163, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 2.1919642857142856, |
|
"grad_norm": 1.6849050521850586, |
|
"learning_rate": 3.895703108210135e-06, |
|
"loss": 0.5308, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 2.1941964285714284, |
|
"grad_norm": 1.5930182933807373, |
|
"learning_rate": 3.875719679321138e-06, |
|
"loss": 0.4919, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 2.1964285714285716, |
|
"grad_norm": 1.9762846231460571, |
|
"learning_rate": 3.8557753087751345e-06, |
|
"loss": 0.5924, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 2.1986607142857144, |
|
"grad_norm": 1.726677417755127, |
|
"learning_rate": 3.835870123769775e-06, |
|
"loss": 0.5425, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 2.200892857142857, |
|
"grad_norm": 1.9238899946212769, |
|
"learning_rate": 3.8160042512528e-06, |
|
"loss": 0.5421, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 2.203125, |
|
"grad_norm": 2.012009620666504, |
|
"learning_rate": 3.796177817921223e-06, |
|
"loss": 0.5438, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 2.205357142857143, |
|
"grad_norm": 1.6114519834518433, |
|
"learning_rate": 3.776390950220544e-06, |
|
"loss": 0.5355, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 2.2075892857142856, |
|
"grad_norm": 1.7870115041732788, |
|
"learning_rate": 3.756643774343913e-06, |
|
"loss": 0.638, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 2.2098214285714284, |
|
"grad_norm": 1.8089219331741333, |
|
"learning_rate": 3.7369364162313528e-06, |
|
"loss": 0.5771, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 2.2120535714285716, |
|
"grad_norm": 1.7549517154693604, |
|
"learning_rate": 3.7172690015689263e-06, |
|
"loss": 0.5726, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 2.2142857142857144, |
|
"grad_norm": 1.7794311046600342, |
|
"learning_rate": 3.6976416557879757e-06, |
|
"loss": 0.5378, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 2.216517857142857, |
|
"grad_norm": 1.7363981008529663, |
|
"learning_rate": 3.678054504064287e-06, |
|
"loss": 0.4822, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 2.21875, |
|
"grad_norm": 1.8827056884765625, |
|
"learning_rate": 3.658507671317296e-06, |
|
"loss": 0.5609, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 2.220982142857143, |
|
"grad_norm": 1.858846664428711, |
|
"learning_rate": 3.639001282209311e-06, |
|
"loss": 0.5211, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 2.2232142857142856, |
|
"grad_norm": 1.877846360206604, |
|
"learning_rate": 3.6195354611447033e-06, |
|
"loss": 0.5076, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 2.2254464285714284, |
|
"grad_norm": 1.9064738750457764, |
|
"learning_rate": 3.600110332269118e-06, |
|
"loss": 0.4971, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 2.2276785714285716, |
|
"grad_norm": 1.8775546550750732, |
|
"learning_rate": 3.580726019468671e-06, |
|
"loss": 0.5283, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 2.2299107142857144, |
|
"grad_norm": 1.8626521825790405, |
|
"learning_rate": 3.561382646369179e-06, |
|
"loss": 0.5797, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 2.232142857142857, |
|
"grad_norm": 1.8749574422836304, |
|
"learning_rate": 3.5420803363353604e-06, |
|
"loss": 0.6058, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.234375, |
|
"grad_norm": 1.8958619832992554, |
|
"learning_rate": 3.5228192124700433e-06, |
|
"loss": 0.5324, |
|
"step": 1001 |
|
}, |
|
{ |
|
"epoch": 2.236607142857143, |
|
"grad_norm": 2.2370924949645996, |
|
"learning_rate": 3.503599397613394e-06, |
|
"loss": 0.5212, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 2.2388392857142856, |
|
"grad_norm": 1.7963013648986816, |
|
"learning_rate": 3.4844210143421143e-06, |
|
"loss": 0.5309, |
|
"step": 1003 |
|
}, |
|
{ |
|
"epoch": 2.2410714285714284, |
|
"grad_norm": 1.7290846109390259, |
|
"learning_rate": 3.465284184968679e-06, |
|
"loss": 0.5216, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 2.2433035714285716, |
|
"grad_norm": 1.756559133529663, |
|
"learning_rate": 3.4461890315405466e-06, |
|
"loss": 0.5042, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 2.2455357142857144, |
|
"grad_norm": 1.7828274965286255, |
|
"learning_rate": 3.4271356758393827e-06, |
|
"loss": 0.5008, |
|
"step": 1006 |
|
}, |
|
{ |
|
"epoch": 2.247767857142857, |
|
"grad_norm": 1.5559848546981812, |
|
"learning_rate": 3.4081242393802847e-06, |
|
"loss": 0.4937, |
|
"step": 1007 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"grad_norm": 1.6797314882278442, |
|
"learning_rate": 3.3891548434109942e-06, |
|
"loss": 0.5747, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"eval_loss": 0.7400864362716675, |
|
"eval_runtime": 64.4492, |
|
"eval_samples_per_second": 1.133, |
|
"eval_steps_per_second": 0.155, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 2.252232142857143, |
|
"grad_norm": 1.8574589490890503, |
|
"learning_rate": 3.3702276089111484e-06, |
|
"loss": 0.5077, |
|
"step": 1009 |
|
}, |
|
{ |
|
"epoch": 2.2544642857142856, |
|
"grad_norm": 1.894984245300293, |
|
"learning_rate": 3.3513426565914854e-06, |
|
"loss": 0.57, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 2.2566964285714284, |
|
"grad_norm": 1.81510591506958, |
|
"learning_rate": 3.3325001068930917e-06, |
|
"loss": 0.4895, |
|
"step": 1011 |
|
}, |
|
{ |
|
"epoch": 2.2589285714285716, |
|
"grad_norm": 1.8635365962982178, |
|
"learning_rate": 3.3137000799866148e-06, |
|
"loss": 0.4275, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 2.2611607142857144, |
|
"grad_norm": 1.8582934141159058, |
|
"learning_rate": 3.2949426957715157e-06, |
|
"loss": 0.6211, |
|
"step": 1013 |
|
}, |
|
{ |
|
"epoch": 2.263392857142857, |
|
"grad_norm": 1.6376415491104126, |
|
"learning_rate": 3.276228073875296e-06, |
|
"loss": 0.5382, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 2.265625, |
|
"grad_norm": 2.0684192180633545, |
|
"learning_rate": 3.257556333652734e-06, |
|
"loss": 0.4896, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 2.267857142857143, |
|
"grad_norm": 2.0028772354125977, |
|
"learning_rate": 3.238927594185127e-06, |
|
"loss": 0.5456, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 2.2700892857142856, |
|
"grad_norm": 1.8271785974502563, |
|
"learning_rate": 3.2203419742795237e-06, |
|
"loss": 0.5187, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 2.2723214285714284, |
|
"grad_norm": 1.8357112407684326, |
|
"learning_rate": 3.201799592467978e-06, |
|
"loss": 0.5304, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 2.2745535714285716, |
|
"grad_norm": 2.119241237640381, |
|
"learning_rate": 3.1833005670067874e-06, |
|
"loss": 0.55, |
|
"step": 1019 |
|
}, |
|
{ |
|
"epoch": 2.2767857142857144, |
|
"grad_norm": 1.683974027633667, |
|
"learning_rate": 3.1648450158757373e-06, |
|
"loss": 0.5021, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 2.279017857142857, |
|
"grad_norm": 1.9696073532104492, |
|
"learning_rate": 3.146433056777355e-06, |
|
"loss": 0.53, |
|
"step": 1021 |
|
}, |
|
{ |
|
"epoch": 2.28125, |
|
"grad_norm": 1.8277369737625122, |
|
"learning_rate": 3.128064807136142e-06, |
|
"loss": 0.4263, |
|
"step": 1022 |
|
}, |
|
{ |
|
"epoch": 2.283482142857143, |
|
"grad_norm": 1.6111699342727661, |
|
"learning_rate": 3.10974038409785e-06, |
|
"loss": 0.4166, |
|
"step": 1023 |
|
}, |
|
{ |
|
"epoch": 2.2857142857142856, |
|
"grad_norm": 1.9450246095657349, |
|
"learning_rate": 3.0914599045287165e-06, |
|
"loss": 0.5023, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 2.2879464285714284, |
|
"grad_norm": 2.1073365211486816, |
|
"learning_rate": 3.073223485014727e-06, |
|
"loss": 0.5267, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 2.2901785714285716, |
|
"grad_norm": 1.8855870962142944, |
|
"learning_rate": 3.0550312418608617e-06, |
|
"loss": 0.557, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 2.2924107142857144, |
|
"grad_norm": 1.6264270544052124, |
|
"learning_rate": 3.0368832910903625e-06, |
|
"loss": 0.4561, |
|
"step": 1027 |
|
}, |
|
{ |
|
"epoch": 2.294642857142857, |
|
"grad_norm": 2.0912892818450928, |
|
"learning_rate": 3.018779748444005e-06, |
|
"loss": 0.6132, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 2.296875, |
|
"grad_norm": 1.741155982017517, |
|
"learning_rate": 3.000720729379326e-06, |
|
"loss": 0.4242, |
|
"step": 1029 |
|
}, |
|
{ |
|
"epoch": 2.299107142857143, |
|
"grad_norm": 1.8275063037872314, |
|
"learning_rate": 2.9827063490699225e-06, |
|
"loss": 0.4824, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 2.3013392857142856, |
|
"grad_norm": 1.7157033681869507, |
|
"learning_rate": 2.9647367224046884e-06, |
|
"loss": 0.5407, |
|
"step": 1031 |
|
}, |
|
{ |
|
"epoch": 2.3035714285714284, |
|
"grad_norm": 1.880632996559143, |
|
"learning_rate": 2.9468119639871163e-06, |
|
"loss": 0.4596, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 2.3058035714285716, |
|
"grad_norm": 1.8524107933044434, |
|
"learning_rate": 2.9289321881345257e-06, |
|
"loss": 0.5019, |
|
"step": 1033 |
|
}, |
|
{ |
|
"epoch": 2.3080357142857144, |
|
"grad_norm": 1.9338825941085815, |
|
"learning_rate": 2.911097508877365e-06, |
|
"loss": 0.5025, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 2.310267857142857, |
|
"grad_norm": 1.640730857849121, |
|
"learning_rate": 2.8933080399584757e-06, |
|
"loss": 0.5657, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 2.3125, |
|
"grad_norm": 2.100184917449951, |
|
"learning_rate": 2.8755638948323494e-06, |
|
"loss": 0.5444, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 2.314732142857143, |
|
"grad_norm": 1.7842200994491577, |
|
"learning_rate": 2.8578651866644447e-06, |
|
"loss": 0.5173, |
|
"step": 1037 |
|
}, |
|
{ |
|
"epoch": 2.3169642857142856, |
|
"grad_norm": 1.8273719549179077, |
|
"learning_rate": 2.840212028330418e-06, |
|
"loss": 0.4315, |
|
"step": 1038 |
|
}, |
|
{ |
|
"epoch": 2.3191964285714284, |
|
"grad_norm": 1.823228120803833, |
|
"learning_rate": 2.8226045324154394e-06, |
|
"loss": 0.4966, |
|
"step": 1039 |
|
}, |
|
{ |
|
"epoch": 2.3214285714285716, |
|
"grad_norm": 1.7970882654190063, |
|
"learning_rate": 2.8050428112134474e-06, |
|
"loss": 0.5744, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 2.3236607142857144, |
|
"grad_norm": 1.9154415130615234, |
|
"learning_rate": 2.7875269767264667e-06, |
|
"loss": 0.498, |
|
"step": 1041 |
|
}, |
|
{ |
|
"epoch": 2.325892857142857, |
|
"grad_norm": 1.6169114112854004, |
|
"learning_rate": 2.7700571406638633e-06, |
|
"loss": 0.452, |
|
"step": 1042 |
|
}, |
|
{ |
|
"epoch": 2.328125, |
|
"grad_norm": 1.7965830564498901, |
|
"learning_rate": 2.7526334144416345e-06, |
|
"loss": 0.4517, |
|
"step": 1043 |
|
}, |
|
{ |
|
"epoch": 2.330357142857143, |
|
"grad_norm": 1.9453188180923462, |
|
"learning_rate": 2.735255909181719e-06, |
|
"loss": 0.4696, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 2.3325892857142856, |
|
"grad_norm": 1.6845883131027222, |
|
"learning_rate": 2.7179247357112704e-06, |
|
"loss": 0.543, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 2.3348214285714284, |
|
"grad_norm": 1.776888132095337, |
|
"learning_rate": 2.7006400045619597e-06, |
|
"loss": 0.4481, |
|
"step": 1046 |
|
}, |
|
{ |
|
"epoch": 2.3370535714285716, |
|
"grad_norm": 2.1295015811920166, |
|
"learning_rate": 2.6834018259692574e-06, |
|
"loss": 0.5429, |
|
"step": 1047 |
|
}, |
|
{ |
|
"epoch": 2.3392857142857144, |
|
"grad_norm": 1.7422338724136353, |
|
"learning_rate": 2.6662103098717485e-06, |
|
"loss": 0.5375, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 2.341517857142857, |
|
"grad_norm": 1.8716145753860474, |
|
"learning_rate": 2.649065565910419e-06, |
|
"loss": 0.5372, |
|
"step": 1049 |
|
}, |
|
{ |
|
"epoch": 2.34375, |
|
"grad_norm": 1.8629887104034424, |
|
"learning_rate": 2.631967703427959e-06, |
|
"loss": 0.4969, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 2.345982142857143, |
|
"grad_norm": 1.9818273782730103, |
|
"learning_rate": 2.6149168314680707e-06, |
|
"loss": 0.4883, |
|
"step": 1051 |
|
}, |
|
{ |
|
"epoch": 2.3482142857142856, |
|
"grad_norm": 1.8726003170013428, |
|
"learning_rate": 2.597913058774758e-06, |
|
"loss": 0.5668, |
|
"step": 1052 |
|
}, |
|
{ |
|
"epoch": 2.3504464285714284, |
|
"grad_norm": 1.701008677482605, |
|
"learning_rate": 2.5809564937916543e-06, |
|
"loss": 0.5212, |
|
"step": 1053 |
|
}, |
|
{ |
|
"epoch": 2.3526785714285716, |
|
"grad_norm": 1.8910822868347168, |
|
"learning_rate": 2.564047244661316e-06, |
|
"loss": 0.5219, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 2.3549107142857144, |
|
"grad_norm": 1.9475998878479004, |
|
"learning_rate": 2.547185419224537e-06, |
|
"loss": 0.5165, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 2.357142857142857, |
|
"grad_norm": 1.6685254573822021, |
|
"learning_rate": 2.530371125019664e-06, |
|
"loss": 0.4789, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 2.359375, |
|
"grad_norm": 2.028895139694214, |
|
"learning_rate": 2.513604469281897e-06, |
|
"loss": 0.531, |
|
"step": 1057 |
|
}, |
|
{ |
|
"epoch": 2.361607142857143, |
|
"grad_norm": 1.9576796293258667, |
|
"learning_rate": 2.4968855589426288e-06, |
|
"loss": 0.548, |
|
"step": 1058 |
|
}, |
|
{ |
|
"epoch": 2.3638392857142856, |
|
"grad_norm": 1.756537675857544, |
|
"learning_rate": 2.4802145006287425e-06, |
|
"loss": 0.4646, |
|
"step": 1059 |
|
}, |
|
{ |
|
"epoch": 2.3660714285714284, |
|
"grad_norm": 1.7996472120285034, |
|
"learning_rate": 2.4635914006619454e-06, |
|
"loss": 0.4925, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 2.3683035714285716, |
|
"grad_norm": 2.11858868598938, |
|
"learning_rate": 2.4470163650580747e-06, |
|
"loss": 0.6414, |
|
"step": 1061 |
|
}, |
|
{ |
|
"epoch": 2.3705357142857144, |
|
"grad_norm": 1.947381854057312, |
|
"learning_rate": 2.430489499526438e-06, |
|
"loss": 0.5795, |
|
"step": 1062 |
|
}, |
|
{ |
|
"epoch": 2.372767857142857, |
|
"grad_norm": 1.8046473264694214, |
|
"learning_rate": 2.414010909469133e-06, |
|
"loss": 0.4544, |
|
"step": 1063 |
|
}, |
|
{ |
|
"epoch": 2.375, |
|
"grad_norm": 1.7068886756896973, |
|
"learning_rate": 2.3975806999803717e-06, |
|
"loss": 0.5324, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 2.377232142857143, |
|
"grad_norm": 1.5860605239868164, |
|
"learning_rate": 2.38119897584582e-06, |
|
"loss": 0.4809, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 2.3794642857142856, |
|
"grad_norm": 1.935868501663208, |
|
"learning_rate": 2.364865841541908e-06, |
|
"loss": 0.5075, |
|
"step": 1066 |
|
}, |
|
{ |
|
"epoch": 2.3816964285714284, |
|
"grad_norm": 1.793942928314209, |
|
"learning_rate": 2.3485814012351914e-06, |
|
"loss": 0.5324, |
|
"step": 1067 |
|
}, |
|
{ |
|
"epoch": 2.3839285714285716, |
|
"grad_norm": 1.7716658115386963, |
|
"learning_rate": 2.33234575878167e-06, |
|
"loss": 0.5702, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 2.3861607142857144, |
|
"grad_norm": 1.8575302362442017, |
|
"learning_rate": 2.3161590177261294e-06, |
|
"loss": 0.4541, |
|
"step": 1069 |
|
}, |
|
{ |
|
"epoch": 2.388392857142857, |
|
"grad_norm": 1.6426512002944946, |
|
"learning_rate": 2.300021281301483e-06, |
|
"loss": 0.4476, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 2.390625, |
|
"grad_norm": 1.7291110754013062, |
|
"learning_rate": 2.2839326524281037e-06, |
|
"loss": 0.508, |
|
"step": 1071 |
|
}, |
|
{ |
|
"epoch": 2.392857142857143, |
|
"grad_norm": 1.7146512269973755, |
|
"learning_rate": 2.267893233713182e-06, |
|
"loss": 0.5586, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 2.3950892857142856, |
|
"grad_norm": 1.4405806064605713, |
|
"learning_rate": 2.2519031274500625e-06, |
|
"loss": 0.4264, |
|
"step": 1073 |
|
}, |
|
{ |
|
"epoch": 2.3973214285714284, |
|
"grad_norm": 1.8408970832824707, |
|
"learning_rate": 2.235962435617596e-06, |
|
"loss": 0.4864, |
|
"step": 1074 |
|
}, |
|
{ |
|
"epoch": 2.3995535714285716, |
|
"grad_norm": 1.5550113916397095, |
|
"learning_rate": 2.2200712598794804e-06, |
|
"loss": 0.4476, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 2.4017857142857144, |
|
"grad_norm": 1.8365901708602905, |
|
"learning_rate": 2.204229701583621e-06, |
|
"loss": 0.5003, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 2.404017857142857, |
|
"grad_norm": 1.6341036558151245, |
|
"learning_rate": 2.1884378617614933e-06, |
|
"loss": 0.3824, |
|
"step": 1077 |
|
}, |
|
{ |
|
"epoch": 2.40625, |
|
"grad_norm": 1.920911431312561, |
|
"learning_rate": 2.172695841127468e-06, |
|
"loss": 0.5231, |
|
"step": 1078 |
|
}, |
|
{ |
|
"epoch": 2.408482142857143, |
|
"grad_norm": 2.252639055252075, |
|
"learning_rate": 2.157003740078203e-06, |
|
"loss": 0.5236, |
|
"step": 1079 |
|
}, |
|
{ |
|
"epoch": 2.4107142857142856, |
|
"grad_norm": 1.8135790824890137, |
|
"learning_rate": 2.141361658691975e-06, |
|
"loss": 0.5173, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 2.4129464285714284, |
|
"grad_norm": 2.1770637035369873, |
|
"learning_rate": 2.1257696967280716e-06, |
|
"loss": 0.5397, |
|
"step": 1081 |
|
}, |
|
{ |
|
"epoch": 2.4151785714285716, |
|
"grad_norm": 1.7043235301971436, |
|
"learning_rate": 2.1102279536261193e-06, |
|
"loss": 0.5, |
|
"step": 1082 |
|
}, |
|
{ |
|
"epoch": 2.4174107142857144, |
|
"grad_norm": 1.6420832872390747, |
|
"learning_rate": 2.09473652850548e-06, |
|
"loss": 0.4963, |
|
"step": 1083 |
|
}, |
|
{ |
|
"epoch": 2.419642857142857, |
|
"grad_norm": 1.8134890794754028, |
|
"learning_rate": 2.0792955201646005e-06, |
|
"loss": 0.4707, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 2.421875, |
|
"grad_norm": 1.9724851846694946, |
|
"learning_rate": 2.063905027080392e-06, |
|
"loss": 0.5633, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 2.424107142857143, |
|
"grad_norm": 1.650792121887207, |
|
"learning_rate": 2.0485651474075987e-06, |
|
"loss": 0.5001, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 2.4263392857142856, |
|
"grad_norm": 1.668756127357483, |
|
"learning_rate": 2.033275978978164e-06, |
|
"loss": 0.513, |
|
"step": 1087 |
|
}, |
|
{ |
|
"epoch": 2.4285714285714284, |
|
"grad_norm": 1.710158348083496, |
|
"learning_rate": 2.018037619300628e-06, |
|
"loss": 0.4764, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 2.4308035714285716, |
|
"grad_norm": 1.7297494411468506, |
|
"learning_rate": 2.0028501655594736e-06, |
|
"loss": 0.5412, |
|
"step": 1089 |
|
}, |
|
{ |
|
"epoch": 2.4330357142857144, |
|
"grad_norm": 1.5992000102996826, |
|
"learning_rate": 1.987713714614543e-06, |
|
"loss": 0.4284, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 2.435267857142857, |
|
"grad_norm": 1.7302836179733276, |
|
"learning_rate": 1.972628363000396e-06, |
|
"loss": 0.4839, |
|
"step": 1091 |
|
}, |
|
{ |
|
"epoch": 2.4375, |
|
"grad_norm": 1.7639312744140625, |
|
"learning_rate": 1.9575942069256914e-06, |
|
"loss": 0.4923, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 2.439732142857143, |
|
"grad_norm": 2.4808318614959717, |
|
"learning_rate": 1.942611342272591e-06, |
|
"loss": 0.561, |
|
"step": 1093 |
|
}, |
|
{ |
|
"epoch": 2.4419642857142856, |
|
"grad_norm": 1.8226264715194702, |
|
"learning_rate": 1.9276798645961392e-06, |
|
"loss": 0.5164, |
|
"step": 1094 |
|
}, |
|
{ |
|
"epoch": 2.4441964285714284, |
|
"grad_norm": 1.6818956136703491, |
|
"learning_rate": 1.9127998691236537e-06, |
|
"loss": 0.4326, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 2.4464285714285716, |
|
"grad_norm": 1.865804672241211, |
|
"learning_rate": 1.8979714507541103e-06, |
|
"loss": 0.5218, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 2.4486607142857144, |
|
"grad_norm": 1.5961390733718872, |
|
"learning_rate": 1.883194704057556e-06, |
|
"loss": 0.4788, |
|
"step": 1097 |
|
}, |
|
{ |
|
"epoch": 2.450892857142857, |
|
"grad_norm": 1.5959690809249878, |
|
"learning_rate": 1.8684697232744886e-06, |
|
"loss": 0.5347, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 2.453125, |
|
"grad_norm": 2.1117734909057617, |
|
"learning_rate": 1.8537966023152664e-06, |
|
"loss": 0.4431, |
|
"step": 1099 |
|
}, |
|
{ |
|
"epoch": 2.455357142857143, |
|
"grad_norm": 2.0048511028289795, |
|
"learning_rate": 1.839175434759507e-06, |
|
"loss": 0.5143, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.4575892857142856, |
|
"grad_norm": 1.9185304641723633, |
|
"learning_rate": 1.8246063138554793e-06, |
|
"loss": 0.5521, |
|
"step": 1101 |
|
}, |
|
{ |
|
"epoch": 2.4598214285714284, |
|
"grad_norm": 1.8508011102676392, |
|
"learning_rate": 1.810089332519528e-06, |
|
"loss": 0.5806, |
|
"step": 1102 |
|
}, |
|
{ |
|
"epoch": 2.4620535714285716, |
|
"grad_norm": 1.7387700080871582, |
|
"learning_rate": 1.795624583335467e-06, |
|
"loss": 0.4702, |
|
"step": 1103 |
|
}, |
|
{ |
|
"epoch": 2.4642857142857144, |
|
"grad_norm": 1.8409698009490967, |
|
"learning_rate": 1.7812121585539964e-06, |
|
"loss": 0.5616, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 2.466517857142857, |
|
"grad_norm": 1.5656404495239258, |
|
"learning_rate": 1.7668521500921098e-06, |
|
"loss": 0.4918, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 2.46875, |
|
"grad_norm": 2.4327805042266846, |
|
"learning_rate": 1.7525446495325038e-06, |
|
"loss": 0.6054, |
|
"step": 1106 |
|
}, |
|
{ |
|
"epoch": 2.470982142857143, |
|
"grad_norm": 1.7695860862731934, |
|
"learning_rate": 1.7382897481230076e-06, |
|
"loss": 0.4232, |
|
"step": 1107 |
|
}, |
|
{ |
|
"epoch": 2.4732142857142856, |
|
"grad_norm": 1.8793418407440186, |
|
"learning_rate": 1.7240875367759902e-06, |
|
"loss": 0.5547, |
|
"step": 1108 |
|
}, |
|
{ |
|
"epoch": 2.4754464285714284, |
|
"grad_norm": 1.9762465953826904, |
|
"learning_rate": 1.7099381060677833e-06, |
|
"loss": 0.5693, |
|
"step": 1109 |
|
}, |
|
{ |
|
"epoch": 2.4776785714285716, |
|
"grad_norm": 1.4983291625976562, |
|
"learning_rate": 1.6958415462380983e-06, |
|
"loss": 0.4565, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 2.4799107142857144, |
|
"grad_norm": 1.9047200679779053, |
|
"learning_rate": 1.6817979471894641e-06, |
|
"loss": 0.4563, |
|
"step": 1111 |
|
}, |
|
{ |
|
"epoch": 2.482142857142857, |
|
"grad_norm": 1.9837721586227417, |
|
"learning_rate": 1.6678073984866438e-06, |
|
"loss": 0.5958, |
|
"step": 1112 |
|
}, |
|
{ |
|
"epoch": 2.484375, |
|
"grad_norm": 2.0123696327209473, |
|
"learning_rate": 1.6538699893560618e-06, |
|
"loss": 0.5114, |
|
"step": 1113 |
|
}, |
|
{ |
|
"epoch": 2.486607142857143, |
|
"grad_norm": 1.916115164756775, |
|
"learning_rate": 1.639985808685245e-06, |
|
"loss": 0.5507, |
|
"step": 1114 |
|
}, |
|
{ |
|
"epoch": 2.4888392857142856, |
|
"grad_norm": 2.219719648361206, |
|
"learning_rate": 1.6261549450222392e-06, |
|
"loss": 0.57, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 2.4910714285714284, |
|
"grad_norm": 1.7933136224746704, |
|
"learning_rate": 1.6123774865750607e-06, |
|
"loss": 0.4511, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 2.4933035714285716, |
|
"grad_norm": 1.8515256643295288, |
|
"learning_rate": 1.5986535212111353e-06, |
|
"loss": 0.487, |
|
"step": 1117 |
|
}, |
|
{ |
|
"epoch": 2.4955357142857144, |
|
"grad_norm": 1.9687520265579224, |
|
"learning_rate": 1.5849831364567137e-06, |
|
"loss": 0.4977, |
|
"step": 1118 |
|
}, |
|
{ |
|
"epoch": 2.497767857142857, |
|
"grad_norm": 2.062831401824951, |
|
"learning_rate": 1.571366419496344e-06, |
|
"loss": 0.5189, |
|
"step": 1119 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 1.8966569900512695, |
|
"learning_rate": 1.5578034571722879e-06, |
|
"loss": 0.5549, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"eval_loss": 0.7405146360397339, |
|
"eval_runtime": 48.6921, |
|
"eval_samples_per_second": 1.499, |
|
"eval_steps_per_second": 0.205, |
|
"step": 1120 |
|
}, |
|
    {
      "epoch": 2.502232142857143,
      "grad_norm": 1.9809284210205078,
      "learning_rate": 1.5442943359839978e-06,
      "loss": 0.5187,
      "step": 1121
    },
    {
      "epoch": 2.5044642857142856,
      "grad_norm": 2.1152169704437256,
      "learning_rate": 1.5308391420875312e-06,
      "loss": 0.5828,
      "step": 1122
    },
    {
      "epoch": 2.506696428571429,
      "grad_norm": 1.8229962587356567,
      "learning_rate": 1.5174379612950273e-06,
      "loss": 0.492,
      "step": 1123
    },
    {
      "epoch": 2.508928571428571,
      "grad_norm": 1.861548900604248,
      "learning_rate": 1.5040908790741448e-06,
      "loss": 0.523,
      "step": 1124
    },
    {
      "epoch": 2.5111607142857144,
      "grad_norm": 1.6094368696212769,
      "learning_rate": 1.490797980547528e-06,
      "loss": 0.4725,
      "step": 1125
    },
    {
      "epoch": 2.513392857142857,
      "grad_norm": 2.0476646423339844,
      "learning_rate": 1.4775593504922547e-06,
      "loss": 0.4939,
      "step": 1126
    },
    {
      "epoch": 2.515625,
      "grad_norm": 1.6590877771377563,
      "learning_rate": 1.4643750733392958e-06,
      "loss": 0.4955,
      "step": 1127
    },
    {
      "epoch": 2.517857142857143,
      "grad_norm": 1.984636664390564,
      "learning_rate": 1.4512452331729864e-06,
      "loss": 0.5275,
      "step": 1128
    },
    {
      "epoch": 2.5200892857142856,
      "grad_norm": 1.7271418571472168,
      "learning_rate": 1.438169913730475e-06,
      "loss": 0.5375,
      "step": 1129
    },
    {
      "epoch": 2.522321428571429,
      "grad_norm": 1.6943548917770386,
      "learning_rate": 1.4251491984012089e-06,
      "loss": 0.4983,
      "step": 1130
    },
    {
      "epoch": 2.524553571428571,
      "grad_norm": 1.9767521619796753,
      "learning_rate": 1.4121831702263833e-06,
      "loss": 0.5568,
      "step": 1131
    },
    {
      "epoch": 2.5267857142857144,
      "grad_norm": 1.9684196710586548,
      "learning_rate": 1.3992719118984167e-06,
      "loss": 0.5328,
      "step": 1132
    },
    {
      "epoch": 2.529017857142857,
      "grad_norm": 1.8626347780227661,
      "learning_rate": 1.3864155057604323e-06,
      "loss": 0.5693,
      "step": 1133
    },
    {
      "epoch": 2.53125,
      "grad_norm": 1.8821558952331543,
      "learning_rate": 1.3736140338057247e-06,
      "loss": 0.6132,
      "step": 1134
    },
    {
      "epoch": 2.533482142857143,
      "grad_norm": 1.952671766281128,
      "learning_rate": 1.3608675776772428e-06,
      "loss": 0.6163,
      "step": 1135
    },
    {
      "epoch": 2.5357142857142856,
      "grad_norm": 1.8021756410598755,
      "learning_rate": 1.3481762186670556e-06,
      "loss": 0.5465,
      "step": 1136
    },
    {
      "epoch": 2.537946428571429,
      "grad_norm": 1.8865597248077393,
      "learning_rate": 1.335540037715851e-06,
      "loss": 0.5889,
      "step": 1137
    },
    {
      "epoch": 2.540178571428571,
      "grad_norm": 1.9427474737167358,
      "learning_rate": 1.3229591154124132e-06,
      "loss": 0.5255,
      "step": 1138
    },
    {
      "epoch": 2.5424107142857144,
      "grad_norm": 1.6922953128814697,
      "learning_rate": 1.310433531993104e-06,
      "loss": 0.5588,
      "step": 1139
    },
    {
      "epoch": 2.544642857142857,
      "grad_norm": 1.7958623170852661,
      "learning_rate": 1.2979633673413571e-06,
      "loss": 0.461,
      "step": 1140
    },
    {
      "epoch": 2.546875,
      "grad_norm": 1.800321340560913,
      "learning_rate": 1.2855487009871615e-06,
      "loss": 0.5071,
      "step": 1141
    },
    {
      "epoch": 2.549107142857143,
      "grad_norm": 1.9416985511779785,
      "learning_rate": 1.2731896121065645e-06,
      "loss": 0.5693,
      "step": 1142
    },
    {
      "epoch": 2.5513392857142856,
      "grad_norm": 1.8170133829116821,
      "learning_rate": 1.2608861795211601e-06,
      "loss": 0.5963,
      "step": 1143
    },
    {
      "epoch": 2.553571428571429,
      "grad_norm": 1.5669772624969482,
      "learning_rate": 1.248638481697586e-06,
      "loss": 0.4923,
      "step": 1144
    },
    {
      "epoch": 2.555803571428571,
      "grad_norm": 2.1065783500671387,
      "learning_rate": 1.2364465967470284e-06,
      "loss": 0.5503,
      "step": 1145
    },
    {
      "epoch": 2.5580357142857144,
      "grad_norm": 1.905808925628662,
      "learning_rate": 1.224310602424712e-06,
      "loss": 0.4643,
      "step": 1146
    },
    {
      "epoch": 2.560267857142857,
      "grad_norm": 1.917167067527771,
      "learning_rate": 1.2122305761294196e-06,
      "loss": 0.5424,
      "step": 1147
    },
    {
      "epoch": 2.5625,
      "grad_norm": 1.7322640419006348,
      "learning_rate": 1.2002065949029896e-06,
      "loss": 0.4311,
      "step": 1148
    },
    {
      "epoch": 2.564732142857143,
      "grad_norm": 1.6713980436325073,
      "learning_rate": 1.1882387354298264e-06,
      "loss": 0.4733,
      "step": 1149
    },
    {
      "epoch": 2.5669642857142856,
      "grad_norm": 1.913543701171875,
      "learning_rate": 1.1763270740364074e-06,
      "loss": 0.4587,
      "step": 1150
    },
    {
      "epoch": 2.569196428571429,
      "grad_norm": 1.911083698272705,
      "learning_rate": 1.1644716866908035e-06,
      "loss": 0.6257,
      "step": 1151
    },
    {
      "epoch": 2.571428571428571,
      "grad_norm": 1.8229786157608032,
      "learning_rate": 1.15267264900219e-06,
      "loss": 0.4281,
      "step": 1152
    },
    {
      "epoch": 2.5736607142857144,
      "grad_norm": 1.7414361238479614,
      "learning_rate": 1.1409300362203667e-06,
      "loss": 0.5365,
      "step": 1153
    },
    {
      "epoch": 2.575892857142857,
      "grad_norm": 1.843278169631958,
      "learning_rate": 1.1292439232352781e-06,
      "loss": 0.5109,
      "step": 1154
    },
    {
      "epoch": 2.578125,
      "grad_norm": 1.76763117313385,
      "learning_rate": 1.1176143845765253e-06,
      "loss": 0.4557,
      "step": 1155
    },
    {
      "epoch": 2.580357142857143,
      "grad_norm": 1.6767909526824951,
      "learning_rate": 1.1060414944129106e-06,
      "loss": 0.4849,
      "step": 1156
    },
    {
      "epoch": 2.5825892857142856,
      "grad_norm": 1.6554889678955078,
      "learning_rate": 1.0945253265519472e-06,
      "loss": 0.4961,
      "step": 1157
    },
    {
      "epoch": 2.584821428571429,
      "grad_norm": 2.0062315464019775,
      "learning_rate": 1.0830659544393996e-06,
      "loss": 0.5364,
      "step": 1158
    },
    {
      "epoch": 2.587053571428571,
      "grad_norm": 1.7251535654067993,
      "learning_rate": 1.0716634511588076e-06,
      "loss": 0.47,
      "step": 1159
    },
    {
      "epoch": 2.5892857142857144,
      "grad_norm": 1.71232008934021,
      "learning_rate": 1.0603178894310185e-06,
      "loss": 0.5485,
      "step": 1160
    },
    {
      "epoch": 2.591517857142857,
      "grad_norm": 1.9494444131851196,
      "learning_rate": 1.0490293416137409e-06,
      "loss": 0.5122,
      "step": 1161
    },
    {
      "epoch": 2.59375,
      "grad_norm": 1.5578092336654663,
      "learning_rate": 1.0377978797010558e-06,
      "loss": 0.4307,
      "step": 1162
    },
    {
      "epoch": 2.595982142857143,
      "grad_norm": 1.9168074131011963,
      "learning_rate": 1.0266235753229825e-06,
      "loss": 0.6464,
      "step": 1163
    },
    {
      "epoch": 2.5982142857142856,
      "grad_norm": 1.903611660003662,
      "learning_rate": 1.0155064997450026e-06,
      "loss": 0.5234,
      "step": 1164
    },
    {
      "epoch": 2.600446428571429,
      "grad_norm": 1.7924622297286987,
      "learning_rate": 1.004446723867618e-06,
      "loss": 0.5628,
      "step": 1165
    },
    {
      "epoch": 2.602678571428571,
      "grad_norm": 2.0109128952026367,
      "learning_rate": 9.934443182259023e-07,
      "loss": 0.5824,
      "step": 1166
    },
    {
      "epoch": 2.6049107142857144,
      "grad_norm": 1.718166470527649,
      "learning_rate": 9.824993529890303e-07,
      "loss": 0.499,
      "step": 1167
    },
    {
      "epoch": 2.607142857142857,
      "grad_norm": 1.6572740077972412,
      "learning_rate": 9.716118979598533e-07,
      "loss": 0.5359,
      "step": 1168
    },
    {
      "epoch": 2.609375,
      "grad_norm": 1.667127251625061,
      "learning_rate": 9.607820225744346e-07,
      "loss": 0.482,
      "step": 1169
    },
    {
      "epoch": 2.611607142857143,
      "grad_norm": 1.948585867881775,
      "learning_rate": 9.500097959016297e-07,
      "loss": 0.5495,
      "step": 1170
    },
    {
      "epoch": 2.6138392857142856,
      "grad_norm": 1.6463160514831543,
      "learning_rate": 9.392952866426198e-07,
      "loss": 0.5428,
      "step": 1171
    },
    {
      "epoch": 2.616071428571429,
      "grad_norm": 1.8283066749572754,
      "learning_rate": 9.286385631304939e-07,
      "loss": 0.5095,
      "step": 1172
    },
    {
      "epoch": 2.618303571428571,
      "grad_norm": 1.8753612041473389,
      "learning_rate": 9.180396933298019e-07,
      "loss": 0.5784,
      "step": 1173
    },
    {
      "epoch": 2.6205357142857144,
      "grad_norm": 1.831152319908142,
      "learning_rate": 9.074987448361261e-07,
      "loss": 0.6219,
      "step": 1174
    },
    {
      "epoch": 2.622767857142857,
      "grad_norm": 1.857530951499939,
      "learning_rate": 8.970157848756511e-07,
      "loss": 0.4694,
      "step": 1175
    },
    {
      "epoch": 2.625,
      "grad_norm": 1.9244414567947388,
      "learning_rate": 8.865908803047241e-07,
      "loss": 0.6059,
      "step": 1176
    },
    {
      "epoch": 2.627232142857143,
      "grad_norm": 2.004091262817383,
      "learning_rate": 8.762240976094461e-07,
      "loss": 0.539,
      "step": 1177
    },
    {
      "epoch": 2.6294642857142856,
      "grad_norm": 1.735284686088562,
      "learning_rate": 8.659155029052346e-07,
      "loss": 0.4928,
      "step": 1178
    },
    {
      "epoch": 2.631696428571429,
      "grad_norm": 1.8576728105545044,
      "learning_rate": 8.556651619364065e-07,
      "loss": 0.4546,
      "step": 1179
    },
    {
      "epoch": 2.633928571428571,
      "grad_norm": 1.9069209098815918,
      "learning_rate": 8.454731400757599e-07,
      "loss": 0.4937,
      "step": 1180
    },
    {
      "epoch": 2.6361607142857144,
      "grad_norm": 1.8208372592926025,
      "learning_rate": 8.353395023241528e-07,
      "loss": 0.4555,
      "step": 1181
    },
    {
      "epoch": 2.638392857142857,
      "grad_norm": 1.4398638010025024,
      "learning_rate": 8.252643133100935e-07,
      "loss": 0.4341,
      "step": 1182
    },
    {
      "epoch": 2.640625,
      "grad_norm": 1.7023489475250244,
      "learning_rate": 8.152476372893259e-07,
      "loss": 0.5228,
      "step": 1183
    },
    {
      "epoch": 2.642857142857143,
      "grad_norm": 1.8599404096603394,
      "learning_rate": 8.052895381444226e-07,
      "loss": 0.4926,
      "step": 1184
    },
    {
      "epoch": 2.6450892857142856,
      "grad_norm": 1.6925806999206543,
      "learning_rate": 7.953900793843694e-07,
      "loss": 0.4984,
      "step": 1185
    },
    {
      "epoch": 2.647321428571429,
      "grad_norm": 1.822161316871643,
      "learning_rate": 7.855493241441692e-07,
      "loss": 0.4369,
      "step": 1186
    },
    {
      "epoch": 2.649553571428571,
      "grad_norm": 1.9494961500167847,
      "learning_rate": 7.757673351844386e-07,
      "loss": 0.5383,
      "step": 1187
    },
    {
      "epoch": 2.6517857142857144,
      "grad_norm": 1.6641318798065186,
      "learning_rate": 7.660441748909997e-07,
      "loss": 0.52,
      "step": 1188
    },
    {
      "epoch": 2.654017857142857,
      "grad_norm": 1.6094475984573364,
      "learning_rate": 7.563799052744947e-07,
      "loss": 0.4805,
      "step": 1189
    },
    {
      "epoch": 2.65625,
      "grad_norm": 1.6435906887054443,
      "learning_rate": 7.46774587969975e-07,
      "loss": 0.5334,
      "step": 1190
    },
    {
      "epoch": 2.658482142857143,
      "grad_norm": 1.7535940408706665,
      "learning_rate": 7.372282842365208e-07,
      "loss": 0.442,
      "step": 1191
    },
    {
      "epoch": 2.6607142857142856,
      "grad_norm": 2.087963104248047,
      "learning_rate": 7.277410549568476e-07,
      "loss": 0.6131,
      "step": 1192
    },
    {
      "epoch": 2.662946428571429,
      "grad_norm": 1.6514884233474731,
      "learning_rate": 7.183129606369133e-07,
      "loss": 0.5286,
      "step": 1193
    },
    {
      "epoch": 2.665178571428571,
      "grad_norm": 1.7809820175170898,
      "learning_rate": 7.089440614055398e-07,
      "loss": 0.4577,
      "step": 1194
    },
    {
      "epoch": 2.6674107142857144,
      "grad_norm": 1.8051931858062744,
      "learning_rate": 6.996344170140168e-07,
      "loss": 0.5563,
      "step": 1195
    },
    {
      "epoch": 2.669642857142857,
      "grad_norm": 1.717929482460022,
      "learning_rate": 6.903840868357382e-07,
      "loss": 0.4968,
      "step": 1196
    },
    {
      "epoch": 2.671875,
      "grad_norm": 1.920330286026001,
      "learning_rate": 6.811931298658092e-07,
      "loss": 0.4726,
      "step": 1197
    },
    {
      "epoch": 2.674107142857143,
      "grad_norm": 1.5852843523025513,
      "learning_rate": 6.720616047206774e-07,
      "loss": 0.5563,
      "step": 1198
    },
    {
      "epoch": 2.6763392857142856,
      "grad_norm": 2.0212185382843018,
      "learning_rate": 6.62989569637752e-07,
      "loss": 0.5112,
      "step": 1199
    },
    {
      "epoch": 2.678571428571429,
      "grad_norm": 1.5889393091201782,
      "learning_rate": 6.539770824750447e-07,
      "loss": 0.4565,
      "step": 1200
    },
|
    {
      "epoch": 2.680803571428571,
      "grad_norm": 1.6706169843673706,
      "learning_rate": 6.450242007107865e-07,
      "loss": 0.4681,
      "step": 1201
    },
    {
      "epoch": 2.6830357142857144,
      "grad_norm": 2.0316851139068604,
      "learning_rate": 6.361309814430727e-07,
      "loss": 0.5109,
      "step": 1202
    },
    {
      "epoch": 2.685267857142857,
      "grad_norm": 1.3804383277893066,
      "learning_rate": 6.272974813894905e-07,
      "loss": 0.3889,
      "step": 1203
    },
    {
      "epoch": 2.6875,
      "grad_norm": 1.4953737258911133,
      "learning_rate": 6.185237568867597e-07,
      "loss": 0.4755,
      "step": 1204
    },
    {
      "epoch": 2.689732142857143,
      "grad_norm": 1.5238755941390991,
      "learning_rate": 6.098098638903771e-07,
      "loss": 0.4694,
      "step": 1205
    },
    {
      "epoch": 2.6919642857142856,
      "grad_norm": 1.6073217391967773,
      "learning_rate": 6.01155857974256e-07,
      "loss": 0.4265,
      "step": 1206
    },
    {
      "epoch": 2.694196428571429,
      "grad_norm": 1.841497778892517,
      "learning_rate": 5.925617943303719e-07,
      "loss": 0.5209,
      "step": 1207
    },
    {
      "epoch": 2.696428571428571,
      "grad_norm": 1.7118959426879883,
      "learning_rate": 5.840277277684136e-07,
      "loss": 0.4698,
      "step": 1208
    },
    {
      "epoch": 2.6986607142857144,
      "grad_norm": 1.6239268779754639,
      "learning_rate": 5.755537127154231e-07,
      "loss": 0.5341,
      "step": 1209
    },
    {
      "epoch": 2.700892857142857,
      "grad_norm": 1.7772884368896484,
      "learning_rate": 5.671398032154707e-07,
      "loss": 0.4857,
      "step": 1210
    },
    {
      "epoch": 2.703125,
      "grad_norm": 1.6045022010803223,
      "learning_rate": 5.58786052929281e-07,
      "loss": 0.5097,
      "step": 1211
    },
    {
      "epoch": 2.705357142857143,
      "grad_norm": 1.5222342014312744,
      "learning_rate": 5.504925151339191e-07,
      "loss": 0.4765,
      "step": 1212
    },
    {
      "epoch": 2.7075892857142856,
      "grad_norm": 2.022216558456421,
      "learning_rate": 5.422592427224239e-07,
      "loss": 0.5601,
      "step": 1213
    },
    {
      "epoch": 2.709821428571429,
      "grad_norm": 1.724923014640808,
      "learning_rate": 5.340862882034992e-07,
      "loss": 0.4478,
      "step": 1214
    },
    {
      "epoch": 2.712053571428571,
      "grad_norm": 1.814286470413208,
      "learning_rate": 5.259737037011547e-07,
      "loss": 0.5301,
      "step": 1215
    },
    {
      "epoch": 2.7142857142857144,
      "grad_norm": 1.8270453214645386,
      "learning_rate": 5.179215409543848e-07,
      "loss": 0.5095,
      "step": 1216
    },
    {
      "epoch": 2.716517857142857,
      "grad_norm": 2.0552830696105957,
      "learning_rate": 5.099298513168382e-07,
      "loss": 0.5872,
      "step": 1217
    },
    {
      "epoch": 2.71875,
      "grad_norm": 1.6828259229660034,
      "learning_rate": 5.01998685756484e-07,
      "loss": 0.4899,
      "step": 1218
    },
    {
      "epoch": 2.720982142857143,
      "grad_norm": 1.8285223245620728,
      "learning_rate": 4.941280948553018e-07,
      "loss": 0.5651,
      "step": 1219
    },
    {
      "epoch": 2.7232142857142856,
      "grad_norm": 1.8519248962402344,
      "learning_rate": 4.863181288089391e-07,
      "loss": 0.5844,
      "step": 1220
    },
    {
      "epoch": 2.725446428571429,
      "grad_norm": 1.867247223854065,
      "learning_rate": 4.785688374264053e-07,
      "loss": 0.5344,
      "step": 1221
    },
    {
      "epoch": 2.727678571428571,
      "grad_norm": 1.8264105319976807,
      "learning_rate": 4.708802701297499e-07,
      "loss": 0.5835,
      "step": 1222
    },
    {
      "epoch": 2.7299107142857144,
      "grad_norm": 1.8832281827926636,
      "learning_rate": 4.632524759537449e-07,
      "loss": 0.5418,
      "step": 1223
    },
    {
      "epoch": 2.732142857142857,
      "grad_norm": 1.7114546298980713,
      "learning_rate": 4.556855035455787e-07,
      "loss": 0.5168,
      "step": 1224
    },
    {
      "epoch": 2.734375,
      "grad_norm": 1.7664337158203125,
      "learning_rate": 4.481794011645368e-07,
      "loss": 0.5501,
      "step": 1225
    },
    {
      "epoch": 2.736607142857143,
      "grad_norm": 1.6939427852630615,
      "learning_rate": 4.407342166816997e-07,
      "loss": 0.4614,
      "step": 1226
    },
    {
      "epoch": 2.7388392857142856,
      "grad_norm": 1.8312894105911255,
      "learning_rate": 4.3334999757963734e-07,
      "loss": 0.6143,
      "step": 1227
    },
    {
      "epoch": 2.741071428571429,
      "grad_norm": 1.9868108034133911,
      "learning_rate": 4.2602679095210766e-07,
      "loss": 0.5636,
      "step": 1228
    },
    {
      "epoch": 2.743303571428571,
      "grad_norm": 1.5780657529830933,
      "learning_rate": 4.187646435037529e-07,
      "loss": 0.4947,
      "step": 1229
    },
    {
      "epoch": 2.7455357142857144,
      "grad_norm": 1.9387363195419312,
      "learning_rate": 4.1156360154979813e-07,
      "loss": 0.5237,
      "step": 1230
    },
    {
      "epoch": 2.747767857142857,
      "grad_norm": 1.5292437076568604,
      "learning_rate": 4.044237110157667e-07,
      "loss": 0.4427,
      "step": 1231
    },
    {
      "epoch": 2.75,
      "grad_norm": 1.718108892440796,
      "learning_rate": 3.9734501743717956e-07,
      "loss": 0.5295,
      "step": 1232
    },
    {
      "epoch": 2.75,
      "eval_loss": 0.7388916015625,
      "eval_runtime": 48.1103,
      "eval_samples_per_second": 1.517,
      "eval_steps_per_second": 0.208,
      "step": 1232
    },
|
    {
      "epoch": 2.752232142857143,
      "grad_norm": 1.6977214813232422,
      "learning_rate": 3.9032756595926755e-07,
      "loss": 0.4584,
      "step": 1233
    },
    {
      "epoch": 2.7544642857142856,
      "grad_norm": 1.8142741918563843,
      "learning_rate": 3.833714013366796e-07,
      "loss": 0.4569,
      "step": 1234
    },
    {
      "epoch": 2.756696428571429,
      "grad_norm": 1.7793419361114502,
      "learning_rate": 3.7647656793320164e-07,
      "loss": 0.4911,
      "step": 1235
    },
    {
      "epoch": 2.758928571428571,
      "grad_norm": 1.7444989681243896,
      "learning_rate": 3.696431097214748e-07,
      "loss": 0.5189,
      "step": 1236
    },
    {
      "epoch": 2.7611607142857144,
      "grad_norm": 1.9122920036315918,
      "learning_rate": 3.628710702827076e-07,
      "loss": 0.4846,
      "step": 1237
    },
    {
      "epoch": 2.763392857142857,
      "grad_norm": 1.8623292446136475,
      "learning_rate": 3.5616049280640995e-07,
      "loss": 0.4874,
      "step": 1238
    },
    {
      "epoch": 2.765625,
      "grad_norm": 2.041966199874878,
      "learning_rate": 3.4951142009010173e-07,
      "loss": 0.5527,
      "step": 1239
    },
    {
      "epoch": 2.767857142857143,
      "grad_norm": 1.7278554439544678,
      "learning_rate": 3.429238945390556e-07,
      "loss": 0.5564,
      "step": 1240
    },
    {
      "epoch": 2.7700892857142856,
      "grad_norm": 1.6937023401260376,
      "learning_rate": 3.3639795816601705e-07,
      "loss": 0.5349,
      "step": 1241
    },
    {
      "epoch": 2.772321428571429,
      "grad_norm": 1.569589614868164,
      "learning_rate": 3.299336525909391e-07,
      "loss": 0.4183,
      "step": 1242
    },
    {
      "epoch": 2.774553571428571,
      "grad_norm": 1.8380076885223389,
      "learning_rate": 3.235310190407182e-07,
      "loss": 0.4572,
      "step": 1243
    },
    {
      "epoch": 2.7767857142857144,
      "grad_norm": 1.6024210453033447,
      "learning_rate": 3.171900983489273e-07,
      "loss": 0.4429,
      "step": 1244
    },
    {
      "epoch": 2.779017857142857,
      "grad_norm": 1.658486008644104,
      "learning_rate": 3.109109309555602e-07,
      "loss": 0.5431,
      "step": 1245
    },
    {
      "epoch": 2.78125,
      "grad_norm": 1.8605934381484985,
      "learning_rate": 3.0469355690677216e-07,
      "loss": 0.5497,
      "step": 1246
    },
    {
      "epoch": 2.783482142857143,
      "grad_norm": 1.8473893404006958,
      "learning_rate": 2.985380158546236e-07,
      "loss": 0.4607,
      "step": 1247
    },
    {
      "epoch": 2.7857142857142856,
      "grad_norm": 2.1259796619415283,
      "learning_rate": 2.9244434705682276e-07,
      "loss": 0.542,
      "step": 1248
    },
    {
      "epoch": 2.787946428571429,
      "grad_norm": 1.9905544519424438,
      "learning_rate": 2.8641258937648577e-07,
      "loss": 0.5211,
      "step": 1249
    },
    {
      "epoch": 2.790178571428571,
      "grad_norm": 1.8052101135253906,
      "learning_rate": 2.8044278128188327e-07,
      "loss": 0.516,
      "step": 1250
    },
    {
      "epoch": 2.7924107142857144,
      "grad_norm": 1.7659034729003906,
      "learning_rate": 2.7453496084619116e-07,
      "loss": 0.5368,
      "step": 1251
    },
    {
      "epoch": 2.794642857142857,
      "grad_norm": 1.8553686141967773,
      "learning_rate": 2.6868916574725347e-07,
      "loss": 0.5216,
      "step": 1252
    },
    {
      "epoch": 2.796875,
      "grad_norm": 1.7524783611297607,
      "learning_rate": 2.6290543326733865e-07,
      "loss": 0.4936,
      "step": 1253
    },
    {
      "epoch": 2.799107142857143,
      "grad_norm": 1.8771616220474243,
      "learning_rate": 2.571838002929061e-07,
      "loss": 0.604,
      "step": 1254
    },
    {
      "epoch": 2.8013392857142856,
      "grad_norm": 1.833112120628357,
      "learning_rate": 2.515243033143644e-07,
      "loss": 0.4917,
      "step": 1255
    },
    {
      "epoch": 2.803571428571429,
      "grad_norm": 1.7152862548828125,
      "learning_rate": 2.459269784258467e-07,
      "loss": 0.5593,
      "step": 1256
    },
    {
      "epoch": 2.805803571428571,
      "grad_norm": 1.824363350868225,
      "learning_rate": 2.4039186132497226e-07,
      "loss": 0.5888,
      "step": 1257
    },
    {
      "epoch": 2.8080357142857144,
      "grad_norm": 1.7618277072906494,
      "learning_rate": 2.349189873126223e-07,
      "loss": 0.5356,
      "step": 1258
    },
    {
      "epoch": 2.810267857142857,
      "grad_norm": 2.1107370853424072,
      "learning_rate": 2.2950839129272096e-07,
      "loss": 0.569,
      "step": 1259
    },
    {
      "epoch": 2.8125,
      "grad_norm": 1.6434730291366577,
      "learning_rate": 2.2416010777199904e-07,
      "loss": 0.5319,
      "step": 1260
    },
    {
      "epoch": 2.814732142857143,
      "grad_norm": 1.9061239957809448,
      "learning_rate": 2.1887417085978745e-07,
      "loss": 0.5174,
      "step": 1261
    },
    {
      "epoch": 2.8169642857142856,
      "grad_norm": 1.8371518850326538,
      "learning_rate": 2.1365061426778967e-07,
      "loss": 0.583,
      "step": 1262
    },
    {
      "epoch": 2.819196428571429,
      "grad_norm": 1.754087209701538,
      "learning_rate": 2.0848947130987617e-07,
      "loss": 0.6134,
      "step": 1263
    },
    {
      "epoch": 2.821428571428571,
      "grad_norm": 1.772619366645813,
      "learning_rate": 2.0339077490186488e-07,
      "loss": 0.4524,
      "step": 1264
    },
    {
      "epoch": 2.8236607142857144,
      "grad_norm": 1.8808914422988892,
      "learning_rate": 1.9835455756130995e-07,
      "loss": 0.5474,
      "step": 1265
    },
    {
      "epoch": 2.825892857142857,
      "grad_norm": 1.8659659624099731,
      "learning_rate": 1.93380851407301e-07,
      "loss": 0.539,
      "step": 1266
    },
    {
      "epoch": 2.828125,
      "grad_norm": 1.6725388765335083,
      "learning_rate": 1.8846968816025434e-07,
      "loss": 0.4956,
      "step": 1267
    },
    {
      "epoch": 2.830357142857143,
      "grad_norm": 2.0685112476348877,
      "learning_rate": 1.83621099141712e-07,
      "loss": 0.568,
      "step": 1268
    },
    {
      "epoch": 2.8325892857142856,
      "grad_norm": 2.1546757221221924,
      "learning_rate": 1.7883511527414078e-07,
      "loss": 0.543,
      "step": 1269
    },
    {
      "epoch": 2.834821428571429,
      "grad_norm": 1.8702294826507568,
      "learning_rate": 1.741117670807335e-07,
      "loss": 0.5957,
      "step": 1270
    },
    {
      "epoch": 2.837053571428571,
      "grad_norm": 1.8077850341796875,
      "learning_rate": 1.694510846852193e-07,
      "loss": 0.5135,
      "step": 1271
    },
    {
      "epoch": 2.8392857142857144,
      "grad_norm": 1.8730038404464722,
      "learning_rate": 1.648530978116658e-07,
      "loss": 0.5495,
      "step": 1272
    },
    {
      "epoch": 2.841517857142857,
      "grad_norm": 2.0048348903656006,
      "learning_rate": 1.6031783578429605e-07,
      "loss": 0.5356,
      "step": 1273
    },
    {
      "epoch": 2.84375,
      "grad_norm": 1.7988780736923218,
      "learning_rate": 1.558453275272942e-07,
      "loss": 0.5383,
      "step": 1274
    },
    {
      "epoch": 2.845982142857143,
      "grad_norm": 1.999723196029663,
      "learning_rate": 1.5143560156462567e-07,
      "loss": 0.4961,
      "step": 1275
    },
    {
      "epoch": 2.8482142857142856,
      "grad_norm": 1.982683539390564,
      "learning_rate": 1.4708868601985503e-07,
      "loss": 0.4886,
      "step": 1276
    },
    {
      "epoch": 2.850446428571429,
      "grad_norm": 1.733881950378418,
      "learning_rate": 1.4280460861596513e-07,
      "loss": 0.4615,
      "step": 1277
    },
    {
      "epoch": 2.852678571428571,
      "grad_norm": 1.7100967168807983,
      "learning_rate": 1.385833966751815e-07,
      "loss": 0.4739,
      "step": 1278
    },
    {
      "epoch": 2.8549107142857144,
      "grad_norm": 1.951353669166565,
      "learning_rate": 1.3442507711879494e-07,
      "loss": 0.5577,
      "step": 1279
    },
    {
      "epoch": 2.857142857142857,
      "grad_norm": 1.7354167699813843,
      "learning_rate": 1.303296764669959e-07,
      "loss": 0.6184,
      "step": 1280
    },
    {
      "epoch": 2.859375,
      "grad_norm": 1.764654517173767,
      "learning_rate": 1.2629722083870033e-07,
      "loss": 0.4979,
      "step": 1281
    },
    {
      "epoch": 2.861607142857143,
      "grad_norm": 1.8083339929580688,
      "learning_rate": 1.2232773595138415e-07,
      "loss": 0.4016,
      "step": 1282
    },
    {
      "epoch": 2.8638392857142856,
      "grad_norm": 1.7972429990768433,
      "learning_rate": 1.1842124712092117e-07,
      "loss": 0.4883,
      "step": 1283
    },
    {
      "epoch": 2.866071428571429,
      "grad_norm": 1.6948587894439697,
      "learning_rate": 1.1457777926141889e-07,
      "loss": 0.4177,
      "step": 1284
    },
    {
      "epoch": 2.868303571428571,
      "grad_norm": 1.7051078081130981,
      "learning_rate": 1.1079735688506065e-07,
      "loss": 0.5008,
      "step": 1285
    },
    {
      "epoch": 2.8705357142857144,
      "grad_norm": 1.6710540056228638,
      "learning_rate": 1.0708000410195041e-07,
      "loss": 0.4351,
      "step": 1286
    },
    {
      "epoch": 2.872767857142857,
      "grad_norm": 1.5446432828903198,
      "learning_rate": 1.0342574461995936e-07,
      "loss": 0.4409,
      "step": 1287
    },
    {
      "epoch": 2.875,
      "grad_norm": 1.8338514566421509,
      "learning_rate": 9.98346017445706e-08,
      "loss": 0.5283,
      "step": 1288
    },
    {
      "epoch": 2.877232142857143,
      "grad_norm": 1.672766923904419,
      "learning_rate": 9.630659837873368e-08,
      "loss": 0.5112,
      "step": 1289
    },
    {
      "epoch": 2.8794642857142856,
      "grad_norm": 1.7518768310546875,
      "learning_rate": 9.284175702272246e-08,
      "loss": 0.4747,
      "step": 1290
    },
    {
      "epoch": 2.881696428571429,
      "grad_norm": 2.162524938583374,
      "learning_rate": 8.944009977398083e-08,
      "loss": 0.5968,
      "step": 1291
    },
    {
      "epoch": 2.883928571428571,
      "grad_norm": 1.8234249353408813,
      "learning_rate": 8.610164832699608e-08,
      "loss": 0.5249,
      "step": 1292
    },
    {
      "epoch": 2.8861607142857144,
      "grad_norm": 1.7678987979888916,
      "learning_rate": 8.282642397314356e-08,
      "loss": 0.5712,
      "step": 1293
    },
    {
      "epoch": 2.888392857142857,
      "grad_norm": 1.769107460975647,
      "learning_rate": 7.96144476005689e-08,
      "loss": 0.4407,
      "step": 1294
    },
    {
      "epoch": 2.890625,
      "grad_norm": 1.5588442087173462,
      "learning_rate": 7.646573969404159e-08,
      "loss": 0.444,
      "step": 1295
    },
    {
      "epoch": 2.892857142857143,
      "grad_norm": 1.6710723638534546,
      "learning_rate": 7.338032033482712e-08,
      "loss": 0.4247,
      "step": 1296
    },
    {
      "epoch": 2.8950892857142856,
      "grad_norm": 1.6855120658874512,
      "learning_rate": 7.035820920056724e-08,
      "loss": 0.4729,
      "step": 1297
    },
    {
      "epoch": 2.897321428571429,
      "grad_norm": 2.1896936893463135,
      "learning_rate": 6.73994255651389e-08,
      "loss": 0.5533,
      "step": 1298
    },
    {
      "epoch": 2.899553571428571,
      "grad_norm": 1.658887267112732,
      "learning_rate": 6.450398829854764e-08,
      "loss": 0.4726,
      "step": 1299
    },
    {
      "epoch": 2.9017857142857144,
      "grad_norm": 1.7160634994506836,
      "learning_rate": 6.167191586679556e-08,
      "loss": 0.4999,
      "step": 1300
    },
|
    {
      "epoch": 2.904017857142857,
      "grad_norm": 1.9953809976577759,
      "learning_rate": 5.890322633177126e-08,
      "loss": 0.5515,
      "step": 1301
    },
    {
      "epoch": 2.90625,
      "grad_norm": 1.998899221420288,
      "learning_rate": 5.6197937351125664e-08,
      "loss": 0.4844,
      "step": 1302
    },
    {
      "epoch": 2.908482142857143,
      "grad_norm": 1.6095709800720215,
      "learning_rate": 5.355606617817089e-08,
      "loss": 0.4459,
      "step": 1303
    },
    {
      "epoch": 2.9107142857142856,
      "grad_norm": 1.6279726028442383,
      "learning_rate": 5.097762966176256e-08,
      "loss": 0.4293,
      "step": 1304
    },
    {
      "epoch": 2.912946428571429,
      "grad_norm": 1.60764741897583,
      "learning_rate": 4.846264424619218e-08,
      "loss": 0.4048,
      "step": 1305
    },
    {
      "epoch": 2.915178571428571,
      "grad_norm": 1.5504363775253296,
      "learning_rate": 4.6011125971084924e-08,
      "loss": 0.4506,
      "step": 1306
    },
    {
      "epoch": 2.9174107142857144,
      "grad_norm": 1.8371487855911255,
      "learning_rate": 4.3623090471296426e-08,
      "loss": 0.431,
      "step": 1307
    },
    {
      "epoch": 2.919642857142857,
      "grad_norm": 1.6488250494003296,
      "learning_rate": 4.129855297681618e-08,
      "loss": 0.4522,
      "step": 1308
    },
    {
      "epoch": 2.921875,
      "grad_norm": 1.6050375699996948,
      "learning_rate": 3.903752831266205e-08,
      "loss": 0.4436,
      "step": 1309
    },
    {
      "epoch": 2.924107142857143,
      "grad_norm": 1.6481090784072876,
      "learning_rate": 3.684003089879484e-08,
      "loss": 0.4896,
      "step": 1310
    },
    {
      "epoch": 2.9263392857142856,
      "grad_norm": 1.685328722000122,
      "learning_rate": 3.4706074750022744e-08,
      "loss": 0.5347,
      "step": 1311
    },
    {
      "epoch": 2.928571428571429,
      "grad_norm": 1.8341789245605469,
      "learning_rate": 3.2635673475910345e-08,
      "loss": 0.515,
      "step": 1312
    },
    {
      "epoch": 2.930803571428571,
      "grad_norm": 1.7492722272872925,
      "learning_rate": 3.062884028069313e-08,
      "loss": 0.5591,
      "step": 1313
    },
    {
      "epoch": 2.9330357142857144,
      "grad_norm": 1.7350465059280396,
      "learning_rate": 2.8685587963194206e-08,
      "loss": 0.547,
      "step": 1314
    },
    {
      "epoch": 2.935267857142857,
      "grad_norm": 1.6732760667800903,
      "learning_rate": 2.6805928916742163e-08,
      "loss": 0.4303,
      "step": 1315
    },
    {
      "epoch": 2.9375,
      "grad_norm": 1.7712833881378174,
      "learning_rate": 2.4989875129091124e-08,
      "loss": 0.4711,
      "step": 1316
    },
    {
      "epoch": 2.939732142857143,
      "grad_norm": 1.616205096244812,
      "learning_rate": 2.323743818234414e-08,
      "loss": 0.4163,
      "step": 1317
    },
    {
      "epoch": 2.9419642857142856,
      "grad_norm": 1.7002397775650024,
      "learning_rate": 2.154862925288326e-08,
      "loss": 0.5499,
      "step": 1318
    },
    {
      "epoch": 2.944196428571429,
      "grad_norm": 1.6891026496887207,
      "learning_rate": 1.9923459111290676e-08,
      "loss": 0.5318,
      "step": 1319
    },
    {
      "epoch": 2.946428571428571,
      "grad_norm": 1.8011424541473389,
      "learning_rate": 1.8361938122287704e-08,
      "loss": 0.5235,
      "step": 1320
    },
    {
      "epoch": 2.9486607142857144,
      "grad_norm": 2.001744508743286,
      "learning_rate": 1.6864076244663686e-08,
      "loss": 0.6503,
      "step": 1321
    },
    {
      "epoch": 2.950892857142857,
      "grad_norm": 1.6341923475265503,
      "learning_rate": 1.5429883031217173e-08,
      "loss": 0.5342,
      "step": 1322
    },
    {
      "epoch": 2.953125,
      "grad_norm": 1.8023895025253296,
      "learning_rate": 1.4059367628687094e-08,
      "loss": 0.5666,
      "step": 1323
    },
    {
      "epoch": 2.955357142857143,
      "grad_norm": 1.7343465089797974,
      "learning_rate": 1.2752538777704993e-08,
      "loss": 0.4884,
      "step": 1324
    },
    {
      "epoch": 2.9575892857142856,
      "grad_norm": 1.7615457773208618,
      "learning_rate": 1.1509404812728443e-08,
      "loss": 0.5254,
      "step": 1325
    },
    {
      "epoch": 2.959821428571429,
      "grad_norm": 1.7637721300125122,
      "learning_rate": 1.0329973661996617e-08,
      "loss": 0.4997,
      "step": 1326
    },
    {
      "epoch": 2.962053571428571,
      "grad_norm": 1.8085095882415771,
      "learning_rate": 9.214252847475902e-09,
      "loss": 0.4738,
      "step": 1327
    },
    {
      "epoch": 2.9642857142857144,
      "grad_norm": 2.0455617904663086,
      "learning_rate": 8.162249484809926e-09,
      "loss": 0.5687,
      "step": 1328
    },
    {
      "epoch": 2.966517857142857,
      "grad_norm": 1.6833295822143555,
      "learning_rate": 7.173970283279597e-09,
      "loss": 0.4429,
      "step": 1329
    },
    {
      "epoch": 2.96875,
      "grad_norm": 1.766695499420166,
      "learning_rate": 6.249421545755363e-09,
      "loss": 0.5295,
      "step": 1330
    },
    {
      "epoch": 2.970982142857143,
      "grad_norm": 1.7241015434265137,
      "learning_rate": 5.388609168659465e-09,
      "loss": 0.5362,
      "step": 1331
    },
    {
      "epoch": 2.9732142857142856,
      "grad_norm": 1.826074242591858,
      "learning_rate": 4.591538641927074e-09,
      "loss": 0.5459,
      "step": 1332
    },
    {
      "epoch": 2.975446428571429,
      "grad_norm": 1.6138020753860474,
      "learning_rate": 3.858215048972991e-09,
      "loss": 0.515,
      "step": 1333
    },
    {
      "epoch": 2.977678571428571,
      "grad_norm": 1.8125077486038208,
      "learning_rate": 3.1886430666561163e-09,
      "loss": 0.5262,
      "step": 1334
    },
    {
      "epoch": 2.9799107142857144,
      "grad_norm": 1.748060703277588,
      "learning_rate": 2.5828269652561355e-09,
      "loss": 0.4633,
      "step": 1335
    },
    {
      "epoch": 2.982142857142857,
      "grad_norm": 1.6022776365280151,
      "learning_rate": 2.0407706084368816e-09,
      "loss": 0.4314,
      "step": 1336
    },
    {
      "epoch": 2.984375,
      "grad_norm": 1.672903060913086,
      "learning_rate": 1.5624774532285726e-09,
      "loss": 0.4904,
      "step": 1337
    },
    {
      "epoch": 2.986607142857143,
      "grad_norm": 1.8359776735305786,
      "learning_rate": 1.1479505500044952e-09,
      "loss": 0.4427,
      "step": 1338
    },
    {
      "epoch": 2.9888392857142856,
      "grad_norm": 1.6358612775802612,
      "learning_rate": 7.971925424621329e-10,
      "loss": 0.4399,
      "step": 1339
    },
    {
      "epoch": 2.991071428571429,
      "grad_norm": 1.8373823165893555,
      "learning_rate": 5.102056675998501e-10,
      "loss": 0.5499,
      "step": 1340
    },
    {
      "epoch": 2.993303571428571,
      "grad_norm": 1.7908254861831665,
      "learning_rate": 2.8699175571467177e-10,
      "loss": 0.4939,
      "step": 1341
    },
    {
      "epoch": 2.9955357142857144,
      "grad_norm": 1.9384500980377197,
      "learning_rate": 1.2755223037896892e-10,
      "loss": 0.5783,
      "step": 1342
    },
    {
      "epoch": 2.997767857142857,
      "grad_norm": 1.8946892023086548,
      "learning_rate": 3.1888108437128085e-11,
      "loss": 0.5923,
      "step": 1343
    },
    {
      "epoch": 3.0,
      "grad_norm": 1.533162236213684,
      "learning_rate": 0.0,
      "loss": 0.4701,
      "step": 1344
    },
    {
      "epoch": 3.0,
      "eval_loss": 0.739547848701477,
      "eval_runtime": 48.8136,
      "eval_samples_per_second": 1.495,
      "eval_steps_per_second": 0.205,
      "step": 1344
    }
  ],
  "logging_steps": 1,
  "max_steps": 1344,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 224,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.304796257625047e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}