|
{ |
|
"best_metric": 1.6035598516464233, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-100", |
|
"epoch": 0.0338863395693611, |
|
"eval_steps": 10, |
|
"global_step": 120, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0002823861630780092, |
|
"grad_norm": 1.2318695783615112, |
|
"learning_rate": 2e-05, |
|
"loss": 2.0472, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0002823861630780092, |
|
"eval_loss": 2.352551221847534, |
|
"eval_runtime": 134.4924, |
|
"eval_samples_per_second": 5.547, |
|
"eval_steps_per_second": 5.547, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0005647723261560184, |
|
"grad_norm": 3.381352663040161, |
|
"learning_rate": 4e-05, |
|
"loss": 2.7995, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0008471584892340275, |
|
"grad_norm": 0.7312057018280029, |
|
"learning_rate": 6e-05, |
|
"loss": 1.1274, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0011295446523120368, |
|
"grad_norm": 1.382169485092163, |
|
"learning_rate": 8e-05, |
|
"loss": 3.0301, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0014119308153900459, |
|
"grad_norm": 1.6353622674942017, |
|
"learning_rate": 0.0001, |
|
"loss": 2.7291, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.001694316978468055, |
|
"grad_norm": 0.6910256147384644, |
|
"learning_rate": 0.00012, |
|
"loss": 2.8307, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.001976703141546064, |
|
"grad_norm": 1.4815878868103027, |
|
"learning_rate": 0.00014, |
|
"loss": 2.0774, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.0022590893046240735, |
|
"grad_norm": 1.042596459388733, |
|
"learning_rate": 0.00016, |
|
"loss": 2.6145, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.0025414754677020824, |
|
"grad_norm": 0.8719121217727661, |
|
"learning_rate": 0.00018, |
|
"loss": 2.9212, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0028238616307800918, |
|
"grad_norm": 1.0421559810638428, |
|
"learning_rate": 0.0002, |
|
"loss": 2.3507, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0028238616307800918, |
|
"eval_loss": 2.1853621006011963, |
|
"eval_runtime": 133.969, |
|
"eval_samples_per_second": 5.568, |
|
"eval_steps_per_second": 5.568, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.003106247793858101, |
|
"grad_norm": 0.9601467847824097, |
|
"learning_rate": 0.0001999979446958366, |
|
"loss": 1.9811, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.00338863395693611, |
|
"grad_norm": 1.353819489479065, |
|
"learning_rate": 0.00019999177886783194, |
|
"loss": 2.373, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.0036710201200141194, |
|
"grad_norm": 0.897077202796936, |
|
"learning_rate": 0.00019998150276943902, |
|
"loss": 1.696, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.003953406283092128, |
|
"grad_norm": 1.358173131942749, |
|
"learning_rate": 0.000199967116823068, |
|
"loss": 2.3396, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.004235792446170138, |
|
"grad_norm": 1.0444282293319702, |
|
"learning_rate": 0.0001999486216200688, |
|
"loss": 1.482, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.004518178609248147, |
|
"grad_norm": 3.5194482803344727, |
|
"learning_rate": 0.00019992601792070679, |
|
"loss": 1.6677, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.004800564772326156, |
|
"grad_norm": 3.523196220397949, |
|
"learning_rate": 0.00019989930665413147, |
|
"loss": 2.393, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.005082950935404165, |
|
"grad_norm": 0.9800947904586792, |
|
"learning_rate": 0.00019986848891833845, |
|
"loss": 1.6947, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.005365337098482174, |
|
"grad_norm": 1.540337324142456, |
|
"learning_rate": 0.0001998335659801241, |
|
"loss": 0.563, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.0056477232615601836, |
|
"grad_norm": 1.577869176864624, |
|
"learning_rate": 0.00019979453927503364, |
|
"loss": 2.0525, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0056477232615601836, |
|
"eval_loss": 1.7350926399230957, |
|
"eval_runtime": 133.9613, |
|
"eval_samples_per_second": 5.569, |
|
"eval_steps_per_second": 5.569, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.005930109424638193, |
|
"grad_norm": 0.876990020275116, |
|
"learning_rate": 0.00019975141040730207, |
|
"loss": 1.2469, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.006212495587716202, |
|
"grad_norm": 1.1722772121429443, |
|
"learning_rate": 0.0001997041811497882, |
|
"loss": 2.2798, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.006494881750794211, |
|
"grad_norm": 0.9232650995254517, |
|
"learning_rate": 0.00019965285344390184, |
|
"loss": 2.5319, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.00677726791387222, |
|
"grad_norm": 1.6948198080062866, |
|
"learning_rate": 0.00019959742939952392, |
|
"loss": 1.6864, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.0070596540769502295, |
|
"grad_norm": 1.277117133140564, |
|
"learning_rate": 0.00019953791129491983, |
|
"loss": 2.1785, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.007342040240028239, |
|
"grad_norm": 3.347221851348877, |
|
"learning_rate": 0.00019947430157664576, |
|
"loss": 1.5917, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.007624426403106248, |
|
"grad_norm": 1.2190711498260498, |
|
"learning_rate": 0.00019940660285944803, |
|
"loss": 2.3655, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.007906812566184257, |
|
"grad_norm": 1.9080499410629272, |
|
"learning_rate": 0.00019933481792615583, |
|
"loss": 2.0602, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.008189198729262267, |
|
"grad_norm": 1.5589416027069092, |
|
"learning_rate": 0.0001992589497275665, |
|
"loss": 1.9817, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.008471584892340275, |
|
"grad_norm": 2.701538562774658, |
|
"learning_rate": 0.0001991790013823246, |
|
"loss": 1.434, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.008471584892340275, |
|
"eval_loss": 1.6901509761810303, |
|
"eval_runtime": 134.7409, |
|
"eval_samples_per_second": 5.537, |
|
"eval_steps_per_second": 5.537, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.008753971055418284, |
|
"grad_norm": 1.0493268966674805, |
|
"learning_rate": 0.00019909497617679348, |
|
"loss": 2.413, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.009036357218496294, |
|
"grad_norm": 1.5526515245437622, |
|
"learning_rate": 0.0001990068775649202, |
|
"loss": 2.2275, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.009318743381574303, |
|
"grad_norm": 1.4149539470672607, |
|
"learning_rate": 0.00019891470916809362, |
|
"loss": 2.4908, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.009601129544652313, |
|
"grad_norm": 1.2589884996414185, |
|
"learning_rate": 0.00019881847477499557, |
|
"loss": 1.7045, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.009883515707730321, |
|
"grad_norm": 2.9048409461975098, |
|
"learning_rate": 0.00019871817834144504, |
|
"loss": 1.6211, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.01016590187080833, |
|
"grad_norm": 1.029226303100586, |
|
"learning_rate": 0.0001986138239902355, |
|
"loss": 2.1988, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.01044828803388634, |
|
"grad_norm": 1.716841697692871, |
|
"learning_rate": 0.0001985054160109657, |
|
"loss": 1.7938, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.010730674196964348, |
|
"grad_norm": 1.0365818738937378, |
|
"learning_rate": 0.00019839295885986296, |
|
"loss": 1.896, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.011013060360042359, |
|
"grad_norm": 1.4247523546218872, |
|
"learning_rate": 0.0001982764571596004, |
|
"loss": 2.0023, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.011295446523120367, |
|
"grad_norm": 1.0945035219192505, |
|
"learning_rate": 0.00019815591569910654, |
|
"loss": 2.4118, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.011295446523120367, |
|
"eval_loss": 1.651785135269165, |
|
"eval_runtime": 134.7054, |
|
"eval_samples_per_second": 5.538, |
|
"eval_steps_per_second": 5.538, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.011577832686198376, |
|
"grad_norm": 1.3919488191604614, |
|
"learning_rate": 0.00019803133943336874, |
|
"loss": 2.096, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.011860218849276386, |
|
"grad_norm": 1.0962899923324585, |
|
"learning_rate": 0.0001979027334832293, |
|
"loss": 2.0469, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.012142605012354394, |
|
"grad_norm": 1.147463083267212, |
|
"learning_rate": 0.00019777010313517518, |
|
"loss": 2.0405, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.012424991175432405, |
|
"grad_norm": 2.677388906478882, |
|
"learning_rate": 0.00019763345384112043, |
|
"loss": 1.0599, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.012707377338510413, |
|
"grad_norm": 1.20905339717865, |
|
"learning_rate": 0.00019749279121818235, |
|
"loss": 0.6549, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.012989763501588421, |
|
"grad_norm": 1.109489917755127, |
|
"learning_rate": 0.00019734812104845047, |
|
"loss": 1.9309, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.013272149664666432, |
|
"grad_norm": 1.518839955329895, |
|
"learning_rate": 0.00019719944927874881, |
|
"loss": 1.8122, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.01355453582774444, |
|
"grad_norm": 0.8112537860870361, |
|
"learning_rate": 0.0001970467820203915, |
|
"loss": 1.8915, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.01383692199082245, |
|
"grad_norm": 0.7357921004295349, |
|
"learning_rate": 0.00019689012554893154, |
|
"loss": 2.1672, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.014119308153900459, |
|
"grad_norm": 0.7238839268684387, |
|
"learning_rate": 0.00019672948630390294, |
|
"loss": 0.9114, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.014119308153900459, |
|
"eval_loss": 1.624532699584961, |
|
"eval_runtime": 134.5228, |
|
"eval_samples_per_second": 5.546, |
|
"eval_steps_per_second": 5.546, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.014401694316978467, |
|
"grad_norm": 4.2082200050354, |
|
"learning_rate": 0.00019656487088855592, |
|
"loss": 1.5288, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.014684080480056478, |
|
"grad_norm": 0.6968432068824768, |
|
"learning_rate": 0.00019639628606958533, |
|
"loss": 1.4653, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.014966466643134486, |
|
"grad_norm": 1.9569013118743896, |
|
"learning_rate": 0.0001962237387768529, |
|
"loss": 1.7478, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.015248852806212496, |
|
"grad_norm": 1.2472929954528809, |
|
"learning_rate": 0.00019604723610310194, |
|
"loss": 1.1666, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.015531238969290505, |
|
"grad_norm": 0.7596736550331116, |
|
"learning_rate": 0.00019586678530366606, |
|
"loss": 1.7902, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.015813625132368513, |
|
"grad_norm": 1.2892470359802246, |
|
"learning_rate": 0.00019568239379617088, |
|
"loss": 1.8049, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.016096011295446522, |
|
"grad_norm": 1.5124154090881348, |
|
"learning_rate": 0.00019549406916022905, |
|
"loss": 1.7232, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.016378397458524534, |
|
"grad_norm": 1.0523513555526733, |
|
"learning_rate": 0.00019530181913712872, |
|
"loss": 1.772, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.016660783621602542, |
|
"grad_norm": 2.185861825942993, |
|
"learning_rate": 0.00019510565162951537, |
|
"loss": 2.3071, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.01694316978468055, |
|
"grad_norm": 0.6275917887687683, |
|
"learning_rate": 0.00019490557470106686, |
|
"loss": 2.0081, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.01694316978468055, |
|
"eval_loss": 1.624603033065796, |
|
"eval_runtime": 134.5052, |
|
"eval_samples_per_second": 5.546, |
|
"eval_steps_per_second": 5.546, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.01722555594775856, |
|
"grad_norm": 0.9301966428756714, |
|
"learning_rate": 0.00019470159657616215, |
|
"loss": 1.7401, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.017507942110836568, |
|
"grad_norm": 1.2353283166885376, |
|
"learning_rate": 0.00019449372563954293, |
|
"loss": 1.7241, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.01779032827391458, |
|
"grad_norm": 1.0061556100845337, |
|
"learning_rate": 0.0001942819704359693, |
|
"loss": 0.5784, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.018072714436992588, |
|
"grad_norm": 1.062904953956604, |
|
"learning_rate": 0.00019406633966986828, |
|
"loss": 2.1718, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.018355100600070597, |
|
"grad_norm": 1.897362470626831, |
|
"learning_rate": 0.00019384684220497605, |
|
"loss": 1.9466, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.018637486763148605, |
|
"grad_norm": 1.1101641654968262, |
|
"learning_rate": 0.00019362348706397373, |
|
"loss": 1.8438, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.018919872926226614, |
|
"grad_norm": 0.8784818649291992, |
|
"learning_rate": 0.00019339628342811632, |
|
"loss": 0.844, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.019202259089304625, |
|
"grad_norm": 1.256935477256775, |
|
"learning_rate": 0.0001931652406368554, |
|
"loss": 0.9449, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.019484645252382634, |
|
"grad_norm": 1.1505467891693115, |
|
"learning_rate": 0.0001929303681874552, |
|
"loss": 1.9627, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.019767031415460642, |
|
"grad_norm": 1.6793428659439087, |
|
"learning_rate": 0.0001926916757346022, |
|
"loss": 2.6109, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.019767031415460642, |
|
"eval_loss": 1.6243711709976196, |
|
"eval_runtime": 134.6115, |
|
"eval_samples_per_second": 5.542, |
|
"eval_steps_per_second": 5.542, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.02004941757853865, |
|
"grad_norm": 2.2794175148010254, |
|
"learning_rate": 0.00019244917309000817, |
|
"loss": 1.624, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.02033180374161666, |
|
"grad_norm": 1.5373339653015137, |
|
"learning_rate": 0.00019220287022200707, |
|
"loss": 1.8068, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.02061418990469467, |
|
"grad_norm": 0.9991006255149841, |
|
"learning_rate": 0.0001919527772551451, |
|
"loss": 1.6704, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.02089657606777268, |
|
"grad_norm": 1.392340064048767, |
|
"learning_rate": 0.00019169890446976454, |
|
"loss": 2.378, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.02117896223085069, |
|
"grad_norm": 1.1224641799926758, |
|
"learning_rate": 0.00019144126230158127, |
|
"loss": 1.6227, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.021461348393928697, |
|
"grad_norm": 1.2003456354141235, |
|
"learning_rate": 0.0001911798613412557, |
|
"loss": 2.4362, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.021743734557006705, |
|
"grad_norm": 1.102264642715454, |
|
"learning_rate": 0.0001909147123339575, |
|
"loss": 2.3662, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.022026120720084717, |
|
"grad_norm": 0.7298341989517212, |
|
"learning_rate": 0.0001906458261789238, |
|
"loss": 2.8474, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.022308506883162726, |
|
"grad_norm": 1.0796235799789429, |
|
"learning_rate": 0.00019037321392901136, |
|
"loss": 1.5521, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.022590893046240734, |
|
"grad_norm": 0.7039556503295898, |
|
"learning_rate": 0.0001900968867902419, |
|
"loss": 2.47, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.022590893046240734, |
|
"eval_loss": 1.617271900177002, |
|
"eval_runtime": 134.9383, |
|
"eval_samples_per_second": 5.528, |
|
"eval_steps_per_second": 5.528, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.022873279209318743, |
|
"grad_norm": 0.908437192440033, |
|
"learning_rate": 0.0001898168561213419, |
|
"loss": 1.9605, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.02315566537239675, |
|
"grad_norm": 0.8091934323310852, |
|
"learning_rate": 0.0001895331334332753, |
|
"loss": 1.5024, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.023438051535474763, |
|
"grad_norm": 1.5132545232772827, |
|
"learning_rate": 0.0001892457303887706, |
|
"loss": 1.0696, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.02372043769855277, |
|
"grad_norm": 1.2116718292236328, |
|
"learning_rate": 0.0001889546588018412, |
|
"loss": 1.7115, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.02400282386163078, |
|
"grad_norm": 0.8074257969856262, |
|
"learning_rate": 0.00018865993063730004, |
|
"loss": 2.7291, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.02428521002470879, |
|
"grad_norm": 1.1341824531555176, |
|
"learning_rate": 0.00018836155801026753, |
|
"loss": 1.8977, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.024567596187786797, |
|
"grad_norm": 1.9230746030807495, |
|
"learning_rate": 0.0001880595531856738, |
|
"loss": 1.5919, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.02484998235086481, |
|
"grad_norm": 0.9849350452423096, |
|
"learning_rate": 0.00018775392857775432, |
|
"loss": 2.8404, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.025132368513942818, |
|
"grad_norm": 2.4642324447631836, |
|
"learning_rate": 0.00018744469674953956, |
|
"loss": 1.2864, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.025414754677020826, |
|
"grad_norm": 1.7967941761016846, |
|
"learning_rate": 0.00018713187041233896, |
|
"loss": 1.8635, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.025414754677020826, |
|
"eval_loss": 1.6206018924713135, |
|
"eval_runtime": 134.6331, |
|
"eval_samples_per_second": 5.541, |
|
"eval_steps_per_second": 5.541, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.025697140840098835, |
|
"grad_norm": 1.2300053834915161, |
|
"learning_rate": 0.00018681546242521786, |
|
"loss": 2.5979, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.025979527003176843, |
|
"grad_norm": 1.4374971389770508, |
|
"learning_rate": 0.00018649548579446936, |
|
"loss": 1.9891, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.026261913166254855, |
|
"grad_norm": 1.0630384683609009, |
|
"learning_rate": 0.0001861719536730795, |
|
"loss": 2.2702, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.026544299329332863, |
|
"grad_norm": 1.124637246131897, |
|
"learning_rate": 0.00018584487936018661, |
|
"loss": 2.8282, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.026826685492410872, |
|
"grad_norm": 1.3020342588424683, |
|
"learning_rate": 0.00018551427630053463, |
|
"loss": 1.946, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.02710907165548888, |
|
"grad_norm": 1.0363761186599731, |
|
"learning_rate": 0.00018518015808392045, |
|
"loss": 1.4936, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.02739145781856689, |
|
"grad_norm": 1.612716794013977, |
|
"learning_rate": 0.00018484253844463526, |
|
"loss": 1.2949, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.0276738439816449, |
|
"grad_norm": 0.5706181526184082, |
|
"learning_rate": 0.00018450143126090015, |
|
"loss": 1.3093, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.02795623014472291, |
|
"grad_norm": 1.2899292707443237, |
|
"learning_rate": 0.00018415685055429533, |
|
"loss": 2.5082, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.028238616307800918, |
|
"grad_norm": 1.1273449659347534, |
|
"learning_rate": 0.00018380881048918405, |
|
"loss": 1.8346, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.028238616307800918, |
|
"eval_loss": 1.6035598516464233, |
|
"eval_runtime": 134.1623, |
|
"eval_samples_per_second": 5.56, |
|
"eval_steps_per_second": 5.56, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.028521002470878926, |
|
"grad_norm": 1.1187326908111572, |
|
"learning_rate": 0.00018345732537213027, |
|
"loss": 2.3691, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.028803388633956935, |
|
"grad_norm": 1.2883691787719727, |
|
"learning_rate": 0.00018310240965131041, |
|
"loss": 2.9992, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.029085774797034947, |
|
"grad_norm": 1.5844606161117554, |
|
"learning_rate": 0.00018274407791591966, |
|
"loss": 1.4277, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.029368160960112955, |
|
"grad_norm": 1.5807751417160034, |
|
"learning_rate": 0.00018238234489557215, |
|
"loss": 0.6992, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.029650547123190964, |
|
"grad_norm": 1.4340300559997559, |
|
"learning_rate": 0.0001820172254596956, |
|
"loss": 2.2425, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.029932933286268972, |
|
"grad_norm": 1.3567688465118408, |
|
"learning_rate": 0.00018164873461691986, |
|
"loss": 0.7425, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.03021531944934698, |
|
"grad_norm": 1.126590609550476, |
|
"learning_rate": 0.00018127688751446027, |
|
"loss": 1.8765, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.030497705612424993, |
|
"grad_norm": 0.9429426193237305, |
|
"learning_rate": 0.00018090169943749476, |
|
"loss": 2.088, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.030780091775503, |
|
"grad_norm": 1.0398406982421875, |
|
"learning_rate": 0.0001805231858085356, |
|
"loss": 1.8745, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.03106247793858101, |
|
"grad_norm": 0.563444197177887, |
|
"learning_rate": 0.00018014136218679567, |
|
"loss": 2.4523, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.03106247793858101, |
|
"eval_loss": 1.6250616312026978, |
|
"eval_runtime": 134.2663, |
|
"eval_samples_per_second": 5.556, |
|
"eval_steps_per_second": 5.556, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.03134486410165902, |
|
"grad_norm": 0.664234459400177, |
|
"learning_rate": 0.00017975624426754848, |
|
"loss": 1.4157, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.03162725026473703, |
|
"grad_norm": 1.8084183931350708, |
|
"learning_rate": 0.00017936784788148328, |
|
"loss": 1.9874, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.03190963642781504, |
|
"grad_norm": 1.104672908782959, |
|
"learning_rate": 0.00017897618899405423, |
|
"loss": 2.7257, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.032192022590893044, |
|
"grad_norm": 0.7887753844261169, |
|
"learning_rate": 0.00017858128370482426, |
|
"loss": 2.7542, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.032474408753971055, |
|
"grad_norm": 1.3737729787826538, |
|
"learning_rate": 0.000178183148246803, |
|
"loss": 2.0767, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.03275679491704907, |
|
"grad_norm": 0.7426419258117676, |
|
"learning_rate": 0.00017778179898577973, |
|
"loss": 1.4909, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.03303918108012707, |
|
"grad_norm": 2.0367558002471924, |
|
"learning_rate": 0.00017737725241965069, |
|
"loss": 1.6603, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.033321567243205084, |
|
"grad_norm": 0.9449915289878845, |
|
"learning_rate": 0.00017696952517774062, |
|
"loss": 2.323, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.03360395340628309, |
|
"grad_norm": 2.1352381706237793, |
|
"learning_rate": 0.00017655863402011947, |
|
"loss": 1.9585, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.0338863395693611, |
|
"grad_norm": 1.0839155912399292, |
|
"learning_rate": 0.00017614459583691346, |
|
"loss": 1.5273, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.0338863395693611, |
|
"eval_loss": 1.6136579513549805, |
|
"eval_runtime": 134.3244, |
|
"eval_samples_per_second": 5.554, |
|
"eval_steps_per_second": 5.554, |
|
"step": 120 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 500, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 10, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 2 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.174564302225408e+16, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|