{ "best_metric": 0.6744453310966492, "best_model_checkpoint": "miner_id_24/checkpoint-100", "epoch": 0.016511186328737718, "eval_steps": 100, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0001651118632873772, "grad_norm": 0.6025562286376953, "learning_rate": 2e-05, "loss": 0.8633, "step": 1 }, { "epoch": 0.0001651118632873772, "eval_loss": 0.953804612159729, "eval_runtime": 831.0169, "eval_samples_per_second": 7.213, "eval_steps_per_second": 1.804, "step": 1 }, { "epoch": 0.0003302237265747544, "grad_norm": 0.6425795555114746, "learning_rate": 4e-05, "loss": 0.9543, "step": 2 }, { "epoch": 0.0004953355898621316, "grad_norm": 0.5645630955696106, "learning_rate": 6e-05, "loss": 0.8123, "step": 3 }, { "epoch": 0.0006604474531495088, "grad_norm": 0.5715195536613464, "learning_rate": 8e-05, "loss": 0.9324, "step": 4 }, { "epoch": 0.000825559316436886, "grad_norm": 0.5616925954818726, "learning_rate": 0.0001, "loss": 0.9792, "step": 5 }, { "epoch": 0.0009906711797242633, "grad_norm": 0.4393025040626526, "learning_rate": 0.00012, "loss": 0.8852, "step": 6 }, { "epoch": 0.0011557830430116403, "grad_norm": 0.4828271269798279, "learning_rate": 0.00014, "loss": 0.8654, "step": 7 }, { "epoch": 0.0013208949062990176, "grad_norm": 0.633884608745575, "learning_rate": 0.00016, "loss": 0.9406, "step": 8 }, { "epoch": 0.0014860067695863948, "grad_norm": 0.626456618309021, "learning_rate": 0.00018, "loss": 0.8317, "step": 9 }, { "epoch": 0.001651118632873772, "grad_norm": 0.4569125473499298, "learning_rate": 0.0002, "loss": 0.7754, "step": 10 }, { "epoch": 0.0018162304961611491, "grad_norm": 0.4024462103843689, "learning_rate": 0.00019999535665248002, "loss": 0.8435, "step": 11 }, { "epoch": 0.0019813423594485266, "grad_norm": 0.3451145887374878, "learning_rate": 0.0001999814270411335, "loss": 0.7137, "step": 12 }, { "epoch": 0.0021464542227359034, "grad_norm": 0.33148011565208435, "learning_rate": 0.000199958212459561, "loss": 0.7715, "step": 13 }, { "epoch": 0.0023115660860232807, "grad_norm": 0.34364211559295654, "learning_rate": 0.00019992571506363, "loss": 0.8241, "step": 14 }, { "epoch": 0.002476677949310658, "grad_norm": 0.32813310623168945, "learning_rate": 0.00019988393787127441, "loss": 0.7905, "step": 15 }, { "epoch": 0.002641789812598035, "grad_norm": 0.35355880856513977, "learning_rate": 0.0001998328847622148, "loss": 0.7944, "step": 16 }, { "epoch": 0.0028069016758854124, "grad_norm": 0.32826319336891174, "learning_rate": 0.00019977256047759765, "loss": 0.7636, "step": 17 }, { "epoch": 0.0029720135391727897, "grad_norm": 0.3257204294204712, "learning_rate": 0.00019970297061955533, "loss": 0.7244, "step": 18 }, { "epoch": 0.003137125402460167, "grad_norm": 0.3375376760959625, "learning_rate": 0.00019962412165068573, "loss": 0.714, "step": 19 }, { "epoch": 0.003302237265747544, "grad_norm": 0.3384173810482025, "learning_rate": 0.00019953602089345217, "loss": 0.7644, "step": 20 }, { "epoch": 0.003467349129034921, "grad_norm": 0.2862045466899872, "learning_rate": 0.0001994386765295032, "loss": 0.7, "step": 21 }, { "epoch": 0.0036324609923222982, "grad_norm": 0.3143760859966278, "learning_rate": 0.00019933209759891317, "loss": 0.7111, "step": 22 }, { "epoch": 0.0037975728556096755, "grad_norm": 0.2990686297416687, "learning_rate": 0.00019921629399934223, "loss": 0.6409, "step": 23 }, { "epoch": 0.003962684718897053, "grad_norm": 0.3404158055782318, "learning_rate": 0.00019909127648511755, "loss": 0.7219, "step": 24 }, { "epoch": 0.0041277965821844296, "grad_norm": 0.3198144733905792, "learning_rate": 0.0001989570566662345, "loss": 0.6811, "step": 25 }, { "epoch": 0.004292908445471807, "grad_norm": 0.3477422297000885, "learning_rate": 0.00019881364700727823, "loss": 0.7442, "step": 26 }, { "epoch": 0.004458020308759184, "grad_norm": 0.3631756007671356, "learning_rate": 0.0001986610608262665, "loss": 0.8221, "step": 27 }, { "epoch": 0.004623132172046561, "grad_norm": 0.32080984115600586, "learning_rate": 0.00019849931229341258, "loss": 0.6626, "step": 28 }, { "epoch": 0.0047882440353339386, "grad_norm": 0.3251453936100006, "learning_rate": 0.00019832841642980945, "loss": 0.7156, "step": 29 }, { "epoch": 0.004953355898621316, "grad_norm": 0.3476259112358093, "learning_rate": 0.00019814838910603481, "loss": 0.7087, "step": 30 }, { "epoch": 0.005118467761908693, "grad_norm": 0.2831595242023468, "learning_rate": 0.00019795924704067721, "loss": 0.5912, "step": 31 }, { "epoch": 0.00528357962519607, "grad_norm": 0.3411269783973694, "learning_rate": 0.00019776100779878345, "loss": 0.7125, "step": 32 }, { "epoch": 0.0054486914884834476, "grad_norm": 0.35447773337364197, "learning_rate": 0.00019755368979022732, "loss": 0.7567, "step": 33 }, { "epoch": 0.005613803351770825, "grad_norm": 0.32824215292930603, "learning_rate": 0.00019733731226800015, "loss": 0.7238, "step": 34 }, { "epoch": 0.005778915215058202, "grad_norm": 0.3237905204296112, "learning_rate": 0.00019711189532642243, "loss": 0.7055, "step": 35 }, { "epoch": 0.005944027078345579, "grad_norm": 0.3332991898059845, "learning_rate": 0.00019687745989927823, "loss": 0.7217, "step": 36 }, { "epoch": 0.006109138941632957, "grad_norm": 0.3003871738910675, "learning_rate": 0.00019663402775787066, "loss": 0.7078, "step": 37 }, { "epoch": 0.006274250804920334, "grad_norm": 0.31166723370552063, "learning_rate": 0.00019638162150900027, "loss": 0.674, "step": 38 }, { "epoch": 0.006439362668207711, "grad_norm": 0.31454190611839294, "learning_rate": 0.00019612026459286578, "loss": 0.631, "step": 39 }, { "epoch": 0.006604474531495088, "grad_norm": 0.2961897552013397, "learning_rate": 0.00019584998128088684, "loss": 0.6135, "step": 40 }, { "epoch": 0.006769586394782465, "grad_norm": 0.31604376435279846, "learning_rate": 0.0001955707966734505, "loss": 0.6517, "step": 41 }, { "epoch": 0.006934698258069842, "grad_norm": 0.3436253070831299, "learning_rate": 0.00019528273669757972, "loss": 0.7957, "step": 42 }, { "epoch": 0.007099810121357219, "grad_norm": 0.3258967399597168, "learning_rate": 0.0001949858281045261, "loss": 0.7068, "step": 43 }, { "epoch": 0.0072649219846445965, "grad_norm": 0.3063102066516876, "learning_rate": 0.00019468009846728513, "loss": 0.6734, "step": 44 }, { "epoch": 0.007430033847931974, "grad_norm": 0.32180124521255493, "learning_rate": 0.00019436557617803595, "loss": 0.7536, "step": 45 }, { "epoch": 0.007595145711219351, "grad_norm": 0.31712058186531067, "learning_rate": 0.00019404229044550433, "loss": 0.7201, "step": 46 }, { "epoch": 0.007760257574506728, "grad_norm": 0.2951134145259857, "learning_rate": 0.00019371027129225042, "loss": 0.62, "step": 47 }, { "epoch": 0.007925369437794106, "grad_norm": 0.30386847257614136, "learning_rate": 0.0001933695495518804, "loss": 0.6752, "step": 48 }, { "epoch": 0.008090481301081483, "grad_norm": 0.3203580677509308, "learning_rate": 0.00019302015686618326, "loss": 0.6805, "step": 49 }, { "epoch": 0.008255593164368859, "grad_norm": 0.2829570174217224, "learning_rate": 0.0001926621256821922, "loss": 0.5785, "step": 50 }, { "epoch": 0.008420705027656237, "grad_norm": 0.3065510094165802, "learning_rate": 0.00019229548924917146, "loss": 0.6573, "step": 51 }, { "epoch": 0.008585816890943614, "grad_norm": 0.36454063653945923, "learning_rate": 0.00019192028161552847, "loss": 0.7931, "step": 52 }, { "epoch": 0.008750928754230992, "grad_norm": 0.31599584221839905, "learning_rate": 0.0001915365376256519, "loss": 0.7019, "step": 53 }, { "epoch": 0.008916040617518368, "grad_norm": 0.3125596046447754, "learning_rate": 0.00019114429291667583, "loss": 0.6634, "step": 54 }, { "epoch": 0.009081152480805746, "grad_norm": 0.3362664580345154, "learning_rate": 0.00019074358391517023, "loss": 0.7445, "step": 55 }, { "epoch": 0.009246264344093123, "grad_norm": 0.3391803801059723, "learning_rate": 0.00019033444783375804, "loss": 0.7809, "step": 56 }, { "epoch": 0.0094113762073805, "grad_norm": 0.3022312819957733, "learning_rate": 0.00018991692266765947, "loss": 0.6743, "step": 57 }, { "epoch": 0.009576488070667877, "grad_norm": 0.30301007628440857, "learning_rate": 0.00018949104719116332, "loss": 0.6472, "step": 58 }, { "epoch": 0.009741599933955255, "grad_norm": 0.3142068386077881, "learning_rate": 0.00018905686095402647, "loss": 0.6656, "step": 59 }, { "epoch": 0.009906711797242632, "grad_norm": 0.2799323797225952, "learning_rate": 0.0001886144042778006, "loss": 0.6186, "step": 60 }, { "epoch": 0.01007182366053001, "grad_norm": 0.33195391297340393, "learning_rate": 0.00018816371825208789, "loss": 0.7365, "step": 61 }, { "epoch": 0.010236935523817386, "grad_norm": 0.2870056927204132, "learning_rate": 0.0001877048447307252, "loss": 0.6195, "step": 62 }, { "epoch": 0.010402047387104764, "grad_norm": 0.2942243218421936, "learning_rate": 0.00018723782632789701, "loss": 0.6346, "step": 63 }, { "epoch": 0.01056715925039214, "grad_norm": 0.36610937118530273, "learning_rate": 0.00018676270641417822, "loss": 0.7423, "step": 64 }, { "epoch": 0.010732271113679517, "grad_norm": 0.32092538475990295, "learning_rate": 0.0001862795291125063, "loss": 0.6371, "step": 65 }, { "epoch": 0.010897382976966895, "grad_norm": 0.2960004508495331, "learning_rate": 0.0001857883392940837, "loss": 0.5923, "step": 66 }, { "epoch": 0.011062494840254272, "grad_norm": 0.3343415856361389, "learning_rate": 0.000185289182574211, "loss": 0.6983, "step": 67 }, { "epoch": 0.01122760670354165, "grad_norm": 0.3269459903240204, "learning_rate": 0.0001847821053080505, "loss": 0.7298, "step": 68 }, { "epoch": 0.011392718566829026, "grad_norm": 0.29574498534202576, "learning_rate": 0.00018426715458632153, "loss": 0.6434, "step": 69 }, { "epoch": 0.011557830430116404, "grad_norm": 0.2938220202922821, "learning_rate": 0.00018374437823092724, "loss": 0.6383, "step": 70 }, { "epoch": 0.01172294229340378, "grad_norm": 0.3147146999835968, "learning_rate": 0.00018321382479051347, "loss": 0.6643, "step": 71 }, { "epoch": 0.011888054156691159, "grad_norm": 0.3595152497291565, "learning_rate": 0.00018267554353596025, "loss": 0.7867, "step": 72 }, { "epoch": 0.012053166019978535, "grad_norm": 0.32271626591682434, "learning_rate": 0.0001821295844558062, "loss": 0.6786, "step": 73 }, { "epoch": 0.012218277883265913, "grad_norm": 0.2905098795890808, "learning_rate": 0.0001815759982516061, "loss": 0.5966, "step": 74 }, { "epoch": 0.01238338974655329, "grad_norm": 0.3641018867492676, "learning_rate": 0.00018101483633322255, "loss": 0.7708, "step": 75 }, { "epoch": 0.012548501609840668, "grad_norm": 0.3200140595436096, "learning_rate": 0.00018044615081405153, "loss": 0.6035, "step": 76 }, { "epoch": 0.012713613473128044, "grad_norm": 0.3007895350456238, "learning_rate": 0.00017986999450618295, "loss": 0.6379, "step": 77 }, { "epoch": 0.012878725336415422, "grad_norm": 0.3379741907119751, "learning_rate": 0.00017928642091549613, "loss": 0.7315, "step": 78 }, { "epoch": 0.013043837199702799, "grad_norm": 0.2804384231567383, "learning_rate": 0.00017869548423669077, "loss": 0.5894, "step": 79 }, { "epoch": 0.013208949062990177, "grad_norm": 0.3281707465648651, "learning_rate": 0.00017809723934825405, "loss": 0.6707, "step": 80 }, { "epoch": 0.013374060926277553, "grad_norm": 0.28626206517219543, "learning_rate": 0.00017749174180736442, "loss": 0.5444, "step": 81 }, { "epoch": 0.01353917278956493, "grad_norm": 0.34358954429626465, "learning_rate": 0.00017687904784473188, "loss": 0.7219, "step": 82 }, { "epoch": 0.013704284652852308, "grad_norm": 0.33361655473709106, "learning_rate": 0.00017625921435937637, "loss": 0.663, "step": 83 }, { "epoch": 0.013869396516139684, "grad_norm": 0.38789382576942444, "learning_rate": 0.00017563229891334338, "loss": 0.7912, "step": 84 }, { "epoch": 0.014034508379427062, "grad_norm": 0.32810458540916443, "learning_rate": 0.00017499835972635856, "loss": 0.6905, "step": 85 }, { "epoch": 0.014199620242714438, "grad_norm": 0.3457249701023102, "learning_rate": 0.00017435745567042095, "loss": 0.6821, "step": 86 }, { "epoch": 0.014364732106001817, "grad_norm": 0.3237375020980835, "learning_rate": 0.00017370964626433567, "loss": 0.6797, "step": 87 }, { "epoch": 0.014529843969289193, "grad_norm": 0.3508392572402954, "learning_rate": 0.0001730549916681868, "loss": 0.6382, "step": 88 }, { "epoch": 0.014694955832576571, "grad_norm": 0.3452267348766327, "learning_rate": 0.00017239355267775018, "loss": 0.6762, "step": 89 }, { "epoch": 0.014860067695863947, "grad_norm": 0.318096399307251, "learning_rate": 0.0001717253907188477, "loss": 0.6225, "step": 90 }, { "epoch": 0.015025179559151326, "grad_norm": 0.32376089692115784, "learning_rate": 0.00017105056784164294, "loss": 0.6575, "step": 91 }, { "epoch": 0.015190291422438702, "grad_norm": 0.2974165678024292, "learning_rate": 0.00017036914671487852, "loss": 0.5908, "step": 92 }, { "epoch": 0.01535540328572608, "grad_norm": 0.2842133343219757, "learning_rate": 0.00016968119062005642, "loss": 0.5888, "step": 93 }, { "epoch": 0.015520515149013456, "grad_norm": 0.3510216474533081, "learning_rate": 0.00016898676344556118, "loss": 0.7047, "step": 94 }, { "epoch": 0.015685627012300835, "grad_norm": 0.33503997325897217, "learning_rate": 0.00016828592968072678, "loss": 0.6951, "step": 95 }, { "epoch": 0.015850738875588213, "grad_norm": 0.31613603234291077, "learning_rate": 0.00016757875440984768, "loss": 0.6436, "step": 96 }, { "epoch": 0.016015850738875587, "grad_norm": 0.41182562708854675, "learning_rate": 0.0001668653033061347, "loss": 0.5818, "step": 97 }, { "epoch": 0.016180962602162965, "grad_norm": 0.3742947280406952, "learning_rate": 0.00016614564262561608, "loss": 0.7527, "step": 98 }, { "epoch": 0.016346074465450344, "grad_norm": 0.3371233642101288, "learning_rate": 0.0001654198392009846, "loss": 0.6411, "step": 99 }, { "epoch": 0.016511186328737718, "grad_norm": 0.3669418692588806, "learning_rate": 0.0001646879604353908, "loss": 0.6869, "step": 100 }, { "epoch": 0.016511186328737718, "eval_loss": 0.6744453310966492, "eval_runtime": 833.756, "eval_samples_per_second": 7.189, "eval_steps_per_second": 1.798, "step": 100 } ], "logging_steps": 1, "max_steps": 336, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 2, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.7337044009641574e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }