{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 5746, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01740341106856944, "grad_norm": 0.046589791774749756, "learning_rate": 4.351610095735422e-07, "loss": 2.7999, "step": 100 }, { "epoch": 0.03480682213713888, "grad_norm": 0.04616040736436844, "learning_rate": 8.703220191470844e-07, "loss": 2.7996, "step": 200 }, { "epoch": 0.05221023320570832, "grad_norm": 0.05549981817603111, "learning_rate": 1.305483028720627e-06, "loss": 2.797, "step": 300 }, { "epoch": 0.06961364427427776, "grad_norm": 0.063571035861969, "learning_rate": 1.7406440382941688e-06, "loss": 2.7909, "step": 400 }, { "epoch": 0.0870170553428472, "grad_norm": 0.08422163128852844, "learning_rate": 2.1758050478677113e-06, "loss": 2.7951, "step": 500 }, { "epoch": 0.10442046641141664, "grad_norm": 0.09257014095783234, "learning_rate": 2.610966057441254e-06, "loss": 2.7803, "step": 600 }, { "epoch": 0.12182387747998608, "grad_norm": 0.11055697500705719, "learning_rate": 3.046127067014796e-06, "loss": 2.7681, "step": 700 }, { "epoch": 0.1392272885485555, "grad_norm": 0.10759040713310242, "learning_rate": 3.4812880765883376e-06, "loss": 2.7611, "step": 800 }, { "epoch": 0.15663069961712495, "grad_norm": 0.12318646907806396, "learning_rate": 3.9164490861618806e-06, "loss": 2.7402, "step": 900 }, { "epoch": 0.1740341106856944, "grad_norm": 0.12962989509105682, "learning_rate": 4.351610095735423e-06, "loss": 2.7451, "step": 1000 }, { "epoch": 0.19143752175426385, "grad_norm": 0.13981275260448456, "learning_rate": 4.786771105308965e-06, "loss": 2.735, "step": 1100 }, { "epoch": 0.20884093282283328, "grad_norm": 0.14711035788059235, "learning_rate": 5.221932114882508e-06, "loss": 2.7469, "step": 1200 }, { "epoch": 0.22624434389140272, "grad_norm": 0.15727241337299347, "learning_rate": 5.657093124456049e-06, "loss": 2.7327, "step": 1300 }, { "epoch": 0.24364775495997215, "grad_norm": 0.15055705606937408, "learning_rate": 6.092254134029592e-06, "loss": 2.7234, "step": 1400 }, { "epoch": 0.2610511660285416, "grad_norm": 0.16661331057548523, "learning_rate": 6.527415143603134e-06, "loss": 2.7174, "step": 1500 }, { "epoch": 0.278454577097111, "grad_norm": 0.17976854741573334, "learning_rate": 6.962576153176675e-06, "loss": 2.719, "step": 1600 }, { "epoch": 0.2958579881656805, "grad_norm": 0.1790621429681778, "learning_rate": 7.397737162750218e-06, "loss": 2.7173, "step": 1700 }, { "epoch": 0.3132613992342499, "grad_norm": 0.19079644978046417, "learning_rate": 7.832898172323761e-06, "loss": 2.7131, "step": 1800 }, { "epoch": 0.33066481030281936, "grad_norm": 0.19005636870861053, "learning_rate": 8.268059181897302e-06, "loss": 2.7168, "step": 1900 }, { "epoch": 0.3480682213713888, "grad_norm": 0.19910404086112976, "learning_rate": 8.703220191470845e-06, "loss": 2.7061, "step": 2000 }, { "epoch": 0.3654716324399582, "grad_norm": 0.20510949194431305, "learning_rate": 9.138381201044387e-06, "loss": 2.6862, "step": 2100 }, { "epoch": 0.3828750435085277, "grad_norm": 0.20418143272399902, "learning_rate": 9.57354221061793e-06, "loss": 2.6802, "step": 2200 }, { "epoch": 0.4002784545770971, "grad_norm": 0.21713656187057495, "learning_rate": 1.000870322019147e-05, "loss": 2.6923, "step": 2300 }, { "epoch": 0.41768186564566656, "grad_norm": 0.2298802137374878, "learning_rate": 1.0443864229765015e-05, "loss": 2.6818, "step": 2400 }, { "epoch": 0.43508527671423597, "grad_norm": 0.2294008880853653, "learning_rate": 1.0879025239338557e-05, "loss": 2.6896, "step": 2500 }, { "epoch": 0.45248868778280543, "grad_norm": 0.21464629471302032, "learning_rate": 1.1314186248912098e-05, "loss": 2.6805, "step": 2600 }, { "epoch": 0.4698920988513749, "grad_norm": 0.25449061393737793, "learning_rate": 1.174934725848564e-05, "loss": 2.6806, "step": 2700 }, { "epoch": 0.4872955099199443, "grad_norm": 0.24079586565494537, "learning_rate": 1.2184508268059184e-05, "loss": 2.6844, "step": 2800 }, { "epoch": 0.5046989209885138, "grad_norm": 0.2414436638355255, "learning_rate": 1.2619669277632725e-05, "loss": 2.6817, "step": 2900 }, { "epoch": 0.5221023320570832, "grad_norm": 0.2530564069747925, "learning_rate": 1.3054830287206268e-05, "loss": 2.6556, "step": 3000 }, { "epoch": 0.5395057431256526, "grad_norm": 0.26441535353660583, "learning_rate": 1.348999129677981e-05, "loss": 2.6749, "step": 3100 }, { "epoch": 0.556909154194222, "grad_norm": 0.2584131062030792, "learning_rate": 1.392515230635335e-05, "loss": 2.6575, "step": 3200 }, { "epoch": 0.5743125652627915, "grad_norm": 0.25025609135627747, "learning_rate": 1.4360313315926895e-05, "loss": 2.6658, "step": 3300 }, { "epoch": 0.591715976331361, "grad_norm": 0.26518625020980835, "learning_rate": 1.4795474325500436e-05, "loss": 2.6586, "step": 3400 }, { "epoch": 0.6091193873999304, "grad_norm": 0.26597312092781067, "learning_rate": 1.5230635335073978e-05, "loss": 2.6451, "step": 3500 }, { "epoch": 0.6265227984684998, "grad_norm": 0.2725384831428528, "learning_rate": 1.5665796344647522e-05, "loss": 2.6521, "step": 3600 }, { "epoch": 0.6439262095370692, "grad_norm": 0.2752222716808319, "learning_rate": 1.6100957354221064e-05, "loss": 2.6398, "step": 3700 }, { "epoch": 0.6613296206056387, "grad_norm": 0.2558598518371582, "learning_rate": 1.6536118363794605e-05, "loss": 2.6486, "step": 3800 }, { "epoch": 0.6787330316742082, "grad_norm": 0.26938167214393616, "learning_rate": 1.697127937336815e-05, "loss": 2.641, "step": 3900 }, { "epoch": 0.6961364427427776, "grad_norm": 0.28793784976005554, "learning_rate": 1.740644038294169e-05, "loss": 2.6344, "step": 4000 }, { "epoch": 0.713539853811347, "grad_norm": 0.2677360773086548, "learning_rate": 1.7841601392515232e-05, "loss": 2.6542, "step": 4100 }, { "epoch": 0.7309432648799165, "grad_norm": 0.28143930435180664, "learning_rate": 1.8276762402088773e-05, "loss": 2.6446, "step": 4200 }, { "epoch": 0.7483466759484859, "grad_norm": 0.28870299458503723, "learning_rate": 1.8711923411662314e-05, "loss": 2.6243, "step": 4300 }, { "epoch": 0.7657500870170554, "grad_norm": 0.296633780002594, "learning_rate": 1.914708442123586e-05, "loss": 2.6306, "step": 4400 }, { "epoch": 0.7831534980856247, "grad_norm": 0.2806219160556793, "learning_rate": 1.95822454308094e-05, "loss": 2.6356, "step": 4500 }, { "epoch": 0.8005569091541942, "grad_norm": 0.2914940416812897, "learning_rate": 1.999940297883134e-05, "loss": 2.644, "step": 4600 }, { "epoch": 0.8179603202227637, "grad_norm": 0.28510311245918274, "learning_rate": 1.9599117132813187e-05, "loss": 2.6357, "step": 4700 }, { "epoch": 0.8353637312913331, "grad_norm": 0.3171123266220093, "learning_rate": 1.8486908682093175e-05, "loss": 2.6307, "step": 4800 }, { "epoch": 0.8527671423599026, "grad_norm": 0.2955775558948517, "learning_rate": 1.674526503944611e-05, "loss": 2.6315, "step": 4900 }, { "epoch": 0.8701705534284719, "grad_norm": 0.2767013907432556, "learning_rate": 1.450335594635761e-05, "loss": 2.6138, "step": 5000 }, { "epoch": 0.8875739644970414, "grad_norm": 0.27960339188575745, "learning_rate": 1.1927453544210397e-05, "loss": 2.6305, "step": 5100 }, { "epoch": 0.9049773755656109, "grad_norm": 0.31521016359329224, "learning_rate": 9.20860073020234e-06, "loss": 2.6249, "step": 5200 }, { "epoch": 0.9223807866341803, "grad_norm": 0.2640378773212433, "learning_rate": 6.548442379624425e-06, "loss": 2.6257, "step": 5300 }, { "epoch": 0.9397841977027498, "grad_norm": 0.28068870306015015, "learning_rate": 4.144270267924306e-06, "loss": 2.6261, "step": 5400 }, { "epoch": 0.9571876087713191, "grad_norm": 0.2999429702758789, "learning_rate": 2.1743908422712135e-06, "loss": 2.6245, "step": 5500 }, { "epoch": 0.9745910198398886, "grad_norm": 0.2793658971786499, "learning_rate": 7.849010480670938e-07, "loss": 2.6209, "step": 5600 }, { "epoch": 0.9919944309084581, "grad_norm": 0.30049070715904236, "learning_rate": 7.885298685522235e-08, "loss": 2.6215, "step": 5700 }, { "epoch": 1.0, "step": 5746, "total_flos": 8.372955480242258e+17, "train_loss": 2.6846868539578486, "train_runtime": 1624.688, "train_samples_per_second": 56.585, "train_steps_per_second": 3.537 } ], "logging_steps": 100, "max_steps": 5746, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 8.372955480242258e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }