{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9942857142857143, "eval_steps": 500, "global_step": 87, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.011428571428571429, "grad_norm": 1.4554669857025146, "learning_rate": 2.2222222222222223e-05, "loss": 1.9343, "step": 1 }, { "epoch": 0.05714285714285714, "grad_norm": 1.3781572580337524, "learning_rate": 0.00011111111111111112, "loss": 1.8872, "step": 5 }, { "epoch": 0.11428571428571428, "grad_norm": 1.2489840984344482, "learning_rate": 0.00019991889981715698, "loss": 1.8455, "step": 10 }, { "epoch": 0.17142857142857143, "grad_norm": 0.9914447665214539, "learning_rate": 0.0001970941817426052, "loss": 1.7457, "step": 15 }, { "epoch": 0.22857142857142856, "grad_norm": 0.7855111360549927, "learning_rate": 0.00019034504346103823, "loss": 1.6664, "step": 20 }, { "epoch": 0.2857142857142857, "grad_norm": 0.8774387240409851, "learning_rate": 0.00017994427634035015, "loss": 1.5641, "step": 25 }, { "epoch": 0.34285714285714286, "grad_norm": 0.5209712982177734, "learning_rate": 0.00016631226582407952, "loss": 1.512, "step": 30 }, { "epoch": 0.4, "grad_norm": 0.4458630383014679, "learning_rate": 0.00015000000000000001, "loss": 1.4799, "step": 35 }, { "epoch": 0.45714285714285713, "grad_norm": 0.42980432510375977, "learning_rate": 0.00013166679938014726, "loss": 1.4725, "step": 40 }, { "epoch": 0.5142857142857142, "grad_norm": 0.4260351359844208, "learning_rate": 0.0001120536680255323, "loss": 1.4651, "step": 45 }, { "epoch": 0.5714285714285714, "grad_norm": 0.3732384741306305, "learning_rate": 9.195334312832742e-05, "loss": 1.4722, "step": 50 }, { "epoch": 0.6285714285714286, "grad_norm": 0.3934707045555115, "learning_rate": 7.217825360835473e-05, "loss": 1.471, "step": 55 }, { "epoch": 0.6857142857142857, "grad_norm": 0.4875844717025757, "learning_rate": 5.3527682795623146e-05, "loss": 1.4555, "step": 60 }, { "epoch": 0.7428571428571429, "grad_norm": 0.36814966797828674, "learning_rate": 3.675546244046228e-05, "loss": 1.4407, "step": 65 }, { "epoch": 0.8, "grad_norm": 0.4143487513065338, "learning_rate": 2.2539503817234553e-05, "loss": 1.4469, "step": 70 }, { "epoch": 0.8571428571428571, "grad_norm": 0.3434593081474304, "learning_rate": 1.1454397434679021e-05, "loss": 1.4406, "step": 75 }, { "epoch": 0.9142857142857143, "grad_norm": 0.39424604177474976, "learning_rate": 3.948188836862776e-06, "loss": 1.442, "step": 80 }, { "epoch": 0.9714285714285714, "grad_norm": 0.4415414333343506, "learning_rate": 3.2426918657900704e-07, "loss": 1.4574, "step": 85 }, { "epoch": 0.9942857142857143, "eval_loss": 1.4540868997573853, "eval_runtime": 1.2938, "eval_samples_per_second": 150.714, "eval_steps_per_second": 1.546, "step": 87 }, { "epoch": 0.9942857142857143, "step": 87, "total_flos": 4.260877023880151e+17, "train_loss": 1.5436431375043145, "train_runtime": 332.2798, "train_samples_per_second": 58.947, "train_steps_per_second": 0.262 } ], "logging_steps": 5, "max_steps": 87, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.260877023880151e+17, "train_batch_size": 14, "trial_name": null, "trial_params": null }