{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.5360983102918588, "eval_steps": 500, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07680491551459294, "grad_norm": 0.15998965501785278, "learning_rate": 0.0002, "loss": 0.8173, "step": 50 }, { "epoch": 0.15360983102918588, "grad_norm": 0.1599128544330597, "learning_rate": 0.00019965952472163515, "loss": 0.5618, "step": 100 }, { "epoch": 0.2304147465437788, "grad_norm": 0.1505613625049591, "learning_rate": 0.0001986404173548442, "loss": 0.5308, "step": 150 }, { "epoch": 0.30721966205837176, "grad_norm": 0.13830986618995667, "learning_rate": 0.0001969496175169149, "loss": 0.5155, "step": 200 }, { "epoch": 0.38402457757296465, "grad_norm": 0.12936702370643616, "learning_rate": 0.00019459863871875693, "loss": 0.5085, "step": 250 }, { "epoch": 0.4608294930875576, "grad_norm": 0.1287979781627655, "learning_rate": 0.00019160348996358483, "loss": 0.4953, "step": 300 }, { "epoch": 0.5376344086021505, "grad_norm": 0.13770239055156708, "learning_rate": 0.0001879845667335219, "loss": 0.4883, "step": 350 }, { "epoch": 0.6144393241167435, "grad_norm": 0.12349069118499756, "learning_rate": 0.00018376651210645086, "loss": 0.4828, "step": 400 }, { "epoch": 0.6912442396313364, "grad_norm": 0.1296069324016571, "learning_rate": 0.0001789780489488379, "loss": 0.4793, "step": 450 }, { "epoch": 0.7680491551459293, "grad_norm": 0.14504997432231903, "learning_rate": 0.0001736517843272136, "loss": 0.4772, "step": 500 }, { "epoch": 0.8448540706605223, "grad_norm": 0.13662400841712952, "learning_rate": 0.00016782398747017174, "loss": 0.4714, "step": 550 }, { "epoch": 0.9216589861751152, "grad_norm": 0.13271653652191162, "learning_rate": 0.0001615343427928555, "loss": 0.4658, "step": 600 }, { "epoch": 0.9984639016897081, "grad_norm": 0.13312594592571259, "learning_rate": 0.00015482567966571136, "loss": 0.464, "step": 650 }, { "epoch": 1.075268817204301, "grad_norm": 0.13460350036621094, "learning_rate": 0.00014774368076765274, "loss": 0.4452, "step": 700 }, { "epoch": 1.1520737327188941, "grad_norm": 0.12317976355552673, "learning_rate": 0.00014033657100960355, "loss": 0.446, "step": 750 }, { "epoch": 1.228878648233487, "grad_norm": 0.12744882702827454, "learning_rate": 0.0001326547891466988, "loss": 0.443, "step": 800 }, { "epoch": 1.30568356374808, "grad_norm": 0.13042095303535461, "learning_rate": 0.00012475064431530065, "loss": 0.4436, "step": 850 }, { "epoch": 1.3824884792626728, "grad_norm": 0.12759792804718018, "learning_rate": 0.00011667795983364332, "loss": 0.4436, "step": 900 }, { "epoch": 1.4592933947772657, "grad_norm": 0.1353318691253662, "learning_rate": 0.00010849170669164763, "loss": 0.4385, "step": 950 }, { "epoch": 1.5360983102918588, "grad_norm": 0.13270655274391174, "learning_rate": 0.00010024762922565933, "loss": 0.4358, "step": 1000 } ], "logging_steps": 50, "max_steps": 1953, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 9.8344214986752e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }