{ "best_metric": 0.01243716301208047, "best_model_checkpoint": "./checkpoints/easyrec-small", "epoch": 1.733531451213472, "eval_steps": 1000, "global_step": 14000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06191183754333829, "grad_norm": 24.75, "learning_rate": 4.896813604094436e-05, "loss": 3.2576, "step": 500 }, { "epoch": 0.12382367508667658, "grad_norm": 21.75, "learning_rate": 4.793627208188873e-05, "loss": 2.7834, "step": 1000 }, { "epoch": 0.18573551263001487, "grad_norm": 21.25, "learning_rate": 4.6904408122833086e-05, "loss": 2.6955, "step": 1500 }, { "epoch": 0.24764735017335315, "grad_norm": 24.375, "learning_rate": 4.587254416377745e-05, "loss": 2.5928, "step": 2000 }, { "epoch": 0.30955918771669144, "grad_norm": 27.875, "learning_rate": 4.484068020472182e-05, "loss": 2.5848, "step": 2500 }, { "epoch": 0.37147102526002973, "grad_norm": 21.625, "learning_rate": 4.3808816245666175e-05, "loss": 2.5582, "step": 3000 }, { "epoch": 0.433382862803368, "grad_norm": 27.125, "learning_rate": 4.2776952286610534e-05, "loss": 2.5386, "step": 3500 }, { "epoch": 0.4952947003467063, "grad_norm": 29.625, "learning_rate": 4.17450883275549e-05, "loss": 2.5143, "step": 4000 }, { "epoch": 0.5572065378900446, "grad_norm": 20.0, "learning_rate": 4.071322436849926e-05, "loss": 2.5071, "step": 4500 }, { "epoch": 0.6191183754333829, "grad_norm": 26.25, "learning_rate": 3.968136040944362e-05, "loss": 2.4754, "step": 5000 }, { "epoch": 0.6810302129767212, "grad_norm": 23.0, "learning_rate": 3.864949645038798e-05, "loss": 2.4846, "step": 5500 }, { "epoch": 0.7429420505200595, "grad_norm": 24.5, "learning_rate": 3.761763249133234e-05, "loss": 2.4806, "step": 6000 }, { "epoch": 0.8048538880633977, "grad_norm": 24.125, "learning_rate": 3.658576853227671e-05, "loss": 2.4532, "step": 6500 }, { "epoch": 0.866765725606736, "grad_norm": 24.25, "learning_rate": 3.555390457322107e-05, "loss": 2.4561, "step": 7000 }, { "epoch": 0.9286775631500743, "grad_norm": 30.75, "learning_rate": 3.452204061416543e-05, "loss": 2.4607, "step": 7500 }, { "epoch": 0.9905894006934126, "grad_norm": 26.375, "learning_rate": 3.349017665510979e-05, "loss": 2.4332, "step": 8000 }, { "epoch": 1.052501238236751, "grad_norm": 29.375, "learning_rate": 3.2458312696054156e-05, "loss": 2.4328, "step": 8500 }, { "epoch": 1.1144130757800892, "grad_norm": 25.0, "learning_rate": 3.1426448736998515e-05, "loss": 2.4362, "step": 9000 }, { "epoch": 1.1763249133234275, "grad_norm": 24.25, "learning_rate": 3.0394584777942874e-05, "loss": 2.4432, "step": 9500 }, { "epoch": 1.2382367508667658, "grad_norm": 34.75, "learning_rate": 2.936272081888724e-05, "loss": 2.427, "step": 10000 }, { "epoch": 1.300148588410104, "grad_norm": 24.5, "learning_rate": 2.83308568598316e-05, "loss": 2.4219, "step": 10500 }, { "epoch": 1.3620604259534423, "grad_norm": 35.0, "learning_rate": 2.729899290077596e-05, "loss": 2.4213, "step": 11000 }, { "epoch": 1.4239722634967806, "grad_norm": 28.75, "learning_rate": 2.6267128941720326e-05, "loss": 2.424, "step": 11500 }, { "epoch": 1.485884101040119, "grad_norm": 26.0, "learning_rate": 2.5235264982664684e-05, "loss": 2.4192, "step": 12000 }, { "epoch": 1.5477959385834572, "grad_norm": 22.75, "learning_rate": 2.420340102360905e-05, "loss": 2.4032, "step": 12500 }, { "epoch": 1.6097077761267955, "grad_norm": 35.0, "learning_rate": 2.3171537064553412e-05, "loss": 2.4317, "step": 13000 }, { "epoch": 1.6716196136701336, "grad_norm": 20.875, "learning_rate": 2.213967310549777e-05, "loss": 2.432, "step": 13500 }, { "epoch": 1.733531451213472, "grad_norm": 25.625, "learning_rate": 2.1107809146442133e-05, "loss": 2.4309, "step": 14000 } ], "logging_steps": 500, "max_steps": 24228, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }