{ "best_metric": 2.9403679370880127, "best_model_checkpoint": "miner_id_24/checkpoint-30", "epoch": 0.0005820552370419952, "eval_steps": 5, "global_step": 30, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.9401841234733175e-05, "eval_loss": 3.109846830368042, "eval_runtime": 2024.7504, "eval_samples_per_second": 10.718, "eval_steps_per_second": 5.359, "step": 1 }, { "epoch": 5.820552370419953e-05, "grad_norm": 0.19847838580608368, "learning_rate": 6e-05, "loss": 2.9343, "step": 3 }, { "epoch": 9.700920617366589e-05, "eval_loss": 3.0984432697296143, "eval_runtime": 2027.1501, "eval_samples_per_second": 10.706, "eval_steps_per_second": 5.353, "step": 5 }, { "epoch": 0.00011641104740839906, "grad_norm": 0.25328367948532104, "learning_rate": 9.96057350657239e-05, "loss": 2.9184, "step": 6 }, { "epoch": 0.0001746165711125986, "grad_norm": 0.23369625210762024, "learning_rate": 9.381533400219318e-05, "loss": 3.0195, "step": 9 }, { "epoch": 0.00019401841234733177, "eval_loss": 3.0252015590667725, "eval_runtime": 2024.6928, "eval_samples_per_second": 10.719, "eval_steps_per_second": 5.359, "step": 10 }, { "epoch": 0.00023282209481679812, "grad_norm": 0.4297844171524048, "learning_rate": 8.18711994874345e-05, "loss": 2.8714, "step": 12 }, { "epoch": 0.0002910276185209976, "grad_norm": 0.3757598400115967, "learning_rate": 6.545084971874738e-05, "loss": 2.8659, "step": 15 }, { "epoch": 0.0002910276185209976, "eval_loss": 2.9789626598358154, "eval_runtime": 2026.1573, "eval_samples_per_second": 10.711, "eval_steps_per_second": 5.355, "step": 15 }, { "epoch": 0.0003492331422251972, "grad_norm": 0.35646551847457886, "learning_rate": 4.6860474023534335e-05, "loss": 2.8937, "step": 18 }, { "epoch": 0.00038803682469466355, "eval_loss": 2.954017400741577, "eval_runtime": 2027.9112, "eval_samples_per_second": 10.702, "eval_steps_per_second": 5.351, "step": 20 }, { "epoch": 0.0004074386659293967, "grad_norm": 0.40540552139282227, "learning_rate": 2.8711035421746367e-05, "loss": 3.0663, "step": 21 }, { "epoch": 0.00046564418963359624, "grad_norm": 0.3658403158187866, "learning_rate": 1.3551568628929434e-05, "loss": 2.9409, "step": 24 }, { "epoch": 0.0004850460308683294, "eval_loss": 2.942352056503296, "eval_runtime": 2026.1692, "eval_samples_per_second": 10.711, "eval_steps_per_second": 5.355, "step": 25 }, { "epoch": 0.0005238497133377958, "grad_norm": 0.6082600951194763, "learning_rate": 3.511175705587433e-06, "loss": 3.0562, "step": 27 }, { "epoch": 0.0005820552370419952, "grad_norm": 0.45390358567237854, "learning_rate": 0.0, "loss": 2.8248, "step": 30 }, { "epoch": 0.0005820552370419952, "eval_loss": 2.9403679370880127, "eval_runtime": 2027.2099, "eval_samples_per_second": 10.705, "eval_steps_per_second": 5.353, "step": 30 } ], "logging_steps": 3, "max_steps": 30, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3915159982571520.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }