{ "best_metric": 1.5792856216430664, "best_model_checkpoint": "miner_id_24/checkpoint-50", "epoch": 0.16913319238900634, "eval_steps": 25, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0033826638477801266, "grad_norm": 2.0565555095672607, "learning_rate": 5.000000000000001e-07, "loss": 2.2282, "step": 1 }, { "epoch": 0.0033826638477801266, "eval_loss": 2.194361448287964, "eval_runtime": 58.7509, "eval_samples_per_second": 16.953, "eval_steps_per_second": 4.238, "step": 1 }, { "epoch": 0.006765327695560253, "grad_norm": 2.1685779094696045, "learning_rate": 1.0000000000000002e-06, "loss": 2.149, "step": 2 }, { "epoch": 0.01014799154334038, "grad_norm": 2.2771434783935547, "learning_rate": 1.5e-06, "loss": 2.1532, "step": 3 }, { "epoch": 0.013530655391120507, "grad_norm": 2.1824076175689697, "learning_rate": 2.0000000000000003e-06, "loss": 2.1714, "step": 4 }, { "epoch": 0.016913319238900635, "grad_norm": 2.2797279357910156, "learning_rate": 2.5e-06, "loss": 2.2472, "step": 5 }, { "epoch": 0.02029598308668076, "grad_norm": 2.220853567123413, "learning_rate": 3e-06, "loss": 2.2193, "step": 6 }, { "epoch": 0.023678646934460888, "grad_norm": 2.385507583618164, "learning_rate": 3.5000000000000004e-06, "loss": 2.2173, "step": 7 }, { "epoch": 0.027061310782241013, "grad_norm": 2.2680976390838623, "learning_rate": 4.000000000000001e-06, "loss": 2.2432, "step": 8 }, { "epoch": 0.03044397463002114, "grad_norm": 2.045863628387451, "learning_rate": 4.5e-06, "loss": 2.1571, "step": 9 }, { "epoch": 0.03382663847780127, "grad_norm": 2.005089044570923, "learning_rate": 5e-06, "loss": 2.1658, "step": 10 }, { "epoch": 0.037209302325581395, "grad_norm": 1.9319759607315063, "learning_rate": 5.500000000000001e-06, "loss": 2.2337, "step": 11 }, { "epoch": 0.04059196617336152, "grad_norm": 1.7805265188217163, "learning_rate": 6e-06, "loss": 2.117, "step": 12 }, { "epoch": 0.04397463002114165, "grad_norm": 1.699826717376709, "learning_rate": 6.5000000000000004e-06, "loss": 2.1592, "step": 13 }, { "epoch": 0.047357293868921777, "grad_norm": 1.6120779514312744, "learning_rate": 7.000000000000001e-06, "loss": 2.1491, "step": 14 }, { "epoch": 0.0507399577167019, "grad_norm": 1.5634993314743042, "learning_rate": 7.5e-06, "loss": 2.0976, "step": 15 }, { "epoch": 0.054122621564482026, "grad_norm": 1.6402438879013062, "learning_rate": 8.000000000000001e-06, "loss": 2.0846, "step": 16 }, { "epoch": 0.05750528541226216, "grad_norm": 1.5344332456588745, "learning_rate": 8.500000000000002e-06, "loss": 1.9933, "step": 17 }, { "epoch": 0.06088794926004228, "grad_norm": 1.4738293886184692, "learning_rate": 9e-06, "loss": 2.0167, "step": 18 }, { "epoch": 0.06427061310782241, "grad_norm": 1.6113697290420532, "learning_rate": 9.5e-06, "loss": 2.0144, "step": 19 }, { "epoch": 0.06765327695560254, "grad_norm": 1.7016642093658447, "learning_rate": 1e-05, "loss": 2.0101, "step": 20 }, { "epoch": 0.07103594080338266, "grad_norm": 1.785592794418335, "learning_rate": 1.05e-05, "loss": 1.9704, "step": 21 }, { "epoch": 0.07441860465116279, "grad_norm": 1.612625241279602, "learning_rate": 1.1000000000000001e-05, "loss": 1.9504, "step": 22 }, { "epoch": 0.07780126849894292, "grad_norm": 1.89138925075531, "learning_rate": 1.1500000000000002e-05, "loss": 1.9573, "step": 23 }, { "epoch": 0.08118393234672304, "grad_norm": 1.769402027130127, "learning_rate": 1.2e-05, "loss": 1.9108, "step": 24 }, { "epoch": 0.08456659619450317, "grad_norm": 1.9443429708480835, "learning_rate": 1.25e-05, "loss": 1.9334, "step": 25 }, { "epoch": 0.08456659619450317, "eval_loss": 1.8751963376998901, "eval_runtime": 59.497, "eval_samples_per_second": 16.74, "eval_steps_per_second": 4.185, "step": 25 }, { "epoch": 0.0879492600422833, "grad_norm": 1.7100105285644531, "learning_rate": 1.3000000000000001e-05, "loss": 1.9201, "step": 26 }, { "epoch": 0.09133192389006342, "grad_norm": 1.7522183656692505, "learning_rate": 1.3500000000000001e-05, "loss": 1.8164, "step": 27 }, { "epoch": 0.09471458773784355, "grad_norm": 1.6645402908325195, "learning_rate": 1.4000000000000001e-05, "loss": 1.8408, "step": 28 }, { "epoch": 0.09809725158562368, "grad_norm": 1.6604903936386108, "learning_rate": 1.45e-05, "loss": 1.7671, "step": 29 }, { "epoch": 0.1014799154334038, "grad_norm": 1.7911064624786377, "learning_rate": 1.5e-05, "loss": 1.757, "step": 30 }, { "epoch": 0.10486257928118393, "grad_norm": 1.8748377561569214, "learning_rate": 1.55e-05, "loss": 1.752, "step": 31 }, { "epoch": 0.10824524312896405, "grad_norm": 2.364097833633423, "learning_rate": 1.6000000000000003e-05, "loss": 1.7415, "step": 32 }, { "epoch": 0.11162790697674418, "grad_norm": 2.2695372104644775, "learning_rate": 1.65e-05, "loss": 1.7044, "step": 33 }, { "epoch": 0.11501057082452432, "grad_norm": 1.5664398670196533, "learning_rate": 1.7000000000000003e-05, "loss": 1.7009, "step": 34 }, { "epoch": 0.11839323467230443, "grad_norm": 1.7595115900039673, "learning_rate": 1.75e-05, "loss": 1.6647, "step": 35 }, { "epoch": 0.12177589852008457, "grad_norm": 1.6659307479858398, "learning_rate": 1.8e-05, "loss": 1.6732, "step": 36 }, { "epoch": 0.12515856236786468, "grad_norm": 1.1403136253356934, "learning_rate": 1.85e-05, "loss": 1.6259, "step": 37 }, { "epoch": 0.12854122621564482, "grad_norm": 1.0617878437042236, "learning_rate": 1.9e-05, "loss": 1.62, "step": 38 }, { "epoch": 0.13192389006342495, "grad_norm": 0.9418096542358398, "learning_rate": 1.9500000000000003e-05, "loss": 1.5283, "step": 39 }, { "epoch": 0.13530655391120508, "grad_norm": 0.9303457736968994, "learning_rate": 2e-05, "loss": 1.5858, "step": 40 }, { "epoch": 0.1386892177589852, "grad_norm": 0.8767881989479065, "learning_rate": 2.05e-05, "loss": 1.6119, "step": 41 }, { "epoch": 0.14207188160676532, "grad_norm": 0.9987740516662598, "learning_rate": 2.1e-05, "loss": 1.6721, "step": 42 }, { "epoch": 0.14545454545454545, "grad_norm": 0.8637537360191345, "learning_rate": 2.15e-05, "loss": 1.6756, "step": 43 }, { "epoch": 0.14883720930232558, "grad_norm": 0.8661416172981262, "learning_rate": 2.2000000000000003e-05, "loss": 1.5943, "step": 44 }, { "epoch": 0.1522198731501057, "grad_norm": 0.8084216117858887, "learning_rate": 2.25e-05, "loss": 1.612, "step": 45 }, { "epoch": 0.15560253699788584, "grad_norm": 0.8370161056518555, "learning_rate": 2.3000000000000003e-05, "loss": 1.5512, "step": 46 }, { "epoch": 0.15898520084566597, "grad_norm": 0.911957859992981, "learning_rate": 2.35e-05, "loss": 1.6142, "step": 47 }, { "epoch": 0.16236786469344608, "grad_norm": 0.9367987513542175, "learning_rate": 2.4e-05, "loss": 1.5459, "step": 48 }, { "epoch": 0.1657505285412262, "grad_norm": 0.8949148654937744, "learning_rate": 2.45e-05, "loss": 1.6109, "step": 49 }, { "epoch": 0.16913319238900634, "grad_norm": 0.8378682136535645, "learning_rate": 2.5e-05, "loss": 1.5156, "step": 50 }, { "epoch": 0.16913319238900634, "eval_loss": 1.5792856216430664, "eval_runtime": 59.5283, "eval_samples_per_second": 16.732, "eval_steps_per_second": 4.183, "step": 50 } ], "logging_steps": 1, "max_steps": 50, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 25, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.565359742517248e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }