|
{ |
|
"best_metric": 1.5792856216430664, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-50", |
|
"epoch": 0.16913319238900634, |
|
"eval_steps": 25, |
|
"global_step": 50, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0033826638477801266, |
|
"grad_norm": 2.0565555095672607, |
|
"learning_rate": 5.000000000000001e-07, |
|
"loss": 2.2282, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0033826638477801266, |
|
"eval_loss": 2.194361448287964, |
|
"eval_runtime": 58.7509, |
|
"eval_samples_per_second": 16.953, |
|
"eval_steps_per_second": 4.238, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.006765327695560253, |
|
"grad_norm": 2.1685779094696045, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 2.149, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.01014799154334038, |
|
"grad_norm": 2.2771434783935547, |
|
"learning_rate": 1.5e-06, |
|
"loss": 2.1532, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.013530655391120507, |
|
"grad_norm": 2.1824076175689697, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 2.1714, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.016913319238900635, |
|
"grad_norm": 2.2797279357910156, |
|
"learning_rate": 2.5e-06, |
|
"loss": 2.2472, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.02029598308668076, |
|
"grad_norm": 2.220853567123413, |
|
"learning_rate": 3e-06, |
|
"loss": 2.2193, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.023678646934460888, |
|
"grad_norm": 2.385507583618164, |
|
"learning_rate": 3.5000000000000004e-06, |
|
"loss": 2.2173, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.027061310782241013, |
|
"grad_norm": 2.2680976390838623, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 2.2432, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.03044397463002114, |
|
"grad_norm": 2.045863628387451, |
|
"learning_rate": 4.5e-06, |
|
"loss": 2.1571, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.03382663847780127, |
|
"grad_norm": 2.005089044570923, |
|
"learning_rate": 5e-06, |
|
"loss": 2.1658, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.037209302325581395, |
|
"grad_norm": 1.9319759607315063, |
|
"learning_rate": 5.500000000000001e-06, |
|
"loss": 2.2337, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.04059196617336152, |
|
"grad_norm": 1.7805265188217163, |
|
"learning_rate": 6e-06, |
|
"loss": 2.117, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.04397463002114165, |
|
"grad_norm": 1.699826717376709, |
|
"learning_rate": 6.5000000000000004e-06, |
|
"loss": 2.1592, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.047357293868921777, |
|
"grad_norm": 1.6120779514312744, |
|
"learning_rate": 7.000000000000001e-06, |
|
"loss": 2.1491, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.0507399577167019, |
|
"grad_norm": 1.5634993314743042, |
|
"learning_rate": 7.5e-06, |
|
"loss": 2.0976, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.054122621564482026, |
|
"grad_norm": 1.6402438879013062, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 2.0846, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.05750528541226216, |
|
"grad_norm": 1.5344332456588745, |
|
"learning_rate": 8.500000000000002e-06, |
|
"loss": 1.9933, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.06088794926004228, |
|
"grad_norm": 1.4738293886184692, |
|
"learning_rate": 9e-06, |
|
"loss": 2.0167, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.06427061310782241, |
|
"grad_norm": 1.6113697290420532, |
|
"learning_rate": 9.5e-06, |
|
"loss": 2.0144, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.06765327695560254, |
|
"grad_norm": 1.7016642093658447, |
|
"learning_rate": 1e-05, |
|
"loss": 2.0101, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.07103594080338266, |
|
"grad_norm": 1.785592794418335, |
|
"learning_rate": 1.05e-05, |
|
"loss": 1.9704, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.07441860465116279, |
|
"grad_norm": 1.612625241279602, |
|
"learning_rate": 1.1000000000000001e-05, |
|
"loss": 1.9504, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.07780126849894292, |
|
"grad_norm": 1.89138925075531, |
|
"learning_rate": 1.1500000000000002e-05, |
|
"loss": 1.9573, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.08118393234672304, |
|
"grad_norm": 1.769402027130127, |
|
"learning_rate": 1.2e-05, |
|
"loss": 1.9108, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.08456659619450317, |
|
"grad_norm": 1.9443429708480835, |
|
"learning_rate": 1.25e-05, |
|
"loss": 1.9334, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.08456659619450317, |
|
"eval_loss": 1.8751963376998901, |
|
"eval_runtime": 59.497, |
|
"eval_samples_per_second": 16.74, |
|
"eval_steps_per_second": 4.185, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.0879492600422833, |
|
"grad_norm": 1.7100105285644531, |
|
"learning_rate": 1.3000000000000001e-05, |
|
"loss": 1.9201, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.09133192389006342, |
|
"grad_norm": 1.7522183656692505, |
|
"learning_rate": 1.3500000000000001e-05, |
|
"loss": 1.8164, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.09471458773784355, |
|
"grad_norm": 1.6645402908325195, |
|
"learning_rate": 1.4000000000000001e-05, |
|
"loss": 1.8408, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.09809725158562368, |
|
"grad_norm": 1.6604903936386108, |
|
"learning_rate": 1.45e-05, |
|
"loss": 1.7671, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.1014799154334038, |
|
"grad_norm": 1.7911064624786377, |
|
"learning_rate": 1.5e-05, |
|
"loss": 1.757, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.10486257928118393, |
|
"grad_norm": 1.8748377561569214, |
|
"learning_rate": 1.55e-05, |
|
"loss": 1.752, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.10824524312896405, |
|
"grad_norm": 2.364097833633423, |
|
"learning_rate": 1.6000000000000003e-05, |
|
"loss": 1.7415, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.11162790697674418, |
|
"grad_norm": 2.2695372104644775, |
|
"learning_rate": 1.65e-05, |
|
"loss": 1.7044, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.11501057082452432, |
|
"grad_norm": 1.5664398670196533, |
|
"learning_rate": 1.7000000000000003e-05, |
|
"loss": 1.7009, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.11839323467230443, |
|
"grad_norm": 1.7595115900039673, |
|
"learning_rate": 1.75e-05, |
|
"loss": 1.6647, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.12177589852008457, |
|
"grad_norm": 1.6659307479858398, |
|
"learning_rate": 1.8e-05, |
|
"loss": 1.6732, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.12515856236786468, |
|
"grad_norm": 1.1403136253356934, |
|
"learning_rate": 1.85e-05, |
|
"loss": 1.6259, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.12854122621564482, |
|
"grad_norm": 1.0617878437042236, |
|
"learning_rate": 1.9e-05, |
|
"loss": 1.62, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.13192389006342495, |
|
"grad_norm": 0.9418096542358398, |
|
"learning_rate": 1.9500000000000003e-05, |
|
"loss": 1.5283, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.13530655391120508, |
|
"grad_norm": 0.9303457736968994, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5858, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.1386892177589852, |
|
"grad_norm": 0.8767881989479065, |
|
"learning_rate": 2.05e-05, |
|
"loss": 1.6119, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.14207188160676532, |
|
"grad_norm": 0.9987740516662598, |
|
"learning_rate": 2.1e-05, |
|
"loss": 1.6721, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.14545454545454545, |
|
"grad_norm": 0.8637537360191345, |
|
"learning_rate": 2.15e-05, |
|
"loss": 1.6756, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.14883720930232558, |
|
"grad_norm": 0.8661416172981262, |
|
"learning_rate": 2.2000000000000003e-05, |
|
"loss": 1.5943, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.1522198731501057, |
|
"grad_norm": 0.8084216117858887, |
|
"learning_rate": 2.25e-05, |
|
"loss": 1.612, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.15560253699788584, |
|
"grad_norm": 0.8370161056518555, |
|
"learning_rate": 2.3000000000000003e-05, |
|
"loss": 1.5512, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.15898520084566597, |
|
"grad_norm": 0.911957859992981, |
|
"learning_rate": 2.35e-05, |
|
"loss": 1.6142, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.16236786469344608, |
|
"grad_norm": 0.9367987513542175, |
|
"learning_rate": 2.4e-05, |
|
"loss": 1.5459, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.1657505285412262, |
|
"grad_norm": 0.8949148654937744, |
|
"learning_rate": 2.45e-05, |
|
"loss": 1.6109, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.16913319238900634, |
|
"grad_norm": 0.8378682136535645, |
|
"learning_rate": 2.5e-05, |
|
"loss": 1.5156, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.16913319238900634, |
|
"eval_loss": 1.5792856216430664, |
|
"eval_runtime": 59.5283, |
|
"eval_samples_per_second": 16.732, |
|
"eval_steps_per_second": 4.183, |
|
"step": 50 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 50, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 25, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 1, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5.565359742517248e+16, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|