{
  "best_metric": 1.2163466215133667,
  "best_model_checkpoint": "miner_id_24/checkpoint-300",
  "epoch": 0.047934808660222095,
  "eval_steps": 50,
  "global_step": 300,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00015978269553407366,
      "eval_loss": 1.6500585079193115,
      "eval_runtime": 49.2063,
      "eval_samples_per_second": 53.55,
      "eval_steps_per_second": 13.393,
      "step": 1
    },
    {
      "epoch": 0.0015978269553407365,
      "grad_norm": 0.31304410099983215,
      "learning_rate": 4.24e-05,
      "loss": 1.6422,
      "step": 10
    },
    {
      "epoch": 0.003195653910681473,
      "grad_norm": 0.5056787729263306,
      "learning_rate": 8.48e-05,
      "loss": 1.6523,
      "step": 20
    },
    {
      "epoch": 0.00479348086602221,
      "grad_norm": 0.5264449715614319,
      "learning_rate": 0.0001272,
      "loss": 1.4867,
      "step": 30
    },
    {
      "epoch": 0.006391307821362946,
      "grad_norm": 0.7551277875900269,
      "learning_rate": 0.0001696,
      "loss": 1.4439,
      "step": 40
    },
    {
      "epoch": 0.007989134776703682,
      "grad_norm": 1.3374431133270264,
      "learning_rate": 0.000212,
      "loss": 1.2566,
      "step": 50
    },
    {
      "epoch": 0.007989134776703682,
      "eval_loss": 1.3853693008422852,
      "eval_runtime": 49.2099,
      "eval_samples_per_second": 53.546,
      "eval_steps_per_second": 13.392,
      "step": 50
    },
    {
      "epoch": 0.00958696173204442,
      "grad_norm": 0.36312878131866455,
      "learning_rate": 0.00021174178932754136,
      "loss": 1.538,
      "step": 60
    },
    {
      "epoch": 0.011184788687385157,
      "grad_norm": 0.49091804027557373,
      "learning_rate": 0.00021096841528660647,
      "loss": 1.4567,
      "step": 70
    },
    {
      "epoch": 0.012782615642725892,
      "grad_norm": 0.5926417112350464,
      "learning_rate": 0.0002096836456777834,
      "loss": 1.2948,
      "step": 80
    },
    {
      "epoch": 0.01438044259806663,
      "grad_norm": 0.6516161561012268,
      "learning_rate": 0.00020789373976946182,
      "loss": 1.1847,
      "step": 90
    },
    {
      "epoch": 0.015978269553407365,
      "grad_norm": 1.2274402379989624,
      "learning_rate": 0.0002056074178033063,
      "loss": 1.0668,
      "step": 100
    },
    {
      "epoch": 0.015978269553407365,
      "eval_loss": 1.308932900428772,
      "eval_runtime": 49.1943,
      "eval_samples_per_second": 53.563,
      "eval_steps_per_second": 13.396,
      "step": 100
    },
    {
      "epoch": 0.017576096508748102,
      "grad_norm": 0.3842248022556305,
      "learning_rate": 0.00020283581851011567,
      "loss": 1.5373,
      "step": 110
    },
    {
      "epoch": 0.01917392346408884,
      "grad_norm": 0.4492928981781006,
      "learning_rate": 0.00019959244484304625,
      "loss": 1.4419,
      "step": 120
    },
    {
      "epoch": 0.020771750419429576,
      "grad_norm": 0.5767350196838379,
      "learning_rate": 0.00019589309819258114,
      "loss": 1.3442,
      "step": 130
    },
    {
      "epoch": 0.022369577374770314,
      "grad_norm": 0.6538853049278259,
      "learning_rate": 0.00019175580140374444,
      "loss": 1.0945,
      "step": 140
    },
    {
      "epoch": 0.023967404330111047,
      "grad_norm": 1.4180448055267334,
      "learning_rate": 0.00018720071097061167,
      "loss": 1.0496,
      "step": 150
    },
    {
      "epoch": 0.023967404330111047,
      "eval_loss": 1.282002568244934,
      "eval_runtime": 49.137,
      "eval_samples_per_second": 53.626,
      "eval_steps_per_second": 13.411,
      "step": 150
    },
    {
      "epoch": 0.025565231285451784,
      "grad_norm": 0.38757893443107605,
      "learning_rate": 0.00018225001883589702,
      "loss": 1.4831,
      "step": 160
    },
    {
      "epoch": 0.02716305824079252,
      "grad_norm": 0.4853920340538025,
      "learning_rate": 0.00017692784427403898,
      "loss": 1.4429,
      "step": 170
    },
    {
      "epoch": 0.02876088519613326,
      "grad_norm": 0.5605722069740295,
      "learning_rate": 0.00017126011638451976,
      "loss": 1.2459,
      "step": 180
    },
    {
      "epoch": 0.030358712151473996,
      "grad_norm": 0.6170331239700317,
      "learning_rate": 0.00016527444776789915,
      "loss": 1.009,
      "step": 190
    },
    {
      "epoch": 0.03195653910681473,
      "grad_norm": 1.6762750148773193,
      "learning_rate": 0.00015900000000000002,
      "loss": 1.0656,
      "step": 200
    },
    {
      "epoch": 0.03195653910681473,
      "eval_loss": 1.2561191320419312,
      "eval_runtime": 49.3809,
      "eval_samples_per_second": 53.361,
      "eval_steps_per_second": 13.345,
      "step": 200
    },
    {
      "epoch": 0.03355436606215547,
      "grad_norm": 0.37146052718162537,
      "learning_rate": 0.0001524673415596422,
      "loss": 1.4539,
      "step": 210
    },
    {
      "epoch": 0.035152193017496204,
      "grad_norm": 0.47493624687194824,
      "learning_rate": 0.00014570829890208668,
      "loss": 1.3451,
      "step": 220
    },
    {
      "epoch": 0.036750019972836945,
      "grad_norm": 0.5821739435195923,
      "learning_rate": 0.00013875580140374443,
      "loss": 1.3524,
      "step": 230
    },
    {
      "epoch": 0.03834784692817768,
      "grad_norm": 0.6922153234481812,
      "learning_rate": 0.00013164372093356477,
      "loss": 1.1759,
      "step": 240
    },
    {
      "epoch": 0.03994567388351841,
      "grad_norm": 1.3326733112335205,
      "learning_rate": 0.00012440670683269464,
      "loss": 1.0188,
      "step": 250
    },
    {
      "epoch": 0.03994567388351841,
      "eval_loss": 1.2323858737945557,
      "eval_runtime": 49.4073,
      "eval_samples_per_second": 53.332,
      "eval_steps_per_second": 13.338,
      "step": 250
    },
    {
      "epoch": 0.04154350083885915,
      "grad_norm": 0.3916065990924835,
      "learning_rate": 0.00011708001710637128,
      "loss": 1.4353,
      "step": 260
    },
    {
      "epoch": 0.04314132779419989,
      "grad_norm": 0.47572290897369385,
      "learning_rate": 0.00010969934665046512,
      "loss": 1.3617,
      "step": 270
    },
    {
      "epoch": 0.04473915474954063,
      "grad_norm": 0.5964052081108093,
      "learning_rate": 0.00010230065334953492,
      "loss": 1.2104,
      "step": 280
    },
    {
      "epoch": 0.04633698170488136,
      "grad_norm": 0.7098691463470459,
      "learning_rate": 9.491998289362875e-05,
      "loss": 1.0909,
      "step": 290
    },
    {
      "epoch": 0.047934808660222095,
      "grad_norm": 1.1634002923965454,
      "learning_rate": 8.759329316730539e-05,
      "loss": 0.9742,
      "step": 300
    },
    {
      "epoch": 0.047934808660222095,
      "eval_loss": 1.2163466215133667,
      "eval_runtime": 49.1098,
      "eval_samples_per_second": 53.655,
      "eval_steps_per_second": 13.419,
      "step": 300
    }
  ],
  "logging_steps": 10,
  "max_steps": 500,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 3,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 8402222601732096.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}