|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.017106079072850514, |
|
"eval_steps": 50, |
|
"global_step": 200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 8.553039536425257e-05, |
|
"eval_loss": 2.381143808364868, |
|
"eval_runtime": 144.1703, |
|
"eval_samples_per_second": 34.147, |
|
"eval_steps_per_second": 17.077, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0008553039536425257, |
|
"grad_norm": 5.450172424316406, |
|
"learning_rate": 0.00019967573081342103, |
|
"loss": 9.0162, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0017106079072850514, |
|
"grad_norm": 4.5537848472595215, |
|
"learning_rate": 0.0001970941817426052, |
|
"loss": 8.8664, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.002565911860927577, |
|
"grad_norm": 7.618182182312012, |
|
"learning_rate": 0.00019199794436588243, |
|
"loss": 9.0587, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.003421215814570103, |
|
"grad_norm": 5.113912582397461, |
|
"learning_rate": 0.0001845190085543795, |
|
"loss": 8.1141, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.004276519768212628, |
|
"grad_norm": 7.7498955726623535, |
|
"learning_rate": 0.00017485107481711012, |
|
"loss": 8.7842, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.004276519768212628, |
|
"eval_loss": 2.1146717071533203, |
|
"eval_runtime": 142.2534, |
|
"eval_samples_per_second": 34.607, |
|
"eval_steps_per_second": 17.307, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.005131823721855154, |
|
"grad_norm": 4.736423969268799, |
|
"learning_rate": 0.00016324453755953773, |
|
"loss": 8.2631, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.00598712767549768, |
|
"grad_norm": 3.9354634284973145, |
|
"learning_rate": 0.00015000000000000001, |
|
"loss": 8.4239, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.006842431629140206, |
|
"grad_norm": 5.779056072235107, |
|
"learning_rate": 0.00013546048870425356, |
|
"loss": 8.0989, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.007697735582782732, |
|
"grad_norm": 5.675789833068848, |
|
"learning_rate": 0.00012000256937760445, |
|
"loss": 8.2106, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.008553039536425257, |
|
"grad_norm": 5.744926452636719, |
|
"learning_rate": 0.00010402659401094152, |
|
"loss": 8.2246, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.008553039536425257, |
|
"eval_loss": 2.0408310890197754, |
|
"eval_runtime": 142.0789, |
|
"eval_samples_per_second": 34.65, |
|
"eval_steps_per_second": 17.328, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.009408343490067783, |
|
"grad_norm": 5.404253005981445, |
|
"learning_rate": 8.79463319744677e-05, |
|
"loss": 7.483, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.010263647443710309, |
|
"grad_norm": 7.058231830596924, |
|
"learning_rate": 7.217825360835473e-05, |
|
"loss": 8.0571, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.011118951397352835, |
|
"grad_norm": 5.664774417877197, |
|
"learning_rate": 5.713074385969457e-05, |
|
"loss": 8.5309, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.01197425535099536, |
|
"grad_norm": 4.67970085144043, |
|
"learning_rate": 4.3193525326884435e-05, |
|
"loss": 7.8239, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.012829559304637885, |
|
"grad_norm": 6.211084365844727, |
|
"learning_rate": 3.072756464904006e-05, |
|
"loss": 7.6217, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.012829559304637885, |
|
"eval_loss": 2.0031981468200684, |
|
"eval_runtime": 141.879, |
|
"eval_samples_per_second": 34.699, |
|
"eval_steps_per_second": 17.353, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.013684863258280411, |
|
"grad_norm": 5.578930377960205, |
|
"learning_rate": 2.0055723659649904e-05, |
|
"loss": 7.9683, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.014540167211922937, |
|
"grad_norm": 5.688002109527588, |
|
"learning_rate": 1.1454397434679021e-05, |
|
"loss": 8.0386, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.015395471165565463, |
|
"grad_norm": 4.525981903076172, |
|
"learning_rate": 5.146355805285452e-06, |
|
"loss": 7.3559, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.01625077511920799, |
|
"grad_norm": 5.752941608428955, |
|
"learning_rate": 1.2949737362087156e-06, |
|
"loss": 7.7123, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.017106079072850514, |
|
"grad_norm": 15.219685554504395, |
|
"learning_rate": 0.0, |
|
"loss": 8.2222, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.017106079072850514, |
|
"eval_loss": 1.9949531555175781, |
|
"eval_runtime": 142.5551, |
|
"eval_samples_per_second": 34.534, |
|
"eval_steps_per_second": 17.271, |
|
"step": 200 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.534314936519885e+16, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|