File size: 2,001 Bytes
9b3480b c3a108b 9b3480b c3a108b 9b3480b c3a108b 9b3480b c3a108b 9b3480b c3a108b 9b3480b c3a108b 9b3480b c3a108b 9b3480b c3a108b 9b3480b c3a108b 9b3480b c3a108b 9b3480b c3a108b 9b3480b c3a108b 9b3480b c3a108b 775f6d0 9b3480b c3a108b 9b3480b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.5555555555555554,
"eval_steps": 500,
"global_step": 12,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.5925925925925926,
"grad_norm": 1.0686613321304321,
"learning_rate": 0.0001666666666666667,
"loss": 9.4726,
"step": 2
},
{
"epoch": 1.1851851851851851,
"grad_norm": 1.2996327877044678,
"learning_rate": 0.00013333333333333334,
"loss": 9.1175,
"step": 4
},
{
"epoch": 1.7777777777777777,
"grad_norm": 2.3309664726257324,
"learning_rate": 0.0001,
"loss": 8.7037,
"step": 6
},
{
"epoch": 2.3703703703703702,
"grad_norm": 2.276108503341675,
"learning_rate": 6.666666666666667e-05,
"loss": 8.2289,
"step": 8
},
{
"epoch": 2.962962962962963,
"grad_norm": 1.6182162761688232,
"learning_rate": 3.3333333333333335e-05,
"loss": 7.8725,
"step": 10
},
{
"epoch": 3.5555555555555554,
"grad_norm": 1.3706101179122925,
"learning_rate": 0.0,
"loss": 7.7669,
"step": 12
},
{
"epoch": 3.5555555555555554,
"step": 12,
"total_flos": 60069698944968.0,
"train_loss": 8.527011315027872,
"train_runtime": 334.2918,
"train_samples_per_second": 0.646,
"train_steps_per_second": 0.036
}
],
"logging_steps": 2,
"max_steps": 12,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 60069698944968.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}
|