File size: 1,904 Bytes
22b26ad 0c65bb7 22b26ad 0c65bb7 22b26ad 0c65bb7 22b26ad 0c65bb7 22b26ad 0c65bb7 22b26ad 0c65bb7 22b26ad 0c65bb7 22b26ad 0c65bb7 22b26ad 0c65bb7 22b26ad 0c65bb7 22b26ad 0c65bb7 22b26ad 0c65bb7 22b26ad 0c65bb7 22b26ad 0c65bb7 22b26ad |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 12,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.64,
"grad_norm": 8.833975791931152,
"learning_rate": 0.0001666666666666667,
"loss": 76.2267,
"step": 2
},
{
"epoch": 1.0,
"grad_norm": 1.7217656373977661,
"learning_rate": 0.00013333333333333334,
"loss": 41.8397,
"step": 4
},
{
"epoch": 1.6400000000000001,
"grad_norm": 16.44710350036621,
"learning_rate": 0.0001,
"loss": 70.0907,
"step": 6
},
{
"epoch": 2.0,
"grad_norm": 2.9887499809265137,
"learning_rate": 6.666666666666667e-05,
"loss": 38.6177,
"step": 8
},
{
"epoch": 2.64,
"grad_norm": 15.259599685668945,
"learning_rate": 3.3333333333333335e-05,
"loss": 65.9877,
"step": 10
},
{
"epoch": 3.0,
"grad_norm": 3.4123783111572266,
"learning_rate": 0.0,
"loss": 36.3016,
"step": 12
},
{
"epoch": 3.0,
"step": 12,
"total_flos": 40668535028568.0,
"train_loss": 54.844011306762695,
"train_runtime": 47.4541,
"train_samples_per_second": 4.215,
"train_steps_per_second": 0.253
}
],
"logging_steps": 2,
"max_steps": 12,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 40668535028568.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}
|