File size: 1,997 Bytes
73dfccd 0fbc3db 73dfccd 0fbc3db 73dfccd 0fbc3db 73dfccd 0fbc3db 73dfccd 0fbc3db 73dfccd 0fbc3db 73dfccd 0fbc3db 73dfccd 0fbc3db 73dfccd 0fbc3db 73dfccd 0fbc3db 73dfccd 0fbc3db 73dfccd 0fbc3db 73dfccd 0fbc3db 73dfccd 0fbc3db 73dfccd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.5555555555555554,
"eval_steps": 500,
"global_step": 12,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.5925925925925926,
"grad_norm": 1.454345464706421,
"learning_rate": 0.0001666666666666667,
"loss": 9.486,
"step": 2
},
{
"epoch": 1.1851851851851851,
"grad_norm": 1.290387511253357,
"learning_rate": 0.00013333333333333334,
"loss": 9.1386,
"step": 4
},
{
"epoch": 1.7777777777777777,
"grad_norm": 2.150188684463501,
"learning_rate": 0.0001,
"loss": 8.7163,
"step": 6
},
{
"epoch": 2.3703703703703702,
"grad_norm": 2.306529998779297,
"learning_rate": 6.666666666666667e-05,
"loss": 8.2646,
"step": 8
},
{
"epoch": 2.962962962962963,
"grad_norm": 1.6873130798339844,
"learning_rate": 3.3333333333333335e-05,
"loss": 7.9025,
"step": 10
},
{
"epoch": 3.5555555555555554,
"grad_norm": 1.427616000175476,
"learning_rate": 0.0,
"loss": 7.7933,
"step": 12
},
{
"epoch": 3.5555555555555554,
"step": 12,
"total_flos": 55764488285424.0,
"train_loss": 8.55021588007609,
"train_runtime": 312.1882,
"train_samples_per_second": 0.692,
"train_steps_per_second": 0.038
}
],
"logging_steps": 2,
"max_steps": 12,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 55764488285424.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}
|