File size: 2,568 Bytes
893fbd2 5ee030d 893fbd2 5ee030d 893fbd2 cb636b5 5ee030d 893fbd2 cb636b5 5ee030d 893fbd2 cb636b5 5ee030d 893fbd2 cb636b5 5ee030d 893fbd2 cb636b5 5ee030d 893fbd2 cb636b5 5ee030d 893fbd2 cb636b5 5ee030d 893fbd2 cb636b5 5ee030d 893fbd2 cb636b5 5ee030d 893fbd2 cb636b5 5ee030d 893fbd2 5ee030d 107bea7 893fbd2 5ee030d 893fbd2 5ee030d 893fbd2 5ee030d 893fbd2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.9047619047619047,
"eval_steps": 500,
"global_step": 20,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.19047619047619047,
"grad_norm": 2.075279951095581,
"learning_rate": 0.00018,
"loss": 9.5584,
"step": 2
},
{
"epoch": 0.38095238095238093,
"grad_norm": 1.2216403484344482,
"learning_rate": 0.00016,
"loss": 9.2621,
"step": 4
},
{
"epoch": 0.5714285714285714,
"grad_norm": 1.9454573392868042,
"learning_rate": 0.00014,
"loss": 8.8941,
"step": 6
},
{
"epoch": 0.7619047619047619,
"grad_norm": 2.4250237941741943,
"learning_rate": 0.00012,
"loss": 8.4539,
"step": 8
},
{
"epoch": 0.9523809523809523,
"grad_norm": 1.720076560974121,
"learning_rate": 0.0001,
"loss": 8.1194,
"step": 10
},
{
"epoch": 1.1428571428571428,
"grad_norm": 1.4687391519546509,
"learning_rate": 8e-05,
"loss": 7.9072,
"step": 12
},
{
"epoch": 1.3333333333333333,
"grad_norm": 1.1075403690338135,
"learning_rate": 6e-05,
"loss": 7.7305,
"step": 14
},
{
"epoch": 1.5238095238095237,
"grad_norm": 1.532745361328125,
"learning_rate": 4e-05,
"loss": 7.6572,
"step": 16
},
{
"epoch": 1.7142857142857144,
"grad_norm": 1.2055246829986572,
"learning_rate": 2e-05,
"loss": 7.6057,
"step": 18
},
{
"epoch": 1.9047619047619047,
"grad_norm": 0.9867807030677795,
"learning_rate": 0.0,
"loss": 7.5695,
"step": 20
},
{
"epoch": 1.9047619047619047,
"step": 20,
"total_flos": 99814369025136.0,
"train_loss": 8.27579402923584,
"train_runtime": 84.8689,
"train_samples_per_second": 3.959,
"train_steps_per_second": 0.236
}
],
"logging_steps": 2,
"max_steps": 20,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 99814369025136.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}
|