{
"best_metric": 1.6834224462509155,
"best_model_checkpoint": "miner_id_24/checkpoint-30",
"epoch": 0.008471584892340275,
"eval_steps": 10,
"global_step": 30,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0002823861630780092,
"grad_norm": 1.2056736946105957,
"learning_rate": 2e-05,
"loss": 1.9521,
"step": 1
},
{
"epoch": 0.0002823861630780092,
"eval_loss": 2.352551221847534,
"eval_runtime": 133.636,
"eval_samples_per_second": 5.582,
"eval_steps_per_second": 5.582,
"step": 1
},
{
"epoch": 0.0005647723261560184,
"grad_norm": 4.347576141357422,
"learning_rate": 4e-05,
"loss": 3.1192,
"step": 2
},
{
"epoch": 0.0008471584892340275,
"grad_norm": 0.7937899231910706,
"learning_rate": 6e-05,
"loss": 1.1903,
"step": 3
},
{
"epoch": 0.0011295446523120368,
"grad_norm": 3.9262712001800537,
"learning_rate": 8e-05,
"loss": 3.5143,
"step": 4
},
{
"epoch": 0.0014119308153900459,
"grad_norm": 2.7941317558288574,
"learning_rate": 0.0001,
"loss": 2.9205,
"step": 5
},
{
"epoch": 0.001694316978468055,
"grad_norm": 0.9424725770950317,
"learning_rate": 0.00012,
"loss": 2.582,
"step": 6
},
{
"epoch": 0.001976703141546064,
"grad_norm": 1.8026996850967407,
"learning_rate": 0.00014,
"loss": 2.0555,
"step": 7
},
{
"epoch": 0.0022590893046240735,
"grad_norm": 1.0503661632537842,
"learning_rate": 0.00016,
"loss": 2.6065,
"step": 8
},
{
"epoch": 0.0025414754677020824,
"grad_norm": 2.3215556144714355,
"learning_rate": 0.00018,
"loss": 2.6192,
"step": 9
},
{
"epoch": 0.0028238616307800918,
"grad_norm": 1.60389244556427,
"learning_rate": 0.0002,
"loss": 2.1819,
"step": 10
},
{
"epoch": 0.0028238616307800918,
"eval_loss": 2.1240005493164062,
"eval_runtime": 133.7157,
"eval_samples_per_second": 5.579,
"eval_steps_per_second": 5.579,
"step": 10
},
{
"epoch": 0.003106247793858101,
"grad_norm": 1.0237597227096558,
"learning_rate": 0.0001999979446958366,
"loss": 1.8844,
"step": 11
},
{
"epoch": 0.00338863395693611,
"grad_norm": 4.726717472076416,
"learning_rate": 0.00019999177886783194,
"loss": 3.1172,
"step": 12
},
{
"epoch": 0.0036710201200141194,
"grad_norm": 1.6403062343597412,
"learning_rate": 0.00019998150276943902,
"loss": 1.7007,
"step": 13
},
{
"epoch": 0.003953406283092128,
"grad_norm": 2.1164722442626953,
"learning_rate": 0.000199967116823068,
"loss": 2.5321,
"step": 14
},
{
"epoch": 0.004235792446170138,
"grad_norm": 3.5340867042541504,
"learning_rate": 0.0001999486216200688,
"loss": 2.3608,
"step": 15
},
{
"epoch": 0.004518178609248147,
"grad_norm": 5.55496072769165,
"learning_rate": 0.00019992601792070679,
"loss": 1.6141,
"step": 16
},
{
"epoch": 0.004800564772326156,
"grad_norm": 7.0048136711120605,
"learning_rate": 0.00019989930665413147,
"loss": 2.2728,
"step": 17
},
{
"epoch": 0.005082950935404165,
"grad_norm": 1.7807990312576294,
"learning_rate": 0.00019986848891833845,
"loss": 1.6759,
"step": 18
},
{
"epoch": 0.005365337098482174,
"grad_norm": 3.1774418354034424,
"learning_rate": 0.0001998335659801241,
"loss": 0.8362,
"step": 19
},
{
"epoch": 0.0056477232615601836,
"grad_norm": 5.73573637008667,
"learning_rate": 0.00019979453927503364,
"loss": 2.7934,
"step": 20
},
{
"epoch": 0.0056477232615601836,
"eval_loss": 1.7092158794403076,
"eval_runtime": 133.3036,
"eval_samples_per_second": 5.596,
"eval_steps_per_second": 5.596,
"step": 20
},
{
"epoch": 0.005930109424638193,
"grad_norm": 1.705460548400879,
"learning_rate": 0.00019975141040730207,
"loss": 1.3571,
"step": 21
},
{
"epoch": 0.006212495587716202,
"grad_norm": 1.3597909212112427,
"learning_rate": 0.0001997041811497882,
"loss": 1.491,
"step": 22
},
{
"epoch": 0.006494881750794211,
"grad_norm": 1.3038731813430786,
"learning_rate": 0.00019965285344390184,
"loss": 2.4389,
"step": 23
},
{
"epoch": 0.00677726791387222,
"grad_norm": 2.3923144340515137,
"learning_rate": 0.00019959742939952392,
"loss": 1.3591,
"step": 24
},
{
"epoch": 0.0070596540769502295,
"grad_norm": 2.964477300643921,
"learning_rate": 0.00019953791129491983,
"loss": 1.1022,
"step": 25
},
{
"epoch": 0.007342040240028239,
"grad_norm": 3.184072732925415,
"learning_rate": 0.00019947430157664576,
"loss": 1.4837,
"step": 26
},
{
"epoch": 0.007624426403106248,
"grad_norm": 1.576446294784546,
"learning_rate": 0.00019940660285944803,
"loss": 2.0992,
"step": 27
},
{
"epoch": 0.007906812566184257,
"grad_norm": 2.878796100616455,
"learning_rate": 0.00019933481792615583,
"loss": 2.4712,
"step": 28
},
{
"epoch": 0.008189198729262267,
"grad_norm": 2.5952467918395996,
"learning_rate": 0.0001992589497275665,
"loss": 1.6412,
"step": 29
},
{
"epoch": 0.008471584892340275,
"grad_norm": 2.8451197147369385,
"learning_rate": 0.0001991790013823246,
"loss": 1.6372,
"step": 30
},
{
"epoch": 0.008471584892340275,
"eval_loss": 1.6834224462509155,
"eval_runtime": 133.2875,
"eval_samples_per_second": 5.597,
"eval_steps_per_second": 5.597,
"step": 30
}
],
"logging_steps": 1,
"max_steps": 500,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 10,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 3,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 2936410755563520.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}
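
The JSON above is the trainer_state.json that the Hugging Face Transformers Trainer writes next to each saved checkpoint (here step 30, with logging every step, evaluation and saving every 10 steps, and early stopping patience 3). Below is a minimal Python sketch for inspecting it, assuming the checkpoint directory miner_id_24/checkpoint-30 named in best_model_checkpoint is available locally; the path comes from the file itself and may differ in your setup.

import json
from pathlib import Path

# Path taken from best_model_checkpoint above; adjust to your local layout.
state_path = Path("miner_id_24/checkpoint-30/trainer_state.json")
state = json.loads(state_path.read_text())

print("best_metric:", state["best_metric"])                      # 1.6834... (eval_loss at step 30)
print("best_model_checkpoint:", state["best_model_checkpoint"])

# log_history mixes training and evaluation records; eval entries carry "eval_loss".
train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]
print(f"{len(train_logs)} training steps logged, {len(eval_logs)} evaluations")

for e in eval_logs:
    print(f"step {e['step']:>3}: eval_loss={e['eval_loss']:.4f} "
          f"({e['eval_samples_per_second']:.2f} samples/s)")

Training can be resumed from this state by passing the checkpoint directory to Trainer.train(resume_from_checkpoint="miner_id_24/checkpoint-30"), which restores global_step and, in recent Transformers versions, the callback state recorded under stateful_callbacks (including the EarlyStoppingCallback patience counter).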