eungupmodel1 / trainer_state.json
hongyunjeong's picture
Upload 11 files
b53b60d verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 10.0,
"eval_steps": 500,
"global_step": 1580,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.15822784810126583,
"grad_norm": 2.233290195465088,
"learning_rate": 0.0002,
"loss": 3.0567,
"step": 25
},
{
"epoch": 0.31645569620253167,
"grad_norm": 1.9256885051727295,
"learning_rate": 0.0002,
"loss": 2.6434,
"step": 50
},
{
"epoch": 0.47468354430379744,
"grad_norm": 4.249744415283203,
"learning_rate": 0.0002,
"loss": 2.0778,
"step": 75
},
{
"epoch": 0.6329113924050633,
"grad_norm": 1.954801082611084,
"learning_rate": 0.0002,
"loss": 2.1352,
"step": 100
},
{
"epoch": 0.7911392405063291,
"grad_norm": 1.9269670248031616,
"learning_rate": 0.0002,
"loss": 1.8299,
"step": 125
},
{
"epoch": 0.9493670886075949,
"grad_norm": 4.059688091278076,
"learning_rate": 0.0002,
"loss": 1.6206,
"step": 150
},
{
"epoch": 1.1075949367088607,
"grad_norm": 2.5162670612335205,
"learning_rate": 0.0002,
"loss": 1.7301,
"step": 175
},
{
"epoch": 1.2658227848101267,
"grad_norm": 2.2635657787323,
"learning_rate": 0.0002,
"loss": 1.3103,
"step": 200
},
{
"epoch": 1.4240506329113924,
"grad_norm": 2.5782394409179688,
"learning_rate": 0.0002,
"loss": 1.2166,
"step": 225
},
{
"epoch": 1.5822784810126582,
"grad_norm": 2.443361282348633,
"learning_rate": 0.0002,
"loss": 1.4792,
"step": 250
},
{
"epoch": 1.740506329113924,
"grad_norm": 4.522688388824463,
"learning_rate": 0.0002,
"loss": 1.2199,
"step": 275
},
{
"epoch": 1.8987341772151898,
"grad_norm": 3.9393839836120605,
"learning_rate": 0.0002,
"loss": 1.3172,
"step": 300
},
{
"epoch": 2.0569620253164556,
"grad_norm": 1.763312816619873,
"learning_rate": 0.0002,
"loss": 1.1909,
"step": 325
},
{
"epoch": 2.2151898734177213,
"grad_norm": 2.383930206298828,
"learning_rate": 0.0002,
"loss": 0.9682,
"step": 350
},
{
"epoch": 2.3734177215189876,
"grad_norm": 3.6665306091308594,
"learning_rate": 0.0002,
"loss": 1.1693,
"step": 375
},
{
"epoch": 2.5316455696202533,
"grad_norm": 1.7745016813278198,
"learning_rate": 0.0002,
"loss": 1.0193,
"step": 400
},
{
"epoch": 2.689873417721519,
"grad_norm": 1.569421410560608,
"learning_rate": 0.0002,
"loss": 0.9753,
"step": 425
},
{
"epoch": 2.848101265822785,
"grad_norm": 2.2681877613067627,
"learning_rate": 0.0002,
"loss": 1.0567,
"step": 450
},
{
"epoch": 3.0063291139240507,
"grad_norm": 1.752241849899292,
"learning_rate": 0.0002,
"loss": 1.0343,
"step": 475
},
{
"epoch": 3.1645569620253164,
"grad_norm": 0.7529569268226624,
"learning_rate": 0.0002,
"loss": 0.8624,
"step": 500
},
{
"epoch": 3.3227848101265822,
"grad_norm": 2.013693332672119,
"learning_rate": 0.0002,
"loss": 0.9729,
"step": 525
},
{
"epoch": 3.481012658227848,
"grad_norm": 2.212862730026245,
"learning_rate": 0.0002,
"loss": 0.8433,
"step": 550
},
{
"epoch": 3.6392405063291138,
"grad_norm": 2.6525330543518066,
"learning_rate": 0.0002,
"loss": 0.9046,
"step": 575
},
{
"epoch": 3.7974683544303796,
"grad_norm": 1.9108997583389282,
"learning_rate": 0.0002,
"loss": 0.9368,
"step": 600
},
{
"epoch": 3.9556962025316453,
"grad_norm": 1.4593428373336792,
"learning_rate": 0.0002,
"loss": 0.8079,
"step": 625
},
{
"epoch": 4.113924050632911,
"grad_norm": 1.0320943593978882,
"learning_rate": 0.0002,
"loss": 0.8961,
"step": 650
},
{
"epoch": 4.272151898734177,
"grad_norm": 2.041616439819336,
"learning_rate": 0.0002,
"loss": 0.7348,
"step": 675
},
{
"epoch": 4.430379746835443,
"grad_norm": 2.494473457336426,
"learning_rate": 0.0002,
"loss": 0.7822,
"step": 700
},
{
"epoch": 4.588607594936709,
"grad_norm": 1.134831428527832,
"learning_rate": 0.0002,
"loss": 0.8666,
"step": 725
},
{
"epoch": 4.746835443037975,
"grad_norm": 1.860443353652954,
"learning_rate": 0.0002,
"loss": 0.7721,
"step": 750
},
{
"epoch": 4.905063291139241,
"grad_norm": 3.339151620864868,
"learning_rate": 0.0002,
"loss": 0.8407,
"step": 775
},
{
"epoch": 5.063291139240507,
"grad_norm": 1.3228943347930908,
"learning_rate": 0.0002,
"loss": 0.833,
"step": 800
},
{
"epoch": 5.2215189873417724,
"grad_norm": 2.0199851989746094,
"learning_rate": 0.0002,
"loss": 0.6558,
"step": 825
},
{
"epoch": 5.379746835443038,
"grad_norm": 1.0233032703399658,
"learning_rate": 0.0002,
"loss": 0.7571,
"step": 850
},
{
"epoch": 5.537974683544304,
"grad_norm": 1.8455493450164795,
"learning_rate": 0.0002,
"loss": 0.7673,
"step": 875
},
{
"epoch": 5.69620253164557,
"grad_norm": 1.3019192218780518,
"learning_rate": 0.0002,
"loss": 0.6765,
"step": 900
},
{
"epoch": 5.8544303797468356,
"grad_norm": 1.6968228816986084,
"learning_rate": 0.0002,
"loss": 0.8249,
"step": 925
},
{
"epoch": 6.012658227848101,
"grad_norm": 1.5166069269180298,
"learning_rate": 0.0002,
"loss": 0.765,
"step": 950
},
{
"epoch": 6.170886075949367,
"grad_norm": 1.438341498374939,
"learning_rate": 0.0002,
"loss": 0.628,
"step": 975
},
{
"epoch": 6.329113924050633,
"grad_norm": 1.4135054349899292,
"learning_rate": 0.0002,
"loss": 0.7128,
"step": 1000
},
{
"epoch": 6.487341772151899,
"grad_norm": 1.8510311841964722,
"learning_rate": 0.0002,
"loss": 0.6726,
"step": 1025
},
{
"epoch": 6.6455696202531644,
"grad_norm": 0.8984973430633545,
"learning_rate": 0.0002,
"loss": 0.7169,
"step": 1050
},
{
"epoch": 6.80379746835443,
"grad_norm": 1.762295126914978,
"learning_rate": 0.0002,
"loss": 0.7315,
"step": 1075
},
{
"epoch": 6.962025316455696,
"grad_norm": 1.3354698419570923,
"learning_rate": 0.0002,
"loss": 0.6275,
"step": 1100
},
{
"epoch": 7.120253164556962,
"grad_norm": 1.680066466331482,
"learning_rate": 0.0002,
"loss": 0.6706,
"step": 1125
},
{
"epoch": 7.2784810126582276,
"grad_norm": 1.5245403051376343,
"learning_rate": 0.0002,
"loss": 0.6232,
"step": 1150
},
{
"epoch": 7.436708860759493,
"grad_norm": 1.4877965450286865,
"learning_rate": 0.0002,
"loss": 0.5902,
"step": 1175
},
{
"epoch": 7.594936708860759,
"grad_norm": 0.7956791520118713,
"learning_rate": 0.0002,
"loss": 0.6998,
"step": 1200
},
{
"epoch": 7.753164556962025,
"grad_norm": 2.1762688159942627,
"learning_rate": 0.0002,
"loss": 0.7275,
"step": 1225
},
{
"epoch": 7.911392405063291,
"grad_norm": 1.2218317985534668,
"learning_rate": 0.0002,
"loss": 0.6267,
"step": 1250
},
{
"epoch": 8.069620253164556,
"grad_norm": 1.339480996131897,
"learning_rate": 0.0002,
"loss": 0.6799,
"step": 1275
},
{
"epoch": 8.227848101265822,
"grad_norm": 1.3387433290481567,
"learning_rate": 0.0002,
"loss": 0.577,
"step": 1300
},
{
"epoch": 8.386075949367088,
"grad_norm": 1.0354127883911133,
"learning_rate": 0.0002,
"loss": 0.6526,
"step": 1325
},
{
"epoch": 8.544303797468354,
"grad_norm": 1.4868078231811523,
"learning_rate": 0.0002,
"loss": 0.6638,
"step": 1350
},
{
"epoch": 8.70253164556962,
"grad_norm": 0.7492271065711975,
"learning_rate": 0.0002,
"loss": 0.5833,
"step": 1375
},
{
"epoch": 8.860759493670885,
"grad_norm": 1.3193756341934204,
"learning_rate": 0.0002,
"loss": 0.6851,
"step": 1400
},
{
"epoch": 9.018987341772151,
"grad_norm": 1.924387812614441,
"learning_rate": 0.0002,
"loss": 0.6335,
"step": 1425
},
{
"epoch": 9.177215189873417,
"grad_norm": 1.1999796628952026,
"learning_rate": 0.0002,
"loss": 0.4827,
"step": 1450
},
{
"epoch": 9.335443037974684,
"grad_norm": 1.647176742553711,
"learning_rate": 0.0002,
"loss": 0.6423,
"step": 1475
},
{
"epoch": 9.49367088607595,
"grad_norm": 1.3660459518432617,
"learning_rate": 0.0002,
"loss": 0.6176,
"step": 1500
},
{
"epoch": 9.651898734177216,
"grad_norm": 0.9778301119804382,
"learning_rate": 0.0002,
"loss": 0.5802,
"step": 1525
},
{
"epoch": 9.810126582278482,
"grad_norm": 1.5528557300567627,
"learning_rate": 0.0002,
"loss": 0.6645,
"step": 1550
},
{
"epoch": 9.968354430379748,
"grad_norm": 1.8788762092590332,
"learning_rate": 0.0002,
"loss": 0.5932,
"step": 1575
}
],
"logging_steps": 25,
"max_steps": 1580,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 25,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 5358729709043712.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}