{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 28366,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.017626736233519003,
"grad_norm": 5.204168796539307,
"learning_rate": 4.9118663188324054e-05,
"loss": 3.9535,
"step": 500
},
{
"epoch": 0.035253472467038006,
"grad_norm": 5.162827968597412,
"learning_rate": 4.82373263766481e-05,
"loss": 3.761,
"step": 1000
},
{
"epoch": 0.052880208700557006,
"grad_norm": 5.309798240661621,
"learning_rate": 4.735598956497215e-05,
"loss": 3.7096,
"step": 1500
},
{
"epoch": 0.07050694493407601,
"grad_norm": 5.0922369956970215,
"learning_rate": 4.64746527532962e-05,
"loss": 3.6577,
"step": 2000
},
{
"epoch": 0.08813368116759501,
"grad_norm": 5.067632675170898,
"learning_rate": 4.559331594162025e-05,
"loss": 3.6288,
"step": 2500
},
{
"epoch": 0.10576041740111401,
"grad_norm": 5.3605475425720215,
"learning_rate": 4.4711979129944304e-05,
"loss": 3.6192,
"step": 3000
},
{
"epoch": 0.12338715363463301,
"grad_norm": 5.510789394378662,
"learning_rate": 4.383064231826835e-05,
"loss": 3.559,
"step": 3500
},
{
"epoch": 0.14101388986815203,
"grad_norm": 5.7333855628967285,
"learning_rate": 4.29493055065924e-05,
"loss": 3.5382,
"step": 4000
},
{
"epoch": 0.158640626101671,
"grad_norm": 5.04295539855957,
"learning_rate": 4.206796869491645e-05,
"loss": 3.4962,
"step": 4500
},
{
"epoch": 0.17626736233519003,
"grad_norm": 4.932398796081543,
"learning_rate": 4.11866318832405e-05,
"loss": 3.5339,
"step": 5000
},
{
"epoch": 0.193894098568709,
"grad_norm": 5.262182235717773,
"learning_rate": 4.0305295071564555e-05,
"loss": 3.4758,
"step": 5500
},
{
"epoch": 0.21152083480222802,
"grad_norm": 5.248316764831543,
"learning_rate": 3.94239582598886e-05,
"loss": 3.4524,
"step": 6000
},
{
"epoch": 0.229147571035747,
"grad_norm": 5.176753520965576,
"learning_rate": 3.854262144821265e-05,
"loss": 3.4403,
"step": 6500
},
{
"epoch": 0.24677430726926602,
"grad_norm": 5.396851539611816,
"learning_rate": 3.76612846365367e-05,
"loss": 3.4066,
"step": 7000
},
{
"epoch": 0.26440104350278504,
"grad_norm": 4.905313968658447,
"learning_rate": 3.677994782486075e-05,
"loss": 3.4277,
"step": 7500
},
{
"epoch": 0.28202777973630405,
"grad_norm": 5.581764221191406,
"learning_rate": 3.58986110131848e-05,
"loss": 3.3977,
"step": 8000
},
{
"epoch": 0.299654515969823,
"grad_norm": 4.564020156860352,
"learning_rate": 3.501727420150885e-05,
"loss": 3.3739,
"step": 8500
},
{
"epoch": 0.317281252203342,
"grad_norm": 5.451286315917969,
"learning_rate": 3.41359373898329e-05,
"loss": 3.3724,
"step": 9000
},
{
"epoch": 0.33490798843686104,
"grad_norm": 5.060819149017334,
"learning_rate": 3.325460057815695e-05,
"loss": 3.3393,
"step": 9500
},
{
"epoch": 0.35253472467038005,
"grad_norm": 5.474411487579346,
"learning_rate": 3.2373263766481e-05,
"loss": 3.3186,
"step": 10000
},
{
"epoch": 0.370161460903899,
"grad_norm": 5.26786994934082,
"learning_rate": 3.149192695480505e-05,
"loss": 3.3223,
"step": 10500
},
{
"epoch": 0.387788197137418,
"grad_norm": 5.467500686645508,
"learning_rate": 3.06105901431291e-05,
"loss": 3.3054,
"step": 11000
},
{
"epoch": 0.40541493337093704,
"grad_norm": 5.263679027557373,
"learning_rate": 2.972925333145315e-05,
"loss": 3.3193,
"step": 11500
},
{
"epoch": 0.42304166960445605,
"grad_norm": 4.835860729217529,
"learning_rate": 2.88479165197772e-05,
"loss": 3.2871,
"step": 12000
},
{
"epoch": 0.44066840583797506,
"grad_norm": 4.88271951675415,
"learning_rate": 2.7966579708101248e-05,
"loss": 3.2783,
"step": 12500
},
{
"epoch": 0.458295142071494,
"grad_norm": 5.228416442871094,
"learning_rate": 2.70852428964253e-05,
"loss": 3.2845,
"step": 13000
},
{
"epoch": 0.47592187830501304,
"grad_norm": 5.097890853881836,
"learning_rate": 2.6203906084749348e-05,
"loss": 3.2731,
"step": 13500
},
{
"epoch": 0.49354861453853205,
"grad_norm": 4.9926066398620605,
"learning_rate": 2.53225692730734e-05,
"loss": 3.27,
"step": 14000
},
{
"epoch": 0.511175350772051,
"grad_norm": 5.329204559326172,
"learning_rate": 2.4441232461397447e-05,
"loss": 3.253,
"step": 14500
},
{
"epoch": 0.5288020870055701,
"grad_norm": 4.740358352661133,
"learning_rate": 2.35598956497215e-05,
"loss": 3.2511,
"step": 15000
},
{
"epoch": 0.546428823239089,
"grad_norm": 5.418153285980225,
"learning_rate": 2.267855883804555e-05,
"loss": 3.2315,
"step": 15500
},
{
"epoch": 0.5640555594726081,
"grad_norm": 4.993420600891113,
"learning_rate": 2.1797222026369598e-05,
"loss": 3.2453,
"step": 16000
},
{
"epoch": 0.5816822957061271,
"grad_norm": 5.474274635314941,
"learning_rate": 2.091588521469365e-05,
"loss": 3.2328,
"step": 16500
},
{
"epoch": 0.599309031939646,
"grad_norm": 4.977609157562256,
"learning_rate": 2.0034548403017698e-05,
"loss": 3.2181,
"step": 17000
},
{
"epoch": 0.6169357681731651,
"grad_norm": 4.982664585113525,
"learning_rate": 1.915321159134175e-05,
"loss": 3.2106,
"step": 17500
},
{
"epoch": 0.634562504406684,
"grad_norm": 5.291051387786865,
"learning_rate": 1.8271874779665797e-05,
"loss": 3.2134,
"step": 18000
},
{
"epoch": 0.652189240640203,
"grad_norm": 5.687000751495361,
"learning_rate": 1.739053796798985e-05,
"loss": 3.1905,
"step": 18500
},
{
"epoch": 0.6698159768737221,
"grad_norm": 5.048547267913818,
"learning_rate": 1.6509201156313897e-05,
"loss": 3.2165,
"step": 19000
},
{
"epoch": 0.687442713107241,
"grad_norm": 5.21890926361084,
"learning_rate": 1.5627864344637945e-05,
"loss": 3.216,
"step": 19500
},
{
"epoch": 0.7050694493407601,
"grad_norm": 4.901352405548096,
"learning_rate": 1.4746527532961998e-05,
"loss": 3.1903,
"step": 20000
},
{
"epoch": 0.7226961855742791,
"grad_norm": 5.835772514343262,
"learning_rate": 1.3865190721286048e-05,
"loss": 3.1971,
"step": 20500
},
{
"epoch": 0.740322921807798,
"grad_norm": 4.900722503662109,
"learning_rate": 1.2983853909610097e-05,
"loss": 3.1832,
"step": 21000
},
{
"epoch": 0.7579496580413171,
"grad_norm": 4.764721870422363,
"learning_rate": 1.2102517097934147e-05,
"loss": 3.1808,
"step": 21500
},
{
"epoch": 0.775576394274836,
"grad_norm": 5.3555731773376465,
"learning_rate": 1.1221180286258197e-05,
"loss": 3.1847,
"step": 22000
},
{
"epoch": 0.7932031305083551,
"grad_norm": 5.72691535949707,
"learning_rate": 1.0339843474582247e-05,
"loss": 3.1689,
"step": 22500
},
{
"epoch": 0.8108298667418741,
"grad_norm": 5.263107776641846,
"learning_rate": 9.458506662906296e-06,
"loss": 3.1666,
"step": 23000
},
{
"epoch": 0.828456602975393,
"grad_norm": 5.273736476898193,
"learning_rate": 8.577169851230346e-06,
"loss": 3.1583,
"step": 23500
},
{
"epoch": 0.8460833392089121,
"grad_norm": 5.418051719665527,
"learning_rate": 7.695833039554396e-06,
"loss": 3.1429,
"step": 24000
},
{
"epoch": 0.8637100754424311,
"grad_norm": 4.837016582489014,
"learning_rate": 6.814496227878446e-06,
"loss": 3.1831,
"step": 24500
},
{
"epoch": 0.8813368116759501,
"grad_norm": 5.3440680503845215,
"learning_rate": 5.933159416202496e-06,
"loss": 3.151,
"step": 25000
},
{
"epoch": 0.8989635479094691,
"grad_norm": 5.674468517303467,
"learning_rate": 5.051822604526546e-06,
"loss": 3.142,
"step": 25500
},
{
"epoch": 0.916590284142988,
"grad_norm": 5.245038986206055,
"learning_rate": 4.170485792850596e-06,
"loss": 3.1537,
"step": 26000
},
{
"epoch": 0.9342170203765071,
"grad_norm": 5.040459632873535,
"learning_rate": 3.289148981174646e-06,
"loss": 3.1496,
"step": 26500
},
{
"epoch": 0.9518437566100261,
"grad_norm": 4.918792724609375,
"learning_rate": 2.4078121694986958e-06,
"loss": 3.1541,
"step": 27000
},
{
"epoch": 0.9694704928435451,
"grad_norm": 5.169427394866943,
"learning_rate": 1.5264753578227457e-06,
"loss": 3.1609,
"step": 27500
},
{
"epoch": 0.9870972290770641,
"grad_norm": 5.406129837036133,
"learning_rate": 6.451385461467955e-07,
"loss": 3.1467,
"step": 28000
},
{
"epoch": 1.0,
"step": 28366,
"total_flos": 1.5038202327662592e+16,
"train_loss": 3.3210895868885175,
"train_runtime": 6630.0799,
"train_samples_per_second": 34.227,
"train_steps_per_second": 4.278
}
],
"logging_steps": 500,
"max_steps": 28366,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.5038202327662592e+16,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}