MHGanainy/gpt2-xl-lora-multi-3 / trainer_state.json (commit fa018c9)
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 5746,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01740341106856944,
"grad_norm": 0.046589791774749756,
"learning_rate": 4.351610095735422e-07,
"loss": 2.7999,
"step": 100
},
{
"epoch": 0.03480682213713888,
"grad_norm": 0.04616040736436844,
"learning_rate": 8.703220191470844e-07,
"loss": 2.7996,
"step": 200
},
{
"epoch": 0.05221023320570832,
"grad_norm": 0.05549981817603111,
"learning_rate": 1.305483028720627e-06,
"loss": 2.797,
"step": 300
},
{
"epoch": 0.06961364427427776,
"grad_norm": 0.063571035861969,
"learning_rate": 1.7406440382941688e-06,
"loss": 2.7909,
"step": 400
},
{
"epoch": 0.0870170553428472,
"grad_norm": 0.08422163128852844,
"learning_rate": 2.1758050478677113e-06,
"loss": 2.7951,
"step": 500
},
{
"epoch": 0.10442046641141664,
"grad_norm": 0.09257014095783234,
"learning_rate": 2.610966057441254e-06,
"loss": 2.7803,
"step": 600
},
{
"epoch": 0.12182387747998608,
"grad_norm": 0.11055697500705719,
"learning_rate": 3.046127067014796e-06,
"loss": 2.7681,
"step": 700
},
{
"epoch": 0.1392272885485555,
"grad_norm": 0.10759040713310242,
"learning_rate": 3.4812880765883376e-06,
"loss": 2.7611,
"step": 800
},
{
"epoch": 0.15663069961712495,
"grad_norm": 0.12318646907806396,
"learning_rate": 3.9164490861618806e-06,
"loss": 2.7402,
"step": 900
},
{
"epoch": 0.1740341106856944,
"grad_norm": 0.12962989509105682,
"learning_rate": 4.351610095735423e-06,
"loss": 2.7451,
"step": 1000
},
{
"epoch": 0.19143752175426385,
"grad_norm": 0.13981275260448456,
"learning_rate": 4.786771105308965e-06,
"loss": 2.735,
"step": 1100
},
{
"epoch": 0.20884093282283328,
"grad_norm": 0.14711035788059235,
"learning_rate": 5.221932114882508e-06,
"loss": 2.7469,
"step": 1200
},
{
"epoch": 0.22624434389140272,
"grad_norm": 0.15727241337299347,
"learning_rate": 5.657093124456049e-06,
"loss": 2.7327,
"step": 1300
},
{
"epoch": 0.24364775495997215,
"grad_norm": 0.15055705606937408,
"learning_rate": 6.092254134029592e-06,
"loss": 2.7234,
"step": 1400
},
{
"epoch": 0.2610511660285416,
"grad_norm": 0.16661331057548523,
"learning_rate": 6.527415143603134e-06,
"loss": 2.7174,
"step": 1500
},
{
"epoch": 0.278454577097111,
"grad_norm": 0.17976854741573334,
"learning_rate": 6.962576153176675e-06,
"loss": 2.719,
"step": 1600
},
{
"epoch": 0.2958579881656805,
"grad_norm": 0.1790621429681778,
"learning_rate": 7.397737162750218e-06,
"loss": 2.7173,
"step": 1700
},
{
"epoch": 0.3132613992342499,
"grad_norm": 0.19079644978046417,
"learning_rate": 7.832898172323761e-06,
"loss": 2.7131,
"step": 1800
},
{
"epoch": 0.33066481030281936,
"grad_norm": 0.19005636870861053,
"learning_rate": 8.268059181897302e-06,
"loss": 2.7168,
"step": 1900
},
{
"epoch": 0.3480682213713888,
"grad_norm": 0.19910404086112976,
"learning_rate": 8.703220191470845e-06,
"loss": 2.7061,
"step": 2000
},
{
"epoch": 0.3654716324399582,
"grad_norm": 0.20510949194431305,
"learning_rate": 9.138381201044387e-06,
"loss": 2.6862,
"step": 2100
},
{
"epoch": 0.3828750435085277,
"grad_norm": 0.20418143272399902,
"learning_rate": 9.57354221061793e-06,
"loss": 2.6802,
"step": 2200
},
{
"epoch": 0.4002784545770971,
"grad_norm": 0.21713656187057495,
"learning_rate": 1.000870322019147e-05,
"loss": 2.6923,
"step": 2300
},
{
"epoch": 0.41768186564566656,
"grad_norm": 0.2298802137374878,
"learning_rate": 1.0443864229765015e-05,
"loss": 2.6818,
"step": 2400
},
{
"epoch": 0.43508527671423597,
"grad_norm": 0.2294008880853653,
"learning_rate": 1.0879025239338557e-05,
"loss": 2.6896,
"step": 2500
},
{
"epoch": 0.45248868778280543,
"grad_norm": 0.21464629471302032,
"learning_rate": 1.1314186248912098e-05,
"loss": 2.6805,
"step": 2600
},
{
"epoch": 0.4698920988513749,
"grad_norm": 0.25449061393737793,
"learning_rate": 1.174934725848564e-05,
"loss": 2.6806,
"step": 2700
},
{
"epoch": 0.4872955099199443,
"grad_norm": 0.24079586565494537,
"learning_rate": 1.2184508268059184e-05,
"loss": 2.6844,
"step": 2800
},
{
"epoch": 0.5046989209885138,
"grad_norm": 0.2414436638355255,
"learning_rate": 1.2619669277632725e-05,
"loss": 2.6817,
"step": 2900
},
{
"epoch": 0.5221023320570832,
"grad_norm": 0.2530564069747925,
"learning_rate": 1.3054830287206268e-05,
"loss": 2.6556,
"step": 3000
},
{
"epoch": 0.5395057431256526,
"grad_norm": 0.26441535353660583,
"learning_rate": 1.348999129677981e-05,
"loss": 2.6749,
"step": 3100
},
{
"epoch": 0.556909154194222,
"grad_norm": 0.2584131062030792,
"learning_rate": 1.392515230635335e-05,
"loss": 2.6575,
"step": 3200
},
{
"epoch": 0.5743125652627915,
"grad_norm": 0.25025609135627747,
"learning_rate": 1.4360313315926895e-05,
"loss": 2.6658,
"step": 3300
},
{
"epoch": 0.591715976331361,
"grad_norm": 0.26518625020980835,
"learning_rate": 1.4795474325500436e-05,
"loss": 2.6586,
"step": 3400
},
{
"epoch": 0.6091193873999304,
"grad_norm": 0.26597312092781067,
"learning_rate": 1.5230635335073978e-05,
"loss": 2.6451,
"step": 3500
},
{
"epoch": 0.6265227984684998,
"grad_norm": 0.2725384831428528,
"learning_rate": 1.5665796344647522e-05,
"loss": 2.6521,
"step": 3600
},
{
"epoch": 0.6439262095370692,
"grad_norm": 0.2752222716808319,
"learning_rate": 1.6100957354221064e-05,
"loss": 2.6398,
"step": 3700
},
{
"epoch": 0.6613296206056387,
"grad_norm": 0.2558598518371582,
"learning_rate": 1.6536118363794605e-05,
"loss": 2.6486,
"step": 3800
},
{
"epoch": 0.6787330316742082,
"grad_norm": 0.26938167214393616,
"learning_rate": 1.697127937336815e-05,
"loss": 2.641,
"step": 3900
},
{
"epoch": 0.6961364427427776,
"grad_norm": 0.28793784976005554,
"learning_rate": 1.740644038294169e-05,
"loss": 2.6344,
"step": 4000
},
{
"epoch": 0.713539853811347,
"grad_norm": 0.2677360773086548,
"learning_rate": 1.7841601392515232e-05,
"loss": 2.6542,
"step": 4100
},
{
"epoch": 0.7309432648799165,
"grad_norm": 0.28143930435180664,
"learning_rate": 1.8276762402088773e-05,
"loss": 2.6446,
"step": 4200
},
{
"epoch": 0.7483466759484859,
"grad_norm": 0.28870299458503723,
"learning_rate": 1.8711923411662314e-05,
"loss": 2.6243,
"step": 4300
},
{
"epoch": 0.7657500870170554,
"grad_norm": 0.296633780002594,
"learning_rate": 1.914708442123586e-05,
"loss": 2.6306,
"step": 4400
},
{
"epoch": 0.7831534980856247,
"grad_norm": 0.2806219160556793,
"learning_rate": 1.95822454308094e-05,
"loss": 2.6356,
"step": 4500
},
{
"epoch": 0.8005569091541942,
"grad_norm": 0.2914940416812897,
"learning_rate": 1.999940297883134e-05,
"loss": 2.644,
"step": 4600
},
{
"epoch": 0.8179603202227637,
"grad_norm": 0.28510311245918274,
"learning_rate": 1.9599117132813187e-05,
"loss": 2.6357,
"step": 4700
},
{
"epoch": 0.8353637312913331,
"grad_norm": 0.3171123266220093,
"learning_rate": 1.8486908682093175e-05,
"loss": 2.6307,
"step": 4800
},
{
"epoch": 0.8527671423599026,
"grad_norm": 0.2955775558948517,
"learning_rate": 1.674526503944611e-05,
"loss": 2.6315,
"step": 4900
},
{
"epoch": 0.8701705534284719,
"grad_norm": 0.2767013907432556,
"learning_rate": 1.450335594635761e-05,
"loss": 2.6138,
"step": 5000
},
{
"epoch": 0.8875739644970414,
"grad_norm": 0.27960339188575745,
"learning_rate": 1.1927453544210397e-05,
"loss": 2.6305,
"step": 5100
},
{
"epoch": 0.9049773755656109,
"grad_norm": 0.31521016359329224,
"learning_rate": 9.20860073020234e-06,
"loss": 2.6249,
"step": 5200
},
{
"epoch": 0.9223807866341803,
"grad_norm": 0.2640378773212433,
"learning_rate": 6.548442379624425e-06,
"loss": 2.6257,
"step": 5300
},
{
"epoch": 0.9397841977027498,
"grad_norm": 0.28068870306015015,
"learning_rate": 4.144270267924306e-06,
"loss": 2.6261,
"step": 5400
},
{
"epoch": 0.9571876087713191,
"grad_norm": 0.2999429702758789,
"learning_rate": 2.1743908422712135e-06,
"loss": 2.6245,
"step": 5500
},
{
"epoch": 0.9745910198398886,
"grad_norm": 0.2793658971786499,
"learning_rate": 7.849010480670938e-07,
"loss": 2.6209,
"step": 5600
},
{
"epoch": 0.9919944309084581,
"grad_norm": 0.30049070715904236,
"learning_rate": 7.885298685522235e-08,
"loss": 2.6215,
"step": 5700
},
{
"epoch": 1.0,
"step": 5746,
"total_flos": 8.372955480242258e+17,
"train_loss": 2.6846868539578486,
"train_runtime": 1624.688,
"train_samples_per_second": 56.585,
"train_steps_per_second": 3.537
}
],
"logging_steps": 100,
"max_steps": 5746,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 8.372955480242258e+17,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}
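
The entries in log_history above are the per-100-step training logs (logging_steps is 100), and the final entry is the run summary (train_loss 2.6847 over 5746 steps in about 1625 s). A minimal sketch, not part of the original file, of reading those fields with the standard json module, assuming the file has been saved locally as trainer_state.json:

```python
import json

# Load the trainer state dumped by the Hugging Face Trainer.
# The local path "trainer_state.json" is an assumption.
with open("trainer_state.json") as f:
    state = json.load(f)

# All but the last log_history entry are per-100-step logs;
# the last entry holds the run summary (train_loss, runtime, throughput).
*step_logs, summary = state["log_history"]

for entry in step_logs:
    print(f"step {entry['step']:>5}  loss {entry['loss']:.4f}  "
          f"lr {entry['learning_rate']:.2e}  grad_norm {entry['grad_norm']:.3f}")

print(f"final train_loss: {summary['train_loss']:.4f} "
      f"over {summary['step']} steps ({state['epoch']} epoch)")
```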