qlora70b/checkpoint-47/trainer_state.json
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 12,
"global_step": 47,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02127659574468085,
"grad_norm": 0.42955278162899063,
"learning_rate": 1.0000000000000002e-06,
"loss": 1.1272,
"step": 1
},
{
"epoch": 0.02127659574468085,
"eval_loss": 1.130263328552246,
"eval_runtime": 46.1353,
"eval_samples_per_second": 2.146,
"eval_steps_per_second": 0.217,
"step": 1
},
{
"epoch": 0.0425531914893617,
"grad_norm": 0.35905455037073136,
"learning_rate": 2.0000000000000003e-06,
"loss": 1.2237,
"step": 2
},
{
"epoch": 0.06382978723404255,
"grad_norm": 0.3859662975436329,
"learning_rate": 3e-06,
"loss": 1.2109,
"step": 3
},
{
"epoch": 0.0851063829787234,
"grad_norm": 0.3458897777003975,
"learning_rate": 4.000000000000001e-06,
"loss": 1.3168,
"step": 4
},
{
"epoch": 0.10638297872340426,
"grad_norm": 0.297024954598104,
"learning_rate": 5e-06,
"loss": 1.1879,
"step": 5
},
{
"epoch": 0.1276595744680851,
"grad_norm": 0.2974375637230888,
"learning_rate": 6e-06,
"loss": 1.1898,
"step": 6
},
{
"epoch": 0.14893617021276595,
"grad_norm": 0.35483433691691424,
"learning_rate": 7e-06,
"loss": 1.2275,
"step": 7
},
{
"epoch": 0.1702127659574468,
"grad_norm": 0.3817564026852017,
"learning_rate": 8.000000000000001e-06,
"loss": 1.2163,
"step": 8
},
{
"epoch": 0.19148936170212766,
"grad_norm": 0.40183241982975176,
"learning_rate": 9e-06,
"loss": 1.0668,
"step": 9
},
{
"epoch": 0.2127659574468085,
"grad_norm": 0.3748400230734179,
"learning_rate": 1e-05,
"loss": 1.2589,
"step": 10
},
{
"epoch": 0.23404255319148937,
"grad_norm": 0.4732373123638865,
"learning_rate": 9.981987442712634e-06,
"loss": 1.209,
"step": 11
},
{
"epoch": 0.2553191489361702,
"grad_norm": 0.34097014836642403,
"learning_rate": 9.928079551738542e-06,
"loss": 1.1222,
"step": 12
},
{
"epoch": 0.2553191489361702,
"eval_loss": 1.1231276988983154,
"eval_runtime": 45.5524,
"eval_samples_per_second": 2.173,
"eval_steps_per_second": 0.22,
"step": 12
},
{
"epoch": 0.2765957446808511,
"grad_norm": 0.3865545405035091,
"learning_rate": 9.838664734667496e-06,
"loss": 1.2509,
"step": 13
},
{
"epoch": 0.2978723404255319,
"grad_norm": 0.5747305954760134,
"learning_rate": 9.714387227305422e-06,
"loss": 1.1448,
"step": 14
},
{
"epoch": 0.3191489361702128,
"grad_norm": 0.43342726982955576,
"learning_rate": 9.55614245194068e-06,
"loss": 1.1433,
"step": 15
},
{
"epoch": 0.3404255319148936,
"grad_norm": 0.38621360188314374,
"learning_rate": 9.365070565805941e-06,
"loss": 1.2438,
"step": 16
},
{
"epoch": 0.3617021276595745,
"grad_norm": 0.4586858888503175,
"learning_rate": 9.142548246219212e-06,
"loss": 1.217,
"step": 17
},
{
"epoch": 0.3829787234042553,
"grad_norm": 0.4605021995799594,
"learning_rate": 8.890178771592198e-06,
"loss": 1.1023,
"step": 18
},
{
"epoch": 0.40425531914893614,
"grad_norm": 0.4149444720869162,
"learning_rate": 8.609780469772623e-06,
"loss": 1.1057,
"step": 19
},
{
"epoch": 0.425531914893617,
"grad_norm": 0.4776677990058847,
"learning_rate": 8.303373616950408e-06,
"loss": 1.1878,
"step": 20
},
{
"epoch": 0.44680851063829785,
"grad_norm": 0.6083128734022943,
"learning_rate": 7.973165881521435e-06,
"loss": 1.0719,
"step": 21
},
{
"epoch": 0.46808510638297873,
"grad_norm": 0.427407991852223,
"learning_rate": 7.621536417786159e-06,
"loss": 1.1414,
"step": 22
},
{
"epoch": 0.48936170212765956,
"grad_norm": 0.41689291665199874,
"learning_rate": 7.251018724088367e-06,
"loss": 1.1651,
"step": 23
},
{
"epoch": 0.5106382978723404,
"grad_norm": 0.5049216829996077,
"learning_rate": 6.864282388901544e-06,
"loss": 1.1099,
"step": 24
},
{
"epoch": 0.5106382978723404,
"eval_loss": 1.0642483234405518,
"eval_runtime": 45.5272,
"eval_samples_per_second": 2.175,
"eval_steps_per_second": 0.22,
"step": 24
},
{
"epoch": 0.5319148936170213,
"grad_norm": 0.3940106585784572,
"learning_rate": 6.464113856382752e-06,
"loss": 0.9954,
"step": 25
},
{
"epoch": 0.5531914893617021,
"grad_norm": 0.35275642793537804,
"learning_rate": 6.053396349978632e-06,
"loss": 1.0469,
"step": 26
},
{
"epoch": 0.574468085106383,
"grad_norm": 0.5184071159973599,
"learning_rate": 5.635089098734394e-06,
"loss": 1.1481,
"step": 27
},
{
"epoch": 0.5957446808510638,
"grad_norm": 0.4370219832674898,
"learning_rate": 5.212206015980742e-06,
"loss": 1.1364,
"step": 28
},
{
"epoch": 0.6170212765957447,
"grad_norm": 0.36478900402279923,
"learning_rate": 4.78779398401926e-06,
"loss": 1.0762,
"step": 29
},
{
"epoch": 0.6382978723404256,
"grad_norm": 0.40328712656577087,
"learning_rate": 4.364910901265607e-06,
"loss": 1.085,
"step": 30
},
{
"epoch": 0.6595744680851063,
"grad_norm": 0.341826695457334,
"learning_rate": 3.94660365002137e-06,
"loss": 1.0013,
"step": 31
},
{
"epoch": 0.6808510638297872,
"grad_norm": 0.4177223934587119,
"learning_rate": 3.5358861436172487e-06,
"loss": 1.0372,
"step": 32
},
{
"epoch": 0.7021276595744681,
"grad_norm": 0.4218122745580511,
"learning_rate": 3.1357176110984578e-06,
"loss": 0.9904,
"step": 33
},
{
"epoch": 0.723404255319149,
"grad_norm": 0.3778418743271345,
"learning_rate": 2.748981275911633e-06,
"loss": 1.0609,
"step": 34
},
{
"epoch": 0.7446808510638298,
"grad_norm": 0.5024526255778048,
"learning_rate": 2.3784635822138424e-06,
"loss": 1.0193,
"step": 35
},
{
"epoch": 0.7659574468085106,
"grad_norm": 0.29310915204897864,
"learning_rate": 2.0268341184785674e-06,
"loss": 1.105,
"step": 36
},
{
"epoch": 0.7659574468085106,
"eval_loss": 1.0241957902908325,
"eval_runtime": 45.4699,
"eval_samples_per_second": 2.177,
"eval_steps_per_second": 0.22,
"step": 36
},
{
"epoch": 0.7872340425531915,
"grad_norm": 0.38019500098636755,
"learning_rate": 1.6966263830495939e-06,
"loss": 1.122,
"step": 37
},
{
"epoch": 0.8085106382978723,
"grad_norm": 0.4515729624152646,
"learning_rate": 1.390219530227378e-06,
"loss": 1.1268,
"step": 38
},
{
"epoch": 0.8297872340425532,
"grad_norm": 0.3174904808093991,
"learning_rate": 1.1098212284078037e-06,
"loss": 1.1366,
"step": 39
},
{
"epoch": 0.851063829787234,
"grad_norm": 0.32824416887732244,
"learning_rate": 8.574517537807897e-07,
"loss": 1.085,
"step": 40
},
{
"epoch": 0.8723404255319149,
"grad_norm": 0.3723789314131866,
"learning_rate": 6.349294341940593e-07,
"loss": 1.0074,
"step": 41
},
{
"epoch": 0.8936170212765957,
"grad_norm": 0.3863665218570595,
"learning_rate": 4.43857548059321e-07,
"loss": 1.0786,
"step": 42
},
{
"epoch": 0.9148936170212766,
"grad_norm": 0.28822889001191754,
"learning_rate": 2.85612772694579e-07,
"loss": 1.0286,
"step": 43
},
{
"epoch": 0.9361702127659575,
"grad_norm": 0.4310630390723365,
"learning_rate": 1.6133526533250566e-07,
"loss": 0.8951,
"step": 44
},
{
"epoch": 0.9574468085106383,
"grad_norm": 0.3151119202195982,
"learning_rate": 7.192044826145772e-08,
"loss": 0.9489,
"step": 45
},
{
"epoch": 0.9787234042553191,
"grad_norm": 0.3577888537210098,
"learning_rate": 1.8012557287367394e-08,
"loss": 1.1146,
"step": 46
},
{
"epoch": 1.0,
"grad_norm": 0.383122905902664,
"learning_rate": 0.0,
"loss": 1.031,
"step": 47
}
],
"logging_steps": 1,
"max_steps": 47,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"total_flos": 286172412641280.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}