franfj's picture
Upload folder using huggingface_hub
ad5a06b verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 2544,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0589622641509434,
"grad_norm": 1.1042355298995972,
"learning_rate": 4.9017295597484283e-05,
"loss": 0.6615,
"step": 50
},
{
"epoch": 0.1179245283018868,
"grad_norm": 2.1375088691711426,
"learning_rate": 4.803459119496855e-05,
"loss": 0.6046,
"step": 100
},
{
"epoch": 0.17688679245283018,
"grad_norm": 2.163625717163086,
"learning_rate": 4.705188679245283e-05,
"loss": 0.5757,
"step": 150
},
{
"epoch": 0.2358490566037736,
"grad_norm": 1.8961758613586426,
"learning_rate": 4.606918238993711e-05,
"loss": 0.5779,
"step": 200
},
{
"epoch": 0.294811320754717,
"grad_norm": 1.833200216293335,
"learning_rate": 4.508647798742139e-05,
"loss": 0.5505,
"step": 250
},
{
"epoch": 0.35377358490566035,
"grad_norm": 1.3514596223831177,
"learning_rate": 4.410377358490566e-05,
"loss": 0.5552,
"step": 300
},
{
"epoch": 0.41273584905660377,
"grad_norm": 1.499182105064392,
"learning_rate": 4.312106918238994e-05,
"loss": 0.5432,
"step": 350
},
{
"epoch": 0.4716981132075472,
"grad_norm": 1.5426216125488281,
"learning_rate": 4.213836477987422e-05,
"loss": 0.528,
"step": 400
},
{
"epoch": 0.5306603773584906,
"grad_norm": 2.095034122467041,
"learning_rate": 4.115566037735849e-05,
"loss": 0.5397,
"step": 450
},
{
"epoch": 0.589622641509434,
"grad_norm": 1.8828486204147339,
"learning_rate": 4.017295597484277e-05,
"loss": 0.5402,
"step": 500
},
{
"epoch": 0.6485849056603774,
"grad_norm": 1.1266496181488037,
"learning_rate": 3.9190251572327046e-05,
"loss": 0.5325,
"step": 550
},
{
"epoch": 0.7075471698113207,
"grad_norm": 1.8232479095458984,
"learning_rate": 3.820754716981133e-05,
"loss": 0.5207,
"step": 600
},
{
"epoch": 0.7665094339622641,
"grad_norm": 1.7382409572601318,
"learning_rate": 3.7224842767295595e-05,
"loss": 0.5174,
"step": 650
},
{
"epoch": 0.8254716981132075,
"grad_norm": 1.7190814018249512,
"learning_rate": 3.6242138364779876e-05,
"loss": 0.5138,
"step": 700
},
{
"epoch": 0.8844339622641509,
"grad_norm": 1.7162833213806152,
"learning_rate": 3.525943396226416e-05,
"loss": 0.5117,
"step": 750
},
{
"epoch": 0.9433962264150944,
"grad_norm": 1.8863391876220703,
"learning_rate": 3.4276729559748424e-05,
"loss": 0.4996,
"step": 800
},
{
"epoch": 1.0,
"eval_accuracy": 0.7362162162162162,
"eval_f1": 0.7330681253107907,
"eval_loss": 0.49908825755119324,
"eval_runtime": 54.2047,
"eval_samples_per_second": 375.429,
"eval_steps_per_second": 3.911,
"step": 848
},
{
"epoch": 1.0023584905660377,
"grad_norm": 1.9871962070465088,
"learning_rate": 3.3294025157232705e-05,
"loss": 0.501,
"step": 850
},
{
"epoch": 1.0613207547169812,
"grad_norm": 3.0909602642059326,
"learning_rate": 3.2311320754716986e-05,
"loss": 0.4468,
"step": 900
},
{
"epoch": 1.1202830188679245,
"grad_norm": 2.0430283546447754,
"learning_rate": 3.132861635220126e-05,
"loss": 0.4495,
"step": 950
},
{
"epoch": 1.179245283018868,
"grad_norm": 2.5914931297302246,
"learning_rate": 3.0345911949685535e-05,
"loss": 0.4515,
"step": 1000
},
{
"epoch": 1.2382075471698113,
"grad_norm": 3.2264254093170166,
"learning_rate": 2.9363207547169812e-05,
"loss": 0.4505,
"step": 1050
},
{
"epoch": 1.2971698113207548,
"grad_norm": 1.447571873664856,
"learning_rate": 2.838050314465409e-05,
"loss": 0.4422,
"step": 1100
},
{
"epoch": 1.3561320754716981,
"grad_norm": 2.17903995513916,
"learning_rate": 2.7397798742138364e-05,
"loss": 0.4465,
"step": 1150
},
{
"epoch": 1.4150943396226414,
"grad_norm": 2.608694076538086,
"learning_rate": 2.641509433962264e-05,
"loss": 0.4445,
"step": 1200
},
{
"epoch": 1.474056603773585,
"grad_norm": 1.0498720407485962,
"learning_rate": 2.543238993710692e-05,
"loss": 0.4554,
"step": 1250
},
{
"epoch": 1.5330188679245285,
"grad_norm": 2.429417133331299,
"learning_rate": 2.4449685534591197e-05,
"loss": 0.4569,
"step": 1300
},
{
"epoch": 1.5919811320754715,
"grad_norm": 1.741003394126892,
"learning_rate": 2.346698113207547e-05,
"loss": 0.4484,
"step": 1350
},
{
"epoch": 1.650943396226415,
"grad_norm": 1.6256601810455322,
"learning_rate": 2.248427672955975e-05,
"loss": 0.4494,
"step": 1400
},
{
"epoch": 1.7099056603773586,
"grad_norm": 1.4815946817398071,
"learning_rate": 2.1501572327044026e-05,
"loss": 0.4444,
"step": 1450
},
{
"epoch": 1.7688679245283019,
"grad_norm": 1.878029227256775,
"learning_rate": 2.0518867924528304e-05,
"loss": 0.4529,
"step": 1500
},
{
"epoch": 1.8278301886792452,
"grad_norm": 1.870025873184204,
"learning_rate": 1.9536163522012578e-05,
"loss": 0.4634,
"step": 1550
},
{
"epoch": 1.8867924528301887,
"grad_norm": 2.8423280715942383,
"learning_rate": 1.8553459119496856e-05,
"loss": 0.4517,
"step": 1600
},
{
"epoch": 1.9457547169811322,
"grad_norm": 1.5151439905166626,
"learning_rate": 1.7570754716981134e-05,
"loss": 0.4226,
"step": 1650
},
{
"epoch": 2.0,
"eval_accuracy": 0.742014742014742,
"eval_f1": 0.7299938284303641,
"eval_loss": 0.49465227127075195,
"eval_runtime": 54.3383,
"eval_samples_per_second": 374.505,
"eval_steps_per_second": 3.901,
"step": 1696
},
{
"epoch": 2.0047169811320753,
"grad_norm": 1.8239262104034424,
"learning_rate": 1.6588050314465408e-05,
"loss": 0.4294,
"step": 1700
},
{
"epoch": 2.063679245283019,
"grad_norm": 5.227139472961426,
"learning_rate": 1.5605345911949685e-05,
"loss": 0.3895,
"step": 1750
},
{
"epoch": 2.1226415094339623,
"grad_norm": 3.1078689098358154,
"learning_rate": 1.4622641509433963e-05,
"loss": 0.3782,
"step": 1800
},
{
"epoch": 2.1816037735849054,
"grad_norm": 2.545759916305542,
"learning_rate": 1.363993710691824e-05,
"loss": 0.3835,
"step": 1850
},
{
"epoch": 2.240566037735849,
"grad_norm": 2.3632497787475586,
"learning_rate": 1.2657232704402517e-05,
"loss": 0.3899,
"step": 1900
},
{
"epoch": 2.2995283018867925,
"grad_norm": 3.032485008239746,
"learning_rate": 1.1674528301886793e-05,
"loss": 0.3714,
"step": 1950
},
{
"epoch": 2.358490566037736,
"grad_norm": 3.313594341278076,
"learning_rate": 1.069182389937107e-05,
"loss": 0.3809,
"step": 2000
},
{
"epoch": 2.417452830188679,
"grad_norm": 2.6334567070007324,
"learning_rate": 9.709119496855348e-06,
"loss": 0.3752,
"step": 2050
},
{
"epoch": 2.4764150943396226,
"grad_norm": 2.706216812133789,
"learning_rate": 8.726415094339622e-06,
"loss": 0.3917,
"step": 2100
},
{
"epoch": 2.535377358490566,
"grad_norm": 3.003523588180542,
"learning_rate": 7.7437106918239e-06,
"loss": 0.3715,
"step": 2150
},
{
"epoch": 2.5943396226415096,
"grad_norm": 3.0342845916748047,
"learning_rate": 6.761006289308176e-06,
"loss": 0.3875,
"step": 2200
},
{
"epoch": 2.6533018867924527,
"grad_norm": 2.371635913848877,
"learning_rate": 5.778301886792453e-06,
"loss": 0.381,
"step": 2250
},
{
"epoch": 2.7122641509433962,
"grad_norm": 2.7369866371154785,
"learning_rate": 4.79559748427673e-06,
"loss": 0.3785,
"step": 2300
},
{
"epoch": 2.7712264150943398,
"grad_norm": 4.16819953918457,
"learning_rate": 3.8128930817610063e-06,
"loss": 0.3897,
"step": 2350
},
{
"epoch": 2.830188679245283,
"grad_norm": 1.599187970161438,
"learning_rate": 2.830188679245283e-06,
"loss": 0.381,
"step": 2400
},
{
"epoch": 2.8891509433962264,
"grad_norm": 1.6142021417617798,
"learning_rate": 1.8474842767295599e-06,
"loss": 0.3773,
"step": 2450
},
{
"epoch": 2.94811320754717,
"grad_norm": 5.334159851074219,
"learning_rate": 8.647798742138365e-07,
"loss": 0.3814,
"step": 2500
},
{
"epoch": 3.0,
"eval_accuracy": 0.7441769041769042,
"eval_f1": 0.7459992193598751,
"eval_loss": 0.5200024247169495,
"eval_runtime": 54.3504,
"eval_samples_per_second": 374.422,
"eval_steps_per_second": 3.901,
"step": 2544
}
],
"logging_steps": 50,
"max_steps": 2544,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.234814134959923e+16,
"train_batch_size": 96,
"trial_name": null,
"trial_params": null
}