{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.07226442208991993,
"eval_steps": 88,
"global_step": 88,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.000821186614658181,
"grad_norm": 4.866796016693115,
"learning_rate": 2e-05,
"loss": 1.5096,
"step": 1
},
{
"epoch": 0.001642373229316362,
"grad_norm": 6.223461627960205,
"learning_rate": 4e-05,
"loss": 1.4917,
"step": 2
},
{
"epoch": 0.002463559843974543,
"grad_norm": 2.4966073036193848,
"learning_rate": 6e-05,
"loss": 1.474,
"step": 3
},
{
"epoch": 0.003284746458632724,
"grad_norm": 3.924987316131592,
"learning_rate": 8e-05,
"loss": 1.4303,
"step": 4
},
{
"epoch": 0.0041059330732909054,
"grad_norm": 1.223948359489441,
"learning_rate": 0.0001,
"loss": 1.2468,
"step": 5
},
{
"epoch": 0.004927119687949086,
"grad_norm": 0.7974618077278137,
"learning_rate": 9.997257268239166e-05,
"loss": 1.2757,
"step": 6
},
{
"epoch": 0.005748306302607267,
"grad_norm": 0.3620994985103607,
"learning_rate": 9.994514536478333e-05,
"loss": 1.1615,
"step": 7
},
{
"epoch": 0.006569492917265448,
"grad_norm": 0.2654111087322235,
"learning_rate": 9.9917718047175e-05,
"loss": 1.0671,
"step": 8
},
{
"epoch": 0.00739067953192363,
"grad_norm": 0.2543610632419586,
"learning_rate": 9.989029072956665e-05,
"loss": 0.9546,
"step": 9
},
{
"epoch": 0.008211866146581811,
"grad_norm": 0.2834194600582123,
"learning_rate": 9.986286341195832e-05,
"loss": 0.8958,
"step": 10
},
{
"epoch": 0.009033052761239991,
"grad_norm": 0.43697845935821533,
"learning_rate": 9.983543609434997e-05,
"loss": 0.7524,
"step": 11
},
{
"epoch": 0.009854239375898173,
"grad_norm": 1.3894637823104858,
"learning_rate": 9.980800877674164e-05,
"loss": 0.7509,
"step": 12
},
{
"epoch": 0.010675425990556354,
"grad_norm": 0.5497334003448486,
"learning_rate": 9.978058145913331e-05,
"loss": 0.6424,
"step": 13
},
{
"epoch": 0.011496612605214535,
"grad_norm": 0.4010011851787567,
"learning_rate": 9.975315414152496e-05,
"loss": 0.6201,
"step": 14
},
{
"epoch": 0.012317799219872716,
"grad_norm": 0.41307681798934937,
"learning_rate": 9.972572682391662e-05,
"loss": 0.544,
"step": 15
},
{
"epoch": 0.013138985834530896,
"grad_norm": 0.39948198199272156,
"learning_rate": 9.969829950630828e-05,
"loss": 0.4942,
"step": 16
},
{
"epoch": 0.013960172449189078,
"grad_norm": 0.3839815557003021,
"learning_rate": 9.967087218869995e-05,
"loss": 0.4952,
"step": 17
},
{
"epoch": 0.01478135906384726,
"grad_norm": 0.40473300218582153,
"learning_rate": 9.96434448710916e-05,
"loss": 0.4832,
"step": 18
},
{
"epoch": 0.01560254567850544,
"grad_norm": 0.2300078272819519,
"learning_rate": 9.961601755348327e-05,
"loss": 0.4624,
"step": 19
},
{
"epoch": 0.016423732293163622,
"grad_norm": 0.20218200981616974,
"learning_rate": 9.958859023587493e-05,
"loss": 0.4354,
"step": 20
},
{
"epoch": 0.017244918907821802,
"grad_norm": 0.20956912636756897,
"learning_rate": 9.95611629182666e-05,
"loss": 0.4463,
"step": 21
},
{
"epoch": 0.018066105522479982,
"grad_norm": 0.16660131514072418,
"learning_rate": 9.953373560065826e-05,
"loss": 0.4104,
"step": 22
},
{
"epoch": 0.018887292137138165,
"grad_norm": 0.15235203504562378,
"learning_rate": 9.950630828304992e-05,
"loss": 0.4328,
"step": 23
},
{
"epoch": 0.019708478751796345,
"grad_norm": 0.14054065942764282,
"learning_rate": 9.947888096544159e-05,
"loss": 0.4126,
"step": 24
},
{
"epoch": 0.020529665366454525,
"grad_norm": 0.18133644759655,
"learning_rate": 9.945145364783325e-05,
"loss": 0.4214,
"step": 25
},
{
"epoch": 0.02135085198111271,
"grad_norm": 0.1237025335431099,
"learning_rate": 9.942402633022491e-05,
"loss": 0.4138,
"step": 26
},
{
"epoch": 0.02217203859577089,
"grad_norm": 0.1338941603899002,
"learning_rate": 9.939659901261658e-05,
"loss": 0.4198,
"step": 27
},
{
"epoch": 0.02299322521042907,
"grad_norm": 0.24965497851371765,
"learning_rate": 9.936917169500823e-05,
"loss": 0.4292,
"step": 28
},
{
"epoch": 0.023814411825087253,
"grad_norm": 0.2095515877008438,
"learning_rate": 9.93417443773999e-05,
"loss": 0.4321,
"step": 29
},
{
"epoch": 0.024635598439745433,
"grad_norm": 0.14506715536117554,
"learning_rate": 9.931431705979157e-05,
"loss": 0.403,
"step": 30
},
{
"epoch": 0.025456785054403613,
"grad_norm": 0.13434380292892456,
"learning_rate": 9.928688974218322e-05,
"loss": 0.4205,
"step": 31
},
{
"epoch": 0.026277971669061793,
"grad_norm": 0.14898717403411865,
"learning_rate": 9.925946242457488e-05,
"loss": 0.4095,
"step": 32
},
{
"epoch": 0.027099158283719976,
"grad_norm": 0.1183394193649292,
"learning_rate": 9.923203510696654e-05,
"loss": 0.3941,
"step": 33
},
{
"epoch": 0.027920344898378156,
"grad_norm": 0.14402946829795837,
"learning_rate": 9.920460778935821e-05,
"loss": 0.4224,
"step": 34
},
{
"epoch": 0.028741531513036336,
"grad_norm": 0.14066942036151886,
"learning_rate": 9.917718047174987e-05,
"loss": 0.4728,
"step": 35
},
{
"epoch": 0.02956271812769452,
"grad_norm": 2.1825764179229736,
"learning_rate": 9.914975315414153e-05,
"loss": 0.4013,
"step": 36
},
{
"epoch": 0.0303839047423527,
"grad_norm": 0.15306037664413452,
"learning_rate": 9.912232583653319e-05,
"loss": 0.3776,
"step": 37
},
{
"epoch": 0.03120509135701088,
"grad_norm": 1.2928482294082642,
"learning_rate": 9.909489851892486e-05,
"loss": 0.3766,
"step": 38
},
{
"epoch": 0.032026277971669063,
"grad_norm": 0.12138387560844421,
"learning_rate": 9.906747120131652e-05,
"loss": 0.4439,
"step": 39
},
{
"epoch": 0.032847464586327244,
"grad_norm": 0.13965290784835815,
"learning_rate": 9.904004388370818e-05,
"loss": 0.3758,
"step": 40
},
{
"epoch": 0.033668651200985424,
"grad_norm": 0.11665050685405731,
"learning_rate": 9.901261656609983e-05,
"loss": 0.3539,
"step": 41
},
{
"epoch": 0.034489837815643604,
"grad_norm": 0.12246105074882507,
"learning_rate": 9.89851892484915e-05,
"loss": 0.385,
"step": 42
},
{
"epoch": 0.035311024430301784,
"grad_norm": 0.11154136061668396,
"learning_rate": 9.895776193088317e-05,
"loss": 0.3675,
"step": 43
},
{
"epoch": 0.036132211044959964,
"grad_norm": 0.13517113029956818,
"learning_rate": 9.893033461327482e-05,
"loss": 0.409,
"step": 44
},
{
"epoch": 0.03695339765961815,
"grad_norm": 0.1510034054517746,
"learning_rate": 9.890290729566649e-05,
"loss": 0.356,
"step": 45
},
{
"epoch": 0.03777458427427633,
"grad_norm": 0.12618917226791382,
"learning_rate": 9.887547997805814e-05,
"loss": 0.3635,
"step": 46
},
{
"epoch": 0.03859577088893451,
"grad_norm": 0.17770665884017944,
"learning_rate": 9.884805266044981e-05,
"loss": 0.3801,
"step": 47
},
{
"epoch": 0.03941695750359269,
"grad_norm": 0.13217146694660187,
"learning_rate": 9.882062534284148e-05,
"loss": 0.3771,
"step": 48
},
{
"epoch": 0.04023814411825087,
"grad_norm": 0.11666197329759598,
"learning_rate": 9.879319802523313e-05,
"loss": 0.3837,
"step": 49
},
{
"epoch": 0.04105933073290905,
"grad_norm": 0.20090733468532562,
"learning_rate": 9.876577070762479e-05,
"loss": 0.3767,
"step": 50
},
{
"epoch": 0.04188051734756724,
"grad_norm": 0.3209711015224457,
"learning_rate": 9.873834339001646e-05,
"loss": 0.4027,
"step": 51
},
{
"epoch": 0.04270170396222542,
"grad_norm": 0.11906739324331284,
"learning_rate": 9.871091607240812e-05,
"loss": 0.3776,
"step": 52
},
{
"epoch": 0.0435228905768836,
"grad_norm": 0.3295115530490875,
"learning_rate": 9.868348875479978e-05,
"loss": 0.3484,
"step": 53
},
{
"epoch": 0.04434407719154178,
"grad_norm": 0.10566671937704086,
"learning_rate": 9.865606143719145e-05,
"loss": 0.3645,
"step": 54
},
{
"epoch": 0.04516526380619996,
"grad_norm": 0.18777306377887726,
"learning_rate": 9.86286341195831e-05,
"loss": 0.4219,
"step": 55
},
{
"epoch": 0.04598645042085814,
"grad_norm": 0.11774461716413498,
"learning_rate": 9.860120680197478e-05,
"loss": 0.375,
"step": 56
},
{
"epoch": 0.04680763703551632,
"grad_norm": 0.1274806559085846,
"learning_rate": 9.857377948436644e-05,
"loss": 0.4609,
"step": 57
},
{
"epoch": 0.047628823650174505,
"grad_norm": 0.1770283281803131,
"learning_rate": 9.854635216675809e-05,
"loss": 0.3577,
"step": 58
},
{
"epoch": 0.048450010264832685,
"grad_norm": 0.278679758310318,
"learning_rate": 9.851892484914976e-05,
"loss": 0.3748,
"step": 59
},
{
"epoch": 0.049271196879490865,
"grad_norm": 0.13674406707286835,
"learning_rate": 9.849149753154143e-05,
"loss": 0.3828,
"step": 60
},
{
"epoch": 0.050092383494149045,
"grad_norm": 0.1524430513381958,
"learning_rate": 9.846407021393308e-05,
"loss": 0.3906,
"step": 61
},
{
"epoch": 0.050913570108807225,
"grad_norm": 0.12199753522872925,
"learning_rate": 9.843664289632475e-05,
"loss": 0.4007,
"step": 62
},
{
"epoch": 0.051734756723465405,
"grad_norm": 0.19670936465263367,
"learning_rate": 9.84092155787164e-05,
"loss": 0.4018,
"step": 63
},
{
"epoch": 0.052555943338123585,
"grad_norm": 0.1128976121544838,
"learning_rate": 9.838178826110807e-05,
"loss": 0.3909,
"step": 64
},
{
"epoch": 0.05337712995278177,
"grad_norm": 0.1778184324502945,
"learning_rate": 9.835436094349974e-05,
"loss": 0.3736,
"step": 65
},
{
"epoch": 0.05419831656743995,
"grad_norm": 0.19817706942558289,
"learning_rate": 9.83269336258914e-05,
"loss": 0.3505,
"step": 66
},
{
"epoch": 0.05501950318209813,
"grad_norm": 0.09127096086740494,
"learning_rate": 9.829950630828305e-05,
"loss": 0.3504,
"step": 67
},
{
"epoch": 0.05584068979675631,
"grad_norm": 0.13604852557182312,
"learning_rate": 9.827207899067472e-05,
"loss": 0.4266,
"step": 68
},
{
"epoch": 0.05666187641141449,
"grad_norm": 0.11077171564102173,
"learning_rate": 9.824465167306638e-05,
"loss": 0.3602,
"step": 69
},
{
"epoch": 0.05748306302607267,
"grad_norm": 0.10381105542182922,
"learning_rate": 9.821722435545804e-05,
"loss": 0.3405,
"step": 70
},
{
"epoch": 0.05830424964073085,
"grad_norm": 0.13518977165222168,
"learning_rate": 9.81897970378497e-05,
"loss": 0.3348,
"step": 71
},
{
"epoch": 0.05912543625538904,
"grad_norm": 0.10194771736860275,
"learning_rate": 9.816236972024136e-05,
"loss": 0.349,
"step": 72
},
{
"epoch": 0.05994662287004722,
"grad_norm": 0.12088090181350708,
"learning_rate": 9.813494240263303e-05,
"loss": 0.357,
"step": 73
},
{
"epoch": 0.0607678094847054,
"grad_norm": 0.1529798060655594,
"learning_rate": 9.81075150850247e-05,
"loss": 0.3618,
"step": 74
},
{
"epoch": 0.06158899609936358,
"grad_norm": 0.10943326354026794,
"learning_rate": 9.808008776741635e-05,
"loss": 0.3273,
"step": 75
},
{
"epoch": 0.06241018271402176,
"grad_norm": 0.11236156523227692,
"learning_rate": 9.8052660449808e-05,
"loss": 0.3511,
"step": 76
},
{
"epoch": 0.06323136932867994,
"grad_norm": 0.11936212331056595,
"learning_rate": 9.802523313219967e-05,
"loss": 0.3669,
"step": 77
},
{
"epoch": 0.06405255594333813,
"grad_norm": 0.2718499004840851,
"learning_rate": 9.799780581459134e-05,
"loss": 0.3488,
"step": 78
},
{
"epoch": 0.0648737425579963,
"grad_norm": 0.13413332402706146,
"learning_rate": 9.7970378496983e-05,
"loss": 0.3741,
"step": 79
},
{
"epoch": 0.06569492917265449,
"grad_norm": 0.4024653136730194,
"learning_rate": 9.794295117937466e-05,
"loss": 0.3714,
"step": 80
},
{
"epoch": 0.06651611578731266,
"grad_norm": 0.12206799536943436,
"learning_rate": 9.791552386176632e-05,
"loss": 0.4094,
"step": 81
},
{
"epoch": 0.06733730240197085,
"grad_norm": 0.17678625881671906,
"learning_rate": 9.788809654415799e-05,
"loss": 0.3662,
"step": 82
},
{
"epoch": 0.06815848901662903,
"grad_norm": 0.1201493889093399,
"learning_rate": 9.786066922654965e-05,
"loss": 0.3974,
"step": 83
},
{
"epoch": 0.06897967563128721,
"grad_norm": 0.11645176261663437,
"learning_rate": 9.783324190894131e-05,
"loss": 0.3676,
"step": 84
},
{
"epoch": 0.0698008622459454,
"grad_norm": 0.20770376920700073,
"learning_rate": 9.780581459133296e-05,
"loss": 0.3442,
"step": 85
},
{
"epoch": 0.07062204886060357,
"grad_norm": 0.3476441502571106,
"learning_rate": 9.777838727372464e-05,
"loss": 0.4063,
"step": 86
},
{
"epoch": 0.07144323547526175,
"grad_norm": 0.10448214411735535,
"learning_rate": 9.77509599561163e-05,
"loss": 0.381,
"step": 87
},
{
"epoch": 0.07226442208991993,
"grad_norm": 0.11250001937150955,
"learning_rate": 9.772353263850797e-05,
"loss": 0.3508,
"step": 88
},
{
"epoch": 0.07226442208991993,
"eval_runtime": 492.6417,
"eval_samples_per_second": 0.4,
"eval_steps_per_second": 0.201,
"step": 88
}
],
"logging_steps": 1,
"max_steps": 3651,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 88,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 3.872555937317585e+18,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}