{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.113015284854099,
"eval_steps": 200,
"global_step": 600,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.018527095877721167,
"grad_norm": 5.348576545715332,
"learning_rate": 1.234567901234568e-06,
"loss": 1.7335,
"step": 10
},
{
"epoch": 0.037054191755442334,
"grad_norm": 5.819892883300781,
"learning_rate": 2.469135802469136e-06,
"loss": 1.7063,
"step": 20
},
{
"epoch": 0.0555812876331635,
"grad_norm": 2.0946009159088135,
"learning_rate": 3.7037037037037037e-06,
"loss": 1.5389,
"step": 30
},
{
"epoch": 0.07410838351088467,
"grad_norm": 6.124255657196045,
"learning_rate": 4.938271604938272e-06,
"loss": 1.9017,
"step": 40
},
{
"epoch": 0.09263547938860583,
"grad_norm": 4.986006736755371,
"learning_rate": 6.17283950617284e-06,
"loss": 1.4697,
"step": 50
},
{
"epoch": 0.111162575266327,
"grad_norm": 3.69557785987854,
"learning_rate": 7.4074074074074075e-06,
"loss": 1.2454,
"step": 60
},
{
"epoch": 0.12968967114404817,
"grad_norm": 4.338206768035889,
"learning_rate": 8.641975308641975e-06,
"loss": 1.2242,
"step": 70
},
{
"epoch": 0.14821676702176934,
"grad_norm": 2.3303167819976807,
"learning_rate": 9.876543209876543e-06,
"loss": 0.7272,
"step": 80
},
{
"epoch": 0.1667438628994905,
"grad_norm": 2.708115339279175,
"learning_rate": 1.1111111111111113e-05,
"loss": 0.5907,
"step": 90
},
{
"epoch": 0.18527095877721167,
"grad_norm": 1.4415699243545532,
"learning_rate": 1.234567901234568e-05,
"loss": 0.6154,
"step": 100
},
{
"epoch": 0.20379805465493284,
"grad_norm": 1.7580137252807617,
"learning_rate": 1.3580246913580248e-05,
"loss": 0.4568,
"step": 110
},
{
"epoch": 0.222325150532654,
"grad_norm": 2.5971596240997314,
"learning_rate": 1.4814814814814815e-05,
"loss": 0.4862,
"step": 120
},
{
"epoch": 0.24085224641037517,
"grad_norm": 1.3559226989746094,
"learning_rate": 1.6049382716049385e-05,
"loss": 0.4366,
"step": 130
},
{
"epoch": 0.25937934228809634,
"grad_norm": 0.6591945290565491,
"learning_rate": 1.728395061728395e-05,
"loss": 0.3757,
"step": 140
},
{
"epoch": 0.2779064381658175,
"grad_norm": 2.574704170227051,
"learning_rate": 1.851851851851852e-05,
"loss": 0.4693,
"step": 150
},
{
"epoch": 0.29643353404353867,
"grad_norm": 3.002263307571411,
"learning_rate": 1.9753086419753087e-05,
"loss": 0.4896,
"step": 160
},
{
"epoch": 0.31496062992125984,
"grad_norm": 3.431332588195801,
"learning_rate": 1.999850819197622e-05,
"loss": 0.4864,
"step": 170
},
{
"epoch": 0.333487725798981,
"grad_norm": 1.1350328922271729,
"learning_rate": 1.99924484847108e-05,
"loss": 0.3713,
"step": 180
},
{
"epoch": 0.35201482167670217,
"grad_norm": 1.6894770860671997,
"learning_rate": 1.9981730462964303e-05,
"loss": 0.4814,
"step": 190
},
{
"epoch": 0.37054191755442334,
"grad_norm": 1.3769453763961792,
"learning_rate": 1.9966359123301492e-05,
"loss": 0.4288,
"step": 200
},
{
"epoch": 0.3890690134321445,
"grad_norm": 1.1856595277786255,
"learning_rate": 1.9946341631587086e-05,
"loss": 0.4447,
"step": 210
},
{
"epoch": 0.4075961093098657,
"grad_norm": 1.7599550485610962,
"learning_rate": 1.9921687319645183e-05,
"loss": 0.349,
"step": 220
},
{
"epoch": 0.42612320518758684,
"grad_norm": 1.5848398208618164,
"learning_rate": 1.9892407680908904e-05,
"loss": 0.396,
"step": 230
},
{
"epoch": 0.444650301065308,
"grad_norm": 1.9259053468704224,
"learning_rate": 1.9858516365062334e-05,
"loss": 0.3352,
"step": 240
},
{
"epoch": 0.4631773969430292,
"grad_norm": 1.4261807203292847,
"learning_rate": 1.9820029171677288e-05,
"loss": 0.3511,
"step": 250
},
{
"epoch": 0.48170449282075034,
"grad_norm": 1.699010968208313,
"learning_rate": 1.977696404284779e-05,
"loss": 0.4073,
"step": 260
},
{
"epoch": 0.5002315886984715,
"grad_norm": 1.3403549194335938,
"learning_rate": 1.9729341054825783e-05,
"loss": 0.4454,
"step": 270
},
{
"epoch": 0.5187586845761927,
"grad_norm": 1.2229658365249634,
"learning_rate": 1.9677182408661894e-05,
"loss": 0.4352,
"step": 280
},
{
"epoch": 0.5372857804539138,
"grad_norm": 2.2487080097198486,
"learning_rate": 1.9620512419855684e-05,
"loss": 0.392,
"step": 290
},
{
"epoch": 0.555812876331635,
"grad_norm": 2.6429977416992188,
"learning_rate": 1.9559357507020163e-05,
"loss": 0.4013,
"step": 300
},
{
"epoch": 0.5743399722093562,
"grad_norm": 2.240354061126709,
"learning_rate": 1.9493746179565854e-05,
"loss": 0.4111,
"step": 310
},
{
"epoch": 0.5928670680870773,
"grad_norm": 1.2388675212860107,
"learning_rate": 1.94237090244102e-05,
"loss": 0.3653,
"step": 320
},
{
"epoch": 0.6113941639647985,
"grad_norm": 2.2535054683685303,
"learning_rate": 1.9349278691718426e-05,
"loss": 0.3956,
"step": 330
},
{
"epoch": 0.6299212598425197,
"grad_norm": 2.5032520294189453,
"learning_rate": 1.9270489879682592e-05,
"loss": 0.3697,
"step": 340
},
{
"epoch": 0.6484483557202408,
"grad_norm": 2.4367105960845947,
"learning_rate": 1.9187379318345845e-05,
"loss": 0.4188,
"step": 350
},
{
"epoch": 0.666975451597962,
"grad_norm": 2.6159491539001465,
"learning_rate": 1.9099985752479505e-05,
"loss": 0.4415,
"step": 360
},
{
"epoch": 0.6855025474756832,
"grad_norm": 1.8182092905044556,
"learning_rate": 1.900834992352087e-05,
"loss": 0.3273,
"step": 370
},
{
"epoch": 0.7040296433534043,
"grad_norm": 3.406963348388672,
"learning_rate": 1.8912514550580242e-05,
"loss": 0.4069,
"step": 380
},
{
"epoch": 0.7225567392311255,
"grad_norm": 1.886953353881836,
"learning_rate": 1.881252431052599e-05,
"loss": 0.3452,
"step": 390
},
{
"epoch": 0.7410838351088467,
"grad_norm": 2.346081018447876,
"learning_rate": 1.870842581715691e-05,
"loss": 0.2954,
"step": 400
},
{
"epoch": 0.7596109309865678,
"grad_norm": 1.7905707359313965,
"learning_rate": 1.8600267599471663e-05,
"loss": 0.344,
"step": 410
},
{
"epoch": 0.778138026864289,
"grad_norm": 1.5074595212936401,
"learning_rate": 1.8488100079045345e-05,
"loss": 0.4834,
"step": 420
},
{
"epoch": 0.7966651227420102,
"grad_norm": 2.0368354320526123,
"learning_rate": 1.8371975546523795e-05,
"loss": 0.4263,
"step": 430
},
{
"epoch": 0.8151922186197313,
"grad_norm": 2.4880967140197754,
"learning_rate": 1.825194813724654e-05,
"loss": 0.2868,
"step": 440
},
{
"epoch": 0.8337193144974525,
"grad_norm": 1.4323982000350952,
"learning_rate": 1.81280738060098e-05,
"loss": 0.3404,
"step": 450
},
{
"epoch": 0.8522464103751737,
"grad_norm": 1.955913782119751,
"learning_rate": 1.8000410300981305e-05,
"loss": 0.329,
"step": 460
},
{
"epoch": 0.8707735062528948,
"grad_norm": 2.0698235034942627,
"learning_rate": 1.786901713677902e-05,
"loss": 0.3959,
"step": 470
},
{
"epoch": 0.889300602130616,
"grad_norm": 2.9616572856903076,
"learning_rate": 1.7733955566726438e-05,
"loss": 0.3973,
"step": 480
},
{
"epoch": 0.9078276980083372,
"grad_norm": 3.0657591819763184,
"learning_rate": 1.7595288554297295e-05,
"loss": 0.4035,
"step": 490
},
{
"epoch": 0.9263547938860583,
"grad_norm": 1.5825896263122559,
"learning_rate": 1.7453080743763e-05,
"loss": 0.3797,
"step": 500
},
{
"epoch": 0.9448818897637795,
"grad_norm": 1.8893063068389893,
"learning_rate": 1.7307398430056595e-05,
"loss": 0.2627,
"step": 510
},
{
"epoch": 0.9634089856415007,
"grad_norm": 1.5115277767181396,
"learning_rate": 1.7158309527867117e-05,
"loss": 0.281,
"step": 520
},
{
"epoch": 0.9819360815192218,
"grad_norm": 4.204952239990234,
"learning_rate": 1.700588353997891e-05,
"loss": 0.4839,
"step": 530
},
{
"epoch": 1.001852709587772,
"grad_norm": 3.4891834259033203,
"learning_rate": 1.6850191524870548e-05,
"loss": 0.4232,
"step": 540
},
{
"epoch": 1.0203798054654933,
"grad_norm": 2.1796462535858154,
"learning_rate": 1.6691306063588583e-05,
"loss": 0.3583,
"step": 550
},
{
"epoch": 1.0389069013432144,
"grad_norm": 1.7773243188858032,
"learning_rate": 1.6529301225911433e-05,
"loss": 0.3135,
"step": 560
},
{
"epoch": 1.0574339972209357,
"grad_norm": 1.6911367177963257,
"learning_rate": 1.6364252535819284e-05,
"loss": 0.2577,
"step": 570
},
{
"epoch": 1.0759610930986567,
"grad_norm": 1.934979796409607,
"learning_rate": 1.619623693628605e-05,
"loss": 0.2957,
"step": 580
},
{
"epoch": 1.094488188976378,
"grad_norm": 2.352208137512207,
"learning_rate": 1.602533275340984e-05,
"loss": 0.3576,
"step": 590
},
{
"epoch": 1.113015284854099,
"grad_norm": 1.4006640911102295,
"learning_rate": 1.5851619659898623e-05,
"loss": 0.3574,
"step": 600
}
],
"logging_steps": 10,
"max_steps": 1617,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 200,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 3.3080296690801705e+18,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}