{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 34.285714285714285,
"eval_steps": 10,
"global_step": 60,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"learning_rate": 0.0,
"loss": 1.3795,
"step": 1
},
{
"epoch": 1.1428571428571428,
"grad_norm": 2.172917366027832,
"learning_rate": 0.0,
"loss": 0.2103,
"step": 2
},
{
"epoch": 2.0,
"grad_norm": 2.172917366027832,
"learning_rate": 0.0,
"loss": 0.9608,
"step": 3
},
{
"epoch": 2.2857142857142856,
"grad_norm": 0.879162609577179,
"learning_rate": 0.0002,
"loss": 0.2294,
"step": 4
},
{
"epoch": 3.0,
"grad_norm": 0.879162609577179,
"learning_rate": 0.0002,
"loss": 0.8781,
"step": 5
},
{
"epoch": 3.4285714285714284,
"grad_norm": 0.752986490726471,
"learning_rate": 0.0002,
"loss": 0.5153,
"step": 6
},
{
"epoch": 4.0,
"grad_norm": 0.752986490726471,
"learning_rate": 0.0002,
"loss": 0.5011,
"step": 7
},
{
"epoch": 4.571428571428571,
"grad_norm": 0.5658770203590393,
"learning_rate": 0.0002,
"loss": 0.5315,
"step": 8
},
{
"epoch": 5.0,
"grad_norm": 0.5658770203590393,
"learning_rate": 0.0002,
"loss": 0.3573,
"step": 9
},
{
"epoch": 5.714285714285714,
"grad_norm": 0.6311860084533691,
"learning_rate": 0.0002,
"loss": 0.632,
"step": 10
},
{
"epoch": 5.714285714285714,
"eval_loss": 0.794529914855957,
"eval_runtime": 1.124,
"eval_samples_per_second": 6.228,
"eval_steps_per_second": 6.228,
"step": 10
},
{
"epoch": 6.0,
"grad_norm": 0.6311860084533691,
"learning_rate": 0.0002,
"loss": 0.1586,
"step": 11
},
{
"epoch": 6.857142857142857,
"grad_norm": 0.6447564959526062,
"learning_rate": 0.0002,
"loss": 0.5467,
"step": 12
},
{
"epoch": 7.0,
"grad_norm": 0.6447564959526062,
"learning_rate": 0.0002,
"loss": 0.1358,
"step": 13
},
{
"epoch": 8.0,
"grad_norm": 0.502592921257019,
"learning_rate": 0.0002,
"loss": 0.586,
"step": 14
},
{
"epoch": 9.0,
"grad_norm": 0.502592921257019,
"learning_rate": 0.0002,
"loss": 0.4717,
"step": 15
},
{
"epoch": 9.142857142857142,
"grad_norm": 0.515450119972229,
"learning_rate": 0.0002,
"loss": 0.0492,
"step": 16
},
{
"epoch": 10.0,
"grad_norm": 0.515450119972229,
"learning_rate": 0.0002,
"loss": 0.3283,
"step": 17
},
{
"epoch": 10.285714285714286,
"grad_norm": 0.6195679903030396,
"learning_rate": 0.0002,
"loss": 0.1528,
"step": 18
},
{
"epoch": 11.0,
"grad_norm": 0.6195679903030396,
"learning_rate": 0.0002,
"loss": 0.1333,
"step": 19
},
{
"epoch": 11.428571428571429,
"grad_norm": 0.593528151512146,
"learning_rate": 0.0002,
"loss": 0.1008,
"step": 20
},
{
"epoch": 11.428571428571429,
"eval_loss": 0.1764988899230957,
"eval_runtime": 1.1181,
"eval_samples_per_second": 6.26,
"eval_steps_per_second": 6.26,
"step": 20
},
{
"epoch": 12.0,
"grad_norm": 0.593528151512146,
"learning_rate": 0.0002,
"loss": 0.1048,
"step": 21
},
{
"epoch": 12.571428571428571,
"grad_norm": 0.5447816848754883,
"learning_rate": 0.0002,
"loss": 0.1318,
"step": 22
},
{
"epoch": 13.0,
"grad_norm": 0.5447816848754883,
"learning_rate": 0.0002,
"loss": 0.0098,
"step": 23
},
{
"epoch": 13.714285714285714,
"grad_norm": 0.37343642115592957,
"learning_rate": 0.0002,
"loss": 0.069,
"step": 24
},
{
"epoch": 14.0,
"grad_norm": 0.37343642115592957,
"learning_rate": 0.0002,
"loss": 0.0154,
"step": 25
},
{
"epoch": 14.857142857142858,
"grad_norm": 0.5212247967720032,
"learning_rate": 0.0002,
"loss": 0.0554,
"step": 26
},
{
"epoch": 15.0,
"grad_norm": 0.5212247967720032,
"learning_rate": 0.0002,
"loss": 0.0011,
"step": 27
},
{
"epoch": 16.0,
"grad_norm": 0.39550113677978516,
"learning_rate": 0.0002,
"loss": 0.027,
"step": 28
},
{
"epoch": 17.0,
"grad_norm": 0.39550113677978516,
"learning_rate": 0.0002,
"loss": 0.0154,
"step": 29
},
{
"epoch": 17.142857142857142,
"grad_norm": 0.1997860223054886,
"learning_rate": 0.0002,
"loss": 0.0003,
"step": 30
},
{
"epoch": 17.142857142857142,
"eval_loss": 0.009444376453757286,
"eval_runtime": 1.1094,
"eval_samples_per_second": 6.309,
"eval_steps_per_second": 6.309,
"step": 30
},
{
"epoch": 18.0,
"grad_norm": 0.1997860223054886,
"learning_rate": 0.0002,
"loss": 0.008,
"step": 31
},
{
"epoch": 18.285714285714285,
"grad_norm": 0.17780642211437225,
"learning_rate": 0.0002,
"loss": 0.0003,
"step": 32
},
{
"epoch": 19.0,
"grad_norm": 0.17780642211437225,
"learning_rate": 0.0002,
"loss": 0.0024,
"step": 33
},
{
"epoch": 19.428571428571427,
"grad_norm": 0.09276885539293289,
"learning_rate": 0.0002,
"loss": 0.0016,
"step": 34
},
{
"epoch": 20.0,
"grad_norm": 0.09276885539293289,
"learning_rate": 0.0002,
"loss": 0.0008,
"step": 35
},
{
"epoch": 20.571428571428573,
"grad_norm": 0.03949622064828873,
"learning_rate": 0.0002,
"loss": 0.0004,
"step": 36
},
{
"epoch": 21.0,
"grad_norm": 0.03949622064828873,
"learning_rate": 0.0002,
"loss": 0.0026,
"step": 37
},
{
"epoch": 21.714285714285715,
"grad_norm": 0.48974359035491943,
"learning_rate": 0.0002,
"loss": 0.0027,
"step": 38
},
{
"epoch": 22.0,
"grad_norm": 0.48974359035491943,
"learning_rate": 0.0002,
"loss": 0.0001,
"step": 39
},
{
"epoch": 22.857142857142858,
"grad_norm": 0.037879057228565216,
"learning_rate": 0.0002,
"loss": 0.0008,
"step": 40
},
{
"epoch": 22.857142857142858,
"eval_loss": 0.0010270120110362768,
"eval_runtime": 1.107,
"eval_samples_per_second": 6.323,
"eval_steps_per_second": 6.323,
"step": 40
},
{
"epoch": 23.0,
"grad_norm": 0.037879057228565216,
"learning_rate": 0.0002,
"loss": 0.0,
"step": 41
},
{
"epoch": 24.0,
"grad_norm": 0.07712631672620773,
"learning_rate": 0.0002,
"loss": 0.0009,
"step": 42
},
{
"epoch": 25.0,
"grad_norm": 0.07712631672620773,
"learning_rate": 0.0002,
"loss": 0.0006,
"step": 43
},
{
"epoch": 25.142857142857142,
"grad_norm": 0.03069213591516018,
"learning_rate": 0.0002,
"loss": 0.0,
"step": 44
},
{
"epoch": 26.0,
"grad_norm": 0.03069213591516018,
"learning_rate": 0.0002,
"loss": 0.0005,
"step": 45
},
{
"epoch": 26.285714285714285,
"grad_norm": 0.04276096820831299,
"learning_rate": 0.0002,
"loss": 0.0002,
"step": 46
},
{
"epoch": 27.0,
"grad_norm": 0.04276096820831299,
"learning_rate": 0.0002,
"loss": 0.0003,
"step": 47
},
{
"epoch": 27.428571428571427,
"grad_norm": 0.018633099272847176,
"learning_rate": 0.0002,
"loss": 0.0001,
"step": 48
},
{
"epoch": 28.0,
"grad_norm": 0.018633099272847176,
"learning_rate": 0.0002,
"loss": 0.0004,
"step": 49
},
{
"epoch": 28.571428571428573,
"grad_norm": 0.05017812177538872,
"learning_rate": 0.0002,
"loss": 0.0003,
"step": 50
},
{
"epoch": 28.571428571428573,
"eval_loss": 0.00034485122887417674,
"eval_runtime": 1.12,
"eval_samples_per_second": 6.25,
"eval_steps_per_second": 6.25,
"step": 50
},
{
"epoch": 29.0,
"grad_norm": 0.05017812177538872,
"learning_rate": 0.0002,
"loss": 0.0001,
"step": 51
},
{
"epoch": 29.714285714285715,
"grad_norm": 0.010933789424598217,
"learning_rate": 0.0002,
"loss": 0.0002,
"step": 52
},
{
"epoch": 30.0,
"grad_norm": 0.010933789424598217,
"learning_rate": 0.0002,
"loss": 0.0001,
"step": 53
},
{
"epoch": 30.857142857142858,
"grad_norm": 0.00900160800665617,
"learning_rate": 0.0002,
"loss": 0.0002,
"step": 54
},
{
"epoch": 31.0,
"grad_norm": 0.00900160800665617,
"learning_rate": 0.0002,
"loss": 0.0,
"step": 55
},
{
"epoch": 32.0,
"grad_norm": 0.018903745338320732,
"learning_rate": 0.0002,
"loss": 0.0003,
"step": 56
},
{
"epoch": 33.0,
"grad_norm": 0.018903745338320732,
"learning_rate": 0.0002,
"loss": 0.0002,
"step": 57
},
{
"epoch": 33.142857142857146,
"grad_norm": 0.01036656741052866,
"learning_rate": 0.0002,
"loss": 0.0,
"step": 58
},
{
"epoch": 34.0,
"grad_norm": 0.01036656741052866,
"learning_rate": 0.0002,
"loss": 0.0002,
"step": 59
},
{
"epoch": 34.285714285714285,
"grad_norm": 0.007697099819779396,
"learning_rate": 0.0002,
"loss": 0.0,
"step": 60
},
{
"epoch": 34.285714285714285,
"eval_loss": 0.00019181256357114762,
"eval_runtime": 1.1116,
"eval_samples_per_second": 6.297,
"eval_steps_per_second": 6.297,
"step": 60
},
{
"epoch": 34.285714285714285,
"step": 60,
"total_flos": 4016214881861632.0,
"train_loss": 0.1552126432957569,
"train_runtime": 117.5216,
"train_samples_per_second": 4.084,
"train_steps_per_second": 0.511
}
],
"logging_steps": 1.0,
"max_steps": 60,
"num_input_tokens_seen": 0,
"num_train_epochs": 60,
"save_steps": 1000,
"total_flos": 4016214881861632.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}