St4r4x-NV's picture
End of training
775b8f5 verified
{
"best_metric": 0.8205128205128205,
"best_model_checkpoint": "videomae-base-finetuned-sample_kine/checkpoint-45",
"epoch": 9.035714285714286,
"eval_steps": 500,
"global_step": 140,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.07142857142857142,
"grad_norm": 3.850412607192993,
"learning_rate": 3.571428571428572e-05,
"loss": 0.7564,
"step": 10
},
{
"epoch": 0.10714285714285714,
"eval_accuracy": 0.6923076923076923,
"eval_loss": 0.666049599647522,
"eval_runtime": 17.3838,
"eval_samples_per_second": 2.243,
"eval_steps_per_second": 0.288,
"step": 15
},
{
"epoch": 1.0357142857142858,
"grad_norm": 4.889276027679443,
"learning_rate": 4.761904761904762e-05,
"loss": 0.6276,
"step": 20
},
{
"epoch": 1.1071428571428572,
"grad_norm": 11.487154006958008,
"learning_rate": 4.3650793650793655e-05,
"loss": 0.6614,
"step": 30
},
{
"epoch": 1.1071428571428572,
"eval_accuracy": 0.6923076923076923,
"eval_loss": 0.5676791071891785,
"eval_runtime": 17.4778,
"eval_samples_per_second": 2.231,
"eval_steps_per_second": 0.286,
"step": 30
},
{
"epoch": 2.0714285714285716,
"grad_norm": 4.425611972808838,
"learning_rate": 3.968253968253968e-05,
"loss": 0.5941,
"step": 40
},
{
"epoch": 2.107142857142857,
"eval_accuracy": 0.8205128205128205,
"eval_loss": 0.5078679323196411,
"eval_runtime": 17.0179,
"eval_samples_per_second": 2.292,
"eval_steps_per_second": 0.294,
"step": 45
},
{
"epoch": 3.0357142857142856,
"grad_norm": 4.037534713745117,
"learning_rate": 3.571428571428572e-05,
"loss": 0.428,
"step": 50
},
{
"epoch": 3.107142857142857,
"grad_norm": 5.8046040534973145,
"learning_rate": 3.1746031746031745e-05,
"loss": 0.3661,
"step": 60
},
{
"epoch": 3.107142857142857,
"eval_accuracy": 0.6923076923076923,
"eval_loss": 0.6174768209457397,
"eval_runtime": 17.3537,
"eval_samples_per_second": 2.247,
"eval_steps_per_second": 0.288,
"step": 60
},
{
"epoch": 4.071428571428571,
"grad_norm": 1.8920663595199585,
"learning_rate": 2.777777777777778e-05,
"loss": 0.3258,
"step": 70
},
{
"epoch": 4.107142857142857,
"eval_accuracy": 0.7435897435897436,
"eval_loss": 1.1648730039596558,
"eval_runtime": 17.2838,
"eval_samples_per_second": 2.256,
"eval_steps_per_second": 0.289,
"step": 75
},
{
"epoch": 5.035714285714286,
"grad_norm": 23.643049240112305,
"learning_rate": 2.380952380952381e-05,
"loss": 0.4292,
"step": 80
},
{
"epoch": 5.107142857142857,
"grad_norm": 9.202912330627441,
"learning_rate": 1.984126984126984e-05,
"loss": 0.5887,
"step": 90
},
{
"epoch": 5.107142857142857,
"eval_accuracy": 0.717948717948718,
"eval_loss": 0.4697076380252838,
"eval_runtime": 17.1039,
"eval_samples_per_second": 2.28,
"eval_steps_per_second": 0.292,
"step": 90
},
{
"epoch": 6.071428571428571,
"grad_norm": 7.702420711517334,
"learning_rate": 1.5873015873015872e-05,
"loss": 0.3907,
"step": 100
},
{
"epoch": 6.107142857142857,
"eval_accuracy": 0.6153846153846154,
"eval_loss": 0.9873863458633423,
"eval_runtime": 17.325,
"eval_samples_per_second": 2.251,
"eval_steps_per_second": 0.289,
"step": 105
},
{
"epoch": 7.035714285714286,
"grad_norm": 8.652463912963867,
"learning_rate": 1.1904761904761905e-05,
"loss": 0.193,
"step": 110
},
{
"epoch": 7.107142857142857,
"grad_norm": 1.5207479000091553,
"learning_rate": 7.936507936507936e-06,
"loss": 0.1948,
"step": 120
},
{
"epoch": 7.107142857142857,
"eval_accuracy": 0.6666666666666666,
"eval_loss": 0.9959497451782227,
"eval_runtime": 17.4802,
"eval_samples_per_second": 2.231,
"eval_steps_per_second": 0.286,
"step": 120
},
{
"epoch": 8.071428571428571,
"grad_norm": 11.438685417175293,
"learning_rate": 3.968253968253968e-06,
"loss": 0.1424,
"step": 130
},
{
"epoch": 8.107142857142858,
"eval_accuracy": 0.6666666666666666,
"eval_loss": 1.1356935501098633,
"eval_runtime": 16.5116,
"eval_samples_per_second": 2.362,
"eval_steps_per_second": 0.303,
"step": 135
},
{
"epoch": 9.035714285714286,
"grad_norm": 1.0725109577178955,
"learning_rate": 0.0,
"loss": 0.2198,
"step": 140
},
{
"epoch": 9.035714285714286,
"eval_accuracy": 0.6666666666666666,
"eval_loss": 1.1466848850250244,
"eval_runtime": 17.0652,
"eval_samples_per_second": 2.285,
"eval_steps_per_second": 0.293,
"step": 140
},
{
"epoch": 9.035714285714286,
"step": 140,
"total_flos": 1.3619485962926162e+18,
"train_loss": 0.4227044131074633,
"train_runtime": 879.8726,
"train_samples_per_second": 1.273,
"train_steps_per_second": 0.159
},
{
"epoch": 9.035714285714286,
"eval_accuracy": 0.8205128205128205,
"eval_loss": 0.5078679919242859,
"eval_runtime": 16.7803,
"eval_samples_per_second": 2.324,
"eval_steps_per_second": 0.298,
"step": 140
},
{
"epoch": 9.035714285714286,
"eval_accuracy": 0.8205128205128205,
"eval_loss": 0.5078681111335754,
"eval_runtime": 17.1173,
"eval_samples_per_second": 2.278,
"eval_steps_per_second": 0.292,
"step": 140
}
],
"logging_steps": 10,
"max_steps": 140,
"num_input_tokens_seen": 0,
"num_train_epochs": 9223372036854775807,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.3619485962926162e+18,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}