{
  "best_metric": 0.7575757575757576,
  "best_model_checkpoint": "dit-base-rvlcdip-finetuned-grp-actual/checkpoint-93",
  "epoch": 6.72,
  "eval_steps": 500,
  "global_step": 126,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.53,
      "learning_rate": 3.846153846153846e-05,
      "loss": 2.3577,
      "step": 10
    },
    {
      "epoch": 0.96,
      "eval_accuracy": 0.5113636363636364,
      "eval_loss": 2.086306571960449,
      "eval_runtime": 240.3077,
      "eval_samples_per_second": 1.099,
      "eval_steps_per_second": 0.037,
      "step": 18
    },
    {
      "epoch": 1.07,
      "learning_rate": 4.690265486725664e-05,
      "loss": 2.2163,
      "step": 20
    },
    {
      "epoch": 1.6,
      "learning_rate": 4.247787610619469e-05,
      "loss": 2.0601,
      "step": 30
    },
    {
      "epoch": 1.97,
      "eval_accuracy": 0.6477272727272727,
      "eval_loss": 1.8153679370880127,
      "eval_runtime": 14.6331,
      "eval_samples_per_second": 18.041,
      "eval_steps_per_second": 0.615,
      "step": 37
    },
    {
      "epoch": 2.13,
      "learning_rate": 3.8053097345132744e-05,
      "loss": 1.943,
      "step": 40
    },
    {
      "epoch": 2.67,
      "learning_rate": 3.3628318584070804e-05,
      "loss": 1.8068,
      "step": 50
    },
    {
      "epoch": 2.99,
      "eval_accuracy": 0.6704545454545454,
      "eval_loss": 1.5881296396255493,
      "eval_runtime": 14.8318,
      "eval_samples_per_second": 17.8,
      "eval_steps_per_second": 0.607,
      "step": 56
    },
    {
      "epoch": 3.2,
      "learning_rate": 2.9203539823008852e-05,
      "loss": 1.66,
      "step": 60
    },
    {
      "epoch": 3.73,
      "learning_rate": 2.4778761061946905e-05,
      "loss": 1.5953,
      "step": 70
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.7159090909090909,
      "eval_loss": 1.4111517667770386,
      "eval_runtime": 14.6707,
      "eval_samples_per_second": 17.995,
      "eval_steps_per_second": 0.613,
      "step": 75
    },
    {
      "epoch": 4.27,
      "learning_rate": 2.0353982300884957e-05,
      "loss": 1.4929,
      "step": 80
    },
    {
      "epoch": 4.8,
      "learning_rate": 1.592920353982301e-05,
      "loss": 1.4304,
      "step": 90
    },
    {
      "epoch": 4.96,
      "eval_accuracy": 0.7575757575757576,
      "eval_loss": 1.3033273220062256,
      "eval_runtime": 14.9724,
      "eval_samples_per_second": 17.632,
      "eval_steps_per_second": 0.601,
      "step": 93
    },
    {
      "epoch": 5.33,
      "learning_rate": 1.1504424778761062e-05,
      "loss": 1.3606,
      "step": 100
    },
    {
      "epoch": 5.87,
      "learning_rate": 7.079646017699115e-06,
      "loss": 1.3458,
      "step": 110
    },
    {
      "epoch": 5.97,
      "eval_accuracy": 0.75,
      "eval_loss": 1.2400753498077393,
      "eval_runtime": 14.8483,
      "eval_samples_per_second": 17.78,
      "eval_steps_per_second": 0.606,
      "step": 112
    },
    {
      "epoch": 6.4,
      "learning_rate": 2.6548672566371683e-06,
      "loss": 1.3523,
      "step": 120
    },
    {
      "epoch": 6.72,
      "eval_accuracy": 0.7575757575757576,
      "eval_loss": 1.2240339517593384,
      "eval_runtime": 14.9425,
      "eval_samples_per_second": 17.668,
      "eval_steps_per_second": 0.602,
      "step": 126
    },
    {
      "epoch": 6.72,
      "step": 126,
      "total_flos": 1.2388649195611423e+18,
      "train_loss": 1.7003454405163962,
      "train_runtime": 3667.1764,
      "train_samples_per_second": 4.535,
      "train_steps_per_second": 0.034
    }
  ],
  "logging_steps": 10,
  "max_steps": 126,
  "num_train_epochs": 7,
  "save_steps": 500,
  "total_flos": 1.2388649195611423e+18,
  "trial_name": null,
  "trial_params": null
}