|
{ |
|
"best_metric": 0.2435225546360016, |
|
"best_model_checkpoint": "finetuned-blurr-nonblur/checkpoint-112", |
|
"epoch": 10.0, |
|
"eval_steps": 100, |
|
"global_step": 140, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 54505.6171875, |
|
"learning_rate": 1.8571428571428575e-05, |
|
"loss": 0.6486, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6645569620253164, |
|
"eval_loss": 0.625545084476471, |
|
"eval_runtime": 7.9446, |
|
"eval_samples_per_second": 19.888, |
|
"eval_steps_per_second": 0.378, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"grad_norm": 57206.48828125, |
|
"learning_rate": 1.7142857142857142e-05, |
|
"loss": 0.552, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6772151898734177, |
|
"eval_loss": 0.5736974477767944, |
|
"eval_runtime": 7.72, |
|
"eval_samples_per_second": 20.466, |
|
"eval_steps_per_second": 0.389, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"grad_norm": 53612.984375, |
|
"learning_rate": 1.5714285714285715e-05, |
|
"loss": 0.5206, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"grad_norm": 51806.32421875, |
|
"learning_rate": 1.4285714285714287e-05, |
|
"loss": 0.4207, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7974683544303798, |
|
"eval_loss": 0.5174736380577087, |
|
"eval_runtime": 7.726, |
|
"eval_samples_per_second": 20.45, |
|
"eval_steps_per_second": 0.388, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"grad_norm": 56256.515625, |
|
"learning_rate": 1.2857142857142859e-05, |
|
"loss": 0.3545, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8860759493670886, |
|
"eval_loss": 0.44835516810417175, |
|
"eval_runtime": 7.7183, |
|
"eval_samples_per_second": 20.471, |
|
"eval_steps_per_second": 0.389, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"grad_norm": 70625.890625, |
|
"learning_rate": 1.1428571428571429e-05, |
|
"loss": 0.286, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 57182.4140625, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2082, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8860759493670886, |
|
"eval_loss": 0.3621000051498413, |
|
"eval_runtime": 7.8704, |
|
"eval_samples_per_second": 20.075, |
|
"eval_steps_per_second": 0.381, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 5.71, |
|
"grad_norm": 41587.15625, |
|
"learning_rate": 8.571428571428571e-06, |
|
"loss": 0.167, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9050632911392406, |
|
"eval_loss": 0.2930063307285309, |
|
"eval_runtime": 7.906, |
|
"eval_samples_per_second": 19.985, |
|
"eval_steps_per_second": 0.379, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 6.43, |
|
"grad_norm": 89662.453125, |
|
"learning_rate": 7.1428571428571436e-06, |
|
"loss": 0.176, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8860759493670886, |
|
"eval_loss": 0.3003353178501129, |
|
"eval_runtime": 7.7836, |
|
"eval_samples_per_second": 20.299, |
|
"eval_steps_per_second": 0.385, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"grad_norm": 105894.6796875, |
|
"learning_rate": 5.7142857142857145e-06, |
|
"loss": 0.1271, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 7.86, |
|
"grad_norm": 73103.46875, |
|
"learning_rate": 4.2857142857142855e-06, |
|
"loss": 0.1275, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9240506329113924, |
|
"eval_loss": 0.2435225546360016, |
|
"eval_runtime": 7.7728, |
|
"eval_samples_per_second": 20.327, |
|
"eval_steps_per_second": 0.386, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 8.57, |
|
"grad_norm": 65041.953125, |
|
"learning_rate": 2.8571428571428573e-06, |
|
"loss": 0.11, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.9050632911392406, |
|
"eval_loss": 0.25813814997673035, |
|
"eval_runtime": 7.8334, |
|
"eval_samples_per_second": 20.17, |
|
"eval_steps_per_second": 0.383, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 9.29, |
|
"grad_norm": 42873.71484375, |
|
"learning_rate": 1.4285714285714286e-06, |
|
"loss": 0.107, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 64285.99609375, |
|
"learning_rate": 0.0, |
|
"loss": 0.1009, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9113924050632911, |
|
"eval_loss": 0.24741442501544952, |
|
"eval_runtime": 7.9653, |
|
"eval_samples_per_second": 19.836, |
|
"eval_steps_per_second": 0.377, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 140, |
|
"total_flos": 6.912285473621606e+17, |
|
"train_loss": 0.2789982352937971, |
|
"train_runtime": 718.4172, |
|
"train_samples_per_second": 12.416, |
|
"train_steps_per_second": 0.195 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 140, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 100, |
|
"total_flos": 6.912285473621606e+17, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|