|
{ |
|
"best_metric": 2.3539514541625977, |
|
"best_model_checkpoint": "desarrolloasesoreslocales/legal-mistral/checkpoint-170", |
|
"epoch": 3.0, |
|
"eval_steps": 10, |
|
"global_step": 174, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.885057471264368e-05, |
|
"loss": 2.4243, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_loss": 2.5809202194213867, |
|
"eval_runtime": 79.9298, |
|
"eval_samples_per_second": 11.623, |
|
"eval_steps_per_second": 1.464, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.770114942528736e-05, |
|
"loss": 2.32, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_loss": 2.5187058448791504, |
|
"eval_runtime": 79.9944, |
|
"eval_samples_per_second": 11.613, |
|
"eval_steps_per_second": 1.463, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.6551724137931037e-05, |
|
"loss": 2.2652, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_loss": 2.479558229446411, |
|
"eval_runtime": 79.9192, |
|
"eval_samples_per_second": 11.624, |
|
"eval_steps_per_second": 1.464, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.540229885057471e-05, |
|
"loss": 2.2424, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_loss": 2.449814796447754, |
|
"eval_runtime": 80.3275, |
|
"eval_samples_per_second": 11.565, |
|
"eval_steps_per_second": 1.457, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.4252873563218392e-05, |
|
"loss": 2.1759, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_loss": 2.430422782897949, |
|
"eval_runtime": 80.3296, |
|
"eval_samples_per_second": 11.565, |
|
"eval_steps_per_second": 1.456, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 1.310344827586207e-05, |
|
"loss": 2.1698, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"eval_loss": 2.4160897731781006, |
|
"eval_runtime": 79.8167, |
|
"eval_samples_per_second": 11.639, |
|
"eval_steps_per_second": 1.466, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 1.1954022988505748e-05, |
|
"loss": 2.1337, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"eval_loss": 2.40354585647583, |
|
"eval_runtime": 79.8387, |
|
"eval_samples_per_second": 11.636, |
|
"eval_steps_per_second": 1.465, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 1.0804597701149427e-05, |
|
"loss": 2.1193, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"eval_loss": 2.3962202072143555, |
|
"eval_runtime": 79.8113, |
|
"eval_samples_per_second": 11.64, |
|
"eval_steps_per_second": 1.466, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 9.655172413793105e-06, |
|
"loss": 2.0928, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"eval_loss": 2.3851864337921143, |
|
"eval_runtime": 79.8078, |
|
"eval_samples_per_second": 11.64, |
|
"eval_steps_per_second": 1.466, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 8.505747126436782e-06, |
|
"loss": 2.1072, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"eval_loss": 2.376269817352295, |
|
"eval_runtime": 79.9299, |
|
"eval_samples_per_second": 11.623, |
|
"eval_steps_per_second": 1.464, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 7.35632183908046e-06, |
|
"loss": 2.0701, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"eval_loss": 2.3708786964416504, |
|
"eval_runtime": 79.9941, |
|
"eval_samples_per_second": 11.613, |
|
"eval_steps_per_second": 1.463, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 6.206896551724138e-06, |
|
"loss": 2.0584, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"eval_loss": 2.3670151233673096, |
|
"eval_runtime": 80.0335, |
|
"eval_samples_per_second": 11.608, |
|
"eval_steps_per_second": 1.462, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 5.057471264367817e-06, |
|
"loss": 2.0569, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"eval_loss": 2.3638148307800293, |
|
"eval_runtime": 80.0359, |
|
"eval_samples_per_second": 11.607, |
|
"eval_steps_per_second": 1.462, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 3.908045977011495e-06, |
|
"loss": 2.0396, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"eval_loss": 2.3619213104248047, |
|
"eval_runtime": 80.0238, |
|
"eval_samples_per_second": 11.609, |
|
"eval_steps_per_second": 1.462, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 2.7586206896551725e-06, |
|
"loss": 2.0406, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"eval_loss": 2.3583548069000244, |
|
"eval_runtime": 80.0658, |
|
"eval_samples_per_second": 11.603, |
|
"eval_steps_per_second": 1.461, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 1.6091954022988506e-06, |
|
"loss": 2.0259, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"eval_loss": 2.3552348613739014, |
|
"eval_runtime": 80.0434, |
|
"eval_samples_per_second": 11.606, |
|
"eval_steps_per_second": 1.462, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 4.5977011494252875e-07, |
|
"loss": 2.0492, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"eval_loss": 2.3539514541625977, |
|
"eval_runtime": 80.0092, |
|
"eval_samples_per_second": 11.611, |
|
"eval_steps_per_second": 1.462, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 174, |
|
"total_flos": 7.500749066993664e+16, |
|
"train_loss": 2.138888589267073, |
|
"train_runtime": 4525.1585, |
|
"train_samples_per_second": 3.032, |
|
"train_steps_per_second": 0.038 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 2.3539514541625977, |
|
"eval_runtime": 80.117, |
|
"eval_samples_per_second": 11.596, |
|
"eval_steps_per_second": 1.46, |
|
"step": 174 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 174, |
|
"num_train_epochs": 3, |
|
"save_steps": 10, |
|
"total_flos": 7.500749066993664e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|