llm2vec-scandi-mntp / trainer_state.json
jealk's picture
Checkpoint 2000 of MNTP on scandinavian Wiki with llama-swe-basemodel
67a275a verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.19063959584405682,
"eval_steps": 200,
"global_step": 2000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.019063959584405682,
"eval_accuracy": 0.6766402862189922,
"eval_loss": 1.5877882242202759,
"eval_runtime": 1128.2057,
"eval_samples_per_second": 15.517,
"eval_steps_per_second": 0.486,
"step": 200
},
{
"epoch": 0.038127919168811364,
"eval_accuracy": 0.7249642161946639,
"eval_loss": 1.3184435367584229,
"eval_runtime": 1127.8694,
"eval_samples_per_second": 15.521,
"eval_steps_per_second": 0.486,
"step": 400
},
{
"epoch": 0.047659898961014205,
"grad_norm": 2.379204750061035,
"learning_rate": 4.920566835064976e-05,
"loss": 1.8513,
"step": 500
},
{
"epoch": 0.057191878753217046,
"eval_accuracy": 0.7487254293909154,
"eval_loss": 1.188010334968567,
"eval_runtime": 1128.193,
"eval_samples_per_second": 15.517,
"eval_steps_per_second": 0.486,
"step": 600
},
{
"epoch": 0.07625583833762273,
"eval_accuracy": 0.7630523662181098,
"eval_loss": 1.1084955930709839,
"eval_runtime": 1128.3934,
"eval_samples_per_second": 15.514,
"eval_steps_per_second": 0.486,
"step": 800
},
{
"epoch": 0.09531979792202841,
"grad_norm": 2.218456268310547,
"learning_rate": 4.841133670129953e-05,
"loss": 1.1387,
"step": 1000
},
{
"epoch": 0.09531979792202841,
"eval_accuracy": 0.7720419453759662,
"eval_loss": 1.057796597480774,
"eval_runtime": 1128.1566,
"eval_samples_per_second": 15.517,
"eval_steps_per_second": 0.486,
"step": 1000
},
{
"epoch": 0.11438375750643409,
"eval_accuracy": 0.7792805618298705,
"eval_loss": 1.0198323726654053,
"eval_runtime": 1128.4409,
"eval_samples_per_second": 15.513,
"eval_steps_per_second": 0.486,
"step": 1200
},
{
"epoch": 0.13344771709083977,
"eval_accuracy": 0.7836537788805386,
"eval_loss": 0.9978336691856384,
"eval_runtime": 1127.624,
"eval_samples_per_second": 15.525,
"eval_steps_per_second": 0.486,
"step": 1400
},
{
"epoch": 0.1429796968830426,
"grad_norm": 2.3090548515319824,
"learning_rate": 4.761700505194929e-05,
"loss": 1.0151,
"step": 1500
},
{
"epoch": 0.15251167667524546,
"eval_accuracy": 0.7889919141146797,
"eval_loss": 0.9722086191177368,
"eval_runtime": 1128.0241,
"eval_samples_per_second": 15.519,
"eval_steps_per_second": 0.486,
"step": 1600
},
{
"epoch": 0.17157563625965114,
"eval_accuracy": 0.7922293997022891,
"eval_loss": 0.9535930156707764,
"eval_runtime": 1128.3297,
"eval_samples_per_second": 15.515,
"eval_steps_per_second": 0.486,
"step": 1800
},
{
"epoch": 0.19063959584405682,
"grad_norm": 2.6724276542663574,
"learning_rate": 4.682267340259906e-05,
"loss": 0.9603,
"step": 2000
},
{
"epoch": 0.19063959584405682,
"eval_accuracy": 0.7961234680845941,
"eval_loss": 0.9353302121162415,
"eval_runtime": 1128.8643,
"eval_samples_per_second": 15.508,
"eval_steps_per_second": 0.485,
"step": 2000
}
],
"logging_steps": 500,
"max_steps": 31473,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 1000,
"total_flos": 1.483774567120896e+18,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}