llm2vec-qwen2.5-0.5-instruct / trainer_state.json
bartekupartek's picture
End of training
289332c verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.08279516476237787,
"eval_steps": 100,
"global_step": 1000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.008279516476237788,
"eval_accuracy": 0.5510729122994245,
"eval_loss": 2.3376457691192627,
"eval_runtime": 6.8445,
"eval_samples_per_second": 59.61,
"eval_steps_per_second": 1.899,
"step": 100
},
{
"epoch": 0.016559032952475575,
"eval_accuracy": 0.5764662925666846,
"eval_loss": 2.173574447631836,
"eval_runtime": 6.6808,
"eval_samples_per_second": 61.071,
"eval_steps_per_second": 1.946,
"step": 200
},
{
"epoch": 0.024838549428713365,
"eval_accuracy": 0.5929699147520929,
"eval_loss": 2.0678670406341553,
"eval_runtime": 6.4137,
"eval_samples_per_second": 63.614,
"eval_steps_per_second": 2.027,
"step": 300
},
{
"epoch": 0.03311806590495115,
"eval_accuracy": 0.6055573666749765,
"eval_loss": 1.9839483499526978,
"eval_runtime": 6.4017,
"eval_samples_per_second": 63.734,
"eval_steps_per_second": 2.031,
"step": 400
},
{
"epoch": 0.04139758238118894,
"grad_norm": 8.5625,
"learning_rate": 4.931004029364685e-05,
"loss": 2.2761,
"step": 500
},
{
"epoch": 0.04139758238118894,
"eval_accuracy": 0.6084943562272814,
"eval_loss": 1.9611371755599976,
"eval_runtime": 6.4249,
"eval_samples_per_second": 63.503,
"eval_steps_per_second": 2.023,
"step": 500
},
{
"epoch": 0.04967709885742673,
"eval_accuracy": 0.6203082851637765,
"eval_loss": 1.905377984046936,
"eval_runtime": 6.4365,
"eval_samples_per_second": 63.388,
"eval_steps_per_second": 2.02,
"step": 600
},
{
"epoch": 0.057956615333664516,
"eval_accuracy": 0.6241699612328715,
"eval_loss": 1.8838109970092773,
"eval_runtime": 6.4118,
"eval_samples_per_second": 63.632,
"eval_steps_per_second": 2.028,
"step": 700
},
{
"epoch": 0.0662361318099023,
"eval_accuracy": 0.6295839990759813,
"eval_loss": 1.8403326272964478,
"eval_runtime": 6.397,
"eval_samples_per_second": 63.78,
"eval_steps_per_second": 2.032,
"step": 800
},
{
"epoch": 0.07451564828614009,
"eval_accuracy": 0.6300428691724719,
"eval_loss": 1.8234734535217285,
"eval_runtime": 6.4304,
"eval_samples_per_second": 63.449,
"eval_steps_per_second": 2.022,
"step": 900
},
{
"epoch": 0.08279516476237787,
"grad_norm": 7.65625,
"learning_rate": 4.862008058729371e-05,
"loss": 1.8887,
"step": 1000
},
{
"epoch": 0.08279516476237787,
"eval_accuracy": 0.6351211866350639,
"eval_loss": 1.7919981479644775,
"eval_runtime": 6.4,
"eval_samples_per_second": 63.75,
"eval_steps_per_second": 2.031,
"step": 1000
},
{
"epoch": 0.08279516476237787,
"step": 1000,
"total_flos": 2.198926000128e+16,
"train_loss": 2.0824306030273436,
"train_runtime": 944.4845,
"train_samples_per_second": 613.773,
"train_steps_per_second": 38.364
}
],
"logging_steps": 500,
"max_steps": 36234,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 200,
"total_flos": 2.198926000128e+16,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}