open_llama_3b_v2-qlora / trainer_state.json
Sohaib's picture
Upload 10 files
a1ff1b2
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 100,
"global_step": 891,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.34,
"learning_rate": 1e-05,
"loss": 2.4387,
"step": 100
},
{
"epoch": 0.34,
"eval_loss": 1.9587514400482178,
"eval_runtime": 41.9011,
"eval_samples_per_second": 5.966,
"eval_steps_per_second": 0.764,
"step": 100
},
{
"epoch": 0.67,
"learning_rate": 1e-05,
"loss": 1.8451,
"step": 200
},
{
"epoch": 0.67,
"eval_loss": 1.839536428451538,
"eval_runtime": 42.0627,
"eval_samples_per_second": 5.944,
"eval_steps_per_second": 0.761,
"step": 200
},
{
"epoch": 1.01,
"learning_rate": 1e-05,
"loss": 1.7983,
"step": 300
},
{
"epoch": 1.01,
"eval_loss": 1.8240922689437866,
"eval_runtime": 41.9798,
"eval_samples_per_second": 5.955,
"eval_steps_per_second": 0.762,
"step": 300
},
{
"epoch": 1.35,
"learning_rate": 1e-05,
"loss": 1.7717,
"step": 400
},
{
"epoch": 1.35,
"eval_loss": 1.8145651817321777,
"eval_runtime": 41.9722,
"eval_samples_per_second": 5.956,
"eval_steps_per_second": 0.762,
"step": 400
},
{
"epoch": 1.68,
"learning_rate": 1e-05,
"loss": 1.7786,
"step": 500
},
{
"epoch": 1.68,
"eval_loss": 1.8092119693756104,
"eval_runtime": 41.9735,
"eval_samples_per_second": 5.956,
"eval_steps_per_second": 0.762,
"step": 500
},
{
"epoch": 2.02,
"learning_rate": 1e-05,
"loss": 1.7616,
"step": 600
},
{
"epoch": 2.02,
"eval_loss": 1.803665280342102,
"eval_runtime": 41.6297,
"eval_samples_per_second": 6.005,
"eval_steps_per_second": 0.769,
"step": 600
},
{
"epoch": 2.36,
"learning_rate": 1e-05,
"loss": 1.7515,
"step": 700
},
{
"epoch": 2.36,
"eval_loss": 1.8004812002182007,
"eval_runtime": 42.1099,
"eval_samples_per_second": 5.937,
"eval_steps_per_second": 0.76,
"step": 700
},
{
"epoch": 2.69,
"learning_rate": 1e-05,
"loss": 1.7505,
"step": 800
},
{
"epoch": 2.69,
"eval_loss": 1.7984752655029297,
"eval_runtime": 42.0116,
"eval_samples_per_second": 5.951,
"eval_steps_per_second": 0.762,
"step": 800
}
],
"logging_steps": 100,
"max_steps": 891,
"num_train_epochs": 3,
"save_steps": 500,
"total_flos": 2.541658938273792e+16,
"trial_name": null,
"trial_params": null
}