{
  "best_metric": 0.47333332896232605,
  "best_model_checkpoint": "./zephyr/09-04-24-Weni-WeniGPT-Agents-Zephyr-1.0.16-KTO_Hyperparameter search, altering lora params for KTO task.-2_max_steps-145_batch_16_2024-04-09_ppid_9/checkpoint-100",
  "epoch": 0.684931506849315,
  "eval_steps": 50,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.14,
      "grad_norm": 0.0,
      "learning_rate": 0.00018,
      "loss": 0.4713,
      "step": 20,
      "train/kl": 0.0,
      "train/logps/chosen": -1423.408907312925,
      "train/logps/rejected": -1301.3419436416184,
      "train/rewards/chosen": -113.72140731292517,
      "train/rewards/margins": -11.091552724774871,
      "train/rewards/rejected": -102.6298545881503
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.0,
      "learning_rate": 0.00015142857142857143,
      "loss": 0.45,
      "step": 40,
      "train/kl": 0.0,
      "train/logps/chosen": -2404.9383680555557,
      "train/logps/rejected": -2267.5051491477275,
      "train/rewards/chosen": -211.85392252604166,
      "train/rewards/margins": -15.253491950757564,
      "train/rewards/rejected": -196.6004305752841
    },
    {
      "epoch": 0.34,
      "eval/kl": 0.0,
      "eval/logps/chosen": -2179.756602112676,
      "eval/logps/rejected": -1974.1839398734178,
      "eval/rewards/chosen": -189.60969135123239,
      "eval/rewards/margins": -18.62818423097923,
      "eval/rewards/rejected": -170.98150712025316,
      "eval_loss": 0.47333332896232605,
      "eval_runtime": 140.7497,
      "eval_samples_per_second": 2.131,
      "eval_steps_per_second": 0.533,
      "step": 50
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.0,
      "learning_rate": 0.00012285714285714287,
      "loss": 0.4469,
      "step": 60,
      "train/kl": 0.0,
      "train/logps/chosen": -2463.7814685314684,
      "train/logps/rejected": -2104.4410310734465,
      "train/rewards/chosen": -216.41613854895104,
      "train/rewards/margins": -33.82629479895104,
      "train/rewards/rejected": -182.58984375
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.0,
      "learning_rate": 9.428571428571429e-05,
      "loss": 0.4281,
      "step": 80,
      "train/kl": 0.0,
      "train/logps/chosen": -2216.6425638686133,
      "train/logps/rejected": -2146.411031420765,
      "train/rewards/chosen": -193.77794251824818,
      "train/rewards/margins": -6.814528925351993,
      "train/rewards/rejected": -186.9634135928962
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.0,
      "learning_rate": 6.571428571428571e-05,
      "loss": 0.5031,
      "step": 100,
      "train/kl": 0.0,
      "train/logps/chosen": -2453.4375,
      "train/logps/rejected": -2285.3565251572327,
      "train/rewards/chosen": -214.367794060559,
      "train/rewards/margins": -16.124685275181633,
      "train/rewards/rejected": -198.24310878537736
    },
    {
      "epoch": 0.68,
      "eval/kl": 0.0,
      "eval/logps/chosen": -2181.0286091549297,
      "eval/logps/rejected": -1975.4440268987341,
      "eval/rewards/chosen": -189.73686454665494,
      "eval/rewards/margins": -18.629343779249865,
      "eval/rewards/rejected": -171.10752076740508,
      "eval_loss": 0.47333332896232605,
      "eval_runtime": 140.6959,
      "eval_samples_per_second": 2.132,
      "eval_steps_per_second": 0.533,
      "step": 100
    }
  ],
  "logging_steps": 20,
  "max_steps": 145,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}