{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9984,
  "eval_steps": 100,
  "global_step": 156,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "learning_rate": 3.125e-07,
      "logits/chosen": -2.556900978088379,
      "logits/rejected": -2.42356276512146,
      "logps/chosen": -384.0308532714844,
      "logps/rejected": -355.0808410644531,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.06,
      "learning_rate": 3.125e-06,
      "logits/chosen": -2.611865997314453,
      "logits/rejected": -2.5766751766204834,
      "logps/chosen": -304.4154052734375,
      "logps/rejected": -320.59613037109375,
      "loss": 0.6929,
      "rewards/accuracies": 0.4791666567325592,
      "rewards/chosen": 0.003964083269238472,
      "rewards/margins": 0.000560062238946557,
      "rewards/rejected": 0.003404021030291915,
      "step": 10
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.989935734988098e-06,
      "logits/chosen": -2.6072850227355957,
      "logits/rejected": -2.522531270980835,
      "logps/chosen": -300.6770324707031,
      "logps/rejected": -291.0304870605469,
      "loss": 0.6884,
      "rewards/accuracies": 0.606249988079071,
      "rewards/chosen": 0.0014870109735056758,
      "rewards/margins": 0.011296196840703487,
      "rewards/rejected": -0.009809186682105064,
      "step": 20
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.8776412907378845e-06,
      "logits/chosen": -2.6037135124206543,
      "logits/rejected": -2.523104190826416,
      "logps/chosen": -316.41357421875,
      "logps/rejected": -328.901611328125,
      "loss": 0.6823,
      "rewards/accuracies": 0.6156250238418579,
      "rewards/chosen": -0.03885359317064285,
      "rewards/margins": 0.034083932638168335,
      "rewards/rejected": -0.07293752580881119,
      "step": 30
    },
    {
      "epoch": 0.26,
      "learning_rate": 4.646121984004666e-06,
      "logits/chosen": -2.5474514961242676,
      "logits/rejected": -2.465183734893799,
      "logps/chosen": -314.0546569824219,
      "logps/rejected": -331.37213134765625,
      "loss": 0.6678,
      "rewards/accuracies": 0.596875011920929,
      "rewards/chosen": -0.09560319036245346,
      "rewards/margins": 0.046604275703430176,
      "rewards/rejected": -0.14220745861530304,
      "step": 40
    },
    {
      "epoch": 0.32,
      "learning_rate": 4.3069871595684795e-06,
      "logits/chosen": -2.5767412185668945,
      "logits/rejected": -2.5177969932556152,
      "logps/chosen": -303.9281311035156,
      "logps/rejected": -316.2641296386719,
      "loss": 0.6649,
      "rewards/accuracies": 0.640625,
      "rewards/chosen": -0.10496363788843155,
      "rewards/margins": 0.07360312342643738,
      "rewards/rejected": -0.17856675386428833,
      "step": 50
    },
    {
      "epoch": 0.38,
      "learning_rate": 3.8772424536302565e-06,
      "logits/chosen": -2.5027236938476562,
      "logits/rejected": -2.4811007976531982,
      "logps/chosen": -309.5362243652344,
      "logps/rejected": -332.8122253417969,
      "loss": 0.6648,
      "rewards/accuracies": 0.643750011920929,
      "rewards/chosen": -0.12640534341335297,
      "rewards/margins": 0.08432017266750336,
      "rewards/rejected": -0.21072551608085632,
      "step": 60
    },
    {
      "epoch": 0.45,
      "learning_rate": 3.3784370602033572e-06,
      "logits/chosen": -2.476938486099243,
      "logits/rejected": -2.454585313796997,
      "logps/chosen": -311.40582275390625,
      "logps/rejected": -342.4869384765625,
      "loss": 0.6579,
      "rewards/accuracies": 0.6312500238418579,
      "rewards/chosen": -0.14647440612316132,
      "rewards/margins": 0.11680541932582855,
      "rewards/rejected": -0.26327982544898987,
      "step": 70
    },
    {
      "epoch": 0.51,
      "learning_rate": 2.835583164544139e-06,
      "logits/chosen": -2.5043561458587646,
      "logits/rejected": -2.4481701850891113,
      "logps/chosen": -330.54974365234375,
      "logps/rejected": -360.7945556640625,
      "loss": 0.655,
      "rewards/accuracies": 0.637499988079071,
      "rewards/chosen": -0.1902867704629898,
      "rewards/margins": 0.09953726828098297,
      "rewards/rejected": -0.2898240387439728,
      "step": 80
    },
    {
      "epoch": 0.58,
      "learning_rate": 2.2759017277414165e-06,
      "logits/chosen": -2.5335912704467773,
      "logits/rejected": -2.4210548400878906,
      "logps/chosen": -342.34173583984375,
      "logps/rejected": -353.93780517578125,
      "loss": 0.6512,
      "rewards/accuracies": 0.671875,
      "rewards/chosen": -0.18997912108898163,
      "rewards/margins": 0.11557333171367645,
      "rewards/rejected": -0.30555248260498047,
      "step": 90
    },
    {
      "epoch": 0.64,
      "learning_rate": 1.7274575140626318e-06,
      "logits/chosen": -2.4827916622161865,
      "logits/rejected": -2.40724515914917,
      "logps/chosen": -337.3109436035156,
      "logps/rejected": -340.58880615234375,
      "loss": 0.6563,
      "rewards/accuracies": 0.6156250238418579,
      "rewards/chosen": -0.1919889748096466,
      "rewards/margins": 0.11718972772359848,
      "rewards/rejected": -0.3091786801815033,
      "step": 100
    },
    {
      "epoch": 0.64,
      "eval_logits/chosen": -2.445232629776001,
      "eval_logits/rejected": -2.3392903804779053,
      "eval_logps/chosen": -329.4806213378906,
      "eval_logps/rejected": -334.0378723144531,
      "eval_loss": 0.6402544379234314,
      "eval_rewards/accuracies": 0.6159999966621399,
      "eval_rewards/chosen": -0.3107789158821106,
      "eval_rewards/margins": 0.13125000894069672,
      "eval_rewards/rejected": -0.4420289993286133,
      "eval_runtime": 384.1641,
      "eval_samples_per_second": 5.206,
      "eval_steps_per_second": 0.651,
      "step": 100
    },
    {
      "epoch": 0.7,
      "learning_rate": 1.217751806485235e-06,
      "logits/chosen": -2.487079620361328,
      "logits/rejected": -2.445570468902588,
      "logps/chosen": -350.1665344238281,
      "logps/rejected": -376.484375,
      "loss": 0.6497,
      "rewards/accuracies": 0.625,
      "rewards/chosen": -0.2455425262451172,
      "rewards/margins": 0.10617774724960327,
      "rewards/rejected": -0.35172027349472046,
      "step": 110
    },
    {
      "epoch": 0.77,
      "learning_rate": 7.723433775328385e-07,
      "logits/chosen": -2.469482183456421,
      "logits/rejected": -2.386582612991333,
      "logps/chosen": -364.2555236816406,
      "logps/rejected": -356.3113708496094,
      "loss": 0.6511,
      "rewards/accuracies": 0.6187499761581421,
      "rewards/chosen": -0.24673068523406982,
      "rewards/margins": 0.10269691050052643,
      "rewards/rejected": -0.34942758083343506,
      "step": 120
    },
    {
      "epoch": 0.83,
      "learning_rate": 4.1356686569674344e-07,
      "logits/chosen": -2.4042646884918213,
      "logits/rejected": -2.3691623210906982,
      "logps/chosen": -304.0820007324219,
      "logps/rejected": -340.99212646484375,
      "loss": 0.6432,
      "rewards/accuracies": 0.653124988079071,
      "rewards/chosen": -0.22045591473579407,
      "rewards/margins": 0.12117135524749756,
      "rewards/rejected": -0.3416272699832916,
      "step": 130
    },
    {
      "epoch": 0.9,
      "learning_rate": 1.59412823400657e-07,
      "logits/chosen": -2.393618583679199,
      "logits/rejected": -2.3461203575134277,
      "logps/chosen": -338.6348571777344,
      "logps/rejected": -369.8921203613281,
      "loss": 0.6433,
      "rewards/accuracies": 0.659375011920929,
      "rewards/chosen": -0.230948805809021,
      "rewards/margins": 0.1425984799861908,
      "rewards/rejected": -0.3735472559928894,
      "step": 140
    },
    {
      "epoch": 0.96,
      "learning_rate": 2.262559558016325e-08,
      "logits/chosen": -2.4482178688049316,
      "logits/rejected": -2.404700756072998,
      "logps/chosen": -317.01458740234375,
      "logps/rejected": -332.4150695800781,
      "loss": 0.6451,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": -0.25384414196014404,
      "rewards/margins": 0.13600240647792816,
      "rewards/rejected": -0.389846533536911,
      "step": 150
    },
    {
      "epoch": 1.0,
      "step": 156,
      "total_flos": 0.0,
      "train_loss": 0.6604029192374303,
      "train_runtime": 7258.1064,
      "train_samples_per_second": 2.756,
      "train_steps_per_second": 0.021
    }
  ],
  "logging_steps": 10,
  "max_steps": 156,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}