{ "best_metric": 1.1082465648651123, "best_model_checkpoint": "./outputs/llava-mistral/RLAIF-V-Cosi-q0_25/checkpoint-200", "epoch": 3.0, "eval_steps": 50, "global_step": 216, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06944444444444445, "grad_norm": 27.170432023765155, "learning_rate": 5e-07, "loss": 1.6754, "step": 5 }, { "epoch": 0.1388888888888889, "grad_norm": 14.247587906112043, "learning_rate": 1e-06, "loss": 1.5482, "step": 10 }, { "epoch": 0.20833333333333334, "grad_norm": 7.671787021030683, "learning_rate": 9.985471028179154e-07, "loss": 1.3299, "step": 15 }, { "epoch": 0.2777777777777778, "grad_norm": 6.453004152776314, "learning_rate": 9.94196854912548e-07, "loss": 1.2749, "step": 20 }, { "epoch": 0.3472222222222222, "grad_norm": 5.369919876340136, "learning_rate": 9.869745381355905e-07, "loss": 1.2236, "step": 25 }, { "epoch": 0.4166666666666667, "grad_norm": 5.219833369218594, "learning_rate": 9.769221256218162e-07, "loss": 1.1995, "step": 30 }, { "epoch": 0.4861111111111111, "grad_norm": 5.172890690456561, "learning_rate": 9.64098037858483e-07, "loss": 1.1886, "step": 35 }, { "epoch": 0.5555555555555556, "grad_norm": 4.842256114253865, "learning_rate": 9.485768031694871e-07, "loss": 1.1867, "step": 40 }, { "epoch": 0.625, "grad_norm": 4.960268983249933, "learning_rate": 9.304486245873971e-07, "loss": 1.1606, "step": 45 }, { "epoch": 0.6944444444444444, "grad_norm": 5.100919525621712, "learning_rate": 9.098188556305262e-07, "loss": 1.1561, "step": 50 }, { "epoch": 0.6944444444444444, "eval_loss": 1.1514836549758911, "eval_runtime": 36.3445, "eval_samples_per_second": 56.212, "eval_steps_per_second": 0.88, "step": 50 }, { "epoch": 0.7638888888888888, "grad_norm": 4.954408629703573, "learning_rate": 8.868073880316123e-07, "loss": 1.1461, "step": 55 }, { "epoch": 0.8333333333333334, "grad_norm": 5.005034060908683, "learning_rate": 8.615479549763755e-07, "loss": 1.1405, "step": 60 }, { "epoch": 0.9027777777777778, "grad_norm": 4.953280817436457, "learning_rate": 8.341873539012443e-07, "loss": 1.1317, "step": 65 }, { "epoch": 0.9722222222222222, "grad_norm": 4.812467946627597, "learning_rate": 8.048845933670271e-07, "loss": 1.1163, "step": 70 }, { "epoch": 1.0416666666666667, "grad_norm": 5.832648715058256, "learning_rate": 7.738099689665539e-07, "loss": 1.0551, "step": 75 }, { "epoch": 1.1111111111111112, "grad_norm": 5.011097198106542, "learning_rate": 7.41144073636728e-07, "loss": 1.0026, "step": 80 }, { "epoch": 1.1805555555555556, "grad_norm": 5.207816191466898, "learning_rate": 7.070767481266492e-07, "loss": 1.0002, "step": 85 }, { "epoch": 1.25, "grad_norm": 5.288582074627771, "learning_rate": 6.718059777212565e-07, "loss": 0.9809, "step": 90 }, { "epoch": 1.3194444444444444, "grad_norm": 5.272544049813405, "learning_rate": 6.355367416322778e-07, "loss": 0.982, "step": 95 }, { "epoch": 1.3888888888888888, "grad_norm": 5.34229662194457, "learning_rate": 5.984798217433531e-07, "loss": 0.9725, "step": 100 }, { "epoch": 1.3888888888888888, "eval_loss": 1.1154539585113525, "eval_runtime": 35.7418, "eval_samples_per_second": 57.16, "eval_steps_per_second": 0.895, "step": 100 }, { "epoch": 1.4583333333333333, "grad_norm": 5.332872312807783, "learning_rate": 5.608505776324157e-07, "loss": 0.9741, "step": 105 }, { "epoch": 1.5277777777777777, "grad_norm": 5.039881296049518, "learning_rate": 5.228676949903973e-07, "loss": 0.9787, "step": 110 }, { "epoch": 1.5972222222222223, "grad_norm": 
5.272528296120645, "learning_rate": 4.847519147099294e-07, "loss": 0.9577, "step": 115 }, { "epoch": 1.6666666666666665, "grad_norm": 6.108766118631545, "learning_rate": 4.46724750030062e-07, "loss": 0.9597, "step": 120 }, { "epoch": 1.7361111111111112, "grad_norm": 5.206098606588932, "learning_rate": 4.0900719919241935e-07, "loss": 0.9648, "step": 125 }, { "epoch": 1.8055555555555556, "grad_norm": 5.197849839562957, "learning_rate": 3.7181846109031e-07, "loss": 0.947, "step": 130 }, { "epoch": 1.875, "grad_norm": 5.3111405062375905, "learning_rate": 3.353746613749093e-07, "loss": 0.9594, "step": 135 }, { "epoch": 1.9444444444444444, "grad_norm": 5.445921545754994, "learning_rate": 2.9988759642186093e-07, "loss": 0.9702, "step": 140 }, { "epoch": 2.013888888888889, "grad_norm": 5.933633048275964, "learning_rate": 2.655635024578483e-07, "loss": 0.9456, "step": 145 }, { "epoch": 2.0833333333333335, "grad_norm": 5.549115678624608, "learning_rate": 2.3260185700046292e-07, "loss": 0.8717, "step": 150 }, { "epoch": 2.0833333333333335, "eval_loss": 1.1032688617706299, "eval_runtime": 35.642, "eval_samples_per_second": 57.32, "eval_steps_per_second": 0.898, "step": 150 }, { "epoch": 2.1527777777777777, "grad_norm": 5.425494474442028, "learning_rate": 2.0119421957691218e-07, "loss": 0.8643, "step": 155 }, { "epoch": 2.2222222222222223, "grad_norm": 5.818398592523753, "learning_rate": 1.7152311845883094e-07, "loss": 0.8643, "step": 160 }, { "epoch": 2.2916666666666665, "grad_norm": 5.873632446353599, "learning_rate": 1.4376098988303404e-07, "loss": 0.8696, "step": 165 }, { "epoch": 2.361111111111111, "grad_norm": 5.4343592118456785, "learning_rate": 1.1806917592302761e-07, "loss": 0.8444, "step": 170 }, { "epoch": 2.4305555555555554, "grad_norm": 5.333034391959962, "learning_rate": 9.459698683523204e-08, "loss": 0.8694, "step": 175 }, { "epoch": 2.5, "grad_norm": 5.655910129037808, "learning_rate": 7.348083332917926e-08, "loss": 0.8773, "step": 180 }, { "epoch": 2.5694444444444446, "grad_norm": 5.278576061447754, "learning_rate": 5.484343380457124e-08, "loss": 0.8622, "step": 185 }, { "epoch": 2.638888888888889, "grad_norm": 5.493429379631295, "learning_rate": 3.879310116241041e-08, "loss": 0.8657, "step": 190 }, { "epoch": 2.7083333333333335, "grad_norm": 5.821505606490213, "learning_rate": 2.5423113334966218e-08, "loss": 0.8589, "step": 195 }, { "epoch": 2.7777777777777777, "grad_norm": 5.676459483240144, "learning_rate": 1.4811171192794624e-08, "loss": 0.8608, "step": 200 }, { "epoch": 2.7777777777777777, "eval_loss": 1.1082465648651123, "eval_runtime": 35.5876, "eval_samples_per_second": 57.408, "eval_steps_per_second": 0.899, "step": 200 }, { "epoch": 2.8472222222222223, "grad_norm": 5.585848892693237, "learning_rate": 7.018946979234997e-09, "loss": 0.8773, "step": 205 }, { "epoch": 2.9166666666666665, "grad_norm": 5.591393456061243, "learning_rate": 2.0917258966953734e-09, "loss": 0.8621, "step": 210 }, { "epoch": 2.986111111111111, "grad_norm": 5.629015994718156, "learning_rate": 5.814292768108187e-11, "loss": 0.8677, "step": 215 }, { "epoch": 3.0, "step": 216, "total_flos": 1273399955423232.0, "train_loss": 1.028549515262798, "train_runtime": 3153.0326, "train_samples_per_second": 17.493, "train_steps_per_second": 0.069 } ], "logging_steps": 5, "max_steps": 216, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, 
"should_training_stop": true }, "attributes": {} } }, "total_flos": 1273399955423232.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }