{ "best_metric": 1.5659970045089722, "best_model_checkpoint": "finetuned-websites/checkpoint-200", "epoch": 10.0, "eval_steps": 50, "global_step": 240, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.4166666666666667, "grad_norm": 4.125311851501465, "learning_rate": 1.0000000000000002e-06, "loss": 1.9272, "step": 10 }, { "epoch": 0.8333333333333334, "grad_norm": 3.0228281021118164, "learning_rate": 2.0000000000000003e-06, "loss": 1.928, "step": 20 }, { "epoch": 1.25, "grad_norm": 2.5070629119873047, "learning_rate": 3e-06, "loss": 1.921, "step": 30 }, { "epoch": 1.6666666666666665, "grad_norm": 2.7787866592407227, "learning_rate": 4.000000000000001e-06, "loss": 1.8972, "step": 40 }, { "epoch": 2.0833333333333335, "grad_norm": 2.7862040996551514, "learning_rate": 5e-06, "loss": 1.8678, "step": 50 }, { "epoch": 2.0833333333333335, "eval_accuracy": 0.3088235294117647, "eval_loss": 1.8965561389923096, "eval_runtime": 2.6013, "eval_samples_per_second": 26.141, "eval_steps_per_second": 3.46, "step": 50 }, { "epoch": 2.5, "grad_norm": 3.540709972381592, "learning_rate": 6e-06, "loss": 1.8526, "step": 60 }, { "epoch": 2.9166666666666665, "grad_norm": 3.426781415939331, "learning_rate": 7.000000000000001e-06, "loss": 1.8198, "step": 70 }, { "epoch": 3.3333333333333335, "grad_norm": 4.007580757141113, "learning_rate": 8.000000000000001e-06, "loss": 1.7369, "step": 80 }, { "epoch": 3.75, "grad_norm": 4.467859745025635, "learning_rate": 9e-06, "loss": 1.7144, "step": 90 }, { "epoch": 4.166666666666667, "grad_norm": 3.9431729316711426, "learning_rate": 1e-05, "loss": 1.6483, "step": 100 }, { "epoch": 4.166666666666667, "eval_accuracy": 0.3382352941176471, "eval_loss": 1.793816089630127, "eval_runtime": 2.9764, "eval_samples_per_second": 22.847, "eval_steps_per_second": 3.024, "step": 100 }, { "epoch": 4.583333333333333, "grad_norm": 3.2608771324157715, "learning_rate": 1.1000000000000001e-05, "loss": 1.5856, "step": 110 }, { "epoch": 5.0, "grad_norm": 3.638972520828247, "learning_rate": 1.2e-05, "loss": 1.5157, "step": 120 }, { "epoch": 5.416666666666667, "grad_norm": 3.3878164291381836, "learning_rate": 1.3000000000000001e-05, "loss": 1.4388, "step": 130 }, { "epoch": 5.833333333333333, "grad_norm": 4.086641311645508, "learning_rate": 1.4000000000000001e-05, "loss": 1.4522, "step": 140 }, { "epoch": 6.25, "grad_norm": 3.5550928115844727, "learning_rate": 1.5e-05, "loss": 1.3419, "step": 150 }, { "epoch": 6.25, "eval_accuracy": 0.38235294117647056, "eval_loss": 1.6614487171173096, "eval_runtime": 2.1852, "eval_samples_per_second": 31.118, "eval_steps_per_second": 4.119, "step": 150 }, { "epoch": 6.666666666666667, "grad_norm": 4.774816036224365, "learning_rate": 1.6000000000000003e-05, "loss": 1.3147, "step": 160 }, { "epoch": 7.083333333333333, "grad_norm": 4.092309951782227, "learning_rate": 1.7000000000000003e-05, "loss": 1.2643, "step": 170 }, { "epoch": 7.5, "grad_norm": 4.992920398712158, "learning_rate": 1.8e-05, "loss": 1.1924, "step": 180 }, { "epoch": 7.916666666666667, "grad_norm": 6.251681327819824, "learning_rate": 1.9e-05, "loss": 1.1461, "step": 190 }, { "epoch": 8.333333333333334, "grad_norm": 5.500473499298096, "learning_rate": 2e-05, "loss": 1.1189, "step": 200 }, { "epoch": 8.333333333333334, "eval_accuracy": 0.39705882352941174, "eval_loss": 1.5659970045089722, "eval_runtime": 2.0786, "eval_samples_per_second": 32.715, "eval_steps_per_second": 4.33, "step": 200 }, { "epoch": 8.75, "grad_norm": 4.436168670654297, "learning_rate": 2.1e-05, "loss": 1.0734, "step": 210 }, { "epoch": 9.166666666666666, "grad_norm": 4.706101417541504, "learning_rate": 2.2000000000000003e-05, "loss": 0.9833, "step": 220 }, { "epoch": 9.583333333333334, "grad_norm": 4.173801898956299, "learning_rate": 2.3000000000000003e-05, "loss": 0.9902, "step": 230 }, { "epoch": 10.0, "grad_norm": 5.312504291534424, "learning_rate": 2.4e-05, "loss": 0.9339, "step": 240 }, { "epoch": 10.0, "step": 240, "total_flos": 2.9603266596900864e+17, "train_loss": 1.4860339760780334, "train_runtime": 184.3796, "train_samples_per_second": 20.718, "train_steps_per_second": 1.302 } ], "logging_steps": 10, "max_steps": 240, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.9603266596900864e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }