{ "best_metric": 0.9506395189108336, "best_model_checkpoint": "./output//25_roberta-large_nace_5__5e-6_0.01_0.06_07-21-22_10-40/checkpoint-54000", "epoch": 3.01255230125523, "global_step": 54000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.11, "learning_rate": 1.8594272963927112e-06, "loss": 0.2318, "step": 2000 }, { "epoch": 0.11, "eval_accuracy": 0.876702441473671, "eval_f1": 0.90677030703459, "eval_loss": 0.08175961673259735, "eval_roc_auc": 0.9387159436934914, "eval_runtime": 942.0949, "eval_samples_per_second": 33.825, "step": 2000 }, { "epoch": 0.22, "learning_rate": 3.7188545927854224e-06, "loss": 0.0732, "step": 4000 }, { "epoch": 0.22, "eval_accuracy": 0.8966296366032762, "eval_f1": 0.9234467455621302, "eval_loss": 0.060932476073503494, "eval_roc_auc": 0.9514088251746582, "eval_runtime": 942.3301, "eval_samples_per_second": 33.816, "step": 4000 }, { "epoch": 0.33, "learning_rate": 4.963084738922455e-06, "loss": 0.0584, "step": 6000 }, { "epoch": 0.33, "eval_accuracy": 0.8969434506998054, "eval_f1": 0.9278133577310156, "eval_loss": 0.05285193771123886, "eval_roc_auc": 0.9578889777085196, "eval_runtime": 943.2116, "eval_samples_per_second": 33.785, "step": 6000 }, { "epoch": 0.45, "learning_rate": 4.844386150248674e-06, "loss": 0.0521, "step": 8000 }, { "epoch": 0.45, "eval_accuracy": 0.9035649281365719, "eval_f1": 0.931070919922204, "eval_loss": 0.049892134964466095, "eval_roc_auc": 0.9581022789802229, "eval_runtime": 943.267, "eval_samples_per_second": 33.783, "step": 8000 }, { "epoch": 0.56, "learning_rate": 4.7256875615748935e-06, "loss": 0.0488, "step": 10000 }, { "epoch": 0.56, "eval_accuracy": 0.9092449632837507, "eval_f1": 0.9369276016030965, "eval_loss": 0.046968165785074234, "eval_roc_auc": 0.9633660743801509, "eval_runtime": 943.3238, "eval_samples_per_second": 33.781, "step": 10000 }, { "epoch": 0.67, "learning_rate": 4.606988972901112e-06, "loss": 0.0469, "step": 12000 }, { "epoch": 0.67, "eval_accuracy": 0.913512834996548, "eval_f1": 0.941321897843637, "eval_loss": 0.04364275932312012, "eval_roc_auc": 0.9653120464218385, "eval_runtime": 943.108, "eval_samples_per_second": 33.788, "step": 12000 }, { "epoch": 0.78, "learning_rate": 4.4882903842273315e-06, "loss": 0.0447, "step": 14000 }, { "epoch": 0.78, "eval_accuracy": 0.9181886650348333, "eval_f1": 0.9415976522286926, "eval_loss": 0.04153330251574516, "eval_roc_auc": 0.9628170726293566, "eval_runtime": 943.4861, "eval_samples_per_second": 33.775, "step": 14000 }, { "epoch": 0.89, "learning_rate": 4.369591795553551e-06, "loss": 0.0444, "step": 16000 }, { "epoch": 0.89, "eval_accuracy": 0.9147994727923179, "eval_f1": 0.9419169981558534, "eval_loss": 0.042323991656303406, "eval_roc_auc": 0.9661668065711105, "eval_runtime": 943.053, "eval_samples_per_second": 33.79, "step": 16000 }, { "epoch": 1.0, "learning_rate": 4.25089320687977e-06, "loss": 0.0428, "step": 18000 }, { "epoch": 1.0, "eval_accuracy": 0.92073055921672, "eval_f1": 0.945392908407904, "eval_loss": 0.040571972727775574, "eval_roc_auc": 0.967396285229951, "eval_runtime": 943.6862, "eval_samples_per_second": 33.768, "step": 18000 }, { "epoch": 1.12, "learning_rate": 4.13219461820599e-06, "loss": 0.038, "step": 20000 }, { "epoch": 1.12, "eval_accuracy": 0.9201343124333144, "eval_f1": 0.9434847253943194, "eval_loss": 0.04074249416589737, "eval_roc_auc": 0.9648247119998007, "eval_runtime": 943.7404, "eval_samples_per_second": 33.766, "step": 20000 }, { "epoch": 1.23, "learning_rate": 4.013496029532209e-06, "loss": 0.0365, "step": 22000 }, { "epoch": 1.23, "eval_accuracy": 0.9186280047699743, "eval_f1": 0.9448325191374088, "eval_loss": 0.04050755128264427, "eval_roc_auc": 0.968414370803744, "eval_runtime": 944.2502, "eval_samples_per_second": 33.747, "step": 22000 }, { "epoch": 1.34, "learning_rate": 3.8947974408584285e-06, "loss": 0.0363, "step": 24000 }, { "epoch": 1.34, "eval_accuracy": 0.9201343124333144, "eval_f1": 0.9443637975031708, "eval_loss": 0.0401209257543087, "eval_roc_auc": 0.9665495515278828, "eval_runtime": 943.3154, "eval_samples_per_second": 33.781, "step": 24000 }, { "epoch": 1.45, "learning_rate": 3.7760988521846475e-06, "loss": 0.0369, "step": 26000 }, { "epoch": 1.45, "eval_accuracy": 0.9228331136634658, "eval_f1": 0.9453032652246665, "eval_loss": 0.03975052386522293, "eval_roc_auc": 0.9652725030917011, "eval_runtime": 943.3891, "eval_samples_per_second": 33.778, "step": 26000 }, { "epoch": 1.56, "learning_rate": 3.6574002635108673e-06, "loss": 0.0367, "step": 28000 }, { "epoch": 1.56, "eval_accuracy": 0.924308039917153, "eval_f1": 0.9483727178359894, "eval_loss": 0.03769804537296295, "eval_roc_auc": 0.9692338448531075, "eval_runtime": 943.731, "eval_samples_per_second": 33.766, "step": 28000 }, { "epoch": 1.67, "learning_rate": 3.5387016748370867e-06, "loss": 0.0357, "step": 30000 }, { "epoch": 1.67, "eval_accuracy": 0.9256888219418816, "eval_f1": 0.9485330073349634, "eval_loss": 0.03771040216088295, "eval_roc_auc": 0.9689151700049551, "eval_runtime": 943.5531, "eval_samples_per_second": 33.772, "step": 30000 }, { "epoch": 1.79, "learning_rate": 3.420003086163306e-06, "loss": 0.0353, "step": 32000 }, { "epoch": 1.79, "eval_accuracy": 0.9218289085545722, "eval_f1": 0.945624456015514, "eval_loss": 0.03922228887677193, "eval_roc_auc": 0.9675467885174716, "eval_runtime": 943.7902, "eval_samples_per_second": 33.764, "step": 32000 }, { "epoch": 1.9, "learning_rate": 3.3013044974895255e-06, "loss": 0.0361, "step": 34000 }, { "epoch": 1.9, "eval_accuracy": 0.9247159982426411, "eval_f1": 0.9477583383080007, "eval_loss": 0.03780689090490341, "eval_roc_auc": 0.9678151508441292, "eval_runtime": 943.8287, "eval_samples_per_second": 33.762, "step": 34000 }, { "epoch": 2.01, "learning_rate": 3.1826059088157445e-06, "loss": 0.0352, "step": 36000 }, { "epoch": 2.01, "eval_accuracy": 0.9262536873156342, "eval_f1": 0.9494783086216365, "eval_loss": 0.03738059848546982, "eval_roc_auc": 0.970156686689613, "eval_runtime": 944.0794, "eval_samples_per_second": 33.754, "step": 36000 }, { "epoch": 2.12, "learning_rate": 3.063907320141964e-06, "loss": 0.0304, "step": 38000 }, { "epoch": 2.12, "eval_accuracy": 0.9259084918094521, "eval_f1": 0.948512218605291, "eval_loss": 0.03789151459932327, "eval_roc_auc": 0.9688610378201717, "eval_runtime": 943.994, "eval_samples_per_second": 33.757, "step": 38000 }, { "epoch": 2.23, "learning_rate": 2.9452087314681833e-06, "loss": 0.03, "step": 40000 }, { "epoch": 2.23, "eval_accuracy": 0.9245904726040294, "eval_f1": 0.9488459898909932, "eval_loss": 0.03766411170363426, "eval_roc_auc": 0.9705674249639594, "eval_runtime": 943.9278, "eval_samples_per_second": 33.759, "step": 40000 }, { "epoch": 2.34, "learning_rate": 2.8265101427944027e-06, "loss": 0.0293, "step": 42000 }, { "epoch": 2.34, "eval_accuracy": 0.9265675014121635, "eval_f1": 0.9496503709571762, "eval_loss": 0.03824329748749733, "eval_roc_auc": 0.9707690753304716, "eval_runtime": 943.7438, "eval_samples_per_second": 33.766, "step": 42000 }, { "epoch": 2.45, "learning_rate": 2.7078115541206216e-06, "loss": 0.0295, "step": 44000 }, { "epoch": 2.45, "eval_accuracy": 0.9280110462561978, "eval_f1": 0.9498280736608847, "eval_loss": 0.03822626546025276, "eval_roc_auc": 0.9696253499219528, "eval_runtime": 944.0868, "eval_samples_per_second": 33.753, "step": 44000 }, { "epoch": 2.57, "learning_rate": 2.589112965446841e-06, "loss": 0.0293, "step": 46000 }, { "epoch": 2.57, "eval_accuracy": 0.9273520366534864, "eval_f1": 0.950136518250736, "eval_loss": 0.0382731668651104, "eval_roc_auc": 0.9705518129264687, "eval_runtime": 944.2173, "eval_samples_per_second": 33.749, "step": 46000 }, { "epoch": 2.68, "learning_rate": 2.4704143767730604e-06, "loss": 0.0289, "step": 48000 }, { "epoch": 2.68, "eval_accuracy": 0.9288583443168267, "eval_f1": 0.9496914100188371, "eval_loss": 0.038598936051130295, "eval_roc_auc": 0.9687129340225382, "eval_runtime": 943.702, "eval_samples_per_second": 33.767, "step": 48000 }, { "epoch": 2.79, "learning_rate": 2.35171578809928e-06, "loss": 0.0298, "step": 50000 }, { "epoch": 2.79, "eval_accuracy": 0.9273206552438336, "eval_f1": 0.9502127205355373, "eval_loss": 0.037168510258197784, "eval_roc_auc": 0.9707164609217411, "eval_runtime": 943.7733, "eval_samples_per_second": 33.764, "step": 50000 }, { "epoch": 2.9, "learning_rate": 2.233017199425499e-06, "loss": 0.0287, "step": 52000 }, { "epoch": 2.9, "eval_accuracy": 0.9278855206175861, "eval_f1": 0.950306102196913, "eval_loss": 0.03820985183119774, "eval_roc_auc": 0.970297989008143, "eval_runtime": 944.0102, "eval_samples_per_second": 33.756, "step": 52000 }, { "epoch": 3.01, "learning_rate": 2.1143186107517182e-06, "loss": 0.0286, "step": 54000 }, { "epoch": 3.01, "eval_accuracy": 0.9286072930396033, "eval_f1": 0.9506395189108336, "eval_loss": 0.03879757598042488, "eval_roc_auc": 0.9705917028008494, "eval_runtime": 943.6083, "eval_samples_per_second": 33.77, "step": 54000 } ], "max_steps": 89625, "num_train_epochs": 5, "total_flos": 6.986708904362803e+16, "trial_name": null, "trial_params": null }