{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.7506463659867854, "eval_steps": 88, "global_step": 2613, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02528009192760701, "grad_norm": 8.922952651977539, "learning_rate": 1.5143603133159272e-06, "loss": 7.5831, "step": 88 }, { "epoch": 0.02528009192760701, "eval_nli-pairs_loss": 6.087147235870361, "eval_nli-pairs_runtime": 38.8312, "eval_nli-pairs_samples_per_second": 175.323, "eval_nli-pairs_steps_per_second": 10.971, "step": 88 }, { "epoch": 0.02528009192760701, "eval_scitail-pairs-pos_loss": 3.7706263065338135, "eval_scitail-pairs-pos_runtime": 8.0579, "eval_scitail-pairs-pos_samples_per_second": 161.829, "eval_scitail-pairs-pos_steps_per_second": 10.176, "step": 88 }, { "epoch": 0.05056018385521402, "grad_norm": 4.343107223510742, "learning_rate": 3.046127067014796e-06, "loss": 6.7273, "step": 176 }, { "epoch": 0.05056018385521402, "eval_nli-pairs_loss": 5.9269890785217285, "eval_nli-pairs_runtime": 38.115, "eval_nli-pairs_samples_per_second": 178.618, "eval_nli-pairs_steps_per_second": 11.177, "step": 176 }, { "epoch": 0.05056018385521402, "eval_scitail-pairs-pos_loss": 3.7313730716705322, "eval_scitail-pairs-pos_runtime": 7.805, "eval_scitail-pairs-pos_samples_per_second": 167.071, "eval_scitail-pairs-pos_steps_per_second": 10.506, "step": 176 }, { "epoch": 0.07584027578282103, "grad_norm": 16.866355895996094, "learning_rate": 4.577893820713664e-06, "loss": 5.9091, "step": 264 }, { "epoch": 0.07584027578282103, "eval_nli-pairs_loss": 4.598970890045166, "eval_nli-pairs_runtime": 38.0313, "eval_nli-pairs_samples_per_second": 179.01, "eval_nli-pairs_steps_per_second": 11.201, "step": 264 }, { "epoch": 0.07584027578282103, "eval_scitail-pairs-pos_loss": 2.8227546215057373, "eval_scitail-pairs-pos_runtime": 7.7737, "eval_scitail-pairs-pos_samples_per_second": 167.746, "eval_scitail-pairs-pos_steps_per_second": 10.548, "step": 264 }, { "epoch": 0.10112036771042804, "grad_norm": 13.346307754516602, "learning_rate": 6.1096605744125335e-06, "loss": 4.2126, "step": 352 }, { "epoch": 0.10112036771042804, "eval_nli-pairs_loss": 3.062242269515991, "eval_nli-pairs_runtime": 38.0333, "eval_nli-pairs_samples_per_second": 179.001, "eval_nli-pairs_steps_per_second": 11.201, "step": 352 }, { "epoch": 0.10112036771042804, "eval_scitail-pairs-pos_loss": 1.792738437652588, "eval_scitail-pairs-pos_runtime": 7.7584, "eval_scitail-pairs-pos_samples_per_second": 168.075, "eval_scitail-pairs-pos_steps_per_second": 10.569, "step": 352 }, { "epoch": 0.12640045963803503, "grad_norm": 54.115352630615234, "learning_rate": 7.641427328111402e-06, "loss": 3.0055, "step": 440 }, { "epoch": 0.12640045963803503, "eval_nli-pairs_loss": 2.324322462081909, "eval_nli-pairs_runtime": 38.4568, "eval_nli-pairs_samples_per_second": 177.03, "eval_nli-pairs_steps_per_second": 11.077, "step": 440 }, { "epoch": 0.12640045963803503, "eval_scitail-pairs-pos_loss": 1.1716609001159668, "eval_scitail-pairs-pos_runtime": 7.7757, "eval_scitail-pairs-pos_samples_per_second": 167.701, "eval_scitail-pairs-pos_steps_per_second": 10.546, "step": 440 }, { "epoch": 0.15168055156564206, "grad_norm": 11.725848197937012, "learning_rate": 9.155787641427328e-06, "loss": 2.4462, "step": 528 }, { "epoch": 0.15168055156564206, "eval_nli-pairs_loss": 1.9132238626480103, "eval_nli-pairs_runtime": 38.209, "eval_nli-pairs_samples_per_second": 178.178, "eval_nli-pairs_steps_per_second": 11.149, "step": 528 }, { "epoch": 0.15168055156564206, "eval_scitail-pairs-pos_loss": 1.1093688011169434, "eval_scitail-pairs-pos_runtime": 7.8235, "eval_scitail-pairs-pos_samples_per_second": 166.678, "eval_scitail-pairs-pos_steps_per_second": 10.481, "step": 528 }, { "epoch": 0.17696064349324905, "grad_norm": 11.779143333435059, "learning_rate": 1.06875543951262e-05, "loss": 2.0925, "step": 616 }, { "epoch": 0.17696064349324905, "eval_nli-pairs_loss": 1.6520535945892334, "eval_nli-pairs_runtime": 38.3278, "eval_nli-pairs_samples_per_second": 177.626, "eval_nli-pairs_steps_per_second": 11.115, "step": 616 }, { "epoch": 0.17696064349324905, "eval_scitail-pairs-pos_loss": 0.9030593037605286, "eval_scitail-pairs-pos_runtime": 7.8736, "eval_scitail-pairs-pos_samples_per_second": 165.617, "eval_scitail-pairs-pos_steps_per_second": 10.415, "step": 616 }, { "epoch": 0.20224073542085608, "grad_norm": 12.419939994812012, "learning_rate": 1.2219321148825067e-05, "loss": 2.0016, "step": 704 }, { "epoch": 0.20224073542085608, "eval_nli-pairs_loss": 1.4989789724349976, "eval_nli-pairs_runtime": 38.1814, "eval_nli-pairs_samples_per_second": 178.307, "eval_nli-pairs_steps_per_second": 11.157, "step": 704 }, { "epoch": 0.20224073542085608, "eval_scitail-pairs-pos_loss": 0.8707832098007202, "eval_scitail-pairs-pos_runtime": 7.8875, "eval_scitail-pairs-pos_samples_per_second": 165.324, "eval_scitail-pairs-pos_steps_per_second": 10.396, "step": 704 }, { "epoch": 0.22752082734846307, "grad_norm": 9.998259544372559, "learning_rate": 1.3751087902523935e-05, "loss": 1.7607, "step": 792 }, { "epoch": 0.22752082734846307, "eval_nli-pairs_loss": 1.410436749458313, "eval_nli-pairs_runtime": 38.5476, "eval_nli-pairs_samples_per_second": 176.613, "eval_nli-pairs_steps_per_second": 11.051, "step": 792 }, { "epoch": 0.22752082734846307, "eval_scitail-pairs-pos_loss": 0.8443933129310608, "eval_scitail-pairs-pos_runtime": 8.0485, "eval_scitail-pairs-pos_samples_per_second": 162.017, "eval_scitail-pairs-pos_steps_per_second": 10.188, "step": 792 }, { "epoch": 0.25280091927607007, "grad_norm": 9.332258224487305, "learning_rate": 1.5282854656222804e-05, "loss": 1.7801, "step": 880 }, { "epoch": 0.25280091927607007, "eval_nli-pairs_loss": 1.3014748096466064, "eval_nli-pairs_runtime": 38.916, "eval_nli-pairs_samples_per_second": 174.941, "eval_nli-pairs_steps_per_second": 10.947, "step": 880 }, { "epoch": 0.25280091927607007, "eval_scitail-pairs-pos_loss": 0.8059829473495483, "eval_scitail-pairs-pos_runtime": 8.1209, "eval_scitail-pairs-pos_samples_per_second": 160.573, "eval_scitail-pairs-pos_steps_per_second": 10.097, "step": 880 }, { "epoch": 0.2780810112036771, "grad_norm": 10.008861541748047, "learning_rate": 1.6814621409921673e-05, "loss": 1.5522, "step": 968 }, { "epoch": 0.2780810112036771, "eval_nli-pairs_loss": 1.2200833559036255, "eval_nli-pairs_runtime": 38.7857, "eval_nli-pairs_samples_per_second": 175.529, "eval_nli-pairs_steps_per_second": 10.983, "step": 968 }, { "epoch": 0.2780810112036771, "eval_scitail-pairs-pos_loss": 0.7629444003105164, "eval_scitail-pairs-pos_runtime": 8.1057, "eval_scitail-pairs-pos_samples_per_second": 160.874, "eval_scitail-pairs-pos_steps_per_second": 10.116, "step": 968 }, { "epoch": 0.3033611031312841, "grad_norm": 4.277113437652588, "learning_rate": 1.834638816362054e-05, "loss": 1.4041, "step": 1056 }, { "epoch": 0.3033611031312841, "eval_nli-pairs_loss": 1.1746500730514526, "eval_nli-pairs_runtime": 38.7125, "eval_nli-pairs_samples_per_second": 175.86, "eval_nli-pairs_steps_per_second": 11.004, "step": 1056 }, { "epoch": 0.3033611031312841, "eval_scitail-pairs-pos_loss": 0.6737743020057678, "eval_scitail-pairs-pos_runtime": 8.0882, "eval_scitail-pairs-pos_samples_per_second": 161.222, "eval_scitail-pairs-pos_steps_per_second": 10.138, "step": 1056 }, { "epoch": 0.3286411950588911, "grad_norm": 12.264771461486816, "learning_rate": 1.987815491731941e-05, "loss": 1.3716, "step": 1144 }, { "epoch": 0.3286411950588911, "eval_nli-pairs_loss": 1.1800155639648438, "eval_nli-pairs_runtime": 39.3141, "eval_nli-pairs_samples_per_second": 173.169, "eval_nli-pairs_steps_per_second": 10.836, "step": 1144 }, { "epoch": 0.3286411950588911, "eval_scitail-pairs-pos_loss": 0.6005298495292664, "eval_scitail-pairs-pos_runtime": 8.1438, "eval_scitail-pairs-pos_samples_per_second": 160.123, "eval_scitail-pairs-pos_steps_per_second": 10.069, "step": 1144 }, { "epoch": 0.3539212869864981, "grad_norm": 8.721504211425781, "learning_rate": 1.994052263586742e-05, "loss": 1.3107, "step": 1232 }, { "epoch": 0.3539212869864981, "eval_nli-pairs_loss": 1.0875309705734253, "eval_nli-pairs_runtime": 38.8492, "eval_nli-pairs_samples_per_second": 175.242, "eval_nli-pairs_steps_per_second": 10.965, "step": 1232 }, { "epoch": 0.3539212869864981, "eval_scitail-pairs-pos_loss": 0.6326610445976257, "eval_scitail-pairs-pos_runtime": 8.1233, "eval_scitail-pairs-pos_samples_per_second": 160.525, "eval_scitail-pairs-pos_steps_per_second": 10.094, "step": 1232 }, { "epoch": 0.37920137891410516, "grad_norm": 7.441675186157227, "learning_rate": 1.974194676073964e-05, "loss": 1.3468, "step": 1320 }, { "epoch": 0.37920137891410516, "eval_nli-pairs_loss": 1.0540093183517456, "eval_nli-pairs_runtime": 38.2019, "eval_nli-pairs_samples_per_second": 178.211, "eval_nli-pairs_steps_per_second": 11.151, "step": 1320 }, { "epoch": 0.37920137891410516, "eval_scitail-pairs-pos_loss": 0.5582771301269531, "eval_scitail-pairs-pos_runtime": 7.795, "eval_scitail-pairs-pos_samples_per_second": 167.286, "eval_scitail-pairs-pos_steps_per_second": 10.52, "step": 1320 }, { "epoch": 0.40448147084171215, "grad_norm": 9.178886413574219, "learning_rate": 1.9406615307701736e-05, "loss": 1.2303, "step": 1408 }, { "epoch": 0.40448147084171215, "eval_nli-pairs_loss": 1.0082660913467407, "eval_nli-pairs_runtime": 38.0965, "eval_nli-pairs_samples_per_second": 178.704, "eval_nli-pairs_steps_per_second": 11.182, "step": 1408 }, { "epoch": 0.40448147084171215, "eval_scitail-pairs-pos_loss": 0.5665512084960938, "eval_scitail-pairs-pos_runtime": 7.756, "eval_scitail-pairs-pos_samples_per_second": 168.128, "eval_scitail-pairs-pos_steps_per_second": 10.572, "step": 1408 }, { "epoch": 0.42976156276931915, "grad_norm": 7.995066165924072, "learning_rate": 1.8939235595298756e-05, "loss": 1.1907, "step": 1496 }, { "epoch": 0.42976156276931915, "eval_nli-pairs_loss": 0.9647029042243958, "eval_nli-pairs_runtime": 38.1584, "eval_nli-pairs_samples_per_second": 178.414, "eval_nli-pairs_steps_per_second": 11.164, "step": 1496 }, { "epoch": 0.42976156276931915, "eval_scitail-pairs-pos_loss": 0.5922390818595886, "eval_scitail-pairs-pos_runtime": 7.8063, "eval_scitail-pairs-pos_samples_per_second": 167.044, "eval_scitail-pairs-pos_steps_per_second": 10.504, "step": 1496 }, { "epoch": 0.45504165469692615, "grad_norm": 6.889362335205078, "learning_rate": 1.8346368610183863e-05, "loss": 1.1587, "step": 1584 }, { "epoch": 0.45504165469692615, "eval_nli-pairs_loss": 0.9536527991294861, "eval_nli-pairs_runtime": 38.1977, "eval_nli-pairs_samples_per_second": 178.231, "eval_nli-pairs_steps_per_second": 11.152, "step": 1584 }, { "epoch": 0.45504165469692615, "eval_scitail-pairs-pos_loss": 0.5585244297981262, "eval_scitail-pairs-pos_runtime": 7.8456, "eval_scitail-pairs-pos_samples_per_second": 166.207, "eval_scitail-pairs-pos_steps_per_second": 10.452, "step": 1584 }, { "epoch": 0.4803217466245332, "grad_norm": 9.089669227600098, "learning_rate": 1.7636336905247625e-05, "loss": 0.9554, "step": 1672 }, { "epoch": 0.4803217466245332, "eval_nli-pairs_loss": 0.9304406046867371, "eval_nli-pairs_runtime": 38.1126, "eval_nli-pairs_samples_per_second": 178.629, "eval_nli-pairs_steps_per_second": 11.177, "step": 1672 }, { "epoch": 0.4803217466245332, "eval_scitail-pairs-pos_loss": 0.5591565370559692, "eval_scitail-pairs-pos_runtime": 7.8171, "eval_scitail-pairs-pos_samples_per_second": 166.813, "eval_scitail-pairs-pos_steps_per_second": 10.49, "step": 1672 }, { "epoch": 0.5056018385521401, "grad_norm": 8.821345329284668, "learning_rate": 1.681910776921864e-05, "loss": 0.9837, "step": 1760 }, { "epoch": 0.5056018385521401, "eval_nli-pairs_loss": 0.9164705276489258, "eval_nli-pairs_runtime": 38.0836, "eval_nli-pairs_samples_per_second": 178.765, "eval_nli-pairs_steps_per_second": 11.186, "step": 1760 }, { "epoch": 0.5056018385521401, "eval_scitail-pairs-pos_loss": 0.5467000007629395, "eval_scitail-pairs-pos_runtime": 7.7942, "eval_scitail-pairs-pos_samples_per_second": 167.304, "eval_scitail-pairs-pos_steps_per_second": 10.521, "step": 1760 }, { "epoch": 0.5308819304797472, "grad_norm": 9.250692367553711, "learning_rate": 1.5906153307778405e-05, "loss": 0.8857, "step": 1848 }, { "epoch": 0.5308819304797472, "eval_nli-pairs_loss": 0.8931341171264648, "eval_nli-pairs_runtime": 38.0639, "eval_nli-pairs_samples_per_second": 178.857, "eval_nli-pairs_steps_per_second": 11.192, "step": 1848 }, { "epoch": 0.5308819304797472, "eval_scitail-pairs-pos_loss": 0.5374401807785034, "eval_scitail-pairs-pos_runtime": 7.8097, "eval_scitail-pairs-pos_samples_per_second": 166.972, "eval_scitail-pairs-pos_steps_per_second": 10.5, "step": 1848 }, { "epoch": 0.5561620224073542, "grad_norm": 5.3266706466674805, "learning_rate": 1.491028940034468e-05, "loss": 0.9305, "step": 1936 }, { "epoch": 0.5561620224073542, "eval_nli-pairs_loss": 0.8841533064842224, "eval_nli-pairs_runtime": 38.1566, "eval_nli-pairs_samples_per_second": 178.423, "eval_nli-pairs_steps_per_second": 11.165, "step": 1936 }, { "epoch": 0.5561620224073542, "eval_scitail-pairs-pos_loss": 0.5330824851989746, "eval_scitail-pairs-pos_runtime": 7.8415, "eval_scitail-pairs-pos_samples_per_second": 166.294, "eval_scitail-pairs-pos_steps_per_second": 10.457, "step": 1936 }, { "epoch": 0.5814421143349612, "grad_norm": 6.629028797149658, "learning_rate": 1.3845495793217223e-05, "loss": 0.8061, "step": 2024 }, { "epoch": 0.5814421143349612, "eval_nli-pairs_loss": 0.8853806257247925, "eval_nli-pairs_runtime": 38.172, "eval_nli-pairs_samples_per_second": 178.351, "eval_nli-pairs_steps_per_second": 11.16, "step": 2024 }, { "epoch": 0.5814421143349612, "eval_scitail-pairs-pos_loss": 0.5477445125579834, "eval_scitail-pairs-pos_runtime": 7.8333, "eval_scitail-pairs-pos_samples_per_second": 166.469, "eval_scitail-pairs-pos_steps_per_second": 10.468, "step": 2024 }, { "epoch": 0.6067222062625682, "grad_norm": 4.16071081161499, "learning_rate": 1.2726719854583736e-05, "loss": 0.8286, "step": 2112 }, { "epoch": 0.6067222062625682, "eval_nli-pairs_loss": 0.8693087697029114, "eval_nli-pairs_runtime": 38.1088, "eval_nli-pairs_samples_per_second": 178.646, "eval_nli-pairs_steps_per_second": 11.179, "step": 2112 }, { "epoch": 0.6067222062625682, "eval_scitail-pairs-pos_loss": 0.5196370482444763, "eval_scitail-pairs-pos_runtime": 7.8534, "eval_scitail-pairs-pos_samples_per_second": 166.042, "eval_scitail-pairs-pos_steps_per_second": 10.441, "step": 2112 }, { "epoch": 0.6320022981901753, "grad_norm": 2.518064498901367, "learning_rate": 1.1569666746235527e-05, "loss": 0.7854, "step": 2200 }, { "epoch": 0.6320022981901753, "eval_nli-pairs_loss": 0.859151303768158, "eval_nli-pairs_runtime": 38.0838, "eval_nli-pairs_samples_per_second": 178.764, "eval_nli-pairs_steps_per_second": 11.186, "step": 2200 }, { "epoch": 0.6320022981901753, "eval_scitail-pairs-pos_loss": 0.5159358978271484, "eval_scitail-pairs-pos_runtime": 7.7611, "eval_scitail-pairs-pos_samples_per_second": 168.018, "eval_scitail-pairs-pos_steps_per_second": 10.566, "step": 2200 }, { "epoch": 0.6572823901177822, "grad_norm": 4.033371925354004, "learning_rate": 1.0390578957522117e-05, "loss": 0.8374, "step": 2288 }, { "epoch": 0.6572823901177822, "eval_nli-pairs_loss": 0.8537901043891907, "eval_nli-pairs_runtime": 38.0742, "eval_nli-pairs_samples_per_second": 178.809, "eval_nli-pairs_steps_per_second": 11.189, "step": 2288 }, { "epoch": 0.6572823901177822, "eval_scitail-pairs-pos_loss": 0.509048581123352, "eval_scitail-pairs-pos_runtime": 7.7812, "eval_scitail-pairs-pos_samples_per_second": 167.582, "eval_scitail-pairs-pos_steps_per_second": 10.538, "step": 2288 }, { "epoch": 0.6825624820453893, "grad_norm": 6.1265363693237305, "learning_rate": 9.206008296404724e-06, "loss": 0.7678, "step": 2376 }, { "epoch": 0.6825624820453893, "eval_nli-pairs_loss": 0.8425480723381042, "eval_nli-pairs_runtime": 38.0596, "eval_nli-pairs_samples_per_second": 178.877, "eval_nli-pairs_steps_per_second": 11.193, "step": 2376 }, { "epoch": 0.6825624820453893, "eval_scitail-pairs-pos_loss": 0.5174906253814697, "eval_scitail-pairs-pos_runtime": 7.7617, "eval_scitail-pairs-pos_samples_per_second": 168.003, "eval_scitail-pairs-pos_steps_per_second": 10.565, "step": 2376 }, { "epoch": 0.7078425739729962, "grad_norm": 3.0078606605529785, "learning_rate": 8.032583538354534e-06, "loss": 0.7064, "step": 2464 }, { "epoch": 0.7078425739729962, "eval_nli-pairs_loss": 0.8283973336219788, "eval_nli-pairs_runtime": 38.2909, "eval_nli-pairs_samples_per_second": 177.797, "eval_nli-pairs_steps_per_second": 11.125, "step": 2464 }, { "epoch": 0.7078425739729962, "eval_scitail-pairs-pos_loss": 0.5045931935310364, "eval_scitail-pairs-pos_runtime": 7.8174, "eval_scitail-pairs-pos_samples_per_second": 166.806, "eval_scitail-pairs-pos_steps_per_second": 10.489, "step": 2464 }, { "epoch": 0.7331226659006033, "grad_norm": 8.649880409240723, "learning_rate": 6.8867769947957765e-06, "loss": 0.8849, "step": 2552 }, { "epoch": 0.7331226659006033, "eval_nli-pairs_loss": 0.8328748941421509, "eval_nli-pairs_runtime": 38.2288, "eval_nli-pairs_samples_per_second": 178.086, "eval_nli-pairs_steps_per_second": 11.143, "step": 2552 }, { "epoch": 0.7331226659006033, "eval_scitail-pairs-pos_loss": 0.478294312953949, "eval_scitail-pairs-pos_runtime": 7.8918, "eval_scitail-pairs-pos_samples_per_second": 165.235, "eval_scitail-pairs-pos_steps_per_second": 10.391, "step": 2552 } ], "logging_steps": 88, "max_steps": 3481, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 871, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 250, "trial_name": null, "trial_params": null }