|
{ |
|
"best_metric": 0.6971784234046936, |
|
"best_model_checkpoint": "./cardiffnlp-twitter-xlmr-finetuned-txtnly-all-42/checkpoint-16500", |
|
"epoch": 1.9841269841269842, |
|
"eval_steps": 500, |
|
"global_step": 16500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 3.298647880554199, |
|
"learning_rate": 4.994023569023569e-05, |
|
"loss": 0.6122, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_f1": 0.4840638597456899, |
|
"eval_loss": 0.854165256023407, |
|
"eval_precision": 0.6558887250350466, |
|
"eval_recall": 0.49045198529069495, |
|
"eval_runtime": 5.9285, |
|
"eval_samples_per_second": 148.099, |
|
"eval_steps_per_second": 9.277, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 5.411099433898926, |
|
"learning_rate": 4.988011063011063e-05, |
|
"loss": 0.5497, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_f1": 0.6209225023342669, |
|
"eval_loss": 0.8037390112876892, |
|
"eval_precision": 0.704421745545341, |
|
"eval_recall": 0.6070083321696225, |
|
"eval_runtime": 6.1691, |
|
"eval_samples_per_second": 142.322, |
|
"eval_steps_per_second": 8.915, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 5.836483001708984, |
|
"learning_rate": 4.9820105820105825e-05, |
|
"loss": 0.5404, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_f1": 0.3652071944289921, |
|
"eval_loss": 0.9700150489807129, |
|
"eval_precision": 0.5591482310679367, |
|
"eval_recall": 0.4176288227901131, |
|
"eval_runtime": 5.8886, |
|
"eval_samples_per_second": 149.101, |
|
"eval_steps_per_second": 9.34, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 13.717193603515625, |
|
"learning_rate": 4.975998075998076e-05, |
|
"loss": 0.5165, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_f1": 0.5369027892847279, |
|
"eval_loss": 0.744874894618988, |
|
"eval_precision": 0.7349445049700448, |
|
"eval_recall": 0.529664385793418, |
|
"eval_runtime": 5.996, |
|
"eval_samples_per_second": 146.43, |
|
"eval_steps_per_second": 9.173, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 2.4534995555877686, |
|
"learning_rate": 4.969997594997595e-05, |
|
"loss": 0.5136, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_f1": 0.5001381202499963, |
|
"eval_loss": 0.7884698510169983, |
|
"eval_precision": 0.6766332095394413, |
|
"eval_recall": 0.5025275799469348, |
|
"eval_runtime": 5.9085, |
|
"eval_samples_per_second": 148.6, |
|
"eval_steps_per_second": 9.309, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 3.195244550704956, |
|
"learning_rate": 4.963985088985089e-05, |
|
"loss": 0.5072, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_f1": 0.5917137619940201, |
|
"eval_loss": 0.8123684525489807, |
|
"eval_precision": 0.6076358199852175, |
|
"eval_recall": 0.6132374435600242, |
|
"eval_runtime": 6.1108, |
|
"eval_samples_per_second": 143.68, |
|
"eval_steps_per_second": 9.0, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 7.579603672027588, |
|
"learning_rate": 4.957972582972583e-05, |
|
"loss": 0.5011, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_f1": 0.578405909718061, |
|
"eval_loss": 0.8767459392547607, |
|
"eval_precision": 0.642659899090607, |
|
"eval_recall": 0.5987143322627193, |
|
"eval_runtime": 6.1563, |
|
"eval_samples_per_second": 142.618, |
|
"eval_steps_per_second": 8.934, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 3.266787052154541, |
|
"learning_rate": 4.951960076960077e-05, |
|
"loss": 0.5021, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_f1": 0.6502990015105321, |
|
"eval_loss": 0.7957776784896851, |
|
"eval_precision": 0.6847923256926328, |
|
"eval_recall": 0.636192338127822, |
|
"eval_runtime": 6.5221, |
|
"eval_samples_per_second": 134.618, |
|
"eval_steps_per_second": 8.433, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 6.044332027435303, |
|
"learning_rate": 4.945959595959596e-05, |
|
"loss": 0.4946, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_f1": 0.4982912515017284, |
|
"eval_loss": 0.8045271039009094, |
|
"eval_precision": 0.7220405815528763, |
|
"eval_recall": 0.4968300516687614, |
|
"eval_runtime": 6.1928, |
|
"eval_samples_per_second": 141.778, |
|
"eval_steps_per_second": 8.881, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 5.152063846588135, |
|
"learning_rate": 4.93994708994709e-05, |
|
"loss": 0.4928, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_f1": 0.550273048506264, |
|
"eval_loss": 0.780342698097229, |
|
"eval_precision": 0.7581894624319455, |
|
"eval_recall": 0.5380887213145278, |
|
"eval_runtime": 6.123, |
|
"eval_samples_per_second": 143.395, |
|
"eval_steps_per_second": 8.983, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 4.54200553894043, |
|
"learning_rate": 4.933934583934584e-05, |
|
"loss": 0.5008, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_f1": 0.4594232264185665, |
|
"eval_loss": 0.7507085204124451, |
|
"eval_precision": 0.44070483572560937, |
|
"eval_recall": 0.47984452823162504, |
|
"eval_runtime": 5.932, |
|
"eval_samples_per_second": 148.011, |
|
"eval_steps_per_second": 9.272, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 4.075632095336914, |
|
"learning_rate": 4.927922077922078e-05, |
|
"loss": 0.4966, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_f1": 0.6310991936984806, |
|
"eval_loss": 0.8238988518714905, |
|
"eval_precision": 0.6139657275796522, |
|
"eval_recall": 0.6767434715821813, |
|
"eval_runtime": 5.8918, |
|
"eval_samples_per_second": 149.02, |
|
"eval_steps_per_second": 9.335, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 4.8725104331970215, |
|
"learning_rate": 4.921909571909572e-05, |
|
"loss": 0.4791, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_f1": 0.5412559573187593, |
|
"eval_loss": 0.7028306722640991, |
|
"eval_precision": 0.6567775474615866, |
|
"eval_recall": 0.520631196760229, |
|
"eval_runtime": 6.3113, |
|
"eval_samples_per_second": 139.116, |
|
"eval_steps_per_second": 8.715, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 1.4915893077850342, |
|
"learning_rate": 4.915897065897066e-05, |
|
"loss": 0.494, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_f1": 0.5227267406470947, |
|
"eval_loss": 0.8033522367477417, |
|
"eval_precision": 0.6660302960734323, |
|
"eval_recall": 0.5188623562817111, |
|
"eval_runtime": 6.1252, |
|
"eval_samples_per_second": 143.342, |
|
"eval_steps_per_second": 8.979, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 2.151014804840088, |
|
"learning_rate": 4.90988455988456e-05, |
|
"loss": 0.4861, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_f1": 0.4541201667750796, |
|
"eval_loss": 0.900325357913971, |
|
"eval_precision": 0.5780562441152168, |
|
"eval_recall": 0.4784564539403249, |
|
"eval_runtime": 6.144, |
|
"eval_samples_per_second": 142.903, |
|
"eval_steps_per_second": 8.952, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 4.770496368408203, |
|
"learning_rate": 4.903872053872054e-05, |
|
"loss": 0.4804, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_f1": 0.5791890202588422, |
|
"eval_loss": 0.773960530757904, |
|
"eval_precision": 0.6238945275403609, |
|
"eval_recall": 0.5775003491132523, |
|
"eval_runtime": 6.556, |
|
"eval_samples_per_second": 133.923, |
|
"eval_steps_per_second": 8.389, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 2.520460367202759, |
|
"learning_rate": 4.897859547859548e-05, |
|
"loss": 0.4614, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_f1": 0.6470888284841774, |
|
"eval_loss": 0.7397181391716003, |
|
"eval_precision": 0.6848151355984641, |
|
"eval_recall": 0.6312358609132803, |
|
"eval_runtime": 6.1813, |
|
"eval_samples_per_second": 142.042, |
|
"eval_steps_per_second": 8.898, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 4.375688552856445, |
|
"learning_rate": 4.891847041847042e-05, |
|
"loss": 0.4315, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"eval_f1": 0.614857769662433, |
|
"eval_loss": 0.788919985294342, |
|
"eval_precision": 0.6641593406916259, |
|
"eval_recall": 0.6034743750872783, |
|
"eval_runtime": 6.1798, |
|
"eval_samples_per_second": 142.076, |
|
"eval_steps_per_second": 8.9, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"grad_norm": 4.091088771820068, |
|
"learning_rate": 4.885834535834536e-05, |
|
"loss": 0.4506, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"eval_f1": 0.4967964786589283, |
|
"eval_loss": 0.8783875703811646, |
|
"eval_precision": 0.6387377173091459, |
|
"eval_recall": 0.5016645719871526, |
|
"eval_runtime": 5.9164, |
|
"eval_samples_per_second": 148.401, |
|
"eval_steps_per_second": 9.296, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 3.3903276920318604, |
|
"learning_rate": 4.87982202982203e-05, |
|
"loss": 0.4489, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_f1": 0.4949153076705755, |
|
"eval_loss": 0.7994188070297241, |
|
"eval_precision": 0.5340329579250159, |
|
"eval_recall": 0.49638597961178615, |
|
"eval_runtime": 5.9029, |
|
"eval_samples_per_second": 148.74, |
|
"eval_steps_per_second": 9.317, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"grad_norm": 3.929879903793335, |
|
"learning_rate": 4.8738095238095235e-05, |
|
"loss": 0.4466, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"eval_f1": 0.44642812881455524, |
|
"eval_loss": 0.8109920024871826, |
|
"eval_precision": 0.5776119229607602, |
|
"eval_recall": 0.47351207931853095, |
|
"eval_runtime": 5.9766, |
|
"eval_samples_per_second": 146.907, |
|
"eval_steps_per_second": 9.203, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"grad_norm": 6.443171501159668, |
|
"learning_rate": 4.8677970177970176e-05, |
|
"loss": 0.4319, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"eval_f1": 0.5481427288492505, |
|
"eval_loss": 0.8068605661392212, |
|
"eval_precision": 0.6612496177619213, |
|
"eval_recall": 0.5399497276916632, |
|
"eval_runtime": 5.9001, |
|
"eval_samples_per_second": 148.811, |
|
"eval_steps_per_second": 9.322, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"grad_norm": 7.633645057678223, |
|
"learning_rate": 4.8617845117845116e-05, |
|
"loss": 0.4243, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"eval_f1": 0.5797306372413114, |
|
"eval_loss": 0.7941620349884033, |
|
"eval_precision": 0.5948358635007136, |
|
"eval_recall": 0.5704752595075175, |
|
"eval_runtime": 6.145, |
|
"eval_samples_per_second": 142.881, |
|
"eval_steps_per_second": 8.95, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"grad_norm": 3.275371789932251, |
|
"learning_rate": 4.8557720057720056e-05, |
|
"loss": 0.4398, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"eval_f1": 0.5247242844808815, |
|
"eval_loss": 0.9738017916679382, |
|
"eval_precision": 0.5370369073777802, |
|
"eval_recall": 0.6070139179816599, |
|
"eval_runtime": 6.219, |
|
"eval_samples_per_second": 141.18, |
|
"eval_steps_per_second": 8.844, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 2.4162724018096924, |
|
"learning_rate": 4.8497594997595e-05, |
|
"loss": 0.4526, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"eval_f1": 0.5589742980399895, |
|
"eval_loss": 0.7195601463317871, |
|
"eval_precision": 0.7046240283838195, |
|
"eval_recall": 0.5477959316668994, |
|
"eval_runtime": 6.3918, |
|
"eval_samples_per_second": 137.363, |
|
"eval_steps_per_second": 8.605, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"grad_norm": 6.926381587982178, |
|
"learning_rate": 4.8437469937469944e-05, |
|
"loss": 0.4529, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"eval_f1": 0.5863097712686139, |
|
"eval_loss": 0.8049713969230652, |
|
"eval_precision": 0.6419448505612538, |
|
"eval_recall": 0.5730605595121724, |
|
"eval_runtime": 6.3636, |
|
"eval_samples_per_second": 137.971, |
|
"eval_steps_per_second": 8.643, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"grad_norm": 1.8420650959014893, |
|
"learning_rate": 4.837746512746513e-05, |
|
"loss": 0.446, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"eval_f1": 0.6107236144330398, |
|
"eval_loss": 0.7564206719398499, |
|
"eval_precision": 0.6520992658162544, |
|
"eval_recall": 0.5912358609132803, |
|
"eval_runtime": 6.4128, |
|
"eval_samples_per_second": 136.914, |
|
"eval_steps_per_second": 8.577, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"grad_norm": 2.423569679260254, |
|
"learning_rate": 4.831746031746032e-05, |
|
"loss": 0.4315, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"eval_f1": 0.621245910301715, |
|
"eval_loss": 0.751511812210083, |
|
"eval_precision": 0.6474767054531395, |
|
"eval_recall": 0.6069198901456967, |
|
"eval_runtime": 5.9833, |
|
"eval_samples_per_second": 146.741, |
|
"eval_steps_per_second": 9.192, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"grad_norm": 6.773381233215332, |
|
"learning_rate": 4.825733525733526e-05, |
|
"loss": 0.4464, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"eval_f1": 0.559868694735591, |
|
"eval_loss": 0.8307517170906067, |
|
"eval_precision": 0.627583612882644, |
|
"eval_recall": 0.5512991667830377, |
|
"eval_runtime": 6.1679, |
|
"eval_samples_per_second": 142.35, |
|
"eval_steps_per_second": 8.917, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 6.220128059387207, |
|
"learning_rate": 4.8197330447330455e-05, |
|
"loss": 0.4423, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_f1": 0.5991996711711277, |
|
"eval_loss": 0.798150360584259, |
|
"eval_precision": 0.6176196711770697, |
|
"eval_recall": 0.5936535865568123, |
|
"eval_runtime": 6.0738, |
|
"eval_samples_per_second": 144.556, |
|
"eval_steps_per_second": 9.055, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"grad_norm": 1.1065833568572998, |
|
"learning_rate": 4.8137205387205395e-05, |
|
"loss": 0.4551, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"eval_f1": 0.6019748538222912, |
|
"eval_loss": 0.822293221950531, |
|
"eval_precision": 0.6355921902599784, |
|
"eval_recall": 0.5933528836754642, |
|
"eval_runtime": 6.1197, |
|
"eval_samples_per_second": 143.472, |
|
"eval_steps_per_second": 8.987, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"grad_norm": 8.631648063659668, |
|
"learning_rate": 4.807708032708033e-05, |
|
"loss": 0.4408, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"eval_f1": 0.5131249172090748, |
|
"eval_loss": 0.7691208124160767, |
|
"eval_precision": 0.608759764068229, |
|
"eval_recall": 0.5147484057161477, |
|
"eval_runtime": 6.3609, |
|
"eval_samples_per_second": 138.031, |
|
"eval_steps_per_second": 8.647, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"grad_norm": 6.755849361419678, |
|
"learning_rate": 4.801695526695527e-05, |
|
"loss": 0.4389, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"eval_f1": 0.6702519892656928, |
|
"eval_loss": 0.6971784234046936, |
|
"eval_precision": 0.6686766810877821, |
|
"eval_recall": 0.6729106735558349, |
|
"eval_runtime": 6.1341, |
|
"eval_samples_per_second": 143.134, |
|
"eval_steps_per_second": 8.966, |
|
"step": 16500 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 415800, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"total_flos": 6.945931114601472e+16, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|