|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 392, |
|
"global_step": 15669, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.015125406854298296, |
|
"grad_norm": 199.77919006347656, |
|
"learning_rate": 5.589586523736601e-07, |
|
"loss": 12.973, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.030250813708596593, |
|
"grad_norm": 70.86483764648438, |
|
"learning_rate": 1.1638591117917304e-06, |
|
"loss": 8.4661, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.045376220562894885, |
|
"grad_norm": 17.361167907714844, |
|
"learning_rate": 1.768759571209801e-06, |
|
"loss": 6.136, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.060501627417193185, |
|
"grad_norm": 13.593123435974121, |
|
"learning_rate": 2.3736600306278715e-06, |
|
"loss": 6.2319, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.07505265173272066, |
|
"eval_nli-pairs_loss": 5.313699245452881, |
|
"eval_nli-pairs_runtime": 12.1282, |
|
"eval_nli-pairs_samples_per_second": 123.679, |
|
"eval_nli-pairs_steps_per_second": 5.195, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.07505265173272066, |
|
"eval_scitail-pairs-pos_loss": 3.977630615234375, |
|
"eval_scitail-pairs-pos_runtime": 15.2195, |
|
"eval_scitail-pairs-pos_samples_per_second": 85.68, |
|
"eval_scitail-pairs-pos_steps_per_second": 3.614, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.07505265173272066, |
|
"eval_qnli-contrastive_loss": 5.520341396331787, |
|
"eval_qnli-contrastive_runtime": 4.7394, |
|
"eval_qnli-contrastive_samples_per_second": 316.494, |
|
"eval_qnli-contrastive_steps_per_second": 13.293, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.07562703427149148, |
|
"grad_norm": 15.363186836242676, |
|
"learning_rate": 2.978560490045942e-06, |
|
"loss": 5.6068, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.09075244112578977, |
|
"grad_norm": 18.922758102416992, |
|
"learning_rate": 3.5834609494640125e-06, |
|
"loss": 5.2502, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.10587784798008808, |
|
"grad_norm": 20.363380432128906, |
|
"learning_rate": 4.188361408882083e-06, |
|
"loss": 4.8699, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.12100325483438637, |
|
"grad_norm": 14.830269813537598, |
|
"learning_rate": 4.793261868300153e-06, |
|
"loss": 4.9584, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.13612866168868468, |
|
"grad_norm": 26.075838088989258, |
|
"learning_rate": 5.398162327718224e-06, |
|
"loss": 4.5632, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 0.15010530346544132, |
|
"eval_nli-pairs_loss": 4.265738487243652, |
|
"eval_nli-pairs_runtime": 12.0989, |
|
"eval_nli-pairs_samples_per_second": 123.978, |
|
"eval_nli-pairs_steps_per_second": 5.207, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.15010530346544132, |
|
"eval_scitail-pairs-pos_loss": 2.458251476287842, |
|
"eval_scitail-pairs-pos_runtime": 15.2215, |
|
"eval_scitail-pairs-pos_samples_per_second": 85.668, |
|
"eval_scitail-pairs-pos_steps_per_second": 3.613, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.15010530346544132, |
|
"eval_qnli-contrastive_loss": 4.81198263168335, |
|
"eval_qnli-contrastive_runtime": 4.724, |
|
"eval_qnli-contrastive_samples_per_second": 317.525, |
|
"eval_qnli-contrastive_steps_per_second": 13.336, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.15125406854298296, |
|
"grad_norm": 17.387819290161133, |
|
"learning_rate": 6.003062787136294e-06, |
|
"loss": 4.223, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.16637947539728126, |
|
"grad_norm": 24.702957153320312, |
|
"learning_rate": 6.607963246554365e-06, |
|
"loss": 3.8496, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 0.18150488225157954, |
|
"grad_norm": 20.878055572509766, |
|
"learning_rate": 7.212863705972435e-06, |
|
"loss": 3.4414, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 0.19663028910587785, |
|
"grad_norm": 28.57908821105957, |
|
"learning_rate": 7.817764165390506e-06, |
|
"loss": 3.3513, |
|
"step": 1027 |
|
}, |
|
{ |
|
"epoch": 0.21175569596017615, |
|
"grad_norm": 37.09183120727539, |
|
"learning_rate": 8.422664624808575e-06, |
|
"loss": 3.5611, |
|
"step": 1106 |
|
}, |
|
{ |
|
"epoch": 0.22515795519816198, |
|
"eval_nli-pairs_loss": 3.178299903869629, |
|
"eval_nli-pairs_runtime": 12.0715, |
|
"eval_nli-pairs_samples_per_second": 124.26, |
|
"eval_nli-pairs_steps_per_second": 5.219, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 0.22515795519816198, |
|
"eval_scitail-pairs-pos_loss": 1.983331561088562, |
|
"eval_scitail-pairs-pos_runtime": 15.1626, |
|
"eval_scitail-pairs-pos_samples_per_second": 86.001, |
|
"eval_scitail-pairs-pos_steps_per_second": 3.627, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 0.22515795519816198, |
|
"eval_qnli-contrastive_loss": 3.4507648944854736, |
|
"eval_qnli-contrastive_runtime": 4.7752, |
|
"eval_qnli-contrastive_samples_per_second": 314.125, |
|
"eval_qnli-contrastive_steps_per_second": 13.193, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 0.22688110281447443, |
|
"grad_norm": 22.88146209716797, |
|
"learning_rate": 9.027565084226646e-06, |
|
"loss": 3.4039, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 0.24200650966877274, |
|
"grad_norm": 20.4180908203125, |
|
"learning_rate": 9.632465543644716e-06, |
|
"loss": 3.4269, |
|
"step": 1264 |
|
}, |
|
{ |
|
"epoch": 0.25713191652307105, |
|
"grad_norm": 23.59966278076172, |
|
"learning_rate": 1.0237366003062788e-05, |
|
"loss": 3.1573, |
|
"step": 1343 |
|
}, |
|
{ |
|
"epoch": 0.27225732337736935, |
|
"grad_norm": 10.84000301361084, |
|
"learning_rate": 1.0842266462480856e-05, |
|
"loss": 3.253, |
|
"step": 1422 |
|
}, |
|
{ |
|
"epoch": 0.2873827302316676, |
|
"grad_norm": 16.418413162231445, |
|
"learning_rate": 1.1447166921898928e-05, |
|
"loss": 2.7614, |
|
"step": 1501 |
|
}, |
|
{ |
|
"epoch": 0.30021060693088264, |
|
"eval_nli-pairs_loss": 2.722890615463257, |
|
"eval_nli-pairs_runtime": 12.0687, |
|
"eval_nli-pairs_samples_per_second": 124.288, |
|
"eval_nli-pairs_steps_per_second": 5.22, |
|
"step": 1568 |
|
}, |
|
{ |
|
"epoch": 0.30021060693088264, |
|
"eval_scitail-pairs-pos_loss": 1.6435188055038452, |
|
"eval_scitail-pairs-pos_runtime": 15.2101, |
|
"eval_scitail-pairs-pos_samples_per_second": 85.732, |
|
"eval_scitail-pairs-pos_steps_per_second": 3.616, |
|
"step": 1568 |
|
}, |
|
{ |
|
"epoch": 0.30021060693088264, |
|
"eval_qnli-contrastive_loss": 2.944777011871338, |
|
"eval_qnli-contrastive_runtime": 4.7212, |
|
"eval_qnli-contrastive_samples_per_second": 317.713, |
|
"eval_qnli-contrastive_steps_per_second": 13.344, |
|
"step": 1568 |
|
}, |
|
{ |
|
"epoch": 0.3025081370859659, |
|
"grad_norm": 20.777223587036133, |
|
"learning_rate": 1.2052067381317e-05, |
|
"loss": 2.9549, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.3176335439402642, |
|
"grad_norm": 15.09938907623291, |
|
"learning_rate": 1.265696784073507e-05, |
|
"loss": 2.8357, |
|
"step": 1659 |
|
}, |
|
{ |
|
"epoch": 0.3327589507945625, |
|
"grad_norm": 5.233273983001709, |
|
"learning_rate": 1.326186830015314e-05, |
|
"loss": 2.8964, |
|
"step": 1738 |
|
}, |
|
{ |
|
"epoch": 0.34788435764886083, |
|
"grad_norm": 16.8189640045166, |
|
"learning_rate": 1.386676875957121e-05, |
|
"loss": 2.8274, |
|
"step": 1817 |
|
}, |
|
{ |
|
"epoch": 0.3630097645031591, |
|
"grad_norm": 8.114161491394043, |
|
"learning_rate": 1.4471669218989282e-05, |
|
"loss": 2.6809, |
|
"step": 1896 |
|
}, |
|
{ |
|
"epoch": 0.37526325866360327, |
|
"eval_nli-pairs_loss": 2.428619384765625, |
|
"eval_nli-pairs_runtime": 12.0706, |
|
"eval_nli-pairs_samples_per_second": 124.269, |
|
"eval_nli-pairs_steps_per_second": 5.219, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.37526325866360327, |
|
"eval_scitail-pairs-pos_loss": 1.3531062602996826, |
|
"eval_scitail-pairs-pos_runtime": 15.2633, |
|
"eval_scitail-pairs-pos_samples_per_second": 85.434, |
|
"eval_scitail-pairs-pos_steps_per_second": 3.603, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.37526325866360327, |
|
"eval_qnli-contrastive_loss": 2.404916286468506, |
|
"eval_qnli-contrastive_runtime": 4.7194, |
|
"eval_qnli-contrastive_samples_per_second": 317.838, |
|
"eval_qnli-contrastive_steps_per_second": 13.349, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.3781351713574574, |
|
"grad_norm": 22.405332565307617, |
|
"learning_rate": 1.5076569678407352e-05, |
|
"loss": 2.3456, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 0.3932605782117557, |
|
"grad_norm": 33.843994140625, |
|
"learning_rate": 1.5681470137825424e-05, |
|
"loss": 2.5316, |
|
"step": 2054 |
|
}, |
|
{ |
|
"epoch": 0.408385985066054, |
|
"grad_norm": 3.7852566242218018, |
|
"learning_rate": 1.6286370597243492e-05, |
|
"loss": 2.653, |
|
"step": 2133 |
|
}, |
|
{ |
|
"epoch": 0.4235113919203523, |
|
"grad_norm": 28.830053329467773, |
|
"learning_rate": 1.689127105666156e-05, |
|
"loss": 2.699, |
|
"step": 2212 |
|
}, |
|
{ |
|
"epoch": 0.43863679877465056, |
|
"grad_norm": 26.699514389038086, |
|
"learning_rate": 1.7496171516079635e-05, |
|
"loss": 2.424, |
|
"step": 2291 |
|
}, |
|
{ |
|
"epoch": 0.45031591039632396, |
|
"eval_nli-pairs_loss": 2.207122564315796, |
|
"eval_nli-pairs_runtime": 12.0919, |
|
"eval_nli-pairs_samples_per_second": 124.05, |
|
"eval_nli-pairs_steps_per_second": 5.21, |
|
"step": 2352 |
|
}, |
|
{ |
|
"epoch": 0.45031591039632396, |
|
"eval_scitail-pairs-pos_loss": 1.2252534627914429, |
|
"eval_scitail-pairs-pos_runtime": 15.1733, |
|
"eval_scitail-pairs-pos_samples_per_second": 85.941, |
|
"eval_scitail-pairs-pos_steps_per_second": 3.625, |
|
"step": 2352 |
|
}, |
|
{ |
|
"epoch": 0.45031591039632396, |
|
"eval_qnli-contrastive_loss": 2.292630672454834, |
|
"eval_qnli-contrastive_runtime": 4.7338, |
|
"eval_qnli-contrastive_samples_per_second": 316.868, |
|
"eval_qnli-contrastive_steps_per_second": 13.308, |
|
"step": 2352 |
|
}, |
|
{ |
|
"epoch": 0.45376220562894887, |
|
"grad_norm": 3.1586949825286865, |
|
"learning_rate": 1.8101071975497704e-05, |
|
"loss": 2.4716, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.4688876124832472, |
|
"grad_norm": 15.398905754089355, |
|
"learning_rate": 1.8705972434915772e-05, |
|
"loss": 2.0097, |
|
"step": 2449 |
|
}, |
|
{ |
|
"epoch": 0.4840130193375455, |
|
"grad_norm": 2.9506657123565674, |
|
"learning_rate": 1.9310872894333844e-05, |
|
"loss": 2.3993, |
|
"step": 2528 |
|
}, |
|
{ |
|
"epoch": 0.4991384261918438, |
|
"grad_norm": 18.736677169799805, |
|
"learning_rate": 1.9915773353751916e-05, |
|
"loss": 2.3295, |
|
"step": 2607 |
|
}, |
|
{ |
|
"epoch": 0.5142638330461421, |
|
"grad_norm": 16.75814437866211, |
|
"learning_rate": 2.0520673813169984e-05, |
|
"loss": 2.348, |
|
"step": 2686 |
|
}, |
|
{ |
|
"epoch": 0.5253685621290446, |
|
"eval_nli-pairs_loss": 2.0092170238494873, |
|
"eval_nli-pairs_runtime": 12.0787, |
|
"eval_nli-pairs_samples_per_second": 124.185, |
|
"eval_nli-pairs_steps_per_second": 5.216, |
|
"step": 2744 |
|
}, |
|
{ |
|
"epoch": 0.5253685621290446, |
|
"eval_scitail-pairs-pos_loss": 1.0735079050064087, |
|
"eval_scitail-pairs-pos_runtime": 14.9317, |
|
"eval_scitail-pairs-pos_samples_per_second": 87.331, |
|
"eval_scitail-pairs-pos_steps_per_second": 3.683, |
|
"step": 2744 |
|
}, |
|
{ |
|
"epoch": 0.5253685621290446, |
|
"eval_qnli-contrastive_loss": 1.9999727010726929, |
|
"eval_qnli-contrastive_runtime": 4.659, |
|
"eval_qnli-contrastive_samples_per_second": 321.961, |
|
"eval_qnli-contrastive_steps_per_second": 13.522, |
|
"step": 2744 |
|
}, |
|
{ |
|
"epoch": 0.5293892399004404, |
|
"grad_norm": 3.6279871463775635, |
|
"learning_rate": 2.1125574272588056e-05, |
|
"loss": 2.0747, |
|
"step": 2765 |
|
}, |
|
{ |
|
"epoch": 0.5445146467547387, |
|
"grad_norm": 102.07367706298828, |
|
"learning_rate": 2.1730474732006124e-05, |
|
"loss": 2.3592, |
|
"step": 2844 |
|
}, |
|
{ |
|
"epoch": 0.5596400536090369, |
|
"grad_norm": 12.037158966064453, |
|
"learning_rate": 2.23353751914242e-05, |
|
"loss": 2.2563, |
|
"step": 2923 |
|
}, |
|
{ |
|
"epoch": 0.5747654604633352, |
|
"grad_norm": 11.711392402648926, |
|
"learning_rate": 2.2940275650842267e-05, |
|
"loss": 2.3484, |
|
"step": 3002 |
|
}, |
|
{ |
|
"epoch": 0.5898908673176335, |
|
"grad_norm": 20.607454299926758, |
|
"learning_rate": 2.3545176110260336e-05, |
|
"loss": 1.868, |
|
"step": 3081 |
|
}, |
|
{ |
|
"epoch": 0.6004212138617653, |
|
"eval_nli-pairs_loss": 1.846701979637146, |
|
"eval_nli-pairs_runtime": 11.9121, |
|
"eval_nli-pairs_samples_per_second": 125.922, |
|
"eval_nli-pairs_steps_per_second": 5.289, |
|
"step": 3136 |
|
}, |
|
{ |
|
"epoch": 0.6004212138617653, |
|
"eval_scitail-pairs-pos_loss": 0.9629871249198914, |
|
"eval_scitail-pairs-pos_runtime": 15.006, |
|
"eval_scitail-pairs-pos_samples_per_second": 86.899, |
|
"eval_scitail-pairs-pos_steps_per_second": 3.665, |
|
"step": 3136 |
|
}, |
|
{ |
|
"epoch": 0.6004212138617653, |
|
"eval_qnli-contrastive_loss": 1.9593416452407837, |
|
"eval_qnli-contrastive_runtime": 4.653, |
|
"eval_qnli-contrastive_samples_per_second": 322.374, |
|
"eval_qnli-contrastive_steps_per_second": 13.54, |
|
"step": 3136 |
|
}, |
|
{ |
|
"epoch": 0.6050162741719318, |
|
"grad_norm": 15.901214599609375, |
|
"learning_rate": 2.4150076569678408e-05, |
|
"loss": 1.9958, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.6201416810262301, |
|
"grad_norm": 13.168147087097168, |
|
"learning_rate": 2.475497702909648e-05, |
|
"loss": 2.0089, |
|
"step": 3239 |
|
}, |
|
{ |
|
"epoch": 0.6352670878805284, |
|
"grad_norm": 21.926223754882812, |
|
"learning_rate": 2.5359877488514548e-05, |
|
"loss": 1.8303, |
|
"step": 3318 |
|
}, |
|
{ |
|
"epoch": 0.6503924947348267, |
|
"grad_norm": 21.501989364624023, |
|
"learning_rate": 2.596477794793262e-05, |
|
"loss": 1.6892, |
|
"step": 3397 |
|
}, |
|
{ |
|
"epoch": 0.665517901589125, |
|
"grad_norm": 3.5192618370056152, |
|
"learning_rate": 2.6569678407350688e-05, |
|
"loss": 1.8379, |
|
"step": 3476 |
|
}, |
|
{ |
|
"epoch": 0.675473865594486, |
|
"eval_nli-pairs_loss": 1.7486572265625, |
|
"eval_nli-pairs_runtime": 12.0369, |
|
"eval_nli-pairs_samples_per_second": 124.617, |
|
"eval_nli-pairs_steps_per_second": 5.234, |
|
"step": 3528 |
|
}, |
|
{ |
|
"epoch": 0.675473865594486, |
|
"eval_scitail-pairs-pos_loss": 0.9056742191314697, |
|
"eval_scitail-pairs-pos_runtime": 14.8901, |
|
"eval_scitail-pairs-pos_samples_per_second": 87.575, |
|
"eval_scitail-pairs-pos_steps_per_second": 3.694, |
|
"step": 3528 |
|
}, |
|
{ |
|
"epoch": 0.675473865594486, |
|
"eval_qnli-contrastive_loss": 1.7076925039291382, |
|
"eval_qnli-contrastive_runtime": 4.6837, |
|
"eval_qnli-contrastive_samples_per_second": 320.259, |
|
"eval_qnli-contrastive_steps_per_second": 13.451, |
|
"step": 3528 |
|
}, |
|
{ |
|
"epoch": 0.6806433084434234, |
|
"grad_norm": 13.107728004455566, |
|
"learning_rate": 2.717457886676876e-05, |
|
"loss": 1.4958, |
|
"step": 3555 |
|
}, |
|
{ |
|
"epoch": 0.6957687152977217, |
|
"grad_norm": 10.731244087219238, |
|
"learning_rate": 2.777947932618683e-05, |
|
"loss": 1.9504, |
|
"step": 3634 |
|
}, |
|
{ |
|
"epoch": 0.7108941221520199, |
|
"grad_norm": 1.3723793029785156, |
|
"learning_rate": 2.83843797856049e-05, |
|
"loss": 1.6017, |
|
"step": 3713 |
|
}, |
|
{ |
|
"epoch": 0.7260195290063182, |
|
"grad_norm": 16.096094131469727, |
|
"learning_rate": 2.8989280245022975e-05, |
|
"loss": 1.7229, |
|
"step": 3792 |
|
}, |
|
{ |
|
"epoch": 0.7411449358606165, |
|
"grad_norm": 14.629384994506836, |
|
"learning_rate": 2.9594180704441043e-05, |
|
"loss": 1.5996, |
|
"step": 3871 |
|
}, |
|
{ |
|
"epoch": 0.7505265173272065, |
|
"eval_nli-pairs_loss": 1.6035664081573486, |
|
"eval_nli-pairs_runtime": 12.0239, |
|
"eval_nli-pairs_samples_per_second": 124.752, |
|
"eval_nli-pairs_steps_per_second": 5.24, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.7505265173272065, |
|
"eval_scitail-pairs-pos_loss": 0.7905139923095703, |
|
"eval_scitail-pairs-pos_runtime": 15.2398, |
|
"eval_scitail-pairs-pos_samples_per_second": 85.566, |
|
"eval_scitail-pairs-pos_steps_per_second": 3.609, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.7505265173272065, |
|
"eval_qnli-contrastive_loss": 1.7369401454925537, |
|
"eval_qnli-contrastive_runtime": 4.726, |
|
"eval_qnli-contrastive_samples_per_second": 317.396, |
|
"eval_qnli-contrastive_steps_per_second": 13.331, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.7562703427149148, |
|
"grad_norm": 12.058998107910156, |
|
"learning_rate": 2.999673874450528e-05, |
|
"loss": 1.6257, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.7713957495692131, |
|
"grad_norm": 4.181306838989258, |
|
"learning_rate": 2.9946841125275615e-05, |
|
"loss": 1.6094, |
|
"step": 4029 |
|
}, |
|
{ |
|
"epoch": 0.7865211564235114, |
|
"grad_norm": 14.733617782592773, |
|
"learning_rate": 2.983695736786804e-05, |
|
"loss": 1.6061, |
|
"step": 4108 |
|
}, |
|
{ |
|
"epoch": 0.8016465632778097, |
|
"grad_norm": 75.19181823730469, |
|
"learning_rate": 2.96675284686242e-05, |
|
"loss": 1.8917, |
|
"step": 4187 |
|
}, |
|
{ |
|
"epoch": 0.816771970132108, |
|
"grad_norm": 17.123188018798828, |
|
"learning_rate": 2.943923439632653e-05, |
|
"loss": 1.766, |
|
"step": 4266 |
|
}, |
|
{ |
|
"epoch": 0.8255791690599272, |
|
"eval_nli-pairs_loss": 1.5217715501785278, |
|
"eval_nli-pairs_runtime": 12.1712, |
|
"eval_nli-pairs_samples_per_second": 123.241, |
|
"eval_nli-pairs_steps_per_second": 5.176, |
|
"step": 4312 |
|
}, |
|
{ |
|
"epoch": 0.8255791690599272, |
|
"eval_scitail-pairs-pos_loss": 0.7310367226600647, |
|
"eval_scitail-pairs-pos_runtime": 15.0699, |
|
"eval_scitail-pairs-pos_samples_per_second": 86.53, |
|
"eval_scitail-pairs-pos_steps_per_second": 3.65, |
|
"step": 4312 |
|
}, |
|
{ |
|
"epoch": 0.8255791690599272, |
|
"eval_qnli-contrastive_loss": 1.8110274076461792, |
|
"eval_qnli-contrastive_runtime": 4.7354, |
|
"eval_qnli-contrastive_samples_per_second": 316.764, |
|
"eval_qnli-contrastive_steps_per_second": 13.304, |
|
"step": 4312 |
|
}, |
|
{ |
|
"epoch": 0.8318973769864063, |
|
"grad_norm": 26.6308536529541, |
|
"learning_rate": 2.9152991363280456e-05, |
|
"loss": 1.6544, |
|
"step": 4345 |
|
}, |
|
{ |
|
"epoch": 0.8470227838407046, |
|
"grad_norm": 11.87916088104248, |
|
"learning_rate": 2.8809948148280698e-05, |
|
"loss": 1.5872, |
|
"step": 4424 |
|
}, |
|
{ |
|
"epoch": 0.8621481906950029, |
|
"grad_norm": 5.825096607208252, |
|
"learning_rate": 2.841148148621882e-05, |
|
"loss": 1.6237, |
|
"step": 4503 |
|
}, |
|
{ |
|
"epoch": 0.8772735975493011, |
|
"grad_norm": 7.624891757965088, |
|
"learning_rate": 2.7959190542834895e-05, |
|
"loss": 1.5713, |
|
"step": 4582 |
|
}, |
|
{ |
|
"epoch": 0.8923990044035994, |
|
"grad_norm": 11.067708969116211, |
|
"learning_rate": 2.7454890496787676e-05, |
|
"loss": 1.5109, |
|
"step": 4661 |
|
}, |
|
{ |
|
"epoch": 0.9006318207926479, |
|
"eval_nli-pairs_loss": 1.4145296812057495, |
|
"eval_nli-pairs_runtime": 12.1688, |
|
"eval_nli-pairs_samples_per_second": 123.266, |
|
"eval_nli-pairs_steps_per_second": 5.177, |
|
"step": 4704 |
|
}, |
|
{ |
|
"epoch": 0.9006318207926479, |
|
"eval_scitail-pairs-pos_loss": 0.7044198513031006, |
|
"eval_scitail-pairs-pos_runtime": 15.0745, |
|
"eval_scitail-pairs-pos_samples_per_second": 86.504, |
|
"eval_scitail-pairs-pos_steps_per_second": 3.649, |
|
"step": 4704 |
|
}, |
|
{ |
|
"epoch": 0.9006318207926479, |
|
"eval_qnli-contrastive_loss": 1.5929718017578125, |
|
"eval_qnli-contrastive_runtime": 4.7378, |
|
"eval_qnli-contrastive_samples_per_second": 316.603, |
|
"eval_qnli-contrastive_steps_per_second": 13.297, |
|
"step": 4704 |
|
}, |
|
{ |
|
"epoch": 0.9075244112578977, |
|
"grad_norm": 18.31964874267578, |
|
"learning_rate": 2.6900605254800455e-05, |
|
"loss": 1.8614, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 0.922649818112196, |
|
"grad_norm": 11.028084754943848, |
|
"learning_rate": 2.6298559329118796e-05, |
|
"loss": 1.2809, |
|
"step": 4819 |
|
}, |
|
{ |
|
"epoch": 0.9377752249664943, |
|
"grad_norm": 11.14758586883545, |
|
"learning_rate": 2.565116890987845e-05, |
|
"loss": 1.4557, |
|
"step": 4898 |
|
}, |
|
{ |
|
"epoch": 0.9529006318207927, |
|
"grad_norm": 12.307340621948242, |
|
"learning_rate": 2.4970023905369427e-05, |
|
"loss": 2.285, |
|
"step": 4977 |
|
}, |
|
{ |
|
"epoch": 0.968026038675091, |
|
"grad_norm": 19.368682861328125, |
|
"learning_rate": 2.4249872456580537e-05, |
|
"loss": 1.5918, |
|
"step": 5056 |
|
}, |
|
{ |
|
"epoch": 0.9756844725253686, |
|
"eval_nli-pairs_loss": 1.3622660636901855, |
|
"eval_nli-pairs_runtime": 12.1119, |
|
"eval_nli-pairs_samples_per_second": 123.845, |
|
"eval_nli-pairs_steps_per_second": 5.201, |
|
"step": 5096 |
|
}, |
|
{ |
|
"epoch": 0.9756844725253686, |
|
"eval_scitail-pairs-pos_loss": 0.6618204116821289, |
|
"eval_scitail-pairs-pos_runtime": 15.1844, |
|
"eval_scitail-pairs-pos_samples_per_second": 85.877, |
|
"eval_scitail-pairs-pos_steps_per_second": 3.622, |
|
"step": 5096 |
|
}, |
|
{ |
|
"epoch": 0.9756844725253686, |
|
"eval_qnli-contrastive_loss": 1.5225657224655151, |
|
"eval_qnli-contrastive_runtime": 4.73, |
|
"eval_qnli-contrastive_samples_per_second": 317.125, |
|
"eval_qnli-contrastive_steps_per_second": 13.319, |
|
"step": 5096 |
|
}, |
|
{ |
|
"epoch": 0.9831514455293893, |
|
"grad_norm": 23.91764259338379, |
|
"learning_rate": 2.349353206401398e-05, |
|
"loss": 1.5956, |
|
"step": 5135 |
|
}, |
|
{ |
|
"epoch": 0.9982768523836876, |
|
"grad_norm": 28.184560775756836, |
|
"learning_rate": 2.269363669859137e-05, |
|
"loss": 1.309, |
|
"step": 5214 |
|
}, |
|
{ |
|
"epoch": 1.0134022592379859, |
|
"grad_norm": 1.2889472246170044, |
|
"learning_rate": 2.186286447094588e-05, |
|
"loss": 1.6033, |
|
"step": 5293 |
|
}, |
|
{ |
|
"epoch": 1.0285276660922842, |
|
"grad_norm": 9.043930053710938, |
|
"learning_rate": 2.1004549518185432e-05, |
|
"loss": 1.2943, |
|
"step": 5372 |
|
}, |
|
{ |
|
"epoch": 1.0436530729465825, |
|
"grad_norm": 15.558199882507324, |
|
"learning_rate": 2.012213651460107e-05, |
|
"loss": 1.4881, |
|
"step": 5451 |
|
}, |
|
{ |
|
"epoch": 1.0507371242580892, |
|
"eval_nli-pairs_loss": 1.3221956491470337, |
|
"eval_nli-pairs_runtime": 12.1205, |
|
"eval_nli-pairs_samples_per_second": 123.757, |
|
"eval_nli-pairs_steps_per_second": 5.198, |
|
"step": 5488 |
|
}, |
|
{ |
|
"epoch": 1.0507371242580892, |
|
"eval_scitail-pairs-pos_loss": 0.6279736161231995, |
|
"eval_scitail-pairs-pos_runtime": 15.0898, |
|
"eval_scitail-pairs-pos_samples_per_second": 86.416, |
|
"eval_scitail-pairs-pos_steps_per_second": 3.645, |
|
"step": 5488 |
|
}, |
|
{ |
|
"epoch": 1.0507371242580892, |
|
"eval_qnli-contrastive_loss": 1.5666921138763428, |
|
"eval_qnli-contrastive_runtime": 4.7489, |
|
"eval_qnli-contrastive_samples_per_second": 315.863, |
|
"eval_qnli-contrastive_steps_per_second": 13.266, |
|
"step": 5488 |
|
}, |
|
{ |
|
"epoch": 1.0587784798008808, |
|
"grad_norm": 22.18709373474121, |
|
"learning_rate": 1.921916684716005e-05, |
|
"loss": 1.6734, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 1.073903886655179, |
|
"grad_norm": 2.1289186477661133, |
|
"learning_rate": 1.8299264402862166e-05, |
|
"loss": 1.6602, |
|
"step": 5609 |
|
}, |
|
{ |
|
"epoch": 1.0890292935094774, |
|
"grad_norm": 8.099466323852539, |
|
"learning_rate": 1.7366121024998667e-05, |
|
"loss": 1.4626, |
|
"step": 5688 |
|
}, |
|
{ |
|
"epoch": 1.1041547003637757, |
|
"grad_norm": 11.092597007751465, |
|
"learning_rate": 1.642348169668238e-05, |
|
"loss": 1.4048, |
|
"step": 5767 |
|
}, |
|
{ |
|
"epoch": 1.1192801072180738, |
|
"grad_norm": 1.632265329360962, |
|
"learning_rate": 1.5475129511111833e-05, |
|
"loss": 1.5961, |
|
"step": 5846 |
|
}, |
|
{ |
|
"epoch": 1.12578977599081, |
|
"eval_nli-pairs_loss": 1.257077932357788, |
|
"eval_nli-pairs_runtime": 12.0966, |
|
"eval_nli-pairs_samples_per_second": 124.002, |
|
"eval_nli-pairs_steps_per_second": 5.208, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 1.12578977599081, |
|
"eval_scitail-pairs-pos_loss": 0.6171609163284302, |
|
"eval_scitail-pairs-pos_runtime": 15.2057, |
|
"eval_scitail-pairs-pos_samples_per_second": 85.757, |
|
"eval_scitail-pairs-pos_steps_per_second": 3.617, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 1.12578977599081, |
|
"eval_qnli-contrastive_loss": 1.4182076454162598, |
|
"eval_qnli-contrastive_runtime": 4.7646, |
|
"eval_qnli-contrastive_samples_per_second": 314.825, |
|
"eval_qnli-contrastive_steps_per_second": 13.223, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 1.1344055140723721, |
|
"grad_norm": 17.874731063842773, |
|
"learning_rate": 1.452487048888817e-05, |
|
"loss": 1.4949, |
|
"step": 5925 |
|
}, |
|
{ |
|
"epoch": 1.1495309209266704, |
|
"grad_norm": 5.625218391418457, |
|
"learning_rate": 1.357651830331762e-05, |
|
"loss": 1.7542, |
|
"step": 6004 |
|
}, |
|
{ |
|
"epoch": 1.1646563277809687, |
|
"grad_norm": 12.764110565185547, |
|
"learning_rate": 1.2633878975001336e-05, |
|
"loss": 1.3177, |
|
"step": 6083 |
|
}, |
|
{ |
|
"epoch": 1.179781734635267, |
|
"grad_norm": 14.75761890411377, |
|
"learning_rate": 1.1700735597137837e-05, |
|
"loss": 1.1522, |
|
"step": 6162 |
|
}, |
|
{ |
|
"epoch": 1.1949071414895653, |
|
"grad_norm": 7.778223037719727, |
|
"learning_rate": 1.078083315283995e-05, |
|
"loss": 1.0727, |
|
"step": 6241 |
|
}, |
|
{ |
|
"epoch": 1.2008424277235306, |
|
"eval_nli-pairs_loss": 1.2002286911010742, |
|
"eval_nli-pairs_runtime": 12.1083, |
|
"eval_nli-pairs_samples_per_second": 123.882, |
|
"eval_nli-pairs_steps_per_second": 5.203, |
|
"step": 6272 |
|
}, |
|
{ |
|
"epoch": 1.2008424277235306, |
|
"eval_scitail-pairs-pos_loss": 0.587746798992157, |
|
"eval_scitail-pairs-pos_runtime": 15.2398, |
|
"eval_scitail-pairs-pos_samples_per_second": 85.565, |
|
"eval_scitail-pairs-pos_steps_per_second": 3.609, |
|
"step": 6272 |
|
}, |
|
{ |
|
"epoch": 1.2008424277235306, |
|
"eval_qnli-contrastive_loss": 1.5079773664474487, |
|
"eval_qnli-contrastive_runtime": 4.7468, |
|
"eval_qnli-contrastive_samples_per_second": 316.005, |
|
"eval_qnli-contrastive_steps_per_second": 13.272, |
|
"step": 6272 |
|
}, |
|
{ |
|
"epoch": 1.2100325483438636, |
|
"grad_norm": 5.742403507232666, |
|
"learning_rate": 9.877863485398942e-06, |
|
"loss": 1.598, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 1.225157955198162, |
|
"grad_norm": 13.002484321594238, |
|
"learning_rate": 8.995450481814567e-06, |
|
"loss": 1.3773, |
|
"step": 6399 |
|
}, |
|
{ |
|
"epoch": 1.2402833620524603, |
|
"grad_norm": 12.662968635559082, |
|
"learning_rate": 8.137135529054122e-06, |
|
"loss": 1.6495, |
|
"step": 6478 |
|
}, |
|
{ |
|
"epoch": 1.2554087689067586, |
|
"grad_norm": 7.513673305511475, |
|
"learning_rate": 7.306363301408635e-06, |
|
"loss": 1.3042, |
|
"step": 6557 |
|
}, |
|
{ |
|
"epoch": 1.2705341757610569, |
|
"grad_norm": 92.78031158447266, |
|
"learning_rate": 6.506467935986024e-06, |
|
"loss": 1.5158, |
|
"step": 6636 |
|
}, |
|
{ |
|
"epoch": 1.2758950794562511, |
|
"eval_nli-pairs_loss": 1.1646167039871216, |
|
"eval_nli-pairs_runtime": 12.3376, |
|
"eval_nli-pairs_samples_per_second": 121.579, |
|
"eval_nli-pairs_steps_per_second": 5.106, |
|
"step": 6664 |
|
}, |
|
{ |
|
"epoch": 1.2758950794562511, |
|
"eval_scitail-pairs-pos_loss": 0.5752041339874268, |
|
"eval_scitail-pairs-pos_runtime": 15.5528, |
|
"eval_scitail-pairs-pos_samples_per_second": 83.843, |
|
"eval_scitail-pairs-pos_steps_per_second": 3.536, |
|
"step": 6664 |
|
}, |
|
{ |
|
"epoch": 1.2758950794562511, |
|
"eval_qnli-contrastive_loss": 1.331896424293518, |
|
"eval_qnli-contrastive_runtime": 4.7695, |
|
"eval_qnli-contrastive_samples_per_second": 314.501, |
|
"eval_qnli-contrastive_steps_per_second": 13.209, |
|
"step": 6664 |
|
}, |
|
{ |
|
"epoch": 1.2856595826153552, |
|
"grad_norm": 11.36242961883545, |
|
"learning_rate": 5.740659651822936e-06, |
|
"loss": 1.2205, |
|
"step": 6715 |
|
}, |
|
{ |
|
"epoch": 1.3007849894696535, |
|
"grad_norm": 10.5322904586792, |
|
"learning_rate": 5.012011866316839e-06, |
|
"loss": 1.3909, |
|
"step": 6794 |
|
}, |
|
{ |
|
"epoch": 1.3159103963239518, |
|
"grad_norm": 2.6958863735198975, |
|
"learning_rate": 4.323448860683947e-06, |
|
"loss": 1.4255, |
|
"step": 6873 |
|
}, |
|
{ |
|
"epoch": 1.33103580317825, |
|
"grad_norm": 19.98720359802246, |
|
"learning_rate": 3.677734043945192e-06, |
|
"loss": 1.5415, |
|
"step": 6952 |
|
}, |
|
{ |
|
"epoch": 1.3461612100325484, |
|
"grad_norm": 3.684659719467163, |
|
"learning_rate": 3.077458862540392e-06, |
|
"loss": 1.3355, |
|
"step": 7031 |
|
}, |
|
{ |
|
"epoch": 1.350947731188972, |
|
"eval_nli-pairs_loss": 1.1400986909866333, |
|
"eval_nli-pairs_runtime": 12.0157, |
|
"eval_nli-pairs_samples_per_second": 124.836, |
|
"eval_nli-pairs_steps_per_second": 5.243, |
|
"step": 7056 |
|
}, |
|
{ |
|
"epoch": 1.350947731188972, |
|
"eval_scitail-pairs-pos_loss": 0.5660089254379272, |
|
"eval_scitail-pairs-pos_runtime": 15.1309, |
|
"eval_scitail-pairs-pos_samples_per_second": 86.181, |
|
"eval_scitail-pairs-pos_steps_per_second": 3.635, |
|
"step": 7056 |
|
}, |
|
{ |
|
"epoch": 1.350947731188972, |
|
"eval_qnli-contrastive_loss": 1.2624869346618652, |
|
"eval_qnli-contrastive_runtime": 4.6898, |
|
"eval_qnli-contrastive_samples_per_second": 319.843, |
|
"eval_qnli-contrastive_steps_per_second": 13.433, |
|
"step": 7056 |
|
}, |
|
{ |
|
"epoch": 1.3612866168868467, |
|
"grad_norm": 11.162321090698242, |
|
"learning_rate": 2.5250324000795594e-06, |
|
"loss": 1.5326, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 1.376412023741145, |
|
"grad_norm": 9.399407386779785, |
|
"learning_rate": 2.0226717089707925e-06, |
|
"loss": 1.0109, |
|
"step": 7189 |
|
}, |
|
{ |
|
"epoch": 1.3915374305954433, |
|
"grad_norm": 0.5825966596603394, |
|
"learning_rate": 1.5723929127267211e-06, |
|
"loss": 1.2729, |
|
"step": 7268 |
|
}, |
|
{ |
|
"epoch": 1.4066628374497414, |
|
"grad_norm": 7.376439094543457, |
|
"learning_rate": 1.1760031146585697e-06, |
|
"loss": 1.605, |
|
"step": 7347 |
|
}, |
|
{ |
|
"epoch": 1.42178824430404, |
|
"grad_norm": 0.5974981188774109, |
|
"learning_rate": 8.350931454308347e-07, |
|
"loss": 1.4983, |
|
"step": 7426 |
|
}, |
|
{ |
|
"epoch": 1.4260003829216925, |
|
"eval_nli-pairs_loss": 1.1365835666656494, |
|
"eval_nli-pairs_runtime": 11.9569, |
|
"eval_nli-pairs_samples_per_second": 125.451, |
|
"eval_nli-pairs_steps_per_second": 5.269, |
|
"step": 7448 |
|
}, |
|
{ |
|
"epoch": 1.4260003829216925, |
|
"eval_scitail-pairs-pos_loss": 0.5671288371086121, |
|
"eval_scitail-pairs-pos_runtime": 14.9551, |
|
"eval_scitail-pairs-pos_samples_per_second": 87.194, |
|
"eval_scitail-pairs-pos_steps_per_second": 3.678, |
|
"step": 7448 |
|
}, |
|
{ |
|
"epoch": 1.4260003829216925, |
|
"eval_qnli-contrastive_loss": 1.2691177129745483, |
|
"eval_qnli-contrastive_runtime": 4.6835, |
|
"eval_qnli-contrastive_samples_per_second": 320.27, |
|
"eval_qnli-contrastive_steps_per_second": 13.451, |
|
"step": 7448 |
|
}, |
|
{ |
|
"epoch": 1.436913651158338, |
|
"grad_norm": 8.548786163330078, |
|
"learning_rate": 5.5103117858258e-07, |
|
"loss": 1.2901, |
|
"step": 7505 |
|
}, |
|
{ |
|
"epoch": 1.4520390580126366, |
|
"grad_norm": 9.624091148376465, |
|
"learning_rate": 3.2495723963837597e-07, |
|
"loss": 1.4993, |
|
"step": 7584 |
|
}, |
|
{ |
|
"epoch": 1.4671644648669346, |
|
"grad_norm": 18.643239974975586, |
|
"learning_rate": 1.5777863084531385e-07, |
|
"loss": 1.0473, |
|
"step": 7663 |
|
}, |
|
{ |
|
"epoch": 1.482289871721233, |
|
"grad_norm": 10.979313850402832, |
|
"learning_rate": 5.0166289898085916e-08, |
|
"loss": 1.2113, |
|
"step": 7742 |
|
}, |
|
{ |
|
"epoch": 1.4974152785755313, |
|
"grad_norm": 10.067323684692383, |
|
"learning_rate": 2.55209726558292e-09, |
|
"loss": 1.3604, |
|
"step": 7821 |
|
}, |
|
{ |
|
"epoch": 1.5010530346544133, |
|
"eval_nli-pairs_loss": 1.1346535682678223, |
|
"eval_nli-pairs_runtime": 12.2237, |
|
"eval_nli-pairs_samples_per_second": 122.712, |
|
"eval_nli-pairs_steps_per_second": 5.154, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 1.5010530346544133, |
|
"eval_scitail-pairs-pos_loss": 0.5651898980140686, |
|
"eval_scitail-pairs-pos_runtime": 15.2453, |
|
"eval_scitail-pairs-pos_samples_per_second": 85.535, |
|
"eval_scitail-pairs-pos_steps_per_second": 3.608, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 1.5010530346544133, |
|
"eval_qnli-contrastive_loss": 1.2610852718353271, |
|
"eval_qnli-contrastive_runtime": 4.7666, |
|
"eval_qnli-contrastive_samples_per_second": 314.687, |
|
"eval_qnli-contrastive_steps_per_second": 13.217, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 1.5125406854298296, |
|
"grad_norm": 12.913325309753418, |
|
"learning_rate": 2.9984872857074416e-05, |
|
"loss": 1.4627, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 1.5276660922841279, |
|
"grad_norm": 13.103713035583496, |
|
"learning_rate": 2.9912159040536404e-05, |
|
"loss": 1.1015, |
|
"step": 7979 |
|
}, |
|
{ |
|
"epoch": 1.5427914991384262, |
|
"grad_norm": 10.095404624938965, |
|
"learning_rate": 2.9779598275386362e-05, |
|
"loss": 1.4538, |
|
"step": 8058 |
|
}, |
|
{ |
|
"epoch": 1.5579169059927245, |
|
"grad_norm": 0.5388267040252686, |
|
"learning_rate": 2.9587722567571802e-05, |
|
"loss": 1.4412, |
|
"step": 8137 |
|
}, |
|
{ |
|
"epoch": 1.5730423128470228, |
|
"grad_norm": 20.366121292114258, |
|
"learning_rate": 2.933730197162302e-05, |
|
"loss": 1.4793, |
|
"step": 8216 |
|
}, |
|
{ |
|
"epoch": 1.5761056863871339, |
|
"eval_nli-pairs_loss": 1.1918026208877563, |
|
"eval_nli-pairs_runtime": 12.158, |
|
"eval_nli-pairs_samples_per_second": 123.375, |
|
"eval_nli-pairs_steps_per_second": 5.182, |
|
"step": 8232 |
|
}, |
|
{ |
|
"epoch": 1.5761056863871339, |
|
"eval_scitail-pairs-pos_loss": 0.5848828554153442, |
|
"eval_scitail-pairs-pos_runtime": 15.3425, |
|
"eval_scitail-pairs-pos_samples_per_second": 84.993, |
|
"eval_scitail-pairs-pos_steps_per_second": 3.585, |
|
"step": 8232 |
|
}, |
|
{ |
|
"epoch": 1.5761056863871339, |
|
"eval_qnli-contrastive_loss": 1.4694615602493286, |
|
"eval_qnli-contrastive_runtime": 4.7988, |
|
"eval_qnli-contrastive_samples_per_second": 312.58, |
|
"eval_qnli-contrastive_steps_per_second": 13.128, |
|
"step": 8232 |
|
}, |
|
{ |
|
"epoch": 1.588167719701321, |
|
"grad_norm": 19.943920135498047, |
|
"learning_rate": 2.9029341500194198e-05, |
|
"loss": 1.1267, |
|
"step": 8295 |
|
}, |
|
{ |
|
"epoch": 1.6032931265556194, |
|
"grad_norm": 14.96302318572998, |
|
"learning_rate": 2.8665077090647462e-05, |
|
"loss": 1.1734, |
|
"step": 8374 |
|
}, |
|
{ |
|
"epoch": 1.6184185334099177, |
|
"grad_norm": 6.065411567687988, |
|
"learning_rate": 2.8245970644867055e-05, |
|
"loss": 1.2193, |
|
"step": 8453 |
|
}, |
|
{ |
|
"epoch": 1.633543940264216, |
|
"grad_norm": 15.93069076538086, |
|
"learning_rate": 2.7773704162210366e-05, |
|
"loss": 1.1381, |
|
"step": 8532 |
|
}, |
|
{ |
|
"epoch": 1.6486693471185143, |
|
"grad_norm": 5.890163898468018, |
|
"learning_rate": 2.725017298914211e-05, |
|
"loss": 0.9632, |
|
"step": 8611 |
|
}, |
|
{ |
|
"epoch": 1.6511583381198545, |
|
"eval_nli-pairs_loss": 1.1099625825881958, |
|
"eval_nli-pairs_runtime": 12.0731, |
|
"eval_nli-pairs_samples_per_second": 124.243, |
|
"eval_nli-pairs_steps_per_second": 5.218, |
|
"step": 8624 |
|
}, |
|
{ |
|
"epoch": 1.6511583381198545, |
|
"eval_scitail-pairs-pos_loss": 0.5500022172927856, |
|
"eval_scitail-pairs-pos_runtime": 15.0341, |
|
"eval_scitail-pairs-pos_samples_per_second": 86.736, |
|
"eval_scitail-pairs-pos_steps_per_second": 3.658, |
|
"step": 8624 |
|
}, |
|
{ |
|
"epoch": 1.6511583381198545, |
|
"eval_qnli-contrastive_loss": 1.208964467048645, |
|
"eval_qnli-contrastive_runtime": 4.6959, |
|
"eval_qnli-contrastive_samples_per_second": 319.43, |
|
"eval_qnli-contrastive_steps_per_second": 13.416, |
|
"step": 8624 |
|
}, |
|
{ |
|
"epoch": 1.6637947539728124, |
|
"grad_norm": 11.52648639678955, |
|
"learning_rate": 2.6677478212642807e-05, |
|
"loss": 1.0842, |
|
"step": 8690 |
|
}, |
|
{ |
|
"epoch": 1.678920160827111, |
|
"grad_norm": 8.958113670349121, |
|
"learning_rate": 2.6057918227919096e-05, |
|
"loss": 0.7981, |
|
"step": 8769 |
|
}, |
|
{ |
|
"epoch": 1.694045567681409, |
|
"grad_norm": 12.083248138427734, |
|
"learning_rate": 2.5393979514257247e-05, |
|
"loss": 1.2196, |
|
"step": 8848 |
|
}, |
|
{ |
|
"epoch": 1.7091709745357075, |
|
"grad_norm": 2.1500277519226074, |
|
"learning_rate": 2.4688326656039045e-05, |
|
"loss": 0.8321, |
|
"step": 8927 |
|
}, |
|
{ |
|
"epoch": 1.7242963813900056, |
|
"grad_norm": 3.8833096027374268, |
|
"learning_rate": 2.3943791648968727e-05, |
|
"loss": 0.938, |
|
"step": 9006 |
|
}, |
|
{ |
|
"epoch": 1.726210989852575, |
|
"eval_nli-pairs_loss": 1.1021158695220947, |
|
"eval_nli-pairs_runtime": 12.05, |
|
"eval_nli-pairs_samples_per_second": 124.482, |
|
"eval_nli-pairs_steps_per_second": 5.228, |
|
"step": 9016 |
|
}, |
|
{ |
|
"epoch": 1.726210989852575, |
|
"eval_scitail-pairs-pos_loss": 0.519660472869873, |
|
"eval_scitail-pairs-pos_runtime": 15.121, |
|
"eval_scitail-pairs-pos_samples_per_second": 86.238, |
|
"eval_scitail-pairs-pos_steps_per_second": 3.637, |
|
"step": 9016 |
|
}, |
|
{ |
|
"epoch": 1.726210989852575, |
|
"eval_qnli-contrastive_loss": 1.3204244375228882, |
|
"eval_qnli-contrastive_runtime": 4.6913, |
|
"eval_qnli-contrastive_samples_per_second": 319.739, |
|
"eval_qnli-contrastive_steps_per_second": 13.429, |
|
"step": 9016 |
|
}, |
|
{ |
|
"epoch": 1.7394217882443042, |
|
"grad_norm": 9.389202117919922, |
|
"learning_rate": 2.316336253442829e-05, |
|
"loss": 1.0008, |
|
"step": 9085 |
|
}, |
|
{ |
|
"epoch": 1.7545471950986022, |
|
"grad_norm": 0.5910531282424927, |
|
"learning_rate": 2.235017140757486e-05, |
|
"loss": 0.8644, |
|
"step": 9164 |
|
}, |
|
{ |
|
"epoch": 1.7696726019529008, |
|
"grad_norm": 5.645143032073975, |
|
"learning_rate": 2.1507481847307262e-05, |
|
"loss": 1.0459, |
|
"step": 9243 |
|
}, |
|
{ |
|
"epoch": 1.7847980088071989, |
|
"grad_norm": 2.0821499824523926, |
|
"learning_rate": 2.0638675818549023e-05, |
|
"loss": 0.9344, |
|
"step": 9322 |
|
}, |
|
{ |
|
"epoch": 1.7999234156614974, |
|
"grad_norm": 10.352788925170898, |
|
"learning_rate": 1.9747240099412936e-05, |
|
"loss": 1.0636, |
|
"step": 9401 |
|
}, |
|
{ |
|
"epoch": 1.8012636415852958, |
|
"eval_nli-pairs_loss": 1.0661962032318115, |
|
"eval_nli-pairs_runtime": 11.9602, |
|
"eval_nli-pairs_samples_per_second": 125.416, |
|
"eval_nli-pairs_steps_per_second": 5.267, |
|
"step": 9408 |
|
}, |
|
{ |
|
"epoch": 1.8012636415852958, |
|
"eval_scitail-pairs-pos_loss": 0.5188334584236145, |
|
"eval_scitail-pairs-pos_runtime": 15.0572, |
|
"eval_scitail-pairs-pos_samples_per_second": 86.603, |
|
"eval_scitail-pairs-pos_steps_per_second": 3.653, |
|
"step": 9408 |
|
}, |
|
{ |
|
"epoch": 1.8012636415852958, |
|
"eval_qnli-contrastive_loss": 0.9691615700721741, |
|
"eval_qnli-contrastive_runtime": 4.7039, |
|
"eval_qnli-contrastive_samples_per_second": 318.886, |
|
"eval_qnli-contrastive_steps_per_second": 13.393, |
|
"step": 9408 |
|
}, |
|
{ |
|
"epoch": 1.8150488225157955, |
|
"grad_norm": 7.344937801361084, |
|
"learning_rate": 1.8836752287718936e-05, |
|
"loss": 1.2482, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 1.8301742293700938, |
|
"grad_norm": 1.0527677536010742, |
|
"learning_rate": 1.7910866443025426e-05, |
|
"loss": 1.0134, |
|
"step": 9559 |
|
}, |
|
{ |
|
"epoch": 1.845299636224392, |
|
"grad_norm": 13.278373718261719, |
|
"learning_rate": 1.6973298421796733e-05, |
|
"loss": 0.981, |
|
"step": 9638 |
|
}, |
|
{ |
|
"epoch": 1.8604250430786904, |
|
"grad_norm": 2.146714448928833, |
|
"learning_rate": 1.6027810964561188e-05, |
|
"loss": 1.0289, |
|
"step": 9717 |
|
}, |
|
{ |
|
"epoch": 1.8755504499329887, |
|
"grad_norm": 13.393159866333008, |
|
"learning_rate": 1.5078198594909435e-05, |
|
"loss": 0.9656, |
|
"step": 9796 |
|
}, |
|
{ |
|
"epoch": 1.8763162933180164, |
|
"eval_nli-pairs_loss": 1.0254323482513428, |
|
"eval_nli-pairs_runtime": 12.1952, |
|
"eval_nli-pairs_samples_per_second": 122.999, |
|
"eval_nli-pairs_steps_per_second": 5.166, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 1.8763162933180164, |
|
"eval_scitail-pairs-pos_loss": 0.496192991733551, |
|
"eval_scitail-pairs-pos_runtime": 15.1968, |
|
"eval_scitail-pairs-pos_samples_per_second": 85.808, |
|
"eval_scitail-pairs-pos_steps_per_second": 3.619, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 1.8763162933180164, |
|
"eval_qnli-contrastive_loss": 1.0920603275299072, |
|
"eval_qnli-contrastive_runtime": 4.731, |
|
"eval_qnli-contrastive_samples_per_second": 317.056, |
|
"eval_qnli-contrastive_steps_per_second": 13.316, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 1.890675856787287, |
|
"grad_norm": 15.524497032165527, |
|
"learning_rate": 1.412827239093775e-05, |
|
"loss": 0.9088, |
|
"step": 9875 |
|
}, |
|
{ |
|
"epoch": 1.9058012636415853, |
|
"grad_norm": 67.18510437011719, |
|
"learning_rate": 1.3181844690253298e-05, |
|
"loss": 1.2097, |
|
"step": 9954 |
|
}, |
|
{ |
|
"epoch": 1.9209266704958836, |
|
"grad_norm": 5.732685565948486, |
|
"learning_rate": 1.2242713789924544e-05, |
|
"loss": 0.7741, |
|
"step": 10033 |
|
}, |
|
{ |
|
"epoch": 1.936052077350182, |
|
"grad_norm": 6.51609992980957, |
|
"learning_rate": 1.13146487027805e-05, |
|
"loss": 0.8206, |
|
"step": 10112 |
|
}, |
|
{ |
|
"epoch": 1.9511774842044802, |
|
"grad_norm": 6.481364727020264, |
|
"learning_rate": 1.040137403123638e-05, |
|
"loss": 0.8686, |
|
"step": 10191 |
|
}, |
|
{ |
|
"epoch": 1.9513689450507372, |
|
"eval_nli-pairs_loss": 1.0038272142410278, |
|
"eval_nli-pairs_runtime": 12.0711, |
|
"eval_nli-pairs_samples_per_second": 124.264, |
|
"eval_nli-pairs_steps_per_second": 5.219, |
|
"step": 10192 |
|
}, |
|
{ |
|
"epoch": 1.9513689450507372, |
|
"eval_scitail-pairs-pos_loss": 0.4778198003768921, |
|
"eval_scitail-pairs-pos_runtime": 15.3152, |
|
"eval_scitail-pairs-pos_samples_per_second": 85.144, |
|
"eval_scitail-pairs-pos_steps_per_second": 3.591, |
|
"step": 10192 |
|
}, |
|
{ |
|
"epoch": 1.9513689450507372, |
|
"eval_qnli-contrastive_loss": 0.9486138820648193, |
|
"eval_qnli-contrastive_runtime": 4.7421, |
|
"eval_qnli-contrastive_samples_per_second": 316.315, |
|
"eval_qnli-contrastive_steps_per_second": 13.285, |
|
"step": 10192 |
|
}, |
|
{ |
|
"epoch": 1.9663028910587785, |
|
"grad_norm": 9.677536964416504, |
|
"learning_rate": 9.50655501935166e-06, |
|
"loss": 0.7649, |
|
"step": 10270 |
|
}, |
|
{ |
|
"epoch": 1.9814282979130766, |
|
"grad_norm": 8.125744819641113, |
|
"learning_rate": 8.633782843110642e-06, |
|
"loss": 0.9249, |
|
"step": 10349 |
|
}, |
|
{ |
|
"epoch": 1.9965537047673751, |
|
"grad_norm": 3.1055586338043213, |
|
"learning_rate": 7.797110684759332e-06, |
|
"loss": 0.6997, |
|
"step": 10428 |
|
}, |
|
{ |
|
"epoch": 2.0116791116216732, |
|
"grad_norm": 7.393470764160156, |
|
"learning_rate": 6.978450495850865e-06, |
|
"loss": 1.06, |
|
"step": 10507 |
|
}, |
|
{ |
|
"epoch": 2.026421596783458, |
|
"eval_nli-pairs_loss": 1.000571608543396, |
|
"eval_nli-pairs_runtime": 12.5554, |
|
"eval_nli-pairs_samples_per_second": 119.47, |
|
"eval_nli-pairs_steps_per_second": 5.018, |
|
"step": 10584 |
|
}, |
|
{ |
|
"epoch": 2.026421596783458, |
|
"eval_scitail-pairs-pos_loss": 0.48184335231781006, |
|
"eval_scitail-pairs-pos_runtime": 15.4715, |
|
"eval_scitail-pairs-pos_samples_per_second": 84.284, |
|
"eval_scitail-pairs-pos_steps_per_second": 3.555, |
|
"step": 10584 |
|
}, |
|
{ |
|
"epoch": 2.026421596783458, |
|
"eval_qnli-contrastive_loss": 0.9664335250854492, |
|
"eval_qnli-contrastive_runtime": 4.7851, |
|
"eval_qnli-contrastive_samples_per_second": 313.474, |
|
"eval_qnli-contrastive_steps_per_second": 13.166, |
|
"step": 10584 |
|
}, |
|
{ |
|
"epoch": 2.0268045184759718, |
|
"grad_norm": 12.336913108825684, |
|
"learning_rate": 6.191983181204208e-06, |
|
"loss": 0.9447, |
|
"step": 10586 |
|
}, |
|
{ |
|
"epoch": 2.04192992533027, |
|
"grad_norm": 4.7379984855651855, |
|
"learning_rate": 5.440865069077124e-06, |
|
"loss": 1.0151, |
|
"step": 10665 |
|
}, |
|
{ |
|
"epoch": 2.0570553321845684, |
|
"grad_norm": 27.00238800048828, |
|
"learning_rate": 4.728110620818674e-06, |
|
"loss": 1.113, |
|
"step": 10744 |
|
}, |
|
{ |
|
"epoch": 2.0721807390388665, |
|
"grad_norm": 17.84748649597168, |
|
"learning_rate": 4.0565803329351935e-06, |
|
"loss": 1.1183, |
|
"step": 10823 |
|
}, |
|
{ |
|
"epoch": 2.087306145893165, |
|
"grad_norm": 15.165081977844238, |
|
"learning_rate": 3.4289692570634956e-06, |
|
"loss": 1.1639, |
|
"step": 10902 |
|
}, |
|
{ |
|
"epoch": 2.1014742485161784, |
|
"eval_nli-pairs_loss": 0.9944142699241638, |
|
"eval_nli-pairs_runtime": 12.004, |
|
"eval_nli-pairs_samples_per_second": 124.958, |
|
"eval_nli-pairs_steps_per_second": 5.248, |
|
"step": 10976 |
|
}, |
|
{ |
|
"epoch": 2.1014742485161784, |
|
"eval_scitail-pairs-pos_loss": 0.47857147455215454, |
|
"eval_scitail-pairs-pos_runtime": 15.1823, |
|
"eval_scitail-pairs-pos_samples_per_second": 85.89, |
|
"eval_scitail-pairs-pos_steps_per_second": 3.623, |
|
"step": 10976 |
|
}, |
|
{ |
|
"epoch": 2.1014742485161784, |
|
"eval_qnli-contrastive_loss": 0.9332481026649475, |
|
"eval_qnli-contrastive_runtime": 4.7377, |
|
"eval_qnli-contrastive_samples_per_second": 316.609, |
|
"eval_qnli-contrastive_steps_per_second": 13.298, |
|
"step": 10976 |
|
}, |
|
{ |
|
"epoch": 2.102431552747463, |
|
"grad_norm": 14.085611343383789, |
|
"learning_rate": 2.847796183923562e-06, |
|
"loss": 1.0222, |
|
"step": 10981 |
|
}, |
|
{ |
|
"epoch": 2.1175569596017616, |
|
"grad_norm": 9.214906692504883, |
|
"learning_rate": 2.3153935346589784e-06, |
|
"loss": 1.244, |
|
"step": 11060 |
|
}, |
|
{ |
|
"epoch": 2.1326823664560597, |
|
"grad_norm": 28.293725967407227, |
|
"learning_rate": 1.8338980001342158e-06, |
|
"loss": 1.0128, |
|
"step": 11139 |
|
}, |
|
{ |
|
"epoch": 2.147807773310358, |
|
"grad_norm": 7.782803058624268, |
|
"learning_rate": 1.4052419657559468e-06, |
|
"loss": 1.3783, |
|
"step": 11218 |
|
}, |
|
{ |
|
"epoch": 2.1629331801646563, |
|
"grad_norm": 8.853714942932129, |
|
"learning_rate": 1.0311457562331311e-06, |
|
"loss": 1.0301, |
|
"step": 11297 |
|
}, |
|
{ |
|
"epoch": 2.176526900248899, |
|
"eval_nli-pairs_loss": 0.9802760481834412, |
|
"eval_nli-pairs_runtime": 11.9822, |
|
"eval_nli-pairs_samples_per_second": 125.185, |
|
"eval_nli-pairs_steps_per_second": 5.258, |
|
"step": 11368 |
|
}, |
|
{ |
|
"epoch": 2.176526900248899, |
|
"eval_scitail-pairs-pos_loss": 0.47513890266418457, |
|
"eval_scitail-pairs-pos_runtime": 15.0277, |
|
"eval_scitail-pairs-pos_samples_per_second": 86.773, |
|
"eval_scitail-pairs-pos_steps_per_second": 3.66, |
|
"step": 11368 |
|
}, |
|
{ |
|
"epoch": 2.176526900248899, |
|
"eval_qnli-contrastive_loss": 0.9649375677108765, |
|
"eval_qnli-contrastive_runtime": 4.7258, |
|
"eval_qnli-contrastive_samples_per_second": 317.404, |
|
"eval_qnli-contrastive_steps_per_second": 13.331, |
|
"step": 11368 |
|
}, |
|
{ |
|
"epoch": 2.178058587018955, |
|
"grad_norm": 9.836175918579102, |
|
"learning_rate": 7.131107314001456e-07, |
|
"loss": 0.8699, |
|
"step": 11376 |
|
}, |
|
{ |
|
"epoch": 2.193183993873253, |
|
"grad_norm": 6.889993190765381, |
|
"learning_rate": 4.5241326081128687e-07, |
|
"loss": 0.7565, |
|
"step": 11455 |
|
}, |
|
{ |
|
"epoch": 2.2083094007275514, |
|
"grad_norm": 7.592372894287109, |
|
"learning_rate": 2.500996012884593e-07, |
|
"loss": 1.3038, |
|
"step": 11534 |
|
}, |
|
{ |
|
"epoch": 2.2234348075818495, |
|
"grad_norm": 2.2131893634796143, |
|
"learning_rate": 1.069816979800553e-07, |
|
"loss": 0.9584, |
|
"step": 11613 |
|
}, |
|
{ |
|
"epoch": 2.2385602144361476, |
|
"grad_norm": 129.076904296875, |
|
"learning_rate": 2.3633925782526324e-08, |
|
"loss": 1.4689, |
|
"step": 11692 |
|
}, |
|
{ |
|
"epoch": 2.25157955198162, |
|
"eval_nli-pairs_loss": 0.9801518321037292, |
|
"eval_nli-pairs_runtime": 12.0172, |
|
"eval_nli-pairs_samples_per_second": 124.821, |
|
"eval_nli-pairs_steps_per_second": 5.242, |
|
"step": 11760 |
|
}, |
|
{ |
|
"epoch": 2.25157955198162, |
|
"eval_scitail-pairs-pos_loss": 0.4722036123275757, |
|
"eval_scitail-pairs-pos_runtime": 15.1727, |
|
"eval_scitail-pairs-pos_samples_per_second": 85.944, |
|
"eval_scitail-pairs-pos_steps_per_second": 3.625, |
|
"step": 11760 |
|
}, |
|
{ |
|
"epoch": 2.25157955198162, |
|
"eval_qnli-contrastive_loss": 0.9584055542945862, |
|
"eval_qnli-contrastive_runtime": 4.7605, |
|
"eval_qnli-contrastive_samples_per_second": 315.092, |
|
"eval_qnli-contrastive_steps_per_second": 13.234, |
|
"step": 11760 |
|
}, |
|
{ |
|
"epoch": 2.253685621290446, |
|
"grad_norm": 9.094249725341797, |
|
"learning_rate": 2.999960921579765e-05, |
|
"loss": 0.9979, |
|
"step": 11771 |
|
}, |
|
{ |
|
"epoch": 2.2688110281447442, |
|
"grad_norm": 14.057835578918457, |
|
"learning_rate": 2.9962654445090394e-05, |
|
"loss": 1.3444, |
|
"step": 11850 |
|
}, |
|
{ |
|
"epoch": 2.2839364349990428, |
|
"grad_norm": 8.33903694152832, |
|
"learning_rate": 2.9865650072629244e-05, |
|
"loss": 1.2052, |
|
"step": 11929 |
|
}, |
|
{ |
|
"epoch": 2.299061841853341, |
|
"grad_norm": 5.676733493804932, |
|
"learning_rate": 2.970898540593688e-05, |
|
"loss": 1.2007, |
|
"step": 12008 |
|
}, |
|
{ |
|
"epoch": 2.3141872487076394, |
|
"grad_norm": 3.648158550262451, |
|
"learning_rate": 2.9493289187117727e-05, |
|
"loss": 1.1402, |
|
"step": 12087 |
|
}, |
|
{ |
|
"epoch": 2.3266322037143405, |
|
"eval_nli-pairs_loss": 1.0052505731582642, |
|
"eval_nli-pairs_runtime": 12.1373, |
|
"eval_nli-pairs_samples_per_second": 123.586, |
|
"eval_nli-pairs_steps_per_second": 5.191, |
|
"step": 12152 |
|
}, |
|
{ |
|
"epoch": 2.3266322037143405, |
|
"eval_scitail-pairs-pos_loss": 0.47668519616127014, |
|
"eval_scitail-pairs-pos_runtime": 15.0626, |
|
"eval_scitail-pairs-pos_samples_per_second": 86.572, |
|
"eval_scitail-pairs-pos_steps_per_second": 3.651, |
|
"step": 12152 |
|
}, |
|
{ |
|
"epoch": 2.3266322037143405, |
|
"eval_qnli-contrastive_loss": 1.2372807264328003, |
|
"eval_qnli-contrastive_runtime": 4.7164, |
|
"eval_qnli-contrastive_samples_per_second": 318.038, |
|
"eval_qnli-contrastive_steps_per_second": 13.358, |
|
"step": 12152 |
|
}, |
|
{ |
|
"epoch": 2.3293126555619375, |
|
"grad_norm": 4.789942741394043, |
|
"learning_rate": 2.9219427069528128e-05, |
|
"loss": 1.5263, |
|
"step": 12166 |
|
}, |
|
{ |
|
"epoch": 2.344438062416236, |
|
"grad_norm": 14.52586555480957, |
|
"learning_rate": 2.8888498143650785e-05, |
|
"loss": 1.263, |
|
"step": 12245 |
|
}, |
|
{ |
|
"epoch": 2.359563469270534, |
|
"grad_norm": 2.835966110229492, |
|
"learning_rate": 2.8501830526116386e-05, |
|
"loss": 1.1912, |
|
"step": 12324 |
|
}, |
|
{ |
|
"epoch": 2.3746888761248326, |
|
"grad_norm": 14.9393949508667, |
|
"learning_rate": 2.8060976029574842e-05, |
|
"loss": 1.0982, |
|
"step": 12403 |
|
}, |
|
{ |
|
"epoch": 2.3898142829791307, |
|
"grad_norm": 8.84047794342041, |
|
"learning_rate": 2.7567703934807572e-05, |
|
"loss": 1.1574, |
|
"step": 12482 |
|
}, |
|
{ |
|
"epoch": 2.401684855447061, |
|
"eval_nli-pairs_loss": 0.9759184122085571, |
|
"eval_nli-pairs_runtime": 12.2553, |
|
"eval_nli-pairs_samples_per_second": 122.396, |
|
"eval_nli-pairs_steps_per_second": 5.141, |
|
"step": 12544 |
|
}, |
|
{ |
|
"epoch": 2.401684855447061, |
|
"eval_scitail-pairs-pos_loss": 0.4914855659008026, |
|
"eval_scitail-pairs-pos_runtime": 15.0918, |
|
"eval_scitail-pairs-pos_samples_per_second": 86.404, |
|
"eval_scitail-pairs-pos_steps_per_second": 3.644, |
|
"step": 12544 |
|
}, |
|
{ |
|
"epoch": 2.401684855447061, |
|
"eval_qnli-contrastive_loss": 1.1089410781860352, |
|
"eval_qnli-contrastive_runtime": 4.7223, |
|
"eval_qnli-contrastive_samples_per_second": 317.644, |
|
"eval_qnli-contrastive_steps_per_second": 13.341, |
|
"step": 12544 |
|
}, |
|
{ |
|
"epoch": 2.404939689833429, |
|
"grad_norm": 11.71249008178711, |
|
"learning_rate": 2.7023993890075236e-05, |
|
"loss": 1.4077, |
|
"step": 12561 |
|
}, |
|
{ |
|
"epoch": 2.4200650966877273, |
|
"grad_norm": 2.904869794845581, |
|
"learning_rate": 2.6432027966197927e-05, |
|
"loss": 1.3183, |
|
"step": 12640 |
|
}, |
|
{ |
|
"epoch": 2.435190503542026, |
|
"grad_norm": 9.094073295593262, |
|
"learning_rate": 2.579418189925317e-05, |
|
"loss": 1.0883, |
|
"step": 12719 |
|
}, |
|
{ |
|
"epoch": 2.450315910396324, |
|
"grad_norm": 9.701898574829102, |
|
"learning_rate": 2.5113015556037383e-05, |
|
"loss": 1.3182, |
|
"step": 12798 |
|
}, |
|
{ |
|
"epoch": 2.4654413172506224, |
|
"grad_norm": 6.8915581703186035, |
|
"learning_rate": 2.4391262660555785e-05, |
|
"loss": 1.0089, |
|
"step": 12877 |
|
}, |
|
{ |
|
"epoch": 2.4767375071797817, |
|
"eval_nli-pairs_loss": 0.9481552243232727, |
|
"eval_nli-pairs_runtime": 12.17, |
|
"eval_nli-pairs_samples_per_second": 123.254, |
|
"eval_nli-pairs_steps_per_second": 5.177, |
|
"step": 12936 |
|
}, |
|
{ |
|
"epoch": 2.4767375071797817, |
|
"eval_scitail-pairs-pos_loss": 0.4552152752876282, |
|
"eval_scitail-pairs-pos_runtime": 15.2525, |
|
"eval_scitail-pairs-pos_samples_per_second": 85.494, |
|
"eval_scitail-pairs-pos_steps_per_second": 3.606, |
|
"step": 12936 |
|
}, |
|
{ |
|
"epoch": 2.4767375071797817, |
|
"eval_qnli-contrastive_loss": 1.1650612354278564, |
|
"eval_qnli-contrastive_runtime": 4.7586, |
|
"eval_qnli-contrastive_samples_per_second": 315.216, |
|
"eval_qnli-contrastive_steps_per_second": 13.239, |
|
"step": 12936 |
|
}, |
|
{ |
|
"epoch": 2.4805667241049205, |
|
"grad_norm": 9.97049617767334, |
|
"learning_rate": 2.3631819822771357e-05, |
|
"loss": 1.0616, |
|
"step": 12956 |
|
}, |
|
{ |
|
"epoch": 2.4956921309592186, |
|
"grad_norm": 10.72946548461914, |
|
"learning_rate": 2.2837734913643845e-05, |
|
"loss": 1.1083, |
|
"step": 13035 |
|
}, |
|
{ |
|
"epoch": 2.510817537813517, |
|
"grad_norm": 6.889919281005859, |
|
"learning_rate": 2.2012194833113163e-05, |
|
"loss": 1.2687, |
|
"step": 13114 |
|
}, |
|
{ |
|
"epoch": 2.5259429446678157, |
|
"grad_norm": 2.167541742324829, |
|
"learning_rate": 2.1158512720117925e-05, |
|
"loss": 0.698, |
|
"step": 13193 |
|
}, |
|
{ |
|
"epoch": 2.5410683515221137, |
|
"grad_norm": 6.788521766662598, |
|
"learning_rate": 2.0280114655979378e-05, |
|
"loss": 1.0596, |
|
"step": 13272 |
|
}, |
|
{ |
|
"epoch": 2.5517901589125023, |
|
"eval_nli-pairs_loss": 0.9386218786239624, |
|
"eval_nli-pairs_runtime": 12.1882, |
|
"eval_nli-pairs_samples_per_second": 123.07, |
|
"eval_nli-pairs_steps_per_second": 5.169, |
|
"step": 13328 |
|
}, |
|
{ |
|
"epoch": 2.5517901589125023, |
|
"eval_scitail-pairs-pos_loss": 0.45524224638938904, |
|
"eval_scitail-pairs-pos_runtime": 15.3268, |
|
"eval_scitail-pairs-pos_samples_per_second": 85.08, |
|
"eval_scitail-pairs-pos_steps_per_second": 3.588, |
|
"step": 13328 |
|
}, |
|
{ |
|
"epoch": 2.5517901589125023, |
|
"eval_qnli-contrastive_loss": 1.053303837776184, |
|
"eval_qnli-contrastive_runtime": 4.7606, |
|
"eval_qnli-contrastive_samples_per_second": 315.086, |
|
"eval_qnli-contrastive_steps_per_second": 13.234, |
|
"step": 13328 |
|
}, |
|
{ |
|
"epoch": 2.556193758376412, |
|
"grad_norm": 5.612150192260742, |
|
"learning_rate": 1.9380525914513508e-05, |
|
"loss": 1.1182, |
|
"step": 13351 |
|
}, |
|
{ |
|
"epoch": 2.5713191652307104, |
|
"grad_norm": 5.856744289398193, |
|
"learning_rate": 1.8463356814054177e-05, |
|
"loss": 0.9092, |
|
"step": 13430 |
|
}, |
|
{ |
|
"epoch": 2.586444572085009, |
|
"grad_norm": 3.5007331371307373, |
|
"learning_rate": 1.7532288228167412e-05, |
|
"loss": 0.8628, |
|
"step": 13509 |
|
}, |
|
{ |
|
"epoch": 2.601569978939307, |
|
"grad_norm": 3.8348581790924072, |
|
"learning_rate": 1.6591056813206084e-05, |
|
"loss": 0.762, |
|
"step": 13588 |
|
}, |
|
{ |
|
"epoch": 2.616695385793605, |
|
"grad_norm": 3.7152531147003174, |
|
"learning_rate": 1.564344001199179e-05, |
|
"loss": 0.9521, |
|
"step": 13667 |
|
}, |
|
{ |
|
"epoch": 2.626842810645223, |
|
"eval_nli-pairs_loss": 0.8889521956443787, |
|
"eval_nli-pairs_runtime": 12.1548, |
|
"eval_nli-pairs_samples_per_second": 123.408, |
|
"eval_nli-pairs_steps_per_second": 5.183, |
|
"step": 13720 |
|
}, |
|
{ |
|
"epoch": 2.626842810645223, |
|
"eval_scitail-pairs-pos_loss": 0.45236507058143616, |
|
"eval_scitail-pairs-pos_runtime": 15.2247, |
|
"eval_scitail-pairs-pos_samples_per_second": 85.65, |
|
"eval_scitail-pairs-pos_steps_per_second": 3.613, |
|
"step": 13720 |
|
}, |
|
{ |
|
"epoch": 2.626842810645223, |
|
"eval_qnli-contrastive_loss": 0.794640302658081, |
|
"eval_qnli-contrastive_runtime": 4.8223, |
|
"eval_qnli-contrastive_samples_per_second": 311.053, |
|
"eval_qnli-contrastive_steps_per_second": 13.064, |
|
"step": 13720 |
|
}, |
|
{ |
|
"epoch": 2.6318207926479036, |
|
"grad_norm": 8.717215538024902, |
|
"learning_rate": 1.4693240893808674e-05, |
|
"loss": 0.8631, |
|
"step": 13746 |
|
}, |
|
{ |
|
"epoch": 2.6469461995022017, |
|
"grad_norm": 0.3876877725124359, |
|
"learning_rate": 1.3744272891550144e-05, |
|
"loss": 0.6899, |
|
"step": 13825 |
|
}, |
|
{ |
|
"epoch": 2.6620716063565, |
|
"grad_norm": 0.41043633222579956, |
|
"learning_rate": 1.2800344497273615e-05, |
|
"loss": 0.6552, |
|
"step": 13904 |
|
}, |
|
{ |
|
"epoch": 2.6771970132107983, |
|
"grad_norm": 0.8379763960838318, |
|
"learning_rate": 1.1865243977584432e-05, |
|
"loss": 0.572, |
|
"step": 13983 |
|
}, |
|
{ |
|
"epoch": 2.692322420065097, |
|
"grad_norm": 4.94291877746582, |
|
"learning_rate": 1.0942724170190126e-05, |
|
"loss": 0.9809, |
|
"step": 14062 |
|
}, |
|
{ |
|
"epoch": 2.701895462377944, |
|
"eval_nli-pairs_loss": 0.8912826180458069, |
|
"eval_nli-pairs_runtime": 12.096, |
|
"eval_nli-pairs_samples_per_second": 124.008, |
|
"eval_nli-pairs_steps_per_second": 5.208, |
|
"step": 14112 |
|
}, |
|
{ |
|
"epoch": 2.701895462377944, |
|
"eval_scitail-pairs-pos_loss": 0.4352218210697174, |
|
"eval_scitail-pairs-pos_runtime": 15.0606, |
|
"eval_scitail-pairs-pos_samples_per_second": 86.584, |
|
"eval_scitail-pairs-pos_steps_per_second": 3.652, |
|
"step": 14112 |
|
}, |
|
{ |
|
"epoch": 2.701895462377944, |
|
"eval_qnli-contrastive_loss": 0.727630078792572, |
|
"eval_qnli-contrastive_runtime": 4.7927, |
|
"eval_qnli-contrastive_samples_per_second": 312.979, |
|
"eval_qnli-contrastive_steps_per_second": 13.145, |
|
"step": 14112 |
|
}, |
|
{ |
|
"epoch": 2.707447826919395, |
|
"grad_norm": 2.8381199836730957, |
|
"learning_rate": 1.0036487422641892e-05, |
|
"loss": 0.5392, |
|
"step": 14141 |
|
}, |
|
{ |
|
"epoch": 2.7225732337736934, |
|
"grad_norm": 9.423616409301758, |
|
"learning_rate": 9.150170733707937e-06, |
|
"loss": 0.6777, |
|
"step": 14220 |
|
}, |
|
{ |
|
"epoch": 2.7376986406279915, |
|
"grad_norm": 0.6272808909416199, |
|
"learning_rate": 8.287331157010844e-06, |
|
"loss": 0.6523, |
|
"step": 14299 |
|
}, |
|
{ |
|
"epoch": 2.75282404748229, |
|
"grad_norm": 0.7308062314987183, |
|
"learning_rate": 7.4514315255090594e-06, |
|
"loss": 0.6416, |
|
"step": 14378 |
|
}, |
|
{ |
|
"epoch": 2.767949454336588, |
|
"grad_norm": 4.945492267608643, |
|
"learning_rate": 6.645826554113819e-06, |
|
"loss": 0.7713, |
|
"step": 14457 |
|
}, |
|
{ |
|
"epoch": 2.7769481141106644, |
|
"eval_nli-pairs_loss": 0.872556209564209, |
|
"eval_nli-pairs_runtime": 12.1015, |
|
"eval_nli-pairs_samples_per_second": 123.952, |
|
"eval_nli-pairs_steps_per_second": 5.206, |
|
"step": 14504 |
|
}, |
|
{ |
|
"epoch": 2.7769481141106644, |
|
"eval_scitail-pairs-pos_loss": 0.42709970474243164, |
|
"eval_scitail-pairs-pos_runtime": 15.0845, |
|
"eval_scitail-pairs-pos_samples_per_second": 86.446, |
|
"eval_scitail-pairs-pos_steps_per_second": 3.646, |
|
"step": 14504 |
|
}, |
|
{ |
|
"epoch": 2.7769481141106644, |
|
"eval_qnli-contrastive_loss": 0.7923160791397095, |
|
"eval_qnli-contrastive_runtime": 4.7233, |
|
"eval_qnli-contrastive_samples_per_second": 317.576, |
|
"eval_qnli-contrastive_steps_per_second": 13.338, |
|
"step": 14504 |
|
}, |
|
{ |
|
"epoch": 2.7830748611908867, |
|
"grad_norm": 9.502604484558105, |
|
"learning_rate": 5.873749376215993e-06, |
|
"loss": 0.6531, |
|
"step": 14536 |
|
}, |
|
{ |
|
"epoch": 2.7982002680451847, |
|
"grad_norm": 6.348124980926514, |
|
"learning_rate": 5.138298568156192e-06, |
|
"loss": 0.7056, |
|
"step": 14615 |
|
}, |
|
{ |
|
"epoch": 2.813325674899483, |
|
"grad_norm": 4.395310401916504, |
|
"learning_rate": 4.442425713712258e-06, |
|
"loss": 1.054, |
|
"step": 14694 |
|
}, |
|
{ |
|
"epoch": 2.8284510817537813, |
|
"grad_norm": 5.8618011474609375, |
|
"learning_rate": 3.7889235585119115e-06, |
|
"loss": 0.8535, |
|
"step": 14773 |
|
}, |
|
{ |
|
"epoch": 2.84357648860808, |
|
"grad_norm": 7.8259406089782715, |
|
"learning_rate": 3.1804148019103528e-06, |
|
"loss": 0.7321, |
|
"step": 14852 |
|
}, |
|
{ |
|
"epoch": 2.852000765843385, |
|
"eval_nli-pairs_loss": 0.8661790490150452, |
|
"eval_nli-pairs_runtime": 12.1048, |
|
"eval_nli-pairs_samples_per_second": 123.917, |
|
"eval_nli-pairs_steps_per_second": 5.205, |
|
"step": 14896 |
|
}, |
|
{ |
|
"epoch": 2.852000765843385, |
|
"eval_scitail-pairs-pos_loss": 0.4211391508579254, |
|
"eval_scitail-pairs-pos_runtime": 15.1135, |
|
"eval_scitail-pairs-pos_samples_per_second": 86.28, |
|
"eval_scitail-pairs-pos_steps_per_second": 3.639, |
|
"step": 14896 |
|
}, |
|
{ |
|
"epoch": 2.852000765843385, |
|
"eval_qnli-contrastive_loss": 0.7693744897842407, |
|
"eval_qnli-contrastive_runtime": 4.7208, |
|
"eval_qnli-contrastive_samples_per_second": 317.743, |
|
"eval_qnli-contrastive_steps_per_second": 13.345, |
|
"step": 14896 |
|
}, |
|
{ |
|
"epoch": 2.858701895462378, |
|
"grad_norm": 8.156476974487305, |
|
"learning_rate": 2.6193415713143028e-06, |
|
"loss": 0.8236, |
|
"step": 14931 |
|
}, |
|
{ |
|
"epoch": 2.873827302316676, |
|
"grad_norm": 0.3863189220428467, |
|
"learning_rate": 2.107955621195247e-06, |
|
"loss": 0.776, |
|
"step": 15010 |
|
}, |
|
{ |
|
"epoch": 2.8889527091709746, |
|
"grad_norm": 0.4337412118911743, |
|
"learning_rate": 1.6483092961261291e-06, |
|
"loss": 0.7049, |
|
"step": 15089 |
|
}, |
|
{ |
|
"epoch": 2.904078116025273, |
|
"grad_norm": 0.5512604117393494, |
|
"learning_rate": 1.2422472941095199e-06, |
|
"loss": 0.9409, |
|
"step": 15168 |
|
}, |
|
{ |
|
"epoch": 2.919203522879571, |
|
"grad_norm": 4.254249572753906, |
|
"learning_rate": 8.913992632535123e-07, |
|
"loss": 0.7416, |
|
"step": 15247 |
|
}, |
|
{ |
|
"epoch": 2.9270534175761056, |
|
"eval_nli-pairs_loss": 0.8609779477119446, |
|
"eval_nli-pairs_runtime": 12.2133, |
|
"eval_nli-pairs_samples_per_second": 122.817, |
|
"eval_nli-pairs_steps_per_second": 5.158, |
|
"step": 15288 |
|
}, |
|
{ |
|
"epoch": 2.9270534175761056, |
|
"eval_scitail-pairs-pos_loss": 0.42045190930366516, |
|
"eval_scitail-pairs-pos_runtime": 15.4078, |
|
"eval_scitail-pairs-pos_samples_per_second": 84.632, |
|
"eval_scitail-pairs-pos_steps_per_second": 3.57, |
|
"step": 15288 |
|
}, |
|
{ |
|
"epoch": 2.9270534175761056, |
|
"eval_qnli-contrastive_loss": 0.7351691722869873, |
|
"eval_qnli-contrastive_runtime": 4.7717, |
|
"eval_qnli-contrastive_samples_per_second": 314.351, |
|
"eval_qnli-contrastive_steps_per_second": 13.203, |
|
"step": 15288 |
|
}, |
|
{ |
|
"epoch": 2.9343289297338693, |
|
"grad_norm": 6.785557270050049, |
|
"learning_rate": 5.971732615070724e-07, |
|
"loss": 0.6059, |
|
"step": 15326 |
|
}, |
|
{ |
|
"epoch": 2.949454336588168, |
|
"grad_norm": 14.958471298217773, |
|
"learning_rate": 3.6075010570289336e-07, |
|
"loss": 0.6598, |
|
"step": 15405 |
|
}, |
|
{ |
|
"epoch": 2.964579743442466, |
|
"grad_norm": 0.34104809165000916, |
|
"learning_rate": 1.8307863258672674e-07, |
|
"loss": 0.5777, |
|
"step": 15484 |
|
}, |
|
{ |
|
"epoch": 2.9797051502967644, |
|
"grad_norm": 0.6522515416145325, |
|
"learning_rate": 6.487189085208289e-08, |
|
"loss": 0.8212, |
|
"step": 15563 |
|
}, |
|
{ |
|
"epoch": 2.9948305571510625, |
|
"grad_norm": 0.3607589304447174, |
|
"learning_rate": 6.6042794628590194e-09, |
|
"loss": 0.5638, |
|
"step": 15642 |
|
} |
|
], |
|
"logging_steps": 79, |
|
"max_steps": 15669, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 3918, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 30, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|