xval_compot_500 / trainer_state.json
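A minimal reading sketch, not part of the original file: trainer_state.json is the checkpoint state written by the Hugging Face Trainer, and its "log_history" below interleaves two records per optimizer step, one carrying "grad_norm", "learning_rate" and the smoothed "loss", and one carrying the per-component losses ("loss_ce", "loss_iou", "loss_num", "loss_xval"). The snippet assumes the file sits in the working directory under that name; the helper name and path are illustrative only.

import json
from collections import defaultdict

def load_component_losses(path="trainer_state.json"):
    """Return {step: {metric: value}} for every loss-type field logged at that step."""
    with open(path) as f:
        state = json.load(f)

    per_step = defaultdict(dict)
    for record in state["log_history"]:
        step = record["step"]
        for key, value in record.items():
            # Collect loss, loss_ce, loss_iou, loss_num, loss_xval from either record kind.
            if key.startswith("loss"):
                per_step[step][key] = value
    return dict(per_step)

if __name__ == "__main__":
    losses = load_component_losses()
    # Quick sanity check: print the xval-loss trajectory over training steps.
    for step in sorted(losses):
        print(step, losses[step].get("loss_xval"))
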
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.3020833333333333,
"eval_steps": 250,
"global_step": 500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0026041666666666665,
"grad_norm": 29.526924338468568,
"learning_rate": 5e-06,
"loss": 0.7869,
"num_input_tokens_seen": 172856,
"step": 1
},
{
"epoch": 0.0026041666666666665,
"loss": 0.8704751133918762,
"loss_ce": 0.5689004063606262,
"loss_iou": 0.435546875,
"loss_num": 0.060302734375,
"loss_xval": 0.30078125,
"num_input_tokens_seen": 172856,
"step": 1
},
{
"epoch": 0.005208333333333333,
"grad_norm": 70.41913440760779,
"learning_rate": 5e-06,
"loss": 0.4046,
"num_input_tokens_seen": 345648,
"step": 2
},
{
"epoch": 0.005208333333333333,
"loss": 0.36316120624542236,
"loss_ce": 0.12207232415676117,
"loss_iou": 0.52734375,
"loss_num": 0.048095703125,
"loss_xval": 0.2412109375,
"num_input_tokens_seen": 345648,
"step": 2
},
{
"epoch": 0.0078125,
"grad_norm": 27.92164379971255,
"learning_rate": 5e-06,
"loss": 1.9793,
"num_input_tokens_seen": 518228,
"step": 3
},
{
"epoch": 0.0078125,
"loss": 1.6886000633239746,
"loss_ce": 0.06360010802745819,
"loss_iou": 0.98828125,
"loss_num": 0.32421875,
"loss_xval": 1.625,
"num_input_tokens_seen": 518228,
"step": 3
},
{
"epoch": 0.010416666666666666,
"grad_norm": 8.939505658432642,
"learning_rate": 5e-06,
"loss": 0.3657,
"num_input_tokens_seen": 690760,
"step": 4
},
{
"epoch": 0.010416666666666666,
"loss": 0.330140084028244,
"loss_ce": 0.023987744003534317,
"loss_iou": 0.154296875,
"loss_num": 0.061279296875,
"loss_xval": 0.306640625,
"num_input_tokens_seen": 690760,
"step": 4
},
{
"epoch": 0.013020833333333334,
"grad_norm": 19.28831412717169,
"learning_rate": 5e-06,
"loss": 0.468,
"num_input_tokens_seen": 863320,
"step": 5
},
{
"epoch": 0.013020833333333334,
"loss": 0.5520405769348145,
"loss_ce": 0.0034565767273306847,
"loss_iou": 0.162109375,
"loss_num": 0.10986328125,
"loss_xval": 0.546875,
"num_input_tokens_seen": 863320,
"step": 5
},
{
"epoch": 0.015625,
"grad_norm": 10.714818321426886,
"learning_rate": 5e-06,
"loss": 0.457,
"num_input_tokens_seen": 1035776,
"step": 6
},
{
"epoch": 0.015625,
"loss": 0.5038242340087891,
"loss_ce": 0.0006504527991637588,
"loss_iou": 0.0,
"loss_num": 0.1005859375,
"loss_xval": 0.50390625,
"num_input_tokens_seen": 1035776,
"step": 6
},
{
"epoch": 0.018229166666666668,
"grad_norm": 21.33070900107311,
"learning_rate": 5e-06,
"loss": 0.3384,
"num_input_tokens_seen": 1208264,
"step": 7
},
{
"epoch": 0.018229166666666668,
"loss": 0.3143744468688965,
"loss_ce": 0.0038276039995253086,
"loss_iou": 0.25390625,
"loss_num": 0.06201171875,
"loss_xval": 0.310546875,
"num_input_tokens_seen": 1208264,
"step": 7
},
{
"epoch": 0.020833333333333332,
"grad_norm": 15.620099673180961,
"learning_rate": 5e-06,
"loss": 0.3601,
"num_input_tokens_seen": 1380784,
"step": 8
},
{
"epoch": 0.020833333333333332,
"loss": 0.37209784984588623,
"loss_ce": 0.0007599706877954304,
"loss_iou": 0.0,
"loss_num": 0.07421875,
"loss_xval": 0.37109375,
"num_input_tokens_seen": 1380784,
"step": 8
},
{
"epoch": 0.0234375,
"grad_norm": 8.787794677847923,
"learning_rate": 5e-06,
"loss": 0.2684,
"num_input_tokens_seen": 1553796,
"step": 9
},
{
"epoch": 0.0234375,
"loss": 0.20013384521007538,
"loss_ce": 0.0006099226884543896,
"loss_iou": 0.427734375,
"loss_num": 0.039794921875,
"loss_xval": 0.19921875,
"num_input_tokens_seen": 1553796,
"step": 9
},
{
"epoch": 0.026041666666666668,
"grad_norm": 8.085511029078585,
"learning_rate": 5e-06,
"loss": 0.2969,
"num_input_tokens_seen": 1726712,
"step": 10
},
{
"epoch": 0.026041666666666668,
"loss": 0.2954822778701782,
"loss_ce": 0.0004383414634503424,
"loss_iou": 0.0,
"loss_num": 0.05908203125,
"loss_xval": 0.294921875,
"num_input_tokens_seen": 1726712,
"step": 10
},
{
"epoch": 0.028645833333333332,
"grad_norm": 19.923996243710985,
"learning_rate": 5e-06,
"loss": 0.3582,
"num_input_tokens_seen": 1898600,
"step": 11
},
{
"epoch": 0.028645833333333332,
"loss": 0.3439289331436157,
"loss_ce": 0.00030101489392109215,
"loss_iou": 0.166015625,
"loss_num": 0.06884765625,
"loss_xval": 0.34375,
"num_input_tokens_seen": 1898600,
"step": 11
},
{
"epoch": 0.03125,
"grad_norm": 8.414953842541747,
"learning_rate": 5e-06,
"loss": 0.2661,
"num_input_tokens_seen": 2071032,
"step": 12
},
{
"epoch": 0.03125,
"loss": 0.2936800718307495,
"loss_ce": 0.0003450897347647697,
"loss_iou": 0.23828125,
"loss_num": 0.05859375,
"loss_xval": 0.29296875,
"num_input_tokens_seen": 2071032,
"step": 12
},
{
"epoch": 0.033854166666666664,
"grad_norm": 19.99273085290305,
"learning_rate": 5e-06,
"loss": 0.3361,
"num_input_tokens_seen": 2243868,
"step": 13
},
{
"epoch": 0.033854166666666664,
"loss": 0.31856128573417664,
"loss_ce": 0.00044603750575333834,
"loss_iou": 0.22265625,
"loss_num": 0.0634765625,
"loss_xval": 0.318359375,
"num_input_tokens_seen": 2243868,
"step": 13
},
{
"epoch": 0.036458333333333336,
"grad_norm": 8.014341595032883,
"learning_rate": 5e-06,
"loss": 0.2489,
"num_input_tokens_seen": 2415868,
"step": 14
},
{
"epoch": 0.036458333333333336,
"loss": 0.17592763900756836,
"loss_ce": 0.00026846557739190757,
"loss_iou": 0.578125,
"loss_num": 0.03515625,
"loss_xval": 0.17578125,
"num_input_tokens_seen": 2415868,
"step": 14
},
{
"epoch": 0.0390625,
"grad_norm": 14.081056874922753,
"learning_rate": 5e-06,
"loss": 0.2469,
"num_input_tokens_seen": 2588144,
"step": 15
},
{
"epoch": 0.0390625,
"loss": 0.3065241575241089,
"loss_ce": 0.00037182882078923285,
"loss_iou": 0.298828125,
"loss_num": 0.061279296875,
"loss_xval": 0.306640625,
"num_input_tokens_seen": 2588144,
"step": 15
},
{
"epoch": 0.041666666666666664,
"grad_norm": 6.867314736910267,
"learning_rate": 5e-06,
"loss": 0.3062,
"num_input_tokens_seen": 2760456,
"step": 16
},
{
"epoch": 0.041666666666666664,
"loss": 0.4008222818374634,
"loss_ce": 0.0002485612640157342,
"loss_iou": 0.0,
"loss_num": 0.080078125,
"loss_xval": 0.400390625,
"num_input_tokens_seen": 2760456,
"step": 16
},
{
"epoch": 0.044270833333333336,
"grad_norm": 6.841838623253362,
"learning_rate": 5e-06,
"loss": 0.195,
"num_input_tokens_seen": 2933256,
"step": 17
},
{
"epoch": 0.044270833333333336,
"loss": 0.20536868274211884,
"loss_ce": 0.00022953077859710902,
"loss_iou": 0.53515625,
"loss_num": 0.041015625,
"loss_xval": 0.205078125,
"num_input_tokens_seen": 2933256,
"step": 17
},
{
"epoch": 0.046875,
"grad_norm": 8.073482751973284,
"learning_rate": 5e-06,
"loss": 0.2924,
"num_input_tokens_seen": 3105724,
"step": 18
},
{
"epoch": 0.046875,
"loss": 0.2312113493680954,
"loss_ce": 0.000376395124476403,
"loss_iou": 0.26953125,
"loss_num": 0.046142578125,
"loss_xval": 0.23046875,
"num_input_tokens_seen": 3105724,
"step": 18
},
{
"epoch": 0.049479166666666664,
"grad_norm": 7.523822902492111,
"learning_rate": 5e-06,
"loss": 0.152,
"num_input_tokens_seen": 3278360,
"step": 19
},
{
"epoch": 0.049479166666666664,
"loss": 0.14852207899093628,
"loss_ce": 0.00020665550255216658,
"loss_iou": 0.45703125,
"loss_num": 0.0296630859375,
"loss_xval": 0.1484375,
"num_input_tokens_seen": 3278360,
"step": 19
},
{
"epoch": 0.052083333333333336,
"grad_norm": 6.544223523818296,
"learning_rate": 5e-06,
"loss": 0.2287,
"num_input_tokens_seen": 3450936,
"step": 20
},
{
"epoch": 0.052083333333333336,
"loss": 0.28778478503227234,
"loss_ce": 0.00030919513665139675,
"loss_iou": 0.453125,
"loss_num": 0.0576171875,
"loss_xval": 0.287109375,
"num_input_tokens_seen": 3450936,
"step": 20
},
{
"epoch": 0.0546875,
"grad_norm": 11.63193790977644,
"learning_rate": 5e-06,
"loss": 0.1695,
"num_input_tokens_seen": 3623740,
"step": 21
},
{
"epoch": 0.0546875,
"loss": 0.19103749096393585,
"loss_ce": 0.00018056559201795608,
"loss_iou": 0.306640625,
"loss_num": 0.0380859375,
"loss_xval": 0.1904296875,
"num_input_tokens_seen": 3623740,
"step": 21
},
{
"epoch": 0.057291666666666664,
"grad_norm": 7.497321698776006,
"learning_rate": 5e-06,
"loss": 0.1598,
"num_input_tokens_seen": 3796836,
"step": 22
},
{
"epoch": 0.057291666666666664,
"loss": 0.1259785294532776,
"loss_ce": 0.00018507592903915793,
"loss_iou": 0.5078125,
"loss_num": 0.025146484375,
"loss_xval": 0.1259765625,
"num_input_tokens_seen": 3796836,
"step": 22
},
{
"epoch": 0.059895833333333336,
"grad_norm": 30.78448133351319,
"learning_rate": 5e-06,
"loss": 0.1943,
"num_input_tokens_seen": 3969500,
"step": 23
},
{
"epoch": 0.059895833333333336,
"loss": 0.15796104073524475,
"loss_ce": 0.0006429227069020271,
"loss_iou": 0.55859375,
"loss_num": 0.031494140625,
"loss_xval": 0.1572265625,
"num_input_tokens_seen": 3969500,
"step": 23
},
{
"epoch": 0.0625,
"grad_norm": 5.237378782764295,
"learning_rate": 5e-06,
"loss": 0.1909,
"num_input_tokens_seen": 4141940,
"step": 24
},
{
"epoch": 0.0625,
"loss": 0.17854130268096924,
"loss_ce": 0.00025760685093700886,
"loss_iou": 0.4296875,
"loss_num": 0.03564453125,
"loss_xval": 0.1787109375,
"num_input_tokens_seen": 4141940,
"step": 24
},
{
"epoch": 0.06510416666666667,
"grad_norm": 13.60608392035419,
"learning_rate": 5e-06,
"loss": 0.1772,
"num_input_tokens_seen": 4314172,
"step": 25
},
{
"epoch": 0.06510416666666667,
"loss": 0.19210708141326904,
"loss_ce": 0.0002125638711731881,
"loss_iou": 0.484375,
"loss_num": 0.038330078125,
"loss_xval": 0.19140625,
"num_input_tokens_seen": 4314172,
"step": 25
},
{
"epoch": 0.06770833333333333,
"grad_norm": 7.390014761942961,
"learning_rate": 5e-06,
"loss": 0.2151,
"num_input_tokens_seen": 4486776,
"step": 26
},
{
"epoch": 0.06770833333333333,
"loss": 0.24654103815555573,
"loss_ce": 0.00044729292858392,
"loss_iou": 0.53125,
"loss_num": 0.04931640625,
"loss_xval": 0.24609375,
"num_input_tokens_seen": 4486776,
"step": 26
},
{
"epoch": 0.0703125,
"grad_norm": 6.597800961680885,
"learning_rate": 5e-06,
"loss": 0.1804,
"num_input_tokens_seen": 4659796,
"step": 27
},
{
"epoch": 0.0703125,
"loss": 0.18685418367385864,
"loss_ce": 0.00020867137936875224,
"loss_iou": 0.546875,
"loss_num": 0.037353515625,
"loss_xval": 0.1865234375,
"num_input_tokens_seen": 4659796,
"step": 27
},
{
"epoch": 0.07291666666666667,
"grad_norm": 15.848602164160235,
"learning_rate": 5e-06,
"loss": 0.1935,
"num_input_tokens_seen": 4832580,
"step": 28
},
{
"epoch": 0.07291666666666667,
"loss": 0.1529167890548706,
"loss_ce": 0.00038992700865492225,
"loss_iou": 0.453125,
"loss_num": 0.030517578125,
"loss_xval": 0.15234375,
"num_input_tokens_seen": 4832580,
"step": 28
},
{
"epoch": 0.07552083333333333,
"grad_norm": 7.656983950050504,
"learning_rate": 5e-06,
"loss": 0.2008,
"num_input_tokens_seen": 5005204,
"step": 29
},
{
"epoch": 0.07552083333333333,
"loss": 0.26389509439468384,
"loss_ce": 0.00028428525547496974,
"loss_iou": 0.484375,
"loss_num": 0.052734375,
"loss_xval": 0.263671875,
"num_input_tokens_seen": 5005204,
"step": 29
},
{
"epoch": 0.078125,
"grad_norm": 4.507917280431056,
"learning_rate": 5e-06,
"loss": 0.1874,
"num_input_tokens_seen": 5177580,
"step": 30
},
{
"epoch": 0.078125,
"loss": 0.13707002997398376,
"loss_ce": 0.0004123126564081758,
"loss_iou": 0.515625,
"loss_num": 0.02734375,
"loss_xval": 0.13671875,
"num_input_tokens_seen": 5177580,
"step": 30
},
{
"epoch": 0.08072916666666667,
"grad_norm": 10.885923079707904,
"learning_rate": 5e-06,
"loss": 0.2153,
"num_input_tokens_seen": 5350148,
"step": 31
},
{
"epoch": 0.08072916666666667,
"loss": 0.2522280216217041,
"loss_ce": 0.0002138598938472569,
"loss_iou": 0.59375,
"loss_num": 0.050537109375,
"loss_xval": 0.251953125,
"num_input_tokens_seen": 5350148,
"step": 31
},
{
"epoch": 0.08333333333333333,
"grad_norm": 5.223864875647863,
"learning_rate": 5e-06,
"loss": 0.2242,
"num_input_tokens_seen": 5522620,
"step": 32
},
{
"epoch": 0.08333333333333333,
"loss": 0.22422021627426147,
"loss_ce": 0.0002517293323762715,
"loss_iou": 0.69921875,
"loss_num": 0.044677734375,
"loss_xval": 0.2236328125,
"num_input_tokens_seen": 5522620,
"step": 32
},
{
"epoch": 0.0859375,
"grad_norm": 8.823576859140516,
"learning_rate": 5e-06,
"loss": 0.1796,
"num_input_tokens_seen": 5695340,
"step": 33
},
{
"epoch": 0.0859375,
"loss": 0.19266511499881744,
"loss_ce": 0.0003433418460190296,
"loss_iou": 0.6953125,
"loss_num": 0.03857421875,
"loss_xval": 0.1923828125,
"num_input_tokens_seen": 5695340,
"step": 33
},
{
"epoch": 0.08854166666666667,
"grad_norm": 54.77946225550538,
"learning_rate": 5e-06,
"loss": 0.1289,
"num_input_tokens_seen": 5868252,
"step": 34
},
{
"epoch": 0.08854166666666667,
"loss": 0.13593435287475586,
"loss_ce": 0.0002532090584281832,
"loss_iou": 0.58984375,
"loss_num": 0.027099609375,
"loss_xval": 0.1357421875,
"num_input_tokens_seen": 5868252,
"step": 34
},
{
"epoch": 0.09114583333333333,
"grad_norm": 10.116131484083123,
"learning_rate": 5e-06,
"loss": 0.132,
"num_input_tokens_seen": 6041036,
"step": 35
},
{
"epoch": 0.09114583333333333,
"loss": 0.15703758597373962,
"loss_ce": 0.0003603329823818058,
"loss_iou": 0.6796875,
"loss_num": 0.03125,
"loss_xval": 0.15625,
"num_input_tokens_seen": 6041036,
"step": 35
},
{
"epoch": 0.09375,
"grad_norm": 3.6841467905553884,
"learning_rate": 5e-06,
"loss": 0.1766,
"num_input_tokens_seen": 6213444,
"step": 36
},
{
"epoch": 0.09375,
"loss": 0.11154159903526306,
"loss_ce": 0.00021347634901758283,
"loss_iou": 0.486328125,
"loss_num": 0.022216796875,
"loss_xval": 0.111328125,
"num_input_tokens_seen": 6213444,
"step": 36
},
{
"epoch": 0.09635416666666667,
"grad_norm": 7.922965723176142,
"learning_rate": 5e-06,
"loss": 0.1496,
"num_input_tokens_seen": 6386028,
"step": 37
},
{
"epoch": 0.09635416666666667,
"loss": 0.14449915289878845,
"loss_ce": 0.00021204788936302066,
"loss_iou": 0.6796875,
"loss_num": 0.02880859375,
"loss_xval": 0.14453125,
"num_input_tokens_seen": 6386028,
"step": 37
},
{
"epoch": 0.09895833333333333,
"grad_norm": 5.266919761801281,
"learning_rate": 5e-06,
"loss": 0.1401,
"num_input_tokens_seen": 6558240,
"step": 38
},
{
"epoch": 0.09895833333333333,
"loss": 0.1739426553249359,
"loss_ce": 0.00029765223735012114,
"loss_iou": 0.7578125,
"loss_num": 0.03466796875,
"loss_xval": 0.173828125,
"num_input_tokens_seen": 6558240,
"step": 38
},
{
"epoch": 0.1015625,
"grad_norm": 7.135945650156497,
"learning_rate": 5e-06,
"loss": 0.1703,
"num_input_tokens_seen": 6731156,
"step": 39
},
{
"epoch": 0.1015625,
"loss": 0.1407906413078308,
"loss_ce": 0.00022666863515041769,
"loss_iou": 0.62890625,
"loss_num": 0.0281982421875,
"loss_xval": 0.140625,
"num_input_tokens_seen": 6731156,
"step": 39
},
{
"epoch": 0.10416666666666667,
"grad_norm": 14.956590253309306,
"learning_rate": 5e-06,
"loss": 0.1741,
"num_input_tokens_seen": 6903828,
"step": 40
},
{
"epoch": 0.10416666666666667,
"loss": 0.14085114002227783,
"loss_ce": 0.000409250904340297,
"loss_iou": 0.55078125,
"loss_num": 0.028076171875,
"loss_xval": 0.140625,
"num_input_tokens_seen": 6903828,
"step": 40
},
{
"epoch": 0.10677083333333333,
"grad_norm": 6.97548951750633,
"learning_rate": 5e-06,
"loss": 0.1407,
"num_input_tokens_seen": 7076944,
"step": 41
},
{
"epoch": 0.10677083333333333,
"loss": 0.13742247223854065,
"loss_ce": 0.000459590955870226,
"loss_iou": 0.6484375,
"loss_num": 0.02734375,
"loss_xval": 0.13671875,
"num_input_tokens_seen": 7076944,
"step": 41
},
{
"epoch": 0.109375,
"grad_norm": 5.706351230194716,
"learning_rate": 5e-06,
"loss": 0.1664,
"num_input_tokens_seen": 7249880,
"step": 42
},
{
"epoch": 0.109375,
"loss": 0.1694188117980957,
"loss_ce": 0.0002903800050262362,
"loss_iou": 0.71875,
"loss_num": 0.03369140625,
"loss_xval": 0.1689453125,
"num_input_tokens_seen": 7249880,
"step": 42
},
{
"epoch": 0.11197916666666667,
"grad_norm": 7.30786008091978,
"learning_rate": 5e-06,
"loss": 0.1396,
"num_input_tokens_seen": 7422732,
"step": 43
},
{
"epoch": 0.11197916666666667,
"loss": 0.12171518802642822,
"loss_ce": 0.0002552264486439526,
"loss_iou": 0.609375,
"loss_num": 0.0242919921875,
"loss_xval": 0.12158203125,
"num_input_tokens_seen": 7422732,
"step": 43
},
{
"epoch": 0.11458333333333333,
"grad_norm": 10.925715703737882,
"learning_rate": 5e-06,
"loss": 0.1589,
"num_input_tokens_seen": 7595068,
"step": 44
},
{
"epoch": 0.11458333333333333,
"loss": 0.13566899299621582,
"loss_ce": 0.0002930228365585208,
"loss_iou": 0.58984375,
"loss_num": 0.027099609375,
"loss_xval": 0.1357421875,
"num_input_tokens_seen": 7595068,
"step": 44
},
{
"epoch": 0.1171875,
"grad_norm": 5.054139739954058,
"learning_rate": 5e-06,
"loss": 0.1463,
"num_input_tokens_seen": 7767900,
"step": 45
},
{
"epoch": 0.1171875,
"loss": 0.12349405884742737,
"loss_ce": 0.00020303628116380423,
"loss_iou": 0.7109375,
"loss_num": 0.024658203125,
"loss_xval": 0.123046875,
"num_input_tokens_seen": 7767900,
"step": 45
},
{
"epoch": 0.11979166666666667,
"grad_norm": 12.342418471503326,
"learning_rate": 5e-06,
"loss": 0.1424,
"num_input_tokens_seen": 7940544,
"step": 46
},
{
"epoch": 0.11979166666666667,
"loss": 0.11029690504074097,
"loss_ce": 0.00021999998716637492,
"loss_iou": 0.81640625,
"loss_num": 0.02197265625,
"loss_xval": 0.10986328125,
"num_input_tokens_seen": 7940544,
"step": 46
},
{
"epoch": 0.12239583333333333,
"grad_norm": 5.062819394898654,
"learning_rate": 5e-06,
"loss": 0.1906,
"num_input_tokens_seen": 8113664,
"step": 47
},
{
"epoch": 0.12239583333333333,
"loss": 0.1845826804637909,
"loss_ce": 0.0001954784820554778,
"loss_iou": 0.310546875,
"loss_num": 0.036865234375,
"loss_xval": 0.1845703125,
"num_input_tokens_seen": 8113664,
"step": 47
},
{
"epoch": 0.125,
"grad_norm": 9.659514849549943,
"learning_rate": 5e-06,
"loss": 0.1428,
"num_input_tokens_seen": 8286408,
"step": 48
},
{
"epoch": 0.125,
"loss": 0.13132745027542114,
"loss_ce": 0.00022393176914192736,
"loss_iou": 0.5859375,
"loss_num": 0.0262451171875,
"loss_xval": 0.130859375,
"num_input_tokens_seen": 8286408,
"step": 48
},
{
"epoch": 0.12760416666666666,
"grad_norm": 3.4602191470453296,
"learning_rate": 5e-06,
"loss": 0.1523,
"num_input_tokens_seen": 8459480,
"step": 49
},
{
"epoch": 0.12760416666666666,
"loss": 0.09740308672189713,
"loss_ce": 0.00011304817599011585,
"loss_iou": 0.734375,
"loss_num": 0.0194091796875,
"loss_xval": 0.09716796875,
"num_input_tokens_seen": 8459480,
"step": 49
},
{
"epoch": 0.13020833333333334,
"grad_norm": 2.792621267506476,
"learning_rate": 5e-06,
"loss": 0.1739,
"num_input_tokens_seen": 8632048,
"step": 50
},
{
"epoch": 0.13020833333333334,
"loss": 0.20529168844223022,
"loss_ce": 0.00015252322191372514,
"loss_iou": 0.55078125,
"loss_num": 0.041015625,
"loss_xval": 0.205078125,
"num_input_tokens_seen": 8632048,
"step": 50
},
{
"epoch": 0.1328125,
"grad_norm": 112.48651552153446,
"learning_rate": 5e-06,
"loss": 0.1474,
"num_input_tokens_seen": 8804436,
"step": 51
},
{
"epoch": 0.1328125,
"loss": 0.14565327763557434,
"loss_ce": 0.00020651462546084076,
"loss_iou": 0.796875,
"loss_num": 0.029052734375,
"loss_xval": 0.1455078125,
"num_input_tokens_seen": 8804436,
"step": 51
},
{
"epoch": 0.13541666666666666,
"grad_norm": 23.381698600452545,
"learning_rate": 5e-06,
"loss": 0.1281,
"num_input_tokens_seen": 8976692,
"step": 52
},
{
"epoch": 0.13541666666666666,
"loss": 0.07739880681037903,
"loss_ce": 0.0002808899153023958,
"loss_iou": 0.71484375,
"loss_num": 0.01544189453125,
"loss_xval": 0.0771484375,
"num_input_tokens_seen": 8976692,
"step": 52
},
{
"epoch": 0.13802083333333334,
"grad_norm": 20.24541765865236,
"learning_rate": 5e-06,
"loss": 0.1416,
"num_input_tokens_seen": 9149400,
"step": 53
},
{
"epoch": 0.13802083333333334,
"loss": 0.09311097115278244,
"loss_ce": 0.00012390354822855443,
"loss_iou": 0.7421875,
"loss_num": 0.0185546875,
"loss_xval": 0.0927734375,
"num_input_tokens_seen": 9149400,
"step": 53
},
{
"epoch": 0.140625,
"grad_norm": 5.275500097506868,
"learning_rate": 5e-06,
"loss": 0.1424,
"num_input_tokens_seen": 9321876,
"step": 54
},
{
"epoch": 0.140625,
"loss": 0.11511102318763733,
"loss_ce": 0.00018182306666858494,
"loss_iou": 0.55078125,
"loss_num": 0.02294921875,
"loss_xval": 0.11474609375,
"num_input_tokens_seen": 9321876,
"step": 54
},
{
"epoch": 0.14322916666666666,
"grad_norm": 6.68044187324112,
"learning_rate": 5e-06,
"loss": 0.1389,
"num_input_tokens_seen": 9494628,
"step": 55
},
{
"epoch": 0.14322916666666666,
"loss": 0.14306305348873138,
"loss_ce": 0.0001797609293134883,
"loss_iou": 0.6953125,
"loss_num": 0.028564453125,
"loss_xval": 0.142578125,
"num_input_tokens_seen": 9494628,
"step": 55
},
{
"epoch": 0.14583333333333334,
"grad_norm": 6.008068200145323,
"learning_rate": 5e-06,
"loss": 0.1457,
"num_input_tokens_seen": 9666508,
"step": 56
},
{
"epoch": 0.14583333333333334,
"loss": 0.10107017308473587,
"loss_ce": 0.00024009394110180438,
"loss_iou": 0.0,
"loss_num": 0.0201416015625,
"loss_xval": 0.1005859375,
"num_input_tokens_seen": 9666508,
"step": 56
},
{
"epoch": 0.1484375,
"grad_norm": 5.2880560255216436,
"learning_rate": 5e-06,
"loss": 0.1537,
"num_input_tokens_seen": 9839556,
"step": 57
},
{
"epoch": 0.1484375,
"loss": 0.12539099156856537,
"loss_ce": 0.003534301184117794,
"loss_iou": 0.55078125,
"loss_num": 0.0244140625,
"loss_xval": 0.1220703125,
"num_input_tokens_seen": 9839556,
"step": 57
},
{
"epoch": 0.15104166666666666,
"grad_norm": 12.763217046347364,
"learning_rate": 5e-06,
"loss": 0.1706,
"num_input_tokens_seen": 10011988,
"step": 58
},
{
"epoch": 0.15104166666666666,
"loss": 0.17848367989063263,
"loss_ce": 0.00016946055984590203,
"loss_iou": 0.52734375,
"loss_num": 0.03564453125,
"loss_xval": 0.1787109375,
"num_input_tokens_seen": 10011988,
"step": 58
},
{
"epoch": 0.15364583333333334,
"grad_norm": 8.269658303130955,
"learning_rate": 5e-06,
"loss": 0.157,
"num_input_tokens_seen": 10184712,
"step": 59
},
{
"epoch": 0.15364583333333334,
"loss": 0.16671502590179443,
"loss_ce": 0.0010350943775847554,
"loss_iou": 0.51953125,
"loss_num": 0.033203125,
"loss_xval": 0.166015625,
"num_input_tokens_seen": 10184712,
"step": 59
},
{
"epoch": 0.15625,
"grad_norm": 10.823127549550875,
"learning_rate": 5e-06,
"loss": 0.1397,
"num_input_tokens_seen": 10357876,
"step": 60
},
{
"epoch": 0.15625,
"loss": 0.15665964782238007,
"loss_ce": 0.00022653902124147862,
"loss_iou": 0.5390625,
"loss_num": 0.03125,
"loss_xval": 0.15625,
"num_input_tokens_seen": 10357876,
"step": 60
},
{
"epoch": 0.15885416666666666,
"grad_norm": 6.373677246488681,
"learning_rate": 5e-06,
"loss": 0.1239,
"num_input_tokens_seen": 10530560,
"step": 61
},
{
"epoch": 0.15885416666666666,
"loss": 0.1182846650481224,
"loss_ce": 0.0001816382718970999,
"loss_iou": 0.78515625,
"loss_num": 0.023681640625,
"loss_xval": 0.1181640625,
"num_input_tokens_seen": 10530560,
"step": 61
},
{
"epoch": 0.16145833333333334,
"grad_norm": 2.5506045315044688,
"learning_rate": 5e-06,
"loss": 0.1465,
"num_input_tokens_seen": 10702880,
"step": 62
},
{
"epoch": 0.16145833333333334,
"loss": 0.11390332132577896,
"loss_ce": 0.00019482464995235205,
"loss_iou": 0.890625,
"loss_num": 0.022705078125,
"loss_xval": 0.11376953125,
"num_input_tokens_seen": 10702880,
"step": 62
},
{
"epoch": 0.1640625,
"grad_norm": 7.222980659687508,
"learning_rate": 5e-06,
"loss": 0.1252,
"num_input_tokens_seen": 10875396,
"step": 63
},
{
"epoch": 0.1640625,
"loss": 0.14197739958763123,
"loss_ce": 0.00019273148791398853,
"loss_iou": 0.65234375,
"loss_num": 0.0283203125,
"loss_xval": 0.1416015625,
"num_input_tokens_seen": 10875396,
"step": 63
},
{
"epoch": 0.16666666666666666,
"grad_norm": 5.146091397448424,
"learning_rate": 5e-06,
"loss": 0.1305,
"num_input_tokens_seen": 11047776,
"step": 64
},
{
"epoch": 0.16666666666666666,
"loss": 0.14921408891677856,
"loss_ce": 0.00013572629541158676,
"loss_iou": 0.72265625,
"loss_num": 0.02978515625,
"loss_xval": 0.1494140625,
"num_input_tokens_seen": 11047776,
"step": 64
},
{
"epoch": 0.16927083333333334,
"grad_norm": 4.80224094246898,
"learning_rate": 5e-06,
"loss": 0.1154,
"num_input_tokens_seen": 11220188,
"step": 65
},
{
"epoch": 0.16927083333333334,
"loss": 0.07668769359588623,
"loss_ce": 0.001126162358559668,
"loss_iou": 0.91015625,
"loss_num": 0.01507568359375,
"loss_xval": 0.07568359375,
"num_input_tokens_seen": 11220188,
"step": 65
},
{
"epoch": 0.171875,
"grad_norm": 10.700514293201024,
"learning_rate": 5e-06,
"loss": 0.1739,
"num_input_tokens_seen": 11392944,
"step": 66
},
{
"epoch": 0.171875,
"loss": 0.22246834635734558,
"loss_ce": 0.00011727018863894045,
"loss_iou": 0.75390625,
"loss_num": 0.04443359375,
"loss_xval": 0.22265625,
"num_input_tokens_seen": 11392944,
"step": 66
},
{
"epoch": 0.17447916666666666,
"grad_norm": 9.514503857806982,
"learning_rate": 5e-06,
"loss": 0.2101,
"num_input_tokens_seen": 11565084,
"step": 67
},
{
"epoch": 0.17447916666666666,
"loss": 0.13935251533985138,
"loss_ce": 0.0004364975611679256,
"loss_iou": 0.58203125,
"loss_num": 0.02783203125,
"loss_xval": 0.138671875,
"num_input_tokens_seen": 11565084,
"step": 67
},
{
"epoch": 0.17708333333333334,
"grad_norm": 28.845888384168894,
"learning_rate": 5e-06,
"loss": 0.1395,
"num_input_tokens_seen": 11737388,
"step": 68
},
{
"epoch": 0.17708333333333334,
"loss": 0.17835211753845215,
"loss_ce": 0.00019049833645112813,
"loss_iou": 0.5859375,
"loss_num": 0.03564453125,
"loss_xval": 0.177734375,
"num_input_tokens_seen": 11737388,
"step": 68
},
{
"epoch": 0.1796875,
"grad_norm": 12.901299207431718,
"learning_rate": 5e-06,
"loss": 0.1475,
"num_input_tokens_seen": 11910160,
"step": 69
},
{
"epoch": 0.1796875,
"loss": 0.14130395650863647,
"loss_ce": 0.00040430587250739336,
"loss_iou": 0.71875,
"loss_num": 0.028076171875,
"loss_xval": 0.140625,
"num_input_tokens_seen": 11910160,
"step": 69
},
{
"epoch": 0.18229166666666666,
"grad_norm": 4.066104418883702,
"learning_rate": 5e-06,
"loss": 0.1535,
"num_input_tokens_seen": 12083060,
"step": 70
},
{
"epoch": 0.18229166666666666,
"loss": 0.22210073471069336,
"loss_ce": 0.00048208353109657764,
"loss_iou": 0.52734375,
"loss_num": 0.044189453125,
"loss_xval": 0.2216796875,
"num_input_tokens_seen": 12083060,
"step": 70
},
{
"epoch": 0.18489583333333334,
"grad_norm": 7.20629091266797,
"learning_rate": 5e-06,
"loss": 0.1526,
"num_input_tokens_seen": 12255100,
"step": 71
},
{
"epoch": 0.18489583333333334,
"loss": 0.10638897120952606,
"loss_ce": 0.00015728682046756148,
"loss_iou": 0.0,
"loss_num": 0.021240234375,
"loss_xval": 0.1064453125,
"num_input_tokens_seen": 12255100,
"step": 71
},
{
"epoch": 0.1875,
"grad_norm": 5.6974371825888515,
"learning_rate": 5e-06,
"loss": 0.1194,
"num_input_tokens_seen": 12428188,
"step": 72
},
{
"epoch": 0.1875,
"loss": 0.17051713168621063,
"loss_ce": 0.0001069810678018257,
"loss_iou": 0.77734375,
"loss_num": 0.0341796875,
"loss_xval": 0.169921875,
"num_input_tokens_seen": 12428188,
"step": 72
},
{
"epoch": 0.19010416666666666,
"grad_norm": 9.689078279769502,
"learning_rate": 5e-06,
"loss": 0.1445,
"num_input_tokens_seen": 12601004,
"step": 73
},
{
"epoch": 0.19010416666666666,
"loss": 0.1433650702238083,
"loss_ce": 0.00020711585239041597,
"loss_iou": 0.7578125,
"loss_num": 0.0286865234375,
"loss_xval": 0.1435546875,
"num_input_tokens_seen": 12601004,
"step": 73
},
{
"epoch": 0.19270833333333334,
"grad_norm": 5.827672891178693,
"learning_rate": 5e-06,
"loss": 0.134,
"num_input_tokens_seen": 12773320,
"step": 74
},
{
"epoch": 0.19270833333333334,
"loss": 0.11652399599552155,
"loss_ce": 9.94274887489155e-05,
"loss_iou": 0.0,
"loss_num": 0.0233154296875,
"loss_xval": 0.1162109375,
"num_input_tokens_seen": 12773320,
"step": 74
},
{
"epoch": 0.1953125,
"grad_norm": 3.990167602163436,
"learning_rate": 5e-06,
"loss": 0.1368,
"num_input_tokens_seen": 12945852,
"step": 75
},
{
"epoch": 0.1953125,
"loss": 0.14618420600891113,
"loss_ce": 0.0001270811044378206,
"loss_iou": 0.59765625,
"loss_num": 0.0291748046875,
"loss_xval": 0.146484375,
"num_input_tokens_seen": 12945852,
"step": 75
},
{
"epoch": 0.19791666666666666,
"grad_norm": 13.270667333466802,
"learning_rate": 5e-06,
"loss": 0.1563,
"num_input_tokens_seen": 13118484,
"step": 76
},
{
"epoch": 0.19791666666666666,
"loss": 0.15230345726013184,
"loss_ce": 0.00026487442664802074,
"loss_iou": 0.76953125,
"loss_num": 0.0303955078125,
"loss_xval": 0.15234375,
"num_input_tokens_seen": 13118484,
"step": 76
},
{
"epoch": 0.20052083333333334,
"grad_norm": 7.7363268532740745,
"learning_rate": 5e-06,
"loss": 0.1283,
"num_input_tokens_seen": 13291272,
"step": 77
},
{
"epoch": 0.20052083333333334,
"loss": 0.14224107563495636,
"loss_ce": 5.9679325204342604e-05,
"loss_iou": 0.6796875,
"loss_num": 0.0284423828125,
"loss_xval": 0.142578125,
"num_input_tokens_seen": 13291272,
"step": 77
},
{
"epoch": 0.203125,
"grad_norm": 6.61313432355891,
"learning_rate": 5e-06,
"loss": 0.124,
"num_input_tokens_seen": 13464624,
"step": 78
},
{
"epoch": 0.203125,
"loss": 0.1373731642961502,
"loss_ce": 0.001081653987057507,
"loss_iou": 0.578125,
"loss_num": 0.0272216796875,
"loss_xval": 0.13671875,
"num_input_tokens_seen": 13464624,
"step": 78
},
{
"epoch": 0.20572916666666666,
"grad_norm": 8.581672711095537,
"learning_rate": 5e-06,
"loss": 0.1277,
"num_input_tokens_seen": 13637932,
"step": 79
},
{
"epoch": 0.20572916666666666,
"loss": 0.07391411066055298,
"loss_ce": 0.0002141633303835988,
"loss_iou": 0.921875,
"loss_num": 0.0147705078125,
"loss_xval": 0.07373046875,
"num_input_tokens_seen": 13637932,
"step": 79
},
{
"epoch": 0.20833333333333334,
"grad_norm": 9.343129724950805,
"learning_rate": 5e-06,
"loss": 0.1228,
"num_input_tokens_seen": 13810536,
"step": 80
},
{
"epoch": 0.20833333333333334,
"loss": 0.1317322701215744,
"loss_ce": 0.00010995224147336558,
"loss_iou": 0.5625,
"loss_num": 0.0263671875,
"loss_xval": 0.1318359375,
"num_input_tokens_seen": 13810536,
"step": 80
},
{
"epoch": 0.2109375,
"grad_norm": 3.335680846026802,
"learning_rate": 5e-06,
"loss": 0.1278,
"num_input_tokens_seen": 13982952,
"step": 81
},
{
"epoch": 0.2109375,
"loss": 0.11719675362110138,
"loss_ce": 7.029056723695248e-05,
"loss_iou": 0.72265625,
"loss_num": 0.0234375,
"loss_xval": 0.1171875,
"num_input_tokens_seen": 13982952,
"step": 81
},
{
"epoch": 0.21354166666666666,
"grad_norm": 5.015919335288412,
"learning_rate": 5e-06,
"loss": 0.1141,
"num_input_tokens_seen": 14156116,
"step": 82
},
{
"epoch": 0.21354166666666666,
"loss": 0.12401551753282547,
"loss_ce": 0.0005414030747488141,
"loss_iou": 0.8125,
"loss_num": 0.024658203125,
"loss_xval": 0.12353515625,
"num_input_tokens_seen": 14156116,
"step": 82
},
{
"epoch": 0.21614583333333334,
"grad_norm": 2.9623089480232765,
"learning_rate": 5e-06,
"loss": 0.1462,
"num_input_tokens_seen": 14328100,
"step": 83
},
{
"epoch": 0.21614583333333334,
"loss": 0.13083317875862122,
"loss_ce": 6.534742715302855e-05,
"loss_iou": 0.84765625,
"loss_num": 0.026123046875,
"loss_xval": 0.130859375,
"num_input_tokens_seen": 14328100,
"step": 83
},
{
"epoch": 0.21875,
"grad_norm": 10.594036737745725,
"learning_rate": 5e-06,
"loss": 0.1571,
"num_input_tokens_seen": 14501028,
"step": 84
},
{
"epoch": 0.21875,
"loss": 0.12298892438411713,
"loss_ce": 9.464097092859447e-05,
"loss_iou": 0.765625,
"loss_num": 0.0245361328125,
"loss_xval": 0.123046875,
"num_input_tokens_seen": 14501028,
"step": 84
},
{
"epoch": 0.22135416666666666,
"grad_norm": 15.787971676382128,
"learning_rate": 5e-06,
"loss": 0.1346,
"num_input_tokens_seen": 14673688,
"step": 85
},
{
"epoch": 0.22135416666666666,
"loss": 0.1362449675798416,
"loss_ce": 0.00013656073133461177,
"loss_iou": 0.77734375,
"loss_num": 0.0272216796875,
"loss_xval": 0.1357421875,
"num_input_tokens_seen": 14673688,
"step": 85
},
{
"epoch": 0.22395833333333334,
"grad_norm": 17.628757977108236,
"learning_rate": 5e-06,
"loss": 0.1309,
"num_input_tokens_seen": 14846388,
"step": 86
},
{
"epoch": 0.22395833333333334,
"loss": 0.2271496057510376,
"loss_ce": 0.00012933027755934745,
"loss_iou": 0.6875,
"loss_num": 0.04541015625,
"loss_xval": 0.2265625,
"num_input_tokens_seen": 14846388,
"step": 86
},
{
"epoch": 0.2265625,
"grad_norm": 4.200455159585171,
"learning_rate": 5e-06,
"loss": 0.1237,
"num_input_tokens_seen": 15019332,
"step": 87
},
{
"epoch": 0.2265625,
"loss": 0.1411372572183609,
"loss_ce": 0.000146055273944512,
"loss_iou": 0.7109375,
"loss_num": 0.0281982421875,
"loss_xval": 0.140625,
"num_input_tokens_seen": 15019332,
"step": 87
},
{
"epoch": 0.22916666666666666,
"grad_norm": 16.128679810445924,
"learning_rate": 5e-06,
"loss": 0.1205,
"num_input_tokens_seen": 15191728,
"step": 88
},
{
"epoch": 0.22916666666666666,
"loss": 0.12598924338817596,
"loss_ce": 0.0001957894128281623,
"loss_iou": 0.7265625,
"loss_num": 0.025146484375,
"loss_xval": 0.1259765625,
"num_input_tokens_seen": 15191728,
"step": 88
},
{
"epoch": 0.23177083333333334,
"grad_norm": 3.347768447216801,
"learning_rate": 5e-06,
"loss": 0.1091,
"num_input_tokens_seen": 15364328,
"step": 89
},
{
"epoch": 0.23177083333333334,
"loss": 0.13717257976531982,
"loss_ce": 0.00014865108823869377,
"loss_iou": 0.671875,
"loss_num": 0.0274658203125,
"loss_xval": 0.13671875,
"num_input_tokens_seen": 15364328,
"step": 89
},
{
"epoch": 0.234375,
"grad_norm": 14.428792014632464,
"learning_rate": 5e-06,
"loss": 0.1169,
"num_input_tokens_seen": 15537008,
"step": 90
},
{
"epoch": 0.234375,
"loss": 0.09786561131477356,
"loss_ce": 8.728736429475248e-05,
"loss_iou": 0.8984375,
"loss_num": 0.01953125,
"loss_xval": 0.09765625,
"num_input_tokens_seen": 15537008,
"step": 90
},
{
"epoch": 0.23697916666666666,
"grad_norm": 9.593480834109474,
"learning_rate": 5e-06,
"loss": 0.1477,
"num_input_tokens_seen": 15709404,
"step": 91
},
{
"epoch": 0.23697916666666666,
"loss": 0.13464778661727905,
"loss_ce": 0.0004925199900753796,
"loss_iou": 0.0,
"loss_num": 0.02685546875,
"loss_xval": 0.1337890625,
"num_input_tokens_seen": 15709404,
"step": 91
},
{
"epoch": 0.23958333333333334,
"grad_norm": 5.187519307559665,
"learning_rate": 5e-06,
"loss": 0.1256,
"num_input_tokens_seen": 15882256,
"step": 92
},
{
"epoch": 0.23958333333333334,
"loss": 0.14495471119880676,
"loss_ce": 0.00020984606817364693,
"loss_iou": 0.59375,
"loss_num": 0.0289306640625,
"loss_xval": 0.14453125,
"num_input_tokens_seen": 15882256,
"step": 92
},
{
"epoch": 0.2421875,
"grad_norm": 3.797220087224051,
"learning_rate": 5e-06,
"loss": 0.0953,
"num_input_tokens_seen": 16055680,
"step": 93
},
{
"epoch": 0.2421875,
"loss": 0.11551543325185776,
"loss_ce": 0.00012846880417782813,
"loss_iou": 0.84765625,
"loss_num": 0.0230712890625,
"loss_xval": 0.115234375,
"num_input_tokens_seen": 16055680,
"step": 93
},
{
"epoch": 0.24479166666666666,
"grad_norm": 12.640483904974769,
"learning_rate": 5e-06,
"loss": 0.1633,
"num_input_tokens_seen": 16228580,
"step": 94
},
{
"epoch": 0.24479166666666666,
"loss": 0.22419461607933044,
"loss_ce": 7.351529347943142e-05,
"loss_iou": 0.57421875,
"loss_num": 0.044921875,
"loss_xval": 0.224609375,
"num_input_tokens_seen": 16228580,
"step": 94
},
{
"epoch": 0.24739583333333334,
"grad_norm": 5.298357496019875,
"learning_rate": 5e-06,
"loss": 0.1399,
"num_input_tokens_seen": 16401760,
"step": 95
},
{
"epoch": 0.24739583333333334,
"loss": 0.13041989505290985,
"loss_ce": 0.0002624165790621191,
"loss_iou": 0.86328125,
"loss_num": 0.0260009765625,
"loss_xval": 0.1298828125,
"num_input_tokens_seen": 16401760,
"step": 95
},
{
"epoch": 0.25,
"grad_norm": 9.952010853168657,
"learning_rate": 5e-06,
"loss": 0.1295,
"num_input_tokens_seen": 16574496,
"step": 96
},
{
"epoch": 0.25,
"loss": 0.1939561665058136,
"loss_ce": 7.798791921231896e-05,
"loss_iou": 0.765625,
"loss_num": 0.038818359375,
"loss_xval": 0.1943359375,
"num_input_tokens_seen": 16574496,
"step": 96
},
{
"epoch": 0.2526041666666667,
"grad_norm": 3.049823911893114,
"learning_rate": 5e-06,
"loss": 0.1111,
"num_input_tokens_seen": 16747728,
"step": 97
},
{
"epoch": 0.2526041666666667,
"loss": 0.09351100027561188,
"loss_ce": 6.617652252316475e-05,
"loss_iou": 0.65625,
"loss_num": 0.0186767578125,
"loss_xval": 0.09326171875,
"num_input_tokens_seen": 16747728,
"step": 97
},
{
"epoch": 0.2552083333333333,
"grad_norm": 16.778434870585635,
"learning_rate": 5e-06,
"loss": 0.1019,
"num_input_tokens_seen": 16920748,
"step": 98
},
{
"epoch": 0.2552083333333333,
"loss": 0.06255725026130676,
"loss_ce": 0.00036242493661120534,
"loss_iou": 0.796875,
"loss_num": 0.012451171875,
"loss_xval": 0.062255859375,
"num_input_tokens_seen": 16920748,
"step": 98
},
{
"epoch": 0.2578125,
"grad_norm": 3.1538744569690427,
"learning_rate": 5e-06,
"loss": 0.0905,
"num_input_tokens_seen": 17093788,
"step": 99
},
{
"epoch": 0.2578125,
"loss": 0.07921752333641052,
"loss_ce": 5.4928314057178795e-05,
"loss_iou": 0.75390625,
"loss_num": 0.015869140625,
"loss_xval": 0.0791015625,
"num_input_tokens_seen": 17093788,
"step": 99
},
{
"epoch": 0.2604166666666667,
"grad_norm": 5.0492536766824925,
"learning_rate": 5e-06,
"loss": 0.1333,
"num_input_tokens_seen": 17266068,
"step": 100
},
{
"epoch": 0.2604166666666667,
"loss": 0.2560691237449646,
"loss_ce": 7.242064020829275e-05,
"loss_iou": 0.0,
"loss_num": 0.05126953125,
"loss_xval": 0.255859375,
"num_input_tokens_seen": 17266068,
"step": 100
},
{
"epoch": 0.2630208333333333,
"grad_norm": 3.206773313570764,
"learning_rate": 5e-06,
"loss": 0.1103,
"num_input_tokens_seen": 17438252,
"step": 101
},
{
"epoch": 0.2630208333333333,
"loss": 0.10068385303020477,
"loss_ce": 6.739624950569123e-05,
"loss_iou": 0.66015625,
"loss_num": 0.0201416015625,
"loss_xval": 0.1005859375,
"num_input_tokens_seen": 17438252,
"step": 101
},
{
"epoch": 0.265625,
"grad_norm": 4.023617188811506,
"learning_rate": 5e-06,
"loss": 0.1235,
"num_input_tokens_seen": 17611072,
"step": 102
},
{
"epoch": 0.265625,
"loss": 0.17407888174057007,
"loss_ce": 0.0001286847109440714,
"loss_iou": 0.8125,
"loss_num": 0.03466796875,
"loss_xval": 0.173828125,
"num_input_tokens_seen": 17611072,
"step": 102
},
{
"epoch": 0.2682291666666667,
"grad_norm": 4.68999361675466,
"learning_rate": 5e-06,
"loss": 0.1292,
"num_input_tokens_seen": 17784276,
"step": 103
},
{
"epoch": 0.2682291666666667,
"loss": 0.14088758826255798,
"loss_ce": 0.00014050997560843825,
"loss_iou": 0.7578125,
"loss_num": 0.028076171875,
"loss_xval": 0.140625,
"num_input_tokens_seen": 17784276,
"step": 103
},
{
"epoch": 0.2708333333333333,
"grad_norm": 3.670856513287625,
"learning_rate": 5e-06,
"loss": 0.1135,
"num_input_tokens_seen": 17956592,
"step": 104
},
{
"epoch": 0.2708333333333333,
"loss": 0.1348290890455246,
"loss_ce": 0.00012450128269847482,
"loss_iou": 0.0,
"loss_num": 0.0269775390625,
"loss_xval": 0.134765625,
"num_input_tokens_seen": 17956592,
"step": 104
},
{
"epoch": 0.2734375,
"grad_norm": 7.988958032566027,
"learning_rate": 5e-06,
"loss": 0.1302,
"num_input_tokens_seen": 18129536,
"step": 105
},
{
"epoch": 0.2734375,
"loss": 0.06641676276922226,
"loss_ce": 0.0001936157641466707,
"loss_iou": 0.58203125,
"loss_num": 0.01324462890625,
"loss_xval": 0.06640625,
"num_input_tokens_seen": 18129536,
"step": 105
},
{
"epoch": 0.2760416666666667,
"grad_norm": 6.167808656422766,
"learning_rate": 5e-06,
"loss": 0.0922,
"num_input_tokens_seen": 18302572,
"step": 106
},
{
"epoch": 0.2760416666666667,
"loss": 0.0900074690580368,
"loss_ce": 0.00028579036006703973,
"loss_iou": 0.8828125,
"loss_num": 0.0179443359375,
"loss_xval": 0.08984375,
"num_input_tokens_seen": 18302572,
"step": 106
},
{
"epoch": 0.2786458333333333,
"grad_norm": 15.444621610105179,
"learning_rate": 5e-06,
"loss": 0.1253,
"num_input_tokens_seen": 18474752,
"step": 107
},
{
"epoch": 0.2786458333333333,
"loss": 0.08840958774089813,
"loss_ce": 0.0001527518907096237,
"loss_iou": 0.734375,
"loss_num": 0.0177001953125,
"loss_xval": 0.08837890625,
"num_input_tokens_seen": 18474752,
"step": 107
},
{
"epoch": 0.28125,
"grad_norm": 9.197225254514525,
"learning_rate": 5e-06,
"loss": 0.1234,
"num_input_tokens_seen": 18647420,
"step": 108
},
{
"epoch": 0.28125,
"loss": 0.1322825402021408,
"loss_ce": 8.039205567911267e-05,
"loss_iou": 0.81640625,
"loss_num": 0.0264892578125,
"loss_xval": 0.1318359375,
"num_input_tokens_seen": 18647420,
"step": 108
},
{
"epoch": 0.2838541666666667,
"grad_norm": 4.014816953952452,
"learning_rate": 5e-06,
"loss": 0.1298,
"num_input_tokens_seen": 18820060,
"step": 109
},
{
"epoch": 0.2838541666666667,
"loss": 0.18935684859752655,
"loss_ce": 8.681887993589044e-05,
"loss_iou": 0.703125,
"loss_num": 0.037841796875,
"loss_xval": 0.189453125,
"num_input_tokens_seen": 18820060,
"step": 109
},
{
"epoch": 0.2864583333333333,
"grad_norm": 5.301291477011863,
"learning_rate": 5e-06,
"loss": 0.123,
"num_input_tokens_seen": 18992164,
"step": 110
},
{
"epoch": 0.2864583333333333,
"loss": 0.1676991879940033,
"loss_ce": 9.665168181527406e-05,
"loss_iou": 0.671875,
"loss_num": 0.03369140625,
"loss_xval": 0.16796875,
"num_input_tokens_seen": 18992164,
"step": 110
},
{
"epoch": 0.2890625,
"grad_norm": 3.7618362724585865,
"learning_rate": 5e-06,
"loss": 0.0973,
"num_input_tokens_seen": 19164016,
"step": 111
},
{
"epoch": 0.2890625,
"loss": 0.05550282821059227,
"loss_ce": 5.23876296938397e-05,
"loss_iou": 0.953125,
"loss_num": 0.0111083984375,
"loss_xval": 0.055419921875,
"num_input_tokens_seen": 19164016,
"step": 111
},
{
"epoch": 0.2916666666666667,
"grad_norm": 6.877157018975216,
"learning_rate": 5e-06,
"loss": 0.1429,
"num_input_tokens_seen": 19336416,
"step": 112
},
{
"epoch": 0.2916666666666667,
"loss": 0.21898075938224792,
"loss_ce": 0.00023076393699739128,
"loss_iou": 0.63671875,
"loss_num": 0.043701171875,
"loss_xval": 0.21875,
"num_input_tokens_seen": 19336416,
"step": 112
},
{
"epoch": 0.2942708333333333,
"grad_norm": 8.699267895879803,
"learning_rate": 5e-06,
"loss": 0.1221,
"num_input_tokens_seen": 19508784,
"step": 113
},
{
"epoch": 0.2942708333333333,
"loss": 0.11330369114875793,
"loss_ce": 0.00014450862363446504,
"loss_iou": 0.703125,
"loss_num": 0.0225830078125,
"loss_xval": 0.11328125,
"num_input_tokens_seen": 19508784,
"step": 113
},
{
"epoch": 0.296875,
"grad_norm": 6.679175716055245,
"learning_rate": 5e-06,
"loss": 0.1118,
"num_input_tokens_seen": 19681104,
"step": 114
},
{
"epoch": 0.296875,
"loss": 0.09517869353294373,
"loss_ce": 8.592366793891415e-05,
"loss_iou": 0.77734375,
"loss_num": 0.01904296875,
"loss_xval": 0.09521484375,
"num_input_tokens_seen": 19681104,
"step": 114
},
{
"epoch": 0.2994791666666667,
"grad_norm": 18.55418733227958,
"learning_rate": 5e-06,
"loss": 0.1435,
"num_input_tokens_seen": 19853176,
"step": 115
},
{
"epoch": 0.2994791666666667,
"loss": 0.1039402186870575,
"loss_ce": 5.8387617173139006e-05,
"loss_iou": 0.72265625,
"loss_num": 0.020751953125,
"loss_xval": 0.10400390625,
"num_input_tokens_seen": 19853176,
"step": 115
},
{
"epoch": 0.3020833333333333,
"grad_norm": 4.232168331373671,
"learning_rate": 5e-06,
"loss": 0.1276,
"num_input_tokens_seen": 20025704,
"step": 116
},
{
"epoch": 0.3020833333333333,
"loss": 0.08785620331764221,
"loss_ce": 8.76499543664977e-05,
"loss_iou": 0.69140625,
"loss_num": 0.017578125,
"loss_xval": 0.087890625,
"num_input_tokens_seen": 20025704,
"step": 116
},
{
"epoch": 0.3046875,
"grad_norm": 6.847887859581621,
"learning_rate": 5e-06,
"loss": 0.1147,
"num_input_tokens_seen": 20198488,
"step": 117
},
{
"epoch": 0.3046875,
"loss": 0.1606612354516983,
"loss_ce": 7.774594996590167e-05,
"loss_iou": 0.859375,
"loss_num": 0.031982421875,
"loss_xval": 0.16015625,
"num_input_tokens_seen": 20198488,
"step": 117
},
{
"epoch": 0.3072916666666667,
"grad_norm": 4.391317523713796,
"learning_rate": 5e-06,
"loss": 0.12,
"num_input_tokens_seen": 20371684,
"step": 118
},
{
"epoch": 0.3072916666666667,
"loss": 0.10194739699363708,
"loss_ce": 0.00014075401122681797,
"loss_iou": 0.68359375,
"loss_num": 0.0203857421875,
"loss_xval": 0.1015625,
"num_input_tokens_seen": 20371684,
"step": 118
},
{
"epoch": 0.3098958333333333,
"grad_norm": 8.958657986306372,
"learning_rate": 5e-06,
"loss": 0.1174,
"num_input_tokens_seen": 20544172,
"step": 119
},
{
"epoch": 0.3098958333333333,
"loss": 0.1237276941537857,
"loss_ce": 0.0004366845532786101,
"loss_iou": 0.71484375,
"loss_num": 0.024658203125,
"loss_xval": 0.123046875,
"num_input_tokens_seen": 20544172,
"step": 119
},
{
"epoch": 0.3125,
"grad_norm": 3.5170839929817417,
"learning_rate": 5e-06,
"loss": 0.1268,
"num_input_tokens_seen": 20717160,
"step": 120
},
{
"epoch": 0.3125,
"loss": 0.15593719482421875,
"loss_ce": 0.00017548247706145048,
"loss_iou": 0.890625,
"loss_num": 0.0311279296875,
"loss_xval": 0.15625,
"num_input_tokens_seen": 20717160,
"step": 120
},
{
"epoch": 0.3151041666666667,
"grad_norm": 6.739906995090889,
"learning_rate": 5e-06,
"loss": 0.1242,
"num_input_tokens_seen": 20890032,
"step": 121
},
{
"epoch": 0.3151041666666667,
"loss": 0.1494368314743042,
"loss_ce": 0.0012434859527274966,
"loss_iou": 0.671875,
"loss_num": 0.0296630859375,
"loss_xval": 0.1484375,
"num_input_tokens_seen": 20890032,
"step": 121
},
{
"epoch": 0.3177083333333333,
"grad_norm": 6.127165000561302,
"learning_rate": 5e-06,
"loss": 0.1151,
"num_input_tokens_seen": 21062984,
"step": 122
},
{
"epoch": 0.3177083333333333,
"loss": 0.09486885368824005,
"loss_ce": 0.0001422952045686543,
"loss_iou": 0.80859375,
"loss_num": 0.0189208984375,
"loss_xval": 0.0947265625,
"num_input_tokens_seen": 21062984,
"step": 122
},
{
"epoch": 0.3203125,
"grad_norm": 8.718508748737245,
"learning_rate": 5e-06,
"loss": 0.1031,
"num_input_tokens_seen": 21235792,
"step": 123
},
{
"epoch": 0.3203125,
"loss": 0.11195512861013412,
"loss_ce": 0.0001387260272167623,
"loss_iou": 0.49609375,
"loss_num": 0.0223388671875,
"loss_xval": 0.11181640625,
"num_input_tokens_seen": 21235792,
"step": 123
},
{
"epoch": 0.3229166666666667,
"grad_norm": 13.341861393347486,
"learning_rate": 5e-06,
"loss": 0.1039,
"num_input_tokens_seen": 21407888,
"step": 124
},
{
"epoch": 0.3229166666666667,
"loss": 0.11872819066047668,
"loss_ce": 0.0001673972437856719,
"loss_iou": 0.703125,
"loss_num": 0.023681640625,
"loss_xval": 0.11865234375,
"num_input_tokens_seen": 21407888,
"step": 124
},
{
"epoch": 0.3255208333333333,
"grad_norm": 10.748431502763593,
"learning_rate": 5e-06,
"loss": 0.1201,
"num_input_tokens_seen": 21580252,
"step": 125
},
{
"epoch": 0.3255208333333333,
"loss": 0.14679506421089172,
"loss_ce": 6.655443576164544e-05,
"loss_iou": 0.373046875,
"loss_num": 0.029296875,
"loss_xval": 0.146484375,
"num_input_tokens_seen": 21580252,
"step": 125
},
{
"epoch": 0.328125,
"grad_norm": 9.981967396091962,
"learning_rate": 5e-06,
"loss": 0.1147,
"num_input_tokens_seen": 21753052,
"step": 126
},
{
"epoch": 0.328125,
"loss": 0.09238539636135101,
"loss_ce": 0.00010023377399193123,
"loss_iou": 0.6328125,
"loss_num": 0.0184326171875,
"loss_xval": 0.09228515625,
"num_input_tokens_seen": 21753052,
"step": 126
},
{
"epoch": 0.3307291666666667,
"grad_norm": 8.119992313803278,
"learning_rate": 5e-06,
"loss": 0.1072,
"num_input_tokens_seen": 21925632,
"step": 127
},
{
"epoch": 0.3307291666666667,
"loss": 0.07983443140983582,
"loss_ce": 9.200449858326465e-05,
"loss_iou": 0.8046875,
"loss_num": 0.0159912109375,
"loss_xval": 0.07958984375,
"num_input_tokens_seen": 21925632,
"step": 127
},
{
"epoch": 0.3333333333333333,
"grad_norm": 6.9850353772680105,
"learning_rate": 5e-06,
"loss": 0.125,
"num_input_tokens_seen": 22098616,
"step": 128
},
{
"epoch": 0.3333333333333333,
"loss": 0.12042608857154846,
"loss_ce": 0.0001257982075912878,
"loss_iou": 0.65234375,
"loss_num": 0.0240478515625,
"loss_xval": 0.1201171875,
"num_input_tokens_seen": 22098616,
"step": 128
},
{
"epoch": 0.3359375,
"grad_norm": 3.3562574299779073,
"learning_rate": 5e-06,
"loss": 0.0891,
"num_input_tokens_seen": 22270980,
"step": 129
},
{
"epoch": 0.3359375,
"loss": 0.07171538472175598,
"loss_ce": 0.0001516598858870566,
"loss_iou": 0.9609375,
"loss_num": 0.01434326171875,
"loss_xval": 0.07177734375,
"num_input_tokens_seen": 22270980,
"step": 129
},
{
"epoch": 0.3385416666666667,
"grad_norm": 2.474071432452823,
"learning_rate": 5e-06,
"loss": 0.0986,
"num_input_tokens_seen": 22443752,
"step": 130
},
{
"epoch": 0.3385416666666667,
"loss": 0.071955606341362,
"loss_ce": 0.00020878079521935433,
"loss_iou": 0.7890625,
"loss_num": 0.01434326171875,
"loss_xval": 0.07177734375,
"num_input_tokens_seen": 22443752,
"step": 130
},
{
"epoch": 0.3411458333333333,
"grad_norm": 4.769496774720465,
"learning_rate": 5e-06,
"loss": 0.0827,
"num_input_tokens_seen": 22616684,
"step": 131
},
{
"epoch": 0.3411458333333333,
"loss": 0.08630406856536865,
"loss_ce": 0.00021397518867161125,
"loss_iou": 0.68359375,
"loss_num": 0.0172119140625,
"loss_xval": 0.0859375,
"num_input_tokens_seen": 22616684,
"step": 131
},
{
"epoch": 0.34375,
"grad_norm": 14.025079611665472,
"learning_rate": 5e-06,
"loss": 0.0834,
"num_input_tokens_seen": 22789044,
"step": 132
},
{
"epoch": 0.34375,
"loss": 0.10616521537303925,
"loss_ce": 0.00011663565237540752,
"loss_iou": 0.0,
"loss_num": 0.021240234375,
"loss_xval": 0.10595703125,
"num_input_tokens_seen": 22789044,
"step": 132
},
{
"epoch": 0.3463541666666667,
"grad_norm": 3.9669475156886946,
"learning_rate": 5e-06,
"loss": 0.1167,
"num_input_tokens_seen": 22962080,
"step": 133
},
{
"epoch": 0.3463541666666667,
"loss": 0.11822222173213959,
"loss_ce": 0.00014970809570513666,
"loss_iou": 0.7109375,
"loss_num": 0.023681640625,
"loss_xval": 0.1181640625,
"num_input_tokens_seen": 22962080,
"step": 133
},
{
"epoch": 0.3489583333333333,
"grad_norm": 6.482768707064352,
"learning_rate": 5e-06,
"loss": 0.1163,
"num_input_tokens_seen": 23135120,
"step": 134
},
{
"epoch": 0.3489583333333333,
"loss": 0.09390418976545334,
"loss_ce": 0.00012366939336061478,
"loss_iou": 0.8125,
"loss_num": 0.018798828125,
"loss_xval": 0.09375,
"num_input_tokens_seen": 23135120,
"step": 134
},
{
"epoch": 0.3515625,
"grad_norm": 4.75477602454939,
"learning_rate": 5e-06,
"loss": 0.1443,
"num_input_tokens_seen": 23308372,
"step": 135
},
{
"epoch": 0.3515625,
"loss": 0.1679096817970276,
"loss_ce": 6.299919914454222e-05,
"loss_iou": 0.578125,
"loss_num": 0.033447265625,
"loss_xval": 0.16796875,
"num_input_tokens_seen": 23308372,
"step": 135
},
{
"epoch": 0.3541666666666667,
"grad_norm": 2.514900037834426,
"learning_rate": 5e-06,
"loss": 0.1001,
"num_input_tokens_seen": 23480360,
"step": 136
},
{
"epoch": 0.3541666666666667,
"loss": 0.10733547061681747,
"loss_ce": 6.619012128794566e-05,
"loss_iou": 0.6875,
"loss_num": 0.021484375,
"loss_xval": 0.107421875,
"num_input_tokens_seen": 23480360,
"step": 136
},
{
"epoch": 0.3567708333333333,
"grad_norm": 4.934909863394261,
"learning_rate": 5e-06,
"loss": 0.1083,
"num_input_tokens_seen": 23653652,
"step": 137
},
{
"epoch": 0.3567708333333333,
"loss": 0.11394178867340088,
"loss_ce": 8.070516923908144e-05,
"loss_iou": 0.8125,
"loss_num": 0.0228271484375,
"loss_xval": 0.11376953125,
"num_input_tokens_seen": 23653652,
"step": 137
},
{
"epoch": 0.359375,
"grad_norm": 3.707663252931766,
"learning_rate": 5e-06,
"loss": 0.0869,
"num_input_tokens_seen": 23826220,
"step": 138
},
{
"epoch": 0.359375,
"loss": 0.08403357863426208,
"loss_ce": 0.0001407563831890002,
"loss_iou": 0.8046875,
"loss_num": 0.0167236328125,
"loss_xval": 0.083984375,
"num_input_tokens_seen": 23826220,
"step": 138
},
{
"epoch": 0.3619791666666667,
"grad_norm": 5.810148215517029,
"learning_rate": 5e-06,
"loss": 0.0944,
"num_input_tokens_seen": 23998808,
"step": 139
},
{
"epoch": 0.3619791666666667,
"loss": 0.15188120305538177,
"loss_ce": 8.676404104335234e-05,
"loss_iou": 0.66796875,
"loss_num": 0.0303955078125,
"loss_xval": 0.1513671875,
"num_input_tokens_seen": 23998808,
"step": 139
},
{
"epoch": 0.3645833333333333,
"grad_norm": 5.097709919840357,
"learning_rate": 5e-06,
"loss": 0.1118,
"num_input_tokens_seen": 24171244,
"step": 140
},
{
"epoch": 0.3645833333333333,
"loss": 0.0743027776479721,
"loss_ce": 8.402515959460288e-05,
"loss_iou": 0.5859375,
"loss_num": 0.01483154296875,
"loss_xval": 0.07421875,
"num_input_tokens_seen": 24171244,
"step": 140
},
{
"epoch": 0.3671875,
"grad_norm": 29.485026694205214,
"learning_rate": 5e-06,
"loss": 0.1345,
"num_input_tokens_seen": 24343728,
"step": 141
},
{
"epoch": 0.3671875,
"loss": 0.20851582288742065,
"loss_ce": 0.00012653246812988073,
"loss_iou": 0.0,
"loss_num": 0.041748046875,
"loss_xval": 0.2080078125,
"num_input_tokens_seen": 24343728,
"step": 141
},
{
"epoch": 0.3697916666666667,
"grad_norm": 15.306197535117493,
"learning_rate": 5e-06,
"loss": 0.1169,
"num_input_tokens_seen": 24516776,
"step": 142
},
{
"epoch": 0.3697916666666667,
"loss": 0.06858328729867935,
"loss_ce": 7.132141035981476e-05,
"loss_iou": 0.6328125,
"loss_num": 0.013671875,
"loss_xval": 0.068359375,
"num_input_tokens_seen": 24516776,
"step": 142
},
{
"epoch": 0.3723958333333333,
"grad_norm": 7.8570075555495205,
"learning_rate": 5e-06,
"loss": 0.0911,
"num_input_tokens_seen": 24689788,
"step": 143
},
{
"epoch": 0.3723958333333333,
"loss": 0.10039569437503815,
"loss_ce": 8.441291720373556e-05,
"loss_iou": 0.443359375,
"loss_num": 0.02001953125,
"loss_xval": 0.10009765625,
"num_input_tokens_seen": 24689788,
"step": 143
},
{
"epoch": 0.375,
"grad_norm": 7.50330036811974,
"learning_rate": 5e-06,
"loss": 0.125,
"num_input_tokens_seen": 24862452,
"step": 144
},
{
"epoch": 0.375,
"loss": 0.06121515482664108,
"loss_ce": 5.792636875412427e-05,
"loss_iou": 0.6875,
"loss_num": 0.01226806640625,
"loss_xval": 0.06103515625,
"num_input_tokens_seen": 24862452,
"step": 144
},
{
"epoch": 0.3776041666666667,
"grad_norm": 9.259685096230124,
"learning_rate": 5e-06,
"loss": 0.115,
"num_input_tokens_seen": 25035336,
"step": 145
},
{
"epoch": 0.3776041666666667,
"loss": 0.09985796362161636,
"loss_ce": 0.0002180726150982082,
"loss_iou": 0.79296875,
"loss_num": 0.0198974609375,
"loss_xval": 0.099609375,
"num_input_tokens_seen": 25035336,
"step": 145
},
{
"epoch": 0.3802083333333333,
"grad_norm": 4.49972816018969,
"learning_rate": 5e-06,
"loss": 0.0953,
"num_input_tokens_seen": 25207968,
"step": 146
},
{
"epoch": 0.3802083333333333,
"loss": 0.10796058923006058,
"loss_ce": 0.0020951118785887957,
"loss_iou": 0.54296875,
"loss_num": 0.021240234375,
"loss_xval": 0.10595703125,
"num_input_tokens_seen": 25207968,
"step": 146
},
{
"epoch": 0.3828125,
"grad_norm": 5.73441077024277,
"learning_rate": 5e-06,
"loss": 0.1225,
"num_input_tokens_seen": 25380784,
"step": 147
},
{
"epoch": 0.3828125,
"loss": 0.09899605065584183,
"loss_ce": 5.806491753901355e-05,
"loss_iou": 0.7578125,
"loss_num": 0.019775390625,
"loss_xval": 0.09912109375,
"num_input_tokens_seen": 25380784,
"step": 147
},
{
"epoch": 0.3854166666666667,
"grad_norm": 3.6755366051445137,
"learning_rate": 5e-06,
"loss": 0.1046,
"num_input_tokens_seen": 25553764,
"step": 148
},
{
"epoch": 0.3854166666666667,
"loss": 0.07523618638515472,
"loss_ce": 7.139628723962232e-05,
"loss_iou": 0.71875,
"loss_num": 0.0150146484375,
"loss_xval": 0.0751953125,
"num_input_tokens_seen": 25553764,
"step": 148
},
{
"epoch": 0.3880208333333333,
"grad_norm": 38.91246982097314,
"learning_rate": 5e-06,
"loss": 0.1093,
"num_input_tokens_seen": 25726456,
"step": 149
},
{
"epoch": 0.3880208333333333,
"loss": 0.08839882165193558,
"loss_ce": 8.095278462860733e-05,
"loss_iou": 0.80078125,
"loss_num": 0.0177001953125,
"loss_xval": 0.08837890625,
"num_input_tokens_seen": 25726456,
"step": 149
},
{
"epoch": 0.390625,
"grad_norm": 10.207658282865648,
"learning_rate": 5e-06,
"loss": 0.123,
"num_input_tokens_seen": 25899148,
"step": 150
},
{
"epoch": 0.390625,
"loss": 0.0730600580573082,
"loss_ce": 0.000214598243474029,
"loss_iou": 0.625,
"loss_num": 0.01458740234375,
"loss_xval": 0.07275390625,
"num_input_tokens_seen": 25899148,
"step": 150
},
{
"epoch": 0.3932291666666667,
"grad_norm": 4.730292038840616,
"learning_rate": 5e-06,
"loss": 0.0989,
"num_input_tokens_seen": 26072084,
"step": 151
},
{
"epoch": 0.3932291666666667,
"loss": 0.15038591623306274,
"loss_ce": 0.00011736503802239895,
"loss_iou": 0.578125,
"loss_num": 0.030029296875,
"loss_xval": 0.150390625,
"num_input_tokens_seen": 26072084,
"step": 151
},
{
"epoch": 0.3958333333333333,
"grad_norm": 3.321333890252999,
"learning_rate": 5e-06,
"loss": 0.103,
"num_input_tokens_seen": 26244756,
"step": 152
},
{
"epoch": 0.3958333333333333,
"loss": 0.08549217134714127,
"loss_ce": 0.0001497594639658928,
"loss_iou": 0.51953125,
"loss_num": 0.01708984375,
"loss_xval": 0.08544921875,
"num_input_tokens_seen": 26244756,
"step": 152
},
{
"epoch": 0.3984375,
"grad_norm": 6.087065910058266,
"learning_rate": 5e-06,
"loss": 0.08,
"num_input_tokens_seen": 26417208,
"step": 153
},
{
"epoch": 0.3984375,
"loss": 0.073136106133461,
"loss_ce": 4.6509514504577965e-05,
"loss_iou": 0.58203125,
"loss_num": 0.0146484375,
"loss_xval": 0.0732421875,
"num_input_tokens_seen": 26417208,
"step": 153
},
{
"epoch": 0.4010416666666667,
"grad_norm": 4.65746127286459,
"learning_rate": 5e-06,
"loss": 0.114,
"num_input_tokens_seen": 26590204,
"step": 154
},
{
"epoch": 0.4010416666666667,
"loss": 0.10405319184064865,
"loss_ce": 7.980540976859629e-05,
"loss_iou": 0.6953125,
"loss_num": 0.020751953125,
"loss_xval": 0.10400390625,
"num_input_tokens_seen": 26590204,
"step": 154
},
{
"epoch": 0.4036458333333333,
"grad_norm": 6.888837612325361,
"learning_rate": 5e-06,
"loss": 0.1096,
"num_input_tokens_seen": 26762676,
"step": 155
},
{
"epoch": 0.4036458333333333,
"loss": 0.11900262534618378,
"loss_ce": 7.562051177956164e-05,
"loss_iou": 0.0,
"loss_num": 0.0238037109375,
"loss_xval": 0.119140625,
"num_input_tokens_seen": 26762676,
"step": 155
},
{
"epoch": 0.40625,
"grad_norm": 5.172471219817385,
"learning_rate": 5e-06,
"loss": 0.111,
"num_input_tokens_seen": 26934984,
"step": 156
},
{
"epoch": 0.40625,
"loss": 0.11719199270009995,
"loss_ce": 6.55300755170174e-05,
"loss_iou": 0.64453125,
"loss_num": 0.0234375,
"loss_xval": 0.1171875,
"num_input_tokens_seen": 26934984,
"step": 156
},
{
"epoch": 0.4088541666666667,
"grad_norm": 4.328240204635411,
"learning_rate": 5e-06,
"loss": 0.1044,
"num_input_tokens_seen": 27106980,
"step": 157
},
{
"epoch": 0.4088541666666667,
"loss": 0.18060433864593506,
"loss_ce": 0.0002149457432096824,
"loss_iou": 0.5703125,
"loss_num": 0.0361328125,
"loss_xval": 0.1806640625,
"num_input_tokens_seen": 27106980,
"step": 157
},
{
"epoch": 0.4114583333333333,
"grad_norm": 24.038857971844152,
"learning_rate": 5e-06,
"loss": 0.0807,
"num_input_tokens_seen": 27279788,
"step": 158
},
{
"epoch": 0.4114583333333333,
"loss": 0.07357801496982574,
"loss_ce": 0.00018323655240237713,
"loss_iou": 0.0,
"loss_num": 0.01470947265625,
"loss_xval": 0.0732421875,
"num_input_tokens_seen": 27279788,
"step": 158
},
{
"epoch": 0.4140625,
"grad_norm": 7.8628512106902315,
"learning_rate": 5e-06,
"loss": 0.0787,
"num_input_tokens_seen": 27452544,
"step": 159
},
{
"epoch": 0.4140625,
"loss": 0.07588262856006622,
"loss_ce": 9.22220351640135e-05,
"loss_iou": 0.55859375,
"loss_num": 0.01519775390625,
"loss_xval": 0.07568359375,
"num_input_tokens_seen": 27452544,
"step": 159
},
{
"epoch": 0.4166666666666667,
"grad_norm": 13.319740473348578,
"learning_rate": 5e-06,
"loss": 0.0969,
"num_input_tokens_seen": 27625396,
"step": 160
},
{
"epoch": 0.4166666666666667,
"loss": 0.09103557467460632,
"loss_ce": 6.267878779908642e-05,
"loss_iou": 0.5546875,
"loss_num": 0.0181884765625,
"loss_xval": 0.0908203125,
"num_input_tokens_seen": 27625396,
"step": 160
},
{
"epoch": 0.4192708333333333,
"grad_norm": 4.7866046147187715,
"learning_rate": 5e-06,
"loss": 0.1014,
"num_input_tokens_seen": 27797456,
"step": 161
},
{
"epoch": 0.4192708333333333,
"loss": 0.08366774767637253,
"loss_ce": 8.010101737454534e-05,
"loss_iou": 0.5546875,
"loss_num": 0.0167236328125,
"loss_xval": 0.08349609375,
"num_input_tokens_seen": 27797456,
"step": 161
},
{
"epoch": 0.421875,
"grad_norm": 2.272455331760193,
"learning_rate": 5e-06,
"loss": 0.0512,
"num_input_tokens_seen": 27969760,
"step": 162
},
{
"epoch": 0.421875,
"loss": 0.03556237369775772,
"loss_ce": 3.9912010834086686e-05,
"loss_iou": 0.7109375,
"loss_num": 0.007110595703125,
"loss_xval": 0.03564453125,
"num_input_tokens_seen": 27969760,
"step": 162
},
{
"epoch": 0.4244791666666667,
"grad_norm": 14.623479662016367,
"learning_rate": 5e-06,
"loss": 0.0969,
"num_input_tokens_seen": 28141788,
"step": 163
},
{
"epoch": 0.4244791666666667,
"loss": 0.06145535781979561,
"loss_ce": 0.0001302829186897725,
"loss_iou": 0.478515625,
"loss_num": 0.01226806640625,
"loss_xval": 0.061279296875,
"num_input_tokens_seen": 28141788,
"step": 163
},
{
"epoch": 0.4270833333333333,
"grad_norm": 5.18949662678828,
"learning_rate": 5e-06,
"loss": 0.0929,
"num_input_tokens_seen": 28314784,
"step": 164
},
{
"epoch": 0.4270833333333333,
"loss": 0.09928463399410248,
"loss_ce": 7.19891395419836e-05,
"loss_iou": 0.66015625,
"loss_num": 0.0198974609375,
"loss_xval": 0.09912109375,
"num_input_tokens_seen": 28314784,
"step": 164
},
{
"epoch": 0.4296875,
"grad_norm": 11.297198645176522,
"learning_rate": 5e-06,
"loss": 0.168,
"num_input_tokens_seen": 28488116,
"step": 165
},
{
"epoch": 0.4296875,
"loss": 0.2097131311893463,
"loss_ce": 0.00036255159648135304,
"loss_iou": 0.58203125,
"loss_num": 0.0419921875,
"loss_xval": 0.208984375,
"num_input_tokens_seen": 28488116,
"step": 165
},
{
"epoch": 0.4322916666666667,
"grad_norm": 3.7511749963618604,
"learning_rate": 5e-06,
"loss": 0.1104,
"num_input_tokens_seen": 28660644,
"step": 166
},
{
"epoch": 0.4322916666666667,
"loss": 0.14858925342559814,
"loss_ce": 0.00012123005581088364,
"loss_iou": 0.72265625,
"loss_num": 0.0296630859375,
"loss_xval": 0.1484375,
"num_input_tokens_seen": 28660644,
"step": 166
},
{
"epoch": 0.4348958333333333,
"grad_norm": 3.143289835870396,
"learning_rate": 5e-06,
"loss": 0.1021,
"num_input_tokens_seen": 28833256,
"step": 167
},
{
"epoch": 0.4348958333333333,
"loss": 0.07967463880777359,
"loss_ce": 0.00023738775053061545,
"loss_iou": 0.6796875,
"loss_num": 0.015869140625,
"loss_xval": 0.07958984375,
"num_input_tokens_seen": 28833256,
"step": 167
},
{
"epoch": 0.4375,
"grad_norm": 2.7797894675264336,
"learning_rate": 5e-06,
"loss": 0.0608,
"num_input_tokens_seen": 29005644,
"step": 168
},
{
"epoch": 0.4375,
"loss": 0.03842185065150261,
"loss_ce": 9.177176252705976e-05,
"loss_iou": 0.6875,
"loss_num": 0.007659912109375,
"loss_xval": 0.038330078125,
"num_input_tokens_seen": 29005644,
"step": 168
},
{
"epoch": 0.4401041666666667,
"grad_norm": 5.829730930450416,
"learning_rate": 5e-06,
"loss": 0.0882,
"num_input_tokens_seen": 29178140,
"step": 169
},
{
"epoch": 0.4401041666666667,
"loss": 0.08472438156604767,
"loss_ce": 0.0001754334516590461,
"loss_iou": 0.0,
"loss_num": 0.0169677734375,
"loss_xval": 0.08447265625,
"num_input_tokens_seen": 29178140,
"step": 169
},
{
"epoch": 0.4427083333333333,
"grad_norm": 19.34918748164043,
"learning_rate": 5e-06,
"loss": 0.0919,
"num_input_tokens_seen": 29350724,
"step": 170
},
{
"epoch": 0.4427083333333333,
"loss": 0.14068102836608887,
"loss_ce": 0.00017810959252528846,
"loss_iou": 0.68359375,
"loss_num": 0.028076171875,
"loss_xval": 0.140625,
"num_input_tokens_seen": 29350724,
"step": 170
},
{
"epoch": 0.4453125,
"grad_norm": 11.305442675935751,
"learning_rate": 5e-06,
"loss": 0.0914,
"num_input_tokens_seen": 29523556,
"step": 171
},
{
"epoch": 0.4453125,
"loss": 0.08168038725852966,
"loss_ce": 0.00010690485214581713,
"loss_iou": 0.4921875,
"loss_num": 0.016357421875,
"loss_xval": 0.08154296875,
"num_input_tokens_seen": 29523556,
"step": 171
},
{
"epoch": 0.4479166666666667,
"grad_norm": 2.746755888252267,
"learning_rate": 5e-06,
"loss": 0.0956,
"num_input_tokens_seen": 29696232,
"step": 172
},
{
"epoch": 0.4479166666666667,
"loss": 0.08047676831483841,
"loss_ce": 6.295397179201245e-05,
"loss_iou": 0.40234375,
"loss_num": 0.01611328125,
"loss_xval": 0.08056640625,
"num_input_tokens_seen": 29696232,
"step": 172
},
{
"epoch": 0.4505208333333333,
"grad_norm": 6.619988685929648,
"learning_rate": 5e-06,
"loss": 0.0578,
"num_input_tokens_seen": 29868892,
"step": 173
},
{
"epoch": 0.4505208333333333,
"loss": 0.03938157111406326,
"loss_ce": 0.0005632122629322112,
"loss_iou": 0.46875,
"loss_num": 0.00775146484375,
"loss_xval": 0.038818359375,
"num_input_tokens_seen": 29868892,
"step": 173
},
{
"epoch": 0.453125,
"grad_norm": 11.839215400516537,
"learning_rate": 5e-06,
"loss": 0.1263,
"num_input_tokens_seen": 30041044,
"step": 174
},
{
"epoch": 0.453125,
"loss": 0.06526083499193192,
"loss_ce": 0.0001820992911234498,
"loss_iou": 0.671875,
"loss_num": 0.01300048828125,
"loss_xval": 0.06494140625,
"num_input_tokens_seen": 30041044,
"step": 174
},
{
"epoch": 0.4557291666666667,
"grad_norm": 4.532895192393366,
"learning_rate": 5e-06,
"loss": 0.0624,
"num_input_tokens_seen": 30213960,
"step": 175
},
{
"epoch": 0.4557291666666667,
"loss": 0.05709821730852127,
"loss_ce": 0.00015241916116792709,
"loss_iou": 0.4140625,
"loss_num": 0.0113525390625,
"loss_xval": 0.056884765625,
"num_input_tokens_seen": 30213960,
"step": 175
},
{
"epoch": 0.4583333333333333,
"grad_norm": 4.373257654750305,
"learning_rate": 5e-06,
"loss": 0.0684,
"num_input_tokens_seen": 30386860,
"step": 176
},
{
"epoch": 0.4583333333333333,
"loss": 0.048836298286914825,
"loss_ce": 6.920905434526503e-05,
"loss_iou": 0.4296875,
"loss_num": 0.009765625,
"loss_xval": 0.048828125,
"num_input_tokens_seen": 30386860,
"step": 176
},
{
"epoch": 0.4609375,
"grad_norm": 7.579139401570638,
"learning_rate": 5e-06,
"loss": 0.0843,
"num_input_tokens_seen": 30559656,
"step": 177
},
{
"epoch": 0.4609375,
"loss": 0.12145084142684937,
"loss_ce": 6.717803626088426e-05,
"loss_iou": 0.0,
"loss_num": 0.0242919921875,
"loss_xval": 0.12158203125,
"num_input_tokens_seen": 30559656,
"step": 177
},
{
"epoch": 0.4635416666666667,
"grad_norm": 5.807914334628034,
"learning_rate": 5e-06,
"loss": 0.1275,
"num_input_tokens_seen": 30732276,
"step": 178
},
{
"epoch": 0.4635416666666667,
"loss": 0.10631553828716278,
"loss_ce": 0.00014487920270767063,
"loss_iou": 0.671875,
"loss_num": 0.021240234375,
"loss_xval": 0.10595703125,
"num_input_tokens_seen": 30732276,
"step": 178
},
{
"epoch": 0.4661458333333333,
"grad_norm": 2.6998654471345827,
"learning_rate": 5e-06,
"loss": 0.0584,
"num_input_tokens_seen": 30905448,
"step": 179
},
{
"epoch": 0.4661458333333333,
"loss": 0.043617475777864456,
"loss_ce": 0.00012992750271223485,
"loss_iou": 0.67578125,
"loss_num": 0.0086669921875,
"loss_xval": 0.04345703125,
"num_input_tokens_seen": 30905448,
"step": 179
},
{
"epoch": 0.46875,
"grad_norm": 10.092481931653555,
"learning_rate": 5e-06,
"loss": 0.0841,
"num_input_tokens_seen": 31078192,
"step": 180
},
{
"epoch": 0.46875,
"loss": 0.07613378763198853,
"loss_ce": 0.00038915983168408275,
"loss_iou": 0.439453125,
"loss_num": 0.01513671875,
"loss_xval": 0.07568359375,
"num_input_tokens_seen": 31078192,
"step": 180
},
{
"epoch": 0.4713541666666667,
"grad_norm": 4.850400427659922,
"learning_rate": 5e-06,
"loss": 0.1259,
"num_input_tokens_seen": 31250936,
"step": 181
},
{
"epoch": 0.4713541666666667,
"loss": 0.06517961621284485,
"loss_ce": 5.5098360462579876e-05,
"loss_iou": 0.703125,
"loss_num": 0.01300048828125,
"loss_xval": 0.06494140625,
"num_input_tokens_seen": 31250936,
"step": 181
},
{
"epoch": 0.4739583333333333,
"grad_norm": 8.116429898780023,
"learning_rate": 5e-06,
"loss": 0.0679,
"num_input_tokens_seen": 31423824,
"step": 182
},
{
"epoch": 0.4739583333333333,
"loss": 0.04832879453897476,
"loss_ce": 0.00014153837400954217,
"loss_iou": 0.67578125,
"loss_num": 0.0096435546875,
"loss_xval": 0.048095703125,
"num_input_tokens_seen": 31423824,
"step": 182
},
{
"epoch": 0.4765625,
"grad_norm": 15.778873010591404,
"learning_rate": 5e-06,
"loss": 0.0796,
"num_input_tokens_seen": 31596028,
"step": 183
},
{
"epoch": 0.4765625,
"loss": 0.058887895196676254,
"loss_ce": 9.577826858730987e-05,
"loss_iou": 0.5703125,
"loss_num": 0.01177978515625,
"loss_xval": 0.058837890625,
"num_input_tokens_seen": 31596028,
"step": 183
},
{
"epoch": 0.4791666666666667,
"grad_norm": 4.58612996328364,
"learning_rate": 5e-06,
"loss": 0.1133,
"num_input_tokens_seen": 31768480,
"step": 184
},
{
"epoch": 0.4791666666666667,
"loss": 0.07175838947296143,
"loss_ce": 7.259925041580573e-05,
"loss_iou": 0.625,
"loss_num": 0.01434326171875,
"loss_xval": 0.07177734375,
"num_input_tokens_seen": 31768480,
"step": 184
},
{
"epoch": 0.4817708333333333,
"grad_norm": 15.298267591347829,
"learning_rate": 5e-06,
"loss": 0.137,
"num_input_tokens_seen": 31941340,
"step": 185
},
{
"epoch": 0.4817708333333333,
"loss": 0.1580718755722046,
"loss_ce": 0.00023497387883253396,
"loss_iou": 0.6484375,
"loss_num": 0.031494140625,
"loss_xval": 0.158203125,
"num_input_tokens_seen": 31941340,
"step": 185
},
{
"epoch": 0.484375,
"grad_norm": 9.445985569352896,
"learning_rate": 5e-06,
"loss": 0.1414,
"num_input_tokens_seen": 32114196,
"step": 186
},
{
"epoch": 0.484375,
"loss": 0.1261276751756668,
"loss_ce": 0.00012059589062118903,
"loss_iou": 0.68359375,
"loss_num": 0.025146484375,
"loss_xval": 0.1259765625,
"num_input_tokens_seen": 32114196,
"step": 186
},
{
"epoch": 0.4869791666666667,
"grad_norm": 4.074608010814493,
"learning_rate": 5e-06,
"loss": 0.1168,
"num_input_tokens_seen": 32286624,
"step": 187
},
{
"epoch": 0.4869791666666667,
"loss": 0.08998198807239532,
"loss_ce": 0.0001382330956403166,
"loss_iou": 0.6328125,
"loss_num": 0.0179443359375,
"loss_xval": 0.08984375,
"num_input_tokens_seen": 32286624,
"step": 187
},
{
"epoch": 0.4895833333333333,
"grad_norm": 3.9575106116123293,
"learning_rate": 5e-06,
"loss": 0.1015,
"num_input_tokens_seen": 32459076,
"step": 188
},
{
"epoch": 0.4895833333333333,
"loss": 0.10690590739250183,
"loss_ce": 0.00015541848551947623,
"loss_iou": 0.6328125,
"loss_num": 0.0213623046875,
"loss_xval": 0.10693359375,
"num_input_tokens_seen": 32459076,
"step": 188
},
{
"epoch": 0.4921875,
"grad_norm": 3.7334350922271793,
"learning_rate": 5e-06,
"loss": 0.0944,
"num_input_tokens_seen": 32631908,
"step": 189
},
{
"epoch": 0.4921875,
"loss": 0.12938711047172546,
"loss_ce": 8.413316390942782e-05,
"loss_iou": 0.5,
"loss_num": 0.02587890625,
"loss_xval": 0.12890625,
"num_input_tokens_seen": 32631908,
"step": 189
},
{
"epoch": 0.4947916666666667,
"grad_norm": 12.613411687656823,
"learning_rate": 5e-06,
"loss": 0.089,
"num_input_tokens_seen": 32804848,
"step": 190
},
{
"epoch": 0.4947916666666667,
"loss": 0.17628361284732819,
"loss_ce": 0.00019717792747542262,
"loss_iou": 0.56640625,
"loss_num": 0.03515625,
"loss_xval": 0.17578125,
"num_input_tokens_seen": 32804848,
"step": 190
},
{
"epoch": 0.4973958333333333,
"grad_norm": 10.089904229108118,
"learning_rate": 5e-06,
"loss": 0.0984,
"num_input_tokens_seen": 32977460,
"step": 191
},
{
"epoch": 0.4973958333333333,
"loss": 0.09072966128587723,
"loss_ce": 0.00012297437933739275,
"loss_iou": 0.6796875,
"loss_num": 0.0181884765625,
"loss_xval": 0.0908203125,
"num_input_tokens_seen": 32977460,
"step": 191
},
{
"epoch": 0.5,
"grad_norm": 5.567747432819187,
"learning_rate": 5e-06,
"loss": 0.107,
"num_input_tokens_seen": 33150480,
"step": 192
},
{
"epoch": 0.5,
"loss": 0.08254844695329666,
"loss_ce": 8.995212556328624e-05,
"loss_iou": 0.68359375,
"loss_num": 0.0164794921875,
"loss_xval": 0.08251953125,
"num_input_tokens_seen": 33150480,
"step": 192
},
{
"epoch": 0.5026041666666666,
"grad_norm": 5.606336333733017,
"learning_rate": 5e-06,
"loss": 0.092,
"num_input_tokens_seen": 33322812,
"step": 193
},
{
"epoch": 0.5026041666666666,
"loss": 0.12639451026916504,
"loss_ce": 8.224871271522716e-05,
"loss_iou": 0.416015625,
"loss_num": 0.0252685546875,
"loss_xval": 0.1259765625,
"num_input_tokens_seen": 33322812,
"step": 193
},
{
"epoch": 0.5052083333333334,
"grad_norm": 10.892578547201238,
"learning_rate": 5e-06,
"loss": 0.0985,
"num_input_tokens_seen": 33494972,
"step": 194
},
{
"epoch": 0.5052083333333334,
"loss": 0.04478445649147034,
"loss_ce": 9.146681259153411e-05,
"loss_iou": 0.50390625,
"loss_num": 0.0089111328125,
"loss_xval": 0.044677734375,
"num_input_tokens_seen": 33494972,
"step": 194
},
{
"epoch": 0.5078125,
"grad_norm": 6.379235584994378,
"learning_rate": 5e-06,
"loss": 0.0909,
"num_input_tokens_seen": 33667632,
"step": 195
},
{
"epoch": 0.5078125,
"loss": 0.08593515306711197,
"loss_ce": 8.921044354792684e-05,
"loss_iou": 0.56640625,
"loss_num": 0.0172119140625,
"loss_xval": 0.0859375,
"num_input_tokens_seen": 33667632,
"step": 195
},
{
"epoch": 0.5104166666666666,
"grad_norm": 9.027964931503206,
"learning_rate": 5e-06,
"loss": 0.1088,
"num_input_tokens_seen": 33840020,
"step": 196
},
{
"epoch": 0.5104166666666666,
"loss": 0.07991337776184082,
"loss_ce": 0.00010991313320118934,
"loss_iou": 0.7265625,
"loss_num": 0.0159912109375,
"loss_xval": 0.07958984375,
"num_input_tokens_seen": 33840020,
"step": 196
},
{
"epoch": 0.5130208333333334,
"grad_norm": 7.170409659790098,
"learning_rate": 5e-06,
"loss": 0.1238,
"num_input_tokens_seen": 34013036,
"step": 197
},
{
"epoch": 0.5130208333333334,
"loss": 0.18947342038154602,
"loss_ce": 0.00014236349670682102,
"loss_iou": 0.60546875,
"loss_num": 0.037841796875,
"loss_xval": 0.189453125,
"num_input_tokens_seen": 34013036,
"step": 197
},
{
"epoch": 0.515625,
"grad_norm": 4.032339612187944,
"learning_rate": 5e-06,
"loss": 0.0872,
"num_input_tokens_seen": 34186220,
"step": 198
},
{
"epoch": 0.515625,
"loss": 0.07599274069070816,
"loss_ce": 6.500923336716369e-05,
"loss_iou": 0.5625,
"loss_num": 0.01519775390625,
"loss_xval": 0.076171875,
"num_input_tokens_seen": 34186220,
"step": 198
},
{
"epoch": 0.5182291666666666,
"grad_norm": 4.904239326276205,
"learning_rate": 5e-06,
"loss": 0.0764,
"num_input_tokens_seen": 34359052,
"step": 199
},
{
"epoch": 0.5182291666666666,
"loss": 0.08559094369411469,
"loss_ce": 0.0001112048194045201,
"loss_iou": 0.59765625,
"loss_num": 0.01708984375,
"loss_xval": 0.08544921875,
"num_input_tokens_seen": 34359052,
"step": 199
},
{
"epoch": 0.5208333333333334,
"grad_norm": 6.516342930606259,
"learning_rate": 5e-06,
"loss": 0.0773,
"num_input_tokens_seen": 34531672,
"step": 200
},
{
"epoch": 0.5208333333333334,
"loss": 0.05880989879369736,
"loss_ce": 0.00010933385055977851,
"loss_iou": 0.66796875,
"loss_num": 0.01171875,
"loss_xval": 0.05859375,
"num_input_tokens_seen": 34531672,
"step": 200
},
{
"epoch": 0.5234375,
"grad_norm": 3.361383386602773,
"learning_rate": 5e-06,
"loss": 0.0848,
"num_input_tokens_seen": 34704136,
"step": 201
},
{
"epoch": 0.5234375,
"loss": 0.05374922603368759,
"loss_ce": 6.880733417347074e-05,
"loss_iou": 0.484375,
"loss_num": 0.0107421875,
"loss_xval": 0.0537109375,
"num_input_tokens_seen": 34704136,
"step": 201
},
{
"epoch": 0.5260416666666666,
"grad_norm": 11.210671135103166,
"learning_rate": 5e-06,
"loss": 0.1067,
"num_input_tokens_seen": 34877364,
"step": 202
},
{
"epoch": 0.5260416666666666,
"loss": 0.09961295872926712,
"loss_ce": 6.461787415901199e-05,
"loss_iou": 0.7109375,
"loss_num": 0.0198974609375,
"loss_xval": 0.099609375,
"num_input_tokens_seen": 34877364,
"step": 202
},
{
"epoch": 0.5286458333333334,
"grad_norm": 6.444880899253943,
"learning_rate": 5e-06,
"loss": 0.111,
"num_input_tokens_seen": 35050192,
"step": 203
},
{
"epoch": 0.5286458333333334,
"loss": 0.09913990646600723,
"loss_ce": 7.98513792688027e-05,
"loss_iou": 0.5234375,
"loss_num": 0.019775390625,
"loss_xval": 0.09912109375,
"num_input_tokens_seen": 35050192,
"step": 203
},
{
"epoch": 0.53125,
"grad_norm": 3.8614428868304533,
"learning_rate": 5e-06,
"loss": 0.1063,
"num_input_tokens_seen": 35223020,
"step": 204
},
{
"epoch": 0.53125,
"loss": 0.06953012943267822,
"loss_ce": 7.212607306428254e-05,
"loss_iou": 0.7421875,
"loss_num": 0.013916015625,
"loss_xval": 0.0693359375,
"num_input_tokens_seen": 35223020,
"step": 204
},
{
"epoch": 0.5338541666666666,
"grad_norm": 6.191654916504458,
"learning_rate": 5e-06,
"loss": 0.091,
"num_input_tokens_seen": 35396176,
"step": 205
},
{
"epoch": 0.5338541666666666,
"loss": 0.15430204570293427,
"loss_ce": 0.00027982849860563874,
"loss_iou": 0.49609375,
"loss_num": 0.03076171875,
"loss_xval": 0.154296875,
"num_input_tokens_seen": 35396176,
"step": 205
},
{
"epoch": 0.5364583333333334,
"grad_norm": 5.468880474808822,
"learning_rate": 5e-06,
"loss": 0.0667,
"num_input_tokens_seen": 35568912,
"step": 206
},
{
"epoch": 0.5364583333333334,
"loss": 0.06578241288661957,
"loss_ce": 6.280931120272726e-05,
"loss_iou": 0.55078125,
"loss_num": 0.01318359375,
"loss_xval": 0.06591796875,
"num_input_tokens_seen": 35568912,
"step": 206
},
{
"epoch": 0.5390625,
"grad_norm": 5.886325106674437,
"learning_rate": 5e-06,
"loss": 0.1381,
"num_input_tokens_seen": 35741540,
"step": 207
},
{
"epoch": 0.5390625,
"loss": 0.11603943258523941,
"loss_ce": 0.00010315363761037588,
"loss_iou": 0.765625,
"loss_num": 0.023193359375,
"loss_xval": 0.11572265625,
"num_input_tokens_seen": 35741540,
"step": 207
},
{
"epoch": 0.5416666666666666,
"grad_norm": 4.502393531758672,
"learning_rate": 5e-06,
"loss": 0.0893,
"num_input_tokens_seen": 35914024,
"step": 208
},
{
"epoch": 0.5416666666666666,
"loss": 0.08899325132369995,
"loss_ce": 6.502882024506107e-05,
"loss_iou": 0.75,
"loss_num": 0.017822265625,
"loss_xval": 0.0888671875,
"num_input_tokens_seen": 35914024,
"step": 208
},
{
"epoch": 0.5442708333333334,
"grad_norm": 10.086026290203142,
"learning_rate": 5e-06,
"loss": 0.1033,
"num_input_tokens_seen": 36087084,
"step": 209
},
{
"epoch": 0.5442708333333334,
"loss": 0.060362037271261215,
"loss_ce": 0.00013559818034991622,
"loss_iou": 0.51953125,
"loss_num": 0.01202392578125,
"loss_xval": 0.060302734375,
"num_input_tokens_seen": 36087084,
"step": 209
},
{
"epoch": 0.546875,
"grad_norm": 6.731766943850301,
"learning_rate": 5e-06,
"loss": 0.0989,
"num_input_tokens_seen": 36259864,
"step": 210
},
{
"epoch": 0.546875,
"loss": 0.04847151041030884,
"loss_ce": 0.00013166893040761352,
"loss_iou": 0.53515625,
"loss_num": 0.0096435546875,
"loss_xval": 0.04833984375,
"num_input_tokens_seen": 36259864,
"step": 210
},
{
"epoch": 0.5494791666666666,
"grad_norm": 6.316474875770928,
"learning_rate": 5e-06,
"loss": 0.087,
"num_input_tokens_seen": 36433104,
"step": 211
},
{
"epoch": 0.5494791666666666,
"loss": 0.09729330986738205,
"loss_ce": 0.0003694796178024262,
"loss_iou": 0.6015625,
"loss_num": 0.0194091796875,
"loss_xval": 0.0966796875,
"num_input_tokens_seen": 36433104,
"step": 211
},
{
"epoch": 0.5520833333333334,
"grad_norm": 8.68013938900971,
"learning_rate": 5e-06,
"loss": 0.1649,
"num_input_tokens_seen": 36605948,
"step": 212
},
{
"epoch": 0.5520833333333334,
"loss": 0.10549305379390717,
"loss_ce": 5.4825890401843935e-05,
"loss_iou": 0.48046875,
"loss_num": 0.0211181640625,
"loss_xval": 0.10546875,
"num_input_tokens_seen": 36605948,
"step": 212
},
{
"epoch": 0.5546875,
"grad_norm": 2.9587466587848543,
"learning_rate": 5e-06,
"loss": 0.0597,
"num_input_tokens_seen": 36778360,
"step": 213
},
{
"epoch": 0.5546875,
"loss": 0.054243359714746475,
"loss_ce": 0.00010517801274545491,
"loss_iou": 0.69921875,
"loss_num": 0.01080322265625,
"loss_xval": 0.05419921875,
"num_input_tokens_seen": 36778360,
"step": 213
},
{
"epoch": 0.5572916666666666,
"grad_norm": 3.540440347425946,
"learning_rate": 5e-06,
"loss": 0.0541,
"num_input_tokens_seen": 36950340,
"step": 214
},
{
"epoch": 0.5572916666666666,
"loss": 0.044868774712085724,
"loss_ce": 0.0001605255965841934,
"loss_iou": 0.5234375,
"loss_num": 0.0089111328125,
"loss_xval": 0.044677734375,
"num_input_tokens_seen": 36950340,
"step": 214
},
{
"epoch": 0.5598958333333334,
"grad_norm": 1.7960907214462793,
"learning_rate": 5e-06,
"loss": 0.0558,
"num_input_tokens_seen": 37123392,
"step": 215
},
{
"epoch": 0.5598958333333334,
"loss": 0.03447698801755905,
"loss_ce": 0.00014471304893959314,
"loss_iou": 0.5,
"loss_num": 0.006866455078125,
"loss_xval": 0.034423828125,
"num_input_tokens_seen": 37123392,
"step": 215
},
{
"epoch": 0.5625,
"grad_norm": 4.431604970837842,
"learning_rate": 5e-06,
"loss": 0.0965,
"num_input_tokens_seen": 37295368,
"step": 216
},
{
"epoch": 0.5625,
"loss": 0.1555291712284088,
"loss_ce": 8.788481500232592e-05,
"loss_iou": 0.0,
"loss_num": 0.0311279296875,
"loss_xval": 0.1552734375,
"num_input_tokens_seen": 37295368,
"step": 216
},
{
"epoch": 0.5651041666666666,
"grad_norm": 8.013606775608135,
"learning_rate": 5e-06,
"loss": 0.1017,
"num_input_tokens_seen": 37467908,
"step": 217
},
{
"epoch": 0.5651041666666666,
"loss": 0.12359996885061264,
"loss_ce": 0.00015636239550076425,
"loss_iou": 0.0,
"loss_num": 0.024658203125,
"loss_xval": 0.12353515625,
"num_input_tokens_seen": 37467908,
"step": 217
},
{
"epoch": 0.5677083333333334,
"grad_norm": 9.000183276004282,
"learning_rate": 5e-06,
"loss": 0.0964,
"num_input_tokens_seen": 37640328,
"step": 218
},
{
"epoch": 0.5677083333333334,
"loss": 0.09344692528247833,
"loss_ce": 6.314014899544418e-05,
"loss_iou": 0.609375,
"loss_num": 0.0186767578125,
"loss_xval": 0.09326171875,
"num_input_tokens_seen": 37640328,
"step": 218
},
{
"epoch": 0.5703125,
"grad_norm": 28.397075300946053,
"learning_rate": 5e-06,
"loss": 0.1042,
"num_input_tokens_seen": 37812984,
"step": 219
},
{
"epoch": 0.5703125,
"loss": 0.11136841773986816,
"loss_ce": 7.081658986862749e-05,
"loss_iou": 0.734375,
"loss_num": 0.022216796875,
"loss_xval": 0.111328125,
"num_input_tokens_seen": 37812984,
"step": 219
},
{
"epoch": 0.5729166666666666,
"grad_norm": 3.6482189206456126,
"learning_rate": 5e-06,
"loss": 0.0685,
"num_input_tokens_seen": 37985152,
"step": 220
},
{
"epoch": 0.5729166666666666,
"loss": 0.05360790342092514,
"loss_ce": 6.481433956651017e-05,
"loss_iou": 0.5859375,
"loss_num": 0.0107421875,
"loss_xval": 0.053466796875,
"num_input_tokens_seen": 37985152,
"step": 220
},
{
"epoch": 0.5755208333333334,
"grad_norm": 24.217399076672056,
"learning_rate": 5e-06,
"loss": 0.0891,
"num_input_tokens_seen": 38157616,
"step": 221
},
{
"epoch": 0.5755208333333334,
"loss": 0.11579165607690811,
"loss_ce": 0.0005420194938778877,
"loss_iou": 0.3984375,
"loss_num": 0.0230712890625,
"loss_xval": 0.115234375,
"num_input_tokens_seen": 38157616,
"step": 221
},
{
"epoch": 0.578125,
"grad_norm": 3.7151820904220063,
"learning_rate": 5e-06,
"loss": 0.057,
"num_input_tokens_seen": 38330496,
"step": 222
},
{
"epoch": 0.578125,
"loss": 0.04516543075442314,
"loss_ce": 9.09663358470425e-05,
"loss_iou": 0.515625,
"loss_num": 0.009033203125,
"loss_xval": 0.045166015625,
"num_input_tokens_seen": 38330496,
"step": 222
},
{
"epoch": 0.5807291666666666,
"grad_norm": 15.97315866564612,
"learning_rate": 5e-06,
"loss": 0.1208,
"num_input_tokens_seen": 38503204,
"step": 223
},
{
"epoch": 0.5807291666666666,
"loss": 0.11001908779144287,
"loss_ce": 6.425123137887567e-05,
"loss_iou": 0.6484375,
"loss_num": 0.02197265625,
"loss_xval": 0.10986328125,
"num_input_tokens_seen": 38503204,
"step": 223
},
{
"epoch": 0.5833333333333334,
"grad_norm": 15.54736656112959,
"learning_rate": 5e-06,
"loss": 0.0667,
"num_input_tokens_seen": 38675744,
"step": 224
},
{
"epoch": 0.5833333333333334,
"loss": 0.11892453581094742,
"loss_ce": 0.00011960987467318773,
"loss_iou": 0.0,
"loss_num": 0.0238037109375,
"loss_xval": 0.11865234375,
"num_input_tokens_seen": 38675744,
"step": 224
},
{
"epoch": 0.5859375,
"grad_norm": 8.708565681630517,
"learning_rate": 5e-06,
"loss": 0.0858,
"num_input_tokens_seen": 38848284,
"step": 225
},
{
"epoch": 0.5859375,
"loss": 0.06076966971158981,
"loss_ce": 0.000436413218267262,
"loss_iou": 0.0,
"loss_num": 0.0120849609375,
"loss_xval": 0.060302734375,
"num_input_tokens_seen": 38848284,
"step": 225
},
{
"epoch": 0.5885416666666666,
"grad_norm": 11.665207638748996,
"learning_rate": 5e-06,
"loss": 0.1059,
"num_input_tokens_seen": 39021192,
"step": 226
},
{
"epoch": 0.5885416666666666,
"loss": 0.08464138209819794,
"loss_ce": 0.00010768979700515047,
"loss_iou": 0.57421875,
"loss_num": 0.016845703125,
"loss_xval": 0.08447265625,
"num_input_tokens_seen": 39021192,
"step": 226
},
{
"epoch": 0.5911458333333334,
"grad_norm": 6.122755854158079,
"learning_rate": 5e-06,
"loss": 0.0916,
"num_input_tokens_seen": 39194408,
"step": 227
},
{
"epoch": 0.5911458333333334,
"loss": 0.11753110587596893,
"loss_ce": 6.894973921589553e-05,
"loss_iou": 0.6796875,
"loss_num": 0.0234375,
"loss_xval": 0.11767578125,
"num_input_tokens_seen": 39194408,
"step": 227
},
{
"epoch": 0.59375,
"grad_norm": 25.91736548090707,
"learning_rate": 5e-06,
"loss": 0.0936,
"num_input_tokens_seen": 39366972,
"step": 228
},
{
"epoch": 0.59375,
"loss": 0.09147345274686813,
"loss_ce": 7.33033666620031e-05,
"loss_iou": 0.578125,
"loss_num": 0.018310546875,
"loss_xval": 0.09130859375,
"num_input_tokens_seen": 39366972,
"step": 228
},
{
"epoch": 0.5963541666666666,
"grad_norm": 22.31114946018542,
"learning_rate": 5e-06,
"loss": 0.094,
"num_input_tokens_seen": 39539944,
"step": 229
},
{
"epoch": 0.5963541666666666,
"loss": 0.11777202785015106,
"loss_ce": 3.521383769111708e-05,
"loss_iou": 0.71484375,
"loss_num": 0.0235595703125,
"loss_xval": 0.11767578125,
"num_input_tokens_seen": 39539944,
"step": 229
},
{
"epoch": 0.5989583333333334,
"grad_norm": 4.025666229457589,
"learning_rate": 5e-06,
"loss": 0.0886,
"num_input_tokens_seen": 39712932,
"step": 230
},
{
"epoch": 0.5989583333333334,
"loss": 0.1201152354478836,
"loss_ce": 0.00015063578030094504,
"loss_iou": 0.6796875,
"loss_num": 0.02392578125,
"loss_xval": 0.1201171875,
"num_input_tokens_seen": 39712932,
"step": 230
},
{
"epoch": 0.6015625,
"grad_norm": 3.7609078788021097,
"learning_rate": 5e-06,
"loss": 0.0683,
"num_input_tokens_seen": 39885616,
"step": 231
},
{
"epoch": 0.6015625,
"loss": 0.07680265605449677,
"loss_ce": 0.0001730183430481702,
"loss_iou": 0.5859375,
"loss_num": 0.01531982421875,
"loss_xval": 0.07666015625,
"num_input_tokens_seen": 39885616,
"step": 231
},
{
"epoch": 0.6041666666666666,
"grad_norm": 5.989352644027437,
"learning_rate": 5e-06,
"loss": 0.0674,
"num_input_tokens_seen": 40057968,
"step": 232
},
{
"epoch": 0.6041666666666666,
"loss": 0.1005856841802597,
"loss_ce": 0.00013706949539482594,
"loss_iou": 0.462890625,
"loss_num": 0.02001953125,
"loss_xval": 0.1005859375,
"num_input_tokens_seen": 40057968,
"step": 232
},
{
"epoch": 0.6067708333333334,
"grad_norm": 4.762149494132162,
"learning_rate": 5e-06,
"loss": 0.0698,
"num_input_tokens_seen": 40230848,
"step": 233
},
{
"epoch": 0.6067708333333334,
"loss": 0.10376375913619995,
"loss_ce": 3.4517663152655587e-05,
"loss_iou": 0.63671875,
"loss_num": 0.020751953125,
"loss_xval": 0.103515625,
"num_input_tokens_seen": 40230848,
"step": 233
},
{
"epoch": 0.609375,
"grad_norm": 5.409386698496161,
"learning_rate": 5e-06,
"loss": 0.1021,
"num_input_tokens_seen": 40403276,
"step": 234
},
{
"epoch": 0.609375,
"loss": 0.06936685740947723,
"loss_ce": 0.0003818793629761785,
"loss_iou": 0.482421875,
"loss_num": 0.0137939453125,
"loss_xval": 0.06884765625,
"num_input_tokens_seen": 40403276,
"step": 234
},
{
"epoch": 0.6119791666666666,
"grad_norm": 10.974609444669646,
"learning_rate": 5e-06,
"loss": 0.1001,
"num_input_tokens_seen": 40576292,
"step": 235
},
{
"epoch": 0.6119791666666666,
"loss": 0.11177849024534225,
"loss_ce": 0.00026726460782811046,
"loss_iou": 0.61328125,
"loss_num": 0.0223388671875,
"loss_xval": 0.111328125,
"num_input_tokens_seen": 40576292,
"step": 235
},
{
"epoch": 0.6145833333333334,
"grad_norm": 3.802157730607013,
"learning_rate": 5e-06,
"loss": 0.0768,
"num_input_tokens_seen": 40749076,
"step": 236
},
{
"epoch": 0.6145833333333334,
"loss": 0.07335153222084045,
"loss_ce": 0.000506069976836443,
"loss_iou": 0.6171875,
"loss_num": 0.0145263671875,
"loss_xval": 0.07275390625,
"num_input_tokens_seen": 40749076,
"step": 236
},
{
"epoch": 0.6171875,
"grad_norm": 3.5754950924222406,
"learning_rate": 5e-06,
"loss": 0.0497,
"num_input_tokens_seen": 40922288,
"step": 237
},
{
"epoch": 0.6171875,
"loss": 0.02886682003736496,
"loss_ce": 0.00014977881801314652,
"loss_iou": 0.55078125,
"loss_num": 0.0057373046875,
"loss_xval": 0.0286865234375,
"num_input_tokens_seen": 40922288,
"step": 237
},
{
"epoch": 0.6197916666666666,
"grad_norm": 4.288040219675324,
"learning_rate": 5e-06,
"loss": 0.0508,
"num_input_tokens_seen": 41094828,
"step": 238
},
{
"epoch": 0.6197916666666666,
"loss": 0.05992227792739868,
"loss_ce": 0.0003519634483382106,
"loss_iou": 0.640625,
"loss_num": 0.01190185546875,
"loss_xval": 0.0595703125,
"num_input_tokens_seen": 41094828,
"step": 238
},
{
"epoch": 0.6223958333333334,
"grad_norm": 6.504525689859585,
"learning_rate": 5e-06,
"loss": 0.0925,
"num_input_tokens_seen": 41267332,
"step": 239
},
{
"epoch": 0.6223958333333334,
"loss": 0.06373357772827148,
"loss_ce": 4.338783037383109e-05,
"loss_iou": 0.6875,
"loss_num": 0.01275634765625,
"loss_xval": 0.0634765625,
"num_input_tokens_seen": 41267332,
"step": 239
},
{
"epoch": 0.625,
"grad_norm": 5.068763378329545,
"learning_rate": 5e-06,
"loss": 0.0829,
"num_input_tokens_seen": 41439728,
"step": 240
},
{
"epoch": 0.625,
"loss": 0.08366407454013824,
"loss_ce": 0.00022901550983078778,
"loss_iou": 0.578125,
"loss_num": 0.0167236328125,
"loss_xval": 0.08349609375,
"num_input_tokens_seen": 41439728,
"step": 240
},
{
"epoch": 0.6276041666666666,
"grad_norm": 9.15531863667315,
"learning_rate": 5e-06,
"loss": 0.0909,
"num_input_tokens_seen": 41612180,
"step": 241
},
{
"epoch": 0.6276041666666666,
"loss": 0.11120368540287018,
"loss_ce": 5.867354047950357e-05,
"loss_iou": 0.478515625,
"loss_num": 0.022216796875,
"loss_xval": 0.111328125,
"num_input_tokens_seen": 41612180,
"step": 241
},
{
"epoch": 0.6302083333333334,
"grad_norm": 2.0214181878511566,
"learning_rate": 5e-06,
"loss": 0.0741,
"num_input_tokens_seen": 41784848,
"step": 242
},
{
"epoch": 0.6302083333333334,
"loss": 0.05301050841808319,
"loss_ce": 9.30292735574767e-05,
"loss_iou": 0.51171875,
"loss_num": 0.0106201171875,
"loss_xval": 0.052978515625,
"num_input_tokens_seen": 41784848,
"step": 242
},
{
"epoch": 0.6328125,
"grad_norm": 4.1167075841800385,
"learning_rate": 5e-06,
"loss": 0.0462,
"num_input_tokens_seen": 41957024,
"step": 243
},
{
"epoch": 0.6328125,
"loss": 0.05533324182033539,
"loss_ce": 8.116720709949732e-05,
"loss_iou": 0.498046875,
"loss_num": 0.01104736328125,
"loss_xval": 0.05517578125,
"num_input_tokens_seen": 41957024,
"step": 243
},
{
"epoch": 0.6354166666666666,
"grad_norm": 12.037461080324686,
"learning_rate": 5e-06,
"loss": 0.1285,
"num_input_tokens_seen": 42129920,
"step": 244
},
{
"epoch": 0.6354166666666666,
"loss": 0.07943513244390488,
"loss_ce": 8.94309050636366e-05,
"loss_iou": 0.7265625,
"loss_num": 0.015869140625,
"loss_xval": 0.0791015625,
"num_input_tokens_seen": 42129920,
"step": 244
},
{
"epoch": 0.6380208333333334,
"grad_norm": 6.295206206189768,
"learning_rate": 5e-06,
"loss": 0.0889,
"num_input_tokens_seen": 42302912,
"step": 245
},
{
"epoch": 0.6380208333333334,
"loss": 0.09156939387321472,
"loss_ce": 4.717556657851674e-05,
"loss_iou": 0.5234375,
"loss_num": 0.018310546875,
"loss_xval": 0.09130859375,
"num_input_tokens_seen": 42302912,
"step": 245
},
{
"epoch": 0.640625,
"grad_norm": 25.409557942414935,
"learning_rate": 5e-06,
"loss": 0.0661,
"num_input_tokens_seen": 42475584,
"step": 246
},
{
"epoch": 0.640625,
"loss": 0.097844198346138,
"loss_ce": 6.587710231542587e-05,
"loss_iou": 0.431640625,
"loss_num": 0.01953125,
"loss_xval": 0.09765625,
"num_input_tokens_seen": 42475584,
"step": 246
},
{
"epoch": 0.6432291666666666,
"grad_norm": 4.450370043022936,
"learning_rate": 5e-06,
"loss": 0.0697,
"num_input_tokens_seen": 42647808,
"step": 247
},
{
"epoch": 0.6432291666666666,
"loss": 0.1061711385846138,
"loss_ce": 0.00015306829300243407,
"loss_iou": 0.5859375,
"loss_num": 0.021240234375,
"loss_xval": 0.10595703125,
"num_input_tokens_seen": 42647808,
"step": 247
},
{
"epoch": 0.6458333333333334,
"grad_norm": 4.116581907360989,
"learning_rate": 5e-06,
"loss": 0.0858,
"num_input_tokens_seen": 42820508,
"step": 248
},
{
"epoch": 0.6458333333333334,
"loss": 0.07190299779176712,
"loss_ce": 7.987646677065641e-05,
"loss_iou": 0.5546875,
"loss_num": 0.01434326171875,
"loss_xval": 0.07177734375,
"num_input_tokens_seen": 42820508,
"step": 248
},
{
"epoch": 0.6484375,
"grad_norm": 4.147716000593212,
"learning_rate": 5e-06,
"loss": 0.0854,
"num_input_tokens_seen": 42992784,
"step": 249
},
{
"epoch": 0.6484375,
"loss": 0.05028773471713066,
"loss_ce": 5.58009123778902e-05,
"loss_iou": 0.6640625,
"loss_num": 0.01007080078125,
"loss_xval": 0.05029296875,
"num_input_tokens_seen": 42992784,
"step": 249
},
{
"epoch": 0.6510416666666666,
"grad_norm": 16.495817541741257,
"learning_rate": 5e-06,
"loss": 0.0747,
"num_input_tokens_seen": 43165896,
"step": 250
},
{
"epoch": 0.6510416666666666,
"eval_seeclick_CIoU": 0.4124833643436432,
"eval_seeclick_GIoU": 0.41358618438243866,
"eval_seeclick_IoU": 0.445960208773613,
"eval_seeclick_MAE_all": 0.0724409706890583,
"eval_seeclick_MAE_h": 0.06929375603795052,
"eval_seeclick_MAE_w": 0.09850849956274033,
"eval_seeclick_MAE_x": 0.07914602756500244,
"eval_seeclick_MAE_y": 0.04281560517847538,
"eval_seeclick_NUM_probability": 0.9999896287918091,
"eval_seeclick_inside_bbox": 0.921875,
"eval_seeclick_loss": 0.9194074273109436,
"eval_seeclick_loss_ce": 0.6105623841285706,
"eval_seeclick_loss_iou": 0.67578125,
"eval_seeclick_loss_num": 0.0633697509765625,
"eval_seeclick_loss_xval": 0.31683349609375,
"eval_seeclick_runtime": 73.8784,
"eval_seeclick_samples_per_second": 0.582,
"eval_seeclick_steps_per_second": 0.027,
"num_input_tokens_seen": 43165896,
"step": 250
},
{
"epoch": 0.6510416666666666,
"eval_icons_CIoU": 0.6936101317405701,
"eval_icons_GIoU": 0.697041928768158,
"eval_icons_IoU": 0.704749345779419,
"eval_icons_MAE_all": 0.039153311401605606,
"eval_icons_MAE_h": 0.04060409218072891,
"eval_icons_MAE_w": 0.05324110668152571,
"eval_icons_MAE_x": 0.03839818201959133,
"eval_icons_MAE_y": 0.02436987590044737,
"eval_icons_NUM_probability": 0.9999879896640778,
"eval_icons_inside_bbox": 0.9565972089767456,
"eval_icons_loss": 0.15028713643550873,
"eval_icons_loss_ce": 0.00046230135194491595,
"eval_icons_loss_iou": 0.600341796875,
"eval_icons_loss_num": 0.02852630615234375,
"eval_icons_loss_xval": 0.142608642578125,
"eval_icons_runtime": 80.0672,
"eval_icons_samples_per_second": 0.624,
"eval_icons_steps_per_second": 0.025,
"num_input_tokens_seen": 43165896,
"step": 250
},
{
"epoch": 0.6510416666666666,
"eval_screenspot_CIoU": 0.42071565985679626,
"eval_screenspot_GIoU": 0.4120611349741618,
"eval_screenspot_IoU": 0.48129573464393616,
"eval_screenspot_MAE_all": 0.11981111764907837,
"eval_screenspot_MAE_h": 0.08953167746464412,
"eval_screenspot_MAE_w": 0.19297573963801065,
"eval_screenspot_MAE_x": 0.1243693083524704,
"eval_screenspot_MAE_y": 0.07236775507529576,
"eval_screenspot_NUM_probability": 0.9999845623970032,
"eval_screenspot_inside_bbox": 0.7979166706403097,
"eval_screenspot_loss": 0.8879116177558899,
"eval_screenspot_loss_ce": 0.3984930415948232,
"eval_screenspot_loss_iou": 0.5793863932291666,
"eval_screenspot_loss_num": 0.09791056315104167,
"eval_screenspot_loss_xval": 0.4894205729166667,
"eval_screenspot_runtime": 139.1948,
"eval_screenspot_samples_per_second": 0.639,
"eval_screenspot_steps_per_second": 0.022,
"num_input_tokens_seen": 43165896,
"step": 250
},
{
"epoch": 0.6510416666666666,
"eval_compot_CIoU": 0.8471810519695282,
"eval_compot_GIoU": 0.8459496200084686,
"eval_compot_IoU": 0.8493484258651733,
"eval_compot_MAE_all": 0.01606033928692341,
"eval_compot_MAE_h": 0.015686397906392813,
"eval_compot_MAE_w": 0.024428557604551315,
"eval_compot_MAE_x": 0.013795553240925074,
"eval_compot_MAE_y": 0.01033084886148572,
"eval_compot_NUM_probability": 0.9999726712703705,
"eval_compot_inside_bbox": 1.0,
"eval_compot_loss": 0.07782306522130966,
"eval_compot_loss_ce": 0.0001593182678334415,
"eval_compot_loss_iou": 0.693115234375,
"eval_compot_loss_num": 0.01538848876953125,
"eval_compot_loss_xval": 0.076904296875,
"eval_compot_runtime": 81.1661,
"eval_compot_samples_per_second": 0.616,
"eval_compot_steps_per_second": 0.025,
"num_input_tokens_seen": 43165896,
"step": 250
},
{
"epoch": 0.6510416666666666,
"loss": 0.0681569054722786,
"loss_ce": 0.00022477866150438786,
"loss_iou": 0.7265625,
"loss_num": 0.01361083984375,
"loss_xval": 0.06787109375,
"num_input_tokens_seen": 43165896,
"step": 250
},
{
"epoch": 0.6536458333333334,
"grad_norm": 3.6701809821868308,
"learning_rate": 5e-06,
"loss": 0.0838,
"num_input_tokens_seen": 43338688,
"step": 251
},
{
"epoch": 0.6536458333333334,
"loss": 0.06336190551519394,
"loss_ce": 9.896879782900214e-05,
"loss_iou": 0.74609375,
"loss_num": 0.01263427734375,
"loss_xval": 0.0634765625,
"num_input_tokens_seen": 43338688,
"step": 251
},
{
"epoch": 0.65625,
"grad_norm": 1.8275074603928982,
"learning_rate": 5e-06,
"loss": 0.0518,
"num_input_tokens_seen": 43511012,
"step": 252
},
{
"epoch": 0.65625,
"loss": 0.04357748478651047,
"loss_ce": 0.00015097142022568733,
"loss_iou": 0.5078125,
"loss_num": 0.0086669921875,
"loss_xval": 0.04345703125,
"num_input_tokens_seen": 43511012,
"step": 252
},
{
"epoch": 0.6588541666666666,
"grad_norm": 2.1416791842803238,
"learning_rate": 5e-06,
"loss": 0.057,
"num_input_tokens_seen": 43683084,
"step": 253
},
{
"epoch": 0.6588541666666666,
"loss": 0.05978800728917122,
"loss_ce": 9.562318882672116e-05,
"loss_iou": 0.447265625,
"loss_num": 0.011962890625,
"loss_xval": 0.0595703125,
"num_input_tokens_seen": 43683084,
"step": 253
},
{
"epoch": 0.6614583333333334,
"grad_norm": 3.5604873362214167,
"learning_rate": 5e-06,
"loss": 0.0713,
"num_input_tokens_seen": 43855336,
"step": 254
},
{
"epoch": 0.6614583333333334,
"loss": 0.06191530451178551,
"loss_ce": 0.0003155705926474184,
"loss_iou": 0.453125,
"loss_num": 0.0123291015625,
"loss_xval": 0.0615234375,
"num_input_tokens_seen": 43855336,
"step": 254
},
{
"epoch": 0.6640625,
"grad_norm": 7.717445783436579,
"learning_rate": 5e-06,
"loss": 0.0799,
"num_input_tokens_seen": 44028296,
"step": 255
},
{
"epoch": 0.6640625,
"loss": 0.09458325803279877,
"loss_ce": 3.980396650149487e-05,
"loss_iou": 0.70703125,
"loss_num": 0.0189208984375,
"loss_xval": 0.0947265625,
"num_input_tokens_seen": 44028296,
"step": 255
},
{
"epoch": 0.6666666666666666,
"grad_norm": 15.823495393044448,
"learning_rate": 5e-06,
"loss": 0.0915,
"num_input_tokens_seen": 44200980,
"step": 256
},
{
"epoch": 0.6666666666666666,
"loss": 0.10397086292505264,
"loss_ce": 5.850698289577849e-05,
"loss_iou": 0.73046875,
"loss_num": 0.020751953125,
"loss_xval": 0.10400390625,
"num_input_tokens_seen": 44200980,
"step": 256
},
{
"epoch": 0.6692708333333334,
"grad_norm": 4.767840698347708,
"learning_rate": 5e-06,
"loss": 0.0808,
"num_input_tokens_seen": 44373548,
"step": 257
},
{
"epoch": 0.6692708333333334,
"loss": 0.04920345917344093,
"loss_ce": 7.015664596110582e-05,
"loss_iou": 0.69140625,
"loss_num": 0.00982666015625,
"loss_xval": 0.049072265625,
"num_input_tokens_seen": 44373548,
"step": 257
},
{
"epoch": 0.671875,
"grad_norm": 7.0061287275719195,
"learning_rate": 5e-06,
"loss": 0.087,
"num_input_tokens_seen": 44545956,
"step": 258
},
{
"epoch": 0.671875,
"loss": 0.12110073864459991,
"loss_ce": 3.750172254513018e-05,
"loss_iou": 0.6015625,
"loss_num": 0.024169921875,
"loss_xval": 0.12109375,
"num_input_tokens_seen": 44545956,
"step": 258
},
{
"epoch": 0.6744791666666666,
"grad_norm": 16.13195894853677,
"learning_rate": 5e-06,
"loss": 0.1143,
"num_input_tokens_seen": 44719164,
"step": 259
},
{
"epoch": 0.6744791666666666,
"loss": 0.15427453815937042,
"loss_ce": 9.973209671443328e-05,
"loss_iou": 0.65234375,
"loss_num": 0.0308837890625,
"loss_xval": 0.154296875,
"num_input_tokens_seen": 44719164,
"step": 259
},
{
"epoch": 0.6770833333333334,
"grad_norm": 3.769620732282852,
"learning_rate": 5e-06,
"loss": 0.0715,
"num_input_tokens_seen": 44891856,
"step": 260
},
{
"epoch": 0.6770833333333334,
"loss": 0.08834376931190491,
"loss_ce": 8.69391078595072e-05,
"loss_iou": 0.625,
"loss_num": 0.0177001953125,
"loss_xval": 0.08837890625,
"num_input_tokens_seen": 44891856,
"step": 260
},
{
"epoch": 0.6796875,
"grad_norm": 15.395477460165395,
"learning_rate": 5e-06,
"loss": 0.0877,
"num_input_tokens_seen": 45064988,
"step": 261
},
{
"epoch": 0.6796875,
"loss": 0.08463309705257416,
"loss_ce": 0.00025199196534231305,
"loss_iou": 0.390625,
"loss_num": 0.016845703125,
"loss_xval": 0.08447265625,
"num_input_tokens_seen": 45064988,
"step": 261
},
{
"epoch": 0.6822916666666666,
"grad_norm": 1.9846406004820456,
"learning_rate": 5e-06,
"loss": 0.0567,
"num_input_tokens_seen": 45237912,
"step": 262
},
{
"epoch": 0.6822916666666666,
"loss": 0.10574272274971008,
"loss_ce": 4.509550126385875e-05,
"loss_iou": 0.4140625,
"loss_num": 0.0211181640625,
"loss_xval": 0.10546875,
"num_input_tokens_seen": 45237912,
"step": 262
},
{
"epoch": 0.6848958333333334,
"grad_norm": 10.620697806562827,
"learning_rate": 5e-06,
"loss": 0.0748,
"num_input_tokens_seen": 45410408,
"step": 263
},
{
"epoch": 0.6848958333333334,
"loss": 0.0699179470539093,
"loss_ce": 0.0001700148859526962,
"loss_iou": 0.400390625,
"loss_num": 0.013916015625,
"loss_xval": 0.06982421875,
"num_input_tokens_seen": 45410408,
"step": 263
},
{
"epoch": 0.6875,
"grad_norm": 4.4669986375425985,
"learning_rate": 5e-06,
"loss": 0.0521,
"num_input_tokens_seen": 45583132,
"step": 264
},
{
"epoch": 0.6875,
"loss": 0.046352967619895935,
"loss_ce": 0.00013409550592768937,
"loss_iou": 0.78515625,
"loss_num": 0.00921630859375,
"loss_xval": 0.046142578125,
"num_input_tokens_seen": 45583132,
"step": 264
},
{
"epoch": 0.6901041666666666,
"grad_norm": 6.203574811391586,
"learning_rate": 5e-06,
"loss": 0.0928,
"num_input_tokens_seen": 45755472,
"step": 265
},
{
"epoch": 0.6901041666666666,
"loss": 0.061830393970012665,
"loss_ce": 3.229987487429753e-05,
"loss_iou": 0.6328125,
"loss_num": 0.01239013671875,
"loss_xval": 0.061767578125,
"num_input_tokens_seen": 45755472,
"step": 265
},
{
"epoch": 0.6927083333333334,
"grad_norm": 10.894591035750713,
"learning_rate": 5e-06,
"loss": 0.1005,
"num_input_tokens_seen": 45928200,
"step": 266
},
{
"epoch": 0.6927083333333334,
"loss": 0.1057087630033493,
"loss_ce": 0.00011794811143772677,
"loss_iou": 0.6875,
"loss_num": 0.0211181640625,
"loss_xval": 0.10546875,
"num_input_tokens_seen": 45928200,
"step": 266
},
{
"epoch": 0.6953125,
"grad_norm": 3.559473609758924,
"learning_rate": 5e-06,
"loss": 0.0525,
"num_input_tokens_seen": 46101500,
"step": 267
},
{
"epoch": 0.6953125,
"loss": 0.059102512896060944,
"loss_ce": 5.099709960632026e-05,
"loss_iou": 0.470703125,
"loss_num": 0.0118408203125,
"loss_xval": 0.05908203125,
"num_input_tokens_seen": 46101500,
"step": 267
},
{
"epoch": 0.6979166666666666,
"grad_norm": 32.963299647312084,
"learning_rate": 5e-06,
"loss": 0.1156,
"num_input_tokens_seen": 46273792,
"step": 268
},
{
"epoch": 0.6979166666666666,
"loss": 0.17293627560138702,
"loss_ce": 6.945877976249903e-05,
"loss_iou": 0.5859375,
"loss_num": 0.03466796875,
"loss_xval": 0.1728515625,
"num_input_tokens_seen": 46273792,
"step": 268
},
{
"epoch": 0.7005208333333334,
"grad_norm": 5.379185511033478,
"learning_rate": 5e-06,
"loss": 0.1206,
"num_input_tokens_seen": 46446840,
"step": 269
},
{
"epoch": 0.7005208333333334,
"loss": 0.0702916830778122,
"loss_ce": 4.0215229091700166e-05,
"loss_iou": 0.65625,
"loss_num": 0.0140380859375,
"loss_xval": 0.0703125,
"num_input_tokens_seen": 46446840,
"step": 269
},
{
"epoch": 0.703125,
"grad_norm": 4.833206887807392,
"learning_rate": 5e-06,
"loss": 0.1057,
"num_input_tokens_seen": 46619224,
"step": 270
},
{
"epoch": 0.703125,
"loss": 0.06372040510177612,
"loss_ce": 4.5477234380086884e-05,
"loss_iou": 0.482421875,
"loss_num": 0.01275634765625,
"loss_xval": 0.0634765625,
"num_input_tokens_seen": 46619224,
"step": 270
},
{
"epoch": 0.7057291666666666,
"grad_norm": 4.9691828426728515,
"learning_rate": 5e-06,
"loss": 0.073,
"num_input_tokens_seen": 46791948,
"step": 271
},
{
"epoch": 0.7057291666666666,
"loss": 0.041718438267707825,
"loss_ce": 0.00013823516201227903,
"loss_iou": 0.66796875,
"loss_num": 0.00830078125,
"loss_xval": 0.04150390625,
"num_input_tokens_seen": 46791948,
"step": 271
},
{
"epoch": 0.7083333333333334,
"grad_norm": 5.792546307908184,
"learning_rate": 5e-06,
"loss": 0.0812,
"num_input_tokens_seen": 46964400,
"step": 272
},
{
"epoch": 0.7083333333333334,
"loss": 0.07085588574409485,
"loss_ce": 0.00013139640213921666,
"loss_iou": 0.671875,
"loss_num": 0.01416015625,
"loss_xval": 0.07080078125,
"num_input_tokens_seen": 46964400,
"step": 272
},
{
"epoch": 0.7109375,
"grad_norm": 8.864261104979098,
"learning_rate": 5e-06,
"loss": 0.0973,
"num_input_tokens_seen": 47137156,
"step": 273
},
{
"epoch": 0.7109375,
"loss": 0.08093667030334473,
"loss_ce": 0.00015664326201658696,
"loss_iou": 0.640625,
"loss_num": 0.0162353515625,
"loss_xval": 0.08056640625,
"num_input_tokens_seen": 47137156,
"step": 273
},
{
"epoch": 0.7135416666666666,
"grad_norm": 3.8762493026111633,
"learning_rate": 5e-06,
"loss": 0.0958,
"num_input_tokens_seen": 47309640,
"step": 274
},
{
"epoch": 0.7135416666666666,
"loss": 0.1435449719429016,
"loss_ce": 5.13083505211398e-05,
"loss_iou": 0.5234375,
"loss_num": 0.0286865234375,
"loss_xval": 0.1435546875,
"num_input_tokens_seen": 47309640,
"step": 274
},
{
"epoch": 0.7161458333333334,
"grad_norm": 4.845607455502515,
"learning_rate": 5e-06,
"loss": 0.0587,
"num_input_tokens_seen": 47482920,
"step": 275
},
{
"epoch": 0.7161458333333334,
"loss": 0.06996987760066986,
"loss_ce": 0.000145662619615905,
"loss_iou": 0.45703125,
"loss_num": 0.01397705078125,
"loss_xval": 0.06982421875,
"num_input_tokens_seen": 47482920,
"step": 275
},
{
"epoch": 0.71875,
"grad_norm": 6.023028440412175,
"learning_rate": 5e-06,
"loss": 0.1017,
"num_input_tokens_seen": 47655164,
"step": 276
},
{
"epoch": 0.71875,
"loss": 0.11321437358856201,
"loss_ce": 5.519590195035562e-05,
"loss_iou": 0.58984375,
"loss_num": 0.0225830078125,
"loss_xval": 0.11328125,
"num_input_tokens_seen": 47655164,
"step": 276
},
{
"epoch": 0.7213541666666666,
"grad_norm": 4.375656119857942,
"learning_rate": 5e-06,
"loss": 0.0618,
"num_input_tokens_seen": 47827856,
"step": 277
},
{
"epoch": 0.7213541666666666,
"loss": 0.0524156428873539,
"loss_ce": 0.0002611021918710321,
"loss_iou": 0.73046875,
"loss_num": 0.01043701171875,
"loss_xval": 0.05224609375,
"num_input_tokens_seen": 47827856,
"step": 277
},
{
"epoch": 0.7239583333333334,
"grad_norm": 3.478066675039873,
"learning_rate": 5e-06,
"loss": 0.0642,
"num_input_tokens_seen": 48000956,
"step": 278
},
{
"epoch": 0.7239583333333334,
"loss": 0.03859657049179077,
"loss_ce": 5.2869407227262855e-05,
"loss_iou": 0.474609375,
"loss_num": 0.007720947265625,
"loss_xval": 0.03857421875,
"num_input_tokens_seen": 48000956,
"step": 278
},
{
"epoch": 0.7265625,
"grad_norm": 10.669002227751372,
"learning_rate": 5e-06,
"loss": 0.0673,
"num_input_tokens_seen": 48173420,
"step": 279
},
{
"epoch": 0.7265625,
"loss": 0.04094023257493973,
"loss_ce": 0.00010771260713227093,
"loss_iou": 0.482421875,
"loss_num": 0.0081787109375,
"loss_xval": 0.040771484375,
"num_input_tokens_seen": 48173420,
"step": 279
},
{
"epoch": 0.7291666666666666,
"grad_norm": 6.013727130209973,
"learning_rate": 5e-06,
"loss": 0.0843,
"num_input_tokens_seen": 48346040,
"step": 280
},
{
"epoch": 0.7291666666666666,
"loss": 0.06400243937969208,
"loss_ce": 6.811654020566493e-05,
"loss_iou": 0.5546875,
"loss_num": 0.0128173828125,
"loss_xval": 0.06396484375,
"num_input_tokens_seen": 48346040,
"step": 280
},
{
"epoch": 0.7317708333333334,
"grad_norm": 6.320025783309937,
"learning_rate": 5e-06,
"loss": 0.0776,
"num_input_tokens_seen": 48518684,
"step": 281
},
{
"epoch": 0.7317708333333334,
"loss": 0.08228301256895065,
"loss_ce": 0.00012969484669156373,
"loss_iou": 0.7421875,
"loss_num": 0.0164794921875,
"loss_xval": 0.08203125,
"num_input_tokens_seen": 48518684,
"step": 281
},
{
"epoch": 0.734375,
"grad_norm": 2.3539480804430353,
"learning_rate": 5e-06,
"loss": 0.064,
"num_input_tokens_seen": 48691296,
"step": 282
},
{
"epoch": 0.734375,
"loss": 0.06360374391078949,
"loss_ce": 5.089196565677412e-05,
"loss_iou": 0.55859375,
"loss_num": 0.0126953125,
"loss_xval": 0.0634765625,
"num_input_tokens_seen": 48691296,
"step": 282
},
{
"epoch": 0.7369791666666666,
"grad_norm": 4.165777643617544,
"learning_rate": 5e-06,
"loss": 0.0581,
"num_input_tokens_seen": 48864252,
"step": 283
},
{
"epoch": 0.7369791666666666,
"loss": 0.07076792418956757,
"loss_ce": 5.869198503205553e-05,
"loss_iou": 0.455078125,
"loss_num": 0.01409912109375,
"loss_xval": 0.07080078125,
"num_input_tokens_seen": 48864252,
"step": 283
},
{
"epoch": 0.7395833333333334,
"grad_norm": 4.530184060910693,
"learning_rate": 5e-06,
"loss": 0.0857,
"num_input_tokens_seen": 49037116,
"step": 284
},
{
"epoch": 0.7395833333333334,
"loss": 0.10114337503910065,
"loss_ce": 6.914998812135309e-05,
"loss_iou": 0.66015625,
"loss_num": 0.020263671875,
"loss_xval": 0.10107421875,
"num_input_tokens_seen": 49037116,
"step": 284
},
{
"epoch": 0.7421875,
"grad_norm": 7.025143291686679,
"learning_rate": 5e-06,
"loss": 0.09,
"num_input_tokens_seen": 49209880,
"step": 285
},
{
"epoch": 0.7421875,
"loss": 0.07852576673030853,
"loss_ce": 3.455359546933323e-05,
"loss_iou": 0.5859375,
"loss_num": 0.0157470703125,
"loss_xval": 0.07861328125,
"num_input_tokens_seen": 49209880,
"step": 285
},
{
"epoch": 0.7447916666666666,
"grad_norm": 8.858211776100614,
"learning_rate": 5e-06,
"loss": 0.084,
"num_input_tokens_seen": 49381700,
"step": 286
},
{
"epoch": 0.7447916666666666,
"loss": 0.05212024226784706,
"loss_ce": 5.7252564147347584e-05,
"loss_iou": 0.7265625,
"loss_num": 0.01043701171875,
"loss_xval": 0.052001953125,
"num_input_tokens_seen": 49381700,
"step": 286
},
{
"epoch": 0.7473958333333334,
"grad_norm": 3.6537179877047663,
"learning_rate": 5e-06,
"loss": 0.0989,
"num_input_tokens_seen": 49554536,
"step": 287
},
{
"epoch": 0.7473958333333334,
"loss": 0.055037256330251694,
"loss_ce": 2.9322651244001463e-05,
"loss_iou": 0.546875,
"loss_num": 0.010986328125,
"loss_xval": 0.054931640625,
"num_input_tokens_seen": 49554536,
"step": 287
},
{
"epoch": 0.75,
"grad_norm": 5.570461350284086,
"learning_rate": 5e-06,
"loss": 0.0772,
"num_input_tokens_seen": 49726396,
"step": 288
},
{
"epoch": 0.75,
"loss": 0.07801353931427002,
"loss_ce": 7.164124690461904e-05,
"loss_iou": 0.451171875,
"loss_num": 0.015625,
"loss_xval": 0.078125,
"num_input_tokens_seen": 49726396,
"step": 288
},
{
"epoch": 0.7526041666666666,
"grad_norm": 5.806990578827175,
"learning_rate": 5e-06,
"loss": 0.0737,
"num_input_tokens_seen": 49899536,
"step": 289
},
{
"epoch": 0.7526041666666666,
"loss": 0.09019728004932404,
"loss_ce": 4.8357818741351366e-05,
"loss_iou": 0.46484375,
"loss_num": 0.01806640625,
"loss_xval": 0.09033203125,
"num_input_tokens_seen": 49899536,
"step": 289
},
{
"epoch": 0.7552083333333334,
"grad_norm": 6.584433746493665,
"learning_rate": 5e-06,
"loss": 0.0753,
"num_input_tokens_seen": 50072028,
"step": 290
},
{
"epoch": 0.7552083333333334,
"loss": 0.05955757200717926,
"loss_ce": 0.00013984768884256482,
"loss_iou": 0.5,
"loss_num": 0.01190185546875,
"loss_xval": 0.059326171875,
"num_input_tokens_seen": 50072028,
"step": 290
},
{
"epoch": 0.7578125,
"grad_norm": 4.769362882722307,
"learning_rate": 5e-06,
"loss": 0.0836,
"num_input_tokens_seen": 50244788,
"step": 291
},
{
"epoch": 0.7578125,
"loss": 0.08353784680366516,
"loss_ce": 4.175720823695883e-05,
"loss_iou": 0.58203125,
"loss_num": 0.0167236328125,
"loss_xval": 0.08349609375,
"num_input_tokens_seen": 50244788,
"step": 291
},
{
"epoch": 0.7604166666666666,
"grad_norm": 4.630970710069874,
"learning_rate": 5e-06,
"loss": 0.0924,
"num_input_tokens_seen": 50417020,
"step": 292
},
{
"epoch": 0.7604166666666666,
"loss": 0.0867491364479065,
"loss_ce": 6.395512173185125e-05,
"loss_iou": 0.0,
"loss_num": 0.017333984375,
"loss_xval": 0.0869140625,
"num_input_tokens_seen": 50417020,
"step": 292
},
{
"epoch": 0.7630208333333334,
"grad_norm": 4.771052495662392,
"learning_rate": 5e-06,
"loss": 0.0789,
"num_input_tokens_seen": 50589288,
"step": 293
},
{
"epoch": 0.7630208333333334,
"loss": 0.04268595576286316,
"loss_ce": 3.76409079763107e-05,
"loss_iou": 0.62109375,
"loss_num": 0.008544921875,
"loss_xval": 0.042724609375,
"num_input_tokens_seen": 50589288,
"step": 293
},
{
"epoch": 0.765625,
"grad_norm": 5.549980291826297,
"learning_rate": 5e-06,
"loss": 0.1124,
"num_input_tokens_seen": 50762276,
"step": 294
},
{
"epoch": 0.765625,
"loss": 0.08056493103504181,
"loss_ce": 5.956060340395197e-05,
"loss_iou": 0.58203125,
"loss_num": 0.01611328125,
"loss_xval": 0.08056640625,
"num_input_tokens_seen": 50762276,
"step": 294
},
{
"epoch": 0.7682291666666666,
"grad_norm": 58.66835057028341,
"learning_rate": 5e-06,
"loss": 0.0912,
"num_input_tokens_seen": 50935292,
"step": 295
},
{
"epoch": 0.7682291666666666,
"loss": 0.12624840438365936,
"loss_ce": 5.8218334743287414e-05,
"loss_iou": 0.72265625,
"loss_num": 0.0252685546875,
"loss_xval": 0.1259765625,
"num_input_tokens_seen": 50935292,
"step": 295
},
{
"epoch": 0.7708333333333334,
"grad_norm": 5.644622915739686,
"learning_rate": 5e-06,
"loss": 0.0868,
"num_input_tokens_seen": 51108336,
"step": 296
},
{
"epoch": 0.7708333333333334,
"loss": 0.1262531876564026,
"loss_ce": 9.352029883302748e-05,
"loss_iou": 0.6015625,
"loss_num": 0.0252685546875,
"loss_xval": 0.1259765625,
"num_input_tokens_seen": 51108336,
"step": 296
},
{
"epoch": 0.7734375,
"grad_norm": 9.321237615443232,
"learning_rate": 5e-06,
"loss": 0.1107,
"num_input_tokens_seen": 51280676,
"step": 297
},
{
"epoch": 0.7734375,
"loss": 0.08918002992868423,
"loss_ce": 6.8700457632076e-05,
"loss_iou": 0.6484375,
"loss_num": 0.017822265625,
"loss_xval": 0.0888671875,
"num_input_tokens_seen": 51280676,
"step": 297
},
{
"epoch": 0.7760416666666666,
"grad_norm": 8.413905673909936,
"learning_rate": 5e-06,
"loss": 0.0788,
"num_input_tokens_seen": 51452600,
"step": 298
},
{
"epoch": 0.7760416666666666,
"loss": 0.06944364309310913,
"loss_ce": 7.71840859670192e-05,
"loss_iou": 0.53125,
"loss_num": 0.01385498046875,
"loss_xval": 0.0693359375,
"num_input_tokens_seen": 51452600,
"step": 298
},
{
"epoch": 0.7786458333333334,
"grad_norm": 3.9297688671160738,
"learning_rate": 5e-06,
"loss": 0.08,
"num_input_tokens_seen": 51625560,
"step": 299
},
{
"epoch": 0.7786458333333334,
"loss": 0.12099509686231613,
"loss_ce": 0.00014548808394465595,
"loss_iou": 0.447265625,
"loss_num": 0.024169921875,
"loss_xval": 0.12109375,
"num_input_tokens_seen": 51625560,
"step": 299
},
{
"epoch": 0.78125,
"grad_norm": 8.840803926190146,
"learning_rate": 5e-06,
"loss": 0.0903,
"num_input_tokens_seen": 51797848,
"step": 300
},
{
"epoch": 0.78125,
"loss": 0.08386749029159546,
"loss_ce": 0.00018828835163731128,
"loss_iou": 0.62890625,
"loss_num": 0.0167236328125,
"loss_xval": 0.08349609375,
"num_input_tokens_seen": 51797848,
"step": 300
},
{
"epoch": 0.7838541666666666,
"grad_norm": 6.267252913184398,
"learning_rate": 5e-06,
"loss": 0.0768,
"num_input_tokens_seen": 51970968,
"step": 301
},
{
"epoch": 0.7838541666666666,
"loss": 0.08612730354070663,
"loss_ce": 0.000205064527108334,
"loss_iou": 0.470703125,
"loss_num": 0.0172119140625,
"loss_xval": 0.0859375,
"num_input_tokens_seen": 51970968,
"step": 301
},
{
"epoch": 0.7864583333333334,
"grad_norm": 5.712597753331284,
"learning_rate": 5e-06,
"loss": 0.0856,
"num_input_tokens_seen": 52143656,
"step": 302
},
{
"epoch": 0.7864583333333334,
"loss": 0.14908897876739502,
"loss_ce": 0.00016319258429575711,
"loss_iou": 0.6796875,
"loss_num": 0.02978515625,
"loss_xval": 0.1484375,
"num_input_tokens_seen": 52143656,
"step": 302
},
{
"epoch": 0.7890625,
"grad_norm": 8.35751018269278,
"learning_rate": 5e-06,
"loss": 0.0759,
"num_input_tokens_seen": 52316820,
"step": 303
},
{
"epoch": 0.7890625,
"loss": 0.059616073966026306,
"loss_ce": 7.628079038113356e-05,
"loss_iou": 0.5546875,
"loss_num": 0.01190185546875,
"loss_xval": 0.0595703125,
"num_input_tokens_seen": 52316820,
"step": 303
},
{
"epoch": 0.7916666666666666,
"grad_norm": 21.438956075626194,
"learning_rate": 5e-06,
"loss": 0.0912,
"num_input_tokens_seen": 52489896,
"step": 304
},
{
"epoch": 0.7916666666666666,
"loss": 0.10695922374725342,
"loss_ce": 0.00010192444460699335,
"loss_iou": 0.6171875,
"loss_num": 0.0213623046875,
"loss_xval": 0.10693359375,
"num_input_tokens_seen": 52489896,
"step": 304
},
{
"epoch": 0.7942708333333334,
"grad_norm": 11.563280258074105,
"learning_rate": 5e-06,
"loss": 0.0721,
"num_input_tokens_seen": 52662040,
"step": 305
},
{
"epoch": 0.7942708333333334,
"loss": 0.08778760582208633,
"loss_ce": 4.956443444825709e-05,
"loss_iou": 0.609375,
"loss_num": 0.017578125,
"loss_xval": 0.087890625,
"num_input_tokens_seen": 52662040,
"step": 305
},
{
"epoch": 0.796875,
"grad_norm": 3.1582836546422683,
"learning_rate": 5e-06,
"loss": 0.0762,
"num_input_tokens_seen": 52833528,
"step": 306
},
{
"epoch": 0.796875,
"loss": 0.048152316361665726,
"loss_ce": 5.6614066124893725e-05,
"loss_iou": 0.0,
"loss_num": 0.0096435546875,
"loss_xval": 0.048095703125,
"num_input_tokens_seen": 52833528,
"step": 306
},
{
"epoch": 0.7994791666666666,
"grad_norm": 3.9541505621592403,
"learning_rate": 5e-06,
"loss": 0.0728,
"num_input_tokens_seen": 53006328,
"step": 307
},
{
"epoch": 0.7994791666666666,
"loss": 0.07045421004295349,
"loss_ce": 6.541772745549679e-05,
"loss_iou": 0.51953125,
"loss_num": 0.01409912109375,
"loss_xval": 0.0703125,
"num_input_tokens_seen": 53006328,
"step": 307
},
{
"epoch": 0.8020833333333334,
"grad_norm": 21.04478597433239,
"learning_rate": 5e-06,
"loss": 0.0841,
"num_input_tokens_seen": 53179340,
"step": 308
},
{
"epoch": 0.8020833333333334,
"loss": 0.04340720921754837,
"loss_ce": 7.224958972074091e-05,
"loss_iou": 0.486328125,
"loss_num": 0.0086669921875,
"loss_xval": 0.04345703125,
"num_input_tokens_seen": 53179340,
"step": 308
},
{
"epoch": 0.8046875,
"grad_norm": 36.45731809620038,
"learning_rate": 5e-06,
"loss": 0.0903,
"num_input_tokens_seen": 53352024,
"step": 309
},
{
"epoch": 0.8046875,
"loss": 0.0791003406047821,
"loss_ce": 9.0329660451971e-05,
"loss_iou": 0.59375,
"loss_num": 0.0157470703125,
"loss_xval": 0.0791015625,
"num_input_tokens_seen": 53352024,
"step": 309
},
{
"epoch": 0.8072916666666666,
"grad_norm": 3.6922772893156828,
"learning_rate": 5e-06,
"loss": 0.0982,
"num_input_tokens_seen": 53524908,
"step": 310
},
{
"epoch": 0.8072916666666666,
"loss": 0.06811343133449554,
"loss_ce": 0.00012026849435642362,
"loss_iou": 0.546875,
"loss_num": 0.01361083984375,
"loss_xval": 0.06787109375,
"num_input_tokens_seen": 53524908,
"step": 310
},
{
"epoch": 0.8098958333333334,
"grad_norm": 27.046913168708976,
"learning_rate": 5e-06,
"loss": 0.081,
"num_input_tokens_seen": 53696732,
"step": 311
},
{
"epoch": 0.8098958333333334,
"loss": 0.0632261261343956,
"loss_ce": 3.94820308429189e-05,
"loss_iou": 0.62109375,
"loss_num": 0.01263427734375,
"loss_xval": 0.06298828125,
"num_input_tokens_seen": 53696732,
"step": 311
},
{
"epoch": 0.8125,
"grad_norm": 14.857627339858754,
"learning_rate": 5e-06,
"loss": 0.1017,
"num_input_tokens_seen": 53869308,
"step": 312
},
{
"epoch": 0.8125,
"loss": 0.07280921936035156,
"loss_ce": 0.0001468673290219158,
"loss_iou": 0.482421875,
"loss_num": 0.0145263671875,
"loss_xval": 0.07275390625,
"num_input_tokens_seen": 53869308,
"step": 312
},
{
"epoch": 0.8151041666666666,
"grad_norm": 4.652815682219442,
"learning_rate": 5e-06,
"loss": 0.0804,
"num_input_tokens_seen": 54042004,
"step": 313
},
{
"epoch": 0.8151041666666666,
"loss": 0.08997043967247009,
"loss_ce": 3.514082345645875e-05,
"loss_iou": 0.73046875,
"loss_num": 0.0179443359375,
"loss_xval": 0.08984375,
"num_input_tokens_seen": 54042004,
"step": 313
},
{
"epoch": 0.8177083333333334,
"grad_norm": 4.472330671881049,
"learning_rate": 5e-06,
"loss": 0.0766,
"num_input_tokens_seen": 54214544,
"step": 314
},
{
"epoch": 0.8177083333333334,
"loss": 0.05066576227545738,
"loss_ce": 3.710209784912877e-05,
"loss_iou": 0.462890625,
"loss_num": 0.0101318359375,
"loss_xval": 0.050537109375,
"num_input_tokens_seen": 54214544,
"step": 314
},
{
"epoch": 0.8203125,
"grad_norm": 14.395534068995472,
"learning_rate": 5e-06,
"loss": 0.0834,
"num_input_tokens_seen": 54387032,
"step": 315
},
{
"epoch": 0.8203125,
"loss": 0.11046263575553894,
"loss_ce": 5.003847763873637e-05,
"loss_iou": 0.609375,
"loss_num": 0.0220947265625,
"loss_xval": 0.1103515625,
"num_input_tokens_seen": 54387032,
"step": 315
},
{
"epoch": 0.8229166666666666,
"grad_norm": 5.6405315516941545,
"learning_rate": 5e-06,
"loss": 0.071,
"num_input_tokens_seen": 54559764,
"step": 316
},
{
"epoch": 0.8229166666666666,
"loss": 0.07865004241466522,
"loss_ce": 0.00015882565639913082,
"loss_iou": 0.5859375,
"loss_num": 0.0157470703125,
"loss_xval": 0.07861328125,
"num_input_tokens_seen": 54559764,
"step": 316
},
{
"epoch": 0.8255208333333334,
"grad_norm": 8.404403222163058,
"learning_rate": 5e-06,
"loss": 0.0788,
"num_input_tokens_seen": 54732960,
"step": 317
},
{
"epoch": 0.8255208333333334,
"loss": 0.09951162338256836,
"loss_ce": 5.4833071772009134e-05,
"loss_iou": 0.462890625,
"loss_num": 0.0198974609375,
"loss_xval": 0.099609375,
"num_input_tokens_seen": 54732960,
"step": 317
},
{
"epoch": 0.828125,
"grad_norm": 12.856336033562837,
"learning_rate": 5e-06,
"loss": 0.0745,
"num_input_tokens_seen": 54905888,
"step": 318
},
{
"epoch": 0.828125,
"loss": 0.05548687279224396,
"loss_ce": 0.00015850822092033923,
"loss_iou": 0.5078125,
"loss_num": 0.0111083984375,
"loss_xval": 0.055419921875,
"num_input_tokens_seen": 54905888,
"step": 318
},
{
"epoch": 0.8307291666666666,
"grad_norm": 7.5015307945338545,
"learning_rate": 5e-06,
"loss": 0.0814,
"num_input_tokens_seen": 55078584,
"step": 319
},
{
"epoch": 0.8307291666666666,
"loss": 0.09399284422397614,
"loss_ce": 0.00018180246115662158,
"loss_iou": 0.8203125,
"loss_num": 0.018798828125,
"loss_xval": 0.09375,
"num_input_tokens_seen": 55078584,
"step": 319
},
{
"epoch": 0.8333333333333334,
"grad_norm": 3.549717733561083,
"learning_rate": 5e-06,
"loss": 0.0888,
"num_input_tokens_seen": 55251416,
"step": 320
},
{
"epoch": 0.8333333333333334,
"loss": 0.10571445524692535,
"loss_ce": 0.00012363299902062863,
"loss_iou": 0.390625,
"loss_num": 0.0211181640625,
"loss_xval": 0.10546875,
"num_input_tokens_seen": 55251416,
"step": 320
},
{
"epoch": 0.8359375,
"grad_norm": 15.137913189345245,
"learning_rate": 5e-06,
"loss": 0.0813,
"num_input_tokens_seen": 55424308,
"step": 321
},
{
"epoch": 0.8359375,
"loss": 0.12823191285133362,
"loss_ce": 0.0001038559275912121,
"loss_iou": 0.423828125,
"loss_num": 0.025634765625,
"loss_xval": 0.1279296875,
"num_input_tokens_seen": 55424308,
"step": 321
},
{
"epoch": 0.8385416666666666,
"grad_norm": 4.928878873643115,
"learning_rate": 5e-06,
"loss": 0.1069,
"num_input_tokens_seen": 55597376,
"step": 322
},
{
"epoch": 0.8385416666666666,
"loss": 0.1322542130947113,
"loss_ce": 2.1536015992751345e-05,
"loss_iou": 0.578125,
"loss_num": 0.0264892578125,
"loss_xval": 0.1318359375,
"num_input_tokens_seen": 55597376,
"step": 322
},
{
"epoch": 0.8411458333333334,
"grad_norm": 19.737058147658324,
"learning_rate": 5e-06,
"loss": 0.0898,
"num_input_tokens_seen": 55769600,
"step": 323
},
{
"epoch": 0.8411458333333334,
"loss": 0.17265748977661133,
"loss_ce": 8.058187086135149e-05,
"loss_iou": 0.0,
"loss_num": 0.034423828125,
"loss_xval": 0.1728515625,
"num_input_tokens_seen": 55769600,
"step": 323
},
{
"epoch": 0.84375,
"grad_norm": 7.152491955998749,
"learning_rate": 5e-06,
"loss": 0.0743,
"num_input_tokens_seen": 55942580,
"step": 324
},
{
"epoch": 0.84375,
"loss": 0.059846702963113785,
"loss_ce": 6.276796921156347e-05,
"loss_iou": 0.5234375,
"loss_num": 0.011962890625,
"loss_xval": 0.059814453125,
"num_input_tokens_seen": 55942580,
"step": 324
},
{
"epoch": 0.8463541666666666,
"grad_norm": 6.664096474807532,
"learning_rate": 5e-06,
"loss": 0.0831,
"num_input_tokens_seen": 56115528,
"step": 325
},
{
"epoch": 0.8463541666666666,
"loss": 0.06182098388671875,
"loss_ce": 3.814647061517462e-05,
"loss_iou": 0.7734375,
"loss_num": 0.01239013671875,
"loss_xval": 0.061767578125,
"num_input_tokens_seen": 56115528,
"step": 325
},
{
"epoch": 0.8489583333333334,
"grad_norm": 4.533780334308584,
"learning_rate": 5e-06,
"loss": 0.085,
"num_input_tokens_seen": 56288484,
"step": 326
},
{
"epoch": 0.8489583333333334,
"loss": 0.10065165907144547,
"loss_ce": 0.00015727368008811027,
"loss_iou": 0.4453125,
"loss_num": 0.0201416015625,
"loss_xval": 0.1005859375,
"num_input_tokens_seen": 56288484,
"step": 326
},
{
"epoch": 0.8515625,
"grad_norm": 4.639727507170639,
"learning_rate": 5e-06,
"loss": 0.0767,
"num_input_tokens_seen": 56460840,
"step": 327
},
{
"epoch": 0.8515625,
"loss": 0.0594358891248703,
"loss_ce": 7.919950439827517e-05,
"loss_iou": 0.69140625,
"loss_num": 0.01190185546875,
"loss_xval": 0.059326171875,
"num_input_tokens_seen": 56460840,
"step": 327
},
{
"epoch": 0.8541666666666666,
"grad_norm": 4.945822599515496,
"learning_rate": 5e-06,
"loss": 0.0809,
"num_input_tokens_seen": 56633612,
"step": 328
},
{
"epoch": 0.8541666666666666,
"loss": 0.10031691938638687,
"loss_ce": 3.615960304159671e-05,
"loss_iou": 0.462890625,
"loss_num": 0.02001953125,
"loss_xval": 0.10009765625,
"num_input_tokens_seen": 56633612,
"step": 328
},
{
"epoch": 0.8567708333333334,
"grad_norm": 17.721130156863943,
"learning_rate": 5e-06,
"loss": 0.0852,
"num_input_tokens_seen": 56806644,
"step": 329
},
{
"epoch": 0.8567708333333334,
"loss": 0.06605279445648193,
"loss_ce": 0.0002263778733322397,
"loss_iou": 0.60546875,
"loss_num": 0.01318359375,
"loss_xval": 0.06591796875,
"num_input_tokens_seen": 56806644,
"step": 329
},
{
"epoch": 0.859375,
"grad_norm": 4.228842310344442,
"learning_rate": 5e-06,
"loss": 0.0829,
"num_input_tokens_seen": 56978832,
"step": 330
},
{
"epoch": 0.859375,
"loss": 0.09333358705043793,
"loss_ce": 7.186534639913589e-05,
"loss_iou": 0.48046875,
"loss_num": 0.0186767578125,
"loss_xval": 0.09326171875,
"num_input_tokens_seen": 56978832,
"step": 330
},
{
"epoch": 0.8619791666666666,
"grad_norm": 4.379026646689163,
"learning_rate": 5e-06,
"loss": 0.0708,
"num_input_tokens_seen": 57151476,
"step": 331
},
{
"epoch": 0.8619791666666666,
"loss": 0.05179120972752571,
"loss_ce": 6.39162608422339e-05,
"loss_iou": 0.49609375,
"loss_num": 0.0103759765625,
"loss_xval": 0.0517578125,
"num_input_tokens_seen": 57151476,
"step": 331
},
{
"epoch": 0.8645833333333334,
"grad_norm": 9.125447816364591,
"learning_rate": 5e-06,
"loss": 0.0697,
"num_input_tokens_seen": 57323400,
"step": 332
},
{
"epoch": 0.8645833333333334,
"loss": 0.10480596870183945,
"loss_ce": 6.964314525248483e-05,
"loss_iou": 0.51171875,
"loss_num": 0.02099609375,
"loss_xval": 0.1044921875,
"num_input_tokens_seen": 57323400,
"step": 332
},
{
"epoch": 0.8671875,
"grad_norm": 5.241494036335466,
"learning_rate": 5e-06,
"loss": 0.0617,
"num_input_tokens_seen": 57496068,
"step": 333
},
{
"epoch": 0.8671875,
"loss": 0.06167437136173248,
"loss_ce": 0.00012041500303894281,
"loss_iou": 0.71875,
"loss_num": 0.0123291015625,
"loss_xval": 0.0615234375,
"num_input_tokens_seen": 57496068,
"step": 333
},
{
"epoch": 0.8697916666666666,
"grad_norm": 3.7921802750366664,
"learning_rate": 5e-06,
"loss": 0.0859,
"num_input_tokens_seen": 57669236,
"step": 334
},
{
"epoch": 0.8697916666666666,
"loss": 0.03953123837709427,
"loss_ce": 0.00020934098574798554,
"loss_iou": 0.47265625,
"loss_num": 0.00787353515625,
"loss_xval": 0.039306640625,
"num_input_tokens_seen": 57669236,
"step": 334
},
{
"epoch": 0.8723958333333334,
"grad_norm": 7.046054210110739,
"learning_rate": 5e-06,
"loss": 0.0857,
"num_input_tokens_seen": 57841676,
"step": 335
},
{
"epoch": 0.8723958333333334,
"loss": 0.13086940348148346,
"loss_ce": 4.054443706991151e-05,
"loss_iou": 0.46875,
"loss_num": 0.026123046875,
"loss_xval": 0.130859375,
"num_input_tokens_seen": 57841676,
"step": 335
},
{
"epoch": 0.875,
"grad_norm": 7.658169223957337,
"learning_rate": 5e-06,
"loss": 0.0927,
"num_input_tokens_seen": 58015076,
"step": 336
},
{
"epoch": 0.875,
"loss": 0.03890954330563545,
"loss_ce": 0.0006252414314076304,
"loss_iou": 0.52734375,
"loss_num": 0.007659912109375,
"loss_xval": 0.038330078125,
"num_input_tokens_seen": 58015076,
"step": 336
},
{
"epoch": 0.8776041666666666,
"grad_norm": 4.319289196507174,
"learning_rate": 5e-06,
"loss": 0.0931,
"num_input_tokens_seen": 58187592,
"step": 337
},
{
"epoch": 0.8776041666666666,
"loss": 0.10160954296588898,
"loss_ce": 4.7046225517988205e-05,
"loss_iou": 0.625,
"loss_num": 0.0203857421875,
"loss_xval": 0.1015625,
"num_input_tokens_seen": 58187592,
"step": 337
},
{
"epoch": 0.8802083333333334,
"grad_norm": 8.26220496195536,
"learning_rate": 5e-06,
"loss": 0.0691,
"num_input_tokens_seen": 58360220,
"step": 338
},
{
"epoch": 0.8802083333333334,
"loss": 0.05522051081061363,
"loss_ce": 4.472649015951902e-05,
"loss_iou": 0.44140625,
"loss_num": 0.01104736328125,
"loss_xval": 0.05517578125,
"num_input_tokens_seen": 58360220,
"step": 338
},
{
"epoch": 0.8828125,
"grad_norm": 3.8822556756341036,
"learning_rate": 5e-06,
"loss": 0.0498,
"num_input_tokens_seen": 58532536,
"step": 339
},
{
"epoch": 0.8828125,
"loss": 0.07371848821640015,
"loss_ce": 7.957669731695205e-05,
"loss_iou": 0.515625,
"loss_num": 0.01470947265625,
"loss_xval": 0.07373046875,
"num_input_tokens_seen": 58532536,
"step": 339
},
{
"epoch": 0.8854166666666666,
"grad_norm": 9.884171334560891,
"learning_rate": 5e-06,
"loss": 0.0594,
"num_input_tokens_seen": 58705004,
"step": 340
},
{
"epoch": 0.8854166666666666,
"loss": 0.06320229917764664,
"loss_ce": 6.143321661511436e-05,
"loss_iou": 0.44921875,
"loss_num": 0.01263427734375,
"loss_xval": 0.06298828125,
"num_input_tokens_seen": 58705004,
"step": 340
},
{
"epoch": 0.8880208333333334,
"grad_norm": 5.196359666592977,
"learning_rate": 5e-06,
"loss": 0.0404,
"num_input_tokens_seen": 58878152,
"step": 341
},
{
"epoch": 0.8880208333333334,
"loss": 0.03186158090829849,
"loss_ce": 0.0002453709894325584,
"loss_iou": 0.546875,
"loss_num": 0.006317138671875,
"loss_xval": 0.03173828125,
"num_input_tokens_seen": 58878152,
"step": 341
},
{
"epoch": 0.890625,
"grad_norm": 6.15237627529603,
"learning_rate": 5e-06,
"loss": 0.0741,
"num_input_tokens_seen": 59050440,
"step": 342
},
{
"epoch": 0.890625,
"loss": 0.05918329954147339,
"loss_ce": 5.5491131206508726e-05,
"loss_iou": 0.36328125,
"loss_num": 0.0118408203125,
"loss_xval": 0.05908203125,
"num_input_tokens_seen": 59050440,
"step": 342
},
{
"epoch": 0.8932291666666666,
"grad_norm": 2.027289516848372,
"learning_rate": 5e-06,
"loss": 0.0797,
"num_input_tokens_seen": 59223528,
"step": 343
},
{
"epoch": 0.8932291666666666,
"loss": 0.0674634724855423,
"loss_ce": 3.488633592496626e-05,
"loss_iou": 0.443359375,
"loss_num": 0.01348876953125,
"loss_xval": 0.0673828125,
"num_input_tokens_seen": 59223528,
"step": 343
},
{
"epoch": 0.8958333333333334,
"grad_norm": 3.3607059104825554,
"learning_rate": 5e-06,
"loss": 0.0925,
"num_input_tokens_seen": 59396320,
"step": 344
},
{
"epoch": 0.8958333333333334,
"loss": 0.04388820007443428,
"loss_ce": 8.021650137379766e-05,
"loss_iou": 0.0,
"loss_num": 0.0087890625,
"loss_xval": 0.043701171875,
"num_input_tokens_seen": 59396320,
"step": 344
},
{
"epoch": 0.8984375,
"grad_norm": 11.313403126591554,
"learning_rate": 5e-06,
"loss": 0.11,
"num_input_tokens_seen": 59568904,
"step": 345
},
{
"epoch": 0.8984375,
"loss": 0.059322062879800797,
"loss_ce": 8.744518709136173e-05,
"loss_iou": 0.5,
"loss_num": 0.0118408203125,
"loss_xval": 0.059326171875,
"num_input_tokens_seen": 59568904,
"step": 345
},
{
"epoch": 0.9010416666666666,
"grad_norm": 9.716589837853562,
"learning_rate": 5e-06,
"loss": 0.0927,
"num_input_tokens_seen": 59741504,
"step": 346
},
{
"epoch": 0.9010416666666666,
"loss": 0.08992569893598557,
"loss_ce": 5.143693124409765e-05,
"loss_iou": 0.578125,
"loss_num": 0.0179443359375,
"loss_xval": 0.08984375,
"num_input_tokens_seen": 59741504,
"step": 346
},
{
"epoch": 0.9036458333333334,
"grad_norm": 4.9130642144499985,
"learning_rate": 5e-06,
"loss": 0.0664,
"num_input_tokens_seen": 59913580,
"step": 347
},
{
"epoch": 0.9036458333333334,
"loss": 0.059036046266555786,
"loss_ce": 6.082511754357256e-05,
"loss_iou": 0.51953125,
"loss_num": 0.01177978515625,
"loss_xval": 0.05908203125,
"num_input_tokens_seen": 59913580,
"step": 347
},
{
"epoch": 0.90625,
"grad_norm": 5.535144728019767,
"learning_rate": 5e-06,
"loss": 0.0459,
"num_input_tokens_seen": 60086300,
"step": 348
},
{
"epoch": 0.90625,
"loss": 0.040458932518959045,
"loss_ce": 6.891523662488908e-05,
"loss_iou": 0.62109375,
"loss_num": 0.008056640625,
"loss_xval": 0.040283203125,
"num_input_tokens_seen": 60086300,
"step": 348
},
{
"epoch": 0.9088541666666666,
"grad_norm": 8.128924901708682,
"learning_rate": 5e-06,
"loss": 0.1183,
"num_input_tokens_seen": 60258804,
"step": 349
},
{
"epoch": 0.9088541666666666,
"loss": 0.10604314506053925,
"loss_ce": 2.508011857571546e-05,
"loss_iou": 0.58984375,
"loss_num": 0.021240234375,
"loss_xval": 0.10595703125,
"num_input_tokens_seen": 60258804,
"step": 349
},
{
"epoch": 0.9114583333333334,
"grad_norm": 6.130745719562545,
"learning_rate": 5e-06,
"loss": 0.1182,
"num_input_tokens_seen": 60431928,
"step": 350
},
{
"epoch": 0.9114583333333334,
"loss": 0.137631356716156,
"loss_ce": 5.8111756516154855e-05,
"loss_iou": 0.671875,
"loss_num": 0.0274658203125,
"loss_xval": 0.1376953125,
"num_input_tokens_seen": 60431928,
"step": 350
},
{
"epoch": 0.9140625,
"grad_norm": 6.6090310971417345,
"learning_rate": 5e-06,
"loss": 0.0682,
"num_input_tokens_seen": 60604596,
"step": 351
},
{
"epoch": 0.9140625,
"loss": 0.03837839514017105,
"loss_ce": 0.0008875515777617693,
"loss_iou": 0.5078125,
"loss_num": 0.00750732421875,
"loss_xval": 0.03759765625,
"num_input_tokens_seen": 60604596,
"step": 351
},
{
"epoch": 0.9166666666666666,
"grad_norm": 37.977702783393255,
"learning_rate": 5e-06,
"loss": 0.0839,
"num_input_tokens_seen": 60777696,
"step": 352
},
{
"epoch": 0.9166666666666666,
"loss": 0.05776657909154892,
"loss_ce": 7.309722423087806e-05,
"loss_iou": 0.578125,
"loss_num": 0.01153564453125,
"loss_xval": 0.0576171875,
"num_input_tokens_seen": 60777696,
"step": 352
},
{
"epoch": 0.9192708333333334,
"grad_norm": 10.793340791159972,
"learning_rate": 5e-06,
"loss": 0.1036,
"num_input_tokens_seen": 60950176,
"step": 353
},
{
"epoch": 0.9192708333333334,
"loss": 0.10405679047107697,
"loss_ce": 5.2886520279571414e-05,
"loss_iou": 0.51953125,
"loss_num": 0.020751953125,
"loss_xval": 0.10400390625,
"num_input_tokens_seen": 60950176,
"step": 353
},
{
"epoch": 0.921875,
"grad_norm": 4.2624655031129395,
"learning_rate": 5e-06,
"loss": 0.0638,
"num_input_tokens_seen": 61123352,
"step": 354
},
{
"epoch": 0.921875,
"loss": 0.045672204345464706,
"loss_ce": 4.842308408115059e-05,
"loss_iou": 0.640625,
"loss_num": 0.0091552734375,
"loss_xval": 0.045654296875,
"num_input_tokens_seen": 61123352,
"step": 354
},
{
"epoch": 0.9244791666666666,
"grad_norm": 5.263551596545367,
"learning_rate": 5e-06,
"loss": 0.0836,
"num_input_tokens_seen": 61296296,
"step": 355
},
{
"epoch": 0.9244791666666666,
"loss": 0.07579399645328522,
"loss_ce": 1.8844926671590656e-05,
"loss_iou": 0.484375,
"loss_num": 0.01513671875,
"loss_xval": 0.07568359375,
"num_input_tokens_seen": 61296296,
"step": 355
},
{
"epoch": 0.9270833333333334,
"grad_norm": 4.969020675022387,
"learning_rate": 5e-06,
"loss": 0.1161,
"num_input_tokens_seen": 61468464,
"step": 356
},
{
"epoch": 0.9270833333333334,
"loss": 0.0982382521033287,
"loss_ce": 9.372214117320254e-05,
"loss_iou": 0.474609375,
"loss_num": 0.0196533203125,
"loss_xval": 0.09814453125,
"num_input_tokens_seen": 61468464,
"step": 356
},
{
"epoch": 0.9296875,
"grad_norm": 9.751227404400339,
"learning_rate": 5e-06,
"loss": 0.0667,
"num_input_tokens_seen": 61641104,
"step": 357
},
{
"epoch": 0.9296875,
"loss": 0.08714728057384491,
"loss_ce": 6.537619628943503e-05,
"loss_iou": 0.4921875,
"loss_num": 0.0174560546875,
"loss_xval": 0.0869140625,
"num_input_tokens_seen": 61641104,
"step": 357
},
{
"epoch": 0.9322916666666666,
"grad_norm": 3.705998309698105,
"learning_rate": 5e-06,
"loss": 0.0614,
"num_input_tokens_seen": 61813956,
"step": 358
},
{
"epoch": 0.9322916666666666,
"loss": 0.05996260046958923,
"loss_ce": 7.185334106907248e-05,
"loss_iou": 0.439453125,
"loss_num": 0.011962890625,
"loss_xval": 0.059814453125,
"num_input_tokens_seen": 61813956,
"step": 358
},
{
"epoch": 0.9348958333333334,
"grad_norm": 5.61843483400317,
"learning_rate": 5e-06,
"loss": 0.0953,
"num_input_tokens_seen": 61987068,
"step": 359
},
{
"epoch": 0.9348958333333334,
"loss": 0.1060662716627121,
"loss_ce": 4.819741297978908e-05,
"loss_iou": 0.5078125,
"loss_num": 0.021240234375,
"loss_xval": 0.10595703125,
"num_input_tokens_seen": 61987068,
"step": 359
},
{
"epoch": 0.9375,
"grad_norm": 4.53602826237247,
"learning_rate": 5e-06,
"loss": 0.0859,
"num_input_tokens_seen": 62160000,
"step": 360
},
{
"epoch": 0.9375,
"loss": 0.12344817072153091,
"loss_ce": 3.5084449336864054e-05,
"loss_iou": 0.5859375,
"loss_num": 0.024658203125,
"loss_xval": 0.12353515625,
"num_input_tokens_seen": 62160000,
"step": 360
},
{
"epoch": 0.9401041666666666,
"grad_norm": 2.382495598116124,
"learning_rate": 5e-06,
"loss": 0.0654,
"num_input_tokens_seen": 62332704,
"step": 361
},
{
"epoch": 0.9401041666666666,
"loss": 0.04234257712960243,
"loss_ce": 9.099017916014418e-05,
"loss_iou": 0.53125,
"loss_num": 0.00848388671875,
"loss_xval": 0.042236328125,
"num_input_tokens_seen": 62332704,
"step": 361
},
{
"epoch": 0.9427083333333334,
"grad_norm": 3.67565808505264,
"learning_rate": 5e-06,
"loss": 0.0921,
"num_input_tokens_seen": 62505472,
"step": 362
},
{
"epoch": 0.9427083333333334,
"loss": 0.1404985636472702,
"loss_ce": 0.0001482181833125651,
"loss_iou": 0.41015625,
"loss_num": 0.028076171875,
"loss_xval": 0.140625,
"num_input_tokens_seen": 62505472,
"step": 362
},
{
"epoch": 0.9453125,
"grad_norm": 4.393117034860246,
"learning_rate": 5e-06,
"loss": 0.0746,
"num_input_tokens_seen": 62677852,
"step": 363
},
{
"epoch": 0.9453125,
"loss": 0.08209509402513504,
"loss_ce": 3.332511550979689e-05,
"loss_iou": 0.57421875,
"loss_num": 0.016357421875,
"loss_xval": 0.08203125,
"num_input_tokens_seen": 62677852,
"step": 363
},
{
"epoch": 0.9479166666666666,
"grad_norm": 56.3211081199482,
"learning_rate": 5e-06,
"loss": 0.0939,
"num_input_tokens_seen": 62850624,
"step": 364
},
{
"epoch": 0.9479166666666666,
"loss": 0.06467482447624207,
"loss_ce": 3.859533171635121e-05,
"loss_iou": 0.53515625,
"loss_num": 0.012939453125,
"loss_xval": 0.064453125,
"num_input_tokens_seen": 62850624,
"step": 364
},
{
"epoch": 0.9505208333333334,
"grad_norm": 4.908757065453886,
"learning_rate": 5e-06,
"loss": 0.0602,
"num_input_tokens_seen": 63022912,
"step": 365
},
{
"epoch": 0.9505208333333334,
"loss": 0.05544174462556839,
"loss_ce": 0.00011337252362864092,
"loss_iou": 0.578125,
"loss_num": 0.01104736328125,
"loss_xval": 0.055419921875,
"num_input_tokens_seen": 63022912,
"step": 365
},
{
"epoch": 0.953125,
"grad_norm": 7.057560906891319,
"learning_rate": 5e-06,
"loss": 0.0884,
"num_input_tokens_seen": 63195992,
"step": 366
},
{
"epoch": 0.953125,
"loss": 0.08902530372142792,
"loss_ce": 3.6048379115527496e-05,
"loss_iou": 0.51953125,
"loss_num": 0.017822265625,
"loss_xval": 0.0888671875,
"num_input_tokens_seen": 63195992,
"step": 366
},
{
"epoch": 0.9557291666666666,
"grad_norm": 4.46902192772412,
"learning_rate": 5e-06,
"loss": 0.0773,
"num_input_tokens_seen": 63368708,
"step": 367
},
{
"epoch": 0.9557291666666666,
"loss": 0.02907339483499527,
"loss_ce": 3.591889617382549e-05,
"loss_iou": 0.0,
"loss_num": 0.00579833984375,
"loss_xval": 0.029052734375,
"num_input_tokens_seen": 63368708,
"step": 367
},
{
"epoch": 0.9583333333333334,
"grad_norm": 4.189625335712974,
"learning_rate": 5e-06,
"loss": 0.0685,
"num_input_tokens_seen": 63541312,
"step": 368
},
{
"epoch": 0.9583333333333334,
"loss": 0.10639164596796036,
"loss_ce": 8.366195834241807e-05,
"loss_iou": 0.578125,
"loss_num": 0.021240234375,
"loss_xval": 0.1064453125,
"num_input_tokens_seen": 63541312,
"step": 368
},
{
"epoch": 0.9609375,
"grad_norm": 5.657094938839105,
"learning_rate": 5e-06,
"loss": 0.068,
"num_input_tokens_seen": 63713968,
"step": 369
},
{
"epoch": 0.9609375,
"loss": 0.08933991193771362,
"loss_ce": 3.0215423976187594e-05,
"loss_iou": 0.484375,
"loss_num": 0.017822265625,
"loss_xval": 0.08935546875,
"num_input_tokens_seen": 63713968,
"step": 369
},
{
"epoch": 0.9635416666666666,
"grad_norm": 5.423233634302121,
"learning_rate": 5e-06,
"loss": 0.0647,
"num_input_tokens_seen": 63886996,
"step": 370
},
{
"epoch": 0.9635416666666666,
"loss": 0.06539873778820038,
"loss_ce": 3.008513340319041e-05,
"loss_iou": 0.59765625,
"loss_num": 0.0130615234375,
"loss_xval": 0.0654296875,
"num_input_tokens_seen": 63886996,
"step": 370
},
{
"epoch": 0.9661458333333334,
"grad_norm": 5.395935683494909,
"learning_rate": 5e-06,
"loss": 0.09,
"num_input_tokens_seen": 64059660,
"step": 371
},
{
"epoch": 0.9661458333333334,
"loss": 0.12266229093074799,
"loss_ce": 7.318713323911652e-05,
"loss_iou": 0.5703125,
"loss_num": 0.0245361328125,
"loss_xval": 0.12255859375,
"num_input_tokens_seen": 64059660,
"step": 371
},
{
"epoch": 0.96875,
"grad_norm": 5.856107929033903,
"learning_rate": 5e-06,
"loss": 0.0854,
"num_input_tokens_seen": 64232096,
"step": 372
},
{
"epoch": 0.96875,
"loss": 0.09164264798164368,
"loss_ce": 4.413935312186368e-05,
"loss_iou": 0.0,
"loss_num": 0.018310546875,
"loss_xval": 0.091796875,
"num_input_tokens_seen": 64232096,
"step": 372
},
{
"epoch": 0.9713541666666666,
"grad_norm": 5.870548313752241,
"learning_rate": 5e-06,
"loss": 0.0559,
"num_input_tokens_seen": 64404756,
"step": 373
},
{
"epoch": 0.9713541666666666,
"loss": 0.046882934868335724,
"loss_ce": 2.3196011170512065e-05,
"loss_iou": 0.50390625,
"loss_num": 0.0093994140625,
"loss_xval": 0.046875,
"num_input_tokens_seen": 64404756,
"step": 373
},
{
"epoch": 0.9739583333333334,
"grad_norm": 10.532029531276638,
"learning_rate": 5e-06,
"loss": 0.0822,
"num_input_tokens_seen": 64577476,
"step": 374
},
{
"epoch": 0.9739583333333334,
"loss": 0.07333735376596451,
"loss_ce": 6.464817124651745e-05,
"loss_iou": 0.5859375,
"loss_num": 0.0146484375,
"loss_xval": 0.0732421875,
"num_input_tokens_seen": 64577476,
"step": 374
},
{
"epoch": 0.9765625,
"grad_norm": 8.746459219065029,
"learning_rate": 5e-06,
"loss": 0.0993,
"num_input_tokens_seen": 64750252,
"step": 375
},
{
"epoch": 0.9765625,
"loss": 0.08189202845096588,
"loss_ce": 2.8627566280192696e-05,
"loss_iou": 0.55078125,
"loss_num": 0.016357421875,
"loss_xval": 0.08203125,
"num_input_tokens_seen": 64750252,
"step": 375
},
{
"epoch": 0.9791666666666666,
"grad_norm": 4.369734068569422,
"learning_rate": 5e-06,
"loss": 0.0772,
"num_input_tokens_seen": 64922984,
"step": 376
},
{
"epoch": 0.9791666666666666,
"loss": 0.07155308127403259,
"loss_ce": 5.0396010919939727e-05,
"loss_iou": 0.51953125,
"loss_num": 0.01434326171875,
"loss_xval": 0.0712890625,
"num_input_tokens_seen": 64922984,
"step": 376
},
{
"epoch": 0.9817708333333334,
"grad_norm": 5.055558558635633,
"learning_rate": 5e-06,
"loss": 0.0739,
"num_input_tokens_seen": 65095228,
"step": 377
},
{
"epoch": 0.9817708333333334,
"loss": 0.10587326437234879,
"loss_ce": 2.304443478351459e-05,
"loss_iou": 0.37890625,
"loss_num": 0.0211181640625,
"loss_xval": 0.10595703125,
"num_input_tokens_seen": 65095228,
"step": 377
},
{
"epoch": 0.984375,
"grad_norm": 5.286209551414624,
"learning_rate": 5e-06,
"loss": 0.0876,
"num_input_tokens_seen": 65267596,
"step": 378
},
{
"epoch": 0.984375,
"loss": 0.06380397081375122,
"loss_ce": 5.275038711261004e-05,
"loss_iou": 0.59375,
"loss_num": 0.01275634765625,
"loss_xval": 0.06396484375,
"num_input_tokens_seen": 65267596,
"step": 378
},
{
"epoch": 0.9869791666666666,
"grad_norm": 4.779020534428804,
"learning_rate": 5e-06,
"loss": 0.0801,
"num_input_tokens_seen": 65439632,
"step": 379
},
{
"epoch": 0.9869791666666666,
"loss": 0.05044550448656082,
"loss_ce": 3.0469000193988904e-05,
"loss_iou": 0.68359375,
"loss_num": 0.01007080078125,
"loss_xval": 0.05029296875,
"num_input_tokens_seen": 65439632,
"step": 379
},
{
"epoch": 0.9895833333333334,
"grad_norm": 4.685839131970804,
"learning_rate": 5e-06,
"loss": 0.0717,
"num_input_tokens_seen": 65612188,
"step": 380
},
{
"epoch": 0.9895833333333334,
"loss": 0.05971755087375641,
"loss_ce": 2.5168032152578235e-05,
"loss_iou": 0.5234375,
"loss_num": 0.01190185546875,
"loss_xval": 0.0595703125,
"num_input_tokens_seen": 65612188,
"step": 380
},
{
"epoch": 0.9921875,
"grad_norm": 5.019679075383125,
"learning_rate": 5e-06,
"loss": 0.0852,
"num_input_tokens_seen": 65785132,
"step": 381
},
{
"epoch": 0.9921875,
"loss": 0.0729844868183136,
"loss_ce": 7.799551531206816e-05,
"loss_iou": 0.5390625,
"loss_num": 0.01458740234375,
"loss_xval": 0.07275390625,
"num_input_tokens_seen": 65785132,
"step": 381
},
{
"epoch": 0.9947916666666666,
"grad_norm": 5.2408542210225075,
"learning_rate": 5e-06,
"loss": 0.0618,
"num_input_tokens_seen": 65958084,
"step": 382
},
{
"epoch": 0.9947916666666666,
"loss": 0.07006223499774933,
"loss_ce": 2.439254785713274e-05,
"loss_iou": 0.400390625,
"loss_num": 0.0140380859375,
"loss_xval": 0.06982421875,
"num_input_tokens_seen": 65958084,
"step": 382
},
{
"epoch": 0.9973958333333334,
"grad_norm": 8.43973663796555,
"learning_rate": 5e-06,
"loss": 0.0739,
"num_input_tokens_seen": 66130316,
"step": 383
},
{
"epoch": 0.9973958333333334,
"loss": 0.05812692642211914,
"loss_ce": 0.00012826945749111474,
"loss_iou": 0.55078125,
"loss_num": 0.0115966796875,
"loss_xval": 0.05810546875,
"num_input_tokens_seen": 66130316,
"step": 383
},
{
"epoch": 1.0,
"grad_norm": 14.446394116068738,
"learning_rate": 5e-06,
"loss": 0.0949,
"num_input_tokens_seen": 66302752,
"step": 384
},
{
"epoch": 1.0,
"loss": 0.12496863305568695,
"loss_ce": 4.493331289268099e-05,
"loss_iou": 0.5703125,
"loss_num": 0.0250244140625,
"loss_xval": 0.125,
"num_input_tokens_seen": 66302752,
"step": 384
},
{
"epoch": 1.0026041666666667,
"grad_norm": 4.289918076646653,
"learning_rate": 5e-06,
"loss": 0.0624,
"num_input_tokens_seen": 66475484,
"step": 385
},
{
"epoch": 1.0026041666666667,
"loss": 0.04102395847439766,
"loss_ce": 2.3592958314111456e-05,
"loss_iou": 0.435546875,
"loss_num": 0.0081787109375,
"loss_xval": 0.041015625,
"num_input_tokens_seen": 66475484,
"step": 385
},
{
"epoch": 1.0052083333333333,
"grad_norm": 4.281041154001583,
"learning_rate": 5e-06,
"loss": 0.0857,
"num_input_tokens_seen": 66647384,
"step": 386
},
{
"epoch": 1.0052083333333333,
"loss": 0.12955166399478912,
"loss_ce": 3.50592345057521e-05,
"loss_iou": 0.5859375,
"loss_num": 0.02587890625,
"loss_xval": 0.1298828125,
"num_input_tokens_seen": 66647384,
"step": 386
},
{
"epoch": 1.0078125,
"grad_norm": 9.266009885978788,
"learning_rate": 5e-06,
"loss": 0.0655,
"num_input_tokens_seen": 66820256,
"step": 387
},
{
"epoch": 1.0078125,
"loss": 0.076566182076931,
"loss_ce": 4.3353000364732e-05,
"loss_iou": 0.48046875,
"loss_num": 0.01531982421875,
"loss_xval": 0.07666015625,
"num_input_tokens_seen": 66820256,
"step": 387
},
{
"epoch": 1.0104166666666667,
"grad_norm": 6.544465032820982,
"learning_rate": 5e-06,
"loss": 0.0798,
"num_input_tokens_seen": 66992440,
"step": 388
},
{
"epoch": 1.0104166666666667,
"loss": 0.0616319440305233,
"loss_ce": 4.7468380216741934e-05,
"loss_iou": 0.609375,
"loss_num": 0.0123291015625,
"loss_xval": 0.0615234375,
"num_input_tokens_seen": 66992440,
"step": 388
},
{
"epoch": 1.0130208333333333,
"grad_norm": 5.606310567972833,
"learning_rate": 5e-06,
"loss": 0.0857,
"num_input_tokens_seen": 67165064,
"step": 389
},
{
"epoch": 1.0130208333333333,
"loss": 0.08174864202737808,
"loss_ce": 2.2569187422050163e-05,
"loss_iou": 0.515625,
"loss_num": 0.016357421875,
"loss_xval": 0.08154296875,
"num_input_tokens_seen": 67165064,
"step": 389
},
{
"epoch": 1.015625,
"grad_norm": 11.386296445247536,
"learning_rate": 5e-06,
"loss": 0.1006,
"num_input_tokens_seen": 67338208,
"step": 390
},
{
"epoch": 1.015625,
"loss": 0.13062095642089844,
"loss_ce": 6.675285840174183e-05,
"loss_iou": 0.625,
"loss_num": 0.026123046875,
"loss_xval": 0.130859375,
"num_input_tokens_seen": 67338208,
"step": 390
},
{
"epoch": 1.0182291666666667,
"grad_norm": 9.218245759030461,
"learning_rate": 5e-06,
"loss": 0.0807,
"num_input_tokens_seen": 67511096,
"step": 391
},
{
"epoch": 1.0182291666666667,
"loss": 0.1230873167514801,
"loss_ce": 4.043774606543593e-05,
"loss_iou": 0.466796875,
"loss_num": 0.024658203125,
"loss_xval": 0.123046875,
"num_input_tokens_seen": 67511096,
"step": 391
},
{
"epoch": 1.0208333333333333,
"grad_norm": 7.360763519108863,
"learning_rate": 5e-06,
"loss": 0.0808,
"num_input_tokens_seen": 67683500,
"step": 392
},
{
"epoch": 1.0208333333333333,
"loss": 0.04704148322343826,
"loss_ce": 7.492824079236016e-05,
"loss_iou": 0.69140625,
"loss_num": 0.0093994140625,
"loss_xval": 0.046875,
"num_input_tokens_seen": 67683500,
"step": 392
},
{
"epoch": 1.0234375,
"grad_norm": 24.398414774258395,
"learning_rate": 5e-06,
"loss": 0.0653,
"num_input_tokens_seen": 67856564,
"step": 393
},
{
"epoch": 1.0234375,
"loss": 0.052695855498313904,
"loss_ce": 6.829424819443375e-05,
"loss_iou": 0.48828125,
"loss_num": 0.01055908203125,
"loss_xval": 0.052734375,
"num_input_tokens_seen": 67856564,
"step": 393
},
{
"epoch": 1.0260416666666667,
"grad_norm": 3.952882418188013,
"learning_rate": 5e-06,
"loss": 0.0773,
"num_input_tokens_seen": 68029008,
"step": 394
},
{
"epoch": 1.0260416666666667,
"loss": 0.05983951687812805,
"loss_ce": 5.558759949053638e-05,
"loss_iou": 0.0,
"loss_num": 0.011962890625,
"loss_xval": 0.059814453125,
"num_input_tokens_seen": 68029008,
"step": 394
},
{
"epoch": 1.0286458333333333,
"grad_norm": 6.537700275238822,
"learning_rate": 5e-06,
"loss": 0.0599,
"num_input_tokens_seen": 68201948,
"step": 395
},
{
"epoch": 1.0286458333333333,
"loss": 0.09255748987197876,
"loss_ce": 4.3449574150145054e-05,
"loss_iou": 0.53515625,
"loss_num": 0.0185546875,
"loss_xval": 0.09228515625,
"num_input_tokens_seen": 68201948,
"step": 395
},
{
"epoch": 1.03125,
"grad_norm": 9.576915699057926,
"learning_rate": 5e-06,
"loss": 0.0537,
"num_input_tokens_seen": 68374640,
"step": 396
},
{
"epoch": 1.03125,
"loss": 0.06341977417469025,
"loss_ce": 6.528654193971306e-05,
"loss_iou": 0.419921875,
"loss_num": 0.0126953125,
"loss_xval": 0.0634765625,
"num_input_tokens_seen": 68374640,
"step": 396
},
{
"epoch": 1.0338541666666667,
"grad_norm": 4.782272117515052,
"learning_rate": 5e-06,
"loss": 0.0954,
"num_input_tokens_seen": 68547628,
"step": 397
},
{
"epoch": 1.0338541666666667,
"loss": 0.05385718494653702,
"loss_ce": 5.469346069730818e-05,
"loss_iou": 0.56640625,
"loss_num": 0.0107421875,
"loss_xval": 0.0537109375,
"num_input_tokens_seen": 68547628,
"step": 397
},
{
"epoch": 1.0364583333333333,
"grad_norm": 5.7075554975649485,
"learning_rate": 5e-06,
"loss": 0.1176,
"num_input_tokens_seen": 68720856,
"step": 398
},
{
"epoch": 1.0364583333333333,
"loss": 0.04491497576236725,
"loss_ce": 2.361964106967207e-05,
"loss_iou": 0.6953125,
"loss_num": 0.00897216796875,
"loss_xval": 0.044921875,
"num_input_tokens_seen": 68720856,
"step": 398
},
{
"epoch": 1.0390625,
"grad_norm": 5.118278327904573,
"learning_rate": 5e-06,
"loss": 0.0574,
"num_input_tokens_seen": 68893560,
"step": 399
},
{
"epoch": 1.0390625,
"loss": 0.057190101593732834,
"loss_ce": 6.119744648458436e-05,
"loss_iou": 0.640625,
"loss_num": 0.01141357421875,
"loss_xval": 0.05712890625,
"num_input_tokens_seen": 68893560,
"step": 399
},
{
"epoch": 1.0416666666666667,
"grad_norm": 4.488918016416647,
"learning_rate": 5e-06,
"loss": 0.064,
"num_input_tokens_seen": 69066468,
"step": 400
},
{
"epoch": 1.0416666666666667,
"loss": 0.06421714276075363,
"loss_ce": 3.867531631840393e-05,
"loss_iou": 0.57421875,
"loss_num": 0.0128173828125,
"loss_xval": 0.06396484375,
"num_input_tokens_seen": 69066468,
"step": 400
},
{
"epoch": 1.0442708333333333,
"grad_norm": 5.792990925737602,
"learning_rate": 5e-06,
"loss": 0.0838,
"num_input_tokens_seen": 69239188,
"step": 401
},
{
"epoch": 1.0442708333333333,
"loss": 0.0709276869893074,
"loss_ce": 3.5352179111214355e-05,
"loss_iou": 0.5703125,
"loss_num": 0.01416015625,
"loss_xval": 0.07080078125,
"num_input_tokens_seen": 69239188,
"step": 401
},
{
"epoch": 1.046875,
"grad_norm": 5.477877590256074,
"learning_rate": 5e-06,
"loss": 0.0616,
"num_input_tokens_seen": 69412048,
"step": 402
},
{
"epoch": 1.046875,
"loss": 0.05337923392653465,
"loss_ce": 0.00012606415839400142,
"loss_iou": 0.53125,
"loss_num": 0.01068115234375,
"loss_xval": 0.05322265625,
"num_input_tokens_seen": 69412048,
"step": 402
},
{
"epoch": 1.0494791666666667,
"grad_norm": 7.785066348132969,
"learning_rate": 5e-06,
"loss": 0.077,
"num_input_tokens_seen": 69584372,
"step": 403
},
{
"epoch": 1.0494791666666667,
"loss": 0.11938966065645218,
"loss_ce": 0.0002032608463196084,
"loss_iou": 0.404296875,
"loss_num": 0.0238037109375,
"loss_xval": 0.119140625,
"num_input_tokens_seen": 69584372,
"step": 403
},
{
"epoch": 1.0520833333333333,
"grad_norm": 3.487837088264721,
"learning_rate": 5e-06,
"loss": 0.0598,
"num_input_tokens_seen": 69756908,
"step": 404
},
{
"epoch": 1.0520833333333333,
"loss": 0.04861289635300636,
"loss_ce": 5.943168798694387e-05,
"loss_iou": 0.421875,
"loss_num": 0.00970458984375,
"loss_xval": 0.048583984375,
"num_input_tokens_seen": 69756908,
"step": 404
},
{
"epoch": 1.0546875,
"grad_norm": 4.5585273415308505,
"learning_rate": 5e-06,
"loss": 0.0738,
"num_input_tokens_seen": 69929892,
"step": 405
},
{
"epoch": 1.0546875,
"loss": 0.036217886954545975,
"loss_ce": 5.455628706840798e-05,
"loss_iou": 0.494140625,
"loss_num": 0.007232666015625,
"loss_xval": 0.0361328125,
"num_input_tokens_seen": 69929892,
"step": 405
},
{
"epoch": 1.0572916666666667,
"grad_norm": 5.607953623525571,
"learning_rate": 5e-06,
"loss": 0.0589,
"num_input_tokens_seen": 70102304,
"step": 406
},
{
"epoch": 1.0572916666666667,
"loss": 0.08112768828868866,
"loss_ce": 4.247991455486044e-05,
"loss_iou": 0.66796875,
"loss_num": 0.0162353515625,
"loss_xval": 0.0810546875,
"num_input_tokens_seen": 70102304,
"step": 406
},
{
"epoch": 1.0598958333333333,
"grad_norm": 3.224104704302036,
"learning_rate": 5e-06,
"loss": 0.0706,
"num_input_tokens_seen": 70274860,
"step": 407
},
{
"epoch": 1.0598958333333333,
"loss": 0.042282506823539734,
"loss_ce": 3.09214046865236e-05,
"loss_iou": 0.52734375,
"loss_num": 0.0084228515625,
"loss_xval": 0.042236328125,
"num_input_tokens_seen": 70274860,
"step": 407
},
{
"epoch": 1.0625,
"grad_norm": 13.790835085548427,
"learning_rate": 5e-06,
"loss": 0.054,
"num_input_tokens_seen": 70447752,
"step": 408
},
{
"epoch": 1.0625,
"loss": 0.06554967164993286,
"loss_ce": 4.369396629044786e-05,
"loss_iou": 0.453125,
"loss_num": 0.01312255859375,
"loss_xval": 0.0654296875,
"num_input_tokens_seen": 70447752,
"step": 408
},
{
"epoch": 1.0651041666666667,
"grad_norm": 21.170926774013214,
"learning_rate": 5e-06,
"loss": 0.1041,
"num_input_tokens_seen": 70620408,
"step": 409
},
{
"epoch": 1.0651041666666667,
"loss": 0.05410638824105263,
"loss_ce": 2.9238653951324522e-05,
"loss_iou": 0.5625,
"loss_num": 0.01080322265625,
"loss_xval": 0.05419921875,
"num_input_tokens_seen": 70620408,
"step": 409
},
{
"epoch": 1.0677083333333333,
"grad_norm": 4.451906270983918,
"learning_rate": 5e-06,
"loss": 0.0553,
"num_input_tokens_seen": 70792740,
"step": 410
},
{
"epoch": 1.0677083333333333,
"loss": 0.060652364045381546,
"loss_ce": 4.4454794988268986e-05,
"loss_iou": 0.43359375,
"loss_num": 0.0120849609375,
"loss_xval": 0.060546875,
"num_input_tokens_seen": 70792740,
"step": 410
},
{
"epoch": 1.0703125,
"grad_norm": 4.685547616833428,
"learning_rate": 5e-06,
"loss": 0.0872,
"num_input_tokens_seen": 70965912,
"step": 411
},
{
"epoch": 1.0703125,
"loss": 0.038969703018665314,
"loss_ce": 2.9273152904352173e-05,
"loss_iou": 0.578125,
"loss_num": 0.007781982421875,
"loss_xval": 0.0390625,
"num_input_tokens_seen": 70965912,
"step": 411
},
{
"epoch": 1.0729166666666667,
"grad_norm": 4.205176098429634,
"learning_rate": 5e-06,
"loss": 0.097,
"num_input_tokens_seen": 71138240,
"step": 412
},
{
"epoch": 1.0729166666666667,
"loss": 0.06594446301460266,
"loss_ce": 2.6499863452045247e-05,
"loss_iou": 0.490234375,
"loss_num": 0.01318359375,
"loss_xval": 0.06591796875,
"num_input_tokens_seen": 71138240,
"step": 412
},
{
"epoch": 1.0755208333333333,
"grad_norm": 2.6975606542073414,
"learning_rate": 5e-06,
"loss": 0.0503,
"num_input_tokens_seen": 71311316,
"step": 413
},
{
"epoch": 1.0755208333333333,
"loss": 0.04244375228881836,
"loss_ce": 2.4318891519214958e-05,
"loss_iou": 0.5078125,
"loss_num": 0.00848388671875,
"loss_xval": 0.04248046875,
"num_input_tokens_seen": 71311316,
"step": 413
},
{
"epoch": 1.078125,
"grad_norm": 7.285990628964785,
"learning_rate": 5e-06,
"loss": 0.0779,
"num_input_tokens_seen": 71483816,
"step": 414
},
{
"epoch": 1.078125,
"loss": 0.04324822127819061,
"loss_ce": 2.007262264669407e-05,
"loss_iou": 0.5703125,
"loss_num": 0.0086669921875,
"loss_xval": 0.043212890625,
"num_input_tokens_seen": 71483816,
"step": 414
},
{
"epoch": 1.0807291666666667,
"grad_norm": 4.52661538724694,
"learning_rate": 5e-06,
"loss": 0.0641,
"num_input_tokens_seen": 71656340,
"step": 415
},
{
"epoch": 1.0807291666666667,
"loss": 0.08217348903417587,
"loss_ce": 4.30593136115931e-05,
"loss_iou": 0.45703125,
"loss_num": 0.0164794921875,
"loss_xval": 0.08203125,
"num_input_tokens_seen": 71656340,
"step": 415
},
{
"epoch": 1.0833333333333333,
"grad_norm": 4.6125148439773485,
"learning_rate": 5e-06,
"loss": 0.0882,
"num_input_tokens_seen": 71828256,
"step": 416
},
{
"epoch": 1.0833333333333333,
"loss": 0.13281017541885376,
"loss_ce": 2.819242035911884e-05,
"loss_iou": 0.5703125,
"loss_num": 0.0264892578125,
"loss_xval": 0.1328125,
"num_input_tokens_seen": 71828256,
"step": 416
},
{
"epoch": 1.0859375,
"grad_norm": 4.643689845065649,
"learning_rate": 5e-06,
"loss": 0.0627,
"num_input_tokens_seen": 72000720,
"step": 417
},
{
"epoch": 1.0859375,
"loss": 0.11508992314338684,
"loss_ce": 2.3400416466756724e-05,
"loss_iou": 0.0,
"loss_num": 0.0230712890625,
"loss_xval": 0.115234375,
"num_input_tokens_seen": 72000720,
"step": 417
},
{
"epoch": 1.0885416666666667,
"grad_norm": 4.231062348032645,
"learning_rate": 5e-06,
"loss": 0.0809,
"num_input_tokens_seen": 72173088,
"step": 418
},
{
"epoch": 1.0885416666666667,
"loss": 0.10267479717731476,
"loss_ce": 4.418793832883239e-05,
"loss_iou": 0.0,
"loss_num": 0.0205078125,
"loss_xval": 0.1025390625,
"num_input_tokens_seen": 72173088,
"step": 418
},
{
"epoch": 1.0911458333333333,
"grad_norm": 3.198450887714274,
"learning_rate": 5e-06,
"loss": 0.094,
"num_input_tokens_seen": 72345528,
"step": 419
},
{
"epoch": 1.0911458333333333,
"loss": 0.10875082015991211,
"loss_ce": 0.00010823761840583757,
"loss_iou": 0.431640625,
"loss_num": 0.021728515625,
"loss_xval": 0.1083984375,
"num_input_tokens_seen": 72345528,
"step": 419
},
{
"epoch": 1.09375,
"grad_norm": 4.428326611539968,
"learning_rate": 5e-06,
"loss": 0.0439,
"num_input_tokens_seen": 72517624,
"step": 420
},
{
"epoch": 1.09375,
"loss": 0.05049506574869156,
"loss_ce": 4.951009759679437e-05,
"loss_iou": 0.51171875,
"loss_num": 0.01007080078125,
"loss_xval": 0.050537109375,
"num_input_tokens_seen": 72517624,
"step": 420
},
{
"epoch": 1.0963541666666667,
"grad_norm": 5.316131474081422,
"learning_rate": 5e-06,
"loss": 0.0816,
"num_input_tokens_seen": 72690776,
"step": 421
},
{
"epoch": 1.0963541666666667,
"loss": 0.0798894613981247,
"loss_ce": 4.021547283628024e-05,
"loss_iou": 0.515625,
"loss_num": 0.0159912109375,
"loss_xval": 0.080078125,
"num_input_tokens_seen": 72690776,
"step": 421
},
{
"epoch": 1.0989583333333333,
"grad_norm": 4.80233181201779,
"learning_rate": 5e-06,
"loss": 0.099,
"num_input_tokens_seen": 72863552,
"step": 422
},
{
"epoch": 1.0989583333333333,
"loss": 0.09374965727329254,
"loss_ce": 3.0174571293173358e-05,
"loss_iou": 0.6484375,
"loss_num": 0.018798828125,
"loss_xval": 0.09375,
"num_input_tokens_seen": 72863552,
"step": 422
},
{
"epoch": 1.1015625,
"grad_norm": 3.042295910685699,
"learning_rate": 5e-06,
"loss": 0.0756,
"num_input_tokens_seen": 73035716,
"step": 423
},
{
"epoch": 1.1015625,
"loss": 0.05097030848264694,
"loss_ce": 2.1213931177044287e-05,
"loss_iou": 0.57421875,
"loss_num": 0.01019287109375,
"loss_xval": 0.051025390625,
"num_input_tokens_seen": 73035716,
"step": 423
},
{
"epoch": 1.1041666666666667,
"grad_norm": 3.8912805314115473,
"learning_rate": 5e-06,
"loss": 0.0536,
"num_input_tokens_seen": 73208532,
"step": 424
},
{
"epoch": 1.1041666666666667,
"loss": 0.04248078912496567,
"loss_ce": 4.60965748061426e-05,
"loss_iou": 0.61328125,
"loss_num": 0.00848388671875,
"loss_xval": 0.04248046875,
"num_input_tokens_seen": 73208532,
"step": 424
},
{
"epoch": 1.1067708333333333,
"grad_norm": 14.632789864172295,
"learning_rate": 5e-06,
"loss": 0.0658,
"num_input_tokens_seen": 73381164,
"step": 425
},
{
"epoch": 1.1067708333333333,
"loss": 0.052568383514881134,
"loss_ce": 0.000154447479872033,
"loss_iou": 0.47265625,
"loss_num": 0.010498046875,
"loss_xval": 0.052490234375,
"num_input_tokens_seen": 73381164,
"step": 425
},
{
"epoch": 1.109375,
"grad_norm": 4.3783199311299486,
"learning_rate": 5e-06,
"loss": 0.0861,
"num_input_tokens_seen": 73553404,
"step": 426
},
{
"epoch": 1.109375,
"loss": 0.125158429145813,
"loss_ce": 6.687753193546087e-05,
"loss_iou": 0.0,
"loss_num": 0.0250244140625,
"loss_xval": 0.125,
"num_input_tokens_seen": 73553404,
"step": 426
},
{
"epoch": 1.1119791666666667,
"grad_norm": 3.5716619036124766,
"learning_rate": 5e-06,
"loss": 0.075,
"num_input_tokens_seen": 73726020,
"step": 427
},
{
"epoch": 1.1119791666666667,
"loss": 0.14429143071174622,
"loss_ce": 3.4840733860619366e-05,
"loss_iou": 0.0,
"loss_num": 0.0289306640625,
"loss_xval": 0.14453125,
"num_input_tokens_seen": 73726020,
"step": 427
},
{
"epoch": 1.1145833333333333,
"grad_norm": 4.334696158970524,
"learning_rate": 5e-06,
"loss": 0.0636,
"num_input_tokens_seen": 73898692,
"step": 428
},
{
"epoch": 1.1145833333333333,
"loss": 0.08402653783559799,
"loss_ce": 4.216242450638674e-05,
"loss_iou": 0.5546875,
"loss_num": 0.016845703125,
"loss_xval": 0.083984375,
"num_input_tokens_seen": 73898692,
"step": 428
},
{
"epoch": 1.1171875,
"grad_norm": 5.554218703009278,
"learning_rate": 5e-06,
"loss": 0.059,
"num_input_tokens_seen": 74071620,
"step": 429
},
{
"epoch": 1.1171875,
"loss": 0.0587516650557518,
"loss_ce": 3.584453952498734e-05,
"loss_iou": 0.49609375,
"loss_num": 0.01171875,
"loss_xval": 0.05859375,
"num_input_tokens_seen": 74071620,
"step": 429
},
{
"epoch": 1.1197916666666667,
"grad_norm": 4.43058457913269,
"learning_rate": 5e-06,
"loss": 0.111,
"num_input_tokens_seen": 74244416,
"step": 430
},
{
"epoch": 1.1197916666666667,
"loss": 0.1568872630596161,
"loss_ce": 2.6918030926026404e-05,
"loss_iou": 0.71875,
"loss_num": 0.031494140625,
"loss_xval": 0.1572265625,
"num_input_tokens_seen": 74244416,
"step": 430
},
{
"epoch": 1.1223958333333333,
"grad_norm": 5.12234513289191,
"learning_rate": 5e-06,
"loss": 0.0652,
"num_input_tokens_seen": 74416812,
"step": 431
},
{
"epoch": 1.1223958333333333,
"loss": 0.04296587407588959,
"loss_ce": 7.341805758187547e-05,
"loss_iou": 0.0,
"loss_num": 0.00860595703125,
"loss_xval": 0.04296875,
"num_input_tokens_seen": 74416812,
"step": 431
},
{
"epoch": 1.125,
"grad_norm": 12.372100052601173,
"learning_rate": 5e-06,
"loss": 0.0794,
"num_input_tokens_seen": 74589952,
"step": 432
},
{
"epoch": 1.125,
"loss": 0.057425886392593384,
"loss_ce": 3.757977538043633e-05,
"loss_iou": 0.4609375,
"loss_num": 0.011474609375,
"loss_xval": 0.057373046875,
"num_input_tokens_seen": 74589952,
"step": 432
},
{
"epoch": 1.1276041666666667,
"grad_norm": 5.254766938250951,
"learning_rate": 5e-06,
"loss": 0.0813,
"num_input_tokens_seen": 74762884,
"step": 433
},
{
"epoch": 1.1276041666666667,
"loss": 0.07790642231702805,
"loss_ce": 0.00014763849321752787,
"loss_iou": 0.43359375,
"loss_num": 0.01556396484375,
"loss_xval": 0.07763671875,
"num_input_tokens_seen": 74762884,
"step": 433
},
{
"epoch": 1.1302083333333333,
"grad_norm": 4.363985148609402,
"learning_rate": 5e-06,
"loss": 0.0678,
"num_input_tokens_seen": 74935932,
"step": 434
},
{
"epoch": 1.1302083333333333,
"loss": 0.09511469304561615,
"loss_ce": 2.1914216631557792e-05,
"loss_iou": 0.671875,
"loss_num": 0.01904296875,
"loss_xval": 0.09521484375,
"num_input_tokens_seen": 74935932,
"step": 434
},
{
"epoch": 1.1328125,
"grad_norm": 36.76822239657336,
"learning_rate": 5e-06,
"loss": 0.0789,
"num_input_tokens_seen": 75109188,
"step": 435
},
{
"epoch": 1.1328125,
"loss": 0.05521143600344658,
"loss_ce": 3.5653371014632285e-05,
"loss_iou": 0.5234375,
"loss_num": 0.01104736328125,
"loss_xval": 0.05517578125,
"num_input_tokens_seen": 75109188,
"step": 435
},
{
"epoch": 1.1354166666666667,
"grad_norm": 5.8422904737549635,
"learning_rate": 5e-06,
"loss": 0.0833,
"num_input_tokens_seen": 75282080,
"step": 436
},
{
"epoch": 1.1354166666666667,
"loss": 0.1195131167769432,
"loss_ce": 3.679828660096973e-05,
"loss_iou": 0.5078125,
"loss_num": 0.02392578125,
"loss_xval": 0.11962890625,
"num_input_tokens_seen": 75282080,
"step": 436
},
{
"epoch": 1.1380208333333333,
"grad_norm": 5.633890734428066,
"learning_rate": 5e-06,
"loss": 0.0714,
"num_input_tokens_seen": 75454600,
"step": 437
},
{
"epoch": 1.1380208333333333,
"loss": 0.04856128245592117,
"loss_ce": 8.410715963691473e-05,
"loss_iou": 0.482421875,
"loss_num": 0.00970458984375,
"loss_xval": 0.048583984375,
"num_input_tokens_seen": 75454600,
"step": 437
},
{
"epoch": 1.140625,
"grad_norm": 4.6822951947306946,
"learning_rate": 5e-06,
"loss": 0.0673,
"num_input_tokens_seen": 75627104,
"step": 438
},
{
"epoch": 1.140625,
"loss": 0.05475003272294998,
"loss_ce": 4.72724532301072e-05,
"loss_iou": 0.51953125,
"loss_num": 0.01092529296875,
"loss_xval": 0.0546875,
"num_input_tokens_seen": 75627104,
"step": 438
},
{
"epoch": 1.1432291666666667,
"grad_norm": 6.006286624841916,
"learning_rate": 5e-06,
"loss": 0.0639,
"num_input_tokens_seen": 75799808,
"step": 439
},
{
"epoch": 1.1432291666666667,
"loss": 0.0665750578045845,
"loss_ce": 1.621775300009176e-05,
"loss_iou": 0.62890625,
"loss_num": 0.0133056640625,
"loss_xval": 0.06640625,
"num_input_tokens_seen": 75799808,
"step": 439
},
{
"epoch": 1.1458333333333333,
"grad_norm": 28.980298300208794,
"learning_rate": 5e-06,
"loss": 0.0762,
"num_input_tokens_seen": 75972072,
"step": 440
},
{
"epoch": 1.1458333333333333,
"loss": 0.09322504699230194,
"loss_ce": 2.435836722725071e-05,
"loss_iou": 0.71875,
"loss_num": 0.0186767578125,
"loss_xval": 0.09326171875,
"num_input_tokens_seen": 75972072,
"step": 440
},
{
"epoch": 1.1484375,
"grad_norm": 13.46707783271563,
"learning_rate": 5e-06,
"loss": 0.1018,
"num_input_tokens_seen": 76144720,
"step": 441
},
{
"epoch": 1.1484375,
"loss": 0.08068449795246124,
"loss_ce": 7.231286144815385e-05,
"loss_iou": 0.3515625,
"loss_num": 0.01611328125,
"loss_xval": 0.08056640625,
"num_input_tokens_seen": 76144720,
"step": 441
},
{
"epoch": 1.1510416666666667,
"grad_norm": 2.9432117535086357,
"learning_rate": 5e-06,
"loss": 0.0636,
"num_input_tokens_seen": 76316644,
"step": 442
},
{
"epoch": 1.1510416666666667,
"loss": 0.056132424622774124,
"loss_ce": 0.00016318520647473633,
"loss_iou": 0.66796875,
"loss_num": 0.01116943359375,
"loss_xval": 0.055908203125,
"num_input_tokens_seen": 76316644,
"step": 442
},
{
"epoch": 1.1536458333333333,
"grad_norm": 2.439393218897116,
"learning_rate": 5e-06,
"loss": 0.0374,
"num_input_tokens_seen": 76489288,
"step": 443
},
{
"epoch": 1.1536458333333333,
"loss": 0.03558982163667679,
"loss_ce": 2.1583975467365235e-05,
"loss_iou": 0.451171875,
"loss_num": 0.007110595703125,
"loss_xval": 0.03564453125,
"num_input_tokens_seen": 76489288,
"step": 443
},
{
"epoch": 1.15625,
"grad_norm": 14.267748702685116,
"learning_rate": 5e-06,
"loss": 0.0665,
"num_input_tokens_seen": 76661744,
"step": 444
},
{
"epoch": 1.15625,
"loss": 0.11009622365236282,
"loss_ce": 2.694531031011138e-05,
"loss_iou": 0.0,
"loss_num": 0.02197265625,
"loss_xval": 0.10986328125,
"num_input_tokens_seen": 76661744,
"step": 444
},
{
"epoch": 1.1588541666666667,
"grad_norm": 20.903388479491294,
"learning_rate": 5e-06,
"loss": 0.0489,
"num_input_tokens_seen": 76833952,
"step": 445
},
{
"epoch": 1.1588541666666667,
"loss": 0.03838071599602699,
"loss_ce": 3.5379373002797365e-05,
"loss_iou": 0.65234375,
"loss_num": 0.007659912109375,
"loss_xval": 0.038330078125,
"num_input_tokens_seen": 76833952,
"step": 445
},
{
"epoch": 1.1614583333333333,
"grad_norm": 8.469986250177818,
"learning_rate": 5e-06,
"loss": 0.0857,
"num_input_tokens_seen": 77007080,
"step": 446
},
{
"epoch": 1.1614583333333333,
"loss": 0.051541343331336975,
"loss_ce": 2.767006662907079e-05,
"loss_iou": 0.59375,
"loss_num": 0.01031494140625,
"loss_xval": 0.051513671875,
"num_input_tokens_seen": 77007080,
"step": 446
},
{
"epoch": 1.1640625,
"grad_norm": 5.002762010889236,
"learning_rate": 5e-06,
"loss": 0.1031,
"num_input_tokens_seen": 77180040,
"step": 447
},
{
"epoch": 1.1640625,
"loss": 0.20415398478507996,
"loss_ce": 5.2410614443942904e-05,
"loss_iou": 0.45703125,
"loss_num": 0.040771484375,
"loss_xval": 0.2041015625,
"num_input_tokens_seen": 77180040,
"step": 447
},
{
"epoch": 1.1666666666666667,
"grad_norm": 100.8603598057148,
"learning_rate": 5e-06,
"loss": 0.0848,
"num_input_tokens_seen": 77353160,
"step": 448
},
{
"epoch": 1.1666666666666667,
"loss": 0.07399199903011322,
"loss_ce": 7.841931073926389e-05,
"loss_iou": 0.69921875,
"loss_num": 0.0147705078125,
"loss_xval": 0.07373046875,
"num_input_tokens_seen": 77353160,
"step": 448
},
{
"epoch": 1.1692708333333333,
"grad_norm": 3.351767451423234,
"learning_rate": 5e-06,
"loss": 0.0852,
"num_input_tokens_seen": 77526040,
"step": 449
},
{
"epoch": 1.1692708333333333,
"loss": 0.06542657315731049,
"loss_ce": 5.792453157482669e-05,
"loss_iou": 0.4609375,
"loss_num": 0.0130615234375,
"loss_xval": 0.0654296875,
"num_input_tokens_seen": 77526040,
"step": 449
},
{
"epoch": 1.171875,
"grad_norm": 4.456484406427641,
"learning_rate": 5e-06,
"loss": 0.0804,
"num_input_tokens_seen": 77698436,
"step": 450
},
{
"epoch": 1.171875,
"loss": 0.037512898445129395,
"loss_ce": 6.783234130125493e-05,
"loss_iou": 0.57421875,
"loss_num": 0.00750732421875,
"loss_xval": 0.037353515625,
"num_input_tokens_seen": 77698436,
"step": 450
},
{
"epoch": 1.1744791666666667,
"grad_norm": 5.16567471249552,
"learning_rate": 5e-06,
"loss": 0.076,
"num_input_tokens_seen": 77870996,
"step": 451
},
{
"epoch": 1.1744791666666667,
"loss": 0.1230858787894249,
"loss_ce": 3.9006972656352445e-05,
"loss_iou": 0.52734375,
"loss_num": 0.0245361328125,
"loss_xval": 0.123046875,
"num_input_tokens_seen": 77870996,
"step": 451
},
{
"epoch": 1.1770833333333333,
"grad_norm": 4.701516259626003,
"learning_rate": 5e-06,
"loss": 0.0584,
"num_input_tokens_seen": 78043824,
"step": 452
},
{
"epoch": 1.1770833333333333,
"loss": 0.04219118878245354,
"loss_ce": 6.16741890553385e-05,
"loss_iou": 0.58984375,
"loss_num": 0.0084228515625,
"loss_xval": 0.042236328125,
"num_input_tokens_seen": 78043824,
"step": 452
},
{
"epoch": 1.1796875,
"grad_norm": 5.151886406386116,
"learning_rate": 5e-06,
"loss": 0.0829,
"num_input_tokens_seen": 78216596,
"step": 453
},
{
"epoch": 1.1796875,
"loss": 0.11115504801273346,
"loss_ce": 0.00010158185614272952,
"loss_iou": 0.58984375,
"loss_num": 0.022216796875,
"loss_xval": 0.11083984375,
"num_input_tokens_seen": 78216596,
"step": 453
},
{
"epoch": 1.1822916666666667,
"grad_norm": 5.099784873283209,
"learning_rate": 5e-06,
"loss": 0.0496,
"num_input_tokens_seen": 78389868,
"step": 454
},
{
"epoch": 1.1822916666666667,
"loss": 0.05530470609664917,
"loss_ce": 6.788992322981358e-05,
"loss_iou": 0.57421875,
"loss_num": 0.01104736328125,
"loss_xval": 0.05517578125,
"num_input_tokens_seen": 78389868,
"step": 454
},
{
"epoch": 1.1848958333333333,
"grad_norm": 22.327108090070816,
"learning_rate": 5e-06,
"loss": 0.0734,
"num_input_tokens_seen": 78562732,
"step": 455
},
{
"epoch": 1.1848958333333333,
"loss": 0.1318536400794983,
"loss_ce": 7.873401773395017e-05,
"loss_iou": 0.453125,
"loss_num": 0.0263671875,
"loss_xval": 0.1318359375,
"num_input_tokens_seen": 78562732,
"step": 455
},
{
"epoch": 1.1875,
"grad_norm": 6.402774286125369,
"learning_rate": 5e-06,
"loss": 0.0785,
"num_input_tokens_seen": 78735540,
"step": 456
},
{
"epoch": 1.1875,
"loss": 0.08449774980545044,
"loss_ce": 4.035345773445442e-05,
"loss_iou": 0.359375,
"loss_num": 0.016845703125,
"loss_xval": 0.08447265625,
"num_input_tokens_seen": 78735540,
"step": 456
},
{
"epoch": 1.1901041666666667,
"grad_norm": 3.7359791226023495,
"learning_rate": 5e-06,
"loss": 0.0667,
"num_input_tokens_seen": 78907916,
"step": 457
},
{
"epoch": 1.1901041666666667,
"loss": 0.04169946163892746,
"loss_ce": 4.2965810280293226e-05,
"loss_iou": 0.474609375,
"loss_num": 0.00830078125,
"loss_xval": 0.041748046875,
"num_input_tokens_seen": 78907916,
"step": 457
},
{
"epoch": 1.1927083333333333,
"grad_norm": 7.721272031766003,
"learning_rate": 5e-06,
"loss": 0.1165,
"num_input_tokens_seen": 79080080,
"step": 458
},
{
"epoch": 1.1927083333333333,
"loss": 0.09451837837696075,
"loss_ce": 0.00012751182657666504,
"loss_iou": 0.421875,
"loss_num": 0.0189208984375,
"loss_xval": 0.09423828125,
"num_input_tokens_seen": 79080080,
"step": 458
},
{
"epoch": 1.1953125,
"grad_norm": 3.5323313091644755,
"learning_rate": 5e-06,
"loss": 0.0699,
"num_input_tokens_seen": 79252520,
"step": 459
},
{
"epoch": 1.1953125,
"loss": 0.09058161079883575,
"loss_ce": 2.0699575543403625e-05,
"loss_iou": 0.51953125,
"loss_num": 0.0181884765625,
"loss_xval": 0.09033203125,
"num_input_tokens_seen": 79252520,
"step": 459
},
{
"epoch": 1.1979166666666667,
"grad_norm": 6.54371810379822,
"learning_rate": 5e-06,
"loss": 0.0901,
"num_input_tokens_seen": 79425288,
"step": 460
},
{
"epoch": 1.1979166666666667,
"loss": 0.06754864007234573,
"loss_ce": 4.375486241769977e-05,
"loss_iou": 0.5703125,
"loss_num": 0.01348876953125,
"loss_xval": 0.0673828125,
"num_input_tokens_seen": 79425288,
"step": 460
},
{
"epoch": 1.2005208333333333,
"grad_norm": 4.16776605785161,
"learning_rate": 5e-06,
"loss": 0.0483,
"num_input_tokens_seen": 79597852,
"step": 461
},
{
"epoch": 1.2005208333333333,
"loss": 0.040252070873975754,
"loss_ce": 2.990448228956666e-05,
"loss_iou": 0.458984375,
"loss_num": 0.008056640625,
"loss_xval": 0.040283203125,
"num_input_tokens_seen": 79597852,
"step": 461
},
{
"epoch": 1.203125,
"grad_norm": 10.246051862590502,
"learning_rate": 5e-06,
"loss": 0.0741,
"num_input_tokens_seen": 79770236,
"step": 462
},
{
"epoch": 1.203125,
"loss": 0.04001723229885101,
"loss_ce": 5.4462791013065726e-05,
"loss_iou": 0.5625,
"loss_num": 0.00799560546875,
"loss_xval": 0.0400390625,
"num_input_tokens_seen": 79770236,
"step": 462
},
{
"epoch": 1.2057291666666667,
"grad_norm": 6.092643709859456,
"learning_rate": 5e-06,
"loss": 0.0521,
"num_input_tokens_seen": 79943152,
"step": 463
},
{
"epoch": 1.2057291666666667,
"loss": 0.03291913866996765,
"loss_ce": 2.1190953702898696e-05,
"loss_iou": 0.5390625,
"loss_num": 0.006591796875,
"loss_xval": 0.032958984375,
"num_input_tokens_seen": 79943152,
"step": 463
},
{
"epoch": 1.2083333333333333,
"grad_norm": 4.950298356741838,
"learning_rate": 5e-06,
"loss": 0.0743,
"num_input_tokens_seen": 80115996,
"step": 464
},
{
"epoch": 1.2083333333333333,
"loss": 0.06474019587039948,
"loss_ce": 4.293021993362345e-05,
"loss_iou": 0.50390625,
"loss_num": 0.012939453125,
"loss_xval": 0.064453125,
"num_input_tokens_seen": 80115996,
"step": 464
},
{
"epoch": 1.2109375,
"grad_norm": 6.018163435684629,
"learning_rate": 5e-06,
"loss": 0.0688,
"num_input_tokens_seen": 80288564,
"step": 465
},
{
"epoch": 1.2109375,
"loss": 0.06020050495862961,
"loss_ce": 1.984027767321095e-05,
"loss_iou": 0.59765625,
"loss_num": 0.01202392578125,
"loss_xval": 0.06005859375,
"num_input_tokens_seen": 80288564,
"step": 465
},
{
"epoch": 1.2135416666666667,
"grad_norm": 4.280669670593218,
"learning_rate": 5e-06,
"loss": 0.074,
"num_input_tokens_seen": 80461048,
"step": 466
},
{
"epoch": 1.2135416666666667,
"loss": 0.05621056258678436,
"loss_ce": 2.77029030257836e-05,
"loss_iou": 0.57421875,
"loss_num": 0.01123046875,
"loss_xval": 0.05615234375,
"num_input_tokens_seen": 80461048,
"step": 466
},
{
"epoch": 1.2161458333333333,
"grad_norm": 29.24577961253563,
"learning_rate": 5e-06,
"loss": 0.072,
"num_input_tokens_seen": 80633544,
"step": 467
},
{
"epoch": 1.2161458333333333,
"loss": 0.036091044545173645,
"loss_ce": 2.689398024813272e-05,
"loss_iou": 0.50390625,
"loss_num": 0.007232666015625,
"loss_xval": 0.0361328125,
"num_input_tokens_seen": 80633544,
"step": 467
},
{
"epoch": 1.21875,
"grad_norm": 3.5977371268772007,
"learning_rate": 5e-06,
"loss": 0.0541,
"num_input_tokens_seen": 80805856,
"step": 468
},
{
"epoch": 1.21875,
"loss": 0.05104288086295128,
"loss_ce": 3.274788468843326e-05,
"loss_iou": 0.53515625,
"loss_num": 0.01019287109375,
"loss_xval": 0.051025390625,
"num_input_tokens_seen": 80805856,
"step": 468
},
{
"epoch": 1.2213541666666667,
"grad_norm": 8.075177097555214,
"learning_rate": 5e-06,
"loss": 0.0605,
"num_input_tokens_seen": 80978184,
"step": 469
},
{
"epoch": 1.2213541666666667,
"loss": 0.04464123770594597,
"loss_ce": 2.4538327124901116e-05,
"loss_iou": 0.61328125,
"loss_num": 0.0089111328125,
"loss_xval": 0.044677734375,
"num_input_tokens_seen": 80978184,
"step": 469
},
{
"epoch": 1.2239583333333333,
"grad_norm": 6.996084067501281,
"learning_rate": 5e-06,
"loss": 0.0577,
"num_input_tokens_seen": 81150552,
"step": 470
},
{
"epoch": 1.2239583333333333,
"loss": 0.04917052388191223,
"loss_ce": 5.248059460427612e-05,
"loss_iou": 0.59765625,
"loss_num": 0.00982666015625,
"loss_xval": 0.049072265625,
"num_input_tokens_seen": 81150552,
"step": 470
},
{
"epoch": 1.2265625,
"grad_norm": 9.557588670219046,
"learning_rate": 5e-06,
"loss": 0.0598,
"num_input_tokens_seen": 81323276,
"step": 471
},
{
"epoch": 1.2265625,
"loss": 0.08455046266317368,
"loss_ce": 3.203285814379342e-05,
"loss_iou": 0.404296875,
"loss_num": 0.016845703125,
"loss_xval": 0.08447265625,
"num_input_tokens_seen": 81323276,
"step": 471
},
{
"epoch": 1.2291666666666667,
"grad_norm": 4.882489644855878,
"learning_rate": 5e-06,
"loss": 0.0455,
"num_input_tokens_seen": 81496112,
"step": 472
},
{
"epoch": 1.2291666666666667,
"loss": 0.032756030559539795,
"loss_ce": 0.0001327365607721731,
"loss_iou": 0.53125,
"loss_num": 0.00653076171875,
"loss_xval": 0.03271484375,
"num_input_tokens_seen": 81496112,
"step": 472
},
{
"epoch": 1.2317708333333333,
"grad_norm": 10.129497104665319,
"learning_rate": 5e-06,
"loss": 0.0865,
"num_input_tokens_seen": 81668408,
"step": 473
},
{
"epoch": 1.2317708333333333,
"loss": 0.053121719509363174,
"loss_ce": 2.1133846530574374e-05,
"loss_iou": 0.5703125,
"loss_num": 0.0106201171875,
"loss_xval": 0.05322265625,
"num_input_tokens_seen": 81668408,
"step": 473
},
{
"epoch": 1.234375,
"grad_norm": 34.29728711508608,
"learning_rate": 5e-06,
"loss": 0.0801,
"num_input_tokens_seen": 81841444,
"step": 474
},
{
"epoch": 1.234375,
"loss": 0.1159551739692688,
"loss_ce": 1.8899745555245318e-05,
"loss_iou": 0.46484375,
"loss_num": 0.023193359375,
"loss_xval": 0.11572265625,
"num_input_tokens_seen": 81841444,
"step": 474
},
{
"epoch": 1.2369791666666667,
"grad_norm": 5.248583896165671,
"learning_rate": 5e-06,
"loss": 0.0835,
"num_input_tokens_seen": 82014424,
"step": 475
},
{
"epoch": 1.2369791666666667,
"loss": 0.046804144978523254,
"loss_ce": 2.0697760191978887e-05,
"loss_iou": 0.65234375,
"loss_num": 0.00933837890625,
"loss_xval": 0.046875,
"num_input_tokens_seen": 82014424,
"step": 475
},
{
"epoch": 1.2395833333333333,
"grad_norm": 4.890262555680429,
"learning_rate": 5e-06,
"loss": 0.0638,
"num_input_tokens_seen": 82187060,
"step": 476
},
{
"epoch": 1.2395833333333333,
"loss": 0.05300772190093994,
"loss_ce": 2.9203043595771305e-05,
"loss_iou": 0.54296875,
"loss_num": 0.0106201171875,
"loss_xval": 0.052978515625,
"num_input_tokens_seen": 82187060,
"step": 476
},
{
"epoch": 1.2421875,
"grad_norm": 6.375507009761332,
"learning_rate": 5e-06,
"loss": 0.0746,
"num_input_tokens_seen": 82359884,
"step": 477
},
{
"epoch": 1.2421875,
"loss": 0.08658900111913681,
"loss_ce": 0.00011744195217033848,
"loss_iou": 0.515625,
"loss_num": 0.017333984375,
"loss_xval": 0.08642578125,
"num_input_tokens_seen": 82359884,
"step": 477
},
{
"epoch": 1.2447916666666667,
"grad_norm": 6.190781448434917,
"learning_rate": 5e-06,
"loss": 0.0682,
"num_input_tokens_seen": 82532312,
"step": 478
},
{
"epoch": 1.2447916666666667,
"loss": 0.06419570744037628,
"loss_ce": 1.724117828416638e-05,
"loss_iou": 0.5390625,
"loss_num": 0.0128173828125,
"loss_xval": 0.06396484375,
"num_input_tokens_seen": 82532312,
"step": 478
},
{
"epoch": 1.2473958333333333,
"grad_norm": 4.492503172851453,
"learning_rate": 5e-06,
"loss": 0.0545,
"num_input_tokens_seen": 82705224,
"step": 479
},
{
"epoch": 1.2473958333333333,
"loss": 0.043702684342861176,
"loss_ce": 4.728833300760016e-05,
"loss_iou": 0.4453125,
"loss_num": 0.00872802734375,
"loss_xval": 0.043701171875,
"num_input_tokens_seen": 82705224,
"step": 479
},
{
"epoch": 1.25,
"grad_norm": 4.614176563274451,
"learning_rate": 5e-06,
"loss": 0.0825,
"num_input_tokens_seen": 82877740,
"step": 480
},
{
"epoch": 1.25,
"loss": 0.0817180722951889,
"loss_ce": 8.355021418537945e-05,
"loss_iou": 0.703125,
"loss_num": 0.016357421875,
"loss_xval": 0.08154296875,
"num_input_tokens_seen": 82877740,
"step": 480
},
{
"epoch": 1.2526041666666667,
"grad_norm": 4.135440424213399,
"learning_rate": 5e-06,
"loss": 0.0639,
"num_input_tokens_seen": 83050904,
"step": 481
},
{
"epoch": 1.2526041666666667,
"loss": 0.045239534229040146,
"loss_ce": 2.774174208752811e-05,
"loss_iou": 0.470703125,
"loss_num": 0.009033203125,
"loss_xval": 0.045166015625,
"num_input_tokens_seen": 83050904,
"step": 481
},
{
"epoch": 1.2552083333333333,
"grad_norm": 5.953250787402434,
"learning_rate": 5e-06,
"loss": 0.0758,
"num_input_tokens_seen": 83223916,
"step": 482
},
{
"epoch": 1.2552083333333333,
"loss": 0.039626024663448334,
"loss_ce": 2.946431777672842e-05,
"loss_iou": 0.60546875,
"loss_num": 0.0079345703125,
"loss_xval": 0.03955078125,
"num_input_tokens_seen": 83223916,
"step": 482
},
{
"epoch": 1.2578125,
"grad_norm": 5.778983695199196,
"learning_rate": 5e-06,
"loss": 0.0476,
"num_input_tokens_seen": 83397368,
"step": 483
},
{
"epoch": 1.2578125,
"loss": 0.041962604969739914,
"loss_ce": 4.6711622417205945e-05,
"loss_iou": 0.5703125,
"loss_num": 0.00836181640625,
"loss_xval": 0.0419921875,
"num_input_tokens_seen": 83397368,
"step": 483
},
{
"epoch": 1.2604166666666667,
"grad_norm": 5.433318803087276,
"learning_rate": 5e-06,
"loss": 0.0785,
"num_input_tokens_seen": 83569504,
"step": 484
},
{
"epoch": 1.2604166666666667,
"loss": 0.05590134114027023,
"loss_ce": 2.3658354621147737e-05,
"loss_iou": 0.58984375,
"loss_num": 0.01116943359375,
"loss_xval": 0.055908203125,
"num_input_tokens_seen": 83569504,
"step": 484
},
{
"epoch": 1.2630208333333333,
"grad_norm": 4.6826104330453955,
"learning_rate": 5e-06,
"loss": 0.054,
"num_input_tokens_seen": 83742676,
"step": 485
},
{
"epoch": 1.2630208333333333,
"loss": 0.03859255462884903,
"loss_ce": 6.411132198991254e-05,
"loss_iou": 0.50390625,
"loss_num": 0.0076904296875,
"loss_xval": 0.03857421875,
"num_input_tokens_seen": 83742676,
"step": 485
},
{
"epoch": 1.265625,
"grad_norm": 4.369179337344076,
"learning_rate": 5e-06,
"loss": 0.0529,
"num_input_tokens_seen": 83915776,
"step": 486
},
{
"epoch": 1.265625,
"loss": 0.04284074157476425,
"loss_ce": 7.035740418359637e-05,
"loss_iou": 0.5234375,
"loss_num": 0.008544921875,
"loss_xval": 0.042724609375,
"num_input_tokens_seen": 83915776,
"step": 486
},
{
"epoch": 1.2682291666666667,
"grad_norm": 4.855681248964782,
"learning_rate": 5e-06,
"loss": 0.0893,
"num_input_tokens_seen": 84088164,
"step": 487
},
{
"epoch": 1.2682291666666667,
"loss": 0.0853077843785286,
"loss_ce": 4.1672632505651563e-05,
"loss_iou": 0.59375,
"loss_num": 0.01708984375,
"loss_xval": 0.08544921875,
"num_input_tokens_seen": 84088164,
"step": 487
},
{
"epoch": 1.2708333333333333,
"grad_norm": 4.574747694340549,
"learning_rate": 5e-06,
"loss": 0.0667,
"num_input_tokens_seen": 84261012,
"step": 488
},
{
"epoch": 1.2708333333333333,
"loss": 0.1186133474111557,
"loss_ce": 3.7304311263142154e-05,
"loss_iou": 0.455078125,
"loss_num": 0.0238037109375,
"loss_xval": 0.11865234375,
"num_input_tokens_seen": 84261012,
"step": 488
},
{
"epoch": 1.2734375,
"grad_norm": 6.201362257140882,
"learning_rate": 5e-06,
"loss": 0.0656,
"num_input_tokens_seen": 84433984,
"step": 489
},
{
"epoch": 1.2734375,
"loss": 0.055373311042785645,
"loss_ce": 4.4942811655346304e-05,
"loss_iou": 0.61328125,
"loss_num": 0.01104736328125,
"loss_xval": 0.055419921875,
"num_input_tokens_seen": 84433984,
"step": 489
},
{
"epoch": 1.2760416666666667,
"grad_norm": 4.576166685047339,
"learning_rate": 5e-06,
"loss": 0.0666,
"num_input_tokens_seen": 84606516,
"step": 490
},
{
"epoch": 1.2760416666666667,
"loss": 0.046068161725997925,
"loss_ce": 4.765454650623724e-05,
"loss_iou": 0.0,
"loss_num": 0.00921630859375,
"loss_xval": 0.0458984375,
"num_input_tokens_seen": 84606516,
"step": 490
},
{
"epoch": 1.2786458333333333,
"grad_norm": 4.832131210851992,
"learning_rate": 5e-06,
"loss": 0.0737,
"num_input_tokens_seen": 84779356,
"step": 491
},
{
"epoch": 1.2786458333333333,
"loss": 0.11677989363670349,
"loss_ce": 1.964074544957839e-05,
"loss_iou": 0.484375,
"loss_num": 0.0233154296875,
"loss_xval": 0.11669921875,
"num_input_tokens_seen": 84779356,
"step": 491
},
{
"epoch": 1.28125,
"grad_norm": 5.262752038477657,
"learning_rate": 5e-06,
"loss": 0.071,
"num_input_tokens_seen": 84952020,
"step": 492
},
{
"epoch": 1.28125,
"loss": 0.0537094846367836,
"loss_ce": 2.9066111892461777e-05,
"loss_iou": 0.58203125,
"loss_num": 0.0107421875,
"loss_xval": 0.0537109375,
"num_input_tokens_seen": 84952020,
"step": 492
},
{
"epoch": 1.2838541666666667,
"grad_norm": 2.9344225414677836,
"learning_rate": 5e-06,
"loss": 0.0495,
"num_input_tokens_seen": 85124876,
"step": 493
},
{
"epoch": 1.2838541666666667,
"loss": 0.03429765999317169,
"loss_ce": 0.00013323240273166448,
"loss_iou": 0.49609375,
"loss_num": 0.0068359375,
"loss_xval": 0.0341796875,
"num_input_tokens_seen": 85124876,
"step": 493
},
{
"epoch": 1.2864583333333333,
"grad_norm": 12.093642895702288,
"learning_rate": 5e-06,
"loss": 0.083,
"num_input_tokens_seen": 85297824,
"step": 494
},
{
"epoch": 1.2864583333333333,
"loss": 0.13448233902454376,
"loss_ce": 3.7149860872887075e-05,
"loss_iou": 0.498046875,
"loss_num": 0.02685546875,
"loss_xval": 0.134765625,
"num_input_tokens_seen": 85297824,
"step": 494
},
{
"epoch": 1.2890625,
"grad_norm": 5.311410396179597,
"learning_rate": 5e-06,
"loss": 0.0622,
"num_input_tokens_seen": 85469896,
"step": 495
},
{
"epoch": 1.2890625,
"loss": 0.10600131750106812,
"loss_ce": 4.428675310919061e-05,
"loss_iou": 0.466796875,
"loss_num": 0.0211181640625,
"loss_xval": 0.10595703125,
"num_input_tokens_seen": 85469896,
"step": 495
},
{
"epoch": 1.2916666666666667,
"grad_norm": 13.126940553733593,
"learning_rate": 5e-06,
"loss": 0.0509,
"num_input_tokens_seen": 85642324,
"step": 496
},
{
"epoch": 1.2916666666666667,
"loss": 0.04261418431997299,
"loss_ce": 0.00010319902503397316,
"loss_iou": 0.53515625,
"loss_num": 0.00848388671875,
"loss_xval": 0.04248046875,
"num_input_tokens_seen": 85642324,
"step": 496
},
{
"epoch": 1.2942708333333333,
"grad_norm": 9.141153643623982,
"learning_rate": 5e-06,
"loss": 0.0514,
"num_input_tokens_seen": 85815252,
"step": 497
},
{
"epoch": 1.2942708333333333,
"loss": 0.04319656640291214,
"loss_ce": 4.471266584005207e-05,
"loss_iou": 0.46875,
"loss_num": 0.0086669921875,
"loss_xval": 0.043212890625,
"num_input_tokens_seen": 85815252,
"step": 497
},
{
"epoch": 1.296875,
"grad_norm": 5.456561002723919,
"learning_rate": 5e-06,
"loss": 0.0744,
"num_input_tokens_seen": 85988240,
"step": 498
},
{
"epoch": 1.296875,
"loss": 0.0488949790596962,
"loss_ce": 3.6337674828246236e-05,
"loss_iou": 0.5859375,
"loss_num": 0.009765625,
"loss_xval": 0.048828125,
"num_input_tokens_seen": 85988240,
"step": 498
},
{
"epoch": 1.2994791666666667,
"grad_norm": 4.197467000151624,
"learning_rate": 5e-06,
"loss": 0.0618,
"num_input_tokens_seen": 86160724,
"step": 499
},
{
"epoch": 1.2994791666666667,
"loss": 0.04255600646138191,
"loss_ce": 2.9761704354314134e-05,
"loss_iou": 0.443359375,
"loss_num": 0.00848388671875,
"loss_xval": 0.04248046875,
"num_input_tokens_seen": 86160724,
"step": 499
},
{
"epoch": 1.3020833333333333,
"grad_norm": 16.018482571236348,
"learning_rate": 5e-06,
"loss": 0.0857,
"num_input_tokens_seen": 86333348,
"step": 500
},
{
"epoch": 1.3020833333333333,
"eval_seeclick_CIoU": 0.49157558381557465,
"eval_seeclick_GIoU": 0.4883834272623062,
"eval_seeclick_IoU": 0.5341370701789856,
"eval_seeclick_MAE_all": 0.07028103247284889,
"eval_seeclick_MAE_h": 0.05726983770728111,
"eval_seeclick_MAE_w": 0.08522269874811172,
"eval_seeclick_MAE_x": 0.08005227893590927,
"eval_seeclick_MAE_y": 0.058579325675964355,
"eval_seeclick_NUM_probability": 0.9999949038028717,
"eval_seeclick_inside_bbox": 0.8764204680919647,
"eval_seeclick_loss": 0.9519317150115967,
"eval_seeclick_loss_ce": 0.6910622417926788,
"eval_seeclick_loss_iou": 0.6273193359375,
"eval_seeclick_loss_num": 0.053680419921875,
"eval_seeclick_loss_xval": 0.26849365234375,
"eval_seeclick_runtime": 71.7405,
"eval_seeclick_samples_per_second": 0.599,
"eval_seeclick_steps_per_second": 0.028,
"num_input_tokens_seen": 86333348,
"step": 500
},
{
"epoch": 1.3020833333333333,
"eval_icons_CIoU": 0.7799727618694305,
"eval_icons_GIoU": 0.7758736610412598,
"eval_icons_IoU": 0.7871803939342499,
"eval_icons_MAE_all": 0.026267122477293015,
"eval_icons_MAE_h": 0.024472126737236977,
"eval_icons_MAE_w": 0.029545767232775688,
"eval_icons_MAE_x": 0.02697262354195118,
"eval_icons_MAE_y": 0.024077963083982468,
"eval_icons_NUM_probability": 0.9999885261058807,
"eval_icons_inside_bbox": 1.0,
"eval_icons_loss": 0.07963114976882935,
"eval_icons_loss_ce": 0.0020425044931471348,
"eval_icons_loss_iou": 0.5069580078125,
"eval_icons_loss_num": 0.014467239379882812,
"eval_icons_loss_xval": 0.07232666015625,
"eval_icons_runtime": 80.3553,
"eval_icons_samples_per_second": 0.622,
"eval_icons_steps_per_second": 0.025,
"num_input_tokens_seen": 86333348,
"step": 500
},
{
"epoch": 1.3020833333333333,
"eval_screenspot_CIoU": 0.3665693998336792,
"eval_screenspot_GIoU": 0.3608221113681793,
"eval_screenspot_IoU": 0.4541289210319519,
"eval_screenspot_MAE_all": 0.13468862076600394,
"eval_screenspot_MAE_h": 0.07963093866904576,
"eval_screenspot_MAE_w": 0.2195572853088379,
"eval_screenspot_MAE_x": 0.16379199425379434,
"eval_screenspot_MAE_y": 0.07577425986528397,
"eval_screenspot_NUM_probability": 0.9999738732973734,
"eval_screenspot_inside_bbox": 0.7116666634877523,
"eval_screenspot_loss": 0.9175184369087219,
"eval_screenspot_loss_ce": 0.42678311467170715,
"eval_screenspot_loss_iou": 0.4466145833333333,
"eval_screenspot_loss_num": 0.09850565592447917,
"eval_screenspot_loss_xval": 0.4925130208333333,
"eval_screenspot_runtime": 149.8949,
"eval_screenspot_samples_per_second": 0.594,
"eval_screenspot_steps_per_second": 0.02,
"num_input_tokens_seen": 86333348,
"step": 500
},
{
"epoch": 1.3020833333333333,
"eval_compot_CIoU": 0.9082967340946198,
"eval_compot_GIoU": 0.9085466265678406,
"eval_compot_IoU": 0.9093170166015625,
"eval_compot_MAE_all": 0.009244627552106977,
"eval_compot_MAE_h": 0.004357840050943196,
"eval_compot_MAE_w": 0.014095565304160118,
"eval_compot_MAE_x": 0.012027833610773087,
"eval_compot_MAE_y": 0.006497269030660391,
"eval_compot_NUM_probability": 0.9999580085277557,
"eval_compot_inside_bbox": 1.0,
"eval_compot_loss": 0.04286140948534012,
"eval_compot_loss_ce": 4.613543933373876e-05,
"eval_compot_loss_iou": 0.507080078125,
"eval_compot_loss_num": 0.009250640869140625,
"eval_compot_loss_xval": 0.0462188720703125,
"eval_compot_runtime": 84.1131,
"eval_compot_samples_per_second": 0.594,
"eval_compot_steps_per_second": 0.024,
"num_input_tokens_seen": 86333348,
"step": 500
}
],
"logging_steps": 1.0,
"max_steps": 1152,
"num_input_tokens_seen": 86333348,
"num_train_epochs": 3,
"save_steps": 250,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 622740728971264.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}