{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.04202894743754761,
  "eval_steps": 200,
  "global_step": 600,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 7.004824572924602e-05,
      "grad_norm": 6.222772121429443,
      "learning_rate": 9.99930017513135e-05,
      "loss": 1.1076,
      "num_input_tokens_seen": 16384,
      "step": 1
    },
    {
      "epoch": 0.00014009649145849205,
      "grad_norm": 6.042057037353516,
      "learning_rate": 9.998600350262697e-05,
      "loss": 1.1086,
      "num_input_tokens_seen": 32768,
      "step": 2
    },
    {
      "epoch": 0.00021014473718773804,
      "grad_norm": 7.119229316711426,
      "learning_rate": 9.997900525394046e-05,
      "loss": 1.4047,
      "num_input_tokens_seen": 49152,
      "step": 3
    },
    {
      "epoch": 0.0002801929829169841,
      "grad_norm": 7.133191108703613,
      "learning_rate": 9.997200700525395e-05,
      "loss": 1.3921,
      "num_input_tokens_seen": 65536,
      "step": 4
    },
    {
      "epoch": 0.0003502412286462301,
      "grad_norm": 6.1078338623046875,
      "learning_rate": 9.996500875656743e-05,
      "loss": 1.3171,
      "num_input_tokens_seen": 81920,
      "step": 5
    },
    {
      "epoch": 0.0004202894743754761,
      "grad_norm": 6.466420650482178,
      "learning_rate": 9.995801050788092e-05,
      "loss": 1.0732,
      "num_input_tokens_seen": 97344,
      "step": 6
    },
    {
      "epoch": 0.0004903377201047221,
      "grad_norm": 5.578189849853516,
      "learning_rate": 9.99510122591944e-05,
      "loss": 0.9929,
      "num_input_tokens_seen": 113728,
      "step": 7
    },
    {
      "epoch": 0.0005603859658339682,
      "grad_norm": 7.197720527648926,
      "learning_rate": 9.994401401050789e-05,
      "loss": 1.2512,
      "num_input_tokens_seen": 129528,
      "step": 8
    },
    {
      "epoch": 0.0006304342115632141,
      "grad_norm": 6.618913650512695,
      "learning_rate": 9.993701576182136e-05,
      "loss": 1.3495,
      "num_input_tokens_seen": 145704,
      "step": 9
    },
    {
      "epoch": 0.0007004824572924602,
      "grad_norm": 6.955508232116699,
      "learning_rate": 9.993001751313485e-05,
      "loss": 1.1823,
      "num_input_tokens_seen": 161664,
      "step": 10
    },
    {
      "epoch": 0.0007705307030217062,
      "grad_norm": 6.6807074546813965,
      "learning_rate": 9.992301926444835e-05,
      "loss": 1.1693,
      "num_input_tokens_seen": 177960,
      "step": 11
    },
    {
      "epoch": 0.0008405789487509522,
      "grad_norm": 6.784447193145752,
      "learning_rate": 9.991602101576183e-05,
      "loss": 1.3744,
      "num_input_tokens_seen": 194344,
      "step": 12
    },
    {
      "epoch": 0.0009106271944801982,
      "grad_norm": 6.7418437004089355,
      "learning_rate": 9.990902276707532e-05,
      "loss": 1.22,
      "num_input_tokens_seen": 210728,
      "step": 13
    },
    {
      "epoch": 0.0009806754402094443,
      "grad_norm": 6.43395471572876,
      "learning_rate": 9.990202451838879e-05,
      "loss": 1.1772,
      "num_input_tokens_seen": 227112,
      "step": 14
    },
    {
      "epoch": 0.0010507236859386903,
      "grad_norm": 6.09422492980957,
      "learning_rate": 9.989502626970228e-05,
      "loss": 1.195,
      "num_input_tokens_seen": 243496,
      "step": 15
    },
    {
      "epoch": 0.0011207719316679364,
      "grad_norm": 6.238271236419678,
      "learning_rate": 9.988802802101577e-05,
      "loss": 1.2623,
      "num_input_tokens_seen": 259744,
      "step": 16
    },
    {
      "epoch": 0.0011908201773971822,
      "grad_norm": 6.56187629699707,
      "learning_rate": 9.988102977232926e-05,
      "loss": 1.2721,
      "num_input_tokens_seen": 276128,
      "step": 17
    },
    {
      "epoch": 0.0012608684231264283,
      "grad_norm": 6.818358898162842,
      "learning_rate": 9.987403152364275e-05,
      "loss": 1.2649,
      "num_input_tokens_seen": 292512,
      "step": 18
    },
    {
      "epoch": 0.0013309166688556743,
      "grad_norm": 5.950352191925049,
      "learning_rate": 9.986703327495622e-05,
      "loss": 1.0024,
      "num_input_tokens_seen": 308632,
      "step": 19
    },
    {
      "epoch": 0.0014009649145849204,
      "grad_norm": 6.387479305267334,
      "learning_rate": 9.986003502626971e-05,
      "loss": 1.2783,
      "num_input_tokens_seen": 325016,
      "step": 20
    },
    {
      "epoch": 0.0014710131603141664,
      "grad_norm": 6.187346458435059,
      "learning_rate": 9.985303677758318e-05,
      "loss": 1.1701,
      "num_input_tokens_seen": 341384,
      "step": 21
    },
    {
      "epoch": 0.0015410614060434125,
      "grad_norm": 5.371951103210449,
      "learning_rate": 9.984603852889667e-05,
      "loss": 1.0483,
      "num_input_tokens_seen": 357768,
      "step": 22
    },
    {
      "epoch": 0.0016111096517726585,
      "grad_norm": 6.2206807136535645,
      "learning_rate": 9.983904028021016e-05,
      "loss": 1.2516,
      "num_input_tokens_seen": 374152,
      "step": 23
    },
    {
      "epoch": 0.0016811578975019044,
      "grad_norm": 6.121264457702637,
      "learning_rate": 9.983204203152365e-05,
      "loss": 1.1506,
      "num_input_tokens_seen": 390536,
      "step": 24
    },
    {
      "epoch": 0.0017512061432311504,
      "grad_norm": 6.353756904602051,
      "learning_rate": 9.982504378283714e-05,
      "loss": 1.3118,
      "num_input_tokens_seen": 406920,
      "step": 25
    },
    {
      "epoch": 0.0018212543889603965,
      "grad_norm": 6.270686149597168,
      "learning_rate": 9.981804553415061e-05,
      "loss": 1.0883,
      "num_input_tokens_seen": 422728,
      "step": 26
    },
    {
      "epoch": 0.0018913026346896425,
      "grad_norm": 6.117632865905762,
      "learning_rate": 9.98110472854641e-05,
      "loss": 1.3346,
      "num_input_tokens_seen": 439112,
      "step": 27
    },
    {
      "epoch": 0.0019613508804188886,
      "grad_norm": 6.429015159606934,
      "learning_rate": 9.980404903677759e-05,
      "loss": 1.2494,
      "num_input_tokens_seen": 455144,
      "step": 28
    },
    {
      "epoch": 0.0020313991261481346,
      "grad_norm": 6.4467620849609375,
      "learning_rate": 9.979705078809107e-05,
      "loss": 1.3335,
      "num_input_tokens_seen": 470360,
      "step": 29
    },
    {
      "epoch": 0.0021014473718773807,
      "grad_norm": 6.57926082611084,
      "learning_rate": 9.979005253940455e-05,
      "loss": 1.2126,
      "num_input_tokens_seen": 486120,
      "step": 30
    },
    {
      "epoch": 0.0021714956176066267,
      "grad_norm": 5.650569915771484,
      "learning_rate": 9.978305429071804e-05,
      "loss": 1.1363,
      "num_input_tokens_seen": 501896,
      "step": 31
    },
    {
      "epoch": 0.0022415438633358728,
      "grad_norm": 6.380292892456055,
      "learning_rate": 9.977605604203153e-05,
      "loss": 1.2251,
      "num_input_tokens_seen": 517752,
      "step": 32
    },
    {
      "epoch": 0.002311592109065119,
      "grad_norm": 5.704173564910889,
      "learning_rate": 9.976905779334502e-05,
      "loss": 1.1685,
      "num_input_tokens_seen": 534136,
      "step": 33
    },
    {
      "epoch": 0.0023816403547943644,
      "grad_norm": 5.342978000640869,
      "learning_rate": 9.97620595446585e-05,
      "loss": 1.2012,
      "num_input_tokens_seen": 550216,
      "step": 34
    },
    {
      "epoch": 0.0024516886005236105,
      "grad_norm": 5.7014241218566895,
      "learning_rate": 9.975506129597198e-05,
      "loss": 1.2342,
      "num_input_tokens_seen": 566600,
      "step": 35
    },
    {
      "epoch": 0.0025217368462528565,
      "grad_norm": 6.26229190826416,
      "learning_rate": 9.974806304728546e-05,
      "loss": 1.2041,
      "num_input_tokens_seen": 582984,
      "step": 36
    },
    {
      "epoch": 0.0025917850919821026,
      "grad_norm": 6.583463191986084,
      "learning_rate": 9.974106479859896e-05,
      "loss": 1.3021,
      "num_input_tokens_seen": 598968,
      "step": 37
    },
    {
      "epoch": 0.0026618333377113486,
      "grad_norm": 5.58498477935791,
      "learning_rate": 9.973406654991245e-05,
      "loss": 1.1622,
      "num_input_tokens_seen": 614840,
      "step": 38
    },
    {
      "epoch": 0.0027318815834405947,
      "grad_norm": 5.906906604766846,
      "learning_rate": 9.972706830122592e-05,
      "loss": 1.1971,
      "num_input_tokens_seen": 631224,
      "step": 39
    },
    {
      "epoch": 0.0028019298291698407,
      "grad_norm": 5.962359428405762,
      "learning_rate": 9.972007005253941e-05,
      "loss": 1.1326,
      "num_input_tokens_seen": 647000,
      "step": 40
    },
    {
      "epoch": 0.002871978074899087,
      "grad_norm": 6.447500705718994,
      "learning_rate": 9.971307180385289e-05,
      "loss": 1.0905,
      "num_input_tokens_seen": 662480,
      "step": 41
    },
    {
      "epoch": 0.002942026320628333,
      "grad_norm": 5.7290520668029785,
      "learning_rate": 9.970607355516638e-05,
      "loss": 1.3585,
      "num_input_tokens_seen": 678480,
      "step": 42
    },
    {
      "epoch": 0.003012074566357579,
      "grad_norm": 6.063445568084717,
      "learning_rate": 9.969907530647987e-05,
      "loss": 1.2841,
      "num_input_tokens_seen": 694256,
      "step": 43
    },
    {
      "epoch": 0.003082122812086825,
      "grad_norm": 5.302809238433838,
      "learning_rate": 9.969207705779335e-05,
      "loss": 1.1168,
      "num_input_tokens_seen": 710152,
      "step": 44
    },
    {
      "epoch": 0.003152171057816071,
      "grad_norm": 5.634128093719482,
      "learning_rate": 9.968507880910684e-05,
      "loss": 1.0609,
      "num_input_tokens_seen": 726184,
      "step": 45
    },
    {
      "epoch": 0.003222219303545317,
      "grad_norm": 5.652642726898193,
      "learning_rate": 9.967808056042032e-05,
      "loss": 1.2228,
      "num_input_tokens_seen": 742520,
      "step": 46
    },
    {
      "epoch": 0.0032922675492745627,
      "grad_norm": 5.340751647949219,
      "learning_rate": 9.96710823117338e-05,
      "loss": 1.0595,
      "num_input_tokens_seen": 758904,
      "step": 47
    },
    {
      "epoch": 0.0033623157950038087,
      "grad_norm": 5.422239780426025,
      "learning_rate": 9.966408406304728e-05,
      "loss": 1.1161,
      "num_input_tokens_seen": 775040,
      "step": 48
    },
    {
      "epoch": 0.0034323640407330548,
      "grad_norm": 5.29241418838501,
      "learning_rate": 9.965708581436077e-05,
      "loss": 1.0255,
      "num_input_tokens_seen": 790856,
      "step": 49
    },
    {
      "epoch": 0.003502412286462301,
      "grad_norm": 5.146270275115967,
      "learning_rate": 9.965008756567426e-05,
      "loss": 0.9762,
      "num_input_tokens_seen": 807064,
      "step": 50
    },
    {
      "epoch": 0.003572460532191547,
      "grad_norm": 5.825758457183838,
      "learning_rate": 9.964308931698775e-05,
      "loss": 1.2108,
      "num_input_tokens_seen": 823448,
      "step": 51
    },
    {
      "epoch": 0.003642508777920793,
      "grad_norm": 6.179538726806641,
      "learning_rate": 9.963609106830124e-05,
      "loss": 1.322,
      "num_input_tokens_seen": 838888,
      "step": 52
    },
    {
      "epoch": 0.003712557023650039,
      "grad_norm": 6.464454174041748,
      "learning_rate": 9.962909281961471e-05,
      "loss": 1.5077,
      "num_input_tokens_seen": 855272,
      "step": 53
    },
    {
      "epoch": 0.003782605269379285,
      "grad_norm": 5.4227294921875,
      "learning_rate": 9.96220945709282e-05,
      "loss": 1.2679,
      "num_input_tokens_seen": 871656,
      "step": 54
    },
    {
      "epoch": 0.003852653515108531,
      "grad_norm": 5.949041366577148,
      "learning_rate": 9.961509632224169e-05,
      "loss": 1.3618,
      "num_input_tokens_seen": 888040,
      "step": 55
    },
    {
      "epoch": 0.003922701760837777,
      "grad_norm": 6.050904750823975,
      "learning_rate": 9.960809807355516e-05,
      "loss": 1.3155,
      "num_input_tokens_seen": 904400,
      "step": 56
    },
    {
      "epoch": 0.003992750006567023,
      "grad_norm": 6.048308849334717,
      "learning_rate": 9.960109982486866e-05,
      "loss": 1.3131,
      "num_input_tokens_seen": 919952,
      "step": 57
    },
    {
      "epoch": 0.004062798252296269,
      "grad_norm": 5.683863162994385,
      "learning_rate": 9.959410157618214e-05,
      "loss": 1.1692,
      "num_input_tokens_seen": 936336,
      "step": 58
    },
    {
      "epoch": 0.004132846498025515,
      "grad_norm": 5.449287414550781,
      "learning_rate": 9.958710332749563e-05,
      "loss": 1.0613,
      "num_input_tokens_seen": 952152,
      "step": 59
    },
    {
      "epoch": 0.004202894743754761,
      "grad_norm": 5.31496524810791,
      "learning_rate": 9.958010507880912e-05,
      "loss": 0.9605,
      "num_input_tokens_seen": 967824,
      "step": 60
    },
    {
      "epoch": 0.004272942989484007,
      "grad_norm": 5.57105016708374,
      "learning_rate": 9.957310683012259e-05,
      "loss": 1.1701,
      "num_input_tokens_seen": 983864,
      "step": 61
    },
    {
      "epoch": 0.004342991235213253,
      "grad_norm": 5.3456830978393555,
      "learning_rate": 9.956610858143608e-05,
      "loss": 1.0995,
      "num_input_tokens_seen": 1000248,
      "step": 62
    },
    {
      "epoch": 0.004413039480942499,
      "grad_norm": 5.453295707702637,
      "learning_rate": 9.955911033274957e-05,
      "loss": 1.2413,
      "num_input_tokens_seen": 1016632,
      "step": 63
    },
    {
      "epoch": 0.0044830877266717455,
      "grad_norm": 4.975449562072754,
      "learning_rate": 9.955211208406306e-05,
      "loss": 1.0961,
      "num_input_tokens_seen": 1033016,
      "step": 64
    },
    {
      "epoch": 0.004553135972400991,
      "grad_norm": 5.542137145996094,
      "learning_rate": 9.954511383537655e-05,
      "loss": 1.1171,
      "num_input_tokens_seen": 1049400,
      "step": 65
    },
    {
      "epoch": 0.004623184218130238,
      "grad_norm": 5.213950157165527,
      "learning_rate": 9.953811558669002e-05,
      "loss": 1.2228,
      "num_input_tokens_seen": 1065784,
      "step": 66
    },
    {
      "epoch": 0.004693232463859483,
      "grad_norm": 5.496099948883057,
      "learning_rate": 9.953111733800351e-05,
      "loss": 1.1529,
      "num_input_tokens_seen": 1082168,
      "step": 67
    },
    {
      "epoch": 0.004763280709588729,
      "grad_norm": 5.64145565032959,
      "learning_rate": 9.952411908931698e-05,
      "loss": 1.2301,
      "num_input_tokens_seen": 1098024,
      "step": 68
    },
    {
      "epoch": 0.004833328955317975,
      "grad_norm": 5.566709995269775,
      "learning_rate": 9.951712084063047e-05,
      "loss": 1.2679,
      "num_input_tokens_seen": 1114408,
      "step": 69
    },
    {
      "epoch": 0.004903377201047221,
      "grad_norm": 6.443673133850098,
      "learning_rate": 9.951012259194396e-05,
      "loss": 1.2313,
      "num_input_tokens_seen": 1130792,
      "step": 70
    },
    {
      "epoch": 0.0049734254467764675,
      "grad_norm": 5.882962226867676,
      "learning_rate": 9.950312434325745e-05,
      "loss": 1.4304,
      "num_input_tokens_seen": 1147176,
      "step": 71
    },
    {
      "epoch": 0.005043473692505713,
      "grad_norm": 6.0052666664123535,
      "learning_rate": 9.949612609457094e-05,
      "loss": 1.3027,
      "num_input_tokens_seen": 1160968,
      "step": 72
    },
    {
      "epoch": 0.0051135219382349596,
      "grad_norm": 5.260256767272949,
      "learning_rate": 9.948912784588441e-05,
      "loss": 1.1526,
      "num_input_tokens_seen": 1177352,
      "step": 73
    },
    {
      "epoch": 0.005183570183964205,
      "grad_norm": 5.641814708709717,
      "learning_rate": 9.94821295971979e-05,
      "loss": 1.0666,
      "num_input_tokens_seen": 1193032,
      "step": 74
    },
    {
      "epoch": 0.005253618429693452,
      "grad_norm": 5.121115207672119,
      "learning_rate": 9.947513134851138e-05,
      "loss": 1.2404,
      "num_input_tokens_seen": 1208952,
      "step": 75
    },
    {
      "epoch": 0.005323666675422697,
      "grad_norm": 5.63930082321167,
      "learning_rate": 9.946813309982487e-05,
      "loss": 1.5127,
      "num_input_tokens_seen": 1225000,
      "step": 76
    },
    {
      "epoch": 0.005393714921151944,
      "grad_norm": 4.880716800689697,
      "learning_rate": 9.946113485113837e-05,
      "loss": 1.1484,
      "num_input_tokens_seen": 1241384,
      "step": 77
    },
    {
      "epoch": 0.005463763166881189,
      "grad_norm": 5.59611177444458,
      "learning_rate": 9.945413660245184e-05,
      "loss": 1.1678,
      "num_input_tokens_seen": 1257680,
      "step": 78
    },
    {
      "epoch": 0.005533811412610436,
      "grad_norm": 5.052026271820068,
      "learning_rate": 9.944713835376533e-05,
      "loss": 1.2207,
      "num_input_tokens_seen": 1274064,
      "step": 79
    },
    {
      "epoch": 0.0056038596583396815,
      "grad_norm": 5.285096168518066,
      "learning_rate": 9.944014010507881e-05,
      "loss": 1.1457,
      "num_input_tokens_seen": 1290448,
      "step": 80
    },
    {
      "epoch": 0.005673907904068927,
      "grad_norm": 5.4286580085754395,
      "learning_rate": 9.94331418563923e-05,
      "loss": 1.3047,
      "num_input_tokens_seen": 1306832,
      "step": 81
    },
    {
      "epoch": 0.005743956149798174,
      "grad_norm": 5.937953472137451,
      "learning_rate": 9.942614360770578e-05,
      "loss": 1.4353,
      "num_input_tokens_seen": 1323216,
      "step": 82
    },
    {
      "epoch": 0.005814004395527419,
      "grad_norm": 5.129006385803223,
      "learning_rate": 9.941914535901927e-05,
      "loss": 1.1434,
      "num_input_tokens_seen": 1339408,
      "step": 83
    },
    {
      "epoch": 0.005884052641256666,
      "grad_norm": 5.179675102233887,
      "learning_rate": 9.941214711033276e-05,
      "loss": 1.2452,
      "num_input_tokens_seen": 1355792,
      "step": 84
    },
    {
      "epoch": 0.005954100886985911,
      "grad_norm": 4.912832736968994,
      "learning_rate": 9.940514886164624e-05,
      "loss": 1.1255,
      "num_input_tokens_seen": 1372176,
      "step": 85
    },
    {
      "epoch": 0.006024149132715158,
      "grad_norm": 5.190899848937988,
      "learning_rate": 9.939815061295973e-05,
      "loss": 1.2543,
      "num_input_tokens_seen": 1388560,
      "step": 86
    },
    {
      "epoch": 0.006094197378444403,
      "grad_norm": 5.1751275062561035,
      "learning_rate": 9.939115236427321e-05,
      "loss": 1.3145,
      "num_input_tokens_seen": 1404944,
      "step": 87
    },
    {
      "epoch": 0.00616424562417365,
      "grad_norm": 5.450705528259277,
      "learning_rate": 9.938415411558669e-05,
      "loss": 1.2844,
      "num_input_tokens_seen": 1421328,
      "step": 88
    },
    {
      "epoch": 0.0062342938699028955,
      "grad_norm": 5.593935012817383,
      "learning_rate": 9.937715586690018e-05,
      "loss": 1.3284,
      "num_input_tokens_seen": 1437464,
      "step": 89
    },
    {
      "epoch": 0.006304342115632142,
      "grad_norm": 5.156428813934326,
      "learning_rate": 9.937015761821367e-05,
      "loss": 1.1682,
      "num_input_tokens_seen": 1452952,
      "step": 90
    },
    {
      "epoch": 0.006374390361361388,
      "grad_norm": 4.673638820648193,
      "learning_rate": 9.936315936952715e-05,
      "loss": 1.004,
      "num_input_tokens_seen": 1469336,
      "step": 91
    },
    {
      "epoch": 0.006444438607090634,
      "grad_norm": 4.996700763702393,
      "learning_rate": 9.935616112084064e-05,
      "loss": 1.087,
      "num_input_tokens_seen": 1485448,
      "step": 92
    },
    {
      "epoch": 0.00651448685281988,
      "grad_norm": 4.817474365234375,
      "learning_rate": 9.934916287215412e-05,
      "loss": 1.151,
      "num_input_tokens_seen": 1501472,
      "step": 93
    },
    {
      "epoch": 0.006584535098549125,
      "grad_norm": 5.400479316711426,
      "learning_rate": 9.934216462346761e-05,
      "loss": 1.3144,
      "num_input_tokens_seen": 1516424,
      "step": 94
    },
    {
      "epoch": 0.006654583344278372,
      "grad_norm": 5.232216835021973,
      "learning_rate": 9.933516637478108e-05,
      "loss": 1.0019,
      "num_input_tokens_seen": 1532792,
      "step": 95
    },
    {
      "epoch": 0.006724631590007617,
      "grad_norm": 5.392521381378174,
      "learning_rate": 9.932816812609457e-05,
      "loss": 1.3195,
      "num_input_tokens_seen": 1548600,
      "step": 96
    },
    {
      "epoch": 0.006794679835736864,
      "grad_norm": 5.5280866622924805,
      "learning_rate": 9.932116987740806e-05,
      "loss": 1.283,
      "num_input_tokens_seen": 1564088,
      "step": 97
    },
    {
      "epoch": 0.0068647280814661095,
      "grad_norm": 4.963179588317871,
      "learning_rate": 9.931417162872155e-05,
      "loss": 1.2716,
      "num_input_tokens_seen": 1580040,
      "step": 98
    },
    {
      "epoch": 0.006934776327195356,
      "grad_norm": 4.920302391052246,
      "learning_rate": 9.930717338003504e-05,
      "loss": 1.088,
      "num_input_tokens_seen": 1595880,
      "step": 99
    },
    {
      "epoch": 0.007004824572924602,
      "grad_norm": 4.935486793518066,
      "learning_rate": 9.930017513134851e-05,
      "loss": 1.0122,
      "num_input_tokens_seen": 1611864,
      "step": 100
    },
    {
      "epoch": 0.007074872818653848,
      "grad_norm": 5.099087238311768,
      "learning_rate": 9.9293176882662e-05,
      "loss": 1.1605,
      "num_input_tokens_seen": 1627472,
      "step": 101
    },
    {
      "epoch": 0.007144921064383094,
      "grad_norm": 5.3764328956604,
      "learning_rate": 9.928617863397548e-05,
      "loss": 1.2225,
      "num_input_tokens_seen": 1643856,
      "step": 102
    },
    {
      "epoch": 0.00721496931011234,
      "grad_norm": 5.281564712524414,
      "learning_rate": 9.927918038528898e-05,
      "loss": 1.1483,
      "num_input_tokens_seen": 1660240,
      "step": 103
    },
    {
      "epoch": 0.007285017555841586,
      "grad_norm": 5.395167827606201,
      "learning_rate": 9.927218213660247e-05,
      "loss": 1.6014,
      "num_input_tokens_seen": 1676624,
      "step": 104
    },
    {
      "epoch": 0.007355065801570832,
      "grad_norm": 5.322319507598877,
      "learning_rate": 9.926518388791594e-05,
      "loss": 1.0933,
      "num_input_tokens_seen": 1693008,
      "step": 105
    },
    {
      "epoch": 0.007425114047300078,
      "grad_norm": 5.301229953765869,
      "learning_rate": 9.925818563922943e-05,
      "loss": 1.1998,
      "num_input_tokens_seen": 1708424,
      "step": 106
    },
    {
      "epoch": 0.0074951622930293236,
      "grad_norm": 4.958597183227539,
      "learning_rate": 9.92511873905429e-05,
      "loss": 1.3285,
      "num_input_tokens_seen": 1724808,
      "step": 107
    },
    {
      "epoch": 0.00756521053875857,
      "grad_norm": 4.3913960456848145,
      "learning_rate": 9.924418914185639e-05,
      "loss": 0.9017,
      "num_input_tokens_seen": 1740752,
      "step": 108
    },
    {
      "epoch": 0.007635258784487816,
      "grad_norm": 5.401021480560303,
      "learning_rate": 9.923719089316988e-05,
      "loss": 1.3646,
      "num_input_tokens_seen": 1755176,
      "step": 109
    },
    {
      "epoch": 0.007705307030217062,
      "grad_norm": 4.894444942474365,
      "learning_rate": 9.923019264448337e-05,
      "loss": 0.9955,
      "num_input_tokens_seen": 1771560,
      "step": 110
    },
    {
      "epoch": 0.007775355275946308,
      "grad_norm": 4.878688335418701,
      "learning_rate": 9.922319439579686e-05,
      "loss": 1.1766,
      "num_input_tokens_seen": 1787944,
      "step": 111
    },
    {
      "epoch": 0.007845403521675554,
      "grad_norm": 4.9379777908325195,
      "learning_rate": 9.921619614711033e-05,
      "loss": 1.1631,
      "num_input_tokens_seen": 1803568,
      "step": 112
    },
    {
      "epoch": 0.0079154517674048,
      "grad_norm": 5.101811408996582,
      "learning_rate": 9.920919789842382e-05,
      "loss": 1.2165,
      "num_input_tokens_seen": 1819952,
      "step": 113
    },
    {
      "epoch": 0.007985500013134045,
      "grad_norm": 5.32574987411499,
      "learning_rate": 9.920219964973731e-05,
      "loss": 1.3012,
      "num_input_tokens_seen": 1835296,
      "step": 114
    },
    {
      "epoch": 0.008055548258863293,
      "grad_norm": 5.2391180992126465,
      "learning_rate": 9.919520140105079e-05,
      "loss": 1.2451,
      "num_input_tokens_seen": 1851224,
      "step": 115
    },
    {
      "epoch": 0.008125596504592538,
      "grad_norm": 4.865017890930176,
      "learning_rate": 9.918820315236427e-05,
      "loss": 1.1683,
      "num_input_tokens_seen": 1867608,
      "step": 116
    },
    {
      "epoch": 0.008195644750321784,
      "grad_norm": 4.943136215209961,
      "learning_rate": 9.918120490367776e-05,
      "loss": 1.31,
      "num_input_tokens_seen": 1883696,
      "step": 117
    },
    {
      "epoch": 0.00826569299605103,
      "grad_norm": 4.769871711730957,
      "learning_rate": 9.917420665499125e-05,
      "loss": 1.1212,
      "num_input_tokens_seen": 1900080,
      "step": 118
    },
    {
      "epoch": 0.008335741241780275,
      "grad_norm": 4.785780429840088,
      "learning_rate": 9.916720840630474e-05,
      "loss": 1.2415,
      "num_input_tokens_seen": 1916464,
      "step": 119
    },
    {
      "epoch": 0.008405789487509523,
      "grad_norm": 4.802333831787109,
      "learning_rate": 9.916021015761822e-05,
      "loss": 1.0513,
      "num_input_tokens_seen": 1932848,
      "step": 120
    },
    {
      "epoch": 0.008475837733238768,
      "grad_norm": 5.22212553024292,
      "learning_rate": 9.91532119089317e-05,
      "loss": 1.2574,
      "num_input_tokens_seen": 1949232,
      "step": 121
    },
    {
      "epoch": 0.008545885978968014,
      "grad_norm": 5.104204177856445,
      "learning_rate": 9.914621366024518e-05,
      "loss": 1.0436,
      "num_input_tokens_seen": 1964184,
      "step": 122
    },
    {
      "epoch": 0.00861593422469726,
      "grad_norm": 5.11055326461792,
      "learning_rate": 9.913921541155868e-05,
      "loss": 1.1939,
      "num_input_tokens_seen": 1980568,
      "step": 123
    },
    {
      "epoch": 0.008685982470426507,
      "grad_norm": 4.784866809844971,
      "learning_rate": 9.913221716287216e-05,
      "loss": 1.2056,
      "num_input_tokens_seen": 1996952,
      "step": 124
    },
    {
      "epoch": 0.008756030716155752,
      "grad_norm": 4.763037204742432,
      "learning_rate": 9.912521891418564e-05,
      "loss": 1.1403,
      "num_input_tokens_seen": 2013336,
      "step": 125
    },
    {
      "epoch": 0.008826078961884998,
      "grad_norm": 4.813408851623535,
      "learning_rate": 9.911822066549913e-05,
      "loss": 1.1897,
      "num_input_tokens_seen": 2029720,
      "step": 126
    },
    {
      "epoch": 0.008896127207614244,
      "grad_norm": 4.79008674621582,
      "learning_rate": 9.911122241681261e-05,
      "loss": 1.2315,
      "num_input_tokens_seen": 2046104,
      "step": 127
    },
    {
      "epoch": 0.008966175453343491,
      "grad_norm": 4.843508720397949,
      "learning_rate": 9.91042241681261e-05,
      "loss": 1.0883,
      "num_input_tokens_seen": 2061592,
      "step": 128
    },
    {
      "epoch": 0.009036223699072737,
      "grad_norm": 4.917592525482178,
      "learning_rate": 9.909722591943959e-05,
      "loss": 1.2512,
      "num_input_tokens_seen": 2077792,
      "step": 129
    },
    {
      "epoch": 0.009106271944801982,
      "grad_norm": 4.9154133796691895,
      "learning_rate": 9.909022767075307e-05,
      "loss": 1.3284,
      "num_input_tokens_seen": 2094176,
      "step": 130
    },
    {
      "epoch": 0.009176320190531228,
      "grad_norm": 5.2125420570373535,
      "learning_rate": 9.908322942206656e-05,
      "loss": 1.3469,
      "num_input_tokens_seen": 2110480,
      "step": 131
    },
    {
      "epoch": 0.009246368436260475,
      "grad_norm": 4.715712547302246,
      "learning_rate": 9.907623117338004e-05,
      "loss": 1.0844,
      "num_input_tokens_seen": 2126864,
      "step": 132
    },
    {
      "epoch": 0.009316416681989721,
      "grad_norm": 4.805694580078125,
      "learning_rate": 9.906923292469353e-05,
      "loss": 1.069,
      "num_input_tokens_seen": 2142848,
      "step": 133
    },
    {
      "epoch": 0.009386464927718966,
      "grad_norm": 4.961355209350586,
      "learning_rate": 9.9062234676007e-05,
      "loss": 1.3387,
      "num_input_tokens_seen": 2159232,
      "step": 134
    },
    {
      "epoch": 0.009456513173448212,
      "grad_norm": 4.582219123840332,
      "learning_rate": 9.905523642732049e-05,
      "loss": 1.2013,
      "num_input_tokens_seen": 2175616,
      "step": 135
    },
    {
      "epoch": 0.009526561419177458,
      "grad_norm": 5.195998191833496,
      "learning_rate": 9.904823817863398e-05,
      "loss": 1.2552,
      "num_input_tokens_seen": 2191872,
      "step": 136
    },
    {
      "epoch": 0.009596609664906705,
      "grad_norm": 4.934189319610596,
      "learning_rate": 9.904123992994747e-05,
      "loss": 1.2961,
      "num_input_tokens_seen": 2208208,
      "step": 137
    },
    {
      "epoch": 0.00966665791063595,
      "grad_norm": 4.981037616729736,
      "learning_rate": 9.903424168126096e-05,
      "loss": 1.1546,
      "num_input_tokens_seen": 2224592,
      "step": 138
    },
    {
      "epoch": 0.009736706156365196,
      "grad_norm": 5.469496250152588,
      "learning_rate": 9.902724343257443e-05,
      "loss": 1.3833,
      "num_input_tokens_seen": 2240976,
      "step": 139
    },
    {
      "epoch": 0.009806754402094442,
      "grad_norm": 4.889583587646484,
      "learning_rate": 9.902024518388792e-05,
      "loss": 1.2095,
      "num_input_tokens_seen": 2257360,
      "step": 140
    },
    {
      "epoch": 0.00987680264782369,
      "grad_norm": 4.532052516937256,
      "learning_rate": 9.901324693520141e-05,
      "loss": 1.143,
      "num_input_tokens_seen": 2272848,
      "step": 141
    },
    {
      "epoch": 0.009946850893552935,
      "grad_norm": 5.278079032897949,
      "learning_rate": 9.900624868651488e-05,
      "loss": 1.2849,
      "num_input_tokens_seen": 2289232,
      "step": 142
    },
    {
      "epoch": 0.01001689913928218,
      "grad_norm": 4.549891948699951,
      "learning_rate": 9.899925043782839e-05,
      "loss": 1.0482,
      "num_input_tokens_seen": 2305424,
      "step": 143
    },
    {
      "epoch": 0.010086947385011426,
      "grad_norm": 4.7777180671691895,
      "learning_rate": 9.899225218914186e-05,
      "loss": 1.1926,
      "num_input_tokens_seen": 2320968,
      "step": 144
    },
    {
      "epoch": 0.010156995630740673,
      "grad_norm": 4.320313453674316,
      "learning_rate": 9.898525394045535e-05,
      "loss": 1.0468,
      "num_input_tokens_seen": 2337352,
      "step": 145
    },
    {
      "epoch": 0.010227043876469919,
      "grad_norm": 4.915202617645264,
      "learning_rate": 9.897825569176882e-05,
      "loss": 1.1326,
      "num_input_tokens_seen": 2353064,
      "step": 146
    },
    {
      "epoch": 0.010297092122199165,
      "grad_norm": 4.569783687591553,
      "learning_rate": 9.897125744308231e-05,
      "loss": 0.8586,
      "num_input_tokens_seen": 2369128,
      "step": 147
    },
    {
      "epoch": 0.01036714036792841,
      "grad_norm": 4.591664791107178,
      "learning_rate": 9.89642591943958e-05,
      "loss": 1.1369,
      "num_input_tokens_seen": 2385512,
      "step": 148
    },
    {
      "epoch": 0.010437188613657656,
      "grad_norm": 4.913016319274902,
      "learning_rate": 9.895726094570929e-05,
      "loss": 1.1564,
      "num_input_tokens_seen": 2401208,
      "step": 149
    },
    {
      "epoch": 0.010507236859386903,
      "grad_norm": 4.908018112182617,
      "learning_rate": 9.895026269702278e-05,
      "loss": 1.1247,
      "num_input_tokens_seen": 2417592,
      "step": 150
    },
    {
      "epoch": 0.010577285105116149,
      "grad_norm": 4.536910057067871,
      "learning_rate": 9.894326444833625e-05,
      "loss": 1.014,
      "num_input_tokens_seen": 2433976,
      "step": 151
    },
    {
      "epoch": 0.010647333350845395,
      "grad_norm": 4.899227142333984,
      "learning_rate": 9.893626619964974e-05,
      "loss": 1.0418,
      "num_input_tokens_seen": 2448072,
      "step": 152
    },
    {
      "epoch": 0.01071738159657464,
      "grad_norm": 4.600861072540283,
      "learning_rate": 9.892926795096323e-05,
      "loss": 1.0459,
      "num_input_tokens_seen": 2464240,
      "step": 153
    },
    {
      "epoch": 0.010787429842303888,
      "grad_norm": 4.707681179046631,
      "learning_rate": 9.89222697022767e-05,
      "loss": 1.0859,
      "num_input_tokens_seen": 2480624,
      "step": 154
    },
    {
      "epoch": 0.010857478088033133,
      "grad_norm": 4.748518466949463,
      "learning_rate": 9.89152714535902e-05,
      "loss": 1.0608,
      "num_input_tokens_seen": 2497008,
      "step": 155
    },
    {
      "epoch": 0.010927526333762379,
      "grad_norm": 4.794179439544678,
      "learning_rate": 9.890827320490368e-05,
      "loss": 1.2243,
      "num_input_tokens_seen": 2513392,
      "step": 156
    },
    {
      "epoch": 0.010997574579491624,
      "grad_norm": 4.593925476074219,
      "learning_rate": 9.890127495621717e-05,
      "loss": 1.1002,
      "num_input_tokens_seen": 2529776,
      "step": 157
    },
    {
      "epoch": 0.011067622825220872,
      "grad_norm": 4.318257808685303,
      "learning_rate": 9.889427670753066e-05,
      "loss": 0.9561,
      "num_input_tokens_seen": 2546160,
      "step": 158
    },
    {
      "epoch": 0.011137671070950117,
      "grad_norm": 4.631777286529541,
      "learning_rate": 9.888727845884414e-05,
      "loss": 1.1553,
      "num_input_tokens_seen": 2562544,
      "step": 159
    },
    {
      "epoch": 0.011207719316679363,
      "grad_norm": 4.896609783172607,
      "learning_rate": 9.888028021015762e-05,
      "loss": 1.1779,
      "num_input_tokens_seen": 2578088,
      "step": 160
    },
    {
      "epoch": 0.011277767562408609,
      "grad_norm": 4.3978681564331055,
      "learning_rate": 9.88732819614711e-05,
      "loss": 1.1778,
      "num_input_tokens_seen": 2594416,
      "step": 161
    },
    {
      "epoch": 0.011347815808137854,
      "grad_norm": 4.82927942276001,
      "learning_rate": 9.886628371278459e-05,
      "loss": 1.0339,
      "num_input_tokens_seen": 2609776,
      "step": 162
    },
    {
      "epoch": 0.011417864053867102,
      "grad_norm": 4.413319110870361,
      "learning_rate": 9.885928546409809e-05,
      "loss": 1.0992,
      "num_input_tokens_seen": 2626160,
      "step": 163
    },
    {
      "epoch": 0.011487912299596347,
      "grad_norm": 4.626354694366455,
      "learning_rate": 9.885228721541156e-05,
      "loss": 1.1948,
      "num_input_tokens_seen": 2642464,
      "step": 164
    },
    {
      "epoch": 0.011557960545325593,
      "grad_norm": 4.328434467315674,
      "learning_rate": 9.884528896672505e-05,
      "loss": 1.1493,
      "num_input_tokens_seen": 2658528,
      "step": 165
    },
    {
      "epoch": 0.011628008791054838,
      "grad_norm": 4.57839822769165,
      "learning_rate": 9.883829071803853e-05,
      "loss": 1.0775,
      "num_input_tokens_seen": 2674912,
      "step": 166
    },
    {
      "epoch": 0.011698057036784086,
      "grad_norm": 5.103973865509033,
      "learning_rate": 9.883129246935202e-05,
      "loss": 1.2458,
      "num_input_tokens_seen": 2690792,
      "step": 167
    },
    {
      "epoch": 0.011768105282513331,
      "grad_norm": 4.558016300201416,
      "learning_rate": 9.88242942206655e-05,
      "loss": 1.0122,
      "num_input_tokens_seen": 2705616,
      "step": 168
    },
    {
      "epoch": 0.011838153528242577,
      "grad_norm": 4.811260223388672,
      "learning_rate": 9.8817295971979e-05,
      "loss": 1.2989,
      "num_input_tokens_seen": 2721704,
      "step": 169
    },
    {
      "epoch": 0.011908201773971823,
      "grad_norm": 4.726966857910156,
      "learning_rate": 9.881029772329248e-05,
      "loss": 1.176,
      "num_input_tokens_seen": 2738088,
      "step": 170
    },
    {
      "epoch": 0.01197825001970107,
      "grad_norm": 4.874902725219727,
      "learning_rate": 9.880329947460596e-05,
      "loss": 1.2586,
      "num_input_tokens_seen": 2754040,
      "step": 171
    },
    {
      "epoch": 0.012048298265430316,
      "grad_norm": 4.379549980163574,
      "learning_rate": 9.879630122591945e-05,
      "loss": 1.1771,
      "num_input_tokens_seen": 2770424,
      "step": 172
    },
    {
      "epoch": 0.012118346511159561,
      "grad_norm": 4.455331802368164,
      "learning_rate": 9.878930297723292e-05,
      "loss": 1.0714,
      "num_input_tokens_seen": 2786808,
      "step": 173
    },
    {
      "epoch": 0.012188394756888807,
      "grad_norm": 4.42273473739624,
      "learning_rate": 9.878230472854641e-05,
      "loss": 1.1798,
      "num_input_tokens_seen": 2803176,
      "step": 174
    },
    {
      "epoch": 0.012258443002618052,
      "grad_norm": 4.4078874588012695,
      "learning_rate": 9.87753064798599e-05,
      "loss": 1.1672,
      "num_input_tokens_seen": 2819448,
      "step": 175
    },
    {
      "epoch": 0.0123284912483473,
      "grad_norm": 4.79048490524292,
      "learning_rate": 9.876830823117339e-05,
      "loss": 1.3331,
      "num_input_tokens_seen": 2835832,
      "step": 176
    },
    {
      "epoch": 0.012398539494076545,
      "grad_norm": 4.212133884429932,
      "learning_rate": 9.876130998248688e-05,
      "loss": 1.0007,
      "num_input_tokens_seen": 2851776,
      "step": 177
    },
    {
      "epoch": 0.012468587739805791,
      "grad_norm": 5.7587738037109375,
      "learning_rate": 9.875431173380035e-05,
      "loss": 1.4729,
      "num_input_tokens_seen": 2867896,
      "step": 178
    },
    {
      "epoch": 0.012538635985535037,
      "grad_norm": 4.3469462394714355,
      "learning_rate": 9.874731348511384e-05,
      "loss": 0.957,
      "num_input_tokens_seen": 2884280,
      "step": 179
    },
    {
      "epoch": 0.012608684231264284,
      "grad_norm": 4.584625244140625,
      "learning_rate": 9.874031523642733e-05,
      "loss": 1.0753,
      "num_input_tokens_seen": 2899208,
      "step": 180
    },
    {
      "epoch": 0.01267873247699353,
      "grad_norm": 4.544627666473389,
      "learning_rate": 9.87333169877408e-05,
      "loss": 1.1706,
      "num_input_tokens_seen": 2915416,
      "step": 181
    },
    {
      "epoch": 0.012748780722722775,
      "grad_norm": 4.8749237060546875,
      "learning_rate": 9.872631873905429e-05,
      "loss": 1.3382,
      "num_input_tokens_seen": 2931360,
      "step": 182
    },
    {
      "epoch": 0.01281882896845202,
      "grad_norm": 4.593903541564941,
      "learning_rate": 9.871932049036778e-05,
      "loss": 1.1588,
      "num_input_tokens_seen": 2947744,
      "step": 183
    },
    {
      "epoch": 0.012888877214181268,
      "grad_norm": 4.478219509124756,
      "learning_rate": 9.871232224168127e-05,
      "loss": 1.1013,
      "num_input_tokens_seen": 2963664,
      "step": 184
    },
    {
      "epoch": 0.012958925459910514,
      "grad_norm": 5.028106212615967,
      "learning_rate": 9.870532399299476e-05,
      "loss": 1.3223,
      "num_input_tokens_seen": 2980048,
      "step": 185
    },
    {
      "epoch": 0.01302897370563976,
      "grad_norm": 4.866946697235107,
      "learning_rate": 9.869832574430823e-05,
      "loss": 1.2376,
      "num_input_tokens_seen": 2995992,
      "step": 186
    },
    {
      "epoch": 0.013099021951369005,
      "grad_norm": 4.421341419219971,
      "learning_rate": 9.869132749562172e-05,
      "loss": 1.2252,
      "num_input_tokens_seen": 3012000,
      "step": 187
    },
    {
      "epoch": 0.01316907019709825,
      "grad_norm": 4.88083028793335,
      "learning_rate": 9.86843292469352e-05,
      "loss": 1.2951,
      "num_input_tokens_seen": 3028384,
      "step": 188
    },
    {
      "epoch": 0.013239118442827498,
      "grad_norm": 4.654318809509277,
      "learning_rate": 9.86773309982487e-05,
      "loss": 1.2839,
      "num_input_tokens_seen": 3044768,
      "step": 189
    },
    {
      "epoch": 0.013309166688556744,
      "grad_norm": 4.626763820648193,
      "learning_rate": 9.867033274956219e-05,
      "loss": 1.2389,
      "num_input_tokens_seen": 3061152,
      "step": 190
    },
    {
      "epoch": 0.01337921493428599,
      "grad_norm": 4.178484916687012,
      "learning_rate": 9.866333450087566e-05,
      "loss": 1.1186,
      "num_input_tokens_seen": 3077056,
      "step": 191
    },
    {
      "epoch": 0.013449263180015235,
      "grad_norm": 4.755034923553467,
      "learning_rate": 9.865633625218915e-05,
      "loss": 1.0594,
      "num_input_tokens_seen": 3093400,
      "step": 192
    },
    {
      "epoch": 0.013519311425744482,
      "grad_norm": 4.437506198883057,
      "learning_rate": 9.864933800350263e-05,
      "loss": 1.2078,
      "num_input_tokens_seen": 3109784,
      "step": 193
    },
    {
      "epoch": 0.013589359671473728,
      "grad_norm": 5.140488624572754,
      "learning_rate": 9.864233975481611e-05,
      "loss": 1.4312,
      "num_input_tokens_seen": 3124976,
      "step": 194
    },
    {
      "epoch": 0.013659407917202973,
      "grad_norm": 4.72155237197876,
      "learning_rate": 9.86353415061296e-05,
      "loss": 1.1752,
      "num_input_tokens_seen": 3140632,
      "step": 195
    },
    {
      "epoch": 0.013729456162932219,
      "grad_norm": 4.914645671844482,
      "learning_rate": 9.862834325744309e-05,
      "loss": 1.2464,
      "num_input_tokens_seen": 3156616,
      "step": 196
    },
    {
      "epoch": 0.013799504408661466,
      "grad_norm": 4.23387336730957,
      "learning_rate": 9.862134500875658e-05,
      "loss": 0.9722,
      "num_input_tokens_seen": 3172840,
      "step": 197
    },
    {
      "epoch": 0.013869552654390712,
      "grad_norm": 4.659370422363281,
      "learning_rate": 9.861434676007005e-05,
      "loss": 1.1981,
      "num_input_tokens_seen": 3188584,
      "step": 198
    },
    {
      "epoch": 0.013939600900119958,
      "grad_norm": 4.580902576446533,
      "learning_rate": 9.860734851138354e-05,
      "loss": 1.1913,
      "num_input_tokens_seen": 3204432,
      "step": 199
    },
    {
      "epoch": 0.014009649145849203,
      "grad_norm": 4.208237648010254,
      "learning_rate": 9.860035026269702e-05,
      "loss": 1.2056,
      "num_input_tokens_seen": 3220816,
      "step": 200
    },
    {
      "epoch": 0.014009649145849203,
      "eval_loss": 1.2226407527923584,
      "eval_runtime": 0.3992,
      "eval_samples_per_second": 2.505,
      "eval_steps_per_second": 2.505,
      "num_input_tokens_seen": 3220816,
      "step": 200
    },
    {
      "epoch": 0.014079697391578449,
      "grad_norm": 4.526260852813721,
      "learning_rate": 9.85933520140105e-05,
      "loss": 1.0488,
      "num_input_tokens_seen": 3237200,
      "step": 201
    },
    {
      "epoch": 0.014149745637307696,
      "grad_norm": 4.46895170211792,
      "learning_rate": 9.8586353765324e-05,
      "loss": 1.1101,
      "num_input_tokens_seen": 3253336,
      "step": 202
    },
    {
      "epoch": 0.014219793883036942,
      "grad_norm": 4.367347717285156,
      "learning_rate": 9.857935551663748e-05,
      "loss": 1.0425,
      "num_input_tokens_seen": 3269632,
      "step": 203
    },
    {
      "epoch": 0.014289842128766187,
      "grad_norm": 4.860860347747803,
      "learning_rate": 9.857235726795097e-05,
      "loss": 1.4068,
      "num_input_tokens_seen": 3285432,
      "step": 204
    },
    {
      "epoch": 0.014359890374495433,
      "grad_norm": 4.336480617523193,
      "learning_rate": 9.856535901926445e-05,
      "loss": 1.2579,
      "num_input_tokens_seen": 3301632,
      "step": 205
    },
    {
      "epoch": 0.01442993862022468,
      "grad_norm": 4.587873458862305,
      "learning_rate": 9.855836077057794e-05,
      "loss": 1.1508,
      "num_input_tokens_seen": 3318016,
      "step": 206
    },
    {
      "epoch": 0.014499986865953926,
      "grad_norm": 4.719262599945068,
      "learning_rate": 9.855136252189142e-05,
      "loss": 1.0208,
      "num_input_tokens_seen": 3333168,
      "step": 207
    },
    {
      "epoch": 0.014570035111683172,
      "grad_norm": 4.419138431549072,
      "learning_rate": 9.85443642732049e-05,
      "loss": 1.2576,
      "num_input_tokens_seen": 3349384,
      "step": 208
    },
    {
      "epoch": 0.014640083357412417,
      "grad_norm": 4.3150835037231445,
      "learning_rate": 9.85373660245184e-05,
      "loss": 1.1786,
      "num_input_tokens_seen": 3365768,
      "step": 209
    },
    {
      "epoch": 0.014710131603141665,
      "grad_norm": 4.5917649269104,
      "learning_rate": 9.853036777583188e-05,
      "loss": 1.2821,
      "num_input_tokens_seen": 3382152,
      "step": 210
    },
    {
      "epoch": 0.01478017984887091,
      "grad_norm": 4.9094343185424805,
      "learning_rate": 9.852336952714537e-05,
      "loss": 1.2415,
      "num_input_tokens_seen": 3397896,
      "step": 211
    },
    {
      "epoch": 0.014850228094600156,
      "grad_norm": 4.394861698150635,
      "learning_rate": 9.851637127845885e-05,
      "loss": 1.1776,
      "num_input_tokens_seen": 3414280,
      "step": 212
    },
    {
      "epoch": 0.014920276340329401,
      "grad_norm": 4.196374416351318,
      "learning_rate": 9.850937302977233e-05,
      "loss": 1.065,
      "num_input_tokens_seen": 3430584,
      "step": 213
    },
    {
      "epoch": 0.014990324586058647,
      "grad_norm": 4.728682518005371,
      "learning_rate": 9.850237478108582e-05,
      "loss": 1.2686,
      "num_input_tokens_seen": 3446968,
      "step": 214
    },
    {
      "epoch": 0.015060372831787894,
      "grad_norm": 4.291411876678467,
      "learning_rate": 9.84953765323993e-05,
      "loss": 1.1877,
      "num_input_tokens_seen": 3462568,
      "step": 215
    },
    {
      "epoch": 0.01513042107751714,
      "grad_norm": 4.405060768127441,
      "learning_rate": 9.84883782837128e-05,
      "loss": 1.2873,
      "num_input_tokens_seen": 3478952,
      "step": 216
    },
    {
      "epoch": 0.015200469323246386,
      "grad_norm": 4.254365921020508,
      "learning_rate": 9.848138003502628e-05,
      "loss": 1.1062,
      "num_input_tokens_seen": 3495304,
      "step": 217
    },
    {
      "epoch": 0.015270517568975631,
      "grad_norm": 4.741672039031982,
      "learning_rate": 9.847438178633976e-05,
      "loss": 1.1983,
      "num_input_tokens_seen": 3511688,
      "step": 218
    },
    {
      "epoch": 0.015340565814704879,
      "grad_norm": 4.352742671966553,
      "learning_rate": 9.846738353765325e-05,
      "loss": 1.2028,
      "num_input_tokens_seen": 3528072,
      "step": 219
    },
    {
      "epoch": 0.015410614060434124,
      "grad_norm": 4.996603488922119,
      "learning_rate": 9.846038528896672e-05,
      "loss": 1.1561,
      "num_input_tokens_seen": 3542904,
      "step": 220
    },
    {
      "epoch": 0.01548066230616337,
      "grad_norm": 4.911815166473389,
      "learning_rate": 9.845338704028021e-05,
      "loss": 1.3375,
      "num_input_tokens_seen": 3558352,
      "step": 221
    },
    {
      "epoch": 0.015550710551892616,
      "grad_norm": 4.638419151306152,
      "learning_rate": 9.84463887915937e-05,
      "loss": 1.1963,
      "num_input_tokens_seen": 3574736,
      "step": 222
    },
    {
      "epoch": 0.015620758797621863,
      "grad_norm": 4.323521614074707,
      "learning_rate": 9.843939054290719e-05,
      "loss": 1.1224,
      "num_input_tokens_seen": 3591120,
      "step": 223
    },
    {
      "epoch": 0.01569080704335111,
      "grad_norm": 4.466544151306152,
      "learning_rate": 9.843239229422068e-05,
      "loss": 1.3988,
      "num_input_tokens_seen": 3607392,
      "step": 224
    },
    {
      "epoch": 0.015760855289080354,
      "grad_norm": 4.476973533630371,
      "learning_rate": 9.842539404553415e-05,
      "loss": 1.184,
      "num_input_tokens_seen": 3623776,
      "step": 225
    },
    {
      "epoch": 0.0158309035348096,
      "grad_norm": 4.648625373840332,
      "learning_rate": 9.841839579684764e-05,
      "loss": 1.1768,
      "num_input_tokens_seen": 3640008,
      "step": 226
    },
    {
      "epoch": 0.015900951780538845,
      "grad_norm": 4.364476203918457,
      "learning_rate": 9.841139754816112e-05,
      "loss": 1.0208,
      "num_input_tokens_seen": 3656392,
      "step": 227
    },
    {
      "epoch": 0.01597100002626809,
      "grad_norm": 4.3054633140563965,
      "learning_rate": 9.84043992994746e-05,
      "loss": 1.1215,
      "num_input_tokens_seen": 3672392,
      "step": 228
    },
    {
      "epoch": 0.016041048271997337,
      "grad_norm": 4.83436918258667,
      "learning_rate": 9.83974010507881e-05,
      "loss": 1.2284,
      "num_input_tokens_seen": 3688776,
      "step": 229
    },
    {
      "epoch": 0.016111096517726586,
      "grad_norm": 4.447519779205322,
      "learning_rate": 9.839040280210158e-05,
      "loss": 1.1765,
      "num_input_tokens_seen": 3705080,
      "step": 230
    },
    {
      "epoch": 0.01618114476345583,
      "grad_norm": 4.269217491149902,
      "learning_rate": 9.838340455341507e-05,
      "loss": 1.0466,
      "num_input_tokens_seen": 3721464,
      "step": 231
    },
    {
      "epoch": 0.016251193009185077,
      "grad_norm": 4.41223669052124,
      "learning_rate": 9.837640630472854e-05,
      "loss": 1.2098,
      "num_input_tokens_seen": 3737184,
      "step": 232
    },
    {
      "epoch": 0.016321241254914323,
      "grad_norm": 4.632737159729004,
      "learning_rate": 9.836940805604203e-05,
      "loss": 1.1562,
      "num_input_tokens_seen": 3753192,
      "step": 233
    },
    {
      "epoch": 0.016391289500643568,
      "grad_norm": 4.379425525665283,
      "learning_rate": 9.836240980735552e-05,
      "loss": 1.1219,
      "num_input_tokens_seen": 3767976,
      "step": 234
    },
    {
      "epoch": 0.016461337746372814,
      "grad_norm": 4.28551721572876,
      "learning_rate": 9.835541155866901e-05,
      "loss": 1.0259,
      "num_input_tokens_seen": 3784008,
      "step": 235
    },
    {
      "epoch": 0.01653138599210206,
      "grad_norm": 4.642453670501709,
      "learning_rate": 9.83484133099825e-05,
      "loss": 1.1684,
      "num_input_tokens_seen": 3800000,
      "step": 236
    },
    {
      "epoch": 0.016601434237831305,
      "grad_norm": 4.367178440093994,
      "learning_rate": 9.834141506129597e-05,
      "loss": 1.2877,
      "num_input_tokens_seen": 3816384,
      "step": 237
    },
    {
      "epoch": 0.01667148248356055,
      "grad_norm": 4.5724005699157715,
      "learning_rate": 9.833441681260946e-05,
      "loss": 1.1814,
      "num_input_tokens_seen": 3830328,
      "step": 238
    },
    {
      "epoch": 0.0167415307292898,
      "grad_norm": 4.318159580230713,
      "learning_rate": 9.832741856392295e-05,
      "loss": 1.1143,
      "num_input_tokens_seen": 3846712,
      "step": 239
    },
    {
      "epoch": 0.016811578975019045,
      "grad_norm": 4.408501625061035,
      "learning_rate": 9.832042031523643e-05,
      "loss": 1.1508,
      "num_input_tokens_seen": 3861776,
      "step": 240
    },
    {
      "epoch": 0.01688162722074829,
      "grad_norm": 4.20060920715332,
      "learning_rate": 9.831342206654991e-05,
      "loss": 1.209,
      "num_input_tokens_seen": 3877736,
      "step": 241
    },
    {
      "epoch": 0.016951675466477537,
      "grad_norm": 4.431649208068848,
      "learning_rate": 9.83064238178634e-05,
      "loss": 1.2458,
      "num_input_tokens_seen": 3893320,
      "step": 242
    },
    {
      "epoch": 0.017021723712206782,
      "grad_norm": 4.000490188598633,
      "learning_rate": 9.829942556917689e-05,
      "loss": 1.0274,
      "num_input_tokens_seen": 3909704,
      "step": 243
    },
    {
      "epoch": 0.017091771957936028,
      "grad_norm": 4.703495025634766,
      "learning_rate": 9.829242732049038e-05,
      "loss": 1.1711,
      "num_input_tokens_seen": 3925808,
      "step": 244
    },
    {
      "epoch": 0.017161820203665273,
      "grad_norm": 4.639338970184326,
      "learning_rate": 9.828542907180386e-05,
      "loss": 1.3046,
      "num_input_tokens_seen": 3942192,
      "step": 245
    },
    {
      "epoch": 0.01723186844939452,
      "grad_norm": 4.414276599884033,
      "learning_rate": 9.827843082311734e-05,
      "loss": 1.271,
      "num_input_tokens_seen": 3958528,
      "step": 246
    },
    {
      "epoch": 0.017301916695123768,
      "grad_norm": 4.404853820800781,
      "learning_rate": 9.827143257443082e-05,
      "loss": 1.0693,
      "num_input_tokens_seen": 3974912,
      "step": 247
    },
    {
      "epoch": 0.017371964940853014,
      "grad_norm": 4.519491195678711,
      "learning_rate": 9.826443432574431e-05,
      "loss": 1.2894,
      "num_input_tokens_seen": 3991296,
      "step": 248
    },
    {
      "epoch": 0.01744201318658226,
      "grad_norm": 4.261727809906006,
      "learning_rate": 9.825743607705781e-05,
      "loss": 1.2059,
      "num_input_tokens_seen": 4006544,
      "step": 249
    },
    {
      "epoch": 0.017512061432311505,
      "grad_norm": 4.102485656738281,
      "learning_rate": 9.825043782837129e-05,
      "loss": 0.9365,
      "num_input_tokens_seen": 4022320,
      "step": 250
    },
    {
      "epoch": 0.01758210967804075,
      "grad_norm": 4.804764270782471,
      "learning_rate": 9.824343957968477e-05,
      "loss": 1.3344,
      "num_input_tokens_seen": 4037048,
      "step": 251
    },
    {
      "epoch": 0.017652157923769996,
      "grad_norm": 4.130600452423096,
      "learning_rate": 9.823644133099825e-05,
      "loss": 1.2349,
      "num_input_tokens_seen": 4053432,
      "step": 252
    },
    {
      "epoch": 0.017722206169499242,
      "grad_norm": 4.234742641448975,
      "learning_rate": 9.822944308231174e-05,
      "loss": 1.1371,
      "num_input_tokens_seen": 4069816,
      "step": 253
    },
    {
      "epoch": 0.017792254415228487,
      "grad_norm": 4.754928112030029,
      "learning_rate": 9.822244483362521e-05,
      "loss": 1.5168,
      "num_input_tokens_seen": 4085864,
      "step": 254
    },
    {
      "epoch": 0.017862302660957733,
      "grad_norm": 4.542768478393555,
      "learning_rate": 9.821544658493871e-05,
      "loss": 1.1943,
      "num_input_tokens_seen": 4102240,
      "step": 255
    },
    {
      "epoch": 0.017932350906686982,
      "grad_norm": 4.411310195922852,
      "learning_rate": 9.82084483362522e-05,
      "loss": 1.2694,
      "num_input_tokens_seen": 4118544,
      "step": 256
    },
    {
      "epoch": 0.018002399152416228,
      "grad_norm": 4.205377101898193,
      "learning_rate": 9.820145008756568e-05,
      "loss": 1.1581,
      "num_input_tokens_seen": 4134928,
      "step": 257
    },
    {
      "epoch": 0.018072447398145473,
      "grad_norm": 4.451165199279785,
      "learning_rate": 9.819445183887917e-05,
      "loss": 1.089,
      "num_input_tokens_seen": 4150848,
      "step": 258
    },
    {
      "epoch": 0.01814249564387472,
      "grad_norm": 4.366336822509766,
      "learning_rate": 9.818745359019264e-05,
      "loss": 1.1767,
      "num_input_tokens_seen": 4167184,
      "step": 259
    },
    {
      "epoch": 0.018212543889603965,
      "grad_norm": 4.394649982452393,
      "learning_rate": 9.818045534150613e-05,
      "loss": 1.0741,
      "num_input_tokens_seen": 4183376,
      "step": 260
    },
    {
      "epoch": 0.01828259213533321,
      "grad_norm": 4.344518184661865,
      "learning_rate": 9.817345709281962e-05,
      "loss": 1.2282,
      "num_input_tokens_seen": 4199760,
      "step": 261
    },
    {
      "epoch": 0.018352640381062456,
      "grad_norm": 4.403041362762451,
      "learning_rate": 9.816645884413311e-05,
      "loss": 1.2317,
      "num_input_tokens_seen": 4215816,
      "step": 262
    },
    {
      "epoch": 0.0184226886267917,
      "grad_norm": 4.715320110321045,
      "learning_rate": 9.81594605954466e-05,
      "loss": 1.3074,
      "num_input_tokens_seen": 4231504,
      "step": 263
    },
    {
      "epoch": 0.01849273687252095,
      "grad_norm": 4.5754265785217285,
      "learning_rate": 9.815246234676007e-05,
      "loss": 1.253,
      "num_input_tokens_seen": 4247888,
      "step": 264
    },
    {
      "epoch": 0.018562785118250196,
      "grad_norm": 4.2346930503845215,
      "learning_rate": 9.814546409807356e-05,
      "loss": 1.1727,
      "num_input_tokens_seen": 4264248,
      "step": 265
    },
    {
      "epoch": 0.018632833363979442,
      "grad_norm": 4.186713218688965,
      "learning_rate": 9.813846584938705e-05,
      "loss": 1.2693,
      "num_input_tokens_seen": 4280632,
      "step": 266
    },
    {
      "epoch": 0.018702881609708687,
      "grad_norm": 4.6356706619262695,
      "learning_rate": 9.813146760070052e-05,
      "loss": 1.3755,
      "num_input_tokens_seen": 4296648,
      "step": 267
    },
    {
      "epoch": 0.018772929855437933,
      "grad_norm": 4.466466903686523,
      "learning_rate": 9.812446935201401e-05,
      "loss": 1.283,
      "num_input_tokens_seen": 4311408,
      "step": 268
    },
    {
      "epoch": 0.01884297810116718,
      "grad_norm": 4.3369140625,
      "learning_rate": 9.81174711033275e-05,
      "loss": 1.1555,
      "num_input_tokens_seen": 4326736,
      "step": 269
    },
    {
      "epoch": 0.018913026346896424,
      "grad_norm": 4.434782028198242,
      "learning_rate": 9.811047285464099e-05,
      "loss": 1.2859,
      "num_input_tokens_seen": 4343120,
      "step": 270
    },
    {
      "epoch": 0.01898307459262567,
      "grad_norm": 4.346708297729492,
      "learning_rate": 9.810347460595448e-05,
      "loss": 1.1421,
      "num_input_tokens_seen": 4359504,
      "step": 271
    },
    {
      "epoch": 0.019053122838354915,
      "grad_norm": 4.529878616333008,
      "learning_rate": 9.809647635726795e-05,
      "loss": 1.2654,
      "num_input_tokens_seen": 4375888,
      "step": 272
    },
    {
      "epoch": 0.019123171084084165,
      "grad_norm": 4.051745891571045,
      "learning_rate": 9.808947810858144e-05,
      "loss": 1.1469,
      "num_input_tokens_seen": 4392224,
      "step": 273
    },
    {
      "epoch": 0.01919321932981341,
      "grad_norm": 4.403522491455078,
      "learning_rate": 9.808247985989492e-05,
      "loss": 1.233,
      "num_input_tokens_seen": 4408608,
      "step": 274
    },
    {
      "epoch": 0.019263267575542656,
      "grad_norm": 4.166261196136475,
      "learning_rate": 9.807548161120842e-05,
      "loss": 1.1697,
      "num_input_tokens_seen": 4424992,
      "step": 275
    },
    {
      "epoch": 0.0193333158212719,
      "grad_norm": 4.29187536239624,
      "learning_rate": 9.806848336252191e-05,
      "loss": 1.0503,
      "num_input_tokens_seen": 4441376,
      "step": 276
    },
    {
      "epoch": 0.019403364067001147,
      "grad_norm": 4.4056172370910645,
      "learning_rate": 9.806148511383538e-05,
      "loss": 1.1965,
      "num_input_tokens_seen": 4457760,
      "step": 277
    },
    {
      "epoch": 0.019473412312730393,
      "grad_norm": 4.355875015258789,
      "learning_rate": 9.805448686514887e-05,
      "loss": 1.1024,
      "num_input_tokens_seen": 4474144,
      "step": 278
    },
    {
      "epoch": 0.019543460558459638,
      "grad_norm": 4.46420955657959,
      "learning_rate": 9.804748861646235e-05,
      "loss": 1.203,
      "num_input_tokens_seen": 4488912,
      "step": 279
    },
    {
      "epoch": 0.019613508804188884,
      "grad_norm": 4.48052453994751,
      "learning_rate": 9.804049036777583e-05,
      "loss": 1.2089,
      "num_input_tokens_seen": 4505296,
      "step": 280
    },
    {
      "epoch": 0.01968355704991813,
      "grad_norm": 4.458749294281006,
      "learning_rate": 9.803349211908932e-05,
      "loss": 1.1557,
      "num_input_tokens_seen": 4520576,
      "step": 281
    },
    {
      "epoch": 0.01975360529564738,
      "grad_norm": 4.551771640777588,
      "learning_rate": 9.802649387040281e-05,
      "loss": 1.1671,
      "num_input_tokens_seen": 4536960,
      "step": 282
    },
    {
      "epoch": 0.019823653541376624,
      "grad_norm": 4.038064956665039,
      "learning_rate": 9.80194956217163e-05,
      "loss": 1.1562,
      "num_input_tokens_seen": 4553344,
      "step": 283
    },
    {
      "epoch": 0.01989370178710587,
      "grad_norm": 4.647075653076172,
      "learning_rate": 9.801249737302978e-05,
      "loss": 1.3069,
      "num_input_tokens_seen": 4568928,
      "step": 284
    },
    {
      "epoch": 0.019963750032835115,
      "grad_norm": 4.258941650390625,
      "learning_rate": 9.800549912434326e-05,
      "loss": 1.0349,
      "num_input_tokens_seen": 4585312,
      "step": 285
    },
    {
      "epoch": 0.02003379827856436,
      "grad_norm": 4.348769664764404,
      "learning_rate": 9.799850087565674e-05,
      "loss": 1.1163,
      "num_input_tokens_seen": 4601696,
      "step": 286
    },
    {
      "epoch": 0.020103846524293607,
      "grad_norm": 4.105901718139648,
      "learning_rate": 9.799150262697023e-05,
      "loss": 1.0313,
      "num_input_tokens_seen": 4617312,
      "step": 287
    },
    {
      "epoch": 0.020173894770022852,
      "grad_norm": 4.079495429992676,
      "learning_rate": 9.798450437828372e-05,
      "loss": 1.0828,
      "num_input_tokens_seen": 4633696,
      "step": 288
    },
    {
      "epoch": 0.020243943015752098,
      "grad_norm": 4.03472375869751,
      "learning_rate": 9.79775061295972e-05,
      "loss": 0.9475,
      "num_input_tokens_seen": 4650080,
      "step": 289
    },
    {
      "epoch": 0.020313991261481347,
      "grad_norm": 4.077049732208252,
      "learning_rate": 9.797050788091069e-05,
      "loss": 1.1323,
      "num_input_tokens_seen": 4666328,
      "step": 290
    },
    {
      "epoch": 0.020384039507210593,
      "grad_norm": 4.086606025695801,
      "learning_rate": 9.796350963222417e-05,
      "loss": 1.1218,
      "num_input_tokens_seen": 4682256,
      "step": 291
    },
    {
      "epoch": 0.020454087752939838,
      "grad_norm": 4.296900749206543,
      "learning_rate": 9.795651138353766e-05,
      "loss": 1.2964,
      "num_input_tokens_seen": 4698640,
      "step": 292
    },
    {
      "epoch": 0.020524135998669084,
      "grad_norm": 4.040759086608887,
      "learning_rate": 9.794951313485115e-05,
      "loss": 1.1077,
      "num_input_tokens_seen": 4714928,
      "step": 293
    },
    {
      "epoch": 0.02059418424439833,
      "grad_norm": 3.8260273933410645,
      "learning_rate": 9.794251488616462e-05,
      "loss": 0.9667,
      "num_input_tokens_seen": 4731312,
      "step": 294
    },
    {
      "epoch": 0.020664232490127575,
      "grad_norm": 4.294517993927002,
      "learning_rate": 9.793551663747811e-05,
      "loss": 1.2704,
      "num_input_tokens_seen": 4747544,
      "step": 295
    },
    {
      "epoch": 0.02073428073585682,
      "grad_norm": 4.206037521362305,
      "learning_rate": 9.79285183887916e-05,
      "loss": 1.1593,
      "num_input_tokens_seen": 4763928,
      "step": 296
    },
    {
      "epoch": 0.020804328981586066,
      "grad_norm": 4.147867202758789,
      "learning_rate": 9.792152014010509e-05,
      "loss": 1.1256,
      "num_input_tokens_seen": 4780312,
      "step": 297
    },
    {
      "epoch": 0.020874377227315312,
      "grad_norm": 4.23718786239624,
      "learning_rate": 9.791452189141857e-05,
      "loss": 1.2353,
      "num_input_tokens_seen": 4796384,
      "step": 298
    },
    {
|
|
"epoch": 0.02094442547304456,
|
|
"grad_norm": 4.172685146331787,
|
|
"learning_rate": 9.790752364273205e-05,
|
|
"loss": 1.1868,
|
|
"num_input_tokens_seen": 4812768,
|
|
"step": 299
|
|
},
|
|
{
|
|
"epoch": 0.021014473718773807,
|
|
"grad_norm": 4.167289733886719,
|
|
"learning_rate": 9.790052539404554e-05,
|
|
"loss": 1.0606,
|
|
"num_input_tokens_seen": 4829152,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.021084521964503052,
|
|
"grad_norm": 4.096963882446289,
|
|
"learning_rate": 9.789352714535903e-05,
|
|
"loss": 1.0557,
|
|
"num_input_tokens_seen": 4845384,
|
|
"step": 301
|
|
},
|
|
{
|
|
"epoch": 0.021154570210232298,
|
|
"grad_norm": 4.223779678344727,
|
|
"learning_rate": 9.788652889667252e-05,
|
|
"loss": 1.1485,
|
|
"num_input_tokens_seen": 4861768,
|
|
"step": 302
|
|
},
|
|
{
|
|
"epoch": 0.021224618455961543,
|
|
"grad_norm": 3.8243472576141357,
|
|
"learning_rate": 9.7879530647986e-05,
|
|
"loss": 1.004,
|
|
"num_input_tokens_seen": 4878152,
|
|
"step": 303
|
|
},
|
|
{
|
|
"epoch": 0.02129466670169079,
|
|
"grad_norm": 4.092590808868408,
|
|
"learning_rate": 9.787253239929948e-05,
|
|
"loss": 1.0211,
|
|
"num_input_tokens_seen": 4894536,
|
|
"step": 304
|
|
},
|
|
{
|
|
"epoch": 0.021364714947420035,
|
|
"grad_norm": 4.42412805557251,
|
|
"learning_rate": 9.786553415061297e-05,
|
|
"loss": 0.9915,
|
|
"num_input_tokens_seen": 4910320,
|
|
"step": 305
|
|
},
|
|
{
|
|
"epoch": 0.02143476319314928,
|
|
"grad_norm": 4.488316535949707,
|
|
"learning_rate": 9.785853590192644e-05,
|
|
"loss": 1.1782,
|
|
"num_input_tokens_seen": 4926704,
|
|
"step": 306
|
|
},
|
|
{
|
|
"epoch": 0.021504811438878526,
|
|
"grad_norm": 4.110256195068359,
|
|
"learning_rate": 9.785153765323993e-05,
|
|
"loss": 1.102,
|
|
"num_input_tokens_seen": 4943088,
|
|
"step": 307
|
|
},
|
|
{
|
|
"epoch": 0.021574859684607775,
|
|
"grad_norm": 4.246950149536133,
|
|
"learning_rate": 9.784453940455342e-05,
|
|
"loss": 1.067,
|
|
"num_input_tokens_seen": 4958736,
|
|
"step": 308
|
|
},
|
|
{
|
|
"epoch": 0.02164490793033702,
|
|
"grad_norm": 4.175214767456055,
|
|
"learning_rate": 9.783754115586691e-05,
|
|
"loss": 1.0638,
|
|
"num_input_tokens_seen": 4975120,
|
|
"step": 309
|
|
},
|
|
{
|
|
"epoch": 0.021714956176066266,
|
|
"grad_norm": 4.427795886993408,
|
|
"learning_rate": 9.78305429071804e-05,
|
|
"loss": 1.1347,
|
|
"num_input_tokens_seen": 4991504,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.021785004421795512,
|
|
"grad_norm": 4.158191204071045,
|
|
"learning_rate": 9.782354465849387e-05,
|
|
"loss": 1.1662,
|
|
"num_input_tokens_seen": 5007152,
|
|
"step": 311
|
|
},
|
|
{
|
|
"epoch": 0.021855052667524758,
|
|
"grad_norm": 4.184347629547119,
|
|
"learning_rate": 9.781654640980736e-05,
|
|
"loss": 1.0791,
|
|
"num_input_tokens_seen": 5023536,
|
|
"step": 312
|
|
},
|
|
{
|
|
"epoch": 0.021925100913254003,
|
|
"grad_norm": 3.8506295680999756,
|
|
"learning_rate": 9.780954816112084e-05,
|
|
"loss": 1.0615,
|
|
"num_input_tokens_seen": 5039728,
|
|
"step": 313
|
|
},
|
|
{
|
|
"epoch": 0.02199514915898325,
|
|
"grad_norm": 4.310062408447266,
|
|
"learning_rate": 9.780254991243432e-05,
|
|
"loss": 1.1363,
|
|
"num_input_tokens_seen": 5056008,
|
|
"step": 314
|
|
},
|
|
{
|
|
"epoch": 0.022065197404712494,
|
|
"grad_norm": 4.215006351470947,
|
|
"learning_rate": 9.779555166374781e-05,
|
|
"loss": 1.1715,
|
|
"num_input_tokens_seen": 5072096,
|
|
"step": 315
|
|
},
|
|
{
|
|
"epoch": 0.022135245650441743,
|
|
"grad_norm": 4.219073295593262,
|
|
"learning_rate": 9.77885534150613e-05,
|
|
"loss": 1.219,
|
|
"num_input_tokens_seen": 5088432,
|
|
"step": 316
|
|
},
|
|
{
|
|
"epoch": 0.02220529389617099,
|
|
"grad_norm": 4.319522857666016,
|
|
"learning_rate": 9.778155516637479e-05,
|
|
"loss": 1.3085,
|
|
"num_input_tokens_seen": 5104240,
|
|
"step": 317
|
|
},
|
|
{
|
|
"epoch": 0.022275342141900235,
|
|
"grad_norm": 4.118961334228516,
|
|
"learning_rate": 9.777455691768827e-05,
|
|
"loss": 1.0926,
|
|
"num_input_tokens_seen": 5120624,
|
|
"step": 318
|
|
},
|
|
{
|
|
"epoch": 0.02234539038762948,
|
|
"grad_norm": 4.195051193237305,
|
|
"learning_rate": 9.776755866900175e-05,
|
|
"loss": 1.0894,
|
|
"num_input_tokens_seen": 5137008,
|
|
"step": 319
|
|
},
|
|
{
|
|
"epoch": 0.022415438633358726,
|
|
"grad_norm": 4.114197254180908,
|
|
"learning_rate": 9.776056042031524e-05,
|
|
"loss": 1.1897,
|
|
"num_input_tokens_seen": 5153272,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.02248548687908797,
|
|
"grad_norm": 4.014908313751221,
|
|
"learning_rate": 9.775356217162872e-05,
|
|
"loss": 1.0932,
|
|
"num_input_tokens_seen": 5169472,
|
|
"step": 321
|
|
},
|
|
{
|
|
"epoch": 0.022555535124817217,
|
|
"grad_norm": 4.190642356872559,
|
|
"learning_rate": 9.774656392294222e-05,
|
|
"loss": 1.1413,
|
|
"num_input_tokens_seen": 5185856,
|
|
"step": 322
|
|
},
|
|
{
|
|
"epoch": 0.022625583370546463,
|
|
"grad_norm": 4.562993049621582,
|
|
"learning_rate": 9.77395656742557e-05,
|
|
"loss": 1.2865,
|
|
"num_input_tokens_seen": 5202240,
|
|
"step": 323
|
|
},
|
|
{
|
|
"epoch": 0.02269563161627571,
|
|
"grad_norm": 4.607022762298584,
|
|
"learning_rate": 9.773256742556918e-05,
|
|
"loss": 1.1465,
|
|
"num_input_tokens_seen": 5218168,
|
|
"step": 324
|
|
},
|
|
{
|
|
"epoch": 0.022765679862004957,
|
|
"grad_norm": 3.956439256668091,
|
|
"learning_rate": 9.772556917688267e-05,
|
|
"loss": 1.028,
|
|
"num_input_tokens_seen": 5234368,
|
|
"step": 325
|
|
},
|
|
{
|
|
"epoch": 0.022835728107734203,
|
|
"grad_norm": 4.20713472366333,
|
|
"learning_rate": 9.771857092819615e-05,
|
|
"loss": 1.2332,
|
|
"num_input_tokens_seen": 5249808,
|
|
"step": 326
|
|
},
|
|
{
|
|
"epoch": 0.02290577635346345,
|
|
"grad_norm": 4.4092864990234375,
|
|
"learning_rate": 9.771157267950964e-05,
|
|
"loss": 1.104,
|
|
"num_input_tokens_seen": 5266120,
|
|
"step": 327
|
|
},
|
|
{
|
|
"epoch": 0.022975824599192694,
|
|
"grad_norm": 4.529845237731934,
|
|
"learning_rate": 9.770457443082312e-05,
|
|
"loss": 1.3475,
|
|
"num_input_tokens_seen": 5282504,
|
|
"step": 328
|
|
},
|
|
{
|
|
"epoch": 0.02304587284492194,
|
|
"grad_norm": 4.221986293792725,
|
|
"learning_rate": 9.769757618213661e-05,
|
|
"loss": 1.4115,
|
|
"num_input_tokens_seen": 5298344,
|
|
"step": 329
|
|
},
|
|
{
|
|
"epoch": 0.023115921090651186,
|
|
"grad_norm": 4.29000186920166,
|
|
"learning_rate": 9.76905779334501e-05,
|
|
"loss": 1.2855,
|
|
"num_input_tokens_seen": 5314728,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.02318596933638043,
|
|
"grad_norm": 4.426812648773193,
|
|
"learning_rate": 9.768357968476358e-05,
|
|
"loss": 1.514,
|
|
"num_input_tokens_seen": 5330816,
|
|
"step": 331
|
|
},
|
|
{
|
|
"epoch": 0.023256017582109677,
|
|
"grad_norm": 4.210752964019775,
|
|
"learning_rate": 9.767658143607706e-05,
|
|
"loss": 1.0854,
|
|
"num_input_tokens_seen": 5346552,
|
|
"step": 332
|
|
},
|
|
{
|
|
"epoch": 0.023326065827838922,
|
|
"grad_norm": 4.216427326202393,
|
|
"learning_rate": 9.766958318739054e-05,
|
|
"loss": 1.1573,
|
|
"num_input_tokens_seen": 5362936,
|
|
"step": 333
|
|
},
|
|
{
|
|
"epoch": 0.02339611407356817,
|
|
"grad_norm": 4.132325649261475,
|
|
"learning_rate": 9.766258493870403e-05,
|
|
"loss": 1.0942,
|
|
"num_input_tokens_seen": 5379320,
|
|
"step": 334
|
|
},
|
|
{
|
|
"epoch": 0.023466162319297417,
|
|
"grad_norm": 4.277027130126953,
|
|
"learning_rate": 9.765558669001752e-05,
|
|
"loss": 1.1227,
|
|
"num_input_tokens_seen": 5395704,
|
|
"step": 335
|
|
},
|
|
{
|
|
"epoch": 0.023536210565026663,
|
|
"grad_norm": 4.228096961975098,
|
|
"learning_rate": 9.7648588441331e-05,
|
|
"loss": 1.1094,
|
|
"num_input_tokens_seen": 5412088,
|
|
"step": 336
|
|
},
|
|
{
|
|
"epoch": 0.02360625881075591,
|
|
"grad_norm": 4.194522380828857,
|
|
"learning_rate": 9.76415901926445e-05,
|
|
"loss": 1.2066,
|
|
"num_input_tokens_seen": 5428472,
|
|
"step": 337
|
|
},
|
|
{
|
|
"epoch": 0.023676307056485154,
|
|
"grad_norm": 4.336326599121094,
|
|
"learning_rate": 9.763459194395797e-05,
|
|
"loss": 1.2251,
|
|
"num_input_tokens_seen": 5444856,
|
|
"step": 338
|
|
},
|
|
{
|
|
"epoch": 0.0237463553022144,
|
|
"grad_norm": 4.2723307609558105,
|
|
"learning_rate": 9.762759369527146e-05,
|
|
"loss": 1.0927,
|
|
"num_input_tokens_seen": 5460304,
|
|
"step": 339
|
|
},
|
|
{
|
|
"epoch": 0.023816403547943645,
|
|
"grad_norm": 4.190036773681641,
|
|
"learning_rate": 9.762059544658493e-05,
|
|
"loss": 1.2036,
|
|
"num_input_tokens_seen": 5476688,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.02388645179367289,
|
|
"grad_norm": 4.477560043334961,
|
|
"learning_rate": 9.761359719789842e-05,
|
|
"loss": 1.362,
|
|
"num_input_tokens_seen": 5493072,
|
|
"step": 341
|
|
},
|
|
{
|
|
"epoch": 0.02395650003940214,
|
|
"grad_norm": 4.160232067108154,
|
|
"learning_rate": 9.760659894921192e-05,
|
|
"loss": 1.1602,
|
|
"num_input_tokens_seen": 5509456,
|
|
"step": 342
|
|
},
|
|
{
|
|
"epoch": 0.024026548285131386,
|
|
"grad_norm": 3.857335090637207,
|
|
"learning_rate": 9.75996007005254e-05,
|
|
"loss": 1.0963,
|
|
"num_input_tokens_seen": 5525840,
|
|
"step": 343
|
|
},
|
|
{
|
|
"epoch": 0.02409659653086063,
|
|
"grad_norm": 4.141246318817139,
|
|
"learning_rate": 9.759260245183889e-05,
|
|
"loss": 1.2009,
|
|
"num_input_tokens_seen": 5541888,
|
|
"step": 344
|
|
},
|
|
{
|
|
"epoch": 0.024166644776589877,
|
|
"grad_norm": 4.50364875793457,
|
|
"learning_rate": 9.758560420315236e-05,
|
|
"loss": 1.1483,
|
|
"num_input_tokens_seen": 5557848,
|
|
"step": 345
|
|
},
|
|
{
|
|
"epoch": 0.024236693022319122,
|
|
"grad_norm": 4.3343353271484375,
|
|
"learning_rate": 9.757860595446585e-05,
|
|
"loss": 1.3594,
|
|
"num_input_tokens_seen": 5573504,
|
|
"step": 346
|
|
},
|
|
{
|
|
"epoch": 0.024306741268048368,
|
|
"grad_norm": 4.050408363342285,
|
|
"learning_rate": 9.757160770577934e-05,
|
|
"loss": 1.0563,
|
|
"num_input_tokens_seen": 5589544,
|
|
"step": 347
|
|
},
|
|
{
|
|
"epoch": 0.024376789513777614,
|
|
"grad_norm": 4.051811695098877,
|
|
"learning_rate": 9.756460945709283e-05,
|
|
"loss": 1.0288,
|
|
"num_input_tokens_seen": 5605368,
|
|
"step": 348
|
|
},
|
|
{
|
|
"epoch": 0.02444683775950686,
|
|
"grad_norm": 4.365113258361816,
|
|
"learning_rate": 9.755761120840632e-05,
|
|
"loss": 1.3054,
|
|
"num_input_tokens_seen": 5621752,
|
|
"step": 349
|
|
},
|
|
{
|
|
"epoch": 0.024516886005236105,
|
|
"grad_norm": 4.0057501792907715,
|
|
"learning_rate": 9.755061295971979e-05,
|
|
"loss": 1.1302,
|
|
"num_input_tokens_seen": 5638136,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.024586934250965354,
|
|
"grad_norm": 4.254896640777588,
|
|
"learning_rate": 9.754361471103328e-05,
|
|
"loss": 1.0495,
|
|
"num_input_tokens_seen": 5653168,
|
|
"step": 351
|
|
},
|
|
{
|
|
"epoch": 0.0246569824966946,
|
|
"grad_norm": 3.8119771480560303,
|
|
"learning_rate": 9.753661646234677e-05,
|
|
"loss": 1.0349,
|
|
"num_input_tokens_seen": 5669504,
|
|
"step": 352
|
|
},
|
|
{
|
|
"epoch": 0.024727030742423845,
|
|
"grad_norm": 4.5082621574401855,
|
|
"learning_rate": 9.752961821366024e-05,
|
|
"loss": 1.2537,
|
|
"num_input_tokens_seen": 5685168,
|
|
"step": 353
|
|
},
|
|
{
|
|
"epoch": 0.02479707898815309,
|
|
"grad_norm": 4.392731189727783,
|
|
"learning_rate": 9.752261996497373e-05,
|
|
"loss": 1.2534,
|
|
"num_input_tokens_seen": 5701240,
|
|
"step": 354
|
|
},
|
|
{
|
|
"epoch": 0.024867127233882336,
|
|
"grad_norm": 4.293395519256592,
|
|
"learning_rate": 9.751562171628722e-05,
|
|
"loss": 1.2774,
|
|
"num_input_tokens_seen": 5717624,
|
|
"step": 355
|
|
},
|
|
{
|
|
"epoch": 0.024937175479611582,
|
|
"grad_norm": 4.64813756942749,
|
|
"learning_rate": 9.750862346760071e-05,
|
|
"loss": 1.2795,
|
|
"num_input_tokens_seen": 5733104,
|
|
"step": 356
|
|
},
|
|
{
|
|
"epoch": 0.025007223725340828,
|
|
"grad_norm": 4.5166778564453125,
|
|
"learning_rate": 9.75016252189142e-05,
|
|
"loss": 1.1301,
|
|
"num_input_tokens_seen": 5749488,
|
|
"step": 357
|
|
},
|
|
{
|
|
"epoch": 0.025077271971070073,
|
|
"grad_norm": 3.894291400909424,
|
|
"learning_rate": 9.749462697022767e-05,
|
|
"loss": 0.901,
|
|
"num_input_tokens_seen": 5765872,
|
|
"step": 358
|
|
},
|
|
{
|
|
"epoch": 0.02514732021679932,
|
|
"grad_norm": 4.10056209564209,
|
|
"learning_rate": 9.748762872154116e-05,
|
|
"loss": 1.0529,
|
|
"num_input_tokens_seen": 5780856,
|
|
"step": 359
|
|
},
|
|
{
|
|
"epoch": 0.025217368462528568,
|
|
"grad_norm": 4.6277666091918945,
|
|
"learning_rate": 9.748063047285464e-05,
|
|
"loss": 1.3649,
|
|
"num_input_tokens_seen": 5796856,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.025287416708257814,
|
|
"grad_norm": 4.029720306396484,
|
|
"learning_rate": 9.747363222416813e-05,
|
|
"loss": 0.8863,
|
|
"num_input_tokens_seen": 5812176,
|
|
"step": 361
|
|
},
|
|
{
|
|
"epoch": 0.02535746495398706,
|
|
"grad_norm": 3.7772202491760254,
|
|
"learning_rate": 9.746663397548161e-05,
|
|
"loss": 1.0448,
|
|
"num_input_tokens_seen": 5828064,
|
|
"step": 362
|
|
},
|
|
{
|
|
"epoch": 0.025427513199716305,
|
|
"grad_norm": 4.379861354827881,
|
|
"learning_rate": 9.74596357267951e-05,
|
|
"loss": 1.3274,
|
|
"num_input_tokens_seen": 5843680,
|
|
"step": 363
|
|
},
|
|
{
|
|
"epoch": 0.02549756144544555,
|
|
"grad_norm": 4.254587173461914,
|
|
"learning_rate": 9.745263747810859e-05,
|
|
"loss": 1.1502,
|
|
"num_input_tokens_seen": 5859024,
|
|
"step": 364
|
|
},
|
|
{
|
|
"epoch": 0.025567609691174796,
|
|
"grad_norm": 4.271276473999023,
|
|
"learning_rate": 9.744563922942207e-05,
|
|
"loss": 1.2785,
|
|
"num_input_tokens_seen": 5874320,
|
|
"step": 365
|
|
},
|
|
{
|
|
"epoch": 0.02563765793690404,
|
|
"grad_norm": 4.224324703216553,
|
|
"learning_rate": 9.743864098073555e-05,
|
|
"loss": 1.0926,
|
|
"num_input_tokens_seen": 5890704,
|
|
"step": 366
|
|
},
|
|
{
|
|
"epoch": 0.025707706182633287,
|
|
"grad_norm": 4.289444446563721,
|
|
"learning_rate": 9.743164273204903e-05,
|
|
"loss": 1.1913,
|
|
"num_input_tokens_seen": 5906016,
|
|
"step": 367
|
|
},
|
|
{
|
|
"epoch": 0.025777754428362536,
|
|
"grad_norm": 4.280707359313965,
|
|
"learning_rate": 9.742464448336253e-05,
|
|
"loss": 1.2238,
|
|
"num_input_tokens_seen": 5921784,
|
|
"step": 368
|
|
},
|
|
{
|
|
"epoch": 0.025847802674091782,
|
|
"grad_norm": 4.554803848266602,
|
|
"learning_rate": 9.741764623467602e-05,
|
|
"loss": 1.2491,
|
|
"num_input_tokens_seen": 5938072,
|
|
"step": 369
|
|
},
|
|
{
|
|
"epoch": 0.025917850919821028,
|
|
"grad_norm": 4.677784442901611,
|
|
"learning_rate": 9.74106479859895e-05,
|
|
"loss": 1.2387,
|
|
"num_input_tokens_seen": 5954456,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.025987899165550273,
|
|
"grad_norm": 4.268225193023682,
|
|
"learning_rate": 9.740364973730298e-05,
|
|
"loss": 1.2983,
|
|
"num_input_tokens_seen": 5970664,
|
|
"step": 371
|
|
},
|
|
{
|
|
"epoch": 0.02605794741127952,
|
|
"grad_norm": 4.361818790435791,
|
|
"learning_rate": 9.739665148861646e-05,
|
|
"loss": 1.199,
|
|
"num_input_tokens_seen": 5987048,
|
|
"step": 372
|
|
},
|
|
{
|
|
"epoch": 0.026127995657008764,
|
|
"grad_norm": 3.9990735054016113,
|
|
"learning_rate": 9.738965323992995e-05,
|
|
"loss": 1.0777,
|
|
"num_input_tokens_seen": 6003432,
|
|
"step": 373
|
|
},
|
|
{
|
|
"epoch": 0.02619804390273801,
|
|
"grad_norm": 3.992142915725708,
|
|
"learning_rate": 9.738265499124344e-05,
|
|
"loss": 1.0443,
|
|
"num_input_tokens_seen": 6019816,
|
|
"step": 374
|
|
},
|
|
{
|
|
"epoch": 0.026268092148467256,
|
|
"grad_norm": 4.270167827606201,
|
|
"learning_rate": 9.737565674255693e-05,
|
|
"loss": 1.1764,
|
|
"num_input_tokens_seen": 6036200,
|
|
"step": 375
|
|
},
|
|
{
|
|
"epoch": 0.0263381403941965,
|
|
"grad_norm": 4.362086296081543,
|
|
"learning_rate": 9.736865849387041e-05,
|
|
"loss": 1.2735,
|
|
"num_input_tokens_seen": 6052120,
|
|
"step": 376
|
|
},
|
|
{
|
|
"epoch": 0.02640818863992575,
|
|
"grad_norm": 3.6900475025177,
|
|
"learning_rate": 9.736166024518389e-05,
|
|
"loss": 0.8729,
|
|
"num_input_tokens_seen": 6068264,
|
|
"step": 377
|
|
},
|
|
{
|
|
"epoch": 0.026478236885654996,
|
|
"grad_norm": 3.8281285762786865,
|
|
"learning_rate": 9.735466199649738e-05,
|
|
"loss": 1.1096,
|
|
"num_input_tokens_seen": 6084504,
|
|
"step": 378
|
|
},
|
|
{
|
|
"epoch": 0.02654828513138424,
|
|
"grad_norm": 3.9335553646087646,
|
|
"learning_rate": 9.734766374781087e-05,
|
|
"loss": 1.0763,
|
|
"num_input_tokens_seen": 6100592,
|
|
"step": 379
|
|
},
|
|
{
|
|
"epoch": 0.026618333377113487,
|
|
"grad_norm": 4.332645416259766,
|
|
"learning_rate": 9.734066549912434e-05,
|
|
"loss": 1.1751,
|
|
"num_input_tokens_seen": 6116976,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.026688381622842733,
|
|
"grad_norm": 4.160863399505615,
|
|
"learning_rate": 9.733366725043783e-05,
|
|
"loss": 1.0778,
|
|
"num_input_tokens_seen": 6133360,
|
|
"step": 381
|
|
},
|
|
{
|
|
"epoch": 0.02675842986857198,
|
|
"grad_norm": 4.388178825378418,
|
|
"learning_rate": 9.732666900175132e-05,
|
|
"loss": 1.2214,
|
|
"num_input_tokens_seen": 6149744,
|
|
"step": 382
|
|
},
|
|
{
|
|
"epoch": 0.026828478114301224,
|
|
"grad_norm": 4.354910373687744,
|
|
"learning_rate": 9.73196707530648e-05,
|
|
"loss": 1.4115,
|
|
"num_input_tokens_seen": 6166048,
|
|
"step": 383
|
|
},
|
|
{
|
|
"epoch": 0.02689852636003047,
|
|
"grad_norm": 4.058071613311768,
|
|
"learning_rate": 9.73126725043783e-05,
|
|
"loss": 1.0934,
|
|
"num_input_tokens_seen": 6181840,
|
|
"step": 384
|
|
},
|
|
{
|
|
"epoch": 0.026968574605759715,
|
|
"grad_norm": 4.060855865478516,
|
|
"learning_rate": 9.730567425569177e-05,
|
|
"loss": 1.1395,
|
|
"num_input_tokens_seen": 6198224,
|
|
"step": 385
|
|
},
|
|
{
|
|
"epoch": 0.027038622851488964,
|
|
"grad_norm": 4.316681385040283,
|
|
"learning_rate": 9.729867600700526e-05,
|
|
"loss": 1.1052,
|
|
"num_input_tokens_seen": 6214608,
|
|
"step": 386
|
|
},
|
|
{
|
|
"epoch": 0.02710867109721821,
|
|
"grad_norm": 4.322516918182373,
|
|
"learning_rate": 9.729167775831873e-05,
|
|
"loss": 1.2512,
|
|
"num_input_tokens_seen": 6230992,
|
|
"step": 387
|
|
},
|
|
{
|
|
"epoch": 0.027178719342947456,
|
|
"grad_norm": 4.090857028961182,
|
|
"learning_rate": 9.728467950963224e-05,
|
|
"loss": 1.0772,
|
|
"num_input_tokens_seen": 6246760,
|
|
"step": 388
|
|
},
|
|
{
|
|
"epoch": 0.0272487675886767,
|
|
"grad_norm": 4.0143961906433105,
|
|
"learning_rate": 9.727768126094571e-05,
|
|
"loss": 1.0578,
|
|
"num_input_tokens_seen": 6261968,
|
|
"step": 389
|
|
},
|
|
{
|
|
"epoch": 0.027318815834405947,
|
|
"grad_norm": 4.911194324493408,
|
|
"learning_rate": 9.72706830122592e-05,
|
|
"loss": 1.3016,
|
|
"num_input_tokens_seen": 6276664,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 0.027388864080135192,
|
|
"grad_norm": 4.057498931884766,
|
|
"learning_rate": 9.726368476357269e-05,
|
|
"loss": 1.026,
|
|
"num_input_tokens_seen": 6293048,
|
|
"step": 391
|
|
},
|
|
{
|
|
"epoch": 0.027458912325864438,
|
|
"grad_norm": 3.9827401638031006,
|
|
"learning_rate": 9.725668651488616e-05,
|
|
"loss": 1.136,
|
|
"num_input_tokens_seen": 6309432,
|
|
"step": 392
|
|
},
|
|
{
|
|
"epoch": 0.027528960571593684,
|
|
"grad_norm": 4.640822887420654,
|
|
"learning_rate": 9.724968826619965e-05,
|
|
"loss": 1.2823,
|
|
"num_input_tokens_seen": 6325568,
|
|
"step": 393
|
|
},
|
|
{
|
|
"epoch": 0.027599008817322933,
|
|
"grad_norm": 4.372538089752197,
|
|
"learning_rate": 9.724269001751314e-05,
|
|
"loss": 1.0354,
|
|
"num_input_tokens_seen": 6341952,
|
|
"step": 394
|
|
},
|
|
{
|
|
"epoch": 0.02766905706305218,
|
|
"grad_norm": 4.018289566040039,
|
|
"learning_rate": 9.723569176882663e-05,
|
|
"loss": 1.029,
|
|
"num_input_tokens_seen": 6358336,
|
|
"step": 395
|
|
},
|
|
{
|
|
"epoch": 0.027739105308781424,
|
|
"grad_norm": 4.440858364105225,
|
|
"learning_rate": 9.722869352014012e-05,
|
|
"loss": 1.2272,
|
|
"num_input_tokens_seen": 6374680,
|
|
"step": 396
|
|
},
|
|
{
|
|
"epoch": 0.02780915355451067,
|
|
"grad_norm": 4.246788024902344,
|
|
"learning_rate": 9.722169527145359e-05,
|
|
"loss": 1.0161,
|
|
"num_input_tokens_seen": 6390672,
|
|
"step": 397
|
|
},
|
|
{
|
|
"epoch": 0.027879201800239915,
|
|
"grad_norm": 4.27274751663208,
|
|
"learning_rate": 9.721469702276708e-05,
|
|
"loss": 1.293,
|
|
"num_input_tokens_seen": 6407056,
|
|
"step": 398
|
|
},
|
|
{
|
|
"epoch": 0.02794925004596916,
|
|
"grad_norm": 4.171760559082031,
|
|
"learning_rate": 9.720769877408056e-05,
|
|
"loss": 1.2766,
|
|
"num_input_tokens_seen": 6423440,
|
|
"step": 399
|
|
},
|
|
{
|
|
"epoch": 0.028019298291698407,
|
|
"grad_norm": 4.174622535705566,
|
|
"learning_rate": 9.720070052539405e-05,
|
|
"loss": 1.049,
|
|
"num_input_tokens_seen": 6439824,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.028019298291698407,
|
|
"eval_loss": 1.1994441747665405,
|
|
"eval_runtime": 0.2131,
|
|
"eval_samples_per_second": 4.693,
|
|
"eval_steps_per_second": 4.693,
|
|
"num_input_tokens_seen": 6439824,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.028089346537427652,
|
|
"grad_norm": 4.199150562286377,
|
|
"learning_rate": 9.719370227670753e-05,
|
|
"loss": 1.3432,
|
|
"num_input_tokens_seen": 6456208,
|
|
"step": 401
|
|
},
|
|
{
|
|
"epoch": 0.028159394783156898,
|
|
"grad_norm": 3.9011733531951904,
|
|
"learning_rate": 9.718670402802102e-05,
|
|
"loss": 1.0895,
|
|
"num_input_tokens_seen": 6472592,
|
|
"step": 402
|
|
},
|
|
{
|
|
"epoch": 0.028229443028886147,
|
|
"grad_norm": 4.142306327819824,
|
|
"learning_rate": 9.717970577933451e-05,
|
|
"loss": 0.9031,
|
|
"num_input_tokens_seen": 6488976,
|
|
"step": 403
|
|
},
|
|
{
|
|
"epoch": 0.028299491274615392,
|
|
"grad_norm": 3.9745633602142334,
|
|
"learning_rate": 9.717270753064799e-05,
|
|
"loss": 0.9951,
|
|
"num_input_tokens_seen": 6505360,
|
|
"step": 404
|
|
},
|
|
{
|
|
"epoch": 0.028369539520344638,
|
|
"grad_norm": 3.838865280151367,
|
|
"learning_rate": 9.716570928196147e-05,
|
|
"loss": 0.809,
|
|
"num_input_tokens_seen": 6521744,
|
|
"step": 405
|
|
},
|
|
{
|
|
"epoch": 0.028439587766073884,
|
|
"grad_norm": 4.48146390914917,
|
|
"learning_rate": 9.715871103327496e-05,
|
|
"loss": 1.4985,
|
|
"num_input_tokens_seen": 6538128,
|
|
"step": 406
|
|
},
|
|
{
|
|
"epoch": 0.02850963601180313,
|
|
"grad_norm": 4.393556594848633,
|
|
"learning_rate": 9.715171278458844e-05,
|
|
"loss": 1.2355,
|
|
"num_input_tokens_seen": 6554512,
|
|
"step": 407
|
|
},
|
|
{
|
|
"epoch": 0.028579684257532375,
|
|
"grad_norm": 3.970860004425049,
|
|
"learning_rate": 9.714471453590194e-05,
|
|
"loss": 1.1513,
|
|
"num_input_tokens_seen": 6570896,
|
|
"step": 408
|
|
},
|
|
{
|
|
"epoch": 0.02864973250326162,
|
|
"grad_norm": 4.166610240936279,
|
|
"learning_rate": 9.713771628721542e-05,
|
|
"loss": 1.108,
|
|
"num_input_tokens_seen": 6587216,
|
|
"step": 409
|
|
},
|
|
{
|
|
"epoch": 0.028719780748990866,
|
|
"grad_norm": 3.9887096881866455,
|
|
"learning_rate": 9.71307180385289e-05,
|
|
"loss": 1.1639,
|
|
"num_input_tokens_seen": 6603600,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 0.028789828994720112,
|
|
"grad_norm": 4.195802211761475,
|
|
"learning_rate": 9.712371978984239e-05,
|
|
"loss": 1.1478,
|
|
"num_input_tokens_seen": 6619984,
|
|
"step": 411
|
|
},
|
|
{
|
|
"epoch": 0.02885987724044936,
|
|
"grad_norm": 4.011331081390381,
|
|
"learning_rate": 9.711672154115587e-05,
|
|
"loss": 0.9554,
|
|
"num_input_tokens_seen": 6635904,
|
|
"step": 412
|
|
},
|
|
{
|
|
"epoch": 0.028929925486178606,
|
|
"grad_norm": 4.4170026779174805,
|
|
"learning_rate": 9.710972329246936e-05,
|
|
"loss": 1.1452,
|
|
"num_input_tokens_seen": 6651944,
|
|
"step": 413
|
|
},
|
|
{
|
|
"epoch": 0.028999973731907852,
|
|
"grad_norm": 4.073450088500977,
|
|
"learning_rate": 9.710272504378284e-05,
|
|
"loss": 1.1187,
|
|
"num_input_tokens_seen": 6668096,
|
|
"step": 414
|
|
},
|
|
{
|
|
"epoch": 0.029070021977637098,
|
|
"grad_norm": 4.161722183227539,
|
|
"learning_rate": 9.709572679509633e-05,
|
|
"loss": 1.1603,
|
|
"num_input_tokens_seen": 6684480,
|
|
"step": 415
|
|
},
|
|
{
|
|
"epoch": 0.029140070223366343,
|
|
"grad_norm": 4.540097713470459,
|
|
"learning_rate": 9.708872854640981e-05,
|
|
"loss": 1.2143,
|
|
"num_input_tokens_seen": 6700536,
|
|
"step": 416
|
|
},
|
|
{
|
|
"epoch": 0.02921011846909559,
|
|
"grad_norm": 4.030871868133545,
|
|
"learning_rate": 9.70817302977233e-05,
|
|
"loss": 0.9791,
|
|
"num_input_tokens_seen": 6716920,
|
|
"step": 417
|
|
},
|
|
{
|
|
"epoch": 0.029280166714824835,
|
|
"grad_norm": 4.1743268966674805,
|
|
"learning_rate": 9.707473204903679e-05,
|
|
"loss": 0.9818,
|
|
"num_input_tokens_seen": 6733304,
|
|
"step": 418
|
|
},
|
|
{
|
|
"epoch": 0.02935021496055408,
|
|
"grad_norm": 4.227272987365723,
|
|
"learning_rate": 9.706773380035026e-05,
|
|
"loss": 1.0945,
|
|
"num_input_tokens_seen": 6749688,
|
|
"step": 419
|
|
},
|
|
{
|
|
"epoch": 0.02942026320628333,
|
|
"grad_norm": 4.406428813934326,
|
|
"learning_rate": 9.706073555166375e-05,
|
|
"loss": 1.0302,
|
|
"num_input_tokens_seen": 6766072,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.029490311452012575,
|
|
"grad_norm": 4.17899227142334,
|
|
"learning_rate": 9.705373730297724e-05,
|
|
"loss": 1.1048,
|
|
"num_input_tokens_seen": 6782456,
|
|
"step": 421
|
|
},
|
|
{
|
|
"epoch": 0.02956035969774182,
|
|
"grad_norm": 4.034752368927002,
|
|
"learning_rate": 9.704673905429073e-05,
|
|
"loss": 1.2639,
|
|
"num_input_tokens_seen": 6798840,
|
|
"step": 422
|
|
},
|
|
{
|
|
"epoch": 0.029630407943471066,
|
|
"grad_norm": 4.795727729797363,
|
|
"learning_rate": 9.703974080560421e-05,
|
|
"loss": 1.2448,
|
|
"num_input_tokens_seen": 6814912,
|
|
"step": 423
|
|
},
|
|
{
|
|
"epoch": 0.029700456189200312,
|
|
"grad_norm": 4.509056568145752,
|
|
"learning_rate": 9.703274255691769e-05,
|
|
"loss": 1.2157,
|
|
"num_input_tokens_seen": 6830720,
|
|
"step": 424
|
|
},
|
|
{
|
|
"epoch": 0.029770504434929557,
|
|
"grad_norm": 4.064620494842529,
|
|
"learning_rate": 9.702574430823118e-05,
|
|
"loss": 1.2042,
|
|
"num_input_tokens_seen": 6847104,
|
|
"step": 425
|
|
},
|
|
{
|
|
"epoch": 0.029840552680658803,
|
|
"grad_norm": 3.9060182571411133,
|
|
"learning_rate": 9.701874605954465e-05,
|
|
"loss": 0.9116,
|
|
"num_input_tokens_seen": 6862952,
|
|
"step": 426
|
|
},
|
|
{
|
|
"epoch": 0.02991060092638805,
|
|
"grad_norm": 3.9900951385498047,
|
|
"learning_rate": 9.701174781085814e-05,
|
|
"loss": 1.1621,
|
|
"num_input_tokens_seen": 6879336,
|
|
"step": 427
|
|
},
|
|
{
|
|
"epoch": 0.029980649172117294,
|
|
"grad_norm": 4.371436595916748,
|
|
"learning_rate": 9.700474956217164e-05,
|
|
"loss": 1.2731,
|
|
"num_input_tokens_seen": 6895720,
|
|
"step": 428
|
|
},
|
|
{
|
|
"epoch": 0.030050697417846543,
|
|
"grad_norm": 3.9422085285186768,
|
|
"learning_rate": 9.699775131348512e-05,
|
|
"loss": 0.9636,
|
|
"num_input_tokens_seen": 6912104,
|
|
"step": 429
|
|
},
|
|
{
|
|
"epoch": 0.03012074566357579,
|
|
"grad_norm": 4.080913543701172,
|
|
"learning_rate": 9.699075306479861e-05,
|
|
"loss": 1.1507,
|
|
"num_input_tokens_seen": 6928488,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 0.030190793909305035,
|
|
"grad_norm": 4.493942737579346,
|
|
"learning_rate": 9.698375481611208e-05,
|
|
"loss": 1.2274,
|
|
"num_input_tokens_seen": 6944664,
|
|
"step": 431
|
|
},
|
|
{
|
|
"epoch": 0.03026084215503428,
|
|
"grad_norm": 4.073723793029785,
|
|
"learning_rate": 9.697675656742557e-05,
|
|
"loss": 1.0498,
|
|
"num_input_tokens_seen": 6960344,
|
|
"step": 432
|
|
},
|
|
{
|
|
"epoch": 0.030330890400763526,
|
|
"grad_norm": 3.9672274589538574,
|
|
"learning_rate": 9.696975831873906e-05,
|
|
"loss": 1.007,
|
|
"num_input_tokens_seen": 6976720,
|
|
"step": 433
|
|
},
|
|
{
|
|
"epoch": 0.03040093864649277,
|
|
"grad_norm": 4.497872829437256,
|
|
"learning_rate": 9.696276007005255e-05,
|
|
"loss": 1.1339,
|
|
"num_input_tokens_seen": 6992552,
|
|
"step": 434
|
|
},
|
|
{
|
|
"epoch": 0.030470986892222017,
|
|
"grad_norm": 4.422168731689453,
|
|
"learning_rate": 9.695576182136604e-05,
|
|
"loss": 1.34,
|
|
"num_input_tokens_seen": 7008936,
|
|
"step": 435
|
|
},
|
|
{
|
|
"epoch": 0.030541035137951263,
|
|
"grad_norm": 4.3009138107299805,
|
|
"learning_rate": 9.694876357267951e-05,
|
|
"loss": 1.2479,
|
|
"num_input_tokens_seen": 7024512,
|
|
"step": 436
|
|
},
|
|
{
|
|
"epoch": 0.030611083383680508,
|
|
"grad_norm": 4.04030704498291,
|
|
"learning_rate": 9.6941765323993e-05,
|
|
"loss": 1.097,
|
|
"num_input_tokens_seen": 7040896,
|
|
"step": 437
|
|
},
|
|
{
|
|
"epoch": 0.030681131629409757,
|
|
"grad_norm": 3.877417802810669,
|
|
"learning_rate": 9.693476707530649e-05,
|
|
"loss": 1.1363,
|
|
"num_input_tokens_seen": 7057280,
|
|
"step": 438
|
|
},
|
|
{
|
|
"epoch": 0.030751179875139003,
|
|
"grad_norm": 3.8185505867004395,
|
|
"learning_rate": 9.692776882661996e-05,
|
|
"loss": 0.9067,
|
|
"num_input_tokens_seen": 7072544,
|
|
"step": 439
|
|
},
|
|
{
|
|
"epoch": 0.03082122812086825,
|
|
"grad_norm": 4.028950214385986,
|
|
"learning_rate": 9.692077057793345e-05,
|
|
"loss": 1.1195,
|
|
"num_input_tokens_seen": 7088928,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.030891276366597494,
|
|
"grad_norm": 4.2786431312561035,
|
|
"learning_rate": 9.691377232924694e-05,
|
|
"loss": 1.1199,
|
|
"num_input_tokens_seen": 7105248,
|
|
"step": 441
|
|
},
|
|
{
|
|
"epoch": 0.03096132461232674,
|
|
"grad_norm": 4.193462371826172,
|
|
"learning_rate": 9.690677408056043e-05,
|
|
"loss": 1.1812,
|
|
"num_input_tokens_seen": 7121008,
|
|
"step": 442
|
|
},
|
|
{
|
|
"epoch": 0.031031372858055985,
|
|
"grad_norm": 3.93597412109375,
|
|
"learning_rate": 9.68997758318739e-05,
|
|
"loss": 1.0677,
|
|
"num_input_tokens_seen": 7136944,
|
|
"step": 443
|
|
},
|
|
{
|
|
"epoch": 0.03110142110378523,
|
|
"grad_norm": 4.3208537101745605,
|
|
"learning_rate": 9.68927775831874e-05,
|
|
"loss": 1.1358,
|
|
"num_input_tokens_seen": 7152928,
|
|
"step": 444
|
|
},
|
|
{
|
|
"epoch": 0.031171469349514477,
|
|
"grad_norm": 3.9743378162384033,
|
|
"learning_rate": 9.688577933450088e-05,
|
|
"loss": 1.094,
|
|
"num_input_tokens_seen": 7169312,
|
|
"step": 445
|
|
},
|
|
{
|
|
"epoch": 0.031241517595243726,
|
|
"grad_norm": 4.226114273071289,
|
|
"learning_rate": 9.687878108581436e-05,
|
|
"loss": 1.1752,
|
|
"num_input_tokens_seen": 7185696,
|
|
"step": 446
|
|
},
|
|
{
|
|
"epoch": 0.03131156584097297,
|
|
"grad_norm": 4.210222244262695,
|
|
"learning_rate": 9.687178283712785e-05,
|
|
"loss": 1.1262,
|
|
"num_input_tokens_seen": 7201784,
|
|
"step": 447
|
|
},
|
|
{
|
|
"epoch": 0.03138161408670222,
|
|
"grad_norm": 4.311635971069336,
|
|
"learning_rate": 9.686478458844133e-05,
|
|
"loss": 1.2491,
|
|
"num_input_tokens_seen": 7218168,
|
|
"step": 448
|
|
},
|
|
{
|
|
"epoch": 0.03145166233243146,
|
|
"grad_norm": 4.56603479385376,
|
|
"learning_rate": 9.685778633975482e-05,
|
|
"loss": 1.3512,
|
|
"num_input_tokens_seen": 7233360,
|
|
"step": 449
|
|
},
|
|
{
|
|
"epoch": 0.03152171057816071,
|
|
"grad_norm": 4.232856750488281,
|
|
"learning_rate": 9.685078809106831e-05,
|
|
"loss": 0.9387,
|
|
"num_input_tokens_seen": 7248280,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.031591758823889954,
|
|
"grad_norm": 4.512947082519531,
|
|
"learning_rate": 9.684378984238179e-05,
|
|
"loss": 1.1988,
|
|
"num_input_tokens_seen": 7264664,
|
|
"step": 451
|
|
},
|
|
{
|
|
"epoch": 0.0316618070696192,
|
|
"grad_norm": 4.273897171020508,
|
|
"learning_rate": 9.683679159369528e-05,
|
|
"loss": 1.2523,
|
|
"num_input_tokens_seen": 7281048,
|
|
"step": 452
|
|
},
|
|
{
|
|
"epoch": 0.031731855315348445,
|
|
"grad_norm": 4.288438320159912,
|
|
"learning_rate": 9.682979334500875e-05,
|
|
"loss": 1.1692,
|
|
"num_input_tokens_seen": 7297424,
|
|
"step": 453
|
|
},
|
|
{
|
|
"epoch": 0.03180190356107769,
|
|
"grad_norm": 4.27367639541626,
|
|
"learning_rate": 9.682279509632225e-05,
|
|
"loss": 1.1868,
|
|
"num_input_tokens_seen": 7312792,
|
|
"step": 454
|
|
},
|
|
{
|
|
"epoch": 0.031871951806806936,
|
|
"grad_norm": 3.978926181793213,
|
|
"learning_rate": 9.681579684763574e-05,
|
|
"loss": 1.0382,
|
|
"num_input_tokens_seen": 7329176,
|
|
"step": 455
|
|
},
|
|
{
|
|
"epoch": 0.03194200005253618,
|
|
"grad_norm": 4.4399919509887695,
|
|
"learning_rate": 9.680879859894922e-05,
|
|
"loss": 1.2072,
|
|
"num_input_tokens_seen": 7345560,
|
|
"step": 456
|
|
},
|
|
{
|
|
"epoch": 0.03201204829826543,
|
|
"grad_norm": 3.9786529541015625,
|
|
"learning_rate": 9.68018003502627e-05,
|
|
"loss": 1.1704,
|
|
"num_input_tokens_seen": 7361944,
|
|
"step": 457
|
|
},
|
|
{
|
|
"epoch": 0.03208209654399467,
|
|
"grad_norm": 4.171195030212402,
|
|
"learning_rate": 9.679480210157618e-05,
|
|
"loss": 1.1307,
|
|
"num_input_tokens_seen": 7378328,
|
|
"step": 458
|
|
},
|
|
{
|
|
"epoch": 0.032152144789723926,
|
|
"grad_norm": 3.9415268898010254,
|
|
"learning_rate": 9.678780385288967e-05,
|
|
"loss": 0.9971,
|
|
"num_input_tokens_seen": 7394208,
|
|
"step": 459
|
|
},
|
|
{
|
|
"epoch": 0.03222219303545317,
|
|
"grad_norm": 4.066036224365234,
|
|
"learning_rate": 9.678080560420316e-05,
|
|
"loss": 1.1227,
|
|
"num_input_tokens_seen": 7410328,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.03229224128118242,
|
|
"grad_norm": 4.22513484954834,
|
|
"learning_rate": 9.677380735551665e-05,
|
|
"loss": 1.0883,
|
|
"num_input_tokens_seen": 7426712,
|
|
"step": 461
|
|
},
|
|
{
|
|
"epoch": 0.03236228952691166,
|
|
"grad_norm": 4.310954570770264,
|
|
"learning_rate": 9.676680910683013e-05,
|
|
"loss": 1.1695,
|
|
"num_input_tokens_seen": 7442736,
|
|
"step": 462
|
|
},
|
|
{
|
|
"epoch": 0.03243233777264091,
|
|
"grad_norm": 4.2868828773498535,
|
|
"learning_rate": 9.675981085814361e-05,
|
|
"loss": 1.0594,
|
|
"num_input_tokens_seen": 7458560,
|
|
"step": 463
|
|
},
|
|
{
|
|
"epoch": 0.032502386018370154,
|
|
"grad_norm": 4.318186283111572,
|
|
"learning_rate": 9.67528126094571e-05,
|
|
"loss": 1.1791,
|
|
"num_input_tokens_seen": 7474944,
|
|
"step": 464
|
|
},
|
|
{
|
|
"epoch": 0.0325724342640994,
|
|
"grad_norm": 4.040421009063721,
|
|
"learning_rate": 9.674581436077059e-05,
|
|
"loss": 1.0649,
|
|
"num_input_tokens_seen": 7490344,
|
|
"step": 465
|
|
},
|
|
{
|
|
"epoch": 0.032642482509828645,
|
|
"grad_norm": 3.914815902709961,
|
|
"learning_rate": 9.673881611208406e-05,
|
|
"loss": 1.1381,
|
|
"num_input_tokens_seen": 7506728,
|
|
"step": 466
|
|
},
|
|
{
|
|
"epoch": 0.03271253075555789,
|
|
"grad_norm": 4.054527282714844,
|
|
"learning_rate": 9.673181786339755e-05,
|
|
"loss": 1.2264,
|
|
"num_input_tokens_seen": 7522912,
|
|
"step": 467
|
|
},
|
|
{
|
|
"epoch": 0.032782579001287136,
|
|
"grad_norm": 4.295147895812988,
|
|
"learning_rate": 9.672481961471104e-05,
|
|
"loss": 1.1369,
|
|
"num_input_tokens_seen": 7539040,
|
|
"step": 468
|
|
},
|
|
{
|
|
"epoch": 0.03285262724701638,
|
|
"grad_norm": 4.109183311462402,
|
|
"learning_rate": 9.671782136602453e-05,
|
|
"loss": 1.1676,
|
|
"num_input_tokens_seen": 7555424,
|
|
"step": 469
|
|
},
|
|
{
|
|
"epoch": 0.03292267549274563,
|
|
"grad_norm": 4.131369590759277,
|
|
"learning_rate": 9.6710823117338e-05,
|
|
"loss": 1.1188,
|
|
"num_input_tokens_seen": 7571808,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 0.03299272373847487,
|
|
"grad_norm": 3.998414993286133,
|
|
"learning_rate": 9.670382486865149e-05,
|
|
"loss": 1.0201,
|
|
"num_input_tokens_seen": 7587528,
|
|
"step": 471
|
|
},
|
|
{
|
|
"epoch": 0.03306277198420412,
|
|
"grad_norm": 4.1235551834106445,
|
|
"learning_rate": 9.669682661996498e-05,
|
|
"loss": 1.1265,
|
|
"num_input_tokens_seen": 7603912,
|
|
"step": 472
|
|
},
|
|
{
|
|
"epoch": 0.033132820229933364,
|
|
"grad_norm": 4.800798416137695,
|
|
"learning_rate": 9.668982837127845e-05,
|
|
"loss": 1.3634,
|
|
"num_input_tokens_seen": 7617512,
|
|
"step": 473
|
|
},
|
|
{
|
|
"epoch": 0.03320286847566261,
|
|
"grad_norm": 4.068000316619873,
|
|
"learning_rate": 9.668283012259196e-05,
|
|
"loss": 1.1427,
|
|
"num_input_tokens_seen": 7633040,
|
|
"step": 474
|
|
},
|
|
{
|
|
"epoch": 0.033272916721391856,
|
|
"grad_norm": 4.0715484619140625,
|
|
"learning_rate": 9.667583187390543e-05,
|
|
"loss": 1.0633,
|
|
"num_input_tokens_seen": 7648416,
|
|
"step": 475
|
|
},
|
|
{
|
|
"epoch": 0.0333429649671211,
|
|
"grad_norm": 3.937807321548462,
|
|
"learning_rate": 9.666883362521892e-05,
|
|
"loss": 1.1393,
|
|
"num_input_tokens_seen": 7664624,
|
|
"step": 476
|
|
},
|
|
{
|
|
"epoch": 0.033413013212850354,
|
|
"grad_norm": 4.195656776428223,
|
|
"learning_rate": 9.666183537653241e-05,
|
|
"loss": 1.1801,
|
|
"num_input_tokens_seen": 7680480,
|
|
"step": 477
|
|
},
|
|
{
|
|
"epoch": 0.0334830614585796,
|
|
"grad_norm": 4.227575778961182,
|
|
"learning_rate": 9.665483712784588e-05,
|
|
"loss": 1.0453,
|
|
"num_input_tokens_seen": 7696632,
|
|
"step": 478
|
|
},
|
|
{
|
|
"epoch": 0.033553109704308845,
|
|
"grad_norm": 4.328822135925293,
|
|
"learning_rate": 9.664783887915937e-05,
|
|
"loss": 1.221,
|
|
"num_input_tokens_seen": 7713016,
|
|
"step": 479
|
|
},
|
|
{
|
|
"epoch": 0.03362315795003809,
|
|
"grad_norm": 4.086736679077148,
|
|
"learning_rate": 9.664084063047286e-05,
|
|
"loss": 1.2817,
|
|
"num_input_tokens_seen": 7729400,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 0.033693206195767336,
|
|
"grad_norm": 4.555233955383301,
|
|
"learning_rate": 9.663384238178635e-05,
|
|
"loss": 1.483,
|
|
"num_input_tokens_seen": 7745784,
|
|
"step": 481
|
|
},
|
|
{
|
|
"epoch": 0.03376325444149658,
|
|
"grad_norm": 4.118983745574951,
|
|
"learning_rate": 9.662684413309984e-05,
|
|
"loss": 0.9139,
|
|
"num_input_tokens_seen": 7762168,
|
|
"step": 482
|
|
},
|
|
{
|
|
"epoch": 0.03383330268722583,
|
|
"grad_norm": 4.232059001922607,
|
|
"learning_rate": 9.661984588441331e-05,
|
|
"loss": 1.1269,
|
|
"num_input_tokens_seen": 7777920,
|
|
"step": 483
|
|
},
|
|
{
|
|
"epoch": 0.03390335093295507,
|
|
"grad_norm": 6.288865089416504,
|
|
"learning_rate": 9.66128476357268e-05,
|
|
"loss": 1.0642,
|
|
"num_input_tokens_seen": 7794304,
|
|
"step": 484
|
|
},
|
|
{
|
|
"epoch": 0.03397339917868432,
|
|
"grad_norm": 4.133046627044678,
|
|
"learning_rate": 9.660584938704028e-05,
|
|
"loss": 1.2067,
|
|
"num_input_tokens_seen": 7810200,
|
|
"step": 485
|
|
},
|
|
{
|
|
"epoch": 0.034043447424413564,
|
|
"grad_norm": 4.147965431213379,
|
|
"learning_rate": 9.659885113835377e-05,
|
|
"loss": 1.0367,
|
|
"num_input_tokens_seen": 7826384,
|
|
"step": 486
|
|
},
|
|
{
|
|
"epoch": 0.03411349567014281,
|
|
"grad_norm": 4.1191020011901855,
|
|
"learning_rate": 9.659185288966725e-05,
|
|
"loss": 1.0972,
|
|
"num_input_tokens_seen": 7841704,
|
|
"step": 487
|
|
},
|
|
{
|
|
"epoch": 0.034183543915872056,
|
|
"grad_norm": 4.518441677093506,
|
|
"learning_rate": 9.658485464098074e-05,
|
|
"loss": 1.263,
|
|
"num_input_tokens_seen": 7858088,
|
|
"step": 488
|
|
},
|
|
{
|
|
"epoch": 0.0342535921616013,
|
|
"grad_norm": 4.321181297302246,
|
|
"learning_rate": 9.657785639229423e-05,
|
|
"loss": 1.1378,
|
|
"num_input_tokens_seen": 7874472,
|
|
"step": 489
|
|
},
|
|
{
|
|
"epoch": 0.03432364040733055,
|
|
"grad_norm": 4.366185665130615,
|
|
"learning_rate": 9.65708581436077e-05,
|
|
"loss": 1.1636,
|
|
"num_input_tokens_seen": 7890856,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 0.03439368865305979,
|
|
"grad_norm": 4.042731761932373,
|
|
"learning_rate": 9.65638598949212e-05,
|
|
"loss": 1.0601,
|
|
"num_input_tokens_seen": 7906776,
|
|
"step": 491
|
|
},
|
|
{
|
|
"epoch": 0.03446373689878904,
|
|
"grad_norm": 3.743668556213379,
|
|
"learning_rate": 9.655686164623468e-05,
|
|
"loss": 1.0441,
|
|
"num_input_tokens_seen": 7923160,
|
|
"step": 492
|
|
},
|
|
{
|
|
"epoch": 0.034533785144518284,
|
|
"grad_norm": 3.8547139167785645,
|
|
"learning_rate": 9.654986339754816e-05,
|
|
"loss": 1.0842,
|
|
"num_input_tokens_seen": 7939296,
|
|
"step": 493
|
|
},
|
|
{
|
|
"epoch": 0.034603833390247536,
|
|
"grad_norm": 4.238414287567139,
|
|
"learning_rate": 9.654286514886166e-05,
|
|
"loss": 1.2498,
|
|
"num_input_tokens_seen": 7955504,
|
|
"step": 494
|
|
},
|
|
{
|
|
"epoch": 0.03467388163597678,
|
|
"grad_norm": 4.134857177734375,
|
|
"learning_rate": 9.653586690017514e-05,
|
|
"loss": 1.1241,
|
|
"num_input_tokens_seen": 7971888,
|
|
"step": 495
|
|
},
|
|
{
|
|
"epoch": 0.03474392988170603,
|
|
"grad_norm": 4.2501983642578125,
|
|
"learning_rate": 9.652886865148862e-05,
|
|
"loss": 1.1829,
|
|
"num_input_tokens_seen": 7988272,
|
|
"step": 496
|
|
},
|
|
{
|
|
"epoch": 0.03481397812743527,
|
|
"grad_norm": 7.4397053718566895,
|
|
"learning_rate": 9.65218704028021e-05,
|
|
"loss": 0.9952,
|
|
"num_input_tokens_seen": 8003744,
|
|
"step": 497
|
|
},
|
|
{
|
|
"epoch": 0.03488402637316452,
|
|
"grad_norm": 4.2750959396362305,
|
|
"learning_rate": 9.651487215411559e-05,
|
|
"loss": 1.2387,
|
|
"num_input_tokens_seen": 8019184,
|
|
"step": 498
|
|
},
|
|
{
|
|
"epoch": 0.034954074618893764,
|
|
"grad_norm": 4.156162261962891,
|
|
"learning_rate": 9.650787390542908e-05,
|
|
"loss": 1.1201,
|
|
"num_input_tokens_seen": 8035176,
|
|
"step": 499
|
|
},
|
|
{
|
|
"epoch": 0.03502412286462301,
|
|
"grad_norm": 4.178225040435791,
|
|
"learning_rate": 9.650087565674257e-05,
|
|
"loss": 1.2026,
|
|
"num_input_tokens_seen": 8051560,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.035094171110352256,
|
|
"grad_norm": 4.147096157073975,
|
|
"learning_rate": 9.649387740805605e-05,
|
|
"loss": 1.2465,
|
|
"num_input_tokens_seen": 8067944,
|
|
"step": 501
|
|
},
|
|
{
|
|
"epoch": 0.0351642193560815,
|
|
"grad_norm": 4.329249858856201,
|
|
"learning_rate": 9.648687915936953e-05,
|
|
"loss": 1.2742,
|
|
"num_input_tokens_seen": 8083824,
|
|
"step": 502
|
|
},
|
|
{
|
|
"epoch": 0.03523426760181075,
|
|
"grad_norm": 4.404232978820801,
|
|
"learning_rate": 9.647988091068302e-05,
|
|
"loss": 1.1511,
|
|
"num_input_tokens_seen": 8100208,
|
|
"step": 503
|
|
},
|
|
{
|
|
"epoch": 0.03530431584753999,
|
|
"grad_norm": 4.190586090087891,
|
|
"learning_rate": 9.64728826619965e-05,
|
|
"loss": 0.9884,
|
|
"num_input_tokens_seen": 8116048,
|
|
"step": 504
|
|
},
|
|
{
|
|
"epoch": 0.03537436409326924,
|
|
"grad_norm": 4.262845516204834,
|
|
"learning_rate": 9.646588441330998e-05,
|
|
"loss": 1.1321,
|
|
"num_input_tokens_seen": 8132432,
|
|
"step": 505
|
|
},
|
|
{
|
|
"epoch": 0.035444412338998484,
|
|
"grad_norm": 4.452746391296387,
|
|
"learning_rate": 9.645888616462347e-05,
|
|
"loss": 1.1667,
|
|
"num_input_tokens_seen": 8148816,
|
|
"step": 506
|
|
},
|
|
{
|
|
"epoch": 0.03551446058472773,
|
|
"grad_norm": 4.111443042755127,
|
|
"learning_rate": 9.645188791593696e-05,
|
|
"loss": 1.0049,
|
|
"num_input_tokens_seen": 8164856,
|
|
"step": 507
|
|
},
|
|
{
|
|
"epoch": 0.035584508830456975,
|
|
"grad_norm": 4.292227268218994,
|
|
"learning_rate": 9.644488966725045e-05,
|
|
"loss": 1.1535,
|
|
"num_input_tokens_seen": 8181240,
|
|
"step": 508
|
|
},
|
|
{
|
|
"epoch": 0.03565455707618622,
|
|
"grad_norm": 4.295238971710205,
|
|
"learning_rate": 9.643789141856394e-05,
|
|
"loss": 1.236,
|
|
"num_input_tokens_seen": 8197624,
|
|
"step": 509
|
|
},
|
|
{
|
|
"epoch": 0.035724605321915466,
|
|
"grad_norm": 3.930659294128418,
|
|
"learning_rate": 9.643089316987741e-05,
|
|
"loss": 0.9195,
|
|
"num_input_tokens_seen": 8213816,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 0.03579465356764472,
|
|
"grad_norm": 4.092316150665283,
|
|
"learning_rate": 9.64238949211909e-05,
|
|
"loss": 1.0799,
|
|
"num_input_tokens_seen": 8229632,
|
|
"step": 511
|
|
},
|
|
{
|
|
"epoch": 0.035864701813373964,
|
|
"grad_norm": 4.2939252853393555,
|
|
"learning_rate": 9.641689667250437e-05,
|
|
"loss": 1.111,
|
|
"num_input_tokens_seen": 8245232,
|
|
"step": 512
|
|
},
|
|
{
|
|
"epoch": 0.03593475005910321,
|
|
"grad_norm": 4.191503524780273,
|
|
"learning_rate": 9.640989842381786e-05,
|
|
"loss": 0.9399,
|
|
"num_input_tokens_seen": 8260912,
|
|
"step": 513
|
|
},
|
|
{
|
|
"epoch": 0.036004798304832455,
|
|
"grad_norm": 4.141485214233398,
|
|
"learning_rate": 9.640290017513136e-05,
|
|
"loss": 1.1334,
|
|
"num_input_tokens_seen": 8276864,
|
|
"step": 514
|
|
},
|
|
{
|
|
"epoch": 0.0360748465505617,
|
|
"grad_norm": 3.890547752380371,
|
|
"learning_rate": 9.639590192644484e-05,
|
|
"loss": 1.0055,
|
|
"num_input_tokens_seen": 8292720,
|
|
"step": 515
|
|
},
|
|
{
|
|
"epoch": 0.03614489479629095,
|
|
"grad_norm": 4.405922889709473,
|
|
"learning_rate": 9.638890367775833e-05,
|
|
"loss": 1.2238,
|
|
"num_input_tokens_seen": 8309104,
|
|
"step": 516
|
|
},
|
|
{
|
|
"epoch": 0.03621494304202019,
|
|
"grad_norm": 4.207942485809326,
|
|
"learning_rate": 9.63819054290718e-05,
|
|
"loss": 1.0688,
|
|
"num_input_tokens_seen": 8325304,
|
|
"step": 517
|
|
},
|
|
{
|
|
"epoch": 0.03628499128774944,
|
|
"grad_norm": 4.174366474151611,
|
|
"learning_rate": 9.637490718038529e-05,
|
|
"loss": 1.2303,
|
|
"num_input_tokens_seen": 8341688,
|
|
"step": 518
|
|
},
|
|
{
|
|
"epoch": 0.036355039533478684,
|
|
"grad_norm": 3.9641714096069336,
|
|
"learning_rate": 9.636790893169878e-05,
|
|
"loss": 1.2244,
|
|
"num_input_tokens_seen": 8357760,
|
|
"step": 519
|
|
},
|
|
{
|
|
"epoch": 0.03642508777920793,
|
|
"grad_norm": 5.832678318023682,
|
|
"learning_rate": 9.636091068301227e-05,
|
|
"loss": 1.0645,
|
|
"num_input_tokens_seen": 8372712,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 0.036495136024937175,
|
|
"grad_norm": 3.7905161380767822,
|
|
"learning_rate": 9.635391243432576e-05,
|
|
"loss": 1.0551,
|
|
"num_input_tokens_seen": 8389096,
|
|
"step": 521
|
|
},
|
|
{
|
|
"epoch": 0.03656518427066642,
|
|
"grad_norm": 3.6744072437286377,
|
|
"learning_rate": 9.634691418563923e-05,
|
|
"loss": 1.0687,
|
|
"num_input_tokens_seen": 8405216,
|
|
"step": 522
|
|
},
|
|
{
|
|
"epoch": 0.036635232516395666,
|
|
"grad_norm": 4.897486209869385,
|
|
"learning_rate": 9.633991593695272e-05,
|
|
"loss": 1.1968,
|
|
"num_input_tokens_seen": 8421600,
|
|
"step": 523
|
|
},
|
|
{
|
|
"epoch": 0.03670528076212491,
|
|
"grad_norm": 3.821457862854004,
|
|
"learning_rate": 9.63329176882662e-05,
|
|
"loss": 1.0473,
|
|
"num_input_tokens_seen": 8437984,
|
|
"step": 524
|
|
},
|
|
{
|
|
"epoch": 0.03677532900785416,
|
|
"grad_norm": 3.873832941055298,
|
|
"learning_rate": 9.632591943957969e-05,
|
|
"loss": 0.9656,
|
|
"num_input_tokens_seen": 8453760,
|
|
"step": 525
|
|
},
|
|
{
|
|
"epoch": 0.0368453772535834,
|
|
"grad_norm": 4.139901161193848,
|
|
"learning_rate": 9.631892119089317e-05,
|
|
"loss": 1.0881,
|
|
"num_input_tokens_seen": 8470144,
|
|
"step": 526
|
|
},
|
|
{
|
|
"epoch": 0.03691542549931265,
|
|
"grad_norm": 3.9512782096862793,
|
|
"learning_rate": 9.631192294220666e-05,
|
|
"loss": 1.1093,
|
|
"num_input_tokens_seen": 8486528,
|
|
"step": 527
|
|
},
|
|
{
|
|
"epoch": 0.0369854737450419,
|
|
"grad_norm": 3.8937103748321533,
|
|
"learning_rate": 9.630492469352015e-05,
|
|
"loss": 0.9722,
|
|
"num_input_tokens_seen": 8502912,
|
|
"step": 528
|
|
},
|
|
{
|
|
"epoch": 0.03705552199077115,
|
|
"grad_norm": 4.482640743255615,
|
|
"learning_rate": 9.629792644483363e-05,
|
|
"loss": 1.056,
|
|
"num_input_tokens_seen": 8519296,
|
|
"step": 529
|
|
},
|
|
{
|
|
"epoch": 0.03712557023650039,
|
|
"grad_norm": 4.127941131591797,
|
|
"learning_rate": 9.629092819614711e-05,
|
|
"loss": 1.0285,
|
|
"num_input_tokens_seen": 8535160,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 0.03719561848222964,
|
|
"grad_norm": 3.973585844039917,
|
|
"learning_rate": 9.62839299474606e-05,
|
|
"loss": 1.0356,
|
|
"num_input_tokens_seen": 8551256,
|
|
"step": 531
|
|
},
|
|
{
|
|
"epoch": 0.037265666727958884,
|
|
"grad_norm": 4.22855281829834,
|
|
"learning_rate": 9.627693169877408e-05,
|
|
"loss": 1.134,
|
|
"num_input_tokens_seen": 8567640,
|
|
"step": 532
|
|
},
|
|
{
|
|
"epoch": 0.03733571497368813,
|
|
"grad_norm": 4.144021511077881,
|
|
"learning_rate": 9.626993345008757e-05,
|
|
"loss": 1.0963,
|
|
"num_input_tokens_seen": 8583504,
|
|
"step": 533
|
|
},
|
|
{
|
|
"epoch": 0.037405763219417375,
|
|
"grad_norm": 3.8666226863861084,
|
|
"learning_rate": 9.626293520140106e-05,
|
|
"loss": 0.912,
|
|
"num_input_tokens_seen": 8599888,
|
|
"step": 534
|
|
},
|
|
{
|
|
"epoch": 0.03747581146514662,
|
|
"grad_norm": 4.215412616729736,
|
|
"learning_rate": 9.625593695271454e-05,
|
|
"loss": 1.1055,
|
|
"num_input_tokens_seen": 8616256,
|
|
"step": 535
|
|
},
|
|
{
|
|
"epoch": 0.037545859710875866,
|
|
"grad_norm": 4.353022575378418,
|
|
"learning_rate": 9.624893870402803e-05,
|
|
"loss": 1.0379,
|
|
"num_input_tokens_seen": 8632640,
|
|
"step": 536
|
|
},
|
|
{
|
|
"epoch": 0.03761590795660511,
|
|
"grad_norm": 3.778947591781616,
|
|
"learning_rate": 9.624194045534151e-05,
|
|
"loss": 1.0547,
|
|
"num_input_tokens_seen": 8648624,
|
|
"step": 537
|
|
},
|
|
{
|
|
"epoch": 0.03768595620233436,
|
|
"grad_norm": 4.481568336486816,
|
|
"learning_rate": 9.6234942206655e-05,
|
|
"loss": 1.3407,
|
|
"num_input_tokens_seen": 8664200,
|
|
"step": 538
|
|
},
|
|
{
|
|
"epoch": 0.0377560044480636,
|
|
"grad_norm": 4.066302299499512,
|
|
"learning_rate": 9.622794395796847e-05,
|
|
"loss": 0.995,
|
|
"num_input_tokens_seen": 8680584,
|
|
"step": 539
|
|
},
|
|
{
|
|
"epoch": 0.03782605269379285,
|
|
"grad_norm": 4.262768268585205,
|
|
"learning_rate": 9.622094570928197e-05,
|
|
"loss": 1.3054,
|
|
"num_input_tokens_seen": 8696968,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 0.037896100939522094,
|
|
"grad_norm": 3.777597665786743,
|
|
"learning_rate": 9.621394746059546e-05,
|
|
"loss": 0.9831,
|
|
"num_input_tokens_seen": 8713352,
|
|
"step": 541
|
|
},
|
|
{
|
|
"epoch": 0.03796614918525134,
|
|
"grad_norm": 3.9732742309570312,
|
|
"learning_rate": 9.620694921190894e-05,
|
|
"loss": 1.0699,
|
|
"num_input_tokens_seen": 8729048,
|
|
"step": 542
|
|
},
|
|
{
|
|
"epoch": 0.038036197430980585,
|
|
"grad_norm": 4.543329238891602,
|
|
"learning_rate": 9.619995096322243e-05,
|
|
"loss": 1.1546,
|
|
"num_input_tokens_seen": 8745432,
|
|
"step": 543
|
|
},
|
|
{
|
|
"epoch": 0.03810624567670983,
|
|
"grad_norm": 4.903865814208984,
|
|
"learning_rate": 9.61929527145359e-05,
|
|
"loss": 1.1548,
|
|
"num_input_tokens_seen": 8760296,
|
|
"step": 544
|
|
},
|
|
{
|
|
"epoch": 0.03817629392243908,
|
|
"grad_norm": 4.197691917419434,
|
|
"learning_rate": 9.618595446584939e-05,
|
|
"loss": 1.1616,
|
|
"num_input_tokens_seen": 8776680,
|
|
"step": 545
|
|
},
|
|
{
|
|
"epoch": 0.03824634216816833,
|
|
"grad_norm": 3.912689208984375,
|
|
"learning_rate": 9.617895621716288e-05,
|
|
"loss": 0.9926,
|
|
"num_input_tokens_seen": 8793064,
|
|
"step": 546
|
|
},
|
|
{
|
|
"epoch": 0.038316390413897575,
|
|
"grad_norm": 4.291840076446533,
|
|
"learning_rate": 9.617195796847637e-05,
|
|
"loss": 1.1943,
|
|
"num_input_tokens_seen": 8809448,
|
|
"step": 547
|
|
},
|
|
{
|
|
"epoch": 0.03838643865962682,
|
|
"grad_norm": 3.9053072929382324,
|
|
"learning_rate": 9.616495971978985e-05,
|
|
"loss": 1.2437,
|
|
"num_input_tokens_seen": 8825536,
|
|
"step": 548
|
|
},
|
|
{
|
|
"epoch": 0.038456486905356066,
|
|
"grad_norm": 4.860696315765381,
|
|
"learning_rate": 9.615796147110333e-05,
|
|
"loss": 1.3045,
|
|
"num_input_tokens_seen": 8841920,
|
|
"step": 549
|
|
},
|
|
{
|
|
"epoch": 0.03852653515108531,
|
|
"grad_norm": 3.9394373893737793,
|
|
"learning_rate": 9.615096322241682e-05,
|
|
"loss": 1.1367,
|
|
"num_input_tokens_seen": 8858304,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 0.03859658339681456,
|
|
"grad_norm": 3.8160409927368164,
|
|
"learning_rate": 9.61439649737303e-05,
|
|
"loss": 1.0864,
|
|
"num_input_tokens_seen": 8874688,
|
|
"step": 551
|
|
},
|
|
{
|
|
"epoch": 0.0386666316425438,
|
|
"grad_norm": 4.3792805671691895,
|
|
"learning_rate": 9.613696672504378e-05,
|
|
"loss": 1.2516,
|
|
"num_input_tokens_seen": 8891072,
|
|
"step": 552
|
|
},
|
|
{
|
|
"epoch": 0.03873667988827305,
|
|
"grad_norm": 4.103452682495117,
|
|
"learning_rate": 9.612996847635727e-05,
|
|
"loss": 0.9737,
|
|
"num_input_tokens_seen": 8907456,
|
|
"step": 553
|
|
},
|
|
{
|
|
"epoch": 0.038806728134002294,
|
|
"grad_norm": 4.117603302001953,
|
|
"learning_rate": 9.612297022767076e-05,
|
|
"loss": 1.096,
|
|
"num_input_tokens_seen": 8923816,
|
|
"step": 554
|
|
},
|
|
{
|
|
"epoch": 0.03887677637973154,
|
|
"grad_norm": 4.272468566894531,
|
|
"learning_rate": 9.611597197898425e-05,
|
|
"loss": 1.161,
|
|
"num_input_tokens_seen": 8939344,
|
|
"step": 555
|
|
},
|
|
{
|
|
"epoch": 0.038946824625460785,
|
|
"grad_norm": 4.323635578155518,
|
|
"learning_rate": 9.610897373029772e-05,
|
|
"loss": 1.1922,
|
|
"num_input_tokens_seen": 8954920,
|
|
"step": 556
|
|
},
|
|
{
|
|
"epoch": 0.03901687287119003,
|
|
"grad_norm": 3.783510684967041,
|
|
"learning_rate": 9.610197548161121e-05,
|
|
"loss": 1.0658,
|
|
"num_input_tokens_seen": 8971304,
|
|
"step": 557
|
|
},
|
|
{
|
|
"epoch": 0.039086921116919277,
|
|
"grad_norm": 4.3757548332214355,
|
|
"learning_rate": 9.60949772329247e-05,
|
|
"loss": 1.3186,
|
|
"num_input_tokens_seen": 8987672,
|
|
"step": 558
|
|
},
|
|
{
"epoch": 0.03915696936264852,
"grad_norm": 4.048824787139893,
"learning_rate": 9.608797898423818e-05,
"loss": 1.1452,
"num_input_tokens_seen": 9003896,
"step": 559
},
{
"epoch": 0.03922701760837777,
"grad_norm": 4.06865930557251,
"learning_rate": 9.608098073555168e-05,
"loss": 0.9861,
"num_input_tokens_seen": 9020280,
"step": 560
},
{
"epoch": 0.03929706585410701,
"grad_norm": 3.966737747192383,
"learning_rate": 9.607398248686515e-05,
"loss": 1.0323,
"num_input_tokens_seen": 9036280,
"step": 561
},
{
"epoch": 0.03936711409983626,
"grad_norm": 4.466656684875488,
"learning_rate": 9.606698423817864e-05,
"loss": 1.2462,
"num_input_tokens_seen": 9052664,
"step": 562
},
{
"epoch": 0.03943716234556551,
"grad_norm": 4.312132358551025,
"learning_rate": 9.605998598949213e-05,
"loss": 1.2133,
"num_input_tokens_seen": 9068832,
"step": 563
},
{
"epoch": 0.03950721059129476,
"grad_norm": 3.9202895164489746,
"learning_rate": 9.60529877408056e-05,
"loss": 1.0723,
"num_input_tokens_seen": 9084680,
"step": 564
},
{
"epoch": 0.039577258837024,
"grad_norm": 5.139899730682373,
"learning_rate": 9.604598949211909e-05,
"loss": 1.1165,
"num_input_tokens_seen": 9099792,
"step": 565
},
{
"epoch": 0.03964730708275325,
"grad_norm": 4.398557186126709,
"learning_rate": 9.603899124343258e-05,
"loss": 1.1737,
"num_input_tokens_seen": 9116136,
"step": 566
},
{
"epoch": 0.039717355328482494,
"grad_norm": 4.350982666015625,
"learning_rate": 9.603199299474607e-05,
"loss": 1.2174,
"num_input_tokens_seen": 9132520,
"step": 567
},
{
"epoch": 0.03978740357421174,
"grad_norm": 3.787644386291504,
"learning_rate": 9.602499474605956e-05,
"loss": 0.9914,
"num_input_tokens_seen": 9148856,
"step": 568
},
{
"epoch": 0.039857451819940985,
"grad_norm": 4.630245685577393,
"learning_rate": 9.601799649737303e-05,
"loss": 1.4135,
"num_input_tokens_seen": 9164888,
"step": 569
},
{
"epoch": 0.03992750006567023,
"grad_norm": 4.063969135284424,
"learning_rate": 9.601099824868652e-05,
"loss": 1.1312,
"num_input_tokens_seen": 9181272,
"step": 570
},
{
"epoch": 0.039997548311399476,
"grad_norm": 4.2443413734436035,
"learning_rate": 9.6004e-05,
"loss": 1.1627,
"num_input_tokens_seen": 9197344,
"step": 571
},
{
"epoch": 0.04006759655712872,
"grad_norm": 4.396352767944336,
"learning_rate": 9.599700175131349e-05,
"loss": 1.1222,
"num_input_tokens_seen": 9212312,
"step": 572
},
{
"epoch": 0.04013764480285797,
"grad_norm": 4.364585876464844,
"learning_rate": 9.599000350262697e-05,
"loss": 1.0522,
"num_input_tokens_seen": 9228696,
"step": 573
},
{
"epoch": 0.04020769304858721,
"grad_norm": 3.9348409175872803,
"learning_rate": 9.598300525394046e-05,
"loss": 1.1375,
"num_input_tokens_seen": 9245080,
"step": 574
},
{
"epoch": 0.04027774129431646,
"grad_norm": 4.051416873931885,
"learning_rate": 9.597600700525395e-05,
"loss": 1.0265,
"num_input_tokens_seen": 9260752,
"step": 575
},
{
"epoch": 0.040347789540045705,
"grad_norm": 4.661770820617676,
"learning_rate": 9.596900875656743e-05,
"loss": 1.192,
"num_input_tokens_seen": 9276792,
"step": 576
},
{
"epoch": 0.04041783778577495,
"grad_norm": 4.378422260284424,
"learning_rate": 9.596201050788092e-05,
"loss": 1.0497,
"num_input_tokens_seen": 9292768,
"step": 577
},
{
"epoch": 0.040487886031504196,
"grad_norm": 4.4690399169921875,
"learning_rate": 9.595501225919439e-05,
"loss": 1.2398,
"num_input_tokens_seen": 9309152,
"step": 578
},
{
"epoch": 0.04055793427723344,
"grad_norm": 4.1711273193359375,
"learning_rate": 9.594801401050788e-05,
"loss": 1.097,
"num_input_tokens_seen": 9325536,
"step": 579
},
{
"epoch": 0.040627982522962694,
"grad_norm": 3.8115949630737305,
"learning_rate": 9.594101576182137e-05,
"loss": 1.0317,
"num_input_tokens_seen": 9341920,
"step": 580
},
{
"epoch": 0.04069803076869194,
"grad_norm": 4.072190284729004,
"learning_rate": 9.593401751313486e-05,
"loss": 1.0649,
"num_input_tokens_seen": 9357904,
"step": 581
},
{
"epoch": 0.040768079014421185,
"grad_norm": 3.895766258239746,
"learning_rate": 9.592701926444835e-05,
"loss": 1.1906,
"num_input_tokens_seen": 9373496,
"step": 582
},
{
"epoch": 0.04083812726015043,
"grad_norm": 4.026490688323975,
"learning_rate": 9.592002101576182e-05,
"loss": 0.9913,
"num_input_tokens_seen": 9389824,
"step": 583
},
{
"epoch": 0.040908175505879676,
"grad_norm": 3.612987518310547,
"learning_rate": 9.591302276707531e-05,
"loss": 0.9376,
"num_input_tokens_seen": 9406208,
"step": 584
},
{
"epoch": 0.04097822375160892,
"grad_norm": 4.4619646072387695,
"learning_rate": 9.59060245183888e-05,
"loss": 1.2198,
"num_input_tokens_seen": 9422592,
"step": 585
},
{
"epoch": 0.04104827199733817,
"grad_norm": 3.990372896194458,
"learning_rate": 9.589902626970229e-05,
"loss": 1.082,
"num_input_tokens_seen": 9438816,
"step": 586
},
{
"epoch": 0.04111832024306741,
"grad_norm": 3.7697947025299072,
"learning_rate": 9.589202802101577e-05,
"loss": 1.0173,
"num_input_tokens_seen": 9455200,
"step": 587
},
{
"epoch": 0.04118836848879666,
"grad_norm": 4.066056728363037,
"learning_rate": 9.588502977232925e-05,
"loss": 1.124,
"num_input_tokens_seen": 9471320,
"step": 588
},
{
"epoch": 0.041258416734525905,
"grad_norm": 3.913506507873535,
"learning_rate": 9.587803152364274e-05,
"loss": 1.0501,
"num_input_tokens_seen": 9487304,
"step": 589
},
{
"epoch": 0.04132846498025515,
"grad_norm": 3.9049429893493652,
"learning_rate": 9.587103327495623e-05,
"loss": 1.0563,
"num_input_tokens_seen": 9503688,
"step": 590
},
{
"epoch": 0.041398513225984396,
"grad_norm": 4.316978454589844,
"learning_rate": 9.58640350262697e-05,
"loss": 1.1333,
"num_input_tokens_seen": 9519488,
"step": 591
},
{
"epoch": 0.04146856147171364,
"grad_norm": 3.7818517684936523,
"learning_rate": 9.585703677758319e-05,
"loss": 1.0537,
"num_input_tokens_seen": 9535872,
"step": 592
},
{
"epoch": 0.04153860971744289,
"grad_norm": 3.8751401901245117,
"learning_rate": 9.585003852889668e-05,
"loss": 1.1745,
"num_input_tokens_seen": 9551928,
"step": 593
},
{
"epoch": 0.04160865796317213,
"grad_norm": 4.357265949249268,
"learning_rate": 9.584304028021017e-05,
"loss": 1.1154,
"num_input_tokens_seen": 9568312,
"step": 594
},
{
"epoch": 0.04167870620890138,
"grad_norm": 4.184159755706787,
"learning_rate": 9.583604203152366e-05,
"loss": 1.125,
"num_input_tokens_seen": 9583968,
"step": 595
},
{
"epoch": 0.041748754454630624,
"grad_norm": 3.9540369510650635,
"learning_rate": 9.582904378283713e-05,
"loss": 1.2032,
"num_input_tokens_seen": 9600152,
"step": 596
},
{
"epoch": 0.04181880270035987,
"grad_norm": 4.401122093200684,
"learning_rate": 9.582204553415062e-05,
"loss": 1.4808,
"num_input_tokens_seen": 9615632,
"step": 597
},
{
"epoch": 0.04188885094608912,
"grad_norm": 4.418131351470947,
"learning_rate": 9.58150472854641e-05,
"loss": 1.0077,
"num_input_tokens_seen": 9631712,
"step": 598
},
{
"epoch": 0.04195889919181837,
"grad_norm": 4.362226963043213,
"learning_rate": 9.580804903677758e-05,
"loss": 1.1614,
"num_input_tokens_seen": 9648096,
"step": 599
},
{
"epoch": 0.04202894743754761,
"grad_norm": 4.051177024841309,
"learning_rate": 9.580105078809107e-05,
"loss": 1.0718,
"num_input_tokens_seen": 9663792,
"step": 600
},
{
"epoch": 0.04202894743754761,
"eval_loss": 1.1809133291244507,
"eval_runtime": 0.2062,
"eval_samples_per_second": 4.849,
"eval_steps_per_second": 4.849,
"num_input_tokens_seen": 9663792,
"step": 600
}
],
"logging_steps": 1,
"max_steps": 14275,
"num_input_tokens_seen": 9663792,
"num_train_epochs": 1,
"save_steps": 200,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 2.0751917469364224e+16,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}