|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.005349166867260424, |
|
"global_step": 150, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 3.061666666666666e-05, |
|
"loss": 4.7122, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 6.123333333333332e-05, |
|
"loss": 4.6911, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 9.185e-05, |
|
"loss": 4.724, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00012246666666666664, |
|
"loss": 4.6028, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00015308333333333333, |
|
"loss": 4.2087, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0001837, |
|
"loss": 3.9055, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00018369229239676365, |
|
"loss": 3.6594, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00018366917088062342, |
|
"loss": 3.3363, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00018363063933206865, |
|
"loss": 3.1973, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00018357670421785793, |
|
"loss": 3.0688, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0001835073745899338, |
|
"loss": 2.8881, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00018342266208390355, |
|
"loss": 2.9587, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00018332258091708636, |
|
"loss": 2.844, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00018320714788612737, |
|
"loss": 2.7755, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0001830763823641785, |
|
"loss": 2.7166, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00018293030629764714, |
|
"loss": 2.6935, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00018276894420251292, |
|
"loss": 2.7225, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00018259232316021308, |
|
"loss": 2.6965, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00018240047281309753, |
|
"loss": 2.6583, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00018219342535945378, |
|
"loss": 2.6823, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00018197121554810325, |
|
"loss": 2.6228, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00018173388067256935, |
|
"loss": 2.4769, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00018148146056481843, |
|
"loss": 2.437, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0001812139975885748, |
|
"loss": 2.4892, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00018093153663221082, |
|
"loss": 2.4044, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00018063412510121334, |
|
"loss": 2.3818, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00018032181291022746, |
|
"loss": 2.3269, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00017999465247467954, |
|
"loss": 2.348, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00017965269870198018, |
|
"loss": 2.2777, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0001792960089823091, |
|
"loss": 2.2388, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00017892464317898344, |
|
"loss": 2.354, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00017853866361841074, |
|
"loss": 2.2417, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0001781381350796288, |
|
"loss": 2.2249, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00017772312478343374, |
|
"loss": 2.1922, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00017729370238109832, |
|
"loss": 2.0696, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0001768499399426824, |
|
"loss": 2.0623, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0001763919119449373, |
|
"loss": 2.1225, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0001759196952588065, |
|
"loss": 2.0444, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00017543336913652426, |
|
"loss": 2.1441, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0001749330151983147, |
|
"loss": 2.0458, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00017441871741869357, |
|
"loss": 2.109, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0001738905621123746, |
|
"loss": 2.1298, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0001733486379197835, |
|
"loss": 2.0426, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0001727930357921812, |
|
"loss": 2.0356, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00017222384897639967, |
|
"loss": 1.9853, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00017164117299919217, |
|
"loss": 2.0567, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.000171045105651201, |
|
"loss": 1.9754, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00017043574697054526, |
|
"loss": 2.07, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0001698131992260315, |
|
"loss": 1.9816, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0001691775668999898, |
|
"loss": 2.047, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00016852895667073854, |
|
"loss": 2.0334, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00016786747739468068, |
|
"loss": 2.012, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0001671932400880341, |
|
"loss": 1.968, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0001665063579082, |
|
"loss": 1.9014, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00016580694613477136, |
|
"loss": 1.9496, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00016509512215018588, |
|
"loss": 1.9224, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00016437100542002542, |
|
"loss": 1.9407, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00016363471747296617, |
|
"loss": 1.9245, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00016288638188038235, |
|
"loss": 1.8933, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00016212612423560733, |
|
"loss": 1.9372, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0001613540721328553, |
|
"loss": 1.9722, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0001605703551458069, |
|
"loss": 1.92, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00015977510480586304, |
|
"loss": 1.9598, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0001589684545800698, |
|
"loss": 1.8803, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00015815053984871886, |
|
"loss": 1.9744, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00015732149788262627, |
|
"loss": 1.8559, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0001564814678200945, |
|
"loss": 1.938, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0001556305906435608, |
|
"loss": 1.9794, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00015476900915593596, |
|
"loss": 1.9055, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0001538968679566378, |
|
"loss": 1.9784, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00015301431341732292, |
|
"loss": 1.9461, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00015212149365732115, |
|
"loss": 1.91, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00015121855851877672, |
|
"loss": 1.927, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00015030565954150013, |
|
"loss": 1.9173, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00014938294993753506, |
|
"loss": 1.9565, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00014845058456544515, |
|
"loss": 1.9549, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00014750871990432368, |
|
"loss": 1.9234, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00014655751402753173, |
|
"loss": 1.9352, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00014559712657616884, |
|
"loss": 1.902, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00014462771873228027, |
|
"loss": 1.9706, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00014364945319180576, |
|
"loss": 1.8017, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0001426624941372744, |
|
"loss": 1.8949, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00014166700721024962, |
|
"loss": 1.9567, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00014066315948352973, |
|
"loss": 1.773, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00013965111943310788, |
|
"loss": 1.8803, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0001386310569098969, |
|
"loss": 1.9013, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00013760314311122285, |
|
"loss": 1.8125, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0001365675505520934, |
|
"loss": 1.8461, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00013552445303624408, |
|
"loss": 1.866, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00013447402562696928, |
|
"loss": 1.8816, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0001334164446177409, |
|
"loss": 1.8429, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00013235188750262118, |
|
"loss": 1.8838, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00013128053294647385, |
|
"loss": 1.8784, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00013020256075497866, |
|
"loss": 1.8919, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0001291181518444547, |
|
"loss": 1.9255, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.000128027488211497, |
|
"loss": 1.8819, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0001269307529024322, |
|
"loss": 1.7187, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00012582812998259752, |
|
"loss": 1.8393, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0001247198045054494, |
|
"loss": 1.899, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00012360596248150578, |
|
"loss": 1.8248, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00012248679084712788, |
|
"loss": 1.7758, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00012136247743314659, |
|
"loss": 1.8896, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00012023321093333892, |
|
"loss": 1.8227, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0001190991808727594, |
|
"loss": 1.7834, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00011796057757593203, |
|
"loss": 1.9018, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0001168175921349081, |
|
"loss": 1.8601, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00011567041637719514, |
|
"loss": 1.77, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00011451924283356254, |
|
"loss": 1.7776, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00011336426470572898, |
|
"loss": 1.7542, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00011220567583393738, |
|
"loss": 1.9201, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00011104367066442263, |
|
"loss": 1.7519, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00010987844421677765, |
|
"loss": 1.7057, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00010871019205122319, |
|
"loss": 1.8886, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00010753911023578701, |
|
"loss": 1.7797, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00010636539531339759, |
|
"loss": 1.8853, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00010518924426889829, |
|
"loss": 1.826, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00010401085449598752, |
|
"loss": 1.7474, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00010283042376408993, |
|
"loss": 1.8552, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00010164815018516475, |
|
"loss": 1.7706, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00010046423218045675, |
|
"loss": 1.7976, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 9.927886844719498e-05, |
|
"loss": 1.7527, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 9.80922579252454e-05, |
|
"loss": 1.8078, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 9.690459976372293e-05, |
|
"loss": 1.8556, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 9.571609328756794e-05, |
|
"loss": 1.7546, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 9.452693796409372e-05, |
|
"loss": 1.8492, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 9.333733336950976e-05, |
|
"loss": 1.8373, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 9.214747915542679e-05, |
|
"loss": 1.8429, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 9.095757501534924e-05, |
|
"loss": 1.8388, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 8.976782065116062e-05, |
|
"loss": 1.7977, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 8.857841573960755e-05, |
|
"loss": 1.8325, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 8.738955989878794e-05, |
|
"loss": 1.8479, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 8.620145265464898e-05, |
|
"loss": 1.65, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 8.501429340750077e-05, |
|
"loss": 1.8146, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 8.382828139855082e-05, |
|
"loss": 1.7693, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 8.264361567646539e-05, |
|
"loss": 1.9136, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 8.146049506396317e-05, |
|
"loss": 1.8657, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 8.02791181244466e-05, |
|
"loss": 1.8238, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 7.909968312867728e-05, |
|
"loss": 1.8138, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 7.792238802149986e-05, |
|
"loss": 1.8056, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 7.674743038862096e-05, |
|
"loss": 1.8203, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 7.557500742344842e-05, |
|
"loss": 1.7885, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 7.440531589399613e-05, |
|
"loss": 1.737, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 7.323855210986062e-05, |
|
"loss": 1.796, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 7.207491188927419e-05, |
|
"loss": 1.7995, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 7.091459052624075e-05, |
|
"loss": 1.7675, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 6.97577827577596e-05, |
|
"loss": 1.851, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 6.860468273114271e-05, |
|
"loss": 1.7414, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 6.745548397143076e-05, |
|
"loss": 1.819, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 6.631037934891401e-05, |
|
"loss": 1.8442, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 6.51695610467628e-05, |
|
"loss": 1.7498, |
|
"step": 150 |
|
} |
|
], |
|
"max_steps": 200, |
|
"num_train_epochs": 1, |
|
"total_flos": 7.555500537053184e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|