RodrigoSalazar-U's picture
Training in progress, step 9000, checkpoint
3661c69 verified
raw
history blame
16.4 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.928806133625411,
"eval_steps": 500,
"global_step": 9000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.054764512595837894,
"grad_norm": 4.206851005554199,
"learning_rate": 1.095290251916758e-05,
"loss": 2.3255,
"step": 100
},
{
"epoch": 0.10952902519167579,
"grad_norm": 2.8459718227386475,
"learning_rate": 2.190580503833516e-05,
"loss": 1.8525,
"step": 200
},
{
"epoch": 0.16429353778751368,
"grad_norm": 3.025334119796753,
"learning_rate": 3.285870755750274e-05,
"loss": 1.7615,
"step": 300
},
{
"epoch": 0.21905805038335158,
"grad_norm": 3.115264654159546,
"learning_rate": 4.381161007667032e-05,
"loss": 1.7006,
"step": 400
},
{
"epoch": 0.2738225629791895,
"grad_norm": 2.6407604217529297,
"learning_rate": 5.47645125958379e-05,
"loss": 1.6693,
"step": 500
},
{
"epoch": 0.32858707557502737,
"grad_norm": 2.2635467052459717,
"learning_rate": 6.571741511500547e-05,
"loss": 1.6554,
"step": 600
},
{
"epoch": 0.3833515881708653,
"grad_norm": 2.4853227138519287,
"learning_rate": 7.667031763417306e-05,
"loss": 1.589,
"step": 700
},
{
"epoch": 0.43811610076670315,
"grad_norm": 2.712339162826538,
"learning_rate": 8.762322015334064e-05,
"loss": 1.5892,
"step": 800
},
{
"epoch": 0.4928806133625411,
"grad_norm": 2.6400351524353027,
"learning_rate": 9.857612267250822e-05,
"loss": 1.5559,
"step": 900
},
{
"epoch": 0.547645125958379,
"grad_norm": 3.203749656677246,
"learning_rate": 9.997234258138696e-05,
"loss": 1.5673,
"step": 1000
},
{
"epoch": 0.6024096385542169,
"grad_norm": 2.3736095428466797,
"learning_rate": 9.987226456522884e-05,
"loss": 1.5324,
"step": 1100
},
{
"epoch": 0.6571741511500547,
"grad_norm": 2.253758192062378,
"learning_rate": 9.969929463456831e-05,
"loss": 1.52,
"step": 1200
},
{
"epoch": 0.7119386637458927,
"grad_norm": 2.2543797492980957,
"learning_rate": 9.945368559744425e-05,
"loss": 1.463,
"step": 1300
},
{
"epoch": 0.7667031763417306,
"grad_norm": 2.094783067703247,
"learning_rate": 9.913579642919276e-05,
"loss": 1.4819,
"step": 1400
},
{
"epoch": 0.8214676889375685,
"grad_norm": 2.028780937194824,
"learning_rate": 9.874609174777887e-05,
"loss": 1.4738,
"step": 1500
},
{
"epoch": 0.8762322015334063,
"grad_norm": 2.2184860706329346,
"learning_rate": 9.82851411347238e-05,
"loss": 1.4481,
"step": 1600
},
{
"epoch": 0.9309967141292442,
"grad_norm": 2.685318946838379,
"learning_rate": 9.775361830262055e-05,
"loss": 1.4143,
"step": 1700
},
{
"epoch": 0.9857612267250822,
"grad_norm": 2.3410556316375732,
"learning_rate": 9.715230011045415e-05,
"loss": 1.3997,
"step": 1800
},
{
"epoch": 1.04052573932092,
"grad_norm": 2.4343554973602295,
"learning_rate": 9.648206542816636e-05,
"loss": 1.2183,
"step": 1900
},
{
"epoch": 1.095290251916758,
"grad_norm": 2.7375845909118652,
"learning_rate": 9.574389385212366e-05,
"loss": 1.1348,
"step": 2000
},
{
"epoch": 1.1500547645125958,
"grad_norm": 2.488781213760376,
"learning_rate": 9.493886427336657e-05,
"loss": 1.1794,
"step": 2100
},
{
"epoch": 1.2048192771084336,
"grad_norm": 2.216630697250366,
"learning_rate": 9.406815330073244e-05,
"loss": 1.1533,
"step": 2200
},
{
"epoch": 1.2595837897042717,
"grad_norm": 2.5205495357513428,
"learning_rate": 9.313303354115677e-05,
"loss": 1.1383,
"step": 2300
},
{
"epoch": 1.3143483023001095,
"grad_norm": 2.7046151161193848,
"learning_rate": 9.213487173966623e-05,
"loss": 1.1639,
"step": 2400
},
{
"epoch": 1.3691128148959475,
"grad_norm": 2.2721831798553467,
"learning_rate": 9.107512678178223e-05,
"loss": 1.1572,
"step": 2500
},
{
"epoch": 1.4238773274917853,
"grad_norm": 2.1899495124816895,
"learning_rate": 8.99553475612544e-05,
"loss": 1.0975,
"step": 2600
},
{
"epoch": 1.4786418400876231,
"grad_norm": 2.541968822479248,
"learning_rate": 8.877717071624055e-05,
"loss": 1.1102,
"step": 2700
},
{
"epoch": 1.5334063526834611,
"grad_norm": 1.9248064756393433,
"learning_rate": 8.754231823724187e-05,
"loss": 1.1012,
"step": 2800
},
{
"epoch": 1.588170865279299,
"grad_norm": 2.002230644226074,
"learning_rate": 8.62525949502896e-05,
"loss": 1.0992,
"step": 2900
},
{
"epoch": 1.642935377875137,
"grad_norm": 2.2088050842285156,
"learning_rate": 8.490988587906137e-05,
"loss": 1.1102,
"step": 3000
},
{
"epoch": 1.6976998904709748,
"grad_norm": 2.2829463481903076,
"learning_rate": 8.351615348978318e-05,
"loss": 1.1058,
"step": 3100
},
{
"epoch": 1.7524644030668126,
"grad_norm": 2.018911600112915,
"learning_rate": 8.207343482294323e-05,
"loss": 1.0693,
"step": 3200
},
{
"epoch": 1.8072289156626506,
"grad_norm": 1.948006272315979,
"learning_rate": 8.058383851601027e-05,
"loss": 1.0797,
"step": 3300
},
{
"epoch": 1.8619934282584885,
"grad_norm": 1.9828593730926514,
"learning_rate": 7.904954172150776e-05,
"loss": 1.0454,
"step": 3400
},
{
"epoch": 1.9167579408543265,
"grad_norm": 2.2209079265594482,
"learning_rate": 7.747278692494825e-05,
"loss": 1.0665,
"step": 3500
},
{
"epoch": 1.9715224534501643,
"grad_norm": 2.0689291954040527,
"learning_rate": 7.585587866727898e-05,
"loss": 1.0571,
"step": 3600
},
{
"epoch": 2.026286966046002,
"grad_norm": 1.5938904285430908,
"learning_rate": 7.420118017662894e-05,
"loss": 0.8671,
"step": 3700
},
{
"epoch": 2.08105147864184,
"grad_norm": 2.3023173809051514,
"learning_rate": 7.251110991428034e-05,
"loss": 0.6557,
"step": 3800
},
{
"epoch": 2.135815991237678,
"grad_norm": 1.6988605260849,
"learning_rate": 7.07881380399129e-05,
"loss": 0.6836,
"step": 3900
},
{
"epoch": 2.190580503833516,
"grad_norm": 1.5740976333618164,
"learning_rate": 6.903478280128721e-05,
"loss": 0.6712,
"step": 4000
},
{
"epoch": 2.245345016429354,
"grad_norm": 2.228999137878418,
"learning_rate": 6.725360685364384e-05,
"loss": 0.668,
"step": 4100
},
{
"epoch": 2.3001095290251916,
"grad_norm": 2.125016450881958,
"learning_rate": 6.54472135141977e-05,
"loss": 0.6735,
"step": 4200
},
{
"epoch": 2.3548740416210294,
"grad_norm": 2.3794703483581543,
"learning_rate": 6.361824295720199e-05,
"loss": 0.6594,
"step": 4300
},
{
"epoch": 2.4096385542168672,
"grad_norm": 2.231340169906616,
"learning_rate": 6.176936835514312e-05,
"loss": 0.6691,
"step": 4400
},
{
"epoch": 2.4644030668127055,
"grad_norm": 2.1866331100463867,
"learning_rate": 5.9903291971706e-05,
"loss": 0.687,
"step": 4500
},
{
"epoch": 2.5191675794085433,
"grad_norm": 1.7319729328155518,
"learning_rate": 5.8022741212220623e-05,
"loss": 0.6605,
"step": 4600
},
{
"epoch": 2.573932092004381,
"grad_norm": 2.021991729736328,
"learning_rate": 5.6130464637362466e-05,
"loss": 0.6499,
"step": 4700
},
{
"epoch": 2.628696604600219,
"grad_norm": 2.0492424964904785,
"learning_rate": 5.4229227945932446e-05,
"loss": 0.6634,
"step": 4800
},
{
"epoch": 2.6834611171960567,
"grad_norm": 1.9110430479049683,
"learning_rate": 5.2321809932588664e-05,
"loss": 0.6407,
"step": 4900
},
{
"epoch": 2.738225629791895,
"grad_norm": 2.151268482208252,
"learning_rate": 5.041099842643736e-05,
"loss": 0.6336,
"step": 5000
},
{
"epoch": 2.792990142387733,
"grad_norm": 1.8292102813720703,
"learning_rate": 4.849958621641945e-05,
"loss": 0.6421,
"step": 5100
},
{
"epoch": 2.8477546549835706,
"grad_norm": 1.5233772993087769,
"learning_rate": 4.659036696944793e-05,
"loss": 0.6052,
"step": 5200
},
{
"epoch": 2.9025191675794084,
"grad_norm": 2.1103007793426514,
"learning_rate": 4.4686131147261994e-05,
"loss": 0.6316,
"step": 5300
},
{
"epoch": 2.9572836801752462,
"grad_norm": 1.618807315826416,
"learning_rate": 4.2789661927965795e-05,
"loss": 0.635,
"step": 5400
},
{
"epoch": 3.0120481927710845,
"grad_norm": 1.661723256111145,
"learning_rate": 4.090373113821281e-05,
"loss": 0.553,
"step": 5500
},
{
"epoch": 3.0668127053669223,
"grad_norm": 1.5551363229751587,
"learning_rate": 3.9031095201980976e-05,
"loss": 0.3049,
"step": 5600
},
{
"epoch": 3.12157721796276,
"grad_norm": 1.7245244979858398,
"learning_rate": 3.717449111186025e-05,
"loss": 0.3081,
"step": 5700
},
{
"epoch": 3.176341730558598,
"grad_norm": 1.4479619264602661,
"learning_rate": 3.5336632428740265e-05,
"loss": 0.3103,
"step": 5800
},
{
"epoch": 3.2311062431544357,
"grad_norm": 2.026216745376587,
"learning_rate": 3.352020531574527e-05,
"loss": 0.3035,
"step": 5900
},
{
"epoch": 3.285870755750274,
"grad_norm": 1.2143455743789673,
"learning_rate": 3.172786461221279e-05,
"loss": 0.3146,
"step": 6000
},
{
"epoch": 3.340635268346112,
"grad_norm": 1.7206776142120361,
"learning_rate": 2.996222995345437e-05,
"loss": 0.3138,
"step": 6100
},
{
"epoch": 3.3953997809419496,
"grad_norm": 1.966302514076233,
"learning_rate": 2.822588194196941e-05,
"loss": 0.3059,
"step": 6200
},
{
"epoch": 3.4501642935377874,
"grad_norm": 1.6685985326766968,
"learning_rate": 2.6521358375708428e-05,
"loss": 0.3006,
"step": 6300
},
{
"epoch": 3.5049288061336252,
"grad_norm": 1.6338154077529907,
"learning_rate": 2.4851150538898028e-05,
"loss": 0.3017,
"step": 6400
},
{
"epoch": 3.5596933187294635,
"grad_norm": 1.6607939004898071,
"learning_rate": 2.321769956084937e-05,
"loss": 0.2991,
"step": 6500
},
{
"epoch": 3.6144578313253013,
"grad_norm": 1.7949954271316528,
"learning_rate": 2.1623392848071354e-05,
"loss": 0.3045,
"step": 6600
},
{
"epoch": 3.669222343921139,
"grad_norm": 1.6750141382217407,
"learning_rate": 2.007056059490364e-05,
"loss": 0.3077,
"step": 6700
},
{
"epoch": 3.723986856516977,
"grad_norm": 1.425621509552002,
"learning_rate": 1.856147237776956e-05,
"loss": 0.2989,
"step": 6800
},
{
"epoch": 3.7787513691128147,
"grad_norm": 1.534443736076355,
"learning_rate": 1.7098333838026275e-05,
"loss": 0.2975,
"step": 6900
},
{
"epoch": 3.833515881708653,
"grad_norm": 1.511762261390686,
"learning_rate": 1.5683283458260718e-05,
"loss": 0.2968,
"step": 7000
},
{
"epoch": 3.888280394304491,
"grad_norm": 1.968000054359436,
"learning_rate": 1.4318389436742962e-05,
"loss": 0.2907,
"step": 7100
},
{
"epoch": 3.9430449069003286,
"grad_norm": 1.5500705242156982,
"learning_rate": 1.3005646664605165e-05,
"loss": 0.2922,
"step": 7200
},
{
"epoch": 3.9978094194961664,
"grad_norm": 1.7522929906845093,
"learning_rate": 1.1746973810164147e-05,
"loss": 0.2815,
"step": 7300
},
{
"epoch": 4.052573932092004,
"grad_norm": 0.8427908420562744,
"learning_rate": 1.0544210514649233e-05,
"loss": 0.1758,
"step": 7400
},
{
"epoch": 4.1073384446878425,
"grad_norm": 0.8239675760269165,
"learning_rate": 9.399114703433688e-06,
"loss": 0.17,
"step": 7500
},
{
"epoch": 4.16210295728368,
"grad_norm": 0.85968416929245,
"learning_rate": 8.313360016700011e-06,
"loss": 0.1679,
"step": 7600
},
{
"epoch": 4.216867469879518,
"grad_norm": 0.9793341159820557,
"learning_rate": 7.288533363293959e-06,
"loss": 0.1719,
"step": 7700
},
{
"epoch": 4.271631982475356,
"grad_norm": 0.9244991540908813,
"learning_rate": 6.32613260134271e-06,
"loss": 0.1718,
"step": 7800
},
{
"epoch": 4.326396495071194,
"grad_norm": 1.2018598318099976,
"learning_rate": 5.427564349027098e-06,
"loss": 0.1669,
"step": 7900
},
{
"epoch": 4.381161007667032,
"grad_norm": 0.9171732664108276,
"learning_rate": 4.594141928707629e-06,
"loss": 0.1685,
"step": 8000
},
{
"epoch": 4.435925520262869,
"grad_norm": 0.9013499021530151,
"learning_rate": 3.8270834474090466e-06,
"loss": 0.1684,
"step": 8100
},
{
"epoch": 4.490690032858708,
"grad_norm": 0.8273316621780396,
"learning_rate": 3.1275100164689543e-06,
"loss": 0.1666,
"step": 8200
},
{
"epoch": 4.545454545454545,
"grad_norm": 0.8490225076675415,
"learning_rate": 2.496444112952734e-06,
"loss": 0.1709,
"step": 8300
},
{
"epoch": 4.600219058050383,
"grad_norm": 0.9616093635559082,
"learning_rate": 1.9348080852294783e-06,
"loss": 0.1676,
"step": 8400
},
{
"epoch": 4.6549835706462215,
"grad_norm": 0.6257395148277283,
"learning_rate": 1.4434228048932796e-06,
"loss": 0.1657,
"step": 8500
},
{
"epoch": 4.709748083242059,
"grad_norm": 1.020652174949646,
"learning_rate": 1.023006467000115e-06,
"loss": 0.1708,
"step": 8600
},
{
"epoch": 4.764512595837897,
"grad_norm": 1.2859959602355957,
"learning_rate": 6.741735403739901e-07,
"loss": 0.1712,
"step": 8700
},
{
"epoch": 4.8192771084337345,
"grad_norm": 0.8666063547134399,
"learning_rate": 3.974338695163393e-07,
"loss": 0.1657,
"step": 8800
},
{
"epoch": 4.874041621029573,
"grad_norm": 0.6807364821434021,
"learning_rate": 1.9319192943152986e-07,
"loss": 0.1675,
"step": 8900
},
{
"epoch": 4.928806133625411,
"grad_norm": 1.1967036724090576,
"learning_rate": 6.174623445742155e-08,
"loss": 0.1721,
"step": 9000
}
],
"logging_steps": 100,
"max_steps": 9130,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 4.325169651624493e+18,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}