SMILES-DeBERTa-small / trainer_state.json
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.819185320199062,
"eval_steps": 500,
"global_step": 40000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 9.965867278325039e-05,
"loss": 1.8363,
"step": 500
},
{
"epoch": 0.02,
"learning_rate": 9.931734556650079e-05,
"loss": 1.2927,
"step": 1000
},
{
"epoch": 0.03,
"learning_rate": 9.897601834975117e-05,
"loss": 1.0069,
"step": 1500
},
{
"epoch": 0.04,
"learning_rate": 9.863469113300157e-05,
"loss": 0.8362,
"step": 2000
},
{
"epoch": 0.05,
"learning_rate": 9.829336391625195e-05,
"loss": 0.7217,
"step": 2500
},
{
"epoch": 0.06,
"learning_rate": 9.795203669950235e-05,
"loss": 0.6462,
"step": 3000
},
{
"epoch": 0.07,
"learning_rate": 9.761070948275274e-05,
"loss": 0.5914,
"step": 3500
},
{
"epoch": 0.08,
"learning_rate": 9.726938226600313e-05,
"loss": 0.5497,
"step": 4000
},
{
"epoch": 0.09,
"learning_rate": 9.692805504925352e-05,
"loss": 0.5154,
"step": 4500
},
{
"epoch": 0.1,
"learning_rate": 9.658672783250392e-05,
"loss": 0.4867,
"step": 5000
},
{
"epoch": 0.11,
"learning_rate": 9.62454006157543e-05,
"loss": 0.4628,
"step": 5500
},
{
"epoch": 0.12,
"learning_rate": 9.590407339900468e-05,
"loss": 0.4393,
"step": 6000
},
{
"epoch": 0.13,
"learning_rate": 9.556274618225508e-05,
"loss": 0.4199,
"step": 6500
},
{
"epoch": 0.14,
"learning_rate": 9.522141896550547e-05,
"loss": 0.3996,
"step": 7000
},
{
"epoch": 0.15,
"learning_rate": 9.488009174875586e-05,
"loss": 0.383,
"step": 7500
},
{
"epoch": 0.16,
"learning_rate": 9.453876453200626e-05,
"loss": 0.37,
"step": 8000
},
{
"epoch": 0.17,
"learning_rate": 9.419743731525666e-05,
"loss": 0.3577,
"step": 8500
},
{
"epoch": 0.18,
"learning_rate": 9.385611009850704e-05,
"loss": 0.3468,
"step": 9000
},
{
"epoch": 0.19,
"learning_rate": 9.351478288175744e-05,
"loss": 0.3364,
"step": 9500
},
{
"epoch": 0.2,
"learning_rate": 9.317345566500783e-05,
"loss": 0.3281,
"step": 10000
},
{
"epoch": 0.22,
"learning_rate": 9.283212844825821e-05,
"loss": 0.3198,
"step": 10500
},
{
"epoch": 0.23,
"learning_rate": 9.249080123150861e-05,
"loss": 0.3124,
"step": 11000
},
{
"epoch": 0.24,
"learning_rate": 9.214947401475899e-05,
"loss": 0.3061,
"step": 11500
},
{
"epoch": 0.25,
"learning_rate": 9.180814679800939e-05,
"loss": 0.2999,
"step": 12000
},
{
"epoch": 0.26,
"learning_rate": 9.146681958125977e-05,
"loss": 0.2938,
"step": 12500
},
{
"epoch": 0.27,
"learning_rate": 9.112549236451017e-05,
"loss": 0.2886,
"step": 13000
},
{
"epoch": 0.28,
"learning_rate": 9.078416514776056e-05,
"loss": 0.2834,
"step": 13500
},
{
"epoch": 0.29,
"learning_rate": 9.044283793101095e-05,
"loss": 0.2791,
"step": 14000
},
{
"epoch": 0.3,
"learning_rate": 9.010151071426134e-05,
"loss": 0.2748,
"step": 14500
},
{
"epoch": 0.31,
"learning_rate": 8.976018349751174e-05,
"loss": 0.2708,
"step": 15000
},
{
"epoch": 0.32,
"learning_rate": 8.941885628076212e-05,
"loss": 0.2673,
"step": 15500
},
{
"epoch": 0.33,
"learning_rate": 8.90775290640125e-05,
"loss": 0.2634,
"step": 16000
},
{
"epoch": 0.34,
"learning_rate": 8.87362018472629e-05,
"loss": 0.2604,
"step": 16500
},
{
"epoch": 0.35,
"learning_rate": 8.839487463051329e-05,
"loss": 0.2575,
"step": 17000
},
{
"epoch": 0.36,
"learning_rate": 8.805354741376368e-05,
"loss": 0.2542,
"step": 17500
},
{
"epoch": 0.37,
"learning_rate": 8.771222019701407e-05,
"loss": 0.251,
"step": 18000
},
{
"epoch": 0.38,
"learning_rate": 8.737089298026447e-05,
"loss": 0.2483,
"step": 18500
},
{
"epoch": 0.39,
"learning_rate": 8.702956576351485e-05,
"loss": 0.2458,
"step": 19000
},
{
"epoch": 0.4,
"learning_rate": 8.668823854676525e-05,
"loss": 0.2429,
"step": 19500
},
{
"epoch": 0.41,
"learning_rate": 8.634691133001563e-05,
"loss": 0.2405,
"step": 20000
},
{
"epoch": 0.42,
"learning_rate": 8.600558411326603e-05,
"loss": 0.2391,
"step": 20500
},
{
"epoch": 0.43,
"learning_rate": 8.566425689651641e-05,
"loss": 0.237,
"step": 21000
},
{
"epoch": 0.44,
"learning_rate": 8.532292967976681e-05,
"loss": 0.2342,
"step": 21500
},
{
"epoch": 0.45,
"learning_rate": 8.49816024630172e-05,
"loss": 0.2326,
"step": 22000
},
{
"epoch": 0.46,
"learning_rate": 8.464027524626758e-05,
"loss": 0.2304,
"step": 22500
},
{
"epoch": 0.47,
"learning_rate": 8.429894802951798e-05,
"loss": 0.2286,
"step": 23000
},
{
"epoch": 0.48,
"learning_rate": 8.395762081276836e-05,
"loss": 0.2271,
"step": 23500
},
{
"epoch": 0.49,
"learning_rate": 8.361629359601877e-05,
"loss": 0.2252,
"step": 24000
},
{
"epoch": 0.5,
"learning_rate": 8.327496637926916e-05,
"loss": 0.2231,
"step": 24500
},
{
"epoch": 0.51,
"learning_rate": 8.293363916251956e-05,
"loss": 0.2219,
"step": 25000
},
{
"epoch": 0.52,
"learning_rate": 8.259231194576994e-05,
"loss": 0.2199,
"step": 25500
},
{
"epoch": 0.53,
"learning_rate": 8.225098472902032e-05,
"loss": 0.219,
"step": 26000
},
{
"epoch": 0.54,
"learning_rate": 8.190965751227072e-05,
"loss": 0.2169,
"step": 26500
},
{
"epoch": 0.55,
"learning_rate": 8.15683302955211e-05,
"loss": 0.2157,
"step": 27000
},
{
"epoch": 0.56,
"learning_rate": 8.12270030787715e-05,
"loss": 0.214,
"step": 27500
},
{
"epoch": 0.57,
"learning_rate": 8.088567586202189e-05,
"loss": 0.2129,
"step": 28000
},
{
"epoch": 0.58,
"learning_rate": 8.054434864527229e-05,
"loss": 0.2117,
"step": 28500
},
{
"epoch": 0.59,
"learning_rate": 8.020302142852267e-05,
"loss": 0.2111,
"step": 29000
},
{
"epoch": 0.6,
"learning_rate": 7.986169421177307e-05,
"loss": 0.2091,
"step": 29500
},
{
"epoch": 0.61,
"learning_rate": 7.952036699502345e-05,
"loss": 0.2084,
"step": 30000
},
{
"epoch": 0.62,
"learning_rate": 7.917903977827385e-05,
"loss": 0.2071,
"step": 30500
},
{
"epoch": 0.63,
"learning_rate": 7.883771256152423e-05,
"loss": 0.2057,
"step": 31000
},
{
"epoch": 0.65,
"learning_rate": 7.849638534477462e-05,
"loss": 0.2051,
"step": 31500
},
{
"epoch": 0.66,
"learning_rate": 7.815505812802502e-05,
"loss": 0.2036,
"step": 32000
},
{
"epoch": 0.67,
"learning_rate": 7.78137309112754e-05,
"loss": 0.2027,
"step": 32500
},
{
"epoch": 0.68,
"learning_rate": 7.74724036945258e-05,
"loss": 0.2017,
"step": 33000
},
{
"epoch": 0.69,
"learning_rate": 7.713107647777618e-05,
"loss": 0.2009,
"step": 33500
},
{
"epoch": 0.7,
"learning_rate": 7.678974926102658e-05,
"loss": 0.2003,
"step": 34000
},
{
"epoch": 0.71,
"learning_rate": 7.644842204427697e-05,
"loss": 0.1991,
"step": 34500
},
{
"epoch": 0.72,
"learning_rate": 7.610709482752736e-05,
"loss": 0.198,
"step": 35000
},
{
"epoch": 0.73,
"learning_rate": 7.576576761077775e-05,
"loss": 0.1974,
"step": 35500
},
{
"epoch": 0.74,
"learning_rate": 7.542444039402814e-05,
"loss": 0.1964,
"step": 36000
},
{
"epoch": 0.75,
"learning_rate": 7.508311317727853e-05,
"loss": 0.1958,
"step": 36500
},
{
"epoch": 0.76,
"learning_rate": 7.474178596052893e-05,
"loss": 0.1949,
"step": 37000
},
{
"epoch": 0.77,
"learning_rate": 7.440045874377931e-05,
"loss": 0.1937,
"step": 37500
},
{
"epoch": 0.78,
"learning_rate": 7.40591315270297e-05,
"loss": 0.1931,
"step": 38000
},
{
"epoch": 0.79,
"learning_rate": 7.37178043102801e-05,
"loss": 0.1926,
"step": 38500
},
{
"epoch": 0.8,
"learning_rate": 7.337647709353048e-05,
"loss": 0.1919,
"step": 39000
},
{
"epoch": 0.81,
"learning_rate": 7.303514987678088e-05,
"loss": 0.1909,
"step": 39500
},
{
"epoch": 0.82,
"learning_rate": 7.269382266003127e-05,
"loss": 0.1902,
"step": 40000
}
],
"logging_steps": 500,
"max_steps": 146487,
"num_train_epochs": 3,
"save_steps": 10000,
"total_flos": 2.527620759552e+17,
"trial_name": null,
"trial_params": null
}