|
{ |
|
"best_metric": 0.5012531328320803, |
|
"best_model_checkpoint": "/content/our_data/checkpoint-9000", |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 12410, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.91941982272361e-05, |
|
"loss": 1.9177, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_accuracy": 0.6004288777698356, |
|
"eval_f1": 0.0379746835443038, |
|
"eval_loss": 1.6839170455932617, |
|
"eval_precision": 0.06, |
|
"eval_recall": 0.027777777777777776, |
|
"eval_runtime": 3.4052, |
|
"eval_samples_per_second": 89.275, |
|
"eval_steps_per_second": 44.637, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.83883964544722e-05, |
|
"loss": 1.4976, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_accuracy": 0.63128425065523, |
|
"eval_f1": 0.24557116676847893, |
|
"eval_loss": 1.4935959577560425, |
|
"eval_precision": 0.22814982973893302, |
|
"eval_recall": 0.26587301587301587, |
|
"eval_runtime": 2.3845, |
|
"eval_samples_per_second": 127.488, |
|
"eval_steps_per_second": 63.744, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 1.75825946817083e-05, |
|
"loss": 1.2309, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"eval_accuracy": 0.6657136049559209, |
|
"eval_f1": 0.2877871825876663, |
|
"eval_loss": 1.2914698123931885, |
|
"eval_precision": 0.2650334075723831, |
|
"eval_recall": 0.3148148148148148, |
|
"eval_runtime": 3.4851, |
|
"eval_samples_per_second": 87.229, |
|
"eval_steps_per_second": 43.615, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 1.67767929089444e-05, |
|
"loss": 1.0546, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"eval_accuracy": 0.6803669287586371, |
|
"eval_f1": 0.33198380566801616, |
|
"eval_loss": 1.2454315423965454, |
|
"eval_precision": 0.2949640287769784, |
|
"eval_recall": 0.37962962962962965, |
|
"eval_runtime": 2.4542, |
|
"eval_samples_per_second": 123.867, |
|
"eval_steps_per_second": 61.934, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.59709911361805e-05, |
|
"loss": 0.9405, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"eval_accuracy": 0.6915654038599, |
|
"eval_f1": 0.35719063545150503, |
|
"eval_loss": 1.2377290725708008, |
|
"eval_precision": 0.36129905277401897, |
|
"eval_recall": 0.3531746031746032, |
|
"eval_runtime": 3.3956, |
|
"eval_samples_per_second": 89.527, |
|
"eval_steps_per_second": 44.763, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 1.5165189363416601e-05, |
|
"loss": 0.7501, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"eval_accuracy": 0.7170598046223493, |
|
"eval_f1": 0.3872549019607843, |
|
"eval_loss": 1.1723062992095947, |
|
"eval_precision": 0.3607305936073059, |
|
"eval_recall": 0.41798941798941797, |
|
"eval_runtime": 2.4666, |
|
"eval_samples_per_second": 123.248, |
|
"eval_steps_per_second": 61.624, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 1.4359387590652701e-05, |
|
"loss": 0.7133, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"eval_accuracy": 0.7159876101977604, |
|
"eval_f1": 0.39976204640095175, |
|
"eval_loss": 1.1583572626113892, |
|
"eval_precision": 0.36324324324324325, |
|
"eval_recall": 0.4444444444444444, |
|
"eval_runtime": 2.4767, |
|
"eval_samples_per_second": 122.743, |
|
"eval_steps_per_second": 61.371, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 1.35535858178888e-05, |
|
"loss": 0.5896, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"eval_accuracy": 0.7306409340004766, |
|
"eval_f1": 0.42666666666666664, |
|
"eval_loss": 1.2287709712982178, |
|
"eval_precision": 0.41025641025641024, |
|
"eval_recall": 0.4444444444444444, |
|
"eval_runtime": 3.2475, |
|
"eval_samples_per_second": 93.609, |
|
"eval_steps_per_second": 46.805, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 1.27477840451249e-05, |
|
"loss": 0.5353, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"eval_accuracy": 0.7253990945913747, |
|
"eval_f1": 0.4356672651107121, |
|
"eval_loss": 1.2319059371948242, |
|
"eval_precision": 0.3978142076502732, |
|
"eval_recall": 0.48148148148148145, |
|
"eval_runtime": 2.3963, |
|
"eval_samples_per_second": 126.861, |
|
"eval_steps_per_second": 63.431, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 1.1941982272361e-05, |
|
"loss": 0.5432, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"eval_accuracy": 0.7306409340004766, |
|
"eval_f1": 0.4548825710754017, |
|
"eval_loss": 1.2172613143920898, |
|
"eval_precision": 0.42691415313225056, |
|
"eval_recall": 0.48677248677248675, |
|
"eval_runtime": 3.3895, |
|
"eval_samples_per_second": 89.689, |
|
"eval_steps_per_second": 44.845, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 1.11361804995971e-05, |
|
"loss": 0.4062, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"eval_accuracy": 0.7271860852990231, |
|
"eval_f1": 0.4691358024691359, |
|
"eval_loss": 1.283239722251892, |
|
"eval_precision": 0.4398148148148148, |
|
"eval_recall": 0.5026455026455027, |
|
"eval_runtime": 2.4143, |
|
"eval_samples_per_second": 125.916, |
|
"eval_steps_per_second": 62.958, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"learning_rate": 1.0330378726833199e-05, |
|
"loss": 0.4485, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"eval_accuracy": 0.7412437455325233, |
|
"eval_f1": 0.4610778443113772, |
|
"eval_loss": 1.2196030616760254, |
|
"eval_precision": 0.4212253829321663, |
|
"eval_recall": 0.5092592592592593, |
|
"eval_runtime": 2.7872, |
|
"eval_samples_per_second": 109.069, |
|
"eval_steps_per_second": 54.534, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"learning_rate": 9.5245769540693e-06, |
|
"loss": 0.3614, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"eval_accuracy": 0.732547057421968, |
|
"eval_f1": 0.46210720887245843, |
|
"eval_loss": 1.3155299425125122, |
|
"eval_precision": 0.43252595155709345, |
|
"eval_recall": 0.49603174603174605, |
|
"eval_runtime": 3.3893, |
|
"eval_samples_per_second": 89.694, |
|
"eval_steps_per_second": 44.847, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 5.64, |
|
"learning_rate": 8.7187751813054e-06, |
|
"loss": 0.3308, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 5.64, |
|
"eval_accuracy": 0.7354062425542054, |
|
"eval_f1": 0.4604402141582391, |
|
"eval_loss": 1.3501168489456177, |
|
"eval_precision": 0.4183783783783784, |
|
"eval_recall": 0.5119047619047619, |
|
"eval_runtime": 2.4288, |
|
"eval_samples_per_second": 125.165, |
|
"eval_steps_per_second": 62.583, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"learning_rate": 7.9129734085415e-06, |
|
"loss": 0.3645, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"eval_accuracy": 0.7365975696926376, |
|
"eval_f1": 0.4730792498487599, |
|
"eval_loss": 1.3390766382217407, |
|
"eval_precision": 0.4358974358974359, |
|
"eval_recall": 0.5171957671957672, |
|
"eval_runtime": 2.391, |
|
"eval_samples_per_second": 127.141, |
|
"eval_steps_per_second": 63.571, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"learning_rate": 7.107171635777599e-06, |
|
"loss": 0.2982, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"eval_accuracy": 0.7314748629973791, |
|
"eval_f1": 0.4590354445090064, |
|
"eval_loss": 1.3889434337615967, |
|
"eval_precision": 0.40932642487046633, |
|
"eval_recall": 0.5224867724867724, |
|
"eval_runtime": 3.0054, |
|
"eval_samples_per_second": 101.151, |
|
"eval_steps_per_second": 50.576, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 6.85, |
|
"learning_rate": 6.301369863013699e-06, |
|
"loss": 0.2845, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 6.85, |
|
"eval_accuracy": 0.7376697641172266, |
|
"eval_f1": 0.47794117647058826, |
|
"eval_loss": 1.4109262228012085, |
|
"eval_precision": 0.4452054794520548, |
|
"eval_recall": 0.5158730158730159, |
|
"eval_runtime": 2.4417, |
|
"eval_samples_per_second": 124.505, |
|
"eval_steps_per_second": 62.253, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"learning_rate": 5.495568090249799e-06, |
|
"loss": 0.2482, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"eval_accuracy": 0.7375506314033834, |
|
"eval_f1": 0.5012531328320803, |
|
"eval_loss": 1.4667584896087646, |
|
"eval_precision": 0.47619047619047616, |
|
"eval_recall": 0.5291005291005291, |
|
"eval_runtime": 2.4653, |
|
"eval_samples_per_second": 123.313, |
|
"eval_steps_per_second": 61.657, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 7.66, |
|
"learning_rate": 4.689766317485899e-06, |
|
"loss": 0.2636, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 7.66, |
|
"eval_accuracy": 0.73409578270193, |
|
"eval_f1": 0.49150485436893204, |
|
"eval_loss": 1.4925192594528198, |
|
"eval_precision": 0.45403587443946186, |
|
"eval_recall": 0.5357142857142857, |
|
"eval_runtime": 3.4322, |
|
"eval_samples_per_second": 88.572, |
|
"eval_steps_per_second": 44.286, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"learning_rate": 3.883964544721999e-06, |
|
"loss": 0.2605, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"eval_accuracy": 0.7405289492494639, |
|
"eval_f1": 0.49358582773365917, |
|
"eval_loss": 1.4916423559188843, |
|
"eval_precision": 0.4585698070374574, |
|
"eval_recall": 0.5343915343915344, |
|
"eval_runtime": 2.4755, |
|
"eval_samples_per_second": 122.805, |
|
"eval_steps_per_second": 61.403, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 8.46, |
|
"learning_rate": 3.0781627719580986e-06, |
|
"loss": 0.1989, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 8.46, |
|
"eval_accuracy": 0.7387419585418156, |
|
"eval_f1": 0.4990780577750461, |
|
"eval_loss": 1.5096321105957031, |
|
"eval_precision": 0.4661308840413318, |
|
"eval_recall": 0.5370370370370371, |
|
"eval_runtime": 3.3245, |
|
"eval_samples_per_second": 91.444, |
|
"eval_steps_per_second": 45.722, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 8.86, |
|
"learning_rate": 2.2723609991941985e-06, |
|
"loss": 0.2415, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 8.86, |
|
"eval_accuracy": 0.744341196092447, |
|
"eval_f1": 0.4990914597213809, |
|
"eval_loss": 1.4698182344436646, |
|
"eval_precision": 0.46033519553072627, |
|
"eval_recall": 0.544973544973545, |
|
"eval_runtime": 2.5811, |
|
"eval_samples_per_second": 117.779, |
|
"eval_steps_per_second": 58.889, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 9.27, |
|
"learning_rate": 1.4665592264302982e-06, |
|
"loss": 0.2488, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 9.27, |
|
"eval_accuracy": 0.7455325232308792, |
|
"eval_f1": 0.491421568627451, |
|
"eval_loss": 1.4736005067825317, |
|
"eval_precision": 0.4577625570776256, |
|
"eval_recall": 0.5304232804232805, |
|
"eval_runtime": 3.3981, |
|
"eval_samples_per_second": 89.462, |
|
"eval_steps_per_second": 44.731, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 9.67, |
|
"learning_rate": 6.607574536663981e-07, |
|
"loss": 0.2129, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 9.67, |
|
"eval_accuracy": 0.7438646652370741, |
|
"eval_f1": 0.5012165450121654, |
|
"eval_loss": 1.506749153137207, |
|
"eval_precision": 0.46396396396396394, |
|
"eval_recall": 0.544973544973545, |
|
"eval_runtime": 2.854, |
|
"eval_samples_per_second": 106.517, |
|
"eval_steps_per_second": 53.258, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 12410, |
|
"total_flos": 503702005049490.0, |
|
"train_loss": 0.5683070008357046, |
|
"train_runtime": 1353.7746, |
|
"train_samples_per_second": 18.327, |
|
"train_steps_per_second": 9.167 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 12410, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 503702005049490.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|