|
{ |
|
"best_metric": 0.4396501457725948, |
|
"best_model_checkpoint": "/content/our_data/checkpoint-11500", |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 12410, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.91941982272361e-05, |
|
"loss": 1.7927, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_accuracy": 0.6139180171591992, |
|
"eval_f1": 0.08594319009468317, |
|
"eval_loss": 1.5607472658157349, |
|
"eval_precision": 0.09562398703403566, |
|
"eval_recall": 0.07804232804232804, |
|
"eval_runtime": 1.9857, |
|
"eval_samples_per_second": 153.092, |
|
"eval_steps_per_second": 76.546, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.83883964544722e-05, |
|
"loss": 1.3551, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_accuracy": 0.6495471877979028, |
|
"eval_f1": 0.22113022113022113, |
|
"eval_loss": 1.3530131578445435, |
|
"eval_precision": 0.20642201834862386, |
|
"eval_recall": 0.23809523809523808, |
|
"eval_runtime": 1.4676, |
|
"eval_samples_per_second": 207.142, |
|
"eval_steps_per_second": 103.571, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 1.75825946817083e-05, |
|
"loss": 1.0432, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"eval_accuracy": 0.6739752144899904, |
|
"eval_f1": 0.26135726303982054, |
|
"eval_loss": 1.310741662979126, |
|
"eval_precision": 0.22687439143135346, |
|
"eval_recall": 0.3082010582010582, |
|
"eval_runtime": 1.4924, |
|
"eval_samples_per_second": 203.693, |
|
"eval_steps_per_second": 101.847, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 1.67767929089444e-05, |
|
"loss": 0.8468, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"eval_accuracy": 0.6767159199237369, |
|
"eval_f1": 0.28364849833147937, |
|
"eval_loss": 1.249666690826416, |
|
"eval_precision": 0.24472168905950095, |
|
"eval_recall": 0.3373015873015873, |
|
"eval_runtime": 1.9366, |
|
"eval_samples_per_second": 156.977, |
|
"eval_steps_per_second": 78.489, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.59709911361805e-05, |
|
"loss": 0.7775, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"eval_accuracy": 0.6938751191611058, |
|
"eval_f1": 0.3260115606936416, |
|
"eval_loss": 1.2709521055221558, |
|
"eval_precision": 0.28952772073921973, |
|
"eval_recall": 0.373015873015873, |
|
"eval_runtime": 2.1473, |
|
"eval_samples_per_second": 141.572, |
|
"eval_steps_per_second": 70.786, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 1.5165189363416601e-05, |
|
"loss": 0.5374, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"eval_accuracy": 0.7043612964728313, |
|
"eval_f1": 0.34498308906426156, |
|
"eval_loss": 1.3020099401474, |
|
"eval_precision": 0.3005893909626719, |
|
"eval_recall": 0.40476190476190477, |
|
"eval_runtime": 1.4555, |
|
"eval_samples_per_second": 208.867, |
|
"eval_steps_per_second": 104.433, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 1.4359387590652701e-05, |
|
"loss": 0.5071, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"eval_accuracy": 0.7080552907530981, |
|
"eval_f1": 0.34189944134078215, |
|
"eval_loss": 1.2613815069198608, |
|
"eval_precision": 0.29593810444874274, |
|
"eval_recall": 0.40476190476190477, |
|
"eval_runtime": 1.655, |
|
"eval_samples_per_second": 183.688, |
|
"eval_steps_per_second": 91.844, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 1.35535858178888e-05, |
|
"loss": 0.4237, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"eval_accuracy": 0.7166348903717826, |
|
"eval_f1": 0.38166189111747856, |
|
"eval_loss": 1.3250571489334106, |
|
"eval_precision": 0.3367037411526795, |
|
"eval_recall": 0.44047619047619047, |
|
"eval_runtime": 1.9058, |
|
"eval_samples_per_second": 159.516, |
|
"eval_steps_per_second": 79.758, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 1.27477840451249e-05, |
|
"loss": 0.3597, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"eval_accuracy": 0.7124642516682554, |
|
"eval_f1": 0.38974358974358975, |
|
"eval_loss": 1.3852567672729492, |
|
"eval_precision": 0.34234234234234234, |
|
"eval_recall": 0.4523809523809524, |
|
"eval_runtime": 1.478, |
|
"eval_samples_per_second": 205.686, |
|
"eval_steps_per_second": 102.843, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 1.1941982272361e-05, |
|
"loss": 0.3632, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"eval_accuracy": 0.7127025738798856, |
|
"eval_f1": 0.39836924868957485, |
|
"eval_loss": 1.415600061416626, |
|
"eval_precision": 0.3558792924037461, |
|
"eval_recall": 0.4523809523809524, |
|
"eval_runtime": 1.5214, |
|
"eval_samples_per_second": 199.817, |
|
"eval_steps_per_second": 99.908, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 1.11361804995971e-05, |
|
"loss": 0.2589, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"eval_accuracy": 0.717349857006673, |
|
"eval_f1": 0.40180586907449206, |
|
"eval_loss": 1.4472498893737793, |
|
"eval_precision": 0.35039370078740156, |
|
"eval_recall": 0.4708994708994709, |
|
"eval_runtime": 1.4818, |
|
"eval_samples_per_second": 205.152, |
|
"eval_steps_per_second": 102.576, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"learning_rate": 1.0330378726833199e-05, |
|
"loss": 0.323, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"eval_accuracy": 0.7222354623450906, |
|
"eval_f1": 0.39455782312925164, |
|
"eval_loss": 1.399746298789978, |
|
"eval_precision": 0.34523809523809523, |
|
"eval_recall": 0.4603174603174603, |
|
"eval_runtime": 1.4893, |
|
"eval_samples_per_second": 204.116, |
|
"eval_steps_per_second": 102.058, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"learning_rate": 9.5245769540693e-06, |
|
"loss": 0.2167, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"eval_accuracy": 0.7233079122974261, |
|
"eval_f1": 0.39499146272054636, |
|
"eval_loss": 1.519398808479309, |
|
"eval_precision": 0.34665334665334663, |
|
"eval_recall": 0.458994708994709, |
|
"eval_runtime": 1.803, |
|
"eval_samples_per_second": 168.606, |
|
"eval_steps_per_second": 84.303, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 5.64, |
|
"learning_rate": 8.7187751813054e-06, |
|
"loss": 0.2363, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 5.64, |
|
"eval_accuracy": 0.7222354623450906, |
|
"eval_f1": 0.4024802705749719, |
|
"eval_loss": 1.5585495233535767, |
|
"eval_precision": 0.3506876227897839, |
|
"eval_recall": 0.4722222222222222, |
|
"eval_runtime": 2.0739, |
|
"eval_samples_per_second": 146.582, |
|
"eval_steps_per_second": 73.291, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"learning_rate": 7.9129734085415e-06, |
|
"loss": 0.2721, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"eval_accuracy": 0.7210438512869399, |
|
"eval_f1": 0.4208715596330276, |
|
"eval_loss": 1.5420488119125366, |
|
"eval_precision": 0.3714574898785425, |
|
"eval_recall": 0.48544973544973546, |
|
"eval_runtime": 1.4879, |
|
"eval_samples_per_second": 204.311, |
|
"eval_steps_per_second": 102.155, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"learning_rate": 7.107171635777599e-06, |
|
"loss": 0.2073, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"eval_accuracy": 0.7147283126787417, |
|
"eval_f1": 0.40914158305462656, |
|
"eval_loss": 1.5877846479415894, |
|
"eval_precision": 0.3535645472061657, |
|
"eval_recall": 0.48544973544973546, |
|
"eval_runtime": 1.4406, |
|
"eval_samples_per_second": 211.016, |
|
"eval_steps_per_second": 105.508, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 6.85, |
|
"learning_rate": 6.301369863013699e-06, |
|
"loss": 0.2021, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 6.85, |
|
"eval_accuracy": 0.7197330791229742, |
|
"eval_f1": 0.42135476463834676, |
|
"eval_loss": 1.6636826992034912, |
|
"eval_precision": 0.372210953346856, |
|
"eval_recall": 0.48544973544973546, |
|
"eval_runtime": 1.7337, |
|
"eval_samples_per_second": 175.352, |
|
"eval_steps_per_second": 87.676, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"learning_rate": 5.495568090249799e-06, |
|
"loss": 0.1648, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"eval_accuracy": 0.7254528122020972, |
|
"eval_f1": 0.42339181286549704, |
|
"eval_loss": 1.6723591089248657, |
|
"eval_precision": 0.37945492662473795, |
|
"eval_recall": 0.47883597883597884, |
|
"eval_runtime": 2.0842, |
|
"eval_samples_per_second": 145.858, |
|
"eval_steps_per_second": 72.929, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 7.66, |
|
"learning_rate": 4.689766317485899e-06, |
|
"loss": 0.1927, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 7.66, |
|
"eval_accuracy": 0.7244995233555768, |
|
"eval_f1": 0.4298850574712644, |
|
"eval_loss": 1.689092993736267, |
|
"eval_precision": 0.3800813008130081, |
|
"eval_recall": 0.4947089947089947, |
|
"eval_runtime": 1.551, |
|
"eval_samples_per_second": 196.006, |
|
"eval_steps_per_second": 98.003, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"learning_rate": 3.883964544721999e-06, |
|
"loss": 0.1958, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"eval_accuracy": 0.7280743565300286, |
|
"eval_f1": 0.4395090590298071, |
|
"eval_loss": 1.67740797996521, |
|
"eval_precision": 0.393717277486911, |
|
"eval_recall": 0.4973544973544973, |
|
"eval_runtime": 1.5248, |
|
"eval_samples_per_second": 199.375, |
|
"eval_steps_per_second": 99.688, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 8.46, |
|
"learning_rate": 3.0781627719580986e-06, |
|
"loss": 0.1508, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 8.46, |
|
"eval_accuracy": 0.7259294566253575, |
|
"eval_f1": 0.4272409778812573, |
|
"eval_loss": 1.7378581762313843, |
|
"eval_precision": 0.3814968814968815, |
|
"eval_recall": 0.48544973544973546, |
|
"eval_runtime": 1.5008, |
|
"eval_samples_per_second": 202.555, |
|
"eval_steps_per_second": 101.278, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 8.86, |
|
"learning_rate": 2.2723609991941985e-06, |
|
"loss": 0.184, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 8.86, |
|
"eval_accuracy": 0.7277168732125834, |
|
"eval_f1": 0.43638457109959694, |
|
"eval_loss": 1.700131893157959, |
|
"eval_precision": 0.38634046890927626, |
|
"eval_recall": 0.5013227513227513, |
|
"eval_runtime": 1.5178, |
|
"eval_samples_per_second": 200.287, |
|
"eval_steps_per_second": 100.144, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 9.27, |
|
"learning_rate": 1.4665592264302982e-06, |
|
"loss": 0.1696, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 9.27, |
|
"eval_accuracy": 0.7295042897998093, |
|
"eval_f1": 0.4396501457725948, |
|
"eval_loss": 1.6932308673858643, |
|
"eval_precision": 0.3931178310740355, |
|
"eval_recall": 0.49867724867724866, |
|
"eval_runtime": 2.1423, |
|
"eval_samples_per_second": 141.904, |
|
"eval_steps_per_second": 70.952, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 9.67, |
|
"learning_rate": 6.607574536663981e-07, |
|
"loss": 0.1425, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 9.67, |
|
"eval_accuracy": 0.7275977121067684, |
|
"eval_f1": 0.433886662850601, |
|
"eval_loss": 1.7137079238891602, |
|
"eval_precision": 0.38244197780020184, |
|
"eval_recall": 0.5013227513227513, |
|
"eval_runtime": 2.2155, |
|
"eval_samples_per_second": 137.215, |
|
"eval_steps_per_second": 68.608, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 12410, |
|
"total_flos": 243842156652198.0, |
|
"train_loss": 0.4448028106443542, |
|
"train_runtime": 816.9339, |
|
"train_samples_per_second": 30.37, |
|
"train_steps_per_second": 15.191 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 12410, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 243842156652198.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|