{ "best_metric": 0.935483870967742, "best_model_checkpoint": "/scratch/camembertv2/runs/results/ftb_ner/camembertv2-base-bf16-p2-17000/max_seq_length-192-gradient_accumulation_steps-2-precision-fp32-learning_rate-5.000000000000001e-05-epochs-8-lr_scheduler-linear-warmup_steps-0.1/SEED-1337/checkpoint-4326", "epoch": 8.0, "eval_steps": 500, "global_step": 4944, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.16181229773462782, "grad_norm": 9.89955997467041, "learning_rate": 1.0101010101010103e-05, "loss": 1.8738, "step": 100 }, { "epoch": 0.32362459546925565, "grad_norm": 2.3764805793762207, "learning_rate": 2.0202020202020206e-05, "loss": 0.6979, "step": 200 }, { "epoch": 0.4854368932038835, "grad_norm": 1.3664543628692627, "learning_rate": 3.030303030303031e-05, "loss": 0.5111, "step": 300 }, { "epoch": 0.6472491909385113, "grad_norm": 0.6372264623641968, "learning_rate": 4.040404040404041e-05, "loss": 0.2666, "step": 400 }, { "epoch": 0.8090614886731392, "grad_norm": 0.5098221302032471, "learning_rate": 4.9943807597212865e-05, "loss": 0.1199, "step": 500 }, { "epoch": 0.970873786407767, "grad_norm": 0.5974541902542114, "learning_rate": 4.8819959541470004e-05, "loss": 0.0775, "step": 600 }, { "epoch": 1.0, "eval_accuracy": 0.9852635038895584, "eval_f1": 0.7820512820512822, "eval_loss": 0.0750068947672844, "eval_precision": 0.7514492753623189, "eval_recall": 0.815251572327044, "eval_runtime": 3.2799, "eval_samples_per_second": 376.537, "eval_steps_per_second": 47.258, "step": 618 }, { "epoch": 1.132686084142395, "grad_norm": 0.15989889204502106, "learning_rate": 4.7696111485727136e-05, "loss": 0.0648, "step": 700 }, { "epoch": 1.2944983818770226, "grad_norm": 0.28292131423950195, "learning_rate": 4.6572263429984275e-05, "loss": 0.0555, "step": 800 }, { "epoch": 1.4563106796116505, "grad_norm": 0.09367953985929489, "learning_rate": 4.544841537424141e-05, "loss": 0.0485, "step": 900 }, { "epoch": 1.6181229773462782, "grad_norm": 0.3826428949832916, "learning_rate": 4.4324567318498546e-05, "loss": 0.0401, "step": 1000 }, { "epoch": 1.779935275080906, "grad_norm": 0.18068315088748932, "learning_rate": 4.3200719262755685e-05, "loss": 0.0369, "step": 1100 }, { "epoch": 1.941747572815534, "grad_norm": 0.23946309089660645, "learning_rate": 4.207687120701282e-05, "loss": 0.0387, "step": 1200 }, { "epoch": 2.0, "eval_accuracy": 0.9903582776377781, "eval_f1": 0.8799067236688691, "eval_loss": 0.04682581126689911, "eval_precision": 0.8700999231360492, "eval_recall": 0.889937106918239, "eval_runtime": 2.8072, "eval_samples_per_second": 439.943, "eval_steps_per_second": 55.215, "step": 1236 }, { "epoch": 2.103559870550162, "grad_norm": 0.8596442937850952, "learning_rate": 4.0953023151269956e-05, "loss": 0.0285, "step": 1300 }, { "epoch": 2.26537216828479, "grad_norm": 0.03754520043730736, "learning_rate": 3.9829175095527095e-05, "loss": 0.0322, "step": 1400 }, { "epoch": 2.4271844660194173, "grad_norm": 0.6684575080871582, "learning_rate": 3.870532703978423e-05, "loss": 0.023, "step": 1500 }, { "epoch": 2.588996763754045, "grad_norm": 0.03833441436290741, "learning_rate": 3.758147898404136e-05, "loss": 0.0268, "step": 1600 }, { "epoch": 2.750809061488673, "grad_norm": 0.3890291452407837, "learning_rate": 3.6457630928298505e-05, "loss": 0.0217, "step": 1700 }, { "epoch": 2.912621359223301, "grad_norm": 0.4564450681209564, "learning_rate": 3.533378287255564e-05, "loss": 0.0295, "step": 1800 }, { "epoch": 3.0, "eval_accuracy": 0.9906869727183083, "eval_f1": 0.8855799373040752, "eval_loss": 0.039505813270807266, "eval_precision": 0.8828125, "eval_recall": 0.8883647798742138, "eval_runtime": 2.8133, "eval_samples_per_second": 438.979, "eval_steps_per_second": 55.095, "step": 1854 }, { "epoch": 3.074433656957929, "grad_norm": 0.027059998363256454, "learning_rate": 3.420993481681277e-05, "loss": 0.0166, "step": 1900 }, { "epoch": 3.236245954692557, "grad_norm": 0.030333412811160088, "learning_rate": 3.308608676106991e-05, "loss": 0.0174, "step": 2000 }, { "epoch": 3.3980582524271843, "grad_norm": 0.13804250955581665, "learning_rate": 3.196223870532705e-05, "loss": 0.0153, "step": 2100 }, { "epoch": 3.559870550161812, "grad_norm": 0.2849176824092865, "learning_rate": 3.083839064958418e-05, "loss": 0.0152, "step": 2200 }, { "epoch": 3.72168284789644, "grad_norm": 0.14825651049613953, "learning_rate": 2.971454259384132e-05, "loss": 0.0171, "step": 2300 }, { "epoch": 3.883495145631068, "grad_norm": 0.045380860567092896, "learning_rate": 2.8590694538098453e-05, "loss": 0.0255, "step": 2400 }, { "epoch": 4.0, "eval_accuracy": 0.9920565355538512, "eval_f1": 0.8999999999999999, "eval_loss": 0.03599809855222702, "eval_precision": 0.9014195583596214, "eval_recall": 0.8985849056603774, "eval_runtime": 2.8186, "eval_samples_per_second": 438.161, "eval_steps_per_second": 54.992, "step": 2472 }, { "epoch": 4.0453074433656955, "grad_norm": 0.5658661723136902, "learning_rate": 2.746684648235559e-05, "loss": 0.0228, "step": 2500 }, { "epoch": 4.207119741100324, "grad_norm": 0.11415175348520279, "learning_rate": 2.6342998426612728e-05, "loss": 0.0162, "step": 2600 }, { "epoch": 4.368932038834951, "grad_norm": 0.1993759125471115, "learning_rate": 2.5219150370869863e-05, "loss": 0.0135, "step": 2700 }, { "epoch": 4.53074433656958, "grad_norm": 0.11497118324041367, "learning_rate": 2.4095302315127e-05, "loss": 0.0159, "step": 2800 }, { "epoch": 4.692556634304207, "grad_norm": 0.2147281914949417, "learning_rate": 2.2971454259384134e-05, "loss": 0.0156, "step": 2900 }, { "epoch": 4.854368932038835, "grad_norm": 0.1083710715174675, "learning_rate": 2.1847606203641273e-05, "loss": 0.0094, "step": 3000 }, { "epoch": 5.0, "eval_accuracy": 0.9922756656075381, "eval_f1": 0.9050980392156862, "eval_loss": 0.03369523212313652, "eval_precision": 0.9029733959311425, "eval_recall": 0.9072327044025157, "eval_runtime": 2.8037, "eval_samples_per_second": 440.494, "eval_steps_per_second": 55.285, "step": 3090 }, { "epoch": 5.016181229773463, "grad_norm": 0.013677417300641537, "learning_rate": 2.072375814789841e-05, "loss": 0.016, "step": 3100 }, { "epoch": 5.17799352750809, "grad_norm": 0.08207657188177109, "learning_rate": 1.9599910092155544e-05, "loss": 0.0133, "step": 3200 }, { "epoch": 5.339805825242719, "grad_norm": 0.02103651873767376, "learning_rate": 1.847606203641268e-05, "loss": 0.0092, "step": 3300 }, { "epoch": 5.501618122977346, "grad_norm": 1.4357458353042603, "learning_rate": 1.735221398066982e-05, "loss": 0.0122, "step": 3400 }, { "epoch": 5.663430420711974, "grad_norm": 0.16999904811382294, "learning_rate": 1.622836592492695e-05, "loss": 0.0086, "step": 3500 }, { "epoch": 5.825242718446602, "grad_norm": 0.09043747931718826, "learning_rate": 1.510451786918409e-05, "loss": 0.0093, "step": 3600 }, { "epoch": 5.9870550161812295, "grad_norm": 0.06608462333679199, "learning_rate": 1.3980669813441227e-05, "loss": 0.0067, "step": 3700 }, { "epoch": 6.0, "eval_accuracy": 0.9932617508491289, "eval_f1": 0.9301960784313724, "eval_loss": 0.033360060304403305, "eval_precision": 0.9280125195618153, "eval_recall": 0.9323899371069182, "eval_runtime": 2.8189, "eval_samples_per_second": 438.116, "eval_steps_per_second": 54.986, "step": 3708 }, { "epoch": 6.148867313915858, "grad_norm": 0.2284722775220871, "learning_rate": 1.285682175769836e-05, "loss": 0.0107, "step": 3800 }, { "epoch": 6.310679611650485, "grad_norm": 0.02673812210559845, "learning_rate": 1.1732973701955498e-05, "loss": 0.0052, "step": 3900 }, { "epoch": 6.472491909385114, "grad_norm": 0.33707210421562195, "learning_rate": 1.0609125646212633e-05, "loss": 0.0072, "step": 4000 }, { "epoch": 6.634304207119741, "grad_norm": 0.0059865182265639305, "learning_rate": 9.48527759046977e-06, "loss": 0.0049, "step": 4100 }, { "epoch": 6.796116504854369, "grad_norm": 0.2759881615638733, "learning_rate": 8.361429534726907e-06, "loss": 0.016, "step": 4200 }, { "epoch": 6.957928802588997, "grad_norm": 0.18257270753383636, "learning_rate": 7.237581478984042e-06, "loss": 0.0069, "step": 4300 }, { "epoch": 7.0, "eval_accuracy": 0.9937000109565027, "eval_f1": 0.935483870967742, "eval_loss": 0.0347304567694664, "eval_precision": 0.9362204724409449, "eval_recall": 0.934748427672956, "eval_runtime": 2.8106, "eval_samples_per_second": 439.402, "eval_steps_per_second": 55.148, "step": 4326 }, { "epoch": 7.119741100323624, "grad_norm": 0.007623529061675072, "learning_rate": 6.113733423241179e-06, "loss": 0.0046, "step": 4400 }, { "epoch": 7.281553398058253, "grad_norm": 0.043167050927877426, "learning_rate": 4.989885367498316e-06, "loss": 0.009, "step": 4500 }, { "epoch": 7.44336569579288, "grad_norm": 0.009674232453107834, "learning_rate": 3.866037311755451e-06, "loss": 0.0046, "step": 4600 }, { "epoch": 7.605177993527509, "grad_norm": 0.05575043708086014, "learning_rate": 2.742189256012588e-06, "loss": 0.0052, "step": 4700 }, { "epoch": 7.766990291262136, "grad_norm": 0.006715767551213503, "learning_rate": 1.6183412002697239e-06, "loss": 0.0044, "step": 4800 }, { "epoch": 7.9288025889967635, "grad_norm": 0.009280543774366379, "learning_rate": 4.9449314452686e-07, "loss": 0.0054, "step": 4900 }, { "epoch": 8.0, "eval_accuracy": 0.9936726196997918, "eval_f1": 0.93401413982718, "eval_loss": 0.03279910609126091, "eval_precision": 0.9332810047095761, "eval_recall": 0.934748427672956, "eval_runtime": 2.829, "eval_samples_per_second": 436.551, "eval_steps_per_second": 54.79, "step": 4944 }, { "epoch": 8.0, "step": 4944, "total_flos": 2833132740217920.0, "train_loss": 0.08807948804957774, "train_runtime": 679.3683, "train_samples_per_second": 116.355, "train_steps_per_second": 7.277 } ], "logging_steps": 100, "max_steps": 4944, "num_input_tokens_seen": 0, "num_train_epochs": 8, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2833132740217920.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }