{ "best_metric": 5.793323516845703, "best_model_checkpoint": "turkish-embedding-model/checkpoint-1564", "epoch": 2.0, "eval_steps": 500, "global_step": 1564, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0319693094629156, "grad_norm": 70.72708129882812, "learning_rate": 1.1253196930946293e-06, "loss": 17.17, "step": 25 }, { "epoch": 0.0639386189258312, "grad_norm": 81.68770599365234, "learning_rate": 2.4040920716112534e-06, "loss": 16.4932, "step": 50 }, { "epoch": 0.0959079283887468, "grad_norm": 109.91338348388672, "learning_rate": 3.6828644501278778e-06, "loss": 16.5976, "step": 75 }, { "epoch": 0.1278772378516624, "grad_norm": 73.892578125, "learning_rate": 4.961636828644502e-06, "loss": 15.6991, "step": 100 }, { "epoch": 0.159846547314578, "grad_norm": 79.35150909423828, "learning_rate": 6.240409207161126e-06, "loss": 14.876, "step": 125 }, { "epoch": 0.1918158567774936, "grad_norm": 83.0904541015625, "learning_rate": 7.5191815856777495e-06, "loss": 14.4828, "step": 150 }, { "epoch": 0.2237851662404092, "grad_norm": 76.82855987548828, "learning_rate": 8.797953964194374e-06, "loss": 12.7061, "step": 175 }, { "epoch": 0.2557544757033248, "grad_norm": 51.30181121826172, "learning_rate": 1.0076726342710998e-05, "loss": 10.8687, "step": 200 }, { "epoch": 0.2877237851662404, "grad_norm": 18.70808219909668, "learning_rate": 1.1355498721227622e-05, "loss": 8.3797, "step": 225 }, { "epoch": 0.319693094629156, "grad_norm": 1.3039417266845703, "learning_rate": 1.2634271099744246e-05, "loss": 6.2029, "step": 250 }, { "epoch": 0.3516624040920716, "grad_norm": 0.2324853092432022, "learning_rate": 1.391304347826087e-05, "loss": 5.8228, "step": 275 }, { "epoch": 0.3836317135549872, "grad_norm": 0.1757364720106125, "learning_rate": 1.5191815856777494e-05, "loss": 5.811, "step": 300 }, { "epoch": 0.4156010230179028, "grad_norm": 0.1788654774427414, "learning_rate": 1.647058823529412e-05, "loss": 5.8079, "step": 325 }, { "epoch": 0.4475703324808184, "grad_norm": 0.12862567603588104, "learning_rate": 1.7749360613810744e-05, "loss": 5.8077, "step": 350 }, { "epoch": 0.479539641943734, "grad_norm": 0.14497514069080353, "learning_rate": 1.9028132992327367e-05, "loss": 5.8035, "step": 375 }, { "epoch": 0.5115089514066496, "grad_norm": 0.1350390762090683, "learning_rate": 1.996589940323956e-05, "loss": 5.8072, "step": 400 }, { "epoch": 0.5434782608695652, "grad_norm": 0.1435602754354477, "learning_rate": 1.9823813583404378e-05, "loss": 5.8033, "step": 425 }, { "epoch": 0.5754475703324808, "grad_norm": 0.11389254033565521, "learning_rate": 1.96817277635692e-05, "loss": 5.8086, "step": 450 }, { "epoch": 0.6074168797953964, "grad_norm": 0.15821650624275208, "learning_rate": 1.9539641943734017e-05, "loss": 5.81, "step": 475 }, { "epoch": 0.639386189258312, "grad_norm": 0.1179889366030693, "learning_rate": 1.9397556123898838e-05, "loss": 5.7949, "step": 500 }, { "epoch": 0.6713554987212276, "grad_norm": 0.10912967473268509, "learning_rate": 1.9255470304063656e-05, "loss": 5.8079, "step": 525 }, { "epoch": 0.7033248081841432, "grad_norm": 0.11702870577573776, "learning_rate": 1.9113384484228477e-05, "loss": 5.8057, "step": 550 }, { "epoch": 0.7352941176470589, "grad_norm": 0.13132448494434357, "learning_rate": 1.8971298664393295e-05, "loss": 5.8097, "step": 575 }, { "epoch": 0.7672634271099744, "grad_norm": 0.15833145380020142, "learning_rate": 1.8829212844558116e-05, "loss": 5.7986, "step": 600 }, { "epoch": 0.7992327365728901, 
"grad_norm": 0.11651863902807236, "learning_rate": 1.8687127024722937e-05, "loss": 5.8051, "step": 625 }, { "epoch": 0.8312020460358056, "grad_norm": 0.5393890142440796, "learning_rate": 1.854504120488775e-05, "loss": 5.8041, "step": 650 }, { "epoch": 0.8631713554987213, "grad_norm": 0.6457561254501343, "learning_rate": 1.8402955385052572e-05, "loss": 5.7907, "step": 675 }, { "epoch": 0.8951406649616368, "grad_norm": 0.5643135905265808, "learning_rate": 1.8260869565217393e-05, "loss": 5.7991, "step": 700 }, { "epoch": 0.9271099744245525, "grad_norm": 3.214787721633911, "learning_rate": 1.811878374538221e-05, "loss": 5.8035, "step": 725 }, { "epoch": 0.959079283887468, "grad_norm": 2.781162977218628, "learning_rate": 1.7976697925547032e-05, "loss": 5.7945, "step": 750 }, { "epoch": 0.9910485933503836, "grad_norm": 0.38559335470199585, "learning_rate": 1.783461210571185e-05, "loss": 5.8077, "step": 775 }, { "epoch": 1.0, "eval_loss": 5.8023600578308105, "eval_runtime": 18.0632, "eval_samples_per_second": 276.805, "eval_steps_per_second": 4.374, "eval_tr_ling_pearson_cosine": 0.017751548525136808, "eval_tr_ling_pearson_dot": 0.025703597820631346, "eval_tr_ling_pearson_euclidean": 0.02195284877201089, "eval_tr_ling_pearson_manhattan": 0.02083376479528459, "eval_tr_ling_pearson_max": 0.025703597820631346, "eval_tr_ling_spearman_cosine": 0.027108099994157316, "eval_tr_ling_spearman_dot": 0.03304394653738539, "eval_tr_ling_spearman_euclidean": 0.025485959636772793, "eval_tr_ling_spearman_manhattan": 0.024466610177699702, "eval_tr_ling_spearman_max": 0.03304394653738539, "step": 782 }, { "epoch": 1.0230179028132993, "grad_norm": 0.3645063638687134, "learning_rate": 1.769252628587667e-05, "loss": 5.6703, "step": 800 }, { "epoch": 1.054987212276215, "grad_norm": 0.9638137817382812, "learning_rate": 1.7550440466041488e-05, "loss": 5.8052, "step": 825 }, { "epoch": 1.0869565217391304, "grad_norm": 2.114203691482544, "learning_rate": 1.740835464620631e-05, "loss": 5.7936, "step": 850 }, { "epoch": 1.118925831202046, "grad_norm": 1.8992066383361816, "learning_rate": 1.7266268826371127e-05, "loss": 5.7924, "step": 875 }, { "epoch": 1.1508951406649617, "grad_norm": 2.8299577236175537, "learning_rate": 1.7124183006535948e-05, "loss": 5.7806, "step": 900 }, { "epoch": 1.1828644501278773, "grad_norm": 1.956953525543213, "learning_rate": 1.698209718670077e-05, "loss": 5.7835, "step": 925 }, { "epoch": 1.2148337595907928, "grad_norm": 2.658413887023926, "learning_rate": 1.6840011366865587e-05, "loss": 5.7619, "step": 950 }, { "epoch": 1.2468030690537084, "grad_norm": 1.2760388851165771, "learning_rate": 1.6697925547030408e-05, "loss": 5.8038, "step": 975 }, { "epoch": 1.278772378516624, "grad_norm": 1.7434897422790527, "learning_rate": 1.6555839727195226e-05, "loss": 5.779, "step": 1000 }, { "epoch": 1.3107416879795397, "grad_norm": 1.3532071113586426, "learning_rate": 1.6413753907360047e-05, "loss": 5.7904, "step": 1025 }, { "epoch": 1.3427109974424551, "grad_norm": 3.7385997772216797, "learning_rate": 1.6271668087524864e-05, "loss": 5.7696, "step": 1050 }, { "epoch": 1.3746803069053708, "grad_norm": 0.9061102867126465, "learning_rate": 1.6129582267689685e-05, "loss": 5.7919, "step": 1075 }, { "epoch": 1.4066496163682864, "grad_norm": 2.7104809284210205, "learning_rate": 1.5987496447854503e-05, "loss": 5.7785, "step": 1100 }, { "epoch": 1.438618925831202, "grad_norm": 1.7147830724716187, "learning_rate": 1.5845410628019324e-05, "loss": 5.7862, "step": 1125 }, { "epoch": 1.4705882352941178, "grad_norm": 
2.525214672088623, "learning_rate": 1.5703324808184145e-05, "loss": 5.7703, "step": 1150 }, { "epoch": 1.5025575447570332, "grad_norm": 1.7794997692108154, "learning_rate": 1.5561238988348963e-05, "loss": 5.773, "step": 1175 }, { "epoch": 1.5345268542199488, "grad_norm": 4.901644229888916, "learning_rate": 1.5419153168513784e-05, "loss": 5.7627, "step": 1200 }, { "epoch": 1.5664961636828645, "grad_norm": 3.360812187194824, "learning_rate": 1.52770673486786e-05, "loss": 5.7596, "step": 1225 }, { "epoch": 1.59846547314578, "grad_norm": 1.2768888473510742, "learning_rate": 1.5134981528843423e-05, "loss": 5.7882, "step": 1250 }, { "epoch": 1.6304347826086958, "grad_norm": 2.206226348876953, "learning_rate": 1.4992895709008242e-05, "loss": 5.7828, "step": 1275 }, { "epoch": 1.6624040920716112, "grad_norm": 1.4602406024932861, "learning_rate": 1.4850809889173061e-05, "loss": 5.771, "step": 1300 }, { "epoch": 1.6943734015345269, "grad_norm": 1.1597537994384766, "learning_rate": 1.4708724069337881e-05, "loss": 5.788, "step": 1325 }, { "epoch": 1.7263427109974425, "grad_norm": 3.7494003772735596, "learning_rate": 1.45666382495027e-05, "loss": 5.7719, "step": 1350 }, { "epoch": 1.758312020460358, "grad_norm": 1.6271498203277588, "learning_rate": 1.442455242966752e-05, "loss": 5.7846, "step": 1375 }, { "epoch": 1.7902813299232738, "grad_norm": 2.0469117164611816, "learning_rate": 1.4282466609832339e-05, "loss": 5.7838, "step": 1400 }, { "epoch": 1.8222506393861893, "grad_norm": 2.533921003341675, "learning_rate": 1.4140380789997158e-05, "loss": 5.7912, "step": 1425 }, { "epoch": 1.854219948849105, "grad_norm": 3.291757583618164, "learning_rate": 1.3998294970161978e-05, "loss": 5.7686, "step": 1450 }, { "epoch": 1.8861892583120206, "grad_norm": 3.0181350708007812, "learning_rate": 1.3856209150326799e-05, "loss": 5.7938, "step": 1475 }, { "epoch": 1.918158567774936, "grad_norm": 2.553502321243286, "learning_rate": 1.3714123330491618e-05, "loss": 5.7847, "step": 1500 }, { "epoch": 1.9501278772378516, "grad_norm": 1.8034719228744507, "learning_rate": 1.3572037510656438e-05, "loss": 5.7952, "step": 1525 }, { "epoch": 1.9820971867007673, "grad_norm": 3.7138864994049072, "learning_rate": 1.3429951690821257e-05, "loss": 5.7528, "step": 1550 }, { "epoch": 2.0, "eval_loss": 5.793323516845703, "eval_runtime": 18.2796, "eval_samples_per_second": 273.528, "eval_steps_per_second": 4.322, "eval_tr_ling_pearson_cosine": 0.037604255015168134, "eval_tr_ling_pearson_dot": 0.0673696846368413, "eval_tr_ling_pearson_euclidean": 0.03698411306484619, "eval_tr_ling_pearson_manhattan": 0.034740275152181296, "eval_tr_ling_pearson_max": 0.0673696846368413, "eval_tr_ling_spearman_cosine": 0.04804112988506346, "eval_tr_ling_spearman_dot": 0.06818119362900125, "eval_tr_ling_spearman_euclidean": 0.03903062430281842, "eval_tr_ling_spearman_manhattan": 0.03769766156967754, "eval_tr_ling_spearman_max": 0.06818119362900125, "step": 1564 } ], "logging_steps": 25, "max_steps": 3910, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.01 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 32, "trial_name": null, "trial_params": null }