End of training

Browse files

Files changed (7) hide show

README.md +2 -1
all_results.json +12 -0
eval_results.json +7 -0
train_results.json +8 -0
trainer_state.json +766 -0
training_eval_loss.png +0 -0
training_loss.png +0 -0

README.md CHANGED Viewed

@@ -4,6 +4,7 @@ license: llama3.1
 base_model: meta-llama/Meta-Llama-3.1-8B
 tags:
 - llama-factory
 - generated_from_trainer
 model-index:
 - name: top_9_ranking_stackexchange
@@ -15,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
 # top_9_ranking_stackexchange
-This model is a fine-tuned version of [meta-llama/Meta-Llama-3.1-8B](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B) on an unknown dataset.
 It achieves the following results on the evaluation set:
 - Loss: 0.7694

 base_model: meta-llama/Meta-Llama-3.1-8B
 tags:
 - llama-factory
+- full
 - generated_from_trainer
 model-index:
 - name: top_9_ranking_stackexchange
 # top_9_ranking_stackexchange
+This model is a fine-tuned version of [meta-llama/Meta-Llama-3.1-8B](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B) on the mlfoundations-dev/top_9_ranking_stackexchange dataset.
 It achieves the following results on the evaluation set:
 - Loss: 0.7694

all_results.json ADDED Viewed

	@@ -0,0 +1,12 @@

+{
+    "epoch": 3.0,
+    "eval_loss": 0.7694440484046936,
+    "eval_runtime": 356.3502,
+    "eval_samples_per_second": 25.329,
+    "eval_steps_per_second": 0.398,
+    "total_flos": 1683203052011520.0,
+    "train_loss": 0.7514027657200448,
+    "train_runtime": 59331.9423,
+    "train_samples_per_second": 8.67,
+    "train_steps_per_second": 0.017
+}

eval_results.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+    "epoch": 3.0,
+    "eval_loss": 0.7694440484046936,
+    "eval_runtime": 356.3502,
+    "eval_samples_per_second": 25.329,
+    "eval_steps_per_second": 0.398
+}

train_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 3.0,
+    "total_flos": 1683203052011520.0,
+    "train_loss": 0.7514027657200448,
+    "train_runtime": 59331.9423,
+    "train_samples_per_second": 8.67,
+    "train_steps_per_second": 0.017
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,766 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 1005,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.029850746268656716,
+      "grad_norm": 3.479292993887848,
+      "learning_rate": 5e-06,
+      "loss": 1.015,
+      "step": 10
+    },
+    {
+      "epoch": 0.05970149253731343,
+      "grad_norm": 3.332094949060961,
+      "learning_rate": 5e-06,
+      "loss": 0.9229,
+      "step": 20
+    },
+    {
+      "epoch": 0.08955223880597014,
+      "grad_norm": 1.206482374615331,
+      "learning_rate": 5e-06,
+      "loss": 0.8863,
+      "step": 30
+    },
+    {
+      "epoch": 0.11940298507462686,
+      "grad_norm": 4.163345933152562,
+      "learning_rate": 5e-06,
+      "loss": 0.8671,
+      "step": 40
+    },
+    {
+      "epoch": 0.14925373134328357,
+      "grad_norm": 12.16630142884249,
+      "learning_rate": 5e-06,
+      "loss": 0.8564,
+      "step": 50
+    },
+    {
+      "epoch": 0.1791044776119403,
+      "grad_norm": 1.0327331476094606,
+      "learning_rate": 5e-06,
+      "loss": 0.8496,
+      "step": 60
+    },
+    {
+      "epoch": 0.208955223880597,
+      "grad_norm": 0.9990373506833108,
+      "learning_rate": 5e-06,
+      "loss": 0.8316,
+      "step": 70
+    },
+    {
+      "epoch": 0.23880597014925373,
+      "grad_norm": 0.9110436069182296,
+      "learning_rate": 5e-06,
+      "loss": 0.8315,
+      "step": 80
+    },
+    {
+      "epoch": 0.26865671641791045,
+      "grad_norm": 0.8361675958854492,
+      "learning_rate": 5e-06,
+      "loss": 0.8252,
+      "step": 90
+    },
+    {
+      "epoch": 0.29850746268656714,
+      "grad_norm": 0.9109432628272915,
+      "learning_rate": 5e-06,
+      "loss": 0.8254,
+      "step": 100
+    },
+    {
+      "epoch": 0.3283582089552239,
+      "grad_norm": 0.5754887816326917,
+      "learning_rate": 5e-06,
+      "loss": 0.8123,
+      "step": 110
+    },
+    {
+      "epoch": 0.3582089552238806,
+      "grad_norm": 0.5177271737564838,
+      "learning_rate": 5e-06,
+      "loss": 0.813,
+      "step": 120
+    },
+    {
+      "epoch": 0.3880597014925373,
+      "grad_norm": 0.5512037618652581,
+      "learning_rate": 5e-06,
+      "loss": 0.811,
+      "step": 130
+    },
+    {
+      "epoch": 0.417910447761194,
+      "grad_norm": 0.5700224076141376,
+      "learning_rate": 5e-06,
+      "loss": 0.8096,
+      "step": 140
+    },
+    {
+      "epoch": 0.44776119402985076,
+      "grad_norm": 0.5599860608870274,
+      "learning_rate": 5e-06,
+      "loss": 0.8046,
+      "step": 150
+    },
+    {
+      "epoch": 0.47761194029850745,
+      "grad_norm": 0.569170892894082,
+      "learning_rate": 5e-06,
+      "loss": 0.8034,
+      "step": 160
+    },
+    {
+      "epoch": 0.5074626865671642,
+      "grad_norm": 0.8243458549827131,
+      "learning_rate": 5e-06,
+      "loss": 0.7987,
+      "step": 170
+    },
+    {
+      "epoch": 0.5373134328358209,
+      "grad_norm": 0.5121916860141221,
+      "learning_rate": 5e-06,
+      "loss": 0.806,
+      "step": 180
+    },
+    {
+      "epoch": 0.5671641791044776,
+      "grad_norm": 0.6107343758472424,
+      "learning_rate": 5e-06,
+      "loss": 0.7948,
+      "step": 190
+    },
+    {
+      "epoch": 0.5970149253731343,
+      "grad_norm": 0.5131585752019031,
+      "learning_rate": 5e-06,
+      "loss": 0.7946,
+      "step": 200
+    },
+    {
+      "epoch": 0.6268656716417911,
+      "grad_norm": 0.6765312142993251,
+      "learning_rate": 5e-06,
+      "loss": 0.7992,
+      "step": 210
+    },
+    {
+      "epoch": 0.6567164179104478,
+      "grad_norm": 0.6992511282844082,
+      "learning_rate": 5e-06,
+      "loss": 0.7932,
+      "step": 220
+    },
+    {
+      "epoch": 0.6865671641791045,
+      "grad_norm": 0.5561440226388349,
+      "learning_rate": 5e-06,
+      "loss": 0.7947,
+      "step": 230
+    },
+    {
+      "epoch": 0.7164179104477612,
+      "grad_norm": 0.611629440350604,
+      "learning_rate": 5e-06,
+      "loss": 0.7952,
+      "step": 240
+    },
+    {
+      "epoch": 0.746268656716418,
+      "grad_norm": 0.7112309549807347,
+      "learning_rate": 5e-06,
+      "loss": 0.7961,
+      "step": 250
+    },
+    {
+      "epoch": 0.7761194029850746,
+      "grad_norm": 0.6315198037305403,
+      "learning_rate": 5e-06,
+      "loss": 0.7903,
+      "step": 260
+    },
+    {
+      "epoch": 0.8059701492537313,
+      "grad_norm": 0.6187802379727967,
+      "learning_rate": 5e-06,
+      "loss": 0.7938,
+      "step": 270
+    },
+    {
+      "epoch": 0.835820895522388,
+      "grad_norm": 0.6525337849629936,
+      "learning_rate": 5e-06,
+      "loss": 0.7845,
+      "step": 280
+    },
+    {
+      "epoch": 0.8656716417910447,
+      "grad_norm": 0.4910252956115715,
+      "learning_rate": 5e-06,
+      "loss": 0.7858,
+      "step": 290
+    },
+    {
+      "epoch": 0.8955223880597015,
+      "grad_norm": 0.5424409964718268,
+      "learning_rate": 5e-06,
+      "loss": 0.7889,
+      "step": 300
+    },
+    {
+      "epoch": 0.9253731343283582,
+      "grad_norm": 0.5025244254926571,
+      "learning_rate": 5e-06,
+      "loss": 0.7901,
+      "step": 310
+    },
+    {
+      "epoch": 0.9552238805970149,
+      "grad_norm": 0.611552104571597,
+      "learning_rate": 5e-06,
+      "loss": 0.7876,
+      "step": 320
+    },
+    {
+      "epoch": 0.9850746268656716,
+      "grad_norm": 0.574143500541867,
+      "learning_rate": 5e-06,
+      "loss": 0.788,
+      "step": 330
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.7820407152175903,
+      "eval_runtime": 356.5952,
+      "eval_samples_per_second": 25.312,
+      "eval_steps_per_second": 0.398,
+      "step": 335
+    },
+    {
+      "epoch": 1.0149253731343284,
+      "grad_norm": 0.6790286112759999,
+      "learning_rate": 5e-06,
+      "loss": 0.7615,
+      "step": 340
+    },
+    {
+      "epoch": 1.044776119402985,
+      "grad_norm": 0.5150260000277413,
+      "learning_rate": 5e-06,
+      "loss": 0.7459,
+      "step": 350
+    },
+    {
+      "epoch": 1.0746268656716418,
+      "grad_norm": 0.6700846811383229,
+      "learning_rate": 5e-06,
+      "loss": 0.7394,
+      "step": 360
+    },
+    {
+      "epoch": 1.1044776119402986,
+      "grad_norm": 0.720307351495057,
+      "learning_rate": 5e-06,
+      "loss": 0.741,
+      "step": 370
+    },
+    {
+      "epoch": 1.1343283582089552,
+      "grad_norm": 0.6813902538633815,
+      "learning_rate": 5e-06,
+      "loss": 0.7369,
+      "step": 380
+    },
+    {
+      "epoch": 1.164179104477612,
+      "grad_norm": 0.63094991849189,
+      "learning_rate": 5e-06,
+      "loss": 0.7418,
+      "step": 390
+    },
+    {
+      "epoch": 1.1940298507462686,
+      "grad_norm": 0.5792845851337866,
+      "learning_rate": 5e-06,
+      "loss": 0.7414,
+      "step": 400
+    },
+    {
+      "epoch": 1.2238805970149254,
+      "grad_norm": 0.603252393791289,
+      "learning_rate": 5e-06,
+      "loss": 0.7457,
+      "step": 410
+    },
+    {
+      "epoch": 1.2537313432835822,
+      "grad_norm": 0.6398195179422785,
+      "learning_rate": 5e-06,
+      "loss": 0.74,
+      "step": 420
+    },
+    {
+      "epoch": 1.2835820895522387,
+      "grad_norm": 0.5963738792349057,
+      "learning_rate": 5e-06,
+      "loss": 0.7485,
+      "step": 430
+    },
+    {
+      "epoch": 1.3134328358208955,
+      "grad_norm": 0.6940326337185055,
+      "learning_rate": 5e-06,
+      "loss": 0.7448,
+      "step": 440
+    },
+    {
+      "epoch": 1.3432835820895521,
+      "grad_norm": 0.5568283328438208,
+      "learning_rate": 5e-06,
+      "loss": 0.736,
+      "step": 450
+    },
+    {
+      "epoch": 1.373134328358209,
+      "grad_norm": 0.5197916467255219,
+      "learning_rate": 5e-06,
+      "loss": 0.7411,
+      "step": 460
+    },
+    {
+      "epoch": 1.4029850746268657,
+      "grad_norm": 0.5460153463704294,
+      "learning_rate": 5e-06,
+      "loss": 0.7418,
+      "step": 470
+    },
+    {
+      "epoch": 1.4328358208955223,
+      "grad_norm": 0.5763789808771301,
+      "learning_rate": 5e-06,
+      "loss": 0.7363,
+      "step": 480
+    },
+    {
+      "epoch": 1.462686567164179,
+      "grad_norm": 0.5189698614326181,
+      "learning_rate": 5e-06,
+      "loss": 0.744,
+      "step": 490
+    },
+    {
+      "epoch": 1.4925373134328357,
+      "grad_norm": 0.567498164042545,
+      "learning_rate": 5e-06,
+      "loss": 0.7404,
+      "step": 500
+    },
+    {
+      "epoch": 1.5223880597014925,
+      "grad_norm": 0.6393388739204092,
+      "learning_rate": 5e-06,
+      "loss": 0.7414,
+      "step": 510
+    },
+    {
+      "epoch": 1.5522388059701493,
+      "grad_norm": 0.5147979956127535,
+      "learning_rate": 5e-06,
+      "loss": 0.7374,
+      "step": 520
+    },
+    {
+      "epoch": 1.582089552238806,
+      "grad_norm": 0.6152783990954829,
+      "learning_rate": 5e-06,
+      "loss": 0.7414,
+      "step": 530
+    },
+    {
+      "epoch": 1.6119402985074627,
+      "grad_norm": 0.6919403728057012,
+      "learning_rate": 5e-06,
+      "loss": 0.7354,
+      "step": 540
+    },
+    {
+      "epoch": 1.6417910447761193,
+      "grad_norm": 0.6057934431622807,
+      "learning_rate": 5e-06,
+      "loss": 0.7422,
+      "step": 550
+    },
+    {
+      "epoch": 1.671641791044776,
+      "grad_norm": 0.6659075705857131,
+      "learning_rate": 5e-06,
+      "loss": 0.7359,
+      "step": 560
+    },
+    {
+      "epoch": 1.7014925373134329,
+      "grad_norm": 0.5120895019545025,
+      "learning_rate": 5e-06,
+      "loss": 0.7387,
+      "step": 570
+    },
+    {
+      "epoch": 1.7313432835820897,
+      "grad_norm": 0.6192820246303359,
+      "learning_rate": 5e-06,
+      "loss": 0.7428,
+      "step": 580
+    },
+    {
+      "epoch": 1.7611940298507462,
+      "grad_norm": 0.5647576041065644,
+      "learning_rate": 5e-06,
+      "loss": 0.7421,
+      "step": 590
+    },
+    {
+      "epoch": 1.7910447761194028,
+      "grad_norm": 0.6215429682194679,
+      "learning_rate": 5e-06,
+      "loss": 0.7387,
+      "step": 600
+    },
+    {
+      "epoch": 1.8208955223880596,
+      "grad_norm": 0.6981771564826721,
+      "learning_rate": 5e-06,
+      "loss": 0.7357,
+      "step": 610
+    },
+    {
+      "epoch": 1.8507462686567164,
+      "grad_norm": 0.5861737897737739,
+      "learning_rate": 5e-06,
+      "loss": 0.7359,
+      "step": 620
+    },
+    {
+      "epoch": 1.8805970149253732,
+      "grad_norm": 0.49215660200886596,
+      "learning_rate": 5e-06,
+      "loss": 0.7382,
+      "step": 630
+    },
+    {
+      "epoch": 1.9104477611940298,
+      "grad_norm": 0.5126805399974429,
+      "learning_rate": 5e-06,
+      "loss": 0.7374,
+      "step": 640
+    },
+    {
+      "epoch": 1.9402985074626866,
+      "grad_norm": 0.5418246376116465,
+      "learning_rate": 5e-06,
+      "loss": 0.7383,
+      "step": 650
+    },
+    {
+      "epoch": 1.9701492537313432,
+      "grad_norm": 0.5200260002573389,
+      "learning_rate": 5e-06,
+      "loss": 0.7386,
+      "step": 660
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 0.4891478343024747,
+      "learning_rate": 5e-06,
+      "loss": 0.7335,
+      "step": 670
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.7694990634918213,
+      "eval_runtime": 357.9559,
+      "eval_samples_per_second": 25.215,
+      "eval_steps_per_second": 0.397,
+      "step": 670
+    },
+    {
+      "epoch": 2.029850746268657,
+      "grad_norm": 0.6765562554325313,
+      "learning_rate": 5e-06,
+      "loss": 0.6893,
+      "step": 680
+    },
+    {
+      "epoch": 2.0597014925373136,
+      "grad_norm": 0.778608535751458,
+      "learning_rate": 5e-06,
+      "loss": 0.6913,
+      "step": 690
+    },
+    {
+      "epoch": 2.08955223880597,
+      "grad_norm": 0.5671615670514111,
+      "learning_rate": 5e-06,
+      "loss": 0.6895,
+      "step": 700
+    },
+    {
+      "epoch": 2.1194029850746268,
+      "grad_norm": 0.6665893513460222,
+      "learning_rate": 5e-06,
+      "loss": 0.6916,
+      "step": 710
+    },
+    {
+      "epoch": 2.1492537313432836,
+      "grad_norm": 0.6806197074554154,
+      "learning_rate": 5e-06,
+      "loss": 0.6935,
+      "step": 720
+    },
+    {
+      "epoch": 2.1791044776119404,
+      "grad_norm": 0.576145716852151,
+      "learning_rate": 5e-06,
+      "loss": 0.6951,
+      "step": 730
+    },
+    {
+      "epoch": 2.208955223880597,
+      "grad_norm": 0.643535683911534,
+      "learning_rate": 5e-06,
+      "loss": 0.6904,
+      "step": 740
+    },
+    {
+      "epoch": 2.2388059701492535,
+      "grad_norm": 0.5623086433806925,
+      "learning_rate": 5e-06,
+      "loss": 0.6935,
+      "step": 750
+    },
+    {
+      "epoch": 2.2686567164179103,
+      "grad_norm": 0.5766234212764187,
+      "learning_rate": 5e-06,
+      "loss": 0.6985,
+      "step": 760
+    },
+    {
+      "epoch": 2.298507462686567,
+      "grad_norm": 0.5751133605924373,
+      "learning_rate": 5e-06,
+      "loss": 0.6989,
+      "step": 770
+    },
+    {
+      "epoch": 2.328358208955224,
+      "grad_norm": 0.6038731996857178,
+      "learning_rate": 5e-06,
+      "loss": 0.6993,
+      "step": 780
+    },
+    {
+      "epoch": 2.3582089552238807,
+      "grad_norm": 0.6822024466871226,
+      "learning_rate": 5e-06,
+      "loss": 0.6946,
+      "step": 790
+    },
+    {
+      "epoch": 2.388059701492537,
+      "grad_norm": 0.6198933460670559,
+      "learning_rate": 5e-06,
+      "loss": 0.6962,
+      "step": 800
+    },
+    {
+      "epoch": 2.417910447761194,
+      "grad_norm": 0.6172129701431681,
+      "learning_rate": 5e-06,
+      "loss": 0.6953,
+      "step": 810
+    },
+    {
+      "epoch": 2.4477611940298507,
+      "grad_norm": 0.5875229638376829,
+      "learning_rate": 5e-06,
+      "loss": 0.6931,
+      "step": 820
+    },
+    {
+      "epoch": 2.4776119402985075,
+      "grad_norm": 0.6023272569262241,
+      "learning_rate": 5e-06,
+      "loss": 0.6955,
+      "step": 830
+    },
+    {
+      "epoch": 2.5074626865671643,
+      "grad_norm": 0.5877478924208155,
+      "learning_rate": 5e-06,
+      "loss": 0.6933,
+      "step": 840
+    },
+    {
+      "epoch": 2.5373134328358207,
+      "grad_norm": 0.5553889178901485,
+      "learning_rate": 5e-06,
+      "loss": 0.6945,
+      "step": 850
+    },
+    {
+      "epoch": 2.5671641791044775,
+      "grad_norm": 0.595671809413151,
+      "learning_rate": 5e-06,
+      "loss": 0.6983,
+      "step": 860
+    },
+    {
+      "epoch": 2.5970149253731343,
+      "grad_norm": 0.5458221648218067,
+      "learning_rate": 5e-06,
+      "loss": 0.6986,
+      "step": 870
+    },
+    {
+      "epoch": 2.626865671641791,
+      "grad_norm": 0.5634666832714791,
+      "learning_rate": 5e-06,
+      "loss": 0.6992,
+      "step": 880
+    },
+    {
+      "epoch": 2.656716417910448,
+      "grad_norm": 0.5986690445210529,
+      "learning_rate": 5e-06,
+      "loss": 0.697,
+      "step": 890
+    },
+    {
+      "epoch": 2.6865671641791042,
+      "grad_norm": 0.6806823973105847,
+      "learning_rate": 5e-06,
+      "loss": 0.697,
+      "step": 900
+    },
+    {
+      "epoch": 2.716417910447761,
+      "grad_norm": 0.5843820752773177,
+      "learning_rate": 5e-06,
+      "loss": 0.6926,
+      "step": 910
+    },
+    {
+      "epoch": 2.746268656716418,
+      "grad_norm": 0.5748608122491111,
+      "learning_rate": 5e-06,
+      "loss": 0.6954,
+      "step": 920
+    },
+    {
+      "epoch": 2.7761194029850746,
+      "grad_norm": 0.570179781829274,
+      "learning_rate": 5e-06,
+      "loss": 0.6992,
+      "step": 930
+    },
+    {
+      "epoch": 2.8059701492537314,
+      "grad_norm": 0.5247671197226909,
+      "learning_rate": 5e-06,
+      "loss": 0.6946,
+      "step": 940
+    },
+    {
+      "epoch": 2.835820895522388,
+      "grad_norm": 0.5328797886127241,
+      "learning_rate": 5e-06,
+      "loss": 0.6954,
+      "step": 950
+    },
+    {
+      "epoch": 2.8656716417910446,
+      "grad_norm": 0.533230211383855,
+      "learning_rate": 5e-06,
+      "loss": 0.6991,
+      "step": 960
+    },
+    {
+      "epoch": 2.8955223880597014,
+      "grad_norm": 0.6204281218133497,
+      "learning_rate": 5e-06,
+      "loss": 0.6988,
+      "step": 970
+    },
+    {
+      "epoch": 2.925373134328358,
+      "grad_norm": 0.5715769762952346,
+      "learning_rate": 5e-06,
+      "loss": 0.6985,
+      "step": 980
+    },
+    {
+      "epoch": 2.955223880597015,
+      "grad_norm": 0.5731281297330721,
+      "learning_rate": 5e-06,
+      "loss": 0.6957,
+      "step": 990
+    },
+    {
+      "epoch": 2.9850746268656714,
+      "grad_norm": 0.6065892745042316,
+      "learning_rate": 5e-06,
+      "loss": 0.6979,
+      "step": 1000
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 0.7694440484046936,
+      "eval_runtime": 357.3084,
+      "eval_samples_per_second": 25.261,
+      "eval_steps_per_second": 0.397,
+      "step": 1005
+    },
+    {
+      "epoch": 3.0,
+      "step": 1005,
+      "total_flos": 1683203052011520.0,
+      "train_loss": 0.7514027657200448,
+      "train_runtime": 59331.9423,
+      "train_samples_per_second": 8.67,
+      "train_steps_per_second": 0.017
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 1005,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1683203052011520.0,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}

training_eval_loss.png ADDED Viewed

training_loss.png ADDED Viewed