End of training

Browse files

Files changed (7) hide show

README.md +2 -1
all_results.json +12 -0
eval_results.json +7 -0
train_results.json +8 -0
trainer_state.json +682 -0
training_eval_loss.png +0 -0
training_loss.png +0 -0

README.md CHANGED Viewed

@@ -4,6 +4,7 @@ license: llama3.1
 base_model: meta-llama/Meta-Llama-3.1-8B
 tags:
 - llama-factory
 - generated_from_trainer
 model-index:
 - name: top_8_ranking_stackexchange
@@ -15,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
 # top_8_ranking_stackexchange
-This model is a fine-tuned version of [meta-llama/Meta-Llama-3.1-8B](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B) on an unknown dataset.
 It achieves the following results on the evaluation set:
 - Loss: 0.7736

 base_model: meta-llama/Meta-Llama-3.1-8B
 tags:
 - llama-factory
+- full
 - generated_from_trainer
 model-index:
 - name: top_8_ranking_stackexchange
 # top_8_ranking_stackexchange
+This model is a fine-tuned version of [meta-llama/Meta-Llama-3.1-8B](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B) on the mlfoundations-dev/top_8_ranking_stackexchange dataset.
 It achieves the following results on the evaluation set:
 - Loss: 0.7736

all_results.json ADDED Viewed

	@@ -0,0 +1,12 @@

+{
+    "epoch": 3.0,
+    "eval_loss": 0.7735684514045715,
+    "eval_runtime": 315.4705,
+    "eval_samples_per_second": 25.112,
+    "eval_steps_per_second": 0.393,
+    "total_flos": 1477173470822400.0,
+    "train_loss": 0.7552210683184687,
+    "train_runtime": 52264.4132,
+    "train_samples_per_second": 8.64,
+    "train_steps_per_second": 0.017
+}

eval_results.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+    "epoch": 3.0,
+    "eval_loss": 0.7735684514045715,
+    "eval_runtime": 315.4705,
+    "eval_samples_per_second": 25.112,
+    "eval_steps_per_second": 0.393
+}

train_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 3.0,
+    "total_flos": 1477173470822400.0,
+    "train_loss": 0.7552210683184687,
+    "train_runtime": 52264.4132,
+    "train_samples_per_second": 8.64,
+    "train_steps_per_second": 0.017
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,682 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 882,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.034013605442176874,
+      "grad_norm": 5.093772950493088,
+      "learning_rate": 5e-06,
+      "loss": 1.0253,
+      "step": 10
+    },
+    {
+      "epoch": 0.06802721088435375,
+      "grad_norm": 1.582810434950759,
+      "learning_rate": 5e-06,
+      "loss": 0.9164,
+      "step": 20
+    },
+    {
+      "epoch": 0.10204081632653061,
+      "grad_norm": 1.081419957922015,
+      "learning_rate": 5e-06,
+      "loss": 0.8837,
+      "step": 30
+    },
+    {
+      "epoch": 0.1360544217687075,
+      "grad_norm": 1.334991004257816,
+      "learning_rate": 5e-06,
+      "loss": 0.8662,
+      "step": 40
+    },
+    {
+      "epoch": 0.17006802721088435,
+      "grad_norm": 0.6437293692108047,
+      "learning_rate": 5e-06,
+      "loss": 0.8474,
+      "step": 50
+    },
+    {
+      "epoch": 0.20408163265306123,
+      "grad_norm": 0.7291088099940165,
+      "learning_rate": 5e-06,
+      "loss": 0.8403,
+      "step": 60
+    },
+    {
+      "epoch": 0.23809523809523808,
+      "grad_norm": 1.2250728318971373,
+      "learning_rate": 5e-06,
+      "loss": 0.8324,
+      "step": 70
+    },
+    {
+      "epoch": 0.272108843537415,
+      "grad_norm": 0.5271316099486635,
+      "learning_rate": 5e-06,
+      "loss": 0.8255,
+      "step": 80
+    },
+    {
+      "epoch": 0.30612244897959184,
+      "grad_norm": 0.657776756921105,
+      "learning_rate": 5e-06,
+      "loss": 0.8268,
+      "step": 90
+    },
+    {
+      "epoch": 0.3401360544217687,
+      "grad_norm": 0.6441066353244067,
+      "learning_rate": 5e-06,
+      "loss": 0.8244,
+      "step": 100
+    },
+    {
+      "epoch": 0.3741496598639456,
+      "grad_norm": 0.8285320708808935,
+      "learning_rate": 5e-06,
+      "loss": 0.8204,
+      "step": 110
+    },
+    {
+      "epoch": 0.40816326530612246,
+      "grad_norm": 0.7161434853368004,
+      "learning_rate": 5e-06,
+      "loss": 0.8162,
+      "step": 120
+    },
+    {
+      "epoch": 0.4421768707482993,
+      "grad_norm": 0.5513713742579371,
+      "learning_rate": 5e-06,
+      "loss": 0.8145,
+      "step": 130
+    },
+    {
+      "epoch": 0.47619047619047616,
+      "grad_norm": 0.7457252021496298,
+      "learning_rate": 5e-06,
+      "loss": 0.8055,
+      "step": 140
+    },
+    {
+      "epoch": 0.5102040816326531,
+      "grad_norm": 0.5974600771687919,
+      "learning_rate": 5e-06,
+      "loss": 0.8046,
+      "step": 150
+    },
+    {
+      "epoch": 0.54421768707483,
+      "grad_norm": 0.5106810999294438,
+      "learning_rate": 5e-06,
+      "loss": 0.8038,
+      "step": 160
+    },
+    {
+      "epoch": 0.5782312925170068,
+      "grad_norm": 0.5233604769586638,
+      "learning_rate": 5e-06,
+      "loss": 0.7984,
+      "step": 170
+    },
+    {
+      "epoch": 0.6122448979591837,
+      "grad_norm": 0.6621030752821899,
+      "learning_rate": 5e-06,
+      "loss": 0.8053,
+      "step": 180
+    },
+    {
+      "epoch": 0.6462585034013606,
+      "grad_norm": 0.6082801478115033,
+      "learning_rate": 5e-06,
+      "loss": 0.7971,
+      "step": 190
+    },
+    {
+      "epoch": 0.6802721088435374,
+      "grad_norm": 0.6921185518271565,
+      "learning_rate": 5e-06,
+      "loss": 0.7962,
+      "step": 200
+    },
+    {
+      "epoch": 0.7142857142857143,
+      "grad_norm": 0.7932977516942055,
+      "learning_rate": 5e-06,
+      "loss": 0.7965,
+      "step": 210
+    },
+    {
+      "epoch": 0.7482993197278912,
+      "grad_norm": 0.6324128246753213,
+      "learning_rate": 5e-06,
+      "loss": 0.7934,
+      "step": 220
+    },
+    {
+      "epoch": 0.782312925170068,
+      "grad_norm": 0.559311808415173,
+      "learning_rate": 5e-06,
+      "loss": 0.7989,
+      "step": 230
+    },
+    {
+      "epoch": 0.8163265306122449,
+      "grad_norm": 0.6432666654821662,
+      "learning_rate": 5e-06,
+      "loss": 0.7969,
+      "step": 240
+    },
+    {
+      "epoch": 0.8503401360544217,
+      "grad_norm": 0.744813344130182,
+      "learning_rate": 5e-06,
+      "loss": 0.7919,
+      "step": 250
+    },
+    {
+      "epoch": 0.8843537414965986,
+      "grad_norm": 0.5433587318374555,
+      "learning_rate": 5e-06,
+      "loss": 0.7902,
+      "step": 260
+    },
+    {
+      "epoch": 0.9183673469387755,
+      "grad_norm": 0.7120343599921604,
+      "learning_rate": 5e-06,
+      "loss": 0.7906,
+      "step": 270
+    },
+    {
+      "epoch": 0.9523809523809523,
+      "grad_norm": 1.1772393318908365,
+      "learning_rate": 5e-06,
+      "loss": 0.7883,
+      "step": 280
+    },
+    {
+      "epoch": 0.9863945578231292,
+      "grad_norm": 1.289674010358365,
+      "learning_rate": 5e-06,
+      "loss": 0.7865,
+      "step": 290
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.787599503993988,
+      "eval_runtime": 312.4094,
+      "eval_samples_per_second": 25.358,
+      "eval_steps_per_second": 0.397,
+      "step": 294
+    },
+    {
+      "epoch": 1.0204081632653061,
+      "grad_norm": 0.9036542003842971,
+      "learning_rate": 5e-06,
+      "loss": 0.7746,
+      "step": 300
+    },
+    {
+      "epoch": 1.054421768707483,
+      "grad_norm": 0.8034701067288486,
+      "learning_rate": 5e-06,
+      "loss": 0.7488,
+      "step": 310
+    },
+    {
+      "epoch": 1.08843537414966,
+      "grad_norm": 0.6023506564223867,
+      "learning_rate": 5e-06,
+      "loss": 0.7475,
+      "step": 320
+    },
+    {
+      "epoch": 1.1224489795918366,
+      "grad_norm": 0.6504967920737302,
+      "learning_rate": 5e-06,
+      "loss": 0.7431,
+      "step": 330
+    },
+    {
+      "epoch": 1.1564625850340136,
+      "grad_norm": 0.5813877515629605,
+      "learning_rate": 5e-06,
+      "loss": 0.7462,
+      "step": 340
+    },
+    {
+      "epoch": 1.1904761904761905,
+      "grad_norm": 0.6259897603352809,
+      "learning_rate": 5e-06,
+      "loss": 0.747,
+      "step": 350
+    },
+    {
+      "epoch": 1.2244897959183674,
+      "grad_norm": 0.6862271161840088,
+      "learning_rate": 5e-06,
+      "loss": 0.7455,
+      "step": 360
+    },
+    {
+      "epoch": 1.2585034013605443,
+      "grad_norm": 0.5769690007814378,
+      "learning_rate": 5e-06,
+      "loss": 0.7406,
+      "step": 370
+    },
+    {
+      "epoch": 1.2925170068027212,
+      "grad_norm": 0.557540098903492,
+      "learning_rate": 5e-06,
+      "loss": 0.7538,
+      "step": 380
+    },
+    {
+      "epoch": 1.3265306122448979,
+      "grad_norm": 0.7131020070740424,
+      "learning_rate": 5e-06,
+      "loss": 0.7458,
+      "step": 390
+    },
+    {
+      "epoch": 1.3605442176870748,
+      "grad_norm": 0.5948565780053893,
+      "learning_rate": 5e-06,
+      "loss": 0.7446,
+      "step": 400
+    },
+    {
+      "epoch": 1.3945578231292517,
+      "grad_norm": 0.6911445705490143,
+      "learning_rate": 5e-06,
+      "loss": 0.7497,
+      "step": 410
+    },
+    {
+      "epoch": 1.4285714285714286,
+      "grad_norm": 0.6252849135367671,
+      "learning_rate": 5e-06,
+      "loss": 0.7469,
+      "step": 420
+    },
+    {
+      "epoch": 1.4625850340136055,
+      "grad_norm": 0.6646202868499425,
+      "learning_rate": 5e-06,
+      "loss": 0.7424,
+      "step": 430
+    },
+    {
+      "epoch": 1.4965986394557822,
+      "grad_norm": 0.6970040399316466,
+      "learning_rate": 5e-06,
+      "loss": 0.7445,
+      "step": 440
+    },
+    {
+      "epoch": 1.5306122448979593,
+      "grad_norm": 0.6004043771512654,
+      "learning_rate": 5e-06,
+      "loss": 0.7492,
+      "step": 450
+    },
+    {
+      "epoch": 1.564625850340136,
+      "grad_norm": 0.6563540827889115,
+      "learning_rate": 5e-06,
+      "loss": 0.7423,
+      "step": 460
+    },
+    {
+      "epoch": 1.598639455782313,
+      "grad_norm": 0.7454418696472762,
+      "learning_rate": 5e-06,
+      "loss": 0.7434,
+      "step": 470
+    },
+    {
+      "epoch": 1.6326530612244898,
+      "grad_norm": 0.5564890350927186,
+      "learning_rate": 5e-06,
+      "loss": 0.7365,
+      "step": 480
+    },
+    {
+      "epoch": 1.6666666666666665,
+      "grad_norm": 0.7834978093049169,
+      "learning_rate": 5e-06,
+      "loss": 0.7421,
+      "step": 490
+    },
+    {
+      "epoch": 1.7006802721088436,
+      "grad_norm": 0.583066628701426,
+      "learning_rate": 5e-06,
+      "loss": 0.7463,
+      "step": 500
+    },
+    {
+      "epoch": 1.7346938775510203,
+      "grad_norm": 0.5893517422275613,
+      "learning_rate": 5e-06,
+      "loss": 0.7376,
+      "step": 510
+    },
+    {
+      "epoch": 1.7687074829931972,
+      "grad_norm": 0.5037802702044101,
+      "learning_rate": 5e-06,
+      "loss": 0.7441,
+      "step": 520
+    },
+    {
+      "epoch": 1.8027210884353742,
+      "grad_norm": 0.5931792416654984,
+      "learning_rate": 5e-06,
+      "loss": 0.7431,
+      "step": 530
+    },
+    {
+      "epoch": 1.836734693877551,
+      "grad_norm": 0.6402441098113224,
+      "learning_rate": 5e-06,
+      "loss": 0.7438,
+      "step": 540
+    },
+    {
+      "epoch": 1.870748299319728,
+      "grad_norm": 0.5654472975194821,
+      "learning_rate": 5e-06,
+      "loss": 0.7393,
+      "step": 550
+    },
+    {
+      "epoch": 1.9047619047619047,
+      "grad_norm": 0.5753064985895123,
+      "learning_rate": 5e-06,
+      "loss": 0.7397,
+      "step": 560
+    },
+    {
+      "epoch": 1.9387755102040818,
+      "grad_norm": 0.7250548373178971,
+      "learning_rate": 5e-06,
+      "loss": 0.7397,
+      "step": 570
+    },
+    {
+      "epoch": 1.9727891156462585,
+      "grad_norm": 0.5431763568539679,
+      "learning_rate": 5e-06,
+      "loss": 0.7399,
+      "step": 580
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.7747776508331299,
+      "eval_runtime": 316.2944,
+      "eval_samples_per_second": 25.046,
+      "eval_steps_per_second": 0.392,
+      "step": 588
+    },
+    {
+      "epoch": 2.006802721088435,
+      "grad_norm": 1.0710107167046419,
+      "learning_rate": 5e-06,
+      "loss": 0.7322,
+      "step": 590
+    },
+    {
+      "epoch": 2.0408163265306123,
+      "grad_norm": 0.8179848172880277,
+      "learning_rate": 5e-06,
+      "loss": 0.6959,
+      "step": 600
+    },
+    {
+      "epoch": 2.074829931972789,
+      "grad_norm": 0.7213840408679012,
+      "learning_rate": 5e-06,
+      "loss": 0.6963,
+      "step": 610
+    },
+    {
+      "epoch": 2.108843537414966,
+      "grad_norm": 0.842144783291229,
+      "learning_rate": 5e-06,
+      "loss": 0.6919,
+      "step": 620
+    },
+    {
+      "epoch": 2.142857142857143,
+      "grad_norm": 0.8281153884085551,
+      "learning_rate": 5e-06,
+      "loss": 0.6885,
+      "step": 630
+    },
+    {
+      "epoch": 2.17687074829932,
+      "grad_norm": 0.6466546656089918,
+      "learning_rate": 5e-06,
+      "loss": 0.696,
+      "step": 640
+    },
+    {
+      "epoch": 2.2108843537414966,
+      "grad_norm": 0.526364663799012,
+      "learning_rate": 5e-06,
+      "loss": 0.6937,
+      "step": 650
+    },
+    {
+      "epoch": 2.2448979591836733,
+      "grad_norm": 0.5634674036723205,
+      "learning_rate": 5e-06,
+      "loss": 0.6981,
+      "step": 660
+    },
+    {
+      "epoch": 2.2789115646258504,
+      "grad_norm": 0.5968432701967212,
+      "learning_rate": 5e-06,
+      "loss": 0.7015,
+      "step": 670
+    },
+    {
+      "epoch": 2.312925170068027,
+      "grad_norm": 0.5913462184907319,
+      "learning_rate": 5e-06,
+      "loss": 0.7012,
+      "step": 680
+    },
+    {
+      "epoch": 2.3469387755102042,
+      "grad_norm": 0.7450042075059763,
+      "learning_rate": 5e-06,
+      "loss": 0.6952,
+      "step": 690
+    },
+    {
+      "epoch": 2.380952380952381,
+      "grad_norm": 0.5383186417121737,
+      "learning_rate": 5e-06,
+      "loss": 0.6953,
+      "step": 700
+    },
+    {
+      "epoch": 2.4149659863945576,
+      "grad_norm": 0.7331040417967113,
+      "learning_rate": 5e-06,
+      "loss": 0.6997,
+      "step": 710
+    },
+    {
+      "epoch": 2.4489795918367347,
+      "grad_norm": 0.707608838125245,
+      "learning_rate": 5e-06,
+      "loss": 0.6947,
+      "step": 720
+    },
+    {
+      "epoch": 2.4829931972789114,
+      "grad_norm": 0.5739038681907664,
+      "learning_rate": 5e-06,
+      "loss": 0.7002,
+      "step": 730
+    },
+    {
+      "epoch": 2.5170068027210886,
+      "grad_norm": 0.6882559394709682,
+      "learning_rate": 5e-06,
+      "loss": 0.7,
+      "step": 740
+    },
+    {
+      "epoch": 2.5510204081632653,
+      "grad_norm": 0.5748954947299686,
+      "learning_rate": 5e-06,
+      "loss": 0.6977,
+      "step": 750
+    },
+    {
+      "epoch": 2.5850340136054424,
+      "grad_norm": 0.5661948160767387,
+      "learning_rate": 5e-06,
+      "loss": 0.6972,
+      "step": 760
+    },
+    {
+      "epoch": 2.619047619047619,
+      "grad_norm": 0.6170581920248837,
+      "learning_rate": 5e-06,
+      "loss": 0.6988,
+      "step": 770
+    },
+    {
+      "epoch": 2.6530612244897958,
+      "grad_norm": 0.7539930534954333,
+      "learning_rate": 5e-06,
+      "loss": 0.6977,
+      "step": 780
+    },
+    {
+      "epoch": 2.687074829931973,
+      "grad_norm": 0.5811347422760094,
+      "learning_rate": 5e-06,
+      "loss": 0.6987,
+      "step": 790
+    },
+    {
+      "epoch": 2.7210884353741496,
+      "grad_norm": 0.6704654193944088,
+      "learning_rate": 5e-06,
+      "loss": 0.6982,
+      "step": 800
+    },
+    {
+      "epoch": 2.7551020408163263,
+      "grad_norm": 0.6505538631815906,
+      "learning_rate": 5e-06,
+      "loss": 0.7006,
+      "step": 810
+    },
+    {
+      "epoch": 2.7891156462585034,
+      "grad_norm": 0.5672733814024299,
+      "learning_rate": 5e-06,
+      "loss": 0.7038,
+      "step": 820
+    },
+    {
+      "epoch": 2.8231292517006805,
+      "grad_norm": 0.6464062527883608,
+      "learning_rate": 5e-06,
+      "loss": 0.7031,
+      "step": 830
+    },
+    {
+      "epoch": 2.857142857142857,
+      "grad_norm": 0.6783589732638796,
+      "learning_rate": 5e-06,
+      "loss": 0.7001,
+      "step": 840
+    },
+    {
+      "epoch": 2.891156462585034,
+      "grad_norm": 0.6780111327853662,
+      "learning_rate": 5e-06,
+      "loss": 0.6999,
+      "step": 850
+    },
+    {
+      "epoch": 2.925170068027211,
+      "grad_norm": 0.6861118715829543,
+      "learning_rate": 5e-06,
+      "loss": 0.7007,
+      "step": 860
+    },
+    {
+      "epoch": 2.9591836734693877,
+      "grad_norm": 0.6468827032168267,
+      "learning_rate": 5e-06,
+      "loss": 0.7029,
+      "step": 870
+    },
+    {
+      "epoch": 2.9931972789115644,
+      "grad_norm": 0.5936344265821127,
+      "learning_rate": 5e-06,
+      "loss": 0.7006,
+      "step": 880
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 0.7735684514045715,
+      "eval_runtime": 316.0096,
+      "eval_samples_per_second": 25.069,
+      "eval_steps_per_second": 0.392,
+      "step": 882
+    },
+    {
+      "epoch": 3.0,
+      "step": 882,
+      "total_flos": 1477173470822400.0,
+      "train_loss": 0.7552210683184687,
+      "train_runtime": 52264.4132,
+      "train_samples_per_second": 8.64,
+      "train_steps_per_second": 0.017
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 882,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1477173470822400.0,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}

training_eval_loss.png ADDED Viewed

training_loss.png ADDED Viewed