MHGanainy/best-performing-clustering-7

Browse files

Files changed (5) hide show

README.md +2 -0
all_results.json +13 -0
eval_results.json +8 -0
train_results.json +8 -0
trainer_state.json +336 -0

README.md CHANGED Viewed

@@ -15,6 +15,8 @@ should probably proofread and complete it, then remove this comment. -->
 # best-performing-clustering-7
 This model is a fine-tuned version of [openai-community/gpt2-xl](https://huggingface.co/openai-community/gpt2-xl) on an unknown dataset.
 ## Model description

 # best-performing-clustering-7
 This model is a fine-tuned version of [openai-community/gpt2-xl](https://huggingface.co/openai-community/gpt2-xl) on an unknown dataset.
+It achieves the following results on the evaluation set:
+- Loss: 1.8230
 ## Model description

all_results.json ADDED Viewed

	@@ -0,0 +1,13 @@

+{
+    "epoch": 1.0,
+    "eval_loss": 1.8229724168777466,
+    "eval_runtime": 65.6293,
+    "eval_samples_per_second": 10.895,
+    "eval_steps_per_second": 1.371,
+    "perplexity": 6.190231078134954,
+    "total_flos": 7.717588795392e+16,
+    "train_loss": 1.9823122004278713,
+    "train_runtime": 2216.0595,
+    "train_samples_per_second": 3.824,
+    "train_steps_per_second": 1.912
+}

eval_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 1.0,
+    "eval_loss": 1.8229724168777466,
+    "eval_runtime": 65.6293,
+    "eval_samples_per_second": 10.895,
+    "eval_steps_per_second": 1.371,
+    "perplexity": 6.190231078134954
+}

train_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 1.0,
+    "total_flos": 7.717588795392e+16,
+    "train_loss": 1.9823122004278713,
+    "train_runtime": 2216.0595,
+    "train_samples_per_second": 3.824,
+    "train_steps_per_second": 1.912
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,336 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 4237,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.023601604909133822,
+      "grad_norm": 0.10703875869512558,
+      "learning_rate": 6.666666666666667e-06,
+      "loss": 2.4126,
+      "step": 100
+    },
+    {
+      "epoch": 0.047203209818267644,
+      "grad_norm": 0.20992980897426605,
+      "learning_rate": 1.3333333333333333e-05,
+      "loss": 2.3493,
+      "step": 200
+    },
+    {
+      "epoch": 0.07080481472740147,
+      "grad_norm": 0.38506367802619934,
+      "learning_rate": 2e-05,
+      "loss": 2.2884,
+      "step": 300
+    },
+    {
+      "epoch": 0.09440641963653529,
+      "grad_norm": 0.5256543159484863,
+      "learning_rate": 1.9968179392958305e-05,
+      "loss": 2.2043,
+      "step": 400
+    },
+    {
+      "epoch": 0.11800802454566911,
+      "grad_norm": 0.5416532158851624,
+      "learning_rate": 1.987292008203972e-05,
+      "loss": 2.1605,
+      "step": 500
+    },
+    {
+      "epoch": 0.14160962945480293,
+      "grad_norm": 0.6629260778427124,
+      "learning_rate": 1.9714828309064202e-05,
+      "loss": 2.138,
+      "step": 600
+    },
+    {
+      "epoch": 0.16521123436393675,
+      "grad_norm": 0.7280109524726868,
+      "learning_rate": 1.9494910189268627e-05,
+      "loss": 2.091,
+      "step": 700
+    },
+    {
+      "epoch": 0.18881283927307058,
+      "grad_norm": 0.8726409673690796,
+      "learning_rate": 1.921456530826727e-05,
+      "loss": 2.0719,
+      "step": 800
+    },
+    {
+      "epoch": 0.2124144441822044,
+      "grad_norm": 0.786993145942688,
+      "learning_rate": 1.8875577814919035e-05,
+      "loss": 2.0387,
+      "step": 900
+    },
+    {
+      "epoch": 0.23601604909133822,
+      "grad_norm": 0.7426193952560425,
+      "learning_rate": 1.848010506678749e-05,
+      "loss": 2.0183,
+      "step": 1000
+    },
+    {
+      "epoch": 0.25961765400047204,
+      "grad_norm": 0.9175330996513367,
+      "learning_rate": 1.803066390045544e-05,
+      "loss": 2.045,
+      "step": 1100
+    },
+    {
+      "epoch": 0.28321925890960586,
+      "grad_norm": 0.795050859451294,
+      "learning_rate": 1.753011461407132e-05,
+      "loss": 2.0278,
+      "step": 1200
+    },
+    {
+      "epoch": 0.3068208638187397,
+      "grad_norm": 0.9610442519187927,
+      "learning_rate": 1.6981642764064544e-05,
+      "loss": 2.0395,
+      "step": 1300
+    },
+    {
+      "epoch": 0.3304224687278735,
+      "grad_norm": 0.863545298576355,
+      "learning_rate": 1.6388738891877607e-05,
+      "loss": 2.0042,
+      "step": 1400
+    },
+    {
+      "epoch": 0.35402407363700733,
+      "grad_norm": 1.0420805215835571,
+      "learning_rate": 1.5755176309736586e-05,
+      "loss": 1.9713,
+      "step": 1500
+    },
+    {
+      "epoch": 0.37762567854614115,
+      "grad_norm": 1.0546514987945557,
+      "learning_rate": 1.5084987086834003e-05,
+      "loss": 1.968,
+      "step": 1600
+    },
+    {
+      "epoch": 0.401227283455275,
+      "grad_norm": 0.8405762910842896,
+      "learning_rate": 1.4382436388750968e-05,
+      "loss": 1.9442,
+      "step": 1700
+    },
+    {
+      "epoch": 0.4248288883644088,
+      "grad_norm": 1.089908480644226,
+      "learning_rate": 1.36519953334256e-05,
+      "loss": 1.908,
+      "step": 1800
+    },
+    {
+      "epoch": 0.4484304932735426,
+      "grad_norm": 1.351189136505127,
+      "learning_rate": 1.2898312536415628e-05,
+      "loss": 1.9473,
+      "step": 1900
+    },
+    {
+      "epoch": 0.47203209818267644,
+      "grad_norm": 0.981865406036377,
+      "learning_rate": 1.2126184526544591e-05,
+      "loss": 1.9577,
+      "step": 2000
+    },
+    {
+      "epoch": 0.49563370309181026,
+      "grad_norm": 0.9919114708900452,
+      "learning_rate": 1.1340525220210092e-05,
+      "loss": 1.9136,
+      "step": 2100
+    },
+    {
+      "epoch": 0.5192353080009441,
+      "grad_norm": 1.1692713499069214,
+      "learning_rate": 1.0546334648623235e-05,
+      "loss": 1.9127,
+      "step": 2200
+    },
+    {
+      "epoch": 0.5428369129100779,
+      "grad_norm": 1.028548240661621,
+      "learning_rate": 9.748667137002961e-06,
+      "loss": 1.8947,
+      "step": 2300
+    },
+    {
+      "epoch": 0.5664385178192117,
+      "grad_norm": 1.3835196495056152,
+      "learning_rate": 8.9525991382367e-06,
+      "loss": 1.9242,
+      "step": 2400
+    },
+    {
+      "epoch": 0.5900401227283455,
+      "grad_norm": 1.0017223358154297,
+      "learning_rate": 8.163196925717906e-06,
+      "loss": 1.9241,
+      "step": 2500
+    },
+    {
+      "epoch": 0.6136417276374794,
+      "grad_norm": 1.3177546262741089,
+      "learning_rate": 7.385484350967048e-06,
+      "loss": 1.8967,
+      "step": 2600
+    },
+    {
+      "epoch": 0.6372433325466131,
+      "grad_norm": 1.2489763498306274,
+      "learning_rate": 6.6244108712306435e-06,
+      "loss": 1.9141,
+      "step": 2700
+    },
+    {
+      "epoch": 0.660844937455747,
+      "grad_norm": 1.4832775592803955,
+      "learning_rate": 5.884820050534392e-06,
+      "loss": 1.8921,
+      "step": 2800
+    },
+    {
+      "epoch": 0.6844465423648808,
+      "grad_norm": 1.0875434875488281,
+      "learning_rate": 5.171418734653707e-06,
+      "loss": 1.8483,
+      "step": 2900
+    },
+    {
+      "epoch": 0.7080481472740147,
+      "grad_norm": 1.1204584836959839,
+      "learning_rate": 4.488747096175717e-06,
+      "loss": 1.8557,
+      "step": 3000
+    },
+    {
+      "epoch": 0.7316497521831484,
+      "grad_norm": 0.9455496072769165,
+      "learning_rate": 3.841149740289725e-06,
+      "loss": 1.8695,
+      "step": 3100
+    },
+    {
+      "epoch": 0.7552513570922823,
+      "grad_norm": 0.9588963389396667,
+      "learning_rate": 3.2327480551923107e-06,
+      "loss": 1.9,
+      "step": 3200
+    },
+    {
+      "epoch": 0.7788529620014161,
+      "grad_norm": 0.9655967354774475,
+      "learning_rate": 2.6674139830724722e-06,
+      "loss": 1.9159,
+      "step": 3300
+    },
+    {
+      "epoch": 0.80245456691055,
+      "grad_norm": 1.3765445947647095,
+      "learning_rate": 2.1487453786014513e-06,
+      "loss": 1.8497,
+      "step": 3400
+    },
+    {
+      "epoch": 0.8260561718196837,
+      "grad_norm": 1.257179617881775,
+      "learning_rate": 1.6800431117487958e-06,
+      "loss": 1.8931,
+      "step": 3500
+    },
+    {
+      "epoch": 0.8496577767288176,
+      "grad_norm": 1.1668704748153687,
+      "learning_rate": 1.2642900606451148e-06,
+      "loss": 1.8853,
+      "step": 3600
+    },
+    {
+      "epoch": 0.8732593816379514,
+      "grad_norm": 1.0066955089569092,
+      "learning_rate": 9.04132128183528e-07,
+      "loss": 1.8997,
+      "step": 3700
+    },
+    {
+      "epoch": 0.8968609865470852,
+      "grad_norm": 1.234889030456543,
+      "learning_rate": 6.018614031723913e-07,
+      "loss": 1.8899,
+      "step": 3800
+    },
+    {
+      "epoch": 0.920462591456219,
+      "grad_norm": 1.4220229387283325,
+      "learning_rate": 3.594015732038625e-07,
+      "loss": 1.91,
+      "step": 3900
+    },
+    {
+      "epoch": 0.9440641963653529,
+      "grad_norm": 1.0287963151931763,
+      "learning_rate": 1.7829568207250902e-07,
+      "loss": 1.8958,
+      "step": 4000
+    },
+    {
+      "epoch": 0.9676658012744866,
+      "grad_norm": 0.9667606949806213,
+      "learning_rate": 5.969630965725448e-08,
+      "loss": 1.9538,
+      "step": 4100
+    },
+    {
+      "epoch": 0.9912674061836205,
+      "grad_norm": 1.3044230937957764,
+      "learning_rate": 4.3582367631034295e-09,
+      "loss": 1.8733,
+      "step": 4200
+    },
+    {
+      "epoch": 1.0,
+      "step": 4237,
+      "total_flos": 7.717588795392e+16,
+      "train_loss": 1.9823122004278713,
+      "train_runtime": 2216.0595,
+      "train_samples_per_second": 3.824,
+      "train_steps_per_second": 1.912
+    }
+  ],
+  "logging_steps": 100,
+  "max_steps": 4237,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 7.717588795392e+16,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}