MHGanainy/best-performing-clustering-6

Browse files

Files changed (5) hide show

README.md +2 -0
all_results.json +13 -0
eval_results.json +8 -0
train_results.json +8 -0
trainer_state.json +679 -0

README.md CHANGED Viewed

@@ -15,6 +15,8 @@ should probably proofread and complete it, then remove this comment. -->
 # best-performing-clustering-6
 This model is a fine-tuned version of [openai-community/gpt2-xl](https://huggingface.co/openai-community/gpt2-xl) on an unknown dataset.
 ## Model description

 # best-performing-clustering-6
 This model is a fine-tuned version of [openai-community/gpt2-xl](https://huggingface.co/openai-community/gpt2-xl) on an unknown dataset.
+It achieves the following results on the evaluation set:
+- Loss: 2.1335
 ## Model description

all_results.json ADDED Viewed

	@@ -0,0 +1,13 @@

+{
+    "epoch": 1.0,
+    "eval_loss": 2.1334645748138428,
+    "eval_runtime": 159.4929,
+    "eval_samples_per_second": 10.916,
+    "eval_steps_per_second": 1.367,
+    "perplexity": 8.444071308426771,
+    "total_flos": 1.6746639458304e+17,
+    "train_loss": 2.2412419762070965,
+    "train_runtime": 4787.9733,
+    "train_samples_per_second": 3.84,
+    "train_steps_per_second": 1.92
+}

eval_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 1.0,
+    "eval_loss": 2.1334645748138428,
+    "eval_runtime": 159.4929,
+    "eval_samples_per_second": 10.916,
+    "eval_steps_per_second": 1.367,
+    "perplexity": 8.444071308426771
+}

train_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 1.0,
+    "total_flos": 1.6746639458304e+17,
+    "train_loss": 2.2412419762070965,
+    "train_runtime": 4787.9733,
+    "train_samples_per_second": 3.84,
+    "train_steps_per_second": 1.92
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,679 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 9194,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.010876658690450293,
+      "grad_norm": 0.11775965243577957,
+      "learning_rate": 6.666666666666667e-06,
+      "loss": 2.5177,
+      "step": 100
+    },
+    {
+      "epoch": 0.021753317380900587,
+      "grad_norm": 0.21489302814006805,
+      "learning_rate": 1.3333333333333333e-05,
+      "loss": 2.5141,
+      "step": 200
+    },
+    {
+      "epoch": 0.03262997607135088,
+      "grad_norm": 0.3868540823459625,
+      "learning_rate": 2e-05,
+      "loss": 2.4843,
+      "step": 300
+    },
+    {
+      "epoch": 0.04350663476180117,
+      "grad_norm": 0.40104538202285767,
+      "learning_rate": 1.999376221583229e-05,
+      "loss": 2.3961,
+      "step": 400
+    },
+    {
+      "epoch": 0.05438329345225147,
+      "grad_norm": 0.5164044499397278,
+      "learning_rate": 1.9975056645319428e-05,
+      "loss": 2.35,
+      "step": 500
+    },
+    {
+      "epoch": 0.06525995214270176,
+      "grad_norm": 0.47309133410453796,
+      "learning_rate": 1.9943906624723733e-05,
+      "loss": 2.3903,
+      "step": 600
+    },
+    {
+      "epoch": 0.07613661083315205,
+      "grad_norm": 0.5553125739097595,
+      "learning_rate": 1.990035101546626e-05,
+      "loss": 2.3195,
+      "step": 700
+    },
+    {
+      "epoch": 0.08701326952360235,
+      "grad_norm": 0.5993786454200745,
+      "learning_rate": 1.9844444155644974e-05,
+      "loss": 2.3576,
+      "step": 800
+    },
+    {
+      "epoch": 0.09788992821405264,
+      "grad_norm": 0.5128291845321655,
+      "learning_rate": 1.9776255792244897e-05,
+      "loss": 2.2987,
+      "step": 900
+    },
+    {
+      "epoch": 0.10876658690450294,
+      "grad_norm": 0.5672403573989868,
+      "learning_rate": 1.9695870994124752e-05,
+      "loss": 2.3408,
+      "step": 1000
+    },
+    {
+      "epoch": 0.11964324559495323,
+      "grad_norm": 0.6869469285011292,
+      "learning_rate": 1.960339004588874e-05,
+      "loss": 2.3277,
+      "step": 1100
+    },
+    {
+      "epoch": 0.13051990428540353,
+      "grad_norm": 0.5485507249832153,
+      "learning_rate": 1.949892832277582e-05,
+      "loss": 2.3045,
+      "step": 1200
+    },
+    {
+      "epoch": 0.14139656297585382,
+      "grad_norm": 0.5480315089225769,
+      "learning_rate": 1.9382616146722495e-05,
+      "loss": 2.3185,
+      "step": 1300
+    },
+    {
+      "epoch": 0.1522732216663041,
+      "grad_norm": 0.7362557649612427,
+      "learning_rate": 1.9254598623778825e-05,
+      "loss": 2.268,
+      "step": 1400
+    },
+    {
+      "epoch": 0.1631498803567544,
+      "grad_norm": 0.6089996099472046,
+      "learning_rate": 1.911503546308037e-05,
+      "loss": 2.298,
+      "step": 1500
+    },
+    {
+      "epoch": 0.1740265390472047,
+      "grad_norm": 0.5844936966896057,
+      "learning_rate": 1.896410077760198e-05,
+      "loss": 2.2817,
+      "step": 1600
+    },
+    {
+      "epoch": 0.18490319773765498,
+      "grad_norm": 0.5984183549880981,
+      "learning_rate": 1.880198286694193e-05,
+      "loss": 2.2744,
+      "step": 1700
+    },
+    {
+      "epoch": 0.19577985642810528,
+      "grad_norm": 0.7692469358444214,
+      "learning_rate": 1.8628883982407505e-05,
+      "loss": 2.2415,
+      "step": 1800
+    },
+    {
+      "epoch": 0.20665651511855557,
+      "grad_norm": 0.6598834991455078,
+      "learning_rate": 1.8445020074694992e-05,
+      "loss": 2.2932,
+      "step": 1900
+    },
+    {
+      "epoch": 0.2175331738090059,
+      "grad_norm": 0.8028424978256226,
+      "learning_rate": 1.8250620524478897e-05,
+      "loss": 2.2812,
+      "step": 2000
+    },
+    {
+      "epoch": 0.22840983249945618,
+      "grad_norm": 0.7637801766395569,
+      "learning_rate": 1.8045927856246524e-05,
+      "loss": 2.2787,
+      "step": 2100
+    },
+    {
+      "epoch": 0.23928649118990647,
+      "grad_norm": 0.6362522840499878,
+      "learning_rate": 1.7831197435734912e-05,
+      "loss": 2.2977,
+      "step": 2200
+    },
+    {
+      "epoch": 0.25016314988035676,
+      "grad_norm": 0.7283287048339844,
+      "learning_rate": 1.7606697151347526e-05,
+      "loss": 2.2645,
+      "step": 2300
+    },
+    {
+      "epoch": 0.26103980857080705,
+      "grad_norm": 0.9132130742073059,
+      "learning_rate": 1.7372707079948295e-05,
+      "loss": 2.2763,
+      "step": 2400
+    },
+    {
+      "epoch": 0.27191646726125734,
+      "grad_norm": 0.6510069966316223,
+      "learning_rate": 1.7129519137449776e-05,
+      "loss": 2.24,
+      "step": 2500
+    },
+    {
+      "epoch": 0.28279312595170764,
+      "grad_norm": 0.886200487613678,
+      "learning_rate": 1.6877436714631463e-05,
+      "loss": 2.2413,
+      "step": 2600
+    },
+    {
+      "epoch": 0.2936697846421579,
+      "grad_norm": 0.784830629825592,
+      "learning_rate": 1.661677429864256e-05,
+      "loss": 2.2407,
+      "step": 2700
+    },
+    {
+      "epoch": 0.3045464433326082,
+      "grad_norm": 0.7875649333000183,
+      "learning_rate": 1.6347857080661378e-05,
+      "loss": 2.2516,
+      "step": 2800
+    },
+    {
+      "epoch": 0.3154231020230585,
+      "grad_norm": 0.7136234045028687,
+      "learning_rate": 1.607102055020087e-05,
+      "loss": 2.2033,
+      "step": 2900
+    },
+    {
+      "epoch": 0.3262997607135088,
+      "grad_norm": 0.7511260509490967,
+      "learning_rate": 1.5786610076566388e-05,
+      "loss": 2.2971,
+      "step": 3000
+    },
+    {
+      "epoch": 0.3371764194039591,
+      "grad_norm": 0.7906160354614258,
+      "learning_rate": 1.5494980477987843e-05,
+      "loss": 2.2292,
+      "step": 3100
+    },
+    {
+      "epoch": 0.3480530780944094,
+      "grad_norm": 0.8098989129066467,
+      "learning_rate": 1.5196495578963808e-05,
+      "loss": 2.2046,
+      "step": 3200
+    },
+    {
+      "epoch": 0.3589297367848597,
+      "grad_norm": 0.719405472278595,
+      "learning_rate": 1.4891527756369766e-05,
+      "loss": 2.2208,
+      "step": 3300
+    },
+    {
+      "epoch": 0.36980639547530997,
+      "grad_norm": 0.576866626739502,
+      "learning_rate": 1.4580457474896804e-05,
+      "loss": 2.2388,
+      "step": 3400
+    },
+    {
+      "epoch": 0.38068305416576026,
+      "grad_norm": 0.6755930185317993,
+      "learning_rate": 1.4263672812400284e-05,
+      "loss": 2.2581,
+      "step": 3500
+    },
+    {
+      "epoch": 0.39155971285621055,
+      "grad_norm": 0.7689741253852844,
+      "learning_rate": 1.394156897575067e-05,
+      "loss": 2.2315,
+      "step": 3600
+    },
+    {
+      "epoch": 0.40243637154666084,
+      "grad_norm": 0.7418312430381775,
+      "learning_rate": 1.3614547807790483e-05,
+      "loss": 2.3031,
+      "step": 3700
+    },
+    {
+      "epoch": 0.41331303023711113,
+      "grad_norm": 0.7450283765792847,
+      "learning_rate": 1.328301728601252e-05,
+      "loss": 2.2307,
+      "step": 3800
+    },
+    {
+      "epoch": 0.4241896889275614,
+      "grad_norm": 0.7184864282608032,
+      "learning_rate": 1.2947391013584758e-05,
+      "loss": 2.2386,
+      "step": 3900
+    },
+    {
+      "epoch": 0.4350663476180118,
+      "grad_norm": 0.7286122441291809,
+      "learning_rate": 1.2608087703356877e-05,
+      "loss": 2.2103,
+      "step": 4000
+    },
+    {
+      "epoch": 0.44594300630846206,
+      "grad_norm": 0.9107334613800049,
+      "learning_rate": 1.22655306554922e-05,
+      "loss": 2.1938,
+      "step": 4100
+    },
+    {
+      "epoch": 0.45681966499891236,
+      "grad_norm": 0.8477912545204163,
+      "learning_rate": 1.1920147229376663e-05,
+      "loss": 2.1957,
+      "step": 4200
+    },
+    {
+      "epoch": 0.46769632368936265,
+      "grad_norm": 0.8039445281028748,
+      "learning_rate": 1.1572368310463712e-05,
+      "loss": 2.2234,
+      "step": 4300
+    },
+    {
+      "epoch": 0.47857298237981294,
+      "grad_norm": 0.7385637760162354,
+      "learning_rate": 1.1222627772720196e-05,
+      "loss": 2.2494,
+      "step": 4400
+    },
+    {
+      "epoch": 0.48944964107026323,
+      "grad_norm": 0.873935878276825,
+      "learning_rate": 1.0871361937343945e-05,
+      "loss": 2.209,
+      "step": 4500
+    },
+    {
+      "epoch": 0.5003262997607135,
+      "grad_norm": 1.1287140846252441,
+      "learning_rate": 1.0519009028428272e-05,
+      "loss": 2.1784,
+      "step": 4600
+    },
+    {
+      "epoch": 0.5112029584511638,
+      "grad_norm": 0.7948943376541138,
+      "learning_rate": 1.0166008626252514e-05,
+      "loss": 2.2169,
+      "step": 4700
+    },
+    {
+      "epoch": 0.5220796171416141,
+      "grad_norm": 0.75812828540802,
+      "learning_rate": 9.812801118880645e-06,
+      "loss": 2.2284,
+      "step": 4800
+    },
+    {
+      "epoch": 0.5329562758320644,
+      "grad_norm": 1.0229548215866089,
+      "learning_rate": 9.459827152752154e-06,
+      "loss": 2.2066,
+      "step": 4900
+    },
+    {
+      "epoch": 0.5438329345225147,
+      "grad_norm": 1.0001264810562134,
+      "learning_rate": 9.107527082950537e-06,
+      "loss": 2.2144,
+      "step": 5000
+    },
+    {
+      "epoch": 0.554709593212965,
+      "grad_norm": 1.3419371843338013,
+      "learning_rate": 8.756340423835338e-06,
+      "loss": 2.2282,
+      "step": 5100
+    },
+    {
+      "epoch": 0.5655862519034153,
+      "grad_norm": 1.1223336458206177,
+      "learning_rate": 8.40670530072298e-06,
+      "loss": 2.1892,
+      "step": 5200
+    },
+    {
+      "epoch": 0.5764629105938656,
+      "grad_norm": 0.6637461185455322,
+      "learning_rate": 8.059057903300553e-06,
+      "loss": 2.2495,
+      "step": 5300
+    },
+    {
+      "epoch": 0.5873395692843159,
+      "grad_norm": 0.747543454170227,
+      "learning_rate": 7.713831941454373e-06,
+      "loss": 2.1836,
+      "step": 5400
+    },
+    {
+      "epoch": 0.5982162279747661,
+      "grad_norm": 1.1951714754104614,
+      "learning_rate": 7.371458104192257e-06,
+      "loss": 2.1827,
+      "step": 5500
+    },
+    {
+      "epoch": 0.6090928866652164,
+      "grad_norm": 0.8208951354026794,
+      "learning_rate": 7.032363522334502e-06,
+      "loss": 2.2302,
+      "step": 5600
+    },
+    {
+      "epoch": 0.6199695453556667,
+      "grad_norm": 1.0612913370132446,
+      "learning_rate": 6.696971235643932e-06,
+      "loss": 2.164,
+      "step": 5700
+    },
+    {
+      "epoch": 0.630846204046117,
+      "grad_norm": 0.6853541135787964,
+      "learning_rate": 6.365699665059718e-06,
+      "loss": 2.1821,
+      "step": 5800
+    },
+    {
+      "epoch": 0.6417228627365673,
+      "grad_norm": 0.8484651446342468,
+      "learning_rate": 6.038962090693503e-06,
+      "loss": 2.1819,
+      "step": 5900
+    },
+    {
+      "epoch": 0.6525995214270176,
+      "grad_norm": 1.4542567729949951,
+      "learning_rate": 5.717166136238958e-06,
+      "loss": 2.1762,
+      "step": 6000
+    },
+    {
+      "epoch": 0.6634761801174679,
+      "grad_norm": 1.2847181558609009,
+      "learning_rate": 5.400713260438077e-06,
+      "loss": 2.1901,
+      "step": 6100
+    },
+    {
+      "epoch": 0.6743528388079182,
+      "grad_norm": 0.8455380201339722,
+      "learning_rate": 5.089998256238553e-06,
+      "loss": 2.1897,
+      "step": 6200
+    },
+    {
+      "epoch": 0.6852294974983685,
+      "grad_norm": 1.1807805299758911,
+      "learning_rate": 4.785408758267164e-06,
+      "loss": 2.2193,
+      "step": 6300
+    },
+    {
+      "epoch": 0.6961061561888188,
+      "grad_norm": 0.7739274501800537,
+      "learning_rate": 4.487324759233518e-06,
+      "loss": 2.2115,
+      "step": 6400
+    },
+    {
+      "epoch": 0.7069828148792691,
+      "grad_norm": 0.8319265246391296,
+      "learning_rate": 4.196118135867595e-06,
+      "loss": 2.178,
+      "step": 6500
+    },
+    {
+      "epoch": 0.7178594735697194,
+      "grad_norm": 0.8209885358810425,
+      "learning_rate": 3.912152184982336e-06,
+      "loss": 2.1665,
+      "step": 6600
+    },
+    {
+      "epoch": 0.7287361322601696,
+      "grad_norm": 1.5596791505813599,
+      "learning_rate": 3.6357811702402703e-06,
+      "loss": 2.232,
+      "step": 6700
+    },
+    {
+      "epoch": 0.7396127909506199,
+      "grad_norm": 0.8522664308547974,
+      "learning_rate": 3.3673498801894178e-06,
+      "loss": 2.1873,
+      "step": 6800
+    },
+    {
+      "epoch": 0.7504894496410702,
+      "grad_norm": 0.7811763882637024,
+      "learning_rate": 3.1071931981200353e-06,
+      "loss": 2.1903,
+      "step": 6900
+    },
+    {
+      "epoch": 0.7613661083315205,
+      "grad_norm": 1.0121145248413086,
+      "learning_rate": 2.855635684278618e-06,
+      "loss": 2.2129,
+      "step": 7000
+    },
+    {
+      "epoch": 0.7722427670219708,
+      "grad_norm": 0.8791484832763672,
+      "learning_rate": 2.612991170960595e-06,
+      "loss": 2.1658,
+      "step": 7100
+    },
+    {
+      "epoch": 0.7831194257124211,
+      "grad_norm": 1.054653525352478,
+      "learning_rate": 2.37956237098667e-06,
+      "loss": 2.1966,
+      "step": 7200
+    },
+    {
+      "epoch": 0.7939960844028714,
+      "grad_norm": 0.8596212267875671,
+      "learning_rate": 2.1556405000514015e-06,
+      "loss": 2.2237,
+      "step": 7300
+    },
+    {
+      "epoch": 0.8048727430933217,
+      "grad_norm": 0.8690965175628662,
+      "learning_rate": 1.9415049134150545e-06,
+      "loss": 2.2035,
+      "step": 7400
+    },
+    {
+      "epoch": 0.815749401783772,
+      "grad_norm": 0.8708688020706177,
+      "learning_rate": 1.7374227573920377e-06,
+      "loss": 2.1783,
+      "step": 7500
+    },
+    {
+      "epoch": 0.8266260604742223,
+      "grad_norm": 0.8724594116210938,
+      "learning_rate": 1.5436486360707037e-06,
+      "loss": 2.2229,
+      "step": 7600
+    },
+    {
+      "epoch": 0.8375027191646726,
+      "grad_norm": 0.7637216448783875,
+      "learning_rate": 1.3604242936802702e-06,
+      "loss": 2.2131,
+      "step": 7700
+    },
+    {
+      "epoch": 0.8483793778551229,
+      "grad_norm": 0.8904792070388794,
+      "learning_rate": 1.1879783130011601e-06,
+      "loss": 2.2124,
+      "step": 7800
+    },
+    {
+      "epoch": 0.8592560365455733,
+      "grad_norm": 0.920068085193634,
+      "learning_rate": 1.026525830194983e-06,
+      "loss": 2.2312,
+      "step": 7900
+    },
+    {
+      "epoch": 0.8701326952360235,
+      "grad_norm": 0.853566586971283,
+      "learning_rate": 8.762682664099564e-07,
+      "loss": 2.2061,
+      "step": 8000
+    },
+    {
+      "epoch": 0.8810093539264738,
+      "grad_norm": 0.8593130111694336,
+      "learning_rate": 7.373930764965742e-07,
+      "loss": 2.1988,
+      "step": 8100
+    },
+    {
+      "epoch": 0.8918860126169241,
+      "grad_norm": 0.8238704204559326,
+      "learning_rate": 6.100735151470216e-07,
+      "loss": 2.1747,
+      "step": 8200
+    },
+    {
+      "epoch": 0.9027626713073744,
+      "grad_norm": 1.1148990392684937,
+      "learning_rate": 4.944684207501005e-07,
+      "loss": 2.2112,
+      "step": 8300
+    },
+    {
+      "epoch": 0.9136393299978247,
+      "grad_norm": 1.3240580558776855,
+      "learning_rate": 3.9072201723132084e-07,
+      "loss": 2.1655,
+      "step": 8400
+    },
+    {
+      "epoch": 0.924515988688275,
+      "grad_norm": 0.9755560755729675,
+      "learning_rate": 2.989637341253493e-07,
+      "loss": 2.1778,
+      "step": 8500
+    },
+    {
+      "epoch": 0.9353926473787253,
+      "grad_norm": 0.8737950921058655,
+      "learning_rate": 2.1930804510530536e-07,
+      "loss": 2.1974,
+      "step": 8600
+    },
+    {
+      "epoch": 0.9462693060691756,
+      "grad_norm": 1.1282422542572021,
+      "learning_rate": 1.518543251703586e-07,
+      "loss": 2.2214,
+      "step": 8700
+    },
+    {
+      "epoch": 0.9571459647596259,
+      "grad_norm": 0.8447544574737549,
+      "learning_rate": 9.66867266697602e-08,
+      "loss": 2.143,
+      "step": 8800
+    },
+    {
+      "epoch": 0.9680226234500762,
+      "grad_norm": 0.9211781620979309,
+      "learning_rate": 5.387407431801217e-08,
+      "loss": 2.1945,
+      "step": 8900
+    },
+    {
+      "epoch": 0.9788992821405265,
+      "grad_norm": 1.0062124729156494,
+      "learning_rate": 2.3469779332114806e-08,
+      "loss": 2.2168,
+      "step": 9000
+    },
+    {
+      "epoch": 0.9897759408309768,
+      "grad_norm": 0.9540311694145203,
+      "learning_rate": 5.511772798049153e-09,
+      "loss": 2.186,
+      "step": 9100
+    },
+    {
+      "epoch": 1.0,
+      "step": 9194,
+      "total_flos": 1.6746639458304e+17,
+      "train_loss": 2.2412419762070965,
+      "train_runtime": 4787.9733,
+      "train_samples_per_second": 3.84,
+      "train_steps_per_second": 1.92
+    }
+  ],
+  "logging_steps": 100,
+  "max_steps": 9194,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.6746639458304e+17,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}