Training Examples: 10-20k

Browse files

Files changed (7) hide show

README.md +0 -2
adapter_config.json +4 -4
adapter_model.safetensors +1 -1
optimizer.pt +1 -1
scaler.pt +1 -1
trainer_state.json +127 -127
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -1,8 +1,6 @@
 ---
 base_model: unsloth/Phi-3.5-mini-instruct
 library_name: peft
-tags:
-- unsloth
 ---
 # Model Card for Model ID

 ---
 base_model: unsloth/Phi-3.5-mini-instruct
 library_name: peft
 ---
 # Model Card for Model ID

adapter_config.json CHANGED Viewed

@@ -23,13 +23,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "q_proj",
-    "down_proj",
-    "gate_proj",
     "o_proj",
     "up_proj",
     "k_proj",
-    "v_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "o_proj",
+    "q_proj",
+    "v_proj",
     "up_proj",
     "k_proj",
+    "down_proj",
+    "gate_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f43d47027386dc4eafd698323a31d8c6c40fbd882c2bfe9dd43b1122c186929f
 size 119597408

 version https://git-lfs.github.com/spec/v1
+oid sha256:ec128fc25c61bba289ed835cd135bd259f2e6b0e7461a47f722930c775371b4d
 size 119597408

optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:83438eaa9a3c3886c6e226e34f99c9abf71eeff99962b4cc773061653827afbd
 size 61227348

 version https://git-lfs.github.com/spec/v1
+oid sha256:3b04f3fdac6e766bf2c5cf4423858daf620bd0e5672f6b35021ccd14bdf331a4
 size 61227348

scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d847fa8b77307ec3f023cf0a917c492c209c15c78747caba0466134193ab7151
 size 988

 version https://git-lfs.github.com/spec/v1
+oid sha256:195e5184004bc0db9b878c771cd5c8a05988dae6ecbf71bd37c791801c0f3c2f
 size 988

trainer_state.json CHANGED Viewed

@@ -11,345 +11,345 @@
   "log_history": [
     {
       "epoch": 0.00020742584526031943,
-      "grad_norm": 0.572188675403595,
       "learning_rate": 0.0,
-      "loss": 0.9048,
       "step": 1
     },
     {
       "epoch": 0.020742584526031945,
-      "grad_norm": 0.8375779390335083,
       "learning_rate": 2.0518134715025907e-05,
-      "loss": 0.6388,
       "step": 100
     },
     {
       "epoch": 0.04148516905206389,
-      "grad_norm": 0.80202317237854,
       "learning_rate": 4.1243523316062174e-05,
-      "loss": 0.5854,
       "step": 200
     },
     {
       "epoch": 0.06222775357809583,
-      "grad_norm": 0.7062171101570129,
       "learning_rate": 6.196891191709845e-05,
-      "loss": 0.4707,
       "step": 300
     },
     {
       "epoch": 0.08297033810412778,
-      "grad_norm": 0.644052267074585,
       "learning_rate": 8.269430051813471e-05,
-      "loss": 0.3773,
       "step": 400
     },
     {
       "epoch": 0.10371292263015972,
-      "grad_norm": 0.6500552296638489,
       "learning_rate": 0.000103419689119171,
-      "loss": 0.339,
       "step": 500
     },
     {
       "epoch": 0.12445550715619166,
-      "grad_norm": 0.9881173968315125,
-      "learning_rate": 0.00012414507772020726,
-      "loss": 0.3264,
       "step": 600
     },
     {
       "epoch": 0.1451980916822236,
-      "grad_norm": 0.8607994318008423,
-      "learning_rate": 0.00014487046632124352,
-      "loss": 0.3395,
       "step": 700
     },
     {
       "epoch": 0.16594067620825556,
-      "grad_norm": 1.1204837560653687,
-      "learning_rate": 0.0001655958549222798,
-      "loss": 0.356,
       "step": 800
     },
     {
       "epoch": 0.18668326073428748,
-      "grad_norm": 1.4915101528167725,
-      "learning_rate": 0.00018632124352331608,
-      "loss": 0.3399,
       "step": 900
     },
     {
       "epoch": 0.20742584526031943,
-      "grad_norm": 1.2340389490127563,
-      "learning_rate": 0.00019996163583391267,
-      "loss": 0.371,
       "step": 1000
     },
     {
       "epoch": 0.22816842978635138,
-      "grad_norm": 0.657108724117279,
-      "learning_rate": 0.00019940464789344699,
-      "loss": 0.3402,
       "step": 1100
     },
     {
       "epoch": 0.24891101431238333,
-      "grad_norm": 1.1355221271514893,
-      "learning_rate": 0.00019818819435915685,
-      "loss": 0.3604,
       "step": 1200
     },
     {
       "epoch": 0.2696535988384153,
-      "grad_norm": 0.8293834924697876,
-      "learning_rate": 0.00019632034536930397,
-      "loss": 0.355,
       "step": 1300
     },
     {
       "epoch": 0.2903961833644472,
-      "grad_norm": 1.1846222877502441,
-      "learning_rate": 0.00019381349251894317,
-      "loss": 0.3562,
       "step": 1400
     },
     {
       "epoch": 0.3111387678904792,
-      "grad_norm": 0.7575041055679321,
-      "learning_rate": 0.0001906842666521912,
-      "loss": 0.3853,
       "step": 1500
     },
     {
       "epoch": 0.3318813524165111,
-      "grad_norm": 0.8805419206619263,
-      "learning_rate": 0.0001869534275306705,
-      "loss": 0.3789,
       "step": 1600
     },
     {
       "epoch": 0.352623936942543,
-      "grad_norm": 0.5712432861328125,
-      "learning_rate": 0.00018264572611008452,
-      "loss": 0.4197,
       "step": 1700
     },
     {
       "epoch": 0.37336652146857496,
-      "grad_norm": 1.414759874343872,
-      "learning_rate": 0.00017778974033860325,
-      "loss": 0.3831,
       "step": 1800
     },
     {
       "epoch": 0.3941091059946069,
-      "grad_norm": 1.0402040481567383,
-      "learning_rate": 0.00017241768556639647,
-      "loss": 0.4039,
       "step": 1900
     },
     {
       "epoch": 0.41485169052063886,
-      "grad_norm": 0.68588787317276,
-      "learning_rate": 0.0001665652008240878,
-      "loss": 0.3909,
       "step": 2000
     },
     {
       "epoch": 0.4355942750466708,
-      "grad_norm": 0.967073380947113,
-      "learning_rate": 0.00016027111238799057,
-      "loss": 0.4252,
       "step": 2100
     },
     {
       "epoch": 0.45633685957270276,
-      "grad_norm": 1.229313850402832,
-      "learning_rate": 0.00015357717620066938,
-      "loss": 0.4296,
       "step": 2200
     },
     {
       "epoch": 0.4770794440987347,
-      "grad_norm": 1.2722011804580688,
-      "learning_rate": 0.00014652780085564921,
-      "loss": 0.4027,
       "step": 2300
     },
     {
       "epoch": 0.49782202862476665,
-      "grad_norm": 0.9985523819923401,
-      "learning_rate": 0.00013916975298403346,
-      "loss": 0.4733,
       "step": 2400
     },
     {
       "epoch": 0.5185646131507986,
-      "grad_norm": 1.0977421998977661,
-      "learning_rate": 0.00013155184699754102,
-      "loss": 0.4848,
       "step": 2500
     },
     {
       "epoch": 0.5393071976768306,
-      "grad_norm": 0.9423943758010864,
-      "learning_rate": 0.00012372462124625452,
-      "loss": 0.4491,
       "step": 2600
     },
     {
       "epoch": 0.5600497822028625,
-      "grad_norm": 1.0384944677352905,
-      "learning_rate": 0.00011574000273949858,
-      "loss": 0.4421,
       "step": 2700
     },
     {
       "epoch": 0.5807923667288944,
-      "grad_norm": 0.6461535692214966,
-      "learning_rate": 0.00010765096265414077,
-      "loss": 0.4887,
       "step": 2800
     },
     {
       "epoch": 0.6015349512549264,
-      "grad_norm": 0.7776329517364502,
-      "learning_rate": 9.95111649157258e-05,
-      "loss": 0.5306,
       "step": 2900
     },
     {
       "epoch": 0.6222775357809583,
-      "grad_norm": 0.6103058457374573,
-      "learning_rate": 9.137461018380963e-05,
-      "loss": 0.4847,
       "step": 3000
     },
     {
       "epoch": 0.6430201203069903,
-      "grad_norm": 1.384641170501709,
-      "learning_rate": 8.329527760334861e-05,
-      "loss": 0.5293,
       "step": 3100
     },
     {
       "epoch": 0.6637627048330222,
-      "grad_norm": 1.2251664400100708,
-      "learning_rate": 7.532676669881955e-05,
-      "loss": 0.55,
       "step": 3200
     },
     {
       "epoch": 0.6845052893590542,
-      "grad_norm": 0.5400375127792358,
-      "learning_rate": 6.752194178680041e-05,
-      "loss": 0.5329,
       "step": 3300
     },
     {
       "epoch": 0.705247873885086,
-      "grad_norm": 1.0325515270233154,
-      "learning_rate": 5.9932581266031694e-05,
-      "loss": 0.5638,
       "step": 3400
     },
     {
       "epoch": 0.725990458411118,
-      "grad_norm": 0.4699115455150604,
       "learning_rate": 5.2680792652421385e-05,
-      "loss": 0.5527,
       "step": 3500
     },
     {
       "epoch": 0.7467330429371499,
-      "grad_norm": 0.5923639535903931,
       "learning_rate": 4.5668266493661425e-05,
-      "loss": 0.5647,
       "step": 3600
     },
     {
       "epoch": 0.7674756274631819,
-      "grad_norm": 1.6123884916305542,
       "learning_rate": 3.901618534083994e-05,
-      "loss": 0.6208,
       "step": 3700
     },
     {
       "epoch": 0.7882182119892138,
-      "grad_norm": 0.882792055606842,
       "learning_rate": 3.2768680114799956e-05,
-      "loss": 0.585,
       "step": 3800
     },
     {
       "epoch": 0.8089607965152458,
-      "grad_norm": 0.8842360973358154,
       "learning_rate": 2.696719771798648e-05,
-      "loss": 0.6046,
       "step": 3900
     },
     {
       "epoch": 0.8297033810412777,
-      "grad_norm": 0.9587863087654114,
       "learning_rate": 2.1650226069374525e-05,
-      "loss": 0.6127,
       "step": 4000
     },
     {
       "epoch": 0.8504459655673097,
-      "grad_norm": 0.6551477909088135,
       "learning_rate": 1.6853038769745467e-05,
-      "loss": 0.6291,
       "step": 4100
     },
     {
       "epoch": 0.8711885500933416,
-      "grad_norm": 0.7264061570167542,
       "learning_rate": 1.2607461091239803e-05,
-      "loss": 0.6627,
       "step": 4200
     },
     {
       "epoch": 0.8919311346193736,
-      "grad_norm": 0.40014514327049255,
       "learning_rate": 8.941658843648237e-06,
-      "loss": 0.6575,
       "step": 4300
     },
     {
       "epoch": 0.9126737191454055,
-      "grad_norm": 1.0279369354248047,
       "learning_rate": 5.879951518134263e-06,
-      "loss": 0.7132,
       "step": 4400
     },
     {
       "epoch": 0.9334163036714375,
-      "grad_norm": 0.559190034866333,
       "learning_rate": 3.4426509480207646e-06,
-      "loss": 0.6866,
       "step": 4500
     },
     {
       "epoch": 0.9541588881974694,
-      "grad_norm": 1.0593820810317993,
       "learning_rate": 1.6459265569902738e-06,
-      "loss": 0.6781,
       "step": 4600
     },
     {
       "epoch": 0.9749014727235014,
-      "grad_norm": 0.7888472080230713,
       "learning_rate": 5.016980886622169e-07,
-      "loss": 0.7098,
       "step": 4700
     },
     {
       "epoch": 0.9956440572495333,
-      "grad_norm": 1.0247892141342163,
       "learning_rate": 1.755652919597228e-08,
-      "loss": 0.6915,
       "step": 4800
     }
   ],

   "log_history": [
     {
       "epoch": 0.00020742584526031943,
+      "grad_norm": 0.6779253482818604,
       "learning_rate": 0.0,
+      "loss": 0.9216,
       "step": 1
     },
     {
       "epoch": 0.020742584526031945,
+      "grad_norm": 0.9759976267814636,
       "learning_rate": 2.0518134715025907e-05,
+      "loss": 0.5876,
       "step": 100
     },
     {
       "epoch": 0.04148516905206389,
+      "grad_norm": 1.1005536317825317,
       "learning_rate": 4.1243523316062174e-05,
+      "loss": 0.4583,
       "step": 200
     },
     {
       "epoch": 0.06222775357809583,
+      "grad_norm": 1.5758947134017944,
       "learning_rate": 6.196891191709845e-05,
+      "loss": 0.3009,
       "step": 300
     },
     {
       "epoch": 0.08297033810412778,
+      "grad_norm": 1.0562893152236938,
       "learning_rate": 8.269430051813471e-05,
+      "loss": 0.2051,
       "step": 400
     },
     {
       "epoch": 0.10371292263015972,
+      "grad_norm": 0.8340764045715332,
       "learning_rate": 0.000103419689119171,
+      "loss": 0.1822,
       "step": 500
     },
     {
       "epoch": 0.12445550715619166,
+      "grad_norm": 0.7953233122825623,
+      "learning_rate": 0.0001239378238341969,
+      "loss": 0.1671,
       "step": 600
     },
     {
       "epoch": 0.1451980916822236,
+      "grad_norm": 0.6487672924995422,
+      "learning_rate": 0.00014466321243523318,
+      "loss": 0.1672,
       "step": 700
     },
     {
       "epoch": 0.16594067620825556,
+      "grad_norm": 1.0472800731658936,
+      "learning_rate": 0.00016538860103626943,
+      "loss": 0.167,
       "step": 800
     },
     {
       "epoch": 0.18668326073428748,
+      "grad_norm": 1.3705922365188599,
+      "learning_rate": 0.0001861139896373057,
+      "loss": 0.1519,
       "step": 900
     },
     {
       "epoch": 0.20742584526031943,
+      "grad_norm": 1.5635592937469482,
+      "learning_rate": 0.00019996385922862659,
+      "loss": 0.1767,
       "step": 1000
     },
     {
       "epoch": 0.22816842978635138,
+      "grad_norm": 0.9496662616729736,
+      "learning_rate": 0.00019941349192574383,
+      "loss": 0.1558,
       "step": 1100
     },
     {
       "epoch": 0.24891101431238333,
+      "grad_norm": 0.8596046566963196,
+      "learning_rate": 0.00019820360035637763,
+      "loss": 0.1647,
       "step": 1200
     },
     {
       "epoch": 0.2696535988384153,
+      "grad_norm": 0.9286707043647766,
+      "learning_rate": 0.0001963422111257136,
+      "loss": 0.1737,
       "step": 1300
     },
     {
       "epoch": 0.2903961833644472,
+      "grad_norm": 1.3370907306671143,
+      "learning_rate": 0.000193841672973779,
+      "loss": 0.1646,
       "step": 1400
     },
     {
       "epoch": 0.3111387678904792,
+      "grad_norm": 0.9638619422912598,
+      "learning_rate": 0.00019071857485201819,
+      "loss": 0.1841,
       "step": 1500
     },
     {
       "epoch": 0.3318813524165111,
+      "grad_norm": 1.182265043258667,
+      "learning_rate": 0.0001869936358696588,
+      "loss": 0.1736,
       "step": 1600
     },
     {
       "epoch": 0.352623936942543,
+      "grad_norm": 0.7590048909187317,
+      "learning_rate": 0.00018269156783998138,
+      "loss": 0.2114,
       "step": 1700
     },
     {
       "epoch": 0.37336652146857496,
+      "grad_norm": 1.873295545578003,
+      "learning_rate": 0.00017784091133838138,
+      "loss": 0.1915,
       "step": 1800
     },
     {
       "epoch": 0.3941091059946069,
+      "grad_norm": 1.4994107484817505,
+      "learning_rate": 0.00017247384635983953,
+      "loss": 0.2032,
       "step": 1900
     },
     {
       "epoch": 0.41485169052063886,
+      "grad_norm": 0.8668350577354431,
+      "learning_rate": 0.00016662597883192732,
+      "loss": 0.1936,
       "step": 2000
     },
     {
       "epoch": 0.4355942750466708,
+      "grad_norm": 1.6480194330215454,
+      "learning_rate": 0.00016033610439965355,
+      "loss": 0.2191,
       "step": 2100
     },
     {
       "epoch": 0.45633685957270276,
+      "grad_norm": 1.6885930299758911,
+      "learning_rate": 0.00015364595104923887,
+      "loss": 0.2218,
       "step": 2200
     },
     {
       "epoch": 0.4770794440987347,
+      "grad_norm": 1.2831690311431885,
+      "learning_rate": 0.0001465999022782913,
+      "loss": 0.2198,
       "step": 2300
     },
     {
       "epoch": 0.49782202862476665,
+      "grad_norm": 1.4296784400939941,
+      "learning_rate": 0.0001392447026489145,
+      "loss": 0.2638,
       "step": 2400
     },
     {
       "epoch": 0.5185646131507986,
+      "grad_norm": 1.3762316703796387,
+      "learning_rate": 0.00013162914767715403,
+      "loss": 0.2742,
       "step": 2500
     },
     {
       "epoch": 0.5393071976768306,
+      "grad_norm": 0.78104567527771,
+      "learning_rate": 0.00012380376011610222,
+      "loss": 0.2495,
       "step": 2600
     },
     {
       "epoch": 0.5600497822028625,
+      "grad_norm": 1.5943220853805542,
+      "learning_rate": 0.00011582045478024929,
+      "loss": 0.2562,
       "step": 2700
     },
     {
       "epoch": 0.5807923667288944,
+      "grad_norm": 0.9165586829185486,
+      "learning_rate": 0.00010773219413468682,
+      "loss": 0.2905,
       "step": 2800
     },
     {
       "epoch": 0.6015349512549264,
+      "grad_norm": 0.923703670501709,
+      "learning_rate": 9.959263693403704e-05,
+      "loss": 0.3557,
       "step": 2900
     },
     {
       "epoch": 0.6222775357809583,
+      "grad_norm": 0.8286433219909668,
+      "learning_rate": 9.145578224209172e-05,
+      "loss": 0.3085,
       "step": 3000
     },
     {
       "epoch": 0.6430201203069903,
+      "grad_norm": 1.4901179075241089,
+      "learning_rate": 8.337561119378777e-05,
+      "loss": 0.3469,
       "step": 3100
     },
     {
       "epoch": 0.6637627048330222,
+      "grad_norm": 1.5367130041122437,
+      "learning_rate": 7.540572887612554e-05,
+      "loss": 0.3813,
       "step": 3200
     },
     {
       "epoch": 0.6845052893590542,
+      "grad_norm": 0.7450740933418274,
+      "learning_rate": 6.759900870384683e-05,
+      "loss": 0.3884,
       "step": 3300
     },
     {
       "epoch": 0.705247873885086,
+      "grad_norm": 1.3471629619598389,
+      "learning_rate": 6.00072416491378e-05,
+      "loss": 0.4307,
       "step": 3400
     },
     {
       "epoch": 0.725990458411118,
+      "grad_norm": 0.6345047354698181,
       "learning_rate": 5.2680792652421385e-05,
+      "loss": 0.4336,
       "step": 3500
     },
     {
       "epoch": 0.7467330429371499,
+      "grad_norm": 0.8957846164703369,
       "learning_rate": 4.5668266493661425e-05,
+      "loss": 0.4618,
       "step": 3600
     },
     {
       "epoch": 0.7674756274631819,
+      "grad_norm": 1.8546568155288696,
       "learning_rate": 3.901618534083994e-05,
+      "loss": 0.5159,
       "step": 3700
     },
     {
       "epoch": 0.7882182119892138,
+      "grad_norm": 1.1825199127197266,
       "learning_rate": 3.2768680114799956e-05,
+      "loss": 0.5052,
       "step": 3800
     },
     {
       "epoch": 0.8089607965152458,
+      "grad_norm": 1.0945169925689697,
       "learning_rate": 2.696719771798648e-05,
+      "loss": 0.539,
       "step": 3900
     },
     {
       "epoch": 0.8297033810412777,
+      "grad_norm": 1.0912768840789795,
       "learning_rate": 2.1650226069374525e-05,
+      "loss": 0.5648,
       "step": 4000
     },
     {
       "epoch": 0.8504459655673097,
+      "grad_norm": 0.7788926362991333,
       "learning_rate": 1.6853038769745467e-05,
+      "loss": 0.5894,
       "step": 4100
     },
     {
       "epoch": 0.8711885500933416,
+      "grad_norm": 0.9643399119377136,
       "learning_rate": 1.2607461091239803e-05,
+      "loss": 0.6307,
       "step": 4200
     },
     {
       "epoch": 0.8919311346193736,
+      "grad_norm": 0.5643659234046936,
       "learning_rate": 8.941658843648237e-06,
+      "loss": 0.6353,
       "step": 4300
     },
     {
       "epoch": 0.9126737191454055,
+      "grad_norm": 1.2083373069763184,
       "learning_rate": 5.879951518134263e-06,
+      "loss": 0.7055,
       "step": 4400
     },
     {
       "epoch": 0.9334163036714375,
+      "grad_norm": 0.8048790097236633,
       "learning_rate": 3.4426509480207646e-06,
+      "loss": 0.6827,
       "step": 4500
     },
     {
       "epoch": 0.9541588881974694,
+      "grad_norm": 1.1787850856781006,
       "learning_rate": 1.6459265569902738e-06,
+      "loss": 0.6791,
       "step": 4600
     },
     {
       "epoch": 0.9749014727235014,
+      "grad_norm": 0.9065990447998047,
       "learning_rate": 5.016980886622169e-07,
+      "loss": 0.7151,
       "step": 4700
     },
     {
       "epoch": 0.9956440572495333,
+      "grad_norm": 1.174241542816162,
       "learning_rate": 1.755652919597228e-08,
+      "loss": 0.6988,
       "step": 4800
     }
   ],

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:621a556d1da2455eed4e6cd8bccd4f42a7c4222b1175c833beedf9b39ece534d
 size 5560

 version https://git-lfs.github.com/spec/v1
+oid sha256:c1490bdd6543d7012eebfc68c6a93d950ca298ac26298e504a163253dfb7c948
 size 5560