lavawolfiee
/

Mixtral-8x7B-Instruct-v0.1-offloading-hqq-4bit-3bit

Text Generation

text-generation-inference

Inference Endpoints

Model card | Files and versions | Community

lavawolfiee committed on Dec 30, 2023

Commit

ee0081f

•

1 Parent(s): 6e8b681

Added quantization_config.json

Files changed (1) hide show

quantization_config.json +45 -0

quantization_config.json ADDED Viewed

	@@ -0,0 +1,45 @@

+{
+  "type": "hqq",
+  "attn_config": {
+    "weight_quant_params": {
+      "nbits": 4,
+      "channel_wise": true,
+      "group_size": 64,
+      "optimize": true,
+      "round_zero": true
+    },
+    "scale_quant_params": {
+      "nbits": 8,
+      "channel_wise": true,
+      "group_size": 256,
+      "optimize": false
+    },
+    "zero_quant_params": {
+      "nbits": 8,
+      "channel_wise": false,
+      "group_size": null,
+      "optimize": false
+    }
+  },
+  "experts_config": {
+    "weight_quant_params": {
+      "nbits": 3,
+      "channel_wise": true,
+      "group_size": 64,
+      "optimize": true,
+      "round_zero": false
+    },
+    "scale_quant_params": {
+      "nbits": 8,
+      "channel_wise": true,
+      "group_size": 128,
+      "optimize": false
+    },
+    "zero_quant_params": {
+      "nbits": 8,
+      "channel_wise": false,
+      "group_size": null,
+      "optimize": false
+    }
+  }
+}