Shubhra Pandit committed
Commit c7228d1 · Parent(s): c70235c
Update model files

Files changed:
- config.json +6 -6
- generation_config.json +1 -1
- model.safetensors +2 -2
- recipe.yaml +3 -3
config.json
CHANGED
@@ -30,10 +30,10 @@
         "Linear"
       ],
       "weights": {
-        "actorder":
+        "actorder": null,
         "block_structure": null,
         "dynamic": false,
-        "group_size":
+        "group_size": 64,
         "num_bits": 4,
         "observer": "mse",
         "observer_kwargs": {},
@@ -44,7 +44,7 @@
        }
      }
    },
    "format": "pack-quantized",
-   "global_compression_ratio": 2.
+   "global_compression_ratio": 2.0771812517233883,
    "ignore": [
      "lm_head"
    ],
@@ -53,7 +53,7 @@
    "quantization_status": "compressed",
    "sparsity_config": {
      "format": "dense",
-     "global_sparsity": 0.
+     "global_sparsity": 0.13633800160480383,
      "ignore": [],
      "registry_requires_subclass": false,
      "sparsity_structure": "unstructured",
@@ -66,7 +66,7 @@
   "rope_theta": 5000000.0,
   "tie_word_embeddings": true,
   "torch_dtype": "bfloat16",
-  "transformers_version": "4.
+  "transformers_version": "4.47.1",
   "use_cache": true,
   "vocab_size": 49155
- }
+ }
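For reference, the updated quantization settings can be read back once the files are downloaded. The sketch below is illustrative only: the repository id "org/model-w4a16" is a placeholder (not part of this commit), and it assumes the usual compressed-tensors layout in which the block shown above sits under quantization_config -> config_groups -> group_0.

# Minimal sketch: inspect the quantization settings updated in this commit.
# "org/model-w4a16" is a hypothetical repository id used for illustration only.
from transformers import AutoConfig

config = AutoConfig.from_pretrained("org/model-w4a16")
qcfg = config.quantization_config                       # dict parsed from config.json
weights = qcfg["config_groups"]["group_0"]["weights"]   # assumed nesting

print(qcfg["format"])          # "pack-quantized"
print(weights["num_bits"])     # 4
print(weights["group_size"])   # 64 after this commit
print(weights["actorder"])     # None after this commit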
generation_config.json
CHANGED
@@ -3,5 +3,5 @@
   "bos_token_id": 0,
   "eos_token_id": 0,
   "pad_token_id": 0,
-  "transformers_version": "4.
+  "transformers_version": "4.47.1"
 }
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:80eacfccfdee065bac8252fc7e806717b506678e7e35090eb0b3246380e9b4fb
+size 1695488752
recipe.yaml
CHANGED
@@ -2,10 +2,10 @@ quant_stage:
   quant_modifiers:
     GPTQModifier:
       sequential_update: true
-      dampening_frac: 0.
+      dampening_frac: 0.01
       ignore: [lm_head]
       config_groups:
         group_0:
-          weights: {num_bits: 4, type: int, symmetric: true, strategy: group, group_size:
-          observer: mse
+          weights: {num_bits: 4, type: int, symmetric: true, strategy: group, group_size: 64,
+            observer: mse}
           targets: [Linear]
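For context, a recipe like the one above is typically applied through llm-compressor's one-shot calibration flow. The sketch below is an assumption about that workflow, not part of this commit; the model id, calibration dataset, and sample counts are illustrative placeholders.

# Minimal sketch: apply a GPTQ recipe like recipe.yaml with llm-compressor (assumed API).
# Model id, dataset, and calibration settings are illustrative assumptions.
from llmcompressor.transformers import oneshot

oneshot(
    model="org/base-model",           # hypothetical dense checkpoint to quantize
    dataset="open_platypus",          # calibration dataset (assumed)
    recipe="recipe.yaml",             # the recipe updated in this commit
    output_dir="model-w4a16-gptq",    # where the compressed files are written
    max_seq_length=2048,
    num_calibration_samples=512,
)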