nm-testing
/

TinyLlama-1.1B-Chat-v1.0-W4A16_2of4_channel-e2e

Text Generation

text-generation-inference

Inference Endpoints

compressed-tensors

Model card · Files and versions · Community

nm-autobot committed on Dec 18, 2024

Commit

cf51837

·

verified ·

1 Parent(s): ad658e5

Upload folder using huggingface_hub

Files changed (4) hide show

config.json +2 -2
generation_config.json +1 -1
model.safetensors +1 -1
recipe.yaml +1 -2

config.json CHANGED Viewed

@@ -51,7 +51,7 @@
     "quantization_status": "compressed",
     "sparsity_config": {
       "format": "dense",
-      "global_sparsity": 0.45357314542111843,
       "ignore": [
         "lm_head"
       ],
@@ -67,7 +67,7 @@
   "rope_theta": 10000.0,
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
-  "transformers_version": "4.47.0",
   "use_cache": true,
   "vocab_size": 32000
 }

     "quantization_status": "compressed",
     "sparsity_config": {
       "format": "dense",
+      "global_sparsity": 0.45357336616227273,
       "ignore": [
         "lm_head"
       ],
   "rope_theta": 10000.0,
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
+  "transformers_version": "4.47.1",
   "use_cache": true,
   "vocab_size": 32000
 }

generation_config.json CHANGED Viewed

@@ -3,5 +3,5 @@
   "eos_token_id": 2,
   "max_length": 2048,
   "pad_token_id": 0,
-  "transformers_version": "4.47.0"
 }

   "eos_token_id": 2,
   "max_length": 2048,
   "pad_token_id": 0,
+  "transformers_version": "4.47.1"
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cf5127ce6ddab4186b303b90b89201d53ccce52a214ba34ae704a72509dbe6a3
 size 626506392

 version https://git-lfs.github.com/spec/v1
+oid sha256:fd7a041519eb0710a1676020d8bf538f961d7f77bb2392ed001cb0f2bef707c2
 size 626506392

recipe.yaml CHANGED Viewed

@@ -1,8 +1,7 @@
 sparsity_stage:
   sparsity_modifiers:
     SparseGPTModifier: {sparsity: 0.5, mask_structure: '2:4', sequential_update: false}
-  run_type: &id001 !!python/object/apply:builtins.getattr [!!python/name:llmcompressor.recipe.stage.StageRunType '',
-    ONESHOT]
 quantization_stage:
   quantization_modifiers:
     GPTQModifier:

 sparsity_stage:
   sparsity_modifiers:
     SparseGPTModifier: {sparsity: 0.5, mask_structure: '2:4', sequential_update: false}
+  run_type: &id001 !!python/object/apply:llmcompressor.recipe.stage.StageRunType [oneshot]
 quantization_stage:
   quantization_modifiers:
     GPTQModifier: