mgoin committed
Commit c387acd · 1 Parent(s): 3095bb3

Upload folder using huggingface_hub

Files changed (5)
  1. config.json +1 -1
  2. model-orig.onnx +2 -2
  3. model.data +2 -2
  4. model.onnx +2 -2
  5. recipe.yaml +16 -2
config.json CHANGED
@@ -22,7 +22,7 @@
   "rope_theta": 10000.0,
   "tie_word_embeddings": false,
   "torch_dtype": "float32",
-  "transformers_version": "1.6.0.20231121",
+  "transformers_version": "1.6.0.20231122",
   "use_cache": true,
   "vocab_size": 32003
 }
model-orig.onnx CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e53c836a8f11f46fc72285b2e4a3c3d36b6ded1476d8ef8b6c557e44b01b5150
-size 1507291
+oid sha256:b4b8075e7628dabe70c67c2be3b8ab3422b5a53ace4f26cc2c6b0a14837ce11e
+size 1446314
model.data CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7307f75e29d15fa819f2ed5a87f5f73707972b7e0ff4ba473379004a297d092c
-size 14083016704
+oid sha256:e84a03128829eade0bb965b7e873f9d7adbc461d12f7172dc497876e50efb8f4
+size 14091612943
model.onnx CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9d10256e6878786d1e850a33e4ad89d6d8e3f88380e8684544f43584b88be6df
-size 1489716
+oid sha256:364bf53db1ca1d5288dd847cd99a7b99bb7918475eabd9f6beea40380764715d
+size 1428739
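
The three LFS pointer updates above only swap out content hashes and byte sizes. To confirm that a locally downloaded artifact matches its pointer, a minimal standard-library sketch (the path is a placeholder, not part of this commit):

    import hashlib
    import os

    # Placeholder path; substitute whichever artifact you pulled from this repo.
    path = "model.onnx"

    # Stream the file so multi-GB artifacts like model.data never load fully into RAM.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)

    # Compare these against the "oid sha256:..." and "size ..." lines of the pointer.
    print("oid sha256:", h.hexdigest())
    print("size:", os.path.getsize(path))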
recipe.yaml CHANGED
@@ -11,16 +11,29 @@ test_stage:
       - LlamaRotaryEmbedding
       - LlamaRMSNorm
       - SiLUActivation
-      # Skip quantizing the BMMs
-      - QuantizableMatMul
       # Skip quantizing the layers with the most sensitive activations
       - model.layers.3.mlp.down_proj
       - model.layers.38.mlp.down_proj
       - model.layers.39.mlp.down_proj
       - model.layers.0.mlp.down_proj
       - model.layers.37.mlp.down_proj
+      - MatMulOutput_QK
+      - MatMulOutput_PV
     post_oneshot_calibration: true
     scheme_overrides:
+      Linear:
+        weights:
+          num_bits: 8
+          symmetric: true
+          strategy: channel
+      MatMulLeftInput_QK:
+        input_activations:
+          num_bits: 8
+          symmetric: true
+      MatMulLeftInput_PV:
+        input_activations:
+          num_bits: 8
+          symmetric: true
       Embedding:
         input_activations: null
         weights:
@@ -34,3 +47,4 @@ test_stage:
       percdamp: 0.01
       mask_structure: "0:0"
       targets: ["re:model.layers.\\d*$"]
+
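
The substantive change is in recipe.yaml: instead of skipping every QuantizableMatMul, the recipe now quantizes only the left inputs of the attention QK and PV matmuls (8-bit, symmetric) while adding their outputs to the ignore list, and Linear weights get an explicit 8-bit symmetric per-channel override. As a rough numpy sketch of what "num_bits: 8, symmetric: true, strategy: channel" does to a weight matrix (an illustration of the scheme only, not SparseML's actual observer code):

    import numpy as np

    def quantize_per_channel_symmetric_int8(w: np.ndarray):
        """Symmetric INT8 quantization with one scale per output channel (row).

        Mirrors the recipe's num_bits/symmetric/strategy settings in spirit;
        SparseML's calibration-based observers differ in detail.
        """
        # One scale per output channel, sized so the largest magnitude maps to 127.
        max_abs = np.max(np.abs(w), axis=1, keepdims=True)
        scale = np.where(max_abs == 0, 1.0, max_abs / 127.0)
        q = np.clip(np.round(w / scale), -127, 127).astype(np.int8)
        return q, scale

    w = np.random.randn(4, 8).astype(np.float32)
    q, scale = quantize_per_channel_symmetric_int8(w)
    print("max reconstruction error:", np.max(np.abs(q * scale - w)))

Per the diff, the matmul outputs (MatMulOutput_QK, MatMulOutput_PV) stay unquantized alongside the most sensitive down_proj layers; only the left-hand matmul inputs carry 8-bit activation quantization.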