Upload folder using huggingface_hub

#2
Files changed (3) hide show
  1. config.json +21 -1
  2. qmodel.pt +2 -2
  3. smash_config.json +1 -1
config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "_attn_implementation_autoset": true,
3
- "_name_or_path": "deepseek-ai/deepseek-coder-6.7b-instruct",
4
  "architectures": [
5
  "LlamaForCausalLM"
6
  ],
@@ -20,6 +20,26 @@
20
  "num_hidden_layers": 32,
21
  "num_key_value_heads": 32,
22
  "pretraining_tp": 1,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  "rms_norm_eps": 1e-06,
24
  "rope_scaling": {
25
  "factor": 4.0,
 
1
  {
2
  "_attn_implementation_autoset": true,
3
+ "_name_or_path": "/tmp/models/tmpkeb95yvm/tmpdikbvq1u",
4
  "architectures": [
5
  "LlamaForCausalLM"
6
  ],
 
20
  "num_hidden_layers": 32,
21
  "num_key_value_heads": 32,
22
  "pretraining_tp": 1,
23
+ "quantization_config": {
24
+ "quant_config": {
25
+ "offload_meta": false,
26
+ "scale_quant_params": null,
27
+ "weight_quant_params": {
28
+ "axis": 1,
29
+ "channel_wise": true,
30
+ "group_size": 64,
31
+ "nbits": 8,
32
+ "optimize": true,
33
+ "round_zero": false,
34
+ "view_as_float": false
35
+ },
36
+ "zero_quant_params": null
37
+ },
38
+ "quant_method": "hqq",
39
+ "skip_modules": [
40
+ "lm_head"
41
+ ]
42
+ },
43
  "rms_norm_eps": 1e-06,
44
  "rope_scaling": {
45
  "factor": 4.0,
qmodel.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:59f908d3159c3d0473c0a0b6cb7e4b8a527ab7f391f72bfde65f1669170fde2d
3
- size 7410020113
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6be1e95f80fe53a8f2de4d3c07a98c35c66709d8057649bea216acd9b8c19310
3
+ size 8079534477
smash_config.json CHANGED
@@ -11,7 +11,7 @@
11
  "quant_hqq_weight_bits": 8,
12
  "max_batch_size": 1,
13
  "device": "cuda",
14
- "cache_dir": "/tmp/models/tmpj7ftrlmy",
15
  "task": "",
16
  "save_load_fn": "hqq",
17
  "save_load_fn_args": {},
 
11
  "quant_hqq_weight_bits": 8,
12
  "max_batch_size": 1,
13
  "device": "cuda",
14
+ "cache_dir": "/tmp/models/tmpkeb95yvm",
15
  "task": "",
16
  "save_load_fn": "hqq",
17
  "save_load_fn_args": {},