Vasily Alexeev
committed on
Commit
·
6758e8a
1
Parent(s):
70e329e
add asymm quantized model, add two eos in code sample
Browse files- README.md +15 -6
- compressa-config.json +4 -4
- config.json +1 -1
- model-00001-of-00002.safetensors +1 -1
- quant_config.json +1 -1
README.md
CHANGED
@@ -28,7 +28,7 @@ Quantized with [OmniQuant](https://github.com/OpenGVLab/OmniQuant).
|
|
28 |
| | wiki |
|
29 |
| --------- | ---- |
|
30 |
| FP | 8,29 |
|
31 |
-
| **Quantized** |
|
32 |
|
33 |
|
34 |
### Accuracy on English Benchmarks, % (↑)
|
@@ -36,7 +36,7 @@ Quantized with [OmniQuant](https://github.com/OpenGVLab/OmniQuant).
|
|
36 |
| | piqa | arc_easy | arc_challenge | boolq | hellaswag | winogrande | mmlu_humanities | mmlu_social_sciences | mmlu_stem | mmlu_other |
|
37 |
| --------- | ---- | -------- | ------------- | ----- | --------- | ---------- | --------------- | -------------------- | --------- | ---------- |
|
38 |
| FP | 78,7 | 81,6 | 53,0 | 83,1 | 57,7 | 72,1 | 67,0 | 70,9 | 54,5 | 68,2 |
|
39 |
-
| **Quantized** | 77,
|
40 |
|
41 |
|
42 |
### Accuracy on Russian Benchmarks, % (↑)
|
@@ -44,15 +44,15 @@ Quantized with [OmniQuant](https://github.com/OpenGVLab/OmniQuant).
|
|
44 |
| | danetqa | terra | rwsd | muserc | rucos | lidirus | parus | rcb | russe | rucola |
|
45 |
| --------- | ------- | ----- | ---- | ------ | ----- | ------- | ----- | ---- | ----- | ------ |
|
46 |
| FP | 78,6 | 60,9 | 65,7 | 56,1 | 64,9 | 63,2 | 71,0 | 34,1 | 60,8 | 64,1 |
|
47 |
-
| **Quantized** |
|
48 |
|
49 |
|
50 |
### Summary
|
51 |
|
52 |
| | Avg acc diff on Eng, % (↑) | Avg acc diff on Rus, % (↑) | Occupied disk space, % (↓) |
|
53 |
-
| --------- | -------------------------- | -------------------------- |
|
54 |
-
| FP | 0 | 0 | 100
|
55 |
-
| **Quantized** | \-
|
56 |
|
57 |
|
58 |
## Examples
|
@@ -178,6 +178,13 @@ tokenizer = AutoTokenizer.from_pretrained(
|
|
178 |
model_path, use_fast=False, trust_remote_code=True
|
179 |
)
|
180 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
181 |
system_message = "You are a friendly chatbot who always responds in the style of a pirate."
|
182 |
user_message = "Where are we going, Captain?"
|
183 |
messages = [
|
@@ -194,6 +201,7 @@ inputs = {k: v.cuda() for k, v in inputs.items()}
|
|
194 |
outputs = model.generate(
|
195 |
**inputs, max_new_tokens=512,
|
196 |
do_sample=True, temperature=0.7, top_p=0.95,
|
|
|
197 |
)
|
198 |
|
199 |
response = tokenizer.decode(outputs[0])
|
@@ -210,6 +218,7 @@ print(f'Continuation:\n{continuation}\n')
|
|
210 |
pipe = pipeline(
|
211 |
"text-generation",
|
212 |
model=model, tokenizer=tokenizer,
|
|
|
213 |
max_new_tokens=512, do_sample=True,
|
214 |
temperature=0.7, top_p=0.95,
|
215 |
device=0,
|
|
|
28 |
| | wiki |
|
29 |
| --------- | ---- |
|
30 |
| FP | 8,29 |
|
31 |
+
| **Quantized** | 8,97 |
|
32 |
|
33 |
|
34 |
### Accuracy on English Benchmarks, % (↑)
|
|
|
36 |
| | piqa | arc_easy | arc_challenge | boolq | hellaswag | winogrande | mmlu_humanities | mmlu_social_sciences | mmlu_stem | mmlu_other |
|
37 |
| --------- | ---- | -------- | ------------- | ----- | --------- | ---------- | --------------- | -------------------- | --------- | ---------- |
|
38 |
| FP | 78,7 | 81,6 | 53,0 | 83,1 | 57,7 | 72,1 | 67,0 | 70,9 | 54,5 | 68,2 |
|
39 |
+
| **Quantized** | 77,2 | 80,7 | 51,8 | 82,8 | 56,8 | 72,5 | 63,4 | 67,6 | 50,1 | 65,0 |
|
40 |
|
41 |
|
42 |
### Accuracy on Russian Benchmarks, % (↑)
|
|
|
44 |
| | danetqa | terra | rwsd | muserc | rucos | lidirus | parus | rcb | russe | rucola |
|
45 |
| --------- | ------- | ----- | ---- | ------ | ----- | ------- | ----- | ---- | ----- | ------ |
|
46 |
| FP | 78,6 | 60,9 | 65,7 | 56,1 | 64,9 | 63,2 | 71,0 | 34,1 | 60,8 | 64,1 |
|
47 |
+
| **Quantized** | 71,6 | 60,6 | 52,5 | 63,7 | 57,3 | 57,2 | 74,0 | 33,6 | 36,9 | 67,5 |
|
48 |
|
49 |
|
50 |
### Summary
|
51 |
|
52 |
| | Avg acc diff on Eng, % (↑) | Avg acc diff on Rus, % (↑) | Occupied disk space, % (↓) |
|
53 |
+
| --------- | -------------------------- | -------------------------- | -------------------------- |
|
54 |
+
| FP | 0 | 0 | 100 |
|
55 |
+
| **Quantized** | \-1,9 | \-4,5 | 35,7 |
|
56 |
|
57 |
|
58 |
## Examples
|
|
|
178 |
model_path, use_fast=False, trust_remote_code=True
|
179 |
)
|
180 |
|
181 |
+
# Llama 3 "specifics"
|
182 |
+
# https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct/discussions/4
|
183 |
+
terminators = [
|
184 |
+
tokenizer.convert_tokens_to_ids("<|end_of_text|>"),
|
185 |
+
tokenizer.convert_tokens_to_ids("<|eot_id|>")
|
186 |
+
]
|
187 |
+
|
188 |
system_message = "You are a friendly chatbot who always responds in the style of a pirate."
|
189 |
user_message = "Where are we going, Captain?"
|
190 |
messages = [
|
|
|
201 |
outputs = model.generate(
|
202 |
**inputs, max_new_tokens=512,
|
203 |
do_sample=True, temperature=0.7, top_p=0.95,
|
204 |
+
eos_token_id=terminators,
|
205 |
)
|
206 |
|
207 |
response = tokenizer.decode(outputs[0])
|
|
|
218 |
pipe = pipeline(
|
219 |
"text-generation",
|
220 |
model=model, tokenizer=tokenizer,
|
221 |
+
eos_token_id=terminators,
|
222 |
max_new_tokens=512, do_sample=True,
|
223 |
temperature=0.7, top_p=0.95,
|
224 |
device=0,
|
compressa-config.json
CHANGED
@@ -4,19 +4,19 @@
|
|
4 |
"wbits": 4,
|
5 |
"abits": 16,
|
6 |
"group_size": 128,
|
7 |
-
"symmetric":
|
8 |
},
|
9 |
"resume": null,
|
10 |
"start_sample": 0,
|
11 |
"nsamples": 128,
|
12 |
-
"epochs":
|
13 |
"aug_loss": true,
|
14 |
"eval_ppl": true,
|
15 |
"real_quant": true,
|
16 |
"lwc_lr": 0.01,
|
17 |
"use_lr_scheduler": false,
|
18 |
"cache_dir": "resources/cache",
|
19 |
-
"output_dir": "resources/models/models/NousResearch_Meta-Llama-3-8B-
|
20 |
-
"save_dir": "resources/models/models/NousResearch_Meta-Llama-3-8B-
|
21 |
"config_class": "OmniquantConfig"
|
22 |
}
|
|
|
4 |
"wbits": 4,
|
5 |
"abits": 16,
|
6 |
"group_size": 128,
|
7 |
+
"symmetric": false
|
8 |
},
|
9 |
"resume": null,
|
10 |
"start_sample": 0,
|
11 |
"nsamples": 128,
|
12 |
+
"epochs": 20,
|
13 |
"aug_loss": true,
|
14 |
"eval_ppl": true,
|
15 |
"real_quant": true,
|
16 |
"lwc_lr": 0.01,
|
17 |
"use_lr_scheduler": false,
|
18 |
"cache_dir": "resources/cache",
|
19 |
+
"output_dir": "resources/models/models/NousResearch_Meta-Llama-3-8B-Instruct_omniquant_asymm_e20/logs",
|
20 |
+
"save_dir": "resources/models/models/NousResearch_Meta-Llama-3-8B-Instruct_omniquant_asymm_e20/NousResearch_Meta-Llama-3-8B-Instruct",
|
21 |
"config_class": "OmniquantConfig"
|
22 |
}
|
config.json
CHANGED
@@ -29,7 +29,7 @@
|
|
29 |
"quant_method": "gptq",
|
30 |
"bits": 4,
|
31 |
"group_size": 128,
|
32 |
-
"sym":
|
33 |
"desc_act": true,
|
34 |
"disable_exllama": true
|
35 |
}
|
|
|
29 |
"quant_method": "gptq",
|
30 |
"bits": 4,
|
31 |
"group_size": 128,
|
32 |
+
"sym": false,
|
33 |
"desc_act": true,
|
34 |
"disable_exllama": true
|
35 |
}
|
model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4682270360
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1e6dabb82f808aafc414164a93f5c0ba4a631ec63f331bea0fc5e330c691e2c0
|
3 |
size 4682270360
|
quant_config.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"wbits": 4, "abits": 16, "group_size": 128, "symmetric":
|
|
|
1 |
+
{"wbits": 4, "abits": 16, "group_size": 128, "symmetric": false}
|