|
A 4-bit AWQ-quantized version of the [lightblue/Karasu-Mixtral-8x22B-v0.1](https://huggingface.co/lightblue/Karasu-Mixtral-8x22B-v0.1) model.
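
AWQ checkpoints of this size are typically served with [vLLM](https://github.com/vllm-project/vllm). Below is a minimal, untested sketch of what inference might look like; the repository id and `tensor_parallel_size` are placeholders to replace with this repo's actual id and your own GPU count:

```python
from vllm import LLM, SamplingParams

# Placeholder repo id and GPU count: substitute this repository's actual
# Hugging Face id and however many GPUs you have available.
llm = LLM(
    model="lightblue/Karasu-Mixtral-8x22B-v0.1-awq",
    quantization="awq",
    tensor_parallel_size=4,
)

sampling_params = SamplingParams(temperature=0.7, max_tokens=256)
outputs = llm.generate(["What is the capital of Japan?"], sampling_params)
print(outputs[0].outputs[0].text)
```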
|
|
|
The model was quantized with [AutoAWQ](https://github.com/casper-hansen/AutoAWQ) using the following code:
|
|
|
```python
from awq import AutoAWQForCausalLM
import pandas as pd
from transformers import AutoTokenizer
from tqdm.auto import tqdm

pretrained_model_dir = '/workspace/llm_training/axolotl/mixtral_8x22B_training/merged_model_multiling'
quantized_model_dir = '/workspace/llm_training/axolotl/mixtral_8x22B_training/merged_model_multiling-awq'

# The same dataset as in lightblue/gpt4_conversations_multilingual
df = pd.read_json(
    "/workspace/llm_training/axolotl/mixtral_8x22B_training/sharegpt4_multilingual.json",
    lines=True)

# Map ShareGPT roles to the roles expected by the chat template
role_map = {
    "human": "user",
    "gpt": "assistant",
}

# Convert each ShareGPT conversation into a list of chat messages
df["messages"] = df.conversations.apply(lambda x: [{"role": role_map[y["from"]], "content": y["value"]} for y in x])

# Render each conversation with the chat template to build the calibration texts
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_dir, use_fast=True)
examples = [
    tokenizer.apply_chat_template(
        x, tokenize=False, add_generation_prompt=False
    ) for x in tqdm(df["messages"])
]

# 4-bit weights, group size 128, zero-point quantization, GEMM kernels
quant_config = { "zero_point": True, "q_group_size": 128, "w_bit": 4, "version": "GEMM" }

# Load model
model = AutoAWQForCausalLM.from_pretrained(pretrained_model_dir)

# Quantize, using the rendered conversations as calibration data
model.quantize(tokenizer, quant_config=quant_config, calib_data=examples)

# Save quantized model and tokenizer
model.save_quantized(quantized_model_dir)
tokenizer.save_pretrained(quantized_model_dir)
```
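
To sanity-check the result, the quantized folder can be reloaded with AutoAWQ. A minimal sketch (untested here; note that an 8x22B model needs substantial GPU memory even at 4 bits):

```python
from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer

quantized_model_dir = '/workspace/llm_training/axolotl/mixtral_8x22B_training/merged_model_multiling-awq'

# Reload the quantized weights (fuse_layers enables AutoAWQ's fused modules)
model = AutoAWQForCausalLM.from_quantized(quantized_model_dir, fuse_layers=True)
tokenizer = AutoTokenizer.from_pretrained(quantized_model_dir)

# Build a prompt with the chat template and generate a short reply
prompt = tokenizer.apply_chat_template(
    [{"role": "user", "content": "Hello!"}],
    tokenize=False, add_generation_prompt=True,
)
inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
output = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```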