import os |
from typing import TYPE_CHECKING, Optional

import fire
from peft import LoftQConfig, LoraConfig, TaskType, get_peft_model
from transformers import AutoModelForCausalLM, AutoTokenizer


if TYPE_CHECKING: |
    from transformers import PreTrainedModel


def quantize_loftq( |
    model_name_or_path: str,
    output_dir: str,
    loftq_bits: int = 4,
    loftq_iter: int = 4,
    lora_alpha: Optional[int] = None,
    lora_rank: int = 16,
    lora_dropout: float = 0.0,
    lora_target: tuple = ("q_proj", "v_proj"),
    save_safetensors: bool = True,
): |
    r"""
    Initializes LoRA weights with LoRA-fine-tuning-aware Quantization (LoftQ).

    Usage: python loftq_init.py --model_name_or_path path_to_model --output_dir output_dir
    """
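    # fire may pass `--lora_target q_proj,v_proj` as a single comma-separated string; normalize it to a list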
    if isinstance(lora_target, str):
        lora_target = [name.strip() for name in lora_target.split(",")]

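    # load the tokenizer and the full-precision model; torch_dtype="auto" keeps the checkpoint's dtype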
    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(model_name_or_path, trust_remote_code=True, torch_dtype="auto")

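    # build the LoftQ-aware LoRA config; when lora_alpha is unset, default to twice the rank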
    loftq_config = LoftQConfig(loftq_bits=loftq_bits, loftq_iter=loftq_iter)
    lora_config = LoraConfig(
        task_type=TaskType.CAUSAL_LM,
        inference_mode=True,
        r=lora_rank,
        lora_alpha=lora_alpha if lora_alpha is not None else lora_rank * 2,
        lora_dropout=lora_dropout,
        target_modules=lora_target,
        init_lora_weights="loftq",
        loftq_config=loftq_config,
    )
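    # materialize the LoftQ initialization: quantize the base weights and fit low-rank
    # adapters that compensate for the quantization error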
    print("Initializing LoftQ weights, this may take several minutes; please be patient.")
    peft_model = get_peft_model(model, lora_config)
    loftq_dir = os.path.join(output_dir, "loftq_init")

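    # save the adapter; point it at the exported base model and set init_lora_weights=True
    # so that loading the adapter later does not re-run the LoftQ initialization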
    setattr(peft_model.peft_config["default"], "base_model_name_or_path", os.path.abspath(output_dir))
    setattr(peft_model.peft_config["default"], "init_lora_weights", True)
    peft_model.save_pretrained(loftq_dir, safe_serialization=save_safetensors)
    print("Adapter weights saved in {}".format(loftq_dir))
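    # detach the adapters and save the modified base model together with the tokenizer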
    base_model: "PreTrainedModel" = peft_model.unload()
    base_model.save_pretrained(output_dir, safe_serialization=save_safetensors)
    tokenizer.save_pretrained(output_dir)
    print("Model weights saved in {}".format(output_dir))
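    # print the settings needed to fine-tune from the exported artifacts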
    print("- Fine-tune this model with:")
    print("model_name_or_path: {}".format(output_dir))
    print("adapter_name_or_path: {}".format(loftq_dir))
    print("finetuning_type: lora")
    print("quantization_bit: {}".format(loftq_bits))


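# Example invocation (a sketch; path_to_model and path_to_output are placeholders):
#   python loftq_init.py --model_name_or_path path_to_model --output_dir path_to_output --loftq_bits 4 --lora_rank 16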
if __name__ == "__main__": |
    fire.Fire(quantize_loftq)