"""Download a HuggingFace model, convert it to gguf, and quantize it."""

import argparse
import os
import subprocess
from pathlib import Path

from git import Repo


def clone_hf_with_git(username: str, model_name: str, saved_dir: str):
    """Clone a HuggingFace model repo over HTTPS into `saved_dir`."""
    full_model_name = f"{username}/{model_name}"
    url = f"https://huggingface.co/{full_model_name}"
    saved = f"{saved_dir}/{model_name}"

    # Ensure git-lfs is set up so the large weight files are actually fetched.
    subprocess.run(["git", "lfs", "install"], check=True)

    print(f"[INFO] Cloning {model_name} from {url} ...")
    Repo.clone_from(url, saved)


def download_hf_with_git(full_name: str, saved_dir: str):
    """Clone a HuggingFace model repo over SSH into `saved_dir`."""
    model_name = full_name.split("/")[1]
    url = f"git@hf.co:{full_name}"
    saved = f"{saved_dir}/{model_name}"

    # Ensure git-lfs is set up so the large weight files are actually fetched.
    subprocess.run(["git", "lfs", "install"], check=True)

    print(f"Cloning {model_name} from {url} ...")
    subprocess.run(["git", "clone", "--progress", url, saved], check=True)


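# Note: clone_hf_with_git fetches over anonymous HTTPS, while
# download_hf_with_git fetches over SSH and needs an SSH key registered with
# HuggingFace.
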
def convert_hf_to_gguf(
    script_path: str,
    dir_raw_model: str,
    gguf_model_path: str,
    pad_vocab: bool = False,
):
    """Run a llama.cpp conversion script to turn a raw HF model into a gguf file."""
    if pad_vocab:
        args = [
            "--outfile",
            gguf_model_path,
            "--pad-vocab",
            dir_raw_model,
        ]
    else:
        args = ["--outfile", gguf_model_path, dir_raw_model]

    res = subprocess.run(["python", script_path] + args, check=True)
    print(res)


def quantize_model(
    quantizer: str,
    f16_gguf_model_path: str,
    quantized_gguf_model_path: str,
    quant_type: str = "q4_0",
):
    """Quantize an f16 gguf model with the given `quantize` binary."""
    print(f"[INFO] quantizer: {quantizer}")
    print(f"[INFO] quant_type: {quant_type}")
    print(f"[INFO] f16_gguf_model_path: {f16_gguf_model_path}")
    print(f"[INFO] quantized_model_filename: {quantized_gguf_model_path}")
    subprocess.run(
        [
            quantizer,
            f16_gguf_model_path,
            quantized_gguf_model_path,
            quant_type,
        ],
        check=True,
    )


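# A minimal usage sketch for quantize_model (hypothetical paths; assumes a
# local llama.cpp build provides the `quantize` binary):
#
#   quantize_model("llama.cpp/quantize", "models/foo-gguf/foo-f16.gguf",
#                  "models/foo-gguf/foo-Q4_0.gguf", "Q4_0")
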
def main():
    parser = argparse.ArgumentParser(description="Convert and quantize gguf models.")
    parser.add_argument(
        "--full-name",
        type=str,
        required=True,
        help="HuggingFace model full name, e.g. `username/model_name`.",
    )
    parser.add_argument(
        "-s",
        "--saved-dir",
        type=str,
        default="models",
        help="The directory to save the model.",
    )
    parser.add_argument(
        "--enable-converter",
        action="store_true",
        help="Enable the converter. Note that `--converter` must also be specified.",
    )
    parser.add_argument(
        "-c",
        "--converter",
        type=str,
        help="The path to the converter. Note that `--enable-converter` must be specified if this option is used.",
    )
    parser.add_argument(
        "--pad-vocab",
        action="store_true",
        help="Add pad tokens when the model vocab expects more tokens than the tokenizer metadata provides. Note that `--enable-converter` must be specified.",
    )
    parser.add_argument(
        "--enable-quantizer",
        action="store_true",
        help="Enable the quantizer. Note that `--quantizer` must also be specified.",
    )
    parser.add_argument(
        "-q",
        "--quantizer",
        type=str,
        help="The path to the quantizer. Note that `--enable-quantizer` must be specified if this option is used.",
    )
    parser.add_argument(
        "-t",
        "--quant-type",
        type=str,
        default=None,
        help="The quantization type. Note that `--enable-quantizer` must be specified if this option is used.",
    )

    args = parser.parse_args()
    print(args)

    print("Download model ...")
    full_name = args.full_name
    username, model_name = full_name.split("/")
    saved_dir = args.saved_dir

    # Create the output directory if needed, then fetch the raw model over HTTPS.
    if not os.path.exists(saved_dir):
        os.makedirs(saved_dir)
    clone_hf_with_git(username, model_name, saved_dir)

    if args.enable_converter:
        print("[CONVERTER] Convert model ...")
        converter = args.converter

        raw_model_dir = f"{saved_dir}/{model_name}"
        print(f"[CONVERTER] raw_model_dir: {raw_model_dir}")

        gguf_model_dir = Path(raw_model_dir).parent / f"{model_name}-gguf"
        if not gguf_model_dir.exists():
            gguf_model_dir.mkdir()
        f16_gguf_model_path = gguf_model_dir / f"{model_name}-f16.gguf"

        print(f"[CONVERTER] f16_gguf_model_path: {f16_gguf_model_path}")

        try:
            convert_hf_to_gguf(
                converter,
                raw_model_dir,
                str(f16_gguf_model_path),
                args.pad_vocab,
            )
            print(f"The f16 gguf model is saved in {f16_gguf_model_path}.")
        except Exception as e:
            print(e)
            print("Failed to convert model.")
            return

    if args.enable_quantizer:
        print("[QUANTIZER] Quantize model ...")
        quantizer = args.quantizer
        print(f"[QUANTIZER] quantizer: {quantizer}")

        # Recompute the gguf paths so that quantization also works when the
        # converter step is skipped.
        gguf_model_dir = Path(saved_dir) / f"{model_name}-gguf"
        f16_gguf_model_path = gguf_model_dir / f"{model_name}-f16.gguf"

        if args.quant_type is not None:
            quant_type = args.quant_type
            quantized_gguf_model_path = (
                gguf_model_dir / f"{model_name}-{quant_type}.gguf"
            )

            print(f"[QUANTIZER] quant_type: {quant_type}")
            print(f"[QUANTIZER] quantized_model_filename: {quantized_gguf_model_path}")

            try:
                quantize_model(
                    quantizer,
                    str(f16_gguf_model_path),
                    str(quantized_gguf_model_path),
                    quant_type,
                )
                print(
                    f"The quantized gguf model is saved in {quantized_gguf_model_path}."
                )
            except Exception as e:
                print(e)
                print("Failed to quantize model.")
                return
        else:
            # Default to sweeping a set of llama.cpp quantization types.
            for quant_type in [
                "Q2_K",
                "Q3_K_L",
                "Q3_K_M",
                "Q3_K_S",
                "Q4_0",
                "Q4_K_M",
                "Q4_K_S",
                "Q5_0",
                "Q5_K_M",
                "Q5_K_S",
                "Q6_K",
                "Q8_0",
            ]:
                quantized_gguf_model_path = (
                    gguf_model_dir / f"{model_name}-{quant_type}.gguf"
                )

                print(f"[QUANTIZER] quant_type: {quant_type}")
                print(
                    f"[QUANTIZER] quantized_model_filename: {quantized_gguf_model_path}"
                )

                try:
                    quantize_model(
                        quantizer,
                        str(f16_gguf_model_path),
                        str(quantized_gguf_model_path),
                        quant_type,
                    )
                    print(
                        f"The quantized gguf model is saved in {quantized_gguf_model_path}."
                    )
                except Exception as e:
                    print(e)
                    print("Failed to quantize model.")
                    return

    print("Done.")


if __name__ == "__main__":
    main()
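
# A minimal end-to-end usage sketch (hypothetical script name and tool paths;
# the conversion script and `quantize` binary are assumed to come from a local
# llama.cpp checkout):
#
#   python convert_and_quantize.py --full-name username/model_name \
#       --enable-converter --converter llama.cpp/convert-hf-to-gguf.py \
#       --enable-quantizer --quantizer llama.cpp/quantize --quant-type Q4_0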