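"""Download a Hugging Face model, convert it to GGUF, and quantize it with llama.cpp tools.

Example invocation (a sketch: the script name, paths, and model name below are
placeholders, not values from this repository):

    python download_convert_quantize.py \
        --full-name username/model_name \
        --enable-converter --converter /path/to/convert-hf-to-gguf.py \
        --enable-quantizer --quantizer /path/to/quantize --quant-type q4_0
"""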
import argparse
import shutil
import subprocess
from pathlib import Path

from git import Repo


def clone_hf_with_git(username: str, model_name: str, saved_dir: str):
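    """Clone a Hugging Face model repo over HTTPS with GitPython; requires `git` and `git-lfs` on PATH."""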
full_model_name = f"{username}/{model_name}"
url = f"https://huggingface.co/{full_model_name}"
saved = f"{saved_dir}/{model_name}"
# perform `git lfs install`
    subprocess.run(["git", "lfs", "install"], check=True)
print(f"[INFO] Cloning {model_name} from {url} ...")
Repo.clone_from(url, saved)


def download_hf_with_git(full_name: str, saved_dir: str):
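    """Clone a Hugging Face model repo over SSH with the git CLI; requires an SSH key registered with Hugging Face."""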
model_name = full_name.split("/")[1]
url = f"[email protected]:{full_name}"
saved = f"{saved_dir}/{model_name}"
# perform `git lfs install`
    subprocess.run(["git", "lfs", "install"], check=True)
    print(f"[INFO] Cloning {model_name} from {url} ...")
    subprocess.run(["git", "clone", "--progress", url, saved], check=True)


def convert_hf_to_gguf(
script_path: str,
dir_raw_model: str,
gguf_model_path: str,
pad_vocab: bool = False,
):
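    """Run the llama.cpp convert script at `script_path` to emit an f16 GGUF file at `gguf_model_path`."""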
    if pad_vocab:
args = [
"--outfile",
gguf_model_path,
# "--vocab-type",
# "bpe",
"--pad-vocab",
dir_raw_model,
]
else:
args = ["--outfile", gguf_model_path, dir_raw_model]
# convert.py for llama-3
# args = ["--outfile", gguf_model_path, "--vocab-type", "bpe", dir_raw_model]
    res = subprocess.run(["python", script_path] + args, check=True)
    print(res)


def quantize_model(
quantizer: str,
f16_gguf_model_path: str,
quantized_gguf_model_path: str,
quant_type: str = "q4_0",
):
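    """Run the llama.cpp quantize binary at `quantizer` to write a `quant_type`-quantized GGUF."""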
print(f"[INFO] quantizer: {quantizer}")
print(f"[INFO] quant_type: {quant_type}")
print(f"[INFO] f16_gguf_model_path: {f16_gguf_model_path}")
print(f"[INFO] quantized_model_filename: {quantized_gguf_model_path}")
    subprocess.run(
        [
            quantizer,
            f16_gguf_model_path,
            quantized_gguf_model_path,
            quant_type,
        ],
        check=True,
    )


def main():
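    """Parse the CLI arguments, then download, convert, and quantize the requested model."""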
parser = argparse.ArgumentParser(description="Convert and quantize gguf models.")
    parser.add_argument(
        "--full-name",
        type=str,
        required=True,
        help="Hugging Face model full name, e.g. `username/model_name`.",
    )
parser.add_argument(
"-s",
"--saved-dir",
type=str,
default="models",
help="The directory to save the model.",
)
parser.add_argument(
"--enable-converter",
action="store_true",
        help="Enable the converter. Note that `--converter` must also be specified.",
)
parser.add_argument(
"-c",
"--converter",
type=str,
        help="The path to the converter script. Note that `--enable-converter` must also be specified when using this option.",
)
parser.add_argument(
"--pad-vocab",
action="store_true",
        help="Add pad tokens when the model vocab expects more tokens than the tokenizer metadata provides. Note that `--enable-converter` must also be specified.",
)
parser.add_argument(
"--enable-quantizer",
action="store_true",
        help="Enable the quantizer. Note that `--quantizer` must also be specified.",
)
parser.add_argument(
"-q",
"--quantizer",
type=str,
        help="The path to the quantizer binary. Note that `--enable-quantizer` must also be specified when using this option.",
)
parser.add_argument(
"-t",
"--quant-type",
type=str,
default=None,
        help="The quantization type, e.g. `q4_0`. Note that `--enable-quantizer` must also be specified when using this option.",
)
args = parser.parse_args()
print(args)
    print("[INFO] Download model ...")
    full_name = args.full_name
    username, model_name = full_name.split("/")
    saved_dir = args.saved_dir
    try:
        download_hf_with_git(full_name, saved_dir)
        print(f"The raw model is saved in {saved_dir}.")
    except Exception as e:
        print(f"Failed to download model. {e}")
        return

    # Compute the shared paths up front so the quantizer step also works
    # when the converter step is skipped.
    raw_model_dir = f"{saved_dir}/{model_name}"
    gguf_model_dir = Path(raw_model_dir).parent / f"{model_name}-gguf"
    f16_gguf_model_path = gguf_model_dir / f"{model_name}-f16.gguf"

    if args.enable_converter:
        print("[CONVERTER] Convert model ...")
        converter = args.converter
        print(f"[CONVERTER] raw_model_dir: {raw_model_dir}")
        if not gguf_model_dir.exists():
            gguf_model_dir.mkdir()
        print(f"[CONVERTER] f16_gguf_model_path: {f16_gguf_model_path}")
        try:
            convert_hf_to_gguf(
                converter,
                raw_model_dir,
                str(f16_gguf_model_path),
                args.pad_vocab,
            )
            print(f"The converted gguf model is saved in {f16_gguf_model_path}.")
        except Exception as e:
            print(f"Failed to convert model. {e}")
            return
    if args.enable_quantizer:
        print("[QUANTIZER] Quantize model ...")
        quantizer = args.quantizer
        print(f"[QUANTIZER] quantizer: {quantizer}")
        # Quantize to the requested type, or to the default set of types when
        # `--quant-type` is not given.
        if args.quant_type is not None:
            quant_types = [args.quant_type]
        else:
            quant_types = [
                # "Q2_K",
                # "Q3_K_L",
                # "Q3_K_M",
                # "Q3_K_S",
                # "Q4_0",
                # "Q4_K_M",
                # "Q4_K_S",
                # "Q5_0",
                "Q5_K_M",
                # "Q5_K_S",
                "Q6_K",
                "Q8_0",
            ]
        for quant_type in quant_types:
            quantized_gguf_model_path = gguf_model_dir / f"{model_name}-{quant_type}.gguf"
            print(f"[QUANTIZER] quant_type: {quant_type}")
            print(f"[QUANTIZER] quantized_model_filename: {quantized_gguf_model_path}")
            try:
                quantize_model(
                    quantizer,
                    str(f16_gguf_model_path),
                    str(quantized_gguf_model_path),
                    quant_type,
                )
                print(f"The quantized gguf model is saved in {quantized_gguf_model_path}.")
            except Exception as e:
                print(e)
                print("Failed to quantize model.")
                return
# # remove the raw model dir for saving space
# print(f"The quantization is done. Remove {raw_model_dir}")
# shutil.rmtree(raw_model_dir)
print("Done.")


if __name__ == "__main__":
main()