File size: 7,467 Bytes

1e57684

import argparse
import os
import shutil
import subprocess
import time
from pathlib import Path

from git import Repo


def clone_hf_with_git(username: str, model_name: str, saved_dir: str):
    """Clone a Hugging Face model repository over HTTPS using GitPython.

    Args:
        username: Owner part of the repository id.
        model_name: Repository name; also the name of the checkout directory.
        saved_dir: Directory under which the checkout is created.
    """
    repo_url = "https://huggingface.co/{}/{}".format(username, model_name)
    checkout_dir = "{}/{}".format(saved_dir, model_name)

    # Run `git lfs install` before cloning; its exit status is not inspected.
    subprocess.run(["git", "lfs", "install"])

    print(f"[INFO] Cloning {model_name} from {repo_url} ...")
    Repo.clone_from(repo_url, checkout_dir)


def download_hf_with_git(full_name: str, saved_dir: str):
    """Clone a Hugging Face model repository over SSH using the git CLI.

    Args:
        full_name: Repository id in the form `username/model_name`.
        saved_dir: Directory under which the checkout is created; the clone
            lands in `{saved_dir}/{model_name}`.

    Raises:
        IndexError: If `full_name` contains no `/`.
        subprocess.CalledProcessError: If `git clone` exits with a non-zero
            status.
    """
    model_name = full_name.split("/")[1]
    # SSH remote of the Hugging Face Hub. The literal here had been mangled
    # into an e-mail-obfuscation placeholder, which is not a valid git remote.
    url = f"git@hf.co:{full_name}"
    saved = f"{saved_dir}/{model_name}"

    # Run `git lfs install` before cloning; kept best-effort (status ignored).
    subprocess.run(["git", "lfs", "install"])

    print(f"Cloning {model_name} from {url} ...")
    # check=True so a failed clone raises instead of being silently ignored.
    subprocess.run(["git", "clone", "--progress", url, saved], check=True)


def convert_hf_to_gguf(
    script_path: str,
    dir_raw_model: str,
    gguf_model_path: str,
    pad_vocab: bool = False,
):
    """Run the conversion script on a raw model directory.

    The script is launched as `python <script_path> --outfile <gguf_model_path>
    [--pad-vocab] <dir_raw_model>` and the resulting `CompletedProcess` is
    printed. The exit status is not checked.

    Args:
        script_path: Path to the conversion script.
        dir_raw_model: Directory holding the raw model; passed last.
        gguf_model_path: Output path handed to `--outfile`.
        pad_vocab: When exactly `True`, `--pad-vocab` is added.
    """
    # NOTE: an alternative invocation that also passed `--vocab-type bpe`
    # (noted as "convert.py for llama-3") was left disabled by the author.
    cli_args = ["--outfile", gguf_model_path]
    if pad_vocab is True:
        cli_args.append("--pad-vocab")
    cli_args.append(dir_raw_model)

    completed = subprocess.run(["python", script_path, *cli_args])
    print(completed)


def quantize_model(
    quantizer: str,
    f16_gguf_model_path: str,
    quantized_gguf_model_path: str,
    quant_type: str = "q4_0",
):
    """Run the quantizer executable on an f16 gguf model.

    The quantizer is invoked as
    `<quantizer> <f16_gguf_model_path> <quantized_gguf_model_path> <quant_type>`.

    Args:
        quantizer: Path to the quantizer executable.
        f16_gguf_model_path: Input gguf model path.
        quantized_gguf_model_path: Output path for the quantized model.
        quant_type: Quantization type passed as the last argument.

    Raises:
        subprocess.CalledProcessError: If the quantizer exits with a non-zero
            status.
    """
    print(f"[INFO] quantizer: {quantizer}")
    print(f"[INFO] quant_type: {quant_type}")
    print(f"[INFO] f16_gguf_model_path: {f16_gguf_model_path}")
    print(f"[INFO] quantized_model_filename: {quantized_gguf_model_path}")
    # check=True: without it a failing quantizer went unnoticed and callers
    # wrapping this call in try/except could never observe the failure.
    subprocess.run(
        [
            quantizer,
            f16_gguf_model_path,
            quantized_gguf_model_path,
            quant_type,
        ],
        check=True,
    )


# Quantization types produced when `--quant-type` is not given.
# Other types the author listed but left disabled: Q2_K, Q3_K_L, Q3_K_M,
# Q3_K_S, Q4_0, Q4_K_M, Q4_K_S, Q5_0, Q5_K_S.
_DEFAULT_QUANT_TYPES = ("Q5_K_M", "Q6_K", "Q8_0")


def _build_arg_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the convert/quantize pipeline."""
    parser = argparse.ArgumentParser(description="Convert and quantize gguf models.")
    parser.add_argument(
        "--full-name",
        type=str,
        required=True,
        help="Huggingface model full name. e.g. `username/model_name`",
    )
    parser.add_argument(
        "-s",
        "--saved-dir",
        type=str,
        default="models",
        help="The directory to save the model.",
    )
    parser.add_argument(
        "--enable-converter",
        action="store_true",
        help="Enable the converter. Notice that `--converter` must be specified.",
    )
    parser.add_argument(
        "-c",
        "--converter",
        type=str,
        help="The path to the converter. Notice that `--enable-converter` must be specified if use this option.",
    )
    parser.add_argument(
        "--pad-vocab",
        action="store_true",
        help="Enable adding pad tokens when model vocab expects more than tokenizer metadata provides. Notice that `--enable-converter` must be specified.",
    )
    parser.add_argument(
        "--enable-quantizer",
        action="store_true",
        help="Enable the quantizer. Notice that `--quantizer` must be specified.",
    )
    parser.add_argument(
        "-q",
        "--quantizer",
        type=str,
        help="The path to the quantizer. Notice that `--enable-quantizer` must be specified if use this option.",
    )
    parser.add_argument(
        "-t",
        "--quant-type",
        type=str,
        default=None,
        help="The quantization type. Notice that `--enable-quantizer` must be specified if use this option.",
    )
    return parser


def main():
    """Command-line entry point: prepare gguf paths and quantize a model.

    Parses the command line, derives the gguf output directory
    (`<saved-dir>/<model_name>-gguf`) and the f16 gguf path inside it, then:

    * with `--enable-converter`: creates the gguf output directory (the
      download and conversion calls themselves are currently disabled);
    * with `--enable-quantizer`: runs the quantizer once for `--quant-type`,
      or once per default type when no type is given, stopping at the first
      failure.

    Invalid command lines (missing or malformed `--full-name`,
    `--enable-quantizer` without `--quantizer`) are rejected through the
    argument parser, which exits the process.
    """
    parser = _build_arg_parser()
    args = parser.parse_args()

    print(args)

    name_parts = args.full_name.split("/")
    if len(name_parts) != 2 or not all(name_parts):
        parser.error("--full-name must have the form `username/model_name`")
    if args.enable_quantizer and not args.quantizer:
        parser.error("--quantizer must be specified with --enable-quantizer")

    print("Download model ...")
    full_name = args.full_name
    model_name = name_parts[1]
    saved_dir = args.saved_dir
    # NOTE: the download step is currently disabled; the raw model is expected
    # to be present already.
    # try:
    #     download_hf_with_git(full_name, saved_dir)
    #     print(f"The raw model is saved in {saved_dir}.")
    # except Exception as e:
    #     print(f"Failed to download model. {e}")
    #     return

    # Derive the paths up front so the quantizer step can run without
    # `--enable-converter` (they used to be bound only in the converter branch).
    raw_model_dir = f"{saved_dir}/{model_name}"
    gguf_model_dir = Path(raw_model_dir).parent / f"{model_name}-gguf"
    f16_gguf_model_path = gguf_model_dir / f"{model_name}-f16.gguf"

    if args.enable_converter:
        print("[CONVERTER] Convert model ...")
        print(f"[CONVERTER] raw_model_dir: {raw_model_dir}")

        # parents=True: the save directory itself may not exist yet.
        gguf_model_dir.mkdir(parents=True, exist_ok=True)

        print(f"[CONVERTER] f16_gguf_model_path: {f16_gguf_model_path}")

        # NOTE: the conversion call is currently disabled.
        # try:
        #     convert_hf_to_gguf(
        #         args.converter,
        #         raw_model_dir,
        #         str(f16_gguf_model_path),
        #         args.pad_vocab,
        #     )
        #     print(f"The converted gguf model is saved in {f16_gguf_model_path}.")
        # except Exception as e:
        #     print(f"Failed to convert model. {e}")
        #     return

    if args.enable_quantizer:
        print("[QUANTIZER] Quantize model ...")
        quantizer = args.quantizer
        print(f"[QUANTIZER] quantizer: {quantizer}")

        # The quantizer needs the f16 model as input; report its absence
        # clearly instead of launching a run that cannot succeed.
        if not f16_gguf_model_path.is_file():
            print(f"[QUANTIZER] f16 gguf model not found: {f16_gguf_model_path}")
            print("Failed to quantize model.")
            return

        if args.quant_type is not None:
            quant_types = [args.quant_type]
        else:
            quant_types = list(_DEFAULT_QUANT_TYPES)

        for quant_type in quant_types:
            quantized_gguf_model_path = (
                gguf_model_dir / f"{model_name}-{quant_type}.gguf"
            )

            print(f"[QUANTIZER] quant_type: {quant_type}")
            print(f"[QUANTIZER] quantized_model_filename: {quantized_gguf_model_path}")

            try:
                quantize_model(
                    quantizer,
                    str(f16_gguf_model_path),
                    str(quantized_gguf_model_path),
                    quant_type,
                )
            except Exception as e:
                # Top-level boundary: report and stop at the first failure.
                print(e)
                print("Failed to quantize model.")
                return
            else:
                print(
                    f"The quantized gguf model is saved in {quantized_gguf_model_path}."
                )

        # # remove the raw model dir for saving space
        # print(f"The quantization is done. Remove {raw_model_dir}")
        # shutil.rmtree(raw_model_dir)

    print("Done.")


# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()