"""Convert MPT model checkpoint to FT format. |
|
|
|
It's a modified version of |
|
https://github.com/NVIDIA/FasterTransformer/blob/main/examples/pytorch/gpt/utils/huggingface_gpt_convert.py |
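
Example usage, with an illustrative script filename and output directory:

    python convert_hf_mpt_to_ft.py -i mosaicml/mpt-7b -o mpt_ft_out -i_g 1

This writes the converted checkpoint to mpt_ft_out/1-gpu, which
FasterTransformer's GPT examples can then consume.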
|
""" |
|
import argparse
import configparser
import os

import transformers

from llmfoundry.utils import convert_and_save_ft_weights
|
|
def convert_mpt_to_ft(model_name_or_path: str,
                      output_dir: str,
                      infer_gpu_num: int = 1,
                      weight_data_type: str = 'fp32',
                      force: bool = False) -> None:
"""Convert an MPT checkpoint to a FasterTransformer compatible format. |
|
|
|
Args: |
|
model_name_or_path (str): The HF hub name of the model (e.g., mosaicml/mpt-7b) or the path of a directory |
|
containing an MPT checkpoint in a local dir. |
|
output_dir (str): Path of the directory to save the checkpoint in FT format. The directory must not already exist. |
|
infer_gpu_num (int): The number of gpus you are planning to use for inference. |
|
weight_data_type (str): Data type of the weights in the input checkpoint. |
|
force (bool): force conversion even with unsupported features in FT. |
|
""" |
    save_dir = os.path.join(output_dir, f'{infer_gpu_num}-gpu')

    if os.path.exists(save_dir):
        raise RuntimeError(f'Output path {save_dir} already exists!')
    os.makedirs(save_dir)
|
    # MPT uses custom modeling code from the HF Hub, so trust_remote_code must
    # be enabled; loading on CPU keeps the conversion GPU-free.
    torch_device = 'cpu'
    model = transformers.AutoModelForCausalLM.from_pretrained(
        model_name_or_path, trust_remote_code=True).to(torch_device)
    tokenizer = transformers.AutoTokenizer.from_pretrained(
        model_name_or_path, trust_remote_code=True)

    hf_config = vars(model.config)
|
    # FT reads model hyperparameters from a GPT-style config.ini stored
    # alongside the converted weights.
    config = configparser.ConfigParser()
    config['gpt'] = {}
    try:
        config['gpt']['model_name'] = 'mpt' if hf_config[
            '_name_or_path'] == '' else hf_config['_name_or_path']
        config['gpt']['head_num'] = str(hf_config['n_heads'])
        n_embd = hf_config['d_model']
        config['gpt']['size_per_head'] = str(n_embd // hf_config['n_heads'])
        config['gpt']['inter_size'] = str(n_embd * hf_config['expansion_ratio'])
        config['gpt']['max_pos_seq_len'] = str(hf_config['max_seq_len'])
        config['gpt']['num_layer'] = str(hf_config['n_layers'])
        config['gpt']['vocab_size'] = str(hf_config['vocab_size'])
        # Fall back to the tokenizer's special tokens when the model config
        # does not define them.
        config['gpt']['start_id'] = str(
            hf_config['bos_token_id']
        ) if hf_config['bos_token_id'] is not None else str(
            tokenizer.bos_token_id)
        config['gpt']['end_id'] = str(
            hf_config['eos_token_id']
        ) if hf_config['eos_token_id'] is not None else str(
            tokenizer.eos_token_id)
        config['gpt']['weight_data_type'] = weight_data_type
        config['gpt']['tensor_para_size'] = str(infer_gpu_num)

        config['gpt']['layernorm_eps'] = str(1e-5)
        if hf_config['attn_config']['alibi']:
            # ALiBi replaces learned positional embeddings with linear
            # attention biases.
            config['gpt']['has_positional_encoding'] = str(False)
            config['gpt']['use_attention_linear_bias'] = str(True)
        if hf_config['attn_config']['clip_qkv'] and not force:
            raise RuntimeError(
                'clip_qkv is enabled for this MPT model. This may not work as expected in FT. Use --force to force a conversion.'
            )
        if hf_config['attn_config']['qk_ln'] and not force:
            raise RuntimeError(
                'qk_ln is enabled for this MPT model. This may not work as expected in FT. Use --force to force a conversion.'
            )

        with open(os.path.join(save_dir, 'config.ini'), 'w') as configfile:
            config.write(configfile)
    except Exception:
        print('Failed to save the config in config.ini.')
        raise
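
    # For reference, converting mosaicml/mpt-7b yields a config.ini roughly
    # like the following (values from the published MPT-7B config, shown only
    # as an illustration):
    #
    #   [gpt]
    #   model_name = mosaicml/mpt-7b
    #   head_num = 32
    #   size_per_head = 128
    #   inter_size = 16384
    #   max_pos_seq_len = 2048
    #   num_layer = 32
    #   vocab_size = 50432
    #   tensor_para_size = 1
    #   ...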
|
    named_params_dict = dict(model.named_parameters())
    convert_and_save_ft_weights(named_params=named_params_dict,
                                config=hf_config,
                                infer_gpu_num=infer_gpu_num,
                                weight_data_type=weight_data_type,
                                save_dir=save_dir)
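
    # At this point save_dir should contain config.ini plus the converted
    # weight files, split for `infer_gpu_num`-way tensor-parallel inference.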
|
|
if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument('--save_dir',
                        '-o',
                        type=str,
                        help='Directory to save the converted checkpoint in',
                        required=True)
    parser.add_argument(
        '--name_or_dir',
        '-i',
        type=str,
        help=
        'HF hub model name (e.g., mosaicml/mpt-7b) or local dir path to load the checkpoint from',
        required=True)
    parser.add_argument('--infer_gpu_num',
                        '-i_g',
                        type=int,
                        help='Number of GPUs to use for inference',
                        required=True)
    parser.add_argument(
        '--force',
        action='store_true',
        help=
        'Force conversion to FT even if some features may not work as expected in FT'
    )
    parser.add_argument('--weight_data_type',
                        type=str,
                        help='Data type of the weights in the input checkpoint',
                        default='fp32',
                        choices=['fp32', 'fp16'])

    args = parser.parse_args()
    print('\n=============== Argument ===============')
    for key, value in vars(args).items():
        print(f'{key}: {value}')
    print('========================================')

    convert_mpt_to_ft(args.name_or_dir, args.save_dir, args.infer_gpu_num,
                      args.weight_data_type, args.force)