# Copyright 2022 MosaicML LLM Foundry authors
# SPDX-License-Identifier: Apache-2.0
# Copyright (c) 2021-2023, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Convert MPT model checkpoint to FT format.
It's a modified version of
https://github.com/NVIDIA/FasterTransformer/blob/main/examples/pytorch/gpt/utils/huggingface_gpt_convert.py
"""
import argparse
import configparser
import os

import transformers

from llmfoundry.utils import convert_and_save_ft_weights


def convert_mpt_to_ft(model_name_or_path: str,
                      output_dir: str,
                      infer_gpu_num: int = 1,
                      weight_data_type: str = 'fp32',
                      force: bool = False) -> None:
"""Convert an MPT checkpoint to a FasterTransformer compatible format.
Args:
model_name_or_path (str): The HF hub name of the model (e.g., mosaicml/mpt-7b) or the path of a directory
containing an MPT checkpoint in a local dir.
output_dir (str): Path of the directory to save the checkpoint in FT format. The directory must not already exist.
infer_gpu_num (int): The number of gpus you are planning to use for inference.
weight_data_type (str): Data type of the weights in the input checkpoint.
force (bool): force conversion even with unsupported features in FT.
"""
    save_dir = os.path.join(output_dir, f'{infer_gpu_num}-gpu')
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    else:
        raise RuntimeError(f'Output path {save_dir} already exists!')

    # Do the conversion on CPU.
    torch_device = 'cpu'
    model = transformers.AutoModelForCausalLM.from_pretrained(
        model_name_or_path, trust_remote_code=True).to(torch_device)
    tokenizer = transformers.AutoTokenizer.from_pretrained(
        model_name_or_path, trust_remote_code=True)
    hf_config = vars(model.config)

    config = configparser.ConfigParser()
    config['gpt'] = {}
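    # FT's GPT loader reads the model geometry and special-token ids from the
    # [gpt] section of a config.ini written alongside the converted weights.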
    try:
        config['gpt']['model_name'] = 'mpt' if hf_config[
            '_name_or_path'] == '' else hf_config['_name_or_path']
        config['gpt']['head_num'] = str(hf_config['n_heads'])
        n_embd = hf_config['d_model']
        config['gpt']['size_per_head'] = str(n_embd // hf_config['n_heads'])
        config['gpt']['inter_size'] = str(n_embd * hf_config['expansion_ratio'])
        config['gpt']['max_pos_seq_len'] = str(hf_config['max_seq_len'])
        config['gpt']['num_layer'] = str(hf_config['n_layers'])
        config['gpt']['vocab_size'] = str(hf_config['vocab_size'])
        config['gpt']['start_id'] = str(
            hf_config['bos_token_id']
        ) if hf_config['bos_token_id'] is not None else str(
            tokenizer.bos_token_id)
        config['gpt']['end_id'] = str(
            hf_config['eos_token_id']
        ) if hf_config['eos_token_id'] is not None else str(
            tokenizer.eos_token_id)
        config['gpt']['weight_data_type'] = weight_data_type
        config['gpt']['tensor_para_size'] = str(infer_gpu_num)
        # nn.LayerNorm default eps is 1e-5.
        config['gpt']['layernorm_eps'] = str(1e-5)
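        # With ALiBi, MPT applies linear attention biases instead of learned
        # positional embeddings, so tell FT to do the same.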
        if hf_config['attn_config']['alibi']:
            config['gpt']['has_positional_encoding'] = str(False)
            config['gpt']['use_attention_linear_bias'] = str(True)
        if hf_config['attn_config']['clip_qkv'] and not force:
            raise RuntimeError(
                'clip_qkv is enabled for this MPT model. This may not work as expected in FT. Use --force to force a conversion.'
            )
        if hf_config['attn_config']['qk_ln'] and not force:
            raise RuntimeError(
                'qk_ln is enabled for this MPT model. This may not work as expected in FT. Use --force to force a conversion.'
            )

        with open(os.path.join(save_dir, 'config.ini'), 'w') as configfile:
            config.write(configfile)
    except Exception:
        print('Failed to save the config in config.ini.')
        raise
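    # Hand every parameter to the shared llm-foundry converter, which splits
    # and saves the weights per tensor-parallel rank under save_dir.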
    named_params_dict = {
        name: param for name, param in model.named_parameters()
    }
    convert_and_save_ft_weights(named_params=named_params_dict,
                                config=hf_config,
                                infer_gpu_num=infer_gpu_num,
                                weight_data_type=weight_data_type,
                                save_dir=save_dir)
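

# Illustrative programmatic use (the paths here are placeholders): convert a
# checkpoint for 2-way tensor parallelism; weights land in 'mpt-7b-ft/2-gpu/'.
#
#   convert_mpt_to_ft('mosaicml/mpt-7b', 'mpt-7b-ft', infer_gpu_num=2,
#                     weight_data_type='fp16')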


if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument('--save_dir',
                        '-o',
                        type=str,
                        help='Directory to save the converted checkpoint in',
                        required=True)
    parser.add_argument(
        '--name_or_dir',
        '-i',
        type=str,
        help=
        'HF hub model name (e.g., mosaicml/mpt-7b) or local dir path to load the checkpoint from',
        required=True)
    parser.add_argument('--infer_gpu_num',
                        '-i_g',
                        type=int,
                        help='Number of GPUs to use for inference',
                        required=True)
    parser.add_argument(
        '--force',
        action='store_true',
        help=
        'Force conversion to FT even if some features may not work as expected in FT'
    )
    parser.add_argument('--weight_data_type',
                        type=str,
                        help='Data type of the weights in the input checkpoint',
                        default='fp32',
                        choices=['fp32', 'fp16'])
    args = parser.parse_args()

    print('\n=============== Argument ===============')
    for key, value in vars(args).items():
        print(f'{key}: {value}')
    print('========================================')

    convert_mpt_to_ft(args.name_or_dir, args.save_dir, args.infer_gpu_num,
                      args.weight_data_type, args.force)