Spaces:

msun415
/

Llamole

Build error

App Files Files Community

Llamole / src /train /tuner.py

msun415

Upload folder using huggingface_hub

13362e2 verified 5 months ago

raw

history blame contribute delete

5.17 kB

	# Copyright 2024 the LlamaFactory team and the Llamole team.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	import os
	import shutil
	from typing import TYPE_CHECKING, Any, Dict, List, Optional

	import torch
	from transformers import PreTrainedModel

	from ..data import get_template_and_fix_tokenizer
	from ..extras.constants import V_HEAD_SAFE_WEIGHTS_NAME, V_HEAD_WEIGHTS_NAME
	from ..extras.logging import get_logger
	from ..hparams import get_infer_args, get_train_args
	from ..model import load_language_model, load_tokenizer
	from .callbacks import LogCallback
	from .mmsft import run_mmsft

	if TYPE_CHECKING:
	from transformers import TrainerCallback

	logger = get_logger(__name__)

	def run_train(args: Optional[Dict[str, Any]] = None, callbacks: List["TrainerCallback"] = []) -> None:
	callbacks.append(LogCallback())
	model_args, data_args, training_args, finetuning_args, generating_args = get_train_args(args)
	run_mmsft(model_args, data_args, training_args, finetuning_args, generating_args, callbacks)

	def merge_adapter(args: Optional[Dict[str, Any]] = None) -> None:
	model_args, data_args, finetuning_args, _ = get_infer_args(args)

	if model_args.export_dir is None:
	raise ValueError("Please specify `export_dir` to save model.")

	if model_args.adapter_name_or_path is not None and model_args.export_quantization_bit is not None:
	raise ValueError("Please merge adapters before quantizing the model.")

	tokenizer_module = load_tokenizer(model_args)
	tokenizer = tokenizer_module["tokenizer"]
	processor = tokenizer_module["processor"]
	get_template_and_fix_tokenizer(tokenizer, data_args.template)
	model = load_language_model(tokenizer, model_args, finetuning_args) # must after fixing tokenizer to resize vocab

	if getattr(model, "quantization_method", None) and model_args.adapter_name_or_path is not None:
	raise ValueError("Cannot merge adapters to a quantized model.")

	if not isinstance(model, PreTrainedModel):
	raise ValueError("The model is not a `PreTrainedModel`, export aborted.")

	if getattr(model, "quantization_method", None) is None: # cannot convert dtype of a quantized model
	output_dtype = getattr(model.config, "torch_dtype", torch.float16)
	setattr(model.config, "torch_dtype", output_dtype)
	model = model.to(output_dtype)
	else:
	setattr(model.config, "torch_dtype", torch.float16)

	model.save_pretrained(
	save_directory=model_args.export_dir,
	max_shard_size="{}GB".format(model_args.export_size),
	safe_serialization=(not model_args.export_legacy_format),
	)
	if model_args.export_hub_model_id is not None:
	model.push_to_hub(
	model_args.export_hub_model_id,
	token=model_args.hf_hub_token,
	max_shard_size="{}GB".format(model_args.export_size),
	safe_serialization=(not model_args.export_legacy_format),
	)

	if finetuning_args.stage == "rm":
	if model_args.adapter_name_or_path is not None:
	vhead_path = model_args.adapter_name_or_path[-1]
	else:
	vhead_path = model_args.model_name_or_path

	if os.path.exists(os.path.join(vhead_path, V_HEAD_SAFE_WEIGHTS_NAME)):
	shutil.copy(
	os.path.join(vhead_path, V_HEAD_SAFE_WEIGHTS_NAME),
	os.path.join(model_args.export_dir, V_HEAD_SAFE_WEIGHTS_NAME),
	)
	logger.info("Copied valuehead to {}.".format(model_args.export_dir))
	elif os.path.exists(os.path.join(vhead_path, V_HEAD_WEIGHTS_NAME)):
	shutil.copy(
	os.path.join(vhead_path, V_HEAD_WEIGHTS_NAME),
	os.path.join(model_args.export_dir, V_HEAD_WEIGHTS_NAME),
	)
	logger.info("Copied valuehead to {}.".format(model_args.export_dir))

	try:
	tokenizer.padding_side = "left" # restore padding side
	tokenizer.init_kwargs["padding_side"] = "left"
	tokenizer.save_pretrained(model_args.export_dir)
	if model_args.export_hub_model_id is not None:
	tokenizer.push_to_hub(model_args.export_hub_model_id, token=model_args.hf_hub_token)

	if model_args.visual_inputs and processor is not None:
	getattr(processor, "image_processor").save_pretrained(model_args.export_dir)
	if model_args.export_hub_model_id is not None:
	getattr(processor, "image_processor").push_to_hub(
	model_args.export_hub_model_id, token=model_args.hf_hub_token
	)

	except Exception:
	logger.warning("Cannot save tokenizer, please copy the files manually.")