Spaces:

zhengr
/

ChatTTS2

Runtime error

App Files Files Community

ChatTTS2 / ChatTTS /model /velocity /model_loader.py

zhengr

init

c02bdcd 10 months ago

raw

history blame contribute delete

2.61 kB

	"""Utilities for selecting and loading models."""

	import contextlib

	import torch
	import torch.nn as nn

	from vllm.config import ModelConfig
	from vllm.model_executor.models import ModelRegistry
	from vllm.model_executor.weight_utils import get_quant_config, initialize_dummy_weights

	from .llama import LlamaModel


	@contextlib.contextmanager
	def _set_default_torch_dtype(dtype: torch.dtype):
	    """Temporarily make ``dtype`` the default torch dtype.

	    The default dtype that was active on entry is captured and put back when
	    the ``with`` block exits, whether it exits normally or by raising.

	    Args:
	        dtype: The dtype to install as torch's default for the duration of
	            the ``with`` block.

	    Yields:
	        None.
	    """
	    old_dtype = torch.get_default_dtype()
	    torch.set_default_dtype(dtype)
	    try:
	        yield
	    finally:
	        # The default dtype is process-wide state; restore it even when the
	        # body raises so a failed load does not leak the temporary dtype.
	        torch.set_default_dtype(old_dtype)


	def _resolve_linear_method(model_config: ModelConfig):
	    """Return the linear method for ``model_config``'s quantization, if any.

	    Returns ``None`` when ``model_config.quantization`` is ``None``.
	    Otherwise the quantization config is fetched and checked against the
	    current GPU's compute capability and the requested dtype before its
	    linear method is returned.

	    Raises:
	        ValueError: If the GPU capability is below the quantization method's
	            minimum, or if ``model_config.dtype`` is not among the
	            activation dtypes the quantization method supports.
	    """
	    if model_config.quantization is None:
	        return None

	    quant_config = get_quant_config(
	        model_config.quantization,
	        model_config.model,
	        model_config.hf_config,
	        model_config.download_dir,
	    )

	    # Fold the (major, minor) capability pair into one integer,
	    # major * 10 + minor, so it can be compared with the minimum.
	    device_cc = torch.cuda.get_device_capability()
	    capability = device_cc[0] * 10 + device_cc[1]
	    if capability < quant_config.get_min_capability():
	        raise ValueError(
	            f"The quantization method {model_config.quantization} is not "
	            "supported for the current GPU. "
	            f"Minimum capability: {quant_config.get_min_capability()}. "
	            f"Current capability: {capability}."
	        )

	    supported_dtypes = quant_config.get_supported_act_dtypes()
	    if model_config.dtype not in supported_dtypes:
	        raise ValueError(
	            f"{model_config.dtype} is not supported for quantization "
	            f"method {model_config.quantization}. Supported dtypes: "
	            f"{supported_dtypes}"
	        )

	    return quant_config.get_linear_method()


	def get_model(model_config: ModelConfig) -> nn.Module:
	    """Build a ``LlamaModel`` from ``model_config`` and populate its weights.

	    The model is constructed under the ``"cuda"`` device context with
	    ``model_config.dtype`` as the default torch dtype. When
	    ``model_config.load_format`` is ``"dummy"`` the weights are filled by
	    ``initialize_dummy_weights``; otherwise they are loaded through the
	    model's ``load_weights`` method.

	    Args:
	        model_config: Configuration naming the model, dtype, quantization
	            method, load format, download directory and revision.

	    Returns:
	        The result of calling ``eval()`` on the constructed model.

	    Raises:
	        ValueError: If a quantization method is requested that the current
	            GPU capability or the requested dtype does not support.
	    """
	    # Get the (maybe quantized) linear method.
	    linear_method = _resolve_linear_method(model_config)

	    with _set_default_torch_dtype(model_config.dtype):
	        # Instantiate the model on the GPU; real or dummy weights are
	        # filled in afterwards.
	        with torch.device("cuda"):
	            model = LlamaModel(model_config.hf_config, linear_method)

	        if model_config.load_format != "dummy":
	            # Load the weights from the cached or downloaded files.
	            model.load_weights(
	                model_config.model,
	                model_config.download_dir,
	                model_config.load_format,
	                model_config.revision,
	            )
	        else:
	            # NOTE(woosuk): For accurate performance evaluation, we assign
	            # random values to the weights.
	            initialize_dummy_weights(model)

	    return model.eval()