""" | |
extract factors the build is dependent on: | |
[X] compute capability | |
[ ] TODO: Q - What if we have multiple GPUs of different makes? | |
- CUDA version | |
- Software: | |
- CPU-only: only CPU quantization functions (no optimizer, no matrix multipl) | |
- CuBLAS-LT: full-build 8-bit optimizer | |
- no CuBLAS-LT: no 8-bit matrix multiplication (`nomatmul`) | |
evaluation: | |
- if paths faulty, return meaningful error | |
- else: | |
- determine CUDA version | |
- determine capabilities | |
- based on that set the default path | |
""" | |

import ctypes as ct
import errno
import os
from itertools import product
from pathlib import Path
from typing import Set, Union
from warnings import warn

import torch

from .env_vars import get_potentially_lib_path_containing_env_vars

# These are the most common lib names. libcudart.so is missing by default for a
# conda install with PyTorch 2.0; instead we have libcudart.so.11.0, which
# previously caused a lot of errors. Not sure whether libcudart.so.12.0 exists
# in PyTorch installs, but including it does not hurt.
CUDA_RUNTIME_LIBS: list = ["libcudart.so", "libcudart.so.11.0", "libcudart.so.12.0"]

# This is an ordered list of backup paths to search for CUDA in, if it cannot
# be found in the main environment paths.
backup_paths = []
backup_paths.append('$CONDA_PREFIX/lib/libcudart.so.11.0')


class CUDASetup:
    _instance = None

    def __init__(self):
        raise RuntimeError("Call get_instance() instead")

    def generate_instructions(self):
        # Emit the instructions only once.
        if getattr(self, 'error', False): return
        self.error = True

        if not self.cuda_available:
            self.add_log_entry('CUDA SETUP: Problem: The main issue seems to be that the main CUDA library was not detected or CUDA is not installed.')
            self.add_log_entry('CUDA SETUP: Solution 1): Your paths are probably not up-to-date. You can update them via: sudo ldconfig.')
            self.add_log_entry('CUDA SETUP: Solution 2): If you do not have sudo rights, you can do the following:')
            self.add_log_entry('CUDA SETUP: Solution 2a): Find the cuda library via: find / -name libcuda.so 2>/dev/null')
            self.add_log_entry('CUDA SETUP: Solution 2b): Once the library is found add it to the LD_LIBRARY_PATH: export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:FOUND_PATH_FROM_2a')
            self.add_log_entry('CUDA SETUP: Solution 2c): For a permanent solution add the export from 2b into your .bashrc file, located at ~/.bashrc')
            self.add_log_entry('CUDA SETUP: Solution 3): For a missing CUDA runtime library (libcudart.so), use `find / -name libcudart.so*` and follow with step (2b)')
            return

        if self.cudart_path is None:
            self.add_log_entry('CUDA SETUP: Problem: The main issue seems to be that the main CUDA runtime library was not detected.')
            self.add_log_entry('CUDA SETUP: Solution 1: To solve the issue the libcudart.so location needs to be added to the LD_LIBRARY_PATH variable')
            self.add_log_entry('CUDA SETUP: Solution 1a): Find the cuda runtime library via: find / -name libcudart.so 2>/dev/null')
            self.add_log_entry('CUDA SETUP: Solution 1b): Once the library is found add it to the LD_LIBRARY_PATH: export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:FOUND_PATH_FROM_1a')
            self.add_log_entry('CUDA SETUP: Solution 1c): For a permanent solution add the export from 1b into your .bashrc file, located at ~/.bashrc')
            self.add_log_entry('CUDA SETUP: Solution 2: If no library was found in step 1a) you need to install CUDA.')
            self.add_log_entry('CUDA SETUP: Solution 2a): Download CUDA install script: wget https://github.com/TimDettmers/bitsandbytes/blob/main/cuda_install.sh')
            self.add_log_entry('CUDA SETUP: Solution 2b): Install desired CUDA version to desired location. The syntax is bash cuda_install.sh CUDA_VERSION PATH_TO_INSTALL_INTO.')
            self.add_log_entry('CUDA SETUP: Solution 2c): For example, "bash cuda_install.sh 113 ~/local/" will download CUDA 11.3 and install it into the folder ~/local')
            return

        # Map the detected CUDA version string to the matching make target.
        make_cmd = f'CUDA_VERSION={self.cuda_version_string}'
        if len(self.cuda_version_string) < 3:
            make_cmd += ' make cuda92'
        elif self.cuda_version_string == '110':
            make_cmd += ' make cuda110'
        elif self.cuda_version_string[:2] == '11' and int(self.cuda_version_string[2]) > 0:
            make_cmd += ' make cuda11x'
        elif self.cuda_version_string == '100':
            self.add_log_entry('CUDA SETUP: CUDA 10.0 not supported. Please use a different CUDA version.')
            self.add_log_entry('CUDA SETUP: Before you try again running bitsandbytes, make sure old CUDA 10.0 versions are uninstalled and removed from $LD_LIBRARY_PATH variables.')
            return

        # Without cuBLASLt support (compute capability < 7.5) the binary must be
        # built without the 8-bit matmul kernels.
        has_cublaslt = is_cublasLt_compatible(self.cc)
        if not has_cublaslt:
            make_cmd += '_nomatmul'

        self.add_log_entry('CUDA SETUP: Something unexpected happened. Please compile from source:')
        self.add_log_entry('git clone https://github.com/TimDettmers/bitsandbytes.git')
        self.add_log_entry('cd bitsandbytes')
        self.add_log_entry(make_cmd)
        self.add_log_entry('python setup.py install')
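        # For instance (illustrative values, not detected here): with
        # cuda_version_string '117' on a cuBLASLt-capable GPU, the suggested
        # command is 'CUDA_VERSION=117 make cuda11x'.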

    def initialize(self):
        if not getattr(self, 'initialized', False):
            self.has_printed = False
            self.lib = None
            self.initialized = False
            self.error = False

    def manual_override(self):
        if torch.cuda.is_available() and len(os.environ.get('BNB_CUDA_VERSION', '')) > 0:
            warn((f'\n\n{"="*80}\n'
                  'WARNING: Manual override via BNB_CUDA_VERSION env variable detected!\n'
                  'BNB_CUDA_VERSION=XXX can be used to load a bitsandbytes version that is different from the PyTorch CUDA version.\n'
                  'If this was unintended set the BNB_CUDA_VERSION variable to an empty string: export BNB_CUDA_VERSION=\n'
                  'If you use the manual override make sure the right libcudart.so is in your LD_LIBRARY_PATH\n'
                  'For example by adding the following to your .bashrc: export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:<path_to_cuda_dir>/lib64\n'
                  f'Loading CUDA version: BNB_CUDA_VERSION={os.environ["BNB_CUDA_VERSION"]}'
                  f'\n{"="*80}\n\n'))
            # Strip the trailing '<version>.so' suffix (six characters for a
            # three-digit version such as '118.so') and substitute the override.
            self.binary_name = self.binary_name[:-6] + f'{os.environ["BNB_CUDA_VERSION"]}.so'
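        # Example (assumed values): 'libbitsandbytes_cuda117.so' with
        # BNB_CUDA_VERSION=122 becomes 'libbitsandbytes_cuda122.so'.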

    def run_cuda_setup(self):
        self.initialized = True
        self.cuda_setup_log = []

        binary_name, cudart_path, cc, cuda_version_string = evaluate_cuda_setup()
        self.cudart_path = cudart_path
        self.cuda_available = torch.cuda.is_available()
        self.cc = cc
        self.cuda_version_string = cuda_version_string
        self.binary_name = binary_name
        self.manual_override()

        package_dir = Path(__file__).parent.parent
        binary_path = package_dir / self.binary_name

        try:
            if not binary_path.exists():
                self.add_log_entry(f"CUDA SETUP: Required library version not found: {binary_name}. Maybe you need to compile it from source?")
                legacy_binary_name = "libbitsandbytes_cpu.so"
                self.add_log_entry(f"CUDA SETUP: Defaulting to {legacy_binary_name}...")
                binary_path = package_dir / legacy_binary_name
                if not binary_path.exists() or torch.cuda.is_available():
                    self.add_log_entry('')
                    self.add_log_entry('='*48 + 'ERROR' + '='*37)
                    self.add_log_entry('CUDA SETUP: CUDA detection failed! Possible reasons:')
                    self.add_log_entry('1. You need to manually override the PyTorch CUDA version. Please see: '
                                       'https://github.com/TimDettmers/bitsandbytes/blob/main/how_to_use_nonpytorch_cuda.md')
                    self.add_log_entry('2. CUDA driver not installed')
                    self.add_log_entry('3. CUDA not installed')
                    self.add_log_entry('4. You have multiple conflicting CUDA libraries')
                    self.add_log_entry('5. Required library not pre-compiled for this bitsandbytes release!')
                    self.add_log_entry('CUDA SETUP: If you compiled from source, try again with `make CUDA_VERSION=DETECTED_CUDA_VERSION`, for example, `make CUDA_VERSION=113`.')
                    self.add_log_entry('CUDA SETUP: The CUDA version for the compile might depend on your conda install. Inspect CUDA version via `conda list | grep cuda`.')
                    self.add_log_entry('='*80)
                    self.add_log_entry('')
                    self.generate_instructions()
                    raise Exception('CUDA SETUP: Setup Failed!')
                # Fall back to the CPU binary when no GPU is available.
                self.lib = ct.cdll.LoadLibrary(str(binary_path))
            else:
                self.add_log_entry(f"CUDA SETUP: Loading binary {binary_path}...")
                self.lib = ct.cdll.LoadLibrary(str(binary_path))
        except Exception as ex:
            self.add_log_entry(str(ex))

    def add_log_entry(self, msg, is_warning=False):
        self.cuda_setup_log.append((msg, is_warning))

    def print_log_stack(self):
        for msg, is_warning in self.cuda_setup_log:
            if is_warning:
                warn(msg)
            else:
                print(msg)

    @classmethod
    def get_instance(cls):
        if cls._instance is None:
            cls._instance = cls.__new__(cls)
            cls._instance.initialize()
        return cls._instance
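
# Typical access pattern (illustration): the singleton is obtained via
# CUDASetup.get_instance(); calling CUDASetup() directly raises RuntimeError.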


def is_cublasLt_compatible(cc):
    has_cublaslt = False
    if cc is not None:
        cc_major, cc_minor = cc.split('.')
        if int(cc_major) < 7 or (int(cc_major) == 7 and int(cc_minor) < 5):
            CUDASetup.get_instance().add_log_entry("WARNING: Compute capability < 7.5 detected! Only slow 8-bit matmul is supported for your GPU! "
                "If you run into issues with 8-bit matmul, you can try 4-bit quantization: https://huggingface.co/blog/4bit-transformers-bitsandbytes", is_warning=True)
        else:
            has_cublaslt = True
    return has_cublaslt
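
# Illustrative values (assumptions, not detected): an A100 reports compute
# capability '8.0', so is_cublasLt_compatible('8.0') is True; a GTX 1080
# reports '6.1', which logs the warning above and returns False.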


def extract_candidate_paths(paths_list_candidate: str) -> Set[Path]:
    return {Path(ld_path) for ld_path in paths_list_candidate.split(":") if ld_path}
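
# For example (illustrative input): extract_candidate_paths('/usr/lib::/opt/cuda/lib64')
# -> {Path('/usr/lib'), Path('/opt/cuda/lib64')}; the empty entry from '::' is dropped.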


def remove_non_existent_dirs(candidate_paths: Set[Path]) -> Set[Path]:
    existent_directories: Set[Path] = set()
    for path in candidate_paths:
        try:
            if path.exists():
                existent_directories.add(path)
        except PermissionError:
            # PermissionError is a subclass of OSError, so it must be handled
            # first; unreadable directories are silently skipped.
            pass
        except OSError as exc:
            if exc.errno != errno.ENAMETOOLONG:
                raise exc

    non_existent_directories: Set[Path] = candidate_paths - existent_directories
    if non_existent_directories:
        CUDASetup.get_instance().add_log_entry("The following directories listed in your path were found to "
            f"be non-existent: {non_existent_directories}", is_warning=False)

    return existent_directories


def get_cuda_runtime_lib_paths(candidate_paths: Set[Path]) -> Set[Path]:
    paths = set()
    for libname in CUDA_RUNTIME_LIBS:
        for path in candidate_paths:
            if (path / libname).is_file():
                paths.add(path / libname)
    return paths


def resolve_paths_list(paths_list_candidate: str) -> Set[Path]:
    """
    Searches a given environment variable for the CUDA runtime library,
    i.e. `libcudart.so`.
    """
    return remove_non_existent_dirs(extract_candidate_paths(paths_list_candidate))


def find_cuda_lib_in(paths_list_candidate: str) -> Set[Path]:
    return get_cuda_runtime_lib_paths(
        resolve_paths_list(paths_list_candidate)
    )
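
# Sketch of the intended use (the paths are assumptions for illustration):
# find_cuda_lib_in('/usr/local/cuda-11.8/lib64:/usr/lib') returns the set of
# libcudart files that actually exist, e.g.
# {Path('/usr/local/cuda-11.8/lib64/libcudart.so')}.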


def warn_in_case_of_duplicates(results_paths: Set[Path]) -> None:
    if len(results_paths) > 1:
        warning_msg = (
            f"Found duplicate {CUDA_RUNTIME_LIBS} files: {results_paths}. "
            f"We select the PyTorch default libcudart.so, which is {torch.version.cuda}, "
            "but this might mismatch with the CUDA version that is needed for bitsandbytes. "
            "To override this behavior set the BNB_CUDA_VERSION=<version string, e.g. 122> environment variable. "
            "For example, if you want to use CUDA version 122: "
            "BNB_CUDA_VERSION=122 python ... "
            "OR set the environment variable in your .bashrc: export BNB_CUDA_VERSION=122. "
            "In the case of a manual override, make sure you set LD_LIBRARY_PATH, e.g. "
            "export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-11.2")
        CUDASetup.get_instance().add_log_entry(warning_msg, is_warning=True)


def determine_cuda_runtime_lib_path() -> Union[Path, None]:
    """
    Searches for a CUDA installation, in the following order of priority:
        1. active conda env
        2. LD_LIBRARY_PATH
        3. any other env vars, while ignoring those that
            - are known to be unrelated (see `bnb.cuda_setup.env_vars.to_be_ignored`)
            - don't contain the path separator `/`

    If multiple libraries are found in part 3, we optimistically try one,
    while giving a warning message.
    """
    candidate_env_vars = get_potentially_lib_path_containing_env_vars()

    cuda_runtime_libs = set()

    if "CONDA_PREFIX" in candidate_env_vars:
        conda_libs_path = Path(candidate_env_vars["CONDA_PREFIX"]) / "lib"

        conda_cuda_libs = find_cuda_lib_in(str(conda_libs_path))
        warn_in_case_of_duplicates(conda_cuda_libs)

        if conda_cuda_libs:
            cuda_runtime_libs.update(conda_cuda_libs)
        else:
            CUDASetup.get_instance().add_log_entry(f'{candidate_env_vars["CONDA_PREFIX"]} did not contain '
                f'{CUDA_RUNTIME_LIBS} as expected! Searching further paths...', is_warning=True)

    if "LD_LIBRARY_PATH" in candidate_env_vars:
        lib_ld_cuda_libs = find_cuda_lib_in(candidate_env_vars["LD_LIBRARY_PATH"])
        warn_in_case_of_duplicates(lib_ld_cuda_libs)

        if lib_ld_cuda_libs:
            cuda_runtime_libs.update(lib_ld_cuda_libs)
        else:
            CUDASetup.get_instance().add_log_entry(f'{candidate_env_vars["LD_LIBRARY_PATH"]} did not contain '
                f'{CUDA_RUNTIME_LIBS} as expected! Searching further paths...', is_warning=True)

    remaining_candidate_env_vars = {
        env_var: value for env_var, value in candidate_env_vars.items()
        if env_var not in {"CONDA_PREFIX", "LD_LIBRARY_PATH"}
    }

    for env_var, value in remaining_candidate_env_vars.items():
        cuda_runtime_libs.update(find_cuda_lib_in(value))

    if len(cuda_runtime_libs) == 0:
        CUDASetup.get_instance().add_log_entry('CUDA SETUP: WARNING! libcudart.so not found in any environment path. Searching in backup paths...')
        cuda_runtime_libs.update(find_cuda_lib_in('/usr/local/cuda/lib64'))

    warn_in_case_of_duplicates(cuda_runtime_libs)

    cuda_setup = CUDASetup.get_instance()
    cuda_setup.add_log_entry(f'DEBUG: Possible options found for libcudart.so: {cuda_runtime_libs}')

    return next(iter(cuda_runtime_libs)) if cuda_runtime_libs else None
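
# Illustrative outcome (paths are assumptions): with CONDA_PREFIX=/home/user/miniconda3
# and libcudart.so.11.0 present under its lib/ directory, this returns
# Path('/home/user/miniconda3/lib/libcudart.so.11.0'); if nothing is found anywhere,
# it returns None and the caller falls back to the error instructions.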

# https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART____VERSION.html#group__CUDART____VERSION
def get_cuda_version():
    major, minor = map(int, torch.version.cuda.split("."))

    if major < 11:
        CUDASetup.get_instance().add_log_entry('CUDA SETUP: CUDA versions lower than 11 are currently not supported for LLM.int8(). You will only be able to use 8-bit optimizers and quantization routines!')

    return f'{major}{minor}'
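
# For example, torch.version.cuda == '11.8' yields the version string '118',
# which later selects a binary such as libbitsandbytes_cuda118.so.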


def get_compute_capabilities():
    ccs = []
    for i in range(torch.cuda.device_count()):
        cc_major, cc_minor = torch.cuda.get_device_capability(i)
        ccs.append(f"{cc_major}.{cc_minor}")
    return ccs
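
# Illustrative output (assumed hardware): a machine with an RTX 3090 and a
# GTX 1080 yields ['8.6', '6.1']; evaluate_cuda_setup() keeps the highest value.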


def evaluate_cuda_setup():
    cuda_setup = CUDASetup.get_instance()
    if 'BITSANDBYTES_NOWELCOME' not in os.environ or str(os.environ['BITSANDBYTES_NOWELCOME']) == '0':
        cuda_setup.add_log_entry('')
        cuda_setup.add_log_entry('='*35 + 'BUG REPORT' + '='*35)
        cuda_setup.add_log_entry('Welcome to bitsandbytes. For bug reports, please run\n\npython -m bitsandbytes\n\n'
            'and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues')
        cuda_setup.add_log_entry('='*80)

    if not torch.cuda.is_available(): return 'libbitsandbytes_cpu.so', None, None, None

    cudart_path = determine_cuda_runtime_lib_path()
    ccs = get_compute_capabilities()
    # Sort numerically, not lexically, so e.g. '10.0' ranks above '9.0'.
    ccs.sort(key=lambda v: tuple(map(int, v.split("."))))
    cc = ccs[-1]  # we take the highest capability
    cuda_version_string = get_cuda_version()

    cuda_setup.add_log_entry(f"CUDA SETUP: PyTorch settings found: CUDA_VERSION={cuda_version_string}, Highest Compute Capability: {cc}.")
    cuda_setup.add_log_entry("CUDA SETUP: To manually override the PyTorch CUDA version please see: "
        "https://github.com/TimDettmers/bitsandbytes/blob/main/how_to_use_nonpytorch_cuda.md")

    # 7.5 is the minimum CC for cuBLASLt
    has_cublaslt = is_cublasLt_compatible(cc)

    # TODO:
    # (1) CUDA missing cases (no CUDA installed but CUDA driver (nvidia-smi accessible))
    # (2) Multiple CUDA versions installed

    # we use ls -l instead of nvcc to determine the cuda version
    # since most installations will have the libcudart.so installed, but not the compiler

    if has_cublaslt:
        binary_name = f"libbitsandbytes_cuda{cuda_version_string}.so"
    else:
        # if not has_cublaslt (CC < 7.5), then we have to choose _nocublaslt.so
        binary_name = f"libbitsandbytes_cuda{cuda_version_string}_nocublaslt.so"

    return binary_name, cudart_path, cc, cuda_version_string
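

# Minimal smoke-test sketch (illustration only, not part of the library's API):
# running this module directly performs the full setup and replays its log.
if __name__ == '__main__':
    setup = CUDASetup.get_instance()
    setup.run_cuda_setup()   # detect CUDA, pick and load the matching binary
    setup.print_log_stack()  # print the collected log entries and warnings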