File size: 4,628 Bytes
de4ade4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 |
"""MosaicML LLM Foundry package setup."""
import os
import re
import setuptools
from setuptools import setup
_PACKAGE_NAME = 'llm-foundry'
_PACKAGE_DIR = 'llmfoundry'
_REPO_REAL_PATH = os.path.dirname(os.path.realpath(__file__))
_PACKAGE_REAL_PATH = os.path.join(_REPO_REAL_PATH, _PACKAGE_DIR)
# Read the repo version
# We can't use `.__version__` from the library since it's not installed yet
with open(os.path.join(_PACKAGE_REAL_PATH, '__init__.py')) as f:
content = f.read()
# regex: '__version__', whitespace?, '=', whitespace, quote, version, quote
# we put parens around the version so that it becomes elem 1 of the match
expr = re.compile(r"""^__version__\W+=\W+['"]([0-9\.]*)['"]""", re.MULTILINE)
repo_version = expr.findall(content)[0]
# Use repo README for PyPi description
with open('README.md', 'r', encoding='utf-8') as fh:
long_description = fh.read()
# Hide the content between <!-- SETUPTOOLS_LONG_DESCRIPTION_HIDE_BEGIN --> and
# <!-- SETUPTOOLS_LONG_DESCRIPTION_HIDE_END --> tags in the README
while True:
start_tag = '<!-- SETUPTOOLS_LONG_DESCRIPTION_HIDE_BEGIN -->'
end_tag = '<!-- SETUPTOOLS_LONG_DESCRIPTION_HIDE_END -->'
start = long_description.find(start_tag)
end = long_description.find(end_tag)
if start == -1:
assert end == -1, 'there should be a balanced number of start and ends'
break
else:
assert end != -1, 'there should be a balanced number of start and ends'
long_description = long_description[:start] + long_description[
end + len(end_tag):]
classifiers = [
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.8',
'Programming Language :: Python :: 3.9',
'Programming Language :: Python :: 3.10',
]
install_requires = [
'mosaicml[libcloud,wandb,mlflow,oci,gcs]>=0.16.3,<0.17',
'accelerate>=0.20', # for HF inference `device_map`
'transformers>=4.33',
'mosaicml-streaming>=0.6,<0.7',
'torch>=1.13.1',
'datasets>=2.14.5',
'fsspec==2023.6.0', # newer version results in a bug in datasets that duplicates data
'sentencepiece==0.1.97',
'einops==0.5.0',
'omegaconf>=2.2.3,<3',
'slack-sdk<4',
'mosaicml-cli>=0.3,<1',
'onnx==1.14.0',
'onnxruntime==1.15.1',
'boto3>=1.21.45,<2',
'huggingface-hub>=0.17.0,<1.0',
]
extra_deps = {}
extra_deps['dev'] = [
'pre-commit>=2.18.1,<3',
'pytest>=7.2.1,<8',
'pytest_codeblocks>=0.16.1,<0.17',
'pytest-cov>=4,<5',
'pyright==1.1.256',
'toml>=0.10.2,<0.11',
'packaging>=21,<23',
'hf_transfer==0.1.3',
]
extra_deps['tensorboard'] = [
'mosaicml[tensorboard]>=0.16.1,<0.17',
]
extra_deps['gpu'] = [
'flash-attn==1.0.9',
'mosaicml-turbo==0.0.4',
# PyPI does not support direct dependencies, so we remove this line before uploading from PyPI
'xentropy-cuda-lib@git+https://github.com/HazyResearch/[email protected]#subdirectory=csrc/xentropy',
]
extra_deps['gpu-flash2'] = [
'flash-attn==2.3.2',
'mosaicml-turbo==0.0.4',
# PyPI does not support direct dependencies, so we remove this line before uploading from PyPI
'xentropy-cuda-lib@git+https://github.com/HazyResearch/[email protected]#subdirectory=csrc/xentropy',
]
extra_deps['peft'] = [
'loralib==0.1.1', # lora core
'bitsandbytes==0.39.1', # 8bit
'scipy>=1.10.0,<=1.11.0', # bitsandbytes dependency; TODO: eliminate when incorporated to bitsandbytes
# TODO: pin peft when it stabilizes.
# PyPI does not support direct dependencies, so we remove this line before uploading from PyPI
'peft==0.4.0',
]
extra_deps['openai'] = [
'openai==0.27.8',
'tiktoken==0.4.0',
]
extra_deps['all-cpu'] = set(
dep for key, deps in extra_deps.items() for dep in deps if 'gpu' not in key)
extra_deps['all'] = set(dep for key, deps in extra_deps.items() for dep in deps
if key != 'gpu-flash2')
extra_deps['all-flash2'] = set(
dep for key, deps in extra_deps.items() for dep in deps if key != 'gpu')
setup(
name=_PACKAGE_NAME,
version=repo_version,
author='MosaicML',
author_email='[email protected]',
description='LLM Foundry',
long_description=long_description,
long_description_content_type='text/markdown',
url='https://github.com/mosaicml/llm-foundry/',
package_data={
'llmfoundry': ['py.typed'],
},
packages=setuptools.find_packages(
exclude=['.github*', 'mcli*', 'scripts*', 'tests*']),
classifiers=classifiers,
install_requires=install_requires,
extras_require=extra_deps,
python_requires='>=3.7',
)
|