# Copyright (c) OpenMMLab. All rights reserved.
import logging
import os
import random
import subprocess
import sys

from mmengine.logging import print_log

import xtuner

# Define valid modes
MODES = ('list-cfg', 'copy-cfg', 'log-dataset', 'check-custom-dataset',
         'train', 'test', 'chat', 'convert', 'preprocess', 'mmbench',
         'eval_refcoco', 'mmbench_omg_seg_llava', 'gcd_omg_seg_llava',
         'refcoco_omg_seg_llava', 'region_cap_omg_seg_llava',
         'region_cap_mask_omg_seg_llava', 'refcoco_omg_seg_llava_msseg',
         'list-dataset-format')

CLI_HELP_MSG = \
    f"""
    Arguments received: {str(['xtuner'] + sys.argv[1:])}. xtuner commands use the following syntax:

        xtuner MODE MODE_ARGS ARGS

        Where   MODE (required) is one of {MODES}
                MODE_ARG (optional) is the argument for the specific mode
                ARGS (optional) are the arguments for the specific command

    Some usages for xtuner commands: (See more by using -h for a specific command!)

        1. List all predefined configs:
            xtuner list-cfg
        2. Copy a predefined config to a given path:
            xtuner copy-cfg $CONFIG $SAVE_FILE
        3-1. Fine-tune LLMs on a single GPU:
            xtuner train $CONFIG
        3-2. Fine-tune LLMs on multiple GPUs:
            NPROC_PER_NODE=$NGPUS NNODES=$NNODES NODE_RANK=$NODE_RANK PORT=$PORT ADDR=$ADDR xtuner train $CONFIG $GPUS
        4-1. Convert a pth model to a HuggingFace model:
            xtuner convert pth_to_hf $CONFIG $PATH_TO_PTH_MODEL $SAVE_PATH_TO_HF_MODEL
        4-2. Merge a HuggingFace adapter into the pretrained base model:
            xtuner convert merge $LLM $ADAPTER $SAVE_PATH
            xtuner convert merge $CLIP $ADAPTER $SAVE_PATH --is-clip
        4-3. Split a HuggingFace LLM into the smallest shards:
            xtuner convert split $LLM $SAVE_PATH
        5-1. Chat with LLMs using a HuggingFace model and adapter:
            xtuner chat $LLM --adapter $ADAPTER --prompt-template $PROMPT_TEMPLATE --system-template $SYSTEM_TEMPLATE
        5-2. Chat with VLMs using a HuggingFace model and LLaVA:
            xtuner chat $LLM --llava $LLAVA --visual-encoder $VISUAL_ENCODER --image $IMAGE --prompt-template $PROMPT_TEMPLATE --system-template $SYSTEM_TEMPLATE
        6-1. Preprocess the arxiv dataset:
            xtuner preprocess arxiv $SRC_FILE $DST_FILE --start-date $START_DATE --categories $CATEGORIES
        6-2. Preprocess the refcoco dataset:
            xtuner preprocess refcoco --ann-path $RefCOCO_ANN_PATH --image-path $COCO_IMAGE_PATH --save-path $SAVE_PATH
        7-1. Log the processed dataset:
            xtuner log-dataset $CONFIG
        7-2. Verify the correctness of the config file for a custom dataset:
            xtuner check-custom-dataset $CONFIG
        8. MMBench evaluation:
            xtuner mmbench $LLM --llava $LLAVA --visual-encoder $VISUAL_ENCODER --prompt-template $PROMPT_TEMPLATE --data-path $MMBENCH_DATA_PATH
        9. RefCOCO evaluation:
            xtuner eval_refcoco $LLM --llava $LLAVA --visual-encoder $VISUAL_ENCODER --prompt-template $PROMPT_TEMPLATE --data-path $REFCOCO_DATA_PATH
        10. List all dataset formats supported by XTuner:
            xtuner list-dataset-format

    Run special commands:

        xtuner help
        xtuner version

    GitHub: https://github.com/InternLM/xtuner
    """  # noqa: E501
CONVERT_HELP_MSG = \
    f"""
    Arguments received: {str(['xtuner'] + sys.argv[1:])}. xtuner commands use the following syntax:

        xtuner MODE MODE_ARGS ARGS

        Where   MODE (required) is one of {MODES}
                MODE_ARG (optional) is the argument for the specific mode
                ARGS (optional) are the arguments for the specific command

    Some usages for convert: (See more by using -h for a specific command!)

        1. Convert a pth model to a HuggingFace model:
            xtuner convert pth_to_hf $CONFIG $PATH_TO_PTH_MODEL $SAVE_PATH_TO_HF_MODEL
        2. Merge a HuggingFace adapter into the pretrained LLM:
            xtuner convert merge $LLM $ADAPTER $SAVE_PATH
        3. Split a HuggingFace LLM into the smallest shards:
            xtuner convert split $LLM $SAVE_PATH

    GitHub: https://github.com/InternLM/xtuner
    """  # noqa: E501
PREPROCESS_HELP_MSG = \
    f"""
    Arguments received: {str(['xtuner'] + sys.argv[1:])}. xtuner commands use the following syntax:

        xtuner MODE MODE_ARGS ARGS

        Where   MODE (required) is one of {MODES}
                MODE_ARG (optional) is the argument for the specific mode
                ARGS (optional) are the arguments for the specific command

    Some usages for preprocess: (See more by using -h for a specific command!)

        1. Preprocess the arxiv dataset:
            xtuner preprocess arxiv $SRC_FILE $DST_FILE --start-date $START_DATE --categories $CATEGORIES
        2. Preprocess the refcoco dataset:
            xtuner preprocess refcoco --ann-path $RefCOCO_ANN_PATH --image-path $COCO_IMAGE_PATH --save-path $SAVE_PATH

    GitHub: https://github.com/InternLM/xtuner
    """  # noqa: E501
special = {
    'help': lambda: print_log(CLI_HELP_MSG, 'current'),
    'version': lambda: print_log(xtuner.__version__, 'current')
}
special = {
    **special,
    **{f'-{k[0]}': v
       for k, v in special.items()},
    **{f'--{k}': v
       for k, v in special.items()}
}
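

# Each helper below imports its tool module only when the mode is actually
# requested and returns that module's __file__, i.e. the path of the script
# which cli() launches in a subprocess (via `python` or `torchrun`).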
def list_dataset_format():
    from xtuner.tools import list_dataset_format
    return list_dataset_format.__file__


def list_cfg():
    from xtuner.tools import list_cfg
    return list_cfg.__file__


def copy_cfg():
    from xtuner.tools import copy_cfg
    return copy_cfg.__file__


def log_dataset():
    from xtuner.tools import log_dataset
    return log_dataset.__file__


def check_custom_dataset():
    from xtuner.tools import check_custom_dataset
    return check_custom_dataset.__file__


def train():
    from xtuner.tools import train
    return train.__file__


def test():
    from xtuner.tools import test
    return test.__file__


def chat():
    from xtuner.tools import chat
    return chat.__file__


def mmbench():
    from xtuner.tools import mmbench
    return mmbench.__file__


def pth_to_hf():
    from xtuner.tools.model_converters import pth_to_hf
    return pth_to_hf.__file__


def merge():
    from xtuner.tools.model_converters import merge
    return merge.__file__


def split():
    from xtuner.tools.model_converters import split
    return split.__file__


def arxiv_preprocess():
    from xtuner.tools.data_preprocess import arxiv as arxiv_preprocess
    return arxiv_preprocess.__file__


def convert_refcoco():
    from xtuner.tools.data_preprocess import convert_refcoco
    return convert_refcoco.__file__


def convert_help_msg():
    print_log(CONVERT_HELP_MSG, 'current')


def preprocess_help_msg():
    print_log(PREPROCESS_HELP_MSG, 'current')


def eval_refcoco():
    from xtuner.tools import eval_refcoco
    return eval_refcoco.__file__


def mmbench_omg_seg_llava():
    from omg_llava.tools import mmbench_omg_seg_llava
    return mmbench_omg_seg_llava.__file__


def gcd_omg_seg_llava():
    from omg_llava.tools import gcd_omg_seg_llava
    return gcd_omg_seg_llava.__file__


def refcoco_omg_seg_llava():
    from omg_llava.tools import refcoco_omg_seg_llava
    return refcoco_omg_seg_llava.__file__


def refcoco_omg_seg_llava_msseg():
    from omg_llava.tools import refcoco_omg_seg_llava_msseg
    return refcoco_omg_seg_llava_msseg.__file__


def region_cap_omg_seg_llava():
    from omg_llava.tools import region_cap_omg_seg_llava
    return region_cap_omg_seg_llava.__file__


def region_cap_mask_omg_seg_llava():
    from omg_llava.tools import region_cap_mask_omg_seg_llava
    return region_cap_mask_omg_seg_llava.__file__
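

# Dispatch table from CLI mode names to the helpers above. Nested dicts
# (for `convert` and `preprocess`) dispatch on the sub-command passed as the
# second CLI argument.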
modes = {
    'list-cfg': list_cfg,
    'copy-cfg': copy_cfg,
    'log-dataset': log_dataset,
    'check-custom-dataset': check_custom_dataset,
    'train': train,
    'test': test,
    'chat': chat,
    'mmbench': mmbench,
    'convert': {
        'pth_to_hf': pth_to_hf,
        'merge': merge,
        'split': split,
        '--help': convert_help_msg,
        '-h': convert_help_msg
    },
    'preprocess': {
        'arxiv': arxiv_preprocess,
        'refcoco': convert_refcoco,
        '--help': preprocess_help_msg,
        '-h': preprocess_help_msg
    },
    'eval_refcoco': eval_refcoco,
    'list-dataset-format': list_dataset_format,
    'mmbench_omg_seg_llava': mmbench_omg_seg_llava,
    'gcd_omg_seg_llava': gcd_omg_seg_llava,
    'refcoco_omg_seg_llava': refcoco_omg_seg_llava,
    'refcoco_omg_seg_llava_msseg': refcoco_omg_seg_llava_msseg,
    'region_cap_omg_seg_llava': region_cap_omg_seg_llava,
    'region_cap_mask_omg_seg_llava': region_cap_mask_omg_seg_llava,
}
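
# HELP_FUNCS are invoked directly to print a help message; MAP_FILE_FUNCS
# return the path of a runnable tool script instead.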
HELP_FUNCS = [preprocess_help_msg, convert_help_msg]
MAP_FILE_FUNCS = [
    list_cfg, copy_cfg, log_dataset, check_custom_dataset, train, test, chat,
    mmbench, pth_to_hf, merge, split, arxiv_preprocess, eval_refcoco,
    convert_refcoco, list_dataset_format, mmbench_omg_seg_llava,
    gcd_omg_seg_llava, refcoco_omg_seg_llava, refcoco_omg_seg_llava_msseg,
    region_cap_omg_seg_llava, region_cap_mask_omg_seg_llava
]
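

# Command-line entry point: resolve the requested mode, then run the matching
# tool script either with plain `python` (single GPU, or the slurm launcher
# which spawns processes itself) or with `torchrun` (multi-GPU / multi-node,
# configured via the NNODES, NPROC_PER_NODE, NODE_RANK, ADDR and PORT
# environment variables).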
def cli():
    args = sys.argv[1:]
    if not args:  # no arguments passed
        print_log(CLI_HELP_MSG, 'current')
        return
    if args[0].lower() in special:
        special[args[0].lower()]()
        return
    elif args[0].lower() in modes:
        try:
            fn_or_dict = modes[args[0].lower()]
            n_arg = 0
            if isinstance(fn_or_dict, dict):
                n_arg += 1
                fn = fn_or_dict[args[n_arg].lower()]
            else:
                fn = fn_or_dict
            assert callable(fn)

            if fn in HELP_FUNCS:
                fn()
            else:
                slurm_launcher = False
                for i in range(n_arg + 1, len(args)):
                    if args[i] == '--launcher':
                        if i + 1 < len(args) and args[i + 1] == 'slurm':
                            slurm_launcher = True
                        break
                nnodes = int(os.environ.get('NNODES', 1))
                nproc_per_node = int(os.environ.get('NPROC_PER_NODE', 1))
                if slurm_launcher or (nnodes == 1 and nproc_per_node == 1):
                    subprocess.run(['python', fn()] + args[n_arg + 1:])
                else:
                    port = os.environ.get('PORT', None)
                    if port is None:
                        port = random.randint(20000, 29999)
                        print_log(f'Use random port: {port}', 'current',
                                  logging.WARNING)
                    torchrun_args = [
                        f'--nnodes={nnodes}',
                        f"--node_rank={os.environ.get('NODE_RANK', 0)}",
                        f'--nproc_per_node={nproc_per_node}',
                        f"--master_addr={os.environ.get('ADDR', '127.0.0.1')}",
                        f'--master_port={port}'
                    ]
                    subprocess.run(['torchrun'] + torchrun_args + [fn()] +
                                   args[n_arg + 1:] +
                                   ['--launcher', 'pytorch'])
        except Exception as e:
            print_log(f"WARNING: command error: '{e}'!", 'current',
                      logging.WARNING)
            print_log(CLI_HELP_MSG, 'current', logging.WARNING)
            return
    else:
        print_log('WARNING: command error!', 'current', logging.WARNING)
        print_log(CLI_HELP_MSG, 'current', logging.WARNING)
        return