# Copyright (c) OpenMMLab. All rights reserved.
import logging
import os
import random
import subprocess
import sys

from mmengine.logging import print_log

import xtuner

# Define valid modes
MODES = ('list-cfg', 'copy-cfg', 'log-dataset', 'check-custom-dataset',
         'train', 'test', 'chat', 'convert', 'preprocess', 'mmbench',
         'eval_refcoco', 'list-dataset-format', 'mmbench_omg_seg_llava',
         'gcd_omg_seg_llava', 'refcoco_omg_seg_llava',
         'region_cap_omg_seg_llava', 'region_cap_mask_omg_seg_llava',
         'refcoco_omg_seg_llava_msseg')
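# NOTE: MODES is interpolated into the help messages below and should stay in
# sync with the keys of the `modes` dispatch dict defined further down.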

CLI_HELP_MSG = \
    f"""
    Arguments received: {str(['xtuner'] + sys.argv[1:])}. xtuner commands use the following syntax:

        xtuner MODE MODE_ARGS ARGS

        Where   MODE (required) is one of {MODES}
                MODE_ARGS (optional) are the arguments for the specific mode
                ARGS (optional) are the arguments for the specific command

    Some usages for xtuner commands: (See more by using -h for specific command!)

        1. List all predefined configs:
            xtuner list-cfg
        2. Copy a predefined config to a given path:
            xtuner copy-cfg $CONFIG $SAVE_FILE
        3-1. Fine-tune LLMs by a single GPU:
            xtuner train $CONFIG
        3-2. Fine-tune LLMs by multiple GPUs:
            NPROC_PER_NODE=$NGPUS NNODES=$NNODES NODE_RANK=$NODE_RANK PORT=$PORT ADDR=$ADDR xtuner train $CONFIG
        4-1. Convert the pth model to HuggingFace's model:
            xtuner convert pth_to_hf $CONFIG $PATH_TO_PTH_MODEL $SAVE_PATH_TO_HF_MODEL
        4-2. Merge the HuggingFace's adapter to the pretrained base model:
            xtuner convert merge $LLM $ADAPTER $SAVE_PATH
            xtuner convert merge $CLIP $ADAPTER $SAVE_PATH --is-clip
        4-3. Split HuggingFace's LLM to the smallest sharded one:
            xtuner convert split $LLM $SAVE_PATH
        5-1. Chat with LLMs with HuggingFace's model and adapter:
            xtuner chat $LLM --adapter $ADAPTER --prompt-template $PROMPT_TEMPLATE --system-template $SYSTEM_TEMPLATE
        5-2. Chat with VLMs with HuggingFace's model and LLaVA:
            xtuner chat $LLM --llava $LLAVA --visual-encoder $VISUAL_ENCODER --image $IMAGE --prompt-template $PROMPT_TEMPLATE --system-template $SYSTEM_TEMPLATE
        6-1. Preprocess arxiv dataset:
            xtuner preprocess arxiv $SRC_FILE $DST_FILE --start-date $START_DATE --categories $CATEGORIES
        6-2. Preprocess refcoco dataset:
            xtuner preprocess refcoco --ann-path $RefCOCO_ANN_PATH --image-path $COCO_IMAGE_PATH --save-path $SAVE_PATH
        7-1. Log processed dataset:
            xtuner log-dataset $CONFIG
        7-2. Verify the correctness of the config file for the custom dataset:
            xtuner check-custom-dataset $CONFIG
        8. MMBench evaluation:
            xtuner mmbench $LLM --llava $LLAVA --visual-encoder $VISUAL_ENCODER --prompt-template $PROMPT_TEMPLATE --data-path $MMBENCH_DATA_PATH
        9. Refcoco evaluation:
            xtuner eval_refcoco $LLM --llava $LLAVA --visual-encoder $VISUAL_ENCODER --prompt-template $PROMPT_TEMPLATE --data-path $REFCOCO_DATA_PATH
        10. List all dataset formats which are supported in XTuner:
            xtuner list-dataset-format

    Run special commands:

        xtuner help
        xtuner version

    GitHub: https://github.com/InternLM/xtuner
    """  # noqa: E501

CONVERT_HELP_MSG = \
    f"""
    Arguments received: {str(['xtuner'] + sys.argv[1:])}. xtuner commands use the following syntax:

        xtuner MODE MODE_ARGS ARGS

        Where   MODE (required) is one of {MODES}
                MODE_ARGS (optional) are the arguments for the specific mode
                ARGS (optional) are the arguments for the specific command

    Some usages for convert: (See more by using -h for specific command!)

        1. Convert the pth model to HuggingFace's model:
            xtuner convert pth_to_hf $CONFIG $PATH_TO_PTH_MODEL $SAVE_PATH_TO_HF_MODEL
        2. Merge the HuggingFace's adapter to the pretrained LLM:
            xtuner convert merge $LLM $ADAPTER $SAVE_PATH
        3. Split HuggingFace's LLM to the smallest sharded one:
            xtuner convert split $LLM $SAVE_PATH

    GitHub: https://github.com/InternLM/xtuner
    """  # noqa: E501

PREPROCESS_HELP_MSG = \
    f"""
    Arguments received: {str(['xtuner'] + sys.argv[1:])}. xtuner commands use the following syntax:

        xtuner MODE MODE_ARGS ARGS

        Where   MODE (required) is one of {MODES}
                MODE_ARGS (optional) are the arguments for the specific mode
                ARGS (optional) are the arguments for the specific command

    Some usages for preprocess: (See more by using -h for specific command!)

        1. Preprocess arxiv dataset:
            xtuner preprocess arxiv $SRC_FILE $DST_FILE --start-date $START_DATE --categories $CATEGORIES
        2. Preprocess refcoco dataset:
            xtuner preprocess refcoco --ann-path $RefCOCO_ANN_PATH --image-path $COCO_IMAGE_PATH --save-path $SAVE_PATH

    GitHub: https://github.com/InternLM/xtuner
    """  # noqa: E501

special = {
    'help': lambda: print_log(CLI_HELP_MSG, 'current'),
    'version': lambda: print_log(xtuner.__version__, 'current')
}
special = {
    **special,
    **{f'-{k[0]}': v
       for k, v in special.items()},
    **{f'--{k}': v
       for k, v in special.items()}
}
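# After the expansion above, each special command also answers to its flag
# forms: 'help' -> '-h'/'--help', 'version' -> '-v'/'--version'.


# Each mode helper below lazily imports its tool module and returns that
# module's file path, which cli() then executes in a subprocess; the
# *_help_msg helpers simply print usage. Deferring the imports keeps CLI
# startup fast and makes optional dependencies (e.g. omg_llava) necessary
# only for the modes that actually use them.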
def list_dataset_format():
    from xtuner.tools import list_dataset_format
    return list_dataset_format.__file__


def list_cfg():
    from xtuner.tools import list_cfg
    return list_cfg.__file__


def copy_cfg():
    from xtuner.tools import copy_cfg
    return copy_cfg.__file__


def log_dataset():
    from xtuner.tools import log_dataset
    return log_dataset.__file__


def check_custom_dataset():
    from xtuner.tools import check_custom_dataset
    return check_custom_dataset.__file__


def train():
    from xtuner.tools import train
    return train.__file__


def test():
    from xtuner.tools import test
    return test.__file__


def chat():
    from xtuner.tools import chat
    return chat.__file__


def mmbench():
    from xtuner.tools import mmbench
    return mmbench.__file__


def pth_to_hf():
    from xtuner.tools.model_converters import pth_to_hf
    return pth_to_hf.__file__


def merge():
    from xtuner.tools.model_converters import merge
    return merge.__file__


def split():
    from xtuner.tools.model_converters import split
    return split.__file__


def arxiv_preprocess():
    from xtuner.tools.data_preprocess import arxiv as arxiv_preprocess
    return arxiv_preprocess.__file__


def convert_refcoco():
    from xtuner.tools.data_preprocess import convert_refcoco
    return convert_refcoco.__file__


def convert_help_msg():
    print_log(CONVERT_HELP_MSG, 'current')


def preprocess_help_msg():
    print_log(PREPROCESS_HELP_MSG, 'current')


def eval_refcoco():
    from xtuner.tools import eval_refcoco
    return eval_refcoco.__file__


def mmbench_omg_seg_llava():
    from omg_llava.tools import mmbench_omg_seg_llava
    return mmbench_omg_seg_llava.__file__


def gcd_omg_seg_llava():
    from omg_llava.tools import gcd_omg_seg_llava
    return gcd_omg_seg_llava.__file__


def refcoco_omg_seg_llava():
    from omg_llava.tools import refcoco_omg_seg_llava
    return refcoco_omg_seg_llava.__file__


def refcoco_omg_seg_llava_msseg():
    from omg_llava.tools import refcoco_omg_seg_llava_msseg
    return refcoco_omg_seg_llava_msseg.__file__


def region_cap_omg_seg_llava():
    from omg_llava.tools import region_cap_omg_seg_llava
    return region_cap_omg_seg_llava.__file__


def region_cap_mask_omg_seg_llava():
    from omg_llava.tools import region_cap_mask_omg_seg_llava
    return region_cap_mask_omg_seg_llava.__file__


modes = {
    'list-cfg': list_cfg,
    'copy-cfg': copy_cfg,
    'log-dataset': log_dataset,
    'check-custom-dataset': check_custom_dataset,
    'train': train,
    'test': test,
    'chat': chat,
    'mmbench': mmbench,
    'convert': {
        'pth_to_hf': pth_to_hf,
        'merge': merge,
        'split': split,
        '--help': convert_help_msg,
        '-h': convert_help_msg
    },
    'preprocess': {
        'arxiv': arxiv_preprocess,
        'refcoco': convert_refcoco,
        '--help': preprocess_help_msg,
        '-h': preprocess_help_msg
    },
    'eval_refcoco': eval_refcoco,
    'list-dataset-format': list_dataset_format,
    'mmbench_omg_seg_llava': mmbench_omg_seg_llava,
    'gcd_omg_seg_llava': gcd_omg_seg_llava,
    'refcoco_omg_seg_llava': refcoco_omg_seg_llava,
    'refcoco_omg_seg_llava_msseg': refcoco_omg_seg_llava_msseg,
    'region_cap_omg_seg_llava': region_cap_omg_seg_llava,
    'region_cap_mask_omg_seg_llava': region_cap_mask_omg_seg_llava,
}
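# Nested dict values ('convert', 'preprocess') consume one extra positional
# argument as the sub-mode: `xtuner convert pth_to_hf ...` resolves to
# modes['convert']['pth_to_hf'] in cli() below.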

HELP_FUNCS = [preprocess_help_msg, convert_help_msg]
MAP_FILE_FUNCS = [
    list_cfg, copy_cfg, log_dataset, check_custom_dataset, train, test, chat,
    mmbench, pth_to_hf, merge, split, arxiv_preprocess, eval_refcoco,
    convert_refcoco, list_dataset_format, mmbench_omg_seg_llava,
    gcd_omg_seg_llava, refcoco_omg_seg_llava, refcoco_omg_seg_llava_msseg,
    region_cap_omg_seg_llava, region_cap_mask_omg_seg_llava
]
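# HELP_FUNCS are invoked in-process to print usage; every other mode function
# (collected in MAP_FILE_FUNCS) returns a script path that cli() launches as
# a child process.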


def cli():
    args = sys.argv[1:]

    if not args:  # no arguments passed
        print_log(CLI_HELP_MSG, 'current')
        return

    if args[0].lower() in special:
        special[args[0].lower()]()
        return
    elif args[0].lower() in modes:
        try:
            fn_or_dict = modes[args[0].lower()]
            n_arg = 0
            if isinstance(fn_or_dict, dict):
                # Sub-mode dispatch ('convert', 'preprocess'): the next
                # positional argument selects the concrete tool.
                n_arg += 1
                fn = fn_or_dict[args[n_arg].lower()]
            else:
                fn = fn_or_dict
            assert callable(fn)

            if fn in HELP_FUNCS:
                fn()
            else:
                # With '--launcher slurm', slurm handles process placement
                # itself, so no torchrun wrapper is needed.
                slurm_launcher = False
                for i in range(n_arg + 1, len(args)):
                    if args[i] == '--launcher':
                        if i + 1 < len(args) and args[i + 1] == 'slurm':
                            slurm_launcher = True
                        break

                nnodes = int(os.environ.get('NNODES', 1))
                nproc_per_node = int(os.environ.get('NPROC_PER_NODE', 1))
                if slurm_launcher or (nnodes == 1 and nproc_per_node == 1):
                    # Single-process run: execute the tool script directly.
                    subprocess.run(['python', fn()] + args[n_arg + 1:])
                else:
                    # Multi-process run: delegate to torchrun, taking the
                    # topology from environment variables.
                    port = os.environ.get('PORT', None)
                    if port is None:
                        port = random.randint(20000, 29999)
                        print_log(f'Use random port: {port}', 'current',
                                  logging.WARNING)
                    torchrun_args = [
                        f'--nnodes={nnodes}',
                        f"--node_rank={os.environ.get('NODE_RANK', 0)}",
                        f'--nproc_per_node={nproc_per_node}',
                        f"--master_addr={os.environ.get('ADDR', '127.0.0.1')}",
                        f'--master_port={port}'
                    ]
                    subprocess.run(['torchrun'] + torchrun_args + [fn()] +
                                   args[n_arg + 1:] +
                                   ['--launcher', 'pytorch'])
        except Exception as e:
            print_log(f"WARNING: command error: '{e}'!", 'current',
                      logging.WARNING)
            print_log(CLI_HELP_MSG, 'current', logging.WARNING)
            return
    else:
        print_log('WARNING: command error!', 'current', logging.WARNING)
        print_log(CLI_HELP_MSG, 'current', logging.WARNING)
        return
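

# Convenience guard, assuming this file may also be run directly as a script
# (the 'xtuner' command itself is normally installed via a console_scripts
# entry point); e.g. `python entry_point.py train $CONFIG`.
if __name__ == '__main__':
    cli()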