File size: 1,156 Bytes
e34aada |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 |
# import utils.commons.single_thread_env # NOQA
import os
import sys
sys.path.append(os.path.abspath("./"))
from utils.commons.hparams import hparams, set_hparams
import importlib
def run_task():
    """Import the task class named by ``hparams['task_cls']`` and start it.

    ``hparams['task_cls']`` is read as a dotted path: everything before the
    last dot is imported as a module, and the final component is looked up
    on that module. The resulting object's ``start()`` is then called.

    Raises:
        ValueError: If ``task_cls`` is empty or has no module part.
        ImportError: If the module part cannot be imported.
        AttributeError: If the module has no attribute with the class name.
    """
    task_path = hparams['task_cls']
    # Validate explicitly: an ``assert`` would be stripped under ``python -O``.
    if not task_path:
        raise ValueError("hparams['task_cls'] must be a non-empty dotted path")

    # Split once on the last dot: "pkg.module.Cls" -> ("pkg.module", "Cls").
    module_name, _, cls_name = task_path.rpartition(".")
    if not module_name:
        # Without this check import_module("") fails with the far less
        # helpful "Empty module name".
        raise ValueError(
            f"hparams['task_cls'] must look like 'package.module.ClassName', "
            f"got {task_path!r}"
        )

    task_cls = getattr(importlib.import_module(module_name), cls_name)
    task_cls.start()
def clear_gpus():
    """Kill ``voidgpu<id>`` processes for every device in CUDA_VISIBLE_DEVICES.

    For each comma-separated device id in the ``CUDA_VISIBLE_DEVICES``
    environment variable, runs ``pkill -f voidgpu<id>``, which signals every
    process whose command line matches that pattern. The call is best-effort:
    ``pkill`` exit codes are ignored.

    Blank entries are skipped. Previously an unset or empty variable produced
    the bare pattern ``voidgpu``, which matched the processes of *every*
    device instead of none.
    """
    import subprocess  # local import keeps the block self-contained

    visible = os.environ.get('CUDA_VISIBLE_DEVICES', '')
    # Tolerate "0, 1" style values and drop empty fields ("" or "0,,1").
    device_ids = [d.strip() for d in visible.split(",") if d.strip()]

    for device_id in device_ids:
        try:
            # Argument list (no shell): the device id is never interpreted
            # as shell syntax.
            subprocess.run(["pkill", "-f", f"voidgpu{device_id}"], check=False)
        except FileNotFoundError:
            # pkill is not installed; stay best-effort like the former
            # os.system call, which would only have returned non-zero.
            break
if __name__ == '__main__':
    # Default to device 0 when the caller did not select any GPUs.
    if os.environ.get('CUDA_VISIBLE_DEVICES', '') == '':
        os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    try:
        set_hparams()
        run_task()
    except KeyboardInterrupt:
        # Ctrl-C can arrive before set_hparams() has populated hparams, so
        # read 'init_method' with .get() instead of risking a KeyError here.
        if hparams.get('init_method') == 'file':
            import shutil  # only needed on this cleanup path

            # on exit, remove the shared file in nfs for DDP
            exp_name = hparams['exp_name']
            shared_file_name = f'/mnt/bn/sa-ag-data/yezhenhui/nfs/pytorch_ddp_sharedfile/{exp_name}'
            if os.path.exists(shared_file_name):
                # Remove in-process rather than via `rm -r` in a shell, so an
                # exp_name with spaces or shell metacharacters is handled
                # literally.
                try:
                    if os.path.isdir(shared_file_name) and not os.path.islink(shared_file_name):
                        shutil.rmtree(shared_file_name)
                    else:
                        os.remove(shared_file_name)
                except OSError as err:
                    # Cleanup is best-effort, as it was with `rm`: report and
                    # carry on exiting.
                    print(
                        f"| Warning: could not remove {shared_file_name}: {err}",
                        file=sys.stderr,
                    )
|