|
|
|
"""*********************************************************************************************""" |
|
|
|
|
|
|
|
|
|
"""*********************************************************************************************""" |
|
|
|
|
|
|
|
|
|
|
|
import os |
|
import re |
|
import yaml |
|
import glob |
|
import random |
|
import argparse |
|
import importlib |
|
from shutil import copyfile |
|
from argparse import Namespace |
|
|
|
import torch |
|
import numpy as np |
|
|
|
from pretrain.runner import Runner |
|
from utility.helper import override |
|
|
|
|
|
|
|
|
|
|
|
def get_pretrain_args():
    """Parse CLI arguments for pretraining and resolve the runner/model configs.

    Supports three start modes:
      - fresh experiment (requires ``--expname`` and ``--upstream``),
      - explicit resume via ``--past_exp`` (a checkpoint path or a directory),
      - auto-resume via ``--auto_resume`` when expdir already holds checkpoints.

    Returns:
        tuple: ``(args, config)`` — the resolved argparse ``Namespace`` and the
        runner config dict (loaded from yaml, or restored from the checkpoint
        when resuming).

    Raises:
        FileNotFoundError: if the runner config or the upstream model config
            path does not point to an existing file.
    """
    parser = argparse.ArgumentParser()

    # Resume options. When a checkpoint is used, its stored args/config
    # overwrite the ones parsed from the command line (see update_args below).
    parser.add_argument('-e', '--past_exp', metavar='{CKPT_PATH,CKPT_DIR}', help='Resume training from a checkpoint')
    parser.add_argument('-o', '--override', help='Used to override args and config, this is at the highest priority')

    # Runner-level experiment configuration (everything except the upstream model).
    parser.add_argument('-c', '--config', metavar='CONFIG_PATH', help='The yaml file for configuring the whole experiment, except the upstream model')

    # Upstream model selection and its own config file.
    parser.add_argument('-u', '--upstream', choices=os.listdir('pretrain/'))
    parser.add_argument('-g', '--upstream_config', metavar='CONFIG_PATH', help='The yaml file for configuring the upstream model')

    # Experiment directory: either a full path (--expdir) or a name under the
    # default root result/pretrain (--expname).
    parser.add_argument('-n', '--expname', help='Save experiment at expdir/expname')
    parser.add_argument('-p', '--expdir', help='Save experiment at expdir')
    parser.add_argument('-a', '--auto_resume', action='store_true', help='Auto-resume if the expdir contains checkpoints')

    # Misc runtime options.
    parser.add_argument('--seed', default=1337, type=int)
    parser.add_argument('--device', default='cuda', help='model.to(device)')
    parser.add_argument('--multi_gpu', action='store_true', help='Enables multi-GPU training')

    args = parser.parse_args()

    if args.expdir is None:
        args.expdir = f'result/pretrain/{args.expname}'

    # Auto-resume only kicks in when the expdir already contains checkpoints.
    if args.auto_resume and os.path.isdir(args.expdir):
        ckpt_pths = glob.glob(f'{args.expdir}/states-*.ckpt')
        if len(ckpt_pths) > 0:
            args.past_exp = args.expdir

    if args.past_exp:
        # A directory means "resume from the latest checkpoint inside it".
        if os.path.isdir(args.past_exp):
            ckpt_pths = glob.glob(f'{args.past_exp}/states-*.ckpt')
            assert len(ckpt_pths) > 0
            # Sort by the step number embedded in 'states-<step>.ckpt'.
            ckpt_pths = sorted(ckpt_pths, key=lambda pth: int(pth.split('-')[-1].split('.')[0]))
            ckpt_pth = ckpt_pths[-1]
        else:
            ckpt_pth = args.past_exp

        print(f'[Runner] - Resume from {ckpt_pth}')

        # Load on CPU; the model is moved to args.device later by the Runner.
        ckpt = torch.load(ckpt_pth, map_location='cpu')

        def update_args(old, new):
            """Merge two Namespaces; values in `new` (the checkpoint) win."""
            old_dict = vars(old)
            new_dict = vars(new)
            old_dict.update(new_dict)
            return Namespace(**old_dict)

        # The checkpoint's stored args/config take priority over fresh CLI args.
        args = update_args(args, ckpt['Args'])
        os.makedirs(args.expdir, exist_ok=True)
        args.init_ckpt = ckpt_pth
        config = ckpt['Config']

    else:
        print('[Runner] - Start a new experiment')
        args.init_ckpt = None

        assert args.expname is not None
        if args.expdir is None:
            args.expdir = f'result/pretrain/{args.expname}'
        os.makedirs(args.expdir, exist_ok=True)

        # Map the (possibly abbreviated) upstream name to exactly one
        # directory under pretrain/.
        upstream_dirs = [u for u in os.listdir('pretrain/') if re.search(f'^{u}_|^{u}$', args.upstream)]
        assert len(upstream_dirs) == 1

        if args.config is None:
            args.config = f'pretrain/{upstream_dirs[0]}/config_runner.yaml'
        # FIX: validate the path BEFORE opening it, so a wrong path raises the
        # intended FileNotFoundError instead of an unhandled error from open().
        if not os.path.isfile(args.config):
            raise FileNotFoundError('Wrong file path for runner config.')
        with open(args.config, 'r') as file:
            config = yaml.load(file, Loader=yaml.FullLoader)
        # Snapshot the runner config into the experiment directory.
        copyfile(args.config, f'{args.expdir}/config_runner.yaml')

        if args.upstream_config is None:
            default_upstream_config = f'pretrain/{upstream_dirs[0]}/config_model.yaml'
            assert os.path.isfile(default_upstream_config)
            args.upstream_config = default_upstream_config
        # Same check-before-use fix as the runner config above.
        if not os.path.isfile(args.upstream_config):
            raise FileNotFoundError('Wrong file path for model config.')
        # Snapshot the model config into the experiment directory.
        copyfile(args.upstream_config, f'{args.expdir}/config_model.yaml')

    # Highest-priority overrides, applied last ("none" disables them).
    if args.override is not None and args.override.lower() != "none":
        override(args.override, args, config)
        os.makedirs(args.expdir, exist_ok=True)

    return args, config
|
|
|
|
|
|
|
|
|
|
|
def main():
    """Entry point: resolve args/config, seed all RNGs, and run pretraining."""
    args, config = get_pretrain_args()

    # Seed every RNG source for reproducibility.
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(args.seed)
        # Deterministic cuDNN kernels: reproducible runs at some speed cost.
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

    runner = Runner(args, config)
    # FIX: call the method directly; eval('runner.train')() was a needless
    # (and eval-based) indirection with identical behavior.
    runner.train()
    runner.logger.close()
|
|
|
|
|
# Script entry guard: run pretraining only when executed directly.
if __name__ == '__main__':
    main()
|
|