Spaces:

waysolong
/

text_to_speech

Sleeping

text_to_speech / mtts /models /vocoder /VocGAN /config /default.yaml

wuxulong19950206

First model version

14d1720 over 1 year ago

1.73 kB

	data: # root path of train/validation data (either relative/absoulte path is ok)
	train: 'H:\Deepsync\backup\deepsync\LJSpeech-1.1\wavs'
	validation: 'H:\Deepsync\backup\deepsync\LJSpeech-1.1\valid'
	mel_path: 'H:\Deepsync\backup\fastspeech\data\mels'
	eval_path: ''
	---
	train:
	rep_discriminator: 1
	discriminator_train_start_steps: 100000
	num_workers: 8
	batch_size: 16
	optimizer: 'adam'
	adam:
	lr: 0.0001
	beta1: 0.5
	beta2: 0.9
	---
	audio:
	n_mel_channels: 80
	segment_length: 16000
	pad_short: 2000
	filter_length: 1024
	hop_length: 256 # WARNING: this can't be changed.
	win_length: 1024
	sampling_rate: 22050
	mel_fmin: 0.0
	mel_fmax: 8000.0
	---
	model:
	feat_match: 10.0
	lambda_adv: 1
	use_subband_stft_loss: False
	feat_loss: False
	out_channels: 1
	generator_ratio: [4, 4, 2, 2, 2, 2] # for 256 hop size and 22050 sample rate
	mult: 256
	n_residual_layers: 4
	num_D : 3
	ndf : 16
	n_layers: 3
	downsampling_factor: 4
	disc_out: 512

	stft_loss_params:
	fft_sizes: [1024, 2048, 512] # List of FFT size for STFT-based loss.
	hop_sizes: [120, 240, 50] # List of hop size for STFT-based loss
	win_lengths: [600, 1200, 240] # List of window length for STFT-based loss.
	window: "hann_window" # Window function for STFT-based loss
	subband_stft_loss_params:
	fft_sizes: [384, 683, 171] # List of FFT size for STFT-based loss.
	hop_sizes: [30, 60, 10] # List of hop size for STFT-based loss
	win_lengths: [150, 300, 60] # List of window length for STFT-based loss.
	window: "hann_window" # Window function for STFT-based loss
	---
	log:
	summary_interval: 1
	validation_interval: 5
	save_interval: 20
	chkpt_dir: 'chkpt'
	log_dir: 'logs'