---
# Configuration split into five YAML documents (separated by `---`):
# dataset paths, training, audio, model/loss, and logging settings.
#
# NOTE(review): this layout was reconstructed from a copy in which the whole
# file was collapsed onto one line; in that form everything after the first
# `#` is a comment and the file parses as `data: null`. Nesting marked with
# NOTE(review) below could not be determined from the collapsed text and
# should be confirmed against the code that loads this file.

# Root paths of the train/validation data (relative or absolute paths are OK).
# Single quotes keep the Windows backslashes literal.
data:
  train: 'H:\Deepsync\backup\deepsync\LJSpeech-1.1\wavs'
  validation: 'H:\Deepsync\backup\deepsync\LJSpeech-1.1\valid'
  mel_path: 'H:\Deepsync\backup\fastspeech\data\mels'
  eval_path: ''
---
train:
  rep_discriminator: 1
  discriminator_train_start_steps: 100000
  num_workers: 8
  batch_size: 16
  optimizer: 'adam'
  # NOTE(review): assumed to be nested under `train` — confirm.
  adam:
    lr: 0.0001
    beta1: 0.5
    beta2: 0.9
---
audio:
  n_mel_channels: 80
  segment_length: 16000
  pad_short: 2000
  filter_length: 1024
  hop_length: 256  # WARNING: this can't be changed.
  win_length: 1024
  sampling_rate: 22050
  mel_fmin: 0.0
  mel_fmax: 8000.0
---
model:
  feat_match: 10.0
  lambda_adv: 1
  use_subband_stft_loss: false
  feat_loss: false
  out_channels: 1
  # For 256 hop size and 22050 sample rate (the ratios multiply to 256).
  generator_ratio: [4, 4, 2, 2, 2, 2]
  mult: 256
  n_residual_layers: 4
  num_D: 3
  ndf: 16
  n_layers: 3
  downsampling_factor: 4
  disc_out: 512

# NOTE(review): the collapsed original does not show whether the two
# `*_stft_loss_params` mappings are nested under `model` or are top-level
# siblings in this document. They are placed at top level here — confirm
# against the loader, and indent them under `model` if it expects that.
stft_loss_params:
  fft_sizes: [1024, 2048, 512]  # List of FFT sizes for STFT-based loss.
  hop_sizes: [120, 240, 50]  # List of hop sizes for STFT-based loss.
  win_lengths: [600, 1200, 240]  # List of window lengths for STFT-based loss.
  window: "hann_window"  # Window function for STFT-based loss.

subband_stft_loss_params:
  fft_sizes: [384, 683, 171]  # List of FFT sizes for STFT-based loss.
  hop_sizes: [30, 60, 10]  # List of hop sizes for STFT-based loss.
  win_lengths: [150, 300, 60]  # List of window lengths for STFT-based loss.
  window: "hann_window"  # Window function for STFT-based loss.
---
log:
  summary_interval: 1
  validation_interval: 5
  save_interval: 20
  chkpt_dir: 'chkpt'
  log_dir: 'logs'