{
  "train": {
    "log_interval": 100,
    "eval_interval": 200,
    "seed": 1234,
    "epochs": 15,
    "learning_rate": 0.0001,
    "betas": [0.8, 0.99],
    "eps": 1e-09,
    "batch_size": 8,
    "fp16_run": false,
    "lr_decay": 0.999875,
    "segment_size": 20480,
    "init_lr_ratio": 1,
    "warmup_epochs": 0,
    "c_mel": 45,
    "c_kl": 1.0
  },
  "data": {
    "max_wav_value": 32768.0,
    "sampling_rate": 32000,
    "filter_length": 2048,
    "hop_length": 640,
    "win_length": 2048,
    "n_mel_channels": 128,
    "mel_fmin": 0.0,
    "mel_fmax": null,
    "add_blank": true,
    "n_speakers": 300,
    "cleaned_text": true
  },
  "model": {
    "inter_channels": 192,
    "hidden_channels": 192,
    "filter_channels": 768,
    "n_heads": 2,
    "n_layers": 6,
    "kernel_size": 3,
    "p_dropout": 0.1,
    "resblock": "1",
    "resblock_kernel_sizes": [3, 7, 11],
    "resblock_dilation_sizes": [
      [1, 3, 5],
      [1, 3, 5],
      [1, 3, 5]
    ],
    "upsample_rates": [10, 8, 2, 2, 2],
    "upsample_initial_channel": 512,
    "upsample_kernel_sizes": [16, 16, 8, 2, 2],
    "n_layers_q": 3,
    "use_spectral_norm": false,
    "gin_channels": 512,
    "semantic_frame_rate": "25hz",
    "freeze_quantizer": true
  },
  "s2_ckpt_dir": "logs/s2/big2k1ft_ko_5commit2",
  "content_module": "cnhubert"
}