Spaces:
Running
Running
File size: 1,002 Bytes
4f6613a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 |
# Model configuration: a top-level FireflyArchitecture with four nested
# components (spec_transform, backbone, head, quantizer). Each `_target_` is a
# dotted import path; presumably consumed by Hydra-style instantiation --
# confirm against the loader.
_target_: fish_speech.models.vqgan.modules.firefly.FireflyArchitecture

# Log-mel spectrogram front end.
spec_transform:
  _target_: fish_speech.utils.spectrogram.LogMelSpectrogram
  sample_rate: 44100
  n_mels: 160  # same value as backbone.input_channels
  n_fft: 2048
  hop_length: 512  # same value as head.hop_length
  win_length: 2048

# ConvNeXt encoder; `depths` and `dims` each list four entries.
backbone:
  _target_: fish_speech.models.vqgan.modules.firefly.ConvNeXtEncoder
  input_channels: 160
  depths: [3, 3, 9, 3]
  dims: [128, 256, 384, 512]
  drop_path_rate: 0.2
  kernel_size: 7

# HiFi-GAN generator.
head:
  _target_: fish_speech.models.vqgan.modules.firefly.HiFiGANGenerator
  hop_length: 512
  # 8 * 8 * 2 * 2 * 2 = 512, equal to hop_length above.
  upsample_rates: [8, 8, 2, 2, 2]  # aka. strides
  upsample_kernel_sizes: [16, 16, 4, 4, 4]
  resblock_kernel_sizes: [3, 7, 11]
  resblock_dilation_sizes: [[1, 3, 5], [1, 3, 5], [1, 3, 5]]
  # Same value as the last entry of backbone.dims and as quantizer.input_dim;
  # presumably these must stay in sync -- verify before changing one.
  num_mels: 512
  upsample_initial_channel: 512
  pre_conv_kernel_size: 13
  post_conv_kernel_size: 13

# Finite scalar quantizer with downsampling.
quantizer:
  _target_: fish_speech.models.vqgan.modules.fsq.DownsampleFiniteScalarQuantize
  input_dim: 512
  n_groups: 8
  n_codebooks: 1
  levels: [8, 5, 5, 5]
  downsample_factor: [2, 2]
|