Spaces:
Sleeping
Sleeping
from dataclasses import dataclass | |
# TODO: DEPRECATED!
@dataclass
class PostNetConfig:
    """Hyperparameter record for the post-net.

    The class is a dataclass so that it can be constructed from keyword
    arguments (one per field below) and compared by value.
    """

    # NOTE(review): presumably a dropout probability in [0, 1] — confirm
    # against the code that consumes this config.
    p_dropout: float
    postnet_embedding_dim: int
    postnet_kernel_size: int
    postnet_n_convolutions: int
# Module-level PostNetConfig instance. The identifier's spelling
# ("expetimental") is kept as-is because other modules may import it.
postnet_expetimental = PostNetConfig(
    postnet_n_convolutions=3,
    postnet_kernel_size=5,
    postnet_embedding_dim=512,
    p_dropout=0.1,
)
# TODO: DEPRECATED!
@dataclass
class DiffusionConfig:
    """Hyperparameter record for the diffusion model.

    The class is a dataclass so that it can be constructed from keyword
    arguments (one per field below) and compared by value. Fields are
    grouped into model, denoiser, transformer and loss parameters.
    """

    # model parameters
    model: str
    n_mel_channels: int
    multi_speaker: bool

    # denoiser parameters
    residual_channels: int
    residual_layers: int
    denoiser_dropout: float
    noise_schedule_naive: str
    timesteps: int
    shallow_timesteps: int
    min_beta: float
    max_beta: float
    s: float
    pe_scale: int
    keep_bins: int

    # transformer params
    encoder_hidden: int
    decoder_hidden: int
    # NOTE(review): the instances in this module annotate this value as
    # "Speaker_emb + lang_emb" — presumably the combined embedding width;
    # confirm against the consumer.
    speaker_embed_dim: int

    # loss params
    noise_loss: str
# DiffusionConfig instance bound to ``diff_en``. Keyword order follows the
# field order declared on DiffusionConfig.
diff_en = DiffusionConfig(
    # --- model ---
    model="shallow",
    n_mel_channels=100,
    multi_speaker=True,
    # --- denoiser ---
    # Disabled alternatives left by the author: residual_channels=256,
    # residual_channels=384.
    residual_channels=100,
    residual_layers=20,
    denoiser_dropout=0.2,
    noise_schedule_naive="vpsde",
    timesteps=10,
    shallow_timesteps=1,
    min_beta=0.1,
    max_beta=40,
    s=0.008,
    pe_scale=1000,
    keep_bins=80,
    # --- transformer ---
    # Disabled alternative left by the author: encoder_hidden=100.
    encoder_hidden=512,
    decoder_hidden=512,
    # Speaker_emb + lang_emb
    speaker_embed_dim=1025,
    # --- loss ---
    noise_loss="l1",
)
# DiffusionConfig instance bound to ``diff_multi``. Keyword order follows
# the field order declared on DiffusionConfig.
diff_multi = DiffusionConfig(
    # --- model ---
    model="shallow",
    n_mel_channels=100,
    multi_speaker=True,
    # --- denoiser ---
    # Disabled alternative left by the author: residual_channels=256.
    residual_channels=100,
    residual_layers=20,
    denoiser_dropout=0.2,
    noise_schedule_naive="vpsde",
    timesteps=10,
    shallow_timesteps=1,
    min_beta=0.1,
    max_beta=40,
    s=0.008,
    pe_scale=1000,
    keep_bins=80,
    # --- transformer ---
    encoder_hidden=512,
    decoder_hidden=512,
    # Speaker_emb + lang_emb
    speaker_embed_dim=1280,
    # --- loss ---
    noise_loss="l1",
)