"""Variance predictor module (duration/pitch) for DelightfulTTS."""
import torch
from torch import nn
from torch.nn import Module

from models.tts.delightful_tts.constants import LEAKY_RELU_SLOPE
from models.tts.delightful_tts.conv_blocks import ConvTransposed
class VariancePredictor(Module):
    r"""Duration and pitch predictor network for DelightfulTTS.

    The network is two identical blocks of
    ``ConvTransposed -> LeakyReLU -> LayerNorm -> Dropout`` (``ConvTransposed``
    is the custom convolution wrapper from ``models.tts.delightful_tts.conv_blocks``)
    followed by a linear projection to ``channels_out`` and a squeeze of the
    last dimension.

    Args:
        channels_in (int): Number of input channels.
        channels (int): Hidden width: output channels of both ``ConvTransposed``
            layers and input features of the final linear layer.
        channels_out (int): Number of output features of the final linear layer.
        kernel_size (int): Kernel size of the ``ConvTransposed`` layers; odd
            values keep the sequence length unchanged via symmetric padding.
        p_dropout (float): Dropout probability applied after each block.
        leaky_relu_slope (float): Negative slope for the LeakyReLU activations.
            Defaults to ``LEAKY_RELU_SLOPE``.
    """

    def __init__(
        self,
        channels_in: int,
        channels: int,
        channels_out: int,
        kernel_size: int,
        p_dropout: float,
        leaky_relu_slope: float = LEAKY_RELU_SLOPE,
    ):
        super().__init__()
        self.layers = nn.ModuleList(
            [
                # Block 1: ConvTransposed -> LeakyReLU -> LayerNorm -> Dropout.
                # "same" padding for odd kernel sizes: (k - 1) // 2.
                ConvTransposed(
                    channels_in,
                    channels,
                    kernel_size=kernel_size,
                    padding=(kernel_size - 1) // 2,
                ),
                nn.LeakyReLU(leaky_relu_slope),
                nn.LayerNorm(channels),
                nn.Dropout(p_dropout),
                # Block 2: same structure, operating at the hidden width.
                ConvTransposed(
                    channels,
                    channels,
                    kernel_size=kernel_size,
                    padding=(kernel_size - 1) // 2,
                ),
                nn.LeakyReLU(leaky_relu_slope),
                nn.LayerNorm(channels),
                nn.Dropout(p_dropout),
            ],
        )
        # Projects the hidden representation to the predicted variance values.
        self.linear_layer = nn.Linear(channels, channels_out)

    def forward(self, x: torch.Tensor, mask: torch.Tensor) -> torch.Tensor:
        r"""Run the predictor and zero out masked positions.

        Args:
            x (torch.Tensor): Input tensor.
                # assumes shape (batch, time, channels_in) given the trailing
                # LayerNorm over the channel dim — TODO confirm against ConvTransposed
            mask (torch.Tensor): Boolean mask with the same shape as the squeezed
                output; ``True`` entries are replaced with ``0.0``.

        Returns:
            torch.Tensor: Prediction with the last dimension squeezed and
            masked positions set to zero.
        """
        # Sequentially apply both Conv/LeakyReLU/LayerNorm/Dropout blocks.
        for layer in self.layers:
            x = layer(x)
        x = self.linear_layer(x)
        # Drop the trailing singleton dimension (channels_out == 1 case),
        # then silence padded positions indicated by the mask.
        x = x.squeeze(-1)
        return x.masked_fill(mask, 0.0)