#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright 2019 Shigeki Karita
# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
"""Repeat the same layer definition."""
from typing import List, Optional

import torch

from funasr_detach.models.transformer.layer_norm import LayerNorm


class MultiSequential(torch.nn.Sequential):
"""Multi-input multi-output torch.nn.Sequential."""

    def __init__(self, *args, layer_drop_rate=0.0):
        """Initialize MultiSequential with layer_drop.

        Args:
            layer_drop_rate (float): Probability of dropping out each fn (layer).
        """
        super(MultiSequential, self).__init__(*args)
        self.layer_drop_rate = layer_drop_rate

    def forward(self, *args):
        """Forward pass, randomly skipping layers during training (layer drop)."""
        # Draw one uniform sample per layer; during training a layer is skipped
        # when its sample falls below layer_drop_rate. At eval time every layer runs.
        _probs = torch.empty(len(self)).uniform_()
        for idx, m in enumerate(self):
            if not self.training or (_probs[idx] >= self.layer_drop_rate):
                args = m(*args)
        return args
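

# Minimal usage sketch (illustrative only; `_AddOne` is a hypothetical toy block,
# not part of this file). MultiSequential threads a tuple of tensors through every
# layer, so each layer must return the same tuple signature it receives. With
# layer_drop_rate > 0, each layer is skipped independently with that probability
# during training; at eval time all layers run.
#
#     class _AddOne(torch.nn.Module):
#         def forward(self, x, mask):
#             return x + 1, mask
#
#     seq = MultiSequential(_AddOne(), _AddOne(), layer_drop_rate=0.5)
#     seq.eval()                                            # no layers dropped at eval time
#     x, mask = seq(torch.zeros(2, 3), torch.ones(2, 3))    # x is all 2s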


def repeat(N, fn, layer_drop_rate=0.0):
    """Repeat a module N times.

    Args:
        N (int): Number of times to repeat fn.
        fn (Callable): Function that builds the module for a given layer index.
        layer_drop_rate (float): Probability of dropping out each fn (layer).

    Returns:
        MultiSequential: Repeated model instance.
    """
    return MultiSequential(*[fn(n) for n in range(N)], layer_drop_rate=layer_drop_rate)
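
# Usage sketch (illustrative; `EncoderLayer` stands in for whatever block factory
# the caller provides and is not defined in this file):
#
#     encoders = repeat(
#         6,
#         lambda layer_id: EncoderLayer(...),
#         layer_drop_rate=0.1,
#     )
#     # The result behaves like the stacked layers, e.g. x, mask = encoders(x, mask)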


class MultiBlocks(torch.nn.Module):
    """MultiBlocks definition.

    Args:
        block_list: Individual blocks of the encoder architecture.
        output_size: Architecture output size.
        norm_class: Normalization module class.
    """

def __init__(
self,
block_list: List[torch.nn.Module],
output_size: int,
norm_class: torch.nn.Module = LayerNorm,
) -> None:
"""Construct a MultiBlocks object."""
super().__init__()
self.blocks = torch.nn.ModuleList(block_list)
self.norm_blocks = norm_class(output_size)
self.num_blocks = len(block_list)

    def reset_streaming_cache(self, left_context: int, device: torch.device) -> None:
        """Initialize/Reset encoder streaming cache.

        Args:
            left_context: Number of left frames during chunk-by-chunk inference.
            device: Device to use for cache tensor.
        """
for idx in range(self.num_blocks):
self.blocks[idx].reset_streaming_cache(left_context, device)

    def forward(
        self,
        x: torch.Tensor,
        pos_enc: torch.Tensor,
        mask: torch.Tensor,
        chunk_mask: Optional[torch.Tensor] = None,
    ) -> torch.Tensor:
        """Forward each block of the encoder architecture.

        Args:
            x: MultiBlocks input sequences. (B, T, D_block_1)
            pos_enc: Positional embedding sequences.
            mask: Source mask. (B, T)
            chunk_mask: Chunk mask. (T_2, T_2)

        Returns:
            x: Output sequences. (B, T, D_block_N)
        """
        for block in self.blocks:
            x, mask, pos_enc = block(x, pos_enc, mask, chunk_mask=chunk_mask)
        x = self.norm_blocks(x)
        return x

    def chunk_forward(
        self,
        x: torch.Tensor,
        pos_enc: torch.Tensor,
        mask: torch.Tensor,
        chunk_size: int = 0,
        left_context: int = 0,
        right_context: int = 0,
    ) -> torch.Tensor:
        """Forward each block of the encoder architecture, chunk by chunk.

        Args:
            x: MultiBlocks input sequences. (B, T, D_block_1)
            pos_enc: Positional embedding sequences. (B, 2 * (T - 1), D_att)
            mask: Source mask. (B, T_2)
            chunk_size: Number of frames per chunk during chunk-by-chunk inference.
            left_context: Number of frames in left context.
            right_context: Number of frames in right context.

        Returns:
            x: MultiBlocks output sequences. (B, T, D_block_N)
        """
        for block in self.blocks:
            x, pos_enc = block.chunk_forward(
                x,
                pos_enc,
                mask,
                chunk_size=chunk_size,
                left_context=left_context,
                right_context=right_context,
            )
        x = self.norm_blocks(x)
        return x
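

# Rough usage sketch for MultiBlocks (names such as `build_block` are hypothetical;
# the actual block construction lives in the encoder that owns this container, and
# each block is expected to provide chunk_forward/reset_streaming_cache):
#
#     blocks = MultiBlocks(
#         block_list=[build_block(i) for i in range(12)],
#         output_size=256,
#     )
#     # Offline decoding: one full-utterance pass.
#     out = blocks(x, pos_enc, mask)
#     # Streaming decoding: reset the per-block cache, then feed chunks.
#     blocks.reset_streaming_cache(left_context=16, device=x.device)
#     out = blocks.chunk_forward(
#         x_chunk, pos_enc, mask, chunk_size=16, left_context=16
#     )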