from typing import Tuple

import torch
from torch import nn


class Wav2Vec2Processor(nn.Module):
    """Collate a tuple of variable-length waveforms into a single zero-padded batch."""

    def __init__(self) -> None:
        super().__init__()
|
    def forward(
        self, waveforms: Tuple[torch.Tensor, ...]
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Convert a tuple of waveforms of differing lengths into a batch.

        Args:
            waveforms (Tuple[torch.Tensor, ...]): The waveforms. Each has shape
                (wave_length,), where wave_length may differ between waveforms.

        Returns:
            batched_waveforms (torch.Tensor): The batched waveforms, zero-padded
                on the right. Shape: (batch_size, max_wave_length).
            wave_lengths (torch.Tensor): The original (unpadded) length of each
                waveform. Shape: (batch_size,).
        """
        device = waveforms[0].device
        # Record each waveform's original length before padding.
        wave_lengths = torch.tensor(
            tuple(waveform.size(0) for waveform in waveforms), device=device
        )
        max_length = wave_lengths.max().item()

        # Right-pad every waveform with zeros up to the longest length in the batch.
        padded = []
        for waveform in waveforms:
            padded.append(
                nn.functional.pad(
                    waveform,
                    (0, max_length - waveform.size(0)),
                    mode="constant",
                    value=0.0,
                )
            )

        # Stack the now equal-length waveforms into a (batch_size, max_wave_length) tensor.
        batched_waveforms = torch.stack(padded, dim=0)
        return batched_waveforms, wave_lengths
|
|
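# Minimal usage sketch, assuming two hypothetical mono waveforms (e.g. one second
# and half a second of audio at 16 kHz): batch them and inspect the returned shapes.
if __name__ == "__main__":
    processor = Wav2Vec2Processor()
    waveforms = (torch.randn(16000), torch.randn(8000))
    batched, lengths = processor(waveforms)
    print(batched.shape)  # torch.Size([2, 16000])
    print(lengths)        # tensor([16000,  8000])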