import torch
from torch.utils.data import Dataset
import numpy as np
import torchaudio as ta
from .preprocess import AudioPipeline


class SongDataset(Dataset):
    """Yields one (spectrogram, dance label) pair per fixed-length window of each song."""

    def __init__(
        self,
        audio_paths: list[str],
        dance_labels: list[np.ndarray],
        audio_duration=30,  # seconds
        audio_window_duration=6,  # seconds
    ):
        assert audio_duration % audio_window_duration == 0, "Audio window duration must divide the audio duration evenly."

        self.audio_paths = audio_paths
        self.dance_labels = dance_labels
        audio_info = ta.info(audio_paths[0])
        self.sample_rate = audio_info.sample_rate
        self.audio_window_duration = int(audio_window_duration)
        self.audio_duration = int(audio_duration)
        self.audio_pipeline = AudioPipeline(input_freq=self.sample_rate)

    def __len__(self):
        # One example per audio window: (windows per song) * (number of songs).
        return len(self.audio_paths) * self.audio_duration // self.audio_window_duration

    def __getitem__(self, idx) -> tuple[torch.Tensor, torch.Tensor]:
        waveform = self._waveform_from_index(idx)
        spectrogram = self.audio_pipeline(waveform)
        dance_labels = self._label_from_index(idx)
        return spectrogram, dance_labels

    def _waveform_from_index(self, idx: int) -> torch.Tensor:
        # Map the flat dataset index to a song file and a window within that song.
        audio_file_idx = idx * self.audio_window_duration // self.audio_duration
        window_idx = idx % (self.audio_duration // self.audio_window_duration)
        # torchaudio.load expects the offset and length in frames (samples), not seconds.
        frame_offset = window_idx * self.audio_window_duration * self.sample_rate
        num_frames = self.sample_rate * self.audio_window_duration
        waveform, sample_rate = ta.load(self.audio_paths[audio_file_idx], frame_offset=frame_offset, num_frames=num_frames)
        assert sample_rate == self.sample_rate, f"Expected sample rate of {self.sample_rate}. Found {sample_rate}"
        return waveform

    def _label_from_index(self, idx: int) -> torch.Tensor:
        # Every window of a song shares that song's label.
        label_idx = idx * self.audio_window_duration // self.audio_duration
        return torch.from_numpy(self.dance_labels[label_idx])
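

# A minimal usage sketch, assuming each path points to a 30-second audio clip and each
# song has a single label vector; the file names and label shape below are hypothetical
# placeholders rather than real project data. Because of the relative import above, run
# this as a module within its package (python -m <package>.<module>) rather than directly.
if __name__ == "__main__":
    from torch.utils.data import DataLoader

    example_paths = ["songs/song_a.wav", "songs/song_b.wav"]            # hypothetical files
    example_labels = [np.zeros(10, dtype=np.float32) for _ in range(2)]  # hypothetical dance labels
    dataset = SongDataset(example_paths, example_labels)
    loader = DataLoader(dataset, batch_size=4, shuffle=True)
    spectrograms, labels = next(iter(loader))  # batch of 6-second spectrogram windows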