from gyraudio.audio_separation.data.dataset import AudioDataset
from typing import Tuple
import logging
from torch import Tensor
import torch
import torchaudio
class RemixedAudioDataset(AudioDataset):
    """Audio dataset that mixes a clean voice signal with a noise signal at a
    chosen SNR on the fly.

    Each folder under ``self.data_path`` is expected to contain a
    ``voice.wav`` (clean signal) and a ``noise.wav``; ``__getitem__`` pairs a
    voice file with a subclass-chosen noise file and mixes them.

    Subclasses must implement :meth:`get_idx_noise` and :meth:`get_snr`.
    """

    def generate_snr_list(self):
        """Initialize the per-sample SNR list.

        Base implementation stores ``None``; subclasses may override to
        precompute one SNR value per sample.
        """
        self.snr_list = None

    def load_data(self):
        """Index the dataset: one ``[voice.wav, noise.wav]`` pair per folder."""
        self.folder_list = sorted(list(self.data_path.iterdir()))
        self.file_list = [
            [
                folder/"voice.wav",
                folder/"noise.wav"
            ] for folder in self.folder_list
        ]
        # Actual sampling rate is discovered lazily on first __getitem__ load.
        self.sampling_rate = None
        # SNR range in dB available when drawing mixing ratios.
        self.min_snr, self.max_snr = -4, 4
        self.generate_snr_list()
        if self.debug:
            # NOTE(review): self.snr_filter is not set in this class —
            # presumably initialized by the AudioDataset base; confirm.
            print("Not filtered", len(self.file_list), self.snr_filter)
            print(self.snr_list)

    def get_idx_noise(self, idx):
        """Return the index of the noise file to mix with sample ``idx``.

        Raises:
            NotImplementedError: always; subclasses must override.
        """
        raise NotImplementedError("get_idx_noise method must be implemented")

    def get_snr(self, idx):
        """Return the SNR (in dB) to use for sample ``idx``.

        Raises:
            NotImplementedError: always; subclasses must override.
        """
        raise NotImplementedError("get_snr method must be implemented")

    def __getitem__(self, idx: int) -> Tuple[Tensor, Tensor, Tensor]:
        """Load, mix and return ``(mixed, clean, noise)`` signals for ``idx``.

        The noise is scaled so the clean/noise energy ratio matches the SNR
        returned by :meth:`get_snr`, then the triplet is passed through the
        (base-class) ``augment_data`` hook.

        Raises:
            FileNotFoundError: if the voice or noise file is missing.
        """
        signal_path = self.file_list[idx][0]
        idx_noise = self.get_idx_noise(idx)
        noise_path = self.file_list[idx_noise][1]
        # Explicit checks instead of `assert` so the error survives `python -O`
        # and gives a clear message rather than a downstream torchaudio failure.
        if not signal_path.exists():
            raise FileNotFoundError(f"Missing voice file: {signal_path}")
        if not noise_path.exists():
            raise FileNotFoundError(f"Missing noise file: {noise_path}")
        clean_audio_signal, sampling_rate = torchaudio.load(str(signal_path))
        noise_audio_signal, sampling_rate = torchaudio.load(str(noise_path))
        snr = self.get_snr(idx)
        # Scale factor for the noise: alpha = 10^(-snr/20) * ||clean|| / ||noise||
        # so that 20*log10(||clean|| / ||alpha*noise||) == snr (in dB).
        alpha = 10 ** (-snr / 20) * torch.norm(clean_audio_signal) / torch.norm(noise_audio_signal)
        mixed_audio_signal = clean_audio_signal + alpha*noise_audio_signal
        self.sampling_rate = sampling_rate
        mixed_audio_signal, clean_audio_signal, noise_audio_signal = self.augment_data(
            mixed_audio_signal, clean_audio_signal, noise_audio_signal)
        if self.debug:
            # Lazy %-args: shapes are only formatted when DEBUG logging is on.
            logging.debug("%s", mixed_audio_signal.shape)
            logging.debug("%s", clean_audio_signal.shape)
            logging.debug("%s", noise_audio_signal.shape)
        return mixed_audio_signal, clean_audio_signal, noise_audio_signal