|
|
|
|
|
|
|
|
|
|
|
|
|
import julius
|
|
import torch
|
|
import pytest
|
|
|
|
from audiocraft.data.audio_utils import (
|
|
_clip_wav,
|
|
convert_audio_channels,
|
|
convert_audio,
|
|
f32_pcm,
|
|
i16_pcm,
|
|
normalize_audio
|
|
)
|
|
from ..common_utils import get_batch_white_noise
|
|
|
|
|
|
class TestConvertAudioChannels:
    """Checks on the output of ``convert_audio_channels`` for several layouts."""

    def test_convert_audio_channels_downmix(self):
        """Asking for 2 channels from a 3-channel batch gives a 2-channel batch."""
        batch, in_channels, frames = 2, 3, 100
        noise = get_batch_white_noise(batch, in_channels, frames)
        result = convert_audio_channels(noise, channels=2)
        # Only the channel axis may change; batch and time sizes are kept.
        assert tuple(result.shape) == (batch, 2, frames)

    def test_convert_audio_channels_nochange(self):
        """Asking for the channel count already present keeps the shape."""
        batch, in_channels, frames = 2, 3, 100
        noise = get_batch_white_noise(batch, in_channels, frames)
        result = convert_audio_channels(noise, channels=in_channels)
        assert tuple(result.shape) == tuple(noise.shape)

    def test_convert_audio_channels_upmix(self):
        """Asking for 3 channels from a mono batch gives a 3-channel batch."""
        batch, in_channels, frames = 2, 1, 100
        noise = get_batch_white_noise(batch, in_channels, frames)
        result = convert_audio_channels(noise, channels=3)
        assert tuple(result.shape) == (batch, 3, frames)

    def test_convert_audio_channels_upmix_error(self):
        """Asking for 3 channels from a stereo batch must raise ``ValueError``."""
        batch, in_channels, frames = 2, 2, 100
        noise = get_batch_white_noise(batch, in_channels, frames)
        with pytest.raises(ValueError):
            convert_audio_channels(noise, channels=3)
|
|
|
|
|
|
class TestConvertAudio:
    """Checks for ``convert_audio`` and the ``f32_pcm`` / ``i16_pcm`` helpers."""

    def test_convert_audio_channels_downmix(self):
        """With equal rates, requesting 2 channels from 3 only changes dim 1."""
        batch, in_channels, duration = 2, 3, 4.
        rate = 128
        num_frames = int(rate * duration)
        noise = get_batch_white_noise(batch, in_channels, num_frames)
        converted = convert_audio(noise, from_rate=rate, to_rate=rate, to_channels=2)
        expected_shape = [noise.shape[0], 2, noise.shape[-1]]
        assert list(converted.shape) == expected_shape

    def test_convert_audio_channels_upmix(self):
        """With equal rates, requesting 3 channels from 1 only changes dim 1."""
        batch, in_channels, duration = 2, 1, 4.
        rate = 128
        num_frames = int(rate * duration)
        noise = get_batch_white_noise(batch, in_channels, num_frames)
        converted = convert_audio(noise, from_rate=rate, to_rate=rate, to_channels=3)
        expected_shape = [noise.shape[0], 3, noise.shape[-1]]
        assert list(converted.shape) == expected_shape

    def test_convert_audio_upsample(self):
        """Going from rate 2 to rate 3 matches ``julius`` ``resample_frac``."""
        batch, in_channels, duration = 2, 1, 4.
        rate = 2
        target_rate = 3
        num_frames = int(rate * duration)
        noise = get_batch_white_noise(batch, in_channels, num_frames)
        converted = convert_audio(
            noise, from_rate=rate, to_rate=target_rate, to_channels=in_channels)
        # Reference output computed directly with julius on the same input.
        reference = julius.resample.resample_frac(
            noise, old_sr=rate, new_sr=target_rate)
        assert torch.allclose(converted, reference)

    def test_convert_audio_resample(self):
        """Going from rate 3 to rate 2 matches ``julius`` ``resample_frac``."""
        batch, in_channels, duration = 2, 1, 4.
        rate = 3
        target_rate = 2
        num_frames = int(rate * duration)
        noise = get_batch_white_noise(batch, in_channels, num_frames)
        converted = convert_audio(
            noise, from_rate=rate, to_rate=target_rate, to_channels=in_channels)
        # Reference output computed directly with julius on the same input.
        reference = julius.resample.resample_frac(
            noise, old_sr=rate, new_sr=target_rate)
        assert torch.allclose(converted, reference)

    def test_convert_pcm(self):
        """``i16_pcm(f32_pcm(x))`` gives back the original int16 samples."""
        batch, in_channels, duration = 2, 1, 4.
        rate = 3
        sample_shape = (batch, in_channels, int(rate * duration))
        # randint's upper bound is exclusive, so values span the int16 range.
        i16_source = torch.randint(-2**15, 2**15, sample_shape, dtype=torch.int16)
        as_float = f32_pcm(i16_source)
        round_tripped = i16_pcm(as_float)
        assert torch.allclose(i16_source, round_tripped)
|
|
|
|
|
|
class TestNormalizeAudio:
    """Peak-bound checks for ``_clip_wav`` and each ``normalize_audio`` strategy."""

    @staticmethod
    def _make_loud_noise():
        """Return a white noise batch scaled by 10, the input shared by all tests."""
        batch, channels, duration = 2, 1, 4.
        rate = 3
        return 10.0 * get_batch_white_noise(batch, channels, int(rate * duration))

    def test_clip_wav(self):
        """After ``_clip_wav`` the tensor's absolute peak is at most 1."""
        loud = self._make_loud_noise()
        # The return value is ignored: the check relies on `loud` itself
        # having been modified by the call.
        _clip_wav(loud)
        peak = loud.abs().max()
        assert peak <= 1

    def test_normalize_audio_clip(self):
        """The 'clip' strategy yields an absolute peak of at most 1."""
        loud = self._make_loud_noise()
        normalized = normalize_audio(loud, strategy='clip')
        peak = normalized.abs().max()
        assert peak <= 1

    def test_normalize_audio_rms(self):
        """The 'rms' strategy yields an absolute peak of at most 1."""
        loud = self._make_loud_noise()
        normalized = normalize_audio(loud, strategy='rms')
        peak = normalized.abs().max()
        assert peak <= 1

    def test_normalize_audio_peak(self):
        """The 'peak' strategy yields an absolute peak of at most 1."""
        loud = self._make_loud_noise()
        normalized = normalize_audio(loud, strategy='peak')
        peak = normalized.abs().max()
        assert peak <= 1
|
|
|