Spaces:
Running
on
Zero
Running
on
Zero
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
import julius | |
import torch | |
import pytest | |
from audiocraft.data.audio_utils import ( | |
_clip_wav, | |
convert_audio_channels, | |
convert_audio, | |
f32_pcm, | |
i16_pcm, | |
normalize_audio | |
) | |
from ..common_utils import get_batch_white_noise | |
class TestConvertAudioChannels:
    """Shape-level checks for ``convert_audio_channels``.

    Every test builds its input with ``get_batch_white_noise(batch, channels,
    frames)`` and only inspects the shape of the result (or the exception
    raised); sample values are never compared.
    """

    def test_convert_audio_channels_downmix(self):
        """Requesting 2 channels from a 3-channel batch yields ``[batch, 2, frames]``."""
        batch, src_channels, frames = 2, 3, 100
        wav = get_batch_white_noise(batch, src_channels, frames)

        result = convert_audio_channels(wav, channels=2)

        expected_shape = (batch, 2, frames)
        assert tuple(result.shape) == expected_shape

    def test_convert_audio_channels_nochange(self):
        """Requesting the source channel count leaves the shape untouched."""
        batch, src_channels, frames = 2, 3, 100
        wav = get_batch_white_noise(batch, src_channels, frames)

        result = convert_audio_channels(wav, channels=src_channels)

        assert tuple(result.shape) == tuple(wav.shape)

    def test_convert_audio_channels_upmix(self):
        """Requesting 3 channels from a 1-channel batch yields ``[batch, 3, frames]``."""
        batch, src_channels, frames = 2, 1, 100
        wav = get_batch_white_noise(batch, src_channels, frames)

        result = convert_audio_channels(wav, channels=3)

        expected_shape = (batch, 3, frames)
        assert tuple(result.shape) == expected_shape

    def test_convert_audio_channels_upmix_error(self):
        """Requesting 3 channels from a 2-channel batch must raise ``ValueError``."""
        batch, src_channels, frames = 2, 2, 100
        wav = get_batch_white_noise(batch, src_channels, frames)

        with pytest.raises(ValueError):
            convert_audio_channels(wav, channels=3)
class TestConvertAudio:
    """Checks for ``convert_audio`` and the ``f32_pcm`` / ``i16_pcm`` helpers."""

    def _check_channel_conversion(self, src_channels, target_channels):
        """Run ``convert_audio`` at an unchanged rate and verify the output shape.

        The source and target rates are both 128, so only the channel count is
        asked to change; batch size and length must match the input.
        """
        batch, seconds, sample_rate = 2, 4.0, 128
        audio = get_batch_white_noise(batch, src_channels, int(sample_rate * seconds))

        converted = convert_audio(
            audio,
            from_rate=sample_rate,
            to_rate=sample_rate,
            to_channels=target_channels,
        )

        expected_shape = [audio.shape[0], target_channels, audio.shape[-1]]
        assert list(converted.shape) == expected_shape

    def _check_resample_matches_julius(self, from_rate, to_rate):
        """Verify ``convert_audio`` agrees with ``julius.resample.resample_frac``.

        The channel count passed as ``to_channels`` equals the source channel
        count, so only the sample rate differs between input and request.
        """
        batch, channels, seconds = 2, 1, 4.0
        audio = get_batch_white_noise(batch, channels, int(from_rate * seconds))

        converted = convert_audio(
            audio, from_rate=from_rate, to_rate=to_rate, to_channels=channels
        )
        reference = julius.resample.resample_frac(
            audio, old_sr=from_rate, new_sr=to_rate
        )

        assert torch.allclose(converted, reference)

    def test_convert_audio_channels_downmix(self):
        """3 channels -> 2 channels at a fixed sample rate."""
        self._check_channel_conversion(src_channels=3, target_channels=2)

    def test_convert_audio_channels_upmix(self):
        """1 channel -> 3 channels at a fixed sample rate."""
        self._check_channel_conversion(src_channels=1, target_channels=3)

    def test_convert_audio_upsample(self):
        """Rate 2 -> 3 must match the julius reference."""
        self._check_resample_matches_julius(from_rate=2, to_rate=3)

    def test_convert_audio_resample(self):
        """Rate 3 -> 2 (the downsampling direction) must match the julius reference."""
        self._check_resample_matches_julius(from_rate=3, to_rate=2)

    def test_convert_pcm(self):
        """int16 -> ``f32_pcm`` -> ``i16_pcm`` must reproduce the original samples."""
        batch, channels, seconds = 2, 1, 4.0
        sample_rate = 3
        num_frames = int(sample_rate * seconds)
        low, high = -(2 ** 15), 2 ** 15

        original = torch.randint(
            low, high, (batch, channels, num_frames), dtype=torch.int16
        )
        round_tripped = i16_pcm(f32_pcm(original))

        assert torch.allclose(original, round_tripped)
class TestNormalizeAudio:
    """Checks that clipping / normalization keeps samples within ``[-1, 1]``.

    Inputs are the output of ``get_batch_white_noise`` multiplied by 10.0.
    """

    def _loud_noise(self):
        """Return the 10x-scaled noise batch shared by every test in this class."""
        batch, channels, seconds = 2, 1, 4.0
        sample_rate = 3
        return 10.0 * get_batch_white_noise(batch, channels, int(sample_rate * seconds))

    def _check_strategy_bounds_peak(self, strategy):
        """Normalize with ``strategy`` and require the absolute peak to be <= 1."""
        normalized = normalize_audio(self._loud_noise(), strategy=strategy)
        peak = normalized.abs().max()
        assert peak <= 1

    def test_clip_wav(self):
        """``_clip_wav`` must leave its argument with an absolute peak <= 1."""
        audio = self._loud_noise()
        # The return value is ignored and the assertion inspects ``audio``
        # itself, so this test relies on ``_clip_wav`` modifying its input.
        _clip_wav(audio)
        assert audio.abs().max() <= 1

    def test_normalize_audio_clip(self):
        """The ``'clip'`` strategy output stays within unit amplitude."""
        self._check_strategy_bounds_peak('clip')

    def test_normalize_audio_rms(self):
        """The ``'rms'`` strategy output stays within unit amplitude."""
        self._check_strategy_bounds_peak('rms')

    def test_normalize_audio_peak(self):
        """The ``'peak'`` strategy output stays within unit amplitude."""
        self._check_strategy_bounds_peak('peak')