Spaces:
Running
on
Zero
Running
on
Zero
File size: 4,078 Bytes
9d0d223 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 |
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
import julius
import torch
import pytest
from audiocraft.data.audio_utils import (
_clip_wav,
convert_audio_channels,
convert_audio,
f32_pcm,
i16_pcm,
normalize_audio
)
from ..common_utils import get_batch_white_noise
class TestConvertAudioChannels:
    """Shape and error checks for ``convert_audio_channels``."""

    def test_convert_audio_channels_downmix(self):
        """A 3-channel batch converted to 2 channels has shape [b, 2, t]."""
        batch, n_channels, n_steps = 2, 3, 100
        wav = get_batch_white_noise(batch, n_channels, n_steps)
        result = convert_audio_channels(wav, channels=2)
        assert tuple(result.shape) == (batch, 2, n_steps)

    def test_convert_audio_channels_nochange(self):
        """Requesting the input's own channel count leaves the shape as is."""
        batch, n_channels, n_steps = 2, 3, 100
        wav = get_batch_white_noise(batch, n_channels, n_steps)
        result = convert_audio_channels(wav, channels=n_channels)
        assert tuple(result.shape) == tuple(wav.shape)

    def test_convert_audio_channels_upmix(self):
        """A 1-channel batch converted to 3 channels has shape [b, 3, t]."""
        batch, n_channels, n_steps = 2, 1, 100
        wav = get_batch_white_noise(batch, n_channels, n_steps)
        result = convert_audio_channels(wav, channels=3)
        assert tuple(result.shape) == (batch, 3, n_steps)

    def test_convert_audio_channels_upmix_error(self):
        """Converting a 2-channel batch to 3 channels must raise ValueError."""
        batch, n_channels, n_steps = 2, 2, 100
        wav = get_batch_white_noise(batch, n_channels, n_steps)
        with pytest.raises(ValueError):
            convert_audio_channels(wav, channels=3)
class TestConvertAudio:
    """Checks for ``convert_audio`` and the PCM conversion helpers."""

    def test_convert_audio_channels_downmix(self):
        """Same sample rate, 3 -> 2 channels: only the channel dim changes."""
        batch, n_channels, duration = 2, 3, 4.
        sample_rate = 128
        n_samples = int(sample_rate * duration)
        wav = get_batch_white_noise(batch, n_channels, n_samples)
        converted = convert_audio(
            wav, from_rate=sample_rate, to_rate=sample_rate, to_channels=2
        )
        expected_shape = (wav.shape[0], 2, wav.shape[-1])
        assert tuple(converted.shape) == expected_shape

    def test_convert_audio_channels_upmix(self):
        """Same sample rate, 1 -> 3 channels: only the channel dim changes."""
        batch, n_channels, duration = 2, 1, 4.
        sample_rate = 128
        n_samples = int(sample_rate * duration)
        wav = get_batch_white_noise(batch, n_channels, n_samples)
        converted = convert_audio(
            wav, from_rate=sample_rate, to_rate=sample_rate, to_channels=3
        )
        expected_shape = (wav.shape[0], 3, wav.shape[-1])
        assert tuple(converted.shape) == expected_shape

    def test_convert_audio_upsample(self):
        """Rate 2 -> 3 with unchanged channels matches julius' resample_frac."""
        batch, n_channels, duration = 2, 1, 4.
        source_rate, target_rate = 2, 3
        wav = get_batch_white_noise(batch, n_channels, int(source_rate * duration))
        converted = convert_audio(
            wav, from_rate=source_rate, to_rate=target_rate, to_channels=n_channels
        )
        reference = julius.resample.resample_frac(
            wav, old_sr=source_rate, new_sr=target_rate
        )
        assert torch.allclose(converted, reference)

    def test_convert_audio_resample(self):
        """Rate 3 -> 2 with unchanged channels matches julius' resample_frac."""
        batch, n_channels, duration = 2, 1, 4.
        source_rate, target_rate = 3, 2
        wav = get_batch_white_noise(batch, n_channels, int(source_rate * duration))
        converted = convert_audio(
            wav, from_rate=source_rate, to_rate=target_rate, to_channels=n_channels
        )
        reference = julius.resample.resample_frac(
            wav, old_sr=source_rate, new_sr=target_rate
        )
        assert torch.allclose(converted, reference)

    def test_convert_pcm(self):
        """int16 -> ``f32_pcm`` -> ``i16_pcm`` gives back the starting samples."""
        batch, n_channels, duration = 2, 1, 4.
        sample_rate = 3
        shape = (batch, n_channels, int(sample_rate * duration))
        # Bounds passed to randint are -2**15 (inclusive) and 2**15 (exclusive).
        original = torch.randint(-2**15, 2**15, shape, dtype=torch.int16)
        round_tripped = i16_pcm(f32_pcm(original))
        assert torch.allclose(original, round_tripped)
class TestNormalizeAudio:
    """Amplitude-bound checks for ``_clip_wav`` and ``normalize_audio``."""

    def test_clip_wav(self):
        """After ``_clip_wav(audio)`` the input tensor has |x| <= 1.

        The return value of ``_clip_wav`` is not used; the assertion is made
        on the tensor that was passed in.
        """
        batch, n_channels, duration = 2, 1, 4.
        sample_rate = 3
        n_samples = int(sample_rate * duration)
        # Scale the noise by 10 before clipping.
        loud = 10.0 * get_batch_white_noise(batch, n_channels, n_samples)
        _clip_wav(loud)
        peak = loud.abs().max()
        assert peak <= 1

    def test_normalize_audio_clip(self):
        """The 'clip' strategy returns audio whose absolute peak is <= 1."""
        batch, n_channels, duration = 2, 1, 4.
        sample_rate = 3
        n_samples = int(sample_rate * duration)
        loud = 10.0 * get_batch_white_noise(batch, n_channels, n_samples)
        normalized = normalize_audio(loud, strategy='clip')
        peak = normalized.abs().max()
        assert peak <= 1

    def test_normalize_audio_rms(self):
        """The 'rms' strategy returns audio whose absolute peak is <= 1."""
        batch, n_channels, duration = 2, 1, 4.
        sample_rate = 3
        n_samples = int(sample_rate * duration)
        loud = 10.0 * get_batch_white_noise(batch, n_channels, n_samples)
        normalized = normalize_audio(loud, strategy='rms')
        peak = normalized.abs().max()
        assert peak <= 1

    def test_normalize_audio_peak(self):
        """The 'peak' strategy returns audio whose absolute peak is <= 1."""
        batch, n_channels, duration = 2, 1, 4.
        sample_rate = 3
        n_samples = int(sample_rate * duration)
        loud = 10.0 * get_batch_white_noise(batch, n_channels, n_samples)
        normalized = normalize_audio(loud, strategy='peak')
        peak = normalized.abs().max()
        assert peak <= 1
|