|
import pytest |
|
from dotenv import dotenv_values |
|
|
|
from s3prl.dataio.corpus.librilight import LibriLight |
|
from s3prl.dataio.corpus.librispeech import LibriSpeech |
|
|
|
# Expected number of entries for each LibriSpeech split, keyed by split name.
# These counts are what ``len(dataset.all_data)`` is checked against in the
# tests below (presumably one entry per utterance -- confirm against the
# corpus loader).
libri_stats = {
    "train-clean-100": 28539,
    "train-clean-360": 104014,
    "train-other-500": 148688,
    "dev-clean": 2703,
    "dev-other": 2864,
    "test-clean": 2620,
    "test-other": 2939,
}
|
|
|
|
|
@pytest.mark.corpus
def test_librispeech_dataset():
    """Check that LibriSpeech loads the expected number of entries.

    The dataset root is read from the ``LibriSpeech`` key of the dotenv
    configuration. The expected size is derived from ``libri_stats`` for
    exactly the splits requested, so the assertion stays correct if the
    split lists below are changed.
    """
    config = dotenv_values()
    dataset_root = config["LibriSpeech"]

    train_split = ["train-clean-100", "train-clean-360"]
    valid_split = ["dev-clean", "dev-other"]
    test_split = ["test-clean", "test-other"]

    # Computed before the corpus is built so the expectation depends only on
    # the split names written above. Equal to the full-corpus total (292367)
    # minus the "train-other-500" split that is not requested here.
    expected_size = sum(
        libri_stats[name] for name in train_split + valid_split + test_split
    )

    dataset = LibriSpeech(
        dataset_root,
        train_split=train_split,
        valid_split=valid_split,
        test_split=test_split,
    )
    data = dataset.all_data
    assert len(data) == expected_size
|
|
|
|
|
@pytest.mark.corpus
def test_librilight():
    """Check that the LibriLight corpus exposes 48 training entries.

    Corpus roots are read from the ``LibriLight`` and ``LibriSpeech`` keys of
    the dotenv configuration. A LibriSpeech corpus is also built and its
    ``data_split`` unpacked, which only verifies that it loads and yields
    three parts; their contents are not asserted.
    """
    config = dotenv_values()
    train_corpus = LibriLight(config["LibriLight"])
    # NOTE(review): the positional ``4, []`` are presumably the worker count
    # and an empty train split -- confirm against the LibriSpeech signature.
    eval_corpus = LibriSpeech(config["LibriSpeech"], 4, [])

    train_data = train_corpus.all_data
    # Unpacking fails loudly if ``data_split`` is not a three-way split; the
    # parts themselves are intentionally unused.
    _, _valid_data, _test_data = eval_corpus.data_split

    assert len(train_data) == 48
|
|