# lmzjms's picture
# Upload 1162 files
# 0b32ad6 verified
import math
import tempfile
from pathlib import Path
import pandas as pd
import pytest
from s3prl.problem import (
SuperbASR,
SuperbASV,
SuperbER,
SuperbIC,
SuperbKS,
SuperbPR,
SuperbSD,
SuperbSF,
SuperbSID,
)
from s3prl.util.pseudo_data import pseudo_audio
@pytest.mark.parametrize("vocab_type", ["subword", "character"])
def test_superb_asr(vocab_type):
    """Smoke-run the SuperbASR recipe on generated audio for one tokenizer type.

    A local ``SuperbASR`` subclass replaces data preparation with three small
    CSV splits built from five generated wav files.  The recipe is then run
    for four training steps on CPU with the ``fbank`` upstream.  There are no
    explicit assertions: the test passes when ``run`` returns without raising.
    """
    # Only the subword tokenizer is given an explicit vocabulary size.
    vocab_args = {"vocab_size": 18} if vocab_type == "subword" else {}

    with tempfile.TemporaryDirectory() as tempdir, pseudo_audio(
        [10, 2, 1, 8, 5]
    ) as (wav_paths, _num_samples):

        class TestASR(SuperbASR):
            def default_config(self) -> dict:
                """Return the parent config with ``prepare_data`` emptied."""
                base = super().default_config()
                base["prepare_data"] = {}
                return base

            def prepare_data(
                self,
                prepare_data: dict,
                target_dir: str,
                cache_dir: str,
                get_path_only: bool = False,
            ):
                """Write train/valid/test CSVs under ``target_dir``.

                Returns the train path, the valid path and a one-element list
                holding the test path.
                """
                transcripts = [
                    "hello how are you today",
                    "fine",
                    "oh",
                    "I think is good",
                    "maybe okay",
                ]
                table = pd.DataFrame(
                    {
                        "id": list(range(len(wav_paths))),
                        "wav_path": wav_paths,
                        "transcription": transcripts,
                    }
                )

                out_dir = Path(target_dir)
                train_path = out_dir / "train.csv"
                valid_path = out_dir / "valid.csv"
                test_path = out_dir / "test.csv"

                # Rows 0-2 -> train, row 3 -> valid, row 4 -> test.
                splits = (
                    (slice(None, 3), train_path),
                    (slice(3, 4), valid_path),
                    (slice(4, None), test_path),
                )
                for rows, csv_path in splits:
                    table.iloc[rows].to_csv(csv_path, index=False)
                return train_path, valid_path, [test_path]

        problem = TestASR()
        config = problem.default_config()
        config.update(target_dir=tempdir, device="cpu")
        config["train"].update(total_steps=4, log_step=1, eval_step=2, save_step=2)
        config["eval_batch"] = 2
        config["build_tokenizer"] = dict(vocab_type=vocab_type, vocab_args=vocab_args)
        config["build_upstream"]["name"] = "fbank"
        problem.run(**config)
def test_superb_er():
    """Smoke-run the SuperbER recipe on five generated wav files.

    Data preparation is overridden to write one labelled table to the train,
    valid and test CSVs.  The recipe then runs for four training steps on CPU
    with the ``fbank`` upstream.  The test has no assertions and passes when
    ``run`` returns without raising.
    """
    with tempfile.TemporaryDirectory() as tempdir:
        with pseudo_audio([10, 2, 1, 8, 5]) as (wav_paths, _num_samples):

            class TestER(SuperbER):
                def default_config(self) -> dict:
                    """Return the parent config with ``prepare_data`` emptied."""
                    config = super().default_config()
                    config["prepare_data"] = {}
                    return config

                def prepare_data(
                    self,
                    prepare_data: dict,
                    target_dir: str,
                    cache_dir: str,
                    get_path_only: bool = False,
                ):
                    """Write the same table to train/valid/test CSVs.

                    Returns the train path, the valid path and a one-element
                    list holding the test path.
                    """
                    ids = [Path(path).stem for path in wav_paths]
                    labels = ["a", "b", "a", "c", "d"]
                    # The fourth row carries no start/end time (None).
                    start_secs = [0.0, 0.1, 0.2, None, 0.0]
                    end_secs = [5.2, 1.0, 0.3, None, 4.9]
                    df = pd.DataFrame(
                        data={
                            "id": ids,
                            "wav_path": wav_paths,
                            "label": labels,
                            "start_sec": start_secs,
                            "end_sec": end_secs,
                        }
                    )
                    # target_dir is annotated as str; coerce it so the "/"
                    # join works for both str and Path arguments.
                    out_dir = Path(target_dir)
                    train_csv = out_dir / "train.csv"
                    valid_csv = out_dir / "valid.csv"
                    test_csv = out_dir / "test.csv"
                    df.to_csv(train_csv)
                    df.to_csv(valid_csv)
                    df.to_csv(test_csv)
                    return train_csv, valid_csv, [test_csv]

            problem = TestER()
            config = problem.default_config()
            config["target_dir"] = tempdir
            config["device"] = "cpu"
            config["train"]["total_steps"] = 4
            config["train"]["log_step"] = 1
            config["train"]["eval_step"] = 2
            config["train"]["save_step"] = 2
            config["eval_batch"] = 2
            config["build_upstream"]["name"] = "fbank"
            problem.run(**config)
def test_superb_ks():
    """Smoke-run the SuperbKS recipe on five generated wav files.

    Data preparation is overridden to write one labelled table to the train,
    valid and test CSVs.  The recipe then runs for four training steps on CPU
    with the ``fbank`` upstream.  The test has no assertions and passes when
    ``run`` returns without raising.
    """
    with tempfile.TemporaryDirectory() as tempdir:
        with pseudo_audio([10, 2, 1, 8, 5]) as (wav_paths, _num_samples):

            class TestKS(SuperbKS):
                def default_config(self) -> dict:
                    """Return the parent config with ``prepare_data`` emptied."""
                    config = super().default_config()
                    config["prepare_data"] = {}
                    return config

                def prepare_data(
                    self,
                    prepare_data: dict,
                    target_dir: str,
                    cache_dir: str,
                    get_path_only: bool = False,
                ):
                    """Write the same table to train/valid/test CSVs.

                    Returns the train path, the valid path and a one-element
                    list holding the test path.
                    """
                    ids = [Path(path).stem for path in wav_paths]
                    labels = ["a", "b", "a", "c", "d"]
                    # The fourth row carries no start/end time (None).
                    start_secs = [0.0, 0.1, 0.2, None, 0.0]
                    end_secs = [5.2, 1.0, 0.3, None, 4.9]
                    df = pd.DataFrame(
                        data={
                            "id": ids,
                            "wav_path": wav_paths,
                            "label": labels,
                            "start_sec": start_secs,
                            "end_sec": end_secs,
                        }
                    )
                    # target_dir is annotated as str; coerce it so the "/"
                    # join works for both str and Path arguments.
                    out_dir = Path(target_dir)
                    train_csv = out_dir / "train.csv"
                    valid_csv = out_dir / "valid.csv"
                    test_csv = out_dir / "test.csv"
                    df.to_csv(train_csv)
                    df.to_csv(valid_csv)
                    df.to_csv(test_csv)
                    return train_csv, valid_csv, [test_csv]

            problem = TestKS()
            config = problem.default_config()
            config["target_dir"] = tempdir
            config["device"] = "cpu"
            config["train"]["total_steps"] = 4
            config["train"]["log_step"] = 1
            config["train"]["eval_step"] = 2
            config["train"]["save_step"] = 2
            config["eval_batch"] = 2
            config["build_upstream"]["name"] = "fbank"
            problem.run(**config)
def test_superb_pr():
    """Smoke-run the SuperbPR recipe on five generated wav files.

    Data preparation is overridden to encode five short sentences with
    ``G2P`` and write them to three small CSV splits.  The recipe then runs
    for four training steps on CPU with the ``fbank`` upstream.  The test has
    no assertions and passes when ``run`` returns without raising.
    """
    with tempfile.TemporaryDirectory() as tempdir, pseudo_audio(
        [10, 2, 1, 8, 5]
    ) as (wav_paths, _num_samples):

        class TestPR(SuperbPR):
            def default_config(self) -> dict:
                """Return the parent config with ``prepare_data`` emptied."""
                base = super().default_config()
                base["prepare_data"] = {}
                return base

            def prepare_data(
                self,
                prepare_data: dict,
                target_dir: str,
                cache_dir: str,
                get_path_only: bool = False,
            ):
                """Write train/valid/test CSVs of G2P-encoded transcriptions.

                Returns the train path, the valid path and a one-element list
                holding the test path.
                """
                # Imported here, as in the original, so G2P is only loaded
                # when data preparation actually runs.
                from s3prl.dataio.encoder.g2p import G2P

                sentences = [
                    "hello how are you today",
                    "fine",
                    "oh",
                    "I think is good",
                    "maybe okay",
                ]
                encoder = G2P()
                encoded_text = [
                    encoder.encode(sentence.strip()) for sentence in sentences
                ]
                table = pd.DataFrame(
                    {
                        "id": list(range(len(wav_paths))),
                        "wav_path": wav_paths,
                        "transcription": encoded_text,
                    }
                )

                out_dir = Path(target_dir)
                train_path = out_dir / "train.csv"
                valid_path = out_dir / "valid.csv"
                test_path = out_dir / "test.csv"

                # Rows 0-2 -> train, row 3 -> valid, row 4 -> test.
                splits = (
                    (slice(None, 3), train_path),
                    (slice(3, 4), valid_path),
                    (slice(4, None), test_path),
                )
                for rows, csv_path in splits:
                    table.iloc[rows].to_csv(csv_path, index=False)
                return train_path, valid_path, [test_path]

        problem = TestPR()
        config = problem.default_config()
        config.update(target_dir=tempdir, device="cpu")
        config["train"].update(total_steps=4, log_step=1, eval_step=2, save_step=2)
        config["eval_batch"] = 2
        config["build_upstream"]["name"] = "fbank"
        problem.run(**config)
def test_superb_ic():
    """Smoke-run the SuperbIC recipe on five generated wav files.

    Data preparation is overridden to write one table, whose ``labels``
    column joins two label values with " ; ", to the train, valid and test
    CSVs.  The recipe then runs for four training steps on CPU with the
    ``fbank`` upstream.  The test has no assertions and passes when ``run``
    returns without raising.
    """
    with tempfile.TemporaryDirectory() as tempdir:
        with pseudo_audio([10, 2, 1, 8, 5]) as (wav_paths, _num_samples):

            class TestIC(SuperbIC):
                def default_config(self) -> dict:
                    """Return the parent config with ``prepare_data`` emptied."""
                    config = super().default_config()
                    config["prepare_data"] = {}
                    return config

                def prepare_data(
                    self,
                    prepare_data: dict,
                    target_dir: str,
                    cache_dir: str,
                    get_path_only: bool = False,
                ):
                    """Write the same table to train/valid/test CSVs.

                    Returns the train path, the valid path and a one-element
                    list holding the test path.
                    """
                    ids = [Path(path).stem for path in wav_paths]
                    labels1 = ["a", "b", "a", "c", "d"]
                    labels2 = ["1", "2", "3", "4", "5"]
                    df = pd.DataFrame(
                        data={
                            "id": ids,
                            "wav_path": wav_paths,
                            "labels": [
                                f"{label1} ; {label2}"
                                for label1, label2 in zip(labels1, labels2)
                            ],
                        }
                    )
                    # target_dir is annotated as str; coerce it so the "/"
                    # join works for both str and Path arguments.
                    out_dir = Path(target_dir)
                    train_csv = out_dir / "train.csv"
                    valid_csv = out_dir / "valid.csv"
                    test_csv = out_dir / "test.csv"
                    df.to_csv(train_csv)
                    df.to_csv(valid_csv)
                    df.to_csv(test_csv)
                    return train_csv, valid_csv, [test_csv]

            problem = TestIC()
            config = problem.default_config()
            config["target_dir"] = tempdir
            config["device"] = "cpu"
            config["train"]["total_steps"] = 4
            config["train"]["log_step"] = 1
            config["train"]["eval_step"] = 2
            config["train"]["save_step"] = 2
            config["eval_batch"] = 2
            config["build_upstream"]["name"] = "fbank"
            problem.run(**config)
def test_superb_sid():
    """Smoke-run the SuperbSID recipe on five generated wav files.

    Data preparation is overridden to write one labelled table to the train,
    valid and test CSVs.  The recipe then runs for four training steps on CPU
    with the ``fbank`` upstream.  The test has no assertions and passes when
    ``run`` returns without raising.
    """
    with tempfile.TemporaryDirectory() as tempdir:
        with pseudo_audio([10, 2, 1, 8, 5]) as (wav_paths, _num_samples):

            class TestSID(SuperbSID):
                def default_config(self) -> dict:
                    """Return the parent config with ``prepare_data`` emptied."""
                    config = super().default_config()
                    config["prepare_data"] = {}
                    return config

                def prepare_data(
                    self,
                    prepare_data: dict,
                    target_dir: str,
                    cache_dir: str,
                    get_path_only: bool = False,
                ):
                    """Write the same table to train/valid/test CSVs.

                    Returns the train path, the valid path and a one-element
                    list holding the test path.
                    """
                    ids = [Path(path).stem for path in wav_paths]
                    label = ["a", "b", "a", "c", "d"]
                    # The fourth row carries no start/end time (None).
                    start_secs = [0.0, 0.1, 0.2, None, 0.0]
                    end_secs = [5.2, 1.0, 0.3, None, 4.9]
                    df = pd.DataFrame(
                        data={
                            "id": ids,
                            "wav_path": wav_paths,
                            "label": label,
                            "start_sec": start_secs,
                            "end_sec": end_secs,
                        }
                    )
                    # target_dir is annotated as str; coerce it so the "/"
                    # join works for both str and Path arguments.
                    out_dir = Path(target_dir)
                    train_csv = out_dir / "train.csv"
                    valid_csv = out_dir / "valid.csv"
                    test_csv = out_dir / "test.csv"
                    df.to_csv(train_csv)
                    df.to_csv(valid_csv)
                    df.to_csv(test_csv)
                    return train_csv, valid_csv, [test_csv]

            problem = TestSID()
            config = problem.default_config()
            config["target_dir"] = tempdir
            config["device"] = "cpu"
            config["train"]["total_steps"] = 4
            config["train"]["log_step"] = 1
            config["train"]["eval_step"] = 2
            config["train"]["save_step"] = 2
            config["eval_batch"] = 2
            config["build_upstream"]["name"] = "fbank"
            problem.run(**config)
def test_superb_sd():
    """Smoke-run the SuperbSD recipe on five generated wav files.

    Data preparation is overridden to write one table of per-recording
    speaker segments to the train, valid and test CSVs.  The recipe then runs
    for four training steps on CPU with the ``fbank`` upstream.  The test has
    no assertions and passes when ``run`` returns without raising.
    """
    # The same list is passed to pseudo_audio and written as "duration".
    secs = [10, 2, 1, 8, 5]

    with tempfile.TemporaryDirectory() as tempdir, pseudo_audio(secs) as (
        wav_paths,
        _num_samples,
    ):

        class TestSD(SuperbSD):
            def default_config(self) -> dict:
                """Return the parent config with ``prepare_data`` emptied."""
                base = super().default_config()
                base["prepare_data"] = {}
                return base

            def prepare_data(
                self,
                prepare_data: dict,
                target_dir: str,
                cache_dir: str,
                get_path_only=False,
            ):
                """Write the same segment table to train/valid/test CSVs.

                Returns the train path, the valid path and a one-element list
                holding the test path.
                """
                # File stems serve as both the record id and the utterance id.
                stems = [Path(wav).stem for wav in wav_paths]
                table = pd.DataFrame(
                    {
                        "record_id": stems,
                        "wav_path": wav_paths,
                        "duration": secs,
                        "utt_id": stems,
                        "speaker": ["a", "b", "a", "a", "b"],
                        "start_sec": [0.0, 0.1, 0.2, 0.3, 0.0],
                        "end_sec": [5.2, 1.0, 0.3, 5.4, 4.9],
                    }
                )

                out_dir = Path(target_dir)
                train_csv = out_dir / "train.csv"
                valid_csv = out_dir / "valid.csv"
                test_csv = out_dir / "test.csv"
                for csv_path in (train_csv, valid_csv, test_csv):
                    table.to_csv(csv_path)
                return train_csv, valid_csv, [test_csv]

        problem = TestSD()
        config = problem.default_config()
        config.update(target_dir=tempdir, device="cpu")
        config["train"].update(total_steps=4, log_step=1, eval_step=2, save_step=2)
        config["eval_batch"] = 2
        config["build_upstream"]["name"] = "fbank"
        problem.run(**config)
def test_superb_asv():
    """Smoke-run the SuperbASV recipe on five generated wav files.

    Data preparation is overridden to write a speaker-labelled train CSV and
    a test CSV of three labelled utterance pairs.  The recipe then runs for
    four training steps on CPU with the ``fbank`` upstream, saving every step
    and with ``eval_step`` set to infinity.  The test has no assertions and
    passes when ``run`` returns without raising.
    """
    secs = [10, 2, 1, 8, 5]

    with tempfile.TemporaryDirectory() as tempdir, pseudo_audio(secs) as (
        wav_paths,
        _num_samples,
    ):

        class TestASV(SuperbASV):
            def default_config(self) -> dict:
                """Return the parent config with ``prepare_data`` emptied."""
                base = super().default_config()
                base["prepare_data"] = {}
                return base

            def prepare_data(
                self,
                prepare_data: dict,
                target_dir: str,
                cache_dir: str,
                get_path_only: bool = False,
            ):
                """Write the train CSV and the trial-pair test CSV.

                Returns the train path and a one-element list holding the
                test path; no validation split is produced.
                """
                out_dir = Path(target_dir)
                train_csv = out_dir / "train.csv"
                test_csv = out_dir / "test.csv"

                stems = [Path(wav).stem for wav in wav_paths]
                pd.DataFrame(
                    {
                        "id": stems,
                        "wav_path": wav_paths,
                        "spk": ["a", "b", "c", "a", "b"],
                    }
                ).to_csv(train_csv)

                # Each trial pairs utterance first[k] with utterance second[k].
                first = (0, 1, 2)
                second = (1, 1, 2)
                pd.DataFrame(
                    {
                        "id1": [stems[i] for i in first],
                        "id2": [stems[i] for i in second],
                        "wav_path1": [wav_paths[i] for i in first],
                        "wav_path2": [wav_paths[i] for i in second],
                        "label": [0, 1, 1],
                    }
                ).to_csv(test_csv)
                return train_csv, [test_csv]

        problem = TestASV()
        config = problem.default_config()
        config.update(target_dir=tempdir, device="cpu")
        config["train"].update(
            total_steps=4, log_step=1, eval_step=math.inf, save_step=1
        )
        config["build_upstream"]["name"] = "fbank"
        problem.run(**config)
@pytest.mark.parametrize("vocab_type", ["subword", "character"])
def test_superb_sf(vocab_type):
    """Smoke-run the SuperbSF recipe on generated audio for one tokenizer type.

    A local ``SuperbSF`` subclass replaces data preparation with three small
    CSV splits holding a transcription and an ``iob`` tag string per
    utterance.  The recipe is then run for four training steps on CPU with
    the ``fbank`` upstream.  There are no explicit assertions: the test
    passes when ``run`` returns without raising.
    """
    # Only the subword tokenizer is given an explicit vocabulary size.
    vocab_args = {"vocab_size": 22} if vocab_type == "subword" else {}

    with tempfile.TemporaryDirectory() as tempdir, pseudo_audio(
        [10, 2, 1, 8, 5]
    ) as (wav_paths, _num_samples):

        class TestSF(SuperbSF):
            def default_config(self) -> dict:
                """Return the parent config with ``prepare_data`` emptied."""
                base = super().default_config()
                base["prepare_data"] = {}
                return base

            def prepare_data(
                self,
                prepare_data: dict,
                target_dir: str,
                cache_dir: str,
                get_path_only: bool = False,
            ):
                """Write train/valid/test CSVs with transcription and iob columns.

                Returns the train path, the valid path and a one-element list
                holding the test path.
                """
                # (sentence, tags) pairs; each tag string has one tag per word.
                samples = [
                    ("hello how are you today", "O O O O timeRange"),
                    ("fine thank you", "condition O O"),
                    ("oh nice", "O condition"),
                    ("I think is good", "O O O genre"),
                    ("maybe okay", "O genre"),
                ]
                sentences, tags = zip(*samples)
                table = pd.DataFrame(
                    {
                        "id": list(range(len(wav_paths))),
                        "wav_path": wav_paths,
                        "transcription": sentences,
                        "iob": tags,
                    }
                )

                out_dir = Path(target_dir)
                train_path = out_dir / "train.csv"
                valid_path = out_dir / "valid.csv"
                test_path = out_dir / "test.csv"

                # Rows 0-2 -> train, row 3 -> valid, row 4 -> test.
                splits = (
                    (slice(None, 3), train_path),
                    (slice(3, 4), valid_path),
                    (slice(4, None), test_path),
                )
                for rows, csv_path in splits:
                    table.iloc[rows].to_csv(csv_path, index=False)
                return train_path, valid_path, [test_path]

        problem = TestSF()
        config = problem.default_config()
        config.update(target_dir=tempdir, device="cpu")
        config["train"].update(total_steps=4, log_step=1, eval_step=2, save_step=2)
        config["eval_batch"] = 2
        config["build_tokenizer"] = dict(vocab_type=vocab_type, vocab_args=vocab_args)
        config["build_upstream"]["name"] = "fbank"
        problem.run(**config)