|
import logging |
|
from pathlib import Path |
|
from typing import Dict |
|
from typing import List |
|
from typing import Union |
|
|
|
from typeguard import check_argument_types |
|
|
|
|
|
def read_2column_text(path: Union[Path, str]) -> Dict[str, str]:
    """Read a two-column text file into a dict.

    Every line is stripped of trailing whitespace and split once on
    whitespace: the first field becomes the key and the rest of the line
    becomes the value.  A line that holds only a key maps it to "".

    Examples:
        wav.scp:
            key1 /some/path/a.wav
            key2 /some/path/b.wav

        >>> read_2column_text('wav.scp')
        {'key1': '/some/path/a.wav', 'key2': '/some/path/b.wav'}

    Args:
        path: Location of the text file; it is opened as UTF-8.

    Returns:
        Mapping from each key to its value string.

    Raises:
        ValueError: If a line is empty or contains only whitespace.
        RuntimeError: If the same key appears on more than one line.
    """
    assert check_argument_types()

    data = {}
    with Path(path).open("r", encoding="utf-8") as f:
        for linenum, line in enumerate(f, 1):
            sps = line.rstrip().split(maxsplit=1)
            if not sps:
                # A blank line used to reach the tuple unpacking below and
                # fail with an anonymous "not enough values to unpack"
                # ValueError; keep the exception type but say where.
                raise ValueError(f"Empty line found ({path}:{linenum})")
            k = sps[0]
            # Key-only lines are allowed and yield an empty value.
            v = sps[1] if len(sps) == 2 else ""
            if k in data:
                raise RuntimeError(f"{k} is duplicated ({path}:{linenum})")
            data[k] = v
    return data
|
|
|
|
|
def load_num_sequence_text(
    path: Union[Path, str], loader_type: str = "csv_int"
) -> Dict[str, List[Union[float, int]]]:
    """Read a text file whose lines are a key followed by a number sequence.

    The file is read with ``read_2column_text``; each value string is then
    split on the delimiter selected by ``loader_type`` and every piece is
    converted to a number.

    Supported ``loader_type`` values:
        "text_int":   pieces separated by a single space, parsed as int
        "text_float": pieces separated by a single space, parsed as float
        "csv_int":    pieces separated by ",", parsed as int
        "csv_float":  pieces separated by ",", parsed as float

    Examples:
        text:
            key1 1 2 3
            key2 34 5 6

        >>> d = load_num_sequence_text('text', loader_type='text_int')
        >>> np.testing.assert_array_equal(d["key1"], np.array([1, 2, 3]))

    Args:
        path: Location of the text file.
        loader_type: One of the names listed above; selects both the
            delimiter and the numeric type.

    Returns:
        Mapping from each key to its list of parsed numbers.

    Raises:
        ValueError: If ``loader_type`` is not supported, or if a piece of a
            value cannot be parsed as the requested numeric type.
        RuntimeError: Propagated from ``read_2column_text`` when a key is
            duplicated in the file.
    """
    assert check_argument_types()

    # loader_type -> (delimiter, element type)
    loaders = {
        "text_int": (" ", int),
        "text_float": (" ", float),
        "csv_int": (",", int),
        "csv_float": (",", float),
    }
    # Validate before touching the file so a bad loader_type fails fast.
    if loader_type not in loaders:
        raise ValueError(f"Not supported loader_type={loader_type}")
    delimiter, dtype = loaders[loader_type]

    d = read_2column_text(path)

    retval = {}
    for k, v in d.items():
        try:
            retval[k] = [dtype(i) for i in v.split(delimiter)]
        except (TypeError, ValueError):
            # int()/float() raise ValueError on malformed text; catching
            # only TypeError meant this diagnostic was never logged.
            logging.error(f'Error happened with path="{path}", id="{k}", value="{v}"')
            raise
    return retval
|
|