|
from pathlib import Path |
|
from typing import Union |
|
import warnings |
|
|
|
from typeguard import check_argument_types |
|
from typeguard import check_return_type |
|
|
|
|
|
class DatadirWriter:
    """Writer class to create a Kaldi-like data directory.

    A writer is a node in a small tree and plays exactly one role:

    * *directory*: once indexed with ``writer[name]`` it hands out child
      writers located at ``path / name`` and can no longer be written to;
    * *file*: once assigned to with ``writer[key] = value`` it appends the
      line ``"<key> <value>"`` to the file at ``path`` and can no longer
      hand out children.

    The output file is opened lazily, by the first assignment, so a child
    that never receives an entry does not create a file.

    Examples:
        >>> with DatadirWriter("output") as writer:
        ...     # Child writer bound to output/sub.txt; the file itself is
        ...     # created by the first assignment below
        ...     subwriter = writer["sub.txt"]
        ...     # Write "uttidA some/where/a.wav"
        ...     subwriter["uttidA"] = "some/where/a.wav"
        ...     subwriter["uttidB"] = "some/where/b.wav"

    """

    def __init__(self, p: Union[Path, str]):
        """Create a writer rooted at ``p``; nothing is touched on disk yet.

        Args:
            p: Path of the directory (or file) this writer represents.
        """
        assert check_argument_types()
        self.path = Path(p)
        # NOTE: "chilidren" is a historical misspelling of "children". The
        # attribute is public, so the name is kept for compatibility.
        self.chilidren = {}
        # Text file handle; stays None until the first __setitem__ call.
        self.fd = None
        # True once __getitem__ has created at least one child writer.
        self.has_children = False
        # Every key written through __setitem__ (used for duplicate and
        # mismatch warnings).
        self.keys = set()

    def __enter__(self):
        """Return the writer itself so it can be used in a ``with`` block."""
        return self

    def __getitem__(self, key: str) -> "DatadirWriter":
        """Return the child writer for ``path / key``, creating it if needed.

        Args:
            key: Name of the child relative to this writer's path.

        Returns:
            The child writer; repeated calls with the same key return the
            same object.

        Raises:
            RuntimeError: If this writer has already opened its own file.
        """
        assert check_argument_types()
        if self.fd is not None:
            raise RuntimeError("This writer points out a file")

        if key not in self.chilidren:
            self.chilidren[key] = DatadirWriter(self.path / key)
            self.has_children = True

        retval = self.chilidren[key]
        assert check_return_type(retval)
        return retval

    def __setitem__(self, key: str, value: str):
        """Append the line ``"<key> <value>"`` to this writer's file.

        The parent directory and the file are created by the first call.
        A repeated key only triggers a warning; the line is still written.

        Args:
            key: Entry identifier (first column of the line).
            value: Entry content (rest of the line).

        Raises:
            RuntimeError: If this writer has already handed out children.
        """
        assert check_argument_types()
        if self.has_children:
            raise RuntimeError("This writer points out a directory")
        if key in self.keys:
            warnings.warn(f"Duplicated: {key}")

        if self.fd is None:
            self.path.parent.mkdir(parents=True, exist_ok=True)
            self.fd = self.path.open("w", encoding="utf-8")

        self.keys.add(key)
        self.fd.write(f"{key} {value}\n")

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Close the writer when leaving the ``with`` block."""
        self.close()

    def close(self):
        """Close this writer's file, or every child writer recursively.

        For a directory writer, a warning is emitted for each pair of
        consecutive children whose key sets differ.

        Raises:
            Exception: The first error raised while closing a child. It is
                re-raised only after all remaining children have been
                closed, so one failure does not leak the other handles.
        """
        if self.has_children:
            first_error = None
            prev_child = None
            for child in self.chilidren.values():
                try:
                    child.close()
                except Exception as err:
                    # Keep going: the siblings still hold open files.
                    if first_error is None:
                        first_error = err
                if prev_child is not None and prev_child.keys != child.keys:
                    warnings.warn(
                        f"Ids are mismatching between "
                        f"{prev_child.path} and {child.path}"
                    )
                prev_child = child
            if first_error is not None:
                raise first_error

        elif self.fd is not None:
            self.fd.close()
|
|