from pathlib import Path
from typing import Union
import warnings

try:
    from typeguard import check_argument_types
    from typeguard import check_return_type
except ImportError:
    # typeguard is either not installed or is >= 3.0, which removed these two
    # helpers.  They are only ever used inside ``assert`` statements here
    # (i.e. as advisory debug checks), so degrade to no-ops instead of making
    # the whole module unimportable.

    def check_argument_types(*args, **kwargs) -> bool:
        """Fallback used when typeguard's helper is unavailable."""
        return True

    def check_return_type(*args, **kwargs) -> bool:
        """Fallback used when typeguard's helper is unavailable."""
        return True


class DatadirWriter:
    """Writer class to create kaldi like data directory.

    A writer is either a *directory* node (it has been indexed with
    ``writer[name]`` and owns child writers) or a *file* node (it has been
    assigned to with ``writer[key] = value`` and owns an open text file).
    A single instance cannot be both.

    Examples:
        >>> with DatadirWriter("output") as writer:
        ...     # A child writer bound to output/sub.txt is returned here;
        ...     # the file itself is created lazily on the first assignment.
        ...     subwriter = writer["sub.txt"]
        ...     # Write "uttidA some/where/a.wav"
        ...     subwriter["uttidA"] = "some/where/a.wav"
        ...     subwriter["uttidB"] = "some/where/b.wav"

    """

    def __init__(self, p: Union[Path, str]):
        """Bind the writer to path ``p`` without touching the filesystem."""
        assert check_argument_types()
        self.path = Path(p)
        # NOTE: the misspelled attribute name is kept as-is because code
        # outside this class may read it.
        self.chilidren = {}
        # Opened lazily by the first __setitem__ call.
        self.fd = None
        self.has_children = False
        # Keys written so far; used for duplicate / mismatch warnings.
        self.keys = set()

    def __enter__(self) -> "DatadirWriter":
        """Return the writer itself so it can be used in a ``with`` block."""
        return self

    def __getitem__(self, key: str) -> "DatadirWriter":
        """Return the child writer for ``self.path / key``.

        The child is created on first access and the same instance is
        returned on subsequent accesses with the same key.

        Raises:
            RuntimeError: If this writer has already been used as a file.
        """
        assert check_argument_types()
        if self.fd is not None:
            raise RuntimeError("This writer points out a file")

        if key not in self.chilidren:
            self.chilidren[key] = DatadirWriter(self.path / key)
            self.has_children = True

        retval = self.chilidren[key]
        assert check_return_type(retval)
        return retval

    def __setitem__(self, key: str, value: str) -> None:
        """Append the line ``"{key} {value}"`` to the file at ``self.path``.

        The parent directory and the file are created on the first call.
        A duplicated key only triggers a warning; the line is still written.

        Raises:
            RuntimeError: If this writer has already been used as a directory.
        """
        assert check_argument_types()
        if self.has_children:
            raise RuntimeError("This writer points out a directory")
        if key in self.keys:
            warnings.warn(f"Duplicated: {key}")

        if self.fd is None:
            self.path.parent.mkdir(parents=True, exist_ok=True)
            self.fd = self.path.open("w", encoding="utf-8")

        self.keys.add(key)
        self.fd.write(f"{key} {value}\n")

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        """Close the writer; exceptions from the ``with`` body propagate."""
        self.close()

    def close(self) -> None:
        """Close this writer and, recursively, all of its children.

        For a directory node, a warning is emitted for each pair of
        consecutive children whose sets of written keys differ.
        """
        if self.has_children:
            prev_child = None
            for child in self.chilidren.values():
                child.close()
                if prev_child is not None and prev_child.keys != child.keys:
                    warnings.warn(
                        f"Ids are mismatching between "
                        f"{prev_child.path} and {child.path}"
                    )
                prev_child = child

        elif self.fd is not None:
            self.fd.close()