"""Cut the recordings of one dataset split into per-segment WAV/text pairs.

For every XML file under ``<archive_path>/xml/utf8`` the script reads the
recording's file name from the XML header and then, for each segment, writes
the matching slice of ``<archive_path>/wav/<filename>.wav`` together with the
segment's text to ``<archive_path>/dataset/<split>/wav`` and
``<archive_path>/dataset/<split>/txt``.
"""

import xml.etree.ElementTree as ET
from pathlib import Path

import soundfile as sf

split = "train"

# Factor that turns a segment's ``starttime``/``endtime`` into frame offsets,
# and the rate the output files are written with.
# NOTE(review): assumes the times are in seconds and that every source WAV is
# sampled at 16 kHz -- confirm against the corpus.
SAMPLE_RATE = 16_000

archive_path = Path("data/train")

wav_dir = archive_path / "wav"
# Despite its name this is the directory holding the per-recording XML files.
segments_file = archive_path / "xml" / "utf8"

output_wav_dir = archive_path / "dataset" / split / "wav"
output_txt_dir = archive_path / "dataset" / split / "txt"

output_wav_dir.mkdir(parents=True, exist_ok=True)
output_txt_dir.mkdir(parents=True, exist_ok=True)

for s_file in segments_file.glob("*.xml"):
    root = ET.parse(str(s_file)).getroot()
    # The layout is addressed positionally: the first child of the root holds
    # the header, the first child of the root's second child the segments.
    head = root[0]
    segments = root[1][0]

    # Reset for every XML file so that a header without a usable "recording"
    # element cannot silently reuse the name found in a previous file.
    file_name = None
    for child in head:
        if child.tag == "recording":
            print(child.attrib)
            file_name = child.attrib.get("filename")

    if file_name is None:
        print(f"Skipping {s_file}: no recording filename found")
        continue

    wav_path = wav_dir / f"{file_name}.wav"

    for segment in segments:
        start_time = int(float(segment.attrib.get("starttime")) * SAMPLE_RATE)
        end_time = int(float(segment.attrib.get("endtime")) * SAMPLE_RATE)

        # Children without text have ``.text is None``; leave them out instead
        # of letting ``str.join`` raise a TypeError.
        text = " ".join(x.text for x in segment if x.text)

        stem = f"{file_name}_seg{start_time}_{end_time}"

        # Read only the frames belonging to this segment.
        sound, _ = sf.read(wav_path, start=start_time, stop=end_time)
        sf.write(output_wav_dir / f"{stem}.wav", sound, SAMPLE_RATE)

        # write_text closes the file and pins the encoding, so the transcript
        # does not depend on the platform's locale.
        (output_txt_dir / f"{stem}.txt").write_text(text, encoding="utf-8")