Spaces:
Running
Running
File size: 3,334 Bytes
aaef8e0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 |
import json
from pathlib import Path
from zipfile import ZipFile
from typing import List, Dict, Any
from tempfile import TemporaryDirectory
def validate_zip(submission_track: str, submission_zip: str):
    """
    Validates the submission format and contents

    Args:
        submission_track: the track of the submission
        submission_zip: path to the submission zip file
    Raises:
        ValueError: if the track is unknown, the file is not a readable zip
            archive, or the extracted contents fail the track's validation
    """
    # Local import: keeps this function self-contained with respect to the
    # module-level import list.
    from zipfile import BadZipFile

    notsofar_tracks = ('NOTSOFAR-SC', 'NOTSOFAR-MC')
    dasr_tracks = ('DASR-Constrained-LM', 'DASR-Unconstrained-LM')

    # Reject unknown tracks up front so the archive is not unpacked for nothing.
    if submission_track not in notsofar_tracks + dasr_tracks:
        raise ValueError(f'Invalid submission track: {submission_track}')

    with TemporaryDirectory() as temp_dir:
        try:
            with ZipFile(submission_zip, 'r') as submission_zip_file:
                submission_zip_file.extractall(temp_dir)
        except BadZipFile as err:
            # BadZipFile is not a ValueError; convert it so callers that only
            # handle the documented exception also see corrupt uploads.
            raise ValueError(f'Invalid submission zip file: {err}') from err

        submission_dir = Path(temp_dir)
        if submission_track in notsofar_tracks:
            validate_notsofar_submission(submission_dir=submission_dir)
        else:
            validate_dasr_submission(submission_dir=submission_dir)
def validate_notsofar_submission(submission_dir: Path):
    """
    Validates NOTSOFAR submission format and contents

    Args:
        submission_dir: path to the submission directory
    Raises:
        ValueError: if a required file is missing, cannot be parsed as JSON,
            is not a list, or has an entry that is not an object containing
            every required field
    """
    submission_file_names = ['tc_orc_wer_hyp.json', 'tcp_wer_hyp.json']
    fields = ['session_id', 'words', 'speaker', 'start_time', 'end_time']

    for file_name in submission_file_names:
        file_path = submission_dir / file_name
        # is_file() instead of exists(): a directory carrying the expected
        # name is not a submission file and would make open() fail.
        if not file_path.is_file():
            raise ValueError(f'Missing {file_name}')

        # Read as UTF-8 explicitly rather than the platform default encoding.
        # Parse and decode failures (json.JSONDecodeError, UnicodeDecodeError)
        # are ValueError subclasses, so they already match the contract.
        with open(file_path, 'r', encoding='utf-8') as json_file:
            json_data: Any = json.load(json_file)

        if not isinstance(json_data, list):
            raise ValueError(f'Invalid `{file_name}` format, expecting a list of entries')

        for data in json_data:
            # Entries must be JSON objects: a bare number would make `in`
            # raise TypeError, and a list of field names would pass wrongly.
            if not isinstance(data, dict) or not all(field in data for field in fields):
                raise ValueError(f'Invalid `{file_name}` format, fields: {fields} are required in each entry')
def validate_dasr_submission(submission_dir: Path):
    """
    Validates DASR submission format and contents

    Args:
        submission_dir: path to the submission directory
    Raises:
        ValueError: if the `dev` directory or a required file in it is
            missing, a file cannot be parsed as JSON, is not a list, or has
            an entry that is not an object containing every required field
    """
    submission_file_names = ['chime6.json', 'dipco.json', 'mixer6.json', 'notsofar1.json']
    fields = ['session_id', 'words', 'speaker', 'start_time', 'end_time']

    dev_dir = submission_dir / 'dev'
    # is_dir() instead of exists(): a regular file named `dev` must be
    # reported as a missing directory, not as a missing JSON file later on.
    if not dev_dir.is_dir():
        raise ValueError('Missing dev directory, expecting a directory named `dev` with the submission files in it.')

    for file_name in submission_file_names:
        file_path = dev_dir / file_name
        # A directory carrying the expected name is not a submission file.
        if not file_path.is_file():
            raise ValueError(f'Missing {file_name}')

        # Read as UTF-8 explicitly rather than the platform default encoding.
        # Parse and decode failures (json.JSONDecodeError, UnicodeDecodeError)
        # are ValueError subclasses, so they already match the contract.
        with open(file_path, 'r', encoding='utf-8') as json_file:
            json_data: Any = json.load(json_file)

        if not isinstance(json_data, list):
            raise ValueError(f'Invalid `{file_name}` format, expecting a list of entries')

        for data in json_data:
            # Entries must be JSON objects: a bare number would make `in`
            # raise TypeError, and a list of field names would pass wrongly.
            if not isinstance(data, dict) or not all(field in data for field in fields):
                raise ValueError(f'Invalid `{file_name}` format, fields: {fields} are required in each entry')
|