Spaces:
Runtime error
Runtime error
import os | |
import json | |
from typing import Dict, List | |
def load_eurorad_dataset( | |
dataset_path: str, | |
section: str = "any", | |
as_dict: bool = False, | |
filter_by_caption: List[str] = [ | |
"xray", | |
"x-ray", | |
"x ray", | |
"ray", | |
"xr", | |
"radiograph", | |
"radiogram", | |
"plain film", | |
], | |
) -> List[Dict] | Dict[str, Dict]: | |
""" | |
Load a dataset from a JSON file. | |
Args: | |
dataset_path (str): Path to the JSON dataset file. | |
section (str, optional): Section of the dataset to load. Defaults to "any". | |
as_dict (bool, optional): Whether to return data as dict. Defaults to False. | |
filter_by_caption (List[str], optional): List of strings to filter cases by caption content. Defaults to []. | |
Returns: | |
List[Dict] | Dict[str, Dict]: The loaded dataset as a list of dictionaries or dict if as_dict=True. | |
Raises: | |
FileNotFoundError: If dataset_path does not exist | |
json.JSONDecodeError: If file is not valid JSON | |
""" | |
with open(dataset_path, "r", encoding="utf-8") as file: | |
data = json.load(file) | |
if filter_by_caption: | |
filtered_data = {} | |
for case_id, case in data.items(): | |
if any( | |
any(x in subfig["caption"].lower() for x in filter_by_caption) | |
for figure in case["figures"] | |
for subfig in figure["subfigures"] | |
) or any(x in case["image_finding"].lower() for x in filter_by_caption): | |
filtered_data[case_id] = case | |
data = filtered_data | |
if section != "any": | |
section = section.strip().lower() | |
if not as_dict: | |
data = [ | |
item for item in data.values() if item.get("section", "").strip().lower() == section | |
] | |
else: | |
data = { | |
k: v for k, v in data.items() if v.get("section", "").strip().lower() == section | |
} | |
elif not as_dict: | |
data = list(data.values()) | |
return data | |
def save_dataset(dataset: Dict | List[Dict], dataset_path: str): | |
""" | |
Save a dataset to a JSON file. | |
Args: | |
dataset (Dict | List[Dict]): The dataset to save as a dictionary or list of dictionaries. | |
dataset_path (str): Path where the JSON dataset file will be saved. | |
""" | |
with open(dataset_path, "w", encoding="utf-8") as file: | |
json.dump(dataset, file) | |