from collections import defaultdict
from csv import DictReader, reader as TupleReader
from pathlib import Path
from typing import Dict, List, Any
import warnings

from taming.data.annotated_objects_dataset import AnnotatedObjectsDataset
from taming.data.helper_types import Annotation, Category
from tqdm import tqdm

OPEN_IMAGES_STRUCTURE = {
    'train': {
        'top_level': '',
        'class_descriptions': 'class-descriptions-boxable.csv',
        'annotations': 'oidv6-train-annotations-bbox.csv',
        'file_list': 'train-images-boxable.csv',
        'files': 'train'
    },
    'validation': {
        'top_level': '',
        'class_descriptions': 'class-descriptions-boxable.csv',
        'annotations': 'validation-annotations-bbox.csv',
        'file_list': 'validation-images.csv',
        'files': 'validation'
    },
    'test': {
        'top_level': '',
        'class_descriptions': 'class-descriptions-boxable.csv',
        'annotations': 'test-annotations-bbox.csv',
        'file_list': 'test-images.csv',
        'files': 'test'
    }
}

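# For reference: the `annotations` CSVs above follow the official Open Images
# bbox schema, i.e. exactly the columns that `load_annotations` below reads
# (ImageID, Source, LabelName, Confidence, XMin, XMax, YMin, YMax, IsOccluded,
# IsTruncated, IsGroupOf, IsDepiction, IsInside), with box coordinates
# normalized to [0, 1]. An illustrative (made-up) row:
#
#   000002b66c9c498e,xclick,/m/01g317,1,0.012500,0.195312,0.148438,0.587500,0,1,0,0,0
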
def load_annotations(descriptor_path: Path, min_object_area: float, category_mapping: Dict[str, str],
                     category_no_for_id: Dict[str, int]) -> Dict[str, List[Annotation]]:
    annotations: Dict[str, List[Annotation]] = defaultdict(list)
    with open(descriptor_path) as file:
        reader = DictReader(file)
        for i, row in tqdm(enumerate(reader), total=14620000, desc='Loading OpenImages annotations'):
            # Box coordinates are normalized to [0, 1], so `area` is a fraction
            # of the image area and is compared against `min_object_area` in
            # the same units.
            width = float(row['XMax']) - float(row['XMin'])
            height = float(row['YMax']) - float(row['YMin'])
            area = width * height
            category_id = row['LabelName']
            if category_id in category_mapping:
                category_id = category_mapping[category_id]
            if area >= min_object_area and category_id in category_no_for_id:
                annotations[row['ImageID']].append(
                    Annotation(
                        id=i,
                        image_id=row['ImageID'],
                        source=row['Source'],
                        category_id=category_id,
                        category_no=category_no_for_id[category_id],
                        confidence=float(row['Confidence']),
                        bbox=(float(row['XMin']), float(row['YMin']), width, height),
                        area=area,
                        is_occluded=bool(int(row['IsOccluded'])),
                        is_truncated=bool(int(row['IsTruncated'])),
                        is_group_of=bool(int(row['IsGroupOf'])),
                        is_depiction=bool(int(row['IsDepiction'])),
                        is_inside=bool(int(row['IsInside']))
                    )
                )
        # The full training split has roughly 14.6M boxes; seeing fewer rows
        # than that means a truncated annotation file was read.
        if 'train' in str(descriptor_path) and i < 14000000:
            warnings.warn(f'Running with subset of Open Images. Train dataset has length [{len(annotations)}].')
    return dict(annotations)

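# A minimal usage sketch for `load_annotations`; the path and category tables
# here are placeholders (in practice they come from the dataset class below):
#
#   category_no_for_id = {'/m/01g317': 0}   # label id -> contiguous class number
#   annotations = load_annotations(
#       Path('open_images/validation-annotations-bbox.csv'),
#       min_object_area=0.0001,
#       category_mapping={},                # no label-id remapping
#       category_no_for_id=category_no_for_id,
#   )
#   # -> {'<ImageID>': [Annotation(...), ...], ...}
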
def load_image_ids(csv_path: Path) -> List[str]:
    with open(csv_path) as file:
        reader = DictReader(file)
        return [row['image_name'] for row in reader]

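# `load_image_ids` only requires an `image_name` column; any further columns in
# the official file lists (such as image URLs) are ignored by the DictReader
# lookup above.
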
def load_categories(csv_path: Path) -> Dict[str, Category]:
    with open(csv_path) as file:
        reader = TupleReader(file)
        return {row[0]: Category(id=row[0], name=row[1], super_category=None) for row in reader}

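# class-descriptions-boxable.csv is a headerless two-column file mapping machine
# label ids to display names, e.g. a row like `/m/011k07,Tortoise`; hence the
# plain tuple reader above instead of a DictReader.
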
class AnnotatedObjectsOpenImages(AnnotatedObjectsDataset):
    def __init__(self, use_additional_parameters: bool, **kwargs):
        """
        @param use_additional_parameters: whether to use the additional box
            attributes parsed above (occluded, truncated, group-of, depiction,
            inside).
        @param data_path: path to the following folder structure:
                          open_images/
                          ├── class-descriptions-boxable.csv
                          ├── oidv6-train-annotations-bbox.csv
                          ├── test
                          │   ├── 000026e7ee790996.jpg
                          │   ├── 000062a39995e348.jpg
                          │   └── ...
                          ├── test-annotations-bbox.csv
                          ├── test-images.csv
                          ├── train
                          │   ├── 000002b66c9c498e.jpg
                          │   ├── 000002b97e5471a0.jpg
                          │   └── ...
                          ├── train-images-boxable.csv
                          ├── validation
                          │   ├── 0001eeaf4aed83f9.jpg
                          │   ├── 0004886b7d043cfd.jpg
                          │   └── ...
                          ├── validation-annotations-bbox.csv
                          └── validation-images.csv
        @param split: one of 'train', 'validation' or 'test'
        @param target_image_size: desired image size (returns square images)
        """

        super().__init__(**kwargs)
        self.use_additional_parameters = use_additional_parameters

        self.categories = load_categories(self.paths['class_descriptions'])
        self.filter_categories()
        self.setup_category_id_and_number()

        self.image_descriptions = {}
        annotations = load_annotations(self.paths['annotations'], self.min_object_area, self.category_mapping,
                                       self.category_number)
        self.annotations = self.filter_object_number(annotations, self.min_object_area, self.min_objects_per_image,
                                                     self.max_objects_per_image)
        self.image_ids = list(self.annotations.keys())
        self.clean_up_annotations_and_image_descriptions()

    def get_path_structure(self) -> Dict[str, str]:
        if self.split not in OPEN_IMAGES_STRUCTURE:
            raise ValueError(f'Split [{self.split}] does not exist for Open Images data.')
        return OPEN_IMAGES_STRUCTURE[self.split]

    def get_image_path(self, image_id: str) -> Path:
        # Image ids map to zero-padded 16-character file names, e.g. 000002b66c9c498e.jpg.
        return self.paths['files'].joinpath(f'{image_id:0>16}.jpg')

    def get_image_description(self, image_id: str) -> Dict[str, Any]:
        image_path = self.get_image_path(image_id)
        return {'file_path': str(image_path), 'file_name': image_path.name}
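

# Rough instantiation sketch (kept commented out). The keyword arguments are
# forwarded to AnnotatedObjectsDataset; the names below mirror those used in
# this repo's configs but are assumptions here, not a definitive signature.
#
# if __name__ == '__main__':
#     dataset = AnnotatedObjectsOpenImages(
#         use_additional_parameters=True,
#         data_path='data/open_images',   # root of the folder structure in __init__
#         split='validation',
#         keys=['image', 'objects_bbox', 'file_name', 'annotations'],
#         target_image_size=256,
#         min_object_area=0.0001,
#         min_objects_per_image=2,
#         max_objects_per_image=30,
#         crop_method='random-1d',
#         random_flip=True,
#         no_tokens=8192,
#         use_group_parameter=True,
#         encode_crop=True,
#     )
#     print(f'{len(dataset.image_ids)} images in split')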