|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
r"""Implements nocaps val/test set in TFDS structure. |
|
|
|
It's small data, so simple to run locally. First, copy the data to local disk: |
|
|
|
mkdir -p /tmp/data/nocaps_data |
|
cd /tmp/data/nocaps_data |
|
wget https://s3.amazonaws.com/open-images-dataset/tar/test.tar.gz |
|
wget https://s3.amazonaws.com/open-images-dataset/tar/validation.tar.gz |
|
curl -O https://nocaps.s3.amazonaws.com/nocaps_val_4500_captions.json |
|
curl -O https://s3.amazonaws.com/nocaps/nocaps_test_image_info.json |
|
|
|
mkdir -p /tmp/data/nocaps_data/Images |
|
tar -xf validation.tar.gz -C Images |
|
rm validation.tar.gz |
|
tar -xf test.tar.gz -C Images |
|
rm test.tar.gz |
|
|
|
Then, run conversion locally (make sure to install tensorflow-datasets for the `tfds` util): |
|
|
|
cd big_vision/datasets |
|
env TFDS_DATA_DIR=/tmp/tfds tfds build --datasets=nocaps |
|
|
|
Example to load: |
|
|
|
import tensorflow_datasets as tfds |
|
dataset = tfds.load('nocaps', split='val', data_dir='/tmp/tfds') |
|
""" |
|
import collections |
|
import json |
|
import os |
|
|
|
from absl import logging |
|
import numpy as np |
|
import tensorflow as tf |
|
import tensorflow_datasets as tfds |
|
|
|
|
|
_DESCRIPTION = """Nocaps dataset.""" |
|
|
|
_CITATION = ( |
|
'@inproceedings{agrawal2019nocaps,' |
|
'title={nocaps: novel object captioning at scale},' |
|
'author={Agrawal, Harsh and Desai, Karan and Wang, Yufei and Chen, Xinlei' |
|
'and Jain, Rishabh and Johnson, Mark and Batra, Dhruv and Parikh, Devi' |
|
'and Lee, Stefan and Anderson, Peter},' |
|
'booktitle={ICCV},' |
|
'pages={8948--8957},' |
|
'year={2019}}') |
|
|
|
|
|
_FILEPATH = '/tmp/data/nocaps_data/Images/' |
|
_VAL_FILES = '/tmp/data/nocaps_data/nocaps_val_4500_captions.json' |
|
_TEST_FILES = '/tmp/data/nocaps_data/nocaps_test_image_info.json' |
|
|
|
|
|
class NoCaps(tfds.core.GeneratorBasedBuilder):
  """DatasetBuilder for nocaps dataset."""

  VERSION = tfds.core.Version('1.0.0')
  RELEASE_NOTES = {
      '1.0.0': 'Initial release.',
  }

  def _info(self) -> tfds.core.DatasetInfo:
    """Returns the dataset metadata.

    Returns:
      A `tfds.core.DatasetInfo` declaring the features of each example: the
      integer image id, the local image file path, the image url, the
      JPEG-encoded image itself and the sequence of caption texts.
    """
    return tfds.core.DatasetInfo(
        builder=self,
        description=_DESCRIPTION,
        features=tfds.features.FeaturesDict({
            'image/id': tf.int64,
            'image_filepath': tfds.features.Text(),
            'url': tfds.features.Text(),
            'image': tfds.features.Image(encoding_format='jpeg'),
            'texts': tfds.features.Sequence(tfds.features.Text()),
        }),
        # No (input, target) pair is declared for `as_supervised=True`.
        supervised_keys=None,
        homepage='https://nocaps.org/',
        citation=_CITATION,
    )

  def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Returns SplitGenerators.

    The data is read from the fixed local paths `_VAL_FILES`, `_TEST_FILES`
    and `_FILEPATH`; `dl_manager` is not used.

    Args:
      dl_manager: the TFDS download manager (unused).

    Returns:
      A dict mapping the split names 'val' and 'test' to example generators.
    """

    def group_by_id(data, image_dir):
      """Builds one example dict per image, keyed by image id.

      Args:
        data: parsed JSON content with an 'images' list and, optionally, an
          'annotations' list of captions.
        image_dir: sub-directory of `_FILEPATH` that holds the image files.

      Returns:
        A dict mapping image id to an example with the keys declared in
        `_info`.
      """
      # Collect all captions belonging to the same image. The 'annotations'
      # entry may be missing, in which case no captions are collected.
      id2caps = collections.defaultdict(list)
      for ex in data.get('annotations', []):
        id2caps[ex['image_id']].append(ex['caption'])

      id_to_example = {}
      for ex in data['images']:
        image_path = os.path.join(_FILEPATH, image_dir, ex['file_name'])
        id_to_example[ex['id']] = {
            'image/id': ex['id'],
            'image_filepath': image_path,
            'url': ex['coco_url'],
            # Passed as a path; used as the value of the `Image` feature.
            'image': image_path,
            # Images without any caption get a single placeholder text.
            'texts': id2caps.get(ex['id'], ['N/A']),
        }
      return id_to_example

    # JSON is UTF-8 encoded; do not depend on the platform default encoding.
    with open(_VAL_FILES, encoding='utf-8') as f:
      val_data = group_by_id(json.load(f), 'validation')
    with open(_TEST_FILES, encoding='utf-8') as f:
      test_data = group_by_id(json.load(f), 'test')
    return {
        'val': self._generate_examples(val_data),
        'test': self._generate_examples(test_data),
    }

  def _generate_examples(self, data):
    """Yields the examples whose image can be read and decoded as JPEG.

    Every image file is read and decoded once up front. Examples whose file
    is missing or cannot be decoded are logged and skipped.

    Args:
      data: a dictionary mapping the image id to its example dict, which must
        contain at least the 'image_filepath' and 'url' entries.

    Yields:
      (key, example) tuples from dataset. The example has format specified in
      the above DatasetInfo.
    """
    for k, v in data.items():
      try:
        # The decoded image is discarded; decoding only validates the file.
        unused_image = tf.io.read_file(v['image_filepath'])
        unused_image = np.array(tf.image.decode_jpeg(unused_image))
      except tf.errors.InvalidArgumentError:
        # The file exists but its content could not be decoded as JPEG.
        logging.error('Unable to decode: curl -O %s', v['url'])
        continue
      except tf.errors.NotFoundError:
        # The file is not present on local disk.
        logging.error('File not found: curl -O %s', v['url'])
        continue

      yield k, v
|
|