File size: 5,640 Bytes
74e8f2f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
# Copyright 2024 Big Vision Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# pylint: disable=line-too-long
r"""Implements nocaps val/test set in TFDS structure.

It's small data, so simple to run locally. First, copy the data to local disk:

  mkdir -p /tmp/data/nocaps_data
  cd /tmp/data/nocaps_data
  wget https://s3.amazonaws.com/open-images-dataset/tar/test.tar.gz
  wget https://s3.amazonaws.com/open-images-dataset/tar/validation.tar.gz
  curl -O https://nocaps.s3.amazonaws.com/nocaps_val_4500_captions.json
  curl -O https://s3.amazonaws.com/nocaps/nocaps_test_image_info.json

  mkdir -p /tmp/data/nocaps_data/Images
  tar -xf validation.tar.gz -C Images
  rm validation.tar.gz
  tar -xf test.tar.gz -C Images
  rm test.tar.gz

Then, run conversion locally (make sure to install tensorflow-datasets for the `tfds` util):

  cd big_vision/datasets
  env TFDS_DATA_DIR=/tmp/tfds tfds build --datasets=nocaps

Example to load:

  import tensorflow_datasets as tfds
  dataset = tfds.load('nocaps', split='val', data_dir='/tmp/tfds')
"""
import collections
import json
import os

from absl import logging
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds


# Dataset description passed to `tfds.core.DatasetInfo`.
_DESCRIPTION = """Nocaps dataset."""

# BibTeX entry for the nocaps paper. Adjacent string literals are concatenated
# verbatim (no separator is inserted), so each fragment that continues the
# author list must carry its own leading space.
_CITATION = (
    '@inproceedings{agrawal2019nocaps,'
    'title={nocaps: novel object captioning at scale},'
    'author={Agrawal, Harsh and Desai, Karan and Wang, Yufei and Chen, Xinlei'
    ' and Jain, Rishabh and Johnson, Mark and Batra, Dhruv and Parikh, Devi'
    ' and Lee, Stefan and Anderson, Peter},'
    'booktitle={ICCV},'
    'pages={8948--8957},'
    'year={2019}}')

# When running locally (recommended), copy files as above and use these:
# Root directory holding the extracted `validation/` and `test/` image folders.
_FILEPATH = '/tmp/data/nocaps_data/Images/'
# Annotation JSON for the validation split (image info plus captions).
_VAL_FILES = '/tmp/data/nocaps_data/nocaps_val_4500_captions.json'
# Image-info JSON for the test split.
_TEST_FILES = '/tmp/data/nocaps_data/nocaps_test_image_info.json'


class NoCaps(tfds.core.GeneratorBasedBuilder):
  """DatasetBuilder for nocaps dataset.

  Builds a `val` and a `test` split from the annotation JSON files and the
  image directories referenced by the module-level path constants
  (`_VAL_FILES`, `_TEST_FILES`, `_FILEPATH`).
  """

  VERSION = tfds.core.Version('1.0.0')
  RELEASE_NOTES = {
      '1.0.0': 'Initial release.',
  }

  def _info(self) -> tfds.core.DatasetInfo:
    """Returns the dataset metadata.

    Returns:
      A `tfds.core.DatasetInfo` declaring the per-example features: the image
      id, the local image path, the source url, the JPEG image itself and the
      list of caption texts.
    """

    return tfds.core.DatasetInfo(
        builder=self,
        description=_DESCRIPTION,
        features=tfds.features.FeaturesDict({
            'image/id': tf.int64,
            'image_filepath': tfds.features.Text(),
            'url': tfds.features.Text(),
            'image': tfds.features.Image(encoding_format='jpeg'),
            'texts': tfds.features.Sequence(tfds.features.Text()),
        }),
        # If there's a common (input, target) tuple from the
        # features, specify them here. They'll be used if
        # `as_supervised=True` in `builder.as_dataset`.
        supervised_keys=None,  # Set to `None` to disable
        homepage='https://nocaps.org/',
        citation=_CITATION,
    )

  def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Returns SplitGenerators.

    Args:
      dl_manager: unused; the data is read from the fixed local paths in
        `_VAL_FILES`, `_TEST_FILES` and `_FILEPATH`.

    Returns:
      A dict mapping the split names 'val' and 'test' to example generators.
    """
    def group_by_id(data, image_dir):
      """Builds one example dict per image, keyed by image id.

      Args:
        data: parsed annotation JSON with an 'images' list and an optional
          'annotations' list of {'image_id', 'caption'} entries.
        image_dir: sub-directory of `_FILEPATH` that holds this split's images.

      Returns:
        Dict from image id to an example dict with the keys declared in
        `_info`.
      """
      captions_by_id = collections.defaultdict(list)
      for annotation in data.get('annotations', []):
        captions_by_id[annotation['image_id']].append(annotation['caption'])

      id_to_example = {}
      for image_info in data['images']:
        image_id = image_info['id']
        image_path = os.path.join(
            _FILEPATH, image_dir, image_info['file_name'])
        id_to_example[image_id] = {
            'image/id': image_id,
            'image_filepath': image_path,
            'url': image_info['coco_url'],
            # The Image feature is given the path, not decoded pixels.
            'image': image_path,
            # Images without any caption get a single 'N/A' placeholder.
            # `.get` does not invoke the defaultdict factory, so no empty
            # entries are inserted as a side effect.
            'texts': captions_by_id.get(image_id, ['N/A']),
        }
      return id_to_example

    # Returns the Dict[split names, Iterator[Key, Example]]
    # JSON is UTF-8 encoded; be explicit rather than relying on the
    # platform-dependent default encoding of `open`.
    with open(_VAL_FILES, encoding='utf-8') as f:
      val_data = group_by_id(json.load(f), 'validation')
    with open(_TEST_FILES, encoding='utf-8') as f:
      test_data = group_by_id(json.load(f), 'test')
    return {
        'val': self._generate_examples(val_data),
        'test': self._generate_examples(test_data),
    }

  def _generate_examples(self, data):
    """Yields the examples of one split, skipping unreadable images.

    Each image file is read and JPEG-decoded once up front. Images that are
    missing or fail to decode are logged (with a download hint) and skipped.

    Args:
      data: a dictionary from image id to an example dict, as built in
        `_split_generators`.

    Yields:
      (key, example) tuples from dataset. The example has format specified in
        the above DatasetInfo.
    """
    for image_id, example in data.items():
      try:
        # Jpeg decode test to check early errors. The decoded images are not
        # used, instead we rely on the default tfds.features.Image function.
        encoded = tf.io.read_file(example['image_filepath'])
        np.array(tf.image.decode_jpeg(encoded))
      except tf.errors.InvalidArgumentError:
        # Unable to decode image, skip this image and output download link.
        logging.error('Unable to decode: curl -O %s', example['url'])
        continue
      except tf.errors.NotFoundError:
        # Image file is missing, skip this image and output download link.
        logging.error('File not found: curl -O %s', example['url'])
        continue

      yield image_id, example