|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
r"""Generates GQA in a TFDS-ready structure, using Beam. |
|
|
|
Instructions below are to generate the dataset with a *local* Beam pipeline. |
|
It's advisable to run the Beam job on Google Cloud Dataflow (see

https://www.tensorflow.org/datasets/beam_datasets for more details), which

would significantly speed up generation. This would involve uploading the

locally downloaded data to a GCS bucket, and then adding the Beam pipeline

options and your GCP/GCS bucket details to the `tfds build` command below

(as detailed in the link).
|
|
|
First, copy the data to local disk: |
|
|
|
mkdir -p /tmp/data/gqa

wget -O /tmp/data/gqa/questions1.2.zip https://downloads.cs.stanford.edu/nlp/data/gqa/questions1.2.zip?download=true

unzip /tmp/data/gqa/questions1.2.zip -d /tmp/data/gqa/

wget -O /tmp/data/gqa/images.zip https://downloads.cs.stanford.edu/nlp/data/gqa/images.zip?download=true

unzip /tmp/data/gqa/images.zip -d /tmp/data/gqa/
|
|
|
Then, run conversion (make sure to install tensorflow-datasets for the `tfds` util): |
|
|
|
cd big_vision/datasets |
|
env TFDS_DATA_DIR=/tmp/tfds tfds build --datasets=gqa |
|
|
|
Example to load: |
|
|
|
import tensorflow_datasets as tfds |
|
dataset = tfds.load('gqa', split='testdev_balanced', data_dir='/tmp/tfds') |
|
|
|
Some statistics: |
|
train_all: 14305356 examples |
|
train_balanced: 943000 examples |
|
val_all: 2011853 examples |
|
val_balanced: 132062 examples |
|
testdev_all: 172174 examples |
|
testdev_balanced: 12578 examples |
|
""" |
|
import glob |
|
import json |
|
import os |
|
|
|
import numpy as np |
|
import tensorflow_datasets as tfds |
|
|
|
|
|
_DESCRIPTION = """GQA: Visual Reasoning in the Real World."""


# BibTeX entry for the GQA paper (Hudson & Manning, CVPR 2019,
# arXiv:1902.09506). The entry key is kept consistent with the DBLP record
# referenced in `biburl` below.
_CITATION = """
@article{DBLP:journals/corr/abs-1902-09506,
  author    = {Drew Hudson and
               Christopher Manning},
  title     = {GQA: A New Dataset for Real-World Visual Reasoning and Compositional Question Answering},
  journal   = {CVPR},
  volume    = {abs/1902.09506},
  year      = {2019},
  url       = {https://doi.org/10.48550/arXiv.1902.09506},
  doi       = {10.48550/arXiv.1902.09506},
  eprinttype = {arXiv},
  eprint    = {1902.09506},
  timestamp = {Tue, 25 Jun 2019 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/corr/abs-1902-09506},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
"""


# Directory holding the manually downloaded GQA question JSONs and images
# (see the module docstring for download instructions).
_DATA_PATH = '/tmp/data/gqa/'
|
|
|
|
|
class GQA(tfds.core.GeneratorBasedBuilder):
  """DatasetBuilder for the GQA dataset.

  Reads the question JSON files and image JPEGs from `_DATA_PATH` (see the
  module docstring for download instructions) and emits one example per
  question using an Apache Beam pipeline.
  """

  VERSION = tfds.core.Version('1.0.0')
  RELEASE_NOTES = {'1.0.0': 'First release.'}

  def _info(self):
    """Returns the dataset metadata."""
    return tfds.core.DatasetInfo(
        builder=self,
        description=_DESCRIPTION,
        features=tfds.features.FeaturesDict({
            'example_id': tfds.features.Scalar(np.int64),
            'image/id': tfds.features.Text(),
            'image': tfds.features.Image(encoding_format='jpeg'),
            'question': tfds.features.Text(),
            'answer': tfds.features.Text(),
            'full_answer': tfds.features.Text(),
            'is_balanced': tfds.features.Scalar(np.bool_),
        }),
        homepage='https://cs.stanford.edu/people/dorarad/gqa/',
        citation=_CITATION,
    )

  def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Returns a mapping from split name to its Beam collection of examples."""
    splits = [
        'train_all',
        'train_balanced',
        'testdev_all',
        'testdev_balanced',
        'val_all',
        'val_balanced',
        'challenge_all',
        'challenge_balanced',
    ]
    return {split: self._generate_examples(split) for split in splits}

  def _generate_examples(self, split: str):
    """Builds a Beam pipeline yielding (key, example) tuples for `split`."""
    if split == 'train_all':
      # train_all is sharded over multiple JSON files in its own directory;
      # every other split is a single `<split>_questions.json` file.
      json_files = glob.glob(
          os.path.join(_DATA_PATH, 'train_all_questions', '*.json'))
    else:
      json_files = [os.path.join(_DATA_PATH, f'{split}_questions.json')]

    def _prepare_data(json_path):
      """Loads one question file and returns its (question_id, data) pairs."""
      with open(json_path) as f:
        annotations = json.load(f)
      return list(annotations.items())

    def _process_example(entry):
      """Converts one (question_id, question_data) pair into a TFDS example."""
      question_id, question_data = entry
      image_id = question_data['imageId']
      image_path = os.path.join(_DATA_PATH, 'images', f'{image_id}.jpg')
      # Challenge splits ship without ground-truth answers; default to ''.
      answer = question_data.get('answer', '')
      full_answer = question_data.get('fullAnswer', '')

      example = {
          # GQA question ids are numeric strings in the JSON, but the feature
          # is declared as an int64 scalar — convert explicitly so encoding
          # does not fail.
          'example_id': int(question_id),
          'image/id': image_id,
          'image': image_path,
          'question': question_data['question'],
          'answer': answer,
          'full_answer': full_answer,
          'is_balanced': question_data['isBalanced'],
      }
      return question_id, example

    beam = tfds.core.lazy_imports.apache_beam
    return (
        beam.Create(json_files)
        | beam.FlatMap(_prepare_data)
        | beam.Reshuffle()  # Spread shards' questions evenly across workers.
        | beam.Map(_process_example)
    )
|
|