|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
r"""Generates GQA in a TFDS-ready structure, using Beam. |
|
|
|
Instructions below are to generate the dataset with a *local* Beam pipeline. |
|
It's advisable to run the Beam job on Google Cloud Dataflow (see

https://www.tensorflow.org/datasets/beam_datasets for more details), which

would significantly speed up generation. This would involve uploading the

locally downloaded data to a GCS bucket, and then adding the Beam pipeline

options and your GCP/GCS bucket details to the `tfds build` command below

(as detailed in the link).
|
|
|
First, copy the data to local disk: |
|
|
|
mkdir -p /tmp/data/gqa

wget -O /tmp/data/gqa/questions1.2.zip https://downloads.cs.stanford.edu/nlp/data/gqa/questions1.2.zip?download=true

unzip /tmp/data/gqa/questions1.2.zip -d /tmp/data/gqa/

wget -O /tmp/data/gqa/images.zip https://downloads.cs.stanford.edu/nlp/data/gqa/images.zip?download=true

unzip /tmp/data/gqa/images.zip -d /tmp/data/gqa/
|
|
|
Then, run conversion (make sure to install tensorflow-datasets for the `tfds` util): |
|
|
|
cd big_vision/datasets |
|
env TFDS_DATA_DIR=/tmp/tfds tfds build --datasets=gqa |
|
|
|
Example to load: |
|
|
|
import tensorflow_datasets as tfds |
|
dataset = tfds.load('gqa', split='testdev_balanced', data_dir='/tmp/tfds') |
|
|
|
Some statistics: |
|
train_all: 14305356 examples |
|
train_balanced: 943000 examples |
|
val_all: 2011853 examples |
|
val_balanced: 132062 examples |
|
testdev_all: 172174 examples |
|
testdev_balanced: 12578 examples |
|
""" |
|
import glob |
|
import json |
|
import os |
|
|
|
import numpy as np |
|
import tensorflow_datasets as tfds |
|
|
|
|
|
_DESCRIPTION = """GQA: Visual Reasoning in the Real World."""


# BibTeX entry for the GQA paper (Hudson & Manning, CVPR 2019,
# arXiv:1902.09506). The entry key is kept consistent with the DBLP record
# referenced in `biburl` below.
_CITATION = """
@article{DBLP:journals/corr/abs-1902-09506,
  author    = {Drew Hudson and
               Christopher Manning},
  title     = {GQA: A New Dataset for Real-World Visual Reasoning and Compositional Question Answering},
  journal   = {CVPR},
  volume    = {abs/1902.09506},
  year      = {2019},
  url       = {https://doi.org/10.48550/arXiv.1902.09506},
  doi       = {10.48550/arXiv.1902.09506},
  eprinttype = {arXiv},
  eprint    = {1902.09506},
  timestamp = {Tue, 25 Jun 2019 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/corr/abs-1902-09506},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
"""


# Directory holding the manually downloaded GQA question JSONs and images
# (see the module docstring for download instructions).
_DATA_PATH = '/tmp/data/gqa/'
|
|
|
|
|
class GQA(tfds.core.GeneratorBasedBuilder):
  """DatasetBuilder for the GQA dataset.

  Reads the question JSON files and image JPEGs from `_DATA_PATH` (see the
  module docstring for download instructions) and emits one example per
  question using an Apache Beam pipeline.
  """

  VERSION = tfds.core.Version('1.0.0')
  RELEASE_NOTES = {'1.0.0': 'First release.'}

  def _info(self):
    """Returns the dataset metadata."""
    return tfds.core.DatasetInfo(
        builder=self,
        description=_DESCRIPTION,
        features=tfds.features.FeaturesDict({
            'example_id': tfds.features.Scalar(np.int64),
            'image/id': tfds.features.Text(),
            'image': tfds.features.Image(encoding_format='jpeg'),
            'question': tfds.features.Text(),
            'answer': tfds.features.Text(),
            'full_answer': tfds.features.Text(),
            'is_balanced': tfds.features.Scalar(np.bool_),
        }),
        homepage='https://cs.stanford.edu/people/dorarad/gqa/',
        citation=_CITATION,
    )

  def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Returns a mapping from split name to its Beam collection of examples."""
    splits = [
        'train_all',
        'train_balanced',
        'testdev_all',
        'testdev_balanced',
        'val_all',
        'val_balanced',
        'challenge_all',
        'challenge_balanced',
    ]
    return {split: self._generate_examples(split) for split in splits}

  def _generate_examples(self, split: str):
    """Builds a Beam pipeline yielding (key, example) tuples for `split`."""
    if split == 'train_all':
      # train_all is sharded over multiple JSON files in its own directory;
      # every other split is a single `<split>_questions.json` file.
      json_files = glob.glob(
          os.path.join(_DATA_PATH, 'train_all_questions', '*.json'))
    else:
      json_files = [os.path.join(_DATA_PATH, f'{split}_questions.json')]

    def _prepare_data(json_path):
      """Loads one question file and returns its (question_id, data) pairs."""
      with open(json_path) as f:
        annotations = json.load(f)
      return list(annotations.items())

    def _process_example(entry):
      """Converts one (question_id, question_data) pair into a TFDS example."""
      question_id, question_data = entry
      image_id = question_data['imageId']
      image_path = os.path.join(_DATA_PATH, 'images', f'{image_id}.jpg')
      # Challenge splits ship without ground-truth answers; default to ''.
      answer = question_data.get('answer', '')
      full_answer = question_data.get('fullAnswer', '')

      example = {
          # GQA question ids are numeric strings in the JSON, but the feature
          # is declared as an int64 scalar — convert explicitly so encoding
          # does not fail.
          'example_id': int(question_id),
          'image/id': image_id,
          'image': image_path,
          'question': question_data['question'],
          'answer': answer,
          'full_answer': full_answer,
          'is_balanced': question_data['isBalanced'],
      }
      return question_id, example

    beam = tfds.core.lazy_imports.apache_beam
    return (
        beam.Create(json_files)
        | beam.FlatMap(_prepare_data)
        | beam.Reshuffle()  # Spread shards' questions evenly across workers.
        | beam.Map(_process_example)
    )
|
|