pranavSIT
/

PaliOpenVocabSegmentation

Model card Files Files and versions Community

PaliOpenVocabSegmentation / big_vision /datasets /pope /pope.py

pranavSIT

added pali inference

74e8f2f 11 months ago

raw

history blame contribute delete

5.9 kB

	# Copyright 2024 Big Vision Authors.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	# pylint: disable=line-too-long
	r"""Implements POPE test-set in TFDS structure.

	It's small data, so simple to run locally. First, copy the data to local disk:
	download the json files from https://github.com/AoiDragon/POPE, then download
	the MSCOCO (val 2014) images from https://cocodataset.org/#download

	mkdir -p /tmp/data/pope/
	mkdir -p /tmp/data/pope/pope/
	mkdir -p /tmp/data/pope/images/
	git clone https://github.com/AoiDragon/POPE.git
	cp POPE/output/coco/* /tmp/data/pope/pope/
	wget http://images.cocodataset.org/zips/val2014.zip
	unzip val2014.zip
	cp -r val2014/ /tmp/data/pope/images/

	Then, run conversion locally (make sure to install tensorflow-datasets for the `tfds` util):

	cd big_vision/datasets
	env TFDS_DATA_DIR=/tmp/tfds tfds build --datasets=pope

	Example to load:

	import tensorflow_datasets as tfds
	dataset_random = tfds.load('pope/pope_random', split='test', data_dir='/tmp/tfds')
	dataset_popular = tfds.load('pope/pope_popular', split='test', data_dir='/tmp/tfds')
	dataset_adversarial = tfds.load('pope/pope_adversarial', split='test', data_dir='/tmp/tfds')

	"""
	import json
	import os

	import numpy as np
	import tensorflow_datasets as tfds


	# Short human-readable description; passed as `description` to
	# `tfds.core.DatasetInfo` in `POPE._info`.
	_DESCRIPTION = """POPE dataset."""

	# BibTeX entry for the POPE paper; passed as `citation` to
	# `tfds.core.DatasetInfo` in `POPE._info`.
	# pylint: disable=line-too-long
	_CITATION = """
	@inproceedings{li-etal-2023-evaluating,
	title = "Evaluating Object Hallucination in Large Vision-Language Models",
	author = "Li, Yifan and
	Du, Yifan and
	Zhou, Kun and
	Wang, Jinpeng and
	Zhao, Xin and
	Wen, Ji-Rong",
	editor = "Bouamor, Houda and
	Pino, Juan and
	Bali, Kalika",
	booktitle = "Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing",
	month = dec,
	year = "2023",
	address = "Singapore",
	publisher = "Association for Computational Linguistics",
	url = "https://aclanthology.org/2023.emnlp-main.20",
	doi = "10.18653/v1/2023.emnlp-main.20",
	pages = "292--305",
	abstract = "Inspired by the superior language abilities of large language models (LLM), large vision-language models (LVLM) have been recently proposed by integrating powerful LLMs for improving the performance on complex multimodal tasks. Despite the promising progress on LVLMs, we find that they suffer from object hallucinations, i.e., they tend to generate objects inconsistent with the target images in the descriptions. To investigate it, this work presents the first systematic study on object hallucination of LVLMs. We conduct the evaluation experiments on several representative LVLMs, and show that they mostly suffer from severe object hallucination issues. We further discuss that the visual instructions may influence the hallucination, and find that: objects that frequently appear in the visual instructions or co-occur with the image objects are obviously prone to be hallucinated by LVLMs. Besides, we further design a polling-based query method called POPE for better evaluation of object hallucination. Experiment results show that our POPE can evaluate object hallucination in a more stable and flexible way.",
	}
	"""
	# pylint: enable=line-too-long

	# Root directory of the locally staged POPE data. The builder reads the
	# annotation files from `<_POPE_PATH>/pope/coco_<config name>.json` and the
	# images from `<_POPE_PATH>/images/val2014/`.
	# When running locally (recommended), copy files as above and use these:
	_POPE_PATH = '/tmp/data/pope/'


	class POPEConfig(tfds.core.BuilderConfig):
	  """Builder config for the POPE dataset; adds nothing to the base config."""


	class POPE(tfds.core.GeneratorBasedBuilder):
	  """DatasetBuilder for POPE dataset.

	  One builder config exists per POPE subset (`pope_random`, `pope_popular`,
	  `pope_adversarial`). Each config produces a single `test` split whose
	  annotations and images are read from local disk under `_POPE_PATH`.
	  """

	  VERSION = tfds.core.Version('1.0.0')
	  RELEASE_NOTES = {'1.0.0': 'First release.'}
	  BUILDER_CONFIGS = [
	      POPEConfig(name='pope_random', description='Random set'),
	      POPEConfig(name='pope_popular', description='Popular set'),
	      POPEConfig(name='pope_adversarial', description='Adversarial set'),
	  ]

	  def _info(self):
	    """Returns the metadata (feature schema, homepage and citation)."""
	    return tfds.core.DatasetInfo(
	        builder=self,
	        description=_DESCRIPTION,
	        features=tfds.features.FeaturesDict({
	            'question_id': tfds.features.Scalar(np.int32),
	            'image/filename': tfds.features.Text(),
	            'image': tfds.features.Image(encoding_format='png'),
	            'question': tfds.features.Text(),
	            'answer': tfds.features.Text(),
	            'thing': tfds.features.Text(),
	        }),
	        supervised_keys=None,
	        homepage='https://github.com/AoiDragon/POPE',
	        citation=_CITATION,
	    )

	  def _split_generators(self, dl_manager: tfds.download.DownloadManager):
	    """Returns SplitGenerators.

	    Args:
	      dl_manager: Unused; the data is read from `_POPE_PATH` on local disk.

	    Returns:
	      A dict with the single key `'test'`, mapped to the example generator for
	      the currently selected builder config.
	    """
	    return {'test': self._generate_examples('test', self.builder_config.name)}

	  def _generate_examples(self, split: str, source: str):
	    """Yields (key, example) tuples from test set.

	    Args:
	      split: Name of the split; not used to select any data.
	      source: Builder config name (e.g. `pope_random`); selects the annotation
	        file `pope/coco_{source}.json` under `_POPE_PATH`.

	    Yields:
	      `(idx, example)` pairs, where `idx` is the 0-based position of the
	      annotation among the non-blank lines of the file and `example` matches
	      the features declared in `_info`.
	    """
	    annot_fname = os.path.join(_POPE_PATH, f'pope/coco_{source}.json')

	    # The annotation file holds one JSON object per line. Read it as UTF-8
	    # explicitly (instead of the platform default) and skip whitespace-only
	    # lines so that a stray or trailing blank line does not abort the build.
	    with open(annot_fname, 'r', encoding='utf-8') as f:
	      data = [json.loads(line) for line in f if line.strip()]

	    for idx, v in enumerate(data):
	      question = v['text']
	      # Strip the fixed question template to recover the queried object name.
	      thing = (
	          question.replace('Is there an ', '')
	          .replace('Is there a ', '')
	          .replace(' in the image?', '')
	      )
	      yield idx, {
	          'question_id': idx,
	          'image/filename': v['image'],
	          # Path of the image file on local disk.
	          'image': os.path.join(_POPE_PATH, 'images/val2014/', v['image']),
	          'question': question,
	          'answer': v['label'],
	          'thing': thing,
	      }