Spaces:

BK-Lee
/

Meteor

Running on Zero

File size: 17,169 Bytes
import os
import json
import math
import glob
from config import *
from PIL import Image
import pandas as pd
import pyarrow.parquet as pq 
import torch.nn.functional as F
from eval.utils import *
from torch.utils.data import Dataset
from torchvision.transforms.functional import pil_to_tensor

class CreateEvalDataset(Dataset):
    def __init__(self):
        super(CreateEvalDataset, self).__init__()

        """
        Eval Datasets

        - VQAv2
        - GQA
        - SQA-IMG
        - VizWiz
        - TextVQA
        - POPE
        - MME
        - MMBench
        - MMBench-CN
        - QBench
        - MM-Vet
        - MMMU
        - MathVista
        - AI2D
        - HallusionBench
        - ChartQA
        - SEED
        - LLaVA Wild
        - BLINK
        - MathVerse

        """

        # dataset root path
        self.dataset_root_path = DATASET_ROOT

        # load test data
        pre_vqav2 = json.load(open(os.path.join(DATASET_ROOT, VQAV2)))
        pre_gqa = json.load(open(os.path.join(DATASET_ROOT, GQA)))
        pre_sqa = json.load(open(os.path.join(DATASET_ROOT, SQA)))
        pre_sqa_split = json.load(open(os.path.join(DATASET_ROOT, SQA_SPLIT)))
        pre_vizwiz = json.load(open(os.path.join(DATASET_ROOT, VIZWIZ)))
        pre_textvqa = json.load(open(os.path.join(DATASET_ROOT, TEXTVQA)))
        pre_textvqa_annotations = json.load(open(os.path.join(DATASET_ROOT, TEXTVQA_ANNOTATIONS)))
        pre_pope_popular = pd.read_json(os.path.join(DATASET_ROOT, POPE_POPULAR), lines=True)
        pre_pope_adversarial= pd.read_json(os.path.join(DATASET_ROOT, POPE_ADVERSARIAL), lines=True)
        pre_pope_random = pd.read_json(os.path.join(DATASET_ROOT, POPE_RANDOM), lines=True)
        pre_mme = json.load(open(os.path.join(DATASET_ROOT, MME)))
        pre_mmbench = pd.read_table(os.path.join(DATASET_ROOT, MMBENCH))
        pre_mmbench_dev = pd.read_table(os.path.join(DATASET_ROOT, MMBENCH_DEV))
        pre_mmbench_cn = pd.read_table(os.path.join(DATASET_ROOT, MMBENCH_CN))
        pre_mmbench_cn_dev = pd.read_table(os.path.join(DATASET_ROOT, MMBENCH_CN_DEV))
        pre_qbench = json.load(open(os.path.join(DATASET_ROOT, QBENCH)))
        pre_qbench_cn = json.load(open(os.path.join(DATASET_ROOT, QBENCH_CN)))
        pre_mmvet = json.load(open(os.path.join(DATASET_ROOT, MMVET)))
        mmmu_files = glob.glob(os.path.join(DATASET_ROOT, MMMU))
        pre_mmmu = [pq.read_pandas(os.path.join(DATASET_ROOT, mf)).to_pandas() for mf in mmmu_files]
        pre_mathvista1 = pq.read_pandas(os.path.join(DATASET_ROOT, MATHVISTA)).to_pandas()
        pre_ai2d = json.load(open(os.path.join(DATASET_ROOT, AI2D)))
        pre_hallusionbench = json.load(open(os.path.join(DATASET_ROOT, HALLUSIONBENCH)))
        pre_chartqa = json.load(open(os.path.join(DATASET_ROOT, CHARTQA)))
        pre_seed = json.load(open(os.path.join(DATASET_ROOT, SEED)))
        pre_llava = pd.read_json(os.path.join(DATASET_ROOT, LLAVA), lines=True)
        # pre_blink = json.load(open(os.path.join(DATASET_ROOT, BLINK)))
        pre_mathverse = json.load(open(os.path.join(DATASET_ROOT, MATHVERSE)))
        pre_mathverse_text_only = json.load(open(os.path.join(DATASET_ROOT, MATHVERSE_TEXT_ONLY)))
        pre_mmstar = pq.read_pandas(os.path.join(DATASET_ROOT, MMSTAR)).to_pandas()
        
        # data filtering
        vqav2 = self.vqav2_filtering(pre_vqav2)
        gqa = self.gqa_filtering(pre_gqa)
        sqa = self.sqa_filtering(pre_sqa, pre_sqa_split)
        vizwiz = self.vizwiz_filtering(pre_vizwiz)
        textvqa = self.textvqa_filtering(pre_textvqa, pre_textvqa_annotations)
        pope = self.pope_filtering([pre_pope_popular, pre_pope_adversarial, pre_pope_random])
        mme = self.mme_filtering(pre_mme)
        mmbench = self.mmbench_filtering(pre_mmbench)
        mmbench_dev = self.mmbench_filtering(pre_mmbench_dev)
        mmbench_cn = self.mmbench_filtering(pre_mmbench_cn)
        mmbench_cn_dev = self.mmbench_filtering(pre_mmbench_cn_dev)
        qbench = self.qbench_filtering(pre_qbench)
        qbench_cn = self.qbench_filtering(pre_qbench_cn)
        mmvet = self.mmvet_filtering(pre_mmvet)
        mmmu = self.mmmu_filtering(pre_mmmu)
        mathvista = self.mathvista_filtering(pre_mathvista1)
        ai2d = self.ai2d_filtering(pre_ai2d)
        hallusionbench = self.hallusionbench_filtering(pre_hallusionbench)
        chartqa = self.chartqa_filtering(pre_chartqa)
        seed = self.seed_filtering(pre_seed)
        llava = self.llava_filtering(pre_llava)
        # blink = self.blink_filtering(pre_blink)
        mathverse = self.mathverse_filtering(pre_mathverse, pre_mathverse_text_only)
        mmstar = self.mmstar_filtering(pre_mmstar)
        
        # merging
        self.data = {
            'vqav2': vqav2,
            'gqa': gqa,
            'sqa':sqa,
            'vizwiz': vizwiz,
            'textvqa': textvqa,
            'pope': pope,
            'mme': mme,
            'mmbench': mmbench,
            'mmbench_dev': mmbench_dev,
            'mmbench_cn': mmbench_cn,
            'mmbench_cn_dev': mmbench_cn_dev,
            'qbench': qbench,
            'mm-vet': mmvet,
            'mmmu': mmmu,
            'mathvista': mathvista,
            'ai2d': ai2d,
            'hallusionbench': hallusionbench,
            'chartqa': chartqa,
            'seed': seed,
            'llava': llava,
            # 'blink': blink,
            'mathverse' : mathverse,
            'mmstar' : mmstar
        }        


    def vqav2_filtering(self, pre_data):
        data = []
        for x in pre_data['questions']:
            data.append({'image': f"VQAv2/test2015/COCO_test2015_{x['image_id']:012d}.jpg",
                        'question': x['question'],
                        'id': x['question_id']})
        return data
    
    def gqa_filtering(self, pre_data):
        data = []
        for qid, x in pre_data.items():
            data.append({'image': f"gqa/images/{x['imageId']}.jpg",
                        'question': x['question'],
                        'id': qid})
        return data
    
    def sqa_filtering(self, pre_data, pre_sqa_split):
        data = []
        questions = {idx: pre_data[idx] for idx in pre_sqa_split['test']}
        for qid, x in questions.items():
            if x['image'] is not None:
                choices = '\n'.join(f"{chr(ord('A') + i)}. {choice}" for i, choice in enumerate(x['choices']))
                question = '\n'.join([x['hint'], x['question'], choices])
                data.append({'image': f"ScienceQA/images/test/{qid}/image.png",
                            'question': question,
                            'id': qid,
                            'candidates': x['choices'],
                            'gt': x['answer']})
        return data
    
    def vizwiz_filtering(self, pre_data):
        data = []
        for qid, x in enumerate(pre_data):
            data.append({'image': f"VizWiz/test/{x['image']}",
                        'question': x['question'],
                        'id': qid})
        return data
    
    def textvqa_filtering(self, pre_data, annotations):
        data = []
        for x, answer in zip(pre_data, annotations['data']):
            data.append({'image': f"TextVQA/train_images/{x['image']}",
                        'question': x['text'],
                        'id': x['question_id'],
                        'gt': answer['answers']})
        return data
    
    def pope_filtering(self, pre_data):
        data = []
        categories = ['adversarial', 'popular', 'random']
        for category, split in zip(categories, pre_data):
            for _, x in split.iterrows():
                data.append({'image': f"coco2014/val2014/{x['image']}",
                            'question': x['text'],
                            'id': x['question_id'], 
                            'category': category})
        return data
    
    def mme_filtering(self, pre_data):
        data = []
        for x in pre_data:
            data.append({'image': f"MME_Benchmark_release_version/{x['image']}",
                        'question': x['text'],
                        'id': x['question_id'],
                        'category': x['category']})
        return data
    
    def mmbench_filtering(self, pre_data):
        data = []
        for _, x in pre_data.iterrows():
            options = ['A', 'B', 'C', 'D']
            choice_list = [choice for choice in options if not self.is_none(x[choice])]
            choices = '\n'.join(f"{chr(ord('A') + i)}. {x[choice]}" for i, choice in enumerate(choice_list))
            question = '\n'.join([x['question'], choices])

            if not self.is_none(x['hint']):
                question = '\n'.join([x['hint'], question])
        
            data.append({'image': x['image'],
                        'question': question,
                        'id': x['index'],
                        'answer': x['answer'] if 'answer' in x else None})
        return data
    
    def qbench_filtering(self, pre_data):
        data = []
        for qid, x in enumerate(pre_data):
            choices = '\n'.join(f"{chr(ord('A') + i)}. {choice}" for i, choice in enumerate(x['candidates']))
            question = '\n'.join([x['question'], choices])
            data.append({'image': f"LLVisionQA-QBench/images/{x['img_path']}",
                        'question': question,
                        'id': qid,
                        'candidates': x['candidates'],
                        'gt': x['correct_ans']})
        return data
    
    def mmvet_filtering(self, pre_data):
        data = []
        for qid, x in pre_data.items():
            data.append({'image': f"mm-vet/images/{x['imagename']}",
                        'question': x['question'],
                        'id': qid,
                        'gt': x['answer'],
                        'capability': x['capability']})
        return data
    
    def mmmu_filtering(self, pre_data):
        data = []
        for split in pre_data:
            for _, x in split.iterrows():
                index2ans, all_choices = self.get_multi_choice_info(eval(x['options']))
                choices = ' '.join([f"{k}. {v}" for k,v in index2ans.items()])
                question = '\n'.join([x['question'], choices])
                num_images = count_unique_image_tokens(question)
                data.append({'images': [x[f"image_{i+1}"]['bytes'] for i in range(num_images)],
                            'question': replace_image_tokens(question),
                            'id': x['id'],
                            'question_type': x['question_type'],
                            'gt': x['answer'],
                            'index2ans': index2ans,
                            'all_choices': all_choices})
        return data
    
    def mathvista_filtering(self, pre_data):
        data = []
        for _, x in pre_data.iterrows():
            skills = x['metadata']['skills'].tolist()
            x['metadata']['skills'] = skills
            choices = x['choices'].tolist() if x['choices'] is not None else None
            data.append({'image': f"MathVista/{x['image']}",
                        'question': x['query'],
                        'question_type': x['question_type'],
                        'answer': x['answer'],
                        'answer_type': x['answer_type'],
                        'choices': choices,
                        'metadata': x['metadata'],
                        'precision': x['precision'],
                        'id': x['pid']})
        return data
    
    def ai2d_filtering(self, pre_data):
        data = []
        for x in pre_data:
            choices = ' '.join(f"{chr(ord('A') + i)}. {choice}" for i, choice in enumerate(x["metadata"]["answerTexts"]))
            question = '\n'.join([x['question'], choices])
            image = f"ai2d/abc_images/{x['imageName']}" if x['metadata']['abcLabel'] else f"ai2d/images/{x['imageName']}"
            data.append({'image': image,
                         'question': question,
                        'id': x['metadata']['questionId'],
                        'gt': x['metadata']['correctAnswer']})
        return data
    
    def hallusionbench_filtering(self, pre_data):
        data = []
        for qid, x in enumerate(pre_data):
            if x['filename'] is None:
                img_path = ""
                question = x['question']
            else:
                img_path = f"HallusionBench/hallusion_bench/{x['filename'][2:]}".format()
                question =  "<image>" + x['question']
            data.append({'image': img_path,
                        'question': question,
                        'id': qid,
                        'gt': x['gt_answer']})
        return data

    def chartqa_filtering(self, pre_data):
        data = []
        for qid, x in enumerate(pre_data):
            data.append({'image': f"chartqa/test/png/{x['imgname']}",
                        'question': x['query'],
                        'id': x['imgname'],
                        'gt': x['label']})
        return data
    
    def seed_filtering(self, pre_data):
        data = []
        for x in pre_data['questions']:
            if x['data_type'] != 'image':
                continue
            choice_list = [key for key in x.keys() if 'choice' in key]
            choices = '\n'.join(f"{chr(ord('A') + i)}. {x[choice]}" for i, choice in enumerate(choice_list))
            question = '\n'.join([x['question'], choices])
            data.append({'image': f"SEED-Bench/SEED-Bench-image/{x['data_id']}",
                        'question': question,
                        'id': x['question_id'],
                        'question_type': x['question_type_id'],
                        'gt': x['answer']})
        return data
    
    def llava_filtering(self, pre_data):
        data = []
        for _, x in pre_data.iterrows():
            data.append({'image': f"llava-bench-in-the-wild/images/{x['image']}",
                        'question': x['text'],
                        'id': x['question_id'],
                        "category": x['category']})
        return data
    
    def blink_filtering(self, pre_data):
        data = []
        # TODO
        return data
    
    def mathverse_filtering(self, pre_data, pre_data_text_only):
        data = []
        for x in pre_data:
            data.append({'image': f"MathVerse/images/{x['image']}",
                        'question': "<image>" + x['query_wo'],
                        # 'question': "<image>" + x['query_cot'],
                        'id': x['sample_index'],
                        'problem_index': x['problem_index'],
                        'problem_version': x['problem_version'],
                        'gt' : x['answer'],
                        'question_type': x['question_type'],
                        'metadata' : x['metadata'],
                        'query_cot' : x['query_cot'],
                        'origin_question': x['question']
                        })
        offset = len(pre_data)
        for x in pre_data_text_only:
            data.append({'image': "",
                        'question': x['query_wo'],
                        # 'question': x['query_cot'],
                        'id': str(int(x['sample_index']) + offset),
                        'problem_index': x['problem_index'],
                        'problem_version': x['problem_version'],
                        'gt' : x['answer'],
                        'question_type': x['question_type'],
                        'metadata' : x['metadata'],
                        'query_cot' : x['query_cot'],
                        'origin_question': x['question']
                        })
            
        return data
    
    def is_none(self, value):
        return type(value) is float and math.isnan(value)

    def get_options(self, row, options):
        parsed_options = []
        for option in options:
            option_value = row[option]
            if self.is_none(option_value):
                break
            parsed_options.append(option_value)
        return parsed_options

    def __len__(self):
        return len(self.data)
    
    def get_multi_choice_info(self, options):
        """
        Given the list of options for multiple choice question
        Return the index2ans and all_choices
        """
        
        start_chr = 'A'
        all_choices = []
        index2ans = {}
        for i, option in enumerate(options):
            index2ans[chr(ord(start_chr) + i)] = option
            all_choices.append(chr(ord(start_chr) + i))

        return index2ans, all_choices
    
    def mmstar_filtering(self, pre_data):
        data = []
        for _, x in pre_data.iterrows():
            data.append({'id' : x['index'],
                        'question': x['question'],
                        'answer': x['answer'],
                        'category': x['category'],
                        'l2_category': x['l2_category'],
                        # 'bench': x['bench'],
                        'image': x['image']})
        return data