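"""DPG-Bench evaluation.

Scores generated images against the per-prompt DPG-Bench question sets using an
mPLUG VQA model; the image list can be sharded across multiple ranks.
"""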
import os
from copy import deepcopy
from collections import defaultdict

import numpy as np
import pandas as pd
import torch

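
# Thin wrapper around the ModelScope mPLUG VQA pipeline: takes an image and a
# natural-language question and returns the model's free-form text answer.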
class MPLUG(torch.nn.Module):
    def __init__(self, ckpt='damo/mplug_visual-question-answering_coco_large_en', device='gpu'):
        super().__init__()
        from modelscope.pipelines import pipeline
        from modelscope.utils.constant import Tasks
        self.pipeline_vqa = pipeline(Tasks.visual_question_answering, model=ckpt, device=device)

    def vqa(self, image, question):
        input_vqa = {'image': image, 'question': question}
        result = self.pipeline_vqa(input_vqa)
        return result['text']

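
# Scores a single image against the DPG-Bench question set for one prompt,
# reporting averages both with and without dependency filtering.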
class DPGScore:
    def __init__(self, device):
        self.device = device
        # The VQA checkpoint can be overridden via the DPG_VQA_MODEL_PATH env var.
        ckpt = os.getenv('DPG_VQA_MODEL_PATH', "xingjianleng/mplug_visual-question-answering_coco_large_en")
        self.vqa_model = MPLUG(ckpt, device=self.device)
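
    # Evaluate one image: run VQA on every question, then zero out questions
    # whose parent questions were answered "no" (dependency filtering).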
def __call__(self, image, q_dict): |
|
VQA = self.vqa_model |
|
qid2tuple, qid2dependency, qid2question = q_dict['qid2tuple'], q_dict['qid2dependency'], q_dict['qid2question'] |
|
qid2answer = {} |
|
qid2scores = {} |
|
|
|
for id, question in qid2question.items(): |
|
id = str(id) |
|
answer = VQA.vqa(image, question) |
|
qid2answer[id] = answer |
|
qid2scores[id] = float(answer == 'yes') |
|
|
|
average_score_without_dep = sum(qid2scores.values()) / len(qid2scores) |
|
|
|
        # Dependency filtering: a question only counts if none of its parent
        # questions (parent id "0" means no parent) were answered "no".
        qid2validity = {}
        qid2scores_after_filtering = deepcopy(qid2scores)

        for qid, parent_ids in qid2dependency.items():
            qid = str(qid)
            any_parent_answered_no = False
            for parent_id in parent_ids:
                parent_id = str(parent_id)
                if int(parent_id) == 0:
                    continue
                if parent_id in qid2scores and qid2scores[parent_id] == 0:
                    any_parent_answered_no = True
                    break
            if any_parent_answered_no:
                qid2scores_after_filtering[qid] = 0.0
                qid2validity[qid] = False
            else:
                qid2validity[qid] = True

        average_score_with_dep = sum(qid2scores_after_filtering.values()) / len(qid2scores_after_filtering)

        return {
            'qid2tuple': qid2tuple,
            'qid2dependency': qid2dependency,
            'qid2question': qid2question,
            'qid2answer': qid2answer,
            'qid2scores': qid2scores,
            'qid2validity': qid2validity,
            'average_score_with_dependency': average_score_with_dep * 100.,
            'average_score_without_dependency': average_score_without_dep * 100.,
        }

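
# Parses the DPG-Bench CSV into one question set per prompt:
# {item_id: {'qid2tuple': ..., 'qid2dependency': ..., 'qid2question': ...}}.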
def prepare_dpg_data(csv_path):
    previous_id = ''
    current_id = ''
    question_dict = dict()
    category_count = defaultdict(int)
    data = pd.read_csv(csv_path)
    for i, line in data.iterrows():
        # Skip the first data row, as in the reference DPG-Bench loader.
        if i == 0:
            continue

        current_id = line.item_id
        qid = str(line.proposition_id)
        # Dependencies are a comma-separated list of parent qids; "0" means no parent.
        dependency_list = [d.strip() for d in line.dependency.split(',')]

        if current_id == previous_id:
            question_dict[current_id]['qid2tuple'][qid] = line.tuple
            question_dict[current_id]['qid2dependency'][qid] = dependency_list
            question_dict[current_id]['qid2question'][qid] = line.question_natural_language
        else:
            question_dict[current_id] = dict(
                qid2tuple={qid: line.tuple},
                qid2dependency={qid: dependency_list},
                qid2question={qid: line.question_natural_language})

        # Per-category counts (the category prefixes the tuple, e.g. "entity - whole (...)");
        # collected for bookkeeping only and not returned.
        category = line.tuple.split('(')[0].strip()
        category_count[category] += 1

        previous_id = current_id
    return question_dict

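
# Entry point: evaluates a directory of generated images, one PNG grid per
# DPG-Bench prompt (file name = item_id), optionally sharded across ranks.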
if __name__ == "__main__":
    import time
    import shutil
    import argparse
    from glob import glob

    from PIL import Image
    from tqdm import tqdm

    from src.train.data.data_utils import split_grid, json_load, json_dump
    from src.train.train_utils import get_train_config, get_rank_and_worldsize
    from src.train.data.validation import *

    def parse_args():
        parser = argparse.ArgumentParser()
        parser.add_argument("--image_dir", type=str, default="")
        args = parser.parse_args()
        return args

    args = parse_args()

    local_rank, global_rank, world_size = get_rank_and_worldsize()
    print(f"local_rank={local_rank}, global_rank={global_rank}, world_size={world_size}")
    is_local_main_process = local_rank == 0
    is_main_process = global_rank == 0

    images = sorted(glob(f"{args.image_dir}/*.png"))

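    # Shard the image list across ranks; ceiling division keeps the tail images
    # when the count is not divisible by world_size.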
    if world_size > 1:
        num_per_rank = (len(images) + world_size - 1) // world_size
        images = images[global_rank * num_per_rank:(global_rank + 1) * num_per_rank]
        # Pin each rank to one of the 8 local GPUs; this must happen before
        # the CUDA context is created.
        os.environ['CUDA_VISIBLE_DEVICES'] = str(local_rank % 8)
    print(f"[rank {global_rank}/{world_size}] has {len(images)} prompts to process, using device {torch.cuda.current_device()}")

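    # Shared per-run scratch directory (timestamped to the hour) for the
    # ranks' partial score files.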
    run_name = time.strftime("%Y%m%d-%H")
    temp_dir = os.path.join(args.image_dir, f"eval_temp_{run_name}")

    # Rank 0 resets any stale directory; every rank then ensures it exists so
    # that non-zero ranks cannot fail if they reach their dump first.
    if global_rank == 0:
        if os.path.exists(temp_dir):
            shutil.rmtree(temp_dir)
    os.makedirs(temp_dir, exist_ok=True)

    dpg_score_model = DPGScore("cuda")
    q_dicts = prepare_dpg_data("eval/dpg/dpg_bench.csv")

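    # Score this rank's shard. Each saved PNG may be a grid of several samples;
    # split_grid unpacks it into individual images, each scored independently.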
    rank_json = {}
    with torch.no_grad():
        for image_path in tqdm(images):
            prompt_name = os.path.splitext(os.path.basename(image_path))[0]
            q_dict = q_dicts[prompt_name]
            grid_images = split_grid(Image.open(image_path))
            rank_json[prompt_name] = []
            for i, img in enumerate(grid_images):
                rank_json[prompt_name].append({})
                result = dpg_score_model(img, q_dict)
                for q_id, question in result["qid2question"].items():
                    answer = result["qid2answer"][q_id]
                    rank_json[prompt_name][i][question] = answer
                rank_json[prompt_name][i]['average_score_with_dependency'] = result['average_score_with_dependency']
                rank_json[prompt_name][i]['average_score_without_dependency'] = result['average_score_without_dependency']

    # Each rank dumps its partial results into the shared temp dir.
    rank_save_path = os.path.join(temp_dir, f"scores_{global_rank}.json")
    json_dump(rank_json, rank_save_path, "utf-8")

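    # Rank 0 waits for every rank's file, then merges and reports the scores.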
    if global_rank == 0:
        # Crude file-based barrier: poll until every rank has written its file,
        # then give stragglers a moment to finish flushing.
        while len(glob(os.path.join(temp_dir, "scores_*.json"))) < world_size:
            time.sleep(5)
        time.sleep(5)

        merged_json = {}
        prompt_scores = {}
        scores = []
        for rank_path in glob(os.path.join(temp_dir, "scores_*.json")):
            rank_json = json_load(rank_path, "utf-8")
            merged_json.update(rank_json)
            for prompt_name in rank_json:
                score_list = [x['average_score_with_dependency'] for x in rank_json[prompt_name]]
                prompt_scores[prompt_name] = np.mean(score_list)
                scores += score_list

        json_dump(merged_json, os.path.join(args.image_dir, f"dpg_scores_{run_name}.json"), "utf-8")

        # Overall DPG score: mean of the dependency-filtered scores over all images.
        dpg_score = np.mean(scores)
        lines_to_write = [f"DPG Score: {dpg_score:.2f}\n"]
        print(lines_to_write[0])
        for prompt_name, score in prompt_scores.items():
            lines_to_write.append(f"{prompt_name}: {score:.2f}\n")

        with open(os.path.join(args.image_dir, f"dpg_scores_{run_name}.txt"), "w") as f:
            f.writelines(lines_to_write)

        shutil.rmtree(temp_dir)