import torch
from tqdm import tqdm
import numpy as np
import time
import json
import os
import pickle

from coco_metric import compute_cider
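

# Evaluate referring-expression generation (REG) on the RefCOCOg validation
# split: for each ground-truth region box, the model is prompted with the
# image and the box, and its generated caption is scored against the
# reference texts with CIDEr.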
def evaluate_reg(
    model,
    tokenizer,
    image_processor,
    vis_embed_size=None,
    rank=0,
    world_size=1,
    id=0,
):
    lang_encoder_name = model.lang_encoder.__class__.__name__.lower()
    dataset_name = "refcocog"
    pkl_file = "/gpfs/u/home/LMCG/LMCGljnn/scratch/code/multimodal2/open_flamingo/eval/task/others/refcocog_reg_val_data.pkl"
    # Look up the special tokens eagerly, so a tokenizer that lacks any of
    # them fails here rather than with a NameError inside the evaluation loop.
    media_token_id = tokenizer("<|#image#|>", add_special_tokens=False)["input_ids"][-1]
    endofmedia_token_id = tokenizer("<|#endofimage#|>", add_special_tokens=False)["input_ids"][-1]
    pad_token_id = tokenizer(tokenizer.pad_token, add_special_tokens=False)["input_ids"][-1]
    bos_token_id = tokenizer(tokenizer.bos_token, add_special_tokens=False)["input_ids"][-1]
    model.eval().cuda()
    if world_size > 1:
        torch.distributed.barrier()
    this_tot = 0
    predictions = []
    D = pickle.load(open(pkl_file, "rb"))
    lines = []
    data = D["data"]
    uniq_id_to_text = D["uniq_id_to_text"]
    uniq_id_to_image = D["uniq_id_to_image"]
    uniq_id_to_image_id = D["uniq_id_to_image_id"]
    # Flatten the nested {image_id: {region: [uniq_ids]}} mapping into one
    # record per region: [uniq_id, image_id, reference texts, box, PIL image].
    for image_id in data:
        for region in data[image_id]:
            uniq_id = data[image_id][region][0]
            lines.append([uniq_id, uniq_id_to_image_id[uniq_id], [uniq_id_to_text[r] for r in data[image_id][region]], region, uniq_id_to_image[uniq_id]])
    print("total data:", len(lines))
    # lines = lines[:20]
    pbar = tqdm(lines, disable=(rank != 0))
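    # Shard the examples across ranks round-robin: each rank handles the
    # indices where ii % world_size == rank.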
    for ii, line in enumerate(pbar):
        if ii % world_size != rank:
            continue
        uniq_id, image_id, text, region_coord, image = line
        gt_box = np.array(region_coord)
        width = image.width
        height = image.height
        image = image.resize((224, 224))
        # Rescale the ground-truth box from original image coordinates to the
        # 224x224 model input.
        gt_box = gt_box / np.array([width, height, width, height]) * 224
        batch_images = image_processor(image).unsqueeze(0).unsqueeze(1).unsqueeze(0)
        # The prompt holds `vis_embed_size` pad tokens between the image
        # markers (placeholders for the visual embeddings), followed by the
        # object/previsual/prebox markers that cue a region description.
        prompt = [f"{tokenizer.bos_token}<|#image#|>{tokenizer.pad_token*vis_embed_size}<|#endofimage#|><|#object#|><|#previsual#|><|#prebox#|><|#object#|>"]
        encodings = tokenizer(
            prompt,
            padding="longest",
            truncation=True,
            return_tensors="pt",
            max_length=2000,
        )
        input_ids = encodings["input_ids"]
        attention_mask = encodings["attention_mask"]
        # Positions immediately after each <|#image#|> token, where the visual
        # embeddings will be inserted.
        image_start_index_list = ((input_ids == media_token_id).nonzero(as_tuple=True)[-1] + 1).tolist()
        image_start_index_list = [[x] for x in image_start_index_list]
        image_nums = [1] * len(input_ids)
        batch_images = batch_images.cuda()
        input_ids = input_ids.cuda()
        attention_mask = attention_mask.cuda()
        # Normalize the box to [0, 1) before handing it to the model.
        added_bbox_list = [(torch.tensor(gt_box).cuda() / 224).clamp(0, 0.99).unsqueeze(0)]
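        # Beam-search generation under fp16 autocast; the normalized box is
        # passed to the model as extra conditioning via `added_bbox_list`.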
        with torch.inference_mode(), torch.cuda.amp.autocast(dtype=torch.float16):
            outputs = model.generate(
                batch_images,
                input_ids,
                attention_mask=attention_mask,
                max_new_tokens=25,
                min_length=5,
                num_beams=8,
                length_penalty=0,
                image_start_index_list=image_start_index_list,
                image_nums=image_nums,
                added_bbox_list=added_bbox_list,
            )
        # Drop the prompt tokens and decode only the newly generated caption.
        outputs = outputs[:, len(input_ids[0]):]
        new_prediction = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0].strip().lower()
        this_tot += 1
        # Periodically show a reference/prediction pair on rank 0.
        if rank == 0 and this_tot % 10 == 0:
            tqdm.write(f"answer: {text}\nmodel output: {new_prediction}")
        predictions.append(
            {"image_id": image_id, "caption": new_prediction}
        )
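    # Each rank writes its shard of predictions to a temporary JSON file;
    # rank 0 merges the shards after the barrier below.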
results_path = f"reg_{lang_encoder_name}_{rank}_{id}.json"
json.dump(predictions, open(results_path, "w"))
print("save to", results_path)
del predictions
time.sleep(5)
if world_size > 1:
torch.distributed.barrier()
if rank == 0:
print(f"evaluate on rank {rank}. world size is {world_size}")
        predictions = []
        for rank_i in range(world_size):
            part_results_path = f"reg_{lang_encoder_name}_{rank_i}_{id}.json"
            print("load", part_results_path)
            with open(part_results_path) as f:
                part_data = json.load(f)
            predictions.extend(part_data)
            os.remove(part_results_path)
        print("num:", len(predictions))
        results_path = f"reg_{lang_encoder_name}_{id}_result.json"
        with open(results_path, "w") as f:
            json.dump(predictions, f, indent=2)
        metrics = compute_cider(
            result_path=results_path,
            annotations_path="/gpfs/u/home/LMCG/LMCGljnn/scratch/code/multimodal2/open_flamingo/eval/task/others/refcocog_reg_val_label.json",
        )
        os.makedirs("eval_results", exist_ok=True)
        cider = metrics["CIDEr"]
        print("cider", cider)
        with open(os.path.join("eval_results", f"reg_{model.expr_name}_{model.step_num}_{int(time.time())}_{cider}"), "w") as f:
            f.write(json.dumps(predictions, indent=2))
        # delete the temporary merged result file
        os.remove(results_path)
        return cider
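
# A minimal invocation sketch (illustrative only; the argument values below
# are assumptions, not taken from this repo's launch script):
#
#   cider = evaluate_reg(model, tokenizer, image_processor,
#                        vis_embed_size=64, rank=0, world_size=1, id=0)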
if __name__ == "__main__":
anno = json.load(open("/gpfs/u/home/LMCG/LMCGljnn/scratch/.cache/lavis/coco_gt/coco_karpathy_test_gt.json"))
import pdb; pdb.set_trace()
print(anno.keys())