Spaces:
Running
Running
import os | |
import io | |
import pandas as pd | |
import numpy as np | |
import string | |
from uuid import uuid4 | |
import os.path as osp | |
import base64 | |
from PIL import Image | |
import sys | |
Image.MAX_IMAGE_PIXELS = 1e9 | |
def rescale_img(img, tgt=None):
    """Resize an image to a fixed width or height, keeping its aspect ratio.

    Args:
        img: Image-like object exposing ``size`` as ``(width, height)`` and a
            ``resize((w, h))`` method (e.g. a ``PIL.Image.Image``).
        tgt: Tuple ``(target_w, target_h)`` in which ``-1`` marks the free
            dimension. If the width is given it takes precedence; the other
            dimension is derived from the aspect ratio (truncated to int).
            ``(-1, -1)`` means "no constraint" and returns ``img`` unchanged.

    Returns:
        The resized image, or ``img`` itself when both entries are ``-1``.

    Raises:
        AssertionError: If ``tgt`` is not a tuple containing ``-1``.
    """
    assert isinstance(tgt, tuple) and -1 in tgt, (
        'tgt must be a tuple with -1 marking the free dimension'
    )
    w, h = img.size
    if tgt[0] != -1:
        new_w, new_h = tgt[0], int(tgt[0] / w * h)
    elif tgt[1] != -1:
        new_w, new_h = int(tgt[1] / h * w), tgt[1]
    else:
        # Both dimensions unconstrained: previously this fell through and
        # crashed on unbound new_w/new_h; there is simply nothing to rescale.
        return img
    return img.resize((new_w, new_h))
def concat_images_vlmeval(images, target_size=-1, mode='h', return_image=False):
    """Concatenate several image files into one image, side by side or stacked.

    Args:
        images: List of image file paths, pasted in the given order.
        target_size: If not ``-1``, every image is first rescaled (keeping its
            aspect ratio) so that its height (``mode='h'``) or its width
            (``mode='v'``) equals this value.
        mode: ``'h'`` to lay images out left-to-right, ``'v'`` top-to-bottom.
        return_image: If true, return the composed ``PIL`` image; otherwise
            save it under ``/tmp`` and return the file path.

    Returns:
        The composed RGB image, or the path of the saved ``.jpg`` file. The
        file name is derived from ``md5`` of the newline-joined input paths.

    Raises:
        ValueError: If ``mode`` is neither ``'h'`` nor ``'v'``.
    """
    from .file import md5

    if mode not in ('h', 'v'):
        raise ValueError(f"mode must be 'h' or 'v', got {mode!r}")

    ims = [Image.open(im) for im in images]
    if target_size != -1:
        tgt_size = (-1, target_size) if mode == 'h' else (target_size, -1)
        ims = [rescale_img(im, tgt_size) for im in ims]

    ws, hs = [x.width for x in ims], [x.height for x in ims]
    if mode == 'h':
        dst = Image.new('RGB', (sum(ws), max(hs)))
        offset = 0
        for im in ims:
            dst.paste(im, (offset, 0))
            offset += im.width
    else:
        dst = Image.new('RGB', (max(ws), sum(hs)))
        offset = 0
        for im in ims:
            # Stack downwards: x stays 0, y advances by the heights so far.
            # (The old code passed a bare int built from the widths here.)
            dst.paste(im, (0, offset))
            offset += im.height

    if return_image:
        return dst

    str_md5 = md5('\n'.join(images))
    out_path = osp.join('/tmp', str_md5 + '.jpg')
    dst.save(out_path)
    return out_path
def mmqa_display(question, target_size=512):
    """Show one multi-modal QA record in a notebook: images, then text fields.

    Args:
        question: Mapping of field name to value. Keys are matched
            case-insensitively. ``image`` holds one base64 string or a list of
            them; ``index`` is optional and printed as the record id.
        target_size: Maximum side length passed to the image decoder.

    Relies on a notebook-provided ``display`` function being in scope.
    """
    record = {name.lower(): value for name, value in question.items()}
    text_fields = [name for name in record if name not in ['index', 'image']]

    encoded_images = record['image']
    if isinstance(encoded_images, str):
        encoded_images = [encoded_images]

    idx = record.pop('index', 'XXX')
    print(f'INDEX: {idx}')

    for encoded in encoded_images:
        display(decode_base64_to_image(encoded, target_size=target_size))  # noqa: F821

    for name in text_fields:
        value = record[name]
        try:
            present = not pd.isna(value)
        except ValueError:
            # Array-like value: truth-testing the element-wise result is
            # ambiguous, so show the field if any element is non-missing.
            present = False in pd.isna(value)
        if present:
            print(f'{name.upper()}. {value}')
def encode_image_to_base64(img, target_size=-1):
    """Encode a PIL image as a base64 string of its JPEG bytes.

    Args:
        img: Source image. It is never modified by this function.
        target_size: If ``> 0``, the encoded image is shrunk to fit within
            ``(target_size, target_size)``; otherwise no resizing is done.

    Returns:
        The base64-encoded JPEG data as a UTF-8 ``str``.
    """
    # JPEG cannot store alpha or palette data, so flatten those modes first.
    if img.mode in ('RGBA', 'P', 'LA'):
        img = img.convert('RGB')
    if target_size > 0:
        # thumbnail() resizes in place; work on a copy so the caller's image
        # keeps its original size.
        img = img.copy()
        img.thumbnail((target_size, target_size))
    img_buffer = io.BytesIO()
    img.save(img_buffer, format='JPEG')
    return base64.b64encode(img_buffer.getvalue()).decode('utf-8')
def encode_image_file_to_base64(image_path, target_size=-1):
    """Read an image file and return it as a base64-encoded JPEG string.

    Args:
        image_path: Path of the image file to read.
        target_size: Forwarded to ``encode_image_to_base64``; ``-1`` disables
            resizing.

    Returns:
        The base64 string produced by ``encode_image_to_base64``.
    """
    # Encode while the file is still open, then release the handle promptly
    # instead of leaving it to the garbage collector.
    with Image.open(image_path) as image:
        return encode_image_to_base64(image, target_size=target_size)
def decode_base64_to_image(base64_string, target_size=-1):
    """Decode a base64 string into a PIL image.

    Args:
        base64_string: Base64-encoded bytes of an image file.
        target_size: If ``> 0``, the image is shrunk in place to fit within
            ``(target_size, target_size)``.

    Returns:
        The decoded image; ``RGBA`` and ``P`` images are converted to ``RGB``.
    """
    stream = io.BytesIO(base64.b64decode(base64_string))
    decoded = Image.open(stream)
    needs_rgb = decoded.mode in ('RGBA', 'P')
    result = decoded.convert('RGB') if needs_rgb else decoded
    if target_size > 0:
        result.thumbnail((target_size, target_size))
    return result
def decode_base64_to_image_file(base64_string, image_path, target_size=-1):
    """Decode a base64-encoded image and write it to ``image_path``.

    Args:
        base64_string: Base64-encoded bytes of an image file.
        image_path: Destination path handed to the image's ``save`` method.
        target_size: Forwarded to ``decode_base64_to_image``.
    """
    decoded = decode_base64_to_image(base64_string, target_size=target_size)
    decoded.save(image_path)
def build_option_str(option_dict):
    """Format answer options as a prompt fragment, skipping missing ones.

    Args:
        option_dict: Mapping of option label to option text. Entries whose
            text is missing according to ``pd.isna`` are left out.

    Returns:
        A header line followed by one ``"<label>. <text>"`` line per option.
    """
    pieces = ['There are several options: \n']
    for label, text in option_dict.items():
        if pd.isna(text):
            continue
        pieces.append(f'{label}. {text}\n')
    return ''.join(pieces)
def isimg(s):
    """Return whether ``s`` is an existing local path or starts with 'http'."""
    if osp.exists(s):
        return True
    return s.startswith('http')
def read_ok(img_path):
    """Check whether ``img_path`` exists and opens as a non-empty image.

    Args:
        img_path: Path of the image file to probe.

    Returns:
        ``True`` if the file exists, can be opened, and has positive width and
        height; ``False`` otherwise. Errors while opening are treated as
        "not readable" rather than raised.
    """
    if not osp.exists(img_path):
        return False
    try:
        # The context manager closes the file handle as soon as we are done.
        with Image.open(img_path) as im:
            return im.size[0] > 0 and im.size[1] > 0
    except Exception:
        # Best-effort probe: any failure means "not readable". Unlike a bare
        # except, this lets KeyboardInterrupt/SystemExit propagate.
        return False
def gpt_key_set():
    """Return whether ``OPENAI_API_KEY`` is set to a value starting with 'sk-'."""
    key = os.environ.get('OPENAI_API_KEY', None)
    if not isinstance(key, str):
        return False
    return key.startswith('sk-')
def apiok(wrapper):
    """Probe an API wrapper with a greeting and report whether it responded.

    Args:
        wrapper: Object exposing ``generate(prompt)`` and a ``fail_msg``
            attribute.

    Returns:
        ``True`` if ``wrapper.fail_msg`` does not occur in the reply.
    """
    reply = wrapper.generate('Hello!')
    failed = wrapper.fail_msg in reply
    return not failed
def circular_pred(df, extract_func=None):
    """Score circular-evaluation predictions for consistency across rotations.

    Rows whose ``index`` is ``>= 1e6`` are compared with the row at
    ``index - 1e6``. A base question (``index % 1e6``) counts as consistent
    only if, for every compared pair where both predictions are truthy, the
    later letter is exactly one step after the earlier one modulo 4. A base
    question is dropped from the score if a compared pair has a prediction
    that is not a single uppercase ASCII letter.

    Args:
        df: DataFrame with ``index`` and ``prediction`` columns.
        extract_func: Optional callable mapping a raw prediction to the
            extracted choice; defaults to the identity.

    Returns:
        The mean of the consistency flags over the base questions that were
        not dropped (``np.mean`` of a list of booleans).

    Raises:
        KeyError: If a row with ``index >= 1e6`` has no counterpart at
            ``index - 1e6``.
    """
    if extract_func is None:
        extract_func = lambda x: x  # noqa: E731
    df = df.sort_values('index')

    shift = int(1e6)
    # Built once instead of on every comparison. A list (not the string)
    # keeps membership exact: 'AB' must not match as a substring.
    letters = list(string.ascii_uppercase)

    choices = [extract_func(x) for x in df['prediction']]
    pred_map = dict(zip(df['index'], choices))
    flag_map = {i: True for i in pred_map if i < shift}
    valid_map = {i: True for i in pred_map if i < shift}

    for i in df['index']:
        if i < shift:
            continue
        cur, prev = pred_map[i], pred_map[i - shift]
        if not (cur and prev):
            # A missing/empty prediction on either side is not compared.
            continue
        if cur not in letters or prev not in letters:
            valid_map[i % shift] = False
            continue
        if (ord(cur) - ord(prev)) % 4 != 1:
            flag_map[i % shift] = False

    flags = [flag for idx, flag in flag_map.items() if valid_map[idx]]
    return np.mean(flags)