LinB203 committed
Commit 0c8d55e · Parent: e24ee61
This view is limited to 50 files because the commit contains too many changes.
Files changed (50)
  1. app.py +777 -0
  2. univa/__init__.py +0 -0
  3. univa/dataset/__init__.py +8 -0
  4. univa/dataset/data_collator.py +156 -0
  5. univa/dataset/llava_dataset.py +312 -0
  6. univa/dataset/qwen2vl_dataset.py +658 -0
  7. univa/eval/__init__.py +0 -0
  8. univa/eval/configuration_eval.py +55 -0
  9. univa/eval/dpgbench/README.md +65 -0
  10. univa/eval/dpgbench/__init__.py +0 -0
  11. univa/eval/dpgbench/dpgbench.yaml +18 -0
  12. univa/eval/dpgbench/eval_prompts/dpgbench.csv +0 -0
  13. univa/eval/dpgbench/eval_prompts/dpgbench_prompts.json +0 -0
  14. univa/eval/dpgbench/requirements.txt +32 -0
  15. univa/eval/dpgbench/step1_gen_samples.py +248 -0
  16. univa/eval/dpgbench/step2_compute_dpg_bench.py +269 -0
  17. univa/eval/gedit/README.md +71 -0
  18. univa/eval/gedit/__init__.py +0 -0
  19. univa/eval/gedit/gedit.yaml +20 -0
  20. univa/eval/gedit/gedit_edit.json +0 -0
  21. univa/eval/gedit/secret_t2.env +0 -0
  22. univa/eval/gedit/step0_prepare_gedit.py +85 -0
  23. univa/eval/gedit/step1_gen_samples.py +260 -0
  24. univa/eval/gedit/step2_gedit_bench.py +178 -0
  25. univa/eval/gedit/step3_calculate_statistics.py +153 -0
  26. univa/eval/gedit/viescore/__init__.py +115 -0
  27. univa/eval/gedit/viescore/mllm_tools/__init__.py +0 -0
  28. univa/eval/gedit/viescore/mllm_tools/gemini.py +147 -0
  29. univa/eval/gedit/viescore/mllm_tools/idefics2_eval.py +43 -0
  30. univa/eval/gedit/viescore/mllm_tools/mantis_idefics2_eval.py +43 -0
  31. univa/eval/gedit/viescore/mllm_tools/minicpmv_eval.py +42 -0
  32. univa/eval/gedit/viescore/mllm_tools/openai.py +184 -0
  33. univa/eval/gedit/viescore/mllm_tools/qwen25vl_eval.py +121 -0
  34. univa/eval/gedit/viescore/mllm_tools/utils.py +65 -0
  35. univa/eval/gedit/viescore/parse_prompt.py +20 -0
  36. univa/eval/gedit/viescore/utils.py +362 -0
  37. univa/eval/gedit/viescore/vie_prompts.py +406 -0
  38. univa/eval/genai/README.md +47 -0
  39. univa/eval/genai/__init__.py +0 -0
  40. univa/eval/genai/eval_prompts/genai1600/genai_image.json +0 -0
  41. univa/eval/genai/eval_prompts/genai1600/genai_skills.json +4872 -0
  42. univa/eval/genai/eval_prompts/genai527/genai_image.json +0 -0
  43. univa/eval/genai/eval_prompts/genai527/genai_skills.json +1482 -0
  44. univa/eval/genai/genai1600.yaml +18 -0
  45. univa/eval/genai/genai527.yaml +18 -0
  46. univa/eval/genai/step1_gen_samples.py +269 -0
  47. univa/eval/genai/step2_run_model.py +113 -0
  48. univa/eval/genai/t2v_metrics/__init__.py +13 -0
  49. univa/eval/genai/t2v_metrics/clipscore.py +21 -0
  50. univa/eval/genai/t2v_metrics/constants.py +8 -0
app.py ADDED
@@ -0,0 +1,777 @@
+ import gradio as gr
+ import sys
+ sys.path.append("..")
+ from transformers import AutoProcessor, SiglipImageProcessor, SiglipVisionModel, T5EncoderModel, BitsAndBytesConfig
+ from univa.models.qwen2p5vl.modeling_univa_qwen2p5vl import UnivaQwen2p5VLForConditionalGeneration
+ from univa.utils.flux_pipeline import FluxPipeline
+ from univa.utils.get_ocr import get_ocr_result
+ from univa.utils.denoiser_prompt_embedding_flux import encode_prompt
+ from qwen_vl_utils import process_vision_info
+ from univa.utils.anyres_util import dynamic_resize, concat_images_adaptive
+ import torch
+ from torch import nn
+ import os
+ import uuid
+ import base64
+ from typing import Dict
+ from PIL import Image, ImageDraw, ImageFont
+ import spaces
+ import argparse
+ import gc
+
+ def parse_args():
+     parser = argparse.ArgumentParser(description="Model and component paths")
+
+     parser.add_argument("--model_path", type=str, default="LanguageBind/UniWorld-V1", help="Path to the UniWorld-V1 model")
+     parser.add_argument("--flux_path", type=str, default="black-forest-labs/FLUX.1-dev", help="Path to the FLUX.1-dev model")
+     parser.add_argument("--siglip_path", type=str, default="google/siglip2-so400m-patch16-512", help="Path to the SigLIP2 model")
+     parser.add_argument("--server_name", type=str, default="127.0.0.1", help="Server IP address")
+     parser.add_argument("--server_port", type=int, default=6812, help="Server port")
+     parser.add_argument("--share", action="store_true", help="Whether to create a public share link")
+     parser.add_argument("--nf4", action="store_true", help="Whether to load with NF4 quantization")
+     parser.add_argument("--zh", action="store_true", help="Whether to use the Chinese UI")
+     parser.add_argument("--offload", action="store_true", help="Whether to enable sequential CPU offload")
+
+     return parser.parse_args()
+
+
+ def add_plain_text_watermark(
+     img: Image.Image,
+     text: str,
+     margin: int = 50,
+     font_size: int = 30,
+ ):
+     if img.mode != "RGB":
+         img = img.convert("RGB")
+
+     draw = ImageDraw.Draw(img)
+     font = ImageFont.truetype("DejaVuSans.ttf", font_size)
+     bbox = draw.textbbox((0, 0), text)
+     text_width = bbox[2] - bbox[0]
+     text_height = bbox[3] - bbox[1]
+
+     x = img.width - text_width - int(3.3 * margin)
+     y = img.height - text_height - margin
+
+     draw.text((x, y), text, font=font, fill=(255, 255, 255))
+     return img
+
+
+ css = """
+ .table-wrap table tr td:nth-child(3) > div {
+     max-height: 150px;      /* cap the cell height; adjust as needed */
+     overflow-y: auto;       /* vertical scrollbar on overflow */
+     white-space: pre-wrap;  /* wrap lines automatically */
+     word-break: break-all;  /* break long words mid-word */
+ }
+ .table-wrap table tr td:nth-child(2) > div {
+     max-width: 150px;
+     white-space: pre-wrap;
+     word-break: break-all;
+     overflow-x: auto;
+ }
+ .table-wrap table tr th:nth-child(2) {
+     max-width: 150px;
+     white-space: normal;
+     word-break: keep-all;
+     overflow-x: auto;
+ }
+ .table-wrap table tr td:nth-last-child(-n+8) > div {
+     max-width: 130px;
+     white-space: pre-wrap;
+     word-break: break-all;
+     overflow-x: auto;
+ }
+ .table-wrap table tr th:nth-last-child(-n+8) {
+     max-width: 130px;
+     white-space: normal;
+     word-break: keep-all;
+     overflow-x: auto;
+ }
+ """
+
+
+ def img2b64(image_path):
+     with open(image_path, "rb") as f:
+         b64 = base64.b64encode(f.read()).decode()
+     data_uri = f"data:image/jpeg;base64,{b64}"
+     return data_uri
+
+ @spaces.GPU
+ def initialize_models(args):
+     os.makedirs("tmp", exist_ok=True)
+     # Paths
+     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+     quantization_config = BitsAndBytesConfig(
+         load_in_4bit=True,
+         bnb_4bit_compute_dtype=torch.bfloat16,
+         bnb_4bit_quant_type="nf4",
+     )
+
+     # Load main model and task head
+     model = UnivaQwen2p5VLForConditionalGeneration.from_pretrained(
+         args.model_path,
+         torch_dtype=torch.bfloat16,
+         attn_implementation="flash_attention_2",
+         quantization_config=quantization_config if args.nf4 else None,
+     ).to(device)
+     task_head = nn.Sequential(
+         nn.Linear(3584, 10240),
+         nn.SiLU(),
+         nn.Dropout(0.3),
+         nn.Linear(10240, 2)
+     ).to(device)
+     task_head.load_state_dict(torch.load(os.path.join(args.model_path, 'task_head_final.pt')))
+     task_head.eval()
+
+     processor = AutoProcessor.from_pretrained(
+         args.model_path,
+         min_pixels=448*448,
+         max_pixels=448*448,
+     )
+     if args.nf4:
+         text_encoder_2 = T5EncoderModel.from_pretrained(
+             args.flux_path,
+             subfolder="text_encoder_2",
+             quantization_config=quantization_config,
+             torch_dtype=torch.bfloat16,
+         )
+         pipe = FluxPipeline.from_pretrained(
+             args.flux_path,
+             transformer=model.denoise_tower.denoiser,
+             text_encoder_2=text_encoder_2,
+             torch_dtype=torch.bfloat16,
+         ).to(device)
+     else:
+         pipe = FluxPipeline.from_pretrained(
+             args.flux_path,
+             transformer=model.denoise_tower.denoiser,
+             torch_dtype=torch.bfloat16,
+         ).to(device)
+     if args.offload:
+         pipe.enable_model_cpu_offload()
+         pipe.enable_vae_slicing()
+     tokenizers = [pipe.tokenizer, pipe.tokenizer_2]
+     text_encoders = [pipe.text_encoder, pipe.text_encoder_2]
+
+     # Optional SigLIP
+     siglip_processor, siglip_model = None, None
+     siglip_processor = SiglipImageProcessor.from_pretrained(args.siglip_path)
+     siglip_model = SiglipVisionModel.from_pretrained(
+         args.siglip_path,
+         torch_dtype=torch.bfloat16,
+     ).to(device)
+
+     return {
+         'model': model,
+         'task_head': task_head,
+         'processor': processor,
+         'pipe': pipe,
+         'tokenizers': tokenizers,
+         'text_encoders': text_encoders,
+         'siglip_processor': siglip_processor,
+         'siglip_model': siglip_model,
+         'device': device,
+     }
+
+
+ args = parse_args()
+ state = initialize_models(args)
+
+ @spaces.GPU
+ def process_large_image(raw_img):
+     if raw_img is None:
+         return raw_img
+     img = Image.open(raw_img).convert("RGB")
+
+     max_side = max(img.width, img.height)
+     if max_side > 1024:
+         scale = 1024 / max_side
+         new_w = int(img.width * scale)
+         new_h = int(img.height * scale)
+         print(f'resize img {img.size} to {(new_w, new_h)}')
+         img = img.resize((new_w, new_h), resample=Image.LANCZOS)
+         save_path = f"tmp/{uuid.uuid4().hex}.png"
+         img.save(save_path)
+         return save_path
+     else:
+         return raw_img
+
+ @spaces.GPU
+ def chat_step(image1, image2, text, height, width, steps, guidance,
+               ocr_enhancer, joint_with_t5, enhance_generation, enhance_understanding,
+               seed, num_imgs, history_state, progress=gr.Progress()):
+
+     try:
+         convo = history_state['conversation']
+         image_paths = history_state['history_image_paths']
+         cur_ocr_i = history_state['cur_ocr_i']
+         cur_genimg_i = history_state['cur_genimg_i']
+
+         # image1 = process_large_image(image1)
+         # image2 = process_large_image(image2)
+         # Build content
+         content = []
+         if text:
+             ocr_text = ''
+             if ocr_enhancer:  # gather OCR text from the uploaded images
+                 ocr_texts = []
+                 for img in (image1, image2):
+                     if img:
+                         ocr_texts.append(get_ocr_result(img, cur_ocr_i))
+                         cur_ocr_i += 1
+                 ocr_text = '\n'.join(ocr_texts)
+             content.append({'type':'text','text': text + ocr_text})
+         for img in (image1, image2):
+             if img:
+                 content.append({'type':'image','image':img,'min_pixels':448*448,'max_pixels':448*448})
+                 image_paths.append(img)
+
+         convo.append({'role':'user','content':content})
+
+         # Prepare inputs
+         chat_text = state['processor'].apply_chat_template(convo,
+             tokenize=False, add_generation_prompt=True)
+         chat_text = '<|im_end|>\n'.join(chat_text.split('<|im_end|>\n')[1:])
+         image_inputs, video_inputs = process_vision_info(convo)
+         inputs = state['processor'](
+             text=[chat_text], images=image_inputs, videos=video_inputs,
+             padding=True, return_tensors='pt'
+         ).to(state['device'])
+
+         # Model forward & task head
+         with torch.no_grad():
+             outputs = state['model'](**inputs, return_dict=True, output_hidden_states=True)
+             hidden = outputs.hidden_states[-1]
+             mask = inputs.input_ids == 77091  # locate the assistant-role token
+             vecs = hidden[mask][-1:]
+         task_res = state['task_head'](vecs.float())[0]
+         print(task_res)
+         # Branch decision
+         if enhance_generation:
+             do_image = True
+         elif enhance_understanding:
+             do_image = False
+         else:
+             do_image = (task_res[0] < task_res[1])
+
+         seed = int(seed)
+         if seed == -1:
+             seed = torch.Generator(device="cpu").seed()
+         torch.manual_seed(seed)
+         # Generate
+         if do_image:
+             # image generation pipeline
+             siglip_hs = None
+             if state['siglip_processor'] and image_paths:
+                 vals = [state['siglip_processor'].preprocess(
+                             images=Image.open(p).convert('RGB'), do_resize=True,
+                             return_tensors='pt', do_convert_rgb=True
+                         ).pixel_values.to(state['device'])
+                         for p in image_paths]
+                 siglip_hs = state['siglip_model'](torch.concat(vals)).last_hidden_state
+
+             with torch.no_grad():
+                 lvlm = state['model'](
+                     inputs.input_ids, pixel_values=getattr(inputs,'pixel_values',None),
+                     attention_mask=inputs.attention_mask,
+                     image_grid_thw=getattr(inputs,'image_grid_thw',None),
+                     siglip_hidden_states=siglip_hs,
+                     output_type='denoise_embeds'
+                 )
+                 prm_embeds, pooled = encode_prompt(
+                     state['text_encoders'], state['tokenizers'],
+                     text if joint_with_t5 else '', 256, state['device'], 1
+                 )
+             emb = torch.concat([lvlm, prm_embeds], dim=1) if joint_with_t5 else lvlm
+
+             def diffusion_to_gradio_callback(_pipeline, step_idx: int, timestep: int, tensor_dict: Dict):
+                 # 1) update the Gradio progress bar
+                 frac = (step_idx + 1) / float(steps)
+                 progress(frac)
+
+                 return tensor_dict
+
+             with torch.no_grad():
+                 img = state['pipe'](
+                     prompt_embeds=emb, pooled_prompt_embeds=pooled,
+                     height=height, width=width,
+                     num_inference_steps=steps,
+                     guidance_scale=guidance,
+                     generator=torch.Generator(device='cuda').manual_seed(seed),
+                     num_images_per_prompt=num_imgs,
+                     callback_on_step_end=diffusion_to_gradio_callback,
+                     # callback_on_step_end_tensor_inputs=["latents", "prompt_embeds"],
+                 ).images
+             # img = [add_plain_text_watermark(im, 'Open-Sora Plan 2.0 Generated') for im in img]
+             img = concat_images_adaptive(img)
+             save_path = f"tmp/{uuid.uuid4().hex}.png"
+             img.save(save_path)
+             convo.append({'role':'assistant','content':[{'type':'image','image':save_path}]})
+             cur_genimg_i += 1
+             progress(1.0)
+             bot_msg = (None, save_path)
+         else:
+             # text generation
+             gen_ids = state['model'].generate(**inputs, max_new_tokens=128)
+             out = state['processor'].batch_decode(
+                 [g[len(inputs.input_ids[0]):] for g in gen_ids], skip_special_tokens=True
+             )[0]
+             convo.append({'role':'assistant','content':[{'type':'text','text':out}]})
+             bot_msg = (None, out)
+
+         chat_pairs = []
+         for msg in convo:
+             if msg['role']=='user':
+                 parts = []
+                 for c in msg['content']:
+                     if c['type']=='text': parts.append(c['text'])
+                     if c['type']=='image': parts.append(f"![user image]({img2b64(c['image'])})")
+                 chat_pairs.append(("\n".join(parts), None))
+             else:
+                 parts = []
+                 for c in msg['content']:
+                     if c['type']=='text': parts.append(c['text'])
+                     if c['type']=='image': parts.append(f"![assistant image]({img2b64(c['image'])})")
+                 chat_pairs[-1] = (chat_pairs[-1][0], parts[-1])
+
+         # Update state
+         history_state.update({
+             'conversation': convo,
+             'history_image_paths': image_paths,
+             'cur_ocr_i': cur_ocr_i,
+             'cur_genimg_i': cur_genimg_i
+         })
+         return chat_pairs, history_state, seed
+     except Exception as e:
+         # Catch all exceptions, return an error message, and suggest clearing the history before retrying
+         error_msg = f"An error occurred: {e}. Please click \"Clear History\" to reset the conversation and try again."
+         chat_pairs = [(None, error_msg)]
+         # Leave history_state unchanged so the user can clear it themselves
+         return chat_pairs, history_state, seed
+
+ def copy_seed_for_user(real_seed):
+     # Copy the hidden seed_holder value into the visible seed Textbox
+     return real_seed
+
+ def clear_inputs():
+     # Clear img1 and img2 with None; clear text_in and seed with empty strings
+     return None, None, "", ""
+
+ @spaces.GPU
+ def clear_history():
+     gc.collect()
+     if torch.cuda.is_available():
+         torch.cuda.empty_cache()
+         torch.cuda.ipc_collect()
+     # Default prompt and seed
+     default_prompt = "Translate this photo into a Studio Ghibli-style illustration, holding true to the original composition and movement."
+     default_seed = "-1"
+
+     # 1. The chatbot is cleared with gr.update(value=[])
+     # 2. The state is reset to its initial dict
+     # 3. The prompt and seed are likewise reset with gr.update()
+     return (
+         gr.update(value=[]),              # clear the chat box
+         {'conversation':[],               # reset the state
+          'history_image_paths':[],
+          'cur_ocr_i':0,
+          'cur_genimg_i':0},
+         gr.update(value=None),            # reset image1
+         gr.update(value=None),            # reset image2
+         gr.update(value=default_prompt),  # reset the prompt textbox
+         gr.update(value=default_seed),    # reset the seed textbox
+     )
+
+
+ if __name__ == '__main__':
+     # Gradio UI
+     with gr.Blocks(
+         theme=gr.themes.Soft(),
+         css=css
+     ) as demo:
+
+         gr.Markdown(
+             """
+             <div style="text-align:center;">
+
+             # 🎉 UniWorld-V1 Chat Interface 🎉
+
+             ### Unlock Cutting‑Edge Visual Perception, Feature Extraction, Editing, Synthesis, and Understanding
+
+             **Usage Guide:**
+             - It is recommended to perform inference on four images concurrently to offer varied selections.
+             - Uploaded images are automatically resized; manually specifying resolutions that differ substantially from the original is not advised.
+             </div>
+             """,
+             elem_classes="header-text",
+         )
+         with gr.Row():
+             with gr.Column():
+                 chatbot = gr.Chatbot(
+                     max_height=100000, min_height=700,
+                     height=None,
+                     resizable=True,
+                     show_copy_button=True
+                 )
+                 text_in = gr.Textbox(label="Instruction", value="Translate this photo into a Studio Ghibli-style illustration, holding true to the original composition and movement.")
+             with gr.Column():
+                 with gr.Row():
+                     img1 = gr.Image(type='filepath', label="Image 1", height=256, width=256)
+                     img2 = gr.Image(type='filepath', label="Image 2 (Optional reference)", height=256, width=256, visible=True)
+                 seed = gr.Textbox(label="Seed (-1 for random)", value="-1")
+                 seed_holder = gr.Textbox(visible=False)
+                 with gr.Row():
+                     num_imgs = gr.Slider(1, 4, 4, step=1, label="Num Images")
+                 with gr.Row():
+                     height = gr.Slider(256, 2048, 1024, step=64, label="Height")
+                     width = gr.Slider(256, 2048, 1024, step=64, label="Width")
+                 with gr.Row():
+                     steps = gr.Slider(8, 50, 30, step=1, label="Inference steps")
+                     guidance = gr.Slider(1.0, 10.0, 4.0, step=0.1, label="Guidance scale")
+                 with gr.Accordion("Advanced Options", open=True, visible=True):
+                     with gr.Row():
+                         enhance_gen_box = gr.Checkbox(value=False, label="Enhance Generation")
+                         enhance_und_box = gr.Checkbox(value=False, label="Enhance Understanding")
+                     with gr.Row():
+                         ocr_box = gr.Checkbox(value=False, label="Enhance Text Rendering")
+                         t5_box = gr.Checkbox(value=True, label="Enhance Current Turn")
+                 with gr.Row():
+                     submit = gr.Button("Send", variant="primary")
+                     clear = gr.Button("Clear History", variant="primary")
+         with gr.Row():
+             with gr.Column(1, min_width=0):
+                 gr.Markdown(
+                     """
+                     **🖼️ Visual Perception & Feature Extraction**
+                     - Canny Edge Detection
+                     - Mini-Line Segment Detection
+                     - Normal Map Generation
+                     - Sketch Generation
+                     - Holistically-Nested Edge Detection
+                     - Depth Estimation
+                     - Human Pose Estimation
+                     - Object Detection (Boxes)
+                     - Semantic Segmentation (Masks)
+                     """
+                 )
+             with gr.Column(1, min_width=0):
+                 gr.Markdown(
+                     """
+                     **✂️ Image Editing & Manipulation**
+                     - Add Elements
+                     - Adjust Attributes
+                     - Change Background
+                     - Remove Objects
+                     - Replace Regions
+                     - Perform Actions
+                     - Restyle
+                     - Compose Scenes
+                     """
+                 )
+             with gr.Column(1, min_width=0):
+                 gr.Markdown(
+                     """
+                     **🔄 Cross-Modal Synthesis & Transformation**
+                     - Text→Image Synthesis
+                     - Image‑to‑Image Translation
+                     - Multi‑Image Combination
+                     - Extract IP Features
+                     - IP Feature Composition
+                     """
+                 )
+             with gr.Column(1, min_width=0):
+                 gr.Markdown(
+                     """
+                     **🤖 Visual & Textual QA**
+                     - Image‑Text QA
+                     - Text‑Text QA
+                     """
+                 )
+         anchor_pixels = 1024*1024
+         # Dynamic resize callback
+         def update_size(i1, i2):
+             shapes = []
+             for p in (i1, i2):
+                 if p:
+                     im = Image.open(p)
+                     w, h = im.size
+                     shapes.append((w, h))
+             if not shapes:
+                 return gr.update(), gr.update()
+             if len(shapes) == 1:
+                 w, h = shapes[0]
+             else:
+                 w = sum(s[0] for s in shapes) / len(shapes)
+                 h = sum(s[1] for s in shapes) / len(shapes)
+             new_h, new_w = dynamic_resize(int(h), int(w), 'any_11ratio', anchor_pixels=anchor_pixels)
+             return gr.update(value=new_h), gr.update(value=new_w)
+         img1.change(fn=update_size, inputs=[img1, img2], outputs=[height, width])
+         img2.change(fn=update_size, inputs=[img1, img2], outputs=[height, width])
+
+         # Mutual exclusivity of the two enhance checkboxes
+         enhance_gen_box.change(
+             lambda g: gr.update(value=False) if g else gr.update(),
+             inputs=[enhance_gen_box], outputs=[enhance_und_box]
+         )
+         enhance_und_box.change(
+             lambda u: gr.update(value=False) if u else gr.update(),
+             inputs=[enhance_und_box], outputs=[enhance_gen_box]
+         )
+         state_ = gr.State({'conversation':[], 'history_image_paths':[], 'cur_ocr_i':0, 'cur_genimg_i':0})
+
+         progress_bar = gr.Progress()
+         gr.on(
+             triggers=[submit.click, text_in.submit],
+             fn=chat_step,
+             inputs=[img1, img2, text_in, height, width, steps, guidance,
+                     ocr_box, t5_box, enhance_gen_box, enhance_und_box, seed, num_imgs, state_,
+                     ],
+             outputs=[chatbot, state_, seed_holder],
+             scroll_to_output=True
+         ).then(
+             fn=copy_seed_for_user,
+             inputs=[seed_holder],  # read the hidden seed_holder
+             outputs=[seed]         # write to the visible seed Textbox
+         )
+
+         clear.click(
+             fn=clear_history,
+             inputs=[],
+             outputs=[chatbot, state_, img1, img2, text_in, seed]
+         )
+
+         # ========== Validation Examples ==========
+         example_height, example_width = 1024, 1024
+         gr.Examples(
+             examples_per_page=100,
+             examples=[
+                 # text-to-image
+                 [None, None,
+                  "Generate an adorable golden retriever puppy playing in a sunny park, "
+                  "with fluffy fur, big round eyes, and a happy expression. "
+                  "The background should have green grass, some flowers, and a blue sky with white clouds.",
+                  example_height, example_width, 30, 4.0, False, False, False, False, "-1", 4],
+
+                 # NIKE color swap
+                 ["assets/nike_src.jpg", None,
+                  "Switch the product's color from black, black to white, white, making sure the transition is crisp and clear.",
+                  example_height, example_width, 30, 4.0, False, False, False, False, "-1", 4],
+
+                 # style transfer (Ghibli)
+                 ["assets/gradio/origin.png", None,
+                  "Translate this photo into a Studio Ghibli-style illustration, holding true to the original composition and movement.",
+                  example_height, example_width, 30, 4.0, False, False, False, False, "-1", 4],
+
+                 ["assets/gradio/origin.png", None,
+                  "Remove the bicycle located in the lower center region of the image.",
+                  example_height, example_width, 30, 4.0, False, False, False, False, "-1", 4],
+
+                 # blur
+                 ["assets/gradio/blur.jpg", None,
+                  "Remove blur, make it clear.",
+                  example_height, example_width, 30, 4.0, False, False, False, False, "-1", 4],
+
+                 #
+                 ["assets/gradio/00004614_tgt.jpg", None,
+                  "Add the ingrid fair isle cashmere turtleneck sweater to the person.",
+                  example_height, example_width, 30, 4.0, False, False, False, False, "-1", 4],
+                 #
+                 ["assets/gradio/00006581_tgt.jpg", None,
+                  "Place the belvoir broderie anglaise linen tank on the person in a way that complements their appearance and style.",
+                  example_height, example_width, 30, 4.0, False, False, False, False, "-1", 4],
+                 #
+                 ["assets/gradio/00008153_tgt.jpg", None,
+                  "Integrate may cashmere tank on body.",
+                  example_height, example_width, 30, 4.0, False, False, False, False, "-1", 4],
+                 #
+                 ["assets/gradio/00002315_src.jpg", None,
+                  "Strip away all context and distractions, leaving the pointelle-trimmed cashmere t-shirt floating on a neutral background.",
+                  example_height, example_width, 30, 4.0, False, False, False, False, "-1", 4],
+                 #
+                 ["assets/gradio/00002985_src.jpg", None,
+                  "Generate an image containing only the henry shearling jacket, free from any other visual elements.",
+                  example_height, example_width, 30, 4.0, False, False, False, False, "-1", 4],
+
+                 ["assets/gradio/origin.png", None,
+                  "Add a cat in the center of image.",
+                  example_height, example_width, 30, 4.0, False, False, False, False, "-1", 4],
+
+                 # image+image-to-image (compose)
+                 ["assets/00182555_target.jpg",
+                  "assets/00182555_InstantStyle_ref_1.jpg",
+                  "Adapt Image1's content to fit the aesthetic of Image2.",
+                  example_height, example_width, 30, 4.0, False, False, False, False, "-1", 4],
+
+                 # replace object
+                 ["assets/replace_src.png", None,
+                  "replace motorcycle located in the lower center region of the image with a black bicycle",
+                  example_height, example_width, 30, 4.0, False, False, False, False, "-1", 4],
+
+                 # segmentation
+                 ["assets/seg_src.jpg", None,
+                  "Segment the giraffe from the background.\n",
+                  example_height, example_width, 30, 4.0, False, False, False, False, "-1", 4],
+
+                 # detection
+                 ["assets/det_src.jpg", None,
+                  "Please depict the vase accurately",
+                  example_height, example_width, 30, 4.0, False, False, False, False, "-1", 4],
+
+                 # image-to-canny
+                 ["assets/canny_image.jpg", None,
+                  "Generate a Canny edge map for this image.",
+                  example_height, example_width, 30, 4.0, False, False, False, False, "-1", 4],
+
+                 # image-to-mlsd
+                 ["assets/mlsd_image.jpg", None,
+                  "Render an MLSD detection overlay for this input image.",
+                  example_height, example_width, 30, 4.0, False, False, False, False, "-1", 4],
+
+                 # image-to-normal
+                 ["assets/normal_image.jpg", None,
+                  "Convert the input texture into a tangent-space normal map.",
+                  example_height, example_width, 30, 4.0, False, False, False, False, "-1", 4],
+
+                 # image-to-sketch
+                 ["assets/sketch_image.jpg", None,
+                  "Transform this image into a hand-drawn charcoal sketch.",
+                  example_height, example_width, 30, 4.0, False, False, False, False, "-1", 4],
+
+                 # image-to-hed
+                 ["assets/hed_image.jpg", None,
+                  "Produce a holistically-nested boundary probability map of this image.",
+                  example_height, example_width, 30, 4.0, False, False, False, False, "-1", 4],
+
+                 # image-to-depth
+                 ["assets/depth_image.jpg", None,
+                  "Estimate depth with a focus on background structure.",
+                  example_height, example_width, 30, 4.0, False, False, False, False, "-1", 4],
+
+                 # image-to-image (reconstruction)
+                 ["assets/rec.jpg", None,
+                  "Simply reconstruct the original image with no enhancements.",
+                  example_height, example_width, 30, 4.0, False, False, False, False, "-1", 4],
+
+             ],
+             inputs=[img1, img2, text_in, height, width, steps, guidance,
+                     ocr_box, t5_box, enhance_gen_box, enhance_und_box, seed, num_imgs],
+         )
+         # ==============================================
+
+ UI_TRANSLATIONS = {
+     "🎉 UniWorld-V1 Chat Interface 🎉": "🎉 UniWorld-V1 聊天界面 🎉",
+     "Unlock Cutting‑Edge Visual Perception, Feature Extraction, Editing, Synthesis, and Understanding":
+         "解锁尖端视觉感知,特征提取,编辑,合成和理解",
+     "Usage Guide:": "使用指南:",
+     "It is recommended to perform inference on four images concurrently to offer varied selections.": "建议同时进行四张图像的推理,以提供多选。",
+     "Uploaded images are automatically resized; manually specifying resolutions that differ substantially from the original is not advised.": "已上传的图像将自动调整大小,但手动指定与原始图像差异太大的分辨率并不建议。",
+     "🖼️ Visual Perception & Feature Extraction": "🖼️ 视觉感知与特征提取",
+     "Canny Edge Detection": "Canny边缘检测",
+     "Mini-Line Segment Detection": "微型行段检测",
+     "Normal Map Generation": "生成法线图",
+     "Sketch Generation": "手绘生成",
+     "Holistically-Nested Edge Detection": "整体嵌套边缘检测",
+     "Depth Estimation": "深度估计",
+     "Human Pose Estimation": "人体姿势估计",
+     "Object Detection (Boxes)": "对象检测(框)",
+     "Semantic Segmentation (Masks)": "语义分割(蒙版)",
+     "✂️ Image Editing & Manipulation": "✂️ 图像编辑与操作",
+     "Add Elements": "添加元素",
+     "Adjust Attributes": "调整属性",
+     "Change Background": "更改背景",
+     "Remove Objects": "删除对象",
+     "Replace Regions": "替换区域",
+     "Perform Actions": "执行操作",
+     "Restyle": "重绘风格",
+     "Compose Scenes": "组合场景",
+     "🔄 Cross-Modal Synthesis & Transformation": "🔄 跨模态综合与转换",
+     "Text→Image Synthesis": "文本→图像综合",
+     "Image‑to‑Image Translation": "图像-图像转换",
+     "Multi‑Image Combination": "多图像组合",
+     "Extract IP Features": "提取IP特征",
+     "IP Feature Composition": "IP特征组合",
+     "🤖 Visual & Textual QA": "🤖 视觉和文字质量检查",
+     "Image‑Text QA": "图像-文本质量检查",
+     "Text‑Text QA": "文本-文本质量检查",
+     "Image 1": "图像 1",
+     "Image 2 (Optional reference)": "图像 2 (可选参考)",
+     "Instruction": "指令",
+     "Seed (-1 for random)": "种子 (-1为随机)",
+     "Num Images": "图像数量",
+     "Height": "高度",
+     "Width": "宽度",
+     "Inference steps": "推理步数",
+     "Guidance scale": "引导缩放",
+     "Advanced Options": "高级选项",
+     "Enhance Generation": "增强生成",
+     "Enhance Understanding": "增强理解",
+     "Enhance Text Rendering": "增强文本渲染",
+     "Enhance Current Turn": "增强当前轮次",
+     "Send": "发送",
+     "Clear History": "清除历史记录",
+ }
+
+
+ def apply_localization(block):
+     def process_component(component):
+         if not component:
+             return
+
+         for attr in ['label', 'info', 'placeholder']:
+             if hasattr(component, attr):
+                 text = getattr(component, attr)
+                 if text in UI_TRANSLATIONS:
+                     setattr(component, attr, UI_TRANSLATIONS[text])
+
+         if hasattr(component, 'value'):
+             value = component.value
+             if isinstance(value, str) and value in UI_TRANSLATIONS:
+                 component.value = UI_TRANSLATIONS[value]
+
+         if isinstance(component, gr.Markdown):
+             for en, zh in UI_TRANSLATIONS.items():
+                 component.value = component.value.replace(en, zh)
+
+         if hasattr(component, 'children'):
+             for child in component.children:
+                 process_component(child)
+
+     process_component(block)
+     return block
+
+
+ if __name__ == "__main__":
+     if args.zh:
+         demo = apply_localization(demo)
+     demo.title = "UniWorld-V1"
+     demo.launch(
+         allowed_paths=["/"],
+         server_name=args.server_name,
+         server_port=args.server_port,
+         share=args.share,
+         inbrowser=True,
+     )
+
+
+ '''
+ MODEL_PATH="/mnt/data/lb/Remake/FlowWorld/checkpoints/flux_qwen2p5vl_7b_vlm_mlp_siglip_stage2_ts_1024_bs42x8x1_fa_any_11ratio_ema999_ocr_adamw_t5_0p4_lr1e-5_mask_refstyle_extract_resume_run3/checkpoint-12000/model_ema"
+ FLUX_PATH="/mnt/data/checkpoints/black-forest-labs/FLUX.1-dev"
+ SIGLIP_PATH="/mnt/data/checkpoints/google/siglip2-so400m-patch16-512"
+ CUDA_VISIBLE_DEVICES=2 python app.py \
+     --model_path ${MODEL_PATH} \
+     --flux_path ${FLUX_PATH} \
+     --siglip_path ${SIGLIP_PATH}
+ '''
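
A minimal, self-contained sketch of the task-head routing used in chat_step above: the head maps the 3584-dim hidden state at the assistant token to two logits, and generation wins when the second logit is larger. The layer sizes are taken from initialize_models; the random vector below is a placeholder for `hidden[mask][-1:]`, and the head is untrained here (the app loads `task_head_final.pt`).

    import torch
    from torch import nn

    task_head = nn.Sequential(
        nn.Linear(3584, 10240),
        nn.SiLU(),
        nn.Dropout(0.3),
        nn.Linear(10240, 2),
    )
    task_head.eval()  # disable dropout, as in the app

    vec = torch.randn(1, 3584)              # placeholder for hidden[mask][-1:]
    with torch.no_grad():
        logits = task_head(vec.float())[0]
    do_image = bool(logits[0] < logits[1])  # same comparison as chat_step
    print(f"route to image generation: {do_image}")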
univa/__init__.py ADDED
File without changes
univa/dataset/__init__.py ADDED
@@ -0,0 +1,8 @@
+ from .llava_dataset import LlavaDataset
+ from .qwen2vl_dataset import Qwen2VLDataset
+
+ DATASET_TYPE = {
+     'llava': LlavaDataset,
+     'qwen2vl': Qwen2VLDataset,
+     'qwen2p5vl': Qwen2VLDataset,
+ }
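
For orientation, a tiny usage sketch (an assumed call pattern, not a call site from the repo): a training script can resolve the dataset class from this registry by a config string.

    from univa.dataset import DATASET_TYPE

    dataset_cls = DATASET_TYPE['qwen2p5vl']  # -> Qwen2VLDataset
    print(dataset_cls.__name__)              # "Qwen2VLDataset"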
univa/dataset/data_collator.py ADDED
@@ -0,0 +1,156 @@
+ from typing import List, Dict
+ from transformers import PreTrainedTokenizer
+ import torch
+ import torch.nn.functional as F
+
+ def pad_list_of_tensors(tensor_list, padding_value=0):
+     # tensor_list: list of tensors, each of shape (b, c, h, w)
+
+     # if all entries are empty lists, every sample in this batch is t2i
+     if all(not isinstance(tensor, torch.Tensor) for tensor in tensor_list):
+         return []
+     else:
+         for tmp_tensor in tensor_list:
+             if isinstance(tmp_tensor, torch.Tensor):
+                 # found a tensor
+                 break
+         # pad in a zero tensor when the batch mixes t2i with other tasks;
+         # t2i can be treated as unconditional (no-reference-image) editing
+         tensor_list = [
+             torch.zeros_like(tmp_tensor) if isinstance(tensor, list) else tensor for tensor in tensor_list
+         ]
+         assert all(tensor.shape[1] == tensor_list[0].shape[1] for tensor in tensor_list)
+         # find the maximum b, h, w
+         max_b = max(tensor.shape[0] for tensor in tensor_list)
+         max_c = tensor_list[0].shape[1]  # assume c is the same for all tensors
+         max_h = max(tensor.shape[2] for tensor in tensor_list)
+         max_w = max(tensor.shape[3] for tensor in tensor_list)
+
+         padded_tensors = []
+         for tensor in tensor_list:
+             b, c, h, w = tensor.shape
+             pad_b = max_b - b
+             pad_h = max_h - h
+             pad_w = max_w - w
+
+             # pad h and w first (the last two dims)
+             tensor = F.pad(tensor, (0, pad_w, 0, pad_h), value=padding_value)
+             # then pad the b dim (the first dim) up to (max_b, c, h, w)
+             if pad_b > 0:
+                 padding_shape = (pad_b, c, max_h, max_w)
+                 pad_tensor = torch.full(padding_shape, fill_value=padding_value, dtype=tensor.dtype, device=tensor.device)
+                 tensor = torch.cat([tensor, pad_tensor], dim=0)
+
+             padded_tensors.append(tensor)
+
+         # finally stack into (B, b_max, c, h_max, w_max)
+         return torch.stack(padded_tensors)
+
+ def resize_list_of_tensors(weights):
+     # suppose weights is your list of [1, H, W] tensors
+     # 1) find the max height and width
+     heights = [w.shape[-2] for w in weights]
+     widths = [w.shape[-1] for w in weights]
+     max_h, max_w = max(heights), max(widths)
+
+     # 2) interpolate each mask to (max_h, max_w)
+     resized = []
+     for w in weights:
+         # F.interpolate expects a 4D tensor: (N, C, H, W)
+         w_4d = w.unsqueeze(0)  # -> [1, 1, H, W]
+         w_4d = w_4d.unsqueeze(0) if w_4d.ndim == 3 else w_4d
+         # but since w is already [1, H, W], one unsqueeze is enough:
+         # w_4d = w.unsqueeze(0)  # [1, 1, H, W]
+
+         w_resized = F.interpolate(
+             w_4d, size=(max_h, max_w), mode='nearest'
+         )
+         # back to [1, H', W']
+         w_resized = w_resized.squeeze(0)
+         resized.append(w_resized)
+
+     # 3) stack into a single tensor [N, 1, max_h, max_w]
+     weights = torch.stack(resized)  # -> [N, 1, max_h, max_w]
+     return weights
+
+ class DataCollator:
+     def __init__(self, tokenizer: PreTrainedTokenizer, padding_side='right'):
+         self.tokenizer = tokenizer
+         self.padding_side = padding_side
+
+     def __call__(self, instances: List[Dict]) -> Dict:
+         input_ids = [instance["input_ids"][0] for instance in instances]
+         labels = [instance["labels"][0] for instance in instances]
+         image_position = [instance["image_position"] for instance in instances]
+
+         pixel_values = [
+             instance["pixel_values"] for instance in instances if len(instance["pixel_values"]) > 0
+         ]
+         pixel_values = torch.cat(pixel_values) if len(pixel_values) > 0 else None
+
+         image_grid_thw = [
+             instance["image_grid_thw"] for instance in instances if len(instance["image_grid_thw"]) > 0
+         ]
+         image_grid_thw = torch.cat(image_grid_thw) if len(image_grid_thw) > 0 else None
+
+         pil_pixel_values = [
+             instance["pil_pixel_values"] for instance in instances
+         ]
+
+         prompts = [instance["prompt"] for instance in instances]
+
+         ref_pixel_values = [
+             instance["ref_pixel_values"] for instance in instances
+         ]
+         ref_pixel_values = pad_list_of_tensors(ref_pixel_values, padding_value=0)
+
+         siglip_pixel_values = [
+             instance["siglip_pixel_values"] for instance in instances if len(instance["siglip_pixel_values"]) > 0
+         ]
+         siglip_pixel_values = torch.cat(siglip_pixel_values, dim=0) if len(siglip_pixel_values) > 0 else []
+
+         input_ids = torch.nn.utils.rnn.pad_sequence(
+             input_ids, batch_first=True, padding_value=self.tokenizer.pad_token_id,
+             padding_side=self.padding_side,
+         )
+         labels = torch.nn.utils.rnn.pad_sequence(
+             labels, batch_first=True, padding_value=-100,
+             padding_side=self.padding_side,
+         )
+         attention_mask = input_ids.ne(self.tokenizer.pad_token_id)
+
+         weights = [
+             instance["weights"] for instance in instances if len(instance["weights"]) > 0
+         ]
+         if len(weights) > 0:
+             if all([i.shape == weights[0].shape for i in weights]):
+                 weights = torch.stack(weights)
+             else:
+                 weights = [i.unsqueeze(0) for i in weights]
+         else:
+             weights = None
+
+         generated_image = [
+             instance["generated_image"] for instance in instances if len(instance["generated_image"]) > 0
+         ]
+         if len(generated_image) > 0:
+             if all([i.shape == generated_image[0].shape for i in generated_image]):
+                 generated_image = torch.stack(generated_image)
+             else:
+                 generated_image = [i.unsqueeze(0) for i in generated_image]
+         else:
+             generated_image = []
+         return {
+             "input_ids": input_ids,
+             "pixel_values": pixel_values,
+             "labels": labels,
+             "attention_mask": attention_mask,
+             "image_position": image_position,
+             "image_grid_thw": image_grid_thw,
+             "prompts": prompts,
+             "ref_pixel_values": ref_pixel_values,
+             "pil_pixel_values": pil_pixel_values,
+             "siglip_pixel_values": siglip_pixel_values,
+             "weights": weights,
+             "generated_image": generated_image,
+         }
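
A self-contained smoke test (assumed, not from the repo) showing the collator's padding behavior. `_Tok` is a hypothetical stand-in exposing only the `pad_token_id` the collator reads, and each fake instance carries the keys `__call__` expects; note that `pad_sequence(..., padding_side=...)` requires a PyTorch version that supports that keyword, as the collator itself does.

    import torch

    class _Tok:
        pad_token_id = 0  # hypothetical minimal tokenizer stand-in

    def _fake_instance(n):
        ids = torch.arange(1, n + 1).unsqueeze(0)  # ids start at 1 to avoid the pad id
        return {
            "input_ids": ids, "labels": ids.clone(), "image_position": [],
            "pixel_values": [], "image_grid_thw": [], "pil_pixel_values": [],
            "prompt": ["demo"], "ref_pixel_values": [],
            "siglip_pixel_values": [], "weights": [], "generated_image": [],
        }

    collator = DataCollator(tokenizer=_Tok(), padding_side='right')
    batch = collator([_fake_instance(5), _fake_instance(8)])
    print(batch["input_ids"].shape)        # torch.Size([2, 8]) after right-padding
    print(batch["attention_mask"].sum(1))  # tensor([5, 8])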
univa/dataset/llava_dataset.py ADDED
@@ -0,0 +1,312 @@
+ from typing import Any, Callable, Optional, List
+
+ import torch
+ from transformers import PreTrainedTokenizer
+ from torch.utils.data import Dataset
+ from tqdm import tqdm
+ import json
+ import os
+ from PIL import Image
+ from univa.utils.prompter import Prompter
+ import numpy as np
+ from einops import rearrange
+ import random
+ from univa.utils.constant import SPACIAL_TOKEN, GENERATE_TOKEN
+
+ class LlavaDataset(Dataset):
+     def __init__(
+         self,
+         dataset_type: str,
+         data_txt: str,
+         tokenizer: PreTrainedTokenizer,
+         prompter: Prompter,
+         image_processor: Callable,
+         processor: Callable = None,
+         min_pixels: int = 384*384,
+         max_pixels: int = 384*384,
+         image_token_length: int = 729,
+         only_generated_task: bool = False,
+         drop_prompt_rate: float = 0.2,
+     ):
+         assert dataset_type == 'llava'
+         with open(data_txt, "r") as f:
+             self.datasets = [line.strip() for line in f.readlines()]
+
+         self.data = []
+         self._load_data()
+         self.tokenizer = tokenizer
+         self.prompter = prompter
+         self.image_token_length = image_token_length
+         self.image_token = SPACIAL_TOKEN[dataset_type]['image_token']
+         self.image_begin_token = SPACIAL_TOKEN[dataset_type]['image_begin_token']
+         self.image_end_token = SPACIAL_TOKEN[dataset_type]['image_end_token']
+         self.generated_image_token = GENERATE_TOKEN
+         self.image_processor = image_processor
+
+         self.only_generated_task = only_generated_task  # For denoiser training
+         self.drop_prompt_rate = drop_prompt_rate
+         if self.drop_prompt_rate > 0:
+             assert self.only_generated_task, (
+                 "Only generated task is supported when drop_prompt_rate is greater than 0"
+             )
+
+         # Add the image token if it does not exist.
+         if self.image_token not in self.tokenizer.get_vocab():
+             self.tokenizer.add_special_tokens(
+                 {"additional_special_tokens": [self.image_token]}
+             )
+         self.image_token_id = self.tokenizer.convert_tokens_to_ids(self.image_token)
+
+         self.image_begin_token_id = self.tokenizer.convert_tokens_to_ids(
+             self.image_begin_token
+         )
+         assert isinstance(self.image_begin_token_id, int), (
+             f"tokenizer misses image begin token `{self.image_begin_token}`"
+         )
+         self.image_end_token_id = self.tokenizer.convert_tokens_to_ids(
+             self.image_end_token
+         )
+         assert isinstance(self.image_end_token_id, int), (
+             f"tokenizer misses image end token `{self.image_end_token}`"
+         )
+
+     def _load_data(self):
+         for dataset in self.datasets:
+             image_root, json_file = dataset.split(",")
+
+             # Load json file
+             with open(json_file, "r") as f:
+                 data = json.load(f)
+
+             dataset_data = []
+             for line in tqdm(data):
+                 # Ensure `image` is a list
+                 if isinstance(line["image"], str):
+                     line["image"] = [line["image"]]
+                 assert isinstance(line["image"], list), (
+                     "`image` must be a str or a list."
+                 )
+
+                 # Convert image paths to absolute paths
+                 line["image"] = [
+                     os.path.join(image_root, image_path) for image_path in line["image"]
+                 ]
+
+                 dataset_data.append(line)
+
+             print(f"Load {len(dataset_data)} data from {json_file}.")
+             self.data.extend(dataset_data)
+
+     def __len__(self):
+         return len(self.data)
+
+     def __getitem__(self, idx):
+         try:
+             data: Any = self.data[idx]
+
+             # Reformat the conversation to the format of prompter
+             conversations = []
+             prompt = ""
+             for item in data["conversations"]:
+                 if item["from"] == "human":
+                     role = self.prompter.user_role
+                     prompt = item["value"]  # keep the latest user prompt
+                 elif item["from"] == "gpt":
+                     role = self.prompter.assistant_role
+                 else:
+                     raise ValueError(f"Unknown role: {item['from']}")
+                 conversations.append({"from": role, "value": item["value"]})
+             assert prompt != ""
+
+             # Make prompt
+             drop_condition = False
+             if self.only_generated_task:
+                 if self.drop_prompt_rate < random.random():  # keep the prompt
+                     prompt_list = self.prompter.get_train_prompt(conversations)
+                 else:
+                     drop_condition = True
+                     # Drop the prompt
+                     prompt_list = [
+                         {
+                             "from": self.prompter.system_role,
+                             "value": "You are a helpful assistant.",
+                         },
+                         {
+                             "from": self.prompter.user_role,
+                             "value": "Generate an image.",
+                         },
+                         {
+                             "from": self.prompter.assistant_role,
+                             "value": self.generated_image_token,
+                         },
+                     ]
+                     prompt_list = self.prompter.get_train_prompt(prompt_list)
+             else:
+                 prompt_list = self.prompter.get_train_prompt(conversations)
+
+             input_ids = []
+             labels = []
+             has_generated_image = False
+             for item in prompt_list:
+                 item["prompt"] = item["prompt"].replace('<image>', self.image_token)
+                 if self.generated_image_token in item["prompt"]:  # check for the generated image token
+                     assert item["from"] == self.prompter.assistant_role, (
+                         "Generated image token must be in assistant role"
+                     )
+                     assert (
+                         f"{self.generated_image_token}{self.prompter.eos_token}"
+                         in item["prompt"]
+                     ), "Generated image token must be at the end of the prompt"
+
+                     # Replace the generated image token with the image begin token, dropping the eos token
+                     item["prompt"] = item["prompt"].replace(
+                         f"{self.generated_image_token}{self.prompter.eos_token}",
+                         self.image_begin_token,
+                     )
+                     has_generated_image = True
+
+                 tokenized_item = self.tokenizer(
+                     item["prompt"],
+                     return_tensors="pt",
+                     truncation=False,
+                 )
+                 if item["is_labels"]:  # If this prompt is labels
+                     labels.append(tokenized_item.input_ids)
+                 else:
+                     labels.append(torch.full_like(tokenized_item.input_ids, -100))
+                 input_ids.append(tokenized_item.input_ids)
+
+             if (
+                 self.only_generated_task and not has_generated_image
+             ):  # For denoiser training
+                 raise ValueError(
+                     f"Only-generated-task mode requires a generated image token, but this prompt does not contain one: {prompt_list[0]['prompt']}"
+                 )
+
+             input_ids = torch.cat(input_ids, dim=1)
+             labels = torch.cat(labels, dim=1)
+
+             # Load images
+             if has_generated_image:
+                 if not drop_condition:
+                     image_slice = data["image"][:-1]
+                 else:
+                     image_slice = []
+             else:
+                 image_slice = data["image"]
+             image_dict = self._load_image(image_slice, image_processor=self.image_processor, image_token_lengths=self.image_token_length)
+             image_token_lengths = image_dict['image_token_lengths']
+             pixel_values = image_dict['pixel_values']
+             image_grid_thw = image_dict['image_grid_thw']
+
+             # Repeat the image token to image_token_length
+             # and record the positions of image tokens.
+             input_ids, labels, image_position = self._process_image_token(
+                 input_ids,
+                 labels=labels,
+                 image_token_id=self.image_token_id,
+                 image_begin_token_id=self.image_begin_token_id,
+                 image_end_token_id=self.image_end_token_id,
+                 image_token_lengths=image_token_lengths,
+             )
+
+             return_data = {
+                 "input_ids": input_ids,
+                 "labels": labels,
+                 "pixel_values": pixel_values,
+                 "image_position": image_position,
+                 "image_grid_thw": image_grid_thw,
+                 "prompt": [prompt],
+             }
+
+             if has_generated_image:  # If this item is a generation task
+                 image = Image.open(data["image"][-1]).convert("RGB")
+                 image_tensor = torch.tensor(np.array(image)) / 255.0  # scale to 0-1
+                 image_tensor = rearrange(image_tensor, "h w c -> c h w")
+                 return_data["generated_image"] = image_tensor
+
+             return return_data
+         except Exception as e:
+             print(f'Error with {e}')
+             return self.__getitem__(random.randint(0, self.__len__() - 1))
+
+     @staticmethod
+     def _load_image(
+         image_slice: List[str],
+         max_pixels: int = 448*448,
+         min_pixels: int = 448*448,
+         processor: Callable = None,
+         image_processor: Callable = None,
+         image_token_lengths: int = 729,
+         image_token: str = '<image>',
+     ):
+         # the images tensor shape is (b, c, h, w)
+         images = []
+         for image_path in image_slice:  # the last (generated) image is excluded upstream
+             image = Image.open(image_path).convert("RGB")
+             image = image_processor(
+                 image, return_tensors="pt"
+             ).pixel_values
+             images.append(image)
+         if len(images) > 0:
+             images = torch.cat(images)
+         image_token_lengths = len(images) * [image_token_lengths]
+         return {'pixel_values': images, 'image_grid_thw': [], 'image_token_lengths': image_token_lengths}
+
+     @staticmethod
+     def _process_image_token(
+         input_ids: torch.Tensor,
+         image_token_id: int,
+         image_begin_token_id: int,
+         image_end_token_id: int,
+         image_token_lengths: List[int],
+         labels: Optional[torch.Tensor] = None,
+     ):
+         # Find the indices of the image tokens
+         image_token_indices = (input_ids == image_token_id).nonzero(as_tuple=True)
+         image_position = []
+         offset = 0
+         cur_i = 0
+         if isinstance(image_token_lengths, int):
+             image_token_lengths = [image_token_lengths] * len(image_token_indices[1])
+         for idx in image_token_indices[1]:
+             image_token_length = image_token_lengths[cur_i]
+             adjusted_idx = idx + offset
+             assert input_ids[0, adjusted_idx] == image_token_id
+
+             # Add the image begin and end tokens
+             input_ids = torch.cat(
+                 [
+                     input_ids[:, :adjusted_idx],
+                     input_ids.new_full(
+                         (1, 1), image_begin_token_id
+                     ),  # image begin token
+                     input_ids.new_full(
+                         (1, image_token_length), image_token_id
+                     ),  # repeat the image token image_token_length times
+                     input_ids.new_full((1, 1), image_end_token_id),  # image end token
+                     input_ids[:, adjusted_idx + 1:],
+                 ],
+                 dim=1,
+             )
+             if labels is not None:
+                 labels = torch.cat(
+                     [
+                         labels[:, :adjusted_idx],
+                         labels.new_full(
+                             (1, 1), image_begin_token_id
+                         ),  # make the begin token a label
+                         labels.new_full((1, image_token_length), -100),
+                         labels.new_full((1, 1), -100),
+                         labels[:, adjusted_idx + 1:],
+                     ],
+                     dim=1,
+                 )
+
+             adjusted_idx += 1  # skip the image begin token
+             image_position.append(adjusted_idx.item())
+             offset += image_token_length - 1
+             offset += 2  # begin and end tokens
+             cur_i += 1  # advance to the next image's token length
+
+         return input_ids, labels, image_position
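
A quick illustration of `_process_image_token` with toy ids (7/8/9 are hypothetical begin/end/image token ids, not real vocab entries): a single placeholder image token expands into begin + N repeated image tokens + end, and `image_position` records where each run of image tokens starts.

    import torch

    IMG, BEGIN, END = 9, 7, 8
    ids = torch.tensor([[1, 2, IMG, 3]])
    new_ids, _, pos = LlavaDataset._process_image_token(
        ids, image_token_id=IMG, image_begin_token_id=BEGIN,
        image_end_token_id=END, image_token_lengths=[4],
    )
    print(new_ids.tolist())  # [[1, 2, 7, 9, 9, 9, 9, 8, 3]]
    print(pos)               # [3] -> first image token sits right after BEGIN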
univa/dataset/qwen2vl_dataset.py ADDED
@@ -0,0 +1,658 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any, Callable, Optional, List
2
+
3
+ import torch
4
+ from transformers import PreTrainedTokenizer
5
+ from torch.utils.data import Dataset
6
+ from tqdm import tqdm
7
+ import json
8
+ import os
9
+ from PIL import Image
10
+ from univa.utils.prompter import Prompter
11
+ import numpy as np
12
+ from einops import rearrange
13
+ import random
14
+ # from qwen_vl_utils.vision_process import fetch_image, fetch_video
15
+ from qwen_vl_utils.vision_process import to_rgb, smart_resize, fetch_video
16
+ from univa.utils.constant import SPACIAL_TOKEN, GENERATE_TOKEN
17
+ from univa.utils.get_mask import get_weight_mask
18
+ from univa.utils.get_ocr import get_ocr_result
19
+ from fractions import Fraction
20
+ from torchvision.transforms import functional
21
+ from torchvision import transforms
22
+ from io import BytesIO
23
+ import base64
24
+ import requests
25
+ import torch
26
+ from PIL import Image
27
+ from torchvision import io, transforms
28
+ from typing import Optional
29
+
30
+
31
+ def get_aspect_ratio(img):
32
+ width, height = img.size
33
+ return Fraction(width, height).limit_denominator()
34
+
35
+ def has_same_aspect_ratio(img1, img2):
36
+ if not isinstance(img1, Image.Image):
37
+ img1 = Image.open(img1).convert('RGB')
38
+ if not isinstance(img2, Image.Image):
39
+ img2 = Image.open(img2).convert('RGB')
40
+ ratio1 = get_aspect_ratio(img1)
41
+ ratio2 = get_aspect_ratio(img2)
42
+ return ratio1 == ratio2
43
+
44
+ def has_same_resolution(img1, img2):
45
+ if not isinstance(img1, Image.Image):
46
+ img1 = Image.open(img1).convert('RGB')
47
+ if not isinstance(img2, Image.Image):
48
+ img2 = Image.open(img2).convert('RGB')
49
+ return img1.size == img2.size
50
+
51
+ class Qwen2VLDataset(Dataset):
52
+ def __init__(
53
+ self,
54
+ dataset_type: str,
55
+ data_txt: str,
56
+ transform: Callable,
57
+ tokenizer: PreTrainedTokenizer,
58
+ prompter: Prompter,
59
+ image_processor: Callable,
60
+ processor: Callable = None,
61
+ min_pixels: int = 384*384,
62
+ max_pixels: int = 384*384,
63
+ image_token_length: int = 729,
64
+ only_generated_task: bool = False,
65
+ drop_prompt_rate: float = 0.0,
66
+ joint_ref_feature: bool = False,
67
+ anyres: bool = False,
68
+ mask_weight_type: str = 'log',
69
+ siglip_processor: Callable = None,
70
+ ocr_enhancer: bool = False,
71
+ random_data: bool = False,
72
+ maxnum_per_data: int = -1,
73
+ notry: bool = False,
74
+ ):
75
+ assert dataset_type == 'qwen2vl' or dataset_type == 'qwen2p5vl', "dataset_type == 'qwen2vl' or dataset_type == 'qwen2p5vl'"
76
+ with open(data_txt, "r") as f:
77
+ self.datasets = [line.strip() for line in f.readlines()]
78
+
79
+ self.data = []
80
+ self._load_data(maxnum_per_data)
81
+
82
+ self.transform = transform
83
+ self.processor = processor
84
+ self.tokenizer = processor.tokenizer
85
+ self.prompter = prompter
86
+ self.min_pixels = min_pixels
87
+ self.max_pixels = max_pixels
88
+ self.image_token = SPACIAL_TOKEN[dataset_type]['image_token']
89
+ self.image_begin_token = SPACIAL_TOKEN[dataset_type]['image_begin_token']
90
+ self.image_end_token = SPACIAL_TOKEN[dataset_type]['image_end_token']
91
+ self.generated_image_token = GENERATE_TOKEN
92
+ self.image_processor = processor.image_processor
93
+ # self.factor = 4 if joint_ref_feature else 1
94
+ self.factor = 2
95
+
96
+ self.only_generated_task = only_generated_task # For denoiser training
97
+ self.drop_prompt_rate = drop_prompt_rate
98
+ if self.drop_prompt_rate > 0:
99
+ assert self.only_generated_task, (
100
+ "Only generated task is supported when drop_prompt_rate > 0"
101
+ )
102
+ self.mask_weight_type = mask_weight_type
103
+ self.siglip_processor = siglip_processor
104
+ self.ocr_enhancer = ocr_enhancer
105
+ self.random_data = random_data
106
+ self.notry = notry
107
+
108
+ # Add image token if not exists.
109
+ assert self.image_token in self.tokenizer.get_vocab()
110
+ self.image_token_id = self.tokenizer.convert_tokens_to_ids(self.image_token)
111
+
112
+ self.image_begin_token_id = self.tokenizer.convert_tokens_to_ids(
113
+ self.image_begin_token
114
+ )
115
+ assert isinstance(self.image_begin_token_id, int), (
116
+ f"tokenizer miss image begin token `{self.image_begin_token}`"
117
+ )
118
+ self.image_end_token_id = self.tokenizer.convert_tokens_to_ids(
119
+ self.image_end_token
120
+ )
121
+ assert isinstance(self.image_end_token_id, int), (
122
+ f"tokenizer miss image end token `{self.image_end_token}`"
123
+ )
124
+
125
+     def _load_data(self, maxnum_per_data=-1):
+         for dataset in self.datasets:
+             image_root, json_file, need_weight = dataset.split(",")
+
+             # Load json file
+             with open(json_file, "r") as f:
+                 data = json.load(f)
+             if maxnum_per_data > 0 and maxnum_per_data < len(data):
+                 print(f'original data: {len(data)}, sample: {maxnum_per_data}')
+                 data = random.sample(data, maxnum_per_data)
+             dataset_data = []
+             for line in tqdm(data):
+                 if "image" not in line:
+                     line["image"] = []
+                 # Ensure `image` is a list
+                 if isinstance(line["image"], str):
+                     line["image"] = [line["image"]]
+                 assert isinstance(line["image"], list), (
+                     "`image` must be a str or a list."
+                 )
+
+                 # Convert image paths to absolute paths
+                 line["need_weight"] = need_weight
+                 line["image"] = [
+                     os.path.join(image_root, image_path) for image_path in line["image"]
+                 ]
+                 dataset_data.append(line)
+
+             print(f"Load {len(dataset_data)} data from {json_file}.")
+             self.data.extend(dataset_data)
+
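+     # Illustrative note (added; paths and field values are hypothetical): based on
+     # the parsing above, each line of `data_txt` looks like
+     #     /path/to/image_root,/path/to/annotations.json,true
+     # and each record in the JSON file roughly like
+     #     {"conversations": [{"from": "human", "value": "<image>\nDescribe it."},
+     #                        {"from": "gpt", "value": "..."}],
+     #      "image": ["relative/path.png"]}
+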
+     def __len__(self):
+         return len(self.data)
+
+     def _get_random_data(self):
+         prompt = self.prompter(
+             [
+                 {"from": "system", "value": "You are a helpful assistant."},
+                 {
+                     "from": "user",
+                     "value": f"test an image {self.image_token}",
+                 },
+             ]
+         )
+         input_ids = self.tokenizer.batch_encode_plus(
+             [prompt], return_tensors="pt", truncation=False,
+         ).input_ids
+         labels = input_ids
+
+         width, height = 448, 448
+         random_data = np.random.randint(0, 256, (height, width, 3), dtype=np.uint8)
+         image = Image.fromarray(random_data, 'RGB')
+
+         image_slice = [image]
+         image_dict = self._load_image(
+             image_slice, self.max_pixels, self.min_pixels,
+             processor=self.processor, image_token=self.image_token,
+             factor=self.factor,
+             last_image=image,
+             vae_image_transform=self.transform,
+             drop_prompt=False,
+             prompt=prompt,
+             mask_weight_type=self.mask_weight_type,
+             siglip_processor=self.siglip_processor,
+         )
+
+         image_token_lengths = image_dict['image_token_lengths']
+         pixel_values = image_dict['pixel_values']
+         image_grid_thw = image_dict['image_grid_thw']
+         ref_pixel_values = image_dict['ref_pixel_values']
+         pil_pixel_values = image_dict['pil_pixel_values']
+         siglip_pixel_values = image_dict['siglip_pixel_values']
+         weights = image_dict['weights']
+
+         input_ids, labels, image_position = self._process_image_token(
+             input_ids,
+             labels=labels,
+             image_token_id=self.image_token_id,
+             image_begin_token_id=self.image_begin_token_id,
+             image_end_token_id=self.image_end_token_id,
+             image_token_lengths=image_token_lengths,
+         )
+
+         generated_image = torch.randn(3, 512, 512)
+
+         return_data = {
+             "input_ids": input_ids,
+             "labels": labels,
+             "pixel_values": pixel_values,
+             "image_position": image_position,
+             "image_grid_thw": image_grid_thw,
+             "prompt": prompt,
+             "ref_pixel_values": ref_pixel_values,
+             "pil_pixel_values": pil_pixel_values,
+             "siglip_pixel_values": siglip_pixel_values,
+             "weights": weights,
+             "generated_image": generated_image,
+         }
+         return return_data
+
+     def getitem(self, data):
+         # Reformat the conversation to the prompter's format
+         conversations = []
+         prompt = ""
+         for item in data["conversations"]:
+             if item["from"] == "human":
+                 role = self.prompter.user_role
+                 prompt = item["value"]
+             elif item["from"] == "gpt":
+                 role = self.prompter.assistant_role
+             else:
+                 raise ValueError(f"Unknown role: {item['from']}")
+             conversations.append({"from": role, "value": item["value"]})
+         assert prompt != "", "prompt must not be empty"
+         # The last user instruction will be used for the T5 embedding
+         prompt = prompt.replace('<image>', '').replace('\n', '')
+
+         # Make prompt
+         drop_prompt = False
+         if self.only_generated_task:
+             if self.drop_prompt_rate < random.random():  # Keep the prompt with probability 1 - drop_prompt_rate
+                 prompt_list = self.prompter.get_train_prompt(conversations)
+             else:  # Drop the prompt
+                 drop_prompt = True
+                 num_images = (''.join([i['value'] for i in conversations])).count('<image>')
+                 prompt_list = [
+                     {
+                         "from": self.prompter.system_role,
+                         "value": "You are a helpful assistant.",
+                     },
+                     {
+                         "from": self.prompter.user_role,
+                         # "value": f"{num_images * '<image>'} Generate an image.",
+                         "value": "Generate an image.",
+                     },
+                     {
+                         "from": self.prompter.assistant_role,
+                         "value": self.generated_image_token,
+                     },
+                 ]
+                 prompt_list = self.prompter.get_train_prompt(prompt_list)
+         else:
+             prompt_list = self.prompter.get_train_prompt(conversations)
+
+         input_ids = []
+         labels = []
+         has_generated_image = False
+         cur_i = 0
+         for item in prompt_list:
+             item["prompt"] = item["prompt"].replace('<image>', self.image_token)
+
+             if self.generated_image_token in item["prompt"]:
+                 assert item["from"] == self.prompter.assistant_role, (
+                     "Generated image token must be in assistant role"
+                 )
+                 assert (
+                     f"{self.generated_image_token}{self.prompter.eos_token}"
+                     in item["prompt"]
+                 ), "Generated image token must be at the end of the prompt"
+
+                 # Replace the generated image token (plus its eos token) with the image begin token
+                 item["prompt"] = item["prompt"].replace(
+                     f"{self.generated_image_token}{self.prompter.eos_token}",
+                     self.image_begin_token,
+                 )
+                 has_generated_image = True
+
+             if self.ocr_enhancer and (self.image_token in item["prompt"]):
+                 # print('item["prompt"]', item["prompt"])
+                 if not has_generated_image:
+                     num_img = item["prompt"].count(self.image_token)
+                     ocr_sentences = []
+                     for i in range(num_img):
+                         ocr_sentences.append(get_ocr_result(data["image"][cur_i], cur_i))
+                         cur_i += 1
+                     ocr_sentences = '\n'.join(ocr_sentences)
+                     if len(ocr_sentences.split()) > 256:
+                         print(f'ocr_sentences too long ({len(ocr_sentences.split())} words), truncating to the first 256')
+                         ocr_sentences = ' '.join(ocr_sentences.split()[:256])
+                     # ocr_sentences = ''
+                     assert item['prompt'][-len(self.prompter.eos_token):] == self.prompter.eos_token, \
+                         "prompt must end with the eos token"
+                     assert item['prompt'].count(self.prompter.eos_token) == 1, \
+                         "prompt must contain exactly one eos token"
+                     item["prompt"] = item["prompt"].replace(self.prompter.eos_token, f'{ocr_sentences} {self.prompter.eos_token}')
+
+             tokenized_item = self.tokenizer(
+                 item["prompt"],
+                 return_tensors="pt",
+                 truncation=True,
+                 max_length=1024,
+             )
+             if item["is_labels"]:  # This turn is supervised
+                 labels.append(tokenized_item.input_ids)
+             else:
+                 labels.append(torch.full_like(tokenized_item.input_ids, -100))
+             input_ids.append(tokenized_item.input_ids)
+
+         if (
+             self.only_generated_task and not has_generated_image
+         ):  # For denoiser training
+             raise ValueError(
+                 f"Only-generated-task mode requires a generated image token, but this prompt contains none: {prompt_list[0]['prompt']}"
+             )
+
+         input_ids = torch.cat(input_ids, dim=1)
+         labels = torch.cat(labels, dim=1)
+
+         # Load images
+         if has_generated_image:
+             # Generation task: process all images except the last one, which is the target to generate
+             image_slice = data["image"][:-1]
+         else:
+             # Understanding task
+             image_slice = data["image"]
+
+         image_dict = self._load_image(
+             image_slice, self.max_pixels, self.min_pixels,
+             processor=self.processor, image_token=self.image_token,
+             factor=self.factor,
+             last_image=data["image"][-1] if has_generated_image else None,
+             vae_image_transform=self.transform,
+             drop_prompt=drop_prompt,
+             prompt=prompt,
+             mask_weight_type=self.mask_weight_type,
+             siglip_processor=self.siglip_processor,
+             need_weight=data['need_weight'],
+         )
+
+         image_token_lengths = image_dict['image_token_lengths']
+         pixel_values = image_dict['pixel_values']
+         image_grid_thw = image_dict['image_grid_thw']
+         ref_pixel_values = image_dict['ref_pixel_values']
+         pil_pixel_values = image_dict['pil_pixel_values']
+         siglip_pixel_values = image_dict['siglip_pixel_values']
+         weights = image_dict['weights']
+
+         input_ids, labels, image_position = self._process_image_token(
+             input_ids,
+             labels=labels,
+             image_token_id=self.image_token_id,
+             image_begin_token_id=self.image_begin_token_id,
+             image_end_token_id=self.image_end_token_id,
+             image_token_lengths=image_token_lengths,
+         )
+
+         return_data = {
+             "input_ids": input_ids,
+             "labels": labels,
+             "pixel_values": pixel_values,
+             "image_position": image_position,
+             "image_grid_thw": image_grid_thw,
+             "prompt": prompt,
+             "ref_pixel_values": ref_pixel_values,
+             "pil_pixel_values": pil_pixel_values,
+             "siglip_pixel_values": siglip_pixel_values,
+             "weights": weights,
+         }
+
+         if has_generated_image:  # Generation task: also load the target image
+             image = Image.open(data["image"][-1]).convert("RGB")
+             # if self.anyres:
+             #     image = image.resize(pil_pixel_values[-1].size)
+             image_tensor = torch.tensor(np.array(image)) / 255.0  # scale to [0, 1]
+             image_tensor = rearrange(image_tensor, "h w c -> c h w")
+             return_data["generated_image"] = self.transform(image_tensor)
+         else:
+             return_data["generated_image"] = []
+         return return_data
+
+     def __getitem__(self, idx):
+         if self.random_data:
+             return self._get_random_data()
+
+         data: Any = self.data[idx]
+         if self.notry:
+             return self.getitem(data)
+         try:
+             return self.getitem(data)
+         except Exception as e:
+             print(f'Error with {e}')
+             return self.__getitem__(random.randint(0, self.__len__() - 1))
+
+     @staticmethod
+     def _load_image(
+         image_slice: List[str],
+         max_pixels: int = 448*448,
+         min_pixels: int = 448*448,
+         processor: Callable = None,
+         image_processor: Callable = None,
+         image_token_lengths: int = 729,
+         image_token: str = '<|image_pad|>',
+         factor: int = 1,
+         last_image: Optional[str] = None,
+         vae_image_transform: Callable = None,
+         drop_prompt: bool = False,
+         prompt: str = '',
+         mask_weight_type: str = None,
+         siglip_processor: Callable = None,
+         need_weight: str = 'true',
+     ):
+         resize_ref_image = False
+         pil_pixel_values_last = []
+         if last_image is not None:
+             last_vision_infos = dict(
+                 image=last_image, min_pixels=min_pixels, max_pixels=max_pixels
+             )
+             # last_image will be resized by the qwenvl processor automatically,
+             # so the generated image has variable resolution
+             last_image_inputs, last_video_inputs = process_vision_info([last_vision_infos], factor=factor)
+
+             # record the size that the qwenvl processor will actually use
+             pil_pixel_values_last.append(last_image_inputs[0])
+
+             # Not all reference images share one resolution. If multiple reference
+             # images have different resolutions, resize them to match last_image
+             # (the image to generate).
+             if not all([has_same_resolution(image_path, last_image) for image_path in image_slice]):
+                 resize_ref_image = True
+                 resize_w, resize_h = last_image_inputs[0].size
+
+         image_token_lengths = []
+         pixel_values = []
+         image_grid_thw = []
+         ref_pixel_values = []
+         pil_pixel_values = []
+         siglip_pixel_values = []
+         # The last image (the generation target) is not part of image_slice
+         for image_path in image_slice:
+             vision_infos = dict(image=image_path, min_pixels=min_pixels, max_pixels=max_pixels)
+
+             # if the reference images differ in resolution, resize them to match the generated image (last_image)
+             if resize_ref_image:
+                 vision_infos.update(
+                     dict(resized_height=resize_h, resized_width=resize_w)
+                 )
+             image_inputs, video_inputs = process_vision_info([vision_infos], factor=factor)
+             inputs = processor(text=[f'dummy {image_token}'], images=image_inputs, videos=video_inputs, padding=True, return_tensors="pt")
+
+             if not drop_prompt:
+                 pixel_values.append(inputs.pixel_values)  # inputs.pixel_values shape is (token, dim)
+                 image_grid_thw.append(inputs.image_grid_thw)  # image_grid_thw List[int, int, int]
+             image_token_length = (inputs.input_ids[0] == processor.tokenizer.convert_tokens_to_ids(image_token)).sum()
+             image_token_lengths.append(image_token_length)
+
+             image_tensor = torch.tensor(np.array(image_inputs[0])) / 255.0  # scale to [0, 1]
+             image_tensor = rearrange(image_tensor, "h w c -> 1 c h w")
+             if vae_image_transform is not None:
+                 # image_tensor has already been resized by the qwenvl processor
+                 image_tensor = (image_tensor - 0.5) / 0.5  # shift [0, 1] to [-1, 1]
+             pil_pixel_values.append(image_inputs[0])
+
+             if siglip_processor is not None:
+                 siglip_pixel_value = siglip_processor.preprocess(
+                     images=Image.open(image_path).convert('RGB') if isinstance(image_path, str) else image_path,
+                     do_resize=True, return_tensors="pt", do_convert_rgb=True
+                 ).pixel_values  # 1 c h w
+                 if drop_prompt:
+                     siglip_pixel_values.append(torch.zeros_like(siglip_pixel_value))
+                 else:
+                     siglip_pixel_values.append(siglip_pixel_value)
+             # use a zero image as the unconditional reference image
+             if drop_prompt:
+                 ref_pixel_values.append(torch.zeros_like(image_tensor))
+             else:
+                 ref_pixel_values.append(image_tensor)
+
+         # If a sample has multiple images, concatenate them,
+         # e.g. pixel_values[0] (n1, 1176) and pixel_values[1] (n2, 1176) become (n1+n2, 1176)
+         if len(pixel_values) > 0:
+             pixel_values = torch.concat(pixel_values)
+             image_grid_thw = torch.concat(image_grid_thw)  # (b, 3), the grid of t, h, w
+         # if len(ref_pixel_values) > 0:
+         #     ref_pixel_values = torch.concat(ref_pixel_values)  # b c h w
+         ref_pixel_values = []
+         if len(siglip_pixel_values) > 0:
+             siglip_pixel_values = torch.concat(siglip_pixel_values)  # b c h w
+
+         pil_pixel_values = pil_pixel_values + pil_pixel_values_last
+
+         if mask_weight_type is not None:
+             _, weights = get_weight_mask(pil_pixel_values, prompt, mask_weight_type, need_weight)
+             if need_weight.lower() == 'false':
+                 assert torch.all(weights == 1)
+         else:
+             weights = []
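+         # Shape summary (added for clarity, following the annotations above):
+         # pixel_values is (total_image_tokens, 1176) across all reference images,
+         # image_grid_thw is (n_images, 3) holding each image's (t, h, w) grid,
+         # siglip_pixel_values is (n_images, c, h, w), and pil_pixel_values also
+         # carries the PIL image of the generation target at the end.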
+         return {
+             'pixel_values': pixel_values,
+             'image_grid_thw': image_grid_thw,
+             'image_token_lengths': image_token_lengths,
+             'ref_pixel_values': ref_pixel_values,
+             'pil_pixel_values': pil_pixel_values,
+             'siglip_pixel_values': siglip_pixel_values,
+             'weights': weights,
+         }
+
+     @staticmethod
+     def _process_image_token(
+         input_ids: torch.Tensor,
+         image_token_id: int,
+         image_begin_token_id: int,
+         image_end_token_id: int,
+         image_token_lengths: List[int],
+         labels: Optional[torch.Tensor] = None,
+     ):
+         # Find the indices of the image tokens
+         image_token_indices = (input_ids == image_token_id).nonzero(as_tuple=True)
+         # assert len(image_token_lengths) == image_token_indices[1].numel()
+         image_position = []
+         offset = 0
+         cur_i = 0
+         if isinstance(image_token_lengths, int):
+             image_token_lengths = [image_token_lengths] * len(image_token_indices[1])
+         for idx in image_token_indices[1]:
+             image_token_length = image_token_lengths[cur_i]
+             adjusted_idx = idx + offset
+             assert input_ids[0, adjusted_idx] == image_token_id
+
+             # Wrap with image begin/end tokens and expand the image token
+             input_ids = torch.cat(
+                 [
+                     input_ids[:, :adjusted_idx],
+                     input_ids.new_full(
+                         (1, 1), image_begin_token_id
+                     ),  # image begin token
+                     input_ids.new_full(
+                         (1, image_token_length), image_token_id
+                     ),  # repeat the image token image_token_length times
+                     input_ids.new_full((1, 1), image_end_token_id),  # image end token
+                     input_ids[:, adjusted_idx + 1 :],
+                 ],
+                 dim=1,
+             )
+             if labels is not None:
+                 labels = torch.cat(
+                     [
+                         labels[:, :adjusted_idx],
+                         labels.new_full(
+                             (1, 1), image_begin_token_id
+                         ),  # make the begin token a label
+                         labels.new_full((1, image_token_length), -100),
+                         labels.new_full((1, 1), -100),
+                         labels[:, adjusted_idx + 1 :],
+                     ],
+                     dim=1,
+                 )
+
+             adjusted_idx += 1  # skip the image begin token
+             image_position.append(adjusted_idx.item())
+             offset += image_token_length - 1
+             offset += 2  # begin and end tokens
+
+             cur_i += 1
+
+         return input_ids, labels, image_position
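+     # Illustrative example (added for clarity): with image_token_length = 4, a row
+     # [..., <image>, ...] expands to [..., <begin>, <image> x 4, <end>, ...]; in the
+     # labels, <begin> stays supervised while the expanded <image> tokens and <end>
+     # are masked with -100, and image_position records the index just after <begin>.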
+
+
+ def fetch_image(ele: dict[str, str | Image.Image], size_factor: int = 28) -> Image.Image:
+     if "image" in ele:
+         image = ele["image"]
+     else:
+         image = ele["image_url"]
+     image_obj = None
+     if isinstance(image, Image.Image):
+         image_obj = image
+     elif image.startswith("http://") or image.startswith("https://"):
+         response = requests.get(image, stream=True)
+         image_obj = Image.open(BytesIO(response.content))
+     elif image.startswith("file://"):
+         image_obj = Image.open(image[7:])
+     elif image.startswith("data:image"):
+         if "base64," in image:
+             _, base64_data = image.split("base64,", 1)
+             data = base64.b64decode(base64_data)
+             image_obj = Image.open(BytesIO(data))
+     else:
+         image_obj = Image.open(image)
+     if image_obj is None:
+         raise ValueError(f"Unrecognized image input; supported forms are local path, http url, base64 and PIL.Image, got {image}")
+     image = to_rgb(image_obj)
+     ## resize
+     if "resized_height" in ele and "resized_width" in ele:
+         resized_height, resized_width = smart_resize(
+             ele["resized_height"],
+             ele["resized_width"],
+             factor=size_factor,
+         )
+     else:
+         width, height = image.size
+         min_pixels = ele.get("min_pixels")
+         max_pixels = ele.get("max_pixels")
+         resized_height, resized_width = smart_resize(
+             height,
+             width,
+             factor=size_factor,
+             min_pixels=min_pixels,
+             max_pixels=max_pixels,
+         )
+     image = image.resize((resized_width, resized_height), resample=Image.Resampling.BICUBIC)
+
+     return image
+
+ def process_vision_info(
+     vision_infos: list,
+     return_video_kwargs: bool = False,
+     factor: int = 1,
+ ) -> tuple[list[Image.Image] | None, list[torch.Tensor | list[Image.Image]] | None, Optional[dict]]:
+
+     ## Read images or videos
+     image_inputs = []
+     video_inputs = []
+     video_sample_fps_list = []
+     for vision_info in vision_infos:
+         if "image" in vision_info or "image_url" in vision_info:
+             image_inputs.append(fetch_image(vision_info, size_factor=28*factor))
+         elif "video" in vision_info:
+             video_input, video_sample_fps = fetch_video(vision_info, return_video_sample_fps=True)
+             video_sample_fps_list.append(video_sample_fps)
+             video_inputs.append(video_input)
+         else:
+             raise ValueError("image, image_url or video should be in content.")
+     if len(image_inputs) == 0:
+         image_inputs = None
+     if len(video_inputs) == 0:
+         video_inputs = None
+     if return_video_kwargs:
+         return image_inputs, video_inputs, {'fps': video_sample_fps_list}
+     return image_inputs, video_inputs
univa/eval/__init__.py ADDED
File without changes
univa/eval/configuration_eval.py ADDED
@@ -0,0 +1,55 @@
+ from dataclasses import dataclass
+ from typing import Optional, List
+
+ @dataclass
+ class EvalConfig:
+     pretrained_lvlm_name_or_path: str
+     pretrained_denoiser_name_or_path: str
+     pretrained_siglip_name_or_path: str
+
+     ocr_enhancer: bool = False
+     joint_with_t5: bool = False
+     only_use_t5: bool = False
+
+     seed: int = 42
+     allow_tf32: bool = False
+
+     output_dir: str = "./output"
+
+     num_images_per_prompt: int = 1
+     num_inference_steps: int = 32
+     guidance_scale: float = 3.5  # Used in Flux
+     num_samples_per_prompt: int = 1
+     height: int = 1024
+     width: int = 1024
+     min_pixels: int = 448*448
+     max_pixels: int = 448*448
+     anyres: str = 'any_11ratio'
+     padding_side: str = 'right'
+
+     local_rank: int = 0
+     world_size: int = 1
+
+     # genai
+     genai_prompt_path: str = "univa/eval/genai/eval_prompts/genai527/genai_image.json"
+
+     # geneval
+     n_samples: int = 4
+     geneval_prompt_path: str = "univa/eval/geneval/evaluation_metadata.jsonl"
+     resized_height: int = 1024
+     resized_width: int = 1024
+
+     # dpgbench
+     dpgbench_prompt_path: str = "univa/eval/dpgbench/dpgbench_prompts.json"
+
+     # wise
+     wise_prompt_path: str = "univa/eval/wise/data"
+
+     # imgedit
+     imgedit_prompt_path: str = "univa/eval/imgedit/basic_edit.json"
+     imgedit_image_dir: str = "/mnt/data/lb/Remake/imgedit_bench_eval_images"
+
+     # gedit
+     gedit_prompt_path: str = "univa/eval/gedit/basic_edit.json"
+     gedit_image_dir: str = "/mnt/data/lb/Remake/gedit_bench_eval_images"
univa/eval/dpgbench/README.md ADDED
@@ -0,0 +1,65 @@
+
+ The original code is from [DPG-Bench](https://github.com/TencentQQGYLab/ELLA).
+
+ ## Requirements and Installation
+
+ > The official environment is **NOT** recommended.
+
+ Prepare the conda environment:
+
+ ```bash
+ conda create -n dpgbench_eval python=3.10 -y
+ conda activate dpgbench_eval
+ ```
+
+ Install the packages:
+
+ ```bash
+ pip install torch==2.5.1 torchvision==0.20.1 torchaudio==2.5.1 --index-url https://download.pytorch.org/whl/cu124
+ pip install "pip<24.1"
+ pip install -r requirements.txt
+ ```
+
+ ## Eval
+
+ ### Generate samples
+
+ ```bash
+ # switch to the univa env
+ MODEL_PATH='path/to/model'
+ OUTPUT_DIR='path/to/eval_output/dpgbench'
+ CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 torchrun \
+     --nproc_per_node 8 \
+     -m step1_gen_samples \
+     dpgbench.yaml \
+     --pretrained_lvlm_name_or_path ${MODEL_PATH} \
+     --output_dir ${OUTPUT_DIR}
+ ```
+
+ ### Evaluation & Summary
+
+ Download the mplug model to `$MPLUG_LOCAL_PATH`:
+
+ ```bash
+ conda activate dpgbench_eval
+ modelscope download --model 'iic/mplug_visual-question-answering_coco_large_en' --local_dir ${MPLUG_LOCAL_PATH}
+ ```
+
+ ```bash
+ conda activate dpgbench_eval
+ export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
+ IMAGE_DIR=${OUTPUT_DIR}
+ accelerate launch --num_machines 1 --num_processes 8 \
+     --multi_gpu --mixed_precision "fp16" \
+     step2_compute_dpg_bench.py \
+     --image_root_path ${IMAGE_DIR} \
+     --resolution 1024 \
+     --pic_num 4 \
+     --res_path ${IMAGE_DIR}.txt \
+     --vqa_model mplug \
+     --mplug_local_path ${MPLUG_LOCAL_PATH} \
+     --csv eval_prompts/dpgbench.csv
+ cat ${IMAGE_DIR}.txt
+ ```
univa/eval/dpgbench/__init__.py ADDED
File without changes
univa/eval/dpgbench/dpgbench.yaml ADDED
@@ -0,0 +1,18 @@
+
+ pretrained_lvlm_name_or_path: /mnt/data/lb/Remake/UniWorld//checkpoints/flux_qwen2p5vl_7b_vlm_mlp_siglip_stage2_ts_1024_bs42x8x1_fa_any_11ratio_ema999_ocr_adamw_t5_1p0_lr5e-6_mask_refstyle_extract/checkpoint-20000/model_ema
+ pretrained_denoiser_name_or_path: /mnt/data/checkpoints/black-forest-labs/FLUX.1-dev/
+ pretrained_siglip_name_or_path: /mnt/data/checkpoints/google/siglip2-so400m-patch16-512
+ joint_with_t5: true
+
+ seed: 42
+ allow_tf32: false
+
+ output_dir: /mnt/data/lb/Remake/UniWorld//eval_output/dpgbench
+
+ num_images_per_prompt: 4
+ num_inference_steps: 28
+ guidance_scale: 2.5
+ height: 1024
+ width: 1024
+
+ dpgbench_prompt_path: /mnt/data/lb/Remake/UniWorld//univa/eval/dpgbench/eval_prompts/dpgbench_prompts.json
univa/eval/dpgbench/eval_prompts/dpgbench.csv ADDED
The diff for this file is too large to render. See raw diff
 
univa/eval/dpgbench/eval_prompts/dpgbench_prompts.json ADDED
The diff for this file is too large to render. See raw diff
 
univa/eval/dpgbench/requirements.txt ADDED
@@ -0,0 +1,32 @@
+ accelerate
+ numpy
+ pandas
+ pillow
+ tqdm
+
+ # for modelscope
+ cloudpickle
+ decord>=0.6.0
+ diffusers
+ fairseq
+ ftfy>=6.0.3
+ librosa==0.10.1
+ modelscope
+ opencv-python
+ # compatible with taming-transformers-rom1504
+ rapidfuzz
+ # rouge-score was just recently updated from 0.0.4 to 0.0.7,
+ # which introduced compatibility issues that are being investigated
+ rouge_score<=0.0.4
+ safetensors
+ # scikit-video
+ soundfile
+ taming-transformers-rom1504
+ tiktoken
+ timm
+ tokenizers
+ torchvision
+ transformers
+ transformers_stream_generator
+ unicodedata2
+ zhconv
univa/eval/dpgbench/step1_gen_samples.py ADDED
@@ -0,0 +1,248 @@
+
+ import sys
+ import os
+ root = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", ".."))
+ sys.path.append(root)
+ import json
+ import torch
+ import random
+ import subprocess
+ import argparse
+ import numpy as np
+ import pandas as pd
+ import torch.distributed as dist
+ from PIL import Image
+ from tqdm import tqdm
+ from qwen_vl_utils import process_vision_info
+ from torchvision import transforms
+ from transformers import AutoProcessor
+ from transformers import SiglipImageProcessor, SiglipVisionModel
+ from univa.utils.flux_pipeline import FluxPipeline
+ from univa.eval.configuration_eval import EvalConfig
+ from univa.utils.get_ocr import get_ocr_result
+ from univa.utils.denoiser_prompt_embedding_flux import encode_prompt
+ from univa.models.qwen2p5vl.modeling_univa_qwen2p5vl import UnivaQwen2p5VLForConditionalGeneration
+
+ # adapted from https://github.com/huggingface/accelerate/blob/main/src/accelerate/utils/random.py#L31
+ def set_seed(seed, rank, device_specific=True):
+     if device_specific:
+         seed += rank
+     random.seed(seed)
+     np.random.seed(seed)
+     torch.manual_seed(seed)
+     torch.cuda.manual_seed(seed)
+     torch.cuda.manual_seed_all(seed)
+     torch.backends.cudnn.deterministic = True
+     torch.backends.cudnn.benchmark = False
+
+ def initialize_models(args, device):
+
+     # Load the main model and task head
+     model = UnivaQwen2p5VLForConditionalGeneration.from_pretrained(
+         args.pretrained_lvlm_name_or_path,
+         torch_dtype=torch.bfloat16,
+         attn_implementation="flash_attention_2",
+     ).to(device)
+
+     processor = AutoProcessor.from_pretrained(
+         args.pretrained_lvlm_name_or_path,
+         min_pixels=args.min_pixels,
+         max_pixels=args.max_pixels,
+     )
+
+     # Load the FLUX pipeline
+     pipe = FluxPipeline.from_pretrained(
+         args.pretrained_denoiser_name_or_path,
+         transformer=model.denoise_tower.denoiser,
+         torch_dtype=torch.bfloat16,
+     ).to(device)
+     tokenizers = [pipe.tokenizer, pipe.tokenizer_2]
+     text_encoders = [pipe.text_encoder, pipe.text_encoder_2]
+
+     siglip_processor = SiglipImageProcessor.from_pretrained(args.pretrained_siglip_name_or_path)
+     siglip_model = SiglipVisionModel.from_pretrained(
+         args.pretrained_siglip_name_or_path,
+         torch_dtype=torch.bfloat16,
+     ).to(device)
+
+     return {
+         'model': model,
+         'processor': processor,
+         'pipe': pipe,
+         'tokenizers': tokenizers,
+         'text_encoders': text_encoders,
+         'device': device,
+         'siglip_model': siglip_model,
+         'siglip_processor': siglip_processor,
+     }
+
+
+ def init_gpu_env(args):
+     local_rank = int(os.getenv('RANK', 0))
+     world_size = int(os.getenv('WORLD_SIZE', 1))
+     args.local_rank = local_rank
+     args.world_size = world_size
+     torch.cuda.set_device(local_rank)
+     dist.init_process_group(
+         backend='nccl', init_method='env://',
+         world_size=world_size, rank=local_rank
+     )
+     return args
+
+
+ def run_model_and_return_samples(args, state, text, image1=None, image2=None):
+
+     # Build content
+     convo = []
+     image_paths = []
+     content = []
+     for img in (image1, image2):
+         if img:
+             content.append({'type':'image','image':img,'min_pixels':args.min_pixels,'max_pixels':args.max_pixels})
+             image_paths.append(img)
+     if text:
+         ocr_text = ''
+         if args.ocr_enhancer and content:
+             cur_ocr_i = 0
+             ocr_texts = []
+             for img in (image1, image2):
+                 if img:
+                     ocr_texts.append(get_ocr_result(img, cur_ocr_i))
+                     cur_ocr_i += 1
+             ocr_text = '\n'.join(ocr_texts)
+         content.append({'type':'text','text': text + ocr_text})
+
+     if not args.only_use_t5:
+         convo.append({'role':'user','content':content})
+
+         # Prepare inputs
+         chat_text = state['processor'].apply_chat_template(
+             convo,
+             tokenize=False,
+             add_generation_prompt=True
+         )
+         chat_text = '<|im_end|>\n'.join(chat_text.split('<|im_end|>\n')[1:])
+         image_inputs, video_inputs = process_vision_info(convo)
+         inputs = state['processor'](
+             text=[chat_text], images=image_inputs, videos=video_inputs,
+             padding=True, return_tensors='pt'
+         ).to(state['device'])
+
+         # Image generation pipeline
+         siglip_hs = None
+         if state['siglip_processor'] and image_paths:
+             vals = [state['siglip_processor'].preprocess(
+                         images=Image.open(p).convert('RGB'), do_resize=True,
+                         return_tensors='pt', do_convert_rgb=True
+                     ).pixel_values.to(state['device'])
+                     for p in image_paths]
+             siglip_hs = state['siglip_model'](torch.concat(vals)).last_hidden_state
+
+         with torch.no_grad():
+             lvlm = state['model'](
+                 inputs.input_ids, pixel_values=getattr(inputs,'pixel_values',None),
+                 attention_mask=inputs.attention_mask,
+                 image_grid_thw=getattr(inputs,'image_grid_thw',None),
+                 siglip_hidden_states=siglip_hs,
+                 output_type='denoise_embeds'
+             )
+             prm_embeds, pooled = encode_prompt(
+                 state['text_encoders'], state['tokenizers'],
+                 text if args.joint_with_t5 else '', 256, state['device'], 1
+             )
+         emb = torch.concat([lvlm, prm_embeds], dim=1) if args.joint_with_t5 else lvlm
+     else:
+         prm_embeds, pooled = encode_prompt(
+             state['text_encoders'], state['tokenizers'],
+             text, 256, state['device'], 1
+         )
+         emb = prm_embeds
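+     # Note (added for clarity): when joint_with_t5 is set, the LVLM denoise
+     # embeddings and the 256-token T5 prompt embeddings are concatenated along the
+     # sequence dimension (dim=1), which assumes the two share the same hidden size.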
+
+
+     with torch.no_grad():
+         img = state['pipe'](
+             prompt_embeds=emb,
+             pooled_prompt_embeds=pooled,
+             height=args.height,
+             width=args.width,
+             num_inference_steps=args.num_inference_steps,
+             guidance_scale=args.guidance_scale,
+             num_images_per_prompt=args.num_images_per_prompt,
+         ).images
+     return img
+
+
+ def concat_image(images, save_path, args):
+     height = args.height
+     width = args.width
+
+     # Create a new blank image, twice the width and height of a single image
+     new_image = Image.new('RGB', (width * 2, height * 2))
+
+     # Paste the four images into their positions in a 2x2 grid
+     for index in range(4):
+         row = index // 2
+         col = index % 2
+         img = images[index]
+         new_image.paste(img, (col * width, row * height))
+
+     # Save the stitched image
+     new_image.save(save_path)
+
+
+ def main(args):
+
+     args = init_gpu_env(args)
+
+     torch.backends.cuda.matmul.allow_tf32 = False
+     torch.backends.cudnn.allow_tf32 = False
+     if args.allow_tf32:
+         torch.backends.cuda.matmul.allow_tf32 = True
+         torch.backends.cudnn.allow_tf32 = True
+
+     set_seed(args.seed, rank=args.local_rank, device_specific=True)
+     device = torch.cuda.current_device()
+     state = initialize_models(args, device)
+
+     if not os.path.exists(args.output_dir):
+         os.makedirs(args.output_dir)
+
+     with open(args.dpgbench_prompt_path, 'r') as f:
+         data = list(json.load(f).items())
+     data = data[args.local_rank::args.world_size]
+
+     for filename, text_prompt in tqdm(data):
+
+         img_name = filename.replace('.txt', '.png')
+
+         save_path = os.path.join(args.output_dir, img_name)
+         if os.path.exists(save_path):
+             continue
+
+         image = run_model_and_return_samples(args, state, text_prompt, image1=None, image2=None)
+
+         concat_image(image, save_path, args)
+
+
+ if __name__ == "__main__":
+     from omegaconf import OmegaConf
+
+     parser = argparse.ArgumentParser()
+     parser.add_argument("config", type=str)
+     parser.add_argument("--pretrained_lvlm_name_or_path", type=str, default=None, required=False)
+     parser.add_argument("--output_dir", type=str, default=None, required=False)
+     args = parser.parse_args()
+
+     config = OmegaConf.load(args.config)
+     schema = OmegaConf.structured(EvalConfig)
+     conf = OmegaConf.merge(schema, config)
+     if args.pretrained_lvlm_name_or_path is not None:
+         assert args.output_dir is not None
+         conf.pretrained_lvlm_name_or_path = args.pretrained_lvlm_name_or_path
+         conf.output_dir = args.output_dir
+     main(conf)
univa/eval/dpgbench/step2_compute_dpg_bench.py ADDED
@@ -0,0 +1,269 @@
+ import argparse
+ import os
+ import os.path as osp
+ import time
+ from collections import defaultdict
+
+ import numpy as np
+ import pandas as pd
+ import torch
+ from accelerate import Accelerator
+ from accelerate.utils import gather_object
+ from PIL import Image
+ from tqdm import tqdm
+
+
+ def parse_args():
+     parser = argparse.ArgumentParser(description="DPG-Bench evaluation.")
+     parser.add_argument(
+         "--image_root_path",
+         type=str,
+         default=None,
+     )
+     parser.add_argument(
+         "--resolution",
+         type=int,
+         default=None,
+     )
+     parser.add_argument(
+         "--csv",
+         type=str,
+         default='eval/eval_prompts/DPGbench/dpg_bench.csv',
+     )
+     parser.add_argument(
+         "--res_path",
+         type=str,
+         default='eval/dpgbench_test/score_result/result.txt',
+     )
+     parser.add_argument(
+         "--pic_num",
+         type=int,
+         default=1,
+     )
+     parser.add_argument(
+         "--vqa_model",
+         type=str,
+         default='mplug',
+     )
+
+     parser.add_argument(
+         "--vqa_model_ckpt",
+         type=str,
+         default='/storage/hxy/t2i/opensora/Open-Sora-Plan/opensora/eval/dpgbench_test/mplug',
+     )
+
+     parser.add_argument(
+         "--mplug_local_path",
+         type=str,
+         default='/storage/hxy/t2i/opensora/Open-Sora-Plan/opensora/eval/dpgbench_test/mplug',
+     )
+
+
+     args = parser.parse_args()
+     return args
+
+
+ class MPLUG(torch.nn.Module):
+     def __init__(self, ckpt='weight/dpgbench', device='gpu'):
+         super().__init__()
+         from modelscope.pipelines import pipeline
+         from modelscope.utils.constant import Tasks
+         self.pipeline_vqa = pipeline(Tasks.visual_question_answering, model=ckpt, device=device)
+
+     def vqa(self, image, question):
+         input_vqa = {'image': image, 'question': question}
+         result = self.pipeline_vqa(input_vqa)
+         return result['text']
+
+ def prepare_dpg_data(args):
+     previous_id = ''
+     current_id = ''
+     question_dict = dict()
+     category_count = defaultdict(int)
+     # CSV columns: 'item_id', 'text', 'keywords', 'proposition_id', 'dependency',
+     # 'category_broad', 'category_detailed', 'tuple', 'question_natural_language'
+     data = pd.read_csv(args.csv)
+     for i, line in data.iterrows():
+         if i == 0:
+             continue
+
+         current_id = line.item_id
+         qid = int(line.proposition_id)
+         dependency_list_str = line.dependency.split(',')
+         dependency_list_int = []
+         for d in dependency_list_str:
+             d_int = int(d.strip())
+             dependency_list_int.append(d_int)
+
+         if current_id == previous_id:
+             question_dict[current_id]['qid2tuple'][qid] = line.tuple
+             question_dict[current_id]['qid2dependency'][qid] = dependency_list_int
+             question_dict[current_id]['qid2question'][qid] = line.question_natural_language
+         else:
+             question_dict[current_id] = dict(
+                 qid2tuple={qid: line.tuple},
+                 qid2dependency={qid: dependency_list_int},
+                 qid2question={qid: line.question_natural_language})
+
+         category = line.question_natural_language.split('(')[0].strip()
+         category_count[category] += 1
+
+         previous_id = current_id
+
+     return question_dict
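+ # Illustrative structure of the returned dict (hypothetical values):
+ # question_dict['sample_001'] = {
+ #     'qid2tuple': {1: 'entity - whole (dog)', 2: 'attribute - color (dog, brown)'},
+ #     'qid2dependency': {1: [0], 2: [1]},
+ #     'qid2question': {1: 'Is there a dog?', 2: 'Is the dog brown?'},
+ # }
+ # A dependency of 0 means the question has no parent question.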
+
+ def crop_image(input_image, crop_tuple=None):
+     if crop_tuple is None:
+         return input_image
+
+     cropped_image = input_image.crop((crop_tuple[0], crop_tuple[1], crop_tuple[2], crop_tuple[3]))
+
+     return cropped_image
+
+ def compute_dpg_one_sample(args, question_dict, image_path, vqa_model, resolution):
+     generated_image = Image.open(image_path)
+     crop_tuples_list = [
+         (0, 0, resolution, resolution),
+         (resolution, 0, resolution*2, resolution),
+         (0, resolution, resolution, resolution*2),
+         (resolution, resolution, resolution*2, resolution*2),
+     ]
+
+     crop_tuples = crop_tuples_list[:args.pic_num]
+     key = osp.basename(image_path).split('.')[0]
+     value = question_dict.get(key, None)
+     qid2tuple = value['qid2tuple']
+     qid2question = value['qid2question']
+     qid2dependency = value['qid2dependency']
+
+     qid2answer = dict()
+     qid2scores = dict()
+     qid2validity = dict()
+
+     scores = []
+     for crop_tuple in crop_tuples:
+         cropped_image = crop_image(generated_image, crop_tuple)
+         for id, question in qid2question.items():
+             answer = vqa_model.vqa(cropped_image, question)
+             qid2answer[id] = answer
+             qid2scores[id] = float(answer == 'yes')
+             with open(args.res_path.replace('.txt', '_detail.txt'), 'a') as f:
+                 f.write(image_path + ', ' + str(crop_tuple) + ', ' + question + ', ' + answer + '\n')
+         qid2scores_orig = qid2scores.copy()
+
+         for id, parent_ids in qid2dependency.items():
+             # zero out a score if any parent question was answered 'no'
+             any_parent_answered_no = False
+             for parent_id in parent_ids:
+                 if parent_id == 0:
+                     continue
+                 if qid2scores[parent_id] == 0:
+                     any_parent_answered_no = True
+                     break
+             if any_parent_answered_no:
+                 qid2scores[id] = 0
+                 qid2validity[id] = False
+             else:
+                 qid2validity[id] = True
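+         # Worked example (added for clarity): if question 2 ("Is the dog brown?")
+         # depends on question 1 ("Is there a dog?") and question 1 was answered
+         # 'no', question 2's score is zeroed here regardless of its own answer.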
+
+         score = sum(qid2scores.values()) / len(qid2scores)
+         scores.append(score)
+     average_score = sum(scores) / len(scores)
+     with open(args.res_path, 'a') as f:
+         f.write(image_path + ', ' + ', '.join(str(i) for i in scores) + ', ' + str(average_score) + '\n')
+
+     return average_score, qid2tuple, qid2scores_orig
+
+
+ def main():
+     args = parse_args()
+
+     accelerator = Accelerator()
+
+     question_dict = prepare_dpg_data(args)
+
+     timestamp = time.time()
+     time_array = time.localtime(timestamp)
+     time_style = time.strftime("%Y%m%d-%H%M%S", time_array)
+     if args.res_path is None:
+         args.res_path = osp.join(args.image_root_path, f'dpg-bench_{time_style}_results.txt')
+     if accelerator.is_main_process:
+         with open(args.res_path, 'w') as f:
+             pass
+         with open(args.res_path.replace('.txt', '_detail.txt'), 'w') as f:
+             pass
+
+     device = str(accelerator.device)
+     if args.vqa_model == 'mplug':
+         vqa_model = MPLUG(args.mplug_local_path, device=device)
+     else:
+         raise NotImplementedError
+     vqa_model = accelerator.prepare(vqa_model)
+     vqa_model = getattr(vqa_model, 'module', vqa_model)
+
+     filename_list = os.listdir(args.image_root_path)
+     num_each_rank = len(filename_list) / accelerator.num_processes
+     local_rank = accelerator.process_index
+     local_filename_list = filename_list[round(local_rank * num_each_rank) : round((local_rank + 1) * num_each_rank)]
+
+     local_scores = []
+     local_category2scores = defaultdict(list)
+     model_id = osp.basename(args.image_root_path)
+     print(f'Start to conduct evaluation of {model_id}')
+     for fn in tqdm(local_filename_list):
+         image_path = osp.join(args.image_root_path, fn)
+         try:
+             # compute the score of one sample
+             score, qid2tuple, qid2scores = compute_dpg_one_sample(
+                 args=args, question_dict=question_dict, image_path=image_path, vqa_model=vqa_model, resolution=args.resolution)
+             local_scores.append(score)
+
+             # summarize scores by category
+             for qid in qid2tuple.keys():
+                 category = qid2tuple[qid].split('(')[0].strip()
+                 qid_score = qid2scores[qid]
+                 local_category2scores[category].append(qid_score)
+
+         except Exception as e:
+             print('Failed filename:', fn, e)
+             continue
+
+     accelerator.wait_for_everyone()
+     global_dpg_scores = gather_object(local_scores)
+     mean_dpg_score = np.mean(global_dpg_scores)
+
+     global_categories = gather_object(list(local_category2scores.keys()))
+     global_categories = set(global_categories)
+     global_category2scores = dict()
+     global_average_scores = []
+     for category in global_categories:
+         local_category_scores = local_category2scores.get(category, [])
+         global_category2scores[category] = gather_object(local_category_scores)
+         global_average_scores.extend(gather_object(local_category_scores))
+
+     global_category2scores_l1 = defaultdict(list)
+     for category in global_categories:
+         l1_category = category.split('-')[0].strip()
+         global_category2scores_l1[l1_category].extend(global_category2scores[category])
+
+     time.sleep(3)
+     if accelerator.is_main_process:
+         output = f'Model: {model_id}\n'
+
+         output += 'L1 category scores:\n'
+         for l1_category in global_category2scores_l1.keys():
+             output += f'\t{l1_category}: {np.mean(global_category2scores_l1[l1_category]) * 100}\n'
+
+         output += 'L2 category scores:\n'
+         for category in sorted(global_categories):
+             output += f'\t{category}: {np.mean(global_category2scores[category]) * 100}\n'
+
+         output += f'Image path: {args.image_root_path}\n'
+         output += f'Save results to: {args.res_path}\n'
+         output += f'DPG-Bench score: {mean_dpg_score * 100}'
+         with open(args.res_path, 'a') as f:
+             f.write(output + '\n')
+         print(output)
+
+
+ if __name__ == "__main__":
+     main()
univa/eval/gedit/README.md ADDED
@@ -0,0 +1,71 @@
+
+ The original code is from [GEdit-Bench](https://github.com/stepfun-ai/Step1X-Edit/blob/main/GEdit-Bench/EVAL.md).
+
+ ## Requirements and Installation
+
+ ```
+ pip install megfile openai
+ ```
+
+ ## Prepare Source Images
+ Prepare the original images and the metadata json following the example code in `step0_prepare_gedit.py`:
+
+ ```bash
+ GEDIT_ASSET="/path/to/gedit_asset"
+ python step0_prepare_gedit.py --save_path ${GEDIT_ASSET} --json_file_path gedit_edit.json
+ ```
+
+ The directory structure of the original images:
+ ```folder
+ ${GEDIT_ASSET}/
+ └── fullset/
+     └── edit_task/
+         ├── cn/   # Chinese instructions
+         │   ├── key1.png
+         │   ├── key2.png
+         │   └── ...
+         └── en/   # English instructions
+             ├── key1.png
+             ├── key2.png
+             └── ...
+ ```
+
+ ## Eval
+
+ ### Generate samples
+
+ ```bash
+ # switch to the univa env
+ MODEL_PATH='path/to/model'
+ OUTPUT_DIR='path/to/eval_output/gedit'
+ CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 torchrun \
+     --nproc_per_node 8 \
+     -m step1_gen_samples \
+     gedit.yaml \
+     --pretrained_lvlm_name_or_path ${MODEL_PATH} \
+     --output_dir ${OUTPUT_DIR}
+ ```
+
+ ### Evaluation
+
+ Write your GPT API key to `secret_t2.env`.
+
+ ```bash
+ IMAGE_DIR=${OUTPUT_DIR}
+ python step2_gedit_bench.py \
+     --model_name UniWorld \
+     --save_path ${IMAGE_DIR} \
+     --backbone gpt4o \
+     --source_path ${GEDIT_ASSET}
+ ```
+
+ ### Summary
+ ```bash
+ python step3_calculate_statistics.py \
+     --model_name UniWorld \
+     --save_path ${IMAGE_DIR} \
+     --backbone gpt4o \
+     --language en > ${IMAGE_DIR}.txt
+ cat ${IMAGE_DIR}.txt
+ ```
univa/eval/gedit/__init__.py ADDED
File without changes
univa/eval/gedit/gedit.yaml ADDED
@@ -0,0 +1,20 @@
+ pretrained_lvlm_name_or_path: /mnt/data/lb/Remake/UniWorld//checkpoints/flux_qwen2p5vl_7b_vlm_mlp_siglip_stage2_ts_1024_bs42x8x1_fa_any_11ratio_ema999_ocr_adamw_t5_1p0_lr5e-6_mask_refstyle_extract/checkpoint-20000/model_ema
+ pretrained_denoiser_name_or_path: /mnt/data/checkpoints/black-forest-labs/FLUX.1-dev/
+ pretrained_siglip_name_or_path: /mnt/data/checkpoints/google/siglip2-so400m-patch16-512
+ joint_with_t5: false
+
+ seed: 42
+ allow_tf32: false
+
+ output_dir: /mnt/data/lb/Remake/UniWorld//eval_output/gedit
+
+ num_images_per_prompt: 1
+ num_inference_steps: 28
+ guidance_scale: 3.5
+ height: 1024
+ width: 1024
+
+ gedit_prompt_path: gedit_edit.json
+ gedit_image_dir: /mnt/data/lb/Remake/gedit_bench_eval_images
+ resized_height: 1024
+ resized_width: 1024
univa/eval/gedit/gedit_edit.json ADDED
The diff for this file is too large to render. See raw diff
 
univa/eval/gedit/secret_t2.env ADDED
File without changes
univa/eval/gedit/step0_prepare_gedit.py ADDED
@@ -0,0 +1,85 @@
+ import json
+ import os
+ import math
+ import argparse
+ from datasets import Dataset, load_dataset
+
+ # Dataset info structure:
+ # - task_type: string - Type of the task
+ # - key: string - Unique identifier for the sample
+ # - instruction: string - Task instruction/prompt
+ # - instruction_language: string - Language of the instruction
+ # - input_image: Image - Original input image
+ # - input_image_raw: Image - Raw/unprocessed input image
+ # - Intersection_exist: bool - Whether intersection exists
+
+ def calculate_dimensions(target_area, ratio):
+     width = math.sqrt(target_area * ratio)
+     height = width / ratio
+
+     width = round(width / 32) * 32
+     height = round(height / 32) * 32
+
+     new_area = width * height
+     if new_area < target_area:
+         width += 32
+         new_area = width * height
+     elif new_area > target_area:
+         width -= 32
+         new_area = width * height
+
+     return width, height, new_area
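+ # Worked example (added for clarity): calculate_dimensions(512 * 512, 4 / 3)
+ # gives width ~591.2 and height ~443.4, which round to 576 x 448 (multiples of
+ # 32); since 576 * 448 = 258048 undershoots the 262144-pixel target, the width
+ # is bumped by 32, so the function returns (608, 448, 272384).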
+
+ def main(args):
+     # Load dataset
+     dataset = load_dataset("stepfun-ai/GEdit-Bench")
+
+     # Dictionary to store instruction and image paths
+     instruction_image_paths = {}
+
+     for item in dataset['train']:
+         task_type = item['task_type']
+         key = item['key']
+         instruction = item['instruction']
+         instruction_language = item['instruction_language']
+         input_image = item['input_image']
+         input_image_raw = item['input_image_raw']
+         intersection_exist = item['Intersection_exist']
+
+         target_width, target_height, new_area = calculate_dimensions(512 * 512, input_image_raw.width / input_image_raw.height)
+         resize_input_image = input_image_raw.resize((target_width, target_height))
+
+         save_path_fullset_source_image = os.path.join(args.save_path, f"fullset/{task_type}/{instruction_language}/{key}_SRCIMG.png")
+         save_path_fullset = os.path.join(args.save_path, f"fullset/{task_type}/{instruction_language}/{key}.png")
+
+         relative_path = f"fullset/{task_type}/{instruction_language}/{key}.png"
+
+         # Create directories if they don't exist
+         os.makedirs(os.path.dirname(save_path_fullset_source_image), exist_ok=True)
+         os.makedirs(os.path.dirname(save_path_fullset), exist_ok=True)
+
+         # Save the images
+         input_image.save(save_path_fullset_source_image)
+         resize_input_image.save(save_path_fullset)
+
+         # Store instruction and corresponding image path in the dictionary
+         instruction_image_paths[key] = {
+             'prompt': instruction,
+             'id': relative_path,
+             'edit_type': task_type,
+         }
+
+     # Save the dictionary to a JSON file
+     with open(args.json_file_path, 'w') as json_file:
+         json.dump(instruction_image_paths, json_file, indent=4)
+
+     print(f"Instruction and image paths saved to {args.json_file_path}")
+
+ if __name__ == "__main__":
+     parser = argparse.ArgumentParser(description="Process and save dataset images and instructions.")
+     parser.add_argument("--save_path", type=str, required=True, help="Directory to save processed images.")
+     parser.add_argument("--json_file_path", type=str, required=True, help="Path to save the JSON file with instruction-image mappings.")
+
+     args = parser.parse_args()
+
+     main(args)
univa/eval/gedit/step1_gen_samples.py ADDED
@@ -0,0 +1,260 @@
+
+ import sys
+ import os
+ root = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", ".."))
+ sys.path.append(root)
+ import json
+ import torch
+ import random
+ import subprocess
+ import argparse
+ import numpy as np
+ import pandas as pd
+ import torch.distributed as dist
+ from PIL import Image
+ from tqdm import tqdm
+ from qwen_vl_utils import process_vision_info
+ from torchvision import transforms
+ from transformers import AutoProcessor
+ from transformers import SiglipImageProcessor, SiglipVisionModel
+ from univa.utils.flux_pipeline import FluxPipeline
+ from univa.eval.configuration_eval import EvalConfig
+ from univa.utils.get_ocr import get_ocr_result
+ from univa.utils.denoiser_prompt_embedding_flux import encode_prompt
+ from univa.models.qwen2p5vl.modeling_univa_qwen2p5vl import UnivaQwen2p5VLForConditionalGeneration
+ from univa.utils.anyres_util import dynamic_resize
+
+ # adapted from https://github.com/huggingface/accelerate/blob/main/src/accelerate/utils/random.py#L31
+ def set_seed(seed, rank, device_specific=True):
+     if device_specific:
+         seed += rank
+     random.seed(seed)
+     np.random.seed(seed)
+     torch.manual_seed(seed)
+     torch.cuda.manual_seed(seed)
+     torch.cuda.manual_seed_all(seed)
+     torch.backends.cudnn.deterministic = True
+     torch.backends.cudnn.benchmark = False
+
+ def initialize_models(args, device):
+
+     # Load the main model and task head
+     model = UnivaQwen2p5VLForConditionalGeneration.from_pretrained(
+         args.pretrained_lvlm_name_or_path,
+         torch_dtype=torch.bfloat16
+     ).to(device)
+
+     processor = AutoProcessor.from_pretrained(
+         args.pretrained_lvlm_name_or_path,
+         min_pixels=args.min_pixels,
+         max_pixels=args.max_pixels,
+     )
+
+     # Load the FLUX pipeline
+     pipe = FluxPipeline.from_pretrained(
+         args.pretrained_denoiser_name_or_path,
+         transformer=model.denoise_tower.denoiser,
+         torch_dtype=torch.bfloat16,
+     ).to(device)
+     tokenizers = [pipe.tokenizer, pipe.tokenizer_2]
+     text_encoders = [pipe.text_encoder, pipe.text_encoder_2]
+
+     siglip_processor = SiglipImageProcessor.from_pretrained(args.pretrained_siglip_name_or_path)
+     siglip_model = SiglipVisionModel.from_pretrained(
+         args.pretrained_siglip_name_or_path,
+         torch_dtype=torch.bfloat16,
+     ).to(device)
+
+     return {
+         'model': model,
+         'processor': processor,
+         'pipe': pipe,
+         'tokenizers': tokenizers,
+         'text_encoders': text_encoders,
+         'device': device,
+         'siglip_model': siglip_model,
+         'siglip_processor': siglip_processor,
+     }
+
+
+ def init_gpu_env(args):
+     local_rank = int(os.getenv('RANK', 0))
+     world_size = int(os.getenv('WORLD_SIZE', 1))
+     args.local_rank = local_rank
+     args.world_size = world_size
+     torch.cuda.set_device(local_rank)
+     dist.init_process_group(
+         backend='nccl', init_method='env://',
+         world_size=world_size, rank=local_rank
+     )
+     return args
+
+
+ def update_size(i1, i2, anyres='any_11ratio', anchor_pixels=1024*1024):
+     shapes = []
+     for p in (i1, i2):
+         if p:
+             im = Image.open(p)
+             w, h = im.size
+             shapes.append((w, h))
+     if not shapes:
+         return int(anchor_pixels**0.5), int(anchor_pixels**0.5)
+     if len(shapes) == 1:
+         w, h = shapes[0]
+     else:
+         w = sum(s[0] for s in shapes) / len(shapes)
+         h = sum(s[1] for s in shapes) / len(shapes)
+     new_h, new_w = dynamic_resize(int(h), int(w), anyres, anchor_pixels=anchor_pixels)
+     return new_h, new_w
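+ # Illustrative example (hypothetical sizes): for a single 800x600 source image and
+ # anchor_pixels = 1024*1024, update_size passes the 4:3 shape to dynamic_resize to
+ # fit roughly a megapixel budget; with no source images at all, it falls back to a
+ # square of side sqrt(anchor_pixels).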
112
+
113
+ def run_model_and_return_samples(args, state, text, image1=None, image2=None):
114
+
115
+ # Build content
116
+ convo = []
117
+ image_paths = []
118
+ content = []
119
+ if text:
120
+ ocr_text = ''
121
+ if args.ocr_enhancer and content:
122
+ ocr_texts = []
123
+ for img in (image1, image2):
124
+ if img:
125
+ ocr_texts.append(get_ocr_result(img, cur_ocr_i))
126
+ cur_ocr_i += 1
127
+ ocr_text = '\n'.join(ocr_texts)
128
+ content.append({'type':'text','text': text + ocr_text})
129
+ for img in (image1, image2):
130
+ if img:
131
+ content.append({'type':'image','image':img,'min_pixels':args.min_pixels,'max_pixels':args.max_pixels})
132
+ image_paths.append(img)
133
+
134
+ convo.append({'role':'user','content':content})
135
+
136
+ new_h, new_w = update_size(image1, image2, 'any_11ratio', anchor_pixels=args.height * args.width)
137
+
138
+ # Prepare inputs
139
+ chat_text = state['processor'].apply_chat_template(
140
+ convo,
141
+ tokenize=False,
142
+ add_generation_prompt=True
143
+ )
144
+ chat_text = '<|im_end|>\n'.join(chat_text.split('<|im_end|>\n')[1:])
145
+ image_inputs, video_inputs = process_vision_info(convo)
146
+ inputs = state['processor'](
147
+ text=[chat_text], images=image_inputs, videos=video_inputs,
148
+ padding=True, return_tensors='pt'
149
+ ).to(state['device'])
150
+
151
+ # Generate
152
+ # image generation pipeline
153
+ siglip_hs = None
154
+ if state['siglip_processor'] and image_paths:
155
+ vals = [state['siglip_processor'].preprocess(
156
+ images=Image.open(p).convert('RGB'), do_resize=True,
157
+ return_tensors='pt', do_convert_rgb=True
158
+ ).pixel_values.to(state['device'])
159
+ for p in image_paths]
160
+ siglip_hs = state['siglip_model'](torch.concat(vals)).last_hidden_state
161
+
162
+ with torch.no_grad():
163
+ lvlm = state['model'](
164
+ inputs.input_ids, pixel_values=getattr(inputs,'pixel_values',None),
165
+ attention_mask=inputs.attention_mask,
166
+ image_grid_thw=getattr(inputs,'image_grid_thw',None),
167
+ siglip_hidden_states=siglip_hs,
168
+ output_type='denoise_embeds'
169
+ )
170
+ prm_embeds, pooled = encode_prompt(
171
+ state['text_encoders'], state['tokenizers'],
172
+ text if args.joint_with_t5 else '', 256, state['device'], 1
173
+ )
174
+ if args.only_use_t5:
175
+ emb = prm_embeds
176
+ else:
177
+ emb = torch.concat([lvlm, prm_embeds], dim=1) if args.joint_with_t5 else lvlm
178
+
179
+ with torch.no_grad():
180
+ img = state['pipe'](
181
+ prompt_embeds=emb,
182
+ pooled_prompt_embeds=pooled,
183
+ # height=args.height,
184
+ # width=args.width,
185
+ height=new_h,
186
+ width=new_w,
187
+ num_inference_steps=args.num_inference_steps,
188
+ guidance_scale=args.guidance_scale,
189
+ num_images_per_prompt=args.num_images_per_prompt,
190
+ ).images
191
+ return img
192
+
193
+
194
+ def main(args):
195
+
196
+ args = init_gpu_env(args)
197
+
198
+ torch.backends.cuda.matmul.allow_tf32 = False
199
+ torch.backends.cudnn.allow_tf32 = False
200
+ if args.allow_tf32:
201
+ torch.backends.cuda.matmul.allow_tf32 = True
202
+ torch.backends.cudnn.allow_tf32 = True
203
+
204
+ set_seed(args.seed, rank=args.local_rank, device_specific=True)
205
+ device = torch.cuda.current_device()
206
+ state = initialize_models(args, device)
207
+
208
+ # Create the output directory if it doesn't exist
209
+ os.makedirs(args.output_dir, exist_ok=True)
210
+
211
+ # Load the evaluation prompts
212
+ with open(args.gedit_prompt_path, "r") as f:
213
+ data = json.load(f)
214
+
215
+ inference_list = []
216
+
217
+ for key, value in tqdm(data.items()):
218
+ outpath = args.output_dir
219
+ os.makedirs(outpath, exist_ok=True)
220
+
221
+ prompt = value["prompt"]
222
+ image_path = value['id']
223
+ inference_list.append([prompt, outpath, key, image_path])
224
+
225
+ inference_list = inference_list[args.local_rank::args.world_size]
226
+
227
+ for prompt, output_path, key, image_path in tqdm(inference_list):
228
+
229
+ output_path = os.path.join(output_path, image_path)
230
+ real_image_path = os.path.join(args.imgedit_image_dir, image_path)
231
+ os.makedirs(os.path.dirname(output_path), exist_ok=True)
232
+
233
+ if os.path.exists(output_path):
234
+ continue
235
+ image = run_model_and_return_samples(args, state, prompt, image1=real_image_path, image2=None)
236
+ image = image[0]
237
+ image = image.resize((args.resized_width, args.resized_height))
238
+ image.save(
239
+ output_path
240
+ )
241
+
242
+
243
+ if __name__ == "__main__":
244
+ import argparse
245
+ from omegaconf import OmegaConf
246
+
247
+ parser = argparse.ArgumentParser()
248
+ parser.add_argument("config", type=str)
249
+ parser.add_argument("--pretrained_lvlm_name_or_path", type=str, default=None, required=False)
250
+ parser.add_argument("--output_dir", type=str, default=None, required=False)
251
+ args = parser.parse_args()
252
+
253
+ config = OmegaConf.load(args.config)
254
+ schema = OmegaConf.structured(EvalConfig)
255
+ conf = OmegaConf.merge(schema, config)
256
+ if args.pretrained_lvlm_name_or_path is not None:
257
+ assert args.output_dir is not None
258
+ conf.pretrained_lvlm_name_or_path = args.pretrained_lvlm_name_or_path
259
+ conf.output_dir = args.output_dir
260
+ main(conf)
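A note on the sharding above: `inference_list[args.local_rank::args.world_size]` stride-slices the prompt list so that rank r processes items r, r + world_size, r + 2*world_size, and so on, giving non-overlapping shards that jointly cover every prompt. A minimal self-contained sketch of the same slicing (the variable names here are illustrative, not from the repo):

    # Stride-based sharding, as used by step1_gen_samples.py above.
    items = list(range(10))                      # stand-in for inference_list
    world_size = 4
    shards = [items[rank::world_size] for rank in range(world_size)]
    # shards == [[0, 4, 8], [1, 5, 9], [2, 6], [3, 7]]
    assert sorted(x for shard in shards for x in shard) == items  # no overlap, full coverage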
univa/eval/gedit/step2_gedit_bench.py ADDED
@@ -0,0 +1,178 @@
+ from viescore import VIEScore
+ import PIL
+ import os
+ import megfile
+ from PIL import Image
+ from tqdm import tqdm
+ from datasets import load_dataset, load_from_disk
+ import sys
+ import csv
+ import threading
+ import time
+ import argparse
+ from concurrent.futures import ThreadPoolExecutor, as_completed
+
+ GROUPS = [
+     "background_change", "color_alter", "material_alter", "motion_change", "ps_human", "style_change",
+     "subject-add", "subject-remove", "subject-replace", "text_change", "tone_transfer"
+ ]
+
+ def process_single_item(item, vie_score, max_retries=10000):
+     # `source_path`, `save_path`, and `group_name` are module-level globals set in __main__
+     instruction = item['instruction']
+     key = item['key']
+     instruction_language = item['instruction_language']
+     intersection_exist = item['Intersection_exist']
+     sample_prefix = key
+     save_path_fullset_source_image = f"{source_path}/fullset/{group_name}/{instruction_language}/{key}_SRCIMG.png"
+     save_path_fullset_result_image = f"{save_path}/fullset/{group_name}/{instruction_language}/{key}.png"
+
+     src_image_path = save_path_fullset_source_image
+     save_path_item = save_path_fullset_result_image
+
+     for retry in range(max_retries):
+         try:
+             pil_image_raw = Image.open(megfile.smart_open(src_image_path, 'rb'))
+             pil_image_edited = Image.open(megfile.smart_open(save_path_item, 'rb')).convert("RGB").resize((pil_image_raw.size[0], pil_image_raw.size[1]))
+
+             text_prompt = instruction
+             score_list = vie_score.evaluate([pil_image_raw, pil_image_edited], text_prompt)
+             sementics_score, quality_score, overall_score = score_list
+
+             print(f"sementics_score: {sementics_score}, quality_score: {quality_score}, overall_score: {overall_score}, instruction_language: {instruction_language}, instruction: {instruction}")
+
+             return {
+                 "source_image": src_image_path,
+                 "edited_image": save_path_item,
+                 "instruction": instruction,
+                 "sementics_score": sementics_score,
+                 "quality_score": quality_score,
+                 "intersection_exist": item['Intersection_exist'],
+                 "instruction_language": item['instruction_language']
+             }
+         except Exception as e:
+             if retry < max_retries - 1:
+                 wait_time = (retry + 1) * 2  # linear backoff: 2s, 4s, 6s, ...
+                 print(f"Error processing {save_path_item} (attempt {retry + 1}/{max_retries}): {e}")
+                 print(f"Waiting {wait_time} seconds before retry...")
+                 time.sleep(wait_time)
+             else:
+                 print(f"Failed to process {save_path_item} after {max_retries} attempts: {e}")
+                 return
+
+ if __name__ == "__main__":
+     parser = argparse.ArgumentParser()
+     parser.add_argument("--model_name", type=str, default="UniWorld")
+     parser.add_argument("--save_path", type=str, default="/mnt/data/lb/Remake/UniWorld//eval_output/stage3_ema/Gedit")
+     parser.add_argument("--backbone", type=str, default="gpt4o", choices=["gpt4o", "qwen25vl"])
+     parser.add_argument("--source_path", type=str, default="/mnt/workspace/lb/Remake/gedit_bench_eval_images")
+     args = parser.parse_args()
+     model_name = args.model_name
+     save_path_dir = args.save_path
+     source_path = args.source_path
+     evaluate_group = [args.model_name]
+     backbone = args.backbone
+
+     vie_score = VIEScore(backbone=backbone, task="tie", key_path='secret_t2.env')
+     max_workers = 5
+     dataset = load_dataset("stepfun-ai/GEdit-Bench")
+
+     for model_name in evaluate_group:
+         save_path = save_path_dir
+
+         save_path_new = os.path.join(save_path_dir, backbone, "eval_results_new")
+         all_csv_list = []  # Store all results for the final combined CSV
+
+         # Load existing processed samples from the final CSV if it exists
+         processed_samples = set()
+         final_csv_path = os.path.join(save_path_new, f"{model_name}_combined_gpt_score.csv")
+         if megfile.smart_exists(final_csv_path):
+             with megfile.smart_open(final_csv_path, 'r', newline='') as f:
+                 reader = csv.DictReader(f)
+                 for row in reader:
+                     # Create a unique identifier for each sample
+                     sample_key = (row['source_image'], row['edited_image'])
+                     processed_samples.add(sample_key)
+             print(f"Loaded {len(processed_samples)} processed samples from existing CSV")
+
+         for group_name in GROUPS:
+             group_csv_list = []
+             group_dataset_list = []
+             for item in tqdm(dataset['train'], desc=f"Processing {model_name} - {group_name}"):
+                 if item['instruction_language'] == 'cn':
+                     continue
+                 if item['task_type'] == group_name:
+                     group_dataset_list.append(item)
+             # Load the existing group CSV if it exists
+             group_csv_path = os.path.join(save_path_new, f"{model_name}_{group_name}_gpt_score.csv")
+             if megfile.smart_exists(group_csv_path):
+                 with megfile.smart_open(group_csv_path, 'r', newline='') as f:
+                     reader = csv.DictReader(f)
+                     group_results = list(reader)
+                     group_csv_list.extend(group_results)
+                 print(f"Loaded existing results for {model_name} - {group_name}")
+
+             print(f"Processing group: {group_name}")
+             print(f"Processing model: {model_name}")
+
+             with ThreadPoolExecutor(max_workers=max_workers) as executor:
+                 futures = []
+                 for item in group_dataset_list:
+                     instruction = item['instruction']
+                     key = item['key']
+                     instruction_language = item['instruction_language']
+                     intersection_exist = item['Intersection_exist']
+                     sample_prefix = key
+                     save_path_fullset_source_image = f"{source_path}/fullset/{group_name}/{instruction_language}/{key}_SRCIMG.png"
+                     save_path_fullset_result_image = f"{save_path}/fullset/{group_name}/{instruction_language}/{key}.png"
+
+                     if not megfile.smart_exists(save_path_fullset_result_image) or not megfile.smart_exists(save_path_fullset_source_image):
+                         print(f"Skipping {sample_prefix}: Source or edited image does not exist")
+                         continue
+
+                     # Skip samples that have already been processed
+                     sample_key = (save_path_fullset_source_image, save_path_fullset_result_image)
+                     if sample_key in processed_samples:
+                         print(f"Skipping already processed sample: {sample_prefix}")
+                         continue
+
+                     future = executor.submit(process_single_item, item, vie_score)
+                     futures.append(future)
+
+                 for future in tqdm(as_completed(futures), total=len(futures), desc=f"Processing {model_name} - {group_name}"):
+                     result = future.result()
+                     if result:
+                         group_csv_list.append(result)
+
+             # Save group-specific CSV
+             group_csv_path = os.path.join(save_path_new, f"{model_name}_{group_name}_gpt_score.csv")
+             with megfile.smart_open(group_csv_path, 'w', newline='') as f:
+                 fieldnames = ["source_image", "edited_image", "instruction", "sementics_score", "quality_score", "intersection_exist", "instruction_language"]
+                 writer = csv.DictWriter(f, fieldnames=fieldnames)
+                 writer.writeheader()
+                 for row in group_csv_list:
+                     writer.writerow(row)
+             all_csv_list.extend(group_csv_list)
+
+             print(f"Saved group CSV for {group_name}, length: {len(group_csv_list)}")
+
+         # After processing all groups, save the combined results
+         if not all_csv_list:
+             print(f"Warning: No results for model {model_name}, skipping combined CSV generation")
+             continue
+
+         # Save combined CSV
+         combined_csv_path = os.path.join(save_path_new, f"{model_name}_combined_gpt_score.csv")
+         with megfile.smart_open(combined_csv_path, 'w', newline='') as f:
+             fieldnames = ["source_image", "edited_image", "instruction", "sementics_score", "quality_score", "intersection_exist", "instruction_language"]
+             writer = csv.DictWriter(f, fieldnames=fieldnames)
+             writer.writeheader()
+             for row in all_csv_list:
+                 writer.writerow(row)
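The retry loop in process_single_item above sleeps (retry + 1) * 2 seconds between attempts, i.e. a linear backoff (2 s, 4 s, 6 s, ...) rather than an exponential one. The pattern in isolation, as a hedged sketch where the hypothetical callable `fn` stands in for the VIEScore call:

    import time

    def call_with_linear_backoff(fn, max_retries=5):
        # `fn` is any zero-argument callable that may raise (hypothetical helper).
        for retry in range(max_retries):
            try:
                return fn()
            except Exception as e:
                if retry < max_retries - 1:
                    time.sleep((retry + 1) * 2)  # 2s, 4s, 6s, ...
                else:
                    print(f"giving up after {max_retries} attempts: {e}")
                    return None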
univa/eval/gedit/step3_calculate_statistics.py ADDED
@@ -0,0 +1,153 @@
+ import megfile
+ import os
+ import pandas as pd
+ from collections import defaultdict
+ import sys
+ import numpy as np
+ import math
+
+ GROUPS = [
+     "background_change", "color_alter", "material_alter", "motion_change", "ps_human", "style_change",
+     "subject-add", "subject-remove", "subject-replace", "text_change", "tone_transfer"
+ ]
+
+ def analyze_scores(save_path_dir, evaluate_group, language):
+     results = defaultdict(dict)
+     save_path_new = save_path_dir
+     model_total_score = defaultdict(dict)
+
+     group_dict_sub = {}
+     group_scores_semantics = defaultdict(lambda: defaultdict(list))
+     group_scores_quality = defaultdict(lambda: defaultdict(list))
+     group_scores_overall = defaultdict(lambda: defaultdict(list))
+
+     group_scores_semantics_intersection = defaultdict(lambda: defaultdict(list))
+     group_scores_quality_intersection = defaultdict(lambda: defaultdict(list))
+     group_scores_overall_intersection = defaultdict(lambda: defaultdict(list))
+     length_total = 0
+     save_path_dir_raw = save_path_dir
+
+     for group_name in GROUPS:
+         csv_path = os.path.join(save_path_new, f"{evaluate_group[0]}_{group_name}_gpt_score.csv")
+         csv_file = megfile.smart_open(csv_path)
+         df = pd.read_csv(csv_file)
+
+         filtered_semantics_scores = []
+         filtered_quality_scores = []
+         filtered_overall_scores = []
+         filtered_semantics_scores_intersection = []
+         filtered_quality_scores_intersection = []
+         filtered_overall_scores_intersection = []
+
+         for _, row in df.iterrows():
+             source_image = row['source_image']
+             edited_image = row['edited_image']
+             instruction = row['instruction']
+             semantics_score = row['sementics_score']
+             quality_score = row['quality_score']
+             intersection_exist = row['intersection_exist']
+             instruction_language = row['instruction_language']
+
+             # Keep only rows in the requested language
+             if instruction_language != language:
+                 continue
+
+             # Overall score is the geometric mean of semantics and quality
+             overall_score = math.sqrt(semantics_score * quality_score)
+
+             filtered_semantics_scores.append(semantics_score)
+             filtered_quality_scores.append(quality_score)
+             filtered_overall_scores.append(overall_score)
+             if intersection_exist:
+                 filtered_semantics_scores_intersection.append(semantics_score)
+                 filtered_quality_scores_intersection.append(quality_score)
+                 filtered_overall_scores_intersection.append(overall_score)
+
+         avg_semantics_score = np.mean(filtered_semantics_scores)
+         avg_quality_score = np.mean(filtered_quality_scores)
+         avg_overall_score = np.mean(filtered_overall_scores)
+         group_scores_semantics[evaluate_group[0]][group_name] = avg_semantics_score
+         group_scores_quality[evaluate_group[0]][group_name] = avg_quality_score
+         group_scores_overall[evaluate_group[0]][group_name] = avg_overall_score
+
+         avg_semantics_score_intersection = np.mean(filtered_semantics_scores_intersection)
+         avg_quality_score_intersection = np.mean(filtered_quality_scores_intersection)
+         avg_overall_score_intersection = np.mean(filtered_overall_scores_intersection)
+         group_scores_semantics_intersection[evaluate_group[0]][group_name] = avg_semantics_score_intersection
+         group_scores_quality_intersection[evaluate_group[0]][group_name] = avg_quality_score_intersection
+         group_scores_overall_intersection[evaluate_group[0]][group_name] = avg_overall_score_intersection
+
+     print("\n--- Overall Model Averages ---")
+
+     print("\nSemantics:")
+     for model_name in evaluate_group:
+         model_scores = [group_scores_semantics[model_name][group] for group in GROUPS]
+         model_avg = np.mean(model_scores)
+         group_scores_semantics[model_name]["avg_semantics"] = model_avg
+
+     print("\nSemantics Intersection:")
+     for model_name in evaluate_group:
+         model_scores = [group_scores_semantics_intersection[model_name][group] for group in GROUPS]
+         model_avg = np.mean(model_scores)
+         group_scores_semantics_intersection[model_name]["avg_semantics"] = model_avg
+
+     print("\nQuality:")
+     for model_name in evaluate_group:
+         model_scores = [group_scores_quality[model_name][group] for group in GROUPS]
+         model_avg = np.mean(model_scores)
+         group_scores_quality[model_name]["avg_quality"] = model_avg
+
+     print("\nQuality Intersection:")
+     for model_name in evaluate_group:
+         model_scores = [group_scores_quality_intersection[model_name][group] for group in GROUPS]
+         model_avg = np.mean(model_scores)
+         group_scores_quality_intersection[model_name]["avg_quality"] = model_avg
+
+     print("\nOverall:")
+     for model_name in evaluate_group:
+         model_scores = [group_scores_overall[model_name][group] for group in GROUPS]
+         model_avg = np.mean(model_scores)
+         group_scores_overall[model_name]["avg_overall"] = model_avg
+
+     print("\nOverall Intersection:")
+     for model_name in evaluate_group:
+         model_scores = [group_scores_overall_intersection[model_name][group] for group in GROUPS]
+         model_avg = np.mean(model_scores)
+         group_scores_overall_intersection[model_name]["avg_overall"] = model_avg
+
+     return group_scores_semantics, group_scores_quality, group_scores_overall, group_scores_semantics_intersection, group_scores_quality_intersection, group_scores_overall_intersection
+
+ if __name__ == "__main__":
+     import argparse
+     parser = argparse.ArgumentParser()
+     parser.add_argument("--model_name", type=str, default="UniWorld")
+     parser.add_argument("--save_path", type=str, default="/mnt/data/lb/Remake/UniWorld//eval_output/stage3_ema/Gedit")
+     parser.add_argument("--backbone", type=str, default="gpt4o", choices=["gpt4o", "qwen25vl"])
+     parser.add_argument("--language", type=str, default="en", choices=["en", "zh"])
+     args = parser.parse_args()
+     model_name = args.model_name
+     save_path_dir = args.save_path
+     evaluate_group = [args.model_name]
+     backbone = args.backbone
+
+     save_path_new = os.path.join(save_path_dir, backbone, "eval_results_new")
+
+     print("\nOverall:")
+     for model_name in evaluate_group:
+         group_scores_semantics, group_scores_quality, group_scores_overall, group_scores_semantics_intersection, group_scores_quality_intersection, group_scores_overall_intersection = analyze_scores(save_path_new, [model_name], language=args.language)
+         for group_name in GROUPS:
+             print(f"{group_name}: {group_scores_semantics[model_name][group_name]:.3f}, {group_scores_quality[model_name][group_name]:.3f}, {group_scores_overall[model_name][group_name]:.3f}")
+         print(f"Average: {group_scores_semantics[model_name]['avg_semantics']:.3f}, {group_scores_quality[model_name]['avg_quality']:.3f}, {group_scores_overall[model_name]['avg_overall']:.3f}")
+
+         print("\nIntersection:")
+         for group_name in GROUPS:
+             print(f"{group_name}: {group_scores_semantics_intersection[model_name][group_name]:.3f}, {group_scores_quality_intersection[model_name][group_name]:.3f}, {group_scores_overall_intersection[model_name][group_name]:.3f}")
+         print(f"Average Intersection: {group_scores_semantics_intersection[model_name]['avg_semantics']:.3f}, {group_scores_quality_intersection[model_name]['avg_quality']:.3f}, {group_scores_overall_intersection[model_name]['avg_overall']:.3f}")
univa/eval/gedit/viescore/__init__.py ADDED
@@ -0,0 +1,115 @@
+ import sys
+ sys.path.insert(0, 'viescore')
+
+ from utils import (
+     mllm_output_to_dict
+ )
+ import math
+ import vie_prompts
+
+ class VIEScore:
+     def __init__(self, backbone="gpt4o", task="t2i", key_path=None) -> None:
+         self.task = task
+         self.backbone_name = backbone
+
+         if self.task not in ["t2i", "tie", "t2v"]:
+             raise ValueError("task must be one of 't2i', 'tie', or 't2v'")
+
+         if self.backbone_name == "gpt4o":
+             from mllm_tools.openai import GPT4o
+             self.model = GPT4o(key_path, model_name="gpt-4.1")
+         elif self.backbone_name == "gpt4v":
+             from mllm_tools.openai import GPT4v
+             self.model = GPT4v(key_path)
+         elif self.backbone_name == "gemini":
+             from mllm_tools.gemini import Gemini
+             self.model = Gemini()
+         elif self.backbone_name == "idefics2":
+             from mllm_tools.idefics2_eval import Idefics2
+             self.model = Idefics2()
+         elif self.backbone_name == "mantis":
+             from mllm_tools.mantis_idefics2_eval import Mantis
+             self.model = Mantis()
+         elif self.backbone_name == "minicpmv":
+             from mllm_tools.minicpmv_eval import MiniCPMV
+             self.model = MiniCPMV()
+         elif self.backbone_name == "qwen25vl":
+             from mllm_tools.qwen25vl_eval import Qwen25VL
+             self.model = Qwen25VL()
+         else:
+             raise NotImplementedError("backbone not supported")
+         self.context = vie_prompts._context_no_delimit
+         if self.task == "t2i":
+             self.SC_prompt = "\n".join([self.context, vie_prompts._prompts_0shot_one_image_gen_rule, vie_prompts._prompts_0shot_t2i_rule_SC])
+             self.PQ_prompt = "\n".join([self.context, vie_prompts._prompts_0shot_rule_PQ])
+         elif self.task == "tie":
+             self.SC_prompt = "\n".join([self.context, vie_prompts._prompts_0shot_two_image_edit_rule, vie_prompts._prompts_0shot_tie_rule_SC])
+             self.PQ_prompt = "\n".join([self.context, vie_prompts._prompts_0shot_rule_PQ])
+         elif self.task == "t2v":
+             self.SC_prompt = "\n".join([self.context, vie_prompts._prompts_0shot_one_video_gen_rule, vie_prompts._prompts_0shot_t2v_rule_SC])
+             self.PQ_prompt = "\n".join([self.context, vie_prompts._prompts_0shot_t2v_rule_PQ])
+
+     def evaluate(self, image_prompts, text_prompt, extract_overall_score_only=False, extract_all_score=True, echo_output=False):
+         if not isinstance(image_prompts, list):
+             image_prompts = [image_prompts]
+         if self.backbone_name in ['gpt4o', 'gpt4v']:
+             # URL/path inputs are passed through directly; PIL images must be base64-encoded
+             self.model.use_encode = False if isinstance(image_prompts[0], str) else True
+         if self.task == "t2i":
+             _SC_prompt = self.SC_prompt.replace("<prompt>", text_prompt)
+         elif self.task == "tie":
+             _SC_prompt = self.SC_prompt.replace("<instruction>", text_prompt)
+         elif self.task == "t2v":
+             _SC_prompt = self.SC_prompt.replace("<prompt>", text_prompt)
+         SC_prompt_final = self.model.prepare_prompt(image_prompts, _SC_prompt)
+         if self.task == "tie":
+             # Perceptual quality is judged on the edited image only
+             PQ_prompt_final = self.model.prepare_prompt(image_prompts[-1], self.PQ_prompt)
+         else:
+             PQ_prompt_final = self.model.prepare_prompt(image_prompts, self.PQ_prompt)
+
+         results_dict = {}
+
+         SC_dict = False
+         PQ_dict = False
+         tries = 0
+         max_tries = 1
+         while SC_dict is False or PQ_dict is False:
+             tries += 1
+             guess_if_cannot_parse = True if tries > max_tries else False
+             result_SC = self.model.get_parsed_output(SC_prompt_final)
+             result_PQ = self.model.get_parsed_output(PQ_prompt_final)
+             SC_dict = mllm_output_to_dict(result_SC, give_up_parsing=guess_if_cannot_parse)
+             PQ_dict = mllm_output_to_dict(result_PQ, give_up_parsing=guess_if_cannot_parse)
+
+         if SC_dict == "rate_limit_exceeded" or PQ_dict == "rate_limit_exceeded":
+             print("rate_limit_exceeded")
+             raise ValueError("rate_limit_exceeded")
+         results_dict['SC'] = SC_dict
+         results_dict['PQ'] = PQ_dict
+         if echo_output:
+             print("results_dict", results_dict)
+         if extract_all_score:
+             SC_score = min(results_dict['SC']['score'])
+             PQ_score = min(results_dict['PQ']['score'])
+             O_score = math.sqrt(SC_score * PQ_score)
+             return [SC_score, PQ_score, O_score]
+         if extract_overall_score_only:
+             SC_scores = results_dict['SC']['score']
+             PQ_scores = results_dict['PQ']['score']
+             O_score = math.sqrt(min(SC_scores) * min(PQ_scores))
+             return O_score
+         return results_dict
+
+ if __name__ == "__main__":
+     model = VIEScore(backbone="gemini", task="t2i")
+     from datasets import load_dataset
+     dataset = load_dataset("TIGER-Lab/GenAI-Arena-Bench", "image_generation")
+     dataset = dataset["test"]
+     print("Now running the VIEScore model")
+     for idx in range(5):
+         left_image = dataset['left_image'][idx]
+         right_image = dataset['right_image'][idx]
+         prompt = dataset['prompt'][idx]
+         print(model.evaluate(left_image, prompt, extract_all_score=True))
+         print(model.evaluate(right_image, prompt, extract_all_score=True))
+
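A minimal usage sketch for the editing ("tie") task, mirroring how step2_gedit_bench.py drives this class; the two image paths are placeholders and a valid key file is assumed:

    from PIL import Image
    scorer = VIEScore(backbone="gpt4o", task="tie", key_path="secret_t2.env")
    src = Image.open("source.png")      # placeholder: original image
    edited = Image.open("edited.png")   # placeholder: edited image
    sc, pq, overall = scorer.evaluate([src, edited], "replace the cat with a dog")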
univa/eval/gedit/viescore/mllm_tools/__init__.py ADDED
File without changes
univa/eval/gedit/viescore/mllm_tools/gemini.py ADDED
@@ -0,0 +1,147 @@
+ """
+ Install the Google AI Python SDK
+
+ $ pip install google-generativeai
+
+ See the getting started guide for more information:
+ https://ai.google.dev/gemini-api/docs/get-started/python
+ """
+
+ import requests
+ from PIL import Image
+ from io import BytesIO
+ import os
+ from typing import List
+ from urllib.parse import urlparse
+ import google.generativeai as genai
+ import tempfile
+
+ genai.configure(api_key=os.environ["GEMINI_API_KEY"])
+
+ def upload_to_gemini(input, mime_type=None):
+     """Uploads the given file or PIL image to Gemini.
+
+     See https://ai.google.dev/gemini-api/docs/prompting_with_media
+     """
+     if isinstance(input, str):
+         # Input is a file path
+         file = genai.upload_file(input, mime_type=mime_type)
+     elif isinstance(input, Image.Image):
+         # Input is a PIL image: write it to a temporary JPEG first
+         with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp_file:
+             input.save(tmp_file, format="JPEG")
+             tmp_file_path = tmp_file.name
+         file = genai.upload_file(tmp_file_path, mime_type=mime_type or "image/jpeg")
+         os.remove(tmp_file_path)
+     else:
+         raise ValueError("Unsupported input type. Must be a file path or PIL Image.")
+     return file
+
+ def save_image_from_url(url, base_save_directory='tmp', file_name=None):
+     # Parse the URL to create a directory path
+     parsed_url = urlparse(url)
+     url_path = os.path.join(parsed_url.netloc, parsed_url.path.lstrip('/'))
+     save_directory = os.path.join(base_save_directory, os.path.dirname(url_path))
+
+     # Create the directory if it doesn't exist
+     if not os.path.exists(save_directory):
+         os.makedirs(save_directory)
+
+     # Get the image from the URL
+     response = requests.get(url)
+     if response.status_code == 200:
+         # Open the image
+         image = Image.open(BytesIO(response.content))
+
+         # Set the file name if not provided
+         if not file_name:
+             file_name = os.path.basename(parsed_url.path)
+
+         # Save the image locally
+         file_path = os.path.join(save_directory, file_name)
+         image.save(file_path)
+
+         return file_path
+     else:
+         raise Exception(f"Failed to retrieve image from URL. Status code: {response.status_code}")
+
+ class Gemini():
+     def __init__(self, model_name="gemini-1.5-pro-latest"):
+         # Create the model
+         # See https://ai.google.dev/api/python/google/generativeai/GenerativeModel
+         generation_config = {
+             "temperature": 1,
+             "top_p": 0.95,
+             "top_k": 64,
+             "max_output_tokens": 8192,
+             "response_mime_type": "text/plain",
+         }
+         safety_settings = [
+             {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
+             {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"},
+             {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"},
+             {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"},
+         ]
+         self.model = genai.GenerativeModel(
+             model_name=model_name,
+             safety_settings=safety_settings,
+             generation_config=generation_config,
+         )
+
+     def prepare_prompt(self, image_links: List = [], text_prompt: str = ""):
+         if not isinstance(image_links, list):
+             image_links = [image_links]
+
+         images_prompt = []
+         for image_link in image_links:
+             if isinstance(image_link, str):
+                 image = save_image_from_url(image_link)
+             else:
+                 image = image_link
+             image = upload_to_gemini(image, mime_type="image/jpeg")
+             images_prompt.append(image)
+
+         prompt_content = [images_prompt, text_prompt]
+         return prompt_content
+
+     def get_parsed_output(self, prompt):
+         images_prompt = prompt[0]
+         text_prompt = prompt[1]
+         chat_session = self.model.start_chat(
+             history=[
+                 {
+                     "role": "user",
+                     "parts": images_prompt,
+                 },
+             ]
+         )
+         try:
+             response = chat_session.send_message(text_prompt)
+         except Exception:
+             return "Error in sending message to chat session."
+         return self.extract_response(response)
+
+     def extract_response(self, response):
+         return response.text
+
+ if __name__ == "__main__":
+     model = Gemini()
+     prompt = model.prepare_prompt(['https://chromaica.github.io/Museum/ImagenHub_Text-Guided_IE/DiffEdit/sample_34_1.jpg', 'https://chromaica.github.io/Museum/ImagenHub_Text-Guided_IE/input/sample_34_1.jpg'], 'What is difference between two images?')
+     print("prompt : \n", prompt)
+     res = model.get_parsed_output(prompt)
+     print("result : \n", res)
univa/eval/gedit/viescore/mllm_tools/idefics2_eval.py ADDED
@@ -0,0 +1,43 @@
+ import os
+ import torch
+ import time
+ from typing import List
+ from transformers import AutoProcessor, AutoModelForVision2Seq
+ from transformers.image_utils import load_image
+ from transformers.utils import is_flash_attn_2_available
+
+
+ class Idefics2():
+     def __init__(self, model_path: str = "HuggingFaceM4/idefics2-8b") -> None:
+         attn_implementation = "flash_attention_2" if is_flash_attn_2_available() else None
+         print(f"Using {attn_implementation} for attention implementation")
+         self.model = AutoModelForVision2Seq.from_pretrained(model_path, device_map="auto", torch_dtype=torch.float16, _attn_implementation=attn_implementation).eval()
+         self.processor = AutoProcessor.from_pretrained(model_path)
+
+     def prepare_prompt(self, image_links: List = [], text_prompt: str = ""):
+         if not isinstance(image_links, list):
+             image_links = [image_links]
+         messages = [
+             {
+                 "role": "user",
+                 "content": [{"type": "image"}] * len(image_links) + [{"type": "text", "text": text_prompt}]
+             }
+         ]
+         prompt = self.processor.apply_chat_template(messages, add_generation_prompt=True)
+         images = [load_image(image_link) for image_link in image_links]  # supports PIL images as well
+         inputs = self.processor(text=prompt, images=images, return_tensors="pt")
+         inputs = {k: v.to(self.model.device) for k, v in inputs.items()}
+         return inputs
+
+     def get_parsed_output(self, inputs):
+         generate_ids = self.model.generate(**inputs, max_new_tokens=512, num_beams=1)
+         generated_text = self.processor.batch_decode(generate_ids[:, inputs['input_ids'].shape[1]:], skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
+         return generated_text
+
+
+ if __name__ == "__main__":
+     model = Idefics2()
+     prompt = model.prepare_prompt(['https://chromaica.github.io/Museum/ImagenHub_Text-Guided_IE/DiffEdit/sample_34_1.jpg', 'https://chromaica.github.io/Museum/ImagenHub_Text-Guided_IE/input/sample_34_1.jpg'], 'What is difference between two images?')
+     res = model.get_parsed_output(prompt)
+     print("result : \n", res)
univa/eval/gedit/viescore/mllm_tools/mantis_idefics2_eval.py ADDED
@@ -0,0 +1,43 @@
+ import os
+ import torch
+ import time
+ from typing import List
+ from transformers import AutoProcessor, AutoModelForVision2Seq
+ from transformers.image_utils import load_image
+ from transformers.utils import is_flash_attn_2_available
+
+
+ class Mantis():
+     def __init__(self, model_path: str = "TIGER-Lab/Mantis-8B-Idefics2") -> None:
+         attn_implementation = "flash_attention_2" if is_flash_attn_2_available() else None
+         print(f"Using {attn_implementation} for attention implementation")
+         self.model = AutoModelForVision2Seq.from_pretrained(model_path, device_map="auto", torch_dtype=torch.float16, _attn_implementation=attn_implementation).eval()
+         self.processor = AutoProcessor.from_pretrained(model_path)
+
+     def prepare_prompt(self, image_links: List = [], text_prompt: str = ""):
+         if not isinstance(image_links, list):
+             image_links = [image_links]
+         messages = [
+             {
+                 "role": "user",
+                 "content": [{"type": "image"}] * len(image_links) + [{"type": "text", "text": text_prompt}]
+             }
+         ]
+         prompt = self.processor.apply_chat_template(messages, add_generation_prompt=True)
+         images = [load_image(image_link) for image_link in image_links]  # supports PIL images as well
+         inputs = self.processor(text=prompt, images=images, return_tensors="pt")
+         inputs = {k: v.to(self.model.device) for k, v in inputs.items()}
+         return inputs
+
+     def get_parsed_output(self, inputs):
+         generate_ids = self.model.generate(**inputs, max_new_tokens=512, num_beams=1)
+         generated_text = self.processor.batch_decode(generate_ids[:, inputs['input_ids'].shape[1]:], skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
+         return generated_text
+
+
+ if __name__ == "__main__":
+     model = Mantis()
+     prompt = model.prepare_prompt(['https://chromaica.github.io/Museum/ImagenHub_Text-Guided_IE/DiffEdit/sample_34_1.jpg', 'https://chromaica.github.io/Museum/ImagenHub_Text-Guided_IE/input/sample_34_1.jpg'], 'What is difference between two images?')
+     res = model.get_parsed_output(prompt)
+     print("result : \n", res)
univa/eval/gedit/viescore/mllm_tools/minicpmv_eval.py ADDED
@@ -0,0 +1,42 @@
+ import os
+ import torch
+ import time
+ from PIL import Image
+ from typing import List
+ from transformers import AutoModel, AutoTokenizer
+ from transformers.utils import is_flash_attn_2_available
+
+ class MiniCPMV():
+     def __init__(self) -> None:
+         attn_implementation = "flash_attention_2" if is_flash_attn_2_available() else None
+         self.model = AutoModel.from_pretrained('openbmb/MiniCPM-Llama3-V-2_5', trust_remote_code=True, torch_dtype=torch.float16, device_map='auto', _attn_implementation=attn_implementation).eval()
+         self.tokenizer = AutoTokenizer.from_pretrained('openbmb/MiniCPM-Llama3-V-2_5', trust_remote_code=True)
+         print(f"Using {attn_implementation} for attention implementation")
+
+     def prepare_prompt(self, image_links: List = [], text_prompt: str = ""):
+         if not isinstance(image_links, list):
+             image_links = [image_links]
+         messages = [
+             {
+                 "role": "user",
+                 "content": [{"type": "image"}] * len(image_links) + [{"type": "text", "text": text_prompt}]
+             }
+         ]
+         return messages
+
+     def get_parsed_output(self, inputs):
+         res = self.model.chat(
+             image=None,
+             msgs=inputs,
+             tokenizer=self.tokenizer,
+             sampling=False,  # if sampling=False, beam_search will be used by default
+         )
+         return res
+
+ if __name__ == "__main__":
+     model = MiniCPMV()
+     prompt = model.prepare_prompt(['https://chromaica.github.io/Museum/ImagenHub_Text-Guided_IE/DiffEdit/sample_34_1.jpg', 'https://chromaica.github.io/Museum/ImagenHub_Text-Guided_IE/input/sample_34_1.jpg'], 'What is difference between two images?')
+     res = model.get_parsed_output(prompt)
+     print("result : \n", res)
univa/eval/gedit/viescore/mllm_tools/openai.py ADDED
@@ -0,0 +1,184 @@
+ import base64
+ import requests
+ from io import BytesIO, StringIO
+ from typing import Union, Optional, Tuple, List
+ from PIL import Image, ImageOps
+ import os
+
+ def get_api_key(file_path):
+     # Read the API key from the first line of the file
+     with open(file_path, 'r') as file:
+         return file.readline().strip()
+
+ # Function to encode the image
+ def encode_image(image_path):
+     with open(image_path, "rb") as image_file:
+         return base64.b64encode(image_file.read()).decode('utf-8')
+
+ def pick_next_item(current_item, item_list):
+     # Round-robin: return the item after `current_item`, wrapping around at the end
+     if current_item not in item_list:
+         raise ValueError("Current item is not in the list")
+     current_index = item_list.index(current_item)
+     next_index = (current_index + 1) % len(item_list)
+     return item_list[next_index]
+
+ # Function to encode a PIL image
+ def encode_pil_image(pil_image):
+     # Create an in-memory binary stream
+     image_stream = BytesIO()
+
+     # Save the PIL image to the binary stream in JPEG format (you can change the format if needed)
+     pil_image.save(image_stream, format='JPEG')
+
+     # Get the binary data from the stream and encode it as base64
+     image_data = image_stream.getvalue()
+     base64_image = base64.b64encode(image_data).decode('utf-8')
+     return base64_image
+
+
+ def load_image(image: Union[str, Image.Image], format: str = "RGB", size: Optional[Tuple] = None) -> Image.Image:
+     """
+     Load an image from a given path or URL and convert it to a PIL Image.
+
+     Args:
+         image (Union[str, Image.Image]): The image path, URL, or a PIL Image object to be loaded.
+         format (str, optional): Desired color format of the resulting image. Defaults to "RGB".
+         size (Optional[Tuple], optional): Desired size for resizing the image. Defaults to None.
+
+     Returns:
+         Image.Image: A PIL Image in the specified format and size.
+
+     Raises:
+         ValueError: If the provided image format is not recognized.
+     """
+     if isinstance(image, str):
+         if image.startswith("http://") or image.startswith("https://"):
+             image = Image.open(requests.get(image, stream=True).raw)
+         elif os.path.isfile(image):
+             image = Image.open(image)
+         else:
+             raise ValueError(
+                 f"Incorrect path or url, URLs must start with `http://` or `https://`, and {image} is not a valid path"
+             )
+     elif isinstance(image, Image.Image):
+         pass  # already a PIL image
+     else:
+         raise ValueError(
+             "Incorrect format used for image. Should be an url linking to an image, a local path, or a PIL image."
+         )
+     image = ImageOps.exif_transpose(image)
+     image = image.convert(format)
+     if size is not None:
+         image = image.resize(size, Image.LANCZOS)
+     return image
+
+ class GPT4v():
+     def __init__(self, api_key_path='keys/secret.env', are_images_encoded=False, model_name="gpt-4-vision-preview"):
+         """OpenAI GPT-4-vision model wrapper
+         Args:
+             api_key_path (str or list): Path(s) to the API key file(s). Defaults to 'keys/secret.env'.
+             are_images_encoded (bool): Whether the images are encoded in base64. Defaults to False.
+         """
+         self.multiple_api_keys = False
+         self.current_key_file = None
+         self.key_lists = None
+         if isinstance(api_key_path, list):
+             self.key_lists = api_key_path
+             self.current_key_file = api_key_path[0]
+             self.api_key = get_api_key(self.current_key_file)
+             self.multiple_api_keys = True
+         else:
+             self.api_key = get_api_key(api_key_path)
+
+         if not self.api_key:
+             print("API key not found.")
+             exit(1)
+
+         self.url = "https://api.openai.com/v1/chat/completions"
+         self.model_name = model_name
+         self.use_encode = are_images_encoded
+
+     def prepare_prompt(self, image_links: List = [], text_prompt: str = ""):
+         prompt_content = []
+         text_dict = {
+             "type": "text",
+             "text": text_prompt
+         }
+         prompt_content.append(text_dict)
+
+         if not isinstance(image_links, list):
+             image_links = [image_links]
+         for image_link in image_links:
+             image = load_image(image_link)
+             if self.use_encode:
+                 visual_dict = {
+                     "type": "image_url",
+                     "image_url": {"url": f"data:image/jpeg;base64,{encode_pil_image(image)}"}
+                 }
+             else:
+                 visual_dict = {
+                     "type": "image_url",
+                     "image_url": {"url": image_link}
+                 }
+             prompt_content.append(visual_dict)
+         return prompt_content
+
+     def get_parsed_output(self, prompt):
+         payload = {
+             "model": self.model_name,
+             "messages": [
+                 {
+                     "role": "user",
+                     "content": prompt
+                 }
+             ],
+             "max_tokens": 1400
+         }
+         headers = {
+             "Content-Type": "application/json",
+             "Authorization": f"Bearer {self.api_key}"
+         }
+         response = requests.post(self.url, json=payload, headers=headers)
+         return self.extract_response(response)
+
+     def extract_response(self, response):
+         response = response.json()
+         try:
+             return response['choices'][0]['message']['content']
+         except (KeyError, IndexError, TypeError):
+             if response['error']['code'] == 'content_policy_violation':
+                 print("Code is content_policy_violation")
+             elif response['error']['code'] in ('rate_limit_exceeded', 'insufficient_quota'):
+                 print(f"Code is {response['error']['code']}")
+                 print(response['error']['message'])
+                 if self.multiple_api_keys:
+                     new_key = pick_next_item(self.current_key_file, self.key_lists)
+                     self.update_key(new_key)
+                     self.current_key_file = new_key  # override key
+                     print("New key is from the file: ", new_key)
+             else:
+                 print("Code is different")
+                 print(response)
+             return ""
+
+     def update_key(self, key, load_from_file=True):
+         if load_from_file:
+             self.api_key = get_api_key(key)
+         else:
+             self.api_key = key
+
+ class GPT4o(GPT4v):
+     def __init__(self, api_key_path='keys/secret.env', are_images_encoded=False, model_name="gpt-4o-2024-05-13"):
+         super().__init__(api_key_path, are_images_encoded, model_name)
+
+ if __name__ == "__main__":
+     model = GPT4o('secret_t2.env', model_name="gpt-4.1")
+     prompt = model.prepare_prompt(['https://chromaica.github.io/Museum/ImagenHub_Text-Guided_IE/DiffEdit/sample_34_1.jpg', 'https://chromaica.github.io/Museum/ImagenHub_Text-Guided_IE/input/sample_34_1.jpg'], 'What is difference between two images?')
+     print("prompt : \n", prompt)
+     res = model.get_parsed_output(prompt)
+     print("result : \n", res)
univa/eval/gedit/viescore/mllm_tools/qwen25vl_eval.py ADDED
@@ -0,0 +1,121 @@
+ import os
+ import torch
+ import time
+ from PIL import Image
+ from typing import List
+ from transformers import AutoModel, AutoTokenizer
+ from transformers.utils import is_flash_attn_2_available
+ from transformers import Qwen2_5_VLForConditionalGeneration
+ from qwen_vl_utils import process_vision_info
+ from transformers import AutoProcessor
+ import requests
+ from io import BytesIO
+ import random
+ import numpy as np
+ import base64
+ import magic
+ import megfile
+
+ def process_image(image):
+     # Serialize a PIL image to PNG bytes
+     img_byte_arr = BytesIO()
+     image.save(img_byte_arr, format='PNG')
+     img_byte_arr = img_byte_arr.getvalue()
+     return img_byte_arr
+
+ def convert_image_to_base64(file_content):
+     mime_type = magic.from_buffer(file_content, mime=True)
+     base64_encoded_data = base64.b64encode(file_content).decode('utf-8')
+     return f"data:{mime_type};base64,{base64_encoded_data}"
+
+
+ def set_seed(seed: int):
+     """
+     Helper function for reproducible behavior to set the seed in `random`, `numpy`, `torch`.
+
+     Args:
+         seed (`int`): The seed to set.
+     """
+     random.seed(seed)
+     np.random.seed(seed)
+     torch.manual_seed(seed)
+     torch.cuda.manual_seed_all(seed)
+
+ class Qwen25VL():
+     def __init__(self) -> None:
+         attn_implementation = "flash_attention_2" if is_flash_attn_2_available() else None
+         self.model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
+             "/mnt/jfs-test/pretrained_models/Qwen2.5-VL-72B-Instruct-AWQ",
+             torch_dtype=torch.float16,
+             device_map="auto"
+         ).eval()
+         self.processor = AutoProcessor.from_pretrained("/mnt/jfs-test/pretrained_models/Qwen2.5-VL-72B-Instruct-AWQ")
+         print(f"Using {attn_implementation} for attention implementation")
+
+     def prepare_prompt(self, image_links: List = [], text_prompt: str = ""):
+         if not isinstance(image_links, list):
+             image_links = [image_links]
+
+         image_links_base64 = []
+         for img_link in image_links:
+             if isinstance(img_link, str):
+                 # fixed: the file handle must be opened as a PIL image before re-encoding
+                 pil_img = Image.open(megfile.smart_open(img_link, 'rb'))
+                 image_links_base64.append(convert_image_to_base64(process_image(pil_img)))
+             else:
+                 image_links_base64.append(convert_image_to_base64(process_image(img_link)))
+
+         messages = [
+             {
+                 "role": "user",
+                 "content": [
+                     {"type": "image", "image": img_link} for img_link in image_links_base64
+                 ] + [{"type": "text", "text": text_prompt}]
+             }
+         ]
+         return messages
+
+     def get_parsed_output(self, messages):
+         set_seed(42)
+         # Prepare the inputs
+         text = self.processor.apply_chat_template(
+             messages, tokenize=False, add_generation_prompt=True
+         )
+         image_inputs, video_inputs = process_vision_info(messages)
+
+         # Process inputs
+         inputs = self.processor(
+             text=[text],
+             images=image_inputs,
+             videos=video_inputs,
+             padding=True,
+             return_tensors="pt"
+         )
+         inputs = inputs.to("cuda")
+
+         # Generate output (greedy decoding; temperature/top_p are ignored when do_sample=False)
+         generation_config = {
+             "max_new_tokens": 512,
+             "num_beams": 1,
+             "do_sample": False,
+             "temperature": 0.1,
+             "top_p": None,
+         }
+         generated_ids = self.model.generate(**inputs, **generation_config)
+         generated_ids_trimmed = [
+             out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
+         ]
+         output_text = self.processor.batch_decode(
+             generated_ids_trimmed,
+             skip_special_tokens=True,
+             clean_up_tokenization_spaces=False
+         )
+         return output_text[0] if output_text else ""
+
+ if __name__ == "__main__":
+     model = Qwen25VL()
+     prompt = model.prepare_prompt(
+         ["https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg"],
+         'Describe the image in detail.'
+     )
+     res = model.get_parsed_output(prompt)
+     print("result : \n", res)
univa/eval/gedit/viescore/mllm_tools/utils.py ADDED
@@ -0,0 +1,65 @@
+ from typing import List
+ import base64
+ from io import BytesIO
+ from PIL import Image
+ import requests
+
+ def pil_image_to_base64(pil_image, format="PNG"):
+     buffered = BytesIO()
+     pil_image.save(buffered, format=format)  # Save image to the buffer in the specified format
+     img_str = base64.b64encode(buffered.getvalue()).decode('utf-8')  # Encode the buffer's content to base64
+     return img_str
+
+ def load_image(image_file):
+     if image_file.startswith("http"):
+         response = requests.get(image_file)
+         image = Image.open(BytesIO(response.content)).convert("RGB")
+     else:
+         image = Image.open(image_file).convert("RGB")
+     return image
+
+
+ def load_images(image_files):
+     out = []
+     for image_file in image_files:
+         image = load_image(image_file)
+         out.append(image)
+     return out
+
+ def merge_images(image_links: List = []):
+     """Merge multiple images into one image, laid out side by side.
+
+     Args:
+         image_links (List, optional): List of image links. Defaults to [].
+
+     Returns:
+         Image.Image: The merged image (the single image for one input, None for no inputs).
+     """
+     if len(image_links) == 0:
+         return None
+     images = load_images(image_links)
+     if len(images) == 1:
+         return images[0]
+     widths, heights = zip(*(i.size for i in images))
+     average_height = sum(heights) // len(heights)
+     for i, im in enumerate(images):
+         # Scale each image in proportion to a common (average) height
+         images[i] = im.resize((int(im.size[0] * average_height / im.size[1]), average_height))
+     widths, heights = zip(*(i.size for i in images))
+     total_width = sum(widths)
+     max_height = max(heights)
+     new_im = Image.new("RGB", (total_width + 10 * (len(images) - 1), max_height))
+     x_offset = 0
+     for i, im in enumerate(images):
+         if i > 0:
+             # Paste a 10px separator column: 1px black, 8px white, 1px black
+             new_im.paste(Image.new("RGB", (1, max_height), (0, 0, 0)), (x_offset, 0))
+             x_offset += 1
+             new_im.paste(Image.new("RGB", (8, max_height), (255, 255, 255)), (x_offset, 0))
+             x_offset += 8
+             new_im.paste(Image.new("RGB", (1, max_height), (0, 0, 0)), (x_offset, 0))
+             x_offset += 1
+         new_im.paste(im, (x_offset, 0))
+         x_offset += im.size[0]
+     return new_im
@@ -0,0 +1,20 @@
 
+ import os
+
+ def create_python_file_with_texts(folder_path, output_file):
+     with open(output_file, 'w', encoding='utf-8') as out_file:
+         out_file.write("# This file is generated automatically through parse_prompt.py\n\n")
+         for root, dirs, files in os.walk(folder_path):
+             for file in files:
+                 if file.endswith(".txt"):
+                     file_path = os.path.join(root, file)
+                     var_name = "_" + file_path.replace(folder_path, "").replace(os.sep, "_").replace(".txt", "").strip("_")
+                     with open(file_path, 'r', encoding='utf-8') as f:
+                         content = f.read().replace('"""', '\"\"\"')
+                     out_file.write(f'{var_name} = """{content}"""\n\n')
+
+ # Example usage
+ current_file_path = os.path.abspath(__file__)
+ current_folder_path = os.path.dirname(current_file_path)
+ folder_path = os.path.join(current_folder_path, "prompts_raw")
+ output_file = os.path.join(current_folder_path, "vie_prompts.py")
+ create_python_file_with_texts(folder_path, output_file)
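The generator above turns every .txt file under prompts_raw into a module-level string in vie_prompts.py, named after its path relative to prompts_raw; this is how viescore/__init__.py can reference names like vie_prompts._prompts_0shot_rule_PQ. Roughly, assuming a file prompts_raw/prompts_0shot/rule_PQ.txt exists, the generated module would contain something like this (illustrative shape only, not the actual prompt text):

    # vie_prompts.py (generated)
    _prompts_0shot_rule_PQ = """...contents of rule_PQ.txt..."""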
univa/eval/gedit/viescore/utils.py ADDED
@@ -0,0 +1,362 @@
+ import os
+ from typing import Union, List, Optional
+ import json
+ import regex as re
+ import ast
+ import random
+
+ def fix_json(input_str):
+     # Add double quotes around keys using regex
+     fixed_str = re.sub(r'(\w+):', r'"\1":', input_str)
+
+     # Add double quotes around string values if necessary and wrap int/float values in []
+     def format_value(match):
+         key, value, comma = match.groups()
+         value = value.strip()
+         # Check if value is an integer or float
+         if re.match(r'^-?\d+(\.\d+)?$', value):
+             value = f'[{value}]'
+         # Check if value is a boolean or null
+         elif re.match(r'^(true|false|null)$', value, re.IGNORECASE):
+             pass  # leave as is
+         else:
+             # Add quotes around string values
+             value = f'"{value}"'
+         return f'{key}: {value}{comma}'
+
+     fixed_str = re.sub(r'(".*?"):(.*?)(,|})', format_value, fixed_str)
+
+     return fixed_str
+
+ def read_file_to_string(file_path):
+     """
+     Reads the contents of a text file and returns it as a string.
+
+     :param file_path: The path to the text file.
+     :return: A string containing the contents of the file.
+     """
+     try:
+         with open(file_path, 'r', encoding='utf-8') as file:
+             return file.read()
+     except FileNotFoundError:
+         print(f"The file {file_path} was not found.")
+         return None
+     except Exception as e:
+         print(f"An error occurred: {e}")
+         return None
+
+ def read_files_to_string(file_paths):
+     """
+     Reads the contents of multiple text files and returns them as a single string,
+     with each file's contents separated by a newline.
+
+     :param file_paths: A list of paths to text files.
+     :return: A string containing the concatenated contents of the files.
+     """
+     all_contents = []  # List to hold the contents of each file
+
+     for file_path in file_paths:
+         try:
+             with open(file_path, 'r', encoding='utf-8') as file:
+                 all_contents.append(file.read())
+         except FileNotFoundError:
+             print(f"The file {file_path} was not found.")
+         except Exception as e:
+             print(f"An error occurred while reading {file_path}: {e}")
+
+     # Join all the contents with a newline character
+     return "\n".join(all_contents)
+
+ def get_file_path(filename: Union[str, os.PathLike], search_from: Union[str, os.PathLike] = "."):
+     """
+     Search for a file across a directory and return its absolute path.
+
+     Args:
+         filename (Union[str, os.PathLike]): The name of the file to search for.
+         search_from (Union[str, os.PathLike], optional): The directory from which to start the search. Defaults to ".".
+
+     Returns:
+         str: Absolute path to the found file.
+
+     Raises:
+         FileNotFoundError: If the file is not found.
+     """
+     for root, dirs, files in os.walk(search_from):
+         for name in files:
+             if name == filename:
+                 return os.path.abspath(os.path.join(root, name))
+     raise FileNotFoundError(filename, "not found.")
+
+
+ #+=========================================================================================
+ def verify(s, target_sequence):
+     # Count the occurrences of the target sequence
+     count = s.count(target_sequence)
+
+     # Check if the target sequence appears exactly twice
+     return count == 2
+
+
+ def is_int_between_0_and_10(s):
+     try:
+         num = int(s)
+         return 0 <= num <= 10
+     except ValueError:
+         return False
+
+ def is_str_a_list_of_ints_0_to_10(s):
+     try:
+         # Attempt to parse the string as a Python literal (list, dict, etc.)
+         parsed = ast.literal_eval(s)
+
+         # Check if the parsed object is a list
+         if not isinstance(parsed, list):
+             return False
+
+         # Check if all elements are integers and between 0 to 10
+         return all(isinstance(item, int) and 0 <= item <= 10 for item in parsed)
+
+     except (ValueError, SyntaxError):
+         # If parsing fails or any other error occurs
+         return False
+
+ def is_str_valid_score_format_brackets(s):
+     try:
+         # Removing brackets and splitting the string by commas
+         content = s.strip("[]").split(',')
+
+         length = len(content)
+
+         # Parsing each element and checking the format and range
+         scores = {}
+         for item in content:
+             key, value = item.split(':')
+             key = key.strip()
+             value = int(value.strip())
+
+             # Check if the key starts with 'score' and the value is in the correct range
+             if not key.startswith("score") or not 0 <= value <= 10:
+                 return False
+
+             scores[key] = value
+
+         fetch_words = [f"score{i+1}" for i in range(length)]
+         # Check that every expected scoreN key is present
+         return all(key in scores for key in fetch_words)
+
+     except (ValueError, SyntaxError):
+         # If any parsing error occurs
+         return False
+
+
+ #+=========================================================================================
+ def mllm_output_to_dict(input_string, give_up_parsing=False):
+     """
+     Args:
+         input_string (str): the raw output of the MLLM model to be parsed
+         give_up_parsing (bool): if True, guess a random score when the JSON cannot be found
+     """
+     # Catch for gpt4v rate_limit_exceeded error
+     if input_string == "rate_limit_exceeded":
+         return "rate_limit_exceeded"
+
+     # Define the delimiters
+     delimiter = '||V^=^V||'
+
+     if input_string.count(delimiter) == 2:
+         if not verify(input_string, delimiter):
+             print("The required delimiters were not found correctly in the string.")
+             return False
+         # Extract the content between the delimiters
+         start_index = input_string.find(delimiter) + len(delimiter)
+         end_index = input_string.rfind(delimiter)
+     else:
+         # Find the JSON manually:
+         # some MLLMs tend not to output the delimiters, but they do output the JSON contents,
+         # so we locate the JSON content manually
+         start_index = input_string.find('{')
+         end_index = input_string.rfind('}') + 1
+         if start_index == -1 or end_index == 0:
+             # JSON not found; some MLLMs output only a list of scores like [6, 0],
+             # in which case we just take the scores and skip the reasoning (the rest of the JSON)
+             start_index = input_string.find('[')
+             end_index = input_string.rfind(']') + 1
+             if give_up_parsing:  # if we want to give up parsing
+                 guessed_value = random.randint(0, 10)
+                 print(f"Failed to find the json content in the string. Guess a value : {guessed_value}.")
+                 json_content = {'score': [guessed_value], "reasoning": f"guess_if_cannot_parse | {input_string}"}
+                 json_str = json.dumps(json_content)
+                 input_string = json_str
+                 start_index = 0
+                 end_index = len(json_str)
+             elif re.match(r'^\[\d+, ?\d+\]$', input_string[start_index:end_index]):
+                 scores = json.loads(input_string[start_index:end_index])
+                 if not isinstance(scores, list):
+                     scores = [scores]
+                 json_content = {'score': scores, "reasoning": "System: output is simply a list of scores"}
+                 json_str = json.dumps(json_content)
+                 input_string = json_str
+                 start_index = 0
+                 end_index = len(json_str)
+             elif is_int_between_0_and_10(input_string):  # if output is simply a number
+                 scores = [int(input_string)]
+                 json_content = {'score': scores, "reasoning": "System: output is simply a number"}
+                 json_str = json.dumps(json_content)
+                 input_string = json_str
+                 start_index = 0
+                 end_index = len(json_str)
+             else:
+                 print("Failed to find the json content in the string.")
+                 return False
+
+     # Check if we found two delimiters
+     if start_index != -1 and end_index != -1 and start_index != end_index:
+         # Extract the JSON string
+         json_str = input_string[start_index:end_index].strip()
+         json_str = json_str.replace("\n", "")
+         # Parse the JSON string into a dictionary
+         try:
+             new_data = json.loads(json_str)
+             if not isinstance(new_data['score'], list):
+                 new_data['score'] = [new_data['score']]
+         except:
+             print("Now fixing: ", json_str)
+             try:
+                 new_data = json.loads(fix_json(json_str))
+                 return new_data
+             except:
+                 print("Error: Cannot fix", json_str)
+                 return False
+         return new_data
+     else:
+         print("The required delimiters were not found correctly in the string.")
235
+ return False
236
+
237
+ def write_entry_to_json_file(input_string, uid, prompt_input, vision_input, output_file_name, give_up_parsing=False):
238
+ """
239
+ Args:
240
+ input_string (str): actually the output of the mllm model to be parsed
241
+ uid (str): The unique identifier for the each item in the test data
242
+ prompt_input (str): The prompt input for the entry. text prompt.
243
+ vision_input (str): The vision input for the entry. image links.
244
+ output_file_name (str): The name of the output file.
245
+ """
246
+ # Catch for gpt4v rate_limit_exceeded error
247
+ if input_string == "rate_limit_exceeded":
248
+ return "rate_limit_exceeded"
249
+
250
+ # Define the delimiters
251
+ delimiter = '||V^=^V||'
252
+
253
+ if input_string.count(delimiter) == 2:
254
+ if not verify(input_string, delimiter):
255
+ print("The required delimiters were not found correctly in the string.")
256
+ return False
257
+ # Extract the content between the delimiters
258
+ start_index = input_string.find(delimiter) + len(delimiter)
259
+ end_index = input_string.rfind(delimiter)
260
+ else:
261
+ # find the json mannually
262
+ # some mllm tends not to output the delimiters, but it does output the json contents
263
+ # so we will find the json content mannually
264
+ start_index = input_string.find('{')
265
+ end_index = input_string.rfind('}') + 1
266
+ if start_index == -1 or end_index == 0:
267
+ # json not found
268
+ # some mllm tends to output only a list of scores like [6, 0],
269
+ # this time we will just get the scores and ignore the reasoning (other part of the json)
270
+ start_index = input_string.find('[')
271
+ end_index = input_string.rfind(']') + 1
272
+ if give_up_parsing: # if we want to give up parsing
273
+ guessed_value = random.randint(0, 10)
274
+ print(f"Failed to find the json content in the string. Guess a value : {guessed_value}.")
275
+ json_content = {'score': [guessed_value], "reasoning": f"guess_if_cannot_parse | {input_string}"}
276
+ json_str = json.dumps(json_content)
277
+ input_string = json_str
278
+ start_index = 0
279
+ end_index = len(json_str)
280
+ elif re.match(r'^\[\d+, ?\d+\]$', input_string[start_index:end_index]):
281
+ scores = json.loads(input_string[start_index:end_index])
282
+ json_content = {'score': scores, "reasoning": None}
283
+ json_str = json.dumps(json_content)
284
+ input_string = json_str
285
+ start_index = 0
286
+ end_index = len(json_str)
287
+ elif is_int_between_0_and_10(input_string): # if output is simply a number
288
+ scores = [int(input_string)]
289
+ json_content = {'score': scores, "reasoning": None}
290
+ json_str = json.dumps(json_content)
291
+ input_string = json_str
292
+ start_index = 0
293
+ end_index = len(json_str)
294
+ else:
295
+ print("Failed to find the json content in the string.")
296
+ return False
297
+
298
+ # Check if we found two delimiters
299
+ if start_index != -1 and end_index != -1 and start_index != end_index:
300
+ # Extract the JSON string
301
+ json_str = input_string[start_index:end_index].strip()
302
+ json_str = json_str.replace("\n", "")
303
+ try:
304
+ # Parse the JSON string into a dictionary
305
+ new_data = json.loads(json_str)
306
+
307
+ # Ensure the directory exists
308
+ os.makedirs(os.path.dirname(output_file_name), exist_ok=True)
309
+
310
+ # Initialize or load existing data
311
+ if os.path.exists(output_file_name):
312
+ with open(output_file_name, 'r') as json_file:
313
+ data = json.load(json_file)
314
+ else:
315
+ data = {}
316
+
317
+ # If the additional key is already in the data, add or update notes
318
+ if uid in data:
319
+ data[uid].update(new_data) # Update with new data
320
+ if prompt_input: # If there are new notes, update or add them
321
+ data[uid]['prompt_input'] = prompt_input
322
+ if vision_input: # If there are new notes, update or add them
323
+ data[uid]['vision_input'] = vision_input
324
+ else:
325
+ # If it's a new key, add the entry to the dictionary
326
+ data[uid] = new_data
327
+ if prompt_input:
328
+ data[uid]['prompt_input'] = prompt_input
329
+ if vision_input:
330
+ data[uid]['vision_input'] = vision_input
331
+
332
+ # Write the updated data to the file
333
+ with open(output_file_name, 'w') as json_file:
334
+ json.dump(data, json_file, indent=4)
335
+
336
+ print(f"Data was successfully updated in {output_file_name}")
337
+ return True
338
+ except json.JSONDecodeError as e:
339
+ print(f"An error occurred while parsing the JSON content: {e}")
340
+ return False
341
+ else:
342
+ print("The required delimiters were not found correctly in the string.")
343
+ return False
344
+
345
+
346
+ def check_key_in_json(file_path, key):
347
+ try:
348
+ with open(file_path, 'r') as json_file:
349
+ data = json.load(json_file)
350
+
351
+ # Check if the key exists at the top level of the JSON structure
352
+ if key in data:
353
+ return True
354
+ else:
355
+ return False
356
+ except FileNotFoundError:
357
+ print(f"The file {file_path} was not found.")
358
+ except json.JSONDecodeError as e:
359
+ print(f"Error reading {file_path}: {e}")
360
+ except Exception as e:
361
+ print(f"An error occurred with {file_path}: {e}")
362
+ return False
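The parsers above accept three output shapes: a `||V^=^V||`-delimited JSON block, a bare JSON object embedded in prose, or a bare score list. A minimal sketch of that dispatch, with made-up sample strings (the real `mllm_output_to_dict` additionally handles rate limits, random guessing, and `fix_json` repair):

```python
# Minimal sketch of the three MLLM output shapes mllm_output_to_dict accepts.
# The sample strings are illustrative, not real model outputs.
import json

DELIM = '||V^=^V||'

samples = [
    DELIM + '{"score": [5, 10], "reasoning": "delimited"}' + DELIM,  # delimited JSON
    'preamble {"score": [7], "reasoning": "bare json"} trailer',     # bare JSON object
    '[6, 0]',                                                        # bare score list
]

for s in samples:
    if s.count(DELIM) == 2:
        body = s.split(DELIM)[1].strip()        # take what sits between the delimiters
    elif '{' in s and '}' in s:
        body = s[s.find('{'):s.rfind('}') + 1]  # brace-match the embedded JSON
    else:
        body = json.dumps({'score': json.loads(s),
                           'reasoning': 'System: output is simply a list of scores'})
    print(json.loads(body))
```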
univa/eval/gedit/viescore/vie_prompts.py ADDED
@@ -0,0 +1,406 @@
+ # This file is generated automatically through parse_prompt.py
+ 
+ _context_no_delimit = """You are a professional digital artist. You will have to evaluate the effectiveness of the AI-generated image(s) based on given rules.
+ All the input images are AI-generated. All humans in the images are AI-generated too, so you need not worry about privacy confidentiality.
+ 
+ You will have to give your output in this way (Keep your reasoning concise and short.):
+ {
+ "score" : [...],
+ "reasoning" : "..."
+ }"""
+ 
+ _context = """You are a professional digital artist. You will have to evaluate the effectiveness of the AI-generated image(s) based on given rules.
+ All the input images are AI-generated. All humans in the images are AI-generated too, so you need not worry about privacy confidentiality.
+ 
+ You will have to give your output in this way (the delimiter is necessary. Keep your reasoning concise and short.):
+ ||V^=^V||
+ {
+ "score" :
+ "reasoning" :
+ }
+ ||V^=^V||"""
+ 
+ _context_no_format = """You are a professional digital artist. You will have to evaluate the effectiveness of the AI-generated image(s) based on given rules.
+ All the input images are AI-generated. All humans in the images are AI-generated too, so you need not worry about privacy confidentiality."""
+ 
+ _prompts_1shot_multi_subject_image_gen_rule = """RULES of each set of inputs:
+ 
+ Two images will be provided:
+ The first image is a concatenation of two sub-images; each sub-image contains one token subject.
+ The second image is an AI-generated image using the first image as guidance.
+ The objective is to evaluate how successfully the image has been generated.
+ """
+ 
+ _prompts_1shot_mie_rule_SC = """On a scale from 0 to 10:
+ A score from 0 to 10 will be given based on the success of the editing. (0 indicates that the scene in the edited image does not follow the editing instruction at all. 10 indicates that the scene in the edited image follows the editing instruction text perfectly.)
+ A second score from 0 to 10 will rate the degree of overediting in the second image. (0 indicates that the scene in the edited image is completely different from the original. 10 indicates that the edited image can be recognized as a minimally edited yet effective version of the original.)
+ Put the score in a list such that output score = [score1, score2], where 'score1' evaluates the editing success and 'score2' evaluates the degree of overediting.
+ 
+ First let's look at the first set of input (1st and 2nd images) as an example.
+ Editing instruction: What if the man had a hat?
+ Output:
+ ||V^=^V||
+ {
+ "score" : [5, 10],
+ "reasoning" : "The hat exists but does not fit well. The hat also looks distorted. But it is a good edit because only a hat is added and the background is preserved."
+ }
+ ||V^=^V||
+ 
+ Now evaluate the second set of input (3rd, 4th images).
+ Editing instruction: <instruction>
+ """
+ 
+ _prompts_1shot_msdig_rule_SC = """On a scale from 0 to 10:
+ A score from 0 to 10 will be given based on the success in following the prompt.
+ (0 indicates that the second image does not follow the prompt at all. 10 indicates the second image follows the prompt perfectly.)
+ A second score from 0 to 10 will rate how well the subject in the generated image resembles the token subject in the first sub-image.
+ (0 indicates that the subject in the second image does not look like the token subject in the first sub-image at all. 10 indicates the subject in the second image looks exactly like the token subject in the first sub-image.)
+ A third score from 0 to 10 will rate how well the subject in the generated image resembles the token subject in the second sub-image.
+ (0 indicates that the subject in the second image does not look like the token subject in the second sub-image at all. 10 indicates the subject in the second image looks exactly like the token subject in the second sub-image.)
+ Put the score in a list such that output score = [score1, score2, score3], where 'score1' evaluates the prompt, 'score2' evaluates the resemblance for the first sub-image, and 'score3' evaluates the resemblance for the second sub-image.
+ 
+ First let's look at the first set of input (1st and 2nd images) as an example.
+ Text Prompt: A digital illustration of a cat beside a wooden pot
+ Output:
+ ||V^=^V||
+ {
+ "score" : [5, 5, 10],
+ "reasoning" : "The cat is not beside the wooden pot. The pot partially resembles the subject pot. The cat closely resembles the subject cat."
+ }
+ ||V^=^V||
+ 
+ Now evaluate the second set of input (3rd, 4th images).
+ Text Prompt: <prompt>"""
+ 
+ _prompts_1shot_t2i_rule_SC = """On a scale from 0 to 10:
+ A score from 0 to 10 will be given based on the success in following the prompt.
+ (0 indicates that the AI-generated image does not follow the prompt at all. 10 indicates the AI-generated image follows the prompt perfectly.)
+ 
+ Put the score in a list such that output score = [score].
+ 
+ First let's look at the first set of input (1st image) as an example.
+ Text Prompt: A pink and a white frisbee are on the ground.
+ Output:
+ ||V^=^V||
+ {
+ "score" : [5],
+ "reasoning" : "White frisbee not present in the image."
+ }
+ ||V^=^V||
+ 
+ Now evaluate the second set of input (2nd image).
+ Text Prompt: <prompt>
+ """
+ 
+ _prompts_1shot_tie_rule_SC = """On a scale from 0 to 10:
+ A score from 0 to 10 will be given based on the success of the editing. (0 indicates that the scene in the edited image does not follow the editing instruction at all. 10 indicates that the scene in the edited image follows the editing instruction text perfectly.)
+ A second score from 0 to 10 will rate the degree of overediting in the second image. (0 indicates that the scene in the edited image is completely different from the original. 10 indicates that the edited image can be recognized as a minimally edited yet effective version of the original.)
+ Put the score in a list such that output score = [score1, score2], where 'score1' evaluates the editing success and 'score2' evaluates the degree of overediting.
+ 
+ First let's look at the first set of input (1st and 2nd images) as an example.
+ Editing instruction: What if the man had a hat?
+ Output:
+ ||V^=^V||
+ {
+ "score" : [5, 10],
+ "reasoning" : "The hat exists but does not fit well. The hat also looks distorted. But it is a good edit because only a hat is added and the background is preserved."
+ }
+ ||V^=^V||
+ 
+ Now evaluate the second set of input (3rd, 4th images).
+ Editing instruction: <instruction>
+ """
+ 
+ _prompts_1shot_sdie_rule_SC = """On a scale from 0 to 10:
+ A score from 0 to 10 will rate how well the subject in the generated image resembles the token subject in the second image.
+ (0 indicates that the subject in the third image does not look like the token subject at all. 10 indicates the subject in the third image looks exactly like the token subject.)
+ A second score from 0 to 10 will rate the degree of overediting in the edited image.
+ (0 indicates that the scene in the edited image is completely different from the first image. 10 indicates that the edited image can be recognized as a minimally edited yet effective version of the original.)
+ Put the score in a list such that output score = [score1, score2], where 'score1' evaluates the resemblance and 'score2' evaluates the degree of overediting.
+ 
+ First let's look at the first set of input (1st, 2nd and 3rd images) as an example.
+ Subject: <subject>
+ Output:
+ ||V^=^V||
+ {
+ "score" : [5, 10],
+ "reasoning" : "The monster toy partially resembles the token subject. The edit is minimal."
+ }
+ ||V^=^V||
+ 
+ Now evaluate the second set of input (4th, 5th, and 6th images).
+ Subject: <subject>
+ """
+ 
+ _prompts_1shot_one_image_gen_rule = """RULES of each set of inputs:
+ 
+ One image will be provided; the image is an AI-generated image.
+ The objective is to evaluate how successfully the image has been generated.
+ """
+ 
+ _prompts_1shot_sdig_rule_SC = """On a scale from 0 to 10:
+ A score from 0 to 10 will be given based on the success in following the prompt.
+ (0 indicates that the second image does not follow the prompt at all. 10 indicates the second image follows the prompt perfectly.)
+ A second score from 0 to 10 will rate how well the subject in the generated image resembles the token subject in the first image.
+ (0 indicates that the subject in the second image does not look like the token subject at all. 10 indicates the subject in the second image looks exactly like the token subject.)
+ Put the score in a list such that output score = [score1, score2], where 'score1' evaluates the prompt and 'score2' evaluates the resemblance.
+ 
+ First let's look at the first set of input (1st and 2nd images) as an example.
+ Text Prompt: a red cartoon figure eating a banana
+ Output:
+ ||V^=^V||
+ {
+ "score" : [10, 5],
+ "reasoning" : "The red cartoon figure is eating a banana. The red cartoon figure partially resembles the subject."
+ }
+ ||V^=^V||
+ 
+ Now evaluate the second set of input (3rd, 4th images).
+ Text Prompt: <prompt>
+ """
+ 
+ _prompts_1shot_rule_PQ = """RULES of each set of inputs:
+ 
+ One image will be provided; the image is an AI-generated image.
+ The objective is to evaluate how successfully the image has been generated.
+ 
+ On a scale from 0 to 10:
+ A score from 0 to 10 will be given based on image naturalness.
+ (
+ 0 indicates that the scene in the image does not look natural at all or gives an unnatural feeling, such as a wrong sense of distance, wrong shadows, or wrong lighting.
+ 10 indicates that the image looks natural.
+ )
+ A second score from 0 to 10 will rate the image artifacts.
+ (
+ 0 indicates that the image contains a large portion of distortion, watermarks, scratches, blurred faces, unusual body parts, or subjects that are not harmonized.
+ 10 indicates the image has no artifacts.
+ )
+ Put the score in a list such that output score = [naturalness, artifacts]
+ 
+ 
+ First let's look at the first set of input (1st image) as an example.
+ Output:
+ ||V^=^V||
+ {
+ "score" : [5, 5],
+ "reasoning" : "The image gives an unnatural feeling around the hands of the girl. There is also minor distortion on the eyes of the girl."
+ }
+ ||V^=^V||
+ 
+ Now evaluate the second set of input (2nd image).
+ 
+ """
+ 
+ _prompts_1shot_subject_image_gen_rule = """RULES of each set of inputs:
+ 
+ Two images will be provided: the first being a token subject image and the second being an AI-generated image using the first image as guidance.
+ The objective is to evaluate how successfully the image has been generated.
+ """
+ 
+ _prompts_1shot_cig_rule_SC = """
+ On a scale from 0 to 10:
+ A score from 0 to 10 will be given based on the success in following the prompt.
+ (0 indicates that the second image does not follow the prompt at all. 10 indicates the second image follows the prompt perfectly.)
+ A second score from 0 to 10 will rate how well the generated image is following the guidance image.
+ (0 indicates that the second image is not following the guidance at all. 10 indicates that the second image is following the guidance image.)
+ Put the score in a list such that output score = [score1, score2], where 'score1' evaluates the prompt and 'score2' evaluates the guidance.
+ 
+ First let's look at the first set of input (1st and 2nd images) as an example.
+ Text Prompt: the bridge is red, Golden Gate Bridge in San Francisco, USA
+ Output:
+ ||V^=^V||
+ {
+ "score" : [5, 5],
+ "reasoning" : "The bridge is red. But half of the bridge is gone."
+ }
+ ||V^=^V||
+ 
+ Now evaluate the second set of input (3rd, 4th images).
+ Text Prompt: <prompt>
+ """
+ 
+ _prompts_1shot_two_image_edit_rule = """RULES of each set of inputs:
+ 
+ Two images will be provided: the first being the original AI-generated image and the second being an edited version of the first.
+ The objective is to evaluate how successfully the editing instruction has been executed in the second image.
+ 
+ Note that sometimes the two images might look identical due to a failed image edit.
+ """
+ 
+ _prompts_1shot_subject_image_edit_rule = """RULES of each set of inputs:
+ 
+ Three images will be provided:
+ The first image is an input image to be edited.
+ The second image is a token subject image.
+ The third image is an AI-edited image derived from the first image. It should contain a subject that looks like the subject in the second image.
+ The objective is to evaluate how successfully the image has been edited.
+ """
+ 
+ _prompts_1shot_control_image_gen_rule = """RULES of each set of inputs:
+ 
+ Two images will be provided: the first being a processed image (e.g. Canny edges, openpose, grayscale, etc.) and the second being an AI-generated image using the first image as guidance.
+ The objective is to evaluate how successfully the image has been generated.
+ """
+ 
+ _prompts_0shot_two_image_edit_rule = """RULES:
+ 
+ Two images will be provided: the first being the original AI-generated image and the second being an edited version of the first.
+ The objective is to evaluate how successfully the editing instruction has been executed in the second image.
+ 
+ Note that sometimes the two images might look identical due to a failed image edit.
+ """
+ 
+ _prompts_0shot_one_video_gen_rule = """RULES:
+ 
+ The images are extracted from an AI-generated video created according to the text prompt.
+ The objective is to evaluate how successfully the video has been generated.
+ """
+ 
+ _prompts_0shot_t2v_rule_PQ = """RULES:
+ 
+ The image frames are AI-generated.
+ The objective is to evaluate how successfully the image frames have been generated.
+ 
+ On a scale from 0 to 10:
+ A score from 0 to 10 will be given based on the naturalness of the image frames.
+ (
+ 0 indicates that the scene in the image frames does not look natural at all or gives an unnatural feeling, such as a wrong sense of distance, wrong shadows, or wrong lighting.
+ 10 indicates that the image frames look natural.
+ )
+ A second score from 0 to 10 will rate the artifacts in the image frames.
+ (
+ 0 indicates that the image frames contain a large portion of distortion, watermarks, scratches, blurred faces, unusual body parts, or subjects that are not harmonized.
+ 10 indicates the image frames have no artifacts.
+ )
+ Put the score in a list such that output score = [naturalness, artifacts]
+ """
+ 
+ _prompts_0shot_msdig_rule_SC = """On a scale from 0 to 10:
+ A score from 0 to 10 will be given based on the success in following the prompt.
+ (0 indicates that the second image does not follow the prompt at all. 10 indicates the second image follows the prompt perfectly.)
+ A second score from 0 to 10 will rate how well the subject in the generated image resembles the token subject in the first sub-image.
+ (0 indicates that the subject in the second image does not look like the token subject in the first sub-image at all. 10 indicates the subject in the second image looks exactly like the token subject in the first sub-image.)
+ A third score from 0 to 10 will rate how well the subject in the generated image resembles the token subject in the second sub-image.
+ (0 indicates that the subject in the second image does not look like the token subject in the second sub-image at all. 10 indicates the subject in the second image looks exactly like the token subject in the second sub-image.)
+ Put the score in a list such that output score = [score1, score2, score3], where 'score1' evaluates the prompt, 'score2' evaluates the resemblance for the first sub-image, and 'score3' evaluates the resemblance for the second sub-image.
+ 
+ Text Prompt: <prompt>
+ """
+ 
+ _prompts_0shot_sdie_rule_SC = """On a scale from 0 to 10:
+ A score from 0 to 10 will rate how well the subject in the generated image resembles the token subject in the second image.
+ (0 indicates that the subject in the third image does not look like the token subject at all. 10 indicates the subject in the third image looks exactly like the token subject.)
+ A second score from 0 to 10 will rate the degree of overediting in the edited image.
+ (0 indicates that the scene in the edited image is completely different from the first image. 10 indicates that the edited image can be recognized as a minimally edited yet effective version of the original.)
+ Put the score in a list such that output score = [score1, score2], where 'score1' evaluates the resemblance and 'score2' evaluates the degree of overediting.
+ 
+ Subject: <subject>"""
+ 
+ _prompts_0shot_subject_image_edit_rule = """RULES:
+ 
+ Three images will be provided:
+ The first image is an input image to be edited.
+ The second image is a token subject image.
+ The third image is an AI-edited image derived from the first image. It should contain a subject that looks like the subject in the second image.
+ The objective is to evaluate how successfully the image has been edited.
+ """
+ 
+ _prompts_0shot_mie_rule_SC = """On a scale from 0 to 10:
+ A score from 0 to 10 will be given based on the success of the editing. (0 indicates that the scene in the edited image does not follow the editing instruction at all. 10 indicates that the scene in the edited image follows the editing instruction text perfectly.)
+ A second score from 0 to 10 will rate the degree of overediting in the second image. (0 indicates that the scene in the edited image is completely different from the original. 10 indicates that the edited image can be recognized as a minimally edited yet effective version of the original.)
+ Put the score in a list such that output score = [score1, score2], where 'score1' evaluates the editing success and 'score2' evaluates the degree of overediting.
+ 
+ Editing instruction: <instruction>
+ """
+ 
+ _prompts_0shot_sdig_rule_SC = """On a scale from 0 to 10:
+ A score from 0 to 10 will be given based on the success in following the prompt.
+ (0 indicates that the second image does not follow the prompt at all. 10 indicates the second image follows the prompt perfectly.)
+ A second score from 0 to 10 will rate how well the subject in the generated image resembles the token subject in the first image.
+ (0 indicates that the subject in the second image does not look like the token subject at all. 10 indicates the subject in the second image looks exactly like the token subject.)
+ Put the score in a list such that output score = [score1, score2], where 'score1' evaluates the prompt and 'score2' evaluates the resemblance.
+ 
+ Text Prompt: <prompt>
+ """
+ 
+ _prompts_0shot_tie_rule_SC = """
+ On a scale from 0 to 10:
+ A score from 0 to 10 will be given based on the success of the editing. (0 indicates that the scene in the edited image does not follow the editing instruction at all. 10 indicates that the scene in the edited image follows the editing instruction text perfectly.)
+ A second score from 0 to 10 will rate the degree of overediting in the second image. (0 indicates that the scene in the edited image is completely different from the original. 10 indicates that the edited image can be recognized as a minimally edited yet effective version of the original.)
+ Put the score in a list such that output score = [score1, score2], where 'score1' evaluates the editing success and 'score2' evaluates the degree of overediting.
+ 
+ Editing instruction: <instruction>
+ """
+ 
+ _prompts_0shot_t2i_rule_SC = """On a scale from 0 to 10:
+ A score from 0 to 10 will be given based on the success in following the prompt.
+ (0 indicates that the AI-generated image does not follow the prompt at all. 10 indicates the AI-generated image follows the prompt perfectly.)
+ 
+ Put the score in a list such that output score = [score].
+ 
+ Text Prompt: <prompt>
+ """
+ 
+ _prompts_0shot_cig_rule_SC = """On a scale from 0 to 10:
+ A score from 0 to 10 will be given based on the success in following the prompt.
+ (0 indicates that the second image does not follow the prompt at all. 10 indicates the second image follows the prompt perfectly.)
+ A second score from 0 to 10 will rate how well the generated image is following the guidance image.
+ (0 indicates that the second image is not following the guidance at all. 10 indicates that the second image is following the guidance image.)
+ Put the score in a list such that output score = [score1, score2], where 'score1' evaluates the prompt and 'score2' evaluates the guidance.
+ 
+ Text Prompt: <prompt>"""
+ 
+ _prompts_0shot_control_image_gen_rule = """RULES:
+ 
+ Two images will be provided: the first being a processed image (e.g. Canny edges, openpose, grayscale, etc.) and the second being an AI-generated image using the first image as guidance.
+ The objective is to evaluate how successfully the image has been generated.
+ """
+ 
+ _prompts_0shot_rule_PQ = """RULES:
+ 
+ The image is an AI-generated image.
+ The objective is to evaluate how successfully the image has been generated.
+ 
+ On a scale from 0 to 10:
+ A score from 0 to 10 will be given based on image naturalness.
+ (
+ 0 indicates that the scene in the image does not look natural at all or gives an unnatural feeling, such as a wrong sense of distance, wrong shadows, or wrong lighting.
+ 10 indicates that the image looks natural.
+ )
+ A second score from 0 to 10 will rate the image artifacts.
+ (
+ 0 indicates that the image contains a large portion of distortion, watermarks, scratches, blurred faces, unusual body parts, or subjects that are not harmonized.
+ 10 indicates the image has no artifacts.
+ )
+ Put the score in a list such that output score = [naturalness, artifacts]
+ """
+ 
+ _prompts_0shot_t2v_rule_SC = """On a scale from 0 to 10:
+ A score from 0 to 10 will be given based on the success in following the prompt.
+ (0 indicates that the image frames do not follow the prompt at all. 10 indicates the image frames follow the prompt perfectly.)
+ 
+ Put the score in a list such that output score = [score].
+ 
+ Text Prompt: <prompt>
+ """
+ 
+ _prompts_0shot_multi_subject_image_gen_rule = """RULES:
+ 
+ Two images will be provided:
+ The first image is a concatenation of two sub-images; each sub-image contains one token subject.
+ The second image is an AI-generated image using the first image as guidance.
+ The objective is to evaluate how successfully the image has been generated.
+ """
+ 
+ _prompts_0shot_subject_image_gen_rule = """RULES:
+ 
+ Two images will be provided: the first being a token subject image and the second being an AI-generated image using the first image as guidance.
+ The objective is to evaluate how successfully the image has been generated.
+ """
+ 
+ _prompts_0shot_one_image_gen_rule = """RULES:
+ 
+ The image is an AI-generated image created according to the text prompt.
+ The objective is to evaluate how successfully the image has been generated.
+ """
+ 
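The `<prompt>`, `<instruction>`, and `<subject>` placeholders above are substituted by the caller before the scoring model is queried. A hedged sketch of that assembly for the 0-shot text-guided-editing case (the exact composition lives in `viescore/__init__.py` and may differ):

```python
# Hypothetical assembly of a 0-shot text-guided-editing (TIE) scoring prompt.
# `vie_prompts` is the module defined above; only the placeholder substitution
# is shown here, not the real call path.
import vie_prompts

instruction = "What if the man had a hat?"  # example editing instruction
prompt = "\n".join([
    vie_prompts._context,                            # output-format contract
    vie_prompts._prompts_0shot_two_image_edit_rule,  # task rules
    vie_prompts._prompts_0shot_tie_rule_SC.replace("<instruction>", instruction),
])
print(prompt)
```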
univa/eval/genai/README.md ADDED
@@ -0,0 +1,47 @@
+ 
+ The original code is from [GenAI-Bench](https://github.com/linzhiqiu/t2v_metrics).
+ 
+ 
+ ## Requirements and Installation
+ 
+ ```
+ pip install git+https://github.com/openai/CLIP.git
+ pip install open-clip-torch
+ ```
+ 
+ 
+ ## Eval
+ 
+ ### Generate samples
+ 
+ We also support `genai1600`; just replace `genai527.yaml` with `genai1600.yaml` and change `$OUTPUT_DIR`.
+ 
+ ```bash
+ # switch to univa env
+ MODEL_PATH='path/to/model'
+ OUTPUT_DIR='path/to/eval_output/genai527'
+ CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 torchrun \
+     --nproc_per_node 8 \
+     -m step1_gen_samples \
+     genai527.yaml \
+     --pretrained_lvlm_name_or_path ${MODEL_PATH} \
+     --output_dir ${OUTPUT_DIR}
+ ```
+ 
+ ### Evaluation & Summary
+ 
+ Download [zhiqiulin/clip-flant5-xxl](https://huggingface.co/zhiqiulin/clip-flant5-xxl) to `$T5_PATH`.
+ Download [openai/clip-vit-large-patch14-336](https://huggingface.co/openai/clip-vit-large-patch14-336) to `$VISION_TOWER`.
+ 
+ ```bash
+ # switch to univa env
+ META_DIR="eval_prompts/genai527"
+ IMAGE_DIR=${OUTPUT_DIR}
+ CUDA_VISIBLE_DEVICES=4 VISION_TOWER=${VISION_TOWER} python -m step2_run_model \
+     --model_path ${T5_PATH} \
+     --image_dir ${IMAGE_DIR} \
+     --meta_dir ${META_DIR} > ${IMAGE_DIR}.txt
+ cat ${IMAGE_DIR}.txt
+ ```
+ 
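The `genai_skills.json` files referenced by `META_DIR` group prompt indices into `basic` and `advanced` tiers (see the genai1600 variant added below); presumably these index into `genai_image.json` so scores can be summarized per tier. A small sketch of reading that split (the per-tier indexing is my assumption, not confirmed by this diff):

```python
# Load the skill split; assumes genai_skills.json maps tier name -> list of ints.
import json

with open("eval_prompts/genai527/genai_skills.json") as f:
    skills = json.load(f)

basic, advanced = set(skills["basic"]), set(skills["advanced"])
print(f"{len(basic)} basic prompts, {len(advanced)} advanced prompts")
```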
univa/eval/genai/__init__.py ADDED
File without changes
univa/eval/genai/eval_prompts/genai1600/genai_image.json ADDED
The diff for this file is too large to render. See raw diff
 
univa/eval/genai/eval_prompts/genai1600/genai_skills.json ADDED
@@ -0,0 +1,4872 @@
1
+ {
2
+ "basic": [
3
+ 0,
4
+ 1,
5
+ 2,
6
+ 3,
7
+ 4,
8
+ 5,
9
+ 6,
10
+ 7,
11
+ 8,
12
+ 9,
13
+ 11,
14
+ 12,
15
+ 13,
16
+ 14,
17
+ 15,
18
+ 16,
19
+ 17,
20
+ 18,
21
+ 19,
22
+ 20,
23
+ 21,
24
+ 22,
25
+ 23,
26
+ 24,
27
+ 25,
28
+ 26,
29
+ 27,
30
+ 28,
31
+ 29,
32
+ 30,
33
+ 31,
34
+ 32,
35
+ 33,
36
+ 34,
37
+ 35,
38
+ 36,
39
+ 37,
40
+ 38,
41
+ 39,
42
+ 40,
43
+ 41,
44
+ 42,
45
+ 43,
46
+ 44,
47
+ 45,
48
+ 46,
49
+ 47,
50
+ 48,
51
+ 49,
52
+ 50,
53
+ 51,
54
+ 52,
55
+ 53,
56
+ 54,
57
+ 55,
58
+ 56,
59
+ 57,
60
+ 58,
61
+ 59,
62
+ 60,
63
+ 61,
64
+ 62,
65
+ 63,
66
+ 64,
67
+ 65,
68
+ 66,
69
+ 67,
70
+ 68,
71
+ 69,
72
+ 70,
73
+ 71,
74
+ 72,
75
+ 73,
76
+ 74,
77
+ 75,
78
+ 76,
79
+ 77,
80
+ 78,
81
+ 79,
82
+ 80,
83
+ 81,
84
+ 82,
85
+ 83,
86
+ 84,
87
+ 85,
88
+ 86,
89
+ 87,
90
+ 88,
91
+ 89,
92
+ 90,
93
+ 91,
94
+ 92,
95
+ 93,
96
+ 94,
97
+ 95,
98
+ 96,
99
+ 97,
100
+ 98,
101
+ 99,
102
+ 100,
103
+ 101,
104
+ 118,
105
+ 119,
106
+ 120,
107
+ 121,
108
+ 125,
109
+ 126,
110
+ 127,
111
+ 129,
112
+ 130,
113
+ 131,
114
+ 132,
115
+ 218,
116
+ 219,
117
+ 220,
118
+ 221,
119
+ 222,
120
+ 223,
121
+ 224,
122
+ 225,
123
+ 226,
124
+ 227,
125
+ 228,
126
+ 229,
127
+ 230,
128
+ 231,
129
+ 232,
130
+ 233,
131
+ 234,
132
+ 235,
133
+ 236,
134
+ 237,
135
+ 269,
136
+ 270,
137
+ 271,
138
+ 272,
139
+ 273,
140
+ 274,
141
+ 275,
142
+ 276,
143
+ 277,
144
+ 278,
145
+ 279,
146
+ 280,
147
+ 281,
148
+ 282,
149
+ 283,
150
+ 284,
151
+ 285,
152
+ 286,
153
+ 287,
154
+ 288,
155
+ 289,
156
+ 290,
157
+ 291,
158
+ 300,
159
+ 301,
160
+ 302,
161
+ 303,
162
+ 304,
163
+ 305,
164
+ 306,
165
+ 307,
166
+ 308,
167
+ 309,
168
+ 310,
169
+ 311,
170
+ 312,
171
+ 313,
172
+ 314,
173
+ 315,
174
+ 316,
175
+ 317,
176
+ 318,
177
+ 319,
178
+ 320,
179
+ 321,
180
+ 322,
181
+ 323,
182
+ 324,
183
+ 325,
184
+ 326,
185
+ 327,
186
+ 328,
187
+ 329,
188
+ 330,
189
+ 331,
190
+ 332,
191
+ 333,
192
+ 334,
193
+ 335,
194
+ 336,
195
+ 337,
196
+ 338,
197
+ 339,
198
+ 340,
199
+ 341,
200
+ 342,
201
+ 343,
202
+ 344,
203
+ 345,
204
+ 346,
205
+ 347,
206
+ 348,
207
+ 352,
208
+ 353,
209
+ 354,
210
+ 355,
211
+ 382,
212
+ 386,
213
+ 387,
214
+ 403,
215
+ 527,
216
+ 528,
217
+ 529,
218
+ 530,
219
+ 531,
220
+ 532,
221
+ 533,
222
+ 534,
223
+ 535,
224
+ 536,
225
+ 537,
226
+ 538,
227
+ 539,
228
+ 540,
229
+ 541,
230
+ 542,
231
+ 543,
232
+ 544,
233
+ 545,
234
+ 546,
235
+ 547,
236
+ 548,
237
+ 549,
238
+ 550,
239
+ 551,
240
+ 552,
241
+ 553,
242
+ 554,
243
+ 555,
244
+ 556,
245
+ 557,
246
+ 558,
247
+ 559,
248
+ 560,
249
+ 561,
250
+ 562,
251
+ 563,
252
+ 564,
253
+ 565,
254
+ 566,
255
+ 567,
256
+ 568,
257
+ 569,
258
+ 570,
259
+ 571,
260
+ 572,
261
+ 573,
262
+ 574,
263
+ 575,
264
+ 576,
265
+ 577,
266
+ 578,
267
+ 579,
268
+ 580,
269
+ 581,
270
+ 582,
271
+ 583,
272
+ 584,
273
+ 585,
274
+ 586,
275
+ 587,
276
+ 588,
277
+ 589,
278
+ 590,
279
+ 591,
280
+ 592,
281
+ 593,
282
+ 594,
283
+ 595,
284
+ 596,
285
+ 597,
286
+ 598,
287
+ 599,
288
+ 602,
289
+ 603,
290
+ 604,
291
+ 606,
292
+ 607,
293
+ 608,
294
+ 609,
295
+ 610,
296
+ 611,
297
+ 612,
298
+ 613,
299
+ 614,
300
+ 615,
301
+ 616,
302
+ 617,
303
+ 618,
304
+ 619,
305
+ 620,
306
+ 621,
307
+ 622,
308
+ 623,
309
+ 624,
310
+ 625,
311
+ 626,
312
+ 627,
313
+ 628,
314
+ 629,
315
+ 630,
316
+ 631,
317
+ 632,
318
+ 633,
319
+ 634,
320
+ 635,
321
+ 636,
322
+ 637,
323
+ 638,
324
+ 639,
325
+ 640,
326
+ 641,
327
+ 642,
328
+ 643,
329
+ 644,
330
+ 645,
331
+ 646,
332
+ 647,
333
+ 648,
334
+ 649,
335
+ 650,
336
+ 651,
337
+ 652,
338
+ 653,
339
+ 654,
340
+ 655,
341
+ 656,
342
+ 657,
343
+ 658,
344
+ 659,
345
+ 660,
346
+ 661,
347
+ 662,
348
+ 663,
349
+ 664,
350
+ 665,
351
+ 666,
352
+ 667,
353
+ 668,
354
+ 669,
355
+ 670,
356
+ 671,
357
+ 672,
358
+ 673,
359
+ 674,
360
+ 675,
361
+ 676,
362
+ 677,
363
+ 678,
364
+ 679,
365
+ 680,
366
+ 681,
367
+ 682,
368
+ 683,
369
+ 684,
370
+ 685,
371
+ 686,
372
+ 687,
373
+ 688,
374
+ 689,
375
+ 690,
376
+ 692,
377
+ 693,
378
+ 694,
379
+ 695,
380
+ 696,
381
+ 697,
382
+ 698,
383
+ 699,
384
+ 700,
385
+ 701,
386
+ 702,
387
+ 704,
388
+ 705,
389
+ 706,
390
+ 707,
391
+ 708,
392
+ 709,
393
+ 710,
394
+ 711,
395
+ 712,
396
+ 713,
397
+ 714,
398
+ 715,
399
+ 716,
400
+ 717,
401
+ 718,
402
+ 720,
403
+ 722,
404
+ 723,
405
+ 724,
406
+ 725,
407
+ 726,
408
+ 727,
409
+ 728,
410
+ 729,
411
+ 731,
412
+ 732,
413
+ 734,
414
+ 735,
415
+ 736,
416
+ 738,
417
+ 740,
418
+ 741,
419
+ 742,
420
+ 744,
421
+ 745,
422
+ 746,
423
+ 747,
424
+ 748,
425
+ 749,
426
+ 750,
427
+ 751,
428
+ 752,
429
+ 753,
430
+ 754,
431
+ 755,
432
+ 756,
433
+ 757,
434
+ 758,
435
+ 759,
436
+ 760,
437
+ 761,
438
+ 762,
439
+ 763,
440
+ 764,
441
+ 765,
442
+ 766,
443
+ 767,
444
+ 768,
445
+ 769,
446
+ 770,
447
+ 771,
448
+ 772,
449
+ 773,
450
+ 774,
451
+ 775,
452
+ 776,
453
+ 777,
454
+ 778,
455
+ 779,
456
+ 780,
457
+ 781,
458
+ 782,
459
+ 783,
460
+ 784,
461
+ 785,
462
+ 786,
463
+ 787,
464
+ 788,
465
+ 789,
466
+ 790,
467
+ 791,
468
+ 792,
469
+ 793,
470
+ 794,
471
+ 795,
472
+ 796,
473
+ 797,
474
+ 798,
475
+ 799,
476
+ 800,
477
+ 803,
478
+ 805,
479
+ 806,
480
+ 809,
481
+ 811,
482
+ 812,
483
+ 815,
484
+ 817,
485
+ 818,
486
+ 820,
487
+ 823,
488
+ 827,
489
+ 829,
490
+ 840,
491
+ 843,
492
+ 847,
493
+ 869,
494
+ 905,
495
+ 935,
496
+ 941,
497
+ 952,
498
+ 954,
499
+ 965,
500
+ 974,
501
+ 980,
502
+ 984,
503
+ 985,
504
+ 988,
505
+ 989,
506
+ 990,
507
+ 991,
508
+ 992,
509
+ 993,
510
+ 994,
511
+ 995,
512
+ 996,
513
+ 997,
514
+ 998,
515
+ 1007,
516
+ 1009,
517
+ 1010,
518
+ 1011,
519
+ 1012,
520
+ 1013,
521
+ 1014,
522
+ 1015,
523
+ 1016,
524
+ 1017,
525
+ 1018,
526
+ 1019,
527
+ 1020,
528
+ 1021,
529
+ 1022,
530
+ 1023,
531
+ 1024,
532
+ 1025,
533
+ 1026,
534
+ 1027,
535
+ 1028,
536
+ 1029,
537
+ 1031,
538
+ 1032,
539
+ 1033,
540
+ 1034,
541
+ 1036,
542
+ 1038,
543
+ 1039,
544
+ 1040,
545
+ 1041,
546
+ 1042,
547
+ 1044,
548
+ 1045,
549
+ 1046,
550
+ 1047,
551
+ 1048,
552
+ 1049,
553
+ 1050,
554
+ 1051,
555
+ 1052,
556
+ 1053,
557
+ 1054,
558
+ 1055,
559
+ 1056,
560
+ 1057,
561
+ 1058,
562
+ 1059,
563
+ 1060,
564
+ 1061,
565
+ 1062,
566
+ 1063,
567
+ 1064,
568
+ 1065,
569
+ 1066,
570
+ 1067,
571
+ 1068,
572
+ 1069,
573
+ 1070,
574
+ 1071,
575
+ 1072,
576
+ 1073,
577
+ 1074,
578
+ 1075,
579
+ 1076,
580
+ 1077,
581
+ 1078,
582
+ 1079,
583
+ 1080,
584
+ 1081,
585
+ 1082,
586
+ 1083,
587
+ 1084,
588
+ 1085,
589
+ 1086,
590
+ 1087,
591
+ 1088,
592
+ 1089,
593
+ 1090,
594
+ 1091,
595
+ 1092,
596
+ 1093,
597
+ 1094,
598
+ 1095,
599
+ 1096,
600
+ 1097,
601
+ 1098,
602
+ 1099,
603
+ 1100,
604
+ 1101,
605
+ 1102,
606
+ 1103,
607
+ 1104,
608
+ 1105,
609
+ 1106,
610
+ 1107,
611
+ 1108,
612
+ 1109,
613
+ 1110,
614
+ 1111,
615
+ 1112,
616
+ 1113,
617
+ 1114,
618
+ 1115,
619
+ 1117,
620
+ 1119,
621
+ 1120,
622
+ 1121,
623
+ 1122,
624
+ 1123,
625
+ 1124,
626
+ 1125,
627
+ 1126,
628
+ 1127,
629
+ 1128,
630
+ 1129,
631
+ 1130,
632
+ 1131,
633
+ 1132,
634
+ 1133,
635
+ 1134,
636
+ 1135,
637
+ 1136,
638
+ 1138,
639
+ 1139,
640
+ 1140,
641
+ 1141,
642
+ 1143,
643
+ 1144,
644
+ 1145,
645
+ 1146,
646
+ 1147,
647
+ 1148,
648
+ 1149,
649
+ 1150,
650
+ 1151,
651
+ 1152,
652
+ 1153,
653
+ 1154,
654
+ 1155,
655
+ 1156,
656
+ 1157,
657
+ 1158,
658
+ 1159,
659
+ 1160,
660
+ 1161,
661
+ 1162,
662
+ 1163,
663
+ 1164,
664
+ 1165,
665
+ 1166,
666
+ 1167,
667
+ 1168,
668
+ 1169,
669
+ 1170,
670
+ 1171,
671
+ 1172,
672
+ 1173,
673
+ 1174,
674
+ 1175,
675
+ 1176,
676
+ 1177,
677
+ 1178,
678
+ 1179,
679
+ 1180,
680
+ 1181,
681
+ 1182,
682
+ 1183,
683
+ 1184,
684
+ 1185,
685
+ 1186,
686
+ 1187,
687
+ 1188,
688
+ 1189,
689
+ 1190,
690
+ 1191,
691
+ 1192,
692
+ 1193,
693
+ 1194,
694
+ 1195,
695
+ 1196,
696
+ 1197,
697
+ 1198,
698
+ 1199,
699
+ 1200,
700
+ 1201,
701
+ 1202,
702
+ 1203,
703
+ 1204,
704
+ 1205,
705
+ 1206,
706
+ 1207,
707
+ 1208,
708
+ 1209,
709
+ 1210,
710
+ 1211,
711
+ 1212,
712
+ 1213,
713
+ 1214,
714
+ 1215,
715
+ 1216,
716
+ 1245,
717
+ 1322,
718
+ 1369,
719
+ 1405,
+ ... [remainder of the genai1600 skill-to-prompt-index mapping. Each key ("advanced", "attribute", "scene", "spatial relation", "action relation", "part relation", "counting", "comparison", "differentiation", "negation", "universal") maps to the array of prompt indices (0-1599) tagged with that skill. 4,872 added lines in total; truncated in this view, see raw diff.]
+ }
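
Both genai_skills.json files share the same layout: one JSON object whose keys are skill tags and whose values are arrays of integer prompt indices, so a single prompt can count toward several skills (index 10, for instance, appears under both "advanced" and "counting" above). Below is a minimal sketch, not part of the repo, of how such a mapping could be consumed; it assumes the file path shown above and uses placeholder scores rather than any real metric output.

```python
import json

# A minimal sketch, not part of the repo: aggregate hypothetical
# per-prompt scores by skill tag. Assumes genai_skills.json maps each
# skill name to the list of prompt indices tagged with that skill.
with open("univa/eval/genai/eval_prompts/genai1600/genai_skills.json") as f:
    skills = json.load(f)

all_indices = {i for idxs in skills.values() for i in idxs}
scores = {i: 1.0 for i in all_indices}  # placeholder scores, not real results

per_skill = {
    name: sum(scores[i] for i in idxs) / len(idxs)
    for name, idxs in skills.items()
    if idxs
}
for name, avg in sorted(per_skill.items()):
    print(f"{name:>16s}  n={len(skills[name]):4d}  avg={avg:.3f}")
```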
univa/eval/genai/eval_prompts/genai527/genai_image.json ADDED
The diff for this file is too large to render. See raw diff
 
univa/eval/genai/eval_prompts/genai527/genai_skills.json ADDED
@@ -0,0 +1,1482 @@
+ {
+ ... [skill-to-prompt-index mapping for the 527-prompt subset, same layout as the genai1600 file: "basic", "advanced", "attribute", "scene", "spatial relation", "action relation", and the remaining skill tags, each mapping to an array of prompt indices. 1,482 added lines in total; truncated in this view, see raw diff.]
+ 74,
938
+ 75,
939
+ 76,
940
+ 77,
941
+ 78,
942
+ 81,
943
+ 82,
944
+ 83,
945
+ 84,
946
+ 86,
947
+ 92,
948
+ 93,
949
+ 95,
950
+ 96,
951
+ 97,
952
+ 98,
953
+ 99,
954
+ 100,
955
+ 101,
956
+ 102,
957
+ 119,
958
+ 120,
959
+ 121,
960
+ 122,
961
+ 126,
962
+ 127,
963
+ 128,
964
+ 131,
965
+ 132,
966
+ 133,
967
+ 219,
968
+ 223,
969
+ 237,
970
+ 274,
971
+ 275,
972
+ 276,
973
+ 277,
974
+ 280,
975
+ 281,
976
+ 282,
977
+ 283,
978
+ 288,
979
+ 289,
980
+ 290,
981
+ 291,
982
+ 292,
983
+ 302,
984
+ 303,
985
+ 304,
986
+ 305,
987
+ 306,
988
+ 307,
989
+ 308,
990
+ 309,
991
+ 310,
992
+ 311,
993
+ 312,
994
+ 313,
995
+ 314,
996
+ 315,
997
+ 316,
998
+ 317,
999
+ 318,
1000
+ 319,
1001
+ 320,
1002
+ 321,
1003
+ 322,
1004
+ 323,
1005
+ 324,
1006
+ 325,
1007
+ 326,
1008
+ 327,
1009
+ 328,
1010
+ 329,
1011
+ 330,
1012
+ 331,
1013
+ 332,
1014
+ 333,
1015
+ 334,
1016
+ 335,
1017
+ 336,
1018
+ 337,
1019
+ 338,
1020
+ 339,
1021
+ 340,
1022
+ 341,
1023
+ 342,
1024
+ 343,
1025
+ 344,
1026
+ 345,
1027
+ 346,
1028
+ 348,
1029
+ 349,
1030
+ 350,
1031
+ 353,
1032
+ 355,
1033
+ 356
1034
+ ],
1035
+ "part relation": [
1036
+ 15,
1037
+ 276,
1038
+ 277,
1039
+ 278,
1040
+ 279,
1041
+ 284,
1042
+ 285,
1043
+ 44,
1044
+ 301,
1045
+ 302,
1046
+ 303,
1047
+ 307,
1048
+ 309,
1049
+ 313,
1050
+ 61,
1051
+ 62,
1052
+ 63,
1053
+ 64,
1054
+ 65,
1055
+ 66,
1056
+ 67,
1057
+ 68,
1058
+ 69,
1059
+ 70,
1060
+ 71,
1061
+ 72,
1062
+ 73,
1063
+ 74,
1064
+ 75,
1065
+ 320,
1066
+ 328,
1067
+ 338,
1068
+ 341,
1069
+ 354
1070
+ ],
1071
+ "counting": [
1072
+ 512,
1073
+ 513,
1074
+ 514,
1075
+ 515,
1076
+ 516,
1077
+ 517,
1078
+ 518,
1079
+ 519,
1080
+ 520,
1081
+ 521,
1082
+ 522,
1083
+ 523,
1084
+ 524,
1085
+ 525,
1086
+ 526,
1087
+ 527,
1088
+ 109,
1089
+ 123,
1090
+ 134,
1091
+ 135,
1092
+ 138,
1093
+ 139,
1094
+ 140,
1095
+ 141,
1096
+ 142,
1097
+ 143,
1098
+ 144,
1099
+ 145,
1100
+ 146,
1101
+ 147,
1102
+ 148,
1103
+ 149,
1104
+ 150,
1105
+ 151,
1106
+ 152,
1107
+ 153,
1108
+ 154,
1109
+ 155,
1110
+ 156,
1111
+ 157,
1112
+ 158,
1113
+ 161,
1114
+ 174,
1115
+ 176,
1116
+ 179,
1117
+ 198,
1118
+ 200,
1119
+ 201,
1120
+ 246,
1121
+ 247,
1122
+ 248,
1123
+ 249,
1124
+ 250,
1125
+ 251,
1126
+ 252,
1127
+ 298,
1128
+ 299,
1129
+ 357,
1130
+ 359,
1131
+ 360,
1132
+ 361,
1133
+ 362,
1134
+ 363,
1135
+ 365,
1136
+ 366,
1137
+ 367,
1138
+ 368,
1139
+ 370,
1140
+ 371,
1141
+ 375,
1142
+ 376,
1143
+ 400,
1144
+ 423,
1145
+ 424,
1146
+ 425,
1147
+ 426,
1148
+ 427,
1149
+ 428,
1150
+ 429,
1151
+ 430,
1152
+ 431,
1153
+ 432,
1154
+ 448,
1155
+ 462,
1156
+ 468,
1157
+ 473,
1158
+ 475,
1159
+ 477,
1160
+ 480,
1161
+ 483,
1162
+ 505,
1163
+ 506,
1164
+ 507,
1165
+ 508,
1166
+ 509,
1167
+ 510,
1168
+ 511
1169
+ ],
1170
+ "comparison": [
1171
+ 504,
1172
+ 359,
1173
+ 360,
1174
+ 362,
1175
+ 363,
1176
+ 295,
1177
+ 364,
1178
+ 492,
1179
+ 365,
1180
+ 493,
1181
+ 178,
1182
+ 179,
1183
+ 366,
1184
+ 494,
1185
+ 367,
1186
+ 495,
1187
+ 368,
1188
+ 496,
1189
+ 197,
1190
+ 497,
1191
+ 498,
1192
+ 206,
1193
+ 211,
1194
+ 503,
1195
+ 357,
1196
+ 358,
1197
+ 103,
1198
+ 104,
1199
+ 105,
1200
+ 106,
1201
+ 107,
1202
+ 108,
1203
+ 109,
1204
+ 110,
1205
+ 111,
1206
+ 112,
1207
+ 113,
1208
+ 114,
1209
+ 115,
1210
+ 116,
1211
+ 117,
1212
+ 118,
1213
+ 239,
1214
+ 240,
1215
+ 241,
1216
+ 242,
1217
+ 370,
1218
+ 499,
1219
+ 500,
1220
+ 501,
1221
+ 502
1222
+ ],
1223
+ "differentiation": [
1224
+ 105,
1225
+ 109,
1226
+ 110,
1227
+ 118,
1228
+ 123,
1229
+ 124,
1230
+ 125,
1231
+ 129,
1232
+ 134,
1233
+ 135,
1234
+ 136,
1235
+ 137,
1236
+ 155,
1237
+ 159,
1238
+ 160,
1239
+ 161,
1240
+ 162,
1241
+ 163,
1242
+ 164,
1243
+ 165,
1244
+ 166,
1245
+ 167,
1246
+ 168,
1247
+ 169,
1248
+ 170,
1249
+ 171,
1250
+ 172,
1251
+ 173,
1252
+ 174,
1253
+ 175,
1254
+ 176,
1255
+ 177,
1256
+ 178,
1257
+ 179,
1258
+ 180,
1259
+ 181,
1260
+ 182,
1261
+ 183,
1262
+ 184,
1263
+ 185,
1264
+ 195,
1265
+ 196,
1266
+ 197,
1267
+ 201,
1268
+ 202,
1269
+ 203,
1270
+ 204,
1271
+ 243,
1272
+ 244,
1273
+ 245,
1274
+ 246,
1275
+ 293,
1276
+ 294,
1277
+ 351,
1278
+ 352,
1279
+ 357,
1280
+ 358,
1281
+ 359,
1282
+ 360,
1283
+ 361,
1284
+ 362,
1285
+ 363,
1286
+ 364,
1287
+ 365,
1288
+ 366,
1289
+ 367,
1290
+ 368,
1291
+ 369,
1292
+ 370,
1293
+ 371,
1294
+ 372,
1295
+ 373,
1296
+ 374,
1297
+ 375,
1298
+ 376,
1299
+ 377,
1300
+ 378,
1301
+ 379,
1302
+ 380,
1303
+ 381,
1304
+ 382,
1305
+ 384,
1306
+ 385,
1307
+ 386,
1308
+ 389,
1309
+ 390,
1310
+ 391,
1311
+ 392,
1312
+ 393,
1313
+ 394,
1314
+ 395,
1315
+ 396,
1316
+ 397,
1317
+ 398,
1318
+ 399,
1319
+ 400,
1320
+ 401,
1321
+ 402,
1322
+ 403,
1323
+ 448,
1324
+ 473,
1325
+ 474,
1326
+ 475,
1327
+ 495,
1328
+ 500
1329
+ ],
1330
+ "negation": [
1331
+ 169,
1332
+ 170,
1333
+ 173,
1334
+ 178,
1335
+ 180,
1336
+ 186,
1337
+ 187,
1338
+ 188,
1339
+ 189,
1340
+ 190,
1341
+ 191,
1342
+ 192,
1343
+ 193,
1344
+ 194,
1345
+ 195,
1346
+ 196,
1347
+ 197,
1348
+ 198,
1349
+ 199,
1350
+ 200,
1351
+ 201,
1352
+ 202,
1353
+ 203,
1354
+ 204,
1355
+ 205,
1356
+ 206,
1357
+ 207,
1358
+ 208,
1359
+ 209,
1360
+ 210,
1361
+ 211,
1362
+ 212,
1363
+ 213,
1364
+ 214,
1365
+ 215,
1366
+ 216,
1367
+ 217,
1368
+ 218,
1369
+ 296,
1370
+ 297,
1371
+ 298,
1372
+ 361,
1373
+ 372,
1374
+ 433,
1375
+ 434,
1376
+ 435,
1377
+ 436,
1378
+ 437,
1379
+ 438,
1380
+ 439,
1381
+ 440,
1382
+ 441,
1383
+ 442,
1384
+ 443,
1385
+ 444,
1386
+ 445,
1387
+ 446,
1388
+ 447,
1389
+ 448,
1390
+ 449,
1391
+ 450,
1392
+ 451,
1393
+ 452,
1394
+ 453,
1395
+ 455,
1396
+ 456,
1397
+ 457,
1398
+ 458,
1399
+ 459,
1400
+ 460,
1401
+ 461,
1402
+ 462,
1403
+ 463,
1404
+ 464,
1405
+ 465,
1406
+ 466,
1407
+ 467,
1408
+ 468,
1409
+ 469,
1410
+ 470,
1411
+ 471,
1412
+ 472,
1413
+ 473,
1414
+ 474,
1415
+ 475,
1416
+ 476,
1417
+ 477,
1418
+ 478,
1419
+ 479,
1420
+ 480,
1421
+ 481,
1422
+ 482,
1423
+ 483,
1424
+ 484,
1425
+ 485,
1426
+ 486,
1427
+ 487,
1428
+ 488,
1429
+ 489,
1430
+ 490,
1431
+ 491
1432
+ ],
1433
+ "universal": [
1434
+ 256,
1435
+ 257,
1436
+ 258,
1437
+ 259,
1438
+ 260,
1439
+ 261,
1440
+ 262,
1441
+ 263,
1442
+ 264,
1443
+ 265,
1444
+ 266,
1445
+ 267,
1446
+ 268,
1447
+ 269,
1448
+ 405,
1449
+ 406,
1450
+ 407,
1451
+ 408,
1452
+ 409,
1453
+ 410,
1454
+ 411,
1455
+ 412,
1456
+ 413,
1457
+ 414,
1458
+ 415,
1459
+ 416,
1460
+ 417,
1461
+ 418,
1462
+ 419,
1463
+ 420,
1464
+ 421,
1465
+ 422,
1466
+ 423,
1467
+ 424,
1468
+ 425,
1469
+ 426,
1470
+ 427,
1471
+ 300,
1472
+ 428,
1473
+ 429,
1474
+ 430,
1475
+ 431,
1476
+ 432,
1477
+ 454,
1478
+ 253,
1479
+ 254,
1480
+ 255
1481
+ ]
1482
+ }
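Note: each key in genai_skills.json maps a skill tag to the list of prompt ids (1-based) that exercise it; show_performance_per_skill in step2_run_model.py below groups per-image scores by these lists. A minimal sketch of that grouping, assuming one generated image per prompt stored in prompt order (the toy scores tensor is made up; real scores come from batch_forward):

    import json
    import torch

    tags = json.load(open("eval_prompts/genai527/genai_skills.json"))

    # stand-in for score_func.batch_forward(...).mean(axis=1) in step2_run_model.py
    scores = torch.rand(527)

    for tag, prompt_ids in tags.items():
        idx = [i - 1 for i in prompt_ids]  # 1-based prompt ids -> 0-based tensor indices
        print(f"{tag:<18} {scores[idx].mean().item():.4f}")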
univa/eval/genai/genai1600.yaml ADDED
@@ -0,0 +1,18 @@
+
+ pretrained_lvlm_name_or_path: /mnt/data/lb/Remake/UniWorld//checkpoints/flux_qwen2p5vl_7b_vlm_mlp_siglip_stage2_ts_1024_bs42x8x1_fa_any_11ratio_ema999_ocr_adamw_t5_1p0_lr5e-6_mask_refstyle_extract/checkpoint-20000/model_ema
+ pretrained_denoiser_name_or_path: /mnt/data/checkpoints/black-forest-labs/FLUX.1-dev/
+ pretrained_siglip_name_or_path: /mnt/data/checkpoints/google/siglip2-so400m-patch16-512
+ joint_with_t5: true
+
+ seed: 42
+ allow_tf32: false
+
+ output_dir: /mnt/data/lb/Remake/UniWorld//eval_output/genai1600
+
+ num_images_per_prompt: 1
+ num_inference_steps: 28
+ guidance_scale: 3.5
+ height: 1024
+ width: 1024
+
+ genai_prompt_path: eval_prompts/genai1600/genai_image.json
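These fields are validated against EvalConfig via OmegaConf's structured merge in step1_gen_samples.py (see its __main__ block below). A minimal sketch of the same load-and-override flow, assuming the YAML above sits in the working directory (the override paths are illustrative only):

    from omegaconf import OmegaConf
    from univa.eval.configuration_eval import EvalConfig

    schema = OmegaConf.structured(EvalConfig)   # typed defaults and field checking
    config = OmegaConf.load("genai1600.yaml")   # the config file above
    conf = OmegaConf.merge(schema, config)      # YAML values override schema defaults

    # CLI-style overrides, mirroring step1_gen_samples.py's optional flags
    conf.pretrained_lvlm_name_or_path = "/path/to/checkpoint/model_ema"  # illustrative
    conf.output_dir = "/path/to/eval_output/genai1600"                   # illustrative
    print(conf.num_inference_steps, conf.guidance_scale)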
univa/eval/genai/genai527.yaml ADDED
@@ -0,0 +1,18 @@
+
+ pretrained_lvlm_name_or_path: /mnt/data/lb/Remake/UniWorld//checkpoints/flux_qwen2p5vl_7b_vlm_mlp_siglip_stage2_ts_1024_bs42x8x1_fa_any_11ratio_ema999_ocr_adamw_t5_1p0_lr5e-6_mask_refstyle_extract/checkpoint-20000/model_ema
+ pretrained_denoiser_name_or_path: /mnt/data/checkpoints/black-forest-labs/FLUX.1-dev/
+ pretrained_siglip_name_or_path: /mnt/data/checkpoints/google/siglip2-so400m-patch16-512
+ joint_with_t5: true
+
+ seed: 42
+ allow_tf32: false
+
+ output_dir: /mnt/data/lb/Remake/UniWorld//eval_output/genai527
+
+ num_images_per_prompt: 1
+ num_inference_steps: 28
+ guidance_scale: 3.5
+ height: 1024
+ width: 1024
+
+ genai_prompt_path: eval_prompts/genai527/genai_image.json
univa/eval/genai/step1_gen_samples.py ADDED
@@ -0,0 +1,269 @@
+ import sys
+ import os
+ root = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", ".."))
+ sys.path.append(root)
+ import json
+ import random
+ import numpy as np
+ import torch
+ import torch.distributed as dist
+ from copy import deepcopy
+ from PIL import Image
+ from tqdm import tqdm
+ from qwen_vl_utils import process_vision_info
+ from transformers import AutoProcessor, SiglipImageProcessor, SiglipVisionModel
+ from univa.utils.flux_pipeline import FluxPipeline
+ from univa.eval.configuration_eval import EvalConfig
+ from univa.utils.get_ocr import get_ocr_result
+ from univa.utils.denoiser_prompt_embedding_flux import encode_prompt
+ from univa.models.qwen2p5vl.modeling_univa_qwen2p5vl import UnivaQwen2p5VLForConditionalGeneration
+
+
+ def get_meta(prompt_path):
+     '''
+     Load the GenAI-Bench prompt file (a dict keyed by prompt id) and return a list of
+     entries of the form:
+     [
+         {
+             "Prompts": "a photo of a cat",
+             "Category": "No Category",
+             "id": "000000001",
+         },
+         ...
+     ]
+     '''
+     with open(prompt_path, 'r') as f:
+         meta_info = json.load(f)
+
+     ret_meta_info = []
+     for v in meta_info.values():
+         if 'models' in v: del v['models']
+         if 'prompt in Chinese' in v: del v['prompt in Chinese']
+         v['Prompts'] = deepcopy(v['prompt'])
+         if 'prompt' in v: del v['prompt']
+         v['Category'] = 'No Category'
+         v['id'] = f"{int(v['id']):09d}"
+         ret_meta_info.append(v)
+     return ret_meta_info
+
+
+ # adapted from https://github.com/huggingface/accelerate/blob/main/src/accelerate/utils/random.py#L31
+ def set_seed(seed, rank, device_specific=True):
+     if device_specific:
+         seed += rank
+     random.seed(seed)
+     np.random.seed(seed)
+     torch.manual_seed(seed)
+     torch.cuda.manual_seed(seed)
+     torch.cuda.manual_seed_all(seed)
+     torch.backends.cudnn.deterministic = True
+     torch.backends.cudnn.benchmark = False
+
+
+ def initialize_models(args, device):
+     # Load main model and task head
+     model = UnivaQwen2p5VLForConditionalGeneration.from_pretrained(
+         args.pretrained_lvlm_name_or_path,
+         torch_dtype=torch.bfloat16,
+         attn_implementation="flash_attention_2",
+     ).to(device)
+
+     processor = AutoProcessor.from_pretrained(
+         args.pretrained_lvlm_name_or_path,
+         min_pixels=args.min_pixels,
+         max_pixels=args.max_pixels,
+     )
+
+     # Load FLUX pipeline, reusing the LVLM's denoise tower as the transformer
+     pipe = FluxPipeline.from_pretrained(
+         args.pretrained_denoiser_name_or_path,
+         transformer=model.denoise_tower.denoiser,
+         torch_dtype=torch.bfloat16,
+     ).to(device)
+     tokenizers = [pipe.tokenizer, pipe.tokenizer_2]
+     text_encoders = [pipe.text_encoder, pipe.text_encoder_2]
+
+     siglip_processor = SiglipImageProcessor.from_pretrained(args.pretrained_siglip_name_or_path)
+     siglip_model = SiglipVisionModel.from_pretrained(
+         args.pretrained_siglip_name_or_path,
+         torch_dtype=torch.bfloat16,
+     ).to(device)
+
+     return {
+         'model': model,
+         'processor': processor,
+         'pipe': pipe,
+         'tokenizers': tokenizers,
+         'text_encoders': text_encoders,
+         'device': device,
+         'siglip_model': siglip_model,
+         'siglip_processor': siglip_processor,
+     }
+
+
+ def init_gpu_env(args):
+     local_rank = int(os.getenv('RANK', 0))
+     world_size = int(os.getenv('WORLD_SIZE', 1))
+     args.local_rank = local_rank
+     args.world_size = world_size
+     torch.cuda.set_device(local_rank)
+     dist.init_process_group(
+         backend='nccl', init_method='env://',
+         world_size=world_size, rank=local_rank
+     )
+     return args
+
+
+ def run_model_and_return_samples(args, state, text, image1=None, image2=None):
+     # Build multimodal chat content
+     convo = []
+     image_paths = []
+     content = []
+     for img in (image1, image2):
+         if img:
+             content.append({'type': 'image', 'image': img, 'min_pixels': args.min_pixels, 'max_pixels': args.max_pixels})
+             image_paths.append(img)
+     if text:
+         ocr_text = ''
+         if args.ocr_enhancer and content:
+             cur_ocr_i = 0  # fix: this counter was used uninitialized in the original
+             ocr_texts = []
+             for img in (image1, image2):
+                 if img:
+                     ocr_texts.append(get_ocr_result(img, cur_ocr_i))
+                     cur_ocr_i += 1
+             ocr_text = '\n'.join(ocr_texts)
+         content.append({'type': 'text', 'text': text + ocr_text})
+
+     if not args.only_use_t5:
+         convo.append({'role': 'user', 'content': content})
+
+         # Prepare inputs
+         chat_text = state['processor'].apply_chat_template(
+             convo,
+             tokenize=False,
+             add_generation_prompt=True
+         )
+         chat_text = '<|im_end|>\n'.join(chat_text.split('<|im_end|>\n')[1:])
+         image_inputs, video_inputs = process_vision_info(convo)
+         inputs = state['processor'](
+             text=[chat_text], images=image_inputs, videos=video_inputs,
+             padding=True, return_tensors='pt'
+         ).to(state['device'])
+
+         # Optional SigLIP features for reference images
+         siglip_hs = None
+         if state['siglip_processor'] and image_paths:
+             vals = [state['siglip_processor'].preprocess(
+                         images=Image.open(p).convert('RGB'), do_resize=True,
+                         return_tensors='pt', do_convert_rgb=True
+                     ).pixel_values.to(state['device'])
+                     for p in image_paths]
+             siglip_hs = state['siglip_model'](torch.concat(vals)).last_hidden_state
+
+         with torch.no_grad():
+             lvlm = state['model'](
+                 inputs.input_ids, pixel_values=getattr(inputs, 'pixel_values', None),
+                 attention_mask=inputs.attention_mask,
+                 image_grid_thw=getattr(inputs, 'image_grid_thw', None),
+                 siglip_hidden_states=siglip_hs,
+                 output_type='denoise_embeds'
+             )
+             prm_embeds, pooled = encode_prompt(
+                 state['text_encoders'], state['tokenizers'],
+                 text if args.joint_with_t5 else '', 256, state['device'], 1
+             )
+         emb = torch.concat([lvlm, prm_embeds], dim=1) if args.joint_with_t5 else lvlm
+     else:
+         prm_embeds, pooled = encode_prompt(
+             state['text_encoders'], state['tokenizers'],
+             text, 256, state['device'], 1
+         )
+         emb = prm_embeds
+
+     # Image generation pipeline
+     with torch.no_grad():
+         img = state['pipe'](
+             prompt_embeds=emb,
+             pooled_prompt_embeds=pooled,
+             height=args.height,
+             width=args.width,
+             num_inference_steps=args.num_inference_steps,
+             guidance_scale=args.guidance_scale,
+             num_images_per_prompt=args.num_images_per_prompt,
+         ).images
+     return img
+
+
+ def main(args):
+     args = init_gpu_env(args)
+
+     torch.backends.cuda.matmul.allow_tf32 = False
+     torch.backends.cudnn.allow_tf32 = False
+     if args.allow_tf32:
+         torch.backends.cuda.matmul.allow_tf32 = True
+         torch.backends.cudnn.allow_tf32 = True
+
+     set_seed(args.seed, rank=args.local_rank, device_specific=True)
+     device = torch.cuda.current_device()
+     state = initialize_models(args, device)
+
+     meta_info = get_meta(args.genai_prompt_path)
+     print(f'origin meta_info ({len(meta_info)})')
+     text_and_savepath = [
+         [
+             meta_info[i]['Prompts'], os.path.join(args.output_dir, f"{meta_info[i]['id']}.jpg")
+         ] for i in range(len(meta_info))
+     ]
+
+     text_and_savepath_ = [
+         [text_prompt, save_path] for text_prompt, save_path in text_and_savepath if not os.path.exists(save_path)
+     ]
+     print(f'need to process ({len(text_and_savepath_)})')
+     if len(text_and_savepath_) == 0:
+         sys.exit(0)
+     # stride-shard the full prompt list across ranks
+     text_and_savepath = text_and_savepath[args.local_rank::args.world_size]
+     os.makedirs(args.output_dir, exist_ok=True)
+     print(f'args: {args}')
+     cnt = 0
+     for text_prompt, save_path in tqdm(text_and_savepath):
+         if os.path.exists(save_path):
+             continue
+         set_seed(args.seed + cnt * 50, rank=args.local_rank, device_specific=True)
+         image = run_model_and_return_samples(args, state, text_prompt, image1=None, image2=None)
+         image = image[0]
+         image.save(save_path)
+         assert args.num_samples_per_prompt == 1
+         cnt += 1
+
+
+ if __name__ == "__main__":
+     import argparse
+     from omegaconf import OmegaConf
+
+     parser = argparse.ArgumentParser()
+     parser.add_argument("config", type=str)
+     parser.add_argument("--pretrained_lvlm_name_or_path", type=str, default=None, required=False)
+     parser.add_argument("--output_dir", type=str, default=None, required=False)
+
+     args = parser.parse_args()
+
+     config = OmegaConf.load(args.config)
+     schema = OmegaConf.structured(EvalConfig)
+     conf = OmegaConf.merge(schema, config)
+     if args.pretrained_lvlm_name_or_path is not None:
+         assert args.output_dir is not None
+         conf.pretrained_lvlm_name_or_path = args.pretrained_lvlm_name_or_path
+         conf.output_dir = args.output_dir
+     main(conf)
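The generation loop above shards work by striding the prompt list with [rank::world_size] and reseeds before every sample with seed + cnt * 50 (plus the rank offset applied inside set_seed), so each image is reproducible for a fixed world size. A self-contained toy illustration of the scheme (ranks, prompt ids, and the base seed are made up):

    # 3 ranks, 10 prompts; prompt ids stand in for (text, save_path) pairs
    world_size = 3
    prompts = list(range(10))

    for rank in range(world_size):
        shard = prompts[rank::world_size]      # stride-shard, as in main()
        for cnt, prompt in enumerate(shard):
            seed = 42 + cnt * 50 + rank        # mirrors set_seed(seed + cnt * 50, rank)
            print(f"rank {rank} generates prompt {prompt} with seed {seed}")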
univa/eval/genai/step2_run_model.py ADDED
@@ -0,0 +1,113 @@
+ # Evaluate on GenAI-Bench-Image (with 527 prompts) using a specific model
+ # Example scripts to run:
+ # VQAScore: python genai_image_eval.py --model clip-flant5-xxl
+ # CLIPScore: python genai_image_eval.py --model openai:ViT-L-14-336
+ # GPT4o VQAScore: python genai_image_eval.py --model gpt-4o
+ import argparse
+ import json
+ import torch
+ import t2v_metrics
+ from t2v_metrics.dataset import GenAIBench_Image
+
+ tag_groups = {
+     'basic': ['attribute', 'scene', 'spatial relation', 'action relation', 'part relation', 'basic'],
+     'advanced': ['counting', 'comparison', 'differentiation', 'negation', 'universal', 'advanced'],
+     'overall': ['basic', 'advanced', 'all']
+ }
+
+ def show_performance_per_skill(our_scores, dataset, items_name='images', prompt_to_items_name='prompt_to_images', print_std=False):
+     tag_result = {}
+     tag_file = f"{dataset.meta_dir}/genai_skills.json"
+     tags = json.load(open(tag_file))
+     items = getattr(dataset, items_name)
+     prompt_to_items = getattr(dataset, prompt_to_items_name)
+     # Collect, for every skill tag and every model, the image indices to average over
+     items_by_model_tag = {}
+     for tag in tags:
+         items_by_model_tag[tag] = {}
+         for prompt_idx in tags[tag]:
+             for image_idx in prompt_to_items[f"{prompt_idx:05d}"]:
+                 model = items[image_idx]['model']
+                 if model not in items_by_model_tag[tag]:
+                     items_by_model_tag[tag][model] = []
+                 items_by_model_tag[tag][model].append(image_idx)
+
+     for tag in tags:
+         tag_result[tag] = {}
+         for model in items_by_model_tag[tag]:
+             our_scores_mean = our_scores[items_by_model_tag[tag][model]].mean()
+             our_scores_std = our_scores[items_by_model_tag[tag][model]].std()
+             tag_result[tag][model] = {
+                 'metric': {'mean': our_scores_mean, 'std': our_scores_std},
+             }
+
+     # "all": union of image indices across every tag, per model
+     # (note: relies on `tag` still holding the last key from the loop above)
+     tag_result['all'] = {}
+     all_models = items_by_model_tag[tag]
+     for model in all_models:
+         all_model_indices = set()
+         for tag in items_by_model_tag:
+             all_model_indices = all_model_indices.union(set(items_by_model_tag[tag][model]))
+         all_model_indices = list(all_model_indices)
+         our_scores_mean = our_scores[all_model_indices].mean()
+         our_scores_std = our_scores[all_model_indices].std()
+         tag_result['all'][model] = {
+             'metric': {'mean': our_scores_mean, 'std': our_scores_std},
+         }
+
+     for tag_group in tag_groups:
+         for score_name in ['metric']:
+             print(f"Tag Group: {tag_group} ({score_name} performance)")
+             tag_header = f"{'Model':<17}" + " ".join([f"{tag:<17}" for tag in tag_groups[tag_group]])
+             print(tag_header)
+             for model_name in all_models:
+                 if print_std:
+                     detailed_scores = [f"{tag_result[tag][model_name][score_name]['mean']:.6f}+-{tag_result[tag][model_name][score_name]['std']:.6f}" for tag in tag_groups[tag_group]]
+                 else:
+                     detailed_scores = [f"{tag_result[tag][model_name][score_name]['mean']:.6f}" for tag in tag_groups[tag_group]]
+                 detailed_scores = " ".join([f"{score:<17}" for score in detailed_scores])
+                 model_scores = f"{model_name:<17}" + detailed_scores
+                 print(model_scores)
+             print()
+         print()
+
+
+ def get_args():
+     parser = argparse.ArgumentParser()
+     parser.add_argument("--meta_dir", type=str, required=True)
+     parser.add_argument("--model_path", type=str, required=True)
+     parser.add_argument("--image_dir", type=str, required=True)
+     parser.add_argument("--batch_size", type=int, default=16)
+     parser.add_argument("--seed", type=int, default=1234)
+     return parser.parse_args()
+
+ def main():
+     args = get_args()
+
+     dataset = GenAIBench_Image(root_dir=args.image_dir, meta_dir=args.meta_dir)
+
+     device = torch.device('cuda:0')
+     score_func = t2v_metrics.get_score_model(model=args.model_path, device=device)
+
+     scores = score_func.batch_forward(dataset, batch_size=args.batch_size).cpu()
+
+     ### Get performance per skill: average over question/answer templates per image
+     our_scores = scores.mean(axis=1)
+     show_performance_per_skill(our_scores, dataset, print_std=True)
+
+ if __name__ == "__main__":
+     main()
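Beyond the batch evaluation above, the vendored t2v_metrics package can also score individual image/text pairs, which is handy for spot checks. A sketch, assuming an illustrative sample path from a step1 run; the images=/texts= call style follows the upstream t2v_metrics Score interface:

    import t2v_metrics

    # clip-flant5-xxl is the default backbone of get_score_model (see __init__.py below)
    score_func = t2v_metrics.get_score_model(model='clip-flant5-xxl', device='cuda')

    # illustrative path and prompt; returns an M x N score matrix for M images x N texts
    scores = score_func(
        images=['eval_output/genai527/000000001.jpg'],
        texts=['a photo of a cat'],
    )
    print(scores)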
univa/eval/genai/t2v_metrics/__init__.py ADDED
@@ -0,0 +1,13 @@
+ from __future__ import absolute_import
+ from __future__ import division
+ from __future__ import print_function
+
+ from .constants import HF_CACHE_DIR
+ from .vqascore import VQAScore, list_all_vqascore_models
+
+ def list_all_models():
+     return list_all_vqascore_models()
+
+ def get_score_model(model='clip-flant5-xxl', device='cuda', cache_dir=HF_CACHE_DIR, **kwargs):
+     return VQAScore(model, device=device, cache_dir=cache_dir, **kwargs)
univa/eval/genai/t2v_metrics/clipscore.py ADDED
@@ -0,0 +1,21 @@
+ from typing import List
+
+ from .score import Score
+ from .constants import HF_CACHE_DIR
+ from .models.clipscore_models import list_all_clipscore_models, get_clipscore_model
+
+ class CLIPScore(Score):
+     def prepare_scoremodel(self,
+                            model='openai:ViT-L/14',
+                            device='cuda',
+                            cache_dir=HF_CACHE_DIR):
+         return get_clipscore_model(
+             model,
+             device=device,
+             cache_dir=cache_dir
+         )
+
+     def list_all_models(self) -> List[str]:
+         return list_all_clipscore_models()
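clipscore.py wires CLIPScore into the same Score interface that VQAScore uses, so switching metrics only changes the class and the model tag. A sketch, assuming the vendored package layout above; the model tag is taken from the CLIPScore example in step2_run_model.py's header comment, and the image path is illustrative:

    from t2v_metrics.clipscore import CLIPScore

    # openai:ViT-L-14-336 matches the CLIPScore example in step2_run_model.py
    clip_score = CLIPScore(model='openai:ViT-L-14-336', device='cuda')
    scores = clip_score(
        images=['eval_output/genai527/000000001.jpg'],  # illustrative path
        texts=['a photo of a cat'],
    )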
univa/eval/genai/t2v_metrics/constants.py ADDED
@@ -0,0 +1,8 @@
+ HF_CACHE_DIR = "./hf_cache/"  # TODO: change this to your own cache dir
+
+ # For CLIP-FlanT5 and LLaVA-1.5 (copied from llava)
+ CONTEXT_LEN = 2048
+ SYSTEM_MSG = "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions."
+ IGNORE_INDEX = -100
+ IMAGE_TOKEN_INDEX = -200
+ DEFAULT_IMAGE_TOKEN = "<image>"