# NVLM-D-72B eval/full_eval.yaml: configuration for the benchmark evaluation scripts
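#
# Each entry under `datasets:` configures one benchmark: where to find its
# images and ground-truth annotations, the chat prompt template ("{}" is the
# slot for the question, where present), and the decoding parameters
# (beam_search/beam_size, top_k/top_p, temperature, and output_max_len, the
# latter most likely a cap in tokens). "path/to/image" and
# "path/to/ground_truth" are placeholders to be replaced with local paths.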
datasets:
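  # COCO captioning. The prompt asks for a one-sentence description (no "{}"
  # slot), output is capped at 30 tokens, and top_k is 3 rather than the
  # greedy top_k 1 used by the QA entries below. Hypothetical example of
  # filled-in paths (adjust to your local layout):
  #   image_dir: "/data/coco/images"
  #   gt_path: "/data/coco/annotations/coco_caption_gt.json"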
  coco_caption:
    image_dir: "path/to/image"
    gt_path: "path/to/ground_truth"
    prompt: "<|im_start|>system\nFollow the user's instruction and answer questions.<|im_end|><|im_start|>user\n<image>\nGive a brief description of this image in one sentence.<|im_end|><|im_start|>assistant\n"
    beam_search: True
    beam_size: 1
    output_max_len: 30
    top_k: 3
    temperature: 1.0
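  # Flickr30k captioning: identical prompt and decoding settings to coco_caption.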
  flickr30k_caption:
    image_dir: "path/to/image"
    gt_path: "path/to/ground_truth"
    prompt: "<|im_start|>system\nFollow the user's instruction and answer questions.<|im_end|><|im_start|>user\n<image>\nGive a brief description of this image in one sentence.<|im_end|><|im_start|>assistant\n"
    beam_search: True
    beam_size: 1
    output_max_len: 30
    top_k: 3
    temperature: 1.0
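  # VQAv2. "{}" is filled with the question; answers are expected to be a
  # single word or phrase, so output is capped at 8 tokens and decoding is
  # effectively greedy (beam_size 1, top_k 1, top_p 0.0).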
  vqav2:
    image_dir: "path/to/image"
    gt_path: "path/to/ground_truth"
    prompt: "<|im_start|>system\nFollow the user's instruction and answer questions.<|im_end|><|im_start|>user\n<image>\n{}\nAnswer the question using a single word or phrase.<|im_end|><|im_start|>assistant\n"
    beam_search: True
    beam_size: 1
    top_k: 1
    top_p: 0.0
    output_max_len: 8
    temperature: 1.0
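  # MMMU (validation split). Uses the chat-format prompt builder rather than a
  # single prompt string: multiple-choice and short-answer questions are
  # rendered with the two templates below, conv_format names the conversation
  # template applied when use_chat_format is True, and answer_dict should point
  # to (presumably) the answer_dict_val.json file distributed with MMMU. The
  # 1024-token budget leaves room for reasoning before the final answer.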
  mmmu:
    split: "validation"
    beam_search: True
    beam_size: 1
    top_k: 1
    top_p: 0.0
    output_max_len: 1024
    temperature: 1.0
    apply_lemmatizer: False
    task_instructions: ""
    multi_choice_example_format: "{}\n{}\nAnswer with the option's letter from the given choices directly."
    short_ans_example_format: "{}\nAnswer the question using a single word or phrase."
    use_chat_format: True
    conv_format: "yi_nous_sft"
    default_image_token: "<image>"
    prompt_offset: 4
    answer_dict: "path/to/answer_dict_val.json"
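  # TextVQA (val split): same greedy short-answer setup as vqav2, with a
  # 10-token cap and a prompt that also permits numeric answers.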
  textvqa:
    split: "val"
    image_dir: "path/to/image"
    gt_path: "path/to/ground_truth"
    prompt: "<|im_start|>system\nFollow the user's instruction and answer questions.<|im_end|><|im_start|>user\n<image>\n{}\nAnswer the question using a single word, phrase, or number.<|im_end|><|im_start|>assistant\n"
    beam_search: True
    beam_size: 1
    top_k: 1
    top_p: 0.0
    output_max_len: 10
    temperature: 1.0
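  # MathVista (testmini split). Uses a math-specific system prompt and a
  # 1024-token budget for working through the calculation; note that no
  # image_dir/gt_path is set for this entry.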
  mathvista:
    split: "testmini"
    prompt: "<|im_start|>system\nYou are math expert. Use your math knowledge to calculate the answer.<|im_end|><|im_start|>user\n<image>\n{}\nAnswer the question using a single word, phrase, or number.<|im_end|><|im_start|>assistant\n"
    beam_search: True
    beam_size: 1
    top_k: 1
    top_p: 0.0
    output_max_len: 1024
    temperature: 1.0
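  # MMBench (dev split). Multiple-choice: the model is asked to reply with the
  # option letter only. submission presumably switches between local scoring
  # and writing a file for the official evaluation server.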
  mmbench:
    split: "dev"
    gt_path: "path/to/ground_truth"
    prompt: "<|im_start|>system\nFollow the user's instruction and answer questions.<|im_end|><|im_start|>user\n<image>\n{}Answer with the option's letter from the given choices directly.<|im_end|><|im_start|>assistant\n"
    beam_search: True
    beam_size: 1
    top_k: 1
    top_p: 0.0
    output_max_len: 10
    temperature: 1.0
    submission: False
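  # ChartQA (test split). The question is passed through "{}" verbatim with no
  # added answer-format instruction; greedy decoding, 20-token cap.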
  chartqa:
    split: "test"
    image_dir: "path/to/image"
    gt_path: "path/to/ground_truth"
    prompt: "<|im_start|>system\nFollow the user's instruction and answer questions.<|im_end|><|im_start|>user\n<image>\n{}<|im_end|><|im_start|>assistant\n"
    beam_search: True
    beam_size: 1
    top_k: 1
    top_p: 0.0
    output_max_len: 20
    temperature: 1.0
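  # DocVQA (val split): same prompt template and decoding settings as chartqa.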
  docvqa:
    split: "val"
    image_dir: "path/to/image"
    gt_path: "path/to/ground_truth"
    prompt: "<|im_start|>system\nFollow the user's instruction and answer questions.<|im_end|><|im_start|>user\n<image>\n{}<|im_end|><|im_start|>assistant\n"
    beam_search: True
    beam_size: 1
    top_k: 1
    top_p: 0.0
    output_max_len: 20
    temperature: 1.0
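  # RealWorldQA (test split): same prompt and decoding as docvqa, plus a
  # submission flag.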
  realworldqa:
    split: "test"
    image_dir: "path/to/image"
    gt_path: "path/to/ground_truth"
    prompt: "<|im_start|>system\nFollow the user's instruction and answer questions.<|im_end|><|im_start|>user\n<image>\n{}<|im_end|><|im_start|>assistant\n"
    beam_search: True
    beam_size: 1
    top_k: 1
    top_p: 0.0
    output_max_len: 20
    temperature: 1.0
    submission: False
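  # OCRBench (test split): same generic prompt, with output_max_len raised to
  # 70, presumably to accommodate longer OCR transcriptions.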
  ocrbench:
    split: "test"
    image_dir: "path/to/image"
    gt_path: "path/to/ground_truth"
    prompt: "<|im_start|>system\nFollow the user's instruction and answer questions.<|im_end|><|im_start|>user\n<image>\n{}<|im_end|><|im_start|>assistant\n"
    beam_search: True
    beam_size: 1
    top_k: 1
    top_p: 0.0
    output_max_len: 70
    temperature: 1.0
    submission: False
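  # AI2D (test split): short-answer prompt as in textvqa, 20-token cap.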
  ai2diagram:
    split: "test"
    image_dir: "path/to/image"
    gt_path: "path/to/ground_truth"
    prompt: "<|im_start|>system\nFollow the user's instruction and answer questions.<|im_end|><|im_start|>user\n<image>\n{}\nAnswer the question using a single word, phrase, or number.<|im_end|><|im_start|>assistant\n"
    beam_search: True
    beam_size: 1
    top_k: 1
    top_p: 0.0
    output_max_len: 20
    temperature: 1.0
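  # AI2D "no mask" variant: presumably the test images without answer-label
  # masking; settings are otherwise identical to ai2diagram.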
  ai2diagram_nomask:
    split: "test"
    image_dir: "path/to/image"
    gt_path: "path/to/ground_truth"
    prompt: "<|im_start|>system\nFollow the user's instruction and answer questions.<|im_end|><|im_start|>user\n<image>\n{}\nAnswer the question using a single word, phrase, or number.<|im_end|><|im_start|>assistant\n"
    beam_search: True
    beam_size: 1
    top_k: 1
    top_p: 0.0
    output_max_len: 20
    temperature: 1.0
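  # MMMU-Pro (validation split): same chat-format setup as mmmu, but with
  # output_max_len reduced to 10 for the final short answer and its own
  # answer_dict file.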
  mmmu_pro:
    split: "validation"
    beam_search: True
    beam_size: 1
    top_k: 1
    top_p: 0.0
    output_max_len: 10
    temperature: 1.0
    apply_lemmatizer: False
    task_instructions: ""
    multi_choice_example_format: "{}\n{}\nAnswer with the option's letter from the given choices directly."
    short_ans_example_format: "{}\nAnswer the question using a single word or phrase."
    use_chat_format: True
    conv_format: "yi_nous_sft"
    default_image_token: "<image>"
    prompt_offset: 4
    answer_dict: "path/to/answer_dict.json"
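  # DocVQA test split. The prompt additionally asks the model to answer
  # directly from the text in the image; test-split predictions are typically
  # scored through the benchmark's evaluation server.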
  docvqa_test:
    split: "test"
    image_dir: "path/to/image"
    gt_path: "path/to/ground_truth"
    prompt: "<|im_start|>system\nFollow the user's instruction and answer questions.<|im_end|>\n<|im_start|>user\n<image>\n{}\nAnswer this question using the text in the image directly.<|im_end|>\n<|im_start|>assistant\n"
    beam_search: True
    beam_size: 1
    top_k: 1
    top_p: 0.0
    output_max_len: 20
    temperature: 1.0