NVLM-D-72B / eval /full_eval.yaml

Add benchmark evaluation scripts

b925209 3 months ago

6.35 kB

	datasets:
	coco_caption:
	image_dir: "path/to/image"
	gt_path: "path/to/ground_truth"
	prompt: "<\|im_start\|>system\nFollow the user's instruction and answer questions.<\|im_end\|><\|im_start\|>user\n<image>\nGive a brief description of this image in one sentence.<\|im_end\|><\|im_start\|>assistant\n"
	beam_search: True
	beam_size: 1
	output_max_len: 30
	top_k: 3
	temperature: 1.0

	flickr30k_caption:
	image_dir: "path/to/image"
	gt_path: "path/to/ground_truth"
	prompt: "<\|im_start\|>system\nFollow the user's instruction and answer questions.<\|im_end\|><\|im_start\|>user\n<image>\nGive a brief description of this image in one sentence.<\|im_end\|><\|im_start\|>assistant\n"
	beam_search: True
	beam_size: 1
	output_max_len: 30
	top_k: 3
	temperature: 1.0

	vqav2:
	image_dir: "path/to/image"
	gt_path: "path/to/ground_truth"
	prompt: "<\|im_start\|>system\nFollow the user's instruction and answer questions.<\|im_end\|><\|im_start\|>user\n<image>\n{}\nAnswer the question using a single word or phrase.<\|im_end\|><\|im_start\|>assistant\n"
	beam_search: True
	beam_size: 1
	top_k: 1
	top_p: 0.0
	output_max_len: 8
	temperature: 1.0

	mmmu:
	split: "validation"
	beam_search: True
	beam_size: 1
	top_k: 1
	top_p: 0.0
	output_max_len: 1024
	temperature: 1.0
	apply_lemmatizer: False
	task_instructions: ""
	multi_choice_example_format: "{}\n{}\nAnswer with the option's letter from the given choices directly."
	short_ans_example_format: "{}\nAnswer the question using a single word or phrase."
	use_chat_format: True
	conv_format: "yi_nous_sft"
	default_image_token: "<image>"
	prompt_offset: 4
	answer_dict: "path/to/answer_dict_val.json"

	textvqa:
	split: "val"
	image_dir: "path/to/image"
	gt_path: "path/to/ground_truth"
	prompt: "<\|im_start\|>system\nFollow the user's instruction and answer questions.<\|im_end\|><\|im_start\|>user\n<image>\n{}\nAnswer the question using a single word, phrase, or number.<\|im_end\|><\|im_start\|>assistant\n"
	beam_search: True
	beam_size: 1
	top_k: 1
	top_p: 0.0
	output_max_len: 10
	temperature: 1.0

	mathvista:
	split: "testmini"
	prompt: "<\|im_start\|>system\nYou are math expert. Use your math knowledge to calculate the answer.<\|im_end\|><\|im_start\|>user\n<image>\n{}\nAnswer the question using a single word, phrase, or number.<\|im_end\|><\|im_start\|>assistant\n"
	beam_search: True
	beam_size: 1
	top_k: 1
	top_p: 0.0
	output_max_len: 1024
	temperature: 1.0

	mmbench:
	split: "dev"
	gt_path: "path/to/ground_truth"
	prompt: "<\|im_start\|>system\nFollow the user's instruction and answer questions.<\|im_end\|><\|im_start\|>user\n<image>\n{}Answer with the option's letter from the given choices directly.<\|im_end\|><\|im_start\|>assistant\n"
	beam_search: True
	beam_size: 1
	top_k: 1
	top_p: 0.0
	output_max_len: 10
	temperature: 1.0
	submission: False

	chartqa:
	split: "test"
	image_dir: "path/to/image"
	gt_path: "path/to/ground_truth"
	prompt: "<\|im_start\|>system\nFollow the user's instruction and answer questions.<\|im_end\|><\|im_start\|>user\n<image>\n{}<\|im_end\|><\|im_start\|>assistant\n"

	beam_search: True
	beam_size: 1
	top_k: 1
	top_p: 0.0
	output_max_len: 20
	temperature: 1.0

	docvqa:
	split: "val"
	image_dir: "path/to/image"
	gt_path: "path/to/ground_truth"
	prompt: "<\|im_start\|>system\nFollow the user's instruction and answer questions.<\|im_end\|><\|im_start\|>user\n<image>\n{}<\|im_end\|><\|im_start\|>assistant\n"
	beam_search: True
	beam_size: 1
	top_k: 1
	top_p: 0.0
	output_max_len: 20
	temperature: 1.0

	realworldqa:
	split: "test"
	image_dir: "path/to/image"
	gt_path: "path/to/ground_truth"
	prompt: "<\|im_start\|>system\nFollow the user's instruction and answer questions.<\|im_end\|><\|im_start\|>user\n<image>\n{}<\|im_end\|><\|im_start\|>assistant\n"
	beam_search: True
	beam_size: 1
	top_k: 1
	top_p: 0.0
	output_max_len: 20
	temperature: 1.0
	submission: False

	ocrbench:
	split: "test"
	image_dir: "path/to/image"
	gt_path: "path/to/ground_truth"
	prompt: "<\|im_start\|>system\nFollow the user's instruction and answer questions.<\|im_end\|><\|im_start\|>user\n<image>\n{}<\|im_end\|><\|im_start\|>assistant\n"
	beam_search: True
	beam_size: 1
	top_k: 1
	top_p: 0.0
	output_max_len: 70
	temperature: 1.0
	submission: False

	ai2diagram:
	split: "test"
	image_dir: "path/to/image"
	gt_path: "path/to/ground_truth"
	prompt: "<\|im_start\|>system\nFollow the user's instruction and answer questions.<\|im_end\|><\|im_start\|>user\n<image>\n{}\nAnswer the question using a single word, phrase, or number.<\|im_end\|><\|im_start\|>assistant\n"
	beam_search: True
	beam_size: 1
	top_k: 1
	top_p: 0.0
	output_max_len: 20
	temperature: 1.0

	ai2diagram_nomask:
	split: "test"
	image_dir: "path/to/image"
	gt_path: "path/to/ground_truth"
	prompt: "<\|im_start\|>system\nFollow the user's instruction and answer questions.<\|im_end\|><\|im_start\|>user\n<image>\n{}\nAnswer the question using a single word, phrase, or number.<\|im_end\|><\|im_start\|>assistant\n"
	beam_search: True
	beam_size: 1
	top_k: 1
	top_p: 0.0
	output_max_len: 20
	temperature: 1.0

	mmmu_pro:
	split: "validation"
	beam_search: True
	beam_size: 1
	top_k: 1
	top_p: 0.0
	output_max_len: 10
	temperature: 1.0
	apply_lemmatizer: False
	task_instructions: ""
	multi_choice_example_format: "{}\n{}\nAnswer with the option's letter from the given choices directly."
	short_ans_example_format: "{}\nAnswer the question using a single word or phrase."
	use_chat_format: True
	conv_format: "yi_nous_sft"
	default_image_token: "<image>"
	prompt_offset: 4
	answer_dict: "path/to/answer_dict.json"

	docvqa_test:
	split: "test"
	image_dir: "path/to/image"
	gt_path: "path/to/ground_truth"
	prompt: "<\|im_start\|>system\nFollow the user's instruction and answer questions.<\|im_end\|>\n<\|im_start\|>user\n<image>\n{}\nAnswer this question using the text in the image directly.<\|im_end\|>\n<\|im_start\|>assistant\n"
	beam_search: True
	beam_size: 1
	top_k: 1
	top_p: 0.0
	output_max_len: 20
	temperature: 1.0