# File size: 6,351 Bytes

# b925209

# Per-dataset settings: data locations, prompt template and decoding parameters.
datasets:
  # Captioning entry: the prompt asks for a one-sentence description of the image.
  coco_caption:
    # Placeholder paths; replace with the real image folder and ground-truth file.
    image_dir: "path/to/image"
    gt_path: "path/to/ground_truth"
    # Chat-formatted prompt with system, user and assistant turns.
    # "<image>" presumably marks where the image is inserted — TODO confirm.
    prompt: "<|im_start|>system\nFollow the user's instruction and answer questions.<|im_end|><|im_start|>user\n<image>\nGive a brief description of this image in one sentence.<|im_end|><|im_start|>assistant\n"
    # Decoding settings.
    beam_search: true
    beam_size: 1
    output_max_len: 30
    # NOTE(review): top_k is 3 and no top_p is set here, whereas the QA entries
    # in this file use top_k: 1 with top_p: 0.0 — confirm this is intentional.
    top_k: 3
    temperature: 1.0

  # Captioning entry: the prompt asks for a one-sentence description of the image.
  flickr30k_caption:
    # Placeholder paths; replace with the real image folder and ground-truth file.
    image_dir: "path/to/image"
    gt_path: "path/to/ground_truth"
    # Chat-formatted prompt with system, user and assistant turns.
    prompt: "<|im_start|>system\nFollow the user's instruction and answer questions.<|im_end|><|im_start|>user\n<image>\nGive a brief description of this image in one sentence.<|im_end|><|im_start|>assistant\n"
    # Decoding settings.
    beam_search: true
    beam_size: 1
    output_max_len: 30
    # NOTE(review): top_k is 3 and no top_p is set here, whereas the QA entries
    # in this file use top_k: 1 with top_p: 0.0 — confirm this is intentional.
    top_k: 3
    temperature: 1.0

  # Question-answering entry: the prompt requests a single word or phrase.
  vqav2:
    # Placeholder paths; replace with the real image folder and ground-truth file.
    image_dir: "path/to/image"
    gt_path: "path/to/ground_truth"
    # "{}" is presumably replaced with the question text by the consumer — TODO confirm.
    prompt: "<|im_start|>system\nFollow the user's instruction and answer questions.<|im_end|><|im_start|>user\n<image>\n{}\nAnswer the question using a single word or phrase.<|im_end|><|im_start|>assistant\n"
    # Decoding settings.
    beam_search: true
    beam_size: 1
    top_k: 1
    top_p: 0.0
    output_max_len: 8
    temperature: 1.0

  # MMMU entry: no single prompt string; the prompt is described by the
  # per-question-type templates and conversation format below.
  mmmu:
    split: "validation"
    # Decoding settings.
    beam_search: true
    beam_size: 1
    top_k: 1
    top_p: 0.0
    output_max_len: 1024
    temperature: 1.0
    apply_lemmatizer: false
    task_instructions: ""
    # Templates for multiple-choice and short-answer questions; the "{}" slots
    # are presumably filled with question/options text — TODO confirm.
    multi_choice_example_format: "{}\n{}\nAnswer with the option's letter from the given choices directly."
    short_ans_example_format: "{}\nAnswer the question using a single word or phrase."
    use_chat_format: true
    conv_format: "yi_nous_sft"
    default_image_token: "<image>"
    # NOTE(review): the meaning of prompt_offset is not visible in this file — document it.
    prompt_offset: 4
    # Placeholder path; replace with the real answer dictionary file.
    answer_dict: "path/to/answer_dict_val.json"

  # Question-answering entry: the prompt requests a single word, phrase, or number.
  textvqa:
    split: "val"
    # Placeholder paths; replace with the real image folder and ground-truth file.
    image_dir: "path/to/image"
    gt_path: "path/to/ground_truth"
    # "{}" is presumably replaced with the question text by the consumer — TODO confirm.
    prompt: "<|im_start|>system\nFollow the user's instruction and answer questions.<|im_end|><|im_start|>user\n<image>\n{}\nAnswer the question using a single word, phrase, or number.<|im_end|><|im_start|>assistant\n"
    # Decoding settings.
    beam_search: true
    beam_size: 1
    top_k: 1
    top_p: 0.0
    output_max_len: 10
    temperature: 1.0

  # MathVista entry: uses its own math-oriented system prompt; no image_dir or
  # gt_path is configured here.
  mathvista:
    split: "testmini"
    # NOTE(review): the system prompt reads "You are math expert." (missing article).
    # Left unchanged because it is text sent to the model; fix only deliberately.
    prompt: "<|im_start|>system\nYou are math expert. Use your math knowledge to calculate the answer.<|im_end|><|im_start|>user\n<image>\n{}\nAnswer the question using a single word, phrase, or number.<|im_end|><|im_start|>assistant\n"
    # Decoding settings.
    beam_search: true
    beam_size: 1
    top_k: 1
    top_p: 0.0
    output_max_len: 1024
    temperature: 1.0

  # MMBench entry: the prompt asks for the option letter directly.
  mmbench:
    split: "dev"
    # Placeholder path; replace with the real ground-truth file.
    gt_path: "path/to/ground_truth"
    # NOTE(review): "{}" is followed directly by "Answer" with no newline or space;
    # presumably the substituted text supplies its own trailing separator — confirm.
    prompt: "<|im_start|>system\nFollow the user's instruction and answer questions.<|im_end|><|im_start|>user\n<image>\n{}Answer with the option's letter from the given choices directly.<|im_end|><|im_start|>assistant\n"
    # Decoding settings.
    beam_search: true
    beam_size: 1
    top_k: 1
    top_p: 0.0
    output_max_len: 10
    temperature: 1.0
    submission: false

  # ChartQA entry: the prompt passes the "{}" slot through with no extra
  # answer-format instruction.
  chartqa:
    split: "test"
    # Placeholder paths; replace with the real image folder and ground-truth file.
    image_dir: "path/to/image"
    gt_path: "path/to/ground_truth"
    # "{}" is presumably replaced with the question text by the consumer — TODO confirm.
    prompt: "<|im_start|>system\nFollow the user's instruction and answer questions.<|im_end|><|im_start|>user\n<image>\n{}<|im_end|><|im_start|>assistant\n"
    # Decoding settings.
    beam_search: true
    beam_size: 1
    top_k: 1
    top_p: 0.0
    output_max_len: 20
    temperature: 1.0

  # DocVQA entry ("val" split): the prompt passes the "{}" slot through with no
  # extra answer-format instruction.
  docvqa:
    split: "val"
    # Placeholder paths; replace with the real image folder and ground-truth file.
    image_dir: "path/to/image"
    gt_path: "path/to/ground_truth"
    # "{}" is presumably replaced with the question text by the consumer — TODO confirm.
    prompt: "<|im_start|>system\nFollow the user's instruction and answer questions.<|im_end|><|im_start|>user\n<image>\n{}<|im_end|><|im_start|>assistant\n"
    # Decoding settings.
    beam_search: true
    beam_size: 1
    top_k: 1
    top_p: 0.0
    output_max_len: 20
    temperature: 1.0

  # RealWorldQA entry: the prompt passes the "{}" slot through with no extra
  # answer-format instruction.
  realworldqa:
    split: "test"
    # Placeholder paths; replace with the real image folder and ground-truth file.
    image_dir: "path/to/image"
    gt_path: "path/to/ground_truth"
    # "{}" is presumably replaced with the question text by the consumer — TODO confirm.
    prompt: "<|im_start|>system\nFollow the user's instruction and answer questions.<|im_end|><|im_start|>user\n<image>\n{}<|im_end|><|im_start|>assistant\n"
    # Decoding settings.
    beam_search: true
    beam_size: 1
    top_k: 1
    top_p: 0.0
    output_max_len: 20
    temperature: 1.0
    submission: false

  # OCRBench entry: the prompt passes the "{}" slot through with no extra
  # answer-format instruction.
  ocrbench:
    split: "test"
    # Placeholder paths; replace with the real image folder and ground-truth file.
    image_dir: "path/to/image"
    gt_path: "path/to/ground_truth"
    # "{}" is presumably replaced with the question text by the consumer — TODO confirm.
    prompt: "<|im_start|>system\nFollow the user's instruction and answer questions.<|im_end|><|im_start|>user\n<image>\n{}<|im_end|><|im_start|>assistant\n"
    # Decoding settings.
    beam_search: true
    beam_size: 1
    top_k: 1
    top_p: 0.0
    output_max_len: 70
    temperature: 1.0
    submission: false

  # AI2 diagram entry: the prompt requests a single word, phrase, or number.
  ai2diagram:
    split: "test"
    # Placeholder paths; replace with the real image folder and ground-truth file.
    image_dir: "path/to/image"
    gt_path: "path/to/ground_truth"
    # "{}" is presumably replaced with the question text by the consumer — TODO confirm.
    prompt: "<|im_start|>system\nFollow the user's instruction and answer questions.<|im_end|><|im_start|>user\n<image>\n{}\nAnswer the question using a single word, phrase, or number.<|im_end|><|im_start|>assistant\n"
    # Decoding settings.
    beam_search: true
    beam_size: 1
    top_k: 1
    top_p: 0.0
    output_max_len: 20
    temperature: 1.0

  # AI2 diagram "nomask" entry: same prompt and decoding values as `ai2diagram`;
  # the two placeholder paths are presumably pointed at different data — confirm.
  ai2diagram_nomask:
    split: "test"
    # Placeholder paths; replace with the real image folder and ground-truth file.
    image_dir: "path/to/image"
    gt_path: "path/to/ground_truth"
    # "{}" is presumably replaced with the question text by the consumer — TODO confirm.
    prompt: "<|im_start|>system\nFollow the user's instruction and answer questions.<|im_end|><|im_start|>user\n<image>\n{}\nAnswer the question using a single word, phrase, or number.<|im_end|><|im_start|>assistant\n"
    # Decoding settings.
    beam_search: true
    beam_size: 1
    top_k: 1
    top_p: 0.0
    output_max_len: 20
    temperature: 1.0

  # MMMU-Pro entry: no single prompt string; the prompt is described by the
  # per-question-type templates and conversation format below.
  mmmu_pro:
    split: "validation"
    # Decoding settings.
    beam_search: true
    beam_size: 1
    top_k: 1
    top_p: 0.0
    # NOTE(review): the otherwise near-identical `mmmu` entry uses
    # output_max_len: 1024 — confirm that 10 is intended here.
    output_max_len: 10
    temperature: 1.0
    apply_lemmatizer: false
    task_instructions: ""
    # Templates for multiple-choice and short-answer questions; the "{}" slots
    # are presumably filled with question/options text — TODO confirm.
    multi_choice_example_format: "{}\n{}\nAnswer with the option's letter from the given choices directly."
    short_ans_example_format: "{}\nAnswer the question using a single word or phrase."
    use_chat_format: true
    conv_format: "yi_nous_sft"
    default_image_token: "<image>"
    # NOTE(review): the meaning of prompt_offset is not visible in this file — document it.
    prompt_offset: 4
    # Placeholder path; replace with the real answer dictionary file.
    answer_dict: "path/to/answer_dict.json"

  # DocVQA entry ("test" split): the prompt asks for an answer taken directly
  # from the text in the image.
  docvqa_test:
    split: "test"
    # Placeholder paths; replace with the real image folder and ground-truth file.
    image_dir: "path/to/image"
    gt_path: "path/to/ground_truth"
    # NOTE(review): this prompt puts a newline after each "<|im_end|>", whereas the
    # `docvqa` entry (and the other prompts in this file) do not — confirm which
    # form the model expects. Left unchanged because it alters the model input.
    prompt: "<|im_start|>system\nFollow the user's instruction and answer questions.<|im_end|>\n<|im_start|>user\n<image>\n{}\nAnswer this question using the text in the image directly.<|im_end|>\n<|im_start|>assistant\n"
    # Decoding settings.
    beam_search: true
    beam_size: 1
    top_k: 1
    top_p: 0.0
    output_max_len: 20
    temperature: 1.0