{ "config_general": { "lighteval_sha": "167773f1d5d1647c60dadc31c9e731ab7dbcbbad", "num_few_shot_default": 0, "num_fewshot_seeds": 1, "override_batch_size": 1, "max_samples": null, "job_id": "", "model_name": "JosephusCheung/Pwen-VL-Chat-20_30", "model_sha": "64a9b89fb18140fc1af1f11471dc9fe34ebc7446", "model_dtype": "torch.bfloat16", "model_size": "14.51 GB" }, "results": { "harness|drop|3": { "em": 0.3234060402684564, "em_stderr": 0.004790466119380845, "f1": 0.3795564177852361, "f1_stderr": 0.004705234681743664 }, "harness|gsm8k|5": { "acc": 0.1910538286580743, "acc_stderr": 0.010828791191755175 }, "harness|winogrande|5": { "acc": 0.6835043409629045, "acc_stderr": 0.013071868328051477 }, "all": { "em": 0.3234060402684564, "em_stderr": 0.004790466119380845, "f1": 0.3795564177852361, "f1_stderr": 0.004705234681743664, "acc": 0.4372790848104894, "acc_stderr": 0.011950329759903327 } }, "versions": { "all": 0, "harness|drop|3": 1, "harness|gsm8k|5": 0, "harness|winogrande|5": 0 }, "config_tasks": { "harness|drop": "LM Harness task", "harness|gsm8k": "LM Harness task", "harness|winogrande": "LM Harness task" }, "summary_tasks": { "harness|drop|3": { "hashes": { "hash_examples": "1d27416e8324e9a3", "hash_full_prompts": "a5513ff9a741b385", "hash_input_tokens": "e4d9d658ccb42fc3", "hash_cont_tokens": "3acffef0fcdaa041" }, "truncated": 0, "non_truncated": 9536, "padded": 0, "non_padded": 9536, "effective_few_shots": 3.0, "num_truncated_few_shots": 0 }, "harness|gsm8k|5": { "hashes": { "hash_examples": "4c0843a5d99bcfdc", "hash_full_prompts": "41d55e83abc0e02d", "hash_input_tokens": "2282d6efefcc7579", "hash_cont_tokens": "dcabb1d9463db6f1" }, "truncated": 0, "non_truncated": 1319, "padded": 0, "non_padded": 1319, "effective_few_shots": 5.0, "num_truncated_few_shots": 0 }, "harness|winogrande|5": { "hashes": { "hash_examples": "aada0a176fd81218", "hash_full_prompts": "c8655cbd12de8409", "hash_input_tokens": "288ed7294cb59f7d", "hash_cont_tokens": "f4a307afe0c47a4a" }, "truncated": 0, "non_truncated": 1267, "padded": 2429, "non_padded": 105, "effective_few_shots": 5.0, "num_truncated_few_shots": 0 } }, "summary_general": { "hashes": { "hash_examples": "9b4d8993161e637d", "hash_full_prompts": "08215e527b7e60a5", "hash_input_tokens": "329083a90a12723b", "hash_cont_tokens": "1578bed2931bae1e" }, "truncated": 0, "non_truncated": 12122, "padded": 2429, "non_padded": 10960, "num_truncated_few_shots": 0, "total_evaluation_time_secondes": 0 } }