cccjc committed on
Commit
14394ad
·
1 Parent(s): 12d161a

add a flag to disable single-image results in display

Browse files
Files changed (2) hide show
  1. app.py +44 -27
  2. constants.py +3 -0
app.py CHANGED
@@ -18,7 +18,8 @@ with open(table_css_file, "r") as f:
18
 
19
  # Initialize data loaders
20
  default_loader = MEGABenchEvalDataLoader("./static/eval_results/Default")
21
- si_loader = MEGABenchEvalDataLoader("./static/eval_results/SI")
 
22
 
23
  with gr.Blocks() as block:
24
  # Add a style element that we'll update
@@ -44,18 +45,20 @@ with gr.Blocks() as block:
44
  TABLE_INTRODUCTION
45
  )
46
 
47
- with gr.Row():
48
- table_selector = gr.Radio(
49
- choices=["Default", "Single Image"],
50
- label="Select table to display. Default: all MEGA-Bench tasks; Single Image: single-image tasks only.",
51
- value="Default"
52
- )
53
-
54
  # Define different captions for each table
55
  default_caption = "**Table 1: MEGA-Bench full results.** The number in the parentheses is the number of tasks of each keyword. <br> The Core set contains $N_{\\text{core}} = 440$ tasks evaluated by rule-based metrics, and the Open-ended set contains $N_{\\text{open}} = 65$ tasks evaluated by a VLM judge (we use GPT-4o-0806). <br> Different from the results in our paper, we only use the Core results with CoT prompting here for clarity and compatibility with the released data. <br> $\\text{Overall} \\ = \\ \\frac{\\text{Core} \\ \\cdot \\ N_{\\text{core}} \\ + \\ \\text{Open-ended} \\ \\cdot \\ N_{\\text{open}}}{N_{\\text{core}} \\ + \\ N_{\\text{open}}}$ <br> * indicates self-reported results from the model authors."
56
 
57
  single_image_caption = "**Table 2: MEGA-Bench Single-image setting results.** The number in the parentheses is the number of tasks in each keyword. <br> This subset contains 273 single-image tasks from the Core set and 42 single-image tasks from the Open-ended set. For open-source models, we drop the image input in the 1-shot demonstration example so that the entire query contains a single image only. <br> Compared to the default table, some models with only single-image support are added."
58
 
 
 
 
 
 
 
 
 
 
59
  caption_component = gr.Markdown(
60
  value=default_caption,
61
  elem_classes="table-caption",
@@ -86,7 +89,8 @@ with gr.Blocks() as block:
86
  )
87
 
88
  def update_table_and_caption(table_type, super_group, model_group):
89
- if table_type == "Default":
 
90
  headers, data = default_loader.get_leaderboard_data(super_group, model_group)
91
  caption = default_caption
92
  else: # Single-image
@@ -106,7 +110,8 @@ with gr.Blocks() as block:
106
  ]
107
 
108
  def update_selectors(table_type):
109
- loader = default_loader if table_type == "Default" else si_loader
 
110
  return [
111
  gr.Radio(choices=list(loader.SUPER_GROUPS.keys())),
112
  gr.Radio(choices=list(loader.MODEL_GROUPS.keys()))
@@ -114,29 +119,41 @@ with gr.Blocks() as block:
114
 
115
  refresh_button = gr.Button("Refresh")
116
 
117
- # Update click and change handlers to include caption updates
118
- refresh_button.click(
119
- fn=update_table_and_caption,
120
- inputs=[table_selector, super_group_selector, model_group_selector],
121
- outputs=[data_component, caption_component, css_style]
122
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
  super_group_selector.change(
124
  fn=update_table_and_caption,
125
- inputs=[table_selector, super_group_selector, model_group_selector],
126
  outputs=[data_component, caption_component, css_style]
127
  )
 
128
  model_group_selector.change(
129
  fn=update_table_and_caption,
130
- inputs=[table_selector, super_group_selector, model_group_selector],
131
- outputs=[data_component, caption_component, css_style]
132
- )
133
- table_selector.change(
134
- fn=update_selectors,
135
- inputs=[table_selector],
136
- outputs=[super_group_selector, model_group_selector]
137
- ).then(
138
- fn=update_table_and_caption,
139
- inputs=[table_selector, super_group_selector, model_group_selector],
140
  outputs=[data_component, caption_component, css_style]
141
  )
142
 
 
18
 
19
  # Initialize data loaders
20
  default_loader = MEGABenchEvalDataLoader("./static/eval_results/Default")
21
+ # Initialize single image loader only if enabled
22
+ si_loader = MEGABenchEvalDataLoader("./static/eval_results/SI") if ENABLE_SINGLE_IMAGE_TABLE else None
23
 
24
  with gr.Blocks() as block:
25
  # Add a style element that we'll update
 
45
  TABLE_INTRODUCTION
46
  )
47
 
 
 
 
 
 
 
 
48
  # Define different captions for each table
49
  default_caption = "**Table 1: MEGA-Bench full results.** The number in the parentheses is the number of tasks of each keyword. <br> The Core set contains $N_{\\text{core}} = 440$ tasks evaluated by rule-based metrics, and the Open-ended set contains $N_{\\text{open}} = 65$ tasks evaluated by a VLM judge (we use GPT-4o-0806). <br> Different from the results in our paper, we only use the Core results with CoT prompting here for clarity and compatibility with the released data. <br> $\\text{Overall} \\ = \\ \\frac{\\text{Core} \\ \\cdot \\ N_{\\text{core}} \\ + \\ \\text{Open-ended} \\ \\cdot \\ N_{\\text{open}}}{N_{\\text{core}} \\ + \\ N_{\\text{open}}}$ <br> * indicates self-reported results from the model authors."
50
 
51
  single_image_caption = "**Table 2: MEGA-Bench Single-image setting results.** The number in the parentheses is the number of tasks in each keyword. <br> This subset contains 273 single-image tasks from the Core set and 42 single-image tasks from the Open-ended set. For open-source models, we drop the image input in the 1-shot demonstration example so that the entire query contains a single image only. <br> Compared to the default table, some models with only single-image support are added."
52
 
53
+ with gr.Row():
54
+ # Only show table selector if single image table is enabled
55
+ if ENABLE_SINGLE_IMAGE_TABLE:
56
+ table_selector = gr.Radio(
57
+ choices=["Default", "Single Image"],
58
+ label="Select table to display. Default: all MEGA-Bench tasks; Single Image: single-image tasks only.",
59
+ value="Default"
60
+ )
61
+
62
  caption_component = gr.Markdown(
63
  value=default_caption,
64
  elem_classes="table-caption",
 
89
  )
90
 
91
  def update_table_and_caption(table_type, super_group, model_group):
92
+ # If single image is disabled, always use default table
93
+ if not ENABLE_SINGLE_IMAGE_TABLE or table_type == "Default":
94
  headers, data = default_loader.get_leaderboard_data(super_group, model_group)
95
  caption = default_caption
96
  else: # Single-image
 
110
  ]
111
 
112
  def update_selectors(table_type):
113
+ # If single image is disabled, always use default loader
114
+ loader = default_loader if not ENABLE_SINGLE_IMAGE_TABLE or table_type == "Default" else si_loader
115
  return [
116
  gr.Radio(choices=list(loader.SUPER_GROUPS.keys())),
117
  gr.Radio(choices=list(loader.MODEL_GROUPS.keys()))
 
119
 
120
  refresh_button = gr.Button("Refresh")
121
 
122
+ # Set up different handlers based on whether single image table is enabled
123
+ if ENABLE_SINGLE_IMAGE_TABLE:
124
+ refresh_button.click(
125
+ fn=update_table_and_caption,
126
+ inputs=[table_selector, super_group_selector, model_group_selector],
127
+ outputs=[data_component, caption_component, css_style]
128
+ )
129
+
130
+ table_selector.change(
131
+ fn=update_selectors,
132
+ inputs=[table_selector],
133
+ outputs=[super_group_selector, model_group_selector]
134
+ ).then(
135
+ fn=update_table_and_caption,
136
+ inputs=[table_selector, super_group_selector, model_group_selector],
137
+ outputs=[data_component, caption_component, css_style]
138
+ )
139
+ else:
140
+ # Simplified handlers when single image is disabled
141
+ refresh_button.click(
142
+ fn=lambda super_group, model_group: update_table_and_caption("Default", super_group, model_group),
143
+ inputs=[super_group_selector, model_group_selector],
144
+ outputs=[data_component, caption_component, css_style]
145
+ )
146
+
147
+ # These handlers are needed in both cases
148
  super_group_selector.change(
149
  fn=update_table_and_caption,
150
+ inputs=[table_selector if ENABLE_SINGLE_IMAGE_TABLE else gr.State("Default"), super_group_selector, model_group_selector],
151
  outputs=[data_component, caption_component, css_style]
152
  )
153
+
154
  model_group_selector.change(
155
  fn=update_table_and_caption,
156
+ inputs=[table_selector if ENABLE_SINGLE_IMAGE_TABLE else gr.State("Default"), super_group_selector, model_group_selector],
 
 
 
 
 
 
 
 
 
157
  outputs=[data_component, caption_component, css_style]
158
  )
159
 
constants.py CHANGED
@@ -2,6 +2,9 @@ import os
2
 
3
  HF_TOKEN = os.environ.get("HF_TOKEN")
4
 
 
 
 
5
  LEADERBOARD_INTRODUCTION = """# MEGA-Bench Leaderboard
6
 
7
  ## 🚀 Introduction
 
2
 
3
  HF_TOKEN = os.environ.get("HF_TOKEN")
4
 
5
+ # Global configuration flag to control whether the "Single Image" table option should be displayed
6
+ ENABLE_SINGLE_IMAGE_TABLE = False # Set to True to enable, False to disable
7
+
8
  LEADERBOARD_INTRODUCTION = """# MEGA-Bench Leaderboard
9
 
10
  ## 🚀 Introduction