mgyigit committed (verified)
Commit b12fa6d · Parent(s): b711897

Update app.py

Files changed (1): app.py +199 -98
app.py CHANGED
@@ -10,12 +10,11 @@ import matplotlib.pyplot as plt
 import seaborn as sns
 import plotnine as p9
 import sys
+import zipfile
+import tempfile
 sys.path.append('./src')
 sys.path.append('.')
 
-from huggingface_hub import HfApi
-api = HfApi(token=os.getenv("api-key")) #load api-key secret
-
 from src.about import *
 from src.saving_utils import *
 from src.vis_utils import *
@@ -34,10 +33,10 @@ def add_new_eval(
     family_prediction_dataset,
     save,
 ):
+    # Validate required files based on selected benchmarks
     if any(task in benchmark_types for task in ['similarity', 'family', 'function']) and human_file is None:
         gr.Warning("Human representations are required for similarity, family, or function benchmarks!")
         return -1
-
     if 'affinity' in benchmark_types and skempi_file is None:
         gr.Warning("SKEMPI representations are required for affinity benchmark!")
         return -1
@@ -47,60 +46,161 @@ def add_new_eval(
     representation_name = model_name_textbox if revision_name_textbox == '' else revision_name_textbox
 
     try:
-        results = run_probe(benchmark_types, representation_name, human_file, skempi_file, similarity_tasks, function_prediction_aspect, function_prediction_dataset, family_prediction_dataset)
-    except:
-        completion_info = gr.Warning("Your submission has not been processed. Please check your representation files!")
+        results = run_probe(
+            benchmark_types,
+            representation_name,
+            human_file,
+            skempi_file,
+            similarity_tasks,
+            function_prediction_aspect,
+            function_prediction_dataset,
+            family_prediction_dataset,
+        )
+    except Exception as e:
+        gr.Warning("Your submission has not been processed. Please check your representation files!")
         return -1
 
-
+    # Even if save is False, we store the submission (e.g., temporarily) so that the leaderboard includes it.
     if save:
         save_results(representation_name, benchmark_types, results)
-        completion_info = gr.Info("Your submission has been processed and results are saved!")
-
     else:
-        completion_info = gr.Info("Your submission has been processed!")
+        save_results(representation_name, benchmark_types, results, temporary=True)
 
     return 0
 
+
 def refresh_data():
-    print(api.whoami())
-    api.restart_space(repo_id="HUBioDataLab/PROBE", token=os.getenv("api-key"))
     benchmark_types = ["similarity", "function", "family", "affinity", "leaderboard"]
-
     for benchmark_type in benchmark_types:
         path = f"/tmp/{benchmark_type}_results.csv"
         if os.path.exists(path):
             os.remove(path)
-
     benchmark_types.remove("leaderboard")
     download_from_hub(benchmark_types)
 
-# Define a function to update metrics based on benchmark type selection
-def update_metrics(selected_benchmarks):
-    updated_metrics = set()
-    for benchmark in selected_benchmarks:
-        updated_metrics.update(benchmark_metric_mapping.get(benchmark, []))
-    return list(updated_metrics)
-
-# Define a function to update the leaderboard
-def update_leaderboard(selected_methods, selected_metrics):
-    updated_df = get_baseline_df(selected_methods, selected_metrics)
-    return updated_df
-
+
+def download_leaderboard_csv():
+    """Generates a CSV file for the updated leaderboard."""
+    df = get_baseline_df(None, None)
+    tmp_csv = os.path.join(tempfile.gettempdir(), "leaderboard_download.csv")
+    df.to_csv(tmp_csv, index=False)
+    return tmp_csv
+
+
+def generate_plots_based_on_submission(benchmark_types, similarity_tasks, function_prediction_aspect, function_prediction_dataset, family_prediction_dataset):
+    """
+    For each benchmark type selected during submission, generate a plot based on the corresponding extra parameters.
+    """
+    tmp_dir = tempfile.mkdtemp()
+    plot_files = []
+    # Get the current leaderboard to retrieve available method names.
+    leaderboard = get_baseline_df(None, None)
+    method_names = leaderboard['Method'].unique().tolist()
+
+    for btype in benchmark_types:
+        # For each benchmark type, choose plotting parameters based on additional selections.
+        if btype == "similarity":
+            # Use the user-selected similarity tasks (if provided) to determine the metrics.
+            x_metric = similarity_tasks[0] if similarity_tasks and len(similarity_tasks) > 0 else None
+            y_metric = similarity_tasks[1] if similarity_tasks and len(similarity_tasks) > 1 else None
+        elif btype == "function":
+            x_metric = function_prediction_aspect if function_prediction_aspect else None
+            y_metric = function_prediction_dataset if function_prediction_dataset else None
+        elif btype == "family":
+            # For family, assume that family_prediction_dataset is a list of datasets.
+            x_metric = family_prediction_dataset[0] if family_prediction_dataset and len(family_prediction_dataset) > 0 else None
+            y_metric = family_prediction_dataset[1] if family_prediction_dataset and len(family_prediction_dataset) > 1 else None
+        elif btype == "affinity":
+            # For affinity, you may use default plotting parameters.
+            x_metric, y_metric = None, None
+        else:
+            x_metric, y_metric = None, None
+
+        # Generate the plot using your benchmark_plot function.
+        # Here, aspect, dataset, and single_metric are passed as None, but you could extend this logic.
+        plot_img = benchmark_plot(btype, method_names, x_metric, y_metric, None, None, None)
+        plot_file = os.path.join(tmp_dir, f"{btype}.png")
+        if isinstance(plot_img, plt.Figure):
+            plot_img.savefig(plot_file)
+            plt.close(plot_img)
+        else:
+            # If benchmark_plot already returns a file path, use it directly.
+            plot_file = plot_img
+        plot_files.append(plot_file)
+
+    # Zip all plot images
+    zip_path = os.path.join(tmp_dir, "submission_plots.zip")
+    with zipfile.ZipFile(zip_path, "w") as zipf:
+        for file in plot_files:
+            zipf.write(file, arcname=os.path.basename(file))
+    return zip_path
+
+
+def submission_callback(
+    human_file,
+    skempi_file,
+    model_name_textbox,
+    revision_name_textbox,
+    benchmark_types,
+    similarity_tasks,
+    function_prediction_aspect,
+    function_prediction_dataset,
+    family_prediction_dataset,
+    save_checkbox,
+    return_option,  # New radio selection: "Leaderboard CSV" or "Plot Results"
+):
+    """
+    Runs the evaluation and then returns either a downloadable CSV of the leaderboard
+    (which includes the new submission) or a ZIP file of plots generated based on the submission's selections.
+    """
+    eval_status = add_new_eval(
+        human_file,
+        skempi_file,
+        model_name_textbox,
+        revision_name_textbox,
+        benchmark_types,
+        similarity_tasks,
+        function_prediction_aspect,
+        function_prediction_dataset,
+        family_prediction_dataset,
+        save_checkbox,
+    )
+
+    if eval_status == -1:
+        return "Submission failed. Please check your files and selections.", None
+
+    if return_option == "Leaderboard CSV":
+        csv_path = download_leaderboard_csv()
+        return "Your leaderboard CSV (including your submission) is ready for download.", csv_path
+    elif return_option == "Plot Results":
+        zip_path = generate_plots_based_on_submission(
+            benchmark_types,
+            similarity_tasks,
+            function_prediction_aspect,
+            function_prediction_dataset,
+            family_prediction_dataset,
+        )
+        return "Your plots are ready for download.", zip_path
+    else:
+        return "Submission processed, but no output option was selected.", None
+
+
+# --------------------------
+# Build the Gradio interface
+# --------------------------
 block = gr.Blocks()
 
 with block:
     gr.Markdown(LEADERBOARD_INTRODUCTION)
-
+
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
         with gr.TabItem("🏅 PROBE Leaderboard", elem_id="probe-benchmark-tab-table", id=1):
-
-            leaderboard = get_baseline_df(None, None) #get baseline leaderboard without filtering
-
+            # Leaderboard tab (unchanged from before)
+            leaderboard = get_baseline_df(None, None)
             method_names = leaderboard['Method'].unique().tolist()
             metric_names = leaderboard.columns.tolist()
             metrics_with_method = metric_names.copy()
-            metric_names.remove('Method') # Remove method_name from the metric options
+            metric_names.remove('Method')
 
             benchmark_metric_mapping = {
                 "similarity": [metric for metric in metric_names if metric.startswith('sim_')],
@@ -108,25 +208,28 @@ with block:
                 "family": [metric for metric in metric_names if metric.startswith('fam_')],
                 "affinity": [metric for metric in metric_names if metric.startswith('aff_')],
             }
-
-            # Leaderboard section with method and metric selectors
+
             leaderboard_method_selector = gr.CheckboxGroup(
-                choices=method_names, label="Select Methods for the Leaderboard", value=method_names, interactive=True
+                choices=method_names,
+                label="Select Methods for the Leaderboard",
+                value=method_names,
+                interactive=True
             )
-
             benchmark_type_selector = gr.CheckboxGroup(
-                choices=list(benchmark_metric_mapping.keys()),
-                label="Select Benchmark Types",
-                value=None, # Initially select all benchmark types
+                choices=list(benchmark_metric_mapping.keys()),
+                label="Select Benchmark Types",
+                value=None,
                 interactive=True
             )
             leaderboard_metric_selector = gr.CheckboxGroup(
-                choices=metric_names, label="Select Metrics for the Leaderboard", value=None, interactive=True
+                choices=metric_names,
+                label="Select Metrics for the Leaderboard",
+                value=None,
+                interactive=True
            )
 
-            # Display the filtered leaderboard
             baseline_value = get_baseline_df(method_names, metric_names)
-            baseline_value = baseline_value.applymap(lambda x: round(x, 4) if isinstance(x, (int, float)) else x) # Round all numeric values to 4 decimal places
+            baseline_value = baseline_value.applymap(lambda x: round(x, 4) if isinstance(x, (int, float)) else x)
            baseline_header = ["Method"] + metric_names
            baseline_datatype = ['markdown'] + ['number'] * len(metric_names)
 
@@ -140,93 +243,80 @@ with block:
                visible=True,
            )
 
-            # Update leaderboard when method/metric selection changes
            leaderboard_method_selector.change(
-                get_baseline_df,
-                inputs=[leaderboard_method_selector, leaderboard_metric_selector],
+                get_baseline_df,
+                inputs=[leaderboard_method_selector, leaderboard_metric_selector],
                outputs=data_component
            )
-
-            # Update metrics when benchmark type changes
            benchmark_type_selector.change(
                lambda selected_benchmarks: update_metrics(selected_benchmarks),
                inputs=[benchmark_type_selector],
                outputs=leaderboard_metric_selector
            )
-
            leaderboard_metric_selector.change(
-                get_baseline_df,
-                inputs=[leaderboard_method_selector, leaderboard_metric_selector],
+                get_baseline_df,
+                inputs=[leaderboard_method_selector, leaderboard_metric_selector],
                outputs=data_component
            )
 
            with gr.Row():
                gr.Markdown(
                    """
-                    ## **Below, you can visualize the results displayed in the Leaderboard.**
-                    ### Once you choose a benchmark type, the related options for metrics, datasets, and other parameters will become visible. Select the methods and metrics of interest from the options to generate visualizations.
+                    ## **Visualize the Leaderboard Results**
+                    Select options to update the visualization.
                    """
                )
-
-            # Dropdown for benchmark type
-            benchmark_type_selector = gr.Dropdown(choices=list(benchmark_specific_metrics.keys()), label="Select Benchmark Type", value=None)
-
+            # (Plotting section remains available as before; not the focus of the submission callback)
+            benchmark_type_selector_plot = gr.Dropdown(
+                choices=list(benchmark_specific_metrics.keys()),
+                label="Select Benchmark Type for Plotting",
+                value=None
+            )
            with gr.Row():
-                # Dynamic selectors
                x_metric_selector = gr.Dropdown(choices=[], label="Select X-axis Metric", visible=False)
                y_metric_selector = gr.Dropdown(choices=[], label="Select Y-axis Metric", visible=False)
                aspect_type_selector = gr.Dropdown(choices=[], label="Select Aspect Type", visible=False)
                dataset_selector = gr.Dropdown(choices=[], label="Select Dataset", visible=False)
                single_metric_selector = gr.Dropdown(choices=[], label="Select Metric", visible=False)
-
-                method_selector = gr.CheckboxGroup(choices=method_names, label="Select methods to visualize", interactive=True, value=method_names)
-
-            # Button to draw the plot for the selected benchmark
-
+                method_selector = gr.CheckboxGroup(
+                    choices=method_names,
+                    label="Select Methods to Visualize",
+                    interactive=True,
+                    value=method_names
+                )
            plot_button = gr.Button("Plot")
-
            with gr.Row(show_progress=True, variant='panel'):
                plot_output = gr.Image(label="Plot")
-
-            # Update selectors when benchmark type changes
-            benchmark_type_selector.change(
+            benchmark_type_selector_plot.change(
                update_metric_choices,
-                inputs=[benchmark_type_selector],
+                inputs=[benchmark_type_selector_plot],
                outputs=[x_metric_selector, y_metric_selector, aspect_type_selector, dataset_selector, single_metric_selector]
            )
-
            plot_button.click(
                benchmark_plot,
-                inputs=[benchmark_type_selector, method_selector, x_metric_selector, y_metric_selector, aspect_type_selector, dataset_selector, single_metric_selector],
+                inputs=[benchmark_type_selector_plot, method_selector, x_metric_selector, y_metric_selector, aspect_type_selector, dataset_selector, single_metric_selector],
                outputs=plot_output
            )
-
+
        with gr.TabItem("📝 About", elem_id="probe-benchmark-tab-table", id=2):
            with gr.Row():
                gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
            with gr.Row():
                gr.Image(
-                    value="./src/data/PROBE_workflow_figure.jpg", # Replace with your image file path or URL
-                    label="PROBE Workflow Figure", # Optional label
-                    elem_classes="about-image", # Optional CSS class for styling
+                    value="./src/data/PROBE_workflow_figure.jpg",
+                    label="PROBE Workflow Figure",
+                    elem_classes="about-image",
                )
-
+
        with gr.TabItem("🚀 Submit here! ", elem_id="probe-benchmark-tab-table", id=3):
            with gr.Row():
                gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
-
            with gr.Row():
                gr.Markdown("# ✉️✨ Submit your model's representation files here!", elem_classes="markdown-text")
-
            with gr.Row():
                with gr.Column():
-                    model_name_textbox = gr.Textbox(
-                        label="Method name",
-                    )
-                    revision_name_textbox = gr.Textbox(
-                        label="Revision Method Name",
-                    )
-
+                    model_name_textbox = gr.Textbox(label="Method name")
+                    revision_name_textbox = gr.Textbox(label="Revision Method Name")
                    benchmark_types = gr.CheckboxGroup(
                        choices=TASK_INFO,
                        label="Benchmark Types",
@@ -234,42 +324,51 @@ with block:
                    )
                    similarity_tasks = gr.CheckboxGroup(
                        choices=similarity_tasks_options,
-                        label="Similarity Tasks",
+                        label="Similarity Tasks (if selected)",
                        interactive=True,
                    )
-
                    function_prediction_aspect = gr.Radio(
                        choices=function_prediction_aspect_options,
-                        label="Function Prediction Aspects",
+                        label="Function Prediction Aspects (if selected)",
                        interactive=True,
                    )
-
                    family_prediction_dataset = gr.CheckboxGroup(
                        choices=family_prediction_dataset_options,
-                        label="Family Prediction Datasets",
+                        label="Family Prediction Datasets (if selected)",
                        interactive=True,
                    )
-
                    function_dataset = gr.Textbox(
                        label="Function Prediction Datasets",
                        visible=False,
                        value="All_Data_Sets"
                    )
-
                    save_checkbox = gr.Checkbox(
                        label="Save results for leaderboard and visualization",
                        value=True
                    )
-
-            #with gr.Column():
            with gr.Row():
-                human_file = gr.components.File(label="The representation file (csv) for Human dataset", file_count="single", type='filepath')
-                skempi_file = gr.components.File(label="The representation file (csv) for SKEMPI dataset", file_count="single", type='filepath')
-
+                human_file = gr.components.File(
+                    label="The representation file (csv) for Human dataset",
+                    file_count="single",
+                    type='filepath'
+                )
+                skempi_file = gr.components.File(
+                    label="The representation file (csv) for SKEMPI dataset",
+                    file_count="single",
+                    type='filepath'
+                )
+                # New radio button for output selection.
+                return_option = gr.Radio(
+                    choices=["Leaderboard CSV", "Plot Results"],
+                    label="Return Output",
+                    value="Leaderboard CSV",
+                    interactive=True,
+                )
            submit_button = gr.Button("Submit Eval")
-            submission_result = gr.Markdown()
+            submission_result_msg = gr.Markdown()
+            submission_result_file = gr.File()
            submit_button.click(
-                add_new_eval,
+                submission_callback,
                inputs=[
                    human_file,
                    skempi_file,
@@ -281,7 +380,9 @@ with block:
                    function_dataset,
                    family_prediction_dataset,
                    save_checkbox,
+                    return_option,
                ],
+                outputs=[submission_result_msg, submission_result_file]
            )
 
            with gr.Row():
@@ -296,4 +397,4 @@ with block:
                show_copy_button=True,
            )
 
-block.launch()
+block.launch()
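
For readers following the change, below is a minimal, self-contained sketch of the "status message + downloadable file" pattern that the new submission_callback wires up (a gr.Radio choosing between a leaderboard CSV and a ZIP of plots, with a gr.Markdown and a gr.File as the two outputs). The helpers build_leaderboard_csv, build_plots_zip, and fake_submission below are illustrative stand-ins, not functions from the PROBE app; only the Gradio wiring mirrors the commit.

# Sketch only: demonstrates the two-output callback pattern used by submission_callback.
import os
import tempfile
import zipfile

import gradio as gr
import pandas as pd


def build_leaderboard_csv() -> str:
    """Write a toy leaderboard DataFrame to a temporary CSV and return its path."""
    df = pd.DataFrame({"Method": ["demo"], "sim_score": [0.1234]})
    path = os.path.join(tempfile.gettempdir(), "leaderboard_download.csv")
    df.to_csv(path, index=False)
    return path


def build_plots_zip() -> str:
    """Create a (possibly empty) ZIP archive in a temp dir and return its path."""
    tmp_dir = tempfile.mkdtemp()
    zip_path = os.path.join(tmp_dir, "submission_plots.zip")
    with zipfile.ZipFile(zip_path, "w") as zipf:
        pass  # In the real app, one PNG per selected benchmark would be added here.
    return zip_path


def fake_submission(return_option: str):
    """Return (status message, file path) depending on the selected output option."""
    if return_option == "Leaderboard CSV":
        return "Leaderboard CSV ready for download.", build_leaderboard_csv()
    return "Plots ready for download.", build_plots_zip()


with gr.Blocks() as demo:
    return_option = gr.Radio(
        choices=["Leaderboard CSV", "Plot Results"],
        value="Leaderboard CSV",
        label="Return Output",
    )
    submit = gr.Button("Submit")
    msg = gr.Markdown()   # first output: status text
    out_file = gr.File()  # second output: downloadable artifact
    submit.click(fake_submission, inputs=[return_option], outputs=[msg, out_file])

if __name__ == "__main__":
    demo.launch()

Returning a plain file path from the callback is enough for gr.File to expose it for download, which is the same mechanism the commit relies on for both the CSV and the ZIP branch.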