PROBE

Running

App Files Files Community

mgyigit commited on Mar 28

Commit

b12fa6d

verified ·

1 Parent(s): b711897

Update app.py

Browse files

Files changed (1) hide show

app.py +199 -98

app.py CHANGED Viewed

@@ -10,12 +10,11 @@ import matplotlib.pyplot as plt
 import seaborn as sns
 import plotnine as p9
 import sys
 sys.path.append('./src')
 sys.path.append('.')
-from huggingface_hub import HfApi
-api = HfApi(token=os.getenv("api-key")) #load api-key secret
 from src.about import *
 from src.saving_utils import *
 from src.vis_utils import *
@@ -34,10 +33,10 @@ def add_new_eval(
     family_prediction_dataset,
     save,
 ):
     if any(task in benchmark_types for task in ['similarity', 'family', 'function']) and human_file is None:
         gr.Warning("Human representations are required for similarity, family, or function benchmarks!")
         return -1
     if 'affinity' in benchmark_types and skempi_file is None:
         gr.Warning("SKEMPI representations are required for affinity benchmark!")
         return -1
@@ -47,60 +46,161 @@ def add_new_eval(
     representation_name = model_name_textbox if revision_name_textbox == '' else revision_name_textbox
     try:
-        results = run_probe(benchmark_types, representation_name, human_file, skempi_file, similarity_tasks, function_prediction_aspect, function_prediction_dataset, family_prediction_dataset)
-    except:
-        completion_info = gr.Warning("Your submission has not been processed. Please check your representation files!")
         return -1
     if save:
         save_results(representation_name, benchmark_types, results)
-        completion_info = gr.Info("Your submission has been processed and results are saved!")
     else:
-        completion_info = gr.Info("Your submission has been processed!")
     return 0
 def refresh_data():
-    print(api.whoami())
-    api.restart_space(repo_id="HUBioDataLab/PROBE", token=os.getenv("api-key"))
     benchmark_types = ["similarity", "function", "family", "affinity", "leaderboard"]
     for benchmark_type in benchmark_types:
         path = f"/tmp/{benchmark_type}_results.csv"
         if os.path.exists(path):
             os.remove(path)
     benchmark_types.remove("leaderboard")
     download_from_hub(benchmark_types)
-            # Define a function to update metrics based on benchmark type selection
-def update_metrics(selected_benchmarks):
-    updated_metrics = set()
-    for benchmark in selected_benchmarks:
-        updated_metrics.update(benchmark_metric_mapping.get(benchmark, []))
-    return list(updated_metrics)
-            # Define a function to update the leaderboard
-def update_leaderboard(selected_methods, selected_metrics):
-    updated_df = get_baseline_df(selected_methods, selected_metrics)
-    return updated_df
 block = gr.Blocks()
 with block:
     gr.Markdown(LEADERBOARD_INTRODUCTION)
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
         with gr.TabItem("🏅 PROBE Leaderboard", elem_id="probe-benchmark-tab-table", id=1):
-            leaderboard = get_baseline_df(None, None) #get baseline leaderboard without filtering
             method_names = leaderboard['Method'].unique().tolist()
             metric_names = leaderboard.columns.tolist()
             metrics_with_method = metric_names.copy()
-            metric_names.remove('Method')  # Remove method_name from the metric options
             benchmark_metric_mapping = {
                 "similarity": [metric for metric in metric_names if metric.startswith('sim_')],
@@ -108,25 +208,28 @@ with block:
                 "family": [metric for metric in metric_names if metric.startswith('fam_')],
                 "affinity": [metric for metric in metric_names if metric.startswith('aff_')],
             }
-            # Leaderboard section with method and metric selectors
             leaderboard_method_selector = gr.CheckboxGroup(
-                choices=method_names, label="Select Methods for the Leaderboard", value=method_names, interactive=True
             )
             benchmark_type_selector = gr.CheckboxGroup(
-                choices=list(benchmark_metric_mapping.keys()),
-                label="Select Benchmark Types",
-                value=None,  # Initially select all benchmark types
                 interactive=True
             )
             leaderboard_metric_selector = gr.CheckboxGroup(
-                choices=metric_names, label="Select Metrics for the Leaderboard", value=None, interactive=True
             )
-            # Display the filtered leaderboard
             baseline_value = get_baseline_df(method_names, metric_names)
-            baseline_value = baseline_value.applymap(lambda x: round(x, 4) if isinstance(x, (int, float)) else x)  # Round all numeric values to 4 decimal places
             baseline_header = ["Method"] + metric_names
             baseline_datatype = ['markdown'] + ['number'] * len(metric_names)
@@ -140,93 +243,80 @@ with block:
                     visible=True,
                 )
-            # Update leaderboard when method/metric selection changes
             leaderboard_method_selector.change(
-                get_baseline_df,
-                inputs=[leaderboard_method_selector, leaderboard_metric_selector],
                 outputs=data_component
             )
-            # Update metrics when benchmark type changes
             benchmark_type_selector.change(
                 lambda selected_benchmarks: update_metrics(selected_benchmarks),
                 inputs=[benchmark_type_selector],
                 outputs=leaderboard_metric_selector
             )
             leaderboard_metric_selector.change(
-                get_baseline_df,
-                inputs=[leaderboard_method_selector, leaderboard_metric_selector],
                 outputs=data_component
             )
             with gr.Row():
                 gr.Markdown(
                     """
-                    ## **Below, you can visualize the results displayed in the Leaderboard.**
-                    ### Once you choose a benchmark type, the related options for metrics, datasets, and other parameters will become visible. Select the methods and metrics of interest from the options to generate visualizations.
                     """
                 )
-            # Dropdown for benchmark type
-            benchmark_type_selector = gr.Dropdown(choices=list(benchmark_specific_metrics.keys()), label="Select Benchmark Type", value=None)
             with gr.Row():
-                # Dynamic selectors
                 x_metric_selector = gr.Dropdown(choices=[], label="Select X-axis Metric", visible=False)
                 y_metric_selector = gr.Dropdown(choices=[], label="Select Y-axis Metric", visible=False)
                 aspect_type_selector = gr.Dropdown(choices=[], label="Select Aspect Type", visible=False)
                 dataset_selector = gr.Dropdown(choices=[], label="Select Dataset", visible=False)
                 single_metric_selector = gr.Dropdown(choices=[], label="Select Metric", visible=False)
-            method_selector = gr.CheckboxGroup(choices=method_names, label="Select methods to visualize", interactive=True, value=method_names)
-            # Button to draw the plot for the selected benchmark
             plot_button = gr.Button("Plot")
             with gr.Row(show_progress=True, variant='panel'):
                 plot_output = gr.Image(label="Plot")
-            # Update selectors when benchmark type changes
-            benchmark_type_selector.change(
                 update_metric_choices,
-                inputs=[benchmark_type_selector],
                 outputs=[x_metric_selector, y_metric_selector, aspect_type_selector, dataset_selector, single_metric_selector]
             )
             plot_button.click(
                 benchmark_plot,
-                inputs=[benchmark_type_selector, method_selector, x_metric_selector, y_metric_selector, aspect_type_selector, dataset_selector, single_metric_selector],
                 outputs=plot_output
             )
         with gr.TabItem("📝 About", elem_id="probe-benchmark-tab-table", id=2):
             with gr.Row():
                 gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
             with gr.Row():
                 gr.Image(
-                    value="./src/data/PROBE_workflow_figure.jpg",  # Replace with your image file path or URL
-                    label="PROBE Workflow Figure",  # Optional label
-                    elem_classes="about-image",  # Optional CSS class for styling
                 )
         with gr.TabItem("🚀 Submit here! ", elem_id="probe-benchmark-tab-table", id=3):
             with gr.Row():
                 gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
             with gr.Row():
                 gr.Markdown("# ✉️✨ Submit your model's representation files here!", elem_classes="markdown-text")
             with gr.Row():
                 with gr.Column():
-                    model_name_textbox = gr.Textbox(
-                        label="Method name",
-                    )
-                    revision_name_textbox = gr.Textbox(
-                        label="Revision Method Name",
-                    )
                     benchmark_types = gr.CheckboxGroup(
                         choices=TASK_INFO,
                         label="Benchmark Types",
@@ -234,42 +324,51 @@ with block:
                     )
                     similarity_tasks = gr.CheckboxGroup(
                         choices=similarity_tasks_options,
-                        label="Similarity Tasks",
                         interactive=True,
                     )
                     function_prediction_aspect = gr.Radio(
                         choices=function_prediction_aspect_options,
-                        label="Function Prediction Aspects",
                         interactive=True,
                     )
                     family_prediction_dataset = gr.CheckboxGroup(
                         choices=family_prediction_dataset_options,
-                        label="Family Prediction Datasets",
                         interactive=True,
                     )
                     function_dataset = gr.Textbox(
                         label="Function Prediction Datasets",
                         visible=False,
                         value="All_Data_Sets"
                     )
                     save_checkbox = gr.Checkbox(
                         label="Save results for leaderboard and visualization",
                         value=True
                     )
-            #with gr.Column():
             with gr.Row():
-                human_file = gr.components.File(label="The representation file (csv) for Human dataset", file_count="single", type='filepath')
-                skempi_file = gr.components.File(label="The representation file (csv) for SKEMPI dataset", file_count="single", type='filepath')
             submit_button = gr.Button("Submit Eval")
-            submission_result = gr.Markdown()
             submit_button.click(
-                add_new_eval,
                 inputs=[
                     human_file,
                     skempi_file,
@@ -281,7 +380,9 @@ with block:
                     function_dataset,
                     family_prediction_dataset,
                     save_checkbox,
                 ],
             )
     with gr.Row():
@@ -296,4 +397,4 @@ with block:
             show_copy_button=True,
         )
-block.launch()

 import seaborn as sns
 import plotnine as p9
 import sys
+import zipfile
+import tempfile
 sys.path.append('./src')
 sys.path.append('.')
 from src.about import *
 from src.saving_utils import *
 from src.vis_utils import *
     family_prediction_dataset,
     save,
 ):
+    # Validate required files based on selected benchmarks
     if any(task in benchmark_types for task in ['similarity', 'family', 'function']) and human_file is None:
         gr.Warning("Human representations are required for similarity, family, or function benchmarks!")
         return -1
     if 'affinity' in benchmark_types and skempi_file is None:
         gr.Warning("SKEMPI representations are required for affinity benchmark!")
         return -1
     representation_name = model_name_textbox if revision_name_textbox == '' else revision_name_textbox
     try:
+        results = run_probe(
+            benchmark_types,
+            representation_name,
+            human_file,
+            skempi_file,
+            similarity_tasks,
+            function_prediction_aspect,
+            function_prediction_dataset,
+            family_prediction_dataset,
+        )
+    except Exception as e:
+        gr.Warning("Your submission has not been processed. Please check your representation files!")
         return -1
+    # Even if save is False, we store the submission (e.g., temporarily) so that the leaderboard includes it.
     if save:
         save_results(representation_name, benchmark_types, results)
     else:
+        save_results(representation_name, benchmark_types, results, temporary=True)
     return 0
 def refresh_data():
     benchmark_types = ["similarity", "function", "family", "affinity", "leaderboard"]
     for benchmark_type in benchmark_types:
         path = f"/tmp/{benchmark_type}_results.csv"
         if os.path.exists(path):
             os.remove(path)
     benchmark_types.remove("leaderboard")
     download_from_hub(benchmark_types)
+def download_leaderboard_csv():
+    """Generates a CSV file for the updated leaderboard."""
+    df = get_baseline_df(None, None)
+    tmp_csv = os.path.join(tempfile.gettempdir(), "leaderboard_download.csv")
+    df.to_csv(tmp_csv, index=False)
+    return tmp_csv
+def generate_plots_based_on_submission(benchmark_types, similarity_tasks, function_prediction_aspect, function_prediction_dataset, family_prediction_dataset):
+    """
+    For each benchmark type selected during submission, generate a plot based on the corresponding extra parameters.
+    """
+    tmp_dir = tempfile.mkdtemp()
+    plot_files = []
+    # Get the current leaderboard to retrieve available method names.
+    leaderboard = get_baseline_df(None, None)
+    method_names = leaderboard['Method'].unique().tolist()
+    for btype in benchmark_types:
+        # For each benchmark type, choose plotting parameters based on additional selections.
+        if btype == "similarity":
+            # Use the user-selected similarity tasks (if provided) to determine the metrics.
+            x_metric = similarity_tasks[0] if similarity_tasks and len(similarity_tasks) > 0 else None
+            y_metric = similarity_tasks[1] if similarity_tasks and len(similarity_tasks) > 1 else None
+        elif btype == "function":
+            x_metric = function_prediction_aspect if function_prediction_aspect else None
+            y_metric = function_prediction_dataset if function_prediction_dataset else None
+        elif btype == "family":
+            # For family, assume that family_prediction_dataset is a list of datasets.
+            x_metric = family_prediction_dataset[0] if family_prediction_dataset and len(family_prediction_dataset) > 0 else None
+            y_metric = family_prediction_dataset[1] if family_prediction_dataset and len(family_prediction_dataset) > 1 else None
+        elif btype == "affinity":
+            # For affinity, you may use default plotting parameters.
+            x_metric, y_metric = None, None
+        else:
+            x_metric, y_metric = None, None
+        # Generate the plot using your benchmark_plot function.
+        # Here, aspect, dataset, and single_metric are passed as None, but you could extend this logic.
+        plot_img = benchmark_plot(btype, method_names, x_metric, y_metric, None, None, None)
+        plot_file = os.path.join(tmp_dir, f"{btype}.png")
+        if isinstance(plot_img, plt.Figure):
+            plot_img.savefig(plot_file)
+            plt.close(plot_img)
+        else:
+            # If benchmark_plot already returns a file path, use it directly.
+            plot_file = plot_img
+        plot_files.append(plot_file)
+    # Zip all plot images
+    zip_path = os.path.join(tmp_dir, "submission_plots.zip")
+    with zipfile.ZipFile(zip_path, "w") as zipf:
+        for file in plot_files:
+            zipf.write(file, arcname=os.path.basename(file))
+    return zip_path
+def submission_callback(
+    human_file,
+    skempi_file,
+    model_name_textbox,
+    revision_name_textbox,
+    benchmark_types,
+    similarity_tasks,
+    function_prediction_aspect,
+    function_prediction_dataset,
+    family_prediction_dataset,
+    save_checkbox,
+    return_option,  # New radio selection: "Leaderboard CSV" or "Plot Results"
+):
+    """
+    Runs the evaluation and then returns either a downloadable CSV of the leaderboard
+    (which includes the new submission) or a ZIP file of plots generated based on the submission's selections.
+    """
+    eval_status = add_new_eval(
+        human_file,
+        skempi_file,
+        model_name_textbox,
+        revision_name_textbox,
+        benchmark_types,
+        similarity_tasks,
+        function_prediction_aspect,
+        function_prediction_dataset,
+        family_prediction_dataset,
+        save_checkbox,
+    )
+    if eval_status == -1:
+        return "Submission failed. Please check your files and selections.", None
+    if return_option == "Leaderboard CSV":
+        csv_path = download_leaderboard_csv()
+        return "Your leaderboard CSV (including your submission) is ready for download.", csv_path
+    elif return_option == "Plot Results":
+        zip_path = generate_plots_based_on_submission(
+            benchmark_types,
+            similarity_tasks,
+            function_prediction_aspect,
+            function_prediction_dataset,
+            family_prediction_dataset,
+        )
+        return "Your plots are ready for download.", zip_path
+    else:
+        return "Submission processed, but no output option was selected.", None
+# --------------------------
+# Build the Gradio interface
+# --------------------------
 block = gr.Blocks()
 with block:
     gr.Markdown(LEADERBOARD_INTRODUCTION)
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
         with gr.TabItem("🏅 PROBE Leaderboard", elem_id="probe-benchmark-tab-table", id=1):
+            # Leaderboard tab (unchanged from before)
+            leaderboard = get_baseline_df(None, None)
             method_names = leaderboard['Method'].unique().tolist()
             metric_names = leaderboard.columns.tolist()
             metrics_with_method = metric_names.copy()
+            metric_names.remove('Method')
             benchmark_metric_mapping = {
                 "similarity": [metric for metric in metric_names if metric.startswith('sim_')],
                 "family": [metric for metric in metric_names if metric.startswith('fam_')],
                 "affinity": [metric for metric in metric_names if metric.startswith('aff_')],
             }
             leaderboard_method_selector = gr.CheckboxGroup(
+                choices=method_names,
+                label="Select Methods for the Leaderboard",
+                value=method_names,
+                interactive=True
             )
             benchmark_type_selector = gr.CheckboxGroup(
+                choices=list(benchmark_metric_mapping.keys()),
+                label="Select Benchmark Types",
+                value=None,
                 interactive=True
             )
             leaderboard_metric_selector = gr.CheckboxGroup(
+                choices=metric_names,
+                label="Select Metrics for the Leaderboard",
+                value=None,
+                interactive=True
             )
             baseline_value = get_baseline_df(method_names, metric_names)
+            baseline_value = baseline_value.applymap(lambda x: round(x, 4) if isinstance(x, (int, float)) else x)
             baseline_header = ["Method"] + metric_names
             baseline_datatype = ['markdown'] + ['number'] * len(metric_names)
                     visible=True,
                 )
             leaderboard_method_selector.change(
+                get_baseline_df,
+                inputs=[leaderboard_method_selector, leaderboard_metric_selector],
                 outputs=data_component
             )
             benchmark_type_selector.change(
                 lambda selected_benchmarks: update_metrics(selected_benchmarks),
                 inputs=[benchmark_type_selector],
                 outputs=leaderboard_metric_selector
             )
             leaderboard_metric_selector.change(
+                get_baseline_df,
+                inputs=[leaderboard_method_selector, leaderboard_metric_selector],
                 outputs=data_component
             )
             with gr.Row():
                 gr.Markdown(
                     """
+                    ## **Visualize the Leaderboard Results**
+                    Select options to update the visualization.
                     """
                 )
+            # (Plotting section remains available as before; not the focus of the submission callback)
+            benchmark_type_selector_plot = gr.Dropdown(
+                choices=list(benchmark_specific_metrics.keys()),
+                label="Select Benchmark Type for Plotting",
+                value=None
+            )
             with gr.Row():
                 x_metric_selector = gr.Dropdown(choices=[], label="Select X-axis Metric", visible=False)
                 y_metric_selector = gr.Dropdown(choices=[], label="Select Y-axis Metric", visible=False)
                 aspect_type_selector = gr.Dropdown(choices=[], label="Select Aspect Type", visible=False)
                 dataset_selector = gr.Dropdown(choices=[], label="Select Dataset", visible=False)
                 single_metric_selector = gr.Dropdown(choices=[], label="Select Metric", visible=False)
+            method_selector = gr.CheckboxGroup(
+                choices=method_names,
+                label="Select Methods to Visualize",
+                interactive=True,
+                value=method_names
+            )
             plot_button = gr.Button("Plot")
             with gr.Row(show_progress=True, variant='panel'):
                 plot_output = gr.Image(label="Plot")
+            benchmark_type_selector_plot.change(
                 update_metric_choices,
+                inputs=[benchmark_type_selector_plot],
                 outputs=[x_metric_selector, y_metric_selector, aspect_type_selector, dataset_selector, single_metric_selector]
             )
             plot_button.click(
                 benchmark_plot,
+                inputs=[benchmark_type_selector_plot, method_selector, x_metric_selector, y_metric_selector, aspect_type_selector, dataset_selector, single_metric_selector],
                 outputs=plot_output
             )
         with gr.TabItem("📝 About", elem_id="probe-benchmark-tab-table", id=2):
             with gr.Row():
                 gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
             with gr.Row():
                 gr.Image(
+                    value="./src/data/PROBE_workflow_figure.jpg",
+                    label="PROBE Workflow Figure",
+                    elem_classes="about-image",
                 )
         with gr.TabItem("🚀 Submit here! ", elem_id="probe-benchmark-tab-table", id=3):
             with gr.Row():
                 gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
             with gr.Row():
                 gr.Markdown("# ✉️✨ Submit your model's representation files here!", elem_classes="markdown-text")
             with gr.Row():
                 with gr.Column():
+                    model_name_textbox = gr.Textbox(label="Method name")
+                    revision_name_textbox = gr.Textbox(label="Revision Method Name")
                     benchmark_types = gr.CheckboxGroup(
                         choices=TASK_INFO,
                         label="Benchmark Types",
                     )
                     similarity_tasks = gr.CheckboxGroup(
                         choices=similarity_tasks_options,
+                        label="Similarity Tasks (if selected)",
                         interactive=True,
                     )
                     function_prediction_aspect = gr.Radio(
                         choices=function_prediction_aspect_options,
+                        label="Function Prediction Aspects (if selected)",
                         interactive=True,
                     )
                     family_prediction_dataset = gr.CheckboxGroup(
                         choices=family_prediction_dataset_options,
+                        label="Family Prediction Datasets (if selected)",
                         interactive=True,
                     )
                     function_dataset = gr.Textbox(
                         label="Function Prediction Datasets",
                         visible=False,
                         value="All_Data_Sets"
                     )
                     save_checkbox = gr.Checkbox(
                         label="Save results for leaderboard and visualization",
                         value=True
                     )
             with gr.Row():
+                human_file = gr.components.File(
+                    label="The representation file (csv) for Human dataset",
+                    file_count="single",
+                    type='filepath'
+                )
+                skempi_file = gr.components.File(
+                    label="The representation file (csv) for SKEMPI dataset",
+                    file_count="single",
+                    type='filepath'
+                )
+            # New radio button for output selection.
+            return_option = gr.Radio(
+                choices=["Leaderboard CSV", "Plot Results"],
+                label="Return Output",
+                value="Leaderboard CSV",
+                interactive=True,
+            )
             submit_button = gr.Button("Submit Eval")
+            submission_result_msg = gr.Markdown()
+            submission_result_file = gr.File()
             submit_button.click(
+                submission_callback,
                 inputs=[
                     human_file,
                     skempi_file,
                     function_dataset,
                     family_prediction_dataset,
                     save_checkbox,
+                    return_option,
                 ],
+                outputs=[submission_result_msg, submission_result_file]
             )
     with gr.Row():
             show_copy_button=True,
         )
+block.launch()