# PROBE

# Sleeping

# File size: 10,785 Bytes

# Names exported by a star import of this module.
# NOTE(review): only `block` is defined in the visible part of this file. Confirm that
# 'make_clickable_model', 'make_clickable_user' and 'get_submissions' are provided
# elsewhere (e.g. by the star import from `src.about`); a name listed in `__all__`
# that does not exist makes `from <module> import *` fail.
__all__ = ['block', 'make_clickable_model', 'make_clickable_user', 'get_submissions']

import gradio as gr
import pandas as pd
import re
import os
import json
import yaml
import matplotlib.pyplot as plt
import seaborn as sns

from src.about import *
from src.bin.PROBE import run_probe

# NOTE(review): a `global` statement at module level has no effect. `data_component` is
# assigned further down while the UI is built; `filter_component` is never assigned in
# the visible part of this file.
global data_component, filter_component



def benchmark_plot(benchmark_type, methods_selected, x_metric, y_metric):
    """Dispatch a plot request to the plotting function of the chosen benchmark.

    Args:
        benchmark_type: Benchmark name chosen in the visualizer dropdown. The
            flexible visualizer is matched case-insensitively because the
            dropdown in this file offers the lowercase value ``'flexible'``.
        methods_selected: Method names to plot; only forwarded to the flexible
            visualizer.
        x_metric: Metric (results column) for the x-axis.
        y_metric: Metric (results column) for the y-axis.

    Returns:
        Whatever the selected plotting function returns (``general_visualizer``
        returns the path of the saved image), or the string
        ``"Invalid benchmark type selected."`` for an unrecognised type.
    """
    # An if/elif chain (rather than a dispatch dict) keeps name lookup lazy, so a
    # plotting function that is missing only fails when its own branch is taken.
    if isinstance(benchmark_type, str) and benchmark_type.lower() == 'flexible':
        # ``general_visualizer`` is the flexible plotting function of this module.
        return general_visualizer(methods_selected, x_metric=x_metric, y_metric=y_metric)
    elif benchmark_type == 'Benchmark 1':
        # NOTE(review): benchmark_1_plot .. benchmark_4_plot are not defined in the
        # visible part of this file -- confirm they are importable.
        return benchmark_1_plot(x_metric, y_metric)
    elif benchmark_type == 'Benchmark 2':
        return benchmark_2_plot(x_metric, y_metric)
    elif benchmark_type == 'Benchmark 3':
        return benchmark_3_plot(x_metric, y_metric)
    elif benchmark_type == 'Benchmark 4':
        return benchmark_4_plot(x_metric, y_metric)
    else:
        return "Invalid benchmark type selected."


def get_baseline_df(selected_methods, selected_metrics):
    """Load the results CSV restricted to the given methods and metric columns.

    Args:
        selected_methods: Values of the ``method_name`` column to keep.
        selected_metrics: List of metric column names to keep.

    Returns:
        A DataFrame whose first column is ``method_name`` followed by the
        selected metric columns, containing only the selected methods' rows.
    """
    results = pd.read_csv(CSV_RESULT_PATH)
    wanted_columns = ["method_name"] + selected_metrics
    row_mask = results['method_name'].isin(selected_methods)
    return results.loc[row_mask, wanted_columns]

def general_visualizer(methods_selected, x_metric, y_metric):
    """Plot one results column against another for the chosen methods.

    The results CSV is read, filtered to ``methods_selected`` and drawn as a
    Seaborn line plot with one colour per method.

    Args:
        methods_selected: Values of the ``method_name`` column to include.
        x_metric: Column used for the x-axis.
        y_metric: Column used for the y-axis.

    Returns:
        The path of the saved image, always ``"plot.png"``.
    """
    results = pd.read_csv(CSV_RESULT_PATH)
    subset = results[results['method_name'].isin(methods_selected)]

    # Enlarged canvas; one line (with point markers) per method.
    fig, ax = plt.subplots(figsize=(10, 8))
    sns.lineplot(
        data=subset,
        x=x_metric,
        y=y_metric,
        hue="method_name",
        marker="o",
        ax=ax,
    )

    # Axis labels, title and grid.
    ax.set_xlabel(x_metric)
    ax.set_ylabel(y_metric)
    ax.set_title(f'{y_metric} vs {x_metric} for selected methods')
    ax.grid(True)

    # Gradio displays the image from this file; the name is fixed, so every
    # call overwrites the previous plot.
    output_path = "plot.png"
    fig.savefig(output_path)
    plt.close(fig)

    return output_path

def add_new_eval(
    human_file,
    skempi_file,
    model_name_textbox: str,
    revision_name_textbox: str,
    benchmark_type,
    similarity_tasks,
    function_prediction_aspect,
    function_prediction_dataset,
    family_prediction_dataset,
):
    """Run the PROBE benchmarks for a submitted representation.

    The representation is named after the revision name unless that field is
    an empty string, in which case the model name is used. All remaining
    arguments are forwarded unchanged to ``run_probe``.

    Returns:
        None. The value returned by ``run_probe`` is discarded.
    """
    if revision_name_textbox == '':
        representation_name = model_name_textbox
    else:
        representation_name = revision_name_textbox

    run_probe(
        benchmark_type,
        representation_name,
        human_file,
        skempi_file,
        similarity_tasks,
        function_prediction_aspect,
        function_prediction_dataset,
        family_prediction_dataset,
    )
    return None

# Selector change callback: rebuild the leaderboard table for the current selection
def update_leaderboard(selected_methods, selected_metrics):
    """Return the leaderboard rows and columns for the selected methods and metrics."""
    leaderboard = get_baseline_df(selected_methods, selected_metrics)
    return leaderboard

block = gr.Blocks()

with block:
    gr.Markdown(LEADERBOARD_INTRODUCTION)

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        # Leaderboard tab: method/metric selectors plus the results table
        with gr.TabItem("🏅 PROBE Leaderboard", elem_id="probe-benchmark-tab-table", id=1):

            # Parse the results CSV once and derive both option lists from the same frame.
            results_df = pd.read_csv(CSV_RESULT_PATH)
            method_names = results_df['method_name'].unique().tolist()
            metric_names = results_df.columns.tolist()
            metrics_with_method = metric_names.copy()  # every column, including 'method_name'
            metric_names.remove('method_name')  # Remove method_name from the metric options

            # Leaderboard section with method and metric selectors
            with gr.Row():
                # Both selectors start with everything ticked.
                leaderboard_method_selector = gr.CheckboxGroup(
                    choices=method_names, label="Select method_names for Leaderboard", value=method_names, interactive=True
                )
                leaderboard_metric_selector = gr.CheckboxGroup(
                    choices=metric_names, label="Select Metrics for Leaderboard", value=metric_names, interactive=True
                )

            # Display the filtered leaderboard (initially: all methods, all metrics)
            baseline_value = get_baseline_df(method_names, metric_names)
            baseline_header = ["method_name"] + metric_names
            baseline_datatype = ['markdown'] + ['number'] * len(metric_names)

            data_component = gr.components.Dataframe(
                value=baseline_value,
                headers=baseline_header,
                type="pandas",
                datatype=baseline_datatype,
                interactive=False,
                visible=True,
            )

            # Update leaderboard when method/metric selection changes; both selectors
            # feed the same callback with the same (methods, metrics) inputs.
            leaderboard_method_selector.change(
                update_leaderboard,
                inputs=[leaderboard_method_selector, leaderboard_metric_selector],
                outputs=data_component
            )
            leaderboard_metric_selector.change(
                update_leaderboard,
                inputs=[leaderboard_method_selector, leaderboard_metric_selector],
                outputs=data_component
            )

        with gr.TabItem("Visualizer"):

            # Dropdown for benchmark type
            benchmark_types = TASK_INFO + ['flexible']
            benchmark_type_selector = gr.Dropdown(choices=benchmark_types, label="Select Benchmark Type for Visualization", value="flexible")

            # Metric selectors. The benchmark dropdown starts on 'flexible' and its change
            # handler only runs after the user changes the selection, so seed both
            # dropdowns with every results column instead of leaving them empty.
            initial_x_metric = metrics_with_method[0] if len(metrics_with_method) > 0 else None
            initial_y_metric = metrics_with_method[1] if len(metrics_with_method) > 1 else None
            x_metric_selector = gr.Dropdown(choices=metrics_with_method, value=initial_x_metric, label="Select X-axis Metric")
            y_metric_selector = gr.Dropdown(choices=metrics_with_method, value=initial_y_metric, label="Select Y-axis Metric")
            method_selector = gr.CheckboxGroup(choices=method_names, label="Select methods to visualize", interactive=True, value=method_names)

            # Button to draw the plot for the selected benchmark
            plot_button = gr.Button("Plot Visualization")
            plot_output = gr.Image(label="Plot")

            # Update metric selectors when benchmark type is chosen
            def update_metric_choices(benchmark_type):
                """Build the updates for the X- and Y-axis metric dropdowns.

                Args:
                    benchmark_type: Value of the benchmark type dropdown.

                Returns:
                    A pair of ``gr.update`` objects (x-axis, y-axis). For a known
                    benchmark type both dropdowns get the same list of choices, with
                    the first metric preselected for x and the second for y. For an
                    unknown type both dropdowns get an empty choice list.
                """
                if benchmark_type == 'flexible':
                    # Show all columns of the results file for the flexible visualizer
                    metrics = pd.read_csv(CSV_RESULT_PATH).columns.tolist()
                elif benchmark_type in benchmark_specific_metrics:
                    # NOTE(review): benchmark_specific_metrics is not defined in the visible
                    # part of this file -- presumably provided by `src.about`; confirm.
                    metrics = benchmark_specific_metrics[benchmark_type]
                else:
                    return gr.update(choices=[]), gr.update(choices=[])

                # Guard the defaults so a metric list with fewer than two entries
                # does not raise IndexError.
                x_default = metrics[0] if len(metrics) > 0 else None
                y_default = metrics[1] if len(metrics) > 1 else None
                return (
                    gr.update(choices=metrics, value=x_default),
                    gr.update(choices=metrics, value=y_default),
                )

            # When the benchmark type changes, update_metric_choices supplies one update
            # for each of the two metric dropdowns (x first, then y).
            benchmark_type_selector.change(
                update_metric_choices, 
                inputs=[benchmark_type_selector], 
                outputs=[x_metric_selector, y_metric_selector]
            )

            # Generate the plot based on user input: the four component values are passed
            # positionally to benchmark_plot and its return value goes to the image component.
            plot_button.click(
                benchmark_plot, 
                inputs=[benchmark_type_selector, method_selector, x_metric_selector, y_metric_selector], 
                outputs=plot_output
            )
            
        # About tab: a single static Markdown text.
        with gr.TabItem("📝 About", elem_id="probe-benchmark-tab-table", id=2):
            with gr.Row():
                gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

        # Submission tab: form for uploading representation files and choosing benchmarks.
        # NOTE(review): this tab reuses the elem_id of the leaderboard and About tabs.
        with gr.TabItem("🚀 Submit here! ", elem_id="probe-benchmark-tab-table", id=3):
            with gr.Row():
                gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")

            with gr.Row():
                gr.Markdown("# ✉️✨ Submit your model's representation files here!", elem_classes="markdown-text")

            with gr.Row():
                with gr.Column():
                    # add_new_eval uses the revision name as the representation name
                    # unless it is left empty, in which case the model name is used.
                    model_name_textbox = gr.Textbox(
                        label="Model name",
                    )
                    revision_name_textbox = gr.Textbox(
                        label="Revision Model Name",
                    )
                    
                    # Benchmark and task options; the option lists are not defined in the
                    # visible part of this file (presumably from `src.about` -- confirm).
                    benchmark_type = gr.CheckboxGroup(
                        choices=TASK_INFO,
                        label="Benchmark Type",
                        interactive=True,
                    )
                    similarity_tasks = gr.CheckboxGroup(
                        choices=similarity_tasks_options,
                        label="Select Similarity Tasks",
                        interactive=True,
                    )
                
                    function_prediction_aspect = gr.Radio(
                        choices=function_prediction_aspect_options,
                        label="Select Function Prediction Aspect",
                        interactive=True,
                    )
                
                    function_prediction_dataset = gr.Radio(
                        choices=function_prediction_dataset_options,
                        label="Select Function Prediction Dataset",
                        interactive=True,
                    )
                
                    family_prediction_dataset = gr.CheckboxGroup(
                        choices=family_prediction_dataset_options,
                        label="Select Family Prediction Dataset",
                        interactive=True,
                    )

            # NOTE(review): this column is opened at tab level, next to the Row above
            # rather than inside it -- confirm the intended layout.
            with gr.Column():
                # type='filepath' hands the handler a path to each uploaded file.
                human_file = gr.components.File(label="Click to Upload the representation file (csv) for Human dataset", file_count="single", type='filepath')
                skempi_file = gr.components.File(label="Click to Upload the representation file (csv) for SKEMPI dataset", file_count="single", type='filepath')
    
                submit_button = gr.Button("Submit Eval")
                # NOTE(review): submission_result is created but never used as an output;
                # the click below declares no outputs, so nothing is reported back in the UI.
                submission_result = gr.Markdown()
                # The inputs are passed positionally, in the parameter order of add_new_eval.
                submit_button.click(
                    add_new_eval,
                    inputs=[
                        human_file,
                        skempi_file,
                        model_name_textbox,
                        revision_name_textbox,
                        benchmark_type,
                        similarity_tasks,
                        function_prediction_aspect,
                        function_prediction_dataset,
                        family_prediction_dataset,
                    ],
                )

    def refresh_data():
        """Reload the leaderboard for all methods and metrics listed at start-up."""
        return get_baseline_df(method_names, metric_names)

    # Manual refresh: writes the result of refresh_data into the leaderboard table.
    with gr.Row():
        data_run = gr.Button("Refresh")
        data_run.click(refresh_data, outputs=[data_component])

    # Accordion (closed by default) holding the citation text with a copy button.
    with gr.Accordion("Citation", open=False):
        citation_button = gr.Textbox(
            value=CITATION_BUTTON_TEXT,
            label=CITATION_BUTTON_LABEL,
            elem_id="citation-button",
            show_copy_button=True,
        )

# Start the app. This is a top-level statement without a `__main__` guard, so it also
# runs when the module is imported.
block.launch()