# Source: Hugging Face Space "stacklok/secure_code_leaderboard_archived" (status: Running; file size: 11,228 bytes).

import gradio as gr
from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns

from src.about import (
    CITATION_BUTTON_LABEL,
    CITATION_BUTTON_TEXT,
    EVALUATION_QUEUE_TEXT,
    INTRODUCTION_TEXT,
    LLM_BENCHMARKS_TEXT,
    TITLE,
)
from src.display.css_html_js import custom_css
from src.display.utils import (
    BENCHMARK_COLS,
    COLS,
    EVAL_COLS,
    EVAL_TYPES,
    AutoEvalColumn,
    ModelType,
    fields,
    WeightType,
    Precision
)
from src.envs import EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH
from src.populate import get_evaluation_queue_df, get_leaderboard_df
from src.submission.submit import add_new_eval
from src.leaderboard.security_eval import check_safetensors

# No HuggingFace Hub download step runs here (skipped for local testing); the
# loaders below are handed EVAL_RESULTS_PATH / EVAL_REQUESTS_PATH directly.
print("Creating leaderboard DataFrame...")
LEADERBOARD_DF = get_leaderboard_df(
    EVAL_RESULTS_PATH,
    EVAL_REQUESTS_PATH,
    COLS,
    BENCHMARK_COLS,
)
# Startup diagnostics: dump shape, column names and the full table to stdout.
print(f"LEADERBOARD_DF shape: {LEADERBOARD_DF.shape}")
print(f"LEADERBOARD_DF columns: {LEADERBOARD_DF.columns.tolist()}")
print(f"LEADERBOARD_DF data:\n{LEADERBOARD_DF}")

print("\nGetting evaluation queue DataFrames...")
# The loader yields exactly three frames, in the order finished / running / pending.
finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = get_evaluation_queue_df(
    EVAL_REQUESTS_PATH, EVAL_COLS
)

def get_field_mapping():
    """Return a dict keyed by each ``AutoEvalColumn`` field's ``name``.

    The values are the field objects themselves (as returned by
    ``fields(AutoEvalColumn)``), not strings, so callers can read
    per-column attributes from them.
    """
    mapping = {}
    for column in fields(AutoEvalColumn):
        mapping[column.name] = column
    return mapping

def create_empty_dataframe(field_mapping):
    """Return a zero-row DataFrame whose columns are the ``name`` of each mapped field.

    Column order follows the iteration order of ``field_mapping.values()``.
    """
    import pandas as pd

    column_names = []
    for field in field_mapping.values():
        column_names.append(field.name)
    return pd.DataFrame(columns=column_names)

def verify_columns(dataframe, field_mapping):
    """Print a warning for each DataFrame column that has no entry in ``field_mapping``.

    This is purely diagnostic: nothing is raised or returned, and columns that
    exist in the mapping but are missing from the DataFrame are not reported.
    """
    unmapped = (name for name in dataframe.columns if name not in field_mapping)
    for col in unmapped:
        print(f"Warning: Column {col} not found in field mapping")

def init_leaderboard(dataframe):
    """Build the ``Leaderboard`` component that displays ``dataframe``.

    Args:
        dataframe: DataFrame of leaderboard rows. ``None`` or a frame with zero
            rows is replaced by an empty frame whose columns are the names of
            the ``AutoEvalColumn`` fields.

    Returns:
        The configured ``Leaderboard`` instance (non-interactive).
    """
    # Read the shape defensively. Accessing ``dataframe.shape`` unconditionally
    # raised AttributeError for ``None`` before the fallback below could run.
    shape = None if dataframe is None else dataframe.shape
    print(f"Initializing leaderboard with DataFrame shape: {shape}")

    field_mapping = get_field_mapping()
    print(f"Field mapping: {field_mapping}")

    if dataframe is None or len(dataframe) == 0:
        dataframe = create_empty_dataframe(field_mapping)
        print("Created empty DataFrame with correct columns")

    verify_columns(dataframe, field_mapping)

    # Only columns present in the mapping carry display metadata; compute that
    # subset once instead of re-filtering for every keyword argument.
    columns = list(dataframe.columns)
    known_columns = [col for col in columns if col in field_mapping]

    return Leaderboard(
        value=dataframe,
        # Columns without mapping metadata fall back to the "str" datatype.
        datatype=[field_mapping[col].type if col in field_mapping else "str" for col in columns],
        select_columns=SelectColumns(
            default_selection=[col for col in known_columns if field_mapping[col].displayed_by_default],
            cant_deselect=[col for col in known_columns if field_mapping[col].never_hidden],
            label="Select Columns to Display:",
        ),
        search_columns=["Model", "Hub License"],
        hide_columns=[col for col in known_columns if field_mapping[col].hidden],
        filter_columns=[
            ColumnFilter("Type", type="checkboxgroup", label="Model types"),
            ColumnFilter("Weight Format", type="checkboxgroup", label="Weight Format"),
            ColumnFilter("Precision", type="checkboxgroup", label="Precision"),
            ColumnFilter(
                "#Params (B)",
                type="slider",
                min=0.01,
                max=150,
                label="Select the number of parameters (B)",
            ),
            ColumnFilter(
                "Available on Hub", type="boolean", label="Deleted/incomplete", default=True
            ),
        ],
        bool_checkboxgroup_label="Hide models",
        interactive=False,
    )


# Top-level Gradio app. Everything created inside ``with demo:`` is attached to
# this Blocks instance, in the order the statements run.
demo = gr.Blocks(css=custom_css)
with demo:
    # Page header: title HTML followed by the introduction text.
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        # Tab id=0: the leaderboard table built from the frame loaded at import time.
        with gr.TabItem("🔒 Security Leaderboard", elem_id="security-leaderboard-tab", id=0):
            leaderboard = init_leaderboard(LEADERBOARD_DF)

        # Tab id=2: static description of the benchmarks.
        with gr.TabItem("📝 About", elem_id="about-tab", id=2):
            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

        # Tab id=3: evaluation-queue views followed by the submission form.
        with gr.TabItem("🚀 Submit Model", elem_id="submit-tab", id=3):
            with gr.Column():
                with gr.Row():
                    gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")

                with gr.Column():
                    # Each accordion starts collapsed (open=False). Its title embeds
                    # the row count of the queue frame as it is when this statement
                    # runs, i.e. when the UI is built.
                    with gr.Accordion(
                        f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
                        open=False,
                    ):
                        with gr.Row():
                            # Kept in a name so later code can target this table.
                            finished_eval_table = gr.components.Dataframe(
                                value=finished_eval_queue_df,
                                headers=EVAL_COLS,
                                datatype=EVAL_TYPES,
                                row_count=5,
                            )
                    with gr.Accordion(
                        f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
                        open=False,
                    ):
                        with gr.Row():
                            running_eval_table = gr.components.Dataframe(
                                value=running_eval_queue_df,
                                headers=EVAL_COLS,
                                datatype=EVAL_TYPES,
                                row_count=5,
                            )

                    with gr.Accordion(
                        f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
                        open=False,
                    ):
                        with gr.Row():
                            # This table is also an output of the submit button click.
                            pending_eval_table = gr.components.Dataframe(
                                value=pending_eval_queue_df,
                                headers=EVAL_COLS,
                                datatype=EVAL_TYPES,
                                row_count=5,
                            )
            # Heading for the submission form.
            with gr.Row():
                gr.Markdown("# 🔒 Submit Your Model for Security Evaluation", elem_classes="markdown-text")

            # Form inputs, laid out in two columns.
            with gr.Row():
                with gr.Column():
                    model_name_textbox = gr.Textbox(
                        label="Model name (organization/model-name)",
                        placeholder="huggingface/model-name"
                    )
                    revision_name_textbox = gr.Textbox(
                        label="Revision commit",
                        placeholder="main"
                    )
                    # ModelType.Unknown is not offered as a choice; nothing is preselected.
                    model_type = gr.Dropdown(
                        choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
                        label="Model type",
                        multiselect=False,
                        value=None,
                        interactive=True,
                    )

                with gr.Column():
                    # Precision.Unknown is not offered as a choice; "float16" is preselected.
                    precision = gr.Dropdown(
                        choices=[i.value.name for i in Precision if i != Precision.Unknown],
                        label="Precision",
                        multiselect=False,
                        value="float16",
                        interactive=True,
                    )
                    # Every WeightType member is offered; "Safetensors" is preselected.
                    weight_type = gr.Dropdown(
                        choices=[i.value.name for i in WeightType],
                        label="Weight Format",
                        multiselect=False,
                        value="Safetensors",
                        interactive=True,
                    )
                    base_model_name_textbox = gr.Textbox(
                        label="Base model (for delta or adapter weights)",
                        placeholder="Optional: base model path"
                    )

            # Static text listing the submission requirements shown to the user.
            with gr.Row():
                gr.Markdown(
                    """
                    ### Security Requirements:
                    1. Model weights must be in safetensors format
                    2. Model card must include security considerations
                    3. Model will be evaluated on secure coding capabilities
                    """,
                    elem_classes="markdown-text"
                )

            # Trigger button and the Markdown area that receives the status message.
            submit_button = gr.Button("Submit for Security Evaluation")
            submission_result = gr.Markdown()

            def handle_submission(model, base_model, revision, precision, weight_type, model_type):
                """Queue a submitted model and report the outcome to the UI.

                Returns a two-item list: a Markdown status message followed by a
                Dataframe holding the pending-queue table. Any exception raised
                while queuing or re-reading the queue is printed and turned into
                an "Error: ..." message instead of propagating.
                """
                global pending_eval_queue_df

                try:
                    print(f"New submission received for {model}")

                    # NOTE(review): the return value is never inspected. If
                    # add_new_eval reports failures through its return value
                    # rather than by raising, they are shown as success here.
                    result = add_new_eval(model, base_model, revision, precision, weight_type, model_type)

                    # Re-read the queues so the pending table reflects the new entry;
                    # only the third frame (pending) is kept.
                    _finished, _running, pending_eval_queue_df = get_evaluation_queue_df(
                        EVAL_REQUESTS_PATH, EVAL_COLS
                    )

                    status = gr.Markdown("Submission successful! Your model has been added to the evaluation queue. Please check the 'Pending Evaluation Queue' for status updates.")
                    table = gr.Dataframe(value=pending_eval_queue_df)
                    return [status, table]
                except Exception as e:
                    print(f"Submission failed: {str(e)}")
                    # Fall back to whatever pending frame is currently cached at module level.
                    status = gr.Markdown(f"Error: {str(e)}")
                    table = gr.Dataframe(value=pending_eval_queue_df)
                    return [status, table]

            # Update tables periodically
            def update_evaluation_tables():
                """Re-read the evaluation queues and return refreshed values for the three tables.

                Rebinds the module-level ``finished_eval_queue_df``,
                ``running_eval_queue_df`` and ``pending_eval_queue_df``.

                Returns:
                    A list of three update payloads, in the order finished,
                    running, pending.
                """
                global finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df
                finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = get_evaluation_queue_df(
                    EVAL_REQUESTS_PATH, EVAL_COLS
                )
                # The per-component ``.update()`` method used previously is not
                # available in the Gradio 4 API this file otherwise relies on
                # (e.g. ``default_concurrency_limit``). ``gr.update`` builds the
                # same kind of payload without needing the method or constructing
                # new components.
                return [
                    gr.update(value=finished_eval_queue_df),
                    gr.update(value=running_eval_queue_df),
                    gr.update(value=pending_eval_queue_df),
                ]

            # Wire the button to the handler. The input components are listed in
            # the same order as handle_submission's parameters (model, base_model,
            # revision, precision, weight_type, model_type), and the two outputs
            # match the two-item list the handler returns.
            submit_button.click(
                handle_submission,
                [
                    model_name_textbox,
                    base_model_name_textbox,
                    revision_name_textbox,
                    precision,
                    weight_type,
                    model_type,
                ],
                [submission_result, pending_eval_table],
            )

    # Citation text sits below the tabs, inside an accordion that starts
    # collapsed, in a textbox with a copy button.
    with gr.Row():
        with gr.Accordion("📙 Citation", open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                lines=20,
                elem_id="citation-button",
                show_copy_button=True,
            )

# Setup periodic updates
import time
import threading

def periodic_update():
    """Refresh the cached evaluation-queue frames every 60 seconds, forever.

    Meant to be the target of a background thread; it never returns. A failed
    refresh is printed and the loop carries on with the next cycle.
    """
    while True:
        time.sleep(60)  # Update every 60 seconds
        try:
            # Call the refresh function directly. The previous
            # ``demo.queue(update_evaluation_tables)()`` handed the function to
            # ``queue()`` as its first positional argument (re-configuring the
            # app's queue) and then called the Blocks object, so the refresh
            # itself was never invoked.
            # NOTE(review): the returned update payloads are discarded here, so
            # only the module-level frames change. Pushing new values to open
            # browser sessions presumably needs an event with outputs
            # registered inside the Blocks context - confirm.
            update_evaluation_tables()
        except Exception as e:
            # Keep the refresher alive across transient failures.
            print(f"Periodic update failed: {str(e)}")

# Start the refresher as a daemon thread before launching the app.
update_thread = threading.Thread(target=periodic_update)
update_thread.daemon = True
update_thread.start()

# Enable the request queue with a default per-event concurrency limit of 40,
# then launch whatever object ``queue()`` returned.
queued_demo = demo.queue(default_concurrency_limit=40)
queued_demo.launch()