Spaces:

holistic-ai
/

LibVulnWatch

Running

File size: 9,845 Bytes

9ab539a
 
 
 
 
fbd403a
8558676
9ab539a
 
 
 
 
 
 
 
f03f82b
9ab539a
 
 
 
 
 
 
 
bccaf50
9ab539a
bccaf50
 
9ab539a
fbd403a
9ab539a
 
 
 
 
fbd403a
 
 
 
 
 
 
 
 
 
9ab539a
 
fbd403a
 
 
 
 
 
8558676
 
 
 
 
 
 
 
 
 
 
 
 
 
fbd403a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9ab539a
 
fbd403a
9ab539a
 
 
 
 
 
 
fbd403a
9ab539a
fbd403a
5fc842f
f03f82b
fbd403a
 
 
9ab539a
 
 
 
 
 
 
 
505dacc
9ab539a
 
bccaf50
 
9ab539a
bccaf50
9ab539a
bccaf50
 
 
9ab539a
 
bccaf50
9ab539a
 
bccaf50
9ab539a
 
 
 
 
 
 
 
 
 
bccaf50
9ab539a
 
bccaf50
9ab539a
 
bccaf50
9ab539a
 
 
 
 
 
bccaf50
9ab539a
 
 
 
 
 
 
 
 
 
bccaf50
9ab539a
 
 
 
 
 
 
 
 
 
 
bccaf50
9ab539a
 
 
 
 
 
 
 
 
 
bccaf50
9ab539a
 
 
bccaf50
 
 
 
 
9ab539a
 
 
 
 
 
bccaf50
 
 
9ab539a
bccaf50
9ab539a
 
bccaf50
 
9ab539a
bccaf50
9ab539a
 
 
 
bccaf50
 
 
 
 
 
9ab539a
 
 
 
 
 
 
 
 
 
 
 
 
 
fbd403a
 
 
 
 
 
 
b5628ea

import gradio as gr
from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
import pandas as pd
from apscheduler.schedulers.background import BackgroundScheduler
from huggingface_hub import snapshot_download
import os
import shutil

from src.about import (
    CITATION_BUTTON_LABEL,
    CITATION_BUTTON_TEXT,
    EVALUATION_QUEUE_TEXT,
    INTRODUCTION_TEXT,
    LLM_BENCHMARKS_TEXT,
    TITLE,
    Tasks
)
from src.display.css_html_js import custom_css
from src.display.utils import (
    BENCHMARK_COLS,
    COLS,
    EVAL_COLS,
    EVAL_TYPES,
    AutoEvalColumn,
    LibraryType,
    fields,
    Language,
    AssessmentStatus
)
from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN, LOCAL_MODE
from src.populate import get_evaluation_queue_df, get_leaderboard_df
from src.submission.submit import add_new_eval


def restart_space():
    """Ask the Hub to restart the Space identified by ``REPO_ID``.

    When ``LOCAL_MODE`` is truthy nothing is restarted and a notice is
    printed instead. Any exception raised by the restart call is printed
    and swallowed so the caller keeps running.
    """
    if not LOCAL_MODE:
        # Best effort: a failed restart must not take the app down.
        try:
            API.restart_space(repo_id=REPO_ID)
        except Exception as e:
            print(f"Failed to restart space: {e}")
            print("Continuing without restart")
        return

    print("Running in local mode, skipping space restart")

### Space initialisation
def initialize_data_directories():
    """Create the local data folders, publish report files, and sync datasets.

    Steps, in order:

    1. Make sure ``EVAL_REQUESTS_PATH`` and ``EVAL_RESULTS_PATH`` exist.
    2. Make sure ``public/reports`` exists and copy every regular file found
       directly inside ``reports`` into it (other entries are skipped).
    3. Unless ``LOCAL_MODE`` is truthy, snapshot-download ``QUEUE_REPO`` and
       ``RESULTS_REPO`` into the two data folders. A failed download is only
       printed; execution continues with whatever is on disk.
    """
    # Local folders for request and result data.
    for data_dir in (EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH):
        os.makedirs(data_dir, exist_ok=True)

    # Destination folder for the report files that get copied below.
    public_reports = os.path.join("public", "reports")
    os.makedirs(public_reports, exist_ok=True)

    # Mirror the files from the source reports folder into the public one.
    report_source = "reports"
    if os.path.exists(report_source):
        for report_file in os.listdir(report_source):
            origin = os.path.join(report_source, report_file)
            target = os.path.join(public_reports, report_file)
            if not os.path.isfile(origin):
                continue
            shutil.copy2(origin, target)
            print(f"Copied report file {report_file} to public directory")

    if LOCAL_MODE:
        print("Running in local mode, using local directories only")
        return

    # Keyword arguments shared by both dataset downloads.
    download_options = dict(
        repo_type="dataset",
        tqdm_class=None,
        etag_timeout=30,
        token=TOKEN,
    )

    # Each download is attempted independently; a failure only logs.
    try:
        print(f"Downloading request data from {QUEUE_REPO} to {EVAL_REQUESTS_PATH}")
        snapshot_download(repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, **download_options)
    except Exception as e:
        print(f"Failed to download request data: {e}")
        print("Using local data only")

    try:
        print(f"Downloading result data from {RESULTS_REPO} to {EVAL_RESULTS_PATH}")
        snapshot_download(repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, **download_options)
    except Exception as e:
        print(f"Failed to download result data: {e}")
        print("Using local data only")

# Prepare the on-disk data before anything below reads from it
initialize_data_directories()

# Leaderboard frame, loaded from the results and requests paths
LEADERBOARD_DF = get_leaderboard_df(
    EVAL_RESULTS_PATH,
    EVAL_REQUESTS_PATH,
    COLS,
    BENCHMARK_COLS,
)

# Queue frames, unpacked in the order: finished, running, pending
finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = get_evaluation_queue_df(
    EVAL_REQUESTS_PATH,
    EVAL_COLS,
)

def init_leaderboard(dataframe):
    """Build the ``Leaderboard`` component that displays *dataframe*.

    If *dataframe* is ``None`` or empty, a column-only frame (``COLS`` plus
    each task's ``col_name``) is displayed instead and a warning is printed.
    Datatypes and the default, locked and hidden column sets are all read
    from the ``AutoEvalColumn`` field descriptors.

    Returns the constructed ``Leaderboard`` instance.
    """
    if dataframe is None or dataframe.empty:
        # Fall back to a frame that only carries the expected column names.
        placeholder_columns = COLS + [task.value.col_name for task in Tasks]
        dataframe = pd.DataFrame(columns=placeholder_columns)
        print("Warning: Leaderboard DataFrame is empty. Using empty dataframe.")

    # Materialise the column descriptors once and derive every list from them.
    column_specs = list(fields(AutoEvalColumn))
    column_types = [spec.type for spec in column_specs]
    shown_by_default = [spec.name for spec in column_specs if spec.displayed_by_default]
    always_shown = [spec.name for spec in column_specs if spec.never_hidden]
    hidden = [spec.name for spec in column_specs if spec.hidden]

    column_picker = SelectColumns(
        default_selection=shown_by_default,
        cant_deselect=always_shown,
        label="Select Columns to Display:",
    )

    stars_filter = ColumnFilter(
        AutoEvalColumn.stars.name,
        type="slider",
        min=0,
        max=50000,
        label="GitHub Stars",
    )
    availability_filter = ColumnFilter(
        AutoEvalColumn.availability.name,
        type="boolean",
        label="Show only active libraries",
        default=True,
    )
    column_filters = [
        ColumnFilter(AutoEvalColumn.library_type.name, type="checkboxgroup", label="Library types"),
        ColumnFilter(AutoEvalColumn.language.name, type="checkboxgroup", label="Programming Language"),
        stars_filter,
        availability_filter,
    ]

    return Leaderboard(
        value=dataframe,
        datatype=column_types,
        select_columns=column_picker,
        search_columns=[AutoEvalColumn.library.name, AutoEvalColumn.license_name.name],
        hide_columns=hidden,
        filter_columns=column_filters,
        bool_checkboxgroup_label="Filter libraries",
        interactive=False,
    )


# Top-level Gradio app. Every component created inside the `with demo:` context
# is attached to this Blocks instance; `custom_css` is passed as its stylesheet.
demo = gr.Blocks(css=custom_css)
with demo:
    # Page header: raw HTML title followed by the introduction text.
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    # Three tabs with ids 0, 2 and 3 (no tab with id=1 is defined in this block).
    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        # Tab 0: the leaderboard built from the module-level LEADERBOARD_DF.
        with gr.TabItem("🏅 Vulnerability Leaderboard", elem_id="vulnerability-leaderboard-tab", id=0):
            leaderboard = init_leaderboard(LEADERBOARD_DF)

        # Tab 2: static "About" text.
        with gr.TabItem("📝 About", elem_id="about-tab", id=2):
            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

        # Tab 3: queue overview plus the submission form.
        with gr.TabItem("🚀 Submit Library", elem_id="submit-library-tab", id=3):
            with gr.Column():
                with gr.Row():
                    gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")

                # One collapsed accordion per queue state. The row counts in the
                # titles are computed once, when the UI is built, from the
                # module-level queue dataframes.
                with gr.Column():
                    with gr.Accordion(
                        f"✅ Completed Assessments ({len(finished_eval_queue_df)})",
                        open=False,
                    ):
                        with gr.Row():
                            finished_eval_table = gr.components.Dataframe(
                                value=finished_eval_queue_df,
                                headers=EVAL_COLS,
                                datatype=EVAL_TYPES,
                                row_count=5,
                            )
                    with gr.Accordion(
                        f"🔄 In Progress Assessments ({len(running_eval_queue_df)})",
                        open=False,
                    ):
                        with gr.Row():
                            running_eval_table = gr.components.Dataframe(
                                value=running_eval_queue_df,
                                headers=EVAL_COLS,
                                datatype=EVAL_TYPES,
                                row_count=5,
                            )

                    with gr.Accordion(
                        f"⏳ Pending Assessment Queue ({len(pending_eval_queue_df)})",
                        open=False,
                    ):
                        with gr.Row():
                            pending_eval_table = gr.components.Dataframe(
                                value=pending_eval_queue_df,
                                headers=EVAL_COLS,
                                datatype=EVAL_TYPES,
                                row_count=5,
                            )
            with gr.Row():
                gr.Markdown("# ✉️✨ Submit a library for vulnerability assessment", elem_classes="markdown-text")

            # Submission form, laid out as two columns of inputs.
            with gr.Row():
                with gr.Column():
                    library_name_textbox = gr.Textbox(label="Library name (org/repo format)")
                    library_version_textbox = gr.Textbox(label="Version", placeholder="v1.0.0")
                    # Offers every LibraryType except Unknown; nothing preselected.
                    library_type = gr.Dropdown(
                        choices=[t.to_str(" : ") for t in LibraryType if t != LibraryType.Unknown],
                        label="Library type",
                        multiselect=False,
                        value=None,
                        interactive=True,
                    )

                with gr.Column():
                    # Offers every Language except Other; "Python" is preselected.
                    language = gr.Dropdown(
                        choices=[i.value.name for i in Language if i != Language.Other],
                        label="Programming Language",
                        multiselect=False,
                        value="Python",
                        interactive=True,
                    )
                    framework = gr.Textbox(label="Framework/Ecosystem (e.g., PyTorch, React)")
                    repository_url = gr.Textbox(label="Repository URL")

            submit_button = gr.Button("Submit for Assessment")
            submission_result = gr.Markdown()
            # On click, the listed component values are handed to add_new_eval in
            # this order and its return value is rendered in submission_result.
            # NOTE(review): add_new_eval's signature is not visible here — confirm
            # that this input order matches its parameter order.
            submit_button.click(
                add_new_eval,
                [
                    library_name_textbox,
                    library_version_textbox,
                    repository_url,
                    language,
                    framework,
                    library_type,
                ],
                submission_result,
            )

    # Collapsed citation box, placed outside the tabs.
    with gr.Row():
        with gr.Accordion("📙 Citation", open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                lines=20,
                elem_id="citation-button",
                show_copy_button=True,
            )

# Periodic restarts only make sense on the hosted Space, so skip them locally
if not LOCAL_MODE:
    scheduler = BackgroundScheduler()
    # Call restart_space every 1800 seconds (30 minutes)
    scheduler.add_job(restart_space, trigger="interval", seconds=1800)
    scheduler.start()

# Enable the request queue, then start serving the app
demo.queue(default_concurrency_limit=40)
demo.launch(show_error=True)