import streamlit as st
import pandas as pd

st.set_page_config(page_title="Cyber Benchmark Hub: SECQA Leaderboard", layout="wide")
st.title("Cyber Benchmark Hub: SECQA Leaderboard")
st.markdown("#### [View the SECQA Dataset](https://huggingface.co/datasets/zefang-liu/secqa)")

with st.sidebar:
    st.image(
        "https://cdn.prod.website-files.com/630f558f2a15ca1e88a2f774/631f1436ad7a0605fecc5e15_Logo.svg",
        use_container_width=True,
    )
    st.markdown("[Priam.ai](https://www.priam.ai/)")
    st.divider()

    dataset_categories = ["Multiple Choice"]
    selected_category = st.selectbox("Select Dataset Category", dataset_categories, index=0)
    datasets_by_category = {
        "Multiple Choice": ["secQA"],
    }
    dataset_choice = st.selectbox("Select Dataset", datasets_by_category[selected_category], index=0)
    st.divider()

    st.header("Filters & Options")
    dataset_version = st.radio("Select Dataset Version", ["v1", "v2"])
    # For filtering the leaderboard by model type.
    # Note: the available model types will come from the CSV, once loaded.
    # We'll load the CSV later and then update this filter accordingly.
    source_filter_placeholder = st.empty()  # placeholder for the source filter
    st.markdown("---")

    st.header("Test Parameters")
    test_params = pd.DataFrame(
        {"Value": [0, 1, 0, 1, 0]},
        index=["Temperature", "n", "Presence Penalty", "Top_p", "Frequency Penalty"],
    )
    st.table(test_params)

# Determine the file path based on the dataset choice.
# For now, if dataset_choice is "secQA", we use "Benchmark.csv".
if dataset_choice == "secQA":
    file_path = "Benchmark.csv"  # Ensure this file is uploaded to your Hugging Face Space
else:
    file_path = "Benchmark.csv"  # Placeholder: update with actual file paths for future datasets

# Load and clean the CSV data.
@st.cache_data
def load_data(file_path):
    df = pd.read_csv(file_path)
    # Remove any unnamed columns (caused by trailing commas).
    df = df.loc[:, ~df.columns.str.contains("Unnamed", na=False)]
    # Standardize column names.
    df.columns = df.columns.str.strip()
    df.rename(columns={
        "model name": "Model",
        "source": "Type",
        "v1 metric": "V1 Accuracy",
        "v2 metric": "V2 Accuracy",
    }, inplace=True)
    # Convert percentage strings to floats (e.g., "100%" → 1.0).
    for col in ["V1 Accuracy", "V2 Accuracy"]:
        df[col] = df[col].astype(str).str.replace("%", "").str.strip()
        df[col] = pd.to_numeric(df[col], errors="coerce") / 100
    return df

# Load the dataset.
df = load_data(file_path)
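# For reference, load_data assumes a CSV shaped roughly like the following.
# Column names are taken from the renames above; the data rows are
# hypothetical examples, not real benchmark results:
#
#   model name,source,v1 metric,v2 metric
#   Example-Model-A,open source,95%,88%
#   Example-Model-B,closed source,92%,85%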
# Populate the model-type filter with the actual options from the data.
source_filter = source_filter_placeholder.multiselect(
    "Select Model Type",
    options=df["Type"].unique().tolist(),
    default=df["Type"].unique().tolist(),
)

# Apply the sidebar filters. Copy so the slice can be modified below without
# raising SettingWithCopyWarning or mutating the cached dataframe.
df_filtered = (df[df["Type"].isin(source_filter)] if source_filter else df).copy()

# Pick the metric for the selected dataset version.
df_filtered["Accuracy"] = (
    df_filtered["V1 Accuracy"] if dataset_version == "v1" else df_filtered["V2 Accuracy"]
)
df_filtered = df_filtered[["Model", "Type", "Accuracy"]].dropna()  # drop rows with parse errors

# Sort by accuracy, descending.
df_filtered = df_filtered.sort_values("Accuracy", ascending=False).reset_index(drop=True)

# Dense ranking: models with equal accuracy share a rank and no rank numbers
# are skipped (e.g., accuracies 0.95, 0.95, 0.90 rank as 1, 1, 2).
df_filtered["Rank"] = df_filtered["Accuracy"].rank(method="dense", ascending=False).astype(int)
df_filtered = df_filtered[["Rank", "Model", "Type", "Accuracy"]]

# Display the leaderboard and model details side by side.
col1, col2 = st.columns([2, 1])

with col1:
    st.subheader(f"Leaderboard for {dataset_choice.upper()} Version {dataset_version}")
    st.dataframe(df_filtered.style.hide(axis="index"))

with col2:
    st.subheader("Model Details")
    if df_filtered.empty:
        st.warning("No models match the current filters.")
    else:
        selected_model = st.selectbox("Select a Model", df_filtered["Model"].tolist())
        model_details = df_filtered[df_filtered["Model"] == selected_model].iloc[0]
        st.write(f"**Model:** {model_details['Model']}")
        st.write(f"**Type:** {model_details['Type']}")
        st.write(f"**Accuracy:** {model_details['Accuracy']:.2%}")
        st.write(f"**Rank:** {model_details['Rank']}")

# Footer
st.markdown("---")
st.info("More dataset benchmarks will be added to this hub in the future.")
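# To preview the app locally, use the standard Streamlit invocation (the
# script name "app.py" is an assumption — substitute this file's actual name):
#   streamlit run app.py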