# test/app.py
import streamlit as st
import pandas as pd

# Set page configuration
st.set_page_config(page_title="Cyber Benchmark Hub: SECQA Leaderboard", layout="wide")

# Main title
st.title("Cyber Benchmark Hub: SECQA Leaderboard")
st.markdown("#### [View the SECQA Dataset](https://huggingface.co/datasets/zefang-liu/secqa)")
# Sidebar: Logo and Website Link
with st.sidebar:
st.image("https://cdn.prod.website-files.com/630f558f2a15ca1e88a2f774/631f1436ad7a0605fecc5e15_Logo.svg", use_container_width=True)
st.markdown("[Priam.ai](https://www.priam.ai/)")
st.divider()
# Top-level: Dataset Category
dataset_categories = ["Multiple Choice", "Open Question", "Steps (Reasoning)"]
selected_category = st.selectbox("Select Dataset Category", dataset_categories, index=0)
# Filter dataset options based on category
datasets_by_category = {
"Multiple Choice": ["secQA"],
"Open Question": ["Testing..."],
"Steps (Reasoning)": ["Testing..."]
}
dataset_choice = st.selectbox("Select Dataset", datasets_by_category[selected_category], index=0)
st.divider()
st.header("Filters & Options")
dataset_version = st.radio("Select Dataset Version", ["v1", "v2"])
# For filtering the leaderboard by model type
# Note: The available model types will come from the CSV, once loaded.
# We'll load the CSV later and then update this filter accordingly.
source_filter_placeholder = st.empty() # placeholder for source filter after data is loaded
st.markdown("---")
st.header("Test Parameters")
test_params = pd.DataFrame({
"Value": [0, 1, 0, 1, 0]
}, index=["Temperature", "n", "Presence Penalty", "Top_p", "Frequency Penalty"])
st.table(test_params)

# Determine the CSV path from the dataset choice.
# For now every choice maps to "Benchmark.csv"; swap in real paths as datasets are added.
if dataset_choice == "secQA":
    file_path = "Benchmark.csv"  # Ensure this file is uploaded in your Hugging Face Space
else:
    file_path = "Benchmark.csv"  # Placeholder: update with actual file paths for future datasets

# Function to load and clean the CSV data
@st.cache_data
def load_data(file_path):
    df = pd.read_csv(file_path)
    # Remove any unnamed columns (caused by trailing commas)
    df = df.loc[:, ~df.columns.str.contains('Unnamed', na=False)]
    # Standardize column names
    df.columns = df.columns.str.strip()
    df.rename(columns={
        "model name": "Model",
        "source": "Type",
        "v1 metric": "V1 Accuracy",
        "v2 metric": "V2 Accuracy"
    }, inplace=True)
    # Convert percentage strings to floats (e.g., "100%" → 1.0)
    for col in ["V1 Accuracy", "V2 Accuracy"]:
        df[col] = df[col].astype(str).str.replace("%", "").str.strip()
        df[col] = pd.to_numeric(df[col], errors='coerce') / 100
    return df
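
# For reference, load_data expects Benchmark.csv to look roughly like this
# (column names taken from the rename map above; the data row is illustrative
# only, not a real benchmark result):
#
#   model name,source,v1 metric,v2 metric
#   ExampleModel,open source,90%,85%
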
# Load dataset
df = load_data(file_path)

# Fill the sidebar placeholder with the model types actually present in the data
source_filter = source_filter_placeholder.multiselect(
    "Select Model Type",
    options=df["Type"].unique().tolist(),
    default=df["Type"].unique().tolist()
)

# Apply filtering based on the sidebar selections
df_filtered = df[df["Type"].isin(source_filter)] if source_filter else df

# Choose the metric for the selected version; copy first so the column assignment
# does not trigger pandas' SettingWithCopyWarning on a filtered view
df_filtered = df_filtered.copy()
df_filtered["Accuracy"] = df_filtered["V1 Accuracy"] if dataset_version == "v1" else df_filtered["V2 Accuracy"]
df_filtered = df_filtered[["Model", "Type", "Accuracy"]].dropna()  # drop rows whose metric failed to parse
# Sort by Accuracy descending and add a Rank column starting from 1
df_filtered = df_filtered.sort_values("Accuracy", ascending=False).reset_index(drop=True)
df_filtered.insert(0, "Rank", range(1, len(df_filtered) + 1))
# Use columns to display leaderboard and model details side-by-side
col1, col2 = st.columns([2, 1])
with col1:
    st.subheader(f"Leaderboard for {dataset_choice.upper()} Version {dataset_version}")
    st.dataframe(df_filtered.reset_index(drop=True))

with col2:
    st.subheader("Model Details")
    selected_model = st.selectbox("Select a Model", df_filtered["Model"].tolist())
    model_details = df_filtered[df_filtered["Model"] == selected_model].iloc[0]
    st.write(f"**Model:** {model_details['Model']}")
    st.write(f"**Type:** {model_details['Type']}")
    st.write(f"**Accuracy:** {model_details['Accuracy']:.2%}")
    st.write(f"**Rank:** {model_details['Rank']}")
# Footer
st.markdown("---")
st.info("More dataset benchmarks will be added to this hub in the future.")