|
import streamlit as st |
|
import pandas as pd |
|
|
|
# The browser-tab title and the on-page heading use the same text.
PAGE_TITLE = "Cyber Benchmark Hub: SECQA Leaderboard"

st.set_page_config(layout="wide", page_title=PAGE_TITLE)
st.title(PAGE_TITLE)

# Link to the dataset card the leaderboard is based on.
st.markdown("#### [View the SECQA Dataset](https://huggingface.co/datasets/zefang-liu/secqa)")
|
|
|
# Sidebar: branding, dataset pickers, filter controls and the test parameters table.
with st.sidebar:
    st.image(
        "https://cdn.prod.website-files.com/630f558f2a15ca1e88a2f774/631f1436ad7a0605fecc5e15_Logo.svg",
        use_container_width=True,
    )
    st.markdown("[Priam.ai](https://www.priam.ai/)")
    st.divider()

    # Category -> datasets offered under it. The category picker lists the keys
    # of this mapping, so both selectboxes are driven by the same data.
    datasets_by_category = {
        "Multiple Choice": ["secQA"],
    }
    dataset_categories = list(datasets_by_category)
    selected_category = st.selectbox(
        "Select Dataset Category",
        dataset_categories,
        index=0,
    )
    dataset_choice = st.selectbox(
        "Select Dataset",
        datasets_by_category[selected_category],
        index=0,
    )

    st.divider()
    st.header("Filters & Options")
    dataset_version = st.radio("Select Dataset Version", ["v1", "v2"])

    # Reserve a slot here; the model-type multiselect is rendered into it
    # later in the script, once the benchmark data has been loaded.
    source_filter_placeholder = st.empty()

    st.markdown("---")
    st.header("Test Parameters")
    # One-column table: parameter name as the row label, its setting under "Value".
    test_params = pd.Series(
        {
            "Temperature": 0,
            "n": 1,
            "Presence Penalty": 0,
            "Top_p": 1,
            "Frequency Penalty": 0,
        },
        name="Value",
    ).to_frame()
    st.table(test_params)
|
|
|
|
|
|
|
# Resolve the CSV backing the selected dataset. Every dataset currently maps to
# the same file; unknown choices fall back to the default as well.
_DEFAULT_BENCHMARK_FILE = "Benchmark.csv"
_BENCHMARK_FILES = {
    "secQA": "Benchmark.csv",
}
file_path = _BENCHMARK_FILES.get(dataset_choice, _DEFAULT_BENCHMARK_FILE)
|
|
|
|
|
@st.cache_data
def load_data(file_path):
    """Load the benchmark CSV and normalise it for the leaderboard.

    Steps applied to the raw file:
      * columns whose name contains "Unnamed" are dropped;
      * column names are stripped of surrounding whitespace;
      * "model name", "source", "v1 metric" and "v2 metric" are renamed to
        "Model", "Type", "V1 Accuracy" and "V2 Accuracy";
      * both accuracy columns are converted from text such as "85%" to a
        fraction (value / 100); cells that cannot be parsed become NaN.

    Args:
        file_path: Path (or anything ``pandas.read_csv`` accepts) of the CSV.

    Returns:
        The cleaned ``pandas.DataFrame``.

    Raises:
        KeyError: If any of "Model", "Type", "V1 Accuracy" or "V2 Accuracy"
            is absent after renaming.
    """
    df = pd.read_csv(file_path)

    # Discard placeholder columns such as "Unnamed: 0" (typically produced
    # for blank header cells), then tidy the remaining header names.
    df = df.loc[:, ~df.columns.str.contains("Unnamed", na=False)]
    df.columns = df.columns.str.strip()

    df = df.rename(
        columns={
            "model name": "Model",
            "source": "Type",
            "v1 metric": "V1 Accuracy",
            "v2 metric": "V2 Accuracy",
        }
    )

    # Fail early with a message that names the problem, instead of a bare
    # KeyError from whichever column lookup happens to run first.
    required = ("Model", "Type", "V1 Accuracy", "V2 Accuracy")
    missing = [name for name in required if name not in df.columns]
    if missing:
        raise KeyError(
            f"{file_path}: missing expected column(s) after renaming: {missing}"
        )

    for col in ("V1 Accuracy", "V2 Accuracy"):
        # regex=False makes the literal "%" replacement explicit and
        # independent of the pandas version's default for `regex`.
        as_text = df[col].astype(str).str.replace("%", "", regex=False).str.strip()
        df[col] = pd.to_numeric(as_text, errors="coerce") / 100

    return df
|
|
|
|
|
# Load the benchmark table for the selected dataset.
df = load_data(file_path)

# Render the model-type filter into the placeholder created earlier; every
# type present in the data starts out selected.
model_types = df["Type"].unique().tolist()
source_filter = source_filter_placeholder.multiselect(
    "Select Model Type",
    options=model_types,
    default=model_types,
)

# An empty selection is treated as "no filter" rather than "no rows".
df_filtered = df[df["Type"].isin(source_filter)] if source_filter else df

# Pick the accuracy column for the chosen dataset version and expose it as
# "Accuracy". Selecting + renaming builds a new frame, so nothing is assigned
# into a slice of `df` (which would raise SettingWithCopyWarning) or into
# `df` itself when no filter is active.
accuracy_column = "V1 Accuracy" if dataset_version == "v1" else "V2 Accuracy"
df_filtered = (
    df_filtered[["Model", "Type", accuracy_column]]
    .rename(columns={accuracy_column: "Accuracy"})
    .dropna()  # models without a usable value in any of the three columns are left out
    .sort_values("Accuracy", ascending=False)
    .reset_index(drop=True)
)

# Dense ranking: tied accuracies share a rank and no rank numbers are skipped.
df_filtered["Rank"] = (
    df_filtered["Accuracy"].rank(method="dense", ascending=False).astype(int)
)
df_filtered = df_filtered[["Rank", "Model", "Type", "Accuracy"]]
|
|
|
|
|
# Two-column layout: the leaderboard table (wide) next to a per-model detail panel.
col1, col2 = st.columns([2, 1])

with col1:
    st.subheader(f"Leaderboard for {dataset_choice.upper()} Version {dataset_version}")
    # Use st.dataframe's own hide_index option to drop the row index.
    # NOTE(review): Styler.hide(axis="index") is, as far as the Streamlit docs
    # describe, not among the Styler features st.dataframe honours - confirm.
    st.dataframe(df_filtered, hide_index=True)

with col2:
    st.subheader("Model Details")
    if df_filtered.empty:
        # With no rows, st.selectbox returns None and .iloc[0] would raise
        # IndexError; show a message instead of crashing the page.
        st.info("No models with a recorded accuracy match the current selection.")
    else:
        selected_model = st.selectbox("Select a Model", df_filtered["Model"].tolist())
        # If a model name occurs more than once, the first (highest-ranked) row is shown.
        model_details = df_filtered[df_filtered["Model"] == selected_model].iloc[0]
        st.write(f"**Model:** {model_details['Model']}")
        st.write(f"**Type:** {model_details['Type']}")
        st.write(f"**Accuracy:** {model_details['Accuracy']:.2%}")
        st.write(f"**Rank:** {model_details['Rank']}")

# Footer.
st.markdown("---")
st.info("More dataset benchmarks will be added to this hub in the future.")
|
|