Initial
- .gitignore +2 -0
- .streamlit/config.toml +5 -0
- Benchmark.csv +24 -0
- app.py +87 -0
- requirements.txt +37 -0
.gitignore
ADDED
@@ -0,0 +1,2 @@
+venv
+Dockerfile
.streamlit/config.toml
ADDED
@@ -0,0 +1,5 @@
+[theme]
+primaryColor="#01d2fc"
+backgroundColor="#252040"
+secondaryBackgroundColor="#262626"
+textColor="#f4f4f4"
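
Note (outside the diff): Streamlit reads .streamlit/config.toml automatically at startup, so these [theme] keys restyle the whole app with no code changes.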
Benchmark.csv
ADDED
@@ -0,0 +1,24 @@
+model name, source, v1 metric, v2 metric
+OpenAI: GPT-4.5 (Preview),Proprietary Model,100%,97%
+OpenAI: o3 Mini High,Proprietary Model,100%,96%
+OpenAI: o3 Mini,Proprietary Model,100%,96%
+OpenAI: GPT-4o,Proprietary Model,99.09%,95%
+OpenAI: GPT-4o-mini,Proprietary Model,99.09%,97%
+Anthropic: Claude 3.5 Sonnet,Proprietary Model,99.09%,97%
+Anthropic: Claude 3.5 Haiku,Proprietary Model,100%,97%
+Anthropic: Claude 3.7 Sonnet,Proprietary Model,99.09%,98%
+Google: Gemma 3 27B,Open Source,98.18%,95%
+Google: Gemini Flash 2.0,Proprietary Model,100%,99%
+Google: Gemini 2.0 Flash Lite,Proprietary Model,100%,97%
+DeepSeek: R1,Open Source,100%,98%
+DeepSeek: DeepSeek V3,Open Source,100%,97%
+Mistral: Mistral Small 3.1 24B,Open Source,100%,97%
+Mistral: Mistral Small 3,Open Source,99.09%,97%
+Mistral: Mistral Large 2411,Open Source,99.09%,96%
+Meta: Llama 3.3 70B Instruct,Open Source,100%,97%
+Meta: Llama 3.2 3B Instruct,Open Source,78.18%,75%
+Qwen: QwQ 32B,Open Source,100.00%,96%
+Microsoft: Phi 4,Proprietary Model,100%,97%
+Microsoft: Phi-3.5 Mini 128K Instruct,Open Source,99.09%,97%
+Microsoft: Phi-3 Mini 128K Instruct,Open Source,98.18%,98%
+
app.py
ADDED
@@ -0,0 +1,87 @@
+import streamlit as st
+import pandas as pd
+
+# Set page configuration
+st.set_page_config(page_title="Cyber Benchmark Hub: SECQA Leaderboard", layout="wide")
+
+# Main title (ensures it's displayed on the main page)
+st.title("Cyber Benchmark Hub: SECQA Leaderboard")
+st.markdown("## Powered by **Priam Cyber AI**")
+st.markdown("#### [View the SECQA Dataset](https://huggingface.co/datasets/zefang-liu/secqa)")
+
+# Function to load and clean CSV data
+@st.cache_data
+def load_data(file_path):
+    df = pd.read_csv(file_path)
+
+    # Remove any unnamed columns (caused by trailing commas)
+    df = df.loc[:, ~df.columns.str.contains('Unnamed', na=False)]
+
+    # Standardize column names
+    df.columns = df.columns.str.strip()
+    df.rename(columns={
+        "model name": "Model",
+        "source": "Type",
+        "v1 metric": "V1 Accuracy",
+        "v2 metric": "V2 Accuracy"
+    }, inplace=True)
+
+    # Convert percentage strings to floats (e.g., "100%" → 1.0)
+    for col in ["V1 Accuracy", "V2 Accuracy"]:
+        df[col] = df[col].astype(str).str.replace("%", "").str.strip()
+        df[col] = pd.to_numeric(df[col], errors='coerce') / 100
+
+    return df
+
+# Load dataset
+file_path = "Benchmark.csv"  # Ensure this file is uploaded in your Hugging Face Space
+df = load_data(file_path)
+
+# Sidebar: logo, then filters and options
+with st.sidebar:
+    st.image("https://cdn.prod.website-files.com/630f558f2a15ca1e88a2f774/631f1436ad7a0605fecc5e15_Logo.svg", use_container_width=True)
+    st.divider()
+    st.header("Filters & Options")
+    dataset_version = st.radio("Select Dataset Version", ["v1", "v2"])
+    source_filter = st.multiselect(
+        "Select Model Type",
+        options=df["Type"].unique().tolist(),
+        default=df["Type"].unique().tolist()
+    )
+    st.markdown("---")
+    st.header("Test Parameters")
+    test_params = pd.DataFrame({
+        "Value": [0, 1, 0, 1, 0]
+    }, index=["Temperature", "n", "Presence Penalty", "Top_p", "Frequency Penalty"])
+    st.table(test_params)
+
+# Apply sidebar filtering; .copy() avoids SettingWithCopyWarning when adding Accuracy below
+df_filtered = df[df["Type"].isin(source_filter)].copy() if source_filter else df.copy()
+
+# Choose the correct metric version and compute Accuracy
+df_filtered["Accuracy"] = df_filtered["V1 Accuracy"] if dataset_version == "v1" else df_filtered["V2 Accuracy"]
+df_filtered = df_filtered[["Model", "Type", "Accuracy"]].dropna()  # Drop rows whose metrics failed to parse
+
+# Sort by Accuracy descending and add a Rank column starting from 1
+df_filtered = df_filtered.sort_values("Accuracy", ascending=False).reset_index(drop=True)
+df_filtered.insert(0, "Rank", range(1, len(df_filtered) + 1))
+
+# Use columns to display leaderboard and model details side by side
+col1, col2 = st.columns([2, 1])
+
+with col1:
+    st.subheader(f"Leaderboard for SECQA Version {dataset_version}")
+    st.dataframe(df_filtered.reset_index(drop=True))
+
+with col2:
+    st.subheader("Model Details")
+    selected_model = st.selectbox("Select a Model", df_filtered["Model"].tolist())
+    model_details = df_filtered[df_filtered["Model"] == selected_model].iloc[0]
+    st.write(f"**Model:** {model_details['Model']}")
+    st.write(f"**Type:** {model_details['Type']}")
+    st.write(f"**Accuracy:** {model_details['Accuracy']:.2%}")
+    st.write(f"**Rank:** {model_details['Rank']}")
+
+# Footer
+st.markdown("---")
+st.info("More dataset benchmarks will be added to this hub in the future.")
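
Note (outside the diff): a minimal sketch for sanity-checking load_data's percentage parsing without launching Streamlit, assuming Benchmark.csv sits in the working directory. It replicates the cleaning steps rather than importing app.py, so none of the st.* calls run:

    import pandas as pd

    # Replicate the cleaning steps from load_data
    df = pd.read_csv("Benchmark.csv")
    df.columns = df.columns.str.strip()
    v1 = pd.to_numeric(df["v1 metric"].str.replace("%", ""), errors="coerce") / 100

    # Every parsed value should be a fraction, e.g. "99.09%" -> 0.9909
    assert v1.between(0, 1).all()
    print(v1.describe())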
requirements.txt
ADDED
@@ -0,0 +1,37 @@
+altair==5.5.0
+attrs==25.3.0
+blinker==1.9.0
+cachetools==5.5.2
+certifi==2025.1.31
+charset-normalizer==3.4.1
+click==8.1.8
+gitdb==4.0.12
+GitPython==3.1.44
+idna==3.10
+Jinja2==3.1.6
+jsonschema==4.23.0
+jsonschema-specifications==2024.10.1
+MarkupSafe==3.0.2
+narwhals==1.31.0
+numpy==2.2.4
+packaging==24.2
+pandas==2.2.3
+pillow==11.1.0
+protobuf==5.29.4
+pyarrow==19.0.1
+pydeck==0.9.1
+python-dateutil==2.9.0.post0
+pytz==2025.1
+referencing==0.36.2
+requests==2.32.3
+rpds-py==0.23.1
+six==1.17.0
+smmap==5.0.2
+streamlit==1.43.2
+tenacity==9.0.0
+toml==0.10.2
+tornado==6.4.2
+typing_extensions==4.12.2
+tzdata==2025.2
+urllib3==2.3.0
+watchdog==6.0.0
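
Note (outside the diff): to reproduce locally, install the pinned dependencies and launch the app with `pip install -r requirements.txt` followed by `streamlit run app.py`.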