import streamlit as st
import pandas as pd
from PIL import Image
import base64
from io import BytesIO
# ─── Page config ──────────────────────────────────────────────────────────────
# Set the page title and select the wide layout.
st.set_page_config(page_title="ExpertLongBench Leaderboard", layout="wide")
# Open the logo image; the path is relative to the process working directory.
logo_image = Image.open("src/ExpertLongBench.png")
# Display logo
# Re-encode the logo as PNG into an in-memory buffer, then base64-encode the
# bytes into a UTF-8 string held in img_data.
buffered = BytesIO()
logo_image.save(buffered, format="PNG")
img_data = base64.b64encode(buffered.getvalue()).decode("utf-8")
# NOTE(review): this f-string contains only a newline, so img_data is not
# interpolated anywhere in this view. Presumably an <img> tag with a
# base64 data URI belongs here -- confirm the markup was not lost.
st.markdown(
f"""
""",
unsafe_allow_html=True
)
# NOTE(review): this second raw-HTML payload is also just a newline; verify
# whether a title or style block is expected here.
st.markdown(
'''
''',
unsafe_allow_html=True
)
# ─── Load data ────────────────────────────────────────────────────────────────
@st.cache_data
def load_data(path: str = "src/models.json") -> pd.DataFrame:
    """Load the leaderboard records and derive the average score and ranks.

    Args:
        path: Location of a JSON Lines file (one JSON object per line). Every
            record must provide the score columns ``T1`` .. ``T11``.

    Returns:
        The loaded records plus an ``Avg`` column (row mean of the score
        columns, rounded to one decimal) and one integer ``<col>_rank``
        column for each score column and for ``Avg``. Rank 1 is the highest
        value; tied values share the smallest rank of their group.

    Raises:
        KeyError: If any of the ``T1`` .. ``T11`` columns is absent.
    """
    df = pd.read_json(path, lines=True)
    score_cols = [f"T{i}" for i in range(1, 12)]
    df["Avg"] = df[score_cols].mean(axis=1).round(1)
    # Compute rank per column (1 = best).
    for col in score_cols + ["Avg"]:
        # A missing score would otherwise yield a NaN rank, and the integer
        # cast would raise. na_option="bottom" ranks missing values last, so
        # the cast always succeeds; fully populated data is unaffected.
        ranks = df[col].rank(ascending=False, method="min", na_option="bottom")
        df[f"{col}_rank"] = ranks.astype(int)
    return df
# Load the leaderboard table once at import time.
df = load_data()
# Every ranked column: the eleven task scores followed by the average.
score_cols = [*(f"T{i}" for i in range(1, 12)), "Avg"]
# Worst (largest) rank per column; used to normalise the cell shading.
max_ranks = {name: df[f"{name}_rank"].max() for name in score_cols}
# ─── Tabs ──────────────────────────────────────────────────────────────────────
tab1, tab2 = st.tabs(["Leaderboard", "Benchmark Details"])
with tab1:
    # Build the leaderboard as a raw HTML table so each score cell can carry
    # its own background colour; the best value per column is bolded.
    cols = ["Model"] + [f"T{i}" for i in range(1, 12)] + ["Avg"]

    # Collect fragments and join once instead of growing a string with +=.
    parts = ["<table>"]

    # Header row.
    parts.append("<tr>" + "".join(f"<th>{col}</th>" for col in cols) + "</tr>\n")

    # One table row per model.
    for _, row in df.iterrows():
        parts.append("<tr>")
        for col in cols:
            val = row[col]
            if col == "Model":
                parts.append(f"<td>{val}</td>")
                continue

            rank = int(row[f"{col}_rank"])
            # norm is 1.0 for the best rank and 0.0 for the worst. The
            # "or 1" guards the division when every row shares rank 1.
            norm = 1 - (rank - 1) / ((max_ranks[col] - 1) or 1)
            # Blend from white (255,255,255) at norm=0 towards
            # (182,243,255) at norm=1. The blue channel is held at 255, so
            # the strongest tint is a light blue rather than pure green.
            r = int(255 - norm * (255 - 182))
            g = int(255 - norm * (255 - 243))
            b = 255
            bold = "font-weight:bold;" if rank == 1 else ""
            style = f"background-color:rgb({r},{g},{b}); padding:6px; {bold}"
            # Emit the style on the cell itself; without it the shading and
            # bolding computed above never reach the rendered table.
            parts.append(f"<td style='{style}'>{val}</td>")
        parts.append("</tr>\n")

    parts.append("</table>\n")
    html = "".join(parts)
    st.markdown(html, unsafe_allow_html=True)
with tab2:
    # Benchmark description: a heading followed by its body text.
    st.markdown("## Abstract")
    st.write("")

    # Pipeline overview: a heading followed by its body text.
    st.markdown("## Pipeline")
    st.write("")