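"""Streamlit entry point for the AI Phone Benchmark Leaderboard."""
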
import asyncio
import streamlit as st
import pandas as pd
from typing import Any, Dict, List, Tuple
from .components.filters import render_table_filters
from .components.visualizations import (
render_leaderboard_table,
render_performance_plots,
render_device_rankings,
)
from .components.header import render_header, render_contribution_guide
from .services.firebase import fetch_leaderboard_data
from .core.styles import CUSTOM_CSS
from .core.scoring import (
calculate_performance_score,
get_performance_metrics,
StandardBenchmarkConditions,
)


def get_filter_values(
    df: pd.DataFrame,
) -> Tuple[
    List[str],
    List[str],
    List[str],
    List[str],
    List[str],
    Tuple[int, int],
    Tuple[int, int],
    Tuple[int, int],
    List[str],
    Tuple[int, int],
]:
    """Collect the unique values and numeric ranges used to populate the filters."""
models = sorted(df["Model ID"].unique().tolist())
platforms = sorted(df["Platform"].unique().tolist())
devices = sorted(df["Device"].unique().tolist())
cache_type_v = sorted(df["cache_type_v"].unique().tolist())
cache_type_k = sorted(df["cache_type_k"].unique().tolist())
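    # Numeric filters are expressed as (min, max) ranges; the GPU-layer
    # range always starts at 0 regardless of the observed minimum.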
n_threads = (df["n_threads"].min(), df["n_threads"].max())
max_n_gpu_layers = (0, max(df["n_gpu_layers"].unique().tolist()))
pp_range = (df["PP Config"].min(), df["PP Config"].max())
tg_range = (df["TG Config"].min(), df["TG Config"].max())
versions = sorted(df["Version"].unique().tolist())
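    # NOTE: the return order below must match the positional arguments
    # passed to render_table_filters in main().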
return (
models,
platforms,
devices,
cache_type_v,
cache_type_k,
pp_range,
tg_range,
n_threads,
versions,
max_n_gpu_layers,
)


def render_performance_metrics(metrics: Dict[str, Any]) -> None:
    """Render the headline performance metrics as a five-column grid."""
st.markdown("### πŸ† Performance Overview")
col1, col2, col3, col4, col5 = st.columns(5)
with col1:
st.metric("Top Device", metrics["top_device"])
with col2:
st.metric("Top Score", f"{metrics['top_score']:.1f}")
with col3:
st.metric("Average Score", f"{metrics['avg_score']:.1f}")
with col4:
st.metric("Total Devices", metrics["total_devices"])
with col5:
st.metric("Total Models", metrics["total_models"])


async def main() -> None:
"""Main application entry point"""
st.set_page_config(
page_title="AI Phone Benchmark Leaderboard",
page_icon="πŸ“±",
layout="wide",
)
# Apply custom styles
st.markdown(CUSTOM_CSS, unsafe_allow_html=True)
# Fetch initial data
df = await fetch_leaderboard_data()
if df.empty:
st.error("No data available. Please check your connection and try again.")
return
# Calculate performance scores
df = calculate_performance_score(df)
metrics = get_performance_metrics(df)
# Render header
render_header()
# Get unique values for filters
(
models,
platforms,
devices,
cache_type_v,
cache_type_k,
pp_range,
tg_range,
n_threads,
versions,
max_n_gpu_layers,
) = get_filter_values(df)
    # Create the main layout with a collapsible right-hand column for the contribution guide
if "show_guide" not in st.session_state:
st.session_state.show_guide = True
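    # Guide column gets 20% of the width when expanded, 10% when collapsed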
    main_col, guide_col = st.columns(
        [0.8, 0.2] if st.session_state.show_guide else [0.9, 0.1]
    )
with main_col:
# Create tabs for different views
tab1, tab2 = st.tabs(["Device Rankings", "Benchmark Results"])
with tab1:
# Device rankings view
st.title("πŸ† Device Rankings")
# Show standardization notice
std = StandardBenchmarkConditions()
            st.info(
                "πŸ“Š Rankings are based on benchmarks run under standard conditions: "
                f"PP={std.PP_CONFIG} tokens, TG={std.TG_CONFIG} tokens. "
                "Scores factor in model size and quantization."
            )
# Render performance metrics
render_performance_metrics(metrics)
# Render device rankings
render_device_rankings(df)
with tab2:
# Original benchmark view
table_filters = render_table_filters(
models,
platforms,
devices,
cache_type_v,
cache_type_k,
pp_range,
tg_range,
n_threads,
versions,
max_n_gpu_layers,
)
# Render the main leaderboard table
render_leaderboard_table(df, table_filters)
# Render plot section
st.markdown("---")
# Render performance plots with table filters
render_performance_plots(df, table_filters)
with guide_col:
render_contribution_guide()
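

# Streamlit reruns this script from top to bottom on each interaction;
# asyncio.run drives the async entry point when the module executes as __main__.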
if __name__ == "__main__":
asyncio.run(main())