import asyncio

import streamlit as st
import pandas as pd
from typing import List, Tuple, Dict, Any

from .components.filters import render_table_filters
from .components.visualizations import (
    render_leaderboard_table,
    render_performance_plots,
    render_device_rankings,
)
from .components.header import render_header, render_contribution_guide
from .services.firebase import fetch_leaderboard_data
from .core.styles import CUSTOM_CSS
from .core.scoring import (
    calculate_performance_score,
    get_performance_metrics,
    StandardBenchmarkConditions,
)


def get_filter_values(
    df: pd.DataFrame,
) -> Tuple[
    List[str],
    List[str],
    List[str],
    List[str],
    List[str],
    Tuple[int, int],
    Tuple[int, int],
    Tuple[int, int],
    List[str],
    Tuple[int, int],
]:
    """Get unique values and ranges for the table filters"""
    models = sorted(df["Model ID"].unique().tolist())
    platforms = sorted(df["Platform"].unique().tolist())
    devices = sorted(df["Device"].unique().tolist())
    cache_type_v = sorted(df["cache_type_v"].unique().tolist())
    cache_type_k = sorted(df["cache_type_k"].unique().tolist())
    n_threads = (df["n_threads"].min(), df["n_threads"].max())
    max_n_gpu_layers = (0, max(df["n_gpu_layers"].unique().tolist()))
    pp_range = (df["PP Config"].min(), df["PP Config"].max())
    tg_range = (df["TG Config"].min(), df["TG Config"].max())
    versions = sorted(df["Version"].unique().tolist())
    return (
        models,
        platforms,
        devices,
        cache_type_v,
        cache_type_k,
        pp_range,
        tg_range,
        n_threads,
        versions,
        max_n_gpu_layers,
    )


def render_performance_metrics(metrics: Dict[str, Any]):
    """Render performance metrics in a nice grid"""
    st.markdown("### 🏆 Performance Overview")

    col1, col2, col3, col4, col5 = st.columns(5)
    with col1:
        st.metric("Top Device", metrics["top_device"])
    with col2:
        st.metric("Top Score", f"{metrics['top_score']:.1f}")
    with col3:
        st.metric("Average Score", f"{metrics['avg_score']:.1f}")
    with col4:
        st.metric("Total Devices", metrics["total_devices"])
    with col5:
        st.metric("Total Models", metrics["total_models"])


async def main():
    """Main application entry point"""
    st.set_page_config(
        page_title="AI Phone Benchmark Leaderboard",
        page_icon="📱",
        layout="wide",
    )

    # Apply custom styles
    st.markdown(CUSTOM_CSS, unsafe_allow_html=True)

    # Fetch initial data
    df = await fetch_leaderboard_data()
    if df.empty:
        st.error("No data available. Please check your connection and try again.")
        return

    # Calculate performance scores
    df = calculate_performance_score(df)
    metrics = get_performance_metrics(df)

    # Render header
    render_header()

    # Get unique values for filters
    (
        models,
        platforms,
        devices,
        cache_type_v,
        cache_type_k,
        pp_range,
        tg_range,
        n_threads,
        versions,
        max_n_gpu_layers,
    ) = get_filter_values(df)

    # Create main layout with a side column for the contribution guide
    if "show_guide" not in st.session_state:
        st.session_state.show_guide = True

    main_col, guide_col = st.columns(
        [
            0.9 if not st.session_state.show_guide else 0.8,
            0.1 if not st.session_state.show_guide else 0.2,
        ]
    )

    with main_col:
        # Create tabs for different views
        tab1, tab2 = st.tabs(["Device Rankings", "Benchmark Results"])

        with tab1:
            # Device rankings view
            st.title("🏆 Device Rankings")

            # Show standardization notice
            std = StandardBenchmarkConditions()
            st.info(
                f"📊 Scores are normalized to standard conditions: "
                f"PP={std.PP_CONFIG} tokens, TG={std.TG_CONFIG} tokens. "
                f"Scores factor in quantization quality and model size."
            )

            # Render performance metrics
            render_performance_metrics(metrics)

            # Render device rankings
            render_device_rankings(df)

        with tab2:
            # Original benchmark view
            table_filters = render_table_filters(
                models,
                platforms,
                devices,
                cache_type_v,
                cache_type_k,
                pp_range,
                tg_range,
                n_threads,
                versions,
                max_n_gpu_layers,
            )

            # Render the main leaderboard table
            render_leaderboard_table(df, table_filters)

            # Render plot section
            st.markdown("---")
            st.title("📊 Performance Comparison")

            # Plot-specific selectors in a row
            plot_col1, plot_col2, plot_col3 = st.columns(3)
            with plot_col1:
                plot_model = st.selectbox(
                    "Select Model for Comparison",
                    options=models,
                    key="plot_model_selector",
                )
            with plot_col2:
                pp_options = sorted([int(x) for x in df["PP Config"].unique()])
                default_pp_index = (
                    pp_options.index(std.PP_CONFIG)
                    if std.PP_CONFIG in pp_options
                    else 0
                )
                plot_pp = st.selectbox(
                    "Select PP Config for Comparison",
                    options=pp_options,
                    key="plot_pp_selector",
                    index=default_pp_index,
                )
            with plot_col3:
                tg_options = sorted([int(x) for x in df["TG Config"].unique()])
                default_tg_index = (
                    tg_options.index(std.TG_CONFIG)
                    if std.TG_CONFIG in tg_options
                    else 0
                )
                plot_tg = st.selectbox(
                    "Select TG Config for Comparison",
                    options=tg_options,
                    key="plot_tg_selector",
                    index=default_tg_index,
                )

            # Create plot filters based on table filters but override the model and configs
            plot_filters = table_filters.copy()
            plot_filters["model"] = plot_model
            plot_filters["pp_range"] = (plot_pp, plot_pp)  # Set exact PP value
            plot_filters["tg_range"] = (plot_tg, plot_tg)  # Set exact TG value

            render_performance_plots(df, plot_filters)

    with guide_col:
        render_contribution_guide()


if __name__ == "__main__":
    asyncio.run(main())
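
# For reference (an assumption drawn from the column accesses above, not from the
# implementation of `fetch_leaderboard_data`): the leaderboard DataFrame is expected
# to contain at least the columns
#
#     "Model ID", "Platform", "Device", "Version",
#     "cache_type_v", "cache_type_k", "n_threads", "n_gpu_layers",
#     "PP Config", "TG Config",
#
# plus whatever fields `calculate_performance_score` and `get_performance_metrics`
# need in order to derive the score shown in the rankings tab.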