from pathlib import Path
import altair as alt
import polars as pl
import gradio as gr
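# Dataset names, populated at startup from the results_*.csv files found next to this script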
DATASETS = []
BENCHMARKS = {
# Name: (device, AMP, compile, single thread)
"Parameters": (None, None, None, None),
"GPU Memory": (None, None, None, None),
"CPU rate": ("cpu", False, False, False),
"CPU rate single core": ("cpu", False, False, True),
"CPU rate with compile": ("cpu", False, True, False),
"CPU rate AMP with compile": ("cpu", True, True, False),
"CUDA rate": ("cuda", False, False, False),
"CUDA rate with compile": ("cuda", False, True, False),
"CUDA rate AMP with compile": ("cuda", True, True, False),
}
def plot_acc_param(param_compare_results_df: pl.DataFrame, width: int = 1000, height: int = 680) -> alt.LayerChart:
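    """Scatter accuracy vs. parameter count per model, with name labels and the Pareto frontier line"""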
df = param_compare_results_df.select(
"Model name",
"Model type",
"Accuracy",
"Top-3 accuracy",
"Resolution",
"Parameters (M)",
"Pareto frontier (p)",
"Intermediate",
"MIM",
"Distilled",
)
base = df.plot.point(
x="Parameters (M)",
y="Accuracy",
color="Model type",
shape="Resolution:N",
tooltip=[
"Parameters (M)",
"Accuracy",
"Top-3 accuracy",
"Model name",
"Model type",
"Resolution",
"Intermediate",
"MIM",
"Distilled",
],
)
text = base.mark_text(align="center", baseline="middle", dy=-10).encode(text="Model name")
frontier = df.plot.line(x="Parameters (M)", y="Pareto frontier (p)").mark_line(
interpolate="step-after", color="red", strokeWidth=0.3, strokeDash=(2, 2)
)
chart = base + text + frontier
return chart.properties(title="Accuray vs Parameter Count", width=width, height=height).configure_scale(zero=False)
def plot_acc_memory(memory_compare_results_df: pl.DataFrame, width: int = 900, height: int = 640) -> alt.LayerChart:
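    """Scatter accuracy vs. peak GPU memory per model, with name labels and the Pareto frontier line"""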
if len(memory_compare_results_df) > 0:
batch_size = memory_compare_results_df["batch_size"][0]
amp = memory_compare_results_df["amp"][0]
else:
batch_size = ""
amp = ""
df = memory_compare_results_df.select(
"Model name",
"Model type",
"Accuracy",
"Top-3 accuracy",
"Resolution",
"Peak GPU memory (MB)",
"Parameters (M)",
"Pareto frontier (mem)",
"Intermediate",
"MIM",
"Distilled",
)
base = df.plot.point(
x="Peak GPU memory (MB)",
y="Accuracy",
color="Model type",
shape="Resolution:N",
tooltip=[
"Peak GPU memory (MB)",
"Parameters (M)",
"Accuracy",
"Top-3 accuracy",
"Model name",
"Model type",
"Resolution",
"Intermediate",
"MIM",
"Distilled",
],
)
text = base.mark_text(align="center", baseline="middle", dy=-10).encode(text="Model name")
frontier = df.plot.line(x="Peak GPU memory (MB)", y="Pareto frontier (mem)").mark_line(
interpolate="step-after", color="red", strokeWidth=0.3, strokeDash=(2, 2)
)
chart = base + text + frontier
return chart.properties(
title=f"Accuray vs GPU Memory (batch size={batch_size}, amp={amp})", width=width, height=height
).configure_scale(zero=False)
def plot_acc_rate(rate_compare_results_df: pl.DataFrame, width: int = 1000, height: int = 680) -> alt.LayerChart:
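    """Scatter accuracy vs. inference time (ms / sample) per model, with name labels and the Pareto frontier line"""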
if len(rate_compare_results_df) > 0:
device = rate_compare_results_df["device"][0]
compiled = rate_compare_results_df["compile"][0]
batch_size = rate_compare_results_df["batch_size"][0]
amp = rate_compare_results_df["amp"][0]
single_thread = rate_compare_results_df["single_thread"][0]
else:
device = ""
compiled = ""
batch_size = ""
amp = ""
single_thread = False
df = rate_compare_results_df.select(
"Model name",
"Model type",
"Accuracy",
"Top-3 accuracy",
"Resolution",
"ms / sample",
"Parameters (M)",
"Pareto frontier (ms)",
"Intermediate",
"MIM",
"Distilled",
)
base = df.plot.point(
x="ms / sample",
y="Accuracy",
color="Model type",
shape="Resolution:N",
tooltip=[
"ms / sample",
"Parameters (M)",
"Accuracy",
"Top-3 accuracy",
"Model name",
"Model type",
"Resolution",
"Intermediate",
"MIM",
"Distilled",
],
)
text = base.mark_text(align="center", baseline="middle", dy=-10).encode(text="Model name")
frontier = df.plot.line(x="ms / sample", y="Pareto frontier (ms)").mark_line(
interpolate="step-after", color="red", strokeWidth=0.3, strokeDash=(2, 2)
)
chart = base + text + frontier
if single_thread is True:
single_thread_title = " Single Core"
else:
single_thread_title = ""
return chart.properties(
title=(
f"Accuracy vs {device.upper()}{single_thread_title} Rate (compile={compiled}, "
f"batch size={batch_size}, amp={amp})"
),
width=width,
height=height,
).configure_scale(zero=False)
def update_data(
dataset: str, benchmark: str, intermediate: bool, mim: bool, dist: bool, log_x: bool, search_bar: str
) -> tuple[alt.LayerChart, pl.DataFrame]:
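    """Load the selected dataset results, apply the UI filters, compute the Pareto frontier for the chosen benchmark and return the chart plus the table data"""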
compare_results_df = pl.read_csv(f"results_{dataset}.csv")
if intermediate is False:
compare_results_df = compare_results_df.filter(pl.col("Intermediate") == intermediate)
if mim is False:
compare_results_df = compare_results_df.filter(pl.col("MIM") == mim)
if dist is False:
compare_results_df = compare_results_df.filter(pl.col("Distilled") == dist)
x_scale_type = "log" if log_x is True else "linear"
    # Filter models by name (the search string is treated as a regex)
compare_results_df = compare_results_df.filter(pl.col("Model name").str.contains(search_bar))
# Parameter count
if benchmark == "Parameters":
param_compare_results_df = compare_results_df.unique(subset=["Model name", "Resolution"]).sort(
"Parameters (M)", descending=False
)
param_compare_results_df = param_compare_results_df.with_columns(
pl.col("Accuracy").cum_max().alias("Pareto frontier (p)")
)
param_compare_results_df = param_compare_results_df.drop(
"Samples / sec", "device", "ms / sample", "Peak GPU memory (MB)"
)
chart = plot_acc_param(param_compare_results_df)
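        # Clamp the x-domain to the 10th-90th percentile so outliers do not stretch the axis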
x_max = param_compare_results_df["Parameters (M)"].quantile(0.9)
x_min = param_compare_results_df["Parameters (M)"].quantile(0.1)
chart.layer[0].encoding.x.scale = alt.Scale(domain=[x_min, x_max], type=x_scale_type)
output_df = param_compare_results_df
# Peak memory
elif benchmark == "GPU Memory":
memory_compare_results_df = compare_results_df.drop_nulls(subset=["Peak GPU memory (MB)"])
memory_compare_results_df = memory_compare_results_df.unique(subset=["Model name", "Resolution"]).sort(
"Peak GPU memory (MB)", descending=False
)
memory_compare_results_df = memory_compare_results_df.with_columns(
pl.col("Accuracy").cum_max().alias("Pareto frontier (mem)")
)
memory_compare_results_df = memory_compare_results_df.drop("Samples / sec", "device", "ms / sample")
chart = plot_acc_memory(memory_compare_results_df)
x_max = memory_compare_results_df["Peak GPU memory (MB)"].quantile(0.9)
x_min = memory_compare_results_df["Peak GPU memory (MB)"].quantile(0.1)
chart.layer[0].encoding.x.scale = alt.Scale(domain=[x_min, x_max], type=x_scale_type)
output_df = memory_compare_results_df
# Rate
else:
(device, amp_enabled, compiled, single_thread) = BENCHMARKS[benchmark]
df = compare_results_df.drop_nulls(subset=["ms / sample"])
df = df.filter(device=device, amp=amp_enabled, compile=compiled, single_thread=single_thread)
device_compare_results_df = df.unique(subset=["Model name", "Resolution"]).sort("ms / sample", descending=False)
device_compare_results_df = device_compare_results_df.drop("Peak GPU memory (MB)")
device_compare_results_df = device_compare_results_df.with_columns(
pl.col("Accuracy").cum_max().alias("Pareto frontier (ms)")
)
chart = plot_acc_rate(device_compare_results_df)
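        # Clamp the x-domain to [min, 95th percentile] with a small margin so slow outliers do not stretch the axis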
x_max = device_compare_results_df["ms / sample"].quantile(0.95)
x_min = device_compare_results_df["ms / sample"].min()
if x_max is not None and x_min is not None:
x_max = x_max * 1.04
x_min = x_min * 0.96
chart.layer[0].encoding.x.scale = alt.Scale(domain=[x_min, x_max], type=x_scale_type)
output_df = device_compare_results_df
output_df = output_df.select(
[
pl.col(col).round(4) if output_df.schema[col] in [pl.Float32, pl.Float64] else col
for col in output_df.columns
]
)
return (chart, output_df.drop("Mistakes", "Samples", "torch_version"))
def app() -> None:
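    """Build the Gradio Blocks UI, wire the controls to update_data and launch the leaderboard"""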
with gr.Blocks(title="Birder Leaderboard", analytics_enabled=False) as leaderboard:
gr.HTML("<center><h1>The Birder Leaderboard</h1></center>")
with gr.Row():
with gr.Column():
pass
with gr.Column():
gr.Markdown(
"""
Leaderboard of all the pre-trained Birder models across multiple datasets.
### Benchmark Setup
* GPU: A5000 ADA Generation
* CPU: AMD Ryzen Threadripper PRO 7975WX
* PyTorch version: 2.5.1+cu124
### Dataset Information
| Name | Training samples | Validation samples | Classes |
|---------------------|------------------|--------------------|-------------|
| arabian-peninsula | 583,868 | 21,634 | 735 |
| eu-common | 569,784 | 19,869 | 707 |
| il-all | 462,346 | 18,614 | 550 |
| il-common | 330,880 | 15,828 | 371 |
"""
)
with gr.Column():
pass
with gr.Row():
with gr.Column():
pass
with gr.Column():
dataset_dropdown = gr.Dropdown(
choices=DATASETS,
label="Select Dataset",
value=DATASETS[0] if DATASETS else None,
)
benchmark_dropdown = gr.Dropdown(
choices=BENCHMARKS.keys(),
label="Select Benchmark",
value=next(iter(BENCHMARKS.keys())) if BENCHMARKS else None,
filterable=False,
)
with gr.Column():
intermediate = gr.Checkbox(
label="Intermediate",
value=True,
info="Show models that underwent intermediate training (extra data)",
)
mim = gr.Checkbox(label="MIM", value=True, info="Show models with Masked Image Modeling pre-training")
dist = gr.Checkbox(label="Distilled", value=True, info="Show distilled models")
log_x = gr.Checkbox(label="Log scale X-axis", value=False)
with gr.Column():
pass
with gr.Row():
with gr.Column():
pass
with gr.Column(scale=2):
search_bar = gr.Textbox(label="Model Filter", placeholder="e.g. convnext, efficient|mobile")
with gr.Column():
pass
plot = gr.Plot(container=False)
table = gr.Dataframe(show_search="search")
inputs = [dataset_dropdown, benchmark_dropdown, intermediate, mim, dist, log_x, search_bar]
outputs = [plot, table]
leaderboard.load(update_data, inputs=inputs, outputs=outputs)
dataset_dropdown.change(update_data, inputs=inputs, outputs=outputs)
benchmark_dropdown.change(update_data, inputs=inputs, outputs=outputs)
intermediate.change(update_data, inputs=inputs, outputs=outputs)
mim.change(update_data, inputs=inputs, outputs=outputs)
dist.change(update_data, inputs=inputs, outputs=outputs)
log_x.change(update_data, inputs=inputs, outputs=outputs)
search_bar.change(update_data, inputs=inputs, outputs=outputs)
leaderboard.launch()
# Discover the available datasets and launch the app
if __name__ == "__main__":
    # Each results_<dataset>.csv file corresponds to one dataset
    dataset_files = Path(".").glob("results_*.csv")
    DATASETS = sorted((p.stem.removeprefix("results_") for p in dataset_files), reverse=True)
app()