Alvinn-aai committed on
Commit
51d4449
·
2 Parent(s): 82f489a 9558f10

Merge remote-tracking branch 'origin/main'

Browse files
Files changed (2) hide show
  1. app.py +104 -31
  2. src/display/css_html_js.py +23 -0
app.py CHANGED
@@ -1,23 +1,26 @@
1
  # app.py
2
 
 
 
3
  import gradio as gr
4
  import pandas as pd
5
- import plotly.graph_objects as go # NEW: for interactive chart
6
  from apscheduler.schedulers.background import BackgroundScheduler
7
  from gradio_leaderboard import Leaderboard, SelectColumns
8
  from huggingface_hub import whoami
9
 
10
  # HTML is split so we can inject Gradio media (images/video) where needed.
11
- from src.about import WHAT_IS_F1_HTML_AFTER_TIER1FIG_TAIL, SUBMISSION_TERMS_TEXT # tail after Tier1 fig
12
  from src.about import WHAT_IS_F1_HTML_AFTER_VIDEO # text immediately after the video
13
  from src.about import WHAT_IS_F1_HTML_AFTER_WARMUPFIG # text between warmup/tier1 figs
14
  from src.about import WHAT_IS_F1_HTML_BOTTOM_A_AFTER_TABS # text after the heading, before the first figure
15
  from src.about import WHAT_IS_F1_HTML_BOTTOM_A_BEFORE_TABS # up to (and including) the "Infinite Well" heading
16
  from src.about import WHAT_IS_F1_HTML_EVAL_BEFORE_WARMUPFIG # evaluation section up to before Warmup fig
17
- from src.about import ( # ⬅️ split to insert the tabs right after the heading
18
  CITATION_BUTTON_LABEL,
19
  CITATION_BUTTON_TEXT,
20
  EVALUATION_QUEUE_TEXT,
 
 
21
  WHAT_IS_F1_HTML_TOP,
22
  )
23
  from src.datamodel.data import F1Data
@@ -233,42 +236,96 @@ STATIC_RESULTS = {
233
  },
234
  }
235
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
236
 
237
  def build_accuracy_figure(tier: str):
238
- """Create interactive bar chart with hover showing 'solved / total'."""
239
- results = STATIC_RESULTS.get(tier, {})
240
  total = TIER_TOTALS[tier]
241
- x = MODELS_ORDER
242
- y = [results[m] for m in x]
243
- hover = [f"{m}<br><b>{v}/{total}</b> problems solved" for m, v in zip(x, y)]
244
-
245
- fig = go.Figure(
246
- data=[
247
- go.Bar(
248
- x=x,
249
- y=y,
250
- text=[f"{v}/{total}" for v in y],
251
- textposition="auto",
252
- hovertext=hover,
253
- hoverinfo="text",
254
- marker_line_width=0.5,
 
 
 
 
 
 
255
  )
256
- ]
257
- )
 
 
 
 
258
  fig.update_layout(
259
  template="plotly_white",
260
- margin=dict(l=30, r=20, t=10, b=40),
261
- yaxis=dict(title="# Problems Solved", range=[0, total], dtick=max(5, total // 10)),
262
- xaxis=dict(title=None),
263
  height=420,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
264
  )
265
  return fig
266
 
267
 
268
- # Precompute initial figure (Warmup)
269
  _initial_accuracy_fig = build_accuracy_figure("Warmup")
270
 
271
-
272
  # Force light theme even if HF user prefers dark
273
  blocks = gr.Blocks(
274
  css=custom_css,
@@ -278,14 +335,13 @@ blocks = gr.Blocks(
278
  with blocks:
279
 
280
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
281
- # === NEW LANDING TAB (first) ===
282
- with gr.TabItem("Model Accuracy on FormulaOne", id=0, elem_id="landing-accuracy-tab"):
283
  gr.Markdown(
284
  "The chart below summarizes static (non-live) results for model performance on FormulaOne.",
285
  elem_classes="markdown-text",
286
  )
287
 
288
- # Selector aligned to the top-right (see CSS)
289
  with gr.Row(elem_id="f1-tier-select-row"):
290
  tier_selector = gr.Radio(
291
  choices=list(TIER_TOTALS.keys()),
@@ -295,15 +351,32 @@ with blocks:
295
  elem_id="f1-tier-select",
296
  )
297
 
298
- accuracy_plot = gr.Plot(value=_initial_accuracy_fig)
299
 
300
- # Wire selector → plot
301
  tier_selector.change(
302
  lambda t: build_accuracy_figure(t),
303
  inputs=tier_selector,
304
  outputs=accuracy_plot,
305
  )
306
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
307
  # Existing "What is FormulaOne" tab
308
  with gr.TabItem("What is FormulaOne", id=1, elem_id="what-is-tab"):
309
 
 
1
  # app.py
2
 
3
+ import math
4
+
5
  import gradio as gr
6
  import pandas as pd
7
+ import plotly.graph_objects as go
8
  from apscheduler.schedulers.background import BackgroundScheduler
9
  from gradio_leaderboard import Leaderboard, SelectColumns
10
  from huggingface_hub import whoami
11
 
12
  # HTML is split so we can inject Gradio media (images/video) where needed.
 
13
  from src.about import WHAT_IS_F1_HTML_AFTER_VIDEO # text immediately after the video
14
  from src.about import WHAT_IS_F1_HTML_AFTER_WARMUPFIG # text between warmup/tier1 figs
15
  from src.about import WHAT_IS_F1_HTML_BOTTOM_A_AFTER_TABS # text after the heading, before the first figure
16
  from src.about import WHAT_IS_F1_HTML_BOTTOM_A_BEFORE_TABS # up to (and including) the "Infinite Well" heading
17
  from src.about import WHAT_IS_F1_HTML_EVAL_BEFORE_WARMUPFIG # evaluation section up to before Warmup fig
18
+ from src.about import ( # tail after Tier1 fig; ⬅️ split to insert the tabs right after the heading
19
  CITATION_BUTTON_LABEL,
20
  CITATION_BUTTON_TEXT,
21
  EVALUATION_QUEUE_TEXT,
22
+ SUBMISSION_TERMS_TEXT,
23
+ WHAT_IS_F1_HTML_AFTER_TIER1FIG_TAIL,
24
  WHAT_IS_F1_HTML_TOP,
25
  )
26
  from src.datamodel.data import F1Data
 
236
  },
237
  }
238
 
239
# Static lookup tables consumed by build_accuracy_figure().
# MODEL_RELEASES: model name -> date string (YYYY-MM-DD); plotted as the
# x-value of that model's marker on a date-typed axis.
+ MODEL_RELEASES = {
240
+ "GPT-5": "2025-08-07",
241
+ "Gemini 2.5 Pro": "2025-03-25",
242
+ "Grok 4": "2025-07-09",
243
+ "Claude Opus 4": "2025-05-22",
244
+ "o3 Pro": "2025-06-10",
245
+ }
246
+
247
# TIER_TOTALS: tier name -> the "total" shown in the hover text
# ("Solved: solved/total"); its keys are also the tier selector's choices.
+ TIER_TOTALS = {"Warmup": 100, "Tier 1": 100, "Tier 2": 20}
248
# MODELS_ORDER: order in which traces are added to the figure.
+ MODELS_ORDER = ["GPT-5", "Gemini 2.5 Pro", "Grok 4", "Claude Opus 4", "o3 Pro"]
249
+
250
# ACCURACY_PCT: tier name -> model name -> accuracy in percent (the y-value).
# The solved count in the hover text is derived as round(pct * total / 100).
+ ACCURACY_PCT = {
251
+ "Warmup": {
252
+ "GPT-5": 38,
253
+ "Gemini 2.5 Pro": 35,
254
+ "Grok 4": 28,
255
+ "Claude Opus 4": 32,
256
+ "o3 Pro": 30,
257
+ },
258
+ "Tier 1": {
259
+ "GPT-5": 3,
260
+ "Gemini 2.5 Pro": 2,
261
+ "Grok 4": 1,
262
+ "Claude Opus 4": 2,
263
+ "o3 Pro": 2,
264
+ },
265
+ "Tier 2": {
266
+ "GPT-5": 0,
267
+ "Gemini 2.5 Pro": 0,
268
+ "Grok 4": 0,
269
+ "Claude Opus 4": 0,
270
+ "o3 Pro": 0,
271
+ },
272
+ }
273
+
274
 
275
  def build_accuracy_figure(tier: str):
276
+ """Interactive scatter: x = release date, y = accuracy (%). Hover shows solved/total."""
 
277
  total = TIER_TOTALS[tier]
278
+ fig = go.Figure()
279
+
280
+ for model in MODELS_ORDER:
281
+ date_str = MODEL_RELEASES[model]
282
+ y = ACCURACY_PCT[tier][model]
283
+ solved = round(y * total / 100)
284
+ fig.add_trace(
285
+ go.Scatter(
286
+ x=[date_str],
287
+ y=[y],
288
+ mode="markers",
289
+ name=model,
290
+ marker=dict(size=12, line=dict(width=1)),
291
+ hovertemplate=(
292
+ f"<b>{model}</b><br>"
293
+ "Release: %{x|%b %d, %Y}<br>"
294
+ "Accuracy: %{y:.1f}%<br>"
295
+ f"Solved: {solved}/{total}"
296
+ "<extra></extra>"
297
+ ),
298
  )
299
+ )
300
+
301
+ # Comfortable y-range (dynamic ceiling for readability)
302
+ max_y = max(ACCURACY_PCT[tier].values()) or 1
303
+ upper = max(1, math.ceil(max_y * 1.25))
304
+
305
  fig.update_layout(
306
  template="plotly_white",
 
 
 
307
  height=420,
308
+ margin=dict(l=30, r=120, t=10, b=40), # extra right room for legend
309
+ xaxis=dict(
310
+ title=None,
311
+ type="date",
312
+ tickformat="%b %Y",
313
+ showgrid=True,
314
+ ),
315
+ yaxis=dict(
316
+ title="Accuracy (%)",
317
+ range=[0, upper],
318
+ dtick=max(1, upper // 5),
319
+ showgrid=True,
320
+ ),
321
+ legend=dict(title="Models", orientation="v", y=1, x=1.02, yanchor="top"),
322
+ hovermode="closest",
323
  )
324
  return fig
325
 
326
 
 
327
  _initial_accuracy_fig = build_accuracy_figure("Warmup")
328
 
 
329
  # Force light theme even if HF user prefers dark
330
  blocks = gr.Blocks(
331
  css=custom_css,
 
335
  with blocks:
336
 
337
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
338
+ with gr.TabItem("FormulaOne", id=0, elem_id="landing-accuracy-tab"):
 
339
  gr.Markdown(
340
  "The chart below summarizes static (non-live) results for model performance on FormulaOne.",
341
  elem_classes="markdown-text",
342
  )
343
 
344
+ # Pill-style selector aligned to the top-right
345
  with gr.Row(elem_id="f1-tier-select-row"):
346
  tier_selector = gr.Radio(
347
  choices=list(TIER_TOTALS.keys()),
 
351
  elem_id="f1-tier-select",
352
  )
353
 
354
+ accuracy_plot = gr.Plot(value=_initial_accuracy_fig, elem_id="f1-accuracy-plot")
355
 
 
356
  tier_selector.change(
357
  lambda t: build_accuracy_figure(t),
358
  inputs=tier_selector,
359
  outputs=accuracy_plot,
360
  )
361
 
362
+ # Footnote (sampling + prompt details)
363
+ gr.Markdown(
364
+ """
365
+ <div class="f1-container">
366
+ <p class="f1-p" style="font-size:0.95rem;color:var(--f1-subtle);">
367
+ <em>Footnote.</em> All models were sampled with their highest available reasoning settings and a generous token budget.
368
+ We also used a diverse few-shot prompt that is highly supportive for these problems, covering many of the subtle
369
+ details inherent in the tasks (state design, invariants, and bag transformations).
370
+ </p>
371
+ </div>
372
+ """,
373
+ elem_classes="markdown-text",
374
+ )
375
+
376
+ # "Learn more" link to the explainer tab
377
+ gr.Markdown(
378
+ '<div class="f1-container"><p><a class="f1-a" href="#what-is-tab">Learn more about FormulaOne.</a></p></div>'
379
+ )
380
  # Existing "What is FormulaOne" tab
381
  with gr.TabItem("What is FormulaOne", id=1, elem_id="what-is-tab"):
382
 
src/display/css_html_js.py CHANGED
@@ -21,6 +21,29 @@ custom_css = """
21
  /* NEW: landing tab width + tier selector alignment */
22
  #landing-accuracy-tab { max-width: 800px; margin-left: auto; margin-right: auto; }
23
  #f1-tier-select-row { justify-content: flex-end; margin-bottom: 6px; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
  /* Text */
26
  .f1-p, .f1-li { line-height: 1.75; color: #374151; text-wrap: pretty; overflow-wrap: break-word; hyphens: auto; }
 
21
  /* NEW: landing tab width + tier selector alignment */
22
  #landing-accuracy-tab { max-width: 800px; margin-left: auto; margin-right: auto; }
23
  #f1-tier-select-row { justify-content: flex-end; margin-bottom: 6px; }
24
+ #f1-tier-select-row { justify-content: flex-end; margin-bottom: 6px; }
25
+ #f1-tier-select .wrap {
26
+ display: inline-flex;
27
+ gap: 6px;
28
+ padding: 4px;
29
+ background: #ffffff;
30
+ border: 1px solid var(--f1-border);
31
+ border-radius: 999px;
32
+ }
33
+ #f1-tier-select input[type="radio"] { display: none; }
34
+ #f1-tier-select label {
35
+ border: none;
36
+ border-radius: 999px;
37
+ padding: 6px 12px;
38
+ background: transparent;
39
+ cursor: pointer;
40
+ }
41
+ #f1-tier-select input[type="radio"]:checked + span {
42
+ background: #eef2ff; /* subtle non-white for selected pill */
43
+ border-radius: 999px;
44
+ padding: 6px 12px;
45
+ box-shadow: 0 1px 2px rgba(0,0,0,0.04);
46
+ }
47
 
48
  /* Text */
49
  .f1-p, .f1-li { line-height: 1.75; color: #374151; text-wrap: pretty; overflow-wrap: break-word; hyphens: auto; }