Spaces:

double-ai
/

FormulaOne-Leaderboard

Running on CPU Upgrade

galb-dai committed on 9 days ago

Commit

d20b3cd

1 Parent(s): 07ac8d6

Update plot.

Files changed (4) hide show

app.py CHANGED Viewed

@@ -11,7 +11,6 @@ from huggingface_hub import whoami
 # HTML is split so we can inject Gradio media (images/video) where needed.
 from src.about import WHAT_IS_F1_HTML_AFTER_VIDEO  # text immediately after the video
-from src.about import WHAT_IS_F1_HTML_AFTER_WARMUPFIG  # text between warmup/tier1 figs
 from src.about import WHAT_IS_F1_HTML_BOTTOM_A_AFTER_TABS  # text after the heading, before the first figure
 from src.about import WHAT_IS_F1_HTML_BOTTOM_A_BEFORE_TABS  # up to (and including) the "Infinite Well" heading
 from src.about import WHAT_IS_F1_HTML_EVAL_BEFORE_WARMUPFIG  # evaluation section up to before Warmup fig
@@ -492,7 +491,7 @@ with blocks:
             # Evaluation: Warmup figure
             gr.HTML(WHAT_IS_F1_HTML_EVAL_BEFORE_WARMUPFIG, padding=False)
             gr.Image(
-                "assets/shallow_tier_performance.png",
                 width=600,
                 show_label=False,
                 elem_classes=["f1-image"],

 # HTML is split so we can inject Gradio media (images/video) where needed.
 from src.about import WHAT_IS_F1_HTML_AFTER_VIDEO  # text immediately after the video
 from src.about import WHAT_IS_F1_HTML_BOTTOM_A_AFTER_TABS  # text after the heading, before the first figure
 from src.about import WHAT_IS_F1_HTML_BOTTOM_A_BEFORE_TABS  # up to (and including) the "Infinite Well" heading
 from src.about import WHAT_IS_F1_HTML_EVAL_BEFORE_WARMUPFIG  # evaluation section up to before Warmup fig
             # Evaluation: Warmup figure
             gr.HTML(WHAT_IS_F1_HTML_EVAL_BEFORE_WARMUPFIG, padding=False)
             gr.Image(
+                "assets/perf_plot.png",
                 width=600,
                 show_label=False,
                 elem_classes=["f1-image"],

assets/{deeper_tier_performance.png → perf_plot.png} RENAMED Viewed

File without changes

assets/shallow_tier_performance.png DELETED Viewed

src/about.py CHANGED Viewed

@@ -90,13 +90,7 @@ WHAT_IS_F1_HTML_AFTER_VIDEO = """
 WHAT_IS_F1_HTML_EVAL_BEFORE_WARMUPFIG = """
     <h2 class="f1-h2">Model Accuracy</h2>
     <p class="mb-4 f1-p">On the <strong>FormulaOne-Shallow</strong> problems, frontier models perform reasonably well. This confirms they have a foundational capability for these types of algorithmic tasks, in other words, the tasks are squarely in-distribution.</p>
-    <!-- warmup_performance figure inserted via gr.Image in app.py -->
-"""
-# Between Shallow and Deeper figures
-WHAT_IS_F1_HTML_AFTER_WARMUPFIG = """
     <p class="mb-4 f1-p">However, as the reasoning depth increases in the <strong>Deeper</strong> tier, and solutions require the discovery and integration of novel and more complex state representations, model performance drops off sharply.</p>
-    <!-- tier1_performance figure inserted via gr.Image in app.py -->
 """
 # Tail after Deeper figure (closes evaluation section + container)

 WHAT_IS_F1_HTML_EVAL_BEFORE_WARMUPFIG = """
     <h2 class="f1-h2">Model Accuracy</h2>
     <p class="mb-4 f1-p">On the <strong>FormulaOne-Shallow</strong> problems, frontier models perform reasonably well. This confirms they have a foundational capability for these types of algorithmic tasks, in other words, the tasks are squarely in-distribution.</p>
     <p class="mb-4 f1-p">However, as the reasoning depth increases in the <strong>Deeper</strong> tier, and solutions require the discovery and integration of novel and more complex state representations, model performance drops off sharply.</p>
 """
 # Tail after Deeper figure (closes evaluation section + container)