Spaces: Running on CPU Upgrade
Update plot.
Browse files
- app.py +1 -2
- assets/{deeper_tier_performance.png → perf_plot.png} +2 -2
- assets/shallow_tier_performance.png +0 -3
- src/about.py +0 -6
app.py
CHANGED
@@ -11,7 +11,6 @@ from huggingface_hub import whoami
|
|
11 |
|
12 |
# HTML is split so we can inject Gradio media (images/video) where needed.
|
13 |
from src.about import WHAT_IS_F1_HTML_AFTER_VIDEO # text immediately after the video
|
14 |
-
from src.about import WHAT_IS_F1_HTML_AFTER_WARMUPFIG # text between warmup/tier1 figs
|
15 |
from src.about import WHAT_IS_F1_HTML_BOTTOM_A_AFTER_TABS # text after the heading, before the first figure
|
16 |
from src.about import WHAT_IS_F1_HTML_BOTTOM_A_BEFORE_TABS # up to (and including) the "Infinite Well" heading
|
17 |
from src.about import WHAT_IS_F1_HTML_EVAL_BEFORE_WARMUPFIG # evaluation section up to before Warmup fig
|
@@ -492,7 +491,7 @@ with blocks:
|
|
492 |
# Evaluation: Warmup figure
|
493 |
gr.HTML(WHAT_IS_F1_HTML_EVAL_BEFORE_WARMUPFIG, padding=False)
|
494 |
gr.Image(
|
495 |
-
"assets/
|
496 |
width=600,
|
497 |
show_label=False,
|
498 |
elem_classes=["f1-image"],
|
|
|
11 |
|
12 |
# HTML is split so we can inject Gradio media (images/video) where needed.
|
13 |
from src.about import WHAT_IS_F1_HTML_AFTER_VIDEO # text immediately after the video
|
|
|
14 |
from src.about import WHAT_IS_F1_HTML_BOTTOM_A_AFTER_TABS # text after the heading, before the first figure
|
15 |
from src.about import WHAT_IS_F1_HTML_BOTTOM_A_BEFORE_TABS # up to (and including) the "Infinite Well" heading
|
16 |
from src.about import WHAT_IS_F1_HTML_EVAL_BEFORE_WARMUPFIG # evaluation section up to before Warmup fig
|
|
|
491 |
# Evaluation: Warmup figure
|
492 |
gr.HTML(WHAT_IS_F1_HTML_EVAL_BEFORE_WARMUPFIG, padding=False)
|
493 |
gr.Image(
|
494 |
+
"assets/perf_plot.png",
|
495 |
width=600,
|
496 |
show_label=False,
|
497 |
elem_classes=["f1-image"],
|
assets/{deeper_tier_performance.png → perf_plot.png}
RENAMED
File without changes
|
assets/shallow_tier_performance.png
DELETED
Git LFS Details
|
src/about.py
CHANGED
@@ -90,13 +90,7 @@ WHAT_IS_F1_HTML_AFTER_VIDEO = """
|
|
90 |
WHAT_IS_F1_HTML_EVAL_BEFORE_WARMUPFIG = """
|
91 |
<h2 class="f1-h2">Model Accuracy</h2>
|
92 |
<p class="mb-4 f1-p">On the <strong>FormulaOne-Shallow</strong> problems, frontier models perform reasonably well. This confirms they have a foundational capability for these types of algorithmic tasks, in other words, the tasks are squarely in-distribution.</p>
|
93 |
-
<!-- warmup_performance figure inserted via gr.Image in app.py -->
|
94 |
-
"""
|
95 |
-
|
96 |
-
# Between Shallow and Deeper figures
|
97 |
-
WHAT_IS_F1_HTML_AFTER_WARMUPFIG = """
|
98 |
<p class="mb-4 f1-p">However, as the reasoning depth increases in the <strong>Deeper</strong> tier, and solutions require the discovery and integration of novel and more complex state representations, model performance drops off sharply.</p>
|
99 |
-
<!-- tier1_performance figure inserted via gr.Image in app.py -->
|
100 |
"""
|
101 |
|
102 |
# Tail after Deeper figure (closes evaluation section + container)
|
|
|
90 |
WHAT_IS_F1_HTML_EVAL_BEFORE_WARMUPFIG = """
|
91 |
<h2 class="f1-h2">Model Accuracy</h2>
|
92 |
<p class="mb-4 f1-p">On the <strong>FormulaOne-Shallow</strong> problems, frontier models perform reasonably well. This confirms they have a foundational capability for these types of algorithmic tasks, in other words, the tasks are squarely in-distribution.</p>
|
|
|
|
|
|
|
|
|
|
|
93 |
<p class="mb-4 f1-p">However, as the reasoning depth increases in the <strong>Deeper</strong> tier, and solutions require the discovery and integration of novel and more complex state representations, model performance drops off sharply.</p>
|
|
|
94 |
"""
|
95 |
|
96 |
# Tail after Deeper figure (closes evaluation section + container)
|