galb-dai committed on
Commit
d20b3cd
·
1 Parent(s): 07ac8d6

Update plot.

Browse files
app.py CHANGED
@@ -11,7 +11,6 @@ from huggingface_hub import whoami
11
 
12
  # HTML is split so we can inject Gradio media (images/video) where needed.
13
  from src.about import WHAT_IS_F1_HTML_AFTER_VIDEO # text immediately after the video
14
- from src.about import WHAT_IS_F1_HTML_AFTER_WARMUPFIG # text between warmup/tier1 figs
15
  from src.about import WHAT_IS_F1_HTML_BOTTOM_A_AFTER_TABS # text after the heading, before the first figure
16
  from src.about import WHAT_IS_F1_HTML_BOTTOM_A_BEFORE_TABS # up to (and including) the "Infinite Well" heading
17
  from src.about import WHAT_IS_F1_HTML_EVAL_BEFORE_WARMUPFIG # evaluation section up to before Warmup fig
@@ -492,7 +491,7 @@ with blocks:
492
  # Evaluation: Warmup figure
493
  gr.HTML(WHAT_IS_F1_HTML_EVAL_BEFORE_WARMUPFIG, padding=False)
494
  gr.Image(
495
- "assets/shallow_tier_performance.png",
496
  width=600,
497
  show_label=False,
498
  elem_classes=["f1-image"],
 
11
 
12
  # HTML is split so we can inject Gradio media (images/video) where needed.
13
  from src.about import WHAT_IS_F1_HTML_AFTER_VIDEO # text immediately after the video
 
14
  from src.about import WHAT_IS_F1_HTML_BOTTOM_A_AFTER_TABS # text after the heading, before the first figure
15
  from src.about import WHAT_IS_F1_HTML_BOTTOM_A_BEFORE_TABS # up to (and including) the "Infinite Well" heading
16
  from src.about import WHAT_IS_F1_HTML_EVAL_BEFORE_WARMUPFIG # evaluation section up to before Warmup fig
 
491
  # Evaluation: Warmup figure
492
  gr.HTML(WHAT_IS_F1_HTML_EVAL_BEFORE_WARMUPFIG, padding=False)
493
  gr.Image(
494
+ "assets/perf_plot.png",
495
  width=600,
496
  show_label=False,
497
  elem_classes=["f1-image"],
assets/{deeper_tier_performance.png → perf_plot.png} RENAMED
File without changes
assets/shallow_tier_performance.png DELETED

Git LFS Details

  • SHA256: 9002636852335551645c87932b738a6a33aef67626f28934783054cacd452569
  • Pointer size: 130 Bytes
  • Size of remote file: 77.1 kB
src/about.py CHANGED
@@ -90,13 +90,7 @@ WHAT_IS_F1_HTML_AFTER_VIDEO = """
90
  WHAT_IS_F1_HTML_EVAL_BEFORE_WARMUPFIG = """
91
  <h2 class="f1-h2">Model Accuracy</h2>
92
  <p class="mb-4 f1-p">On the <strong>FormulaOne-Shallow</strong> problems, frontier models perform reasonably well. This confirms they have a foundational capability for these types of algorithmic tasks, in other words, the tasks are squarely in-distribution.</p>
93
- <!-- warmup_performance figure inserted via gr.Image in app.py -->
94
- """
95
-
96
- # Between Shallow and Deeper figures
97
- WHAT_IS_F1_HTML_AFTER_WARMUPFIG = """
98
  <p class="mb-4 f1-p">However, as the reasoning depth increases in the <strong>Deeper</strong> tier, and solutions require the discovery and integration of novel and more complex state representations, model performance drops off sharply.</p>
99
- <!-- tier1_performance figure inserted via gr.Image in app.py -->
100
  """
101
 
102
  # Tail after Deeper figure (closes evaluation section + container)
 
90
  WHAT_IS_F1_HTML_EVAL_BEFORE_WARMUPFIG = """
91
  <h2 class="f1-h2">Model Accuracy</h2>
92
  <p class="mb-4 f1-p">On the <strong>FormulaOne-Shallow</strong> problems, frontier models perform reasonably well. This confirms they have a foundational capability for these types of algorithmic tasks, in other words, the tasks are squarely in-distribution.</p>
 
 
 
 
 
93
  <p class="mb-4 f1-p">However, as the reasoning depth increases in the <strong>Deeper</strong> tier, and solutions require the discovery and integration of novel and more complex state representations, model performance drops off sharply.</p>
 
94
  """
95
 
96
  # Tail after Deeper figure (closes evaluation section + container)