Spaces:
Running
Running
Commit
·
bd9c702
1
Parent(s):
fc0a17a
updating metric definition
Browse files
- src/components/leaderboard.py +25 -11
src/components/leaderboard.py
CHANGED
@@ -186,16 +186,23 @@ def render_leaderboard_table(display_df, metric_columns, primary_metric):
|
|
186 |
formula_html = """
|
187 |
<div style="margin: 15px 0;">
|
188 |
<p style="margin-bottom: 10px; font-weight: 500;">Formula:</p>
|
189 |
-
<div style="background-color: #111; padding:
|
190 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
191 |
</div>
|
192 |
<p style="margin-top: 10px; font-weight: 500;">Where:</p>
|
193 |
<ul style="list-style-type: disc; padding-left: 25px; margin-top: 8px;">
|
194 |
-
<li style="margin-bottom: 5px;">
|
195 |
-
<li style="margin-bottom: 5px;">
|
196 |
-
<li style="margin-bottom: 5px;">
|
197 |
-
<li style="margin-bottom: 5px;">The maximum is taken across all experimental runs for a given task-model pair</li>
|
198 |
</ul>
|
|
|
199 |
</div>
|
200 |
"""
|
201 |
|
@@ -206,15 +213,22 @@ def render_leaderboard_table(display_df, metric_columns, primary_metric):
|
|
206 |
formula_html = """
|
207 |
<div style="margin: 15px 0;">
|
208 |
<p style="margin-bottom: 10px; font-weight: 500;">Formula:</p>
|
209 |
-
<div style="background-color: #111; padding:
|
210 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
211 |
</div>
|
212 |
<p style="margin-top: 10px; font-weight: 500;">Where:</p>
|
213 |
<ul style="list-style-type: disc; padding-left: 25px; margin-top: 8px;">
|
214 |
-
<li style="margin-bottom: 5px;">
|
215 |
-
<li style="margin-bottom: 5px;">
|
216 |
-
<li style="margin-bottom: 5px;">The maximum is taken across all experimental runs for a given task-model pair</li>
|
217 |
</ul>
|
|
|
218 |
</div>
|
219 |
"""
|
220 |
|
|
|
186 |
formula_html = """
|
187 |
<div style="margin: 15px 0;">
|
188 |
<p style="margin-bottom: 10px; font-weight: 500;">Formula:</p>
|
189 |
+
<div style="background-color: #111; padding: 20px; border-radius: 5px; text-align: center; margin-bottom: 15px; font-size: 18px; line-height: 1.5; border: 1px solid #333;">
|
190 |
+
<div style="display: flex; align-items: center; justify-content: center;">
|
191 |
+
<div style="margin-right: 10px;">Relative Improvement to Human =</div>
|
192 |
+
<div style="display: inline-block; text-align: center; padding: 0 10px;">
|
193 |
+
<div style="border-bottom: 1px solid #aaa; padding-bottom: 5px;">s<sub>agent</sub> - s<sub>baseline</sub></div>
|
194 |
+
<div style="padding-top: 5px;">s<sub>top_human</sub> - s<sub>baseline</sub></div>
|
195 |
+
</div>
|
196 |
+
<div style="margin-left: 10px;">× 100%</div>
|
197 |
+
</div>
|
198 |
</div>
|
199 |
<p style="margin-top: 10px; font-weight: 500;">Where:</p>
|
200 |
<ul style="list-style-type: disc; padding-left: 25px; margin-top: 8px;">
|
201 |
+
<li style="margin-bottom: 5px;">s<sub>agent</sub> is the agent's test performance</li>
|
202 |
+
<li style="margin-bottom: 5px;">s<sub>baseline</sub> is the baseline test performance</li>
|
203 |
+
<li style="margin-bottom: 5px;">s<sub>top_human</sub> is the top human performance in competition</li>
|
|
|
204 |
</ul>
|
205 |
+
<p style="margin-top: 10px;">This metric normalizes scores by setting the baseline solution to 0 and the top human solution to 100.</p>
|
206 |
</div>
|
207 |
"""
|
208 |
|
|
|
213 |
formula_html = """
|
214 |
<div style="margin: 15px 0;">
|
215 |
<p style="margin-bottom: 10px; font-weight: 500;">Formula:</p>
|
216 |
+
<div style="background-color: #111; padding: 20px; border-radius: 5px; text-align: center; margin-bottom: 15px; font-size: 18px; line-height: 1.5; border: 1px solid #333;">
|
217 |
+
<div style="display: flex; align-items: center; justify-content: center;">
|
218 |
+
<div style="margin-right: 10px;">Absolute Improvement to Baseline =</div>
|
219 |
+
<div style="display: inline-block; text-align: center; padding: 0 10px;">
|
220 |
+
<div style="border-bottom: 1px solid #aaa; padding-bottom: 5px;">s<sub>agent</sub> - s<sub>baseline</sub></div>
|
221 |
+
<div style="padding-top: 5px;">s<sub>baseline</sub></div>
|
222 |
+
</div>
|
223 |
+
<div style="margin-left: 10px;">× 100%</div>
|
224 |
+
</div>
|
225 |
</div>
|
226 |
<p style="margin-top: 10px; font-weight: 500;">Where:</p>
|
227 |
<ul style="list-style-type: disc; padding-left: 25px; margin-top: 8px;">
|
228 |
+
<li style="margin-bottom: 5px;">s<sub>agent</sub> is the agent's test performance</li>
|
229 |
+
<li style="margin-bottom: 5px;">s<sub>baseline</sub> is the baseline test performance</li>
|
|
|
230 |
</ul>
|
231 |
+
<p style="margin-top: 10px;">This metric measures the percentage improvement of an agent's performance over the baseline solution.</p>
|
232 |
</div>
|
233 |
"""
|
234 |
|