Spaces:
Running
Running
Commit
·
bd9c702
1
Parent(s):
fc0a17a
updating metric definition
Browse files
- src/components/leaderboard.py +25 -11
src/components/leaderboard.py
CHANGED
|
@@ -186,16 +186,23 @@ def render_leaderboard_table(display_df, metric_columns, primary_metric):
|
|
| 186 |
formula_html = """
|
| 187 |
<div style="margin: 15px 0;">
|
| 188 |
<p style="margin-bottom: 10px; font-weight: 500;">Formula:</p>
|
| 189 |
-
<div style="background-color: #111; padding:
|
| 190 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 191 |
</div>
|
| 192 |
<p style="margin-top: 10px; font-weight: 500;">Where:</p>
|
| 193 |
<ul style="list-style-type: disc; padding-left: 25px; margin-top: 8px;">
|
| 194 |
-
<li style="margin-bottom: 5px;">
|
| 195 |
-
<li style="margin-bottom: 5px;">
|
| 196 |
-
<li style="margin-bottom: 5px;">
|
| 197 |
-
<li style="margin-bottom: 5px;">The maximum is taken across all experimental runs for a given task-model pair</li>
|
| 198 |
</ul>
|
|
|
|
| 199 |
</div>
|
| 200 |
"""
|
| 201 |
|
|
@@ -206,15 +213,22 @@ def render_leaderboard_table(display_df, metric_columns, primary_metric):
|
|
| 206 |
formula_html = """
|
| 207 |
<div style="margin: 15px 0;">
|
| 208 |
<p style="margin-bottom: 10px; font-weight: 500;">Formula:</p>
|
| 209 |
-
<div style="background-color: #111; padding:
|
| 210 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 211 |
</div>
|
| 212 |
<p style="margin-top: 10px; font-weight: 500;">Where:</p>
|
| 213 |
<ul style="list-style-type: disc; padding-left: 25px; margin-top: 8px;">
|
| 214 |
-
<li style="margin-bottom: 5px;">
|
| 215 |
-
<li style="margin-bottom: 5px;">
|
| 216 |
-
<li style="margin-bottom: 5px;">The maximum is taken across all experimental runs for a given task-model pair</li>
|
| 217 |
</ul>
|
|
|
|
| 218 |
</div>
|
| 219 |
"""
|
| 220 |
|
|
|
|
| 186 |
formula_html = """
|
| 187 |
<div style="margin: 15px 0;">
|
| 188 |
<p style="margin-bottom: 10px; font-weight: 500;">Formula:</p>
|
| 189 |
+
<div style="background-color: #111; padding: 20px; border-radius: 5px; text-align: center; margin-bottom: 15px; font-size: 18px; line-height: 1.5; border: 1px solid #333;">
|
| 190 |
+
<div style="display: flex; align-items: center; justify-content: center;">
|
| 191 |
+
<div style="margin-right: 10px;">Relative Improvement to Human =</div>
|
| 192 |
+
<div style="display: inline-block; text-align: center; padding: 0 10px;">
|
| 193 |
+
<div style="border-bottom: 1px solid #aaa; padding-bottom: 5px;">s<sub>agent</sub> - s<sub>baseline</sub></div>
|
| 194 |
+
<div style="padding-top: 5px;">s<sub>top_human</sub> - s<sub>baseline</sub></div>
|
| 195 |
+
</div>
|
| 196 |
+
<div style="margin-left: 10px;">× 100%</div>
|
| 197 |
+
</div>
|
| 198 |
</div>
|
| 199 |
<p style="margin-top: 10px; font-weight: 500;">Where:</p>
|
| 200 |
<ul style="list-style-type: disc; padding-left: 25px; margin-top: 8px;">
|
| 201 |
+
<li style="margin-bottom: 5px;">s<sub>agent</sub> is the agent's test performance</li>
|
| 202 |
+
<li style="margin-bottom: 5px;">s<sub>baseline</sub> is the baseline test performance</li>
|
| 203 |
+
<li style="margin-bottom: 5px;">s<sub>top_human</sub> is the top human performance in competition</li>
|
|
|
|
| 204 |
</ul>
|
| 205 |
+
<p style="margin-top: 10px;">This metric normalizes scores by setting the baseline solution to 0 and the top human solution to 100.</p>
|
| 206 |
</div>
|
| 207 |
"""
|
| 208 |
|
|
|
|
| 213 |
formula_html = """
|
| 214 |
<div style="margin: 15px 0;">
|
| 215 |
<p style="margin-bottom: 10px; font-weight: 500;">Formula:</p>
|
| 216 |
+
<div style="background-color: #111; padding: 20px; border-radius: 5px; text-align: center; margin-bottom: 15px; font-size: 18px; line-height: 1.5; border: 1px solid #333;">
|
| 217 |
+
<div style="display: flex; align-items: center; justify-content: center;">
|
| 218 |
+
<div style="margin-right: 10px;">Absolute Improvement to Baseline =</div>
|
| 219 |
+
<div style="display: inline-block; text-align: center; padding: 0 10px;">
|
| 220 |
+
<div style="border-bottom: 1px solid #aaa; padding-bottom: 5px;">s<sub>agent</sub> - s<sub>baseline</sub></div>
|
| 221 |
+
<div style="padding-top: 5px;">s<sub>baseline</sub></div>
|
| 222 |
+
</div>
|
| 223 |
+
<div style="margin-left: 10px;">× 100%</div>
|
| 224 |
+
</div>
|
| 225 |
</div>
|
| 226 |
<p style="margin-top: 10px; font-weight: 500;">Where:</p>
|
| 227 |
<ul style="list-style-type: disc; padding-left: 25px; margin-top: 8px;">
|
| 228 |
+
<li style="margin-bottom: 5px;">s<sub>agent</sub> is the agent's test performance</li>
|
| 229 |
+
<li style="margin-bottom: 5px;">s<sub>baseline</sub> is the baseline test performance</li>
|
|
|
|
| 230 |
</ul>
|
| 231 |
+
<p style="margin-top: 10px;">This metric measures the percentage improvement of an agent's performance over the baseline solution.</p>
|
| 232 |
</div>
|
| 233 |
"""
|
| 234 |
|