Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -6,7 +6,7 @@ import logging
|
|
6 |
import arena_config
|
7 |
import plotly.graph_objects as go
|
8 |
from typing import Dict
|
9 |
-
from leaderboard import get_current_leaderboard, update_leaderboard, start_backup_thread
|
10 |
|
11 |
|
12 |
# Initialize logging for errors only
|
@@ -129,111 +129,6 @@ def record_vote(prompt, left_response, right_response, left_model, right_model,
|
|
129 |
get_leaderboard_chart() # Update leaderboard chart
|
130 |
)
|
131 |
|
132 |
-
def get_leaderboard():
    """Render the current leaderboard as an HTML table string.

    Reads the battle results from ``get_current_leaderboard()``, computes a
    score per model, sorts by score (then by total battles) in descending
    order and returns a ``<style>`` block followed by a ``<table>`` with one
    row per model.

    Each entry is read as a dict with integer ``"wins"`` and ``"losses"``
    and an optional ``"opponents"`` mapping of opponent id to a dict with
    its own ``"wins"`` and ``"losses"``.

    Side effect: a ``"score"`` key is written into every entry of the dict
    returned by ``get_current_leaderboard()``. That write is kept
    deliberately, since other code may read it.

    Returns:
        str: HTML markup for the leaderboard table.
    """
    battle_results = get_current_leaderboard()

    # Calculate scores for each model.
    # Score formula: win_rate * (1 - 1 / (total_battles + 1))
    # This gives more weight to models with more battles.
    for results in battle_results.values():
        total_battles = results["wins"] + results["losses"]
        if total_battles > 0:
            win_rate = results["wins"] / total_battles
            results["score"] = win_rate * (1 - 1 / (total_battles + 1))
        else:
            results["score"] = 0

    # Sort results by score, then by total battles (both descending).
    sorted_results = sorted(
        battle_results.items(),
        key=lambda x: (x[1]["score"], x[1]["wins"] + x[1]["losses"]),
        reverse=True,
    )

    # Rank badges. Ranks 1-3 were mis-decoded emoji (all three showed up as
    # the same garbled string); they are restored to the place medals.
    # NOTE(review): the badges for ranks 18-20 are also mis-decoded emoji,
    # but the original characters cannot be recovered from the text as
    # seen, so they are kept verbatim - replace with the intended emoji.
    rank_badges = {
        1: "🥇",
        2: "🥈",
        3: "🥉",
        18: "π",
        19: "π°",
        20: "π",
    }

    # Collect the HTML fragments and join once instead of growing a string
    # with += on every row.
    parts = ["""
    <style>
        .leaderboard-table {
            width: 100%;
            border-collapse: collapse;
            font-family: Arial, sans-serif;
        }
        .leaderboard-table th, .leaderboard-table td {
            border: 1px solid #ddd;
            padding: 8px;
            text-align: left;
        }
        .leaderboard-table th {
            background-color: rgba(255, 255, 255, 0.1);
            font-weight: bold;
        }
        .rank-column {
            width: 60px;
            text-align: center;
        }
        .opponent-details {
            font-size: 0.9em;
            color: #888;
        }
    </style>
    <table class='leaderboard-table'>
        <tr>
            <th class='rank-column'>Rank</th>
            <th>Model</th>
            <th>Score</th>
            <th>Wins</th>
            <th>Losses</th>
            <th>Win Rate</th>
            <th>Total Battles</th>
            <th>Top Rival</th>
            <th>Toughest Opponent</th>
        </tr>
    """]

    for index, (model, results) in enumerate(sorted_results, start=1):
        total_battles = results["wins"] + results["losses"]
        win_rate = (results["wins"] / total_battles * 100) if total_battles > 0 else 0

        rank_display = rank_badges.get(index, f"{index}")

        # A model with no recorded opponents falls through to the "N/A"
        # defaults below instead of raising KeyError.
        opponents = results.get("opponents", {})

        # Find top rival (most wins against)
        top_rival = max(
            opponents.items(),
            key=lambda x: x[1]["wins"],
            default=(None, {"wins": 0}),
        )
        top_rival_name = get_human_readable_name(top_rival[0]) if top_rival[0] else "N/A"
        top_rival_wins = top_rival[1]["wins"]

        # Find toughest opponent (most losses against)
        toughest_opponent = max(
            opponents.items(),
            key=lambda x: x[1]["losses"],
            default=(None, {"losses": 0}),
        )
        toughest_opponent_name = (
            get_human_readable_name(toughest_opponent[0]) if toughest_opponent[0] else "N/A"
        )
        toughest_opponent_losses = toughest_opponent[1]["losses"]

        parts.append(f"""
        <tr>
            <td class='rank-column'>{rank_display}</td>
            <td>{get_human_readable_name(model)}</td>
            <td>{results['score']:.4f}</td>
            <td>{results['wins']}</td>
            <td>{results['losses']}</td>
            <td>{win_rate:.2f}%</td>
            <td>{total_battles}</td>
            <td class='opponent-details'>{top_rival_name} (W: {top_rival_wins})</td>
            <td class='opponent-details'>{toughest_opponent_name} (L: {toughest_opponent_losses})</td>
        </tr>
        """)

    parts.append("</table>")
    return "".join(parts)
|
236 |
-
|
237 |
def get_leaderboard_chart():
|
238 |
battle_results = get_current_leaderboard()
|
239 |
|
@@ -461,4 +356,4 @@ with gr.Blocks(css="""
|
|
461 |
demo.load(get_leaderboard_chart, outputs=leaderboard_chart)
|
462 |
|
463 |
if __name__ == "__main__":
|
464 |
-
demo.launch()
|
|
|
6 |
import arena_config
|
7 |
import plotly.graph_objects as go
|
8 |
from typing import Dict
|
9 |
+
from leaderboard import get_current_leaderboard, update_leaderboard, start_backup_thread, get_leaderboard
|
10 |
|
11 |
|
12 |
# Initialize logging for errors only
|
|
|
129 |
get_leaderboard_chart() # Update leaderboard chart
|
130 |
)
|
131 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
132 |
def get_leaderboard_chart():
|
133 |
battle_results = get_current_leaderboard()
|
134 |
|
|
|
356 |
demo.load(get_leaderboard_chart, outputs=leaderboard_chart)
|
357 |
|
358 |
if __name__ == "__main__":
|
359 |
+
demo.launch(show_api=False)
|