Update leaderboard.py

leaderboard.py  +166 -47  CHANGED
@@ -6,10 +6,15 @@ import time
 from datetime import datetime
 import threading
 import arena_config
+import sys
+import math
 
 # Initialize Nextcloud client
 nc = Nextcloud(nextcloud_url=arena_config.NEXTCLOUD_URL, nc_auth_user=arena_config.NEXTCLOUD_USERNAME, nc_auth_pass=arena_config.NEXTCLOUD_PASSWORD)
 
+# Dictionary to store ELO ratings
+elo_ratings = {}
+
 def load_leaderboard() -> Dict[str, Any]:
     try:
         file_content = nc.files.download(arena_config.NEXTCLOUD_LEADERBOARD_PATH)
@@ -27,6 +32,53 @@ def save_leaderboard(leaderboard_data: Dict[str, Any]) -> bool:
         print(f"Error saving leaderboard: {str(e)}")
         return False
 
+def get_model_size(model_name):
+    for model, human_readable in arena_config.APPROVED_MODELS:
+        if model == model_name:
+            size = float(human_readable.split('(')[1].split('B')[0])
+            return size
+    return 1.0  # Default size if not found
+
+def calculate_expected_score(rating_a, rating_b):
+    return 1 / (1 + math.pow(10, (rating_b - rating_a) / 400))
+
+def update_elo_ratings(winner, loser):
+    if winner not in elo_ratings or loser not in elo_ratings:
+        initialize_elo_ratings()
+
+    winner_rating = elo_ratings[winner]
+    loser_rating = elo_ratings[loser]
+
+    expected_winner = calculate_expected_score(winner_rating, loser_rating)
+    expected_loser = 1 - expected_winner
+
+    winner_size = get_model_size(winner)
+    loser_size = get_model_size(loser)
+    max_size = max(get_model_size(model) for model, _ in arena_config.APPROVED_MODELS)
+
+    k_factor = 32 * (1 + (loser_size - winner_size) / max_size)
+
+    elo_ratings[winner] += k_factor * (1 - expected_winner)
+    elo_ratings[loser] += k_factor * (0 - expected_loser)
+
+def initialize_elo_ratings():
+    leaderboard = load_leaderboard()
+    for model, _ in arena_config.APPROVED_MODELS:
+        size = get_model_size(model)
+        elo_ratings[model] = 1000 + (size * 100)
+
+    # Replay all battles to update ELO ratings
+    for model, data in leaderboard.items():
+        for opponent, results in data['opponents'].items():
+            for _ in range(results['wins']):
+                update_elo_ratings(model, opponent)
+            for _ in range(results['losses']):
+                update_elo_ratings(opponent, model)
+
+def ensure_elo_ratings_initialized():
+    if not elo_ratings:
+        initialize_elo_ratings()
+
 def update_leaderboard(winner: str, loser: str) -> Dict[str, Any]:
     leaderboard = load_leaderboard()
 
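The new helpers above lean on two conventions worth noting: get_model_size() assumes the human-readable half of each arena_config.APPROVED_MODELS pair embeds the parameter count as text like "(7B)", and the K-factor is stretched by the size gap between the two models, so an upset by a smaller model moves more points. A rough sketch of the arithmetic with invented ratings and sizes (not values from the arena):

import math

def expected(rating_a, rating_b):
    # Same formula as calculate_expected_score() in the diff above
    return 1 / (1 + math.pow(10, (rating_b - rating_a) / 400))

# Hypothetical pairing: a 3B model rated 1300 upsets an 8B model rated 1800
small_rating, small_size = 1300.0, 3.0
large_rating, large_size = 1800.0, 8.0
max_size = 8.0  # largest size among the approved models in this sketch

e_small = expected(small_rating, large_rating)        # ~0.053, a clear underdog
k = 32 * (1 + (large_size - small_size) / max_size)   # 32 * 1.625 = 52.0
gain = k * (1 - e_small)                              # ~49.2 points for the winner
print(round(gain, 1))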
@@ -41,62 +93,28 @@ def update_leaderboard(winner: str, loser: str) -> Dict[str, Any]:
     leaderboard[loser]["losses"] += 1
     leaderboard[loser]["opponents"].setdefault(winner, {"wins": 0, "losses": 0})["losses"] += 1
 
+    # Update ELO ratings
+    update_elo_ratings(winner, loser)
+
     save_leaderboard(leaderboard)
     return leaderboard
 
-# Function to get the current leaderboard
 def get_current_leaderboard() -> Dict[str, Any]:
     return load_leaderboard()
 
-def create_backup():
-    while True:
-        try:
-            leaderboard_data = load_leaderboard()
-
-            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-            backup_file_name = f"leaderboard_backup_{timestamp}.json"
-            backup_path = f"{arena_config.NEXTCLOUD_BACKUP_FOLDER}/{backup_file_name}"
-
-            json_data = json.dumps(leaderboard_data, indent=2)
-
-            nc.files.upload(backup_path, json_data.encode('utf-8'))
-
-            print(f"Backup created on Nextcloud: {backup_path}")
-
-        except Exception as e:
-            print(f"Error creating backup: {e}")
-
-        time.sleep(3600)  # Sleep for 1 hour
-
-def start_backup_thread():
-    backup_thread = threading.Thread(target=create_backup, daemon=True)
-    backup_thread.start()
-
-
 def get_human_readable_name(model_name: str) -> str:
     model_dict = dict(arena_config.APPROVED_MODELS)
     return model_dict.get(model_name, model_name)
 
 def get_leaderboard():
-
-
-    # Calculate scores for each model
-    for model, results in battle_results.items():
-        total_battles = results["wins"] + results["losses"]
-        if total_battles > 0:
-            win_rate = results["wins"] / total_battles
-            results["score"] = win_rate * (1 - 1 / (total_battles + 1))
-        else:
-            results["score"] = 0
-
-    # Sort results by score, then by total battles
+    leaderboard = load_leaderboard()
     sorted_results = sorted(
-        battle_results.items(),
-        key=lambda x: (x[1]["score"], x[1]["wins"] + x[1]["losses"]),
+        leaderboard.items(),
+        key=lambda x: (x[1]["wins"] / (x[1]["wins"] + x[1]["losses"]) if x[1]["wins"] + x[1]["losses"] > 0 else 0, x[1]["wins"] + x[1]["losses"]),
         reverse=True
     )
 
-
+    leaderboard_html = """
     <style>
     .leaderboard-table {
         width: 100%;
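With this hunk, get_leaderboard() stops computing the old smoothed score (win_rate * (1 - 1 / (total_battles + 1))) and instead sorts by raw win rate, using total battles only as a tie-breaker. A toy illustration with invented records (not real arena data):

records = {
    "model-a": {"wins": 8, "losses": 2},   # 80% win rate over 10 battles
    "model-b": {"wins": 4, "losses": 1},   # 80% win rate over 5 battles
    "model-c": {"wins": 1, "losses": 0},   # 100% win rate over a single battle
}
ranked = sorted(
    records.items(),
    key=lambda x: (x[1]["wins"] / (x[1]["wins"] + x[1]["losses"])
                   if x[1]["wins"] + x[1]["losses"] > 0 else 0,
                   x[1]["wins"] + x[1]["losses"]),
    reverse=True,
)
print([name for name, _ in ranked])  # ['model-c', 'model-a', 'model-b']

The trade-off: the removed score term damped small samples, so under the new key a model with a single lucky win sorts above an 80% model with a long track record.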
@@ -125,7 +143,6 @@ def get_leaderboard():
         <tr>
             <th class='rank-column'>Rank</th>
             <th>Model</th>
-            <th>Score</th>
             <th>Wins</th>
             <th>Losses</th>
             <th>Win Rate</th>
@@ -139,7 +156,7 @@ def get_leaderboard():
         total_battles = results["wins"] + results["losses"]
         win_rate = (results["wins"] / total_battles * 100) if total_battles > 0 else 0
 
-        rank_display = {1: "🥇", 2: "🥈", 3: "🥉"
+        rank_display = {1: "🥇", 2: "🥈", 3: "🥉"}.get(index, f"{index}")
 
         top_rival = max(results["opponents"].items(), key=lambda x: x[1]["wins"], default=(None, {"wins": 0}))
         top_rival_name = get_human_readable_name(top_rival[0]) if top_rival[0] else "N/A"
@@ -149,11 +166,10 @@ def get_leaderboard():
         toughest_opponent_name = get_human_readable_name(toughest_opponent[0]) if toughest_opponent[0] else "N/A"
         toughest_opponent_losses = toughest_opponent[1]["losses"]
 
-
+        leaderboard_html += f"""
         <tr>
             <td class='rank-column'>{rank_display}</td>
             <td>{get_human_readable_name(model)}</td>
-            <td>{results['score']:.4f}</td>
             <td>{results['wins']}</td>
             <td>{results['losses']}</td>
             <td>{win_rate:.2f}%</td>
@@ -162,5 +178,108 @@ def get_leaderboard():
             <td class='opponent-details'>{toughest_opponent_name} (L: {toughest_opponent_losses})</td>
         </tr>
         """
-
-    return
+    leaderboard_html += "</table>"
+    return leaderboard_html
+
+def get_elo_leaderboard():
+    ensure_elo_ratings_initialized()
+    leaderboard = load_leaderboard()
+    sorted_ratings = sorted(elo_ratings.items(), key=lambda x: x[1], reverse=True)
+
+    min_initial_rating = min(1000 + (get_model_size(model) * 100) for model, _ in arena_config.APPROVED_MODELS)
+    max_initial_rating = max(1000 + (get_model_size(model) * 100) for model, _ in arena_config.APPROVED_MODELS)
+
+    explanation = f"""
+    <p style="font-size: 16px; margin-bottom: 20px;">
+        This leaderboard uses a modified ELO rating system that takes into account both the performance and size of the models.
+        Initial ratings range from {round(min_initial_rating)} to {round(max_initial_rating)} points, based on model size, with larger models starting at higher ratings.
+        When a smaller model defeats a larger one, it gains more points, while larger models gain fewer points for beating smaller ones.
+        The "Points Scored" column shows the total ELO points gained by the model from its victories, reflecting both quantity and quality of wins.
+        The "Points Lost" column shows the total ELO points lost by the model from its defeats, indicating the challenges faced.
+    </p>
+    """
+
+    leaderboard_html = f"""
+    {explanation}
+    <style>
+    .elo-leaderboard-table {{
+        width: 100%;
+        border-collapse: collapse;
+        font-family: Arial, sans-serif;
+    }}
+    .elo-leaderboard-table th, .elo-leaderboard-table td {{
+        border: 1px solid #ddd;
+        padding: 8px;
+        text-align: left;
+    }}
+    .elo-leaderboard-table th {{
+        background-color: rgba(255, 255, 255, 0.1);
+        font-weight: bold;
+    }}
+    .rank-column {{
+        width: 60px;
+        text-align: center;
+    }}
+    </style>
+    <table class='elo-leaderboard-table'>
+        <tr>
+            <th class='rank-column'>Rank</th>
+            <th>Model</th>
+            <th>ELO Rating</th>
+            <th>Points Scored</th>
+            <th>Points Lost</th>
+        </tr>
+    """
+
+    for index, (model, rating) in enumerate(sorted_ratings, start=1):
+        rank_display = {1: "🥇", 2: "🥈", 3: "🥉"}.get(index, f"{index}")
+        model_size = get_model_size(model)
+
+        points_scored = 0
+        points_lost = 0
+        if model in leaderboard:
+            for opponent, results in leaderboard[model]['opponents'].items():
+                opponent_rating = elo_ratings.get(opponent, 1000)
+                opponent_size = get_model_size(opponent)
+                max_size = max(get_model_size(m) for m, _ in arena_config.APPROVED_MODELS)
+
+                for _ in range(results['wins']):
+                    expected_score = calculate_expected_score(rating, opponent_rating)
+                    k_factor = 32 * (1 + (opponent_size - model_size) / max_size)
+                    points_scored += k_factor * (1 - expected_score)
+
+                for _ in range(results['losses']):
+                    expected_score = calculate_expected_score(rating, opponent_rating)
+                    k_factor = 32 * (1 + (model_size - opponent_size) / max_size)
+                    points_lost += k_factor * expected_score
+
+        leaderboard_html += f"""
+        <tr>
+            <td class='rank-column'>{rank_display}</td>
+            <td>{get_human_readable_name(model)}</td>
+            <td>{round(rating)}</td>
+            <td>{round(points_scored, 2)}</td>
+            <td>{round(points_lost, 2)}</td>
+        </tr>
+        """
+
+    leaderboard_html += "</table>"
+    return leaderboard_html
+
+def create_backup():
+    while True:
+        try:
+            leaderboard_data = load_leaderboard()
+            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+            backup_file_name = f"leaderboard_backup_{timestamp}.json"
+            backup_path = f"{arena_config.NEXTCLOUD_BACKUP_FOLDER}/{backup_file_name}"
+            json_data = json.dumps(leaderboard_data, indent=2)
+            nc.files.upload(backup_path, json_data.encode('utf-8'))
+            print(f"Backup created on Nextcloud: {backup_path}")
+        except Exception as e:
+            print(f"Error creating backup: {e}")
+        time.sleep(3600)  # Sleep for 1 hour
+
+def start_backup_thread():
+    backup_thread = threading.Thread(target=create_backup, daemon=True)
+    backup_thread.start()
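Nothing in this diff shows how the Space calls these functions; the sketch below is only a guess at the wiring, with gradio, the importing module, and the tab layout all assumed rather than taken from the repository:

import gradio as gr
import leaderboard  # the module changed in this commit

leaderboard.start_backup_thread()  # hourly JSON backups to Nextcloud in the background

with gr.Blocks() as demo:
    with gr.Tab("Win/Loss Leaderboard"):
        gr.HTML(leaderboard.get_leaderboard)        # callable value, evaluated on page load
    with gr.Tab("ELO Leaderboard"):
        gr.HTML(leaderboard.get_elo_leaderboard)    # new size-weighted ELO table

demo.launch()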