k-mktr committed on
Commit 53a3c92 • 1 Parent(s): ba4ccba

Update leaderboard.py

Files changed (1)
  1. leaderboard.py +166 -47
leaderboard.py CHANGED
@@ -6,10 +6,15 @@ import time
 from datetime import datetime
 import threading
 import arena_config
+import sys
+import math
 
 # Initialize Nextcloud client
 nc = Nextcloud(nextcloud_url=arena_config.NEXTCLOUD_URL, nc_auth_user=arena_config.NEXTCLOUD_USERNAME, nc_auth_pass=arena_config.NEXTCLOUD_PASSWORD)
 
+# Dictionary to store ELO ratings
+elo_ratings = {}
+
 def load_leaderboard() -> Dict[str, Any]:
     try:
         file_content = nc.files.download(arena_config.NEXTCLOUD_LEADERBOARD_PATH)
@@ -27,6 +32,53 @@ def save_leaderboard(leaderboard_data: Dict[str, Any]) -> bool:
         print(f"Error saving leaderboard: {str(e)}")
         return False
 
+def get_model_size(model_name):
+    for model, human_readable in arena_config.APPROVED_MODELS:
+        if model == model_name:
+            size = float(human_readable.split('(')[1].split('B')[0])
+            return size
+    return 1.0  # Default size if not found
+
+def calculate_expected_score(rating_a, rating_b):
+    return 1 / (1 + math.pow(10, (rating_b - rating_a) / 400))
+
+def update_elo_ratings(winner, loser):
+    if winner not in elo_ratings or loser not in elo_ratings:
+        initialize_elo_ratings()
+
+    winner_rating = elo_ratings[winner]
+    loser_rating = elo_ratings[loser]
+
+    expected_winner = calculate_expected_score(winner_rating, loser_rating)
+    expected_loser = 1 - expected_winner
+
+    winner_size = get_model_size(winner)
+    loser_size = get_model_size(loser)
+    max_size = max(get_model_size(model) for model, _ in arena_config.APPROVED_MODELS)
+
+    k_factor = 32 * (1 + (loser_size - winner_size) / max_size)
+
+    elo_ratings[winner] += k_factor * (1 - expected_winner)
+    elo_ratings[loser] += k_factor * (0 - expected_loser)
+
+def initialize_elo_ratings():
+    leaderboard = load_leaderboard()
+    for model, _ in arena_config.APPROVED_MODELS:
+        size = get_model_size(model)
+        elo_ratings[model] = 1000 + (size * 100)
+
+    # Replay all battles to update ELO ratings
+    for model, data in leaderboard.items():
+        for opponent, results in data['opponents'].items():
+            for _ in range(results['wins']):
+                update_elo_ratings(model, opponent)
+            for _ in range(results['losses']):
+                update_elo_ratings(opponent, model)
+
+def ensure_elo_ratings_initialized():
+    if not elo_ratings:
+        initialize_elo_ratings()
+
 def update_leaderboard(winner: str, loser: str) -> Dict[str, Any]:
     leaderboard = load_leaderboard()
 
@@ -41,62 +93,28 @@ def update_leaderboard(winner: str, loser: str) -> Dict[str, Any]:
     leaderboard[loser]["losses"] += 1
     leaderboard[loser]["opponents"].setdefault(winner, {"wins": 0, "losses": 0})["losses"] += 1
 
+    # Update ELO ratings
+    update_elo_ratings(winner, loser)
+
     save_leaderboard(leaderboard)
     return leaderboard
 
-# Function to get the current leaderboard
 def get_current_leaderboard() -> Dict[str, Any]:
     return load_leaderboard()
 
-def create_backup():
-    while True:
-        try:
-            leaderboard_data = load_leaderboard()
-
-            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-            backup_file_name = f"leaderboard_backup_{timestamp}.json"
-            backup_path = f"{arena_config.NEXTCLOUD_BACKUP_FOLDER}/{backup_file_name}"
-
-            json_data = json.dumps(leaderboard_data, indent=2)
-
-            nc.files.upload(backup_path, json_data.encode('utf-8'))
-
-            print(f"Backup created on Nextcloud: {backup_path}")
-
-        except Exception as e:
-            print(f"Error creating backup: {e}")
-
-        time.sleep(3600)  # Sleep for 1 hour
-
-def start_backup_thread():
-    backup_thread = threading.Thread(target=create_backup, daemon=True)
-    backup_thread.start()
-
-
 def get_human_readable_name(model_name: str) -> str:
     model_dict = dict(arena_config.APPROVED_MODELS)
     return model_dict.get(model_name, model_name)
 
 def get_leaderboard():
-    battle_results = get_current_leaderboard()
-
-    # Calculate scores for each model
-    for model, results in battle_results.items():
-        total_battles = results["wins"] + results["losses"]
-        if total_battles > 0:
-            win_rate = results["wins"] / total_battles
-            results["score"] = win_rate * (1 - 1 / (total_battles + 1))
-        else:
-            results["score"] = 0
-
-    # Sort results by score, then by total battles
+    leaderboard = load_leaderboard()
     sorted_results = sorted(
-        battle_results.items(),
-        key=lambda x: (x[1]["score"], x[1]["wins"] + x[1]["losses"]),
+        leaderboard.items(),
+        key=lambda x: (x[1]["wins"] / (x[1]["wins"] + x[1]["losses"]) if x[1]["wins"] + x[1]["losses"] > 0 else 0, x[1]["wins"] + x[1]["losses"]),
         reverse=True
     )
 
-    leaderboard = """
+    leaderboard_html = """
     <style>
     .leaderboard-table {
        width: 100%;
@@ -125,7 +143,6 @@ def get_leaderboard():
    <tr>
        <th class='rank-column'>Rank</th>
        <th>Model</th>
-        <th>Score</th>
        <th>Wins</th>
        <th>Losses</th>
        <th>Win Rate</th>
@@ -139,7 +156,7 @@ def get_leaderboard():
        total_battles = results["wins"] + results["losses"]
        win_rate = (results["wins"] / total_battles * 100) if total_battles > 0 else 0
 
-        rank_display = {1: "🥇", 2: "🥈", 3: "🥉", 18: "😞", 19: "😰", 20: "😭"}.get(index, f"{index}")
+        rank_display = {1: "🥇", 2: "🥈", 3: "🥉"}.get(index, f"{index}")
 
        top_rival = max(results["opponents"].items(), key=lambda x: x[1]["wins"], default=(None, {"wins": 0}))
        top_rival_name = get_human_readable_name(top_rival[0]) if top_rival[0] else "N/A"
@@ -149,11 +166,10 @@ def get_leaderboard():
        toughest_opponent_name = get_human_readable_name(toughest_opponent[0]) if toughest_opponent[0] else "N/A"
        toughest_opponent_losses = toughest_opponent[1]["losses"]
 
-        leaderboard += f"""
+        leaderboard_html += f"""
        <tr>
            <td class='rank-column'>{rank_display}</td>
            <td>{get_human_readable_name(model)}</td>
-            <td>{results['score']:.4f}</td>
            <td>{results['wins']}</td>
            <td>{results['losses']}</td>
            <td>{win_rate:.2f}%</td>
@@ -162,5 +178,108 @@ def get_leaderboard():
            <td class='opponent-details'>{toughest_opponent_name} (L: {toughest_opponent_losses})</td>
        </tr>
        """
-    leaderboard += "</table>"
-    return leaderboard
+    leaderboard_html += "</table>"
+    return leaderboard_html
+
+def get_elo_leaderboard():
+    ensure_elo_ratings_initialized()
+    leaderboard = load_leaderboard()
+    sorted_ratings = sorted(elo_ratings.items(), key=lambda x: x[1], reverse=True)
+
+    min_initial_rating = min(1000 + (get_model_size(model) * 100) for model, _ in arena_config.APPROVED_MODELS)
+    max_initial_rating = max(1000 + (get_model_size(model) * 100) for model, _ in arena_config.APPROVED_MODELS)
+
+    explanation = f"""
+    <p style="font-size: 16px; margin-bottom: 20px;">
+    This leaderboard uses a modified ELO rating system that takes into account both the performance and size of the models.
+    Initial ratings range from {round(min_initial_rating)} to {round(max_initial_rating)} points, based on model size, with larger models starting at higher ratings.
+    When a smaller model defeats a larger one, it gains more points, while larger models gain fewer points for beating smaller ones.
+    The "Points Scored" column shows the total ELO points gained by the model from its victories, reflecting both quantity and quality of wins.
+    The "Points Lost" column shows the total ELO points lost by the model from its defeats, indicating the challenges faced.
+    </p>
+    """
+
+    leaderboard_html = f"""
+    {explanation}
+    <style>
+    .elo-leaderboard-table {{
+        width: 100%;
+        border-collapse: collapse;
+        font-family: Arial, sans-serif;
+    }}
+    .elo-leaderboard-table th, .elo-leaderboard-table td {{
+        border: 1px solid #ddd;
+        padding: 8px;
+        text-align: left;
+    }}
+    .elo-leaderboard-table th {{
+        background-color: rgba(255, 255, 255, 0.1);
+        font-weight: bold;
+    }}
+    .rank-column {{
+        width: 60px;
+        text-align: center;
+    }}
+    </style>
+    <table class='elo-leaderboard-table'>
+    <tr>
+        <th class='rank-column'>Rank</th>
+        <th>Model</th>
+        <th>ELO Rating</th>
+        <th>Points Scored</th>
+        <th>Points Lost</th>
+    </tr>
+    """
+
+    for index, (model, rating) in enumerate(sorted_ratings, start=1):
+        rank_display = {1: "🥇", 2: "🥈", 3: "🥉"}.get(index, f"{index}")
+        model_size = get_model_size(model)
+
+        points_scored = 0
+        points_lost = 0
+        if model in leaderboard:
+            for opponent, results in leaderboard[model]['opponents'].items():
+                opponent_rating = elo_ratings.get(opponent, 1000)
+                opponent_size = get_model_size(opponent)
+                max_size = max(get_model_size(m) for m, _ in arena_config.APPROVED_MODELS)
+
+                for _ in range(results['wins']):
+                    expected_score = calculate_expected_score(rating, opponent_rating)
+                    k_factor = 32 * (1 + (opponent_size - model_size) / max_size)
+                    points_scored += k_factor * (1 - expected_score)
+
+                for _ in range(results['losses']):
+                    expected_score = calculate_expected_score(rating, opponent_rating)
+                    k_factor = 32 * (1 + (model_size - opponent_size) / max_size)
+                    points_lost += k_factor * expected_score
+
+        leaderboard_html += f"""
+        <tr>
+            <td class='rank-column'>{rank_display}</td>
+            <td>{get_human_readable_name(model)}</td>
+            <td>{round(rating)}</td>
+            <td>{round(points_scored, 2)}</td>
+            <td>{round(points_lost, 2)}</td>
+        </tr>
+        """
+
+    leaderboard_html += "</table>"
+    return leaderboard_html
+
+def create_backup():
+    while True:
+        try:
+            leaderboard_data = load_leaderboard()
+            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+            backup_file_name = f"leaderboard_backup_{timestamp}.json"
+            backup_path = f"{arena_config.NEXTCLOUD_BACKUP_FOLDER}/{backup_file_name}"
+            json_data = json.dumps(leaderboard_data, indent=2)
+            nc.files.upload(backup_path, json_data.encode('utf-8'))
+            print(f"Backup created on Nextcloud: {backup_path}")
+        except Exception as e:
+            print(f"Error creating backup: {e}")
+        time.sleep(3600)  # Sleep for 1 HOUR
+
+def start_backup_thread():
+    backup_thread = threading.Thread(target=create_backup, daemon=True)
+    backup_thread.start()
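
For reference, the size-weighted ELO update introduced above can be exercised in isolation. The sketch below mirrors the committed get_model_size, calculate_expected_score, and update_elo_ratings helpers, but uses two hypothetical model entries and omits the Nextcloud and arena_config plumbing; it is a minimal illustration, not the production code path.

# Standalone sketch of the size-weighted ELO update from this commit.
# The model names and sizes below are hypothetical, not real
# arena_config.APPROVED_MODELS entries.
import math

APPROVED_MODELS = [
    ("small-model", "Small Model (3B)"),
    ("large-model", "Large Model (70B)"),
]

elo_ratings = {}

def get_model_size(model_name):
    # Parse the parameter count (in billions) from the human-readable name,
    # e.g. "Small Model (3B)" -> 3.0, as the committed helper does.
    for model, human_readable in APPROVED_MODELS:
        if model == model_name:
            return float(human_readable.split('(')[1].split('B')[0])
    return 1.0

def calculate_expected_score(rating_a, rating_b):
    # Standard ELO expectation: probability that A beats B.
    return 1 / (1 + math.pow(10, (rating_b - rating_a) / 400))

def initialize_elo_ratings():
    # Larger models start higher: 1000 plus 100 points per billion parameters.
    for model, _ in APPROVED_MODELS:
        elo_ratings[model] = 1000 + get_model_size(model) * 100

def update_elo_ratings(winner, loser):
    expected_winner = calculate_expected_score(elo_ratings[winner], elo_ratings[loser])
    max_size = max(get_model_size(m) for m, _ in APPROVED_MODELS)
    # K grows when the loser is larger than the winner, so upsets move ratings more.
    k_factor = 32 * (1 + (get_model_size(loser) - get_model_size(winner)) / max_size)
    elo_ratings[winner] += k_factor * (1 - expected_winner)
    elo_ratings[loser] -= k_factor * (1 - expected_winner)

initialize_elo_ratings()
print(elo_ratings)  # {'small-model': 1300.0, 'large-model': 8000.0}
update_elo_ratings("small-model", "large-model")  # the 3B model wins an upset
print(elo_ratings)  # the winner gains roughly 62 points, the loser drops by the same amount

Because the expected score for the 3B model is near zero and the K-factor is inflated by the size gap (32 * (1 + 67/70) ≈ 62.6), the upset shifts both ratings by about 62 points, which is the behaviour the ELO leaderboard explanation describes.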