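"""Leaderboard and ELO rating utilities for the model arena.

Battle results are stored as a JSON file on Nextcloud. This module loads and
saves that file, maintains size-adjusted ELO ratings for the approved models,
renders HTML leaderboards, and runs an hourly backup thread.
"""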
from nc_py_api import Nextcloud
import json
from typing import Dict, Any
import os
import time
from datetime import datetime
import threading
import arena_config
import sys
import math

# Initialize Nextcloud client
nc = Nextcloud(
    nextcloud_url=arena_config.NEXTCLOUD_URL,
    nc_auth_user=arena_config.NEXTCLOUD_USERNAME,
    nc_auth_pass=arena_config.NEXTCLOUD_PASSWORD,
)

# Dictionary to store ELO ratings, keyed by model name
elo_ratings = {}

def load_leaderboard() -> Dict[str, Any]:
    try:
        file_content = nc.files.download(arena_config.NEXTCLOUD_LEADERBOARD_PATH)
        return json.loads(file_content.decode('utf-8'))
    except Exception as e:
        print(f"Error loading leaderboard: {str(e)}")
        return {}

def save_leaderboard(leaderboard_data: Dict[str, Any]) -> bool:
    try:
        json_data = json.dumps(leaderboard_data, indent=2)
        nc.files.upload(arena_config.NEXTCLOUD_LEADERBOARD_PATH, json_data.encode('utf-8'))
        return True
    except Exception as e:
        print(f"Error saving leaderboard: {str(e)}")
        return False

def get_model_size(model_name):
    for model, human_readable in arena_config.APPROVED_MODELS:
        if model == model_name:
            # Parse the parameter count (in billions) from the human-readable label
            size = float(human_readable.split('(')[1].split('B')[0])
            return size
    return 1.0  # Default size if not found
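# Illustrative only: get_model_size() above assumes APPROVED_MODELS entries look
# roughly like ("some-model:7b", "Some Model (7B)"), where the "(7B)" suffix of the
# human-readable label carries the parameter count; these names are hypothetical
# placeholders, not actual config values.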

def calculate_expected_score(rating_a, rating_b):
    # Standard ELO expected score: the probability that A beats B
    return 1 / (1 + math.pow(10, (rating_b - rating_a) / 400))
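# Worked example with illustrative ratings: calculate_expected_score(1100, 1000)
# = 1 / (1 + 10 ** (-100 / 400)) ≈ 0.64, i.e. the higher-rated model is expected
# to win roughly 64% of the time.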

def update_elo_ratings(winner, loser):
    if not elo_ratings:
        initialize_elo_ratings()
    # Seed any model still missing from the ratings (e.g. one that is no longer in
    # APPROVED_MODELS but remains in the stored results) at its initial rating,
    # instead of re-initializing, which would recurse during the battle replay.
    winner_rating = elo_ratings.setdefault(winner, 1000 + get_model_size(winner) * 100)
    loser_rating = elo_ratings.setdefault(loser, 1000 + get_model_size(loser) * 100)
    expected_winner = calculate_expected_score(winner_rating, loser_rating)
    expected_loser = 1 - expected_winner
    winner_size = get_model_size(winner)
    loser_size = get_model_size(loser)
    max_size = max(get_model_size(model) for model, _ in arena_config.APPROVED_MODELS)
    # Size-adjusted K-factor: wins over larger models move more points
    k_factor = 32 * (1 + (loser_size - winner_size) / max_size)
    elo_ratings[winner] += k_factor * (1 - expected_winner)
    elo_ratings[loser] += k_factor * (0 - expected_loser)
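# Illustrative K-factor magnitudes (assuming, hypothetically, that the largest
# approved model is 70B): a 7B model beating the 70B one uses
# K = 32 * (1 + (70 - 7) / 70) ≈ 60.8, while the 70B model beating the 7B one
# uses K = 32 * (1 + (7 - 70) / 70) ≈ 3.2.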

def initialize_elo_ratings():
    leaderboard = load_leaderboard()
    for model, _ in arena_config.APPROVED_MODELS:
        size = get_model_size(model)
        elo_ratings[model] = 1000 + (size * 100)
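    # Illustrative starting ratings: a hypothetical 7B model starts at
    # 1000 + 7 * 100 = 1700, a hypothetical 70B model at 1000 + 70 * 100 = 8000.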
    # Replay all battles to update ELO ratings
    for model, data in leaderboard.items():
        for opponent, results in data['opponents'].items():
            for _ in range(results['wins']):
                update_elo_ratings(model, opponent)
            for _ in range(results['losses']):
                update_elo_ratings(opponent, model)

def ensure_elo_ratings_initialized():
    if not elo_ratings:
        initialize_elo_ratings()

def update_leaderboard(winner: str, loser: str) -> Dict[str, Any]:
    leaderboard = load_leaderboard()
    if winner not in leaderboard:
        leaderboard[winner] = {"wins": 0, "losses": 0, "opponents": {}}
    if loser not in leaderboard:
        leaderboard[loser] = {"wins": 0, "losses": 0, "opponents": {}}
    leaderboard[winner]["wins"] += 1
    leaderboard[winner]["opponents"].setdefault(loser, {"wins": 0, "losses": 0})["wins"] += 1
    leaderboard[loser]["losses"] += 1
    leaderboard[loser]["opponents"].setdefault(winner, {"wins": 0, "losses": 0})["losses"] += 1
    # Update ELO ratings
    update_elo_ratings(winner, loser)
    save_leaderboard(leaderboard)
    return leaderboard
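# Shape of the stored leaderboard JSON, as written by update_leaderboard()
# (model names and counts here are illustrative placeholders):
# {
#   "model-a": {
#     "wins": 3,
#     "losses": 1,
#     "opponents": {"model-b": {"wins": 2, "losses": 1}}
#   }
# }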

def get_current_leaderboard() -> Dict[str, Any]:
    return load_leaderboard()

def get_human_readable_name(model_name: str) -> str:
    model_dict = dict(arena_config.APPROVED_MODELS)
    return model_dict.get(model_name, model_name)

def get_leaderboard():
    leaderboard = load_leaderboard()
    # Sort by win rate first, then by total number of battles
    sorted_results = sorted(
        leaderboard.items(),
        key=lambda x: (
            x[1]["wins"] / (x[1]["wins"] + x[1]["losses"]) if x[1]["wins"] + x[1]["losses"] > 0 else 0,
            x[1]["wins"] + x[1]["losses"],
        ),
        reverse=True
    )
    leaderboard_html = """
    <style>
    .leaderboard-table {
        width: 100%;
        border-collapse: collapse;
        font-family: Arial, sans-serif;
    }
    .leaderboard-table th, .leaderboard-table td {
        border: 1px solid #ddd;
        padding: 8px;
        text-align: left;
    }
    .leaderboard-table th {
        background-color: rgba(255, 255, 255, 0.1);
        font-weight: bold;
    }
    .rank-column {
        width: 60px;
        text-align: center;
    }
    .opponent-details {
        font-size: 0.9em;
        color: #888;
    }
    </style>
    <table class='leaderboard-table'>
        <tr>
            <th class='rank-column'>Rank</th>
            <th>Model</th>
            <th>Wins</th>
            <th>Losses</th>
            <th>Win Rate</th>
            <th>Total Battles</th>
            <th>Top Rival</th>
            <th>Toughest Opponent</th>
        </tr>
    """
    for index, (model, results) in enumerate(sorted_results, start=1):
        total_battles = results["wins"] + results["losses"]
        win_rate = (results["wins"] / total_battles * 100) if total_battles > 0 else 0
        rank_display = {1: "🥇", 2: "🥈", 3: "🥉"}.get(index, f"{index}")
        # Top rival: the opponent this model has beaten most often
        top_rival = max(results["opponents"].items(), key=lambda x: x[1]["wins"], default=(None, {"wins": 0}))
        top_rival_name = get_human_readable_name(top_rival[0]) if top_rival[0] else "N/A"
        top_rival_wins = top_rival[1]["wins"]
        # Toughest opponent: the opponent this model has lost to most often
        toughest_opponent = max(results["opponents"].items(), key=lambda x: x[1]["losses"], default=(None, {"losses": 0}))
        toughest_opponent_name = get_human_readable_name(toughest_opponent[0]) if toughest_opponent[0] else "N/A"
        toughest_opponent_losses = toughest_opponent[1]["losses"]
        leaderboard_html += f"""
        <tr>
            <td class='rank-column'>{rank_display}</td>
            <td>{get_human_readable_name(model)}</td>
            <td>{results['wins']}</td>
            <td>{results['losses']}</td>
            <td>{win_rate:.2f}%</td>
            <td>{total_battles}</td>
            <td class='opponent-details'>{top_rival_name} (W: {top_rival_wins})</td>
            <td class='opponent-details'>{toughest_opponent_name} (L: {toughest_opponent_losses})</td>
        </tr>
        """
    leaderboard_html += "</table>"
    return leaderboard_html

def get_elo_leaderboard():
    ensure_elo_ratings_initialized()
    leaderboard = load_leaderboard()
    sorted_ratings = sorted(elo_ratings.items(), key=lambda x: x[1], reverse=True)
    min_initial_rating = min(1000 + (get_model_size(model) * 100) for model, _ in arena_config.APPROVED_MODELS)
    max_initial_rating = max(1000 + (get_model_size(model) * 100) for model, _ in arena_config.APPROVED_MODELS)
    explanation = f"""
    <p style="font-size: 16px; margin-bottom: 20px;">
    This leaderboard uses a modified ELO rating system that takes into account both the performance and the size of the models.
    Initial ratings range from {round(min_initial_rating)} to {round(max_initial_rating)} points, based on model size, with larger models starting at higher ratings.
    When a smaller model defeats a larger one, it gains more points, while larger models gain fewer points for beating smaller ones.
    The "Points Scored" column shows the total ELO points the model has gained from its victories, reflecting both the quantity and the quality of its wins.
    The "Points Lost" column shows the total ELO points the model has lost through its defeats, indicating the challenges it has faced.
    </p>
    """
    leaderboard_html = f"""
    {explanation}
    <style>
    .elo-leaderboard-table {{
        width: 100%;
        border-collapse: collapse;
        font-family: Arial, sans-serif;
    }}
    .elo-leaderboard-table th, .elo-leaderboard-table td {{
        border: 1px solid #ddd;
        padding: 8px;
        text-align: left;
    }}
    .elo-leaderboard-table th {{
        background-color: rgba(255, 255, 255, 0.1);
        font-weight: bold;
    }}
    .rank-column {{
        width: 60px;
        text-align: center;
    }}
    </style>
    <table class='elo-leaderboard-table'>
        <tr>
            <th class='rank-column'>Rank</th>
            <th>Model</th>
            <th>ELO Rating</th>
            <th>Points Scored</th>
            <th>Points Lost</th>
        </tr>
    """
    for index, (model, rating) in enumerate(sorted_ratings, start=1):
        rank_display = {1: "🥇", 2: "🥈", 3: "🥉"}.get(index, f"{index}")
        model_size = get_model_size(model)
        points_scored = 0
        points_lost = 0
        if model in leaderboard:
            # Reconstruct approximate point totals from the stored win/loss counts.
            # Note: the current ratings are used for the expected scores, so these
            # totals approximate, rather than exactly replay, the historical updates.
            for opponent, results in leaderboard[model]['opponents'].items():
                opponent_rating = elo_ratings.get(opponent, 1000)
                opponent_size = get_model_size(opponent)
                max_size = max(get_model_size(m) for m, _ in arena_config.APPROVED_MODELS)
                for _ in range(results['wins']):
                    expected_score = calculate_expected_score(rating, opponent_rating)
                    k_factor = 32 * (1 + (opponent_size - model_size) / max_size)
                    points_scored += k_factor * (1 - expected_score)
                for _ in range(results['losses']):
                    expected_score = calculate_expected_score(rating, opponent_rating)
                    k_factor = 32 * (1 + (model_size - opponent_size) / max_size)
                    points_lost += k_factor * expected_score
        leaderboard_html += f"""
        <tr>
            <td class='rank-column'>{rank_display}</td>
            <td>{get_human_readable_name(model)}</td>
            <td>{round(rating)}</td>
            <td>{round(points_scored, 2)}</td>
            <td>{round(points_lost, 2)}</td>
        </tr>
        """
    leaderboard_html += "</table>"
    return leaderboard_html

def create_backup():
    # Runs forever in a background thread, uploading an hourly snapshot to Nextcloud
    while True:
        try:
            leaderboard_data = load_leaderboard()
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            backup_file_name = f"leaderboard_backup_{timestamp}.json"
            backup_path = f"{arena_config.NEXTCLOUD_BACKUP_FOLDER}/{backup_file_name}"
            json_data = json.dumps(leaderboard_data, indent=2)
            nc.files.upload(backup_path, json_data.encode('utf-8'))
            print(f"Backup created on Nextcloud: {backup_path}")
        except Exception as e:
            print(f"Error creating backup: {e}")
        time.sleep(3600)  # Sleep for 1 hour between backups

def start_backup_thread():
    backup_thread = threading.Thread(target=create_backup, daemon=True)
    backup_thread.start()
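# Minimal usage sketch (illustrative, not executed here; assumes arena_config
# provides valid Nextcloud credentials and APPROVED_MODELS, and the model names
# below are hypothetical placeholders):
#
#     start_backup_thread()
#     ensure_elo_ratings_initialized()
#     update_leaderboard("model-a", "model-b")  # record that model-a beat model-b
#     html = get_elo_leaderboard()              # render the ELO table for the UI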