"""Leaderboard storage, scoring and ELO ratings backed by a Nextcloud file.

The leaderboard is a JSON document stored on Nextcloud.  Each model entry has
the shape ``{"wins": int, "losses": int, "opponents": {name: {"wins": int,
"losses": int}}}`` (see ``update_leaderboard``).  ELO ratings are not
persisted; they are rebuilt in memory by replaying the stored results.
"""
import json
import math
import os
import sys
import threading
import time
from datetime import datetime
from typing import Dict, Any

from nc_py_api import Nextcloud

import arena_config

# Initialize Nextcloud client
nc = Nextcloud(
    nextcloud_url=arena_config.NEXTCLOUD_URL,
    nc_auth_user=arena_config.NEXTCLOUD_USERNAME,
    nc_auth_pass=arena_config.NEXTCLOUD_PASSWORD,
)

# Dictionary to store ELO ratings
elo_ratings = {}

# Tunables of the size-aware ELO scheme.
_BASE_RATING = 1000
_RATING_PER_SIZE_UNIT = 100
_BASE_K_FACTOR = 32
_DEFAULT_MODEL_SIZE = 1.0

# Medal shown instead of the numeric rank for the first three places.
_RANK_MEDALS = {1: "🥇", 2: "🥈", 3: "🥉"}


def load_leaderboard() -> Dict[str, Any]:
    """Download and decode the leaderboard JSON from Nextcloud.

    Returns:
        The parsed leaderboard, or an empty dict if the download or the JSON
        decoding fails (the error is printed, not raised).

    NOTE(review): a failed download is indistinguishable from an empty
    leaderboard for callers; ``update_leaderboard`` will then save a nearly
    empty leaderboard over the stored one.
    """
    try:
        file_content = nc.files.download(arena_config.NEXTCLOUD_LEADERBOARD_PATH)
        return json.loads(file_content.decode('utf-8'))
    except Exception as e:
        print(f"Error loading leaderboard: {str(e)}")
        return {}


def save_leaderboard(leaderboard_data: Dict[str, Any]) -> bool:
    """Serialize ``leaderboard_data`` as JSON and upload it to Nextcloud.

    Returns:
        True on success, False if serialization or upload failed (the error
        is printed, not raised).
    """
    try:
        json_data = json.dumps(leaderboard_data, indent=2)
        nc.files.upload(arena_config.NEXTCLOUD_LEADERBOARD_PATH, json_data.encode('utf-8'))
        return True
    except Exception as e:
        print(f"Error saving leaderboard: {str(e)}")
        return False


def get_model_size(model_name):
    """Return the size parsed from the model's human-readable name.

    The size is the number between the first ``(`` and the following ``B`` of
    the display name registered in ``arena_config.APPROVED_MODELS``.

    Returns:
        The parsed size as a float, or 1.0 when the model is not approved or
        its display name does not contain a parsable ``(<number>B`` part.
    """
    for model, human_readable in arena_config.APPROVED_MODELS:
        if model == model_name:
            try:
                return float(human_readable.split('(')[1].split('B')[0])
            except (IndexError, ValueError):
                # Display name without a "(<number>B" part: use the default
                # instead of crashing every rating computation.
                return _DEFAULT_MODEL_SIZE
    return _DEFAULT_MODEL_SIZE  # Default size if not found


def calculate_expected_score(rating_a, rating_b):
    """Return the ELO expected score of a player rated ``rating_a`` against ``rating_b``."""
    return 1 / (1 + math.pow(10, (rating_b - rating_a) / 400))


def _initial_rating(model):
    """Return the starting rating of ``model``: base rating plus a size bonus."""
    return _BASE_RATING + (get_model_size(model) * _RATING_PER_SIZE_UNIT)


def _max_model_size():
    """Return the largest size among the approved models (default size if none)."""
    return max(
        (get_model_size(model) for model, _ in arena_config.APPROVED_MODELS),
        default=_DEFAULT_MODEL_SIZE,
    )


def _k_factor(winner_size, loser_size, max_size):
    """Return the K-factor of a battle; it grows when the winner is the smaller model."""
    return _BASE_K_FACTOR * (1 + (loser_size - winner_size) / max_size)


def _apply_battle(ratings, winner, loser, max_size):
    """Apply one battle result to ``ratings`` in place, never triggering a re-initialization."""
    for model in (winner, loser):
        if model not in ratings:
            # Models that are not (or no longer) approved still get a rating
            # so that stored results mentioning them can be processed.
            ratings[model] = _initial_rating(model)

    expected_winner = calculate_expected_score(ratings[winner], ratings[loser])
    expected_loser = 1 - expected_winner
    k_factor = _k_factor(get_model_size(winner), get_model_size(loser), max_size)

    ratings[winner] += k_factor * (1 - expected_winner)
    ratings[loser] += k_factor * (0 - expected_loser)


def update_elo_ratings(winner, loser):
    """Update the in-memory ELO ratings for a battle won by ``winner`` over ``loser``.

    Ratings are initialized from the stored leaderboard first if they have not
    been built yet.  A model without a rating is seeded with its initial
    rating instead of forcing a full re-initialization, which previously led
    to unbounded recursion between this function and
    ``initialize_elo_ratings`` for models missing from the approved list.
    """
    ensure_elo_ratings_initialized()
    _apply_battle(elo_ratings, winner, loser, _max_model_size())


def initialize_elo_ratings():
    """Rebuild ``elo_ratings`` from scratch by replaying the stored leaderboard.

    Every approved model starts at its size-based initial rating, then each
    recorded win is replayed once.  ``update_leaderboard`` records every
    battle twice (as a win for the winner and as a loss for the loser), so
    only the ``wins`` counters are replayed; replaying the losses as well
    would apply every battle twice.
    """
    leaderboard = load_leaderboard()

    # Build into a fresh dict and swap the contents at the end so the shared
    # ``elo_ratings`` object is never observed half-replayed.
    ratings = {model: _initial_rating(model) for model, _ in arena_config.APPROVED_MODELS}
    max_size = _max_model_size()

    # Replay all battles to update ELO ratings
    for model, data in leaderboard.items():
        for opponent, results in data.get('opponents', {}).items():
            for _ in range(results.get('wins', 0)):
                _apply_battle(ratings, model, opponent, max_size)

    elo_ratings.clear()
    elo_ratings.update(ratings)


def ensure_elo_ratings_initialized():
    """Initialize the ELO ratings if none have been computed yet."""
    if not elo_ratings:
        initialize_elo_ratings()


def update_leaderboard(winner: str, loser: str) -> Dict[str, Any]:
    """Record a battle result, update the ELO ratings and save the leaderboard.

    Returns:
        The updated leaderboard dict (returned even if saving failed).
    """
    leaderboard = load_leaderboard()

    for model in (winner, loser):
        if model not in leaderboard:
            leaderboard[model] = {"wins": 0, "losses": 0, "opponents": {}}

    leaderboard[winner]["wins"] += 1
    leaderboard[winner]["opponents"].setdefault(loser, {"wins": 0, "losses": 0})["wins"] += 1

    leaderboard[loser]["losses"] += 1
    leaderboard[loser]["opponents"].setdefault(winner, {"wins": 0, "losses": 0})["losses"] += 1

    # Update ELO ratings
    update_elo_ratings(winner, loser)

    save_leaderboard(leaderboard)
    return leaderboard


def get_current_leaderboard() -> Dict[str, Any]:
    """Return the leaderboard as currently stored on Nextcloud."""
    return load_leaderboard()


def get_human_readable_name(model_name: str) -> str:
    """Return the display name of ``model_name``, or the name itself if not approved."""
    model_dict = dict(arena_config.APPROVED_MODELS)
    return model_dict.get(model_name, model_name)


def get_leaderboard():
    """Render the win-rate leaderboard as an HTML fragment.

    Each model is scored as ``win_rate * (1 - 1 / (total_battles + 1))`` and
    the rows are sorted by score, then by total battles, both descending.

    NOTE(review): the HTML tags of this function were missing from the source
    under review; the markup below is a minimal reconstruction around the
    surviving text and should be confirmed against the intended template.
    """
    leaderboard = load_leaderboard()

    # Calculate scores for each model
    for model, results in leaderboard.items():
        total_battles = results["wins"] + results["losses"]
        if total_battles > 0:
            win_rate = results["wins"] / total_battles
            results["score"] = win_rate * (1 - 1 / (total_battles + 1))
        else:
            results["score"] = 0

    # Sort results by score, then by total battles
    sorted_results = sorted(
        leaderboard.items(),
        key=lambda x: (x[1]["score"], x[1]["wins"] + x[1]["losses"]),
        reverse=True
    )

    # Explanation of the main leaderboard
    explanation = """
<p>
This leaderboard uses a scoring system that balances win rate and total battles. The score is calculated using the formula:
<br>
Score = Win Rate * (1 - 1 / (Total Battles + 1))
<br>
This formula rewards models with higher win rates and more battles. As the number of battles increases, the score approaches the win rate.
</p>
"""

    parts = [f"""
{explanation}
<table>
<tr>
<th>Rank</th>
<th>Model</th>
<th>Score</th>
<th>Wins</th>
<th>Losses</th>
<th>Win Rate</th>
<th>Total Battles</th>
<th>Top Rival</th>
<th>Toughest Opponent</th>
</tr>
"""]

    for index, (model, results) in enumerate(sorted_results, start=1):
        total_battles = results["wins"] + results["losses"]
        win_rate = (results["wins"] / total_battles * 100) if total_battles > 0 else 0

        rank_display = _RANK_MEDALS.get(index, f"{index}")

        # Opponent this model has beaten most often.
        top_rival = max(results["opponents"].items(), key=lambda x: x[1]["wins"], default=(None, {"wins": 0}))
        top_rival_name = get_human_readable_name(top_rival[0]) if top_rival[0] else "N/A"
        top_rival_wins = top_rival[1]["wins"]

        # Opponent this model has lost to most often.
        toughest_opponent = max(results["opponents"].items(), key=lambda x: x[1]["losses"], default=(None, {"losses": 0}))
        toughest_opponent_name = get_human_readable_name(toughest_opponent[0]) if toughest_opponent[0] else "N/A"
        toughest_opponent_losses = toughest_opponent[1]["losses"]

        parts.append(f"""
<tr>
<td>{rank_display}</td>
<td>{get_human_readable_name(model)}</td>
<td>{results['score']:.4f}</td>
<td>{results['wins']}</td>
<td>{results['losses']}</td>
<td>{win_rate:.2f}%</td>
<td>{total_battles}</td>
<td>{top_rival_name} (W: {top_rival_wins})</td>
<td>{toughest_opponent_name} (L: {toughest_opponent_losses})</td>
</tr>
""")

    parts.append("</table>")
    return "".join(parts)


def get_elo_leaderboard():
    """Render the ELO leaderboard as an HTML fragment, sorted by rating descending.

    "Points Scored" / "Points Lost" are computed from the *current* ratings of
    the model and each opponent, multiplied by the recorded number of wins /
    losses against that opponent.

    NOTE(review): the HTML tags of this function were missing from the source
    under review; the markup below is a minimal reconstruction around the
    surviving text and should be confirmed against the intended template.
    """
    ensure_elo_ratings_initialized()
    leaderboard = load_leaderboard()

    sorted_ratings = sorted(elo_ratings.items(), key=lambda x: x[1], reverse=True)

    initial_ratings = [_initial_rating(model) for model, _ in arena_config.APPROVED_MODELS]
    min_initial_rating = min(initial_ratings, default=_BASE_RATING)
    max_initial_rating = max(initial_ratings, default=_BASE_RATING)

    explanation_elo = f"""
<p>
This leaderboard uses a modified ELO rating system that takes into account both the performance and size of the models. Initial ratings range from {round(min_initial_rating)} to {round(max_initial_rating)} points, based on model size, with larger models starting at higher ratings. When a smaller model defeats a larger one, it gains more points, while larger models gain fewer points for beating smaller ones. The "Points Scored" column shows the total ELO points gained by the model from its victories, reflecting both quantity and quality of wins. The "Points Lost" column shows the total ELO points lost by the model from its defeats, indicating the challenges faced.
</p>
"""

    parts = [f"""
{explanation_elo}
<table>
<tr>
<th>Rank</th>
<th>Model</th>
<th>ELO Rating</th>
<th>Points Scored</th>
<th>Points Lost</th>
</tr>
"""]

    # Invariant across all rows and opponents, so compute it once.
    max_size = _max_model_size()

    for index, (model, rating) in enumerate(sorted_ratings, start=1):
        rank_display = _RANK_MEDALS.get(index, f"{index}")
        model_size = get_model_size(model)

        points_scored = 0
        points_lost = 0
        if model in leaderboard:
            for opponent, results in leaderboard[model]['opponents'].items():
                opponent_rating = elo_ratings.get(opponent, 1000)
                opponent_size = get_model_size(opponent)
                expected_score = calculate_expected_score(rating, opponent_rating)

                # Every win (loss) against the same opponent contributes the
                # same amount, so multiply instead of looping per battle.
                points_scored += (
                    results['wins']
                    * _k_factor(model_size, opponent_size, max_size)
                    * (1 - expected_score)
                )
                points_lost += (
                    results['losses']
                    * _k_factor(opponent_size, model_size, max_size)
                    * expected_score
                )

        parts.append(f"""
<tr>
<td>{rank_display}</td>
<td>{get_human_readable_name(model)}</td>
<td>{round(rating)}</td>
<td>{round(points_scored, 2)}</td>
<td>{round(points_lost, 2)}</td>
</tr>
""")

    parts.append("</table>")
    return "".join(parts)


def create_backup():
    """Upload a timestamped copy of the leaderboard to Nextcloud once per hour, forever.

    Errors are printed and the loop continues; intended to run in a
    background thread (see ``start_backup_thread``).
    """
    while True:
        try:
            leaderboard_data = load_leaderboard()
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            backup_file_name = f"leaderboard_backup_{timestamp}.json"
            backup_path = f"{arena_config.NEXTCLOUD_BACKUP_FOLDER}/{backup_file_name}"
            json_data = json.dumps(leaderboard_data, indent=2)
            nc.files.upload(backup_path, json_data.encode('utf-8'))
            print(f"Backup created on Nextcloud: {backup_path}")
        except Exception as e:
            print(f"Error creating backup: {e}")
        time.sleep(3600)  # Sleep for 1 HOUR


def start_backup_thread():
    """Start ``create_backup`` in a daemon thread so it never blocks interpreter exit."""
    backup_thread = threading.Thread(target=create_backup, daemon=True)
    backup_thread.start()