from typing import TypedDict

import pandas as pd
import trueskill as ts

import db

MU_INIT = ts.Rating().mu
SIGMA_INIT = ts.Rating().sigma
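# ts.Rating() with no arguments uses TrueSkill's standard priors
# (mu = 25.0, sigma = 25/3), so every prompt starts from the same rating.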


class Prompt(TypedDict):
    id: int
    name: str
    text: str


class Arena:
    """
    An arena for comparing and ranking prompts using the TrueSkill algorithm.
    """
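    # Minimal usage sketch (hypothetical ids and user_state; assumes the db
    # module backs the "prompts", "estimates", and "votes" tables used below):
    #
    #     arena = Arena()
    #     arena.init_estimates(prompt_id=1)
    #     match = arena.select_match({"id": 7, "team": "alpha"})
    #     if match is not None:
    #         prompt_a, prompt_b = match
    #         arena.record_result(prompt_a["id"], prompt_b["id"], user_id=7)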

    def init_estimates(self, prompt_id: int) -> None:
        """
        Initialise a prompt's estimates with default TrueSkill ratings.
        """
        estimates = db.load("estimates")
        # Drop any existing estimate so the prompt's rating is reset.
        if prompt_id in estimates["prompt_id"].values:
            db.delete(
                "estimates",
                int(estimates[estimates["prompt_id"] == prompt_id].iloc[0].id),
            )

        db.insert(
            "estimates",
            {
                "prompt_id": prompt_id,
                "mu": MU_INIT,
                "sigma": SIGMA_INIT,
            },
        )

    def select_match(self, user_state) -> tuple[Prompt, Prompt] | None:
        """
        Select two prompts for a match, favouring those with high uncertainty.

        Returns:
            A tuple with the two prompts to compare (prompt_a, prompt_b),
            or None if the user has already voted on every possible match.
        """

        estimates = db.load("estimates")

        # Exclude the current user's own prompt from the candidate pool.
        estimates = estimates[
            estimates["prompt_id"] != db.get_prompt_id(user_state["team"])
        ]

        def order_match(id_a, id_b):
            """Return the pair of ids in ascending order, so (a, b) == (b, a)."""
            return (id_a, id_b) if id_a < id_b else (id_b, id_a)

        # Cross-join all candidate pairs, then rank them: highest uncertainty
        # (sigma_a) first, then closest skill estimates (smallest delta_mu).
        matches = (
            estimates.merge(estimates, how="cross", suffixes=("_a", "_b"))
            .query("id_a != id_b")
            .assign(delta_mu=lambda df_: abs(df_["mu_a"] - df_["mu_b"]))
            .sort_values(by=["sigma_a", "delta_mu"], ascending=[False, True])
            .assign(
                match=lambda df_: df_.apply(
                    lambda row: order_match(int(row["id_a"]), int(row["id_b"])), axis=1
                )
            )
        )

        user_votes = db.load("votes").loc[
            lambda df_: df_["user_id"] == user_state["id"]
        ]

        # Normalise each past vote to the same ordered-pair key as `matches`.
        if user_votes.empty:
            user_votes = user_votes.assign(match=[])
        else:
            user_votes = user_votes.assign(
                match=lambda df_: df_.apply(
                    lambda row: order_match(
                        int(row["winner_id"]), int(row["loser_id"])
                    ),
                    axis=1,
                )
            )

        # Keep only the pairings this user has not voted on yet.
        user_matches = matches.loc[~matches["match"].isin(user_votes["match"])]

        if user_matches.empty:
            return None

        selected_match = user_matches.iloc[0]
        prompts = db.load("prompts")
        prompt_a = (
            prompts.query(f"id == {selected_match['prompt_id_a']}").iloc[0].to_dict()
        )
        prompt_b = (
            prompts.query(f"id == {selected_match['prompt_id_b']}").iloc[0].to_dict()
        )
        return prompt_a, prompt_b

    def record_result(self, winner_id: int, loser_id: int, user_id: int) -> None:
        """
        Update both prompts' ratings after a match and record the vote.
        """
        estimates = db.load("estimates")
        winner_estimate = (
            estimates[estimates["prompt_id"] == winner_id].iloc[0].to_dict()
        )
        loser_estimate = estimates[estimates["prompt_id"] == loser_id].iloc[0].to_dict()

        winner_rating = ts.Rating(winner_estimate["mu"], winner_estimate["sigma"])
        loser_rating = ts.Rating(loser_estimate["mu"], loser_estimate["sigma"])

        # rate_1vs1 expects the winner first and returns both updated ratings.
        winner_new_rating, loser_new_rating = ts.rate_1vs1(winner_rating, loser_rating)

        db.update(
            "estimates",
            winner_estimate["id"],
            {"mu": winner_new_rating.mu, "sigma": winner_new_rating.sigma},
        )
        db.update(
            "estimates",
            loser_estimate["id"],
            {"mu": loser_new_rating.mu, "sigma": loser_new_rating.sigma},
        )

        db.insert(
            "votes",
            {
                "winner_id": winner_id,
                "loser_id": loser_id,
                "user_id": user_id,
            },
        )
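
    # For intuition: with default settings, the first 1v1 between two fresh
    # ratings (mu = 25.0, sigma = 25/3) moves the winner to roughly mu = 29.4
    # and the loser to roughly mu = 20.6, with both sigmas shrinking to
    # about 7.17.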

    def get_rankings(self) -> pd.DataFrame:
        """
        Get the current prompt rankings.

        Returns:
            DataFrame with one row per prompt, sorted by mu (descending),
            with the columns "position" and "team".
        """
        prompts = db.load("prompts")
        estimates = db.load("estimates").drop(columns=["id"])
        rankings = prompts.merge(estimates, left_on="id", right_on="prompt_id").drop(
            columns=["id", "prompt_id"]
        )
        # Rank by estimated skill (mu) alone; higher mu means higher skill.
        rankings = rankings.sort_values(by="mu", ascending=False)

        rankings["position"] = range(1, len(rankings) + 1)

        return rankings[["position", "team"]]
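
    # Note: plain mu is the sort key above; TrueSkill also offers
    # ts.expose(rating) (mu - k*sigma, with k = 3 under default settings) as
    # a more conservative leaderboard key.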

    def get_competition_matrix(self) -> pd.DataFrame:
        """
        Get the head-to-head matrix of the prompts.

        Returns:
            DataFrame with team names as rows and columns; each cell holds
            the win rate of the row team against the column team.
        """
        prompts = db.load("prompts")
        votes = db.load("votes")

        competition_matrix = pd.DataFrame(
            index=prompts["team"], columns=prompts["team"], data=0
        )
        competition_matrix.index.name = None
        competition_matrix.columns.name = None

        wins = competition_matrix.copy()
        matches = competition_matrix.copy()

        # Tally wins and total encounters for every (winner, loser) pair.
        for _, row in votes.iterrows():
            winner_name = prompts.loc[prompts["id"] == row["winner_id"], "team"].values[
                0
            ]
            loser_name = prompts.loc[prompts["id"] == row["loser_id"], "team"].values[0]
            wins.at[winner_name, loser_name] += 1
            matches.at[winner_name, loser_name] += 1
            matches.at[loser_name, winner_name] += 1

        # Win rate = wins / matches; pairs that never met divide 0/0 into NaN.
        competition_matrix = wins.div(matches)

        # Format as whole percentages; NaN and zero win rates are left blank
        # and rendered as "?" below.
        competition_matrix = competition_matrix.map(
            lambda x: "" if pd.isna(x) or x == 0 else f"{x:.0%}"
        )

        # A team never plays itself.
        for i in range(len(competition_matrix)):
            competition_matrix.iloc[i, i] = "X"

        competition_matrix = competition_matrix.replace("", "?").reset_index(names="")

        return competition_matrix
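
    # Hypothetical rendering, where alpha beat beta in 2 of their 3 matches:
    #
    #              alpha  beta
    #     alpha      X    67%
    #     beta      33%    X
    #
    # "?" appears for pairs that never met (and, as coded, for 0% win rates).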

    def get_progress(self) -> str:
        """
        Return statistics about the tournament's progress.

        Returns:
            A short message with the total number of prompts, the number of
            matches played, the average rating uncertainty, and an estimated
            completion percentage derived from how far the average sigma has
            dropped from its initial value.
        """
        prompts = db.load("prompts")
        estimates = db.load("estimates")
        votes = db.load("votes")

        avg_sigma = estimates["sigma"].mean()

        # Progress: share of the initial uncertainty already resolved,
        # clamped to [0, 100].
        progress = min(100, max(0, (1 - avg_sigma / SIGMA_INIT) * 100))

        msg = f"""{len(prompts)} submissions to rank
{len(votes)} matches played
{avg_sigma:.2f} average uncertainty
{progress:.0f}% estimated progress"""

        return msg