use consistency score instead
app.py CHANGED

@@ -366,7 +366,7 @@ def get_leaderboard_data(feedback_entry=None):
             "Rank",
             "Model",
             "Elo Score",
-            "
+            "Consistency Score",
             "Average Win Rate",
             "Bradley-Terry Coefficient",
             "Eigenvector Centrality Value",
@@ -404,7 +404,7 @@ def get_leaderboard_data(feedback_entry=None):
         feedback_df["left"], feedback_df["right"], feedback_df["winner"]
     )
 
-    # Calculate
+    # Calculate consistency score as a pandas Series aligned with other metrics
     is_result = pd.Series(0.0, index=elo_result.scores.index)  # Initialize with zeros using same index
 
     # Loop through models and update values
@@ -414,20 +414,20 @@ def get_leaderboard_data(feedback_entry=None):
             (feedback_df["left"] == model) &
             (feedback_df["right"] == model)
         ]
-
+        totals = len(self_matches)
 
-        if
+        if totals:
             # Count non-draw outcomes (wins or losses)
-
+            draws = self_matches[self_matches["winner"] == evalica.Winner.Draw].shape[0]
             # Store as percentage directly
-        is_result[model] =
+            is_result[model] = draws / totals
 
     # Combine all results into a single DataFrame
     leaderboard_data = pd.DataFrame(
         {
             "Model": elo_result.scores.index,
             "Elo Score": elo_result.scores.values,
-            "
+            "Consistency Score": is_result.values * 100,
             "Average Win Rate": avr_result.scores.values * 100,
             "Bradley-Terry Coefficient": bt_result.scores.values,
             "Eigenvector Centrality Value": eigen_result.scores.values,
@@ -440,7 +440,7 @@ def get_leaderboard_data(feedback_entry=None):
     leaderboard_data = leaderboard_data.round(
         {
             "Elo Score": 2,
-            "
+            "Consistency Score": 2,
             "Average Win Rate": 2,
             "Bradley-Terry Coefficient": 2,
             "Eigenvector Centrality Value": 2,
@@ -492,12 +492,12 @@ with gr.Blocks() as app:
             "Rank",
             "Model",
             "Elo Score",
-            "
+            "Consistency Score",
         ],
         search_columns=["Model"],
         filter_columns=[
             "Elo Score",
-            "
+            "Consistency Score",
             "Average Win Rate",
             "Bradley-Terry Coefficient",
             "Eigenvector Centrality Value",
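For context: the new code defines a model's Consistency Score as the share of its self-matches (rows where the same model appears as both "left" and "right") that end in a draw, scaled to a percentage when placed in the leaderboard. Below is a minimal standalone sketch of that logic on made-up data; the model names and votes are hypothetical, and it assumes only evalica's elo() and Winner enum as already used in the diff.

import evalica
import pandas as pd

# Hypothetical feedback rows, using the same columns as the app ("left", "right", "winner").
feedback_df = pd.DataFrame(
    {
        "left":   ["model-a", "model-a", "model-b", "model-a"],
        "right":  ["model-a", "model-a", "model-b", "model-b"],
        "winner": [
            evalica.Winner.Draw,  # self-match that draws -> consistent
            evalica.Winner.X,     # self-match with a winner -> inconsistent
            evalica.Winner.Draw,  # self-match that draws -> consistent
            evalica.Winner.X,     # cross-model match, ignored by this score
        ],
    }
)

elo_result = evalica.elo(
    feedback_df["left"], feedback_df["right"], feedback_df["winner"]
)

# Fraction of each model's self-matches that end in a draw, mirroring the diff.
is_result = pd.Series(0.0, index=elo_result.scores.index)
for model in is_result.index:
    self_matches = feedback_df[
        (feedback_df["left"] == model) & (feedback_df["right"] == model)
    ]
    totals = len(self_matches)
    if totals:
        draws = self_matches[self_matches["winner"] == evalica.Winner.Draw].shape[0]
        is_result[model] = draws / totals

print(is_result * 100)  # model-a: 50.0, model-b: 100.0

Note that the percentage scaling happens once, where the Series enters the leaderboard DataFrame ("Consistency Score": is_result.values * 100), so is_result itself stays a 0-1 fraction.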