zhiminy committed on
Commit
5c3511e
·
1 Parent(s): 19a995e

use consistency score instead

Browse files
Files changed (1) hide show
  1. app.py +10 -10
app.py CHANGED
@@ -366,7 +366,7 @@ def get_leaderboard_data(feedback_entry=None):
366
  "Rank",
367
  "Model",
368
  "Elo Score",
369
- "Instability Score",
370
  "Average Win Rate",
371
  "Bradley-Terry Coefficient",
372
  "Eigenvector Centrality Value",
@@ -404,7 +404,7 @@ def get_leaderboard_data(feedback_entry=None):
404
  feedback_df["left"], feedback_df["right"], feedback_df["winner"]
405
  )
406
 
407
- # Calculate instability score as a pandas Series aligned with other metrics
408
  is_result = pd.Series(0.0, index=elo_result.scores.index) # Initialize with zeros using same index
409
 
410
  # Loop through models and update values
@@ -414,20 +414,20 @@ def get_leaderboard_data(feedback_entry=None):
414
  (feedback_df["left"] == model) &
415
  (feedback_df["right"] == model)
416
  ]
417
- total = len(self_matches)
418
 
419
- if total:
420
  # Count non-draw outcomes (wins or losses)
421
- non_draws = self_matches[self_matches["winner"] != evalica.Winner.Draw].shape[0]
422
  # Store as percentage directly
423
- is_result[model] = non_draws / total
424
 
425
  # Combine all results into a single DataFrame
426
  leaderboard_data = pd.DataFrame(
427
  {
428
  "Model": elo_result.scores.index,
429
  "Elo Score": elo_result.scores.values,
430
- "Instability Score": is_result.values * 100,
431
  "Average Win Rate": avr_result.scores.values * 100,
432
  "Bradley-Terry Coefficient": bt_result.scores.values,
433
  "Eigenvector Centrality Value": eigen_result.scores.values,
@@ -440,7 +440,7 @@ def get_leaderboard_data(feedback_entry=None):
440
  leaderboard_data = leaderboard_data.round(
441
  {
442
  "Elo Score": 2,
443
- "Instability Score": 2,
444
  "Average Win Rate": 2,
445
  "Bradley-Terry Coefficient": 2,
446
  "Eigenvector Centrality Value": 2,
@@ -492,12 +492,12 @@ with gr.Blocks() as app:
492
  "Rank",
493
  "Model",
494
  "Elo Score",
495
- "Instability Score",
496
  ],
497
  search_columns=["Model"],
498
  filter_columns=[
499
  "Elo Score",
500
- "Instability Score",
501
  "Average Win Rate",
502
  "Bradley-Terry Coefficient",
503
  "Eigenvector Centrality Value",
 
366
  "Rank",
367
  "Model",
368
  "Elo Score",
369
+ "Consistency Score",
370
  "Average Win Rate",
371
  "Bradley-Terry Coefficient",
372
  "Eigenvector Centrality Value",
 
404
  feedback_df["left"], feedback_df["right"], feedback_df["winner"]
405
  )
406
 
407
+ # Calculate consistency score as a pandas Series aligned with other metrics
408
  is_result = pd.Series(0.0, index=elo_result.scores.index) # Initialize with zeros using same index
409
 
410
  # Loop through models and update values
 
414
  (feedback_df["left"] == model) &
415
  (feedback_df["right"] == model)
416
  ]
417
+ totals = len(self_matches)
418
 
419
+ if totals:
420
  # Count draw outcomes (self-match agreements)
421
+ draws = self_matches[self_matches["winner"] == evalica.Winner.Draw].shape[0]
422
  # Store as percentage directly
423
+ is_result[model] = draws / totals
424
 
425
  # Combine all results into a single DataFrame
426
  leaderboard_data = pd.DataFrame(
427
  {
428
  "Model": elo_result.scores.index,
429
  "Elo Score": elo_result.scores.values,
430
+ "Consistency Score": is_result.values * 100,
431
  "Average Win Rate": avr_result.scores.values * 100,
432
  "Bradley-Terry Coefficient": bt_result.scores.values,
433
  "Eigenvector Centrality Value": eigen_result.scores.values,
 
440
  leaderboard_data = leaderboard_data.round(
441
  {
442
  "Elo Score": 2,
443
+ "Consistency Score": 2,
444
  "Average Win Rate": 2,
445
  "Bradley-Terry Coefficient": 2,
446
  "Eigenvector Centrality Value": 2,
 
492
  "Rank",
493
  "Model",
494
  "Elo Score",
495
+ "Consistency Score",
496
  ],
497
  search_columns=["Model"],
498
  filter_columns=[
499
  "Elo Score",
500
+ "Consistency Score",
501
  "Average Win Rate",
502
  "Bradley-Terry Coefficient",
503
  "Eigenvector Centrality Value",