zhiminy committed on
Commit
1da2cba
·
1 Parent(s): deaae24

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -16
app.py CHANGED
@@ -409,7 +409,7 @@ def get_leaderboard_data(vote_entry=None):
409
  pagerank_result = evalica.pagerank(
410
  vote_df["left"], vote_df["right"], vote_df["winner"]
411
  )
412
-
413
  # Load conversation data from the Hugging Face repository
414
  conversation_data = load_content_from_hf("SE-Arena/conversations")
415
  conversation_df = pd.DataFrame(conversation_data)
@@ -418,16 +418,16 @@ def get_leaderboard_data(vote_entry=None):
418
  all_df = pd.merge(
419
  vote_df, conversation_df, on=["timestamp", "left", "right"], how="inner"
420
  )
421
-
422
  # Calculate Conversation Efficiency Indexs more efficiently
423
  # Create a dictionary to store accumulated scores and counts for each model
424
  model_rcs_sum = {}
425
  model_rcs_max = {}
426
-
427
  # Process each row once and accumulate scores
428
  for _, row in all_df.iterrows():
429
  # Determine scores based on winner
430
- match row["winner"]:
431
  case evalica.Winner.X:
432
  left_score = 1.0
433
  right_score = -1.0
@@ -437,29 +437,45 @@ def get_leaderboard_data(vote_entry=None):
437
  case _: # Draw
438
  left_score = 0.1
439
  right_score = 0.1
440
-
441
  # Count rounds for each side
442
  left_round = sum(1 for msg in row["left_chat"] if msg["role"] == "assistant")
443
  right_round = sum(1 for msg in row["right_chat"] if msg["role"] == "assistant")
444
-
445
  left_model = row["left"]
446
  right_model = row["right"]
447
-
448
  model_rcs_max[left_model] = model_rcs_max.get(left_model, 0) + 1.0 / left_round
449
- model_rcs_max[right_model] = model_rcs_max.get(right_model, 0) + 1.0 / right_round
450
-
 
 
451
  # Calculate per-round scores
452
- model_rcs_sum[left_model] = model_rcs_sum.get(left_model, 0) + left_score / left_round
453
- model_rcs_sum[right_model] = model_rcs_sum.get(right_model, 0) + right_score / right_round
 
 
 
 
454
 
455
- cei_result = {model: model_rcs_sum[model] / model_rcs_max[model] for model in model_rcs_sum}
456
- cei_result = pd.Series({model: cei_result[model] for model in elo_result.scores.index})
 
 
 
 
457
 
458
  self_matches = vote_df[vote_df["left"] == vote_df["right"]]
459
  model_matches = self_matches.groupby("left")
460
- draw_counts = model_matches["winner"].apply(lambda x: (x == evalica.Winner.Draw).sum())
 
 
461
  total_counts = model_matches.size()
462
- mcs_result = (draw_counts / total_counts).round(2).reindex(elo_result.scores.index, fill_value="N/A")
 
 
 
 
463
 
464
  # Combine all results into a single DataFrame
465
  leaderboard_data = pd.DataFrame(
@@ -491,7 +507,7 @@ def get_leaderboard_data(vote_entry=None):
491
 
492
  # Add a Rank column based on Elo scores
493
  leaderboard_data["Rank"] = (
494
- leaderboard_data["Elo Score"].rank(ascending=False).astype(int)
495
  )
496
 
497
  # Place rank in the first column
 
409
  pagerank_result = evalica.pagerank(
410
  vote_df["left"], vote_df["right"], vote_df["winner"]
411
  )
412
+
413
  # Load conversation data from the Hugging Face repository
414
  conversation_data = load_content_from_hf("SE-Arena/conversations")
415
  conversation_df = pd.DataFrame(conversation_data)
 
418
  all_df = pd.merge(
419
  vote_df, conversation_df, on=["timestamp", "left", "right"], how="inner"
420
  )
421
+
422
  # Calculate Conversation Efficiency Indexs more efficiently
423
  # Create a dictionary to store accumulated scores and counts for each model
424
  model_rcs_sum = {}
425
  model_rcs_max = {}
426
+
427
  # Process each row once and accumulate scores
428
  for _, row in all_df.iterrows():
429
  # Determine scores based on winner
430
+ match row["winner"]:
431
  case evalica.Winner.X:
432
  left_score = 1.0
433
  right_score = -1.0
 
437
  case _: # Draw
438
  left_score = 0.1
439
  right_score = 0.1
440
+
441
  # Count rounds for each side
442
  left_round = sum(1 for msg in row["left_chat"] if msg["role"] == "assistant")
443
  right_round = sum(1 for msg in row["right_chat"] if msg["role"] == "assistant")
444
+
445
  left_model = row["left"]
446
  right_model = row["right"]
447
+
448
  model_rcs_max[left_model] = model_rcs_max.get(left_model, 0) + 1.0 / left_round
449
+ model_rcs_max[right_model] = (
450
+ model_rcs_max.get(right_model, 0) + 1.0 / right_round
451
+ )
452
+
453
  # Calculate per-round scores
454
+ model_rcs_sum[left_model] = (
455
+ model_rcs_sum.get(left_model, 0) + left_score / left_round
456
+ )
457
+ model_rcs_sum[right_model] = (
458
+ model_rcs_sum.get(right_model, 0) + right_score / right_round
459
+ )
460
 
461
+ cei_result = {
462
+ model: model_rcs_sum[model] / model_rcs_max[model] for model in model_rcs_sum
463
+ }
464
+ cei_result = pd.Series(
465
+ {model: cei_result[model] for model in elo_result.scores.index}
466
+ )
467
 
468
  self_matches = vote_df[vote_df["left"] == vote_df["right"]]
469
  model_matches = self_matches.groupby("left")
470
+ draw_counts = model_matches["winner"].apply(
471
+ lambda x: (x == evalica.Winner.Draw).sum()
472
+ )
473
  total_counts = model_matches.size()
474
+ mcs_result = (
475
+ (draw_counts / total_counts)
476
+ .round(2)
477
+ .reindex(elo_result.scores.index, fill_value="N/A")
478
+ )
479
 
480
  # Combine all results into a single DataFrame
481
  leaderboard_data = pd.DataFrame(
 
507
 
508
  # Add a Rank column based on Elo scores
509
  leaderboard_data["Rank"] = (
510
+ leaderboard_data["Elo Score"].rank(method="min", ascending=False).astype(int)
511
  )
512
 
513
  # Place rank in the first column