zhiminy committed on
Commit
0f4d845
·
1 Parent(s): 9371dc4

refine update logic

Browse files
Files changed (1) hide show
  1. app.py +100 -100
app.py CHANGED
@@ -28,9 +28,6 @@ openai_client = OpenAI(api_key=api_key, base_url=base_url)
28
  # Timeout in seconds for model responses
29
  TIMEOUT = 90
30
 
31
- # leaderboard data
32
- leaderboard_data = None
33
-
34
  # Hint string constant
35
  SHOW_HINT_STRING = True # Set to False to hide the hint string altogether
36
  HINT_STRING = "Once signed in, your votes will be recorded securely."
@@ -252,10 +249,7 @@ def chat_with_models(
252
 
253
  def request_model_response():
254
  try:
255
- request_params = {
256
- "model": model_name,
257
- "messages": truncated_input
258
- }
259
  response = openai_client.chat.completions.create(**request_params)
260
  model_response["content"] = response.choices[0].message.content
261
  except Exception as e:
@@ -366,89 +360,94 @@ def load_content_from_hf(repo_name="SE-Arena/votes"):
366
  raise Exception("Error loading feedback data from Hugging Face repository.")
367
 
368
 
369
- def get_leaderboard_data():
370
- global leaderboard_data
371
- if leaderboard_data is None:
372
- # Load feedback data from the Hugging Face repository
373
- try:
374
- feedback_data = load_content_from_hf()
375
- feedback_df = pd.DataFrame(feedback_data)
376
-
377
- # map vote to winner
378
- feedback_df["winner"] = feedback_df["winner"].map(
379
- {
380
- "left": evalica.Winner.X,
381
- "right": evalica.Winner.Y,
382
- "tie": evalica.Winner.Draw,
383
- }
384
- )
385
 
386
- # Calculate scores using various metrics
387
- avr_result = evalica.average_win_rate(
388
- feedback_df["left"], feedback_df["right"], feedback_df["winner"]
389
- )
390
- bt_result = evalica.bradley_terry(
391
- feedback_df["left"], feedback_df["right"], feedback_df["winner"]
392
- )
393
- newman_result = evalica.newman(
394
- feedback_df["left"], feedback_df["right"], feedback_df["winner"]
395
- )
396
- eigen_result = evalica.eigen(
397
- feedback_df["left"], feedback_df["right"], feedback_df["winner"]
398
- )
399
- elo_result = evalica.elo(
400
- feedback_df["left"], feedback_df["right"], feedback_df["winner"]
401
- )
402
- pagerank_result = evalica.pagerank(
403
- feedback_df["left"], feedback_df["right"], feedback_df["winner"]
404
- )
405
 
406
- # Combine all results into a single DataFrame
407
- leaderboard_data = pd.DataFrame(
408
- {
409
- "Model": elo_result.scores.index,
410
- "Elo Score": elo_result.scores.values,
411
- "Average Win Rate": avr_result.scores.values * 100,
412
- "Bradley-Terry Coefficient": bt_result.scores.values,
413
- "Eigenvector Centrality Value": eigen_result.scores.values,
414
- "Newman Modularity Score": newman_result.scores.values,
415
- "PageRank Score": pagerank_result.scores.values,
416
- }
417
- )
 
418
 
419
- # Round all numeric columns to two decimal places
420
- leaderboard_data = leaderboard_data.round(
421
- {
422
- "Elo Score": 2,
423
- "Average Win Rate": 2,
424
- "Bradley-Terry Coefficient": 2,
425
- "Eigenvector Centrality Value": 2,
426
- "Newman Modularity Score": 2,
427
- "PageRank Score": 2,
428
- }
429
- )
430
 
431
- # Add a Rank column based on Elo scores
432
- leaderboard_data["Rank"] = (
433
- leaderboard_data["Elo Score"].rank(ascending=False).astype(int)
434
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
435
 
436
- # Place rank in the first column
437
- leaderboard_data = leaderboard_data[["Rank"] + [col for col in leaderboard_data.columns if col != "Rank"]]
438
- except:
439
- # If no feedback exists, return an empty DataFrame
440
- return pd.DataFrame(
441
- columns=[
442
- "Rank",
443
- "Model",
444
- "Elo Score",
445
- "Average Win Rate",
446
- "Bradley-Terry Coefficient",
447
- "Eigenvector Centrality Value",
448
- "Newman Modularity Score",
449
- "PageRank Score",
450
- ]
451
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
452
  return leaderboard_data
453
 
454
 
@@ -536,7 +535,7 @@ with gr.Blocks() as app:
536
  login_button = gr.Button(
537
  "Sign in with Hugging Face", elem_id="oauth-button"
538
  )
539
-
540
  # NEW: Add a textbox for the repository URL above the user prompt
541
  repo_url = gr.Textbox(
542
  show_label=False,
@@ -544,7 +543,7 @@ with gr.Blocks() as app:
544
  lines=1,
545
  interactive=False,
546
  )
547
-
548
  # Components with initial non-interactive state
549
  shared_input = gr.Textbox(
550
  show_label=False,
@@ -648,7 +647,11 @@ with gr.Blocks() as app:
648
  repo_info, user_input, models_state, conversation_state
649
  ):
650
  # Combine repo-related information (if any) and user query into one prompt.
651
- combined_user_input = f"Repo-related Information: {fetch_url_content(repo_info)}\n\n{user_input}" if repo_info else user_input
 
 
 
 
652
 
653
  # Dynamically select two random models
654
  if len(available_models) < 2:
@@ -775,7 +778,7 @@ with gr.Blocks() as app:
775
  print(f"Login failed: {e}")
776
  return (
777
  gr.update(visible=True), # Keep the login button visible
778
- gr.update(interactive=False), # repo_url -> disable if login failed
779
  gr.update(interactive=False), # Keep shared_input disabled
780
  gr.update(interactive=False), # Keep send_first disabled
781
  gr.update(
@@ -791,7 +794,7 @@ with gr.Blocks() as app:
791
  inputs=[],
792
  outputs=[
793
  login_button, # Hide the login button after successful login
794
- repo_url, # Keep this in sync with shared_input
795
  shared_input, # Enable shared_input
796
  send_first, # Enable send_first button
797
  feedback, # Enable feedback radio buttons
@@ -923,10 +926,7 @@ with gr.Blocks() as app:
923
  "winner": winner_model,
924
  "timestamp": datetime.now().strftime("%Y%m%d_%H%M%S"),
925
  }
926
-
927
- # Concatenate the new feedback with the existing leaderboard data
928
- leaderboard_data = pd.concat([get_leaderboard_data(), pd.DataFrame([feedback_entry])], ignore_index=True)
929
-
930
  # Save feedback back to the Hugging Face dataset
931
  save_content_to_hf(feedback_entry, "SE-Arena/votes")
932
 
@@ -942,9 +942,7 @@ with gr.Blocks() as app:
942
  gr.update(
943
  value="", interactive=True, visible=True
944
  ), # Clear shared_input
945
- gr.update(
946
- value="", interactive=True, visible=True
947
- ), # Clear repo_url
948
  gr.update(value="", visible=False), # Hide user_prompt_md
949
  gr.update(value="", visible=False), # Hide response_a_title
950
  gr.update(value="", visible=False), # Hide response_b_title
@@ -958,9 +956,11 @@ with gr.Blocks() as app:
958
  gr.update(
959
  value="Can't Decide", interactive=True
960
  ), # Reset feedback selection
961
- leaderboard_data, # Updated leaderboard data
962
  gr.update(visible=True), # Show the thanks message
963
- gr.update(value="", interactive=True, visible=True), # Show the repo-related url message
 
 
964
  )
965
 
966
  # Update the click event for the submit feedback button
@@ -969,7 +969,7 @@ with gr.Blocks() as app:
969
  inputs=[feedback, models_state, conversation_state],
970
  outputs=[
971
  shared_input, # Reset shared_input
972
- repo_url, # Show the repo-related URL message
973
  user_prompt_md, # Hide user_prompt_md
974
  response_a_title, # Hide Model A title
975
  response_b_title, # Hide Model B title
 
28
  # Timeout in seconds for model responses
29
  TIMEOUT = 90
30
 
 
 
 
31
  # Hint string constant
32
  SHOW_HINT_STRING = True # Set to False to hide the hint string altogether
33
  HINT_STRING = "Once signed in, your votes will be recorded securely."
 
249
 
250
  def request_model_response():
251
  try:
252
+ request_params = {"model": model_name, "messages": truncated_input}
 
 
 
253
  response = openai_client.chat.completions.create(**request_params)
254
  model_response["content"] = response.choices[0].message.content
255
  except Exception as e:
 
360
  raise Exception("Error loading feedback data from Hugging Face repository.")
361
 
362
 
363
def get_leaderboard_data(feedback_entry=None):
    """Build the ranked leaderboard DataFrame from stored feedback votes.

    Parameters
    ----------
    feedback_entry : dict, optional
        A freshly submitted vote to fold in before scores are recomputed
        (useful because the remote dataset may not reflect it yet).

    Returns
    -------
    pd.DataFrame
        Leaderboard ranked by Elo score; a columns-only empty DataFrame
        when no feedback exists.
    """
    empty_leaderboard = pd.DataFrame(
        columns=[
            "Rank",
            "Model",
            "Elo Score",
            "Average Win Rate",
            "Bradley-Terry Coefficient",
            "Eigenvector Centrality Value",
            "Newman Modularity Score",
            "PageRank Score",
        ]
    )

    # Load feedback data from the Hugging Face repository. The loader raises
    # when no votes are stored yet; treat that the same as "no feedback" so
    # the empty-leaderboard path below is actually reachable.
    try:
        feedback_data = load_content_from_hf()
    except Exception:
        feedback_data = []
    feedback_df = pd.DataFrame(feedback_data)

    # Concatenate the new feedback with the existing feedback data
    if feedback_entry is not None:
        feedback_df = pd.concat(
            [feedback_df, pd.DataFrame([feedback_entry])], ignore_index=True
        )

    # BUG FIX: `DataFrame.empty` is a property, not a method — the previous
    # `feedback_df.empty()` raised TypeError on the no-feedback path.
    if feedback_df.empty:
        return empty_leaderboard

    # map vote to winner
    # NOTE(review): assumes every stored vote (and feedback_entry["winner"])
    # uses the "left"/"right"/"tie" vocabulary — confirm at the call site;
    # unmapped values become NaN and would corrupt the metrics below.
    feedback_df["winner"] = feedback_df["winner"].map(
        {
            "left": evalica.Winner.X,
            "right": evalica.Winner.Y,
            "tie": evalica.Winner.Draw,
        }
    )

    # Calculate scores using various metrics
    avr_result = evalica.average_win_rate(
        feedback_df["left"], feedback_df["right"], feedback_df["winner"]
    )
    bt_result = evalica.bradley_terry(
        feedback_df["left"], feedback_df["right"], feedback_df["winner"]
    )
    newman_result = evalica.newman(
        feedback_df["left"], feedback_df["right"], feedback_df["winner"]
    )
    eigen_result = evalica.eigen(
        feedback_df["left"], feedback_df["right"], feedback_df["winner"]
    )
    elo_result = evalica.elo(
        feedback_df["left"], feedback_df["right"], feedback_df["winner"]
    )
    pagerank_result = evalica.pagerank(
        feedback_df["left"], feedback_df["right"], feedback_df["winner"]
    )

    # Combine all results into a single DataFrame
    leaderboard_data = pd.DataFrame(
        {
            "Model": elo_result.scores.index,
            "Elo Score": elo_result.scores.values,
            "Average Win Rate": avr_result.scores.values * 100,
            "Bradley-Terry Coefficient": bt_result.scores.values,
            "Eigenvector Centrality Value": eigen_result.scores.values,
            "Newman Modularity Score": newman_result.scores.values,
            "PageRank Score": pagerank_result.scores.values,
        }
    )

    # Round all numeric columns to two decimal places
    leaderboard_data = leaderboard_data.round(
        {
            "Elo Score": 2,
            "Average Win Rate": 2,
            "Bradley-Terry Coefficient": 2,
            "Eigenvector Centrality Value": 2,
            "Newman Modularity Score": 2,
            "PageRank Score": 2,
        }
    )

    # Add a Rank column based on Elo scores (1 = highest Elo)
    leaderboard_data["Rank"] = (
        leaderboard_data["Elo Score"].rank(ascending=False).astype(int)
    )

    # Place rank in the first column
    leaderboard_data = leaderboard_data[
        ["Rank"] + [col for col in leaderboard_data.columns if col != "Rank"]
    ]
    return leaderboard_data
452
 
453
 
 
535
  login_button = gr.Button(
536
  "Sign in with Hugging Face", elem_id="oauth-button"
537
  )
538
+
539
  # NEW: Add a textbox for the repository URL above the user prompt
540
  repo_url = gr.Textbox(
541
  show_label=False,
 
543
  lines=1,
544
  interactive=False,
545
  )
546
+
547
  # Components with initial non-interactive state
548
  shared_input = gr.Textbox(
549
  show_label=False,
 
647
  repo_info, user_input, models_state, conversation_state
648
  ):
649
  # Combine repo-related information (if any) and user query into one prompt.
650
+ combined_user_input = (
651
+ f"Repo-related Information: {fetch_url_content(repo_info)}\n\n{user_input}"
652
+ if repo_info
653
+ else user_input
654
+ )
655
 
656
  # Dynamically select two random models
657
  if len(available_models) < 2:
 
778
  print(f"Login failed: {e}")
779
  return (
780
  gr.update(visible=True), # Keep the login button visible
781
+ gr.update(interactive=False), # repo_url -> disable if login failed
782
  gr.update(interactive=False), # Keep shared_input disabled
783
  gr.update(interactive=False), # Keep send_first disabled
784
  gr.update(
 
794
  inputs=[],
795
  outputs=[
796
  login_button, # Hide the login button after successful login
797
+ repo_url, # Keep this in sync with shared_input
798
  shared_input, # Enable shared_input
799
  send_first, # Enable send_first button
800
  feedback, # Enable feedback radio buttons
 
926
  "winner": winner_model,
927
  "timestamp": datetime.now().strftime("%Y%m%d_%H%M%S"),
928
  }
929
+
 
 
 
930
  # Save feedback back to the Hugging Face dataset
931
  save_content_to_hf(feedback_entry, "SE-Arena/votes")
932
 
 
942
  gr.update(
943
  value="", interactive=True, visible=True
944
  ), # Clear shared_input
945
+ gr.update(value="", interactive=True, visible=True), # Clear repo_url
 
 
946
  gr.update(value="", visible=False), # Hide user_prompt_md
947
  gr.update(value="", visible=False), # Hide response_a_title
948
  gr.update(value="", visible=False), # Hide response_b_title
 
956
  gr.update(
957
  value="Can't Decide", interactive=True
958
  ), # Reset feedback selection
959
+ get_leaderboard_data(feedback_entry), # Updated leaderboard data
960
  gr.update(visible=True), # Show the thanks message
961
+ gr.update(
962
+ value="", interactive=True, visible=True
963
+ ), # Show the repo-related url message
964
  )
965
 
966
  # Update the click event for the submit feedback button
 
969
  inputs=[feedback, models_state, conversation_state],
970
  outputs=[
971
  shared_input, # Reset shared_input
972
+ repo_url, # Show the repo-related URL message
973
  user_prompt_md, # Hide user_prompt_md
974
  response_a_title, # Hide Model A title
975
  response_b_title, # Hide Model B title