zhiminy committed on
Commit
ad40aea
·
1 Parent(s): 978f8be

remove an obsolete model

Browse files
Files changed (2) hide show
  1. app.py +121 -54
  2. context_window.json +0 -1
app.py CHANGED
@@ -403,19 +403,20 @@ def get_leaderboard_data(feedback_entry=None):
403
  pagerank_result = evalica.pagerank(
404
  feedback_df["left"], feedback_df["right"], feedback_df["winner"]
405
  )
406
-
407
  # Calculate consistency score as a pandas Series aligned with other metrics
408
- is_result = pd.Series("N/A", index=elo_result.scores.index) # Initialize with zeros using same index
 
 
409
 
410
  # Loop through models and update values
411
  for model in is_result.index:
412
  # Filter self-matches for this model
413
  self_matches = feedback_df[
414
- (feedback_df["left"] == model) &
415
- (feedback_df["right"] == model)
416
  ]
417
  totals = len(self_matches)
418
-
419
  if totals:
420
  # Count non-draw outcomes (wins or losses)
421
  draws = self_matches[self_matches["winner"] == evalica.Winner.Draw].shape[0]
@@ -681,23 +682,36 @@ with gr.Blocks() as app:
681
  # Here we default to fail open, but you can change as needed.
682
  return True
683
 
 
 
 
 
 
 
 
 
 
 
 
 
 
684
  # Function to update model titles and responses
685
  def update_model_titles_and_responses(
686
  repo_url, user_input, models_state, conversation_state
687
  ):
688
  # Guardrail check first
689
  if not repo_url and not guardrail_check_se_relevance(user_input):
690
- # Return updates to show the guardrail message and hide everything else.
691
  return (
692
  # [0] guardrail_message: Show guardrail message
693
  gr.update(
694
  value="### Oops! Try asking something about software engineering. Thanks!",
695
  visible=True,
696
  ),
697
- # [1] shared_input: clear and show
698
- gr.update(value="", visible=True),
699
- # [2] repo_url: clear and show
700
- gr.update(value="", visible=True),
701
  # [3] user_prompt_md: clear and hide
702
  gr.update(value="", visible=False),
703
  # [4] response_a_title: clear and hide
@@ -712,8 +726,8 @@ with gr.Blocks() as app:
712
  gr.update(visible=False),
713
  # [9] vote_panel: hide
714
  gr.update(visible=False),
715
- # [10] send_first: show and enable button
716
- gr.update(visible=True, interactive=True),
717
  # [11] feedback: enable the selection
718
  gr.update(interactive=True),
719
  # [12] models_state: pass state as-is
@@ -760,10 +774,10 @@ with gr.Blocks() as app:
760
  return (
761
  # [0] guardrail_message: hide
762
  gr.update(visible=False),
763
- # [1] shared_input: disable and clear
764
- gr.update(value="", interactive=False, visible=True),
765
- # [2] repo_url: disable and clear
766
- gr.update(value="", interactive=False, visible=True),
767
  # [3] user_prompt_md: hide
768
  gr.update(value="", visible=False),
769
  # [4] response_a_title: hide
@@ -778,8 +792,8 @@ with gr.Blocks() as app:
778
  gr.update(visible=False),
779
  # [9] vote_panel: hide
780
  gr.update(visible=False),
781
- # [10] send_first: disable
782
- gr.update(visible=True, interactive=False),
783
  # [11] feedback: disable
784
  gr.update(interactive=False),
785
  # [12] models_state: pass state as-is
@@ -806,10 +820,10 @@ with gr.Blocks() as app:
806
  return (
807
  # [0] guardrail_message: hide (since no guardrail issue)
808
  gr.update(visible=False),
809
- # [1] shared_input: hide shared_input to prevent changes during the conversation
810
- gr.update(visible=False),
811
- # [2] repo_url: hide repository URL input similarly
812
- gr.update(visible=False),
813
  # [3] user_prompt_md: display the user's query
814
  gr.update(value=f"**Your Query:**\n\n{user_input}", visible=True),
815
  # [4] response_a_title: show title for Model A
@@ -824,8 +838,8 @@ with gr.Blocks() as app:
824
  gr.update(visible=True),
825
  # [9] vote_panel: show vote panel
826
  gr.update(visible=True),
827
- # [10] send_first: hide the submit button
828
- gr.update(visible=False),
829
  # [11] feedback: enable the feedback selection
830
  gr.update(interactive=True),
831
  # [12] models_state: pass updated models_state
@@ -915,9 +929,20 @@ with gr.Blocks() as app:
915
 
916
  # First round handling
917
  send_first.click(
918
- fn=hide_thanks_message, inputs=[], outputs=[thanks_message]
 
 
919
  ).then(
920
- fn=update_model_titles_and_responses,
 
 
 
 
 
 
 
 
 
921
  inputs=[repo_url, shared_input, models_state, conversation_state],
922
  outputs=[
923
  guardrail_message,
@@ -941,6 +966,15 @@ with gr.Blocks() as app:
941
  ],
942
  )
943
 
 
 
 
 
 
 
 
 
 
944
  # Handle subsequent rounds
945
  def handle_model_a_send(user_input, models_state, conversation_state):
946
  try:
@@ -952,10 +986,8 @@ with gr.Blocks() as app:
952
  response,
953
  conversation_state,
954
  gr.update(visible=False),
955
- gr.update(
956
- value="", interactive=True
957
- ), # Clear and enable model_a_input
958
- gr.update(interactive=False), # Disable model_a_send button
959
  )
960
  except TimeoutError as e:
961
  # Disable inputs when timeout occurs
@@ -963,12 +995,19 @@ with gr.Blocks() as app:
963
  gr.update(value=""), # Clear response
964
  conversation_state,
965
  gr.update(visible=True), # Show the timeout popup
966
- gr.update(interactive=False), # Disable model_a_input
967
- gr.update(interactive=False), # Disable model_a_send
968
  )
969
  except Exception as e:
970
  raise gr.Error(str(e))
971
-
 
 
 
 
 
 
 
972
  def handle_model_b_send(user_input, models_state, conversation_state):
973
  try:
974
  response = chat_with_models(
@@ -979,10 +1018,8 @@ with gr.Blocks() as app:
979
  response,
980
  conversation_state,
981
  gr.update(visible=False),
982
- gr.update(
983
- value="", interactive=True
984
- ), # Clear and enable model_b_input
985
- gr.update(interactive=False), # Disable model_b_send button
986
  )
987
  except TimeoutError as e:
988
  # Disable inputs when timeout occurs
@@ -990,14 +1027,21 @@ with gr.Blocks() as app:
990
  gr.update(value=""), # Clear response
991
  conversation_state,
992
  gr.update(visible=True), # Show the timeout popup
993
- gr.update(interactive=False), # Disable model_b_input
994
- gr.update(interactive=False), # Disable model_b_send
995
  )
996
  except Exception as e:
997
  raise gr.Error(str(e))
998
 
999
  model_a_send.click(
1000
- handle_model_a_send,
 
 
 
 
 
 
 
1001
  inputs=[model_a_input, models_state, conversation_state],
1002
  outputs=[
1003
  response_a,
@@ -1008,7 +1052,14 @@ with gr.Blocks() as app:
1008
  ],
1009
  )
1010
  model_b_send.click(
1011
- handle_model_b_send,
 
 
 
 
 
 
 
1012
  inputs=[model_b_input, models_state, conversation_state],
1013
  outputs=[
1014
  response_b,
@@ -1050,19 +1101,35 @@ with gr.Blocks() as app:
1050
 
1051
  # Adjust output count to match the interface definition
1052
  return (
1053
- gr.update(value="", interactive=True, visible=True), # [0] Clear shared_input textbox
1054
- gr.update(value="", interactive=True, visible=True), # [1] Clear repo_url textbox
1055
- gr.update(value="", visible=False), # [2] Hide user_prompt_md markdown component
1056
- gr.update(value="", visible=False), # [3] Hide response_a_title markdown component
1057
- gr.update(value="", visible=False), # [4] Hide response_b_title markdown component
1058
- gr.update(value=""), # [5] Clear Model A response markdown component
1059
- gr.update(value=""), # [6] Clear Model B response markdown component
1060
- gr.update(visible=False), # [7] Hide multi_round_inputs row
1061
- gr.update(visible=False), # [8] Hide vote_panel row
1062
- gr.update(value="Submit", interactive=True, visible=True),# [9] Reset send_first button
1063
- gr.update(value="Can't Decide", interactive=True), # [10] Reset feedback radio selection
1064
- get_leaderboard_data(feedback_entry), # [11] Updated leaderboard data
1065
- gr.update(visible=True) # [12] Show the thanks_message markdown component
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1066
  )
1067
 
1068
  # Update the click event for the submit feedback button
 
403
  pagerank_result = evalica.pagerank(
404
  feedback_df["left"], feedback_df["right"], feedback_df["winner"]
405
  )
406
+
407
  # Calculate consistency score as a pandas Series aligned with other metrics
408
+ is_result = pd.Series(
409
+ "N/A", index=elo_result.scores.index
410
+ ) # Initialize with zeros using same index
411
 
412
  # Loop through models and update values
413
  for model in is_result.index:
414
  # Filter self-matches for this model
415
  self_matches = feedback_df[
416
+ (feedback_df["left"] == model) & (feedback_df["right"] == model)
 
417
  ]
418
  totals = len(self_matches)
419
+
420
  if totals:
421
  # Count non-draw outcomes (wins or losses)
422
  draws = self_matches[self_matches["winner"] == evalica.Winner.Draw].shape[0]
 
682
  # Here we default to fail open, but you can change as needed.
683
  return True
684
 
685
+ def disable_first_submit_ui():
686
+ """First function to immediately disable UI elements"""
687
+ return (
688
+ # [0] guardrail_message: hide
689
+ gr.update(visible=False),
690
+ # [1] shared_input: disable but keep visible
691
+ gr.update(interactive=False),
692
+ # [2] repo_url: disable but keep visible
693
+ gr.update(interactive=False),
694
+ # [3] send_first: disable and show loading state
695
+ gr.update(interactive=False, value="Processing..."),
696
+ )
697
+
698
  # Function to update model titles and responses
699
  def update_model_titles_and_responses(
700
  repo_url, user_input, models_state, conversation_state
701
  ):
702
  # Guardrail check first
703
  if not repo_url and not guardrail_check_se_relevance(user_input):
704
+ # Return updates to show the guardrail message and re-enable UI
705
  return (
706
  # [0] guardrail_message: Show guardrail message
707
  gr.update(
708
  value="### Oops! Try asking something about software engineering. Thanks!",
709
  visible=True,
710
  ),
711
+ # [1] shared_input: clear and re-enable
712
+ gr.update(value="", interactive=True, visible=True),
713
+ # [2] repo_url: clear and re-enable
714
+ gr.update(value="", interactive=True, visible=True),
715
  # [3] user_prompt_md: clear and hide
716
  gr.update(value="", visible=False),
717
  # [4] response_a_title: clear and hide
 
726
  gr.update(visible=False),
727
  # [9] vote_panel: hide
728
  gr.update(visible=False),
729
+ # [10] send_first: re-enable button with original text
730
+ gr.update(visible=True, interactive=True, value="Submit"),
731
  # [11] feedback: enable the selection
732
  gr.update(interactive=True),
733
  # [12] models_state: pass state as-is
 
774
  return (
775
  # [0] guardrail_message: hide
776
  gr.update(visible=False),
777
+ # [1] shared_input: re-enable and clear
778
+ gr.update(value="", interactive=True, visible=True),
779
+ # [2] repo_url: re-enable and clear
780
+ gr.update(value="", interactive=True, visible=True),
781
  # [3] user_prompt_md: hide
782
  gr.update(value="", visible=False),
783
  # [4] response_a_title: hide
 
792
  gr.update(visible=False),
793
  # [9] vote_panel: hide
794
  gr.update(visible=False),
795
+ # [10] send_first: re-enable with original text
796
+ gr.update(visible=True, interactive=True, value="Submit"),
797
  # [11] feedback: disable
798
  gr.update(interactive=False),
799
  # [12] models_state: pass state as-is
 
820
  return (
821
  # [0] guardrail_message: hide (since no guardrail issue)
822
  gr.update(visible=False),
823
+ # [1] shared_input: re-enable but hide
824
+ gr.update(interactive=True, visible=False),
825
+ # [2] repo_url: re-enable but hide
826
+ gr.update(interactive=True, visible=False),
827
  # [3] user_prompt_md: display the user's query
828
  gr.update(value=f"**Your Query:**\n\n{user_input}", visible=True),
829
  # [4] response_a_title: show title for Model A
 
838
  gr.update(visible=True),
839
  # [9] vote_panel: show vote panel
840
  gr.update(visible=True),
841
+ # [10] send_first: hide the submit button but restore label
842
+ gr.update(visible=False, value="Submit"),
843
  # [11] feedback: enable the feedback selection
844
  gr.update(interactive=True),
845
  # [12] models_state: pass updated models_state
 
929
 
930
  # First round handling
931
  send_first.click(
932
+ fn=hide_thanks_message,
933
+ inputs=[],
934
+ outputs=[thanks_message]
935
  ).then(
936
+ fn=disable_first_submit_ui, # First disable UI
937
+ inputs=[],
938
+ outputs=[
939
+ guardrail_message,
940
+ shared_input,
941
+ repo_url,
942
+ send_first # Just the essential UI elements to update immediately
943
+ ]
944
+ ).then(
945
+ fn=update_model_titles_and_responses, # Then do the actual processing
946
  inputs=[repo_url, shared_input, models_state, conversation_state],
947
  outputs=[
948
  guardrail_message,
 
966
  ],
967
  )
968
 
969
+ def disable_model_a_ui():
970
+ """First function to immediately disable model A UI elements"""
971
+ return (
972
+ # [0] model_a_input: disable
973
+ gr.update(interactive=False),
974
+ # [1] model_a_send: disable and show loading state
975
+ gr.update(interactive=False, value="Processing...")
976
+ )
977
+
978
  # Handle subsequent rounds
979
  def handle_model_a_send(user_input, models_state, conversation_state):
980
  try:
 
986
  response,
987
  conversation_state,
988
  gr.update(visible=False),
989
+ gr.update(value="", interactive=True), # Clear and enable model_a_input
990
+ gr.update(interactive=False, value="Send to Model A"), # Reset button text
 
 
991
  )
992
  except TimeoutError as e:
993
  # Disable inputs when timeout occurs
 
995
  gr.update(value=""), # Clear response
996
  conversation_state,
997
  gr.update(visible=True), # Show the timeout popup
998
+ gr.update(interactive=True), # Re-enable model_a_input
999
+ gr.update(interactive=True, value="Send to Model A"), # Re-enable model_a_send button
1000
  )
1001
  except Exception as e:
1002
  raise gr.Error(str(e))
1003
+ def disable_model_b_ui():
1004
+ """First function to immediately disable model B UI elements"""
1005
+ return (
1006
+ # [0] model_b_input: disable
1007
+ gr.update(interactive=False),
1008
+ # [1] model_b_send: disable and show loading state
1009
+ gr.update(interactive=False, value="Processing...")
1010
+ )
1011
  def handle_model_b_send(user_input, models_state, conversation_state):
1012
  try:
1013
  response = chat_with_models(
 
1018
  response,
1019
  conversation_state,
1020
  gr.update(visible=False),
1021
+ gr.update(value="", interactive=True), # Clear and enable model_b_input
1022
+ gr.update(interactive=False, value="Send to Model B"), # Reset button text
 
 
1023
  )
1024
  except TimeoutError as e:
1025
  # Disable inputs when timeout occurs
 
1027
  gr.update(value=""), # Clear response
1028
  conversation_state,
1029
  gr.update(visible=True), # Show the timeout popup
1030
+ gr.update(interactive=True), # Re-enable model_b_input
1031
+ gr.update(interactive=True, value="Send to Model B"), # Re-enable model_b_send button
1032
  )
1033
  except Exception as e:
1034
  raise gr.Error(str(e))
1035
 
1036
  model_a_send.click(
1037
+ fn=disable_model_a_ui, # First disable UI
1038
+ inputs=[],
1039
+ outputs=[
1040
+ model_a_input,
1041
+ model_a_send
1042
+ ]
1043
+ ).then(
1044
+ fn=handle_model_a_send, # Then do the actual processing
1045
  inputs=[model_a_input, models_state, conversation_state],
1046
  outputs=[
1047
  response_a,
 
1052
  ],
1053
  )
1054
  model_b_send.click(
1055
+ fn=disable_model_b_ui, # First disable UI
1056
+ inputs=[],
1057
+ outputs=[
1058
+ model_b_input,
1059
+ model_b_send
1060
+ ]
1061
+ ).then(
1062
+ fn=handle_model_b_send, # Then do the actual processing
1063
  inputs=[model_b_input, models_state, conversation_state],
1064
  outputs=[
1065
  response_b,
 
1101
 
1102
  # Adjust output count to match the interface definition
1103
  return (
1104
+ gr.update(
1105
+ value="", interactive=True, visible=True
1106
+ ), # [0] Clear shared_input textbox
1107
+ gr.update(
1108
+ value="", interactive=True, visible=True
1109
+ ), # [1] Clear repo_url textbox
1110
+ gr.update(
1111
+ value="", visible=False
1112
+ ), # [2] Hide user_prompt_md markdown component
1113
+ gr.update(
1114
+ value="", visible=False
1115
+ ), # [3] Hide response_a_title markdown component
1116
+ gr.update(
1117
+ value="", visible=False
1118
+ ), # [4] Hide response_b_title markdown component
1119
+ gr.update(value=""), # [5] Clear Model A response markdown component
1120
+ gr.update(value=""), # [6] Clear Model B response markdown component
1121
+ gr.update(visible=False), # [7] Hide multi_round_inputs row
1122
+ gr.update(visible=False), # [8] Hide vote_panel row
1123
+ gr.update(
1124
+ value="Submit", interactive=True, visible=True
1125
+ ), # [9] Reset send_first button
1126
+ gr.update(
1127
+ value="Can't Decide", interactive=True
1128
+ ), # [10] Reset feedback radio selection
1129
+ get_leaderboard_data(feedback_entry), # [11] Updated leaderboard data
1130
+ gr.update(
1131
+ visible=True
1132
+ ), # [12] Show the thanks_message markdown component
1133
  )
1134
 
1135
  # Update the click event for the submit feedback button
context_window.json CHANGED
@@ -22,7 +22,6 @@
22
  "grok-3-beta": 1000000,
23
  "grok-3-mini-fast-beta": 1000000,
24
  "grok-3-mini-beta": 1000000,
25
- "llama-3.1-8b": 128000,
26
  "llama-3.1-405b": 128000,
27
  "llama-3.3-70b": 128000,
28
  "llama4-scout-instruct-basic": 10000000,
 
22
  "grok-3-beta": 1000000,
23
  "grok-3-mini-fast-beta": 1000000,
24
  "grok-3-mini-beta": 1000000,
 
25
  "llama-3.1-405b": 128000,
26
  "llama-3.3-70b": 128000,
27
  "llama4-scout-instruct-basic": 10000000,