Spaces:
Runtime error
Runtime error
refine update logic
Browse files
app.py
CHANGED
@@ -28,9 +28,6 @@ openai_client = OpenAI(api_key=api_key, base_url=base_url)
|
|
28 |
# Timeout in seconds for model responses
|
29 |
TIMEOUT = 90
|
30 |
|
31 |
-
# leaderboard data
|
32 |
-
leaderboard_data = None
|
33 |
-
|
34 |
# Hint string constant
|
35 |
SHOW_HINT_STRING = True # Set to False to hide the hint string altogether
|
36 |
HINT_STRING = "Once signed in, your votes will be recorded securely."
|
@@ -252,10 +249,7 @@ def chat_with_models(
|
|
252 |
|
253 |
def request_model_response():
|
254 |
try:
|
255 |
-
request_params = {
|
256 |
-
"model": model_name,
|
257 |
-
"messages": truncated_input
|
258 |
-
}
|
259 |
response = openai_client.chat.completions.create(**request_params)
|
260 |
model_response["content"] = response.choices[0].message.content
|
261 |
except Exception as e:
|
@@ -366,89 +360,94 @@ def load_content_from_hf(repo_name="SE-Arena/votes"):
|
|
366 |
raise Exception("Error loading feedback data from Hugging Face repository.")
|
367 |
|
368 |
|
369 |
-
def get_leaderboard_data():
|
370 |
-
|
371 |
-
|
372 |
-
|
373 |
-
try:
|
374 |
-
feedback_data = load_content_from_hf()
|
375 |
-
feedback_df = pd.DataFrame(feedback_data)
|
376 |
-
|
377 |
-
# map vote to winner
|
378 |
-
feedback_df["winner"] = feedback_df["winner"].map(
|
379 |
-
{
|
380 |
-
"left": evalica.Winner.X,
|
381 |
-
"right": evalica.Winner.Y,
|
382 |
-
"tie": evalica.Winner.Draw,
|
383 |
-
}
|
384 |
-
)
|
385 |
|
386 |
-
|
387 |
-
|
388 |
-
|
389 |
-
)
|
390 |
-
|
391 |
-
feedback_df["left"], feedback_df["right"], feedback_df["winner"]
|
392 |
-
)
|
393 |
-
newman_result = evalica.newman(
|
394 |
-
feedback_df["left"], feedback_df["right"], feedback_df["winner"]
|
395 |
-
)
|
396 |
-
eigen_result = evalica.eigen(
|
397 |
-
feedback_df["left"], feedback_df["right"], feedback_df["winner"]
|
398 |
-
)
|
399 |
-
elo_result = evalica.elo(
|
400 |
-
feedback_df["left"], feedback_df["right"], feedback_df["winner"]
|
401 |
-
)
|
402 |
-
pagerank_result = evalica.pagerank(
|
403 |
-
feedback_df["left"], feedback_df["right"], feedback_df["winner"]
|
404 |
-
)
|
405 |
|
406 |
-
|
407 |
-
|
408 |
-
|
409 |
-
|
410 |
-
|
411 |
-
|
412 |
-
|
413 |
-
|
414 |
-
|
415 |
-
|
416 |
-
|
417 |
-
|
|
|
418 |
|
419 |
-
|
420 |
-
|
421 |
-
|
422 |
-
|
423 |
-
|
424 |
-
|
425 |
-
|
426 |
-
|
427 |
-
"PageRank Score": 2,
|
428 |
-
}
|
429 |
-
)
|
430 |
|
431 |
-
|
432 |
-
|
433 |
-
|
434 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
435 |
|
436 |
-
|
437 |
-
|
438 |
-
|
439 |
-
|
440 |
-
|
441 |
-
|
442 |
-
|
443 |
-
|
444 |
-
|
445 |
-
|
446 |
-
|
447 |
-
|
448 |
-
|
449 |
-
|
450 |
-
|
451 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
452 |
return leaderboard_data
|
453 |
|
454 |
|
@@ -536,7 +535,7 @@ with gr.Blocks() as app:
|
|
536 |
login_button = gr.Button(
|
537 |
"Sign in with Hugging Face", elem_id="oauth-button"
|
538 |
)
|
539 |
-
|
540 |
# NEW: Add a textbox for the repository URL above the user prompt
|
541 |
repo_url = gr.Textbox(
|
542 |
show_label=False,
|
@@ -544,7 +543,7 @@ with gr.Blocks() as app:
|
|
544 |
lines=1,
|
545 |
interactive=False,
|
546 |
)
|
547 |
-
|
548 |
# Components with initial non-interactive state
|
549 |
shared_input = gr.Textbox(
|
550 |
show_label=False,
|
@@ -648,7 +647,11 @@ with gr.Blocks() as app:
|
|
648 |
repo_info, user_input, models_state, conversation_state
|
649 |
):
|
650 |
# Combine repo-related information (if any) and user query into one prompt.
|
651 |
-
combined_user_input =
|
|
|
|
|
|
|
|
|
652 |
|
653 |
# Dynamically select two random models
|
654 |
if len(available_models) < 2:
|
@@ -775,7 +778,7 @@ with gr.Blocks() as app:
|
|
775 |
print(f"Login failed: {e}")
|
776 |
return (
|
777 |
gr.update(visible=True), # Keep the login button visible
|
778 |
-
gr.update(interactive=False),
|
779 |
gr.update(interactive=False), # Keep shared_input disabled
|
780 |
gr.update(interactive=False), # Keep send_first disabled
|
781 |
gr.update(
|
@@ -791,7 +794,7 @@ with gr.Blocks() as app:
|
|
791 |
inputs=[],
|
792 |
outputs=[
|
793 |
login_button, # Hide the login button after successful login
|
794 |
-
repo_url,
|
795 |
shared_input, # Enable shared_input
|
796 |
send_first, # Enable send_first button
|
797 |
feedback, # Enable feedback radio buttons
|
@@ -923,10 +926,7 @@ with gr.Blocks() as app:
|
|
923 |
"winner": winner_model,
|
924 |
"timestamp": datetime.now().strftime("%Y%m%d_%H%M%S"),
|
925 |
}
|
926 |
-
|
927 |
-
# Concatenate the new feedback with the existing leaderboard data
|
928 |
-
leaderboard_data = pd.concat([get_leaderboard_data(), pd.DataFrame([feedback_entry])], ignore_index=True)
|
929 |
-
|
930 |
# Save feedback back to the Hugging Face dataset
|
931 |
save_content_to_hf(feedback_entry, "SE-Arena/votes")
|
932 |
|
@@ -942,9 +942,7 @@ with gr.Blocks() as app:
|
|
942 |
gr.update(
|
943 |
value="", interactive=True, visible=True
|
944 |
), # Clear shared_input
|
945 |
-
gr.update(
|
946 |
-
value="", interactive=True, visible=True
|
947 |
-
), # Clear repo_url
|
948 |
gr.update(value="", visible=False), # Hide user_prompt_md
|
949 |
gr.update(value="", visible=False), # Hide response_a_title
|
950 |
gr.update(value="", visible=False), # Hide response_b_title
|
@@ -958,9 +956,11 @@ with gr.Blocks() as app:
|
|
958 |
gr.update(
|
959 |
value="Can't Decide", interactive=True
|
960 |
), # Reset feedback selection
|
961 |
-
|
962 |
gr.update(visible=True), # Show the thanks message
|
963 |
-
gr.update(
|
|
|
|
|
964 |
)
|
965 |
|
966 |
# Update the click event for the submit feedback button
|
@@ -969,7 +969,7 @@ with gr.Blocks() as app:
|
|
969 |
inputs=[feedback, models_state, conversation_state],
|
970 |
outputs=[
|
971 |
shared_input, # Reset shared_input
|
972 |
-
repo_url,
|
973 |
user_prompt_md, # Hide user_prompt_md
|
974 |
response_a_title, # Hide Model A title
|
975 |
response_b_title, # Hide Model B title
|
|
|
28 |
# Timeout in seconds for model responses
|
29 |
TIMEOUT = 90
|
30 |
|
|
|
|
|
|
|
31 |
# Hint string constant
|
32 |
SHOW_HINT_STRING = True # Set to False to hide the hint string altogether
|
33 |
HINT_STRING = "Once signed in, your votes will be recorded securely."
|
|
|
249 |
|
250 |
def request_model_response():
|
251 |
try:
|
252 |
+
request_params = {"model": model_name, "messages": truncated_input}
|
|
|
|
|
|
|
253 |
response = openai_client.chat.completions.create(**request_params)
|
254 |
model_response["content"] = response.choices[0].message.content
|
255 |
except Exception as e:
|
|
|
360 |
raise Exception("Error loading feedback data from Hugging Face repository.")
|
361 |
|
362 |
|
363 |
+
def get_leaderboard_data(feedback_entry=None):
    """Build the leaderboard table from the recorded votes.

    Vote records are loaded via ``load_content_from_hf()``; each record is
    expected to carry ``left``, ``right`` and ``winner`` fields, where
    ``winner`` is one of ``"left"``, ``"right"`` or ``"tie"``.

    Args:
        feedback_entry: Optional single vote record (a dict). When given, it
            is appended to the loaded votes before the scores are computed.

    Returns:
        A ``pd.DataFrame`` with the columns ``Rank``, ``Model``,
        ``Elo Score``, ``Average Win Rate``, ``Bradley-Terry Coefficient``,
        ``Eigenvector Centrality Value``, ``Newman Modularity Score`` and
        ``PageRank Score``. The frame has no rows when there are no votes.
    """
    score_columns = [
        "Elo Score",
        "Average Win Rate",
        "Bradley-Terry Coefficient",
        "Eigenvector Centrality Value",
        "Newman Modularity Score",
        "PageRank Score",
    ]
    columns = ["Rank", "Model"] + score_columns

    # Load feedback data from the Hugging Face repository
    feedback_data = load_content_from_hf()
    feedback_df = pd.DataFrame(feedback_data)

    # Concatenate the new feedback with the existing leaderboard data
    if feedback_entry is not None:
        feedback_df = pd.concat(
            [feedback_df, pd.DataFrame([feedback_entry])], ignore_index=True
        )

    # NOTE: ``DataFrame.empty`` is a property, not a method. Calling it
    # (``feedback_df.empty()``) raises "TypeError: 'bool' object is not
    # callable".
    if feedback_df.empty:
        return pd.DataFrame(columns=columns)

    # map vote to winner
    feedback_df["winner"] = feedback_df["winner"].map(
        {
            "left": evalica.Winner.X,
            "right": evalica.Winner.Y,
            "tie": evalica.Winner.Draw,
        }
    )

    # Calculate scores using various metrics; every metric receives the same
    # (left, right, winner) triple.
    matchups = (feedback_df["left"], feedback_df["right"], feedback_df["winner"])
    avr_result = evalica.average_win_rate(*matchups)
    bt_result = evalica.bradley_terry(*matchups)
    newman_result = evalica.newman(*matchups)
    eigen_result = evalica.eigen(*matchups)
    elo_result = evalica.elo(*matchups)
    pagerank_result = evalica.pagerank(*matchups)

    # Combine all results into a single DataFrame. Each result's scores are
    # aligned to the Elo index by model name rather than by position, so a
    # metric whose scores come back in a different order cannot be attached
    # to the wrong model.
    models = elo_result.scores.index
    leaderboard_data = pd.DataFrame(
        {
            "Model": models,
            "Elo Score": elo_result.scores.values,
            "Average Win Rate": avr_result.scores.reindex(models).values * 100,
            "Bradley-Terry Coefficient": bt_result.scores.reindex(models).values,
            "Eigenvector Centrality Value": eigen_result.scores.reindex(
                models
            ).values,
            "Newman Modularity Score": newman_result.scores.reindex(models).values,
            "PageRank Score": pagerank_result.scores.reindex(models).values,
        }
    )

    # Round all numeric columns to two decimal places
    leaderboard_data = leaderboard_data.round({name: 2 for name in score_columns})

    # Add a Rank column based on Elo scores. ``method="min"`` gives tied
    # models the same (best) integer rank; the default "average" method
    # yields fractional ranks that ``astype(int)`` would silently truncate.
    leaderboard_data["Rank"] = (
        leaderboard_data["Elo Score"].rank(ascending=False, method="min").astype(int)
    )

    # Place rank in the first column
    leaderboard_data = leaderboard_data[columns]
    return leaderboard_data
|
452 |
|
453 |
|
|
|
535 |
login_button = gr.Button(
|
536 |
"Sign in with Hugging Face", elem_id="oauth-button"
|
537 |
)
|
538 |
+
|
539 |
# NEW: Add a textbox for the repository URL above the user prompt
|
540 |
repo_url = gr.Textbox(
|
541 |
show_label=False,
|
|
|
543 |
lines=1,
|
544 |
interactive=False,
|
545 |
)
|
546 |
+
|
547 |
# Components with initial non-interactive state
|
548 |
shared_input = gr.Textbox(
|
549 |
show_label=False,
|
|
|
647 |
repo_info, user_input, models_state, conversation_state
|
648 |
):
|
649 |
# Combine repo-related information (if any) and user query into one prompt.
|
650 |
+
combined_user_input = (
|
651 |
+
f"Repo-related Information: {fetch_url_content(repo_info)}\n\n{user_input}"
|
652 |
+
if repo_info
|
653 |
+
else user_input
|
654 |
+
)
|
655 |
|
656 |
# Dynamically select two random models
|
657 |
if len(available_models) < 2:
|
|
|
778 |
print(f"Login failed: {e}")
|
779 |
return (
|
780 |
gr.update(visible=True), # Keep the login button visible
|
781 |
+
gr.update(interactive=False), # repo_url -> disable if login failed
|
782 |
gr.update(interactive=False), # Keep shared_input disabled
|
783 |
gr.update(interactive=False), # Keep send_first disabled
|
784 |
gr.update(
|
|
|
794 |
inputs=[],
|
795 |
outputs=[
|
796 |
login_button, # Hide the login button after successful login
|
797 |
+
repo_url, # Keep this in sync with shared_input
|
798 |
shared_input, # Enable shared_input
|
799 |
send_first, # Enable send_first button
|
800 |
feedback, # Enable feedback radio buttons
|
|
|
926 |
"winner": winner_model,
|
927 |
"timestamp": datetime.now().strftime("%Y%m%d_%H%M%S"),
|
928 |
}
|
929 |
+
|
|
|
|
|
|
|
930 |
# Save feedback back to the Hugging Face dataset
|
931 |
save_content_to_hf(feedback_entry, "SE-Arena/votes")
|
932 |
|
|
|
942 |
gr.update(
|
943 |
value="", interactive=True, visible=True
|
944 |
), # Clear shared_input
|
945 |
+
gr.update(value="", interactive=True, visible=True), # Clear repo_url
|
|
|
|
|
946 |
gr.update(value="", visible=False), # Hide user_prompt_md
|
947 |
gr.update(value="", visible=False), # Hide response_a_title
|
948 |
gr.update(value="", visible=False), # Hide response_b_title
|
|
|
956 |
gr.update(
|
957 |
value="Can't Decide", interactive=True
|
958 |
), # Reset feedback selection
|
959 |
+
get_leaderboard_data(feedback_entry), # Updated leaderboard data
|
960 |
gr.update(visible=True), # Show the thanks message
|
961 |
+
gr.update(
|
962 |
+
value="", interactive=True, visible=True
|
963 |
+
), # Show the repo-related url message
|
964 |
)
|
965 |
|
966 |
# Update the click event for the submit feedback button
|
|
|
969 |
inputs=[feedback, models_state, conversation_state],
|
970 |
outputs=[
|
971 |
shared_input, # Reset shared_input
|
972 |
+
repo_url, # Show the repo-related URL message
|
973 |
user_prompt_md, # Hide user_prompt_md
|
974 |
response_a_title, # Hide Model A title
|
975 |
response_b_title, # Hide Model B title
|