Update app.py
app.py CHANGED
@@ -39,10 +39,6 @@ with open("context_window.json", "r") as file:
 # Get list of available models
 available_models = list(context_window.keys())
 
-# Initialize global variables
-models_state = {}
-conversation_state = {}
-
 
 def fetch_github_content(url):
     """Fetch detailed content from a GitHub URL using PyGithub."""
@@ -188,68 +184,62 @@ def fetch_url_content(url):
 
 
 # Truncate prompt
-def truncate_prompt(user_input, model_alias, models, conversation_state):
+def truncate_prompt(model_alias, models, conversation_state):
     """
     Truncate the conversation history and user input to fit within the model's context window.
 
     Args:
-
-        model_alias (str): Alias for the model being used (e.g., "Model A", "Model B").
+        model_alias (str): Alias for the model being used (i.e., "left", "right").
         models (dict): Dictionary mapping model aliases to their names.
         conversation_state (dict): State containing the conversation history for all models.
 
     Returns:
         str: Truncated conversation history and user input.
     """
-    model_name = models[model_alias]
-    context_length = context_window.get(model_name, 4096)
-
     # Get the full conversation history for the model
-
-    full_conversation = [
-        {"role": msg["role"], "content": msg["content"]} for msg in history
-    ]
-    full_conversation.append({"role": "user", "content": user_input})
-
-    # Convert to JSON string for accurate length measurement
-    json_conversation = json.dumps(full_conversation)
+    full_conversation = conversation_state[f"{model_alias}_chat"]
 
-
-
-    return full_conversation
+    # Get the context length for the model
+    context_length = context_window[models[model_alias]]
 
-    #
-
-
-
-
-
-
+    # Single loop to handle both FIFO removal and content truncation
+    while len(json.dumps(full_conversation)) > context_length:
+        # If we have more than one message, remove the oldest (FIFO)
+        if len(full_conversation) > 1:
+            full_conversation.pop(0)
+        # If only one message remains, truncate its content
+        else:
+            current_length = len(json.dumps(full_conversation))
+            # Calculate how many characters we need to remove
+            excess = current_length - context_length
+            # Add a buffer to ensure we remove enough (accounting for JSON encoding)
+            truncation_size = min(excess + 10, len(full_conversation[0]["content"]))
+
+            if truncation_size <= 0:
+                break  # Can't truncate further
+
+            # Truncate the content from the end to fit
+            full_conversation[0]["content"] = full_conversation[0]["content"][
+                :-truncation_size
+            ]
 
     return full_conversation
 
 
-def chat_with_models(
-    user_input, model_alias, models, conversation_state, timeout=TIMEOUT
-):
-    model_name = models[model_alias]
-    truncated_input = truncate_prompt(
-        user_input, model_alias, models, conversation_state
-    )
-    conversation_state.setdefault(model_name, []).append(
-        {"role": "user", "content": user_input}
-    )
-
+def chat_with_models(model_alias, models, conversation_state, timeout=TIMEOUT):
+    truncated_input = truncate_prompt(model_alias, models, conversation_state)
     response_event = threading.Event()  # Event to signal response completion
     model_response = {"content": None, "error": None}
 
     def request_model_response():
         try:
-            request_params = {"model": model_name, "messages": truncated_input}
+            request_params = {"model": models[model_alias], "messages": truncated_input}
             response = openai_client.chat.completions.create(**request_params)
             model_response["content"] = response.choices[0].message.content
         except Exception as e:
-            model_response["error"] = f"{model_name} model is not available. Error: {e}"
+            model_response["error"] = (
+                f"{models[model_alias]} model is not available. Error: {e}"
+            )
         finally:
            response_event.set()  # Signal that the response is completed
 
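The new `truncate_prompt` drops whole messages oldest-first and only trims content once a single message remains, comparing the JSON-serialized length of the history against the model's context length. A minimal standalone sketch of the same loop, with a hypothetical 60-character budget standing in for a real context window:

```python
import json

CONTEXT_LENGTH = 60  # hypothetical tiny budget, not a real model limit

conversation = [
    {"role": "user", "content": "first question with a long body"},
    {"role": "user", "content": "second question here"},
]

while len(json.dumps(conversation)) > CONTEXT_LENGTH:
    if len(conversation) > 1:
        # More than one message: drop the oldest first (FIFO)
        conversation.pop(0)
    else:
        # Single message left: trim its content, with a buffer for JSON overhead
        excess = len(json.dumps(conversation)) - CONTEXT_LENGTH
        cut = min(excess + 10, len(conversation[0]["content"]))
        if cut <= 0:
            break
        conversation[0]["content"] = conversation[0]["content"][:-cut]

print(conversation)  # the oldest message is gone; the survivor fits the budget
```

Note that, as in the diff, the budget is measured in serialized characters rather than tokens.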
@@ -267,37 +257,40 @@ def chat_with_models(
     elif model_response["error"]:
         raise Exception(model_response["error"])
     else:
+        # Get the full conversation history for the model
+        model_key = f"{model_alias}_chat"
+
         # Add the model's response to the conversation state
-        conversation_state[model_name].append(
+        conversation_state[model_key].append(
             {"role": "assistant", "content": model_response["content"]}
         )
-
+
         # Format the complete conversation history with different colors
-        formatted_history = format_conversation_history(conversation_state[model_name])
-
+        formatted_history = format_conversation_history(conversation_state[model_key][1:])
+
         return formatted_history
 
 
 def format_conversation_history(conversation_history):
     """
     Format the conversation history with different colors for user and model messages.
-
+
     Args:
         conversation_history (list): List of conversation messages with role and content.
-
+
     Returns:
         str: Markdown formatted conversation history.
     """
     formatted_text = ""
-
+
     for message in conversation_history:
         if message["role"] == "user":
             # Format user messages with blue text
             formatted_text += f"<div style='color: #0066cc; background-color: #f0f7ff; padding: 10px; border-radius: 5px; margin-bottom: 10px;'><strong>User:</strong> {message['content']}</div>\n\n"
-        else:
-            # Format
+        else:
+            # Format assistant messages with dark green text
             formatted_text += f"<div style='color: #006633; background-color: #f0fff0; padding: 10px; border-radius: 5px; margin-bottom: 10px;'><strong>Model:</strong> {message['content']}</div>\n\n"
-
+
     return formatted_text
 
 
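Note that the caller above passes `conversation_state[model_key][1:]`, skipping the first stored message (the context-augmented initial query) when rendering. As a quick illustration, a sketch assuming the `format_conversation_history` defined above is in scope:

```python
history = [
    {"role": "user", "content": "What does this regex do?"},
    {"role": "assistant", "content": "It matches trailing whitespace."},
]

# Produces two styled <div> blocks separated by blank lines: a blue one for
# the user turn and a green one for the assistant turn.
print(format_conversation_history(history))
```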
@@ -315,10 +308,10 @@ def save_content_to_hf(feedback_data, repo_name):
     now = datetime.now()
     quarter = (now.month - 1) // 3 + 1
     year_quarter = f"{now.year}_Q{quarter}"
-
+    timestamp = now.strftime("%Y%m%d_%H%M%S")
 
     # Define the path in the repository
-    filename = f"{year_quarter}/{
+    filename = f"{year_quarter}/{timestamp}.json"
 
     # Ensure the user is authenticated with HF
     token = HfFolder.get_token()
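For reference, the path scheme above buckets files by year and quarter and names them with a second-resolution timestamp; a worked example for a fixed date:

```python
from datetime import datetime

now = datetime(2025, 2, 14, 9, 30, 5)  # fixed example date
quarter = (now.month - 1) // 3 + 1
year_quarter = f"{now.year}_Q{quarter}"
timestamp = now.strftime("%Y%m%d_%H%M%S")

print(f"{year_quarter}/{timestamp}.json")  # 2025_Q1/20250214_093005.json
```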
@@ -365,11 +358,9 @@ def load_content_from_hf(repo_name="SE-Arena/votes"):
             )
             with open(local_path, "r") as f:
                 data = json.load(f)
-
-
-
-                feedback_data.append(data)
-
+                # Add the timestamp to the data
+                data["timestamp"] = file.split("/")[-1].split(".")[0]
+                feedback_data.append(data)
         return feedback_data
 
     except:
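The added lines recover that timestamp from the stored filename, so each loaded entry carries the moment it was saved; for example:

```python
file = "2025_Q1/20250214_093005.json"  # hypothetical repo path
timestamp = file.split("/")[-1].split(".")[0]
print(timestamp)  # 20250214_093005
```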
@@ -381,6 +372,10 @@ def get_leaderboard_data(feedback_entry=None):
     feedback_data = load_content_from_hf()
     feedback_df = pd.DataFrame(feedback_data)
 
+    # Load conversation data from the Hugging Face repository
+    conversation_data = load_content_from_hf("SE-Arena/conversations")
+    conversation_df = pd.DataFrame(conversation_data)
+
     # Concatenate the new feedback with the existing leaderboard data
     if feedback_entry is not None:
         feedback_df = pd.concat(
@@ -432,12 +427,12 @@ def get_leaderboard_data(feedback_entry=None):
     )
 
     # Calculate consistency score as a pandas Series aligned with other metrics
-
+    cs_result = pd.Series(
         "N/A", index=elo_result.scores.index
     )  # Initialize with zeros using same index
 
     # Loop through models and update values
-    for model in
+    for model in cs_result.index:
         # Filter self-matches for this model
         self_matches = feedback_df[
             (feedback_df["left"] == model) & (feedback_df["right"] == model)
@@ -446,17 +441,19 @@ def get_leaderboard_data(feedback_entry=None):
 
         if totals:
             # Count non-draw outcomes (wins or losses)
-
-
-
+            cs_result[model] = round(
+                self_matches[self_matches["winner"] == evalica.Winner.Draw].shape[0]
+                / totals,
+                2,
+            )
 
     # Combine all results into a single DataFrame
     leaderboard_data = pd.DataFrame(
         {
             "Model": elo_result.scores.index,
             "Elo Score": elo_result.scores.values,
-            "Consistency Score":
-            "Average Win Rate": avr_result.scores.values
+            "Consistency Score": cs_result.values,
+            "Average Win Rate": avr_result.scores.values,
             "Bradley-Terry Coefficient": bt_result.scores.values,
             "Eigenvector Centrality Value": eigen_result.scores.values,
             "Newman Modularity Score": newman_result.scores.values,
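The consistency score filled in above is the rounded share of a model's self-matches (the same model on both sides) that end in a draw. `totals` is computed outside this hunk; assuming it counts that model's self-matches, the arithmetic looks like this:

```python
import pandas as pd

# Hypothetical self-match outcomes for one model; "Draw" stands in for
# evalica.Winner.Draw.
self_matches = pd.DataFrame({"winner": ["Draw", "Draw", "Left", "Draw"]})

totals = len(self_matches)  # assumption: totals counts all self-matches
if totals:
    score = round(
        self_matches[self_matches["winner"] == "Draw"].shape[0] / totals, 2
    )
    print(score)  # 0.75
```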
@@ -507,7 +504,7 @@ with gr.Blocks() as app:
     leaderboard_intro = gr.Markdown(
         """
         # 🏆 FM4SE Leaderboard: Community-Driven Evaluation of Top Foundation Models (FMs) in Software Engineering (SE) Tasks
-        The SE Arena is an open-source platform designed to evaluate foundation models through human preference, fostering transparency and collaboration.
+        The SE Arena is an open-source platform designed to evaluate foundation models through human preference, fostering transparency and collaboration. This platform aims to empower the SE community to assess and compare the performance of leading FMs in related tasks. For technical details, check out our [paper](https://arxiv.org/abs/2502.01860).
         """,
         elem_classes="leaderboard-intro",
     )
@@ -678,9 +675,9 @@ with gr.Blocks() as app:
         ],
     )
 
-    def guardrail_check_se_relevance(
+    def guardrail_check_se_relevance(user_input):
         """
-        Use gpt-4o-mini to check if the
+        Use gpt-4o-mini to check if the user input is SE-related.
         Return True if it is SE-related, otherwise False.
         """
         # Example instructions for classification — adjust to your needs
@@ -692,7 +689,7 @@ with gr.Blocks() as app:
                 "Otherwise, respond with 'No'."
             ),
         }
-        user_message = {"role": "user", "content":
+        user_message = {"role": "user", "content": user_input}
 
         try:
             # Make the chat completion call
@@ -770,31 +767,37 @@ with gr.Blocks() as app:
             gr.update(visible=False),
         )
 
+        # Fetch repository info if a URL is provided
         repo_info = fetch_url_content(repo_url)
-        # Combine repository info (if available) with the user query.
         combined_user_input = (
-            f"
+            f"Context: {repo_info}\n\nInquiry: {user_input}"
             if repo_info
             else user_input
         )
 
         # Randomly select two models for the comparison
         selected_models = [random.choice(available_models) for _ in range(2)]
-        models = {"Model A": selected_models[0], "Model B": selected_models[1]}
+        models = {"left": selected_models[0], "right": selected_models[1]}
 
-        #
+        # Create a copy to avoid modifying the original
+        conversations = models.copy()
+        conversations.update({
+            "url": repo_url,
+            "left_chat": [{"role": "user", "content": combined_user_input}],
+            "right_chat": [{"role": "user", "content": combined_user_input}]
+        })
+
+        # Clear previous states
         models_state.clear()
-        models_state.update(models)
         conversation_state.clear()
-
+
+        # Update the states
+        models_state.update(models)
+        conversation_state.update(conversations)
 
         try:
-            response_a = chat_with_models(
-                combined_user_input, "Model A", models_state, conversation_state
-            )
-            response_b = chat_with_models(
-                combined_user_input, "Model B", models_state, conversation_state
-            )
+            response_a = chat_with_models("left", models_state, conversation_state)
+            response_b = chat_with_models("right", models_state, conversation_state)
         except TimeoutError as e:
             # Handle timeout by resetting components and showing a popup.
             return (
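After this initialization, both state dicts are flat and keyed by side rather than by display alias; a sketch (model names and URL illustrative) of what `conversation_state` holds going into the first round:

```python
conversation_state = {
    "left": "model-x",   # illustrative model identifiers
    "right": "model-y",
    "url": "https://github.com/example/repo",
    "left_chat": [{"role": "user", "content": "Context: ...\n\nInquiry: ..."}],
    "right_chat": [{"role": "user", "content": "Context: ...\n\nInquiry: ..."}],
}
```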
@@ -841,6 +844,9 @@ with gr.Blocks() as app:
         # Determine the initial state of the multi-round send buttons
         model_a_send_state = toggle_submit_button("")
         model_b_send_state = toggle_submit_button("")
+        display_content = f"### Your Query:\n\n{user_input}"
+        if repo_info:
+            display_content += f"\n\n### Repo-related URL:\n\n{repo_url}"
 
         # Return the updates for all 18 outputs.
         return (
@@ -851,7 +857,7 @@ with gr.Blocks() as app:
             # [2] repo_url: re-enable but hide
             gr.update(interactive=True, visible=False),
             # [3] user_prompt_md: display the user's query
-            gr.update(value=
+            gr.update(value=display_content, visible=True),
             # [4] response_a_title: show title for Model A
             gr.update(value="### Model A:", visible=True),
             # [5] response_b_title: show title for Model B
@@ -1002,9 +1008,8 @@ with gr.Blocks() as app:
     # Handle subsequent rounds
     def handle_model_a_send(user_input, models_state, conversation_state):
         try:
-            response = chat_with_models(
-                user_input, "Model A", models_state, conversation_state
-            )
+            conversation_state["left_chat"].append({"role": "user", "content": user_input})
+            response = chat_with_models("left", models_state, conversation_state)
             # Clear the input box and disable the send button
             return (
                 response,
@@ -1042,9 +1047,8 @@ with gr.Blocks() as app:
 
     def handle_model_b_send(user_input, models_state, conversation_state):
         try:
-            response = chat_with_models(
-                user_input, "Model B", models_state, conversation_state
-            )
+            conversation_state["right_chat"].append({"role": "user", "content": user_input})
+            response = chat_with_models("right", models_state, conversation_state)
             # Clear the input box and disable the send button
             return (
                 response,
@@ -1114,15 +1118,21 @@ with gr.Blocks() as app:
 
         # Create feedback entry
         feedback_entry = {
-            "left": models_state["Model A"],
-            "right": models_state["Model B"],
+            "left": models_state["left"],
+            "right": models_state["right"],
             "winner": winner_model,
-            "timestamp": datetime.now().strftime("%Y%m%d_%H%M%S"),
         }
 
         # Save feedback back to the Hugging Face dataset
         save_content_to_hf(feedback_entry, "SE-Arena/votes")
 
+        conversation_state["right_chat"][0]["content"] = conversation_state[
+            "right_chat"
+        ][0]["content"].split("\n\nInquiry: ")[-1]
+        conversation_state["left_chat"][0]["content"] = conversation_state[
+            "left_chat"
+        ][0]["content"].split("\n\nInquiry: ")[-1]
+
         # Save conversations back to the Hugging Face dataset
         save_content_to_hf(conversation_state, "SE-Arena/conversations")
 
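The `split("\n\nInquiry: ")[-1]` calls above strip the prepended repository context from the first stored message before archiving; when no context was prepended, the string passes through unchanged:

```python
with_context = "Context: <repo summary>\n\nInquiry: Why does the build fail?"
print(with_context.split("\n\nInquiry: ")[-1])  # Why does the build fail?

plain = "Why does the build fail?"
print(plain.split("\n\nInquiry: ")[-1])  # Why does the build fail?
```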
@@ -1193,7 +1203,7 @@ with gr.Blocks() as app:
 
     - The service is a **research preview**. It only provides limited safety measures and may generate offensive content.
     - It must not be used for any **illegal, harmful, violent, racist, or sexual** purposes.
-    - Please do not upload any **private information
+    - Please do not upload any **private** information.
     - The service collects user dialogue data, including both text and images, and reserves the right to distribute it under a **Creative Commons Attribution (CC-BY)** or a similar license.
     """
     )