import os
import random
from dataclasses import dataclass
from typing import Optional
from uuid import UUID

import gradio as gr
import openai
from dotenv import load_dotenv
from supabase import create_client, Client

load_dotenv()

SUPABASE_URL = os.getenv("SUPABASE_URL")
SUPABASE_KEY = os.getenv("SUPABASE_KEY")
supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)

SHOW_CONFIG = True

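# Args collects the sampling defaults; get_completion falls back to these same
# values whenever a config row omits a key.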
@dataclass
class Args:
    frequency_penalty: float = 0
    max_tokens: int = 32
    n: int = 1
    presence_penalty: float = 0
    seed: int = 42
    stop: Optional[str] = None
    stream: bool = False
    temperature: float = 0.8
    top_p: float = 0.95

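# Illustrative shape of a config row pulled from the Supabase "configs" table
# (column names beyond 'model' and 'sys_prompt' are inferred from the lookups
# in this file):
#   {"id": "...", "round": 1, "model": "...", "sys_prompt": "...",
#    "temperature": 0.8, "top_p": 0.95, "max_tokens": 32, ...}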
def get_completion(client, config, messages):
    """Call the chat completions endpoint with sampling parameters from a config row."""
    print("GETTING COMPLETION")
    completion_args = {
        "model": config['model'],
        "messages": messages,
        "frequency_penalty": config.get('frequency_penalty', 0),
        # Keyed as 'max_tokens' to match the Args defaults; the original
        # 'max_length' lookup never matched and always fell back to 32.
        "max_tokens": config.get('max_tokens', 32),
        "n": config.get('n', 1),
        "presence_penalty": config.get('presence_penalty', 0),
        "seed": config.get('seed', 42),
        "stop": config.get('stop', None),
        "stream": config.get('stream', False),
        "temperature": config.get('temperature', 0.8),
        "top_p": config.get('top_p', 0.95),
    }

    try:
        print("TRYING TO GET COMPLETION")
        response = client.chat.completions.create(**completion_args)
        print("GOT COMPLETION")
        return response
    except Exception as e:
        print(f"Error during API call: {e}")
        return None

def get_two_random_configs(round_num: int):
    """Pick two distinct configs registered for the given round."""
    print("GETTING TWO RANDOM CONFIGS")
    response = supabase.table("configs")\
        .select("*")\
        .eq("round", round_num)\
        .execute()

    if not response.data or len(response.data) < 2:
        return None, None

    selected_configs = random.sample(response.data, 2)
    return selected_configs[0], selected_configs[1]

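# Note: initialize_session is invoked once at Blocks build time (see the UI
# section below), so every visitor shares the same randomly drawn config pair
# until the process restarts.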
def initialize_session(state):
    print("INITIALIZING SESSION")
    current_round = get_current_round()
    if not current_round:
        state.value["error"] = "Error: No active round found."
        return

    config_a, config_b = get_two_random_configs(round_num=current_round)
    if not config_a or not config_b:
        state.value["error"] = "Error: Not enough configurations available for voting."
        return

    state.value['config_a'] = config_a
    state.value['config_b'] = config_b
    state.value['conversation_a'] = []
    state.value['conversation_b'] = []
    state.value['round'] = current_round

def chat_response_a(message, history):
    print("CHAT RESPONSE A")
    return chat_response(message, history, 'a')


def chat_response_b(message, history):
    print("CHAT RESPONSE B")
    return chat_response(message, history, 'b')

def chat_response(message, history, config_type):
    print("CHAT RESPONSE")
    # State is stashed on demo.blocks at build time, so it is shared across
    # all sessions rather than held per-visitor.
    current_state = demo.blocks['state'].value
    config_a = current_state.get('config_a')
    config_b = current_state.get('config_b')

    if not config_a or not config_b:
        initialize_session(demo.blocks['state'])
        config_a = current_state.get('config_a')
        config_b = current_state.get('config_b')
        if not config_a or not config_b:
            return "Error: Configurations not initialized sufficiently."

    # The vLLM server is OpenAI-compatible and expects a shared bearer token;
    # prefer the environment, falling back to the deployment default.
    openai_api_key = os.getenv("OPENAI_API_KEY", "super-secret-token")
    base_url = "https://turingtest--example-vllm-openai-compatible-serve.modal.run/v1"
    client = openai.OpenAI(api_key=openai_api_key, base_url=base_url)

    # Rebuild the message history for whichever side this handler serves.
    config = config_a if config_type == 'a' else config_b
    conversation_key = 'conversation_a' if config_type == 'a' else 'conversation_b'

    messages = [{"role": "system", "content": config['sys_prompt']}]
    for user_msg, assistant_msg in current_state[conversation_key]:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    response = get_completion(client, config, messages)

    assistant_reply = (
        response.choices[0].message.content if response and response.choices else
        "Error: Please retry or contact support if retried more than twice."
    )

    current_state[conversation_key].append((message, assistant_reply))
    demo.blocks['state'].value = current_state

    return assistant_reply

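# retry_btn / undo_btn / clear_btn below are Gradio 4.x ChatInterface options;
# later Gradio releases removed them.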
def create_chat_interface(model_label):
    print("CREATE CHAT INTERFACE")
    # Both sides are identical apart from the response handler.
    fn = chat_response_a if model_label == 'a' else chat_response_b
    return gr.ChatInterface(
        fn=fn,
        chatbot=gr.Chatbot(height=400, label=f"Choice {model_label}"),
        textbox=gr.Textbox(placeholder="Message", container=False, scale=7),
        description="",
        theme="dark",
        retry_btn=None,
        undo_btn=None,
        clear_btn=None,
    )

def submit_vote(vote: str, state):
    print("SUBMIT VOTE")
    a_config_id = state.value['config_a']['id']
    b_config_id = state.value['config_b']['id']
    conversation_a = state.value.get('conversation_a', [])
    conversation_b = state.value.get('conversation_b', [])

    # Persist both transcripts (anonymous: no user id is recorded).
    supabase.table("conversations").insert([
        {
            "user_id": None,
            "configuration_id": a_config_id,
            "messages": conversation_a
        },
        {
            "user_id": None,
            "configuration_id": b_config_id,
            "messages": conversation_b
        }
    ]).execute()

    # created_at is left to the column default: PostgREST stores the literal
    # string "now()" rather than evaluating it as SQL.
    supabase.table("votes").insert({
        "a_config_id": str(a_config_id),
        "b_config_id": str(b_config_id),
        "voted_by_uid": None,
        "round": get_current_round(),
        "is_tie": vote == "tie",
        "a_wins": vote == "a",
    }).execute()

    # Apply the rating change for this matchup (update_elo was otherwise
    # never invoked).
    update_elo(a_config_id, b_config_id, vote)

    # Reset both transcripts for the next matchup.
    state.value['conversation_a'] = []
    state.value['conversation_b'] = []

    return "Vote submitted!"

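# Note: this is a deliberately simplified rating update with a fixed +/-10
# step per match, not the standard Elo scheme (expected score
# E_a = 1 / (1 + 10 ** ((R_b - R_a) / 400)), update R_a += K * (S_a - E_a)).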
def update_elo(a_config_id: UUID, b_config_id: UUID, vote: str):
    print("UPDATE ELO")
    # The elos table is keyed by user_id, which here holds the config id.
    a_elo_response = supabase.table("elos").select("rating").eq("user_id", a_config_id).single().execute()
    b_elo_response = supabase.table("elos").select("rating").eq("user_id", b_config_id).single().execute()

    if not a_elo_response.data or not b_elo_response.data:
        return

    a_elo = a_elo_response.data["rating"]
    b_elo = b_elo_response.data["rating"]

    if vote == "a":
        a_new = a_elo + 10
        b_new = b_elo - 10
    elif vote == "b":
        a_new = a_elo - 10
        b_new = b_elo + 10
    else:
        # Tie: ratings are unchanged.
        a_new = a_elo
        b_new = b_elo

    supabase.table("elos").update({"rating": a_new}).eq("user_id", a_config_id).execute()
    supabase.table("elos").update({"rating": b_new}).eq("user_id", b_config_id).execute()

def get_current_round():
    print("GET CURRENT ROUND")
    response = supabase.table("round_status").select("round").eq("is_eval_active", True).single().execute()
    if response.data:
        return response.data["round"]
    return None

with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", neutral_hue="slate"), head="""
    <style>
        body {
            font-family: 'Calibri', sans-serif;
        }
    </style>
""") as demo:
    gr.Markdown("## Turing Test Prompt Comp")

    state = gr.State({
        "config_a": None,
        "config_b": None,
        "conversation_a": [],
        "conversation_b": [],
        "round": 1,
        "error": None
    })
    # Stash the state component on the Blocks instance so the module-level
    # chat handlers can reach it (shared across sessions, not per-visitor).
    demo.blocks['state'] = state

    initialize_session(state)
    with gr.Row():
        with gr.Column():
            chat_a = create_chat_interface('a')
        with gr.Column():
            chat_b = create_chat_interface('b')

    with gr.Row():
        a_better = gr.Button("A is better 👈", scale=1)
        tie = gr.Button("🤝 Tie", scale=1)
        b_better = gr.Button("👉 B is better", scale=1)

    output_message = gr.Textbox(label="Status", interactive=False)
    def submit_vote_a():
        return submit_vote('a', state)

    def submit_vote_b():
        return submit_vote('b', state)

    def submit_vote_tie():
        return submit_vote('tie', state)

    a_better.click(
        submit_vote_a,
        inputs=None,
        outputs=output_message
    )
    b_better.click(
        submit_vote_b,
        inputs=None,
        outputs=output_message
    )
    tie.click(
        submit_vote_tie,
        inputs=None,
        outputs=output_message
    )
    prompt_input = gr.Textbox(placeholder="Message for both...", container=False)
    send_btn = gr.Button("Send to Both", variant="primary")

    def send_prompt(prompt):
        # Queue the prompt into both transcripts; each ChatInterface generates
        # its reply when its own handler fires. Return a single empty string
        # to clear the shared textbox (it was previously listed twice in
        # outputs).
        current_state = state.value
        if prompt:
            current_state['conversation_a'].append((prompt, None))
            current_state['conversation_b'].append((prompt, None))
            state.value = current_state
        return ""

    send_btn.click(
        send_prompt,
        inputs=prompt_input,
        outputs=prompt_input
    )
    prompt_input.submit(
        send_prompt,
        inputs=prompt_input,
        outputs=prompt_input
    )
if __name__ == "__main__":
    demo.launch(share=True)
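
# To run locally (assuming this file is saved as app.py and SUPABASE_URL /
# SUPABASE_KEY are set in .env):
#   python app.py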