sandbox / app.py
justinxzhao's picture
Streaming working, with different providers.
c0a5a18
raw
history blame
15.9 kB
import os
import streamlit as st
import dotenv
import openai
from openai import OpenAI
import anthropic
from together import Together
import google.generativeai as genai
import time
dotenv.load_dotenv()

# Shared login password for the app's auth gate (checked in main());
# None when APP_PASSWORD is unset, in which case no password will match.
PASSWORD = os.getenv("APP_PASSWORD")

# Load API keys from environment variables
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
TOGETHER_API_KEY = os.getenv("TOGETHER_API_KEY")

# Initialize API clients
together_client = Together(api_key=TOGETHER_API_KEY)
genai.configure(api_key=GOOGLE_API_KEY)

# Set up API clients for OpenAI and Anthropic
openai.api_key = OPENAI_API_KEY
# NOTE(review): organization/project ids are hard-coded; the OpenAI client is
# presumably picking its API key up from the OPENAI_API_KEY env var, since no
# api_key argument is passed — confirm.
openai_client = OpenAI(
    organization="org-kUoRSK0nOw4W2nQYMVGWOt03",
    project="proj_zb6k1DdgnSEbiAEMWxSOVVu4",
)
# anthropic_client = anthropic.Client(api_key=ANTHROPIC_API_KEY)
# NOTE(review): no api_key argument here either — presumably the client reads
# ANTHROPIC_API_KEY from the environment; verify.
anthropic_client = anthropic.Anthropic()
# Named council presets. Each preset maps a display name (shown in the radio
# selector) to a list of model identifiers of the form
# "<provider>://<model-name>", dispatched by get_llm_response().
LLM_COUNCIL_MEMBERS = {
    "Smalls": [
        "openai://gpt-4o-mini",
        "together://meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
        "vertex://gemini-1.5-flash-001",
        "anthropic://claude-3-haiku-20240307",
    ],
    "Flagships": [
        "openai://gpt-4",
        "together://meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
        "vertex://gemini-1.5-pro-001",
        "anthropic://claude-3-5-sonnet",
    ],
}
# Maps a model identifier to a base64-encoded SVG data URI used as the avatar
# in st.chat_message. NOTE(review): not every model in LLM_COUNCIL_MEMBERS has
# an entry here (e.g. openai://gpt-4, the Llama 405B model and
# vertex://gemini-1.5-pro-001 are missing) — direct [] lookups will KeyError
# for those; confirm and either add entries or use .get().
PROVIDER_TO_AVATAR_MAP = {
    "openai://gpt-4o-mini": "data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIwLjk5ZW0iIGhlaWdodD0iMWVtIiB2aWV3Qm94PSIwIDAgMjU2IDI2MCI+PHBhdGggZD0iTTIzOS4xODQgMTA2LjIwM2E2NC43MiA2NC43MiAwIDAgMC01LjU3Ni01My4xMDNDMjE5LjQ1MiAyOC40NTkgMTkxIDE1Ljc4NCAxNjMuMjEzIDIxLjc0QTY1LjU4NiA2NS41ODYgMCAwIDAgNTIuMDk2IDQ1LjIyYTY0LjcyIDY0LjcyIDAgMCAwLTQzLjIzIDMxLjM2Yy0xNC4zMSAyNC42MDItMTEuMDYxIDU1LjYzNCA4LjAzMyA3Ni43NGE2NC42NyA2NC42NyAwIDAgMCA1LjUyNSA1My4xMDJjMTQuMTc0IDI0LjY1IDQyLjY0NCAzNy4zMjQgNzAuNDQ2IDMxLjM2YTY0LjcyIDY0LjcyIDAgMCAwIDQ4Ljc1NCAyMS43NDRjMjguNDgxLjAyNSA1My43MTQtMTguMzYxIDYyLjQxNC00NS40ODFhNjQuNzcgNjQuNzcgMCAwIDAgNDMuMjI5LTMxLjM2YzE0LjEzNy0yNC41NTggMTAuODc1LTU1LjQyMy04LjA4My03Ni40ODNtLTk3LjU2IDEzNi4zMzhhNDguNCA0OC40IDAgMCAxLTMxLjEwNS0xMS4yNTVsMS41MzUtLjg3bDUxLjY3LTI5LjgyNWE4LjYgOC42IDAgMCAwIDQuMjQ3LTcuMzY3di03Mi44NWwyMS44NDUgMTIuNjM2Yy4yMTguMTExLjM3LjMyLjQwOS41NjN2NjAuMzY3Yy0uMDU2IDI2LjgxOC0yMS43ODMgNDguNTQ1LTQ4LjYwMSA0OC42MDFNMzcuMTU4IDE5Ny45M2E0OC4zNSA0OC4zNSAwIDAgMS01Ljc4MS0zMi41ODlsMS41MzQuOTIxbDUxLjcyMiAyOS44MjZhOC4zNCA4LjM0IDAgMCAwIDguNDQxIDBsNjMuMTgxLTM2LjQyNXYyNS4yMjFhLjg3Ljg3IDAgMCAxLS4zNTguNjY1bC01Mi4zMzUgMzAuMTg0Yy0yMy4yNTcgMTMuMzk4LTUyLjk3IDUuNDMxLTY2LjQwNC0xNy44MDNNMjMuNTQ5IDg1LjM4YTQ4LjUgNDguNSAwIDAgMSAyNS41OC0yMS4zMzN2NjEuMzlhOC4yOSA4LjI5IDAgMCAwIDQuMTk1IDcuMzE2bDYyLjg3NCAzNi4yNzJsLTIxLjg0NSAxMi42MzZhLjgyLjgyIDAgMCAxLS43NjcgMEw0MS4zNTMgMTUxLjUzYy0yMy4yMTEtMTMuNDU0LTMxLjE3MS00My4xNDQtMTcuODA0LTY2LjQwNXptMTc5LjQ2NiA0MS42OTVsLTYzLjA4LTM2LjYzTDE2MS43MyA3Ny44NmEuODIuODIgMCAwIDEgLjc2OCAwbDUyLjIzMyAzMC4xODRhNDguNiA0OC42IDAgMCAxLTcuMzE2IDg3LjYzNXYtNjEuMzkxYTguNTQgOC41NCAwIDAgMC00LjQtNy4yMTNtMjEuNzQyLTMyLjY5bC0xLjUzNS0uOTIybC01MS42MTktMzAuMDgxYTguMzkgOC4zOSAwIDAgMC04LjQ5MiAwTDk5Ljk4IDk5LjgwOFY3NC41ODdhLjcyLjcyIDAgMCAxIC4zMDctLjY2NWw1Mi4yMzMtMzAuMTMzYTQ4LjY1MiA0OC42NTIgMCAwIDEgNzIuMjM2IDUwLjM5MXpNODguMDYxIDEzOS4wOTdsLTIxLjg0NS0xMi41ODVhLjg3Ljg3IDAgMCAxLS40MS0uNjE0VjY1LjY4NWE0OC42NTIgNDguNjUyIDAgMCAxIDc5Ljc1Ny0zNy4zNDZsLTEuNTM1Ljg3bC01MS42NyAyOS44MjVhOC42IDguNiAwIDAgMC00LjI0NiA3LjM2N3ptMTEuODY4LTI1LjU4TDEyOC4wNjcgOTcuM2wyOC4xODggMTYuMjE4djMyLjQzNGwtMjguMDg2IDE2LjIxOGwtMjguMTg4LTE2LjIxOHoiLz48L3N2Zz4=",
    "anthropic://claude-3-5-sonnet": "data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIxZW0iIGhlaWdodD0iMWVtIiB2aWV3Qm94PSIwIDAgMjQgMjQiPjxwYXRoIGZpbGw9ImN1cnJlbnRDb2xvciIgZD0iTTE3LjMwNCAzLjU0MWgtMy42NzJsNi42OTYgMTYuOTE4SDI0Wm0tMTAuNjA4IDBMMCAyMC40NTloMy43NDRsMS4zNy0zLjU1M2g3LjAwNWwxLjM2OSAzLjU1M2gzLjc0NEwxMC41MzYgMy41NDFabS0uMzcxIDEwLjIyM0w4LjYxNiA3LjgybDIuMjkxIDUuOTQ1WiIvPjwvc3ZnPg==",
    "vertex://gemini-1.5-flash-001": "data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIxZW0iIGhlaWdodD0iMWVtIiB2aWV3Qm94PSIwIDAgMjQgMjQiPjxwYXRoIGZpbGw9IiM0MjY4ZmYiIGQ9Ik0yNCAxMi4wMjRjLTYuNDM3LjM4OC0xMS41OSA1LjUzOS0xMS45NzcgMTEuOTc2aC0uMDQ3QzExLjU4OCAxNy41NjMgNi40MzYgMTIuNDEyIDAgMTIuMDI0di0uMDQ3QzYuNDM3IDExLjU4OCAxMS41ODggNi40MzcgMTEuOTc2IDBoLjA0N2MuMzg4IDYuNDM3IDUuNTQgMTEuNTg4IDExLjk3NyAxMS45Nzd6Ii8+PC9zdmc+",
    "together://meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": "data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIxZW0iIGhlaWdodD0iMWVtIiB2aWV3Qm94PSIwIDAgMzIgMzIiPjxnIGZpbGw9Im5vbmUiPjxwYXRoIGZpbGw9IiNiNGFjYmMiIGQ9Ik0yMC44NzEgMjQuNzh2LTYuMDZoMy4wMXY1Ljc3YzAgLjM0LS4xMi42Mi0uMzEuOTRsLTIuNDEgNC4yYy0uMTguMjMtLjQ1LjM3LS43NS4zN2gtMS4wM2MtLjIzIDAtLjM4LS4yNC0uMjgtLjQ1bDEuNjctNC4zNWMuMDctLjEzLjEtLjI3LjEtLjQyTTE3LjA5MSAzMGMuMiAwIC4yNi0uMjEuMjItLjM4bC0yLjMyLTguNjFoLTIuOTlsLjg1IDMuNTVjLjE5LjcxLjY2IDEuMzIgMS4zIDEuNjljLjE0LjA4LjI1LjIyLjI5LjM4bC44NyAzLjE0Yy4wNC4xNy4yMS4yMi4zOC4yMnoiLz48cGF0aCBmaWxsPSIjY2RjNGQ2IiBkPSJtMjguNTQxIDIzLjA4bC0xLjI3LS45NmEuOTQuOTQgMCAwIDEtLjI3LS42NnYtMi43NWMwLS42NC0uMTYtMS4yMzgtLjQ0Mi0xLjc2cS4yMTMuMDUuNDQyLjA1YTIgMiAwIDEgMC0xLjk0OS0xLjU0OWEzLjggMy44IDAgMCAwLTEuOC0uNDUxaC04LjE3Yy0uNjYgMC0xLjI3LS40Mi0xLjU3LTEuMDFMMTAuMDQxIDMuNWEyLjIzIDIuMjMgMCAwIDAtMi4xLTEuNWMtLjE4IDAtLjMuMTctLjI0LjM0TDguNTcxIDVjLS4yIDAtMS4wNy4yMy0xLjg1LjczbC0uODA2LjQ5OEw3LjAwMiAxMHY4LjI2YzAgMi4wMSAxLjI1IDMuNzIgMy4wMSA0LjQxdjcuMDJjLS4wMS4xNy4xMy4zMS4zLjMxaDEuMzdjLjE4IDAgLjMyLS4xNC4zMi0uMzF2LTEuOTZjMC0uMTcuMDctLjMyLjE4LS40NGMuNTMtLjUyLjgyLTEuMjMuODItMS45N1YyM2g1LjA3YzEuMjcgMCAyLjQ5LS41NSAzLjMzLTEuNWMwIC45NC40OCAxLjcyIDEuMzggMi4zMmwzLjk2IDIuNDNjLjE2LjExLjI2LjMuMjYuNXYyLjkzYzAgLjE3LjE0LjMxLjMxLjMxaDEuMzdjLjE3IDAgLjMxLS4xNC4zMS0uMzF2LTUuNTFjLjAxLS40LS4xNS0uOC0uNDUtMS4wOSIvPjxwYXRoIGZpbGw9IiNmM2FkNjEiIGQ9Ik02Ljg0MSA2Yy0uMzYgMC0uNzIuMS0xLjAzLjI5bC0yLjE5IDEuMzVjLS4zNi4yMy0uNi42MS0uNjIgMS4wM2MtLjAzLjczLjU1IDEuMzMgMS4yNyAxLjMzaDMuNDljLjU3IDAgMS4wNC0uNDcgMS4wNC0xLjA1di0xYzAtMS4wNy0uODgtMS45NS0xLjk2LTEuOTUiLz48cGF0aCBmaWxsPSIjMWMxYzFjIiBkPSJNNi41IDhhLjUuNSAwIDEgMCAwLTFhLjUuNSAwIDAgMCAwIDFtLTEuOTk5LjVjMC0uMjgtLjIyLS41LS41LS41aC0uNzZhMS4yIDEuMiAwIDAgMC0uMjEgMWguOTdjLjI4IDAgLjUtLjIyLjUtLjUiLz48cGF0aCBmaWxsPSIjZjNhZDYxIiBkPSJNMjguOTkxIDI4aC0xLjk5djEuNjhjMCAuMTcuMTQuMzEuMzEuMzFoMS4zN2MuMTcgMCAuMzEtLjE0LjMxLS4zMXptLTE2Ljk5IDBoLTEuOTl2MS42OWMtLjAxLjE3LjEzLjMxLjMuMzFoMS4zN2MuMTggMCAuMzItLjE0LjMyLS4zMXptNS4wODggMmwtMS4zOTgtLjAxYy0uMTcgMC0uMzQtLjA1LS4zOC0uMjJsLS40OS0xLjc3aDIuMDU0bC40MzYgMS42MmMuMDQuMTctLjAyLjM3OC0uMjE2LjM4em0yLjI4OCAwYS4zMTMuMzEzIDAgMCAxLS4yNzYtLjQ1bC41OTUtMS41NWgyLjRsLS45MzUgMS42M2EuOTUuOTUgMCAwIDEtLjc0Mi4zN3oiLz48L2c+PC9zdmc+",
    "anthropic://claude-3-haiku-20240307": "data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIxZW0iIGhlaWdodD0iMWVtIiB2aWV3Qm94PSIwIDAgMjQgMjQiPjxwYXRoIGZpbGw9ImN1cnJlbnRDb2xvciIgZD0iTTE3LjMwNCAzLjU0MWgtMy42NzJsNi42OTYgMTYuOTE4SDI0Wm0tMTAuNjA4IDBMMCAyMC40NTloMy43NDRsMS4zNy0zLjU1M2g3LjAwNWwxLjM2OSAzLjU1M2gzLjc0NEwxMC41MzYgMy41NDFabS0uMzcxIDEwLjIyM0w4LjYxNiA3LjgybDIuMjkxIDUuOTQ1WiIvPjwvc3ZnPg==",
}
# Models offered as the aggregator — the LLM that synthesizes all council
# members' answers into one final response.
AGGREGATORS = ["openai://gpt-4", "openai://gpt-3.5-turbo"]
def anthropic_streamlit_streamer(stream):
    """Adapt an Anthropic streaming response into plain text fragments.

    :param stream: Streaming object from the Anthropic messages API.
    :return: Generator yielding text deltas, suitable for st.write_stream.
    """
    for event in stream:
        event_type = getattr(event, "type", None)
        if event_type == "message_stop":
            # End of the message: stop consuming the stream.
            return
        if event_type != "content_block_delta" or not hasattr(event, "delta"):
            # Ignore pings, block-start/stop markers, and anything untyped.
            continue
        fragment = getattr(event.delta, "text", None)
        if fragment:
            yield fragment
def google_streamlit_streamer(stream):
    """Yield the text of each chunk from a Gemini streaming response."""
    yield from (chunk.text for chunk in stream)
def together_streamlit_streamer(stream):
    """Yield content deltas from a Together streaming response.

    :param stream: Streaming object from the Together chat completions API.
    :return: Generator of text fragments, suitable for st.write_stream.

    The final chunk of a Together stream can carry ``delta.content = None``
    (finish-reason chunk); the original code yielded it anyway, feeding
    ``None`` into st.write_stream. Skip those chunks.
    """
    for chunk in stream:
        content = chunk.choices[0].delta.content
        if content is not None:
            yield content
# Helper functions for LLM council and aggregator selection
def llm_council_selector():
    """Render a radio selector for the council presets.

    Returns the list of model identifiers for the chosen preset.
    """
    preset_name = st.radio(
        "Choose a council configuration", options=list(LLM_COUNCIL_MEMBERS.keys())
    )
    return LLM_COUNCIL_MEMBERS[preset_name]
def aggregator_selector():
    """Render a radio selector for the aggregator model and return the choice."""
    choice = st.radio("Choose an aggregator LLM", options=AGGREGATORS)
    return choice
# API calls for different providers
def get_openai_response(model_name, prompt):
    """Start a streaming chat completion against the OpenAI API.

    :param model_name: OpenAI model id (e.g. "gpt-4o-mini").
    :param prompt: User prompt sent as a single user message.
    :return: The OpenAI streaming response object.
    """
    request = {
        "model": model_name,
        "messages": [{"role": "user", "content": prompt}],
        "stream": True,
    }
    return openai_client.chat.completions.create(**request)
# https://docs.anthropic.com/en/api/messages-streaming
def get_anthropic_response(model_name, prompt):
    """Start a streaming message request against the Anthropic API.

    :param model_name: Anthropic model id.
    :param prompt: User prompt sent as a single user message.
    :return: The Anthropic streaming response object.
    """
    user_message = {"role": "user", "content": prompt}
    return anthropic_client.messages.create(
        model=model_name,
        messages=[user_message],
        max_tokens=1024,
        stream=True,
    )
def get_together_response(model_name, prompt):
    """Start a streaming chat completion against the Together API.

    :param model_name: Together model id.
    :param prompt: User prompt sent as a single user message.
    :return: The Together streaming response object.
    """
    user_message = {"role": "user", "content": prompt}
    return together_client.chat.completions.create(
        model=model_name,
        messages=[user_message],
        stream=True,
    )
# https://ai.google.dev/gemini-api/docs/text-generation?lang=python
def get_google_response(model_name, prompt):
    """Start a streaming content generation against the Gemini API.

    :param model_name: Gemini model id (e.g. "gemini-1.5-flash-001").
    :param prompt: User prompt.
    :return: The Gemini streaming response object.
    """
    return genai.GenerativeModel(model_name).generate_content(prompt, stream=True)
def get_llm_response(model_identifier, prompt):
    """Dispatch a streaming completion request to the right provider.

    :param model_identifier: String of the form "<provider>://<model-name>".
    :param prompt: User prompt forwarded to the provider.
    :return: The provider's streaming response object, or None when the
        provider prefix is not recognized.
    """
    provider, model_name = model_identifier.split("://")
    if provider == "anthropic":
        return get_anthropic_response(model_name, prompt)
    if provider == "vertex":
        return get_google_response(model_name, prompt)
    if provider == "together":
        return get_together_response(model_name, prompt)
    if provider == "openai":
        return get_openai_response(model_name, prompt)
    return None
# Main Streamlit App
def main():
    """Streamlit entry point for the Language Model Council sandbox.

    Renders a password gate, lets the user pick a council preset and an
    aggregator model, streams each council member's answer to the prompt,
    then streams an aggregated answer synthesized from all of them.
    """
    st.set_page_config(
        page_title="Language Model Council Sandbox", page_icon="🏛️", layout="wide"
    )

    # Custom CSS for the chat display
    center_css = """
    <style>
    h1, h2, h3, h6 { text-align: center; }
    .chat-container {
        display: flex;
        align-items: flex-start;
        margin-bottom: 10px;
    }
    .avatar {
        width: 50px;
        margin-right: 10px;
    }
    .message {
        background-color: #f1f1f1;
        padding: 10px;
        border-radius: 10px;
        width: 100%;
    }
    </style>
    """
    st.markdown(center_css, unsafe_allow_html=True)

    # App title and description
    st.title("Language Model Council Sandbox")
    st.markdown("###### Invoke a council of LLMs to generate and judge each other.")
    st.markdown("###### [ArXiv Paper](https://arxiv.org/abs/2406.08598)")

    # Authentication system: a single shared password from APP_PASSWORD.
    if "authenticated" not in st.session_state:
        st.session_state.authenticated = False

    cols = st.columns([2, 1, 2])
    if not st.session_state.authenticated:
        with cols[1]:
            password = st.text_input("Password", type="password")
            if st.button("Login", use_container_width=True):
                if password == PASSWORD:
                    st.session_state.authenticated = True
                else:
                    st.error("Invalid credentials")

    if st.session_state.authenticated:
        st.success("Logged in successfully!")

        # Council and aggregator selection
        selected_models = llm_council_selector()
        st.write("Selected Models:", selected_models)
        selected_aggregator = aggregator_selector()
        st.write("Selected Aggregator:", selected_aggregator)

        # Prompt input
        prompt = st.text_area("Enter your prompt:")

        if st.button("Submit"):
            st.write("Responses:")

            # Fetch and stream a response from each selected council model.
            for model in selected_models:
                with st.chat_message(
                    model,
                    # .get() instead of [] so models without a registered
                    # avatar (e.g. the Flagships preset entries) fall back to
                    # Streamlit's default icon instead of raising KeyError.
                    avatar=PROVIDER_TO_AVATAR_MAP.get(model),
                ):
                    message_placeholder = st.empty()
                    stream = get_llm_response(model, prompt)
                    if stream:
                        # Adapt provider-specific stream objects into plain
                        # text generators that st.write_stream understands.
                        if model.startswith("anthropic"):
                            stream = anthropic_streamlit_streamer(stream)
                        elif model.startswith("vertex"):
                            stream = google_streamlit_streamer(stream)
                        elif model.startswith("together"):
                            stream = together_streamlit_streamer(stream)
                        # st.write_stream returns the fully assembled text
                        # once streaming finishes. Persist it so the
                        # aggregator prompt below sees the real responses;
                        # previously st.session_state[model] was never
                        # written, so the aggregator always got empty strings.
                        st.session_state[model] = message_placeholder.write_stream(
                            stream
                        )

            # Constructing the aggregator prompt
            aggregator_prompt = f"User prompt: {prompt}\n\n"
            aggregator_prompt += "Responses from other LLMs:\n"
            aggregator_prompt += "\n".join(
                [
                    f"{model}: {st.session_state.get(model, '')}"
                    for model in selected_models
                ]
            )
            aggregator_prompt += "\n\nPlease provide an aggregated response."

            # Fetching and streaming response from the aggregator
            st.write(f"Aggregated response from {selected_aggregator}:")
            with st.chat_message(selected_aggregator):
                message_placeholder = st.empty()
                aggregator_stream = get_llm_response(
                    selected_aggregator, aggregator_prompt
                )
                if aggregator_stream:
                    message_placeholder.write_stream(aggregator_stream)
    else:
        with cols[1]:
            st.warning("Please log in to access this app.")
# Script entry point (run via `streamlit run app.py`).
if __name__ == "__main__":
    main()
# import streamlit as st
# from components import llm_council_selector
# st.title("LLM Council Selector")
# selected_models = llm_council_selector()
# if selected_models is not None:
# st.write("Selected Models:", selected_models)
# else:
# st.write("No models selected or component didn't return a value.")
# Choose your council.
# Pre-selected.
# Smalls: GPT-4o-mini, llama-3.1-70b, qwen-2.0-70b
# Flagships: GPT-4o, llama-3.1-405b, qwen-2.0-110b, gemini, claude-3.5-sonnet
# Best: chatgpt-4o-latest, gemini-1.5-pro-exp-0827, grok-2-2024-08-13, claude-3-5-sonnet-20240620, llama-3.1-405b-instruct
# Custom:
# Choose from a list of available models.
# All:
# All available models.
# Choose aggregator.
# Aggregators are models proficient in synthesizing responses from other models into a single, high-quality output. An effective aggregator should maintain or enhance output quality even when
# integrating inputs that are of lesser quality than its own.
# Choices:
# - 4o-latest
# - gemini-1.5
# - grok-2
# - claude-3.5-sonnet
# - llama-3.1-405b-instruct
# Provide a prompt. (Or pre-canned prompts.)
# Paste chat history.
# Checkbox, enable judging.
#
# If checked, Judging config:
# Single sided
# Provide criteria. (or default).
# If pairwise, choose granularity (or default).
# Choose criteria. (or default).
# Enable position swapping?
# Go button.
# Sections.
# 1. Model outputs.
# 2. Aggregated output.
# 3. Judging underneath each output.
# Highlight in green, the output that was best, as determined by council.
# Show graph breakdown of scores and justifications. (by criteria, # wins and # losses)
# Show final overall score.
# Highlight in red, the output that was worst, as determined by council.
# Judging section.
# Show agreement matrix.
# Show bar graph of self-bias.
# Plot contrarianism vs. conviction (scatter plot)
# Show per-judge scores.
# Calculate total cost.
# Show total tokens used.