sandbox / app.py
justinxzhao's picture
Streaming working, with different providers.
c0a5a18
raw
history blame
15.9 kB
import os
import streamlit as st
import dotenv
import openai
from openai import OpenAI
import anthropic
from together import Together
import google.generativeai as genai
import time
dotenv.load_dotenv()

# Shared login password for the app's auth gate (checked in main());
# None when APP_PASSWORD is unset, in which case no password will match.
PASSWORD = os.getenv("APP_PASSWORD")

# Load API keys from environment variables
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
TOGETHER_API_KEY = os.getenv("TOGETHER_API_KEY")

# Initialize API clients
together_client = Together(api_key=TOGETHER_API_KEY)
genai.configure(api_key=GOOGLE_API_KEY)

# Set up API clients for OpenAI and Anthropic
openai.api_key = OPENAI_API_KEY
# NOTE(review): organization/project ids are hard-coded; the OpenAI client is
# presumably picking its API key up from the OPENAI_API_KEY env var, since no
# api_key argument is passed — confirm.
openai_client = OpenAI(
    organization="org-kUoRSK0nOw4W2nQYMVGWOt03",
    project="proj_zb6k1DdgnSEbiAEMWxSOVVu4",
)
# anthropic_client = anthropic.Client(api_key=ANTHROPIC_API_KEY)
# NOTE(review): no api_key argument here either — presumably the client reads
# ANTHROPIC_API_KEY from the environment; verify.
anthropic_client = anthropic.Anthropic()
# Named council presets. Each preset maps a display name (shown in the radio
# selector) to a list of model identifiers of the form
# "<provider>://<model-name>", dispatched by get_llm_response().
LLM_COUNCIL_MEMBERS = {
    "Smalls": [
        "openai://gpt-4o-mini",
        "together://meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
        "vertex://gemini-1.5-flash-001",
        "anthropic://claude-3-haiku-20240307",
    ],
    "Flagships": [
        "openai://gpt-4",
        "together://meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
        "vertex://gemini-1.5-pro-001",
        "anthropic://claude-3-5-sonnet",
    ],
}
# Maps a model identifier to a base64-encoded SVG data URI used as the avatar
# in st.chat_message. NOTE(review): not every model in LLM_COUNCIL_MEMBERS has
# an entry here (e.g. openai://gpt-4, the Llama 405B model and
# vertex://gemini-1.5-pro-001 are missing) — direct [] lookups will KeyError
# for those; confirm and either add entries or use .get().
PROVIDER_TO_AVATAR_MAP = {
    "openai://gpt-4o-mini": "data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIwLjk5ZW0iIGhlaWdodD0iMWVtIiB2aWV3Qm94PSIwIDAgMjU2IDI2MCI+PHBhdGggZD0iTTIzOS4xODQgMTA2LjIwM2E2NC43MiA2NC43MiAwIDAgMC01LjU3Ni01My4xMDNDMjE5LjQ1MiAyOC40NTkgMTkxIDE1Ljc4NCAxNjMuMjEzIDIxLjc0QTY1LjU4NiA2NS41ODYgMCAwIDAgNTIuMDk2IDQ1LjIyYTY0LjcyIDY0LjcyIDAgMCAwLTQzLjIzIDMxLjM2Yy0xNC4zMSAyNC42MDItMTEuMDYxIDU1LjYzNCA4LjAzMyA3Ni43NGE2NC42NyA2NC42NyAwIDAgMCA1LjUyNSA1My4xMDJjMTQuMTc0IDI0LjY1IDQyLjY0NCAzNy4zMjQgNzAuNDQ2IDMxLjM2YTY0LjcyIDY0LjcyIDAgMCAwIDQ4Ljc1NCAyMS43NDRjMjguNDgxLjAyNSA1My43MTQtMTguMzYxIDYyLjQxNC00NS40ODFhNjQuNzcgNjQuNzcgMCAwIDAgNDMuMjI5LTMxLjM2YzE0LjEzNy0yNC41NTggMTAuODc1LTU1LjQyMy04LjA4My03Ni40ODNtLTk3LjU2IDEzNi4zMzhhNDguNCA0OC40IDAgMCAxLTMxLjEwNS0xMS4yNTVsMS41MzUtLjg3bDUxLjY3LTI5LjgyNWE4LjYgOC42IDAgMCAwIDQuMjQ3LTcuMzY3di03Mi44NWwyMS44NDUgMTIuNjM2Yy4yMTguMTExLjM3LjMyLjQwOS41NjN2NjAuMzY3Yy0uMDU2IDI2LjgxOC0yMS43ODMgNDguNTQ1LTQ4LjYwMSA0OC42MDFNMzcuMTU4IDE5Ny45M2E0OC4zNSA0OC4zNSAwIDAgMS01Ljc4MS0zMi41ODlsMS41MzQuOTIxbDUxLjcyMiAyOS44MjZhOC4zNCA4LjM0IDAgMCAwIDguNDQxIDBsNjMuMTgxLTM2LjQyNXYyNS4yMjFhLjg3Ljg3IDAgMCAxLS4zNTguNjY1bC01Mi4zMzUgMzAuMTg0Yy0yMy4yNTcgMTMuMzk4LTUyLjk3IDUuNDMxLTY2LjQwNC0xNy44MDNNMjMuNTQ5IDg1LjM4YTQ4LjUgNDguNSAwIDAgMSAyNS41OC0yMS4zMzN2NjEuMzlhOC4yOSA4LjI5IDAgMCAwIDQuMTk1IDcuMzE2bDYyLjg3NCAzNi4yNzJsLTIxLjg0NSAxMi42MzZhLjgyLjgyIDAgMCAxLS43NjcgMEw0MS4zNTMgMTUxLjUzYy0yMy4yMTEtMTMuNDU0LTMxLjE3MS00My4xNDQtMTcuODA0LTY2LjQwNXptMTc5LjQ2NiA0MS42OTVsLTYzLjA4LTM2LjYzTDE2MS43MyA3Ny44NmEuODIuODIgMCAwIDEgLjc2OCAwbDUyLjIzMyAzMC4xODRhNDguNiA0OC42IDAgMCAxLTcuMzE2IDg3LjYzNXYtNjEuMzkxYTguNTQgOC41NCAwIDAgMC00LjQtNy4yMTNtMjEuNzQyLTMyLjY5bC0xLjUzNS0uOTIybC01MS42MTktMzAuMDgxYTguMzkgOC4zOSAwIDAgMC04LjQ5MiAwTDk5Ljk4IDk5LjgwOFY3NC41ODdhLjcyLjcyIDAgMCAxIC4zMDctLjY2NWw1Mi4yMzMtMzAuMTMzYTQ4LjY1MiA0OC42NTIgMCAwIDEgNzIuMjM2IDUwLjM5MXpNODguMDYxIDEzOS4wOTdsLTIxLjg0NS0xMi41ODVhLjg3Ljg3IDAgMCAxLS40MS0uNjE0VjY1LjY4NWE0OC42NTIgNDguNjUyIDAgMCAxIDc5Ljc1Ny0zNy4zNDZsLTEuNTM1Ljg3bC01MS42NyAyOS44MjVhOC42IDguNiAwIDAgMC00LjI0NiA3LjM2N3ptMTEuODY4LTI1LjU4TDEyOC4wNjcgOTcuM2wyOC4xODggMTYuMjE4djMyLjQzNGwtMjguMDg2IDE2LjIxOGwtMjguMTg4LTE2LjIxOHoiLz48L3N2Zz4=",
    "anthropic://claude-3-5-sonnet": "data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIxZW0iIGhlaWdodD0iMWVtIiB2aWV3Qm94PSIwIDAgMjQgMjQiPjxwYXRoIGZpbGw9ImN1cnJlbnRDb2xvciIgZD0iTTE3LjMwNCAzLjU0MWgtMy42NzJsNi42OTYgMTYuOTE4SDI0Wm0tMTAuNjA4IDBMMCAyMC40NTloMy43NDRsMS4zNy0zLjU1M2g3LjAwNWwxLjM2OSAzLjU1M2gzLjc0NEwxMC41MzYgMy41NDFabS0uMzcxIDEwLjIyM0w4LjYxNiA3LjgybDIuMjkxIDUuOTQ1WiIvPjwvc3ZnPg==",
    "vertex://gemini-1.5-flash-001": "data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIxZW0iIGhlaWdodD0iMWVtIiB2aWV3Qm94PSIwIDAgMjQgMjQiPjxwYXRoIGZpbGw9IiM0MjY4ZmYiIGQ9Ik0yNCAxMi4wMjRjLTYuNDM3LjM4OC0xMS41OSA1LjUzOS0xMS45NzcgMTEuOTc2aC0uMDQ3QzExLjU4OCAxNy41NjMgNi40MzYgMTIuNDEyIDAgMTIuMDI0di0uMDQ3QzYuNDM3IDExLjU4OCAxMS41ODggNi40MzcgMTEuOTc2IDBoLjA0N2MuMzg4IDYuNDM3IDUuNTQgMTEuNTg4IDExLjk3NyAxMS45Nzd6Ii8+PC9zdmc+",
    "together://meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": "data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIxZW0iIGhlaWdodD0iMWVtIiB2aWV3Qm94PSIwIDAgMzIgMzIiPjxnIGZpbGw9Im5vbmUiPjxwYXRoIGZpbGw9IiNiNGFjYmMiIGQ9Ik0yMC44NzEgMjQuNzh2LTYuMDZoMy4wMXY1Ljc3YzAgLjM0LS4xMi42Mi0uMzEuOTRsLTIuNDEgNC4yYy0uMTguMjMtLjQ1LjM3LS43NS4zN2gtMS4wM2MtLjIzIDAtLjM4LS4yNC0uMjgtLjQ1bDEuNjctNC4zNWMuMDctLjEzLjEtLjI3LjEtLjQyTTE3LjA5MSAzMGMuMiAwIC4yNi0uMjEuMjItLjM4bC0yLjMyLTguNjFoLTIuOTlsLjg1IDMuNTVjLjE5LjcxLjY2IDEuMzIgMS4zIDEuNjljLjE0LjA4LjI1LjIyLjI5LjM4bC44NyAzLjE0Yy4wNC4xNy4yMS4yMi4zOC4yMnoiLz48cGF0aCBmaWxsPSIjY2RjNGQ2IiBkPSJtMjguNTQxIDIzLjA4bC0xLjI3LS45NmEuOTQuOTQgMCAwIDEtLjI3LS42NnYtMi43NWMwLS42NC0uMTYtMS4yMzgtLjQ0Mi0xLjc2cS4yMTMuMDUuNDQyLjA1YTIgMiAwIDEgMC0xLjk0OS0xLjU0OWEzLjggMy44IDAgMCAwLTEuOC0uNDUxaC04LjE3Yy0uNjYgMC0xLjI3LS40Mi0xLjU3LTEuMDFMMTAuMDQxIDMuNWEyLjIzIDIuMjMgMCAwIDAtMi4xLTEuNWMtLjE4IDAtLjMuMTctLjI0LjM0TDguNTcxIDVjLS4yIDAtMS4wNy4yMy0xLjg1LjczbC0uODA2LjQ5OEw3LjAwMiAxMHY4LjI2YzAgMi4wMSAxLjI1IDMuNzIgMy4wMSA0LjQxdjcuMDJjLS4wMS4xNy4xMy4zMS4zLjMxaDEuMzdjLjE4IDAgLjMyLS4xNC4zMi0uMzF2LTEuOTZjMC0uMTcuMDctLjMyLjE4LS40NGMuNTMtLjUyLjgyLTEuMjMuODItMS45N1YyM2g1LjA3YzEuMjcgMCAyLjQ5LS41NSAzLjMzLTEuNWMwIC45NC40OCAxLjcyIDEuMzggMi4zMmwzLjk2IDIuNDNjLjE2LjExLjI2LjMuMjYuNXYyLjkzYzAgLjE3LjE0LjMxLjMxLjMxaDEuMzdjLjE3IDAgLjMxLS4xNC4zMS0uMzF2LTUuNTFjLjAxLS40LS4xNS0uOC0uNDUtMS4wOSIvPjxwYXRoIGZpbGw9IiNmM2FkNjEiIGQ9Ik02Ljg0MSA2Yy0uMzYgMC0uNzIuMS0xLjAzLjI5bC0yLjE5IDEuMzVjLS4zNi4yMy0uNi42MS0uNjIgMS4wM2MtLjAzLjczLjU1IDEuMzMgMS4yNyAxLjMzaDMuNDljLjU3IDAgMS4wNC0uNDcgMS4wNC0xLjA1di0xYzAtMS4wNy0uODgtMS45NS0xLjk2LTEuOTUiLz48cGF0aCBmaWxsPSIjMWMxYzFjIiBkPSJNNi41IDhhLjUuNSAwIDEgMCAwLTFhLjUuNSAwIDAgMCAwIDFtLTEuOTk5LjVjMC0uMjgtLjIyLS41LS41LS41aC0uNzZhMS4yIDEuMiAwIDAgMC0uMjEgMWguOTdjLjI4IDAgLjUtLjIyLjUtLjUiLz48cGF0aCBmaWxsPSIjZjNhZDYxIiBkPSJNMjguOTkxIDI4aC0xLjk5djEuNjhjMCAuMTcuMTQuMzEuMzEuMzFoMS4zN2MuMTcgMCAuMzEtLjE0LjMxLS4zMXptLTE2Ljk5IDBoLTEuOTl2MS42OWMtLjAxLjE3LjEzLjMxLjMuMzFoMS4zN2MuMTggMCAuMzItLjE0LjMyLS4zMXptNS4wODggMmwtMS4zOTgtLjAxYy0uMTcgMC0uMzQtLjA1LS4zOC0uMjJsLS40OS0xLjc3aDIuMDU0bC40MzYgMS42MmMuMDQuMTctLjAyLjM3OC0uMjE2LjM4em0yLjI4OCAwYS4zMTMuMzEzIDAgMCAxLS4yNzYtLjQ1bC41OTUtMS41NWgyLjRsLS45MzUgMS42M2EuOTUuOTUgMCAwIDEtLjc0Mi4zN3oiLz48L2c+PC9zdmc+",
    "anthropic://claude-3-haiku-20240307": "data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIxZW0iIGhlaWdodD0iMWVtIiB2aWV3Qm94PSIwIDAgMjQgMjQiPjxwYXRoIGZpbGw9ImN1cnJlbnRDb2xvciIgZD0iTTE3LjMwNCAzLjU0MWgtMy42NzJsNi42OTYgMTYuOTE4SDI0Wm0tMTAuNjA4IDBMMCAyMC40NTloMy43NDRsMS4zNy0zLjU1M2g3LjAwNWwxLjM2OSAzLjU1M2gzLjc0NEwxMC41MzYgMy41NDFabS0uMzcxIDEwLjIyM0w4LjYxNiA3LjgybDIuMjkxIDUuOTQ1WiIvPjwvc3ZnPg==",
}
# Models offered as the aggregator — the LLM that synthesizes all council
# members' answers into one final response.
AGGREGATORS = ["openai://gpt-4", "openai://gpt-3.5-turbo"]
def anthropic_streamlit_streamer(stream):
    """Adapt an Anthropic streaming response into plain text fragments.

    :param stream: Streaming object from the Anthropic messages API.
    :return: Generator yielding text deltas, suitable for st.write_stream.
    """
    for event in stream:
        event_type = getattr(event, "type", None)
        if event_type == "message_stop":
            # End of the message: stop consuming the stream.
            return
        if event_type != "content_block_delta" or not hasattr(event, "delta"):
            # Ignore pings, block-start/stop markers, and anything untyped.
            continue
        fragment = getattr(event.delta, "text", None)
        if fragment:
            yield fragment
def google_streamlit_streamer(stream):
    """Yield the text of each chunk from a Gemini streaming response."""
    yield from (chunk.text for chunk in stream)
def together_streamlit_streamer(stream):
    """Yield content deltas from a Together streaming response.

    :param stream: Streaming object from the Together chat completions API.
    :return: Generator of text fragments, suitable for st.write_stream.

    The final chunk of a Together stream can carry ``delta.content = None``
    (finish-reason chunk); the original code yielded it anyway, feeding
    ``None`` into st.write_stream. Skip those chunks.
    """
    for chunk in stream:
        content = chunk.choices[0].delta.content
        if content is not None:
            yield content
# Helper functions for LLM council and aggregator selection
def llm_council_selector():
    """Render a radio selector for the council presets.

    Returns the list of model identifiers for the chosen preset.
    """
    preset_name = st.radio(
        "Choose a council configuration", options=list(LLM_COUNCIL_MEMBERS.keys())
    )
    return LLM_COUNCIL_MEMBERS[preset_name]
def aggregator_selector():
    """Render a radio selector for the aggregator model and return the choice."""
    choice = st.radio("Choose an aggregator LLM", options=AGGREGATORS)
    return choice
# API calls for different providers
def get_openai_response(model_name, prompt):
    """Start a streaming chat completion against the OpenAI API.

    :param model_name: OpenAI model id (e.g. "gpt-4o-mini").
    :param prompt: User prompt sent as a single user message.
    :return: The OpenAI streaming response object.
    """
    request = {
        "model": model_name,
        "messages": [{"role": "user", "content": prompt}],
        "stream": True,
    }
    return openai_client.chat.completions.create(**request)
# https://docs.anthropic.com/en/api/messages-streaming
def get_anthropic_response(model_name, prompt):
    """Start a streaming message request against the Anthropic API.

    :param model_name: Anthropic model id.
    :param prompt: User prompt sent as a single user message.
    :return: The Anthropic streaming response object.
    """
    user_message = {"role": "user", "content": prompt}
    return anthropic_client.messages.create(
        model=model_name,
        messages=[user_message],
        max_tokens=1024,
        stream=True,
    )
def get_together_response(model_name, prompt):
    """Start a streaming chat completion against the Together API.

    :param model_name: Together model id.
    :param prompt: User prompt sent as a single user message.
    :return: The Together streaming response object.
    """
    user_message = {"role": "user", "content": prompt}
    return together_client.chat.completions.create(
        model=model_name,
        messages=[user_message],
        stream=True,
    )
# https://ai.google.dev/gemini-api/docs/text-generation?lang=python
def get_google_response(model_name, prompt):
    """Start a streaming content generation against the Gemini API.

    :param model_name: Gemini model id (e.g. "gemini-1.5-flash-001").
    :param prompt: User prompt.
    :return: The Gemini streaming response object.
    """
    return genai.GenerativeModel(model_name).generate_content(prompt, stream=True)
def get_llm_response(model_identifier, prompt):
    """Dispatch a streaming completion request to the right provider.

    :param model_identifier: String of the form "<provider>://<model-name>".
    :param prompt: User prompt forwarded to the provider.
    :return: The provider's streaming response object, or None when the
        provider prefix is not recognized.
    """
    provider, model_name = model_identifier.split("://")
    if provider == "anthropic":
        return get_anthropic_response(model_name, prompt)
    if provider == "vertex":
        return get_google_response(model_name, prompt)
    if provider == "together":
        return get_together_response(model_name, prompt)
    if provider == "openai":
        return get_openai_response(model_name, prompt)
    return None
# Main Streamlit App
def main():
    """Streamlit entry point for the Language Model Council sandbox.

    Renders a password gate, lets the user pick a council preset and an
    aggregator model, streams each council member's answer to the prompt,
    then streams an aggregated answer synthesized from all of them.
    """
    st.set_page_config(
        page_title="Language Model Council Sandbox", page_icon="🏛️", layout="wide"
    )

    # Custom CSS for the chat display
    center_css = """
    <style>
    h1, h2, h3, h6 { text-align: center; }
    .chat-container {
        display: flex;
        align-items: flex-start;
        margin-bottom: 10px;
    }
    .avatar {
        width: 50px;
        margin-right: 10px;
    }
    .message {
        background-color: #f1f1f1;
        padding: 10px;
        border-radius: 10px;
        width: 100%;
    }
    </style>
    """
    st.markdown(center_css, unsafe_allow_html=True)

    # App title and description
    st.title("Language Model Council Sandbox")
    st.markdown("###### Invoke a council of LLMs to generate and judge each other.")
    st.markdown("###### [ArXiv Paper](https://arxiv.org/abs/2406.08598)")

    # Authentication system: a single shared password from APP_PASSWORD.
    if "authenticated" not in st.session_state:
        st.session_state.authenticated = False

    cols = st.columns([2, 1, 2])
    if not st.session_state.authenticated:
        with cols[1]:
            password = st.text_input("Password", type="password")
            if st.button("Login", use_container_width=True):
                if password == PASSWORD:
                    st.session_state.authenticated = True
                else:
                    st.error("Invalid credentials")

    if st.session_state.authenticated:
        st.success("Logged in successfully!")

        # Council and aggregator selection
        selected_models = llm_council_selector()
        st.write("Selected Models:", selected_models)
        selected_aggregator = aggregator_selector()
        st.write("Selected Aggregator:", selected_aggregator)

        # Prompt input
        prompt = st.text_area("Enter your prompt:")

        if st.button("Submit"):
            st.write("Responses:")

            # Fetch and stream a response from each selected council model.
            for model in selected_models:
                with st.chat_message(
                    model,
                    # .get() instead of [] so models without a registered
                    # avatar (e.g. the Flagships preset entries) fall back to
                    # Streamlit's default icon instead of raising KeyError.
                    avatar=PROVIDER_TO_AVATAR_MAP.get(model),
                ):
                    message_placeholder = st.empty()
                    stream = get_llm_response(model, prompt)
                    if stream:
                        # Adapt provider-specific stream objects into plain
                        # text generators that st.write_stream understands.
                        if model.startswith("anthropic"):
                            stream = anthropic_streamlit_streamer(stream)
                        elif model.startswith("vertex"):
                            stream = google_streamlit_streamer(stream)
                        elif model.startswith("together"):
                            stream = together_streamlit_streamer(stream)
                        # st.write_stream returns the fully assembled text
                        # once streaming finishes. Persist it so the
                        # aggregator prompt below sees the real responses;
                        # previously st.session_state[model] was never
                        # written, so the aggregator always got empty strings.
                        st.session_state[model] = message_placeholder.write_stream(
                            stream
                        )

            # Constructing the aggregator prompt
            aggregator_prompt = f"User prompt: {prompt}\n\n"
            aggregator_prompt += "Responses from other LLMs:\n"
            aggregator_prompt += "\n".join(
                [
                    f"{model}: {st.session_state.get(model, '')}"
                    for model in selected_models
                ]
            )
            aggregator_prompt += "\n\nPlease provide an aggregated response."

            # Fetching and streaming response from the aggregator
            st.write(f"Aggregated response from {selected_aggregator}:")
            with st.chat_message(selected_aggregator):
                message_placeholder = st.empty()
                aggregator_stream = get_llm_response(
                    selected_aggregator, aggregator_prompt
                )
                if aggregator_stream:
                    message_placeholder.write_stream(aggregator_stream)
    else:
        with cols[1]:
            st.warning("Please log in to access this app.")
# Script entry point (run via `streamlit run app.py`).
if __name__ == "__main__":
    main()
# import streamlit as st
# from components import llm_council_selector
# st.title("LLM Council Selector")
# selected_models = llm_council_selector()
# if selected_models is not None:
# st.write("Selected Models:", selected_models)
# else:
# st.write("No models selected or component didn't return a value.")
# Choose your council.
# Pre-selected.
# Smalls: GPT-4o-mini, llama-3.1-70b, qwen-2.0-70b
# Flagships: GPT-4o, llama-3.1-405b, qwen-2.0-110b, gemini, claude-3.5-sonnet
# Best: chatgpt-4o-latest, gemini-1.5-pro-exp-0827, grok-2-2024-08-13, claude-3-5-sonnet-20240620, llama-3.1-405b-instruct
# Custom:
# Choose from a list of available models.
# All:
# All available models.
# Choose aggregator.
# Aggregators are models proficient in synthesizing responses from other models into a single, high-quality output. An effective aggregator should maintain or enhance output quality even when
# integrating inputs that are of lesser quality than its own.
# Choices:
# - 4o-latest
# - gemini-1.5
# - grok-2
# - claude-3.5-sonnet
# - llama-3.1-405b-instruct
# Provide a prompt. (Or pre-canned prompts.)
# Paste chat history.
# Checkbox, enable judging.
#
# If checked, Judging config:
# Single sided
# Provide criteria. (or default).
# If pairwise, choose granularity (or default).
# Choose criteria. (or default).
# Enable position swapping?
# Go button.
# Sections.
# 1. Model outputs.
# 2. Aggregated output.
# 3. Judging underneath each output.
# Highlight in green, the output that was best, as determined by council.
# Show graph breakdown of scores and justifications. (by criteria, # wins and # losses)
# Show final overall score.
# Highlight in red, the output that was worst, as determined by council.
# Judging section.
# Show agreement matrix.
# Show bar graph of self-bias.
# Plot contrarianism vs. conviction (scatter plot)
# Show per-judge scores.
# Calculate total cost.
# Show total tokens used.