import os

import anthropic
import dotenv
import google.generativeai as genai
import streamlit as st
from openai import OpenAI
from together import Together

dotenv.load_dotenv()
PASSWORD = os.getenv("APP_PASSWORD")

# Load API keys from environment variables.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
TOGETHER_API_KEY = os.getenv("TOGETHER_API_KEY")
# Initialize API clients.
together_client = Together(api_key=TOGETHER_API_KEY)
genai.configure(api_key=GOOGLE_API_KEY)
openai_client = OpenAI(
    api_key=OPENAI_API_KEY,
    organization="org-kUoRSK0nOw4W2nQYMVGWOt03",
    project="proj_zb6k1DdgnSEbiAEMWxSOVVu4",
)
anthropic_client = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)
LLM_COUNCIL_MEMBERS = {
    "Smalls": [
        "openai://gpt-4o-mini",
        "together://meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
        "vertex://gemini-1.5-flash-001",
        "anthropic://claude-3-haiku-20240307",
    ],
    "Flagships": [
        "openai://gpt-4",
        "together://meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
        "vertex://gemini-1.5-pro-001",
        "anthropic://claude-3-5-sonnet-20240620",
    ],
}
# NOTE: only the "Smalls" models have avatar entries so far; lookups below use
# .get() so that missing entries fall back to the default avatar.
PROVIDER_TO_AVATAR_MAP = {
    "openai://gpt-4o-mini": "data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIwLjk5ZW0iIGhlaWdodD0iMWVtIiB2aWV3Qm94PSIwIDAgMjU2IDI2MCI+PHBhdGggZD0iTTIzOS4xODQgMTA2LjIwM2E2NC43MiA2NC43MiAwIDAgMC01LjU3Ni01My4xMDNDMjE5LjQ1MiAyOC40NTkgMTkxIDE1Ljc4NCAxNjMuMjEzIDIxLjc0QTY1LjU4NiA2NS41ODYgMCAwIDAgNTIuMDk2IDQ1LjIyYTY0LjcyIDY0LjcyIDAgMCAwLTQzLjIzIDMxLjM2Yy0xNC4zMSAyNC42MDItMTEuMDYxIDU1LjYzNCA4LjAzMyA3Ni43NGE2NC42NyA2NC42NyAwIDAgMCA1LjUyNSA1My4xMDJjMTQuMTc0IDI0LjY1IDQyLjY0NCAzNy4zMjQgNzAuNDQ2IDMxLjM2YTY0LjcyIDY0LjcyIDAgMCAwIDQ4Ljc1NCAyMS43NDRjMjguNDgxLjAyNSA1My43MTQtMTguMzYxIDYyLjQxNC00NS40ODFhNjQuNzcgNjQuNzcgMCAwIDAgNDMuMjI5LTMxLjM2YzE0LjEzNy0yNC41NTggMTAuODc1LTU1LjQyMy04LjA4My03Ni40ODNtLTk3LjU2IDEzNi4zMzhhNDguNCA0OC40IDAgMCAxLTMxLjEwNS0xMS4yNTVsMS41MzUtLjg3bDUxLjY3LTI5LjgyNWE4LjYgOC42IDAgMCAwIDQuMjQ3LTcuMzY3di03Mi44NWwyMS44NDUgMTIuNjM2Yy4yMTguMTExLjM3LjMyLjQwOS41NjN2NjAuMzY3Yy0uMDU2IDI2LjgxOC0yMS43ODMgNDguNTQ1LTQ4LjYwMSA0OC42MDFNMzcuMTU4IDE5Ny45M2E0OC4zNSA0OC4zNSAwIDAgMS01Ljc4MS0zMi41ODlsMS41MzQuOTIxbDUxLjcyMiAyOS44MjZhOC4zNCA4LjM0IDAgMCAwIDguNDQxIDBsNjMuMTgxLTM2LjQyNXYyNS4yMjFhLjg3Ljg3IDAgMCAxLS4zNTguNjY1bC01Mi4zMzUgMzAuMTg0Yy0yMy4yNTcgMTMuMzk4LTUyLjk3IDUuNDMxLTY2LjQwNC0xNy44MDNNMjMuNTQ5IDg1LjM4YTQ4LjUgNDguNSAwIDAgMSAyNS41OC0yMS4zMzN2NjEuMzlhOC4yOSA4LjI5IDAgMCAwIDQuMTk1IDcuMzE2bDYyLjg3NCAzNi4yNzJsLTIxLjg0NSAxMi42MzZhLjgyLjgyIDAgMCAxLS43NjcgMEw0MS4zNTMgMTUxLjUzYy0yMy4yMTEtMTMuNDU0LTMxLjE3MS00My4xNDQtMTcuODA0LTY2LjQwNXptMTc5LjQ2NiA0MS42OTVsLTYzLjA4LTM2LjYzTDE2MS43MyA3Ny44NmEuODIuODIgMCAwIDEgLjc2OCAwbDUyLjIzMyAzMC4xODRhNDguNiA0OC42IDAgMCAxLTcuMzE2IDg3LjYzNXYtNjEuMzkxYTguNTQgOC41NCAwIDAgMC00LjQtNy4yMTNtMjEuNzQyLTMyLjY5bC0xLjUzNS0uOTIybC01MS42MTktMzAuMDgxYTguMzkgOC4zOSAwIDAgMC04LjQ5MiAwTDk5Ljk4IDk5LjgwOFY3NC41ODdhLjcyLjcyIDAgMCAxIC4zMDctLjY2NWw1Mi4yMzMtMzAuMTMzYTQ4LjY1MiA0OC42NTIgMCAwIDEgNzIuMjM2IDUwLjM5MXpNODguMDYxIDEzOS4wOTdsLTIxLjg0NS0xMi41ODVhLjg3Ljg3IDAgMCAxLS40MS0uNjE0VjY1LjY4NWE0OC42NTIgNDguNjUyIDAgMCAxIDc5Ljc1Ny0zNy4zNDZsLTEuNTM1Ljg3bC01MS42NyAyOS44MjVhOC42IDguNiAwIDAgMC00LjI0NiA3LjM2N3ptMTEuODY4LTI1LjU4TDEyOC4wNjcgOTcuM2wyOC4xODggMTYuMjE4djMyLjQzNGwtMjguMDg2IDE2LjIxOGwtMjguMTg4LTE2LjIxOHoiLz48L3N2Zz4=",
    "anthropic://claude-3-5-sonnet-20240620": "data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIxZW0iIGhlaWdodD0iMWVtIiB2aWV3Qm94PSIwIDAgMjQgMjQiPjxwYXRoIGZpbGw9ImN1cnJlbnRDb2xvciIgZD0iTTE3LjMwNCAzLjU0MWgtMy42NzJsNi42OTYgMTYuOTE4SDI0Wm0tMTAuNjA4IDBMMCAyMC40NTloMy43NDRsMS4zNy0zLjU1M2g3LjAwNWwxLjM2OSAzLjU1M2gzLjc0NEwxMC41MzYgMy41NDFabS0uMzcxIDEwLjIyM0w4LjYxNiA3LjgybDIuMjkxIDUuOTQ1WiIvPjwvc3ZnPg==",
    "vertex://gemini-1.5-flash-001": "data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIxZW0iIGhlaWdodD0iMWVtIiB2aWV3Qm94PSIwIDAgMjQgMjQiPjxwYXRoIGZpbGw9IiM0MjY4ZmYiIGQ9Ik0yNCAxMi4wMjRjLTYuNDM3LjM4OC0xMS41OSA1LjUzOS0xMS45NzcgMTEuOTc2aC0uMDQ3QzExLjU4OCAxNy41NjMgNi40MzYgMTIuNDEyIDAgMTIuMDI0di0uMDQ3QzYuNDM3IDExLjU4OCAxMS41ODggNi40MzcgMTEuOTc2IDBoLjA0N2MuMzg4IDYuNDM3IDUuNTQgMTEuNTg4IDExLjk3NyAxMS45Nzd6Ii8+PC9zdmc+",
    "together://meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": "data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIxZW0iIGhlaWdodD0iMWVtIiB2aWV3Qm94PSIwIDAgMzIgMzIiPjxnIGZpbGw9Im5vbmUiPjxwYXRoIGZpbGw9IiNiNGFjYmMiIGQ9Ik0yMC44NzEgMjQuNzh2LTYuMDZoMy4wMXY1Ljc3YzAgLjM0LS4xMi42Mi0uMzEuOTRsLTIuNDEgNC4yYy0uMTguMjMtLjQ1LjM3LS43NS4zN2gtMS4wM2MtLjIzIDAtLjM4LS4yNC0uMjgtLjQ1bDEuNjctNC4zNWMuMDctLjEzLjEtLjI3LjEtLjQyTTE3LjA5MSAzMGMuMiAwIC4yNi0uMjEuMjItLjM4bC0yLjMyLTguNjFoLTIuOTlsLjg1IDMuNTVjLjE5LjcxLjY2IDEuMzIgMS4zIDEuNjljLjE0LjA4LjI1LjIyLjI5LjM4bC44NyAzLjE0Yy4wNC4xNy4yMS4yMi4zOC4yMnoiLz48cGF0aCBmaWxsPSIjY2RjNGQ2IiBkPSJtMjguNTQxIDIzLjA4bC0xLjI3LS45NmEuOTQuOTQgMCAwIDEtLjI3LS42NnYtMi43NWMwLS42NC0uMTYtMS4yMzgtLjQ0Mi0xLjc2cS4yMTMuMDUuNDQyLjA1YTIgMiAwIDEgMC0xLjk0OS0xLjU0OWEzLjggMy44IDAgMCAwLTEuOC0uNDUxaC04LjE3Yy0uNjYgMC0xLjI3LS40Mi0xLjU3LTEuMDFMMTAuMDQxIDMuNWEyLjIzIDIuMjMgMCAwIDAtMi4xLTEuNWMtLjE4IDAtLjMuMTctLjI0LjM0TDguNTcxIDVjLS4yIDAtMS4wNy4yMy0xLjg1LjczbC0uODA2LjQ5OEw3LjAwMiAxMHY4LjI2YzAgMi4wMSAxLjI1IDMuNzIgMy4wMSA0LjQxdjcuMDJjLS4wMS4xNy4xMy4zMS4zLjMxaDEuMzdjLjE4IDAgLjMyLS4xNC4zMi0uMzF2LTEuOTZjMC0uMTcuMDctLjMyLjE4LS40NGMuNTMtLjUyLjgyLTEuMjMuODItMS45N1YyM2g1LjA3YzEuMjcgMCAyLjQ5LS41NSAzLjMzLTEuNWMwIC45NC40OCAxLjcyIDEuMzggMi4zMmwzLjk2IDIuNDNjLjE2LjExLjI2LjMuMjYuNXYyLjkzYzAgLjE3LjE0LjMxLjMxLjMxaDEuMzdjLjE3IDAgLjMxLS4xNC4zMS0uMzF2LTUuNTFjLjAxLS40LS4xNS0uOC0uNDUtMS4wOSIvPjxwYXRoIGZpbGw9IiNmM2FkNjEiIGQ9Ik02Ljg0MSA2Yy0uMzYgMC0uNzIuMS0xLjAzLjI5bC0yLjE5IDEuMzVjLS4zNi4yMy0uNi42MS0uNjIgMS4wM2MtLjAzLjczLjU1IDEuMzMgMS4yNyAxLjMzaDMuNDljLjU3IDAgMS4wNC0uNDcgMS4wNC0xLjA1di0xYzAtMS4wNy0uODgtMS45NS0xLjk2LTEuOTUiLz48cGF0aCBmaWxsPSIjMWMxYzFjIiBkPSJNNi41IDhhLjUuNSAwIDEgMCAwLTFhLjUuNSAwIDAgMCAwIDFtLTEuOTk5LjVjMC0uMjgtLjIyLS41LS41LS41aC0uNzZhMS4yIDEuMiAwIDAgMC0uMjEgMWguOTdjLjI4IDAgLjUtLjIyLjUtLjUiLz48cGF0aCBmaWxsPSIjZjNhZDYxIiBkPSJNMjguOTkxIDI4aC0xLjk5djEuNjhjMCAuMTcuMTQuMzEuMzEuMzFoMS4zN2MuMTcgMCAuMzEtLjE0LjMxLS4zMXptLTE2Ljk5IDBoLTEuOTl2MS42OWMtLjAxLjE3LjEzLjMxLjMuMzFoMS4zN2MuMTggMCAuMzItLjE0LjMyLS4zMXptNS4wODggMmwtMS4zOTgtLjAxYy0uMTcgMC0uMzQtLjA1LS4zOC0uMjJsLS40OS0xLjc3aDIuMDU0bC40MzYgMS42MmMuMDQuMTctLjAyLjM3OC0uMjE2LjM4em0yLjI4OCAwYS4zMTMuMzEzIDAgMCAxLS4yNzYtLjQ1bC41OTUtMS41NWgyLjRsLS45MzUgMS42M2EuOTUuOTUgMCAwIDEtLjc0Mi4zN3oiLz48L2c+PC9zdmc+",
    "anthropic://claude-3-haiku-20240307": "data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIxZW0iIGhlaWdodD0iMWVtIiB2aWV3Qm94PSIwIDAgMjQgMjQiPjxwYXRoIGZpbGw9ImN1cnJlbnRDb2xvciIgZD0iTTE3LjMwNCAzLjU0MWgtMy42NzJsNi42OTYgMTYuOTE4SDI0Wm0tMTAuNjA4IDBMMCAyMC40NTloMy43NDRsMS4zNy0zLjU1M2g3LjAwNWwxLjM2OSAzLjU1M2gzLjc0NEwxMC41MzYgMy41NDFabS0uMzcxIDEwLjIyM0w4LjYxNiA3LjgybDIuMjkxIDUuOTQ1WiIvPjwvc3ZnPg==",
}
AGGREGATORS = ["openai://gpt-4", "openai://gpt-3.5-turbo"]

def anthropic_streamlit_streamer(stream):
    """
    Process the Anthropic streaming response and yield text from the deltas.

    :param stream: Streaming object from the Anthropic API.
    :return: Yields text content from the streaming response.
    """
    for event in stream:
        if hasattr(event, "type"):
            # Handle content block deltas.
            if event.type == "content_block_delta" and hasattr(event, "delta"):
                # Extract the text delta from the event.
                text_delta = getattr(event.delta, "text", None)
                if text_delta:
                    yield text_delta
            # Handle the message completion event.
            elif event.type == "message_stop":
                break  # End of message; stop streaming.

def google_streamlit_streamer(stream):
    for chunk in stream:
        yield chunk.text


def together_streamlit_streamer(stream):
    for chunk in stream:
        # The final chunk's delta content can be None; skip empty deltas.
        content = chunk.choices[0].delta.content
        if content:
            yield content

# Helper functions for LLM council and aggregator selection.
def llm_council_selector():
    selected_council = st.radio(
        "Choose a council configuration", options=list(LLM_COUNCIL_MEMBERS.keys())
    )
    return LLM_COUNCIL_MEMBERS[selected_council]


def aggregator_selector():
    return st.radio("Choose an aggregator LLM", options=AGGREGATORS)

# API calls for different providers.
def get_openai_response(model_name, prompt):
    return openai_client.chat.completions.create(
        model=model_name,
        messages=[{"role": "user", "content": prompt}],
        stream=True,
    )

# https://docs.anthropic.com/en/api/messages-streaming
def get_anthropic_response(model_name, prompt):
    return anthropic_client.messages.create(
        max_tokens=1024,
        messages=[{"role": "user", "content": prompt}],
        model=model_name,
        stream=True,
    )

def get_together_response(model_name, prompt):
    return together_client.chat.completions.create(
        model=model_name,
        messages=[{"role": "user", "content": prompt}],
        stream=True,
    )

# https://ai.google.dev/gemini-api/docs/text-generation?lang=python
def get_google_response(model_name, prompt):
    model = genai.GenerativeModel(model_name)
    return model.generate_content(prompt, stream=True)

def get_llm_response(model_identifier, prompt):
    provider, model_name = model_identifier.split("://")
    if provider == "openai":
        return get_openai_response(model_name, prompt)
    elif provider == "anthropic":
        return get_anthropic_response(model_name, prompt)
    elif provider == "together":
        return get_together_response(model_name, prompt)
    elif provider == "vertex":
        return get_google_response(model_name, prompt)
    else:
        return None

# Main Streamlit app.
def main():
    st.set_page_config(
        page_title="Language Model Council Sandbox", page_icon="🏛️", layout="wide"
    )

    # Custom CSS for the chat display.
    center_css = """
    <style>
    h1, h2, h3, h6 { text-align: center; }
    .chat-container {
        display: flex;
        align-items: flex-start;
        margin-bottom: 10px;
    }
    .avatar {
        width: 50px;
        margin-right: 10px;
    }
    .message {
        background-color: #f1f1f1;
        padding: 10px;
        border-radius: 10px;
        width: 100%;
    }
    </style>
    """
    st.markdown(center_css, unsafe_allow_html=True)

    # App title and description.
    st.title("Language Model Council Sandbox")
    st.markdown("###### Invoke a council of LLMs to generate responses and judge each other's outputs.")
    st.markdown("###### [ArXiv Paper](https://arxiv.org/abs/2406.08598)")

    # Authentication system.
    if "authenticated" not in st.session_state:
        st.session_state.authenticated = False

    cols = st.columns([2, 1, 2])
    if not st.session_state.authenticated:
        with cols[1]:
            password = st.text_input("Password", type="password")
            if st.button("Login", use_container_width=True):
                if password == PASSWORD:
                    st.session_state.authenticated = True
                else:
                    st.error("Invalid credentials")

    if st.session_state.authenticated:
        st.success("Logged in successfully!")

        # Council and aggregator selection.
        selected_models = llm_council_selector()
        st.write("Selected Models:", selected_models)
        selected_aggregator = aggregator_selector()
        st.write("Selected Aggregator:", selected_aggregator)

        # Prompt input.
        prompt = st.text_area("Enter your prompt:")

        if st.button("Submit"):
            st.write("Responses:")
            # Fetch and stream responses from each selected model, storing the
            # full text in session state for the aggregation step below.
            for model in selected_models:
                with st.chat_message(
                    model,
                    avatar=PROVIDER_TO_AVATAR_MAP.get(model),
                ):
                    message_placeholder = st.empty()
                    stream = get_llm_response(model, prompt)
                    if stream:
                        if model.startswith("anthropic"):
                            stream = anthropic_streamlit_streamer(stream)
                        elif model.startswith("vertex"):
                            stream = google_streamlit_streamer(stream)
                        elif model.startswith("together"):
                            stream = together_streamlit_streamer(stream)
                        # write_stream returns the concatenated streamed text.
                        st.session_state[model] = message_placeholder.write_stream(stream)
            # Construct the aggregator prompt from the stored responses.
            aggregator_prompt = f"User prompt: {prompt}\n\n"
            aggregator_prompt += "Responses from other LLMs:\n"
            aggregator_prompt += "\n".join(
                f"{model}: {st.session_state.get(model, '')}"
                for model in selected_models
            )
            aggregator_prompt += "\n\nPlease provide an aggregated response."
            # Fetch and stream the response from the aggregator.
            st.write(f"Aggregated response from {selected_aggregator}:")
            with st.chat_message(selected_aggregator):
                message_placeholder = st.empty()
                aggregator_stream = get_llm_response(selected_aggregator, aggregator_prompt)
                if aggregator_stream:
                    message_placeholder.write_stream(aggregator_stream)
    else:
        with cols[1]:
            st.warning("Please log in to access this app.")


if __name__ == "__main__":
    main()

# Design notes (planned features)
#
# Choose your council.
#   Pre-selected options:
#     Smalls: GPT-4o-mini, llama-3.1-70b, qwen-2.0-70b
#     Flagships: GPT-4o, llama-3.1-405b, qwen-2.0-110b, gemini, claude-3.5-sonnet
#     Best: chatgpt-4o-latest, gemini-1.5-pro-exp-0827, grok-2-2024-08-13,
#       claude-3-5-sonnet-20240620, llama-3.1-405b-instruct
#   Custom: choose from a list of available models (see the sketch below).
#   All: all available models.
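

# A minimal sketch of the "Custom" option above: a multiselect over the union
# of all model identifiers already known to the app. The function name is
# hypothetical and it is not yet wired into main().
def custom_council_selector():
    all_models = sorted({m for members in LLM_COUNCIL_MEMBERS.values() for m in members})
    return st.multiselect("Choose council members", options=all_models)
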
# Choose an aggregator.
#   Aggregators are models proficient at synthesizing responses from other
#   models into a single, high-quality output. An effective aggregator should
#   maintain or enhance output quality even when integrating inputs of lesser
#   quality than its own.
#   Choices:
#     - 4o-latest
#     - gemini-1.5
#     - grok-2
#     - claude-3.5-sonnet
#     - llama-3.1-405b-instruct
# Provide a prompt (or choose from pre-canned prompts).
#   Paste chat history.
#   Checkbox: enable judging.
#
# If checked, judging config (see the sketch below):
#   Single-sided:
#     Provide criteria (or use the default).
#   If pairwise:
#     Choose granularity (or use the default).
#     Choose criteria (or use the default).
#     Enable position swapping?
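

# A rough sketch of the pairwise judging prompt described above, with optional
# position swapping to control for order bias. The wording, the default
# criteria, and the expected answer format are illustrative assumptions, not a
# settled design.
def build_pairwise_judging_prompt(
    user_prompt, response_a, response_b, criteria="overall quality", swap_positions=False
):
    if swap_positions:
        # Swap which response is shown first to detect position bias.
        response_a, response_b = response_b, response_a
    return (
        f"User prompt: {user_prompt}\n\n"
        f"Response A:\n{response_a}\n\n"
        f"Response B:\n{response_b}\n\n"
        f"Which response is better with respect to {criteria}? "
        "Answer 'A' or 'B', followed by a one-sentence justification."
    )
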
# Go button.
#
# Sections:
#   1. Model outputs.
#   2. Aggregated output.
#   3. Judging underneath each output.
#      Highlight in green the output the council judged best.
#      Show a graph breakdown of scores and justifications (by criteria, # wins and # losses).
#      Show the final overall score.
#      Highlight in red the output the council judged worst.
# Judging section:
#   Show the agreement matrix (see the sketch below).
#   Show a bar graph of self-bias.
#   Plot contrarianism vs. conviction (scatter plot).
#   Show per-judge scores.
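

# One plausible way to compute the agreement matrix: given each judge's
# verdicts ('A' or 'B') over the same set of comparisons, record the fraction
# of comparisons on which each pair of judges agrees. The verdicts-dict shape
# is an assumption about the judging output format.
def agreement_matrix(verdicts):
    """verdicts: dict mapping judge name to a list of 'A'/'B' verdicts."""
    judges = list(verdicts)
    num_comparisons = len(next(iter(verdicts.values())))
    return {
        (j1, j2): sum(a == b for a, b in zip(verdicts[j1], verdicts[j2])) / num_comparisons
        for j1 in judges
        for j2 in judges
    }
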
# Cost reporting (see the sketch below):
#   Calculate total cost.
#   Show total tokens used.
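

# A back-of-the-envelope cost estimate, assuming prices quoted in USD per one
# million tokens. The price table below is placeholder data, not authoritative
# published rates; token counts would come from each provider's usage fields.
PLACEHOLDER_PRICES_PER_1M_TOKENS = {
    # model identifier: (input price, output price) — placeholder values.
    "openai://gpt-4o-mini": (0.15, 0.60),
    "anthropic://claude-3-haiku-20240307": (0.25, 1.25),
}


def estimate_cost(model_identifier, input_tokens, output_tokens):
    input_price, output_price = PLACEHOLDER_PRICES_PER_1M_TOKENS.get(
        model_identifier, (0.0, 0.0)
    )
    return (input_tokens * input_price + output_tokens * output_price) / 1_000_000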