sandbox / constants.py
justinxzhao's picture
Add token usage tracking for openai and fix token usage tracking for anthropic.
1afb9ca
import os
import dotenv
# Load variables from a local .env file (if one exists) into the process
# environment so the os.getenv("DEBUG_MODE") checks below can see them.
dotenv.load_dotenv()
# Council presets: preset name -> list of member models, each written as a
# "provider://model-name" string. The preset set is chosen once, at import
# time, from the DEBUG_MODE environment variable.
if os.getenv("DEBUG_MODE") != "True":
    # Default presets, used whenever DEBUG_MODE is anything other than the
    # exact string "True" (including unset).
    LLM_COUNCIL_MEMBERS = {
        "Smalls": [
            "openai://gpt-4o-mini",
            "together://meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
            "together://meta-llama/Llama-3.2-3B-Instruct-Turbo",
            "vertex://gemini-1.5-flash-001",
            "anthropic://claude-3-haiku-20240307",
        ],
        "Flagships": [
            "openai://gpt-4o",
            "together://meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
            "vertex://gemini-1.5-pro-002",
            "anthropic://claude-3-5-sonnet",
        ],
        "OpenAI": [
            "openai://gpt-4o",
            "openai://gpt-4o-mini",
        ],
    }
else:
    # Debug presets: "Flagships" lists the same small models as "Smalls"
    # (minus gpt-4o-mini), and there is no "OpenAI" preset in this mode.
    LLM_COUNCIL_MEMBERS = {
        "Smalls": [
            "together://meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
            "together://meta-llama/Llama-3.2-3B-Instruct-Turbo",
            "anthropic://claude-3-haiku-20240307",
            "openai://gpt-4o-mini",
        ],
        "Flagships": [
            "together://meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
            "together://meta-llama/Llama-3.2-3B-Instruct-Turbo",
            "anthropic://claude-3-haiku-20240307",
        ],
    }
# Avatar string for each model identifier ("provider://model-name").
# NOTE(review): every value is an empty string here, and several models used
# in LLM_COUNCIL_MEMBERS / AGGREGATORS (e.g. "openai://gpt-4o") have no entry
# -- confirm that callers look keys up with a fallback.
PROVIDER_TO_AVATAR_MAP = {
    "openai://gpt-4o-mini": "",
    "anthropic://claude-3-5-sonnet": "",
    "vertex://gemini-1.5-flash-001": "",
    "together://meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": "",
    "together://meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo": "",
    "together://meta-llama/Llama-3.2-3B-Instruct-Turbo": "",
    "anthropic://claude-3-haiku-20240307": "",
}
# Human-readable display name for each model identifier
# ("provider://model-name").
#
# Existing keys keep their original order; models that appear in
# LLM_COUNCIL_MEMBERS / AGGREGATORS but previously had no display name are
# appended at the end.
LLM_TO_UI_NAME_MAP = {
    # gpt-4o-mini belongs to the GPT-4o family, not GPT-4 Turbo.
    "openai://gpt-4o-mini": "GPT-4o Mini",
    # claude-3-5-sonnet is Claude 3.5 Sonnet, a different model from
    # Claude 3 Sonnet.
    "anthropic://claude-3-5-sonnet": "Claude 3.5 Sonnet",
    "vertex://gemini-1.5-flash-001": "Gemini 1.5 Flash",
    "together://meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": "Llama 3.1 8B Instruct",
    "together://meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo": "Llama 3.1 70B Instruct",
    "together://meta-llama/Llama-3.2-3B-Instruct-Turbo": "Llama 3.2 3B Instruct",
    "anthropic://claude-3-haiku-20240307": "Claude 3 Haiku",
    # Models used by the council presets / aggregator list that were missing
    # a display name.
    "openai://gpt-4o": "GPT-4o",
    "together://meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo": "Llama 3.1 405B Instruct",
    "vertex://gemini-1.5-pro-002": "Gemini 1.5 Pro",
}
# Models offered as aggregators, each written as "provider://model-name".
# The list is chosen once, at import time, from the DEBUG_MODE environment
# variable.
if os.getenv("DEBUG_MODE") != "True":
    # Default list, used whenever DEBUG_MODE is anything other than the exact
    # string "True" (including unset).
    AGGREGATORS = [
        "anthropic://claude-3-haiku-20240307",
        "together://meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
        "together://meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
        "together://meta-llama/Llama-3.2-3B-Instruct-Turbo",
        "openai://gpt-4o",
        "openai://gpt-4o-mini",
    ]
else:
    # Debug mode exposes a single aggregator.
    AGGREGATORS = ["together://meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo"]
# Fix the aggregator step.
# Add a judging step.
# Add visualizations.
# import streamlit as st
# from components import llm_council_selector
# st.title("LLM Council Selector")
# selected_models = llm_council_selector()
# if selected_models is not None:
# st.write("Selected Models:", selected_models)
# else:
# st.write("No models selected or component didn't return a value.")
# Choose your council.
# Pre-selected.
# Smalls: GPT-4o-mini, llama-3.1-70b, qwen-2.0-70b
# Flagships: GPT-4o, llama-3.1-405b, qwen-2.0-110b, gemini, claude-3.5-sonnet
# Best: chatgpt-4o-latest, gemini-1.5-pro-exp-0827, grok-2-2024-08-13, claude-3-5-sonnet-20240620, llama-3.1-405b-instruct
# Custom:
# Choose from a list of available models.
# All:
# All available models.
# Choose aggregator.
# Aggregators are models proficient in synthesizing responses from other models into a single, high-quality output. An effective aggregator should maintain or enhance output quality even when
# integrating inputs that are of lesser quality than its own.
# Choices:
# - 4o-latest
# - gemini-1.5
# - grok-2
# - claude-3.5-sonnet
# - llama-3.1-405b-instruct
# Provide a prompt. (Or pre-canned prompts.)
# Paste chat history.
# Checkbox, enable judging.
#
# If checked, Judging config:
# Single sided
# Provide criteria. (or default).
# If pairwise, choose granularity (or default).
# Choose criteria. (or default).
# Enable position swapping?
# Go button.
# Sections.
# 1. Model outputs.
# 2. Aggregated output.
# 3. Judging underneath each output.
# Highlight in green, the output that was best, as determined by council.
# Show graph breakdown of scores and justifications. (by criteria, # wins and # losses)
# Show final overall score.
# Highlight in red, the output that was worst, as determined by council.
# Judging section.
# Show agreement matrix.
# Show bar graph of self-bias.
# Plot contrarianism vs. conviction (scatter plot)
# Show per-judge scores.
# Calculate total cost.
# Show total tokens used.
# """
# type: [single, pairwise]
# [single]
# - criteria:
# - name
# - weight
# - description
# - scoring
# [pairwise]
# - granularity: [fine, coarse]
# - ties_allowed: [yes, no]
# - position_swapping: [yes, no]
# - reference_model: [model_name]
# - criteria:
# - name
# - weight
# - description
# """