"""Static configuration for the LLM council: presets, display metadata, aggregators.

Models are identified everywhere by a ``provider://model-name`` string. The
same identifier is used as the key of the avatar and UI-name lookup tables, so
every model listed in a council preset or in ``AGGREGATORS`` must also have an
entry in both tables.
"""
import os

import dotenv

# Load variables from a local .env file into the process environment before
# DEBUG_MODE is read below, so the flag can be set there.
dotenv.load_dotenv()

# Debug mode is enabled only by the exact string "True"; any other value
# (including "true" or "1") selects the full configuration.
_DEBUG_MODE = os.getenv("DEBUG_MODE") == "True"

# Council presets: preset name -> list of member model identifiers.
if _DEBUG_MODE:
    # NOTE(review): in debug mode "Flagships" lists only small models —
    # presumably to keep debug runs cheap; confirm this is intentional.
    LLM_COUNCIL_MEMBERS = {
        "Smalls": [
            "together://meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
            "together://meta-llama/Llama-3.2-3B-Instruct-Turbo",
            "anthropic://claude-3-haiku-20240307",
            "openai://gpt-4o-mini",
        ],
        "Flagships": [
            "together://meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
            "together://meta-llama/Llama-3.2-3B-Instruct-Turbo",
            "anthropic://claude-3-haiku-20240307",
        ],
    }
else:
    LLM_COUNCIL_MEMBERS = {
        "Smalls": [
            "openai://gpt-4o-mini",
            "together://meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
            "together://meta-llama/Llama-3.2-3B-Instruct-Turbo",
            "vertex://gemini-1.5-flash-001",
            "anthropic://claude-3-haiku-20240307",
        ],
        "Flagships": [
            "openai://gpt-4o",
            "together://meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
            "vertex://gemini-1.5-pro-002",
            "anthropic://claude-3-5-sonnet",
        ],
        "OpenAI": [
            "openai://gpt-4o",
            "openai://gpt-4o-mini",
        ],
    }

# Avatar string per model identifier (all currently empty). Covers every model
# that appears in a council preset or in AGGREGATORS so lookups never raise
# KeyError.
PROVIDER_TO_AVATAR_MAP = {
    "openai://gpt-4o": "",
    "openai://gpt-4o-mini": "",
    "anthropic://claude-3-5-sonnet": "",
    "anthropic://claude-3-haiku-20240307": "",
    "vertex://gemini-1.5-pro-002": "",
    "vertex://gemini-1.5-flash-001": "",
    "together://meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo": "",
    "together://meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo": "",
    "together://meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": "",
    "together://meta-llama/Llama-3.2-3B-Instruct-Turbo": "",
}

# Human-readable label per model identifier. Same key coverage as
# PROVIDER_TO_AVATAR_MAP.
LLM_TO_UI_NAME_MAP = {
    "openai://gpt-4o": "GPT-4o",
    "openai://gpt-4o-mini": "GPT-4o Mini",
    "anthropic://claude-3-5-sonnet": "Claude 3.5 Sonnet",
    "anthropic://claude-3-haiku-20240307": "Claude 3 Haiku",
    "vertex://gemini-1.5-pro-002": "Gemini 1.5 Pro",
    "vertex://gemini-1.5-flash-001": "Gemini 1.5 Flash",
    "together://meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo": "Llama 3.1 405B Instruct",
    "together://meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo": "Llama 3.1 70B Instruct",
    "together://meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": "Llama 3.1 8B Instruct",
    "together://meta-llama/Llama-3.2-3B-Instruct-Turbo": "Llama 3.2 3B Instruct",
}

# Model identifiers available as aggregators; debug mode offers a single one.
if _DEBUG_MODE:
    AGGREGATORS = ["together://meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo"]
else:
    AGGREGATORS = [
        "anthropic://claude-3-haiku-20240307",
        "together://meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
        "together://meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
        "together://meta-llama/Llama-3.2-3B-Instruct-Turbo",
        "openai://gpt-4o",
        "openai://gpt-4o-mini",
    ]

# Fix the aggregator step.
# Add a judging step.
# Add visualizations.

# import streamlit as st
# from components import llm_council_selector

# st.title("LLM Council Selector")

# selected_models = llm_council_selector()

# if selected_models is not None:
#     st.write("Selected Models:", selected_models)
# else:
#     st.write("No models selected or component didn't return a value.")

# Choose your council.
# Pre-selected.
# Smalls: GPT-4o-mini, llama-3.1-70b, qwen-2.0-70b
# Flagships: GPT-4o, llama-3.1-405b, qwen-2.0-110b, gemini, claude-3.5-sonnet
# Best: chatgpt-4o-latest, gemini-1.5-pro-exp-0827, grok-2-2024-08-13, claude-3-5-sonnet-20240620, llama-3.1-405b-instruct
# Custom:
# Choose from a list of available models.
# All:
# All available models.

# Choose aggregator.
# Aggregators are models proficient in synthesizing responses from other models into a single, high-quality output. An effective aggregator should maintain or enhance output quality even when
# integrating inputs that are of lesser quality than its own.
# Choices:
# - 4o-latest
# - gemini-1.5
# - grok-2
# - claude-3.5-sonnet
# - llama-3.1-405b-instruct

# Provide a prompt. (Or pre-canned prompts.)
# Paste chat history.

# Checkbox, enable judging.
#
# If checked, Judging config:
# Single sided
# Provide criteria. (or default).
# If pairwise, choose granularity (or default).
# Choose criteria. (or default).
# Enable position swapping?

# Go button.
# Sections.
# 1. Model outputs.
# 2. Aggregated output.
# 3. Judging underneath each output.
# Highlight in green, the output that was best, as determined by council.
# Show graph breakdown of scores and justifications. (by criteria, # wins and # losses)
# Show final overall score.
# Highlight in red, the output that was worst, as determined by council.
# Judging section.
# Show agreement matrix.
# Show bar graph of self-bias.
# Plot contrarianism vs. conviction (scatter plot)
# Show per-judge scores.

# Calculate total cost.
# Show total tokens used.

# """
# type: [single, pairwise]
#
# [single]
# - criteria:
#   - name
#   - weight
#   - description
#   - scoring
#
# [pairwise]
# - granularity: [fine, coarse]
# - ties_allowed: [yes, no]
# - position_swapping: [yes, no]
# - reference_model: [model_name]
# - criteria:
#   - name
#   - weight
#   - description
# """