from __future__ import annotations

import os
from typing import Any

from pydantic import BaseModel, Field, SecretStr, ValidationError

from openhands.core.logger import LOG_DIR
from openhands.core.logger import openhands_logger as logger

class LLMConfig(BaseModel):
"""Configuration for the LLM model.
Attributes:
model: The model to use.
api_key: The API key to use.
base_url: The base URL for the API. This is necessary for local LLMs.
api_version: The version of the API.
aws_access_key_id: The AWS access key ID.
aws_secret_access_key: The AWS secret access key.
aws_region_name: The AWS region name.
num_retries: The number of retries to attempt.
retry_multiplier: The multiplier for the exponential backoff.
retry_min_wait: The minimum time to wait between retries, in seconds. This is exponential backoff minimum. For models with very low limits, this can be set to 15-20.
retry_max_wait: The maximum time to wait between retries, in seconds. This is exponential backoff maximum.
timeout: The timeout for the API.
max_message_chars: The approximate max number of characters in the content of an event included in the prompt to the LLM. Larger observations are truncated.
temperature: The temperature for the API.
top_p: The top p for the API.
top_k: The top k for the API.
custom_llm_provider: The custom LLM provider to use. This is undocumented in openhands, and normally not used. It is documented on the litellm side.
max_input_tokens: The maximum number of input tokens. Note that this is currently unused, and the value at runtime is actually the total tokens in OpenAI (e.g. 128,000 tokens for GPT-4).
max_output_tokens: The maximum number of output tokens. This is sent to the LLM.
input_cost_per_token: The cost per input token. This will available in logs for the user to check.
output_cost_per_token: The cost per output token. This will available in logs for the user to check.
ollama_base_url: The base URL for the OLLAMA API.
drop_params: Drop any unmapped (unsupported) params without causing an exception.
modify_params: Modify params allows litellm to do transformations like adding a default message, when a message is empty.
disable_vision: If model is vision capable, this option allows to disable image processing (useful for cost reduction).
caching_prompt: Use the prompt caching feature if provided by the LLM and supported by the provider.
log_completions: Whether to log LLM completions to the state.
log_completions_folder: The folder to log LLM completions to. Required if log_completions is True.
custom_tokenizer: A custom tokenizer to use for token counting.
native_tool_calling: Whether to use native tool calling if supported by the model. Can be True, False, or not set.
reasoning_effort: The effort to put into reasoning. This is a string that can be one of 'low', 'medium', 'high', or 'none'. Exclusive for o1 models.
seed: The seed to use for the LLM.
"""

    model: str = Field(default='claude-sonnet-4-20250514')
    api_key: SecretStr | None = Field(default=None)
    base_url: str | None = Field(default=None)
    api_version: str | None = Field(default=None)
    aws_access_key_id: SecretStr | None = Field(default=None)
    aws_secret_access_key: SecretStr | None = Field(default=None)
    aws_region_name: str | None = Field(default=None)
    openrouter_site_url: str = Field(default='https://docs.all-hands.dev/')
    openrouter_app_name: str = Field(default='OpenHands')
    # total wait time: 5 + 10 + 20 + 30 = 65 seconds
    num_retries: int = Field(default=4)
    retry_multiplier: float = Field(default=2)
    retry_min_wait: int = Field(default=5)
    retry_max_wait: int = Field(default=30)
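    # With the defaults above, successive waits roughly double from
    # retry_min_wait up to the retry_max_wait cap (5, 10, 20, then 30 seconds),
    # which is where the 65-second total noted above comes from.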
    timeout: int | None = Field(default=None)
    max_message_chars: int = Field(
        default=30_000
    )  # maximum number of characters in an observation's content when sent to the llm
    temperature: float = Field(default=0.0)
    top_p: float = Field(default=1.0)
    top_k: float | None = Field(default=None)
    custom_llm_provider: str | None = Field(default=None)
    max_input_tokens: int | None = Field(default=None)
    max_output_tokens: int | None = Field(default=None)
    input_cost_per_token: float | None = Field(default=None)
    output_cost_per_token: float | None = Field(default=None)
    ollama_base_url: str | None = Field(default=None)
    # This setting can be sent in each call to litellm
    drop_params: bool = Field(default=True)
    # Note: this setting is actually global, unlike drop_params
    modify_params: bool = Field(default=True)
    disable_vision: bool | None = Field(default=None)
    caching_prompt: bool = Field(default=True)
    log_completions: bool = Field(default=False)
    log_completions_folder: str = Field(default=os.path.join(LOG_DIR, 'completions'))
    custom_tokenizer: str | None = Field(default=None)
    native_tool_calling: bool | None = Field(default=None)
    reasoning_effort: str | None = Field(default='high')
    seed: int | None = Field(default=None)

    model_config = {'extra': 'forbid'}
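    # Note: `extra='forbid'` makes pydantic raise a ValidationError for unknown
    # keys, which is what `from_toml_section` below relies on to reject
    # misspelled or unsupported options in a toml section.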
    @classmethod
    def from_toml_section(cls, data: dict) -> dict[str, LLMConfig]:
        """
        Create a mapping of LLMConfig instances from a toml dictionary representing the [llm] section.

        The default configuration is built from all non-dict keys in data.
        Then, each key with a dict value (e.g. [llm.random_name]) is treated as a custom LLM configuration,
        and its values override the default configuration.

        Example:
        Apply generic LLM config with custom LLM overrides, e.g.
            [llm]
            model=...
            num_retries = 5

            [llm.claude]
            model="claude-3-5-sonnet"

        results in num_retries being applied to claude-3-5-sonnet.

        Returns:
            dict[str, LLMConfig]: A mapping where the key "llm" corresponds to the default configuration
            and additional keys represent custom configurations.
        """
        # Initialize the result mapping
        llm_mapping: dict[str, LLMConfig] = {}

        # Extract base config data (non-dict values)
        base_data = {}
        custom_sections: dict[str, dict] = {}
        for key, value in data.items():
            if isinstance(value, dict):
                custom_sections[key] = value
            else:
                base_data[key] = value

        # Try to create the base config
        try:
            base_config = cls.model_validate(base_data)
            llm_mapping['llm'] = base_config
        except ValidationError:
            logger.warning(
                'Cannot parse [llm] config from toml. Continuing with defaults.'
            )
            # If base config fails, create a default one
            base_config = cls()
            # Still add it to the mapping
            llm_mapping['llm'] = base_config

        # Process each custom section independently
        for name, overrides in custom_sections.items():
            try:
                # Merge base config with overrides
                merged = {**base_config.model_dump(), **overrides}
                custom_config = cls.model_validate(merged)
                llm_mapping[name] = custom_config
            except ValidationError:
                logger.warning(
                    f'Cannot parse [{name}] config from toml. This section will be skipped.'
                )
                # Skip this custom section but continue with others
                continue

        return llm_mapping

    def model_post_init(self, __context: Any) -> None:
        """Post-initialization hook to assign OpenRouter-related variables to environment variables.

        This ensures that these values are accessible to litellm at runtime.
        """
        super().model_post_init(__context)

        # Assign OpenRouter-specific variables to environment variables
        if self.openrouter_site_url:
            os.environ['OR_SITE_URL'] = self.openrouter_site_url
        if self.openrouter_app_name:
            os.environ['OR_APP_NAME'] = self.openrouter_app_name

        # Set an API version by default for Azure models
        # Required for newer models.
        # Azure issue: https://github.com/All-Hands-AI/OpenHands/issues/7755
        if self.model.startswith('azure') and self.api_version is None:
            self.api_version = '2024-12-01-preview'
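

# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original module): a minimal demo of how
# `from_toml_section` merges the generic [llm] section with a named override,
# mirroring the docstring example above. The dict literal stands in for what a
# TOML parser would produce for those sections; the model names and values are
# made up for illustration.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    toml_llm_section = {
        'model': 'gpt-4o',  # generic [llm] values
        'num_retries': 5,
        'claude': {  # [llm.claude] override section
            'model': 'claude-3-5-sonnet',
        },
    }
    configs = LLMConfig.from_toml_section(toml_llm_section)
    # The 'claude' config inherits num_retries=5 from the generic section.
    print(configs['llm'].model, configs['llm'].num_retries)
    print(configs['claude'].model, configs['claude'].num_retries)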