from __future__ import annotations from typing import Literal, cast from pydantic import BaseModel, Field, ValidationError from openhands.core import logger from openhands.core.config.llm_config import LLMConfig class NoOpCondenserConfig(BaseModel): """Configuration for NoOpCondenser.""" type: Literal['noop'] = Field('noop') model_config = {'extra': 'forbid'} class ObservationMaskingCondenserConfig(BaseModel): """Configuration for ObservationMaskingCondenser.""" type: Literal['observation_masking'] = Field('observation_masking') attention_window: int = Field( default=100, description='The number of most-recent events where observations will not be masked.', ge=1, ) model_config = {'extra': 'forbid'} class BrowserOutputCondenserConfig(BaseModel): """Configuration for the BrowserOutputCondenser.""" type: Literal['browser_output_masking'] = Field('browser_output_masking') attention_window: int = Field( default=1, description='The number of most recent browser output observations that will not be masked.', ge=1, ) class RecentEventsCondenserConfig(BaseModel): """Configuration for RecentEventsCondenser.""" type: Literal['recent'] = Field('recent') # at least one event by default, because the best guess is that it is the user task keep_first: int = Field( default=1, description='The number of initial events to condense.', ge=0, ) max_events: int = Field( default=100, description='Maximum number of events to keep.', ge=1 ) model_config = {'extra': 'forbid'} class LLMSummarizingCondenserConfig(BaseModel): """Configuration for LLMCondenser.""" type: Literal['llm'] = Field('llm') llm_config: LLMConfig = Field( ..., description='Configuration for the LLM to use for condensing.' ) # at least one event by default, because the best guess is that it's the user task keep_first: int = Field( default=1, description='Number of initial events to always keep in history.', ge=0, ) max_size: int = Field( default=100, description='Maximum size of the condensed history before triggering forgetting.', ge=2, ) max_event_length: int = Field( default=10_000, description='Maximum length of the event representations to be passed to the LLM.', ) model_config = {'extra': 'forbid'} class AmortizedForgettingCondenserConfig(BaseModel): """Configuration for AmortizedForgettingCondenser.""" type: Literal['amortized'] = Field('amortized') max_size: int = Field( default=100, description='Maximum size of the condensed history before triggering forgetting.', ge=2, ) # at least one event by default, because the best guess is that it's the user task keep_first: int = Field( default=1, description='Number of initial events to always keep in history.', ge=0, ) model_config = {'extra': 'forbid'} class LLMAttentionCondenserConfig(BaseModel): """Configuration for LLMAttentionCondenser.""" type: Literal['llm_attention'] = Field('llm_attention') llm_config: LLMConfig = Field( ..., description='Configuration for the LLM to use for attention.' ) max_size: int = Field( default=100, description='Maximum size of the condensed history before triggering forgetting.', ge=2, ) # at least one event by default, because the best guess is that it's the user task keep_first: int = Field( default=1, description='Number of initial events to always keep in history.', ge=0, ) model_config = {'extra': 'forbid'} class StructuredSummaryCondenserConfig(BaseModel): """Configuration for StructuredSummaryCondenser instances.""" type: Literal['structured'] = Field('structured') llm_config: LLMConfig = Field( ..., description='Configuration for the LLM to use for condensing.' ) # at least one event by default, because the best guess is that it's the user task keep_first: int = Field( default=1, description='Number of initial events to always keep in history.', ge=0, ) max_size: int = Field( default=100, description='Maximum size of the condensed history before triggering forgetting.', ge=2, ) max_event_length: int = Field( default=10_000, description='Maximum length of the event representations to be passed to the LLM.', ) model_config = {'extra': 'forbid'} class CondenserPipelineConfig(BaseModel): """Configuration for the CondenserPipeline. Not currently supported by the TOML or ENV_VAR configuration strategies. """ type: Literal['pipeline'] = Field('pipeline') condensers: list[CondenserConfig] = Field( default_factory=list, description='List of condenser configurations to be used in the pipeline.', ) model_config = {'extra': 'forbid'} # Type alias for convenience CondenserConfig = ( NoOpCondenserConfig | ObservationMaskingCondenserConfig | BrowserOutputCondenserConfig | RecentEventsCondenserConfig | LLMSummarizingCondenserConfig | AmortizedForgettingCondenserConfig | LLMAttentionCondenserConfig | StructuredSummaryCondenserConfig | CondenserPipelineConfig ) def condenser_config_from_toml_section( data: dict, llm_configs: dict | None = None ) -> dict[str, CondenserConfig]: """ Create a CondenserConfig instance from a toml dictionary representing the [condenser] section. For CondenserConfig, the handling is different since it's a union type. The type of condenser is determined by the 'type' field in the section. Example: Parse condenser config like: [condenser] type = "noop" For condensers that require an LLM config, you can specify the name of an LLM config: [condenser] type = "llm" llm_config = "my_llm" # References [llm.my_llm] section Args: data: The TOML dictionary representing the [condenser] section. llm_configs: Optional dictionary of LLMConfig objects keyed by name. Returns: dict[str, CondenserConfig]: A mapping where the key "condenser" corresponds to the configuration. """ # Initialize the result mapping condenser_mapping: dict[str, CondenserConfig] = {} # Process config try: # Determine which condenser type to use based on 'type' field condenser_type = data.get('type', 'noop') # Handle LLM config reference if needed if ( condenser_type in ('llm', 'llm_attention') and 'llm_config' in data and isinstance(data['llm_config'], str) ): llm_config_name = data['llm_config'] if llm_configs and llm_config_name in llm_configs: # Replace the string reference with the actual LLMConfig object data_copy = data.copy() data_copy['llm_config'] = llm_configs[llm_config_name] config = create_condenser_config(condenser_type, data_copy) else: logger.openhands_logger.warning( f"LLM config '{llm_config_name}' not found for condenser. Using default LLMConfig." ) # Create a default LLMConfig if the referenced one doesn't exist data_copy = data.copy() # Try to use the fallback 'llm' config if llm_configs is not None: data_copy['llm_config'] = llm_configs.get('llm') config = create_condenser_config(condenser_type, data_copy) else: config = create_condenser_config(condenser_type, data) condenser_mapping['condenser'] = config except (ValidationError, ValueError) as e: logger.openhands_logger.warning( f'Invalid condenser configuration: {e}. Using NoOpCondenserConfig.' ) # Default to NoOpCondenserConfig if config fails config = NoOpCondenserConfig(type='noop') condenser_mapping['condenser'] = config return condenser_mapping # For backward compatibility from_toml_section = condenser_config_from_toml_section def create_condenser_config(condenser_type: str, data: dict) -> CondenserConfig: """ Create a CondenserConfig instance based on the specified type. Args: condenser_type: The type of condenser to create. data: The configuration data. Returns: A CondenserConfig instance. Raises: ValueError: If the condenser type is unknown. ValidationError: If the provided data fails validation for the condenser type. """ # Mapping of condenser types to their config classes condenser_classes = { 'noop': NoOpCondenserConfig, 'observation_masking': ObservationMaskingCondenserConfig, 'recent': RecentEventsCondenserConfig, 'llm': LLMSummarizingCondenserConfig, 'amortized': AmortizedForgettingCondenserConfig, 'llm_attention': LLMAttentionCondenserConfig, 'structured': StructuredSummaryCondenserConfig, } if condenser_type not in condenser_classes: raise ValueError(f'Unknown condenser type: {condenser_type}') # Create and validate the config using direct instantiation # Explicitly handle ValidationError to provide more context try: config_class = condenser_classes[condenser_type] # Use type casting to help mypy understand the return type return cast(CondenserConfig, config_class(**data)) except ValidationError as e: # Just re-raise with a more descriptive message, but don't try to pass the errors # which can cause compatibility issues with different pydantic versions raise ValueError( f"Validation failed for condenser type '{condenser_type}': {e}" )