from __future__ import annotations
from litellm import supports_response_schema
from pydantic import BaseModel
from openhands.core.config.condenser_config import LLMAttentionCondenserConfig
from openhands.events.action.agent import CondensationAction
from openhands.llm.llm import LLM
from openhands.memory.condenser.condenser import (
Condensation,
RollingCondenser,
View,
)


class ImportantEventSelection(BaseModel):
"""Utility class for the `LLMAttentionCondenser` that forces the LLM to return a list of integers."""
ids: list[int]


class LLMAttentionCondenser(RollingCondenser):
"""Rolling condenser strategy that uses an LLM to select the most important events when condensing the history."""

    def __init__(self, llm: LLM, max_size: int = 100, keep_first: int = 1):
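        """Initialize the condenser.

        Args:
            llm: LLM used to rank events; it must support structured output via `response_schema`.
            max_size: Maximum number of events in the view before condensation is triggered.
            keep_first: Number of initial events that are always kept and never forgotten.
        """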
if keep_first >= max_size // 2:
raise ValueError(
f'keep_first ({keep_first}) must be less than half of max_size ({max_size})'
)
if keep_first < 0:
raise ValueError(f'keep_first ({keep_first}) cannot be negative')
if max_size < 1:
            raise ValueError(f'max_size ({max_size}) must be positive')
self.max_size = max_size
self.keep_first = keep_first
self.llm = llm
# This condenser relies on the `response_schema` feature, which is not supported by all LLMs
if not supports_response_schema(
model=self.llm.config.model,
custom_llm_provider=self.llm.config.custom_llm_provider,
):
raise ValueError(
"The LLM model must support the 'response_schema' parameter to use the LLMAttentionCondenser."
)
super().__init__()

    def get_condensation(self, view: View) -> Condensation:
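        """Ask the LLM which events matter most and build a `CondensationAction` that forgets the rest."""
        # Condense down to half of `max_size` so the very next event doesn't immediately re-trigger condensation.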
target_size = self.max_size // 2
head_event_ids = [event.id for event in view.events[: self.keep_first]]
events_from_tail = target_size - len(head_event_ids)
message: str = """You will be given a list of actions, observations, and thoughts from a coding agent.
Each item in the list has an identifier. Please sort the identifiers in order of how important the
contents of the item are for the next step of the coding agent's task, from most important to least
important."""
response = self.llm.completion(
messages=[
{'content': message, 'role': 'user'},
*[
{
'content': f'{e.id}\n{e.message}',
'role': 'user',
}
for e in view
],
],
response_format={
'type': 'json_schema',
'json_schema': {
'name': 'ImportantEventSelection',
'schema': ImportantEventSelection.model_json_schema(),
},
},
)
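        # Parse the structured JSON reply back into the Pydantic model and pull out the ranked event IDs.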
response_ids = ImportantEventSelection.model_validate_json(
response.choices[0].message.content
).ids
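        # Record the LLM usage metrics from this ranking request via the condenser's metadata hook.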
self.add_metadata('metrics', self.llm.metrics.get())
        # Drop any IDs that belong to the always-kept head, then truncate to the number of tail events we can keep
response_ids = [
response_id
for response_id in response_ids
if response_id not in head_event_ids
][:events_from_tail]
        # If the LLM returned too few IDs, walk backwards through the events and pad the list with the most recent IDs not already selected.
for event in reversed(view):
if len(response_ids) >= events_from_tail:
break
if event.id not in response_ids:
response_ids.append(event.id)
# Now that we've found the right number of events to keep, convert this into a list of events to forget.
        action = CondensationAction(
            forgotten_event_ids=[
                event.id
                for event in view
                if event.id not in response_ids and event.id not in head_event_ids
            ],
        )
        return Condensation(action=action)

    def should_condense(self, view: View) -> bool:
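        """Trigger condensation only once the view grows beyond `max_size` events."""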
return len(view) > self.max_size

    @classmethod
def from_config(cls, config: LLMAttentionCondenserConfig) -> LLMAttentionCondenser:
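        """Build an `LLMAttentionCondenser` from its pydantic configuration."""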
# This condenser cannot take advantage of prompt caching. If it happens
# to be set, we'll pay for the cache writes but never get a chance to
# save on a read.
llm_config = config.llm_config.model_copy()
llm_config.caching_prompt = False
return LLMAttentionCondenser(
llm=LLM(config=llm_config),
max_size=config.max_size,
keep_first=config.keep_first,
)


LLMAttentionCondenser.register_config(LLMAttentionCondenserConfig)
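

# A minimal usage sketch (illustrative, not part of the module): it assumes an `LLMConfig`
# importable from `openhands.core.config` and a model that supports structured outputs.
# The config field names mirror those read in `from_config` above.
#
#     from openhands.core.config import LLMConfig
#
#     config = LLMAttentionCondenserConfig(
#         llm_config=LLMConfig(model='gpt-4o'),
#         max_size=80,
#         keep_first=4,
#     )
#     condenser = LLMAttentionCondenser.from_config(config)
#
#     # Given `view: View` built from the agent's event history:
#     if condenser.should_condense(view):
#         condensation = condenser.get_condensation(view)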