Spaces:
Build error
Build error
File size: 5,879 Bytes
51ff9e5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 |
import json
from dataclasses import dataclass
from typing import Iterable
from openhands.core.logger import openhands_logger as logger
from openhands.events.event import Event, EventSource
from openhands.events.event_filter import EventFilter
from openhands.events.event_store_abc import EventStoreABC
from openhands.events.serialization.event import event_from_dict
from openhands.storage.files import FileStore
from openhands.storage.locations import (
get_conversation_dir,
get_conversation_event_filename,
get_conversation_events_dir,
)
from openhands.utils.shutdown_listener import should_continue
@dataclass(frozen=True)
class _CachePage:
events: list[dict] | None
start: int
end: int
def covers(self, global_index: int) -> bool:
if global_index < self.start:
return False
if global_index >= self.end:
return False
return True
def get_event(self, global_index: int) -> Event | None:
# If there was not actually a cached page, return None
if not self.events:
return None
local_index = global_index - self.start
return event_from_dict(self.events[local_index])
# Placeholder page whose range is empty (start > end): covers() is False for
# every index, so the first lookup in a search always loads a real page.
_DUMMY_PAGE = _CachePage(events=None, start=1, end=-1)
@dataclass
class EventStore(EventStoreABC):
    """
    A stored list of events backing a conversation

    Each event is read from ``file_store`` as its own JSON file. Pages of
    ``cache_size`` consecutive events are read from the ``event_cache``
    directory when available, to avoid many small reads.
    """

    sid: str
    file_store: FileStore
    user_id: str | None
    cur_id: int = -1  # We fix this in post init if it is not specified
    cache_size: int = 25

    def __post_init__(self) -> None:
        """Derive ``cur_id`` (the next free event id) from storage when it was not given."""
        if self.cur_id >= 0:
            return
        # Resolve the directory before the try block so the handler below can
        # always name it in its log message.
        events_dir = get_conversation_events_dir(self.sid, self.user_id)
        try:
            events = self.file_store.list(events_dir)
        except FileNotFoundError:
            logger.debug(f'No events found for session {self.sid} at {events_dir}')
            events = []
        if not events:
            self.cur_id = 0
            return
        # if we have events, we need to find the highest id to prepare for new events
        for event_filename in events:
            event_id = self._get_id_from_filename(event_filename)
            if event_id >= self.cur_id:
                self.cur_id = event_id + 1

    def search_events(
        self,
        start_id: int = 0,
        end_id: int | None = None,
        reverse: bool = False,
        filter: EventFilter | None = None,
        limit: int | None = None,
    ) -> Iterable[Event]:
        """
        Retrieve events from the event stream, optionally filtering out events of a given type
        and events marked as hidden.

        Args:
            start_id: The ID of the first event to retrieve. Defaults to 0.
            end_id: The ID of the last event to retrieve. Defaults to the last event in the stream.
            reverse: Whether to retrieve events in reverse order. Defaults to False.
            filter: EventFilter to use
            limit: Maximum number of events to yield. None (or 0) means no limit.

        Yields:
            Events from the stream that match the criteria.
        """
        if end_id is None:
            end_id = self.cur_id
        else:
            end_id += 1  # From inclusive to exclusive
        if reverse:
            step = -1
            # Walk the same [start_id, end_id) window backwards: swap the
            # bounds, then shift both down by one so the former exclusive end
            # becomes the inclusive first index.
            start_id, end_id = end_id, start_id
            start_id -= 1
            end_id -= 1
        else:
            step = 1

        cache_page = _DUMMY_PAGE
        num_results = 0
        for index in range(start_id, end_id, step):
            if not should_continue():
                return
            if not cache_page.covers(index):
                cache_page = self._load_cache_page_for_index(index)
            event = cache_page.get_event(index)
            if event is None:
                # Not available from the cache page: read the single event file.
                try:
                    event = self.get_event(index)
                except FileNotFoundError:
                    event = None
            if event:
                if not filter or filter.include(event):
                    yield event
                    num_results += 1
                    if limit and limit <= num_results:
                        return

    def get_event(self, id: int) -> Event:
        """Read and deserialize the single event stored under ``id``."""
        filename = self._get_filename_for_id(id, self.user_id)
        content = self.file_store.read(filename)
        data = json.loads(content)
        return event_from_dict(data)

    def get_latest_event(self) -> Event:
        """Return the event with id ``cur_id - 1``."""
        # NOTE(review): with cur_id == 0 this requests id -1; presumably the
        # file store raises for the missing file - confirm.
        return self.get_event(self.cur_id - 1)

    def get_latest_event_id(self) -> int:
        """Return ``cur_id - 1`` (which is -1 when ``cur_id`` is 0)."""
        return self.cur_id - 1

    def filtered_events_by_source(self, source: EventSource) -> Iterable[Event]:
        """Yield the events whose ``source`` equals ``source``."""
        # get_events() is not defined in this class; presumably it is
        # inherited from EventStoreABC - confirm.
        for event in self.get_events():
            if event.source == source:
                yield event

    def _get_filename_for_id(self, id: int, user_id: str | None) -> str:
        """Return the storage path of the event file for ``id``."""
        return get_conversation_event_filename(self.sid, id, user_id)

    def _get_filename_for_cache(self, start: int, end: int) -> str:
        """Return the storage path of the cache page for the range ``start``-``end``."""
        # 'event_cache/' is appended directly to the conversation directory;
        # no separator is added here.
        return f'{get_conversation_dir(self.sid, self.user_id)}event_cache/{start}-{end}.json'

    def _load_cache_page(self, start: int, end: int) -> _CachePage:
        """Read a page from the cache. Reading individual events is slow when there are a lot of them, so we use pages.

        A page that is missing or cannot be parsed yields a page with
        ``events=None``.
        """
        cache_filename = self._get_filename_for_cache(start, end)
        try:
            content = self.file_store.read(cache_filename)
            events = json.loads(content)
        except FileNotFoundError:
            events = None
        except json.JSONDecodeError:
            # The cache is only an optimization: treat a corrupt page like a
            # missing one so lookups fall back to the per-event files instead
            # of aborting the whole search.
            logger.warning(f'Ignoring unreadable event cache page {cache_filename}')
            events = None
        return _CachePage(events, start, end)

    def _load_cache_page_for_index(self, index: int) -> _CachePage:
        """Load the cache page whose range contains ``index``."""
        # Round down to the nearest multiple of cache_size to find the page start.
        page_start = index - index % self.cache_size
        return self._load_cache_page(page_start, page_start + self.cache_size)

    @staticmethod
    def _get_id_from_filename(filename: str) -> int:
        """Parse the event id from a path such as ``.../<id>.json``.

        Returns:
            The integer id, or -1 (after logging a warning) when the last
            path component does not start with an integer.
        """
        try:
            return int(filename.split('/')[-1].split('.')[0])
        except ValueError:
            logger.warning(f'get id from filename ({filename}) failed.')
            return -1
|