File size: 22,636 Bytes
51ff9e5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
from unittest.mock import MagicMock, patch

import pytest

from openhands.controller.agent import Agent
from openhands.controller.agent_controller import AgentController
from openhands.controller.state.state import State
from openhands.core.config import OpenHandsConfig
from openhands.events import EventSource
from openhands.events.action import CmdRunAction, MessageAction, RecallAction
from openhands.events.action.message import SystemMessageAction
from openhands.events.event import RecallType
from openhands.events.observation import (
    CmdOutputObservation,
    Observation,
    RecallObservation,
)
from openhands.events.stream import EventStream
from openhands.llm.llm import LLM
from openhands.llm.metrics import Metrics
from openhands.storage.memory import InMemoryFileStore


# Helper function to create events with sequential IDs and causes
def create_events(event_data):
    events = []
    # Import necessary types here to avoid repeated imports inside the loop
    from openhands.events.action import CmdRunAction, RecallAction
    from openhands.events.observation import CmdOutputObservation, RecallObservation

    for i, data in enumerate(event_data):
        event_type = data['type']
        source = data.get('source', EventSource.AGENT)
        kwargs = {}  # Arguments for the event constructor

        # Determine arguments based on event type
        if event_type == RecallAction:
            kwargs['query'] = data.get('query', '')
            kwargs['recall_type'] = data.get('recall_type', RecallType.KNOWLEDGE)
        elif event_type == RecallObservation:
            kwargs['content'] = data.get('content', '')
            kwargs['recall_type'] = data.get('recall_type', RecallType.KNOWLEDGE)
        elif event_type == CmdRunAction:
            kwargs['command'] = data.get('command', '')
        elif event_type == CmdOutputObservation:
            # Required args for CmdOutputObservation
            kwargs['content'] = data.get('content', '')
            kwargs['command'] = data.get('command', '')
            # Pass command_id via kwargs if present in data
            if 'command_id' in data:
                kwargs['command_id'] = data['command_id']
            # Pass metadata if present
            if 'metadata' in data:
                kwargs['metadata'] = data['metadata']
        else:  # Default for MessageAction, SystemMessageAction, etc.
            kwargs['content'] = data.get('content', '')

        # Instantiate the event
        event = event_type(**kwargs)

        # Assign internal attributes AFTER instantiation
        event._id = i + 1  # Assign sequential IDs starting from 1
        event._source = source
        # Assign _cause using cause_id from data, AFTER event._id is set
        if 'cause_id' in data:
            event._cause = data['cause_id']
        # If command_id was NOT passed via kwargs but cause_id exists,
        # pass cause_id as command_id to __init__ via kwargs for legacy handling
        # This needs to happen *before* instantiation if we want __init__ to handle it
        # Let's adjust the logic slightly:
        if event_type == CmdOutputObservation:
            if 'command_id' not in kwargs and 'cause_id' in data:
                kwargs['command_id'] = data['cause_id']  # Let __init__ handle this
            # Re-instantiate if we added command_id
            if 'command_id' in kwargs and event.command_id != kwargs['command_id']:
                event = event_type(**kwargs)
                event._id = i + 1
                event._source = source

        # Now assign _cause if it exists in data, after potential re-instantiation
        if 'cause_id' in data:
            event._cause = data['cause_id']

        events.append(event)
    return events


@pytest.fixture
def controller_fixture():
    mock_agent = MagicMock(spec=Agent)
    mock_agent.llm = MagicMock(spec=LLM)
    mock_agent.llm.metrics = Metrics()
    mock_agent.llm.config = OpenHandsConfig().get_llm_config()
    mock_agent.config = OpenHandsConfig().get_agent_config('CodeActAgent')

    mock_event_stream = MagicMock(spec=EventStream)
    mock_event_stream.sid = 'test_sid'
    mock_event_stream.file_store = InMemoryFileStore({})
    # Ensure get_latest_event_id returns an integer
    mock_event_stream.get_latest_event_id.return_value = -1

    controller = AgentController(
        agent=mock_agent,
        event_stream=mock_event_stream,
        max_iterations=10,
        sid='test_sid',
    )
    controller.state = State(session_id='test_sid')

    # Mock _first_user_message directly on the instance
    mock_first_user_message = MagicMock(spec=MessageAction)
    controller._first_user_message = MagicMock(return_value=mock_first_user_message)

    return controller, mock_first_user_message


# =============================================
# Test Cases for _apply_conversation_window
# =============================================


def test_basic_truncation(controller_fixture):
    controller, mock_first_user_message = controller_fixture

    controller.state.history = create_events(
        [
            {'type': SystemMessageAction, 'content': 'System Prompt'},  # 1
            {
                'type': MessageAction,
                'content': 'User Task 1',
                'source': EventSource.USER,
            },  # 2
            {'type': RecallAction, 'query': 'User Task 1'},  # 3
            {'type': RecallObservation, 'content': 'Recall result', 'cause_id': 3},  # 4
            {'type': CmdRunAction, 'command': 'ls'},  # 5
            {
                'type': CmdOutputObservation,
                'content': 'file1',
                'command': 'ls',
                'cause_id': 5,
            },  # 6
            {'type': CmdRunAction, 'command': 'pwd'},  # 7
            {
                'type': CmdOutputObservation,
                'content': '/dir',
                'command': 'pwd',
                'cause_id': 7,
            },  # 8
            {'type': CmdRunAction, 'command': 'cat file1'},  # 9
            {
                'type': CmdOutputObservation,
                'content': 'content',
                'command': 'cat file1',
                'cause_id': 9,
            },  # 10
        ]
    )
    mock_first_user_message.id = 2  # Set the ID of the mocked first user message

    # Calculation (RecallAction now essential):
    # History len = 10
    # Essentials = [sys(1), user(2), recall_act(3), recall_obs(4)] (len=4)
    # Non-essential count = 10 - 4 = 6
    # num_recent_to_keep = max(1, 6 // 2) = 3
    # slice_start_index = 10 - 3 = 7
    # recent_events_slice = history[7:] = [obs2(8), cmd3(9), obs3(10)]
    # Validation: remove leading obs2(8). validated_slice = [cmd3(9), obs3(10)]
    # Final = essentials + validated_slice = [sys(1), user(2), recall_act(3), recall_obs(4), cmd3(9), obs3(10)]
    # Expected IDs: [1, 2, 3, 4, 9, 10]. Length 6.
    truncated_events = controller._apply_conversation_window()

    assert len(truncated_events) == 6
    expected_ids = [1, 2, 3, 4, 9, 10]
    actual_ids = [e.id for e in truncated_events]
    assert actual_ids == expected_ids
    # Check no dangling observations at the start of the recent slice part
    # The first event of the validated slice is cmd3(9)
    assert not isinstance(truncated_events[4], Observation)  # Index adjusted


def test_no_system_message(controller_fixture):
    controller, mock_first_user_message = controller_fixture

    controller.state.history = create_events(
        [
            {
                'type': MessageAction,
                'content': 'User Task 1',
                'source': EventSource.USER,
            },  # 1
            {'type': RecallAction, 'query': 'User Task 1'},  # 2
            {'type': RecallObservation, 'content': 'Recall result', 'cause_id': 2},  # 3
            {'type': CmdRunAction, 'command': 'ls'},  # 4
            {
                'type': CmdOutputObservation,
                'content': 'file1',
                'command': 'ls',
                'cause_id': 4,
            },  # 5
            {'type': CmdRunAction, 'command': 'pwd'},  # 6
            {
                'type': CmdOutputObservation,
                'content': '/dir',
                'command': 'pwd',
                'cause_id': 6,
            },  # 7
            {'type': CmdRunAction, 'command': 'cat file1'},  # 8
            {
                'type': CmdOutputObservation,
                'content': 'content',
                'command': 'cat file1',
                'cause_id': 8,
            },  # 9
        ]
    )
    mock_first_user_message.id = 1

    # Calculation (RecallAction now essential):
    # History len = 9
    # Essentials = [user(1), recall_act(2), recall_obs(3)] (len=3)
    # Non-essential count = 9 - 3 = 6
    # num_recent_to_keep = max(1, 6 // 2) = 3
    # slice_start_index = 9 - 3 = 6
    # recent_events_slice = history[6:] = [obs2(7), cmd3(8), obs3(9)]
    # Validation: remove leading obs2(7). validated_slice = [cmd3(8), obs3(9)]
    # Final = essentials + validated_slice = [user(1), recall_act(2), recall_obs(3), cmd3(8), obs3(9)]
    # Expected IDs: [1, 2, 3, 8, 9]. Length 5.
    truncated_events = controller._apply_conversation_window()

    assert len(truncated_events) == 5
    expected_ids = [1, 2, 3, 8, 9]
    actual_ids = [e.id for e in truncated_events]
    assert actual_ids == expected_ids


def test_no_recall_observation(controller_fixture):
    controller, mock_first_user_message = controller_fixture

    controller.state.history = create_events(
        [
            {'type': SystemMessageAction, 'content': 'System Prompt'},  # 1
            {
                'type': MessageAction,
                'content': 'User Task 1',
                'source': EventSource.USER,
            },  # 2
            {'type': RecallAction, 'query': 'User Task 1'},  # 3 (Recall Action exists)
            # Recall Observation is missing
            {'type': CmdRunAction, 'command': 'ls'},  # 4
            {
                'type': CmdOutputObservation,
                'content': 'file1',
                'command': 'ls',
                'cause_id': 4,
            },  # 5
            {'type': CmdRunAction, 'command': 'pwd'},  # 6
            {
                'type': CmdOutputObservation,
                'content': '/dir',
                'command': 'pwd',
                'cause_id': 6,
            },  # 7
            {'type': CmdRunAction, 'command': 'cat file1'},  # 8
            {
                'type': CmdOutputObservation,
                'content': 'content',
                'command': 'cat file1',
                'cause_id': 8,
            },  # 9
        ]
    )
    mock_first_user_message.id = 2

    # Calculation (RecallAction essential only if RecallObs exists):
    # History len = 9
    # Essentials = [sys(1), user(2)] (len=2) - RecallObs missing, so RecallAction not essential here
    # Non-essential count = 9 - 2 = 7
    # num_recent_to_keep = max(1, 7 // 2) = 3
    # slice_start_index = 9 - 3 = 6
    # recent_events_slice = history[6:] = [obs2(7), cmd3(8), obs3(9)]
    # Validation: remove leading obs2(7). validated_slice = [cmd3(8), obs3(9)]
    # Final = essentials + validated_slice = [sys(1), user(2), recall_action(3), cmd_cat(8), obs_cat(9)]
    # Expected IDs: [1, 2, 3, 8, 9]. Length 5.
    truncated_events = controller._apply_conversation_window()

    assert len(truncated_events) == 5
    expected_ids = [1, 2, 3, 8, 9]
    actual_ids = [e.id for e in truncated_events]
    assert actual_ids == expected_ids


def test_short_history_no_truncation(controller_fixture):
    controller, mock_first_user_message = controller_fixture

    history = create_events(
        [
            {'type': SystemMessageAction, 'content': 'System Prompt'},  # 1
            {
                'type': MessageAction,
                'content': 'User Task 1',
                'source': EventSource.USER,
            },  # 2
            {'type': RecallAction, 'query': 'User Task 1'},  # 3
            {'type': RecallObservation, 'content': 'Recall result', 'cause_id': 3},  # 4
            {'type': CmdRunAction, 'command': 'ls'},  # 5
            {
                'type': CmdOutputObservation,
                'content': 'file1',
                'command': 'ls',
                'cause_id': 5,
            },  # 6
        ]
    )
    controller.state.history = history
    mock_first_user_message.id = 2

    # Calculation (RecallAction now essential):
    # History len = 6
    # Essentials = [sys(1), user(2), recall_act(3), recall_obs(4)] (len=4)
    # Non-essential count = 6 - 4 = 2
    # num_recent_to_keep = max(1, 2 // 2) = 1
    # slice_start_index = 6 - 1 = 5
    # recent_events_slice = history[5:] = [obs1(6)]
    # Validation: remove leading obs1(6). validated_slice = []
    # Final = essentials + validated_slice = [sys(1), user(2), recall_act(3), recall_obs(4)]
    # Expected IDs: [1, 2, 3, 4]. Length 4.
    truncated_events = controller._apply_conversation_window()

    assert len(truncated_events) == 4
    expected_ids = [1, 2, 3, 4]
    actual_ids = [e.id for e in truncated_events]
    assert actual_ids == expected_ids


def test_only_essential_events(controller_fixture):
    controller, mock_first_user_message = controller_fixture

    history = create_events(
        [
            {'type': SystemMessageAction, 'content': 'System Prompt'},  # 1
            {
                'type': MessageAction,
                'content': 'User Task 1',
                'source': EventSource.USER,
            },  # 2
            {'type': RecallAction, 'query': 'User Task 1'},  # 3
            {'type': RecallObservation, 'content': 'Recall result', 'cause_id': 3},  # 4
        ]
    )
    controller.state.history = history
    mock_first_user_message.id = 2

    # Calculation (RecallAction now essential):
    # History len = 4
    # Essentials = [sys(1), user(2), recall_act(3), recall_obs(4)] (len=4)
    # Non-essential count = 4 - 4 = 0
    # num_recent_to_keep = max(1, 0 // 2) = 1
    # slice_start_index = 4 - 1 = 3
    # recent_events_slice = history[3:] = [recall_obs(4)]
    # Validation: remove leading recall_obs(4). validated_slice = []
    # Final = essentials + validated_slice = [sys(1), user(2), recall_act(3), recall_obs(4)]
    # Expected IDs: [1, 2, 3, 4]. Length 4.
    truncated_events = controller._apply_conversation_window()

    assert len(truncated_events) == 4
    expected_ids = [1, 2, 3, 4]
    actual_ids = [e.id for e in truncated_events]
    assert actual_ids == expected_ids


def test_dangling_observations_at_cut_point(controller_fixture):
    controller, mock_first_user_message = controller_fixture

    history_forced_dangle = create_events(
        [
            {'type': SystemMessageAction, 'content': 'System Prompt'},  # 1
            {
                'type': MessageAction,
                'content': 'User Task 1',
                'source': EventSource.USER,
            },  # 2
            {'type': RecallAction, 'query': 'User Task 1'},  # 3
            {'type': RecallObservation, 'content': 'Recall result', 'cause_id': 3},  # 4
            # --- Slice calculation should start here ---
            {
                'type': CmdOutputObservation,
                'content': 'dangle1',
                'command': 'cmd_unknown',
            },  # 5 (Dangling)
            {
                'type': CmdOutputObservation,
                'content': 'dangle2',
                'command': 'cmd_unknown',
            },  # 6 (Dangling)
            {'type': CmdRunAction, 'command': 'cmd1'},  # 7
            {
                'type': CmdOutputObservation,
                'content': 'obs1',
                'command': 'cmd1',
                'cause_id': 7,
            },  # 8
            {'type': CmdRunAction, 'command': 'cmd2'},  # 9
            {
                'type': CmdOutputObservation,
                'content': 'obs2',
                'command': 'cmd2',
                'cause_id': 9,
            },  # 10
        ]
    )  # 10 events total
    controller.state.history = history_forced_dangle
    mock_first_user_message.id = 2

    # Calculation (RecallAction now essential):
    # History len = 10
    # Essentials = [sys(1), user(2), recall_act(3), recall_obs(4)] (len=4)
    # Non-essential count = 10 - 4 = 6
    # num_recent_to_keep = max(1, 6 // 2) = 3
    # slice_start_index = 10 - 3 = 7
    # recent_events_slice = history[7:] = [obs1(8), cmd2(9), obs2(10)]
    # Validation: remove leading obs1(8). validated_slice = [cmd2(9), obs2(10)]
    # Final = essentials + validated_slice = [sys(1), user(2), recall_act(3), recall_obs(4), cmd2(9), obs2(10)]
    # Expected IDs: [1, 2, 3, 4, 9, 10]. Length 6.
    truncated_events = controller._apply_conversation_window()

    assert len(truncated_events) == 6
    expected_ids = [1, 2, 3, 4, 9, 10]
    actual_ids = [e.id for e in truncated_events]
    assert actual_ids == expected_ids
    # Verify dangling observations 5 and 6 were removed (implicitly by slice start and validation)


def test_only_dangling_observations_in_recent_slice(controller_fixture):
    controller, mock_first_user_message = controller_fixture

    history = create_events(
        [
            {'type': SystemMessageAction, 'content': 'System Prompt'},  # 1
            {
                'type': MessageAction,
                'content': 'User Task 1',
                'source': EventSource.USER,
            },  # 2
            {'type': RecallAction, 'query': 'User Task 1'},  # 3
            {'type': RecallObservation, 'content': 'Recall result', 'cause_id': 3},  # 4
            # --- Slice calculation should start here ---
            {
                'type': CmdOutputObservation,
                'content': 'dangle1',
                'command': 'cmd_unknown',
            },  # 5 (Dangling)
            {
                'type': CmdOutputObservation,
                'content': 'dangle2',
                'command': 'cmd_unknown',
            },  # 6 (Dangling)
        ]
    )  # 6 events total
    controller.state.history = history
    mock_first_user_message.id = 2

    # Calculation (RecallAction now essential):
    # History len = 6
    # Essentials = [sys(1), user(2), recall_act(3), recall_obs(4)] (len=4)
    # Non-essential count = 6 - 4 = 2
    # num_recent_to_keep = max(1, 2 // 2) = 1
    # slice_start_index = 6 - 1 = 5
    # recent_events_slice = history[5:] = [dangle2(6)]
    # Validation: remove leading dangle2(6). validated_slice = [] (Corrected based on user feedback/bugfix)
    # Final = essentials + validated_slice = [sys(1), user(2), recall_act(3), recall_obs(4)]
    # Expected IDs: [1, 2, 3, 4]. Length 4.
    with patch(
        'openhands.controller.agent_controller.logger.warning'
    ) as mock_log_warning:
        truncated_events = controller._apply_conversation_window()

        assert len(truncated_events) == 4
        expected_ids = [1, 2, 3, 4]
        actual_ids = [e.id for e in truncated_events]
        assert actual_ids == expected_ids
        # Verify dangling observations 5 and 6 were removed

        # Check that the specific warning was logged exactly once
        assert mock_log_warning.call_count == 1

        # Check the essential parts of the arguments, allowing for variations like stacklevel
        call_args, call_kwargs = mock_log_warning.call_args
        expected_message_substring = 'All recent events are dangling observations, which we truncate. This means the agent has only the essential first events. This should not happen.'
        assert expected_message_substring in call_args[0]
        assert 'extra' in call_kwargs
        assert call_kwargs['extra'].get('session_id') == 'test_sid'


def test_empty_history(controller_fixture):
    controller, _ = controller_fixture
    controller.state.history = []

    truncated_events = controller._apply_conversation_window()
    assert truncated_events == []


def test_multiple_user_messages(controller_fixture):
    controller, mock_first_user_message = controller_fixture

    history = create_events(
        [
            {'type': SystemMessageAction, 'content': 'System Prompt'},  # 1
            {
                'type': MessageAction,
                'content': 'User Task 1',
                'source': EventSource.USER,
            },  # 2 (First)
            {'type': RecallAction, 'query': 'User Task 1'},  # 3
            {
                'type': RecallObservation,
                'content': 'Recall result 1',
                'cause_id': 3,
            },  # 4
            {'type': CmdRunAction, 'command': 'cmd1'},  # 5
            {
                'type': CmdOutputObservation,
                'content': 'obs1',
                'command': 'cmd1',
                'cause_id': 5,
            },  # 6
            {
                'type': MessageAction,
                'content': 'User Task 2',
                'source': EventSource.USER,
            },  # 7 (Second)
            {'type': RecallAction, 'query': 'User Task 2'},  # 8
            {
                'type': RecallObservation,
                'content': 'Recall result 2',
                'cause_id': 8,
            },  # 9
            {'type': CmdRunAction, 'command': 'cmd2'},  # 10
            {
                'type': CmdOutputObservation,
                'content': 'obs2',
                'command': 'cmd2',
                'cause_id': 10,
            },  # 11
        ]
    )  # 11 events total
    controller.state.history = history
    mock_first_user_message.id = 2  # Explicitly set the first user message ID

    # Calculation (RecallAction now essential):
    # History len = 11
    # Essentials = [sys(1), user1(2), recall_act1(3), recall_obs1(4)] (len=4)
    # Non-essential count = 11 - 4 = 7
    # num_recent_to_keep = max(1, 7 // 2) = 3
    # slice_start_index = 11 - 3 = 8
    # recent_events_slice = history[8:] = [recall_obs2(9), cmd2(10), obs2(11)]
    # Validation: remove leading recall_obs2(9). validated_slice = [cmd2(10), obs2(11)]
    # Final = essentials + validated_slice = [sys(1), user1(2), recall_act1(3), recall_obs1(4)] + [cmd2(10), obs2(11)]
    # Expected IDs: [1, 2, 3, 4, 10, 11]. Length 6.
    truncated_events = controller._apply_conversation_window()

    assert len(truncated_events) == 6
    expected_ids = [1, 2, 3, 4, 10, 11]
    actual_ids = [e.id for e in truncated_events]
    assert actual_ids == expected_ids

    # Verify the second user message (ID 7) was NOT kept
    assert not any(event.id == 7 for event in truncated_events)
    # Verify the first user message (ID 2) is present
    assert any(event.id == 2 for event in truncated_events)