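"""Tests for event serialization: the derived ``success`` flag on command
output observations and round-tripping of attached LLM metrics."""
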
from openhands.events.action import MessageAction
from openhands.events.observation import CmdOutputMetadata, CmdOutputObservation
from openhands.events.serialization import event_from_dict, event_to_dict
from openhands.llm.metrics import Cost, Metrics, ResponseLatency, TokenUsage


def test_command_output_success_serialization():
    # Test successful command
    obs = CmdOutputObservation(
        command='ls',
        content='file1.txt\nfile2.txt',
        metadata=CmdOutputMetadata(exit_code=0),
    )
    serialized = event_to_dict(obs)
    assert serialized['success'] is True

    # Test failed command
    obs = CmdOutputObservation(
        command='ls',
        content='No such file or directory',
        metadata=CmdOutputMetadata(exit_code=1),
    )
    serialized = event_to_dict(obs)
    assert serialized['success'] is False
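

# A minimal round-trip sketch, not part of the original assertions above: it
# reuses only the helpers already exercised in this module (event_to_dict and
# event_from_dict) and assumes that content and the metadata-derived success
# flag survive a serialize/deserialize cycle.
def test_command_output_roundtrip_sketch():
    obs = CmdOutputObservation(
        command='echo hi',
        content='hi',
        metadata=CmdOutputMetadata(exit_code=0),
    )
    deserialized = event_from_dict(event_to_dict(obs))
    assert deserialized.content == 'hi'
    # Re-serializing should reproduce the success flag, assuming it is derived
    # from the preserved exit code as the 0/1 cases above suggest.
    assert event_to_dict(deserialized)['success'] is True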


def test_metrics_basic_serialization():
    # Create a basic action with only accumulated_cost
    action = MessageAction(content='Hello, world!')
    metrics = Metrics()
    metrics.accumulated_cost = 0.03
    action._llm_metrics = metrics

    # Test serialization
    serialized = event_to_dict(action)
    assert 'llm_metrics' in serialized
    assert serialized['llm_metrics']['accumulated_cost'] == 0.03
    assert serialized['llm_metrics']['costs'] == []
    assert serialized['llm_metrics']['response_latencies'] == []
    assert serialized['llm_metrics']['token_usages'] == []

    # Test deserialization
    deserialized = event_from_dict(serialized)
    assert deserialized.llm_metrics is not None
    assert deserialized.llm_metrics.accumulated_cost == 0.03
    assert len(deserialized.llm_metrics.costs) == 0
    assert len(deserialized.llm_metrics.response_latencies) == 0
    assert len(deserialized.llm_metrics.token_usages) == 0
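

# For reference, the serialized payload asserted above takes roughly this
# shape. This is a sketch inferred from the assertions, not a guaranteed
# schema; keys not asserted here are elided:
#
#     {
#         ...,
#         'llm_metrics': {
#             'accumulated_cost': 0.03,
#             'costs': [],
#             'response_latencies': [],
#             'token_usages': [],
#         },
#     }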


def test_metrics_full_serialization():
    # Create an observation with all metrics fields
    obs = CmdOutputObservation(
        command='ls',
        content='test.txt',
        metadata=CmdOutputMetadata(exit_code=0),
    )
    metrics = Metrics(model_name='test-model')
    metrics.accumulated_cost = 0.03

    # Add a cost entry (appended to the private list that backs the
    # Metrics.costs property read back in the assertions below)
    cost = Cost(model='test-model', cost=0.02)
    metrics._costs.append(cost)

    # Add a response latency
    latency = ResponseLatency(model='test-model', latency=0.5, response_id='test-id')
    metrics.response_latencies = [latency]

    # Add token usage
    usage = TokenUsage(
        model='test-model',
        prompt_tokens=10,
        completion_tokens=20,
        cache_read_tokens=0,
        cache_write_tokens=0,
        response_id='test-id',
    )
    metrics.token_usages = [usage]

    obs._llm_metrics = metrics

    # Test serialization
    serialized = event_to_dict(obs)
    assert 'llm_metrics' in serialized
    metrics_dict = serialized['llm_metrics']
    assert metrics_dict['accumulated_cost'] == 0.03
    assert len(metrics_dict['costs']) == 1
    assert metrics_dict['costs'][0]['cost'] == 0.02
    assert len(metrics_dict['response_latencies']) == 1
    assert metrics_dict['response_latencies'][0]['latency'] == 0.5
    assert len(metrics_dict['token_usages']) == 1
    assert metrics_dict['token_usages'][0]['prompt_tokens'] == 10
    assert metrics_dict['token_usages'][0]['completion_tokens'] == 20

    # Test deserialization
    deserialized = event_from_dict(serialized)
    assert deserialized.llm_metrics is not None
    assert deserialized.llm_metrics.accumulated_cost == 0.03
    assert len(deserialized.llm_metrics.costs) == 1
    assert deserialized.llm_metrics.costs[0].cost == 0.02
    assert len(deserialized.llm_metrics.response_latencies) == 1
    assert deserialized.llm_metrics.response_latencies[0].latency == 0.5
    assert len(deserialized.llm_metrics.token_usages) == 1
    assert deserialized.llm_metrics.token_usages[0].prompt_tokens == 10
    assert deserialized.llm_metrics.token_usages[0].completion_tokens == 20


def test_metrics_none_serialization():
    # Test when metrics is None
    obs = CmdOutputObservation(
        command='ls',
        content='test.txt',
        metadata=CmdOutputMetadata(exit_code=0),
    )
    obs._llm_metrics = None

    # Test serialization
    serialized = event_to_dict(obs)
    assert 'llm_metrics' not in serialized

    # Test deserialization
    deserialized = event_from_dict(serialized)
    assert deserialized.llm_metrics is None
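

# A hedged parametrized variant, assuming pytest as the runner (the usual
# convention for test_* modules) and that every nonzero exit code maps to
# success=False, consistent with the 0/1 cases asserted earlier.
import pytest


@pytest.mark.parametrize(
    'exit_code, expected',
    [(0, True), (1, False), (127, False)],
)
def test_success_flag_tracks_exit_code_sketch(exit_code, expected):
    obs = CmdOutputObservation(
        command='true',
        content='',
        metadata=CmdOutputMetadata(exit_code=exit_code),
    )
    assert event_to_dict(obs)['success'] is expected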