from openhands.events.action import MessageAction
from openhands.events.observation import CmdOutputMetadata, CmdOutputObservation
from openhands.events.serialization import event_from_dict, event_to_dict
from openhands.llm.metrics import Cost, Metrics, ResponseLatency, TokenUsage


def test_command_output_success_serialization():
    # Test successful command
    obs = CmdOutputObservation(
        command='ls',
        content='file1.txt\nfile2.txt',
        metadata=CmdOutputMetadata(exit_code=0),
    )
    serialized = event_to_dict(obs)
    assert serialized['success'] is True

    # Test failed command
    obs = CmdOutputObservation(
        command='ls',
        content='No such file or directory',
        metadata=CmdOutputMetadata(exit_code=1),
    )
    serialized = event_to_dict(obs)
    assert serialized['success'] is False


def test_metrics_basic_serialization():
    # Create a basic action with only accumulated_cost
    action = MessageAction(content='Hello, world!')
    metrics = Metrics()
    metrics.accumulated_cost = 0.03
    action._llm_metrics = metrics

    # Test serialization
    serialized = event_to_dict(action)
    assert 'llm_metrics' in serialized
    assert serialized['llm_metrics']['accumulated_cost'] == 0.03
    assert serialized['llm_metrics']['costs'] == []
    assert serialized['llm_metrics']['response_latencies'] == []
    assert serialized['llm_metrics']['token_usages'] == []

    # Test deserialization
    deserialized = event_from_dict(serialized)
    assert deserialized.llm_metrics is not None
    assert deserialized.llm_metrics.accumulated_cost == 0.03
    assert len(deserialized.llm_metrics.costs) == 0
    assert len(deserialized.llm_metrics.response_latencies) == 0
    assert len(deserialized.llm_metrics.token_usages) == 0


def test_metrics_full_serialization():
    # Create an observation with all metrics fields
    obs = CmdOutputObservation(
        command='ls',
        content='test.txt',
        metadata=CmdOutputMetadata(exit_code=0),
    )
    metrics = Metrics(model_name='test-model')
    metrics.accumulated_cost = 0.03

    # Add a cost
    cost = Cost(model='test-model', cost=0.02)
    metrics._costs.append(cost)

    # Add a response latency
    latency = ResponseLatency(model='test-model', latency=0.5, response_id='test-id')
    metrics.response_latencies = [latency]

    # Add token usage
    usage = TokenUsage(
        model='test-model',
        prompt_tokens=10,
        completion_tokens=20,
        cache_read_tokens=0,
        cache_write_tokens=0,
        response_id='test-id',
    )
    metrics.token_usages = [usage]

    obs._llm_metrics = metrics

    # Test serialization
    serialized = event_to_dict(obs)
    assert 'llm_metrics' in serialized
    metrics_dict = serialized['llm_metrics']
    assert metrics_dict['accumulated_cost'] == 0.03
    assert len(metrics_dict['costs']) == 1
    assert metrics_dict['costs'][0]['cost'] == 0.02
    assert len(metrics_dict['response_latencies']) == 1
    assert metrics_dict['response_latencies'][0]['latency'] == 0.5
    assert len(metrics_dict['token_usages']) == 1
    assert metrics_dict['token_usages'][0]['prompt_tokens'] == 10
    assert metrics_dict['token_usages'][0]['completion_tokens'] == 20

    # Test deserialization
    deserialized = event_from_dict(serialized)
    assert deserialized.llm_metrics is not None
    assert deserialized.llm_metrics.accumulated_cost == 0.03
    assert len(deserialized.llm_metrics.costs) == 1
    assert deserialized.llm_metrics.costs[0].cost == 0.02
    assert len(deserialized.llm_metrics.response_latencies) == 1
    assert deserialized.llm_metrics.response_latencies[0].latency == 0.5
    assert len(deserialized.llm_metrics.token_usages) == 1
    assert deserialized.llm_metrics.token_usages[0].prompt_tokens == 10
    assert deserialized.llm_metrics.token_usages[0].completion_tokens == 20


def test_metrics_none_serialization():
    # Test when metrics is None
    obs = CmdOutputObservation(
        command='ls',
        content='test.txt',
        metadata=CmdOutputMetadata(exit_code=0),
    )
    obs._llm_metrics = None

    # Test serialization
    serialized = event_to_dict(obs)
    assert 'llm_metrics' not in serialized

    # Test deserialization
    deserialized = event_from_dict(serialized)
    assert deserialized.llm_metrics is None