import os
import sys
import pytest
import asyncio
from typing import Optional, cast
from unittest.mock import patch, AsyncMock

sys.path.insert(0, os.path.abspath("../.."))

import litellm
from litellm.integrations.custom_logger import CustomLogger
import json
from litellm.types.utils import StandardLoggingPayload
from litellm.types.llms.openai import (
    ResponseCompletedEvent,
    ResponsesAPIResponse,
    ResponseTextConfig,
    ResponseAPIUsage,
    IncompleteDetails,
)
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
from base_responses_api import BaseResponsesAPITest, validate_responses_api_response


class TestOpenAIResponsesAPITest(BaseResponsesAPITest):
    def get_base_completion_call_args(self):
        return {
            "model": "openai/gpt-4o",
        }
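

# Note (illustrative, not load-bearing for the tests): BaseResponsesAPITest supplies the
# shared Responses API test cases, and a subclass only overrides get_base_completion_call_args()
# to point those cases at a specific provider/model. A hypothetical provider variant would look
# like the sketch below; the class and deployment names are assumptions, not part of this suite:
#
#     class TestMyProviderResponsesAPITest(BaseResponsesAPITest):
#         def get_base_completion_call_args(self):
#             return {"model": "azure/my-gpt-4o-deployment"}  # assumed deployment name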


class TestCustomLogger(CustomLogger):
    def __init__(
        self,
    ):
        self.standard_logging_object: Optional[StandardLoggingPayload] = None

    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
        print("in async_log_success_event")
        print("kwargs=", json.dumps(kwargs, indent=4, default=str))
        self.standard_logging_object = kwargs["standard_logging_object"]
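

def _example_capture_standard_logging_payload():
    """
    Illustrative sketch (not invoked by the tests): how TestCustomLogger is wired up so that
    async_log_success_event receives the standard logging payload. It only uses the
    litellm.callbacks pattern already exercised below; the helper name itself is ours.
    """
    logger = TestCustomLogger()
    litellm.callbacks = [logger]
    # After a litellm.responses()/aresponses() call completes and the async success callback
    # has run, logger.standard_logging_object holds the StandardLoggingPayload.
    return logger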


def validate_standard_logging_payload(
    slp: StandardLoggingPayload, response: ResponsesAPIResponse, request_model: str
):
    """
    Validate that a StandardLoggingPayload object matches the expected response.

    Args:
        slp (StandardLoggingPayload): The standard logging payload object to validate
        response (ResponsesAPIResponse): The litellm response to compare against
        request_model (str): The model name that was requested
    """
    # Validate payload exists
    assert slp is not None, "Standard logging payload should not be None"

    # Validate token counts
    print("response=", json.dumps(response, indent=4, default=str))
    assert (
        slp["prompt_tokens"] == response["usage"]["input_tokens"]
    ), "Prompt tokens mismatch"
    assert (
        slp["completion_tokens"] == response["usage"]["output_tokens"]
    ), "Completion tokens mismatch"
    assert (
        slp["total_tokens"]
        == response["usage"]["input_tokens"] + response["usage"]["output_tokens"]
    ), "Total tokens mismatch"

    # Validate spend and response metadata
    assert slp["response_cost"] > 0, "Response cost should be greater than 0"
    assert slp["id"] == response["id"], "Response ID mismatch"
    assert slp["model"] == request_model, "Model name mismatch"

    # Validate messages
    assert slp["messages"] == [{"content": "hi", "role": "user"}], "Messages mismatch"

    # Validate complete response structure
    validate_responses_match(slp["response"], response)


@pytest.mark.asyncio
async def test_basic_openai_responses_api_streaming_with_logging():
    litellm._turn_on_debug()
    litellm.set_verbose = True
    test_custom_logger = TestCustomLogger()
    litellm.callbacks = [test_custom_logger]
    request_model = "gpt-4o"
    response = await litellm.aresponses(
        model=request_model,
        input="hi",
        stream=True,
    )
    final_response: Optional[ResponseCompletedEvent] = None
    async for event in response:
        if event.type == "response.completed":
            final_response = event
        print("litellm response=", json.dumps(event, indent=4, default=str))

    print("sleeping for 2 seconds...")
    await asyncio.sleep(2)
    print(
        "standard logging payload=",
        json.dumps(test_custom_logger.standard_logging_object, indent=4, default=str),
    )

    assert final_response is not None
    assert test_custom_logger.standard_logging_object is not None

    validate_standard_logging_payload(
        slp=test_custom_logger.standard_logging_object,
        response=final_response.response,
        request_model=request_model,
    )


def validate_responses_match(slp_response, litellm_response):
    """Validate that the standard logging payload OpenAI response matches the litellm response"""
    # Validate core fields
    assert slp_response["id"] == litellm_response["id"], "ID mismatch"
    assert slp_response["model"] == litellm_response["model"], "Model mismatch"
    assert (
        slp_response["created_at"] == litellm_response["created_at"]
    ), "Created at mismatch"

    # Validate usage
    assert (
        slp_response["usage"]["input_tokens"]
        == litellm_response["usage"]["input_tokens"]
    ), "Input tokens mismatch"
    assert (
        slp_response["usage"]["output_tokens"]
        == litellm_response["usage"]["output_tokens"]
    ), "Output tokens mismatch"
    assert (
        slp_response["usage"]["total_tokens"]
        == litellm_response["usage"]["total_tokens"]
    ), "Total tokens mismatch"

    # Validate output/messages
    assert len(slp_response["output"]) == len(
        litellm_response["output"]
    ), "Output length mismatch"
    for slp_msg, litellm_msg in zip(slp_response["output"], litellm_response["output"]):
        assert slp_msg["role"] == litellm_msg.role, "Message role mismatch"
        # Access the content's text field for the litellm response
        litellm_content = litellm_msg.content[0].text if litellm_msg.content else ""
        assert (
            slp_msg["content"][0]["text"] == litellm_content
        ), f"Message content mismatch. Expected {litellm_content}, Got {slp_msg['content']}"
        assert slp_msg["status"] == litellm_msg.status, "Message status mismatch"


@pytest.mark.asyncio
async def test_basic_openai_responses_api_non_streaming_with_logging():
    litellm._turn_on_debug()
    litellm.set_verbose = True
    test_custom_logger = TestCustomLogger()
    litellm.callbacks = [test_custom_logger]
    request_model = "gpt-4o"
    response = await litellm.aresponses(
        model=request_model,
        input="hi",
    )
    print("litellm response=", json.dumps(response, indent=4, default=str))
    print("response hidden params=", response._hidden_params)

    print("sleeping for 2 seconds...")
    await asyncio.sleep(2)
    print(
        "standard logging payload=",
        json.dumps(test_custom_logger.standard_logging_object, indent=4, default=str),
    )

    assert response is not None
    assert test_custom_logger.standard_logging_object is not None

    validate_standard_logging_payload(
        test_custom_logger.standard_logging_object, response, request_model
    )


def validate_stream_event(event):
    """
    Validate that a streaming event from litellm.responses() or litellm.aresponses()
    with stream=True conforms to the expected structure based on its event type.

    Args:
        event: The streaming event object to validate

    Raises:
        AssertionError: If the event doesn't match the expected structure for its type
    """
    # Common validation for all event types
    assert hasattr(event, "type"), "Event should have a 'type' attribute"

    # Type-specific validation
    if event.type == "response.created" or event.type == "response.in_progress":
        assert hasattr(
            event, "response"
        ), f"{event.type} event should have a 'response' attribute"
        validate_responses_api_response(event.response, final_chunk=False)
    elif event.type == "response.completed":
        assert hasattr(
            event, "response"
        ), "response.completed event should have a 'response' attribute"
        validate_responses_api_response(event.response, final_chunk=True)
        # Usage is guaranteed only on the completed event
        assert (
            "usage" in event.response
        ), "response.completed event should have usage information"
        print("Usage in event.response=", event.response["usage"])
        assert isinstance(event.response["usage"], ResponseAPIUsage)
    elif event.type == "response.failed" or event.type == "response.incomplete":
        assert hasattr(
            event, "response"
        ), f"{event.type} event should have a 'response' attribute"
    elif (
        event.type == "response.output_item.added"
        or event.type == "response.output_item.done"
    ):
        assert hasattr(
            event, "output_index"
        ), f"{event.type} event should have an 'output_index' attribute"
        assert hasattr(
            event, "item"
        ), f"{event.type} event should have an 'item' attribute"
    elif (
        event.type == "response.content_part.added"
        or event.type == "response.content_part.done"
    ):
        assert hasattr(
            event, "item_id"
        ), f"{event.type} event should have an 'item_id' attribute"
        assert hasattr(
            event, "output_index"
        ), f"{event.type} event should have an 'output_index' attribute"
        assert hasattr(
            event, "content_index"
        ), f"{event.type} event should have a 'content_index' attribute"
        assert hasattr(
            event, "part"
        ), f"{event.type} event should have a 'part' attribute"
    elif event.type == "response.output_text.delta":
        assert hasattr(
            event, "item_id"
        ), f"{event.type} event should have an 'item_id' attribute"
        assert hasattr(
            event, "output_index"
        ), f"{event.type} event should have an 'output_index' attribute"
        assert hasattr(
            event, "content_index"
        ), f"{event.type} event should have a 'content_index' attribute"
        assert hasattr(
            event, "delta"
        ), f"{event.type} event should have a 'delta' attribute"
    elif event.type == "response.output_text.annotation.added":
        assert hasattr(
            event, "item_id"
        ), f"{event.type} event should have an 'item_id' attribute"
        assert hasattr(
            event, "output_index"
        ), f"{event.type} event should have an 'output_index' attribute"
        assert hasattr(
            event, "content_index"
        ), f"{event.type} event should have a 'content_index' attribute"
        assert hasattr(
            event, "annotation_index"
        ), f"{event.type} event should have an 'annotation_index' attribute"
        assert hasattr(
            event, "annotation"
        ), f"{event.type} event should have an 'annotation' attribute"
    elif event.type == "response.output_text.done":
        assert hasattr(
            event, "item_id"
        ), f"{event.type} event should have an 'item_id' attribute"
        assert hasattr(
            event, "output_index"
        ), f"{event.type} event should have an 'output_index' attribute"
        assert hasattr(
            event, "content_index"
        ), f"{event.type} event should have a 'content_index' attribute"
        assert hasattr(
            event, "text"
        ), f"{event.type} event should have a 'text' attribute"
    elif event.type == "response.refusal.delta":
        assert hasattr(
            event, "item_id"
        ), f"{event.type} event should have an 'item_id' attribute"
        assert hasattr(
            event, "output_index"
        ), f"{event.type} event should have an 'output_index' attribute"
        assert hasattr(
            event, "content_index"
        ), f"{event.type} event should have a 'content_index' attribute"
        assert hasattr(
            event, "delta"
        ), f"{event.type} event should have a 'delta' attribute"
    elif event.type == "response.refusal.done":
        assert hasattr(
            event, "item_id"
        ), f"{event.type} event should have an 'item_id' attribute"
        assert hasattr(
            event, "output_index"
        ), f"{event.type} event should have an 'output_index' attribute"
        assert hasattr(
            event, "content_index"
        ), f"{event.type} event should have a 'content_index' attribute"
        assert hasattr(
            event, "refusal"
        ), f"{event.type} event should have a 'refusal' attribute"
    elif event.type == "response.function_call_arguments.delta":
        assert hasattr(
            event, "item_id"
        ), f"{event.type} event should have an 'item_id' attribute"
        assert hasattr(
            event, "output_index"
        ), f"{event.type} event should have an 'output_index' attribute"
        assert hasattr(
            event, "delta"
        ), f"{event.type} event should have a 'delta' attribute"
    elif event.type == "response.function_call_arguments.done":
        assert hasattr(
            event, "item_id"
        ), f"{event.type} event should have an 'item_id' attribute"
        assert hasattr(
            event, "output_index"
        ), f"{event.type} event should have an 'output_index' attribute"
        assert hasattr(
            event, "arguments"
        ), f"{event.type} event should have an 'arguments' attribute"
    elif event.type in [
        "response.file_search_call.in_progress",
        "response.file_search_call.searching",
        "response.file_search_call.completed",
        "response.web_search_call.in_progress",
        "response.web_search_call.searching",
        "response.web_search_call.completed",
    ]:
        assert hasattr(
            event, "output_index"
        ), f"{event.type} event should have an 'output_index' attribute"
        assert hasattr(
            event, "item_id"
        ), f"{event.type} event should have an 'item_id' attribute"
    elif event.type == "error":
        assert hasattr(
            event, "message"
        ), "Error event should have a 'message' attribute"

    return True  # Return True if validation passes
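

def _example_collect_validated_event_types(events):
    """
    Illustrative sketch (not called by the tests): drive validate_stream_event() over an
    already-collected list of streaming events and return the set of event types seen,
    mirroring the loop used by the streaming tests below. The helper name is ours.
    """
    event_types_seen = set()
    for event in events:
        validate_stream_event(event)
        event_types_seen.add(event.type)
    return event_types_seen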


@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
async def test_openai_responses_api_streaming_validation(sync_mode):
    """Test that validates each streaming event from the responses API"""
    litellm._turn_on_debug()
    event_types_seen = set()

    if sync_mode:
        response = litellm.responses(
            model="gpt-4o",
            input="Tell me about artificial intelligence in 3 sentences.",
            stream=True,
        )
        for event in response:
            print(f"Validating event type: {event.type}")
            validate_stream_event(event)
            event_types_seen.add(event.type)
    else:
        response = await litellm.aresponses(
            model="gpt-4o",
            input="Tell me about artificial intelligence in 3 sentences.",
            stream=True,
        )
        async for event in response:
            print(f"Validating event type: {event.type}")
            validate_stream_event(event)
            event_types_seen.add(event.type)

    # At minimum, we should see these core event types
    required_events = {"response.created", "response.completed"}
    missing_events = required_events - event_types_seen
    assert not missing_events, f"Missing required event types: {missing_events}"
    print(f"Successfully validated all event types: {event_types_seen}")


@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
async def test_openai_responses_litellm_router(sync_mode):
    """
    Test the OpenAI responses API with LiteLLM Router in both sync and async modes
    """
    litellm._turn_on_debug()
    router = litellm.Router(
        model_list=[
            {
                "model_name": "gpt4o-special-alias",
                "litellm_params": {
                    "model": "gpt-4o",
                    "api_key": os.getenv("OPENAI_API_KEY"),
                },
            }
        ]
    )

    # Call the handler
    if sync_mode:
        response = router.responses(
            model="gpt4o-special-alias",
            input="Hello, can you tell me a short joke?",
            max_output_tokens=100,
        )
        print("SYNC MODE RESPONSE=", response)
    else:
        response = await router.aresponses(
            model="gpt4o-special-alias",
            input="Hello, can you tell me a short joke?",
            max_output_tokens=100,
        )

    print(
        f"Router {'sync' if sync_mode else 'async'} response=",
        json.dumps(response, indent=4, default=str),
    )

    # Use the helper function to validate the response
    validate_responses_api_response(response, final_chunk=True)

    return response


@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
async def test_openai_responses_litellm_router_streaming(sync_mode):
    """
    Test the OpenAI responses API with streaming through LiteLLM Router
    """
    litellm._turn_on_debug()
    router = litellm.Router(
        model_list=[
            {
                "model_name": "gpt4o-special-alias",
                "litellm_params": {
                    "model": "gpt-4o",
                    "api_key": os.getenv("OPENAI_API_KEY"),
                },
            }
        ]
    )

    event_types_seen = set()

    if sync_mode:
        response = router.responses(
            model="gpt4o-special-alias",
            input="Tell me about artificial intelligence in 2 sentences.",
            stream=True,
        )
        for event in response:
            print(f"Validating event type: {event.type}")
            validate_stream_event(event)
            event_types_seen.add(event.type)
    else:
        response = await router.aresponses(
            model="gpt4o-special-alias",
            input="Tell me about artificial intelligence in 2 sentences.",
            stream=True,
        )
        async for event in response:
            print(f"Validating event type: {event.type}")
            validate_stream_event(event)
            event_types_seen.add(event.type)

    # At minimum, we should see these core event types
    required_events = {"response.created", "response.completed"}
    missing_events = required_events - event_types_seen
    assert not missing_events, f"Missing required event types: {missing_events}"
    print(f"Successfully validated all event types: {event_types_seen}")


@pytest.mark.asyncio
async def test_openai_responses_litellm_router_no_metadata():
    """
    Test that metadata is not passed through when using the Router for responses API
    """
    mock_response = {
        "id": "resp_123",
        "object": "response",
        "created_at": 1741476542,
        "status": "completed",
        "model": "gpt-4o",
        "output": [
            {
                "type": "message",
                "id": "msg_123",
                "status": "completed",
                "role": "assistant",
                "content": [
                    {"type": "output_text", "text": "Hello world!", "annotations": []}
                ],
            }
        ],
        "parallel_tool_calls": True,
        "usage": {
            "input_tokens": 10,
            "output_tokens": 20,
            "total_tokens": 30,
            "output_tokens_details": {"reasoning_tokens": 0},
        },
        "text": {"format": {"type": "text"}},
        # Adding all required fields
        "error": None,
        "incomplete_details": None,
        "instructions": None,
        "metadata": {},
        "temperature": 1.0,
        "tool_choice": "auto",
        "tools": [],
        "top_p": 1.0,
        "max_output_tokens": None,
        "previous_response_id": None,
        "reasoning": {"effort": None, "summary": None},
        "truncation": "disabled",
        "user": None,
    }

    class MockResponse:
        def __init__(self, json_data, status_code):
            self._json_data = json_data
            self.status_code = status_code
            self.text = str(json_data)

        def json(self):  # Changed from async to sync
            return self._json_data

    with patch(
        "litellm.llms.custom_httpx.http_handler.AsyncHTTPHandler.post",
        new_callable=AsyncMock,
    ) as mock_post:
        # Configure the mock to return our response
        mock_post.return_value = MockResponse(mock_response, 200)

        litellm._turn_on_debug()
        router = litellm.Router(
            model_list=[
                {
                    "model_name": "gpt4o-special-alias",
                    "litellm_params": {
                        "model": "gpt-4o",
                        "api_key": "fake-key",
                    },
                }
            ]
        )

        # Call the handler without metadata
        await router.aresponses(
            model="gpt4o-special-alias",
            input="Hello, can you tell me a short joke?",
        )

        # Check the request body
        request_body = mock_post.call_args.kwargs["json"]
        print("Request body:", json.dumps(request_body, indent=4))

        # Assert metadata is not in the request
        assert (
            "metadata" not in request_body
        ), "metadata should not be in the request body"
        mock_post.assert_called_once()


@pytest.mark.asyncio
async def test_openai_responses_litellm_router_with_metadata():
    """
    Test that metadata is correctly passed through when explicitly provided to the Router for responses API
    """
    test_metadata = {
        "user_id": "123",
        "conversation_id": "abc",
        "custom_field": "test_value",
    }

    mock_response = {
        "id": "resp_123",
        "object": "response",
        "created_at": 1741476542,
        "status": "completed",
        "model": "gpt-4o",
        "output": [
            {
                "type": "message",
                "id": "msg_123",
                "status": "completed",
                "role": "assistant",
                "content": [
                    {"type": "output_text", "text": "Hello world!", "annotations": []}
                ],
            }
        ],
        "parallel_tool_calls": True,
        "usage": {
            "input_tokens": 10,
            "output_tokens": 20,
            "total_tokens": 30,
            "output_tokens_details": {"reasoning_tokens": 0},
        },
        "text": {"format": {"type": "text"}},
        "error": None,
        "incomplete_details": None,
        "instructions": None,
        "metadata": test_metadata,  # Include the test metadata in response
        "temperature": 1.0,
        "tool_choice": "auto",
        "tools": [],
        "top_p": 1.0,
        "max_output_tokens": None,
        "previous_response_id": None,
        "reasoning": {"effort": None, "summary": None},
        "truncation": "disabled",
        "user": None,
    }

    class MockResponse:
        def __init__(self, json_data, status_code):
            self._json_data = json_data
            self.status_code = status_code
            self.text = str(json_data)

        def json(self):
            return self._json_data

    with patch(
        "litellm.llms.custom_httpx.http_handler.AsyncHTTPHandler.post",
        new_callable=AsyncMock,
    ) as mock_post:
        # Configure the mock to return our response
        mock_post.return_value = MockResponse(mock_response, 200)

        litellm._turn_on_debug()
        router = litellm.Router(
            model_list=[
                {
                    "model_name": "gpt4o-special-alias",
                    "litellm_params": {
                        "model": "gpt-4o",
                        "api_key": "fake-key",
                    },
                }
            ]
        )

        # Call the handler with metadata
        await router.aresponses(
            model="gpt4o-special-alias",
            input="Hello, can you tell me a short joke?",
            metadata=test_metadata,
        )

        # Check the request body
        request_body = mock_post.call_args.kwargs["json"]
        print("Request body:", json.dumps(request_body, indent=4))

        # Assert metadata matches exactly what was passed
        assert (
            request_body["metadata"] == test_metadata
        ), "metadata in request body should match what was passed"
        mock_post.assert_called_once()
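

def _example_make_mock_response(json_data, status_code=200):
    """
    Illustrative sketch (not used by the tests): the MockResponse pattern that the mocked
    router/o1-pro tests in this file repeat, factored into a single factory. Each test above
    and below deliberately keeps its own inline copy so it stays self-contained; the factory
    name here is ours.
    """
    class _MockResponse:
        def __init__(self, json_data, status_code):
            self._json_data = json_data
            self.status_code = status_code
            self.text = json.dumps(json_data)

        def json(self):
            return self._json_data

    return _MockResponse(json_data, status_code)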


def test_bad_request_bad_param_error():
    """Raise a BadRequestError when an invalid parameter value is provided"""
    try:
        litellm.responses(model="gpt-4o", input="This should fail", temperature=2000)
        pytest.fail("Expected BadRequestError but no exception was raised")
    except litellm.BadRequestError as e:
        print(f"Exception raised: {e}")
        print(f"Exception type: {type(e)}")
        print(f"Exception args: {e.args}")
        print(f"Exception details: {e.__dict__}")
    except Exception as e:
        pytest.fail(f"Unexpected exception raised: {e}")


@pytest.mark.asyncio
async def test_async_bad_request_bad_param_error():
    """Raise a BadRequestError when an invalid parameter value is provided"""
    try:
        await litellm.aresponses(
            model="gpt-4o", input="This should fail", temperature=2000
        )
        pytest.fail("Expected BadRequestError but no exception was raised")
    except litellm.BadRequestError as e:
        print(f"Exception raised: {e}")
        print(f"Exception type: {type(e)}")
        print(f"Exception args: {e.args}")
        print(f"Exception details: {e.__dict__}")
    except Exception as e:
        pytest.fail(f"Unexpected exception raised: {e}")
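

def _example_bad_param_with_pytest_raises():
    """
    Illustrative sketch (not collected by pytest, since the name lacks the test_ prefix):
    the same invalid-parameter check as above, expressed with pytest.raises instead of
    try/except. The two tests above keep the verbose form so the exception details are printed.
    """
    with pytest.raises(litellm.BadRequestError):
        litellm.responses(model="gpt-4o", input="This should fail", temperature=2000)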


@pytest.mark.asyncio
async def test_openai_o1_pro_response_api():
    """
    Test that LiteLLM correctly handles an incomplete response from OpenAI's o1-pro model
    due to reaching the max_output_tokens limit.
    """
    # Mock response from o1-pro
    mock_response = {
        "id": "resp_67dc3dd77b388190822443a85252da5a0e13d8bdc0e28d88",
        "object": "response",
        "created_at": 1742486999,
        "status": "incomplete",
        "error": None,
        "incomplete_details": {"reason": "max_output_tokens"},
        "instructions": None,
        "max_output_tokens": 20,
        "model": "o1-pro-2025-03-19",
        "output": [
            {
                "type": "reasoning",
                "id": "rs_67dc3de50f64819097450ed50a33d5f90e13d8bdc0e28d88",
                "summary": [],
            }
        ],
        "parallel_tool_calls": True,
        "previous_response_id": None,
        "reasoning": {"effort": "medium", "generate_summary": None},
        "store": True,
        "temperature": 1.0,
        "text": {"format": {"type": "text"}},
        "tool_choice": "auto",
        "tools": [],
        "top_p": 1.0,
        "truncation": "disabled",
        "usage": {
            "input_tokens": 73,
            "input_tokens_details": {"cached_tokens": 0},
            "output_tokens": 20,
            "output_tokens_details": {"reasoning_tokens": 0},
            "total_tokens": 93,
        },
        "user": None,
        "metadata": {},
    }

    class MockResponse:
        def __init__(self, json_data, status_code):
            self._json_data = json_data
            self.status_code = status_code
            self.text = json.dumps(json_data)

        def json(self):  # Changed from async to sync
            return self._json_data

    with patch(
        "litellm.llms.custom_httpx.http_handler.AsyncHTTPHandler.post",
        new_callable=AsyncMock,
    ) as mock_post:
        # Configure the mock to return our response
        mock_post.return_value = MockResponse(mock_response, 200)

        litellm._turn_on_debug()
        litellm.set_verbose = True

        # Call o1-pro with max_output_tokens=20
        response = await litellm.aresponses(
            model="openai/o1-pro",
            input="Write a detailed essay about artificial intelligence and its impact on society",
            max_output_tokens=20,
        )

        # Verify the request was made correctly
        mock_post.assert_called_once()
        request_body = mock_post.call_args.kwargs["json"]
        assert request_body["model"] == "o1-pro"
        assert request_body["max_output_tokens"] == 20

        # Validate the response
        print("Response:", json.dumps(response, indent=4, default=str))

        # Check that the response has the expected structure
        assert response["id"] is not None
        assert response["status"] == "incomplete"
        assert response["incomplete_details"].reason == "max_output_tokens"
        assert response["max_output_tokens"] == 20

        # Validate usage information
        assert response["usage"]["input_tokens"] == 73
        assert response["usage"]["output_tokens"] == 20
        assert response["usage"]["total_tokens"] == 93

        # Validate that the response is properly identified as incomplete
        validate_responses_api_response(response, final_chunk=True)


@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
async def test_openai_o1_pro_response_api_streaming(sync_mode):
    """
    Test that LiteLLM correctly handles an incomplete response from OpenAI's o1-pro model
    due to reaching the max_output_tokens limit, in both sync and async streaming modes.
    """
    # Mock response from o1-pro
    mock_response = {
        "id": "resp_67dc3dd77b388190822443a85252da5a0e13d8bdc0e28d88",
        "object": "response",
        "created_at": 1742486999,
        "status": "incomplete",
        "error": None,
        "incomplete_details": {"reason": "max_output_tokens"},
        "instructions": None,
        "max_output_tokens": 20,
        "model": "o1-pro-2025-03-19",
        "output": [
            {
                "type": "reasoning",
                "id": "rs_67dc3de50f64819097450ed50a33d5f90e13d8bdc0e28d88",
                "summary": [],
            }
        ],
        "parallel_tool_calls": True,
        "previous_response_id": None,
        "reasoning": {"effort": "medium", "generate_summary": None},
        "store": True,
        "temperature": 1.0,
        "text": {"format": {"type": "text"}},
        "tool_choice": "auto",
        "tools": [],
        "top_p": 1.0,
        "truncation": "disabled",
        "usage": {
            "input_tokens": 73,
            "input_tokens_details": {"cached_tokens": 0},
            "output_tokens": 20,
            "output_tokens_details": {"reasoning_tokens": 0},
            "total_tokens": 93,
        },
        "user": None,
        "metadata": {},
    }

    class MockResponse:
        def __init__(self, json_data, status_code):
            self._json_data = json_data
            self.status_code = status_code
            self.text = json.dumps(json_data)

        def json(self):
            return self._json_data

    with patch(
        "litellm.llms.custom_httpx.http_handler.AsyncHTTPHandler.post",
        new_callable=AsyncMock,
    ) as mock_post:
        # Configure the mock to return our response
        mock_post.return_value = MockResponse(mock_response, 200)

        litellm._turn_on_debug()
        litellm.set_verbose = True

        if sync_mode:
            # For sync mode, we need to patch the sync HTTP handler
            with patch(
                "litellm.llms.custom_httpx.http_handler.HTTPHandler.post",
                return_value=MockResponse(mock_response, 200),
            ) as mock_sync_post:
                response = litellm.responses(
                    model="openai/o1-pro",
                    input="Write a detailed essay about artificial intelligence and its impact on society",
                    max_output_tokens=20,
                    stream=True,
                )

                # Process the sync stream
                event_count = 0
                for event in response:
                    print(
                        f"Sync litellm response #{event_count}:",
                        json.dumps(event, indent=4, default=str),
                    )
                    event_count += 1

                # Verify the sync request was made correctly
                mock_sync_post.assert_called_once()
                request_body = mock_sync_post.call_args.kwargs["json"]
                assert request_body["model"] == "o1-pro"
                assert request_body["max_output_tokens"] == 20
                assert "stream" not in request_body
        else:
            # For async mode
            response = await litellm.aresponses(
                model="openai/o1-pro",
                input="Write a detailed essay about artificial intelligence and its impact on society",
                max_output_tokens=20,
                stream=True,
            )

            # Process the async stream
            event_count = 0
            async for event in response:
                print(
                    f"Async litellm response #{event_count}:",
                    json.dumps(event, indent=4, default=str),
                )
                event_count += 1

            # Verify the async request was made correctly
            mock_post.assert_called_once()
            request_body = mock_post.call_args.kwargs["json"]
            assert request_body["model"] == "o1-pro"
            assert request_body["max_output_tokens"] == 20
            assert "stream" not in request_body


def test_basic_computer_use_preview_tool_call():
    """
    Test that LiteLLM correctly handles a computer_use_preview tool call where the environment
    is set to "linux". "linux" is an unsupported environment for the computer_use_preview tool,
    but litellm users should still be able to pass it through to OpenAI.
    """
    # Mock response from OpenAI
    mock_response = {
        "id": "resp_67dc3dd77b388190822443a85252da5a0e13d8bdc0e28d88",
        "object": "response",
        "created_at": 1742486999,
        "status": "incomplete",
        "error": None,
        "incomplete_details": {"reason": "max_output_tokens"},
        "instructions": None,
        "max_output_tokens": 20,
        "model": "o1-pro-2025-03-19",
        "output": [
            {
                "type": "reasoning",
                "id": "rs_67dc3de50f64819097450ed50a33d5f90e13d8bdc0e28d88",
                "summary": [],
            }
        ],
        "parallel_tool_calls": True,
        "previous_response_id": None,
        "reasoning": {"effort": "medium", "generate_summary": None},
        "store": True,
        "temperature": 1.0,
        "text": {"format": {"type": "text"}},
        "tool_choice": "auto",
        "tools": [],
        "top_p": 1.0,
        "truncation": "disabled",
        "usage": {
            "input_tokens": 73,
            "input_tokens_details": {"cached_tokens": 0},
            "output_tokens": 20,
            "output_tokens_details": {"reasoning_tokens": 0},
            "total_tokens": 93,
        },
        "user": None,
        "metadata": {},
    }

    class MockResponse:
        def __init__(self, json_data, status_code):
            self._json_data = json_data
            self.status_code = status_code
            self.text = json.dumps(json_data)

        def json(self):
            return self._json_data

    with patch(
        "litellm.llms.custom_httpx.http_handler.HTTPHandler.post",
        return_value=MockResponse(mock_response, 200),
    ) as mock_post:
        litellm._turn_on_debug()
        litellm.set_verbose = True

        # Call the responses API with the computer_use_preview tool
        response = litellm.responses(
            model="openai/computer-use-preview",
            tools=[
                {
                    "type": "computer_use_preview",
                    "display_width": 1024,
                    "display_height": 768,
                    "environment": "linux",  # other possible values: "mac", "windows", "ubuntu"
                }
            ],
            input="Check the latest OpenAI news on bing.com.",
            reasoning={"summary": "concise"},
            truncation="auto",
        )

        # Verify the request was made correctly
        mock_post.assert_called_once()
        request_body = mock_post.call_args.kwargs["json"]

        # Validate the request structure
        assert request_body["model"] == "computer-use-preview"
        assert len(request_body["tools"]) == 1
        assert request_body["tools"][0]["type"] == "computer_use_preview"
        assert request_body["tools"][0]["display_width"] == 1024
        assert request_body["tools"][0]["display_height"] == 768
        assert request_body["tools"][0]["environment"] == "linux"

        # Check that reasoning was passed correctly
        assert request_body["reasoning"]["summary"] == "concise"
        assert request_body["truncation"] == "auto"

        # Validate the input format
        assert isinstance(request_body["input"], str)
        assert request_body["input"] == "Check the latest OpenAI news on bing.com."


def test_mcp_tools_with_responses_api():
    """
    Test the two-step MCP tool flow through the Responses API: the first request returns an
    mcp_approval_request, and a follow-up request approves it via previous_response_id.
    """
    litellm._turn_on_debug()
    MCP_TOOLS = [
        {
            "type": "mcp",
            "server_label": "deepwiki",
            "server_url": "https://mcp.deepwiki.com/mcp",
            "allowed_tools": ["ask_question"],
        }
    ]
    MODEL = "openai/gpt-4.1"
    USER_QUERY = "What transport protocols does the 2025-03-26 version of the MCP spec (modelcontextprotocol/modelcontextprotocol) support?"

    #########################################################
    # Step 1: OpenAI will use MCP LIST, and return a list of MCP calls for our approval
    response = litellm.responses(
        model=MODEL,
        tools=MCP_TOOLS,
        input=USER_QUERY,
    )
    print(response)
    response = cast(ResponsesAPIResponse, response)

    mcp_approval_id: Optional[str] = None
    for output in response.output:
        if output.type == "mcp_approval_request":
            mcp_approval_id = output.id
            break

    # Step 2: Send a follow-up with approval for the MCP call
    response_with_mcp_call = litellm.responses(
        model=MODEL,
        tools=MCP_TOOLS,
        input=[
            {
                "type": "mcp_approval_response",
                "approve": True,
                "approval_request_id": mcp_approval_id,
            }
        ],
        previous_response_id=response.id,
    )
    print(response_with_mcp_call)