""" | |
Handler for transforming /chat/completions api requests to litellm.responses requests | |
""" | |
import json | |
from typing import ( | |
TYPE_CHECKING, | |
Any, | |
AsyncIterator, | |
Dict, | |
Iterable, | |
Iterator, | |
List, | |
Optional, | |
Tuple, | |
Union, | |
cast, | |
) | |
from litellm import ModelResponse | |
from litellm._logging import verbose_logger | |
from litellm.llms.base_llm.base_model_iterator import BaseModelResponseIterator | |
from litellm.llms.base_llm.bridges.completion_transformation import ( | |
CompletionTransformationBridge, | |
) | |
if TYPE_CHECKING: | |
from pydantic import BaseModel | |
from litellm import LiteLLMLoggingObj, ModelResponse | |
from litellm.llms.base_llm.base_model_iterator import BaseModelResponseIterator | |
from litellm.types.llms.openai import ( | |
ALL_RESPONSES_API_TOOL_PARAMS, | |
AllMessageValues, | |
ChatCompletionThinkingBlock, | |
OpenAIMessageContentListBlock, | |
) | |
from litellm.types.utils import GenericStreamingChunk, ModelResponseStream | |


class LiteLLMResponsesTransformationHandler(CompletionTransformationBridge):
    """
    Handler for transforming /chat/completions API requests to litellm.responses requests
    """

    def __init__(self):
        pass
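
    # Illustrative usage sketch (hedged): how a caller might drive this bridge.
    # The real call sites live elsewhere in litellm; the parameter values below
    # are made up for the example.
    #
    #   handler = LiteLLMResponsesTransformationHandler()
    #   request_data = handler.transform_request(
    #       model="gpt-4.1",
    #       messages=[{"role": "user", "content": "Hello"}],
    #       optional_params={"max_tokens": 128},
    #       litellm_params={},
    #       headers={},
    #   )
    #   # request_data is a /v1/responses-style payload, e.g.
    #   # {"model": "gpt-4.1", "input": [...], "max_output_tokens": 128}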

    def convert_chat_completion_messages_to_responses_api(
        self, messages: List["AllMessageValues"]
    ) -> Tuple[List[Any], Optional[str]]:
        input_items: List[Any] = []
        instructions: Optional[str] = None

        for msg in messages:
            role = msg.get("role")
            content = msg.get("content", "")
            tool_calls = msg.get("tool_calls")
            tool_call_id = msg.get("tool_call_id")

            if role == "system":
                # Extract the system message as top-level instructions
                if isinstance(content, str):
                    instructions = content
                else:
                    raise ValueError(f"System message must be a string: {content}")
            elif role == "tool":
                # Convert a tool result message to function call output format
                input_items.append(
                    {
                        "type": "function_call_output",
                        "call_id": tool_call_id,
                        "output": content,
                    }
                )
            elif role == "assistant" and tool_calls and isinstance(tool_calls, list):
                for tool_call in tool_calls:
                    function = tool_call.get("function")
                    if function:
                        input_tool_call = {
                            "type": "function_call",
                            "call_id": tool_call["id"],
                        }
                        if "name" in function:
                            input_tool_call["name"] = function["name"]
                        if "arguments" in function:
                            input_tool_call["arguments"] = function["arguments"]
                        input_items.append(input_tool_call)
                    else:
                        raise ValueError(f"tool call not supported: {tool_call}")
            elif content is not None:
                # Regular user/assistant message
                input_items.append(
                    {
                        "type": "message",
                        "role": role,
                        "content": self._convert_content_to_responses_format(content),
                    }
                )

        return input_items, instructions
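
    # For example (illustrative): the chat-completions history
    #   [{"role": "system", "content": "Be terse."},
    #    {"role": "user", "content": "Hi"}]
    # becomes
    #   input_items  = [{"type": "message", "role": "user",
    #                    "content": [{"type": "input_text", "text": "Hi"}]}]
    #   instructions = "Be terse."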

    def transform_request(
        self,
        model: str,
        messages: List["AllMessageValues"],
        optional_params: dict,
        litellm_params: dict,
        headers: dict,
    ) -> dict:
        from litellm.types.llms.openai import ResponsesAPIOptionalRequestParams

        (
            input_items,
            instructions,
        ) = self.convert_chat_completion_messages_to_responses_api(messages)

        # Build the responses API request using the reverse transformation logic
        responses_api_request = ResponsesAPIOptionalRequestParams()

        # Set instructions if we found a system message
        if instructions:
            responses_api_request["instructions"] = instructions

        # Map optional parameters (None values were already skipped)
        for key, value in optional_params.items():
            if value is None:
                continue
            if key in ("max_tokens", "max_completion_tokens"):
                responses_api_request["max_output_tokens"] = value
            elif key == "tools":
                # Convert chat completion tools to responses API tools format
                responses_api_request[
                    "tools"
                ] = self._convert_tools_to_responses_format(
                    cast(List[Dict[str, Any]], value)
                )
            elif key in ResponsesAPIOptionalRequestParams.__annotations__.keys():
                responses_api_request[key] = value  # type: ignore
            elif key == "metadata":
                responses_api_request["metadata"] = value
            elif key == "previous_response_id":
                # Support for responses API session management
                responses_api_request["previous_response_id"] = value

        # Fall back to litellm_params for the stream flag if it is not in optional_params
        stream = optional_params.get("stream") or litellm_params.get("stream", False)
        verbose_logger.debug(f"Chat provider: Stream parameter: {stream}")

        # Ensure stream is properly set in the request
        if stream:
            responses_api_request["stream"] = True

        # Log when a previous_response_id was provided; no dedicated session
        # handling happens here beyond the mapping above
        previous_response_id = optional_params.get("previous_response_id")
        if previous_response_id:
            verbose_logger.debug(
                f"Chat provider: Warning ignoring previous response ID: {previous_response_id}"
            )

        # Convert back to responses API format for the actual request
        api_model = model
        request_data = {
            "model": api_model,
            "input": input_items,
        }

        verbose_logger.debug(
            f"Chat provider: Final request model={api_model}, input_items={len(input_items)}"
        )

        # Add non-None values from responses_api_request
        for key, value in responses_api_request.items():
            if value is not None:
                request_data[key] = value

        return request_data
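
    # Illustrative mapping (assumed parameter names from the two public APIs):
    #   optional_params = {"max_tokens": 256, "temperature": 0.2, "stream": True}
    # produces a request like
    #   {"model": ..., "input": [...], "max_output_tokens": 256,
    #    "temperature": 0.2, "stream": True}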

    def transform_response(
        self,
        model: str,
        raw_response: "BaseModel",
        model_response: "ModelResponse",
        logging_obj: "LiteLLMLoggingObj",
        request_data: dict,
        messages: List["AllMessageValues"],
        optional_params: dict,
        litellm_params: dict,
        encoding: Any,
        api_key: Optional[str] = None,
        json_mode: Optional[bool] = None,
    ) -> "ModelResponse":
        """Transform a Responses API response into a chat completion response"""
        from openai.types.responses import (
            ResponseFunctionToolCall,
            ResponseOutputMessage,
            ResponseReasoningItem,
        )

        from litellm.responses.utils import ResponseAPILoggingUtils
        from litellm.types.llms.openai import ResponsesAPIResponse
        from litellm.types.responses.main import (
            GenericResponseOutputItem,
            OutputFunctionToolCall,
        )
        from litellm.types.utils import Choices, Message

        if not isinstance(raw_response, ResponsesAPIResponse):
            raise ValueError(f"Unexpected response type: {type(raw_response)}")

        choices: List[Choices] = []
        index = 0
        for item in raw_response.output:
            if isinstance(item, ResponseReasoningItem):
                pass  # ignore reasoning items for now
            elif isinstance(item, ResponseOutputMessage):
                for content in item.content:
                    response_text = getattr(content, "text", "")
                    msg = Message(role=item.role, content=response_text or "")
                    choices.append(
                        Choices(message=msg, finish_reason="stop", index=index)
                    )
                    index += 1
            elif isinstance(item, ResponseFunctionToolCall):
                msg = Message(
                    content=None,
                    tool_calls=[
                        {
                            "id": item.call_id,
                            "function": {
                                "name": item.name,
                                "arguments": item.arguments,
                            },
                            "type": "function",
                        }
                    ],
                )
                choices.append(
                    Choices(message=msg, finish_reason="tool_calls", index=index)
                )
                index += 1
            elif isinstance(item, GenericResponseOutputItem):
                raise ValueError("GenericResponseOutputItem not supported")
            elif isinstance(item, OutputFunctionToolCall):
                # tool calls of this shape are not translated yet
                raise ValueError("Function calling not supported yet.")
            else:
                raise ValueError(f"Unknown item type: {item}")

        setattr(model_response, "choices", choices)
        setattr(
            model_response,
            "usage",
            ResponseAPILoggingUtils._transform_response_api_usage_to_chat_usage(
                raw_response.usage
            ),
        )
        return model_response
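
    # Illustrative result shape (the tool name below is made up): a
    # function_call output item such as
    #   {"type": "function_call", "call_id": "call_1",
    #    "name": "get_weather", "arguments": "{\"city\": \"Paris\"}"}
    # becomes one chat-completion choice with finish_reason="tool_calls" and a
    # single entry in message.tool_calls.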

    def get_model_response_iterator(
        self,
        streaming_response: Union[
            Iterator[str], AsyncIterator[str], "ModelResponse", "BaseModel"
        ],
        sync_stream: bool,
        json_mode: Optional[bool] = False,
    ) -> BaseModelResponseIterator:
        return OpenAiResponsesToChatCompletionStreamIterator(
            streaming_response, sync_stream, json_mode
        )

    def _convert_content_to_responses_format(
        self,
        content: Union[
            str,
            Iterable[
                Union["OpenAIMessageContentListBlock", "ChatCompletionThinkingBlock"]
            ],
        ],
    ) -> List[Dict[str, Any]]:
        """Convert chat completion content to responses API format"""
        verbose_logger.debug(
            f"Chat provider: Converting content to responses format - input type: {type(content)}"
        )
        if isinstance(content, str):
            result = [{"type": "input_text", "text": content}]
            verbose_logger.debug(f"Chat provider: String content -> {result}")
            return result
        elif isinstance(content, list):
            result = []
            for i, item in enumerate(content):
                verbose_logger.debug(
                    f"Chat provider: Processing content item {i}: {type(item)} = {item}"
                )
                if isinstance(item, str):
                    converted = {"type": "input_text", "text": item}
                    result.append(converted)
                    verbose_logger.debug(f"Chat provider: -> {converted}")
                elif isinstance(item, dict):
                    # Handle multimodal content
                    original_type = item.get("type")
                    if original_type == "text":
                        converted = {"type": "input_text", "text": item.get("text", "")}
                        result.append(converted)
                        verbose_logger.debug(f"Chat provider: text -> {converted}")
                    elif original_type == "image_url":
                        # Map to the responses API image format
                        converted = {
                            "type": "input_image",
                            "image_url": item.get("image_url", {}),
                        }
                        result.append(converted)
                        verbose_logger.debug(
                            f"Chat provider: image_url -> {converted}"
                        )
                    else:
                        # Try to map other types to the responses API format
                        item_type = original_type or "input_text"
                        if item_type == "image":
                            converted = {"type": "input_image", **item}
                            result.append(converted)
                            verbose_logger.debug(
                                f"Chat provider: image -> {converted}"
                            )
                        elif item_type in [
                            "input_text",
                            "input_image",
                            "output_text",
                            "refusal",
                            "input_file",
                            "computer_screenshot",
                            "summary_text",
                        ]:
                            # Already in responses API format
                            result.append(item)
                            verbose_logger.debug(
                                f"Chat provider: passthrough -> {item}"
                            )
                        else:
                            # Default to input_text for unknown types
                            converted = {
                                "type": "input_text",
                                "text": str(item.get("text", item)),
                            }
                            result.append(converted)
                            verbose_logger.debug(
                                f"Chat provider: unknown({original_type}) -> {converted}"
                            )
            verbose_logger.debug(f"Chat provider: Final converted content: {result}")
            return result
        else:
            result = [{"type": "input_text", "text": str(content)}]
            verbose_logger.debug(f"Chat provider: Other content type -> {result}")
            return result
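
    # For example (illustrative; URL is made up): the chat-completions content
    #   [{"type": "text", "text": "What is this?"},
    #    {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}}]
    # converts to
    #   [{"type": "input_text", "text": "What is this?"},
    #    {"type": "input_image", "image_url": {"url": "https://example.com/cat.png"}}]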

    def _convert_tools_to_responses_format(
        self, tools: List[Dict[str, Any]]
    ) -> List["ALL_RESPONSES_API_TOOL_PARAMS"]:
        """Convert chat completion tools to responses API tools format"""
        responses_tools = []
        for tool in tools:
            if tool.get("type") == "function":
                function = tool.get("function", {})
                responses_tools.append(
                    {
                        "type": "function",
                        "name": function.get("name", ""),
                        "description": function.get("description", ""),
                        "parameters": function.get("parameters", {}),
                        "strict": function.get("strict", False),
                    }
                )
        return cast(List["ALL_RESPONSES_API_TOOL_PARAMS"], responses_tools)
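
    # Illustrative flattening (the tool name below is made up): the nested
    # chat-completions tool
    #   {"type": "function", "function": {"name": "get_weather",
    #    "parameters": {...}, "strict": True}}
    # becomes the flat responses API tool
    #   {"type": "function", "name": "get_weather", "description": "",
    #    "parameters": {...}, "strict": True}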

    def _map_responses_status_to_finish_reason(self, status: Optional[str]) -> str:
        """Map a responses API status to a chat completion finish_reason"""
        if not status:
            return "stop"
        status_mapping = {
            "completed": "stop",
            "incomplete": "length",
            "failed": "stop",
            "cancelled": "stop",
        }
        return status_mapping.get(status, "stop")


class OpenAiResponsesToChatCompletionStreamIterator(BaseModelResponseIterator):
    def __init__(
        self, streaming_response, sync_stream: bool, json_mode: Optional[bool] = False
    ):
        super().__init__(streaming_response, sync_stream, json_mode)

    def _handle_string_chunk(
        self, str_line: Union[str, "BaseModel"]
    ) -> Union["GenericStreamingChunk", "ModelResponseStream"]:
        from pydantic import BaseModel

        from litellm.types.utils import GenericStreamingChunk

        if isinstance(str_line, BaseModel):
            return self.chunk_parser(str_line.model_dump())

        if not str_line or str_line.startswith("event:"):
            # SSE event-name lines carry no payload; emit an empty chunk
            return GenericStreamingChunk(
                text="", tool_use=None, is_finished=False, finish_reason="", usage=None
            )

        # Strip the SSE "data:" prefix before parsing the JSON payload
        index = str_line.find("data:")
        if index != -1:
            str_line = str_line[index + 5 :]
        return self.chunk_parser(json.loads(str_line))
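
    # For example (illustrative SSE lines; payloads abbreviated):
    #   "event: response.output_text.delta"            -> empty placeholder chunk
    #   'data: {"type": "response.output_text.delta", "delta": "Hi"}'
    #                             -> parsed and passed to chunk_parser, which
    #                                yields a text chunk with text="Hi"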

    def chunk_parser(
        self, chunk: dict
    ) -> Union["GenericStreamingChunk", "ModelResponseStream"]:
        # Transform a responses API streaming chunk into chat completion format
        from litellm.types.llms.openai import ChatCompletionToolCallFunctionChunk
        from litellm.types.utils import (
            ChatCompletionToolCallChunk,
            GenericStreamingChunk,
        )

        verbose_logger.debug(
            f"Chat provider: transform_streaming_response called with chunk: {chunk}"
        )
        parsed_chunk = chunk
        if not parsed_chunk:
            raise ValueError("Chat provider: Empty parsed_chunk")
        if not isinstance(parsed_chunk, dict):
            raise ValueError(f"Chat provider: Invalid chunk type {type(parsed_chunk)}")

        # Handle the different event types emitted by the responses API
        event_type = parsed_chunk.get("type")
        verbose_logger.debug(f"Chat provider: Processing event type: {event_type}")

        if event_type == "response.created":
            # Initial response creation event
            verbose_logger.debug(f"Chat provider: response.created -> {chunk}")
            return GenericStreamingChunk(
                text="", tool_use=None, is_finished=False, finish_reason="", usage=None
            )
        elif event_type == "response.output_item.added":
            # New output item added
            output_item = parsed_chunk.get("item", {})
            if output_item.get("type") == "function_call":
                return GenericStreamingChunk(
                    text="",
                    tool_use=ChatCompletionToolCallChunk(
                        id=output_item.get("call_id"),
                        index=0,
                        type="function",
                        function=ChatCompletionToolCallFunctionChunk(
                            # name/arguments live on the item, not the outer event
                            name=output_item.get("name", None),
                            arguments=output_item.get("arguments", ""),
                        ),
                    ),
                    is_finished=False,
                    finish_reason="",
                    usage=None,
                )
            elif output_item.get("type") == "message":
                pass
            elif output_item.get("type") == "reasoning":
                pass
            else:
                raise ValueError(f"Chat provider: Invalid output_item {output_item}")
        elif event_type == "response.function_call_arguments.delta":
            content_part: Optional[str] = parsed_chunk.get("delta", None)
            if content_part:
                return GenericStreamingChunk(
                    text="",
                    tool_use=ChatCompletionToolCallChunk(
                        id=None,
                        index=0,
                        type="function",
                        function=ChatCompletionToolCallFunctionChunk(
                            name=None, arguments=content_part
                        ),
                    ),
                    is_finished=False,
                    finish_reason="",
                    usage=None,
                )
            else:
                raise ValueError(
                    f"Chat provider: Invalid function argument delta {parsed_chunk}"
                )
        elif event_type == "response.output_item.done":
            # An output item finished streaming
            output_item = parsed_chunk.get("item", {})
            if output_item.get("type") == "function_call":
                return GenericStreamingChunk(
                    text="",
                    tool_use=ChatCompletionToolCallChunk(
                        id=output_item.get("call_id"),
                        index=0,
                        type="function",
                        function=ChatCompletionToolCallFunctionChunk(
                            name=output_item.get("name", None),
                            # the done event repeats the full arguments; they were
                            # already streamed as deltas, so don't emit them again
                            arguments="",
                        ),
                    ),
                    is_finished=True,
                    finish_reason="tool_calls",
                    usage=None,
                )
            elif output_item.get("type") == "message":
                return GenericStreamingChunk(
                    finish_reason="stop", is_finished=True, usage=None, text=""
                )
            elif output_item.get("type") == "reasoning":
                pass
            else:
                raise ValueError(f"Chat provider: Invalid output_item {output_item}")
        elif event_type == "response.output_text.delta":
            # A text fragment was appended to the output
            content_part = parsed_chunk.get("delta", None)
            if content_part is not None:
                return GenericStreamingChunk(
                    text=content_part,
                    tool_use=None,
                    is_finished=False,
                    finish_reason="",
                    usage=None,
                )
            else:
                raise ValueError(f"Chat provider: Invalid text delta {parsed_chunk}")

        # For any unhandled event type, return a minimal valid (empty) chunk
        verbose_logger.debug(
            f"Chat provider: Unhandled event type '{event_type}', creating empty chunk"
        )
        return GenericStreamingChunk(
            text="", tool_use=None, is_finished=False, finish_reason="", usage=None
        )
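

# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative, assuming a litellm install where the
# imports above resolve; the message contents are made up):
if __name__ == "__main__":
    handler = LiteLLMResponsesTransformationHandler()
    items, instructions = handler.convert_chat_completion_messages_to_responses_api(
        [
            {"role": "system", "content": "You are terse."},
            {"role": "user", "content": "Hi"},
        ]
    )
    print(instructions)  # -> "You are terse."
    print(items)
    # -> [{"type": "message", "role": "user",
    #      "content": [{"type": "input_text", "text": "Hi"}]}]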