"""
Handler for transforming /chat/completions API requests to litellm.responses requests
"""
import json
from typing import (
TYPE_CHECKING,
Any,
AsyncIterator,
Dict,
Iterable,
Iterator,
List,
Optional,
Tuple,
Union,
cast,
)
from litellm import ModelResponse
from litellm._logging import verbose_logger
from litellm.llms.base_llm.base_model_iterator import BaseModelResponseIterator
from litellm.llms.base_llm.bridges.completion_transformation import (
CompletionTransformationBridge,
)
if TYPE_CHECKING:
from pydantic import BaseModel
from litellm import LiteLLMLoggingObj, ModelResponse
from litellm.llms.base_llm.base_model_iterator import BaseModelResponseIterator
from litellm.types.llms.openai import (
ALL_RESPONSES_API_TOOL_PARAMS,
AllMessageValues,
ChatCompletionThinkingBlock,
OpenAIMessageContentListBlock,
)
from litellm.types.utils import GenericStreamingChunk, ModelResponseStream
class LiteLLMResponsesTransformationHandler(CompletionTransformationBridge):
"""
    Handler for transforming /chat/completions API requests to litellm.responses requests
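
    Illustrative usage (a sketch; the model name is a placeholder and, in
    practice, litellm's completion bridge invokes this handler rather than
    user code calling it directly):

        handler = LiteLLMResponsesTransformationHandler()
        request_data = handler.transform_request(
            model="gpt-4.1",
            messages=[{"role": "user", "content": "Hi"}],
            optional_params={},
            litellm_params={},
            headers={},
        )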
"""
def __init__(self):
pass
def convert_chat_completion_messages_to_responses_api(
self, messages: List["AllMessageValues"]
) -> Tuple[List[Any], Optional[str]]:
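        """
        Split chat completion messages into responses API input items plus an
        optional ``instructions`` string taken from the system message.

        Illustrative mapping (derived from the logic below):

            [{"role": "system", "content": "Be brief."},
             {"role": "user", "content": "Hi"}]
            ->
            ([{"type": "message", "role": "user",
               "content": [{"type": "input_text", "text": "Hi"}]}],
             "Be brief.")
        """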
input_items: List[Any] = []
instructions: Optional[str] = None
for msg in messages:
role = msg.get("role")
content = msg.get("content", "")
tool_calls = msg.get("tool_calls")
tool_call_id = msg.get("tool_call_id")
if role == "system":
# Extract system message as instructions
if isinstance(content, str):
instructions = content
else:
raise ValueError(f"System message must be a string: {content}")
elif role == "tool":
# Convert tool message to function call output format
input_items.append(
{
"type": "function_call_output",
"call_id": tool_call_id,
"output": content,
}
)
elif role == "assistant" and tool_calls and isinstance(tool_calls, list):
for tool_call in tool_calls:
function = tool_call.get("function")
if function:
input_tool_call = {
"type": "function_call",
"call_id": tool_call["id"],
}
if "name" in function:
input_tool_call["name"] = function["name"]
if "arguments" in function:
input_tool_call["arguments"] = function["arguments"]
input_items.append(input_tool_call)
else:
raise ValueError(f"tool call not supported: {tool_call}")
elif content is not None:
# Regular user/assistant message
input_items.append(
{
"type": "message",
"role": role,
"content": self._convert_content_to_responses_format(content),
}
)
return input_items, instructions
def transform_request(
self,
model: str,
messages: List["AllMessageValues"],
optional_params: dict,
litellm_params: dict,
headers: dict,
) -> dict:
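        """
        Build the litellm.responses request body from a chat completion style
        request: converts messages to input items, lifts the system message
        into ``instructions``, and maps optional params (e.g. ``max_tokens`` ->
        ``max_output_tokens``, chat completion tools -> responses API tools).
        """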
from litellm.types.llms.openai import ResponsesAPIOptionalRequestParams
(
input_items,
instructions,
) = self.convert_chat_completion_messages_to_responses_api(messages)
# Build responses API request using the reverse transformation logic
responses_api_request = ResponsesAPIOptionalRequestParams()
# Set instructions if we found a system message
if instructions:
responses_api_request["instructions"] = instructions
# Map optional parameters
for key, value in optional_params.items():
if value is None:
continue
if key in ("max_tokens", "max_completion_tokens"):
responses_api_request["max_output_tokens"] = value
elif key == "tools" and value is not None:
# Convert chat completion tools to responses API tools format
responses_api_request[
"tools"
] = self._convert_tools_to_responses_format(
cast(List[Dict[str, Any]], value)
)
elif key in ResponsesAPIOptionalRequestParams.__annotations__.keys():
responses_api_request[key] = value # type: ignore
elif key == "metadata":
responses_api_request["metadata"] = value
elif key == "previous_response_id":
# Support for responses API session management
responses_api_request["previous_response_id"] = value
# Get stream parameter from litellm_params if not in optional_params
stream = optional_params.get("stream") or litellm_params.get("stream", False)
verbose_logger.debug(f"Chat provider: Stream parameter: {stream}")
# Ensure stream is properly set in the request
if stream:
responses_api_request["stream"] = True
        # previous_response_id (responses API session management) gets no special
        # handling here beyond the parameter mapping above; log it for visibility
        previous_response_id = optional_params.get("previous_response_id")
        if previous_response_id:
            verbose_logger.debug(
                f"Chat provider: Warning - ignoring previous response ID: {previous_response_id}"
            )
# Convert back to responses API format for the actual request
api_model = model
request_data = {
"model": api_model,
"input": input_items,
}
verbose_logger.debug(
f"Chat provider: Final request model={api_model}, input_items={len(input_items)}"
)
# Add non-None values from responses_api_request
for key, value in responses_api_request.items():
if value is not None:
if key == "instructions" and instructions:
request_data["instructions"] = instructions
else:
request_data[key] = value
return request_data
def transform_response(
self,
model: str,
raw_response: "BaseModel",
model_response: "ModelResponse",
logging_obj: "LiteLLMLoggingObj",
request_data: dict,
messages: List["AllMessageValues"],
optional_params: dict,
litellm_params: dict,
encoding: Any,
api_key: Optional[str] = None,
json_mode: Optional[bool] = None,
) -> "ModelResponse":
"""Transform Responses API response to chat completion response"""
from openai.types.responses import (
ResponseFunctionToolCall,
ResponseOutputMessage,
ResponseReasoningItem,
)
from litellm.responses.utils import ResponseAPILoggingUtils
from litellm.types.llms.openai import ResponsesAPIResponse
from litellm.types.responses.main import (
GenericResponseOutputItem,
OutputFunctionToolCall,
)
from litellm.types.utils import Choices, Message
if not isinstance(raw_response, ResponsesAPIResponse):
raise ValueError(f"Unexpected response type: {type(raw_response)}")
choices: List[Choices] = []
index = 0
for item in raw_response.output:
if isinstance(item, ResponseReasoningItem):
pass # ignore for now.
elif isinstance(item, ResponseOutputMessage):
for content in item.content:
response_text = getattr(content, "text", "")
msg = Message(
role=item.role, content=response_text if response_text else ""
)
choices.append(
Choices(message=msg, finish_reason="stop", index=index)
)
index += 1
elif isinstance(item, ResponseFunctionToolCall):
msg = Message(
content=None,
tool_calls=[
{
"id": item.call_id,
"function": {
"name": item.name,
"arguments": item.arguments,
},
"type": "function",
}
],
)
choices.append(
Choices(message=msg, finish_reason="tool_calls", index=index)
)
index += 1
elif isinstance(item, GenericResponseOutputItem):
raise ValueError("GenericResponseOutputItem not supported")
elif isinstance(item, OutputFunctionToolCall):
                # OutputFunctionToolCall items are not converted by this bridge yet
                raise ValueError("Function calling not supported yet.")
else:
raise ValueError(f"Unknown item type: {item}")
setattr(model_response, "choices", choices)
setattr(
model_response,
"usage",
ResponseAPILoggingUtils._transform_response_api_usage_to_chat_usage(
raw_response.usage
),
)
return model_response
def get_model_response_iterator(
self,
streaming_response: Union[
Iterator[str], AsyncIterator[str], "ModelResponse", "BaseModel"
],
sync_stream: bool,
json_mode: Optional[bool] = False,
) -> BaseModelResponseIterator:
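        """Return the iterator that converts responses API stream events into chat completion chunks."""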
return OpenAiResponsesToChatCompletionStreamIterator(
streaming_response, sync_stream, json_mode
)
def _convert_content_to_responses_format(
self,
content: Union[
str,
Iterable[
Union["OpenAIMessageContentListBlock", "ChatCompletionThinkingBlock"]
],
],
) -> List[Dict[str, Any]]:
"""Convert chat completion content to responses API format"""
verbose_logger.debug(
f"Chat provider: Converting content to responses format - input type: {type(content)}"
)
if isinstance(content, str):
result = [{"type": "input_text", "text": content}]
verbose_logger.debug(f"Chat provider: String content -> {result}")
return result
elif isinstance(content, list):
result = []
for i, item in enumerate(content):
verbose_logger.debug(
f"Chat provider: Processing content item {i}: {type(item)} = {item}"
)
if isinstance(item, str):
converted = {"type": "input_text", "text": item}
result.append(converted)
verbose_logger.debug(f"Chat provider: -> {converted}")
elif isinstance(item, dict):
# Handle multimodal content
original_type = item.get("type")
if original_type == "text":
converted = {"type": "input_text", "text": item.get("text", "")}
result.append(converted)
verbose_logger.debug(f"Chat provider: text -> {converted}")
                    elif original_type == "image_url":
                        # Map to responses API image format; chat completions nest
                        # the URL as {"url": ...} while input_image expects a string
                        image_url_value = item.get("image_url", {})
                        if isinstance(image_url_value, dict):
                            image_url_value = image_url_value.get("url", "")
                        converted = {
                            "type": "input_image",
                            "image_url": image_url_value,
                        }
                        result.append(converted)
                        verbose_logger.debug(
                            f"Chat provider: image_url -> {converted}"
                        )
else:
# Try to map other types to responses API format
item_type = original_type or "input_text"
if item_type == "image":
converted = {"type": "input_image", **item}
result.append(converted)
verbose_logger.debug(
f"Chat provider: image -> {converted}"
)
elif item_type in [
"input_text",
"input_image",
"output_text",
"refusal",
"input_file",
"computer_screenshot",
"summary_text",
]:
# Already in responses API format
result.append(item)
verbose_logger.debug(
f"Chat provider: passthrough -> {item}"
)
else:
# Default to input_text for unknown types
converted = {
"type": "input_text",
"text": str(item.get("text", item)),
}
result.append(converted)
verbose_logger.debug(
f"Chat provider: unknown({original_type}) -> {converted}"
)
verbose_logger.debug(f"Chat provider: Final converted content: {result}")
return result
else:
result = [{"type": "input_text", "text": str(content)}]
verbose_logger.debug(f"Chat provider: Other content type -> {result}")
return result
def _convert_tools_to_responses_format(
self, tools: List[Dict[str, Any]]
) -> List["ALL_RESPONSES_API_TOOL_PARAMS"]:
"""Convert chat completion tools to responses API tools format"""
responses_tools = []
for tool in tools:
if tool.get("type") == "function":
function = tool.get("function", {})
responses_tools.append(
{
"type": "function",
"name": function.get("name", ""),
"description": function.get("description", ""),
"parameters": function.get("parameters", {}),
"strict": function.get("strict", False),
}
)
return cast(List["ALL_RESPONSES_API_TOOL_PARAMS"], responses_tools)
def _map_responses_status_to_finish_reason(self, status: Optional[str]) -> str:
"""Map responses API status to chat completion finish_reason"""
if not status:
return "stop"
status_mapping = {
"completed": "stop",
"incomplete": "length",
"failed": "stop",
"cancelled": "stop",
}
return status_mapping.get(status, "stop")
class OpenAiResponsesToChatCompletionStreamIterator(BaseModelResponseIterator):
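    """
    Streaming iterator that parses responses API SSE events (response.created,
    response.output_item.added/done, response.output_text.delta,
    response.function_call_arguments.delta) into chat completion style
    GenericStreamingChunk objects.
    """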
def __init__(
self, streaming_response, sync_stream: bool, json_mode: Optional[bool] = False
):
super().__init__(streaming_response, sync_stream, json_mode)
def _handle_string_chunk(
self, str_line: Union[str, "BaseModel"]
) -> Union["GenericStreamingChunk", "ModelResponseStream"]:
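        """Parse a raw SSE line (or an already-parsed pydantic model) and dispatch it to chunk_parser."""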
        from pydantic import BaseModel

        from litellm.types.utils import GenericStreamingChunk
if isinstance(str_line, BaseModel):
return self.chunk_parser(str_line.model_dump())
if not str_line or str_line.startswith("event:"):
# ignore.
return GenericStreamingChunk(
text="", tool_use=None, is_finished=False, finish_reason="", usage=None
)
index = str_line.find("data:")
if index != -1:
str_line = str_line[index + 5 :]
return self.chunk_parser(json.loads(str_line))
def chunk_parser(
self, chunk: dict
) -> Union["GenericStreamingChunk", "ModelResponseStream"]:
# Transform responses API streaming chunk to chat completion format
from litellm.types.llms.openai import ChatCompletionToolCallFunctionChunk
from litellm.types.utils import (
ChatCompletionToolCallChunk,
GenericStreamingChunk,
)
verbose_logger.debug(
f"Chat provider: transform_streaming_response called with chunk: {chunk}"
)
parsed_chunk = chunk
if not parsed_chunk:
raise ValueError("Chat provider: Empty parsed_chunk")
if not isinstance(parsed_chunk, dict):
raise ValueError(f"Chat provider: Invalid chunk type {type(parsed_chunk)}")
# Handle different event types from responses API
event_type = parsed_chunk.get("type")
verbose_logger.debug(f"Chat provider: Processing event type: {event_type}")
if event_type == "response.created":
# Initial response creation event
verbose_logger.debug(f"Chat provider: response.created -> {chunk}")
return GenericStreamingChunk(
text="", tool_use=None, is_finished=False, finish_reason="", usage=None
)
elif event_type == "response.output_item.added":
# New output item added
output_item = parsed_chunk.get("item", {})
if output_item.get("type") == "function_call":
return GenericStreamingChunk(
text="",
tool_use=ChatCompletionToolCallChunk(
id=output_item.get("call_id"),
index=0,
type="function",
function=ChatCompletionToolCallFunctionChunk(
                            name=output_item.get("name", None),
                            arguments=output_item.get("arguments", ""),
),
),
is_finished=False,
finish_reason="",
usage=None,
)
elif output_item.get("type") == "message":
pass
elif output_item.get("type") == "reasoning":
pass
else:
raise ValueError(f"Chat provider: Invalid output_item {output_item}")
elif event_type == "response.function_call_arguments.delta":
content_part: Optional[str] = parsed_chunk.get("delta", None)
if content_part:
return GenericStreamingChunk(
text="",
tool_use=ChatCompletionToolCallChunk(
id=None,
index=0,
type="function",
function=ChatCompletionToolCallFunctionChunk(
name=None, arguments=content_part
),
),
is_finished=False,
finish_reason="",
usage=None,
)
else:
raise ValueError(
f"Chat provider: Invalid function argument delta {parsed_chunk}"
)
elif event_type == "response.output_item.done":
            # Output item finished streaming
output_item = parsed_chunk.get("item", {})
if output_item.get("type") == "function_call":
return GenericStreamingChunk(
text="",
tool_use=ChatCompletionToolCallChunk(
id=output_item.get("call_id"),
index=0,
type="function",
function=ChatCompletionToolCallFunctionChunk(
                            name=output_item.get("name", None),
                            arguments="",  # the done event repeats the full arguments; they were already streamed
),
),
is_finished=True,
finish_reason="tool_calls",
usage=None,
)
elif output_item.get("type") == "message":
return GenericStreamingChunk(
finish_reason="stop", is_finished=True, usage=None, text=""
)
elif output_item.get("type") == "reasoning":
pass
else:
raise ValueError(f"Chat provider: Invalid output_item {output_item}")
elif event_type == "response.output_text.delta":
# Content part added to output
content_part = parsed_chunk.get("delta", None)
if content_part is not None:
return GenericStreamingChunk(
text=content_part,
tool_use=None,
is_finished=False,
finish_reason="",
usage=None,
)
else:
raise ValueError(f"Chat provider: Invalid text delta {parsed_chunk}")
else:
pass
# For any unhandled event types, create a minimal valid chunk or skip
verbose_logger.debug(
f"Chat provider: Unhandled event type '{event_type}', creating empty chunk"
)
# Return a minimal valid chunk for unknown events
return GenericStreamingChunk(
text="", tool_use=None, is_finished=False, finish_reason="", usage=None
)