import base64
from typing import Any, Dict, Optional, Union, cast, get_type_hints

import litellm
from litellm._logging import verbose_logger
from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
from litellm.types.llms.openai import (
    ResponseAPIUsage,
    ResponsesAPIOptionalRequestParams,
    ResponsesAPIResponse,
)
from litellm.types.responses.main import DecodedResponseId
from litellm.types.utils import SpecialEnums, Usage


class ResponsesAPIRequestUtils:
    """Helper utils for constructing ResponseAPI requests"""

    @staticmethod
    def get_optional_params_responses_api(
        model: str,
        responses_api_provider_config: BaseResponsesAPIConfig,
        response_api_optional_params: ResponsesAPIOptionalRequestParams,
    ) -> Dict:
""" | |
Get optional parameters for the responses API. | |
Args: | |
params: Dictionary of all parameters | |
model: The model name | |
responses_api_provider_config: The provider configuration for responses API | |
Returns: | |
A dictionary of supported parameters for the responses API | |
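
        Example (illustrative; ``provider_config`` is a hypothetical concrete
        BaseResponsesAPIConfig instance for the target provider):
            mapped = ResponsesAPIRequestUtils.get_optional_params_responses_api(
                model="gpt-4o",
                responses_api_provider_config=provider_config,
                response_api_optional_params={"temperature": 0.2},
            )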
""" | |
        # Get supported parameters for the model
        supported_params = responses_api_provider_config.get_supported_openai_params(
            model
        )

        # Check for unsupported parameters
        unsupported_params = [
            param
            for param in response_api_optional_params
            if param not in supported_params
        ]

        if unsupported_params:
            raise litellm.UnsupportedParamsError(
                model=model,
                message=f"The following parameters are not supported for model {model}: {', '.join(unsupported_params)}",
            )

        # Map parameters to provider-specific format
        mapped_params = responses_api_provider_config.map_openai_params(
            response_api_optional_params=response_api_optional_params,
            model=model,
            drop_params=litellm.drop_params,
        )

        return mapped_params

    @staticmethod
    def get_requested_response_api_optional_param(
        params: Dict[str, Any],
    ) -> ResponsesAPIOptionalRequestParams:
""" | |
Filter parameters to only include those defined in ResponsesAPIOptionalRequestParams. | |
Args: | |
params: Dictionary of parameters to filter | |
Returns: | |
ResponsesAPIOptionalRequestParams instance with only the valid parameters | |
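
        Example (illustrative; assumes ``temperature`` and ``stream`` are valid
        ResponsesAPIOptionalRequestParams keys):
            >>> ResponsesAPIRequestUtils.get_requested_response_api_optional_param(
            ...     {"temperature": 0.2, "stream": None, "not_a_param": 1}
            ... )
            {'temperature': 0.2}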
""" | |
valid_keys = get_type_hints(ResponsesAPIOptionalRequestParams).keys() | |
filtered_params = { | |
k: v for k, v in params.items() if k in valid_keys and v is not None | |
} | |
# decode previous_response_id if it's a litellm encoded id | |
if "previous_response_id" in filtered_params: | |
decoded_previous_response_id = ResponsesAPIRequestUtils.decode_previous_response_id_to_original_previous_response_id( | |
filtered_params["previous_response_id"] | |
) | |
filtered_params["previous_response_id"] = decoded_previous_response_id | |
return cast(ResponsesAPIOptionalRequestParams, filtered_params) | |

    @staticmethod
    def _update_responses_api_response_id_with_model_id(
        responses_api_response: ResponsesAPIResponse,
        custom_llm_provider: Optional[str],
        litellm_metadata: Optional[Dict[str, Any]] = None,
    ) -> ResponsesAPIResponse:
""" | |
Update the responses_api_response_id with model_id and custom_llm_provider | |
This builds a composite ID containing the custom LLM provider, model ID, and original response ID | |
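
        Example (illustrative; ``response`` is a hypothetical ResponsesAPIResponse):
            Given ``response.id == "resp_abc"`` and
            ``litellm_metadata={"model_info": {"id": "deployment-1"}}``, ``response.id``
            is rewritten to ``"resp_<base64-encoded composite id>"``.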
""" | |
litellm_metadata = litellm_metadata or {} | |
model_info: Dict[str, Any] = litellm_metadata.get("model_info", {}) or {} | |
model_id = model_info.get("id") | |
updated_id = ResponsesAPIRequestUtils._build_responses_api_response_id( | |
model_id=model_id, | |
custom_llm_provider=custom_llm_provider, | |
response_id=responses_api_response.id, | |
) | |
responses_api_response.id = updated_id | |
return responses_api_response | |

    @staticmethod
    def _build_responses_api_response_id(
        custom_llm_provider: Optional[str],
        model_id: Optional[str],
        response_id: str,
    ) -> str:
"""Build the responses_api_response_id""" | |
        assembled_id: str = str(
            SpecialEnums.LITELLM_MANAGED_RESPONSE_COMPLETE_STR.value
        ).format(custom_llm_provider, model_id, response_id)
        base64_encoded_id: str = base64.b64encode(assembled_id.encode("utf-8")).decode(
            "utf-8"
        )
        return f"resp_{base64_encoded_id}"

    @staticmethod
    def _decode_responses_api_response_id(
        response_id: str,
    ) -> DecodedResponseId:
""" | |
Decode the responses_api_response_id | |
Returns: | |
DecodedResponseId: Structured tuple with custom_llm_provider, model_id, and response_id | |
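
        Example (illustrative round trip with ``_build_responses_api_response_id``):
            >>> encoded = ResponsesAPIRequestUtils._build_responses_api_response_id(
            ...     custom_llm_provider="openai",
            ...     model_id="deployment-1",
            ...     response_id="resp_abc123",
            ... )
            >>> decoded = ResponsesAPIRequestUtils._decode_responses_api_response_id(encoded)
            >>> (decoded["custom_llm_provider"], decoded["model_id"], decoded["response_id"])
            ('openai', 'deployment-1', 'resp_abc123')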
""" | |
try: | |
# Remove prefix and decode | |
cleaned_id = response_id.replace("resp_", "") | |
decoded_id = base64.b64decode(cleaned_id.encode("utf-8")).decode("utf-8") | |
# Parse components using known prefixes | |
if ";" not in decoded_id: | |
return DecodedResponseId( | |
custom_llm_provider=None, | |
model_id=None, | |
response_id=response_id, | |
) | |
parts = decoded_id.split(";") | |
# Format: litellm:custom_llm_provider:{};model_id:{};response_id:{} | |
custom_llm_provider = None | |
model_id = None | |
if ( | |
len(parts) >= 3 | |
): # Full format with custom_llm_provider, model_id, and response_id | |
custom_llm_provider_part = parts[0] | |
model_id_part = parts[1] | |
response_part = parts[2] | |
custom_llm_provider = custom_llm_provider_part.replace( | |
"litellm:custom_llm_provider:", "" | |
) | |
model_id = model_id_part.replace("model_id:", "") | |
decoded_response_id = response_part.replace("response_id:", "") | |
else: | |
decoded_response_id = response_id | |
return DecodedResponseId( | |
custom_llm_provider=custom_llm_provider, | |
model_id=model_id, | |
response_id=decoded_response_id, | |
) | |
except Exception as e: | |
verbose_logger.debug(f"Error decoding response_id '{response_id}': {e}") | |
return DecodedResponseId( | |
custom_llm_provider=None, | |
model_id=None, | |
response_id=response_id, | |
) | |

    @staticmethod
    def get_model_id_from_response_id(response_id: Optional[str]) -> Optional[str]:
        """Get the model_id from the response_id"""
        if response_id is None:
            return None
        decoded_response_id = (
            ResponsesAPIRequestUtils._decode_responses_api_response_id(response_id)
        )
        return decoded_response_id.get("model_id") or None

    @staticmethod
    def decode_previous_response_id_to_original_previous_response_id(
        previous_response_id: str,
    ) -> str:
""" | |
Decode the previous_response_id to the original previous_response_id | |
Why? | |
- LiteLLM encodes the `custom_llm_provider` and `model_id` into the `previous_response_id` this helps with maintaining session consistency when load balancing multiple deployments of the same model. | |
- We cannot send the litellm encoded b64 to the upstream llm api, hence we decode it to the original `previous_response_id` | |
Args: | |
previous_response_id: The previous_response_id to decode | |
Returns: | |
The original previous_response_id | |
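
        Example (illustrative; ``litellm_encoded_id`` is a hypothetical ID
        produced by ``_build_responses_api_response_id``):
            original = ResponsesAPIRequestUtils.decode_previous_response_id_to_original_previous_response_id(
                litellm_encoded_id  # hypothetical litellm-encoded ID
            )
            # IDs that were never litellm-encoded are returned unchanged.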
""" | |
decoded_response_id = ( | |
ResponsesAPIRequestUtils._decode_responses_api_response_id( | |
previous_response_id | |
) | |
) | |
return decoded_response_id.get("response_id", previous_response_id) | |


class ResponseAPILoggingUtils:
    @staticmethod
    def _is_response_api_usage(usage: Union[dict, ResponseAPIUsage]) -> bool:
        """Returns True if usage is from the OpenAI Responses API"""
        if isinstance(usage, ResponseAPIUsage):
            return True
        if "input_tokens" in usage and "output_tokens" in usage:
            return True
        return False

    @staticmethod
    def _transform_response_api_usage_to_chat_usage(
        usage: Union[dict, ResponseAPIUsage],
    ) -> Usage:
"""Tranforms the ResponseAPIUsage object to a Usage object""" | |
        response_api_usage: ResponseAPIUsage = (
            ResponseAPIUsage(**usage) if isinstance(usage, dict) else usage
        )
        prompt_tokens: int = response_api_usage.input_tokens or 0
        completion_tokens: int = response_api_usage.output_tokens or 0

        return Usage(
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
            total_tokens=prompt_tokens + completion_tokens,
        )