import json from typing import ( TYPE_CHECKING, Any, AsyncIterator, Coroutine, Dict, List, Literal, Optional, Tuple, Union, cast, ) import httpx # type: ignore import litellm import litellm.litellm_core_utils import litellm.types import litellm.types.utils from litellm._logging import verbose_logger from litellm.litellm_core_utils.realtime_streaming import RealTimeStreaming from litellm.llms.base_llm.anthropic_messages.transformation import ( BaseAnthropicMessagesConfig, ) from litellm.llms.base_llm.audio_transcription.transformation import ( BaseAudioTranscriptionConfig, ) from litellm.llms.base_llm.base_model_iterator import MockResponseIterator from litellm.llms.base_llm.chat.transformation import BaseConfig from litellm.llms.base_llm.embedding.transformation import BaseEmbeddingConfig from litellm.llms.base_llm.files.transformation import BaseFilesConfig from litellm.llms.base_llm.image_edit.transformation import BaseImageEditConfig from litellm.llms.base_llm.realtime.transformation import BaseRealtimeConfig from litellm.llms.base_llm.rerank.transformation import BaseRerankConfig from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig from litellm.llms.custom_httpx.http_handler import ( AsyncHTTPHandler, HTTPHandler, _get_httpx_client, get_async_httpx_client, ) from litellm.responses.streaming_iterator import ( BaseResponsesAPIStreamingIterator, MockResponsesAPIStreamingIterator, ResponsesAPIStreamingIterator, SyncResponsesAPIStreamingIterator, ) from litellm.types.llms.anthropic_messages.anthropic_response import ( AnthropicMessagesResponse, ) from litellm.types.llms.openai import ( CreateFileRequest, OpenAIFileObject, ResponseInputParam, ResponsesAPIResponse, ) from litellm.types.rerank import OptionalRerankParams, RerankResponse from litellm.types.responses.main import DeleteResponseResult from litellm.types.router import GenericLiteLLMParams from litellm.types.utils import EmbeddingResponse, FileTypes, TranscriptionResponse from litellm.utils import ( CustomStreamWrapper, ImageResponse, ModelResponse, ProviderConfigManager, ) if TYPE_CHECKING: from litellm.litellm_core_utils.litellm_logging import Logging as _LiteLLMLoggingObj LiteLLMLoggingObj = _LiteLLMLoggingObj else: LiteLLMLoggingObj = Any class BaseLLMHTTPHandler: async def _make_common_async_call( self, async_httpx_client: AsyncHTTPHandler, provider_config: BaseConfig, api_base: str, headers: dict, data: dict, timeout: Union[float, httpx.Timeout], litellm_params: dict, logging_obj: LiteLLMLoggingObj, stream: bool = False, signed_json_body: Optional[bytes] = None, ) -> httpx.Response: """Common implementation across stream + non-stream calls. 
Meant to ensure consistent error-handling.""" max_retry_on_unprocessable_entity_error = ( provider_config.max_retry_on_unprocessable_entity_error ) response: Optional[httpx.Response] = None for i in range(max(max_retry_on_unprocessable_entity_error, 1)): try: response = await async_httpx_client.post( url=api_base, headers=headers, data=( signed_json_body if signed_json_body is not None else json.dumps(data) ), timeout=timeout, stream=stream, logging_obj=logging_obj, ) except httpx.HTTPStatusError as e: hit_max_retry = i + 1 == max_retry_on_unprocessable_entity_error should_retry = provider_config.should_retry_llm_api_inside_llm_translation_on_http_error( e=e, litellm_params=litellm_params ) if should_retry and not hit_max_retry: data = ( provider_config.transform_request_on_unprocessable_entity_error( e=e, request_data=data ) ) continue else: raise self._handle_error(e=e, provider_config=provider_config) except Exception as e: raise self._handle_error(e=e, provider_config=provider_config) break if response is None: raise provider_config.get_error_class( error_message="No response from the API", status_code=422, # don't retry on this error headers={}, ) return response def _make_common_sync_call( self, sync_httpx_client: HTTPHandler, provider_config: BaseConfig, api_base: str, headers: dict, data: dict, timeout: Union[float, httpx.Timeout], litellm_params: dict, logging_obj: LiteLLMLoggingObj, stream: bool = False, signed_json_body: Optional[bytes] = None, ) -> httpx.Response: max_retry_on_unprocessable_entity_error = ( provider_config.max_retry_on_unprocessable_entity_error ) response: Optional[httpx.Response] = None for i in range(max(max_retry_on_unprocessable_entity_error, 1)): try: response = sync_httpx_client.post( url=api_base, headers=headers, data=( signed_json_body if signed_json_body is not None else json.dumps(data) ), timeout=timeout, stream=stream, logging_obj=logging_obj, ) except httpx.HTTPStatusError as e: hit_max_retry = i + 1 == max_retry_on_unprocessable_entity_error should_retry = provider_config.should_retry_llm_api_inside_llm_translation_on_http_error( e=e, litellm_params=litellm_params ) if should_retry and not hit_max_retry: data = ( provider_config.transform_request_on_unprocessable_entity_error( e=e, request_data=data ) ) continue else: raise self._handle_error(e=e, provider_config=provider_config) except Exception as e: raise self._handle_error(e=e, provider_config=provider_config) break if response is None: raise provider_config.get_error_class( error_message="No response from the API", status_code=422, # don't retry on this error headers={}, ) return response async def async_completion( self, custom_llm_provider: str, provider_config: BaseConfig, api_base: str, headers: dict, data: dict, timeout: Union[float, httpx.Timeout], model: str, model_response: ModelResponse, logging_obj: LiteLLMLoggingObj, messages: list, optional_params: dict, litellm_params: dict, encoding: Any, api_key: Optional[str] = None, client: Optional[AsyncHTTPHandler] = None, json_mode: bool = False, signed_json_body: Optional[bytes] = None, ): if client is None: async_httpx_client = get_async_httpx_client( llm_provider=litellm.LlmProviders(custom_llm_provider), params={"ssl_verify": litellm_params.get("ssl_verify", None)}, ) else: async_httpx_client = client response = await self._make_common_async_call( async_httpx_client=async_httpx_client, provider_config=provider_config, api_base=api_base, headers=headers, data=data, timeout=timeout, litellm_params=litellm_params, stream=False, 
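            # async_completion always issues a non-streaming request; streaming requests are
            # routed through acompletion_stream_function instead. When signed_json_body is set,
            # _make_common_async_call sends it verbatim rather than re-serialising `data`.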
logging_obj=logging_obj, signed_json_body=signed_json_body, ) return provider_config.transform_response( model=model, raw_response=response, model_response=model_response, logging_obj=logging_obj, api_key=api_key, request_data=data, messages=messages, optional_params=optional_params, litellm_params=litellm_params, encoding=encoding, json_mode=json_mode, ) def completion( self, model: str, messages: list, api_base: str, custom_llm_provider: str, model_response: ModelResponse, encoding, logging_obj: LiteLLMLoggingObj, optional_params: dict, timeout: Union[float, httpx.Timeout], litellm_params: dict, acompletion: bool, stream: Optional[bool] = False, fake_stream: bool = False, api_key: Optional[str] = None, headers: Optional[Dict[str, Any]] = None, client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None, provider_config: Optional[BaseConfig] = None, ): json_mode: bool = optional_params.pop("json_mode", False) extra_body: Optional[dict] = optional_params.pop("extra_body", None) provider_config = ( provider_config or ProviderConfigManager.get_provider_chat_config( model=model, provider=litellm.LlmProviders(custom_llm_provider) ) ) if provider_config is None: raise ValueError( f"Provider config not found for model: {model} and provider: {custom_llm_provider}" ) fake_stream = ( fake_stream or optional_params.pop("fake_stream", False) or provider_config.should_fake_stream( model=model, custom_llm_provider=custom_llm_provider, stream=stream ) ) # get config from model, custom llm provider headers = provider_config.validate_environment( api_key=api_key, headers=headers or {}, model=model, messages=messages, optional_params=optional_params, api_base=api_base, litellm_params=litellm_params, ) api_base = provider_config.get_complete_url( api_base=api_base, api_key=api_key, model=model, optional_params=optional_params, stream=stream, litellm_params=litellm_params, ) data = provider_config.transform_request( model=model, messages=messages, optional_params=optional_params, litellm_params=litellm_params, headers=headers, ) if extra_body is not None: data = {**data, **extra_body} headers, signed_json_body = provider_config.sign_request( headers=headers, optional_params=optional_params, request_data=data, api_base=api_base, stream=stream, fake_stream=fake_stream, model=model, ) ## LOGGING logging_obj.pre_call( input=messages, api_key=api_key, additional_args={ "complete_input_dict": data, "api_base": api_base, "headers": headers, }, ) if acompletion is True: if stream is True: data = self._add_stream_param_to_request_body( data=data, provider_config=provider_config, fake_stream=fake_stream, ) return self.acompletion_stream_function( model=model, messages=messages, api_base=api_base, headers=headers, custom_llm_provider=custom_llm_provider, provider_config=provider_config, timeout=timeout, logging_obj=logging_obj, data=data, fake_stream=fake_stream, client=( client if client is not None and isinstance(client, AsyncHTTPHandler) else None ), litellm_params=litellm_params, json_mode=json_mode, optional_params=optional_params, signed_json_body=signed_json_body, ) else: return self.async_completion( custom_llm_provider=custom_llm_provider, provider_config=provider_config, api_base=api_base, headers=headers, data=data, timeout=timeout, model=model, model_response=model_response, logging_obj=logging_obj, api_key=api_key, messages=messages, optional_params=optional_params, litellm_params=litellm_params, encoding=encoding, client=( client if client is not None and isinstance(client, AsyncHTTPHandler) else None 
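                    # reuse the caller-provided client only when it is an AsyncHTTPHandler;
                    # otherwise async_completion constructs its own async client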
), json_mode=json_mode, signed_json_body=signed_json_body, ) if stream is True: data = self._add_stream_param_to_request_body( data=data, provider_config=provider_config, fake_stream=fake_stream, ) if provider_config.has_custom_stream_wrapper is True: return provider_config.get_sync_custom_stream_wrapper( model=model, custom_llm_provider=custom_llm_provider, logging_obj=logging_obj, api_base=api_base, headers=headers, data=data, signed_json_body=signed_json_body, messages=messages, client=client, json_mode=json_mode, ) completion_stream, headers = self.make_sync_call( provider_config=provider_config, api_base=api_base, headers=headers, # type: ignore data=data, signed_json_body=signed_json_body, original_data=data, model=model, messages=messages, logging_obj=logging_obj, timeout=timeout, fake_stream=fake_stream, client=( client if client is not None and isinstance(client, HTTPHandler) else None ), litellm_params=litellm_params, json_mode=json_mode, optional_params=optional_params, ) return CustomStreamWrapper( completion_stream=completion_stream, model=model, custom_llm_provider=custom_llm_provider, logging_obj=logging_obj, ) if client is None or not isinstance(client, HTTPHandler): sync_httpx_client = _get_httpx_client( params={"ssl_verify": litellm_params.get("ssl_verify", None)} ) else: sync_httpx_client = client response = self._make_common_sync_call( sync_httpx_client=sync_httpx_client, provider_config=provider_config, api_base=api_base, headers=headers, data=data, signed_json_body=signed_json_body, timeout=timeout, litellm_params=litellm_params, logging_obj=logging_obj, ) return provider_config.transform_response( model=model, raw_response=response, model_response=model_response, logging_obj=logging_obj, api_key=api_key, request_data=data, messages=messages, optional_params=optional_params, litellm_params=litellm_params, encoding=encoding, json_mode=json_mode, ) def make_sync_call( self, provider_config: BaseConfig, api_base: str, headers: dict, data: dict, signed_json_body: Optional[bytes], original_data: dict, model: str, messages: list, logging_obj, optional_params: dict, litellm_params: dict, timeout: Union[float, httpx.Timeout], fake_stream: bool = False, client: Optional[HTTPHandler] = None, json_mode: bool = False, ) -> Tuple[Any, dict]: if client is None or not isinstance(client, HTTPHandler): sync_httpx_client = _get_httpx_client( { "ssl_verify": litellm_params.get("ssl_verify", None), } ) else: sync_httpx_client = client stream = True if fake_stream is True: stream = False response = self._make_common_sync_call( sync_httpx_client=sync_httpx_client, provider_config=provider_config, api_base=api_base, headers=headers, data=data, signed_json_body=signed_json_body, timeout=timeout, litellm_params=litellm_params, stream=stream, logging_obj=logging_obj, ) if fake_stream is True: model_response: ModelResponse = provider_config.transform_response( model=model, raw_response=response, model_response=litellm.ModelResponse(), logging_obj=logging_obj, request_data=original_data, messages=messages, optional_params=optional_params, litellm_params=litellm_params, encoding=None, json_mode=json_mode, ) completion_stream: Any = MockResponseIterator( model_response=model_response, json_mode=json_mode ) else: completion_stream = provider_config.get_model_response_iterator( streaming_response=response.iter_lines(), sync_stream=True, json_mode=json_mode, ) # LOGGING logging_obj.post_call( input=messages, api_key="", original_response="first stream response received", 
additional_args={"complete_input_dict": data}, ) return completion_stream, dict(response.headers) async def acompletion_stream_function( self, model: str, messages: list, api_base: str, custom_llm_provider: str, headers: dict, provider_config: BaseConfig, timeout: Union[float, httpx.Timeout], logging_obj: LiteLLMLoggingObj, data: dict, litellm_params: dict, optional_params: dict, fake_stream: bool = False, client: Optional[AsyncHTTPHandler] = None, json_mode: Optional[bool] = None, signed_json_body: Optional[bytes] = None, ): if provider_config.has_custom_stream_wrapper is True: return await provider_config.get_async_custom_stream_wrapper( model=model, custom_llm_provider=custom_llm_provider, logging_obj=logging_obj, api_base=api_base, headers=headers, data=data, messages=messages, client=client, json_mode=json_mode, signed_json_body=signed_json_body, ) completion_stream, _response_headers = await self.make_async_call_stream_helper( model=model, custom_llm_provider=custom_llm_provider, provider_config=provider_config, api_base=api_base, headers=headers, data=data, messages=messages, logging_obj=logging_obj, timeout=timeout, fake_stream=fake_stream, client=client, litellm_params=litellm_params, optional_params=optional_params, json_mode=json_mode, signed_json_body=signed_json_body, ) streamwrapper = CustomStreamWrapper( completion_stream=completion_stream, model=model, custom_llm_provider=custom_llm_provider, logging_obj=logging_obj, ) return streamwrapper async def make_async_call_stream_helper( self, model: str, custom_llm_provider: str, provider_config: BaseConfig, api_base: str, headers: dict, data: dict, messages: list, logging_obj: LiteLLMLoggingObj, timeout: Union[float, httpx.Timeout], litellm_params: dict, optional_params: dict, fake_stream: bool = False, client: Optional[AsyncHTTPHandler] = None, json_mode: Optional[bool] = None, signed_json_body: Optional[bytes] = None, ) -> Tuple[Any, httpx.Headers]: """ Helper function for making an async call with stream. Handles fake stream as well. 
""" if client is None: async_httpx_client = get_async_httpx_client( llm_provider=litellm.LlmProviders(custom_llm_provider), params={"ssl_verify": litellm_params.get("ssl_verify", None)}, ) else: async_httpx_client = client stream = True if fake_stream is True: stream = False response = await self._make_common_async_call( async_httpx_client=async_httpx_client, provider_config=provider_config, api_base=api_base, headers=headers, data=data, signed_json_body=signed_json_body, timeout=timeout, litellm_params=litellm_params, stream=stream, logging_obj=logging_obj, ) if fake_stream is True: model_response: ModelResponse = provider_config.transform_response( model=model, raw_response=response, model_response=litellm.ModelResponse(), logging_obj=logging_obj, request_data=data, messages=messages, optional_params=optional_params, litellm_params=litellm_params, encoding=None, json_mode=json_mode, ) completion_stream: Any = MockResponseIterator( model_response=model_response, json_mode=json_mode ) else: completion_stream = provider_config.get_model_response_iterator( streaming_response=response.aiter_lines(), sync_stream=False ) # LOGGING logging_obj.post_call( input=messages, api_key="", original_response="first stream response received", additional_args={"complete_input_dict": data}, ) return completion_stream, response.headers def _add_stream_param_to_request_body( self, data: dict, provider_config: BaseConfig, fake_stream: bool, ) -> dict: """ Some providers like Bedrock invoke do not support the stream parameter in the request body, we only pass `stream` in the request body the provider supports it. """ if fake_stream is True: # remove 'stream' from data new_data = data.copy() new_data.pop("stream", None) return new_data if provider_config.supports_stream_param_in_request_body is True: data["stream"] = True return data def embedding( self, model: str, input: list, timeout: float, custom_llm_provider: str, logging_obj: LiteLLMLoggingObj, api_base: Optional[str], optional_params: dict, litellm_params: dict, model_response: EmbeddingResponse, api_key: Optional[str] = None, client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None, aembedding: bool = False, headers: Optional[Dict[str, Any]] = None, ) -> EmbeddingResponse: provider_config = ProviderConfigManager.get_provider_embedding_config( model=model, provider=litellm.LlmProviders(custom_llm_provider) ) if provider_config is None: raise ValueError( f"Provider {custom_llm_provider} does not support embedding" ) # get config from model, custom llm provider headers = provider_config.validate_environment( api_key=api_key, headers=headers or {}, model=model, messages=[], optional_params=optional_params, litellm_params=litellm_params, ) api_base = provider_config.get_complete_url( api_base=api_base, api_key=api_key, model=model, optional_params=optional_params, litellm_params=litellm_params, ) data = provider_config.transform_embedding_request( model=model, input=input, optional_params=optional_params, headers=headers, ) ## LOGGING logging_obj.pre_call( input=input, api_key=api_key, additional_args={ "complete_input_dict": data, "api_base": api_base, "headers": headers, }, ) if aembedding is True: return self.aembedding( # type: ignore request_data=data, api_base=api_base, headers=headers, model=model, custom_llm_provider=custom_llm_provider, provider_config=provider_config, model_response=model_response, logging_obj=logging_obj, api_key=api_key, timeout=timeout, client=client, optional_params=optional_params, litellm_params=litellm_params, ) if 
client is None or not isinstance(client, HTTPHandler): sync_httpx_client = _get_httpx_client() else: sync_httpx_client = client try: response = sync_httpx_client.post( url=api_base, headers=headers, data=json.dumps(data), timeout=timeout, ) except Exception as e: raise self._handle_error( e=e, provider_config=provider_config, ) return provider_config.transform_embedding_response( model=model, raw_response=response, model_response=model_response, logging_obj=logging_obj, api_key=api_key, request_data=data, optional_params=optional_params, litellm_params=litellm_params, ) async def aembedding( self, request_data: dict, api_base: str, headers: dict, model: str, custom_llm_provider: str, provider_config: BaseEmbeddingConfig, model_response: EmbeddingResponse, logging_obj: LiteLLMLoggingObj, optional_params: dict, litellm_params: dict, api_key: Optional[str] = None, timeout: Optional[Union[float, httpx.Timeout]] = None, client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None, ) -> EmbeddingResponse: if client is None or not isinstance(client, AsyncHTTPHandler): async_httpx_client = get_async_httpx_client( llm_provider=litellm.LlmProviders(custom_llm_provider) ) else: async_httpx_client = client try: response = await async_httpx_client.post( url=api_base, headers=headers, json=request_data, timeout=timeout, ) except Exception as e: raise self._handle_error(e=e, provider_config=provider_config) return provider_config.transform_embedding_response( model=model, raw_response=response, model_response=model_response, logging_obj=logging_obj, api_key=api_key, request_data=request_data, optional_params=optional_params, litellm_params=litellm_params, ) def rerank( self, model: str, custom_llm_provider: str, logging_obj: LiteLLMLoggingObj, provider_config: BaseRerankConfig, optional_rerank_params: OptionalRerankParams, timeout: Optional[Union[float, httpx.Timeout]], model_response: RerankResponse, _is_async: bool = False, headers: Optional[Dict[str, Any]] = None, api_key: Optional[str] = None, api_base: Optional[str] = None, client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None, ) -> RerankResponse: # get config from model, custom llm provider headers = provider_config.validate_environment( api_key=api_key, headers=headers or {}, model=model, ) api_base = provider_config.get_complete_url( api_base=api_base, model=model, ) data = provider_config.transform_rerank_request( model=model, optional_rerank_params=optional_rerank_params, headers=headers, ) ## LOGGING logging_obj.pre_call( input=optional_rerank_params.get("query", ""), api_key=api_key, additional_args={ "complete_input_dict": data, "api_base": api_base, "headers": headers, }, ) if _is_async is True: return self.arerank( # type: ignore model=model, request_data=data, custom_llm_provider=custom_llm_provider, provider_config=provider_config, logging_obj=logging_obj, model_response=model_response, api_base=api_base, headers=headers, api_key=api_key, timeout=timeout, client=client, ) if client is None or not isinstance(client, HTTPHandler): sync_httpx_client = _get_httpx_client() else: sync_httpx_client = client try: response = sync_httpx_client.post( url=api_base, headers=headers, data=json.dumps(data), timeout=timeout, ) except Exception as e: raise self._handle_error( e=e, provider_config=provider_config, ) return provider_config.transform_rerank_response( model=model, raw_response=response, model_response=model_response, logging_obj=logging_obj, api_key=api_key, request_data=data, ) async def arerank( self, model: str, request_data: 
dict, custom_llm_provider: str, provider_config: BaseRerankConfig, logging_obj: LiteLLMLoggingObj, model_response: RerankResponse, api_base: str, headers: dict, api_key: Optional[str] = None, timeout: Optional[Union[float, httpx.Timeout]] = None, client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None, ) -> RerankResponse: if client is None or not isinstance(client, AsyncHTTPHandler): async_httpx_client = get_async_httpx_client( llm_provider=litellm.LlmProviders(custom_llm_provider) ) else: async_httpx_client = client try: response = await async_httpx_client.post( url=api_base, headers=headers, data=json.dumps(request_data), timeout=timeout, ) except Exception as e: raise self._handle_error(e=e, provider_config=provider_config) return provider_config.transform_rerank_response( model=model, raw_response=response, model_response=model_response, logging_obj=logging_obj, api_key=api_key, request_data=request_data, ) def _prepare_audio_transcription_request( self, model: str, audio_file: FileTypes, optional_params: dict, litellm_params: dict, logging_obj: LiteLLMLoggingObj, api_key: Optional[str], api_base: Optional[str], headers: Optional[Dict[str, Any]], provider_config: BaseAudioTranscriptionConfig, ) -> Tuple[dict, str, Optional[bytes], Optional[dict]]: """ Shared logic for preparing audio transcription requests. Returns: (headers, complete_url, binary_data, json_data) """ headers = provider_config.validate_environment( api_key=api_key, headers=headers or {}, model=model, messages=[], optional_params=optional_params, litellm_params=litellm_params, ) complete_url = provider_config.get_complete_url( api_base=api_base, api_key=api_key, model=model, optional_params=optional_params, litellm_params=litellm_params, ) # Handle the audio file based on type data = provider_config.transform_audio_transcription_request( model=model, audio_file=audio_file, optional_params=optional_params, litellm_params=litellm_params, ) binary_data: Optional[bytes] = None json_data: Optional[dict] = None if isinstance(data, bytes): binary_data = data else: json_data = data ## LOGGING logging_obj.pre_call( input=optional_params.get("query", ""), api_key=api_key, additional_args={ "complete_input_dict": {}, "api_base": complete_url, "headers": headers, }, ) return headers, complete_url, binary_data, json_data def _transform_audio_transcription_response( self, provider_config: BaseAudioTranscriptionConfig, model: str, response: httpx.Response, model_response: TranscriptionResponse, logging_obj: LiteLLMLoggingObj, optional_params: dict, api_key: Optional[str], ) -> TranscriptionResponse: """Shared logic for transforming audio transcription responses.""" if isinstance(provider_config, litellm.DeepgramAudioTranscriptionConfig): return provider_config.transform_audio_transcription_response( model=model, raw_response=response, model_response=model_response, logging_obj=logging_obj, request_data={}, optional_params=optional_params, litellm_params={}, api_key=api_key, ) return model_response def audio_transcriptions( self, model: str, audio_file: FileTypes, optional_params: dict, litellm_params: dict, model_response: TranscriptionResponse, timeout: float, max_retries: int, logging_obj: LiteLLMLoggingObj, api_key: Optional[str], api_base: Optional[str], custom_llm_provider: str, client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None, atranscription: bool = False, headers: Optional[Dict[str, Any]] = None, provider_config: Optional[BaseAudioTranscriptionConfig] = None, ) -> Union[TranscriptionResponse, Coroutine[Any, 
Any, TranscriptionResponse]]: if provider_config is None: raise ValueError( f"No provider config found for model: {model} and provider: {custom_llm_provider}" ) if atranscription is True: return self.async_audio_transcriptions( # type: ignore model=model, audio_file=audio_file, optional_params=optional_params, litellm_params=litellm_params, model_response=model_response, timeout=timeout, max_retries=max_retries, logging_obj=logging_obj, api_key=api_key, api_base=api_base, custom_llm_provider=custom_llm_provider, client=client, headers=headers, provider_config=provider_config, ) # Prepare the request headers, complete_url, binary_data, json_data = ( self._prepare_audio_transcription_request( model=model, audio_file=audio_file, optional_params=optional_params, litellm_params=litellm_params, logging_obj=logging_obj, api_key=api_key, api_base=api_base, headers=headers, provider_config=provider_config, ) ) if client is None or not isinstance(client, HTTPHandler): client = _get_httpx_client() try: # Make the POST request response = client.post( url=complete_url, headers=headers, content=binary_data, json=json_data, timeout=timeout, ) except Exception as e: raise self._handle_error(e=e, provider_config=provider_config) return self._transform_audio_transcription_response( provider_config=provider_config, model=model, response=response, model_response=model_response, logging_obj=logging_obj, optional_params=optional_params, api_key=api_key, ) async def async_audio_transcriptions( self, model: str, audio_file: FileTypes, optional_params: dict, litellm_params: dict, model_response: TranscriptionResponse, timeout: float, max_retries: int, logging_obj: LiteLLMLoggingObj, api_key: Optional[str], api_base: Optional[str], custom_llm_provider: str, client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None, headers: Optional[Dict[str, Any]] = None, provider_config: Optional[BaseAudioTranscriptionConfig] = None, ) -> TranscriptionResponse: if provider_config is None: raise ValueError( f"No provider config found for model: {model} and provider: {custom_llm_provider}" ) # Prepare the request headers, complete_url, binary_data, json_data = ( self._prepare_audio_transcription_request( model=model, audio_file=audio_file, optional_params=optional_params, litellm_params=litellm_params, logging_obj=logging_obj, api_key=api_key, api_base=api_base, headers=headers, provider_config=provider_config, ) ) if client is None or not isinstance(client, AsyncHTTPHandler): async_httpx_client = get_async_httpx_client( llm_provider=litellm.LlmProviders(custom_llm_provider), params={"ssl_verify": litellm_params.get("ssl_verify", None)}, ) else: async_httpx_client = client try: # Make the async POST request response = await async_httpx_client.post( url=complete_url, headers=headers, content=binary_data, json=json_data, timeout=timeout, ) except Exception as e: raise self._handle_error(e=e, provider_config=provider_config) return self._transform_audio_transcription_response( provider_config=provider_config, model=model, response=response, model_response=model_response, logging_obj=logging_obj, optional_params=optional_params, api_key=api_key, ) async def async_anthropic_messages_handler( self, model: str, messages: List[Dict], anthropic_messages_provider_config: BaseAnthropicMessagesConfig, anthropic_messages_optional_request_params: Dict, custom_llm_provider: str, litellm_params: GenericLiteLLMParams, logging_obj: LiteLLMLoggingObj, client: Optional[AsyncHTTPHandler] = None, extra_headers: Optional[Dict[str, Any]] = None, 
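        # note: extra_headers is recomputed below from kwargs["provider_specific_header"],
        # so a value passed for this parameter is effectively ignored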
api_key: Optional[str] = None, api_base: Optional[str] = None, stream: Optional[bool] = False, kwargs: Optional[Dict[str, Any]] = None, ) -> Union[AnthropicMessagesResponse, AsyncIterator]: if client is None or not isinstance(client, AsyncHTTPHandler): async_httpx_client = get_async_httpx_client( llm_provider=litellm.LlmProviders.ANTHROPIC ) else: async_httpx_client = client # Prepare headers kwargs = kwargs or {} provider_specific_header = cast( Optional[litellm.types.utils.ProviderSpecificHeader], kwargs.get("provider_specific_header", None), ) extra_headers = ( provider_specific_header.get("extra_headers", {}) if provider_specific_header else {} ) ( headers, api_base, ) = anthropic_messages_provider_config.validate_anthropic_messages_environment( headers=extra_headers or {}, model=model, messages=messages, optional_params=anthropic_messages_optional_request_params, litellm_params=dict(litellm_params), api_key=api_key, api_base=api_base, ) logging_obj.update_environment_variables( model=model, optional_params=dict(anthropic_messages_optional_request_params), litellm_params={ "metadata": kwargs.get("metadata", {}), "preset_cache_key": None, "stream_response": {}, **anthropic_messages_optional_request_params, }, custom_llm_provider=custom_llm_provider, ) # Prepare request body request_body = anthropic_messages_provider_config.transform_anthropic_messages_request( model=model, messages=messages, anthropic_messages_optional_request_params=anthropic_messages_optional_request_params, litellm_params=litellm_params, headers=headers, ) logging_obj.stream = stream logging_obj.model_call_details.update(request_body) # Make the request request_url = anthropic_messages_provider_config.get_complete_url( api_base=api_base, api_key=api_key, model=model, optional_params=dict( litellm_params ), # this uses the invoke config, which expects aws_* params in optional_params litellm_params=dict(litellm_params), stream=stream, ) headers, signed_json_body = anthropic_messages_provider_config.sign_request( headers=headers, optional_params=dict( litellm_params ), # dynamic aws_* params are passed under litellm_params request_data=request_body, api_base=request_url, stream=stream, fake_stream=False, model=model, ) logging_obj.pre_call( input=[{"role": "user", "content": json.dumps(request_body)}], api_key="", additional_args={ "complete_input_dict": request_body, "api_base": str(request_url), "headers": headers, }, ) response = await async_httpx_client.post( url=request_url, headers=headers, data=signed_json_body or json.dumps(request_body), stream=stream or False, logging_obj=logging_obj, ) response.raise_for_status() # used for logging + cost tracking logging_obj.model_call_details["httpx_response"] = response if stream: completion_stream = anthropic_messages_provider_config.get_async_streaming_response_iterator( model=model, httpx_response=response, request_body=request_body, litellm_logging_obj=logging_obj, ) return completion_stream else: return anthropic_messages_provider_config.transform_anthropic_messages_response( model=model, raw_response=response, logging_obj=logging_obj, ) def anthropic_messages_handler( self, model: str, messages: List[Dict], anthropic_messages_provider_config: BaseAnthropicMessagesConfig, anthropic_messages_optional_request_params: Dict, custom_llm_provider: str, _is_async: bool, litellm_params: GenericLiteLLMParams, logging_obj: LiteLLMLoggingObj, client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None, api_key: Optional[str] = None, api_base: Optional[str] = None, stream: 
Optional[bool] = False, kwargs: Optional[Dict[str, Any]] = None, ) -> Union[ AnthropicMessagesResponse, Coroutine[Any, Any, Union[AnthropicMessagesResponse, AsyncIterator]], ]: """ LLM HTTP Handler for Anthropic Messages """ if _is_async: # Return the async coroutine if called with _is_async=True return self.async_anthropic_messages_handler( model=model, messages=messages, anthropic_messages_provider_config=anthropic_messages_provider_config, anthropic_messages_optional_request_params=anthropic_messages_optional_request_params, client=client if isinstance(client, AsyncHTTPHandler) else None, custom_llm_provider=custom_llm_provider, litellm_params=litellm_params, logging_obj=logging_obj, api_key=api_key, api_base=api_base, stream=stream, kwargs=kwargs, ) raise ValueError("anthropic_messages_handler is not implemented for sync calls") def response_api_handler( self, model: str, input: Union[str, ResponseInputParam], responses_api_provider_config: BaseResponsesAPIConfig, response_api_optional_request_params: Dict, custom_llm_provider: str, litellm_params: GenericLiteLLMParams, logging_obj: LiteLLMLoggingObj, extra_headers: Optional[Dict[str, Any]] = None, extra_body: Optional[Dict[str, Any]] = None, timeout: Optional[Union[float, httpx.Timeout]] = None, client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None, _is_async: bool = False, fake_stream: bool = False, litellm_metadata: Optional[Dict[str, Any]] = None, ) -> Union[ ResponsesAPIResponse, BaseResponsesAPIStreamingIterator, Coroutine[ Any, Any, Union[ResponsesAPIResponse, BaseResponsesAPIStreamingIterator] ], ]: """ Handles responses API requests. When _is_async=True, returns a coroutine instead of making the call directly. """ if _is_async: # Return the async coroutine if called with _is_async=True return self.async_response_api_handler( model=model, input=input, responses_api_provider_config=responses_api_provider_config, response_api_optional_request_params=response_api_optional_request_params, custom_llm_provider=custom_llm_provider, litellm_params=litellm_params, logging_obj=logging_obj, extra_headers=extra_headers, extra_body=extra_body, timeout=timeout, client=client if isinstance(client, AsyncHTTPHandler) else None, fake_stream=fake_stream, litellm_metadata=litellm_metadata, ) if client is None or not isinstance(client, HTTPHandler): sync_httpx_client = _get_httpx_client( params={"ssl_verify": litellm_params.get("ssl_verify", None)} ) else: sync_httpx_client = client headers = responses_api_provider_config.validate_environment( api_key=litellm_params.api_key, headers=response_api_optional_request_params.get("extra_headers", {}) or {}, model=model, ) if extra_headers: headers.update(extra_headers) # Check if streaming is requested stream = response_api_optional_request_params.get("stream", False) api_base = responses_api_provider_config.get_complete_url( api_base=litellm_params.api_base, litellm_params=dict(litellm_params), ) data = responses_api_provider_config.transform_responses_api_request( model=model, input=input, response_api_optional_request_params=response_api_optional_request_params, litellm_params=litellm_params, headers=headers, ) ## LOGGING logging_obj.pre_call( input=input, api_key="", additional_args={ "complete_input_dict": data, "api_base": api_base, "headers": headers, }, ) try: if stream: # For streaming, use stream=True in the request if fake_stream is True: stream, data = self._prepare_fake_stream_request( stream=stream, data=data, fake_stream=fake_stream, ) response = sync_httpx_client.post( 
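                    # stream=True keeps the HTTP response open so SyncResponsesAPIStreamingIterator
                    # can consume it incrementally; when fake_stream was requested, stream has
                    # already been flipped to False and the full JSON response is replayed by
                    # MockResponsesAPIStreamingIterator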
url=api_base, headers=headers, json=data, timeout=timeout or response_api_optional_request_params.get("timeout"), stream=stream, ) if fake_stream is True: return MockResponsesAPIStreamingIterator( response=response, model=model, logging_obj=logging_obj, responses_api_provider_config=responses_api_provider_config, litellm_metadata=litellm_metadata, custom_llm_provider=custom_llm_provider, ) return SyncResponsesAPIStreamingIterator( response=response, model=model, logging_obj=logging_obj, responses_api_provider_config=responses_api_provider_config, litellm_metadata=litellm_metadata, custom_llm_provider=custom_llm_provider, ) else: # For non-streaming requests response = sync_httpx_client.post( url=api_base, headers=headers, json=data, timeout=timeout or response_api_optional_request_params.get("timeout"), ) except Exception as e: raise self._handle_error( e=e, provider_config=responses_api_provider_config, ) return responses_api_provider_config.transform_response_api_response( model=model, raw_response=response, logging_obj=logging_obj, ) async def async_response_api_handler( self, model: str, input: Union[str, ResponseInputParam], responses_api_provider_config: BaseResponsesAPIConfig, response_api_optional_request_params: Dict, custom_llm_provider: str, litellm_params: GenericLiteLLMParams, logging_obj: LiteLLMLoggingObj, extra_headers: Optional[Dict[str, Any]] = None, extra_body: Optional[Dict[str, Any]] = None, timeout: Optional[Union[float, httpx.Timeout]] = None, client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None, fake_stream: bool = False, litellm_metadata: Optional[Dict[str, Any]] = None, ) -> Union[ResponsesAPIResponse, BaseResponsesAPIStreamingIterator]: """ Async version of the responses API handler. Uses async HTTP client to make requests. 
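        Streaming behaviour mirrors the sync handler: a native provider stream is wrapped in
        ResponsesAPIStreamingIterator, while `fake_stream=True` sends a non-streaming request and
        replays the full response through MockResponsesAPIStreamingIterator.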
""" if client is None or not isinstance(client, AsyncHTTPHandler): async_httpx_client = get_async_httpx_client( llm_provider=litellm.LlmProviders(custom_llm_provider), params={"ssl_verify": litellm_params.get("ssl_verify", None)}, ) else: async_httpx_client = client headers = responses_api_provider_config.validate_environment( api_key=litellm_params.api_key, headers=response_api_optional_request_params.get("extra_headers", {}) or {}, model=model, ) if extra_headers: headers.update(extra_headers) # Check if streaming is requested stream = response_api_optional_request_params.get("stream", False) api_base = responses_api_provider_config.get_complete_url( api_base=litellm_params.api_base, litellm_params=dict(litellm_params), ) data = responses_api_provider_config.transform_responses_api_request( model=model, input=input, response_api_optional_request_params=response_api_optional_request_params, litellm_params=litellm_params, headers=headers, ) ## LOGGING logging_obj.pre_call( input=input, api_key="", additional_args={ "complete_input_dict": data, "api_base": api_base, "headers": headers, }, ) try: if stream: # For streaming, we need to use stream=True in the request if fake_stream is True: stream, data = self._prepare_fake_stream_request( stream=stream, data=data, fake_stream=fake_stream, ) response = await async_httpx_client.post( url=api_base, headers=headers, json=data, timeout=timeout or response_api_optional_request_params.get("timeout"), stream=stream, ) if fake_stream is True: return MockResponsesAPIStreamingIterator( response=response, model=model, logging_obj=logging_obj, responses_api_provider_config=responses_api_provider_config, litellm_metadata=litellm_metadata, custom_llm_provider=custom_llm_provider, ) # Return the streaming iterator return ResponsesAPIStreamingIterator( response=response, model=model, logging_obj=logging_obj, responses_api_provider_config=responses_api_provider_config, litellm_metadata=litellm_metadata, custom_llm_provider=custom_llm_provider, ) else: # For non-streaming, proceed as before response = await async_httpx_client.post( url=api_base, headers=headers, json=data, timeout=timeout or response_api_optional_request_params.get("timeout"), ) except Exception as e: raise self._handle_error( e=e, provider_config=responses_api_provider_config, ) return responses_api_provider_config.transform_response_api_response( model=model, raw_response=response, logging_obj=logging_obj, ) async def async_delete_response_api_handler( self, response_id: str, responses_api_provider_config: BaseResponsesAPIConfig, litellm_params: GenericLiteLLMParams, logging_obj: LiteLLMLoggingObj, custom_llm_provider: Optional[str], extra_headers: Optional[Dict[str, Any]] = None, extra_body: Optional[Dict[str, Any]] = None, timeout: Optional[Union[float, httpx.Timeout]] = None, client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None, _is_async: bool = False, ) -> DeleteResponseResult: """ Async version of the delete response API handler. Uses async HTTP client to make requests. 
""" if client is None or not isinstance(client, AsyncHTTPHandler): async_httpx_client = get_async_httpx_client( llm_provider=litellm.LlmProviders(custom_llm_provider), params={"ssl_verify": litellm_params.get("ssl_verify", None)}, ) else: async_httpx_client = client headers = responses_api_provider_config.validate_environment( api_key=litellm_params.api_key, headers=extra_headers or {}, model="None", ) if extra_headers: headers.update(extra_headers) api_base = responses_api_provider_config.get_complete_url( api_base=litellm_params.api_base, litellm_params=dict(litellm_params), ) url, data = responses_api_provider_config.transform_delete_response_api_request( response_id=response_id, api_base=api_base, litellm_params=litellm_params, headers=headers, ) ## LOGGING logging_obj.pre_call( input=input, api_key="", additional_args={ "complete_input_dict": data, "api_base": api_base, "headers": headers, }, ) try: response = await async_httpx_client.delete( url=url, headers=headers, json=data, timeout=timeout ) except Exception as e: raise self._handle_error( e=e, provider_config=responses_api_provider_config, ) return responses_api_provider_config.transform_delete_response_api_response( raw_response=response, logging_obj=logging_obj, ) def delete_response_api_handler( self, response_id: str, responses_api_provider_config: BaseResponsesAPIConfig, litellm_params: GenericLiteLLMParams, logging_obj: LiteLLMLoggingObj, custom_llm_provider: Optional[str], extra_headers: Optional[Dict[str, Any]] = None, extra_body: Optional[Dict[str, Any]] = None, timeout: Optional[Union[float, httpx.Timeout]] = None, client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None, _is_async: bool = False, ) -> Union[DeleteResponseResult, Coroutine[Any, Any, DeleteResponseResult]]: """ Async version of the responses API handler. Uses async HTTP client to make requests. 
""" if _is_async: return self.async_delete_response_api_handler( response_id=response_id, responses_api_provider_config=responses_api_provider_config, litellm_params=litellm_params, logging_obj=logging_obj, custom_llm_provider=custom_llm_provider, extra_headers=extra_headers, extra_body=extra_body, timeout=timeout, client=client, ) if client is None or not isinstance(client, HTTPHandler): sync_httpx_client = _get_httpx_client( params={"ssl_verify": litellm_params.get("ssl_verify", None)} ) else: sync_httpx_client = client headers = responses_api_provider_config.validate_environment( api_key=litellm_params.api_key, headers=extra_headers or {}, model="None", ) if extra_headers: headers.update(extra_headers) api_base = responses_api_provider_config.get_complete_url( api_base=litellm_params.api_base, litellm_params=dict(litellm_params), ) url, data = responses_api_provider_config.transform_delete_response_api_request( response_id=response_id, api_base=api_base, litellm_params=litellm_params, headers=headers, ) ## LOGGING logging_obj.pre_call( input=input, api_key="", additional_args={ "complete_input_dict": data, "api_base": api_base, "headers": headers, }, ) try: response = sync_httpx_client.delete( url=url, headers=headers, json=data, timeout=timeout ) except Exception as e: raise self._handle_error( e=e, provider_config=responses_api_provider_config, ) return responses_api_provider_config.transform_delete_response_api_response( raw_response=response, logging_obj=logging_obj, ) def get_responses( self, response_id: str, responses_api_provider_config: BaseResponsesAPIConfig, litellm_params: GenericLiteLLMParams, logging_obj: LiteLLMLoggingObj, custom_llm_provider: Optional[str] = None, extra_headers: Optional[Dict[str, Any]] = None, extra_body: Optional[Dict[str, Any]] = None, timeout: Optional[Union[float, httpx.Timeout]] = None, client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None, _is_async: bool = False, ) -> Union[ResponsesAPIResponse, Coroutine[Any, Any, ResponsesAPIResponse]]: """ Get a response by ID Uses GET /v1/responses/{response_id} endpoint in the responses API """ if _is_async: return self.async_get_responses( response_id=response_id, responses_api_provider_config=responses_api_provider_config, litellm_params=litellm_params, logging_obj=logging_obj, custom_llm_provider=custom_llm_provider, extra_headers=extra_headers, extra_body=extra_body, timeout=timeout, client=client, ) if client is None or not isinstance(client, HTTPHandler): sync_httpx_client = _get_httpx_client( params={"ssl_verify": litellm_params.get("ssl_verify", None)} ) else: sync_httpx_client = client headers = responses_api_provider_config.validate_environment( api_key=litellm_params.api_key, headers=extra_headers or {}, model="None", ) if extra_headers: headers.update(extra_headers) api_base = responses_api_provider_config.get_complete_url( api_base=litellm_params.api_base, litellm_params=dict(litellm_params), ) url, data = responses_api_provider_config.transform_get_response_api_request( response_id=response_id, api_base=api_base, litellm_params=litellm_params, headers=headers, ) ## LOGGING logging_obj.pre_call( input="", api_key="", additional_args={ "complete_input_dict": data, "api_base": api_base, "headers": headers, }, ) try: response = sync_httpx_client.get(url=url, headers=headers, params=data) except Exception as e: raise self._handle_error( e=e, provider_config=responses_api_provider_config, ) return responses_api_provider_config.transform_get_response_api_response( raw_response=response, 
logging_obj=logging_obj, ) async def async_get_responses( self, response_id: str, responses_api_provider_config: BaseResponsesAPIConfig, litellm_params: GenericLiteLLMParams, logging_obj: LiteLLMLoggingObj, custom_llm_provider: Optional[str] = None, extra_headers: Optional[Dict[str, Any]] = None, extra_body: Optional[Dict[str, Any]] = None, timeout: Optional[Union[float, httpx.Timeout]] = None, client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None, ) -> ResponsesAPIResponse: """ Async version of get_responses """ if client is None or not isinstance(client, AsyncHTTPHandler): async_httpx_client = get_async_httpx_client( llm_provider=litellm.LlmProviders(custom_llm_provider), params={"ssl_verify": litellm_params.get("ssl_verify", None)}, ) else: async_httpx_client = client headers = responses_api_provider_config.validate_environment( api_key=litellm_params.api_key, headers=extra_headers or {}, model="None", ) if extra_headers: headers.update(extra_headers) api_base = responses_api_provider_config.get_complete_url( api_base=litellm_params.api_base, litellm_params=dict(litellm_params), ) url, data = responses_api_provider_config.transform_get_response_api_request( response_id=response_id, api_base=api_base, litellm_params=litellm_params, headers=headers, ) ## LOGGING logging_obj.pre_call( input="", api_key="", additional_args={ "complete_input_dict": data, "api_base": api_base, "headers": headers, }, ) try: response = await async_httpx_client.get( url=url, headers=headers, params=data ) except Exception as e: verbose_logger.exception(f"Error retrieving response: {e}") raise self._handle_error( e=e, provider_config=responses_api_provider_config, ) return responses_api_provider_config.transform_get_response_api_response( raw_response=response, logging_obj=logging_obj, ) ##################################################################### ################ LIST RESPONSES INPUT ITEMS HANDLER ########################### ##################################################################### def list_responses_input_items( self, response_id: str, responses_api_provider_config: BaseResponsesAPIConfig, litellm_params: GenericLiteLLMParams, logging_obj: LiteLLMLoggingObj, custom_llm_provider: Optional[str] = None, after: Optional[str] = None, before: Optional[str] = None, include: Optional[List[str]] = None, limit: int = 20, order: Literal["asc", "desc"] = "desc", extra_headers: Optional[Dict[str, Any]] = None, timeout: Optional[Union[float, httpx.Timeout]] = None, client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None, _is_async: bool = False, ) -> Union[Dict, Coroutine[Any, Any, Dict]]: if _is_async: return self.async_list_responses_input_items( response_id=response_id, responses_api_provider_config=responses_api_provider_config, litellm_params=litellm_params, logging_obj=logging_obj, custom_llm_provider=custom_llm_provider, after=after, before=before, include=include, limit=limit, order=order, extra_headers=extra_headers, timeout=timeout, client=client, ) if client is None or not isinstance(client, HTTPHandler): sync_httpx_client = _get_httpx_client( params={"ssl_verify": litellm_params.get("ssl_verify", None)} ) else: sync_httpx_client = client headers = responses_api_provider_config.validate_environment( api_key=litellm_params.api_key, headers=extra_headers or {}, model="None", ) if extra_headers: headers.update(extra_headers) api_base = responses_api_provider_config.get_complete_url( api_base=litellm_params.api_base, litellm_params=dict(litellm_params), ) url, params = 
responses_api_provider_config.transform_list_input_items_request( response_id=response_id, api_base=api_base, litellm_params=litellm_params, headers=headers, after=after, before=before, include=include, limit=limit, order=order, ) logging_obj.pre_call( input="", api_key="", additional_args={ "complete_input_dict": params, "api_base": api_base, "headers": headers, }, ) try: response = sync_httpx_client.get(url=url, headers=headers, params=params) except Exception as e: raise self._handle_error(e=e, provider_config=responses_api_provider_config) return responses_api_provider_config.transform_list_input_items_response( raw_response=response, logging_obj=logging_obj, ) async def async_list_responses_input_items( self, response_id: str, responses_api_provider_config: BaseResponsesAPIConfig, litellm_params: GenericLiteLLMParams, logging_obj: LiteLLMLoggingObj, custom_llm_provider: Optional[str] = None, after: Optional[str] = None, before: Optional[str] = None, include: Optional[List[str]] = None, limit: int = 20, order: Literal["asc", "desc"] = "desc", extra_headers: Optional[Dict[str, Any]] = None, timeout: Optional[Union[float, httpx.Timeout]] = None, client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None, ) -> Dict: if client is None or not isinstance(client, AsyncHTTPHandler): async_httpx_client = get_async_httpx_client( llm_provider=litellm.LlmProviders(custom_llm_provider), params={"ssl_verify": litellm_params.get("ssl_verify", None)}, ) else: async_httpx_client = client headers = responses_api_provider_config.validate_environment( api_key=litellm_params.api_key, headers=extra_headers or {}, model="None", ) if extra_headers: headers.update(extra_headers) api_base = responses_api_provider_config.get_complete_url( api_base=litellm_params.api_base, litellm_params=dict(litellm_params), ) url, params = responses_api_provider_config.transform_list_input_items_request( response_id=response_id, api_base=api_base, litellm_params=litellm_params, headers=headers, after=after, before=before, include=include, limit=limit, order=order, ) logging_obj.pre_call( input="", api_key="", additional_args={ "complete_input_dict": params, "api_base": api_base, "headers": headers, }, ) try: response = await async_httpx_client.get( url=url, headers=headers, params=params ) except Exception as e: raise self._handle_error(e=e, provider_config=responses_api_provider_config) return responses_api_provider_config.transform_list_input_items_response( raw_response=response, logging_obj=logging_obj, ) def create_file( self, create_file_data: CreateFileRequest, litellm_params: dict, provider_config: BaseFilesConfig, headers: dict, api_base: Optional[str], api_key: Optional[str], logging_obj: LiteLLMLoggingObj, _is_async: bool = False, client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None, timeout: Optional[Union[float, httpx.Timeout]] = None, ) -> Union[OpenAIFileObject, Coroutine[Any, Any, OpenAIFileObject]]: """ Creates a file using Gemini's two-step upload process """ # get config from model, custom llm provider headers = provider_config.validate_environment( api_key=api_key, headers=headers, model="", messages=[], optional_params={}, litellm_params=litellm_params, ) api_base = provider_config.get_complete_file_url( api_base=api_base, api_key=api_key, model="", optional_params={}, litellm_params=litellm_params, data=create_file_data, ) if api_base is None: raise ValueError("api_base is required for create_file") # Get the transformed request data for both steps transformed_request = 
provider_config.transform_create_file_request( model="", create_file_data=create_file_data, litellm_params=litellm_params, optional_params={}, ) if _is_async: return self.async_create_file( transformed_request=transformed_request, litellm_params=litellm_params, provider_config=provider_config, headers=headers, api_base=api_base, logging_obj=logging_obj, client=client, timeout=timeout, ) if client is None or not isinstance(client, HTTPHandler): sync_httpx_client = _get_httpx_client() else: sync_httpx_client = client if isinstance(transformed_request, str) or isinstance( transformed_request, bytes ): upload_response = sync_httpx_client.post( url=api_base, headers=headers, data=transformed_request, timeout=timeout, ) else: try: # Step 1: Initial request to get upload URL initial_response = sync_httpx_client.post( url=api_base, headers={ **headers, **transformed_request["initial_request"]["headers"], }, data=json.dumps(transformed_request["initial_request"]["data"]), timeout=timeout, ) # Extract upload URL from response headers upload_url = initial_response.headers.get("X-Goog-Upload-URL") if not upload_url: raise ValueError("Failed to get upload URL from initial request") # Step 2: Upload the actual file upload_response = sync_httpx_client.post( url=upload_url, headers=transformed_request["upload_request"]["headers"], data=transformed_request["upload_request"]["data"], timeout=timeout, ) except Exception as e: raise self._handle_error( e=e, provider_config=provider_config, ) return provider_config.transform_create_file_response( model=None, raw_response=upload_response, logging_obj=logging_obj, litellm_params=litellm_params, ) async def async_create_file( self, transformed_request: Union[bytes, str, dict], litellm_params: dict, provider_config: BaseFilesConfig, headers: dict, api_base: str, logging_obj: LiteLLMLoggingObj, client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None, timeout: Optional[Union[float, httpx.Timeout]] = None, ): """ Creates a file using Gemini's two-step upload process """ if client is None or not isinstance(client, AsyncHTTPHandler): async_httpx_client = get_async_httpx_client( llm_provider=provider_config.custom_llm_provider ) else: async_httpx_client = client if isinstance(transformed_request, str) or isinstance( transformed_request, bytes ): upload_response = await async_httpx_client.post( url=api_base, headers=headers, data=transformed_request, timeout=timeout, ) else: try: # Step 1: Initial request to get upload URL initial_response = await async_httpx_client.post( url=api_base, headers={ **headers, **transformed_request["initial_request"]["headers"], }, data=json.dumps(transformed_request["initial_request"]["data"]), timeout=timeout, ) # Extract upload URL from response headers upload_url = initial_response.headers.get("X-Goog-Upload-URL") if not upload_url: raise ValueError("Failed to get upload URL from initial request") # Step 2: Upload the actual file upload_response = await async_httpx_client.post( url=upload_url, headers=transformed_request["upload_request"]["headers"], data=transformed_request["upload_request"]["data"], timeout=timeout, ) except Exception as e: verbose_logger.exception(f"Error creating file: {e}") raise self._handle_error( e=e, provider_config=provider_config, ) return provider_config.transform_create_file_response( model=None, raw_response=upload_response, logging_obj=logging_obj, litellm_params=litellm_params, ) def list_files(self): """ Lists all files """ pass def delete_file(self): """ Deletes a file """ pass def 
    def list_files(self):
        """
        Lists all files
        """
        pass

    def delete_file(self):
        """
        Deletes a file
        """
        pass

    def retrieve_file(self):
        """
        Returns the metadata of the file
        """
        pass

    def retrieve_file_content(self):
        """
        Returns the content of the file
        """
        pass

    def _prepare_fake_stream_request(
        self,
        stream: bool,
        data: dict,
        fake_stream: bool,
    ) -> Tuple[bool, dict]:
        """
        Handles preparing a request when `fake_stream` is True.
        """
        if fake_stream is True:
            stream = False
            data.pop("stream", None)
            return stream, data
        return stream, data

    def _handle_error(
        self,
        e: Exception,
        provider_config: Union[
            BaseConfig, BaseRerankConfig, BaseResponsesAPIConfig, BaseImageEditConfig
        ],
    ):
        status_code = getattr(e, "status_code", 500)
        error_headers = getattr(e, "headers", None)
        if isinstance(e, httpx.HTTPStatusError):
            error_text = e.response.text
            status_code = e.response.status_code
        else:
            error_text = getattr(e, "text", str(e))
        error_response = getattr(e, "response", None)
        if error_headers is None and error_response:
            error_headers = getattr(error_response, "headers", None)
        if error_response and hasattr(error_response, "text"):
            error_text = getattr(error_response, "text", error_text)
        if error_headers:
            error_headers = dict(error_headers)
        else:
            error_headers = {}
        raise provider_config.get_error_class(
            error_message=error_text,
            status_code=status_code,
            headers=error_headers,
        )
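    # Illustrative behaviour of _handle_error for an httpx status error; the
    # request below is a made-up example, not something this module performs:
    #
    #   try:
    #       resp = httpx.get("https://api.example.com/v1/responses/bad-id")
    #       resp.raise_for_status()
    #   except httpx.HTTPStatusError as e:
    #       self._handle_error(e=e, provider_config=provider_config)
    #       # re-raises as provider_config.get_error_class(
    #       #     error_message=e.response.text,
    #       #     status_code=e.response.status_code,
    #       #     headers=dict(e.response.headers),
    #       # )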
""" if _is_async: # Return the async coroutine if called with _is_async=True return self.async_image_edit_handler( model=model, image=image, prompt=prompt, image_edit_provider_config=image_edit_provider_config, image_edit_optional_request_params=image_edit_optional_request_params, custom_llm_provider=custom_llm_provider, litellm_params=litellm_params, logging_obj=logging_obj, extra_headers=extra_headers, extra_body=extra_body, timeout=timeout, client=client if isinstance(client, AsyncHTTPHandler) else None, fake_stream=fake_stream, litellm_metadata=litellm_metadata, ) if client is None or not isinstance(client, HTTPHandler): sync_httpx_client = _get_httpx_client( params={"ssl_verify": litellm_params.get("ssl_verify", None)} ) else: sync_httpx_client = client headers = image_edit_provider_config.validate_environment( api_key=litellm_params.api_key, headers=image_edit_optional_request_params.get("extra_headers", {}) or {}, model=model, ) if extra_headers: headers.update(extra_headers) api_base = image_edit_provider_config.get_complete_url( model=model, api_base=litellm_params.api_base, litellm_params=dict(litellm_params), ) data, files = image_edit_provider_config.transform_image_edit_request( model=model, image=image, prompt=prompt, image_edit_optional_request_params=image_edit_optional_request_params, litellm_params=litellm_params, headers=headers, ) ## LOGGING logging_obj.pre_call( input=prompt, api_key="", additional_args={ "complete_input_dict": data, "api_base": api_base, "headers": headers, }, ) try: response = sync_httpx_client.post( url=api_base, headers=headers, data=data, files=files, timeout=timeout, ) except Exception as e: raise self._handle_error( e=e, provider_config=image_edit_provider_config, ) return image_edit_provider_config.transform_image_edit_response( model=model, raw_response=response, logging_obj=logging_obj, ) async def async_image_edit_handler( self, model: str, image: FileTypes, prompt: str, image_edit_provider_config: BaseImageEditConfig, image_edit_optional_request_params: Dict, custom_llm_provider: str, litellm_params: GenericLiteLLMParams, logging_obj: LiteLLMLoggingObj, timeout: Union[float, httpx.Timeout], extra_headers: Optional[Dict[str, Any]] = None, extra_body: Optional[Dict[str, Any]] = None, client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None, fake_stream: bool = False, litellm_metadata: Optional[Dict[str, Any]] = None, ) -> ImageResponse: """ Async version of the image edit handler. Uses async HTTP client to make requests. 
""" if client is None or not isinstance(client, AsyncHTTPHandler): async_httpx_client = get_async_httpx_client( llm_provider=litellm.LlmProviders(custom_llm_provider), params={"ssl_verify": litellm_params.get("ssl_verify", None)}, ) else: async_httpx_client = client headers = image_edit_provider_config.validate_environment( api_key=litellm_params.api_key, headers=image_edit_optional_request_params.get("extra_headers", {}) or {}, model=model, ) if extra_headers: headers.update(extra_headers) api_base = image_edit_provider_config.get_complete_url( model=model, api_base=litellm_params.api_base, litellm_params=dict(litellm_params), ) data, files = image_edit_provider_config.transform_image_edit_request( model=model, image=image, prompt=prompt, image_edit_optional_request_params=image_edit_optional_request_params, litellm_params=litellm_params, headers=headers, ) ## LOGGING logging_obj.pre_call( input=prompt, api_key="", additional_args={ "complete_input_dict": data, "api_base": api_base, "headers": headers, }, ) try: response = await async_httpx_client.post( url=api_base, headers=headers, data=data, files=files, timeout=timeout, ) except Exception as e: raise self._handle_error( e=e, provider_config=image_edit_provider_config, ) return image_edit_provider_config.transform_image_edit_response( model=model, raw_response=response, logging_obj=logging_obj, )