|
""" |
|
Callbacks triggered on cooling down deployments |
|
""" |
|
|
|
import copy |
|
from typing import TYPE_CHECKING, Any, Optional, Union |
|
|
|
import litellm |
|
from litellm._logging import verbose_logger |
|
|
|
# Router and PrometheusLogger are referenced only in annotations in this
# module, so they are imported solely for static type checkers.
if TYPE_CHECKING:
    from litellm.integrations.prometheus import PrometheusLogger
    from litellm.router import Router as _Router

    LitellmRouter = _Router
else:
    # At runtime both annotation names fall back to Any.
    LitellmRouter = PrometheusLogger = Any
|
|
|
|
|
async def router_cooldown_event_callback(
    litellm_router_instance: LitellmRouter,
    deployment_id: str,
    exception_status: Union[str, int],
    cooldown_time: float,
):
    """
    Callback triggered when a deployment is put into cooldown by litellm

    - Updates deployment state on Prometheus
    - Increments cooldown metric for deployment on Prometheus

    Args:
        litellm_router_instance: Router used to look up the deployment via
            `get_deployment(model_id=deployment_id)`.
        deployment_id: Id of the deployment that was put into cooldown.
        exception_status: Status associated with the cooldown; converted with
            `str()` before being passed to the cooldown counter.
        cooldown_time: Currently unused by this callback.

    Returns:
        None. Nothing is reported when the deployment cannot be found (a
        warning is logged) or when no Prometheus logger is registered.
    """
    verbose_logger.debug("In router_cooldown_event_callback - updating prometheus")
    _deployment = litellm_router_instance.get_deployment(model_id=deployment_id)
    if _deployment is None:
        verbose_logger.warning(
            f"in router_cooldown_event_callback but _deployment is None for deployment_id={deployment_id}. Doing nothing"
        )
        return

    # Operate on a deep copy converted to a plain dict; presumably this keeps
    # the helper calls below from touching the router's stored params.
    temp_litellm_params = dict(copy.deepcopy(_deployment["litellm_params"]))
    _model_name = _deployment.get("model_name", None) or ""
    _api_base = (
        litellm.get_api_base(model=_model_name, optional_params=temp_litellm_params)
        or ""
    )
    model_id = _deployment["model_info"].id

    litellm_model_name = temp_litellm_params.get("model") or ""
    llm_provider = ""
    try:
        _, llm_provider, _, _ = litellm.get_llm_provider(
            model=litellm_model_name,
            custom_llm_provider=temp_litellm_params.get("custom_llm_provider"),
        )
    except Exception as e:
        # Best effort: metrics are still emitted with an empty provider label,
        # but the failure is logged instead of being silently discarded.
        verbose_logger.debug(
            f"router_cooldown_event_callback: could not resolve llm provider for model={litellm_model_name}: {e}"
        )

    # Look up the Prometheus logger among the registered callbacks.
    prometheusLogger: Optional[PrometheusLogger] = (
        _get_prometheus_logger_from_callbacks()
    )
    if prometheusLogger is None:
        return

    prometheusLogger.set_deployment_complete_outage(
        litellm_model_name=_model_name,
        model_id=model_id,
        api_base=_api_base,
        api_provider=llm_provider,
    )

    prometheusLogger.increment_deployment_cooled_down(
        litellm_model_name=_model_name,
        model_id=model_id,
        api_base=_api_base,
        api_provider=llm_provider,
        exception_status=str(exception_status),
    )
|
|
|
|
|
def _get_prometheus_logger_from_callbacks() -> Optional[PrometheusLogger]:
    """
    Return the first registered PrometheusLogger callback, if any.

    `litellm._async_success_callback` is searched first, then
    `litellm.callbacks`. Returns None when neither list holds a
    PrometheusLogger instance.
    """
    from litellm.integrations.prometheus import PrometheusLogger

    def _first_prometheus_logger(registered):
        # First PrometheusLogger instance in `registered`, or None.
        return next(
            (entry for entry in registered if isinstance(entry, PrometheusLogger)),
            None,
        )

    match = _first_prometheus_logger(litellm._async_success_callback)
    if match is None:
        # Only consult the global callback list when the async list had no hit.
        match = _first_prometheus_logger(litellm.callbacks)
    return match
|
|