Spaces:
Sleeping
Sleeping
"""
Returns a random deployment from the list of healthy deployments.

If weights are provided, it will return a deployment based on the weights.
"""
import random | |
from typing import TYPE_CHECKING, Any, Dict, List, Union | |
from litellm._logging import verbose_router_logger | |
# The Router class is needed only for static type checking. Importing it under
# TYPE_CHECKING keeps it out of the runtime import graph; at runtime the alias
# is simply ``Any``.
if TYPE_CHECKING:
    from litellm.router import Router as LitellmRouter
else:
    LitellmRouter = Any
def _pick_weighted_index(
    healthy_deployments: Union[List[Any], Dict[Any, Any]],
    weight_key: str,
    log_label: str,
) -> Union[int, None]:
    """Return a weighted-random index into ``healthy_deployments``, or None.

    Each deployment's weight is read from ``deployment["litellm_params"][weight_key]``;
    a missing or ``None`` value counts as 0.

    Args:
        healthy_deployments: Deployments to choose from.
        weight_key: Key inside ``litellm_params`` holding the weight value.
        log_label: Label used in the debug log line that prints the raw values.

    Returns:
        The selected index, or None when the values do not sum to a positive
        number (so no meaningful weighted pick is possible).
    """
    # `or 0` also covers a key that is present but explicitly set to None,
    # which would otherwise make sum() raise TypeError.
    raw_values = [
        m["litellm_params"].get(weight_key) or 0 for m in healthy_deployments
    ]
    verbose_router_logger.debug(f"\n{log_label} {raw_values}")

    total = sum(raw_values)
    if total <= 0:
        # Normalizing would divide by zero; let the caller fall through.
        return None

    weights = [value / total for value in raw_values]
    verbose_router_logger.debug(f"\n weights {weights}")

    # Perform weighted random pick
    selected_index = random.choices(range(len(weights)), weights=weights)[0]
    verbose_router_logger.debug(f"\n selected index, {selected_index}")
    return selected_index


def simple_shuffle(
    llm_router_instance: LitellmRouter,
    healthy_deployments: Union[List[Any], Dict[Any, Any]],
    model: str,
) -> Dict:
    """
    Returns a random deployment from the list of healthy deployments.

    The first deployment's ``litellm_params`` decides the strategy. The keys
    ``weight``, ``rpm`` and ``tpm`` are checked in that order; the first one
    that is not None there is used for a weighted random pick across all
    deployments. If the values for that key do not sum to a positive number,
    the key is skipped instead of dividing by zero. If no key applies, a
    uniformly random deployment is returned.

    Args:
        llm_router_instance: LitellmRouter instance; only its
            ``print_deployment`` method is used, for the info log line.
        healthy_deployments: List of healthy deployments. Must be non-empty,
            since element 0 is read unconditionally.
        model: Model name (used for logging only).

    Returns:
        Dict: A single healthy deployment
    """
    first_params = healthy_deployments[0].get("litellm_params")

    # (litellm_params key, label used in the debug log), in priority order.
    strategies = (("weight", "weight"), ("rpm", "rpms"), ("tpm", "tpms"))

    for weight_key, log_label in strategies:
        if first_params.get(weight_key, None) is None:
            continue

        selected_index = _pick_weighted_index(
            healthy_deployments, weight_key, log_label
        )
        if selected_index is None:
            continue

        deployment = healthy_deployments[selected_index]
        # The former `or deployment[0]` fallback indexed a deployment dict by
        # the integer 0, so it could only raise; it has been dropped.
        verbose_router_logger.info(
            f"get_available_deployment for model: {model}, Selected deployment: {llm_router_instance.print_deployment(deployment)} for model: {model}"
        )
        return deployment

    ############## No usable weight/RPM/TPM, we do a random pick #################
    return random.choice(healthy_deployments)