import io
import os
import sys

sys.path.insert(0, os.path.abspath("../.."))

import asyncio
import logging
import time
import uuid
from datetime import datetime, timedelta, timezone
from unittest.mock import MagicMock, call, patch

import pytest
from prometheus_client import REGISTRY, CollectorRegistry

import litellm
from litellm import completion
from litellm._logging import verbose_logger
from litellm.integrations.prometheus import (
    PrometheusLogger,
    UserAPIKeyLabelValues,
    get_custom_labels_from_metadata,
)
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
from litellm.proxy._types import UserAPIKeyAuth
from litellm.types.utils import (
    StandardLoggingHiddenParams,
    StandardLoggingMetadata,
    StandardLoggingModelInformation,
    StandardLoggingPayload,
)

verbose_logger.setLevel(logging.DEBUG)
litellm.set_verbose = True
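
# NOTE: the tests below swap each prometheus_client metric object on the logger for a
# MagicMock and assert on the recorded .labels(...)/.inc()/.set()/.observe() calls, so
# no real metrics are registered or emitted.
# Assumption: async tests are run via pytest-asyncio, hence the explicit
# @pytest.mark.asyncio marks below.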


@pytest.fixture
def prometheus_logger() -> PrometheusLogger:
    """
    Return a fresh PrometheusLogger with a clean global registry.

    PrometheusLogger registers its metrics on the default REGISTRY, so every
    previously registered collector is unregistered first to avoid duplicated
    timeseries errors between tests.
    """
    collectors = list(REGISTRY._collector_to_names.keys())
    for collector in collectors:
        REGISTRY.unregister(collector)

    return PrometheusLogger()


def create_standard_logging_payload() -> StandardLoggingPayload:
    return StandardLoggingPayload(
        id="test_id",
        call_type="completion",
        stream=False,
        response_cost=0.1,
        response_cost_failure_debug_info=None,
        status="success",
        total_tokens=30,
        prompt_tokens=20,
        completion_tokens=10,
        startTime=1234567890.0,
        endTime=1234567891.0,
        completionStartTime=1234567890.5,
        model_map_information=StandardLoggingModelInformation(
            model_map_key="gpt-3.5-turbo", model_map_value=None
        ),
        model="gpt-3.5-turbo",
        model_id="model-123",
        model_group="openai-gpt",
        custom_llm_provider="openai",
        api_base="https://api.openai.com",
        metadata=StandardLoggingMetadata(
            user_api_key_hash="test_hash",
            user_api_key_alias="test_alias",
            user_api_key_team_id="test_team",
            user_api_key_user_id="test_user",
            user_api_key_user_email="[email protected]",
            user_api_key_team_alias="test_team_alias",
            user_api_key_org_id=None,
            spend_logs_metadata=None,
            requester_ip_address="127.0.0.1",
            requester_metadata=None,
            user_api_key_end_user_id="test_end_user",
        ),
        cache_hit=False,
        cache_key=None,
        saved_cache_cost=0.0,
        request_tags=[],
        end_user=None,
        requester_ip_address="127.0.0.1",
        messages=[{"role": "user", "content": "Hello, world!"}],
        response={"choices": [{"message": {"content": "Hi there!"}}]},
        error_str=None,
        model_parameters={"stream": True},
        hidden_params=StandardLoggingHiddenParams(
            model_id="model-123",
            cache_key=None,
            api_base="https://api.openai.com",
            response_cost="0.1",
            additional_headers=None,
        ),
    )
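
# The payload above mirrors the StandardLoggingPayload fields the Prometheus logger
# reads (token counts, response cost, metadata labels, hidden params); individual
# tests override specific fields where they need different values.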


def test_safe_get_remaining_budget(prometheus_logger):
    assert prometheus_logger._safe_get_remaining_budget(100, 30) == 70
    assert prometheus_logger._safe_get_remaining_budget(100, None) == 100
    assert prometheus_logger._safe_get_remaining_budget(None, 30) == float("inf")
    assert prometheus_logger._safe_get_remaining_budget(None, None) == float("inf")
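
# For reference, a minimal sketch of the behavior the assertions above imply.
# This is a hypothetical restatement for readers, not litellm's actual implementation.
def _remaining_budget_sketch(max_budget, spend):
    if max_budget is None:  # no budget configured, treat as unlimited
        return float("inf")
    if spend is None:  # nothing spent yet, full budget remains
        return max_budget
    return max_budget - spend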


@pytest.mark.asyncio
async def test_async_log_success_event(prometheus_logger):
    standard_logging_object = create_standard_logging_payload()
    kwargs = {
        "model": "gpt-3.5-turbo",
        "stream": True,
        "litellm_params": {
            "metadata": {
                "user_api_key": "test_key",
                "user_api_key_user_id": "test_user",
                "user_api_key_team_id": "test_team",
                "user_api_key_end_user_id": "test_end_user",
            }
        },
        "start_time": datetime.now(),
        "completion_start_time": datetime.now(),
        "api_call_start_time": datetime.now(),
        "end_time": datetime.now() + timedelta(seconds=1),
        "standard_logging_object": standard_logging_object,
    }
    response_obj = MagicMock()

    # Mock the prometheus client methods
    # High Level Metrics - request/spend
    prometheus_logger.litellm_requests_metric = MagicMock()
    prometheus_logger.litellm_spend_metric = MagicMock()

    # Token Metrics
    prometheus_logger.litellm_tokens_metric = MagicMock()
    prometheus_logger.litellm_input_tokens_metric = MagicMock()
    prometheus_logger.litellm_output_tokens_metric = MagicMock()

    # Remaining Budget Metrics
    prometheus_logger.litellm_remaining_team_budget_metric = MagicMock()
    prometheus_logger.litellm_remaining_api_key_budget_metric = MagicMock()

    # Virtual Key Rate limit Metrics
    prometheus_logger.litellm_remaining_api_key_requests_for_model = MagicMock()
    prometheus_logger.litellm_remaining_api_key_tokens_for_model = MagicMock()

    # Latency Metrics
    prometheus_logger.litellm_llm_api_time_to_first_token_metric = MagicMock()
    prometheus_logger.litellm_llm_api_latency_metric = MagicMock()
    prometheus_logger.litellm_request_total_latency_metric = MagicMock()

    await prometheus_logger.async_log_success_event(
        kwargs, response_obj, kwargs["start_time"], kwargs["end_time"]
    )

    # Assert that the metrics were incremented
    prometheus_logger.litellm_requests_metric.labels.assert_called()
    prometheus_logger.litellm_spend_metric.labels.assert_called()

    # Token Metrics
    prometheus_logger.litellm_tokens_metric.labels.assert_called()
    prometheus_logger.litellm_input_tokens_metric.labels.assert_called()
    prometheus_logger.litellm_output_tokens_metric.labels.assert_called()

    # Remaining Budget Metrics
    prometheus_logger.litellm_remaining_team_budget_metric.labels.assert_called()
    prometheus_logger.litellm_remaining_api_key_budget_metric.labels.assert_called()

    # Virtual Key Rate limit Metrics
    prometheus_logger.litellm_remaining_api_key_requests_for_model.labels.assert_called()
    prometheus_logger.litellm_remaining_api_key_tokens_for_model.labels.assert_called()

    # Latency Metrics
    prometheus_logger.litellm_llm_api_time_to_first_token_metric.labels.assert_called()
    prometheus_logger.litellm_llm_api_latency_metric.labels.assert_called()
    prometheus_logger.litellm_request_total_latency_metric.labels.assert_called()


def test_increment_token_metrics(prometheus_logger):
    """
    Test the increment_token_metrics method

    input, output, and total tokens metrics are incremented by the values in the standard logging payload
    """
    prometheus_logger.litellm_tokens_metric = MagicMock()
    prometheus_logger.litellm_input_tokens_metric = MagicMock()
    prometheus_logger.litellm_output_tokens_metric = MagicMock()

    standard_logging_payload = create_standard_logging_payload()
    standard_logging_payload["total_tokens"] = 100
    standard_logging_payload["prompt_tokens"] = 50
    standard_logging_payload["completion_tokens"] = 50

    enum_values = UserAPIKeyLabelValues(
        litellm_model_name=standard_logging_payload["model"],
        api_provider=standard_logging_payload["custom_llm_provider"],
        hashed_api_key=standard_logging_payload["metadata"]["user_api_key_hash"],
        api_key_alias=standard_logging_payload["metadata"]["user_api_key_alias"],
        team=standard_logging_payload["metadata"]["user_api_key_team_id"],
        team_alias=standard_logging_payload["metadata"]["user_api_key_team_alias"],
        **standard_logging_payload,
    )

    prometheus_logger._increment_token_metrics(
        standard_logging_payload,
        end_user_id="user1",
        user_api_key="key1",
        user_api_key_alias="alias1",
        model="gpt-3.5-turbo",
        user_api_team="team1",
        user_api_team_alias="team_alias1",
        user_id="user1",
        enum_values=enum_values,
    )

    prometheus_logger.litellm_tokens_metric.labels.assert_called_once_with(
        end_user=None,
        user=None,
        hashed_api_key="test_hash",
        api_key_alias="test_alias",
        team="test_team",
        team_alias="test_team_alias",
        requested_model=None,
        model="gpt-3.5-turbo",
    )
    prometheus_logger.litellm_tokens_metric.labels().inc.assert_called_once_with(100)

    prometheus_logger.litellm_input_tokens_metric.labels.assert_called_once_with(
        end_user=None,
        user=None,
        hashed_api_key="test_hash",
        api_key_alias="test_alias",
        team="test_team",
        team_alias="test_team_alias",
        requested_model=None,
        model="gpt-3.5-turbo",
    )
    prometheus_logger.litellm_input_tokens_metric.labels().inc.assert_called_once_with(
        50
    )

    prometheus_logger.litellm_output_tokens_metric.labels.assert_called_once_with(
        end_user=None,
        user=None,
        hashed_api_key="test_hash",
        api_key_alias="test_alias",
        team="test_team",
        team_alias="test_team_alias",
        requested_model=None,
        model="gpt-3.5-turbo",
    )
    prometheus_logger.litellm_output_tokens_metric.labels().inc.assert_called_once_with(
        50
    )


@pytest.mark.asyncio
async def test_increment_remaining_budget_metrics(prometheus_logger):
    """
    Test the increment_remaining_budget_metrics method

    - team and api key remaining budget metrics are set to the difference between max budget and spend
    - team and api key max budget metrics are set to their respective max budgets
    - team and api key remaining hours metrics are set based on budget reset timestamps
    """
    # Mock all budget-related metrics
    prometheus_logger.litellm_remaining_team_budget_metric = MagicMock()
    prometheus_logger.litellm_remaining_api_key_budget_metric = MagicMock()
    prometheus_logger.litellm_team_max_budget_metric = MagicMock()
    prometheus_logger.litellm_api_key_max_budget_metric = MagicMock()
    prometheus_logger.litellm_team_budget_remaining_hours_metric = MagicMock()
    prometheus_logger.litellm_api_key_budget_remaining_hours_metric = MagicMock()

    # Create future budget reset times for testing
    future_reset_time_team = datetime.now() + timedelta(hours=10)
    future_reset_time_key = datetime.now() + timedelta(hours=12)

    # Mock get_team_object and get_key_object to return objects with budget reset times
    with patch(
        "litellm.proxy.auth.auth_checks.get_team_object"
    ) as mock_get_team, patch(
        "litellm.proxy.auth.auth_checks.get_key_object"
    ) as mock_get_key:
        mock_get_team.return_value = MagicMock(budget_reset_at=future_reset_time_team)
        mock_get_key.return_value = MagicMock(budget_reset_at=future_reset_time_key)

        litellm_params = {
            "metadata": {
                "user_api_key_team_spend": 50,
                "user_api_key_team_max_budget": 100,
                "user_api_key_spend": 25,
                "user_api_key_max_budget": 75,
            }
        }

        await prometheus_logger._increment_remaining_budget_metrics(
            user_api_team="team1",
            user_api_team_alias="team_alias1",
            user_api_key="key1",
            user_api_key_alias="alias1",
            litellm_params=litellm_params,
            response_cost=10,
        )

        # Test remaining budget metrics
        prometheus_logger.litellm_remaining_team_budget_metric.labels.assert_called_once_with(
            team="team1", team_alias="team_alias1"
        )
        prometheus_logger.litellm_remaining_team_budget_metric.labels().set.assert_called_once_with(
            40  # 100 - (50 + 10)
        )

        prometheus_logger.litellm_remaining_api_key_budget_metric.labels.assert_called_once_with(
            hashed_api_key="key1", api_key_alias="alias1"
        )
        prometheus_logger.litellm_remaining_api_key_budget_metric.labels().set.assert_called_once_with(
            40  # 75 - (25 + 10)
        )

        # Test max budget metrics
        prometheus_logger.litellm_team_max_budget_metric.labels.assert_called_once_with(
            team="team1", team_alias="team_alias1"
        )
        prometheus_logger.litellm_team_max_budget_metric.labels().set.assert_called_once_with(
            100
        )

        prometheus_logger.litellm_api_key_max_budget_metric.labels.assert_called_once_with(
            hashed_api_key="key1", api_key_alias="alias1"
        )
        prometheus_logger.litellm_api_key_max_budget_metric.labels().set.assert_called_once_with(
            75
        )

        # Test remaining hours metrics
        prometheus_logger.litellm_team_budget_remaining_hours_metric.labels.assert_called_once_with(
            team="team1", team_alias="team_alias1"
        )
        # The team's remaining hours should be approximately 10 (small drift due to test execution time)
        remaining_hours_call = (
            prometheus_logger.litellm_team_budget_remaining_hours_metric.labels().set.call_args[0][0]
        )
        assert 9.9 <= remaining_hours_call <= 10.0

        prometheus_logger.litellm_api_key_budget_remaining_hours_metric.labels.assert_called_once_with(
            hashed_api_key="key1", api_key_alias="alias1"
        )
        # The key's remaining hours should be approximately 12 (small drift due to test execution time)
        remaining_hours_call = (
            prometheus_logger.litellm_api_key_budget_remaining_hours_metric.labels().set.call_args[0][0]
        )
        assert 11.9 <= remaining_hours_call <= 12.0


def test_set_latency_metrics(prometheus_logger):
    """
    Test the set_latency_metrics method

    time to first token, llm api latency, and request total latency metrics are set to the values in the standard logging payload
    """
    standard_logging_payload = create_standard_logging_payload()
    prometheus_logger.litellm_llm_api_time_to_first_token_metric = MagicMock()
    prometheus_logger.litellm_llm_api_latency_metric = MagicMock()
    prometheus_logger.litellm_request_total_latency_metric = MagicMock()

    enum_values = UserAPIKeyLabelValues(
        litellm_model_name=standard_logging_payload["model"],
        api_provider=standard_logging_payload["custom_llm_provider"],
        hashed_api_key=standard_logging_payload["metadata"]["user_api_key_hash"],
        api_key_alias=standard_logging_payload["metadata"]["user_api_key_alias"],
        team=standard_logging_payload["metadata"]["user_api_key_team_id"],
        team_alias=standard_logging_payload["metadata"]["user_api_key_team_alias"],
        requested_model=standard_logging_payload["model_group"],
        user=standard_logging_payload["metadata"]["user_api_key_user_id"],
        **standard_logging_payload,
    )

    now = datetime.now()
    kwargs = {
        "end_time": now,  # when the request ends
        "start_time": now - timedelta(seconds=2),  # when the request starts
        "api_call_start_time": now - timedelta(seconds=1.5),  # when the api call starts
        "completion_start_time": now - timedelta(seconds=1),  # when the completion starts
        "stream": True,
    }

    prometheus_logger._set_latency_metrics(
        kwargs=kwargs,
        model="gpt-3.5-turbo",
        user_api_key="key1",
        user_api_key_alias="alias1",
        user_api_team="team1",
        user_api_team_alias="team_alias1",
        enum_values=enum_values,
    )

    # completion_start_time - api_call_start_time
    prometheus_logger.litellm_llm_api_time_to_first_token_metric.labels.assert_called_once_with(
        "gpt-3.5-turbo", "key1", "alias1", "team1", "team_alias1"
    )
    prometheus_logger.litellm_llm_api_time_to_first_token_metric.labels().observe.assert_called_once_with(
        0.5
    )

    # end_time - api_call_start_time
    prometheus_logger.litellm_llm_api_latency_metric.labels.assert_called_once_with(
        end_user=None,
        user="test_user",
        hashed_api_key="test_hash",
        api_key_alias="test_alias",
        team="test_team",
        team_alias="test_team_alias",
        requested_model="openai-gpt",
        model="gpt-3.5-turbo",
    )
    prometheus_logger.litellm_llm_api_latency_metric.labels().observe.assert_called_once_with(
        1.5
    )

    # total latency for the request
    prometheus_logger.litellm_request_total_latency_metric.labels.assert_called_once_with(
        end_user=None,
        user="test_user",
        hashed_api_key="test_hash",
        api_key_alias="test_alias",
        team="test_team",
        team_alias="test_team_alias",
        requested_model="openai-gpt",
        model="gpt-3.5-turbo",
    )
    prometheus_logger.litellm_request_total_latency_metric.labels().observe.assert_called_once_with(
        2.0
    )


def test_set_latency_metrics_missing_timestamps(prometheus_logger):
    """
    Test that _set_latency_metrics handles missing timestamp values gracefully
    """
    # Mock all metrics used in the method
    prometheus_logger.litellm_llm_api_time_to_first_token_metric = MagicMock()
    prometheus_logger.litellm_llm_api_latency_metric = MagicMock()
    prometheus_logger.litellm_request_total_latency_metric = MagicMock()

    standard_logging_payload = create_standard_logging_payload()
    enum_values = UserAPIKeyLabelValues(
        litellm_model_name=standard_logging_payload["model"],
        api_provider=standard_logging_payload["custom_llm_provider"],
        hashed_api_key=standard_logging_payload["metadata"]["user_api_key_hash"],
        api_key_alias=standard_logging_payload["metadata"]["user_api_key_alias"],
        team=standard_logging_payload["metadata"]["user_api_key_team_id"],
        team_alias=standard_logging_payload["metadata"]["user_api_key_team_alias"],
    )

    # Test case where completion_start_time is None
    kwargs = {
        "end_time": datetime.now(),
        "start_time": datetime.now() - timedelta(seconds=2),
        "api_call_start_time": datetime.now() - timedelta(seconds=1.5),
        "completion_start_time": None,  # Missing completion start time
        "stream": True,
    }

    # This should not raise an exception
    prometheus_logger._set_latency_metrics(
        kwargs=kwargs,
        model="gpt-3.5-turbo",
        user_api_key="key1",
        user_api_key_alias="alias1",
        user_api_team="team1",
        user_api_team_alias="team_alias1",
        enum_values=enum_values,
    )

    # Verify time to first token metric was not called due to missing completion_start_time
    prometheus_logger.litellm_llm_api_time_to_first_token_metric.labels.assert_not_called()

    # Other metrics should still be called
    prometheus_logger.litellm_llm_api_latency_metric.labels.assert_called_once()
    prometheus_logger.litellm_request_total_latency_metric.labels.assert_called_once()


def test_set_latency_metrics_missing_api_call_start(prometheus_logger):
    """
    Test that _set_latency_metrics handles missing api_call_start_time gracefully
    """
    # Mock all metrics used in the method
    prometheus_logger.litellm_llm_api_time_to_first_token_metric = MagicMock()
    prometheus_logger.litellm_llm_api_latency_metric = MagicMock()
    prometheus_logger.litellm_request_total_latency_metric = MagicMock()

    standard_logging_payload = create_standard_logging_payload()
    enum_values = UserAPIKeyLabelValues(
        litellm_model_name=standard_logging_payload["model"],
        api_provider=standard_logging_payload["custom_llm_provider"],
        hashed_api_key=standard_logging_payload["metadata"]["user_api_key_hash"],
        api_key_alias=standard_logging_payload["metadata"]["user_api_key_alias"],
        team=standard_logging_payload["metadata"]["user_api_key_team_id"],
        team_alias=standard_logging_payload["metadata"]["user_api_key_team_alias"],
    )

    # Test case where api_call_start_time is None
    kwargs = {
        "end_time": datetime.now(),
        "start_time": datetime.now() - timedelta(seconds=2),
        "api_call_start_time": None,  # Missing API call start time
        "completion_start_time": datetime.now() - timedelta(seconds=1),
        "stream": True,
    }

    # This should not raise an exception
    prometheus_logger._set_latency_metrics(
        kwargs=kwargs,
        model="gpt-3.5-turbo",
        user_api_key="key1",
        user_api_key_alias="alias1",
        user_api_team="team1",
        user_api_team_alias="team_alias1",
        enum_values=enum_values,
    )

    # Verify API latency metrics were not called due to missing api_call_start_time
    prometheus_logger.litellm_llm_api_time_to_first_token_metric.labels.assert_not_called()
    prometheus_logger.litellm_llm_api_latency_metric.labels.assert_not_called()

    # Total request latency should still be called
    prometheus_logger.litellm_request_total_latency_metric.labels.assert_called_once()
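
# Taken together, the two tests above document which timestamps gate which latency
# metrics: completion_start_time is required for time-to-first-token,
# api_call_start_time is required for both time-to-first-token and LLM API latency,
# while total request latency only needs start_time and end_time.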


def test_increment_top_level_request_and_spend_metrics(prometheus_logger):
    """
    Test the increment_top_level_request_and_spend_metrics method

    - litellm_requests_metric is incremented by 1
    - litellm_spend_metric is incremented by the response cost in the standard logging payload
    """
    standard_logging_payload = create_standard_logging_payload()
    enum_values = UserAPIKeyLabelValues(
        litellm_model_name=standard_logging_payload["model"],
        api_provider=standard_logging_payload["custom_llm_provider"],
        hashed_api_key=standard_logging_payload["metadata"]["user_api_key_hash"],
        api_key_alias=standard_logging_payload["metadata"]["user_api_key_alias"],
        team=standard_logging_payload["metadata"]["user_api_key_team_id"],
        team_alias=standard_logging_payload["metadata"]["user_api_key_team_alias"],
        **standard_logging_payload,
    )
    prometheus_logger.litellm_requests_metric = MagicMock()
    prometheus_logger.litellm_spend_metric = MagicMock()

    prometheus_logger._increment_top_level_request_and_spend_metrics(
        end_user_id="user1",
        user_api_key="key1",
        user_api_key_alias="alias1",
        model="gpt-3.5-turbo",
        user_api_team="team1",
        user_api_team_alias="team_alias1",
        user_id="user1",
        response_cost=0.1,
        enum_values=enum_values,
    )

    prometheus_logger.litellm_requests_metric.labels.assert_called_once_with(
        end_user=None,
        user=None,
        hashed_api_key="test_hash",
        api_key_alias="test_alias",
        team="test_team",
        team_alias="test_team_alias",
        model="gpt-3.5-turbo",
        user_email=None,
    )
    prometheus_logger.litellm_requests_metric.labels().inc.assert_called_once()

    prometheus_logger.litellm_spend_metric.labels.assert_called_once_with(
        "user1", "key1", "alias1", "gpt-3.5-turbo", "team1", "team_alias1", "user1"
    )
    prometheus_logger.litellm_spend_metric.labels().inc.assert_called_once_with(0.1)


@pytest.mark.asyncio
async def test_async_log_failure_event(prometheus_logger):
    # NOTE: almost all params for this metric are read from the standard logging payload
    standard_logging_object = create_standard_logging_payload()
    kwargs = {
        "model": "gpt-3.5-turbo",
        "litellm_params": {
            "custom_llm_provider": "openai",
        },
        "start_time": datetime.now(),
        "completion_start_time": datetime.now(),
        "api_call_start_time": datetime.now(),
        "end_time": datetime.now() + timedelta(seconds=1),
        "standard_logging_object": standard_logging_object,
        "exception": Exception("Test error"),
    }
    response_obj = MagicMock()

    # Mock the metrics
    prometheus_logger.litellm_llm_api_failed_requests_metric = MagicMock()
    prometheus_logger.litellm_deployment_failure_responses = MagicMock()
    prometheus_logger.litellm_deployment_total_requests = MagicMock()
    prometheus_logger.set_deployment_partial_outage = MagicMock()

    await prometheus_logger.async_log_failure_event(
        kwargs, response_obj, kwargs["start_time"], kwargs["end_time"]
    )

    # litellm_llm_api_failed_requests_metric is incremented with these positional labels:
    # end_user_id, user_api_key, user_api_key_alias, model, user_api_team,
    # user_api_team_alias, user_id
    prometheus_logger.litellm_llm_api_failed_requests_metric.labels.assert_called_once_with(
        None,
        "test_hash",
        "test_alias",
        "gpt-3.5-turbo",
        "test_team",
        "test_team_alias",
        "test_user",
    )
    prometheus_logger.litellm_llm_api_failed_requests_metric.labels().inc.assert_called_once()

    # deployment should be marked as in partial outage
    prometheus_logger.set_deployment_partial_outage.assert_called_once_with(
        litellm_model_name="gpt-3.5-turbo",
        model_id="model-123",
        api_base="https://api.openai.com",
        api_provider="openai",
    )

    # deployment failure responses incremented
    prometheus_logger.litellm_deployment_failure_responses.labels.assert_called_once_with(
        litellm_model_name="gpt-3.5-turbo",
        model_id="model-123",
        api_base="https://api.openai.com",
        api_provider="openai",
        exception_status="None",
        exception_class="Exception",
        requested_model="openai-gpt",  # passed in standard logging payload
        hashed_api_key="test_hash",
        api_key_alias="test_alias",
        team="test_team",
        team_alias="test_team_alias",
    )
    prometheus_logger.litellm_deployment_failure_responses.labels().inc.assert_called_once()

    # deployment total requests incremented
    prometheus_logger.litellm_deployment_total_requests.labels.assert_called_once_with(
        litellm_model_name="gpt-3.5-turbo",
        model_id="model-123",
        api_base="https://api.openai.com",
        api_provider="openai",
        requested_model="openai-gpt",  # passed in standard logging payload
        hashed_api_key="test_hash",
        api_key_alias="test_alias",
        team="test_team",
        team_alias="test_team_alias",
    )
    prometheus_logger.litellm_deployment_total_requests.labels().inc.assert_called_once()


@pytest.mark.asyncio
async def test_async_post_call_failure_hook(prometheus_logger):
    """
    Test for the async_post_call_failure_hook method

    it should increment the litellm_proxy_failed_requests_metric and litellm_proxy_total_requests_metric
    """
    # Mock the prometheus metrics
    prometheus_logger.litellm_proxy_failed_requests_metric = MagicMock()
    prometheus_logger.litellm_proxy_total_requests_metric = MagicMock()

    # Create test data
    request_data = {"model": "gpt-3.5-turbo"}
    original_exception = litellm.RateLimitError(
        message="Test error", llm_provider="openai", model="gpt-3.5-turbo"
    )
    user_api_key_dict = UserAPIKeyAuth(
        api_key="test_key",
        key_alias="test_alias",
        team_id="test_team",
        team_alias="test_team_alias",
        user_id="test_user",
        end_user_id="test_end_user",
        request_route="/chat/completions",
    )

    # Call the function
    await prometheus_logger.async_post_call_failure_hook(
        request_data=request_data,
        original_exception=original_exception,
        user_api_key_dict=user_api_key_dict,
    )

    # Assert failed requests metric was incremented with correct labels
    prometheus_logger.litellm_proxy_failed_requests_metric.labels.assert_called_once_with(
        end_user=None,
        hashed_api_key="test_key",
        api_key_alias="test_alias",
        requested_model="gpt-3.5-turbo",
        team="test_team",
        team_alias="test_team_alias",
        user="test_user",
        exception_status="429",
        exception_class="Openai.RateLimitError",
        route=user_api_key_dict.request_route,
    )
    prometheus_logger.litellm_proxy_failed_requests_metric.labels().inc.assert_called_once()

    # Assert total requests metric was incremented with correct labels
    prometheus_logger.litellm_proxy_total_requests_metric.labels.assert_called_once_with(
        end_user=None,
        hashed_api_key="test_key",
        api_key_alias="test_alias",
        requested_model="gpt-3.5-turbo",
        team="test_team",
        team_alias="test_team_alias",
        user="test_user",
        status_code="429",
        user_email=None,
        route=user_api_key_dict.request_route,
    )
    prometheus_logger.litellm_proxy_total_requests_metric.labels().inc.assert_called_once()


@pytest.mark.asyncio
async def test_async_post_call_success_hook(prometheus_logger):
    """
    Test for the async_post_call_success_hook method

    it should increment the litellm_proxy_total_requests_metric
    """
    # Mock the prometheus metric
    prometheus_logger.litellm_proxy_total_requests_metric = MagicMock()

    # Create test data
    data = {"model": "gpt-3.5-turbo"}
    user_api_key_dict = UserAPIKeyAuth(
        api_key="test_key",
        key_alias="test_alias",
        team_id="test_team",
        team_alias="test_team_alias",
        user_id="test_user",
        end_user_id="test_end_user",
        request_route="/chat/completions",
    )
    response = {"choices": [{"message": {"content": "test response"}}]}

    # Call the function
    await prometheus_logger.async_post_call_success_hook(
        data=data, user_api_key_dict=user_api_key_dict, response=response
    )

    # Assert total requests metric was incremented with correct labels
    prometheus_logger.litellm_proxy_total_requests_metric.labels.assert_called_once_with(
        end_user=None,
        hashed_api_key="test_key",
        api_key_alias="test_alias",
        requested_model="gpt-3.5-turbo",
        team="test_team",
        team_alias="test_team_alias",
        user="test_user",
        status_code="200",
        user_email=None,
        route=user_api_key_dict.request_route,
    )
    prometheus_logger.litellm_proxy_total_requests_metric.labels().inc.assert_called_once()


def test_set_llm_deployment_success_metrics(prometheus_logger):
    # Mock all the metrics used in the method
    prometheus_logger.litellm_remaining_requests_metric = MagicMock()
    prometheus_logger.litellm_remaining_tokens_metric = MagicMock()
    prometheus_logger.litellm_deployment_success_responses = MagicMock()
    prometheus_logger.litellm_deployment_total_requests = MagicMock()
    prometheus_logger.litellm_deployment_latency_per_output_token = MagicMock()
    prometheus_logger.set_deployment_healthy = MagicMock()
    prometheus_logger.litellm_overhead_latency_metric = MagicMock()

    standard_logging_payload = create_standard_logging_payload()
    standard_logging_payload["hidden_params"]["additional_headers"] = {
        "x_ratelimit_remaining_requests": 123,
        "x_ratelimit_remaining_tokens": 4321,
    }
    standard_logging_payload["hidden_params"]["litellm_overhead_time_ms"] = 100

    # Create test data
    request_kwargs = {
        "model": "gpt-3.5-turbo",
        "litellm_params": {
            "custom_llm_provider": "openai",
            "metadata": {"model_info": {"id": "model-123"}},
        },
        "standard_logging_object": standard_logging_payload,
    }
    enum_values = UserAPIKeyLabelValues(
        litellm_model_name=standard_logging_payload["model"],
        api_provider=standard_logging_payload["custom_llm_provider"],
        hashed_api_key=standard_logging_payload["metadata"]["user_api_key_hash"],
        api_key_alias=standard_logging_payload["metadata"]["user_api_key_alias"],
        team=standard_logging_payload["metadata"]["user_api_key_team_id"],
        team_alias=standard_logging_payload["metadata"]["user_api_key_team_alias"],
        **standard_logging_payload,
    )
    start_time = datetime.now()
    end_time = start_time + timedelta(seconds=1)
    output_tokens = 10

    # Call the function
    prometheus_logger.set_llm_deployment_success_metrics(
        request_kwargs=request_kwargs,
        start_time=start_time,
        end_time=end_time,
        output_tokens=output_tokens,
        enum_values=enum_values,
    )

    # Verify remaining requests metric
    prometheus_logger.litellm_remaining_requests_metric.labels.assert_called_once_with(
        "openai-gpt",  # model_group / requested model from create_standard_logging_payload()
        "openai",  # llm provider
        "https://api.openai.com",  # api base
        "gpt-3.5-turbo",  # actual model used - litellm model name
        standard_logging_payload["metadata"]["user_api_key_hash"],
        standard_logging_payload["metadata"]["user_api_key_alias"],
    )
    prometheus_logger.litellm_remaining_requests_metric.labels().set.assert_called_once_with(
        123
    )

    # Verify remaining tokens metric
    prometheus_logger.litellm_remaining_tokens_metric.labels.assert_called_once_with(
        "openai-gpt",  # model_group / requested model from create_standard_logging_payload()
        "openai",  # llm provider
        "https://api.openai.com",  # api base
        "gpt-3.5-turbo",  # actual model used - litellm model name
        standard_logging_payload["metadata"]["user_api_key_hash"],
        standard_logging_payload["metadata"]["user_api_key_alias"],
    )
    prometheus_logger.litellm_remaining_tokens_metric.labels().set.assert_called_once_with(
        4321
    )

    # Verify deployment healthy state
    prometheus_logger.set_deployment_healthy.assert_called_once_with(
        litellm_model_name="gpt-3.5-turbo",
        model_id="model-123",
        api_base="https://api.openai.com",
        api_provider="openai",
    )

    # Verify success responses metric
    prometheus_logger.litellm_deployment_success_responses.labels.assert_called_once_with(
        litellm_model_name="gpt-3.5-turbo",
        model_id="model-123",
        api_base="https://api.openai.com",
        api_provider="openai",
        requested_model="openai-gpt",  # requested model from create_standard_logging_payload()
        hashed_api_key=standard_logging_payload["metadata"]["user_api_key_hash"],
        api_key_alias=standard_logging_payload["metadata"]["user_api_key_alias"],
        team=standard_logging_payload["metadata"]["user_api_key_team_id"],
        team_alias=standard_logging_payload["metadata"]["user_api_key_team_alias"],
    )
    prometheus_logger.litellm_deployment_success_responses.labels().inc.assert_called_once()

    # Verify total requests metric
    prometheus_logger.litellm_deployment_total_requests.labels.assert_called_once_with(
        litellm_model_name="gpt-3.5-turbo",
        model_id="model-123",
        api_base="https://api.openai.com",
        api_provider="openai",
        requested_model="openai-gpt",  # requested model from create_standard_logging_payload()
        hashed_api_key=standard_logging_payload["metadata"]["user_api_key_hash"],
        api_key_alias=standard_logging_payload["metadata"]["user_api_key_alias"],
        team=standard_logging_payload["metadata"]["user_api_key_team_id"],
        team_alias=standard_logging_payload["metadata"]["user_api_key_team_alias"],
    )
    prometheus_logger.litellm_deployment_total_requests.labels().inc.assert_called_once()

    # Verify latency per output token metric
    prometheus_logger.litellm_deployment_latency_per_output_token.labels.assert_called_once_with(
        litellm_model_name="gpt-3.5-turbo",
        model_id="model-123",
        api_base="https://api.openai.com",
        api_provider="openai",
        hashed_api_key=standard_logging_payload["metadata"]["user_api_key_hash"],
        api_key_alias=standard_logging_payload["metadata"]["user_api_key_alias"],
        team=standard_logging_payload["metadata"]["user_api_key_team_id"],
        team_alias=standard_logging_payload["metadata"]["user_api_key_team_alias"],
    )

    # Verify LiteLLM overhead latency metric
    prometheus_logger.litellm_overhead_latency_metric.labels.assert_called_once_with(
        "openai-gpt",  # model_group / requested model from create_standard_logging_payload()
        "openai",  # llm provider
        "https://api.openai.com",  # api base
        "gpt-3.5-turbo",  # actual model used - litellm model name
        standard_logging_payload["metadata"]["user_api_key_hash"],
        standard_logging_payload["metadata"]["user_api_key_alias"],
    )

    # Calculate expected latency per token (1 second / 10 tokens = 0.1 seconds per token)
    expected_latency_per_token = 0.1
    prometheus_logger.litellm_deployment_latency_per_output_token.labels().observe.assert_called_once_with(
        expected_latency_per_token
    )


@pytest.mark.asyncio
async def test_log_success_fallback_event(prometheus_logger):
    prometheus_logger.litellm_deployment_successful_fallbacks = MagicMock()

    original_model_group = "gpt-3.5-turbo"
    kwargs = {
        "model": "gpt-4",
        "metadata": {
            "user_api_key_hash": "test_hash",
            "user_api_key_alias": "test_alias",
            "user_api_key_team_id": "test_team",
            "user_api_key_team_alias": "test_team_alias",
        },
    }
    original_exception = litellm.RateLimitError(
        message="Test error", llm_provider="openai", model="gpt-3.5-turbo"
    )

    await prometheus_logger.log_success_fallback_event(
        original_model_group=original_model_group,
        kwargs=kwargs,
        original_exception=original_exception,
    )

    prometheus_logger.litellm_deployment_successful_fallbacks.labels.assert_called_once_with(
        requested_model=original_model_group,
        fallback_model="gpt-4",
        hashed_api_key="test_hash",
        api_key_alias="test_alias",
        team="test_team",
        team_alias="test_team_alias",
        exception_status="429",
        exception_class="Openai.RateLimitError",
    )
    prometheus_logger.litellm_deployment_successful_fallbacks.labels().inc.assert_called_once()


@pytest.mark.asyncio
async def test_log_failure_fallback_event(prometheus_logger):
    prometheus_logger.litellm_deployment_failed_fallbacks = MagicMock()

    original_model_group = "gpt-3.5-turbo"
    kwargs = {
        "model": "gpt-4",
        "metadata": {
            "user_api_key_hash": "test_hash",
            "user_api_key_alias": "test_alias",
            "user_api_key_team_id": "test_team",
            "user_api_key_team_alias": "test_team_alias",
        },
    }
    original_exception = litellm.RateLimitError(
        message="Test error", llm_provider="openai", model="gpt-3.5-turbo"
    )

    await prometheus_logger.log_failure_fallback_event(
        original_model_group=original_model_group,
        kwargs=kwargs,
        original_exception=original_exception,
    )

    prometheus_logger.litellm_deployment_failed_fallbacks.labels.assert_called_once_with(
        requested_model=original_model_group,
        fallback_model="gpt-4",
        hashed_api_key="test_hash",
        api_key_alias="test_alias",
        team="test_team",
        team_alias="test_team_alias",
        exception_status="429",
        exception_class="Openai.RateLimitError",
    )
    prometheus_logger.litellm_deployment_failed_fallbacks.labels().inc.assert_called_once()


def test_deployment_state_management(prometheus_logger):
    prometheus_logger.litellm_deployment_state = MagicMock()

    test_params = {
        "litellm_model_name": "gpt-3.5-turbo",
        "model_id": "model-123",
        "api_base": "https://api.openai.com",
        "api_provider": "openai",
    }

    # Test set_deployment_healthy (state=0)
    prometheus_logger.set_deployment_healthy(**test_params)
    prometheus_logger.litellm_deployment_state.labels.assert_called_with(
        test_params["litellm_model_name"],
        test_params["model_id"],
        test_params["api_base"],
        test_params["api_provider"],
    )
    prometheus_logger.litellm_deployment_state.labels().set.assert_called_with(0)

    # Test set_deployment_partial_outage (state=1)
    prometheus_logger.set_deployment_partial_outage(**test_params)
    prometheus_logger.litellm_deployment_state.labels().set.assert_called_with(1)

    # Test set_deployment_complete_outage (state=2)
    prometheus_logger.set_deployment_complete_outage(**test_params)
    prometheus_logger.litellm_deployment_state.labels().set.assert_called_with(2)


def test_increment_deployment_cooled_down(prometheus_logger):
    prometheus_logger.litellm_deployment_cooled_down = MagicMock()

    prometheus_logger.increment_deployment_cooled_down(
        litellm_model_name="gpt-3.5-turbo",
        model_id="model-123",
        api_base="https://api.openai.com",
        api_provider="openai",
        exception_status="429",
    )

    prometheus_logger.litellm_deployment_cooled_down.labels.assert_called_once_with(
        "gpt-3.5-turbo", "model-123", "https://api.openai.com", "openai", "429"
    )
    prometheus_logger.litellm_deployment_cooled_down.labels().inc.assert_called_once()


@pytest.mark.parametrize("enable_end_user_cost_tracking_prometheus_only", [True, False])
def test_prometheus_factory(monkeypatch, enable_end_user_cost_tracking_prometheus_only):
    from litellm.integrations.prometheus import prometheus_label_factory
    from litellm.types.integrations.prometheus import UserAPIKeyLabelValues

    monkeypatch.setattr(
        "litellm.enable_end_user_cost_tracking_prometheus_only",
        enable_end_user_cost_tracking_prometheus_only,
    )

    enum_values = UserAPIKeyLabelValues(
        end_user="test_end_user",
        api_key_hash="test_hash",
        api_key_alias="test_alias",
    )
    supported_labels = ["end_user", "api_key_hash", "api_key_alias"]
    returned_dict = prometheus_label_factory(
        supported_enum_labels=supported_labels, enum_values=enum_values
    )

    if enable_end_user_cost_tracking_prometheus_only is True:
        assert returned_dict["end_user"] == "test_end_user"
    else:
        assert returned_dict["end_user"] is None


def test_get_custom_labels_from_metadata(monkeypatch):
    monkeypatch.setattr(
        "litellm.custom_prometheus_metadata_labels", ["metadata.foo", "metadata.bar"]
    )
    metadata = {"foo": "bar", "bar": "baz", "taz": "qux"}
    assert get_custom_labels_from_metadata(metadata) == {
        "metadata_foo": "bar",
        "metadata_bar": "baz",
    }


@pytest.mark.asyncio
async def test_initialize_remaining_budget_metrics(prometheus_logger):
    """
    Test that _initialize_remaining_budget_metrics correctly sets budget metrics for all teams
    """
    litellm.prometheus_initialize_budget_metrics = True
    # Mock the prisma client and get_paginated_teams function
    with patch("litellm.proxy.proxy_server.prisma_client") as mock_prisma, patch(
        "litellm.proxy.management_endpoints.team_endpoints.get_paginated_teams"
    ) as mock_get_teams:
        # Create mock team data with proper datetime objects for budget_reset_at
        future_reset = datetime.now() + timedelta(hours=24)  # Reset 24 hours from now
        mock_teams = [
            MagicMock(
                team_id="team1",
                team_alias="alias1",
                max_budget=100,
                spend=30,
                budget_reset_at=future_reset,
            ),
            MagicMock(
                team_id="team2",
                team_alias="alias2",
                max_budget=200,
                spend=50,
                budget_reset_at=future_reset,
            ),
            MagicMock(
                team_id="team3",
                team_alias=None,
                max_budget=300,
                spend=100,
                budget_reset_at=future_reset,
            ),
        ]

        # Mock get_paginated_teams to return our test data
        mock_get_teams.return_value = (mock_teams, len(mock_teams))

        # Mock the Prometheus metrics
        prometheus_logger.litellm_remaining_team_budget_metric = MagicMock()
        prometheus_logger.litellm_team_budget_remaining_hours_metric = MagicMock()

        # Call the function
        await prometheus_logger._initialize_remaining_budget_metrics()

        # Verify the remaining budget metric was set correctly for each team
        expected_budget_calls = [
            call.labels("team1", "alias1").set(70),  # 100 - 30
            call.labels("team2", "alias2").set(150),  # 200 - 50
            call.labels("team3", "").set(200),  # 300 - 100
        ]
        prometheus_logger.litellm_remaining_team_budget_metric.assert_has_calls(
            expected_budget_calls, any_order=True
        )

        # Get all the calls made to the hours metric
        hours_calls = (
            prometheus_logger.litellm_team_budget_remaining_hours_metric.mock_calls
        )

        # Verify the structure and approximate values of the hours calls
        assert len(hours_calls) == 6  # 3 teams * 2 calls each (labels + set)

        # Helper function to extract the hours value from a mock call
        def get_hours_from_call(call_obj):
            if "set" in str(call_obj):
                return call_obj[1][0]  # Extract the hours value
            return None

        # Verify each team's hours are approximately 24 (within reasonable bounds)
        hours_values = [
            get_hours_from_call(call)
            for call in hours_calls
            if get_hours_from_call(call) is not None
        ]
        for hours in hours_values:
            assert (
                23.9 <= hours <= 24.0
            ), f"Hours value {hours} not within expected range"

        # Verify the labels were called with correct team information
        label_calls = [
            call.labels(team="team1", team_alias="alias1"),
            call.labels(team="team2", team_alias="alias2"),
            call.labels(team="team3", team_alias=""),
        ]
        prometheus_logger.litellm_team_budget_remaining_hours_metric.assert_has_calls(
            label_calls, any_order=True
        )


@pytest.mark.asyncio
async def test_initialize_remaining_budget_metrics_exception_handling(
    prometheus_logger,
):
    """
    Test that _initialize_remaining_budget_metrics properly handles exceptions
    """
    litellm.prometheus_initialize_budget_metrics = True
    # Mock the prisma client and get_paginated_teams function to raise an exception
    with patch("litellm.proxy.proxy_server.prisma_client") as mock_prisma, patch(
        "litellm.proxy.management_endpoints.team_endpoints.get_paginated_teams"
    ) as mock_get_teams, patch(
        "litellm.proxy.management_endpoints.key_management_endpoints._list_key_helper"
    ) as mock_list_keys:
        # Make get_paginated_teams and _list_key_helper raise exceptions
        mock_get_teams.side_effect = Exception("Database error")
        mock_list_keys.side_effect = Exception("Key listing error")

        # Mock the Prometheus metrics
        prometheus_logger.litellm_remaining_team_budget_metric = MagicMock()
        prometheus_logger.litellm_remaining_api_key_budget_metric = MagicMock()

        # Mock the logger to capture the errors
        with patch("litellm._logging.verbose_logger.exception") as mock_logger:
            # Call the function
            await prometheus_logger._initialize_remaining_budget_metrics()

            # Verify both errors were logged
            assert mock_logger.call_count == 2
            assert (
                "Error initializing teams budget metrics"
                in mock_logger.call_args_list[0][0][0]
            )
            assert (
                "Error initializing keys budget metrics"
                in mock_logger.call_args_list[1][0][0]
            )

        # Verify the metrics were never called
        prometheus_logger.litellm_remaining_team_budget_metric.assert_not_called()
        prometheus_logger.litellm_remaining_api_key_budget_metric.assert_not_called()


@pytest.mark.asyncio
async def test_initialize_api_key_budget_metrics(prometheus_logger):
    """
    Test that _initialize_api_key_budget_metrics correctly sets budget metrics for all API keys
    """
    litellm.prometheus_initialize_budget_metrics = True
    # Mock the prisma client and _list_key_helper function
    with patch("litellm.proxy.proxy_server.prisma_client") as mock_prisma, patch(
        "litellm.proxy.management_endpoints.key_management_endpoints._list_key_helper"
    ) as mock_list_keys:
        # Create mock key data with proper datetime objects for budget_reset_at
        future_reset = datetime.now() + timedelta(hours=24)  # Reset 24 hours from now
        key1 = UserAPIKeyAuth(
            api_key="key1_hash",
            key_alias="alias1",
            team_id="team1",
            max_budget=100,
            spend=30,
            budget_reset_at=future_reset,
        )
        key1.token = "key1_hash"
        key2 = UserAPIKeyAuth(
            api_key="key2_hash",
            key_alias="alias2",
            team_id="team2",
            max_budget=200,
            spend=50,
            budget_reset_at=future_reset,
        )
        key2.token = "key2_hash"
        key3 = UserAPIKeyAuth(
            api_key="key3_hash",
            key_alias=None,
            team_id="team3",
            max_budget=300,
            spend=100,
            budget_reset_at=future_reset,
        )
        key3.token = "key3_hash"

        mock_keys = [
            key1,
            key2,
            key3,
        ]

        # Mock _list_key_helper to return our test data
        mock_list_keys.return_value = {"keys": mock_keys, "total_count": len(mock_keys)}

        # Mock the Prometheus metrics
        prometheus_logger.litellm_remaining_api_key_budget_metric = MagicMock()
        prometheus_logger.litellm_api_key_budget_remaining_hours_metric = MagicMock()
        prometheus_logger.litellm_api_key_max_budget_metric = MagicMock()

        # Call the function
        await prometheus_logger._initialize_api_key_budget_metrics()

        # Verify the remaining budget metric was set correctly for each key
        expected_budget_calls = [
            call.labels("key1_hash", "alias1").set(70),  # 100 - 30
            call.labels("key2_hash", "alias2").set(150),  # 200 - 50
            call.labels("key3_hash", "").set(200),  # 300 - 100
        ]
        prometheus_logger.litellm_remaining_api_key_budget_metric.assert_has_calls(
            expected_budget_calls, any_order=True
        )

        # Get all the calls made to the hours metric
        hours_calls = (
            prometheus_logger.litellm_api_key_budget_remaining_hours_metric.mock_calls
        )

        # Verify the structure and approximate values of the hours calls
        assert len(hours_calls) == 6  # 3 keys * 2 calls each (labels + set)

        # Helper function to extract the hours value from a mock call
        def get_hours_from_call(call_obj):
            if "set" in str(call_obj):
                return call_obj[1][0]  # Extract the hours value
            return None

        # Verify each key's hours are approximately 24 (within reasonable bounds)
        hours_values = [
            get_hours_from_call(call)
            for call in hours_calls
            if get_hours_from_call(call) is not None
        ]
        for hours in hours_values:
            assert (
                23.9 <= hours <= 24.0
            ), f"Hours value {hours} not within expected range"

        # Verify max budget metric was set correctly for each key
        expected_max_budget_calls = [
            call.labels("key1_hash", "alias1").set(100),
            call.labels("key2_hash", "alias2").set(200),
            call.labels("key3_hash", "").set(300),
        ]
        prometheus_logger.litellm_api_key_max_budget_metric.assert_has_calls(
            expected_max_budget_calls, any_order=True
        )


def test_set_team_budget_metrics_multiple_teams(prometheus_logger):
    """
    Test that _set_team_budget_metrics correctly handles multiple teams with different budgets and reset times
    """
    # Create test teams with different budgets and reset times
    teams = [
        MagicMock(
            team_id="team1",
            team_alias="alias1",
            spend=50.0,
            max_budget=100.0,
            budget_reset_at=datetime(2024, 12, 31, tzinfo=timezone.utc),
        ),
        MagicMock(
            team_id="team2",
            team_alias="alias2",
            spend=75.0,
            max_budget=150.0,
            budget_reset_at=datetime(2024, 6, 30, tzinfo=timezone.utc),
        ),
        MagicMock(
            team_id="team3",
            team_alias="alias3",
            spend=25.0,
            max_budget=200.0,
            budget_reset_at=datetime(2024, 3, 31, tzinfo=timezone.utc),
        ),
    ]

    # Mock the metrics
    prometheus_logger.litellm_remaining_team_budget_metric = MagicMock()
    prometheus_logger.litellm_team_max_budget_metric = MagicMock()
    prometheus_logger.litellm_team_budget_remaining_hours_metric = MagicMock()

    # Set metrics for each team
    for team in teams:
        prometheus_logger._set_team_budget_metrics(team)

    # Verify remaining budget metric calls
    expected_remaining_budget_calls = [
        call.labels(team="team1", team_alias="alias1").set(50.0),  # 100 - 50
        call.labels(team="team2", team_alias="alias2").set(75.0),  # 150 - 75
        call.labels(team="team3", team_alias="alias3").set(175.0),  # 200 - 25
    ]
    prometheus_logger.litellm_remaining_team_budget_metric.assert_has_calls(
        expected_remaining_budget_calls, any_order=True
    )

    # Verify max budget metric calls
    expected_max_budget_calls = [
        call.labels("team1", "alias1").set(100.0),
        call.labels("team2", "alias2").set(150.0),
        call.labels("team3", "alias3").set(200.0),
    ]
    prometheus_logger.litellm_team_max_budget_metric.assert_has_calls(
        expected_max_budget_calls, any_order=True
    )

    # Verify budget reset metric calls
    # Note: the exact hours depend on the current time, so only the call structure is verified
    assert (
        prometheus_logger.litellm_team_budget_remaining_hours_metric.labels.call_count
        == 3
    )
    assert (
        prometheus_logger.litellm_team_budget_remaining_hours_metric.labels().set.call_count
        == 3
    )


def test_set_team_budget_metrics_null_values(prometheus_logger):
    """
    Test that _set_team_budget_metrics correctly handles null/None values
    """
    # Create test team with null values
    team = MagicMock(
        team_id="team_null",
        team_alias=None,  # Test null alias
        spend=None,  # Test null spend
        max_budget=None,  # Test null max_budget
        budget_reset_at=None,  # Test null reset time
    )

    # Mock the metrics
    prometheus_logger.litellm_remaining_team_budget_metric = MagicMock()
    prometheus_logger.litellm_team_max_budget_metric = MagicMock()
    prometheus_logger.litellm_team_budget_remaining_hours_metric = MagicMock()

    # Set metrics for the team
    prometheus_logger._set_team_budget_metrics(team)

    # Verify remaining budget metric is set to infinity when max_budget is None
    prometheus_logger.litellm_remaining_team_budget_metric.labels.assert_called_once_with(
        team="team_null", team_alias=""
    )
    prometheus_logger.litellm_remaining_team_budget_metric.labels().set.assert_called_once_with(
        float("inf")
    )

    # Verify max budget metric is not set when max_budget is None
    prometheus_logger.litellm_team_max_budget_metric.assert_not_called()

    # Verify reset metric is not set when budget_reset_at is None
    prometheus_logger.litellm_team_budget_remaining_hours_metric.assert_not_called()


def test_set_team_budget_metrics_with_custom_labels(prometheus_logger, monkeypatch):
    """
    Test that _set_team_budget_metrics correctly handles custom prometheus labels
    """
    # Set custom prometheus labels
    custom_labels = ["metadata.organization", "metadata.environment"]
    monkeypatch.setattr("litellm.custom_prometheus_metadata_labels", custom_labels)

    # Create test team with custom metadata
    team = MagicMock(
        team_id="team1",
        team_alias="alias1",
        spend=50.0,
        max_budget=100.0,
        budget_reset_at=datetime(2024, 12, 31, tzinfo=timezone.utc),
    )

    # Mock the metrics
    prometheus_logger.litellm_remaining_team_budget_metric = MagicMock()
    prometheus_logger.litellm_team_max_budget_metric = MagicMock()
    prometheus_logger.litellm_team_budget_remaining_hours_metric = MagicMock()

    # Set metrics for the team
    prometheus_logger._set_team_budget_metrics(team)

    # Verify remaining budget metric includes custom labels
    prometheus_logger.litellm_remaining_team_budget_metric.labels.assert_called_once_with(
        team="team1",
        team_alias="alias1",
        metadata_organization=None,
        metadata_environment=None,
    )
    prometheus_logger.litellm_remaining_team_budget_metric.labels().set.assert_called_once_with(
        50.0
    )  # 100 - 50

    # Verify max budget metric includes custom labels
    prometheus_logger.litellm_team_max_budget_metric.labels.assert_called_once_with(
        team="team1",
        team_alias="alias1",
        metadata_organization=None,
        metadata_environment=None,
    )
    prometheus_logger.litellm_team_max_budget_metric.labels().set.assert_called_once_with(
        100.0
    )

    # Verify budget reset metric includes custom labels
    budget_reset_calls = (
        prometheus_logger.litellm_team_budget_remaining_hours_metric.labels.call_args_list
    )
    assert len(budget_reset_calls) == 1
    assert budget_reset_calls[0][1] == {
        "team": "team1",
        "team_alias": "alias1",
        "metadata_organization": None,
        "metadata_environment": None,
    }


def test_get_exception_class_name(prometheus_logger):
    """
    Test that _get_exception_class_name correctly formats the exception class name
    """
    # Test case 1: Exception with llm_provider
    rate_limit_error = litellm.RateLimitError(
        message="Rate limit exceeded", llm_provider="openai", model="gpt-3.5-turbo"
    )
    assert (
        prometheus_logger._get_exception_class_name(rate_limit_error)
        == "Openai.RateLimitError"
    )

    # Test case 2: Exception with empty llm_provider
    auth_error = litellm.AuthenticationError(
        message="Invalid API key", llm_provider="", model="gpt-4"
    )
    assert (
        prometheus_logger._get_exception_class_name(auth_error) == "AuthenticationError"
    )

    # Test case 3: Exception with None llm_provider
    context_window_error = litellm.ContextWindowExceededError(
        message="Context length exceeded", llm_provider=None, model="gpt-4"
    )
    assert (
        prometheus_logger._get_exception_class_name(context_window_error)
        == "ContextWindowExceededError"
    )