# Tests for litellm.llms.azure.common_utils.BaseAzureLLM: Azure SDK client
# initialization (API key / Entra ID / OIDC / token-provider paths) and
# sync-vs-async client caching.
import json | |
import os | |
import sys | |
import traceback | |
from typing import Callable, Optional | |
from unittest.mock import MagicMock, patch | |
import pytest | |
sys.path.insert( | |
0, os.path.abspath("../../../..") | |
) # Adds the parent directory to the system path | |
import litellm | |
from litellm.llms.azure.common_utils import BaseAzureLLM | |
from litellm.types.utils import CallTypes | |
# Mock the necessary dependencies
@pytest.fixture
def setup_mocks():
    """Pytest fixture patching every external dependency of
    ``initialize_azure_sdk_client``.

    Yields a dict of the active mocks keyed by a short name so tests can
    assert on calls (e.g. ``setup_mocks["entra_token"].assert_called_once()``).

    Note: every test below requests ``setup_mocks`` as a fixture argument, so
    the ``@pytest.fixture`` decorator is required for collection to work.
    """
    with patch(
        "litellm.llms.azure.common_utils.get_azure_ad_token_from_entra_id"
    ) as mock_entra_token, patch(
        "litellm.llms.azure.common_utils.get_azure_ad_token_from_username_password"
    ) as mock_username_password_token, patch(
        "litellm.llms.azure.common_utils.get_azure_ad_token_from_oidc"
    ) as mock_oidc_token, patch(
        "litellm.llms.azure.common_utils.get_azure_ad_token_provider"
    ) as mock_token_provider, patch(
        "litellm.llms.azure.common_utils.litellm"
    ) as mock_litellm, patch(
        "litellm.llms.azure.common_utils.verbose_logger"
    ) as mock_logger, patch(
        "litellm.llms.azure.common_utils.select_azure_base_url_or_endpoint"
    ) as mock_select_url:
        # Configure mocks: deterministic token values and a pass-through
        # endpoint selector so result dicts are easy to assert on.
        mock_litellm.AZURE_DEFAULT_API_VERSION = "2023-05-15"
        mock_litellm.enable_azure_ad_token_refresh = False
        mock_entra_token.return_value = lambda: "mock-entra-token"
        mock_username_password_token.return_value = (
            lambda: "mock-username-password-token"
        )
        mock_oidc_token.return_value = "mock-oidc-token"
        mock_token_provider.return_value = lambda: "mock-default-token"
        mock_select_url.side_effect = (
            lambda azure_client_params, **kwargs: azure_client_params
        )
        yield {
            "entra_token": mock_entra_token,
            "username_password_token": mock_username_password_token,
            "oidc_token": mock_oidc_token,
            "token_provider": mock_token_provider,
            "litellm": mock_litellm,
            "logger": mock_logger,
            "select_url": mock_select_url,
        }
def test_initialize_with_api_key(setup_mocks):
    """An explicit api_key should be passed through unchanged, with
    azure_ad_token present in the result but unset."""
    result = BaseAzureLLM().initialize_azure_sdk_client(
        litellm_params={},
        api_key="test-api-key",
        api_base="https://test.openai.azure.com",
        model_name="gpt-4",
        api_version="2023-06-01",
        is_async=False,
    )

    expected = {
        "api_key": "test-api-key",
        "azure_endpoint": "https://test.openai.azure.com",
        "api_version": "2023-06-01",
    }
    for field, value in expected.items():
        assert result[field] == value
    # The key must exist AND be None — no AD token path was taken.
    assert result.get("azure_ad_token", "missing") is None
def test_initialize_with_tenant_credentials_env_var(setup_mocks, monkeypatch):
    """Tenant credentials supplied via environment variables should trigger
    the Entra ID token helper."""
    env_creds = {
        "AZURE_TENANT_ID": "test-tenant-id",
        "AZURE_CLIENT_ID": "test-client-id",
        "AZURE_CLIENT_SECRET": "test-client-secret",
    }
    for env_name, env_value in env_creds.items():
        monkeypatch.setenv(env_name, env_value)

    result = BaseAzureLLM().initialize_azure_sdk_client(
        litellm_params={},
        api_key=None,
        api_base="https://test.openai.azure.com",
        model_name="gpt-4",
        api_version=None,
        is_async=False,
    )

    # The env credentials must reach get_azure_ad_token_from_entra_id.
    setup_mocks["entra_token"].assert_called_once_with(
        tenant_id="test-tenant-id",
        client_id="test-client-id",
        client_secret="test-client-secret",
    )
    assert result["api_key"] is None
    assert result["azure_endpoint"] == "https://test.openai.azure.com"
    assert "azure_ad_token_provider" in result
def test_initialize_with_tenant_credentials(setup_mocks):
    """Explicit tenant/client credentials in litellm_params should route
    through the Entra ID token helper."""
    creds = {
        "tenant_id": "test-tenant-id",
        "client_id": "test-client-id",
        "client_secret": "test-client-secret",
    }
    result = BaseAzureLLM().initialize_azure_sdk_client(
        litellm_params=dict(creds),
        api_key=None,
        api_base="https://test.openai.azure.com",
        model_name="gpt-4",
        api_version=None,
        is_async=False,
    )

    # Credentials must be forwarded verbatim to the Entra ID helper.
    setup_mocks["entra_token"].assert_called_once_with(
        tenant_id=creds["tenant_id"],
        client_id=creds["client_id"],
        client_secret=creds["client_secret"],
    )
    assert result["api_key"] is None
    assert result["azure_endpoint"] == "https://test.openai.azure.com"
    assert "azure_ad_token_provider" in result
def test_initialize_with_username_password(monkeypatch, setup_mocks):
    """Username/password credentials in litellm_params should use the
    username/password AD-token helper.

    Environment credentials are cleared first so they cannot shadow the
    explicit parameters under test.  (Leftover debug ``print`` calls from an
    earlier investigation were removed.)
    """
    for env_name in (
        "AZURE_TENANT_ID",
        "AZURE_CLIENT_ID",
        "AZURE_CLIENT_SECRET",
        "AZURE_USERNAME",
        "AZURE_PASSWORD",
    ):
        monkeypatch.delenv(env_name, raising=False)

    result = BaseAzureLLM().initialize_azure_sdk_client(
        litellm_params={
            "azure_username": "test-username",
            "azure_password": "test-password",
            "client_id": "test-client-id",
        },
        api_key=None,
        api_base="https://test.openai.azure.com",
        model_name="gpt-4",
        api_version=None,
        is_async=False,
    )

    # Verify that get_azure_ad_token_from_username_password was called
    # with the explicit credentials.
    setup_mocks["username_password_token"].assert_called_once_with(
        azure_username="test-username",
        azure_password="test-password",
        client_id="test-client-id",
    )
    assert "azure_ad_token_provider" in result
def test_initialize_with_oidc_token(setup_mocks, monkeypatch):
    """An azure_ad_token prefixed with "oidc/" should be exchanged via the
    OIDC helper, with None client/tenant IDs when none are configured."""
    for env_name in ("AZURE_CLIENT_ID", "AZURE_TENANT_ID"):
        monkeypatch.delenv(env_name, raising=False)

    result = BaseAzureLLM().initialize_azure_sdk_client(
        litellm_params={"azure_ad_token": "oidc/test-token"},
        api_key=None,
        api_base="https://test.openai.azure.com",
        model_name="gpt-4",
        api_version=None,
        is_async=False,
    )

    setup_mocks["oidc_token"].assert_called_once_with(
        azure_ad_token="oidc/test-token", azure_client_id=None, azure_tenant_id=None
    )
    assert result["azure_ad_token"] == "mock-oidc-token"
def test_initialize_with_oidc_token_and_client_params(setup_mocks):
    """Explicit client/tenant IDs must be forwarded alongside an "oidc/"
    token to the OIDC helper."""
    oidc_params = {
        "azure_ad_token": "oidc/test-token",
        "client_id": "test-client-id",
        "tenant_id": "test-tenant-id",
    }
    result = BaseAzureLLM().initialize_azure_sdk_client(
        litellm_params=oidc_params,
        api_key=None,
        api_base="https://test.openai.azure.com",
        model_name="gpt-4",
        api_version=None,
        is_async=False,
    )

    setup_mocks["oidc_token"].assert_called_once_with(
        azure_ad_token="oidc/test-token",
        azure_client_id="test-client-id",
        azure_tenant_id="test-tenant-id",
    )
    assert result["azure_ad_token"] == "mock-oidc-token"
def test_initialize_with_oidc_token_fallback_to_env(setup_mocks, monkeypatch):
    """With no explicit IDs, the OIDC path should fall back to
    AZURE_CLIENT_ID / AZURE_TENANT_ID from the environment."""
    env_fallbacks = {
        "AZURE_CLIENT_ID": "env-client-id",
        "AZURE_TENANT_ID": "env-tenant-id",
    }
    for env_name, env_value in env_fallbacks.items():
        monkeypatch.setenv(env_name, env_value)

    result = BaseAzureLLM().initialize_azure_sdk_client(
        litellm_params={"azure_ad_token": "oidc/test-token"},
        api_key=None,
        api_base="https://test.openai.azure.com",
        model_name="gpt-4",
        api_version=None,
        is_async=False,
    )

    setup_mocks["oidc_token"].assert_called_once_with(
        azure_ad_token="oidc/test-token",
        azure_client_id="env-client-id",
        azure_tenant_id="env-tenant-id",
    )
    assert result["azure_ad_token"] == "mock-oidc-token"
def test_initialize_with_oidc_token_no_credentials(setup_mocks, monkeypatch):
    """With neither explicit nor environment credentials, the OIDC helper
    should receive None for both client and tenant IDs."""
    for env_name in ("AZURE_CLIENT_ID", "AZURE_TENANT_ID"):
        monkeypatch.delenv(env_name, raising=False)

    result = BaseAzureLLM().initialize_azure_sdk_client(
        litellm_params={"azure_ad_token": "oidc/test-token"},
        api_key=None,
        api_base="https://test.openai.azure.com",
        model_name="gpt-4",
        api_version=None,
        is_async=False,
    )

    setup_mocks["oidc_token"].assert_called_once_with(
        azure_ad_token="oidc/test-token", azure_client_id=None, azure_tenant_id=None
    )
    assert result["azure_ad_token"] == "mock-oidc-token"
def test_initialize_with_ad_token_provider(setup_mocks, monkeypatch):
    """A caller-supplied azure_ad_token_provider should be kept as-is."""
    for env_name in ("AZURE_CLIENT_ID", "AZURE_TENANT_ID"):
        monkeypatch.delenv(env_name, raising=False)

    def custom_provider():
        return "mock-custom-token"

    result = BaseAzureLLM().initialize_azure_sdk_client(
        litellm_params={"azure_ad_token_provider": custom_provider},
        api_key=None,
        api_base="https://test.openai.azure.com",
        model_name="gpt-4",
        api_version=None,
        is_async=False,
    )

    # Calling the returned provider must yield the custom token.
    assert result["azure_ad_token_provider"]() == "mock-custom-token"
def test_initialize_with_enable_token_refresh(setup_mocks, monkeypatch):
    """With ``enable_azure_ad_token_refresh`` on and no env credentials,
    the default AD token provider should be used.

    The previous ``litellm._turn_on_debug()`` call was removed: it flipped
    global debug logging for the whole test session, leaking state into
    every subsequent test.
    """
    for env_name in ("AZURE_CLIENT_ID", "AZURE_CLIENT_SECRET", "AZURE_TENANT_ID"):
        monkeypatch.delenv(env_name, raising=False)
    # Enable token refresh on the mocked litellm module.
    setup_mocks["litellm"].enable_azure_ad_token_refresh = True

    result = BaseAzureLLM().initialize_azure_sdk_client(
        litellm_params={},
        api_key=None,
        api_base="https://test.openai.azure.com",
        model_name="gpt-4",
        api_version=None,
        is_async=False,
    )

    # Verify that get_azure_ad_token_provider was used as the fallback.
    setup_mocks["token_provider"].assert_called_once()
    assert "azure_ad_token_provider" in result
def test_initialize_with_token_refresh_error(setup_mocks, monkeypatch):
    """A failing default token provider must be swallowed and logged, not
    raised to the caller."""
    for env_name in ("AZURE_CLIENT_ID", "AZURE_CLIENT_SECRET", "AZURE_TENANT_ID"):
        monkeypatch.delenv(env_name, raising=False)
    setup_mocks["litellm"].enable_azure_ad_token_refresh = True
    setup_mocks["token_provider"].side_effect = ValueError("Token provider error")

    # Must not raise despite the provider blowing up.
    result = BaseAzureLLM().initialize_azure_sdk_client(
        litellm_params={},
        api_key=None,
        api_base="https://test.openai.azure.com",
        model_name="gpt-4",
        api_version=None,
        is_async=False,
    )

    # The failure is reported via debug logging.
    setup_mocks["logger"].debug.assert_any_call(
        "Azure AD Token Provider could not be used."
    )
def test_api_version_from_env_var(setup_mocks):
    """When api_version is not passed, AZURE_API_VERSION from the
    environment should supply it."""
    with patch.dict(os.environ, {"AZURE_API_VERSION": "2023-07-01"}):
        init_result = BaseAzureLLM().initialize_azure_sdk_client(
            litellm_params={},
            api_key="test-api-key",
            api_base="https://test.openai.azure.com",
            model_name="gpt-4",
            api_version=None,
            is_async=False,
        )

    assert init_result["api_version"] == "2023-07-01"
def test_select_azure_base_url_called(setup_mocks):
    """initialize_azure_sdk_client must route endpoint selection through
    select_azure_base_url_or_endpoint exactly once."""
    BaseAzureLLM().initialize_azure_sdk_client(
        litellm_params={},
        api_key="test-api-key",
        api_base="https://test.openai.azure.com",
        model_name="gpt-4",
        api_version="2023-06-01",
        is_async=False,
    )

    setup_mocks["select_url"].assert_called_once()
@pytest.mark.parametrize(
    "call_type",
    [
        ct
        for ct in CallTypes
        if ct.value
        in {
            "acompletion",
            "atext_completion",
            "aimage_generation",
            "aembedding",
            "arerank",
            "atranscription",
            "aspeech",
            "acreate_batch",
            "aretrieve_batch",
            "aget_assistants",
            "acreate_assistants",
            "adelete_assistant",
            "acreate_thread",
            "aget_thread",
            "a_add_message",
            "aget_messages",
            "arun_thread",
            "acreate_file",
            "afile_content",
            "afile_delete",
        }
    ],
)
@pytest.mark.asyncio
async def test_ensure_initialize_azure_sdk_client_always_used(call_type):
    """Every async Azure entrypoint on the Router must go through
    ``initialize_azure_sdk_client`` with the caller's litellm_params.

    The function previously took ``call_type`` with no decorators, so pytest
    reported a missing fixture and never ran it; the parametrize set is the
    key set of ``test_inputs`` below, filtered through ``CallTypes`` so
    members absent from the enum are simply not generated.
    """
    from litellm.router import Router

    # Create a router with a single Azure deployment.
    azure_model_name = "azure/chatgpt-v-2"
    router = Router(
        model_list=[
            {
                "model_name": "gpt-3.5-turbo",
                "litellm_params": {
                    "model": azure_model_name,
                    "api_key": "test-api-key",
                    "api_version": os.getenv("AZURE_API_VERSION", "2023-05-15"),
                    "api_base": os.getenv(
                        "AZURE_API_BASE", "https://test.openai.azure.com"
                    ),
                },
            }
        ],
    )

    # Minimal required kwargs per call type.
    test_inputs = {
        "acompletion": {
            "messages": [{"role": "user", "content": "Hello, how are you?"}]
        },
        "atext_completion": {"prompt": "Hello, how are you?"},
        "aimage_generation": {"prompt": "Hello, how are you?"},
        "aembedding": {"input": "Hello, how are you?"},
        "arerank": {"input": "Hello, how are you?"},
        "atranscription": {"file": "path/to/file"},
        "aspeech": {"input": "Hello, how are you?", "voice": "female"},
        "acreate_batch": {
            "completion_window": 10,
            "endpoint": "https://test.openai.azure.com",
            "input_file_id": "123",
        },
        "aretrieve_batch": {"batch_id": "123"},
        "aget_assistants": {"custom_llm_provider": "azure"},
        "acreate_assistants": {"custom_llm_provider": "azure"},
        "adelete_assistant": {"custom_llm_provider": "azure", "assistant_id": "123"},
        "acreate_thread": {"custom_llm_provider": "azure"},
        "aget_thread": {"custom_llm_provider": "azure", "thread_id": "123"},
        "a_add_message": {
            "custom_llm_provider": "azure",
            "thread_id": "123",
            "role": "user",
            "content": "Hello, how are you?",
        },
        "aget_messages": {"custom_llm_provider": "azure", "thread_id": "123"},
        "arun_thread": {
            "custom_llm_provider": "azure",
            "assistant_id": "123",
            "thread_id": "123",
        },
        "acreate_file": {
            "custom_llm_provider": "azure",
            "file": MagicMock(),
            "purpose": "assistants",
        },
        "afile_content": {
            "custom_llm_provider": "azure",
            "file_id": "123",
        },
        "afile_delete": {
            "custom_llm_provider": "azure",
            "file_id": "123",
        },
    }

    input_kwarg = test_inputs.get(call_type.value, {})

    # Each call type initializes the SDK client in a different module-level
    # instance; patch the location this call type actually uses.
    patch_target = (
        "litellm.llms.azure.common_utils.BaseAzureLLM.initialize_azure_sdk_client"
    )
    if call_type == CallTypes.arerank:
        patch_target = (
            "litellm.rerank_api.main.azure_rerank.initialize_azure_sdk_client"
        )
    elif call_type in (CallTypes.acreate_batch, CallTypes.aretrieve_batch):
        patch_target = (
            "litellm.batches.main.azure_batches_instance.initialize_azure_sdk_client"
        )
    elif call_type in (
        CallTypes.aget_assistants,
        CallTypes.acreate_assistants,
        CallTypes.adelete_assistant,
        CallTypes.acreate_thread,
        CallTypes.aget_thread,
        CallTypes.a_add_message,
        CallTypes.aget_messages,
        CallTypes.arun_thread,
    ):
        patch_target = (
            "litellm.assistants.main.azure_assistants_api.initialize_azure_sdk_client"
        )
    elif call_type in (CallTypes.acreate_file, CallTypes.afile_content):
        # NOTE(review): afile_delete falls through to the default target —
        # confirm whether it should also patch azure_files_instance.
        patch_target = (
            "litellm.files.main.azure_files_instance.initialize_azure_sdk_client"
        )

    # Mock the initialize_azure_sdk_client function.
    with patch(patch_target) as mock_init_azure:
        router_method = getattr(router, call_type.value, None)
        if router_method is None:
            pytest.skip(
                f"Skipping {call_type.value} because it is not supported on Router"
            )
        try:
            await router_method(
                model="gpt-3.5-turbo",
                **input_kwarg,
                num_retries=0,
                azure_ad_token="oidc/test-token",
            )
        except Exception:
            # Downstream failures are expected (the SDK client init is
            # mocked); only the initialization routing is under test.
            traceback.print_exc()

        # Verify initialize_azure_sdk_client was called with our params.
        mock_init_azure.assert_called_once()
        azure_calls = mock_init_azure.call_args_list
        litellm_params = azure_calls[0].kwargs["litellm_params"]
        assert (
            "azure_ad_token" in litellm_params
        ), "azure_ad_token not found in parameters"
        assert (
            litellm_params["azure_ad_token"] == "oidc/test-token"
        ), "azure_ad_token is not correct"
        for call in azure_calls:
            assert "api_key" in call.kwargs, "api_key not found in parameters"
            assert "api_base" in call.kwargs, "api_base not found in parameters"
@pytest.mark.parametrize(
    "call_type",
    [ct for ct in CallTypes if ct.value in {"acompletion", "atext_completion"}],
)
@pytest.mark.asyncio
async def test_ensure_initialize_azure_sdk_client_always_used_azure_text(call_type):
    """azure_text/* models must also route through
    ``azure_text_completions.initialize_azure_sdk_client``.

    As above, the ``call_type`` parameter had no parametrize/asyncio
    decorators, so pytest could not collect or run this test; the values are
    the key set of ``test_inputs`` below.
    """
    from litellm.router import Router

    # Create a router with an azure_text deployment.
    azure_model_name = "azure_text/chatgpt-v-2"
    router = Router(
        model_list=[
            {
                "model_name": "gpt-3.5-turbo",
                "litellm_params": {
                    "model": azure_model_name,
                    "api_key": "test-api-key",
                    "api_version": os.getenv("AZURE_API_VERSION", "2023-05-15"),
                    "api_base": os.getenv(
                        "AZURE_API_BASE", "https://test.openai.azure.com"
                    ),
                },
            }
        ],
    )

    # Minimal required kwargs per call type.
    test_inputs = {
        "acompletion": {
            "messages": [{"role": "user", "content": "Hello, how are you?"}]
        },
        "atext_completion": {"prompt": "Hello, how are you?"},
    }
    input_kwarg = test_inputs.get(call_type.value, {})

    patch_target = "litellm.main.azure_text_completions.initialize_azure_sdk_client"

    # Mock the initialize_azure_sdk_client function.
    with patch(patch_target) as mock_init_azure:
        router_method = getattr(router, call_type.value, None)
        if router_method is None:
            pytest.skip(
                f"Skipping {call_type.value} because it is not supported on Router"
            )
        try:
            await router_method(
                model="gpt-3.5-turbo",
                **input_kwarg,
                num_retries=0,
                azure_ad_token="oidc/test-token",
            )
        except Exception:
            # Downstream failures are expected; only initialization routing
            # is under test.
            traceback.print_exc()

        # Verify initialize_azure_sdk_client was called with our params.
        mock_init_azure.assert_called_once()
        azure_calls = mock_init_azure.call_args_list
        litellm_params = azure_calls[0].kwargs["litellm_params"]
        assert (
            "azure_ad_token" in litellm_params
        ), "azure_ad_token not found in parameters"
        assert (
            litellm_params["azure_ad_token"] == "oidc/test-token"
        ), "azure_ad_token is not correct"
        for call in azure_calls:
            assert "api_key" in call.kwargs, "api_key not found in parameters"
            assert "api_base" in call.kwargs, "api_base not found in parameters"
# Test parameters for different API functions with Azure models.
# Each entry is (function_name, is_async, call_kwargs); when is_async is True
# the test calls the "a"-prefixed variant (e.g. "acompletion") with the same
# kwargs.  All entries point at the same fake Azure endpoint/key so client
# caching can be exercised without real credentials.
AZURE_API_FUNCTION_PARAMS = [
    # (function_name, is_async, args)
    (
        "completion",
        False,
        {
            "model": "azure/gpt-4",
            "messages": [{"role": "user", "content": "Hello"}],
            "max_tokens": 10,
            "api_key": "test-api-key",
            "api_base": "https://test.openai.azure.com",
            "api_version": "2023-05-15",
        },
    ),
    (
        "completion",
        True,
        {
            "model": "azure/gpt-4",
            "messages": [{"role": "user", "content": "Hello"}],
            "max_tokens": 10,
            # streaming variant — only the async case sets stream=True
            "stream": True,
            "api_key": "test-api-key",
            "api_base": "https://test.openai.azure.com",
            "api_version": "2023-05-15",
        },
    ),
    (
        "embedding",
        False,
        {
            "model": "azure/text-embedding-ada-002",
            "input": "Hello world",
            "api_key": "test-api-key",
            "api_base": "https://test.openai.azure.com",
            "api_version": "2023-05-15",
        },
    ),
    (
        "embedding",
        True,
        {
            "model": "azure/text-embedding-ada-002",
            "input": "Hello world",
            "api_key": "test-api-key",
            "api_base": "https://test.openai.azure.com",
            "api_version": "2023-05-15",
        },
    ),
    (
        "speech",
        False,
        {
            "model": "azure/tts-1",
            "input": "Hello, this is a test of text to speech",
            "voice": "alloy",
            "api_key": "test-api-key",
            "api_base": "https://test.openai.azure.com",
            "api_version": "2023-05-15",
        },
    ),
    (
        "speech",
        True,
        {
            "model": "azure/tts-1",
            "input": "Hello, this is a test of text to speech",
            "voice": "alloy",
            "api_key": "test-api-key",
            "api_base": "https://test.openai.azure.com",
            "api_version": "2023-05-15",
        },
    ),
    (
        "transcription",
        False,
        {
            "model": "azure/whisper-1",
            # NOTE: MagicMock() stands in for a file handle; it is created at
            # import time and shared across all runs of this parametrization.
            "file": MagicMock(),
            "api_key": "test-api-key",
            "api_base": "https://test.openai.azure.com",
            "api_version": "2023-05-15",
        },
    ),
    (
        "transcription",
        True,
        {
            "model": "azure/whisper-1",
            "file": MagicMock(),
            "api_key": "test-api-key",
            "api_base": "https://test.openai.azure.com",
            "api_version": "2023-05-15",
        },
    ),
]
@pytest.mark.parametrize("function_name,is_async,args", AZURE_API_FUNCTION_PARAMS)
@pytest.mark.asyncio
async def test_azure_client_reuse(function_name, is_async, args):
    """
    Test that multiple Azure API calls reuse the same Azure OpenAI client.

    The function previously took (function_name, is_async, args) with no
    decorators, so pytest reported missing fixtures and never ran it; the
    parameters come from the module-level AZURE_API_FUNCTION_PARAMS table.
    """
    litellm.set_verbose = True

    # Determine which client class to mock based on whether the test is async.
    client_path = (
        "litellm.llms.azure.common_utils.AsyncAzureOpenAI"
        if is_async
        else "litellm.llms.azure.common_utils.AzureOpenAI"
    )
    # Create a proper mock instance that can pass isinstance checks.
    mock_client = MagicMock()

    with patch(client_path) as mock_client_class, patch.object(
        BaseAzureLLM, "set_cached_openai_client"
    ) as mock_set_cache, patch.object(
        BaseAzureLLM, "get_cached_openai_client"
    ) as mock_get_cache, patch.object(
        BaseAzureLLM, "initialize_azure_sdk_client"
    ) as mock_init_azure:
        mock_client_class.return_value = mock_client
        # First cache lookup misses; the following nine hit.
        mock_get_cache.side_effect = [None] + [mock_client] * 9
        # initialize_azure_sdk_client returns the minimal client params.
        mock_init_azure.return_value = {
            "api_key": args.get("api_key"),
            "azure_endpoint": args.get("api_base"),
            "api_version": args.get("api_version"),
            "azure_ad_token": None,
            "azure_ad_token_provider": None,
        }

        # Make 10 API calls; exceptions are expected since the client is a mock.
        for _ in range(10):
            try:
                if is_async:
                    # Async entrypoints carry an 'a' prefix (e.g. acompletion).
                    func = getattr(litellm, f"a{function_name}")
                    await func(**args)
                else:
                    func = getattr(litellm, function_name)
                    func(**args)
            except Exception:
                pass

        # Client constructed exactly once …
        assert (
            mock_client_class.call_count == 1
        ), f"{'Async' if is_async else ''}AzureOpenAI client should be created only once"
        # … initialized exactly once …
        assert (
            mock_init_azure.call_count == 1
        ), "initialize_azure_sdk_client should be called once"
        # … cached exactly once …
        assert mock_set_cache.call_count == 1, "Client should be cached once"
        # … and the cache consulted on every request.
        assert mock_get_cache.call_count == 10, "Should check cache for each request"
@pytest.mark.asyncio
async def test_azure_client_cache_separates_sync_and_async():
    """
    Test that the Azure client cache correctly separates sync and async clients.

    This directly tests the fix for issues #9801 and #10318 where sync and
    async clients were being mixed up in the cache.  The function is declared
    ``async`` but had no ``@pytest.mark.asyncio`` marker, so pytest would
    skip/warn instead of running it.
    """
    from litellm.llms.azure.common_utils import BaseAzureLLM

    # Clear the in-memory cache so earlier tests cannot leak clients in.
    litellm.in_memory_llm_clients_cache._cache = {}

    mock_sync_client = MagicMock()
    mock_async_client = MagicMock()

    with patch(
        "litellm.llms.azure.common_utils.AzureOpenAI"
    ) as mock_sync_client_class, patch(
        "litellm.llms.azure.common_utils.AsyncAzureOpenAI"
    ) as mock_async_client_class, patch.object(
        BaseAzureLLM, "initialize_azure_sdk_client"
    ) as mock_init_azure:
        mock_sync_client_class.return_value = mock_sync_client
        mock_async_client_class.return_value = mock_async_client
        # initialize_azure_sdk_client returns the minimal client params.
        mock_init_azure.return_value = {
            "api_key": "test-api-key",
            "azure_endpoint": "https://test.openai.azure.com",
            "api_version": "2023-05-15",
            "azure_ad_token": None,
            "azure_ad_token_provider": None,
        }

        # Identical parameters, differing only in the _is_async flag —
        # the cache must NOT hand one flavor back for the other.
        base_llm = BaseAzureLLM()
        common_params = {
            "api_key": "test-api-key",
            "api_base": "https://test.openai.azure.com",
            "api_version": "2023-05-15",
            "model": "gpt-4",
            "litellm_params": {},
        }
        sync_client = base_llm.get_azure_openai_client(_is_async=False, **common_params)
        async_client = base_llm.get_azure_openai_client(_is_async=True, **common_params)

        # Each flag must yield its own client class …
        assert (
            sync_client is mock_sync_client
        ), "Sync client should be the mock sync client"
        assert (
            async_client is mock_async_client
        ), "Async client should be the mock async client"
        # … each instantiated exactly once …
        assert (
            mock_sync_client_class.call_count == 1
        ), "AzureOpenAI should be instantiated once"
        assert (
            mock_async_client_class.call_count == 1
        ), "AsyncAzureOpenAI should be instantiated once"
        # … with a separate initialization per client type.
        assert (
            mock_init_azure.call_count == 2
        ), "initialize_azure_sdk_client should be called twice"