Spaces:
Configuration error
Configuration error
""" | |
Test Gemini TTS (Text-to-Speech) functionality
""" | |
import os | |
import sys | |
import pytest | |
from unittest.mock import patch, MagicMock | |
sys.path.insert( | |
0, os.path.abspath("../../../..") | |
) # Adds the parent directory to the system path | |
import litellm | |
from litellm.llms.gemini.chat.transformation import GoogleAIStudioGeminiConfig | |
from litellm.utils import get_supported_openai_params | |
class TestGeminiTTSTransformation:
    """Unit tests for Gemini TTS handling in ``GoogleAIStudioGeminiConfig``.

    Covers TTS model detection, the supported-parameter list, and the mapping
    of the OpenAI-style ``audio`` parameter done by ``map_openai_params``.
    """

    # Model names shared by the tests below.
    TTS_MODEL = "gemini-2.5-flash-preview-tts"
    NON_TTS_MODEL = "gemini-2.5-flash"

    @classmethod
    def _map_tts_params(cls, non_default_params, optional_params=None):
        """Run ``map_openai_params`` for the TTS model and return its result.

        Args:
            non_default_params: OpenAI-style parameters to be mapped.
            optional_params: Pre-existing provider parameters; a fresh empty
                dict is used when omitted so tests never share state.

        Returns:
            Whatever ``GoogleAIStudioGeminiConfig.map_openai_params`` returns.
        """
        if optional_params is None:
            optional_params = {}
        return GoogleAIStudioGeminiConfig().map_openai_params(
            non_default_params=non_default_params,
            optional_params=optional_params,
            model=cls.TTS_MODEL,
            drop_params=False,
        )

    def test_gemini_tts_model_detection(self):
        """Test that TTS models are correctly identified"""
        config = GoogleAIStudioGeminiConfig()

        # Identity checks (rather than ``== True``) so that a merely truthy or
        # falsy return value cannot satisfy the assertion.
        # TTS models
        assert config.is_model_gemini_audio_model("gemini-2.5-flash-preview-tts") is True
        assert config.is_model_gemini_audio_model("gemini-2.5-pro-preview-tts") is True

        # Non-TTS models
        assert config.is_model_gemini_audio_model("gemini-2.5-flash") is False
        assert config.is_model_gemini_audio_model("gemini-2.5-pro") is False
        assert config.is_model_gemini_audio_model("gpt-4o-audio-preview") is False

    def test_gemini_tts_supported_params(self):
        """Test that audio parameter is included for TTS models"""
        config = GoogleAIStudioGeminiConfig()

        # TTS model: "audio" is advertised alongside the standard params.
        params = config.get_supported_openai_params(self.TTS_MODEL)
        assert "audio" in params
        assert "temperature" in params
        assert "max_tokens" in params
        assert "modalities" in params

        # Non-TTS model: "audio" must not be advertised.
        params_non_tts = config.get_supported_openai_params(self.NON_TTS_MODEL)
        assert "audio" not in params_non_tts

    def test_gemini_tts_audio_parameter_mapping(self):
        """Test audio parameter mapping for TTS models"""
        result = self._map_tts_params(
            {"audio": {"voice": "Kore", "format": "pcm16"}}
        )

        # Speech config is created, one nesting level per assertion so a
        # failure points at the exact missing key.
        assert "speechConfig" in result
        assert "voiceConfig" in result["speechConfig"]
        assert "prebuiltVoiceConfig" in result["speechConfig"]["voiceConfig"]
        prebuilt_voice = result["speechConfig"]["voiceConfig"]["prebuiltVoiceConfig"]
        assert prebuilt_voice["voiceName"] == "Kore"

        # Response modalities include audio.
        assert "responseModalities" in result
        assert "AUDIO" in result["responseModalities"]

    def test_gemini_tts_audio_parameter_with_existing_modalities(self):
        """Test audio parameter mapping when modalities already exist"""
        result = self._map_tts_params(
            {"audio": {"voice": "Puck", "format": "pcm16"}},
            optional_params={"responseModalities": ["TEXT"]},
        )

        # AUDIO is added to the existing modalities; TEXT is kept.
        assert "responseModalities" in result
        assert "TEXT" in result["responseModalities"]
        assert "AUDIO" in result["responseModalities"]

    def test_gemini_tts_no_audio_parameter(self):
        """Test that non-audio parameters are handled normally"""
        result = self._map_tts_params({"temperature": 0.7, "max_tokens": 100})

        # Without an "audio" param there is no speech config ...
        assert "speechConfig" not in result
        # ... and audio modalities are not added automatically.
        assert "responseModalities" not in result

    def test_gemini_tts_invalid_audio_parameter(self):
        """Test handling of invalid audio parameter"""
        # "audio" should be a dict; a plain string is the invalid case.
        result = self._map_tts_params({"audio": "invalid_string"})

        # No speech config is created for an invalid audio param.
        assert "speechConfig" not in result

    def test_gemini_tts_empty_audio_parameter(self):
        """Test handling of empty audio parameter"""
        result = self._map_tts_params({"audio": {}})

        # Response modalities are still set even with an empty audio config.
        assert "responseModalities" in result
        assert "AUDIO" in result["responseModalities"]

    def test_gemini_tts_audio_format_validation(self):
        """Test audio format validation for TTS models"""
        # "wav" is the unsupported format exercised here.
        invalid_audio = {"audio": {"voice": "Kore", "format": "wav"}}

        with pytest.raises(
            ValueError, match="Unsupported audio format for Gemini TTS models"
        ):
            self._map_tts_params(invalid_audio)

    def test_gemini_tts_utils_integration(self):
        """Test integration with LiteLLM utils functions"""
        # get_supported_openai_params works with TTS models ...
        params = get_supported_openai_params(self.TTS_MODEL, "gemini")
        assert "audio" in params

        # ... and does not advertise "audio" for a non-TTS model.
        params_non_tts = get_supported_openai_params(self.NON_TTS_MODEL, "gemini")
        assert "audio" not in params_non_tts
def test_gemini_tts_completion_mock():
    """Test the call shape of a Gemini TTS completion against a mock.

    ``litellm.completion`` is patched, so this test does not exercise
    LiteLLM's own request handling. It pins two things: the mocked response
    is handed back unchanged, and the call carries the expected ``model``,
    ``messages`` and ``audio`` arguments.
    """
    request_kwargs = {
        "model": "gemini-2.5-flash-preview-tts",
        "messages": [{"role": "user", "content": "Say hello"}],
        "audio": {"voice": "Kore", "format": "pcm16"},
    }

    with patch("litellm.completion") as mock_completion:
        # Mock a successful TTS response.
        mock_response = MagicMock()
        mock_response.choices = [MagicMock()]
        mock_response.choices[0].message.content = "Generated audio response"
        mock_completion.return_value = mock_response

        # Completion call with the audio parameter.
        response = litellm.completion(**request_kwargs)

    # ``is not None`` checks are always true for MagicMock attributes, so
    # compare against the concrete objects/values configured above instead.
    assert response is mock_response
    assert response.choices[0].message.content == "Generated audio response"
    mock_completion.assert_called_once_with(**request_kwargs)
# Allow running this file directly as a script: hand this file's own path to
# pytest so only the tests defined here are run.
if __name__ == "__main__":
    pytest.main(args=[__file__])