"""
Test Modal endpoint improvements:

1. Turbo model usage by default
2. Parallel processing for long audio
3. Health check endpoint
4. Better audio encoding/decoding
5. Service architecture decoupling
"""
|
|
|
import asyncio
import os
import sys
from pathlib import Path

import pytest

# Put "<parent of this file's directory>/src" at the front of sys.path
# before importing the project's ``tools`` package below.
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))

from tools.transcription_tools import (  # noqa: E402
    check_modal_endpoints_health,
    get_modal_endpoint_url,
    transcribe_audio_file_tool,
)
|
|
|
|
|
class TestModalImprovements:
    """Test Modal endpoint improvements.

    The tests call ``check_modal_endpoints_health``,
    ``get_modal_endpoint_url`` and ``transcribe_audio_file_tool`` from
    ``tools.transcription_tools``. The two transcription tests need one of
    the cached MP3 files listed in ``_TEST_AUDIO_FILES`` and are skipped
    when neither exists.
    """

    # Candidate audio fixtures, tried in order. The paths are relative to the
    # current working directory.
    _TEST_AUDIO_FILES = (
        "tests/cache/apple_podcast_episode.mp3",
        "tests/cache/xyz_podcast_episode.mp3",
    )

    @classmethod
    def _require_test_audio(cls, purpose):
        """Return the first existing fixture path, or skip the calling test.

        Args:
            purpose: Short label inserted into the skip message, e.g.
                ``"transcription test"``.
        """
        for candidate in cls._TEST_AUDIO_FILES:
            if os.path.exists(candidate):
                return candidate
        pytest.skip(f"No test audio files available for {purpose}")

    @pytest.mark.asyncio
    async def test_modal_health_check(self):
        """Test Modal health check endpoint."""
        print("\n🩺 Testing Modal health check endpoint...")

        health_status = await check_modal_endpoints_health()

        print(f"Health status: {health_status['status']}")
        assert health_status["status"] in ["healthy", "unhealthy"]
        assert "endpoints_available" in health_status

        # The detailed payload is only checked when the service reports
        # itself healthy; an "unhealthy" status is still a valid response.
        if health_status["status"] == "healthy":
            assert health_status["endpoints_available"] is True
            assert "modal_health" in health_status

            modal_health = health_status["modal_health"]
            assert "service" in modal_health
            assert "default_model" in modal_health

            assert modal_health["default_model"] == "turbo"
            print(f"✅ Default model confirmed as: {modal_health['default_model']}")

        print("✅ Health check test completed")

    def test_endpoint_url_configuration(self):
        """Test endpoint URL configuration."""
        print("\n🔗 Testing endpoint URL configuration...")

        endpoints = [
            "transcribe-audio-chunk-endpoint",
            "health-check-endpoint",
        ]

        for endpoint in endpoints:
            url = get_modal_endpoint_url(endpoint)
            assert url.startswith("https://")
            # Compare with hyphens removed from both the name and the URL.
            assert endpoint.replace("-", "") in url.replace("-", "")
            print(f" ✅ {endpoint}: {url}")

        # Unknown endpoint names must be rejected.
        with pytest.raises(ValueError):
            get_modal_endpoint_url("invalid-endpoint")

        print("✅ Endpoint URL configuration test completed")

    @pytest.mark.asyncio
    async def test_turbo_model_transcription(self):
        """Test that turbo model is used by default."""
        print("\n🚀 Testing turbo model transcription...")

        available_file = self._require_test_audio("transcription test")
        print(f"Using test file: {available_file}")

        # No model is passed, so the tool's default model is exercised.
        result = await transcribe_audio_file_tool(
            audio_file_path=available_file,
            use_parallel_processing=False,
        )

        print(f"Transcription status: {result['processing_status']}")

        if result["processing_status"] == "success":
            assert result["model_used"] == "turbo"
            print(f"✅ Confirmed turbo model used: {result['model_used']}")
            print(f" Segments: {result['segment_count']}")
            print(f" Duration: {result['audio_duration']:.2f}s")
        else:
            print(f"⚠️ Transcription failed: {result.get('error_message', 'Unknown error')}")
            # The reported model is still checked when transcription fails.
            assert result["model_used"] == "turbo"

        print("✅ Turbo model transcription test completed")

    @pytest.mark.asyncio
    async def test_parallel_processing_option(self):
        """Test parallel processing option."""
        print("\n⚡ Testing parallel processing option...")

        available_file = self._require_test_audio("parallel processing test")
        print(f"Using test file: {available_file}")

        result = await transcribe_audio_file_tool(
            audio_file_path=available_file,
            use_parallel_processing=True,
            chunk_duration=60,
        )

        print(f"Parallel transcription status: {result['processing_status']}")

        if result["processing_status"] == "success":
            # The parallel-processing fields are optional in the result and
            # are only reported, never asserted.
            if "parallel_processing" in result:
                print(f"✅ Parallel processing enabled: {result['parallel_processing']}")
                if result.get("chunks_processed"):
                    print(f" Chunks processed: {result['chunks_processed']}")

            assert result["model_used"] == "turbo"
            print(f" Model used: {result['model_used']}")
            print(f" Segments: {result['segment_count']}")
            print(f" Duration: {result['audio_duration']:.2f}s")
        else:
            # A failed transcription is reported but does not fail this test.
            print(f"⚠️ Parallel transcription failed: {result.get('error_message', 'Unknown error')}")

        print("✅ Parallel processing test completed")

    @pytest.mark.asyncio
    async def test_service_architecture_decoupling(self):
        """Test that the service architecture is properly decoupled."""
        print("\n🏗️ Testing service architecture decoupling...")

        # 1. The public tool functions must be importable.
        try:
            from tools.transcription_tools import (  # noqa: F401
                check_modal_endpoints_health,
                get_modal_endpoint_url,
                transcribe_audio_file_tool,
            )
            print("✅ Transcription tools import successful")
        except ImportError as e:
            pytest.fail(f"Transcription tools import failed: {e}")

        # 2. Endpoint URLs resolve without calling the service.
        # NOTE(review): test_endpoint_url_configuration uses
        # "transcribe-audio-chunk-endpoint" while this test uses
        # "transcribe-audio-endpoint" - confirm both names are registered.
        try:
            for endpoint in ["transcribe-audio-endpoint", "health-check-endpoint"]:
                url = get_modal_endpoint_url(endpoint)
                assert url.startswith("https://")
            print("✅ Endpoint configuration working independently")
        except Exception as e:
            pytest.fail(f"Endpoint configuration failed: {e}")

        # 3. Health check: best-effort, a failure here is only reported.
        try:
            health_status = await check_modal_endpoints_health()
            assert "status" in health_status
            print("✅ Health check service abstraction working")
        except Exception as e:
            print(f"⚠️ Health check service test failed: {e}")

        # 4. The Modal config module must not contain business logic.
        # Loading the config is best-effort, but once its source has been
        # read the assertions run outside the try block so that a violation
        # actually fails the test instead of being swallowed.
        config_content = None
        try:
            import src.config.modal_config  # noqa: F401  (import check only)

            with open("src/config/modal_config.py", "r", encoding="utf-8") as config_file:
                config_content = config_file.read()
        except Exception as e:
            print(f"⚠️ Config decoupling test failed: {e}")

        if config_content is not None:
            business_logic_indicators = [
                "transcribe_audio_parallel",
                "split_audio_chunks",
                "merge_transcription_results",
            ]

            for indicator in business_logic_indicators:
                assert indicator not in config_content, f"Business logic '{indicator}' found in config"

            print("✅ Modal config properly decoupled from business logic")

        print("✅ Service architecture decoupling test completed")

    def test_model_options_validation(self):
        """Test that model options are properly validated."""
        print("\n🎯 Testing model options validation...")

        try:
            from utils.modal_helpers import validate_transcription_request
        except ImportError:
            # Local stand-in used when the helper module cannot be imported.
            def validate_transcription_request(request_data):
                valid_models = ["tiny", "base", "small", "medium", "large", "turbo"]
                if not request_data.get("audio_file_data"):
                    return False, "Missing audio_file_data field"
                model_size = request_data.get("model_size", "turbo")
                if model_size not in valid_models:
                    return False, f"Invalid model size '{model_size}'. Valid options: {valid_models}"
                return True, ""

        # A well-formed request is accepted with an empty error message.
        valid_request = {
            "audio_file_data": "dGVzdA==",
            "model_size": "turbo",
            "output_format": "srt",
        }

        is_valid, error = validate_transcription_request(valid_request)
        assert is_valid is True
        assert error == ""
        print("✅ Valid request validation passed")

        # An unknown model size is rejected.
        invalid_request = {
            "audio_file_data": "dGVzdA==",
            "model_size": "invalid_model",
            "output_format": "srt",
        }

        is_valid, error = validate_transcription_request(invalid_request)
        assert is_valid is False
        assert "Invalid model size" in error
        print("✅ Invalid model validation passed")

        # A request without audio data is rejected.
        missing_audio_request = {
            "model_size": "turbo",
            "output_format": "srt",
        }

        is_valid, error = validate_transcription_request(missing_audio_request)
        assert is_valid is False
        assert "Missing audio_file_data" in error
        print("✅ Missing audio data validation passed")

        print("✅ Model options validation test completed")
|
|
|
|
|
if __name__ == "__main__":

    async def run_async_tests():
        """Run every test of TestModalImprovements once, outside of pytest.

        A test that calls ``pytest.skip`` is reported and the run continues
        with the next test; any other failure propagates and ends the run.
        """
        test_instance = TestModalImprovements()

        async_tests = (
            test_instance.test_modal_health_check,
            test_instance.test_turbo_model_transcription,
            test_instance.test_parallel_processing_option,
            test_instance.test_service_architecture_decoupling,
        )
        sync_tests = (
            test_instance.test_endpoint_url_configuration,
            test_instance.test_model_options_validation,
        )

        # pytest.skip() raises a BaseException subclass; without this handler
        # a missing audio fixture would abort the whole script.
        for test in async_tests:
            try:
                await test()
            except pytest.skip.Exception as skipped:
                print(f"Skipped {test.__name__}: {skipped}")

        for test in sync_tests:
            try:
                test()
            except pytest.skip.Exception as skipped:
                print(f"Skipped {test.__name__}: {skipped}")

    asyncio.run(run_async_tests())
    print("\n🎉 All Modal improvement tests completed!")