File size: 11,793 Bytes
76f9cd2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
"""
Test Modal endpoint improvements:
1. Turbo model usage by default
2. Parallel processing for long audio
3. Health check endpoint  
4. Better audio encoding/decoding
5. Service architecture decoupling
"""

import pytest
import asyncio
import os
import sys
from pathlib import Path

# Add src to path for imports
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))

from tools.transcription_tools import (
    transcribe_audio_file_tool, 
    check_modal_endpoints_health,
    get_modal_endpoint_url
)


class TestModalImprovements:
    """Test Modal endpoint improvements"""
    
    @pytest.mark.asyncio
    async def test_modal_health_check(self):
        """Test Modal health check endpoint"""
        print("\n🩺 Testing Modal health check endpoint...")
        
        health_status = await check_modal_endpoints_health()
        
        print(f"Health status: {health_status['status']}")
        assert health_status["status"] in ["healthy", "unhealthy"]
        assert "endpoints_available" in health_status
        
        if health_status["status"] == "healthy":
            assert health_status["endpoints_available"] is True
            assert "modal_health" in health_status
            
            modal_health = health_status["modal_health"]
            assert "service" in modal_health
            assert "default_model" in modal_health
            
            # Verify turbo is the default model
            assert modal_health["default_model"] == "turbo"
            print(f"βœ… Default model confirmed as: {modal_health['default_model']}")
        
        print("βœ… Health check test completed")
    
    def test_endpoint_url_configuration(self):
        """Test endpoint URL configuration"""
        print("\nπŸ”— Testing endpoint URL configuration...")
        
        # Test all known endpoints
        endpoints = [
            "transcribe-audio-chunk-endpoint",
            "health-check-endpoint"
            # Note: Download endpoints removed - downloads now handled locally
        ]
        
        for endpoint in endpoints:
            url = get_modal_endpoint_url(endpoint)
            assert url.startswith("https://")
            assert endpoint.replace("-", "") in url.replace("-", "")
            print(f"  βœ… {endpoint}: {url}")
        
        # Test invalid endpoint
        with pytest.raises(ValueError):
            get_modal_endpoint_url("invalid-endpoint")
        
        print("βœ… Endpoint URL configuration test completed")
    
    @pytest.mark.asyncio
    async def test_turbo_model_transcription(self):
        """Test that turbo model is used by default"""
        print("\nπŸš€ Testing turbo model transcription...")
        
        # Check if we have test audio files
        test_audio_files = [
            "tests/cache/apple_podcast_episode.mp3",
            "tests/cache/xyz_podcast_episode.mp3"
        ]
        
        available_file = None
        for file_path in test_audio_files:
            if os.path.exists(file_path):
                available_file = file_path
                break
        
        if not available_file:
            pytest.skip("No test audio files available for transcription test")
        
        print(f"Using test file: {available_file}")
        
        # Test with default model (should be turbo)
        result = await transcribe_audio_file_tool(
            audio_file_path=available_file,
            use_parallel_processing=False  # Use single processing for faster test
        )
        
        print(f"Transcription status: {result['processing_status']}")
        
        if result["processing_status"] == "success":
            # Verify turbo model was used
            assert result["model_used"] == "turbo"
            print(f"βœ… Confirmed turbo model used: {result['model_used']}")
            print(f"   Segments: {result['segment_count']}")
            print(f"   Duration: {result['audio_duration']:.2f}s")
        else:
            print(f"⚠️ Transcription failed: {result.get('error_message', 'Unknown error')}")
            # Still check that turbo was attempted
            assert result["model_used"] == "turbo"
        
        print("βœ… Turbo model transcription test completed")
    
    @pytest.mark.asyncio
    async def test_parallel_processing_option(self):
        """Test parallel processing option"""
        print("\n⚑ Testing parallel processing option...")
        
        # Check if we have test audio files  
        test_audio_files = [
            "tests/cache/apple_podcast_episode.mp3",
            "tests/cache/xyz_podcast_episode.mp3"
        ]
        
        available_file = None
        for file_path in test_audio_files:
            if os.path.exists(file_path):
                available_file = file_path
                break
        
        if not available_file:
            pytest.skip("No test audio files available for parallel processing test")
        
        print(f"Using test file: {available_file}")
        
        # Test with parallel processing enabled
        result = await transcribe_audio_file_tool(
            audio_file_path=available_file,
            use_parallel_processing=True,
            chunk_duration=60  # 1 minute chunks for testing
        )
        
        print(f"Parallel transcription status: {result['processing_status']}")
        
        if result["processing_status"] == "success":
            # Check if parallel processing was used
            if "parallel_processing" in result:
                print(f"βœ… Parallel processing enabled: {result['parallel_processing']}")
                if result.get("chunks_processed"):
                    print(f"   Chunks processed: {result['chunks_processed']}")
            
            assert result["model_used"] == "turbo"
            print(f"   Model used: {result['model_used']}")
            print(f"   Segments: {result['segment_count']}")
            print(f"   Duration: {result['audio_duration']:.2f}s")
        else:
            print(f"⚠️ Parallel transcription failed: {result.get('error_message', 'Unknown error')}")
        
        print("βœ… Parallel processing test completed")
    
    @pytest.mark.asyncio
    async def test_service_architecture_decoupling(self):
        """Test that the service architecture is properly decoupled"""
        print("\nπŸ—οΈ Testing service architecture decoupling...")
        
        # Test that transcription tools can work independently
        try:
            from tools.transcription_tools import (
                transcribe_audio_file_tool,
                check_modal_endpoints_health,
                get_modal_endpoint_url
            )
            print("βœ… Transcription tools import successful")
        except ImportError as e:
            pytest.fail(f"Transcription tools import failed: {e}")
        
        # Test endpoint URL configuration (architectural decoupling)
        try:
            urls = {}
            for endpoint in ["transcribe-audio-endpoint", "health-check-endpoint"]:
                url = get_modal_endpoint_url(endpoint)
                urls[endpoint] = url
                assert url.startswith("https://")
            print("βœ… Endpoint configuration working independently")
        except Exception as e:
            pytest.fail(f"Endpoint configuration failed: {e}")
        
        # Test health check functionality (service layer abstraction)
        try:
            health_status = await check_modal_endpoints_health()
            assert "status" in health_status
            print("βœ… Health check service abstraction working")
        except Exception as e:
            print(f"⚠️ Health check service test failed: {e}")
        
        # Test that Modal config is properly decoupled from business logic
        try:
            import src.config.modal_config as modal_config
            # Check that modal_config only contains configuration, not business logic
            config_content = open("src/config/modal_config.py", "r").read()
            
            # These should NOT be in the config file (business logic)
            business_logic_indicators = [
                "transcribe_audio_parallel", 
                "split_audio_chunks",
                "merge_transcription_results"
            ]
            
            for indicator in business_logic_indicators:
                assert indicator not in config_content, f"Business logic '{indicator}' found in config"
            
            print("βœ… Modal config properly decoupled from business logic")
        except Exception as e:
            print(f"⚠️ Config decoupling test failed: {e}")
        
        print("βœ… Service architecture decoupling test completed")

    def test_model_options_validation(self):
        """Test that model options are properly validated"""
        print("\n🎯 Testing model options validation...")
        
        # Import directly from the file to avoid package import issues
        import sys
        import os
        sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "src"))
        
        try:
            from utils.modal_helpers import validate_transcription_request
        except ImportError:
            # If import fails, create a simple local validation function for testing
            def validate_transcription_request(request_data):
                valid_models = ["tiny", "base", "small", "medium", "large", "turbo"]
                if not request_data.get("audio_file_data"):
                    return False, "Missing audio_file_data field"
                model_size = request_data.get("model_size", "turbo")
                if model_size not in valid_models:
                    return False, f"Invalid model size '{model_size}'. Valid options: {valid_models}"
                return True, ""
        
        # Test valid request
        valid_request = {
            "audio_file_data": "dGVzdA==",  # base64 encoded "test"
            "model_size": "turbo",
            "output_format": "srt"
        }
        
        is_valid, error = validate_transcription_request(valid_request)
        assert is_valid is True
        assert error == ""
        print("βœ… Valid request validation passed")
        
        # Test invalid model
        invalid_request = {
            "audio_file_data": "dGVzdA==",
            "model_size": "invalid_model",
            "output_format": "srt"
        }
        
        is_valid, error = validate_transcription_request(invalid_request)
        assert is_valid is False
        assert "Invalid model size" in error
        print("βœ… Invalid model validation passed")
        
        # Test missing audio data
        missing_audio_request = {
            "model_size": "turbo",
            "output_format": "srt"
        }
        
        is_valid, error = validate_transcription_request(missing_audio_request)
        assert is_valid is False
        assert "Missing audio_file_data" in error
        print("βœ… Missing audio data validation passed")
        
        print("βœ… Model options validation test completed")


if __name__ == "__main__":
    # Run tests directly
    import asyncio
    
    async def run_async_tests():
        test_instance = TestModalImprovements()
        
        # Run async tests
        await test_instance.test_modal_health_check()
        await test_instance.test_turbo_model_transcription()  
        await test_instance.test_parallel_processing_option()
        await test_instance.test_service_architecture_decoupling()
        
        # Run sync tests
        test_instance.test_endpoint_url_configuration()
        test_instance.test_model_options_validation()
    
    asyncio.run(run_async_tests())
    print("\nπŸŽ‰ All Modal improvement tests completed!")