"""
Test segmentation fallback logic for long audio with a single segment.
"""
|
|
|
import pytest |
|
from unittest.mock import patch, Mock |
|
from src.services.distributed_transcription_service import DistributedTranscriptionService |
|
|
|
|
|
class TestSegmentationFallback:
    """Tests for the fallback from silence-based to time-based segmentation.

    Every test drives ``DistributedTranscriptionService.choose_segmentation_strategy``
    with ``ffmpeg.probe`` and the two splitter methods mocked, so no real
    audio file or ffmpeg binary is touched. The expectations encoded here are:

    * a single silence-based segment for audio longer than 180 seconds
      triggers ``split_audio_by_time`` with ``chunk_duration=180``;
    * a single segment for audio of 180 seconds or less is kept as is;
    * several silence-based segments are kept as is.
    """

    # Chunk length (in seconds) these tests expect the fallback to pass to
    # ``split_audio_by_time``, regardless of the ``chunk_duration`` argument
    # given to ``choose_segmentation_strategy``.
    FALLBACK_CHUNK_SECONDS = 180

    @staticmethod
    def _segment(index, start, end, segmentation_type, **extra):
        """Build one segment dict in the shape the mocked splitters return.

        ``duration`` is derived as ``end - start``; any additional keys
        (for example ``filename``) are passed through via ``extra``.
        """
        return {
            "chunk_index": index,
            "start_time": start,
            "end_time": end,
            "duration": end - start,
            "segmentation_type": segmentation_type,
            **extra,
        }

    def setup_method(self):
        """Create a fresh service instance before each test."""
        self.service = DistributedTranscriptionService()

    @patch('ffmpeg.probe')
    @patch.object(DistributedTranscriptionService, 'split_audio_by_silence')
    @patch.object(DistributedTranscriptionService, 'split_audio_by_time')
    def test_fallback_for_long_audio_single_segment(self, mock_time_split, mock_silence_split, mock_probe):
        """Long audio with one silence-based segment falls back to time-based splitting."""
        # 23 minutes of audio, as reported by the mocked ffmpeg probe.
        mock_probe.return_value = {"format": {"duration": "1380.0"}}

        # Silence detection found no usable split point: one segment spans
        # the whole file.
        mock_silence_split.return_value = [
            self._segment(
                0, 0.0, 1380.0, "silence_based",
                filename="silence_chunk_000.wav",
            )
        ]

        # Seven full 180-second chunks followed by a 120-second remainder.
        boundaries = [0.0, 180.0, 360.0, 540.0, 720.0, 900.0, 1080.0, 1260.0, 1380.0]
        mock_time_split.return_value = [
            self._segment(index, start, end, "time_based")
            for index, (start, end) in enumerate(zip(boundaries, boundaries[1:]))
        ]

        segments = self.service.choose_segmentation_strategy(
            "test_long_audio.mp3",
            use_intelligent_segmentation=True,
            chunk_duration=60,
        )

        # Silence-based splitting is attempted first ...
        mock_silence_split.assert_called_once()
        # ... and the fallback is expected to use 180-second chunks even
        # though chunk_duration=60 was requested.
        mock_time_split.assert_called_once_with(
            "test_long_audio.mp3", chunk_duration=self.FALLBACK_CHUNK_SECONDS
        )

        assert len(segments) == 8
        assert segments[0]["segmentation_type"] == "time_based"
        assert segments[0]["duration"] == 180.0

    @patch('ffmpeg.probe')
    @patch.object(DistributedTranscriptionService, 'split_audio_by_silence')
    @patch.object(DistributedTranscriptionService, 'split_audio_by_time')
    def test_no_fallback_for_short_audio_single_segment(self, mock_time_split, mock_silence_split, mock_probe):
        """Short audio keeps its single silence-based segment; no fallback occurs."""
        # Two minutes of audio: below the duration at which the fallback is
        # expected to kick in.
        mock_probe.return_value = {"format": {"duration": "120.0"}}

        mock_silence_split.return_value = [
            self._segment(
                0, 0.0, 120.0, "silence_based",
                filename="silence_chunk_000.wav",
            )
        ]

        segments = self.service.choose_segmentation_strategy(
            "test_short_audio.mp3",
            use_intelligent_segmentation=True,
            chunk_duration=60,
        )

        mock_silence_split.assert_called_once()
        # The time-based splitter is patched so that an unexpected fallback
        # fails here explicitly instead of running against a missing file.
        mock_time_split.assert_not_called()

        assert len(segments) == 1
        assert segments[0]["segmentation_type"] == "silence_based"
        assert segments[0]["duration"] == 120.0

    @patch('ffmpeg.probe')
    @patch.object(DistributedTranscriptionService, 'split_audio_by_silence')
    @patch.object(DistributedTranscriptionService, 'split_audio_by_time')
    def test_no_fallback_for_long_audio_multiple_segments(self, mock_time_split, mock_silence_split, mock_probe):
        """Long audio that silence detection split into several segments is left alone."""
        # Ten minutes of audio.
        mock_probe.return_value = {"format": {"duration": "600.0"}}

        mock_silence_split.return_value = [
            self._segment(0, 0.0, 150.0, "silence_based"),
            self._segment(1, 150.0, 320.0, "silence_based"),
            self._segment(2, 320.0, 480.0, "silence_based"),
            self._segment(3, 480.0, 600.0, "silence_based"),
        ]

        segments = self.service.choose_segmentation_strategy(
            "test_multi_segment_audio.mp3",
            use_intelligent_segmentation=True,
            chunk_duration=60,
        )

        mock_silence_split.assert_called_once()
        # More than one segment was produced, so the fallback must not run.
        mock_time_split.assert_not_called()

        assert len(segments) == 4
        assert all(seg["segmentation_type"] == "silence_based" for seg in segments)

    @patch('ffmpeg.probe')
    def test_fallback_threshold_exactly_3_minutes(self, mock_probe):
        """Audio of exactly 180 seconds does not trigger the fallback."""
        mock_probe.return_value = {"format": {"duration": "180.0"}}

        with patch.object(self.service, 'split_audio_by_silence') as mock_silence_split, \
                patch.object(self.service, 'split_audio_by_time') as mock_time_split:
            mock_silence_split.return_value = [
                self._segment(0, 0.0, 180.0, "silence_based")
            ]
            mock_time_split.return_value = [
                self._segment(0, 0.0, 180.0, "time_based")
            ]

            segments = self.service.choose_segmentation_strategy("test_180s_audio.mp3")

            mock_silence_split.assert_called_once()
            # The boundary value itself is expected to stay on the
            # silence-based path.
            mock_time_split.assert_not_called()

            assert len(segments) == 1
            assert segments[0]["segmentation_type"] == "silence_based"

    @patch('ffmpeg.probe')
    def test_fallback_threshold_just_over_3_minutes(self, mock_probe):
        """Audio of 180.1 seconds (just over the boundary) triggers the fallback."""
        mock_probe.return_value = {"format": {"duration": "180.1"}}

        with patch.object(self.service, 'split_audio_by_silence') as mock_silence_split, \
                patch.object(self.service, 'split_audio_by_time') as mock_time_split:
            mock_silence_split.return_value = [
                self._segment(0, 0.0, 180.1, "silence_based")
            ]
            mock_time_split.return_value = [
                self._segment(0, 0.0, 180.1, "time_based")
            ]

            segments = self.service.choose_segmentation_strategy("test_180_1s_audio.mp3")

            mock_silence_split.assert_called_once()
            mock_time_split.assert_called_once_with(
                "test_180_1s_audio.mp3", chunk_duration=self.FALLBACK_CHUNK_SECONDS
            )

            assert len(segments) == 1
            assert segments[0]["segmentation_type"] == "time_based"
|
|
|
|
|
if __name__ == "__main__":
    # Allow running this file directly. pytest.main returns the session's
    # exit code; propagate it so a failing run exits non-zero instead of
    # always reporting success to the calling shell or CI job.
    raise SystemExit(pytest.main([__file__, "-v"]))