ModalTranscriberMCP / tests /test_segmentation_fallback.py
richard-su's picture
Upload folder using huggingface_hub
76f9cd2 verified
raw
history blame
9.51 kB
"""
Test segmentation fallback logic for long audio with single segment
"""
import pytest
from unittest.mock import patch, Mock
from src.services.distributed_transcription_service import DistributedTranscriptionService
class TestSegmentationFallback:
"""Test the new segmentation fallback logic"""
def setup_method(self):
"""Setup test environment"""
self.service = DistributedTranscriptionService()
@patch('ffmpeg.probe')
@patch.object(DistributedTranscriptionService, 'split_audio_by_silence')
@patch.object(DistributedTranscriptionService, 'split_audio_by_time')
def test_fallback_for_long_audio_single_segment(self, mock_time_split, mock_silence_split, mock_probe):
"""Test fallback to time-based segmentation when silence detection creates only 1 segment for long audio"""
# Mock 23-minute audio (1380 seconds > 3 minutes)
mock_probe.return_value = {"format": {"duration": "1380.0"}}
# Mock silence-based segmentation returning only 1 segment (failed segmentation)
mock_silence_split.return_value = [
{
"chunk_index": 0,
"start_time": 0.0,
"end_time": 1380.0,
"duration": 1380.0,
"filename": "silence_chunk_000.wav",
"segmentation_type": "silence_based"
}
]
# Mock time-based segmentation with 3-minute chunks
mock_time_split.return_value = [
{"chunk_index": 0, "start_time": 0.0, "end_time": 180.0, "duration": 180.0, "segmentation_type": "time_based"},
{"chunk_index": 1, "start_time": 180.0, "end_time": 360.0, "duration": 180.0, "segmentation_type": "time_based"},
{"chunk_index": 2, "start_time": 360.0, "end_time": 540.0, "duration": 180.0, "segmentation_type": "time_based"},
{"chunk_index": 3, "start_time": 540.0, "end_time": 720.0, "duration": 180.0, "segmentation_type": "time_based"},
{"chunk_index": 4, "start_time": 720.0, "end_time": 900.0, "duration": 180.0, "segmentation_type": "time_based"},
{"chunk_index": 5, "start_time": 900.0, "end_time": 1080.0, "duration": 180.0, "segmentation_type": "time_based"},
{"chunk_index": 6, "start_time": 1080.0, "end_time": 1260.0, "duration": 180.0, "segmentation_type": "time_based"},
{"chunk_index": 7, "start_time": 1260.0, "end_time": 1380.0, "duration": 120.0, "segmentation_type": "time_based"}
]
# Execute the segmentation strategy
segments = self.service.choose_segmentation_strategy(
"test_long_audio.mp3",
use_intelligent_segmentation=True,
chunk_duration=60 # Original chunk duration, but fallback uses 180s
)
# Verify that silence segmentation was tried first
mock_silence_split.assert_called_once()
# Verify that time-based segmentation was called with 3-minute chunks as fallback
mock_time_split.assert_called_once_with("test_long_audio.mp3", chunk_duration=180)
# Verify the returned segments are from time-based segmentation
assert len(segments) == 8 # 1380s / 180s = 7.67 ≈ 8 chunks
assert segments[0]["segmentation_type"] == "time_based"
assert segments[0]["duration"] == 180.0 # 3-minute chunks
@patch('ffmpeg.probe')
@patch.object(DistributedTranscriptionService, 'split_audio_by_silence')
def test_no_fallback_for_short_audio_single_segment(self, mock_silence_split, mock_probe):
"""Test that fallback is NOT triggered for short audio even with single segment"""
# Mock 2-minute audio (120 seconds < 3 minutes)
mock_probe.return_value = {"format": {"duration": "120.0"}}
# Mock silence-based segmentation returning only 1 segment
mock_silence_split.return_value = [
{
"chunk_index": 0,
"start_time": 0.0,
"end_time": 120.0,
"duration": 120.0,
"filename": "silence_chunk_000.wav",
"segmentation_type": "silence_based"
}
]
# Execute the segmentation strategy
segments = self.service.choose_segmentation_strategy(
"test_short_audio.mp3",
use_intelligent_segmentation=True,
chunk_duration=60
)
# Verify that silence segmentation was used and no fallback occurred
mock_silence_split.assert_called_once()
# Verify the returned segments are from silence-based segmentation (no fallback)
assert len(segments) == 1
assert segments[0]["segmentation_type"] == "silence_based"
assert segments[0]["duration"] == 120.0
@patch('ffmpeg.probe')
@patch.object(DistributedTranscriptionService, 'split_audio_by_silence')
def test_no_fallback_for_long_audio_multiple_segments(self, mock_silence_split, mock_probe):
"""Test that fallback is NOT triggered for long audio with multiple segments"""
# Mock 10-minute audio (600 seconds > 3 minutes)
mock_probe.return_value = {"format": {"duration": "600.0"}}
# Mock silence-based segmentation returning multiple segments (successful segmentation)
mock_silence_split.return_value = [
{"chunk_index": 0, "start_time": 0.0, "end_time": 150.0, "duration": 150.0, "segmentation_type": "silence_based"},
{"chunk_index": 1, "start_time": 150.0, "end_time": 320.0, "duration": 170.0, "segmentation_type": "silence_based"},
{"chunk_index": 2, "start_time": 320.0, "end_time": 480.0, "duration": 160.0, "segmentation_type": "silence_based"},
{"chunk_index": 3, "start_time": 480.0, "end_time": 600.0, "duration": 120.0, "segmentation_type": "silence_based"}
]
# Execute the segmentation strategy
segments = self.service.choose_segmentation_strategy(
"test_multi_segment_audio.mp3",
use_intelligent_segmentation=True,
chunk_duration=60
)
# Verify that silence segmentation was used and no fallback occurred
mock_silence_split.assert_called_once()
# Verify the returned segments are from silence-based segmentation (no fallback)
assert len(segments) == 4
assert all(seg["segmentation_type"] == "silence_based" for seg in segments)
@patch('ffmpeg.probe')
def test_fallback_threshold_exactly_3_minutes(self, mock_probe):
"""Test the exact threshold behavior at 3 minutes (180 seconds)"""
# Mock exactly 3-minute audio (180 seconds)
mock_probe.return_value = {"format": {"duration": "180.0"}}
with patch.object(self.service, 'split_audio_by_silence') as mock_silence_split, \
patch.object(self.service, 'split_audio_by_time') as mock_time_split:
# Mock silence-based segmentation returning only 1 segment
mock_silence_split.return_value = [
{"chunk_index": 0, "start_time": 0.0, "end_time": 180.0, "duration": 180.0, "segmentation_type": "silence_based"}
]
mock_time_split.return_value = [
{"chunk_index": 0, "start_time": 0.0, "end_time": 180.0, "duration": 180.0, "segmentation_type": "time_based"}
]
# Execute the segmentation strategy
segments = self.service.choose_segmentation_strategy("test_180s_audio.mp3")
# At exactly 180s, the condition is duration > 180, so no fallback should occur
mock_silence_split.assert_called_once()
mock_time_split.assert_not_called()
assert len(segments) == 1
assert segments[0]["segmentation_type"] == "silence_based"
@patch('ffmpeg.probe')
def test_fallback_threshold_just_over_3_minutes(self, mock_probe):
"""Test the fallback behavior just over 3 minutes (180.1 seconds)"""
# Mock just over 3-minute audio (180.1 seconds)
mock_probe.return_value = {"format": {"duration": "180.1"}}
with patch.object(self.service, 'split_audio_by_silence') as mock_silence_split, \
patch.object(self.service, 'split_audio_by_time') as mock_time_split:
# Mock silence-based segmentation returning only 1 segment
mock_silence_split.return_value = [
{"chunk_index": 0, "start_time": 0.0, "end_time": 180.1, "duration": 180.1, "segmentation_type": "silence_based"}
]
mock_time_split.return_value = [
{"chunk_index": 0, "start_time": 0.0, "end_time": 180.1, "duration": 180.1, "segmentation_type": "time_based"}
]
# Execute the segmentation strategy
segments = self.service.choose_segmentation_strategy("test_180_1s_audio.mp3")
# Just over 180s should trigger fallback
mock_silence_split.assert_called_once()
mock_time_split.assert_called_once_with("test_180_1s_audio.mp3", chunk_duration=180)
assert len(segments) == 1
assert segments[0]["segmentation_type"] == "time_based"
if __name__ == "__main__":
pytest.main([__file__, "-v"])