File size: 3,890 Bytes
b5df735
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
"""
Local transcription adapter for direct processing
"""

import asyncio
from typing import List, Optional

from ..interfaces.transcriber import ITranscriber, TranscriptionResult
from ..utils.config import AudioProcessingConfig
from ..utils.errors import TranscriptionError


class LocalTranscriptionAdapter(ITranscriber):
    """Adapter that performs transcription in-process.

    Implements ``ITranscriber`` by delegating the actual work to
    ``AudioProcessingService.transcribe_full_audio`` and converting the
    service's result object into a ``TranscriptionResult``.
    """

    def __init__(self, config: Optional[AudioProcessingConfig] = None):
        """Store the adapter configuration.

        Args:
            config: Configuration to use. When ``None``, a default
                ``AudioProcessingConfig()`` is created.
        """
        # Explicit ``is None`` so a caller-supplied config is never replaced
        # merely because it happens to evaluate as falsy.
        self.config = config if config is not None else AudioProcessingConfig()

    async def transcribe(
        self,
        audio_file_path: str,
        model_size: str = "turbo",
        language: Optional[str] = None,
        enable_speaker_diarization: bool = False
    ) -> TranscriptionResult:
        """Transcribe an audio file using the local audio processing service.

        Args:
            audio_file_path: Path of the audio file to transcribe.
            model_size: Model size identifier forwarded to the service.
            language: Language code forwarded to the service, or ``None``.
            enable_speaker_diarization: Forwarded to the service request.

        Returns:
            The service result converted to a ``TranscriptionResult``.

        Raises:
            TranscriptionError: If importing the service, building the
                request, running the transcription, or converting the result
                raises any ``Exception``. The original exception is attached
                as ``__cause__``.
        """
        try:
            # Imported at call time rather than at module import
            # (presumably to avoid a circular import - TODO confirm).
            from ..services.audio_processing_service import AudioProcessingService
            from ..models.services import AudioProcessingRequest

            print(f"🔄 Starting local transcription for: {audio_file_path}")
            print(f"🚀 Running transcription with {model_size} model...")

            audio_service = AudioProcessingService()
            request = AudioProcessingRequest(
                audio_file_path=audio_file_path,
                model_size=model_size,
                language=language,
                output_format="json",
                enable_speaker_diarization=enable_speaker_diarization
            )

            # transcribe_full_audio is a plain synchronous call; running it
            # directly inside this coroutine would stall the event loop for
            # the whole transcription, so hand it to the default executor.
            # NOTE(review): assumes the service call is safe to run off the
            # event-loop thread - confirm.
            loop = asyncio.get_running_loop()
            result = await loop.run_in_executor(
                None, audio_service.transcribe_full_audio, request
            )

            return self._convert_service_result(result)

        except asyncio.CancelledError:
            # Task cancellation is not a transcription failure; never wrap it
            # (on Python 3.7 CancelledError still derives from Exception).
            raise
        except Exception as e:
            # Chain explicitly so the root cause stays in the traceback.
            raise TranscriptionError(
                f"Local transcription failed: {str(e)}",
                model=model_size,
                audio_file=audio_file_path
            ) from e

    def get_supported_models(self) -> List[str]:
        """Return the keys of ``self.config.whisper_models`` as a list."""
        return list(self.config.whisper_models)

    def get_supported_languages(self) -> List[str]:
        """Return the hard-coded list of supported language codes.

        The list is static; it is not queried from the transcription backend.
        """
        # This would normally come from Whisper's supported languages
        return ["en", "zh", "ja", "ko", "es", "fr", "de", "ru", "auto"]

    def _convert_service_result(self, service_result) -> TranscriptionResult:
        """Convert a service result object into a ``TranscriptionResult``.

        Every field is read with ``getattr`` and a fallback default, so a
        result object missing some attributes still converts.

        Args:
            service_result: Object returned by the audio processing service.

        Returns:
            A ``TranscriptionResult`` populated from ``service_result``; the
            service's ``language_detected`` attribute maps to ``language``.
        """
        from ..interfaces.transcriber import TranscriptionSegment

        # A missing, None or empty ``segments`` attribute yields no segments.
        raw_segments = getattr(service_result, 'segments', None) or []
        segments = [
            TranscriptionSegment(
                start=getattr(seg, 'start', 0),
                end=getattr(seg, 'end', 0),
                text=getattr(seg, 'text', ''),
                speaker=getattr(seg, 'speaker', None)
            )
            for seg in raw_segments
        ]

        return TranscriptionResult(
            text=getattr(service_result, 'text', ''),
            segments=segments,
            language=getattr(service_result, 'language_detected', 'unknown'),
            model_used=getattr(service_result, 'model_used', 'unknown'),
            audio_duration=getattr(service_result, 'audio_duration', 0),
            processing_time=getattr(service_result, 'processing_time', 0),
            speaker_diarization_enabled=getattr(service_result, 'speaker_diarization_enabled', False),
            global_speaker_count=getattr(service_result, 'global_speaker_count', 0),
            error_message=getattr(service_result, 'error_message', None)
        )