File size: 1,711 Bytes
b5df735
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
"""
Speaker detector interface definition and the data classes it exchanges
"""

from abc import ABC, abstractmethod
from typing import Dict, List, Optional
from dataclasses import dataclass
import numpy as np


@dataclass
class SpeakerSegment:
    """A single stretch of audio attributed to one speaker.

    Attributes:
        start: Position where the segment begins (unit is not fixed by
            this class; presumably seconds -- confirm with producers).
        end: Position where the segment ends (same unit as ``start``).
        speaker_id: Identifier of the speaker the segment belongs to.
        confidence: Optional score attached to the attribution; ``None``
            when no score was supplied.
    """

    # Segment boundaries; no ordering between them is enforced here.
    start: float
    end: float

    # Speaker the segment is assigned to.
    speaker_id: str

    # Left as None unless the caller provides a score.
    confidence: Optional[float] = None


@dataclass
class SpeakerProfile:
    """Aggregated data describing one speaker.

    Attributes:
        speaker_id: Identifier of the speaker.
        embedding: Array associated with the speaker. Its shape and dtype
            are not constrained by this class.
        segments: Segments attributed to this speaker.
        total_duration: Duration value stored for the speaker. It is
            supplied by the caller and is not derived from ``segments``
            here.

    Equality is implemented explicitly. The ``__eq__`` that ``@dataclass``
    would generate truth-tests ``embedding == embedding``, which raises
    ``ValueError`` for arrays holding more than one element. As with the
    generated version, instances remain unhashable.
    """
    speaker_id: str
    embedding: np.ndarray
    segments: List[SpeakerSegment]
    total_duration: float

    def __eq__(self, other: object) -> bool:
        """Compare two profiles field by field, element-wise for the array.

        Args:
            other: Object to compare against.

        Returns:
            ``True`` when ``other`` is of the identical class and every
            field matches; ``NotImplemented`` for any other type so that
            Python can fall back to the reflected comparison.
        """
        # Same strict type rule the dataclass-generated __eq__ applies.
        if other.__class__ is not self.__class__:
            return NotImplemented
        if self.speaker_id != other.speaker_id:
            return False
        # Identity short-circuit keeps "same array object" equal even when
        # it contains NaN; otherwise compare shape and contents.
        same_embedding = self.embedding is other.embedding or np.array_equal(
            self.embedding, other.embedding
        )
        if not same_embedding:
            return False
        return bool(
            self.segments == other.segments
            and self.total_duration == other.total_duration
        )


class ISpeakerDetector(ABC):
    """Abstract contract for components that detect and diarize speakers.

    Concrete detectors must override every method below; the base
    versions carry no logic and simply return ``None``.
    """

    @abstractmethod
    async def detect_speakers(
        self,
        audio_file_path: str,
        audio_segments: Optional[List] = None
    ) -> Dict[str, SpeakerProfile]:
        """
        Find the speakers present in an audio file and profile each one.

        This is a coroutine; implementations are awaited by the caller.

        Args:
            audio_file_path: Location of the audio file to analyse.
            audio_segments: Pre-segmented audio, if already available;
                ``None`` when no segmentation is supplied.

        Returns:
            A dictionary keyed by speaker ID whose values are the
            corresponding SpeakerProfile objects.
        """
        ...

    @abstractmethod
    def map_to_global_speakers(
        self,
        local_speakers: Dict[str, SpeakerProfile],
        source_file: str
    ) -> Dict[str, str]:
        """
        Translate per-file speaker IDs into global speaker identities.

        Args:
            local_speakers: Speaker profiles keyed by their local ID.
            source_file: Path of the audio file the profiles came from.

        Returns:
            A dictionary from each local speaker ID to its global
            speaker ID.
        """
        ...

    @abstractmethod
    def get_speaker_summary(self) -> Dict:
        """Return a summary covering all detected speakers."""
        ...