# Extraction residue (not part of the module): "File size: 1,711 Bytes", "b5df735", and a 1-71 line-number gutter.
"""
Speaker detector interface definition
"""
from abc import ABC, abstractmethod
from typing import Dict, List, Optional
from dataclasses import dataclass
import numpy as np
@dataclass
class SpeakerSegment:
    """Speaker segment data class.

    Describes one span attributed to a single speaker.

    Attributes:
        start: Start of the segment (presumably seconds from the beginning
            of the audio -- confirm against producers of this type).
        end: End of the segment, in the same unit as ``start``.
        speaker_id: Identifier of the speaker the segment is attributed to.
        confidence: Optional confidence value for the attribution; ``None``
            when no value was supplied.
    """

    start: float
    end: float
    speaker_id: str
    confidence: Optional[float] = None
@dataclass
class SpeakerProfile:
    """Speaker profile data class.

    Attributes:
        speaker_id: Identifier of the speaker this profile describes.
        embedding: NumPy array associated with the speaker (shape and dtype
            are not fixed by this class).
        segments: Segments attributed to this speaker.
        total_duration: Total duration recorded for this speaker (presumably
            in the same unit as the segment boundaries -- confirm).
    """

    speaker_id: str
    embedding: np.ndarray
    segments: List[SpeakerSegment]
    total_duration: float

    def __eq__(self, other: object) -> bool:
        """Compare two profiles field by field.

        The dataclass-generated ``__eq__`` compares ``embedding`` with ``==``,
        which yields an array and raises ``ValueError`` when that array is
        truth-tested for multi-element embeddings. The array field is
        therefore compared with ``np.array_equal`` instead.

        Returns:
            ``True`` when all fields match, ``False`` otherwise, and
            ``NotImplemented`` when ``other`` is not of the identical class.
        """
        if other.__class__ is not self.__class__:
            return NotImplemented
        # Cheap scalar/list comparisons first; the array comparison runs last.
        return (
            self.speaker_id == other.speaker_id
            and self.total_duration == other.total_duration
            and self.segments == other.segments
            and bool(np.array_equal(self.embedding, other.embedding))
        )
class ISpeakerDetector(ABC):
    """Interface for speaker detection and diarization.

    Concrete detectors must implement every abstract method below; the class
    itself cannot be instantiated.
    """

    @abstractmethod
    async def detect_speakers(
        self,
        audio_file_path: str,
        audio_segments: Optional[List] = None
    ) -> Dict[str, SpeakerProfile]:
        """
        Detect and identify speakers in audio.

        This is a coroutine and must be awaited.

        Args:
            audio_file_path: Path to audio file
            audio_segments: Optional pre-segmented audio; ``None`` when the
                caller supplies no segmentation

        Returns:
            Dictionary mapping speaker IDs to SpeakerProfile objects
        """

    @abstractmethod
    def map_to_global_speakers(
        self,
        local_speakers: Dict[str, SpeakerProfile],
        source_file: str
    ) -> Dict[str, str]:
        """
        Map local speakers to global speaker identities.

        Args:
            local_speakers: Local speaker profiles, keyed by local speaker ID
            source_file: Source audio file path

        Returns:
            Mapping from local speaker ID to global speaker ID
        """

    @abstractmethod
    def get_speaker_summary(self) -> Dict:
        """Get summary of all detected speakers.

        Returns:
            A dictionary; its schema is left to the implementation.
        """