# File size: 2,923 bytes (revision b5df735)
"""
Speaker identification and embedding management interfaces
"""
from abc import ABC, abstractmethod
from typing import Dict, Any, Optional, List, Tuple
from dataclasses import dataclass
import numpy as np
@dataclass
class SpeakerEmbedding:
    """Speaker embedding data structure.

    Attributes:
        speaker_id: String identifier of the speaker.
        embedding: Embedding stored as a NumPy array (shape and dtype are
            not fixed by this class).
        confidence: Confidence value attached to this record.
        source_files: List of source file names/paths for this record.
        sample_count: Integer sample counter for this record.
        created_at: Creation timestamp kept as a string (format is not
            fixed by this class).
        updated_at: Last-update timestamp kept as a string (format is not
            fixed by this class).
    """

    speaker_id: str
    embedding: np.ndarray
    confidence: float
    source_files: List[str]
    sample_count: int
    created_at: str
    updated_at: str

    def __eq__(self, other: object) -> bool:
        """Compare two records field by field.

        The equality method generated by ``@dataclass`` compares
        ``embedding`` with ``==``, which yields an element-wise array whose
        truth value is ambiguous and raises ``ValueError`` for multi-element
        arrays. The array field is therefore compared with
        ``np.array_equal`` instead; every other field uses plain equality.

        Args:
            other: Object to compare against.

        Returns:
            ``True`` when ``other`` is of the identical class and all fields
            match, ``False`` when a field differs, and ``NotImplemented``
            when ``other`` is of a different class.
        """
        if other.__class__ is not self.__class__:
            return NotImplemented

        mine = (
            self.speaker_id,
            self.confidence,
            self.source_files,
            self.sample_count,
            self.created_at,
            self.updated_at,
        )
        theirs = (
            other.speaker_id,
            other.confidence,
            other.source_files,
            other.sample_count,
            other.created_at,
            other.updated_at,
        )
        if mine != theirs:
            return False

        # The identity shortcut keeps a shared array object equal to itself,
        # matching what tuple-based comparison did for that case.
        return self.embedding is other.embedding or bool(
            np.array_equal(self.embedding, other.embedding)
        )
@dataclass
class SpeakerSegment:
    """Speaker segment information.

    Attributes:
        start: Start position of the segment (presumably seconds; the unit
            is not fixed by this class).
        end: End position of the segment, in the same unit as ``start``.
        speaker_id: String identifier of the speaker for this segment.
        confidence: Confidence value attached to the segment.
    """

    start: float
    end: float
    speaker_id: str
    confidence: float
class ISpeakerEmbeddingManager(ABC):
    """Interface for speaker embedding management.

    Every operation is declared as an abstract coroutine, so a concrete
    subclass must override all of them before it can be instantiated.
    """

    @abstractmethod
    async def find_matching_speaker(
        self, embedding: np.ndarray, source_file: str
    ) -> Optional[str]:
        """Find a matching speaker among the existing embeddings.

        Args:
            embedding: Embedding to look up.
            source_file: Source file associated with ``embedding``.

        Returns:
            A string for the match (presumably the speaker ID) or ``None``
            (presumably when nothing matches); the matching criterion is
            left to the implementation.
        """

    @abstractmethod
    async def add_or_update_speaker(
        self,
        embedding: np.ndarray,
        source_file: str,
        confidence: float = 1.0,
        original_label: Optional[str] = None,
    ) -> str:
        """Add a new speaker or update an existing one.

        Args:
            embedding: Embedding for the speaker.
            source_file: Source file associated with ``embedding``.
            confidence: Confidence value for the embedding; defaults to 1.0.
            original_label: Optional label; its use is left to the
                implementation.

        Returns:
            A string, presumably the ID of the added or updated speaker.
        """

    @abstractmethod
    async def map_local_to_global_speakers(
        self, local_embeddings: Dict[str, np.ndarray], source_file: str
    ) -> Dict[str, str]:
        """Map local speaker labels to global speaker IDs.

        Args:
            local_embeddings: Embeddings keyed by string (presumably the
                local speaker label).
            source_file: Source file associated with the embeddings.

        Returns:
            A string-to-string mapping from local labels to global IDs.
        """

    @abstractmethod
    async def get_speaker_info(self, speaker_id: str) -> Optional[SpeakerEmbedding]:
        """Get speaker information by ID.

        Args:
            speaker_id: ID of the speaker to look up.

        Returns:
            The speaker's ``SpeakerEmbedding`` or ``None`` (presumably when
            the ID is unknown).
        """

    @abstractmethod
    async def get_all_speakers_summary(self) -> Dict[str, Any]:
        """Get a summary of all speakers.

        Returns:
            A dictionary with string keys; its contents are left to the
            implementation.
        """

    @abstractmethod
    async def save_speakers(self) -> None:
        """Save speaker data to storage."""

    @abstractmethod
    async def load_speakers(self) -> None:
        """Load speaker data from storage."""
class ISpeakerIdentificationService(ABC):
    """Interface for speaker identification operations.

    Every operation is declared as an abstract coroutine, so a concrete
    subclass must override all of them before it can be instantiated.
    """

    @abstractmethod
    async def extract_speaker_embeddings(
        self, audio_path: str, segments: List[SpeakerSegment]
    ) -> Dict[str, np.ndarray]:
        """Extract speaker embeddings from audio segments.

        Args:
            audio_path: Path of the audio to read.
            segments: Speaker segments to extract embeddings for.

        Returns:
            Embeddings keyed by string (presumably the speaker label or ID
            of the segments; confirm against implementations).
        """

    @abstractmethod
    async def identify_speakers_in_audio(
        self, audio_path: str, transcription_segments: List[Dict[str, Any]]
    ) -> List[SpeakerSegment]:
        """Identify speakers in an audio file.

        Args:
            audio_path: Path of the audio to analyse.
            transcription_segments: Transcription segments as dictionaries;
                their keys are not fixed by this interface.

        Returns:
            A list of ``SpeakerSegment`` entries.
        """

    @abstractmethod
    async def map_transcription_to_speakers(
        self,
        transcription_segments: List[Dict[str, Any]],
        speaker_segments: List[SpeakerSegment],
    ) -> List[Dict[str, Any]]:
        """Map transcription segments to speaker information.

        Args:
            transcription_segments: Transcription segments as dictionaries;
                their keys are not fixed by this interface.
            speaker_segments: Speaker segments to map against.

        Returns:
            A list of dictionaries; their keys are left to the
            implementation.
        """