ModalTranscriberMCP/src/interfaces/speaker_manager.py
richard-su's picture
Upload folder using huggingface_hub
b5df735 verified
raw
history blame
2.92 kB
"""
Speaker identification and embedding management interfaces
"""
from abc import ABC, abstractmethod
from typing import Dict, Any, Optional, List, Tuple
from dataclasses import dataclass
import numpy as np
@dataclass
class SpeakerEmbedding:
    """Record holding one speaker's embedding vector and its bookkeeping data.

    Equality is implemented explicitly. The ``__eq__`` that ``@dataclass``
    would generate compares ``embedding`` with ``==``; for NumPy arrays that
    yields an element-wise array whose truth value is ambiguous, so comparing
    two records with multi-element vectors raised ``ValueError``.

    Instances remain unhashable, exactly as with the generated ``__eq__``.
    """

    speaker_id: str  # identifier of the speaker
    embedding: np.ndarray  # embedding vector; shape and dtype are not fixed here
    confidence: float  # confidence score; range is not enforced by this class
    source_files: List[str]  # presumably the files this speaker was found in
    sample_count: int  # presumably how many samples back `embedding` -- confirm
    created_at: str  # creation timestamp kept as a string; format not enforced
    updated_at: str  # last-update timestamp kept as a string; format not enforced

    def __eq__(self, other: object) -> bool:
        """Return True when ``other`` is a ``SpeakerEmbedding`` with equal fields.

        Mirrors the dataclass default (same-class check, ``NotImplemented``
        for foreign types) but compares ``embedding`` with
        ``np.array_equal`` so the result is always a plain ``bool``.
        """
        if other.__class__ is not self.__class__:
            return NotImplemented
        # Cheap scalar/list fields first; the array comparison runs last.
        return (
            self.speaker_id == other.speaker_id
            and self.confidence == other.confidence
            and self.source_files == other.source_files
            and self.sample_count == other.sample_count
            and self.created_at == other.created_at
            and self.updated_at == other.updated_at
            and bool(np.array_equal(self.embedding, other.embedding))
        )
@dataclass
class SpeakerSegment:
    """A span of audio attributed to one speaker.

    Plain value object: the four fields are stored as given and no
    validation is performed (for example, ``start <= end`` is not checked).
    """

    start: float  # segment start; unit not fixed here (presumably seconds)
    end: float  # segment end, same unit as `start`
    speaker_id: str  # identifier of the speaker assigned to this span
    confidence: float  # confidence of the assignment; range not enforced
class ISpeakerEmbeddingManager(ABC):
    """Abstract contract for managing a collection of speaker embeddings.

    Every method is an abstract coroutine, so a subclass must override all
    of them before it can be instantiated. No behaviour is implemented
    here; the docstrings describe the intended contract only.
    """

    @abstractmethod
    async def find_matching_speaker(
        self, embedding: np.ndarray, source_file: str
    ) -> Optional[str]:
        """Search the existing embeddings for a speaker matching ``embedding``.

        Args:
            embedding: Embedding vector to look up.
            source_file: File the embedding originates from.

        Returns:
            A speaker ID string, or ``None`` (presumably when nothing
            matches -- the matching rule is left to implementations).
        """

    @abstractmethod
    async def add_or_update_speaker(
        self,
        embedding: np.ndarray,
        source_file: str,
        confidence: float = 1.0,
        original_label: Optional[str] = None,
    ) -> str:
        """Register a new speaker or update an existing one.

        Args:
            embedding: Embedding vector for the speaker.
            source_file: File the embedding originates from.
            confidence: Confidence attached to the embedding; defaults to 1.0.
            original_label: Optional label the speaker carried before this
                call (presumably a file-local label -- confirm with
                implementations).

        Returns:
            A string, presumably the ID of the added or updated speaker.
        """

    @abstractmethod
    async def map_local_to_global_speakers(
        self, local_embeddings: Dict[str, np.ndarray], source_file: str
    ) -> Dict[str, str]:
        """Translate local speaker labels into global speaker IDs.

        Args:
            local_embeddings: Embedding vectors keyed by local speaker label.
            source_file: File the local labels belong to.

        Returns:
            A dict of strings, presumably local label -> global speaker ID.
        """

    @abstractmethod
    async def get_speaker_info(self, speaker_id: str) -> Optional[SpeakerEmbedding]:
        """Fetch the stored record for ``speaker_id``.

        Returns:
            The ``SpeakerEmbedding`` for that ID, or ``None`` (presumably
            when the ID is unknown).
        """

    @abstractmethod
    async def get_all_speakers_summary(self) -> Dict[str, Any]:
        """Produce a summary of all speakers.

        Returns:
            A dict with string keys; its schema is defined by implementations.
        """

    @abstractmethod
    async def save_speakers(self) -> None:
        """Persist the speaker data to storage."""

    @abstractmethod
    async def load_speakers(self) -> None:
        """Load the speaker data from storage."""
class ISpeakerIdentificationService(ABC):
    """Abstract contract for speaker identification operations.

    All three methods are abstract coroutines; a subclass must override
    each of them before it can be instantiated. Nothing is implemented here.
    """

    @abstractmethod
    async def extract_speaker_embeddings(
        self, audio_path: str, segments: List[SpeakerSegment]
    ) -> Dict[str, np.ndarray]:
        """Extract speaker embeddings from segments of an audio file.

        Args:
            audio_path: Path of the audio file.
            segments: Speaker segments to extract embeddings for.

        Returns:
            Embedding vectors keyed by string (presumably the speaker ID or
            label of each segment group -- confirm with implementations).
        """

    @abstractmethod
    async def identify_speakers_in_audio(
        self, audio_path: str, transcription_segments: List[Dict[str, Any]]
    ) -> List[SpeakerSegment]:
        """Identify the speakers present in an audio file.

        Args:
            audio_path: Path of the audio file.
            transcription_segments: Transcription segments as dicts; the
                expected keys are not defined by this interface.

        Returns:
            A list of ``SpeakerSegment`` objects.
        """

    @abstractmethod
    async def map_transcription_to_speakers(
        self,
        transcription_segments: List[Dict[str, Any]],
        speaker_segments: List[SpeakerSegment],
    ) -> List[Dict[str, Any]]:
        """Attach speaker information to transcription segments.

        Args:
            transcription_segments: Transcription segments as dicts; the
                expected keys are not defined by this interface.
            speaker_segments: Speaker segments to match against.

        Returns:
            A list of dicts, presumably the transcription segments enriched
            with speaker fields; the schema is left to implementations.
        """