Spaces:

Agents-MCP-Hackathon
/

ModalTranscriberMCP

Running

App Files Files Community

ModalTranscriberMCP / src /interfaces /speaker_manager.py

richard-su

Upload folder using huggingface_hub

b5df735 verified 8 days ago

raw

history blame

2.92 kB

	"""
	Speaker identification and embedding management interfaces
	"""

	from abc import ABC, abstractmethod
	from typing import Dict, Any, Optional, List, Tuple
	from dataclasses import dataclass
	import numpy as np


	@dataclass
	class SpeakerEmbedding:
	    """Speaker embedding data structure.

	    Attributes:
	        speaker_id: Identifier of the speaker this record describes.
	        embedding: Embedding vector stored for the speaker.
	        confidence: Confidence score attached to the record (range is not
	            fixed by this class).
	        source_files: Files associated with the speaker; presumably the
	            audio files the embedding was derived from -- confirm against
	            the implementation.
	        sample_count: Number of samples; presumably how many observations
	            contributed to ``embedding`` -- confirm against the
	            implementation.
	        created_at: Creation timestamp as a string (format not fixed here).
	        updated_at: Last-update timestamp as a string (format not fixed
	            here).
	    """

	    speaker_id: str
	    embedding: np.ndarray
	    confidence: float
	    source_files: List[str]
	    sample_count: int
	    created_at: str
	    updated_at: str

	    def __eq__(self, other: object) -> bool:
	        """Compare two records field by field.

	        The dataclass-generated ``__eq__`` compares the ``embedding`` arrays
	        with ``==`` and then takes the truth value of the element-wise
	        result, which raises ``ValueError`` for arrays with more than one
	        element. The embedding is therefore compared with
	        ``np.array_equal`` so the comparison always yields a plain bool.

	        Returns:
	            ``True`` when every field matches (embeddings must have the same
	            shape and elements), ``False`` otherwise, and ``NotImplemented``
	            when ``other`` is not of the identical class.
	        """
	        # Same strict type rule the generated dataclass __eq__ applies.
	        if other.__class__ is not self.__class__:
	            return NotImplemented
	        mine = (
	            self.speaker_id,
	            self.confidence,
	            self.source_files,
	            self.sample_count,
	            self.created_at,
	            self.updated_at,
	        )
	        theirs = (
	            other.speaker_id,
	            other.confidence,
	            other.source_files,
	            other.sample_count,
	            other.created_at,
	            other.updated_at,
	        )
	        # Cheap scalar fields first; only then touch the array data.
	        return mine == theirs and bool(
	            np.array_equal(self.embedding, other.embedding)
	        )


	@dataclass
	class SpeakerSegment:
	    """One span of audio attributed to a single speaker."""

	    # Start of the span (presumably seconds into the audio -- TODO confirm).
	    start: float
	    # End of the span (presumably the same unit as ``start`` -- TODO confirm).
	    end: float
	    # Identifier of the speaker assigned to this span.
	    speaker_id: str
	    # Confidence score for the assignment; range is not fixed by this class.
	    confidence: float


	class ISpeakerEmbeddingManager(ABC):
	    """Abstract contract for managing speaker embeddings.

	    Every method is an abstract coroutine; concrete subclasses must
	    override all of them before they can be instantiated.
	    """

	    @abstractmethod
	    async def find_matching_speaker(
	        self, embedding: np.ndarray, source_file: str
	    ) -> Optional[str]:
	        """Find a matching speaker among the existing embeddings.

	        Args:
	            embedding: Embedding to look up.
	            source_file: File associated with ``embedding`` (presumably the
	                audio it was extracted from -- confirm with implementers).

	        Returns:
	            A speaker ID string, or ``None`` (presumably when nothing
	            matches -- the matching rule is left to the implementation).
	        """

	    @abstractmethod
	    async def add_or_update_speaker(
	        self,
	        embedding: np.ndarray,
	        source_file: str,
	        confidence: float = 1.0,
	        original_label: Optional[str] = None,
	    ) -> str:
	        """Add a new speaker or update an existing one.

	        Args:
	            embedding: Embedding for the speaker.
	            source_file: File associated with ``embedding``.
	            confidence: Confidence score for the embedding; defaults to 1.0.
	            original_label: Optional label for the speaker; defaults to
	                ``None`` (presumably the pre-mapping local label -- confirm).

	        Returns:
	            A string, presumably the ID of the added or updated speaker.
	        """

	    @abstractmethod
	    async def map_local_to_global_speakers(
	        self, local_embeddings: Dict[str, np.ndarray], source_file: str
	    ) -> Dict[str, str]:
	        """Map local speaker labels to global speaker IDs.

	        Args:
	            local_embeddings: Embeddings keyed by local speaker label.
	            source_file: File the local labels belong to.

	        Returns:
	            A mapping from local speaker label to global speaker ID.
	        """

	    @abstractmethod
	    async def get_speaker_info(
	        self, speaker_id: str
	    ) -> Optional[SpeakerEmbedding]:
	        """Get the stored information for one speaker by ID.

	        Returns:
	            The speaker's ``SpeakerEmbedding``, or ``None`` (presumably when
	            the ID is unknown).
	        """

	    @abstractmethod
	    async def get_all_speakers_summary(self) -> Dict[str, Any]:
	        """Get a summary of all speakers.

	        Returns:
	            A dictionary whose schema is defined by the implementation.
	        """

	    @abstractmethod
	    async def save_speakers(self) -> None:
	        """Save speaker data to storage."""

	    @abstractmethod
	    async def load_speakers(self) -> None:
	        """Load speaker data from storage."""


	class ISpeakerIdentificationService(ABC):
	    """Abstract contract for speaker identification operations.

	    Every method is an abstract coroutine; concrete subclasses must
	    override all of them before they can be instantiated.
	    """

	    @abstractmethod
	    async def extract_speaker_embeddings(
	        self, audio_path: str, segments: List[SpeakerSegment]
	    ) -> Dict[str, np.ndarray]:
	        """Extract speaker embeddings from audio segments.

	        Args:
	            audio_path: Path of the audio file to read.
	            segments: Speaker segments to extract embeddings for.

	        Returns:
	            Embeddings keyed by string (presumably the speaker ID or label
	            of each segment -- confirm with implementers).
	        """

	    @abstractmethod
	    async def identify_speakers_in_audio(
	        self, audio_path: str, transcription_segments: List[Dict[str, Any]]
	    ) -> List[SpeakerSegment]:
	        """Identify the speakers in an audio file.

	        Args:
	            audio_path: Path of the audio file to analyse.
	            transcription_segments: Transcription segments as dictionaries;
	                their schema is not defined by this interface.

	        Returns:
	            The speaker segments found for the audio.
	        """

	    @abstractmethod
	    async def map_transcription_to_speakers(
	        self,
	        transcription_segments: List[Dict[str, Any]],
	        speaker_segments: List[SpeakerSegment],
	    ) -> List[Dict[str, Any]]:
	        """Map transcription segments to speaker information.

	        Args:
	            transcription_segments: Transcription segments as dictionaries;
	                their schema is not defined by this interface.
	            speaker_segments: Speaker segments to associate with them.

	        Returns:
	            A list of dictionaries (presumably the transcription segments
	            enriched with speaker data -- confirm with implementers).
	        """