import io
import logging
from typing import Dict, Tuple

import librosa
import numpy as np
import whisper

logger = logging.getLogger(__name__)


class LanguageDetector:
    """Detect the spoken language of an audio clip with a Whisper model."""

    def __init__(self, model_name="tiny"):
        """
        Initialize the language detector with a Whisper model.

        Args:
            model_name (str): Name of the Whisper model to use. Default is
                "tiny" which is sufficient for language detection.
        """
        self.model = whisper.load_model(model_name)
        logger.info("Loaded Whisper model %s for language detection", model_name)

    def _detect_from_audio(self, audio) -> Tuple[str, float, Dict[str, float]]:
        """
        Run language detection on an already-decoded waveform.

        Shared by the public ``detect_language_from_*`` methods so both use
        exactly the same preprocessing.

        Args:
            audio: Waveform accepted by ``whisper.pad_or_trim`` /
                ``whisper.log_mel_spectrogram`` (float samples; callers in
                this class pass 16 kHz audio).

        Returns:
            str: Detected language code (e.g., "en", "fr", etc.)
            float: Confidence score
            dict: All language probabilities
        """
        audio = whisper.pad_or_trim(audio)

        # The number of mel bins must match what the loaded checkpoint was
        # trained with, so read it from the model instead of hard-coding it.
        mel = whisper.log_mel_spectrogram(
            audio, n_mels=self.model.dims.n_mels
        ).to(self.model.device)

        _, probs = self.model.detect_language(mel)
        detected_lang = max(probs, key=probs.get)
        return detected_lang, probs[detected_lang], probs

    def detect_language_from_file(self, audio_file_path):
        """
        Detect language from an audio file.

        Args:
            audio_file_path (str): Path to the audio file

        Returns:
            str: Detected language code (e.g., "en", "fr", etc.)
            float: Confidence score
            dict: All language probabilities

        Raises:
            Exception: Any error raised while loading or analysing the audio
                is logged and re-raised unchanged.
        """
        try:
            audio = whisper.load_audio(audio_file_path)
            return self._detect_from_audio(audio)
        except Exception as e:
            logger.error("Error in language detection: %s", e)
            raise

    def detect_language_from_bytes(self, audio_bytes):
        """
        Detect language from audio bytes.

        Args:
            audio_bytes (bytes): Encoded audio data in bytes (any container
                that ``librosa.load`` can decode from a file-like object)

        Returns:
            str: Detected language code (e.g., "en", "fr", etc.)
            float: Confidence score
            dict: All language probabilities

        Raises:
            Exception: Any error raised while decoding or analysing the audio
                is logged and re-raised unchanged.
        """
        try:
            # Decode and resample to the 16 kHz rate used for the file path.
            audio, _ = librosa.load(io.BytesIO(audio_bytes), sr=16000)

            # Keep the samples as float32: the spectrogram step performs an
            # STFT, which needs floating-point input. Scaling to int16 here
            # would break it (and overflow for a sample of exactly 1.0).
            audio = np.asarray(audio, dtype=np.float32)

            return self._detect_from_audio(audio)
        except Exception as e:
            logger.error("Error in language detection: %s", e)
            raise