Spaces:
Paused
Paused
File size: 2,766 Bytes
b8a4e79 13db51f b8a4e79 13db51f b8a4e79 13db51f b8a4e79 13db51f b8a4e79 13db51f b8a4e79 13db51f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 |
import whisper as whp
import numpy as np
import logging
import io
import librosa
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
class LanguageDetector:
    """Detect the spoken language of an audio clip with a Whisper model.

    The model is loaded once in the constructor and reused by both public
    entry points, which share a single detection pipeline.
    """

    def __init__(self, model_name="tiny"):
        """
        Initialize the language detector with a Whisper model.

        Args:
            model_name (str): Name of the Whisper model to use. Default is
                "tiny" which is sufficient for language detection.
        """
        self.model = whp.load_model(model_name)
        logger.info(f"Loaded Whisper model {model_name} for language detection")

    @staticmethod
    def _as_whisper_waveform(samples):
        """Return *samples* as a contiguous float32 array.

        Whisper's mel front-end works on floating-point waveforms (the same
        representation ``whisper.load_audio`` produces), so the samples must
        NOT be rescaled to int16: that breaks the STFT and overflows for
        samples equal to 1.0.
        """
        return np.ascontiguousarray(samples, dtype=np.float32)

    @staticmethod
    def _most_probable(probs):
        """Return ``(language, probability)`` for the highest-scoring entry."""
        best_lang = max(probs, key=probs.get)
        return best_lang, probs[best_lang]

    def _detect_from_waveform(self, audio):
        """Run pad/trim -> log-Mel -> language detection on a waveform."""
        # Whisper operates on a fixed-length window; pad or cut to fit it.
        audio = whp.pad_or_trim(audio)
        # Use the model's own mel-bin count so checkpoints that do not use
        # the default number of bins get a matching spectrogram.
        mel = whp.log_mel_spectrogram(audio, n_mels=self.model.dims.n_mels)
        mel = mel.to(self.model.device)
        _, probs = self.model.detect_language(mel)
        return self._most_probable(probs)

    def detect_language_from_file(self, audio_file_path):
        """
        Detect language from an audio file.

        Args:
            audio_file_path (str): Path to the audio file

        Returns:
            str: Detected language code (e.g., "en", "fr", etc.)
            float: Confidence score

        Raises:
            Exception: Any error raised while loading or analysing the audio
                is logged and re-raised unchanged.
        """
        try:
            audio = whp.load_audio(audio_file_path)
            return self._detect_from_waveform(audio)
        except Exception as e:
            logger.error(f"Error in language detection: {e}")
            raise

    def detect_language_from_bytes(self, audio_bytes):
        """
        Detect language from audio bytes.

        Args:
            audio_bytes (bytes): Audio data in bytes

        Returns:
            str: Detected language code (e.g., "en", "fr", etc.)
            float: Confidence score

        Raises:
            Exception: Any error raised while decoding or analysing the audio
                is logged and re-raised unchanged.
        """
        try:
            # Decode the in-memory file, requesting a 16 kHz sample rate.
            audio, _ = librosa.load(io.BytesIO(audio_bytes), sr=16000)
            # Keep the samples in floating point; see _as_whisper_waveform.
            audio = self._as_whisper_waveform(audio)
            return self._detect_from_waveform(audio)
        except Exception as e:
            logger.error(f"Error in language detection: {e}")
            raise