Spaces:
Paused
Paused
feat: fix model
Browse files
- language_detector.py +9 -9
language_detector.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
import whisper
|
2 |
import numpy as np
|
3 |
import logging
|
4 |
import io
|
@@ -14,7 +14,7 @@ class LanguageDetector:
|
|
14 |
Args:
|
15 |
model_name (str): Name of the Whisper model to use. Default is "tiny" which is sufficient for language detection.
|
16 |
"""
|
17 |
-
self.model =
|
18 |
logger.info(f"Loaded Whisper model {model_name} for language detection")
|
19 |
|
20 |
def detect_language_from_file(self, audio_file_path):
|
@@ -31,11 +31,11 @@ class LanguageDetector:
|
|
31 |
"""
|
32 |
try:
|
33 |
# Load and preprocess audio
|
34 |
-
audio =
|
35 |
-
audio =
|
36 |
|
37 |
-
# Make log-Mel spectrogram
|
38 |
-
mel =
|
39 |
|
40 |
# Detect language
|
41 |
_, probs = self.model.detect_language(mel)
|
@@ -69,10 +69,10 @@ class LanguageDetector:
|
|
69 |
audio = (audio * 32768).astype(np.int16)
|
70 |
|
71 |
# Load and preprocess audio
|
72 |
-
audio =
|
73 |
|
74 |
-
# Make log-Mel spectrogram
|
75 |
-
mel =
|
76 |
|
77 |
# Detect language
|
78 |
_, probs = self.model.detect_language(mel)
|
|
|
1 |
+
import whisper
|
2 |
import numpy as np
|
3 |
import logging
|
4 |
import io
|
|
|
14 |
Args:
|
15 |
model_name (str): Name of the Whisper model to use. Default is "tiny" which is sufficient for language detection.
|
16 |
"""
|
17 |
+
self.model = whisper.load_model(model_name)
|
18 |
logger.info(f"Loaded Whisper model {model_name} for language detection")
|
19 |
|
20 |
def detect_language_from_file(self, audio_file_path):
|
|
|
31 |
"""
|
32 |
try:
|
33 |
# Load and preprocess audio
|
34 |
+
audio = whisper.load_audio(audio_file_path)
|
35 |
+
audio = whisper.pad_or_trim(audio)
|
36 |
|
37 |
+
# Make log-Mel spectrogram with correct dimensions
|
38 |
+
mel = whisper.log_mel_spectrogram(audio, n_mels=128).to(self.model.device)
|
39 |
|
40 |
# Detect language
|
41 |
_, probs = self.model.detect_language(mel)
|
|
|
69 |
audio = (audio * 32768).astype(np.int16)
|
70 |
|
71 |
# Load and preprocess audio
|
72 |
+
audio = whisper.pad_or_trim(audio)
|
73 |
|
74 |
+
# Make log-Mel spectrogram with correct dimensions
|
75 |
+
mel = whisper.log_mel_spectrogram(audio, n_mels=128).to(self.model.device)
|
76 |
|
77 |
# Detect language
|
78 |
_, probs = self.model.detect_language(mel)
|