AnyaSchen committed on
Commit
65ac0a4
·
1 Parent(s): d7d66af

feat: fix model

Browse files
Files changed (1) hide show
  1. language_detector.py +9 -9
language_detector.py CHANGED
@@ -1,4 +1,4 @@
1
- import whisper as whp
2
  import numpy as np
3
  import logging
4
  import io
@@ -14,7 +14,7 @@ class LanguageDetector:
14
  Args:
15
  model_name (str): Name of the Whisper model to use. Default is "tiny" which is sufficient for language detection.
16
  """
17
- self.model = whp.load_model(model_name)
18
  logger.info(f"Loaded Whisper model {model_name} for language detection")
19
 
20
  def detect_language_from_file(self, audio_file_path):
@@ -31,11 +31,11 @@ class LanguageDetector:
31
  """
32
  try:
33
  # Load and preprocess audio
34
- audio = whp.load_audio(audio_file_path)
35
- audio = whp.pad_or_trim(audio)
36
 
37
- # Make log-Mel spectrogram
38
- mel = whp.log_mel_spectrogram(audio).to(self.model.device)
39
 
40
  # Detect language
41
  _, probs = self.model.detect_language(mel)
@@ -69,10 +69,10 @@ class LanguageDetector:
69
  audio = (audio * 32768).astype(np.int16)
70
 
71
  # Load and preprocess audio
72
- audio = whp.pad_or_trim(audio)
73
 
74
- # Make log-Mel spectrogram
75
- mel = whp.log_mel_spectrogram(audio).to(self.model.device)
76
 
77
  # Detect language
78
  _, probs = self.model.detect_language(mel)
 
1
+ import whisper
2
  import numpy as np
3
  import logging
4
  import io
 
14
  Args:
15
  model_name (str): Name of the Whisper model to use. Default is "tiny" which is sufficient for language detection.
16
  """
17
+ self.model = whisper.load_model(model_name)
18
  logger.info(f"Loaded Whisper model {model_name} for language detection")
19
 
20
  def detect_language_from_file(self, audio_file_path):
 
31
  """
32
  try:
33
  # Load and preprocess audio
34
+ audio = whisper.load_audio(audio_file_path)
35
+ audio = whisper.pad_or_trim(audio)
36
 
37
+ # Make log-Mel spectrogram with correct dimensions
38
+ mel = whisper.log_mel_spectrogram(audio, n_mels=128).to(self.model.device)
39
 
40
  # Detect language
41
  _, probs = self.model.detect_language(mel)
 
69
  audio = (audio * 32768).astype(np.int16)
70
 
71
  # Load and preprocess audio
72
+ audio = whisper.pad_or_trim(audio)
73
 
74
+ # Make log-Mel spectrogram with correct dimensions
75
+ mel = whisper.log_mel_spectrogram(audio, n_mels=128).to(self.model.device)
76
 
77
  # Detect language
78
  _, probs = self.model.detect_language(mel)