Kabatubare committed
Commit f0dd070 · verified · 1 Parent(s): 1364a7f

Update app.py

Files changed (1): app.py +14 -9
app.py CHANGED
@@ -3,23 +3,28 @@ from transformers import AutoFeatureExtractor, AutoModelForAudioClassification
 import torch
 import librosa
 import numpy as np
+import logging
+
+# Configure logging for debugging purposes
+logging.basicConfig(level=logging.INFO)
 
 local_model_path = "./"
 extractor = AutoFeatureExtractor.from_pretrained(local_model_path)
 model = AutoModelForAudioClassification.from_pretrained(local_model_path)
 
 def preprocess_audio(audio_file_path, target_sample_rate=16000):
-    # Load the audio file, ensuring mono conversion
-    waveform, _ = librosa.load(audio_file_path, sr=target_sample_rate, mono=True)
-    # Normalizing waveform to be between -1 and 1
-    waveform = librosa.util.normalize(waveform)
-    return waveform, target_sample_rate
+    try:
+        waveform, _ = librosa.load(audio_file_path, sr=target_sample_rate, mono=True)
+        waveform = librosa.util.normalize(waveform)
+        logging.info(f"Audio preprocessed: {audio_file_path}")
+        return waveform.astype(np.float32), target_sample_rate
+    except Exception as e:
+        logging.error(f"Error preprocessing audio: {e}")
+        raise
 
 def predict_voice(audio_file_path):
     try:
         waveform, sample_rate = preprocess_audio(audio_file_path)
-        # Ensure waveform is a float32 array
-        waveform = waveform.astype(np.float32)
         inputs = extractor(waveform, return_tensors="pt", sampling_rate=sample_rate)
 
         with torch.no_grad():
@@ -31,9 +36,10 @@ def predict_voice(audio_file_path):
         confidence = torch.softmax(logits, dim=1).max().item() * 100
 
         result = f"The voice is classified as '{label}' with a confidence of {confidence:.2f}%."
+        logging.info("Prediction successful.")
     except Exception as e:
-        # Improved error handling for debugging
         result = f"Error during processing: {e}"
+        logging.error(result)
 
     return result
 
@@ -46,4 +52,3 @@ iface = gr.Interface(
 )
 
 iface.launch()
-
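
A minimal sketch of how the updated functions could be smoke-tested outside the Gradio UI. This is hypothetical usage, not part of the commit: the sample path is an assumption, and since importing app.py would also trigger iface.launch(), it assumes preprocess_audio and predict_voice are already in scope (e.g. pasted into a session).

import numpy as np

audio_path = "sample.wav"  # assumption: any short audio clip on disk

# preprocess_audio now normalizes, casts to float32, and raises on failure,
# so a successful return guarantees a clean 16 kHz float32 waveform.
waveform, sample_rate = preprocess_audio(audio_path)
assert waveform.dtype == np.float32
assert sample_rate == 16000

# predict_voice catches its own exceptions and always returns a string,
# so it is safe to call even on malformed input.
print(predict_voice(audio_path))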