Kabatubare committed on
Commit
6530ee3
·
verified ·
1 Parent(s): 0bccd1d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -22
app.py CHANGED
@@ -1,47 +1,38 @@
1
  import gradio as gr
2
- from transformers import AutoFeatureExtractor, AutoModelForAudioClassification
3
- import torch
4
  import librosa
5
  import numpy as np
 
6
  import logging
 
7
 
8
- # Configure logging for debugging purposes
9
  logging.basicConfig(level=logging.INFO)
10
 
 
11
  local_model_path = "./"
12
- extractor = AutoFeatureExtractor.from_pretrained(local_model_path)
13
  model = AutoModelForAudioClassification.from_pretrained(local_model_path)
14
 
15
def preprocess_audio(audio_file_path, target_sample_rate=16000):
    """Load an audio file, resample it to mono, and peak-normalize it.

    Args:
        audio_file_path: Path of the audio file handed to ``librosa.load``.
        target_sample_rate: Sample rate (Hz) the audio is resampled to.

    Returns:
        A ``(waveform, sample_rate)`` tuple: the normalized waveform cast to
        ``np.float32`` and the ``target_sample_rate`` that was requested.

    Raises:
        ValueError: If the decoded audio contains no samples.
        Exception: Any error raised by librosa while loading or normalizing
            is logged (with traceback) and re-raised unchanged.
    """
    try:
        waveform, _ = librosa.load(audio_file_path, sr=target_sample_rate, mono=True)
        # Fail early with a clear message instead of letting an empty buffer
        # reach the normalization step.
        if waveform.size == 0:
            raise ValueError(f"Audio file contains no samples: {audio_file_path}")
        waveform = librosa.util.normalize(waveform)
    except Exception as e:
        # logging.exception keeps the traceback; the message text is unchanged.
        logging.exception("Error preprocessing audio: %s", e)
        raise
    else:
        logging.info("Audio preprocessed: %s", audio_file_path)
        return waveform.astype(np.float32), target_sample_rate
24
 
25
  def predict_voice(audio_file_path):
26
  try:
27
- waveform, sample_rate = preprocess_audio(audio_file_path)
28
- inputs = extractor(waveform, return_tensors="pt", sampling_rate=sample_rate)
29
 
30
  with torch.no_grad():
31
- outputs = model(**inputs)
32
 
33
  logits = outputs.logits
34
  predicted_index = logits.argmax()
35
  label = model.config.id2label[predicted_index.item()]
36
  confidence = torch.softmax(logits, dim=1).max().item() * 100
37
 
38
- # Debugging logs
39
- logging.info(f"Logits: {logits}")
40
- logging.info(f"Predicted index: {predicted_index}")
41
- logging.info(f"Label: {label}, Confidence: {confidence}")
42
-
43
  result = f"The voice is classified as '{label}' with a confidence of {confidence:.2f}%."
44
- logging.info("Prediction successful.")
45
  except Exception as e:
46
  result = f"Error during processing: {e}"
47
  logging.error(result)
 
1
  import gradio as gr
 
 
2
  import librosa
3
  import numpy as np
4
+ import torch
5
  import logging
6
+ from transformers import AutoModelForAudioClassification
7
 
8
+ # Configure logging
9
  logging.basicConfig(level=logging.INFO)
10
 
11
+ # Model loading
12
  local_model_path = "./"
 
13
  model = AutoModelForAudioClassification.from_pretrained(local_model_path)
14
 
15
def custom_feature_extraction(audio_file_path, sr=16000, n_mels=128, n_fft=2048, hop_length=512):
    """Compute a log-scaled mel spectrogram of an audio file as a torch tensor.

    Args:
        audio_file_path: Path of the audio file handed to ``librosa.load``.
        sr: Sample rate (Hz) the audio is resampled to on load.
        n_mels: Number of mel bands in the spectrogram.
        n_fft: FFT window size used for the spectrogram.
        hop_length: Hop between successive analysis frames, in samples.

    Returns:
        A float32 ``torch.Tensor`` holding the dB-scaled mel spectrogram with
        one extra leading (batch) dimension added by ``unsqueeze(0)``.
        NOTE(review): for mono input this should be (1, n_mels, frames) --
        confirm against the model's expected input layout.

    Raises:
        ValueError: If the decoded audio contains no samples.
    """
    waveform, sample_rate = librosa.load(audio_file_path, sr=sr)
    # Fail early with a clear message rather than inside the spectrogram or
    # the np.max reference computation on an empty array.
    if waveform.size == 0:
        raise ValueError(f"Audio file contains no samples: {audio_file_path}")

    mel_power = librosa.feature.melspectrogram(
        y=waveform,
        sr=sample_rate,
        n_mels=n_mels,
        n_fft=n_fft,
        hop_length=hop_length,
    )
    # Convert power to decibels relative to the loudest bin of this clip.
    mel_db = librosa.power_to_db(mel_power, ref=np.max)

    # as_tensor avoids the extra copy made by torch.tensor(...).float();
    # mel_db is local, so sharing its memory is safe.
    return torch.as_tensor(mel_db, dtype=torch.float32).unsqueeze(0)  # add batch dimension
 
 
 
21
 
22
  def predict_voice(audio_file_path):
23
  try:
24
+ features = custom_feature_extraction(audio_file_path)
 
25
 
26
  with torch.no_grad():
27
+ outputs = model(inputs=features)
28
 
29
  logits = outputs.logits
30
  predicted_index = logits.argmax()
31
  label = model.config.id2label[predicted_index.item()]
32
  confidence = torch.softmax(logits, dim=1).max().item() * 100
33
 
 
 
 
 
 
34
  result = f"The voice is classified as '{label}' with a confidence of {confidence:.2f}%."
35
+ logging.info(f"Prediction: {result}")
36
  except Exception as e:
37
  result = f"Error during processing: {e}"
38
  logging.error(result)