Kabatubare committed
Commit f0dd070 · verified · 1 Parent(s): 1364a7f

Update app.py

Files changed (1): app.py +14 -9
app.py CHANGED
@@ -3,23 +3,28 @@ from transformers import AutoFeatureExtractor, AutoModelForAudioClassification
 import torch
 import librosa
 import numpy as np
+import logging
+
+# Configure logging for debugging purposes
+logging.basicConfig(level=logging.INFO)
 
 local_model_path = "./"
 extractor = AutoFeatureExtractor.from_pretrained(local_model_path)
 model = AutoModelForAudioClassification.from_pretrained(local_model_path)
 
 def preprocess_audio(audio_file_path, target_sample_rate=16000):
-    # Load the audio file, ensuring mono conversion
-    waveform, _ = librosa.load(audio_file_path, sr=target_sample_rate, mono=True)
-    # Normalizing waveform to be between -1 and 1
-    waveform = librosa.util.normalize(waveform)
-    return waveform, target_sample_rate
+    try:
+        waveform, _ = librosa.load(audio_file_path, sr=target_sample_rate, mono=True)
+        waveform = librosa.util.normalize(waveform)
+        logging.info(f"Audio preprocessed: {audio_file_path}")
+        return waveform.astype(np.float32), target_sample_rate
+    except Exception as e:
+        logging.error(f"Error preprocessing audio: {e}")
+        raise
 
 def predict_voice(audio_file_path):
     try:
         waveform, sample_rate = preprocess_audio(audio_file_path)
-        # Ensure waveform is a float32 array
-        waveform = waveform.astype(np.float32)
         inputs = extractor(waveform, return_tensors="pt", sampling_rate=sample_rate)
 
         with torch.no_grad():
@@ -31,9 +36,10 @@ def predict_voice(audio_file_path):
         confidence = torch.softmax(logits, dim=1).max().item() * 100
 
         result = f"The voice is classified as '{label}' with a confidence of {confidence:.2f}%."
+        logging.info("Prediction successful.")
     except Exception as e:
-        # Improved error handling for debugging
         result = f"Error during processing: {e}"
+        logging.error(result)
 
     return result
 
@@ -46,4 +52,3 @@ iface = gr.Interface(
 )
 
 iface.launch()
-
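
A minimal sketch of how the updated functions could be smoke-tested outside the Gradio UI. This is hypothetical usage, not part of the commit: the sample path is an assumption, and since importing app.py would also trigger iface.launch(), it assumes preprocess_audio and predict_voice are already in scope (e.g. pasted into a session).

import numpy as np

audio_path = "sample.wav"  # assumption: any short audio clip on disk

# preprocess_audio now normalizes, casts to float32, and raises on failure,
# so a successful return guarantees a clean 16 kHz float32 waveform.
waveform, sample_rate = preprocess_audio(audio_path)
assert waveform.dtype == np.float32
assert sample_rate == 16000

# predict_voice catches its own exceptions and always returns a string,
# so it is safe to call even on malformed input.
print(predict_voice(audio_path))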