voice_clone_detection

Runtime error

App Files Files Community

Kabatubare committed on Mar 13, 2024

Commit

6530ee3

verified ·

1 Parent(s): 0bccd1d

Update app.py

Browse files

Files changed (1) hide show

app.py +13 -22

app.py CHANGED Viewed

@@ -1,47 +1,38 @@
 import gradio as gr
-from transformers import AutoFeatureExtractor, AutoModelForAudioClassification
-import torch
 import librosa
 import numpy as np
 import logging
-# Configure logging for debugging purposes
 logging.basicConfig(level=logging.INFO)
 local_model_path = "./"
-extractor = AutoFeatureExtractor.from_pretrained(local_model_path)
 model = AutoModelForAudioClassification.from_pretrained(local_model_path)
-def preprocess_audio(audio_file_path, target_sample_rate=16000):
-    """Load an audio file as a normalized float32 waveform.
-
-    The file is read with ``librosa.load`` using ``sr=target_sample_rate`` and
-    ``mono=True``, then passed through ``librosa.util.normalize``.
-
-    Returns a tuple ``(waveform, target_sample_rate)`` where ``waveform`` has
-    been cast to ``np.float32``.
-
-    Any exception raised while loading or normalizing is logged at ERROR
-    level and then re-raised unchanged.
-    """
-    try:
-        waveform, _ = librosa.load(audio_file_path, sr=target_sample_rate, mono=True)
-        waveform = librosa.util.normalize(waveform)
-        logging.info(f"Audio preprocessed: {audio_file_path}")
-        return waveform.astype(np.float32), target_sample_rate
-    except Exception as e:
-        logging.error(f"Error preprocessing audio: {e}")
-        raise
 def predict_voice(audio_file_path):
     try:
-        waveform, sample_rate = preprocess_audio(audio_file_path)
-        inputs = extractor(waveform, return_tensors="pt", sampling_rate=sample_rate)
         with torch.no_grad():
-            outputs = model(**inputs)
         logits = outputs.logits
         predicted_index = logits.argmax()
         label = model.config.id2label[predicted_index.item()]
         confidence = torch.softmax(logits, dim=1).max().item() * 100
-        # Debugging logs
-        logging.info(f"Logits: {logits}")
-        logging.info(f"Predicted index: {predicted_index}")
-        logging.info(f"Label: {label}, Confidence: {confidence}")
         result = f"The voice is classified as '{label}' with a confidence of {confidence:.2f}%."
-        logging.info("Prediction successful.")
     except Exception as e:
         result = f"Error during processing: {e}"
         logging.error(result)

 import gradio as gr
 import librosa
 import numpy as np
+import torch
 import logging
+from transformers import AutoModelForAudioClassification
+# Configure logging
 logging.basicConfig(level=logging.INFO)
+# Model loading
 local_model_path = "./"
 model = AutoModelForAudioClassification.from_pretrained(local_model_path)
+def custom_feature_extraction(audio_file_path, sr=16000, n_mels=128, n_fft=2048, hop_length=512):
+    """Convert an audio file into a dB-scaled mel-spectrogram tensor.
+
+    The file is loaded with ``librosa.load`` using ``sr``; a mel spectrogram is
+    computed from the waveform with the given ``n_mels``, ``n_fft`` and
+    ``hop_length``; the result is converted with ``librosa.power_to_db`` using
+    the spectrogram's own maximum as the reference (``ref=np.max``).
+
+    Returns the dB spectrogram as a float ``torch`` tensor with one extra
+    leading dimension added via ``unsqueeze(0)``.
+
+    NOTE(review): the returned layout is presumably (1, n_mels, frames);
+    whether the loaded model accepts that layout is not visible here - confirm
+    against the model's expected input.
+    """
+    waveform, sample_rate = librosa.load(audio_file_path, sr=sr)
+    S = librosa.feature.melspectrogram(y=waveform, sr=sample_rate, n_mels=n_mels, n_fft=n_fft, hop_length=hop_length)
+    S_DB = librosa.power_to_db(S, ref=np.max)
+    S_DB_tensor = torch.tensor(S_DB).float().unsqueeze(0)  # Add batch dimension
+    return S_DB_tensor
 def predict_voice(audio_file_path):
     try:
+        features = custom_feature_extraction(audio_file_path)
         with torch.no_grad():
+            outputs = model(inputs=features)
         logits = outputs.logits
         predicted_index = logits.argmax()
         label = model.config.id2label[predicted_index.item()]
         confidence = torch.softmax(logits, dim=1).max().item() * 100
         result = f"The voice is classified as '{label}' with a confidence of {confidence:.2f}%."
+        logging.info(f"Prediction: {result}")
     except Exception as e:
         result = f"Error during processing: {e}"
         logging.error(result)