voice_clone_detection

Runtime error

App Files Files Community

Kabatubare committed on Mar 13, 2024

Commit

8a834c6

verified ·

1 Parent(s): df3ef47

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -11

app.py CHANGED Viewed

@@ -1,6 +1,5 @@
 import gradio as gr
 from transformers import AutoFeatureExtractor, AutoModelForAudioClassification
-import numpy as np
 import torch
 from torch.nn.functional import softmax
 import librosa
@@ -24,16 +23,27 @@ def safe_path_join(base_path, path):
     Returns:
         The safely joined path if it's a subpath of the base_path, otherwise None.
     """
-    # Normalize and absolute both paths
     base_path = os.path.abspath(os.path.normpath(base_path))
     target_path = os.path.abspath(os.path.normpath(os.path.join(base_path, path)))
-    # Ensure the target path is within the base_path directory
     if os.path.commonpath([base_path]) == os.path.commonpath([base_path, target_path]):
         return target_path
     else:
         return None
 def predict_voice(audio_file_path):
     """
     Predicts whether a voice is real or spoofed from an audio file.
@@ -44,19 +54,17 @@ def predict_voice(audio_file_path):
     Returns:
         A string with the prediction and confidence level.
     """
-    # Safety check and path normalization
     expected_base_path = "/expected/path/for/safety"
     safe_audio_file_path = safe_path_join(expected_base_path, audio_file_path)
     if not safe_audio_file_path:
         return "Error: Invalid file path."
     try:
-        # Load and preprocess the audio file
-        waveform, sample_rate = librosa.load(safe_audio_file_path, sr=16000, mono=True)
         inputs = extractor(waveform, return_tensors="pt", sampling_rate=sample_rate)
-        with torch.no_grad():  # No gradients needed
             outputs = model(**inputs)
         logits = outputs.logits
@@ -70,7 +78,6 @@ def predict_voice(audio_file_path):
     return result
-# Gradio interface setup with enhancements for scalability and performance
 iface = gr.Interface(
     fn=predict_voice,
     inputs=gr.Audio(label="Upload Audio File", type="filepath"),
@@ -78,7 +85,7 @@ iface = gr.Interface(
     title="Voice Authenticity Detection",
     description="Detects whether a voice is real or AI-generated. Upload an audio file to see the results.",
     theme="huggingface",
-    enable_queue=True  # Enable queuing to handle high traffic efficiently
 )
 iface.launch(share=True)

 import gradio as gr
 from transformers import AutoFeatureExtractor, AutoModelForAudioClassification
 import torch
 from torch.nn.functional import softmax
 import librosa
     Returns:
         The safely joined path if it's a subpath of the base_path, otherwise None.
     """
     base_path = os.path.abspath(os.path.normpath(base_path))
     target_path = os.path.abspath(os.path.normpath(os.path.join(base_path, path)))
     if os.path.commonpath([base_path]) == os.path.commonpath([base_path, target_path]):
         return target_path
     else:
         return None
+def preprocess_audio(audio_file_path, target_sample_rate=16000):
+    """
+    Loads an audio file through librosa at the requested sample rate, as mono.
+    Args:
+        audio_file_path: Path to the audio file; passed unchanged to librosa.load.
+        target_sample_rate: Value handed to librosa.load as ``sr`` (default 16000).
+    Returns:
+        A (waveform, target_sample_rate) tuple: the waveform returned by
+        librosa.load and the sample rate that was requested.
+    """
+    # The rate reported by librosa.load is discarded; the requested rate is
+    # returned instead (presumably identical when ``sr`` is given -- confirm
+    # against the librosa.load documentation).
+    waveform, _ = librosa.load(audio_file_path, sr=target_sample_rate, mono=True)
+    return waveform, target_sample_rate
 def predict_voice(audio_file_path):
     """
     Predicts whether a voice is real or spoofed from an audio file.
     Returns:
         A string with the prediction and confidence level.
     """
     expected_base_path = "/expected/path/for/safety"
     safe_audio_file_path = safe_path_join(expected_base_path, audio_file_path)
     if not safe_audio_file_path:
         return "Error: Invalid file path."
     try:
+        waveform, sample_rate = preprocess_audio(safe_audio_file_path)
         inputs = extractor(waveform, return_tensors="pt", sampling_rate=sample_rate)
+        with torch.no_grad():
             outputs = model(**inputs)
         logits = outputs.logits
     return result
 iface = gr.Interface(
     fn=predict_voice,
     inputs=gr.Audio(label="Upload Audio File", type="filepath"),
     title="Voice Authenticity Detection",
     description="Detects whether a voice is real or AI-generated. Upload an audio file to see the results.",
     theme="huggingface",
+    enable_queue=True
 )
 iface.launch(share=True)