voice_clone_detection

Runtime error

App Files Files Community

Kabatubare committed on Mar 13, 2024

Commit

df3ef47

verified ·

1 Parent(s): e8e81bf

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -16

app.py CHANGED Viewed

@@ -4,14 +4,36 @@ import numpy as np
 import torch
 from torch.nn.functional import softmax
 import librosa
-# Path to the local directory where the model files are stored within the Space
 local_model_path = "./"
 # Load the model and feature extractor outside the function to improve performance
 extractor = AutoFeatureExtractor.from_pretrained(local_model_path)
 model = AutoModelForAudioClassification.from_pretrained(local_model_path)
 def predict_voice(audio_file_path):
     """
     Predicts whether a voice is real or spoofed from an audio file.
@@ -22,22 +44,21 @@ def predict_voice(audio_file_path):
     Returns:
         A string with the prediction and confidence level.
     """
-    try:
-        # Ensure the file path does not lead to unintended directories
-        if not audio_file_path.startswith("/expected/path/for/safety"):
-            return "Error: Invalid file path."
         # Load and preprocess the audio file
-        waveform, sample_rate = librosa.load(audio_file_path, sr=16000, mono=True)
-        # Convert the input audio file to model's expected format
         inputs = extractor(waveform, return_tensors="pt", sampling_rate=sample_rate)
-        # Generate predictions from the model
-        with torch.no_grad():  # Ensure no gradients are calculated
             outputs = model(**inputs)
-        # Extract logits, compute the class with the highest score, and calculate confidence
         logits = outputs.logits
         predicted_index = logits.argmax()
         label = model.config.id2label[predicted_index.item()]
@@ -46,17 +67,18 @@ def predict_voice(audio_file_path):
         result = f"The voice is classified as '{label}' with a confidence of {confidence:.2f}%."
     except Exception as e:
         result = f"An error occurred during processing: {str(e)}"
     return result
-# Setting up the Gradio interface
 iface = gr.Interface(
     fn=predict_voice,
     inputs=gr.Audio(label="Upload Audio File", type="filepath"),
     outputs=gr.Textbox(label="Prediction"),
     title="Voice Authenticity Detection",
     description="Detects whether a voice is real or AI-generated. Upload an audio file to see the results.",
-    theme="huggingface"
 )
-# Run the Gradio interface, consider using enable_queue=True if processing is expected to be long or the app faces high traffic
-iface.launch(share=True, enable_queue=True)

 import torch
 from torch.nn.functional import softmax
 import librosa
+import os
+# Path to the local directory where the model files are stored
 local_model_path = "./"
 # Load the model and feature extractor outside the function to improve performance
 extractor = AutoFeatureExtractor.from_pretrained(local_model_path)
 model = AutoModelForAudioClassification.from_pretrained(local_model_path)
def safe_path_join(base_path, path):
    """
    Safely join a base path and a potentially unsafe relative path.

    Both paths are canonicalised with ``os.path.realpath`` so that ``..``
    segments and symbolic links are resolved *before* the containment check;
    a symlink inside the base directory that points elsewhere is rejected.

    Args:
        base_path: The base directory path.
        path: The relative path to join with the base path. An absolute
            path replaces the base during joining and is therefore only
            accepted when it already lies inside base_path.
    Returns:
        The resolved joined path if it is base_path itself or a subpath of
        it, otherwise None. None is also returned when either argument is
        not a usable path (e.g. None, embedded NUL byte, or paths that
        cannot be compared such as different drives on Windows).
    """
    try:
        # realpath (unlike abspath) follows symlinks, so the check below is
        # made against the location that would actually be opened.
        base_path = os.path.realpath(base_path)
        target_path = os.path.realpath(os.path.join(base_path, path))
        # commonpath compares whole path components, so a sibling such as
        # "/base_evil" is not mistaken for a child of "/base".
        inside_base = os.path.commonpath([base_path, target_path]) == base_path
    except (TypeError, ValueError):
        # TypeError: non-path input; ValueError: NUL byte or incomparable paths.
        return None
    return target_path if inside_base else None
 def predict_voice(audio_file_path):
     """
     Predicts whether a voice is real or spoofed from an audio file.
     Returns:
         A string with the prediction and confidence level.
     """
+    # Safety check and path normalization
+    expected_base_path = "/expected/path/for/safety"
+    safe_audio_file_path = safe_path_join(expected_base_path, audio_file_path)
+    if not safe_audio_file_path:
+        return "Error: Invalid file path."
+    try:
         # Load and preprocess the audio file
+        waveform, sample_rate = librosa.load(safe_audio_file_path, sr=16000, mono=True)
         inputs = extractor(waveform, return_tensors="pt", sampling_rate=sample_rate)
+        with torch.no_grad():  # No gradients needed
             outputs = model(**inputs)
         logits = outputs.logits
         predicted_index = logits.argmax()
         label = model.config.id2label[predicted_index.item()]
         result = f"The voice is classified as '{label}' with a confidence of {confidence:.2f}%."
     except Exception as e:
         result = f"An error occurred during processing: {str(e)}"
     return result
# Gradio interface setup: a single audio upload (delivered to predict_voice
# as a file path) mapped to a text box showing the prediction string.
iface = gr.Interface(
    fn=predict_voice,
    inputs=gr.Audio(label="Upload Audio File", type="filepath"),
    outputs=gr.Textbox(label="Prediction"),
    title="Voice Authenticity Detection",
    description="Detects whether a voice is real or AI-generated. Upload an audio file to see the results.",
    theme="huggingface",
)
# Request queuing is enabled through queue(); `enable_queue` is not a
# supported keyword of gr.Interface in current Gradio releases and passing
# it there can abort start-up with a TypeError.
iface.queue()
iface.launch(share=True)