voice_clone_detection

Runtime error

App Files Files Community

Kabatubare committed on Mar 13, 2024

Commit

e8e81bf

verified ·

1 Parent(s): 6f6f035

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -31

app.py CHANGED Viewed

@@ -8,7 +8,7 @@ import librosa
 # Path to the local directory where the model files are stored within the Space
 local_model_path = "./"
-# Initialize the feature extractor and model from the local files
 extractor = AutoFeatureExtractor.from_pretrained(local_model_path)
 model = AutoModelForAudioClassification.from_pretrained(local_model_path)
@@ -22,44 +22,41 @@ def predict_voice(audio_file_path):
     Returns:
         A string with the prediction and confidence level.
     """
-    # Load the audio file. librosa automatically resamples to the target sample rate if needed.
-    waveform, sample_rate = librosa.load(audio_file_path, sr=16000)  # Force resampling to 16000 Hz
-    # Ensure waveform is mono
-    if waveform.ndim > 1:
-        waveform = np.mean(waveform, axis=0)
-    # Convert the input audio file to model's expected format.
-    inputs = extractor(waveform, return_tensors="pt", sampling_rate=sample_rate)
-    # Generate predictions from the model.
-    with torch.no_grad():  # Ensure no gradients are calculated
-        outputs = model(**inputs)
-    # Extract logits and compute the class with the highest score.
-    logits = outputs.logits
-    predicted_index = logits.argmax()
-    # Translate index to label
-    label = model.config.id2label[predicted_index.item()]
-    # Calculate the confidence of the prediction using softmax.
-    confidence = softmax(logits, dim=1).max().item() * 100
-    # Prepare the output string.
-    result = f"The voice is classified as '{label}' with a confidence of {confidence:.2f}%."
     return result
 # Setting up the Gradio interface
 iface = gr.Interface(
     fn=predict_voice,
-    inputs=gr.Audio(label="Upload Audio File", type="filepath"),  # Correct parameter usage
     outputs=gr.Textbox(label="Prediction"),
     title="Voice Authenticity Detection",
     description="Detects whether a voice is real or AI-generated. Upload an audio file to see the results.",
     theme="huggingface"
 )
-# Run the Gradio interface with share=True for creating a public link
-iface.launch(share=True)

 # Path to the local directory where the model files are stored within the Space
 local_model_path = "./"
+# Load the model and feature extractor outside the function to improve performance
 extractor = AutoFeatureExtractor.from_pretrained(local_model_path)
 model = AutoModelForAudioClassification.from_pretrained(local_model_path)
     Returns:
         A string with the prediction and confidence level.
     """
+    try:
+        # Ensure the file path does not lead to unintended directories
+        if not audio_file_path.startswith("/expected/path/for/safety"):
+            return "Error: Invalid file path."
+        # Load and preprocess the audio file
+        waveform, sample_rate = librosa.load(audio_file_path, sr=16000, mono=True)
+        # Convert the input audio file to model's expected format
+        inputs = extractor(waveform, return_tensors="pt", sampling_rate=sample_rate)
+        # Generate predictions from the model
+        with torch.no_grad():  # Ensure no gradients are calculated
+            outputs = model(**inputs)
+        # Extract logits, compute the class with the highest score, and calculate confidence
+        logits = outputs.logits
+        predicted_index = logits.argmax()
+        label = model.config.id2label[predicted_index.item()]
+        confidence = softmax(logits, dim=1).max().item() * 100
+        result = f"The voice is classified as '{label}' with a confidence of {confidence:.2f}%."
+    except Exception as e:
+        result = f"An error occurred during processing: {str(e)}"
     return result
 # Setting up the Gradio interface
 iface = gr.Interface(
     fn=predict_voice,
+    inputs=gr.Audio(label="Upload Audio File", type="filepath"),
     outputs=gr.Textbox(label="Prediction"),
     title="Voice Authenticity Detection",
     description="Detects whether a voice is real or AI-generated. Upload an audio file to see the results.",
     theme="huggingface"
 )
+# Run the Gradio interface, consider using enable_queue=True if processing is expected to be long or the app faces high traffic
+iface.launch(share=True, enable_queue=True)