Spaces:
Runtime error
Update app.py
app.py CHANGED
@@ -3,6 +3,7 @@ from transformers import AutoFeatureExtractor, AutoModelForAudioClassification
 import numpy as np
 import torch
 from torch.nn.functional import softmax
+import librosa
 import soundfile as sf

 # Path to the local directory where the model files are stored within the Space
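The new `import librosa` only works if the package is available in the Space. Gradio Spaces install extra Python packages from the repository's requirements.txt, which is not part of this commit, so librosa has to be listed there or the app stops at startup with ModuleNotFoundError. A plausible requirements.txt for the imports visible in this diff is sketched below; the contents of the Space's real file are an assumption, and gradio itself normally comes from the Space's SDK setting rather than this list.

# requirements.txt (sketch, not part of this commit)
transformers
torch
numpy
soundfile
librosa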
@@ -17,14 +18,18 @@ def predict_voice(audio_file_path):
     Predicts whether a voice is real or spoofed from an audio file.

     Args:
-        audio_file_path: The
+        audio_file_path: The path to the input audio file to be classified.

     Returns:
         A string with the prediction and confidence level.
     """

-    # Load the audio file.
-    waveform, sample_rate =
+    # Load the audio file. librosa automatically resamples to the target sample rate if needed.
+    waveform, sample_rate = librosa.load(audio_file_path, sr=16000)  # Force resampling to 16000 Hz
+
+    # Ensure waveform is mono
+    if len(waveform.shape) > 1:
+        waveform = np.mean(waveform, axis=0)

     # Convert the input audio file to model's expected format.
     inputs = extractor(waveform, return_tensors="pt", sampling_rate=sample_rate)
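Two details of the new loading code are worth noting. librosa.load defaults to mono=True and returns a float32 array already resampled to the requested rate, so with sr=16000 the returned sample_rate is always 16000 and the waveform is one-dimensional; the `if len(waveform.shape) > 1` branch can only ever trigger if mono=False were passed. The standalone check below illustrates this; the file name is a placeholder, and 16 kHz is assumed to be the rate the feature extractor expects, as the in-line comment suggests.

import librosa

# Placeholder path; any audio file readable by librosa works here.
waveform, sample_rate = librosa.load("example.wav", sr=16000)

print(waveform.dtype)  # float32
print(waveform.ndim)   # 1 -> already mono, so the extra mono check is a no-op
print(sample_rate)     # 16000 -> forced by sr=16000 and passed on to the extractor

The soundfile import kept from the previous version does not appear in the lines shown here, although librosa itself uses soundfile as one of its backends for reading files.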
@@ -47,14 +52,17 @@ def predict_voice(audio_file_path):
     result = f"The voice is classified as '{label}' with a confidence of {confidence:.2f}%."
     return result

+# Setting up the Gradio interface
 iface = gr.Interface(
     fn=predict_voice,
-    inputs=gr.Audio(type="filepath", label="Upload Audio File"),
+    inputs=gr.Audio(source="upload", type="filepath", label="Upload Audio File"),
     outputs=gr.Textbox(label="Prediction"),
     title="Voice Authenticity Detection",
     description="Detects whether a voice is real or AI-generated. Upload an audio file to see the results.",
     theme="huggingface"
 )

-# Run the Gradio interface
+# Run the Gradio interface with share=True for creating a public link
+iface.launch(share=True)
+io interface
 iface.launch()
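The hunk headers jump from line 35 to line 52 of the new file, so the code that actually runs the classifier between the extractor call and the result string is not shown in this commit. Based on the imports (AutoModelForAudioClassification, softmax) and the variables the visible lines rely on (inputs, label, confidence), that section typically looks like the sketch below; the model variable and the id2label lookup are assumptions, not taken from the diff.

# Illustrative sketch only: this part of predict_voice is not shown in the commit.
# It assumes `model` was created at module level with
# AutoModelForAudioClassification.from_pretrained(...) next to `extractor`.
with torch.no_grad():
    logits = model(**inputs).logits               # raw scores, shape (1, num_labels)
probs = softmax(logits, dim=-1)                   # normalise to probabilities
predicted_id = int(torch.argmax(probs, dim=-1))   # index of the most likely class
label = model.config.id2label[predicted_id]       # e.g. "real" / "spoof"
confidence = probs[0, predicted_id].item() * 100  # percentage used in the result string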
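The end of the updated file is the most likely cause of the Runtime error shown at the top of the page. The commit adds iface.launch(share=True) but leaves behind a stray `io interface` fragment (apparently the tail of the old "# Run the Gradio interface" comment) and keeps the original iface.launch() call, so the file now ends with a bare `io interface` statement. That line is a SyntaxError, which prevents app.py from loading at all. share=True also has no effect on Spaces, where the app is already served publicly, and depending on the pinned Gradio version the restored source="upload" argument may need to become sources=["upload"] (Gradio 4.x renamed the parameter). A cleaned-up ending would look like the sketch below, with the rest of the file as shown in the diff.

# Run the Gradio interface. The Space already serves the app publicly,
# so no share link is needed; launch exactly once.
iface.launch()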