Kabatubare committed (verified)
Commit cea8753 · Parent: 7859ecd

Update app.py

Files changed (1): app.py (+15 -9)
app.py CHANGED
@@ -1,6 +1,8 @@
 import gradio as gr
 from transformers import AutoFeatureExtractor, AutoModelForAudioClassification
 import numpy as np
+import torch
+from torch.nn.functional import softmax
 
 # Path to the local directory where the model files are stored within the Space
 local_model_path = "./"
@@ -20,20 +22,24 @@ def predict_voice(audio_file):
     A string with the prediction and confidence level.
     """
     # Convert the input audio file to model's expected format.
-    inputs = extractor(audio_file, return_tensors="pt")
+    # The following code assumes your audio file is a numpy array.
+    # You may need to modify this depending on how the audio file is being read.
+    waveform = np.array(audio_file)
+    inputs = extractor(waveform, return_tensors="pt", sampling_rate=extractor.sampling_rate)
 
     # Generate predictions from the model.
-    outputs = model(**inputs)
+    with torch.no_grad():  # Ensure no gradients are calculated
+        outputs = model(**inputs)
 
     # Extract logits and compute the class with the highest score.
     logits = outputs.logits
-    predicted_index = np.argmax(logits.detach().numpy())
+    predicted_index = logits.argmax()
 
     # Translate index to label
-    label = model.config.id2label[predicted_index]
+    label = model.config.id2label[predicted_index.item()]
 
-    # Calculate the confidence of the prediction.
-    confidence = np.max(np.softmax(logits.detach().numpy(), axis=1)) * 100
+    # Calculate the confidence of the prediction using softmax.
+    confidence = softmax(logits, dim=1).max().item() * 100
 
     # Prepare the output string.
     result = f"The voice is classified as '{label}' with a confidence of {confidence:.2f}%."
@@ -41,9 +47,9 @@ def predict_voice(audio_file):
 
 # Setting up the Gradio interface
 iface = gr.Interface(
-    fn=predict_voice,  # Function to call
-    inputs=gr.inputs.Audio(source="upload", type="file", label="Upload Audio File"),  # Audio input
-    outputs="text",  # Text output
+    fn=predict_voice,
+    inputs=gr.Audio(source="upload", type="file", label="Upload Audio File"),
+    outputs=gr.Textbox(label="Prediction"),
     title="Voice Authenticity Detection",
     description="Detects whether a voice is real or AI-generated. Upload an audio file to see the results.",
     theme="huggingface"
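For a quick sanity check of the updated inference path outside the Space, a minimal sketch along these lines should work. It assumes the feature extractor and model load from local_model_path ("./") with from_pretrained and that the extractor exposes a sampling_rate attribute (as the diff itself relies on); the one-second noise waveform is purely illustrative, not part of the commit.

# Illustrative check of the new preprocessing and softmax-confidence path.
# Assumes the Space's model files live in "./" (local_model_path) and can be
# loaded with from_pretrained; the random waveform stands in for a real upload.
import numpy as np
import torch
from torch.nn.functional import softmax
from transformers import AutoFeatureExtractor, AutoModelForAudioClassification

extractor = AutoFeatureExtractor.from_pretrained("./")
model = AutoModelForAudioClassification.from_pretrained("./")
model.eval()

# One second of noise at the extractor's expected sampling rate.
waveform = np.random.randn(extractor.sampling_rate).astype(np.float32)
inputs = extractor(waveform, return_tensors="pt", sampling_rate=extractor.sampling_rate)

with torch.no_grad():
    logits = model(**inputs).logits

label = model.config.id2label[logits.argmax().item()]
confidence = softmax(logits, dim=1).max().item() * 100
print(f"The voice is classified as '{label}' with a confidence of {confidence:.2f}%.")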