voice_clone_detection

Runtime error

App Files Files Community

Kabatubare committed on Mar 14, 2024

Commit

05e6aba

verified ·

1 Parent(s): 84de51b

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -6

app.py CHANGED Viewed

@@ -2,7 +2,7 @@ import gradio as gr
 import librosa
 import numpy as np
 import torch
-import torch.nn.functional as F
 from transformers import AutoModelForAudioClassification, ASTFeatureExtractor
 import random
@@ -10,8 +10,16 @@ import random
 model = AutoModelForAudioClassification.from_pretrained("./")
 feature_extractor = ASTFeatureExtractor.from_pretrained("./")
-def custom_feature_extraction(audio, sr=16000, n_mels=128, target_length=1024):
-    # Using the loaded feature extractor
     features = feature_extractor(audio, sampling_rate=sr, return_tensors="pt", padding="max_length", max_length=target_length)
     return features.input_values
@@ -35,14 +43,22 @@ def predict_voice(audio_file_path):
         predicted_index = logits.argmax()
         label = model.config.id2label[predicted_index.item()]
         confidence = torch.softmax(logits, dim=1).max().item() * 100
-        return f"The voice is classified as '{label}' with a confidence of {confidence:.2f}%."
     except Exception as e:
-        return f"Error during processing: {e}"
 iface = gr.Interface(
     fn=predict_voice,
     inputs=gr.Audio(label="Upload Audio File", type="filepath"),
-    outputs=gr.Textbox(label="Prediction"),
     title="Voice Authenticity Detection",
     description="Detects whether a voice is real or AI-generated. Upload an audio file to see the results."
 )

 import librosa
 import numpy as np
 import torch
+import matplotlib.pyplot as plt
 from transformers import AutoModelForAudioClassification, ASTFeatureExtractor
 import random
 model = AutoModelForAudioClassification.from_pretrained("./")
 feature_extractor = ASTFeatureExtractor.from_pretrained("./")
def plot_waveform(waveform, sr):
    """Draw an amplitude-versus-time plot of an audio signal.

    Args:
        waveform: Sequence of audio samples; assumed to be one-dimensional
            (mono) -- TODO confirm against the caller.
        sr: Sampling rate in Hz, used to convert sample indices to seconds.

    Returns:
        The matplotlib ``Figure`` holding the plot. It is returned rather
        than shown so the caller can hand it to the UI for rendering.
    """
    # Build the figure directly instead of through pyplot. pyplot keeps every
    # figure it creates in a global registry until it is explicitly closed,
    # which leaks memory in a long-running server, and its "current figure"
    # is shared state that concurrent requests could draw onto.
    from matplotlib.figure import Figure

    fig = Figure(figsize=(10, 3))
    ax = fig.subplots()
    ax.set_title('Waveform')
    ax.set_ylabel('Amplitude')
    # endpoint=False places sample i exactly at i / sr seconds.
    n_samples = len(waveform)
    time_axis = np.linspace(0, n_samples / sr, n_samples, endpoint=False)
    ax.plot(time_axis, waveform)
    ax.set_xlabel('Time (s)')
    return fig
def custom_feature_extraction(audio, sr=16000, target_length=1024):
    """Run the module-level feature extractor on raw audio.

    Args:
        audio: Audio samples passed straight through to the extractor.
        sr: Sampling rate reported to the extractor (default 16000).
        target_length: Value forwarded as ``max_length`` together with
            ``padding="max_length"``.

    Returns:
        The ``input_values`` attribute of the extractor's output, requested
        as PyTorch tensors via ``return_tensors="pt"``.
    """
    extractor_options = {
        "sampling_rate": sr,
        "return_tensors": "pt",
        "padding": "max_length",
        "max_length": target_length,
    }
    encoded = feature_extractor(audio, **extractor_options)
    return encoded.input_values
         predicted_index = logits.argmax()
         label = model.config.id2label[predicted_index.item()]
         confidence = torch.softmax(logits, dim=1).max().item() * 100
+        # Plot the waveform using the modified function
+        waveform_plot = plot_waveform(waveform, sample_rate)
+        # Return both the label and the plot
+        return f"The voice is classified as '{label}' with a confidence of {confidence:.2f}%.", waveform_plot
     except Exception as e:
+        return f"Error during processing: {e}", None
 # Gradio UI definition: wires predict_voice to one audio input and two outputs.
 # The outputs list lines up positionally with the pair predict_voice returns,
 # (message, figure) on success or (error message, None) on failure.
 iface = gr.Interface(
     fn=predict_voice,
     # type="filepath": presumably hands predict_voice a path to the uploaded
     # file rather than decoded samples -- confirm against the Gradio docs.
     inputs=gr.Audio(label="Upload Audio File", type="filepath"),
+    outputs=[
+        gr.Textbox(label="Prediction"),
+        gr.Plot(label="Waveform")  # Gradio will handle the rendering of the matplotlib figure
+    ],
     title="Voice Authenticity Detection",
     description="Detects whether a voice is real or AI-generated. Upload an audio file to see the results."
 )