voice_clone_detection

Runtime error

App Files Community

Kabatubare committed on Mar 14, 2024

Commit

6aa52fc

verified ·

1 Parent(s): 05e6aba

Update app.py

Browse files

Files changed (1) hide show

app.py +20 -7

app.py CHANGED Viewed

@@ -6,7 +6,7 @@ import matplotlib.pyplot as plt
 from transformers import AutoModelForAudioClassification, ASTFeatureExtractor
 import random
-# Model and feature extractor loading from the specified local path
 model = AutoModelForAudioClassification.from_pretrained("./")
 feature_extractor = ASTFeatureExtractor.from_pretrained("./")
@@ -16,7 +16,16 @@ def plot_waveform(waveform, sr):
     plt.ylabel('Amplitude')
     plt.plot(np.linspace(0, len(waveform) / sr, len(waveform)), waveform)
     plt.xlabel('Time (s)')
-    # Instead of plt.show(), we'll return the figure
     return plt.gcf()
 def custom_feature_extraction(audio, sr=16000, target_length=1024):
@@ -44,20 +53,24 @@ def predict_voice(audio_file_path):
         label = model.config.id2label[predicted_index.item()]
         confidence = torch.softmax(logits, dim=1).max().item() * 100
-        # Plot the waveform using the modified function
         waveform_plot = plot_waveform(waveform, sample_rate)
-        # Return both the label and the plot
-        return f"The voice is classified as '{label}' with a confidence of {confidence:.2f}%.", waveform_plot
     except Exception as e:
-        return f"Error during processing: {e}", None
 iface = gr.Interface(
     fn=predict_voice,
     inputs=gr.Audio(label="Upload Audio File", type="filepath"),
     outputs=[
         gr.Textbox(label="Prediction"),
-        gr.Plot(label="Waveform")  # Gradio will handle the rendering of the matplotlib figure
     ],
     title="Voice Authenticity Detection",
     description="Detects whether a voice is real or AI-generated. Upload an audio file to see the results."

 from transformers import AutoModelForAudioClassification, ASTFeatureExtractor
 import random
+# Model and feature extractor loading
 model = AutoModelForAudioClassification.from_pretrained("./")
 feature_extractor = ASTFeatureExtractor.from_pretrained("./")
     plt.ylabel('Amplitude')
     plt.plot(np.linspace(0, len(waveform) / sr, len(waveform)), waveform)
     plt.xlabel('Time (s)')
+    return plt.gcf()
+def plot_spectrogram(waveform, sr):
+    S = librosa.feature.melspectrogram(y=waveform, sr=sr, n_mels=128)
+    S_DB = librosa.power_to_db(S, ref=np.max)
+    plt.figure(figsize=(10, 4))
+    librosa.display.specshow(S_DB, sr=sr, x_axis='time', y_axis='mel')
+    plt.title('Mel Spectrogram')
+    plt.colorbar(format='%+2.0f dB')
+    plt.tight_layout()
     return plt.gcf()
 def custom_feature_extraction(audio, sr=16000, target_length=1024):
         label = model.config.id2label[predicted_index.item()]
         confidence = torch.softmax(logits, dim=1).max().item() * 100
         waveform_plot = plot_waveform(waveform, sample_rate)
+        spectrogram_plot = plot_spectrogram(waveform, sample_rate)
+        return (
+            f"The voice is classified as '{label}' with a confidence of {confidence:.2f}%.",
+            waveform_plot,
+            spectrogram_plot
+        )
     except Exception as e:
+        return f"Error during processing: {e}", None, None
 iface = gr.Interface(
     fn=predict_voice,
     inputs=gr.Audio(label="Upload Audio File", type="filepath"),
     outputs=[
         gr.Textbox(label="Prediction"),
+        gr.Plot(label="Waveform"),
+        gr.Plot(label="Spectrogram")
     ],
     title="Voice Authenticity Detection",
     description="Detects whether a voice is real or AI-generated. Upload an audio file to see the results."