voice_clone_detection

Runtime error

App Files Community

Kabatubare committed on Mar 14, 2024

Commit

b860c29

verified ·

1 Parent(s): 1740a24

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -11

app.py CHANGED Viewed

@@ -11,7 +11,7 @@ model = AutoModelForAudioClassification.from_pretrained("./")
 feature_extractor = ASTFeatureExtractor.from_pretrained("./")
 def plot_waveform(waveform, sr):
-    plt.figure(figsize=(10, 3))
     plt.title('Waveform')
     plt.ylabel('Amplitude')
     plt.plot(np.linspace(0, len(waveform) / sr, len(waveform)), waveform)
@@ -21,7 +21,7 @@ def plot_waveform(waveform, sr):
 def plot_spectrogram(waveform, sr):
     S = librosa.feature.melspectrogram(y=waveform, sr=sr, n_mels=128)
     S_DB = librosa.power_to_db(S, ref=np.max)
-    plt.figure(figsize=(10, 4))
     librosa.display.specshow(S_DB, sr=sr, x_axis='time', y_axis='mel')
     plt.title('Mel Spectrogram')
     plt.colorbar(format='%+2.0f dB')
@@ -53,27 +53,41 @@ def predict_voice(audio_file_path):
         label = model.config.id2label[predicted_index.item()]
         confidence = torch.softmax(logits, dim=1).max().item() * 100
         waveform_plot = plot_waveform(waveform, sample_rate)
         spectrogram_plot = plot_spectrogram(waveform, sample_rate)
-        return (
-            f"The voice is classified as '{label}' with a confidence of {confidence:.2f}%.",
-            waveform_plot,
-            spectrogram_plot
-        )
     except Exception as e:
         return f"Error during processing: {e}", None, None
 iface = gr.Interface(
     fn=predict_voice,
     inputs=gr.Audio(label="Upload Audio File", type="filepath"),
     outputs=[
-        gr.Textbox(label="Prediction"),
         gr.Plot(label="Waveform"),
         gr.Plot(label="Spectrogram")
     ],
-    title="Voice Authenticity Detection",
-    description="Detects whether a voice is real or AI-generated. Upload an audio file to see the results."
 )
-iface.launch()

 feature_extractor = ASTFeatureExtractor.from_pretrained("./")
 def plot_waveform(waveform, sr):
+    plt.figure(figsize=(12, 3))  # Larger figure size
     plt.title('Waveform')
     plt.ylabel('Amplitude')
     plt.plot(np.linspace(0, len(waveform) / sr, len(waveform)), waveform)
 def plot_spectrogram(waveform, sr):
     S = librosa.feature.melspectrogram(y=waveform, sr=sr, n_mels=128)
     S_DB = librosa.power_to_db(S, ref=np.max)
+    plt.figure(figsize=(12, 4))  # Larger figure size
     librosa.display.specshow(S_DB, sr=sr, x_axis='time', y_axis='mel')
     plt.title('Mel Spectrogram')
     plt.colorbar(format='%+2.0f dB')
         label = model.config.id2label[predicted_index.item()]
         confidence = torch.softmax(logits, dim=1).max().item() * 100
+        prediction_text = (f"The model predicts the voice as '{label}'. "
+                           f"Confidence level: {confidence:.2f}%")
         waveform_plot = plot_waveform(waveform, sample_rate)
         spectrogram_plot = plot_spectrogram(waveform, sample_rate)
+        return prediction_text, waveform_plot, spectrogram_plot
     except Exception as e:
         return f"Error during processing: {e}", None, None
+# Define the Gradio app layout
 iface = gr.Interface(
     fn=predict_voice,
     inputs=gr.Audio(label="Upload Audio File", type="filepath"),
     outputs=[
+        gr.Textbox(label="Analysis", type="auto"),
         gr.Plot(label="Waveform"),
         gr.Plot(label="Spectrogram")
     ],
+    layout="vertical",
+    title="Voice Clone Detection",
+    description="This tool determines whether a voice is real or an AI-generated clone. Audio files judged to be authentic and produced by humans are classified as 'Bonafide'. In contrast, those perceived to be synthetically generated are labeled as 'Spoof'. Upload an audio file for analysis."
 )
+# Customize the CSS to adjust the layout and component sizes
+css = """
+      .gradio-container {
+        max-width: 960px; /* Adjust the maximum width as needed */
+      }
+      .input-container {
+        width: 25%; /* Smaller input area */
+      }
+      .output-container {
+        width: 74%; /* Larger output area */
+      }
+      """
+iface.launch(css=css)