Update
app.py CHANGED
@@ -11,7 +11,7 @@ model = AutoModelForAudioClassification.from_pretrained("./")
 feature_extractor = ASTFeatureExtractor.from_pretrained("./")
 
 def plot_waveform(waveform, sr):
-    plt.figure(figsize=(12,
+    plt.figure(figsize=(12, 4))  # Slightly larger plot for better visibility
     plt.title('Waveform')
     plt.ylabel('Amplitude')
     plt.plot(np.linspace(0, len(waveform) / sr, len(waveform)), waveform)
@@ -21,7 +21,7 @@ def plot_waveform(waveform, sr):
 def plot_spectrogram(waveform, sr):
     S = librosa.feature.melspectrogram(y=waveform, sr=sr, n_mels=128)
     S_DB = librosa.power_to_db(S, ref=np.max)
-    plt.figure(figsize=(12,
+    plt.figure(figsize=(12, 6))  # Slightly larger plot for better visibility
     librosa.display.specshow(S_DB, sr=sr, x_axis='time', y_axis='mel')
     plt.title('Mel Spectrogram')
     plt.colorbar(format='%+2.0f dB')
@@ -53,29 +53,27 @@ def predict_voice(audio_file_path):
         label = model.config.id2label[predicted_index.item()]
         confidence = torch.softmax(logits, dim=1).max().item() * 100
 
-        prediction_text = (f"The model predicts the voice as '{label}'. "
-                           f"Confidence level: {confidence:.2f}%")
-
         waveform_plot = plot_waveform(waveform, sample_rate)
         spectrogram_plot = plot_spectrogram(waveform, sample_rate)
 
-        return
+        return (
+            f"The voice is classified as '{label}' with a confidence of {confidence:.2f}%.",
+            waveform_plot,
+            spectrogram_plot
+        )
     except Exception as e:
         return f"Error during processing: {e}", None, None
 
-# Define the Gradio app layout
 iface = gr.Interface(
     fn=predict_voice,
     inputs=gr.Audio(label="Upload Audio File", type="filepath"),
     outputs=[
-        gr.Textbox(label="
+        gr.Textbox(label="Prediction"),
         gr.Plot(label="Waveform"),
         gr.Plot(label="Spectrogram")
     ],
     title="Voice Clone Detection",
-    description="
-    # Removed the `layout` parameter
+    description="Detects whether a voice is real or an AI-generated clone. Upload an audio file to see the results."
 )
 
-
-iface.launch(css=css)
+iface.launch()
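For orientation, the hunks above show only the lines surrounding each change. Below is a minimal sketch of how app.py plausibly reads after this commit; the imports, the librosa-based audio loading, the feature-extraction call, and the torch.no_grad() inference step are assumptions filled in around the fragments visible in the diff, not the Space's actual code.

# Hypothetical sketch only: pieces not shown in the diff are assumptions.
import gradio as gr
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import torch
from transformers import ASTFeatureExtractor, AutoModelForAudioClassification

model = AutoModelForAudioClassification.from_pretrained("./")
feature_extractor = ASTFeatureExtractor.from_pretrained("./")

def plot_waveform(waveform, sr):
    fig = plt.figure(figsize=(12, 4))
    plt.title('Waveform')
    plt.ylabel('Amplitude')
    plt.plot(np.linspace(0, len(waveform) / sr, len(waveform)), waveform)
    return fig  # assumption: gr.Plot outputs expect the figure object back

def plot_spectrogram(waveform, sr):
    S = librosa.feature.melspectrogram(y=waveform, sr=sr, n_mels=128)
    S_DB = librosa.power_to_db(S, ref=np.max)
    fig = plt.figure(figsize=(12, 6))
    mesh = librosa.display.specshow(S_DB, sr=sr, x_axis='time', y_axis='mel')
    plt.title('Mel Spectrogram')
    fig.colorbar(mesh, format='%+2.0f dB')  # pass the mesh explicitly so the colorbar works across matplotlib versions
    return fig  # assumption, as above

def predict_voice(audio_file_path):
    try:
        # Assumed preprocessing and inference; only the lines after them appear in the diff.
        waveform, sample_rate = librosa.load(audio_file_path, sr=16000)
        inputs = feature_extractor(waveform, sampling_rate=sample_rate, return_tensors="pt")
        with torch.no_grad():
            logits = model(**inputs).logits
        predicted_index = logits.argmax(dim=-1)

        label = model.config.id2label[predicted_index.item()]
        confidence = torch.softmax(logits, dim=1).max().item() * 100

        waveform_plot = plot_waveform(waveform, sample_rate)
        spectrogram_plot = plot_spectrogram(waveform, sample_rate)

        return (
            f"The voice is classified as '{label}' with a confidence of {confidence:.2f}%.",
            waveform_plot,
            spectrogram_plot
        )
    except Exception as e:
        return f"Error during processing: {e}", None, None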
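One more note on the final hunk: the removed iface.launch(css=css) call passed a css keyword to launch(), which Gradio's launch() does not accept (and no css variable is visible in the diff), so dropping it is the simplest fix. If custom styling were still wanted, it would normally be supplied to the gr.Interface / gr.Blocks constructor instead; the custom_css string below is purely illustrative and not part of the Space's code.

custom_css = ".gradio-container {max-width: 900px !important;}"  # hypothetical styling, not from the Space

iface = gr.Interface(
    fn=predict_voice,
    inputs=gr.Audio(label="Upload Audio File", type="filepath"),
    outputs=[
        gr.Textbox(label="Prediction"),
        gr.Plot(label="Waveform"),
        gr.Plot(label="Spectrogram")
    ],
    title="Voice Clone Detection",
    description="Detects whether a voice is real or an AI-generated clone. Upload an audio file to see the results.",
    css=custom_css,  # css is a constructor argument on gr.Interface / gr.Blocks, not on launch()
)

iface.launch()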