Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -11,7 +11,7 @@ model = AutoModelForAudioClassification.from_pretrained("./")
|
|
11 |
feature_extractor = ASTFeatureExtractor.from_pretrained("./")
|
12 |
|
13 |
def plot_waveform(waveform, sr):
|
14 |
-
plt.figure(figsize=(
|
15 |
plt.title('Waveform')
|
16 |
plt.ylabel('Amplitude')
|
17 |
plt.plot(np.linspace(0, len(waveform) / sr, len(waveform)), waveform)
|
@@ -21,7 +21,7 @@ def plot_waveform(waveform, sr):
|
|
21 |
def plot_spectrogram(waveform, sr):
|
22 |
S = librosa.feature.melspectrogram(y=waveform, sr=sr, n_mels=128)
|
23 |
S_DB = librosa.power_to_db(S, ref=np.max)
|
24 |
-
plt.figure(figsize=(
|
25 |
librosa.display.specshow(S_DB, sr=sr, x_axis='time', y_axis='mel')
|
26 |
plt.title('Mel Spectrogram')
|
27 |
plt.colorbar(format='%+2.0f dB')
|
@@ -53,27 +53,41 @@ def predict_voice(audio_file_path):
|
|
53 |
label = model.config.id2label[predicted_index.item()]
|
54 |
confidence = torch.softmax(logits, dim=1).max().item() * 100
|
55 |
|
|
|
|
|
|
|
56 |
waveform_plot = plot_waveform(waveform, sample_rate)
|
57 |
spectrogram_plot = plot_spectrogram(waveform, sample_rate)
|
58 |
|
59 |
-
return
|
60 |
-
f"The voice is classified as '{label}' with a confidence of {confidence:.2f}%.",
|
61 |
-
waveform_plot,
|
62 |
-
spectrogram_plot
|
63 |
-
)
|
64 |
except Exception as e:
|
65 |
return f"Error during processing: {e}", None, None
|
66 |
|
|
|
67 |
iface = gr.Interface(
|
68 |
fn=predict_voice,
|
69 |
inputs=gr.Audio(label="Upload Audio File", type="filepath"),
|
70 |
outputs=[
|
71 |
-
gr.Textbox(label="
|
72 |
gr.Plot(label="Waveform"),
|
73 |
gr.Plot(label="Spectrogram")
|
74 |
],
|
75 |
-
|
76 |
-
|
|
|
77 |
)
|
78 |
|
79 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
feature_extractor = ASTFeatureExtractor.from_pretrained("./")
|
12 |
|
13 |
def plot_waveform(waveform, sr):
|
14 |
+
plt.figure(figsize=(12, 3)) # Larger figure size
|
15 |
plt.title('Waveform')
|
16 |
plt.ylabel('Amplitude')
|
17 |
plt.plot(np.linspace(0, len(waveform) / sr, len(waveform)), waveform)
|
|
|
21 |
def plot_spectrogram(waveform, sr):
|
22 |
S = librosa.feature.melspectrogram(y=waveform, sr=sr, n_mels=128)
|
23 |
S_DB = librosa.power_to_db(S, ref=np.max)
|
24 |
+
plt.figure(figsize=(12, 4)) # Larger figure size
|
25 |
librosa.display.specshow(S_DB, sr=sr, x_axis='time', y_axis='mel')
|
26 |
plt.title('Mel Spectrogram')
|
27 |
plt.colorbar(format='%+2.0f dB')
|
|
|
53 |
label = model.config.id2label[predicted_index.item()]
|
54 |
confidence = torch.softmax(logits, dim=1).max().item() * 100
|
55 |
|
56 |
+
prediction_text = (f"The model predicts the voice as '{label}'. "
|
57 |
+
f"Confidence level: {confidence:.2f}%")
|
58 |
+
|
59 |
waveform_plot = plot_waveform(waveform, sample_rate)
|
60 |
spectrogram_plot = plot_spectrogram(waveform, sample_rate)
|
61 |
|
62 |
+
return prediction_text, waveform_plot, spectrogram_plot
|
|
|
|
|
|
|
|
|
63 |
except Exception as e:
|
64 |
return f"Error during processing: {e}", None, None
|
65 |
|
66 |
+
# Custom CSS that constrains the overall page width and the relative widths
# of the input and output areas. It is defined before the Interface is built
# because, in the Gradio 3-5 API, stylesheets are supplied to the constructor;
# `launch()` does not take a `css` argument there.
# NOTE(review): confirm against the Gradio version pinned for this Space.
css = """
.gradio-container {
    max-width: 960px; /* Adjust the maximum width as needed */
}
.input-container {
    width: 25%; /* Smaller input area */
}
.output-container {
    width: 74%; /* Larger output area */
}
"""

# Define the Gradio app layout: one audio upload in, and a text verdict plus
# two plots out (matching the 3-tuple returned by `predict_voice`).
iface = gr.Interface(
    fn=predict_voice,
    inputs=gr.Audio(label="Upload Audio File", type="filepath"),
    outputs=[
        # `type="auto"` was dropped: per the Gradio docs, Textbox `type`
        # only accepts "text", "password" or "email"; the default is "text".
        gr.Textbox(label="Analysis"),
        gr.Plot(label="Waveform"),
        gr.Plot(label="Spectrogram"),
    ],
    # The legacy `layout="vertical"` keyword was removed; it is not a
    # supported Interface parameter in current Gradio releases.
    title="Voice Clone Detection",
    description="This tool determines whether a voice is real or an AI-generated clone. Audio files judged to be authentic and produced by humans are classified as 'Bonafide'. In contrast, those perceived to be synthetically generated are labeled as 'Spoof'. Upload an audio file for analysis.",
    css=css,
)

iface.launch()
|