Kabatubare committed on
Commit
b860c29
·
verified ·
1 Parent(s): 1740a24

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -11
app.py CHANGED
@@ -11,7 +11,7 @@ model = AutoModelForAudioClassification.from_pretrained("./")
11
  feature_extractor = ASTFeatureExtractor.from_pretrained("./")
12
 
13
  def plot_waveform(waveform, sr):
14
- plt.figure(figsize=(10, 3))
15
  plt.title('Waveform')
16
  plt.ylabel('Amplitude')
17
  plt.plot(np.linspace(0, len(waveform) / sr, len(waveform)), waveform)
@@ -21,7 +21,7 @@ def plot_waveform(waveform, sr):
21
  def plot_spectrogram(waveform, sr):
22
  S = librosa.feature.melspectrogram(y=waveform, sr=sr, n_mels=128)
23
  S_DB = librosa.power_to_db(S, ref=np.max)
24
- plt.figure(figsize=(10, 4))
25
  librosa.display.specshow(S_DB, sr=sr, x_axis='time', y_axis='mel')
26
  plt.title('Mel Spectrogram')
27
  plt.colorbar(format='%+2.0f dB')
@@ -53,27 +53,41 @@ def predict_voice(audio_file_path):
53
  label = model.config.id2label[predicted_index.item()]
54
  confidence = torch.softmax(logits, dim=1).max().item() * 100
55
 
 
 
 
56
  waveform_plot = plot_waveform(waveform, sample_rate)
57
  spectrogram_plot = plot_spectrogram(waveform, sample_rate)
58
 
59
- return (
60
- f"The voice is classified as '{label}' with a confidence of {confidence:.2f}%.",
61
- waveform_plot,
62
- spectrogram_plot
63
- )
64
  except Exception as e:
65
  return f"Error during processing: {e}", None, None
66
 
 
67
  iface = gr.Interface(
68
  fn=predict_voice,
69
  inputs=gr.Audio(label="Upload Audio File", type="filepath"),
70
  outputs=[
71
- gr.Textbox(label="Prediction"),
72
  gr.Plot(label="Waveform"),
73
  gr.Plot(label="Spectrogram")
74
  ],
75
- title="Voice Authenticity Detection",
76
- description="Detects whether a voice is real or AI-generated. Upload an audio file to see the results."
 
77
  )
78
 
79
- iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  feature_extractor = ASTFeatureExtractor.from_pretrained("./")
12
 
13
  def plot_waveform(waveform, sr):
14
+ plt.figure(figsize=(12, 3)) # Larger figure size
15
  plt.title('Waveform')
16
  plt.ylabel('Amplitude')
17
  plt.plot(np.linspace(0, len(waveform) / sr, len(waveform)), waveform)
 
21
  def plot_spectrogram(waveform, sr):
22
  S = librosa.feature.melspectrogram(y=waveform, sr=sr, n_mels=128)
23
  S_DB = librosa.power_to_db(S, ref=np.max)
24
+ plt.figure(figsize=(12, 4)) # Larger figure size
25
  librosa.display.specshow(S_DB, sr=sr, x_axis='time', y_axis='mel')
26
  plt.title('Mel Spectrogram')
27
  plt.colorbar(format='%+2.0f dB')
 
53
  label = model.config.id2label[predicted_index.item()]
54
  confidence = torch.softmax(logits, dim=1).max().item() * 100
55
 
56
+ prediction_text = (f"The model predicts the voice as '{label}'. "
57
+ f"Confidence level: {confidence:.2f}%")
58
+
59
  waveform_plot = plot_waveform(waveform, sample_rate)
60
  spectrogram_plot = plot_spectrogram(waveform, sample_rate)
61
 
62
+ return prediction_text, waveform_plot, spectrogram_plot
 
 
 
 
63
  except Exception as e:
64
  return f"Error during processing: {e}", None, None
65
 
66
+ # Define the Gradio app layout
67
  iface = gr.Interface(
68
  fn=predict_voice,
69
  inputs=gr.Audio(label="Upload Audio File", type="filepath"),
70
  outputs=[
71
+ gr.Textbox(label="Analysis", type="auto"),
72
  gr.Plot(label="Waveform"),
73
  gr.Plot(label="Spectrogram")
74
  ],
75
+ layout="vertical",
76
+ title="Voice Clone Detection",
77
+ description="This tool determines whether a voice is real or an AI-generated clone. Audio files judged to be authentic and produced by humans are classified as 'Bonafide'. In contrast, those perceived to be synthetically generated are labeled as 'Spoof'. Upload an audio file for analysis."
78
  )
79
 
80
+ # Customize the CSS to adjust the layout and component sizes
81
+ css = """
82
+ .gradio-container {
83
+ max-width: 960px; /* Adjust the maximum width as needed */
84
+ }
85
+ .input-container {
86
+ width: 25%; /* Smaller input area */
87
+ }
88
+ .output-container {
89
+ width: 74%; /* Larger output area */
90
+ }
91
+ """
92
+
93
+ iface.launch(css=css)