Kabatubare committed on
Commit
accc14f
·
verified ·
1 Parent(s): 25f1221
Files changed (1) hide show
  1. app.py +10 -12
app.py CHANGED
@@ -11,7 +11,7 @@ model = AutoModelForAudioClassification.from_pretrained("./")
11
  feature_extractor = ASTFeatureExtractor.from_pretrained("./")
12
 
13
  def plot_waveform(waveform, sr):
14
- plt.figure(figsize=(12, 3)) # Larger figure size
15
  plt.title('Waveform')
16
  plt.ylabel('Amplitude')
17
  plt.plot(np.linspace(0, len(waveform) / sr, len(waveform)), waveform)
@@ -21,7 +21,7 @@ def plot_waveform(waveform, sr):
21
  def plot_spectrogram(waveform, sr):
22
  S = librosa.feature.melspectrogram(y=waveform, sr=sr, n_mels=128)
23
  S_DB = librosa.power_to_db(S, ref=np.max)
24
- plt.figure(figsize=(12, 4)) # Larger figure size
25
  librosa.display.specshow(S_DB, sr=sr, x_axis='time', y_axis='mel')
26
  plt.title('Mel Spectrogram')
27
  plt.colorbar(format='%+2.0f dB')
@@ -53,29 +53,27 @@ def predict_voice(audio_file_path):
53
  label = model.config.id2label[predicted_index.item()]
54
  confidence = torch.softmax(logits, dim=1).max().item() * 100
55
 
56
- prediction_text = (f"The model predicts the voice as '{label}'. "
57
- f"Confidence level: {confidence:.2f}%")
58
-
59
  waveform_plot = plot_waveform(waveform, sample_rate)
60
  spectrogram_plot = plot_spectrogram(waveform, sample_rate)
61
 
62
- return prediction_text, waveform_plot, spectrogram_plot
 
 
 
 
63
  except Exception as e:
64
  return f"Error during processing: {e}", None, None
65
 
66
- # Define the Gradio app layout
67
  iface = gr.Interface(
68
  fn=predict_voice,
69
  inputs=gr.Audio(label="Upload Audio File", type="filepath"),
70
  outputs=[
71
- gr.Textbox(label="Analysis", type="text"), # Ensure type is correctly set to "text"
72
  gr.Plot(label="Waveform"),
73
  gr.Plot(label="Spectrogram")
74
  ],
75
  title="Voice Clone Detection",
76
- description="This tool determines whether a voice is real or an AI-generated clone. Audio files judged to be authentic and produced by humans are classified as 'Bonafide'. In contrast, those perceived to be synthetically generated are labeled as 'Spoof'. Upload an audio file for analysis."
77
- # Removed the `layout` parameter
78
  )
79
 
80
- # Assuming css is defined as provided in your message
81
- iface.launch(css=css)
 
11
  feature_extractor = ASTFeatureExtractor.from_pretrained("./")
12
 
13
  def plot_waveform(waveform, sr):
14
+ plt.figure(figsize=(12, 4)) # Slightly larger plot for better visibility
15
  plt.title('Waveform')
16
  plt.ylabel('Amplitude')
17
  plt.plot(np.linspace(0, len(waveform) / sr, len(waveform)), waveform)
 
21
  def plot_spectrogram(waveform, sr):
22
  S = librosa.feature.melspectrogram(y=waveform, sr=sr, n_mels=128)
23
  S_DB = librosa.power_to_db(S, ref=np.max)
24
+ plt.figure(figsize=(12, 6)) # Slightly larger plot for better visibility
25
  librosa.display.specshow(S_DB, sr=sr, x_axis='time', y_axis='mel')
26
  plt.title('Mel Spectrogram')
27
  plt.colorbar(format='%+2.0f dB')
 
53
  label = model.config.id2label[predicted_index.item()]
54
  confidence = torch.softmax(logits, dim=1).max().item() * 100
55
 
 
 
 
56
  waveform_plot = plot_waveform(waveform, sample_rate)
57
  spectrogram_plot = plot_spectrogram(waveform, sample_rate)
58
 
59
+ return (
60
+ f"The voice is classified as '{label}' with a confidence of {confidence:.2f}%.",
61
+ waveform_plot,
62
+ spectrogram_plot
63
+ )
64
  except Exception as e:
65
  return f"Error during processing: {e}", None, None
66
 
 
67
  iface = gr.Interface(
68
  fn=predict_voice,
69
  inputs=gr.Audio(label="Upload Audio File", type="filepath"),
70
  outputs=[
71
+ gr.Textbox(label="Prediction"),
72
  gr.Plot(label="Waveform"),
73
  gr.Plot(label="Spectrogram")
74
  ],
75
  title="Voice Clone Detection",
76
+ description="Detects whether a voice is real or an AI-generated clone. Upload an audio file to see the results."
 
77
  )
78
 
79
+ iface.launch()