Kabatubare committed on
Commit
fd5ff13
·
verified ·
1 Parent(s): 880164c
Files changed (1) hide show
  1. app.py +20 -21
app.py CHANGED
@@ -10,10 +10,8 @@ from PIL import Image
10
  # Ensure AudioSeal is imported correctly
11
  try:
12
  from audioseal import AudioSeal
13
- audioseal_available = True
14
  print("AudioSeal is available for watermark detection.")
15
  except ImportError as e:
16
- audioseal_available = False
17
  print(f"AudioSeal could not be imported: {e}")
18
 
19
  def load_and_resample_audio(audio_file_path, target_sample_rate=16000):
@@ -21,8 +19,7 @@ def load_and_resample_audio(audio_file_path, target_sample_rate=16000):
21
  if sample_rate != target_sample_rate:
22
  resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=target_sample_rate)
23
  waveform = resampler(waveform)
24
- sample_rate = target_sample_rate
25
- return waveform, sample_rate
26
 
27
  def extract_mfcc_features(waveform, sample_rate, n_mfcc=40, n_mels=128, win_length=400, hop_length=160):
28
  mfcc_transform = T.MFCC(
@@ -37,33 +34,35 @@ def extract_mfcc_features(waveform, sample_rate, n_mfcc=40, n_mels=128, win_leng
37
  )
38
  mfcc = mfcc_transform(waveform)
39
  return mfcc.mean(dim=2)
40
-
41
  def plot_spectrogram(waveform, sample_rate):
42
  if waveform.ndim == 1:
43
- waveform = waveform.unsqueeze(0)
44
  spectrogram_transform = T.Spectrogram()
45
  spectrogram = spectrogram_transform(waveform)
46
  spectrogram_db = torchaudio.transforms.AmplitudeToDB()(spectrogram)
47
  plt.figure(figsize=(10, 4))
48
  plt.imshow(spectrogram_db[0].numpy(), cmap='hot', aspect='auto', origin='lower')
49
- plt.axis('off')
50
  buf = io.BytesIO()
51
  plt.savefig(buf, format='png', bbox_inches='tight', pad_inches=0)
52
  plt.close()
53
  buf.seek(0)
54
  return Image.open(buf)
55
 
56
- def detect_watermark(waveforms, sample_rate):
57
- if audioseal_available:
58
- detector = AudioSeal.load_detector("audioseal_detector_16bits")
59
- results, messages = detector.forward(waveforms) # b x 2+nbits x t
60
- detect_probs = results[:, 1, :]
61
- result = detect_probs.mean().cpu().item()
62
- print(f"Detection result: {'Watermarked or AI-Generated audio' if result > 0.5 else 'Not watermarked or AI-Generated'}")
63
- return result
64
- else:
65
- print("Watermark detection not available. AudioSeal is not installed.")
66
- return None
 
 
67
 
68
  def main(audio_file_path):
69
  waveform, resampled_sr = load_and_resample_audio(audio_file_path)
@@ -77,12 +76,12 @@ def main(audio_file_path):
77
 
78
  # Gradio interface
79
  interface = gr.Interface(
80
- fn=main,
81
- inputs=[gr.Audio(label="Upload your audio", type="filepath")],
82
  outputs=["text", "image"],
83
  title="Deep Fake Defender: AI Voice Cloning Detection",
84
  description="Analyzes audio to detect AI-generated content."
85
  )
86
 
87
  if __name__ == "__main__":
88
- interface.launch()
 
10
  # Ensure AudioSeal is imported correctly
11
  try:
12
  from audioseal import AudioSeal
 
13
  print("AudioSeal is available for watermark detection.")
14
  except ImportError as e:
 
15
  print(f"AudioSeal could not be imported: {e}")
16
 
17
  def load_and_resample_audio(audio_file_path, target_sample_rate=16000):
 
19
  if sample_rate != target_sample_rate:
20
  resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=target_sample_rate)
21
  waveform = resampler(waveform)
22
+ return waveform, target_sample_rate
 
23
 
24
  def extract_mfcc_features(waveform, sample_rate, n_mfcc=40, n_mels=128, win_length=400, hop_length=160):
25
  mfcc_transform = T.MFCC(
 
34
  )
35
  mfcc = mfcc_transform(waveform)
36
  return mfcc.mean(dim=2)
37
+
38
  def plot_spectrogram(waveform, sample_rate):
39
  if waveform.ndim == 1:
40
+ waveform = waveform.unsqueeze(0) # Ensure waveform is 2D
41
  spectrogram_transform = T.Spectrogram()
42
  spectrogram = spectrogram_transform(waveform)
43
  spectrogram_db = torchaudio.transforms.AmplitudeToDB()(spectrogram)
44
  plt.figure(figsize=(10, 4))
45
  plt.imshow(spectrogram_db[0].numpy(), cmap='hot', aspect='auto', origin='lower')
46
+ plt.axis('off') # Hide axes for a clean image
47
  buf = io.BytesIO()
48
  plt.savefig(buf, format='png', bbox_inches='tight', pad_inches=0)
49
  plt.close()
50
  buf.seek(0)
51
  return Image.open(buf)
52
 
53
+ def detect_watermark(waveform, sample_rate):
54
+ if not 'AudioSeal' in globals():
55
+ return "AudioSeal not available", plot_spectrogram(waveform, sample_rate)
56
+
57
+ detector = AudioSeal.load_detector("audioseal_detector_16bits")
58
+ # Assuming `forward` method exists and accepts sample_rate; adjust accordingly
59
+ results, messages = detector.forward(waveform, sample_rate=sample_rate) # Adjusted to pass sample_rate explicitly
60
+ detect_probs = results[:, 1, :]
61
+ result = detect_probs.mean().cpu().item()
62
+ message = f"Detection result: {'Watermarked or AI-Generated audio' if result > 0.5 else 'Not watermarked or AI-Generated'}\nProbability of watermark: {result}"
63
+ spectrogram_image = plot_spectrogram(waveform, sample_rate)
64
+ return message, spectrogram_image # Return both message and image
65
+
66
 
67
  def main(audio_file_path):
68
  waveform, resampled_sr = load_and_resample_audio(audio_file_path)
 
76
 
77
  # Gradio interface
78
  interface = gr.Interface(
79
+ fn=detect_watermark,
80
+ inputs=gr.Audio(label="Upload your audio", type="filepath"),
81
  outputs=["text", "image"],
82
  title="Deep Fake Defender: AI Voice Cloning Detection",
83
  description="Analyzes audio to detect AI-generated content."
84
  )
85
 
86
  if __name__ == "__main__":
87
+ interface.launch()