Kabatubare committed on
Commit
880164c
·
verified ·
1 Parent(s): 4e2a28d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -38
app.py CHANGED
@@ -11,18 +11,18 @@ from PIL import Image
11
  try:
12
  from audioseal import AudioSeal
13
  audioseal_available = True
 
14
  except ImportError as e:
15
  audioseal_available = False
16
  print(f"AudioSeal could not be imported: {e}")
17
 
18
  def load_and_resample_audio(audio_file_path, target_sample_rate=16000):
19
  waveform, sample_rate = torchaudio.load(audio_file_path)
20
- # Check if the audio needs to be resampled
21
  if sample_rate != target_sample_rate:
22
  resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=target_sample_rate)
23
  waveform = resampler(waveform)
24
- return waveform, target_sample_rate
25
-
26
 
27
  def extract_mfcc_features(waveform, sample_rate, n_mfcc=40, n_mels=128, win_length=400, hop_length=160):
28
  mfcc_transform = T.MFCC(
@@ -36,7 +36,7 @@ def extract_mfcc_features(waveform, sample_rate, n_mfcc=40, n_mels=128, win_leng
36
  }
37
  )
38
  mfcc = mfcc_transform(waveform)
39
- return mfcc.mean(dim=2)
40
 
41
  def plot_spectrogram(waveform, sample_rate):
42
  if waveform.ndim == 1:
@@ -53,50 +53,35 @@ def plot_spectrogram(waveform, sample_rate):
53
  buf.seek(0)
54
  return Image.open(buf)
55
 
56
- audio_file_path = "path_to_your_audio_file.wav"
57
- waveform, resampled_sr = load_and_resample_audio(audio_file_path)
58
-
59
- detect_watermark(waveform, resampled_sr)
60
-
61
- def detect_watermark(waveform, sample_rate):
62
- """Detect watermark in the uploaded audio using AudioSeal."""
63
  if audioseal_available:
64
- # Resample audio to 16kHz if necessary
65
- if sample_rate != 16000:
66
- print("Resampling to 16kHz")
67
- resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)
68
- waveform = resampler(waveform)
69
- sample_rate = 16000
70
-
71
- # Load the AudioSeal detector
72
  detector = AudioSeal.load_detector("audioseal_detector_16bits")
73
-
74
- # Process audio in 5-second batches
75
- samples_per_batch = 5 * sample_rate
76
- batches = torch.split(waveform, samples_per_batch, dim=1)
77
- results = []
78
- for batch in batches:
79
- if batch.shape[1] == samples_per_batch: # Ensure the batch is 5 seconds long
80
- batch = batch.unsqueeze(0) # Add batch dimension
81
- # Detect watermark and calculate mean probability across the batch
82
- result, _ = detector.detect_watermark(batch, message_threshold=0.5)
83
- results.append(result.mean().item())
84
-
85
- # Calculate the average result across all batches
86
- average_result = sum(results) / len(results)
87
- print(f"Detection result: {'Watermarked or AI-Generated audio' if average_result > 0.5 else 'Not watermarked or AI-Generated'}")
88
- return average_result
89
  else:
90
  print("Watermark detection not available. AudioSeal is not installed.")
91
  return None
92
 
 
 
 
 
 
 
 
 
 
 
93
  # Gradio interface
94
  interface = gr.Interface(
95
- fn=detect_watermark,
96
- inputs=[gr.Audio(label="Upload your audio", type="filepath"), gr.Slider(label="Detection Threshold", minimum=0, maximum=1, value=0.99)],
97
  outputs=["text", "image"],
98
  title="Deep Fake Defender: AI Voice Cloning Detection",
99
- description="Analyzes audio to detect AI-generated content. Adjust the detection threshold as needed."
100
  )
101
 
102
  if __name__ == "__main__":
 
11
  try:
12
  from audioseal import AudioSeal
13
  audioseal_available = True
14
+ print("AudioSeal is available for watermark detection.")
15
  except ImportError as e:
16
  audioseal_available = False
17
  print(f"AudioSeal could not be imported: {e}")
18
 
19
  def load_and_resample_audio(audio_file_path, target_sample_rate=16000):
20
  waveform, sample_rate = torchaudio.load(audio_file_path)
 
21
  if sample_rate != target_sample_rate:
22
  resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=target_sample_rate)
23
  waveform = resampler(waveform)
24
+ sample_rate = target_sample_rate
25
+ return waveform, sample_rate
26
 
27
  def extract_mfcc_features(waveform, sample_rate, n_mfcc=40, n_mels=128, win_length=400, hop_length=160):
28
  mfcc_transform = T.MFCC(
 
36
  }
37
  )
38
  mfcc = mfcc_transform(waveform)
39
+ return mfcc.mean(dim=2)
40
 
41
  def plot_spectrogram(waveform, sample_rate):
42
  if waveform.ndim == 1:
 
53
  buf.seek(0)
54
  return Image.open(buf)
55
 
56
def detect_watermark(waveforms, sample_rate):
    """Return the mean watermark-detection score for a batch of audio.

    Args:
        waveforms: Batched audio passed directly to the AudioSeal detector.
            NOTE(review): presumably shaped (batch, 1, samples) -- confirm
            against the caller.
        sample_rate: Accepted for interface compatibility; this function
            does not read it.

    Returns:
        The detection score averaged over every batch item and time step as
        a Python float, or ``None`` when AudioSeal could not be imported.
    """
    if not audioseal_available:
        print("Watermark detection not available. AudioSeal is not installed.")
        return None

    detector = AudioSeal.load_detector("audioseal_detector_16bits")
    scores, _messages = detector.forward(waveforms)  # b x 2+nbits x t

    # Index 1 on the second axis is what the original code treats as the
    # per-frame detection probability.
    watermark_scores = scores[:, 1, :]
    probability = watermark_scores.mean().cpu().item()

    if probability > 0.5:
        verdict = 'Watermarked or AI-Generated audio'
    else:
        verdict = 'Not watermarked or AI-Generated'
    print(f"Detection result: {verdict}")
    return probability
67
 
68
def main(audio_file_path):
    """Analyse one audio file and return the values the Gradio UI displays.

    The file is loaded and resampled, a spectrogram image is rendered, and
    the waveform is cut into 5-second chunks that are sent to
    ``detect_watermark`` as a single batch.

    Args:
        audio_file_path: Path of the uploaded audio file.

    Returns:
        A ``(text, image)`` tuple matching the interface's
        ``outputs=["text", "image"]``: a human-readable result message and
        the spectrogram image produced by ``plot_spectrogram``.
    """
    waveform, resampled_sr = load_and_resample_audio(audio_file_path)
    # Keep the rendered image: it is the second value the interface shows.
    spectrogram_image = plot_spectrogram(waveform, resampled_sr)

    samples_per_batch = 5 * resampled_sr  # 5s audios
    # Keep every full-length chunk. Dropping the last chunk unconditionally
    # would discard valid audio whenever the clip length is an exact
    # multiple of 5 seconds, so filter on the actual chunk length instead.
    audio_batches = [
        chunk
        for chunk in torch.split(waveform, samples_per_batch, dim=1)
        if chunk.shape[1] == samples_per_batch
    ]
    if not audio_batches:
        # Clips shorter than one chunk leave nothing to concatenate; report
        # that instead of letting torch.concat fail on an empty sequence.
        message = (
            "Audio is too short for watermark detection; "
            "at least 5 seconds are required."
        )
        print(message)
        return message, spectrogram_image

    audio_batched = torch.concat(audio_batches, dim=0)
    audio_batched = audio_batched.unsqueeze(1)  # add channel dimension
    result = detect_watermark(audio_batched, resampled_sr)
    print(f"Probability of watermark: {result}")

    if result is None:
        # detect_watermark returns None when AudioSeal is not importable.
        message = "Watermark detection not available. AudioSeal is not installed."
    else:
        message = f"Probability of watermark: {result}"
    return message, spectrogram_image
77
+
78
  # Gradio interface
79
  interface = gr.Interface(
80
+ fn=main,
81
+ inputs=[gr.Audio(label="Upload your audio", type="filepath")],
82
  outputs=["text", "image"],
83
  title="Deep Fake Defender: AI Voice Cloning Detection",
84
+ description="Analyzes audio to detect AI-generated content."
85
  )
86
 
87
  if __name__ == "__main__":