audioseal_watermark_detection

Sleeping

App Files Files Community

Kabatubare committed on Feb 29, 2024

Commit

880164c

verified ·

1 Parent(s): 4e2a28d

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -38

app.py CHANGED Viewed

@@ -11,18 +11,18 @@ from PIL import Image
 try:
     from audioseal import AudioSeal
     audioseal_available = True
 except ImportError as e:
     audioseal_available = False
     print(f"AudioSeal could not be imported: {e}")
 def load_and_resample_audio(audio_file_path, target_sample_rate=16000):
     waveform, sample_rate = torchaudio.load(audio_file_path)
-    # Check if the audio needs to be resampled
     if sample_rate != target_sample_rate:
         resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=target_sample_rate)
         waveform = resampler(waveform)
-    return waveform, target_sample_rate
 def extract_mfcc_features(waveform, sample_rate, n_mfcc=40, n_mels=128, win_length=400, hop_length=160):
     mfcc_transform = T.MFCC(
@@ -36,7 +36,7 @@ def extract_mfcc_features(waveform, sample_rate, n_mfcc=40, n_mels=128, win_leng
         }
     )
     mfcc = mfcc_transform(waveform)
-    return mfcc.mean(dim=2)
 def plot_spectrogram(waveform, sample_rate):
     if waveform.ndim == 1:
@@ -53,50 +53,35 @@ def plot_spectrogram(waveform, sample_rate):
     buf.seek(0)
     return Image.open(buf)
-    audio_file_path = "path_to_your_audio_file.wav"
-    waveform, resampled_sr = load_and_resample_audio(audio_file_path)
-    detect_watermark(waveform, resampled_sr)
-def detect_watermark(waveform, sample_rate):
-    """Detect watermark in the uploaded audio using AudioSeal."""
     if audioseal_available:
-        # Resample audio to 16kHz if necessary
-        if sample_rate != 16000:
-            print("Resampling to 16kHz")
-            resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)
-            waveform = resampler(waveform)
-            sample_rate = 16000
-        # Load the AudioSeal detector
         detector = AudioSeal.load_detector("audioseal_detector_16bits")
-        # Process audio in 5-second batches
-        samples_per_batch = 5 * sample_rate
-        batches = torch.split(waveform, samples_per_batch, dim=1)
-        results = []
-        for batch in batches:
-            if batch.shape[1] == samples_per_batch:  # Ensure the batch is 5 seconds long
-                batch = batch.unsqueeze(0)  # Add batch dimension
-                # Detect watermark and calculate mean probability across the batch
-                result, _ = detector.detect_watermark(batch, message_threshold=0.5)
-                results.append(result.mean().item())
-        # Calculate the average result across all batches
-        average_result = sum(results) / len(results)
-        print(f"Detection result: {'Watermarked or AI-Generated audio' if average_result > 0.5 else 'Not watermarked or AI-Generated'}")
-        return average_result
     else:
         print("Watermark detection not available. AudioSeal is not installed.")
         return None
 # Gradio interface
 interface = gr.Interface(
-    fn=detect_watermark,
-    inputs=[gr.Audio(label="Upload your audio", type="filepath"), gr.Slider(label="Detection Threshold", minimum=0, maximum=1, value=0.99)],
     outputs=["text", "image"],
     title="Deep Fake Defender: AI Voice Cloning Detection",
-    description="Analyzes audio to detect AI-generated content. Adjust the detection threshold as needed."
 )
 if __name__ == "__main__":

 try:
     from audioseal import AudioSeal
     audioseal_available = True
+    print("AudioSeal is available for watermark detection.")
 except ImportError as e:
     audioseal_available = False
     print(f"AudioSeal could not be imported: {e}")
 def load_and_resample_audio(audio_file_path, target_sample_rate=16000):
     """Load an audio file and bring it to ``target_sample_rate``.

     Args:
         audio_file_path: Path handed to ``torchaudio.load``.
         target_sample_rate: Desired sampling rate in Hz (default 16000).

     Returns:
         A ``(waveform, sample_rate)`` tuple. The waveform is returned
         untouched when the file is already at the target rate; otherwise it
         is resampled and the reported rate is ``target_sample_rate``.
     """
     audio, native_rate = torchaudio.load(audio_file_path)
     # Nothing to do when the file is already at the requested rate.
     if native_rate == target_sample_rate:
         return audio, native_rate
     to_target_rate = torchaudio.transforms.Resample(
         orig_freq=native_rate,
         new_freq=target_sample_rate,
     )
     return to_target_rate(audio), target_sample_rate
 def extract_mfcc_features(waveform, sample_rate, n_mfcc=40, n_mels=128, win_length=400, hop_length=160):
     mfcc_transform = T.MFCC(
         }
     )
     mfcc = mfcc_transform(waveform)
+    return mfcc.mean(dim=2)
 def plot_spectrogram(waveform, sample_rate):
     if waveform.ndim == 1:
     buf.seek(0)
     return Image.open(buf)
def detect_watermark(waveforms, sample_rate):
    """Estimate how likely a batch of audio clips carries an AudioSeal watermark.

    Args:
        waveforms: Batch of audio handed directly to the AudioSeal detector.
            NOTE(review): the caller in this file builds it as
            (batch, 1, samples) -- confirm against the detector's contract.
        sample_rate: Sampling rate of ``waveforms``. Not used by the detector
            call below; kept so existing callers keep working.

    Returns:
        The mean of channel 1 of the detector output over every clip and
        frame, as a Python float, or ``None`` when AudioSeal could not be
        imported.
    """
    if not audioseal_available:
        print("Watermark detection not available. AudioSeal is not installed.")
        return None

    detector = AudioSeal.load_detector("audioseal_detector_16bits")
    # Inference only: disable autograd so no graph is built for the forward
    # pass (relies on the file-level ``torch`` import that ``main`` also uses).
    with torch.no_grad():
        results, _messages = detector.forward(waveforms)  # b x 2+nbits x t
    detect_probs = results[:, 1, :]
    result = detect_probs.mean().cpu().item()
    print(f"Detection result: {'Watermarked or AI-Generated audio' if result > 0.5 else 'Not watermarked or AI-Generated'}")
    return result
def main(audio_file_path):
    """Run the full analysis for one uploaded audio file.

    Loads and resamples the file, renders its spectrogram, cuts the audio
    into 5-second windows and asks ``detect_watermark`` for a watermark
    probability over those windows.

    Args:
        audio_file_path: Path of the uploaded audio file. A falsy value
            (``None`` or ``""``) means nothing was uploaded.

    Returns:
        A ``(text, image)`` tuple matching the two outputs declared on the
        Gradio interface: a human-readable status/result string and the
        spectrogram returned by ``plot_spectrogram`` (``None`` when no file
        was provided).
    """
    if not audio_file_path:
        return "No audio file was provided.", None

    waveform, resampled_sr = load_and_resample_audio(audio_file_path)
    spectrogram = plot_spectrogram(waveform, resampled_sr)

    samples_per_batch = 5 * resampled_sr  # 5 s windows
    # Keep only full-length windows. Filtering by length (instead of always
    # dropping the last chunk) keeps a final window that is exactly 5 s long
    # and still discards a short tail.
    # NOTE(review): assumes time is on dim 1 of the loaded waveform.
    full_windows = [
        chunk
        for chunk in torch.split(waveform, samples_per_batch, dim=1)
        if chunk.shape[1] == samples_per_batch
    ]
    if not full_windows:
        # Concatenating an empty sequence would raise; report it instead.
        return (
            "Audio is shorter than 5 seconds; cannot run watermark detection.",
            spectrogram,
        )

    # Stack windows along the batch axis, then add a channel dimension.
    audio_batched = torch.concat(full_windows, dim=0).unsqueeze(1)
    result = detect_watermark(audio_batched, resampled_sr)
    print(f"Probability of watermark: {result}")
    if result is None:
        return (
            "Watermark detection not available. AudioSeal is not installed.",
            spectrogram,
        )
    return f"Probability of watermark: {result}", spectrogram
 # Gradio interface: exposes `main` as a web UI. The single input is an audio
 # upload that is passed to `main` as a file path (type="filepath"); two
 # outputs are declared, a text and an image, in that order.
 interface = gr.Interface(
+    fn=main,
+    inputs=[gr.Audio(label="Upload your audio", type="filepath")],
     outputs=["text", "image"],
     title="Deep Fake Defender: AI Voice Cloning Detection",
+    description="Analyzes audio to detect AI-generated content."
 )
 if __name__ == "__main__":