audioseal_watermark_detection

Sleeping

App Files Files Community

Kabatubare committed on Feb 29, 2024

Commit

fd5ff13

verified ·

1 Parent(s): 880164c

Update

Browse files

Files changed (1) hide show

app.py +20 -21

app.py CHANGED Viewed

@@ -10,10 +10,8 @@ from PIL import Image
 # Ensure AudioSeal is imported correctly
 try:
     from audioseal import AudioSeal
-    audioseal_available = True
     print("AudioSeal is available for watermark detection.")
 except ImportError as e:
-    audioseal_available = False
     print(f"AudioSeal could not be imported: {e}")
 def load_and_resample_audio(audio_file_path, target_sample_rate=16000):
@@ -21,8 +19,7 @@ def load_and_resample_audio(audio_file_path, target_sample_rate=16000):
     if sample_rate != target_sample_rate:
         resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=target_sample_rate)
         waveform = resampler(waveform)
-        sample_rate = target_sample_rate
-    return waveform, sample_rate
 def extract_mfcc_features(waveform, sample_rate, n_mfcc=40, n_mels=128, win_length=400, hop_length=160):
     mfcc_transform = T.MFCC(
@@ -37,33 +34,35 @@ def extract_mfcc_features(waveform, sample_rate, n_mfcc=40, n_mels=128, win_leng
     )
     mfcc = mfcc_transform(waveform)
     return mfcc.mean(dim=2)
 def plot_spectrogram(waveform, sample_rate):
     if waveform.ndim == 1:
-        waveform = waveform.unsqueeze(0)
     spectrogram_transform = T.Spectrogram()
     spectrogram = spectrogram_transform(waveform)
     spectrogram_db = torchaudio.transforms.AmplitudeToDB()(spectrogram)
     plt.figure(figsize=(10, 4))
     plt.imshow(spectrogram_db[0].numpy(), cmap='hot', aspect='auto', origin='lower')
-    plt.axis('off')
     buf = io.BytesIO()
     plt.savefig(buf, format='png', bbox_inches='tight', pad_inches=0)
     plt.close()
     buf.seek(0)
     return Image.open(buf)
-def detect_watermark(waveforms, sample_rate):
-    if audioseal_available:
-        detector = AudioSeal.load_detector("audioseal_detector_16bits")
-        results, messages = detector.forward(waveforms)  # b x 2+nbits x t
-        detect_probs = results[:, 1, :]
-        result = detect_probs.mean().cpu().item()
-        print(f"Detection result: {'Watermarked or AI-Generated audio' if result > 0.5 else 'Not watermarked or AI-Generated'}")
-        return result
-    else:
-        print("Watermark detection not available. AudioSeal is not installed.")
-        return None
 def main(audio_file_path):
     waveform, resampled_sr = load_and_resample_audio(audio_file_path)
@@ -77,12 +76,12 @@ def main(audio_file_path):
 # Gradio interface
 interface = gr.Interface(
-    fn=main,
-    inputs=[gr.Audio(label="Upload your audio", type="filepath")],
     outputs=["text", "image"],
     title="Deep Fake Defender: AI Voice Cloning Detection",
     description="Analyzes audio to detect AI-generated content."
 )
 if __name__ == "__main__":
-    interface.launch()

 # Ensure AudioSeal is imported correctly
 try:
     from audioseal import AudioSeal
     print("AudioSeal is available for watermark detection.")
 except ImportError as e:
     print(f"AudioSeal could not be imported: {e}")
 def load_and_resample_audio(audio_file_path, target_sample_rate=16000):
     if sample_rate != target_sample_rate:
         resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=target_sample_rate)
         waveform = resampler(waveform)
+    return waveform, target_sample_rate
 def extract_mfcc_features(waveform, sample_rate, n_mfcc=40, n_mels=128, win_length=400, hop_length=160):
     mfcc_transform = T.MFCC(
     )
     mfcc = mfcc_transform(waveform)
     return mfcc.mean(dim=2)
 def plot_spectrogram(waveform, sample_rate):
     if waveform.ndim == 1:
+        waveform = waveform.unsqueeze(0)  # Ensure waveform is 2D
     spectrogram_transform = T.Spectrogram()
     spectrogram = spectrogram_transform(waveform)
     spectrogram_db = torchaudio.transforms.AmplitudeToDB()(spectrogram)
     plt.figure(figsize=(10, 4))
     plt.imshow(spectrogram_db[0].numpy(), cmap='hot', aspect='auto', origin='lower')
+    plt.axis('off')  # Hide axes for a clean image
     buf = io.BytesIO()
     plt.savefig(buf, format='png', bbox_inches='tight', pad_inches=0)
     plt.close()
     buf.seek(0)
     return Image.open(buf)
+def detect_watermark(waveform, sample_rate):
+    if not 'AudioSeal' in globals():
+        return "AudioSeal not available", plot_spectrogram(waveform, sample_rate)
+    detector = AudioSeal.load_detector("audioseal_detector_16bits")
+    # Assuming `forward` method exists and accepts sample_rate; adjust accordingly
+    results, messages = detector.forward(waveform, sample_rate=sample_rate)  # Adjusted to pass sample_rate explicitly
+    detect_probs = results[:, 1, :]
+    result = detect_probs.mean().cpu().item()
+    message = f"Detection result: {'Watermarked or AI-Generated audio' if result > 0.5 else 'Not watermarked or AI-Generated'}\nProbability of watermark: {result}"
+    spectrogram_image = plot_spectrogram(waveform, sample_rate)
+    return message, spectrogram_image  # Return both message and image
 def main(audio_file_path):
     waveform, resampled_sr = load_and_resample_audio(audio_file_path)
 # Gradio interface
 interface = gr.Interface(
+    fn=detect_watermark,
+    inputs=gr.Audio(label="Upload your audio", type="filepath"),
     outputs=["text", "image"],
     title="Deep Fake Defender: AI Voice Cloning Detection",
     description="Analyzes audio to detect AI-generated content."
 )
 if __name__ == "__main__":
+    interface.launch()