Update app.py
Browse files
app.py
CHANGED
@@ -11,18 +11,18 @@ from PIL import Image
|
|
11 |
try:
|
12 |
from audioseal import AudioSeal
|
13 |
audioseal_available = True
|
|
|
14 |
except ImportError as e:
|
15 |
audioseal_available = False
|
16 |
print(f"AudioSeal could not be imported: {e}")
|
17 |
|
18 |
def load_and_resample_audio(audio_file_path, target_sample_rate=16000):
|
19 |
waveform, sample_rate = torchaudio.load(audio_file_path)
|
20 |
-
# Check if the audio needs to be resampled
|
21 |
if sample_rate != target_sample_rate:
|
22 |
resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=target_sample_rate)
|
23 |
waveform = resampler(waveform)
|
24 |
-
|
25 |
-
|
26 |
|
27 |
def extract_mfcc_features(waveform, sample_rate, n_mfcc=40, n_mels=128, win_length=400, hop_length=160):
|
28 |
mfcc_transform = T.MFCC(
|
@@ -36,7 +36,7 @@ def extract_mfcc_features(waveform, sample_rate, n_mfcc=40, n_mels=128, win_leng
|
|
36 |
}
|
37 |
)
|
38 |
mfcc = mfcc_transform(waveform)
|
39 |
-
return mfcc.mean(dim=2)
|
40 |
|
41 |
def plot_spectrogram(waveform, sample_rate):
|
42 |
if waveform.ndim == 1:
|
@@ -53,50 +53,35 @@ def plot_spectrogram(waveform, sample_rate):
|
|
53 |
buf.seek(0)
|
54 |
return Image.open(buf)
|
55 |
|
56 |
-
|
57 |
-
waveform, resampled_sr = load_and_resample_audio(audio_file_path)
|
58 |
-
|
59 |
-
detect_watermark(waveform, resampled_sr)
|
60 |
-
|
61 |
-
def detect_watermark(waveform, sample_rate):
|
62 |
-
"""Detect watermark in the uploaded audio using AudioSeal."""
|
63 |
if audioseal_available:
|
64 |
-
# Resample audio to 16kHz if necessary
|
65 |
-
if sample_rate != 16000:
|
66 |
-
print("Resampling to 16kHz")
|
67 |
-
resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)
|
68 |
-
waveform = resampler(waveform)
|
69 |
-
sample_rate = 16000
|
70 |
-
|
71 |
-
# Load the AudioSeal detector
|
72 |
detector = AudioSeal.load_detector("audioseal_detector_16bits")
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
for batch in batches:
|
79 |
-
if batch.shape[1] == samples_per_batch: # Ensure the batch is 5 seconds long
|
80 |
-
batch = batch.unsqueeze(0) # Add batch dimension
|
81 |
-
# Detect watermark and calculate mean probability across the batch
|
82 |
-
result, _ = detector.detect_watermark(batch, message_threshold=0.5)
|
83 |
-
results.append(result.mean().item())
|
84 |
-
|
85 |
-
# Calculate the average result across all batches
|
86 |
-
average_result = sum(results) / len(results)
|
87 |
-
print(f"Detection result: {'Watermarked or AI-Generated audio' if average_result > 0.5 else 'Not watermarked or AI-Generated'}")
|
88 |
-
return average_result
|
89 |
else:
|
90 |
print("Watermark detection not available. AudioSeal is not installed.")
|
91 |
return None
|
92 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
93 |
# Gradio interface
|
94 |
interface = gr.Interface(
|
95 |
-
fn=
|
96 |
-
inputs=[gr.Audio(label="Upload your audio", type="filepath")
|
97 |
outputs=["text", "image"],
|
98 |
title="Deep Fake Defender: AI Voice Cloning Detection",
|
99 |
-
description="Analyzes audio to detect AI-generated content.
|
100 |
)
|
101 |
|
102 |
if __name__ == "__main__":
|
|
|
11 |
# AudioSeal is an optional dependency. Record whether the import worked in
# `audioseal_available` so detect_watermark can report that detection is
# unavailable instead of failing with a NameError at call time.
try:
    from audioseal import AudioSeal
    audioseal_available = True
    print("AudioSeal is available for watermark detection.")
except ImportError as e:
    audioseal_available = False
    print(f"AudioSeal could not be imported: {e}")
|
18 |
|
19 |
def load_and_resample_audio(audio_file_path, target_sample_rate=16000):
    """Load an audio file and bring it to ``target_sample_rate``.

    Args:
        audio_file_path: Path handed to ``torchaudio.load``.
        target_sample_rate: Sample rate the returned waveform should have
            (default 16000).

    Returns:
        A ``(waveform, sample_rate)`` tuple. When the file's own rate
        differs from the target, the waveform is resampled and the target
        rate is returned; otherwise both values are returned as loaded.
    """
    audio, native_rate = torchaudio.load(audio_file_path)

    # Already at the requested rate: hand back exactly what was loaded.
    if native_rate == target_sample_rate:
        return audio, native_rate

    to_target_rate = torchaudio.transforms.Resample(
        orig_freq=native_rate,
        new_freq=target_sample_rate,
    )
    return to_target_rate(audio), target_sample_rate
|
26 |
|
27 |
def extract_mfcc_features(waveform, sample_rate, n_mfcc=40, n_mels=128, win_length=400, hop_length=160):
|
28 |
mfcc_transform = T.MFCC(
|
|
|
36 |
}
|
37 |
)
|
38 |
mfcc = mfcc_transform(waveform)
|
39 |
+
return mfcc.mean(dim=2)
|
40 |
|
41 |
def plot_spectrogram(waveform, sample_rate):
|
42 |
if waveform.ndim == 1:
|
|
|
53 |
buf.seek(0)
|
54 |
return Image.open(buf)
|
55 |
|
56 |
+
def detect_watermark(waveforms, sample_rate):
    """Estimate how likely a batch of audio carries an AudioSeal watermark.

    Args:
        waveforms: Batch tensor passed straight to the AudioSeal detector.
            The caller in this file builds it by concatenating chunks on
            dim 0 and adding a channel dimension at dim 1.
        sample_rate: Sample rate of ``waveforms``. It is not forwarded to
            the detector; the caller in this file resamples to 16 kHz
            before calling.

    Returns:
        The mean of ``results[:, 1, :]`` over the whole batch as a Python
        float, or ``None`` when AudioSeal could not be imported.
    """
    if not audioseal_available:
        print("Watermark detection not available. AudioSeal is not installed.")
        return None

    # Load the detector once and keep it on the function object: reloading
    # the checkpoint on every request is slow and gives the same model.
    detector = getattr(detect_watermark, "_detector", None)
    if detector is None:
        detector = AudioSeal.load_detector("audioseal_detector_16bits")
        detect_watermark._detector = detector

    # Inference only: no autograd graph is needed for the forward pass.
    with torch.no_grad():
        results, _messages = detector.forward(waveforms)  # b x 2+nbits x t

    # Channel 1 is used as the per-frame detection probability.
    detect_probs = results[:, 1, :]
    result = detect_probs.mean().cpu().item()
    print(f"Detection result: {'Watermarked or AI-Generated audio' if result > 0.5 else 'Not watermarked or AI-Generated'}")
    return result
|
67 |
|
68 |
+
def main(audio_file_path):
    """Gradio handler: analyse an uploaded audio file for a watermark.

    The audio is loaded and resampled, a spectrogram image is rendered, and
    the waveform is cut into 5-second windows that are scored together by
    ``detect_watermark``.

    Args:
        audio_file_path: Path of the uploaded audio file.

    Returns:
        A ``(text, image)`` tuple, one value per output declared on the
        Gradio interface: a human-readable result string and the
        spectrogram image returned by ``plot_spectrogram``.
    """
    waveform, resampled_sr = load_and_resample_audio(audio_file_path)
    spectrogram_image = plot_spectrogram(waveform, resampled_sr)

    samples_per_batch = 5 * resampled_sr  # 5s audios
    # Keep every full 5-second window. Dropping the last chunk
    # unconditionally would discard a valid window whenever the clip length
    # is an exact multiple of 5 s, and would leave nothing for a 5 s clip.
    audio_batches = [
        chunk
        for chunk in torch.split(waveform, samples_per_batch, dim=1)
        if chunk.shape[1] == samples_per_batch
    ]
    if not audio_batches:
        # Clip shorter than one window: score it as a single item rather
        # than concatenating an empty list, which raises.
        audio_batches = [waveform]

    audio_batched = torch.concat(audio_batches, dim=0)
    audio_batched = audio_batched.unsqueeze(1)  # add channel dimension
    result = detect_watermark(audio_batched, resampled_sr)
    print(f"Probability of watermark: {result}")

    if result is None:
        # detect_watermark returns None when AudioSeal is not importable.
        summary = "Watermark detection not available. AudioSeal is not installed."
    else:
        summary = f"Probability of watermark: {result}"
    return summary, spectrogram_image
|
77 |
+
|
78 |
# Gradio interface
# Wires `main` to a single audio upload that is delivered as a file path.
# Two outputs are declared (a text component and an image component), so the
# handler is expected to return one value for each, in that order.
interface = gr.Interface(
    fn=main,
    inputs=[gr.Audio(label="Upload your audio", type="filepath")],
    outputs=["text", "image"],
    title="Deep Fake Defender: AI Voice Cloning Detection",
    description="Analyzes audio to detect AI-generated content."
)
|
86 |
|
87 |
if __name__ == "__main__":
|