Update
Browse files
app.py
CHANGED
@@ -10,10 +10,8 @@ from PIL import Image
|
|
10 |
# Ensure AudioSeal is imported correctly
|
11 |
try:
|
12 |
from audioseal import AudioSeal
|
13 |
-
audioseal_available = True
|
14 |
print("AudioSeal is available for watermark detection.")
|
15 |
except ImportError as e:
|
16 |
-
audioseal_available = False
|
17 |
print(f"AudioSeal could not be imported: {e}")
|
18 |
|
19 |
def load_and_resample_audio(audio_file_path, target_sample_rate=16000):
|
@@ -21,8 +19,7 @@ def load_and_resample_audio(audio_file_path, target_sample_rate=16000):
|
|
21 |
if sample_rate != target_sample_rate:
|
22 |
resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=target_sample_rate)
|
23 |
waveform = resampler(waveform)
|
24 |
-
|
25 |
-
return waveform, sample_rate
|
26 |
|
27 |
def extract_mfcc_features(waveform, sample_rate, n_mfcc=40, n_mels=128, win_length=400, hop_length=160):
|
28 |
mfcc_transform = T.MFCC(
|
@@ -37,33 +34,35 @@ def extract_mfcc_features(waveform, sample_rate, n_mfcc=40, n_mels=128, win_leng
|
|
37 |
)
|
38 |
mfcc = mfcc_transform(waveform)
|
39 |
return mfcc.mean(dim=2)
|
40 |
-
|
41 |
def plot_spectrogram(waveform, sample_rate):
|
42 |
if waveform.ndim == 1:
|
43 |
-
waveform = waveform.unsqueeze(0)
|
44 |
spectrogram_transform = T.Spectrogram()
|
45 |
spectrogram = spectrogram_transform(waveform)
|
46 |
spectrogram_db = torchaudio.transforms.AmplitudeToDB()(spectrogram)
|
47 |
plt.figure(figsize=(10, 4))
|
48 |
plt.imshow(spectrogram_db[0].numpy(), cmap='hot', aspect='auto', origin='lower')
|
49 |
-
plt.axis('off')
|
50 |
buf = io.BytesIO()
|
51 |
plt.savefig(buf, format='png', bbox_inches='tight', pad_inches=0)
|
52 |
plt.close()
|
53 |
buf.seek(0)
|
54 |
return Image.open(buf)
|
55 |
|
56 |
-
def detect_watermark(
|
57 |
-
if
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
|
|
|
|
67 |
|
68 |
def main(audio_file_path):
|
69 |
waveform, resampled_sr = load_and_resample_audio(audio_file_path)
|
@@ -77,12 +76,12 @@ def main(audio_file_path):
|
|
77 |
|
78 |
# Gradio interface
|
79 |
interface = gr.Interface(
|
80 |
-
fn=
|
81 |
-
inputs=
|
82 |
outputs=["text", "image"],
|
83 |
title="Deep Fake Defender: AI Voice Cloning Detection",
|
84 |
description="Analyzes audio to detect AI-generated content."
|
85 |
)
|
86 |
|
87 |
if __name__ == "__main__":
|
88 |
-
interface.launch()
|
|
|
10 |
# Ensure AudioSeal is imported correctly
|
11 |
try:
|
12 |
from audioseal import AudioSeal
|
|
|
13 |
print("AudioSeal is available for watermark detection.")
|
14 |
except ImportError as e:
|
|
|
15 |
print(f"AudioSeal could not be imported: {e}")
|
16 |
|
17 |
def load_and_resample_audio(audio_file_path, target_sample_rate=16000):
|
|
|
19 |
if sample_rate != target_sample_rate:
|
20 |
resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=target_sample_rate)
|
21 |
waveform = resampler(waveform)
|
22 |
+
return waveform, target_sample_rate
|
|
|
23 |
|
24 |
def extract_mfcc_features(waveform, sample_rate, n_mfcc=40, n_mels=128, win_length=400, hop_length=160):
|
25 |
mfcc_transform = T.MFCC(
|
|
|
34 |
)
|
35 |
mfcc = mfcc_transform(waveform)
|
36 |
return mfcc.mean(dim=2)
|
37 |
+
|
38 |
def plot_spectrogram(waveform, sample_rate):
    """Render the dB-scaled spectrogram of ``waveform`` as a PIL image.

    Args:
        waveform: Audio tensor. A 1-D tensor is promoted to 2-D by adding a
            leading dimension; only index 0 along the leading dimension
            (presumably the first channel) is drawn.
        sample_rate: Not used by this function; kept so existing callers
            keep working.

    Returns:
        A PIL image decoded from an in-memory PNG of the spectrogram, drawn
        with the 'hot' colormap, axes hidden and no padding.
    """
    if waveform.ndim == 1:
        waveform = waveform.unsqueeze(0)  # Ensure waveform is 2D
    spectrogram = T.Spectrogram()(waveform)
    spectrogram_db = torchaudio.transforms.AmplitudeToDB()(spectrogram)

    # Work on an explicit figure instead of pyplot's implicit "current
    # figure": the implicit state is shared process-wide, so concurrent
    # requests could otherwise draw on / close each other's figures.
    fig = plt.figure(figsize=(10, 4))
    try:
        ax = fig.add_subplot(111)
        # detach()/cpu() make the conversion safe for tensors that carry
        # gradients or live on an accelerator; both are no-ops otherwise.
        ax.imshow(
            spectrogram_db[0].detach().cpu().numpy(),
            cmap='hot',
            aspect='auto',
            origin='lower',
        )
        ax.axis('off')  # Hide axes for a clean image
        buf = io.BytesIO()
        fig.savefig(buf, format='png', bbox_inches='tight', pad_inches=0)
    finally:
        # pyplot keeps every figure alive until it is closed explicitly, so
        # close it even when drawing or saving raised.
        plt.close(fig)
    buf.seek(0)
    return Image.open(buf)
|
52 |
|
53 |
+
def _get_audioseal_detector():
    """Return the AudioSeal detector, loading the checkpoint on first use only."""
    detector = getattr(_get_audioseal_detector, "_detector", None)
    if detector is None:
        detector = AudioSeal.load_detector("audioseal_detector_16bits")
        # Cache on the function object so later requests reuse the model
        # instead of reloading the checkpoint every time.
        _get_audioseal_detector._detector = detector
    return detector


def detect_watermark(waveform, sample_rate):
    """Run AudioSeal watermark detection and render the clip's spectrogram.

    Args:
        waveform: Audio tensor. 1-D (samples), 2-D (channels, samples) and
            3-D (batch, channels, samples) inputs are accepted.
        sample_rate: Sample rate of ``waveform`` in Hz; forwarded to the
            detector.

    Returns:
        A ``(message, spectrogram_image)`` tuple. ``message`` reports the
        verdict (watermarked when the mean detection probability exceeds
        0.5) together with that probability. If AudioSeal could not be
        imported, ``message`` is "AudioSeal not available" and only the
        spectrogram is produced.
    """
    if 'AudioSeal' not in globals():
        return "AudioSeal not available", plot_spectrogram(waveform, sample_rate)

    # Local import keeps this block self-contained; torch is already a hard
    # dependency of torchaudio, which this file relies on.
    import torch

    # AudioSeal expects (batch, channels, samples). A tensor as loaded from
    # disk is typically (channels, samples), so add the batch dimension and
    # fold the channels down to one.
    # NOTE(review): assumes the released detector is mono - confirm.
    detector_input = waveform
    if detector_input.ndim == 1:
        detector_input = detector_input.unsqueeze(0)
    if detector_input.ndim == 2:
        detector_input = detector_input.mean(dim=0, keepdim=True).unsqueeze(0)

    detector = _get_audioseal_detector()
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        results, _messages = detector.forward(detector_input, sample_rate=sample_rate)

    # Per the AudioSeal docs, index 1 along dim 1 is the per-frame
    # probability that a watermark is present.
    detect_probs = results[:, 1, :]
    result = detect_probs.mean().cpu().item()
    message = f"Detection result: {'Watermarked or AI-Generated audio' if result > 0.5 else 'Not watermarked or AI-Generated'}\nProbability of watermark: {result}"
    spectrogram_image = plot_spectrogram(waveform, sample_rate)
    return message, spectrogram_image  # Return both message and image
|
65 |
+
|
66 |
|
67 |
def main(audio_file_path):
|
68 |
waveform, resampled_sr = load_and_resample_audio(audio_file_path)
|
|
|
76 |
|
77 |
# Gradio interface
|
78 |
def _detect_from_file(audio_file_path):
    """Gradio callback: load the uploaded clip, then run watermark detection.

    ``gr.Audio(type="filepath")`` hands the callback a single filesystem
    path, whereas ``detect_watermark`` takes ``(waveform, sample_rate)``.
    This adapter bridges the two.

    Args:
        audio_file_path: Path of the uploaded audio file, or ``None`` when
            the form is submitted without audio.

    Returns:
        The ``(text, image)`` pair expected by the interface outputs.
    """
    if not audio_file_path:
        # Nothing was uploaded; report it instead of failing inside the loader.
        return "Please upload an audio file.", None
    waveform, sample_rate = load_and_resample_audio(audio_file_path)
    return detect_watermark(waveform, sample_rate)


# Gradio interface
interface = gr.Interface(
    fn=_detect_from_file,
    inputs=gr.Audio(label="Upload your audio", type="filepath"),
    outputs=["text", "image"],
    title="Deep Fake Defender: AI Voice Cloning Detection",
    description="Analyzes audio to detect AI-generated content."
)

if __name__ == "__main__":
    interface.launch()
|