|
import time |
|
import streamlit as st |
|
|
|
import os |
|
import torch |
|
import datetime |
|
import numpy as np |
|
import soundfile |
|
from wavmark.utils import file_reader |
|
from audioseal import AudioSeal |
|
import torchaudio |
|
from pydub import AudioSegment |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def read_as_single_channel_16k(audio_file, def_sr=16000, verbose=True, aim_second=None):
    """Load an audio file as a single-channel signal at ``def_sr`` Hz.

    Args:
        audio_file: Path to a ``.mp3``, ``.wav`` or ``.flac`` file.
        def_sr: Target sample rate in Hz. The signal is resampled when the
            file's own rate differs.
        verbose: When true, print a warning whenever channels are dropped or
            the audio is resampled.
        aim_second: Optional target duration in seconds. Shorter audio is
            looped end-to-end until it is long enough, then the result is
            trimmed to ``int(def_sr * aim_second)`` samples.

    Returns:
        A ``(data, sr, audio_length_second)`` tuple: the samples, the sample
        rate (always ``def_sr``) and the duration in seconds measured after
        resampling but before any ``aim_second`` looping/trimming.

    Raises:
        FileNotFoundError: If ``audio_file`` does not exist.
        ValueError: If the file extension is not supported, or if the audio
            is empty while ``aim_second`` is requested.
    """
    # Explicit raise instead of ``assert`` so the check survives ``python -O``.
    if not os.path.exists(audio_file):
        raise FileNotFoundError(f"audio file not found: {audio_file}")

    file_extension = os.path.splitext(audio_file)[1].lower()

    if file_extension == ".mp3":
        # Imported lazily: only the MP3 path needs librosa, and it is not
        # among the module-level imports.
        import librosa

        data, origin_sr = librosa.load(audio_file, sr=None)
    elif file_extension in (".wav", ".flac"):
        data, origin_sr = soundfile.read(audio_file)
    else:
        # ValueError is an Exception subclass, so existing handlers still match.
        raise ValueError("unsupported file:" + file_extension)

    # 2-D data is reduced to its first column, i.e. only one channel is kept.
    if data.ndim == 2:
        if verbose:
            print("Warning! the input audio has multiple chanel, this tool only use the first channel!")
        data = data[:, 0]

    if origin_sr != def_sr:
        # Imported lazily for the same reason as librosa above.
        import resampy

        data = resampy.resample(data, origin_sr, def_sr)
        if verbose:
            print("Warning! The original samplerate is not 16Khz; the watermarked audio will be re-sampled to 16KHz")

    sr = def_sr
    audio_length_second = 1.0 * len(data) / sr

    if aim_second is not None:
        if len(data) == 0:
            raise ValueError("cannot extend empty audio to the requested duration")
        # int() keeps the slice bound valid when aim_second is a float.
        target_samples = int(sr * aim_second)
        if len(data) < target_samples:
            # np.tile loops the whole clip (a b c a b c ...). np.repeat would
            # duplicate every sample in place (a a b b c c ...), which
            # stretches the audio instead of repeating it.
            repeat_count = target_samples // len(data) + 1
            data = np.tile(data, repeat_count)
        data = data[:target_samples]

    return data, sr, audio_length_second
|
|
|
|
|
def my_read_file(audio_path, max_second):
    """Read ``audio_path`` as single-channel audio, capped at ``max_second`` seconds.

    The file is loaded through ``read_as_single_channel_16k`` at the
    module-level ``default_sr`` sample rate.

    Args:
        audio_path: Path of the audio file to read.
        max_second: Maximum duration to keep, in seconds.

    Returns:
        A ``(signal, sr, audio_length_second)`` tuple. When the audio is
        longer than ``max_second`` the signal is truncated and the reported
        duration is set to ``max_second``.
    """
    signal, sr, audio_length_second = read_as_single_channel_16k(audio_path, default_sr)

    if audio_length_second > max_second:
        # Slice with the rate the reader actually returned; int() keeps the
        # bound valid when max_second is a float.
        signal = signal[: int(sr * max_second)]
        audio_length_second = max_second

    return signal, sr, audio_length_second
|
|
|
def create_default_value():
    """Seed ``st.session_state.def_value`` with a random bit string, once.

    The string holds ``32 - len_start_bit`` characters, each ``"0"`` or
    ``"1"``. Nothing is done when ``def_value`` is already present in the
    session state.
    """
    if "def_value" in st.session_state:
        return

    random_bits = np.random.choice([0, 1], size=32 - len_start_bit)
    st.session_state.def_value = "".join(map(str, random_bits))
|
|
|
|
|
def main():
    """Render the Streamlit demo page and process an uploaded audio file.

    Flow: show the intro text, accept a WAV/MP3 upload, store it in the
    temporary directory, convert MP3 uploads to WAV, display the audio and
    some tensor/sample-rate diagnostics, then - for the "Add Watermark"
    action - compute an AudioSeal watermark for the first
    ``max_second_encode`` seconds.

    Relies on the module-level names ``model`` and ``max_second_encode``
    being defined before it is called.
    """
    import tempfile

    create_default_value()

    markdown_text = """
    # MDS07
    [AudioSeal](https://github.com/jcha0155/AudioSealEnhanced) is the next-generation watermarking tool driven by AI.
    You can upload an audio file and encode a custom 16-bit watermark or perform decoding from a watermarked audio.

    This page is for demonstration usage and only process **the first minute** of the audio.
    If you have longer files for processing, we recommend using [our python toolkit](https://github.com/jcha0155/AudioSealEnhanced).
    """

    st.markdown(markdown_text)

    audio_file = st.file_uploader("Upload Audio", type=["wav", "mp3"], accept_multiple_files=False)
    if not audio_file:
        return

    # The file name comes from the client: basename() strips any directory
    # components so the upload cannot be written outside the temp directory.
    upload_name = os.path.basename(audio_file.name)
    tmp_input_audio_file = os.path.join(tempfile.gettempdir(), upload_name)
    with open(tmp_input_audio_file, "wb") as f:
        f.write(audio_file.getbuffer())

    # Only MP3 uploads need conversion. Decoding a WAV upload as MP3 and then
    # exporting it over itself (same path) is what the old code did.
    stem, extension = os.path.splitext(tmp_input_audio_file)
    if extension.lower() == ".mp3":
        wav_output_file = stem + ".wav"
        AudioSegment.from_mp3(tmp_input_audio_file).export(wav_output_file, format="wav")
    else:
        wav_output_file = tmp_input_audio_file

    # The labelled dumps below look like debugging aids; they are kept as-is.
    wav, sample_rate = torchaudio.load(wav_output_file)
    st.markdown("Before unsquuezewav")
    st.markdown(wav)
    wav = wav.unsqueeze(0)

    st.audio(wav_output_file, format="audio/wav")

    st.markdown("SR")
    st.markdown(sample_rate)
    st.markdown("after unsqueeze wav")
    st.markdown(wav)

    action = st.selectbox("Select Action", ["Add Watermark", "Decode Watermark"])

    if action == "Add Watermark":
        watermark_text = st.text_input("The watermark (0, 1 list of length-16):", value=st.session_state.def_value)
        add_watermark_button = st.button("Add Watermark", key="add_watermark_btn")
        if add_watermark_button and watermark_text:
            with st.spinner("Adding Watermark..."):
                # my_read_file expects a *path* and returns a 3-tuple; the old
                # code passed the tensor in and forwarded the whole tuple.
                signal, sr, _ = my_read_file(wav_output_file, max_second_encode)
                # AudioSeal documents its input as (batch, channels, samples).
                signal_tensor = torch.as_tensor(signal, dtype=torch.float32).reshape(1, 1, -1)
                watermark = model.get_watermark(signal_tensor, sr)
                # NOTE(review): the watermark is computed but not yet applied
                # to the audio or shown to the user, and watermark_text is
                # not passed to the model - confirm the intended follow-up.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Settings read as globals by the functions in this file. They are defined at
# module level (not under the __main__ guard) so the helpers also work when
# the file is imported instead of being run as a script.
default_sr = 16000  # sample rate my_read_file asks the reader for
max_second_encode = 60  # seconds of audio main() keeps when adding a watermark
max_second_decode = 30  # not referenced by the visible code
len_start_bit = 16  # create_default_value() generates 32 - len_start_bit bits

if __name__ == "__main__":
    # NOTE(review): `device` is computed but nothing visible moves the model
    # or the audio onto it - confirm whether that is intended.
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    model = AudioSeal.load_generator("audioseal_wm_16bits")
    main()
|
|
|
|
|
|
|
|
|
|