Spaces:

M4869
/

WavMark

Runtime error

App Files Files Community

my committed on Jul 31, 2023

Commit

32ca76b

1 Parent(s): 84c0c04

Add application file

Browse files

Files changed (19) hide show

requirements.txt +3 -0
.gitignore +146 -0
app.py +143 -0
models/__init__.py +0 -0
models/hinet.py +20 -0
models/invblock.py +36 -0
models/module_util.py +79 -0
models/my_model_v7_recover.py +95 -0
models/rrdb_denselayer.py +25 -0
utils/__init__.py +0 -0
utils/bin_util.py +104 -0
utils/file_reader.py +77 -0
utils/metric_util.py +88 -0
utils/model_util.py +118 -0
utils/pesq_util.py +25 -0
utils/pickle_util.py +27 -0
utils/silent_util.py +18 -0
utils/wm_add_v2.py +87 -0
utils/wm_decode_v2.py +113 -0

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+torch==1.13.1
+torchvision==0.14.1
+torchaudio==0.13.1

.gitignore ADDED Viewed

	@@ -0,0 +1,146 @@

+.DS_Store
+#idea
+.idea
+wandb/
+temp/
+data/
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/

app.py ADDED Viewed

	@@ -0,0 +1,143 @@

+import pdb
+import time
+import soundfile
+import streamlit as st
+import os
+from utils import wm_add_v2, file_reader, model_util, wm_decode_v2, bin_util
+from models import my_model_v7_recover
+import torch
+import uuid
+import datetime
+import numpy as np
+from huggingface_hub import hf_hub_download, HfApi
+# Function to add watermark to audio
+def add_watermark(audio_path, watermark_text):
+    """Embed a watermark into an audio file and save the result as a new WAV.
+
+    Relies on the module-level globals ``len_start_bit``, ``device`` and
+    ``model`` that the ``__main__`` section sets up.
+
+    Args:
+        audio_path: Path of the audio file to watermark.
+        watermark_text: Payload as exactly 5 hexadecimal characters (4 bits
+            each; together with the 12 start bits this fills the 32-bit message).
+
+    Returns:
+        Path of the watermarked WAV file written under ``temp/``.
+
+    Raises:
+        AssertionError: If ``watermark_text`` is not 5 characters long.
+    """
+    assert len(watermark_text) == 5
+    _, _, watermark = wm_add_v2.create_parcel_message(len_start_bit, 32, watermark_text)
+    data, sr, _ = file_reader.read_as_single_channel_16k(audio_path, 16000)
+    _, signal_wmd, _ = wm_add_v2.add_watermark(watermark, data, 16000, 0.1, device, model)
+    # temp/ is git-ignored, so it does not exist on a fresh checkout.
+    os.makedirs('temp', exist_ok=True)
+    # Timestamp plus UUID keeps concurrent requests from overwriting each other.
+    tmp_file_name = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S') + "_" + str(uuid.uuid4()) + ".wav"
+    tmp_file_path = 'temp/' + tmp_file_name
+    soundfile.write(tmp_file_path, signal_wmd, sr)
+    return tmp_file_path
+# Function to decode watermark from audio
+def decode_watermark(audio_path):
+    """Try to recover the watermark payload from an audio file.
+
+    Uses the module-level ``len_start_bit``, ``model`` and ``device``.
+
+    Args:
+        audio_path: Path of the audio file to inspect.
+
+    Returns:
+        The payload as a hex string, or ``"No Watermark"`` when the
+        extractor returns no result.
+    """
+    samples, sr, _ = file_reader.read_as_single_channel_16k(audio_path, 16000)
+    # Only the first five seconds are searched.
+    head = samples[:5 * sr]
+    expected_start = wm_add_v2.fix_pattern[:len_start_bit]
+    _, mean_result, _ = wm_decode_v2.extract_watermark_v2(
+        head, expected_start, 0.1, 16000, 0.3, model, device, "best")
+    if mean_result is not None:
+        # Drop the start marker; what remains is the payload.
+        return bin_util.binArray2HexStr(mean_result[len_start_bit:])
+    return "No Watermark"
+# Main web app
+def main():
+    """Render the Streamlit page for adding or decoding an audio watermark.
+
+    Reads the module-level ``len_start_bit`` and, through ``add_watermark`` and
+    ``decode_watermark``, the loaded ``model`` and ``device``.
+    """
+    # Payload length in hex characters: 32-bit message minus the start bits, 4 bits each.
+    payload_hex_len = (32 - len_start_bit) // 4
+    if "def_value" not in st.session_state:
+        random_bits = np.random.choice([0, 1], size=32 - len_start_bit)
+        # Left-pad so a random payload that starts with zero bits keeps its full length.
+        st.session_state.def_value = bin_util.binArray2HexStr(random_bits).zfill(payload_hex_len)
+    # temp/ is git-ignored and therefore missing on a fresh checkout.
+    os.makedirs("temp", exist_ok=True)
+    st.title("Neural Audio Watermark")
+    st.write("Choose the action you want to perform:")
+    action = st.selectbox("Select Action", ["Add Watermark", "Decode Watermark"])
+    if action == "Add Watermark":
+        audio_file = st.file_uploader("Upload Audio File (WAV)", type=["wav"], accept_multiple_files=False)
+        if audio_file:
+            # basename() keeps a crafted upload name from pointing outside temp/.
+            tmp_input_audio_file = os.path.join("temp", os.path.basename(audio_file.name))
+            with open(tmp_input_audio_file, "wb") as f:
+                f.write(audio_file.getbuffer())
+            st.audio(tmp_input_audio_file, format="audio/wav")
+            watermark_text = st.text_input("Enter Watermark Text (%d hex characters)" % payload_hex_len,
+                                           value=st.session_state.def_value)
+            add_watermark_button = st.button("Add Watermark", key="add_watermark_btn")
+            if add_watermark_button:  # runs once the button has been clicked
+                if len(watermark_text) != payload_hex_len or not bin_util.is_hex_str(watermark_text):
+                    # Report bad input in the page instead of tripping the assert in add_watermark.
+                    st.error("Watermark must be exactly %d hexadecimal characters (0-9, a-f)." % payload_hex_len)
+                else:
+                    with st.spinner("Adding Watermark..."):
+                        t1 = time.time()
+                        watermarked_audio = add_watermark(tmp_input_audio_file, watermark_text)
+                        encode_time_cost = time.time() - t1
+                        st.write("Watermarked Audio:")
+                        st.audio(watermarked_audio, format="audio/wav")
+                        st.write("Time Cost:%d seconds" % encode_time_cost)
+    elif action == "Decode Watermark":
+        audio_file = st.file_uploader("Upload Audio File (WAV/MP3)", type=["wav", "mp3"], accept_multiple_files=False)
+        if audio_file:
+            if st.button("Decode Watermark"):
+                # 1. Save the upload to disk so the reader can open it by path.
+                tmp_file_for_decode_path = os.path.join("temp", os.path.basename(audio_file.name))
+                with open(tmp_file_for_decode_path, "wb") as f:
+                    f.write(audio_file.getbuffer())
+                # 2. Run the decoder.
+                with st.spinner("Decoding..."):
+                    t1 = time.time()
+                    decoded_watermark = decode_watermark(tmp_file_for_decode_path)
+                    decode_cost = time.time() - t1
+                print("decoded_watermark", decoded_watermark)
+                # Display the decoded watermark
+                st.write("Decoded Watermark:", decoded_watermark)
+                st.write("Time Cost:%d seconds" % (decode_cost))
+def load_model(resume_path):
+    """Download the pretrained checkpoint from the Hugging Face Hub and build the model.
+
+    Uses the module-level ``device`` and the ``api_key`` entry of ``st.secrets``.
+
+    Args:
+        resume_path: Ignored. It is overwritten by the path of the checkpoint
+            downloaded from the Hub; kept so existing callers keep working.
+
+    Returns:
+        The watermark model in eval mode with the checkpoint weights loaded.
+    """
+    n_fft = 1000
+    hop_length = 400
+    # https://huggingface.co/M4869/InvertibleWM/blob/main/step59000_snr39.99_pesq4.35_BERP_none0.30_mean1.81_std1.81.pkl
+    # The token is a secret: never print or log it.
+    api_key = st.secrets["api_key"]
+    model_ckpt_path = hf_hub_download(repo_id="M4869/InvertibleWM",
+                                      filename="step59000_snr39.99_pesq4.35_BERP_none0.30_mean1.81_std1.81.pkl",
+                                      token=api_key
+                                      )
+    resume_path = model_ckpt_path
+    model = my_model_v7_recover.Model(16000, 32, n_fft, hop_length,
+                                      use_recover_layer=False, num_layers=8).to(device)
+    # NOTE: torch.load unpickles the file; only load checkpoints from a trusted source.
+    checkpoint = torch.load(resume_path, map_location=torch.device('cpu'))
+    # Strip a possible DataParallel 'module.' prefix before the strict load.
+    state_dict = model_util.map_state_dict(checkpoint['model'])
+    model.load_state_dict(state_dict, strict=True)
+    model.eval()
+    return model
+if __name__ == "__main__":
+    # The names below become module-level globals that add_watermark,
+    # decode_watermark, main and load_model read directly.
+    # Number of leading fix_pattern bits used as the start marker of each 32-bit message.
+    len_start_bit = 12
+    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
+    # The path passed here is replaced inside load_model by the checkpoint downloaded from the Hub.
+    model = load_model("./data/step59000_snr39.99_pesq4.35_BERP_none0.30_mean1.81_std1.81.pkl")
+    main()
+    # decode_watermark("/Users/my/Downloads/7a95b353a46893903e9f946c24170b210ce14e8c52c63bb2ab3d144e.wav")

models/__init__.py ADDED Viewed

File without changes

models/hinet.py ADDED Viewed

	@@ -0,0 +1,20 @@

+import torch
+from models.invblock import INV_block
+class Hinet(torch.nn.Module):
+    """Stack of invertible blocks applied to a (cover, secret) tensor pair."""
+    def __init__(self, in_channel=2, num_layers=16):
+        super().__init__()
+        blocks = []
+        for _ in range(num_layers):
+            blocks.append(INV_block(in_channel))
+        self.inv_blocks = torch.nn.ModuleList(blocks)
+    def forward(self, x1, x2, rev=False):
+        """Run the stack forwards, or backwards when ``rev`` is truthy.
+
+        Args:
+            x1: Cover tensor.
+            x2: Secret tensor.
+            rev: If truthy, apply the blocks in reverse order with ``rev=True``.
+
+        Returns:
+            The transformed ``(x1, x2)`` pair.
+        """
+        if rev:
+            # Inversion undoes the blocks last-to-first.
+            for block in reversed(self.inv_blocks):
+                x1, x2 = block(x1, x2, rev=True)
+            return x1, x2
+        for block in self.inv_blocks:
+            x1, x2 = block(x1, x2)
+        return x1, x2

models/invblock.py ADDED Viewed

	@@ -0,0 +1,36 @@

+import torch
+import torch.nn as nn
+from models.rrdb_denselayer import ResidualDenseBlock_out
+class INV_block(nn.Module):
+    """Coupling block with an additive step followed by an affine step, plus its exact inverse."""
+    def __init__(self, channel=2, subnet_constructor=ResidualDenseBlock_out, clamp=2.0):
+        super().__init__()
+        self.clamp = clamp
+        # rho: produces the log-scale fed to e()
+        self.r = subnet_constructor(channel, channel)
+        # eta: produces the shift of the affine step
+        self.y = subnet_constructor(channel, channel)
+        # phi: produces the shift of the additive step
+        self.f = subnet_constructor(channel, channel)
+    def e(self, s):
+        """Return the positive scale exp(clamp * 2 * (sigmoid(s) - 0.5))."""
+        return torch.exp(self.clamp * 2 * (torch.sigmoid(s) - 0.5))
+    def forward(self, x1, x2, rev=False):
+        """Apply the block, or its inverse when ``rev`` is truthy.
+
+        Returns:
+            The transformed ``(y1, y2)`` pair.
+        """
+        if rev:
+            # Undo the affine step first, then the additive step.
+            scale, shift = self.r(x1), self.y(x1)
+            out2 = (x2 - shift) / self.e(scale)
+            out1 = (x1 - self.f(out2))
+            return out1, out2
+        out1 = x1 + self.f(x2)
+        scale, shift = self.r(out1), self.y(out1)
+        out2 = self.e(scale) * x2 + shift
+        return out1, out2

models/module_util.py ADDED Viewed

	@@ -0,0 +1,79 @@

+import torch
+import torch.nn as nn
+import torch.nn.init as init
+import torch.nn.functional as F
+def initialize_weights(net_l, scale=1):
+    """Re-initialise the Conv2d, Linear and BatchNorm2d layers of one or more networks.
+
+    Conv2d and Linear weights get Kaiming-normal (fan_in) values multiplied
+    by ``scale`` and zeroed biases; BatchNorm2d gets weight 1 and bias 0.
+
+    Args:
+        net_l: A module or a list of modules.
+        scale: Factor applied to the freshly initialised weights.
+    """
+    networks = net_l if isinstance(net_l, list) else [net_l]
+    for network in networks:
+        for layer in network.modules():
+            if isinstance(layer, (nn.Conv2d, nn.Linear)):
+                init.kaiming_normal_(layer.weight, a=0, mode='fan_in')
+                layer.weight.data *= scale  # shrink for residual blocks
+                if layer.bias is not None:
+                    layer.bias.data.zero_()
+            elif isinstance(layer, nn.BatchNorm2d):
+                init.constant_(layer.weight, 1)
+                init.constant_(layer.bias.data, 0.0)
+def make_layer(block, n_layers):
+    """Build an ``nn.Sequential`` of ``n_layers`` modules, each created by calling ``block()``."""
+    return nn.Sequential(*[block() for _ in range(n_layers)])
+class ResidualBlock_noBN(nn.Module):
+    '''Residual block without batch normalisation.
+    ---Conv-ReLU-Conv-+-
+     |________________|
+    '''
+    def __init__(self, nf=64):
+        super().__init__()
+        self.conv1 = nn.Conv2d(nf, nf, kernel_size=3, stride=1, padding=1, bias=True)
+        self.conv2 = nn.Conv2d(nf, nf, kernel_size=3, stride=1, padding=1, bias=True)
+        # Start with small weights on both convolutions.
+        initialize_weights([self.conv1, self.conv2], 0.1)
+    def forward(self, x):
+        """Return ``x`` plus the output of the Conv-ReLU-Conv branch."""
+        residual = self.conv2(F.relu(self.conv1(x), inplace=True))
+        return x + residual
+def flow_warp(x, flow, interp_mode='bilinear', padding_mode='zeros', align_corners=True):
+    """Warp an image or feature map with optical flow.
+
+    Args:
+        x (Tensor): size (N, C, H, W)
+        flow (Tensor): size (N, 2, H, W), pixel offsets; channel 0 is the
+            x (width) offset and channel 1 the y (height) offset
+        interp_mode (str): 'nearest' or 'bilinear'
+        padding_mode (str): 'zeros' or 'border' or 'reflection'
+        align_corners (bool): passed to ``grid_sample``. The grid below is
+            normalised with ``W - 1`` / ``H - 1``, which is the
+            ``align_corners=True`` convention, so True makes zero flow the identity.
+
+    Returns:
+        Tensor: warped image or feature map
+
+    Raises:
+        AssertionError: If the spatial size of ``flow`` differs from that of ``x``.
+    """
+    # Move the two offset channels last: (N, 2, H, W) -> (N, H, W, 2).
+    flow = flow.permute(0, 2, 3, 1)
+    assert x.size()[-2:] == flow.size()[1:3]
+    _, _, H, W = x.size()
+    # Base sampling grid of absolute pixel coordinates.
+    grid_y, grid_x = torch.meshgrid(torch.arange(0, H), torch.arange(0, W), indexing='ij')
+    grid = torch.stack((grid_x, grid_y), 2).float()  # (H, W, 2), last axis is (x, y)
+    grid = grid.type_as(x)
+    vgrid = grid + flow
+    # Scale the grid to [-1, 1] as grid_sample expects.
+    vgrid_x = 2.0 * vgrid[:, :, :, 0] / max(W - 1, 1) - 1.0
+    vgrid_y = 2.0 * vgrid[:, :, :, 1] / max(H - 1, 1) - 1.0
+    vgrid_scaled = torch.stack((vgrid_x, vgrid_y), dim=3)
+    return F.grid_sample(x, vgrid_scaled, mode=interp_mode, padding_mode=padding_mode,
+                         align_corners=align_corners)

models/my_model_v7_recover.py ADDED Viewed

	@@ -0,0 +1,95 @@

+import pdb
+import torch.optim
+import torch.nn as nn
+from models.hinet import Hinet
+# from utils.attacks import attack_layer, mp3_attack_v2, butterworth_attack
+import numpy as np
+import random
+device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
+class Model(nn.Module):
+    """Invertible audio watermarking network that works on STFT spectrograms.
+
+    Args:
+        num_point: Length the message is expanded to by ``watermark_fc``
+            (presumably the number of audio samples per segment, so that both
+            spectrograms have the same shape -- confirm against callers).
+        num_bit: Number of bits in the message.
+        n_fft: FFT size used by ``stft`` / ``istft``.
+        hop_length: Hop size used by ``stft`` / ``istft``.
+        use_recover_layer: Whether ``decode`` first passes the spectrogram
+            through ``recover_layer``.
+        num_layers: Number of invertible blocks in the ``Hinet`` stack.
+    """
+    def __init__(self, num_point, num_bit, n_fft, hop_length, use_recover_layer, num_layers):
+        super(Model, self).__init__()
+        self.hinet = Hinet(num_layers=num_layers)
+        self.watermark_fc = torch.nn.Linear(num_bit, num_point)
+        self.watermark_fc_back = torch.nn.Linear(num_point, num_bit)
+        self.n_fft = n_fft
+        self.hop_length = hop_length
+        self.dropout1 = torch.nn.Dropout()
+        self.identity = torch.nn.Identity()
+        # Always created, even when unused, so its parameters stay in the state dict.
+        self.recover_layer = SameSizeConv2d(2, 2)
+        self.use_recover_layer = use_recover_layer
+    def stft(self, data):
+        """Return the STFT of ``data`` as a real tensor whose last axis is (real, imag)."""
+        window = torch.hann_window(self.n_fft).to(data.device)
+        # Newer PyTorch requires return_complex=True for real inputs;
+        # view_as_real yields the same (..., 2) layout the rest of the model expects.
+        spec = torch.stft(data, n_fft=self.n_fft, hop_length=self.hop_length, window=window, return_complex=True)
+        # e.g. [1, 501, 41, 2] for 16000 samples with n_fft=1000, hop_length=400
+        return torch.view_as_real(spec)
+    def istft(self, signal_wmd_fft):
+        """Invert ``stft``: take a (..., 2) real/imag tensor and return the waveform."""
+        window = torch.hann_window(self.n_fft).to(signal_wmd_fft.device)
+        # torch.istft only accepts complex input since PyTorch 2.0. view_as_complex
+        # needs a unit-stride last axis, hence contiguous() after the permutes in enc_dec.
+        spec = torch.view_as_complex(signal_wmd_fft.contiguous())
+        return torch.istft(spec, n_fft=self.n_fft, hop_length=self.hop_length, window=window,
+                           return_complex=False)
+    def encode(self, signal, message, need_fft=False):
+        """Embed ``message`` into ``signal``.
+
+        Args:
+            signal: Batch of waveforms.
+            message: Batch of message vectors with ``num_bit`` entries each.
+            need_fft: If True, also return the signal and message spectrograms.
+
+        Returns:
+            The watermarked waveform, or the tuple
+            ``(signal_wmd, signal_fft, message_fft)`` when ``need_fft`` is True.
+        """
+        # 1. STFT of the signal: (batch, freq_bins, time_frames, 2)
+        signal_fft = self.stft(signal)
+        # 2. Expand the message to waveform length and take its STFT
+        message_expand = self.watermark_fc(message)
+        message_fft = self.stft(message_expand)
+        # 3. Forward pass through the invertible network
+        signal_wmd_fft, msg_remain = self.enc_dec(signal_fft, message_fft, rev=False)
+        # (batch, freq_bins, time_frames, 2)
+        signal_wmd = self.istft(signal_wmd_fft)
+        if need_fft:
+            return signal_wmd, signal_fft, message_fft
+        return signal_wmd
+    def decode(self, signal):
+        """Recover the message estimate from ``signal``, clamped to [-1, 1]."""
+        signal_fft = self.stft(signal)
+        if self.use_recover_layer:
+            signal_fft = self.recover_layer(signal_fft)
+        # The received spectrogram is used for both inputs of the inverse pass.
+        watermark_fft = signal_fft
+        _, message_restored_fft = self.enc_dec(signal_fft, watermark_fft, rev=True)
+        message_restored_expanded = self.istft(message_restored_fft)
+        message_restored_float = self.watermark_fc_back(message_restored_expanded).clamp(-1, 1)
+        return message_restored_float
+    def enc_dec(self, signal, watermark, rev):
+        """Run the invertible network on two spectrograms and restore their axis order."""
+        # (batch, freq, time, 2) -> (batch, 2, time, freq), e.g. [4, 2, 41, 501]
+        signal = signal.permute(0, 3, 2, 1)
+        watermark = watermark.permute(0, 3, 2, 1)
+        signal2, watermark2 = self.hinet(signal, watermark, rev)
+        return signal2.permute(0, 3, 2, 1), watermark2.permute(0, 3, 2, 1)
+class SameSizeConv2d(nn.Module):
+    """1x1 convolution over the last axis of a channels-last 4-D tensor."""
+    def __init__(self, in_channels, out_channels):
+        super().__init__()
+        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0)
+    def forward(self, x):
+        """Map (batch, A, B, in_channels) to (batch, A, B, out_channels), e.g. (batch, 501, 41, 2)."""
+        # Conv2d wants channels first: (batch, 2, 501, 41)
+        channels_first = x.permute(0, 3, 1, 2)
+        # Convolve, then move the channel axis back to the end.
+        return self.conv(channels_first).permute(0, 2, 3, 1)

models/rrdb_denselayer.py ADDED Viewed

	@@ -0,0 +1,25 @@

+import torch
+import torch.nn as nn
+import models.module_util as mutil
+# Dense connection
+class ResidualDenseBlock_out(nn.Module):
+    """Five 3x3 convolutions with dense connections: each one sees the input and all earlier outputs."""
+    def __init__(self, in_channel, out_channel, bias=True):
+        super().__init__()
+        growth = 32  # channels added by each intermediate convolution
+        self.conv1 = nn.Conv2d(in_channel, growth, 3, 1, 1, bias=bias)
+        self.conv2 = nn.Conv2d(in_channel + growth, growth, 3, 1, 1, bias=bias)
+        self.conv3 = nn.Conv2d(in_channel + 2 * growth, growth, 3, 1, 1, bias=bias)
+        self.conv4 = nn.Conv2d(in_channel + 3 * growth, growth, 3, 1, 1, bias=bias)
+        self.conv5 = nn.Conv2d(in_channel + 4 * growth, out_channel, 3, 1, 1, bias=bias)
+        self.lrelu = nn.LeakyReLU(inplace=True)
+        # Scale 0 zeroes conv5's weights, so the block initially outputs only conv5's bias.
+        mutil.initialize_weights([self.conv5], 0.)
+    def forward(self, x):
+        """Return conv5 applied to the concatenation of ``x`` and the four intermediate features."""
+        features = [x]
+        for conv in (self.conv1, self.conv2, self.conv3, self.conv4):
+            features.append(self.lrelu(conv(torch.cat(features, 1))))
+        # The last convolution has no activation.
+        return self.conv5(torch.cat(features, 1))

utils/__init__.py ADDED Viewed

File without changes

utils/bin_util.py ADDED Viewed

	@@ -0,0 +1,104 @@

+import numpy as np
+def hexChar2binStr(v):
+    """Convert one hex digit to its 4-character binary string, e.g. 'e' -> '1110'."""
+    assert len(v) == 1
+    return '{0:04b}'.format(int(v, 16))
+def hexStr2BinStr(hex_str):
+    """Convert a hex string to a binary string with 4 bits per hex digit."""
+    # e.g. 'ec' -> '1110' + '1100'
+    return "".join(hexChar2binStr(c) for c in hex_str)
+def hexStr2BinArray(hex_str):
+    """Convert a hex string to a numpy array of 0/1 integers."""
+    return np.array([int(bit) for bit in hexStr2BinStr(hex_str)])
+def binStr2HexStr(binary_str):
+    """Convert a binary string to lowercase hex, keeping leading zeros.
+
+    The result has one hex digit per (started) group of 4 bits, so
+    ``binStr2HexStr(hexStr2BinStr(h)) == h`` for any lowercase hex string ``h``.
+
+    Raises:
+        ValueError: If ``binary_str`` is empty or not a binary number.
+    """
+    # hex() drops leading zeros, which would shorten e.g. '00001111' to 'f'.
+    width = (len(binary_str) + 3) // 4
+    return format(int(binary_str, 2), 'x').zfill(width)
+def binArray2HexStr(bin_array):
+    """Convert a sequence of 0/1 values to a lowercase hex string, keeping leading zeros."""
+    return binStr2HexStr("".join(["%d" % i for i in bin_array]))
+# Check whether a string consists only of hexadecimal digits
+def is_hex_str(s):
+    """Return True if every character of ``s`` is a hex digit (also True for an empty string)."""
+    allowed = "0123456789abcdefABCDEF"
+    for ch in s:
+        if ch not in allowed:
+            return False
+    return True
+def flip_bytearray(input_bytearray, num_bits_to_flip):
+    """Return a copy of ``input_bytearray`` with ``num_bits_to_flip`` randomly chosen bits inverted."""
+    bits = bytearray_to_binary_list(input_bytearray)
+    return binary_list_to_bytearray(flip_array(bits, num_bits_to_flip))
+def flip_array(input_bits, num_bits_to_flip):
+    """Invert ``num_bits_to_flip`` distinct, randomly chosen positions of a 0/1 sequence.
+
+    Returns:
+        A new array; ``input_bits`` itself is not modified.
+    """
+    # Pick the positions to flip, without repetition.
+    chosen = np.random.choice(len(input_bits), num_bits_to_flip, replace=False)
+    # XOR against a mask that is 1 only at the chosen positions.
+    toggle = np.zeros_like(input_bits)
+    toggle[chosen] = 1
+    return input_bits ^ toggle
+def bytearray_to_binary_list(byte_array):
+    """Expand each byte into its 8 bits (most significant first) and return them as one flat list of ints."""
+    return [int(bit) for byte in byte_array for bit in format(byte, '08b')]
+def binary_list_to_bytearray(binary_list):
+    """Pack a list of 0/1 values into a bytearray, 8 bits per byte, most significant bit first.
+
+    The input length is expected to be a multiple of 8; a shorter trailing
+    group is converted as the binary number it spells.
+    """
+    return bytearray(
+        int(''.join(str(bit) for bit in binary_list[start:start + 8]), 2)
+        for start in range(0, len(binary_list), 8)
+    )
+if __name__ == "__main__":
+    # Manual smoke test: print a random 44-bit watermark as hex, then check is_hex_str.
+    # hex_str = "ecd057f0d1fbb25d6430b338b5d72eb2"
+    # arr = hexStr2BinArray(hex_str)
+    # out = binArray2HexStr(arr)
+    # print(out==hex_str)
+    # bin_str = "".join()
+    # assert bin2hex_str(bin_str) == hex_str
+    # print(bin_str, len(bin_str))
+    #
+    watermark = np.random.randint(2, size=44)
+    res = binArray2HexStr(watermark)
+    print(res)
+    test_str1 = "3ad30c748a2"
+    test_str2 = "3ad30Z748a2"
+    print(is_hex_str(test_str1))  # prints True
+    print(is_hex_str(test_str2))  # prints False ('Z' is not a hex digit)
+    # encode_file("1.wav", watermark)
+    # out = decode_file("tmp_output.wav")
+    # assert np.all(watermark == out)

utils/file_reader.py ADDED Viewed

	@@ -0,0 +1,77 @@

+import os
+import soundfile
+import librosa
+import resampy
+def is_wav_file(filename):
+    """Return True if the extension of ``filename`` is ``.wav``, ignoring case."""
+    _, extension = os.path.splitext(filename)
+    return extension.lower() == ".wav"
+import numpy as np
+def read_as_single_channel_16k(audio_file, def_sr, verbose=False, aim_second=None):
+    """Load an audio file as a single-channel signal at sample rate ``def_sr``.
+
+    Args:
+        audio_file: Path to a .mp3, .wav or .flac file.
+        def_sr: Target sample rate; the signal is resampled when the file differs.
+        verbose: Print notes about channel reduction, resampling and length.
+        aim_second: If given, loop (when too short) and cut the signal so it
+            lasts exactly this many seconds.
+
+    Returns:
+        Tuple ``(data, sr, audio_length_second)``. ``audio_length_second`` is
+        the duration after resampling and before any ``aim_second`` adjustment.
+
+    Raises:
+        AssertionError: If the file does not exist, or the signal is empty
+            while ``aim_second`` is given.
+        Exception: If the file extension is not supported.
+    """
+    assert os.path.exists(audio_file), "音频文件不存在"
+    file_extension = os.path.splitext(audio_file)[1].lower()
+    if file_extension == ".mp3":
+        data, origin_sr = librosa.load(audio_file, sr=None)
+    elif file_extension in [".wav", ".flac"]:
+        data, origin_sr = soundfile.read(audio_file)
+    else:
+        raise Exception("不支持的文件类型:" + file_extension)
+    # Multi-channel input: keep only the first channel.
+    if len(data.shape) == 2:
+        if verbose:
+            print("双通道文件,变为单通道")
+        data = data[:, 0]
+    # Resample when the file is not already at the target rate.
+    if origin_sr != def_sr:
+        data = resampy.resample(data, origin_sr, def_sr)
+        if verbose:
+            print("原始音频采样率不是16kHZ,可能会对水印性能造成影响")
+    sr = def_sr
+    audio_length_second = 1.0 * len(data) / sr
+    if verbose:
+        print("输入音频长度:%d秒" % audio_length_second)
+    if aim_second is not None:
+        assert len(data) > 0
+        current_second = len(data) / sr
+        if current_second < aim_second:
+            repeat_count = int(aim_second / current_second) + 1
+            # np.tile loops the whole clip; np.repeat would duplicate every
+            # sample in place and stretch the audio instead.
+            data = np.tile(data, repeat_count)
+        # int() keeps the slice bound valid when aim_second is a float.
+        data = data[0:int(sr * aim_second)]
+    return data, sr, audio_length_second
+def read_as_single_channel(file, aim_sr):
+    """Load ``file`` and return its first channel at sample rate ``aim_sr``."""
+    if file.endswith(".mp3"):
+        # librosa is asked to resample to aim_sr while loading
+        samples, rate = librosa.load(file, sr=aim_sr)
+    else:
+        samples, rate = soundfile.read(file)
+    # Reduce to the first channel before resampling, since soundfile may return two channels.
+    if samples.ndim == 2:
+        samples = samples[:, 0]
+    if rate == aim_sr:
+        return samples
+    return resampy.resample(samples, rate, aim_sr)

utils/metric_util.py ADDED Viewed

	@@ -0,0 +1,88 @@

+import torch
+import numpy as np
+def calc_ber(watermark_decoded_tensor, watermark_tensor, threshold=0.5):
+    """Return the bit error rate: the fraction of positions where the two tensors, binarised at ``threshold``, differ."""
+    decoded_bits = watermark_decoded_tensor >= threshold
+    true_bits = watermark_tensor >= threshold
+    matches = (decoded_bits == true_bits).to(torch.float32)
+    return 1 - matches.mean()
+def to_equal_length(original, signal_watermarked):
+    """Truncate both signals to the shorter length when their shapes differ, printing a warning.
+
+    Raises:
+        AssertionError: If the shapes still differ after truncation.
+    """
+    if original.shape == signal_watermarked.shape:
+        return original, signal_watermarked
+    n_original, n_watermarked = len(original), len(signal_watermarked)
+    print("警告！输入内容长度不一致", n_original, n_watermarked)
+    keep = min(n_original, n_watermarked)
+    original = original[:keep]
+    signal_watermarked = signal_watermarked[:keep]
+    assert original.shape == signal_watermarked.shape
+    return original, signal_watermarked
+def signal_noise_ratio(original, signal_watermarked):
+    """Return the signal-to-noise ratio in dB between a signal and its watermarked version.
+
+    Higher is better; ``np.inf`` is returned when the two signals are identical.
+    """
+    original, signal_watermarked = to_equal_length(original, signal_watermarked)
+    noise_power = np.sum((original - signal_watermarked) ** 2)
+    if noise_power == 0:  # the signal was not changed at all
+        return np.inf
+    # The ratio is floored at 1e-10 so log10 never returns -inf; the result
+    # is negative whenever the noise is stronger than the signal.
+    snr_linear = max(1e-10, np.sum(original ** 2) / noise_power)
+    return 10 * np.log10(snr_linear)
+def batch_signal_noise_ratio(original, signal_watermarked):
+    """Return the mean SNR in dB over a batch of (original, watermarked) tensor pairs."""
+    clean_batch = original.detach().cpu().numpy()
+    marked_batch = signal_watermarked.detach().cpu().numpy()
+    return np.mean([signal_noise_ratio(clean, marked)
+                    for clean, marked in zip(clean_batch, marked_batch)])
+def calc_bce_acc(predictions, ground_truth, threshold=0.5):
+    """Return the accuracy of ``predictions`` binarised at ``threshold`` against ``ground_truth``.
+
+    Raises:
+        AssertionError: If the two tensors have different shapes.
+    """
+    assert predictions.shape == ground_truth.shape
+    # Turn scores into 0/1 labels, then average the matches.
+    labels = (predictions >= threshold).float()
+    hits = (labels == ground_truth).float()
+    return hits.mean().item()
+def resample_to16k(data, old_sr):
+    """Downsample ``data`` towards 16 kHz by keeping every ``int(old_sr / 16000)``-th sample.
+
+    This is plain decimation without filtering, so it only hits 16 kHz exactly
+    when ``old_sr`` is an integer multiple of 16000.
+    """
+    target_sr = 16000
+    step = int(old_sr / target_sr)
+    return data[::step]
+import pypesq
+def pesq(signal1, signal2, sr):
+    """Return the PESQ (Perceptual Evaluation of Speech Quality) score of two signals, or 0 on failure.
+
+    Scores range from -0.5 to 4.5; above 3.5 the quality is good and above 4.0
+    the difference is essentially inaudible. pypesq only accepts 8 kHz or
+    16 kHz input, so other rates are reduced to 16 kHz first. Because this
+    metric measures perceptibility, changing the rate here is unrelated to
+    watermark robustness.
+    """
+    signal1, signal2 = to_equal_length(signal1, signal2)
+    if sr != 16000:
+        signal1, signal2 = resample_to16k(signal1, sr), resample_to16k(signal2, sr)
+    try:
+        # May fail, e.g. "ValueError: ref is all zeros, processing error!"
+        score = pypesq.pesq(signal1, signal2, 16000)
+    except Exception as e:
+        # Best effort: report the problem and fall back to 0.
+        score = 0
+        print("pesq计算错误:", e)
+    return score

utils/model_util.py ADDED Viewed

	@@ -0,0 +1,118 @@

+import torch
+import os
+import json
+import sys
+from utils import pickle_util
+history_array = []
+def save_model(epoch, model, optimizer, file_save_path):
+    """Save a checkpoint with epoch, model state and optimizer state, and record its path.
+
+    The parent directory is created when missing. ``optimizer`` may be None,
+    in which case None is stored under ``'optimizer'``. The path is appended
+    to ``history_array`` so ``delete_last_saved_model`` can remove it later.
+    """
+    parent_dir = os.path.abspath(os.path.join(file_save_path, os.pardir))
+    if not os.path.exists(parent_dir):
+        print("mkdir:", parent_dir)
+        os.makedirs(parent_dir)
+    checkpoint = {
+        'epoch': epoch,
+        'model': model.state_dict(),
+        'optimizer': optimizer.state_dict() if optimizer is not None else None,
+    }
+    torch.save(obj=checkpoint, f=file_save_path)
+    history_array.append(file_save_path)
+def save_model_v4(epoch, model, optimizer, file_save_path, discriminator):
+    """Like ``save_model``, but also stores ``discriminator`` (as passed in) under ``"discriminator"``."""
+    parent_dir = os.path.abspath(os.path.join(file_save_path, os.pardir))
+    if not os.path.exists(parent_dir):
+        print("mkdir:", parent_dir)
+        os.makedirs(parent_dir)
+    checkpoint = {
+        'epoch': epoch,
+        'model': model.state_dict(),
+        'optimizer': optimizer.state_dict() if optimizer is not None else None,
+        "discriminator": discriminator,
+    }
+    torch.save(obj=checkpoint, f=file_save_path)
+    history_array.append(file_save_path)
+def delete_last_saved_model():
+    """Delete the most recently recorded checkpoint file and its ``.json`` sidecar, if they exist."""
+    if not history_array:
+        return
+    newest = history_array.pop()
+    if os.path.exists(newest):
+        os.remove(newest)
+        print("delete model:", newest)
+    sidecar = newest + ".json"
+    if os.path.exists(sidecar):
+        os.remove(sidecar)
+def load_model(resume_path, model, optimizer=None, strict=True):
+    """Load a checkpoint into ``model`` (and ``optimizer`` when given).
+
+    The checkpoint is read onto the CPU with ``torch.load``, which unpickles
+    the file, so only trusted checkpoints should be loaded.
+
+    Returns:
+        The epoch to resume from: the stored epoch plus one.
+    """
+    state = torch.load(resume_path, map_location=torch.device('cpu'))
+    next_epoch = state['epoch'] + 1
+    model.load_state_dict(state['model'], strict=strict)
+    if optimizer is not None:
+        optimizer.load_state_dict(state['optimizer'])
+    print("checkpoint loaded!")
+    return next_epoch
+def save_model_v2(model, args, model_save_name):
+    """Save ``model`` (epoch 0, no optimizer) under ``<model_save_folder>/<project>/<name>/<model_save_name>``."""
+    target = os.path.join(args.model_save_folder, args.project, args.name, model_save_name)
+    save_model(0, model, None, target)
+    print("save:", target)
+def save_project_info(args):
+    """Write the command-line string and all ``args`` values to ``run_info.json`` in the run folder.
+
+    The run folder is ``<model_save_folder>/<project>/<name>`` and is created when missing.
+    """
+    run_folder = os.path.join(args.model_save_folder, args.project, args.name)
+    if not os.path.exists(run_folder):
+        os.makedirs(run_folder)
+    json_file_path = os.path.join(run_folder, "run_info.json")
+    payload = {
+        "cmd_str": ' '.join(sys.argv[1:]),
+        "args": vars(args),
+    }
+    with open(json_file_path, "w") as f:
+        json.dump(payload, f)
+    print("save_project_info:", json_file_path)
+def get_pkl_json(folder):
+    """Read the single file in ``folder`` whose name contains ``.pkl.json`` and return its parsed JSON.
+
+    Raises:
+        AssertionError: If the folder does not contain exactly one such file.
+    """
+    matches = [entry for entry in os.listdir(folder) if ".pkl.json" in entry]
+    assert len(matches) == 1
+    return pickle_util.read_json(os.path.join(folder, matches[0]))
+# Helpers for checkpoints saved from a DataParallel-wrapped model
+def is_data_parallel_checkpoint(state_dict):
+    """Return True if any key of ``state_dict`` starts with ``'module.'``."""
+    for key in state_dict:
+        if key.startswith('module.'):
+            return True
+    return False
+def map_state_dict(state_dict):
+    """Strip the ``'module.'`` prefix that DataParallel adds to parameter names.
+
+    Returns:
+        ``state_dict`` itself when no key has the prefix; otherwise a new
+        ``OrderedDict`` with the prefix removed, in the original key order.
+    """
+    if not is_data_parallel_checkpoint(state_dict):
+        return state_dict
+    from collections import OrderedDict
+    prefix = 'module.'
+    return OrderedDict(
+        (key[len(prefix):] if key.startswith(prefix) else key, value)
+        for key, value in state_dict.items()
+    )

utils/pesq_util.py ADDED Viewed

	@@ -0,0 +1,25 @@

+import pypesq
+import numpy as np
+def batch_pesq(batch_signal, batch_signal_wmd):
+    """Return the mean PESQ score over a batch, or -1 when no pair gives a usable score.
+
+    Pairs for which pypesq raises, or returns NaN, are reported and skipped.
+    """
+    references = batch_signal.detach().cpu().numpy()
+    degraded = batch_signal_wmd.detach().cpu().numpy()
+    scores = []
+    for ref, deg in zip(references, degraded):
+        try:
+            # May fail, e.g. "ValueError: ref is all zeros, processing error!"
+            score = pypesq.pesq(ref, deg, 16000)
+        except Exception as e:
+            print(e)
+            continue
+        if np.isnan(score):
+            print("pesq is nan!")
+            continue
+        scores.append(score)
+    if scores:
+        return np.mean(scores)
+    return -1

utils/pickle_util.py ADDED Viewed

	@@ -0,0 +1,27 @@

+import _pickle as pickle  # python3
+import time
+import json
+def read_pickle(filepath):
+    """Load and return the object pickled in ``filepath``.
+
+    NOTE: unpickling can execute arbitrary code; only read files from a trusted source.
+    """
+    # `with` closes the file even when loading raises.
+    with open(filepath, 'rb') as f:
+        return pickle.load(f)
+def save_pickle(save_path, save_data):
+    """Pickle ``save_data`` into ``save_path``, overwriting any existing file."""
+    with open(save_path, 'wb') as f:
+        pickle.dump(save_data, f)
+def read_json(filepath):
+    """Parse the JSON file at ``filepath`` and return the resulting object."""
+    with open(filepath) as f:
+        return json.load(f)
+def save_json(save_path, obj):
+    """Serialise ``obj`` as JSON into ``save_path``, overwriting any existing file."""
+    with open(save_path, 'w') as f:
+        f.write(json.dumps(obj))

utils/silent_util.py ADDED Viewed

	@@ -0,0 +1,18 @@

+import numpy as np
+def is_silent(data, silence_threshold=0.01):
+    """Return whether the root-mean-square of ``data`` is below ``silence_threshold``."""
+    mean_square = np.mean(data ** 2)
+    return np.sqrt(mean_square) < silence_threshold
+def has_silent_part(trunck):
+    """Return True if any of the three equal-length consecutive parts of ``trunck`` is silent."""
+    num_part = 3
+    part_length = int(len(trunck) / num_part)
+    return any(
+        is_silent(trunck[part_length * k:part_length * k + part_length])
+        for k in range(num_part)
+    )

utils/wm_add_v2.py ADDED Viewed

	@@ -0,0 +1,87 @@

+from utils import silent_util
+import torch
+import numpy as np
+from utils import bin_util
+fix_pattern = [1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0,
+               0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
+               1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1,
+               1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0,
+               0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0]
+def create_parcel_message(len_start_bit, num_bit, wm_text, verbose=False):
+    """Build the full message: a fixed start pattern followed by the payload bits.
+
+    Args:
+        len_start_bit: Number of leading ``fix_pattern`` bits used as the start marker.
+        num_bit: Total length of the message in bits.
+        wm_text: Payload as a hex string; when empty or None a random payload is generated.
+        verbose: Print the start-marker length and the figure derived from it.
+
+    Returns:
+        Tuple ``(start_bit, msg_arr, watermark)`` where ``watermark`` is the
+        concatenation of the other two.
+
+    Raises:
+        AssertionError: If the assembled message is not ``num_bit`` long.
+    """
+    # Start marker
+    start_bit = fix_pattern[0:len_start_bit]
+    if verbose:
+        # todo: also account for the error rate when a threshold is used
+        error_prob = 2 ** len_start_bit / 10000
+        print("起始bit长度:%d,错误率:%.1f万" % (len(start_bit), error_prob))
+    # Payload
+    if wm_text:
+        msg_arr = bin_util.hexStr2BinArray(wm_text)
+    else:
+        msg_arr = np.random.choice([0, 1], size=num_bit - len(start_bit))
+    # Assemble
+    watermark = np.concatenate([start_bit, msg_arr])
+    assert len(watermark) == num_bit
+    return start_bit, msg_arr, watermark
+import time
+def add_watermark(bir_array, data, num_point, shift_range, device, model, silence_check=False):
+    """Embed ``bir_array`` into consecutive blocks of ``data``.
+
+    ``data`` is walked in blocks of ``num_point + int(num_point * shift_range)``
+    samples. The first ``num_point`` samples of every full block are passed to
+    ``encode_trunck_with_silence_check``; the rest of the block (the gap) and
+    a trailing block that is too short are copied through unchanged.
+
+    Args:
+        bir_array: Watermark bits handed to the encoder.
+        data: Sample array; slices of it must support ``.copy()``.
+        num_point: Number of samples in each watermarked region.
+        shift_range: Gap length as a fraction of ``num_point``.
+        device: Device forwarded to the encoder.
+        model: Model forwarded to the encoder.
+        silence_check: Forwarded to ``encode_trunck_with_silence_check``.
+
+    Returns:
+        Tuple ``(data, reconstructed_array, time_cost)``: the untouched input,
+        the re-assembled output and the elapsed wall-clock seconds.
+
+    Raises:
+        AssertionError: If no block was produced or an encoded block changed
+            shape.
+    """
+    started = time.time()
+    # Block layout: [watermark region | gap region].
+    chunk_size = num_point + int(num_point * shift_range)
+    pieces = []
+    for idx_trunck, offset in enumerate(range(0, len(data), chunk_size)):
+        block = data[offset:offset + chunk_size].copy()
+        if len(block) < chunk_size:
+            # Final block is too short to carry a watermark; keep it as is.
+            pieces.append(block)
+            break
+        cover_area, shift_area = block[:num_point], block[num_point:]
+        cover_area_wmd = encode_trunck_with_silence_check(
+            silence_check, idx_trunck, cover_area, bir_array, device, model)
+        rebuilt = np.concatenate([cover_area_wmd, shift_area])
+        assert rebuilt.shape == block.shape
+        pieces.append(rebuilt)
+    assert len(pieces) > 0
+    reconstructed_array = np.concatenate(pieces)
+    return data, reconstructed_array, time.time() - started
+def encode_trunck_with_silence_check(silence_check, trunck_idx, trunck, wm, device, model):
+    """Watermark ``trunck`` unless silence checking is on and the chunk is silent.
+
+    Silence is decided by ``silent_util.is_silent`` on the whole chunk. A
+    skipped chunk is reported on stdout with its index and returned unchanged.
+
+    Args:
+        silence_check: Enable the silence test.
+        trunck_idx: Index of the chunk, used only in the skip message.
+        trunck: Samples of the chunk.
+        wm: Watermark bits.
+        device: Device forwarded to ``encode_trunck``.
+        model: Model forwarded to ``encode_trunck``.
+
+    Returns:
+        The watermarked chunk, or ``trunck`` itself when it was skipped.
+    """
+    if not silence_check or not silent_util.is_silent(trunck):
+        return encode_trunck(trunck, wm, device, model)
+    print("跳过静音区块:", trunck_idx)
+    return trunck
+def encode_trunck(trunck, wm, device, model):
+    """Run ``model.encode`` on one chunk and return the result as a NumPy array.
+
+    Both the samples and the watermark bits are turned into float tensors on
+    ``device`` with a leading batch axis of size one. Encoding runs without
+    gradient tracking, and the output is moved back to the CPU with
+    size-one axes squeezed away.
+    """
+    with torch.no_grad():
+        signal = torch.FloatTensor(trunck).to(device).unsqueeze(0)
+        message = torch.FloatTensor(np.array(wm)).to(device).unsqueeze(0)
+        encoded = model.encode(signal, message)
+    return encoded.detach().cpu().numpy().squeeze()

utils/wm_decode_v2.py ADDED Viewed

	@@ -0,0 +1,113 @@

+import pdb
+import torch
+import numpy as np
+from utils import bin_util
+def decode_trunck(trunck, model, device):
+    """Decode one chunk with ``model.decode`` and return hard 0/1 decisions.
+
+    The samples become a float tensor on ``device`` with a leading batch axis
+    of size one. Model outputs greater than or equal to 0.5 map to 1, all
+    others to 0. The result is returned as a NumPy integer array with
+    size-one axes squeezed away.
+    """
+    with torch.no_grad():
+        signal = torch.FloatTensor(trunck).to(device)[None]
+        scores = model.decode(signal)
+        bits = (scores >= 0.5).int()
+    return bits.detach().cpu().numpy().squeeze()
+def is_start_bit_match(start_bit, decoded_start_bit, start_bit_ber_threshold):
+    """Tell whether decoded start bits are close enough to the expected ones.
+
+    Args:
+        start_bit: Expected marker bits (array with a ``shape``).
+        decoded_start_bit: Decoded marker bits of the same shape.
+        start_bit_ber_threshold: The bit error rate must be strictly below
+            this value for a match.
+
+    Returns:
+        Truthy when the fraction of differing bits is below the threshold.
+
+    Raises:
+        AssertionError: If the two arrays differ in shape.
+    """
+    assert decoded_start_bit.shape == start_bit.shape
+    agreement = np.mean(start_bit == decoded_start_bit)
+    return (1 - agreement) < start_bit_ber_threshold
+def extract_watermark(data, start_bit, shift_range, num_point, start_bit_ber_threshold, model, device,
+                      verbose=False):
+    """Scan ``data`` for watermarked windows and merge the decoded bits.
+
+    A window of ``num_point`` samples is decoded at the current position. If
+    its leading bits do not match ``start_bit`` the position moves forward by
+    the search step; otherwise the decoded bits are collected and the position
+    jumps past the window plus the gap. The search step is
+    ``int(shift_range * num_point)`` but never less than one sample, so the
+    scan always terminates even when that product truncates to zero.
+
+    Args:
+        data: Sample array to search.
+        start_bit: Expected marker bits (NumPy array).
+        shift_range: Gap length as a fraction of ``num_point``.
+        num_point: Window length in samples.
+        start_bit_ber_threshold: Threshold handed to ``is_start_bit_match``.
+        model: Model handed to ``decode_trunck``.
+        device: Device handed to ``decode_trunck``.
+        verbose: When true, print position and decoded payload of each hit.
+
+    Returns:
+        Tuple ``(support_count, exist_prob, mean_result, first_result)``:
+        number of matching windows, fraction of majority-voted marker bits
+        equal to ``start_bit``, the majority-voted bit array, and the bits of
+        the first matching window. The last three are ``None`` when nothing
+        matched.
+    """
+    shift_range_points = int(shift_range * num_point)
+    # A zero step would re-decode the same non-matching window forever.
+    search_step = max(1, shift_range_points)
+    pos = 0  # current read position
+    results = []
+    while pos + num_point <= len(data):
+        bit_array = decode_trunck(data[pos:pos + num_point], model, device)
+        decoded_start_bit = bit_array[0:len(start_bit)]
+        if not is_start_bit_match(start_bit, decoded_start_bit, start_bit_ber_threshold):
+            pos += search_step
+            continue
+        # Found a window that begins with the marker.
+        if verbose:
+            msg_bit = bit_array[len(start_bit):]
+            msg_str = bin_util.binArray2HexStr(msg_bit)
+            print(pos, "解码信息:", msg_str)
+        results.append(bit_array)
+        pos += num_point + shift_range_points
+    support_count = len(results)
+    if support_count == 0:
+        return support_count, None, None, None
+    # Majority vote per bit over all matching windows.
+    mean_result = (np.array(results).mean(axis=0) >= 0.5).astype(int)
+    exist_prob = (mean_result[0:len(start_bit)] == start_bit).mean()
+    first_result = results[0]
+    return support_count, exist_prob, mean_result, first_result
+def extract_watermark_v2(data, start_bit, shift_range, num_point,
+                         start_bit_ber_threshold, model, device,
+                         merge_type,
+                         shift_range_p=0.5, ):
+    """Densely scan ``data`` for watermarked windows and pick the best decode.
+
+    Every window of ``num_point`` samples, taken at a stride of
+    ``int(shift_range * num_point * shift_range_p)`` samples (at least one, so
+    the scan always terminates), is decoded. Windows whose start-bit error
+    rate is not above ``start_bit_ber_threshold`` are kept as candidates.
+
+    Args:
+        data: Sample array to search.
+        start_bit: Expected marker bits (NumPy array).
+        shift_range: Gap length as a fraction of ``num_point``.
+        num_point: Window length in samples.
+        start_bit_ber_threshold: Largest accepted start-bit error rate.
+        model: Model handed to ``decode_trunck``.
+        device: Device handed to ``decode_trunck``.
+        merge_type: How candidates are merged; only ``"best"`` is implemented.
+        shift_range_p: Fraction of the gap used as scan stride.
+
+    Returns:
+        Tuple ``(support_count, mean_result, results)``. ``results`` lists
+        ``{"sim", "msg"}`` dicts for all candidates. ``mean_result`` is
+        ``None`` without candidates; otherwise it is the majority vote over
+        all candidates whose marker matched perfectly, or, if none did, the
+        bits of the most similar candidate.
+
+    Raises:
+        NotImplementedError: ``merge_type == "weighted"`` and candidates exist.
+        ValueError: Any other unsupported ``merge_type`` and candidates exist.
+    """
+    # A zero stride would re-decode the same window forever.
+    shift_range_points = max(1, int(shift_range * num_point * shift_range_p))
+    pos = 0  # current read position
+    results = []
+    while pos + num_point <= len(data):
+        bit_array = decode_trunck(data[pos:pos + num_point], model, device)
+        decoded_start_bit = bit_array[0:len(start_bit)]
+        ber_start_bit = 1 - np.mean(start_bit == decoded_start_bit)
+        if ber_start_bit > start_bit_ber_threshold:
+            pos += shift_range_points
+            continue
+        # Found a candidate start position.
+        results.append({
+            "sim": 1 - ber_start_bit,
+            "msg": bit_array,
+        })
+        # Keep the fine stride even after a hit: with a loose threshold,
+        # jumping a whole window could skip better-aligned positions.
+        pos += shift_range_points
+    support_count = len(results)
+    if support_count == 0:
+        return support_count, None, results
+    # merge_type is only inspected once there is something to merge.
+    if merge_type == "weighted":
+        raise NotImplementedError('merge_type "weighted" is not implemented')
+    if merge_type != "best":
+        raise ValueError("unsupported merge_type: %r" % (merge_type,))
+    # First candidate with the highest similarity.
+    best_val = max(results, key=lambda item: item["sim"])
+    if np.isclose(1.0, best_val["sim"]):
+        # Majority vote over every candidate whose marker matched perfectly.
+        perfect = [item["msg"] for item in results if np.isclose(item["sim"], 1.0)]
+        mean_result = (np.array(perfect).mean(axis=0) >= 0.5).astype(int)
+    else:
+        mean_result = best_val["msg"]
+    return support_count, mean_result, results