import time
import streamlit as st
# from transformers import pipeline
import os
import torch
import datetime
import numpy as np
import soundfile
from wavmark.utils import file_reader
from audioseal import AudioSeal
import torchaudio
from pydub import AudioSegment

# pipeline = pipeline(task="image-classification", model="julien-c/hotdog-not-hotdog")

# st.title("Hot Dog? Or Not?")

# file_name = st.file_uploader("Upload a hot dog candidate image")

# if file_name is not None:
#     col1, col2 = st.columns(2)

#     image = Image.open(file_name)
#     col1.image(image, use_column_width=True)
#     predictions = pipeline(image)

#     col2.header("Probabilities")
#     for p in predictions:
#         col2.subheader(f"{ p['label'] }: { round(p['score'] * 100, 1)}%")

def create_default_value():
    if "def_value" not in st.session_state:
        def_val_npy = np.random.choice([0, 1], size=32 - len_start_bit)
        def_val_str = "".join([str(i) for i in def_val_npy])
        st.session_state.def_value = def_val_str
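
# --- Sketch: decoding helper (not part of the original app) ---------------
# The "Decode Watermark" branch in main() calls decode_watermark(); this is a
# minimal sketch of that helper, assuming AudioSeal's public detector API:
# AudioSeal.load_detector("audioseal_detector_16bits") returns a detector whose
# detect_watermark(audio, sample_rate) yields a detection probability and the
# decoded 16-bit message for audio shaped (batch, channels, samples).
def decode_watermark(wav, sample_rate):
    detector = AudioSeal.load_detector("audioseal_detector_16bits")
    # result: probability that the audio carries a watermark
    # message: (batch, 16) tensor of decoded bits
    result, message = detector.detect_watermark(wav, sample_rate)
    st.write("Detection probability: %.3f" % float(result))
    st.write("Decoded watermark: " + "".join(str(int(b)) for b in message[0].tolist()))
    return result, message
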

# Main web app
def main():
    create_default_value()

    # st.title("MDS07")
    # st.write("https://github.com/wavmark/wavmark")
    markdown_text = """
    # MDS07
    [AudioSeal](https://github.com/jcha0155/AudioSealEnhanced) is the next-generation watermarking tool driven by AI. 
    You can upload an audio file and encode a custom 16-bit watermark or perform decoding from a watermarked audio.
    
    This page is for demonstration usage and only process **the first minute** of the audio. 
    If you have longer files for processing, we recommend using [our python toolkit](https://github.com/jcha0155/AudioSealEnhanced).
    """

    # Render the Markdown text with st.markdown
    st.markdown(markdown_text)

    audio_file = st.file_uploader("Upload Audio", type=["wav", "mp3"], accept_multiple_files=False)

    if audio_file:
        # Save the file locally:
        # tmp_input_audio_file = os.path.join("/tmp/", audio_file.name)
        # st.markdown(tmp_input_audio_file)
        # with open(tmp_input_audio_file, "wb") as f:
        #     f.write(audio_file.getbuffer())
        # st.audio(tmp_input_audio_file, format="mp3/wav")

        # Save file to local storage
        tmp_input_audio_file = os.path.join("/tmp/", audio_file.name)
        with open(tmp_input_audio_file, "wb") as f:
            f.write(audio_file.getbuffer())

        # Convert the upload to WAV using pydub (handles both mp3 and wav inputs)
        input_audio = AudioSegment.from_file(tmp_input_audio_file)
        wav_output_file = os.path.splitext(tmp_input_audio_file)[0] + ".wav"
        input_audio.export(wav_output_file, format="wav")

        # Load the WAV file with torchaudio, resample to the model's 16 kHz rate,
        # keep only the first minute, and add the batch dimension AudioSeal expects
        wav, sample_rate = torchaudio.load(wav_output_file)
        if sample_rate != default_sr:
            wav = torchaudio.functional.resample(wav, sample_rate, default_sr)
            sample_rate = default_sr
        wav = wav[:, : max_second_encode * sample_rate]
        wav = wav.unsqueeze(0)

        # Play the audio file (WAV format)
        st.audio(wav_output_file, format="audio/wav")
        
        # wav, sample_rate = torchaudio.load(audio_file, format="mp3/wav")
        st.markdown("SR")
        st.markdown(sample_rate)
        st.markdown("wav")
        st.markdown(wav)
        # 展示文件到页面上
        # st.audio(tmp_input_audio_file, format="audio/wav")

        action = st.selectbox("Select Action", ["Add Watermark", "Decode Watermark"])

        if action == "Add Watermark":
            watermark_text = st.text_input("The watermark (0, 1 list of length-16):", value=st.session_state.def_value)
            add_watermark_button = st.button("Add Watermark", key="add_watermark_btn")
            if add_watermark_button:  # executed when the button is clicked
                if audio_file and watermark_text:
                    with st.spinner("Adding Watermark..."):
                        start_time = time.time()
                        # Turn the 0/1 string into a (batch, nbits) message tensor
                        message = torch.tensor([[int(b) for b in watermark_text.strip()]])
                        # Compute the additive watermark and mix it into the audio
                        watermark = model.get_watermark(wav, sample_rate, message=message)
                        watermarked_audio = wav + watermark
                        encode_time_cost = time.time() - start_time

                        # Save the watermarked audio and play it back
                        watermarked_file = os.path.join("/tmp/", "watermarked_" + os.path.basename(wav_output_file))
                        torchaudio.save(watermarked_file, watermarked_audio.squeeze(0).detach().cpu(), sample_rate)
                        st.write("Watermarked Audio:")
                        st.audio(watermarked_file, format="audio/wav")
                        st.write("Time Cost: %d seconds" % encode_time_cost)

        elif action == "Decode Watermark":
            if st.button("Decode"):
                with st.spinner("Decoding..."):
                    decode_watermark(wav, sample_rate)


if __name__ == "__main__":
    default_sr = 16000
    max_second_encode = 60
    max_second_decode = 30
    len_start_bit = 16
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    # model = wavmark.load_model().to(device)
    model = AudioSeal.load_generator("audioseal_wm_16bits")
    main()

    # audio_path = "/Users/my/Library/Mobile Documents/com~apple~CloudDocs/CODE/PycharmProjects/4_语音水印/419_huggingface水印/WavMark/example.wav"

    # decoded_watermark, decode_cost = decode_watermark(audio_path)
    # print(decoded_watermark)
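
    # A hedged offline sketch (not from the original code), mirroring the
    # commented example above: verify and decode a previously watermarked file
    # with the AudioSeal detector. The path below is hypothetical.
    # watermarked_path = "/tmp/watermarked_example.wav"
    # wav, sr = torchaudio.load(watermarked_path)
    # detector = AudioSeal.load_detector("audioseal_detector_16bits")
    # prob, bits = detector.detect_watermark(wav.unsqueeze(0), sr)
    # print("detection probability:", prob)
    # print("decoded bits:", bits[0].tolist())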