File size: 2,465 Bytes
ed33636
 
f739d3e
ed33636
5c6830a
ed33636
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
03b820b
8f644ec
03b820b
ed33636
 
 
3310cc3
ed33636
 
 
 
3310cc3
 
 
 
 
 
ed33636
 
3310cc3
ed33636
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import torch
import torchaudio
import audio_diffusion_attacks_forhf.src.test_encoder_attack as attack
import numpy as np
from transformers import EncodecModel

'''
Files edited:
- test_encoder_attack.py
- losses.py
- audio_signal.py; print statements only
'''

'''
#Set default parameters (if necessary)
args = 0

#Load pretrained protection model:
modelD = md.model(args)
modelD.load_state_dict(torch.load('path to pretrained weights', map_location=torch.device('cpu')), strict=True)
modelD.eval()

#Load pretrained mimicry model
modelM = mm.model(args)
modelM.load_state_dict(torch.load('path to pretrained weights', map_location=torch.device('cpu')), strict=True)
modelM.eval()
'''

#Define function to convert final audio format:
def float32_to_int16(waveform):
    """Convert a float waveform to a flat int16 PCM array for gradio playback.

    Peak-normalizes to [-1, 1], scales to the int16 range, and flattens
    (channel, samples) to 1-D.

    Args:
        waveform: numpy float array of audio samples (any shape).

    Returns:
        1-D numpy array of dtype int16.
    """
    peak = np.abs(waveform).max()
    # Guard the all-silence case: dividing by a zero peak would produce
    # NaNs, and NaN.astype(int16) is undefined behavior.
    if peak > 0:
        waveform = waveform / peak
    waveform = (waveform * 32767).astype(np.int16)
    return waveform.ravel()

#Define predict function:
def predict(inp):
    """Run the encoder attack on an uploaded audio file.

    Args:
        inp: filesystem path to the audio file (gradio passes a filepath string).

    Returns:
        Four (sample_rate, int16_waveform) tuples, one per perturbed waveform
        produced by attack.poison_audio.
    """
    # Load audio from the gradio-supplied path into a float tensor.
    waveform, sample_rate = torchaudio.load(inp)

    # NOTE(review): the original built `waveform.clone().detach().requires_grad_(True)`
    # here but discarded the result (a no-op); any gradient setup belongs inside
    # attack.poison_audio, so the dead statement was removed.

    encoders = [EncodecModel.from_pretrained("facebook/encodec_48khz")]

    # Run the protection attack; it returns four perturbed waveforms.
    protected = attack.poison_audio(waveform, sample_rate, encoders)

    # Convert each tensor to a gradio-readable (sample_rate, int16 ndarray) pair.
    # .detach().cpu() is a no-op for grad-free CPU tensors but makes .numpy()
    # safe if the attack returns tensors that still require grad.
    return tuple(
        (sample_rate, float32_to_int16(w.detach().cpu().numpy())) for w in protected
    )

#Set up gradio interface
import gradio as gr

interface = gr.Interface(
    fn=predict,
    inputs=gr.Audio(type="filepath"),
    # predict returns FOUR (sample_rate, waveform) tuples, so four output
    # components are required — the original listed only three, which
    # mismatches the function's return arity.
    outputs=[gr.Audio(), gr.Audio(), gr.Audio(), gr.Audio()],
    title="Music Protection Net",
    description="This model is designed to add perturbations to a musical clip so that musical cloning models fail to properly reproduce the song. \n \n 1) Upload (or record) an audio file of your music. \n 2) Click submit to run the model. \n 3) Listen to and download your protected audio.",
)

interface.launch()