j committed on
Commit
516059f
·
1 Parent(s): 3c309fd

initial commit

Browse files
Files changed (3) hide show
  1. README.md +2 -1
  2. app.py +114 -0
  3. requirements.txt +4 -0
README.md CHANGED
@@ -1,8 +1,9 @@
1
  ---
2
- title: Ldm Styletransfer
3
  emoji: 🏢
4
  colorFrom: pink
5
  colorTo: blue
 
6
  sdk: gradio
7
  sdk_version: 4.15.0
8
  app_file: app.py
 
1
  ---
2
+ title: AudioLDM Style Transfer HARP plugin
3
  emoji: 🏢
4
  colorFrom: pink
5
  colorTo: blue
6
+ python_version: 3.8
7
  sdk: gradio
8
  sdk_version: 4.15.0
9
  app_file: app.py
app.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

import gradio as gr
import soundfile as sf
from audioldm import build_model, style_transfer
from audiotools import AudioSignal
from pyharp import ModelCard, build_endpoint, save_and_return_filepath
6
+
7
+ audioldm = build_model(model_name="audioldm-m-full")
8
+
9
def save_wave(waveform, savepath, name="outwav", samplerate=16000):
    """Write every item of a batched waveform to its own WAV file.

    Each file is named ``<base>_<index>.wav`` inside ``savepath``, where
    ``<base>`` is derived from the matching entry of ``name`` and ``<index>``
    is the item's position along the first axis of ``waveform``.

    Args:
        waveform: Array indexed as ``waveform[i, 0]`` for item ``i``; only
            ``waveform.shape[0]`` and that indexing are relied upon here.
        savepath: Directory the files are written into.
        name: A single base name reused for every item, or a list with one
            name per item.
        samplerate: Sample rate passed to ``soundfile.write``. Defaults to
            16000, the value that was previously hard-coded.
    """
    batch_size = waveform.shape[0]
    if not isinstance(name, list):
        name = [name] * batch_size

    for i in range(batch_size):
        base = os.path.basename(name[i])
        if ".wav" in name[i]:
            # Drop the ".wav" marker and anything after it, but keep dots that
            # come before it ("my.v2.wav" -> "my.v2"); splitting on the first
            # "." would truncate such names to "my".
            base = base.partition(".wav")[0]
        path = os.path.join(savepath, f"{base}_{i}.wav")
        print("Save audio to %s" % path)
        sf.write(path, waveform[i, 0], samplerate=samplerate)
26
+
27
def process_fn(input_audio_path, prompt, seed, guidance_scale, num_inference_steps, num_candidates, audio_length_in_s, transfer_strength):
    """Run AudioLDM style transfer on one audio file and return the result path.

    Args:
        input_audio_path: Path of the audio file to transform.
        prompt: Text prompt forwarded to ``style_transfer``.
        seed: Random seed; cast to ``int`` before use.
        guidance_scale: Forwarded as ``guidance_scale``.
        num_inference_steps: Forwarded as ``ddim_steps`` after an ``int`` cast.
        num_candidates: Forwarded as ``batchsize`` after an ``int`` cast.
        audio_length_in_s: Forwarded as ``duration``.
        transfer_strength: Forwarded positionally as the transfer strength.

    Returns:
        The path ``"./output.wav"``, which holds the first generated item.
    """
    waveform = style_transfer(
        audioldm,
        prompt,
        # The original passed an undefined name here, which raised NameError
        # on every call; the uploaded file is what must be transformed.
        input_audio_path,
        transfer_strength,
        int(seed),
        duration=audio_length_in_s,
        guidance_scale=guidance_scale,
        ddim_steps=int(num_inference_steps),
        batchsize=int(num_candidates),
        config=None,
    )
    waveform = waveform[:, None, :]

    # Written directly with soundfile rather than save_wave, because save_wave
    # always appends an "_<index>" suffix and a fixed output name is needed.
    output_path = "./output.wav"
    sf.write(output_path, waveform[0, 0], samplerate=16000)
    return output_path
45
+
46
+
47
# Metadata describing this model to the pyharp endpoint.
# NOTE(review): the name/description/tags talk about "Variations" while the
# app performs style transfer -- confirm whether the card text should change.
card = ModelCard(
    name='AudioLDM Variations',
    description='AudioLDM Variation Generator, operates on region selected in track.',
    author='Team Audio',
    tags=['AudioLDM', 'Variations', 'audio-to-audio']
)

with gr.Blocks() as webapp:
    # Define your Gradio interface.
    # The components are listed in the same order as process_fn's parameters
    # (audio, prompt, seed, guidance scale, steps, candidates, duration,
    # transfer strength). Previously the prompt and transfer-strength controls
    # were missing, so process_fn received too few arguments and the seed
    # landed in the prompt slot. This assumes build_endpoint forwards the
    # component values positionally -- confirm against pyharp.
    inputs = [
        gr.Audio(
            label="Audio Input",
            type="filepath"
        ),
        gr.Textbox(
            label="Prompt"
        ),
        gr.Slider(
            # Numeric bounds: the original passed these as strings.
            label="seed",
            minimum=0,
            maximum=65535,
            value=43534,
            step=1
        ),
        gr.Slider(
            minimum=0, maximum=10,
            step=0.1, value=2.5,
            label="Guidance Scale"
        ),
        gr.Slider(
            minimum=1, maximum=500,
            step=1, value=200,
            label="Inference Steps"
        ),
        gr.Slider(
            minimum=1, maximum=10,
            step=1, value=1,
            label="Candidates"
        ),
        gr.Slider(
            minimum=2.5, maximum=10.0,
            step=2.5, value=5,
            label="Duration"
        ),
        gr.Slider(
            # Presumably a fraction in [0, 1]; verify against audioldm.
            minimum=0.0, maximum=1.0,
            step=0.05, value=0.5,
            label="Transfer Strength"
        ),
    ]

    output = gr.Audio(label="Audio Output", type="filepath")

    ctrls_data, ctrls_button, process_button, cancel_button = build_endpoint(inputs, output, process_fn, card)

# Queueing is left disabled; see
# https://www.gradio.app/guides/setting-up-a-demo-for-maximum-performance
# before enabling webapp.queue().
webapp.launch(share=True)
97
+
98
+
99
+
100
def batch_style_transfer(
    input_files,
    prompt,
    output_directory,
    transfer_strength,
    seed=43534,
    duration=5,
    guidance_scale=2.5,
    ddim_steps=200,
    n_variations=1,
):
    """Apply style transfer to several files and save every generated item.

    This replaces a leftover module-level loop that referenced names never
    defined in this file (``input_files``, ``PROMPT``, ``OUTPUT_DIRECTORY``
    and others) and therefore could only raise NameError. Those values are
    now explicit parameters, and nothing runs unless this function is called.

    Args:
        input_files: Iterable of audio file paths to process.
        prompt: Text prompt forwarded to ``style_transfer``.
        output_directory: Directory handed to ``save_wave``.
        transfer_strength: Forwarded positionally as the transfer strength.
        seed: Random seed (default mirrors the UI slider).
        duration: Forwarded as ``duration`` (default mirrors the UI slider).
        guidance_scale: Forwarded as ``guidance_scale``.
        ddim_steps: Forwarded as ``ddim_steps``.
        n_variations: Forwarded as ``batchsize``.
    """
    for audio_file in input_files:
        waveform = style_transfer(
            audioldm,
            prompt,
            audio_file,
            transfer_strength,
            seed,
            duration=duration,
            guidance_scale=guidance_scale,
            ddim_steps=ddim_steps,
            batchsize=n_variations,
            config=None,
        )
        waveform = waveform[:, None, :]
        # Build the name from the stem: save_wave cuts a name at ".wav", so
        # "<file>.wav_STYLE_<prompt>" would silently lose the prompt suffix.
        stem = os.path.splitext(os.path.basename(audio_file))[0]
        save_wave(waveform, output_directory, name=f"{stem}_STYLE_{prompt}")
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ -e git+https://github.com/audacitorch/pyharp.git#egg=pyharp
2
+ descript-audiotools
3
+ transformers==4.29.0
4
+ audioldm