"""Gradio/HARP endpoint that runs AudioLDM style transfer on an input audio file."""

import tempfile

from pyharp import ModelCard, build_endpoint, save_and_return_filepath
from audiotools import AudioSignal
from audioldm import build_model, style_transfer
import gradio as gr
import soundfile as sf

# Sample rate handed to soundfile when writing the result.
# NOTE(review): presumably AudioLDM's native output rate -- confirm.
OUTPUT_SAMPLE_RATE = 16000

# Built once at import time so every request reuses the same model object.
audioldm = build_model(model_name="audioldm-m-full")


def process_fn(
    input_audio_path: str,
    prompt: str,
    seed: float,
    guidance_scale: float,
    num_inference_steps: float,
    num_candidates: float,
    audio_length_in_s: float,
    transfer_strength: float,
) -> str:
    """Run AudioLDM style transfer on a file and return the path of the result.

    The arguments arrive in the same order as the widgets in ``inputs`` below.

    Args:
        input_audio_path: Path of the source audio file.
        prompt: Text prompt forwarded to ``style_transfer``.
        seed: Seed value; truncated to ``int`` before use.
        guidance_scale: Forwarded as ``guidance_scale``.
        num_inference_steps: Forwarded as ``ddim_steps`` after ``int()``.
        num_candidates: Forwarded as ``batchsize`` after ``int()``.
        audio_length_in_s: Forwarded as ``duration``.
        transfer_strength: Forwarded as the fourth positional argument of
            ``style_transfer``.

    Returns:
        Path of a newly written ``.wav`` file holding the first candidate.
    """
    waveform = style_transfer(
        audioldm,
        prompt,
        input_audio_path,
        transfer_strength,
        int(seed),
        duration=audio_length_in_s,
        guidance_scale=guidance_scale,
        ddim_steps=int(num_inference_steps),
        batchsize=int(num_candidates),
        config=None,
    )

    # Insert an axis at position 1, then pick element [0, 0] below.
    # NOTE(review): assumes the result is indexed (candidate, samples), so only
    # the first candidate is saved even when num_candidates > 1 -- confirm.
    waveform = waveform[:, None, :]

    # Write to a unique temporary file rather than a fixed "./output.wav" so
    # that overlapping requests cannot overwrite each other's result.
    # delete=False keeps the file on disk after the handle is closed, because
    # the caller reads it back by path.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
        output_path = handle.name

    # audioldm's save_wave helper is avoided here: it always appends "_0.wav"
    # to the requested name, so the file is written directly with soundfile.
    sf.write(output_path, waveform[0, 0], samplerate=OUTPUT_SAMPLE_RATE)
    return output_path


card = ModelCard(
    name='AudioLDM Variations',
    description='AudioLDM Variation Generator, operates on region selected in track.',
    author='Team Audio',
    tags=['AudioLDM', 'Variations', 'audio-to-audio'],
)


with gr.Blocks() as webapp:
    # Widget order must match the positional parameters of process_fn.
    inputs = [
        gr.Audio(
            label="Audio Input",
            type="filepath",
        ),
        gr.Text(
            label="Prompt",
            interactive=True,
        ),
        # Bounds, default and step are numbers (they were previously passed as
        # strings, unlike every other slider in this list).
        gr.Slider(
            label="seed",
            minimum=0,
            maximum=65535,
            value=43534,
            step=1,
        ),
        gr.Slider(
            minimum=0,
            maximum=10,
            step=0.1,
            value=2.5,
            label="Guidance Scale",
        ),
        gr.Slider(
            minimum=1,
            maximum=500,
            step=1,
            value=200,
            label="Inference Steps",
        ),
        gr.Slider(
            minimum=1,
            maximum=10,
            step=1,
            value=1,
            label="Candidates",
        ),
        gr.Slider(
            minimum=2.5,
            maximum=10.0,
            step=2.5,
            value=5,
            label="Duration",
        ),
        gr.Slider(
            minimum=0,
            maximum=1.0,
            value=0.5,
            label="Transfer Strength",
        ),
    ]

    output = gr.Audio(label="Audio Output", type="filepath")

    ctrls_data, ctrls_button, process_button, cancel_button = build_endpoint(
        inputs, output, process_fn, card
    )

# Queueing is intentionally left disabled. To enable it, call webapp.queue()
# before launching; see
# https://www.gradio.app/guides/setting-up-a-demo-for-maximum-performance
webapp.launch(share=True)