|
import sys

# Make the locally checked-out pyharp package importable.
sys.path.append('/home/user/audio_ai/diffusers_harp/venv/src')

from pyharp import ModelCard, build_endpoint

from audiotools import AudioSignal
import scipy.io.wavfile
import torch
import gradio as gr
from diffusers import AudioLDM2Pipeline


# HARP model card: metadata shown to the user inside the HARP plugin.
card = ModelCard(
    name='Diffusers AudioLDM2 Generation',
    description='AudioLDM2 text-to-audio generation; operates on the region selected in the track. Not conditioned on the selected audio: it simply replaces the audio in the source track with the generation.',
    author='Team Audio',
    tags=['AudioLDM', 'Diffusers', 'Generation']
)


# Load the AudioLDM2 pipeline, falling back to CPU (and full precision)
# when no GPU is available.
repo_id = "cvssp/audioldm2"
device = "cuda" if torch.cuda.is_available() else "cpu"
pipe = AudioLDM2Pipeline.from_pretrained(
    repo_id,
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
)
pipe = pipe.to(device)
|
def process_fn(input_audio_path, prompt, negative_prompt, seed, num_inference_steps, audio_length_in_s, num_waveforms_per_prompt):
    """
    This function defines the audio processing steps.

    Args:
        input_audio_path (str): the audio filepath to be processed.
        prompt (str): text prompt describing the audio to generate.
        negative_prompt (str): text describing qualities to steer the generation away from.
        seed (int): random seed, for reproducible generation.
        num_inference_steps (int): number of diffusion denoising steps.
        audio_length_in_s (float): duration of the generated audio, in seconds.
        num_waveforms_per_prompt (int): number of candidate waveforms to generate.

        NOTE: These keyword arguments must correspond to, and match the order of,
        the UI elements defined below.

    Returns:
        output_audio_path (str): the filepath of the processed audio.
    """
    # The incoming selection is loaded but not used for conditioning; per the
    # model card, the generation simply replaces it.
    sig = AudioSignal(input_audio_path)
    outfile = "./output.wav"

    # Seed a generator on the same device as the pipeline for reproducibility.
    generator = torch.Generator(device).manual_seed(int(seed))

    audio = pipe(
        prompt,
        negative_prompt=negative_prompt,
        num_inference_steps=int(num_inference_steps),
        audio_length_in_s=audio_length_in_s,
        num_waveforms_per_prompt=int(num_waveforms_per_prompt),
        generator=generator,
    ).audios

    # AudioLDM2 generates 16 kHz audio; write the first waveform to disk.
    scipy.io.wavfile.write(outfile, rate=16000, data=audio[0])
    return outfile
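
# Quick standalone sanity check (illustrative values; "input.wav" is a
# placeholder path, not a file shipped with this script):
# print(process_fn("input.wav", "gentle rain on a tin roof", "low quality, noise",
#                  0, 50, 5.0, 1))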
|
with gr.Blocks() as webapp:

    inputs = [
        gr.Audio(
            label="Audio Input",
            type="filepath"
        ),
        gr.Text(
            label="Prompt",
            interactive=True
        ),
        gr.Text(
            label="Negative Prompt",
            interactive=True
        ),
        gr.Slider(
            label="Seed",
            minimum=0,
            maximum=65535,
            value=0,
            step=1
        ),
        gr.Slider(
            minimum=1, maximum=500,
            step=1, value=1,
            label="Inference Steps"
        ),
        gr.Slider(
            minimum=2.5, maximum=10.0,
            step=2.5, value=2.5,
            label="Duration (s)"
        ),
        gr.Slider(
            minimum=1, maximum=10,
            step=1, value=1,
            label="Waveforms Per Prompt"
        ),
    ]
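    # NOTE: after the audio input, these controls are handed to process_fn in
    # order, so they must stay aligned with its keyword arguments.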
|
    output = gr.Audio(label="Audio Output", type="filepath")

    # Wire the inputs, output, processing function, and model card together
    # as a HARP endpoint.
    ctrls_data, ctrls_button, process_button, cancel_button = build_endpoint(inputs, output, process_fn, card)

# share=True prints a public Gradio URL, which HARP can use to reach this endpoint.
webapp.launch(share=True)
|
|