Spaces:

gnosticdev
/

audio-a-video

Running

App Files Files Community

audio-a-video / app.py

gnosticdev

Update app.py

a2b183c verified 3 months ago

raw

history blame

5.16 kB

	import gradio as gr
	print(f"Gradio version: {gr.__version__}")
	import moviepy.editor as mp
	import numpy as np
	import librosa
	import librosa.display
	import matplotlib.pyplot as plt
	import io
	import os

	# Función principal para generar el video
	def _render_waveform_rgba(envelope, sr, t, duration, width, height):
	    """Draw the mirrored waveform around time ``t`` on a transparent canvas.

	    Args:
	        envelope: 1-D array of non-negative amplitudes, already scaled to pixels.
	        sr: Sample rate of ``envelope`` in Hz.
	        t: Time (seconds) of the frame being drawn.
	        duration: Total audio duration in seconds; used as the x-axis range.
	        width: Canvas width in pixels.
	        height: Canvas height in pixels.

	    Returns:
	        Float RGBA array with values in [0, 1]; alpha is 0 where nothing was drawn.
	    """
	    fig, ax = plt.subplots(figsize=(width / 100, height / 100), dpi=100)
	    try:
	        # Transparent figure background so only the red traces cover the image.
	        fig.patch.set_alpha(0.0)
	        ax.set_xlim(0, duration)
	        ax.set_ylim(-height / 2, height / 2)
	        ax.axis("off")

	        # Show roughly 0.1 s of samples on either side of the current time.
	        half_window = sr // 10
	        center = int(t * sr)
	        window = envelope[max(0, center - half_window):min(len(envelope), center + half_window)]
	        times = np.linspace(t - 0.1, t + 0.1, len(window))
	        ax.plot(times, window - height / 4, color="red")
	        ax.plot(times, -window + height / 4, color="red")

	        buf = io.BytesIO()
	        fig.canvas.print_png(buf)
	        buf.seek(0)
	        rgba = plt.imread(buf)
	    finally:
	        # Always release the figure; one is created per frame.
	        plt.close(fig)

	    if rgba.dtype == np.uint8:
	        rgba = rgba / 255.0
	    return rgba


	def audio_to_video(audio_file, image_file, effect_type="waveform"):
	    """Generate a video from an audio file and a still image with a synced effect.

	    Args:
	        audio_file: Path to the audio file (wav or mp3).
	        image_file: Path to the image file (any format MoviePy can load).
	        effect_type: Visual effect to render; only "waveform" is supported.

	    Returns:
	        Path to the generated mp4 on success, otherwise a string that starts
	        with "Error: " describing the failure.
	    """
	    # Reject unknown effects before doing any expensive decoding.
	    if effect_type != "waveform":
	        return "Error: Efecto visual no soportado."

	    audio_clip = None
	    try:
	        # 1. Load the audio samples with librosa.
	        y, sr = librosa.load(audio_file)
	        duration = librosa.get_duration(y=y, sr=sr)

	        # 2. Load the image and stretch it over the audio duration.
	        img_clip = mp.ImageClip(image_file).set_duration(duration)
	        width, height = img_clip.size

	        # 3. Build the amplitude envelope, scaled to half the image height.
	        envelope = np.abs(y)
	        peak = float(np.max(envelope)) if envelope.size else 0.0
	        if peak > 0:
	            # Skip normalisation for silent audio to avoid dividing by zero.
	            envelope = envelope / peak
	        envelope = envelope * height / 2

	        # The colour frame and its mask are requested separately for the same
	        # t, so remember the last render instead of drawing the figure twice.
	        last_render = {"t": None, "rgba": None}

	        def rgba_at(t):
	            if last_render["rgba"] is None or last_render["t"] != t:
	                last_render["rgba"] = _render_waveform_rgba(
	                    envelope, sr, t, duration, width, height
	                )
	                last_render["t"] = t
	            return last_render["rgba"]

	        def make_frame(t):
	            # MoviePy expects uint8 RGB frames, not float RGBA.
	            return np.rint(rgba_at(t)[..., :3] * 255).astype(np.uint8)

	        def make_mask(t):
	            # Alpha channel in [0, 1] tells MoviePy where the overlay is visible.
	            return rgba_at(t)[..., 3]

	        mask_clip = mp.VideoClip(make_mask, ismask=True, duration=duration)
	        audio_effect_clip = mp.VideoClip(make_frame, duration=duration)
	        audio_effect_clip = audio_effect_clip.set_mask(mask_clip).set_fps(24)

	        # 4. Overlay the effect onto the image.
	        final_clip = mp.CompositeVideoClip(
	            [img_clip, audio_effect_clip.set_pos("center")]
	        )

	        # 5. Attach the original audio track.
	        audio_clip = mp.AudioFileClip(audio_file)
	        final_clip = final_clip.set_audio(audio_clip)

	        # 6. Encode the result.
	        output_video_path = "output.mp4"
	        final_clip.write_videofile(
	            output_video_path, fps=24, codec="libx264", audio_codec="aac"
	        )
	        return output_video_path

	    except Exception as e:
	        # Boundary handler: failures are reported as a message, per the contract.
	        return f"Error: {e}"
	    finally:
	        # Release the ffmpeg reader held by the audio clip.
	        if audio_clip is not None:
	            audio_clip.close()


	# ----------------------------------
	# Example files
	# ----------------------------------
	# Create placeholder audio and image files first, so the paths listed in
	# `examples` below already exist when gr.Interface is constructed.
	if not os.path.exists("audio_example.wav"):
	    import wave

	    sr = 22050
	    T = 5
	    t = np.linspace(0, T, int(T * sr), endpoint=False)
	    x = 0.5 * np.sin(2 * np.pi * 440 * t)  # A4 frequency

	    # Write 16-bit mono PCM with the standard library; librosa.output.write_wav
	    # no longer exists in current librosa releases.
	    pcm = (x * 32767).astype("<i2")
	    with wave.open("audio_example.wav", "wb") as wav_file:
	        wav_file.setnchannels(1)
	        wav_file.setsampwidth(2)
	        wav_file.setframerate(sr)
	        wav_file.writeframes(pcm.tobytes())

	if not os.path.exists("image_example.jpg"):
	    # Create a simple placeholder image.
	    fig, ax = plt.subplots(figsize=(6, 4))
	    ax.text(0.5, 0.5, "Placeholder Image", ha="center", va="center")
	    ax.axis("off")
	    fig.savefig("image_example.jpg")
	    plt.close(fig)


	# ----------------------------------
	# Gradio Interface
	# ----------------------------------
	iface = gr.Interface(
	    fn=audio_to_video,
	    inputs=[
	        # 'source="upload"' was dropped: it is not needed in Gradio 4.x.
	        gr.Audio(type="filepath", label="Subir Archivo de Audio (WAV o MP3)"),
	        gr.Image(type="filepath", label="Subir Imagen"),
	        gr.Radio(["waveform"], value="waveform", label="Tipo de Efecto Visual"),
	    ],
	    # Use a gr.Video() component instead of the "video" shortcut string.
	    outputs=gr.Video(label="Video Generado"),
	    title="Audio to Video Generator",
	    description="Sube un audio y una imagen para generar un video con efecto visual sincronizado.",
	    examples=[["audio_example.wav", "image_example.jpg", "waveform"]],
	)



	if __name__ == "__main__":
	    # Turn on request queuing, then start the Gradio server.
	    queued_app = iface.queue()
	    queued_app.launch()