Spaces:

gnosticdev
/

audio-a-video

Running

App Files Files Community

audio-a-video / app.py

gnosticdev

Create app.py

0023f3b verified about 1 month ago

raw

history blame

3.88 kB

	import gradio as gr
	import moviepy.editor as mp
	import numpy as np
	import librosa
	from PIL import Image, ImageDraw
	import tempfile
	import os
	import logging

	# Logging setup: timestamped records at INFO level and above go to a stream handler.
	logging.basicConfig(
	    handlers=[logging.StreamHandler()],
	    format="%(asctime)s - %(levelname)s - %(message)s",
	    level=logging.INFO,
	)
	logger = logging.getLogger("audio_to_video")

	def generate_video(audio_file, image_file):
	    """Render an MP4 in which a still image zooms and shows a waveform driven by the audio.

	    The audio is loaded with librosa, its absolute amplitude is normalised to
	    the range 0..1 and used both as a per-sample zoom factor for the image and
	    as the height of a red waveform drawn at 75% of the image height.

	    Args:
	        audio_file: Path to the audio file.
	        image_file: Path to the background image.

	    Returns:
	        Path of the generated ``.mp4`` temporary file. The file is created with
	        ``delete=False`` and is left on disk for the caller.

	    Raises:
	        gr.Error: If an input is missing or any processing step fails. The
	            underlying exception is logged with its traceback and chained.
	    """
	    if not audio_file or not image_file:
	        raise gr.Error("Error: se requieren un archivo de audio y una imagen")

	    fps = 24
	    video = None
	    audio_clip = None
	    output_path = None
	    try:
	        # 1. Load audio
	        samples, sr = librosa.load(audio_file)
	        if samples.size == 0:
	            raise ValueError("el audio no contiene muestras")
	        duration = librosa.get_duration(y=samples, sr=sr)
	        logger.info(f"Audio cargado: {duration:.1f} segundos")

	        # 2. Load image; the context manager releases the source file handle
	        # as soon as the RGB copy exists.
	        with Image.open(image_file) as source_img:
	            img = source_img.convert('RGB')
	        img_w, img_h = img.size
	        logger.info(f"Imagen cargada: {img_w}x{img_h}")

	        # 3. Analyse audio: amplitude envelope normalised to 0..1.
	        magnitude = np.abs(samples)
	        peak = magnitude.max()
	        # Silent audio has a peak of 0; dividing by it would produce NaN and
	        # make the int() conversions in make_frame fail.
	        if peak > 0:
	            envelope = magnitude / peak
	        else:
	            envelope = np.zeros_like(magnitude)
	        zoom_curve = envelope * 0.2 + 0.9  # zoom factor between 0.9x and 1.1x
	        wave_heights = envelope * (img_h // 6)  # waveform half-height in pixels
	        half_window = sr // 10  # samples shown on each side of the current time
	        baseline_y = int(img_h * 0.75)  # waveform centre line, 75% down

	        # 4. Frame generator with zoom and waveform
	        def make_frame(t):
	            """Return the RGB frame (as a NumPy array) for time ``t`` in seconds."""
	            time_idx = int(t * sr)

	            # --- Zoom effect ---
	            zoom = zoom_curve[time_idx] if time_idx < len(zoom_curve) else 1.0
	            # max(1, ...) keeps resize() valid for very small images.
	            new_size = (max(1, int(img_w * zoom)), max(1, int(img_h * zoom)))
	            zoomed = img.resize(new_size, Image.LANCZOS)

	            # Centre the resized image inside the original frame size.
	            # NOTE(review): for zoom < 1 the crop box extends past the resized
	            # image, which appears to leave a padded border - confirm intended.
	            x_off = (new_size[0] - img_w) // 2
	            y_off = (new_size[1] - img_h) // 2
	            frame_img = zoomed.crop((x_off, y_off, x_off + img_w, y_off + img_h))

	            # --- Waveform ---
	            # "RGBA" draw mode blends the fill alpha into the RGB image; with
	            # the default mode the alpha component has no blending effect.
	            draw = ImageDraw.Draw(frame_img, "RGBA")

	            lo = max(0, time_idx - half_window)
	            hi = min(len(wave_heights), time_idx + half_window)
	            heights = wave_heights[lo:hi].astype(int)

	            if heights.size >= 2:
	                xs = (np.arange(heights.size) / heights.size * img_w).astype(int)
	                # Trace the upper edge left-to-right and the lower edge
	                # right-to-left so the polygon is a closed, non-crossing outline.
	                upper = zip(xs.tolist(), (baseline_y - heights).tolist())
	                lower = zip(xs[::-1].tolist(), (baseline_y + heights[::-1]).tolist())
	                draw.polygon([*upper, *lower], fill=(255, 0, 0, 150))

	            return np.array(frame_img)

	        # 5. Build the clip
	        video = mp.VideoClip(make_frame, duration=duration)
	        video.fps = fps
	        audio_clip = mp.AudioFileClip(audio_file)
	        video = video.set_audio(audio_clip)

	        # 6. Save the video. Only reserve the path here and close the handle
	        # before the encoder writes to it, so no second open handle is held
	        # on the output file during the write.
	        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
	            output_path = tmpfile.name
	        video.write_videofile(
	            output_path,
	            codec="libx264",
	            audio_codec="aac",
	            fps=fps,
	            logger=None,
	        )

	        logger.info(f"Video guardado: {output_path}")
	        return output_path

	    except Exception as e:
	        logger.exception(f"Error crítico: {e}")
	        # Do not leave a partial or empty output file behind.
	        if output_path and os.path.exists(output_path):
	            try:
	                os.remove(output_path)
	            except OSError:
	                logger.warning("No se pudo eliminar el archivo temporal: %s", output_path)
	        # The output component expects a file path, so failures are raised
	        # as gr.Error instead of being returned as a string.
	        raise gr.Error(f"Error: {e}") from e

	    finally:
	        # Release the clips' readers whether or not rendering succeeded.
	        for clip in (video, audio_clip):
	            if clip is not None:
	                clip.close()

	# Gradio UI: takes an audio path and an image path, passes them to
	# generate_video and exposes the result through a file output.
	iface = gr.Interface(
	    title="Generador de Video Musical",
	    description="Crea videos con zoom automático y efectos de audio sincronizados",
	    fn=generate_video,
	    inputs=[
	        gr.Audio(label="Audio (WAV/MP3)", type="filepath"),
	        gr.Image(label="Imagen de Fondo", type="filepath"),
	    ],
	    outputs=gr.File(label="Descargar Video"),
	)

	# Launch only when run as a script; the queue is enabled before launching.
	if __name__ == "__main__":
	    iface.queue().launch()