"""Gradio app: render a still image plus an audio-driven waveform overlay to MP4."""

import logging
import os
import tempfile
from io import BytesIO

import matplotlib

# Select the non-interactive Agg backend before pyplot is imported: frames are
# rendered off-screen from Gradio worker threads, where GUI backends are unsafe.
matplotlib.use("Agg")

import gradio as gr
import librosa
import matplotlib.pyplot as plt
import moviepy.editor as mp
import numpy as np

# Logging configuration
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("audio_to_video")

# Frame rate used both for the clip and for the encoded file.
FPS = 24


def _render_overlay(envelope, sr, t, duration, width, height):
    """Draw the waveform window around time ``t`` and return it as RGBA pixels.

    Args:
        envelope: 1-D array with the scaled absolute amplitude per sample.
        sr: Sample rate of ``envelope`` in Hz.
        t: Time (seconds) of the frame being rendered.
        duration: Total audio duration in seconds (x-axis extent).
        width: Target frame width in pixels.
        height: Target frame height in pixels.

    Returns:
        ``uint8`` array of shape ``(rows, cols, 4)`` with a fully transparent
        background; ``rows``/``cols`` match ``height``/``width`` up to
        figure-size rounding.
    """
    fig, ax = plt.subplots(figsize=(width / 100, height / 100), dpi=100)
    try:
        # Transparent figure + axes stretched to the full canvas, so every
        # frame has the same pixel size and only the waveform is opaque.
        fig.patch.set_alpha(0.0)
        fig.subplots_adjust(left=0, right=1, bottom=0, top=1)
        ax.set_xlim(0, duration)
        ax.set_ylim(-height // 2, height // 2)
        ax.axis("off")

        # Window of +/- 0.1 s (sr // 10 samples) around the current time.
        center = int(t * sr)
        half_window = sr // 10
        start = max(0, center - half_window)
        end = min(len(envelope), center + half_window)
        window = envelope[start:end]

        if window.size:
            x_values = np.linspace(t - 0.1, t + 0.1, len(window))
            # NOTE(review): fill bounds kept exactly as in the original
            # design (band narrows as amplitude grows) - confirm intent.
            ax.fill_between(
                x_values,
                window - height // 4,
                -window + height // 4,
                facecolor="red",
                alpha=0.7,
            )

        fig.canvas.draw()
        # Copy the buffer: it is owned by the canvas, which is closed below.
        return np.array(fig.canvas.buffer_rgba())
    finally:
        plt.close(fig)


def _composite(background, overlay):
    """Alpha-blend an RGBA ``overlay`` onto an RGB ``background`` (uint8 out)."""
    frame = background.astype(np.float32)
    # Blend only the overlapping region; the overlay can differ from the
    # background by a pixel because of figure-size rounding.
    rows = min(frame.shape[0], overlay.shape[0])
    cols = min(frame.shape[1], overlay.shape[1])
    alpha = overlay[:rows, :cols, 3:4].astype(np.float32) / 255.0
    frame[:rows, :cols] = (
        frame[:rows, :cols] * (1.0 - alpha) + overlay[:rows, :cols, :3] * alpha
    )
    return np.clip(np.rint(frame), 0, 255).astype(np.uint8)


def generate_waveform_video(audio_path, image_path):
    """Render ``image_path`` with a waveform overlay synced to ``audio_path``.

    Args:
        audio_path: Filesystem path of the audio file (as provided by
            ``gr.Audio(type="filepath")``).
        image_path: Filesystem path of the background image (as provided by
            ``gr.Image(type="filepath")``).

    Returns:
        Path of the generated MP4 file, suitable for a ``gr.Video`` output.
        The file is created in the system temp directory and is not removed
        by this function on success.

    Raises:
        gr.Error: If an input is missing or any step of the generation fails,
            so that the message is displayed in the Gradio UI.
    """
    if not audio_path or not image_path:
        raise gr.Error("Debes proporcionar un archivo de audio y una imagen.")

    output_path = None
    audio_clip = None
    try:
        # 1. Load audio
        logger.info("Cargando archivo de audio...")
        y, sr = librosa.load(audio_path)
        duration = librosa.get_duration(y=y, sr=sr)
        logger.info(f"Duración del audio: {duration:.2f} segundos")
        if y.size == 0 or duration <= 0:
            raise ValueError("El archivo de audio está vacío.")

        # 2. Load image
        logger.info("Procesando imagen...")
        img_clip = mp.ImageClip(image_path).set_duration(duration)
        img_width, img_height = img_clip.size
        background = np.asarray(img_clip.get_frame(0))
        if background.ndim == 2:
            # Grayscale image: expand to three channels for blending.
            background = np.dstack([background] * 3)
        background = background[..., :3]

        # 3. Build waveform effect
        logger.info("Generando efecto visual...")
        audio_envelope = np.abs(y)
        peak = float(np.max(audio_envelope))
        if peak > 0:
            # Guarded normalisation: silent audio would divide by zero.
            audio_envelope = (audio_envelope / peak) * (img_height // 3)

        def make_frame(t):
            overlay = _render_overlay(
                audio_envelope, sr, t, duration, img_width, img_height
            )
            return _composite(background, overlay)

        logger.info("Renderizando video...")
        # 4. Combine with audio
        audio_clip = mp.AudioFileClip(audio_path)
        final_clip = (
            mp.VideoClip(make_frame, duration=duration)
            .set_fps(FPS)
            .set_audio(audio_clip)
        )

        # 5. Write to a temporary file: moviepy/ffmpeg need a real path and
        # gr.Video expects a file path, not an in-memory buffer.
        handle, output_path = tempfile.mkstemp(suffix=".mp4")
        os.close(handle)
        final_clip.write_videofile(
            output_path,
            fps=FPS,
            codec="libx264",
            audio_codec="aac",
            logger=None,
        )
        logger.info("Video generado exitosamente")
        return output_path
    except Exception as exc:
        logger.exception("Error durante la generación: %s", exc)
        # Do not leave a partial/empty output file behind.
        if output_path and os.path.exists(output_path):
            os.remove(output_path)
        raise gr.Error(f"Error: {exc}") from exc
    finally:
        # AudioFileClip holds an ffmpeg reader process; release it.
        if audio_clip is not None:
            audio_clip.close()


# Gradio interface
iface = gr.Interface(
    fn=generate_waveform_video,
    inputs=[
        gr.Audio(type="filepath", label="Audio (WAV/MP3)"),
        gr.Image(type="filepath", label="Imagen de Fondo"),
    ],
    outputs=gr.Video(label="Video Resultante", format="mp4"),
    title="Generador de Video con Efectos de Audio",
    description="Crea videos con efectos visuales sincronizados con el audio. Actualmente soporta efecto de waveform.",
    allow_flagging="never",
)

if __name__ == "__main__":
    logger.info("Iniciando aplicación Gradio...")
    iface.queue().launch(share=False, debug=True)