File size: 3,120 Bytes
9857383
 
 
 
68930cd
be67e91
 
a474c04
 
 
01a849d
 
 
 
 
 
9857383
01a849d
9857383
35ee7d1
01a849d
9857383
01a849d
9857383
68930cd
01a849d
68930cd
01a849d
9857383
01a849d
68930cd
 
9857383
68930cd
35ee7d1
68930cd
 
 
be67e91
 
 
35ee7d1
000e078
68930cd
 
 
 
 
 
 
 
0c0757c
68930cd
 
 
 
 
 
01a849d
be67e91
01a849d
 
0c0757c
 
 
 
 
 
 
01a849d
0c0757c
01a849d
 
 
 
 
9857383
 
01a849d
68930cd
9857383
35ee7d1
9857383
68930cd
9857383
01a849d
 
9857383
01a849d
000e078
68930cd
9857383
 
88eef70
be67e91
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import gradio as gr
import moviepy.editor as mp
import numpy as np
import librosa
from PIL import Image, ImageDraw
import tempfile
import os
import logging

# Configuración de logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[logging.StreamHandler()]
)
logger = logging.getLogger("audio_to_video")

def generate_video(audio_file, image_file):
    """Render an MP4 overlaying a live waveform on a static background image.

    Args:
        audio_file: Path to an audio file (anything librosa.load accepts).
        image_file: Path to the background image.

    Returns:
        Path to the generated .mp4 on success, or an "Error: ..." string on
        failure (Gradio renders the string in the output component).
    """
    audio_clip = None
    video = None
    try:
        # 1. Load audio and derive total duration.
        y, sr = librosa.load(audio_file)
        duration = librosa.get_duration(y=y, sr=sr)
        logger.info(f"Audio cargado: {duration:.1f} segundos")

        # 2. Load the background (force RGB so np.array(frame) is HxWx3).
        img = Image.open(image_file).convert('RGB')
        img_w, img_h = img.size
        logger.info(f"Imagen cargada: {img_w}x{img_h}")

        # 3. Build the waveform envelope. Guard against silent audio:
        #    np.max(...) == 0 would otherwise divide by zero and poison
        #    every frame with NaNs.
        abs_y = np.abs(y)
        peak = np.max(abs_y)
        audio_envelope = abs_y / peak if peak > 0 else abs_y
        audio_envelope *= img_h // 4  # scale amplitude to 25% of image height

        # 4. Per-frame renderer: draw the waveform slice centered at time t.
        def make_frame(t):
            frame = img.copy()
            draw = ImageDraw.Draw(frame)

            time_idx = int(t * sr)
            start = max(0, time_idx - sr // 10)  # ~100 ms window each side
            end = min(len(audio_envelope), time_idx + sr // 10)
            wave_slice = audio_envelope[start:end]

            # Trace the top edge left-to-right, then the bottom edge
            # right-to-left, so the polygon is a closed band. The previous
            # interleaved (top, bottom, top, bottom, ...) ordering produced
            # a self-intersecting zigzag instead of a waveform.
            n = len(wave_slice)
            if n > 1:
                top = []
                bottom = []
                for i, val in enumerate(wave_slice):
                    x = int((i / n) * img_w)
                    top.append((x, img_h // 2 - int(val)))
                    bottom.append((x, img_h // 2 + int(val)))
                points = top + bottom[::-1]
                # NOTE(review): the alpha channel in this RGBA fill is
                # ignored on an RGB image — the band is drawn opaque red.
                draw.polygon(points, fill=(255, 0, 0, 128))

            return np.array(frame)

        # 5. Assemble the clip and attach the original audio track.
        video = mp.VideoClip(make_frame, duration=duration)
        video.fps = 24
        audio_clip = mp.AudioFileClip(audio_file)
        video = video.set_audio(audio_clip)

        # 6. Reserve a temp path (delete=False so Gradio can serve it),
        #    closing the handle BEFORE ffmpeg writes to it — writing to an
        #    open NamedTemporaryFile fails on Windows.
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
            out_path = tmpfile.name
        video.write_videofile(
            out_path,
            codec="libx264",
            audio_codec="aac",
            fps=24,
            logger=None
        )

        # Sanity check: ffmpeg can fail without raising through moviepy.
        if not os.path.exists(out_path):
            raise Exception("Error al guardar el video temporal")

        logger.info(f"Video guardado: {out_path}")
        return out_path  # Gradio File output expects a filesystem path

    except Exception as e:
        logger.error(f"Error crítico: {str(e)}")
        return f"Error: {str(e)}"
    finally:
        # Release the ffmpeg reader/writer processes moviepy holds open;
        # without this, repeated requests leak subprocesses/file handles.
        if audio_clip is not None:
            audio_clip.close()
        if video is not None:
            video.close()

# Gradio UI: two file-path inputs (audio + background image) wired to
# generate_video, with the resulting MP4 exposed as a downloadable file.
_audio_input = gr.Audio(type="filepath", label="Audio (WAV/MP3)")
_image_input = gr.Image(type="filepath", label="Imagen de Fondo")
_video_output = gr.File(label="Descargar Video")  # File component => download link

iface = gr.Interface(
    fn=generate_video,
    inputs=[_audio_input, _image_input],
    outputs=_video_output,
    title="Generador de Video Musical",
    description="Crea videos con efectos de audio sincronizados. Sube un audio y una imagen.",
)

if __name__ == "__main__":
    # Launch the app with Gradio's request queue enabled — video rendering
    # is long-running, so requests are processed through the queue.
    iface.queue().launch()