import gradio as gr
from tts_module import get_voices, text_to_speech  # Uses the updated tts_module.py
from pexels_api import search_pexels
import asyncio
import os
import requests
import tempfile


# Force installation of moviepy if it is not available
def install(package):
    import subprocess
    import sys
    subprocess.check_call([sys.executable, "-m", "pip", "install", package])


try:
    # Try to import moviepy.editor
    from moviepy.editor import (
        AudioFileClip,
        VideoFileClip,
        CompositeAudioClip,
        concatenate_audioclips,
        concatenate_videoclips,
    )
    print("MoviePy.editor is installed correctly.")
except ImportError:
    print("Installing MoviePy...")
    install("moviepy==1.0.3")  # Force installation of the compatible version
    try:
        from moviepy.editor import (
            AudioFileClip,
            VideoFileClip,
            CompositeAudioClip,
            concatenate_audioclips,
            concatenate_videoclips,
        )
        print("MoviePy.editor installed successfully after reinstalling.")
    except ImportError:
        raise ImportError("Critical error: moviepy.editor could not be installed. Check the dependencies.")


# Adjust the background music (loop it automatically to cover the video)
def adjust_background_music(video_duration, music_file):
    music = AudioFileClip(music_file)
    if music.duration < video_duration:
        repetitions = int(video_duration / music.duration) + 1
        music = concatenate_audioclips([music] * repetitions)
    if music.duration > video_duration:
        music = music.subclip(0, video_duration)
    music = music.volumex(0.2)  # Lower the volume to 20%
    return music


# Concatenate several Pexels videos, one per sentence of the text
def concatenate_pexels_videos(text, num_videos=5):
    sentences = [sentence.strip() for sentence in text.split(".") if sentence.strip()]
    video_links = []
    for sentence in sentences:
        try:
            links = search_pexels(sentence, num_results=num_videos)
            if links:
                video_links.append(links[0])  # Use the first video found for each sentence
        except Exception as e:
            print(f"Error searching for a video for the sentence '{sentence}': {e}")
            continue

    if not video_links:
        raise Exception("No relevant videos were found for the given text.")

    video_clips = []
    for link in video_links:
        video_response = requests.get(link)
        # Write the download to a temporary file, then open it as a clip
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp_video:
            tmp_video.write(video_response.content)
            tmp_path = tmp_video.name
        video_clips.append(VideoFileClip(tmp_path))

    # Concatenate the downloaded clips
    final_clip = concatenate_videoclips(video_clips, method="compose")
    return final_clip


# Combine audio, video and music, with a fade out on the video and the music only
def combine_audio_video(audio_file, video_clip, music_clip=None):
    audio_clip = AudioFileClip(audio_file)

    # Total duration: speech + 5 seconds for the fade out
    total_duration = audio_clip.duration + 5

    # Extend the video if it is shorter than the audio plus the fade out
    if video_clip.duration < total_duration:
        video_clip = video_clip.loop(duration=total_duration)  # Repeat the video if needed

    # Apply the fade out to the video only
    video_clip = video_clip.set_duration(total_duration).fadeout(5)

    # Combine audio and video
    final_clip = video_clip.set_audio(audio_clip)

    # Add background music if provided
    if music_clip:
        # Extend the music so it matches the total duration
        if music_clip.duration < total_duration:
            repetitions = int(total_duration / music_clip.duration) + 1
            music_clip = concatenate_audioclips([music_clip] * repetitions)
        if music_clip.duration > total_duration:
            music_clip = music_clip.subclip(0, total_duration)

        # Apply the fade out to the music
        music_clip = music_clip.audio_fadeout(5)

        # Mix the speech and the music under the video
        final_clip = final_clip.set_audio(CompositeAudioClip([audio_clip, music_clip]))

    # Export the final video
    output_path = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4").name
    final_clip.write_videofile(output_path, codec="libx264", audio_codec="aac", fps=24)
    return output_path


# Main function
def process_input(text, txt_file, mp3_file, selected_voice, rate, pitch):
    try:
        if text.strip():
            final_text = text
        elif txt_file is not None:
            # gr.File may hand back a path string or a file-like object depending on the Gradio version
            txt_path = txt_file if isinstance(txt_file, str) else txt_file.name
            with open(txt_path, "r", encoding="utf-8") as f:
                final_text = f.read()
        else:
            raise gr.Error("No input provided")

        # Fetch the available voices
        voices = asyncio.run(get_voices())
        if selected_voice not in voices:
            raise gr.Error(
                f"The voice '{selected_voice}' is not valid. "
                f"Please select one of the following voices: {', '.join(voices.keys())}"
            )

        # Generate the speech audio with edge_tts
        try:
            audio_file = asyncio.run(text_to_speech(final_text, selected_voice, rate, pitch))
        except Exception as e:
            raise gr.Error(f"Error with the selected voice: {e}")

        # Concatenate several Pexels videos based on the text
        try:
            video_clip = concatenate_pexels_videos(final_text, num_videos=5)
        except Exception as e:
            raise gr.Error(f"Error searching for videos on Pexels: {e}")

        # Adjust the background music
        if mp3_file is not None:
            mp3_path = mp3_file if isinstance(mp3_file, str) else mp3_file.name
            music_clip = adjust_background_music(video_clip.duration, mp3_path)
        else:
            music_clip = None

        # Combine audio, video and music, with a fade out on the video and the music only
        final_video = combine_audio_video(audio_file, video_clip, music_clip)
        return final_video
    except gr.Error:
        raise
    except Exception as e:
        # Surface any other failure in the Gradio UI instead of returning a tuple
        raise gr.Error(f"Error during processing: {e}")


# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# Text-to-Video Generator")
    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(label="Write your text here", lines=5)
            txt_file_input = gr.File(label="Or upload a .txt file", file_types=[".txt"])
            mp3_file_input = gr.File(label="Upload background music (.mp3)", file_types=[".mp3"])
            voices = asyncio.run(get_voices())  # Fetch the available voices
            voice_dropdown = gr.Dropdown(choices=list(voices.keys()), label="Select Voice")
            rate_slider = gr.Slider(minimum=-50, maximum=50, value=0, label="Speech Rate Adjustment (%)", step=1)
            pitch_slider = gr.Slider(minimum=-20, maximum=20, value=0, label="Pitch Adjustment (Hz)", step=1)
        with gr.Column():
            output_video = gr.Video(label="Generated Video")

    btn = gr.Button("Generate Video")
    btn.click(
        process_input,
        inputs=[text_input, txt_file_input, mp3_file_input, voice_dropdown, rate_slider, pitch_slider],
        outputs=output_video,
    )

demo.launch()