Spaces:
Building
Building
import gradio as gr | |
from tts_module import get_voices, text_to_speech | |
from pexels_api import search_pexels | |
from moviepy.editor import ( | |
AudioFileClip, VideoFileClip, CompositeAudioClip, | |
concatenate_audioclips, concatenate_videoclips, vfx, CompositeVideoClip | |
) | |
import asyncio | |
import os | |
import time | |
import requests | |
from googleapiclient.discovery import build | |
from googleapiclient.http import MediaFileUpload | |
import tempfile | |
import re | |
import random | |
# Folder that receives the rendered videos; it is created at import time so
# later writes never have to check for it.
output_folder = "outputs"
os.makedirs(output_folder, exist_ok=True)
def clean_text_for_search(text, max_length=100):
    """Normalize free text into a query string suitable for a Pexels search.

    Characters that are neither word characters nor whitespace are removed,
    runs of whitespace (including the gaps left behind by removed characters)
    are collapsed to a single space, and the result is capped at
    ``max_length`` characters.

    Args:
        text: Raw keyword or phrase. ``None`` or an empty value yields ``""``.
        max_length: Maximum length of the returned query. Pass ``None`` to
            disable the cap.

    Returns:
        The cleaned query string (possibly empty).
    """
    if not text:
        return ""

    # Strip punctuation/symbols, then collapse whitespace so that inputs such
    # as "rock & roll" do not produce a query with a double space.
    cleaned = re.sub(r"[^\w\s]", "", str(text))
    cleaned = " ".join(cleaned.split())

    if max_length is not None and len(cleaned) > max_length:
        # rstrip() so the cut never leaves a dangling space at the end.
        cleaned = cleaned[:max_length].rstrip()
    return cleaned
def resize_and_blur_video(clip, target_width=1920, target_height=1080):
    """Fit a clip onto a ``target_width`` x ``target_height`` canvas.

    Clips narrower than the target aspect ratio (vertical or squarish) are
    scaled to the full canvas height and centred over a blurred, enlarged copy
    of themselves. Clips at or wider than the target ratio are scaled to the
    canvas height and centre-cropped to the canvas width.

    Args:
        clip: Source video clip (expects ``size``, ``resize``, ``fx`` and
            ``set_position`` as provided by moviepy clips).
        target_width: Canvas width in pixels (default 1920).
        target_height: Canvas height in pixels (default 1080).

    Returns:
        The processed clip. If processing fails, a height-only resize of the
        original is returned; if that also fails, the original clip itself.
    """
    try:
        w, h = clip.size
        current_ratio = w / h
        target_ratio = target_width / target_height
        print(f"Video original: {w}x{h}, ratio: {current_ratio}")

        if current_ratio < target_ratio:
            # Background: cover the full canvas width, then soften it.
            background = clip.resize(width=target_width)
            blur_fx = getattr(vfx, "blur", None)
            if blur_fx is not None:
                background = background.fx(blur_fx, sigma=10)
            else:
                # moviepy 1.x does not ship a generic blur effect, so
                # approximate one by shrinking the frame and scaling it back.
                full_size = tuple(background.size)
                background = (
                    background
                    .resize(width=max(2, target_width // 16))
                    .resize(newsize=full_size)
                )

            # Foreground: full canvas height, width derived from the source
            # aspect ratio so the picture is not distorted.
            new_height = target_height
            new_width = max(1, int(round(target_height * current_ratio)))
            # newsize is passed explicitly: with separate width/height
            # keywords moviepy 1.x only honours height.
            foreground = clip.resize(newsize=(new_width, new_height))

            x_offset = (target_width - new_width) // 2
            return CompositeVideoClip(
                [
                    # The background is taller than the canvas; centre it so
                    # the visible band is the middle of the frame.
                    background.set_position("center"),
                    foreground.set_position((x_offset, 0)),
                ],
                size=(target_width, target_height),
            )

        # Horizontal clip: scale to the canvas height and trim the sides.
        scaled_width = int(round(target_height * current_ratio))
        if scaled_width <= target_width:
            # Already at the target ratio (up to rounding).
            return clip.resize(newsize=(target_width, target_height))

        scaled = clip.resize(newsize=(scaled_width, target_height))
        return scaled.fx(
            vfx.crop,
            x1=(scaled_width - target_width) // 2,
            y1=0,
            width=target_width,
            height=target_height,
        )
    except Exception as e:
        print(f"Error en resize_and_blur_video: {e}")
        try:
            # Best-effort fallback: keep the aspect ratio, match the height.
            return clip.resize(height=target_height)
        except Exception as e:
            print(f"Error en resize_and_blur_video: {e}")
            return clip
def _remove_temp_file(path):
    """Delete ``path`` if it exists; log instead of raising on failure."""
    if not path or not os.path.exists(path):
        return
    try:
        os.unlink(path)
    except OSError as e:
        print(f"Error eliminando archivo temporal: {e}")


def _download_pexels_video(link, timeout=30):
    """Stream ``link`` into a temporary .mp4 file.

    Returns the file path, or ``None`` when the server answers with a
    non-200 status. A partially written file is removed before any
    download error is re-raised.
    """
    tmp_path = None
    try:
        with requests.get(link, stream=True, timeout=timeout) as video_response:
            if video_response.status_code != 200:
                print(f"Error descargando video: {video_response.status_code}")
                return None
            with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp_video:
                tmp_path = tmp_video.name
                for chunk in video_response.iter_content(chunk_size=8192):
                    if chunk:
                        tmp_video.write(chunk)
        return tmp_path
    except Exception:
        _remove_temp_file(tmp_path)
        raise


def concatenate_pexels_videos(keywords, num_videos_per_keyword=1, target_width=1920, target_height=1080):
    """Download Pexels videos for each keyword and concatenate them.

    Every downloaded clip is normalised with ``resize_and_blur_video`` and the
    accepted clips are shuffled before concatenation. Clips shorter than three
    seconds are skipped.

    The downloaded files back the returned clip (frames are read from disk
    when the clip is rendered), so they are kept on disk and only removed when
    the interpreter exits. Files belonging to rejected downloads are deleted
    immediately.

    Args:
        keywords (str): Comma-separated search keywords.
        num_videos_per_keyword (int): Number of videos requested per keyword.
        target_width (int): Target width for every clip.
        target_height (int): Target height for every clip.

    Returns:
        The concatenated video clip.

    Raises:
        ValueError: If ``keywords`` contains no usable keyword.
        Exception: If no valid video could be obtained, or if the
            concatenation itself fails.
    """
    import atexit

    keyword_list = [keyword.strip() for keyword in keywords.split(",") if keyword.strip()]
    if not keyword_list:
        raise ValueError("No se proporcionaron palabras clave válidas.")

    video_clips = []
    processed_keywords = []

    for keyword in keyword_list:
        try:
            print(f"Buscando videos para: '{keyword}'...")
            clean_keyword = clean_text_for_search(keyword)
            links = search_pexels(clean_keyword, num_results=num_videos_per_keyword)
            if not links:
                print(f"No se encontraron videos para: '{keyword}'")
                continue

            for link in links:
                # Reset per link so a failure here can never close or delete
                # a clip that an earlier iteration already accepted.
                tmp_path = None
                clip = None
                keep_file = False
                try:
                    print(f"Descargando video para: '{keyword}'...")
                    tmp_path = _download_pexels_video(link)
                    if tmp_path is None:
                        continue

                    clip = VideoFileClip(tmp_path)
                    if clip.duration < 3:
                        print(f"Video demasiado corto ({clip.duration}s), saltando...")
                        continue

                    processed_clip = resize_and_blur_video(clip, target_width, target_height)
                    if processed_clip is not None:
                        video_clips.append(processed_clip)
                        processed_keywords.append(keyword)
                        keep_file = True
                        # The clip still reads from this file; defer deletion
                        # until the process shuts down.
                        atexit.register(_remove_temp_file, tmp_path)
                        print(f"Video procesado exitosamente para: '{keyword}'")
                except Exception as e:
                    print(f"Error procesando video: {e}")
                finally:
                    if not keep_file:
                        if clip is not None:
                            clip.close()
                        _remove_temp_file(tmp_path)
        except Exception as e:
            print(f"Error procesando palabra clave '{keyword}': {e}")
            continue

    if not video_clips:
        raise Exception("No se pudieron obtener videos válidos para ninguna palabra clave.")

    print(f"Videos procesados exitosamente para las palabras: {', '.join(processed_keywords)}")

    # Randomise the clip order before joining them.
    random.shuffle(video_clips)

    try:
        final_clip = concatenate_videoclips(video_clips, method="compose")
        print(f"Video final generado: {final_clip.size}")
        return final_clip
    except Exception as e:
        raise Exception(f"Error concatenando clips: {e}") from e
def combine_audio_video(audio_file, video_clip, music_clip=None):
    """Render the narration, video and optional background music to an MP4.

    The output lasts five seconds longer than the narration. The video is
    looped if it is too short, cut to that length and faded out over the last
    five seconds. Background music, when given, is repeated or trimmed to the
    same length, faded out, and mixed with the narration.

    Args:
        audio_file: Path to the narration audio file.
        video_clip: Video clip to use as the picture.
        music_clip: Optional audio clip used as background music. A clip whose
            duration is zero or unknown is ignored.

    Returns:
        Path of the written video inside ``output_folder``, or ``None`` if
        anything fails (the error is printed).
    """
    audio_clip = None
    try:
        audio_clip = AudioFileClip(audio_file)
        total_duration = audio_clip.duration + 5

        if video_clip.duration < total_duration:
            video_clip = video_clip.loop(duration=total_duration)
        video_clip = video_clip.set_duration(total_duration).fadeout(5)

        soundtrack = audio_clip
        # Skip music without a usable duration instead of failing the whole
        # render on a division by zero.
        if music_clip is not None and music_clip.duration:
            if music_clip.duration < total_duration:
                repetitions = int(total_duration / music_clip.duration) + 1
                music_clip = concatenate_audioclips([music_clip] * repetitions)
            if music_clip.duration > total_duration:
                music_clip = music_clip.subclip(0, total_duration)
            music_clip = music_clip.audio_fadeout(5)
            soundtrack = CompositeAudioClip([audio_clip, music_clip])

        final_clip = video_clip.set_audio(soundtrack)

        # The folder may have been removed since start-up.
        os.makedirs(output_folder, exist_ok=True)
        output_filename = f"final_video_{int(time.time())}.mp4"
        output_path = os.path.join(output_folder, output_filename)
        final_clip.write_videofile(output_path, codec="libx264", audio_codec="aac", fps=24)
        return output_path
    except Exception as e:
        print(f"Error combinando audio y video: {e}")
        return None
    finally:
        # The narration reader is opened here, so it is released here too,
        # whether or not rendering succeeded.
        if audio_clip is not None:
            try:
                audio_clip.close()
            except Exception as e:
                print(f"Error combinando audio y video: {e}")
def _uploaded_file_path(uploaded):
    """Return the filesystem path behind an upload (a path or an object with ``.name``)."""
    if isinstance(uploaded, (str, os.PathLike)):
        return os.fspath(uploaded)
    return getattr(uploaded, "name", None)


def _read_uploaded_text(uploaded):
    """Return the UTF-8 text of an upload given as raw bytes, a path, or a file wrapper."""
    if isinstance(uploaded, (bytes, bytearray)):
        return bytes(uploaded).decode("utf-8")
    path = _uploaded_file_path(uploaded)
    if path is None:
        raise ValueError("Uploaded text file has no readable path.")
    with open(path, encoding="utf-8") as handle:
        return handle.read()


def process_input(text, txt_file, mp3_file, selected_voice, rate, pitch, keywords):
    """Generate a narrated video from text and return the path of the result.

    Steps: pick the text (typed text wins over the uploaded file), check the
    selected voice, synthesise the narration, build the stock-footage video
    from ``keywords``, mix in optional background music, render, and attempt
    an upload to Google Drive.

    Args:
        text: Text typed by the user; may be empty or ``None``.
        txt_file: Optional uploaded text file (raw bytes, a path, or an object
            exposing the path as ``.name``).
        mp3_file: Optional uploaded music file (a path or an object exposing
            the path as ``.name``).
        selected_voice: Voice identifier; must be present in ``get_voices()``.
        rate: Speech-rate adjustment forwarded to ``text_to_speech``.
        pitch: Pitch adjustment forwarded to ``text_to_speech``.
        keywords: Comma-separated search keywords for the footage.

    Returns:
        Path of the generated video, or ``None`` when the input is missing or
        any step fails (errors are printed, never raised).
    """
    try:
        # Select the input text.
        if text and text.strip():
            final_text = text
        elif txt_file is not None:
            final_text = _read_uploaded_text(txt_file)
        else:
            return None
        if not final_text.strip():
            return None

        # Validate the requested voice.
        voices = asyncio.run(get_voices())
        if selected_voice not in voices:
            return None

        # Synthesise the narration.
        try:
            audio_file = asyncio.run(text_to_speech(final_text, selected_voice, rate, pitch))
        except Exception as e:
            print(f"Error generando audio: {e}")
            return None

        # Build the video track.
        try:
            video_clip = concatenate_pexels_videos(keywords, num_videos_per_keyword=1)
        except Exception as e:
            print(f"Error concatenando videos: {e}")
            return None

        # Load the background music as-is; combine_audio_video repeats or
        # trims it to the final duration.
        music_clip = None
        if mp3_file is not None:
            music_clip = AudioFileClip(_uploaded_file_path(mp3_file))

        # Mix and render.
        final_video_path = combine_audio_video(audio_file, video_clip, music_clip)
        if final_video_path is None:
            return None

        # Upload is best-effort: its result does not affect the return value.
        upload_to_google_drive(final_video_path)

        # Only hand back a path that actually exists on disk.
        return final_video_path if os.path.exists(final_video_path) else None
    except Exception as e:
        print(f"Error en process_input: {e}")
        return None
def upload_to_google_drive(file_path):
    """Upload ``file_path`` to Google Drive and return the created file's ID.

    The API key is read from the ``GOOGLE_API_KEY`` environment variable.
    Every failure is printed and reported as ``None``; nothing is raised.

    Args:
        file_path: Path of the local file to upload.

    Returns:
        The Drive file ID, or ``None`` if the key is missing or the upload
        fails.
    """
    # NOTE(review): Drive write requests normally need OAuth or
    # service-account credentials rather than a plain API key — confirm that
    # this upload can succeed with developerKey alone.
    try:
        api_key = os.getenv("GOOGLE_API_KEY")
        if api_key:
            drive = build("drive", "v3", developerKey=api_key)
            metadata = {"name": os.path.basename(file_path)}
            payload = MediaFileUpload(file_path, resumable=True)
            created = drive.files().create(
                body=metadata,
                media_body=payload,
                fields="id",
            ).execute()
            print(f"Archivo subido exitosamente con ID: {created.get('id')}")
            return created.get("id")

        print("Error: GOOGLE_API_KEY no está definida en las variables de entorno.")
        return None
    except Exception as e:
        print(f"Error subiendo a Google Drive: {e}")
        return None
# Gradio interface: inputs on the left, generated file on the right, and a
# single button that runs the whole pipeline.
with gr.Blocks() as demo:
    gr.Markdown("# Text-to-Video Generator")

    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(label="Write your text here", lines=5)
            txt_file_input = gr.File(label="Or upload a .txt file", file_types=[".txt"])
            mp3_file_input = gr.File(label="Upload background music (.mp3)", file_types=[".mp3"])
            keyword_input = gr.Textbox(
                label="Enter keywords separated by commas (e.g., universe, galaxy, forest, cat)"
            )
            # The voice list is fetched once, while the UI is being built.
            voices = asyncio.run(get_voices())
            voice_dropdown = gr.Dropdown(choices=list(voices.keys()), label="Select Voice")
            rate_slider = gr.Slider(
                minimum=-50, maximum=50, value=0, label="Speech Rate Adjustment (%)", step=1
            )
            pitch_slider = gr.Slider(
                minimum=-20, maximum=20, value=0, label="Pitch Adjustment (Hz)", step=1
            )

        with gr.Column():
            output_video = gr.File(label="Download Generated Video")

    btn = gr.Button("Generate Video")
    # Order must match the positional parameters of process_input.
    _generation_inputs = [
        text_input,
        txt_file_input,
        mp3_file_input,
        voice_dropdown,
        rate_slider,
        pitch_slider,
        keyword_input,
    ]
    btn.click(process_input, inputs=_generation_inputs, outputs=output_video)

# Use the port from the PORT environment variable, falling back to 7860.
port = int(os.environ.get("PORT", 7860))

# Start the application.
demo.launch(server_name="0.0.0.0", server_port=port, share=True)