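"""Gradio demo: 30-second text-to-video generation with AnimateDiff.

The app loads a Stable Diffusion base model together with an AnimateDiff
motion adapter, optionally applies a camera-motion LoRA, generates short
fixed-length clips until 30 seconds of footage (at 10 fps) is collected,
and exports the result as an MP4.
"""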
import gradio as gr
import torch
import os
import uuid

from diffusers import AnimateDiffPipeline, MotionAdapter, EulerDiscreteScheduler
from diffusers.utils import export_to_video
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file

# Ensure a GPU is available
if not torch.cuda.is_available():
    raise RuntimeError("A GPU is required for this task.")

device = "cuda"
dtype = torch.float16

# Base Model Paths
BASE_MODELS = {
    "Realistic": "emilianJR/epiCRealism",
    "Cartoon": "frankjoshua/toonyou_beta6",
    "3D": "Lykon/DreamShaper",
    "Anime": "Yntec/mistoonAnime2",
}

# AnimateDiff-Lightning motion weights (distilled for few-step inference).
# NOTE: the 8-step checkpoint is assumed here to match the UI default;
# swap the filename if a different step count is preferred.
LIGHTNING_REPO = "ByteDance/AnimateDiff-Lightning"
LIGHTNING_CKPT = "animatediff_lightning_8step_diffusers.safetensors"

# Initialize Pipeline
print("Loading AnimateDiff pipeline...")
base_loaded = "Realistic"
adapter = MotionAdapter().to(device, dtype)
adapter.load_state_dict(load_file(hf_hub_download(LIGHTNING_REPO, LIGHTNING_CKPT), device=device))
pipe = AnimateDiffPipeline.from_pretrained(BASE_MODELS[base_loaded], motion_adapter=adapter, torch_dtype=dtype).to(device)
pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing", beta_schedule="linear")
print("Pipeline loaded successfully.")

# Video Generation Function
def generate_video(prompt, base="Realistic", motion="", steps=8):
    global pipe, base_loaded
    print(f"Generating video: Prompt='{prompt}', Base='{base}', Motion='{motion}', Steps='{steps}'")

    # Switch Base Model (only when a different one is requested)
    if base in BASE_MODELS and base != base_loaded:
        print(f"Loading base model: {base}")
        pipe = AnimateDiffPipeline.from_pretrained(BASE_MODELS[base], motion_adapter=adapter, torch_dtype=dtype).to(device)
        pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing", beta_schedule="linear")
        base_loaded = base

    # Apply the selected camera-motion LoRA, if any
    pipe.unload_lora_weights()
    if motion:
        pipe.load_lora_weights(motion, adapter_name="motion")
        pipe.set_adapters(["motion"], [0.7])
    # Set Inference Steps and clip length
    steps = int(steps)
    fps = 10  # Frames per second
    duration = 30  # Video duration in seconds
    total_frames = fps * duration  # Total frames to generate (300)
    frames_per_clip = 16  # AnimateDiff generates fixed-length clips per call

    # Generate Frames: each call yields an independent clip of
    # frames_per_clip frames; clips are concatenated until 30 seconds
    # of footage has been collected
    video_frames = []
    while len(video_frames) < total_frames:
        output = pipe(
            prompt=prompt,
            guidance_scale=1.2,
            num_inference_steps=steps,
            num_frames=frames_per_clip,
        )
        video_frames.extend(output.frames[0])
    video_frames = video_frames[:total_frames]

    # Export to Video
    name = uuid.uuid4().hex
    output_path = f"/tmp/{name}.mp4"
    export_to_video(video_frames, output_path, fps=fps)
    print(f"Video saved to {output_path}")
    return output_path

# Gradio Interface
with gr.Blocks() as demo:
    gr.HTML("<h1><center>30-Second Text-to-Video Generation</center></h1>")
    
    with gr.Row():
        prompt = gr.Textbox(label="Text Prompt", placeholder="Describe your scene...")
    
    with gr.Row():
        base_model = gr.Dropdown(
            label="Base Model",
            choices=["Realistic", "Cartoon", "3D", "Anime"],
            value="Realistic"
        )
        motion = gr.Dropdown(
            label="Motion Adapter",
            choices=[
                ("None", ""),
                ("Zoom In", "guoyww/animatediff-motion-lora-zoom-in"),
                ("Zoom Out", "guoyww/animatediff-motion-lora-zoom-out"),
                ("Tilt Up", "guoyww/animatediff-motion-lora-tilt-up"),
                ("Tilt Down", "guoyww/animatediff-motion-lora-tilt-down"),
                ("Pan Left", "guoyww/animatediff-motion-lora-pan-left"),
                ("Pan Right", "guoyww/animatediff-motion-lora-pan-right"),
            ],
            value=""
        )
        steps = gr.Dropdown(
            label="Inference Steps",
            choices=["4", "8", "12"],
            value="8"
        )
    
    with gr.Row():
        generate_button = gr.Button("Generate Video")
    
    video_output = gr.Video(label="Generated Video", autoplay=True, height=512, width=512)

    generate_button.click(
        fn=generate_video,
        inputs=[prompt, base_model, motion, steps],
        outputs=video_output
    )

demo.launch()