import torch
from diffusers import (
    AnimateDiffControlNetPipeline, AutoencoderKL,
    ControlNetModel, MotionAdapter, LCMScheduler
)
from diffusers.utils import export_to_gif, load_video
from controlnet_aux import MidasDetector  # Faster than ZoeDetector
# Load depth-based ControlNet (in diffusers format)
controlnet = ControlNetModel.from_pretrained(
    "lllyasviel/sd-controlnet-depth", torch_dtype=torch.float16
)

# Load AnimateDiff Motion Adapter (AnimateLCM)
motion_adapter = MotionAdapter.from_pretrained("wangfuyun/AnimateLCM")

# Load VAE for SD 1.5
vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse", torch_dtype=torch.float16)

# Load AnimateDiff pipeline with ControlNet
pipe = AnimateDiffControlNetPipeline.from_pretrained(
    "SG161222/Realistic_Vision_V5.1_noVAE",
    motion_adapter=motion_adapter,
    controlnet=controlnet,
    vae=vae,
).to(device="cuda", dtype=torch.float16)
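
# Optional (an assumption, not part of the original snippet): both calls below
# are standard diffusers pipeline methods and can lower peak VRAM usage.
# pipe.enable_vae_slicing()        # decode frames one at a time
# pipe.enable_model_cpu_offload()  # offload idle submodules to CPU; skip the .to("cuda") above if used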

# Use LCM Scheduler (optimized for AnimateLCM)
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
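# Note (hedged): the AnimateLCM examples in the diffusers docs pass
# beta_schedule="linear" to from_config; if outputs look washed out, try:
# pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config, beta_schedule="linear")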

# Load AnimateLCM LoRA
pipe.load_lora_weights(
    "wangfuyun/AnimateLCM",
    weight_name="AnimateLCM_sd15_t2v_lora.safetensors",
    adapter_name="lcm-lora"
)
pipe.set_adapters(["lcm-lora"], adapter_weights=[0.8])  # set_adapters takes adapter_weights, not adapter_scales

# Use MiDaS for depth extraction (faster)
depth_detector = MidasDetector.from_pretrained("lllyasviel/Annotators").to("cuda")
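# Alternative (an assumption; slower but often produces sharper depth maps):
# from controlnet_aux import ZoeDetector
# depth_detector = ZoeDetector.from_pretrained("lllyasviel/Annotators").to("cuda")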

# Load input video for depth-based conditioning
video = load_video("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/animatediff-vid2vid-input-1.gif")

# Process video frames into depth maps
conditioning_frames = []
for frame in video:
    conditioning_frames.append(depth_detector(frame))

# Define prompts
prompt = "a panda, playing a guitar, sitting in a pink boat, in the ocean, mountains in background, realistic, high quality"
negative_prompt = "blurry, deformed, distorted, bad quality"

# Generate animated output
output = pipe(
    prompt=prompt,
    negative_prompt=negative_prompt,
    num_frames=len(video),
    num_inference_steps=10,
    guidance_scale=2.0,
    conditioning_frames=conditioning_frames,
    generator=torch.manual_seed(42),
).frames[0]

# Save animation as GIF
export_to_gif(output, "animatediff_controlnet.gif", fps=8)
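
# Alternative export (an assumption, not in the original snippet): diffusers
# also ships export_to_video for MP4 output, with the same frame-list input.
# from diffusers.utils import export_to_video
# export_to_video(output, "animatediff_controlnet.mp4", fps=8)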