import torch
from diffusers import (
    AnimateDiffControlNetPipeline,
    AutoencoderKL,
    ControlNetModel,
    MotionAdapter,
    LCMScheduler,
)
from diffusers.utils import export_to_gif, load_video
from controlnet_aux import MidasDetector  # Faster than ZoeDetector

# Load depth-based ControlNet (in diffusers format)
controlnet = ControlNetModel.from_pretrained(
    "lllyasviel/sd-controlnet-depth", torch_dtype=torch.float16
)

# Load AnimateDiff Motion Adapter (AnimateLCM)
motion_adapter = MotionAdapter.from_pretrained("wangfuyun/AnimateLCM")

# Load VAE for SD 1.5
vae = AutoencoderKL.from_pretrained(
    "stabilityai/sd-vae-ft-mse", torch_dtype=torch.float16
)

# Load AnimateDiff pipeline with ControlNet
pipe = AnimateDiffControlNetPipeline.from_pretrained(
    "SG161222/Realistic_Vision_V5.1_noVAE",
    motion_adapter=motion_adapter,
    controlnet=controlnet,
    vae=vae,
).to(device="cuda", dtype=torch.float16)

# Use LCM Scheduler (AnimateLCM expects a linear beta schedule)
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config, beta_schedule="linear")

# Load AnimateLCM LoRA (set_adapters takes `adapter_weights`, not `adapter_scales`)
pipe.load_lora_weights(
    "wangfuyun/AnimateLCM",
    weight_name="AnimateLCM_sd15_t2v_lora.safetensors",
    adapter_name="lcm-lora",
)
pipe.set_adapters(["lcm-lora"], adapter_weights=[0.8])

# Use MiDaS for depth extraction (faster)
depth_detector = MidasDetector.from_pretrained("lllyasviel/Annotators").to("cuda")

# Load input video for depth-based conditioning
video = load_video(
    "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/animatediff-vid2vid-input-1.gif"
)

# Process video frames into depth maps
conditioning_frames = [depth_detector(frame) for frame in video]

# Define prompts
prompt = "a panda, playing a guitar, sitting in a pink boat, in the ocean, mountains in background, realistic, high quality"
negative_prompt = "blurry, deformed, distorted, bad quality"

# Generate animated output
output = pipe(
    prompt=prompt,
    negative_prompt=negative_prompt,
    num_frames=len(video),
    num_inference_steps=10,
    guidance_scale=2.0,
    conditioning_frames=conditioning_frames,
    generator=torch.Generator().manual_seed(42),
).frames[0]

# Save animation as GIF
export_to_gif(output, "animatediff_controlnet.gif", fps=8)
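
# --- Optional extras (a sketch, not required for the run above) ---

# Export as MP4 instead of GIF. export_to_video is part of diffusers.utils and
# assumes a video backend such as opencv-python or imageio is installed.
from diffusers.utils import export_to_video
export_to_video(output, "animatediff_controlnet.mp4", fps=8)

# If you hit out-of-memory errors, these standard pipeline toggles usually
# help; call them right after building `pipe`, before inference (and with
# enable_model_cpu_offload, drop the .to(device="cuda") call above):
#   pipe.enable_vae_slicing()
#   pipe.enable_model_cpu_offload()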