"""Gradio demo: animate reference image(s) with AnimateDiff SparseCtrl (RGB).

At import time the script loads a Stable Diffusion 1.5 checkpoint together with
an AnimateDiff motion adapter, the SparseCtrl RGB ControlNet, a VAE and a
motion LoRA, then serves a Gradio UI that turns a text prompt plus one or more
reference images into a short GIF animation.
"""

import os
import tempfile

import gradio as gr
# NOTE(review): `spaces` is deliberately kept ahead of torch/diffusers, as in
# the original import order; ZeroGPU presumably wants it imported before
# anything touches CUDA - confirm against the `spaces` documentation.
import spaces
import torch
from diffusers import AnimateDiffSparseControlNetPipeline
from diffusers.models import AutoencoderKL, MotionAdapter, SparseControlNetModel
from diffusers.schedulers import DPMSolverMultistepScheduler
from diffusers.utils import export_to_gif, load_image
from huggingface_hub import login

# Authenticate only when a token is configured: calling login() with
# token=None would fall back to non-token behaviour, which is not usable in a
# headless deployment.
token = os.getenv("HF_TOKEN")
if token:
    login(token=token)

model_id = "SG161222/Realistic_Vision_V5.1_noVAE"
motion_adapter_id = "guoyww/animatediff-motion-adapter-v1-5-3"
controlnet_id = "guoyww/animatediff-sparsectrl-rgb"
lora_adapter_id = "guoyww/animatediff-motion-lora-v1-5-3"
vae_id = "stabilityai/sd-vae-ft-mse"
device = "cuda"

# Bounds for the "Number of frames" slider.
# NOTE(review): 32 is assumed to be the motion adapter's maximum sequence
# length - confirm against the adapter's config before raising it.
MIN_FRAMES = 2
MAX_FRAMES = 32
DEFAULT_FRAMES = 16

motion_adapter = MotionAdapter.from_pretrained(
    motion_adapter_id, torch_dtype=torch.float16
).to(device)
controlnet = SparseControlNetModel.from_pretrained(
    controlnet_id, torch_dtype=torch.float16
).to(device)
vae = AutoencoderKL.from_pretrained(vae_id, torch_dtype=torch.float16).to(device)
scheduler = DPMSolverMultistepScheduler.from_pretrained(
    model_id,
    subfolder="scheduler",
    beta_schedule="linear",
    algorithm_type="dpmsolver++",
    use_karras_sigmas=True,
)
pipe = AnimateDiffSparseControlNetPipeline.from_pretrained(
    model_id,
    motion_adapter=motion_adapter,
    controlnet=controlnet,
    vae=vae,
    scheduler=scheduler,
    torch_dtype=torch.float16,
).to(device)
pipe.load_lora_weights(lora_adapter_id, adapter_name="motion_lora")


def _keyframe_indices(num_images, num_frames):
    """Return evenly spaced frame indices for ``num_images`` keyframes.

    A single image is pinned to frame 0. Several images are spread from the
    first frame to the last one. The caller must ensure
    ``num_images <= num_frames`` so that the floor division below cannot
    produce duplicate indices.
    """
    if num_images <= 1:
        return [0]
    last_frame = num_frames - 1
    return [i * last_frame // (num_images - 1) for i in range(num_images)]


@spaces.GPU
def generate_image(prompt, reference_image, controlnet_conditioning_scale, num_frames):
    """Generate a GIF animation conditioned on the uploaded reference image(s).

    Args:
        prompt: Text prompt describing the animation.
        reference_image: Upload(s) from the ``gr.File`` input. Each entry may
            be a path string or an object exposing the path as ``.name``,
            depending on the Gradio version; a single non-list value is
            accepted as well.
        controlnet_conditioning_scale: Strength of the ControlNet guidance.
        num_frames: Number of frames to generate (coerced to ``int``).

    Returns:
        Path of the GIF file written for this request.

    Raises:
        gr.Error: If no reference image was uploaded or ``num_frames`` is
            smaller than 1.
    """
    if not reference_image:
        raise gr.Error("Please upload at least one reference image.")

    # Sliders may deliver floats; the pipeline needs a frame *count*.
    num_frames = int(num_frames)
    if num_frames < 1:
        raise gr.Error("Number of frames must be at least 1.")

    uploads = (
        reference_image
        if isinstance(reference_image, (list, tuple))
        else [reference_image]
    )
    paths = [item if isinstance(item, str) else item.name for item in uploads]

    # There cannot be more keyframes than frames; surplus uploads are dropped.
    style_images = [load_image(path) for path in paths[:num_frames]]

    # With a fixed [0] only one upload could ever act as a keyframe, so spread
    # every uploaded image across the clip instead.
    frame_indices = _keyframe_indices(len(style_images), num_frames)

    video = pipe(
        prompt=prompt,
        negative_prompt="low quality, worst quality",
        num_inference_steps=25,
        num_frames=num_frames,
        conditioning_frames=style_images,
        controlnet_frame_indices=frame_indices,
        controlnet_conditioning_scale=controlnet_conditioning_scale,
        generator=torch.Generator().manual_seed(42),
    ).frames[0]

    # Write to a unique temp file so concurrent requests do not overwrite each
    # other, and return the path that was actually written.
    with tempfile.NamedTemporaryFile(suffix=".gif", delete=False) as handle:
        output_path = handle.name
    export_to_gif(video, output_path)
    return output_path


# Set up Gradio interface
interface = gr.Interface(
    fn=generate_image,
    inputs=[
        gr.Textbox(label="Prompt"),
        # `type` is left at the component default: its accepted values differ
        # between Gradio major versions, and generate_image handles both path
        # strings and file objects.
        gr.File(file_count="multiple", label="Reference Image (Style)"),
        gr.Slider(
            label="Control Net Conditioning Scale",
            minimum=0,
            maximum=1.0,
            step=0.1,
            value=1.0,
        ),
        gr.Slider(
            label="Number of frames",
            minimum=MIN_FRAMES,
            maximum=MAX_FRAMES,
            step=1,
            value=DEFAULT_FRAMES,
        ),
    ],
    outputs="image",
    title="Animation Generation with AnimateDiff SparseCtrl",
    description=(
        "Generates a short GIF animation from a text prompt and one or more "
        "reference images using Realistic Vision V5.1 with AnimateDiff and "
        "the SparseCtrl RGB ControlNet."
    ),
)

interface.launch()