import gradio as gr
import torch
from diffusers import StableDiffusion3ControlNetPipeline, SD3ControlNetModel
from huggingface_hub import login
import os
import spaces
from PIL import Image

# Log in to Hugging Face with the token stored in the HF_TOKEN secret
# (required to download the gated SD3 weights); skip if no token is set
token = os.getenv("HF_TOKEN")
if token:
    login(token=token)

# Model IDs for Stable Diffusion 3 (medium) and the SD3 Tile ControlNet
model_id = "stabilityai/stable-diffusion-3-medium-diffusers"
controlnet_id = "InstantX/SD3-Controlnet-Tile"

# Load the ControlNet model and Stable Diffusion pipeline
controlnet = SD3ControlNetModel.from_pretrained(controlnet_id, torch_dtype=torch.float16)
pipe = StableDiffusion3ControlNetPipeline.from_pretrained(
    model_id, controlnet=controlnet, torch_dtype=torch.float16
)
# SD3 is a flow-matching model, so keep the pipeline's default FlowMatchEulerDiscreteScheduler;
# UniPCMultistepScheduler is not a drop-in replacement for SD3 pipelines
pipe = pipe.to("cuda")

@spaces.GPU
def generate_image(prompt, reference_image, controlnet_conditioning_scale):
    # Prepare the reference image for ControlNet
    reference_image = reference_image.convert("RGB").resize((1024, 1024), Image.LANCZOS)

    # Generate the image with ControlNet conditioning
    generated_image = pipe(
        prompt=prompt,
        control_image=reference_image,
        controlnet_conditioning_scale=controlnet_conditioning_scale,
        guidance_scale=7.5,
        num_inference_steps=75  # a few more steps than the typical 50 for finer detail
    ).images[0]
    return generated_image

# Set up Gradio interface
interface = gr.Interface(
    fn=generate_image,
    inputs=[
        gr.Textbox(label="Prompt"),
        gr.Image(type="pil", label="Reference Image (Style)"),
        gr.Slider(label="ControlNet Conditioning Scale", minimum=0.5, maximum=2.0, step=0.1, value=1.0),
    ],
    outputs="image",
    title="Image Generation with Stable Diffusion 3.5 and ControlNet",
    description="Generates an image based on a text prompt and a reference image using Stable Diffusion 3.5 with ControlNet."
)

# Launch the Gradio interface
interface.launch()
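
# Deployment note (an assumption, not part of the original file): as a Hugging Face Space,
# this script would typically sit next to a requirements.txt listing the imported packages --
# gradio, torch, diffusers, huggingface_hub, spaces, Pillow -- plus transformers and
# sentencepiece, which diffusers needs for the SD3 text encoders. HF_TOKEN is expected to be
# configured as a Space secret so login() can authenticate for the gated SD3 weights.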