File size: 3,112 Bytes
4f91ffe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
683afc3
88d1237
3aadc38
0737dc8
bf8b15f
68e88ea
3aadc38
1ae6c5e
 
4fbc46c
c1497a6
3aadc38
 
 
 
 
d8f1f69
 
36e35d5
d8f1f69
 
 
 
 
 
 
 
 
 
 
 
68e88ea
56ab815
1ae6c5e
bf8b15f
36e35d5
 
 
 
 
 
 
 
 
 
 
bca93ec
 
3aadc38
68e88ea
 
 
 
 
 
 
 
bf8b15f
56ab815
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68e88ea
 
1ae6c5e
a4cc7b2
68e88ea
 
56ab815
68e88ea
 
56ab815
68e88ea
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import os
import requests

url = "https://huggingface.co/InstantX/SD3.5-Large-IP-Adapter/resolve/main/ip-adapter.bin"
file_path = "ip-adapter.bin"

# Fetch the IP-Adapter weights once; later runs reuse the local copy.
if not os.path.exists(file_path):
    print("File not found, downloading...")
    # Stream into a sibling ".part" file and rename it only after the whole
    # body has been written. Writing straight to file_path would leave a
    # truncated file behind on failure, and the existence check above would
    # then treat that broken file as a finished download on the next start.
    partial_path = file_path + ".part"
    try:
        # timeout=(connect, read): the read timeout applies per socket read,
        # not to the whole transfer, so large files are still fine.
        with requests.get(url, stream=True, timeout=(10, 60)) as response:
            # Fail loudly on 4xx/5xx instead of saving an error page as weights.
            response.raise_for_status()
            with open(partial_path, "wb") as file:
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    if chunk:
                        file.write(chunk)
        os.replace(partial_path, file_path)
    finally:
        # After a successful os.replace the partial file no longer exists, so
        # this only removes leftovers from a failed or interrupted download.
        if os.path.exists(partial_path):
            os.remove(partial_path)
    print("Download completed!")
else:
    print("File already exists.")

from models.transformer_sd3 import SD3Transformer2DModel
import gradio as gr
import torch
from pipeline_stable_diffusion_3_ipa import StableDiffusion3Pipeline
import os
from PIL import Image
import spaces
from huggingface_hub import login
from diffusers.utils import load_image

# Authenticate against the Hugging Face Hub with the token from the
# environment. login(token=None) falls back to an interactive prompt, which
# cannot be answered in a headless deployment, so only log in when a token
# is actually configured.
token = os.getenv("HF_TOKEN")
if token:
    login(token=token)
else:
    print("HF_TOKEN is not set; skipping Hugging Face login.")

# Base model repository, local IP-Adapter weights, and image encoder repository.
model_path = 'stabilityai/stable-diffusion-3.5-large'
ip_adapter_path = './ip-adapter.bin'
image_encoder_path = "google/siglip-so400m-patch14-384"

# Load the project's own transformer class from the model's "transformer"
# subfolder so it can be injected into the pipeline below.
transformer = SD3Transformer2DModel.from_pretrained(
    model_path, subfolder="transformer", torch_dtype=torch.bfloat16
)

# Build the pipeline around that transformer (bfloat16) and move it to CUDA.
pipe = StableDiffusion3Pipeline.from_pretrained(
    model_path, transformer=transformer, torch_dtype=torch.bfloat16
).to("cuda")

# Attach the IP-Adapter weights and the image encoder to the pipeline.
pipe.init_ipadapter(
    ip_adapter_path=ip_adapter_path,
    image_encoder_path=image_encoder_path,
    nb_token=64,
)


@spaces.GPU
def gui_generation(prompt, ref_img, guidance_scale, ipadapter_scale):
    """Generate one 1024x1024 image from a text prompt and a reference image.

    Args:
        prompt: Text prompt forwarded to the pipeline.
        ref_img: Reference image. Either a filesystem path string (what
            ``gr.Image(type="filepath")`` supplies) or an object exposing
            the path as ``.name``.
        guidance_scale: Forwarded to the pipeline as ``guidance_scale``.
        ipadapter_scale: Forwarded to the pipeline as ``ipadapter_scale``.

    Returns:
        The first image of the pipeline's ``.images`` output.

    Raises:
        gr.Error: If no reference image was provided.
    """
    if ref_img is None:
        raise gr.Error("Please upload a reference image.")

    # A "filepath" image input arrives as a plain string, which has no
    # ``.name`` attribute; only fall back to ``.name`` for file-like objects.
    if isinstance(ref_img, str):
        ref_source = ref_img
    else:
        ref_source = getattr(ref_img, "name", ref_img)
    ref_img = load_image(ref_source)

    # Inference only: disable gradient tracking for the pipeline call.
    with torch.no_grad():
        image = pipe(
            width=1024,
            height=1024,
            prompt=prompt,
            negative_prompt="lowres, low quality, worst quality",
            num_inference_steps=24,
            guidance_scale=guidance_scale,
            # Fixed seed: identical inputs use the same random generator state.
            generator=torch.Generator("cuda").manual_seed(42),
            clip_image=ref_img.convert('RGB'),
            ipadapter_scale=ipadapter_scale).images
    return image[0]

# Create Gradio interface: prompt + reference image + two sliders in,
# one generated image out.
with gr.Blocks() as demo:
    gr.Markdown("# Stable Diffusion 3.5 Image Generation")

    with gr.Row():
        prompt_box = gr.Textbox(label="Prompt", placeholder="Enter your image generation prompt")

    with gr.Row():
        # type="filepath" hands the callback the uploaded image's path as a str.
        ref_img = gr.Image(type="filepath", label="Upload Reference Image")

    with gr.Row():
        guidance_slider = gr.Slider(
            label="Guidance Scale",
            minimum=2,
            maximum=16,
            value=7,
            step=0.5,
            info="Controls adherence to the text prompt"
        )
        ipadapter_slider = gr.Slider(
            label="IP-Adapter Scale",
            minimum=0,
            maximum=1,
            value=0.5,
            step=0.1,
            info="Controls influence of the image prompt"
        )

    generate_btn = gr.Button("Generate")
    # gui_generation returns a PIL image, so the output must be an Image
    # component; gr.File does not accept type="pil" and cannot render a
    # PIL image.
    gallery = gr.Image(type="pil", label="Generated Image")

    generate_btn.click(
        fn=gui_generation,
        inputs=[prompt_box, ref_img, guidance_slider, ipadapter_slider],
        outputs=gallery
    )

demo.launch()