# Hugging Face Spaces app: Stable Diffusion 3.5 Large + IP-Adapter image generation demo
import os
import requests

# IP-Adapter checkpoint for SD 3.5 Large, fetched once at startup.
url = "https://huggingface.co/InstantX/SD3.5-Large-IP-Adapter/resolve/main/ip-adapter.bin"
file_path = "ip-adapter.bin"

# Skip the download if a previous run of this Space already fetched the file.
if not os.path.exists(file_path):
    print("File not found, downloading...")
    # stream=True avoids loading the multi-GB checkpoint into memory at once;
    # the timeout prevents hanging forever on a stalled connection.
    response = requests.get(url, stream=True, timeout=60)
    # Fail loudly on 4xx/5xx — otherwise an HTML error page would be silently
    # saved to disk as "ip-adapter.bin" and break model loading later.
    response.raise_for_status()
    with open(file_path, "wb") as file:
        # 1 MiB chunks; the original 1 KiB chunk size was needlessly slow
        # for a multi-gigabyte file.
        for chunk in response.iter_content(chunk_size=1 << 20):
            if chunk:
                file.write(chunk)
    print("Download completed!")
else:
    print("File already exists.")
# --- Imports and one-time model/pipeline setup (runs at module import) ---
from models.transformer_sd3 import SD3Transformer2DModel
import gradio as gr
import torch
from pipeline_stable_diffusion_3_ipa import StableDiffusion3Pipeline
import os
from PIL import Image
import spaces
from huggingface_hub import login
from diffusers.utils import load_image
# Authenticate with the Hugging Face Hub. HF_TOKEN must be set in the Space's
# secrets — presumably required because SD 3.5 Large is a gated model; verify.
token = os.getenv("HF_TOKEN")
login(token=token)
# Base model repo, the locally downloaded IP-Adapter checkpoint, and the
# SigLIP model used to encode the reference image.
model_path = 'stabilityai/stable-diffusion-3.5-large'
ip_adapter_path = './ip-adapter.bin'
image_encoder_path = "google/siglip-so400m-patch14-384"
# The transformer is loaded separately via the project-local
# SD3Transformer2DModel so it replaces the stock transformer in the pipeline.
transformer = SD3Transformer2DModel.from_pretrained(
model_path, subfolder="transformer", torch_dtype=torch.bfloat16
)
# Custom pipeline (project-local module) in bfloat16 on the GPU.
pipe = StableDiffusion3Pipeline.from_pretrained(
model_path, transformer=transformer, torch_dtype=torch.bfloat16
).to("cuda")
# Attach the IP-Adapter weights and image encoder to the pipeline.
# nb_token=64: number of image-prompt tokens — semantics defined in
# pipeline_stable_diffusion_3_ipa.init_ipadapter; TODO confirm there.
pipe.init_ipadapter(
ip_adapter_path=ip_adapter_path,
image_encoder_path=image_encoder_path,
nb_token=64,
)
@spaces.GPU
def gui_generation(prompt, ref_img, guidance_scale, ipadapter_scale):
    """Generate one 1024x1024 image from a text prompt and a reference image.

    Args:
        prompt: Text prompt for generation.
        ref_img: Reference image from the Gradio input — a filepath string
            (``gr.Image(type="filepath")``) or a tempfile-like object
            exposing ``.name``.
        guidance_scale: Classifier-free guidance strength.
        ipadapter_scale: Weight of the image prompt via the IP-Adapter.

    Returns:
        The generated ``PIL.Image.Image``.
    """
    if ref_img is None:
        raise gr.Error("Please upload a reference image.")
    # gr.Image(type="filepath") passes a plain path string to the callback;
    # the original code assumed a tempfile-like object and called `.name`,
    # which raises AttributeError on str. Accept both shapes.
    img_path = ref_img if isinstance(ref_img, str) else ref_img.name
    reference = load_image(img_path)
    # Inference only — no gradients needed.
    with torch.no_grad():
        images = pipe(
            width=1024,
            height=1024,
            prompt=prompt,
            negative_prompt="lowres, low quality, worst quality",
            num_inference_steps=24,
            guidance_scale=guidance_scale,
            # Fixed seed for reproducible outputs across identical requests.
            generator=torch.Generator("cuda").manual_seed(42),
            clip_image=reference.convert('RGB'),
            ipadapter_scale=ipadapter_scale,
        ).images
    return images[0]
# --- Gradio interface ---
with gr.Blocks() as demo:
    gr.Markdown("# Stable Diffusion 3.5 Image Generation")
    with gr.Row():
        prompt_box = gr.Textbox(label="Prompt", placeholder="Enter your image generation prompt")
    with gr.Row():
        ref_img = gr.Image(type="filepath", label="Upload Reference Image")
    with gr.Row():
        guidance_slider = gr.Slider(
            label="Guidance Scale",
            minimum=2,
            maximum=16,
            value=7,
            step=0.5,
            info="Controls adherence to the text prompt"
        )
        ipadapter_slider = gr.Slider(
            label="IP-Adapter Scale",
            minimum=0,
            maximum=1,
            value=0.5,
            step=0.1,
            info="Controls influence of the image prompt"
        )
    generate_btn = gr.Button("Generate")
    # gui_generation returns a PIL image, so the output component must be
    # gr.Image(type="pil"). The original gr.File(type="pil") is invalid —
    # gr.File only accepts type="filepath"/"binary" and cannot render an
    # in-memory PIL object.
    gallery = gr.Image(type="pil", label="Generated Image")
    generate_btn.click(
        fn=gui_generation,
        inputs=[prompt_box, ref_img, guidance_slider, ipadapter_slider],
        outputs=gallery
    )
demo.launch()