amos1088 committed on
Commit
51f8f41
·
1 Parent(s): 67c0ca0
Files changed (1) hide show
  1. app.py +29 -28
app.py CHANGED
@@ -1,10 +1,19 @@
1
  import os
2
  import requests
 
 
 
 
 
 
 
 
 
3
 
 
4
  url = "https://huggingface.co/InstantX/SD3.5-Large-IP-Adapter/resolve/main/ip-adapter.bin"
5
  file_path = "ip-adapter.bin"
6
 
7
- # Check if the file already exists
8
  if not os.path.exists(file_path):
9
  print("File not found, downloading...")
10
  response = requests.get(url, stream=True)
@@ -13,32 +22,23 @@ if not os.path.exists(file_path):
13
  if chunk:
14
  file.write(chunk)
15
  print("Download completed!")
16
- else:
17
- print("File already exists.")
18
-
19
- from models.transformer_sd3 import SD3Transformer2DModel
20
- import gradio as gr
21
- import torch
22
- from pipeline_stable_diffusion_3_ipa import StableDiffusion3Pipeline
23
- import os
24
- from PIL import Image
25
- import spaces
26
- from huggingface_hub import login
27
- from diffusers.utils import load_image
28
 
 
29
  token = os.getenv("HF_TOKEN")
30
  login(token=token)
31
 
 
32
  model_path = 'stabilityai/stable-diffusion-3.5-large'
33
  ip_adapter_path = './ip-adapter.bin'
34
  image_encoder_path = "google/siglip-so400m-patch14-384"
35
 
 
36
  transformer = SD3Transformer2DModel.from_pretrained(
37
- model_path, subfolder="transformer", torch_dtype=torch.float16
38
  )
39
 
40
  pipe = StableDiffusion3Pipeline.from_pretrained(
41
- model_path, transformer=transformer, torch_dtype=torch.float16
42
  ).to("cuda")
43
 
44
  pipe.init_ipadapter(
@@ -50,10 +50,10 @@ pipe.init_ipadapter(
50
 
51
  @spaces.GPU
52
  def gui_generation(prompt, ref_img, guidance_scale, ipadapter_scale):
53
- ref_img = load_image(ref_img.name)
 
54
 
55
  with torch.no_grad():
56
- # Ensure the pipeline runs with correct dtype and device
57
  image = pipe(
58
  width=1024,
59
  height=1024,
@@ -62,10 +62,14 @@ def gui_generation(prompt, ref_img, guidance_scale, ipadapter_scale):
62
  num_inference_steps=24,
63
  guidance_scale=guidance_scale,
64
  generator=torch.Generator("cuda").manual_seed(42),
65
- clip_image=ref_img.convert('RGB'),
66
- ipadapter_scale=ipadapter_scale).images
67
- return image[0]
 
 
 
68
 
 
69
  prompt_box = gr.Textbox(label="Prompt", placeholder="Enter your image generation prompt")
70
  ref_img = gr.File(label="Upload Reference Image")
71
  guidance_slider = gr.Slider(
@@ -74,7 +78,8 @@ guidance_slider = gr.Slider(
74
  maximum=16,
75
  value=7,
76
  step=0.5,
77
- info="Controls adherence to the text prompt")
 
78
 
79
  ipadapter_slider = gr.Slider(
80
  label="IP-Adapter Scale",
@@ -85,16 +90,12 @@ ipadapter_slider = gr.Slider(
85
  info="Controls influence of the image prompt"
86
  )
87
 
88
-
89
-
90
- # Set up Gradio interface
91
  interface = gr.Interface(
92
  fn=gui_generation,
93
  inputs=[prompt_box, ref_img, guidance_slider, ipadapter_slider],
94
  outputs="image",
95
- title="Image Generation with Stable Diffusion 3 medium and ControlNet",
96
- description="Generates an image based on a text prompt and a reference image using Stable Diffusion 3 medium with ControlNet."
97
-
98
  )
99
 
100
- interface.launch()
 
1
  import os
2
  import requests
3
+ import torch
4
+ import gradio as gr
5
+ import spaces
6
+ from PIL import Image
7
+ from huggingface_hub import login
8
+ from diffusers.utils import load_image
9
+
10
+ from models.transformer_sd3 import SD3Transformer2DModel
11
+ from pipeline_stable_diffusion_3_ipa import StableDiffusion3Pipeline
12
 
13
+ # Download the IP-Adapter weights if they are not already present
14
  url = "https://huggingface.co/InstantX/SD3.5-Large-IP-Adapter/resolve/main/ip-adapter.bin"
15
  file_path = "ip-adapter.bin"
16
 
 
17
  if not os.path.exists(file_path):
18
  print("File not found, downloading...")
19
  response = requests.get(url, stream=True)
 
22
  if chunk:
23
  file.write(chunk)
24
  print("Download completed!")
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
+ # Hugging Face login
27
  token = os.getenv("HF_TOKEN")
28
  login(token=token)
29
 
30
+ # Model paths
31
  model_path = 'stabilityai/stable-diffusion-3.5-large'
32
  ip_adapter_path = './ip-adapter.bin'
33
  image_encoder_path = "google/siglip-so400m-patch14-384"
34
 
35
+ # Load transformer and pipeline
36
  transformer = SD3Transformer2DModel.from_pretrained(
37
+ model_path, subfolder="transformer", torch_dtype=torch.bfloat16
38
  )
39
 
40
  pipe = StableDiffusion3Pipeline.from_pretrained(
41
+ model_path, transformer=transformer, torch_dtype=torch.bfloat16
42
  ).to("cuda")
43
 
44
  pipe.init_ipadapter(
 
50
 
51
  @spaces.GPU
52
  def gui_generation(prompt, ref_img, guidance_scale, ipadapter_scale):
53
+ # Load and convert reference image
54
+ ref_img = Image.open(ref_img.name).convert('RGB')
55
 
56
  with torch.no_grad():
 
57
  image = pipe(
58
  width=1024,
59
  height=1024,
 
62
  num_inference_steps=24,
63
  guidance_scale=guidance_scale,
64
  generator=torch.Generator("cuda").manual_seed(42),
65
+ clip_image=ref_img,
66
+ ipadapter_scale=ipadapter_scale
67
+ ).images[0]
68
+
69
+ return image
70
+
71
 
72
+ # Set up Gradio interface
73
  prompt_box = gr.Textbox(label="Prompt", placeholder="Enter your image generation prompt")
74
  ref_img = gr.File(label="Upload Reference Image")
75
  guidance_slider = gr.Slider(
 
78
  maximum=16,
79
  value=7,
80
  step=0.5,
81
+ info="Controls adherence to the text prompt"
82
+ )
83
 
84
  ipadapter_slider = gr.Slider(
85
  label="IP-Adapter Scale",
 
90
  info="Controls influence of the image prompt"
91
  )
92
 
 
 
 
93
  interface = gr.Interface(
94
  fn=gui_generation,
95
  inputs=[prompt_box, ref_img, guidance_slider, ipadapter_slider],
96
  outputs="image",
97
+ title="Image Generation with Stable Diffusion 3.5 Large and IP-Adapter",
98
+ description="Generates an image based on a text prompt and a reference image using Stable Diffusion 3.5 Large with IP-Adapter."
 
99
  )
100
 
101
+ interface.launch(share=True)