amos1088 committed
Commit b2d0aef · 1 Parent(s): 4ec6616
Files changed (1)
  1. app.py +33 -49
app.py CHANGED
@@ -34,30 +34,6 @@ if not token:
     raise ValueError("Hugging Face token not found. Set the 'HF_TOKEN' environment variable.")
 login(token=token)
 
-# ----------------------------
-# Step 3: Model Paths
-# ----------------------------
-model_path = 'stabilityai/stable-diffusion-3.5-large'
-ip_adapter_path = './ip-adapter.bin'
-image_encoder_path = "google/siglip-so400m-patch14-384"
-
-# ----------------------------
-# Step 4: Load Transformer and Pipeline
-# ----------------------------
-transformer = SD3Transformer2DModel.from_pretrained(
-    model_path, subfolder="transformer", torch_dtype=torch.float16
-)
-
-pipe = StableDiffusion3Pipeline.from_pretrained(
-    model_path, transformer=transformer, torch_dtype=torch.float16
-).to("cuda")
-
-pipe.init_ipadapter(
-    ip_adapter_path=ip_adapter_path,
-    image_encoder_path=image_encoder_path,
-    nb_token=64,
-)
-
 
 
 # ----------------------------
@@ -65,31 +41,39 @@ pipe.init_ipadapter(
 # ----------------------------
 @spaces.GPU
 def gui_generation(prompt, ref_img, guidance_scale, ipadapter_scale):
-    """Generate an image using Stable Diffusion 3.5 Large with IP-Adapter."""
-    try:
-        # Load and preprocess the reference image
-        image_pil = load_image(ref_img.name)  # Load as PIL Image
-
-        # Convert PIL Image to Tensor
-        preprocess = T.Compose([
-            T.Resize((384, 384)),      # Match input size expected by image encoder
-            T.ToTensor(),              # Convert to tensor
-            T.Normalize([0.5], [0.5])  # Normalize to [-1, 1] range if needed
-        ])
-        ref_img_tensor = preprocess(image_pil).unsqueeze(0).to("cuda")  # Add batch dim and move to GPU
-
-    except Exception as e:
-        raise ValueError(f"Error loading reference image: {e}")
-    # Run the pipeline
-    with torch.enable_grad():
-        image = pipe(
-            prompt=prompt,
-            negative_prompt="lowres, low quality, worst quality",
-            num_inference_steps=24,
-            guidance_scale=guidance_scale,
-            clip_image=ref_img_tensor,
-            ipadapter_scale=ipadapter_scale
-        ).images[0]
+
+    model_path = 'stabilityai/stable-diffusion-3.5-large'
+    ip_adapter_path = './ip-adapter.bin'
+    image_encoder_path = "google/siglip-so400m-patch14-384"
+
+    transformer = SD3Transformer2DModel.from_pretrained(
+        model_path, subfolder="transformer", torch_dtype=torch.bfloat16
+    )
+
+    pipe = StableDiffusion3Pipeline.from_pretrained(
+        model_path, transformer=transformer, torch_dtype=torch.bfloat16
+    ).to("cuda")
+
+    pipe.init_ipadapter(
+        ip_adapter_path=ip_adapter_path,
+        image_encoder_path=image_encoder_path,
+        nb_token=64,
+    )
+
+    ref_img = load_image(ref_img.name).convert('RGB')
+
+    # please note that SD3.5 Large is sensitive to highres generation like 1536x1536
+    image = pipe(
+        width=1024,
+        height=1024,
+        prompt=prompt,
+        negative_prompt="lowres, low quality, worst quality",
+        num_inference_steps=24,
+        guidance_scale=guidance_scale,
+        generator=torch.Generator("cuda").manual_seed(42),
+        clip_image=ref_img,
+        ipadapter_scale=ipadapter_scale,
+    ).images[0]
 
     return image
 
 
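Note: the diff shows only gui_generation itself; the Gradio front end that invokes it lives elsewhere in app.py and is not part of this commit. Below is a minimal sketch of such a front end, assuming gui_generation is defined as in the new hunk; every component choice, label, and default value is an illustrative assumption, not taken from the Space.

import gradio as gr

# Sketch only: a possible Gradio interface around gui_generation (the function
# from the diff above). Labels, ranges, and defaults are assumptions.
demo = gr.Interface(
    fn=gui_generation,
    inputs=[
        gr.Textbox(label="Prompt"),
        # gui_generation reads ref_img.name, so it expects a file-like object;
        # newer Gradio versions pass gr.File values as plain path strings instead.
        gr.File(label="Reference image"),
        gr.Slider(1.0, 15.0, value=7.0, label="Guidance scale"),
        gr.Slider(0.0, 1.5, value=0.7, label="IP-Adapter scale"),
    ],
    outputs=gr.Image(label="Generated image"),
)

if __name__ == "__main__":
    demo.launch()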