amos1088 committed
Commit b2d0aef · 1 Parent(s): 4ec6616
Files changed (1)
  1. app.py +33 -49
app.py CHANGED
@@ -34,30 +34,6 @@ if not token:
     raise ValueError("Hugging Face token not found. Set the 'HF_TOKEN' environment variable.")
 login(token=token)
 
-# ----------------------------
-# Step 3: Model Paths
-# ----------------------------
-model_path = 'stabilityai/stable-diffusion-3.5-large'
-ip_adapter_path = './ip-adapter.bin'
-image_encoder_path = "google/siglip-so400m-patch14-384"
-
-# ----------------------------
-# Step 4: Load Transformer and Pipeline
-# ----------------------------
-transformer = SD3Transformer2DModel.from_pretrained(
-    model_path, subfolder="transformer", torch_dtype=torch.float16
-)
-
-pipe = StableDiffusion3Pipeline.from_pretrained(
-    model_path, transformer=transformer, torch_dtype=torch.float16
-).to("cuda")
-
-pipe.init_ipadapter(
-    ip_adapter_path=ip_adapter_path,
-    image_encoder_path=image_encoder_path,
-    nb_token=64,
-)
-
 
 
 # ----------------------------
@@ -65,31 +41,39 @@ pipe.init_ipadapter(
 # ----------------------------
 @spaces.GPU
 def gui_generation(prompt, ref_img, guidance_scale, ipadapter_scale):
-    """Generate an image using Stable Diffusion 3.5 Large with IP-Adapter."""
-    try:
-        # Load and preprocess the reference image
-        image_pil = load_image(ref_img.name)  # Load as PIL Image
-
-        # Convert PIL Image to Tensor
-        preprocess = T.Compose([
-            T.Resize((384, 384)),      # Match input size expected by image encoder
-            T.ToTensor(),              # Convert to tensor
-            T.Normalize([0.5], [0.5])  # Normalize to [-1, 1] range if needed
-        ])
-        ref_img_tensor = preprocess(image_pil).unsqueeze(0).to("cuda")  # Add batch dim and move to GPU
-
-    except Exception as e:
-        raise ValueError(f"Error loading reference image: {e}")
-    # Run the pipeline
-    with torch.enable_grad():
-        image = pipe(
-            prompt=prompt,
-            negative_prompt="lowres, low quality, worst quality",
-            num_inference_steps=24,
-            guidance_scale=guidance_scale,
-            clip_image=ref_img_tensor,
-            ipadapter_scale=ipadapter_scale
-        ).images[0]
+
+    model_path = 'stabilityai/stable-diffusion-3.5-large'
+    ip_adapter_path = './ip-adapter.bin'
+    image_encoder_path = "google/siglip-so400m-patch14-384"
+
+    transformer = SD3Transformer2DModel.from_pretrained(
+        model_path, subfolder="transformer", torch_dtype=torch.bfloat16
+    )
+
+    pipe = StableDiffusion3Pipeline.from_pretrained(
+        model_path, transformer=transformer, torch_dtype=torch.bfloat16
+    ).to("cuda")
+
+    pipe.init_ipadapter(
+        ip_adapter_path=ip_adapter_path,
+        image_encoder_path=image_encoder_path,
+        nb_token=64,
+    )
+
+    ref_img = load_image(ref_img.name).convert('RGB')
+
+    # please note that SD3.5 Large is sensitive to highres generation like 1536x1536
+    image = pipe(
+        width=1024,
+        height=1024,
+        prompt=prompt,
+        negative_prompt="lowres, low quality, worst quality",
+        num_inference_steps=24,
+        guidance_scale=guidance_scale,
+        generator=torch.Generator("cuda").manual_seed(42),
+        clip_image=ref_img,
+        ipadapter_scale=ipadapter_scale,
+    ).images[0]
 
     return image
 
 
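Note: the diff shows only gui_generation itself; the Gradio front end that invokes it lives elsewhere in app.py and is not part of this commit. Below is a minimal sketch of such a front end, assuming gui_generation is defined as in the new hunk; every component choice, label, and default value is an illustrative assumption, not taken from the Space.

import gradio as gr

# Sketch only: a possible Gradio interface around gui_generation (the function
# from the diff above). Labels, ranges, and defaults are assumptions.
demo = gr.Interface(
    fn=gui_generation,
    inputs=[
        gr.Textbox(label="Prompt"),
        # gui_generation reads ref_img.name, so it expects a file-like object;
        # newer Gradio versions pass gr.File values as plain path strings instead.
        gr.File(label="Reference image"),
        gr.Slider(1.0, 15.0, value=7.0, label="Guidance scale"),
        gr.Slider(0.0, 1.5, value=0.7, label="IP-Adapter scale"),
    ],
    outputs=gr.Image(label="Generated image"),
)

if __name__ == "__main__":
    demo.launch()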