ar0551 committed
Commit 4601988 · verified · 1 parent: 3f3dcbc

Update app.py

Files changed (1)
  1. app.py +33 -74
app.py CHANGED
@@ -7,18 +7,23 @@ from PIL import Image
 import spaces


-# 🌟 Auto-detect device (CPU/GPU)
+# 🌟 set device and precision
 device = "cuda"
 precision = torch.float16

-# 🏗️ Load ControlNet model for Canny edge detection
-# xinsir/controlnet-canny-sdxl-1.0
-# diffusers/controlnet-canny-sdxl-1.0
-controlnet = ControlNetModel.from_pretrained(
+# 🏗️ Load ControlNet model for Canny and Depth
+controlnet_canny = ControlNetModel.from_pretrained(
     "xinsir/controlnet-canny-sdxl-1.0",
     torch_dtype=precision
 )

+controlnet_depth = ControlNetModel.from_pretrained(
+    "xinsir/controlnet-depth-sdxl-1.0",
+    torch_dtype=precision
+)
+
+controlnet = [controlnet_canny, controlnet_depth]
+
 # when test with other base model, you need to change the vae also.
 vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=precision)

@@ -26,68 +31,31 @@ vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype
 eulera_scheduler = EulerAncestralDiscreteScheduler.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", subfolder="scheduler")

 # Stable Diffusion Model with ControlNet
-pipe_cn = StableDiffusionXLControlNetPipeline.from_pretrained(
+pipe_canny_depth = StableDiffusionXLControlNetPipeline.from_pretrained(
     "stabilityai/stable-diffusion-xl-base-1.0",
     controlnet=controlnet,
     vae=vae,
     torch_dtype=precision,
     scheduler=eulera_scheduler,
 )
-pipe_cn.to(device)
-
-# Stable Diffusion Model without ControlNet
-pipe = StableDiffusionXLPipeline.from_pretrained(
-    "stabilityai/stable-diffusion-xl-base-1.0",
-    vae=vae,
-    torch_dtype=precision,
-    scheduler=eulera_scheduler,
-)
-pipe.to(device)
-
-
-# 📸 Edge detection function using OpenCV (Canny)
-@spaces.GPU
-def apply_canny(image, low_threshold, high_threshold):
-    image = np.array(image)
-    image = cv2.Canny(image, low_threshold, high_threshold)
-    image = image[:, :, None]
-    image = np.concatenate([image, image, image], axis=2)
-    return Image.fromarray(image)
+pipe_canny_depth.to(device)


 # 🎨 Image generation function from image
 @spaces.GPU
-def generate_image(prompt, input_image, low_threshold, high_threshold, strength, guidance, controlnet_conditioning_scale):
-
-    # Apply edge detection
-    edge_detected = apply_canny(input_image, low_threshold, high_threshold)
+def generate_image(prompt, canny_input, depth_input, strength, guidance, canny_conditioning_scale, depth_conditioning_scale):

     # Generate styled image using ControlNet
-    result = pipe_cn(
+    result = pipe_canny_depth(
         prompt=prompt,
-        image=edge_detected,
+        image=[canny_input, depth_input],
         num_inference_steps=30,
         guidance_scale=guidance,
-        controlnet_conditioning_scale=float(controlnet_conditioning_scale),
+        controlnet_conditioning_scale=[float(canny_conditioning_scale), float(depth_conditioning_scale)],
         strength=strength
     ).images[0]

-    return edge_detected, result
-
-
-# 🎨 Image generation function from prompt
-@spaces.GPU
-def generate_prompt(prompt, strength, guidance):
-
-    # Generate styled image from prompt
-    result = pipe(
-        prompt=prompt,
-        num_inference_steps=30,
-        guidance_scale=guidance,
-        strength=strength
-    ).images[0]
-
-    return result, result
+    return result


 # 🖥️ Gradio UI
@@ -96,36 +64,27 @@ with gr.Blocks() as demo:

     with gr.Row():
         with gr.Column():
-            input_image = gr.Image(label="Upload 3D Screenshot", type="pil")
-            prompt = gr.Textbox(label="Style Prompt", placeholder="e.g., Futuristic building in sunset")
-
-            low_threshold = gr.Slider(50, 150, value=100, label="Canny Edge Low Threshold")
-            high_threshold = gr.Slider(100, 200, value=150, label="Canny Edge High Threshold")
-
-            strength = gr.Slider(0.1, 1.0, value=0.7, label="Denoising Strength")
-            guidance = gr.Slider(1, 20, value=7.5, label="Guidance Scale (Creativity)")
-            controlnet_conditioning_scale = gr.Slider(0, 1, value=0.5, step=0.01, label="ControlNet Conditioning Scale")
-
-            with gr.Row():
-                generate_img_button = gr.Button("Generate from Image")
-                generate_prompt_button = gr.Button("Generate from Prompt")
-
+            canny_input = gr.Image(label="Upload Canny Screenshot", type="pil")
+            canny_conditioning_scale = gr.Slider(0, 1, value=0.5, step=0.01, label="Canny Conditioning Scale")

-        with gr.Column():
-            edge_output = gr.Image(label="Edge Detected Image")
-            result_output = gr.Image(label="Generated Styled Image")
+        with gr.Column():
+            depth_input = gr.Image(label="Upload Depth (ZBuffer) Screenshot", type="pil")
+            depth_conditioning_scale = gr.Slider(0, 1, value=0.5, step=0.01, label="Depth Conditioning Scale")
+
+    with gr.Row():
+        prompt = gr.Textbox(label="Style Prompt", placeholder="e.g., Futuristic building in sunset")
+        strength = gr.Slider(0.1, 1.0, value=0.7, label="Denoising Strength")
+        guidance = gr.Slider(1, 20, value=7.5, label="Guidance Scale (Creativity)")
+        generate_img_button = gr.Button("Generate from Image")
+
+    with gr.Row():
+        result_output = gr.Image(label="Generated Styled Image")

     # 🔗 Generate Button Action
     generate_img_button.click(
         fn=generate_image,
-        inputs=[prompt, input_image, low_threshold, high_threshold, strength, guidance, controlnet_conditioning_scale],
-        outputs=[edge_output, result_output]
-    )
-
-    generate_prompt_button.click(
-        fn=generate_prompt,
-        inputs=[prompt, strength, guidance],
-        outputs=[edge_output, result_output]
+        inputs=[prompt, canny_input, depth_input, strength, guidance, canny_conditioning_scale, depth_conditioning_scale],
+        outputs=[result_output]
     )

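For reference, the updated pipeline takes one control image and one conditioning scale per ControlNet, and the in-app Canny step is gone, so both control images are now supplied by the user. Below is a minimal sketch of driving the new pipeline outside Gradio; it assumes the pipe_canny_depth object defined in the updated app.py is in scope, and the file paths, prompt, and scale values are illustrative placeholders rather than part of the commit.

import cv2
import numpy as np
from PIL import Image

# Build a Canny control image, mirroring the removed apply_canny helper;
# the input path and thresholds are placeholders.
render = np.array(Image.open("render.png").convert("RGB"))
edges = cv2.Canny(render, 100, 150)
canny_img = Image.fromarray(np.stack([edges] * 3, axis=2))

# Depth (ZBuffer) screenshot exported from the 3D tool (placeholder path).
depth_img = Image.open("depth.png").convert("RGB")

# One control image and one conditioning scale per ControlNet,
# in the same order as the controlnet list above ([canny, depth]).
result = pipe_canny_depth(
    prompt="Futuristic building in sunset",
    image=[canny_img, depth_img],
    num_inference_steps=30,
    guidance_scale=7.5,
    controlnet_conditioning_scale=[0.5, 0.5],
).images[0]
result.save("styled.png")

The ordering of image and controlnet_conditioning_scale has to match the order of the controlnet list passed to StableDiffusionXLControlNetPipeline.from_pretrained.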