kadirnar committed
Commit
b5fb0c0
1 Parent(s): 3fe4a8b

Delete diffusion_webui

diffusion_webui/__init__.py DELETED
File without changes
diffusion_webui/controlnet/__init__.py DELETED
File without changes
diffusion_webui/controlnet/controlnet_canny.py DELETED
@@ -1,176 +0,0 @@
1
- import cv2
2
- import gradio as gr
3
- import numpy as np
4
- import torch
5
- from diffusers import (
6
- ControlNetModel,
7
- StableDiffusionControlNetPipeline,
8
- UniPCMultistepScheduler,
9
- )
10
- from PIL import Image
11
-
12
- stable_model_list = [
13
- "runwayml/stable-diffusion-v1-5",
14
- "stabilityai/stable-diffusion-2-1",
15
- ]
16
-
17
- controlnet_canny_model_list = [
18
- "lllyasviel/sd-controlnet-canny",
19
- "thibaud/controlnet-sd21-canny-diffusers",
20
- ]
21
-
22
-
23
- stable_prompt_list = ["a photo of a man.", "a photo of a girl."]
24
-
25
- stable_negative_prompt_list = ["bad, ugly", "deformed"]
26
-
27
- data_list = [
28
- "data/test.png",
29
- ]
30
-
31
-
32
- def controlnet_canny(
33
- image_path: str,
34
- controlnet_model_path: str,
35
- ):
36
- image = Image.open(image_path)
37
- image = np.array(image)
38
-
39
- image = cv2.Canny(image, 100, 200)
40
- image = image[:, :, None]
41
- image = np.concatenate([image, image, image], axis=2)
42
- image = Image.fromarray(image)
43
-
44
- controlnet = ControlNetModel.from_pretrained(
45
- controlnet_model_path, torch_dtype=torch.float16
46
- )
47
- return controlnet, image
48
-
49
-
50
- def stable_diffusion_controlnet_canny(
51
- image_path: str,
52
- stable_model_path: str,
53
- controlnet_model_path: str,
54
- prompt: str,
55
- negative_prompt: str,
56
- guidance_scale: int,
57
- num_inference_step: int,
58
- ):
59
-
60
- controlnet, image = controlnet_canny(
61
- image_path=image_path, controlnet_model_path=controlnet_model_path
62
- )
63
-
64
- pipe = StableDiffusionControlNetPipeline.from_pretrained(
65
- pretrained_model_name_or_path=stable_model_path,
66
- controlnet=controlnet,
67
- safety_checker=None,
68
- torch_dtype=torch.float16,
69
- )
70
- pipe.to("cuda")
71
- pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
72
- pipe.enable_xformers_memory_efficient_attention()
73
-
74
- output = pipe(
75
- prompt=prompt,
76
- image=image,
77
- negative_prompt=negative_prompt,
78
- num_inference_steps=num_inference_step,
79
- guidance_scale=guidance_scale,
80
- ).images
81
-
82
- return output[0]
83
-
84
-
85
- def stable_diffusion_controlnet_canny_app():
86
- with gr.Blocks():
87
- with gr.Row():
88
- with gr.Column():
89
- controlnet_canny_image_file = gr.Image(
90
- type="filepath", label="Image"
91
- )
92
-
93
- controlnet_canny_stable_model_id = gr.Dropdown(
94
- choices=stable_model_list,
95
- value=stable_model_list[0],
96
- label="Stable Model Id",
97
- )
98
-
99
- controlnet_canny_model_id = gr.Dropdown(
100
- choices=controlnet_canny_model_list,
101
- value=controlnet_canny_model_list[0],
102
- label="Controlnet Model Id",
103
- )
104
-
105
- controlnet_canny_prompt = gr.Textbox(
106
- lines=1, value=stable_prompt_list[0], label="Prompt"
107
- )
108
-
109
- controlnet_canny_negative_prompt = gr.Textbox(
110
- lines=1,
111
- value=stable_negative_prompt_list[0],
112
- label="Negative Prompt",
113
- )
114
-
115
- with gr.Accordion("Advanced Options", open=False):
116
- controlnet_canny_guidance_scale = gr.Slider(
117
- minimum=0.1,
118
- maximum=15,
119
- step=0.1,
120
- value=7.5,
121
- label="Guidance Scale",
122
- )
123
-
124
- controlnet_canny_num_inference_step = gr.Slider(
125
- minimum=1,
126
- maximum=100,
127
- step=1,
128
- value=50,
129
- label="Num Inference Step",
130
- )
131
-
132
- controlnet_canny_predict = gr.Button(value="Generator")
133
-
134
- with gr.Column():
135
- output_image = gr.Image(label="Output")
136
-
137
- gr.Examples(
138
- fn=stable_diffusion_controlnet_canny,
139
- examples=[
140
- [
141
- data_list[0],
142
- stable_model_list[0],
143
- controlnet_canny_model_list[0],
144
- stable_prompt_list[0],
145
- stable_negative_prompt_list[0],
146
- 7.5,
147
- 50,
148
- ]
149
- ],
150
- inputs=[
151
- controlnet_canny_image_file,
152
- controlnet_canny_stable_model_id,
153
- controlnet_canny_model_id,
154
- controlnet_canny_prompt,
155
- controlnet_canny_negative_prompt,
156
- controlnet_canny_guidance_scale,
157
- controlnet_canny_num_inference_step,
158
- ],
159
- outputs=[output_image],
160
- cache_examples=False,
161
- label="Controlnet Canny Example",
162
- )
163
-
164
- controlnet_canny_predict.click(
165
- fn=stable_diffusion_controlnet_canny,
166
- inputs=[
167
- controlnet_canny_image_file,
168
- controlnet_canny_stable_model_id,
169
- controlnet_canny_model_id,
170
- controlnet_canny_prompt,
171
- controlnet_canny_negative_prompt,
172
- controlnet_canny_guidance_scale,
173
- controlnet_canny_num_inference_step,
174
- ],
175
- outputs=[output_image],
176
- )
diffusion_webui/controlnet/controlnet_depth.py DELETED
@@ -1,176 +0,0 @@
1
- import gradio as gr
2
- import numpy as np
3
- import torch
4
- from diffusers import (
5
- ControlNetModel,
6
- StableDiffusionControlNetPipeline,
7
- UniPCMultistepScheduler,
8
- )
9
- from PIL import Image
10
- from transformers import pipeline
11
-
12
- stable_model_list = [
13
- "runwayml/stable-diffusion-v1-5",
14
- "stabilityai/stable-diffusion-2-1",
15
- ]
16
-
17
- controlnet_depth_model_list = [
18
- "lllyasviel/sd-controlnet-depth",
19
- "thibaud/controlnet-sd21-depth-diffusers",
20
- ]
21
-
22
-
23
- stable_prompt_list = ["a photo of a man.", "a photo of a girl."]
24
-
25
- stable_negative_prompt_list = ["bad, ugly", "deformed"]
26
-
27
- data_list = [
28
- "data/test.png",
29
- ]
30
-
31
-
32
- def controlnet_depth(image_path: str, depth_model_path: str):
33
- depth_estimator = pipeline("depth-estimation")
34
-
35
- image = Image.open(image_path)
36
- image = depth_estimator(image)["depth"]
37
- image = np.array(image)
38
- image = image[:, :, None]
39
- image = np.concatenate([image, image, image], axis=2)
40
- image = Image.fromarray(image)
41
-
42
- controlnet = ControlNetModel.from_pretrained(
43
- depth_model_path, torch_dtype=torch.float16
44
- )
45
-
46
- return controlnet, image
47
-
48
-
49
- def stable_diffusion_controlnet_depth(
50
- image_path: str,
51
- stable_model_path: str,
52
- depth_model_path: str,
53
- prompt: str,
54
- negative_prompt: str,
55
- guidance_scale: int,
56
- num_inference_step: int,
57
- ):
58
-
59
- controlnet, image = controlnet_depth(
60
- image_path=image_path, depth_model_path=depth_model_path
61
- )
62
-
63
- pipe = StableDiffusionControlNetPipeline.from_pretrained(
64
- pretrained_model_name_or_path=stable_model_path,
65
- controlnet=controlnet,
66
- safety_checker=None,
67
- torch_dtype=torch.float16,
68
- )
69
-
70
- pipe.to("cuda")
71
- pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
72
- pipe.enable_xformers_memory_efficient_attention()
73
-
74
- output = pipe(
75
- prompt=prompt,
76
- image=image,
77
- negative_prompt=negative_prompt,
78
- num_inference_steps=num_inference_step,
79
- guidance_scale=guidance_scale,
80
- ).images
81
-
82
- return output[0]
83
-
84
-
85
- def stable_diffusion_controlnet_depth_app():
86
- with gr.Blocks():
87
- with gr.Row():
88
- with gr.Column():
89
- controlnet_depth_image_file = gr.Image(
90
- type="filepath", label="Image"
91
- )
92
-
93
- controlnet_depth_stable_model_id = gr.Dropdown(
94
- choices=stable_model_list,
95
- value=stable_model_list[0],
96
- label="Stable Model Id",
97
- )
98
-
99
- controlnet_depth_model_id = gr.Dropdown(
100
- choices=controlnet_depth_model_list,
101
- value=controlnet_depth_model_list[0],
102
- label="ControlNet Model Id",
103
- )
104
-
105
- controlnet_depth_prompt = gr.Textbox(
106
- lines=1, value=stable_prompt_list[0], label="Prompt"
107
- )
108
-
109
- controlnet_depth_negative_prompt = gr.Textbox(
110
- lines=1,
111
- value=stable_negative_prompt_list[0],
112
- label="Negative Prompt",
113
- )
114
-
115
- with gr.Accordion("Advanced Options", open=False):
116
- controlnet_depth_guidance_scale = gr.Slider(
117
- minimum=0.1,
118
- maximum=15,
119
- step=0.1,
120
- value=7.5,
121
- label="Guidance Scale",
122
- )
123
-
124
- controlnet_depth_num_inference_step = gr.Slider(
125
- minimum=1,
126
- maximum=100,
127
- step=1,
128
- value=50,
129
- label="Num Inference Step",
130
- )
131
-
132
- controlnet_depth_predict = gr.Button(value="Generator")
133
-
134
- with gr.Column():
135
- output_image = gr.Image(label="Output")
136
-
137
- gr.Examples(
138
- fn=stable_diffusion_controlnet_depth,
139
- examples=[
140
- [
141
- data_list[0],
142
- stable_model_list[0],
143
- controlnet_depth_model_list[0],
144
- stable_prompt_list[0],
145
- stable_negative_prompt_list[0],
146
- 7.5,
147
- 50,
148
- ]
149
- ],
150
- inputs=[
151
- controlnet_depth_image_file,
152
- controlnet_depth_stable_model_id,
153
- controlnet_depth_model_id,
154
- controlnet_depth_prompt,
155
- controlnet_depth_negative_prompt,
156
- controlnet_depth_guidance_scale,
157
- controlnet_depth_num_inference_step,
158
- ],
159
- outputs=[output_image],
160
- cache_examples=False,
161
- label="ControlNet Depth Example",
162
- )
163
-
164
- controlnet_depth_predict.click(
165
- fn=stable_diffusion_controlnet_depth,
166
- inputs=[
167
- controlnet_depth_image_file,
168
- controlnet_depth_stable_model_id,
169
- controlnet_depth_model_id,
170
- controlnet_depth_prompt,
171
- controlnet_depth_negative_prompt,
172
- controlnet_depth_guidance_scale,
173
- controlnet_depth_num_inference_step,
174
- ],
175
- outputs=output_image,
176
- )
diffusion_webui/controlnet/controlnet_hed.py DELETED
@@ -1,170 +0,0 @@
1
- import gradio as gr
2
- import torch
3
- from controlnet_aux import HEDdetector
4
- from diffusers import (
5
- ControlNetModel,
6
- StableDiffusionControlNetPipeline,
7
- UniPCMultistepScheduler,
8
- )
9
- from PIL import Image
10
-
11
- stable_model_list = [
12
- "runwayml/stable-diffusion-v1-5",
13
- "stabilityai/stable-diffusion-2-1",
14
- ]
15
-
16
- controlnet_hed_model_list = [
17
- "lllyasviel/sd-controlnet-hed",
18
- "thibaud/controlnet-sd21-hed-diffusers",
19
- ]
20
-
21
- stable_prompt_list = ["a photo of a man.", "a photo of a girl."]
22
-
23
- stable_negative_prompt_list = ["bad, ugly", "deformed"]
24
-
25
- data_list = [
26
- "data/test.png",
27
- ]
28
-
29
-
30
- def controlnet_hed(image_path: str, controlnet_hed_model_path: str):
31
- hed = HEDdetector.from_pretrained("lllyasviel/ControlNet")
32
-
33
- image = Image.open(image_path)
34
- image = hed(image)
35
-
36
- controlnet = ControlNetModel.from_pretrained(
37
- controlnet_hed_model_path, torch_dtype=torch.float16
38
- )
39
- return controlnet, image
40
-
41
-
42
- def stable_diffusion_controlnet_hed(
43
- image_path: str,
44
- stable_model_path: str,
45
- controlnet_hed_model_path: str,
46
- prompt: str,
47
- negative_prompt: str,
48
- guidance_scale: int,
49
- num_inference_step: int,
50
- ):
51
-
52
- controlnet, image = controlnet_hed(
53
- image_path=image_path,
54
- controlnet_hed_model_path=controlnet_hed_model_path,
55
- )
56
-
57
- pipe = StableDiffusionControlNetPipeline.from_pretrained(
58
- pretrained_model_name_or_path=stable_model_path,
59
- controlnet=controlnet,
60
- safety_checker=None,
61
- torch_dtype=torch.float16,
62
- )
63
-
64
- pipe.to("cuda")
65
- pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
66
- pipe.enable_xformers_memory_efficient_attention()
67
-
68
- output = pipe(
69
- prompt=prompt,
70
- image=image,
71
- negative_prompt=negative_prompt,
72
- num_inference_steps=num_inference_step,
73
- guidance_scale=guidance_scale,
74
- ).images
75
-
76
- return output[0]
77
-
78
-
79
- def stable_diffusion_controlnet_hed_app():
80
- with gr.Blocks():
81
- with gr.Row():
82
- with gr.Column():
83
- controlnet_hed_image_file = gr.Image(
84
- type="filepath", label="Image"
85
- )
86
-
87
- controlnet_hed_stable_model_id = gr.Dropdown(
88
- choices=stable_model_list,
89
- value=stable_model_list[0],
90
- label="Stable Model Id",
91
- )
92
-
93
- controlnet_hed_model_id = gr.Dropdown(
94
- choices=controlnet_hed_model_list,
95
- value=controlnet_hed_model_list[0],
96
- label="ControlNet Model Id",
97
- )
98
-
99
- controlnet_hed_prompt = gr.Textbox(
100
- lines=1, value=stable_prompt_list[0], label="Prompt"
101
- )
102
-
103
- controlnet_hed_negative_prompt = gr.Textbox(
104
- lines=1,
105
- value=stable_negative_prompt_list[0],
106
- label="Negative Prompt",
107
- )
108
-
109
- with gr.Accordion("Advanced Options", open=False):
110
- controlnet_hed_guidance_scale = gr.Slider(
111
- minimum=0.1,
112
- maximum=15,
113
- step=0.1,
114
- value=7.5,
115
- label="Guidance Scale",
116
- )
117
-
118
- controlnet_hed_num_inference_step = gr.Slider(
119
- minimum=1,
120
- maximum=100,
121
- step=1,
122
- value=50,
123
- label="Num Inference Step",
124
- )
125
-
126
- controlnet_hed_predict = gr.Button(value="Generator")
127
-
128
- with gr.Column():
129
- output_image = gr.Image(label="Output")
130
-
131
- gr.Examples(
132
- fn=stable_diffusion_controlnet_hed,
133
- examples=[
134
- [
135
- data_list[0],
136
- stable_model_list[0],
137
- controlnet_hed_model_list[0],
138
- stable_prompt_list[0],
139
- stable_negative_prompt_list[0],
140
- 7.5,
141
- 50,
142
- ]
143
- ],
144
- inputs=[
145
- controlnet_hed_image_file,
146
- controlnet_hed_stable_model_id,
147
- controlnet_hed_model_id,
148
- controlnet_hed_prompt,
149
- controlnet_hed_negative_prompt,
150
- controlnet_hed_guidance_scale,
151
- controlnet_hed_num_inference_step,
152
- ],
153
- outputs=[output_image],
154
- cache_examples=False,
155
- label="ControlNet HED Example",
156
- )
157
-
158
- controlnet_hed_predict.click(
159
- fn=stable_diffusion_controlnet_hed,
160
- inputs=[
161
- controlnet_hed_image_file,
162
- controlnet_hed_stable_model_id,
163
- controlnet_hed_model_id,
164
- controlnet_hed_prompt,
165
- controlnet_hed_negative_prompt,
166
- controlnet_hed_guidance_scale,
167
- controlnet_hed_num_inference_step,
168
- ],
169
- outputs=[output_image],
170
- )
diffusion_webui/controlnet/controlnet_mlsd.py DELETED
@@ -1,153 +0,0 @@
1
- import gradio as gr
2
- import torch
3
- from controlnet_aux import MLSDdetector
4
- from diffusers import (
5
- ControlNetModel,
6
- StableDiffusionControlNetPipeline,
7
- UniPCMultistepScheduler,
8
- )
9
- from PIL import Image
10
-
11
- stable_model_list = [
12
- "runwayml/stable-diffusion-v1-5",
13
- ]
14
-
15
- stable_prompt_list = ["a photo of a man.", "a photo of a girl."]
16
-
17
- stable_negative_prompt_list = ["bad, ugly", "deformed"]
18
-
19
- data_list = [
20
- "data/test.png",
21
- ]
22
-
23
-
24
- def controlnet_mlsd(image_path: str):
25
- mlsd = MLSDdetector.from_pretrained("lllyasviel/ControlNet")
26
-
27
- image = Image.open(image_path)
28
- image = mlsd(image)
29
-
30
- controlnet = ControlNetModel.from_pretrained(
31
- "lllyasviel/sd-controlnet-mlsd",
32
- torch_dtype=torch.float16,
33
- )
34
-
35
- return controlnet, image
36
-
37
-
38
- def stable_diffusion_controlnet_mlsd(
39
- image_path: str,
40
- model_path: str,
41
- prompt: str,
42
- negative_prompt: str,
43
- guidance_scale: int,
44
- num_inference_step: int,
45
- ):
46
-
47
- controlnet, image = controlnet_mlsd(image_path=image_path)
48
-
49
- pipe = StableDiffusionControlNetPipeline.from_pretrained(
50
- pretrained_model_name_or_path=model_path,
51
- controlnet=controlnet,
52
- safety_checker=None,
53
- torch_dtype=torch.float16,
54
- )
55
-
56
- pipe.to("cuda")
57
- pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
58
- pipe.enable_xformers_memory_efficient_attention()
59
-
60
- output = pipe(
61
- prompt=prompt,
62
- image=image,
63
- negative_prompt=negative_prompt,
64
- num_inference_steps=num_inference_step,
65
- guidance_scale=guidance_scale,
66
- ).images
67
-
68
- return output[0]
69
-
70
-
71
- def stable_diffusion_controlnet_mlsd_app():
72
- with gr.Blocks():
73
- with gr.Row():
74
- with gr.Column():
75
- controlnet_mlsd_image_file = gr.Image(
76
- type="filepath", label="Image"
77
- )
78
-
79
- controlnet_mlsd_model_id = gr.Dropdown(
80
- choices=stable_model_list,
81
- value=stable_model_list[0],
82
- label="Stable Model Id",
83
- )
84
-
85
- controlnet_mlsd_prompt = gr.Textbox(
86
- lines=1, value=stable_prompt_list[0], label="Prompt"
87
- )
88
-
89
- controlnet_mlsd_negative_prompt = gr.Textbox(
90
- lines=1,
91
- value=stable_negative_prompt_list[0],
92
- label="Negative Prompt",
93
- )
94
-
95
- with gr.Accordion("Advanced Options", open=False):
96
- controlnet_mlsd_guidance_scale = gr.Slider(
97
- minimum=0.1,
98
- maximum=15,
99
- step=0.1,
100
- value=7.5,
101
- label="Guidance Scale",
102
- )
103
-
104
- controlnet_mlsd_num_inference_step = gr.Slider(
105
- minimum=1,
106
- maximum=100,
107
- step=1,
108
- value=50,
109
- label="Num Inference Step",
110
- )
111
-
112
- controlnet_mlsd_predict = gr.Button(value="Generator")
113
-
114
- with gr.Column():
115
- output_image = gr.Image(label="Output")
116
-
117
- gr.Examples(
118
- fn=stable_diffusion_controlnet_mlsd,
119
- examples=[
120
- [
121
- data_list[0],
122
- stable_model_list[0],
123
- stable_prompt_list[0],
124
- stable_negative_prompt_list[0],
125
- 7.5,
126
- 50,
127
- ]
128
- ],
129
- inputs=[
130
- controlnet_mlsd_image_file,
131
- controlnet_mlsd_model_id,
132
- controlnet_mlsd_prompt,
133
- controlnet_mlsd_negative_prompt,
134
- controlnet_mlsd_guidance_scale,
135
- controlnet_mlsd_num_inference_step,
136
- ],
137
- outputs=[output_image],
138
- label="ControlNet-MLSD Example",
139
- cache_examples=False,
140
- )
141
-
142
- controlnet_mlsd_predict.click(
143
- fn=stable_diffusion_controlnet_mlsd,
144
- inputs=[
145
- controlnet_mlsd_image_file,
146
- controlnet_mlsd_model_id,
147
- controlnet_mlsd_prompt,
148
- controlnet_mlsd_negative_prompt,
149
- controlnet_mlsd_guidance_scale,
150
- controlnet_mlsd_num_inference_step,
151
- ],
152
- outputs=output_image,
153
- )
diffusion_webui/controlnet/controlnet_pose.py DELETED
@@ -1,170 +0,0 @@
1
- import gradio as gr
2
- import torch
3
- from controlnet_aux import OpenposeDetector
4
- from diffusers import (
5
- ControlNetModel,
6
- StableDiffusionControlNetPipeline,
7
- UniPCMultistepScheduler,
8
- )
9
- from PIL import Image
10
-
11
- stable_model_list = [
12
- "runwayml/stable-diffusion-v1-5",
13
- "stabilityai/stable-diffusion-2-1",
14
- ]
15
-
16
- controlnet_pose_model_list = [
17
- "lllyasviel/sd-controlnet-openpose",
18
- "thibaud/controlnet-sd21-openpose-diffusers",
19
- ]
20
-
21
- stable_prompt_list = ["a photo of a man.", "a photo of a girl."]
22
-
23
- stable_negative_prompt_list = ["bad, ugly", "deformed"]
24
-
25
- data_list = [
26
- "data/test.png",
27
- ]
28
-
29
-
30
- def controlnet_pose(image_path: str, controlnet_pose_model_path: str):
31
- openpose = OpenposeDetector.from_pretrained("lllyasviel/ControlNet")
32
-
33
- image = Image.open(image_path)
34
- image = openpose(image)
35
-
36
- controlnet = ControlNetModel.from_pretrained(
37
- controlnet_pose_model_path, torch_dtype=torch.float16
38
- )
39
-
40
- return controlnet, image
41
-
42
-
43
- def stable_diffusion_controlnet_pose(
44
- image_path: str,
45
- stable_model_path: str,
46
- controlnet_pose_model_path: str,
47
- prompt: str,
48
- negative_prompt: str,
49
- guidance_scale: int,
50
- num_inference_step: int,
51
- ):
52
-
53
- controlnet, image = controlnet_pose(
54
- image_path=image_path,
55
- controlnet_pose_model_path=controlnet_pose_model_path,
56
- )
57
-
58
- pipe = StableDiffusionControlNetPipeline.from_pretrained(
59
- pretrained_model_name_or_path=stable_model_path,
60
- controlnet=controlnet,
61
- safety_checker=None,
62
- torch_dtype=torch.float16,
63
- )
64
-
65
- pipe.to("cuda")
66
- pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
67
- pipe.enable_xformers_memory_efficient_attention()
68
-
69
- output = pipe(
70
- prompt=prompt,
71
- image=image,
72
- negative_prompt=negative_prompt,
73
- num_inference_steps=num_inference_step,
74
- guidance_scale=guidance_scale,
75
- ).images
76
-
77
- return output[0]
78
-
79
-
80
- def stable_diffusion_controlnet_pose_app():
81
- with gr.Blocks():
82
- with gr.Row():
83
- with gr.Column():
84
- controlnet_pose_image_file = gr.Image(
85
- type="filepath", label="Image"
86
- )
87
-
88
- controlnet_pose_stable_model_id = gr.Dropdown(
89
- choices=stable_model_list,
90
- value=stable_model_list[0],
91
- label="Stable Model Id",
92
- )
93
-
94
- controlnet_pose_model_id = gr.Dropdown(
95
- choices=controlnet_pose_model_list,
96
- value=controlnet_pose_model_list[0],
97
- label="ControlNet Model Id",
98
- )
99
-
100
- controlnet_pose_prompt = gr.Textbox(
101
- lines=1, value=stable_prompt_list[0], label="Prompt"
102
- )
103
-
104
- controlnet_pose_negative_prompt = gr.Textbox(
105
- lines=1,
106
- value=stable_negative_prompt_list[0],
107
- label="Negative Prompt",
108
- )
109
-
110
- with gr.Accordion("Advanced Options", open=False):
111
- controlnet_pose_guidance_scale = gr.Slider(
112
- minimum=0.1,
113
- maximum=15,
114
- step=0.1,
115
- value=7.5,
116
- label="Guidance Scale",
117
- )
118
-
119
- controlnet_pose_num_inference_step = gr.Slider(
120
- minimum=1,
121
- maximum=100,
122
- step=1,
123
- value=50,
124
- label="Num Inference Step",
125
- )
126
-
127
- controlnet_pose_predict = gr.Button(value="Generator")
128
-
129
- with gr.Column():
130
- output_image = gr.Image(label="Output")
131
-
132
- gr.Examples(
133
- fn=stable_diffusion_controlnet_pose,
134
- examples=[
135
- [
136
- data_list[0],
137
- stable_model_list[0],
138
- controlnet_pose_model_list[0],
139
- stable_prompt_list[0],
140
- stable_negative_prompt_list[0],
141
- 7.5,
142
- 50,
143
- ]
144
- ],
145
- inputs=[
146
- controlnet_pose_image_file,
147
- controlnet_pose_stable_model_id,
148
- controlnet_pose_model_id,
149
- controlnet_pose_prompt,
150
- controlnet_pose_negative_prompt,
151
- controlnet_pose_guidance_scale,
152
- controlnet_pose_num_inference_step,
153
- ],
154
- outputs=[output_image],
155
- label="ControlNet Pose Example",
156
- cache_examples=False,
157
- )
158
- controlnet_pose_predict.click(
159
- fn=stable_diffusion_controlnet_pose,
160
- inputs=[
161
- controlnet_pose_image_file,
162
- controlnet_pose_stable_model_id,
163
- controlnet_pose_model_id,
164
- controlnet_pose_prompt,
165
- controlnet_pose_negative_prompt,
166
- controlnet_pose_guidance_scale,
167
- controlnet_pose_num_inference_step,
168
- ],
169
- outputs=output_image,
170
- )
diffusion_webui/controlnet/controlnet_scribble.py DELETED
@@ -1,170 +0,0 @@
1
- import gradio as gr
2
- import torch
3
- from controlnet_aux import HEDdetector
4
- from diffusers import (
5
- ControlNetModel,
6
- StableDiffusionControlNetPipeline,
7
- UniPCMultistepScheduler,
8
- )
9
- from PIL import Image
10
-
11
- stable_model_list = [
12
- "runwayml/stable-diffusion-v1-5",
13
- "stabilityai/stable-diffusion-2-1",
14
- ]
15
-
16
- controlnet_hed_model_list = [
17
- "lllyasviel/sd-controlnet-scribble",
18
- "thibaud/controlnet-sd21-scribble-diffusers",
19
- ]
20
-
21
- stable_prompt_list = ["a photo of a man.", "a photo of a girl."]
22
-
23
- stable_negative_prompt_list = ["bad, ugly", "deformed"]
24
-
25
- data_list = [
26
- "data/test.png",
27
- ]
28
-
29
-
30
- def controlnet_scribble(image_path: str, controlnet_hed_model_path: str):
31
- hed = HEDdetector.from_pretrained("lllyasviel/ControlNet")
32
-
33
- image = Image.open(image_path)
34
- image = hed(image, scribble=True)
35
-
36
- controlnet = ControlNetModel.from_pretrained(
37
- controlnet_hed_model_path, torch_dtype=torch.float16
38
- )
39
-
40
- return controlnet, image
41
-
42
-
43
- def stable_diffusion_controlnet_scribble(
44
- image_path: str,
45
- stable_model_path: str,
46
- controlnet_hed_model_path: str,
47
- prompt: str,
48
- negative_prompt: str,
49
- guidance_scale: int,
50
- num_inference_step: int,
51
- ):
52
-
53
- controlnet, image = controlnet_scribble(
54
- image_path=image_path,
55
- controlnet_hed_model_path=controlnet_hed_model_path,
56
- )
57
-
58
- pipe = StableDiffusionControlNetPipeline.from_pretrained(
59
- pretrained_model_name_or_path=stable_model_path,
60
- controlnet=controlnet,
61
- safety_checker=None,
62
- torch_dtype=torch.float16,
63
- )
64
-
65
- pipe.to("cuda")
66
- pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
67
- pipe.enable_xformers_memory_efficient_attention()
68
-
69
- output = pipe(
70
- prompt=prompt,
71
- image=image,
72
- negative_prompt=negative_prompt,
73
- num_inference_steps=num_inference_step,
74
- guidance_scale=guidance_scale,
75
- ).images
76
-
77
- return output[0]
78
-
79
-
80
- def stable_diffusion_controlnet_scribble_app():
81
- with gr.Blocks():
82
- with gr.Row():
83
- with gr.Column():
84
- controlnet_scribble_image_file = gr.Image(
85
- type="filepath", label="Image"
86
- )
87
-
88
- controlnet_scribble_stable_model_id = gr.Dropdown(
89
- choices=stable_model_list,
90
- value=stable_model_list[0],
91
- label="Stable v1.5 Model Id",
92
- )
93
-
94
- controlnet_scribble_model_id = gr.Dropdown(
95
- choices=controlnet_hed_model_list,
96
- value=controlnet_hed_model_list[0],
97
- label="ControlNet Model Id",
98
- )
99
-
100
- controlnet_scribble_prompt = gr.Textbox(
101
- lines=1, value=stable_prompt_list[0], label="Prompt"
102
- )
103
-
104
- controlnet_scribble_negative_prompt = gr.Textbox(
105
- lines=1,
106
- value=stable_negative_prompt_list[0],
107
- label="Negative Prompt",
108
- )
109
-
110
- with gr.Accordion("Advanced Options", open=False):
111
- controlnet_scribble_guidance_scale = gr.Slider(
112
- minimum=0.1,
113
- maximum=15,
114
- step=0.1,
115
- value=7.5,
116
- label="Guidance Scale",
117
- )
118
-
119
- controlnet_scribble_num_inference_step = gr.Slider(
120
- minimum=1,
121
- maximum=100,
122
- step=1,
123
- value=50,
124
- label="Num Inference Step",
125
- )
126
-
127
- controlnet_scribble_predict = gr.Button(value="Generator")
128
-
129
- with gr.Column():
130
- output_image = gr.Image(label="Output")
131
-
132
- gr.Examples(
133
- fn=stable_diffusion_controlnet_scribble,
134
- examples=[
135
- [
136
- data_list[0],
137
- stable_model_list[0],
138
- controlnet_hed_model_list[0],
139
- stable_prompt_list[0],
140
- stable_negative_prompt_list[0],
141
- 7.5,
142
- 50,
143
- ],
144
- ],
145
- inputs=[
146
- controlnet_scribble_image_file,
147
- controlnet_scribble_stable_model_id,
148
- controlnet_scribble_model_id,
149
- controlnet_scribble_prompt,
150
- controlnet_scribble_negative_prompt,
151
- controlnet_scribble_guidance_scale,
152
- controlnet_scribble_num_inference_step,
153
- ],
154
- outputs=[output_image],
155
- label="ControlNet Scribble Example",
156
- cache_examples=False,
157
- )
158
- controlnet_scribble_predict.click(
159
- fn=stable_diffusion_controlnet_scribble,
160
- inputs=[
161
- controlnet_scribble_image_file,
162
- controlnet_scribble_stable_model_id,
163
- controlnet_scribble_model_id,
164
- controlnet_scribble_prompt,
165
- controlnet_scribble_negative_prompt,
166
- controlnet_scribble_guidance_scale,
167
- controlnet_scribble_num_inference_step,
168
- ],
169
- outputs=output_image,
170
- )
diffusion_webui/controlnet/controlnet_seg.py DELETED
@@ -1,329 +0,0 @@
1
- import gradio as gr
2
- import numpy as np
3
- import torch
4
- from diffusers import (
5
- ControlNetModel,
6
- StableDiffusionControlNetPipeline,
7
- UniPCMultistepScheduler,
8
- )
9
- from PIL import Image
10
- from transformers import AutoImageProcessor, UperNetForSemanticSegmentation
11
-
12
- stable_model_list = [
13
- "runwayml/stable-diffusion-v1-5",
14
- ]
15
-
16
- stable_prompt_list = ["a photo of a man.", "a photo of a girl."]
17
-
18
- stable_negative_prompt_list = ["bad, ugly", "deformed"]
19
-
20
- data_list = [
21
- "data/test.png",
22
- ]
23
-
24
-
25
- def ade_palette():
26
- """ADE20K palette that maps each class to RGB values."""
27
- return [
28
- [120, 120, 120],
29
- [180, 120, 120],
30
- [6, 230, 230],
31
- [80, 50, 50],
32
- [4, 200, 3],
33
- [120, 120, 80],
34
- [140, 140, 140],
35
- [204, 5, 255],
36
- [230, 230, 230],
37
- [4, 250, 7],
38
- [224, 5, 255],
39
- [235, 255, 7],
40
- [150, 5, 61],
41
- [120, 120, 70],
42
- [8, 255, 51],
43
- [255, 6, 82],
44
- [143, 255, 140],
45
- [204, 255, 4],
46
- [255, 51, 7],
47
- [204, 70, 3],
48
- [0, 102, 200],
49
- [61, 230, 250],
50
- [255, 6, 51],
51
- [11, 102, 255],
52
- [255, 7, 71],
53
- [255, 9, 224],
54
- [9, 7, 230],
55
- [220, 220, 220],
56
- [255, 9, 92],
57
- [112, 9, 255],
58
- [8, 255, 214],
59
- [7, 255, 224],
60
- [255, 184, 6],
61
- [10, 255, 71],
62
- [255, 41, 10],
63
- [7, 255, 255],
64
- [224, 255, 8],
65
- [102, 8, 255],
66
- [255, 61, 6],
67
- [255, 194, 7],
68
- [255, 122, 8],
69
- [0, 255, 20],
70
- [255, 8, 41],
71
- [255, 5, 153],
72
- [6, 51, 255],
73
- [235, 12, 255],
74
- [160, 150, 20],
75
- [0, 163, 255],
76
- [140, 140, 140],
77
- [250, 10, 15],
78
- [20, 255, 0],
79
- [31, 255, 0],
80
- [255, 31, 0],
81
- [255, 224, 0],
82
- [153, 255, 0],
83
- [0, 0, 255],
84
- [255, 71, 0],
85
- [0, 235, 255],
86
- [0, 173, 255],
87
- [31, 0, 255],
88
- [11, 200, 200],
89
- [255, 82, 0],
90
- [0, 255, 245],
91
- [0, 61, 255],
92
- [0, 255, 112],
93
- [0, 255, 133],
94
- [255, 0, 0],
95
- [255, 163, 0],
96
- [255, 102, 0],
97
- [194, 255, 0],
98
- [0, 143, 255],
99
- [51, 255, 0],
100
- [0, 82, 255],
101
- [0, 255, 41],
102
- [0, 255, 173],
103
- [10, 0, 255],
104
- [173, 255, 0],
105
- [0, 255, 153],
106
- [255, 92, 0],
107
- [255, 0, 255],
108
- [255, 0, 245],
109
- [255, 0, 102],
110
- [255, 173, 0],
111
- [255, 0, 20],
112
- [255, 184, 184],
113
- [0, 31, 255],
114
- [0, 255, 61],
115
- [0, 71, 255],
116
- [255, 0, 204],
117
- [0, 255, 194],
118
- [0, 255, 82],
119
- [0, 10, 255],
120
- [0, 112, 255],
121
- [51, 0, 255],
122
- [0, 194, 255],
123
- [0, 122, 255],
124
- [0, 255, 163],
125
- [255, 153, 0],
126
- [0, 255, 10],
127
- [255, 112, 0],
128
- [143, 255, 0],
129
- [82, 0, 255],
130
- [163, 255, 0],
131
- [255, 235, 0],
132
- [8, 184, 170],
133
- [133, 0, 255],
134
- [0, 255, 92],
135
- [184, 0, 255],
136
- [255, 0, 31],
137
- [0, 184, 255],
138
- [0, 214, 255],
139
- [255, 0, 112],
140
- [92, 255, 0],
141
- [0, 224, 255],
142
- [112, 224, 255],
143
- [70, 184, 160],
144
- [163, 0, 255],
145
- [153, 0, 255],
146
- [71, 255, 0],
147
- [255, 0, 163],
148
- [255, 204, 0],
149
- [255, 0, 143],
150
- [0, 255, 235],
151
- [133, 255, 0],
152
- [255, 0, 235],
153
- [245, 0, 255],
154
- [255, 0, 122],
155
- [255, 245, 0],
156
- [10, 190, 212],
157
- [214, 255, 0],
158
- [0, 204, 255],
159
- [20, 0, 255],
160
- [255, 255, 0],
161
- [0, 153, 255],
162
- [0, 41, 255],
163
- [0, 255, 204],
164
- [41, 0, 255],
165
- [41, 255, 0],
166
- [173, 0, 255],
167
- [0, 245, 255],
168
- [71, 0, 255],
169
- [122, 0, 255],
170
- [0, 255, 184],
171
- [0, 92, 255],
172
- [184, 255, 0],
173
- [0, 133, 255],
174
- [255, 214, 0],
175
- [25, 194, 194],
176
- [102, 255, 0],
177
- [92, 0, 255],
178
- ]
179
-
180
-
181
- def controlnet_mlsd(image_path: str):
182
- image_processor = AutoImageProcessor.from_pretrained(
183
- "openmmlab/upernet-convnext-small"
184
- )
185
- image_segmentor = UperNetForSemanticSegmentation.from_pretrained(
186
- "openmmlab/upernet-convnext-small"
187
- )
188
-
189
- image = Image.open(image_path).convert("RGB")
190
- pixel_values = image_processor(image, return_tensors="pt").pixel_values
191
-
192
- with torch.no_grad():
193
- outputs = image_segmentor(pixel_values)
194
-
195
- seg = image_processor.post_process_semantic_segmentation(
196
- outputs, target_sizes=[image.size[::-1]]
197
- )[0]
198
-
199
- color_seg = np.zeros((seg.shape[0], seg.shape[1], 3), dtype=np.uint8)
200
- palette = np.array(ade_palette())
201
-
202
- for label, color in enumerate(palette):
203
- color_seg[seg == label, :] = color
204
-
205
- color_seg = color_seg.astype(np.uint8)
206
- image = Image.fromarray(color_seg)
207
- controlnet = ControlNetModel.from_pretrained(
208
- "lllyasviel/sd-controlnet-seg", torch_dtype=torch.float16
209
- )
210
-
211
- return controlnet, image
212
-
213
-
214
- def stable_diffusion_controlnet_seg(
215
- image_path: str,
216
- model_path: str,
217
- prompt: str,
218
- negative_prompt: str,
219
- guidance_scale: int,
220
- num_inference_step: int,
221
- ):
222
-
223
- controlnet, image = controlnet_mlsd(image_path=image_path)
224
-
225
- pipe = StableDiffusionControlNetPipeline.from_pretrained(
226
- pretrained_model_name_or_path=model_path,
227
- controlnet=controlnet,
228
- safety_checker=None,
229
- torch_dtype=torch.float16,
230
- )
231
-
232
- pipe.to("cuda")
233
- pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
234
- pipe.enable_xformers_memory_efficient_attention()
235
-
236
- output = pipe(
237
- prompt=prompt,
238
- image=image,
239
- negative_prompt=negative_prompt,
240
- num_inference_steps=num_inference_step,
241
- guidance_scale=guidance_scale,
242
- ).images
243
-
244
- return output[0]
245
-
246
-
247
- def stable_diffusion_controlnet_seg_app():
248
- with gr.Blocks():
249
- with gr.Row():
250
- with gr.Column():
251
- controlnet_seg_image_file = gr.Image(
252
- type="filepath", label="Image"
253
- )
254
-
255
- controlnet_seg_model_id = gr.Dropdown(
256
- choices=stable_model_list,
257
- value=stable_model_list[0],
258
- label="Stable Model Id",
259
- )
260
-
261
- controlnet_seg_prompt = gr.Textbox(
262
- lines=1, value=stable_prompt_list[0], label="Prompt"
263
- )
264
-
265
- controlnet_seg_negative_prompt = gr.Textbox(
266
- lines=1,
267
- value=stable_negative_prompt_list[0],
268
- label="Negative Prompt",
269
- )
270
-
271
- with gr.Accordion("Advanced Options", open=False):
272
- controlnet_seg_guidance_scale = gr.Slider(
273
- minimum=0.1,
274
- maximum=15,
275
- step=0.1,
276
- value=7.5,
277
- label="Guidance Scale",
278
- )
279
-
280
- controlnet_seg_num_inference_step = gr.Slider(
281
- minimum=1,
282
- maximum=100,
283
- step=1,
284
- value=50,
285
- label="Num Inference Step",
286
- )
287
-
288
- controlnet_seg_predict = gr.Button(value="Generator")
289
-
290
- with gr.Column():
291
- output_image = gr.Image(label="Output")
292
-
293
- gr.Examples(
294
- fn=stable_diffusion_controlnet_seg,
295
- examples=[
296
- [
297
- data_list[0],
298
- stable_model_list[0],
299
- stable_prompt_list[0],
300
- stable_negative_prompt_list[0],
301
- 7.5,
302
- 50,
303
- ],
304
- ],
305
- inputs=[
306
- controlnet_seg_image_file,
307
- controlnet_seg_model_id,
308
- controlnet_seg_prompt,
309
- controlnet_seg_negative_prompt,
310
- controlnet_seg_guidance_scale,
311
- controlnet_seg_num_inference_step,
312
- ],
313
- outputs=[output_image],
314
- cache_examples=False,
315
- label="ControlNet Segmentation Example",
316
- )
317
-
318
- controlnet_seg_predict.click(
319
- fn=stable_diffusion_controlnet_seg,
320
- inputs=[
321
- controlnet_seg_image_file,
322
- controlnet_seg_model_id,
323
- controlnet_seg_prompt,
324
- controlnet_seg_negative_prompt,
325
- controlnet_seg_guidance_scale,
326
- controlnet_seg_num_inference_step,
327
- ],
328
- outputs=[output_image],
329
- )
diffusion_webui/controlnet_inpaint/__init__.py DELETED
File without changes
diffusion_webui/controlnet_inpaint/canny_inpaint.py DELETED
@@ -1,176 +0,0 @@
1
- import cv2
2
- import gradio as gr
3
- import numpy as np
4
- import torch
5
- from diffusers import (
6
- ControlNetModel,
7
- StableDiffusionControlNetPipeline,
8
- UniPCMultistepScheduler,
9
- )
10
- from PIL import Image
11
-
12
- stable_model_list = [
13
- "runwayml/stable-diffusion-v1-5",
14
- "stabilityai/stable-diffusion-2-1",
15
- ]
16
-
17
- controlnet_canny_model_list = [
18
- "lllyasviel/sd-controlnet-canny",
19
- "thibaud/controlnet-sd21-canny-diffusers",
20
- ]
21
-
22
-
23
- stable_prompt_list = ["a photo of a man.", "a photo of a girl."]
24
-
25
- stable_negative_prompt_list = ["bad, ugly", "deformed"]
26
-
27
- data_list = [
28
- "data/test.png",
29
- ]
30
-
31
-
32
- def controlnet_canny(
33
- dict_image: str,
34
- controlnet_model_path: str,
35
- ):
36
- image = dict_image["image"].convert("RGB").resize((512, 512))
37
- image = np.array(image)
38
-
39
- image = cv2.Canny(image, 100, 200)
40
- image = image[:, :, None]
41
- image = np.concatenate([image, image, image], axis=2)
42
- image = Image.fromarray(image)
43
-
44
- controlnet = ControlNetModel.from_pretrained(
45
- controlnet_model_path, torch_dtype=torch.float16
46
- )
47
- return controlnet, image
48
-
49
-
50
- def stable_diffusion_controlnet_canny(
51
- image_path: str,
52
- stable_model_path: str,
53
- controlnet_model_path: str,
54
- prompt: str,
55
- negative_prompt: str,
56
- guidance_scale: int,
57
- num_inference_step: int,
58
- ):
59
-
60
- controlnet, image = controlnet_canny(
61
- image_path=image_path, controlnet_model_path=controlnet_model_path
62
- )
63
-
64
- pipe = StableDiffusionControlNetPipeline.from_pretrained(
65
- pretrained_model_name_or_path=stable_model_path,
66
- controlnet=controlnet,
67
- safety_checker=None,
68
- torch_dtype=torch.float16,
69
- )
70
- pipe.to("cuda")
71
- pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
72
- pipe.enable_xformers_memory_efficient_attention()
73
-
74
- output = pipe(
75
- prompt=prompt,
76
- image=image,
77
- negative_prompt=negative_prompt,
78
- num_inference_steps=num_inference_step,
79
- guidance_scale=guidance_scale,
80
- ).images
81
-
82
- return output[0]
83
-
84
-
85
- def stable_diffusion_controlnet_canny_app():
86
- with gr.Blocks():
87
- with gr.Row():
88
- with gr.Column():
89
- controlnet_canny_image_file = gr.Image(
90
- type="filepath", label="Image"
91
- )
92
-
93
- controlnet_canny_stable_model_id = gr.Dropdown(
94
- choices=stable_model_list,
95
- value=stable_model_list[0],
96
- label="Stable Model Id",
97
- )
98
-
99
- controlnet_canny_model_id = gr.Dropdown(
100
- choices=controlnet_canny_model_list,
101
- value=controlnet_canny_model_list[0],
102
- label="Controlnet Model Id",
103
- )
104
-
105
- controlnet_canny_prompt = gr.Textbox(
106
- lines=1, value=stable_prompt_list[0], label="Prompt"
107
- )
108
-
109
- controlnet_canny_negative_prompt = gr.Textbox(
110
- lines=1,
111
- value=stable_negative_prompt_list[0],
112
- label="Negative Prompt",
113
- )
114
-
115
- with gr.Accordion("Advanced Options", open=False):
116
- controlnet_canny_guidance_scale = gr.Slider(
117
- minimum=0.1,
118
- maximum=15,
119
- step=0.1,
120
- value=7.5,
121
- label="Guidance Scale",
122
- )
123
-
124
- controlnet_canny_num_inference_step = gr.Slider(
125
- minimum=1,
126
- maximum=100,
127
- step=1,
128
- value=50,
129
- label="Num Inference Step",
130
- )
131
-
132
- controlnet_canny_predict = gr.Button(value="Generator")
133
-
134
- with gr.Column():
135
- output_image = gr.Image(label="Output")
136
-
137
- gr.Examples(
138
- fn=stable_diffusion_controlnet_canny,
139
- examples=[
140
- [
141
- data_list[0],
142
- stable_model_list[0],
143
- controlnet_canny_model_list[0],
144
- stable_prompt_list[0],
145
- stable_negative_prompt_list[0],
146
- 7.5,
147
- 50,
148
- ]
149
- ],
150
- inputs=[
151
- controlnet_canny_image_file,
152
- controlnet_canny_stable_model_id,
153
- controlnet_canny_model_id,
154
- controlnet_canny_prompt,
155
- controlnet_canny_negative_prompt,
156
- controlnet_canny_guidance_scale,
157
- controlnet_canny_num_inference_step,
158
- ],
159
- outputs=[output_image],
160
- cache_examples=False,
161
- label="Controlnet Canny Example",
162
- )
163
-
164
- controlnet_canny_predict.click(
165
- fn=stable_diffusion_controlnet_canny,
166
- inputs=[
167
- controlnet_canny_image_file,
168
- controlnet_canny_stable_model_id,
169
- controlnet_canny_model_id,
170
- controlnet_canny_prompt,
171
- controlnet_canny_negative_prompt,
172
- controlnet_canny_guidance_scale,
173
- controlnet_canny_num_inference_step,
174
- ],
175
- outputs=[output_image],
176
- )
diffusion_webui/controlnet_inpaint/controlnet_inpaint_app.py DELETED
@@ -1,159 +0,0 @@
1
- import gradio as gr
2
- import numpy as np
3
- import torch
4
- from diffusers import UniPCMultistepScheduler
5
- from PIL import Image
6
-
7
- from diffusion_webui.controlnet_inpaint.canny_inpaint import controlnet_canny
8
- from diffusion_webui.controlnet_inpaint.pipeline_stable_diffusion_controlnet_inpaint import (
9
- StableDiffusionControlNetInpaintPipeline,
10
- )
11
-
12
- stable_inpaint_model_list = [
13
- "runwayml/stable-diffusion-inpainting",
14
- "stabilityai/stable-diffusion-2-inpainting",
15
- ]
16
-
17
- controlnet_model_list = [
18
- "lllyasviel/sd-controlnet-canny",
19
- ]
20
-
21
- prompt_list = [
22
- "a red panda sitting on a bench",
23
- ]
24
-
25
- negative_prompt_list = [
26
- "bad, ugly",
27
- ]
28
-
29
-
30
- def load_img(image_path: str):
31
- image = Image.open(image_path)
32
- image = np.array(image)
33
- image = Image.fromarray(image)
34
-
35
- return image
36
-
37
-
38
- def stable_diffusion_inpiant_controlnet_canny(
39
- dict_image: str,
40
- stable_model_path: str,
41
- controlnet_model_path: str,
42
- prompt: str,
43
- negative_prompt: str,
44
- controlnet_conditioning_scale: str,
45
- guidance_scale: int,
46
- num_inference_steps: int,
47
- ):
48
- normal_image = dict_image["image"].convert("RGB").resize((512, 512))
49
- mask_image = dict_image["mask"].convert("RGB").resize((512, 512))
50
-
51
- controlnet, control_image = controlnet_canny(
52
- dict_image=dict_image,
53
- controlnet_model_path=controlnet_model_path,
54
- )
55
-
56
- pipe = StableDiffusionControlNetInpaintPipeline.from_pretrained(
57
- pretrained_model_name_or_path=stable_model_path,
58
- controlnet=controlnet,
59
- torch_dtype=torch.float16,
60
- )
61
- pipe.to("cuda")
62
- pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
63
- pipe.enable_xformers_memory_efficient_attention()
64
-
65
- generator = torch.manual_seed(0)
66
-
67
- output = pipe(
68
- prompt=prompt,
69
- negative_prompt=negative_prompt,
70
- num_inference_steps=num_inference_steps,
71
- generator=generator,
72
- image=normal_image,
73
- control_image=control_image,
74
- controlnet_conditioning_scale=controlnet_conditioning_scale,
75
- guidance_scale=guidance_scale,
76
- mask_image=mask_image,
77
- ).images
78
-
79
- return output[0]
80
-
81
-
82
- def stable_diffusion_inpiant_controlnet_canny_app():
83
- with gr.Blocks():
84
- with gr.Row():
85
- with gr.Column():
86
- inpaint_image_file = gr.Image(
87
- source="upload",
88
- tool="sketch",
89
- elem_id="image_upload",
90
- type="pil",
91
- label="Upload",
92
- )
93
-
94
- inpaint_model_id = gr.Dropdown(
95
- choices=stable_inpaint_model_list,
96
- value=stable_inpaint_model_list[0],
97
- label="Inpaint Model Id",
98
- )
99
-
100
- inpaint_controlnet_model_id = gr.Dropdown(
101
- choices=controlnet_model_list,
102
- value=controlnet_model_list[0],
103
- label="ControlNet Model Id",
104
- )
105
-
106
- inpaint_prompt = gr.Textbox(
107
- lines=1, value=prompt_list[0], label="Prompt"
108
- )
109
-
110
- inpaint_negative_prompt = gr.Textbox(
111
- lines=1,
112
- value=negative_prompt_list[0],
113
- label="Negative Prompt",
114
- )
115
-
116
- with gr.Accordion("Advanced Options", open=False):
117
- controlnet_conditioning_scale = gr.Slider(
118
- minimum=0.1,
119
- maximum=1,
120
- step=0.1,
121
- value=0.5,
122
- label="ControlNet Conditioning Scale",
123
- )
124
-
125
- inpaint_guidance_scale = gr.Slider(
126
- minimum=0.1,
127
- maximum=15,
128
- step=0.1,
129
- value=7.5,
130
- label="Guidance Scale",
131
- )
132
-
133
- inpaint_num_inference_step = gr.Slider(
134
- minimum=1,
135
- maximum=100,
136
- step=1,
137
- value=50,
138
- label="Num Inference Step",
139
- )
140
-
141
- inpaint_predict = gr.Button(value="Generator")
142
-
143
- with gr.Column():
144
- output_image = gr.Image(label="Outputs")
145
-
146
- inpaint_predict.click(
147
- fn=stable_diffusion_inpiant_controlnet_canny,
148
- inputs=[
149
- inpaint_image_file,
150
- inpaint_model_id,
151
- inpaint_controlnet_model_id,
152
- inpaint_prompt,
153
- inpaint_negative_prompt,
154
- controlnet_conditioning_scale,
155
- inpaint_guidance_scale,
156
- inpaint_num_inference_step,
157
- ],
158
- outputs=output_image,
159
- )
diffusion_webui/controlnet_inpaint/pipeline_stable_diffusion_controlnet_inpaint.py DELETED
@@ -1,607 +0,0 @@
1
- # Copyright 2023 The HuggingFace Team. All rights reserved.
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
-
15
- import numpy as np
16
- import PIL.Image
17
- import torch
18
- from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_controlnet import *
19
-
20
- EXAMPLE_DOC_STRING = """
21
- Examples:
22
- ```py
23
- >>> # !pip install opencv-python transformers accelerate
24
- >>> from diffusers import StableDiffusionControlNetInpaintPipeline, ControlNetModel, UniPCMultistepScheduler
25
- >>> from diffusers.utils import load_image
26
- >>> import numpy as np
27
- >>> import torch
28
-
29
- >>> import cv2
30
- >>> from PIL import Image
31
- >>> # download an image
32
- >>> image = load_image(
33
- ... "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo.png"
34
- ... )
35
- >>> image = np.array(image)
36
- >>> mask_image = load_image(
37
- ... "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo_mask.png"
38
- ... )
39
- >>> mask_image = np.array(mask_image)
40
- >>> # get canny image
41
- >>> canny_image = cv2.Canny(image, 100, 200)
42
- >>> canny_image = canny_image[:, :, None]
43
- >>> canny_image = np.concatenate([canny_image, canny_image, canny_image], axis=2)
44
- >>> canny_image = Image.fromarray(canny_image)
45
-
46
- >>> # load control net and stable diffusion v1-5
47
- >>> controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-canny", torch_dtype=torch.float16)
48
- >>> pipe = StableDiffusionControlNetInpaintPipeline.from_pretrained(
49
- ... "runwayml/stable-diffusion-inpainting", controlnet=controlnet, torch_dtype=torch.float16
50
- ... )
51
-
52
- >>> # speed up diffusion process with faster scheduler and memory optimization
53
- >>> pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
54
- >>> # remove following line if xformers is not installed
55
- >>> pipe.enable_xformers_memory_efficient_attention()
56
-
57
- >>> pipe.enable_model_cpu_offload()
58
-
59
- >>> # generate image
60
- >>> generator = torch.manual_seed(0)
61
- >>> image = pipe(
62
- ... "futuristic-looking doggo",
63
- ... num_inference_steps=20,
64
- ... generator=generator,
65
- ... image=image,
66
- ... control_image=canny_image,
67
- ... mask_image=mask_image
68
- ... ).images[0]
69
- ```
70
- """
71
-
72
-
73
- def prepare_mask_and_masked_image(image, mask):
74
- """
75
- Prepares a pair (image, mask) to be consumed by the Stable Diffusion pipeline. This means that those inputs will be
76
- converted to ``torch.Tensor`` with shapes ``batch x channels x height x width`` where ``channels`` is ``3`` for the
77
- ``image`` and ``1`` for the ``mask``.
78
- The ``image`` will be converted to ``torch.float32`` and normalized to be in ``[-1, 1]``. The ``mask`` will be
79
- binarized (``mask > 0.5``) and cast to ``torch.float32`` too.
80
- Args:
81
- image (Union[np.array, PIL.Image, torch.Tensor]): The image to inpaint.
82
- It can be a ``PIL.Image``, or a ``height x width x 3`` ``np.array`` or a ``channels x height x width``
83
- ``torch.Tensor`` or a ``batch x channels x height x width`` ``torch.Tensor``.
84
- mask (_type_): The mask to apply to the image, i.e. regions to inpaint.
85
- It can be a ``PIL.Image``, or a ``height x width`` ``np.array`` or a ``1 x height x width``
86
- ``torch.Tensor`` or a ``batch x 1 x height x width`` ``torch.Tensor``.
87
- Raises:
88
- ValueError: ``torch.Tensor`` images should be in the ``[-1, 1]`` range. ValueError: ``torch.Tensor`` mask
89
- should be in the ``[0, 1]`` range. ValueError: ``mask`` and ``image`` should have the same spatial dimensions.
90
- TypeError: ``mask`` is a ``torch.Tensor`` but ``image`` is not
91
- (ot the other way around).
92
- Returns:
93
- tuple[torch.Tensor]: The pair (mask, masked_image) as ``torch.Tensor`` with 4
94
- dimensions: ``batch x channels x height x width``.
95
- """
96
- if isinstance(image, torch.Tensor):
97
- if not isinstance(mask, torch.Tensor):
98
- raise TypeError(
99
- f"`image` is a torch.Tensor but `mask` (type: {type(mask)} is not"
100
- )
101
-
102
- # Batch single image
103
- if image.ndim == 3:
104
- assert (
105
- image.shape[0] == 3
106
- ), "Image outside a batch should be of shape (3, H, W)"
107
- image = image.unsqueeze(0)
108
-
109
- # Batch and add channel dim for single mask
110
- if mask.ndim == 2:
111
- mask = mask.unsqueeze(0).unsqueeze(0)
112
-
113
- # Batch single mask or add channel dim
114
- if mask.ndim == 3:
115
- # Single batched mask, no channel dim or single mask not batched but channel dim
116
- if mask.shape[0] == 1:
117
- mask = mask.unsqueeze(0)
118
-
119
- # Batched masks no channel dim
120
- else:
121
- mask = mask.unsqueeze(1)
122
-
123
- assert (
124
- image.ndim == 4 and mask.ndim == 4
125
- ), "Image and Mask must have 4 dimensions"
126
- assert (
127
- image.shape[-2:] == mask.shape[-2:]
128
- ), "Image and Mask must have the same spatial dimensions"
129
- assert (
130
- image.shape[0] == mask.shape[0]
131
- ), "Image and Mask must have the same batch size"
132
-
133
- # Check image is in [-1, 1]
134
- if image.min() < -1 or image.max() > 1:
135
- raise ValueError("Image should be in [-1, 1] range")
136
-
137
- # Check mask is in [0, 1]
138
- if mask.min() < 0 or mask.max() > 1:
139
- raise ValueError("Mask should be in [0, 1] range")
140
-
141
- # Binarize mask
142
- mask[mask < 0.5] = 0
143
- mask[mask >= 0.5] = 1
144
-
145
- # Image as float32
146
- image = image.to(dtype=torch.float32)
147
- elif isinstance(mask, torch.Tensor):
148
- raise TypeError(
149
- f"`mask` is a torch.Tensor but `image` (type: {type(image)} is not"
150
- )
151
- else:
152
- # preprocess image
153
- if isinstance(image, (PIL.Image.Image, np.ndarray)):
154
- image = [image]
155
-
156
- if isinstance(image, list) and isinstance(image[0], PIL.Image.Image):
157
- image = [np.array(i.convert("RGB"))[None, :] for i in image]
158
- image = np.concatenate(image, axis=0)
159
- elif isinstance(image, list) and isinstance(image[0], np.ndarray):
160
- image = np.concatenate([i[None, :] for i in image], axis=0)
161
-
162
- image = image.transpose(0, 3, 1, 2)
163
- image = torch.from_numpy(image).to(dtype=torch.float32) / 127.5 - 1.0
164
-
165
- # preprocess mask
166
- if isinstance(mask, (PIL.Image.Image, np.ndarray)):
167
- mask = [mask]
168
-
169
- if isinstance(mask, list) and isinstance(mask[0], PIL.Image.Image):
170
- mask = np.concatenate(
171
- [np.array(m.convert("L"))[None, None, :] for m in mask], axis=0
172
- )
173
- mask = mask.astype(np.float32) / 255.0
174
- elif isinstance(mask, list) and isinstance(mask[0], np.ndarray):
175
- mask = np.concatenate([m[None, None, :] for m in mask], axis=0)
176
-
177
- mask[mask < 0.5] = 0
178
- mask[mask >= 0.5] = 1
179
- mask = torch.from_numpy(mask)
180
-
181
- masked_image = image * (mask < 0.5)
182
-
183
- return mask, masked_image
184
-
185
-
186
- class StableDiffusionControlNetInpaintPipeline(
187
- StableDiffusionControlNetPipeline
188
- ):
189
- r"""
190
- Pipeline for text-guided image inpainting using Stable Diffusion with ControlNet guidance.
191
-
192
- This model inherits from [`StableDiffusionControlNetPipeline`]. Check the superclass documentation for the generic methods the
193
- library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)
194
-
195
- Args:
196
- vae ([`AutoencoderKL`]):
197
- Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.
198
- text_encoder ([`CLIPTextModel`]):
199
- Frozen text-encoder. Stable Diffusion uses the text portion of
200
- [CLIP](https://huggingface.co/docs/transformers/model_doc/clip#transformers.CLIPTextModel), specifically
201
- the [clip-vit-large-patch14](https://huggingface.co/openai/clip-vit-large-patch14) variant.
202
- tokenizer (`CLIPTokenizer`):
203
- Tokenizer of class
204
- [CLIPTokenizer](https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer).
205
- unet ([`UNet2DConditionModel`]): Conditional U-Net architecture to denoise the encoded image latents.
206
- controlnet ([`ControlNetModel`]):
207
- Provides additional conditioning to the unet during the denoising process
208
- scheduler ([`SchedulerMixin`]):
209
- A scheduler to be used in combination with `unet` to denoise the encoded image latents. Can be one of
210
- [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
211
- safety_checker ([`StableDiffusionSafetyChecker`]):
212
- Classification module that estimates whether generated images could be considered offensive or harmful.
213
- Please refer to the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5) for details.
214
- feature_extractor ([`CLIPFeatureExtractor`]):
215
- Model that extracts features from generated images to be used as inputs for the `safety_checker`.
216
- """
217
-
218
- def prepare_mask_latents(
219
- self,
220
- mask,
221
- masked_image,
222
- batch_size,
223
- height,
224
- width,
225
- dtype,
226
- device,
227
- generator,
228
- do_classifier_free_guidance,
229
- ):
230
- # resize the mask to latents shape as we concatenate the mask to the latents
231
- # we do that before converting to dtype to avoid breaking in case we're using cpu_offload
232
- # and half precision
233
- mask = torch.nn.functional.interpolate(
234
- mask,
235
- size=(
236
- height // self.vae_scale_factor,
237
- width // self.vae_scale_factor,
238
- ),
239
- )
240
- mask = mask.to(device=device, dtype=dtype)
241
-
242
- masked_image = masked_image.to(device=device, dtype=dtype)
243
-
244
- # encode the mask image into latents space so we can concatenate it to the latents
245
- if isinstance(generator, list):
246
- masked_image_latents = [
247
- self.vae.encode(masked_image[i : i + 1]).latent_dist.sample(
248
- generator=generator[i]
249
- )
250
- for i in range(batch_size)
251
- ]
252
- masked_image_latents = torch.cat(masked_image_latents, dim=0)
253
- else:
254
- masked_image_latents = self.vae.encode(
255
- masked_image
256
- ).latent_dist.sample(generator=generator)
257
- masked_image_latents = (
258
- self.vae.config.scaling_factor * masked_image_latents
259
- )
260
-
261
- # duplicate mask and masked_image_latents for each generation per prompt, using mps friendly method
262
- if mask.shape[0] < batch_size:
263
- if not batch_size % mask.shape[0] == 0:
264
- raise ValueError(
265
- "The passed mask and the required batch size don't match. Masks are supposed to be duplicated to"
266
- f" a total batch size of {batch_size}, but {mask.shape[0]} masks were passed. Make sure the number"
267
- " of masks that you pass is divisible by the total requested batch size."
268
- )
269
- mask = mask.repeat(batch_size // mask.shape[0], 1, 1, 1)
270
- if masked_image_latents.shape[0] < batch_size:
271
- if not batch_size % masked_image_latents.shape[0] == 0:
272
- raise ValueError(
273
- "The passed images and the required batch size don't match. Images are supposed to be duplicated"
274
- f" to a total batch size of {batch_size}, but {masked_image_latents.shape[0]} images were passed."
275
- " Make sure the number of images that you pass is divisible by the total requested batch size."
276
- )
277
- masked_image_latents = masked_image_latents.repeat(
278
- batch_size // masked_image_latents.shape[0], 1, 1, 1
279
- )
280
-
281
- mask = torch.cat([mask] * 2) if do_classifier_free_guidance else mask
282
- masked_image_latents = (
283
- torch.cat([masked_image_latents] * 2)
284
- if do_classifier_free_guidance
285
- else masked_image_latents
286
- )
287
-
288
- # aligning device to prevent device errors when concating it with the latent model input
289
- masked_image_latents = masked_image_latents.to(
290
- device=device, dtype=dtype
291
- )
292
- return mask, masked_image_latents
293
-
294
- @torch.no_grad()
295
- @replace_example_docstring(EXAMPLE_DOC_STRING)
296
- def __call__(
297
- self,
298
- prompt: Union[str, List[str]] = None,
299
- image: Union[torch.FloatTensor, PIL.Image.Image] = None,
300
- control_image: Union[
301
- torch.FloatTensor,
302
- PIL.Image.Image,
303
- List[torch.FloatTensor],
304
- List[PIL.Image.Image],
305
- ] = None,
306
- mask_image: Union[torch.FloatTensor, PIL.Image.Image] = None,
307
- height: Optional[int] = None,
308
- width: Optional[int] = None,
309
- num_inference_steps: int = 50,
310
- guidance_scale: float = 7.5,
311
- negative_prompt: Optional[Union[str, List[str]]] = None,
312
- num_images_per_prompt: Optional[int] = 1,
313
- eta: float = 0.0,
314
- generator: Optional[
315
- Union[torch.Generator, List[torch.Generator]]
316
- ] = None,
317
- latents: Optional[torch.FloatTensor] = None,
318
- prompt_embeds: Optional[torch.FloatTensor] = None,
319
- negative_prompt_embeds: Optional[torch.FloatTensor] = None,
320
- output_type: Optional[str] = "pil",
321
- return_dict: bool = True,
322
- callback: Optional[
323
- Callable[[int, int, torch.FloatTensor], None]
324
- ] = None,
325
- callback_steps: int = 1,
326
- cross_attention_kwargs: Optional[Dict[str, Any]] = None,
327
- controlnet_conditioning_scale: float = 1.0,
328
- ):
329
- r"""
330
- Function invoked when calling the pipeline for generation.
331
- Args:
332
- prompt (`str` or `List[str]`, *optional*):
333
- The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`
334
- instead.
335
- image (`PIL.Image.Image`):
336
- `Image`, or tensor representing an image batch which will be inpainted, *i.e.* parts of the image will
337
- be masked out with `mask_image` and repainted according to `prompt`.
338
- control_image (`torch.FloatTensor`, `PIL.Image.Image`, `List[torch.FloatTensor]` or `List[PIL.Image.Image]`):
339
- The ControlNet input condition. ControlNet uses this input condition to generate guidance for the UNet. If
340
- the type is specified as `torch.FloatTensor`, it is passed to ControlNet as is. `PIL.Image.Image` can
341
- also be accepted as an image. The control image is automatically resized to fit the output image.
342
- mask_image (`PIL.Image.Image`):
343
- `Image`, or tensor representing an image batch, to mask `image`. White pixels in the mask will be
344
- repainted, while black pixels will be preserved. If `mask_image` is a PIL image, it will be converted
345
- to a single channel (luminance) before use. If it's a tensor, it should contain one color channel (L)
346
- instead of 3, so the expected shape would be `(B, H, W, 1)`.
347
- height (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
348
- The height in pixels of the generated image.
349
- width (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
350
- The width in pixels of the generated image.
351
- num_inference_steps (`int`, *optional*, defaults to 50):
352
- The number of denoising steps. More denoising steps usually lead to a higher quality image at the
353
- expense of slower inference.
354
- guidance_scale (`float`, *optional*, defaults to 7.5):
355
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
356
- `guidance_scale` is defined as `w` of equation 2 of [Imagen
357
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
358
- 1`. Higher guidance scale encourages generating images that are closely linked to the text `prompt`,
359
- usually at the expense of lower image quality.
360
- negative_prompt (`str` or `List[str]`, *optional*):
361
- The prompt or prompts not to guide the image generation. If not defined, one has to pass
362
- `negative_prompt_embeds` instead.
363
- Ignored when not using guidance (i.e., ignored if `guidance_scale` is less than `1`).
364
- num_images_per_prompt (`int`, *optional*, defaults to 1):
365
- The number of images to generate per prompt.
366
- eta (`float`, *optional*, defaults to 0.0):
367
- Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to
368
- [`schedulers.DDIMScheduler`], will be ignored for others.
369
- generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
370
- One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
371
- to make generation deterministic.
372
- latents (`torch.FloatTensor`, *optional*):
373
- Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
374
- generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
375
- tensor will be generated by sampling using the supplied random `generator`.
376
- prompt_embeds (`torch.FloatTensor`, *optional*):
377
- Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
378
- provided, text embeddings will be generated from `prompt` input argument.
379
- negative_prompt_embeds (`torch.FloatTensor`, *optional*):
380
- Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
381
- weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
382
- argument.
383
- output_type (`str`, *optional*, defaults to `"pil"`):
384
- The output format of the generated image. Choose between
385
- [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`.
386
- return_dict (`bool`, *optional*, defaults to `True`):
387
- Whether or not to return a [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] instead of a
388
- plain tuple.
389
- callback (`Callable`, *optional*):
390
- A function that will be called every `callback_steps` steps during inference. The function will be
391
- called with the following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
392
- callback_steps (`int`, *optional*, defaults to 1):
393
- The frequency at which the `callback` function will be called. If not specified, the callback will be
394
- called at every step.
395
- cross_attention_kwargs (`dict`, *optional*):
396
- A kwargs dictionary that if specified is passed along to the `AttnProcessor` as defined under
397
- `self.processor` in
398
- [diffusers.cross_attention](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
399
- controlnet_conditioning_scale (`float`, *optional*, defaults to 1.0):
400
- The outputs of the controlnet are multiplied by `controlnet_conditioning_scale` before they are added
401
- to the residual in the original unet.
402
- Examples:
403
- Returns:
404
- [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] or `tuple`:
405
- [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] if `return_dict` is True, otherwise a `tuple`.
406
- When returning a tuple, the first element is a list with the generated images, and the second element is a
407
- list of `bool`s denoting whether the corresponding generated image likely represents "not-safe-for-work"
408
- (nsfw) content, according to the `safety_checker`.
409
- """
410
- # 0. Default height and width to unet
411
- height, width = self._default_height_width(height, width, control_image)
412
-
413
- # 1. Check inputs. Raise error if not correct
414
- self.check_inputs(
415
- prompt,
416
- control_image,
417
- height,
418
- width,
419
- callback_steps,
420
- negative_prompt,
421
- prompt_embeds,
422
- negative_prompt_embeds,
423
- )
424
-
425
- # 2. Define call parameters
426
- if prompt is not None and isinstance(prompt, str):
427
- batch_size = 1
428
- elif prompt is not None and isinstance(prompt, list):
429
- batch_size = len(prompt)
430
- else:
431
- batch_size = prompt_embeds.shape[0]
432
-
433
- device = self._execution_device
434
- # here `guidance_scale` is defined analogously to the guidance weight `w` of equation (2)
435
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
436
- # corresponds to doing no classifier free guidance.
437
- do_classifier_free_guidance = guidance_scale > 1.0
438
-
439
- # 3. Encode input prompt
440
- prompt_embeds = self._encode_prompt(
441
- prompt,
442
- device,
443
- num_images_per_prompt,
444
- do_classifier_free_guidance,
445
- negative_prompt,
446
- prompt_embeds=prompt_embeds,
447
- negative_prompt_embeds=negative_prompt_embeds,
448
- )
449
-
450
- # 4. Prepare image
451
- control_image = self.prepare_image(
452
- control_image,
453
- width,
454
- height,
455
- batch_size * num_images_per_prompt,
456
- num_images_per_prompt,
457
- device,
458
- self.controlnet.dtype,
459
- )
460
-
461
- if do_classifier_free_guidance:
462
- control_image = torch.cat([control_image] * 2)
463
-
464
- # 5. Prepare timesteps
465
- self.scheduler.set_timesteps(num_inference_steps, device=device)
466
- timesteps = self.scheduler.timesteps
467
-
468
- # 6. Prepare latent variables
469
- num_channels_latents = self.controlnet.in_channels
470
- latents = self.prepare_latents(
471
- batch_size * num_images_per_prompt,
472
- num_channels_latents,
473
- height,
474
- width,
475
- prompt_embeds.dtype,
476
- device,
477
- generator,
478
- latents,
479
- )
480
-
481
- # EXTRA: prepare mask latents
482
- mask, masked_image = prepare_mask_and_masked_image(image, mask_image)
483
- mask, masked_image_latents = self.prepare_mask_latents(
484
- mask,
485
- masked_image,
486
- batch_size * num_images_per_prompt,
487
- height,
488
- width,
489
- prompt_embeds.dtype,
490
- device,
491
- generator,
492
- do_classifier_free_guidance,
493
- )
494
-
495
- # 7. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
496
- extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta)
497
-
498
- # 8. Denoising loop
499
- num_warmup_steps = (
500
- len(timesteps) - num_inference_steps * self.scheduler.order
501
- )
502
- with self.progress_bar(total=num_inference_steps) as progress_bar:
503
- for i, t in enumerate(timesteps):
504
- # expand the latents if we are doing classifier free guidance
505
- latent_model_input = (
506
- torch.cat([latents] * 2)
507
- if do_classifier_free_guidance
508
- else latents
509
- )
510
- latent_model_input = self.scheduler.scale_model_input(
511
- latent_model_input, t
512
- )
513
-
514
- down_block_res_samples, mid_block_res_sample = self.controlnet(
515
- latent_model_input,
516
- t,
517
- encoder_hidden_states=prompt_embeds,
518
- controlnet_cond=control_image,
519
- return_dict=False,
520
- )
521
-
522
- down_block_res_samples = [
523
- down_block_res_sample * controlnet_conditioning_scale
524
- for down_block_res_sample in down_block_res_samples
525
- ]
526
- mid_block_res_sample *= controlnet_conditioning_scale
527
-
528
- # predict the noise residual
529
- latent_model_input = torch.cat(
530
- [latent_model_input, mask, masked_image_latents], dim=1
531
- )
532
- noise_pred = self.unet(
533
- latent_model_input,
534
- t,
535
- encoder_hidden_states=prompt_embeds,
536
- cross_attention_kwargs=cross_attention_kwargs,
537
- down_block_additional_residuals=down_block_res_samples,
538
- mid_block_additional_residual=mid_block_res_sample,
539
- ).sample
540
-
541
- # perform guidance
542
- if do_classifier_free_guidance:
543
- noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
544
- noise_pred = noise_pred_uncond + guidance_scale * (
545
- noise_pred_text - noise_pred_uncond
546
- )
547
-
548
- # compute the previous noisy sample x_t -> x_t-1
549
- latents = self.scheduler.step(
550
- noise_pred, t, latents, **extra_step_kwargs
551
- ).prev_sample
552
-
553
- # call the callback, if provided
554
- if i == len(timesteps) - 1 or (
555
- (i + 1) > num_warmup_steps
556
- and (i + 1) % self.scheduler.order == 0
557
- ):
558
- progress_bar.update()
559
- if callback is not None and i % callback_steps == 0:
560
- callback(i, t, latents)
561
-
562
- # If we do sequential model offloading, let's offload unet and controlnet
563
- # manually for max memory savings
564
- if (
565
- hasattr(self, "final_offload_hook")
566
- and self.final_offload_hook is not None
567
- ):
568
- self.unet.to("cpu")
569
- self.controlnet.to("cpu")
570
- torch.cuda.empty_cache()
571
-
572
- if output_type == "latent":
573
- image = latents
574
- has_nsfw_concept = None
575
- elif output_type == "pil":
576
- # 8. Post-processing
577
- image = self.decode_latents(latents)
578
-
579
- # 9. Run safety checker
580
- image, has_nsfw_concept = self.run_safety_checker(
581
- image, device, prompt_embeds.dtype
582
- )
583
-
584
- # 10. Convert to PIL
585
- image = self.numpy_to_pil(image)
586
- else:
587
- # 8. Post-processing
588
- image = self.decode_latents(latents)
589
-
590
- # 9. Run safety checker
591
- image, has_nsfw_concept = self.run_safety_checker(
592
- image, device, prompt_embeds.dtype
593
- )
594
-
595
- # Offload last model to CPU
596
- if (
597
- hasattr(self, "final_offload_hook")
598
- and self.final_offload_hook is not None
599
- ):
600
- self.final_offload_hook.offload()
601
-
602
- if not return_dict:
603
- return (image, has_nsfw_concept)
604
-
605
- return StableDiffusionPipelineOutput(
606
- images=image, nsfw_content_detected=has_nsfw_concept
607
- )
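For reference, here is a minimal, hypothetical usage sketch of the `StableDiffusionControlNetInpaintPipeline` deleted above. The checkpoint ids, file names, and parameter values are illustrative assumptions rather than part of this commit; an inpainting checkpoint is assumed because the pipeline concatenates the mask and masked-image latents onto the latent input, so the UNet must accept 9 input channels.

```python
# Hypothetical usage sketch of the deleted pipeline class (illustrative names
# and checkpoints; not part of this repository).
import torch
from diffusers import ControlNetModel
from PIL import Image

# Assumed, pre-prepared inputs: the picture to inpaint, a black/white mask
# (white = repaint, black = keep), and a Canny edge map as the ControlNet condition.
init_image = Image.open("init.png").convert("RGB").resize((512, 512))
mask_image = Image.open("mask.png").convert("L").resize((512, 512))
canny_image = Image.open("canny.png").convert("RGB").resize((512, 512))

controlnet = ControlNetModel.from_pretrained(
    "lllyasviel/sd-controlnet-canny", torch_dtype=torch.float16
)
pipe = StableDiffusionControlNetInpaintPipeline.from_pretrained(
    "runwayml/stable-diffusion-inpainting",  # 9-channel inpainting UNet assumed
    controlnet=controlnet,
    torch_dtype=torch.float16,
).to("cuda")

result = pipe(
    prompt="a photo of a man.",
    image=init_image,
    control_image=canny_image,
    mask_image=mask_image,
    num_inference_steps=50,
    guidance_scale=7.5,
).images[0]
result.save("output.png")
```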
diffusion_webui/helpers.py DELETED
@@ -1,48 +0,0 @@
1
- from diffusion_webui.controlnet.controlnet_canny import (
2
- stable_diffusion_controlnet_canny,
3
- stable_diffusion_controlnet_canny_app,
4
- )
5
- from diffusion_webui.controlnet.controlnet_depth import (
6
- stable_diffusion_controlnet_depth,
7
- stable_diffusion_controlnet_depth_app,
8
- )
9
- from diffusion_webui.controlnet.controlnet_hed import (
10
- stable_diffusion_controlnet_hed,
11
- stable_diffusion_controlnet_hed_app,
12
- )
13
- from diffusion_webui.controlnet.controlnet_mlsd import (
14
- stable_diffusion_controlnet_mlsd,
15
- stable_diffusion_controlnet_mlsd_app,
16
- )
17
- from diffusion_webui.controlnet.controlnet_pose import (
18
- stable_diffusion_controlnet_pose,
19
- stable_diffusion_controlnet_pose_app,
20
- )
21
- from diffusion_webui.controlnet.controlnet_scribble import (
22
- stable_diffusion_controlnet_scribble,
23
- stable_diffusion_controlnet_scribble_app,
24
- )
25
- from diffusion_webui.controlnet.controlnet_seg import (
26
- stable_diffusion_controlnet_seg,
27
- stable_diffusion_controlnet_seg_app,
28
- )
29
- from diffusion_webui.controlnet_inpaint.controlnet_inpaint_app import (
30
- stable_diffusion_inpiant_controlnet_canny,
31
- stable_diffusion_inpiant_controlnet_canny_app,
32
- )
33
- from diffusion_webui.stable_diffusion.img2img_app import (
34
- stable_diffusion_img2img,
35
- stable_diffusion_img2img_app,
36
- )
37
- from diffusion_webui.stable_diffusion.inpaint_app import (
38
- stable_diffusion_inpaint,
39
- stable_diffusion_inpaint_app,
40
- )
41
- from diffusion_webui.stable_diffusion.keras_txt2img import (
42
- keras_stable_diffusion,
43
- keras_stable_diffusion_app,
44
- )
45
- from diffusion_webui.stable_diffusion.text2img_app import (
46
- stable_diffusion_text2img,
47
- stable_diffusion_text2img_app,
48
- )
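The helpers module above simply re-exports one `*_app()` UI builder per task. The top-level entry point is not part of this diff, but a hypothetical sketch of how these builders could be mounted as Gradio tabs looks like this (the tab names and the selection of builders are assumptions):

```python
# Hypothetical app.py sketch: mount a few of the re-exported builders as tabs.
import gradio as gr

from diffusion_webui.helpers import (
    stable_diffusion_controlnet_canny_app,
    stable_diffusion_img2img_app,
    stable_diffusion_inpaint_app,
    stable_diffusion_text2img_app,
)

with gr.Blocks() as app:
    with gr.Tab("Text2Img"):
        stable_diffusion_text2img_app()
    with gr.Tab("Img2Img"):
        stable_diffusion_img2img_app()
    with gr.Tab("Inpaint"):
        stable_diffusion_inpaint_app()
    with gr.Tab("ControlNet Canny"):
        stable_diffusion_controlnet_canny_app()

app.launch()
```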
diffusion_webui/stable_diffusion/__init__.py DELETED
File without changes
diffusion_webui/stable_diffusion/img2img_app.py DELETED
@@ -1,131 +0,0 @@
1
- import gradio as gr
2
- import torch
3
- from diffusers import DDIMScheduler, StableDiffusionImg2ImgPipeline
4
- from PIL import Image
5
-
6
- stable_model_list = [
7
- "runwayml/stable-diffusion-v1-5",
8
- "stabilityai/stable-diffusion-2-1",
9
- ]
10
-
11
- stable_prompt_list = ["a photo of a man.", "a photo of a girl."]
12
-
13
- stable_negative_prompt_list = ["bad, ugly", "deformed"]
14
-
15
- data_list = [
16
- "data/test.png",
17
- ]
18
-
19
-
20
- def stable_diffusion_img2img(
21
- image_path: str,
22
- model_path: str,
23
- prompt: str,
24
- negative_prompt: str,
25
- guidance_scale: int,
26
- num_inference_step: int,
27
- ):
28
-
29
- image = Image.open(image_path)
30
-
31
- pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
32
- model_path, safety_checker=None, torch_dtype=torch.float16
33
- )
34
- pipe.to("cuda")
35
- pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
36
- pipe.enable_xformers_memory_efficient_attention()
37
-
38
- output = pipe(
39
- prompt=prompt,
40
- image=image,
41
- negative_prompt=negative_prompt,
42
- num_inference_steps=num_inference_step,
43
- guidance_scale=guidance_scale,
44
- ).images
45
-
46
- return output[0]
47
-
48
-
49
- def stable_diffusion_img2img_app():
50
- with gr.Blocks():
51
- with gr.Row():
52
- with gr.Column():
53
- image2image2_image_file = gr.Image(
54
- type="filepath", label="Image"
55
- )
56
-
57
- image2image_model_path = gr.Dropdown(
58
- choices=stable_model_list,
59
- value=stable_model_list[0],
60
- label="Image-Image Model Id",
61
- )
62
-
63
- image2image_prompt = gr.Textbox(
64
- lines=1, value=stable_prompt_list[0], label="Prompt"
65
- )
66
-
67
- image2image_negative_prompt = gr.Textbox(
68
- lines=1,
69
- value=stable_negative_prompt_list[0],
70
- label="Negative Prompt",
71
- )
72
-
73
- with gr.Accordion("Advanced Options", open=False):
74
- image2image_guidance_scale = gr.Slider(
75
- minimum=0.1,
76
- maximum=15,
77
- step=0.1,
78
- value=7.5,
79
- label="Guidance Scale",
80
- )
81
-
82
- image2image_num_inference_step = gr.Slider(
83
- minimum=1,
84
- maximum=100,
85
- step=1,
86
- value=50,
87
- label="Num Inference Step",
88
- )
89
-
90
- image2image_predict = gr.Button(value="Generator")
91
-
92
- with gr.Column():
93
- output_image = gr.Image(label="Output")
94
-
95
- gr.Examples(
96
- fn=stable_diffusion_img2img,
97
- examples=[
98
- [
99
- data_list[0],
100
- stable_model_list[0],
101
- stable_prompt_list[0],
102
- stable_negative_prompt_list[0],
103
- 7.5,
104
- 50,
105
- ],
106
- ],
107
- inputs=[
108
- image2image2_image_file,
109
- image2image_model_path,
110
- image2image_prompt,
111
- image2image_negative_prompt,
112
- image2image_guidance_scale,
113
- image2image_num_inference_step,
114
- ],
115
- outputs=[output_image],
116
- cache_examples=False,
117
- label="Image-Image Generator",
118
- )
119
-
120
- image2image_predict.click(
121
- fn=stable_diffusion_img2img,
122
- inputs=[
123
- image2image2_image_file,
124
- image2image_model_path,
125
- image2image_prompt,
126
- image2image_negative_prompt,
127
- image2image_guidance_scale,
128
- image2image_num_inference_step,
129
- ],
130
- outputs=[output_image],
131
- )
diffusion_webui/stable_diffusion/inpaint_app.py DELETED
@@ -1,105 +0,0 @@
1
- import gradio as gr
2
- import torch
3
- from diffusers import DDIMScheduler, DiffusionPipeline
4
-
5
- stable_inpiant_model_list = [
6
- "stabilityai/stable-diffusion-2-inpainting",
7
- "runwayml/stable-diffusion-inpainting",
8
- ]
9
-
10
- stable_prompt_list = ["a photo of a man.", "a photo of a girl."]
11
- stable_negative_prompt_list = ["bad, ugly", "deformed"]
12
-
13
-
14
- def stable_diffusion_inpaint(
15
- dict: str,
16
- model_path: str,
17
- prompt: str,
18
- negative_prompt: str,
19
- guidance_scale: int,
20
- num_inference_step: int,
21
- ):
22
-
23
- image = dict["image"].convert("RGB").resize((512, 512))
24
- mask_image = dict["mask"].convert("RGB").resize((512, 512))
25
- pipe = DiffusionPipeline.from_pretrained(
26
- model_path,
27
- revision="fp16",
28
- torch_dtype=torch.float16,
29
- )
30
- pipe.to("cuda")
31
-
32
- output = pipe(
33
- prompt=prompt,
34
- image=image,
35
- mask_image=mask_image,
36
- negative_prompt=negative_prompt,
37
- num_inference_steps=num_inference_step,
38
- guidance_scale=guidance_scale,
39
- ).images
40
-
41
- return output[0]
42
-
43
-
44
- def stable_diffusion_inpaint_app():
45
- with gr.Blocks():
46
- with gr.Row():
47
- with gr.Column():
48
- inpaint_image_file = gr.Image(
49
- source="upload",
50
- tool="sketch",
51
- elem_id="image_upload",
52
- type="pil",
53
- label="Upload",
54
- )
55
-
56
- inpaint_model_id = gr.Dropdown(
57
- choices=stable_inpiant_model_list,
58
- value=stable_inpiant_model_list[0],
59
- label="Inpaint Model Id",
60
- )
61
-
62
- inpaint_prompt = gr.Textbox(
63
- lines=1, value=stable_prompt_list[0], label="Prompt"
64
- )
65
-
66
- inpaint_negative_prompt = gr.Textbox(
67
- lines=1,
68
- value=stable_negative_prompt_list[0],
69
- label="Negative Prompt",
70
- )
71
-
72
- with gr.Accordion("Advanced Options", open=False):
73
- inpaint_guidance_scale = gr.Slider(
74
- minimum=0.1,
75
- maximum=15,
76
- step=0.1,
77
- value=7.5,
78
- label="Guidance Scale",
79
- )
80
-
81
- inpaint_num_inference_step = gr.Slider(
82
- minimum=1,
83
- maximum=100,
84
- step=1,
85
- value=50,
86
- label="Num Inference Step",
87
- )
88
-
89
- inpaint_predict = gr.Button(value="Generator")
90
-
91
- with gr.Column():
92
- output_image = gr.Image(label="Outputs")
93
-
94
- inpaint_predict.click(
95
- fn=stable_diffusion_inpaint,
96
- inputs=[
97
- inpaint_image_file,
98
- inpaint_model_id,
99
- inpaint_prompt,
100
- inpaint_negative_prompt,
101
- inpaint_guidance_scale,
102
- inpaint_num_inference_step,
103
- ],
104
- outputs=output_image,
105
- )
diffusion_webui/stable_diffusion/keras_txt2img.py DELETED
@@ -1,131 +0,0 @@
1
- import gradio as gr
2
- from huggingface_hub import from_pretrained_keras
3
- from keras_cv import models
4
- from tensorflow import keras
5
-
6
- keras_model_list = [
7
- "keras-dreambooth/keras_diffusion_lowpoly_world",
8
- "keras-dreambooth/keras-diffusion-traditional-furniture",
9
- ]
10
-
11
- stable_prompt_list = [
12
- "photo of lowpoly_world",
13
- "photo of traditional_furniture",
14
- ]
15
-
16
- stable_negative_prompt_list = ["bad, ugly", "deformed"]
17
-
18
- keras.mixed_precision.set_global_policy("mixed_float16")
19
- dreambooth_model = models.StableDiffusion(
20
- img_width=512,
21
- img_height=512,
22
- jit_compile=True,
23
- )
24
-
25
-
26
- def keras_stable_diffusion(
27
- model_path: str,
28
- prompt: str,
29
- negative_prompt: str,
30
- num_imgs_to_gen: int,
31
- num_steps: int,
32
- ):
33
- """
34
- This function is used to generate images using our fine-tuned Keras DreamBooth Stable Diffusion model.
35
- Args:
- model_path (str): Hub id of the fine-tuned Keras diffusion model whose weights are loaded.
36
- prompt (str): The text input given by the user based on which images will be generated.
- negative_prompt (str): Text describing what the generated images should avoid.
37
- num_imgs_to_gen (int): The number of images to be generated using the given prompt.
38
- num_steps (int): The number of denoising steps.
39
- Returns:
40
- generated_img (List): List of images that were generated using the model.
41
- """
42
- loaded_diffusion_model = from_pretrained_keras(model_path)
43
- dreambooth_model._diffusion_model = loaded_diffusion_model
44
-
45
- generated_img = dreambooth_model.text_to_image(
46
- prompt,
47
- negative_prompt=negative_prompt,
48
- batch_size=num_imgs_to_gen,
49
- num_steps=num_steps,
50
- )
51
-
52
- return generated_img
53
-
54
-
55
- def keras_stable_diffusion_app():
56
- with gr.Blocks():
57
- with gr.Row():
58
- with gr.Column():
59
- keras_text2image_model_path = gr.Dropdown(
60
- choices=keras_model_list,
61
- value=keras_model_list[0],
62
- label="Text-Image Model Id",
63
- )
64
-
65
- keras_text2image_prompt = gr.Textbox(
66
- lines=1, value=stable_prompt_list[0], label="Prompt"
67
- )
68
-
69
- keras_text2image_negative_prompt = gr.Textbox(
70
- lines=1,
71
- value=stable_negative_prompt_list[0],
72
- label="Negative Prompt",
73
- )
74
-
75
- keras_text2image_guidance_scale = gr.Slider(
76
- minimum=0.1,
77
- maximum=15,
78
- step=0.1,
79
- value=7.5,
80
- label="Guidance Scale",
81
- )
82
-
83
- keras_text2image_num_inference_step = gr.Slider(
84
- minimum=1,
85
- maximum=100,
86
- step=1,
87
- value=50,
88
- label="Num Inference Step",
89
- )
90
-
91
- keras_text2image_predict = gr.Button(value="Generator")
92
-
93
- with gr.Column():
94
- output_image = gr.Gallery(label="Outputs").style(grid=(1, 2))
95
-
96
- gr.Examples(
97
- fn=keras_stable_diffusion,
98
- inputs=[
99
- keras_text2image_model_path,
100
- keras_text2image_prompt,
101
- keras_text2image_negative_prompt,
102
- keras_text2image_guidance_scale,
103
- keras_text2image_num_inference_step,
104
- ],
105
- outputs=[output_image],
106
- examples=[
107
- [
108
- keras_model_list[0],
109
- stable_prompt_list[0],
110
- stable_negative_prompt_list[0],
111
- 7.5,
112
- 50,
113
- 512,
114
- 512,
115
- ],
116
- ],
117
- label="Keras Stable Diffusion Example",
118
- cache_examples=False,
119
- )
120
-
121
- keras_text2image_predict.click(
122
- fn=keras_stable_diffusion,
123
- inputs=[
124
- keras_text2image_model_path,
125
- keras_text2image_prompt,
126
- keras_text2image_negative_prompt,
127
- keras_text2image_guidance_scale,
128
- keras_text2image_num_inference_step,
129
- ],
130
- outputs=output_image,
131
- )
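As its docstring notes, `keras_stable_diffusion` swaps a fine-tuned DreamBooth diffusion model into the shared `keras_cv` Stable Diffusion instance and returns a batch of generated images. A minimal direct call might look like the sketch below; the argument values are illustrative, and it assumes the deleted module is importable and TensorFlow has a GPU available:

```python
# Illustrative direct call of the deleted helper (hypothetical values).
from diffusion_webui.stable_diffusion.keras_txt2img import keras_stable_diffusion

images = keras_stable_diffusion(
    model_path="keras-dreambooth/keras_diffusion_lowpoly_world",
    prompt="photo of lowpoly_world",
    negative_prompt="bad, ugly",
    num_imgs_to_gen=1,
    num_steps=50,
)
first = images[0]  # generated image as an (H, W, 3) uint8 NumPy array
```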
diffusion_webui/stable_diffusion/text2img_app.py DELETED
@@ -1,139 +0,0 @@
1
- import gradio as gr
2
- import torch
3
- from diffusers import DDIMScheduler, StableDiffusionPipeline
4
-
5
- stable_model_list = [
6
- "andite/anything-v4.0",
7
- ]
8
-
9
- stable_prompt_list = ["a photo of a man.", "a photo of a girl."]
10
-
11
- stable_negative_prompt_list = ["bad, ugly", "deformed"]
12
-
13
-
14
- def stable_diffusion_text2img(
15
- model_path: str,
16
- prompt: str,
17
- negative_prompt: str,
18
- guidance_scale: int,
19
- num_inference_step: int,
20
- height: int,
21
- width: int,
22
- ):
23
-
24
- pipe = StableDiffusionPipeline.from_pretrained(
25
- model_path, safety_checker=None, torch_dtype=torch.float16
26
- ).to("cuda")
27
-
28
- pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
29
- pipe.enable_xformers_memory_efficient_attention()
30
-
31
- images = pipe(
32
- prompt,
33
- height=height,
34
- width=width,
35
- negative_prompt=negative_prompt,
36
- num_inference_steps=num_inference_step,
37
- guidance_scale=guidance_scale,
38
- ).images
39
-
40
- return images[0]
41
-
42
-
43
- def stable_diffusion_text2img_app():
44
- with gr.Blocks():
45
- with gr.Row():
46
- with gr.Column():
47
- text2image_model_path = gr.Dropdown(
48
- choices=stable_model_list,
49
- value=stable_model_list[0],
50
- label="Text-Image Model Id",
51
- )
52
-
53
- text2image_prompt = gr.Textbox(
54
- lines=1, value=stable_prompt_list[0], label="Prompt"
55
- )
56
-
57
- text2image_negative_prompt = gr.Textbox(
58
- lines=1,
59
- value=stable_negative_prompt_list[0],
60
- label="Negative Prompt",
61
- )
62
-
63
- with gr.Accordion("Advanced Options", open=False):
64
- text2image_guidance_scale = gr.Slider(
65
- minimum=0.1,
66
- maximum=15,
67
- step=0.1,
68
- value=7.5,
69
- label="Guidance Scale",
70
- )
71
-
72
- text2image_num_inference_step = gr.Slider(
73
- minimum=1,
74
- maximum=100,
75
- step=1,
76
- value=50,
77
- label="Num Inference Step",
78
- )
79
-
80
- text2image_height = gr.Slider(
81
- minimum=128,
82
- maximum=1280,
83
- step=32,
84
- value=512,
85
- label="Image Height",
86
- )
87
-
88
- text2image_width = gr.Slider(
89
- minimum=128,
90
- maximum=1280,
91
- step=32,
92
- value=768,
93
- label="Image Width",
94
- )
95
-
96
- text2image_predict = gr.Button(value="Generator")
97
-
98
- with gr.Column():
99
- output_image = gr.Image(label="Output")
100
-
101
- gr.Examples(
102
- examples=[
103
- [
104
- stable_model_list[0],
105
- stable_prompt_list[0],
106
- stable_negative_prompt_list[0],
107
- 7.5,
108
- 50,
109
- 512,
110
- 768,
111
- ]
112
- ],
113
- inputs=[
114
- text2image_model_path,
115
- text2image_prompt,
116
- text2image_negative_prompt,
117
- text2image_guidance_scale,
118
- text2image_num_inference_step,
119
- text2image_height,
120
- text2image_width,
121
- ],
122
- outputs=[output_image],
123
- cache_examples=False,
124
- fn=stable_diffusion_text2img,
125
- label="Text2Image Example",
126
- )
127
- text2image_predict.click(
128
- fn=stable_diffusion_text2img,
129
- inputs=[
130
- text2image_model_path,
131
- text2image_prompt,
132
- text2image_negative_prompt,
133
- text2image_guidance_scale,
134
- text2image_num_inference_step,
135
- text2image_height,
136
- text2image_width,
137
- ],
138
- outputs=output_image,
139
- )