kadirnar committed
Commit 5784791
Parent: a8cbc39

Upload 3 files
app.py CHANGED
@@ -1,85 +1,30 @@
- from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
- from utils import write_video, dummy, preprocess_image, preprocess_mask_image
- from PIL import Image
- import gradio as gr
- import torch
- import os
- os.environ["CUDA_VISIBLE_DEVICES"] = "0"
-
-
- orig_prompt = "Ancient underground architectural ruins of Hong Kong in a flooded apocalypse landscape of dead skyscrapers"
- orig_negative_prompt = "blurry, bad art, blurred, text, watermark"
- model_list = ["stabilityai/stable-diffusion-2-inpainting", "runwayml/stable-diffusion-inpainting"]
-
- def stable_diffusion_zoom_out(
-     repo_id,
-     original_prompt,
-     negative_prompt,
-     step_size,
-     num_frames,
-     fps,
-     num_inference_steps
- ):
-
-     pipe = DiffusionPipeline.from_pretrained(repo_id, torch_dtype=torch.float16)
-     pipe.set_use_memory_efficient_attention_xformers(True)
-     pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
-     pipe = pipe.to("cuda")
-     pipe.safety_checker = dummy
-
-     new_image = Image.new(mode="RGBA", size=(512, 512))
-     current_image, mask_image = preprocess_mask_image(new_image)
-
-     current_image = pipe(prompt=[original_prompt], negative_prompt=[negative_prompt], image=current_image, mask_image=mask_image, num_inference_steps=num_inference_steps).images[0]
-
-     all_frames = []
-     all_frames.append(current_image)
-
-     for i in range(num_frames):
-         prev_image = preprocess_image(current_image, step_size, 512)
-         current_image = prev_image
-         current_image, mask_image = preprocess_mask_image(current_image)
-         current_image = pipe(prompt=[original_prompt], negative_prompt=[negative_prompt], image=current_image, mask_image=mask_image, num_inference_steps=num_inference_steps).images[0]
-
-         current_image.paste(prev_image, mask=prev_image)
-         all_frames.append(current_image)
-
-     save_path = "output.mp4"
-     write_video(save_path, all_frames, fps=fps)
-     return save_path
-
-
- inputs = [
-     gr.Dropdown(model_list, value=model_list[0], label="Model"),
-     gr.inputs.Textbox(lines=5, default=orig_prompt, label="Prompt"),
-     gr.inputs.Textbox(lines=1, default=orig_negative_prompt, label="Negative Prompt"),
-     gr.inputs.Slider(minimum=1, maximum=120, default=25, step=5, label="Steps"),
-     gr.inputs.Slider(minimum=1, maximum=100, default=10, step=1, label="Frames"),
-     gr.inputs.Slider(minimum=1, maximum=100, default=16, step=1, label="FPS"),
-     gr.inputs.Slider(minimum=1, maximum=100, default=15, step=1, label="Inference Steps")
- ]
-
- output = gr.outputs.Video()
- examples = [
-     ["stabilityai/stable-diffusion-2-inpainting", orig_prompt, orig_negative_prompt, 25, 10, 16, 15],
- ]
-
- title = "Stable Diffusion Infinite Zoom Out"
- description = """<p>For faster inference without waiting in the queue, you may duplicate this Space and upgrade to a GPU in the settings.
- <br/>
- <a href="https://huggingface.co/spaces/kadirnar/stable-diffusion-2-infinite-zoom-out?duplicate=true">
- <img style="margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
- </p>"""
-
- demo_app = gr.Interface(
-     fn=stable_diffusion_zoom_out,
-     description=description,
-     inputs=inputs,
-     outputs=output,
-     title=title,
-     theme='huggingface',
-     examples=examples,
-     cache_examples=True
- )
- demo_app.launch(debug=True, enable_queue=True)
+ from inpaint_zoom.zoom_out_app import stable_diffusion_text2img_app
+
+
+ import gradio as gr
+
+ app = gr.Blocks()
+ with app:
+     gr.HTML(
+         """
+         <h1 style='text-align: center'>
+         Stable Diffusion Infinite Zoom Out
+         </h1>
+         """
+     )
+     gr.Markdown(
+         """
+         <h4 style='text-align: center'>
+         Follow me for more!
+         <a href='https://twitter.com/kadirnar_ai' target='_blank'>Twitter</a> | <a href='https://github.com/kadirnar' target='_blank'>Github</a> | <a href='https://www.linkedin.com/in/kadir-nar/' target='_blank'>Linkedin</a>
+         </h4>
+         """
+     )
+     with gr.Row():
+         with gr.Column():
+             with gr.Tab('Zoom Out'):
+                 stable_diffusion_text2img_app()
+             with gr.Tab('Zoom In'):
+                 pass
+
+ app.launch(debug=True)
inpaint_zoom/zoom_out_app.py ADDED
@@ -0,0 +1,154 @@
+ from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
+ from inpaint_zoom.zoom_out_utils import preprocess_image, preprocess_mask_image, write_video, dummy
+ from PIL import Image
+ import gradio as gr
+ import torch
+ import os
+ os.environ["CUDA_VISIBLE_DEVICES"] = "0"
+
+
+ stable_paint_model_list = [
+     "stabilityai/stable-diffusion-2-inpainting",
+     "runwayml/stable-diffusion-inpainting"
+ ]
+
+ stable_paint_prompt_list = [
+     "Ancient underground architectural ruins of Hong Kong in a flooded apocalypse landscape of dead skyscrapers",
+     "A beautiful landscape of a mountain range with a lake in the foreground",
+ ]
+
+ stable_paint_negative_prompt_list = [
+     "blurry, bad art, blurred, text, watermark",
+ ]
+
+
+ def stable_diffusion_zoom_out(
+     model_id,
+     original_prompt,
+     negative_prompt,
+     guidance_scale,
+     num_inference_steps,
+     step_size,
+     num_frames,
+     fps,
+ ):
+
+     pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
+     pipe.set_use_memory_efficient_attention_xformers(True)
+     pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
+     pipe = pipe.to("cuda")
+     pipe.safety_checker = dummy
+
+     new_image = Image.new(mode="RGBA", size=(512, 512))
+     current_image, mask_image = preprocess_mask_image(new_image)
+
+     current_image = pipe(
+         prompt=[original_prompt],
+         negative_prompt=[negative_prompt],
+         image=current_image,
+         mask_image=mask_image,
+         num_inference_steps=num_inference_steps,
+         guidance_scale=guidance_scale
+     ).images[0]
+
+     all_frames = []
+     all_frames.append(current_image)
+
+     for i in range(num_frames):
+         prev_image = preprocess_image(current_image, step_size, 512)
+         current_image = prev_image
+         current_image, mask_image = preprocess_mask_image(current_image)
+         current_image = pipe(
+             prompt=[original_prompt],
+             negative_prompt=[negative_prompt],
+             image=current_image,
+             mask_image=mask_image,
+             num_inference_steps=num_inference_steps,
+             guidance_scale=guidance_scale  # pass here too so the slider affects every frame
+         ).images[0]
+
+         current_image.paste(prev_image, mask=prev_image)
+         all_frames.append(current_image)
+
+     save_path = "output.mp4"
+     write_video(save_path, all_frames, fps=fps)
+     return save_path
+
+
+ def stable_diffusion_text2img_app():
+     with gr.Blocks():
+         with gr.Row():
+             with gr.Column():
+                 text2image_out_model_path = gr.Dropdown(
+                     choices=stable_paint_model_list,
+                     value=stable_paint_model_list[0],
+                     label='Text-Image Model Id'
+                 )
+
+                 text2image_out_prompt = gr.Textbox(
+                     lines=1,
+                     value=stable_paint_prompt_list[0],
+                     label='Prompt'
+                 )
+
+                 text2image_out_negative_prompt = gr.Textbox(
+                     lines=1,
+                     value=stable_paint_negative_prompt_list[0],
+                     label='Negative Prompt'
+                 )
+
+                 with gr.Accordion("Advanced Options", open=False):
+                     text2image_out_guidance_scale = gr.Slider(
+                         minimum=0.1,
+                         maximum=15,
+                         step=0.1,
+                         value=7.5,
+                         label='Guidance Scale'
+                     )
+
+                     text2image_out_num_inference_step = gr.Slider(
+                         minimum=1,
+                         maximum=100,
+                         step=1,
+                         value=50,
+                         label='Num Inference Steps'
+                     )
+
+                     text2image_out_step_size = gr.Slider(
+                         minimum=1,
+                         maximum=100,
+                         step=1,
+                         value=10,
+                         label='Step Size'
+                     )
+
+                     text2image_out_num_frames = gr.Slider(
+                         minimum=1,
+                         maximum=100,
+                         step=1,
+                         value=10,
+                         label='Frames'
+                     )
+
+                     text2image_out_fps = gr.Slider(
+                         minimum=1,
+                         maximum=100,
+                         step=1,
+                         value=30,
+                         label='FPS'
+                     )
+
+                 text2image_out_predict = gr.Button(value='Generate')
+
+             with gr.Column():
+                 # stable_diffusion_zoom_out returns an mp4 path, so a video component is needed here
+                 output_image = gr.Video(label='Output')
+
+         text2image_out_predict.click(
+             fn=stable_diffusion_zoom_out,
+             inputs=[
+                 text2image_out_model_path,
+                 text2image_out_prompt,
+                 text2image_out_negative_prompt,
+                 text2image_out_guidance_scale,
+                 text2image_out_num_inference_step,
+                 text2image_out_step_size,
+                 text2image_out_num_frames,
+                 text2image_out_fps
+             ],
+             outputs=output_image
+         )
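For reference, the new module can also be exercised without the Gradio UI. A minimal sketch, assuming a CUDA-capable GPU and this Space's dependencies are installed; the argument values simply mirror the slider defaults above:

# Minimal sketch: drive the zoom-out loop directly (values mirror the UI defaults).
from inpaint_zoom.zoom_out_app import stable_diffusion_zoom_out

video_path = stable_diffusion_zoom_out(
    model_id="stabilityai/stable-diffusion-2-inpainting",
    original_prompt="Ancient underground architectural ruins of Hong Kong in a flooded apocalypse landscape of dead skyscrapers",
    negative_prompt="blurry, bad art, blurred, text, watermark",
    guidance_scale=7.5,
    num_inference_steps=50,
    step_size=10,       # width in pixels of the outpainted border per frame
    num_frames=10,
    fps=30,
)
print(video_path)  # "output.mp4"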
inpaint_zoom/zoom_out_utils.py ADDED
@@ -0,0 +1,45 @@
+ import numpy as np
+ import cv2
+ from PIL import Image
+
+ def write_video(file_path, frames, fps):
+     """
+     Writes frames to an mp4 video file.
+     :param file_path: Path to the output video; must end with .mp4
+     :param frames: List of PIL.Image objects
+     :param fps: Desired frame rate
+     """
+
+     w, h = frames[0].size
+     fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
+     writer = cv2.VideoWriter(file_path, fourcc, fps, (w, h))
+
+     for frame in frames:
+         np_frame = np.array(frame.convert('RGB'))
+         cv_frame = cv2.cvtColor(np_frame, cv2.COLOR_RGB2BGR)  # OpenCV expects BGR channel order
+         writer.write(cv_frame)
+
+     writer.release()
+
+
+ def dummy(images, **kwargs):
+     # Stand-in safety checker that passes every image through unflagged.
+     return images, False
+
+ def preprocess_image(current_image, steps, image_size):
+     # Shrink the current frame and center it on a transparent canvas,
+     # leaving a `steps`-pixel border for the model to outpaint.
+     next_image = np.array(current_image.convert("RGBA")) * 0
+     prev_image = current_image.resize((image_size - 2 * steps, image_size - 2 * steps))
+     prev_image = prev_image.convert("RGBA")
+     prev_image = np.array(prev_image)
+     next_image[:, :, 3] = 1  # near-zero alpha marks the border region
+     next_image[steps:image_size - steps, steps:image_size - steps, :] = prev_image
+     prev_image = Image.fromarray(next_image)
+
+     return prev_image
+
+
+ def preprocess_mask_image(current_image):
+     # Invert the alpha channel into an inpainting mask: the low-alpha border
+     # becomes white (to be filled), the opaque center black (to be kept).
+     mask_image = np.array(current_image)[:, :, 3]  # assumes an RGBA image (check with .mode)
+     mask_image = Image.fromarray(255 - mask_image).convert("RGB")
+     current_image = current_image.convert("RGB")
+
+     return current_image, mask_image
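Taken together, these helpers implement one zoom-out step: shrink the last frame, surround it with a transparent border, and derive the inpainting mask from the alpha channel. A minimal sketch of that flow without the diffusion model; all names here are local to the example:

# Minimal sketch of one preprocessing step, using a solid image as a stand-in frame.
from PIL import Image
from inpaint_zoom.zoom_out_utils import preprocess_image, preprocess_mask_image, write_video

frame = Image.new(mode="RGB", size=(512, 512), color="gray")   # stand-in for a generated frame
shrunk = preprocess_image(frame, steps=10, image_size=512)     # RGBA canvas with a 10px low-alpha border
image, mask = preprocess_mask_image(shrunk)                    # mask is white where the border must be filled
write_video("demo.mp4", [image, image], fps=2)                 # two identical frames, just to exercise the writer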