v8hid committed on
Commit
74998aa
·
1 Parent(s): 6171e4c

Init zoom APP

Browse files
Files changed (3) hide show
  1. app.py +14 -0
  2. requirements.txt +6 -0
  3. zoom.py +207 -0
app.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Gradio entry point: wraps the infinite-zoom UI from zoom.py in a page with
# a centered title and serves it.
import gradio as gr

from zoom import zoom_app

app = gr.Blocks()
with app:
    gr.HTML(
        """
        <p style='text-align: center'>
        Text to Video - Infinite zoom effect
        </p>
        """
    )
    # zoom_app() creates its components inside the currently open Blocks context.
    zoom_app()

# Queuing is enabled through .queue(); passing enable_queue to launch() is
# deprecated in Gradio 3.x.
app.queue()
app.launch(share=True, debug=True)
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ Pillow==8.4.0
2
+ opencv-python==4.7.0.72
3
+ diffusers==0.14.0
4
+ torch==1.13.1+cu116
5
+ numpy==1.22.4
6
+ gradio==3.23.0
zoom.py ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from helpers import *
2
+ from diffusers import StableDiffusionInpaintPipeline, DPMSolverMultistepScheduler
3
+ from PIL import Image
4
+ import gradio as gr
5
+ import numpy as np
6
+ import torch
7
+ import os
8
+ import time
9
# Expose only GPU index 0 to CUDA for this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Inpainting checkpoints offered in the UI; the first entry is the default.
inpaint_model_list = [
    "stabilityai/stable-diffusion-2-inpainting",
    "runwayml/stable-diffusion-inpainting",
    "parlance/dreamlike-diffusion-1.0-inpainting",
    "ghunkins/stable-diffusion-liberty-inpainting",
    "ImNoOne/f222-inpainting-diffusers",
]

# Initial values of the prompt text boxes.
default_prompt = (
    "A psychedelic jungle with trees that have glowing, fractal-like patterns, "
    "Simon stalenhag poster 1920s style, street level view, hyper futuristic, "
    "8k resolution, hyper realistic"
)
default_negative_prompt = (
    "frames, borderline, text, charachter, duplicate, error, "
    "out of frame, watermark, low quality, ugly, deformed, blur"
)

# TODO: support per-step prompts keyed by the outpaint step they start at:
# prompts = {
#     0: "prompt1",
#     7: "prompt2"
# }

# When True, zoom() starts from the image at init_image_address instead of a
# generated one.
custom_init_image = False
init_image_address = "/init/image.jpeg"
27
+
28
+
29
def _alpha_to_inpaint_mask(rgba_image):
    """Return an RGB mask image built from the inverted alpha channel of *rgba_image*.

    Fully transparent pixels become white and fully opaque pixels become black.
    """
    alpha = np.array(rgba_image)[:, :, 3]
    return Image.fromarray(255 - alpha).convert("RGB")


def _disabled_safety_checker(images, **kwargs):
    """Stand-in safety checker: hand *images* back unchanged with a False flag."""
    return images, False


def zoom(
    model_id,
    prompt,
    negative_prompt,
    num_outpainting_steps,
    guidance_scale,
    num_inference_steps,
):
    """Render an "infinite zoom" video by repeated outpainting and save it as MP4.

    A 512x512 start frame is obtained (generated from the prompt, or loaded
    from ``init_image_address`` when ``custom_init_image`` is set). Each
    outpaint step shrinks the current frame, inpaints the border around it,
    and adds interpolated crop-and-resize frames between the two images.

    Args:
        model_id: Hugging Face id of the inpainting checkpoint to load.
        prompt: Text prompt used for every generation call.
        negative_prompt: Negative prompt used for every generation call.
        num_outpainting_steps: Number of outpaint iterations; coerced to int.
        guidance_scale: Guidance scale forwarded to the pipeline.
        num_inference_steps: Sampling steps per pipeline call; coerced to int.

    Returns:
        The file name of the written video, ``infinite_zoom_<timestamp>.mp4``,
        relative to the current working directory.
    """
    # UI sliders may deliver numbers as floats; range() and the sampler step
    # count both require real integers.
    num_outpainting_steps = int(num_outpainting_steps)
    num_inference_steps = int(num_inference_steps)

    pipe = StableDiffusionInpaintPipeline.from_pretrained(
        model_id,
        torch_dtype=torch.float16,
    )
    pipe.scheduler = DPMSolverMultistepScheduler.from_config(
        pipe.scheduler.config)
    pipe = pipe.to("cuda")
    pipe.safety_checker = _disabled_safety_checker
    pipe.enable_attention_slicing()

    height = 512
    width = height
    mask_width = 128
    num_interpol_frames = 30

    if custom_init_image:
        # load_img comes from the star-imported helpers module; presumably it
        # loads and resizes the file to the given size — TODO confirm.
        current_image = load_img(init_image_address, (width, height))
    else:
        # A fully transparent canvas yields an all-white mask, so the whole
        # frame is generated from the prompt. PIL sizes are (width, height).
        blank_canvas = Image.new(mode="RGBA", size=(width, height))
        # TODO: prompt=prompts[max(k for k in prompts.keys() if k >= 0)]
        init_images = pipe(prompt=prompt,
                           negative_prompt=negative_prompt,
                           image=blank_canvas.convert("RGB"),
                           guidance_scale=guidance_scale,
                           height=height,
                           width=width,
                           mask_image=_alpha_to_inpaint_mask(blank_canvas),
                           num_inference_steps=num_inference_steps)[0]
        current_image = init_images[0]

    all_frames = [current_image]

    for i in range(num_outpainting_steps):
        print(f"Outpaint step: {i + 1} / {num_outpainting_steps}")

        # Full-resolution copy of the frame before shrinking; pasted back into
        # the interpolated frames below.
        prev_image_fix = current_image

        # shrink_and_paste_on_blank comes from helpers; its result is used as
        # an RGBA image whose alpha channel marks the kept area.
        prev_image = shrink_and_paste_on_blank(current_image, mask_width)

        # create mask (black image with white mask_width width edges)
        mask_image = _alpha_to_inpaint_mask(prev_image)

        # inpainting step
        # TODO: prompt=prompts[max(k for k in prompts.keys() if k <= i)]
        # TODO: optional seeding via a torch.Generator passed as generator=...
        images = pipe(prompt=prompt,
                      negative_prompt=negative_prompt,
                      image=prev_image.convert("RGB"),
                      guidance_scale=guidance_scale,
                      height=height,
                      width=width,
                      mask_image=mask_image,
                      num_inference_steps=num_inference_steps)[0]
        current_image = images[0]
        # Put the shrunken original back over the centre of the result.
        current_image.paste(prev_image, mask=prev_image)

        # interpolation steps between 2 inpainted images (=sequential zoom and crop)
        for j in range(num_interpol_frames - 1):
            interpol_width = round(
                (1 - (1 - 2 * mask_width / height)
                 ** (1 - (j + 1) / num_interpol_frames)) * height / 2
            )
            interpol_image = current_image.crop((interpol_width,
                                                 interpol_width,
                                                 width - interpol_width,
                                                 height - interpol_width))
            interpol_image = interpol_image.resize((width, height))

            # paste the higher resolution previous image in the middle to
            # avoid drop in quality caused by zooming
            interpol_width2 = round(
                (1 - (height - 2 * mask_width)
                 / (height - 2 * interpol_width)) / 2 * height
            )
            prev_image_fix_crop = shrink_and_paste_on_blank(
                prev_image_fix, interpol_width2)
            interpol_image.paste(prev_image_fix_crop, mask=prev_image_fix_crop)

            all_frames.append(interpol_image)

        all_frames.append(current_image)

    video_file_name = "infinite_zoom_" + str(time.time())
    fps = 30
    save_path = video_file_name + ".mp4"
    start_frame_dupe_amount = 15
    last_frame_dupe_amount = 15

    # write_video comes from helpers; the meaning of the fourth positional
    # argument (False) is defined there — TODO confirm.
    write_video(save_path, all_frames, fps, False,
                start_frame_dupe_amount, last_frame_dupe_amount)
    return save_path
141
+
142
+
143
def zoom_app():
    """Build the Gradio controls for the zoom generator and wire them to zoom().

    The left column holds the prompt boxes, the outpaint-step slider, an
    "Advanced Options" accordion (model, guidance scale, sampling steps) and
    the generate button; the right column holds the output video. Clicking
    the button calls zoom() with the six control values.
    """
    with gr.Blocks():
        with gr.Row():
            with gr.Column():
                prompt_box = gr.Textbox(
                    label='Prompt',
                    value=default_prompt,
                    lines=1,
                )
                negative_prompt_box = gr.Textbox(
                    label='Negative Prompt',
                    value=default_negative_prompt,
                    lines=1,
                )
                outpaint_steps_slider = gr.Slider(
                    label='Total Outpaint Steps',
                    value=12,
                    minimum=5,
                    maximum=25,
                    step=1,
                )

                with gr.Accordion("Advanced Options", open=False):
                    model_dropdown = gr.Dropdown(
                        label='Pre-trained Model ID',
                        value=inpaint_model_list[0],
                        choices=inpaint_model_list,
                    )
                    guidance_slider = gr.Slider(
                        label='Guidance Scale',
                        value=7,
                        minimum=0.1,
                        maximum=15,
                        step=0.1,
                    )
                    sampling_slider = gr.Slider(
                        label='Sampling Steps for each outpaint',
                        value=50,
                        minimum=1,
                        maximum=100,
                        step=1,
                    )

                run_button = gr.Button(value='Generate video')

            with gr.Column():
                video_player = gr.Video(label='Output', format="mp4")
                video_player = video_player.style(width=512, height=512)

        # Order must match zoom()'s positional parameters.
        zoom_inputs = [
            model_dropdown,
            prompt_box,
            negative_prompt_box,
            outpaint_steps_slider,
            guidance_slider,
            sampling_slider,
        ]
        run_button.click(fn=zoom, inputs=zoom_inputs, outputs=video_player)