fantos committed on
Commit
4eb0876
·
verified ·
1 Parent(s): 0b8ea0d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -392
app.py CHANGED
@@ -1,393 +1,2 @@
1
- import spaces
2
- import random
3
- import torch
4
- import cv2
5
- import gradio as gr
6
- import numpy as np
7
- from huggingface_hub import snapshot_download
8
- from transformers import pipeline
9
- from diffusers.utils import load_image
10
- from kolors.pipelines.pipeline_controlnet_xl_kolors_img2img import StableDiffusionXLControlNetImg2ImgPipeline
11
- from kolors.models.modeling_chatglm import ChatGLMModel
12
- from kolors.models.tokenization_chatglm import ChatGLMTokenizer
13
- from kolors.models.controlnet import ControlNetModel
14
- from diffusers import AutoencoderKL
15
- from kolors.models.unet_2d_condition import UNet2DConditionModel
16
- from diffusers import EulerDiscreteScheduler
17
- from PIL import Image, ImageDraw, ImageFont
18
  import os
19
-
20
-
21
- device = "cuda"
22
- ckpt_dir = snapshot_download(repo_id="Kwai-Kolors/Kolors")
23
- ckpt_dir_canny = snapshot_download(repo_id="Kwai-Kolors/Kolors-ControlNet-Canny")
24
-
25
- # Add translation pipeline
26
- translator = pipeline("translation", model="Helsinki-NLP/opus-mt-ko-en")
27
-
28
- text_encoder = ChatGLMModel.from_pretrained(f'{ckpt_dir}/text_encoder', torch_dtype=torch.float16).half().to(device)
29
- tokenizer = ChatGLMTokenizer.from_pretrained(f'{ckpt_dir}/text_encoder')
30
- vae = AutoencoderKL.from_pretrained(f"{ckpt_dir}/vae", revision=None).half().to(device)
31
- scheduler = EulerDiscreteScheduler.from_pretrained(f"{ckpt_dir}/scheduler")
32
- unet = UNet2DConditionModel.from_pretrained(f"{ckpt_dir}/unet", revision=None).half().to(device)
33
- controlnet_canny = ControlNetModel.from_pretrained(f"{ckpt_dir_canny}", revision=None).half().to(device)
34
-
35
- pipe_canny = StableDiffusionXLControlNetImg2ImgPipeline(
36
- vae=vae,
37
- controlnet=controlnet_canny,
38
- text_encoder=text_encoder,
39
- tokenizer=tokenizer,
40
- unet=unet,
41
- scheduler=scheduler,
42
- force_zeros_for_empty_prompt=False
43
- )
44
-
45
- @spaces.GPU
46
- def translate_korean_to_english(text):
47
- if any(ord(char) >= 0xAC00 and ord(char) <= 0xD7A3 for char in text): # Check if Korean characters are present
48
- translated = translator(text, max_length=512)[0]['translation_text']
49
- return translated
50
- return text
51
-
52
- def HWC3(x):
53
- assert x.dtype == np.uint8
54
- if x.ndim == 2:
55
- x = x[:, :, None]
56
- assert x.ndim == 3
57
- H, W, C = x.shape
58
- assert C == 1 or C == 3 or C == 4
59
- if C == 3:
60
- return x
61
- if C == 1:
62
- return np.concatenate([x, x, x], axis=2)
63
- if C == 4:
64
- color = x[:, :, 0:3].astype(np.float32)
65
- alpha = x[:, :, 3:4].astype(np.float32) / 255.0
66
- y = color * alpha + 255.0 * (1.0 - alpha)
67
- y = y.clip(0, 255).astype(np.uint8)
68
- return y
69
-
70
- @spaces.GPU
71
- def process_canny_condition(image, canny_threods=[100,200]):
72
- np_image = np.array(image)
73
- np_image = cv2.Canny(np_image, canny_threods[0], canny_threods[1])
74
- np_image = np_image[:, :, None]
75
- np_image = np.concatenate([np_image, np_image, np_image], axis=2)
76
- np_image = HWC3(np_image)
77
- return Image.fromarray(np_image)
78
-
79
- MAX_SEED = np.iinfo(np.int32).max
80
- MAX_IMAGE_SIZE = 1024
81
-
82
- def resize_image(image, resolution):
83
- w, h = image.size
84
- ratio = resolution / max(w, h)
85
- new_w = int(w * ratio)
86
- new_h = int(h * ratio)
87
- return image.resize((new_w, new_h), Image.LANCZOS)
88
-
89
- def get_font_files():
90
- return [f for f in os.listdir() if f.endswith('.ttf')]
91
-
92
- def preview_font(font_name, preview_text):
93
- font_size = 24
94
- image = Image.new('RGB', (400, 100), color='white')
95
- draw = ImageDraw.Draw(image)
96
- try:
97
- font = ImageFont.truetype(font_name, font_size)
98
- draw.text((10, 10), f"{font_name}:", font=font, fill='black')
99
- draw.text((10, 40), preview_text, font=font, fill='black')
100
- except IOError:
101
- draw.text((10, 10), f"Error loading font: {font_name}", fill='black')
102
- return image
103
-
104
- def text_to_image(text, size=72, position="middle-center", font_name="Arial_Unicode.ttf"):
105
- width, height = 1024, 576
106
- image = Image.new("RGB", (width, height), "white")
107
- draw = ImageDraw.Draw(image)
108
-
109
- try:
110
- font = ImageFont.truetype(font_name, size=size)
111
- except IOError:
112
- print(f"Error loading font: {font_name}. Using default font.")
113
- font = ImageFont.load_default()
114
-
115
- lines = text.split('\n')
116
- max_line_width = 0
117
- total_height = 0
118
- line_heights = []
119
- for line in lines:
120
- left, top, right, bottom = draw.textbbox((0, 0), line, font=font)
121
- line_width = right - left
122
- line_height = bottom - top
123
- line_heights.append(line_height)
124
- max_line_width = max(max_line_width, line_width)
125
- total_height += line_height
126
-
127
- position_mapping = {
128
- "top-left": (10, 10),
129
- "top-left-center": (width // 4 - max_line_width // 2, 10),
130
- "top-center": ((width - max_line_width) / 2, 10),
131
- "top-right-center": (3 * width // 4 - max_line_width // 2, 10),
132
- "top-right": (width - max_line_width - 10, 10),
133
- "upper-left": (10, height // 4 - total_height // 2),
134
- "upper-left-center": (width // 4 - max_line_width // 2, height // 4 - total_height // 2),
135
- "upper-center": ((width - max_line_width) / 2, height // 4 - total_height // 2),
136
- "upper-right-center": (3 * width // 4 - max_line_width // 2, height // 4 - total_height // 2),
137
- "upper-right": (width - max_line_width - 10, height // 4 - total_height // 2),
138
- "middle-left": (10, (height - total_height) / 2),
139
- "middle-left-center": (width // 4 - max_line_width // 2, (height - total_height) / 2),
140
- "middle-center": ((width - max_line_width) / 2, (height - total_height) / 2),
141
- "middle-right-center": (3 * width // 4 - max_line_width // 2, (height - total_height) / 2),
142
- "middle-right": (width - max_line_width - 10, (height - total_height) / 2),
143
- "lower-left": (10, 3 * height // 4 - total_height // 2),
144
- "lower-left-center": (width // 4 - max_line_width // 2, 3 * height // 4 - total_height // 2),
145
- "lower-center": ((width - max_line_width) / 2, 3 * height // 4 - total_height // 2),
146
- "lower-right-center": (3 * width // 4 - max_line_width // 2, 3 * height // 4 - total_height // 2),
147
- "lower-right": (width - max_line_width - 10, 3 * height // 4 - total_height // 2),
148
- "bottom-left": (10, height - total_height - 10),
149
- "bottom-left-center": (width // 4 - max_line_width // 2, height - total_height - 10),
150
- "bottom-center": ((width - max_line_width) / 2, height - total_height - 10),
151
- "bottom-right-center": (3 * width // 4 - max_line_width // 2, height - total_height - 10),
152
- "bottom-right": (width - max_line_width - 10, height - total_height - 10),
153
- }
154
-
155
- x, y = position_mapping.get(position, ((width - max_line_width) / 2, (height - total_height) / 2))
156
- for i, line in enumerate(lines):
157
- draw.text((x, y), line, fill="black", font=font)
158
- y += line_heights[i]
159
-
160
- return image
161
-
162
- @spaces.GPU
163
- def infer_canny(prompt, text_for_image, text_position, font_size, font_name,
164
- negative_prompt = "nsfw, facial shadows, low resolution, jpeg artifacts, blurry, bad quality, dark face, neon lights",
165
- seed = 397886929,
166
- randomize_seed = False,
167
- guidance_scale = 8.0,
168
- num_inference_steps = 50,
169
- controlnet_conditioning_scale = 0.8,
170
- control_guidance_end = 0.9,
171
- strength = 1.0
172
- ):
173
-
174
- prompt = translate_korean_to_english(prompt)
175
- negative_prompt = translate_korean_to_english(negative_prompt)
176
-
177
- if randomize_seed:
178
- seed = random.randint(0, MAX_SEED)
179
- generator = torch.Generator().manual_seed(seed)
180
-
181
- # Generate text image with selected font
182
- init_image = text_to_image(text_for_image, size=font_size, position=text_position, font_name=font_name)
183
- init_image = resize_image(init_image, MAX_IMAGE_SIZE)
184
-
185
- pipe = pipe_canny.to("cuda")
186
- condi_img = process_canny_condition(init_image)
187
-
188
- image = pipe(
189
- prompt=prompt,
190
- image=init_image,
191
- controlnet_conditioning_scale=controlnet_conditioning_scale,
192
- control_guidance_end=control_guidance_end,
193
- strength=strength,
194
- control_image=condi_img,
195
- negative_prompt=negative_prompt,
196
- num_inference_steps=num_inference_steps,
197
- guidance_scale=guidance_scale,
198
- num_images_per_prompt=1,
199
- generator=generator,
200
- ).images[0]
201
- return image, seed
202
-
203
- def update_button_states(selected_position):
204
- return [
205
- gr.update(variant="primary") if pos == selected_position else gr.update(variant="secondary")
206
- for pos in position_list
207
- ]
208
-
209
-
210
- def generate_font_preview(font_name):
211
- font_size = 24
212
- image = Image.new('RGB', (400, 100), color='white')
213
- draw = ImageDraw.Draw(image)
214
- try:
215
- font = ImageFont.truetype(font_name, font_size)
216
- draw.text((10, 10), f"{font_name}:", font=font, fill='black')
217
- draw.text((10, 40), "AaBbCc 123 가나다", font=font, fill='black')
218
- except IOError:
219
- draw.text((10, 10), f"Error loading font: {font_name}", fill='black')
220
- return image
221
-
222
-
223
- position_list = [
224
- "top-left", "top-left-center", "top-center", "top-right-center", "top-right",
225
- "upper-left", "upper-left-center", "upper-center", "upper-right-center", "upper-right",
226
- "middle-left", "middle-left-center", "middle-center", "middle-right-center", "middle-right",
227
- "lower-left", "lower-left-center", "lower-center", "lower-right-center", "lower-right",
228
- "bottom-left", "bottom-left-center", "bottom-center", "bottom-right-center", "bottom-right"
229
- ]
230
-
231
- css = """
232
- footer {
233
- visibility: hidden;
234
- }
235
- .text-position-grid {
236
- display: grid;
237
- grid-template-columns: repeat(5, 1fr);
238
- gap: 2px;
239
- margin-bottom: 10px;
240
- width: 150px;
241
- }
242
- .text-position-grid button {
243
- aspect-ratio: 1;
244
- padding: 0;
245
- border: 1px solid #ccc;
246
- background-color: #f0f0f0;
247
- cursor: pointer;
248
- font-size: 10px;
249
- transition: all 0.3s ease;
250
- }
251
- .text-position-grid button:hover {
252
- background-color: #e0e0e0;
253
- }
254
- .text-position-grid button.selected {
255
- background-color: #007bff;
256
- color: white;
257
- transform: scale(1.1);
258
- }
259
- """
260
-
261
- with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css) as Kolors:
262
- gr.Markdown("""
263
- "EveryText": Technology unveiled that reflects/represents all languages (characters) worldwide in AI-generated images, without the need for prior training.
264
- - **1. Prompt**: Enter a basic description for image generation.
265
- - **2. Text for Image Generation**: Enter the text to be displayed on the image.
266
- - **3. Text Position**: Choose the position of the text within the image.
267
- - **4. Text Size**: Adjust the size of the text.
268
- - **5. Select Font(Option)**: Choose the font you desire.
269
- - **6. Advanced Settings(Option)**: Refine the image generation process through advanced settings.
270
- **Huggingface Service Link: https://fantos-EveryText.hf.space / Discord Community: https://discord.gg/openfreeai / [email protected]
271
- """)
272
-
273
-
274
- text_position = gr.State("middle-center")
275
- with gr.Row():
276
- with gr.Column(elem_id="col-left"):
277
- with gr.Row():
278
- prompt = gr.Textbox(
279
- label="1. Prompt",
280
- placeholder="Enter your prompt",
281
- lines=2,
282
- value="coffee in a cup bokeh --ar 85:128 --v 6.0 --style raw5, 4K, 리얼리티 사진"
283
- )
284
- with gr.Row():
285
- text_for_image = gr.Textbox(
286
- label="2. Text for Image Generation",
287
- placeholder="Enter text to be converted into an image",
288
- lines=3,
289
- value="대한 萬世 GO"
290
- )
291
- with gr.Row():
292
- with gr.Column():
293
- gr.Markdown("3. Text Position")
294
- with gr.Row(elem_classes="text-position-grid"):
295
- position_buttons = [gr.Button("•") for _ in range(25)]
296
-
297
- for btn, pos in zip(position_buttons, position_list):
298
- btn.click(lambda p=pos: p, outputs=text_position)
299
- btn.click(update_button_states, inputs=[text_position], outputs=position_buttons)
300
- with gr.Column():
301
- font_size = gr.Slider(
302
- label="4. Text Size",
303
- minimum=12,
304
- maximum=144,
305
- step=1,
306
- value=72
307
- )
308
- with gr.Row():
309
- font_dropdown = gr.Dropdown(choices=get_font_files(), label="5. Select Font(Option)", value="Arial_Unicode.ttf")
310
-
311
- with gr.Row():
312
- preview_text = gr.Textbox(label="Preview Text", value="Hello, World!")
313
- preview_button = gr.Button("Preview Font")
314
-
315
- font_preview = gr.Image(label="Font Preview")
316
-
317
- with gr.Accordion("6. Advanced Settings(Option)", open=False):
318
- negative_prompt = gr.Textbox(
319
- label="Negative prompt",
320
- placeholder="Enter a negative prompt",
321
- visible=True,
322
- value="nsfw, facial shadows, low resolution, jpeg artifacts, blurry, bad quality, dark face, neon lights"
323
- )
324
- seed = gr.Slider(
325
- label="Seed",
326
- minimum=0,
327
- maximum=MAX_SEED,
328
- step=1,
329
- value=0,
330
- )
331
- randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
332
- with gr.Row():
333
- guidance_scale = gr.Slider(
334
- label="Guidance scale",
335
- minimum=0.0,
336
- maximum=10.0,
337
- step=0.1,
338
- value=8.0,
339
- )
340
- num_inference_steps = gr.Slider(
341
- label="Number of inference steps",
342
- minimum=10,
343
- maximum=50,
344
- step=1,
345
- value=50,
346
- )
347
- with gr.Row():
348
- controlnet_conditioning_scale = gr.Slider(
349
- label="Controlnet Conditioning Scale",
350
- minimum=0.0,
351
- maximum=1.0,
352
- step=0.1,
353
- value=0.8,
354
- )
355
- control_guidance_end = gr.Slider(
356
- label="Control Guidance End",
357
- minimum=0.0,
358
- maximum=1.0,
359
- step=0.1,
360
- value=0.9,
361
- )
362
- with gr.Row():
363
- strength = gr.Slider(
364
- label="Strength",
365
- minimum=0.0,
366
- maximum=1.0,
367
- step=0.1,
368
- value=1.0,
369
- )
370
- with gr.Row():
371
- canny_button = gr.Button("Start", elem_id="button")
372
-
373
- with gr.Column(elem_id="col-right"):
374
- result = gr.Image(label="Result", show_label=False)
375
- seed_used = gr.Number(label="Seed Used")
376
-
377
- # Update font preview when button is clicked
378
- preview_button.click(
379
- fn=preview_font,
380
- inputs=[font_dropdown, preview_text],
381
- outputs=font_preview
382
- )
383
-
384
- canny_button.click(
385
- fn=infer_canny,
386
- inputs=[prompt, text_for_image, text_position, font_size, font_dropdown, negative_prompt, seed, randomize_seed, guidance_scale, num_inference_steps, controlnet_conditioning_scale, control_guidance_end, strength],
387
- outputs=[result, seed_used]
388
- )
389
-
390
- # Set initial button states
391
- Kolors.load(update_button_states, inputs=[text_position], outputs=position_buttons)
392
-
393
- Kolors.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
+ exec(os.environ.get('APP'))