EveryText

Building

App Files Files Community

EveryText / app.py

fantos

Update app.py

f2a614f verified 11 months ago

raw

history blame

8.9 kB

	import spaces
	import random
	import torch
	import cv2
	import gradio as gr
	import numpy as np
	from huggingface_hub import snapshot_download
	from transformers import pipeline
	from diffusers.utils import load_image
	from kolors.pipelines.pipeline_controlnet_xl_kolors_img2img import StableDiffusionXLControlNetImg2ImgPipeline
	from kolors.models.modeling_chatglm import ChatGLMModel
	from kolors.models.tokenization_chatglm import ChatGLMTokenizer
	from kolors.models.controlnet import ControlNetModel
	from diffusers import AutoencoderKL
	from kolors.models.unet_2d_condition import UNet2DConditionModel
	from diffusers import EulerDiscreteScheduler
	from PIL import Image, ImageDraw, ImageFont
	import os

	# Module-level setup: everything below runs once at import time and binds
	# the globals used by the request handlers further down.

	# Device string every model below is moved to.
	device = "cuda"
	# Hugging Face Hub snapshots: the Kolors base checkpoint and the Canny
	# ControlNet weights. Each call returns a local directory path.
	ckpt_dir = snapshot_download(repo_id="Kwai-Kolors/Kolors")
	ckpt_dir_canny = snapshot_download(repo_id="Kwai-Kolors/Kolors-ControlNet-Canny")

	# Korean-to-English translation pipeline, used by
	# translate_korean_to_english() for prompts that contain Hangul.
	translator = pipeline("translation", model="Helsinki-NLP/opus-mt-ko-en")

	# Load each component from its sub-directory of the base checkpoint,
	# convert it to half precision and move it to `device`.
	# NOTE(review): `.half()` after `torch_dtype=torch.float16` looks redundant
	# for the text encoder -- confirm before removing.
	text_encoder = ChatGLMModel.from_pretrained(f'{ckpt_dir}/text_encoder', torch_dtype=torch.float16).half().to(device)
	tokenizer = ChatGLMTokenizer.from_pretrained(f'{ckpt_dir}/text_encoder')
	vae = AutoencoderKL.from_pretrained(f"{ckpt_dir}/vae", revision=None).half().to(device)
	scheduler = EulerDiscreteScheduler.from_pretrained(f"{ckpt_dir}/scheduler")
	unet = UNet2DConditionModel.from_pretrained(f"{ckpt_dir}/unet", revision=None).half().to(device)
	controlnet_canny = ControlNetModel.from_pretrained(f"{ckpt_dir_canny}", revision=None).half().to(device)

	# Assemble the ControlNet img2img pipeline from the components above;
	# infer_canny() is the only caller in this file.
	pipe_canny = StableDiffusionXLControlNetImg2ImgPipeline(
	vae=vae,
	controlnet=controlnet_canny,
	text_encoder=text_encoder,
	tokenizer=tokenizer,
	unet=unet,
	scheduler=scheduler,
	force_zeros_for_empty_prompt=False
	)

	@spaces.GPU
	def translate_korean_to_english(text):
	    """Return *text* in English when it contains Hangul syllables.

	    Text with no character in the range U+AC00..U+D7A3 is returned
	    unchanged. Otherwise the module-level ``translator`` pipeline is
	    called with ``max_length=512`` and the ``translation_text`` of its
	    first result is returned.
	    """
	    contains_hangul = any(0xAC00 <= ord(ch) <= 0xD7A3 for ch in text)
	    if not contains_hangul:
	        return text
	    results = translator(text, max_length=512)
	    return results[0]['translation_text']

	@spaces.GPU
	def process_canny_condition(image, canny_threods=(100, 200)):
	    """Build a three-channel Canny edge image for ControlNet conditioning.

	    Args:
	        image: Input image as a NumPy array; it is handed to ``cv2.Canny``.
	        canny_threods: Pair ``(first, second)`` of thresholds forwarded to
	            ``cv2.Canny``. Any two-element sequence is accepted.

	    Returns:
	        A PIL image created from the edge map replicated over three
	        channels.
	    """
	    first_threshold, second_threshold = canny_threods[0], canny_threods[1]
	    # Work on a copy so the caller's array is never touched.
	    edges = cv2.Canny(image.copy(), first_threshold, second_threshold)
	    # Replicate the single edge channel three times (H x W x 3).
	    # The earlier ``HWC3(...)`` call was removed: that name is neither
	    # defined nor imported in this file, so it raised NameError, and the
	    # array already has three channels at this point.
	    edges_rgb = np.stack([edges, edges, edges], axis=2)
	    return Image.fromarray(edges_rgb)

	# Largest signed 32-bit integer: upper bound of the seed slider and of the
	# random seed drawn in infer_canny().
	MAX_SEED = np.iinfo("int32").max
	# Size limit applied to the rendered text image in infer_canny().
	MAX_IMAGE_SIZE = 1024

	def text_to_image(text, size=72, position="middle-center"):
	    """Render *text* in black on a white 1024x576 RGB canvas.

	    Args:
	        text: Text to draw; newline characters separate the lines.
	        size: Font size used when ``Arial_Unicode.ttf`` is found next to
	            this file. The fallback default font is loaded without a size.
	        position: ``"<vertical>-<horizontal>"`` with vertical in
	            ``top``/``middle``/``bottom`` and horizontal in
	            ``left``/``center``/``right``. Any other value places the text
	            at bottom-center (note: not the ``middle-center`` default).

	    Returns:
	        The ``PIL.Image.Image`` holding the rendered text.
	    """
	    width, height = 1024, 576
	    margin = 10
	    image = Image.new("RGB", (width, height), "white")
	    draw = ImageDraw.Draw(image)

	    # Try the bundled font first; fall back to Pillow's default font.
	    font = None
	    for font_file in ("Arial_Unicode.ttf",):
	        font_path = os.path.join(os.path.dirname(__file__), font_file)
	        if not os.path.exists(font_path):
	            continue
	        try:
	            font = ImageFont.truetype(font_path, size=size)
	        except IOError:
	            print(f"Error loading font: {font_file}")
	            continue
	        print(f"Using font: {font_file}")
	        break
	    if font is None:
	        print("No suitable font found. Using default font.")
	        font = ImageFont.load_default()

	    # A blank (or whitespace-only) line measures no ink height, which used
	    # to collapse paragraph breaks. Give such lines the height of a
	    # reference string instead.
	    _, ref_top, _, ref_bottom = draw.textbbox((0, 0), "Ag", font=font)
	    blank_line_height = ref_bottom - ref_top

	    lines = text.split('\n')
	    line_widths = []
	    line_heights = []
	    for line in lines:
	        left, top, right, bottom = draw.textbbox((0, 0), line, font=font)
	        line_widths.append(right - left)
	        line_heights.append(bottom - top if line.strip() else blank_line_height)
	    max_line_width = max([0, *line_widths])
	    total_height = sum(line_heights)

	    # Offsets of the text block's top-left corner along each axis.
	    x_by_name = {
	        "left": margin,
	        "center": (width - max_line_width) / 2,
	        "right": width - max_line_width - margin,
	    }
	    y_by_name = {
	        "top": margin,
	        "middle": (height - total_height) / 2,
	        "bottom": height - total_height - margin,
	    }
	    position_mapping = {
	        f"{vertical}-{horizontal}": (x_by_name[horizontal], y_by_name[vertical])
	        for vertical in y_by_name
	        for horizontal in x_by_name
	    }

	    x, y = position_mapping.get(position, (x_by_name["center"], y_by_name["bottom"]))
	    # Every line starts at the same x; y advances by each line's height.
	    for line, line_height in zip(lines, line_heights):
	        draw.text((x, y), line, fill="black", font=font)
	        y += line_height

	    return image

	@spaces.GPU
	def infer_canny(prompt,
	                negative_prompt="nsfw, facial shadows, low resolution, jpeg artifacts, blurry, bad quality, dark face, neon lights",
	                seed=397886929,
	                randomize_seed=False,
	                guidance_scale=6.0,
	                num_inference_steps=50,
	                controlnet_conditioning_scale=0.7,
	                control_guidance_end=0.9,
	                strength=1.0):
	    """Render the prompt as text and stylise it with the Canny pipeline.

	    The prompt and negative prompt are first passed through
	    ``translate_korean_to_english``. The (translated) prompt is drawn with
	    ``text_to_image``, its Canny edge map becomes the control image, and
	    ``pipe_canny`` is run on both.

	    Args:
	        prompt: Text that is drawn into the init image and also used as
	            the generation prompt.
	        negative_prompt: Negative prompt forwarded to the pipeline.
	        seed: Seed for the ``torch.Generator``; ignored when
	            *randomize_seed* is true.
	        randomize_seed: Draw a fresh seed in ``[0, MAX_SEED]`` instead.
	        guidance_scale, num_inference_steps,
	        controlnet_conditioning_scale, control_guidance_end, strength:
	            Forwarded to the pipeline call under the same names.

	    Returns:
	        ``([edge_image, generated_image], seed)`` where *seed* is the
	        integer seed actually used.
	    """
	    prompt = translate_korean_to_english(prompt)
	    negative_prompt = translate_korean_to_english(negative_prompt)

	    if randomize_seed:
	        seed = random.randint(0, MAX_SEED)
	    # Values coming from UI widgets may arrive as floats; the generator
	    # seed and the step count must be integers.
	    seed = int(seed)
	    num_inference_steps = int(num_inference_steps)
	    generator = torch.Generator().manual_seed(seed)

	    # Generate the text image. The previous ``resize_image`` call referred
	    # to a name that is neither defined nor imported in this file
	    # (NameError); the local helper below takes its place.
	    init_image = _resize_to_max_side(text_to_image(prompt), MAX_IMAGE_SIZE)

	    pipe = pipe_canny.to(device)
	    condi_img = process_canny_condition(np.array(init_image))
	    image = pipe(
	        prompt=prompt,
	        image=init_image,
	        controlnet_conditioning_scale=controlnet_conditioning_scale,
	        control_guidance_end=control_guidance_end,
	        strength=strength,
	        control_image=condi_img,
	        negative_prompt=negative_prompt,
	        num_inference_steps=num_inference_steps,
	        guidance_scale=guidance_scale,
	        num_images_per_prompt=1,
	        generator=generator,
	    ).images[0]
	    return [condi_img, image], seed


	def _resize_to_max_side(image, max_side, multiple=64):
	    """Scale a PIL image so that its longer side is about *max_side* pixels.

	    The aspect ratio is kept approximately: both target dimensions are
	    rounded to the nearest multiple of *multiple* (and never below one
	    multiple). The input object is returned as-is when it already has the
	    target size, which is the case for the 1024x576 canvas produced by
	    ``text_to_image`` with ``max_side=1024``.
	    """
	    src_width, src_height = image.size
	    scale = float(max_side) / max(src_width, src_height)
	    dst_width = max(multiple, int(round(src_width * scale / multiple)) * multiple)
	    dst_height = max(multiple, int(round(src_height * scale / multiple)) * multiple)
	    if (dst_width, dst_height) == (src_width, src_height):
	        return image
	    # Smoother filter when enlarging, cheaper one when shrinking.
	    resample = Image.LANCZOS if scale > 1 else Image.BILINEAR
	    return image.resize((dst_width, dst_height), resample=resample)

	# Custom CSS passed to gr.Blocks below; it hides the page's <footer> element.
	css = """
	footer {
	visibility: hidden;
	}
	"""

	# Gradio UI definition.
	# NOTE(review): the nesting of the `with` layout contexts below cannot be
	# read from this de-indented listing -- restore the indentation from the
	# upstream file before running.
	with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css) as Kolors:
	with gr.Row():
	with gr.Column(elem_id="col-left"):
	with gr.Row():
	# Prompt text box; first input of infer_canny.
	prompt = gr.Textbox(
	label="Prompt",
	placeholder="Enter your prompt",
	lines=2
	)
	# Optional generation parameters; the accordion starts collapsed (open=False).
	with gr.Accordion("Advanced Settings", open=False):
	negative_prompt = gr.Textbox(
	label="Negative prompt",
	placeholder="Enter a negative prompt",
	visible=True,
	value="nsfw, facial shadows, low resolution, jpeg artifacts, blurry, bad quality, dark face, neon lights"
	)
	# Seed slider; infer_canny replaces its value with a random seed while
	# the "Randomize seed" box (checked by default) is ticked.
	seed = gr.Slider(
	label="Seed",
	minimum=0,
	maximum=MAX_SEED,
	step=1,
	value=0,
	)
	randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
	with gr.Row():
	guidance_scale = gr.Slider(
	label="Guidance scale",
	minimum=0.0,
	maximum=10.0,
	step=0.1,
	value=6.0,
	)
	# The slider starts at 30 steps, whereas infer_canny's own default is 50.
	num_inference_steps = gr.Slider(
	label="Number of inference steps",
	minimum=10,
	maximum=50,
	step=1,
	value=30,
	)
	with gr.Row():
	controlnet_conditioning_scale = gr.Slider(
	label="Controlnet Conditioning Scale",
	minimum=0.0,
	maximum=1.0,
	step=0.1,
	value=0.7,
	)
	control_guidance_end = gr.Slider(
	label="Control Guidance End",
	minimum=0.0,
	maximum=1.0,
	step=0.1,
	value=0.9,
	)
	with gr.Row():
	strength = gr.Slider(
	label="Strength",
	minimum=0.0,
	maximum=1.0,
	step=0.1,
	value=1.0,
	)
	# Button whose click handler (registered below) runs infer_canny.
	with gr.Row():
	canny_button = gr.Button("Canny", elem_id="button")

	# Outputs: a two-column gallery that receives the list returned by
	# infer_canny (edge image, generated image) and the seed that was used.
	with gr.Column(elem_id="col-right"):
	result = gr.Gallery(label="Result", show_label=False, columns=2)
	seed_used = gr.Number(label="Seed Used")

	# Wire the button to infer_canny; the inputs list follows the function's
	# positional parameter order and the outputs match its two return values.
	canny_button.click(
	fn = infer_canny,
	inputs = [prompt, negative_prompt, seed, randomize_seed, guidance_scale, num_inference_steps, controlnet_conditioning_scale, control_guidance_end, strength],
	outputs = [result, seed_used]
	)

	# Enable the request queue and start the app (debug=True).
	Kolors.queue().launch(debug=True)