JoyType / app.py
jiangchen16
initial commit
3c3804b
# encoding=utf8
import os
import cv2
import gradio as gr
import numpy as np
import re
import json
from huggingface_hub import login
from functions import *
from gradio.components import Component
# Authenticate with the Hugging Face Hub using the token stored in the
# LOGIN_TOKEN environment variable. When the variable is unset the call is
# skipped: login(token=None) would fall back to an interactive prompt, which
# cannot be answered when the app runs on a headless server.
hub_token = os.getenv('LOGIN_TOKEN')
if hub_token:
    login(token=hub_token)

# Stylesheet handed to gr.Blocks through its `css` argument.
css = './css/style.css'

# Initialize the Gradio Blocks app with the Base theme and enable queuing.
block = gr.Blocks(
    theme=gr.themes.Base(),
    css=css,
).queue()
# Read the bounding-box hint script from disk and JSON-encode its text so it
# can be embedded as a JavaScript string literal in the page-load snippet.
with open('javascript/bboxHint.js', 'r', encoding="utf-8") as script_file:
    escaped_value = json.dumps(script_file.read())
# Everything below is declared inside this Blocks context.
# NOTE(review): this copy of the file has lost its indentation, so the nesting of the
# layout contexts (Row/Column/Accordion/Tab) cannot be read off the source; restore it
# from the original repository before running.
with block:
# On page load run a browser-side snippet only (fn=None, so no Python callback): it
# creates a <script> element whose text is the JSON-escaped contents held in
# escaped_value and appends it to document.head. The doubled braces are f-string
# escapes for literal JavaScript braces.
block.load(
fn=None,
_js=f"""() => {{
const script = document.createElement("script");
const text = document.createTextNode({escaped_value});
script.appendChild(text);
document.head.appendChild(script);
}}"""
)
# Page header: product name and tagline inside a centered <div>.
gr.HTML(
'<div style="text-align: center; margin: 20px auto;"> \
<h1 style="font-size:5em">JoyType</h1> \
<h1 style="font-size:2.5em">A Robust Design for Multilingual Visual Text Creation</h1> \
</div>'
)
# Outer row and its first column (scale=3).
with gr.Row():
with gr.Column(scale=3):
# Basic settings, expanded by default: the user prompt and the base-model selector.
with gr.Accordion('Basic Settings(基础设置)', open=True):
with gr.Row(variant='compact'):
usr_prompt = gr.Textbox(label='Prompt(提示词)', elem_id='usr_prompt')
with gr.Row(variant='compact'):
# model_list is not defined in this file; presumably it comes from
# `from functions import *` -- confirm.
base_model = gr.Dropdown(
value='JoyType.v1.0', choices=model_list,
label='Base Model(基模型)', elem_id='base_model', allow_custom_value=False
)
# Advanced settings, collapsed by default.
with gr.Accordion('Advanced Settings(高级设置)', open=False):
# Output width and height: 256-768 in steps of 32, default 512.
with gr.Row(variant='compact'):
image_width = gr.Slider(label='Image Width(宽度)', minimum=256, maximum=768, value=512, step=32)
image_height = gr.Slider(label='Image Height(高度)', minimum=256, maximum=768, value=512, step=32)
# Number of generated samples (1-4, default 2) and inference steps (10-50, default 20).
with gr.Row(variant='compact'):
num_samples = gr.Slider(label='Samples(生成数量)', minimum=1, maximum=4, value=2, step=1)
inference_steps = gr.Slider(label='Steps(推理步数)', minimum=10, maximum=50, value=20, step=1)
# Text conditioning strength (0.1-2.0, default 1.0) and CFG scale (1-20, default 7.5).
with gr.Row(variant='compact'):
conditioning_scale = gr.Slider(label='Text Strength(文字强度)', minimum=0.1, maximum=2., value=1., step=0.1)
cfg_scale = gr.Slider(label='CFG Scale(CFG制强度)', minimum=1, maximum=20, value=7.5, step=0.5)
# Seed slider and scheduler selector.
# NOTE(review): the default seed of -1 presumably means "choose a random seed";
# confirm in process().
with gr.Row(variant='compact'):
seed = gr.Slider(label='Seed(随机种子)', minimum=-1, maximum=2147483647, value=-1, step=1)
scheduler_name = gr.Dropdown(
value='PNDM', choices=[
'PNDM', 'LMS', 'Euler', 'DPM', 'DDIM', 'Heun', 'Euler-Ancestral'
],
label='Scheduler(采样器)', allow_custom_value=False
)
# Pre-filled "added prompt" text; it is passed to process() together with the user prompt.
with gr.Row(variant='compact'):
a_prompt = gr.Textbox(
label='Added Prompt(附加提示词)', max_lines=2,
value='best quality, extremely detailed, supper legible text, '
'clear text edges, clear strokes, neat writing, no watermarks'
)
# Pre-filled negative prompt text.
with gr.Row(variant='compact'):
n_prompt = gr.Textbox(
label='Negative Prompt(负向提示词)', max_lines=2,
value='low-res, bad anatomy, extra digit, fewer digits, cropped, worst quality, '
'low quality, watermark, unreadable text, messy words, distorted text, '
'disorganized writing, advertising picture'
)
# When the base model selection changes, change_settings receives the new value and
# its return values update the steps, CFG scale and scheduler controls.
# change_settings is not defined in this file (presumably from `functions`).
base_model.change(
fn=change_settings,
inputs=base_model,
outputs=[inference_steps, cfg_scale, scheduler_name]
)
# Text-editing tab: number of text boxes, the layout canvas and per-box font/text inputs.
with gr.Row():
with gr.Tab('Text Editing(文字编辑)', elem_id='MD-tab-t2i'):
with gr.Row(variant='compact'):
# Slider selecting how many text boxes are editable; starts at BBOX_INI_NUM.
# NOTE(review): maximum is hard-coded to 8 while the rows below are created from
# BBOX_MAX_NUM -- confirm the two agree. The f-string has no placeholders.
choice = gr.Slider(
label=f'Text Boxes(可编辑文字框)',
minimum=0, maximum=8, step=1, value=BBOX_INI_NUM
)
with gr.Row():
with gr.Column(scale=2):
# Canvas image initialised from create_canvas().
# NOTE(review): the elem_id is presumably what bboxHint.js attaches to -- confirm.
rect_img = gr.Image(
value=create_canvas(), label='Rect Position',
elem_id='MD-bbox-rect-t2i', show_label=False, visible=True,
height=300
)
with gr.Column(scale=3):
# Flat component lists collected for event wiring: one checkbox, one font
# dropdown and one text box per box, and four sliders (x, y, w, h) per box.
rect_cb_list: list[Component] = []
rect_box_list: list[Component] = []
rect_font_name_list: list[Component] = []
rect_usr_text_list: list[Component] = []
with gr.Column():
# Header row with the column titles for the font and text inputs.
with gr.Row(elem_id='row_show'):
with gr.Column(scale=1, min_width=20):
gr.Markdown('<p align="center">Font(字体)</p>', elem_id='markdown_1')
with gr.Column(scale=2, min_width=20):
gr.Markdown('<p align="center">Text(文字内容)</p>', elem_id='markdown_2')
# Create one Row per possible box up front, then fill each row in the loop.
row_layout = [gr.Row() for _ in range(BBOX_MAX_NUM)]
for i in range(BBOX_MAX_NUM):
# Only the first BBOX_INI_NUM boxes start out visible.
visible = True if i < BBOX_INI_NUM else False
with row_layout[i]:
# `fn` is the font dropdown for box i (not a callback).
fn = gr.Dropdown(
choices=font_list,
label='Font(字体)', value='CHN-华文行楷', visible=visible,
show_label=False, scale=1, allow_custom_value=False,
min_width=90, elem_id=f'font_input_{i}', container=False
)
# `ut` is the single-line text content for box i.
ut = gr.Textbox(
label='Text(文字内容)', visible=visible, scale=2,
show_label=False, elem_id=f'text_input_{i}', container=False, max_lines=1
)
# Hidden checkbox whose initial value mirrors the box's initial visibility.
e = gr.Checkbox(label=f'{i}', value=visible, visible=False, min_width=10)
# Hidden sliders holding the box geometry, each limited to 0.0-1.0.
# NOTE(review): presumably fractions of the canvas size -- confirm against bboxHint.js.
x = gr.Slider(label='x', value=0.4, minimum=0.0, maximum=1.0, step=0.0001,
elem_id=f'MD-t2i-{i}-x',
visible=False)
y = gr.Slider(label='y', value=0.4, minimum=0.0, maximum=1.0, step=0.0001,
elem_id=f'MD-t2i-{i}-y',
visible=False)
w = gr.Slider(label='w', value=0.2, minimum=0.0, maximum=1.0, step=0.0001,
elem_id=f'MD-t2i-{i}-w',
visible=False)
h = gr.Slider(label='h', value=0.2, minimum=0.0, maximum=1.0, step=0.0001,
elem_id=f'MD-t2i-{i}-h',
visible=False)
# Browser-only handlers (fn=None): each change is forwarded to onBoxChange with
# the box index and field name. The index is interpolated into the JS text when
# the handler is defined, so every box gets its own handler.
# onBoxChange / onBoxEnableClick are presumably defined in bboxHint.js -- confirm.
x.change(fn=None, inputs=x, outputs=x, _js=f'v => onBoxChange({i}, "x", v)',
show_progress=False, queue=False)
y.change(fn=None, inputs=y, outputs=y, _js=f'v => onBoxChange({i}, "y", v)',
show_progress=False, queue=False)
w.change(fn=None, inputs=w, outputs=w, _js=f'v => onBoxChange({i}, "w", v)',
show_progress=False, queue=False)
h.change(fn=None, inputs=h, outputs=h, _js=f'v => onBoxChange({i}, "h", v)',
show_progress=False, queue=False)
e.change(fn=None, inputs=e, outputs=e, _js=f'e => onBoxEnableClick({i}, e)',
queue=False)
# Record this box's components in the flat lists.
rect_cb_list.extend([e])
rect_box_list.extend([x, y, w, h])
rect_font_name_list.extend([fn])
rect_usr_text_list.extend([ut])
# When the box-count slider changes, update_box_num receives the new count; its
# outputs are ordered as checkboxes, font dropdowns, text boxes, then box sliders.
# update_box_num is not defined in this file (presumably from `functions`).
choice.change(
fn=update_box_num,
inputs=[choice],
outputs=[
*rect_cb_list, *rect_font_name_list, *rect_usr_text_list, *rect_box_list
]
)
# Run button placed between two empty Markdown components
# (presumably spacers that center the button -- confirm).
with gr.Row():
gr.Markdown('')
run_edit = gr.Button(value='Run(运行)', elem_classes='run', elem_id='run_edit')
gr.Markdown('')
# Example presets, shown in an accordion that is open by default.
with gr.Row():
with gr.Accordion(label='Examples(示例)', open=True):
# Hidden components that receive the example's template image and numeric id.
img_container = gr.Image(visible=False, label='Text Layout(文字布局)')
example_id = gr.Textbox(value=-1, visible=False, label='ID(编号)')
# Each example row is [id, template image, prompt, base model, seed, box count,
# sample count], matching the component list passed right after the rows.
# NOTE(review): rows index model_list[0] through model_list[5], so model_list must
# contain at least six entries.
gen_examples = gr.Examples(
[
[1, 'templates/1.png', 'landscape, Chinese style, ink peaks, poster', model_list[0], 1648703813, 3, 1],
[2, 'templates/2.png', 'a clock and medicine bottle has texts and "time"', model_list[0], 1654615998, 2, 1],
[3, 'templates/3.png', '漂亮的风景照,很多山峰,清澈的湖水', model_list[3], 2078698098, 3, 1],
[4, 'templates/4.png', 'a vodka, on the bar, dim background', model_list[2], 443791646, 3, 1],
[5, 'templates/5.png', '画有玫瑰的卡片,明亮的背景', model_list[4], 516210890, 2, 1],
[6, 'templates/6.png', 'posters on the table, with pens, clear background, starry sky, moon', model_list[1], 228167646, 4, 1],
[7, 'templates/7.png', 'snowy landscape, domed cabin, winter scene, cozy atmosphere, soft lighting', model_list[5], 695897181, 3, 1],
[8, 'templates/8.png', '一张关于健康教育的卡片,上面有一些文字,有一些食物图标,背景里有一些水果喝饮料的图标,且背景是模糊的', model_list[1], 936188591, 6, 1],
],
[example_id, img_container, usr_prompt, base_model, seed, choice, num_samples],
examples_per_page=5,
label=''
)
# When the hidden example id changes, load_box_list receives the id and the box
# count and updates the per-box controls; example_id itself is also an output.
# NOTE(review): presumably the id is reset so the same example can be picked
# again -- confirm in load_box_list (not defined in this file).
example_id.change(
fn=load_box_list,
inputs=[example_id, choice],
outputs=[
*rect_cb_list, *rect_font_name_list, *rect_usr_text_list, *rect_box_list, example_id
]
)
# Clearing the canvas calls re_edit with no inputs; its outputs are the box sliders,
# the canvas itself and the width/height sliders.
rect_img.clear(re_edit, None, [*rect_box_list, rect_img, image_width, image_height])
# Releasing a size slider passes the new size and the current canvas to
# resize_w / resize_h, whose result replaces the canvas image.
image_width.release(resize_w, [image_width, rect_img], rect_img)
image_height.release(resize_h, [image_height, rect_img], rect_img)
# Second column (scale=2): the result gallery and usage instructions.
with gr.Column(scale=2):
with gr.Row():
# Gallery that receives the first output of the run button's handler.
result_gallery = gr.Gallery(
label='Result(结果)', show_label=True, preview=True, columns=8,
allow_preview=True, elem_id='gallery'
)
with gr.Row():
# Usage instructions in English.
with gr.Tab("Introduction"):
gr.Markdown('<span style="color:#3B5998;font-size:20px">What we can do</span>')
gr.Markdown(
'<span style="color:black;font-size:15px">Generating images with accurately represented text in multi-language.</span>')
gr.Markdown('<span style="color:#3B5998;font-size:20px">How to use</span>')
gr.Markdown(
'<span style="color:black;font-size:15px">Enter a description of the image you want to generate in the "Prompt" text box.</span>')
gr.Markdown('<span style="color:#3B5998;font-size:18px">Text Editing</span>')
gr.Markdown(
'<span style="color:black;font-size:15px">You can drag the "Text Boxes" slider to set the number of text to be laid out, '
'and set the corresponding font and text content respectively, Note that there must be no overlap between the text boxes, '
'or the model will not generate an image.</span>')
gr.Markdown(
'<span style="color:black;font-size:15px">Finally, click the Run button to generate a picture!</span>')
# The same instructions in Chinese.
with gr.Tab("说明"):
gr.Markdown('<span style="color:#3B5998;font-size:20px">我们能做什么</span>')
gr.Markdown('<span style="color:black;font-size:15px">在多种语言上生成具有准确文本的图像</span>')
gr.Markdown('<span style="color:#3B5998;font-size:20px">如何使用</span>')
gr.Markdown(
'<span style="color:black;font-size:15px">在“提示词”文本框中输入你想要生成的图片所对应的文字描述。</span>')
gr.Markdown('<span style="color:#3B5998;font-size:18px">文本编辑</span>')
gr.Markdown(
'<span style="color:black;font-size:15px">你可以拖动“可编辑文字框”滑块来设置需要布局的文字数量,并分别设置对应的字体和文字内容;'
'请注意,文本框之间不能有重叠,否则模型将不会生成图片。</span>')
gr.Markdown('<span style="color:black;font-size:15px">最后点击运行按钮,即可生成图片!</span>')
# Hidden Markdown component that receives the second output of process().
with gr.Row():
result_info = gr.Markdown('debug', visible=False)
# Positional inputs for process(): the generation settings, the canvas image, the model
# and scheduler choices, BBOX_MAX_NUM wrapped in a gr.State, then every per-box component.
# NOTE(review): the per-box order here is checkboxes, box sliders, fonts, texts, which
# differs from the order used for the update_box_num / load_box_list outputs.
args = [
num_samples, a_prompt, n_prompt,
conditioning_scale, cfg_scale, inference_steps, seed, usr_prompt,
rect_img, base_model, scheduler_name, gr.State(BBOX_MAX_NUM),
*(rect_cb_list + rect_box_list + rect_font_name_list + rect_usr_text_list)
]
# Clicking Run calls process(*args); its two return values fill the gallery and the
# hidden info Markdown. process is not defined in this file (presumably from `functions`).
run_edit.click(
fn=process,
inputs=args,
outputs=[result_gallery, result_info]
)
if __name__ == "__main__":
    # Start the server only when the file is executed as a script: bind to
    # every network interface and request a public share link.
    launch_kwargs = dict(server_name='0.0.0.0', share=True)
    block.launch(**launch_kwargs)