diff --git a/.gitattributes b/.gitattributes
index a6344aac8c09253b3b630fb776ae94478aa0275b..119673380e13e775f7d9a603f3c5160ff5c77bb1 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+font/*.ttf filter=lfs diff=lfs merge=lfs -text
diff --git a/README.md b/README.md
index bf895dca6b969fa73a08ea544122ceee9b103e01..963f8c8ea50beb5623de4d1df758c15b12817efc 100644
--- a/README.md
+++ b/README.md
@@ -1,13 +1,12 @@
 ---
 title: JoyType
-emoji: 📊
-colorFrom: gray
+emoji: 🔥
+colorFrom: green
 colorTo: blue
 sdk: gradio
-sdk_version: 4.37.2
+sdk_version: 3.50.0
 app_file: app.py
 pinned: false
-license: apache-2.0
 ---
 
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
\ No newline at end of file
diff --git a/app.py b/app.py
new file mode 100644
index 0000000000000000000000000000000000000000..748c90a156e3559154ed1f6bad6baaefeb5f7abc
--- /dev/null
+++ b/app.py
@@ -0,0 +1,261 @@
+# encoding=utf8
+
+
+import os
+import cv2
+import gradio as gr
+import numpy as np
+import re
+import json
+
+from huggingface_hub import login
+from functions import *
+from gradio.components import Component
+
+login(token=os.getenv('LOGIN_TOKEN'))  # Hugging Face Hub token is read from the LOGIN_TOKEN environment variable
+css = './css/style.css'
+
+# Create the Gradio Blocks app with the Base theme and the custom stylesheet, then enable queuing
+block = gr.Blocks(
+    theme=gr.themes.Base(),
+    css=css
+).queue()
+
+# Read javascript/bboxHint.js and JSON-encode it so it can be embedded as a JS string literal in the load hook
+with open('javascript/bboxHint.js', 'r', encoding="utf-8") as file:
+    value = file.read()
+escaped_value = json.dumps(value)
+
+with block:
+    block.load(
+        fn=None,
+        _js=f"""() => {{
+                const script = document.createElement("script");
+                const text =  document.createTextNode({escaped_value});
+                script.appendChild(text);
+                document.head.appendChild(script);
+            }}"""
+    )
+    gr.HTML(
+        '<div style="text-align: center; margin: 20px auto;"> \
+        <h1 style="font-size:5em">JoyType</h1> \
+        <h1 style="font-size:2.5em">A Robust Design for Multilingual Visual Text Creation</h1> \
+        </div>'
+    )
+    with gr.Row():
+        with gr.Column(scale=3):
+            with gr.Accordion('Basic Settings(基础设置)', open=True):
+                with gr.Row(variant='compact'):
+                    usr_prompt = gr.Textbox(label='Prompt(提示词)', elem_id='usr_prompt')
+                with gr.Row(variant='compact'):
+                    base_model = gr.Dropdown(
+                        value='JoyType.v1.0', choices=model_list,
+                        label='Base Model(基模型)', elem_id='base_model', allow_custom_value=False
+                    )
+
+            with gr.Accordion('Advanced Settings(高级设置)', open=False):
+                with gr.Row(variant='compact'):
+                    image_width = gr.Slider(label='Image Width(宽度)', minimum=256, maximum=768, value=512, step=32)
+                    image_height = gr.Slider(label='Image Height(高度)', minimum=256, maximum=768, value=512, step=32)
+                with gr.Row(variant='compact'):
+                    num_samples = gr.Slider(label='Samples(生成数量)', minimum=1, maximum=4, value=2, step=1)
+                    inference_steps = gr.Slider(label='Steps(推理步数)', minimum=10, maximum=50, value=20, step=1)
+                with gr.Row(variant='compact'):
+                    conditioning_scale = gr.Slider(label='Text Strength(文字强度)', minimum=0.1, maximum=2., value=1., step=0.1)
+                    cfg_scale = gr.Slider(label='CFG Scale(CFG制强度)', minimum=1, maximum=20, value=7.5, step=0.5)
+                with gr.Row(variant='compact'):
+                    seed = gr.Slider(label='Seed(随机种子)', minimum=-1, maximum=2147483647, value=-1, step=1)
+                    scheduler_name = gr.Dropdown(
+                        value='PNDM', choices=[
+                            'PNDM', 'LMS', 'Euler', 'DPM', 'DDIM', 'Heun', 'Euler-Ancestral'
+                        ],
+                        label='Scheduler(采样器)', allow_custom_value=False
+                    )
+                with gr.Row(variant='compact'):
+                    a_prompt = gr.Textbox(
+                        label='Added Prompt(附加提示词)', max_lines=2,
+                        value='best quality, extremely detailed, supper legible text, '
+                              'clear text edges, clear strokes, neat writing, no watermarks'
+                    )
+                with gr.Row(variant='compact'):
+                    n_prompt = gr.Textbox(
+                        label='Negative Prompt(负向提示词)', max_lines=2,
+                        value='low-res, bad anatomy, extra digit, fewer digits, cropped, worst quality, '
+                              'low quality, watermark, unreadable text, messy words, distorted text, '
+                              'disorganized writing, advertising picture'
+                    )
+
+                base_model.change(
+                    fn=change_settings,
+                    inputs=base_model,
+                    outputs=[inference_steps, cfg_scale, scheduler_name]
+                )
+
+            with gr.Row():
+                with gr.Tab('Text Editing(文字编辑)', elem_id='MD-tab-t2i'):
+                    with gr.Row(variant='compact'):
+                        choice = gr.Slider(
+                            label=f'Text Boxes(可编辑文字框)',
+                            minimum=0, maximum=8, step=1, value=BBOX_INI_NUM
+                        )
+
+                    with gr.Row():
+                        with gr.Column(scale=2):
+                            rect_img = gr.Image(
+                                value=create_canvas(), label='Rect Position',
+                                elem_id='MD-bbox-rect-t2i', show_label=False, visible=True,
+                                height=300
+                            )
+                        with gr.Column(scale=3):
+                            rect_cb_list: list[Component] = []
+                            rect_box_list: list[Component] = []
+                            rect_font_name_list: list[Component] = []
+                            rect_usr_text_list: list[Component] = []
+
+                            with gr.Column():
+                                with gr.Row(elem_id='row_show'):
+                                    with gr.Column(scale=1, min_width=20):
+                                        gr.Markdown('<p align="center">Font(字体)</p>', elem_id='markdown_1')
+                                    with gr.Column(scale=2, min_width=20):
+                                        gr.Markdown('<p align="center">Text(文字内容)</p>', elem_id='markdown_2')
+
+                                row_layout = [gr.Row() for _ in range(BBOX_MAX_NUM)]  # all rows are created up front, then filled below
+                                for i in range(BBOX_MAX_NUM):
+                                    visible = i < BBOX_INI_NUM  # only the first BBOX_INI_NUM rows start out visible
+                                    with row_layout[i]:
+                                        fn = gr.Dropdown(
+                                            choices=font_list,
+                                            label='Font(字体)', value='CHN-华文行楷', visible=visible,
+                                            show_label=False, scale=1, allow_custom_value=False,
+                                            min_width=90, elem_id=f'font_input_{i}', container=False
+                                        )
+                                        ut = gr.Textbox(
+                                            label='Text(文字内容)', visible=visible, scale=2,
+                                            show_label=False, elem_id=f'text_input_{i}', container=False, max_lines=1
+                                        )
+                                    e = gr.Checkbox(label=f'{i}', value=visible, visible=False, min_width=10)
+                                    # Hidden sliders hold this box's x/y/w/h in the 0..1 range; changes are forwarded to onBoxChange in the page script
+                                    x = gr.Slider(label='x', value=0.4, minimum=0.0, maximum=1.0, step=0.0001,
+                                                  elem_id=f'MD-t2i-{i}-x',
+                                                  visible=False)
+                                    y = gr.Slider(label='y', value=0.4, minimum=0.0, maximum=1.0, step=0.0001,
+                                                  elem_id=f'MD-t2i-{i}-y',
+                                                  visible=False)
+                                    w = gr.Slider(label='w', value=0.2, minimum=0.0, maximum=1.0, step=0.0001,
+                                                  elem_id=f'MD-t2i-{i}-w',
+                                                  visible=False)
+                                    h = gr.Slider(label='h', value=0.2, minimum=0.0, maximum=1.0, step=0.0001,
+                                                  elem_id=f'MD-t2i-{i}-h',
+                                                  visible=False)
+                                    x.change(fn=None, inputs=x, outputs=x, _js=f'v => onBoxChange({i}, "x", v)',
+                                             show_progress=False, queue=False)
+                                    y.change(fn=None, inputs=y, outputs=y, _js=f'v => onBoxChange({i}, "y", v)',
+                                             show_progress=False, queue=False)
+                                    w.change(fn=None, inputs=w, outputs=w, _js=f'v => onBoxChange({i}, "w", v)',
+                                             show_progress=False, queue=False)
+                                    h.change(fn=None, inputs=h, outputs=h, _js=f'v => onBoxChange({i}, "h", v)',
+                                             show_progress=False, queue=False)
+                                    e.change(fn=None, inputs=e, outputs=e, _js=f'e => onBoxEnableClick({i}, e)',
+                                             queue=False)
+                                    # Collect this row's components; the lists are later passed as event inputs/outputs
+                                    rect_cb_list.append(e)
+                                    rect_box_list.extend([x, y, w, h])
+                                    rect_font_name_list.append(fn)
+                                    rect_usr_text_list.append(ut)
+
+                            choice.change(
+                                fn=update_box_num,
+                                inputs=[choice],
+                                outputs=[
+                                    *rect_cb_list, *rect_font_name_list, *rect_usr_text_list, *rect_box_list
+                                ]
+                            )
+                    with gr.Row():
+                        gr.Markdown('')
+                        run_edit = gr.Button(value='Run(运行)', elem_classes='run', elem_id='run_edit')
+                        gr.Markdown('')
+                    with gr.Row():
+                        with gr.Accordion(label='Examples(示例)', open=True):
+                            img_container = gr.Image(visible=False, label='Text Layout(文字布局)')
+                            example_id = gr.Textbox(value=-1, visible=False, label='ID(编号)')
+                            gen_examples = gr.Examples(
+                                [
+                                    [1, 'templates/1.png', 'landscape, Chinese style, ink peaks, poster', model_list[0], 1648703813, 3, 1],
+                                    [2, 'templates/2.png', 'a clock and medicine bottle has texts and "time"', model_list[0], 1654615998, 2, 1],
+                                    [3, 'templates/3.png', '漂亮的风景照，很多山峰，清澈的湖水', model_list[3], 2078698098, 3, 1],
+                                    [4, 'templates/4.png', 'a vodka, on the bar, dim background', model_list[2], 443791646, 3, 1],
+                                    [5, 'templates/5.png', '画有玫瑰的卡片，明亮的背景', model_list[4], 516210890, 2, 1],
+                                    [6, 'templates/6.png', 'posters on the table, with pens, clear background, starry sky, moon', model_list[1], 228167646, 4, 1],
+                                    [7, 'templates/7.png', 'snowy landscape, domed cabin, winter scene, cozy atmosphere, soft lighting', model_list[5], 695897181, 3, 1],
+                                    [8, 'templates/8.png', '一张关于健康教育的卡片，上面有一些文字，有一些食物图标，背景里有一些水果喝饮料的图标，且背景是模糊的', model_list[1], 936188591, 6, 1],
+                                ],
+                                [example_id, img_container, usr_prompt, base_model, seed, choice, num_samples],
+                                examples_per_page=5,
+                                label=''
+                            )
+
+                        example_id.change(
+                            fn=load_box_list,
+                            inputs=[example_id, choice],
+                            outputs=[
+                                *rect_cb_list, *rect_font_name_list, *rect_usr_text_list, *rect_box_list, example_id
+                            ]
+                        )
+
+                    rect_img.clear(re_edit, None, [*rect_box_list, rect_img, image_width, image_height])
+                    image_width.release(resize_w, [image_width, rect_img], rect_img)
+                    image_height.release(resize_h, [image_height, rect_img], rect_img)
+                    
+        with gr.Column(scale=2):
+            with gr.Row():
+                result_gallery = gr.Gallery(
+                    label='Result(结果)', show_label=True, preview=True, columns=8,
+                    allow_preview=True, elem_id='gallery'
+                )
+            with gr.Row():
+                with gr.Tab("Introduction"):
+                    gr.Markdown('<span style="color:#3B5998;font-size:20px">What we can do</span>')
+                    gr.Markdown(
+                        '<span style="color:black;font-size:15px">Generating images with accurately represented text in multi-language.</span>')
+                    gr.Markdown('<span style="color:#3B5998;font-size:20px">How to use</span>')
+                    gr.Markdown(
+                        '<span style="color:black;font-size:15px">Enter a description of the image you want to generate in the "Prompt" text box.</span>')
+                    gr.Markdown('<span style="color:#3B5998;font-size:18px">Text Editing</span>')
+                    gr.Markdown(
+                        '<span style="color:black;font-size:15px">You can drag the "Text Boxes" slider to set the number of text to be laid out, '
+                        'and set the corresponding font and text content respectively, Note that there must be no overlap between the text boxes, '
+                        'or the model will not generate an image.</span>')
+                    gr.Markdown(
+                        '<span style="color:black;font-size:15px">Finally, click the Run button to generate a picture!</span>')
+                with gr.Tab("说明"):
+                    gr.Markdown('<span style="color:#3B5998;font-size:20px">我们能做什么</span>')
+                    gr.Markdown('<span style="color:black;font-size:15px">在多种语言上生成具有准确文本的图像</span>')
+                    gr.Markdown('<span style="color:#3B5998;font-size:20px">如何使用</span>')
+                    gr.Markdown(
+                        '<span style="color:black;font-size:15px">在“提示词”文本框中输入你想要生成的图片所对应的文字描述。</span>')
+                    gr.Markdown('<span style="color:#3B5998;font-size:18px">文本编辑</span>')
+                    gr.Markdown(
+                        '<span style="color:black;font-size:15px">你可以拖动“可编辑文字框”滑块来设置需要布局的文字数量，并分别设置对应的字体和文字内容；'
+                        '请注意，文本框之间不能有重叠，否则模型将不会生成图片。</span>')
+                    gr.Markdown('<span style="color:black;font-size:15px">最后点击运行按钮，即可生成图片！</span>')
+            with gr.Row():
+                result_info = gr.Markdown('debug', visible=False)
+
+    args = [
+        num_samples, a_prompt, n_prompt,
+        conditioning_scale, cfg_scale, inference_steps, seed, usr_prompt, 
+        rect_img, base_model, scheduler_name, gr.State(BBOX_MAX_NUM),
+        *(rect_cb_list + rect_box_list + rect_font_name_list + rect_usr_text_list)
+    ]
+    run_edit.click(
+        fn=process,
+        inputs=args,
+        outputs=[result_gallery, result_info]
+    )
+
+
+if __name__ == "__main__":
+    block.launch(
+        server_name='0.0.0.0',  # listen on all network interfaces
+        share=True,  # NOTE(review): also requests a public share link; believed unsupported on Hugging Face Spaces — confirm it is wanted
+    )
diff --git a/css/style.css b/css/style.css
new file mode 100644
index 0000000000000000000000000000000000000000..e2a86bdb970fac242dc551b6a2f4d0676f7205e3
--- /dev/null
+++ b/css/style.css
@@ -0,0 +1,157 @@
+body, html {
+  height: 100%;
+  margin: 0;
+}
+.gradio-container {
+  height: 100%;
+}
+
+/**gallery**/
+#gallery {
+    height: 400px;
+    width: 100%;
+}
+
+/**every font dropdown in editing**/
+#font_input_0 input {
+    color: #ff0000;
+    font-size: 15px;
+}
+#font_input_1 input {
+    color: #ff9900;
+    font-size: 15px;
+}
+#font_input_2 input {
+    color: #996633;
+    font-size: 15px;
+}
+#font_input_3 input {
+    color: #33cc33;
+    font-size: 15px;
+}
+#font_input_4 input {
+    color: #33cccc;
+    font-size: 15px;
+}
+#font_input_5 input {
+    color: #0066ff;
+    font-size: 15px;
+}
+#font_input_6 input {
+    color: #ff3399;
+    font-size: 15px;
+}
+#font_input_7 input {
+    color: #cc00cc;
+    font-size: 15px;
+}
+#font_input_0.block.svelte-90oupt {
+    height: 30px;
+}
+#font_input_1.block.svelte-90oupt {
+    height: 30px;
+}
+#font_input_2.block.svelte-90oupt {
+    height: 30px;
+}
+#font_input_3.block.svelte-90oupt {
+    height: 30px;
+}
+#font_input_4.block.svelte-90oupt {
+    height: 30px;
+}
+#font_input_5.block.svelte-90oupt {
+    height: 30px;
+}
+#font_input_6.block.svelte-90oupt {
+    height: 30px;
+}
+#font_input_7.block.svelte-90oupt {
+    height: 30px;
+}
+
+/**every text box in editing**/
+#text_input_0 input {
+    font-size: 15px;
+}
+#text_input_1 input {
+    font-size: 15px;
+}
+#text_input_2 input {
+    font-size: 15px;
+}
+#text_input_3 input {
+    font-size: 15px;
+}
+#text_input_4 input {
+    font-size: 15px;
+}
+#text_input_5 input {
+    font-size: 15px;
+}
+#text_input_6 input {
+    font-size: 15px;
+}
+#text_input_7 input {
+    font-size: 15px;
+}
+#text_input_0.block.svelte-90oupt {
+    height: 30px;
+}
+#text_input_1.block.svelte-90oupt {
+    height: 30px;
+}
+#text_input_2.block.svelte-90oupt {
+    height: 30px;
+}
+#text_input_3.block.svelte-90oupt {
+    height: 30px;
+}
+#text_input_4.block.svelte-90oupt {
+    height: 30px;
+}
+#text_input_5.block.svelte-90oupt {
+    height: 30px;
+}
+#text_input_6.block.svelte-90oupt {
+    height: 30px;
+}
+#text_input_7.block.svelte-90oupt {
+    height: 30px;
+}
+
+#row_show {
+    display: block;
+    height: 25px;
+}
+#markdown_1 {
+    display: block;
+    height: 25px;
+}
+#markdown_2 {
+    display: block;
+    height: 25px;
+}
+#markdown_1 span {
+    display: block;
+    height: 25px;
+}
+#markdown_2 span {
+    display: block;
+    height: 25px;
+}
+
+#run_edit {
+    background-color: #ff4500;
+    color: white;
+}
+#run_upload {
+    background-color: #ff4500;
+    color: white;
+}
+
+/**************************************
+footer.svelte-1ax1toq {
+    display: none !important;
+}
+/**************************************/
diff --git a/font/Alice.ttf b/font/Alice.ttf
new file mode 100644
index 0000000000000000000000000000000000000000..b500cb8825d06dfdcb33685fac3f1cd6accf5a7a
--- /dev/null
+++ b/font/Alice.ttf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3b7ea628cb473d4b4737c4ffcd3f760bf6ca325d1344a134579ca636e33d9917
+size 128720
diff --git a/font/Aoyagireisyosimo.ttf b/font/Aoyagireisyosimo.ttf
new file mode 100644
index 0000000000000000000000000000000000000000..14f5f17ee5abf675f350a639936fbaa48174f2af
--- /dev/null
+++ b/font/Aoyagireisyosimo.ttf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a4c55ad5f72e65a482931d967725e97ff206eb3019c87281d9e5514a63bb8db9
+size 4412684
diff --git a/font/Automatons.ttf b/font/Automatons.ttf
new file mode 100644
index 0000000000000000000000000000000000000000..a03b4b0d363806d9c856c986d22a293dc08dc3b6
--- /dev/null
+++ b/font/Automatons.ttf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:951850cc3b0839d886fcb773accca92c22f6eda0178b76a72c81377978100408
+size 6784
diff --git a/font/BebasNeue.ttf b/font/BebasNeue.ttf
new file mode 100644
index 0000000000000000000000000000000000000000..a43c7492bcbcfaabea5b063116879f42d65bb30e
--- /dev/null
+++ b/font/BebasNeue.ttf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:830ea186acffc2316ed1a4e42319246ba3b46b04e33a211079249bf901193f04
+size 57676
diff --git a/font/Caveat.ttf b/font/Caveat.ttf
new file mode 100644
index 0000000000000000000000000000000000000000..817d528f98135c21ab6f7ab2c9354308905d5d9e
--- /dev/null
+++ b/font/Caveat.ttf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:263493f012c8ffbf3a69a38d43ce494c42d1cb2d44b7cb9eff10095f08fce719
+size 391068
diff --git a/font/ChosunGs.ttf b/font/ChosunGs.ttf
new file mode 100644
index 0000000000000000000000000000000000000000..e2eec46e8857da2f5452db5bd4cdb5d703bc1eca
--- /dev/null
+++ b/font/ChosunGs.ttf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4e191bc30d23ce34797dcaf7a0965dedd67a2d85cc5dd87325ee96626cba7bea
+size 9260104
diff --git a/font/Dongle.ttf b/font/Dongle.ttf
new file mode 100644
index 0000000000000000000000000000000000000000..930e06132d662334e1440d259c9ecb8d09108417
--- /dev/null
+++ b/font/Dongle.ttf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:010703af6b86a860847eea86823387e05669faa75e81e6a0040398906a0f1fe7
+size 4458436
diff --git a/font/Filthyrich.ttf b/font/Filthyrich.ttf
new file mode 100644
index 0000000000000000000000000000000000000000..362107066e0b8993348b4da99245417a22f20a8e
--- /dev/null
+++ b/font/Filthyrich.ttf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6eb72bd16d5613612734a103a9cd9a7ffba83857675c8b53891eb2c3b8a3e582
+size 125132
diff --git a/font/GlTsukiji.ttf b/font/GlTsukiji.ttf
new file mode 100644
index 0000000000000000000000000000000000000000..c0ca5ef029d5b89a1c7079c3ddc8f8ae1dc694d3
--- /dev/null
+++ b/font/GlTsukiji.ttf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6907481e7548bc723ef312e7508b21ea2e4b30313a13e7e2042ef4ad0953f7c3
+size 334440
diff --git a/font/Gloock.ttf b/font/Gloock.ttf
new file mode 100644
index 0000000000000000000000000000000000000000..054f3b4c8e2c7035dca70793186e6437351e79cf
--- /dev/null
+++ b/font/Gloock.ttf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:39e2af503dfb3d1e093f0178863e2b163c59cdc4f4c5d152d50f51da19baf8fa
+size 94940
diff --git a/font/GodoMaum.ttf b/font/GodoMaum.ttf
new file mode 100644
index 0000000000000000000000000000000000000000..c9b0a4930d5c8067601db9dd981ee18f99bfc89f
--- /dev/null
+++ b/font/GodoMaum.ttf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3ea6333b8a9b875d778598debd260695770a813fdbdd0d233845329151e43ffd
+size 2760760
diff --git a/font/KouzanMouhitu.ttf b/font/KouzanMouhitu.ttf
new file mode 100644
index 0000000000000000000000000000000000000000..a609044181f450e91280b34d52f2442c753be4a7
--- /dev/null
+++ b/font/KouzanMouhitu.ttf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:582f269574e71336f1ee8318664ff6cfafd84293b3267ddf39410015713c951e
+size 8238216
diff --git a/font/Lemon.ttf b/font/Lemon.ttf
new file mode 100644
index 0000000000000000000000000000000000000000..bb1a494dac425b7211fe07e6e50406f56fc21680
--- /dev/null
+++ b/font/Lemon.ttf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:664fb2be44c5a08ba5ec89d06c3d523e0c2650a18922e667b8d9573a88ec37c4
+size 73592
diff --git a/font/MKyrill.ttf b/font/MKyrill.ttf
new file mode 100644
index 0000000000000000000000000000000000000000..f40ac29d3db7c0c4d84c08fd3b873dffb3c5750c
--- /dev/null
+++ b/font/MKyrill.ttf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:da5256add8bb73db799e2f354ea9e4bf2fd893c2d3af583021b09a67f892962f
+size 81868
diff --git a/font/Nextstep.ttf b/font/Nextstep.ttf
new file mode 100644
index 0000000000000000000000000000000000000000..8f0d983786c063826ef7c6e4d0f055df5a275c49
--- /dev/null
+++ b/font/Nextstep.ttf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eae79e803499e66a6f15a8fab6e19bccbeeb2fe3b51722894ff2f1584fb9cafa
+size 19952
diff --git a/font/Okesip.ttf b/font/Okesip.ttf
new file mode 100644
index 0000000000000000000000000000000000000000..a73b25dd062722be525570f1c6bbb50dbec2c40f
--- /dev/null
+++ b/font/Okesip.ttf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ea383c52f8fe1d4473d1a0d8d5cca42fda375d9f317bf42a0bd21251a8976e28
+size 74056
diff --git a/font/Otomanopee.ttf b/font/Otomanopee.ttf
new file mode 100644
index 0000000000000000000000000000000000000000..909c896551995464746ebe35f38351b4c58d03a1
--- /dev/null
+++ b/font/Otomanopee.ttf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cb4bbcf825544a5102991f5118bbaa2440977ea1f4d5446c22963d70fd435602
+size 366684
diff --git a/font/Playwrite.ttf b/font/Playwrite.ttf
new file mode 100644
index 0000000000000000000000000000000000000000..2153d4d03d03588677ce8a022f08825187534664
--- /dev/null
+++ b/font/Playwrite.ttf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:17810b62177426ffbe7b661598f8d1a67254daad70aac6a375c6ee1863a3c711
+size 344044
diff --git a/font/Shrikhand.ttf b/font/Shrikhand.ttf
new file mode 100644
index 0000000000000000000000000000000000000000..4f964174c3f525b0af09c41bdcb69cb87e2a3528
--- /dev/null
+++ b/font/Shrikhand.ttf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:00269b4efb014de272c9313e16b7e166617c6fdac819a2bb063ef1e371b5801b
+size 213868
diff --git a/font/UnDotum.ttf b/font/UnDotum.ttf
new file mode 100644
index 0000000000000000000000000000000000000000..f9028025f04bb9c549c22c510d977a17829c2473
--- /dev/null
+++ b/font/UnDotum.ttf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5b8373e126bb61f59105cf7f54a47eb1b089c2b0aacb70c6cd688bd8ea76cdc9
+size 3656228
diff --git "a/font/\345\215\216\346\226\207\346\226\260\351\255\217.ttf" "b/font/\345\215\216\346\226\207\346\226\260\351\255\217.ttf"
new file mode 100644
index 0000000000000000000000000000000000000000..904bc92ca1f75366fa22349261df4c44bfa2d154
--- /dev/null
+++ "b/font/\345\215\216\346\226\207\346\226\260\351\255\217.ttf"
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:361dc6d522d417fc5705948e65d191f7826147d390980f4cbdcfbca4a0200290
+size 4044420
diff --git "a/font/\345\215\216\346\226\207\350\241\214\346\245\267.ttf" "b/font/\345\215\216\346\226\207\350\241\214\346\245\267.ttf"
new file mode 100644
index 0000000000000000000000000000000000000000..109dc3bc5a2905a46ce791470fd4b4e21c65d0b8
--- /dev/null
+++ "b/font/\345\215\216\346\226\207\350\241\214\346\245\267.ttf"
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6e893a5a618b39f317362efd77f3c6aeb16149328cb66872c9db8cb457a71d32
+size 4009504
diff --git "a/font/\345\256\205\345\256\266\351\272\246\345\205\213\347\254\224.ttf" "b/font/\345\256\205\345\256\266\351\272\246\345\205\213\347\254\224.ttf"
new file mode 100644
index 0000000000000000000000000000000000000000..c0d88c6af8e216cf2557311dfeba82f7da28b8a2
--- /dev/null
+++ "b/font/\345\256\205\345\256\266\351\272\246\345\205\213\347\254\224.ttf"
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:15cec9ab9565e0851e144144f7023c4c59f4fcdea96710a75cf668049f79b3df
+size 17093408
diff --git "a/font/\345\267\264\350\234\200\345\242\250\350\277\271.ttf" "b/font/\345\267\264\350\234\200\345\242\250\350\277\271.ttf"
new file mode 100644
index 0000000000000000000000000000000000000000..ac1979672145dd576fd2d4d355521b03dfc4ea1d
--- /dev/null
+++ "b/font/\345\267\264\350\234\200\345\242\250\350\277\271.ttf"
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d81299f3ed474a0a9ed80db21410c604c1ca561f9d36b8ff62ce4137d91c0cc8
+size 7171284
diff --git "a/font/\346\226\221\351\251\254\345\255\227\347\261\273.ttf" "b/font/\346\226\221\351\251\254\345\255\227\347\261\273.ttf"
new file mode 100644
index 0000000000000000000000000000000000000000..3ac3a5e850e0ccf6bb7b91db0347fe7a4af03d5e
--- /dev/null
+++ "b/font/\346\226\221\351\251\254\345\255\227\347\261\273.ttf"
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:93a253e524b7951e2394e1ac2bf981aa268ce0ae4b11934682de6c42f19354e1
+size 2719964
diff --git "a/font/\346\270\205\346\235\276\346\211\213\345\206\231\344\275\223.ttf" "b/font/\346\270\205\346\235\276\346\211\213\345\206\231\344\275\223.ttf"
new file mode 100644
index 0000000000000000000000000000000000000000..56c74fd6fa828b53b35808c3cd2b64e317baa8a8
--- /dev/null
+++ "b/font/\346\270\205\346\235\276\346\211\213\345\206\231\344\275\223.ttf"
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ddd51d365ff4b1b5c58e2cc722226cd3109e9b33fcd6538b6599cf308dbc90e3
+size 6348828
diff --git "a/font/\346\274\224\347\244\272\345\244\217\350\241\214\346\245\267.ttf" "b/font/\346\274\224\347\244\272\345\244\217\350\241\214\346\245\267.ttf"
new file mode 100644
index 0000000000000000000000000000000000000000..8399590c7cbaf2947674bc629089dfc679043694
--- /dev/null
+++ "b/font/\346\274\224\347\244\272\345\244\217\350\241\214\346\245\267.ttf"
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fac1f0ec044b63aa45c0775c17a2f03cbf6427b4bd7b93da87dde9e7e2020cfc
+size 10073644
diff --git "a/font/\350\276\260\345\256\207\350\220\275\351\233\201\344\275\223.ttf" "b/font/\350\276\260\345\256\207\350\220\275\351\233\201\344\275\223.ttf"
new file mode 100644
index 0000000000000000000000000000000000000000..cf7debe88e4b09c33cbb50756e6efa92f5279bc8
--- /dev/null
+++ "b/font/\350\276\260\345\256\207\350\220\275\351\233\201\344\275\223.ttf"
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1f04002cc0906d2bda376fb133d4a8160805db0113e92baf6da54aeafde88bd7
+size 4511804
diff --git "a/font/\351\233\267\347\233\226\344\275\223.ttf" "b/font/\351\233\267\347\233\226\344\275\223.ttf"
new file mode 100644
index 0000000000000000000000000000000000000000..559a20923326e4c54ca73c941bcef8ed09c5267e
--- /dev/null
+++ "b/font/\351\233\267\347\233\226\344\275\223.ttf"
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:33fd784e6aa6b2f8e8e6064280ea6fc473246b6371884155d75c55577e6db4c2
+size 4336920
diff --git "a/font/\351\235\222\346\237\263\351\232\266\344\271\246.ttf" "b/font/\351\235\222\346\237\263\351\232\266\344\271\246.ttf"
new file mode 100644
index 0000000000000000000000000000000000000000..14f5f17ee5abf675f350a639936fbaa48174f2af
--- /dev/null
+++ "b/font/\351\235\222\346\237\263\351\232\266\344\271\246.ttf"
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a4c55ad5f72e65a482931d967725e97ff206eb3019c87281d9e5514a63bb8db9
+size 4412684
diff --git "a/font/\351\270\277\351\233\267\346\235\277\344\271\246\347\256\200\344\275\223.ttf" "b/font/\351\270\277\351\233\267\346\235\277\344\271\246\347\256\200\344\275\223.ttf"
new file mode 100644
index 0000000000000000000000000000000000000000..921b7b09bf3076c76846aa9f741b580ca40c4d88
--- /dev/null
+++ "b/font/\351\270\277\351\233\267\346\235\277\344\271\246\347\256\200\344\275\223.ttf"
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:30a8a257480dbe55fd872722fb6ec52a7939ea0d34ddfcedbdf3595430d62d6b
+size 11651532
diff --git a/functions.py b/functions.py
new file mode 100644
index 0000000000000000000000000000000000000000..2e266d830a89b26d0dcd218033a0e30caf1f7020
--- /dev/null
+++ b/functions.py
@@ -0,0 +1,522 @@
+import json
+import os
+import cv2
+import random
+import numpy as np
+import gradio as gr
+import torch
+
+from zhipuai import ZhipuAI
+from pytorch_lightning import seed_everything
+from pprint import pprint
+from PIL import Image, ImageDraw, ImageFont
+from diffusers import (
+    ControlNetModel,
+    StableDiffusionControlNetPipeline,
+)
+from diffusers import (
+    DDIMScheduler,
+    PNDMScheduler,
+    EulerAncestralDiscreteScheduler,
+    DPMSolverMultistepScheduler,
+    EulerDiscreteScheduler,
+    LMSDiscreteScheduler,
+    HeunDiscreteScheduler
+)
+from controlnet_aux import (
+    PidiNetDetector,
+    HEDdetector
+)
+
+
+BBOX_MAX_NUM = 8  # number of box slots the helpers below iterate over
+BBOX_INI_NUM = 0
+MAX_LENGTH = 20  # render_all_text crops longer texts to this many characters
+device = 'cuda'  # passed to `.to(device)` in load_pipeline
+pipeline = None  # cached pipeline, loaded lazily by process()
+pre_pipeline = None  # identifies what the cached pipeline was loaded for
+model_root = os.getenv('REPO_ROOT')  # None when REPO_ROOT is unset
+scheduler_root = f'{model_root}/Scheduler'
+model_list =[  # display names; change_settings relies on this order
+    'JoyType.v1.0', 'RevAnimated-animation-动漫', 'GhostMix-animation-动漫',
+    'rpg.v5-fantasy_realism-奇幻写实', 'midjourneyPapercut-origami-折纸版画',
+    'dvarchExterior-architecture-建筑', 'awpainting.v13-portrait-人物肖像'
+]
+chn_example_dict = {  # Chinese example prompts -> English prompt used in their place
+    '漂亮的风景照，很多山峰，清澈的湖水': 'beautiful landscape, many peaks, clear lake',
+    '画有玫瑰的卡片，明亮的背景': 'a card with roses, bright background',
+    '一张关于健康教育的卡片，上面有一些文字，有一些食物图标，背景里有一些水果喝饮料的图标，且背景是模糊的': \
+        'a card for health education, with some writings on it, '
+        'food icons on the card, some fruits and drinking in the background, blur background '
+}
+match_dict = {  # display name -> model directory name under model_root
+    'JoyType.v1.0': 'JoyType-v1-1M',
+    'RevAnimated-animation-动漫': 'rev-animated-v1-2-2',
+    'GhostMix-animation-动漫': 'GhostMix_V2.0',
+    'rpg.v5-fantasy_realism-奇幻写实': 'rpg_v5',
+    'midjourneyPapercut-origami-折纸版画': 'midjourneyPapercut_v1',
+    'dvarchExterior-architecture-建筑': 'dvarchExterior',
+    'awpainting.v13-portrait-人物肖像': 'awpainting_v13'
+}
+font_list = [  # '<LANG>-<name>' entries; the part after '-' is the font file stem
+    'CHN-华文行楷',
+    'CHN-华文新魏',
+    'CHN-清松手写体',
+    'CHN-巴蜀墨迹',
+    'CHN-雷盖体',
+    'CHN-演示夏行楷',
+    'CHN-鸿雷板书简体',
+    'CHN-斑马字类',
+    'CHN-青柳隶书',
+    'CHN-辰宇落雁体',
+    'CHN-宅家麦克笔',
+    'ENG-Playwrite',
+    'ENG-Okesip',
+    'ENG-Shrikhand',
+    'ENG-Nextstep',
+    'ENG-Filthyrich',
+    'ENG-BebasNeue',
+    'ENG-Gloock',
+    'ENG-Lemon',
+    'RUS-Automatons',
+    'RUS-MKyrill',
+    'RUS-Alice',
+    'RUS-Caveat',
+    'KOR-ChosunGs',
+    'KOR-Dongle',
+    'KOR-GodoMaum',
+    'KOR-UnDotum',
+    'JPN-GlTsukiji',
+    'JPN-Aoyagireisyosimo',
+    'JPN-KouzanMouhitu',
+    'JPN-Otomanopee'
+]
+
+
+def change_settings(base_model):
+    """Return the three preset updates (two numbers, one scheduler name) for `base_model`.
+
+    Presets are listed in the same order as `model_list`. An unrecognized
+    model yields three no-op updates, so the return arity always matches
+    (the original fell through and implicitly returned None).
+    """
+    presets = (
+        (20, 7.5, 'PNDM'), (30, 8.5, 'Euler'), (32, 8.5, 'Euler'),
+        (20, 7.5, 'DPM'), (25, 6.5, 'Euler'), (25, 8.5, 'Euler'),
+        (25, 7, 'DPM'),
+    )
+    for name, preset in zip(model_list, presets):
+        if base_model == name:
+            return tuple(gr.update(value=v) for v in preset)
+    # Unknown model: leave the three controls untouched.
+    return gr.update(), gr.update(), gr.update()
+
+
+def update_box_num(choice):
+    """Build the component updates applied when the number of boxes changes.
+
+    Slots with index < `choice` are enabled; the remaining slots (up to
+    BBOX_MAX_NUM) are disabled and reset to their defaults. Returns the
+    checkbox, font, text and box-coordinate updates flattened in that order.
+    """
+    checks, fonts, texts, coords = [], [], [], []
+    for slot in range(BBOX_MAX_NUM):
+        active = slot < choice
+        checks.append(gr.update(value=active))
+        if active:
+            fonts.append(gr.update(visible=True))
+            texts.append(gr.update(visible=True))
+            # Coordinate inputs stay hidden; their values are left alone.
+            coords.extend(gr.update(visible=False) for _ in range(4))
+            continue
+        # Inactive slot: hide it and restore the default font/text/geometry.
+        fonts.append(gr.update(visible=False, value='CHN-华文行楷'))
+        texts.append(gr.update(visible=False, value=''))
+        coords.extend(gr.update(visible=False, value=v) for v in (0.4, 0.4, 0.2, 0.2))
+
+    return *checks, *fonts, *texts, *coords
+
+
+def load_box_list(example_id, choice):
+    """Load `templates/{example_id}.json` and convert it into component updates.
+
+    Returns the checkbox, font, text and coordinate updates (flattened in that
+    order) followed by one extra update whose value is -1. `choice` is
+    accepted but not used.
+    """
+    with open(f'templates/{example_id}.json', 'r') as f:
+        info = json.load(f)
+
+    checks, fonts, texts, coords = [], [], [], []
+    for idx in range(BBOX_MAX_NUM):
+        shown = info['visible'][idx]
+        # Four consecutive entries of 'pos' belong to each box.
+        box = info['pos'][idx * 4: (idx + 1) * 4]
+        checks.append(gr.update(value=shown))
+        fonts.append(gr.update(value=info['font'][idx], visible=shown))
+        texts.append(gr.update(value=info['text'][idx], visible=shown))
+        coords.extend(gr.update(value=box[k]) for k in range(4))
+
+    # The trailing update sets one more output to -1.
+    return *checks, *fonts, *texts, \
+        *coords, gr.update(value=-1)
+
+    
+def re_edit():
+    """Reset all boxes and the canvas to their defaults.
+
+    Returns four coordinate updates (0.4, 0.4, 0.2, 0.2) per box slot, a fresh
+    canvas image component, and two sliders set to 512.
+    """
+    defaults = (0.4, 0.4, 0.2, 0.2)
+    resets = [gr.update(value=v) for _ in range(BBOX_MAX_NUM) for v in defaults]
+    canvas = gr.Image(
+        value=create_canvas(), label='Rect Position',
+        elem_id='MD-bbox-rect-t2i', show_label=False, visible=True
+    )
+    return *resets, canvas, gr.Slider(value=512), gr.Slider(value=512)
+
+
+def resize_w(w, img):  # resize `img` to width `w`, keeping its current height
+    return cv2.resize(img, dsize=(w, img.shape[0]))
+
+
+def resize_h(h, img):  # resize `img` to height `h`, keeping its current width
+    return cv2.resize(img, dsize=(img.shape[1], h))
+
+
+def create_canvas(w=512, h=512, c=3, line=5):
+    """Return an (h, w, c) uint8 canvas of 200s with grid lines (150) and a [200, 0, 0] centre mark."""
+    # Spacing is w // line on both axes; clamped to >= 1 so w < line cannot divide by zero.
+    # NOTE(review): rows reuse the width-derived spacing; confirm for non-square canvases.
+    step = max(1, w // line)
+    image = np.full((h, w, c), 200, dtype=np.uint8)
+    image[::step, :, :] = 150  # horizontal grid lines
+    image[:, ::step, :] = 150  # vertical grid lines
+    image[h // 2 - 8:h // 2 + 8, w // 2 - 8:w // 2 + 8, :] = [200, 0, 0]
+    return image
+
+
+def canny(img):
+    """Run cv2.Canny (thresholds 64 / 100) on `img` and return a 3-channel PIL image."""
+    low_threshold, high_threshold = 64, 100
+    edges = cv2.Canny(img, low_threshold, high_threshold)
+
+    # Replicate the single-channel edge map into three identical channels.
+    stacked = np.stack((edges, edges, edges), axis=2)
+    return Image.fromarray(stacked)
+
+
+def judge_overlap(coord_list1, coord_list2):
+    """Return True when two [x1, y1, x2, y2] boxes overlap (touching edges do not count)."""
+    a, b = coord_list1, coord_list2
+    return a[0] < b[2] and a[2] > b[0] and a[1] < b[3] and a[3] > b[1]
+
+
+def parse_render_list(box_list, shape, box_num):
+    """Validate the flattened box inputs and build the list of texts to render.
+
+    `box_list` is read as `box_num` enable flags, then `4 * box_num` relative
+    (x, y, w, h) values, then `box_num` font names, then `box_num` texts.
+    `shape` is (width, height) in pixels.
+
+    Returns `(render_list, True)`, where every entry is a dict with the keys
+    'text', 'polygon' ([x, y, w, h] in pixels) and 'font_name' (the part of
+    the font entry after the last '-'), or `([], False)` after a Gradio
+    warning when a box has no area, names an unknown font, or overlaps
+    another box. Empty texts are replaced by 'JoyType'.
+    """
+    width, height = shape[0], shape[1]
+    valid_list = box_list[:box_num]
+    pos_list = box_list[box_num: 5 * box_num]
+    font_name_list = box_list[5 * box_num: 6 * box_num]
+    text_list = box_list[6 * box_num: 7 * box_num]
+    empty_flag = False
+    render_list = []
+
+    for i, valid in enumerate(valid_list):
+        if not valid:
+            continue
+        # Relative coordinates -> pixels.
+        pos = pos_list[i * 4: (i + 1) * 4]
+        top_left_x = int(pos[0] * width)
+        top_left_y = int(pos[1] * height)
+        w = int(pos[2] * width)
+        h = int(pos[3] * height)
+        font_name = str(font_name_list[i])
+        text = str(text_list[i])
+        if text == '':
+            empty_flag = True
+            text = 'JoyType'
+        if w <= 0 or h <= 0:
+            gr.Warning(f'Area of the box{i + 1} cannot be zero!')
+            return [], False
+        # Explicit membership test instead of `assert` inside try/except:
+        # asserts are stripped under `python -O`, which would have let
+        # unknown fonts through.
+        if font_name not in font_list:
+            gr.Warning('Please choose a correct font!')
+            return [], False
+
+        render_list.append({
+            'text': text,
+            'polygon': [top_left_x, top_left_y, w, h],
+            'font_name': font_name.split('-')[-1],
+        })
+
+    if empty_flag:
+        gr.Warning('Null strings will be filled automatically!')
+
+    # Compare every pair of boxes once, in corner form [x1, y1, x2, y2].
+    corners = [
+        [x, y, x + w, y + h]
+        for x, y, w, h in (entry['polygon'] for entry in render_list)
+    ]
+    for i, first in enumerate(corners):
+        for second in corners[i + 1:]:
+            if judge_overlap(first, second):
+                gr.Warning('Find overlapping boxes!')
+                return [], False
+
+    return render_list, True
+
+
+def render_all_text(render_list, shape, threshold=512):
+    """Render each entry's text in white into its box on a black RGB board.
+
+    `shape` is (width, height); boxes taller than wide are written vertically,
+    one character below the other. `threshold` is accepted but not used.
+    """
+    board = Image.new('RGB', (shape[0], shape[1]), 'black')
+
+    for text_dict in render_list:
+        text, polygon, font_name = (text_dict[k] for k in ('text', 'polygon', 'font_name'))
+        if len(text) > MAX_LENGTH:
+            text = text[:MAX_LENGTH]
+            gr.Warning(f'{text}... exceeds the maximum length {MAX_LENGTH} and has been cropped.')
+        w, h = polygon[2:]
+        vert = w < h
+        image4ratio = Image.new('RGB', (1024, 1024), 'black')
+        draw = ImageDraw.Draw(image4ratio)
+        # Pillow signals an unreadable font file with OSError (FileNotFoundError is
+        # only a subclass), so catch OSError to make the .otf fallback reachable.
+        try:
+            font = ImageFont.truetype(f'./font/{font_name}.ttf', encoding='utf-8', size=50)
+        except OSError:
+            font = ImageFont.truetype(f'./font/{font_name}.otf', encoding='utf-8', size=50)
+        if not vert:
+            draw.text(xy=(0, 0), text=text, font=font, fill='white')
+            _, _, _tw, _th = draw.textbbox(xy=(0, 0), text=text, font=font)
+            _th += 1
+        else:
+            _tw, y_c = 0, 0
+            for c in text:
+                draw.text(xy=(0, y_c), text=c, font=font, fill='white')
+                _l, _t, _r, _b = font.getbbox(c)
+                _tw = max(_tw, _r - _l)
+                y_c += _b
+            _th = y_c + 1
+        ratio = (_th * w) / (_tw * h)
+        text_img = image4ratio.crop((0, 0, _tw, _th))
+        x_offset, y_offset = 0, 0
+        if 0.8 <= ratio <= 1.2:
+            text_img = text_img.resize((w, h))
+        elif ratio < 0.8:
+            # Fit the width and centre vertically. The original tested `< 0.75`, so
+            # 0.75-0.8 fell into the fit-height branch and overflowed the box width.
+            fit_h = int(_th * (w / _tw))
+            text_img = text_img.resize((w, fit_h))
+            y_offset = (h - fit_h) // 2
+        else:
+            fit_w = int(_tw * (h / _th))
+            text_img = text_img.resize((fit_w, h))
+            x_offset = (w - fit_w) // 2
+        board.paste(text_img, (polygon[0] + x_offset, polygon[1] + y_offset))
+
+    return board
+
+
+def load_pipeline(model_name, scheduler_name):
+    """Build the ControlNet pipeline for `model_name` with the named scheduler.
+
+    `model_name` is a directory under `model_root`; the ControlNet weights always
+    come from the 'JoyType.v1.0' entry of `match_dict`. The scheduler name is
+    lower-cased and doubles as the subfolder of `scheduler_root` to load from.
+
+    Raises:
+        ValueError: for an unsupported scheduler name (previously this surfaced
+            as an UnboundLocalError on `scheduler`).
+    """
+    scheduler_classes = {
+        'pndm': PNDMScheduler,
+        'lms': LMSDiscreteScheduler,
+        'euler': EulerDiscreteScheduler,
+        'dpm': DPMSolverMultistepScheduler,
+        'ddim': DDIMScheduler,
+        'heun': HeunDiscreteScheduler,
+        'euler-ancestral': EulerAncestralDiscreteScheduler,
+    }
+    key = scheduler_name.lower()
+    if key not in scheduler_classes:
+        raise ValueError(f'Unsupported scheduler: {scheduler_name!r}')
+    scheduler = scheduler_classes[key].from_pretrained(scheduler_root, subfolder=key)
+
+    controlnet = ControlNetModel.from_pretrained(
+        os.path.join(model_root, match_dict['JoyType.v1.0']),
+        subfolder='controlnet', torch_dtype=torch.float32
+    )
+    return StableDiffusionControlNetPipeline.from_pretrained(
+        os.path.join(model_root, model_name),
+        scheduler=scheduler, controlnet=controlnet, torch_dtype=torch.float32,
+    ).to(device)
+
+
+def preprocess_prompt(prompt):
+    """Ask the GLM chat model to turn `prompt` into a Stable Diffusion prompt.
+
+    Returns {'flag': 1, 'data': [content, ...]} with one string per returned
+    choice, or {'flag': 0, 'data': {}} when the request fails or is empty.
+    """
+    messages = [
+        {
+            'role': 'system',
+            'content': '''
+                   Stable Diffusion是一款利用深度学习的文生图模型，支持通过使用提示词来产生新的图像，描述要包含或省略的元素。
+                   我在这里引入Stable Diffusion算法中的Prompt概念，又被称为提示符。这里的Prompt通常可以用来描述图像，
+                   他由普通常见的单词构成，最好是可以在数据集来源站点找到的著名标签（比如Ddanbooru）。
+                   下面我将说明Prompt的生出步骤，这里的Prompt主要用于描述人物。在Prompt的生成中，你需要通过提示词来描述 人物属性，主题，外表，情绪，衣服，姿势，视角，动作，背景。
+                   用英语单词或短语甚至自然语言的标签来描述，并不局限于我给你的单词。然后将你想要的相似的提示词组合在一起，请使用英文半角,做分隔符，每个提示词不要带引号，并将这些按从最重要到最不重要的顺序 排列。
+                   另外请您注意，永远在每个 Prompt的前面加上引号里的内容， 
+                   “(((best quality))),(((ultra detailed))),(((masterpiece))),illustration,” 这是高质量的标志。
+                   人物属性中，1girl表示你生成了一个女孩，2girls表示生成了两个女孩，一次。另外再注意，Prompt中不能带有-和_。
+                   可以有空格和自然语言，但不要太多，单词不能重复。只返回Prompt。
+                   '''
+        },
+        {'role': 'user', 'content': prompt}
+    ]
+    try:
+        client = ZhipuAI(api_key=os.getenv('ZHIPU_API_KEY'))
+        response = client.chat.completions.create(
+            model="glm-4-0520", messages=messages,
+            temperature=0.5, max_tokens=2048, top_p=1, stream=False,
+        )
+    except Exception as exc:
+        # Service boundary: report the failure through the flag, which the
+        # caller already handles, instead of crashing the request.
+        print(f'preprocess_prompt failed: {exc}')
+        return {'flag': 0, 'data': {}}
+
+    # An answer without choices carries nothing usable for the caller.
+    if response and response.choices:
+        return {'flag': 1, 'data': [item.message.content for item in response.choices]}
+    return {'flag': 0, 'data': {}}
+
+
+def process(
+        num_samples,
+        a_prompt,
+        n_prompt,
+        conditioning_scale,
+        cfg_scale,
+        inference_steps,
+        seed,
+        usr_prompt,
+        rect_img,
+        base_model,
+        scheduler_name,
+        box_num,
+        *box_list
+):
+    """Generate `num_samples` images with the user's texts rendered in the boxes.
+
+    Returns `(images, info)` on success, or `(None, gr.Markdown('error'))`
+    after a Gradio warning when the prompt is empty, the prompt translation
+    fails, or the boxes are rejected by `parse_render_list`. A `seed` of -1
+    draws a random seed.
+    """
+    if usr_prompt == '':
+        gr.Warning('Must input a prompt!')
+        return None, gr.Markdown('error')
+
+    if seed == -1:
+        seed = random.randint(0, 2147483647)
+    seed_everything(seed)
+
+    # Support Chinese input: known examples map to fixed English prompts, any
+    # other prompt containing a CJK ideograph is rewritten by the GLM service.
+    if usr_prompt in chn_example_dict:
+        usr_prompt = chn_example_dict[usr_prompt]
+    elif any('\u4e00' <= ch <= '\u9fff' for ch in usr_prompt):
+        data = preprocess_prompt(usr_prompt)
+        if data['flag'] != 1 or not data['data']:
+            gr.Warning('Something went wrong while translating your prompt, please try again.')
+            return None, gr.Markdown('error')
+        # Remove surrounding quotes only when present; the original sliced
+        # [1:-1] unconditionally and cut real characters off unquoted replies.
+        usr_prompt = data['data'][0].strip().strip('"\'“”')
+
+    shape = (rect_img.shape[1], rect_img.shape[0])
+    render_list, flag = parse_render_list(box_list, shape, box_num)
+    if not flag:
+        return None, gr.Markdown('error')
+    render_img = canny(np.array(render_all_text(render_list, shape)))
+    w, h = render_img.size
+
+    # Reload when the model OR the scheduler changed; the original keyed the
+    # cache on the model alone, so a new scheduler choice was ignored.
+    global pipeline, pre_pipeline
+    model_name = match_dict[base_model]
+    cache_key = (model_name, scheduler_name)
+    if pipeline is None or pre_pipeline != cache_key:
+        pipeline = load_pipeline(model_name, scheduler_name)
+        pre_pipeline = cache_key
+
+    images = pipeline(
+        [f'{usr_prompt}, {a_prompt}'] * num_samples,
+        negative_prompt=[n_prompt] * num_samples,
+        image=[render_img] * num_samples,
+        controlnet_conditioning_scale=float(conditioning_scale),
+        guidance_scale=float(cfg_scale),
+        width=w,
+        height=h,
+        num_inference_steps=int(inference_steps),
+    ).images
+    return images, gr.Markdown(f'{seed},  {usr_prompt},  {box_list}')
+
+
+def draw_example(box_list, color, id):
+    """Save a template JSON and a preview PNG for example `id`.
+
+    `box_list` is read as BBOX_MAX_NUM visibility flags, 4 * BBOX_MAX_NUM
+    relative (x, y, w, h) values, BBOX_MAX_NUM fonts and the remaining texts.
+    `color[i]` supplies the (outline, fill) pair for box i.
+    """
+    n = BBOX_MAX_NUM
+    board = Image.fromarray(create_canvas())
+    w, h = board.size
+    draw = ImageDraw.Draw(board, mode='RGBA')
+
+    visible, pos = box_list[:n], box_list[n: 5 * n]
+    info = {
+        'visible': list(visible),
+        'pos': list(pos),
+        'font': list(box_list[5 * n: 6 * n]),
+        'text': list(box_list[6 * n:])
+    }
+    with open(f'templates/{id}.json', 'w') as f:
+        json.dump(info, f)
+
+    for i in range(n):
+        if visible[i] is not True:  # identity test: only a real True is drawn
+            continue
+        polygon = pos[i * 4: (i + 1) * 4]
+        print(polygon)
+        left, top = w * polygon[0], h * polygon[1]
+        right, bottom = left + w * polygon[2], top + h * polygon[3]
+        draw.rectangle([left, top, right, bottom], outline=color[i][0], fill=color[i][1], width=3)
+
+    board.save(f'./examples/{id}.png')
+
+
+if __name__ == '__main__':  # nothing runs when executed directly; the module only defines helpers
+    pass
diff --git a/javascript/bboxHint.js b/javascript/bboxHint.js
new file mode 100644
index 0000000000000000000000000000000000000000..dd2a8592bc23eaffc1731e6c50d451632f025963
--- /dev/null
+++ b/javascript/bboxHint.js
@@ -0,0 +1,550 @@
+/*
+Part of the implementation is borrowed and modified from multidiffusion-upscaler-for-automatic1111,
+publicly available at https://github.com/pkuliyi2015/multidiffusion-upscaler-for-automatic1111
+*/
+
+const BBOX_MAX_NUM = 16;  // length of the t2i_bboxes slot array
+const BBOX_WARNING_SIZE = 1280;  // size used by displayBox to decide when to show the "region very large" tip
+const DEFAULT_X = 0.4;  // default box geometry, as fractions of the displayed image
+const DEFAULT_Y = 0.4;
+const DEFAULT_H = 0.2;
+const DEFAULT_W = 0.2;
+
+// ref: https://html-color.codes/
+// Color assigned to each box: [border color, translucent fill]
+const COLOR_MAP = [
+    ['#ff0000', 'rgba(255, 0, 0, 0.3)'],          // red
+    ['#ff9900', 'rgba(255, 153, 0, 0.3)'],        // orange
+    ['#996633', 'rgba(153, 102, 51, 0.3)'],       // brown
+    ['#33cc33', 'rgba(51, 204, 51, 0.3)'],        // green
+    ['#33cccc', 'rgba(51, 204, 204, 0.3)'],       // indigo
+    ['#0066ff', 'rgba(0, 102, 255, 0.3)'],        // blue
+    ['#ff3399', 'rgba(255, 51, 153, 0.3)'],       // hot pink
+    ['#cc00cc', 'rgba(204, 0, 204, 0.3)'],        // dark pink
+    ['#ff6666', 'rgba(255, 102, 102, 0.3)'],      // light red
+    ['#ffcc66', 'rgba(255, 204, 102, 0.3)'],      // light orange
+    ['#99cc00', 'rgba(153, 204, 0, 0.3)'],        // lime green
+    ['#ffff00', 'rgba(255, 255, 0, 0.3)'],        // yellow
+    ['#0099cc', 'rgba(0, 153, 204, 0.3)'],        // steel blue
+    ['#00cc99', 'rgba(0, 204, 153, 0.3)'],        // teal
+    ['#ff3399', 'rgba(255, 51, 153, 0.3)'],       // hot pink
+    ['#9933cc', 'rgba(153, 51, 204, 0.3)'],       // lavender
+    ['#6600ff', 'rgba(102, 0, 255, 0.3)'],        // purple
+];
+
+const RESIZE_BORDER = 5;  // px band along each box edge that starts a resize in onBoxMouseDown
+const ROTATE_BORDER = 8;
+const MOVE_BORDER = 5;  // px inset from the box edges inside which a drag moves the box
+
+const t2i_bboxes = new Array(BBOX_MAX_NUM).fill(null);  // per-slot [div, bbox, shower], null until first enabled
+
+function gradioApp() {
+    // Return the shadow root of the first <gradio-app> element, or `document` when there is none.
+    const [host] = document.getElementsByTagName('gradio-app');
+    return (host && host.shadowRoot) ? host.shadowRoot : document;
+}
+
+// ↓↓↓ called from gradio ↓↓↓
+function onCreateT2IRefClick(overwrite) {
+    // Read the width/height inputs and return them as a 'WIDTHxHEIGHT' string;
+    // a value that does not parse as an integer falls back to 512.
+    const selector = overwrite
+        ? '#MD-overwrite-width-t2i input, #MD-overwrite-height-t2i input'
+        : '#txt2img_width input, #txt2img_height input';
+    const inputs = gradioApp().querySelectorAll(selector);
+
+    // Index 0 is used for the width and index 2 for the height
+    // (presumably each control contributes two <input> elements -- TODO confirm).
+    let width  = parseInt(inputs[0].value);
+    let height = parseInt(inputs[2].value);
+    if (isNaN(width))  width  = 512;
+    if (isNaN(height)) height = 512;
+
+    // Concat it to string to bypass the gradio bug
+    // (a reluctant workaround)
+    return width.toString() + 'x' + height.toString();
+}
+
+function onBoxEnableClick(idx, enable) {
+    // Show (enable = true) or hide (enable = false) the overlay for box `idx`.
+    // The overlay <div> is created lazily on first enable and cached in
+    // t2i_bboxes[idx] as [div, bbox, shower]. Returns true only when the box
+    // has been displayed. `ref_div` is now a local (it leaked as an implicit
+    // global), a missing container returns false instead of throwing, and
+    // leftover console.log debugging was removed.
+    const locator = () => gradioApp().querySelector('#MD-bbox-rect-t2i');
+    const bboxes = t2i_bboxes;
+
+    const ref_div = locator();
+    if (!ref_div) { return false; }
+    const canvas = ref_div.querySelector('img');
+    if (!canvas) { return false; }
+
+    if (enable) {
+        // Check if the bounding box already exists
+        if (!bboxes[idx]) {
+            // Initialize bounding box
+            const bbox = [DEFAULT_X, DEFAULT_Y, DEFAULT_W, DEFAULT_H];
+            const colorMap = COLOR_MAP[idx % COLOR_MAP.length];
+            const div = document.createElement('div');
+            div.id = 'MD-bbox-t2i' + idx;
+            div.style.left       = '0px';
+            div.style.top        = '0px';
+            div.style.width      = '0px';
+            div.style.height     = '0px';
+            div.style.position   = 'absolute';
+            div.style.border     = '2px solid ' + colorMap[0];
+            div.style.background = colorMap[1];
+            div.style.zIndex     = '900';
+            div.style.display    = 'none';
+            // A text tip to warn the user if bbox is too large
+            const tip = document.createElement('span');
+            tip.id = 'MD-tip-t2i' + idx;
+            tip.style.left       = '50%';
+            tip.style.top        = '50%';
+            tip.style.position   = 'absolute';
+            tip.style.transform  = 'translate(-50%, -50%)';
+            tip.style.fontSize   = '12px';
+            tip.style.fontWeight = 'bold';
+            tip.style.textAlign  = 'center';
+            tip.style.color      = colorMap[0];
+            tip.style.zIndex     = '901';
+            tip.style.display    = 'none';
+            tip.innerHTML        = 'Warning: Region very large!<br>Take care of VRAM usage!';
+            div.appendChild(tip);
+            div.addEventListener('mousedown', function (e) {
+                if (e.button === 0) { onBoxMouseDown(e, idx); }
+            });
+            div.addEventListener('mousemove', function (e) {
+                updateCursorStyle(e, idx);
+            });
+
+            const shower = function() { // insert to DOM if necessary
+                if (!gradioApp().querySelector('#' + div.id)) {
+                    locator().appendChild(div);
+                }
+            }
+            bboxes[idx] = [div, bbox, shower];
+        }
+
+        // Show the bounding box
+        displayBox(canvas, bboxes[idx]);
+        return true;
+    }
+
+    // Disabling: hide the overlay if it was ever created.
+    if (bboxes[idx]) {
+        bboxes[idx][0].style.display = 'none';
+    }
+    return false;
+}
+
+function onBoxChange(idx, what, v) {
+    // This function handles all the changes of the bounding box
+    // Including the rendering and python slider update.
+    // `what` is one of 'x', 'y', 'w', 'h'; the returned number is the value
+    // handed back to the caller for that field.
+    const bboxes = t2i_bboxes;
+    const canvas = gradioApp().querySelector('#MD-bbox-rect-t2i img');
+
+    if (!bboxes[idx] || !canvas) {
+        // Box not created yet (or canvas missing): report the defaults.
+        switch (what) {
+            case 'x': return DEFAULT_X;
+            case 'y': return DEFAULT_Y;
+            case 'w': return DEFAULT_W;
+            case 'h': return DEFAULT_H;
+            // Previously an unknown key fell through and crashed while
+            // destructuring a missing bboxes[idx]; echo the value instead.
+            default: return v;
+        }
+    }
+
+    const [div, bbox] = bboxes[idx];
+    if (div.style.display === 'none') { return v; }
+
+    // parse trigger
+    const slot = ['x', 'y', 'w', 'h'].indexOf(what);
+    if (slot !== -1) { bbox[slot] = v; }
+    displayBox(canvas, bboxes[idx]);
+    return v;
+}
+
+// ↓↓↓ called from js ↓↓↓
+function getSeedInfo(id, current_seed) {
+    // Return the seed stored for region `id` in the info text of
+    // #html_info_txt2img, or `current_seed` (as an integer, -1 if not numeric)
+    // when the 'Region control' JSON, the region or its seed is unavailable.
+    // Fixes: NaN is checked explicitly (parseInt never throws); `idx` is no
+    // longer an implicit global; Python `False` is converted like `True`; a
+    // missing '{' or malformed JSON falls back instead of throwing.
+    current_seed = parseInt(current_seed);
+    if (isNaN(current_seed)) { current_seed = -1; }
+
+    const info_div = gradioApp().querySelector('#html_info_txt2img');
+    if (!info_div) return current_seed;
+    let info = info_div.innerHTML;
+    if (!info) return current_seed;
+    // remove all html tags
+    info = info.replace(/<[^>]*>/g, '');
+
+    // Locate the JSON object that follows the 'Region control' marker
+    const idx = info.indexOf('Region control');
+    if (idx == -1) return current_seed;
+    const start_idx = info.indexOf('{', idx);
+    if (start_idx == -1) return current_seed;
+
+    // Scan forward to the brace that closes the object
+    let bracket = 1;
+    let end_idx = start_idx + 1;
+    while (bracket > 0 && end_idx < info.length) {
+        if (info[end_idx] == '{') bracket++;
+        if (info[end_idx] == '}') bracket--;
+        end_idx++;
+    }
+    if (bracket > 0) return current_seed;
+
+    // Convert the Python-style dict text into JSON
+    const json_str = info.substring(start_idx, end_idx)
+        .replace(/'/g, '"')
+        .replace(/True/g, 'true')
+        .replace(/False/g, 'false');
+    let json;
+    try {
+        json = JSON.parse(json_str);
+    } catch (e) {
+        return current_seed;
+    }
+
+    // get the seed if the region id is in the json
+    const region = json['Region ' + id.toString()];
+    if (!region || !('seed' in region)) return current_seed;
+    const seed = parseInt(region['seed']);
+    return isNaN(seed) ? current_seed : seed;
+}
+
+function displayBox(canvas, bbox_info) {
+    // Position and show the overlay <div> of one box over the canvas image.
+    // bbox_info is [div, bbox, shower]; bbox holds [x, y, w, h] as fractions
+    // of the displayed image size.
+    const [div, bbox, shower] = bbox_info;
+    const [x, y, w, h] = bbox;
+    // check null input
+    if (!canvas || !div || [x, y, w, h].some((value) => value == null)) { return; }
+
+    // client: canvas widget display size
+    // natural: content image real size
+    const vpScale = Math.min(canvas.clientWidth / canvas.naturalWidth, canvas.clientHeight / canvas.naturalHeight);
+    const scaledX = canvas.naturalWidth  * vpScale;
+    const scaledY = canvas.naturalHeight * vpScale;
+    // View rectangle: the scaled image centred inside the widget.
+    const viewRectLeft  = canvas.clientWidth  / 2 - scaledX / 2;
+    const viewRectTop   = canvas.clientHeight / 2 - scaledY / 2;
+    const viewRectRight = canvas.clientWidth  / 2 + scaledX / 2;
+    const viewRectDown  = canvas.clientHeight / 2 + scaledY / 2;
+
+    // Box rectangle in widget pixels, clipped to the right/bottom image edge.
+    const xDiv = viewRectLeft + scaledX * x;
+    const yDiv = viewRectTop  + scaledY * y;
+    const wDiv = Math.min(scaledX * w, viewRectRight - xDiv);
+    const hDiv = Math.min(scaledY * h, viewRectDown - yDiv);
+
+    // Calculate warning bbox size
+    const upscalerFactor = 1.0;
+    const maxSize = BBOX_WARNING_SIZE / upscalerFactor * vpScale;
+    const tooLarge = w > maxSize / scaledX || h > maxSize / scaledY;
+    div.querySelector('span').style.display = tooLarge ? 'block' : 'none';
+
+    // Apply the geometry and make the box visible.
+    Object.assign(div.style, {
+        left:    xDiv + 'px',
+        top:     yDiv + 'px',
+        width:   wDiv + 'px',
+        height:  hDiv + 'px',
+        display: 'block',
+    });
+
+    // insert it to DOM if not appear
+    shower();
+}
+
+function onBoxMouseDown(e, idx) {
+    let bboxes = null;
+    let canvas = null;
+
+    bboxes = t2i_bboxes;
+    canvas = gradioApp().querySelector('#MD-bbox-rect-t2i img');
+
+    // Get the bounding box
+    if (!canvas || !bboxes[idx]) { return; }
+    const [div, bbox, shower] = bboxes[idx];
+
+    // Check if the click is inside the bounding box
+    const boxRect = div.getBoundingClientRect();
+    let mouseX = e.clientX;
+    let mouseY = e.clientY;
+
+    const resizeLeft   = mouseX >= boxRect.left && mouseX <= boxRect.left + RESIZE_BORDER;
+    const resizeRight  = mouseX >= boxRect.right - RESIZE_BORDER && mouseX <= boxRect.right;
+    const resizeTop    = mouseY >= boxRect.top && mouseY <= boxRect.top + RESIZE_BORDER;
+    const resizeBottom = mouseY >= boxRect.bottom - RESIZE_BORDER && mouseY <= boxRect.bottom;
+
+    const moveHorizontal = mouseX >= boxRect.left + MOVE_BORDER && mouseX <= boxRect.right  - MOVE_BORDER;
+    const moveVertical   = mouseY >= boxRect.top  + MOVE_BORDER && mouseY <= boxRect.bottom - MOVE_BORDER;
+
+    if (!resizeLeft && !resizeRight && !resizeTop && !resizeBottom && !moveHorizontal && !moveVertical) { return; }
+
+    const horizontalPivot = resizeLeft ? bbox[0] + bbox[2] : bbox[0];
+    const verticalPivot   = resizeTop  ? bbox[1] + bbox[3] : bbox[1];
+
+    // Canvas can be regarded as invariant during the drag operation
+    // Calculate in advance to reduce overhead
+
+    // Calculate viewport scale based on the current canvas size and the natural image size
+    let vpScale = Math.min(canvas.clientWidth / canvas.naturalWidth, canvas.clientHeight / canvas.naturalHeight);
+    let vpOffset = canvas.getBoundingClientRect();
+
+    // Calculate scaled dimensions of the canvas
+    let scaledX = canvas.naturalWidth * vpScale;
+    let scaledY = canvas.naturalHeight * vpScale;
+
+    // Calculate the canvas center and view rectangle coordinates
+    let canvasCenterX = (vpOffset.left + window.scrollX) + canvas.clientWidth  / 2;
+    let canvasCenterY = (vpOffset.top  + window.scrollY) + canvas.clientHeight / 2;
+    let viewRectLeft  = canvasCenterX - scaledX / 2 - window.scrollX;
+    let viewRectRight = canvasCenterX + scaledX / 2 - window.scrollX;
+    let viewRectTop   = canvasCenterY - scaledY / 2 - window.scrollY;
+    let viewRectDown  = canvasCenterY + scaledY / 2 - window.scrollY;
+
+    mouseX = Math.min(Math.max(mouseX, viewRectLeft), viewRectRight);
+    mouseY = Math.min(Math.max(mouseY, viewRectTop),  viewRectDown);
+
+    const accordion = gradioApp().querySelector('#MD-tab-t2i');
+
+    // Move or resize the bounding box on mousemove
+    function onMouseMove(e) {
+        // Drag step: turn the pointer movement since the last event into a move/resize of the dragged box
+        e.preventDefault(); // prevent the drag from selecting anything irrelevant
+
+        // Get the new mouse position
+        let newMouseX = e.clientX;
+        let newMouseY = e.clientY;
+
+        // Clamp the mouse position to the view rectangle
+        newMouseX = Math.min(Math.max(newMouseX, viewRectLeft), viewRectRight);
+        newMouseY = Math.min(Math.max(newMouseY, viewRectTop),  viewRectDown);
+
+        // Mouse movement delta, expressed as a fraction of scaledX / scaledY
+        const dx = (newMouseX - mouseX) / scaledX;
+        const dy = (newMouseY - mouseY) / scaledY;
+
+        // Remember the (clamped) position as the reference point for the next event
+        mouseX = newMouseX;
+        mouseY = newMouseY;
+
+        // If there was no movement, just return
+        if (dx === 0 && dy === 0) { return; }
+
+        // Apply the delta to the bounding box; x, y, w, h are kept inside the [0, 1] range below
+        let [x, y, w, h] = bbox;
+        if (moveHorizontal && moveVertical) {
+            // If moving the bounding box: shift it, keeping the whole box inside the image
+            x = Math.min(Math.max(x + dx, 0), 1 - w);
+            y = Math.min(Math.max(y + dy, 0), 1 - h);
+        } else {
+            // If resizing the bounding box (pivots are set outside this function; presumably the fixed opposite edge)
+            if (resizeLeft || resizeRight) {
+                if (x < horizontalPivot) {
+                    if (dx <= w) {
+                        // If still within the left side of the pivot
+                        x = x + dx;
+                        w = w - dx;
+                    } else {
+                        // If crossing the pivot: the box now starts at the pivot and extends to the right
+                        w = dx - w;
+                        x = horizontalPivot;
+                    }
+                } else {
+                    if (w + dx < 0) {
+                        // If crossing the pivot: the box flips over to the left side of it
+                        x = horizontalPivot + w + dx;
+                        w = - dx - w;
+                    } else {
+                        // If still within the right side of the pivot
+                        x = horizontalPivot;
+                        w = w + dx;
+                    }
+                }
+
+                // Clamp the bounding box to the image
+                if (x < 0) {
+                    w = w + x;
+                    x = 0;
+                } else if (x + w > 1) {
+                    w = 1 - x;
+                }
+            }
+            // Same as above, but for the vertical axis
+            if (resizeTop || resizeBottom) {
+                if (y < verticalPivot) {
+                    if (dy <= h) {
+                        y = y + dy;
+                        h = h - dy;
+                    } else {
+                        h = dy - h;
+                        y = verticalPivot;
+                    }
+                } else {
+                    if (h + dy < 0) {
+                        y = verticalPivot + h + dy;
+                        h = - dy - h;
+                    } else {
+                        y = verticalPivot;
+                        h = h + dy;
+                    }
+                }
+                if (y < 0) {
+                    h = h + y;
+                    y = 0;
+                } else if (y + h > 1) {
+                    h = 1 - y;
+                }
+            }
+        }
+        const [div, old_bbox, _] = bboxes[idx];
+
+        // If all the values are the same, just return
+        if (old_bbox[0] === x && old_bbox[1] === y && old_bbox[2] === w && old_bbox[3] === h) { return; }
+        // Otherwise write each changed coordinate into its slider and fire an 'input' event on it
+        const event = new Event('input');
+        const coords = [x, y, w, h];
+        // The sliders are deliberately re-queried on every move instead of being cached:
+        // caching triggered a Gradio bug where the bbox got stuck and could not be moved or dragged
+        const sliderIds = ['x', 'y', 'w', 'h'];
+        // We try to select the input sliders
+        const sliderSelectors = sliderIds.map(id => `#MD-${'t2i'}-${idx}-${id} input`).join(', ');
+        let sliderInputs = accordion.querySelectorAll(sliderSelectors);
+        // NOTE(review): `accordion` and its '.label-wrap' child are dereferenced unguarded; confirm both always exist
+        if (sliderInputs.length == 0) {
+            // If we failed, the accordion is probably closed and the sliders were removed from the DOM, so we open it
+            accordion.querySelector('.label-wrap').click();
+            // and try again
+            sliderInputs = accordion.querySelectorAll(sliderSelectors);
+            // If we still failed, we just return
+            if (sliderInputs.length == 0) { return; }
+        }
+        for (let i = 0; i < 4; i++) {
+            if (old_bbox[i] !== coords[i]) {
+                sliderInputs[2*i].value = coords[i]; // stride 2: presumably two <input>s match per slider; TODO confirm
+                sliderInputs[2*i].dispatchEvent(event);
+            }
+        }
+    }
+
+    // Remove the mousemove and mouseup event listeners
+    function onMouseUp() {
+        document.removeEventListener('mousemove', onMouseMove);
+        document.removeEventListener('mouseup',   onMouseUp);
+    }
+
+    // Add the event listeners
+    document.addEventListener('mousemove', onMouseMove);
+    document.addEventListener('mouseup',   onMouseUp);
+}
+
+function updateCursorStyle(e, idx) {
+    // This function changes the cursor style when hovering over the bounding box
+    const bboxes = t2i_bboxes;
+    if (!bboxes[idx]) return;
+
+    const div = bboxes[idx][0];
+    const boxRect = div.getBoundingClientRect();
+    const mouseX = e.clientX;
+    const mouseY = e.clientY;
+
+    const boxCenterX = boxRect.left + boxRect.width / 2;
+    const rotateAreaLeft = boxCenterX - ROTATE_BORDER;
+    const rotateAreaRight = boxCenterX + ROTATE_BORDER;
+    const rotateAreaTop = boxRect.top + ROTATE_BORDER;
+    const rotateAreaBottom = boxRect.top - ROTATE_BORDER;
+
+    const resizeLeft   = mouseX >= boxRect.left && mouseX <= boxRect.left + RESIZE_BORDER;
+    const resizeRight  = mouseX >= boxRect.right - RESIZE_BORDER && mouseX <= boxRect.right;
+    const resizeTop    = mouseY >= boxRect.top && mouseY <= boxRect.top + RESIZE_BORDER;
+    const resizeBottom = mouseY >= boxRect.bottom - RESIZE_BORDER && mouseY <= boxRect.bottom;
+    const rotateTop    = mouseX >= rotateAreaLeft && mouseX <= rotateAreaRight && mouseY >= rotateAreaBottom && mouseY <= rotateAreaTop; //mouseX >= rotateAreaLeft && mouseX <= rotateAreaRight &&
+
+
+//    if (rotateTop) {
+//        div.style.cursor = 'crosshair';
+//    } else
+    if ((resizeLeft && resizeTop) || (resizeRight && resizeBottom)) {
+        div.style.cursor = 'nwse-resize';
+    } else if ((resizeLeft && resizeBottom) || (resizeRight && resizeTop)) {
+        div.style.cursor = 'nesw-resize';
+    } else if (resizeLeft || resizeRight) {
+        div.style.cursor = 'ew-resize';
+    } else if (resizeTop || resizeBottom) {
+        div.style.cursor = 'ns-resize';
+    } else {
+        div.style.cursor = 'move';
+    }
+}
+
+// ↓↓↓ auto called event listeners ↓↓↓
+
+function updateBoxes() {
+    // This function redraw all bounding boxes
+    let bboxes = null;
+    let canvas = null;
+
+    bboxes = t2i_bboxes;
+    canvas = gradioApp().querySelector('#MD-bbox-rect-t2i img');
+
+    if (!canvas) return;
+
+    for (let idx = 0; idx < bboxes.length; idx++) {
+        if (!bboxes[idx]) continue;
+        const [div, bbox, shower] = bboxes[idx];
+        if (div.style.display === 'none') { return; }
+
+        displayBox(canvas, bboxes[idx]);
+    }
+}
+
+window.addEventListener('resize', _ => {
+    updateBoxes(true);
+    updateBoxes(false);
+});
+
+//// ======== Gradio Bug Fix ========
+//// For Gradio versions > 3.16.0 and < 3.29.0, the accordion DOM is deleted when the accordion is closed.
+//// We need to check the version, listen for the accordion open event, and re-render the bboxes at that time.
+//// This silly bug fix is only for compatibility; we recommend updating the Gradio version to 3.29.0 or higher.
+//try {
+//    const GRADIO_VERSIONS = window.gradio_config["version"].split(".");
+//    const gradio_major_version = parseInt(GRADIO_VERSIONS[0]);
+//    const gradio_minor_version = parseInt(GRADIO_VERSIONS[1]);
+//    if (gradio_major_version == 3 && gradio_minor_version > 16 && gradio_minor_version < 29) {
+//        let listener = e => {
+//            if (!e) { return; }
+//            if (!e.target) { return; }
+//            if (!e.target.classList) { return; }
+//            if (!e.target.classList.contains('label-wrap')) { return; }
+//            for (let tab of ['t2i']) {
+//                const div = gradioApp().querySelector('#MD-bbox-control-' + tab +' div.label-wrap');
+//                if (!div) { continue; }
+//                updateBoxes(tab === 't2i');
+//            }
+//        };
+//        window.addEventListener('DOMNodeInserted', listener);
+//    }
+//} catch (ignored) {
+//    // If the above code failed, the gradio version shouldn't be in the range of 3.16.0 to 3.29.0, so we just return.
+//}
+//// ======== Gradio Bug Fix ========
+
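+//// Every bbox in the Python program has a click handler bound and is only rendered on the frontend after it is clicked, so the specified number of bboxes has to be initialized in JS ahead of time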
+//for (let i = 0; i < 4; i++) {
+//    onBoxEnableClick(i, true)
+//}
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..4b954bd6e258e8f76f9dfc64b07530d4c0004872
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,160 @@
+absl-py==2.1.0
+accelerate==0.31.0
+addict==2.4.0
+aiofiles==23.2.1
+aiohttp==3.9.5
+aiosignal==1.3.1
+aliyun-python-sdk-core==2.15.1
+aliyun-python-sdk-kms==2.16.3
+altair==5.3.0
+annotated-types==0.7.0
+anyio==4.4.0
+async-timeout==4.0.3
+attrs==23.2.0
+blinker==1.8.2
+boto3==1.34.125
+botocore==1.34.125
+cachetools==5.3.3
+certifi==2024.6.2
+cffi==1.16.0
+charset-normalizer==3.3.2
+click==8.1.7
+colorama==0.4.6
+config==0.5.1
+contourpy==1.2.1
+controlnet-aux==0.0.9
+crcmod==1.7
+cryptography==42.0.8
+cycler==0.12.1
+datasets==2.18.0
+diffusers==0.28.2
+dill==0.3.8
+distro==1.9.0
+dnspython==2.6.1
+einops==0.8.0
+email_validator==2.1.1
+et-xmlfile==1.1.0
+exceptiongroup==1.2.1
+fastapi==0.111.0
+fastapi-cli==0.0.4
+ffmpy==0.3.2
+filelock==3.14.0
+Flask==3.0.3
+flatbuffers==24.3.25
+fonttools==4.53.0
+frozenlist==1.4.1
+fsspec==2024.2.0
+gast==0.5.4
+gradio==3.50.0
+gradio_client==0.6.1
+h11==0.14.0
+httpcore==1.0.5
+httptools==0.6.1
+httpx==0.27.0
+huggingface-hub==0.23.2
+idna==3.7
+imageio==2.34.1
+importlib_metadata==7.1.0
+importlib_resources==6.4.0
+intel-openmp==2021.4.0
+itsdangerous==2.2.0
+jax==0.4.30
+jaxlib==0.4.30
+Jinja2==3.1.4
+jmespath==0.10.0
+jsonschema==4.22.0
+jsonschema-specifications==2023.12.1
+kiwisolver==1.4.5
+lazy_loader==0.4
+lightning-utilities==0.11.2
+linkify-it-py==2.0.3
+markdown-it-py==2.2.0
+MarkupSafe==2.1.5
+matplotlib==3.9.0
+mdit-py-plugins==0.3.3
+mdurl==0.1.2
+mediapipe==0.10.14
+mkl==2021.4.0
+ml-dtypes==0.4.0
+modelscope==1.15.0
+mpmath==1.3.0
+multidict==6.0.5
+multiprocess==0.70.16
+networkx==3.3
+numpy==1.26.4
+openai==1.35.3
+opencv-contrib-python==4.10.0.84
+opencv-python==4.10.0.82
+opencv-python-headless==4.10.0.82
+openpyxl==3.1.5
+opt-einsum==3.3.0
+orjson==3.10.3
+oss2==2.18.5
+packaging==24.0
+pandas==2.2.2
+pillow==10.3.0
+platformdirs==4.2.2
+protobuf==4.25.3
+psutil==5.9.8
+pyarrow==16.1.0
+pyarrow-hotfix==0.6
+pycparser==2.22
+pycryptodome==3.20.0
+pydantic==2.7.3
+pydantic_core==2.18.4
+pydub==0.25.1
+Pygments==2.18.0
+PyJWT==2.8.0
+pyparsing==3.1.2
+python-dateutil==2.9.0.post0
+python-dotenv==1.0.1
+python-multipart==0.0.9
+pytorch-lightning==2.3.0
+pytz==2024.1
+PyYAML==6.0.1
+referencing==0.35.1
+regex==2024.5.15
+requests==2.32.3
+rich==13.7.1
+rpds-py==0.18.1
+ruff==0.4.7
+s3transfer==0.10.1
+safetensors==0.4.3
+scikit-image==0.23.2
+scipy==1.13.1
+semantic-version==2.10.0
+shellingham==1.5.4
+simplejson==3.19.2
+six==1.16.0
+sniffio==1.3.1
+sortedcontainers==2.4.0
+sounddevice==0.4.7
+starlette==0.37.2
+sympy==1.12.1
+tbb==2021.12.0
+tifffile==2024.5.22
+timm==0.6.7
+tokenizers==0.19.1
+tomli==2.0.1
+tomlkit==0.12.0
+toolz==0.12.1
+torch==2.3.1
+torchmetrics==1.4.0.post0
+torchvision==0.18.1
+tqdm==4.66.4
+transformers==4.41.2
+typer==0.12.3
+typing_extensions==4.12.1
+tzdata==2024.1
+uc-micro-py==1.0.3
+ujson==5.10.0
+urllib3==2.2.1
+uvicorn==0.30.1
+watchfiles==0.22.0
+websockets==11.0.3
+Werkzeug==3.0.3
+xxhash==3.4.1
+yapf==0.40.2
+yarl==1.9.4
+zhipuai==2.1.1.20240620.1
+zipp==3.19.2
diff --git a/templates/1.json b/templates/1.json
new file mode 100644
index 0000000000000000000000000000000000000000..5dba6c0314bfdea5a16ee475ce97880c06b36c94
--- /dev/null
+++ b/templates/1.json
@@ -0,0 +1 @@
+{"visible": [true, true, true, false, false, false, false, false], "pos": [0.024161073825503594, 0.028187919463086675, 0.6295302013422815, 0.14966442953020198, 0.11476510067114097, 0.18187919463087276, 0.6060402684563758, 0.1395973154362417, 0.8268456375838927, 0.10134228187919483, 0.15973154362416148, 0.8140939597315439, 0.4, 0.4, 0.2, 0.2, 0.4, 0.4, 0.2, 0.2, 0.4, 0.4, 0.2, 0.2, 0.4, 0.4, 0.2, 0.2, 0.4, 0.4, 0.2, 0.2], "font": ["CHN-\u5df4\u8700\u58a8\u8ff9", "CHN-\u5df4\u8700\u58a8\u8ff9", "CHN-\u534e\u6587\u884c\u6977", "CHN-\u534e\u6587\u884c\u6977", "CHN-\u534e\u6587\u884c\u6977", "CHN-\u534e\u6587\u884c\u6977", "CHN-\u534e\u6587\u884c\u6977", "CHN-\u534e\u6587\u884c\u6977"], "text": ["\u5929\u7136\u6c27\u5427", "\u767b\u9ad8\u671b\u8fdc", "\u4fc3\u8fdb\u5faa\u73af \u5f3a\u5065\u8eab\u4f53", "", "", "", "", ""]}
\ No newline at end of file
diff --git a/templates/1.png b/templates/1.png
new file mode 100644
index 0000000000000000000000000000000000000000..03c5b6debed8bfd406f9a1bfe1fdd159ab4ba406
Binary files /dev/null and b/templates/1.png differ
diff --git a/templates/2.json b/templates/2.json
new file mode 100644
index 0000000000000000000000000000000000000000..baf7ef19be64a2d363181393777b55f3e1b0ee61
--- /dev/null
+++ b/templates/2.json
@@ -0,0 +1 @@
+{"visible": [true, true, false, false, false, false, false, false], "pos": [0.1885906040268458, 0.8302013422818793, 0.6496644295302016, 0.13624161073825505, 0.2026845637583891, 0.6590604026845638, 0.6161073825503363, 0.15302013422818816, 0.4, 0.4, 0.2, 0.2, 0.4, 0.4, 0.2, 0.2, 0.4, 0.4, 0.2, 0.2, 0.4, 0.4, 0.2, 0.2, 0.4, 0.4, 0.2, 0.2, 0.4, 0.4, 0.2, 0.2], "font": ["RUS-Caveat", "CHN-\u6e05\u677e\u624b\u5199\u4f53", "CHN-\u534e\u6587\u884c\u6977", "CHN-\u534e\u6587\u884c\u6977", "CHN-\u534e\u6587\u884c\u6977", "CHN-\u534e\u6587\u884c\u6977", "CHN-\u534e\u6587\u884c\u6977", "CHN-\u534e\u6587\u884c\u6977"], "text": ["\u043b\u0435\u043a\u0430\u0440\u0441\u0442\u0432", "\u89c4\u5f8b\u670d\u836f", "", "", "", "", "", ""]}
\ No newline at end of file
diff --git a/templates/2.png b/templates/2.png
new file mode 100644
index 0000000000000000000000000000000000000000..e1169e0b8a249e4c37bfd713f1b08682ad8048ab
Binary files /dev/null and b/templates/2.png differ
diff --git a/templates/3.json b/templates/3.json
new file mode 100644
index 0000000000000000000000000000000000000000..f4b0301be6004f83d5693df64efd31ef9af55b3b
--- /dev/null
+++ b/templates/3.json
@@ -0,0 +1 @@
+{"visible": [true, true, true, false, false, false, false, false], "pos": [0.13825503355704757, 0.0476510067114097, 0.2838926174496645, 0.11946308724832223, 0.43422818791946327, 0.05100671140939635, 0.44832214765100653, 0.11946308724832207, 0.1382550335570471, 0.19865771812080546, 0.7402684563758394, 0.12281879194630875, 0.4, 0.4, 0.2, 0.2, 0.4, 0.4, 0.2, 0.2, 0.4, 0.4, 0.2, 0.2, 0.4, 0.4, 0.2, 0.2, 0.4, 0.4, 0.2, 0.2], "font": ["CHN-\u6e05\u677e\u624b\u5199\u4f53", "JPN-GlTsukiji", "ENG-Filthyrich", "CHN-\u534e\u6587\u884c\u6977", "CHN-\u534e\u6587\u884c\u6977", "CHN-\u534e\u6587\u884c\u6977", "CHN-\u534e\u6587\u884c\u6977", "CHN-\u534e\u6587\u884c\u6977"], "text": ["\u4f60\u597d", "\u3053\u3093\u306b\u3061\u306f", "It's showtime", "", "", "", "", ""]}
\ No newline at end of file
diff --git a/templates/3.png b/templates/3.png
new file mode 100644
index 0000000000000000000000000000000000000000..ca2f6f6d09eec6de38420c880f432d6bb2b25efb
Binary files /dev/null and b/templates/3.png differ
diff --git a/templates/4.json b/templates/4.json
new file mode 100644
index 0000000000000000000000000000000000000000..10d8fdebb487887d6f71fe85bbfdaef37da3a605
--- /dev/null
+++ b/templates/4.json
@@ -0,0 +1 @@
+{"visible": [true, true, true, false, false, false, false, false], "pos": [0.5449664429530201, 0.05436241610738259, 0.4281879194630869, 0.15973154362416114, 0.43355704697986586, 0.7959731543624167, 0.5489932885906045, 0.18657718120805372, 0.024161073825503712, 0.037583892617449835, 0.20335570469798664, 0.46845637583892613, 0.4, 0.4, 0.2, 0.2, 0.4, 0.4, 0.2, 0.2, 0.4, 0.4, 0.2, 0.2, 0.4, 0.4, 0.2, 0.2, 0.4, 0.4, 0.2, 0.2], "font": ["RUS-Alice", "CHN-\u6f14\u793a\u590f\u884c\u6977", "KOR-ChosunGs", "CHN-\u534e\u6587\u884c\u6977", "CHN-\u534e\u6587\u884c\u6977", "CHN-\u534e\u6587\u884c\u6977", "CHN-\u534e\u6587\u884c\u6977", "CHN-\u534e\u6587\u884c\u6977"], "text": ["\u0412\u043e\u0434\u043a\u0430", "\u8bf7\u9002\u91cf\u996e\u9152", "\uce90\uc8fc\uc5bc \ubc14", "", "", "", "", ""]}
\ No newline at end of file
diff --git a/templates/4.png b/templates/4.png
new file mode 100644
index 0000000000000000000000000000000000000000..629f1bdfae8b3ab16a7b10f8135d1546c0bd9d18
Binary files /dev/null and b/templates/4.png differ
diff --git a/templates/5.json b/templates/5.json
new file mode 100644
index 0000000000000000000000000000000000000000..3324fbde1640ec19c6c456e742bf132ee28718f2
--- /dev/null
+++ b/templates/5.json
@@ -0,0 +1 @@
+{"visible": [true, true, false, false, false, false, false, false], "pos": [0.3201342281879193, 0.017449664429530044, 0.364429530201342, 0.14966442953020143, 0.13825503355704727, 0.19194630872483293, 0.74026845637584, 0.13624161073825514, 0.4, 0.4, 0.2, 0.2, 0.4, 0.4, 0.2, 0.2, 0.4, 0.4, 0.2, 0.2, 0.4, 0.4, 0.2, 0.2, 0.4, 0.4, 0.2, 0.2, 0.4, 0.4, 0.2, 0.2], "font": ["CHN-\u6e05\u677e\u624b\u5199\u4f53", "JPN-Aoyagireisyosimo", "CHN-\u534e\u6587\u884c\u6977", "CHN-\u534e\u6587\u884c\u6977", "CHN-\u534e\u6587\u884c\u6977", "CHN-\u534e\u6587\u884c\u6977", "CHN-\u534e\u6587\u884c\u6977", "CHN-\u534e\u6587\u884c\u6977"], "text": ["\u73ab\u7470", "\u6c17\u3092\u6674\u3089\u3057\u307e\u3059", "", "", "", "", "", ""]}
\ No newline at end of file
diff --git a/templates/5.png b/templates/5.png
new file mode 100644
index 0000000000000000000000000000000000000000..1bb17e8c08e718b7b7c3ff00e4ecb1e239580909
Binary files /dev/null and b/templates/5.png differ
diff --git a/templates/6.json b/templates/6.json
new file mode 100644
index 0000000000000000000000000000000000000000..7b9be3729fb8ee8023bb71b5c34fcf173bca7ceb
--- /dev/null
+++ b/templates/6.json
@@ -0,0 +1 @@
+{"visible": [true, true, true, true, false, false, false, false], "pos": [0.010738255033557227, 0.23892617449664466, 0.6060402684563759, 0.12617449664429542, 0.06107382550335593, 0.383892617449664, 0.3409395973154365, 0.10268456375838927, 0.06442953020134237, 0.500671140939596, 0.3375838926174498, 0.09597315436241619, 0.4637583892617447, 0.8637583892617449, 0.5053691275167783, 0.11946308724832194, 0.4, 0.4, 0.2, 0.2, 0.4, 0.4, 0.2, 0.2, 0.4, 0.4, 0.2, 0.2, 0.4, 0.4, 0.2, 0.2], "font": ["CHN-\u6e05\u677e\u624b\u5199\u4f53", "CHN-\u534e\u6587\u884c\u6977", "CHN-\u534e\u6587\u884c\u6977", "ENG-Okesip", "CHN-\u534e\u6587\u884c\u6977", "CHN-\u534e\u6587\u884c\u6977", "CHN-\u534e\u6587\u884c\u6977", "CHN-\u534e\u6587\u884c\u6977"], "text": ["\u71ac\u591c\u6709\u5bb3\u5065\u5eb7", "\u4f11\u606f\u65e9", "\u7cbe\u795e\u597d", "Good Night", "", "", "", ""]}
\ No newline at end of file
diff --git a/templates/6.png b/templates/6.png
new file mode 100644
index 0000000000000000000000000000000000000000..e09900a69841c759a2d2029647f305e8fe6a19f2
Binary files /dev/null and b/templates/6.png differ
diff --git a/templates/7.json b/templates/7.json
new file mode 100644
index 0000000000000000000000000000000000000000..a09b069f0a3ad6e4bdb0c23c3d2843d5def14f0a
--- /dev/null
+++ b/templates/7.json
@@ -0,0 +1 @@
+{"visible": [true, true, true, false, false, false, false, false], "pos": [0.014765100671140905, 0.14496644295302014, 0.27718120805369106, 0.13959731543624174, 0.01409395973154373, 0.010738255033557019, 0.5791946308724831, 0.13288590604026856, 0.46778523489932883, 0.8302013422818795, 0.5288590604026846, 0.15973154362416064, 0.4, 0.4, 0.2, 0.2, 0.4, 0.4, 0.2, 0.2, 0.4, 0.4, 0.2, 0.2, 0.4, 0.4, 0.2, 0.2, 0.4, 0.4, 0.2, 0.2], "font": ["CHN-\u6e05\u677e\u624b\u5199\u4f53", "ENG-Nextstep", "CHN-\u534e\u6587\u884c\u6977", "CHN-\u534e\u6587\u884c\u6977", "CHN-\u534e\u6587\u884c\u6977", "CHN-\u534e\u6587\u884c\u6977", "CHN-\u534e\u6587\u884c\u6977", "CHN-\u534e\u6587\u884c\u6977"], "text": ["\u51ac\u81f3", "LUNAR 12/22", "\u4e8c\u5341\u56db\u8282\u6c14", "", "", "", "", ""]}
\ No newline at end of file
diff --git a/templates/7.png b/templates/7.png
new file mode 100644
index 0000000000000000000000000000000000000000..f1f66946a4d36cce7bd6f78235c3a32ee8fe8af1
Binary files /dev/null and b/templates/7.png differ
diff --git a/templates/8.json b/templates/8.json
new file mode 100644
index 0000000000000000000000000000000000000000..32f98c64573cc25e8ba31088771130411efa12f9
--- /dev/null
+++ b/templates/8.json
@@ -0,0 +1 @@
+{"visible": [true, true, true, true, true, true, false, false], "pos": [0.24228187919463118, 0.09463087248322188, 0.5322147651006713, 0.1261744966442954, 0.11812080536912777, 0.22953020134228164, 0.780536912751679, 0.10939597315436245, 0.20201342281879203, 0.34295302013422685, 0.5624161073825508, 0.11275167785234913, 0.19865771812080507, 0.46442953020134053, 0.6295302013422825, 0.10939597315436221, 0.20201342281879203, 0.5845637583892606, 0.502013422818791, 0.11946308724832219, 0.20201342281879256, 0.7120805369127512, 0.5053691275167782, 0.12281879194630883, 0.4, 0.4, 0.2, 0.2, 0.4, 0.4, 0.2, 0.2], "font": ["CHN-\u96f7\u76d6\u4f53", "CHN-\u534e\u6587\u65b0\u9b4f", "CHN-\u6e05\u677e\u624b\u5199\u4f53", "CHN-\u6e05\u677e\u624b\u5199\u4f53", "CHN-\u6e05\u677e\u624b\u5199\u4f53", "CHN-\u6e05\u677e\u624b\u5199\u4f53", "CHN-\u534e\u6587\u884c\u6977", "CHN-\u534e\u6587\u884c\u6977"], "text": ["\u5982\u4f55\u9632\u63a7\u4e59\u6d41", "\u65e5\u5e38\u5e94\u5bf9\u63aa\u65bd", "\u996e\u54c1\uff1a\u6709\u52a9\u4e8e\u4fdd\u6301\u6c34\u5206", "\u9762\u98df\uff1a\u78b3\u6c34\u5316\u5408\u7269\u6613\u6d88\u5316", "\u9752\u83dc\uff1a\u53ef\u4ee5\u4fdd\u7559\u8425\u517b", "\u6c34\u679c\uff1a\u5bcc\u542b\u5fae\u91cf\u5143\u7d20", "", ""]}
\ No newline at end of file
diff --git a/templates/8.png b/templates/8.png
new file mode 100644
index 0000000000000000000000000000000000000000..2ba9ae217a1d48b67da4276fb174fe2bdf9831ac
Binary files /dev/null and b/templates/8.png differ