diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..119673380e13e775f7d9a603f3c5160ff5c77bb1 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +font/*.ttf filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md index bf895dca6b969fa73a08ea544122ceee9b103e01..963f8c8ea50beb5623de4d1df758c15b12817efc 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,12 @@ --- title: JoyType -emoji: 📊 -colorFrom: gray +emoji: 🔥 +colorFrom: green colorTo: blue sdk: gradio -sdk_version: 4.37.2 +sdk_version: 3.50.0 app_file: app.py pinned: false -license: apache-2.0 --- -Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference +Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference \ No newline at end of file diff --git a/app.py b/app.py new file mode 100644 index 0000000000000000000000000000000000000000..748c90a156e3559154ed1f6bad6baaefeb5f7abc --- /dev/null +++ b/app.py @@ -0,0 +1,261 @@ +# encoding=utf8 + + +import os +import cv2 +import gradio as gr +import numpy as np +import re +import json + +from huggingface_hub import login +from functions import * +from gradio.components import Component + +login(token=os.getenv('LOGIN_TOKEN')) +css = './css/style.css' + +# Initial a Gradio Block with specific theme +block = gr.Blocks( + theme=gr.themes.Base(), + css=css +).queue() + +# Load javascript plugin +with open('javascript/bboxHint.js', 'r', encoding="utf-8") as file: + value = file.read() +escaped_value = json.dumps(value) + +with block: + block.load( + fn=None, + _js=f"""() => {{ + const script = document.createElement("script"); + const text = document.createTextNode({escaped_value}); + script.appendChild(text); + document.head.appendChild(script); 
+ }}""" + ) + gr.HTML( + '
Font(字体)
', elem_id='markdown_1') + with gr.Column(scale=2, min_width=20): + gr.Markdown('Text(文字内容)
', elem_id='markdown_2') + + row_layout = [gr.Row() for _ in range(BBOX_MAX_NUM)] + for i in range(BBOX_MAX_NUM): + visible = True if i < BBOX_INI_NUM else False + with row_layout[i]: + fn = gr.Dropdown( + choices=font_list, + label='Font(字体)', value='CHN-华文行楷', visible=visible, + show_label=False, scale=1, allow_custom_value=False, + min_width=90, elem_id=f'font_input_{i}', container=False + ) + ut = gr.Textbox( + label='Text(文字内容)', visible=visible, scale=2, + show_label=False, elem_id=f'text_input_{i}', container=False, max_lines=1 + ) + e = gr.Checkbox(label=f'{i}', value=visible, visible=False, min_width=10) + + x = gr.Slider(label='x', value=0.4, minimum=0.0, maximum=1.0, step=0.0001, + elem_id=f'MD-t2i-{i}-x', + visible=False) + y = gr.Slider(label='y', value=0.4, minimum=0.0, maximum=1.0, step=0.0001, + elem_id=f'MD-t2i-{i}-y', + visible=False) + w = gr.Slider(label='w', value=0.2, minimum=0.0, maximum=1.0, step=0.0001, + elem_id=f'MD-t2i-{i}-w', + visible=False) + h = gr.Slider(label='h', value=0.2, minimum=0.0, maximum=1.0, step=0.0001, + elem_id=f'MD-t2i-{i}-h', + visible=False) + x.change(fn=None, inputs=x, outputs=x, _js=f'v => onBoxChange({i}, "x", v)', + show_progress=False, queue=False) + y.change(fn=None, inputs=y, outputs=y, _js=f'v => onBoxChange({i}, "y", v)', + show_progress=False, queue=False) + w.change(fn=None, inputs=w, outputs=w, _js=f'v => onBoxChange({i}, "w", v)', + show_progress=False, queue=False) + h.change(fn=None, inputs=h, outputs=h, _js=f'v => onBoxChange({i}, "h", v)', + show_progress=False, queue=False) + e.change(fn=None, inputs=e, outputs=e, _js=f'e => onBoxEnableClick({i}, e)', + queue=False) + + rect_cb_list.extend([e]) + rect_box_list.extend([x, y, w, h]) + rect_font_name_list.extend([fn]) + rect_usr_text_list.extend([ut]) + + choice.change( + fn=update_box_num, + inputs=[choice], + outputs=[ + *rect_cb_list, *rect_font_name_list, *rect_usr_text_list, *rect_box_list + ] + ) + with gr.Row(): + gr.Markdown('') + run_edit = 
gr.Button(value='Run(运行)', elem_classes='run', elem_id='run_edit') + gr.Markdown('') + with gr.Row(): + with gr.Accordion(label='Examples(示例)', open=True): + img_container = gr.Image(visible=False, label='Text Layout(文字布局)') + example_id = gr.Textbox(value=-1, visible=False, label='ID(编号)') + gen_examples = gr.Examples( + [ + [1, 'templates/1.png', 'landscape, Chinese style, ink peaks, poster', model_list[0], 1648703813, 3, 1], + [2, 'templates/2.png', 'a clock and medicine bottle has texts and "time"', model_list[0], 1654615998, 2, 1], + [3, 'templates/3.png', '漂亮的风景照,很多山峰,清澈的湖水', model_list[3], 2078698098, 3, 1], + [4, 'templates/4.png', 'a vodka, on the bar, dim background', model_list[2], 443791646, 3, 1], + [5, 'templates/5.png', '画有玫瑰的卡片,明亮的背景', model_list[4], 516210890, 2, 1], + [6, 'templates/6.png', 'posters on the table, with pens, clear background, starry sky, moon', model_list[1], 228167646, 4, 1], + [7, 'templates/7.png', 'snowy landscape, domed cabin, winter scene, cozy atmosphere, soft lighting', model_list[5], 695897181, 3, 1], + [8, 'templates/8.png', '一张关于健康教育的卡片,上面有一些文字,有一些食物图标,背景里有一些水果喝饮料的图标,且背景是模糊的', model_list[1], 936188591, 6, 1], + ], + [example_id, img_container, usr_prompt, base_model, seed, choice, num_samples], + examples_per_page=5, + label='' + ) + + example_id.change( + fn=load_box_list, + inputs=[example_id, choice], + outputs=[ + *rect_cb_list, *rect_font_name_list, *rect_usr_text_list, *rect_box_list, example_id + ] + ) + + rect_img.clear(re_edit, None, [*rect_box_list, rect_img, image_width, image_height]) + image_width.release(resize_w, [image_width, rect_img], rect_img) + image_height.release(resize_h, [image_height, rect_img], rect_img) + + with gr.Column(scale=2): + with gr.Row(): + result_gallery = gr.Gallery( + label='Result(结果)', show_label=True, preview=True, columns=8, + allow_preview=True, elem_id='gallery' + ) + with gr.Row(): + with gr.Tab("Introduction"): + gr.Markdown('What we can do') + gr.Markdown( + 'Generating 
images with accurately represented text in multi-language.') + gr.Markdown('How to use') + gr.Markdown( + 'Enter a description of the image you want to generate in the "Prompt" text box.') + gr.Markdown('Text Editing') + gr.Markdown( + 'You can drag the "Text Boxes" slider to set the number of text to be laid out, ' + 'and set the corresponding font and text content respectively, Note that there must be no overlap between the text boxes, ' + 'or the model will not generate an image.') + gr.Markdown( + 'Finally, click the Run button to generate a picture!') + with gr.Tab("说明"): + gr.Markdown('我们能做什么') + gr.Markdown('在多种语言上生成具有准确文本的图像') + gr.Markdown('如何使用') + gr.Markdown( + '在“提示词”文本框中输入你想要生成的图片所对应的文字描述。') + gr.Markdown('文本编辑') + gr.Markdown( + '你可以拖动“可编辑文字框”滑块来设置需要布局的文字数量,并分别设置对应的字体和文字内容;' + '请注意,文本框之间不能有重叠,否则模型将不会生成图片。') + gr.Markdown('最后点击运行按钮,即可生成图片!') + with gr.Row(): + result_info = gr.Markdown('debug', visible=False) + + args = [ + num_samples, a_prompt, n_prompt, + conditioning_scale, cfg_scale, inference_steps, seed, usr_prompt, + rect_img, base_model, scheduler_name, gr.State(BBOX_MAX_NUM), + *(rect_cb_list + rect_box_list + rect_font_name_list + rect_usr_text_list) + ] + run_edit.click( + fn=process, + inputs=args, + outputs=[result_gallery, result_info] + ) + + +if __name__ == "__main__": + block.launch( + server_name='0.0.0.0', + share=True, + ) diff --git a/css/style.css b/css/style.css new file mode 100644 index 0000000000000000000000000000000000000000..e2a86bdb970fac242dc551b6a2f4d0676f7205e3 --- /dev/null +++ b/css/style.css @@ -0,0 +1,157 @@ +body, html { + height: 100%; + margin: 0; +} +.gradio_container { + height: 100%; +} + +/**gallery**/ +#gallery { + height: 400px; + width: 100%; +} + +/**every font dropdown in editing**/ +#font_input_0 input { + color: #ff0000; + font-size: 15px; +} +#font_input_1 input { + color: #ff9900; + font-size: 15px; +} +#font_input_2 input { + color: #996633; + font-size: 15px; +} +#font_input_3 input { + color: 
#33cc33; + font-size: 15px; +} +#font_input_4 input { + color: #33cccc; + font-size: 15px; +} +#font_input_5 input { + color: #0066ff; + font-size: 15px; +} +#font_input_6 input { + color: #ff3399; + font-size: 15px; +} +#font_input_7 input { + color: #cc00cc; + font-size: 15px; +} +#font_input_0.block.svelte-90oupt { + height: 30px; +} +#font_input_1.block.svelte-90oupt { + height: 30px; +} +#font_input_2.block.svelte-90oupt { + height: 30px; +} +#font_input_3.block.svelte-90oupt { + height: 30px; +} +#font_input_4.block.svelte-90oupt { + height: 30px; +} +#font_input_5.block.svelte-90oupt { + height: 30px; +} +#font_input_6.block.svelte-90oupt { + height: 30px; +} +#font_input_7.block.svelte-90oupt { + height: 30px; +} + +/**every text box in editing**/ +#text_input_0 input { + font-size: 15px; +} +#text_input_1 input { + font-size: 15px; +} +#text_input_2 input { + font-size: 15px; +} +#text_input_3 input { + font-size: 15px; +} +#text_input_4 input { + font-size: 15px; +} +#text_input_5 input { + font-size: 15px; +} +#text_input_6 input { + font-size: 15px; +} +#text_input_7 input { + font-size: 15px; +} +#text_input_0.block.svelte-90oupt { + height: 30px; +} +#text_input_1.block.svelte-90oupt { + height: 30px; +} +#text_input_2.block.svelte-90oupt { + height: 30px; +} +#text_input_3.block.svelte-90oupt { + height: 30px; +} +#text_input_4.block.svelte-90oupt { + height: 30px; +} +#text_input_5.block.svelte-90oupt { + height: 30px; +} +#text_input_6.block.svelte-90oupt { + height: 30px; +} +#text_input_7.block.svelte-90oupt { + height: 30px; +} + +#row_show { + display: block; + height: 25px; +} +#markdown_1 { + display: block; + height: 25px; +} +#markdown_2 { + display: block; + height: 25px; +} +#markdown_1 span { + display: block; + height: 25px; +} +#markdown_2 span { + display: block; + height: 25px; +} + +#run_edit { + background-color: #ff4500; + color: white; +} +#run_upload { + background-color: #ff4500; + color: white; +} + 
+/************************************** +footer.svelte-1ax1toq { + display: none !important; +} +/**************************************/ diff --git a/font/Alice.ttf b/font/Alice.ttf new file mode 100644 index 0000000000000000000000000000000000000000..b500cb8825d06dfdcb33685fac3f1cd6accf5a7a --- /dev/null +++ b/font/Alice.ttf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b7ea628cb473d4b4737c4ffcd3f760bf6ca325d1344a134579ca636e33d9917 +size 128720 diff --git a/font/Aoyagireisyosimo.ttf b/font/Aoyagireisyosimo.ttf new file mode 100644 index 0000000000000000000000000000000000000000..14f5f17ee5abf675f350a639936fbaa48174f2af --- /dev/null +++ b/font/Aoyagireisyosimo.ttf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4c55ad5f72e65a482931d967725e97ff206eb3019c87281d9e5514a63bb8db9 +size 4412684 diff --git a/font/Automatons.ttf b/font/Automatons.ttf new file mode 100644 index 0000000000000000000000000000000000000000..a03b4b0d363806d9c856c986d22a293dc08dc3b6 --- /dev/null +++ b/font/Automatons.ttf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:951850cc3b0839d886fcb773accca92c22f6eda0178b76a72c81377978100408 +size 6784 diff --git a/font/BebasNeue.ttf b/font/BebasNeue.ttf new file mode 100644 index 0000000000000000000000000000000000000000..a43c7492bcbcfaabea5b063116879f42d65bb30e --- /dev/null +++ b/font/BebasNeue.ttf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:830ea186acffc2316ed1a4e42319246ba3b46b04e33a211079249bf901193f04 +size 57676 diff --git a/font/Caveat.ttf b/font/Caveat.ttf new file mode 100644 index 0000000000000000000000000000000000000000..817d528f98135c21ab6f7ab2c9354308905d5d9e --- /dev/null +++ b/font/Caveat.ttf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:263493f012c8ffbf3a69a38d43ce494c42d1cb2d44b7cb9eff10095f08fce719 +size 391068 diff --git a/font/ChosunGs.ttf b/font/ChosunGs.ttf new file mode 100644 index 
0000000000000000000000000000000000000000..e2eec46e8857da2f5452db5bd4cdb5d703bc1eca --- /dev/null +++ b/font/ChosunGs.ttf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e191bc30d23ce34797dcaf7a0965dedd67a2d85cc5dd87325ee96626cba7bea +size 9260104 diff --git a/font/Dongle.ttf b/font/Dongle.ttf new file mode 100644 index 0000000000000000000000000000000000000000..930e06132d662334e1440d259c9ecb8d09108417 --- /dev/null +++ b/font/Dongle.ttf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:010703af6b86a860847eea86823387e05669faa75e81e6a0040398906a0f1fe7 +size 4458436 diff --git a/font/Filthyrich.ttf b/font/Filthyrich.ttf new file mode 100644 index 0000000000000000000000000000000000000000..362107066e0b8993348b4da99245417a22f20a8e --- /dev/null +++ b/font/Filthyrich.ttf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6eb72bd16d5613612734a103a9cd9a7ffba83857675c8b53891eb2c3b8a3e582 +size 125132 diff --git a/font/GlTsukiji.ttf b/font/GlTsukiji.ttf new file mode 100644 index 0000000000000000000000000000000000000000..c0ca5ef029d5b89a1c7079c3ddc8f8ae1dc694d3 --- /dev/null +++ b/font/GlTsukiji.ttf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6907481e7548bc723ef312e7508b21ea2e4b30313a13e7e2042ef4ad0953f7c3 +size 334440 diff --git a/font/Gloock.ttf b/font/Gloock.ttf new file mode 100644 index 0000000000000000000000000000000000000000..054f3b4c8e2c7035dca70793186e6437351e79cf --- /dev/null +++ b/font/Gloock.ttf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39e2af503dfb3d1e093f0178863e2b163c59cdc4f4c5d152d50f51da19baf8fa +size 94940 diff --git a/font/GodoMaum.ttf b/font/GodoMaum.ttf new file mode 100644 index 0000000000000000000000000000000000000000..c9b0a4930d5c8067601db9dd981ee18f99bfc89f --- /dev/null +++ b/font/GodoMaum.ttf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:3ea6333b8a9b875d778598debd260695770a813fdbdd0d233845329151e43ffd +size 2760760 diff --git a/font/KouzanMouhitu.ttf b/font/KouzanMouhitu.ttf new file mode 100644 index 0000000000000000000000000000000000000000..a609044181f450e91280b34d52f2442c753be4a7 --- /dev/null +++ b/font/KouzanMouhitu.ttf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:582f269574e71336f1ee8318664ff6cfafd84293b3267ddf39410015713c951e +size 8238216 diff --git a/font/Lemon.ttf b/font/Lemon.ttf new file mode 100644 index 0000000000000000000000000000000000000000..bb1a494dac425b7211fe07e6e50406f56fc21680 --- /dev/null +++ b/font/Lemon.ttf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:664fb2be44c5a08ba5ec89d06c3d523e0c2650a18922e667b8d9573a88ec37c4 +size 73592 diff --git a/font/MKyrill.ttf b/font/MKyrill.ttf new file mode 100644 index 0000000000000000000000000000000000000000..f40ac29d3db7c0c4d84c08fd3b873dffb3c5750c --- /dev/null +++ b/font/MKyrill.ttf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da5256add8bb73db799e2f354ea9e4bf2fd893c2d3af583021b09a67f892962f +size 81868 diff --git a/font/Nextstep.ttf b/font/Nextstep.ttf new file mode 100644 index 0000000000000000000000000000000000000000..8f0d983786c063826ef7c6e4d0f055df5a275c49 --- /dev/null +++ b/font/Nextstep.ttf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eae79e803499e66a6f15a8fab6e19bccbeeb2fe3b51722894ff2f1584fb9cafa +size 19952 diff --git a/font/Okesip.ttf b/font/Okesip.ttf new file mode 100644 index 0000000000000000000000000000000000000000..a73b25dd062722be525570f1c6bbb50dbec2c40f --- /dev/null +++ b/font/Okesip.ttf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea383c52f8fe1d4473d1a0d8d5cca42fda375d9f317bf42a0bd21251a8976e28 +size 74056 diff --git a/font/Otomanopee.ttf b/font/Otomanopee.ttf new file mode 100644 index 0000000000000000000000000000000000000000..909c896551995464746ebe35f38351b4c58d03a1 --- 
/dev/null +++ b/font/Otomanopee.ttf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb4bbcf825544a5102991f5118bbaa2440977ea1f4d5446c22963d70fd435602 +size 366684 diff --git a/font/Playwrite.ttf b/font/Playwrite.ttf new file mode 100644 index 0000000000000000000000000000000000000000..2153d4d03d03588677ce8a022f08825187534664 --- /dev/null +++ b/font/Playwrite.ttf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17810b62177426ffbe7b661598f8d1a67254daad70aac6a375c6ee1863a3c711 +size 344044 diff --git a/font/Shrikhand.ttf b/font/Shrikhand.ttf new file mode 100644 index 0000000000000000000000000000000000000000..4f964174c3f525b0af09c41bdcb69cb87e2a3528 --- /dev/null +++ b/font/Shrikhand.ttf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00269b4efb014de272c9313e16b7e166617c6fdac819a2bb063ef1e371b5801b +size 213868 diff --git a/font/UnDotum.ttf b/font/UnDotum.ttf new file mode 100644 index 0000000000000000000000000000000000000000..f9028025f04bb9c549c22c510d977a17829c2473 --- /dev/null +++ b/font/UnDotum.ttf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b8373e126bb61f59105cf7f54a47eb1b089c2b0aacb70c6cd688bd8ea76cdc9 +size 3656228 diff --git "a/font/\345\215\216\346\226\207\346\226\260\351\255\217.ttf" "b/font/\345\215\216\346\226\207\346\226\260\351\255\217.ttf" new file mode 100644 index 0000000000000000000000000000000000000000..904bc92ca1f75366fa22349261df4c44bfa2d154 --- /dev/null +++ "b/font/\345\215\216\346\226\207\346\226\260\351\255\217.ttf" @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:361dc6d522d417fc5705948e65d191f7826147d390980f4cbdcfbca4a0200290 +size 4044420 diff --git "a/font/\345\215\216\346\226\207\350\241\214\346\245\267.ttf" "b/font/\345\215\216\346\226\207\350\241\214\346\245\267.ttf" new file mode 100644 index 0000000000000000000000000000000000000000..109dc3bc5a2905a46ce791470fd4b4e21c65d0b8 --- /dev/null +++ 
"b/font/\345\215\216\346\226\207\350\241\214\346\245\267.ttf" @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e893a5a618b39f317362efd77f3c6aeb16149328cb66872c9db8cb457a71d32 +size 4009504 diff --git "a/font/\345\256\205\345\256\266\351\272\246\345\205\213\347\254\224.ttf" "b/font/\345\256\205\345\256\266\351\272\246\345\205\213\347\254\224.ttf" new file mode 100644 index 0000000000000000000000000000000000000000..c0d88c6af8e216cf2557311dfeba82f7da28b8a2 --- /dev/null +++ "b/font/\345\256\205\345\256\266\351\272\246\345\205\213\347\254\224.ttf" @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15cec9ab9565e0851e144144f7023c4c59f4fcdea96710a75cf668049f79b3df +size 17093408 diff --git "a/font/\345\267\264\350\234\200\345\242\250\350\277\271.ttf" "b/font/\345\267\264\350\234\200\345\242\250\350\277\271.ttf" new file mode 100644 index 0000000000000000000000000000000000000000..ac1979672145dd576fd2d4d355521b03dfc4ea1d --- /dev/null +++ "b/font/\345\267\264\350\234\200\345\242\250\350\277\271.ttf" @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d81299f3ed474a0a9ed80db21410c604c1ca561f9d36b8ff62ce4137d91c0cc8 +size 7171284 diff --git "a/font/\346\226\221\351\251\254\345\255\227\347\261\273.ttf" "b/font/\346\226\221\351\251\254\345\255\227\347\261\273.ttf" new file mode 100644 index 0000000000000000000000000000000000000000..3ac3a5e850e0ccf6bb7b91db0347fe7a4af03d5e --- /dev/null +++ "b/font/\346\226\221\351\251\254\345\255\227\347\261\273.ttf" @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93a253e524b7951e2394e1ac2bf981aa268ce0ae4b11934682de6c42f19354e1 +size 2719964 diff --git "a/font/\346\270\205\346\235\276\346\211\213\345\206\231\344\275\223.ttf" "b/font/\346\270\205\346\235\276\346\211\213\345\206\231\344\275\223.ttf" new file mode 100644 index 0000000000000000000000000000000000000000..56c74fd6fa828b53b35808c3cd2b64e317baa8a8 --- /dev/null +++ 
"b/font/\346\270\205\346\235\276\346\211\213\345\206\231\344\275\223.ttf" @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddd51d365ff4b1b5c58e2cc722226cd3109e9b33fcd6538b6599cf308dbc90e3 +size 6348828 diff --git "a/font/\346\274\224\347\244\272\345\244\217\350\241\214\346\245\267.ttf" "b/font/\346\274\224\347\244\272\345\244\217\350\241\214\346\245\267.ttf" new file mode 100644 index 0000000000000000000000000000000000000000..8399590c7cbaf2947674bc629089dfc679043694 --- /dev/null +++ "b/font/\346\274\224\347\244\272\345\244\217\350\241\214\346\245\267.ttf" @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fac1f0ec044b63aa45c0775c17a2f03cbf6427b4bd7b93da87dde9e7e2020cfc +size 10073644 diff --git "a/font/\350\276\260\345\256\207\350\220\275\351\233\201\344\275\223.ttf" "b/font/\350\276\260\345\256\207\350\220\275\351\233\201\344\275\223.ttf" new file mode 100644 index 0000000000000000000000000000000000000000..cf7debe88e4b09c33cbb50756e6efa92f5279bc8 --- /dev/null +++ "b/font/\350\276\260\345\256\207\350\220\275\351\233\201\344\275\223.ttf" @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f04002cc0906d2bda376fb133d4a8160805db0113e92baf6da54aeafde88bd7 +size 4511804 diff --git "a/font/\351\233\267\347\233\226\344\275\223.ttf" "b/font/\351\233\267\347\233\226\344\275\223.ttf" new file mode 100644 index 0000000000000000000000000000000000000000..559a20923326e4c54ca73c941bcef8ed09c5267e --- /dev/null +++ "b/font/\351\233\267\347\233\226\344\275\223.ttf" @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33fd784e6aa6b2f8e8e6064280ea6fc473246b6371884155d75c55577e6db4c2 +size 4336920 diff --git "a/font/\351\235\222\346\237\263\351\232\266\344\271\246.ttf" "b/font/\351\235\222\346\237\263\351\232\266\344\271\246.ttf" new file mode 100644 index 0000000000000000000000000000000000000000..14f5f17ee5abf675f350a639936fbaa48174f2af --- /dev/null +++ 
"b/font/\351\235\222\346\237\263\351\232\266\344\271\246.ttf" @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4c55ad5f72e65a482931d967725e97ff206eb3019c87281d9e5514a63bb8db9 +size 4412684 diff --git "a/font/\351\270\277\351\233\267\346\235\277\344\271\246\347\256\200\344\275\223.ttf" "b/font/\351\270\277\351\233\267\346\235\277\344\271\246\347\256\200\344\275\223.ttf" new file mode 100644 index 0000000000000000000000000000000000000000..921b7b09bf3076c76846aa9f741b580ca40c4d88 --- /dev/null +++ "b/font/\351\270\277\351\233\267\346\235\277\344\271\246\347\256\200\344\275\223.ttf" @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30a8a257480dbe55fd872722fb6ec52a7939ea0d34ddfcedbdf3595430d62d6b +size 11651532 diff --git a/functions.py b/functions.py new file mode 100644 index 0000000000000000000000000000000000000000..2e266d830a89b26d0dcd218033a0e30caf1f7020 --- /dev/null +++ b/functions.py @@ -0,0 +1,522 @@ +import json +import os +import cv2 +import random +import numpy as np +import gradio as gr +import torch + +from zhipuai import ZhipuAI +from pytorch_lightning import seed_everything +from pprint import pprint +from PIL import Image, ImageDraw, ImageFont +from diffusers import ( + ControlNetModel, + StableDiffusionControlNetPipeline, +) +from diffusers import ( + DDIMScheduler, + PNDMScheduler, + EulerAncestralDiscreteScheduler, + DPMSolverMultistepScheduler, + EulerDiscreteScheduler, + LMSDiscreteScheduler, + HeunDiscreteScheduler +) +from controlnet_aux import ( + PidiNetDetector, + HEDdetector +) + + +BBOX_MAX_NUM = 8 +BBOX_INI_NUM = 0 +MAX_LENGTH = 20 +device = 'cuda' +pipeline = None +pre_pipeline = None +model_root = os.getenv('REPO_ROOT') +scheduler_root = f'{model_root}/Scheduler' +model_list =[ + 'JoyType.v1.0', 'RevAnimated-animation-动漫', 'GhostMix-animation-动漫', + 'rpg.v5-fantasy_realism-奇幻写实', 'midjourneyPapercut-origami-折纸版画', + 'dvarchExterior-architecture-建筑', 'awpainting.v13-portrait-人物肖像' +] 
# Canned English translations for the Chinese example prompts, so the
# bundled examples never need a round trip to the translation service.
chn_example_dict = {
    '漂亮的风景照,很多山峰,清澈的湖水': 'beautiful landscape, many peaks, clear lake',
    '画有玫瑰的卡片,明亮的背景': 'a card with roses, bright background',
    '一张关于健康教育的卡片,上面有一些文字,有一些食物图标,背景里有一些水果喝饮料的图标,且背景是模糊的': (
        'a card for health education, with some writings on it, '
        'food icons on the card, some fruits and drinking in the background, blur background '
    ),
}

# UI model label -> directory name of the checkpoint under the model root.
match_dict = {
    'JoyType.v1.0': 'JoyType-v1-1M',
    'RevAnimated-animation-动漫': 'rev-animated-v1-2-2',
    'GhostMix-animation-动漫': 'GhostMix_V2.0',
    'rpg.v5-fantasy_realism-奇幻写实': 'rpg_v5',
    'midjourneyPapercut-origami-折纸版画': 'midjourneyPapercut_v1',
    'dvarchExterior-architecture-建筑': 'dvarchExterior',
    'awpainting.v13-portrait-人物肖像': 'awpainting_v13'
}

# Font choices shown in the UI as '<LANG>-<file stem>'; the part after the
# last '-' is the file name looked up under ./font/.
font_list = [
    'CHN-华文行楷',
    'CHN-华文新魏',
    'CHN-清松手写体',
    'CHN-巴蜀墨迹',
    'CHN-雷盖体',
    'CHN-演示夏行楷',
    'CHN-鸿雷板书简体',
    'CHN-斑马字类',
    'CHN-青柳隶书',
    'CHN-辰宇落雁体',
    'CHN-宅家麦克笔',
    'ENG-Playwrite',
    'ENG-Okesip',
    'ENG-Shrikhand',
    'ENG-Nextstep',
    'ENG-Filthyrich',
    'ENG-BebasNeue',
    'ENG-Gloock',
    'ENG-Lemon',
    'RUS-Automatons',
    'RUS-MKyrill',
    'RUS-Alice',
    'RUS-Caveat',
    'KOR-ChosunGs',
    'KOR-Dongle',
    'KOR-GodoMaum',
    'KOR-UnDotum',
    'JPN-GlTsukiji',
    'JPN-Aoyagireisyosimo',
    'JPN-KouzanMouhitu',
    'JPN-Otomanopee'
]


def change_settings(base_model):
    """Return the three preset updates associated with *base_model*.

    The presets are positional with respect to ``model_list``. The three
    values look like (inference steps, guidance scale, scheduler name), but
    the components they are wired to are not visible here -- TODO confirm.

    An unknown model now yields three no-op updates instead of ``None``, so
    the number of returned values always matches the known-model case.
    """
    presets = (
        (20, 7.5, 'PNDM'),
        (30, 8.5, 'Euler'),
        (32, 8.5, 'Euler'),
        (20, 7.5, 'DPM'),
        (25, 6.5, 'Euler'),
        (25, 8.5, 'Euler'),
        (25, 7, 'DPM'),
    )
    for name, (first, second, third) in zip(model_list, presets):
        if base_model == name:
            return gr.update(value=first), gr.update(value=second), gr.update(value=third)
    return gr.update(), gr.update(), gr.update()


def update_box_num(choice):
    """Show the first *choice* text rows and reset/hide the remaining ones.

    Returns a flat tuple ordered as: all checkbox updates, all font updates,
    all text updates, then four slider updates (x, y, w, h) per row.
    """
    checkbox_updates = []
    font_updates = []
    text_updates = []
    bbox_updates = []
    for i in range(BBOX_MAX_NUM):
        if i < choice:
            checkbox_updates.append(gr.update(value=True))
            font_updates.append(gr.update(visible=True))
            text_updates.append(gr.update(visible=True))
            bbox_updates.extend(gr.update(visible=False) for _ in range(4))
        else:
            # Disabled rows go back to their initial font, text and geometry.
            checkbox_updates.append(gr.update(value=False))
            font_updates.append(gr.update(visible=False, value='CHN-华文行楷'))
            text_updates.append(gr.update(visible=False, value=''))
            bbox_updates.extend([
                gr.update(visible=False, value=0.4),
                gr.update(visible=False, value=0.4),
                gr.update(visible=False, value=0.2),
                gr.update(visible=False, value=0.2)
            ])

    return (*checkbox_updates, *font_updates, *text_updates, *bbox_updates)


def load_box_list(example_id, choice):
    """Load ``templates/<example_id>.json`` and return updates for every row.

    The return order is checkboxes, fonts, texts, sliders, and finally an
    update that resets the example id to -1. *choice* is accepted for
    interface compatibility and is not used.

    A missing template file no longer raises: the function returns no-op
    updates instead. This matters because the function itself writes -1 back
    into the id component, which can fire the change event again with an id
    that has no template. A warning is shown only for ids other than -1.
    """
    try:
        with open(f'templates/{example_id}.json', 'r', encoding='utf-8') as f:
            info = json.load(f)
    except FileNotFoundError:
        if str(example_id).strip() != '-1':
            gr.Warning(f'Example layout {example_id} was not found.')
        # 7 updates per row (checkbox, font, text, 4 sliders) + the id itself.
        return tuple(gr.update() for _ in range(7 * BBOX_MAX_NUM + 1))

    checkbox_updates = []
    font_updates = []
    text_updates = []
    bbox_updates = []
    for i in range(BBOX_MAX_NUM):
        visible = info['visible'][i]
        pos = info['pos'][i * 4: (i + 1) * 4]
        checkbox_updates.append(gr.update(value=visible))
        font_updates.append(gr.update(value=info['font'][i], visible=visible))
        text_updates.append(gr.update(value=info['text'][i], visible=visible))
        bbox_updates.extend([
            gr.update(value=pos[0]),
            gr.update(value=pos[1]),
            gr.update(value=pos[2]),
            gr.update(value=pos[3])
        ])

    return (
        *checkbox_updates, *font_updates, *text_updates, *bbox_updates,
        gr.update(value=-1)
    )


def re_edit():
    """Reset every box to its default geometry and restore a blank canvas.

    Returns four slider updates per row, then the canvas image component and
    two sliders set to 512.
    """
    update_list = []
    for _ in range(BBOX_MAX_NUM):
        update_list.extend([
            gr.update(value=0.4), gr.update(value=0.4),
            gr.update(value=0.2), gr.update(value=0.2)
        ])
    return (
        *update_list,
        gr.Image(
            value=create_canvas(),
            label='Rect Position',
            elem_id='MD-bbox-rect-t2i',
            show_label=False, visible=True
        ),
        gr.Slider(value=512),
        gr.Slider(value=512)
    )


def resize_w(w, img):
    """Resize *img* to width *w*, keeping its height.

    Returns *img* unchanged when it is ``None``. *w* is cast to ``int``
    because UI sliders may deliver floats.
    """
    if img is None:
        return img
    return cv2.resize(img, (int(w), img.shape[0]))


def resize_h(h, img):
    """Resize *img* to height *h*, keeping its width.

    Returns *img* unchanged when it is ``None``. *h* is cast to ``int``
    because UI sliders may deliver floats.
    """
    if img is None:
        return img
    return cv2.resize(img, (img.shape[1], int(h)))


def create_canvas(w=512, h=512, c=3, line=5):
    """Build a light-grey ``(h, w, c)`` uint8 canvas with grid and centre mark.

    Grid lines (value 150) are drawn every ``h // line`` rows and every
    ``w // line`` columns; a 16x16 square ``[200, 0, 0]`` marks the centre.

    The row spacing previously used the width, which misplaced horizontal
    lines on non-square canvases. Steps are clamped to at least 1 so tiny
    canvases no longer divide by zero, and the centre slice is clamped at 0
    so it cannot wrap around through negative indices.
    """
    image = np.full((h, w, c), 200, dtype=np.uint8)
    row_step = max(1, h // line)
    col_step = max(1, w // line)
    image[::row_step, :, :] = 150
    image[:, ::col_step, :] = 150
    top, left = max(0, h // 2 - 8), max(0, w // 2 - 8)
    image[top:h // 2 + 8, left:w // 2 + 8, :] = [200, 0, 0]
    return image


def canny(img):
    """Run Canny edge detection on *img* and return a 3-channel PIL image."""
    low_threshold = 64
    high_threshold = 100

    edges = cv2.Canny(img, low_threshold, high_threshold)
    # Replicate the single edge channel three times.
    edges = edges[:, :, None]
    edges = np.concatenate([edges, edges, edges], axis=2)
    return Image.fromarray(edges)


def judge_overlap(coord_list1, coord_list2):
    """Return True when two ``[x0, y0, x1, y1]`` rectangles strictly overlap.

    Rectangles that only touch along an edge are not considered overlapping.
    """
    return (
        coord_list1[0] < coord_list2[2] and coord_list1[2] > coord_list2[0]
        and coord_list1[1] < coord_list2[3] and coord_list1[3] > coord_list2[1]
    )


def parse_render_list(box_list, shape, box_num):
    """Validate the flat UI values and turn them into render instructions.

    *box_list* is laid out as ``box_num`` enable flags, ``4 * box_num``
    relative positions (x, y, w, h per box), ``box_num`` font labels and
    ``box_num`` texts. *shape* is ``(width, height)`` in pixels.

    Returns ``(render_list, True)`` on success, where each entry is a dict
    with ``text``, ``polygon`` (``[x, y, w, h]`` in pixels) and
    ``font_name``. Returns ``([], False)`` after a UI warning when a box has
    zero area, a font is not in ``font_list``, or two boxes overlap. Empty
    (or ``None``) texts are replaced by ``'JoyType'`` with a warning.
    """
    width = shape[0]
    height = shape[1]
    valid_list = box_list[:box_num]
    pos_list = box_list[box_num: 5 * box_num]
    font_name_list = box_list[5 * box_num: 6 * box_num]
    text_list = box_list[6 * box_num: 7 * box_num]

    polygons = []
    font_names = []
    texts = []
    empty_flag = False

    for i, valid in enumerate(valid_list):
        if not valid:
            continue

        pos = pos_list[i * 4: (i + 1) * 4]
        top_left_x = int(pos[0] * width)
        top_left_y = int(pos[1] * height)
        w = int(pos[2] * width)
        h = int(pos[3] * height)

        raw_text = text_list[i]
        text = '' if raw_text is None else str(raw_text)
        if text == '':
            empty_flag = True
            text = 'JoyType'

        if w <= 0 or h <= 0:
            gr.Warning(f'Area of the box{i + 1} cannot be zero!')
            return [], False

        # Explicit membership test: an `assert` would vanish under `python -O`.
        font_label = str(font_name_list[i])
        if font_label not in font_list:
            gr.Warning('Please choose a correct font!')
            return [], False

        polygons.append([top_left_x, top_left_y, w, h])
        font_names.append(font_label.split('-')[-1])
        texts.append(text)

    if empty_flag:
        gr.Warning('Null strings will be filled automatically!')

    # Convert [x, y, w, h] to corner form once, then test every pair.
    corners = [[x, y, x + w, y + h] for x, y, w, h in polygons]
    for i in range(len(corners)):
        for j in range(i + 1, len(corners)):
            if judge_overlap(corners[i], corners[j]):
                gr.Warning('Find overlapping boxes!')
                return [], False

    render_list = [
        {'text': text, 'polygon': polygon, 'font_name': font_name}
        for text, polygon, font_name in zip(texts, polygons, font_names)
    ]
    return render_list, True


def render_all_text(render_list, shape, threshold=512):
    """Draw every text of *render_list* in white on a black board.

    *shape* is ``(width, height)``. Each entry supplies ``text``,
    ``polygon`` (``[x, y, w, h]``) and ``font_name`` (file stem under
    ``./font``). Boxes taller than wide are laid out vertically, one
    character per row. *threshold* is unused and kept for compatibility.

    Returns the rendered PIL image.
    """
    width = shape[0]
    height = shape[1]
    board = Image.new('RGB', (width, height), 'black')

    for text_dict in render_list:
        text = text_dict['text']
        polygon = text_dict['polygon']
        font_name = text_dict['font_name']
        if len(text) > MAX_LENGTH:
            text = text[:MAX_LENGTH]
            gr.Warning(f'{text}... exceeds the maximum length {MAX_LENGTH} and has been cropped.')

        w, h = polygon[2:]
        vert = w < h
        image4ratio = Image.new('RGB', (1024, 1024), 'black')
        draw = ImageDraw.Draw(image4ratio)

        # A missing font file surfaces as OSError on most platforms
        # (FileNotFoundError is a subclass), so catch the base class to make
        # the .otf fallback actually reachable.
        try:
            font = ImageFont.truetype(f'./font/{font_name}.ttf', encoding='utf-8', size=50)
        except OSError:
            font = ImageFont.truetype(f'./font/{font_name}.otf', encoding='utf-8', size=50)

        if not vert:
            draw.text(xy=(0, 0), text=text, font=font, fill='white')
            _, _, _tw, _th = draw.textbbox(xy=(0, 0), text=text, font=font)
            _th += 1
        else:
            _tw, y_c = 0, 0
            for c in text:
                draw.text(xy=(0, y_c), text=c, font=font, fill='white')
                _l, _t, _r, _b = font.getbbox(c)
                _tw = max(_tw, _r - _l)
                y_c += _b
            _th = y_c + 1

        if _tw <= 0 or _th <= 0:
            # Nothing measurable was drawn (e.g. whitespace only); skipping
            # avoids a division by zero below.
            continue

        # ratio < 1: the width is the limiting dimension; > 1: the height is.
        ratio = (_th * w) / (_tw * h)
        text_img = image4ratio.crop((0, 0, _tw, _th))
        x_offset, y_offset = 0, 0
        if 0.8 <= ratio <= 1.2:
            # Close enough to the box aspect: stretch to fill it.
            text_img = text_img.resize((w, h))
        elif ratio < 0.8:
            # Fit to width and centre vertically. The bound is 0.8 (not 0.75)
            # so ratios in [0.75, 0.8) no longer fall into the fit-to-height
            # branch, where the text would be wider than the box.
            fitted_h = max(1, int(_th * (w / _tw)))
            text_img = text_img.resize((w, fitted_h))
            y_offset = (h - fitted_h) // 2
        else:
            # Fit to height and centre horizontally.
            fitted_w = max(1, int(_tw * (h / _th)))
            text_img = text_img.resize((fitted_w, h))
            x_offset = (w - fitted_w) // 2

        board.paste(text_img, (polygon[0] + x_offset, polygon[1] + y_offset))

    return board


def load_pipeline(model_name, scheduler_name):
    """Build the ControlNet pipeline for *model_name* with the given scheduler.

    *scheduler_name* is matched case-insensitively against the supported
    names; each scheduler is loaded from the sub-folder of the same name
    under ``scheduler_root``.

    Raises:
        ValueError: if *scheduler_name* is not supported (previously this
            surfaced later as an unbound-variable error).
    """
    controlnet_path = os.path.join(model_root, f'{match_dict["JoyType.v1.0"]}')
    model_path = os.path.join(model_root, model_name)
    scheduler_key = scheduler_name.lower()

    # Built here rather than at module level so importing the module does
    # not depend on this table.
    scheduler_classes = {
        'pndm': PNDMScheduler,
        'lms': LMSDiscreteScheduler,
        'euler': EulerDiscreteScheduler,
        'dpm': DPMSolverMultistepScheduler,
        'ddim': DDIMScheduler,
        'heun': HeunDiscreteScheduler,
        'euler-ancestral': EulerAncestralDiscreteScheduler,
    }
    try:
        scheduler_cls = scheduler_classes[scheduler_key]
    except KeyError:
        raise ValueError(
            f'Unsupported scheduler {scheduler_name!r}; '
            f'expected one of {sorted(scheduler_classes)}'
        ) from None
    scheduler = scheduler_cls.from_pretrained(scheduler_root, subfolder=scheduler_key)

    controlnet = ControlNetModel.from_pretrained(
        controlnet_path,
        subfolder='controlnet',
        torch_dtype=torch.float32
    )
    pipeline = StableDiffusionControlNetPipeline.from_pretrained(
        model_path,
        scheduler=scheduler,
        controlnet=controlnet,
        torch_dtype=torch.float32,
    ).to(device)

    return pipeline


def preprocess_prompt(prompt):
    """Ask the ZhipuAI chat model to turn *prompt* into an English SD prompt.

    Returns ``{'flag': 1, 'data': [str, ...]}`` with one entry per returned
    choice, or ``{'flag': 0, 'data': {}}`` when the request fails or yields
    no choices. Request errors are reported through the failure flag instead
    of propagating, since the caller already handles ``flag == 0``.
    """
    # NOTE(review): line breaks/indentation inside this literal are
    # reconstructed; confirm against the original file if exact whitespace
    # matters.
    system_prompt = '''
                Stable Diffusion是一款利用深度学习的文生图模型,支持通过使用提示词来产生新的图像,描述要包含或省略的元素。
                我在这里引入Stable Diffusion算法中的Prompt概念,又被称为提示符。这里的Prompt通常可以用来描述图像,
                他由普通常见的单词构成,最好是可以在数据集来源站点找到的著名标签(比如Ddanbooru)。
                下面我将说明Prompt的生出步骤,这里的Prompt主要用于描述人物。在Prompt的生成中,你需要通过提示词来描述 人物属性,主题,外表,情绪,衣服,姿势,视角,动作,背景。
                用英语单词或短语甚至自然语言的标签来描述,并不局限于我给你的单词。然后将你想要的相似的提示词组合在一起,请使用英文半角,做分隔符,每个提示词不要带引号,并将这些按从最重要到最不重要的顺序 排列。
                另外请您注意,永远在每个 Prompt的前面加上引号里的内容,
                “(((best quality))),(((ultra detailed))),(((masterpiece))),illustration,” 这是高质量的标志。
                人物属性中,1girl表示你生成了一个女孩,2girls表示生成了两个女孩,一次。另外再注意,Prompt中不能带有-和_。
                可以有空格和自然语言,但不要太多,单词不能重复。只返回Prompt。
                '''
    failure = {'flag': 0, 'data': {}}
    try:
        client = ZhipuAI(api_key=os.getenv('ZHIPU_API_KEY'))
        response = client.chat.completions.create(
            model="glm-4-0520",
            messages=[
                {'role': 'system', 'content': system_prompt},
                {'role': 'user', 'content': prompt}
            ],
            temperature=0.5,
            max_tokens=2048,
            top_p=1,
            stream=False,
        )
    except Exception as exc:  # service boundary: any failure means "no translation"
        print(f'preprocess_prompt failed: {exc!r}')
        return failure

    if not response or not response.choices:
        return failure

    return {'flag': 1, 'data': [item.message.content for item in response.choices]}


def _strip_wrapping_quotes(text):
    """Strip whitespace and one pair of surrounding quote characters, if any."""
    quotes = '"\'“”‘’'
    cleaned = str(text or '').strip()
    if len(cleaned) >= 2 and cleaned[0] in quotes and cleaned[-1] in quotes:
        cleaned = cleaned[1:-1].strip()
    return cleaned


def process(
    num_samples,
    a_prompt,
    n_prompt,
    conditioning_scale,
    cfg_scale,
    inference_steps,
    seed,
    usr_prompt,
    rect_img,
    base_model,
    scheduler_name,
    box_num,
    *box_list
):
    """Render the text layout and generate images with the ControlNet pipeline.

    Returns ``(images, gr.Markdown(info))`` on success and
    ``(None, gr.Markdown('error'))`` after a UI warning on invalid input.

    Steps: validate the prompt, seed the RNGs (``seed == -1`` picks a random
    seed), translate prompts containing CJK characters, rasterise the text
    boxes, take their Canny edges as the conditioning image, then run the
    cached pipeline.

    The pipeline is rebuilt whenever the model *or* the scheduler changes.
    Previously only the model was compared, so a scheduler change had no
    effect until the model was switched.
    """
    if not usr_prompt:
        gr.Warning('Must input a prompt!')
        return None, gr.Markdown('error')

    if rect_img is None:
        gr.Warning('The layout canvas is empty, please reset it and try again.')
        return None, gr.Markdown('error')

    if seed == -1:
        seed = random.randint(0, 2147483647)
    seed = int(seed)
    seed_everything(seed)

    # Support Chinese input
    if usr_prompt in chn_example_dict:
        usr_prompt = chn_example_dict[usr_prompt]
    elif any('\u4e00' <= ch <= '\u9fff' for ch in usr_prompt):
        data = preprocess_prompt(usr_prompt)
        translated = ''
        if data['flag'] == 1 and data['data']:
            # Only remove quotes that are actually there; blindly dropping the
            # first and last characters corrupts unquoted answers.
            translated = _strip_wrapping_quotes(data['data'][0])
        if not translated:
            gr.Warning('Something went wrong while translating your prompt, please try again.')
            return None, gr.Markdown('error')
        usr_prompt = translated

    shape = (rect_img.shape[1], rect_img.shape[0])
    render_list, flag = parse_render_list(box_list, shape, box_num)
    if not flag:
        return None, gr.Markdown('error')
    render_img = render_all_text(render_list, shape)

    model_name = match_dict.get(base_model)
    if model_name is None:
        gr.Warning('Please choose a correct base model!')
        return None, gr.Markdown('error')
    render_img = canny(np.array(render_img))

    w, h = render_img.size
    global pipeline, pre_pipeline

    cache_key = (model_name, str(scheduler_name).lower())
    if pre_pipeline != cache_key or pipeline is None:
        pipeline = load_pipeline(model_name, scheduler_name)
        # Record the key only after a successful load, so a failed load is
        # retried on the next call.
        pre_pipeline = cache_key

    num_samples = int(num_samples)
    batch_render_img = [render_img for _ in range(num_samples)]
    batch_prompt = [f'{usr_prompt}, {a_prompt}' for _ in range(num_samples)]
    batch_n_prompt = [n_prompt for _ in range(num_samples)]

    images = pipeline(
        batch_prompt,
        negative_prompt=batch_n_prompt,
        image=batch_render_img,
        controlnet_conditioning_scale=float(conditioning_scale),
        guidance_scale=float(cfg_scale),
        width=w,
        height=h,
        num_inference_steps=int(inference_steps),
    ).images

    return images, gr.Markdown(f'{seed}, {usr_prompt}, {box_list}')
'visible': list(visible), + 'pos': list(pos), + 'font': list(font), + 'text': list(text) + } + + with open(f'templates/{id}.json', 'w') as f: + json.dump(info, f) + + for i in range(BBOX_MAX_NUM): + if visible[i] is True: + polygon = pos[i * 4: (i + 1) * 4] + print(polygon) + left = w * polygon[0] + top = h * polygon[1] + right = left + w * polygon[2] + bottom = top + h * polygon[3] + draw.rectangle([left, top, right, bottom], outline=color[i][0], fill=color[i][1], width=3) + + board.save(f'./examples/{id}.png') + + +if __name__ == '__main__': + pass diff --git a/javascript/bboxHint.js b/javascript/bboxHint.js new file mode 100644 index 0000000000000000000000000000000000000000..dd2a8592bc23eaffc1731e6c50d451632f025963 --- /dev/null +++ b/javascript/bboxHint.js @@ -0,0 +1,550 @@ +/* +Part of the implementation is borrowed and modified from multidiffusion-upscaler-for-automatic1111, +publicly available at https://github.com/pkuliyi2015/multidiffusion-upscaler-for-automatic1111 +*/ + +const BBOX_MAX_NUM = 16; +const BBOX_WARNING_SIZE = 1280; +const DEFAULT_X = 0.4; +const DEFAULT_Y = 0.4; +const DEFAULT_H = 0.2; +const DEFAULT_W = 0.2; + +// ref: https://html-color.codes/ +// 每个框对应的颜色 +const COLOR_MAP = [ + ['#ff0000', 'rgba(255, 0, 0, 0.3)'], // red + ['#ff9900', 'rgba(255, 153, 0, 0.3)'], // orange + ['#996633', 'rgba(153, 102, 51, 0.3)'], // brown + ['#33cc33', 'rgba(51, 204, 51, 0.3)'], // green + ['#33cccc', 'rgba(51, 204, 204, 0.3)'], // indigo + ['#0066ff', 'rgba(0, 102, 255, 0.3)'], // blue + ['#ff3399', 'rgba(255, 51, 153, 0.3)'], // hot pink + ['#cc00cc', 'rgba(204, 0, 204, 0.3)'], // dark pink + ['#ff6666', 'rgba(255, 102, 102, 0.3)'], // light red + ['#ffcc66', 'rgba(255, 204, 102, 0.3)'], // light orange + ['#99cc00', 'rgba(153, 204, 0, 0.3)'], // lime green + ['#ffff00', 'rgba(255, 255, 0, 0.3)'], // yellow + ['#0099cc', 'rgba(0, 153, 204, 0.3)'], // steel blue + ['#00cc99', 'rgba(0, 204, 153, 0.3)'], // teal + ['#ff3399', 'rgba(255, 51, 153, 0.3)'], // 
const RESIZE_BORDER = 5;
const ROTATE_BORDER = 8;
const MOVE_BORDER = 5;

const t2i_bboxes = new Array(BBOX_MAX_NUM).fill(null);

/**
 * Return the root node to query for Gradio elements: the shadow root of the
 * first <gradio-app> element when there is one, otherwise the document.
 */
function gradioApp() {
    const elems = document.getElementsByTagName('gradio-app');
    const gradioShadowRoot = elems.length === 0 ? null : elems[0].shadowRoot;
    return gradioShadowRoot || document;
}

// ↓↓↓ called from gradio ↓↓↓
/**
 * Read the reference canvas size from the page and return it as "WxH".
 *
 * @param {boolean} overwrite - read the overwrite width/height inputs
 *     instead of the txt2img ones.
 * @returns {string} "<width>x<height>"; a dimension that is missing or not
 *     numeric falls back to 512.
 */
function onCreateT2IRefClick(overwrite) {
    const selector = overwrite
        ? '#MD-overwrite-width-t2i input, #MD-overwrite-height-t2i input'
        : '#txt2img_width input, #txt2img_height input';
    const inputs = gradioApp().querySelectorAll(selector);

    // A missing element must reach the 512 default instead of throwing on
    // `.value`; the explicit radix keeps the parse strictly decimal.
    const readSize = (input) => {
        const parsed = input ? parseInt(input.value, 10) : NaN;
        return isNaN(parsed) ? 512 : parsed;
    };

    // Width is read from match 0 and height from match 2.
    // NOTE(review): presumably each control renders two <input> elements,
    // so the height's first input is the third match - confirm.
    const width = readSize(inputs[0]);
    const height = readSize(inputs[2]);

    // Concatenate into a string to work around a Gradio bug
    // (a reluctant concession).
    return width.toString() + 'x' + height.toString();
}
'absolute'; + div.style.border = '2px solid ' + colorMap[0]; + div.style.background = colorMap[1]; + div.style.zIndex = '900'; + div.style.display = 'none'; + // A text tip to warn the user if bbox is too large + const tip = document.createElement('span'); + tip.id = 'MD-tip-t2i' + idx; + tip.style.left = '50%'; + tip.style.top = '50%'; + tip.style.position = 'absolute'; + tip.style.transform = 'translate(-50%, -50%)'; + tip.style.fontSize = '12px'; + tip.style.fontWeight = 'bold'; + tip.style.textAlign = 'center'; + tip.style.color = colorMap[0]; + tip.style.zIndex = '901'; + tip.style.display = 'none'; + tip.innerHTML = 'Warning: Region very large!