import gradio as gr

from src.process import *

# Shared visual theme: monospace font stack plus green/stone colour hues.
THEME = gr.themes.Default(
    font=[gr.themes.GoogleFont("IBM Plex Mono"), "ui-monospace", "monospace"],
    primary_hue="lime",
    secondary_hue="emerald",
    neutral_hue="stone",
)


def create_demo() -> gr.Blocks:
    """Build the theaTRON Gradio interface and return it without launching.

    The page holds a prompt and a negative-prompt textbox, an input column
    (upload and webcam tabs plus a "Generate" button), an output gallery,
    a settings accordion (steps, resolution flag, seed) and explanatory
    Markdown. Clicking the button calls ``forward`` with the input values
    and sends its result to the gallery.

    ``forward`` is not defined in this file; presumably it is provided by
    the wildcard import from ``src.process`` -- confirm there.

    Returns:
        The assembled ``gr.Blocks`` application.
    """
    # NOTE(review): the container nesting below was reconstructed from a
    # whitespace-collapsed source; verify the layout against the running app.
    with gr.Blocks(theme=THEME) as demo:
        gr.Markdown('# 🎭 theaTRON')
        gr.Markdown('<img src="file/data/image-only.png">')
        gr.Markdown('Type what you want to see and the app will create images with the faces in the input while preserving body pose. Should work for multiple humans too!')

        prompt = gr.Textbox(
            value="Astronauts, photograph inside the ISS International Space Station corridor",
            label="Prompt:",
        )
        n_prompt = gr.Textbox(
            value="",
            label="Negative Prompt: Avoid these features in the image...",
        )

        with gr.Group():
            with gr.Box():
                with gr.Column():
                    # Main row: input column on the left, output on the right.
                    with gr.Row():
                        with gr.Column():
                            gr.Markdown('# Input')
                            with gr.Tab("Upload Image"):
                                image_upload = gr.Image(
                                    source='upload',
                                    height=500,
                                    type='pil',  # 'numpy' was left commented out in the original
                                    tool=None,
                                    elem_classes="image_upload",
                                    label='Image Upload',
                                )
                            with gr.Tab("Webcam"):
                                image_cam = gr.Image(
                                    source='webcam',
                                    height=500,
                                    type='pil',  # 'numpy' was left commented out in the original
                                    tool=None,
                                    elem_classes="image_upload",
                                    label='Webcam',
                                )
                            send_button = gr.Button(label='Generate', value='Generate')

                        with gr.Column():
                            gr.Markdown('# Output')
                            output_image = gr.Gallery(
                                label="Generated images",
                                show_label=False,
                                preview=True,
                                elem_id="output_image",
                                object_fit="contain",
                                height="auto",
                            )

                    with gr.Accordion('Settings', open=False):
                        num_steps = gr.Slider(label='Steps', minimum=1, maximum=100, value=25, step=1)
                        original_resolution = gr.Checkbox(
                            value=False,
                            label="Preserve Resolution",
                            info="Prevent Downscaling to 512 pixels (default)",
                        )
                        seed = gr.Slider(
                            label='Seed',
                            minimum=-1,
                            maximum=2147483647,
                            step=1,
                            randomize=True,
                        )

        with gr.Accordion('How does it work?', open=False):
            # The original literal ran across a physical line break, which a
            # single-quoted string cannot do; the break is now an explicit
            # escape and the text is joined from adjacent literals.
            gr.Markdown(
                'This demo was created by Mikolaj Czerkawski [@mikonvergence](https://twitter.com/mikonvergence) based on several 🌱 open-source tools. \n'
                'It only puts together existing models, so in some sense, nothing new here!'
            )
            gr.Markdown('## Pipeline Details')
            gr.Markdown('### Step 1: Face Detection')
            gr.Markdown('I use the model from kornia for face detection, since at the time of release, Segment Anything does not yet work with text prompts. Thank you, kornia team! https://kornia.readthedocs.io/en/latest/applications/face_detection.html')
            gr.Markdown('### Step 2: Segment Anything')
            gr.Markdown('I use Segment Anything from Meta (via HF transformers library) to segment the face based on the detection points. https://huggingface.co/docs/transformers/main/model_doc/sam')
            gr.Markdown('### Step 3: Mask Post-processing')
            gr.Markdown('The mask is blurred to achieve a smoother blend when recomposing the photo. Also, if a separate mask is used for the top of the head and the face, then the potential space between the two masks is morphologically filled.')
            gr.Markdown('### Step 4: ControlNetInpaint')
            gr.Markdown('Finally, the resulting mask is used with the ControlNetInpaint tool [ControlNetInpaint](https://github.com/mikonvergence/ControlNetInpaint) and the pose guide.')

        gr.Markdown('---')
        gr.Markdown('# Learn More About Diffusion 💡')
        gr.Markdown('Check out my open-source ⏩[DiffusionFastForward](https://github.com/mikonvergence/DiffusionFastForward) course. It contains example code, executable notebooks, videos, notes, and a few use cases for training from scratch!')

        # Event wiring has to happen while the Blocks context is still open.
        # The input order is passed positionally to `forward`; its signature
        # is not visible in this file, so keep the order exactly as is.
        send_button.click(
            fn=forward,
            inputs=[
                image_cam,
                image_upload,
                prompt,
                n_prompt,
                num_steps,
                seed,
                original_resolution,
            ],
            outputs=[output_image],
        )

    return demo


if __name__ == "__main__":
    # Build and start the app only when the file is executed as a script.
    demo = create_demo()
    demo.launch(debug=True, share=True)