import gradio as gr
import torch
from diffusers import StableDiffusionPipeline
from PIL import Image
from torchvision.models.segmentation import fcn_resnet50
from torchvision.transforms import Compose, ToTensor, Normalize, Resize, ToPILImage


# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Stable Diffusion v1.5 text-to-image pipeline: half precision on CUDA,
# full precision on CPU.
text_to_image_pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=torch.float32 if device != "cuda" else torch.float16,
)
text_to_image_pipe = text_to_image_pipe.to(device)

# Despite the variable name, this is torchvision's FCN-ResNet50 segmentation
# network, loaded with pretrained weights and put in eval mode.
# NOTE(review): `pretrained=True` is the legacy torchvision argument; newer
# releases document `weights=` instead -- confirm against the pinned version.
unet_model = fcn_resnet50(pretrained=True)
unet_model = unet_model.eval().to(device)

# Input pipeline for the segmentation model: resize to 512x512, convert to a
# tensor, then normalize each channel with the mean/std listed below.
preprocess = Compose(
    [
        Resize((512, 512)),
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

# Output pipeline: turn a tensor back into a PIL image.
postprocess = Compose([ToPILImage()])


def text_to_image(prompt, negative_prompt, guidance_scale, num_inference_steps):
    """Generate a single image from a text prompt.

    Calls the module-level ``text_to_image_pipe`` and returns the first image
    of its result.

    Args:
        prompt: Text describing the desired image.
        negative_prompt: Text passed to the pipeline as ``negative_prompt``.
        guidance_scale: Value passed to the pipeline as ``guidance_scale``.
        num_inference_steps: Number of inference steps. Coerced to ``int``
            because slider widgets and API clients may deliver a float such
            as ``50.0``, while the pipeline expects an integer step count.

    Returns:
        The first entry of the pipeline result's ``images`` list.
    """
    result = text_to_image_pipe(
        prompt,
        negative_prompt=negative_prompt,
        guidance_scale=guidance_scale,
        num_inference_steps=int(num_inference_steps),
    )
    return result.images[0]


def apply_dynamic_unet(init_image, strength):
    """Blend the segmentation model's class map over an uploaded image.

    The image is resized and converted to a tensor with the non-normalizing
    steps of ``preprocess``, segmented by ``unet_model``, and the per-pixel
    class-index map is mixed with the image pixels as
    ``strength * mask + (1 - strength) * pixels``. The result is clamped to
    ``[0, 1]`` and converted to a PIL image by ``postprocess``.

    Args:
        init_image: PIL image to process, or ``None`` when nothing was
            uploaded.
        strength: Weight given to the mask; the image gets ``1 - strength``.

    Returns:
        The blended image produced by ``postprocess``.

    Raises:
        gr.Error: If ``init_image`` is ``None``.
    """
    if init_image is None:
        # Without this guard the transforms fail with an opaque TypeError;
        # gr.Error shows a readable message in the UI instead.
        raise gr.Error("Please upload an image first.")

    # ``preprocess`` ends with Normalize. Split that step off so the
    # un-normalized pixels stay available for blending: mixing the normalized
    # tensor and clamping it to [0, 1] would distort the image colours.
    *pixel_steps, normalize = preprocess.transforms
    # Force three channels so RGBA / greyscale inputs match the 3-channel
    # mean/std used by Normalize.
    pixels = Compose(pixel_steps)(init_image.convert("RGB"))

    with torch.no_grad():
        model_input = normalize(pixels).unsqueeze(0).to(device)
        logits = unet_model(model_input)["out"][0]
        # argmax over the class dimension; applying softmax first would not
        # change which class wins, so it is omitted.
        mask = logits.argmax(dim=0).float().cpu()

    # NOTE(review): ``mask`` holds class indices rather than a 0/1 mask, so
    # values above 1 are simply cut off by the clamp below.
    blended = (strength * mask.unsqueeze(0) + (1 - strength) * pixels).clamp(0, 1)
    return postprocess(blended)


# Two-tab Gradio UI: prompt-driven generation and segmentation overlay.
# NOTE(review): the theme id contains "[email protected]", which looks like an
# e-mail-obfuscation artifact that replaced the original "<name>@<version>"
# suffix -- restore the intended theme id.
with gr.Blocks(theme="Respair/[email protected]") as demo:
    gr.Markdown("# Text-to-Image and Image-to-Image ")

    # --- Tab 1: generate an image from a prompt --------------------------
    with gr.Tab("Text-to-Image"):
        with gr.Row():
            text_prompt = gr.Textbox(
                label="Prompt",
                placeholder="Enter your text here...",
            )
            text_negative_prompt = gr.Textbox(
                label="Negative Prompt",
                placeholder="Enter what to avoid...",
            )
        with gr.Row():
            guidance_scale = gr.Slider(
                minimum=1,
                maximum=20,
                value=7.5,
                step=0.1,
                label="Guidance Scale",
            )
            num_inference_steps = gr.Slider(
                minimum=10,
                maximum=100,
                value=50,
                step=1,
                label="Inference Steps",
            )
        with gr.Row():
            generate_btn = gr.Button("Generate", elem_classes=["primary-button"])
        with gr.Row():
            text_output = gr.Image(label="Generated Image")

        # Input order matches the parameter order of text_to_image.
        generate_btn.click(
            fn=text_to_image,
            inputs=[
                text_prompt,
                text_negative_prompt,
                guidance_scale,
                num_inference_steps,
            ],
            outputs=text_output,
        )

    # --- Tab 2: blend a segmentation map over an uploaded image ----------
    with gr.Tab("Image-to-Image"):
        with gr.Row():
            init_image = gr.Image(type="pil", label="Upload Initial Image")
        with gr.Row():
            strength = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.75,
                step=0.05,
                label="Blend Strength",
            )
        with gr.Row():
            img_generate_btn = gr.Button("Apply UNet", elem_classes=["primary-button"])
        with gr.Row():
            img_output = gr.Image(label="Modified Image")

        img_generate_btn.click(
            fn=apply_dynamic_unet,
            inputs=[init_image, strength],
            outputs=img_output,
        )

# Start the app; share=True also requests a public share link.
demo.launch(share=True)