anushka81 committed
Commit · bad655a
Parent(s): 54c658a

changes

Files changed:
- app.py (+57 −20)
- requirements.txt (+1 −2)
app.py
CHANGED
@@ -1,21 +1,59 @@
 import gradio as gr
-from diffusers import StableDiffusionPipeline, StableDiffusionImg2ImgPipeline
 import torch
+from diffusers import StableDiffusionPipeline
+from torchvision.models.segmentation import fcn_resnet50
+from torchvision.transforms import Compose, ToTensor, Normalize, Resize, ToPILImage
 from PIL import Image
 
+# Device configuration
+device = "cuda" if torch.cuda.is_available() else "cpu"
+
+# Load Stable Diffusion for text-to-image
+text_to_image_pipe = StableDiffusionPipeline.from_pretrained(
+    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16 if device == "cuda" else torch.float32
+).to(device)
+
+# Load a pre-trained FCN model for image-to-image transformations
+unet_model = fcn_resnet50(pretrained=True).eval().to(device)
+
+# Transforms for UNet
+preprocess = Compose([
+    Resize((512, 512)),
+    ToTensor(),
+    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+])
+
+postprocess = Compose([
+    ToPILImage(),
+])
+
+
+# Function for Text-to-Image
 def text_to_image(prompt, negative_prompt, guidance_scale, num_inference_steps):
-
-
+    image = text_to_image_pipe(
+        prompt,
+        negative_prompt=negative_prompt,
+        guidance_scale=guidance_scale,
+        num_inference_steps=num_inference_steps,
+    ).images[0]
     return image
 
-def image_to_image(prompt, negative_prompt, init_image, strength, guidance_scale, num_inference_steps):
-    pipe = StableDiffusionImg2ImgPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16).to("cuda")
-    init_image = init_image.convert("RGB").resize((512, 512))
-    image = pipe(prompt, negative_prompt=negative_prompt, init_image=init_image, strength=strength, guidance_scale=guidance_scale, num_inference_steps=num_inference_steps).images[0]
-    return image
 
+# Function for Image-to-Image using Dynamic UNet
+def apply_dynamic_unet(init_image, strength):
+    with torch.no_grad():
+        image_tensor = preprocess(init_image).unsqueeze(0).to(device)
+        output = unet_model(image_tensor)["out"][0]
+        output = torch.softmax(output, dim=0)  # Normalize predictions
+        mask = output.argmax(dim=0).float().cpu()
+        blended = (strength * mask.unsqueeze(0) + (1 - strength) * image_tensor[0].cpu()).clamp(0, 1)
+        blended_image = postprocess(blended)
+    return blended_image
+
+
+# Gradio Interface
 with gr.Blocks(theme='Respair/[email protected]') as demo:
-    gr.Markdown("# Text-to-Image and Image-to-Image")
+    gr.Markdown("# Text-to-Image and Image-to-Image ")
 
     with gr.Tab("Text-to-Image"):
         with gr.Row():
@@ -28,24 +66,23 @@ with gr.Blocks(theme='Respair/[email protected]') as demo:
             generate_btn = gr.Button("Generate", elem_classes=["primary-button"])
         with gr.Row():
             text_output = gr.Image(label="Generated Image")
-
-        generate_btn.click(
+
+        generate_btn.click(
+            text_to_image,
+            inputs=[text_prompt, text_negative_prompt, guidance_scale, num_inference_steps],
+            outputs=text_output,
+        )
 
     with gr.Tab("Image-to-Image"):
         with gr.Row():
-            init_image = gr.Image(type="pil",
-        with gr.Row():
-            img_prompt = gr.Textbox(label="Prompt", placeholder="Describe modifications...")
-            img_negative_prompt = gr.Textbox(label="Negative Prompt", placeholder="Enter what to avoid...")
+            init_image = gr.Image(type="pil", label="Upload Initial Image")
         with gr.Row():
-            strength = gr.Slider(0.1, 1.0, value=0.75, step=0.05, label="Strength")
-            img_guidance_scale = gr.Slider(1, 20, value=7.5, step=0.1, label="Guidance Scale")
-            img_num_inference_steps = gr.Slider(10, 100, value=50, step=1, label="Inference Steps")
+            strength = gr.Slider(0.1, 1.0, value=0.75, step=0.05, label="Blend Strength")
         with gr.Row():
-            img_generate_btn = gr.Button("
+            img_generate_btn = gr.Button("Apply UNet", elem_classes=["primary-button"])
         with gr.Row():
             img_output = gr.Image(label="Modified Image")
 
-        img_generate_btn.click(
+        img_generate_btn.click(apply_dynamic_unet, inputs=[init_image, strength], outputs=img_output)
 
 demo.launch(share=True)
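A portability note on the new model-loading line: `fcn_resnet50(pretrained=True)` uses the legacy torchvision flag, which has been deprecated since torchvision 0.13 in favor of explicit weight enums. A minimal sketch of the equivalent call under that assumption:

```python
# Equivalent load on torchvision >= 0.13, where `pretrained=True`
# is deprecated in favor of weight enums.
import torch
from torchvision.models.segmentation import fcn_resnet50, FCN_ResNet50_Weights

device = "cuda" if torch.cuda.is_available() else "cpu"
unet_model = fcn_resnet50(weights=FCN_ResNet50_Weights.DEFAULT).eval().to(device)
```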
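Note also that what the commit calls a "Dynamic UNet" is torchvision's FCN-ResNet50 semantic-segmentation model: `apply_dynamic_unet` blends the argmax class-index mask into the normalized input, and since the mask holds integer class indices (0–20) rather than probabilities, the clamp mostly whitens non-background regions. A standalone sketch of the same computation, assuming a local `test.jpg` exists (hypothetical path):

```python
# Standalone sketch of the committed blend, outside Gradio.
# "test.jpg" is a placeholder input path.
import torch
from PIL import Image
from torchvision.models.segmentation import fcn_resnet50
from torchvision.transforms import Compose, ToTensor, Normalize, Resize, ToPILImage

model = fcn_resnet50(pretrained=True).eval()
preprocess = Compose([
    Resize((512, 512)),
    ToTensor(),
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

img = Image.open("test.jpg").convert("RGB")
x = preprocess(img).unsqueeze(0)
with torch.no_grad():
    logits = model(x)["out"][0]      # (21, 512, 512) per-class logits
mask = logits.argmax(dim=0).float() # integer class indices, 0-20
strength = 0.75
blended = (strength * mask.unsqueeze(0) + (1 - strength) * x[0]).clamp(0, 1)
ToPILImage()(blended).save("blended.png")
```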
requirements.txt
CHANGED
@@ -1,7 +1,6 @@
 gradio
 torch
+torchvision
 diffusers
 transformers
-accelerate
-huggingface_hub
 Pillow
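On the dependency changes: `torchvision` is the genuinely new requirement (it provides `fcn_resnet50` and the transforms), while dropping `accelerate` and `huggingface_hub` should be harmless, since nothing in the new app.py imports `accelerate` directly and `diffusers` pulls in `huggingface_hub` transitively. A quick import check, as a sketch:

```python
# Sanity check that the trimmed dependency set still imports cleanly.
import diffusers, gradio, torch, torchvision, transformers
from PIL import Image  # provided by Pillow

print(gradio.__version__, torch.__version__, torchvision.__version__)
```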