anushka81 committed on
Commit
f48676a
·
1 Parent(s): d13ee8a

sd for i2i and t2i

Browse files
Files changed (1) hide show
  1. app.py +34 -42
app.py CHANGED
@@ -1,37 +1,24 @@
1
  import gradio as gr
2
  import torch
3
- from diffusers import StableDiffusionPipeline
4
- from torchvision.models.segmentation import fcn_resnet50
5
- from torchvision.transforms import Compose, ToTensor, Normalize, Resize, ToPILImage
6
  from PIL import Image
7
 
8
  # Device configuration
9
  device = "cuda" if torch.cuda.is_available() else "cpu"
10
 
11
- # Load Stable Diffusion for text-to-image
12
  text_to_image_pipe = StableDiffusionPipeline.from_pretrained(
13
  "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16 if device == "cuda" else torch.float32
14
  ).to(device)
15
 
16
- # Load a pre-trained FCN model for image-to-image transformations
17
- unet_model = fcn_resnet50(pretrained=True).eval().to(device)
18
-
19
- # Transforms for UNet
20
- preprocess = Compose([
21
- Resize((512, 512)),
22
- ToTensor(),
23
- Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
24
- ])
25
-
26
- postprocess = Compose([
27
- ToPILImage(),
28
- ])
29
-
30
 
31
  # Function for Text-to-Image
32
  def text_to_image(prompt, negative_prompt, guidance_scale, num_inference_steps):
33
  image = text_to_image_pipe(
34
- prompt,
35
  negative_prompt=negative_prompt,
36
  guidance_scale=guidance_scale,
37
  num_inference_steps=num_inference_steps,
@@ -39,26 +26,26 @@ def text_to_image(prompt, negative_prompt, guidance_scale, num_inference_steps):
39
  return image
40
 
41
 
42
- # Function for Image-to-Image using Dynamic UNet
43
- def apply_dynamic_unet(init_image, prompt, strength):
44
- # Placeholder for prompt-based logic
45
- print(f"Received prompt: {prompt}") # You can define prompt-based rules here.
46
-
47
- with torch.no_grad():
48
- image_tensor = preprocess(init_image).unsqueeze(0).to(device)
49
- output = unet_model(image_tensor)["out"][0]
50
- output = torch.softmax(output, dim=0) # Normalize predictions
51
- mask = output.argmax(dim=0).float().cpu()
52
- blended = (strength * mask.unsqueeze(0) + (1 - strength) * image_tensor[0].cpu()).clamp(0, 1)
53
- blended_image = postprocess(blended)
54
- return blended_image
55
 
56
 
57
  # Gradio Interface
58
- with gr.Blocks(theme='Respair/[email protected]') as demo:
59
- gr.Markdown("# Text-to-Image and Image-to-Image ")
60
-
61
  with gr.Tab("Text-to-Image"):
 
62
  with gr.Row():
63
  text_prompt = gr.Textbox(label="Prompt", placeholder="Enter your text here...")
64
  text_negative_prompt = gr.Textbox(label="Negative Prompt", placeholder="Enter what to avoid...")
@@ -76,23 +63,28 @@ with gr.Blocks(theme='Respair/[email protected]') as demo:
76
  outputs=text_output,
77
  )
78
 
79
- # Gradio Tab with Prompt
80
  with gr.Tab("Image-to-Image"):
81
  gr.Markdown(
82
- "**Transform uploaded images using a dynamic UNet model.**\n"
83
- "Provide a prompt to describe the transformation and use the `Blend Strength` slider to adjust blending."
84
  )
85
  with gr.Row():
86
  init_image = gr.Image(type="pil", label="Upload Initial Image")
87
  with gr.Row():
88
- img_prompt = gr.Textbox(label="Prompt", placeholder="Describe the transformation (optional)...")
 
89
  with gr.Row():
90
- strength = gr.Slider(0.1, 1.0, value=0.75, step=0.05, label="Blend Strength")
 
 
91
  with gr.Row():
92
- img_generate_btn = gr.Button("Apply UNet", elem_classes=["primary-button"])
93
  with gr.Row():
94
  img_output = gr.Image(label="Modified Image")
95
 
96
- img_generate_btn.click(apply_dynamic_unet, inputs=[init_image, img_prompt, strength], outputs=img_output)
 
 
 
 
97
 
98
  demo.launch(share=True)
 
1
  import gradio as gr
2
  import torch
3
+ from diffusers import StableDiffusionPipeline, StableDiffusionImg2ImgPipeline
 
 
4
  from PIL import Image
5
 
6
  # Device configuration
7
  device = "cuda" if torch.cuda.is_available() else "cpu"
8
 
9
+ # Load Stable Diffusion pipelines
10
  text_to_image_pipe = StableDiffusionPipeline.from_pretrained(
11
  "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16 if device == "cuda" else torch.float32
12
  ).to(device)
13
 
14
+ image_to_image_pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
15
+ "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16 if device == "cuda" else torch.float32
16
+ ).to(device)
 
 
 
 
 
 
 
 
 
 
 
17
 
18
  # Function for Text-to-Image
19
  def text_to_image(prompt, negative_prompt, guidance_scale, num_inference_steps):
20
  image = text_to_image_pipe(
21
+ prompt=prompt,
22
  negative_prompt=negative_prompt,
23
  guidance_scale=guidance_scale,
24
  num_inference_steps=num_inference_steps,
 
26
  return image
27
 
28
 
29
+ # Function for Image-to-Image
30
def image_to_image(prompt, negative_prompt, init_image, strength, guidance_scale, num_inference_steps):
    """Generate a modified version of an uploaded image guided by a text prompt.

    Args:
        prompt: Text describing the desired result.
        negative_prompt: Text describing what the result should avoid.
        init_image: The uploaded starting image (a PIL image), or ``None``
            when nothing was uploaded.
        strength: Forwarded unchanged to the pipeline's ``strength`` argument.
        guidance_scale: Forwarded unchanged to ``guidance_scale``.
        num_inference_steps: Forwarded unchanged to ``num_inference_steps``.

    Returns:
        The first image of the pipeline's output.

    Raises:
        ValueError: If ``init_image`` is ``None``.
    """
    # Fail with a clear message instead of an AttributeError on ``.convert``
    # when the user presses the button without uploading an image.
    if init_image is None:
        raise ValueError("Please upload an initial image before generating.")

    # Normalize the upload to 3-channel RGB at 512x512 before it reaches the pipeline.
    init_image = init_image.convert("RGB").resize((512, 512))

    result = image_to_image_pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        # The img2img pipeline takes the starting picture as ``image``;
        # the older ``init_image`` keyword is no longer accepted.
        image=init_image,
        strength=strength,
        guidance_scale=guidance_scale,
        num_inference_steps=num_inference_steps,
    )
    return result.images[0]
 
41
 
42
 
43
  # Gradio Interface
44
+ with gr.Blocks(theme='NoCrypt/miku') as demo:
45
+ gr.Markdown("# Text-to-Image and Image-to-Image generation")
46
+
47
  with gr.Tab("Text-to-Image"):
48
+ gr.Markdown("**Generate images from text prompts **")
49
  with gr.Row():
50
  text_prompt = gr.Textbox(label="Prompt", placeholder="Enter your text here...")
51
  text_negative_prompt = gr.Textbox(label="Negative Prompt", placeholder="Enter what to avoid...")
 
63
  outputs=text_output,
64
  )
65
 
 
66
  with gr.Tab("Image-to-Image"):
67
  gr.Markdown(
68
+ "**Modify images - Upload an image, provide a prompt describing the transformation, and adjust settings for desired results."
 
69
  )
70
  with gr.Row():
71
  init_image = gr.Image(type="pil", label="Upload Initial Image")
72
  with gr.Row():
73
+ img_prompt = gr.Textbox(label="Prompt", placeholder="Describe modifications...")
74
+ img_negative_prompt = gr.Textbox(label="Negative Prompt", placeholder="Enter what to avoid...")
75
  with gr.Row():
76
+ strength = gr.Slider(0.1, 1.0, value=0.75, step=0.05, label="Strength")
77
+ img_guidance_scale = gr.Slider(1, 20, value=7.5, step=0.1, label="Guidance Scale")
78
+ img_num_inference_steps = gr.Slider(10, 100, value=50, step=1, label="Inference Steps")
79
  with gr.Row():
80
+ img_generate_btn = gr.Button("Generate", elem_classes=["primary-button"])
81
  with gr.Row():
82
  img_output = gr.Image(label="Modified Image")
83
 
84
+ img_generate_btn.click(
85
+ image_to_image,
86
+ inputs=[img_prompt, img_negative_prompt, init_image, strength, img_guidance_scale, img_num_inference_steps],
87
+ outputs=img_output,
88
+ )
89
 
90
  demo.launch(share=True)