jwengr committed on
Commit 4d8f187
1 Parent(s): f36d393

Update README.md

Files changed (1)
  1. README.md +43 -17
README.md CHANGED
@@ -5,47 +5,73 @@ base_model:
  pipeline_tag: image-to-image
  ---
  ![image_gray_masked](gray-masked.png)
- ```
  import torch
  import numpy as np
-
  from PIL import Image
  from diffusers.utils import load_image
- from transformers import AutoConfig, AutoModel, ModelCard

  img_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo.png"
  mask_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo_mask.png"

- image_gray = load_image(img_url).resize((512, 512)).convert('L').convert('RGB') # image must be 3 channel
  mask_image = load_image(mask_url).resize((512, 512))
- mask = (np.array(mask_image)>128)*1
- image_gray_masked = Image.fromarray(((1-mask) * np.array(image_gray)).astype(np.uint8))

- # Load the gray-inpaint model
  gray_inpaintor = AutoModel.from_pretrained(
      'jwengr/stable-diffusion-2-gray-inpaint-to-rgb',
      subfolder='gray-inpaint',
-     trust_remote_code=True,
  )
-
- Load the gray2rgb model
  gray2rgb = AutoModel.from_pretrained(
      'jwengr/stable-diffusion-2-gray-inpaint-to-rgb',
      subfolder='gray2rgb',
-     trust_remote_code=True,
  )

- Move models to GPU
  gray_inpaintor.to('cuda')
  gray2rgb.to('cuda')

- # Enable memory-efficient attention
  # gray2rgb.unet.enable_xformers_memory_efficient_attention()
  # gray_inpaintor.unet.enable_xformers_memory_efficient_attention()

- with torch.autocast('cuda',dtype=torch.bfloat16):
      with torch.no_grad():
-         # each model's input image should be one of PIL.Image, List[PIL.Image], or a preprocessed tensor (B,3,H,W); image must be 3 channel
-         image_gray_restored = gray_inpaintor(image_gray_masked, num_inference_steps=250, seed=10)[0].convert('L') # you can pass the 'mask' arg explicitly; mask : Tensor (B,1,512,512)
          image_restored = gray2rgb(image_gray_restored.convert('RGB'))
- ```
 
  pipeline_tag: image-to-image
  ---
  ![image_gray_masked](gray-masked.png)
+ ![image_gray_restored](gray-inpaint-example.png)
+ ![image_restored](gray-to-rgb-example.png)
+ ---
+
+ # **Stable Diffusion 2-Based Gray-Inpainting to RGB**
+
+ This model pipeline restores grayscale images by inpainting missing regions and then converting the result to RGB. It chains two models based on the Stable Diffusion 2 architecture:
+
+ 1. **Gray-Inpainting Model**: Fills missing regions of a grayscale image using a masked inpainting process.
+ 2. **Gray-to-RGB Conversion Model**: Converts the grayscale image (or inpainted output) into a full-color RGB image.
+
+ ---
+
+ ## **Pipeline Workflow**
+
+ 1. **Load Grayscale and Mask Images**:
+    - The grayscale input is preprocessed to ensure it has 3 channels (`RGB` format).
+    - A binary mask identifies the areas to restore or inpaint.
+
+ 2. **Apply Gray-Inpainting**:
+    - The inpainting model takes the masked grayscale image and restores the missing regions over the requested number of diffusion steps (`num_inference_steps`).
+
+ 3. **Convert to RGB**:
+    - The restored grayscale image is then processed by the gray-to-RGB model to produce a full-color output.
+
+ ---
+
+ ## **Code Example**
+
+ ```python
  import torch
  import numpy as np
  from PIL import Image
  from diffusers.utils import load_image
+ from transformers import AutoModel

+ # Load and preprocess images
  img_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo.png"
  mask_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo_mask.png"

+ image_gray = load_image(img_url).resize((512, 512)).convert('L').convert('RGB')  # Ensure 3-channel input
  mask_image = load_image(mask_url).resize((512, 512))
+ mask = (np.array(mask_image) > 128) * 1
+ image_gray_masked = Image.fromarray(((1 - mask) * np.array(image_gray)).astype(np.uint8))

+ # Load models
  gray_inpaintor = AutoModel.from_pretrained(
      'jwengr/stable-diffusion-2-gray-inpaint-to-rgb',
      subfolder='gray-inpaint',
+     trust_remote_code=True
  )
  gray2rgb = AutoModel.from_pretrained(
      'jwengr/stable-diffusion-2-gray-inpaint-to-rgb',
      subfolder='gray2rgb',
+     trust_remote_code=True
  )

+ # Move models to GPU
  gray_inpaintor.to('cuda')
  gray2rgb.to('cuda')

+ # Memory-efficient attention (optional)
  # gray2rgb.unet.enable_xformers_memory_efficient_attention()
  # gray_inpaintor.unet.enable_xformers_memory_efficient_attention()

+ # Perform image restoration and conversion
+ with torch.autocast('cuda', dtype=torch.bfloat16):
      with torch.no_grad():
+         image_gray_restored = gray_inpaintor(image_gray_masked, num_inference_steps=250, seed=10)[0].convert('L')
          image_restored = gray2rgb(image_gray_restored.convert('RGB'))
+ ```
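The comment removed in this commit documented the models' input contract: each model accepts a `PIL.Image`, a `List[PIL.Image]`, or a preprocessed `(B,3,H,W)` tensor, and the inpainting model can take an explicit `mask` tensor of shape `(B,1,512,512)`. Below is a minimal sketch of that explicit-mask path plus saving the outputs, continuing from the example above; the mask construction, the `mask=` keyword, the list-vs-image return handling, and the file names are assumptions drawn from those comments, not confirmed API.

```python
# Continues from the example above (mask_image, image_gray_masked, and both
# models are already defined and on 'cuda'). The explicit `mask` keyword and
# the list-return handling below are assumptions based on the comment in the
# previous revision of this README, not a documented API.
mask_np = (np.array(mask_image)[..., 0] > 128).astype(np.float32)  # (512, 512) binary mask
mask_tensor = torch.from_numpy(mask_np)[None, None].to('cuda')     # reshape to (1, 1, 512, 512)

with torch.autocast('cuda', dtype=torch.bfloat16):
    with torch.no_grad():
        # Hypothetical: pass the mask explicitly instead of leaving it implicit
        image_gray_restored = gray_inpaintor(
            image_gray_masked, mask=mask_tensor,
            num_inference_steps=250, seed=10,
        )[0].convert('L')
        image_restored = gray2rgb(image_gray_restored.convert('RGB'))

# Save the results; gray2rgb may return a list like the inpainting model does,
# so index defensively (assumption). File names are illustrative.
image_gray_restored.save('gray-restored.png')
rgb = image_restored[0] if isinstance(image_restored, list) else image_restored
rgb.save('restored-rgb.png')
```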