import os
os.system("git clone https://huggingface.co/Cene655/ImagenT5-3B")
#%%capture
#!git lfs install
#!git clone https://huggingface.co/Cene655/ImagenT5-3B
#%%capture
#!pip install git+https://github.com/cene555/Imagen-pytorch.git
#!pip install git+https://github.com/openai/CLIP.git
#%%capture
#!git clone https://github.com/xinntao/Real-ESRGAN.git
#%cd Real-ESRGAN
#%%capture
#!pip install basicsr
# facexlib and gfpgan are for face enhancement
#!pip install facexlib
#!pip install gfpgan
#%%capture
#!pip install -r requirements.txt
#!python setup.py develop
#!wget https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth -P experiments/pretrained_models
# Imports
from PIL import Image
from IPython.display import display
import numpy as np
import torch as th
from imagen_pytorch.model_creation import create_model_and_diffusion as create_model_and_diffusion_dalle2
from imagen_pytorch.model_creation import model_and_diffusion_defaults as model_and_diffusion_defaults_dalle2
from transformers import AutoTokenizer
import cv2
import glob
from basicsr.archs.rrdbnet_arch import RRDBNet
from realesrgan import RealESRGANer
from realesrgan.archs.srvgg_arch import SRVGGNetCompact
from gfpgan import GFPGANer
import gradio as gr
has_cuda = th.cuda.is_available()
device = th.device('cuda' if has_cuda else 'cpu')
# Setting Up
def model_fn(x_t, ts, **kwargs):
    # Classifier-free guidance: the batch holds the same latents twice, once
    # paired with the conditional tokens and once with the unconditional ones,
    # so a single forward pass yields both predictions.
    guidance_scale = 5
    half = x_t[: len(x_t) // 2]
    combined = th.cat([half, half], dim=0)
    model_out = model(combined, ts, **kwargs)
    eps, rest = model_out[:, :3], model_out[:, 3:]
    cond_eps, uncond_eps = th.split(eps, len(eps) // 2, dim=0)
    # eps = eps_uncond + s * (eps_cond - eps_uncond)
    half_eps = uncond_eps + guidance_scale * (cond_eps - uncond_eps)
    eps = th.cat([half_eps, half_eps], dim=0)
    return th.cat([eps, rest], dim=1)
def show_images(batch: th.Tensor):
    """Display a batch of images inline."""
    scaled = ((batch + 1) * 127.5).round().clamp(0, 255).to(th.uint8).cpu()
    # Lay the batch out side by side: [H, batch * W, 3]
    reshaped = scaled.permute(2, 0, 3, 1).reshape([batch.shape[2], -1, 3])
    display(Image.fromarray(reshaped.numpy()))
def get_numpy_img(img):
    scaled = ((img + 1) * 127.5).round().clamp(0, 255).to(th.uint8).cpu()
    reshaped = scaled.permute(2, 0, 3, 1).reshape([img.shape[2], -1, 3])
    # Swap channels so the result is in the BGR order cv2 expects
    return cv2.cvtColor(reshaped.numpy(), cv2.COLOR_BGR2RGB)
def _fix_path(path):
    # Strip the 'module.' prefix that nn.DataParallel adds to state dict keys
    d = th.load(path)
    checkpoint = {}
    for key in d.keys():
        checkpoint[key.replace('module.', '')] = d[key]
    return checkpoint
options = model_and_diffusion_defaults_dalle2()
options['use_fp16'] = False
options['diffusion_steps'] = 200
options['num_res_blocks'] = 3
options['t5_name'] = 't5-3b'
options['cache_text_emb'] = True
model, diffusion = create_model_and_diffusion_dalle2(**options)
model.eval()
#if has_cuda:
# model.convert_to_fp16()
model.to(device)
model.load_state_dict(_fix_path('/content/ImagenT5-3B/model.pt'))
print('total base parameters', sum(x.numel() for x in model.parameters()))
# prints: total base parameters 1550556742 (~1.55B)
realesrgan_model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64,
                           num_block=23, num_grow_ch=32, scale=4)
netscale = 4
upsampler = RealESRGANer(
    scale=netscale,
    model_path='/content/Real-ESRGAN/experiments/pretrained_models/RealESRGAN_x4plus.pth',
    model=realesrgan_model,
    tile=0,
    tile_pad=10,
    pre_pad=0,
    half=True  # fp16 inference; assumes a CUDA GPU
)
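# Standalone use of the background upsampler (hypothetical file names):
# RealESRGANer.enhance returns the upscaled image and the detected image mode.
# img = cv2.imread('input.png', cv2.IMREAD_COLOR)
# output, _ = upsampler.enhance(img, outscale=4)
# cv2.imwrite('input_x4.png', output)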
face_enhancer = GFPGANer(
    model_path='https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.3.pth',
    upscale=4,
    arch='clean',
    channel_multiplier=2,
    bg_upsampler=upsampler
)
tokenizer = AutoTokenizer.from_pretrained(options['t5_name'])
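# Quick sanity check (hypothetical prompt, not from the original notebook):
# with padding="max_length" and truncation=True the encoder always yields
# length-128 sequences, so both tensors below are shaped [1, 128].
enc = tokenizer('A photo of a cat', max_length=128, padding='max_length',
                truncation=True, return_tensors='pt')
print(enc['input_ids'].shape, enc['attention_mask'].shape)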
#@title What do you want to generate?
prompt = 'A photo of a cat'#@param {type:"string"}
def gen_img(prompt):
    text_encoding = tokenizer(
        prompt,
        max_length=128,
        padding="max_length",
        truncation=True,
        return_attention_mask=True,
        add_special_tokens=True,
        return_tensors="pt"
    )
    # The empty prompt supplies the unconditional half for classifier-free guidance
    uncond_text_encoding = tokenizer(
        '',
        max_length=128,
        padding="max_length",
        truncation=True,
        return_attention_mask=True,
        add_special_tokens=True,
        return_tensors="pt"
    )
    batch_size = 4
    # Repeat the conditional and unconditional tokens batch_size times each
    cond_tokens = th.from_numpy(np.array([text_encoding['input_ids'][0].numpy() for i in range(batch_size)]))
    uncond_tokens = th.from_numpy(np.array([uncond_text_encoding['input_ids'][0].numpy() for i in range(batch_size)]))
    cond_attention_mask = th.from_numpy(np.array([text_encoding['attention_mask'][0].numpy() for i in range(batch_size)]))
    uncond_attention_mask = th.from_numpy(np.array([uncond_text_encoding['attention_mask'][0].numpy() for i in range(batch_size)]))
    model_kwargs = {}
    model_kwargs["tokens"] = th.cat((cond_tokens, uncond_tokens)).to(device)
    model_kwargs["mask"] = th.cat((cond_attention_mask, uncond_attention_mask)).to(device)
    # Generation
    model.del_cache()
    sample = diffusion.p_sample_loop(
        model_fn,
        (batch_size * 2, 3, 64, 64),  # doubled batch: conditional + unconditional
        clip_denoised=True,
        model_kwargs=model_kwargs,
        device=device,
        progress=True,
    )[:batch_size]
    model.del_cache()
    # The Gradio Image output below uses type="filepath", so save the first
    # sample to disk and return its path rather than the raw tensor
    out_path = 'generated.png'
    cv2.imwrite(out_path, get_numpy_img(sample[:1]))
    return out_path
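# upscale_img is wired to the "upscale" button in the demo below but is not
# defined anywhere in this file. A minimal sketch, assuming the GFPGANer.enhance
# API (which returns cropped faces, restored faces, and the restored image) and
# a hypothetical output path:
def upscale_img(img_path):
    img = cv2.imread(img_path, cv2.IMREAD_COLOR)
    # paste_back=True blends the enhanced faces back into the background
    # upscaled by the RealESRGANer configured above as bg_upsampler
    _, _, restored = face_enhancer.enhance(
        img, has_aligned=False, only_center_face=False, paste_back=True
    )
    out_path = 'upscaled.png'  # hypothetical output location
    cv2.imwrite(out_path, restored)
    return out_path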
demo = gr.Blocks()
with demo:
    gr.Markdown("<h1><center>cene555/Imagen-pytorch</center></h1>")
    gr.Markdown(
        "<div>github repo <a href='https://github.com/cene555/Imagen-pytorch/blob/main/images/2.jpg'>here</a></div>"
        "<div>hf model <a href='https://huggingface.co/Cene655/ImagenT5-3B/tree/main'>here</a></div>"
    )
    with gr.Row():
        b0 = gr.Button("generate")
        b1 = gr.Button("upscale")
    with gr.Row():
        desc = gr.Textbox(label="description", placeholder="an impressionist painting of a white vase")
    with gr.Row():
        intermediate_image = gr.Image(label="portrait", type="filepath", shape=(256, 256))
        output_image = gr.Image(label="portrait", type="filepath", shape=(256, 256))
    b0.click(gen_img, inputs=[desc], outputs=[intermediate_image])
    b1.click(upscale_img, inputs=[intermediate_image], outputs=output_image)
    # examples=examples
demo.launch(enable_queue=True, debug=True)