# Tiger-Model / generation.py
# Copyright 2024 Hui Lu, Fang Dai, Siqiong Yao.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import random

import numpy as np
import torch
from PIL import Image, ImageDraw, ImageFont

from diffusers import ControlNetModel, DDIMScheduler, StableDiffusionControlNetInpaintPipeline
from diffusers.utils import load_image
######################################################################## Foreground / Background Generation #########################################################################
def make_inpaint_condition(image, image_mask):
    """Build the ControlNet inpainting condition: normalized RGB with masked pixels set to -1."""
    image = np.array(image.convert("RGB")).astype(np.float32) / 255.0
    image_mask = np.array(image_mask.convert("L")).astype(np.float32) / 255.0
    assert image.shape[0:2] == image_mask.shape[0:2], "image and image_mask must have the same image size"
    image[image_mask > 0.5] = -1.0  # mark masked pixels so ControlNet treats them as regions to fill
    image = np.expand_dims(image, 0).transpose(0, 3, 1, 2)  # HWC -> NCHW with batch dim
    return torch.from_numpy(image)
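# Illustrative sanity check (not part of the pipeline run): for a 512x512 input the
# returned tensor has shape (1, 3, 512, 512), and a fully white mask sets every pixel to -1.0.
# cond = make_inpaint_condition(Image.new("RGB", (512, 512)), Image.new("L", (512, 512), 255))
# assert cond.shape == (1, 3, 512, 512) and (cond == -1.0).all()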
#### inputs
fig_name = "Image.png"
init_image = Image.open("../Figure/paper/image/%s" % fig_name).resize((512, 512))
#### nodule (foreground)
mask_image_nd = Image.open("../Figure/paper/mask_nd/%s" % fig_name).resize((512, 512))
control_image_nd = make_inpaint_condition(init_image, mask_image_nd)
#### background
mask_image_bg = Image.open("../Figure/paper/mask_bg/%s" % fig_name).resize((512, 512))
con_name = "Background_Source.png"
control_image_bg = Image.open("../dataset/Allclass/condition_bg/%s" % con_name).resize((512, 512))
#### nodule (foreground) generation; image_nd is required by the panel assembly below
# Prompt vocabulary used for nodule attributes:
#   ["papillary", "follicular", "medullary"], ["malignant", "benign"],
#   ["solid", "cystic", "spongiform"], ["wider-than-tall", "taller-than-wide", "circular"],
#   ["clear", "unclear"], ["irregular", "regular"], ["uneven echo", "even echo"],
#   ["low echo", "strong echo"], ["white points", "no points"],
#   ["enormous nodes", "middle nodes", "mini nodes"]
controlnet_nd = ControlNetModel.from_pretrained(
    "../modelsaved/finetrainmodel/checkpoint-3000/controlnet", torch_dtype=torch.float16)
pipe_nd = StableDiffusionControlNetInpaintPipeline.from_pretrained(
    "../model/pretrainmodel", controlnet=controlnet_nd, torch_dtype=torch.float16)
pipe_nd.scheduler = DDIMScheduler.from_config(pipe_nd.scheduler.config)
pipe_nd.enable_model_cpu_offload()

seed_nd = random.randint(1, 1000000)  # loop over seeds here to sample many variants
generator = torch.Generator().manual_seed(seed_nd)
image_nd = pipe_nd(
    # alternative prompts:
    #   "papillary malignant solid taller white points uneven echo low echo middle nodes"
    #   "malignant follicular solid cystic uneven echo"
    prompt="malignant medullary, solid, cystic, uneven echo white points enormous nodes",
    negative_prompt="",
    num_inference_steps=50,
    guidance_scale=8,
    generator=generator,
    eta=1.0,
    controlnet_conditioning_scale=0.2,
    image=init_image,
    mask_image=mask_image_nd,
    control_image=control_image_nd,
).images[0]
image_nd.save("../Figure/%s_%s.png" % (fig_name, seed_nd))
#### background generation
controlnet_bg = ControlNetModel.from_pretrained(
    "../modelsaved/finetrainmodel/checkpoint-5000/controlnet", torch_dtype=torch.float16)
pipe_bg = StableDiffusionControlNetInpaintPipeline.from_pretrained(
    "../model/pretrainmodel", controlnet=controlnet_bg, torch_dtype=torch.float16
)
pipe_bg.scheduler = DDIMScheduler.from_config(pipe_bg.scheduler.config)
pipe_bg.enable_model_cpu_offload()

seed_bg = random.randint(1, 1000000)
generator = torch.Generator().manual_seed(seed_bg)
image_bg = pipe_bg(
    prompt="black and white definition detail",
    negative_prompt="",
    num_inference_steps=20,
    guidance_scale=0.02,  # near-unguided: the ControlNet condition, not the text, drives the background
    generator=generator,
    eta=1.0,
    controlnet_conditioning_scale=1.0,
    # control_guidance_start=0.01,
    # control_guidance_end=0.7,
    image=init_image,
    mask_image=mask_image_bg,
    control_image=control_image_bg,
).images[0]
image_bg.save("../Figure/PTC_T_bg2.png")
# Side-by-side comparison panel: nodule result | background result
image_nd = np.array(image_nd)
image_bg = np.array(image_bg)
im2 = np.concatenate((image_nd, image_bg), axis=1)
im2 = Image.fromarray(im2)
im2.save("../Figure/generationfigure.png")
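# Optional labeling sketch using PIL's default bitmap font (illustrative only,
# not required for the figure above; the x-offset 517 assumes 512-wide panels):
# panel = im2.copy()
# draw = ImageDraw.Draw(panel)
# draw.text((5, 5), "nodule", fill=(255, 255, 255))
# draw.text((517, 5), "background", fill=(255, 255, 255))
# panel.save("../Figure/generationfigure_labeled.png")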
############################################################################################################################################################################
######################################################################## Alternative: plain ControlNet generation (no inpainting) #########################################################################
# from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, UniPCMultistepScheduler
# from diffusers.utils import load_image
# import torch
# base_model_path = "../model/pretrainmodel"
# controlnet_path = "../modelsaved/finetrainmodel/checkpoint-17000/controlnet"
# controlnet = ControlNetModel.from_pretrained(controlnet_path, torch_dtype=torch.float16, use_safetensors=True)
# pipe = StableDiffusionControlNetPipeline.from_pretrained(
#     base_model_path, controlnet=controlnet, torch_dtype=torch.float16, use_safetensors=True
# )
# # speed up the diffusion process with a faster scheduler and CPU offloading
# pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
# pipe.enable_model_cpu_offload()
# control_image = load_image("../dataset/controlnet_condition_nd/condition_nd/20191101_094933_7.png")
# prompt = "thyroid malignant solid clean irregular white points"
# # generate image
# generator = torch.manual_seed(0)
# image = pipe(prompt, num_inference_steps=50, generator=generator, image=control_image).images[0]
# image.save("../Figure/outputfigure.png")
# Parameter reference for the ControlNet pipeline call (from the diffusers docs):
#   prompt (str or List[str], optional): The prompt or prompts to guide image generation.
#       If not defined, you need to pass prompt_embeds.
#   image (torch.FloatTensor, PIL.Image.Image, np.ndarray, or lists/nested lists of these):
#       The initial image used as the starting point for generation. Image latents may also
#       be passed as image; latents passed directly are not encoded again.
#   control_image (same types as image): The ControlNet input condition that guides the unet.
#       A torch.FloatTensor is passed to the ControlNet as is; PIL.Image.Image is also accepted.
#       The output image defaults to image's dimensions; if height and/or width are passed,
#       image is resized accordingly. If multiple ControlNets are specified in init, images
#       must be passed as a list so each element can be batched for its ControlNet.
#   height (int, optional, defaults to self.unet.config.sample_size * self.vae_scale_factor):
#       The height in pixels of the generated image.
#   width (int, optional, defaults to self.unet.config.sample_size * self.vae_scale_factor):
#       The width in pixels of the generated image.
#   num_inference_steps (int, optional, defaults to 50): The number of denoising steps. More
#       steps usually lead to a higher quality image at the expense of slower inference.
#   guidance_scale (float, optional, defaults to 7.5): A higher value encourages images closely
#       linked to the text prompt at the expense of image quality. Enabled when guidance_scale > 1.
#   negative_prompt (str or List[str], optional): What to exclude from the generated image. If
#       not defined, pass negative_prompt_embeds instead. Ignored without guidance (guidance_scale < 1).
#   num_images_per_prompt (int, optional, defaults to 1): The number of images per prompt.
#   eta (float, optional, defaults to 0.0): The DDIM eta parameter; only applies to the
#       DDIMScheduler and is ignored by other schedulers.
#   generator (torch.Generator or List[torch.Generator], optional): Makes generation deterministic.
#   latents (torch.FloatTensor, optional): Pre-generated noisy latents sampled from a Gaussian
#       distribution, useful for tweaking the same generation with different prompts. If not
#       provided, a latents tensor is sampled using the supplied generator.
#   prompt_embeds (torch.FloatTensor, optional): Pre-generated text embeddings (e.g. for prompt
#       weighting). Generated from prompt if not provided.
#   negative_prompt_embeds (torch.FloatTensor, optional): Pre-generated negative text embeddings.
#       Generated from negative_prompt if not provided.
#   output_type (str, optional, defaults to "pil"): The output format, PIL.Image or np.array.
#   return_dict (bool, optional, defaults to True): Whether to return a
#       StableDiffusionPipelineOutput instead of a plain tuple.
#   callback (Callable, optional): Called every callback_steps steps during inference as
#       callback(step: int, timestep: int, latents: torch.FloatTensor).
#   callback_steps (int, optional, defaults to 1): How often the callback is called.
#   cross_attention_kwargs (dict, optional): Passed along to the AttentionProcessor.
#   controlnet_conditioning_scale (float or List[float], optional, defaults to 1.0): ControlNet
#       outputs are multiplied by this before being added to the residual in the original unet.
#       With multiple ControlNets, the corresponding scales can be given as a list.
#   guess_mode (bool, optional, defaults to False): The ControlNet encoder tries to recognize
#       the content of the input image even if you remove all prompts; a guidance_scale between
#       3.0 and 5.0 is recommended.
#   control_guidance_start (float or List[float], optional, defaults to 0.0): The percentage of
#       total steps at which the ControlNet starts applying.
#   control_guidance_end (float or List[float], optional, defaults to 1.0): The percentage of
#       total steps at which the ControlNet stops applying.
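# A minimal illustrative call exercising the parameters above, continuing the commented
# example pipeline (values are placeholders, not settings used by this repo):
# image = pipe(
#     prompt="thyroid nodule, solid, low echo",
#     negative_prompt="blurry",
#     num_inference_steps=50,
#     guidance_scale=7.5,
#     generator=torch.manual_seed(0),
#     controlnet_conditioning_scale=0.5,
#     control_guidance_start=0.0,
#     control_guidance_end=0.8,
#     image=control_image,
# ).images[0]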