Spaces:
Running
on
Zero
Running
on
Zero
File size: 11,236 Bytes
3407e44 865391b 3407e44 865391b 07ad183 865391b ba409e9 865391b 07ad183 865391b a3a2cf9 865391b ba409e9 a3a2cf9 865391b 3407e44 865391b 3407e44 865391b 07ad183 865391b 3407e44 865391b 3407e44 865391b e30cd49 865391b e30cd49 a184ef6 865391b e30cd49 71a8eaf 865391b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 |
import gradio as gr
import torch
import os
import glob
import spaces
import numpy as np
from datetime import datetime
from PIL import Image
from diffusers.utils import load_image
from diffusers import EulerDiscreteScheduler
from pipline_StableDiffusionXL_ConsistentID import ConsistentIDStableDiffusionXLPipeline
from huggingface_hub import hf_hub_download
### Model can be imported from https://github.com/zllrunning/face-parsing.PyTorch?tab=readme-ov-file
### We use the ckpt of 79999_iter.pth: https://drive.google.com/open?id=154JgKpzCPW82qINcVieuPH3fZ2e0P812
### Thanks for the open source of face-parsing model.
from models.BiSeNet.model import BiSeNet
# zero = torch.Tensor([0]).cuda()
# print(zero.device) # <-- 'cpu' 🤔
# device = zero.device # "cuda"
# Device every model below is moved to; hard-coded rather than probed as in
# the commented-out lines above.
device = "cuda"
# Gets the absolute path of the directory containing the current script
script_directory = os.path.dirname(os.path.realpath(__file__))
# Base SDXL checkpoint id, and the ConsistentID checkpoint downloaded to the
# Hugging Face cache (consistentID_path is the local path of that file; it is
# split into directory and file name further down).
base_model_path = "SG161222/RealVisXL_V3.0"
consistentID_path = hf_hub_download(repo_id="JackAILab/ConsistentID", filename="ConsistentID_SDXL-v1.bin", repo_type="model")
### Load base model
# Loaded with the fp16 variant and float16 dtype, with no safety checker,
# and moved to `device` right away.
pipe = ConsistentIDStableDiffusionXLPipeline.from_pretrained(
    base_model_path,
    torch_dtype=torch.float16,
    safety_checker=None, # use_safetensors=True,
    variant="fp16"
).to(device)
### Load other pretrained models
## BiSenet
# Face-parsing weights are downloaded into ./checkpoints, loaded into a
# 19-class BiSeNet with tensors mapped to the CPU, then the network is moved
# to the GPU.
bise_net_cp_path = hf_hub_download(repo_id="JackAILab/ConsistentID", filename="face_parsing.pth", local_dir="./checkpoints")
bise_net = BiSeNet(n_classes = 19)
# NOTE(review): "device fail" presumably means loading straight onto the GPU
# failed, hence map_location="cpu" -- confirm.
bise_net.load_state_dict(torch.load(bise_net_cp_path, map_location="cpu")) # device fail
bise_net.cuda()
# LLaVA-based prompt enhancement is disabled: its imports and model loading
# are kept below only as commented-out reference.
# import sys
# sys.path.append("./models/LLaVA1.5/LLaVA/")
# from llava_infer.model.builder import load_pretrained_model
# from llava_infer.mm_utils import get_model_name_from_path
# from llava_infer.eval.run_llava import eval_model
### Load Llava for prompt enhancement
# llva_model_path = "liuhaotian/llava-v1.5-7b"
# llva_tokenizer, llva_model, llva_image_processor, llva_context_len = load_pretrained_model(
# model_path=llva_model_path,
# model_base=None,
# model_name=get_model_name_from_path(llva_model_path),)
# llva_model.to(device)
### Load consistentID_model checkpoint
# The downloaded checkpoint path is passed as directory + file name; the
# face-parsing network is handed to the pipeline and "img" is the trigger word.
pipe.load_ConsistentID_model(
    os.path.dirname(consistentID_path),
    bise_net,
    subfolder="",
    weight_name=os.path.basename(consistentID_path),
    trigger_word="img",
)
# Replace the pipeline's scheduler with an Euler discrete scheduler built from
# the existing scheduler's config.
pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config)
### Load to cuda
# The pipeline and three of its sub-modules are moved to `device` explicitly.
# NOTE(review): presumably needed because load_ConsistentID_model attaches
# modules after the first .to(device) above -- confirm.
pipe.to(device)
pipe.image_encoder.to(device)
pipe.image_proj_model.to(device)
pipe.FacialEncoder.to(device)
# Fallback prompt used when no prompt text is supplied.
# NOTE(review): "sowm" looks like a typo (possibly "snow"); kept verbatim so
# the default output does not change silently -- confirm with the authors.
_DEFAULT_PROMPT = "A person, in a sowm, wearing santa hat and a scarf, with a cottage behind"

# Fallback negative prompt used when none is supplied.
_DEFAULT_NEGATIVE_PROMPT = "monochrome, lowres, bad anatomy, worst quality, low quality, blurry"

# Negative terms appended to every request, whatever the user typed.
_NEGATIVE_PROMPT_SUFFIX = "((cross-eye)),((cross-eyed)),(((NFSW))),(nipple),((((ugly)))), (((duplicate))), ((morbid)), ((mutilated)), [out of frame], extra fingers, mutated hands, ((poorly drawn hands)), ((poorly drawn face)), (((mutation))), (((deformed))), ((ugly)), blurry, ((bad anatomy)), (((bad proportions))), ((extra limbs)), cloned face, (((disfigured))). out of frame, ugly, extra limbs, (bad anatomy), gross proportions, (malformed limbs), ((missing arms)), ((missing legs)), (((extra arms))), (((extra legs))), mutated hands, (fused fingers), (too many fingers), (((long neck)))"

# Number of denoising steps requested from the pipeline.
_NUM_INFERENCE_STEPS = 50


@spaces.GPU
def process(selected_template_images, costum_image, prompt,
            negative_prompt, prompt_selected, retouching, model_selected_tab,
            prompt_selected_tab, width, height, merge_steps, seed_set):
    """Generate one image from a reference face image and a text prompt.

    Args:
        selected_template_images: File path of the chosen template image.
            Opened only when ``model_selected_tab`` is 0; always forwarded to
            the pipeline as ``input_image_path``.
        costum_image: Uploaded image as an array accepted by
            ``Image.fromarray``. Used when ``model_selected_tab`` is not 0.
        prompt: Custom prompt text. Ignored when ``prompt_selected_tab`` is 0.
        negative_prompt: Custom negative prompt. Ignored when
            ``prompt_selected_tab`` is 0.
        prompt_selected: Prepared prompt, used when ``prompt_selected_tab``
            is 0.
        retouching: Forwarded unchanged to the pipeline.
        model_selected_tab: 0 selects the template image, any other value the
            uploaded image.
        prompt_selected_tab: 0 selects the prepared prompt (no safety check
            requested); any other value selects the custom prompt (safety
            check requested).
        width: Output width forwarded to the pipeline.
        height: Output height forwarded to the pipeline.
        merge_steps: Forwarded to the pipeline as ``start_merge_step``.
        seed_set: Seed for the random generator, so equal seeds give
            reproducible requests.

    Returns:
        The first generated image converted to a NumPy array.

    Raises:
        gr.Error: If the reference image required by the active tab is
            missing.
    """
    # Resolve the reference image from whichever image tab is active.
    if model_selected_tab == 0:
        if not selected_template_images:
            raise gr.Error("Please select a template image first.")
        # load_image returns a converted copy, so the file can be closed here.
        with Image.open(selected_template_images) as template_image:
            select_images = load_image(template_image)
    else:
        if costum_image is None:
            raise gr.Error("Please upload a reference image first.")
        select_images = load_image(Image.fromarray(costum_image))

    # Prepared prompts bypass the custom text boxes and the safety check.
    if prompt_selected_tab == 0:
        prompt = prompt_selected
        negative_prompt = ""
        need_safetycheck = False
    else:
        need_safetycheck = True

    # LLaVA-based prompt enhancement is disabled (its model is not loaded),
    # so the prompt is used as typed.
    if not prompt:
        prompt = _DEFAULT_PROMPT
    else:
        print(prompt)

    if not negative_prompt:
        negative_prompt = _DEFAULT_NEGATIVE_PROMPT

    # Extend both prompts with the fixed style / quality terms.
    prompt = "cinematic photo," + prompt + ", 50mm photograph, half-length portrait, film, bokeh, professional, 4k, highly detailed"
    negative_prompt = negative_prompt + ", " + _NEGATIVE_PROMPT_SUFFIX

    # Honour the caller's seed instead of replacing it with a random one.
    generator = torch.Generator(device=device).manual_seed(int(seed_set))

    images = pipe(
        prompt=prompt,
        width=width,
        height=height,
        input_id_images=select_images,
        input_image_path=selected_template_images,  ### path maybe not right, do not use
        negative_prompt=negative_prompt,
        num_images_per_prompt=1,
        num_inference_steps=_NUM_INFERENCE_STEPS,
        start_merge_step=merge_steps,
        generator=generator,
        retouching=retouching,
        need_safetycheck=need_safetycheck,
    ).images[0]

    return np.array(images)
# Locate this script's directory and collect the preset template images:
# every PNG match first, followed by every JPG match.
script_directory = os.path.dirname(os.path.realpath(__file__))
preset_template = [
    template_path
    for pattern in ("./images/templates/*.png", "./images/templates/*.jpg")
    for template_path in glob.glob(pattern)
]
# Gradio front end: reference-image picker, prompt picker, generation
# settings and the output panel, all wired to `process`.
# NOTE(review): block nesting was reconstructed from statement order --
# confirm the intended layout.
with gr.Blocks(title="ConsistentID_SDXL Demo") as demo:
    gr.Markdown("# ConsistentID_SDXL Demo")
    gr.Markdown(
        "Put the reference figure to be redrawn into the box below (There is a small probability of referensing failure. You can submit it repeatedly)"
    )
    gr.Markdown(
        "If you find our work interesting, please leave a star in GitHub for us!<br>"
        "https://github.com/JackAILab/ConsistentID"
    )

    with gr.Row():
        # Column 1: choose the reference image (template gallery or upload).
        with gr.Column():
            # Index of the active image tab: 0 = template, 1 = upload.
            model_selected_tab = gr.State(0)

            with gr.TabItem("template images") as template_images_tab:
                template_gallery_list = [(path, path) for path in preset_template]
                gallery = gr.Gallery(
                    template_gallery_list,
                    columns=[4],
                    rows=[2],
                    object_fit="contain",
                    height="auto",
                    show_label=False,
                )

                def select_function(evt: gr.SelectData):
                    """Return the template path for the clicked gallery item."""
                    return preset_template[evt.index]

                # Hidden text field that carries the selected template path.
                selected_template_images = gr.Text(show_label=False, visible=False, placeholder="Selected")
                print(f"=========selected_template_images : {selected_template_images}=============== \r\n ")
                gallery.select(select_function, None, selected_template_images)

            with gr.TabItem("Upload Image") as upload_image_tab:
                costum_image = gr.Image(label="Upload Image")

            # Record the index of whichever image tab gets selected.
            model_selected_tabs = [template_images_tab, upload_image_tab]
            for tab_index, tab_item in enumerate(model_selected_tabs):
                tab_item.select(
                    fn=lambda selected=tab_index: selected,
                    inputs=[],
                    outputs=[model_selected_tab],
                )

        # Column 2: choose the prompt and the generation settings.
        with gr.Column():
            # Index of the active prompt tab: 0 = prepared, 1 = custom.
            prompt_selected_tab = gr.State(0)

            with gr.TabItem("template prompts") as template_prompts_tab:
                prepared_prompts = [
                    "A woman in a wedding dress",
                    "A woman, queen, in a gorgeous palace",
                    "A man sitting at the beach with sunset",
                    "A person, police officer, half body shot",
                    "A man, sailor, in a boat above ocean",
                    "A women wearing headphone, listening music",
                    "A man, firefighter, half body shot",
                ]
                prompt_selected = gr.Dropdown(
                    value="A person, police officer, half body shot",
                    elem_id='dropdown',
                    choices=prepared_prompts,
                    label="prepared prompts",
                )

            with gr.TabItem("custom prompt") as custom_prompt_tab:
                prompt = gr.Textbox(label="prompt", placeholder="A man/woman wearing a santa hat")
                nagetive_prompt = gr.Textbox(
                    label="negative prompt",
                    placeholder="monochrome, lowres, bad anatomy, worst quality, low quality, blurry",
                )

            # Record the index of whichever prompt tab gets selected.
            prompt_selected_tabs = [template_prompts_tab, custom_prompt_tab]
            for tab_index, tab_item in enumerate(prompt_selected_tabs):
                tab_item.select(
                    fn=lambda selected=tab_index: selected,
                    inputs=[],
                    outputs=[prompt_selected_tab],
                )

            retouching = gr.Checkbox(label="face retouching", value=False, visible=False)
            width = gr.Slider(label="image width", minimum=512, maximum=1280, value=864, step=8)
            height = gr.Slider(label="image height", minimum=512, maximum=1280, value=1152, step=8)

            def _limit_height(current_width, current_height):
                """Cap the height at 1280 minus the width just released."""
                return min(1280 - current_width, current_height)

            def _limit_width(current_width, current_height):
                """Cap the width at 1280 minus the height just released."""
                return min(1280 - current_height, current_width)

            # NOTE(review): 1280 minus a slider value can drop below the
            # sliders' minimum of 512 -- confirm the intended size limit.
            width.release(_limit_height, inputs=[width, height], outputs=[height])
            height.release(_limit_width, inputs=[width, height], outputs=[width])

            merge_steps = gr.Slider(
                label="step starting to merge facial details(30 is recommended)",
                minimum=10,
                maximum=50,
                value=30,
                step=1,
            )
            seed_set = gr.Slider(
                label="set the random seed for different results",
                minimum=1,
                maximum=2147483647,
                value=2024,
                step=1,
            )
            btn = gr.Button("Run")

        # Column 3: generated image and usage notes.
        with gr.Column():
            out = gr.Image(label="Output")
            gr.Markdown('''
N.B.:<br/>
- If the proportion of face in the image is too small, the probability of an error will be slightly higher, and the similarity will also significantly decrease.)
- At the same time, use prompt with \"man\" or \"woman\" instead of \"person\" as much as possible, as that may cause the model to be confused whether the protagonist is male or female.
- Due to insufficient graphics memory on the demo server, there is an upper limit on the resolution for generating samples. We will support the generation of SDXL as soon as possible<br/><br/>
''')

    # The input order must match the positional parameters of `process`.
    process_inputs = [
        selected_template_images,
        costum_image,
        prompt,
        nagetive_prompt,
        prompt_selected,
        retouching,
        model_selected_tab,
        prompt_selected_tab,
        width,
        height,
        merge_steps,
        seed_set,
    ]
    btn.click(fn=process, inputs=process_inputs, outputs=out)

demo.launch()