i0switch's picture
Update app.py
61b1f58 verified
raw
history blame
10.5 kB
# app.py — InstantID × Beautiful Realistic Asians v7 (ZeroGPU-friendly, persistent cache)
"""Persistent-cache backend for InstantID portrait generation.
- Caches model assets under /data when writable, else ~/.cache
- Robust download with retry + multiple fallback URLs per asset
"""
import os, subprocess, cv2, torch, spaces, gradio as gr, numpy as np
from pathlib import Path
from PIL import Image
from diffusers import (
StableDiffusionPipeline, ControlNetModel,
DPMSolverMultistepScheduler, AutoencoderKL,
)
from insightface.app import FaceAnalysis
##############################################################################
# 0. Cache dir & helpers
##############################################################################
# Persistent volume mount point; it is used only when it exists and is
# writable, otherwise the cache lives under the user's home directory.
PERSIST_BASE = Path("/data")
CACHE_ROOT = (
    PERSIST_BASE / "instantid_cache"
    if PERSIST_BASE.exists() and os.access(PERSIST_BASE, os.W_OK)
    else Path.home() / ".cache" / "instantid_cache"
)
print("cache →", CACHE_ROOT)

# Sub-directories for each asset family.
MODELS_DIR = CACHE_ROOT / "models"
LORA_DIR = MODELS_DIR / "Lora"
EMB_DIR = CACHE_ROOT / "embeddings"
UPSCALE_DIR = CACHE_ROOT / "realesrgan"

# Create the whole tree up front so later downloads can write directly.
for p in (MODELS_DIR, LORA_DIR, EMB_DIR, UPSCALE_DIR):
    p.mkdir(parents=True, exist_ok=True)
def dl(url: str, dst: Path, attempts: int = 2):
    """Download *url* to *dst* with ``wget`` unless *dst* already exists.

    The payload is written to a sibling ``<name>.part`` file and renamed to
    *dst* only after ``wget`` exits with status 0.  A failed attempt therefore
    never leaves a truncated or empty file at *dst* that a later call would
    mistake for a cached asset.

    Args:
        url: Source URL.
        dst: Final location of the file (expected to live under CACHE_ROOT).
        attempts: Number of times ``wget`` is invoked before giving up.

    Raises:
        RuntimeError: if every attempt exits with a non-zero status.
    """
    if dst.exists():
        print("✓", dst.relative_to(CACHE_ROOT))
        return

    partial = dst.with_name(dst.name + ".part")
    try:
        for i in range(1, attempts + 1):
            print(f"⬇ {dst.name} (try {i}/{attempts})")
            if subprocess.call(["wget", "-q", "-O", str(partial), url]) == 0:
                # Publish the file only once the transfer is complete.
                partial.replace(dst)
                return
    finally:
        # wget creates the output file even when the transfer fails.
        partial.unlink(missing_ok=True)
    raise RuntimeError(f"download failed → {url}")
##############################################################################
# 1. Asset download
##############################################################################
print("— asset check —")


def _dl_with_fallback(urls: list, dst: Path) -> None:
    """Try each URL in *urls* in order until one downloads to *dst*.

    After a failed URL any file left at *dst* is removed, so the next URL
    really is attempted instead of being skipped by the "already cached"
    check in ``dl``.  The RuntimeError of the last URL is re-raised when
    every URL fails.
    """
    for pos, url in enumerate(urls, 1):
        try:
            dl(url, dst)
            return
        except RuntimeError:
            # Drop whatever the failed transfer left behind.
            dst.unlink(missing_ok=True)
            if pos == len(urls):
                raise
            print(" ↳ fallback URL …")


# 1-A. base ckpt
BASE_CKPT = MODELS_DIR / "beautiful_realistic_asians_v7_fp16.safetensors"
dl("https://civitai.com/api/download/models/177164?type=Model&format=SafeTensor&size=pruned&fp=fp16", BASE_CKPT)

# 1-B. IP-Adapter core + FaceID LoRA
IP_BIN_FILE = LORA_DIR / "ip-adapter-plus-face_sd15.bin"
dl("https://huggingface.co/h94/IP-Adapter/resolve/main/models/ip-adapter-plus-face_sd15.bin", IP_BIN_FILE)
LORA_FILE = LORA_DIR / "ip-adapter-faceid-plusv2_sd15_lora.safetensors"
dl("https://huggingface.co/h94/IP-Adapter-FaceID/resolve/main/ip-adapter-faceid-plusv2_sd15_lora.safetensors", LORA_FILE)

# 1-C. textual-inversion embeddings
# Each entry maps the cached file name to candidate URLs, tried in order.
# Hugging Face links use /resolve/ (the file itself) rather than /raw/,
# which serves the Git-LFS pointer text for LFS-tracked files.
EMB_URLS = {
    "ng_deepnegative_v1_75t.pt": [
        "https://huggingface.co/datasets/gsdf/EasyNegative/resolve/main/ng_deepnegative_v1_75t.pt",
        "https://huggingface.co/mrpxl2/animetarotV51.safetensors/resolve/cc3008c0148061896549a995cc297aef0af4ef1b/ng_deepnegative_v1_75t.pt",
    ],
    "badhandv4.pt": [
        "https://huggingface.co/datasets/gsdf/ConceptLab/resolve/main/badhandv4.pt",
        "https://huggingface.co/nolanaatama/embeddings/resolve/main/badhandv4.pt",
    ],
    "CyberRealistic_Negative-neg.pt": [
        "https://huggingface.co/datasets/gsdf/ConceptLab/resolve/main/CyberRealistic_Negative-neg.pt",
        # NOTE(review): this fallback points at a ".civitai.info" file, which
        # looks like metadata rather than the embedding itself; confirm the
        # correct URL.
        "https://huggingface.co/wsj1995/embeddings/raw/main/CyberRealistic_Negative-neg.civitai.info",
    ],
    "UnrealisticDream.pt": [
        "https://huggingface.co/datasets/gsdf/ConceptLab/resolve/main/UnrealisticDream.pt",
        "https://huggingface.co/imagepipeline/UnrealisticDream/resolve/main/f84133b4-aad8-44be-b9ce-7e7e3a8c111f.pt",
    ],
}
for fname, urls in EMB_URLS.items():
    _dl_with_fallback(urls, EMB_DIR / fname)

# 1-D. Real-ESRGAN weights 8×
# NOTE(review): the fallback links name different checkpoint files but are all
# stored under the same local name; confirm they are interchangeable.
RRG_WEIGHTS = UPSCALE_DIR / "RealESRGAN_x8plus.pth"
RRG_URLS = [
    "https://huggingface.co/NoCrypt/Superscale_RealESRGAN/resolve/main/RealESRGAN_x8plus.pth",
    "https://huggingface.co/ai-forever/Real-ESRGAN/resolve/main/RealESRGAN_x8.pth",
    "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/8x_NMKD-Superscale_100k.pth",
]
_dl_with_fallback(RRG_URLS, RRG_WEIGHTS)
##############################################################################
# 2. Runtime init
##############################################################################
# Pick the compute device once; half precision is used only on CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
dtype = torch.float16 if torch.cuda.is_available() else torch.float32
print("device:", device, "| dtype:", dtype)

# InsightFace detector/recogniser; its model files are stored under CACHE_ROOT.
providers = ["CUDAExecutionProvider", "CPUExecutionProvider"] if torch.cuda.is_available() else ["CPUExecutionProvider"]
face_app = FaceAnalysis(name="buffalo_l", root=str(CACHE_ROOT), providers=providers)
face_app.prepare(ctx_id=(0 if torch.cuda.is_available() else -1), det_size=(640, 640))

controlnet = ControlNetModel.from_pretrained("InstantX/InstantID", subfolder="ControlNetModel", torch_dtype=dtype)
pipe = StableDiffusionPipeline.from_single_file(BASE_CKPT, torch_dtype=dtype, safety_checker=None, use_safetensors=True, clip_skip=2)
pipe.vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse", torch_dtype=dtype).to(device)
# NOTE(review): the ControlNet is attached as a plain attribute on a
# StableDiffusionPipeline; confirm that this pipeline class actually uses it
# during inference (a ControlNet-specific pipeline may have been intended).
pipe.controlnet = controlnet
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config, use_karras_sigmas=True, algorithm_type="sde-dpmsolver++")
pipe.load_ip_adapter(str(LORA_DIR), subfolder="", weight_name=IP_BIN_FILE.name)
# load FaceID LoRA (Δ only LoRA weights, not full IP-Adapter)
pipe.load_lora_weights(str(LORA_DIR), weight_name=LORA_FILE.name)
pipe.set_ip_adapter_scale(0.65)

# Register every cached embedding under its file stem.  Loading is
# best-effort: a file that cannot be loaded is skipped, and the reason is
# printed so that a corrupt download is visible in the logs.
for emb in EMB_DIR.glob("*.*"):
    try:
        pipe.load_textual_inversion(emb, token=emb.stem)
        print("emb loaded →", emb.stem)
    except Exception as err:
        print("emb skip →", emb.name, "|", err)

pipe.to(device)
print("pipeline ready ✔")
##############################################################################
# 3. Upscaler
##############################################################################
# Optional Real-ESRGAN upscaler.  Any failure (missing package, incompatible
# weights, ...) disables it and leaves UPSCALE_OK False so callers can fall
# back to plain resizing.
try:
    from basicsr.archs.rrdb_arch import RRDBNet
    try:
        from realesrgan import RealESRGAN
        _uses_realesrganer = False
    except ImportError:
        from realesrgan import RealESRGANer as RealESRGAN
        _uses_realesrganer = True

    # Keyword arguments: RRDBNet's third positional parameter is `scale`, so
    # passing the feature count positionally clashed with `scale=8`.
    # NOTE(review): confirm that the downloaded checkpoint matches this
    # RRDBNet configuration.
    rrdb = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=8)

    if _uses_realesrganer:
        # RealESRGANer loads the checkpoint in its constructor and has no
        # separate load_weights(); it selects CUDA itself when available.
        upsampler = RealESRGAN(scale=8, model_path=str(RRG_WEIGHTS), model=rrdb)
    else:
        upsampler = RealESRGAN(device, rrdb, scale=8)
        upsampler.load_weights(str(RRG_WEIGHTS))
    UPSCALE_OK = True
except Exception as e:
    print("Real-ESRGAN disabled →", e)
    UPSCALE_OK = False
##############################################################################
# 4. Prompts & generation
##############################################################################
# Positive prompt template; "{subject}" is filled in per request via
# str.format.  The segments are joined with newlines.
BASE_PROMPT = "\n".join((
    "(masterpiece:1.2), best quality, ultra-realistic, RAW photo, 8k,",
    "photo of {subject},",
    "cinematic lighting, golden hour, rim light, shallow depth of field,",
    "textured skin, high detail, shot on Canon EOS R5, 85 mm f/1.4, ISO 200,",
    "<lora:ip-adapter-faceid-plusv2_sd15_lora:0.65>, (face),",
    "(aesthetic:1.1), (cinematic:0.8)",
))

# [!!] NEG_PROMPT below was corrected: stray commas and duplicated strings
# were removed so that it evaluates to one single string.
NEG_PROMPT = ", ".join((
    "ng_deepnegative_v1_75t, CyberRealistic_Negative-neg, UnrealisticDream",
    "(worst quality:2), (low quality:1.8), lowres, (jpeg artifacts:1.2)",
    "painting, sketch, illustration, drawing, cartoon, anime, cgi, render, 3d",
    "monochrome, grayscale, text, logo, watermark, signature, username",
    "(MajicNegative_V2:0.8), bad hands, extra digits, fused fingers, malformed limbs",
    "missing arms, missing legs, (badhandv4:0.7), BadNegAnatomyV1-neg, skin blemishes, acnes, age spot, glans",
))
@spaces.GPU(duration=90)
def generate(
    face_np, subject, add_prompt, add_neg, cfg, ip_scale, steps, w, h, upscale, up_factor,
    progress=gr.Progress(track_tqdm=True)
):
    """Generate one portrait conditioned on the uploaded face image.

    Args:
        face_np: Face photo as a numpy array (the UI passes ``type="numpy"``).
        subject: Subject description inserted into BASE_PROMPT; a default
            subject is used when it is empty, blank or None.
        add_prompt: Optional text appended to the positive prompt.
        add_neg: Optional text appended to NEG_PROMPT.
        cfg: Guidance scale.
        ip_scale: IP-Adapter scale applied before inference.
        steps: Step count chosen in the UI.
        w, h: Output width and height in pixels.
        upscale: Whether to enlarge the result afterwards.
        up_factor: Enlargement factor.
        progress: Gradio progress tracker (tqdm-based).

    Returns:
        The generated (and optionally enlarged) PIL image.

    Raises:
        gr.Error: if no face image was supplied.
    """
    if face_np is None or face_np.size == 0:
        raise gr.Error("顔画像をアップロードしてください。")

    # Text inputs may arrive as None (e.g. through the API); treat that and
    # whitespace-only values as empty.
    subject = (subject or "").strip()
    add_prompt = (add_prompt or "").strip()
    add_neg = (add_neg or "").strip()

    prompt = BASE_PROMPT.format(subject=subject or "a beautiful 20yo woman")
    if add_prompt:
        prompt += ", " + add_prompt
    neg = NEG_PROMPT + (", " + add_neg if add_neg else "")

    pipe.set_ip_adapter_scale(ip_scale)
    img_in = Image.fromarray(face_np)
    # NOTE(review): `image` and `controlnet_conditioning_scale` are ControlNet
    # arguments; confirm that the pipeline built at module level consumes them.
    # NOTE(review): five steps are added on top of the UI value; confirm intent.
    result = pipe(
        prompt=prompt,
        negative_prompt=neg,
        ip_adapter_image=img_in,
        image=img_in,
        controlnet_conditioning_scale=0.9,
        num_inference_steps=int(steps) + 5,
        guidance_scale=cfg,
        width=int(w),
        height=int(h),
    ).images[0]

    if upscale:
        if UPSCALE_OK:
            # The upsampler is fed BGR data, so convert on the way in and out.
            up, _ = upsampler.enhance(cv2.cvtColor(np.array(result), cv2.COLOR_RGB2BGR), outscale=up_factor)
            result = Image.fromarray(cv2.cvtColor(up, cv2.COLOR_BGR2RGB))
        else:
            # Real-ESRGAN unavailable: plain Lanczos resize instead.
            result = result.resize((int(result.width * up_factor), int(result.height * up_factor)), Image.LANCZOS)
    return result
##############################################################################
# 5. Gradio UI
##############################################################################
# Two-column layout: inputs and the trigger button on the left, result on
# the right.  The widget order in the click() input list must match the
# positional parameters of generate().
with gr.Blocks() as demo:
    gr.Markdown("# InstantID – Beautiful Realistic Asians v7")
    with gr.Row():
        with gr.Column():
            face_in = gr.Image(label="顔写真", type="numpy")
            subj_in = gr.Textbox(label="被写体説明", placeholder="e.g. woman in black suit, smiling")
            add_in = gr.Textbox(label="追加プロンプト")
            addneg_in = gr.Textbox(label="追加ネガティブ")
            ip_sld = gr.Slider(0, 1.5, 0.65, step=0.05, label="IP-Adapter scale")
            cfg_sld = gr.Slider(1, 15, 6, step=0.5, label="CFG")
            step_sld = gr.Slider(10, 50, 20, step=1, label="Steps")
            w_sld = gr.Slider(512, 1024, 512, step=64, label="幅")
            h_sld = gr.Slider(512, 1024, 768, step=64, label="高さ")
            up_ck = gr.Checkbox(label="アップスケール", value=True)
            up_fac = gr.Slider(1, 8, 2, step=1, label="倍率")
            btn = gr.Button("生成", variant="primary")
        with gr.Column():
            out_img = gr.Image(label="結果")
    btn.click(
        generate,
        [face_in, subj_in, add_in, addneg_in, cfg_sld, ip_sld, step_sld, w_sld, h_sld, up_ck, up_fac],
        out_img,
        api_name="predict",
    )

print("launching …")
demo.queue().launch(show_error=True)