Spaces:

i0switch
/

my-image-generator

Running on Zero

App Files Files Community

i0switch committed on Jun 22

Commit

2da6c3a

verified ·

1 Parent(s): bebb126

Update app.py

Browse files

Files changed (1) hide show

app.py +239 -155

app.py CHANGED Viewed

@@ -1,191 +1,275 @@
-# app.py — BRA v7 (AIGaming repo) × InstantID × ZeroGPU
-# 2025-06-22
-##############################################################################
-# torchvision 0.17+ 互換パッチ（functional_tensor → functional）
-##############################################################################
-import sys, types
-try:
-    import torchvision.transforms.functional as F
-    if "torchvision.transforms.functional_tensor" not in sys.modules:
-        faux = types.ModuleType("torchvision.transforms.functional_tensor")
-        # 必要最低限だけ持たせる
-        faux.rgb_to_grayscale = getattr(F, "rgb_to_grayscale", None)
-        sys.modules["torchvision.transforms.functional_tensor"] = faux
-except Exception as e:
-    print("[WARN] torchvision compatibility patch failed:", e)
-##############################################################################
-# 0. diffusers-0.27 互換: cached_download() パッチ
-##############################################################################
-from huggingface_hub import hf_hub_download
-import huggingface_hub as _hf
-if not hasattr(_hf, "cached_download"):
-    _hf.cached_download = hf_hub_download
-##############################################################################
-# 1. ライブラリ
-##############################################################################
-import os, io, base64, subprocess, traceback
 from pathlib import Path
-from typing import Optional
-import numpy as np
-import torch, gradio as gr, spaces
-from fastapi import FastAPI, UploadFile, File, Form, HTTPException
 from PIL import Image
 from diffusers import (
-    StableDiffusionControlNetPipeline,
-    ControlNetModel,
-    DPMSolverMultistepScheduler,
 )
-from diffusers.loaders import AttnProcsLayers
 from insightface.app import FaceAnalysis
-from realesrgan import RealESRGANer
 ##############################################################################
-# 2. キャッシュパス
 ##############################################################################
-ROOT      = Path("/data") if Path("/data").exists() else Path.home() / ".cache/instantid"
-MODELS    = ROOT / "models"; LORA = ROOT / "lora"; UPSCALE = ROOT / "realesrgan"
-for p in (MODELS, LORA, UPSCALE): p.mkdir(parents=True, exist_ok=True)
 ##############################################################################
-# 3. モデル ID / ファイル
 ##############################################################################
-# --- BRA v7 (公開) ---
-BRA_REPO = "AIGaming/beautiful_realistic_asians"
-BRA_FILE = "beautifulRealistic_v7.safetensors"
-BRA_REV  = "801a9b1999dd7018e58a1e2b432fdccd3d1d723d"     # 固定 revision
-# --- IP-Adapter 本体 & LoRA ---
-IP_REPO,  IP_BIN  = "h94/IP-Adapter", "models/ip-adapter-plus-face_sd15.bin"
-LORA_REPO,IP_LORA = "h94/IP-Adapter-FaceID", "ip-adapter-faceid-plusv2_sd15_lora.safetensors"
-# --- ControlNet (MediaPipe Face) ---
-CN_REPO, CN_SUBF = "CrucibleAI/ControlNetMediaPipeFace", "diffusion_sd15"
-# --- Real-ESRGAN ---
-ESRGAN_REPO, ESRGAN_FILE = "aimagelab/realesrgan", "RealESRGAN_x4plus.pth"
-##############################################################################
-# 4. HF Hub ダウンロード
-##############################################################################
-def dl(repo: str, file: str, sub: str | None = None, rev: str | None = None) -> Path:
-    return Path(hf_hub_download(repo, file, subfolder=sub,
-                                revision=rev, cache_dir=str(MODELS)))
 ##############################################################################
-# 5. グローバル
 ##############################################################################
-pipe: Optional[StableDiffusionControlNetPipeline] = None
-face_analyser: Optional[FaceAnalysis] = None
-upsampler: Optional[RealESRGANer] = None
-##############################################################################
-# 6. 初期化
-##############################################################################
-def init():
-    global pipe, face_analyser, upsampler
-    if pipe is not None:
-        return
-    print("[INIT] downloading models…")
-    # 6-1 BRA v7
-    bra_ckpt = dl(BRA_REPO, BRA_FILE, rev=BRA_REV)
-    # 6-2 ControlNet
-    cn = ControlNetModel.from_pretrained(
-        CN_REPO, subfolder=CN_SUBF, torch_dtype=torch.float16,
-        cache_dir=str(MODELS)
-    )
-    # 6-3 Pipeline from .safetensors + ControlNet
-    pipe_ = StableDiffusionControlNetPipeline.from_single_file(
-        bra_ckpt, controlnet=cn, torch_dtype=torch.float16,
-        safety_checker=None
-    )
-    pipe_.scheduler = DPMSolverMultistepScheduler.from_config(pipe_.scheduler.config)
-    # 6-4 IP-Adapter
-    ip_lora = dl(LORA_REPO, IP_LORA)
-    ### 最終修正 ### subfolder引数に空文字列""を渡し、TypeErrorを回避する
-    pipe_.load_ip_adapter(IP_REPO, "", weight_name=IP_BIN, cache_dir=str(MODELS))
-    AttnProcsLayers(pipe_.unet.attn_processors).load_lora_weights(
-        ip_lora, adapter_name="ip_faceid", safe_load=True
-    )
-    pipe_.set_adapters(["ip_faceid"], adapter_weights=[0.6])
-    pipe_.to("cuda"); pipe_ = pipe_
-    pipe   = pipe_
-    face_analyser = FaceAnalysis(
-        name="buffalo_l", root=str(MODELS), providers=["CUDAExecutionProvider"]
-    ); face_analyser.prepare(ctx_id=0, det_size=(640,640))
-    esr = dl(ESRGAN_REPO, ESRGAN_FILE)
-    upsampler = RealESRGANer(scale=4, model_path=str(esr), half=True,
-                             tile=512, tile_pad=10, pre_pad=0, gpu_id=0)
-    print("[INIT] ready.")
-##############################################################################
-# 7. プロンプト
-##############################################################################
-BASE = "(masterpiece:1.2), best quality, ultra-realistic, RAW photo, 8k, cinematic lighting, textured skin, "
-NEG  = "verybadimagenegative_v1.3, ng_deepnegative_v1_75t, (worst quality:2), (low quality:2), lowres, blurry, bad anatomy, bad hands, extra digits, watermark, signature"
 ##############################################################################
-# 8. 生成コア
 ##############################################################################
-@spaces.GPU(duration=60)
-def generate(face: Image.Image, subj: str, add: str, neg: str,
-             cfg: float, ipw: float, steps: int, w: int, h: int,
-             up: bool, upf: int, progress=gr.Progress(track_tqdm=True)):
-    if pipe is None:
-        init()
-    if len(face_analyser.get(np.array(face))) == 0:
-        raise ValueError("顔が検出できません。他の画像でお試しください。")
-    pipe.set_adapters(["ip_faceid"], adapter_weights=[ipw])
-    img = pipe(prompt=BASE+subj+", "+add,
-               negative_prompt=NEG+", "+neg,
-               num_inference_steps=steps, guidance_scale=cfg,
-               image=face, width=w, height=h).images[0]
-    if up:
-        upsampler.scale = int(upf)
-        img, _ = upsampler.enhance(np.array(img)); img = Image.fromarray(img)
-    return img
 ##############################################################################
-# 9. Gradio UI
 ##############################################################################
-with gr.Blocks(title="BRA v7 × InstantID (ZeroGPU)") as demo:
-    gr.Markdown("## BRA v7 × InstantID")
-    with gr.Row():
-        f = gr.Image(type="pil", label="Face ID"); s = gr.Textbox(label="被写体説明")
-    ap = gr.Textbox(label="追加プロンプト"); ng = gr.Textbox(label="追加ネガ")
-    with gr.Row():
-        cf = gr.Slider(1,20,7.5,0.5,"CFG"); ip = gr.Slider(0.1,1.0,0.6,0.05,"IP-Adapter Weight")
-    with gr.Row():
-        st = gr.Slider(10,50,30,1,"Steps"); W = gr.Slider(512,1024,768,64,"W"); H = gr.Slider(512,1024,768,64,"H")
-    with gr.Row():
-        up = gr.Checkbox(label="Real-ESRGAN"); upf = gr.Radio([4,8], value=4, label="アップスケール")
-    btn = gr.Button("Generate"); out = gr.Image(type="pil", label="Result")
-    btn.click(generate, [f,s,ap,ng,cf,ip,st,W,H,up,upf], out, show_progress=True)
-##############################################################################
-# 10. FastAPI
-##############################################################################
-app = FastAPI()
-@app.post("/api/generate")
-async def api_gen(subj: str=Form(...), cfg: float=Form(7.5), stp: int=Form(30),
-                  ipw: float=Form(0.6), W: int=Form(768), H: int=Form(768),
-                  file: UploadFile=File(...)):
-    img = Image.open(io.BytesIO(await file.read())).convert("RGB")
-    res = generate(img, subj, "", "", cfg, ipw, stp, W, H, False, 4)
-    buf = io.BytesIO(); res.save(buf,"PNG")
-    return {"image":"data:image/png;base64,"+base64.b64encode(buf.getvalue()).decode()}
 ##############################################################################
-# 11. Launch
 ##############################################################################
-demo.queue(default_concurrency_limit=2).launch(share=False)

+# app.py — InstantID × Beautiful Realistic Asians v7 (ZeroGPU-friendly, persistent cache)
+"""Persistent-cache backend for InstantID portrait generation.
+   * 依存モデルは /data が書込可ならそこへ、それ以外は ~/.cache に保存
+   * wget を使った簡易リトライ DL
+"""
+# --- ★ Monkey-Patch: torchvision 0.17+ で消えた functional_tensor を補完 ---
+import types, sys
+from torchvision.transforms import functional as F
+mod = types.ModuleType("torchvision.transforms.functional_tensor")
+# 必要なのは rgb_to_grayscale だけなのでこれだけエイリアス
+mod.rgb_to_grayscale = F.rgb_to_grayscale
+sys.modules["torchvision.transforms.functional_tensor"] = mod
+# ---------------------------------------------------------------------------
+import os, subprocess, cv2, torch, spaces, gradio as gr, numpy as np
 from pathlib import Path
 from PIL import Image
 from diffusers import (
+    StableDiffusionPipeline, ControlNetModel,
+    DPMSolverMultistepScheduler, AutoencoderKL,
 )
 from insightface.app import FaceAnalysis
 ##############################################################################
+# 0. キャッシュ用ディレクトリ
 ##############################################################################
+# Use /data/instantid_cache when /data exists and is writable by this
+# process; otherwise fall back to ~/.cache/instantid_cache.
+PERSIST_BASE = Path("/data")
+CACHE_ROOT = (
+    PERSIST_BASE / "instantid_cache"
+    if PERSIST_BASE.exists() and os.access(PERSIST_BASE, os.W_OK)
+    else Path.home() / ".cache" / "instantid_cache"
+)
+print("cache →", CACHE_ROOT)
+MODELS_DIR  = CACHE_ROOT / "models"
+LORA_DIR    = MODELS_DIR / "Lora"            # holds the FaceID LoRA and similar files
+EMB_DIR     = CACHE_ROOT / "embeddings"
+UPSCALE_DIR = CACHE_ROOT / "realesrgan"
+# Create every cache directory up front; existing directories are left as-is.
+for p in (MODELS_DIR, LORA_DIR, EMB_DIR, UPSCALE_DIR):
+    p.mkdir(parents=True, exist_ok=True)
+def dl(url: str, dst: Path, attempts: int = 2):
+    """wget + リトライの簡易ダウンローダ"""
+    if dst.exists():
+        print("✓", dst.relative_to(CACHE_ROOT)); return
+    for i in range(1, attempts + 1):
+        print(f"⬇ {dst.name} (try {i}/{attempts})")
+        if subprocess.call(["wget", "-q", "-O", str(dst), url]) == 0:
+            return
+    raise RuntimeError(f"download failed → {url}")
 ##############################################################################
+# 1. 必要アセットのダウンロード
 ##############################################################################
+print("— asset check —")
+# 1-A. Base checkpoint, fetched into MODELS_DIR.
+BASE_CKPT = MODELS_DIR / "beautiful_realistic_asians_v7_fp16.safetensors"
+dl(
+    "https://civitai.com/api/download/models/177164?type=Model&format=SafeTensor&size=pruned&fp=fp16",
+    BASE_CKPT,
+)
+# 1-B. FaceID LoRA (delta weights only), fetched into LORA_DIR.
+LORA_FILE = LORA_DIR / "ip-adapter-faceid-plusv2_sd15_lora.safetensors"
+dl(
+    "https://huggingface.co/h94/IP-Adapter-FaceID/resolve/main/ip-adapter-faceid-plusv2_sd15_lora.safetensors",
+    LORA_FILE,
+)
+# 1-C. Textual-inversion embeddings.
+# Maps the local file name (under EMB_DIR) to an ordered list of candidate
+# URLs; later entries are fallbacks for earlier ones.
+# NOTE(review): several fallbacks use /raw/ instead of /resolve/ — presumably
+# that serves the LFS pointer rather than the weights for LFS-tracked files;
+# verify. The second CyberRealistic URL ends in ".civitai.info", not ".pt".
+EMB_URLS = {
+    "ng_deepnegative_v1_75t.pt": [
+        "https://huggingface.co/datasets/gsdf/EasyNegative/resolve/main/ng_deepnegative_v1_75t.pt",
+        "https://huggingface.co/mrpxl2/animetarotV51.safetensors/raw/cc3008c0148061896549a995cc297aef0af4ef1b/ng_deepnegative_v1_75t.pt",
+    ],
+    "badhandv4.pt": [
+        "https://huggingface.co/datasets/gsdf/ConceptLab/resolve/main/badhandv4.pt",
+        "https://huggingface.co/nolanaatama/embeddings/raw/main/badhandv4.pt",
+    ],
+    "CyberRealistic_Negative-neg.pt": [
+        "https://huggingface.co/datasets/gsdf/ConceptLab/resolve/main/CyberRealistic_Negative-neg.pt",
+        "https://huggingface.co/wsj1995/embeddings/raw/main/CyberRealistic_Negative-neg.civitai.info",
+    ],
+    "UnrealisticDream.pt": [
+        "https://huggingface.co/datasets/gsdf/ConceptLab/resolve/main/UnrealisticDream.pt",
+        "https://huggingface.co/imagepipeline/UnrealisticDream/raw/main/f84133b4-aad8-44be-b9ce-7e7e3a8c111f.pt",
+    ],
+}
+# Try each URL in order and stop at the first dl() call that returns; the
+# RuntimeError is re-raised only when the last URL has failed as well.
+for fname, urls in EMB_URLS.items():
+    dst = EMB_DIR / fname
+    for idx, u in enumerate(urls, 1):
+        try:
+            dl(u, dst); break
+        except RuntimeError:
+            if idx == len(urls): raise
+            print("    ↳ fallback URL …")
+# 1-D. Real-ESRGAN weights (×8)
+# Every candidate URL is saved under the same local name, whatever the
+# remote file is called.
+RRG_WEIGHTS = UPSCALE_DIR / "RealESRGAN_x8plus.pth"
+RRG_URLS = [
+    "https://huggingface.co/NoCrypt/Superscale_RealESRGAN/resolve/main/RealESRGAN_x8plus.pth",
+    "https://huggingface.co/ai-forever/Real-ESRGAN/raw/main/RealESRGAN_x8.pth",
+    "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/8x_NMKD-Superscale_100k.pth",
+]
+# Try the URLs in order; re-raise only after the last one has failed.
+for idx, link in enumerate(RRG_URLS, 1):
+    try:
+        dl(link, RRG_WEIGHTS); break
+    except RuntimeError:
+        if idx == len(RRG_URLS): raise
+        print("    ↳ fallback URL …")
 ##############################################################################
+# 2. ランタイム初期化
 ##############################################################################
+# Pick GPU + fp16 when CUDA is available at import time, else CPU + fp32.
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+dtype  = torch.float16 if torch.cuda.is_available() else torch.float32
+print("device:", device, "| dtype:", dtype)
+# Execution providers handed to FaceAnalysis; the CPU provider is always listed.
+providers = (
+    ["CUDAExecutionProvider", "CPUExecutionProvider"]
+    if torch.cuda.is_available()
+    else ["CPUExecutionProvider"]
+)
+# NOTE(review): face_app is prepared here but generate() in this file never
+# references it — confirm whether face detection is still intended.
+face_app = FaceAnalysis(name="buffalo_l", root=str(CACHE_ROOT), providers=providers)
+face_app.prepare(ctx_id=(0 if torch.cuda.is_available() else -1), det_size=(640, 640))
+# ControlNet + SD pipeline
+controlnet = ControlNetModel.from_pretrained(
+    "InstantX/InstantID", subfolder="ControlNetModel", torch_dtype=dtype
+)
+pipe = StableDiffusionPipeline.from_single_file(
+    BASE_CKPT, torch_dtype=dtype, safety_checker=None, use_safetensors=True, clip_skip=2
+)
+# Swap in the sd-vae-ft-mse VAE in place of the one loaded from the checkpoint.
+pipe.vae = AutoencoderKL.from_pretrained(
+    "stabilityai/sd-vae-ft-mse", torch_dtype=dtype
+).to(device)
+# NOTE(review): pipe is a plain StableDiffusionPipeline; the ControlNet is only
+# attached as an attribute here — confirm the pipeline uses it at inference.
+pipe.controlnet = controlnet
+pipe.scheduler = DPMSolverMultistepScheduler.from_config(
+    pipe.scheduler.config, use_karras_sigmas=True, algorithm_type="sde-dpmsolver++"
+)
+# --- Key step: load straight from the official repo, image encoder included ---
+pipe.load_ip_adapter(
+    "h94/IP-Adapter",               # Hugging Face Hub ID
+    subfolder="models",             # folder that contains ip-adapter-plus-face_sd15.bin
+    weight_name="ip-adapter-plus-face_sd15.bin",
+)
+# ---------------------------------------------------------------------------
+# FaceID LoRA (delta LoRA weights only)
+pipe.load_lora_weights(str(LORA_DIR), weight_name=LORA_FILE.name)
+# Startup default; generate() sets the scale again on every call.
+pipe.set_ip_adapter_scale(0.65)
+# textual inversion 読み込み
+for emb in EMB_DIR.glob("*.*"):
+    try:
+        pipe.load_textual_inversion(emb, token=emb.stem)
+        print("emb loaded →", emb.stem)
+    except Exception:
+        print("emb skip →", emb.name)
+pipe.to(device)
+print("pipeline ready ✔")
 ##############################################################################
+# 3. アップスケーラ
 ##############################################################################
+# Optional Real-ESRGAN upscaler. Any exception raised here (missing package,
+# constructor mismatch, unreadable weights) is printed and sets
+# UPSCALE_OK = False, which makes generate() take its resize fallback.
+# NOTE(review): the object is built with (device, rrdb, scale=8) and
+# load_weights(), while generate() calls upsampler.enhance(..., outscale=...)
+# — confirm that whichever class is imported below supports both.
+try:
+    from basicsr.archs.rrdb_arch import RRDBNet
+    try:
+        from realesrgan import RealESRGAN
+    except ImportError:
+        from realesrgan import RealESRGANer as RealESRGAN
+    rrdb = RRDBNet(3, 3, 64, 23, 32, scale=8)
+    upsampler = RealESRGAN(device, rrdb, scale=8)
+    upsampler.load_weights(str(RRG_WEIGHTS))
+    UPSCALE_OK = True
+except Exception as e:
+    print("Real-ESRGAN disabled →", e)
+    UPSCALE_OK = False
 ##############################################################################
+# 4. プロンプト & 生成関数
 ##############################################################################
+# Positive prompt template; generate() fills the {subject} placeholder with
+# str.format().
+# NOTE(review): the "<lora:...>" tag is passed through as plain prompt text —
+# confirm the pipeline gives it any meaning.
+BASE_PROMPT = (
+    "(masterpiece:1.2), best quality, ultra-realistic, RAW photo, 8k,\n"
+    "photo of {subject},\n"
+    "cinematic lighting, golden hour, rim light, shallow depth of field,\n"
+    "textured skin, high detail, shot on Canon EOS R5, 85 mm f/1.4, ISO 200,\n"
+    "<lora:ip-adapter-faceid-plusv2_sd15_lora:0.65>, (face),\n"
+    "(aesthetic:1.1), (cinematic:0.8)"
+)
+# Base negative prompt; generate() appends the user's extra negatives.
+# NOTE(review): MajicNegative_V2 and BadNegAnatomyV1-neg have no matching file
+# in EMB_URLS, so no embedding is downloaded for those two tokens.
+NEG_PROMPT = (
+    "ng_deepnegative_v1_75t, CyberRealistic_Negative-neg, UnrealisticDream, "
+    "(worst quality:2), (low quality:1.8), lowres, (jpeg artifacts:1.2), "
+    "painting, sketch, illustration, drawing, cartoon, anime, cgi, render, 3d, "
+    "monochrome, grayscale, text, logo, watermark, signature, username, "
+    "(MajicNegative_V2:0.8), bad hands, extra digits, fused fingers, malformed limbs, "
+    "missing arms, missing legs, (badhandv4:0.7), BadNegAnatomyV1-neg, skin blemishes, acnes, age spot, glans"
+)
+@spaces.GPU(duration=90)
+def generate(
+    face_np, subject, add_prompt, add_neg, cfg, ip_scale, steps, w, h, upscale, up_factor,
+    progress=gr.Progress(track_tqdm=True),
+):
+    if face_np is None or face_np.size == 0:
+        raise gr.Error("顔画像をアップロードしてください。")
+    prompt = BASE_PROMPT.format(subject=(subject.strip() or "a beautiful 20yo woman"))
+    if add_prompt:
+        prompt += ", " + add_prompt
+    neg = NEG_PROMPT + (", " + add_neg if add_neg else "")
+    pipe.set_ip_adapter_scale(ip_scale)
+    img_in = Image.fromarray(face_np)
+    result = pipe(
+        prompt=prompt,
+        negative_prompt=neg,
+        ip_adapter_image=img_in,
+        image=img_in,
+        controlnet_conditioning_scale=0.9,
+        num_inference_steps=int(steps) + 5,
+        guidance_scale=cfg,
+        width=int(w),
+        height=int(h),
+    ).images[0]
+    if upscale:
+        if UPSCALE_OK:
+            up, _ = upsampler.enhance(
+                cv2.cvtColor(np.array(result), cv2.COLOR_RGB2BGR), outscale=up_factor
+            )
+            result = Image.fromarray(cv2.cvtColor(up, cv2.COLOR_BGR2RGB))
+        else:
+            result = result.resize(
+                (int(result.width * up_factor), int(result.height * up_factor)),
+                Image.LANCZOS,
+            )
+    return result
 ##############################################################################
+# 5. Gradio UI
 ##############################################################################
+# Two-column layout: inputs and the generate button on the left, the result
+# image on the right.
+with gr.Blocks() as demo:
+    gr.Markdown("# InstantID – Beautiful Realistic Asians v7")
+    with gr.Row():
+        with gr.Column():
+            face_in   = gr.Image(label="顔写真", type="numpy")
+            subj_in   = gr.Textbox(label="被写体説明", placeholder="e.g. woman in black suit, smiling")
+            add_in    = gr.Textbox(label="追加プロンプト")
+            addneg_in = gr.Textbox(label="追加ネガティブ")
+            ip_sld    = gr.Slider(0, 1.5, 0.65, step=0.05, label="IP-Adapter scale")
+            cfg_sld   = gr.Slider(1, 15, 6, step=0.5, label="CFG")
+            step_sld  = gr.Slider(10, 50, 20, step=1, label="Steps")
+            w_sld     = gr.Slider(512, 1024, 512, step=64, label="幅")
+            h_sld     = gr.Slider(512, 1024, 768, step=64, label="高さ")
+            up_ck     = gr.Checkbox(label="アップスケール", value=True)
+            up_fac    = gr.Slider(1, 8, 2, step=1, label="倍率")
+            btn       = gr.Button("生成", variant="primary")
+        with gr.Column():
+            out_img = gr.Image(label="結果")
+    # The input list follows generate()'s parameter order (cfg before
+    # ip_scale), which differs from the on-screen slider order.
+    btn.click(
+        generate,
+        [face_in, subj_in, add_in, addneg_in, cfg_sld, ip_sld, step_sld, w_sld, h_sld, up_ck, up_fac],
+        out_img,
+        api_name="predict",
+    )
+print("launching …")
+# Enable the request queue, then start the server with errors shown in the UI.
+demo.queue().launch(show_error=True)