Spaces:

i0switch
/

my-image-generation

Running on Zero

App Files Files Community

i0switch committed 7 days ago

Commit

b7d7077

verified ·

1 Parent(s): 478bf4d

Update app.py

Browse files

Files changed (1) hide show

app.py +97 -215

app.py CHANGED Viewed

@@ -1,294 +1,176 @@
-# app.py — InstantID × Beautiful Realistic Asians v7（ZeroGPU / ControlNetMediaPipeFace）
-# 2025-06-22 版
 ##############################################################################
-# 0. 旧 API → 新 API 互換パッチ（必ず diffusers import の前に置く）
 ##############################################################################
 from huggingface_hub import hf_hub_download
-import huggingface_hub as _hf_hub
-# diffusers-0.27 は cached_download() を呼び出すため、HF-Hub ≥0.28 でも使えるように注入
-if not hasattr(_hf_hub, "cached_download"):
-    _hf_hub.cached_download = hf_hub_download  # :contentReference[oaicite:1]{index=1}
 ##############################################################################
-# 1. 標準 & 外部ライブラリ
 ##############################################################################
 import os, io, base64, subprocess, traceback
 from pathlib import Path
 from typing import Optional
 import numpy as np
-import torch
-import gradio as gr
-import spaces
 from fastapi import FastAPI, UploadFile, File, Form, HTTPException
 from PIL import Image
 from diffusers import (
     StableDiffusionControlNetPipeline,
     ControlNetModel,
     DPMSolverMultistepScheduler,
-    AutoencoderKL,
 )
 from diffusers.loaders import AttnProcsLayers
 from insightface.app import FaceAnalysis
-from basicsr.utils.download_util import load_file_from_url
 from realesrgan import RealESRGANer
 ##############################################################################
-# 2. キャッシュ & 永続パス
 ##############################################################################
-PERSIST_BASE = Path("/data")
-CACHE_ROOT = (
-    PERSIST_BASE / "instantid_cache"
-    if PERSIST_BASE.exists() and os.access(PERSIST_BASE, os.W_OK)
-    else Path.home() / ".cache" / "instantid_cache"
-)
-MODELS_DIR = CACHE_ROOT / "models"
-LORA_DIR   = CACHE_ROOT / "lora"
-UPSCALE_DIR = CACHE_ROOT / "realesrgan"
-for p in (MODELS_DIR, LORA_DIR, UPSCALE_DIR):
-    p.mkdir(parents=True, exist_ok=True)
 ##############################################################################
-# 3. モデル識別子 & ファイル名
 ##############################################################################
-# すべて HF Hub 側にバイナリがあるため、curl ではなく hf_hub_download() を推奨
-BRA_REPO    = "i0switch-assets/Beautiful_Realistic_Asians_v7"
-BRA_FILE    = "beautiful_realistic_asians_v7_fp16.safetensors"
-IP_REPO     = "h94/IP-Adapter"
-IP_FILE_BIN = "ip-adapter-plus-face_sd15.bin"       # Git LFS バイナリ :contentReference[oaicite:2]{index=2}
-IP_LORA_REPO = "h94/IP-Adapter-FaceID"
-IP_FILE_LORA = "ip-adapter-faceid-plusv2_sd15_lora.safetensors"  # Git LFS バイナリ
-CN_REPO   = "CrucibleAI/ControlNetMediaPipeFace"  # 公開・無認証で DL 可 :contentReference[oaicite:3]{index=3}
-CN_FOLDER = "diffusion_sd15"                      # SD-1.5 用フォルダ :contentReference[oaicite:4]{index=4}
-REALESRGAN_REPO = "aimagelab/realesrgan"
-REALESRGAN_FILE = "RealESRGAN_x4plus.pth"
 ##############################################################################
-# 4. ダウンローダ（HF Hub 優先）
 ##############################################################################
-def dl_hf(repo: str, filename: str, subfolder: Optional[str] = None) -> Path:
-    """HF Hub から大容量バイナリを安全に取得（Git LFS ポインタ問題を回避）"""
-    return Path(
-        hf_hub_download(
-            repo_id=repo,
-            filename=filename,
-            subfolder=subfolder,
-            cache_dir=str(MODELS_DIR),
-        )
-    )
-def dl_http(url: str, dst: Path):
-    """小さなファイルのみ curl で取得（retry 付）"""
-    if dst.exists():
-        return dst
-    for _ in range(2):
-        try:
-            subprocess.check_call(["curl", "-L", "-o", str(dst), url])
-            return dst
-        except subprocess.CalledProcessError:
-            pass
-    load_file_from_url(url=url, model_dir=str(dst.parent), file_name=dst.name)
-    return dst
 ##############################################################################
-# 5. グローバル変数（lazy-load）
 ##############################################################################
 pipe: Optional[StableDiffusionControlNetPipeline] = None
 face_analyser: Optional[FaceAnalysis] = None
 upsampler: Optional[RealESRGANer] = None
 ##############################################################################
-# 6. パイプライン初期化
 ##############################################################################
-def initialize_pipelines():
     global pipe, face_analyser, upsampler
     if pipe is not None:
         return
-    print("[INIT] Downloading model assets …")
-    # 6-1 主要モデル
-    bra_ckpt = dl_hf(BRA_REPO, BRA_FILE)
-    ip_bin   = dl_hf(IP_REPO,  IP_FILE_BIN)
-    ip_lora  = dl_hf(IP_LORA_REPO, IP_FILE_LORA)
-    cn_model = ControlNetModel.from_pretrained(
-        CN_REPO, subfolder=CN_FOLDER, torch_dtype=torch.float16, cache_dir=str(MODELS_DIR)
     )
-    # 6-2 Diffusers パイプライン
-    pipe_tmp = StableDiffusionControlNetPipeline.from_pretrained(
-        "runwayml/stable-diffusion-v1-5",
-        controlnet=cn_model,
-        vae=AutoencoderKL.from_pretrained(
-            "stabilityai/sd-vae-ft-mse", torch_dtype=torch.float16
-        ),
-        torch_dtype=torch.float16,
-        cache_dir=str(MODELS_DIR),
-        safety_checker=None,
-    )
-    pipe_tmp.scheduler = DPMSolverMultistepScheduler.from_pretrained(
-        "runwayml/stable-diffusion-v1-5",
-        subfolder="scheduler",
-        cache_dir=str(MODELS_DIR),
     )
-    # 6-3 IP-Adapter ロード（必須 3 引数） :contentReference[oaicite:5]{index=5}
-    pipe_tmp.load_ip_adapter(
-        str(ip_bin.parent),   # repo_or_path
-        "",                   # subfolder（直下なので空文字）
-        ip_bin.name           # weight_name
-    )
-    AttnProcsLayers(pipe_tmp.unet.attn_processors).load_lora_weights(
         ip_lora, adapter_name="ip_faceid", safe_load=True
     )
-    pipe_tmp.set_adapters(["ip_faceid"], adapter_weights=[0.6])
-    pipe_tmp.to("cuda")
-    pipe = pipe_tmp
-    # 6-4 InsightFace
     face_analyser = FaceAnalysis(
-        name="buffalo_l", root=str(MODELS_DIR), providers=["CUDAExecutionProvider"]
-    )
-    face_analyser.prepare(ctx_id=0, det_size=(640, 640))
-    # 6-5 Real-ESRGAN
-    re_ckpt = dl_hf(REALESRGAN_REPO, REALESRGAN_FILE)
-    upsampler = RealESRGANer(
-        scale=4,
-        model_path=str(re_ckpt),
-        half=True,
-        tile=512, tile_pad=10, pre_pad=0, gpu_id=0
-    )
-    print("[INIT] Pipelines ready.")
 ##############################################################################
-# 7. プロンプトテンプレ
 ##############################################################################
-BASE_PROMPT = (
-    "(masterpiece:1.2), best quality, ultra-realistic, RAW photo, 8k, "
-    "cinematic lighting, textured skin, "
-)
-NEG_PROMPT = (
-    "verybadimagenegative_v1.3, ng_deepnegative_v1_75t, "
-    "(worst quality:2), (low quality:2), lowres, blurry, bad anatomy, "
-    "bad hands, extra digits, watermark, signature"
-)
 ##############################################################################
-# 8. 生成コア（GPU アタッチ）
 ##############################################################################
-@spaces.GPU(duration=60)  # ZeroGPU で 60 s まで実行可 :contentReference[oaicite:6]{index=6}
-def generate_core(
-    face_img: Image.Image,
-    subject: str,
-    add_prompt: str = "",
-    add_neg: str = "",
-    cfg: float = 7.5,
-    ip_scale: float = 0.6,
-    steps: int = 30,
-    w: int = 768,
-    h: int = 768,
-    upscale: bool = False,
-    up_factor: int = 4,
-    progress: gr.Progress = gr.Progress(track_tqdm=True),
-):
-    try:
-        if pipe is None:
-            initialize_pipelines()
-        if len(face_analyser.get(np.array(face_img))) == 0:
-            raise ValueError("顔が検出できません。別の画像でお試しください。")
-        pipe.set_adapters(["ip_faceid"], adapter_weights=[ip_scale])
-        prompt   = BASE_PROMPT + subject + ", " + add_prompt
-        negative = NEG_PROMPT  + ", " + add_neg
-        result = pipe(
-            prompt=prompt,
-            negative_prompt=negative,
-            num_inference_steps=int(steps),
-            guidance_scale=float(cfg),
-            image=face_img,
-            control_image=None,
-            width=int(w), height=int(h),
-        ).images[0]
-        if upscale:
-            upsampler.scale = 4 if up_factor == 4 else 8
-            result, _ = upsampler.enhance(np.array(result))
-            result = Image.fromarray(result)
-        return result
-    except Exception as e:
-        traceback.print_exc()
-        raise e
 ##############################################################################
 # 9. Gradio UI
 ##############################################################################
-with gr.Blocks(title="InstantID × BRA v7 (ZeroGPU)") as demo:
-    gr.Markdown("## InstantID × Beautiful Realistic Asians v7")
     with gr.Row():
-        face_img = gr.Image(type="pil", label="Face ID", sources=["upload"])
-        subject  = gr.Textbox(label="被写体説明（例: 30代日本人女性、黒髪セミロング）", interactive=True)
-    add_prompt = gr.Textbox(label="追加プロンプト", interactive=True)
-    add_neg    = gr.Textbox(label="追加ネガティブ", interactive=True)
     with gr.Row():
-        cfg      = gr.Slider(1, 20, value=7.5, step=0.5, label="CFG Scale")
-        ip_scale = gr.Slider(0.1, 1.0, value=0.6, step=0.05, label="IP-Adapter Weight")
     with gr.Row():
-        steps = gr.Slider(10, 50, value=30, step=1, label="Steps")
-        w     = gr.Slider(512, 1024, value=768, step=64, label="Width")
-        h     = gr.Slider(512, 1024, value=768, step=64, label="Height")
     with gr.Row():
-        upscale   = gr.Checkbox(label="Real-ESRGAN Upscale", value=False)
-        up_factor = gr.Radio([4, 8], value=4, label="Upscale Factor")
-    run_btn   = gr.Button("Generate")
-    output_im = gr.Image(type="pil", label="Result")
-    run_btn.click(
-        fn=generate_core,
-        inputs=[face_img, subject, add_prompt, add_neg,
-                cfg, ip_scale, steps, w, h, upscale, up_factor],
-        outputs=output_im, show_progress=True
-    )
 ##############################################################################
-# 10. FastAPI REST
 ##############################################################################
 app = FastAPI()
 @app.post("/api/generate")
-async def api_generate(
-    subject: str = Form(...),
-    cfg: float = Form(7.5),
-    steps: int = Form(30),
-    ip_scale: float = Form(0.6),
-    w: int = Form(768),
-    h: int = Form(768),
-    file: UploadFile = File(...),
-):
-    try:
-        img = Image.open(io.BytesIO(await file.read())).convert("RGB")  # noqa
-        res = generate_core(img, subject, "", "", cfg, ip_scale, steps, w, h, False, 4)
-        buf = io.BytesIO(); res.save(buf, format="PNG")
-        return {"image": "data:image/png;base64," + base64.b64encode(buf.getvalue()).decode()}
-    except Exception as e:
-        traceback.print_exc()
-        raise HTTPException(status_code=500, detail=str(e))
 ##############################################################################
-# 11. Launch（Gradio が自動で Uvicorn を起動）
 ##############################################################################
-demo.queue(default_concurrency_limit=2).launch(share=False)  # :contentReference[oaicite:7]{index=7}

+# app.py — BRA v7 (AIGaming repo) × InstantID × ZeroGPU
+# 2025-06-22
 ##############################################################################
+# 0. diffusers-0.27 互換: cached_download() パッチ
 ##############################################################################
 from huggingface_hub import hf_hub_download
+import huggingface_hub as _hf
+# Compatibility shim (see the section header above): when the installed
+# huggingface_hub has no `cached_download` attribute, alias it to
+# hf_hub_download so that the diffusers import further down can find it.
+# NOTE(review): the alias only guarantees the attribute exists — presumably
+# the two functions do not share a call signature; confirm nothing calls it.
+if not hasattr(_hf, "cached_download"):
+    _hf.cached_download = hf_hub_download
 ##############################################################################
+# 1. ライブラリ
 ##############################################################################
 import os, io, base64, subprocess, traceback
 from pathlib import Path
 from typing import Optional
 import numpy as np
+import torch, gradio as gr, spaces
 from fastapi import FastAPI, UploadFile, File, Form, HTTPException
 from PIL import Image
 from diffusers import (
     StableDiffusionControlNetPipeline,
     ControlNetModel,
     DPMSolverMultistepScheduler,
 )
 from diffusers.loaders import AttnProcsLayers
 from insightface.app import FaceAnalysis
 from realesrgan import RealESRGANer
 ##############################################################################
+# 2. キャッシュパス
 ##############################################################################
+ROOT      = Path("/data") if Path("/data").exists() else Path.home() / ".cache/instantid"
+MODELS    = ROOT / "models"; LORA = ROOT / "lora"; UPSCALE = ROOT / "realesrgan"
+for p in (MODELS, LORA, UPSCALE): p.mkdir(parents=True, exist_ok=True)
 ##############################################################################
+# 3. モデル ID / ファイル
 ##############################################################################
+# --- BRA v7 checkpoint (public repo) ---
+BRA_REPO = "AIGaming/beautiful_realistic_asians"
+BRA_FILE = "beautifulRealistic_v7.safetensors"
+BRA_REV  = "801a9b1999dd7018e58a1e2b432fdccd3d1d723d"     # pinned revision
+# --- IP-Adapter weights & FaceID LoRA ---
+IP_REPO,  IP_BIN  = "h94/IP-Adapter",        "ip-adapter-plus-face_sd15.bin"
+LORA_REPO,IP_LORA = "h94/IP-Adapter-FaceID", "ip-adapter-faceid-plusv2_sd15_lora.safetensors"
+# --- ControlNet (MediaPipe Face): repo id and subfolder ---
+CN_REPO, CN_SUBF = "CrucibleAI/ControlNetMediaPipeFace", "diffusion_sd15"
+# --- Real-ESRGAN checkpoint ---
+ESRGAN_REPO, ESRGAN_FILE = "aimagelab/realesrgan", "RealESRGAN_x4plus.pth"
 ##############################################################################
+# 4. HF Hub ダウンロード
 ##############################################################################
+def dl(repo: str, file: str, sub: str | None = None, rev: str | None = None) -> Path:
+    return Path(hf_hub_download(repo, file, subfolder=sub,
+                                revision=rev, cache_dir=str(MODELS)))
 ##############################################################################
+# 5. グローバル
 ##############################################################################
 pipe: Optional[StableDiffusionControlNetPipeline] = None  # diffusion pipeline; None until init() has run
 face_analyser: Optional[FaceAnalysis] = None  # face detector; None until init() has run
 upsampler: Optional[RealESRGANer] = None  # Real-ESRGAN upscaler; None until init() has run
 ##############################################################################
+# 6. 初期化
 ##############################################################################
+def init():
     global pipe, face_analyser, upsampler
     if pipe is not None:
         return
+    print("[INIT] downloading models…")
+    # 6-1 BRA v7
+    bra_ckpt = dl(BRA_REPO, BRA_FILE, rev=BRA_REV)
+    # 6-2 ControlNet
+    cn = ControlNetModel.from_pretrained(
+        CN_REPO, subfolder=CN_SUBF, torch_dtype=torch.float16,
+        cache_dir=str(MODELS)
     )
+    # 6-3 Pipeline from .safetensors + ControlNet
+    pipe_ = StableDiffusionControlNetPipeline.from_single_file(
+        bra_ckpt, controlnet=cn, torch_dtype=torch.float16,
+        safety_checker=None
     )
+    pipe_.scheduler = DPMSolverMultistepScheduler.from_config(pipe_.scheduler.config)
+    # 6-4 IP-Adapter
+    ip_bin  = dl(IP_REPO,  IP_BIN)
+    ip_lora = dl(LORA_REPO, IP_LORA)
+    pipe_.load_ip_adapter(str(ip_bin.parent), "", ip_bin.name)
+    AttnProcsLayers(pipe_.unet.attn_processors).load_lora_weights(
         ip_lora, adapter_name="ip_faceid", safe_load=True
     )
+    pipe_.set_adapters(["ip_faceid"], adapter_weights=[0.6])
+    pipe_.to("cuda"); pipe_ = pipe_
+    pipe   = pipe_
     face_analyser = FaceAnalysis(
+        name="buffalo_l", root=str(MODELS), providers=["CUDAExecutionProvider"]
+    ); face_analyser.prepare(ctx_id=0, det_size=(640,640))
+    esr = dl(ESRGAN_REPO, ESRGAN_FILE)
+    upsampler = RealESRGANer(scale=4, model_path=str(esr), half=True,
+                             tile=512, tile_pad=10, pre_pad=0, gpu_id=0)
+    print("[INIT] ready.")
 ##############################################################################
+# 7. プロンプト
 ##############################################################################
+# Quality tags placed in front of every prompt; the string ends with ", " so
+# generate() can append the subject description directly.
+BASE = "(masterpiece:1.2), best quality, ultra-realistic, RAW photo, 8k, cinematic lighting, textured skin, "
+# Default negative prompt; generate() appends ", " plus the user's extra negatives.
+NEG  = "verybadimagenegative_v1.3, ng_deepnegative_v1_75t, (worst quality:2), (low quality:2), lowres, blurry, bad anatomy, bad hands, extra digits, watermark, signature"
 ##############################################################################
+# 8. 生成コア
 ##############################################################################
+@spaces.GPU(duration=60)
+def generate(face: Image.Image, subj: str, add: str, neg: str,
+             cfg: float, ipw: float, steps: int, w: int, h: int,
+             up: bool, upf: int, progress=gr.Progress(track_tqdm=True)):
+    if pipe is None:
+        init()
+    if len(face_analyser.get(np.array(face))) == 0:
+        raise ValueError("顔が検出できません。他の画像でお試しください。")
+    pipe.set_adapters(["ip_faceid"], adapter_weights=[ipw])
+    img = pipe(prompt=BASE+subj+", "+add,
+               negative_prompt=NEG+", "+neg,
+               num_inference_steps=steps, guidance_scale=cfg,
+               image=face, width=w, height=h).images[0]
+    if up:
+        upsampler.scale = 4 if upf==4 else 8
+        img, _ = upsampler.enhance(np.array(img)); img = Image.fromarray(img)
+    return img
 ##############################################################################
 # 9. Gradio UI
 ##############################################################################
+with gr.Blocks(title="BRA v7 × InstantID (ZeroGPU)") as demo:
+    gr.Markdown("## BRA v7 × InstantID")
     with gr.Row():
+        f = gr.Image(type="pil", label="Face ID"); s = gr.Textbox(label="被写体説明")
+    ap = gr.Textbox(label="追加プロンプト"); ng = gr.Textbox(label="追加ネガ")
     with gr.Row():
+        cf = gr.Slider(1,20,7.5,0.5,"CFG"); ip = gr.Slider(0.1,1.0,0.6,0.05,"IP-Adapter Weight")
     with gr.Row():
+        st = gr.Slider(10,50,30,1,"Steps"); W = gr.Slider(512,1024,768,64,"W"); H = gr.Slider(512,1024,768,64,"H")
     with gr.Row():
+        up = gr.Checkbox(label="Real-ESRGAN"); upf = gr.Radio([4,8], value=4, label="アップスケール")
+    btn = gr.Button("Generate"); out = gr.Image(type="pil", label="Result")
+    btn.click(generate, [f,s,ap,ng,cf,ip,st,W,H,up,upf], out, show_progress=True)
 ##############################################################################
+# 10. FastAPI
 ##############################################################################
 app = FastAPI()
 @app.post("/api/generate")
+async def api_gen(subj: str=Form(...), cfg: float=Form(7.5), stp: int=Form(30),
+                  ipw: float=Form(0.6), W: int=Form(768), H: int=Form(768),
+                  file: UploadFile=File(...)):
+    img = Image.open(io.BytesIO(await file.read())).convert("RGB")
+    res = generate(img, subj, "", "", cfg, ipw, stp, W, H, False, 4)
+    buf = io.BytesIO(); res.save(buf,"PNG")
+    return {"image":"data:image/png;base64,"+base64.b64encode(buf.getvalue()).decode()}
 ##############################################################################
+# 11. Launch
 ##############################################################################
+demo.queue(default_concurrency_limit=2).launch(share=False)