# app.py — InstantID × Beautiful Realistic Asians v7 (ZeroGPU-ready, FastAPI + Gradio)
# 2025-06-21 版
#
# ───────────────────────────────────────────────────────────────
#  主な特徴
#   • @spaces.GPU(duration=60) を公開名 generate_core() に付与
#   • パイプラインは lazy-load で初回推論時に GPU へロード
#   • モデル資産は /data または ~/.cache に永続化
#   • Real-ESRGAN アップスケール (x4 / x8) オプション
#   • Gradio UI + FastAPI REST を 1 プロセスで共存
#   • Uvicorn 手動起動は不要（Spaces が自前で立てる）
# ───────────────────────────────────────────────────────────────

import os
import io
import base64
import subprocess
import traceback
from pathlib import Path
from typing import Optional

import numpy as np
import torch
import gradio as gr
import spaces
from fastapi import FastAPI, UploadFile, File, Form, HTTPException
from PIL import Image

from diffusers import (
    StableDiffusionControlNetPipeline,
    ControlNetModel,
    DPMSolverMultistepScheduler,
    AutoencoderKL,
)
from diffusers.loaders import AttnProcsLayers
from insightface.app import FaceAnalysis
from basicsr.utils.download_util import load_file_from_url
from realesrgan import RealESRGANer

# ==============================================================
# 0. キャッシュディレクトリとダウンローダ
# ==============================================================

# Preferred cache location; it is used only when it exists and is writable.
PERSIST_BASE = Path("/data")

if PERSIST_BASE.exists() and os.access(PERSIST_BASE, os.W_OK):
    CACHE_ROOT = PERSIST_BASE.joinpath("instantid_cache")
else:
    # Otherwise fall back to a cache folder under the current user's home.
    CACHE_ROOT = Path.home().joinpath(".cache", "instantid_cache")

MODELS_DIR = CACHE_ROOT.joinpath("models")
LORA_DIR = CACHE_ROOT.joinpath("lora")
UPSCALE_DIR = CACHE_ROOT.joinpath("realesrgan")

# Create every cache sub-directory up front so later downloads can write there.
for _cache_dir in (MODELS_DIR, LORA_DIR, UPSCALE_DIR):
    os.makedirs(_cache_dir, exist_ok=True)


def download(url: str, dst: Path, attempts: int = 2) -> Path:
    """Fetch *url* into *dst* unless a usable copy is already cached.

    The file is first written to a sibling ``<name>.part`` file and renamed
    onto *dst* only after curl reports success, so an interrupted or failed
    transfer never leaves a truncated file that a later call would mistake
    for a valid cache entry.

    Args:
        url: Source URL.
        dst: Destination path of the cached file.
        attempts: Number of curl attempts before the basicsr fallback is used.

    Returns:
        *dst*, once the file is present.

    Raises:
        Whatever ``load_file_from_url`` raises when the fallback download
        fails as well.
    """
    # A zero-byte file is the residue of an earlier failed transfer, not a hit.
    if dst.exists() and dst.stat().st_size > 0:
        return dst

    dst.parent.mkdir(parents=True, exist_ok=True)
    partial = dst.with_name(dst.name + ".part")

    for i in range(1, attempts + 1):
        try:
            # --fail makes curl exit non-zero on HTTP errors (e.g. 404) instead
            # of saving the server's error page as if it were the model file.
            subprocess.check_call(
                ["curl", "--fail", "-L", "-o", str(partial), url]
            )
            partial.replace(dst)
            return dst
        except (subprocess.CalledProcessError, OSError):
            # OSError also covers a missing curl binary (FileNotFoundError),
            # which must still reach the fallback below.
            print(f"[DL] Retry {i}/{attempts} failed: {url}")

    # Remove leftovers so the fallback downloader cannot treat them as cached.
    partial.unlink(missing_ok=True)
    dst.unlink(missing_ok=True)

    # Last resort: basicsr's own downloader.
    load_file_from_url(url=url, model_dir=str(dst.parent), file_name=dst.name)
    return dst


# ==============================================================
# 1. モデル URL 定義
# ==============================================================

# Base checkpoint (Beautiful Realistic Asians v7, fp16 safetensors).
BRA_V7_URL = (
    "https://huggingface.co/i0switch-assets/Beautiful_Realistic_Asians_v7/"
    "resolve/main/beautiful_realistic_asians_v7_fp16.safetensors"
)
# IP-Adapter weights. The SD 1.5 files of the h94/IP-Adapter repository live
# in its "models/" sub-folder, so the path must include it.
IP_ADAPTER_BIN_URL = (
    "https://huggingface.co/h94/IP-Adapter/resolve/main/"
    "models/ip-adapter-plus-face_sd15.bin"
)
# FaceID LoRA weights.
IP_ADAPTER_LORA_URL = (
    "https://huggingface.co/h94/IP-Adapter-FaceID/resolve/main/"
    "ip-adapter-faceid-plusv2_sd15_lora.safetensors"
)
# Real-ESRGAN x4 upscaler checkpoint.
REALESRGAN_URL = (
    "https://huggingface.co/aimagelab/realesrgan/resolve/main/RealESRGAN_x4plus.pth"
)

# ==============================================================
# 2. グローバル変数（lazy-load される）
# ==============================================================

# Lazily-created singletons: all three are None at import time and are
# assigned by initialize_pipelines().
pipe: Optional[StableDiffusionControlNetPipeline] = None  # diffusion pipeline
face_analyser: Optional[FaceAnalysis] = None  # InsightFace face analyser
upsampler: Optional[RealESRGANer] = None  # Real-ESRGAN upscaler

# ==============================================================
# 3. パイプライン初期化
# ==============================================================


def initialize_pipelines():
    """Download model assets and build the shared inference objects once.

    Populates the module-level ``pipe``, ``face_analyser`` and ``upsampler``.
    The call is a no-op when ``pipe`` is already set. The globals are assigned
    only after every component has been built successfully, so a failure
    part-way through leaves ``pipe`` as ``None`` and the next call retries
    instead of running with a half-initialised state.

    Raises:
        Any exception raised by the download helper or by the underlying
        libraries while loading a model.
    """
    global pipe, face_analyser, upsampler

    if pipe is not None:
        return  # already initialised

    # Function-scope import: RealESRGANer needs the network instance passed in.
    from basicsr.archs.rrdbnet_arch import RRDBNet

    print("[INIT] Downloading model assets …")

    # ---- 3-1. Checkpoint / LoRA files ----
    # NOTE(review): the BRA v7 checkpoint is cached here but is not wired into
    # the pipeline built below (which loads `base_repo`) — confirm which base
    # model is intended.
    download(BRA_V7_URL, MODELS_DIR / "bra_v7.safetensors")
    ip_lora = download(IP_ADAPTER_LORA_URL, LORA_DIR / "ip_adapter_faceid.lora")

    # ---- 3-2. ControlNet ----
    controlnet = ControlNetModel.from_pretrained(
        "InstantID/ControlNet-Mediapipe-Face",
        torch_dtype=torch.float16,
        cache_dir=str(MODELS_DIR),
    )

    # ---- 3-3. Diffusers pipeline ----
    # The VAE is cached under MODELS_DIR like every other hub asset.
    vae = AutoencoderKL.from_pretrained(
        "stabilityai/sd-vae-ft-mse",
        torch_dtype=torch.float16,
        cache_dir=str(MODELS_DIR),
    )
    # NOTE(review): confirm this repo id still resolves on the Hub.
    base_repo = "runwayml/stable-diffusion-v1-5"
    new_pipe = StableDiffusionControlNetPipeline.from_pretrained(
        base_repo,
        controlnet=controlnet,
        vae=vae,
        torch_dtype=torch.float16,
        safety_checker=None,  # disables the safety checker
        cache_dir=str(MODELS_DIR),
    )
    new_pipe.scheduler = DPMSolverMultistepScheduler.from_pretrained(
        base_repo, subfolder="scheduler", cache_dir=str(MODELS_DIR)
    )

    # ---- IP-Adapter / LoRA ----
    # load_ip_adapter() needs the sub-folder and weight file name; loading from
    # the hub repository also fetches the matching image encoder.
    new_pipe.load_ip_adapter(
        "h94/IP-Adapter",
        subfolder="models",
        weight_name="ip-adapter-plus-face_sd15.bin",
        cache_dir=str(MODELS_DIR),
    )
    # LoRA weights are loaded through the pipeline so that the adapter name is
    # registered for set_adapters().
    # NOTE(review): this LoRA comes from the FaceID repository while the
    # adapter above is the "plus-face" variant — confirm the pairing.
    new_pipe.load_lora_weights(str(ip_lora), adapter_name="ip_faceid")
    new_pipe.set_adapters(["ip_faceid"], adapter_weights=[0.6])
    new_pipe.to("cuda")

    # ---- 3-4. InsightFace ----
    new_face_analyser = FaceAnalysis(
        name="buffalo_l",
        root=str(MODELS_DIR),
        # CPU is listed as a fallback in case the CUDA provider is unavailable.
        providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
    )
    new_face_analyser.prepare(ctx_id=0, det_size=(640, 640))

    # ---- 3-5. Real-ESRGAN ----
    esrgan_ckpt = download(REALESRGAN_URL, UPSCALE_DIR / "realesrgan_x4plus.pth")
    # Architecture matching the RealESRGAN_x4plus checkpoint.
    esrgan_net = RRDBNet(
        num_in_ch=3,
        num_out_ch=3,
        num_feat=64,
        num_block=23,
        num_grow_ch=32,
        scale=4,
    )
    new_upsampler = RealESRGANer(
        scale=4,
        model_path=str(esrgan_ckpt),
        model=esrgan_net,
        half=True,
        tile=512,
        tile_pad=10,
        pre_pad=0,
        gpu_id=0,
    )

    # Publish everything together; `pipe` goes last because it is the flag
    # that marks initialisation as complete.
    face_analyser = new_face_analyser
    upsampler = new_upsampler
    pipe = new_pipe

    print("[INIT] Pipelines ready.")


# ==============================================================
# 4. プロンプト設定
# ==============================================================

# Quality tags placed in front of every prompt. The trailing ", " lets the
# subject description be appended directly.
BASE_PROMPT = ", ".join(
    (
        "(masterpiece:1.2)",
        "best quality",
        "ultra-realistic",
        "8k",
        "RAW photo",
        "cinematic lighting",
        "textured skin",
    )
) + ", "

# Tags placed in front of every negative prompt.
NEG_PROMPT = ", ".join(
    (
        "verybadimagenegative_v1.3",
        "ng_deepnegative_v1_75t",
        "(worst quality:2)",
        "(low quality:2)",
        "lowres",
        "blurry",
        "bad anatomy",
        "bad hands",
        "extra digits",
        "cropped",
        "watermark",
        "signature",
    )
)

# ==============================================================
# 5. 生成コア関数（GPU を掴む）
# ==============================================================


@spaces.GPU(duration=60)
def generate_core(
    face_img: Image.Image,
    subject: str,
    add_prompt: str = "",
    add_neg: str = "",
    cfg: float = 7.5,
    ip_scale: float = 0.6,
    steps: int = 30,
    w: int = 768,
    h: int = 768,
    upscale: bool = False,
    up_factor: int = 4,
    progress: gr.Progress = gr.Progress(track_tqdm=True),
):
    """Generate one image conditioned on the face in *face_img*.

    Args:
        face_img: Reference face picture.
        subject: Subject description appended to the base prompt.
        add_prompt: Optional extra positive prompt text.
        add_neg: Optional extra negative prompt text.
        cfg: Guidance scale passed to the pipeline.
        ip_scale: Weight applied to the "ip_faceid" adapter.
        steps: Number of inference steps.
        w: Output width in pixels.
        h: Output height in pixels.
        upscale: Run Real-ESRGAN on the result when true.
        up_factor: Requested upscale factor; 4 stays 4, any other value is
            treated as 8.
        progress: Gradio progress tracker.

    Returns:
        The generated (and optionally upscaled) ``PIL.Image.Image``.

    Raises:
        ValueError: If no image was supplied or no face was detected.
        Exception: Anything raised by the underlying libraries is logged and
            re-raised unchanged.
    """
    try:
        # No upload yields None; fail with a clear message before any heavy
        # model loading takes place.
        if face_img is None:
            raise ValueError("顔画像がアップロードされていません。")

        if pipe is None:
            initialize_pipelines()

        # Normalise to 3-channel RGB (uploads may carry alpha or be greyscale).
        face_rgb = face_img.convert("RGB")
        # InsightFace follows the OpenCV convention and expects BGR input.
        face_bgr = np.ascontiguousarray(np.asarray(face_rgb)[:, :, ::-1])
        if not face_analyser.get(face_bgr):
            raise ValueError("顔が検出できませんでした。")

        # NOTE(review): `ip_scale` is applied as the weight of the "ip_faceid"
        # LoRA adapter; the IP-Adapter scale itself is not changed here —
        # confirm that this is the intended control.
        pipe.set_adapters(["ip_faceid"], adapter_weights=[ip_scale])

        # Join only the non-empty parts so that empty optional fields do not
        # leave dangling ", " separators in the prompts.
        prompt = ", ".join(
            part
            for part in (
                BASE_PROMPT.rstrip(", "),
                (subject or "").strip(),
                (add_prompt or "").strip(),
            )
            if part
        )
        negative = ", ".join(
            part for part in (NEG_PROMPT, (add_neg or "").strip()) if part
        )

        result = pipe(
            prompt=prompt,
            negative_prompt=negative,
            num_inference_steps=int(steps),
            guidance_scale=float(cfg),
            image=face_rgb,  # ControlNet conditioning image
            # A pipeline with an IP-Adapter loaded needs the reference image.
            ip_adapter_image=face_rgb,
            width=int(w),
            height=int(h),
        ).images[0]

        if upscale and upsampler is not None:
            # The network is a fixed x4 model: its `scale` attribute must not
            # be changed. `outscale` resizes the x4 output to the requested
            # factor and leaves the shared upsampler untouched.
            outscale = 4 if up_factor == 4 else 8
            # Real-ESRGAN also works on BGR arrays.
            bgr_in = np.ascontiguousarray(
                np.asarray(result.convert("RGB"))[:, :, ::-1]
            )
            bgr_out, _ = upsampler.enhance(bgr_in, outscale=outscale)
            result = Image.fromarray(np.ascontiguousarray(bgr_out[:, :, ::-1]))

        return result

    except Exception:
        traceback.print_exc()
        raise  # bare raise keeps the original traceback intact


# ==============================================================
# 6. Gradio UI
# ==============================================================

# Gradio front-end: every widget below feeds one argument of generate_core().
with gr.Blocks(title="InstantID × BRA v7 (ZeroGPU)") as demo:
    gr.Markdown(value="## InstantID × Beautiful Realistic Asians v7")

    # Reference face and subject description, side by side.
    with gr.Row():
        face_img = gr.Image(label="Face ID", type="pil", sources=["upload"])
        subject = gr.Textbox(
            interactive=True,
            label="被写体説明（例: '30代日本人女性、黒髪セミロング'）",
        )

    # Optional additions to the positive and negative prompts.
    add_prompt = gr.Textbox(interactive=True, label="追加プロンプト")
    add_neg = gr.Textbox(interactive=True, label="追加ネガティブ")

    # Guidance and adapter weights.
    with gr.Row():
        cfg = gr.Slider(
            minimum=1, maximum=20, step=0.5, value=7.5, label="CFG Scale"
        )
        ip_scale = gr.Slider(
            minimum=0.1,
            maximum=1.0,
            step=0.05,
            value=0.6,
            label="IP-Adapter Weight",
        )

    # Sampling steps and output size.
    with gr.Row():
        steps = gr.Slider(minimum=10, maximum=50, step=1, value=30, label="Steps")
        w = gr.Slider(minimum=512, maximum=1024, step=64, value=768, label="Width")
        h = gr.Slider(minimum=512, maximum=1024, step=64, value=768, label="Height")

    # Optional upscaling.
    with gr.Row():
        upscale = gr.Checkbox(value=False, label="Real-ESRGAN Upscale")
        up_factor = gr.Radio(choices=[4, 8], value=4, label="Upscale Factor")

    run_btn = gr.Button(value="Generate")

    output_img = gr.Image(label="Result", type="pil")

    # Order matches the positional parameters of generate_core().
    _generate_inputs = [
        face_img,
        subject,
        add_prompt,
        add_neg,
        cfg,
        ip_scale,
        steps,
        w,
        h,
        upscale,
        up_factor,
    ]
    run_btn.click(
        fn=generate_core,
        inputs=_generate_inputs,
        outputs=output_img,
        show_progress=True,
    )

# ==============================================================
# 7. FastAPI エンドポイント
# ==============================================================

app = FastAPI()


@app.post("/api/generate")
async def api_generate(
    subject: str = Form(...),
    cfg: float = Form(7.5),
    steps: int = Form(30),
    ip_scale: float = Form(0.6),
    w: int = Form(768),
    h: int = Form(768),
    file: UploadFile = File(...),
):
    """Generate an image from an uploaded face picture.

    Args:
        subject: Subject description appended to the base prompt.
        cfg: Guidance scale.
        steps: Number of inference steps.
        ip_scale: Adapter weight forwarded to ``generate_core``.
        w: Output width in pixels.
        h: Output height in pixels.
        file: Uploaded face image.

    Returns:
        ``{"image": "data:image/png;base64,..."}`` holding the PNG result.

    Raises:
        HTTPException: 400 when the upload cannot be decoded or
            ``generate_core`` rejects the input with ``ValueError``; 500 for
            any other failure.
    """
    # Function-scope import keeps the new dependency local to this handler.
    from fastapi.concurrency import run_in_threadpool

    img_bytes = await file.read()
    try:
        pil = Image.open(io.BytesIO(img_bytes)).convert("RGB")
    except (OSError, ValueError, Image.DecompressionBombError) as err:
        # A broken or unsupported upload is a client error, not a server fault.
        raise HTTPException(
            status_code=400, detail=f"Invalid image upload: {err}"
        ) from err

    try:
        # generate_core blocks for the whole inference; running it in the
        # thread pool keeps the event loop free for other requests.
        res = await run_in_threadpool(
            generate_core,
            face_img=pil,
            subject=subject,
            add_prompt="",
            add_neg="",
            cfg=cfg,
            ip_scale=ip_scale,
            steps=steps,
            w=w,
            h=h,
            upscale=False,
            up_factor=4,
        )
    except ValueError as err:
        # generate_core raises ValueError for unusable input (no face found).
        traceback.print_exc()
        raise HTTPException(status_code=400, detail=str(err)) from err
    except Exception as err:
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(err)) from err

    buf = io.BytesIO()
    res.save(buf, format="PNG")
    b64 = base64.b64encode(buf.getvalue()).decode()
    return {"image": f"data:image/png;base64,{b64}"}


# ==============================================================
# 8. Launch
# ==============================================================

# Original note: Spaces starts Uvicorn on its own, so no server is started
# manually here.
# NOTE(review): this call launches the Gradio app only; nothing visible in this
# file mounts the FastAPI `app` — confirm how /api/generate is served.
# Gradio 4+ rejects queue(concurrency_count=...); default_concurrency_limit is
# the replacement and keeps at most two generations running at once.
demo.queue(default_concurrency_limit=2).launch(share=False)