i0switch committed on
Commit
61b1f58
·
verified ·
1 Parent(s): 571ecde

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +225 -167
app.py CHANGED
@@ -1,177 +1,235 @@
1
- # app.py — BRA v7 (AIGaming repo) × InstantID × ZeroGPU
2
- # 2025-06-22
3
-
4
- ##############################################################################
5
- # 0. diffusers-0.27 互換: cached_download() パッチ
6
- ##############################################################################
7
- from huggingface_hub import hf_hub_download
8
- import huggingface_hub as _hf
9
- if not hasattr(_hf, "cached_download"):
10
- _hf.cached_download = hf_hub_download
11
-
12
- ##############################################################################
13
- # 1. ライブラリ
14
- ##############################################################################
15
- import os, io, base64, subprocess, traceback
16
  from pathlib import Path
17
- from typing import Optional
18
- import numpy as np
19
- import torch, gradio as gr, spaces
20
- from fastapi import FastAPI, UploadFile, File, Form, HTTPException
21
  from PIL import Image
22
  from diffusers import (
23
- StableDiffusionControlNetPipeline,
24
- ControlNetModel,
25
- DPMSolverMultistepScheduler,
26
  )
27
- from diffusers.loaders import AttnProcsLayers
28
  from insightface.app import FaceAnalysis
29
- from realesrgan import RealESRGANer
30
 
31
  ##############################################################################
32
- # 2. キャッシュパス
33
- ##############################################################################
34
- ROOT = Path("/data") if Path("/data").exists() else Path.home() / ".cache/instantid"
35
- MODELS = ROOT / "models"; LORA = ROOT / "lora"; UPSCALE = ROOT / "realesrgan"
36
- for p in (MODELS, LORA, UPSCALE): p.mkdir(parents=True, exist_ok=True)
37
-
38
- ##############################################################################
39
- # 3. モデル ID / ファイル
40
- ##############################################################################
41
- # --- BRA v7 (公開) ---
42
- BRA_REPO = "AIGaming/beautiful_realistic_asians"
43
- BRA_FILE = "beautifulRealistic_v7.safetensors"
44
- BRA_REV = "801a9b1999dd7018e58a1e2b432fdccd3d1d723d" # 固定 revision
45
-
46
- # --- IP-Adapter 本体 & LoRA ---
47
- ### 最終・根本修正 ### 存在する正しいファイル名を指定し、不要なパスを削除
48
- IP_REPO, IP_BIN = "h94/IP-Adapter-FaceID", "ip-adapter-faceid-plusv2_sd15.bin"
49
- LORA_REPO,IP_LORA = "h94/IP-Adapter-FaceID", "ip-adapter-faceid-plusv2_sd15_lora.safetensors"
50
-
51
- # --- ControlNet (MediaPipe Face) ---
52
- CN_REPO, CN_SUBF = "CrucibleAI/ControlNetMediaPipeFace", "diffusion_sd15"
53
-
54
- # --- Real-ESRGAN ---
55
- ESRGAN_REPO, ESRGAN_FILE = "aimagelab/realesrgan", "RealESRGAN_x4plus.pth"
56
-
57
- ##############################################################################
58
- # 4. HF Hub ダウンロード
59
- ##############################################################################
60
- def dl(repo: str, file: str, sub: str | None = None, rev: str | None = None) -> Path:
61
- return Path(hf_hub_download(repo, file, subfolder=sub,
62
- revision=rev, cache_dir=str(MODELS)))
63
-
64
- ##############################################################################
65
- # 5. グローバル
66
- ##############################################################################
67
- pipe: Optional[StableDiffusionControlNetPipeline] = None
68
- face_analyser: Optional[FaceAnalysis] = None
69
- upsampler: Optional[RealESRGANer] = None
70
-
71
- ##############################################################################
72
- # 6. 初期化
73
- ##############################################################################
74
- def init():
75
- global pipe, face_analyser, upsampler
76
- if pipe is not None:
77
- return
78
- print("[INIT] downloading models…")
79
-
80
- # 6-1 BRA v7
81
- bra_ckpt = dl(BRA_REPO, BRA_FILE, rev=BRA_REV)
82
-
83
- # 6-2 ControlNet
84
- cn = ControlNetModel.from_pretrained(
85
- CN_REPO, subfolder=CN_SUBF, torch_dtype=torch.float16,
86
- cache_dir=str(MODELS)
87
- )
88
-
89
- # 6-3 Pipeline from .safetensors + ControlNet
90
- pipe_ = StableDiffusionControlNetPipeline.from_single_file(
91
- bra_ckpt, controlnet=cn, torch_dtype=torch.float16,
92
- safety_checker=None
93
- )
94
- pipe_.scheduler = DPMSolverMultistepScheduler.from_config(pipe_.scheduler.config)
95
-
96
- # 6-4 IP-Adapter
97
- # 正しいリポジトリとファイル名を指定し、ライブラリにダウンロードと読み込みを任せる
98
- pipe_.load_ip_adapter(IP_REPO, "", weight_name=IP_BIN, cache_dir=str(MODELS))
99
- AttnProcsLayers(pipe_.unet.attn_processors).load_lora_weights(
100
- LORA_REPO, weight_name=IP_LORA, adapter_name="ip_faceid", safe_load=True, cache_dir=str(MODELS)
101
- )
102
-
103
- pipe_.set_adapters(["ip_faceid"], adapter_weights=[0.6])
104
- pipe_.to("cuda"); pipe_ = pipe_
105
-
106
- pipe = pipe_
107
- face_analyser = FaceAnalysis(
108
- name="buffalo_l", root=str(MODELS), providers=["CUDAExecutionProvider"]
109
- ); face_analyser.prepare(ctx_id=0, det_size=(640,640))
110
-
111
- esr = dl(ESRGAN_REPO, ESRGAN_FILE)
112
- upsampler = RealESRGANer(scale=4, model_path=str(esr), half=True,
113
- tile=512, tile_pad=10, pre_pad=0, gpu_id=0)
114
- print("[INIT] ready.")
115
-
116
- ##############################################################################
117
- # 7. プロンプト
118
- ##############################################################################
119
- BASE = "(masterpiece:1.2), best quality, ultra-realistic, RAW photo, 8k, cinematic lighting, textured skin, "
120
- NEG = "verybadimagenegative_v1.3, ng_deepnegative_v1_75t, (worst quality:2), (low quality:2), lowres, blurry, bad anatomy, bad hands, extra digits, watermark, signature"
121
-
122
- ##############################################################################
123
- # 8. 生成コア
124
- ##############################################################################
125
- @spaces.GPU(duration=60)
126
- def generate(face: Image.Image, subj: str, add: str, neg: str,
127
- cfg: float, ipw: float, steps: int, w: int, h: int,
128
- up: bool, upf: int, progress=gr.Progress(track_tqdm=True)):
129
- if pipe is None:
130
- init()
131
- if len(face_analyser.get(np.array(face))) == 0:
132
- raise ValueError("顔が検出できません。他の画像でお試しください。")
133
- pipe.set_adapters(["ip_faceid"], adapter_weights=[ipw])
134
- img = pipe(prompt=BASE+subj+", "+add,
135
- negative_prompt=NEG+", "+neg,
136
- num_inference_steps=steps, guidance_scale=cfg,
137
- image=face, width=w, height=h).images[0]
138
- if up:
139
- upsampler.scale = int(upf)
140
- img, _ = upsampler.enhance(np.array(img)); img = Image.fromarray(img)
141
- return img
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
142
 
143
- ##############################################################################
144
- # 9. Gradio UI
145
- ##############################################################################
146
- with gr.Blocks(title="BRA v7 × InstantID (ZeroGPU)") as demo:
147
- gr.Markdown("## BRA v7 × InstantID")
148
- with gr.Row():
149
- f = gr.Image(type="pil", label="Face ID"); s = gr.Textbox(label="被写体説明")
150
- ap = gr.Textbox(label="追加プロンプト"); ng = gr.Textbox(label="追加ネガ")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
  with gr.Row():
152
- cf = gr.Slider(1,20,7.5,0.5,"CFG"); ip = gr.Slider(0.1,1.0,0.6,0.05,"IP-Adapter Weight")
153
- with gr.Row():
154
- st = gr.Slider(10,50,30,1,"Steps"); W = gr.Slider(512,1024,768,64,"W"); H = gr.Slider(512,1024,768,64,"H")
155
- with gr.Row():
156
- up = gr.Checkbox(label="Real-ESRGAN"); upf = gr.Radio([4,8], value=4, label="アップスケール")
157
- btn = gr.Button("Generate"); out = gr.Image(type="pil", label="Result")
158
- btn.click(generate, [f,s,ap,ng,cf,ip,st,W,H,up,upf], out, show_progress=True)
159
-
160
- ##############################################################################
161
- # 10. FastAPI
162
- ##############################################################################
163
- app = FastAPI()
164
-
165
- @app.post("/api/generate")
166
- async def api_gen(subj: str=Form(...), cfg: float=Form(7.5), stp: int=Form(30),
167
- ipw: float=Form(0.6), W: int=Form(768), H: int=Form(768),
168
- file: UploadFile=File(...)):
169
- img = Image.open(io.BytesIO(await file.read())).convert("RGB")
170
- res = generate(img, subj, "", "", cfg, ipw, stp, W, H, False, 4)
171
- buf = io.BytesIO(); res.save(buf,"PNG")
172
- return {"image":"data:image/png;base64,"+base64.b6_encode(buf.getvalue()).decode()}
 
173
 
174
- ##############################################################################
175
- # 11. Launch
176
- ##############################################################################
177
- demo.queue(default_concurrency_limit=2).launch(share=False)
 
1
+ # app.py — InstantID × Beautiful Realistic Asians v7 (ZeroGPU-friendly, persistent cache)
2
+ """Persistent-cache backend for InstantID portrait generation.
3
+ - Caches model assets under /data when writable, else ~/.cache
4
+ - Robust download with retry + multiple fallback URLs per asset
5
+ """
6
+ import os, subprocess, cv2, torch, spaces, gradio as gr, numpy as np
 
 
 
 
 
 
 
 
 
7
  from pathlib import Path
 
 
 
 
8
  from PIL import Image
9
  from diffusers import (
10
+ StableDiffusionPipeline, ControlNetModel,
11
+ DPMSolverMultistepScheduler, AutoencoderKL,
 
12
  )
 
13
  from insightface.app import FaceAnalysis
 
14
 
15
  ##############################################################################
16
+ # 0. Cache dir & helpers
17
+ ##############################################################################
18
+ PERSIST_BASE = Path("/data")
19
+ CACHE_ROOT = (PERSIST_BASE / "instantid_cache" if PERSIST_BASE.exists() and os.access(PERSIST_BASE, os.W_OK)
20
+ else Path.home() / ".cache" / "instantid_cache")
21
+ print("cache →", CACHE_ROOT)
22
+
23
+ MODELS_DIR = CACHE_ROOT / "models"
24
+ LORA_DIR = MODELS_DIR / "Lora"
25
+ EMB_DIR = CACHE_ROOT / "embeddings"
26
+ UPSCALE_DIR = CACHE_ROOT / "realesrgan"
27
+ for p in (MODELS_DIR, LORA_DIR, EMB_DIR, UPSCALE_DIR):
28
+ p.mkdir(parents=True, exist_ok=True)
29
+
30
+
31
+ def dl(url: str, dst: Path, attempts: int = 2):
32
+ if dst.exists():
33
+ print("", dst.relative_to(CACHE_ROOT)); return
34
+ for i in range(1, attempts + 1):
35
+ print(f"⬇ {dst.name} (try {i}/{attempts})")
36
+ if subprocess.call(["wget", "-q", "-O", str(dst), url]) == 0:
37
+ return
38
+ raise RuntimeError(f"download failed → {url}")
39
+
40
+ ##############################################################################
41
+ # 1. Asset download
42
+ ##############################################################################
43
+ print("— asset check —")
44
+
45
+ # 1-A. base ckpt
46
+ BASE_CKPT = MODELS_DIR / "beautiful_realistic_asians_v7_fp16.safetensors"
47
+ dl("https://civitai.com/api/download/models/177164?type=Model&format=SafeTensor&size=pruned&fp=fp16", BASE_CKPT)
48
+
49
+ # 1-B. IP-Adapter core + FaceID LoRA
50
+ IP_BIN_FILE = LORA_DIR / "ip-adapter-plus-face_sd15.bin"
51
+ dl("https://huggingface.co/h94/IP-Adapter/resolve/main/models/ip-adapter-plus-face_sd15.bin", IP_BIN_FILE)
52
+
53
+ LORA_FILE = LORA_DIR / "ip-adapter-faceid-plusv2_sd15_lora.safetensors"
54
+ dl("https://huggingface.co/h94/IP-Adapter-FaceID/resolve/main/ip-adapter-faceid-plusv2_sd15_lora.safetensors", LORA_FILE)
55
+
56
+ # 1-C. textual-inversion embeddings
57
+ EMB_URLS = {
58
+ "ng_deepnegative_v1_75t.pt": [
59
+ "https://huggingface.co/datasets/gsdf/EasyNegative/resolve/main/ng_deepnegative_v1_75t.pt",
60
+ "https://huggingface.co/mrpxl2/animetarotV51.safetensors/raw/cc3008c0148061896549a995cc297aef0af4ef1b/ng_deepnegative_v1_75t.pt",
61
+ ],
62
+ "badhandv4.pt": [
63
+ "https://huggingface.co/datasets/gsdf/ConceptLab/resolve/main/badhandv4.pt",
64
+ "https://huggingface.co/nolanaatama/embeddings/raw/main/badhandv4.pt",
65
+ ],
66
+ "CyberRealistic_Negative-neg.pt": [
67
+ "https://huggingface.co/datasets/gsdf/ConceptLab/resolve/main/CyberRealistic_Negative-neg.pt",
68
+ "https://huggingface.co/wsj1995/embeddings/raw/main/CyberRealistic_Negative-neg.civitai.info",
69
+ ],
70
+ "UnrealisticDream.pt": [
71
+ "https://huggingface.co/datasets/gsdf/ConceptLab/resolve/main/UnrealisticDream.pt",
72
+ "https://huggingface.co/imagepipeline/UnrealisticDream/raw/main/f84133b4-aad8-44be-b9ce-7e7e3a8c111f.pt",
73
+ ],
74
+ }
75
+ for fname, urls in EMB_URLS.items():
76
+ dst = EMB_DIR / fname
77
+ for idx, u in enumerate(urls, 1):
78
+ try:
79
+ dl(u, dst); break
80
+ except RuntimeError:
81
+ if idx == len(urls): raise
82
+ print(" fallback URL …")
83
+
84
+ # 1-D. Real-ESRGAN weights
85
+ RRG_WEIGHTS = UPSCALE_DIR / "RealESRGAN_x8plus.pth"
86
+ RRG_URLS = [
87
+ "https://huggingface.co/NoCrypt/Superscale_RealESRGAN/resolve/main/RealESRGAN_x8plus.pth",
88
+ "https://huggingface.co/ai-forever/Real-ESRGAN/raw/main/RealESRGAN_x8.pth",
89
+ "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/8x_NMKD-Superscale_100k.pth",
90
+ ]
91
+ for idx, link in enumerate(RRG_URLS, 1):
92
+ try:
93
+ dl(link, RRG_WEIGHTS); break
94
+ except RuntimeError:
95
+ if idx == len(RRG_URLS): raise
96
+ print(" fallback URL …")
97
+
98
+ ##############################################################################
99
+ # 2. Runtime init
100
+ ##############################################################################
101
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
102
+ dtype = torch.float16 if torch.cuda.is_available() else torch.float32
103
+ print("device:", device, "| dtype:", dtype)
104
+
105
+ providers = ["CUDAExecutionProvider", "CPUExecutionProvider"] if torch.cuda.is_available() else ["CPUExecutionProvider"]
106
+ face_app = FaceAnalysis(name="buffalo_l", root=str(CACHE_ROOT), providers=providers)
107
+ face_app.prepare(ctx_id=(0 if torch.cuda.is_available() else -1), det_size=(640, 640))
108
+
109
+ controlnet = ControlNetModel.from_pretrained("InstantX/InstantID", subfolder="ControlNetModel", torch_dtype=dtype)
110
+ pipe = StableDiffusionPipeline.from_single_file(BASE_CKPT, torch_dtype=dtype, safety_checker=None, use_safetensors=True, clip_skip=2)
111
+ pipe.vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse", torch_dtype=dtype).to(device)
112
+ pipe.controlnet = controlnet
113
+ pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config, use_karras_sigmas=True, algorithm_type="sde-dpmsolver++")
114
+
115
+ pipe.load_ip_adapter(str(LORA_DIR), subfolder="", weight_name=IP_BIN_FILE.name)
116
+ # load FaceID LoRA (Δ only LoRA weights, not full IP-Adapter)
117
+ pipe.load_lora_weights(str(LORA_DIR), weight_name=LORA_FILE.name)
118
+ pipe.set_ip_adapter_scale(0.65)
119
+
120
+ for emb in EMB_DIR.glob("*.*"):
121
+ try:
122
+ pipe.load_textual_inversion(emb, token=emb.stem)
123
+ print("emb loaded →", emb.stem)
124
+ except Exception:
125
+ print("emb skip →", emb.name)
126
+ pipe.to(device)
127
+ print("pipeline ready ✔")
128
+
129
+ ##############################################################################
130
+ # 3. Upscaler
131
+ ##############################################################################
132
+ try:
133
+ from basicsr.archs.rrdb_arch import RRDBNet
134
+ try:
135
+ from realesrgan import RealESRGAN
136
+ except ImportError:
137
+ from realesrgan import RealESRGANer as RealESRGAN
138
+ rrdb = RRDBNet(3, 3, 64, 23, 32, scale=8)
139
+ upsampler = RealESRGAN(device, rrdb, scale=8)
140
+ upsampler.load_weights(str(RRG_WEIGHTS))
141
+ UPSCALE_OK = True
142
+ except Exception as e:
143
+ print("Real-ESRGAN disabled →", e)
144
+ UPSCALE_OK = False
145
+
146
+ ##############################################################################
147
+ # 4. Prompts & generation
148
+ ##############################################################################
149
+ BASE_PROMPT = (
150
+ "(masterpiece:1.2), best quality, ultra-realistic, RAW photo, 8k,\n"
151
+ "photo of {subject},\n"
152
+ "cinematic lighting, golden hour, rim light, shallow depth of field,\n"
153
+ "textured skin, high detail, shot on Canon EOS R5, 85 mm f/1.4, ISO 200,\n"
154
+ "<lora:ip-adapter-faceid-plusv2_sd15_lora:0.65>, (face),\n"
155
+ "(aesthetic:1.1), (cinematic:0.8)"
156
+ )
157
+ # [!!] 下記のNEG_PROMPTを修正しました。不要なカンマと重複した文字列を削除し、単一の文字列になるようにしました。
158
+ NEG_PROMPT = (
159
+ "ng_deepnegative_v1_75t, CyberRealistic_Negative-neg, UnrealisticDream, "
160
+ "(worst quality:2), (low quality:1.8), lowres, (jpeg artifacts:1.2), "
161
+ "painting, sketch, illustration, drawing, cartoon, anime, cgi, render, 3d, "
162
+ "monochrome, grayscale, text, logo, watermark, signature, username, "
163
+ "(MajicNegative_V2:0.8), bad hands, extra digits, fused fingers, malformed limbs, "
164
+ "missing arms, missing legs, (badhandv4:0.7), BadNegAnatomyV1-neg, skin blemishes, acnes, age spot, glans"
165
+ )
166
 
167
+ @spaces.GPU(duration=90)
168
+ def generate(
169
+ face_np, subject, add_prompt, add_neg, cfg, ip_scale, steps, w, h, upscale, up_factor,
170
+ progress=gr.Progress(track_tqdm=True)
171
+ ):
172
+ if face_np is None or face_np.size == 0:
173
+ raise gr.Error("顔画像をアップロードしてください。")
174
+
175
+ prompt = BASE_PROMPT.format(subject=(subject.strip() or "a beautiful 20yo woman"))
176
+ if add_prompt:
177
+ prompt += ", " + add_prompt
178
+ neg = NEG_PROMPT + (", " + add_neg if add_neg else "")
179
+
180
+ pipe.set_ip_adapter_scale(ip_scale)
181
+ img_in = Image.fromarray(face_np)
182
+
183
+ result = pipe(
184
+ prompt=prompt,
185
+ negative_prompt=neg,
186
+ ip_adapter_image=img_in,
187
+ image=img_in,
188
+ controlnet_conditioning_scale=0.9,
189
+ num_inference_steps=int(steps) + 5,
190
+ guidance_scale=cfg,
191
+ width=int(w),
192
+ height=int(h),
193
+ ).images[0]
194
+
195
+ if upscale:
196
+ if UPSCALE_OK:
197
+ up, _ = upsampler.enhance(cv2.cvtColor(np.array(result), cv2.COLOR_RGB2BGR), outscale=up_factor)
198
+ result = Image.fromarray(cv2.cvtColor(up, cv2.COLOR_BGR2RGB))
199
+ else:
200
+ result = result.resize((int(result.width * up_factor), int(result.height * up_factor)), Image.LANCZOS)
201
+
202
+ return result
203
+
204
+ ##############################################################################
205
+ # 5. Gradio UI
206
+ ##############################################################################
207
+
208
+ with gr.Blocks() as demo:
209
+ gr.Markdown("# InstantID – Beautiful Realistic Asians v7")
210
  with gr.Row():
211
+ with gr.Column():
212
+ face_in = gr.Image(label="顔写真", type="numpy")
213
+ subj_in = gr.Textbox(label="被写体説明", placeholder="e.g. woman in black suit, smiling")
214
+ add_in = gr.Textbox(label="追加プロンプト")
215
+ addneg_in = gr.Textbox(label="追加ネガティブ")
216
+ ip_sld = gr.Slider(0, 1.5, 0.65, step=0.05, label="IP-Adapter scale")
217
+ cfg_sld = gr.Slider(1, 15, 6, step=0.5, label="CFG")
218
+ step_sld = gr.Slider(10, 50, 20, step=1, label="Steps")
219
+ w_sld = gr.Slider(512, 1024, 512, step=64, label="幅")
220
+ h_sld = gr.Slider(512, 1024, 768, step=64, label="高さ")
221
+ up_ck = gr.Checkbox(label="アップスケール", value=True)
222
+ up_fac = gr.Slider(1, 8, 2, step=1, label="倍率")
223
+ btn = gr.Button("生成", variant="primary")
224
+ with gr.Column():
225
+ out_img = gr.Image(label="結果")
226
+
227
+ btn.click(
228
+ generate,
229
+ [face_in, subj_in, add_in, addneg_in, cfg_sld, ip_sld, step_sld, w_sld, h_sld, up_ck, up_fac],
230
+ out_img,
231
+ api_name="predict",
232
+ )
233
 
234
+ print("launching …")
235
+ demo.queue().launch(show_error=True)