pixel3dmm / app.py
alexnasa's picture
alltogether
9db6838
raw
history blame
9.5 kB
import os

# Force Dynamo off at import-time of torch, pytorch3d, etc.
# The variable has to be in the environment *before* torch._dynamo is first
# imported; setting it afterwards (as this file used to) is too late.
# NOTE(review): PyTorch appears to read TORCHDYNAMO_DISABLE once, when
# torch._dynamo's config module is imported — confirm against the installed version.
os.environ["TORCHDYNAMO_DISABLE"] = "1"

import spaces
import torch._dynamo

# NOTE(review): called without a function argument this looks like it only
# returns a decorator; kept as-is so existing behaviour is unchanged — confirm.
torch._dynamo.disable()
import subprocess
import tempfile
import uuid
import glob
import shutil
import time
import gradio as gr
import sys
from PIL import Image
import importlib, site, sys
# Re-discover all .pth/.egg-link files
for sitedir in site.getsitepackages():
    site.addsitedir(sitedir)

# Clear caches so importlib will pick up new modules
importlib.invalidate_caches()

# Set environment variables (all rooted at the current working directory)
os.environ["PIXEL3DMM_CODE_BASE"] = f"{os.getcwd()}"
os.environ["PIXEL3DMM_PREPROCESSED_DATA"] = f"{os.getcwd()}/proprocess_results"
os.environ["PIXEL3DMM_TRACKING_OUTPUT"] = f"{os.getcwd()}/tracking_results"


def sh(cmd, cwd=None):
    """Run *cmd* through the shell, raising ``CalledProcessError`` on failure.

    Args:
        cmd: Shell command line to execute.
        cwd: Optional working directory for the command. Defaults to the
            current working directory of this process.
    """
    subprocess.check_call(cmd, shell=True, cwd=cwd)


# Install through the interpreter that is running this app, so the packages
# land in the environment this process imports from (a bare ``pip`` on PATH
# may belong to a different Python).
_PIP_INSTALL_EDITABLE = [sys.executable, "-m", "pip", "install", "-e", "."]

subprocess.check_call(_PIP_INSTALL_EDITABLE)

# tell Python to re-scan site-packages now that the egg-link exists
import importlib
import site

site.addsitedir(site.getsitepackages()[0])
importlib.invalidate_caches()

from pixel3dmm import env_paths

# Each command runs in its own child process, so the working directory is
# passed via ``cwd`` instead of chaining ``cd ... && cd back``.
subprocess.check_call(_PIP_INSTALL_EDITABLE, cwd="src/pixel3dmm/preprocessing/facer")
sh("sh make.sh", cwd="src/pixel3dmm/preprocessing/PIPNet/FaceBoxesV2/utils")
def install_cuda_toolkit():
    """Download and run the CUDA 12.1.0 toolkit installer, then export CUDA paths.

    The download (``wget``), ``chmod`` and installer steps run in order. The
    first step that exits non-zero is reported and the remaining steps are
    skipped, instead of silently carrying on. The environment variables
    (``CUDA_HOME``, ``PATH``, ``LD_LIBRARY_PATH``, ``TORCH_CUDA_ARCH_LIST``)
    are set in every case, as before.

    Returns:
        None.
    """
    CUDA_TOOLKIT_URL = "https://developer.download.nvidia.com/compute/cuda/12.1.0/local_installers/cuda_12.1.0_530.30.02_linux.run"
    CUDA_TOOLKIT_FILE = "/tmp/%s" % os.path.basename(CUDA_TOOLKIT_URL)

    install_steps = (
        ["wget", "-q", CUDA_TOOLKIT_URL, "-O", CUDA_TOOLKIT_FILE],
        ["chmod", "+x", CUDA_TOOLKIT_FILE],
        [CUDA_TOOLKIT_FILE, "--silent", "--toolkit"],
    )
    installed = True
    for step in install_steps:
        exit_code = subprocess.call(step)
        if exit_code != 0:
            # Later steps depend on this one, so there is no point continuing.
            print("==> CUDA toolkit installation failed (exit %d): %s" % (exit_code, " ".join(step)))
            installed = False
            break

    os.environ["CUDA_HOME"] = "/usr/local/cuda"
    os.environ["PATH"] = "%s/bin:%s" % (os.environ["CUDA_HOME"], os.environ["PATH"])

    # Only join with ":" when there is an existing value; a trailing ":" adds
    # an empty entry, which the dynamic loader treats as the current directory.
    cuda_lib_dir = "%s/lib" % os.environ["CUDA_HOME"]
    previous_lib_path = os.environ.get("LD_LIBRARY_PATH", "")
    if previous_lib_path:
        os.environ["LD_LIBRARY_PATH"] = "%s:%s" % (cuda_lib_dir, previous_lib_path)
    else:
        os.environ["LD_LIBRARY_PATH"] = cuda_lib_dir

    # Fix: arch_list[-1] += '+PTX'; IndexError: list index out of range
    os.environ["TORCH_CUDA_ARCH_LIST"] = "9.0"

    if installed:
        print("==> finished installation")
# Install the CUDA toolkit before the project imports below.
# NOTE(review): presumably the imports (or code they trigger) need CUDA_HOME
# and friends to be set already — confirm before reordering.
install_cuda_toolkit()
from omegaconf import OmegaConf
from pixel3dmm.network_inference import normals_n_uvs
# Device string that every model in this file is moved to.
DEVICE = "cuda"
# Module-level cache for heavy objects (models, renderer), filled lazily by
# the step functions and keyed by name; starts out empty.
_model_cache = {}
# Utility to select first image from a folder
def first_image_from_dir(directory):
    """Return the lexicographically first ``.jpg``/``.png``/``.jpeg`` path in *directory*.

    Only direct children of *directory* are considered. Returns ``None`` when
    no file matches any of the three patterns.
    """
    candidates = [
        path
        for pattern in ("*.jpg", "*.png", "*.jpeg")
        for path in glob.glob(os.path.join(directory, pattern))
    ]
    # min() of the paths is the same element sorted() would put first.
    return min(candidates) if candidates else None
# Function to reset the UI and state
def reset_all():
    """Return the values that reset the UI after a new image is uploaded.

    The tuple lines up one-to-one with the ``outputs`` list of the
    ``image_in.upload`` event: the four result images are cleared, the status
    text is reset, and the session state becomes a fresh empty dict.

    The previous version also returned four ``gr.update`` values for per-step
    buttons that are no longer part of the UI, so the function produced more
    values than the event had outputs.

    Returns:
        tuple: ``(crop_img, normals_img, uv_img, track_img, status, state)``.
    """
    return (
        None,  # crop_img
        None,  # normals_img
        None,  # uv_img
        None,  # track_img
        "Awaiting new image upload...",  # status
        {},  # state
    )
# Step 1: Preprocess the input image (Save and Crop)
@spaces.GPU()
def preprocess_image(image_array, state):
    """Save the uploaded image under a new session directory and run preprocessing.

    Args:
        image_array: Uploaded image as an array accepted by ``Image.fromarray``,
            or ``None`` when nothing was uploaded.
        state: Session dict; updated in place with ``session_id``, ``base_dir``
            and ``image_path``.

    Returns:
        A 5-tuple ``(status, image, state, update, update)``. ``image`` is the
        first file found in ``<base_dir>/cropped`` (or ``None``). On failure
        the status starts with "❌", ``image`` is ``None``, and — if the
        subprocess failed — the returned state is an empty dict.
    """
    if image_array is None:
        return "❌ Please upload an image first.", None, state, gr.update(interactive=True), gr.update(interactive=True)

    session_id = str(uuid.uuid4())
    base_dir = os.path.join(os.environ["PIXEL3DMM_PREPROCESSED_DATA"], session_id)
    os.makedirs(base_dir, exist_ok=True)
    state.update({"session_id": session_id, "base_dir": base_dir})

    img = Image.fromarray(image_array)
    saved_image_path = os.path.join(base_dir, f"{session_id}.png")
    img.save(saved_image_path)
    state["image_path"] = saved_image_path

    try:
        # Use the interpreter running this app; a bare "python" on PATH may
        # point at an environment without the project's dependencies.
        subprocess.run(
            [sys.executable, "scripts/run_preprocessing.py", "--video_or_images_path", saved_image_path],
            check=True,
            capture_output=True,
            text=True,
        )
    except subprocess.CalledProcessError as e:
        err = f"❌ Preprocess failed (exit {e.returncode}).\n\n{e.stdout}\n{e.stderr}"
        # Best-effort cleanup: a failure to delete must not hide the real error.
        shutil.rmtree(base_dir, ignore_errors=True)
        return err, None, {}, gr.update(interactive=True), gr.update(interactive=True)

    crop_dir = os.path.join(base_dir, "cropped")
    image = first_image_from_dir(crop_dir)
    return "✅ Step 1 complete. Ready for Normals.", image, state, gr.update(interactive=True), gr.update(interactive=True)
# Step 2: Normals inference → normals image
@spaces.GPU()
def step2_normals(state):
    """Run normals inference for the current session and return a preview image.

    Loads ``configs/base.yaml``, lazily loads the normals checkpoint into
    ``_model_cache`` (moved to ``DEVICE`` in eval mode), runs ``normals_n_uvs``
    with ``video_name`` set to the session id, and picks the first image from
    ``<base_dir>/p3dmm/normals``.

    Returns:
        A 5-tuple ``(status, image, state, update, update)``.
    """
    cache_key = "normals_model"
    conf = OmegaConf.load("configs/base.yaml")

    if cache_key not in _model_cache:
        from pixel3dmm.lightning.p3dmm_system import system as p3dmm_system

        network = p3dmm_system.load_from_checkpoint(str(env_paths.CKPT_N_PRED), strict=False)
        _model_cache[cache_key] = network.eval().to(DEVICE)

    conf.video_name = str(state.get("session_id"))
    normals_n_uvs(conf, _model_cache[cache_key])

    preview = first_image_from_dir(os.path.join(state["base_dir"], "p3dmm", "normals"))
    return (
        "✅ Step 2 complete. Ready for UV Map.",
        preview,
        state,
        gr.update(interactive=True),
        gr.update(interactive=True),
    )
# Step 3: UV map inference → uv map image
@spaces.GPU()
def step3_uv_map(state):
    """Run UV-map inference for the current session and return a preview image.

    Loads ``configs/base.yaml``, lazily loads the UV checkpoint into
    ``_model_cache`` (moved to ``DEVICE`` in eval mode), sets
    ``model.prediction_type`` to ``"uv_map"``, runs ``normals_n_uvs`` with
    ``video_name`` set to the session id, and picks the first image from
    ``<base_dir>/p3dmm/uv_map``.

    Returns:
        A 5-tuple ``(status, image, state, update, update)``.
    """
    cache_key = "uv_model"
    conf = OmegaConf.load("configs/base.yaml")

    if cache_key not in _model_cache:
        from pixel3dmm.lightning.p3dmm_system import system as p3dmm_system

        network = p3dmm_system.load_from_checkpoint(str(env_paths.CKPT_UV_PRED), strict=False)
        _model_cache[cache_key] = network.eval().to(DEVICE)

    conf.video_name = str(state.get("session_id"))
    conf.model.prediction_type = "uv_map"
    normals_n_uvs(conf, _model_cache[cache_key])

    preview = first_image_from_dir(os.path.join(state["base_dir"], "p3dmm", "uv_map"))
    return (
        "✅ Step 3 complete. Ready for Tracking.",
        preview,
        state,
        gr.update(interactive=True),
        gr.update(interactive=True),
    )
# Step 4: Tracking → final tracking image
@spaces.GPU()
def step4_track(state):
    """Run tracking for the current session and return a preview image.

    Loads ``configs/tracking.yaml``, lazily builds and caches the FLAME model
    and the renderer on ``DEVICE``, runs ``Tracker`` with ``video_name`` set
    to the session id, and picks the first image from
    ``$PIXEL3DMM_TRACKING_OUTPUT/<session_id>/frames``.

    ``Tracker`` is imported on every call and ``os`` is no longer re-imported
    locally: previously both names were bound only on the cache-miss path, so
    a call with a warm cache raised ``UnboundLocalError``.

    Returns:
        A 4-tuple ``(status, image, state, update)``.
    """
    tracking_conf = OmegaConf.load("configs/tracking.yaml")

    # Lazy init + caching of FLAME model and renderer on GPU.
    # Both keys are checked so that a failure part-way through a previous
    # initialisation cannot leave the cache half-populated.
    if "flame_model" not in _model_cache or "diff_renderer" not in _model_cache:
        from pixel3dmm.tracking.flame.FLAME import FLAME
        from pixel3dmm.tracking.renderer_nvdiffrast import NVDRenderer

        flame = FLAME(tracking_conf)  # CPU instantiation
        flame = flame.to(DEVICE)  # CUDA init happens here

        _mesh_file = env_paths.head_template
        renderer = NVDRenderer(
            image_size=tracking_conf.size,
            obj_filename=_mesh_file,
            no_sh=False,
            white_bg=True,
        ).to(DEVICE)

        # Store only once both objects exist.
        _model_cache["flame_model"] = flame
        _model_cache["diff_renderer"] = renderer

    # Needed on every call, not just the first one.
    from pixel3dmm.tracking.tracker import Tracker

    flame_model = _model_cache["flame_model"]
    diff_renderer = _model_cache["diff_renderer"]

    session_id = state.get("session_id")
    tracking_conf.video_name = f'{session_id}'
    tracker = Tracker(tracking_conf, flame_model, diff_renderer)
    tracker.run()

    tracking_dir = os.path.join(os.environ["PIXEL3DMM_TRACKING_OUTPUT"], session_id, "frames")
    image = first_image_from_dir(tracking_dir)
    return "✅ Pipeline complete!", image, state, gr.update(interactive=True)
# New: run all steps sequentially
@spaces.GPU()
def run_pipeline(image_array, state):
    """Run preprocessing, normals, UV map and tracking back to back.

    Stops after preprocessing when its status contains "❌"; in that case the
    four image outputs are ``None`` and the returned state is an empty dict.
    Otherwise the four step statuses are joined with newlines.

    Returns:
        A 6-tuple ``(status, crop_img, normals_img, uv_img, track_img, state)``.
    """
    # Step 1: Preprocess
    preprocess_status, crop_img, state, _, _ = preprocess_image(image_array, state)
    if "❌" in preprocess_status:
        return preprocess_status, None, None, None, None, {}

    # Step 2: Normals
    normals_status, normals_img, state, _, _ = step2_normals(state)

    # Step 3: UV Map
    uv_status, uv_img, state, _, _ = step3_uv_map(state)

    # Step 4: Tracking
    tracking_status, track_img, state, _ = step4_track(state)

    statuses = (preprocess_status, normals_status, uv_status, tracking_status)
    return "\n".join(statuses), crop_img, normals_img, uv_img, track_img, state
# Build Gradio UI
# Build Gradio UI: upload + status on the left, a 2x2 grid of result images
# on the right, and a single button that runs the whole pipeline.
with gr.Blocks() as demo:
    gr.Markdown("## Image Processing Pipeline (Single Button)")
    gr.Markdown("Upload an image and click 'Run Pipeline' to execute all steps.")

    with gr.Row():
        with gr.Column():
            image_in = gr.Image(label="Upload Image", type="numpy", height=512)
            status = gr.Textbox(label="Status", lines=4, interactive=True, value="Upload an image to start.")
            state = gr.State({})
        with gr.Column():
            with gr.Row():
                crop_img = gr.Image(label="Preprocessed", height=256)
                normals_img = gr.Image(label="Normals", height=256)
            with gr.Row():
                uv_img = gr.Image(label="UV Map", height=256)
                track_img = gr.Image(label="Tracking", height=256)

    run_btn = gr.Button("Run Pipeline")

    # Component lists shared by the two event handlers below.
    result_images = [crop_img, normals_img, uv_img, track_img]

    # Single button click
    run_btn.click(
        fn=run_pipeline,
        inputs=[image_in, state],
        outputs=[status, *result_images, state],
    )

    # A fresh upload clears the previous results and session state.
    image_in.upload(fn=reset_all, inputs=None, outputs=[*result_images, status, state])

demo.queue()
demo.launch(share=True, ssr_mode=False)