Spaces:
Running
on
Zero
Running
on
Zero
import spaces | |
import os | |
import gradio as gr | |
import numpy as np | |
import torch | |
from PIL import Image | |
import trimesh | |
import random | |
from transformers import AutoModelForImageSegmentation | |
from torchvision import transforms | |
from huggingface_hub import hf_hub_download, snapshot_download | |
import subprocess | |
import shutil | |
import sys

# Install spandrel at startup with --no-deps. pip is invoked through the
# running interpreter (list argv, no shell) so the package is installed into
# the environment this process actually uses.
subprocess.run(
    [sys.executable, "-m", "pip", "install", "spandrel==0.4.1", "--no-deps"],
    check=True,
)

# Runtime device / precision used by every model below.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
DTYPE = torch.float16
print("DEVICE: ", DEVICE)

DEFAULT_FACE_NUMBER = 100000
MAX_SEED = np.iinfo(np.int32).max

# Source repositories cloned next to this file on first start.
TRIPOSG_REPO_URL = "https://github.com/VAST-AI-Research/TripoSG.git"
MV_ADAPTER_REPO_URL = "https://github.com/huanngzh/MV-Adapter.git"

# Local checkpoint directories.
RMBG_PRETRAINED_MODEL = "checkpoints/RMBG-1.4"
TRIPOSG_PRETRAINED_MODEL = "checkpoints/TripoSG"

# Scratch space; one sub-directory per session is created under it.
TMP_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "tmp")
os.makedirs(TMP_DIR, exist_ok=True)

# Clone the code repositories if they are not present yet. check=True makes a
# failed clone abort startup here instead of surfacing later as an ImportError.
TRIPOSG_CODE_DIR = "./triposg"
if not os.path.exists(TRIPOSG_CODE_DIR):
    subprocess.run(
        ["git", "clone", TRIPOSG_REPO_URL, TRIPOSG_CODE_DIR],
        check=True,
    )

MV_ADAPTER_CODE_DIR = "./mv_adapter"
if not os.path.exists(MV_ADAPTER_CODE_DIR):
    subprocess.run(
        ["git", "clone", MV_ADAPTER_REPO_URL, MV_ADAPTER_CODE_DIR],
        check=True,
    )

# Make the cloned packages and their scripts/ folders importable.
sys.path.append(TRIPOSG_CODE_DIR)
sys.path.append(os.path.join(TRIPOSG_CODE_DIR, "scripts"))
sys.path.append(MV_ADAPTER_CODE_DIR)
sys.path.append(os.path.join(MV_ADAPTER_CODE_DIR, "scripts"))
# Markdown shown at the top of the page. Blank lines separate the raw HTML
# <p> blocks, the headings, and the numbered list so that each one is parsed as
# its own Markdown block instead of being absorbed by the preceding block.
HEADER = """
# 🔮 Image to 3D with [TripoSG](https://github.com/VAST-AI-Research/TripoSG)

## State-of-the-art Open Source 3D Generation Using Large-Scale Rectified Flow Transformers

<p style="font-size: 1.1em;">By <a href="https://www.tripo3d.ai/" style="color: #1E90FF; text-decoration: none; font-weight: bold;">Tripo</a></p>

## 📋 Quick Start Guide:

1. **Upload an image** (single object works best)
2. Click **Generate Shape** to create the 3D mesh
3. Click **Apply Texture** to add textures
4. Use **Download GLB** to save your 3D model
5. Adjust parameters under **Generation Settings** for fine-tuning

Best results come from clean, well-lit images with clear subject isolation. Try it now!

<p style="font-size: 0.9em; margin-top: 10px;">Texture generation powered by <a href="https://github.com/huanngzh/MV-Adapter" style="color: #1E90FF; text-decoration: none;">MV-Adapter</a> - a versatile multi-view adapter for consistent texture generation. Try the <a href="https://huggingface.co/spaces/VAST-AI/MV-Adapter-I2MV-SDXL" style="color: #1E90FF; text-decoration: none;">MV-Adapter demo</a> for multi-view image generation.</p>
"""
# --- TripoSG -----------------------------------------------------------------
# NOTE: these imports are presumably resolved through the sys.path entries
# added for the cloned repositories, so they must stay below that setup.
from image_process import prepare_image
from briarmbg import BriaRMBG

# Background-removal network used by prepare_image.
snapshot_download("briaai/RMBG-1.4", local_dir=RMBG_PRETRAINED_MODEL)
rmbg_net = BriaRMBG.from_pretrained(RMBG_PRETRAINED_MODEL).to(DEVICE)
rmbg_net.eval()

from triposg.pipelines.pipeline_triposg import TripoSGPipeline

# Image-to-shape pipeline.
snapshot_download("VAST-AI/TripoSG", local_dir=TRIPOSG_PRETRAINED_MODEL)
triposg_pipe = TripoSGPipeline.from_pretrained(TRIPOSG_PRETRAINED_MODEL).to(DEVICE, DTYPE)

# --- MV-Adapter --------------------------------------------------------------
NUM_VIEWS = 6

from inference_ig2mv_sdxl import prepare_pipeline, preprocess_image, remove_bg
from mvadapter.utils import get_orthogonal_camera, tensor_to_image, make_image_grid
from mvadapter.utils.render import NVDiffRastContextWrapper, load_mesh, render

mv_adapter_pipe = prepare_pipeline(
    base_model="stabilityai/stable-diffusion-xl-base-1.0",
    vae_model="madebyollin/sdxl-vae-fp16-fix",
    unet_model=None,
    lora_model=None,
    adapter_path="huanngzh/mv-adapter",
    scheduler=None,
    num_views=NUM_VIEWS,
    device=DEVICE,
    dtype=DTYPE,  # same precision constant as the TripoSG pipeline
)

# Segmentation model and its input transform, used to cut out the reference
# image before multi-view generation.
birefnet = AutoModelForImageSegmentation.from_pretrained(
    "ZhengPeng7/BiRefNet", trust_remote_code=True
)
birefnet.to(DEVICE)
transform_image = transforms.Compose(
    [
        transforms.Resize((1024, 1024)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]
)


def remove_bg_fn(x):
    """Run remove_bg on ``x`` with the module-level BiRefNet model and transform."""
    return remove_bg(x, birefnet, transform_image, DEVICE)


# --- Texture pipeline checkpoints ---------------------------------------------
if not os.path.exists("checkpoints/RealESRGAN_x2plus.pth"):
    hf_hub_download("dtarnow/UPscaler", filename="RealESRGAN_x2plus.pth", local_dir="checkpoints")
if not os.path.exists("checkpoints/big-lama.pt"):
    # List argv (no shell); check=True aborts startup if the download fails.
    subprocess.run(
        [
            "wget",
            "-P",
            "checkpoints/",
            "https://github.com/Sanster/models/releases/download/add_big_lama/big-lama.pt",
        ],
        check=True,
    )
def start_session(req: gr.Request):
    """Create the scratch directory for the session identified by ``req``.

    The directory lives under ``TMP_DIR`` and is named after
    ``req.session_hash``; creating it again for an existing session is a no-op.
    """
    session_dir = os.path.join(TMP_DIR, str(req.session_hash))
    os.makedirs(session_dir, exist_ok=True)
    print("start session, mkdir", session_dir)
def end_session(req: gr.Request):
    """Delete the scratch directory of the session identified by ``req``.

    A directory that is already gone (never created, or removed earlier) is
    not an error: the goal of this hook is only that nothing is left behind.
    """
    save_dir = os.path.join(TMP_DIR, str(req.session_hash))
    try:
        shutil.rmtree(save_dir)
    except FileNotFoundError:
        # Nothing to clean up for this session.
        pass
def get_random_hex():
    """Return a 16-character lowercase hex string built from 8 random OS bytes."""
    return os.urandom(8).hex()
def get_random_seed(randomize_seed, seed):
    """Return the seed to use for the next generation, always as an ``int``.

    Args:
        randomize_seed: When truthy, ignore ``seed`` and draw a fresh value
            uniformly from ``[0, MAX_SEED]``.
        seed: Seed to keep when ``randomize_seed`` is falsy.

    Returns:
        An integer seed. A caller-supplied seed is coerced with ``int`` so
        that a value arriving as a float (e.g. ``7.0``) still works with
        consumers that require a true integer.
    """
    if randomize_seed:
        return random.randint(0, MAX_SEED)
    return int(seed)
def run_full(image: str, req: gr.Request):
    """Run segmentation, shape generation and texturing for one image path.

    All generation settings are fixed inside the function, and every artefact
    is written to the shared ``examples`` folder under ``TMP_DIR`` (``req`` is
    accepted but not used).

    Args:
        image: Path of the input image file.
        req: Request object supplied by Gradio.

    Returns:
        A tuple ``(image_seg, mesh_path, textured_glb_path)``: the segmented
        image from ``prepare_image``, the path of the exported untextured GLB,
        and the value returned by the texture pipeline.
    """
    # Fixed generation settings.
    seed = 0
    num_inference_steps = 50
    guidance_scale = 7.5
    simplify = True
    target_face_num = DEFAULT_FACE_NUMBER

    # ---- Stage 1: segmentation + shape generation ----
    white_background = np.array([1.0, 1.0, 1.0])
    image_seg = prepare_image(image, bg_color=white_background, rmbg_net=rmbg_net)

    shape_generator = torch.Generator(device=triposg_pipe.device).manual_seed(seed)
    outputs = triposg_pipe(
        image=image_seg,
        generator=shape_generator,
        num_inference_steps=num_inference_steps,
        guidance_scale=guidance_scale,
    ).samples[0]
    print("mesh extraction done")

    vertices = outputs[0].astype(np.float32)
    faces = np.ascontiguousarray(outputs[1])
    shape_mesh = trimesh.Trimesh(vertices, faces)

    if simplify:
        print("start simplify")
        from utils import simplify_mesh

        shape_mesh = simplify_mesh(shape_mesh, target_face_num)

    save_dir = os.path.join(TMP_DIR, "examples")
    os.makedirs(save_dir, exist_ok=True)
    mesh_path = os.path.join(save_dir, f"triposg_{get_random_hex()}.glb")
    shape_mesh.export(mesh_path)
    print("save to ", mesh_path)

    torch.cuda.empty_cache()

    # ---- Stage 2: render control maps from the exported mesh ----
    height, width = 768, 768
    base_azimuths = (0, 90, 180, 270, 180, 180)

    # Prepare cameras
    cameras = get_orthogonal_camera(
        elevation_deg=[0, 0, 0, 0, 89.99, -89.99],
        distance=[1.8] * NUM_VIEWS,
        left=-0.55,
        right=0.55,
        bottom=-0.55,
        top=0.55,
        azimuth_deg=[angle - 90 for angle in base_azimuths],
        device=DEVICE,
    )
    ctx = NVDiffRastContextWrapper(device=DEVICE, context_type="cuda")

    render_mesh = load_mesh(mesh_path, rescale=True, device=DEVICE)
    render_out = render(
        ctx,
        render_mesh,
        cameras,
        height=height,
        width=width,
        render_attr=False,
        normal_background=0.0,
    )

    # Position and normal maps are shifted/clamped into [0, 1], concatenated
    # on the last axis, then that axis is moved to position 1.
    position_map = (render_out.pos + 0.5).clamp(0, 1)
    normal_map = (render_out.normal / 2 + 0.5).clamp(0, 1)
    control_images = torch.cat([position_map, normal_map], dim=-1)
    control_images = control_images.permute(0, 3, 1, 2).to(DEVICE)

    # ---- Stage 3: multi-view generation conditioned on the input image ----
    reference = Image.open(image)
    reference = remove_bg_fn(reference)
    reference = preprocess_image(reference, height, width)

    pipe_kwargs = {}
    if isinstance(seed, int) and seed != -1:
        pipe_kwargs["generator"] = torch.Generator(device=DEVICE).manual_seed(seed)

    images = mv_adapter_pipe(
        "high quality",
        height=height,
        width=width,
        num_inference_steps=15,
        guidance_scale=3.0,
        num_images_per_prompt=NUM_VIEWS,
        control_image=control_images,
        control_conditioning_scale=1.0,
        reference_image=reference,
        reference_conditioning_scale=1.0,
        negative_prompt="watermark, ugly, deformed, noisy, blurry, low contrast",
        cross_attention_kwargs={"scale": 1.0},
        **pipe_kwargs,
    ).images

    torch.cuda.empty_cache()

    mv_image_path = os.path.join(save_dir, f"mv_adapter_{get_random_hex()}.png")
    view_grid = make_image_grid(images, rows=1)
    view_grid.save(mv_image_path)

    # ---- Stage 4: texture the mesh from the saved multi-view grid ----
    from texture import TexturePipeline, ModProcessConfig

    texture_pipe = TexturePipeline(
        upscaler_ckpt_path="checkpoints/RealESRGAN_x2plus.pth",
        inpaint_ckpt_path="checkpoints/big-lama.pt",
        device=DEVICE,
    )
    rgb_config = ModProcessConfig(view_upscale=True, inpaint_mode="view")
    textured_glb_path = texture_pipe(
        mesh_path=mesh_path,
        save_dir=save_dir,
        save_name=f"texture_mesh_{get_random_hex()}.glb",
        uv_unwarp=True,
        uv_size=4096,
        rgb_path=mv_image_path,
        rgb_process_config=rgb_config,
        camera_azimuth_deg=[angle - 90 for angle in base_azimuths],
    )

    return image_seg, mesh_path, textured_glb_path
def run_segmentation(image: str):
    """Return ``prepare_image``'s result for ``image`` on a white background.

    Args:
        image: Path of the input image file.
    """
    white_background = np.array([1.0, 1.0, 1.0])
    return prepare_image(image, bg_color=white_background, rmbg_net=rmbg_net)
def image_to_3d(
    image: Image.Image,
    seed: int,
    num_inference_steps: int,
    guidance_scale: float,
    simplify: bool,
    target_face_num: int,
    req: gr.Request
):
    """Generate a mesh from a segmented image and export it as a GLB file.

    Args:
        image: Segmented input image passed to the TripoSG pipeline.
        seed: Seed for the pipeline's random generator; coerced to ``int``
            because ``manual_seed`` does not accept floats.
        num_inference_steps: Number of sampling steps for the pipeline.
        guidance_scale: Guidance scale for the pipeline.
        simplify: When true, reduce the mesh with ``simplify_mesh``.
        target_face_num: Face budget handed to ``simplify_mesh``.
        req: Gradio request; its ``session_hash`` selects the output folder.

    Returns:
        Path of the exported ``.glb`` file inside the session directory.
    """
    outputs = triposg_pipe(
        image=image,
        generator=torch.Generator(device=triposg_pipe.device).manual_seed(int(seed)),
        num_inference_steps=num_inference_steps,
        guidance_scale=guidance_scale
    ).samples[0]
    print("mesh extraction done")
    # samples[0] is used as a (vertices, faces) pair.
    mesh = trimesh.Trimesh(outputs[0].astype(np.float32), np.ascontiguousarray(outputs[1]))

    if simplify:
        print("start simplify")
        from utils import simplify_mesh
        mesh = simplify_mesh(mesh, target_face_num)

    save_dir = os.path.join(TMP_DIR, str(req.session_hash))
    # Do not rely on the session-start hook having created the folder.
    os.makedirs(save_dir, exist_ok=True)
    mesh_path = os.path.join(save_dir, f"triposg_{get_random_hex()}.glb")
    mesh.export(mesh_path)
    print("save to ", mesh_path)

    torch.cuda.empty_cache()

    return mesh_path
def run_texture(image: str, mesh_path: str, seed: int, req: gr.Request):
    """Texture an existing mesh using multi-view images generated from ``image``.

    Args:
        image: Path of the input image file (it is opened with ``Image.open``).
        mesh_path: Path of the mesh file to render and texture.
        seed: Seed for the multi-view pipeline. ``-1`` or a non-numeric value
            leaves the pipeline unseeded; any other number is coerced to
            ``int`` so that a float such as ``7.0`` still seeds the generator.
        req: Gradio request; its ``session_hash`` selects the output folder.

    Returns:
        The value returned by the texture pipeline, used as the textured GLB
        path.
    """
    height, width = 768, 768
    base_azimuths = (0, 90, 180, 270, 180, 180)

    # Prepare cameras
    cameras = get_orthogonal_camera(
        elevation_deg=[0, 0, 0, 0, 89.99, -89.99],
        distance=[1.8] * NUM_VIEWS,
        left=-0.55,
        right=0.55,
        bottom=-0.55,
        top=0.55,
        azimuth_deg=[x - 90 for x in base_azimuths],
        device=DEVICE,
    )
    ctx = NVDiffRastContextWrapper(device=DEVICE, context_type="cuda")

    mesh = load_mesh(mesh_path, rescale=True, device=DEVICE)
    render_out = render(
        ctx,
        mesh,
        cameras,
        height=height,
        width=width,
        render_attr=False,
        normal_background=0.0,
    )
    # Position and normal maps are shifted/clamped into [0, 1], concatenated
    # on the last axis, then that axis is moved to position 1.
    control_images = (
        torch.cat(
            [
                (render_out.pos + 0.5).clamp(0, 1),
                (render_out.normal / 2 + 0.5).clamp(0, 1),
            ],
            dim=-1,
        )
        .permute(0, 3, 1, 2)
        .to(DEVICE)
    )

    # NOTE(review): the opened image is deliberately not wrapped in a `with`
    # block; remove_bg may return the same object it was given — confirm
    # before adding explicit closing.
    image = Image.open(image)
    image = remove_bg_fn(image)
    image = preprocess_image(image, height, width)

    pipe_kwargs = {}
    if isinstance(seed, (int, float)) and seed != -1:
        pipe_kwargs["generator"] = torch.Generator(device=DEVICE).manual_seed(int(seed))

    images = mv_adapter_pipe(
        "high quality",
        height=height,
        width=width,
        num_inference_steps=15,
        guidance_scale=3.0,
        num_images_per_prompt=NUM_VIEWS,
        control_image=control_images,
        control_conditioning_scale=1.0,
        reference_image=image,
        reference_conditioning_scale=1.0,
        negative_prompt="watermark, ugly, deformed, noisy, blurry, low contrast",
        cross_attention_kwargs={"scale": 1.0},
        **pipe_kwargs,
    ).images

    torch.cuda.empty_cache()

    save_dir = os.path.join(TMP_DIR, str(req.session_hash))
    # Do not rely on the session-start hook having created the folder.
    os.makedirs(save_dir, exist_ok=True)
    mv_image_path = os.path.join(save_dir, f"mv_adapter_{get_random_hex()}.png")
    make_image_grid(images, rows=1).save(mv_image_path)

    from texture import TexturePipeline, ModProcessConfig
    texture_pipe = TexturePipeline(
        upscaler_ckpt_path="checkpoints/RealESRGAN_x2plus.pth",
        inpaint_ckpt_path="checkpoints/big-lama.pt",
        device=DEVICE,
    )

    textured_glb_path = texture_pipe(
        mesh_path=mesh_path,
        save_dir=save_dir,
        save_name=f"texture_mesh_{get_random_hex()}.glb",
        uv_unwarp=True,
        uv_size=4096,
        rgb_path=mv_image_path,
        rgb_process_config=ModProcessConfig(view_upscale=True, inpaint_mode="view"),
        camera_azimuth_deg=[x - 90 for x in base_azimuths],
    )

    return textured_glb_path
# Gradio UI: inputs and settings in the left column, generated models in the
# right column, cached examples below, followed by the event wiring.
# NOTE(review): the nesting of the layout containers was reconstructed from
# the statement order — confirm against the intended layout.
with gr.Blocks(title="TripoSG") as demo:
    gr.Markdown(HEADER)

    with gr.Row():
        with gr.Column():
            with gr.Row():
                image_prompts = gr.Image(label="Input Image", type="filepath")
                seg_image = gr.Image(
                    label="Segmentation Result", type="pil", format="png", interactive=False
                )

            with gr.Accordion("Generation Settings", open=True):
                seed = gr.Slider(
                    label="Seed",
                    minimum=0,
                    maximum=MAX_SEED,
                    step=1,  # seeds are whole numbers; a step of 0 is not a valid increment
                    value=0
                )
                randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
                num_inference_steps = gr.Slider(
                    label="Number of inference steps",
                    minimum=8,
                    maximum=50,
                    step=1,
                    value=50,
                )
                guidance_scale = gr.Slider(
                    label="CFG scale",
                    minimum=0.0,
                    maximum=20.0,
                    step=0.1,
                    value=7.0,
                )

                with gr.Row():
                    reduce_face = gr.Checkbox(label="Simplify Mesh", value=True)
                    target_face_num = gr.Slider(maximum=1000000, minimum=10000, value=DEFAULT_FACE_NUMBER, label="Target Face Number")

            gen_button = gr.Button("Generate Shape", variant="primary")
            # Enabled only after a shape has been generated (see the chain below).
            gen_texture_button = gr.Button("Apply Texture", interactive=False)

        with gr.Column():
            model_output = gr.Model3D(label="Generated GLB", interactive=False)
            textured_model_output = gr.Model3D(label="Textured GLB", interactive=False)

    with gr.Row():
        examples = gr.Examples(
            # sorted(): os.listdir order is arbitrary; a stable order keeps the
            # example list (and its cached results) consistent between runs.
            examples=[
                f"{TRIPOSG_CODE_DIR}/assets/example_data/{image}"
                for image in sorted(os.listdir(f"{TRIPOSG_CODE_DIR}/assets/example_data"))
            ],
            fn=run_full,
            inputs=[image_prompts],
            outputs=[seg_image, model_output, textured_model_output],
            cache_examples=True,
        )

    # Generate Shape: segment -> pick seed -> build mesh -> unlock texturing.
    gen_button.click(
        run_segmentation,
        inputs=[image_prompts],
        outputs=[seg_image]
    ).then(
        get_random_seed,
        inputs=[randomize_seed, seed],
        outputs=[seed],
    ).then(
        image_to_3d,
        inputs=[
            seg_image,
            seed,
            num_inference_steps,
            guidance_scale,
            reduce_face,
            target_face_num
        ],
        outputs=[model_output]
    ).then(lambda: gr.Button(interactive=True), outputs=[gen_texture_button])

    # Apply Texture: reuses the original upload, the generated mesh and the seed.
    gen_texture_button.click(
        run_texture,
        inputs=[image_prompts, model_output, seed],
        outputs=[textured_model_output]
    )

    # Per-session scratch directory lifecycle.
    demo.load(start_session)
    demo.unload(end_session)

demo.launch()