ModelMan

Runtime error

App Files Files Community

ModelMan / gradio_app.py

SIGMitch

Update gradio_app.py

4c2fab3 verified about 1 year ago

raw

history blame

17.5 kB

	import spaces
	import argparse
	import os
	import json
	import torch
	import sys
	import time
	import importlib
	import numpy as np
	from omegaconf import OmegaConf
	from huggingface_hub import hf_hub_download

	from collections import OrderedDict
	import trimesh
	import gradio as gr
	from typing import Any
	from einops import rearrange

	# Make the parent of the directory containing this file importable.
	_this_file = os.path.abspath(__file__)
	proj_dir = os.path.dirname(os.path.dirname(_this_file))
	sys.path.append(proj_dir)

	import tempfile

	from apps.utils import *

	# Static UI text: _TITLE is used as the gr.Blocks title and page heading,
	# _DESCRIPTION and _CITE_ are rendered through gr.Markdown in the main block.
	_TITLE = '''ModelMan'''
	_DESCRIPTION = '''
	'''
	# Citation footer (Markdown); raw string so backslashes are kept literally.
	_CITE_ = r"""
	---
	📝 Citation

	```
	@article
	```
	"""
	from apps.third_party.CRM.pipelines import TwoStagePipeline
	from apps.third_party.LGM.pipeline_mvdream import MVDreamPipeline
	from apps.third_party.Era3D.pipelines.pipeline_mvdiffusion_unclip import StableUnCLIPImg2ImgPipeline
	from apps.third_party.Era3D.data.single_image_dataset import SingleImageDataset

	import re
	import os
	import stat

	# Bit values of the read / write / execute permissions inside one octal digit.
	RD = 4
	WD = 2
	XD = 1
	BNS = [RD, WD, XD]

	# stat flags laid out as MDS[permission][who]:
	# rows are read/write/execute, columns are user/group/other.
	MDS = [
	    [getattr(stat, f"S_I{perm}{who}") for who in ("USR", "GRP", "OTH")]
	    for perm in ("R", "W", "X")
	]

	def chmod(path, mode):
	    """Set the permission bits of ``path`` from an octal mode such as ``"755"``.

	    Args:
	        path: File or directory whose mode is changed.
	        mode: One to three octal digits, as a string (``"644"``) or as an
	            int whose decimal digits are read as octal digits (``644``).
	            Shorter values are implicitly left-padded with zeros, so ``"7"``
	            means ``0o007``.

	    Raises:
	        ValueError: If ``mode`` is not one to three octal digits.
	        OSError: Propagated from ``os.chmod``.
	    """
	    if isinstance(mode, int):
	        mode = str(mode)
	    # fullmatch (unlike match with "$") also rejects a trailing newline.
	    if not re.fullmatch(r"[0-7]{1,3}", mode):
	        raise ValueError("mode does not conform to ^[0-7]{1,3}$ pattern")
	    # The stat.S_I* flags are exactly the octal digit bits, so parsing the
	    # string as base 8 yields the same value as summing the flags one by one.
	    os.chmod(path, int(mode, 8))

	# Grant full permissions on the InstantMeshes path, which image2mesh runs as
	# an external command for the optional remesh step.
	# NOTE(review): `parent_dir` is not defined in the visible part of this file;
	# presumably it is provided by `from apps.utils import *` -- confirm, otherwise
	# this line raises NameError at import time.
	chmod(f"{parent_dir}/apps/third_party/InstantMeshes", "777")

	# Shared state read by gen_mvimg / image2mesh; device, model, cached_dir and
	# the three pipelines are filled in by the __main__ block, generator by gen_mvimg.
	device = None
	model = None
	cached_dir = None
	generator = None

	sys.path.append("apps/third_party/CRM")
	crm_pipeline = None

	sys.path.append("apps/third_party/LGM")
	# Spelled to match the name gen_mvimg and the __main__ block actually use.
	imagedream_pipeline = None

	sys.path.append("apps/third_party/Era3D")
	era3d_pipeline = None

	@spaces.GPU(duration=120)
	def gen_mvimg(
	    mvimg_model, image, seed, guidance_scale, step, text, neg_text, elevation, backgroud_color
	):
	    """Generate four views of ``image`` with the selected multi-view model.

	    Args:
	        mvimg_model: ``"CRM"``, ``"ImageDream"`` or ``"Era3D"``.
	        image: Input image; the CRM and ImageDream branches alpha-composite
	            it onto an RGBA background, so it must be RGBA there.
	        seed: Random seed. ``0`` (or ``None``) draws a random seed from
	            ``np.random.randint(1, 65535)``.
	        guidance_scale: Guidance scale forwarded to the chosen pipeline.
	        step: Number of sampling steps forwarded to the chosen pipeline.
	        text: Prompt; only used by ImageDream.
	        neg_text: Negative prompt; only used by ImageDream.
	        elevation: Elevation passed to ImageDream; unused otherwise.
	        backgroud_color: Background colour for compositing; only used by
	            ImageDream (CRM uses a fixed grey, Era3D a white background).

	    Returns:
	        A 4-tuple of images which the UI wires to the front, right, back and
	        left view widgets, in that order.

	    Raises:
	        ValueError: If ``mvimg_model`` is not one of the supported names.
	    """
	    global generator

	    # UI number fields may deliver floats or None, while
	    # torch.Generator.manual_seed requires a plain int.
	    seed = int(seed or 0)
	    if seed == 0:
	        seed = int(np.random.randint(1, 65535))
	    step = int(step)

	    generator = torch.Generator(device)
	    generator.manual_seed(seed)

	    # NOTE(review): `Image` is not imported explicitly in this file; presumably
	    # it arrives through `from apps.utils import *` -- confirm.
	    if mvimg_model == "CRM":
	        crm_pipeline.set_seed(seed)
	        background = Image.new("RGBA", image.size, (127, 127, 127))
	        image = Image.alpha_composite(background, image)
	        mv_imgs = crm_pipeline(
	            image,
	            scale=guidance_scale,
	            step=step,
	        )["stage1_images"]
	        return mv_imgs[5], mv_imgs[3], mv_imgs[2], mv_imgs[0]

	    if mvimg_model == "ImageDream":
	        background = Image.new("RGBA", image.size, backgroud_color)
	        image = Image.alpha_composite(background, image)
	        image = np.array(image).astype(np.float32) / 255.0
	        # Blend RGB against white using the alpha channel, dropping alpha.
	        image = image[..., :3] * image[..., 3:4] + (1 - image[..., 3:4])
	        mv_imgs = imagedream_pipeline(
	            text,
	            image,
	            negative_prompt=neg_text,
	            guidance_scale=guidance_scale,
	            num_inference_steps=step,
	            elevation=elevation,
	            generator=generator,
	        )
	        return mv_imgs[1], mv_imgs[2], mv_imgs[3], mv_imgs[0]

	    if mvimg_model == "Era3D":
	        era3d_pipeline.to(device)
	        era3d_pipeline.unet.enable_xformers_memory_efficient_attention()
	        era3d_pipeline.set_progress_bar_config(disable=True)

	        crop_size = 420
	        batch = SingleImageDataset(
	            root_dir='',
	            num_views=6,
	            img_wh=[512, 512],
	            bg_color='white',
	            crop_size=crop_size,
	            single_image=image,
	            prompt_embeds_path='apps/third_party/Era3D/data/fixed_prompt_embeds_6view',
	        )[0]
	        # The input views are duplicated so that the batch lines up with the
	        # concatenated normal + colour prompt embeddings below.
	        imgs_in = torch.cat([batch['imgs_in']] * 2, dim=0)
	        imgs_in = rearrange(imgs_in, "B Nv C H W -> (B Nv) C H W")  # (B*Nv, 3, H, W)

	        normal_prompt_embeddings = batch['normal_prompt_embeddings']
	        clr_prompt_embeddings = batch['color_prompt_embeddings']
	        prompt_embeddings = torch.cat([normal_prompt_embeddings, clr_prompt_embeddings], dim=0)
	        prompt_embeddings = rearrange(prompt_embeddings, "B Nv N C -> (B Nv) N C")

	        imgs_in = imgs_in.to(dtype=torch.float16)
	        prompt_embeddings = prompt_embeddings.to(dtype=torch.float16)

	        mv_imgs = era3d_pipeline(
	            imgs_in,
	            None,
	            prompt_embeds=prompt_embeddings,
	            generator=generator,
	            guidance_scale=guidance_scale,
	            num_inference_steps=step,
	            num_images_per_prompt=1,
	            eta=1.0,
	        ).images
	        return mv_imgs[6], mv_imgs[8], mv_imgs[9], mv_imgs[10]

	    # Previously an unknown name fell through and returned None.
	    raise ValueError(f"Unsupported multi-view model: {mvimg_model!r}")

	@spaces.GPU
	def image2mesh(view_front: np.ndarray,
	               view_right: np.ndarray,
	               view_back: np.ndarray,
	               view_left: np.ndarray,
	               more: bool = False,
	               scheluder_name: str = "DDIMScheduler",
	               guidance_scale: float = 7.5,
	               steps: int = 50,
	               seed: int = 4,
	               octree_depth: int = 7):
	    """Turn four view images into a mesh and return the path of an OBJ file.

	    Args:
	        view_front: Front view as an array accepted by ``Image.fromarray``.
	        view_right: Right view, same format.
	        view_back: Back view, same format.
	        view_left: Left view, same format.
	        more: Extra options. The UI passes the list of ticked checkbox
	            labels; remeshing runs when it contains ``"Remesh"``. A plain
	            ``True`` also enables remeshing, ``False``/``None`` disables it.
	        scheluder_name: Accepted for interface compatibility; not used here.
	        guidance_scale: Guidance scale forwarded to ``model.sample``.
	        steps: Number of sampling steps forwarded to ``model.sample``.
	        seed: Seed forwarded to ``model.sample``.
	        octree_depth: Forwarded to ``extract_geometry``.

	    Returns:
	        Path of a temporary ``.obj`` file: the remeshed mesh when remeshing
	        was requested and succeeded, otherwise the directly extracted mesh.

	    Raises:
	        ValueError: If geometry extraction does not yield exactly one mesh.
	    """
	    import subprocess  # only needed for the optional remesh step

	    # NOTE(review): `Image` is not imported explicitly in this file; presumably
	    # it arrives through `from apps.utils import *` -- confirm.
	    views = (view_front, view_right, view_back, view_left)
	    sample_inputs = {"mvimages": [[Image.fromarray(view) for view in views]]}

	    latents = model.sample(
	        sample_inputs,
	        sample_times=1,
	        guidance_scale=guidance_scale,
	        return_intermediates=False,
	        steps=steps,
	        seed=seed,
	    )[0]

	    # decode the latents to mesh
	    box_v = 1.1
	    mesh_outputs, _ = model.shape_model.extract_geometry(
	        latents,
	        bounds=[-box_v, -box_v, -box_v, box_v, box_v, box_v],
	        octree_depth=octree_depth,
	    )
	    if len(mesh_outputs) != 1:
	        raise ValueError("Only support single mesh output for gradio demo")
	    mesh = trimesh.Trimesh(mesh_outputs[0][0], mesh_outputs[0][1])

	    # Reserve a temp file name and close the handle before exporting to it.
	    with tempfile.NamedTemporaryFile(suffix=".obj", delete=False) as tmp:
	        filepath = tmp.name
	    mesh.export(filepath, include_normals=True)

	    # The default `more=False` must not hit `"Remesh" in False` (TypeError).
	    wants_remesh = more if isinstance(more, bool) else bool(more) and 'Remesh' in more
	    if wants_remesh:
	        with tempfile.NamedTemporaryFile(suffix="_remeshed.obj", delete=False) as tmp:
	            remeshed_filepath = tmp.name
	        print("Remeshing with Instant Meshes...")
	        target_face_count = 2000
	        # Argument list instead of a shell string: no shell parsing of paths.
	        command = [
	            f"{proj_dir}/apps/third_party/InstantMeshes",
	            filepath,
	            "-f", str(target_face_count),
	            "-o", remeshed_filepath,
	        ]
	        try:
	            returncode = subprocess.run(command, check=False).returncode
	        except OSError as err:
	            print(f"Instant Meshes could not be started: {err}")
	            returncode = -1
	        # Remeshing is best-effort: keep the original mesh if it failed or
	        # produced nothing, instead of returning an empty file.
	        if returncode == 0 and os.path.getsize(remeshed_filepath) > 0:
	            filepath = remeshed_filepath
	        else:
	            print("Remeshing failed; returning the original mesh.")

	    return filepath

	# Script entry point: parse CLI options, load the multi-view pipelines and the
	# shape model into the module-level globals, build the Gradio UI and launch it.
	if __name__=="__main__":
	parser = argparse.ArgumentParser()
	# parser.add_argument("--model_path", type=str, required=True, help="Path to the object file",)
	parser.add_argument("--cached_dir", type=str, default="./gradio_cached_dir")
	parser.add_argument("--device", type=int, default=0)
	args = parser.parse_args()

	cached_dir = args.cached_dir
	os.makedirs(args.cached_dir, exist_ok=True)
	# Use the requested CUDA device index when CUDA is available, else the CPU.
	device = torch.device(f"cuda:{args.device}" if torch.cuda.is_available() else "cpu")
	print(f"using device: {device}")

	# for multi-view images generation
	# Only the keys are used (as dropdown choices); the name `background_choice`
	# is later rebound to the dropdown widget itself.
	background_choice = OrderedDict({
	"Alpha as Mask": "Alpha as Mask",
	"Auto Remove Background": "Auto Remove Background",
	"Original Image": "Original Image",
	})
	mvimg_model_config_list = [
	"Era3D",
	"CRM",
	"ImageDream"
	]
	if "Era3D" in mvimg_model_config_list:
	# cfg = load_config("apps/third_party/Era3D/configs/test_unclip-512-6view.yaml")
	# schema = OmegaConf.structured(TestConfig)
	# cfg = OmegaConf.merge(schema, cfg)
	# NOTE(review): this call passes `dtype=` while the ImageDream load below
	# passes `torch_dtype=`; confirm `dtype` is honoured by from_pretrained here,
	# since gen_mvimg feeds this pipeline float16 tensors.
	era3d_pipeline = StableUnCLIPImg2ImgPipeline.from_pretrained(
	'pengHTYX/MacLab-Era3D-512-6view',
	dtype=torch.float16,
	)
	# enable xformers
	# era3d_pipeline.unet.enable_xformers_memory_efficient_attention()
	# era3d_pipeline.to(device)
	if "CRM" in mvimg_model_config_list:
	stage1_config = OmegaConf.load(f"apps/third_party/CRM/configs/nf7_v3_SNR_rd_size_stroke.yaml").config
	stage1_sampler_config = stage1_config.sampler
	stage1_model_config = stage1_config.models
	# Point the stage-1 config at the downloaded checkpoint and at the config
	# file path relative to the CRM directory.
	stage1_model_config.resume = hf_hub_download(repo_id="Zhengyi/CRM", filename="pixel-diffusion.pth", repo_type="model")
	stage1_model_config.config = f"apps/third_party/CRM/" + stage1_model_config.config
	crm_pipeline = TwoStagePipeline(
	stage1_model_config,
	stage1_sampler_config,
	device=device,
	dtype=torch.float16
	)
	if "ImageDream" in mvimg_model_config_list:
	imagedream_pipeline = MVDreamPipeline.from_pretrained(
	"ashawkey/imagedream-ipmv-diffusers", # remote weights
	torch_dtype=torch.float16,
	trust_remote_code=True,
	)


	# for 3D latent set diffusion
	ckpt_path = hf_hub_download(repo_id="wyysf/CraftsMan", filename="image-to-shape-diffusion/clip-mvrgb-modln-l256-e64-ne8-nd16-nl6-aligned-vae/model.ckpt", repo_type="model")
	config_path = hf_hub_download(repo_id="wyysf/CraftsMan", filename="image-to-shape-diffusion/clip-mvrgb-modln-l256-e64-ne8-nd16-nl6-aligned-vae/config.yaml", repo_type="model")
	# ckpt_path = hf_hub_download(repo_id="wyysf/CraftsMan", filename="image-to-shape-diffusion/clip-mvrgb-modln-l256-e64-ne8-nd16-nl6/model-300k.ckpt", repo_type="model")
	# config_path = hf_hub_download(repo_id="wyysf/CraftsMan", filename="image-to-shape-diffusion/clip-mvrgb-modln-l256-e64-ne8-nd16-nl6/config.yaml", repo_type="model")
	# Only the keys are used, as the choices of the scheduler dropdown.
	scheluder_dict = OrderedDict({
	"DDIMScheduler": 'diffusers.schedulers.DDIMScheduler',
	# "DPMSolverMultistepScheduler": 'diffusers.schedulers.DPMSolverMultistepScheduler', # not support yet
	# "UniPCMultistepScheduler": 'diffusers.schedulers.UniPCMultistepScheduler', # not support yet
	})

	# main GUI
	custom_theme = gr.themes.Soft(primary_hue="blue").set(
	button_secondary_background_fill="*neutral_100",
	button_secondary_background_fill_hover="*neutral_200")
	custom_css = '''#disp_image {
	text-align: center; /* Horizontally center the content */
	}'''

	with gr.Blocks(title=_TITLE, theme=custom_theme, css=custom_css) as demo:
	with gr.Row():
	with gr.Column(scale=1):
	gr.Markdown('# ' + _TITLE)
	gr.Markdown(_DESCRIPTION)

	# Input controls: image upload, seed / model choice, prompts and examples.
	with gr.Row():
	with gr.Column(scale=2):
	with gr.Column():
	# input image
	with gr.Row():
	image_input = gr.Image(
	label="Image Input",
	image_mode="RGBA",
	sources="upload",
	type="pil",
	)
	run_btn = gr.Button('Generate', variant='primary', interactive=True)

	with gr.Row():
	gr.Markdown('''Try a different <b>seed and MV Model</b> for better results. Good Luck :)''')
	with gr.Row():
	seed = gr.Number(0, label='Seed', show_label=True)
	mvimg_model = gr.Dropdown(value="CRM", label="MV Image Model", choices=list(mvimg_model_config_list))
	more = gr.CheckboxGroup(["Remesh"], label="More", show_label=False)

	with gr.Row():
	# input prompt
	text = gr.Textbox(label="Prompt (Opt.)", info="only works for ImageDream")

	with gr.Accordion('Advanced options', open=False):
	# negative prompt
	neg_text = gr.Textbox(label="Negative Prompt", value='ugly, blurry, pixelated obscure, unnatural colors, poor lighting, dull, unclear, cropped, lowres, low quality, artifacts, duplicate')
	# elevation
	elevation = gr.Slider(label="elevation", minimum=-90, maximum=90, step=1, value=0)

	with gr.Row():
	# Every file found in ./apps/examples is offered as an example input.
	gr.Examples(
	examples=[os.path.join("./apps/examples", i) for i in os.listdir("./apps/examples")],
	inputs=[image_input],
	examples_per_page=8
	)

	# Output widgets: the 3D model viewer and the four generated views.
	with gr.Column(scale=4):
	with gr.Row():
	output_model_obj = gr.Model3D(
	label="Output Model (OBJ Format)",
	camera_position=(90.0, 90.0, 3.5),
	interactive=False,
	)
	# with gr.Row():
	# gr.Markdown('''*please note that the model is fliped due to the gradio viewer, please download the obj file and you will get the correct orientation.''')

	with gr.Row():
	view_front = gr.Image(label="Front", interactive=True, show_label=True)
	view_right = gr.Image(label="Right", interactive=True, show_label=True)
	view_back = gr.Image(label="Back", interactive=True, show_label=True)
	view_left = gr.Image(label="Left", interactive=True, show_label=True)

	with gr.Accordion('Advanced options', open=False):
	with gr.Row(equal_height=True):
	run_mv_btn = gr.Button('Only Generate 2D', interactive=True)
	run_3d_btn = gr.Button('Only Generate 3D', interactive=True)

	with gr.Accordion('Advanced options (2D)', open=False):
	with gr.Row():
	foreground_ratio = gr.Slider(
	label="Foreground Ratio",
	minimum=0.5,
	maximum=1.0,
	value=1.0,
	step=0.05,
	)

	with gr.Row():
	# Rebinds `background_choice` from the OrderedDict to the dropdown widget.
	background_choice = gr.Dropdown(label="Backgroud Choice", value="Auto Remove Background",choices=list(background_choice.keys()))
	rmbg_type = gr.Dropdown(label="Backgroud Remove Type", value="rembg",choices=['sam', "rembg"])
	backgroud_color = gr.ColorPicker(label="Background Color", value="#FFFFFF", interactive=True)
	# backgroud_color = gr.ColorPicker(label="Background Color", value="#7F7F7F", interactive=True)

	with gr.Row():
	mvimg_guidance_scale = gr.Number(value=3.0, minimum=1, maximum=10, label="2D Guidance Scale")
	mvimg_steps = gr.Number(value=30, minimum=20, maximum=100, label="2D Sample Steps")

	with gr.Accordion('Advanced options (3D)', open=False):
	with gr.Row():
	guidance_scale = gr.Number(label="3D Guidance Scale", value=3.0, minimum=1.0, maximum=10.0)
	steps = gr.Number(value=50, minimum=20, maximum=100, label="3D Sample Steps")

	with gr.Row():
	scheduler = gr.Dropdown(label="scheluder", value="DDIMScheduler",choices=list(scheluder_dict.keys()))
	octree_depth = gr.Slider(label="Octree Depth", value=7, minimum=4, maximum=8, step=1)

	gr.Markdown(_CITE_)

	outputs = [output_model_obj]
	# NOTE(review): RMBG, load_model and check_input_image are not defined in the
	# visible part of this file; presumably they come from `from apps.utils import *`.
	rmbg = RMBG(device)

	model = load_model(ckpt_path, config_path, device)

	# "Generate": validate input -> remove background -> multi-view images -> mesh.
	# Each .success() step only runs if the previous step did not raise.
	run_btn.click(fn=check_input_image, inputs=[image_input]
	).success(
	fn=rmbg.run,
	inputs=[rmbg_type, image_input, foreground_ratio, background_choice, backgroud_color],
	outputs=[image_input]
	).success(
	fn=gen_mvimg,
	inputs=[mvimg_model, image_input, seed, mvimg_guidance_scale, mvimg_steps, text, neg_text, elevation, backgroud_color],
	outputs=[view_front, view_right, view_back, view_left]
	).success(
	fn=image2mesh,
	inputs=[view_front, view_right, view_back, view_left, more, scheduler, guidance_scale, steps, seed, octree_depth],
	outputs=outputs,
	api_name="generate_img2obj")
	# "Only Generate 2D": multi-view generation on the current input image as-is.
	run_mv_btn.click(fn=gen_mvimg,
	inputs=[mvimg_model, image_input, seed, mvimg_guidance_scale, mvimg_steps, text, neg_text, elevation, backgroud_color],
	outputs=[view_front, view_right, view_back, view_left]
	)
	# "Only Generate 3D": mesh generation from whatever is in the four view widgets.
	# NOTE(review): this reuses api_name "generate_img2obj" from the chained event
	# above -- confirm how Gradio resolves duplicate API names.
	run_3d_btn.click(fn=image2mesh,
	inputs=[view_front, view_right, view_back, view_left, more, scheduler, guidance_scale, steps, seed, octree_depth],
	outputs=outputs,
	api_name="generate_img2obj")

	# share=True requests a public share link; files under cached_dir may be served.
	demo.queue().launch(share=True, allowed_paths=[args.cached_dir])