DyrusQZ committed
Commit 04c1907 · 1 Parent(s): 7457a7b

to fix gs error

LHM/models/rendering/__pycache__/gs_renderer.cpython-310.pyc CHANGED
Binary files a/LHM/models/rendering/__pycache__/gs_renderer.cpython-310.pyc and b/LHM/models/rendering/__pycache__/gs_renderer.cpython-310.pyc differ
 
LHM/models/rendering/gs_renderer.py CHANGED
@@ -818,7 +818,7 @@ class GS3DRenderer(nn.Module):
     def hyper_step(self, step):
         self.gs_net.hyper_step(step)
 
-    @torch.no_grad()
+
     def forward_single_view(
         self,
         gs: GaussianModel,
@@ -829,14 +829,14 @@ class GS3DRenderer(nn.Module):
         # Create zero tensor. We will use it to make pytorch return gradients of the 2D (screen-space) means
         screenspace_points = (
             torch.zeros_like(
-                gs.xyz, dtype=gs.xyz.dtype, requires_grad=True, device=self.device
+                gs.xyz, dtype=gs.xyz.dtype, requires_grad=False, device=self.device
             )
             + 0
         )
-        try:
-            screenspace_points.retain_grad()
-        except:
-            pass
+        # try:
+        #     screenspace_points.retain_grad()
+        # except:
+        #     pass
 
         bg_color = background_color
         # Set up rasterization configuration
@@ -877,23 +877,25 @@ class GS3DRenderer(nn.Module):
         shs = None
         colors_precomp = None
         if self.gs_net.use_rgb:
-            colors_precomp = gs.shs.squeeze(1).float()
+            colors_precomp = gs.shs.squeeze(1)
             shs = None
         else:
             colors_precomp = None
-            shs = gs.shs.float()
+            shs = gs.shs
 
         # Rasterize visible Gaussians to image, obtain their radii (on screen).
         # NOTE that dadong tries to regress rgb not shs
         # with torch.autocast(device_type=self.device.type, dtype=torch.float32):
+        print(means3D.device, means2D.device, colors_precomp.device, opacity.device, rotations.device, self.device)
+        print(means3D.dtype, means2D.dtype, colors_precomp.dtype)
         rendered_image, radii, rendered_depth, rendered_alpha = rasterizer(
-            means3D=means3D.float(),
-            means2D=means2D.float(),
+            means3D=means3D,
+            means2D=means2D,
             shs=shs,
             colors_precomp=colors_precomp,
-            opacities=opacity.float(),
-            scales=scales.float(),
-            rotations=rotations.float(),
+            opacities=opacity,
+            scales=scales,
+            rotations=rotations,
             cov3D_precomp=cov3D_precomp,
         )
 
@@ -1322,7 +1324,7 @@ class GS3DRenderer(nn.Module):
             gs_attr_list.append(gs_attr)
 
         return gs_attr_list, query_points, smplx_data
-    @torch.no_grad()
+
     def forward_animate_gs(
         self,
         gs_attr_list,
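
The change above removes the @torch.no_grad() decorators from forward_single_view and forward_animate_gs, turns off requires_grad for screenspace_points, drops the explicit .float() casts, and adds two print calls that expose the device and dtype of every input handed to the rasterizer. Below is a minimal sketch of that same run-time check factored into a reusable helper; the helper name report_tensor_meta and the mismatch marker are illustrative assumptions, not code from this repository.

import torch

def report_tensor_meta(expected_device, **tensors):
    # Hypothetical debugging helper (not part of gs_renderer.py). It prints the same
    # device/dtype information as the print() calls added in this commit and flags
    # tensors that do not live on the expected device.
    expected = torch.device(expected_device)
    for name, t in tensors.items():
        if isinstance(t, torch.Tensor):
            note = "" if t.device.type == expected.type else "  <-- device mismatch"
            print(f"{name}: device={t.device}, dtype={t.dtype}, shape={tuple(t.shape)}{note}")

# Example call using the argument names from forward_single_view:
# report_tensor_meta(self.device, means3D=means3D, means2D=means2D,
#                    colors_precomp=colors_precomp, opacities=opacity,
#                    scales=scales, rotations=rotations)

Note that without @torch.no_grad(), autograd tracks these calls whenever the inputs require gradients, so inference callers that want the previous memory behaviour would need to wrap the render loop in torch.no_grad() themselves.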
app.py CHANGED
@@ -13,772 +13,772 @@
13
  # limitations under the License.
14
 
15
 
16
- import os
17
- os.system("rm -rf /data-nvme/zerogpu-offload/")
18
- import cv2
19
- import time
20
- from PIL import Image
21
- import numpy as np
22
- import gradio as gr
23
- import base64
24
- import spaces
25
- import torch
26
- torch._dynamo.config.disable = True
27
- import subprocess
28
- import os
29
- import argparse
30
- from omegaconf import OmegaConf
31
- from rembg import remove
32
- from engine.pose_estimation.pose_estimator import PoseEstimator
33
- from LHM.utils.face_detector import VGGHeadDetector
34
- from LHM.utils.hf_hub import wrap_model_hub
35
- from LHM.runners.infer.utils import (
36
- calc_new_tgt_size_by_aspect,
37
- center_crop_according_to_mask,
38
- prepare_motion_seqs,
39
- resize_image_keepaspect_np,
40
- )
41
- from engine.SegmentAPI.base import Bbox
42
-
43
- def get_bbox(mask):
44
- height, width = mask.shape
45
- pha = mask / 255.0
46
- pha[pha < 0.5] = 0.0
47
- pha[pha >= 0.5] = 1.0
48
-
49
- # obtain bbox
50
- _h, _w = np.where(pha == 1)
51
-
52
- whwh = [
53
- _w.min().item(),
54
- _h.min().item(),
55
- _w.max().item(),
56
- _h.max().item(),
57
- ]
58
-
59
- box = Bbox(whwh)
60
-
61
- # scale box to 1.05
62
- scale_box = box.scale(1.1, width=width, height=height)
63
- return scale_box
64
-
65
- def infer_preprocess_image(
66
- rgb_path,
67
- mask,
68
- intr,
69
- pad_ratio,
70
- bg_color,
71
- max_tgt_size,
72
- aspect_standard,
73
- enlarge_ratio,
74
- render_tgt_size,
75
- multiply,
76
- need_mask=True,
77
- ):
78
- """inferece
79
- image, _, _ = preprocess_image(image_path, mask_path=None, intr=None, pad_ratio=0, bg_color=1.0,
80
- max_tgt_size=896, aspect_standard=aspect_standard, enlarge_ratio=[1.0, 1.0],
81
- render_tgt_size=source_size, multiply=14, need_mask=True)
82
-
83
- """
84
-
85
- rgb = np.array(Image.open(rgb_path))
86
- rgb_raw = rgb.copy()
87
-
88
- bbox = get_bbox(mask)
89
- bbox_list = bbox.get_box()
90
-
91
- rgb = rgb[bbox_list[1] : bbox_list[3], bbox_list[0] : bbox_list[2]]
92
- mask = mask[bbox_list[1] : bbox_list[3], bbox_list[0] : bbox_list[2]]
93
-
94
- h, w, _ = rgb.shape
95
- assert w < h
96
- cur_ratio = h / w
97
- scale_ratio = cur_ratio / aspect_standard
98
-
99
- target_w = int(min(w * scale_ratio, h))
100
- offset_w = (target_w - w) // 2
101
- # resize to target ratio.
102
- if offset_w > 0:
103
- rgb = np.pad(
104
- rgb,
105
- ((0, 0), (offset_w, offset_w), (0, 0)),
106
- mode="constant",
107
- constant_values=255,
108
- )
109
- mask = np.pad(
110
- mask,
111
- ((0, 0), (offset_w, offset_w)),
112
- mode="constant",
113
- constant_values=0,
114
- )
115
- else:
116
- offset_w = -offset_w
117
- rgb = rgb[:,offset_w:-offset_w,:]
118
- mask = mask[:,offset_w:-offset_w]
119
-
120
- # resize to target ratio.
121
-
122
- rgb = np.pad(
123
- rgb,
124
- ((0, 0), (offset_w, offset_w), (0, 0)),
125
- mode="constant",
126
- constant_values=255,
127
- )
128
-
129
- mask = np.pad(
130
- mask,
131
- ((0, 0), (offset_w, offset_w)),
132
- mode="constant",
133
- constant_values=0,
134
- )
135
-
136
- rgb = rgb / 255.0 # normalize to [0, 1]
137
- mask = mask / 255.0
138
-
139
- mask = (mask > 0.5).astype(np.float32)
140
- rgb = rgb[:, :, :3] * mask[:, :, None] + bg_color * (1 - mask[:, :, None])
141
-
142
- # resize to specific size require by preprocessor of smplx-estimator.
143
- rgb = resize_image_keepaspect_np(rgb, max_tgt_size)
144
- mask = resize_image_keepaspect_np(mask, max_tgt_size)
145
-
146
- # crop image to enlarge human area.
147
- rgb, mask, offset_x, offset_y = center_crop_according_to_mask(
148
- rgb, mask, aspect_standard, enlarge_ratio
149
- )
150
- if intr is not None:
151
- intr[0, 2] -= offset_x
152
- intr[1, 2] -= offset_y
153
-
154
- # resize to render_tgt_size for training
155
-
156
- tgt_hw_size, ratio_y, ratio_x = calc_new_tgt_size_by_aspect(
157
- cur_hw=rgb.shape[:2],
158
- aspect_standard=aspect_standard,
159
- tgt_size=render_tgt_size,
160
- multiply=multiply,
161
- )
162
-
163
- rgb = cv2.resize(
164
- rgb, dsize=(tgt_hw_size[1], tgt_hw_size[0]), interpolation=cv2.INTER_AREA
165
- )
166
- mask = cv2.resize(
167
- mask, dsize=(tgt_hw_size[1], tgt_hw_size[0]), interpolation=cv2.INTER_AREA
168
- )
169
-
170
- if intr is not None:
171
-
172
- # ******************** Merge *********************** #
173
- intr = scale_intrs(intr, ratio_x=ratio_x, ratio_y=ratio_y)
174
- assert (
175
- abs(intr[0, 2] * 2 - rgb.shape[1]) < 2.5
176
- ), f"{intr[0, 2] * 2}, {rgb.shape[1]}"
177
- assert (
178
- abs(intr[1, 2] * 2 - rgb.shape[0]) < 2.5
179
- ), f"{intr[1, 2] * 2}, {rgb.shape[0]}"
180
-
181
- # ******************** Merge *********************** #
182
- intr[0, 2] = rgb.shape[1] // 2
183
- intr[1, 2] = rgb.shape[0] // 2
184
-
185
- rgb = torch.from_numpy(rgb).float().permute(2, 0, 1).unsqueeze(0) # [1, 3, H, W]
186
- mask = (
187
- torch.from_numpy(mask[:, :, None]).float().permute(2, 0, 1).unsqueeze(0)
188
- ) # [1, 1, H, W]
189
- return rgb, mask, intr
190
-
191
- def parse_configs():
192
-
193
- parser = argparse.ArgumentParser()
194
- parser.add_argument("--config", type=str)
195
- parser.add_argument("--infer", type=str)
196
- args, unknown = parser.parse_known_args()
197
-
198
- cfg = OmegaConf.create()
199
- cli_cfg = OmegaConf.from_cli(unknown)
200
-
201
- # parse from ENV
202
- if os.environ.get("APP_INFER") is not None:
203
- args.infer = os.environ.get("APP_INFER")
204
- if os.environ.get("APP_MODEL_NAME") is not None:
205
- cli_cfg.model_name = os.environ.get("APP_MODEL_NAME")
206
-
207
- args.config = args.infer if args.config is None else args.config
208
-
209
- if args.config is not None:
210
- cfg_train = OmegaConf.load(args.config)
211
- cfg.source_size = cfg_train.dataset.source_image_res
212
- try:
213
- cfg.src_head_size = cfg_train.dataset.src_head_size
214
- except:
215
- cfg.src_head_size = 112
216
- cfg.render_size = cfg_train.dataset.render_image.high
217
- _relative_path = os.path.join(
218
- cfg_train.experiment.parent,
219
- cfg_train.experiment.child,
220
- os.path.basename(cli_cfg.model_name).split("_")[-1],
221
- )
222
-
223
- cfg.save_tmp_dump = os.path.join("exps", "save_tmp", _relative_path)
224
- cfg.image_dump = os.path.join("exps", "images", _relative_path)
225
- cfg.video_dump = os.path.join("exps", "videos", _relative_path) # output path
226
-
227
- if args.infer is not None:
228
- cfg_infer = OmegaConf.load(args.infer)
229
- cfg.merge_with(cfg_infer)
230
- cfg.setdefault(
231
- "save_tmp_dump", os.path.join("exps", cli_cfg.model_name, "save_tmp")
232
- )
233
- cfg.setdefault("image_dump", os.path.join("exps", cli_cfg.model_name, "images"))
234
- cfg.setdefault(
235
- "video_dump", os.path.join("dumps", cli_cfg.model_name, "videos")
236
- )
237
- cfg.setdefault("mesh_dump", os.path.join("dumps", cli_cfg.model_name, "meshes"))
238
-
239
- cfg.motion_video_read_fps = 6
240
- cfg.merge_with(cli_cfg)
241
-
242
- cfg.setdefault("logger", "INFO")
243
-
244
- assert cfg.model_name is not None, "model_name is required"
245
-
246
- return cfg, cfg_train
247
-
248
- def _build_model(cfg):
249
- from LHM.models import model_dict
250
-
251
- hf_model_cls = wrap_model_hub(model_dict["human_lrm_sapdino_bh_sd3_5"])
252
- model = hf_model_cls.from_pretrained(cfg.model_name)
253
-
254
- return model
255
-
256
- def launch_pretrained():
257
- from huggingface_hub import snapshot_download, hf_hub_download
258
- hf_hub_download(repo_id="DyrusQZ/LHM_Runtime", repo_type='model', filename='assets.tar', local_dir="./")
259
- os.system("tar -xvf assets.tar && rm assets.tar")
260
- hf_hub_download(repo_id="DyrusQZ/LHM_Runtime", repo_type='model', filename='LHM-0.5B.tar', local_dir="./")
261
- os.system("tar -xvf LHM-0.5B.tar && rm LHM-0.5B.tar")
262
- hf_hub_download(repo_id="DyrusQZ/LHM_Runtime", repo_type='model', filename='LHM_prior_model.tar', local_dir="./")
263
- os.system("tar -xvf LHM_prior_model.tar && rm LHM_prior_model.tar")
264
-
265
- def launch_env_not_compile_with_cuda():
266
- os.system("pip install chumpy")
267
- os.system("pip uninstall -y basicsr")
268
- os.system("pip install git+https://github.com/hitsz-zuoqi/BasicSR/")
269
- # os.system("pip install -e ./third_party/sam2")
270
- os.system("pip install numpy==1.23.0")
271
- # os.system("pip install git+https://github.com/hitsz-zuoqi/sam2/")
272
- # os.system("pip install git+https://github.com/ashawkey/diff-gaussian-rasterization/")
273
- # os.system("pip install git+https://github.com/camenduru/simple-knn/")
274
- os.system("pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/py310_cu121_pyt251/download.html")
275
-
276
-
277
- def animation_infer(renderer, gs_model_list, query_points, smplx_params, render_c2ws, render_intrs, render_bg_colors):
278
- '''Inference code avoid repeat forward.
279
- '''
280
- render_h, render_w = int(render_intrs[0, 0, 1, 2] * 2), int(
281
- render_intrs[0, 0, 0, 2] * 2
282
- )
283
- # render target views
284
- render_res_list = []
285
- num_views = render_c2ws.shape[1]
286
- start_time = time.time()
287
-
288
- # render target views
289
- render_res_list = []
290
-
291
- for view_idx in range(num_views):
292
- render_res = renderer.forward_animate_gs(
293
- gs_model_list,
294
- query_points,
295
- renderer.get_single_view_smpl_data(smplx_params, view_idx),
296
- render_c2ws[:, view_idx : view_idx + 1],
297
- render_intrs[:, view_idx : view_idx + 1],
298
- render_h,
299
- render_w,
300
- render_bg_colors[:, view_idx : view_idx + 1],
301
- )
302
- render_res_list.append(render_res)
303
- print(
304
- f"time elpased(animate gs model per frame):{(time.time() - start_time)/num_views}"
305
- )
306
-
307
- out = defaultdict(list)
308
- for res in render_res_list:
309
- for k, v in res.items():
310
- if isinstance(v[0], torch.Tensor):
311
- out[k].append(v.detach().cpu())
312
- else:
313
- out[k].append(v)
314
- for k, v in out.items():
315
- # print(f"out key:{k}")
316
- if isinstance(v[0], torch.Tensor):
317
- out[k] = torch.concat(v, dim=1)
318
- if k in ["comp_rgb", "comp_mask", "comp_depth"]:
319
- out[k] = out[k][0].permute(
320
- 0, 2, 3, 1
321
- ) # [1, Nv, 3, H, W] -> [Nv, 3, H, W] - > [Nv, H, W, 3]
322
- else:
323
- out[k] = v
324
- return out
325
-
326
- def assert_input_image(input_image):
327
- if input_image is None:
328
- raise gr.Error("No image selected or uploaded!")
329
-
330
- def prepare_working_dir():
331
- import tempfile
332
- working_dir = tempfile.TemporaryDirectory()
333
- return working_dir
334
-
335
- def init_preprocessor():
336
- from LHM.utils.preprocess import Preprocessor
337
- global preprocessor
338
- preprocessor = Preprocessor()
339
-
340
- def preprocess_fn(image_in: np.ndarray, remove_bg: bool, recenter: bool, working_dir):
341
- image_raw = os.path.join(working_dir.name, "raw.png")
342
- with Image.fromarray(image_in) as img:
343
- img.save(image_raw)
344
- image_out = os.path.join(working_dir.name, "rembg.png")
345
- success = preprocessor.preprocess(image_path=image_raw, save_path=image_out, rmbg=remove_bg, recenter=recenter)
346
- assert success, f"Failed under preprocess_fn!"
347
- return image_out
348
-
349
- def get_image_base64(path):
350
- with open(path, "rb") as image_file:
351
- encoded_string = base64.b64encode(image_file.read()).decode()
352
- return f"data:image/png;base64,{encoded_string}"
353
-
354
-
355
- def demo_lhm(pose_estimator, face_detector, lhm, cfg):
356
-
357
- @spaces.GPU
358
- def core_fn(image: str, video_params, working_dir):
359
- image_raw = os.path.join(working_dir.name, "raw.png")
360
- with Image.fromarray(image) as img:
361
- img.save(image_raw)
362
 
363
- base_vid = os.path.basename(video_params).split("_")[0]
364
- smplx_params_dir = os.path.join("./assets/sample_motion", base_vid, "smplx_params")
365
 
366
- dump_video_path = os.path.join(working_dir.name, "output.mp4")
367
- dump_image_path = os.path.join(working_dir.name, "output.png")
368
 
369
 
370
- # prepare dump paths
371
- omit_prefix = os.path.dirname(image_raw)
372
- image_name = os.path.basename(image_raw)
373
- uid = image_name.split(".")[0]
374
- subdir_path = os.path.dirname(image_raw).replace(omit_prefix, "")
375
- subdir_path = (
376
- subdir_path[1:] if subdir_path.startswith("/") else subdir_path
377
- )
378
- print("subdir_path and uid:", subdir_path, uid)
379
 
380
- motion_seqs_dir = smplx_params_dir
381
 
382
- motion_name = os.path.dirname(
383
- motion_seqs_dir[:-1] if motion_seqs_dir[-1] == "/" else motion_seqs_dir
384
- )
385
-
386
- motion_name = os.path.basename(motion_name)
387
-
388
- dump_image_dir = os.path.dirname(dump_image_path)
389
- os.makedirs(dump_image_dir, exist_ok=True)
390
-
391
- print(image_raw, motion_seqs_dir, dump_image_dir, dump_video_path)
392
-
393
- dump_tmp_dir = dump_image_dir
394
-
395
- shape_pose = pose_estimator(image_raw)
396
- assert shape_pose.is_full_body, f"The input image is illegal, {shape_pose.msg}"
397
-
398
- if os.path.exists(dump_video_path):
399
- return dump_image_path, dump_video_path
400
- source_size = cfg.source_size
401
- render_size = cfg.render_size
402
- render_fps = 30
403
-
404
- aspect_standard = 5.0 / 3
405
- motion_img_need_mask = cfg.get("motion_img_need_mask", False) # False
406
- vis_motion = cfg.get("vis_motion", False) # False
407
-
408
-
409
- input_np = cv2.imread(image_raw)
410
- output_np = remove(input_np)
411
- parsing_mask = output_np[:,:,3]
412
-
413
- # prepare reference image
414
- image, _, _ = infer_preprocess_image(
415
- image_raw,
416
- mask=parsing_mask,
417
- intr=None,
418
- pad_ratio=0,
419
- bg_color=1.0,
420
- max_tgt_size=896,
421
- aspect_standard=aspect_standard,
422
- enlarge_ratio=[1.0, 1.0],
423
- render_tgt_size=source_size,
424
- multiply=14,
425
- need_mask=True,
426
- )
427
-
428
- try:
429
- rgb = np.array(Image.open(image_path))
430
- rgb = torch.from_numpy(rgb).permute(2, 0, 1)
431
- bbox = face_detector.detect_face(rgb)
432
- head_rgb = rgb[:, int(bbox[1]) : int(bbox[3]), int(bbox[0]) : int(bbox[2])]
433
- head_rgb = head_rgb.permute(1, 2, 0)
434
- src_head_rgb = head_rgb.cpu().numpy()
435
- except:
436
- print("w/o head input!")
437
- src_head_rgb = np.zeros((112, 112, 3), dtype=np.uint8)
438
-
439
- # resize to dino size
440
- try:
441
- src_head_rgb = cv2.resize(
442
- src_head_rgb,
443
- dsize=(cfg.src_head_size, cfg.src_head_size),
444
- interpolation=cv2.INTER_AREA,
445
- ) # resize to dino size
446
- except:
447
- src_head_rgb = np.zeros(
448
- (cfg.src_head_size, cfg.src_head_size, 3), dtype=np.uint8
449
- )
450
-
451
- src_head_rgb = (
452
- torch.from_numpy(src_head_rgb / 255.0).float().permute(2, 0, 1).unsqueeze(0)
453
- ) # [1, 3, H, W]
454
-
455
- save_ref_img_path = os.path.join(
456
- dump_tmp_dir, "output.png"
457
- )
458
- vis_ref_img = (image[0].permute(1, 2, 0).cpu().detach().numpy() * 255).astype(
459
- np.uint8
460
- )
461
- Image.fromarray(vis_ref_img).save(save_ref_img_path)
462
-
463
- # read motion seq
464
- motion_name = os.path.dirname(
465
- motion_seqs_dir[:-1] if motion_seqs_dir[-1] == "/" else motion_seqs_dir
466
- )
467
- motion_name = os.path.basename(motion_name)
468
-
469
- motion_seq = prepare_motion_seqs(
470
- motion_seqs_dir,
471
- None,
472
- save_root=dump_tmp_dir,
473
- fps=30,
474
- bg_color=1.0,
475
- aspect_standard=aspect_standard,
476
- enlarge_ratio=[1.0, 1, 0],
477
- render_image_res=render_size,
478
- multiply=16,
479
- need_mask=motion_img_need_mask,
480
- vis_motion=vis_motion,
481
- )
482
-
483
- camera_size = len(motion_seq["motion_seqs"])
484
- shape_param = shape_pose.beta
485
-
486
- device = "cuda"
487
- dtype = torch.float32
488
- shape_param = torch.tensor(shape_param, dtype=dtype).unsqueeze(0)
489
-
490
- lhm.to(dtype)
491
-
492
- smplx_params = motion_seq['smplx_params']
493
- smplx_params['betas'] = shape_param.to(device)
494
-
495
- gs_model_list, query_points, transform_mat_neutral_pose = lhm.infer_single_view(
496
- image.unsqueeze(0).to(device, dtype),
497
- src_head_rgb.unsqueeze(0).to(device, dtype),
498
- None,
499
- None,
500
- render_c2ws=motion_seq["render_c2ws"].to(device),
501
- render_intrs=motion_seq["render_intrs"].to(device),
502
- render_bg_colors=motion_seq["render_bg_colors"].to(device),
503
- smplx_params={
504
- k: v.to(device) for k, v in smplx_params.items()
505
- },
506
- )
507
-
508
-
509
- # rendering !!!!
510
-
511
- start_time = time.time()
512
- batch_dict = dict()
513
- batch_size = 40 # avoid memeory out!
514
-
515
- for batch_i in range(0, camera_size, batch_size):
516
- with torch.no_grad():
517
- # TODO check device and dtype
518
- # dict_keys(['comp_rgb', 'comp_rgb_bg', 'comp_mask', 'comp_depth', '3dgs'])
519
- keys = [
520
- "root_pose",
521
- "body_pose",
522
- "jaw_pose",
523
- "leye_pose",
524
- "reye_pose",
525
- "lhand_pose",
526
- "rhand_pose",
527
- "trans",
528
- "focal",
529
- "princpt",
530
- "img_size_wh",
531
- "expr",
532
- ]
533
- batch_smplx_params = dict()
534
- batch_smplx_params["betas"] = shape_param.to(device)
535
- batch_smplx_params['transform_mat_neutral_pose'] = transform_mat_neutral_pose
536
- for key in keys:
537
- batch_smplx_params[key] = motion_seq["smplx_params"][key][
538
- :, batch_i : batch_i + batch_size
539
- ].to(device)
540
-
541
- res = lhm.animation_infer(gs_model_list, query_points, batch_smplx_params,
542
- render_c2ws=motion_seq["render_c2ws"][
543
- :, batch_i : batch_i + batch_size
544
- ].to(device),
545
- render_intrs=motion_seq["render_intrs"][
546
- :, batch_i : batch_i + batch_size
547
- ].to(device),
548
- render_bg_colors=motion_seq["render_bg_colors"][
549
- :, batch_i : batch_i + batch_size
550
- ].to(device),
551
- )
552
-
553
- for accumulate_key in ["comp_rgb", "comp_mask"]:
554
- if accumulate_key not in batch_dict:
555
- batch_dict[accumulate_key] = []
556
- batch_dict[accumulate_key].append(res[accumulate_key].detach().cpu())
557
- del res
558
- torch.cuda.empty_cache()
559
-
560
- for accumulate_key in ["comp_rgb", "comp_mask"]:
561
- batch_dict[accumulate_key] = torch.cat(batch_dict[accumulate_key], dim=0)
562
-
563
- print(f"time elapsed: {time.time() - start_time}")
564
- rgb = batch_dict["comp_rgb"].detach().cpu().numpy() # [Nv, H, W, 3], 0-1
565
- mask = batch_dict["comp_mask"].detach().cpu().numpy() # [Nv, H, W, 3], 0-1
566
- mask[mask < 0.5] = 0.0
567
-
568
- rgb = rgb * mask + (1 - mask) * 1
569
- rgb = np.clip(rgb * 255, 0, 255).astype(np.uint8)
570
-
571
- if vis_motion:
572
- # print(rgb.shape, motion_seq["vis_motion_render"].shape)
573
-
574
- vis_ref_img = np.tile(
575
- cv2.resize(vis_ref_img, (rgb[0].shape[1], rgb[0].shape[0]))[
576
- None, :, :, :
577
- ],
578
- (rgb.shape[0], 1, 1, 1),
579
- )
580
- rgb = np.concatenate(
581
- [rgb, motion_seq["vis_motion_render"], vis_ref_img], axis=2
582
- )
583
-
584
- os.makedirs(os.path.dirname(dump_video_path), exist_ok=True)
585
-
586
- images_to_video(
587
- rgb,
588
- output_path=dump_video_path,
589
- fps=render_fps,
590
- gradio_codec=False,
591
- verbose=True,
592
- )
593
-
594
- # self.infer_single(
595
- # image_path,
596
- # motion_seqs_dir=motion_seqs_dir,
597
- # motion_img_dir=None,
598
- # motion_video_read_fps=30,
599
- # export_video=False,
600
- # export_mesh=False,
601
- # dump_tmp_dir=dump_image_dir,
602
- # dump_image_dir=dump_image_dir,
603
- # dump_video_path=dump_video_path,
604
- # shape_param=shape_pose.beta,
605
- # )
606
-
607
- # status = spaces.GPU(infer_impl(
608
- # gradio_demo_image=image_raw,
609
- # gradio_motion_file=smplx_params_dir,
610
- # gradio_masked_image=dump_image_path,
611
- # gradio_video_save_path=dump_video_path
612
- # ))
613
-
614
- return dump_image_path, dump_video_path
615
- # if status:
616
- # return dump_image_path, dump_video_path
617
- # else:
618
- # return None, None
619
-
620
- _TITLE = '''LHM: Large Animatable Human Model'''
621
-
622
- _DESCRIPTION = '''
623
- <strong>Reconstruct a human avatar in 0.2 seconds with A100!</strong>
624
- '''
625
-
626
- with gr.Blocks(analytics_enabled=False) as demo:
627
-
628
- # </div>
629
- logo_url = "./assets/rgba_logo_new.png"
630
- logo_base64 = get_image_base64(logo_url)
631
- gr.HTML(
632
- f"""
633
- <div style="display: flex; justify-content: center; align-items: center; text-align: center;">
634
- <div>
635
- <h1> <img src="{logo_base64}" style='height:35px; display:inline-block;'/> Large Animatable Human Model </h1>
636
- </div>
637
- </div>
638
- """
639
- )
640
- gr.HTML(
641
- """<p><h4 style="color: red;"> Notes: Please input full-body image in case of detection errors.</h4></p>"""
642
- )
643
-
644
- # DISPLAY
645
- with gr.Row():
646
-
647
- with gr.Column(variant='panel', scale=1):
648
- with gr.Tabs(elem_id="openlrm_input_image"):
649
- with gr.TabItem('Input Image'):
650
- with gr.Row():
651
- input_image = gr.Image(label="Input Image", image_mode="RGBA", height=480, width=270, sources="upload", type="numpy", elem_id="content_image")
652
- # EXAMPLES
653
- with gr.Row():
654
- examples = [
655
- ['assets/sample_input/joker.jpg'],
656
- ['assets/sample_input/anime.png'],
657
- ['assets/sample_input/basket.png'],
658
- ['assets/sample_input/ai_woman1.JPG'],
659
- ['assets/sample_input/anime2.JPG'],
660
- ['assets/sample_input/anime3.JPG'],
661
- ['assets/sample_input/boy1.png'],
662
- ['assets/sample_input/choplin.jpg'],
663
- ['assets/sample_input/eins.JPG'],
664
- ['assets/sample_input/girl1.png'],
665
- ['assets/sample_input/girl2.png'],
666
- ['assets/sample_input/robot.jpg'],
667
- ]
668
- gr.Examples(
669
- examples=examples,
670
- inputs=[input_image],
671
- examples_per_page=20,
672
- )
673
-
674
- with gr.Column():
675
- with gr.Tabs(elem_id="openlrm_input_video"):
676
- with gr.TabItem('Input Video'):
677
- with gr.Row():
678
- video_input = gr.Video(label="Input Video",height=480, width=270, interactive=False)
679
-
680
- examples = [
681
- # './assets/sample_motion/danaotiangong/danaotiangong_origin.mp4',
682
- './assets/sample_motion/ex5/ex5_origin.mp4',
683
- './assets/sample_motion/girl2/girl2_origin.mp4',
684
- './assets/sample_motion/jntm/jntm_origin.mp4',
685
- './assets/sample_motion/mimo1/mimo1_origin.mp4',
686
- './assets/sample_motion/mimo2/mimo2_origin.mp4',
687
- './assets/sample_motion/mimo4/mimo4_origin.mp4',
688
- './assets/sample_motion/mimo5/mimo5_origin.mp4',
689
- './assets/sample_motion/mimo6/mimo6_origin.mp4',
690
- './assets/sample_motion/nezha/nezha_origin.mp4',
691
- './assets/sample_motion/taiji/taiji_origin.mp4'
692
- ]
693
-
694
- gr.Examples(
695
- examples=examples,
696
- inputs=[video_input],
697
- examples_per_page=20,
698
- )
699
- with gr.Column(variant='panel', scale=1):
700
- with gr.Tabs(elem_id="openlrm_processed_image"):
701
- with gr.TabItem('Processed Image'):
702
- with gr.Row():
703
- processed_image = gr.Image(label="Processed Image", image_mode="RGBA", type="filepath", elem_id="processed_image", height=480, width=270, interactive=False)
704
-
705
- with gr.Column(variant='panel', scale=1):
706
- with gr.Tabs(elem_id="openlrm_render_video"):
707
- with gr.TabItem('Rendered Video'):
708
- with gr.Row():
709
- output_video = gr.Video(label="Rendered Video", format="mp4", height=480, width=270, autoplay=True)
710
-
711
- # SETTING
712
- with gr.Row():
713
- with gr.Column(variant='panel', scale=1):
714
- submit = gr.Button('Generate', elem_id="openlrm_generate", variant='primary')
715
-
716
-
717
- working_dir = gr.State()
718
- submit.click(
719
- fn=assert_input_image,
720
- inputs=[input_image],
721
- queue=False,
722
- ).success(
723
- fn=prepare_working_dir,
724
- outputs=[working_dir],
725
- queue=False,
726
- ).success(
727
- fn=core_fn,
728
- inputs=[input_image, video_input, working_dir], # video_params refer to smpl dir
729
- outputs=[processed_image, output_video],
730
- )
731
-
732
- demo.queue()
733
- demo.launch()
734
-
735
-
736
- def launch_gradio_app():
737
-
738
- os.environ.update({
739
- "APP_ENABLED": "1",
740
- "APP_MODEL_NAME": "./exps/releases/video_human_benchmark/human-lrm-500M/step_060000/",
741
- "APP_INFER": "./configs/inference/human-lrm-500M.yaml",
742
- "APP_TYPE": "infer.human_lrm",
743
- "NUMBA_THREADING_LAYER": 'omp',
744
- })
745
-
746
- # from LHM.runners import REGISTRY_RUNNERS
747
- # RunnerClass = REGISTRY_RUNNERS[os.getenv("APP_TYPE")]
748
- # with RunnerClass() as runner:
749
- # runner.to('cuda')
750
- # demo_lhm(infer_impl=runner.infer)
751
-
752
- facedetector = VGGHeadDetector(
753
- "./pretrained_models/gagatracker/vgghead/vgg_heads_l.trcd",
754
- device='cpu',
755
- )
756
- facedetector.to('cuda')
757
-
758
- pose_estimator = PoseEstimator(
759
- "./pretrained_models/human_model_files/", device='cpu'
760
- )
761
- pose_estimator.to('cuda')
762
- pose_estimator.device = 'cuda'
763
-
764
- cfg, cfg_train = parse_configs()
765
- lhm = _build_model(cfg)
766
- lhm.to('cuda')
767
-
768
- demo_lhm(pose_estimator, facedetector, lhm, cfg)
769
-
770
-
771
-
772
- if __name__ == '__main__':
773
- # launch_pretrained()
774
- # launch_env_not_compile_with_cuda()
775
- # os.system("rm -rf /data-nvme/zerogpu-offload/")
776
- launch_gradio_app()
777
 
778
- # import gradio as gr
779
 
780
- # def greet(name):
781
- # return "Hello " + name + "!!"
782
 
783
- # demo = gr.Interface(fn=greet, inputs="text", outputs="text")
784
- # demo.launch()
 
13
  # limitations under the License.
14
 
15
 
16
+ # import os
17
+ # os.system("rm -rf /data-nvme/zerogpu-offload/")
18
+ # import cv2
19
+ # import time
20
+ # from PIL import Image
21
+ # import numpy as np
22
+ # import gradio as gr
23
+ # import base64
24
+ # import spaces
25
+ # import torch
26
+ # torch._dynamo.config.disable = True
27
+ # import subprocess
28
+ # import os
29
+ # import argparse
30
+ # from omegaconf import OmegaConf
31
+ # from rembg import remove
32
+ # from engine.pose_estimation.pose_estimator import PoseEstimator
33
+ # from LHM.utils.face_detector import VGGHeadDetector
34
+ # from LHM.utils.hf_hub import wrap_model_hub
35
+ # from LHM.runners.infer.utils import (
36
+ # calc_new_tgt_size_by_aspect,
37
+ # center_crop_according_to_mask,
38
+ # prepare_motion_seqs,
39
+ # resize_image_keepaspect_np,
40
+ # )
41
+ # from engine.SegmentAPI.base import Bbox
42
+
43
+ # def get_bbox(mask):
44
+ # height, width = mask.shape
45
+ # pha = mask / 255.0
46
+ # pha[pha < 0.5] = 0.0
47
+ # pha[pha >= 0.5] = 1.0
48
+
49
+ # # obtain bbox
50
+ # _h, _w = np.where(pha == 1)
51
+
52
+ # whwh = [
53
+ # _w.min().item(),
54
+ # _h.min().item(),
55
+ # _w.max().item(),
56
+ # _h.max().item(),
57
+ # ]
58
+
59
+ # box = Bbox(whwh)
60
+
61
+ # # scale box to 1.05
62
+ # scale_box = box.scale(1.1, width=width, height=height)
63
+ # return scale_box
64
+
65
+ # def infer_preprocess_image(
66
+ # rgb_path,
67
+ # mask,
68
+ # intr,
69
+ # pad_ratio,
70
+ # bg_color,
71
+ # max_tgt_size,
72
+ # aspect_standard,
73
+ # enlarge_ratio,
74
+ # render_tgt_size,
75
+ # multiply,
76
+ # need_mask=True,
77
+ # ):
78
+ # """inferece
79
+ # image, _, _ = preprocess_image(image_path, mask_path=None, intr=None, pad_ratio=0, bg_color=1.0,
80
+ # max_tgt_size=896, aspect_standard=aspect_standard, enlarge_ratio=[1.0, 1.0],
81
+ # render_tgt_size=source_size, multiply=14, need_mask=True)
82
+
83
+ # """
84
+
85
+ # rgb = np.array(Image.open(rgb_path))
86
+ # rgb_raw = rgb.copy()
87
+
88
+ # bbox = get_bbox(mask)
89
+ # bbox_list = bbox.get_box()
90
+
91
+ # rgb = rgb[bbox_list[1] : bbox_list[3], bbox_list[0] : bbox_list[2]]
92
+ # mask = mask[bbox_list[1] : bbox_list[3], bbox_list[0] : bbox_list[2]]
93
+
94
+ # h, w, _ = rgb.shape
95
+ # assert w < h
96
+ # cur_ratio = h / w
97
+ # scale_ratio = cur_ratio / aspect_standard
98
+
99
+ # target_w = int(min(w * scale_ratio, h))
100
+ # offset_w = (target_w - w) // 2
101
+ # # resize to target ratio.
102
+ # if offset_w > 0:
103
+ # rgb = np.pad(
104
+ # rgb,
105
+ # ((0, 0), (offset_w, offset_w), (0, 0)),
106
+ # mode="constant",
107
+ # constant_values=255,
108
+ # )
109
+ # mask = np.pad(
110
+ # mask,
111
+ # ((0, 0), (offset_w, offset_w)),
112
+ # mode="constant",
113
+ # constant_values=0,
114
+ # )
115
+ # else:
116
+ # offset_w = -offset_w
117
+ # rgb = rgb[:,offset_w:-offset_w,:]
118
+ # mask = mask[:,offset_w:-offset_w]
119
+
120
+ # # resize to target ratio.
121
+
122
+ # rgb = np.pad(
123
+ # rgb,
124
+ # ((0, 0), (offset_w, offset_w), (0, 0)),
125
+ # mode="constant",
126
+ # constant_values=255,
127
+ # )
128
+
129
+ # mask = np.pad(
130
+ # mask,
131
+ # ((0, 0), (offset_w, offset_w)),
132
+ # mode="constant",
133
+ # constant_values=0,
134
+ # )
135
+
136
+ # rgb = rgb / 255.0 # normalize to [0, 1]
137
+ # mask = mask / 255.0
138
+
139
+ # mask = (mask > 0.5).astype(np.float32)
140
+ # rgb = rgb[:, :, :3] * mask[:, :, None] + bg_color * (1 - mask[:, :, None])
141
+
142
+ # # resize to specific size require by preprocessor of smplx-estimator.
143
+ # rgb = resize_image_keepaspect_np(rgb, max_tgt_size)
144
+ # mask = resize_image_keepaspect_np(mask, max_tgt_size)
145
+
146
+ # # crop image to enlarge human area.
147
+ # rgb, mask, offset_x, offset_y = center_crop_according_to_mask(
148
+ # rgb, mask, aspect_standard, enlarge_ratio
149
+ # )
150
+ # if intr is not None:
151
+ # intr[0, 2] -= offset_x
152
+ # intr[1, 2] -= offset_y
153
+
154
+ # # resize to render_tgt_size for training
155
+
156
+ # tgt_hw_size, ratio_y, ratio_x = calc_new_tgt_size_by_aspect(
157
+ # cur_hw=rgb.shape[:2],
158
+ # aspect_standard=aspect_standard,
159
+ # tgt_size=render_tgt_size,
160
+ # multiply=multiply,
161
+ # )
162
+
163
+ # rgb = cv2.resize(
164
+ # rgb, dsize=(tgt_hw_size[1], tgt_hw_size[0]), interpolation=cv2.INTER_AREA
165
+ # )
166
+ # mask = cv2.resize(
167
+ # mask, dsize=(tgt_hw_size[1], tgt_hw_size[0]), interpolation=cv2.INTER_AREA
168
+ # )
169
+
170
+ # if intr is not None:
171
+
172
+ # # ******************** Merge *********************** #
173
+ # intr = scale_intrs(intr, ratio_x=ratio_x, ratio_y=ratio_y)
174
+ # assert (
175
+ # abs(intr[0, 2] * 2 - rgb.shape[1]) < 2.5
176
+ # ), f"{intr[0, 2] * 2}, {rgb.shape[1]}"
177
+ # assert (
178
+ # abs(intr[1, 2] * 2 - rgb.shape[0]) < 2.5
179
+ # ), f"{intr[1, 2] * 2}, {rgb.shape[0]}"
180
+
181
+ # # ******************** Merge *********************** #
182
+ # intr[0, 2] = rgb.shape[1] // 2
183
+ # intr[1, 2] = rgb.shape[0] // 2
184
+
185
+ # rgb = torch.from_numpy(rgb).float().permute(2, 0, 1).unsqueeze(0) # [1, 3, H, W]
186
+ # mask = (
187
+ # torch.from_numpy(mask[:, :, None]).float().permute(2, 0, 1).unsqueeze(0)
188
+ # ) # [1, 1, H, W]
189
+ # return rgb, mask, intr
190
+
191
+ # def parse_configs():
192
+
193
+ # parser = argparse.ArgumentParser()
194
+ # parser.add_argument("--config", type=str)
195
+ # parser.add_argument("--infer", type=str)
196
+ # args, unknown = parser.parse_known_args()
197
+
198
+ # cfg = OmegaConf.create()
199
+ # cli_cfg = OmegaConf.from_cli(unknown)
200
+
201
+ # # parse from ENV
202
+ # if os.environ.get("APP_INFER") is not None:
203
+ # args.infer = os.environ.get("APP_INFER")
204
+ # if os.environ.get("APP_MODEL_NAME") is not None:
205
+ # cli_cfg.model_name = os.environ.get("APP_MODEL_NAME")
206
+
207
+ # args.config = args.infer if args.config is None else args.config
208
+
209
+ # if args.config is not None:
210
+ # cfg_train = OmegaConf.load(args.config)
211
+ # cfg.source_size = cfg_train.dataset.source_image_res
212
+ # try:
213
+ # cfg.src_head_size = cfg_train.dataset.src_head_size
214
+ # except:
215
+ # cfg.src_head_size = 112
216
+ # cfg.render_size = cfg_train.dataset.render_image.high
217
+ # _relative_path = os.path.join(
218
+ # cfg_train.experiment.parent,
219
+ # cfg_train.experiment.child,
220
+ # os.path.basename(cli_cfg.model_name).split("_")[-1],
221
+ # )
222
+
223
+ # cfg.save_tmp_dump = os.path.join("exps", "save_tmp", _relative_path)
224
+ # cfg.image_dump = os.path.join("exps", "images", _relative_path)
225
+ # cfg.video_dump = os.path.join("exps", "videos", _relative_path) # output path
226
+
227
+ # if args.infer is not None:
228
+ # cfg_infer = OmegaConf.load(args.infer)
229
+ # cfg.merge_with(cfg_infer)
230
+ # cfg.setdefault(
231
+ # "save_tmp_dump", os.path.join("exps", cli_cfg.model_name, "save_tmp")
232
+ # )
233
+ # cfg.setdefault("image_dump", os.path.join("exps", cli_cfg.model_name, "images"))
234
+ # cfg.setdefault(
235
+ # "video_dump", os.path.join("dumps", cli_cfg.model_name, "videos")
236
+ # )
237
+ # cfg.setdefault("mesh_dump", os.path.join("dumps", cli_cfg.model_name, "meshes"))
238
+
239
+ # cfg.motion_video_read_fps = 6
240
+ # cfg.merge_with(cli_cfg)
241
+
242
+ # cfg.setdefault("logger", "INFO")
243
+
244
+ # assert cfg.model_name is not None, "model_name is required"
245
+
246
+ # return cfg, cfg_train
247
+
248
+ # def _build_model(cfg):
249
+ # from LHM.models import model_dict
250
+
251
+ # hf_model_cls = wrap_model_hub(model_dict["human_lrm_sapdino_bh_sd3_5"])
252
+ # model = hf_model_cls.from_pretrained(cfg.model_name)
253
+
254
+ # return model
255
+
256
+ # def launch_pretrained():
257
+ # from huggingface_hub import snapshot_download, hf_hub_download
258
+ # hf_hub_download(repo_id="DyrusQZ/LHM_Runtime", repo_type='model', filename='assets.tar', local_dir="./")
259
+ # os.system("tar -xvf assets.tar && rm assets.tar")
260
+ # hf_hub_download(repo_id="DyrusQZ/LHM_Runtime", repo_type='model', filename='LHM-0.5B.tar', local_dir="./")
261
+ # os.system("tar -xvf LHM-0.5B.tar && rm LHM-0.5B.tar")
262
+ # hf_hub_download(repo_id="DyrusQZ/LHM_Runtime", repo_type='model', filename='LHM_prior_model.tar', local_dir="./")
263
+ # os.system("tar -xvf LHM_prior_model.tar && rm LHM_prior_model.tar")
264
+
265
+ # def launch_env_not_compile_with_cuda():
266
+ # os.system("pip install chumpy")
267
+ # os.system("pip uninstall -y basicsr")
268
+ # os.system("pip install git+https://github.com/hitsz-zuoqi/BasicSR/")
269
+ # # os.system("pip install -e ./third_party/sam2")
270
+ # os.system("pip install numpy==1.23.0")
271
+ # # os.system("pip install git+https://github.com/hitsz-zuoqi/sam2/")
272
+ # # os.system("pip install git+https://github.com/ashawkey/diff-gaussian-rasterization/")
273
+ # # os.system("pip install git+https://github.com/camenduru/simple-knn/")
274
+ # os.system("pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/py310_cu121_pyt251/download.html")
275
+
276
+
277
+ # def animation_infer(renderer, gs_model_list, query_points, smplx_params, render_c2ws, render_intrs, render_bg_colors):
278
+ # '''Inference code avoid repeat forward.
279
+ # '''
280
+ # render_h, render_w = int(render_intrs[0, 0, 1, 2] * 2), int(
281
+ # render_intrs[0, 0, 0, 2] * 2
282
+ # )
283
+ # # render target views
284
+ # render_res_list = []
285
+ # num_views = render_c2ws.shape[1]
286
+ # start_time = time.time()
287
+
288
+ # # render target views
289
+ # render_res_list = []
290
+
291
+ # for view_idx in range(num_views):
292
+ # render_res = renderer.forward_animate_gs(
293
+ # gs_model_list,
294
+ # query_points,
295
+ # renderer.get_single_view_smpl_data(smplx_params, view_idx),
296
+ # render_c2ws[:, view_idx : view_idx + 1],
297
+ # render_intrs[:, view_idx : view_idx + 1],
298
+ # render_h,
299
+ # render_w,
300
+ # render_bg_colors[:, view_idx : view_idx + 1],
301
+ # )
302
+ # render_res_list.append(render_res)
303
+ # print(
304
+ # f"time elpased(animate gs model per frame):{(time.time() - start_time)/num_views}"
305
+ # )
306
+
307
+ # out = defaultdict(list)
308
+ # for res in render_res_list:
309
+ # for k, v in res.items():
310
+ # if isinstance(v[0], torch.Tensor):
311
+ # out[k].append(v.detach().cpu())
312
+ # else:
313
+ # out[k].append(v)
314
+ # for k, v in out.items():
315
+ # # print(f"out key:{k}")
316
+ # if isinstance(v[0], torch.Tensor):
317
+ # out[k] = torch.concat(v, dim=1)
318
+ # if k in ["comp_rgb", "comp_mask", "comp_depth"]:
319
+ # out[k] = out[k][0].permute(
320
+ # 0, 2, 3, 1
321
+ # ) # [1, Nv, 3, H, W] -> [Nv, 3, H, W] - > [Nv, H, W, 3]
322
+ # else:
323
+ # out[k] = v
324
+ # return out
325
+
326
+ # def assert_input_image(input_image):
327
+ # if input_image is None:
328
+ # raise gr.Error("No image selected or uploaded!")
329
+
330
+ # def prepare_working_dir():
331
+ # import tempfile
332
+ # working_dir = tempfile.TemporaryDirectory()
333
+ # return working_dir
334
+
335
+ # def init_preprocessor():
336
+ # from LHM.utils.preprocess import Preprocessor
337
+ # global preprocessor
338
+ # preprocessor = Preprocessor()
339
+
340
+ # def preprocess_fn(image_in: np.ndarray, remove_bg: bool, recenter: bool, working_dir):
341
+ # image_raw = os.path.join(working_dir.name, "raw.png")
342
+ # with Image.fromarray(image_in) as img:
343
+ # img.save(image_raw)
344
+ # image_out = os.path.join(working_dir.name, "rembg.png")
345
+ # success = preprocessor.preprocess(image_path=image_raw, save_path=image_out, rmbg=remove_bg, recenter=recenter)
346
+ # assert success, f"Failed under preprocess_fn!"
347
+ # return image_out
348
+
349
+ # def get_image_base64(path):
350
+ # with open(path, "rb") as image_file:
351
+ # encoded_string = base64.b64encode(image_file.read()).decode()
352
+ # return f"data:image/png;base64,{encoded_string}"
353
+
354
+
355
+ # def demo_lhm(pose_estimator, face_detector, lhm, cfg):
356
+
357
+ # @spaces.GPU
358
+ # def core_fn(image: str, video_params, working_dir):
359
+ # image_raw = os.path.join(working_dir.name, "raw.png")
360
+ # with Image.fromarray(image) as img:
361
+ # img.save(image_raw)
362
 
363
+ # base_vid = os.path.basename(video_params).split("_")[0]
364
+ # smplx_params_dir = os.path.join("./assets/sample_motion", base_vid, "smplx_params")
365
 
366
+ # dump_video_path = os.path.join(working_dir.name, "output.mp4")
367
+ # dump_image_path = os.path.join(working_dir.name, "output.png")
368
 
369
 
370
+ # # prepare dump paths
371
+ # omit_prefix = os.path.dirname(image_raw)
372
+ # image_name = os.path.basename(image_raw)
373
+ # uid = image_name.split(".")[0]
374
+ # subdir_path = os.path.dirname(image_raw).replace(omit_prefix, "")
375
+ # subdir_path = (
376
+ # subdir_path[1:] if subdir_path.startswith("/") else subdir_path
377
+ # )
378
+ # print("subdir_path and uid:", subdir_path, uid)
379
 
380
+ # motion_seqs_dir = smplx_params_dir
381
 
382
+ # motion_name = os.path.dirname(
383
+ # motion_seqs_dir[:-1] if motion_seqs_dir[-1] == "/" else motion_seqs_dir
384
+ # )
385
+
386
+ # motion_name = os.path.basename(motion_name)
387
+
388
+ # dump_image_dir = os.path.dirname(dump_image_path)
389
+ # os.makedirs(dump_image_dir, exist_ok=True)
390
+
391
+ # print(image_raw, motion_seqs_dir, dump_image_dir, dump_video_path)
392
+
393
+ # dump_tmp_dir = dump_image_dir
394
+
395
+ # shape_pose = pose_estimator(image_raw)
396
+ # assert shape_pose.is_full_body, f"The input image is illegal, {shape_pose.msg}"
397
+
398
+ # if os.path.exists(dump_video_path):
399
+ # return dump_image_path, dump_video_path
400
+ # source_size = cfg.source_size
401
+ # render_size = cfg.render_size
402
+ # render_fps = 30
403
+
404
+ # aspect_standard = 5.0 / 3
405
+ # motion_img_need_mask = cfg.get("motion_img_need_mask", False) # False
406
+ # vis_motion = cfg.get("vis_motion", False) # False
407
+
408
+
409
+ # input_np = cv2.imread(image_raw)
410
+ # output_np = remove(input_np)
411
+ # # cv2.imwrite("./vis.png", output_np)
412
+ # parsing_mask = output_np[:,:,3]
413
+
414
+ # # prepare reference image
415
+ # image, _, _ = infer_preprocess_image(
416
+ # image_raw,
417
+ # mask=parsing_mask,
418
+ # intr=None,
419
+ # pad_ratio=0,
420
+ # bg_color=1.0,
421
+ # max_tgt_size=896,
422
+ # aspect_standard=aspect_standard,
423
+ # enlarge_ratio=[1.0, 1.0],
424
+ # render_tgt_size=source_size,
425
+ # multiply=14,
426
+ # need_mask=True,
427
+ # )
428
+
429
+ # try:
430
+ # rgb = np.array(Image.open(image_path))
431
+ # rgb = torch.from_numpy(rgb).permute(2, 0, 1)
432
+ # bbox = face_detector.detect_face(rgb)
433
+ # head_rgb = rgb[:, int(bbox[1]) : int(bbox[3]), int(bbox[0]) : int(bbox[2])]
434
+ # head_rgb = head_rgb.permute(1, 2, 0)
435
+ # src_head_rgb = head_rgb.cpu().numpy()
436
+ # except:
437
+ # print("w/o head input!")
438
+ # src_head_rgb = np.zeros((112, 112, 3), dtype=np.uint8)
439
+
440
+ # # resize to dino size
441
+ # try:
442
+ # src_head_rgb = cv2.resize(
443
+ # src_head_rgb,
444
+ # dsize=(cfg.src_head_size, cfg.src_head_size),
445
+ # interpolation=cv2.INTER_AREA,
446
+ # ) # resize to dino size
447
+ # except:
448
+ # src_head_rgb = np.zeros(
449
+ # (cfg.src_head_size, cfg.src_head_size, 3), dtype=np.uint8
450
+ # )
451
+
452
+ # src_head_rgb = (
453
+ # torch.from_numpy(src_head_rgb / 255.0).float().permute(2, 0, 1).unsqueeze(0)
454
+ # ) # [1, 3, H, W]
455
+
456
+ # save_ref_img_path = os.path.join(
457
+ # dump_tmp_dir, "output.png"
458
+ # )
459
+ # vis_ref_img = (image[0].permute(1, 2, 0).cpu().detach().numpy() * 255).astype(
460
+ # np.uint8
461
+ # )
462
+ # Image.fromarray(vis_ref_img).save(save_ref_img_path)
463
+
464
+ # # read motion seq
465
+ # motion_name = os.path.dirname(
466
+ # motion_seqs_dir[:-1] if motion_seqs_dir[-1] == "/" else motion_seqs_dir
467
+ # )
468
+ # motion_name = os.path.basename(motion_name)
469
+
470
+ # motion_seq = prepare_motion_seqs(
471
+ # motion_seqs_dir,
472
+ # None,
473
+ # save_root=dump_tmp_dir,
474
+ # fps=30,
475
+ # bg_color=1.0,
476
+ # aspect_standard=aspect_standard,
477
+ # enlarge_ratio=[1.0, 1, 0],
478
+ # render_image_res=render_size,
479
+ # multiply=16,
480
+ # need_mask=motion_img_need_mask,
481
+ # vis_motion=vis_motion,
482
+ # )
483
+
484
+ # camera_size = len(motion_seq["motion_seqs"])
485
+ # shape_param = shape_pose.beta
486
+
487
+ # device = "cuda"
488
+ # dtype = torch.float32
489
+ # shape_param = torch.tensor(shape_param, dtype=dtype).unsqueeze(0)
490
+
491
+ # lhm.to(dtype)
492
+
493
+ # smplx_params = motion_seq['smplx_params']
494
+ # smplx_params['betas'] = shape_param.to(device)
495
+
496
+ # gs_model_list, query_points, transform_mat_neutral_pose = lhm.infer_single_view(
497
+ # image.unsqueeze(0).to(device, dtype),
498
+ # src_head_rgb.unsqueeze(0).to(device, dtype),
499
+ # None,
500
+ # None,
501
+ # render_c2ws=motion_seq["render_c2ws"].to(device),
502
+ # render_intrs=motion_seq["render_intrs"].to(device),
503
+ # render_bg_colors=motion_seq["render_bg_colors"].to(device),
504
+ # smplx_params={
505
+ # k: v.to(device) for k, v in smplx_params.items()
506
+ # },
507
+ # )
508
+
509
+
510
+ # # rendering !!!!
511
+
512
+ # start_time = time.time()
513
+ # batch_dict = dict()
514
+ # batch_size = 40 # avoid memeory out!
515
+
516
+ # for batch_i in range(0, camera_size, batch_size):
517
+ # with torch.no_grad():
518
+ # # TODO check device and dtype
519
+ # # dict_keys(['comp_rgb', 'comp_rgb_bg', 'comp_mask', 'comp_depth', '3dgs'])
520
+ # keys = [
521
+ # "root_pose",
522
+ # "body_pose",
523
+ # "jaw_pose",
524
+ # "leye_pose",
525
+ # "reye_pose",
526
+ # "lhand_pose",
527
+ # "rhand_pose",
528
+ # "trans",
529
+ # "focal",
530
+ # "princpt",
531
+ # "img_size_wh",
532
+ # "expr",
533
+ # ]
534
+ # batch_smplx_params = dict()
535
+ # batch_smplx_params["betas"] = shape_param.to(device)
536
+ # batch_smplx_params['transform_mat_neutral_pose'] = transform_mat_neutral_pose
537
+ # for key in keys:
538
+ # batch_smplx_params[key] = motion_seq["smplx_params"][key][
539
+ # :, batch_i : batch_i + batch_size
540
+ # ].to(device)
541
+
542
+ # res = lhm.animation_infer(gs_model_list, query_points, batch_smplx_params,
543
+ # render_c2ws=motion_seq["render_c2ws"][
544
+ # :, batch_i : batch_i + batch_size
545
+ # ].to(device),
546
+ # render_intrs=motion_seq["render_intrs"][
547
+ # :, batch_i : batch_i + batch_size
548
+ # ].to(device),
549
+ # render_bg_colors=motion_seq["render_bg_colors"][
550
+ # :, batch_i : batch_i + batch_size
551
+ # ].to(device),
552
+ # )
553
+
554
+ # for accumulate_key in ["comp_rgb", "comp_mask"]:
555
+ # if accumulate_key not in batch_dict:
556
+ # batch_dict[accumulate_key] = []
557
+ # batch_dict[accumulate_key].append(res[accumulate_key].detach().cpu())
558
+ # del res
559
+ # torch.cuda.empty_cache()
560
+
561
+ # for accumulate_key in ["comp_rgb", "comp_mask"]:
562
+ # batch_dict[accumulate_key] = torch.cat(batch_dict[accumulate_key], dim=0)
563
+
564
+ # print(f"time elapsed: {time.time() - start_time}")
565
+ # rgb = batch_dict["comp_rgb"].detach().cpu().numpy() # [Nv, H, W, 3], 0-1
566
+ # mask = batch_dict["comp_mask"].detach().cpu().numpy() # [Nv, H, W, 3], 0-1
567
+ # mask[mask < 0.5] = 0.0
568
+
569
+ # rgb = rgb * mask + (1 - mask) * 1
570
+ # rgb = np.clip(rgb * 255, 0, 255).astype(np.uint8)
571
+
572
+ # if vis_motion:
573
+ # # print(rgb.shape, motion_seq["vis_motion_render"].shape)
574
+
575
+ # vis_ref_img = np.tile(
576
+ # cv2.resize(vis_ref_img, (rgb[0].shape[1], rgb[0].shape[0]))[
577
+ # None, :, :, :
578
+ # ],
579
+ # (rgb.shape[0], 1, 1, 1),
580
+ # )
581
+ # rgb = np.concatenate(
582
+ # [rgb, motion_seq["vis_motion_render"], vis_ref_img], axis=2
583
+ # )
584
+
585
+ # os.makedirs(os.path.dirname(dump_video_path), exist_ok=True)
586
+
587
+ # images_to_video(
588
+ # rgb,
589
+ # output_path=dump_video_path,
590
+ # fps=render_fps,
591
+ # gradio_codec=False,
592
+ # verbose=True,
593
+ # )
594
+
595
+ # # self.infer_single(
596
+ # # image_path,
597
+ # # motion_seqs_dir=motion_seqs_dir,
598
+ # # motion_img_dir=None,
599
+ # # motion_video_read_fps=30,
600
+ # # export_video=False,
601
+ # # export_mesh=False,
602
+ # # dump_tmp_dir=dump_image_dir,
603
+ # # dump_image_dir=dump_image_dir,
604
+ # # dump_video_path=dump_video_path,
605
+ # # shape_param=shape_pose.beta,
606
+ # # )
607
+
608
+ # # status = spaces.GPU(infer_impl(
609
+ # # gradio_demo_image=image_raw,
610
+ # # gradio_motion_file=smplx_params_dir,
611
+ # # gradio_masked_image=dump_image_path,
612
+ # # gradio_video_save_path=dump_video_path
613
+ # # ))
614
+
615
+ # return dump_image_path, dump_video_path
616
+ # # if status:
617
+ # # return dump_image_path, dump_video_path
618
+ # # else:
619
+ # # return None, None
620
+
621
+ # _TITLE = '''LHM: Large Animatable Human Model'''
622
+
623
+ # _DESCRIPTION = '''
624
+ # <strong>Reconstruct a human avatar in 0.2 seconds with A100!</strong>
625
+ # '''
626
+
627
+ # with gr.Blocks(analytics_enabled=False) as demo:
628
+
629
+ # # </div>
630
+ # logo_url = "./assets/rgba_logo_new.png"
631
+ # logo_base64 = get_image_base64(logo_url)
632
+ # gr.HTML(
633
+ # f"""
634
+ # <div style="display: flex; justify-content: center; align-items: center; text-align: center;">
635
+ # <div>
636
+ # <h1> <img src="{logo_base64}" style='height:35px; display:inline-block;'/> Large Animatable Human Model </h1>
637
+ # </div>
638
+ # </div>
639
+ # """
640
+ # )
641
+ # gr.HTML(
642
+ # """<p><h4 style="color: red;"> Notes: Please input full-body image in case of detection errors.</h4></p>"""
643
+ # )
644
+
645
+ # # DISPLAY
646
+ # with gr.Row():
647
+
648
+ # with gr.Column(variant='panel', scale=1):
649
+ # with gr.Tabs(elem_id="openlrm_input_image"):
650
+ # with gr.TabItem('Input Image'):
651
+ # with gr.Row():
652
+ # input_image = gr.Image(label="Input Image", image_mode="RGBA", height=480, width=270, sources="upload", type="numpy", elem_id="content_image")
653
+ # # EXAMPLES
654
+ # with gr.Row():
655
+ # examples = [
656
+ # ['assets/sample_input/joker.jpg'],
657
+ # ['assets/sample_input/anime.png'],
658
+ # ['assets/sample_input/basket.png'],
659
+ # ['assets/sample_input/ai_woman1.JPG'],
660
+ # ['assets/sample_input/anime2.JPG'],
661
+ # ['assets/sample_input/anime3.JPG'],
662
+ # ['assets/sample_input/boy1.png'],
663
+ # ['assets/sample_input/choplin.jpg'],
664
+ # ['assets/sample_input/eins.JPG'],
665
+ # ['assets/sample_input/girl1.png'],
666
+ # ['assets/sample_input/girl2.png'],
667
+ # ['assets/sample_input/robot.jpg'],
668
+ # ]
669
+ # gr.Examples(
670
+ # examples=examples,
671
+ # inputs=[input_image],
672
+ # examples_per_page=20,
673
+ # )
674
+
675
+ # with gr.Column():
676
+ # with gr.Tabs(elem_id="openlrm_input_video"):
677
+ # with gr.TabItem('Input Video'):
678
+ # with gr.Row():
679
+ # video_input = gr.Video(label="Input Video",height=480, width=270, interactive=False)
680
+
681
+ # examples = [
682
+ # # './assets/sample_motion/danaotiangong/danaotiangong_origin.mp4',
683
+ # './assets/sample_motion/ex5/ex5_origin.mp4',
684
+ # './assets/sample_motion/girl2/girl2_origin.mp4',
685
+ # './assets/sample_motion/jntm/jntm_origin.mp4',
686
+ # './assets/sample_motion/mimo1/mimo1_origin.mp4',
687
+ # './assets/sample_motion/mimo2/mimo2_origin.mp4',
688
+ # './assets/sample_motion/mimo4/mimo4_origin.mp4',
689
+ # './assets/sample_motion/mimo5/mimo5_origin.mp4',
690
+ # './assets/sample_motion/mimo6/mimo6_origin.mp4',
691
+ # './assets/sample_motion/nezha/nezha_origin.mp4',
692
+ # './assets/sample_motion/taiji/taiji_origin.mp4'
693
+ # ]
694
+
695
+ # gr.Examples(
696
+ # examples=examples,
697
+ # inputs=[video_input],
698
+ # examples_per_page=20,
699
+ # )
700
+ # with gr.Column(variant='panel', scale=1):
701
+ # with gr.Tabs(elem_id="openlrm_processed_image"):
702
+ # with gr.TabItem('Processed Image'):
703
+ # with gr.Row():
704
+ # processed_image = gr.Image(label="Processed Image", image_mode="RGBA", type="filepath", elem_id="processed_image", height=480, width=270, interactive=False)
705
+
706
+ # with gr.Column(variant='panel', scale=1):
707
+ # with gr.Tabs(elem_id="openlrm_render_video"):
708
+ # with gr.TabItem('Rendered Video'):
709
+ # with gr.Row():
710
+ # output_video = gr.Video(label="Rendered Video", format="mp4", height=480, width=270, autoplay=True)
711
+
712
+ # # SETTING
713
+ # with gr.Row():
714
+ # with gr.Column(variant='panel', scale=1):
715
+ # submit = gr.Button('Generate', elem_id="openlrm_generate", variant='primary')
716
+
717
+
718
+ # working_dir = gr.State()
719
+ # submit.click(
720
+ # fn=assert_input_image,
721
+ # inputs=[input_image],
722
+ # queue=False,
723
+ # ).success(
724
+ # fn=prepare_working_dir,
725
+ # outputs=[working_dir],
726
+ # queue=False,
727
+ # ).success(
728
+ # fn=core_fn,
729
+ # inputs=[input_image, video_input, working_dir], # video_params refer to smpl dir
730
+ # outputs=[processed_image, output_video],
731
+ # )
732
+
733
+ # demo.queue()
734
+ # demo.launch()
735
+
736
+
737
+ # def launch_gradio_app():
738
+
739
+ # os.environ.update({
740
+ # "APP_ENABLED": "1",
741
+ # "APP_MODEL_NAME": "./exps/releases/video_human_benchmark/human-lrm-500M/step_060000/",
742
+ # "APP_INFER": "./configs/inference/human-lrm-500M.yaml",
743
+ # "APP_TYPE": "infer.human_lrm",
744
+ # "NUMBA_THREADING_LAYER": 'omp',
745
+ # })
746
+
747
+ # # from LHM.runners import REGISTRY_RUNNERS
748
+ # # RunnerClass = REGISTRY_RUNNERS[os.getenv("APP_TYPE")]
749
+ # # with RunnerClass() as runner:
750
+ # # runner.to('cuda')
751
+ # # demo_lhm(infer_impl=runner.infer)
752
+
753
+ # facedetector = VGGHeadDetector(
754
+ # "./pretrained_models/gagatracker/vgghead/vgg_heads_l.trcd",
755
+ # device='cpu',
756
+ # )
757
+ # facedetector.to('cuda')
758
+
759
+ # pose_estimator = PoseEstimator(
760
+ # "./pretrained_models/human_model_files/", device='cpu'
761
+ # )
762
+ # pose_estimator.to('cuda')
763
+ # pose_estimator.device = 'cuda'
764
+
765
+ # cfg, cfg_train = parse_configs()
766
+ # lhm = _build_model(cfg)
767
+ # lhm.to('cuda')
768
+
769
+ # demo_lhm(pose_estimator, facedetector, lhm, cfg)
770
+
771
+
772
+
773
+ # if __name__ == '__main__':
774
+ # # launch_pretrained()
775
+ # # launch_env_not_compile_with_cuda()
776
+ # launch_gradio_app()
777
 
778
+ import gradio as gr
779
 
780
+ def greet(name):
781
+ return "Hello " + name + "!!"
782
 
783
+ demo = gr.Interface(fn=greet, inputs="text", outputs="text")
784
+ demo.launch()
wheels/diff_gaussian_rasterization-0.0.0-cp310-cp310-linux_x86_64.whl CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:18969ebb896d921bc4c54630e5edf990898ee9505c2cc46c4feb3486a959ce54
-size 3373299
+oid sha256:e0f3184936fcc68139947905916039ddf5973c5e3c0bd2d4680565bf89934e22
+size 3408819
wheels/simple_knn-0.0.0-cp310-cp310-linux_x86_64.whl CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:21ffecc42d12fe431e71ded0297c2b3ab4586b668a432d41e58d7440a15b274d
-size 3130569
+oid sha256:07408595ab166dfcc6cdd4422b8e94cb615abcf7c799ef0d43a66ad746f09373
+size 3182640
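
Both wheels are stored with Git LFS, so the diff only touches the pointer files: each rebuilt wheel gets a new oid sha256 and size. As a general illustration (not a script from this repository), a locally downloaded or rebuilt wheel can be checked against the pointer's digest like this:

import hashlib

def sha256_of(path, chunk_size=1 << 20):
    # Stream the file so large wheels do not have to fit in memory at once.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

# Expected value copied from the updated LFS pointer above.
expected = "e0f3184936fcc68139947905916039ddf5973c5e3c0bd2d4680565bf89934e22"
actual = sha256_of("wheels/diff_gaussian_rasterization-0.0.0-cp310-cp310-linux_x86_64.whl")
print("OK" if actual == expected else f"sha256 mismatch: {actual}")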