Amanpreet committed on
Commit 82d4b57 · 1 Parent(s): 9d2691d
VideoToNPZ/gen_skes.py CHANGED
@@ -13,6 +13,17 @@ import signal
13
 
14
  warnings.filterwarnings('ignore')
15
16
  sys.path.insert(0, osp.dirname(osp.realpath(__file__)))
17
  from tools.utils import get_path
18
  from model.gast_net import SpatioTemporalModel, SpatioTemporalModelOptimized1f
@@ -23,6 +34,7 @@ from tools.preprocess import load_kpts_json, h36m_coco_format, revise_kpts, revi
23
  from tools.inference import gen_pose
24
  from tools.vis_kpts import plot_keypoint
25
 
 
26
  cur_dir, chk_root, data_root, lib_root, output_root = get_path(__file__)
27
  model_dir = chk_root + 'gastnet/'
28
  sys.path.insert(1, lib_root)
@@ -37,14 +49,6 @@ adj = adj_mx_from_skeleton(skeleton)
37
  joints_left, joints_right = [4, 5, 6, 11, 12, 13], [1, 2, 3, 14, 15, 16]
38
  kps_left, kps_right = [4, 5, 6, 11, 12, 13], [1, 2, 3, 14, 15, 16]
39
 
40
- def signal_handler(sig, frame):
41
- print("\nInterrupted by user, shutting down...")
42
- if 'pool' in locals() and pool is not None:
43
- pool.terminate()
44
- pool.join()
45
- sys.exit(0)
46
-
47
- signal.signal(signal.SIGINT, signal_handler)
48
 
49
  def load_model_layer():
50
  chk = model_dir + '81_frame_model.bin'
@@ -63,6 +67,11 @@ def load_model_layer():
63
  return model_pos
64
 
65
  def generate_skeletons(video=''):
66
  cap = cv2.VideoCapture(video)
67
  width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
68
  height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
 
13
 
14
  warnings.filterwarnings('ignore')
15
 
16
+ def signal_handler(sig, frame):
17
+ print("\nInterrupted by user, shutting down...")
18
+ if 'loader_thread' in globals() and loader_thread.is_alive():
19
+ loader_thread.join(timeout=1.0) # Give the thread 1 second to finish
20
+ if torch.cuda.is_available():
21
+ torch.cuda.empty_cache() # Free GPU memory immediately
22
+ os._exit(0)
23
+
24
+ # Register the signal handler
25
+ signal.signal(signal.SIGINT, signal_handler)
26
+
27
  sys.path.insert(0, osp.dirname(osp.realpath(__file__)))
28
  from tools.utils import get_path
29
  from model.gast_net import SpatioTemporalModel, SpatioTemporalModelOptimized1f
 
34
  from tools.inference import gen_pose
35
  from tools.vis_kpts import plot_keypoint
36
 
37
+
38
  cur_dir, chk_root, data_root, lib_root, output_root = get_path(__file__)
39
  model_dir = chk_root + 'gastnet/'
40
  sys.path.insert(1, lib_root)
 
49
  joints_left, joints_right = [4, 5, 6, 11, 12, 13], [1, 2, 3, 14, 15, 16]
50
  kps_left, kps_right = [4, 5, 6, 11, 12, 13], [1, 2, 3, 14, 15, 16]
51
52
 
53
  def load_model_layer():
54
  chk = model_dir + '81_frame_model.bin'
 
67
  return model_pos
68
 
69
  def generate_skeletons(video=''):
70
+ def force_exit(sig, frame):
71
+ print("\nForce terminating...")
72
+ os._exit(1)
73
+ signal.signal(signal.SIGINT, force_exit)
74
+
75
  cap = cv2.VideoCapture(video)
76
  width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
77
  height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
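
For reference, the interrupt handling introduced in this file follows a common pattern: register a SIGINT handler that lets a worker thread wind down briefly, releases cached GPU memory, and exits the process immediately. Below is a minimal, self-contained sketch of that pattern; the `loader_thread` name mirrors the committed code, but the worker here is a stand-in, not the project's frame loader.

```python
import os
import signal
import threading
import time

import torch

# Stand-in worker; in the committed code this is the frame-loader thread.
loader_thread = threading.Thread(target=time.sleep, args=(5.0,))


def signal_handler(sig, frame):
    print("\nInterrupted by user, shutting down...")
    if loader_thread.is_alive():
        loader_thread.join(timeout=1.0)  # give the worker one second to finish
    if torch.cuda.is_available():
        torch.cuda.empty_cache()  # free cached GPU memory before exiting
    os._exit(0)  # exit immediately, skipping normal interpreter cleanup


signal.signal(signal.SIGINT, signal_handler)

if __name__ == '__main__':
    loader_thread.start()
    loader_thread.join()
```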
VideoToNPZ/lib/pose/hrnet/pose_estimation/gen_kpts.py CHANGED
@@ -17,6 +17,18 @@ import json
17
  import torch.multiprocessing as mp
18
  from functools import partial
19
  from io import StringIO
20
 
21
  import _init_paths
22
  from _init_paths import get_path
@@ -50,7 +62,7 @@ def parse_args():
50
  parser.add_argument('-a', '--animation', action='store_true', help='output animation')
51
  parser.add_argument('-np', '--num-person', type=int, default=1)
52
  parser.add_argument("-v", "--video", type=str, default='camera')
53
- parser.add_argument('--batch-size', type=int, default=16)
54
  args = parser.parse_args()
55
  return args
56
 
@@ -69,7 +81,7 @@ def model_load(config, use_fp16=False):
69
  new_state_dict[k] = v
70
  model.load_state_dict(new_state_dict)
71
  if torch.cuda.is_available() and use_fp16:
72
- model = model.half().cuda()
73
  elif torch.cuda.is_available():
74
  model = model.cuda()
75
  model.eval()
@@ -78,7 +90,7 @@ def model_load(config, use_fp16=False):
78
  def load_default_model():
79
  args = parse_args()
80
  reset_config(args)
81
- model = eval('models.' + config.MODEL.NAME + '.get_pose_net')(cfg, is_train=False)
82
  if torch.cuda.is_available():
83
  model = model.cuda()
84
  state_dict = torch.load(cfg.OUTPUT_DIR)
@@ -100,7 +112,7 @@ def frame_loader(video, queue, video_length):
100
  queue.put(None)
101
  cap.release()
102
 
103
- def process_batch(frames, human_model, pose_model, det_dim, num_person, thred_score, use_fp16, device):
104
  if not frames:
105
  return [], []
106
 
@@ -114,14 +126,15 @@ def process_batch(frames, human_model, pose_model, det_dim, num_person, thred_sc
114
  if bboxs is None or not bboxs.any():
115
  continue
116
 
117
- people_track = Sort().update(bboxs)
118
  if people_track.shape[0] == 0:
119
  continue
120
- people_track_ = people_track[-min(num_person, people_track.shape[0]):, :-1]
121
- track_bboxs = [[round(i, 2) for i in list(bbox)] for bbox in people_track_]
 
122
 
123
- inputs, _, center, scale = PreProcess(frame, track_bboxs, cfg, len(track_bboxs))
124
- inputs = inputs[:, [2, 1, 0]]
125
  batch_bboxs.append(track_bboxs)
126
  batch_centers.append(center)
127
  batch_scales.append(scale)
@@ -131,12 +144,11 @@ def process_batch(frames, human_model, pose_model, det_dim, num_person, thred_sc
131
  return [], []
132
 
133
  inputs = torch.cat(batch_inputs, dim=0).to(device)
134
- if use_fp16:
135
- inputs = inputs.half()
136
-
137
  with torch.no_grad():
138
  outputs = pose_model(inputs)
139
- outputs = outputs.cpu().float()
140
 
141
  kpts_result = []
142
  scores_result = []
@@ -147,24 +159,36 @@ def process_batch(frames, human_model, pose_model, det_dim, num_person, thred_sc
147
  np.asarray(center).flatten(), np.asarray(scale).flatten())
148
  offset += batch_size
149
 
150
- kpts = np.zeros((len(preds), 17, 2), dtype=np.float32)
151
- scores = np.zeros((len(preds), 17), dtype=np.float32)
152
- for j, (kpt, score) in enumerate(zip(preds, maxvals)):
153
- kpts[j] = kpt
154
- scores[j] = score.squeeze()
155
  kpts_result.append(kpts)
156
  scores_result.append(scores)
157
 
158
  return kpts_result, scores_result
159
 
160
- def gen_video_kpts(video, det_dim=416, num_person=1, gen_output=False, batch_size=16, animation=False):
161
  args = parse_args()
162
  reset_config(args)
163
 
164
  cap = cv2.VideoCapture(video)
165
  assert cap.isOpened(), 'Cannot capture source'
166
-
167
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
168
  use_fp16 = device.type == 'cuda' and torch.cuda.get_device_capability()[0] >= 7
169
  batch_size = min(batch_size, torch.cuda.get_device_properties(0).total_memory // (1024**3) if device.type == 'cuda' else mp.cpu_count())
170
 
@@ -173,31 +197,23 @@ def gen_video_kpts(video, det_dim=416, num_person=1, gen_output=False, batch_siz
173
  people_sort = Sort()
174
 
175
  video_length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
176
- print('Recording 2D pose ...')
177
- sys.stdout.flush() # Ensure initial message shows up immediately
178
 
179
  if animation:
180
- # Animation mode uses frame-by-frame processing like in the backup code
181
  kpts_result = []
182
  scores_result = []
183
-
184
  for i in range(video_length):
185
  ret, frame = cap.read()
186
  if not ret:
187
  break
188
-
189
- # Detect humans
190
  bboxs, scores = yolo_det(frame, human_model, reso=det_dim, confidence=args.thred_score)
191
-
192
  if bboxs is None or not bboxs.any():
193
- print('No person detected!')
194
- sys.stdout.flush()
195
  continue
196
-
197
- # Track people
198
  people_track = people_sort.update(bboxs)
199
-
200
- # Select people to track
201
  if people_track.shape[0] == 1:
202
  people_track_ = people_track[-1, :-1].reshape(1, 4)
203
  elif people_track.shape[0] >= 2:
@@ -205,51 +221,48 @@ def gen_video_kpts(video, det_dim=416, num_person=1, gen_output=False, batch_siz
205
  people_track_ = people_track_[::-1]
206
  else:
207
  continue
208
-
209
  track_bboxs = []
210
  for bbox in people_track_:
211
  bbox = [round(i, 2) for i in list(bbox)]
212
  track_bboxs.append(bbox)
213
-
214
  with torch.no_grad():
215
- # Preprocess and get pose predictions
216
  inputs, origin_img, center, scale = PreProcess(frame, track_bboxs, cfg, num_person)
217
- inputs = inputs[:, [2, 1, 0]]
218
-
219
- if torch.cuda.is_available():
220
  inputs = inputs.cuda()
 
 
221
  output = pose_model(inputs)
222
-
223
- # Compute coordinates
224
  preds, maxvals = get_final_preds(cfg, output.clone().cpu().numpy(), np.asarray(center), np.asarray(scale))
225
-
226
  if gen_output:
227
- # Store results for later processing
228
  kpts = np.zeros((num_person, 17, 2), dtype=np.float32)
229
  scores = np.zeros((num_person, 17), dtype=np.float32)
230
-
231
  for j, kpt in enumerate(preds):
232
  kpts[j] = kpt
233
-
234
  for j, score in enumerate(maxvals):
235
  scores[j] = score.squeeze()
236
-
237
  kpts_result.append(kpts)
238
  scores_result.append(scores)
239
-
240
  else:
241
- # Visualize results in real-time
242
  index_bboxs = [bbox + [j] for j, bbox in enumerate(track_bboxs)]
243
  list(map(lambda x: write(x, frame), index_bboxs))
244
  plot_keypoint(frame, preds, maxvals, 0.3)
245
-
246
  cv2.imshow('frame', frame)
247
  key = cv2.waitKey(1)
248
  if key & 0xFF == ord('q'):
249
  break
250
  else:
251
- # Optimized batch processing with Queue (no StringIO redirection)
252
- frame_queue = mp.Queue(maxsize=batch_size * 2)
253
  loader_thread = Thread(target=frame_loader, args=(video, frame_queue, video_length))
254
  loader_thread.start()
255
 
@@ -258,112 +271,89 @@ def gen_video_kpts(video, det_dim=416, num_person=1, gen_output=False, batch_siz
258
  kpts_result = np.zeros((max_frames, num_person, 17, 2), dtype=np.float32)
259
  scores_result = np.zeros((max_frames, num_person, 17), dtype=np.float32)
260
  frame_idx = 0
 
261
 
262
- pool = None # Initialize pool outside try block for cleanup
263
  try:
264
  if device.type == 'cuda':
265
- # GPU batch processing
266
  batch_frames = []
267
  with torch.no_grad():
268
  for i in range(video_length):
269
- frame = frame_queue.get()
270
  if frame is None:
271
  break
272
  batch_frames.append(frame)
273
- progress = (i + 1) / video_length * 100
274
- print(f"PROGRESS:{progress:.2f}")
275
- sys.stdout.flush() # Force per-frame update
276
-
277
  if len(batch_frames) >= batch_size:
278
  kpts_batch, scores_batch = process_batch(batch_frames, human_model, pose_model,
279
  det_dim, num_person, args.thred_score,
280
- use_fp16, device)
281
  for kpts, scores in zip(kpts_batch, scores_batch):
282
  kpts_result[frame_idx:frame_idx + 1] = kpts[None, :num_person]
283
  scores_result[frame_idx:frame_idx + 1] = scores[None, :num_person]
284
  frame_idx += 1
285
  batch_frames = []
286
- progress = (frame_idx / video_length) * 100
287
- print(f"PROGRESS:{progress:.2f}")
288
- sys.stdout.flush() # Force after batch
289
-
290
- # Process remaining frames
291
- if batch_frames:
292
- kpts_batch, scores_batch = process_batch(batch_frames, human_model, pose_model,
293
- det_dim, num_person, args.thred_score,
294
- use_fp16, device)
295
- for kpts, scores in zip(kpts_batch, scores_batch):
296
- kpts_result[frame_idx:frame_idx + 1] = kpts[None, :num_person]
297
- scores_result[frame_idx:frame_idx + 1] = scores[None, :num_person]
298
- frame_idx += 1
299
- progress = (frame_idx / video_length) * 100
300
- print(f"PROGRESS:{progress:.2f}")
301
- sys.stdout.flush() # Force after batch
302
-
303
  # Process remaining frames
304
  if batch_frames:
305
  kpts_batch, scores_batch = process_batch(batch_frames, human_model, pose_model,
306
  det_dim, num_person, args.thred_score,
307
- use_fp16, device)
308
  for kpts, scores in zip(kpts_batch, scores_batch):
309
  kpts_result[frame_idx:frame_idx + 1] = kpts[None, :num_person]
310
  scores_result[frame_idx:frame_idx + 1] = scores[None, :num_person]
311
  frame_idx += 1
312
  progress = (frame_idx / video_length) * 100
313
- print(f"PROGRESS:{progress:.2f}")
314
- sys.stdout.flush() # Force final update
315
  else:
316
- # CPU batch processing with multiprocessing
317
- pool = mp.Pool(processes=mp.cpu_count())
318
- process_func = partial(process_batch, human_model=human_model, pose_model=pose_model,
319
- det_dim=det_dim, num_person=num_person, thred_score=args.thred_score,
320
- use_fp16=use_fp16, device=device)
321
-
322
  batch_frames = []
323
  with torch.no_grad():
324
  for i in range(video_length):
325
- frame = frame_queue.get()
326
  if frame is None:
327
  break
328
  batch_frames.append(frame)
329
- progress = (i + 1) / video_length * 100
330
- print(f"PROGRESS:{progress:.2f}")
331
- sys.stdout.flush() # Force per-frame update
332
  if len(batch_frames) >= batch_size:
333
- kpts_batch, scores_batch = process_func(batch_frames)
 
 
334
  for kpts, scores in zip(kpts_batch, scores_batch):
335
  kpts_result[frame_idx:frame_idx + 1] = kpts[None, :num_person]
336
  scores_result[frame_idx:frame_idx + 1] = scores[None, :num_person]
337
  frame_idx += 1
338
  batch_frames = []
339
- progress = (frame_idx / video_length) * 100
340
- print(f"PROGRESS:{progress:.2f}")
341
- sys.stdout.flush() # Force after batch
342
-
 
 
343
  # Process remaining frames
344
  if batch_frames:
345
- kpts_batch, scores_batch = process_func(batch_frames)
 
 
346
  for kpts, scores in zip(kpts_batch, scores_batch):
347
  kpts_result[frame_idx:frame_idx + 1] = kpts[None, :num_person]
348
  scores_result[frame_idx:frame_idx + 1] = scores[None, :num_person]
349
  frame_idx += 1
350
  progress = (frame_idx / video_length) * 100
351
- print(f"PROGRESS:{progress:.2f}")
352
- sys.stdout.flush() # Force final update
353
-
354
- pool.close()
355
- pool.join()
356
- except KeyboardInterrupt:
357
- print("\nInterrupted by user, shutting down...")
358
- sys.stdout.flush()
359
- if pool is not None:
360
- pool.terminate()
361
- pool.join()
362
  loader_thread.join()
363
- sys.exit(0)
364
 
365
- loader_thread.join()
366
-
367
  if gen_output and kpts_result.any():
368
  keypoints = kpts_result[:frame_idx].transpose(1, 0, 2, 3)
369
  scores = scores_result[:frame_idx].transpose(1, 0, 2)
@@ -389,8 +379,12 @@ def gen_img_kpts(image, human_model, pose_model, human_sort, det_dim=416, num_pe
389
  with torch.no_grad():
390
  inputs, origin_img, center, scale = PreProcess(image, bboxs_track, cfg, num_person)
391
  inputs = inputs[:, [2, 1, 0]]
392
- if torch.cuda.is_available():
 
 
393
  inputs = inputs.cuda()
 
 
394
  output = pose_model(inputs)
395
  preds, maxvals = get_final_preds(cfg, output.clone().cpu().numpy(), np.asarray(center), np.asarray(scale))
396
 
@@ -423,11 +417,10 @@ def generate_ntu_kpts_json(video_path, kpts_file):
423
  ret, frame = cap.read()
424
  if not ret:
425
  continue
426
-
427
  try:
428
  bboxs, scores = yolo_det(frame, human_model, confidence=args.thred_score)
429
  if bboxs is None or not bboxs.any():
430
- print('No person detected!')
431
  continue
432
 
433
  people_track = people_sort.update(bboxs)
@@ -447,14 +440,17 @@ def generate_ntu_kpts_json(video_path, kpts_file):
447
  bbox = [round(i, 3) for i in list(bbox)]
448
  track_bboxs.append(bbox)
449
 
450
- except Exception as e:
451
- print(e)
452
  continue
453
 
454
  inputs, origin_img, center, scale = PreProcess(frame, track_bboxs, cfg, args.num_person)
455
  inputs = inputs[:, [2, 1, 0]]
456
- if torch.cuda.is_available():
 
 
457
  inputs = inputs.cuda()
 
 
458
  output = pose_model(inputs)
459
  preds, maxvals = get_final_preds(cfg, output.clone().cpu().numpy(), np.asarray(center), np.asarray(scale))
460
 
@@ -473,6 +469,7 @@ def generate_ntu_kpts_json(video_path, kpts_file):
473
  kpts_info.update({'data': data})
474
  with open(kpts_file, 'w') as fw:
475
  json.dump(kpts_info, fw)
 
476
 
477
  def round_list(input_list, decimals=3):
478
  dim = len(input_list)
@@ -484,18 +481,12 @@ def round_list(input_list, decimals=3):
484
  if __name__ == "__main__":
485
  args = parse_args()
486
  video_path = args.video
487
-
488
  if args.animation:
489
- # Real-time animation mode
490
- gen_video_kpts(video_path, det_dim=args.det_dim, num_person=args.num_person,
491
  gen_output=False, animation=True)
492
  else:
493
- # Process and save keypoints
494
- keypoints, scores = gen_video_kpts(video_path, det_dim=args.det_dim,
495
- num_person=args.num_person,
496
- gen_output=True,
497
- batch_size=args.batch_size)
498
  if keypoints is not None:
499
  output_file = "output.npz"
500
- np.savez(output_file, keypoints=keypoints, scores=scores)
501
- print(f"Saved to {output_file}")
 
17
  import torch.multiprocessing as mp
18
  from functools import partial
19
  from io import StringIO
20
+ import signal
21
+
22
+ def signal_handler(sig, frame):
23
+ print("\nInterrupted by user, shutting down...")
24
+ if 'loader_thread' in globals() and loader_thread.is_alive():
25
+ loader_thread.join(timeout=1.0) # Give the thread 1 second to finish
26
+ if torch.cuda.is_available():
27
+ torch.cuda.empty_cache() # Free GPU memory immediately
28
+ os._exit(0)
29
+
30
+ # Register the signal handler
31
+ signal.signal(signal.SIGINT, signal_handler)
32
 
33
  import _init_paths
34
  from _init_paths import get_path
 
62
  parser.add_argument('-a', '--animation', action='store_true', help='output animation')
63
  parser.add_argument('-np', '--num-person', type=int, default=1)
64
  parser.add_argument("-v", "--video", type=str, default='camera')
65
+ parser.add_argument('--batch-size', type=int, default=8) # Reduced batch size
66
  args = parser.parse_args()
67
  return args
68
 
 
81
  new_state_dict[k] = v
82
  model.load_state_dict(new_state_dict)
83
  if torch.cuda.is_available() and use_fp16:
84
+ model = model.half().cuda() # Use FP16 if specified and CUDA available
85
  elif torch.cuda.is_available():
86
  model = model.cuda()
87
  model.eval()
 
90
  def load_default_model():
91
  args = parse_args()
92
  reset_config(args)
93
+ model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(cfg, is_train=False)
94
  if torch.cuda.is_available():
95
  model = model.cuda()
96
  state_dict = torch.load(cfg.OUTPUT_DIR)
 
112
  queue.put(None)
113
  cap.release()
114
 
115
+ def process_batch(frames, human_model, pose_model, det_dim, num_person, thred_score, use_fp16, device, people_sort):
116
  if not frames:
117
  return [], []
118
 
 
126
  if bboxs is None or not bboxs.any():
127
  continue
128
 
129
+ people_track = people_sort.update(bboxs)
130
  if people_track.shape[0] == 0:
131
  continue
132
+ num_to_track = min(num_person, people_track.shape[0])
133
+ people_track_ = people_track[-num_to_track:, :-1]
134
+ track_bboxs = np.round(people_track_, 2).tolist()
135
 
136
+ inputs, _, center, scale = PreProcess(frame, track_bboxs, cfg, num_to_track)
137
+ inputs = inputs[:, [2, 1, 0]] # BGR to RGB
138
  batch_bboxs.append(track_bboxs)
139
  batch_centers.append(center)
140
  batch_scales.append(scale)
 
144
  return [], []
145
 
146
  inputs = torch.cat(batch_inputs, dim=0).to(device)
147
+ if use_fp16 and device.type == 'cuda':
148
+ inputs = inputs.half() # Convert to FP16 to match model precision
 
149
  with torch.no_grad():
150
  outputs = pose_model(inputs)
151
+ outputs = outputs.cpu().float() # Ensure output is FP32 for post-processing
152
 
153
  kpts_result = []
154
  scores_result = []
 
159
  np.asarray(center).flatten(), np.asarray(scale).flatten())
160
  offset += batch_size
161
 
162
+ kpts = np.zeros((batch_size, 17, 2), dtype=np.float32)
163
+ scores = np.zeros((batch_size, 17), dtype=np.float32)
164
+ for j in range(batch_size):
165
+ kpts[j] = preds[j]
166
+ scores[j] = maxvals[j].squeeze()
167
  kpts_result.append(kpts)
168
  scores_result.append(scores)
169
 
170
  return kpts_result, scores_result
171
 
172
+ def gen_video_kpts(video, det_dim=416, num_person=1, gen_output=False, batch_size=8, animation=False):
173
+
174
+ def force_exit(sig, frame):
175
+ print("\nForce terminating...")
176
+ os._exit(1)
177
+
178
+ signal.signal(signal.SIGINT, force_exit)
179
+
180
+
181
  args = parse_args()
182
  reset_config(args)
183
 
184
  cap = cv2.VideoCapture(video)
185
  assert cap.isOpened(), 'Cannot capture source'
186
+
187
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
188
+ torch.set_num_threads(max(1, mp.cpu_count() - 1)) # Match thread count to processes
189
+ torch.autograd.set_grad_enabled(False) # Explicitly disable gradients
190
+
191
+ # Determine FP16 usage based on device capability
192
  use_fp16 = device.type == 'cuda' and torch.cuda.get_device_capability()[0] >= 7
193
  batch_size = min(batch_size, torch.cuda.get_device_properties(0).total_memory // (1024**3) if device.type == 'cuda' else mp.cpu_count())
194
 
 
197
  people_sort = Sort()
198
 
199
  video_length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
 
 
200
 
201
  if animation:
 
202
  kpts_result = []
203
  scores_result = []
204
+
205
  for i in range(video_length):
206
  ret, frame = cap.read()
207
  if not ret:
208
  break
209
+
 
210
  bboxs, scores = yolo_det(frame, human_model, reso=det_dim, confidence=args.thred_score)
211
+
212
  if bboxs is None or not bboxs.any():
 
 
213
  continue
214
+
 
215
  people_track = people_sort.update(bboxs)
216
+
 
217
  if people_track.shape[0] == 1:
218
  people_track_ = people_track[-1, :-1].reshape(1, 4)
219
  elif people_track.shape[0] >= 2:
 
221
  people_track_ = people_track_[::-1]
222
  else:
223
  continue
224
+
225
  track_bboxs = []
226
  for bbox in people_track_:
227
  bbox = [round(i, 2) for i in list(bbox)]
228
  track_bboxs.append(bbox)
229
+
230
  with torch.no_grad():
 
231
  inputs, origin_img, center, scale = PreProcess(frame, track_bboxs, cfg, num_person)
232
+ inputs = inputs[:, [2, 1, 0]] # BGR to RGB
233
+
234
+ if device.type == 'cuda':
235
  inputs = inputs.cuda()
236
+ if use_fp16:
237
+ inputs = inputs.half() # Convert to FP16 if model is in FP16
238
  output = pose_model(inputs)
239
+
 
240
  preds, maxvals = get_final_preds(cfg, output.clone().cpu().numpy(), np.asarray(center), np.asarray(scale))
241
+
242
  if gen_output:
 
243
  kpts = np.zeros((num_person, 17, 2), dtype=np.float32)
244
  scores = np.zeros((num_person, 17), dtype=np.float32)
245
+
246
  for j, kpt in enumerate(preds):
247
  kpts[j] = kpt
248
+
249
  for j, score in enumerate(maxvals):
250
  scores[j] = score.squeeze()
251
+
252
  kpts_result.append(kpts)
253
  scores_result.append(scores)
254
+
255
  else:
 
256
  index_bboxs = [bbox + [j] for j, bbox in enumerate(track_bboxs)]
257
  list(map(lambda x: write(x, frame), index_bboxs))
258
  plot_keypoint(frame, preds, maxvals, 0.3)
259
+
260
  cv2.imshow('frame', frame)
261
  key = cv2.waitKey(1)
262
  if key & 0xFF == ord('q'):
263
  break
264
  else:
265
+ frame_queue = Queue(maxsize=batch_size) # Use regular Queue instead of mp.Queue
 
266
  loader_thread = Thread(target=frame_loader, args=(video, frame_queue, video_length))
267
  loader_thread.start()
268
 
 
271
  kpts_result = np.zeros((max_frames, num_person, 17, 2), dtype=np.float32)
272
  scores_result = np.zeros((max_frames, num_person, 17), dtype=np.float32)
273
  frame_idx = 0
274
+ people_sort = Sort()
275
 
 
276
  try:
277
  if device.type == 'cuda':
 
278
  batch_frames = []
279
  with torch.no_grad():
280
  for i in range(video_length):
281
+ frame = frame_queue.get(timeout=1.0)
282
  if frame is None:
283
  break
284
  batch_frames.append(frame)
285
+
 
 
 
286
  if len(batch_frames) >= batch_size:
287
  kpts_batch, scores_batch = process_batch(batch_frames, human_model, pose_model,
288
  det_dim, num_person, args.thred_score,
289
+ use_fp16, device, people_sort)
290
  for kpts, scores in zip(kpts_batch, scores_batch):
291
  kpts_result[frame_idx:frame_idx + 1] = kpts[None, :num_person]
292
  scores_result[frame_idx:frame_idx + 1] = scores[None, :num_person]
293
  frame_idx += 1
294
  batch_frames = []
295
+
296
+ # Print progress every batch
297
+ if i % batch_size == 0:
298
+ progress = ((i + 1) / video_length) * 100
299
+ print(f"PROGRESS:{progress:.2f}%")
300
+
 
 
301
  # Process remaining frames
302
  if batch_frames:
303
  kpts_batch, scores_batch = process_batch(batch_frames, human_model, pose_model,
304
  det_dim, num_person, args.thred_score,
305
+ use_fp16, device, people_sort)
306
  for kpts, scores in zip(kpts_batch, scores_batch):
307
  kpts_result[frame_idx:frame_idx + 1] = kpts[None, :num_person]
308
  scores_result[frame_idx:frame_idx + 1] = scores[None, :num_person]
309
  frame_idx += 1
310
  progress = (frame_idx / video_length) * 100
311
+ print(f"PROGRESS:{progress:.2f}%")
 
312
  else:
313
+ # Sequential processing for CPU to avoid multiprocessing overhead
314
  batch_frames = []
315
  with torch.no_grad():
316
  for i in range(video_length):
317
+ frame = frame_queue.get(timeout=1.0)
318
  if frame is None:
319
  break
320
  batch_frames.append(frame)
321
+
 
 
322
  if len(batch_frames) >= batch_size:
323
+ kpts_batch, scores_batch = process_batch(batch_frames, human_model, pose_model,
324
+ det_dim, num_person, args.thred_score,
325
+ use_fp16, device, people_sort)
326
  for kpts, scores in zip(kpts_batch, scores_batch):
327
  kpts_result[frame_idx:frame_idx + 1] = kpts[None, :num_person]
328
  scores_result[frame_idx:frame_idx + 1] = scores[None, :num_person]
329
  frame_idx += 1
330
  batch_frames = []
331
+
332
+ # Print progress every batch
333
+ if i % batch_size == 0:
334
+ progress = ((i + 1) / video_length) * 100
335
+ print(f"PROGRESS:{progress:.2f}%")
336
+
337
  # Process remaining frames
338
  if batch_frames:
339
+ kpts_batch, scores_batch = process_batch(batch_frames, human_model, pose_model,
340
+ det_dim, num_person, args.thred_score,
341
+ use_fp16, device, people_sort)
342
  for kpts, scores in zip(kpts_batch, scores_batch):
343
  kpts_result[frame_idx:frame_idx + 1] = kpts[None, :num_person]
344
  scores_result[frame_idx:frame_idx + 1] = scores[None, :num_person]
345
  frame_idx += 1
346
  progress = (frame_idx / video_length) * 100
347
+ print(f"PROGRESS:{progress:.2f}%")
348
+ except Exception as e:
349
  loader_thread.join()
350
+ raise
351
+ finally:
352
+ loader_thread.join()
353
+ cap.release()
354
+ if device.type == 'cuda':
355
+ torch.cuda.empty_cache() # Free GPU memory
356
 
 
 
357
  if gen_output and kpts_result.any():
358
  keypoints = kpts_result[:frame_idx].transpose(1, 0, 2, 3)
359
  scores = scores_result[:frame_idx].transpose(1, 0, 2)
 
379
  with torch.no_grad():
380
  inputs, origin_img, center, scale = PreProcess(image, bboxs_track, cfg, num_person)
381
  inputs = inputs[:, [2, 1, 0]]
382
+ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
383
+ use_fp16 = device.type == 'cuda' and torch.cuda.get_device_capability()[0] >= 7
384
+ if device.type == 'cuda':
385
  inputs = inputs.cuda()
386
+ if use_fp16:
387
+ inputs = inputs.half() # Match model precision
388
  output = pose_model(inputs)
389
  preds, maxvals = get_final_preds(cfg, output.clone().cpu().numpy(), np.asarray(center), np.asarray(scale))
390
 
 
417
  ret, frame = cap.read()
418
  if not ret:
419
  continue
420
+
421
  try:
422
  bboxs, scores = yolo_det(frame, human_model, confidence=args.thred_score)
423
  if bboxs is None or not bboxs.any():
 
424
  continue
425
 
426
  people_track = people_sort.update(bboxs)
 
440
  bbox = [round(i, 3) for i in list(bbox)]
441
  track_bboxs.append(bbox)
442
 
443
+ except Exception:
 
444
  continue
445
 
446
  inputs, origin_img, center, scale = PreProcess(frame, track_bboxs, cfg, args.num_person)
447
  inputs = inputs[:, [2, 1, 0]]
448
+ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
449
+ use_fp16 = device.type == 'cuda' and torch.cuda.get_device_capability()[0] >= 7
450
+ if device.type == 'cuda':
451
  inputs = inputs.cuda()
452
+ if use_fp16:
453
+ inputs = inputs.half() # Match model precision
454
  output = pose_model(inputs)
455
  preds, maxvals = get_final_preds(cfg, output.clone().cpu().numpy(), np.asarray(center), np.asarray(scale))
456
 
 
469
  kpts_info.update({'data': data})
470
  with open(kpts_file, 'w') as fw:
471
  json.dump(kpts_info, fw)
472
+ cap.release()
473
 
474
  def round_list(input_list, decimals=3):
475
  dim = len(input_list)
 
481
  if __name__ == "__main__":
482
  args = parse_args()
483
  video_path = args.video
484
+
485
  if args.animation:
486
+ gen_video_kpts(video_path, det_dim=args.det_dim, num_person=args.num_person,
 
487
  gen_output=False, animation=True)
488
  else:
489
+ keypoints, scores = gen_video_kpts(video_path, det_dim=416, num_person=1, gen_output=True, batch_size=8) # Use batch_size of 8
490
  if keypoints is not None:
491
  output_file = "output.npz"
492
+ np.savez(output_file, keypoints=keypoints, scores=scores)
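
The non-animation branch of `gen_video_kpts` above pairs a frame-loader thread with a bounded queue and hands frames to the pose models in fixed-size batches. Here is a stripped-down sketch of that producer/consumer structure, assuming only OpenCV; `process_batch` is a placeholder callback, not the HRNet pipeline from this file.

```python
from queue import Empty, Queue
from threading import Thread

import cv2


def frame_loader(video, frame_queue, video_length):
    # Producer: push decoded frames into the queue; None marks the end of the stream.
    cap = cv2.VideoCapture(video)
    for _ in range(video_length):
        ret, frame = cap.read()
        if not ret:
            break
        frame_queue.put(frame)
    frame_queue.put(None)
    cap.release()


def consume_in_batches(video, process_batch, batch_size=8):
    cap = cv2.VideoCapture(video)
    video_length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    cap.release()

    frame_queue = Queue(maxsize=batch_size)  # bounded, so the loader cannot run far ahead
    loader = Thread(target=frame_loader, args=(video, frame_queue, video_length))
    loader.start()

    batch = []
    try:
        for i in range(video_length):
            try:
                frame = frame_queue.get(timeout=1.0)
            except Empty:
                break
            if frame is None:
                break
            batch.append(frame)
            if len(batch) >= batch_size:
                process_batch(batch)
                batch = []
                print(f"PROGRESS:{(i + 1) / video_length * 100:.2f}%")
        if batch:
            process_batch(batch)  # flush the final partial batch
    finally:
        loader.join()
```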
 
app.py CHANGED
@@ -51,18 +51,21 @@ def run_command(command, working_dir, progress_bar, progress_text, step_start_pr
51
  if source == 'stdout':
52
  if show_progress and line.startswith("PROGRESS:"):
53
  try:
54
- progress = float(line.strip().split("PROGRESS:")[1]) / 100
 
55
  if Path(command[1]).name == 'gen_skes.py':
56
- if progress <= 1.0:
57
- adjusted_progress = step_start_progress + (progress * 0.6)
58
- else:
59
- adjusted_progress = step_start_progress + 0.6 + ((progress - 1.0) * 0.2)
60
- else:
61
- adjusted_progress = step_start_progress + (progress * step_weight)
62
- total_progress = min(adjusted_progress, step_start_progress + step_weight)
 
63
  progress_bar.progress(total_progress)
64
  progress_text.text(f"Progress: {int(total_progress * 100)}%")
65
- except ValueError:
 
66
  pass
67
  elif source == 'stderr':
68
  stderr_lines.append(line.strip())
 
51
  if source == 'stdout':
52
  if show_progress and line.startswith("PROGRESS:"):
53
  try:
54
+ progress_str = line.strip().split("PROGRESS:")[1].replace("%", "") # Remove '%'
55
+ progress = float(progress_str) # Convert to float after removing '%'
56
  if Path(command[1]).name == 'gen_skes.py':
57
+ if progress <= 100.0: # 2D Keypoint generation (0-100% maps to 0-60%)
58
+ adjusted_progress = step_start_progress + (progress / 100.0 * 0.6)
59
+ else: # 3D Pose generation (100-200% maps to 60-80%)
60
+ adjusted_progress = step_start_progress + 0.6 + ((progress - 100.0) / 100.0 * 0.2)
61
+ total_progress = min(adjusted_progress, step_start_progress + step_weight)
62
+ else: # For conver_bvh.py or others with 0-100% progress
63
+ adjusted_progress = step_start_progress + (progress / 100.0 * step_weight)
64
+ total_progress = min(adjusted_progress, step_start_progress + step_weight)
65
  progress_bar.progress(total_progress)
66
  progress_text.text(f"Progress: {int(total_progress * 100)}%")
67
+ except ValueError as e:
68
+ print(f"DEBUG: Error parsing progress: {e}")
69
  pass
70
  elif source == 'stderr':
71
  stderr_lines.append(line.strip())
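
The mapping above can be read as follows: `gen_skes.py` reports 0-100% while extracting 2D keypoints and 100-200% while lifting to 3D poses, and `app.py` folds those two phases into the first 60% and the next 20% of that step's share of the overall progress bar. A small standalone sketch of the same arithmetic; the `step_start_progress=0.0` and `step_weight=0.8` defaults are illustrative, not values taken from `app.py`.

```python
def map_gen_skes_progress(progress, step_start_progress=0.0, step_weight=0.8):
    # progress: float parsed from a "PROGRESS:<value>%" line printed by gen_skes.py,
    # where 0-100 covers 2D keypoint extraction and 100-200 covers 3D pose lifting.
    if progress <= 100.0:
        adjusted = step_start_progress + (progress / 100.0) * 0.6
    else:
        adjusted = step_start_progress + 0.6 + ((progress - 100.0) / 100.0) * 0.2
    return min(adjusted, step_start_progress + step_weight)


# Halfway through the 2D phase -> 30% overall; halfway through the 3D phase -> 70%.
assert abs(map_gen_skes_progress(50.0) - 0.30) < 1e-9
assert abs(map_gen_skes_progress(150.0) - 0.70) < 1e-9
```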
convertNPZtoBVH/conver_bvh.py CHANGED
@@ -2,11 +2,13 @@ import os
2
  import numpy as np
3
  from scipy.spatial.transform import Rotation
4
  from collections import deque
5
- from tqdm import tqdm
6
  import sys
7
  import argparse
8
 
9
- print(f"Saving 3D Motion")
 
 
 
10
 
11
  def parse_obj(filename):
12
  vertices = []
@@ -51,10 +53,11 @@ def build_hierarchy(lines, root=0):
51
 
52
  def compute_offsets(vertices_ref, parent):
53
  num_joints = len(vertices_ref)
 
54
  offsets = np.zeros((num_joints, 3))
55
  for j in range(num_joints):
56
  if parent[j] != -1:
57
- offsets[j] = vertices_ref[j] - vertices_ref[parent[j]]
58
  return offsets
59
 
60
  def compute_R_world(joint, vertices_ref, vertices_cur, children):
@@ -79,8 +82,7 @@ def compute_R_world(joint, vertices_ref, vertices_cur, children):
79
  return np.eye(3)
80
  axis = axis / axis_norm
81
  angle = np.arccos(cos_theta)
82
- R = Rotation.from_rotvec(axis * angle).as_matrix()
83
- return R
84
  else:
85
  A = np.column_stack([vertices_ref[c] - vertices_ref[joint] for c in children[joint]])
86
  B = np.column_stack([vertices_cur[c] - vertices_cur[joint] for c in children[joint]])
@@ -92,89 +94,231 @@ def compute_R_world(joint, vertices_ref, vertices_cur, children):
92
  R = U @ Vh
93
  return R
94
 
95
- def main(output_dir):
96
  folder = os.path.join(output_dir, 'obj_sequence')
97
 
98
  try:
99
  obj_files = sorted([f for f in os.listdir(folder) if f.endswith('.obj')])
100
  except Exception as e:
101
- print(f"Error accessing folder {folder}: {e}")
102
- return
103
 
104
  if not obj_files:
105
- print("No OBJ files found.")
106
- return
107
 
108
- try:
109
- vertices_ref, lines = parse_obj(os.path.join(folder, obj_files[0]))
110
- num_joints = len(vertices_ref)
111
- parent, children = build_hierarchy(lines)
112
- offsets = compute_offsets(vertices_ref, parent)
113
- root = 0
114
-
115
- hierarchy_order = []
116
- def dfs(joint):
117
- hierarchy_order.append(joint)
118
- for child in children[joint]:
119
- dfs(child)
120
- dfs(root)
121
-
122
- motion_data = []
123
- total_files = len(obj_files)
124
- for i in range(total_files):
125
- obj_file = obj_files[i]
126
- vertices_cur = parse_obj(os.path.join(folder, obj_file))[0]
127
- R_world = [compute_R_world(j, vertices_ref, vertices_cur, children) for j in range(num_joints)]
128
- R_local = [R_world[j] if parent[j] == -1 else R_world[parent[j]].T @ R_world[j] for j in range(num_joints)]
129
- euler_angles = [Rotation.from_matrix(R).as_euler('ZYX', degrees=True) for R in R_local]
130
- root_pos = vertices_cur[root]
131
- motion_line = list(root_pos) + list(euler_angles[root])
132
- for j_idx, j in enumerate(hierarchy_order[1:], 1):
133
- motion_line.extend(euler_angles[j])
134
- progress = ((i / total_files) + (j_idx / len(hierarchy_order) / total_files)) * 100
135
- print(f"PROGRESS:{progress:.2f}")
136
- sys.stdout.flush()
137
- motion_data.append(motion_line)
138
-
139
- bvh_dir = os.path.join(output_dir, 'bvh')
140
- os.makedirs(bvh_dir, exist_ok=True)
141
- bvh_file = os.path.join(bvh_dir, 'output.bvh')
142
-
143
- with open(bvh_file, 'w') as f:
144
- f.write("HIERARCHY\n")
145
- def write_hierarchy(joint, parent, f, indent=0):
146
- if parent == -1:
147
- f.write("ROOT Joint{}\n".format(joint))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
148
  else:
149
- f.write(" " * indent + "JOINT Joint{}\n".format(joint))
150
- f.write(" " * indent + "{\n")
151
- f.write(" " * (indent + 1) + "OFFSET {:.6f} {:.6f} {:.6f}\n".format(*offsets[joint]))
152
- if parent == -1:
153
- f.write(" " * (indent + 1) + "CHANNELS 6 Xposition Yposition Zposition Zrotation Yrotation Xrotation\n")
 
 
 
 
 
 
 
 
 
154
  else:
155
- f.write(" " * (indent + 1) + "CHANNELS 3 Zrotation Yrotation Xrotation\n")
156
- for child in children[joint]:
157
- write_hierarchy(child, joint, f, indent + 1)
158
- if not children[joint]:
159
- f.write(" " * (indent + 1) + "End Site\n")
160
- f.write(" " * (indent + 1) + "{\n")
161
- f.write(" " * (indent + 2) + "OFFSET 0.000000 0.000000 0.000000\n")
162
- f.write(" " * (indent + 1) + "}\n")
163
- f.write(" " * indent + "}\n")
164
-
165
- write_hierarchy(root, -1, f)
166
-
167
- f.write("MOTION\n")
168
- f.write("Frames: {}\n".format(len(motion_data)))
169
- f.write("Frame Time: 0.033333\n")
170
- for motion_line in motion_data:
171
- f.write(" ".join("{:.6f}".format(x) for x in motion_line) + "\n")
172
 
173
- except Exception as e:
174
- print(f"Error during processing: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
 
176
  if __name__ == "__main__":
177
- parser = argparse.ArgumentParser('Convert OBJ sequence to BVH.')
178
  parser.add_argument('--output-dir', type=str, default='../outputs/', help='Output directory containing obj_sequence')
179
  args = parser.parse_args()
180
- main(args.output_dir)
 
2
  import numpy as np
3
  from scipy.spatial.transform import Rotation
4
  from collections import deque
 
5
  import sys
6
  import argparse
7
 
8
+ # Custom print function to show only progress
9
+ def log_progress(message):
10
+ if message.startswith("PROGRESS:"):
11
+ print(message)
12
 
13
  def parse_obj(filename):
14
  vertices = []
 
53
 
54
  def compute_offsets(vertices_ref, parent):
55
  num_joints = len(vertices_ref)
56
+ scale_factor = 0.05
57
  offsets = np.zeros((num_joints, 3))
58
  for j in range(num_joints):
59
  if parent[j] != -1:
60
+ offsets[j] = (vertices_ref[j] - vertices_ref[parent[j]])*scale_factor
61
  return offsets
62
 
63
  def compute_R_world(joint, vertices_ref, vertices_cur, children):
 
82
  return np.eye(3)
83
  axis = axis / axis_norm
84
  angle = np.arccos(cos_theta)
85
+ return Rotation.from_rotvec(axis * angle).as_matrix()
 
86
  else:
87
  A = np.column_stack([vertices_ref[c] - vertices_ref[joint] for c in children[joint]])
88
  B = np.column_stack([vertices_cur[c] - vertices_cur[joint] for c in children[joint]])
 
94
  R = U @ Vh
95
  return R
96
 
97
+ def calculate_motion_velocity(rotations):
98
+ n_frames = len(rotations)
99
+ if n_frames <= 1:
100
+ return np.zeros(n_frames)
101
+
102
+ velocities = np.zeros(n_frames)
103
+ for i in range(1, n_frames):
104
+ prev_quat = rotations[i-1].as_quat()
105
+ curr_quat = rotations[i].as_quat()
106
+ if np.dot(prev_quat, curr_quat) < 0:
107
+ curr_quat = -curr_quat
108
+ diff = Rotation.from_quat(prev_quat).inv() * Rotation.from_quat(curr_quat)
109
+ velocities[i] = np.linalg.norm(diff.as_rotvec())
110
+ if n_frames > 1:
111
+ velocities[0] = velocities[1]
112
+ return velocities
113
+
114
+ def adaptive_smooth_rotations(rotations, window_size=7, velocity_threshold=0.03):
115
+ n_frames = len(rotations)
116
+ if n_frames <= 1:
117
+ return rotations
118
+
119
+ velocities = calculate_motion_velocity(rotations)
120
+ if np.max(velocities) > 0:
121
+ velocities = velocities / np.max(velocities)
122
+
123
+ smoothed = []
124
+ half_window = window_size // 2
125
+
126
+ for i in range(n_frames):
127
+ start_idx = max(0, i - half_window)
128
+ end_idx = min(n_frames - 1, i + half_window)
129
+ window_rots = rotations[start_idx:end_idx + 1]
130
+
131
+ velocity_factor = min(1.0, velocities[i] / velocity_threshold)
132
+ sigma = 0.5 + 1.5 * velocity_factor
133
+ dist = np.linspace(-1, 1, len(window_rots))
134
+ weights = np.exp(-sigma * np.square(dist))
135
+ weights = weights / np.sum(weights)
136
+
137
+ quats = [r.as_quat() for r in window_rots]
138
+ for j in range(1, len(quats)):
139
+ if np.dot(quats[0], quats[j]) < 0:
140
+ quats[j] = -quats[j]
141
+
142
+ result_quat = np.zeros(4)
143
+ for j in range(len(quats)):
144
+ result_quat += weights[j] * quats[j]
145
+ result_quat = result_quat / np.linalg.norm(result_quat)
146
+ smoothed.append(Rotation.from_quat(result_quat))
147
+
148
+ return smoothed
149
+
150
+ def adaptive_smooth_positions(positions, window_size=7, velocity_threshold=0.03):
151
+ n_frames = len(positions)
152
+ if n_frames <= 1:
153
+ return positions
154
+
155
+ positions = np.array(positions)
156
+ smoothed = np.zeros_like(positions)
157
+ half_window = window_size // 2
158
+
159
+ velocities = np.zeros(n_frames)
160
+ for i in range(1, n_frames):
161
+ velocities[i] = np.linalg.norm(positions[i] - positions[i-1])
162
+ velocities[0] = velocities[1]
163
+ if np.max(velocities) > 0:
164
+ velocities = velocities / np.max(velocities)
165
+
166
+ for i in range(n_frames):
167
+ start_idx = max(0, i - half_window)
168
+ end_idx = min(n_frames - 1, i + half_window)
169
+ window_pos = positions[start_idx:end_idx + 1]
170
+
171
+ velocity_factor = min(1.0, velocities[i] / velocity_threshold)
172
+ sigma = 0.5 + 1.5 * velocity_factor
173
+ dist = np.linspace(-1, 1, len(window_pos))
174
+ weights = np.exp(-sigma * np.square(dist))
175
+ weights = weights / np.sum(weights)
176
+
177
+ smoothed[i] = np.sum(window_pos * weights[:, np.newaxis], axis=0)
178
+
179
+ return smoothed
180
+
181
+ def detect_arm_joints(children, num_joints):
182
+ return [j for j in range(num_joints) if len(children[j]) == 1]
183
+
184
+ def main(output_dir, smoothing_window=8, velocity_threshold=0.04, joint_constraint=True):
185
  folder = os.path.join(output_dir, 'obj_sequence')
186
 
187
  try:
188
  obj_files = sorted([f for f in os.listdir(folder) if f.endswith('.obj')])
189
  except Exception as e:
190
+ sys.exit(f"Error accessing folder {folder}: {e}")
 
191
 
192
  if not obj_files:
193
+ sys.exit("No OBJ files found.")
 
194
 
195
+ vertices_ref, lines = parse_obj(os.path.join(folder, obj_files[0]))
196
+ num_joints = len(vertices_ref)
197
+ parent, children = build_hierarchy(lines)
198
+ offsets = compute_offsets(vertices_ref, parent)
199
+ root = 0
200
+
201
+ hierarchy_order = []
202
+ def dfs(joint):
203
+ hierarchy_order.append(joint)
204
+ for child in children[joint]:
205
+ dfs(child)
206
+ dfs(root)
207
+
208
+ arm_joints = detect_arm_joints(children, num_joints)
209
+
210
+ all_root_positions = []
211
+ all_positions = [[] for _ in range(num_joints)]
212
+ all_rotations = [[] for _ in range(num_joints)]
213
+
214
+ total_files = len(obj_files)
215
+ for i in range(total_files):
216
+ obj_file = obj_files[i]
217
+ vertices_cur = parse_obj(os.path.join(folder, obj_file))[0]
218
+ R_world = [compute_R_world(j, vertices_ref, vertices_cur, children) for j in range(num_joints)]
219
+ R_local = [R_world[j] if parent[j] == -1 else R_world[parent[j]].T @ R_world[j] for j in range(num_joints)]
220
+ rotations = [Rotation.from_matrix(R) for R in R_local]
221
+
222
+ all_root_positions.append(vertices_cur[root])
223
+ for j in range(num_joints):
224
+ all_positions[j].append(vertices_cur[j])
225
+ all_rotations[j].append(rotations[j])
226
+
227
+ # First half of progress (0-50%)
228
+ progress = (i / total_files) * 50
229
+ log_progress(f"PROGRESS:{progress:.2f}")
230
+
231
+ smoothed_root_positions = adaptive_smooth_positions(all_root_positions, smoothing_window, velocity_threshold)
232
+ smoothed_positions = [adaptive_smooth_positions(np.array(pos), smoothing_window, velocity_threshold) for pos in all_positions]
233
+ smoothed_rotations = [adaptive_smooth_rotations(rot, smoothing_window, velocity_threshold) for rot in all_rotations]
234
+
235
+ # Enforce bone lengths (no lengthening restraint)
236
+ for i in range(total_files):
237
+ # Start with root position
238
+ smoothed_positions[root][i] = smoothed_root_positions[i]
239
+ # Adjust each child joint to maintain bone length
240
+ for j in range(num_joints):
241
+ if parent[j] != -1: # Skip root
242
+ parent_pos = smoothed_positions[parent[j]][i]
243
+ child_pos = smoothed_positions[j][i]
244
+ ref_offset = offsets[j] # Reference bone length vector
245
+ bone_length = np.linalg.norm(ref_offset)
246
+ if bone_length < 1e-6:
247
+ continue # Skip if bone length is near zero
248
+ current_vec = child_pos - parent_pos
249
+ current_length = np.linalg.norm(current_vec)
250
+ if current_length < 1e-6:
251
+ # If current length is near zero, use reference direction
252
+ smoothed_positions[j][i] = parent_pos + ref_offset
253
  else:
254
+ # Scale the current vector to match reference bone length
255
+ corrected_vec = (current_vec / current_length) * bone_length
256
+ smoothed_positions[j][i] = parent_pos + corrected_vec
257
+
258
+ motion_data = []
259
+ joints_to_remove = {10, 13, 16, 6, 3}
260
+ for i in range(total_files):
261
+ root_pos = smoothed_root_positions[i]
262
+
263
+ if joint_constraint:
264
+ for j in range(num_joints):
265
+ euler = smoothed_rotations[j][i].as_euler('ZYX', degrees=True)
266
+ if j in arm_joints:
267
+ euler = np.clip(euler, -180, 180)
268
  else:
269
+ euler = np.clip(euler, -150, 150)
270
+ smoothed_rotations[j][i] = Rotation.from_euler('ZYX', euler, degrees=True)
271
+
272
+ euler_angles = [smoothed_rotations[j][i].as_euler('ZYX', degrees=True) for j in range(num_joints)]
273
+ motion_line = list(root_pos) + list(euler_angles[root])
274
+ for j in hierarchy_order[1:]:
275
+ motion_line.extend(euler_angles[j])
276
+ motion_data.append(motion_line)
277
+
278
+ # Second half of progress (50-100%)
279
+ progress = 50 + (i / total_files) * 50
280
+ log_progress(f"PROGRESS:{progress:.2f}")
 
 
 
 
 
281
 
282
+ bvh_dir = os.path.join(output_dir, 'bvh')
283
+ os.makedirs(bvh_dir, exist_ok=True)
284
+ bvh_file = os.path.join(bvh_dir, 'output.bvh')
285
+
286
+ with open(bvh_file, 'w') as f:
287
+ f.write("HIERARCHY\n")
288
+ def write_hierarchy(joint, parent, f, indent=0):
289
+ if parent == -1:
290
+ f.write("ROOT Joint{}\n".format(joint))
291
+ else:
292
+ f.write(" " * indent + "JOINT Joint{}\n".format(joint))
293
+ f.write(" " * indent + "{\n")
294
+ f.write(" " * (indent + 1) + "OFFSET {:.6f} {:.6f} {:.6f}\n".format(*offsets[joint]))
295
+ if parent == -1:
296
+ f.write(" " * (indent + 1) + "CHANNELS 6 Xposition Yposition Zposition Zrotation Yrotation Xrotation\n")
297
+ else:
298
+ f.write(" " * (indent + 1) + "CHANNELS 3 Zrotation Yrotation Xrotation\n")
299
+ for child in children[joint]:
300
+ write_hierarchy(child, joint, f, indent + 1)
301
+ if not children[joint]:
302
+ f.write(" " * (indent + 1) + "End Site\n")
303
+ f.write(" " * (indent + 1) + "{\n")
304
+ f.write(" " * (indent + 2) + "OFFSET 0.000000 0.000000 0.000000\n")
305
+ f.write(" " * (indent + 1) + "}\n")
306
+ f.write(" " * indent + "}\n")
307
+
308
+ write_hierarchy(root, -1, f)
309
+
310
+ f.write("MOTION\n")
311
+ f.write("Frames: {}\n".format(len(motion_data)))
312
+ f.write("Frame Time: 0.033333\n")
313
+ for motion_line in motion_data:
314
+ f.write(" ".join("{:.6f}".format(x) for x in motion_line) + "\n")
315
 
316
  if __name__ == "__main__":
317
+ parser = argparse.ArgumentParser('Convert OBJ sequence to BVH with improved adaptive smoothing.')
318
  parser.add_argument('--output-dir', type=str, default='../outputs/', help='Output directory containing obj_sequence')
319
+ parser.add_argument('--smoothing-window', type=int, default=7, help='Size of smoothing window')
320
+ parser.add_argument('--velocity-threshold', type=float, default=0.03, help='Velocity threshold for adaptive smoothing')
321
+ parser.add_argument('--disable-joint-constraints', action='store_false', dest='joint_constraint',
322
+ help='Disable joint constraints that prevent extreme rotations')
323
  args = parser.parse_args()
324
+ main(args.output_dir, args.smoothing_window, args.velocity_threshold, args.joint_constraint)
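
A hypothetical usage sketch of the two smoothing helpers added above: a jittery rotation track and a jittery position track are smoothed with the same velocity-adaptive window. The import assumes the script's directory is on `sys.path`; everything else (frame count, noise levels) is illustrative.

```python
import numpy as np
from scipy.spatial.transform import Rotation

# Assumes conver_bvh.py (this script) is importable from the current directory.
from conver_bvh import adaptive_smooth_positions, adaptive_smooth_rotations

rng = np.random.default_rng(0)

# 60 frames of a rotation about Z with roughly 2 degrees of jitter.
angles = np.linspace(0.0, 90.0, 60) + rng.normal(scale=2.0, size=60)
noisy_rotations = [Rotation.from_euler('Z', a, degrees=True) for a in angles]
smoothed_rotations = adaptive_smooth_rotations(noisy_rotations, window_size=7, velocity_threshold=0.03)

# 60 frames of a drifting root position with small per-frame noise.
noisy_positions = np.cumsum(rng.normal(scale=0.01, size=(60, 3)), axis=0)
smoothed_positions = adaptive_smooth_positions(noisy_positions, window_size=7, velocity_threshold=0.03)
```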