Commit 82d4b57 ("fixed") by Amanpreet · 1 parent: 9d2691d

Changed files:
- VideoToNPZ/gen_skes.py (+17 -8)
- VideoToNPZ/lib/pose/hrnet/pose_estimation/gen_kpts.py (+116 -125)
- app.py (+12 -9)
- convertNPZtoBVH/conver_bvh.py (+220 -76)
VideoToNPZ/gen_skes.py
CHANGED
@@ -13,6 +13,17 @@ import signal
 
 warnings.filterwarnings('ignore')
 
+def signal_handler(sig, frame):
+    print("\nInterrupted by user, shutting down...")
+    if 'loader_thread' in globals() and loader_thread.is_alive():
+        loader_thread.join(timeout=1.0)  # Give the thread 1 second to finish
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()  # Free GPU memory immediately
+    os.exit(0)
+
+# Register the signal handler
+signal.signal(signal.SIGINT, signal_handler)
+
 sys.path.insert(0, osp.dirname(osp.realpath(__file__)))
 from tools.utils import get_path
 from model.gast_net import SpatioTemporalModel, SpatioTemporalModelOptimized1f
@@ -23,6 +34,7 @@ from tools.preprocess import load_kpts_json, h36m_coco_format, revise_kpts, revi
 from tools.inference import gen_pose
 from tools.vis_kpts import plot_keypoint
 
+
 cur_dir, chk_root, data_root, lib_root, output_root = get_path(__file__)
 model_dir = chk_root + 'gastnet/'
 sys.path.insert(1, lib_root)
@@ -37,14 +49,6 @@ adj = adj_mx_from_skeleton(skeleton)
 joints_left, joints_right = [4, 5, 6, 11, 12, 13], [1, 2, 3, 14, 15, 16]
 kps_left, kps_right = [4, 5, 6, 11, 12, 13], [1, 2, 3, 14, 15, 16]
 
-def signal_handler(sig, frame):
-    print("\nInterrupted by user, shutting down...")
-    if 'pool' in locals() and pool is not None:
-        pool.terminate()
-        pool.join()
-    sys.exit(0)
-
-signal.signal(signal.SIGINT, signal_handler)
 
 def load_model_layer():
     chk = model_dir + '81_frame_model.bin'
@@ -63,6 +67,11 @@ def load_model_layer():
     return model_pos
 
 def generate_skeletons(video=''):
+    def force_exit(sig, frame):
+        print("\nForce terminating...")
+        os._exit(1)
+    signal.signal(signal.SIGINT, force_exit)
+
     cap = cv2.VideoCapture(video)
     width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
     height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
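The new handlers follow a two-tier pattern: a module-level SIGINT handler registered at import time that tries to wind down the frame-loader thread and free GPU memory, plus a force_exit handler installed inside the long-running function. Below is a minimal standalone sketch of that pattern, with a hypothetical worker-thread name; it uses sys.exit and os._exit from the standard library (there is no os.exit), so it is an illustration of the idea rather than the project's exact code.

import os
import signal
import sys
import time

loader_thread = None  # hypothetical background frame-loader thread, set elsewhere

def graceful_handler(sig, frame):
    # First Ctrl+C: let background work wind down, then exit via SystemExit.
    print("\nInterrupted by user, shutting down...")
    if loader_thread is not None and loader_thread.is_alive():
        loader_thread.join(timeout=1.0)
    sys.exit(0)

signal.signal(signal.SIGINT, graceful_handler)

def generate(video_path):
    def force_exit(sig, frame):
        # Inside the hot loop, bail out immediately without cleanup.
        print("\nForce terminating...")
        os._exit(1)

    signal.signal(signal.SIGINT, force_exit)
    # ... heavy per-frame processing would run here ...
    time.sleep(0.1)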
VideoToNPZ/lib/pose/hrnet/pose_estimation/gen_kpts.py
CHANGED
@@ -17,6 +17,18 @@ import json
 import torch.multiprocessing as mp
 from functools import partial
 from io import StringIO
+import signal
+
+def signal_handler(sig, frame):
+    print("\nInterrupted by user, shutting down...")
+    if 'loader_thread' in globals() and loader_thread.is_alive():
+        loader_thread.join(timeout=1.0)  # Give the thread 1 second to finish
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()  # Free GPU memory immediately
+    os.exit(0)
+
+# Register the signal handler
+signal.signal(signal.SIGINT, signal_handler)
 
 import _init_paths
 from _init_paths import get_path
@@ -50,7 +62,7 @@ def parse_args():
     parser.add_argument('-a', '--animation', action='store_true', help='output animation')
     parser.add_argument('-np', '--num-person', type=int, default=1)
     parser.add_argument("-v", "--video", type=str, default='camera')
-    parser.add_argument('--batch-size', type=int, default=
+    parser.add_argument('--batch-size', type=int, default=8)  # Reduced batch size
     args = parser.parse_args()
     return args
@@ -69,7 +81,7 @@ def model_load(config, use_fp16=False):
         new_state_dict[k] = v
     model.load_state_dict(new_state_dict)
     if torch.cuda.is_available() and use_fp16:
-        model = model.half().cuda()
+        model = model.half().cuda()  # Use FP16 if specified and CUDA available
     elif torch.cuda.is_available():
         model = model.cuda()
     model.eval()
@@ -78,7 +90,7 @@ def model_load(config, use_fp16=False):
 def load_default_model():
     args = parse_args()
     reset_config(args)
-    model = eval('models.' +
+    model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(cfg, is_train=False)
     if torch.cuda.is_available():
         model = model.cuda()
     state_dict = torch.load(cfg.OUTPUT_DIR)
@@ -100,7 +112,7 @@ def frame_loader(video, queue, video_length):
     queue.put(None)
     cap.release()
 
-def process_batch(frames, human_model, pose_model, det_dim, num_person, thred_score, use_fp16, device):
+def process_batch(frames, human_model, pose_model, det_dim, num_person, thred_score, use_fp16, device, people_sort):
     if not frames:
         return [], []
 
@@ -114,14 +126,15 @@ def process_batch(frames, human_model, pose_model, det_dim, num_person, thred_sc
         if bboxs is None or not bboxs.any():
             continue
 
-        people_track =
+        people_track = people_sort.update(bboxs)
         if people_track.shape[0] == 0:
             continue
+        num_to_track = min(num_person, people_track.shape[0])
+        people_track_ = people_track[-num_to_track:, :-1]
+        track_bboxs = np.round(people_track_, 2).tolist()
 
-        inputs, _, center, scale = PreProcess(frame, track_bboxs, cfg,
-        inputs = inputs[:, [2, 1, 0]]
+        inputs, _, center, scale = PreProcess(frame, track_bboxs, cfg, num_to_track)
+        inputs = inputs[:, [2, 1, 0]]  # BGR to RGB
         batch_bboxs.append(track_bboxs)
         batch_centers.append(center)
         batch_scales.append(scale)
@@ -131,12 +144,11 @@ def process_batch(frames, human_model, pose_model, det_dim, num_person, thred_sc
         return [], []
 
     inputs = torch.cat(batch_inputs, dim=0).to(device)
-    if use_fp16:
-        inputs = inputs.half()
+    if use_fp16 and device.type == 'cuda':
+        inputs = inputs.half()  # Convert to FP16 to match model precision
     with torch.no_grad():
        outputs = pose_model(inputs)
-    outputs = outputs.cpu().float()
+    outputs = outputs.cpu().float()  # Ensure output is FP32 for post-processing
 
     kpts_result = []
     scores_result = []
@@ -147,24 +159,36 @@ def process_batch(frames, human_model, pose_model, det_dim, num_person, thred_sc
                                         np.asarray(center).flatten(), np.asarray(scale).flatten())
         offset += batch_size
 
-        kpts = np.zeros((
-        scores = np.zeros((
-        for j
-            kpts[j] =
-            scores[j] =
+        kpts = np.zeros((batch_size, 17, 2), dtype=np.float32)
+        scores = np.zeros((batch_size, 17), dtype=np.float32)
+        for j in range(batch_size):
+            kpts[j] = preds[j]
+            scores[j] = maxvals[j].squeeze()
         kpts_result.append(kpts)
         scores_result.append(scores)
 
     return kpts_result, scores_result
 
-def gen_video_kpts(video, det_dim=416, num_person=1, gen_output=False, batch_size=
+def gen_video_kpts(video, det_dim=416, num_person=1, gen_output=False, batch_size=8, animation=False):
+
+    def force_exit(sig, frame):
+        print("\nForce terminating...")
+        os._exit(1)
+
+    signal.signal(signal.SIGINT, force_exit)
+
     args = parse_args()
     reset_config(args)
 
     cap = cv2.VideoCapture(video)
     assert cap.isOpened(), 'Cannot capture source'
 
     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+    torch.set_num_threads(max(1, mp.cpu_count() - 1))  # Match thread count to processes
+    torch.autograd.set_grad_enabled(False)  # Explicitly disable gradients
+
+    # Determine FP16 usage based on device capability
     use_fp16 = device.type == 'cuda' and torch.cuda.get_device_capability()[0] >= 7
     batch_size = min(batch_size, torch.cuda.get_device_properties(0).total_memory // (1024**3) if device.type == 'cuda' else mp.cpu_count())
 
@@ -173,31 +197,23 @@ def gen_video_kpts(video, det_dim=416, num_person=1, gen_output=False, batch_siz
     people_sort = Sort()
 
     video_length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-    print('Recording 2D pose ...')
-    sys.stdout.flush()  # Ensure initial message shows up immediately
 
     if animation:
-        # Animation mode uses frame-by-frame processing like in the backup code
         kpts_result = []
         scores_result = []
 
         for i in range(video_length):
             ret, frame = cap.read()
             if not ret:
                 break
 
-            # Detect humans
             bboxs, scores = yolo_det(frame, human_model, reso=det_dim, confidence=args.thred_score)
 
             if bboxs is None or not bboxs.any():
-                print('No person detected!')
-                sys.stdout.flush()
                 continue
 
-            # Track people
             people_track = people_sort.update(bboxs)
 
-            # Select people to track
             if people_track.shape[0] == 1:
                 people_track_ = people_track[-1, :-1].reshape(1, 4)
             elif people_track.shape[0] >= 2:
@@ -205,51 +221,48 @@ def gen_video_kpts(video, det_dim=416, num_person=1, gen_output=False, batch_siz
                 people_track_ = people_track_[::-1]
             else:
                 continue
 
             track_bboxs = []
             for bbox in people_track_:
                 bbox = [round(i, 2) for i in list(bbox)]
                 track_bboxs.append(bbox)
 
             with torch.no_grad():
-                # Preprocess and get pose predictions
                 inputs, origin_img, center, scale = PreProcess(frame, track_bboxs, cfg, num_person)
-                inputs = inputs[:, [2, 1, 0]]
-
-                if
+                inputs = inputs[:, [2, 1, 0]]  # BGR to RGB
+
+                if device.type == 'cuda':
                     inputs = inputs.cuda()
+                    if use_fp16:
+                        inputs = inputs.half()  # Convert to FP16 if model is in FP16
                 output = pose_model(inputs)
 
-                # Compute coordinates
                 preds, maxvals = get_final_preds(cfg, output.clone().cpu().numpy(), np.asarray(center), np.asarray(scale))
 
             if gen_output:
-                # Store results for later processing
                 kpts = np.zeros((num_person, 17, 2), dtype=np.float32)
                 scores = np.zeros((num_person, 17), dtype=np.float32)
 
                 for j, kpt in enumerate(preds):
                     kpts[j] = kpt
 
                 for j, score in enumerate(maxvals):
                     scores[j] = score.squeeze()
 
                 kpts_result.append(kpts)
                 scores_result.append(scores)
 
             else:
-                # Visualize results in real-time
                 index_bboxs = [bbox + [j] for j, bbox in enumerate(track_bboxs)]
                 list(map(lambda x: write(x, frame), index_bboxs))
                 plot_keypoint(frame, preds, maxvals, 0.3)
 
                 cv2.imshow('frame', frame)
                 key = cv2.waitKey(1)
                 if key & 0xFF == ord('q'):
                     break
     else:
-        frame_queue = mp.Queue(maxsize=batch_size * 2)
+        frame_queue = Queue(maxsize=batch_size)  # Use regular Queue instead of mp.Queue
         loader_thread = Thread(target=frame_loader, args=(video, frame_queue, video_length))
         loader_thread.start()
 
@@ -258,112 +271,89 @@ def gen_video_kpts(video, det_dim=416, num_person=1, gen_output=False, batch_siz
         kpts_result = np.zeros((max_frames, num_person, 17, 2), dtype=np.float32)
         scores_result = np.zeros((max_frames, num_person, 17), dtype=np.float32)
         frame_idx = 0
+        people_sort = Sort()
 
-        pool = None  # Initialize pool outside try block for cleanup
         try:
             if device.type == 'cuda':
-                # GPU batch processing
                 batch_frames = []
                 with torch.no_grad():
                     for i in range(video_length):
-                        frame = frame_queue.get()
+                        frame = frame_queue.get(timeout=1.0)
                         if frame is None:
                             break
                         batch_frames.append(frame)
-
-                        print(f"PROGRESS:{progress:.2f}")
-                        sys.stdout.flush()  # Force per-frame update
 
                         if len(batch_frames) >= batch_size:
                             kpts_batch, scores_batch = process_batch(batch_frames, human_model, pose_model,
                                                                      det_dim, num_person, args.thred_score,
-                                                                     use_fp16, device)
+                                                                     use_fp16, device, people_sort)
                            for kpts, scores in zip(kpts_batch, scores_batch):
                                 kpts_result[frame_idx:frame_idx + 1] = kpts[None, :num_person]
                                 scores_result[frame_idx:frame_idx + 1] = scores[None, :num_person]
                                 frame_idx += 1
                             batch_frames = []
-
-                            progress = (frame_idx / video_length) * 100
-                            print(f"PROGRESS:{progress:.2f}")
-                            sys.stdout.flush()  # Force after batch
+
+                        # Print progress every batch
+                        if i % batch_size == 0:
+                            progress = ((i + 1) / video_length) * 100
+                            print(f"PROGRESS:{progress:.2f}%")
 
                 # Process remaining frames
                 if batch_frames:
                     kpts_batch, scores_batch = process_batch(batch_frames, human_model, pose_model,
                                                              det_dim, num_person, args.thred_score,
-                                                             use_fp16, device)
+                                                             use_fp16, device, people_sort)
                     for kpts, scores in zip(kpts_batch, scores_batch):
                         kpts_result[frame_idx:frame_idx + 1] = kpts[None, :num_person]
                         scores_result[frame_idx:frame_idx + 1] = scores[None, :num_person]
                         frame_idx += 1
                     progress = (frame_idx / video_length) * 100
-                    print(f"PROGRESS:{progress:.2f}")
-                    sys.stdout.flush()  # Force final update
+                    print(f"PROGRESS:{progress:.2f}%")
             else:
-                # CPU
-                pool = mp.Pool(processes=mp.cpu_count())
-                process_func = partial(process_batch, human_model=human_model, pose_model=pose_model,
-                                       det_dim=det_dim, num_person=num_person, thred_score=args.thred_score,
-                                       use_fp16=use_fp16, device=device)
-
+                # Sequential processing for CPU to avoid multiprocessing overhead
                 batch_frames = []
                 with torch.no_grad():
                     for i in range(video_length):
-                        frame = frame_queue.get()
+                        frame = frame_queue.get(timeout=1.0)
                        if frame is None:
                             break
                         batch_frames.append(frame)
-
-                        print(f"PROGRESS:{progress:.2f}")
-                        sys.stdout.flush()  # Force per-frame update
+
                         if len(batch_frames) >= batch_size:
-                            kpts_batch, scores_batch =
+                            kpts_batch, scores_batch = process_batch(batch_frames, human_model, pose_model,
+                                                                     det_dim, num_person, args.thred_score,
+                                                                     use_fp16, device, people_sort)
                             for kpts, scores in zip(kpts_batch, scores_batch):
                                 kpts_result[frame_idx:frame_idx + 1] = kpts[None, :num_person]
                                 scores_result[frame_idx:frame_idx + 1] = scores[None, :num_person]
                                 frame_idx += 1
                             batch_frames = []
+
+                        # Print progress every batch
+                        if i % batch_size == 0:
+                            progress = ((i + 1) / video_length) * 100
+                            print(f"PROGRESS:{progress:.2f}%")
 
                 # Process remaining frames
                 if batch_frames:
-                    kpts_batch, scores_batch =
+                    kpts_batch, scores_batch = process_batch(batch_frames, human_model, pose_model,
+                                                             det_dim, num_person, args.thred_score,
+                                                             use_fp16, device, people_sort)
                     for kpts, scores in zip(kpts_batch, scores_batch):
                         kpts_result[frame_idx:frame_idx + 1] = kpts[None, :num_person]
                         scores_result[frame_idx:frame_idx + 1] = scores[None, :num_person]
                         frame_idx += 1
                     progress = (frame_idx / video_length) * 100
-                    print(f"PROGRESS:{progress:.2f}")
-
-            pool.close()
-            pool.join()
-        except KeyboardInterrupt:
-            print("\nInterrupted by user, shutting down...")
-            sys.stdout.flush()
-            if pool is not None:
-                pool.terminate()
-                pool.join()
+                    print(f"PROGRESS:{progress:.2f}%")
+        except Exception as e:
             loader_thread.join()
-
-        loader_thread.join()
-
+            raise
+        finally:
+            loader_thread.join()
+            cap.release()
+            if device.type == 'cuda':
+                torch.cuda.empty_cache()  # Free GPU memory
 
     if gen_output and kpts_result.any():
         keypoints = kpts_result[:frame_idx].transpose(1, 0, 2, 3)
         scores = scores_result[:frame_idx].transpose(1, 0, 2)
@@ -389,8 +379,12 @@ def gen_img_kpts(image, human_model, pose_model, human_sort, det_dim=416, num_pe
     with torch.no_grad():
         inputs, origin_img, center, scale = PreProcess(image, bboxs_track, cfg, num_person)
         inputs = inputs[:, [2, 1, 0]]
-        if torch.cuda.is_available()
+        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+        use_fp16 = device.type == 'cuda' and torch.cuda.get_device_capability()[0] >= 7
+        if device.type == 'cuda':
             inputs = inputs.cuda()
+            if use_fp16:
+                inputs = inputs.half()  # Match model precision
         output = pose_model(inputs)
         preds, maxvals = get_final_preds(cfg, output.clone().cpu().numpy(), np.asarray(center), np.asarray(scale))
 
@@ -423,11 +417,10 @@ def generate_ntu_kpts_json(video_path, kpts_file):
            ret, frame = cap.read()
            if not ret:
                continue
-
            try:
                bboxs, scores = yolo_det(frame, human_model, confidence=args.thred_score)
                if bboxs is None or not bboxs.any():
-                    print('No person detected!')
                    continue
 
                people_track = people_sort.update(bboxs)
@@ -447,14 +440,17 @@ def generate_ntu_kpts_json(video_path, kpts_file):
                    bbox = [round(i, 3) for i in list(bbox)]
                    track_bboxs.append(bbox)
 
-            except Exception
-                print(e)
+            except Exception:
                continue
 
            inputs, origin_img, center, scale = PreProcess(frame, track_bboxs, cfg, args.num_person)
            inputs = inputs[:, [2, 1, 0]]
-            if torch.cuda.is_available()
+            device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+            use_fp16 = device.type == 'cuda' and torch.cuda.get_device_capability()[0] >= 7
+            if device.type == 'cuda':
                inputs = inputs.cuda()
+                if use_fp16:
+                    inputs = inputs.half()  # Match model precision
            output = pose_model(inputs)
            preds, maxvals = get_final_preds(cfg, output.clone().cpu().numpy(), np.asarray(center), np.asarray(scale))
 
@@ -473,6 +469,7 @@ def generate_ntu_kpts_json(video_path, kpts_file):
    kpts_info.update({'data': data})
    with open(kpts_file, 'w') as fw:
        json.dump(kpts_info, fw)
+    cap.release()
 
 def round_list(input_list, decimals=3):
     dim = len(input_list)
@@ -484,18 +481,12 @@ def round_list(input_list, decimals=3):
 if __name__ == "__main__":
     args = parse_args()
     video_path = args.video
 
     if args.animation:
         gen_video_kpts(video_path, det_dim=args.det_dim, num_person=args.num_person,
                        gen_output=False, animation=True)
     else:
-        keypoints, scores = gen_video_kpts(video_path, det_dim=args.det_dim,
-                                           num_person=args.num_person,
-                                           gen_output=True,
-                                           batch_size=args.batch_size)
+        keypoints, scores = gen_video_kpts(video_path, det_dim=416, num_person=1, gen_output=True, batch_size=8)  # Increased batch_size to 8
         if keypoints is not None:
             output_file = "output.npz"
             np.savez(output_file, keypoints=keypoints, scores=scores)
-            print(f"Saved to {output_file}")
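The non-animation path now feeds frames from a single loader thread through a bounded queue and runs pose inference in fixed-size batches, flushing any partial batch at the end. Below is a simplified, self-contained sketch of that producer/consumer batching pattern with a dummy frame source and a stand-in for process_batch; names and values are illustrative only, not the project's classes.

import queue
import threading

def frame_loader(num_frames, frame_queue):
    # Producer: push frames (here just integers) and a None sentinel when done.
    for i in range(num_frames):
        frame_queue.put(i)
    frame_queue.put(None)

def run_batched(num_frames=25, batch_size=8):
    frame_queue = queue.Queue(maxsize=batch_size)
    loader = threading.Thread(target=frame_loader, args=(num_frames, frame_queue))
    loader.start()

    results, batch = [], []
    try:
        while True:
            frame = frame_queue.get(timeout=1.0)
            if frame is None:
                break
            batch.append(frame)
            if len(batch) >= batch_size:
                results.extend(f * 2 for f in batch)  # stand-in for process_batch()
                print(f"PROGRESS:{(len(results) / num_frames) * 100:.2f}%")
                batch = []
        if batch:  # flush the final partial batch
            results.extend(f * 2 for f in batch)
            print(f"PROGRESS:{(len(results) / num_frames) * 100:.2f}%")
    finally:
        loader.join()
    return results

if __name__ == "__main__":
    run_batched()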
app.py
CHANGED
@@ -51,18 +51,21 @@ def run_command(command, working_dir, progress_bar, progress_text, step_start_pr
             if source == 'stdout':
                 if show_progress and line.startswith("PROGRESS:"):
                     try:
+                        progress_str = line.strip().split("PROGRESS:")[1].replace("%", "")  # Remove '%'
+                        progress = float(progress_str)  # Convert to float after removing '%'  # Debug output
                         if Path(command[1]).name == 'gen_skes.py':
-                            if progress <=
-                                adjusted_progress = step_start_progress + (progress * 0.6)
-                            else:
-                                adjusted_progress = step_start_progress + 0.6 + ((progress -
+                            if progress <= 100.0:  # 2D keypoint generation (0-100% maps to 0-60%)
+                                adjusted_progress = step_start_progress + (progress / 100.0 * 0.6)
+                            else:  # 3D pose generation (100-200% maps to 60-80%)
+                                adjusted_progress = step_start_progress + 0.6 + ((progress - 100.0) / 100.0 * 0.2)
+                            total_progress = min(adjusted_progress, step_start_progress + step_weight)
+                        else:  # For conver_bvh.py or others with 0-100% progress
+                            adjusted_progress = step_start_progress + (progress / 100.0 * step_weight)
+                            total_progress = min(adjusted_progress, step_start_progress + step_weight)
                         progress_bar.progress(total_progress)
                         progress_text.text(f"Progress: {int(total_progress * 100)}%")
-                    except ValueError:
+                    except ValueError as e:
+                        print(f"DEBUG: Error parsing progress: {e}")
                         pass
                 elif source == 'stderr':
                     stderr_lines.append(line.strip())
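The parser maps each child process's "PROGRESS:NN.NN%" lines onto a single progress bar: for gen_skes.py, values up to 100% fill the first 60% of the step and values above 100% fill the next 20%, while other scripts scale 0-100% across their whole step weight. A small standalone sketch of just that mapping arithmetic is below; the function name and example step values are made up for illustration.

def map_progress(line, step_start, step_weight, two_stage=True):
    """Map a 'PROGRESS:NN.NN%' line onto a 0.0-1.0 progress bar segment."""
    if not line.startswith("PROGRESS:"):
        return None
    progress = float(line.strip().split("PROGRESS:")[1].replace("%", ""))
    if two_stage:
        if progress <= 100.0:            # stage 1: 0-100% -> first 60% of the step
            adjusted = step_start + (progress / 100.0) * 0.6
        else:                            # stage 2: 100-200% -> remaining 20%
            adjusted = step_start + 0.6 + ((progress - 100.0) / 100.0) * 0.2
    else:                                # single-stage scripts: 0-100% -> full step weight
        adjusted = step_start + (progress / 100.0) * step_weight
    return min(adjusted, step_start + step_weight)

# Example: a step that starts at 0.0 and owns 80% of the bar.
print(map_progress("PROGRESS:50.00%", 0.0, 0.8))   # 0.3
print(map_progress("PROGRESS:150.00%", 0.0, 0.8))  # 0.7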
convertNPZtoBVH/conver_bvh.py
CHANGED
@@ -2,11 +2,13 @@ import os
 import numpy as np
 from scipy.spatial.transform import Rotation
 from collections import deque
-from tqdm import tqdm
 import sys
 import argparse
 
-print
+# Custom print function to show only progress
+def log_progress(message):
+    if message.startswith("PROGRESS:"):
+        print(message)
 
 def parse_obj(filename):
     vertices = []
@@ -51,10 +53,11 @@ def build_hierarchy(lines, root=0):
 
 def compute_offsets(vertices_ref, parent):
     num_joints = len(vertices_ref)
+    scale_factor = 0.05
     offsets = np.zeros((num_joints, 3))
     for j in range(num_joints):
         if parent[j] != -1:
-            offsets[j] = vertices_ref[j] - vertices_ref[parent[j]]
+            offsets[j] = (vertices_ref[j] - vertices_ref[parent[j]])*scale_factor
     return offsets
 
 def compute_R_world(joint, vertices_ref, vertices_cur, children):
@@ -79,8 +82,7 @@ def compute_R_world(joint, vertices_ref, vertices_cur, children):
             return np.eye(3)
         axis = axis / axis_norm
         angle = np.arccos(cos_theta)
-
-        return R
+        return Rotation.from_rotvec(axis * angle).as_matrix()
     else:
         A = np.column_stack([vertices_ref[c] - vertices_ref[joint] for c in children[joint]])
         B = np.column_stack([vertices_cur[c] - vertices_cur[joint] for c in children[joint]])
@@ -92,89 +94,231 @@
     R = U @ Vh
     return R
 
+def calculate_motion_velocity(rotations):
+    n_frames = len(rotations)
+    if n_frames <= 1:
+        return np.zeros(n_frames)
+
+    velocities = np.zeros(n_frames)
+    for i in range(1, n_frames):
+        prev_quat = rotations[i-1].as_quat()
+        curr_quat = rotations[i].as_quat()
+        if np.dot(prev_quat, curr_quat) < 0:
+            curr_quat = -curr_quat
+        diff = Rotation.from_quat(prev_quat).inv() * Rotation.from_quat(curr_quat)
+        velocities[i] = np.linalg.norm(diff.as_rotvec())
+    if n_frames > 1:
+        velocities[0] = velocities[1]
+    return velocities
+
+def adaptive_smooth_rotations(rotations, window_size=7, velocity_threshold=0.03):
+    n_frames = len(rotations)
+    if n_frames <= 1:
+        return rotations
+
+    velocities = calculate_motion_velocity(rotations)
+    if np.max(velocities) > 0:
+        velocities = velocities / np.max(velocities)
+
+    smoothed = []
+    half_window = window_size // 2
+
+    for i in range(n_frames):
+        start_idx = max(0, i - half_window)
+        end_idx = min(n_frames - 1, i + half_window)
+        window_rots = rotations[start_idx:end_idx + 1]
+
+        velocity_factor = min(1.0, velocities[i] / velocity_threshold)
+        sigma = 0.5 + 1.5 * velocity_factor
+        dist = np.linspace(-1, 1, len(window_rots))
+        weights = np.exp(-sigma * np.square(dist))
+        weights = weights / np.sum(weights)
+
+        quats = [r.as_quat() for r in window_rots]
+        for j in range(1, len(quats)):
+            if np.dot(quats[0], quats[j]) < 0:
+                quats[j] = -quats[j]
+
+        result_quat = np.zeros(4)
+        for j in range(len(quats)):
+            result_quat += weights[j] * quats[j]
+        result_quat = result_quat / np.linalg.norm(result_quat)
+        smoothed.append(Rotation.from_quat(result_quat))
+
+    return smoothed
+
+def adaptive_smooth_positions(positions, window_size=7, velocity_threshold=0.03):
+    n_frames = len(positions)
+    if n_frames <= 1:
+        return positions
+
+    positions = np.array(positions)
+    smoothed = np.zeros_like(positions)
+    half_window = window_size // 2
+
+    velocities = np.zeros(n_frames)
+    for i in range(1, n_frames):
+        velocities[i] = np.linalg.norm(positions[i] - positions[i-1])
+    velocities[0] = velocities[1]
+    if np.max(velocities) > 0:
+        velocities = velocities / np.max(velocities)
+
+    for i in range(n_frames):
+        start_idx = max(0, i - half_window)
+        end_idx = min(n_frames - 1, i + half_window)
+        window_pos = positions[start_idx:end_idx + 1]
+
+        velocity_factor = min(1.0, velocities[i] / velocity_threshold)
+        sigma = 0.5 + 1.5 * velocity_factor
+        dist = np.linspace(-1, 1, len(window_pos))
+        weights = np.exp(-sigma * np.square(dist))
+        weights = weights / np.sum(weights)
+
+        smoothed[i] = np.sum(window_pos * weights[:, np.newaxis], axis=0)
+
+    return smoothed
+
+def detect_arm_joints(children, num_joints):
+    return [j for j in range(num_joints) if len(children[j]) == 1]
+
+def main(output_dir, smoothing_window=8, velocity_threshold=0.04, joint_constraint=True):
     folder = os.path.join(output_dir, 'obj_sequence')
 
     try:
         obj_files = sorted([f for f in os.listdir(folder) if f.endswith('.obj')])
     except Exception as e:
-        return
+        sys.exit(f"Error accessing folder {folder}: {e}")
 
     if not obj_files:
-        return
+        sys.exit("No OBJ files found.")
 
+    vertices_ref, lines = parse_obj(os.path.join(folder, obj_files[0]))
+    num_joints = len(vertices_ref)
+    parent, children = build_hierarchy(lines)
+    offsets = compute_offsets(vertices_ref, parent)
+    root = 0
+
+    hierarchy_order = []
+    def dfs(joint):
+        hierarchy_order.append(joint)
+        for child in children[joint]:
+            dfs(child)
+    dfs(root)
+
+    arm_joints = detect_arm_joints(children, num_joints)
+
+    all_root_positions = []
+    all_positions = [[] for _ in range(num_joints)]
+    all_rotations = [[] for _ in range(num_joints)]
+
+    total_files = len(obj_files)
+    for i in range(total_files):
+        obj_file = obj_files[i]
+        vertices_cur = parse_obj(os.path.join(folder, obj_file))[0]
+        R_world = [compute_R_world(j, vertices_ref, vertices_cur, children) for j in range(num_joints)]
+        R_local = [R_world[j] if parent[j] == -1 else R_world[parent[j]].T @ R_world[j] for j in range(num_joints)]
+        rotations = [Rotation.from_matrix(R) for R in R_local]
+
+        all_root_positions.append(vertices_cur[root])
+        for j in range(num_joints):
+            all_positions[j].append(vertices_cur[j])
+            all_rotations[j].append(rotations[j])
+
+        # First half of progress (0-50%)
+        progress = (i / total_files) * 50
+        log_progress(f"PROGRESS:{progress:.2f}")
+
+    smoothed_root_positions = adaptive_smooth_positions(all_root_positions, smoothing_window, velocity_threshold)
+    smoothed_positions = [adaptive_smooth_positions(np.array(pos), smoothing_window, velocity_threshold) for pos in all_positions]
+    smoothed_rotations = [adaptive_smooth_rotations(rot, smoothing_window, velocity_threshold) for rot in all_rotations]
+
+    # Enforce bone lengths (no lengthening restraint)
+    for i in range(total_files):
+        # Start with root position
+        smoothed_positions[root][i] = smoothed_root_positions[i]
+        # Adjust each child joint to maintain bone length
+        for j in range(num_joints):
+            if parent[j] != -1:  # Skip root
+                parent_pos = smoothed_positions[parent[j]][i]
+                child_pos = smoothed_positions[j][i]
+                ref_offset = offsets[j]  # Reference bone length vector
+                bone_length = np.linalg.norm(ref_offset)
+                if bone_length < 1e-6:
+                    continue  # Skip if bone length is near zero
+                current_vec = child_pos - parent_pos
+                current_length = np.linalg.norm(current_vec)
+                if current_length < 1e-6:
+                    # If current length is near zero, use reference direction
+                    smoothed_positions[j][i] = parent_pos + ref_offset
                else:
+                    # Scale the current vector to match reference bone length
+                    corrected_vec = (current_vec / current_length) * bone_length
+                    smoothed_positions[j][i] = parent_pos + corrected_vec
+
+    motion_data = []
+    joints_to_remove = {10, 13, 16, 6, 3}
+    for i in range(total_files):
+        root_pos = smoothed_root_positions[i]
+
+        if joint_constraint:
+            for j in range(num_joints):
+                euler = smoothed_rotations[j][i].as_euler('ZYX', degrees=True)
+                if j in arm_joints:
+                    euler = np.clip(euler, -180, 180)
                else:
+                    euler = np.clip(euler, -150, 150)
+                smoothed_rotations[j][i] = Rotation.from_euler('ZYX', euler, degrees=True)
+
+        euler_angles = [smoothed_rotations[j][i].as_euler('ZYX', degrees=True) for j in range(num_joints)]
+        motion_line = list(root_pos) + list(euler_angles[root])
+        for j in hierarchy_order[1:]:
+            motion_line.extend(euler_angles[j])
+        motion_data.append(motion_line)
+
+        # Second half of progress (50-100%)
+        progress = 50 + (i / total_files) * 50
+        log_progress(f"PROGRESS:{progress:.2f}")
 
+    bvh_dir = os.path.join(output_dir, 'bvh')
+    os.makedirs(bvh_dir, exist_ok=True)
+    bvh_file = os.path.join(bvh_dir, 'output.bvh')
+
+    with open(bvh_file, 'w') as f:
+        f.write("HIERARCHY\n")
+        def write_hierarchy(joint, parent, f, indent=0):
+            if parent == -1:
+                f.write("ROOT Joint{}\n".format(joint))
+            else:
+                f.write(" " * indent + "JOINT Joint{}\n".format(joint))
+            f.write(" " * indent + "{\n")
+            f.write(" " * (indent + 1) + "OFFSET {:.6f} {:.6f} {:.6f}\n".format(*offsets[joint]))
+            if parent == -1:
+                f.write(" " * (indent + 1) + "CHANNELS 6 Xposition Yposition Zposition Zrotation Yrotation Xrotation\n")
+            else:
+                f.write(" " * (indent + 1) + "CHANNELS 3 Zrotation Yrotation Xrotation\n")
+            for child in children[joint]:
+                write_hierarchy(child, joint, f, indent + 1)
+            if not children[joint]:
+                f.write(" " * (indent + 1) + "End Site\n")
+                f.write(" " * (indent + 1) + "{\n")
+                f.write(" " * (indent + 2) + "OFFSET 0.000000 0.000000 0.000000\n")
+                f.write(" " * (indent + 1) + "}\n")
+            f.write(" " * indent + "}\n")
+
+        write_hierarchy(root, -1, f)
+
         f.write("MOTION\n")
         f.write("Frames: {}\n".format(len(motion_data)))
         f.write("Frame Time: 0.033333\n")
         for motion_line in motion_data:
             f.write(" ".join("{:.6f}".format(x) for x in motion_line) + "\n")
 
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser('Convert OBJ sequence to BVH.')
+    parser = argparse.ArgumentParser('Convert OBJ sequence to BVH with improved adaptive smoothing.')
     parser.add_argument('--output-dir', type=str, default='../outputs/', help='Output directory containing obj_sequence')
+    parser.add_argument('--smoothing-window', type=int, default=7, help='Size of smoothing window')
+    parser.add_argument('--velocity-threshold', type=float, default=0.03, help='Velocity threshold for adaptive smoothing')
+    parser.add_argument('--disable-joint-constraints', action='store_false', dest='joint_constraint',
+                        help='Disable joint constraints that prevent extreme rotations')
     args = parser.parse_args()
-    main(args.output_dir)
+    main(args.output_dir, args.smoothing_window, args.velocity_threshold, args.joint_constraint)
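The new smoothing functions weight each window with a Gaussian whose sharpness grows with the local motion velocity, so fast motion keeps its detail while slow drift gets a stronger blur. Below is a small illustration of that weighting idea on a 1-D signal; it is a standalone sketch with made-up data, not the project's joint pipeline.

import numpy as np

def adaptive_smooth_1d(signal, window_size=7, velocity_threshold=0.03):
    signal = np.asarray(signal, dtype=float)
    n = len(signal)
    if n <= 1:
        return signal.copy()

    # Per-sample "velocity": magnitude of the frame-to-frame change, normalized to [0, 1].
    velocity = np.abs(np.diff(signal, prepend=signal[0]))
    if velocity.max() > 0:
        velocity = velocity / velocity.max()

    half = window_size // 2
    smoothed = np.empty(n)
    for i in range(n):
        lo, hi = max(0, i - half), min(n - 1, i + half)
        window = signal[lo:hi + 1]
        # Faster motion -> larger sigma -> weights concentrate on the center sample,
        # so sharp moves are preserved; slow segments get a flatter, stronger blur.
        sigma = 0.5 + 1.5 * min(1.0, velocity[i] / velocity_threshold)
        dist = np.linspace(-1, 1, len(window))
        weights = np.exp(-sigma * dist ** 2)
        smoothed[i] = np.sum(window * weights / weights.sum())
    return smoothed

# Example: a noisy step signal keeps its jump while the flat parts are smoothed.
noisy = np.concatenate([np.zeros(20), np.ones(20)]) + np.random.default_rng(0).normal(0, 0.05, 40)
print(adaptive_smooth_1d(noisy)[:5])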