Commit 82d4b57 ("fixed") by Amanpreet · 1 parent: 9d2691d

Changed files:
- VideoToNPZ/gen_skes.py (+17 -8)
- VideoToNPZ/lib/pose/hrnet/pose_estimation/gen_kpts.py (+116 -125)
- app.py (+12 -9)
- convertNPZtoBVH/conver_bvh.py (+220 -76)
VideoToNPZ/gen_skes.py
CHANGED
@@ -13,6 +13,17 @@ import signal
 
 warnings.filterwarnings('ignore')
 
+def signal_handler(sig, frame):
+    print("\nInterrupted by user, shutting down...")
+    if 'loader_thread' in globals() and loader_thread.is_alive():
+        loader_thread.join(timeout=1.0)  # Give the thread 1 second to finish
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()  # Free GPU memory immediately
+    os.exit(0)
+
+# Register the signal handler
+signal.signal(signal.SIGINT, signal_handler)
+
 sys.path.insert(0, osp.dirname(osp.realpath(__file__)))
 from tools.utils import get_path
 from model.gast_net import SpatioTemporalModel, SpatioTemporalModelOptimized1f
@@ -23,6 +34,7 @@ from tools.preprocess import load_kpts_json, h36m_coco_format, revise_kpts, revi
 from tools.inference import gen_pose
 from tools.vis_kpts import plot_keypoint
 
+
 cur_dir, chk_root, data_root, lib_root, output_root = get_path(__file__)
 model_dir = chk_root + 'gastnet/'
 sys.path.insert(1, lib_root)
@@ -37,14 +49,6 @@ adj = adj_mx_from_skeleton(skeleton)
 joints_left, joints_right = [4, 5, 6, 11, 12, 13], [1, 2, 3, 14, 15, 16]
 kps_left, kps_right = [4, 5, 6, 11, 12, 13], [1, 2, 3, 14, 15, 16]
 
-def signal_handler(sig, frame):
-    print("\nInterrupted by user, shutting down...")
-    if 'pool' in locals() and pool is not None:
-        pool.terminate()
-        pool.join()
-    sys.exit(0)
-
-signal.signal(signal.SIGINT, signal_handler)
 
 def load_model_layer():
     chk = model_dir + '81_frame_model.bin'
@@ -63,6 +67,11 @@ def load_model_layer():
     return model_pos
 
 def generate_skeletons(video=''):
+    def force_exit(sig, frame):
+        print("\nForce terminating...")
+        os._exit(1)
+    signal.signal(signal.SIGINT, force_exit)
+
     cap = cv2.VideoCapture(video)
     width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
     height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
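The new handlers follow a two-tier pattern: a module-level SIGINT handler registered at import time that tries to wind down the frame-loader thread and free GPU memory, plus a force_exit handler installed inside the long-running function. Below is a minimal standalone sketch of that pattern, with a hypothetical worker-thread name; it uses sys.exit and os._exit from the standard library (there is no os.exit), so it is an illustration of the idea rather than the project's exact code.

import os
import signal
import sys
import time

loader_thread = None  # hypothetical background frame-loader thread, set elsewhere

def graceful_handler(sig, frame):
    # First Ctrl+C: let background work wind down, then exit via SystemExit.
    print("\nInterrupted by user, shutting down...")
    if loader_thread is not None and loader_thread.is_alive():
        loader_thread.join(timeout=1.0)
    sys.exit(0)

signal.signal(signal.SIGINT, graceful_handler)

def generate(video_path):
    def force_exit(sig, frame):
        # Inside the hot loop, bail out immediately without cleanup.
        print("\nForce terminating...")
        os._exit(1)

    signal.signal(signal.SIGINT, force_exit)
    # ... heavy per-frame processing would run here ...
    time.sleep(0.1)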
VideoToNPZ/lib/pose/hrnet/pose_estimation/gen_kpts.py
CHANGED
@@ -17,6 +17,18 @@ import json
 import torch.multiprocessing as mp
 from functools import partial
 from io import StringIO
+import signal
+
+def signal_handler(sig, frame):
+    print("\nInterrupted by user, shutting down...")
+    if 'loader_thread' in globals() and loader_thread.is_alive():
+        loader_thread.join(timeout=1.0)  # Give the thread 1 second to finish
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()  # Free GPU memory immediately
+    os.exit(0)
+
+# Register the signal handler
+signal.signal(signal.SIGINT, signal_handler)
 
 import _init_paths
 from _init_paths import get_path
@@ -50,7 +62,7 @@ def parse_args():
     parser.add_argument('-a', '--animation', action='store_true', help='output animation')
     parser.add_argument('-np', '--num-person', type=int, default=1)
     parser.add_argument("-v", "--video", type=str, default='camera')
-    parser.add_argument('--batch-size', type=int, default=
+    parser.add_argument('--batch-size', type=int, default=8)  # Reduced batch size
     args = parser.parse_args()
     return args
@@ -69,7 +81,7 @@ def model_load(config, use_fp16=False):
         new_state_dict[k] = v
     model.load_state_dict(new_state_dict)
     if torch.cuda.is_available() and use_fp16:
-        model = model.half().cuda()
+        model = model.half().cuda()  # Use FP16 if specified and CUDA available
     elif torch.cuda.is_available():
         model = model.cuda()
     model.eval()
@@ -78,7 +90,7 @@ def model_load(config, use_fp16=False):
 def load_default_model():
     args = parse_args()
     reset_config(args)
-    model = eval('models.' +
+    model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(cfg, is_train=False)
     if torch.cuda.is_available():
         model = model.cuda()
     state_dict = torch.load(cfg.OUTPUT_DIR)
@@ -100,7 +112,7 @@ def frame_loader(video, queue, video_length):
     queue.put(None)
     cap.release()
 
-def process_batch(frames, human_model, pose_model, det_dim, num_person, thred_score, use_fp16, device):
+def process_batch(frames, human_model, pose_model, det_dim, num_person, thred_score, use_fp16, device, people_sort):
     if not frames:
         return [], []
 
@@ -114,14 +126,15 @@ def process_batch(frames, human_model, pose_model, det_dim, num_person, thred_sc
         if bboxs is None or not bboxs.any():
             continue
 
-        people_track =
+        people_track = people_sort.update(bboxs)
         if people_track.shape[0] == 0:
             continue
+        num_to_track = min(num_person, people_track.shape[0])
+        people_track_ = people_track[-num_to_track:, :-1]
+        track_bboxs = np.round(people_track_, 2).tolist()
 
-        inputs, _, center, scale = PreProcess(frame, track_bboxs, cfg,
-        inputs = inputs[:, [2, 1, 0]]
+        inputs, _, center, scale = PreProcess(frame, track_bboxs, cfg, num_to_track)
+        inputs = inputs[:, [2, 1, 0]]  # BGR to RGB
         batch_bboxs.append(track_bboxs)
         batch_centers.append(center)
         batch_scales.append(scale)
@@ -131,12 +144,11 @@ def process_batch(frames, human_model, pose_model, det_dim, num_person, thred_sc
         return [], []
 
     inputs = torch.cat(batch_inputs, dim=0).to(device)
-    if use_fp16:
-        inputs = inputs.half()
+    if use_fp16 and device.type == 'cuda':
+        inputs = inputs.half()  # Convert to FP16 to match model precision
     with torch.no_grad():
        outputs = pose_model(inputs)
-    outputs = outputs.cpu().float()
+    outputs = outputs.cpu().float()  # Ensure output is FP32 for post-processing
 
     kpts_result = []
     scores_result = []
@@ -147,24 +159,36 @@ def process_batch(frames, human_model, pose_model, det_dim, num_person, thred_sc
                                         np.asarray(center).flatten(), np.asarray(scale).flatten())
         offset += batch_size
 
-        kpts = np.zeros((
-        scores = np.zeros((
-        for j
-            kpts[j] =
-            scores[j] =
+        kpts = np.zeros((batch_size, 17, 2), dtype=np.float32)
+        scores = np.zeros((batch_size, 17), dtype=np.float32)
+        for j in range(batch_size):
+            kpts[j] = preds[j]
+            scores[j] = maxvals[j].squeeze()
         kpts_result.append(kpts)
         scores_result.append(scores)
 
     return kpts_result, scores_result
 
-def gen_video_kpts(video, det_dim=416, num_person=1, gen_output=False, batch_size=
+def gen_video_kpts(video, det_dim=416, num_person=1, gen_output=False, batch_size=8, animation=False):
+
+    def force_exit(sig, frame):
+        print("\nForce terminating...")
+        os._exit(1)
+
+    signal.signal(signal.SIGINT, force_exit)
+
     args = parse_args()
     reset_config(args)
 
     cap = cv2.VideoCapture(video)
     assert cap.isOpened(), 'Cannot capture source'
 
     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+    torch.set_num_threads(max(1, mp.cpu_count() - 1))  # Match thread count to processes
+    torch.autograd.set_grad_enabled(False)  # Explicitly disable gradients
+
+    # Determine FP16 usage based on device capability
     use_fp16 = device.type == 'cuda' and torch.cuda.get_device_capability()[0] >= 7
     batch_size = min(batch_size, torch.cuda.get_device_properties(0).total_memory // (1024**3) if device.type == 'cuda' else mp.cpu_count())
 
@@ -173,31 +197,23 @@ def gen_video_kpts(video, det_dim=416, num_person=1, gen_output=False, batch_siz
     people_sort = Sort()
 
     video_length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-    print('Recording 2D pose ...')
-    sys.stdout.flush()  # Ensure initial message shows up immediately
 
     if animation:
-        # Animation mode uses frame-by-frame processing like in the backup code
         kpts_result = []
         scores_result = []
 
         for i in range(video_length):
             ret, frame = cap.read()
             if not ret:
                 break
 
-            # Detect humans
             bboxs, scores = yolo_det(frame, human_model, reso=det_dim, confidence=args.thred_score)
 
             if bboxs is None or not bboxs.any():
-                print('No person detected!')
-                sys.stdout.flush()
                 continue
 
-            # Track people
             people_track = people_sort.update(bboxs)
 
-            # Select people to track
             if people_track.shape[0] == 1:
                 people_track_ = people_track[-1, :-1].reshape(1, 4)
             elif people_track.shape[0] >= 2:
@@ -205,51 +221,48 @@ def gen_video_kpts(video, det_dim=416, num_person=1, gen_output=False, batch_siz
                 people_track_ = people_track_[::-1]
             else:
                 continue
 
             track_bboxs = []
             for bbox in people_track_:
                 bbox = [round(i, 2) for i in list(bbox)]
                 track_bboxs.append(bbox)
 
             with torch.no_grad():
-                # Preprocess and get pose predictions
                 inputs, origin_img, center, scale = PreProcess(frame, track_bboxs, cfg, num_person)
-                inputs = inputs[:, [2, 1, 0]]
-
-                if
+                inputs = inputs[:, [2, 1, 0]]  # BGR to RGB
+
+                if device.type == 'cuda':
                     inputs = inputs.cuda()
+                    if use_fp16:
+                        inputs = inputs.half()  # Convert to FP16 if model is in FP16
                 output = pose_model(inputs)
 
-                # Compute coordinates
                 preds, maxvals = get_final_preds(cfg, output.clone().cpu().numpy(), np.asarray(center), np.asarray(scale))
 
             if gen_output:
-                # Store results for later processing
                 kpts = np.zeros((num_person, 17, 2), dtype=np.float32)
                 scores = np.zeros((num_person, 17), dtype=np.float32)
 
                 for j, kpt in enumerate(preds):
                     kpts[j] = kpt
 
                 for j, score in enumerate(maxvals):
                     scores[j] = score.squeeze()
 
                 kpts_result.append(kpts)
                 scores_result.append(scores)
 
             else:
-                # Visualize results in real-time
                 index_bboxs = [bbox + [j] for j, bbox in enumerate(track_bboxs)]
                 list(map(lambda x: write(x, frame), index_bboxs))
                 plot_keypoint(frame, preds, maxvals, 0.3)
 
                 cv2.imshow('frame', frame)
                 key = cv2.waitKey(1)
                 if key & 0xFF == ord('q'):
                     break
     else:
-        frame_queue = mp.Queue(maxsize=batch_size * 2)
+        frame_queue = Queue(maxsize=batch_size)  # Use regular Queue instead of mp.Queue
         loader_thread = Thread(target=frame_loader, args=(video, frame_queue, video_length))
         loader_thread.start()
 
@@ -258,112 +271,89 @@ def gen_video_kpts(video, det_dim=416, num_person=1, gen_output=False, batch_siz
         kpts_result = np.zeros((max_frames, num_person, 17, 2), dtype=np.float32)
         scores_result = np.zeros((max_frames, num_person, 17), dtype=np.float32)
         frame_idx = 0
+        people_sort = Sort()
 
-        pool = None  # Initialize pool outside try block for cleanup
         try:
             if device.type == 'cuda':
-                # GPU batch processing
                 batch_frames = []
                 with torch.no_grad():
                     for i in range(video_length):
-                        frame = frame_queue.get()
+                        frame = frame_queue.get(timeout=1.0)
                         if frame is None:
                             break
                         batch_frames.append(frame)
-
-                        print(f"PROGRESS:{progress:.2f}")
-                        sys.stdout.flush()  # Force per-frame update
 
                         if len(batch_frames) >= batch_size:
                             kpts_batch, scores_batch = process_batch(batch_frames, human_model, pose_model,
                                                                      det_dim, num_person, args.thred_score,
-                                                                     use_fp16, device)
+                                                                     use_fp16, device, people_sort)
                            for kpts, scores in zip(kpts_batch, scores_batch):
                                 kpts_result[frame_idx:frame_idx + 1] = kpts[None, :num_person]
                                 scores_result[frame_idx:frame_idx + 1] = scores[None, :num_person]
                                 frame_idx += 1
                             batch_frames = []
-
-                            progress = (frame_idx / video_length) * 100
-                            print(f"PROGRESS:{progress:.2f}")
-                            sys.stdout.flush()  # Force after batch
+
+                        # Print progress every batch
+                        if i % batch_size == 0:
+                            progress = ((i + 1) / video_length) * 100
+                            print(f"PROGRESS:{progress:.2f}%")
 
                 # Process remaining frames
                 if batch_frames:
                     kpts_batch, scores_batch = process_batch(batch_frames, human_model, pose_model,
                                                              det_dim, num_person, args.thred_score,
-                                                             use_fp16, device)
+                                                             use_fp16, device, people_sort)
                     for kpts, scores in zip(kpts_batch, scores_batch):
                         kpts_result[frame_idx:frame_idx + 1] = kpts[None, :num_person]
                         scores_result[frame_idx:frame_idx + 1] = scores[None, :num_person]
                         frame_idx += 1
                     progress = (frame_idx / video_length) * 100
-                    print(f"PROGRESS:{progress:.2f}")
-                    sys.stdout.flush()  # Force final update
+                    print(f"PROGRESS:{progress:.2f}%")
             else:
-                # CPU
-                pool = mp.Pool(processes=mp.cpu_count())
-                process_func = partial(process_batch, human_model=human_model, pose_model=pose_model,
-                                       det_dim=det_dim, num_person=num_person, thred_score=args.thred_score,
-                                       use_fp16=use_fp16, device=device)
-
+                # Sequential processing for CPU to avoid multiprocessing overhead
                 batch_frames = []
                 with torch.no_grad():
                     for i in range(video_length):
-                        frame = frame_queue.get()
+                        frame = frame_queue.get(timeout=1.0)
                        if frame is None:
                             break
                         batch_frames.append(frame)
-
-                        print(f"PROGRESS:{progress:.2f}")
-                        sys.stdout.flush()  # Force per-frame update
+
                         if len(batch_frames) >= batch_size:
-                            kpts_batch, scores_batch =
+                            kpts_batch, scores_batch = process_batch(batch_frames, human_model, pose_model,
+                                                                     det_dim, num_person, args.thred_score,
+                                                                     use_fp16, device, people_sort)
                             for kpts, scores in zip(kpts_batch, scores_batch):
                                 kpts_result[frame_idx:frame_idx + 1] = kpts[None, :num_person]
                                 scores_result[frame_idx:frame_idx + 1] = scores[None, :num_person]
                                 frame_idx += 1
                             batch_frames = []
+
+                        # Print progress every batch
+                        if i % batch_size == 0:
+                            progress = ((i + 1) / video_length) * 100
+                            print(f"PROGRESS:{progress:.2f}%")
 
                 # Process remaining frames
                 if batch_frames:
-                    kpts_batch, scores_batch =
+                    kpts_batch, scores_batch = process_batch(batch_frames, human_model, pose_model,
+                                                             det_dim, num_person, args.thred_score,
+                                                             use_fp16, device, people_sort)
                     for kpts, scores in zip(kpts_batch, scores_batch):
                         kpts_result[frame_idx:frame_idx + 1] = kpts[None, :num_person]
                         scores_result[frame_idx:frame_idx + 1] = scores[None, :num_person]
                         frame_idx += 1
                     progress = (frame_idx / video_length) * 100
-                    print(f"PROGRESS:{progress:.2f}")
-
-            pool.close()
-            pool.join()
-        except KeyboardInterrupt:
-            print("\nInterrupted by user, shutting down...")
-            sys.stdout.flush()
-            if pool is not None:
-                pool.terminate()
-                pool.join()
+                    print(f"PROGRESS:{progress:.2f}%")
+        except Exception as e:
             loader_thread.join()
-
-        loader_thread.join()
-
+            raise
+        finally:
+            loader_thread.join()
+            cap.release()
+            if device.type == 'cuda':
+                torch.cuda.empty_cache()  # Free GPU memory
 
     if gen_output and kpts_result.any():
         keypoints = kpts_result[:frame_idx].transpose(1, 0, 2, 3)
         scores = scores_result[:frame_idx].transpose(1, 0, 2)
@@ -389,8 +379,12 @@ def gen_img_kpts(image, human_model, pose_model, human_sort, det_dim=416, num_pe
     with torch.no_grad():
         inputs, origin_img, center, scale = PreProcess(image, bboxs_track, cfg, num_person)
         inputs = inputs[:, [2, 1, 0]]
-        if torch.cuda.is_available()
+        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+        use_fp16 = device.type == 'cuda' and torch.cuda.get_device_capability()[0] >= 7
+        if device.type == 'cuda':
             inputs = inputs.cuda()
+            if use_fp16:
+                inputs = inputs.half()  # Match model precision
         output = pose_model(inputs)
         preds, maxvals = get_final_preds(cfg, output.clone().cpu().numpy(), np.asarray(center), np.asarray(scale))
 
@@ -423,11 +417,10 @@ def generate_ntu_kpts_json(video_path, kpts_file):
            ret, frame = cap.read()
            if not ret:
                continue
-
            try:
                bboxs, scores = yolo_det(frame, human_model, confidence=args.thred_score)
                if bboxs is None or not bboxs.any():
-                    print('No person detected!')
                    continue
 
                people_track = people_sort.update(bboxs)
@@ -447,14 +440,17 @@ def generate_ntu_kpts_json(video_path, kpts_file):
                    bbox = [round(i, 3) for i in list(bbox)]
                    track_bboxs.append(bbox)
 
-            except Exception
-                print(e)
+            except Exception:
                continue
 
            inputs, origin_img, center, scale = PreProcess(frame, track_bboxs, cfg, args.num_person)
            inputs = inputs[:, [2, 1, 0]]
-            if torch.cuda.is_available()
+            device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+            use_fp16 = device.type == 'cuda' and torch.cuda.get_device_capability()[0] >= 7
+            if device.type == 'cuda':
                inputs = inputs.cuda()
+                if use_fp16:
+                    inputs = inputs.half()  # Match model precision
            output = pose_model(inputs)
            preds, maxvals = get_final_preds(cfg, output.clone().cpu().numpy(), np.asarray(center), np.asarray(scale))
 
@@ -473,6 +469,7 @@ def generate_ntu_kpts_json(video_path, kpts_file):
    kpts_info.update({'data': data})
    with open(kpts_file, 'w') as fw:
        json.dump(kpts_info, fw)
+    cap.release()
 
 def round_list(input_list, decimals=3):
     dim = len(input_list)
@@ -484,18 +481,12 @@ def round_list(input_list, decimals=3):
 if __name__ == "__main__":
     args = parse_args()
     video_path = args.video
 
     if args.animation:
         gen_video_kpts(video_path, det_dim=args.det_dim, num_person=args.num_person,
                        gen_output=False, animation=True)
     else:
-        keypoints, scores = gen_video_kpts(video_path, det_dim=args.det_dim,
-                                           num_person=args.num_person,
-                                           gen_output=True,
-                                           batch_size=args.batch_size)
+        keypoints, scores = gen_video_kpts(video_path, det_dim=416, num_person=1, gen_output=True, batch_size=8)  # Increased batch_size to 8
         if keypoints is not None:
             output_file = "output.npz"
             np.savez(output_file, keypoints=keypoints, scores=scores)
-            print(f"Saved to {output_file}")
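The non-animation path now feeds frames from a single loader thread through a bounded queue and runs pose inference in fixed-size batches, flushing any partial batch at the end. Below is a simplified, self-contained sketch of that producer/consumer batching pattern with a dummy frame source and a stand-in for process_batch; names and values are illustrative only, not the project's classes.

import queue
import threading

def frame_loader(num_frames, frame_queue):
    # Producer: push frames (here just integers) and a None sentinel when done.
    for i in range(num_frames):
        frame_queue.put(i)
    frame_queue.put(None)

def run_batched(num_frames=25, batch_size=8):
    frame_queue = queue.Queue(maxsize=batch_size)
    loader = threading.Thread(target=frame_loader, args=(num_frames, frame_queue))
    loader.start()

    results, batch = [], []
    try:
        while True:
            frame = frame_queue.get(timeout=1.0)
            if frame is None:
                break
            batch.append(frame)
            if len(batch) >= batch_size:
                results.extend(f * 2 for f in batch)  # stand-in for process_batch()
                print(f"PROGRESS:{(len(results) / num_frames) * 100:.2f}%")
                batch = []
        if batch:  # flush the final partial batch
            results.extend(f * 2 for f in batch)
            print(f"PROGRESS:{(len(results) / num_frames) * 100:.2f}%")
    finally:
        loader.join()
    return results

if __name__ == "__main__":
    run_batched()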
app.py
CHANGED
@@ -51,18 +51,21 @@ def run_command(command, working_dir, progress_bar, progress_text, step_start_pr
             if source == 'stdout':
                 if show_progress and line.startswith("PROGRESS:"):
                     try:
+                        progress_str = line.strip().split("PROGRESS:")[1].replace("%", "")  # Remove '%'
+                        progress = float(progress_str)  # Convert to float after removing '%'  # Debug output
                         if Path(command[1]).name == 'gen_skes.py':
-                            if progress <=
-                                adjusted_progress = step_start_progress + (progress * 0.6)
-                            else:
-                                adjusted_progress = step_start_progress + 0.6 + ((progress -
+                            if progress <= 100.0:  # 2D keypoint generation (0-100% maps to 0-60%)
+                                adjusted_progress = step_start_progress + (progress / 100.0 * 0.6)
+                            else:  # 3D pose generation (100-200% maps to 60-80%)
+                                adjusted_progress = step_start_progress + 0.6 + ((progress - 100.0) / 100.0 * 0.2)
+                            total_progress = min(adjusted_progress, step_start_progress + step_weight)
+                        else:  # For conver_bvh.py or others with 0-100% progress
+                            adjusted_progress = step_start_progress + (progress / 100.0 * step_weight)
+                            total_progress = min(adjusted_progress, step_start_progress + step_weight)
                         progress_bar.progress(total_progress)
                         progress_text.text(f"Progress: {int(total_progress * 100)}%")
-                    except ValueError:
+                    except ValueError as e:
+                        print(f"DEBUG: Error parsing progress: {e}")
                         pass
                 elif source == 'stderr':
                     stderr_lines.append(line.strip())
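The parser maps each child process's "PROGRESS:NN.NN%" lines onto a single progress bar: for gen_skes.py, values up to 100% fill the first 60% of the step and values above 100% fill the next 20%, while other scripts scale 0-100% across their whole step weight. A small standalone sketch of just that mapping arithmetic is below; the function name and example step values are made up for illustration.

def map_progress(line, step_start, step_weight, two_stage=True):
    """Map a 'PROGRESS:NN.NN%' line onto a 0.0-1.0 progress bar segment."""
    if not line.startswith("PROGRESS:"):
        return None
    progress = float(line.strip().split("PROGRESS:")[1].replace("%", ""))
    if two_stage:
        if progress <= 100.0:            # stage 1: 0-100% -> first 60% of the step
            adjusted = step_start + (progress / 100.0) * 0.6
        else:                            # stage 2: 100-200% -> remaining 20%
            adjusted = step_start + 0.6 + ((progress - 100.0) / 100.0) * 0.2
    else:                                # single-stage scripts: 0-100% -> full step weight
        adjusted = step_start + (progress / 100.0) * step_weight
    return min(adjusted, step_start + step_weight)

# Example: a step that starts at 0.0 and owns 80% of the bar.
print(map_progress("PROGRESS:50.00%", 0.0, 0.8))   # 0.3
print(map_progress("PROGRESS:150.00%", 0.0, 0.8))  # 0.7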
convertNPZtoBVH/conver_bvh.py
CHANGED
@@ -2,11 +2,13 @@ import os
 import numpy as np
 from scipy.spatial.transform import Rotation
 from collections import deque
-from tqdm import tqdm
 import sys
 import argparse
 
-print
+# Custom print function to show only progress
+def log_progress(message):
+    if message.startswith("PROGRESS:"):
+        print(message)
 
 def parse_obj(filename):
     vertices = []
@@ -51,10 +53,11 @@ def build_hierarchy(lines, root=0):
 
 def compute_offsets(vertices_ref, parent):
     num_joints = len(vertices_ref)
+    scale_factor = 0.05
     offsets = np.zeros((num_joints, 3))
     for j in range(num_joints):
         if parent[j] != -1:
-            offsets[j] = vertices_ref[j] - vertices_ref[parent[j]]
+            offsets[j] = (vertices_ref[j] - vertices_ref[parent[j]])*scale_factor
     return offsets
 
 def compute_R_world(joint, vertices_ref, vertices_cur, children):
@@ -79,8 +82,7 @@ def compute_R_world(joint, vertices_ref, vertices_cur, children):
             return np.eye(3)
         axis = axis / axis_norm
         angle = np.arccos(cos_theta)
-
-        return R
+        return Rotation.from_rotvec(axis * angle).as_matrix()
     else:
         A = np.column_stack([vertices_ref[c] - vertices_ref[joint] for c in children[joint]])
         B = np.column_stack([vertices_cur[c] - vertices_cur[joint] for c in children[joint]])
@@ -92,89 +94,231 @@
     R = U @ Vh
     return R
 
+def calculate_motion_velocity(rotations):
+    n_frames = len(rotations)
+    if n_frames <= 1:
+        return np.zeros(n_frames)
+
+    velocities = np.zeros(n_frames)
+    for i in range(1, n_frames):
+        prev_quat = rotations[i-1].as_quat()
+        curr_quat = rotations[i].as_quat()
+        if np.dot(prev_quat, curr_quat) < 0:
+            curr_quat = -curr_quat
+        diff = Rotation.from_quat(prev_quat).inv() * Rotation.from_quat(curr_quat)
+        velocities[i] = np.linalg.norm(diff.as_rotvec())
+    if n_frames > 1:
+        velocities[0] = velocities[1]
+    return velocities
+
+def adaptive_smooth_rotations(rotations, window_size=7, velocity_threshold=0.03):
+    n_frames = len(rotations)
+    if n_frames <= 1:
+        return rotations
+
+    velocities = calculate_motion_velocity(rotations)
+    if np.max(velocities) > 0:
+        velocities = velocities / np.max(velocities)
+
+    smoothed = []
+    half_window = window_size // 2
+
+    for i in range(n_frames):
+        start_idx = max(0, i - half_window)
+        end_idx = min(n_frames - 1, i + half_window)
+        window_rots = rotations[start_idx:end_idx + 1]
+
+        velocity_factor = min(1.0, velocities[i] / velocity_threshold)
+        sigma = 0.5 + 1.5 * velocity_factor
+        dist = np.linspace(-1, 1, len(window_rots))
+        weights = np.exp(-sigma * np.square(dist))
+        weights = weights / np.sum(weights)
+
+        quats = [r.as_quat() for r in window_rots]
+        for j in range(1, len(quats)):
+            if np.dot(quats[0], quats[j]) < 0:
+                quats[j] = -quats[j]
+
+        result_quat = np.zeros(4)
+        for j in range(len(quats)):
+            result_quat += weights[j] * quats[j]
+        result_quat = result_quat / np.linalg.norm(result_quat)
+        smoothed.append(Rotation.from_quat(result_quat))
+
+    return smoothed
+
+def adaptive_smooth_positions(positions, window_size=7, velocity_threshold=0.03):
+    n_frames = len(positions)
+    if n_frames <= 1:
+        return positions
+
+    positions = np.array(positions)
+    smoothed = np.zeros_like(positions)
+    half_window = window_size // 2
+
+    velocities = np.zeros(n_frames)
+    for i in range(1, n_frames):
+        velocities[i] = np.linalg.norm(positions[i] - positions[i-1])
+    velocities[0] = velocities[1]
+    if np.max(velocities) > 0:
+        velocities = velocities / np.max(velocities)
+
+    for i in range(n_frames):
+        start_idx = max(0, i - half_window)
+        end_idx = min(n_frames - 1, i + half_window)
+        window_pos = positions[start_idx:end_idx + 1]
+
+        velocity_factor = min(1.0, velocities[i] / velocity_threshold)
+        sigma = 0.5 + 1.5 * velocity_factor
+        dist = np.linspace(-1, 1, len(window_pos))
+        weights = np.exp(-sigma * np.square(dist))
+        weights = weights / np.sum(weights)
+
+        smoothed[i] = np.sum(window_pos * weights[:, np.newaxis], axis=0)
+
+    return smoothed
+
+def detect_arm_joints(children, num_joints):
+    return [j for j in range(num_joints) if len(children[j]) == 1]
+
+def main(output_dir, smoothing_window=8, velocity_threshold=0.04, joint_constraint=True):
     folder = os.path.join(output_dir, 'obj_sequence')
 
     try:
         obj_files = sorted([f for f in os.listdir(folder) if f.endswith('.obj')])
     except Exception as e:
-        return
+        sys.exit(f"Error accessing folder {folder}: {e}")
 
     if not obj_files:
-        return
+        sys.exit("No OBJ files found.")
 
+    vertices_ref, lines = parse_obj(os.path.join(folder, obj_files[0]))
+    num_joints = len(vertices_ref)
+    parent, children = build_hierarchy(lines)
+    offsets = compute_offsets(vertices_ref, parent)
+    root = 0
+
+    hierarchy_order = []
+    def dfs(joint):
+        hierarchy_order.append(joint)
+        for child in children[joint]:
+            dfs(child)
+    dfs(root)
+
+    arm_joints = detect_arm_joints(children, num_joints)
+
+    all_root_positions = []
+    all_positions = [[] for _ in range(num_joints)]
+    all_rotations = [[] for _ in range(num_joints)]
+
+    total_files = len(obj_files)
+    for i in range(total_files):
+        obj_file = obj_files[i]
+        vertices_cur = parse_obj(os.path.join(folder, obj_file))[0]
+        R_world = [compute_R_world(j, vertices_ref, vertices_cur, children) for j in range(num_joints)]
+        R_local = [R_world[j] if parent[j] == -1 else R_world[parent[j]].T @ R_world[j] for j in range(num_joints)]
+        rotations = [Rotation.from_matrix(R) for R in R_local]
+
+        all_root_positions.append(vertices_cur[root])
+        for j in range(num_joints):
+            all_positions[j].append(vertices_cur[j])
+            all_rotations[j].append(rotations[j])
+
+        # First half of progress (0-50%)
+        progress = (i / total_files) * 50
+        log_progress(f"PROGRESS:{progress:.2f}")
+
+    smoothed_root_positions = adaptive_smooth_positions(all_root_positions, smoothing_window, velocity_threshold)
+    smoothed_positions = [adaptive_smooth_positions(np.array(pos), smoothing_window, velocity_threshold) for pos in all_positions]
+    smoothed_rotations = [adaptive_smooth_rotations(rot, smoothing_window, velocity_threshold) for rot in all_rotations]
+
+    # Enforce bone lengths (no lengthening restraint)
+    for i in range(total_files):
+        # Start with root position
+        smoothed_positions[root][i] = smoothed_root_positions[i]
+        # Adjust each child joint to maintain bone length
+        for j in range(num_joints):
+            if parent[j] != -1:  # Skip root
+                parent_pos = smoothed_positions[parent[j]][i]
+                child_pos = smoothed_positions[j][i]
+                ref_offset = offsets[j]  # Reference bone length vector
+                bone_length = np.linalg.norm(ref_offset)
+                if bone_length < 1e-6:
+                    continue  # Skip if bone length is near zero
+                current_vec = child_pos - parent_pos
+                current_length = np.linalg.norm(current_vec)
+                if current_length < 1e-6:
+                    # If current length is near zero, use reference direction
+                    smoothed_positions[j][i] = parent_pos + ref_offset
                else:
+                    # Scale the current vector to match reference bone length
+                    corrected_vec = (current_vec / current_length) * bone_length
+                    smoothed_positions[j][i] = parent_pos + corrected_vec
+
+    motion_data = []
+    joints_to_remove = {10, 13, 16, 6, 3}
+    for i in range(total_files):
+        root_pos = smoothed_root_positions[i]
+
+        if joint_constraint:
+            for j in range(num_joints):
+                euler = smoothed_rotations[j][i].as_euler('ZYX', degrees=True)
+                if j in arm_joints:
+                    euler = np.clip(euler, -180, 180)
                else:
+                    euler = np.clip(euler, -150, 150)
+                smoothed_rotations[j][i] = Rotation.from_euler('ZYX', euler, degrees=True)
+
+        euler_angles = [smoothed_rotations[j][i].as_euler('ZYX', degrees=True) for j in range(num_joints)]
+        motion_line = list(root_pos) + list(euler_angles[root])
+        for j in hierarchy_order[1:]:
+            motion_line.extend(euler_angles[j])
+        motion_data.append(motion_line)
+
+        # Second half of progress (50-100%)
+        progress = 50 + (i / total_files) * 50
+        log_progress(f"PROGRESS:{progress:.2f}")
 
+    bvh_dir = os.path.join(output_dir, 'bvh')
+    os.makedirs(bvh_dir, exist_ok=True)
+    bvh_file = os.path.join(bvh_dir, 'output.bvh')
+
+    with open(bvh_file, 'w') as f:
+        f.write("HIERARCHY\n")
+        def write_hierarchy(joint, parent, f, indent=0):
+            if parent == -1:
+                f.write("ROOT Joint{}\n".format(joint))
+            else:
+                f.write(" " * indent + "JOINT Joint{}\n".format(joint))
+            f.write(" " * indent + "{\n")
+            f.write(" " * (indent + 1) + "OFFSET {:.6f} {:.6f} {:.6f}\n".format(*offsets[joint]))
+            if parent == -1:
+                f.write(" " * (indent + 1) + "CHANNELS 6 Xposition Yposition Zposition Zrotation Yrotation Xrotation\n")
+            else:
+                f.write(" " * (indent + 1) + "CHANNELS 3 Zrotation Yrotation Xrotation\n")
+            for child in children[joint]:
+                write_hierarchy(child, joint, f, indent + 1)
+            if not children[joint]:
+                f.write(" " * (indent + 1) + "End Site\n")
+                f.write(" " * (indent + 1) + "{\n")
+                f.write(" " * (indent + 2) + "OFFSET 0.000000 0.000000 0.000000\n")
+                f.write(" " * (indent + 1) + "}\n")
+            f.write(" " * indent + "}\n")
+
+        write_hierarchy(root, -1, f)
+
         f.write("MOTION\n")
         f.write("Frames: {}\n".format(len(motion_data)))
         f.write("Frame Time: 0.033333\n")
         for motion_line in motion_data:
             f.write(" ".join("{:.6f}".format(x) for x in motion_line) + "\n")
 
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser('Convert OBJ sequence to BVH.')
+    parser = argparse.ArgumentParser('Convert OBJ sequence to BVH with improved adaptive smoothing.')
     parser.add_argument('--output-dir', type=str, default='../outputs/', help='Output directory containing obj_sequence')
+    parser.add_argument('--smoothing-window', type=int, default=7, help='Size of smoothing window')
+    parser.add_argument('--velocity-threshold', type=float, default=0.03, help='Velocity threshold for adaptive smoothing')
+    parser.add_argument('--disable-joint-constraints', action='store_false', dest='joint_constraint',
+                        help='Disable joint constraints that prevent extreme rotations')
     args = parser.parse_args()
-    main(args.output_dir)
+    main(args.output_dir, args.smoothing_window, args.velocity_threshold, args.joint_constraint)
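The new smoothing functions weight each window with a Gaussian whose sharpness grows with the local motion velocity, so fast motion keeps its detail while slow drift gets a stronger blur. Below is a small illustration of that weighting idea on a 1-D signal; it is a standalone sketch with made-up data, not the project's joint pipeline.

import numpy as np

def adaptive_smooth_1d(signal, window_size=7, velocity_threshold=0.03):
    signal = np.asarray(signal, dtype=float)
    n = len(signal)
    if n <= 1:
        return signal.copy()

    # Per-sample "velocity": magnitude of the frame-to-frame change, normalized to [0, 1].
    velocity = np.abs(np.diff(signal, prepend=signal[0]))
    if velocity.max() > 0:
        velocity = velocity / velocity.max()

    half = window_size // 2
    smoothed = np.empty(n)
    for i in range(n):
        lo, hi = max(0, i - half), min(n - 1, i + half)
        window = signal[lo:hi + 1]
        # Faster motion -> larger sigma -> weights concentrate on the center sample,
        # so sharp moves are preserved; slow segments get a flatter, stronger blur.
        sigma = 0.5 + 1.5 * min(1.0, velocity[i] / velocity_threshold)
        dist = np.linspace(-1, 1, len(window))
        weights = np.exp(-sigma * dist ** 2)
        smoothed[i] = np.sum(window * weights / weights.sum())
    return smoothed

# Example: a noisy step signal keeps its jump while the flat parts are smoothed.
noisy = np.concatenate([np.zeros(20), np.ones(20)]) + np.random.default_rng(0).normal(0, 0.05, 40)
print(adaptive_smooth_1d(noisy)[:5])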