Pratyush101 commited on
Commit
5fdbc61
·
verified ·
1 Parent(s): 618adcc

Update app.py

Browse files

The error occurs because MediaPipe's hand-landmark pipeline, when given a NORM_RECT without explicit image dimensions, assumes a square region of interest; passing a non-square input frame therefore triggers the failure.

Solution 1: Use a Square Input Image
Crop or resize the input frame img to a square before passing it to the hand detector:

Files changed (1) hide show
  1. app.py +14 -10
app.py CHANGED
@@ -274,30 +274,34 @@ result_queue = queue.Queue()
274
 
275
def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
    """Per-frame WebRTC callback: detect hands, draw boxes, queue detections.

    MediaPipe's NORM_RECT handling assumes a square ROI, so the frame is
    center-cropped to a square before detection (fixes the "square input"
    error on non-square frames). Detected bounding boxes are mapped back to
    full-frame coordinates, so the returned frame keeps its original size
    and aspect ratio.
    """
    img = frame.to_ndarray(format="bgr24")

    # Center-crop to a square so MediaPipe's NORM_RECT assumption holds.
    # The slice is a numpy view, so anything the detector draws on it also
    # appears in the full frame.
    h, w, _ = img.shape
    size = min(h, w)
    y0 = (h - size) // 2
    x0 = (w - size) // 2
    square = img[y0:y0 + size, x0:x0 + size]

    # Detect hands on the square crop. flipType=False: left/right labels
    # follow the unmirrored image.
    hands, square = detector.findHands(square, flipType=False)

    # Collect detections in full-frame coordinates.
    detections = []
    if hands:
        for hand in hands:
            bx, by, bw, bh = hand["bbox"]
            # Offset the crop-relative bbox back into the full frame.
            bx += x0
            by += y0
            label = hand["type"]
            score = hand["score"]

            # Draw bounding box on the full-size frame.
            cv2.rectangle(img, (bx, by), (bx + bw, by + bh), (255, 0, 0), 2)

            # Append detection details (full-frame coordinates).
            detections.append({"label": label, "score": score,
                               "bbox": [bx, by, bw, bh]})

    # Hand results to the consumer thread; the callback runs off the UI
    # thread, so a queue is the safe channel. An empty list is queued too,
    # so the consumer sees every frame.
    result_queue.put(detections)

    return av.VideoFrame.from_ndarray(img, format="bgr24")
301
 
302
 
303
  # def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
 
274
 
275
def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
    """Per-frame WebRTC callback: detect hands, draw boxes, queue detections.

    MediaPipe's NORM_RECT handling assumes a square ROI, so the frame is
    center-cropped to a square before detection. Unlike a top-left crop
    (``img[:size, :size]``), a center crop does not silently discard the
    right/bottom of wide frames. Bounding boxes are mapped back to
    full-frame coordinates and the full-size frame is returned, so the
    output video keeps a constant size and aspect ratio.
    """
    img = frame.to_ndarray(format="bgr24")

    # Center-crop to a square for MediaPipe. The slice is a numpy view,
    # so detector annotations drawn on it land in the full frame as well.
    h, w, _ = img.shape
    size = min(h, w)
    y0 = (h - size) // 2
    x0 = (w - size) // 2
    square = img[y0:y0 + size, x0:x0 + size]

    # Detect hands on the square crop (flipType=False keeps left/right
    # labels consistent with the unmirrored image).
    hands, square = detector.findHands(square, flipType=False)

    # Collect detections in full-frame coordinates.
    detections = []
    if hands:
        for hand in hands:
            bx, by, bw, bh = hand["bbox"]
            # Shift the crop-relative bbox back into the full frame.
            bx += x0
            by += y0
            label = hand["type"]
            score = hand["score"]

            # Draw bounding box on the full-size frame.
            cv2.rectangle(img, (bx, by), (bx + bw, by + bh), (255, 0, 0), 2)

            # Append detection details (full-frame coordinates).
            detections.append({"label": label, "score": score,
                               "bbox": [bx, by, bw, bh]})

    # Publish results for the consumer thread; an empty list is queued
    # too, so every processed frame is accounted for.
    result_queue.put(detections)

    # Return the full-size frame, not the crop — a mid-stream resolution
    # change confuses downstream consumers.
    return av.VideoFrame.from_ndarray(img, format="bgr24")
305
 
306
 
307
  # def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame: