Pratyush101 commited on
Commit
5fdbc61
·
verified ·
1 Parent(s): 618adcc

Update app.py

Browse files

The error occurs because MediaPipe's hand-landmark pipeline, when given a NORM_RECT without explicit image dimensions, assumes a square region of interest; passing a non-square input frame therefore triggers the failure.

Solution 1: Use a Square Input Image
Crop or resize the input frame img to a square before passing it to the hand detector:

Files changed (1) hide show
  1. app.py +14 -10
app.py CHANGED
@@ -274,30 +274,34 @@ result_queue = queue.Queue()
274
 
275
def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
    """Per-frame WebRTC callback: detect hands, draw boxes, queue detections.

    MediaPipe's NORM_RECT handling assumes a square ROI, so the frame is
    center-cropped to a square before detection (fixes the "square input"
    error on non-square frames). Detected bounding boxes are mapped back to
    full-frame coordinates, so the returned frame keeps its original size
    and aspect ratio.
    """
    img = frame.to_ndarray(format="bgr24")

    # Center-crop to a square so MediaPipe's NORM_RECT assumption holds.
    # The slice is a numpy view, so anything the detector draws on it also
    # appears in the full frame.
    h, w, _ = img.shape
    size = min(h, w)
    y0 = (h - size) // 2
    x0 = (w - size) // 2
    square = img[y0:y0 + size, x0:x0 + size]

    # Detect hands on the square crop. flipType=False: left/right labels
    # follow the unmirrored image.
    hands, square = detector.findHands(square, flipType=False)

    # Collect detections in full-frame coordinates.
    detections = []
    if hands:
        for hand in hands:
            bx, by, bw, bh = hand["bbox"]
            # Offset the crop-relative bbox back into the full frame.
            bx += x0
            by += y0
            label = hand["type"]
            score = hand["score"]

            # Draw bounding box on the full-size frame.
            cv2.rectangle(img, (bx, by), (bx + bw, by + bh), (255, 0, 0), 2)

            # Append detection details (full-frame coordinates).
            detections.append({"label": label, "score": score,
                               "bbox": [bx, by, bw, bh]})

    # Hand results to the consumer thread; the callback runs off the UI
    # thread, so a queue is the safe channel. An empty list is queued too,
    # so the consumer sees every frame.
    result_queue.put(detections)

    return av.VideoFrame.from_ndarray(img, format="bgr24")
301
 
302
 
303
  # def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
 
274
 
275
def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
    """Per-frame WebRTC callback: detect hands, draw boxes, queue detections.

    MediaPipe's NORM_RECT handling assumes a square ROI, so the frame is
    center-cropped to a square before detection. Unlike a top-left crop
    (``img[:size, :size]``), a center crop does not silently discard the
    right/bottom of wide frames. Bounding boxes are mapped back to
    full-frame coordinates and the full-size frame is returned, so the
    output video keeps a constant size and aspect ratio.
    """
    img = frame.to_ndarray(format="bgr24")

    # Center-crop to a square for MediaPipe. The slice is a numpy view,
    # so detector annotations drawn on it land in the full frame as well.
    h, w, _ = img.shape
    size = min(h, w)
    y0 = (h - size) // 2
    x0 = (w - size) // 2
    square = img[y0:y0 + size, x0:x0 + size]

    # Detect hands on the square crop (flipType=False keeps left/right
    # labels consistent with the unmirrored image).
    hands, square = detector.findHands(square, flipType=False)

    # Collect detections in full-frame coordinates.
    detections = []
    if hands:
        for hand in hands:
            bx, by, bw, bh = hand["bbox"]
            # Shift the crop-relative bbox back into the full frame.
            bx += x0
            by += y0
            label = hand["type"]
            score = hand["score"]

            # Draw bounding box on the full-size frame.
            cv2.rectangle(img, (bx, by), (bx + bw, by + bh), (255, 0, 0), 2)

            # Append detection details (full-frame coordinates).
            detections.append({"label": label, "score": score,
                               "bbox": [bx, by, bw, bh]})

    # Publish results for the consumer thread; an empty list is queued
    # too, so every processed frame is accounted for.
    result_queue.put(detections)

    # Return the full-size frame, not the crop — a mid-stream resolution
    # change confuses downstream consumers.
    return av.VideoFrame.from_ndarray(img, format="bgr24")
305
 
306
 
307
  # def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame: