Update app.py
app.py
CHANGED
@@ -96,37 +96,74 @@ detector = HandDetector(maxHands=1, detectionCon=0.8)
 if "output_text" not in st.session_state:
     st.session_state["output_text"] = ""
 
+class Detection(NamedTuple):
+    label: str
+    score: float
+    box: np.ndarray
 
+
+@st.cache_resource  # Cache label colors
+def generate_label_colors():
+    return np.random.uniform(0, 255, size=(2, 3))  # Two classes: Left and Right Hand
+
+
+COLORS = generate_label_colors()
+
+# Initialize MediaPipe Hands
+mp_hands = mp.solutions.hands
+detector = mp_hands.Hands(static_image_mode=False, max_num_hands=2, min_detection_confidence=0.5)
+
+# Session-specific caching
+result_queue: "queue.Queue[List[Detection]]" = queue.Queue()
+
+# Hand detection callback
 def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
+    image = frame.to_ndarray(format="bgr24")
+    h, w = image.shape[:2]
+
+    # Process image with MediaPipe Hands
+    results = detector.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
+
+    detections = []
+    if results.multi_hand_landmarks:
+        for hand_landmarks, hand_class in zip(results.multi_hand_landmarks, results.multi_handedness):
+            # Extract bounding box
+            x_min, y_min = 1, 1
+            x_max, y_max = 0, 0
+            for lm in hand_landmarks.landmark:
+                x_min = min(x_min, lm.x)
+                y_min = min(y_min, lm.y)
+                x_max = max(x_max, lm.x)
+                y_max = max(y_max, lm.y)
+
+            # Scale bbox to image size
+            box = np.array([x_min * w, y_min * h, x_max * w, y_max * h]).astype("int")
+
+            # Label and score
+            label = hand_class.classification[0].label
+            score = hand_class.classification[0].score
+
+            detections.append(Detection(label=label, score=score, box=box))
+
+            # Draw bounding box and label
+            color = COLORS[0 if label == "Left" else 1]
+            cv2.rectangle(image, (box[0], box[1]), (box[2], box[3]), color, 2)
+            caption = f"{label}: {round(score * 100, 2)}%"
+            cv2.putText(
+                image,
+                caption,
+                (box[0], box[1] - 15 if box[1] - 15 > 15 else box[1] + 15),
+                cv2.FONT_HERSHEY_SIMPLEX,
+                0.5,
+                color,
+                2,
+            )
+
+    # Put results in the queue
+    result_queue.put(detections)
+
+    return av.VideoFrame.from_ndarray(image, format="bgr24")
+
 
 
 webrtc_ctx = webrtc_streamer(
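The visible hunk ends just as the `webrtc_streamer(` call begins, so the code that drains `result_queue` is not shown. Below is a minimal sketch of how the Streamlit main thread could consume those detections, following the usual streamlit-webrtc pattern; the `key`, the streamer arguments, and the `labels_placeholder` name are assumptions, not taken from app.py.

# Sketch only (not part of this commit): consume detections published by
# video_frame_callback via result_queue and render them in the main thread.
import queue

import streamlit as st
from streamlit_webrtc import WebRtcMode, webrtc_streamer

webrtc_ctx = webrtc_streamer(
    key="hand-detection",  # assumed key; the real arguments are not shown in this hunk
    mode=WebRtcMode.SENDRECV,
    video_frame_callback=video_frame_callback,
    media_stream_constraints={"video": True, "audio": False},
)

labels_placeholder = st.empty()  # assumed placeholder for the detection table
while webrtc_ctx.state.playing:
    try:
        detections = result_queue.get(timeout=1.0)  # filled by the callback thread
    except queue.Empty:
        continue
    labels_placeholder.table(
        [{"label": d.label, "score": round(d.score, 2)} for d in detections]
    )

The timeout keeps the loop from blocking forever once the stream stops.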