Update app.py
app.py
CHANGED
@@ -269,39 +269,54 @@ if "output_text" not in st.session_state:
 # return av.VideoFrame.from_ndarray(img, format="bgr24")


-
-result_queue = queue.Queue()
-
-def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
-    img = frame.to_ndarray(format="bgr24")
-
-    # Ensure square input for MediaPipe
-    h, w, _ = img.shape
-    size = min(h, w)
-    img_cropped = img[:size, :size]
-
-    # Detect hands
-    hands, img_cropped = detector.findHands(img_cropped, flipType=False)
-
-    # Collect detections
-    detections = []
-    if hands:
-        for hand in hands:
-            bbox = hand["bbox"]
-            label = hand["type"]
-            score = hand["score"]
-
-
-
-
-
-
-
-
-
-
-
-
+result_queue: "queue.Queue[List[Detection]]" = queue.Queue()
+
+
+def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
+    image = frame.to_ndarray(format="bgr24")
+
+    # Run inference
+    blob = cv2.dnn.blobFromImage(
+        cv2.resize(image, (300, 300)), 0.007843, (300, 300), 127.5
+    )
+    net.setInput(blob)
+    output = net.forward()
+
+    h, w = image.shape[:2]
+
+    # Convert the output array into a structured form.
+    output = output.squeeze()  # (1, 1, N, 7) -> (N, 7)
+    output = output[output[:, 2] >= score_threshold]
+    detections = [
+        Detection(
+            class_id=int(detection[1]),
+            label=CLASSES[int(detection[1])],
+            score=float(detection[2]),
+            box=(detection[3:7] * np.array([w, h, w, h])),
+        )
+        for detection in output
+    ]
+
+    # Render bounding boxes and captions
+    for detection in detections:
+        caption = f"{detection.label}: {round(detection.score * 100, 2)}%"
+        color = COLORS[detection.class_id]
+        xmin, ymin, xmax, ymax = detection.box.astype("int")
+
+        cv2.rectangle(image, (xmin, ymin), (xmax, ymax), color, 2)
+        cv2.putText(
+            image,
+            caption,
+            (xmin, ymin - 15 if ymin - 15 > 15 else ymin + 15),
+            cv2.FONT_HERSHEY_SIMPLEX,
+            0.5,
+            color,
+            2,
+        )
+
+    result_queue.put(detections)
+
+    return av.VideoFrame.from_ndarray(image, format="bgr24")


 # def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
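The new callback refers to several names that are defined elsewhere in app.py and do not appear in this hunk: Detection, CLASSES, COLORS, net, and score_threshold. Below is a minimal sketch of plausible definitions for them, following the usual MobileNet-SSD (Caffe) setup for cv2.dnn; the model paths, class list, and threshold value are assumptions for illustration, not taken from this commit.

# Sketch only: the real app.py defines these elsewhere, so the paths and
# values below are placeholders, not part of this change.
from pathlib import Path
from typing import List, NamedTuple  # List is also needed by the queue annotation above

import cv2
import numpy as np


class Detection(NamedTuple):
    class_id: int
    label: str
    score: float
    box: np.ndarray  # (xmin, ymin, xmax, ymax) in pixels


# Pascal VOC label set that the Caffe MobileNet-SSD model was trained on
CLASSES = [
    "background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus",
    "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike",
    "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor",
]
COLORS = np.random.uniform(0, 255, size=(len(CLASSES), 3))  # one colour per class

# Hypothetical model file locations; readNetFromCaffe takes the prototxt and weights
net = cv2.dnn.readNetFromCaffe(
    str(Path("./models/MobileNetSSD_deploy.prototxt.txt")),
    str(Path("./models/MobileNetSSD_deploy.caffemodel")),
)

score_threshold = 0.5  # in the app this would typically come from a slider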
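For context, the sketch below shows how a callback like this is typically wired into streamlit_webrtc, assuming the app uses webrtc_streamer with WebRtcMode.SENDRECV; the key name and the checkbox/table UI are illustrative, not taken from this commit. Because the callback runs on a worker thread, detections are handed back to the Streamlit script through the thread-safe result_queue rather than by calling Streamlit APIs from inside the callback.

# Minimal wiring sketch; assumes video_frame_callback and result_queue from the diff above.
import streamlit as st
from streamlit_webrtc import WebRtcMode, webrtc_streamer

webrtc_ctx = webrtc_streamer(
    key="object-detection",  # hypothetical key
    mode=WebRtcMode.SENDRECV,
    video_frame_callback=video_frame_callback,
    media_stream_constraints={"video": True, "audio": False},
    async_processing=True,
)

# Read results pushed by the callback and show the latest detections in the page.
if st.checkbox("Show detected labels", value=True) and webrtc_ctx.state.playing:
    labels_placeholder = st.empty()
    while True:
        detections = result_queue.get()  # blocks until the callback publishes a frame's result
        labels_placeholder.table(detections)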