Spaces:

whitphx
/

streamlit-webrtc-example

Running

App Files Files Community

whitphx HF Staff commited on Jan 31, 2021

Commit

e8c0c75

1 Parent(s): a4daff6

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -27

app.py CHANGED Viewed

@@ -5,7 +5,7 @@ import threading
 import time
 import urllib.request
 from pathlib import Path
-from typing import List, Union
 try:
     from typing import Literal
@@ -15,7 +15,6 @@ except ImportError:
 import av
 import cv2
 import numpy as np
-import PIL
 import streamlit as st
 from aiortc.contrib.media import MediaPlayer
@@ -77,6 +76,12 @@ def download_file(url, download_to: Path, expected_size=None):
             progress_bar.empty()
 def main():
     st.header("WebRTC demo")
@@ -230,28 +235,32 @@ def app_object_detection():
     DEFAULT_CONFIDENCE_THRESHOLD = 0.5
     class MobileNetSSDVideoTransformer(VideoTransformerBase):
         confidence_threshold: float
-        _labels: Union[List[str], None]
-        _labels_lock: threading.Lock
         def __init__(self) -> None:
             self._net = cv2.dnn.readNetFromCaffe(
                 str(PROTOTXT_LOCAL_PATH), str(MODEL_LOCAL_PATH)
             )
             self.confidence_threshold = DEFAULT_CONFIDENCE_THRESHOLD
-            self._labels = None
-            self._labels_lock = threading.Lock()
         @property
-        def labels(self) -> Union[List[str], None]:
-            with self._labels_lock:
-                return self._labels
         def _annotate_image(self, image, detections):
             # loop over the detections
             (h, w) = image.shape[:2]
-            labels = []
             for i in np.arange(0, detections.shape[2]):
                 confidence = detections[0, 0, i, 2]
@@ -263,9 +272,11 @@ def app_object_detection():
                     box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
                     (startX, startY, endX, endY) = box.astype("int")
                     # display the prediction
-                    label = f"{CLASSES[idx]}: {round(confidence * 100, 2)}%"
-                    labels.append(label)
                     cv2.rectangle(image, (startX, startY), (endX, endY), COLORS[idx], 2)
                     y = startY - 15 if startY - 15 > 15 else startY + 15
                     cv2.putText(
@@ -277,7 +288,7 @@ def app_object_detection():
                         COLORS[idx],
                         2,
                     )
-            return image, labels
         def transform(self, frame: av.VideoFrame) -> np.ndarray:
             image = frame.to_ndarray(format="bgr24")
@@ -286,12 +297,12 @@ def app_object_detection():
             )
             self._net.setInput(blob)
             detections = self._net.forward()
-            annotated_image, labels = self._annotate_image(image, detections)
             # NOTE: This `transform` method is called in another thread,
             # so it must be thread-safe.
-            with self._labels_lock:
-                self._labels = labels
             return annotated_image
@@ -309,7 +320,7 @@ def app_object_detection():
     if webrtc_ctx.video_transformer:
         webrtc_ctx.video_transformer.confidence_threshold = confidence_threshold
-    if st.checkbox("Show the detected labels"):
         if webrtc_ctx.state.playing:
             labels_placeholder = st.empty()
             # NOTE: The video transformation with object detection and
@@ -319,7 +330,7 @@ def app_object_detection():
             # are not synchronized.
             while True:
                 if webrtc_ctx.video_transformer:
-                    labels_placeholder.write(webrtc_ctx.video_transformer.labels)
                 time.sleep(0.1)
     st.markdown(
@@ -371,7 +382,7 @@ def app_streaming():
     WEBRTC_CLIENT_SETTINGS.update(
         {
-            "fmedia_stream_constraints": {
                 "video": media_file_info["type"] == "video",
                 "audio": media_file_info["type"] == "audio",
             }
@@ -405,15 +416,9 @@ def app_sendonly():
                 webrtc_ctx.video_receiver.stop()
                 break
-            img = frame.to_ndarray(format="bgr24")
-            img = PIL.Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
-            image_loc.image(img)
-WEBRTC_CLIENT_SETTINGS = ClientSettings(
-    rtc_configuration={"iceServers": [{"urls": ["stun:stun.l.google.com:19302"]}]},
-    media_stream_constraints={"video": True, "audio": True},
-)
 if __name__ == "__main__":
     logging.basicConfig(

 import time
 import urllib.request
 from pathlib import Path
+from typing import List, NamedTuple, Union
 try:
     from typing import Literal
 import av
 import cv2
 import numpy as np
 import streamlit as st
 from aiortc.contrib.media import MediaPlayer
             progress_bar.empty()
+WEBRTC_CLIENT_SETTINGS = ClientSettings(
+    rtc_configuration={"iceServers": [{"urls": ["stun:stun.l.google.com:19302"]}]},
+    media_stream_constraints={"video": True, "audio": True},
+)
 def main():
     st.header("WebRTC demo")
     DEFAULT_CONFIDENCE_THRESHOLD = 0.5
+    class Detection(NamedTuple):
+        name: str
+        prob: float
     class MobileNetSSDVideoTransformer(VideoTransformerBase):
         confidence_threshold: float
+        _result: Union[List[Detection], None]
+        _result_lock: threading.Lock
         def __init__(self) -> None:
             self._net = cv2.dnn.readNetFromCaffe(
                 str(PROTOTXT_LOCAL_PATH), str(MODEL_LOCAL_PATH)
             )
             self.confidence_threshold = DEFAULT_CONFIDENCE_THRESHOLD
+            self._result = None
+            self._result_lock = threading.Lock()
         @property
+        def result(self) -> Union[List[Detection], None]:
+            with self._result_lock:
+                return self._result
         def _annotate_image(self, image, detections):
             # loop over the detections
             (h, w) = image.shape[:2]
+            result: List[Detection] = []
             for i in np.arange(0, detections.shape[2]):
                 confidence = detections[0, 0, i, 2]
                     box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
                     (startX, startY, endX, endY) = box.astype("int")
+                    name = CLASSES[idx]
+                    result.append(Detection(name=name, prob=float(confidence)))
                     # display the prediction
+                    label = f"{name}: {round(confidence * 100, 2)}%"
                     cv2.rectangle(image, (startX, startY), (endX, endY), COLORS[idx], 2)
                     y = startY - 15 if startY - 15 > 15 else startY + 15
                     cv2.putText(
                         COLORS[idx],
                         2,
                     )
+            return image, result
         def transform(self, frame: av.VideoFrame) -> np.ndarray:
             image = frame.to_ndarray(format="bgr24")
             )
             self._net.setInput(blob)
             detections = self._net.forward()
+            annotated_image, result = self._annotate_image(image, detections)
             # NOTE: This `transform` method is called in another thread,
             # so it must be thread-safe.
+            with self._result_lock:
+                self._result = result
             return annotated_image
     if webrtc_ctx.video_transformer:
         webrtc_ctx.video_transformer.confidence_threshold = confidence_threshold
+    if st.checkbox("Show the detected labels", value=True):
         if webrtc_ctx.state.playing:
             labels_placeholder = st.empty()
             # NOTE: The video transformation with object detection and
             # are not synchronized.
             while True:
                 if webrtc_ctx.video_transformer:
+                    labels_placeholder.table(webrtc_ctx.video_transformer.result)
                 time.sleep(0.1)
     st.markdown(
     WEBRTC_CLIENT_SETTINGS.update(
         {
+            "media_stream_constraints": {
                 "video": media_file_info["type"] == "video",
                 "audio": media_file_info["type"] == "audio",
             }
                 webrtc_ctx.video_receiver.stop()
                 break
+            img_rgb = frame.to_ndarray(format="rgb24")
+            image_loc.image(img_rgb)
 if __name__ == "__main__":
     logging.basicConfig(