Pratyush101 committed
Commit 6b85062 · verified
1 Parent(s): ab7606f

Update app.py

I have included a result queue so that hand detections from the video frame callback can be read on the Streamlit side.

Files changed (1)
  1. app.py +51 -39
app.py CHANGED
@@ -199,6 +199,9 @@ from cvzone.HandTrackingModule import HandDetector
 from cvzone.SelfiSegmentationModule import SelfiSegmentation
 import os
 import time
+import av
+import queue
+from typing import List, NamedTuple
 from sample_utils.turn import get_ice_servers
 
 logger = logging.getLogger(__name__)
@@ -224,6 +227,13 @@ class Button:
         self.size = size
         self.text = text
 
+class Detection(NamedTuple):
+    label: str
+    score: float
+    box: np.ndarray
+
+result_queue: "queue.Queue[List[Detection]]" = queue.Queue()
+
 listImg = os.listdir('model/street') if os.path.exists('model/street') else []
 if not listImg:
     st.error("Error: 'street' directory is missing or empty. Please add background images.")
@@ -238,50 +248,51 @@ output_text = ""
 if "output_text" not in st.session_state:
     st.session_state["output_text"] = ""
 
-def video_frame_callback(frame):
+def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
     global indexImg, output_text
 
     img = frame.to_ndarray(format="bgr24")
     imgOut = segmentor.removeBG(img, imgList[indexImg])
-    hands, img = detector.findHands(imgOut, flipType=False)
-
-    keyboard_canvas = np.zeros_like(img)
-    buttonList = []
-
-    for key in keys[0]:
-        buttonList.append(Button([30 + keys[0].index(key) * 105, 30], key))
-    for key in keys[1]:
-        buttonList.append(Button([30 + keys[1].index(key) * 105, 150], key))
-    for key in keys[2]:
-        buttonList.append(Button([30 + keys[2].index(key) * 105, 260], key))
-
-    for i, hand in enumerate(hands):
-        lmList = hand['lmList']
-        if lmList:
-            x4, y4 = lmList[4][0], lmList[4][1]
-            x8, y8 = lmList[8][0], lmList[8][1]
-            distance = np.sqrt((x8 - x4) ** 2 + (y8 - y4) ** 2)
-            click_threshold = 10
-
-            for button in buttonList:
-                x, y = button.pos
-                w, h = button.size
-                if x < x8 < x + w and y < y8 < y + h:
-                    cv2.rectangle(img, button.pos, (x + w, y + h), (0, 255, 160), -1)
-                    cv2.putText(img, button.text, (x + 20, y + 70), cv2.FONT_HERSHEY_PLAIN, 5, (255, 255, 255), 3)
-
-                    if (distance / np.sqrt((hand['bbox'][2]) ** 2 + (hand['bbox'][3]) ** 2)) * 100 < click_threshold:
-                        if time.time() - prev_key_time[i] > 2:
-                            prev_key_time[i] = time.time()
-                            if button.text != 'BS' and button.text != 'SPACE':
-                                output_text += button.text
-                            elif button.text == 'BS':
-                                output_text = output_text[:-1]
-                            else:
-                                output_text += ' '
-
+    hands, imgOut = detector.findHands(imgOut, flipType=False)
+
+    buttonList = [Button([30 + col * 105, 30 + row * 120], key) for row, line in enumerate(keys) for col, key in enumerate(line)]
+
+    detections = []
+    if hands:
+        for i, hand in enumerate(hands):
+            lmList = hand['lmList']
+            bbox = hand['bbox']
+            label = "Hand"
+            score = hand['score']
+            box = np.array([bbox[0], bbox[1], bbox[0] + bbox[2], bbox[1] + bbox[3]])
+            detections.append(Detection(label=label, score=score, box=box))
+
+            if lmList:
+                x4, y4 = lmList[4][0], lmList[4][1]
+                x8, y8 = lmList[8][0], lmList[8][1]
+                distance = np.sqrt((x8 - x4) ** 2 + (y8 - y4) ** 2)
+                click_threshold = 10
+
+                for button in buttonList:
+                    x, y = button.pos
+                    w, h = button.size
+                    if x < x8 < x + w and y < y8 < y + h:
+                        cv2.rectangle(imgOut, button.pos, (x + w, y + h), (0, 255, 160), -1)
+                        cv2.putText(imgOut, button.text, (x + 20, y + 70), cv2.FONT_HERSHEY_PLAIN, 5, (255, 255, 255), 3)
+
+                        if (distance / np.sqrt(bbox[2] ** 2 + bbox[3] ** 2)) * 100 < click_threshold:
+                            if time.time() - prev_key_time[i] > 2:
+                                prev_key_time[i] = time.time()
+                                if button.text != 'BS' and button.text != 'SPACE':
+                                    output_text += button.text
+                                elif button.text == 'BS':
+                                    output_text = output_text[:-1]
+                                else:
+                                    output_text += ' '
+
+    result_queue.put(detections)
     st.session_state["output_text"] = output_text
-    return frame.from_ndarray(img, format="bgr24")
+    return av.VideoFrame.from_ndarray(imgOut, format="bgr24")
 
 webrtc_streamer(
     key="virtual-keyboard",
@@ -295,3 +306,4 @@ webrtc_streamer(
 st.subheader("Output Text")
 st.text_area("Live Input:", value=st.session_state["output_text"], height=200)
 
+
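
For context, a minimal consumer-side sketch (not part of this commit) of how the new result_queue could be drained in the Streamlit UI, following the pattern used in streamlit-webrtc's object-detection demo. It assumes the existing imports in app.py and that the webrtc_streamer(...) call's return value is bound to a variable, named webrtc_ctx here; that name and the checkbox label are illustrative and do not appear in app.py.

# Sketch only: drain the detections that video_frame_callback pushes via
# result_queue.put(detections) and render them in the page.
# Assumes webrtc_ctx = webrtc_streamer(...) above (hypothetical binding).
if st.checkbox("Show detected hands", value=True):
    labels_placeholder = st.empty()
    while webrtc_ctx.state.playing:
        try:
            detections = result_queue.get(timeout=1.0)  # List[Detection] from the callback
        except queue.Empty:
            continue
        # Each Detection is a NamedTuple (label, score, box); _asdict() turns
        # it into one table row.
        labels_placeholder.table([d._asdict() for d in detections])

The blocking loop mirrors the upstream example: the queue hands results from the background WebRTC worker thread, which should not touch Streamlit widgets directly, to the script thread that renders the table.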