streamlit-webrtc-example-experimental

Sleeping

App Files Files Community

Pratyush101 committed on Dec 15, 2024

Commit

99e69b6

verified ·

1 Parent(s): 1c5d1dd

Update app.py

Browse files

Files changed (1) hide show

app.py +74 -7

app.py CHANGED Viewed

@@ -1,7 +1,18 @@
 import time
-# Logger Setup
-logger = logging.getLogger(__name__)
 # Streamlit settings
 st.set_page_config(page_title="Virtual Keyboard", page_icon="🏋️")
@@ -10,13 +21,41 @@ st.subheader('''Turn on the webcam and use hand gestures to interact with the vi
 # Initialize MediaPipe and Background Segmentor
 mp_hands = mp.solutions.hands
         ["Z", "X", "C", "V", "B", "N", "M", ",", ".", "/"]]
 class Button:
-    def __init__(self, pos, text, size=[100, 100]):
         self.pos = pos
         self.size = size
         self.text = text
     # Create the keyboard buttons
     buttonList = []
     h, w = img.shape[:2]
@@ -37,14 +76,40 @@ class Button:
     # Draw Keyboard Buttons
     for button in buttonList:
             x4, y4 = int(hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP].x * w), int(hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP].y * h)
             x8, y8 = int(hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].x * w), int(hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].y * h)
-            distance = np.sqrt((x8 - x4) ** 2 + (y8 - y4) ** 2)
-            click_threshold = 0.2*np.sqrt(bbox[2] ** 2 + bbox[3] ** 2)
             for button in buttonList:
                 x, y = button.pos
                 if x < x8 < x + bw and y < y8 < y + bh:
                    cv2.rectangle(img, (x, y), (x + bw, y + bh), (0, 255, 160), -1)
                    cv2.putText(img, button.text, (x + int(0.2 * bw), y + int(0.7 * bh)), cv2.FONT_HERSHEY_PLAIN, font_scale, (255, 255, 255), font_thickness)
@@ -59,13 +124,11 @@ class Button:
                             else:
                                 output_text += ' '  # Add space
     # Position and dimensions for the rectangle
     text_x = int(0.05 * w)
     text_y = int(0.70 * h)
     text_width = int(0.9 * w)  # Adjust width as needed
     text_height = int(0.1 * h)  # Adjust height as needed
     # Draw the rectangle
     cv2.rectangle(img,
               (text_x, text_y - text_height),  # Top-left corner
               (text_x + text_width, text_y),  # Bottom-right corner
@@ -81,6 +144,10 @@ class Button:
     return av.VideoFrame.from_ndarray(img, format="bgr24")
 # WebRTC Streamer
     media_stream_constraints={"video": True, "audio": False},
     video_frame_callback=video_frame_callback,
     async_processing=True,

+import logging
+import queue
+from typing import List, NamedTuple
+import av
+import cv2
+import numpy as np
+import streamlit as st
+from streamlit_webrtc import WebRtcMode, webrtc_streamer
+from sample_utils.turn import get_ice_servers
+import mediapipe as mp
+import os
 import time
+# # Logger Setup
+# logger = logging.getLogger(__name__)
 # Streamlit settings
 st.set_page_config(page_title="Virtual Keyboard", page_icon="🏋️")
 # Initialize MediaPipe and Background Segmentor
 mp_hands = mp.solutions.hands
+hands = mp_hands.Hands(max_num_hands=1, min_detection_confidence=0.5)
+mp_drawing = mp.solutions.drawing_utils
+# Virtual Keyboard Layout
+keys = [["Q", "W", "E", "R", "T", "Y", "U", "I", "O", "P"],
+        ["A", "S", "D", "F", "G", "H", "J", "K", "L", ";"],
         ["Z", "X", "C", "V", "B", "N", "M", ",", ".", "/"]]
 class Button:
     """A rectangular key of the on-screen virtual keyboard.
     Attributes:
         pos: top-left corner of the key; callers unpack it as ``x, y``.
         size: key dimensions; callers unpack it as ``bw, bh``.
         text: label drawn on the key.
     """
     # The constructor must be spelled ``__init__`` (double underscores);
     # a method named ``_init_`` is never invoked by Python, so
     # ``Button(pos, text)`` would raise TypeError.
     def __init__(self, pos, text, size=None):
         """Store the key's position, label and size.
         Args:
             pos: top-left corner of the key as an ``(x, y)`` pair.
             text: label shown on the key.
             size: ``[width, height]`` of the key; defaults to ``[100, 100]``.
         """
         self.pos = pos
         # Build a fresh list per instance so buttons never share (and
         # accidentally mutate) one default list object.
         self.size = [100, 100] if size is None else size
         self.text = text
+class Detection(NamedTuple):  # One detected hand, as appended to `detections` in video_frame_callback.
+    label: str  # Detection label; the visible code always passes "Hand".
+    score: float  # Score value; the visible code always passes the constant 0.5.
+    box: np.ndarray  # Built from [x_min, y_min, width, height] computed in pixel coordinates.
+result_queue: "queue.Queue[List[Detection]]" = queue.Queue()  # Queue annotated for lists of Detection; no put/get is visible in this view.
+indexImg = 0  # Declared global in video_frame_callback; its use is not visible here - TODO confirm.
+output_text = ""  # Module-level text buffer; the callback appends to it (e.g. a space).
+prev_key_time = [time.time()] * 2  # Two-element list, both entries set to the time this line runs; usage not visible here.
+if "output_text" not in st.session_state:  # Seed the session key only when it is absent.
+    st.session_state["output_text"] = ""
+# Video Frame Callback with Your Logic
+def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
+    global indexImg, output_text
+    img = frame.to_ndarray(format="bgr24")
+    result = hands.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
     # Create the keyboard buttons
     buttonList = []
     h, w = img.shape[:2]
     # Draw Keyboard Buttons
     for button in buttonList:
+        x, y = button.pos
+        bw, bh = button.size
+        cv2.rectangle(img, (x, y), (x + bw, y + bh), (200, 200, 200), -1)
+        cv2.putText(img, button.text, (x + int(0.2 * bw), y + int(0.7 * bh)), cv2.FONT_HERSHEY_PLAIN, font_scale, (0, 0, 0), font_thickness)
+    detections = []
+    if result.multi_hand_landmarks:
+        for hand_landmarks in result.multi_hand_landmarks:
+            mp_drawing.draw_landmarks(
+                img, hand_landmarks, mp_hands.HAND_CONNECTIONS,
+                mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=2, circle_radius=4),
+                mp_drawing.DrawingSpec(color=(0, 0, 255), thickness=2)
+            )
+            h, w, _ = img.shape
+            x_min, y_min = w, h
+            x_max, y_max = 0, 0
+            for lm in hand_landmarks.landmark:
+                x, y = int(lm.x * w), int(lm.y * h)
+                x_min, y_min = min(x_min, x), min(y_min, y)
+                x_max, y_max = max(x_max, x), max(y_max, y)
+            bbox = [x_min, y_min, x_max - x_min, y_max - y_min]
+            detections.append(Detection(label="Hand", score=0.5, box=np.array(bbox)))
             x4, y4 = int(hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP].x * w), int(hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP].y * h)
             x8, y8 = int(hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].x * w), int(hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].y * h)
+            distance = np.sqrt((x8 - x4) ** 2 + (y8 - y4) ** 2)
+            click_threshold = 0.2*np.sqrt(bbox[2] ** 2 + bbox[3] ** 2)
             for button in buttonList:
                 x, y = button.pos
+                bw, bh = button.size
                 if x < x8 < x + bw and y < y8 < y + bh:
                    cv2.rectangle(img, (x, y), (x + bw, y + bh), (0, 255, 160), -1)
                    cv2.putText(img, button.text, (x + int(0.2 * bw), y + int(0.7 * bh)), cv2.FONT_HERSHEY_PLAIN, font_scale, (255, 255, 255), font_thickness)
                             else:
                                 output_text += ' '  # Add space
     # Position and dimensions for the rectangle
     text_x = int(0.05 * w)
     text_y = int(0.70 * h)
     text_width = int(0.9 * w)  # Adjust width as needed
     text_height = int(0.1 * h)  # Adjust height as needed
     # Draw the rectangle
     cv2.rectangle(img,
               (text_x, text_y - text_height),  # Top-left corner
               (text_x + text_width, text_y),  # Bottom-right corner
     return av.VideoFrame.from_ndarray(img, format="bgr24")
 # WebRTC Streamer
+webrtc_streamer(
+    key="virtual-keyboard",
+    mode=WebRtcMode.SENDRECV,
+    rtc_configuration={"iceServers": get_ice_servers(), "iceTransportPolicy": "relay"},
     media_stream_constraints={"video": True, "audio": False},
     video_frame_callback=video_frame_callback,
     async_processing=True,