Update app.py
Browse files
app.py
CHANGED
@@ -15,7 +15,7 @@ import time
|
|
15 |
logger = logging.getLogger(__name__)
|
16 |
|
17 |
# Streamlit settings
|
18 |
-
st.set_page_config(page_title="Virtual Keyboard", page_icon="
|
19 |
st.title("Interactive Virtual Keyboard")
|
20 |
st.subheader('''Turn on the webcam and use hand gestures to interact with the virtual keyboard.''')
|
21 |
|
@@ -29,19 +29,22 @@ keys = [["Q", "W", "E", "R", "T", "Y", "U", "I", "O", "P"],
|
|
29 |
["A", "S", "D", "F", "G", "H", "J", "K", "L", ";"],
|
30 |
["Z", "X", "C", "V", "B", "N", "M", ",", ".", "/"]]
|
31 |
|
|
|
32 |
class Button:
|
33 |
-
def
|
34 |
self.pos = pos
|
35 |
self.size = size
|
36 |
self.text = text
|
37 |
|
|
|
38 |
class Detection(NamedTuple):
|
39 |
label: str
|
40 |
score: float
|
41 |
box: np.ndarray
|
42 |
|
43 |
-
result_queue: "queue.Queue[List[Detection]]" = queue.Queue()
|
44 |
|
|
|
|
|
45 |
indexImg = 0
|
46 |
output_text = ""
|
47 |
prev_key_time = [time.time()] * 2
|
@@ -49,7 +52,8 @@ prev_key_time = [time.time()] * 2
|
|
49 |
if "output_text" not in st.session_state:
|
50 |
st.session_state["output_text"] = ""
|
51 |
|
52 |
-
|
|
|
53 |
def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
|
54 |
global indexImg, output_text
|
55 |
|
@@ -59,20 +63,20 @@ def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
|
|
59 |
# Create the keyboard buttons
|
60 |
buttonList = []
|
61 |
h, w = img.shape[:2]
|
62 |
-
key_width = int(0.
|
63 |
-
key_height = int(0.
|
64 |
-
font_scale = 0.
|
65 |
-
font_thickness = int(0.
|
66 |
|
67 |
for row, key_row in enumerate(keys):
|
68 |
for col, key in enumerate(key_row):
|
69 |
-
x = int(0.
|
70 |
-
y = int(0.
|
71 |
buttonList.append(Button([x, y], key, size=[key_width, key_height]))
|
72 |
|
73 |
# Add special buttons for Backspace and Space
|
74 |
-
buttonList.append(Button([int(0.
|
75 |
-
buttonList.append(Button([int(0.2 * w), int(0.
|
76 |
|
77 |
# Draw Keyboard Buttons
|
78 |
for button in buttonList:
|
@@ -104,16 +108,17 @@ def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
|
|
104 |
x4, y4 = int(hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP].x * w), int(hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP].y * h)
|
105 |
x8, y8 = int(hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].x * w), int(hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].y * h)
|
106 |
|
107 |
-
distance = np.sqrt((x8 - x4)
|
108 |
-
click_threshold = 0.2 * np.sqrt(bbox[2]
|
109 |
|
110 |
for button in buttonList:
|
111 |
x, y = button.pos
|
112 |
bw, bh = button.size
|
113 |
if x < x8 < x + bw and y < y8 < y + bh:
|
114 |
-
|
115 |
-
|
116 |
-
|
|
|
117 |
if time.time() - prev_key_time[0] > 2:
|
118 |
prev_key_time[0] = time.time()
|
119 |
if button.text != 'BS' and button.text != 'SPACE':
|
@@ -123,26 +128,24 @@ def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
|
|
123 |
else:
|
124 |
output_text += ' ' # Add space
|
125 |
|
126 |
-
#
|
127 |
text_x = int(0.05 * w)
|
128 |
-
text_y = int(0.
|
129 |
text_width = int(0.9 * w)
|
130 |
text_height = int(0.1 * h)
|
131 |
-
|
132 |
-
# Draw the background for output text box
|
133 |
cv2.rectangle(img,
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
|
139 |
-
#
|
140 |
-
cv2.putText(img, output_text, (text_x +
|
141 |
|
142 |
result_queue.put(detections)
|
143 |
-
|
144 |
return av.VideoFrame.from_ndarray(img, format="bgr24")
|
145 |
|
|
|
146 |
# WebRTC Streamer
|
147 |
webrtc_streamer(
|
148 |
key="virtual-keyboard",
|
@@ -151,4 +154,4 @@ webrtc_streamer(
|
|
151 |
media_stream_constraints={"video": True, "audio": False},
|
152 |
video_frame_callback=video_frame_callback,
|
153 |
async_processing=True,
|
154 |
-
)
|
|
|
15 |
# Module-level logger, named after this module per logging convention.
logger = logging.getLogger(__name__)

# Streamlit settings
# NOTE(review): the page icon is a weightlifting emoji, which looks unrelated
# to a virtual-keyboard app -- confirm it is intentional.
st.set_page_config(page_title="Virtual Keyboard", page_icon="🏋️")
st.title("Interactive Virtual Keyboard")
st.subheader('''Turn on the webcam and use hand gestures to interact with the virtual keyboard.''')
|
21 |
|
|
|
29 |
["A", "S", "D", "F", "G", "H", "J", "K", "L", ";"],
|
30 |
["Z", "X", "C", "V", "B", "N", "M", ",", ".", "/"]]
|
31 |
|
32 |
+
|
33 |
class Button:
    """A rectangular key rendered on the virtual keyboard overlay.

    Attributes:
        pos: Top-left corner as ``[x, y]`` in pixels.
        size: Width/height as ``[w, h]`` in pixels.
        text: Label drawn on the key (e.g. ``"Q"``, ``"SPACE"``, ``"BS"``).
    """

    def __init__(self, pos, text, size=None):
        """Create a key at *pos* labeled *text*.

        Args:
            pos: ``[x, y]`` top-left position in pixels.
            text: Key label.
            size: Optional ``[w, h]``; defaults to ``[100, 100]``.
        """
        self.pos = pos
        # Bug fix: the original default ``size=[100, 100]`` was a shared
        # mutable list -- mutating one button's size would silently change
        # every other button created with the default. Use a None sentinel
        # and build a fresh list per instance instead.
        self.size = [100, 100] if size is None else size
        self.text = text
|
38 |
|
39 |
+
|
40 |
class Detection(NamedTuple):
    """A single detection result pushed through ``result_queue`` per frame."""

    # Class label of the detected object.
    label: str
    # Detector confidence -- presumably in [0, 1]; verify against the producer.
    score: float
    # Bounding box array -- layout appears to be (x, y, w, h) based on how
    # bbox[2]/bbox[3] are used as extents in the callback; TODO confirm.
    box: np.ndarray
|
44 |
|
|
|
45 |
|
46 |
+
# Global variables
# Thread-safe queue carrying each frame's List[Detection] from the WebRTC
# video callback thread back to the Streamlit main thread.
result_queue: "queue.Queue[List[Detection]]" = queue.Queue()
# Index of the current keyboard image/skin -- presumably; its use is not
# visible in this chunk, confirm against the rest of the file.
indexImg = 0
# Text typed so far via the virtual keyboard.
output_text = ""
# Timestamps of the last accepted key presses, used to debounce input
# (the callback requires > 2 s between accepted presses).
prev_key_time = [time.time()] * 2
|
|
|
52 |
# Initialize the session-scoped output text only on first run, so an
# existing value is not overwritten by subsequent script reruns.
if "output_text" not in st.session_state:
    st.session_state["output_text"] = ""
|
54 |
|
55 |
+
|
56 |
+
# Video Frame Callback with Logic
|
57 |
def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
|
58 |
global indexImg, output_text
|
59 |
|
|
|
63 |
# Create the keyboard buttons
|
64 |
buttonList = []
|
65 |
h, w = img.shape[:2]
|
66 |
+
key_width = int(0.07 * w)
|
67 |
+
key_height = int(0.09 * h)
|
68 |
+
font_scale = 0.0045 * w
|
69 |
+
font_thickness = int(0.009 * h)
|
70 |
|
71 |
for row, key_row in enumerate(keys):
|
72 |
for col, key in enumerate(key_row):
|
73 |
+
x = int(0.03 * w + col * (key_width + 5))
|
74 |
+
y = int(0.03 * h + row * (key_height + 5))
|
75 |
buttonList.append(Button([x, y], key, size=[key_width, key_height]))
|
76 |
|
77 |
# Add special buttons for Backspace and Space
|
78 |
+
buttonList.append(Button([int(0.9 * w), int(0.03 * h)], 'BS', size=[int(0.08 * w), key_height]))
|
79 |
+
buttonList.append(Button([int(0.2 * w), int(0.4 * h)], 'SPACE', size=[int(0.6 * w), key_height]))
|
80 |
|
81 |
# Draw Keyboard Buttons
|
82 |
for button in buttonList:
|
|
|
108 |
x4, y4 = int(hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP].x * w), int(hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP].y * h)
|
109 |
x8, y8 = int(hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].x * w), int(hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].y * h)
|
110 |
|
111 |
+
distance = np.sqrt((x8 - x4) ** 2 + (y8 - y4) ** 2)
|
112 |
+
click_threshold = 0.2 * np.sqrt(bbox[2] ** 2 + bbox[3] ** 2)
|
113 |
|
114 |
for button in buttonList:
|
115 |
x, y = button.pos
|
116 |
bw, bh = button.size
|
117 |
if x < x8 < x + bw and y < y8 < y + bh:
|
118 |
+
cv2.rectangle(img, (x, y), (x + bw, y + bh), (0, 255, 160), -1)
|
119 |
+
cv2.putText(img, button.text, (x + int(0.2 * bw), y + int(0.7 * bh)), cv2.FONT_HERSHEY_PLAIN, font_scale, (255, 255, 255), font_thickness)
|
120 |
+
|
121 |
+
if distance < click_threshold:
|
122 |
if time.time() - prev_key_time[0] > 2:
|
123 |
prev_key_time[0] = time.time()
|
124 |
if button.text != 'BS' and button.text != 'SPACE':
|
|
|
128 |
else:
|
129 |
output_text += ' ' # Add space
|
130 |
|
131 |
+
# Draw a background rectangle for the output text
|
132 |
text_x = int(0.05 * w)
|
133 |
+
text_y = int(0.70 * h)
|
134 |
text_width = int(0.9 * w)
|
135 |
text_height = int(0.1 * h)
|
|
|
|
|
136 |
cv2.rectangle(img,
|
137 |
+
(text_x, text_y - text_height),
|
138 |
+
(text_x + text_width, text_y),
|
139 |
+
(100, 100, 100),
|
140 |
+
-1)
|
141 |
|
142 |
+
# Overlay the output text
|
143 |
+
cv2.putText(img, output_text, (text_x + int(0.02 * w), text_y - int(0.02 * h)), cv2.FONT_HERSHEY_PLAIN, 2, (255, 255, 255), 5)
|
144 |
|
145 |
result_queue.put(detections)
|
|
|
146 |
return av.VideoFrame.from_ndarray(img, format="bgr24")
|
147 |
|
148 |
+
|
149 |
# WebRTC Streamer
|
150 |
webrtc_streamer(
|
151 |
key="virtual-keyboard",
|
|
|
154 |
media_stream_constraints={"video": True, "audio": False},
|
155 |
video_frame_callback=video_frame_callback,
|
156 |
async_processing=True,
|
157 |
+
)
|