streamlit-webrtc-example-experimental

Running

App Files Files Community

Pratyush101 committed on Dec 15, 2024

Commit

1c5d1dd

verified ·

1 Parent(s): 3a44bd9

Update app.py

Browse files

Files changed (1) hide show

app.py +5 -72

app.py CHANGED Viewed

@@ -1,14 +1,3 @@
-import logging
-import queue
-from typing import List, NamedTuple
-import av
-import cv2
-import numpy as np
-import streamlit as st
-from streamlit_webrtc import WebRtcMode, webrtc_streamer
-from sample_utils.turn import get_ice_servers
-import mediapipe as mp
-import os
 import time
 # Logger Setup
@@ -21,41 +10,13 @@ st.subheader('''Turn on the webcam and use hand gestures to interact with the vi
 # Initialize MediaPipe and Background Segmentor
 mp_hands = mp.solutions.hands
-hands = mp_hands.Hands(max_num_hands=1, min_detection_confidence=0.5)
-mp_drawing = mp.solutions.drawing_utils
-# Virtual Keyboard Layout
-keys = [["Q", "W", "E", "R", "T", "Y", "U", "I", "O", "P"],
-        ["A", "S", "D", "F", "G", "H", "J", "K", "L", ";"],
         ["Z", "X", "C", "V", "B", "N", "M", ",", ".", "/"]]
 class Button:
-    def _init_(self, pos, text, size=[100, 100]):
         self.pos = pos
         self.size = size
         self.text = text
-class Detection(NamedTuple):
-    label: str
-    score: float
-    box: np.ndarray
-result_queue: "queue.Queue[List[Detection]]" = queue.Queue()
-indexImg = 0
-output_text = ""
-prev_key_time = [time.time()] * 2
-if "output_text" not in st.session_state:
-    st.session_state["output_text"] = ""
-# Video Frame Callback with Your Logic
-def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
-    global indexImg, output_text
-    img = frame.to_ndarray(format="bgr24")
-    result = hands.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
     # Create the keyboard buttons
     buttonList = []
     h, w = img.shape[:2]
@@ -76,40 +37,14 @@ def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
     # Draw Keyboard Buttons
     for button in buttonList:
-        x, y = button.pos
-        bw, bh = button.size
-        cv2.rectangle(img, (x, y), (x + bw, y + bh), (200, 200, 200), -1)
-        cv2.putText(img, button.text, (x + int(0.2 * bw), y + int(0.7 * bh)), cv2.FONT_HERSHEY_PLAIN, font_scale, (0, 0, 0), font_thickness)
-    detections = []
-    if result.multi_hand_landmarks:
-        for hand_landmarks in result.multi_hand_landmarks:
-            mp_drawing.draw_landmarks(
-                img, hand_landmarks, mp_hands.HAND_CONNECTIONS,
-                mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=2, circle_radius=4),
-                mp_drawing.DrawingSpec(color=(0, 0, 255), thickness=2)
-            )
-            h, w, _ = img.shape
-            x_min, y_min = w, h
-            x_max, y_max = 0, 0
-            for lm in hand_landmarks.landmark:
-                x, y = int(lm.x * w), int(lm.y * h)
-                x_min, y_min = min(x_min, x), min(y_min, y)
-                x_max, y_max = max(x_max, x), max(y_max, y)
-            bbox = [x_min, y_min, x_max - x_min, y_max - y_min]
-            detections.append(Detection(label="Hand", score=0.5, box=np.array(bbox)))
             x4, y4 = int(hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP].x * w), int(hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP].y * h)
             x8, y8 = int(hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].x * w), int(hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].y * h)
-            distance = np.sqrt((x8 - x4) * 2 + (y8 - y4) * 2)
-            click_threshold = 0.2*np.sqrt(bbox[2] * 2 + bbox[3] * 2)
             for button in buttonList:
                 x, y = button.pos
-                bw, bh = button.size
                 if x < x8 < x + bw and y < y8 < y + bh:
                    cv2.rectangle(img, (x, y), (x + bw, y + bh), (0, 255, 160), -1)
                    cv2.putText(img, button.text, (x + int(0.2 * bw), y + int(0.7 * bh)), cv2.FONT_HERSHEY_PLAIN, font_scale, (255, 255, 255), font_thickness)
@@ -124,11 +59,13 @@ def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
                             else:
                                 output_text += ' '  # Add space
     # Position and dimensions for the rectangle
     text_x = int(0.05 * w)
     text_y = int(0.70 * h)
     text_width = int(0.9 * w)  # Adjust width as needed
     text_height = int(0.1 * h)  # Adjust height as needed
     # Draw the rectangle
     cv2.rectangle(img,
               (text_x, text_y - text_height),  # Top-left corner
               (text_x + text_width, text_y),  # Bottom-right corner
@@ -144,10 +81,6 @@ def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
     return av.VideoFrame.from_ndarray(img, format="bgr24")
 # WebRTC Streamer
-webrtc_streamer(
-    key="virtual-keyboard",
-    mode=WebRtcMode.SENDRECV,
-    rtc_configuration={"iceServers": get_ice_servers(), "iceTransportPolicy": "relay"},
     media_stream_constraints={"video": True, "audio": False},
     video_frame_callback=video_frame_callback,
     async_processing=True,

 import time
 # Logger Setup
 # Initialize MediaPipe and Background Segmentor
 mp_hands = mp.solutions.hands
         ["Z", "X", "C", "V", "B", "N", "M", ",", ".", "/"]]
 class Button:
+    def __init__(self, pos, text, size=[100, 100]):
         self.pos = pos
         self.size = size
         self.text = text
     # Create the keyboard buttons
     buttonList = []
     h, w = img.shape[:2]
     # Draw Keyboard Buttons
     for button in buttonList:
             x4, y4 = int(hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP].x * w), int(hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP].y * h)
             x8, y8 = int(hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].x * w), int(hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].y * h)
+            distance = np.sqrt((x8 - x4) ** 2 + (y8 - y4) ** 2)
+            click_threshold = 0.2*np.sqrt(bbox[2] ** 2 + bbox[3] ** 2)
             for button in buttonList:
                 x, y = button.pos
                 if x < x8 < x + bw and y < y8 < y + bh:
                    cv2.rectangle(img, (x, y), (x + bw, y + bh), (0, 255, 160), -1)
                    cv2.putText(img, button.text, (x + int(0.2 * bw), y + int(0.7 * bh)), cv2.FONT_HERSHEY_PLAIN, font_scale, (255, 255, 255), font_thickness)
                             else:
                                 output_text += ' '  # Add space
     # Position and dimensions for the rectangle
     text_x = int(0.05 * w)
     text_y = int(0.70 * h)
     text_width = int(0.9 * w)  # Adjust width as needed
     text_height = int(0.1 * h)  # Adjust height as needed
     # Draw the rectangle
     cv2.rectangle(img,
               (text_x, text_y - text_height),  # Top-left corner
               (text_x + text_width, text_y),  # Bottom-right corner
     return av.VideoFrame.from_ndarray(img, format="bgr24")
 # WebRTC Streamer
     media_stream_constraints={"video": True, "audio": False},
     video_frame_callback=video_frame_callback,
     async_processing=True,