streamlit-webrtc-example-experimental

Running

App Files Community

Pratyush101 committed on Dec 14, 2024

Commit

aa2a068

verified ·

1 Parent(s): f68bda2

Update app.py

Browse files

Files changed (1) hide show

app.py +15 -24

app.py CHANGED Viewed

@@ -8,7 +8,6 @@ import streamlit as st
 from streamlit_webrtc import WebRtcMode, webrtc_streamer
 from sample_utils.turn import get_ice_servers
 import mediapipe as mp
-from cvzone.SelfiSegmentationModule import SelfiSegmentation
 import os
 import time
@@ -21,11 +20,10 @@ st.title("Interactive Virtual Keyboard")
 st.subheader('''Turn on the webcam and use hand gestures to interact with the virtual keyboard.
 Use 'a' and 'd' from the keyboard to change the background.''')
-# Initialize MediaPipe and Background Segmentor
 mp_hands = mp.solutions.hands
-hands = mp_hands.Hands(static_image_mode=False, max_num_hands=2, min_detection_confidence=0.7)
 mp_drawing = mp.solutions.drawing_utils
-segmentor = SelfiSegmentation()
 # Virtual Keyboard Layout
 keys = [["Q", "W", "E", "R", "T", "Y", "U", "I", "O", "P"],
@@ -45,15 +43,6 @@ class Detection(NamedTuple):
 result_queue: "queue.Queue[List[Detection]]" = queue.Queue()
-# Load Background Images
-listImg = os.listdir('model/street') if os.path.exists('model/street') else []
-if not listImg:
-    st.error("Error: 'street' directory is missing or empty. Please add background images.")
-    st.stop()
-else:
-    imgList = [cv2.imread(f'model/street/{imgPath}') for imgPath in listImg]
-    imgList = [img for img in imgList if img is not None]
 indexImg = 0
 output_text = ""
 prev_key_time = [time.time()] * 2
@@ -61,30 +50,32 @@ prev_key_time = [time.time()] * 2
 if "output_text" not in st.session_state:
     st.session_state["output_text"] = ""
-# Video Frame Callback with Your Logic
 def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
     global indexImg, output_text
     img = frame.to_ndarray(format="bgr24")
-    imgOut = segmentor.removeBG(img, imgList[indexImg])
     # Process frame using MediaPipe
-    result = hands.process(cv2.cvtColor(imgOut, cv2.COLOR_BGR2RGB))
-    buttonList = [Button([30 + col * 105, 30 + row * 120], key) for row, line in enumerate(keys) for col, key in enumerate(line)]
     detections = []
     if result.multi_hand_landmarks:
         for hand_landmarks in result.multi_hand_landmarks:
             # Draw hand landmarks
             mp_drawing.draw_landmarks(
-                imgOut, hand_landmarks, mp_hands.HAND_CONNECTIONS,
                 mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=2, circle_radius=4),
                 mp_drawing.DrawingSpec(color=(0, 0, 255), thickness=2)
             )
             # Extract bounding box for each hand
-            h, w, _ = imgOut.shape
             x_min, y_min = w, h
             x_max, y_max = 0, 0
             for lm in hand_landmarks.landmark:
@@ -101,7 +92,7 @@ def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
             x8, y8 = int(hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].x * w), \
                      int(hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].y * h)
-            # Distance Calculation
             distance = np.sqrt((x8 - x4) ** 2 + (y8 - y4) ** 2)
             click_threshold = 50
@@ -109,10 +100,10 @@ def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
                 x, y = button.pos
                 w, h = button.size
                 if x < x8 < x + w and y < y8 < y + h:
-                    cv2.rectangle(imgOut, button.pos, (x + w, y + h), (0, 255, 160), -1)
-                    cv2.putText(imgOut, button.text, (x + 20, y + 70), cv2.FONT_HERSHEY_PLAIN, 5, (255, 255, 255), 3)
-                    # Simulate key press if finger close enough
                     if (distance / np.sqrt(bbox[2] ** 2 + bbox[3] ** 2)) * 100 < click_threshold:
                         if time.time() - prev_key_time[0] > 2:
                             prev_key_time[0] = time.time()
@@ -125,7 +116,7 @@ def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
     result_queue.put(detections)
     st.session_state["output_text"] = output_text
-    return av.VideoFrame.from_ndarray(imgOut, format="bgr24")
 # WebRTC Streamer
 webrtc_streamer(

 from streamlit_webrtc import WebRtcMode, webrtc_streamer
 from sample_utils.turn import get_ice_servers
 import mediapipe as mp
 import os
 import time
 st.subheader('''Turn on the webcam and use hand gestures to interact with the virtual keyboard.
 Use 'a' and 'd' from the keyboard to change the background.''')
+# Initialize MediaPipe Hand Detection
 mp_hands = mp.solutions.hands
+hands = mp_hands.Hands(max_num_hands=1, min_detection_confidence=0.5)
 mp_drawing = mp.solutions.drawing_utils
 # Virtual Keyboard Layout
 keys = [["Q", "W", "E", "R", "T", "Y", "U", "I", "O", "P"],
 result_queue: "queue.Queue[List[Detection]]" = queue.Queue()
 indexImg = 0
 output_text = ""
 prev_key_time = [time.time()] * 2
 if "output_text" not in st.session_state:
     st.session_state["output_text"] = ""
+# Video Frame Callback with Logic Correction
 def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
     global indexImg, output_text
     img = frame.to_ndarray(format="bgr24")
     # Process frame using MediaPipe
+    result = hands.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
+    # Create Buttons
+    buttonList = [Button([30 + col * 105, 30 + row * 120], key)
+                  for row, line in enumerate(keys)
+                  for col, key in enumerate(line)]
     detections = []
     if result.multi_hand_landmarks:
         for hand_landmarks in result.multi_hand_landmarks:
             # Draw hand landmarks
             mp_drawing.draw_landmarks(
+                img, hand_landmarks, mp_hands.HAND_CONNECTIONS,
                 mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=2, circle_radius=4),
                 mp_drawing.DrawingSpec(color=(0, 0, 255), thickness=2)
             )
             # Extract bounding box for each hand
+            h, w, _ = img.shape
             x_min, y_min = w, h
             x_max, y_max = 0, 0
             for lm in hand_landmarks.landmark:
             x8, y8 = int(hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].x * w), \
                      int(hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].y * h)
+            # Calculate Distance and Detect Button Click
             distance = np.sqrt((x8 - x4) ** 2 + (y8 - y4) ** 2)
             click_threshold = 50
                 x, y = button.pos
                 w, h = button.size
                 if x < x8 < x + w and y < y8 < y + h:
+                    cv2.rectangle(img, button.pos, (x + w, y + h), (0, 255, 160), -1)
+                    cv2.putText(img, button.text, (x + 20, y + 70), cv2.FONT_HERSHEY_PLAIN, 5, (255, 255, 255), 3)
+                    # Simulate Key Press if Finger Close Enough
                     if (distance / np.sqrt(bbox[2] ** 2 + bbox[3] ** 2)) * 100 < click_threshold:
                         if time.time() - prev_key_time[0] > 2:
                             prev_key_time[0] = time.time()
     result_queue.put(detections)
     st.session_state["output_text"] = output_text
+    return av.VideoFrame.from_ndarray(img, format="bgr24")
 # WebRTC Streamer
 webrtc_streamer(