Update app.py
app.py
CHANGED
@@ -20,7 +20,7 @@ st.title("Interactive Virtual Keyboard")
 st.subheader('''Turn on the webcam and use hand gestures to interact with the virtual keyboard.
 Use 'a' and 'd' from the keyboard to change the background.''')
 
-# Initialize MediaPipe
+# Initialize MediaPipe and Background Segmentor
 mp_hands = mp.solutions.hands
 hands = mp_hands.Hands(max_num_hands=1, min_detection_confidence=0.5)
 mp_drawing = mp.solutions.drawing_utils
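The renamed comment mentions a background segmentor, but its setup sits outside this hunk. For context, a minimal sketch of how such an initialization commonly looks with MediaPipe's selfie-segmentation solution; the `segmentor` name and the `model_selection` value are assumptions, not taken from this file:

```python
import mediapipe as mp

# Hand tracking, as in the diff above
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(max_num_hands=1, min_detection_confidence=0.5)

# Assumed setup for the "Background Segmentor" the comment refers to:
# MediaPipe selfie segmentation yields a per-pixel foreground mask that
# can be used to swap the background (the 'a'/'d' feature in the app).
mp_selfie = mp.solutions.selfie_segmentation
segmentor = mp_selfie.SelfieSegmentation(model_selection=1)
```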
@@ -43,6 +43,7 @@ class Detection(NamedTuple):
 
 result_queue: "queue.Queue[List[Detection]]" = queue.Queue()
 
+
 indexImg = 0
 output_text = ""
 prev_key_time = [time.time()] * 2
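`prev_key_time = [time.time()] * 2` seeds the timestamps that later rate-limit key presses (the callback only accepts a press after a 2-second gap). A self-contained sketch of that debounce pattern; the `try_press` helper is invented for illustration:

```python
import time

prev_key_time = [time.time()] * 2  # one timestamp slot per debounced action

def try_press(slot: int, min_gap: float = 2.0) -> bool:
    """Accept a press only if min_gap seconds have passed for this slot."""
    now = time.time()
    if now - prev_key_time[slot] > min_gap:
        prev_key_time[slot] = now
        return True
    return False
```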
@@ -50,7 +51,7 @@ prev_key_time = [time.time()] * 2
 if "output_text" not in st.session_state:
     st.session_state["output_text"] = ""
 
-# Video Frame Callback with Logic
+# Video Frame Callback with Your Logic
 def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
     global indexImg, output_text
 
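The callback receives and returns `av.VideoFrame` objects, so its body has to convert to a NumPy image and back; that conversion falls outside the hunks shown here. A minimal skeleton of the pattern streamlit-webrtc documents, with `bgr24` as the usual format for OpenCV processing:

```python
import av

def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
    img = frame.to_ndarray(format="bgr24")   # av.VideoFrame -> BGR NumPy array
    # ... run MediaPipe and draw the keyboard on img here ...
    return av.VideoFrame.from_ndarray(img, format="bgr24")
```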
@@ -59,10 +60,7 @@ def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
     # Process frame using MediaPipe
     result = hands.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
 
-
-    buttonList = [Button([30 + col * 105, 30 + row * 120], key)
-                  for row, line in enumerate(keys)
-                  for col, key in enumerate(line)]
+    buttonList = [Button([30 + col * 105, 30 + row * 120], key) for row, line in enumerate(keys) for col, key in enumerate(line)]
 
     detections = []
    if result.multi_hand_landmarks:
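The comprehension builds one `Button` per key on a 105 x 120 px grid pitch. The `Button` class itself is not part of the diff; a hypothetical minimal version consistent with the attributes the drawing code reads (`pos`, `text`, and a size, here guessed at 100 x 100 from the grid spacing):

```python
class Button:
    """Hypothetical minimal Button; attributes match what the diff accesses."""
    def __init__(self, pos, text, size=(100, 100)):
        self.pos = pos    # top-left [x, y] of the key on the frame
        self.text = text  # label drawn on the key
        self.size = size  # (width, height); assumed, not shown in the diff
```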
@@ -87,14 +85,13 @@ def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
         detections.append(Detection(label="Hand", score=1.0, box=np.array(bbox)))
 
         # Extract finger tip positions
-        x4, y4 = int(hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP].x * w),
-        int(hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP].y * h)
-        x8, y8 = int(hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].x * w),
-        int(hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].y * h)
+        x4, y4 = int(hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP].x * w), int(hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP].y * h)
+        x8, y8 = int(hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].x * w), int(hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].y * h)
+
 
-        #
+        # Distance Calculation
         distance = np.sqrt((x8 - x4) ** 2 + (y8 - y4) ** 2)
-        click_threshold =
+        click_threshold = 10
 
         for button in buttonList:
             x, y = button.pos
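The click test divides the thumb-index distance by the hand bounding-box diagonal, so `click_threshold = 10` means "fingertips closer than 10% of the hand's apparent size", which stays roughly scale-invariant as the hand moves toward or away from the camera. A standalone illustration with made-up coordinates:

```python
import numpy as np

x4, y4 = 210, 330            # thumb tip (example pixels)
x8, y8 = 220, 315            # index finger tip (example pixels)
bbox = (180, 280, 160, 190)  # hand bounding box: x, y, w, h

distance = np.hypot(x8 - x4, y8 - y4)                   # ~18.0 px pinch distance
relative = distance / np.hypot(bbox[2], bbox[3]) * 100  # percent of hand diagonal

click_threshold = 10
print(f"{relative:.1f}% -> click: {relative < click_threshold}")  # ~7.3% -> True
```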
@@ -103,7 +100,7 @@ def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
                 cv2.rectangle(img, button.pos, (x + w, y + h), (0, 255, 160), -1)
                 cv2.putText(img, button.text, (x + 20, y + 70), cv2.FONT_HERSHEY_PLAIN, 5, (255, 255, 255), 3)
 
-                # Simulate
+                # Simulate key press if finger close enough
                 if (distance / np.sqrt(bbox[2] ** 2 + bbox[3] ** 2)) * 100 < click_threshold:
                     if time.time() - prev_key_time[0] > 2:
                         prev_key_time[0] = time.time()
@@ -126,4 +123,4 @@ webrtc_streamer(
     media_stream_constraints={"video": True, "audio": False},
     video_frame_callback=video_frame_callback,
     async_processing=True,
-)
+)
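With `async_processing=True` the callback runs on a worker thread, so it cannot touch Streamlit widgets or session state directly; that is what the `result_queue` declared earlier is for. A self-contained sketch of the hand-off (the `put`/`get_nowait` calls are illustrative; the file's consumer side is not shown in this diff):

```python
import queue
from typing import List, NamedTuple

import numpy as np

class Detection(NamedTuple):
    label: str
    score: float
    box: np.ndarray

result_queue: "queue.Queue[List[Detection]]" = queue.Queue()

# The worker thread (the video callback) enqueues results...
result_queue.put([Detection("Hand", 1.0, np.array([10, 20, 100, 120]))])

# ...and the Streamlit script drains them without blocking its rerun loop.
try:
    latest = result_queue.get_nowait()
    print([d.label for d in latest])
except queue.Empty:
    pass
```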