Update app.py
app.py
CHANGED
@@ -43,7 +43,6 @@ class Detection(NamedTuple):
 
 result_queue: "queue.Queue[List[Detection]]" = queue.Queue()
 
-
 indexImg = 0
 output_text = ""
 prev_key_time = [time.time()] * 2
@@ -56,8 +55,6 @@ def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
     global indexImg, output_text
 
     img = frame.to_ndarray(format="bgr24")
-    # h, w = img.shape[:2]
-    # Process frame using MediaPipe
     result = hands.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
 
     # Create the keyboard buttons
@@ -85,39 +82,15 @@ def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
         cv2.rectangle(img, (x, y), (x + bw, y + bh), (200, 200, 200), -1)
         cv2.putText(img, button.text, (x + int(0.2 * bw), y + int(0.7 * bh)), cv2.FONT_HERSHEY_PLAIN, font_scale, (0, 0, 0), font_thickness)
 
-    # # Create a blank canvas for drawing the keyboard
-    # # keyboard_canvas = np.zeros_like(img)
-    # buttonList = []
-    # # Define buttons in each row of the virtual keyboard
-    # for key in keys[0]:
-    #     buttonList.append(Button([30 + keys[0].index(key) * 105, 30], key))
-    # for key in keys[1]:
-    #     buttonList.append(Button([30 + keys[1].index(key) * 105, 150], key))
-    # for key in keys[2]:
-    #     buttonList.append(Button([30 + keys[2].index(key) * 105, 260], key))
-
-    # # Add special buttons for Backspace and Space
-    # buttonList.append(Button([90 + 10 * 100, 30], 'BS', size=[125, 100]))
-    # buttonList.append(Button([300, 370], 'SPACE', size=[500, 100]))
-
-    # # Draw Keyboard Buttons
-    # for button in buttonList:
-    #     x, y = button.pos
-    #     w, h = button.size
-    #     cv2.rectangle(img, (x, y), (x + w, y + h), (200, 200, 200), -1)
-    #     cv2.putText(img, button.text, (x + 20, y + 70), cv2.FONT_HERSHEY_PLAIN, 5, (0, 0, 0), 3)
-
     detections = []
     if result.multi_hand_landmarks:
         for hand_landmarks in result.multi_hand_landmarks:
-            # Draw hand landmarks
             mp_drawing.draw_landmarks(
                 img, hand_landmarks, mp_hands.HAND_CONNECTIONS,
                 mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=2, circle_radius=4),
                 mp_drawing.DrawingSpec(color=(0, 0, 255), thickness=2)
             )
 
-            # Extract bounding box for each hand
             h, w, _ = img.shape
             x_min, y_min = w, h
             x_max, y_max = 0, 0
@@ -129,11 +102,9 @@ def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
             bbox = [x_min, y_min, x_max - x_min, y_max - y_min]
             detections.append(Detection(label="Hand", score=0.5, box=np.array(bbox)))
 
-            # Extract finger tip positions
             x4, y4 = int(hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP].x * w), int(hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP].y * h)
             x8, y8 = int(hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].x * w), int(hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].y * h)
 
-            # Check for whether the finger is in button bounds
             for button in buttonList:
                 x, y = button.pos
                 bw, bh = button.size
@@ -141,22 +112,18 @@ def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
                     cv2.rectangle(img, (x, y), (x + bw, y + bh), (0, 255, 160), -1)
                     cv2.putText(img, button.text, (x + int(0.2 * bw), y + int(0.7 * bh)), cv2.FONT_HERSHEY_PLAIN, font_scale, (255, 255, 255), font_thickness)
 
-
-
-                    # Distance Calculation
                     distance = np.sqrt((x8 - x4) ** 2 + (y8 - y4) ** 2)
                     click_threshold = 10
-
-
-
-
-
-
-
-
-
-
-                    # output_text += ' '
+
+                    if (distance / np.sqrt(bbox[2] ** 2 + bbox[3] ** 2)) * 100 < click_threshold:
+                        if time.time() - prev_key_time[0] > 2:
+                            prev_key_time[0] = time.time()
+                            if button.text != 'BS' and button.text != 'SPACE':
+                                output_text += button.text
+                            elif button.text == 'BS':
+                                output_text = output_text[:-1]
+                            else:
+                                output_text += ' '
 
     result_queue.put(detections)
     st.session_state["output_text"] = output_text
@@ -170,4 +137,4 @@ webrtc_streamer(
     media_stream_constraints={"video": True, "audio": False},
     video_frame_callback=video_frame_callback,
     async_processing=True,
-)
+)
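The click logic this commit adds normalizes the thumb-to-index distance by the diagonal of the hand's bounding box, so the 10% pinch threshold behaves the same whether the hand is near the camera or far from it, and the two-second check against prev_key_time debounces a single pinch so it cannot fire as several key presses. Below is a minimal standalone sketch of that check; the is_pinch_click helper and its constants are hypothetical names for illustration, not part of app.py:

import time
import numpy as np

CLICK_THRESHOLD = 10   # pinch distance as a percentage of the hand diagonal
DEBOUNCE_SECONDS = 2   # minimum gap between two accepted key presses

def is_pinch_click(x4, y4, x8, y8, bbox, last_press_time):
    """Return True when the thumb tip (x4, y4) and index tip (x8, y8) are
    pinched together, scaled by the hand bounding box (x, y, w, h) so the
    same gesture works at any distance from the camera."""
    distance = np.sqrt((x8 - x4) ** 2 + (y8 - y4) ** 2)
    hand_diagonal = np.sqrt(bbox[2] ** 2 + bbox[3] ** 2)
    if hand_diagonal == 0:
        return False  # degenerate box, nothing to scale against
    pinched = (distance / hand_diagonal) * 100 < CLICK_THRESHOLD
    debounced = time.time() - last_press_time > DEBOUNCE_SECONDS
    return pinched and debounced

# Fingertips ~12 px apart on a 180 x 200 px hand box: about 4.5% of the
# diagonal, under the 10% threshold, so this registers as a click.
print(is_pinch_click(100, 100, 108, 109, [50, 40, 180, 200], 0.0))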