Pratyush101 committed on
Commit df8ec21 · verified · 1 Parent(s): a472ccb

Update app.py


Add the squat detection code

Files changed (1)
  1. app.py +281 -174
app.py CHANGED
@@ -1,191 +1,298 @@
- import logging
- import queue
- from pathlib import Path
- from typing import List, NamedTuple
- import mediapipe as mp
- import av
- import cv2
- import numpy as np
- import streamlit as st
- from streamlit_webrtc import WebRtcMode, webrtc_streamer
- from sample_utils.turn import get_ice_servers
- from cvzone.HandTrackingModule import HandDetector
- from cvzone.SelfiSegmentationModule import SelfiSegmentation
- import time
- import os
-
- logger = logging.getLogger(__name__)
-
- st.title("Interactive Virtual Keyboard with Twilio Integration")
- st.info("Use your webcam to interact with the virtual keyboard via hand gestures.")
-
- class Button:
-     def __init__(self, pos, text, size=[100, 100]):
-         self.pos = pos
-         self.size = size
-         self.text = text
-
- # Initialize components
- detector = HandDetector(maxHands=1, detectionCon=0.8)
- # segmentor = SelfiSegmentation()
- # keys = [["Q", "W", "E", "R", "T", "Y", "U", "I", "O", "P"],
- #         ["A", "S", "D", "F", "G", "H", "J", "K", "L", ";"],
- #         ["Z", "X", "C", "V", "B", "N", "M", ",", ".", "/"]]

- # listImg = os.listdir('model/street')
- # imgList = [cv2.imread(f'model/street/{imgPath}') for imgPath in listImg]
- # indexImg = 0


- # # Function to process the video frame from the webcam
- # def process_video_frame(frame, detector, segmentor, imgList, indexImg, keys, session_state):
- #     # Convert the frame to a numpy array (BGR format)
- #     image = frame.to_ndarray(format="bgr24")

- #     # Remove background using SelfiSegmentation
- #     imgOut = segmentor.removeBG(image, imgList[indexImg])

- #     # Detect hands on the background-removed image
- #     hands, img = detector.findHands(imgOut, flipType=False)

- #     # Create a blank canvas for the keyboard
- #     keyboard_canvas = np.zeros_like(img)
- #     buttonList = []
-
- #     # Create buttons for the virtual keyboard based on the keys list
- #     for key in keys[0]:
- #         buttonList.append(Button([30 + keys[0].index(key) * 105, 30], key))
- #     for key in keys[1]:
- #         buttonList.append(Button([30 + keys[1].index(key) * 105, 150], key))
- #     for key in keys[2]:
- #         buttonList.append(Button([30 + keys[2].index(key) * 105, 260], key))
-
- #     # Draw the buttons on the keyboard canvas
- #     for button in buttonList:
- #         x, y = button.pos
- #         cv2.rectangle(keyboard_canvas, (x, y), (x + button.size[0], y + button.size[1]), (255, 255, 255), -1)
- #         cv2.putText(keyboard_canvas, button.text, (x + 20, y + 70), cv2.FONT_HERSHEY_PLAIN, 5, (0, 0, 0), 3)

- #     # Handle input and gestures from detected hands
- #     if hands:
- #         for hand in hands:
- #             lmList = hand["lmList"]
- #             if lmList:
- #                 # Get the coordinates of the index finger tip (landmark 8)
- #                 x8, y8 = lmList[8][0], lmList[8][1]
- #                 for button in buttonList:
- #                     bx, by = button.pos
- #                     bw, bh = button.size
- #                     # Check if the index finger is over a button
- #                     if bx < x8 < bx + bw and by < y8 < by + bh:
- #                         # Highlight the button and update the text
- #                         cv2.rectangle(img, (bx, by), (bx + bw, by + bh), (0, 255, 0), -1)
- #                         cv2.putText(img, button.text, (bx + 20, by + 70), cv2.FONT_HERSHEY_PLAIN, 5, (255, 255, 255), 3)
- #                         # Update the output text in session_state
- #                         session_state["output_text"] += button.text
-
- #     # Corrected return: Create a video frame from the ndarray image
- #     return av.VideoFrame.from_ndarray(img, format="bgr24")
-
-
-
-
-
-
- # Shared state for output text
- if "output_text" not in st.session_state:
-     st.session_state["output_text"] = ""
-
- class Detection(NamedTuple):
-     label: str
-     score: float
-     box: np.ndarray
-
-
- @st.cache_resource  # Cache label colors
- def generate_label_colors():
-     return np.random.uniform(0, 255, size=(2, 3))  # Two classes: Left and Right Hand
-
-
- COLORS = generate_label_colors()
-
- # Initialize MediaPipe Hands
- mp_hands = mp.solutions.hands
- detector = mp_hands.Hands(static_image_mode=False, max_num_hands=2, min_detection_confidence=0.5)
-
- # Session-specific caching
- result_queue: "queue.Queue[List[Detection]]" = queue.Queue()
-
- # Hand detection callback
- def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
-     image = frame.to_ndarray(format="bgr24")
-     h, w = image.shape[:2]
-
-     # Process image with MediaPipe Hands
-     results = detector.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
-
-     detections = []
-     if results.multi_hand_landmarks:
-         for hand_landmarks, hand_class in zip(results.multi_hand_landmarks, results.multi_handedness):
-             # Extract bounding box
-             x_min, y_min = 1, 1
-             x_max, y_max = 0, 0
-             for lm in hand_landmarks.landmark:
-                 x_min = min(x_min, lm.x)
-                 y_min = min(y_min, lm.y)
-                 x_max = max(x_max, lm.x)
-                 y_max = max(y_max, lm.y)
-
-             # Scale bbox to image size
-             box = np.array([x_min * w, y_min * h, x_max * w, y_max * h]).astype("int")
-
-             # Label and score
-             label = hand_class.classification[0].label
-             score = hand_class.classification[0].score
-
-             detections.append(Detection(label=label, score=score, box=box))
-
-             # Draw bounding box and label
-             color = COLORS[0 if label == "Left" else 1]
-             cv2.rectangle(image, (box[0], box[1]), (box[2], box[3]), color, 2)
-             caption = f"{label}: {round(score * 100, 2)}%"
-             cv2.putText(
-                 image,
-                 caption,
-                 (box[0], box[1] - 15 if box[1] - 15 > 15 else box[1] + 15),
-                 cv2.FONT_HERSHEY_SIMPLEX,
-                 0.5,
-                 color,
-                 2,
-             )
-
-     # Put results in the queue
-     result_queue.put(detections)
-
-     return av.VideoFrame.from_ndarray(image, format="bgr24")



- webrtc_ctx = webrtc_streamer(
-     key="keyboard-demo",
-     mode=WebRtcMode.SENDRECV,
-     rtc_configuration={
-         "iceServers": get_ice_servers(),
-         "iceTransportPolicy": "relay",
-     },
-     video_frame_callback=video_frame_callback,
      media_stream_constraints={"video": True, "audio": False},
-     async_processing=True,
  )


- st.markdown("### Instructions")
- st.write(
-     """
-     1. Turn on your webcam using the checkbox above.
-     2. Use hand gestures to interact with the virtual keyboard.
-     """
- )


 
+ # import logging
+ # import queue
+ # from pathlib import Path
+ # from typing import List, NamedTuple
+ # import mediapipe as mp
+ # import av
+ # import cv2
+ # import numpy as np
+ # import streamlit as st
+ # from streamlit_webrtc import WebRtcMode, webrtc_streamer
+ # from sample_utils.turn import get_ice_servers
+ # from cvzone.HandTrackingModule import HandDetector
+ # from cvzone.SelfiSegmentationModule import SelfiSegmentation
+ # import time
+ # import os

+ # logger = logging.getLogger(__name__)

+ # st.title("Interactive Virtual Keyboard with Twilio Integration")
+ # st.info("Use your webcam to interact with the virtual keyboard via hand gestures.")

+ # class Button:
+ #     def __init__(self, pos, text, size=[100, 100]):
+ #         self.pos = pos
+ #         self.size = size
+ #         self.text = text
+
+ # # Initialize components
+ # detector = HandDetector(maxHands=1, detectionCon=0.8)
+ # # segmentor = SelfiSegmentation()
+ # # keys = [["Q", "W", "E", "R", "T", "Y", "U", "I", "O", "P"],
+ # #         ["A", "S", "D", "F", "G", "H", "J", "K", "L", ";"],
+ # #         ["Z", "X", "C", "V", "B", "N", "M", ",", ".", "/"]]
+
+ # # listImg = os.listdir('model/street')
+ # # imgList = [cv2.imread(f'model/street/{imgPath}') for imgPath in listImg]
+ # # indexImg = 0
+
+
+ # # # Function to process the video frame from the webcam
+ # # def process_video_frame(frame, detector, segmentor, imgList, indexImg, keys, session_state):
+ # #     # Convert the frame to a numpy array (BGR format)
+ # #     image = frame.to_ndarray(format="bgr24")

+ # #     # Remove background using SelfiSegmentation
+ # #     imgOut = segmentor.removeBG(image, imgList[indexImg])

+ # #     # Detect hands on the background-removed image
+ # #     hands, img = detector.findHands(imgOut, flipType=False)

+ # #     # Create a blank canvas for the keyboard
+ # #     keyboard_canvas = np.zeros_like(img)
+ # #     buttonList = []
+
+ # #     # Create buttons for the virtual keyboard based on the keys list
+ # #     for key in keys[0]:
+ # #         buttonList.append(Button([30 + keys[0].index(key) * 105, 30], key))
+ # #     for key in keys[1]:
+ # #         buttonList.append(Button([30 + keys[1].index(key) * 105, 150], key))
+ # #     for key in keys[2]:
+ # #         buttonList.append(Button([30 + keys[2].index(key) * 105, 260], key))
+
+ # #     # Draw the buttons on the keyboard canvas
+ # #     for button in buttonList:
+ # #         x, y = button.pos
+ # #         cv2.rectangle(keyboard_canvas, (x, y), (x + button.size[0], y + button.size[1]), (255, 255, 255), -1)
+ # #         cv2.putText(keyboard_canvas, button.text, (x + 20, y + 70), cv2.FONT_HERSHEY_PLAIN, 5, (0, 0, 0), 3)
+
+ # #     # Handle input and gestures from detected hands
+ # #     if hands:
+ # #         for hand in hands:
+ # #             lmList = hand["lmList"]
+ # #             if lmList:
+ # #                 # Get the coordinates of the index finger tip (landmark 8)
+ # #                 x8, y8 = lmList[8][0], lmList[8][1]
+ # #                 for button in buttonList:
+ # #                     bx, by = button.pos
+ # #                     bw, bh = button.size
+ # #                     # Check if the index finger is over a button
+ # #                     if bx < x8 < bx + bw and by < y8 < by + bh:
+ # #                         # Highlight the button and update the text
+ # #                         cv2.rectangle(img, (bx, by), (bx + bw, by + bh), (0, 255, 0), -1)
+ # #                         cv2.putText(img, button.text, (bx + 20, by + 70), cv2.FONT_HERSHEY_PLAIN, 5, (255, 255, 255), 3)
+ # #                         # Update the output text in session_state
+ # #                         session_state["output_text"] += button.text
+
+ # #     # Corrected return: Create a video frame from the ndarray image
+ # #     return av.VideoFrame.from_ndarray(img, format="bgr24")
+
+
+
+
+
+
+ # # Shared state for output text
+ # if "output_text" not in st.session_state:
+ #     st.session_state["output_text"] = ""
+
+ # class Detection(NamedTuple):
+ #     label: str
+ #     score: float
+ #     box: np.ndarray


+ # @st.cache_resource  # Cache label colors
+ # def generate_label_colors():
+ #     return np.random.uniform(0, 255, size=(2, 3))  # Two classes: Left and Right Hand


+ # COLORS = generate_label_colors()
+
+ # # Initialize MediaPipe Hands
+ # mp_hands = mp.solutions.hands
+ # detector = mp_hands.Hands(static_image_mode=False, max_num_hands=2, min_detection_confidence=0.5)
+
+ # # Session-specific caching
+ # result_queue: "queue.Queue[List[Detection]]" = queue.Queue()
+
+ # # Hand detection callback
+ # def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
+ #     image = frame.to_ndarray(format="bgr24")
+ #     h, w = image.shape[:2]
+
+ #     # Process image with MediaPipe Hands
+ #     results = detector.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
+
+ #     detections = []
+ #     if results.multi_hand_landmarks:
+ #         for hand_landmarks, hand_class in zip(results.multi_hand_landmarks, results.multi_handedness):
+ #             # Extract bounding box
+ #             x_min, y_min = 1, 1
+ #             x_max, y_max = 0, 0
+ #             for lm in hand_landmarks.landmark:
+ #                 x_min = min(x_min, lm.x)
+ #                 y_min = min(y_min, lm.y)
+ #                 x_max = max(x_max, lm.x)
+ #                 y_max = max(y_max, lm.y)
+
+ #             # Scale bbox to image size
+ #             box = np.array([x_min * w, y_min * h, x_max * w, y_max * h]).astype("int")
+
+ #             # Label and score
+ #             label = hand_class.classification[0].label
+ #             score = hand_class.classification[0].score
+
+ #             detections.append(Detection(label=label, score=score, box=box))
+
+ #             # Draw bounding box and label
+ #             color = COLORS[0 if label == "Left" else 1]
+ #             cv2.rectangle(image, (box[0], box[1]), (box[2], box[3]), color, 2)
+ #             caption = f"{label}: {round(score * 100, 2)}%"
+ #             cv2.putText(
+ #                 image,
+ #                 caption,
+ #                 (box[0], box[1] - 15 if box[1] - 15 > 15 else box[1] + 15),
+ #                 cv2.FONT_HERSHEY_SIMPLEX,
+ #                 0.5,
+ #                 color,
+ #                 2,
+ #             )
+
+ #     # Put results in the queue
+ #     result_queue.put(detections)
+
+ #     return av.VideoFrame.from_ndarray(image, format="bgr24")
+
+
+
+ # webrtc_ctx = webrtc_streamer(
+ #     key="keyboard-demo",
+ #     mode=WebRtcMode.SENDRECV,
+ #     rtc_configuration={
+ #         "iceServers": get_ice_servers(),
+ #         "iceTransportPolicy": "relay",
+ #     },
+ #     video_frame_callback=video_frame_callback,
+ #     media_stream_constraints={"video": True, "audio": False},
+ #     async_processing=True,
+ # )
+
+
+ # st.markdown("### Instructions")
+ # st.write(
+ #     """
+ #     1. Turn on your webcam using the checkbox above.
+ #     2. Use hand gestures to interact with the virtual keyboard.
+ #     """
+ # )
+
+ import cv2
+ import mediapipe as mp
+ import numpy as np
+ import streamlit as st
+ from streamlit_webrtc import webrtc_streamer
+
+ # Initialize MediaPipe Pose
+ mp_pose = mp.solutions.pose
+ mp_drawing = mp.solutions.drawing_utils
+
+ # Function to calculate angles between three points
+ def calculate_angle(a, b, c):
+     a = np.array(a)  # First point
+     b = np.array(b)  # Midpoint
+     c = np.array(c)  # Endpoint
+     radians = np.arctan2(c[1]-b[1], c[0]-b[0]) - np.arctan2(a[1]-b[1], a[0]-b[0])
+     angle = np.abs(radians * 180.0 / np.pi)
+     if angle > 180.0:
+         angle = 360 - angle
+     return angle
+
+ # Squat detection processor class
+ class VideoProcessor:
+     def __init__(self):
+         self.pose = mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5)
+
+     def recv(self, frame):
+         image = frame.to_ndarray(format="bgr24")
+         image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+         image.flags.writeable = False
+         results = self.pose.process(image)
+         image.flags.writeable = True
+         image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
+
+         try:
+             landmarks = results.pose_landmarks.landmark
+             hip = [landmarks[mp_pose.PoseLandmark.LEFT_HIP.value].x,
+                    landmarks[mp_pose.PoseLandmark.LEFT_HIP.value].y]
+             knee = [landmarks[mp_pose.PoseLandmark.LEFT_KNEE.value].x,
+                     landmarks[mp_pose.PoseLandmark.LEFT_KNEE.value].y]
+             ankle = [landmarks[mp_pose.PoseLandmark.LEFT_ANKLE.value].x,
+                      landmarks[mp_pose.PoseLandmark.LEFT_ANKLE.value].y]
+             shoulder = [landmarks[mp_pose.PoseLandmark.LEFT_SHOULDER.value].x,
+                         landmarks[mp_pose.PoseLandmark.LEFT_SHOULDER.value].y]
+             foot_index = [landmarks[mp_pose.PoseLandmark.LEFT_FOOT_INDEX.value].x,
+                           landmarks[mp_pose.PoseLandmark.LEFT_FOOT_INDEX.value].y]
+             x_axis_hip = [hip[0], 0]
+
+             angle_knee = calculate_angle(hip, knee, ankle)
+             angle_ankle = calculate_angle(foot_index, ankle, knee)
+             angle_hip = calculate_angle(shoulder, hip, x_axis_hip)
+
+             feedback = ""
+             if 80 < angle_knee < 110 and 29 < angle_hip < 40:
+                 feedback = "Good Squat!"
+             elif angle_knee < 80:
+                 feedback = "Squat too deep!"
+             elif angle_knee > 110:
+                 feedback = "Lower your hips!"
+             elif angle_hip < 29:
+                 feedback = "Bend Forward!"
+             elif angle_hip > 45:
+                 feedback = "Bend Backward!"
+
+             cv2.putText(image, feedback, (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv2.LINE_AA)
+             mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)
+         except Exception as e:
+             print(f"Error: {e}")
+
+         return frame.from_ndarray(image, format="bgr24")
+
+ # Streamlit WebRTC configuration
+ webrtc_streamer(
+     key="squat_detector",
+     video_processor_factory=VideoProcessor,
      media_stream_constraints={"video": True, "audio": False},
+     rtc_configuration={"iceServers": [{"urls": ["stun:stun.l.google.com:19302"]}]},
  )
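
A quick way to sanity-check the calculate_angle() helper introduced in this commit is to run it outside the Streamlit app. The snippet below is a minimal sketch: it copies the same formula from the new app.py and feeds it made-up normalized coordinates (the sample points are illustrative, not real MediaPipe output). The app itself starts the usual Streamlit way (streamlit run app.py) once mediapipe, opencv-python, streamlit, and streamlit-webrtc are installed.

    # sanity_check.py - standalone check of the angle formula used for squat feedback
    import numpy as np

    def calculate_angle(a, b, c):
        # Same formula as in app.py: angle at vertex b formed by points a and c.
        a, b, c = np.array(a), np.array(b), np.array(c)
        radians = np.arctan2(c[1] - b[1], c[0] - b[0]) - np.arctan2(a[1] - b[1], a[0] - b[0])
        angle = np.abs(radians * 180.0 / np.pi)
        if angle > 180.0:
            angle = 360 - angle
        return angle

    # Hypothetical landmarks in normalized [0, 1] coordinates (y grows downward, as in MediaPipe).
    # Hip directly above the knee, ankle out to the side: expect a right angle.
    hip, knee, ankle = [0.5, 0.2], [0.5, 0.5], [0.8, 0.5]
    print(calculate_angle(hip, knee, ankle))   # 90.0

    # Three collinear points (fully extended leg): expect roughly 180 degrees.
    print(calculate_angle([0.5, 0.1], [0.5, 0.5], [0.5, 0.9]))  # 180.0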