import logging
import queue
import time
from typing import List, NamedTuple

import av
import cv2
import mediapipe as mp
import numpy as np
import streamlit as st
from streamlit_webrtc import WebRtcMode, webrtc_streamer

from sample_utils.turn import get_ice_servers

# Logger setup
logger = logging.getLogger(__name__)

# Streamlit settings
st.set_page_config(page_title="Virtual Keyboard", page_icon="🏋️")
st.title("Interactive Virtual Keyboard")
st.subheader(
    "Turn on the webcam and use hand gestures to interact with the virtual keyboard. "
    "Use 'a' and 'd' from the keyboard to change the background."
)

# Initialize MediaPipe Hands
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(max_num_hands=1, min_detection_confidence=0.5)
mp_drawing = mp.solutions.drawing_utils

# Virtual keyboard layout
keys = [
    ["Q", "W", "E", "R", "T", "Y", "U", "I", "O", "P"],
    ["A", "S", "D", "F", "G", "H", "J", "K", "L", ";"],
    ["Z", "X", "C", "V", "B", "N", "M", ",", ".", "/"],
]


class Button:
    def __init__(self, pos, text, size=[100, 100]):
        self.pos = pos
        self.size = size
        self.text = text


class Detection(NamedTuple):
    label: str
    score: float
    box: np.ndarray


result_queue: "queue.Queue[List[Detection]]" = queue.Queue()

indexImg = 0
output_text = ""
prev_key_time = [time.time()] * 2

if "output_text" not in st.session_state:
    st.session_state["output_text"] = ""


# Video frame callback: runs on streamlit-webrtc's worker thread for every frame
def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
    global indexImg, output_text

    img = frame.to_ndarray(format="bgr24")
    result = hands.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

    # Create the keyboard buttons, scaled to the frame size
    buttonList = []
    h, w = img.shape[:2]
    key_width = int(0.08 * w)
    key_height = int(0.1 * h)
    font_scale = 0.005 * w
    font_thickness = int(0.01 * h)

    for row, key_row in enumerate(keys):
        for col, key in enumerate(key_row):
            x = int(0.03 * w + col * (key_width + 5))
            y = int(0.03 * h + row * (key_height + 5))
            buttonList.append(Button([x, y], key, size=[key_width, key_height]))

    # Add special buttons for Backspace and Space
    buttonList.append(Button([int(0.7 * w), int(0.03 * h)], "BS", size=[int(0.1 * w), key_height]))
    buttonList.append(Button([int(0.2 * w), int(0.8 * h)], "SPACE", size=[int(0.6 * w), key_height]))

    # Draw keyboard buttons
    for button in buttonList:
        x, y = button.pos
        bw, bh = button.size
        cv2.rectangle(img, (x, y), (x + bw, y + bh), (200, 200, 200), -1)
        cv2.putText(img, button.text, (x + int(0.2 * bw), y + int(0.7 * bh)),
                    cv2.FONT_HERSHEY_PLAIN, font_scale, (0, 0, 0), font_thickness)

    detections = []
    if result.multi_hand_landmarks:
        for hand_landmarks in result.multi_hand_landmarks:
            mp_drawing.draw_landmarks(
                img,
                hand_landmarks,
                mp_hands.HAND_CONNECTIONS,
                mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=2, circle_radius=4),
                mp_drawing.DrawingSpec(color=(0, 0, 255), thickness=2),
            )

            # Bounding box around the detected hand
            h, w, _ = img.shape
            x_min, y_min = w, h
            x_max, y_max = 0, 0
            for lm in hand_landmarks.landmark:
                x, y = int(lm.x * w), int(lm.y * h)
                x_min, y_min = min(x_min, x), min(y_min, y)
                x_max, y_max = max(x_max, x), max(y_max, y)
            bbox = [x_min, y_min, x_max - x_min, y_max - y_min]
            detections.append(Detection(label="Hand", score=0.5, box=np.array(bbox)))

            # Thumb tip (landmark 4) and index fingertip (landmark 8) in pixels
            x4 = int(hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP].x * w)
            y4 = int(hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP].y * h)
            x8 = int(hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].x * w)
            y8 = int(hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].y * h)

            # Pinch distance between thumb tip and index fingertip
            distance = np.sqrt((x8 - x4) ** 2 + (y8 - y4) ** 2)
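            # Why the click test below normalizes: raw pixel `distance` shrinks
            # as the hand moves away from the camera, so it is scaled by the
            # hand bounding-box diagonal before comparing to the threshold.
            # Illustrative numbers (not from the original code): a 40 px pinch
            # on a hand with a 500 px box diagonal gives 40 / 500 * 100 = 8,
            # which registers as a click at threshold 10.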
            click_threshold = 10
            for button in buttonList:
                x, y = button.pos
                bw, bh = button.size
                # Highlight the key currently under the index fingertip
                if x < x8 < x + bw and y < y8 < y + bh:
                    cv2.rectangle(img, (x, y), (x + bw, y + bh), (0, 255, 160), -1)
                    cv2.putText(img, button.text, (x + int(0.2 * bw), y + int(0.7 * bh)),
                                cv2.FONT_HERSHEY_PLAIN, font_scale, (255, 255, 255), font_thickness)

                    # Handle button press: a pinch (normalized distance below the
                    # threshold) types the highlighted key, debounced to at most one
                    # press every 2 seconds. This callback runs on a worker thread,
                    # so it must not write st.session_state or call rerun APIs;
                    # it updates the module-level output_text instead.
                    if (distance / np.sqrt(bbox[2] ** 2 + bbox[3] ** 2)) * 100 < click_threshold:
                        if time.time() - prev_key_time[0] > 2:
                            prev_key_time[0] = time.time()
                            if button.text == "BS":
                                output_text = output_text[:-1]  # Remove last character
                            elif button.text == "SPACE":
                                output_text += " "  # Add a space
                            else:
                                output_text += button.text  # Append key to output text

    result_queue.put(detections)
    return av.VideoFrame.from_ndarray(img, format="bgr24")


st.text_area("Typed Text", st.session_state["output_text"], height=150)

# WebRTC streamer
ctx = webrtc_streamer(
    key="virtual-keyboard",
    mode=WebRtcMode.SENDRECV,
    rtc_configuration={"iceServers": get_ice_servers(), "iceTransportPolicy": "relay"},
    media_stream_constraints={"video": True, "audio": False},
    video_frame_callback=video_frame_callback,
    async_processing=True,
)
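# --- Live display of typed text: a minimal sketch, assuming a polling approach ---
# The frame callback runs on streamlit-webrtc's worker thread, where
# st.session_state and rerun APIs are unavailable, so the script thread polls
# the module-level `output_text` while the stream is playing. `ctx` is the
# streamer context captured above; `text_placeholder` is a name introduced
# here for illustration only.
if ctx.state.playing:
    text_placeholder = st.empty()
    while ctx.state.playing:
        text_placeholder.text(f"Typed: {output_text}")
        time.sleep(0.1)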