"""Streamlit app that streams webcam video and runs hand detection on it.

Each incoming WebRTC frame is passed through cvzone's ``HandDetector``; the
annotated frame is sent back to the browser and the per-frame detections are
pushed onto ``result_queue``.  The keyboard layout (``keys``), ``Button`` and
the background images are defined here but not used by the code in this
section.
"""

import logging
import os
import queue
import time
from typing import List, NamedTuple

import av
import cv2
import numpy as np
import streamlit as st
from cvzone.HandTrackingModule import HandDetector
from cvzone.SelfiSegmentationModule import SelfiSegmentation
from streamlit_webrtc import WebRtcMode, webrtc_streamer

from sample_utils.turn import get_ice_servers

# Logger Setup
logger = logging.getLogger(__name__)

# Streamlit settings
st.set_page_config(page_title="Virtual Keyboard", page_icon="🏋️")
st.title("Interactive Virtual Keyboard")
st.subheader(
    "Turn on the webcam and use hand gestures to interact with the virtual keyboard. "
    "Use 'a' and 'd' from the keyboard to change the background."
)

# Initialize modules
detector = HandDetector(maxHands=1, detectionCon=0.85)
segmentor = SelfiSegmentation()

# Define virtual keyboard layout
keys = [
    ["Q", "W", "E", "R", "T", "Y", "U", "I", "O", "P"],
    ["A", "S", "D", "F", "G", "H", "J", "K", "L", ";"],
    ["Z", "X", "C", "V", "B", "N", "M", ",", ".", "/"],
]


class Button:
    """A single key of the virtual keyboard.

    Attributes:
        pos: Position of the button, stored as given.
        text: Label of the button.
        size: Size of the button; ``[100, 100]`` when not supplied.
    """

    def __init__(self, pos, text, size=None):
        self.pos = pos
        # A fresh list per instance: a ``[100, 100]`` literal in the signature
        # would be one object shared (and mutable) across every Button.
        self.size = [100, 100] if size is None else size
        self.text = text


class Detection(NamedTuple):
    """One detected object in a frame."""

    label: str
    score: float
    box: np.ndarray  # [x_min, y_min, x_max, y_max]


# Detections produced by the video callback, one list per processed frame.
# NOTE(review): the queue is unbounded and nothing in this section consumes
# it -- confirm a reader exists, otherwise it grows for as long as video runs.
result_queue: "queue.Queue[List[Detection]]" = queue.Queue()

# Load background images
BACKGROUND_DIR = "model/street"
# isdir (not exists): a regular file at this path would make listdir raise.
listImg = os.listdir(BACKGROUND_DIR) if os.path.isdir(BACKGROUND_DIR) else []
if not listImg:
    st.error("Error: 'street' directory is missing or empty. Please add background images.")
    st.stop()
else:
    imgList = [cv2.imread(f"{BACKGROUND_DIR}/{imgPath}") for imgPath in listImg]
    # cv2.imread returns None for files it cannot decode; drop those.
    imgList = [img for img in imgList if img is not None]

indexImg = 0
output_text = ""

if "output_text" not in st.session_state:
    st.session_state["output_text"] = ""


# Video Frame Callback
def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
    """Detect hands in one video frame and return the annotated frame.

    The list of detections for the frame (possibly empty) is put on
    ``result_queue``.

    Args:
        frame: Incoming video frame from the WebRTC stream.

    Returns:
        A new frame built from the image returned by ``detector.findHands``.
    """
    img = frame.to_ndarray(format="bgr24")
    hands, img = detector.findHands(img, draw=True)

    detections = []
    for hand in hands:
        # bbox is read as (x, y, width, height) and converted to corner form.
        x, y, w, h = hand["bbox"]
        detections.append(
            Detection(
                label="Hand",
                # The hand dict is not guaranteed to carry a "score" entry;
                # indexing it directly raised KeyError on the first detection.
                score=hand.get("score", 0.0),
                box=np.array([x, y, x + w, y + h]),
            )
        )
    result_queue.put(detections)

    # No st.session_state write here: streamlit-webrtc runs this callback on a
    # worker thread where Streamlit calls do not work, and the value that was
    # written (the never-modified global ``output_text``) is already seeded by
    # the main script above.
    return av.VideoFrame.from_ndarray(img, format="bgr24")


# WebRTC Streamer
webrtc_streamer(
    key="virtual-keyboard",
    mode=WebRtcMode.SENDRECV,
    rtc_configuration={"iceServers": get_ice_servers(), "iceTransportPolicy": "relay"},
    media_stream_constraints={"video": True, "audio": False},
    video_frame_callback=video_frame_callback,
    async_processing=True,
)