import logging
import os
import queue
import time
from typing import List, NamedTuple

import av
import cv2
import numpy as np
import streamlit as st
from streamlit_webrtc import WebRtcMode, webrtc_streamer

from sample_utils.turn import get_ice_servers

from cvzone.HandTrackingModule import HandDetector
from cvzone.SelfiSegmentationModule import SelfiSegmentation

logger = logging.getLogger(__name__)

# Streamlit settings
st.set_page_config(page_title="Virtual Keyboard", layout="wide")
st.title("Interactive Virtual Keyboard")
st.subheader('''Turn on the webcam and use hand gestures to interact with the virtual keyboard.
Use 'a' and 'd' from the keyboard to change the background.''')

# Initialize hand detector and background segmentor
detector = HandDetector(maxHands=1, detectionCon=0.8)
segmentor = SelfiSegmentation()

# Define virtual keyboard layout
keys = [["Q", "W", "E", "R", "T", "Y", "U", "I", "O", "P"],
        ["A", "S", "D", "F", "G", "H", "J", "K", "L", ";"],
        ["Z", "X", "C", "V", "B", "N", "M", ",", ".", "/"]]


class Button:
    def __init__(self, pos, text, size=[100, 100]):
        self.pos = pos
        self.size = size
        self.text = text


class Detection(NamedTuple):
    label: str
    score: float
    box: np.ndarray


# Detections are handed from the video callback thread to the Streamlit
# script thread through this queue.
result_queue: "queue.Queue[List[Detection]]" = queue.Queue()

# Load background images
listImg = os.listdir('model/street') if os.path.exists('model/street') else []
if not listImg:
    st.error("Error: 'street' directory is missing or empty. Please add background images.")
    st.stop()

imgList = []
for imgPath in listImg:
    bg = cv2.imread(f'model/street/{imgPath}')  # read each file once, skip unreadable ones
    if bg is not None:
        imgList.append(bg)

indexImg = 0
prev_key_time = [time.time()] * 2
output_text = ""

if "output_text" not in st.session_state:
    st.session_state["output_text"] = ""


def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
    global indexImg, output_text

    img = frame.to_ndarray(format="bgr24")

    # Replace the background first so the hand annotations are drawn onto the
    # composited frame. removeBG expects the background image to match the
    # frame size.
    imgOut = segmentor.removeBG(img, imgList[indexImg])
    hands, imgOut = detector.findHands(imgOut)

    # The key-drawing and key-press logic that would use `keys`, `Button`,
    # `prev_key_time`, and `output_text` is not part of this section.

    detections = []
    if hands:
        for hand in hands:
            bbox = hand['bbox']
            label = "Hand"
            # cvzone's hand dict carries no per-hand score, so fall back to the
            # detector's confidence threshold.
            score = hand.get('score', detector.detectionCon)
            box = np.array([bbox[0], bbox[1], bbox[0] + bbox[2], bbox[1] + bbox[3]])
            detections.append(Detection(label=label, score=score, box=box))

    result_queue.put(detections)

    # Note: st.session_state cannot be written from here; this callback runs
    # outside the Streamlit script thread, so results travel through
    # result_queue instead.
    return av.VideoFrame.from_ndarray(imgOut, format="bgr24")


# Keep the streamer context so the UI below can check playback state.
webrtc_ctx = webrtc_streamer(
    key="virtual-keyboard",
    mode=WebRtcMode.SENDRECV,
    rtc_configuration={"iceServers": get_ice_servers(), "iceTransportPolicy": "relay"},
    media_stream_constraints={"video": True, "audio": False},
    video_frame_callback=video_frame_callback,
    async_processing=True,
)
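
# The callback publishes detections into result_queue, but nothing in this
# section consumes them. Below is a minimal sketch of a consumer on the
# Streamlit script thread, following the queue-draining pattern from the
# streamlit_webrtc object-detection example; the checkbox label and the
# 1-second timeout are illustrative choices, not part of the original app.
if st.checkbox("Show detected hands", value=True):
    if webrtc_ctx.state.playing:
        labels_placeholder = st.empty()
        while True:
            try:
                detections = result_queue.get(timeout=1.0)
            except queue.Empty:
                continue  # no frame processed recently; keep waiting
            labels_placeholder.table(detections)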