File size: 3,513 Bytes
8083389 ce9d171 8083389 20e2e7b 8083389 84c826d 8083389 ce9d171 91368dd ce9d171 91368dd 20e2e7b 91368dd 20e2e7b 91368dd ce9d171 51bb045 ce9d171 91368dd ce9d171 51bb045 20e2e7b 91368dd 84c826d 91368dd 20e2e7b 91368dd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 |
import logging
import queue
from typing import List, NamedTuple
import av
import cv2
import numpy as np
import streamlit as st
from streamlit_webrtc import WebRtcMode, webrtc_streamer
from sample_utils.turn import get_ice_servers
import mediapipe as mp
import os
import time
# Module-level logger, named after this module
logger = logging.getLogger(__name__)

# Page header: tab title/icon, main title and usage hint
st.set_page_config(page_title="Virtual Keyboard", page_icon="🏋️")
st.title("Interactive Virtual Keyboard")
st.subheader(
    "Turn on the webcam and use hand gestures to interact with the virtual keyboard.\n"
    "Use 'a' and 'd' from the keyboard to change the background."
)

# MediaPipe hand-tracking objects shared by the video callback
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
hands = mp_hands.Hands(
    static_image_mode=False,
    max_num_hands=2,
    min_detection_confidence=0.7,
)

# Virtual keyboard layout: one list of single-character key labels per row
keys = [list(row) for row in ("QWERTYUIOP", "ASDFGHJKL;", "ZXCVBNM,./")]
class Detection(NamedTuple):
    """One detected object reported by the video callback."""

    # Human-readable class name of the detection (the callback uses "Hand").
    label: str
    # Confidence value attached to the detection.
    score: float
    # Bounding box as an integer array [x, y, width, height] in pixels.
    box: np.ndarray
# Detections produced by the video callback, one list per processed frame
result_queue: "queue.Queue[List[Detection]]" = queue.Queue()

# Load background images
BACKGROUND_DIR = "model/street"

# isdir (not exists) so a stray file at this path is treated as "missing"
# instead of making os.listdir raise; sorted so the background order, and
# therefore the meaning of indexImg, is the same on every run and platform.
listImg = sorted(os.listdir(BACKGROUND_DIR)) if os.path.isdir(BACKGROUND_DIR) else []
if not listImg:
    st.error("Error: 'street' directory is missing or empty. Please add background images.")
    st.stop()

# cv2.imread returns None for files it cannot decode; drop those entries.
imgList = [cv2.imread(os.path.join(BACKGROUND_DIR, imgPath)) for imgPath in listImg]
imgList = [img for img in imgList if img is not None]
if not imgList:
    # The directory has files but none of them is a readable image.
    st.error("Error: no readable images found in the 'street' directory. Please add background images.")
    st.stop()

# Index of the currently selected background in imgList
indexImg = 0
# Text typed so far on the virtual keyboard
output_text = ""
if "output_text" not in st.session_state:
    st.session_state["output_text"] = ""
# Video Frame Callback
def _hand_bbox(landmarks, width: int, height: int) -> np.ndarray:
    """Return the pixel bounding box ``[x, y, w, h]`` around *landmarks*.

    Args:
        landmarks: Iterable of points exposing normalized ``x`` and ``y``
            attributes (fractions of the frame width and height).
        width: Frame width in pixels.
        height: Frame height in pixels.

    Returns:
        Integer array ``[left, top, box_width, box_height]``; all zeros when
        *landmarks* is empty.
    """
    points = list(landmarks)
    if not points:
        return np.zeros(4, dtype=int)
    # Clamp both sides of each axis to the frame so a hand that is partly
    # outside the image cannot yield a negative origin or an oversized box.
    xs = np.clip([p.x for p in points], 0.0, 1.0)
    ys = np.clip([p.y for p in points], 0.0, 1.0)
    x_min, x_max = float(xs.min()), float(xs.max())
    y_min, y_max = float(ys.min()), float(ys.max())
    return np.array([
        int(x_min * width),
        int(y_min * height),
        int((x_max - x_min) * width),
        int((y_max - y_min) * height),
    ])


def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
    """Detect hands in one video frame and draw their landmarks on it.

    The frame is converted to a BGR array, run through the shared MediaPipe
    ``hands`` detector, annotated in place, and returned as a new frame. One
    ``Detection`` per hand is collected and the list (possibly empty) is
    pushed onto ``result_queue``.

    No Streamlit API is used here: streamlit-webrtc runs this callback
    outside the Streamlit script thread, where ``st.session_state`` is not
    connected to the user's session.

    Args:
        frame: Incoming video frame from the WebRTC stream.

    Returns:
        The frame with hand landmarks and connections drawn on it.
    """
    img = frame.to_ndarray(format="bgr24")
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # the detector is fed RGB
    h, w, _ = img.shape

    # Process the frame with MediaPipe
    result = hands.process(img_rgb)
    detections = []
    if result.multi_hand_landmarks:
        for hand_landmarks in result.multi_hand_landmarks:
            mp_drawing.draw_landmarks(
                img, hand_landmarks, mp_hands.HAND_CONNECTIONS,
                mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=2, circle_radius=4),
                mp_drawing.DrawingSpec(color=(0, 0, 255), thickness=2),
            )
            # Extract bounding box for detection info
            bbox = _hand_bbox(hand_landmarks.landmark, w, h)
            detections.append(Detection(label="Hand", score=1.0, box=bbox))
        # Lazy %-formatting: this runs once per frame.
        logger.info("Detected %d hand(s).", len(detections))
    else:
        logger.info("No hands detected.")

    # NOTE(review): nothing visible in this file reads result_queue, so it
    # grows by one entry per frame — confirm a consumer exists or bound it.
    result_queue.put(detections)
    return av.VideoFrame.from_ndarray(img, format="bgr24")
# WebRTC Streamer
# NOTE(review): "relay" limits ICE to relayed (TURN) candidates — confirm
# get_ice_servers() always returns a TURN server, otherwise no connection
# can be established.
rtc_config = {
    "iceServers": get_ice_servers(),
    "iceTransportPolicy": "relay",
}
# Video only; the microphone is not requested.
stream_constraints = {"video": True, "audio": False}

webrtc_streamer(
    key="virtual-keyboard",
    mode=WebRtcMode.SENDRECV,
    rtc_configuration=rtc_config,
    media_stream_constraints=stream_constraints,
    video_frame_callback=video_frame_callback,
    async_processing=True,
)
|