import logging
import queue
import time
from typing import List, NamedTuple

import av
import cv2
import mediapipe as mp
import numpy as np
import streamlit as st
from streamlit_webrtc import WebRtcMode, webrtc_streamer

from sample_utils.turn import get_ice_servers
# Logger Setup
logger = logging.getLogger(__name__)
# Streamlit settings
st.set_page_config(page_title="Virtual Keyboard", page_icon="⌨️")
st.title("Interactive Virtual Keyboard")
st.subheader("Turn on the webcam and use hand gestures to interact with the virtual keyboard.")
# Initialize MediaPipe Hands
mp_hands = mp.solutions.hands
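# Track at most one hand; min_detection_confidence trades missed detections against false positives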
hands = mp_hands.Hands(max_num_hands=1, min_detection_confidence=0.5)
mp_drawing = mp.solutions.drawing_utils
# Keyboard layout (three QWERTY rows)
keys = [
["Q", "W", "E", "R", "T", "Y", "U", "I", "O", "P"],
["A", "S", "D", "F", "G", "H", "J", "K", "L", ";"],
["Z", "X", "C", "V", "B", "N", "M", ",", ".", "/"]
]
# Key and text colors (BGR, as used by OpenCV)
COLORS = {
    'KEY_IDLE': (75, 75, 75),        # Dark gray for idle keys
    'KEY_HOVER': (100, 100, 255),    # Red-tinted highlight for hovered keys
    'KEY_PRESS': (9, 9, 175),        # Dark red for pressed keys
    'TEXT_NORMAL': (255, 255, 255),  # White text
    'TEXT_PRESS': (255, 255, 255),   # White text when pressed
    'OUTPUT_BG': (45, 45, 45),       # Dark gray output-bar background
}
# Button class with hover and press state
class Button:
def __init__(self, pos, text, size=[100, 100]):
self.pos = pos
self.size = size
self.text = text
self.is_hover = False
self.is_pressed = False
class Detection(NamedTuple):
label: str
score: float
box: np.ndarray
# Global variables
result_queue: "queue.Queue[List[Detection]]" = queue.Queue()
indexImg = 0
output_text = ""
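# Timestamp of the last accepted key press (only index 0 is used), for debouncing input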
prev_key_time = [time.time()] * 2
if "output_text" not in st.session_state:
st.session_state["output_text"] = ""
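# Note: the WebRTC frame callback below runs in a worker thread, so it accumulates typed text
# in the module-level output_text rather than in st.session_state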
# Video Frame Callback with Logic
def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
global indexImg, output_text
img = frame.to_ndarray(format="bgr24")
# Mirror the image horizontally
img = cv2.flip(img, 1) # Flip code 1 means horizontal flip
result = hands.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
# Create the keyboard buttons
buttonList = []
h, w = img.shape[:2]
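    # Scale key geometry and fonts with the frame size so the layout adapts to any resolution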
key_width = int(0.07 * w)
key_height = int(0.09 * h)
font_scale = 0.0045 * w
font_thickness = int(0.009 * h)
for row, key_row in enumerate(keys):
for col, key in enumerate(key_row):
x = int(0.03 * w + col * (key_width + 5))
y = int(0.03 * h + row * (key_height + 5))
buttonList.append(Button([x, y], key, size=[key_width, key_height]))
# Add special buttons for Backspace and Space
buttonList.append(Button([int(0.85 * w), int(0.03 * h)], 'BS', size=[int(0.12 * w), key_height]))
buttonList.append(Button([int(0.2 * w), int(0.4 * h)], 'SPACE', size=[int(0.55 * w), key_height]))
# Draw Keyboard Buttons
for button in buttonList:
x, y = button.pos
bw, bh = button.size
        # Pick fill and text colors based on the key state
if button.is_pressed:
color = COLORS['KEY_PRESS']
text_color = COLORS['TEXT_PRESS']
elif button.is_hover:
color = COLORS['KEY_HOVER']
text_color = COLORS['TEXT_NORMAL']
else:
color = COLORS['KEY_IDLE']
text_color = COLORS['TEXT_NORMAL']
        # Draw the filled key with a thin border
cv2.rectangle(img, (x, y), (x + bw, y + bh), color, -1, cv2.LINE_AA)
cv2.rectangle(img, (x, y), (x + bw, y + bh), (100, 100, 100), 1, cv2.LINE_AA)
        # Draw the key label (offset roughly toward the key's center)
        cv2.putText(img, button.text, (x + int(0.2 * bw), y + int(0.7 * bh)),
                    cv2.FONT_HERSHEY_PLAIN, font_scale, text_color, font_thickness)
detections = []
if result.multi_hand_landmarks:
for hand_landmarks in result.multi_hand_landmarks:
mp_drawing.draw_landmarks(
img, hand_landmarks, mp_hands.HAND_CONNECTIONS,
mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=2, circle_radius=4),
mp_drawing.DrawingSpec(color=(0, 0, 255), thickness=2)
)
h, w, _ = img.shape
x_min, y_min = w, h
x_max, y_max = 0, 0
for lm in hand_landmarks.landmark:
x, y = int(lm.x * w), int(lm.y * h)
x_min, y_min = min(x_min, x), min(y_min, y)
x_max, y_max = max(x_max, x), max(y_max, y)
bbox = [x_min, y_min, x_max - x_min, y_max - y_min]
detections.append(Detection(label="Hand", score=0.5, box=np.array(bbox)))
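            # Pinch gesture: measure the pixel distance between the thumb tip (landmark 4)
            # and the index fingertip (landmark 8)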
x4, y4 = int(hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP].x * w), int(hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP].y * h)
x8, y8 = int(hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].x * w), int(hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].y * h)
distance = np.sqrt((x8 - x4) ** 2 + (y8 - y4) ** 2)
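            # The click threshold is 20% of the hand bounding-box diagonal, so it scales with
            # how close the hand is to the camera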
click_threshold = 0.2 * np.sqrt(bbox[2] ** 2 + bbox[3] ** 2)
            for button in buttonList:
                x, y = button.pos
                bw, bh = button.size
                button.is_hover = False
                button.is_pressed = False
                # Hover: the index fingertip is over this key
                if x < x8 < x + bw and y < y8 < y + bh:
                    button.is_hover = True
                    cv2.rectangle(img, (x, y), (x + bw, y + bh), COLORS['KEY_HOVER'], -1, cv2.LINE_AA)
                    cv2.putText(img, button.text, (x + int(0.2 * bw), y + int(0.7 * bh)),
                                cv2.FONT_HERSHEY_PLAIN, font_scale, COLORS['TEXT_NORMAL'], font_thickness)
                    # Press: thumb and index fingertips pinch together
                    if distance < click_threshold:
                        button.is_pressed = True
                        cv2.rectangle(img, (x, y), (x + bw, y + bh), COLORS['KEY_PRESS'], -1, cv2.LINE_AA)
                        cv2.putText(img, button.text, (x + int(0.2 * bw), y + int(0.7 * bh)),
                                    cv2.FONT_HERSHEY_PLAIN, font_scale, COLORS['TEXT_PRESS'], font_thickness)
                        # Debounce: accept at most one key press every 2 seconds
                        if time.time() - prev_key_time[0] > 2:
                            prev_key_time[0] = time.time()
                            if button.text == 'BS':
                                output_text = output_text[:-1]  # Remove last character
                            elif button.text == 'SPACE':
                                output_text += ' '  # Add a space
                            else:
                                output_text += button.text  # Append the pressed key
    # Output text bar
output_bg_height = int(0.15 * h)
output_y = int(0.8 * h)
    # Draw the output background bar
cv2.rectangle(img,
(int(0.05 * w), output_y - output_bg_height),
(int(0.95 * w), output_y),
COLORS['OUTPUT_BG'], -1, cv2.LINE_AA)
    # Draw the typed text vertically centered inside the bar
text_size = cv2.getTextSize(output_text, cv2.FONT_HERSHEY_SIMPLEX, 1, 2)[0]
text_x = int(0.07 * w)
text_y = output_y - output_bg_height//2 + text_size[1]//2
cv2.putText(img, output_text, (text_x, text_y),
cv2.FONT_HERSHEY_SIMPLEX, 1, COLORS['TEXT_NORMAL'], 2)
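    # Publish this frame's hand detections so the main Streamlit thread can read them if needed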
result_queue.put(detections)
return av.VideoFrame.from_ndarray(img, format="bgr24")
# WebRTC Streamer
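# "iceTransportPolicy": "relay" routes media through the TURN servers returned by
# get_ice_servers(), which is more reliable behind restrictive NATs at the cost of extra latency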
webrtc_streamer(
key="virtual-keyboard",
mode=WebRtcMode.SENDRECV,
rtc_configuration={"iceServers": get_ice_servers(), "iceTransportPolicy": "relay"},
media_stream_constraints={"video": True, "audio": False},
video_frame_callback=video_frame_callback,
async_processing=True,
)