import logging
import queue
import time
from typing import List, NamedTuple

import av
import cv2
import mediapipe as mp
import numpy as np
import streamlit as st
from streamlit_webrtc import WebRtcMode, webrtc_streamer

from sample_utils.turn import get_ice_servers
# Logger Setup
logger = logging.getLogger(__name__)
# Streamlit settings
st.set_page_config(page_title="Virtual Keyboard", page_icon="⌨️")
st.title("Interactive Virtual Keyboard")
st.subheader("Turn on the webcam and use hand gestures to interact with the virtual keyboard.")
# Initialize MediaPipe Hands
mp_hands = mp.solutions.hands
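# Track at most one hand; min_detection_confidence trades missed detections against false positives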
hands = mp_hands.Hands(max_num_hands=1, min_detection_confidence=0.5)
mp_drawing = mp.solutions.drawing_utils
# Keyboard layout (three QWERTY rows)
keys = [
["Q", "W", "E", "R", "T", "Y", "U", "I", "O", "P"],
["A", "S", "D", "F", "G", "H", "J", "K", "L", ";"],
["Z", "X", "C", "V", "B", "N", "M", ",", ".", "/"]
]
# Key and text colors (BGR, as used by OpenCV)
COLORS = {
    'KEY_IDLE': (75, 75, 75),        # Dark gray for idle keys
    'KEY_HOVER': (100, 100, 255),    # Red-tinted highlight for hovered keys
    'KEY_PRESS': (9, 9, 175),        # Dark red for pressed keys
    'TEXT_NORMAL': (255, 255, 255),  # White text
    'TEXT_PRESS': (255, 255, 255),   # White text when pressed
    'OUTPUT_BG': (45, 45, 45),       # Dark gray output-bar background
}
# Button class with hover and press state
class Button:
def __init__(self, pos, text, size=[100, 100]):
self.pos = pos
self.size = size
self.text = text
self.is_hover = False
self.is_pressed = False
class Detection(NamedTuple):
label: str
score: float
box: np.ndarray
# Global variables
result_queue: "queue.Queue[List[Detection]]" = queue.Queue()
indexImg = 0
output_text = ""
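# Timestamp of the last accepted key press (only index 0 is used), for debouncing input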
prev_key_time = [time.time()] * 2
if "output_text" not in st.session_state:
st.session_state["output_text"] = ""
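# Note: the WebRTC frame callback below runs in a worker thread, so it accumulates typed text
# in the module-level output_text rather than in st.session_state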
# Video Frame Callback with Logic
def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
global indexImg, output_text
img = frame.to_ndarray(format="bgr24")
# Mirror the image horizontally
img = cv2.flip(img, 1) # Flip code 1 means horizontal flip
result = hands.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
# Create the keyboard buttons
buttonList = []
h, w = img.shape[:2]
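    # Scale key geometry and fonts with the frame size so the layout adapts to any resolution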
key_width = int(0.07 * w)
key_height = int(0.09 * h)
font_scale = 0.0045 * w
font_thickness = int(0.009 * h)
for row, key_row in enumerate(keys):
for col, key in enumerate(key_row):
x = int(0.03 * w + col * (key_width + 5))
y = int(0.03 * h + row * (key_height + 5))
buttonList.append(Button([x, y], key, size=[key_width, key_height]))
# Add special buttons for Backspace and Space
buttonList.append(Button([int(0.85 * w), int(0.03 * h)], 'BS', size=[int(0.12 * w), key_height]))
buttonList.append(Button([int(0.2 * w), int(0.4 * h)], 'SPACE', size=[int(0.55 * w), key_height]))
# Draw Keyboard Buttons
for button in buttonList:
x, y = button.pos
bw, bh = button.size
        # Pick fill and text colors based on the key state
if button.is_pressed:
color = COLORS['KEY_PRESS']
text_color = COLORS['TEXT_PRESS']
elif button.is_hover:
color = COLORS['KEY_HOVER']
text_color = COLORS['TEXT_NORMAL']
else:
color = COLORS['KEY_IDLE']
text_color = COLORS['TEXT_NORMAL']
        # Draw the filled key with a thin border
cv2.rectangle(img, (x, y), (x + bw, y + bh), color, -1, cv2.LINE_AA)
cv2.rectangle(img, (x, y), (x + bw, y + bh), (100, 100, 100), 1, cv2.LINE_AA)
        # Draw the key label (offset roughly toward the key's center)
        cv2.putText(img, button.text, (x + int(0.2 * bw), y + int(0.7 * bh)),
                    cv2.FONT_HERSHEY_PLAIN, font_scale, text_color, font_thickness)
detections = []
if result.multi_hand_landmarks:
for hand_landmarks in result.multi_hand_landmarks:
mp_drawing.draw_landmarks(
img, hand_landmarks, mp_hands.HAND_CONNECTIONS,
mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=2, circle_radius=4),
mp_drawing.DrawingSpec(color=(0, 0, 255), thickness=2)
)
h, w, _ = img.shape
x_min, y_min = w, h
x_max, y_max = 0, 0
for lm in hand_landmarks.landmark:
x, y = int(lm.x * w), int(lm.y * h)
x_min, y_min = min(x_min, x), min(y_min, y)
x_max, y_max = max(x_max, x), max(y_max, y)
bbox = [x_min, y_min, x_max - x_min, y_max - y_min]
detections.append(Detection(label="Hand", score=0.5, box=np.array(bbox)))
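            # Pinch gesture: measure the pixel distance between the thumb tip (landmark 4)
            # and the index fingertip (landmark 8)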
x4, y4 = int(hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP].x * w), int(hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP].y * h)
x8, y8 = int(hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].x * w), int(hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].y * h)
distance = np.sqrt((x8 - x4) ** 2 + (y8 - y4) ** 2)
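            # The click threshold is 20% of the hand bounding-box diagonal, so it scales with
            # how close the hand is to the camera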
click_threshold = 0.2 * np.sqrt(bbox[2] ** 2 + bbox[3] ** 2)
            for button in buttonList:
                x, y = button.pos
                bw, bh = button.size
                button.is_hover = False
                button.is_pressed = False
                # Hover: the index fingertip is over this key
                if x < x8 < x + bw and y < y8 < y + bh:
                    button.is_hover = True
                    cv2.rectangle(img, (x, y), (x + bw, y + bh), COLORS['KEY_HOVER'], -1, cv2.LINE_AA)
                    cv2.putText(img, button.text, (x + int(0.2 * bw), y + int(0.7 * bh)),
                                cv2.FONT_HERSHEY_PLAIN, font_scale, COLORS['TEXT_NORMAL'], font_thickness)
                    # Press: thumb and index fingertips pinch together
                    if distance < click_threshold:
                        button.is_pressed = True
                        cv2.rectangle(img, (x, y), (x + bw, y + bh), COLORS['KEY_PRESS'], -1, cv2.LINE_AA)
                        cv2.putText(img, button.text, (x + int(0.2 * bw), y + int(0.7 * bh)),
                                    cv2.FONT_HERSHEY_PLAIN, font_scale, COLORS['TEXT_PRESS'], font_thickness)
                        # Debounce: accept at most one key press every 2 seconds
                        if time.time() - prev_key_time[0] > 2:
                            prev_key_time[0] = time.time()
                            if button.text == 'BS':
                                output_text = output_text[:-1]  # Remove last character
                            elif button.text == 'SPACE':
                                output_text += ' '  # Add a space
                            else:
                                output_text += button.text  # Append the pressed key
    # Output text bar
output_bg_height = int(0.15 * h)
output_y = int(0.8 * h)
    # Draw the output background bar
cv2.rectangle(img,
(int(0.05 * w), output_y - output_bg_height),
(int(0.95 * w), output_y),
COLORS['OUTPUT_BG'], -1, cv2.LINE_AA)
    # Draw the typed text vertically centered inside the bar
text_size = cv2.getTextSize(output_text, cv2.FONT_HERSHEY_SIMPLEX, 1, 2)[0]
text_x = int(0.07 * w)
text_y = output_y - output_bg_height//2 + text_size[1]//2
cv2.putText(img, output_text, (text_x, text_y),
cv2.FONT_HERSHEY_SIMPLEX, 1, COLORS['TEXT_NORMAL'], 2)
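    # Publish this frame's hand detections so the main Streamlit thread can read them if needed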
result_queue.put(detections)
return av.VideoFrame.from_ndarray(img, format="bgr24")
# WebRTC Streamer
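# "iceTransportPolicy": "relay" routes media through the TURN servers returned by
# get_ice_servers(), which is more reliable behind restrictive NATs at the cost of extra latency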
webrtc_streamer(
key="virtual-keyboard",
mode=WebRtcMode.SENDRECV,
rtc_configuration={"iceServers": get_ice_servers(), "iceTransportPolicy": "relay"},
media_stream_constraints={"video": True, "audio": False},
video_frame_callback=video_frame_callback,
async_processing=True,
)