streamlit-webrtc-example-experimental

Running

App Files Files Community

streamlit-webrtc-example-experimental / app.py

Pratyush101

Update app.py

a078ac1 verified 7 months ago

raw

history blame

5.54 kB

	import logging
	import queue
	from typing import List, NamedTuple
	import av
	import cv2
	import numpy as np
	import streamlit as st
	from streamlit_webrtc import WebRtcMode, webrtc_streamer
	from sample_utils.turn import get_ice_servers
	import mediapipe as mp
	import os
	import time

	# Logger Setup
	logger = logging.getLogger(__name__)

	# Streamlit settings
	st.set_page_config(page_title="Virtual Keyboard", page_icon="🏋️")
	st.title("Interactive Virtual Keyboard")
	# The hint describes the gesture the frame callback recognises (index
	# fingertip over a key plus a thumb/index pinch). The previous text promised
	# 'a'/'d' background switching, which nothing in this script implements.
	st.subheader(
	    "Turn on the webcam and use hand gestures to interact with the virtual keyboard.\n"
	    "Hover your index fingertip over a key and pinch it against your thumb to type."
	)

	# Initialize the MediaPipe hand tracker and its landmark-drawing helpers
	# (no background segmentor is created here).
	mp_hands = mp.solutions.hands
	# At most one hand is tracked, with a minimum detection confidence of 0.5.
	hands = mp_hands.Hands(max_num_hands=1, min_detection_confidence=0.5)
	mp_drawing = mp.solutions.drawing_utils

	# Virtual Keyboard Layout: one string per keyboard row, expanded into a
	# list of single-character key labels (top row first).
	keys = [list(row_chars) for row_chars in ("QWERTYUIOP", "ASDFGHJKL;", "ZXCVBNM,./")]

	class Button:
	    """Rectangular on-screen key of the virtual keyboard.

	    Attributes:
	        pos: ``[x, y]`` of the button's top-left corner.
	        size: ``[width, height]`` of the button.
	        text: Label drawn on the button and used to identify the key.
	    """

	    def __init__(self, pos, text, size=None):
	        """Store the button geometry and label.

	        Args:
	            pos: ``[x, y]`` top-left corner.
	            text: Key label.
	            size: ``[width, height]``; defaults to ``[100, 100]``.
	        """
	        self.pos = pos
	        # Build the default per instance: a list literal in the signature is
	        # created once and would be shared (and mutated) across all buttons.
	        self.size = [100, 100] if size is None else size
	        self.text = text

	class Detection(NamedTuple):
	    """One detection record queued by the video frame callback."""

	    label: str  # name of the detected object; the frame callback uses "Hand"
	    score: float  # confidence value; the frame callback stores a fixed 0.5
	    box: np.ndarray  # [x_min, y_min, width, height], as built by the frame callback

	# The video frame callback pushes one list of detections per frame here.
	# NOTE(review): nothing in the visible code reads from this queue, so it
	# appears to grow for as long as the stream runs — confirm a consumer exists.
	result_queue: "queue.Queue[List[Detection]]" = queue.Queue()

	# NOTE(review): indexImg is declared global in the frame callback but is not
	# read or written anywhere in the visible code.
	indexImg = 0
	# Text typed so far; updated by the video frame callback.
	output_text = ""
	# Timestamps used to debounce key presses; only index 0 is used in the
	# visible code.
	prev_key_time = [time.time()] * 2

	# Make sure the session key exists before the text area reads it.
	if "output_text" not in st.session_state:
	    st.session_state["output_text"] = ""

	# Video frame callback and its private keyboard helpers


	def _build_buttons(w, h):
	    """Return the keyboard's Button objects laid out for a ``w`` x ``h`` frame."""
	    key_width = int(0.08 * w)
	    key_height = int(0.1 * h)

	    buttons = []
	    for row, key_row in enumerate(keys):
	        for col, key in enumerate(key_row):
	            x = int(0.03 * w + col * (key_width + 5))
	            y = int(0.03 * h + row * (key_height + 5))
	            buttons.append(Button([x, y], key, size=[key_width, key_height]))

	    # Backspace sits on its own row directly below the letter rows. Placing it
	    # on the top row (x = 0.7 * w) laid it over the top-row letter keys there;
	    # those letters are hit-tested first and the press debounce then
	    # swallowed the Backspace, making it effectively unreachable.
	    bs_y = int(0.03 * h + len(keys) * (key_height + 5))
	    buttons.append(Button([int(0.7 * w), bs_y], 'BS', size=[int(0.1 * w), key_height]))
	    buttons.append(Button([int(0.2 * w), int(0.8 * h)], 'SPACE', size=[int(0.6 * w), key_height]))
	    return buttons


	def _draw_button(img, button, fill_color, text_color, font_scale, font_thickness):
	    """Draw ``button`` on ``img`` as a filled rectangle with its label."""
	    x, y = button.pos
	    bw, bh = button.size
	    cv2.rectangle(img, (x, y), (x + bw, y + bh), fill_color, -1)
	    cv2.putText(img, button.text, (x + int(0.2 * bw), y + int(0.7 * bh)),
	                cv2.FONT_HERSHEY_PLAIN, font_scale, text_color, font_thickness)


	def _apply_key(text, key):
	    """Return ``text`` after pressing ``key`` ('BS' deletes, 'SPACE' adds a blank)."""
	    if key == 'BS':
	        return text[:-1]
	    if key == 'SPACE':
	        return text + ' '
	    return text + key


	def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
	    """Overlay the virtual keyboard on ``frame`` and register pinch key presses.

	    For every detected hand the landmarks are drawn, a bounding box is queued
	    on ``result_queue`` as a ``Detection``, and the key under the index
	    fingertip is highlighted. A key is pressed when the thumb-tip/index-tip
	    distance is below 10 % of the hand bounding-box diagonal and more than
	    2 seconds have passed since the previous press.

	    Args:
	        frame: Incoming video frame.

	    Returns:
	        A new BGR frame with the keyboard and hand landmarks drawn on it.
	    """
	    global output_text

	    img = frame.to_ndarray(format="bgr24")
	    result = hands.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

	    # Keyboard geometry and font size scale with the frame dimensions.
	    h, w = img.shape[:2]
	    font_scale = 0.005 * w
	    font_thickness = int(0.01 * h)
	    buttonList = _build_buttons(w, h)

	    # Draw Keyboard Buttons
	    for button in buttonList:
	        _draw_button(img, button, (200, 200, 200), (0, 0, 0), font_scale, font_thickness)

	    click_threshold = 10  # pinch distance as a percentage of the hand-box diagonal

	    detections = []
	    # multi_hand_landmarks is falsy when no hand was found.
	    for hand_landmarks in result.multi_hand_landmarks or []:
	        mp_drawing.draw_landmarks(
	            img, hand_landmarks, mp_hands.HAND_CONNECTIONS,
	            mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=2, circle_radius=4),
	            mp_drawing.DrawingSpec(color=(0, 0, 255), thickness=2)
	        )

	        # Bounding box of all landmarks, scaled from normalised to frame coordinates.
	        x_min, y_min = w, h
	        x_max, y_max = 0, 0
	        for lm in hand_landmarks.landmark:
	            x, y = int(lm.x * w), int(lm.y * h)
	            x_min, y_min = min(x_min, x), min(y_min, y)
	            x_max, y_max = max(x_max, x), max(y_max, y)

	        bbox = [x_min, y_min, x_max - x_min, y_max - y_min]
	        detections.append(Detection(label="Hand", score=0.5, box=np.array(bbox)))

	        thumb_tip = hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP]
	        index_tip = hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP]
	        x4, y4 = int(thumb_tip.x * w), int(thumb_tip.y * h)
	        x8, y8 = int(index_tip.x * w), int(index_tip.y * h)

	        # Normalising by the hand-box diagonal keeps the pinch test independent
	        # of how far the hand is from the camera. A zero-size box cannot be a
	        # pinch and is skipped to avoid dividing by zero.
	        distance = np.sqrt((x8 - x4) ** 2 + (y8 - y4) ** 2)
	        hand_diagonal = np.sqrt(bbox[2] ** 2 + bbox[3] ** 2)
	        is_pinch = hand_diagonal > 0 and (distance / hand_diagonal) * 100 < click_threshold

	        for button in buttonList:
	            x, y = button.pos
	            bw, bh = button.size
	            if not (x < x8 < x + bw and y < y8 < y + bh):
	                continue

	            # Highlight the key under the index fingertip.
	            _draw_button(img, button, (0, 255, 160), (255, 255, 255), font_scale, font_thickness)

	            # Debounce: accept at most one key press every 2 seconds.
	            if is_pinch and time.time() - prev_key_time[0] > 2:
	                prev_key_time[0] = time.time()
	                output_text = _apply_key(output_text, button.text)

	    result_queue.put(detections)
	    # NOTE(review): streamlit-webrtc documents that frame callbacks run in a
	    # separate thread where Streamlit APIs do not work, so this write may never
	    # reach the page's session state — confirm, and hand the text over through
	    # a thread-safe object read by the main script if so.
	    st.session_state["output_text"] = output_text

	    return av.VideoFrame.from_ndarray(img, format="bgr24")

	# Show the typed text stored in the session state.
	# NOTE(review): this is evaluated once per script run; nothing in the visible
	# code triggers a rerun when the frame callback types a key — confirm how the
	# text area is expected to refresh.
	st.text_area("Typed Text", st.session_state["output_text"], height=150)

	# WebRTC Streamer: sends the webcam video to the server and plays back the
	# frames returned by video_frame_callback (video only, no audio).
	webrtc_streamer(
	    key="virtual-keyboard",
	    mode=WebRtcMode.SENDRECV,
	    # "relay" restricts ICE candidates to relayed ones, so the servers returned
	    # by get_ice_servers() presumably need to include a TURN server — verify.
	    rtc_configuration={"iceServers": get_ice_servers(), "iceTransportPolicy": "relay"},
	    media_stream_constraints={"video": True, "audio": False},
	    video_frame_callback=video_frame_callback,
	    async_processing=True,
	)