Pratyush101 committed on
Commit
1c5d1dd
·
verified ·
1 Parent(s): 3a44bd9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -72
app.py CHANGED
@@ -1,14 +1,3 @@
1
- import logging
2
- import queue
3
- from typing import List, NamedTuple
4
- import av
5
- import cv2
6
- import numpy as np
7
- import streamlit as st
8
- from streamlit_webrtc import WebRtcMode, webrtc_streamer
9
- from sample_utils.turn import get_ice_servers
10
- import mediapipe as mp
11
- import os
12
  import time
13
 
14
  # Logger Setup
@@ -21,41 +10,13 @@ st.subheader('''Turn on the webcam and use hand gestures to interact with the vi
21
 
22
  # Initialize MediaPipe and Background Segmentor
23
  mp_hands = mp.solutions.hands
24
- hands = mp_hands.Hands(max_num_hands=1, min_detection_confidence=0.5)
25
- mp_drawing = mp.solutions.drawing_utils
26
-
27
- # Virtual Keyboard Layout
28
- keys = [["Q", "W", "E", "R", "T", "Y", "U", "I", "O", "P"],
29
- ["A", "S", "D", "F", "G", "H", "J", "K", "L", ";"],
30
  ["Z", "X", "C", "V", "B", "N", "M", ",", ".", "/"]]
31
 
32
  class Button:
33
- def _init_(self, pos, text, size=[100, 100]):
34
  self.pos = pos
35
  self.size = size
36
  self.text = text
37
-
38
- class Detection(NamedTuple):
39
- label: str
40
- score: float
41
- box: np.ndarray
42
-
43
- result_queue: "queue.Queue[List[Detection]]" = queue.Queue()
44
-
45
- indexImg = 0
46
- output_text = ""
47
- prev_key_time = [time.time()] * 2
48
-
49
- if "output_text" not in st.session_state:
50
- st.session_state["output_text"] = ""
51
-
52
- # Video Frame Callback with Your Logic
53
- def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
54
- global indexImg, output_text
55
-
56
- img = frame.to_ndarray(format="bgr24")
57
- result = hands.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
58
-
59
  # Create the keyboard buttons
60
  buttonList = []
61
  h, w = img.shape[:2]
@@ -76,40 +37,14 @@ def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
76
 
77
  # Draw Keyboard Buttons
78
  for button in buttonList:
79
- x, y = button.pos
80
- bw, bh = button.size
81
- cv2.rectangle(img, (x, y), (x + bw, y + bh), (200, 200, 200), -1)
82
- cv2.putText(img, button.text, (x + int(0.2 * bw), y + int(0.7 * bh)), cv2.FONT_HERSHEY_PLAIN, font_scale, (0, 0, 0), font_thickness)
83
-
84
- detections = []
85
- if result.multi_hand_landmarks:
86
- for hand_landmarks in result.multi_hand_landmarks:
87
- mp_drawing.draw_landmarks(
88
- img, hand_landmarks, mp_hands.HAND_CONNECTIONS,
89
- mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=2, circle_radius=4),
90
- mp_drawing.DrawingSpec(color=(0, 0, 255), thickness=2)
91
- )
92
-
93
- h, w, _ = img.shape
94
- x_min, y_min = w, h
95
- x_max, y_max = 0, 0
96
- for lm in hand_landmarks.landmark:
97
- x, y = int(lm.x * w), int(lm.y * h)
98
- x_min, y_min = min(x_min, x), min(y_min, y)
99
- x_max, y_max = max(x_max, x), max(y_max, y)
100
-
101
- bbox = [x_min, y_min, x_max - x_min, y_max - y_min]
102
- detections.append(Detection(label="Hand", score=0.5, box=np.array(bbox)))
103
-
104
  x4, y4 = int(hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP].x * w), int(hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP].y * h)
105
  x8, y8 = int(hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].x * w), int(hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].y * h)
106
 
107
- distance = np.sqrt((x8 - x4) * 2 + (y8 - y4) * 2)
108
- click_threshold = 0.2*np.sqrt(bbox[2] * 2 + bbox[3] * 2)
109
 
110
  for button in buttonList:
111
  x, y = button.pos
112
- bw, bh = button.size
113
  if x < x8 < x + bw and y < y8 < y + bh:
114
  cv2.rectangle(img, (x, y), (x + bw, y + bh), (0, 255, 160), -1)
115
  cv2.putText(img, button.text, (x + int(0.2 * bw), y + int(0.7 * bh)), cv2.FONT_HERSHEY_PLAIN, font_scale, (255, 255, 255), font_thickness)
@@ -124,11 +59,13 @@ def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
124
  else:
125
  output_text += ' ' # Add space
126
  # Position and dimensions for the rectangle
 
127
  text_x = int(0.05 * w)
128
  text_y = int(0.70 * h)
129
  text_width = int(0.9 * w) # Adjust width as needed
130
  text_height = int(0.1 * h) # Adjust height as needed
131
  # Draw the rectangle
 
132
  cv2.rectangle(img,
133
  (text_x, text_y - text_height), # Top-left corner
134
  (text_x + text_width, text_y), # Bottom-right corner
@@ -144,10 +81,6 @@ def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
144
  return av.VideoFrame.from_ndarray(img, format="bgr24")
145
 
146
  # WebRTC Streamer
147
- webrtc_streamer(
148
- key="virtual-keyboard",
149
- mode=WebRtcMode.SENDRECV,
150
- rtc_configuration={"iceServers": get_ice_servers(), "iceTransportPolicy": "relay"},
151
  media_stream_constraints={"video": True, "audio": False},
152
  video_frame_callback=video_frame_callback,
153
  async_processing=True,
 
 
 
 
 
 
 
 
 
 
 
 
1
  import time
2
 
3
  # Logger Setup
 
10
 
11
  # Initialize MediaPipe and Background Segmentor
12
  mp_hands = mp.solutions.hands
 
 
 
 
 
 
13
  ["Z", "X", "C", "V", "B", "N", "M", ",", ".", "/"]]
14
 
15
  class Button:
16
+ def __init__(self, pos, text, size=[100, 100]):
17
  self.pos = pos
18
  self.size = size
19
  self.text = text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  # Create the keyboard buttons
21
  buttonList = []
22
  h, w = img.shape[:2]
 
37
 
38
  # Draw Keyboard Buttons
39
  for button in buttonList:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  x4, y4 = int(hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP].x * w), int(hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP].y * h)
41
  x8, y8 = int(hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].x * w), int(hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].y * h)
42
 
43
+ distance = np.sqrt((x8 - x4) ** 2 + (y8 - y4) ** 2)
44
+ click_threshold = 0.2*np.sqrt(bbox[2] ** 2 + bbox[3] ** 2)
45
 
46
  for button in buttonList:
47
  x, y = button.pos
 
48
  if x < x8 < x + bw and y < y8 < y + bh:
49
  cv2.rectangle(img, (x, y), (x + bw, y + bh), (0, 255, 160), -1)
50
  cv2.putText(img, button.text, (x + int(0.2 * bw), y + int(0.7 * bh)), cv2.FONT_HERSHEY_PLAIN, font_scale, (255, 255, 255), font_thickness)
 
59
  else:
60
  output_text += ' ' # Add space
61
  # Position and dimensions for the rectangle
62
+
63
  text_x = int(0.05 * w)
64
  text_y = int(0.70 * h)
65
  text_width = int(0.9 * w) # Adjust width as needed
66
  text_height = int(0.1 * h) # Adjust height as needed
67
  # Draw the rectangle
68
+
69
  cv2.rectangle(img,
70
  (text_x, text_y - text_height), # Top-left corner
71
  (text_x + text_width, text_y), # Bottom-right corner
 
81
  return av.VideoFrame.from_ndarray(img, format="bgr24")
82
 
83
  # WebRTC Streamer
 
 
 
 
84
  media_stream_constraints={"video": True, "audio": False},
85
  video_frame_callback=video_frame_callback,
86
  async_processing=True,