Update app.py
Browse files
app.py
CHANGED
@@ -15,7 +15,7 @@ import time
|
|
15 |
logger = logging.getLogger(__name__)
|
16 |
|
17 |
# Streamlit settings
|
18 |
-
st.set_page_config(page_title="Virtual Keyboard", page_icon="
|
19 |
st.title("Interactive Virtual Keyboard")
|
20 |
st.subheader('''Turn on the webcam and use hand gestures to interact with the virtual keyboard.''')
|
21 |
|
@@ -29,19 +29,22 @@ keys = [["Q", "W", "E", "R", "T", "Y", "U", "I", "O", "P"],
|
|
29 |
["A", "S", "D", "F", "G", "H", "J", "K", "L", ";"],
|
30 |
["Z", "X", "C", "V", "B", "N", "M", ",", ".", "/"]]
|
31 |
|
|
|
32 |
class Button:
|
33 |
-
def
|
34 |
self.pos = pos
|
35 |
self.size = size
|
36 |
self.text = text
|
37 |
|
|
|
38 |
class Detection(NamedTuple):
|
39 |
label: str
|
40 |
score: float
|
41 |
box: np.ndarray
|
42 |
|
43 |
-
result_queue: "queue.Queue[List[Detection]]" = queue.Queue()
|
44 |
|
|
|
|
|
45 |
indexImg = 0
|
46 |
output_text = ""
|
47 |
prev_key_time = [time.time()] * 2
|
@@ -49,7 +52,8 @@ prev_key_time = [time.time()] * 2
|
|
49 |
if "output_text" not in st.session_state:
|
50 |
st.session_state["output_text"] = ""
|
51 |
|
52 |
-
|
|
|
53 |
def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
|
54 |
global indexImg, output_text
|
55 |
|
@@ -59,20 +63,20 @@ def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
|
|
59 |
# Create the keyboard buttons
|
60 |
buttonList = []
|
61 |
h, w = img.shape[:2]
|
62 |
-
key_width = int(0.
|
63 |
-
key_height = int(0.
|
64 |
-
font_scale = 0.
|
65 |
-
font_thickness = int(0.
|
66 |
|
67 |
for row, key_row in enumerate(keys):
|
68 |
for col, key in enumerate(key_row):
|
69 |
-
x = int(0.
|
70 |
-
y = int(0.
|
71 |
buttonList.append(Button([x, y], key, size=[key_width, key_height]))
|
72 |
|
73 |
# Add special buttons for Backspace and Space
|
74 |
-
buttonList.append(Button([int(0.
|
75 |
-
buttonList.append(Button([int(0.2 * w), int(0.
|
76 |
|
77 |
# Draw Keyboard Buttons
|
78 |
for button in buttonList:
|
@@ -104,16 +108,17 @@ def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
|
|
104 |
x4, y4 = int(hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP].x * w), int(hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP].y * h)
|
105 |
x8, y8 = int(hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].x * w), int(hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].y * h)
|
106 |
|
107 |
-
distance = np.sqrt((x8 - x4)
|
108 |
-
click_threshold = 0.2 * np.sqrt(bbox[2]
|
109 |
|
110 |
for button in buttonList:
|
111 |
x, y = button.pos
|
112 |
bw, bh = button.size
|
113 |
if x < x8 < x + bw and y < y8 < y + bh:
|
114 |
-
|
115 |
-
|
116 |
-
|
|
|
117 |
if time.time() - prev_key_time[0] > 2:
|
118 |
prev_key_time[0] = time.time()
|
119 |
if button.text != 'BS' and button.text != 'SPACE':
|
@@ -123,26 +128,24 @@ def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
|
|
123 |
else:
|
124 |
output_text += ' ' # Add space
|
125 |
|
126 |
-
#
|
127 |
text_x = int(0.05 * w)
|
128 |
-
text_y = int(0.
|
129 |
text_width = int(0.9 * w)
|
130 |
text_height = int(0.1 * h)
|
131 |
-
|
132 |
-
# Draw the background for output text box
|
133 |
cv2.rectangle(img,
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
|
139 |
-
#
|
140 |
-
cv2.putText(img, output_text, (text_x +
|
141 |
|
142 |
result_queue.put(detections)
|
143 |
-
|
144 |
return av.VideoFrame.from_ndarray(img, format="bgr24")
|
145 |
|
|
|
146 |
# WebRTC Streamer
|
147 |
webrtc_streamer(
|
148 |
key="virtual-keyboard",
|
@@ -151,4 +154,4 @@ webrtc_streamer(
|
|
151 |
media_stream_constraints={"video": True, "audio": False},
|
152 |
video_frame_callback=video_frame_callback,
|
153 |
async_processing=True,
|
154 |
-
)
|
|
|
15 |
# Module-level logger, named after this module per logging convention.
logger = logging.getLogger(__name__)

# Streamlit settings
# NOTE(review): the page icon is a weightlifting emoji, which looks unrelated
# to a virtual-keyboard app -- confirm it is intentional.
st.set_page_config(page_title="Virtual Keyboard", page_icon="🏋️")
st.title("Interactive Virtual Keyboard")
st.subheader('''Turn on the webcam and use hand gestures to interact with the virtual keyboard.''')
|
21 |
|
|
|
29 |
["A", "S", "D", "F", "G", "H", "J", "K", "L", ";"],
|
30 |
["Z", "X", "C", "V", "B", "N", "M", ",", ".", "/"]]
|
31 |
|
32 |
+
|
33 |
class Button:
    """A rectangular key rendered on the virtual keyboard overlay.

    Attributes:
        pos: Top-left corner as ``[x, y]`` in pixels.
        size: Width/height as ``[w, h]`` in pixels.
        text: Label drawn on the key (e.g. ``"Q"``, ``"SPACE"``, ``"BS"``).
    """

    def __init__(self, pos, text, size=None):
        """Create a key at *pos* labeled *text*.

        Args:
            pos: ``[x, y]`` top-left position in pixels.
            text: Key label.
            size: Optional ``[w, h]``; defaults to ``[100, 100]``.
        """
        self.pos = pos
        # Bug fix: the original default ``size=[100, 100]`` was a shared
        # mutable list -- mutating one button's size would silently change
        # every other button created with the default. Use a None sentinel
        # and build a fresh list per instance instead.
        self.size = [100, 100] if size is None else size
        self.text = text
|
38 |
|
39 |
+
|
40 |
class Detection(NamedTuple):
    """A single detection result pushed through ``result_queue`` per frame."""

    # Class label of the detected object.
    label: str
    # Detector confidence -- presumably in [0, 1]; verify against the producer.
    score: float
    # Bounding box array -- layout appears to be (x, y, w, h) based on how
    # bbox[2]/bbox[3] are used as extents in the callback; TODO confirm.
    box: np.ndarray
|
44 |
|
|
|
45 |
|
46 |
+
# Global variables
# Thread-safe queue carrying each frame's List[Detection] from the WebRTC
# video callback thread back to the Streamlit main thread.
result_queue: "queue.Queue[List[Detection]]" = queue.Queue()
# Index of the current keyboard image/skin -- presumably; its use is not
# visible in this chunk, confirm against the rest of the file.
indexImg = 0
# Text typed so far via the virtual keyboard.
output_text = ""
# Timestamps of the last accepted key presses, used to debounce input
# (the callback requires > 2 s between accepted presses).
prev_key_time = [time.time()] * 2
|
|
|
52 |
# Initialize the session-scoped output text only on first run, so an
# existing value is not overwritten by subsequent script reruns.
if "output_text" not in st.session_state:
    st.session_state["output_text"] = ""
|
54 |
|
55 |
+
|
56 |
+
# Video Frame Callback with Logic
|
57 |
def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
|
58 |
global indexImg, output_text
|
59 |
|
|
|
63 |
# Create the keyboard buttons
|
64 |
buttonList = []
|
65 |
h, w = img.shape[:2]
|
66 |
+
key_width = int(0.07 * w)
|
67 |
+
key_height = int(0.09 * h)
|
68 |
+
font_scale = 0.0045 * w
|
69 |
+
font_thickness = int(0.009 * h)
|
70 |
|
71 |
for row, key_row in enumerate(keys):
|
72 |
for col, key in enumerate(key_row):
|
73 |
+
x = int(0.03 * w + col * (key_width + 5))
|
74 |
+
y = int(0.03 * h + row * (key_height + 5))
|
75 |
buttonList.append(Button([x, y], key, size=[key_width, key_height]))
|
76 |
|
77 |
# Add special buttons for Backspace and Space
|
78 |
+
buttonList.append(Button([int(0.9 * w), int(0.03 * h)], 'BS', size=[int(0.08 * w), key_height]))
|
79 |
+
buttonList.append(Button([int(0.2 * w), int(0.4 * h)], 'SPACE', size=[int(0.6 * w), key_height]))
|
80 |
|
81 |
# Draw Keyboard Buttons
|
82 |
for button in buttonList:
|
|
|
108 |
x4, y4 = int(hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP].x * w), int(hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP].y * h)
|
109 |
x8, y8 = int(hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].x * w), int(hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].y * h)
|
110 |
|
111 |
+
distance = np.sqrt((x8 - x4) ** 2 + (y8 - y4) ** 2)
|
112 |
+
click_threshold = 0.2 * np.sqrt(bbox[2] ** 2 + bbox[3] ** 2)
|
113 |
|
114 |
for button in buttonList:
|
115 |
x, y = button.pos
|
116 |
bw, bh = button.size
|
117 |
if x < x8 < x + bw and y < y8 < y + bh:
|
118 |
+
cv2.rectangle(img, (x, y), (x + bw, y + bh), (0, 255, 160), -1)
|
119 |
+
cv2.putText(img, button.text, (x + int(0.2 * bw), y + int(0.7 * bh)), cv2.FONT_HERSHEY_PLAIN, font_scale, (255, 255, 255), font_thickness)
|
120 |
+
|
121 |
+
if distance < click_threshold:
|
122 |
if time.time() - prev_key_time[0] > 2:
|
123 |
prev_key_time[0] = time.time()
|
124 |
if button.text != 'BS' and button.text != 'SPACE':
|
|
|
128 |
else:
|
129 |
output_text += ' ' # Add space
|
130 |
|
131 |
+
# Draw a background rectangle for the output text
|
132 |
text_x = int(0.05 * w)
|
133 |
+
text_y = int(0.70 * h)
|
134 |
text_width = int(0.9 * w)
|
135 |
text_height = int(0.1 * h)
|
|
|
|
|
136 |
cv2.rectangle(img,
|
137 |
+
(text_x, text_y - text_height),
|
138 |
+
(text_x + text_width, text_y),
|
139 |
+
(100, 100, 100),
|
140 |
+
-1)
|
141 |
|
142 |
+
# Overlay the output text
|
143 |
+
cv2.putText(img, output_text, (text_x + int(0.02 * w), text_y - int(0.02 * h)), cv2.FONT_HERSHEY_PLAIN, 2, (255, 255, 255), 5)
|
144 |
|
145 |
result_queue.put(detections)
|
|
|
146 |
return av.VideoFrame.from_ndarray(img, format="bgr24")
|
147 |
|
148 |
+
|
149 |
# WebRTC Streamer
|
150 |
webrtc_streamer(
|
151 |
key="virtual-keyboard",
|
|
|
154 |
media_stream_constraints={"video": True, "audio": False},
|
155 |
video_frame_callback=video_frame_callback,
|
156 |
async_processing=True,
|
157 |
+
)
|