Update app.py
app.py
CHANGED
@@ -43,7 +43,6 @@ class Detection(NamedTuple):
 
 result_queue: "queue.Queue[List[Detection]]" = queue.Queue()
 
-
 indexImg = 0
 output_text = ""
 prev_key_time = [time.time()] * 2
@@ -56,8 +55,6 @@ def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
     global indexImg, output_text
 
     img = frame.to_ndarray(format="bgr24")
-    # h, w = img.shape[:2]
-    # Process frame using MediaPipe
     result = hands.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
 
     # Create the keyboard buttons
@@ -85,39 +82,15 @@ def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
         cv2.rectangle(img, (x, y), (x + bw, y + bh), (200, 200, 200), -1)
         cv2.putText(img, button.text, (x + int(0.2 * bw), y + int(0.7 * bh)), cv2.FONT_HERSHEY_PLAIN, font_scale, (0, 0, 0), font_thickness)
 
-    # # Create a blank canvas for drawing the keyboard
-    # # keyboard_canvas = np.zeros_like(img)
-    # buttonList = []
-    # # Define buttons in each row of the virtual keyboard
-    # for key in keys[0]:
-    #     buttonList.append(Button([30 + keys[0].index(key) * 105, 30], key))
-    # for key in keys[1]:
-    #     buttonList.append(Button([30 + keys[1].index(key) * 105, 150], key))
-    # for key in keys[2]:
-    #     buttonList.append(Button([30 + keys[2].index(key) * 105, 260], key))
-
-    # # Add special buttons for Backspace and Space
-    # buttonList.append(Button([90 + 10 * 100, 30], 'BS', size=[125, 100]))
-    # buttonList.append(Button([300, 370], 'SPACE', size=[500, 100]))
-
-    # # Draw Keyboard Buttons
-    # for button in buttonList:
-    #     x, y = button.pos
-    #     w, h = button.size
-    #     cv2.rectangle(img, (x, y), (x + w, y + h), (200, 200, 200), -1)
-    #     cv2.putText(img, button.text, (x + 20, y + 70), cv2.FONT_HERSHEY_PLAIN, 5, (0, 0, 0), 3)
-
     detections = []
     if result.multi_hand_landmarks:
         for hand_landmarks in result.multi_hand_landmarks:
-            # Draw hand landmarks
             mp_drawing.draw_landmarks(
                 img, hand_landmarks, mp_hands.HAND_CONNECTIONS,
                 mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=2, circle_radius=4),
                 mp_drawing.DrawingSpec(color=(0, 0, 255), thickness=2)
             )
 
-            # Extract bounding box for each hand
             h, w, _ = img.shape
             x_min, y_min = w, h
             x_max, y_max = 0, 0
@@ -129,11 +102,9 @@ def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
             bbox = [x_min, y_min, x_max - x_min, y_max - y_min]
             detections.append(Detection(label="Hand", score=0.5, box=np.array(bbox)))
 
-            # Extract finger tip positions
             x4, y4 = int(hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP].x * w), int(hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP].y * h)
             x8, y8 = int(hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].x * w), int(hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].y * h)
 
-            # Check for whether the finger is in button bounds
             for button in buttonList:
                 x, y = button.pos
                 bw, bh = button.size
@@ -141,22 +112,18 @@ def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
                     cv2.rectangle(img, (x, y), (x + bw, y + bh), (0, 255, 160), -1)
                     cv2.putText(img, button.text, (x + int(0.2 * bw), y + int(0.7 * bh)), cv2.FONT_HERSHEY_PLAIN, font_scale, (255, 255, 255), font_thickness)
 
-
-
-                    # Distance Calculation
                     distance = np.sqrt((x8 - x4) ** 2 + (y8 - y4) ** 2)
                     click_threshold = 10
-
-
-
-
-
-
-
-
-
-
-                    # output_text += ' '
+
+                    if (distance / np.sqrt(bbox[2] ** 2 + bbox[3] ** 2)) * 100 < click_threshold:
+                        if time.time() - prev_key_time[0] > 2:
+                            prev_key_time[0] = time.time()
+                            if button.text != 'BS' and button.text != 'SPACE':
+                                output_text += button.text
+                            elif button.text == 'BS':
+                                output_text = output_text[:-1]
+                            else:
+                                output_text += ' '
 
     result_queue.put(detections)
     st.session_state["output_text"] = output_text
@@ -170,4 +137,4 @@ webrtc_streamer(
     media_stream_constraints={"video": True, "audio": False},
     video_frame_callback=video_frame_callback,
     async_processing=True,
-)
+)
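The click logic this commit adds normalizes the thumb-to-index distance by the diagonal of the hand's bounding box, so the 10% pinch threshold behaves the same whether the hand is near the camera or far from it, and the two-second check against prev_key_time debounces a single pinch so it cannot fire as several key presses. Below is a minimal standalone sketch of that check; the is_pinch_click helper and its constants are hypothetical names for illustration, not part of app.py:

import time
import numpy as np

CLICK_THRESHOLD = 10   # pinch distance as a percentage of the hand diagonal
DEBOUNCE_SECONDS = 2   # minimum gap between two accepted key presses

def is_pinch_click(x4, y4, x8, y8, bbox, last_press_time):
    """Return True when the thumb tip (x4, y4) and index tip (x8, y8) are
    pinched together, scaled by the hand bounding box (x, y, w, h) so the
    same gesture works at any distance from the camera."""
    distance = np.sqrt((x8 - x4) ** 2 + (y8 - y4) ** 2)
    hand_diagonal = np.sqrt(bbox[2] ** 2 + bbox[3] ** 2)
    if hand_diagonal == 0:
        return False  # degenerate box, nothing to scale against
    pinched = (distance / hand_diagonal) * 100 < CLICK_THRESHOLD
    debounced = time.time() - last_press_time > DEBOUNCE_SECONDS
    return pinched and debounced

# Fingertips ~12 px apart on a 180 x 200 px hand box: about 4.5% of the
# diagonal, under the 10% threshold, so this registers as a click.
print(is_pinch_click(100, 100, 108, 109, [50, 40, 180, 200], 0.0))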