Update app.py
app.py
CHANGED
@@ -20,7 +20,7 @@ st.title("Interactive Virtual Keyboard")
 st.subheader('''Turn on the webcam and use hand gestures to interact with the virtual keyboard.
 Use 'a' and 'd' from the keyboard to change the background.''')
 
-# Initialize MediaPipe
+# Initialize MediaPipe and Background Segmentor
 mp_hands = mp.solutions.hands
 hands = mp_hands.Hands(max_num_hands=1, min_detection_confidence=0.5)
 mp_drawing = mp.solutions.drawing_utils
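The renamed comment mentions a background segmentor, but its setup sits outside this hunk. For context, a minimal sketch of how such an initialization commonly looks with MediaPipe's selfie-segmentation solution; the `segmentor` name and the `model_selection` value are assumptions, not taken from this file:

```python
import mediapipe as mp

# Hand tracking, as in the diff above
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(max_num_hands=1, min_detection_confidence=0.5)

# Assumed setup for the "Background Segmentor" the comment refers to:
# MediaPipe selfie segmentation yields a per-pixel foreground mask that
# can be used to swap the background (the 'a'/'d' feature in the app).
mp_selfie = mp.solutions.selfie_segmentation
segmentor = mp_selfie.SelfieSegmentation(model_selection=1)
```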
@@ -43,6 +43,7 @@ class Detection(NamedTuple):
 
 result_queue: "queue.Queue[List[Detection]]" = queue.Queue()
 
+
 indexImg = 0
 output_text = ""
 prev_key_time = [time.time()] * 2
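`prev_key_time = [time.time()] * 2` seeds the timestamps that later rate-limit key presses (the callback only accepts a press after a 2-second gap). A self-contained sketch of that debounce pattern; the `try_press` helper is invented for illustration:

```python
import time

prev_key_time = [time.time()] * 2  # one timestamp slot per debounced action

def try_press(slot: int, min_gap: float = 2.0) -> bool:
    """Accept a press only if min_gap seconds have passed for this slot."""
    now = time.time()
    if now - prev_key_time[slot] > min_gap:
        prev_key_time[slot] = now
        return True
    return False
```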
@@ -50,7 +51,7 @@ prev_key_time = [time.time()] * 2
 if "output_text" not in st.session_state:
     st.session_state["output_text"] = ""
 
-# Video Frame Callback with Logic
+# Video Frame Callback with Your Logic
 def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
     global indexImg, output_text
 
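The callback receives and returns `av.VideoFrame` objects, so its body has to convert to a NumPy image and back; that conversion falls outside the hunks shown here. A minimal skeleton of the pattern streamlit-webrtc documents, with `bgr24` as the usual format for OpenCV processing:

```python
import av

def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
    img = frame.to_ndarray(format="bgr24")   # av.VideoFrame -> BGR NumPy array
    # ... run MediaPipe and draw the keyboard on img here ...
    return av.VideoFrame.from_ndarray(img, format="bgr24")
```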
@@ -59,10 +60,7 @@ def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
     # Process frame using MediaPipe
     result = hands.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
 
-
-    buttonList = [Button([30 + col * 105, 30 + row * 120], key)
-                  for row, line in enumerate(keys)
-                  for col, key in enumerate(line)]
+    buttonList = [Button([30 + col * 105, 30 + row * 120], key) for row, line in enumerate(keys) for col, key in enumerate(line)]
 
     detections = []
    if result.multi_hand_landmarks:
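The comprehension builds one `Button` per key on a 105 x 120 px grid pitch. The `Button` class itself is not part of the diff; a hypothetical minimal version consistent with the attributes the drawing code reads (`pos`, `text`, and a size, here guessed at 100 x 100 from the grid spacing):

```python
class Button:
    """Hypothetical minimal Button; attributes match what the diff accesses."""
    def __init__(self, pos, text, size=(100, 100)):
        self.pos = pos    # top-left [x, y] of the key on the frame
        self.text = text  # label drawn on the key
        self.size = size  # (width, height); assumed, not shown in the diff
```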
@@ -87,14 +85,13 @@ def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
         detections.append(Detection(label="Hand", score=1.0, box=np.array(bbox)))
 
         # Extract finger tip positions
-        x4, y4 = int(hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP].x * w),
-        int(hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP].y * h)
-        x8, y8 = int(hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].x * w),
-        int(hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].y * h)
+        x4, y4 = int(hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP].x * w), int(hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP].y * h)
+        x8, y8 = int(hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].x * w), int(hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].y * h)
+
 
-        #
+        # Distance Calculation
         distance = np.sqrt((x8 - x4) ** 2 + (y8 - y4) ** 2)
-        click_threshold =
+        click_threshold = 10
 
         for button in buttonList:
             x, y = button.pos
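The click test divides the thumb-index distance by the hand bounding-box diagonal, so `click_threshold = 10` means "fingertips closer than 10% of the hand's apparent size", which stays roughly scale-invariant as the hand moves toward or away from the camera. A standalone illustration with made-up coordinates:

```python
import numpy as np

x4, y4 = 210, 330            # thumb tip (example pixels)
x8, y8 = 220, 315            # index finger tip (example pixels)
bbox = (180, 280, 160, 190)  # hand bounding box: x, y, w, h

distance = np.hypot(x8 - x4, y8 - y4)                   # ~18.0 px pinch distance
relative = distance / np.hypot(bbox[2], bbox[3]) * 100  # percent of hand diagonal

click_threshold = 10
print(f"{relative:.1f}% -> click: {relative < click_threshold}")  # ~7.3% -> True
```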
@@ -103,7 +100,7 @@ def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
                 cv2.rectangle(img, button.pos, (x + w, y + h), (0, 255, 160), -1)
                 cv2.putText(img, button.text, (x + 20, y + 70), cv2.FONT_HERSHEY_PLAIN, 5, (255, 255, 255), 3)
 
-                # Simulate
+                # Simulate key press if finger close enough
                 if (distance / np.sqrt(bbox[2] ** 2 + bbox[3] ** 2)) * 100 < click_threshold:
                     if time.time() - prev_key_time[0] > 2:
                         prev_key_time[0] = time.time()
@@ -126,4 +123,4 @@ webrtc_streamer(
     media_stream_constraints={"video": True, "audio": False},
     video_frame_callback=video_frame_callback,
     async_processing=True,
-)
+)
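With `async_processing=True` the callback runs on a worker thread, so it cannot touch Streamlit widgets or session state directly; that is what the `result_queue` declared earlier is for. A self-contained sketch of the hand-off (the `put`/`get_nowait` calls are illustrative; the file's consumer side is not shown in this diff):

```python
import queue
from typing import List, NamedTuple

import numpy as np

class Detection(NamedTuple):
    label: str
    score: float
    box: np.ndarray

result_queue: "queue.Queue[List[Detection]]" = queue.Queue()

# The worker thread (the video callback) enqueues results...
result_queue.put([Detection("Hand", 1.0, np.array([10, 20, 100, 120]))])

# ...and the Streamlit script drains them without blocking its rerun loop.
try:
    latest = result_queue.get_nowait()
    print([d.label for d in latest])
except queue.Empty:
    pass
```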