Pratyush101 committed
Commit f68bda2 · verified · Parent: ce9d171

Update app.py

Files changed (1): app.py (+59, −23)
app.py CHANGED
@@ -8,6 +8,7 @@ import streamlit as st
 from streamlit_webrtc import WebRtcMode, webrtc_streamer
 from sample_utils.turn import get_ice_servers
 import mediapipe as mp
+from cvzone.SelfiSegmentationModule import SelfiSegmentation
 import os
 import time
 
@@ -20,16 +21,23 @@ st.title("Interactive Virtual Keyboard")
 st.subheader('''Turn on the webcam and use hand gestures to interact with the virtual keyboard.
 Use 'a' and 'd' from the keyboard to change the background.''')
 
-# Initialize MediaPipe Hand Detector
+# Initialize MediaPipe and Background Segmentor
 mp_hands = mp.solutions.hands
 hands = mp_hands.Hands(static_image_mode=False, max_num_hands=2, min_detection_confidence=0.7)
 mp_drawing = mp.solutions.drawing_utils
+segmentor = SelfiSegmentation()
 
-# Define virtual keyboard layout
+# Virtual Keyboard Layout
 keys = [["Q", "W", "E", "R", "T", "Y", "U", "I", "O", "P"],
         ["A", "S", "D", "F", "G", "H", "J", "K", "L", ";"],
         ["Z", "X", "C", "V", "B", "N", "M", ",", ".", "/"]]
 
+class Button:
+    def __init__(self, pos, text, size=[100, 100]):
+        self.pos = pos
+        self.size = size
+        self.text = text
+
 class Detection(NamedTuple):
     label: str
     score: float
@@ -37,7 +45,7 @@ class Detection(NamedTuple):
 
 result_queue: "queue.Queue[List[Detection]]" = queue.Queue()
 
-# Load background images
+# Load Background Images
 listImg = os.listdir('model/street') if os.path.exists('model/street') else []
 if not listImg:
     st.error("Error: 'street' directory is missing or empty. Please add background images.")
@@ -48,48 +56,76 @@ else:
 
 indexImg = 0
 output_text = ""
+prev_key_time = [time.time()] * 2
 
 if "output_text" not in st.session_state:
     st.session_state["output_text"] = ""
 
-# Video Frame Callback
+# Video Frame Callback: segmentation, hand tracking, and key presses
 def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
     global indexImg, output_text
 
     img = frame.to_ndarray(format="bgr24")
-    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
-
-    # Process the frame with MediaPipe
-    result = hands.process(img_rgb)
+    imgOut = segmentor.removeBG(img, imgList[indexImg])
+
+    # Process frame using MediaPipe
+    result = hands.process(cv2.cvtColor(imgOut, cv2.COLOR_BGR2RGB))
+
+    buttonList = [Button([30 + col * 105, 30 + row * 120], key) for row, line in enumerate(keys) for col, key in enumerate(line)]
 
     detections = []
     if result.multi_hand_landmarks:
         for hand_landmarks in result.multi_hand_landmarks:
+            # Draw hand landmarks
            mp_drawing.draw_landmarks(
-                img, hand_landmarks, mp_hands.HAND_CONNECTIONS,
+                imgOut, hand_landmarks, mp_hands.HAND_CONNECTIONS,
                 mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=2, circle_radius=4),
                 mp_drawing.DrawingSpec(color=(0, 0, 255), thickness=2)
             )
-            # Extract bounding box for detection info
-            x_min, y_min = 1.0, 1.0
-            x_max, y_max = 0.0, 0.0
+
+            # Extract bounding box for each hand
+            h, w, _ = imgOut.shape
+            x_min, y_min = w, h
+            x_max, y_max = 0, 0
             for lm in hand_landmarks.landmark:
-                x_min = min(x_min, lm.x)
-                y_min = min(y_min, lm.y)
-                x_max = max(x_max, lm.x)
-                y_max = max(y_max, lm.y)
-
-            h, w, _ = img.shape
-            bbox = np.array([int(x_min * w), int(y_min * h), int((x_max - x_min) * w), int((y_max - y_min) * h)])
-            detections.append(Detection(label="Hand", score=1.0, box=bbox))
-
-        logger.info(f"Detected {len(detections)} hand(s).")
-    else:
-        logger.info("No hands detected.")
+                x, y = int(lm.x * w), int(lm.y * h)
+                x_min, y_min = min(x_min, x), min(y_min, y)
+                x_max, y_max = max(x_max, x), max(y_max, y)
+
+            bbox = [x_min, y_min, x_max - x_min, y_max - y_min]
+            detections.append(Detection(label="Hand", score=1.0, box=np.array(bbox)))
+
+            # Extract fingertip positions (thumb tip and index fingertip)
+            x4, y4 = int(hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP].x * w), \
+                     int(hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP].y * h)
+            x8, y8 = int(hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].x * w), \
+                     int(hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].y * h)
 
+            # Thumb-to-index pinch distance, used as the "click" gesture
+            distance = np.sqrt((x8 - x4) ** 2 + (y8 - y4) ** 2)
+            click_threshold = 50
+
+            for button in buttonList:
+                x, y = button.pos
+                bw, bh = button.size  # button size (avoids shadowing the image's w, h)
+                if x < x8 < x + bw and y < y8 < y + bh:
+                    cv2.rectangle(imgOut, (x, y), (x + bw, y + bh), (0, 255, 160), -1)
+                    cv2.putText(imgOut, button.text, (x + 20, y + 70), cv2.FONT_HERSHEY_PLAIN, 5, (255, 255, 255), 3)
+
+                    # Simulate a key press if the pinch is close enough
+                    if (distance / np.sqrt(bbox[2] ** 2 + bbox[3] ** 2)) * 100 < click_threshold:
+                        if time.time() - prev_key_time[0] > 2:
+                            prev_key_time[0] = time.time()
+                            if button.text != 'BS' and button.text != 'SPACE':
+                                output_text += button.text
+                            elif button.text == 'BS':
+                                output_text = output_text[:-1]
+                            else:
+                                output_text += ' '
 
     result_queue.put(detections)
     st.session_state["output_text"] = output_text
-    return av.VideoFrame.from_ndarray(img, format="bgr24")
+    return av.VideoFrame.from_ndarray(imgOut, format="bgr24")
 
 # WebRTC Streamer
 webrtc_streamer(
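
The click test added in this commit normalizes the thumb-to-index distance by the hand's bounding-box diagonal, so a pinch registers the same way whether the hand is near or far from the camera. A minimal standalone sketch of that test (the function name and sample coordinates are illustrative, not part of the commit):

import numpy as np

def is_pinch_click(thumb_tip, index_tip, hand_bbox, click_threshold=50):
    # thumb_tip / index_tip: (x, y) pixel coordinates of MediaPipe landmarks 4 and 8.
    # hand_bbox: (x, y, width, height) of the detected hand in pixels.
    (x4, y4), (x8, y8) = thumb_tip, index_tip
    pinch = np.hypot(x8 - x4, y8 - y4)               # pixel distance between the two tips
    diagonal = np.hypot(hand_bbox[2], hand_bbox[3])  # hand size as the scale reference
    return (pinch / diagonal) * 100 < click_threshold

# Hypothetical values: a ~200x220 px hand with tips ~46 px apart registers a click (~15% of the diagonal).
print(is_pinch_click((310, 420), (340, 455), (250, 300, 200, 220)))  # True
print(is_pinch_click((200, 300), (380, 430), (180, 250, 200, 220)))  # False: tips ~222 px apart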
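Each frame is composited over imgList[indexImg] by cvzone's SelfiSegmentation.removeBG before hand tracking runs. The code that builds imgList sits in the elided else: branch, so this is only a plausible sketch of it; the resize target of 640x480 and the variable names are assumptions:

import os
import cv2
from cvzone.SelfiSegmentationModule import SelfiSegmentation

segmentor = SelfiSegmentation()

# Assumed loader for the 'model/street' backgrounds referenced above.
imgList = []
for imgPath in sorted(os.listdir('model/street')):
    img_bg = cv2.imread(os.path.join('model/street', imgPath))
    if img_bg is not None:
        imgList.append(cv2.resize(img_bg, (640, 480)))  # match the assumed frame size

frame = cv2.resize(cv2.imread('sample_frame.jpg'), (640, 480))  # stand-in for a webcam frame
out = segmentor.removeBG(frame, imgList[0])  # person kept, background replaced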
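The diff view is truncated at the webrtc_streamer( call, so the commit's actual arguments are not shown. For orientation only, a typical streamlit_webrtc invocation consistent with the imports above; the key and media constraints here are illustrative guesses, not values from the commit:

webrtc_streamer(
    key="virtual-keyboard",  # illustrative key, not taken from the commit
    mode=WebRtcMode.SENDRECV,
    rtc_configuration={"iceServers": get_ice_servers()},
    video_frame_callback=video_frame_callback,
    media_stream_constraints={"video": True, "audio": False},
    async_processing=True,
)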