Pratyush101 committed (verified)
Commit 5bad12e · Parent: 075c853

Update app.py

Files changed (1): app.py (+11, -44)
app.py CHANGED
@@ -43,7 +43,6 @@ class Detection(NamedTuple):
 
 result_queue: "queue.Queue[List[Detection]]" = queue.Queue()
 
-
 indexImg = 0
 output_text = ""
 prev_key_time = [time.time()] * 2
@@ -56,8 +55,6 @@ def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
     global indexImg, output_text
 
     img = frame.to_ndarray(format="bgr24")
-    # h, w = img.shape[:2]
-    # Process frame using MediaPipe
     result = hands.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
 
     # Create the keyboard buttons
@@ -85,39 +82,15 @@ def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
         cv2.rectangle(img, (x, y), (x + bw, y + bh), (200, 200, 200), -1)
         cv2.putText(img, button.text, (x + int(0.2 * bw), y + int(0.7 * bh)), cv2.FONT_HERSHEY_PLAIN, font_scale, (0, 0, 0), font_thickness)
 
-    # # Create a blank canvas for drawing the keyboard
-    # # keyboard_canvas = np.zeros_like(img)
-    # buttonList = []
-    # # Define buttons in each row of the virtual keyboard
-    # for key in keys[0]:
-    #     buttonList.append(Button([30 + keys[0].index(key) * 105, 30], key))
-    # for key in keys[1]:
-    #     buttonList.append(Button([30 + keys[1].index(key) * 105, 150], key))
-    # for key in keys[2]:
-    #     buttonList.append(Button([30 + keys[2].index(key) * 105, 260], key))
-
-    # # Add special buttons for Backspace and Space
-    # buttonList.append(Button([90 + 10 * 100, 30], 'BS', size=[125, 100]))
-    # buttonList.append(Button([300, 370], 'SPACE', size=[500, 100]))
-
-    # # Draw Keyboard Buttons
-    # for button in buttonList:
-    #     x, y = button.pos
-    #     w, h = button.size
-    #     cv2.rectangle(img, (x, y), (x + w, y + h), (200, 200, 200), -1)
-    #     cv2.putText(img, button.text, (x + 20, y + 70), cv2.FONT_HERSHEY_PLAIN, 5, (0, 0, 0), 3)
-
     detections = []
     if result.multi_hand_landmarks:
         for hand_landmarks in result.multi_hand_landmarks:
-            # Draw hand landmarks
             mp_drawing.draw_landmarks(
                 img, hand_landmarks, mp_hands.HAND_CONNECTIONS,
                 mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=2, circle_radius=4),
                 mp_drawing.DrawingSpec(color=(0, 0, 255), thickness=2)
             )
 
-            # Extract bounding box for each hand
             h, w, _ = img.shape
             x_min, y_min = w, h
             x_max, y_max = 0, 0
@@ -129,11 +102,9 @@ def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
             bbox = [x_min, y_min, x_max - x_min, y_max - y_min]
             detections.append(Detection(label="Hand", score=0.5, box=np.array(bbox)))
 
-            # Extract finger tip positions
             x4, y4 = int(hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP].x * w), int(hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP].y * h)
             x8, y8 = int(hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].x * w), int(hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].y * h)
 
-            # Check for whether the finger is in button bounds
            for button in buttonList:
                 x, y = button.pos
                 bw, bh = button.size
@@ -141,22 +112,18 @@ def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
                     cv2.rectangle(img, (x, y), (x + bw, y + bh), (0, 255, 160), -1)
                     cv2.putText(img, button.text, (x + int(0.2 * bw), y + int(0.7 * bh)), cv2.FONT_HERSHEY_PLAIN, font_scale, (255, 255, 255), font_thickness)
 
-
-
-                    # Distance Calculation
                     distance = np.sqrt((x8 - x4) ** 2 + (y8 - y4) ** 2)
                     click_threshold = 10
-
-                    # # Simulate key press if finger close enough
-                    # if (distance / np.sqrt(bbox[2] ** 2 + bbox[3] ** 2)) * 100 < click_threshold:
-                    #     if time.time() - prev_key_time[0] > 2:
-                    #         prev_key_time[0] = time.time()
-                    #         if button.text != 'BS' and button.text != 'SPACE':
-                    #             output_text += button.text
-                    #         elif button.text == 'BS':
-                    #             output_text = output_text[:-1]
-                    #         else:
-                    #             output_text += ' '
+
+                    if (distance / np.sqrt(bbox[2] ** 2 + bbox[3] ** 2)) * 100 < click_threshold:
+                        if time.time() - prev_key_time[0] > 2:
+                            prev_key_time[0] = time.time()
+                            if button.text != 'BS' and button.text != 'SPACE':
+                                output_text += button.text
+                            elif button.text == 'BS':
+                                output_text = output_text[:-1]
+                            else:
+                                output_text += ' '
 
     result_queue.put(detections)
     st.session_state["output_text"] = output_text
@@ -170,4 +137,4 @@ webrtc_streamer(
     media_stream_constraints={"video": True, "audio": False},
     video_frame_callback=video_frame_callback,
     async_processing=True,
-)
+)
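
The substance of this commit is that it drops the dead commented-out keyboard-setup code and re-enables the pinch-to-click handling: the thumb-tip to index-tip distance is normalized by the hand's bounding-box diagonal, and a press is registered only when that ratio falls below click_threshold and at least two seconds have passed since the previous press (the prev_key_time debounce). Below is a minimal, self-contained sketch of that logic pulled out into pure functions for illustration; the names is_click and apply_key and the module-level constants are hypothetical and not part of app.py.

import time
import numpy as np

CLICK_THRESHOLD = 10      # pinch distance as a percentage of the hand bbox diagonal
DEBOUNCE_SECONDS = 2.0    # minimum gap between two accepted presses

def is_click(thumb_xy, index_xy, bbox, last_press_time, now=None):
    """Return True when the thumb-index pinch is tight enough to count as a
    key press and the debounce window has elapsed. bbox is [x, y, w, h]."""
    now = time.time() if now is None else now
    (x4, y4), (x8, y8) = thumb_xy, index_xy
    distance = np.hypot(x8 - x4, y8 - y4)
    diagonal = np.hypot(bbox[2], bbox[3])
    if diagonal == 0:
        return False
    pinch_ratio = (distance / diagonal) * 100
    return pinch_ratio < CLICK_THRESHOLD and (now - last_press_time) > DEBOUNCE_SECONDS

def apply_key(output_text, key_text):
    """Apply one virtual-keyboard press, mirroring the BS / SPACE handling."""
    if key_text == 'BS':
        return output_text[:-1]
    if key_text == 'SPACE':
        return output_text + ' '
    return output_text + key_text

# Example: a tight pinch inside a 200x200 hand bbox, long after the last press.
if is_click((100, 100), (108, 106), [50, 50, 200, 200], last_press_time=0.0):
    print(apply_key("HELL", "O"))   # -> "HELLO"

Dividing the raw pixel distance by the bounding-box diagonal keeps the threshold roughly scale-invariant, so the same click_threshold works whether the hand is close to the camera or far from it.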