Pratyush101 committed
Commit aa2a068 · verified · 1 Parent(s): f68bda2

Update app.py

Files changed (1):
  1. app.py +15 -24
app.py CHANGED
@@ -8,7 +8,6 @@ import streamlit as st
 from streamlit_webrtc import WebRtcMode, webrtc_streamer
 from sample_utils.turn import get_ice_servers
 import mediapipe as mp
-from cvzone.SelfiSegmentationModule import SelfiSegmentation
 import os
 import time
 
@@ -21,11 +20,10 @@ st.title("Interactive Virtual Keyboard")
 st.subheader('''Turn on the webcam and use hand gestures to interact with the virtual keyboard.
 Use 'a' and 'd' from the keyboard to change the background.''')
 
-# Initialize MediaPipe and Background Segmentor
+# Initialize MediaPipe Hand Detection
 mp_hands = mp.solutions.hands
-hands = mp_hands.Hands(static_image_mode=False, max_num_hands=2, min_detection_confidence=0.7)
+hands = mp_hands.Hands(max_num_hands=1, min_detection_confidence=0.5)
 mp_drawing = mp.solutions.drawing_utils
-segmentor = SelfiSegmentation()
 
 # Virtual Keyboard Layout
 keys = [["Q", "W", "E", "R", "T", "Y", "U", "I", "O", "P"],
@@ -45,15 +43,6 @@ class Detection(NamedTuple):
 
 result_queue: "queue.Queue[List[Detection]]" = queue.Queue()
 
-# Load Background Images
-listImg = os.listdir('model/street') if os.path.exists('model/street') else []
-if not listImg:
-    st.error("Error: 'street' directory is missing or empty. Please add background images.")
-    st.stop()
-else:
-    imgList = [cv2.imread(f'model/street/{imgPath}') for imgPath in listImg]
-    imgList = [img for img in imgList if img is not None]
-
 indexImg = 0
 output_text = ""
 prev_key_time = [time.time()] * 2
@@ -61,30 +50,32 @@ prev_key_time = [time.time()] * 2
 if "output_text" not in st.session_state:
     st.session_state["output_text"] = ""
 
-# Video Frame Callback with Your Logic
+# Video Frame Callback with Logic Correction
 def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
     global indexImg, output_text
 
     img = frame.to_ndarray(format="bgr24")
-    imgOut = segmentor.removeBG(img, imgList[indexImg])
 
     # Process frame using MediaPipe
-    result = hands.process(cv2.cvtColor(imgOut, cv2.COLOR_BGR2RGB))
+    result = hands.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
 
-    buttonList = [Button([30 + col * 105, 30 + row * 120], key) for row, line in enumerate(keys) for col, key in enumerate(line)]
+    # Create Buttons
+    buttonList = [Button([30 + col * 105, 30 + row * 120], key)
+                  for row, line in enumerate(keys)
+                  for col, key in enumerate(line)]
 
     detections = []
     if result.multi_hand_landmarks:
         for hand_landmarks in result.multi_hand_landmarks:
             # Draw hand landmarks
             mp_drawing.draw_landmarks(
-                imgOut, hand_landmarks, mp_hands.HAND_CONNECTIONS,
+                img, hand_landmarks, mp_hands.HAND_CONNECTIONS,
                 mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=2, circle_radius=4),
                 mp_drawing.DrawingSpec(color=(0, 0, 255), thickness=2)
             )
 
             # Extract bounding box for each hand
-            h, w, _ = imgOut.shape
+            h, w, _ = img.shape
             x_min, y_min = w, h
             x_max, y_max = 0, 0
             for lm in hand_landmarks.landmark:
@@ -101,7 +92,7 @@ def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
             x8, y8 = int(hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].x * w), \
                      int(hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].y * h)
 
-            # Distance Calculation
+            # Calculate Distance and Detect Button Click
             distance = np.sqrt((x8 - x4) ** 2 + (y8 - y4) ** 2)
             click_threshold = 50
 
@@ -109,10 +100,10 @@ def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
                 x, y = button.pos
                 w, h = button.size
                 if x < x8 < x + w and y < y8 < y + h:
-                    cv2.rectangle(imgOut, button.pos, (x + w, y + h), (0, 255, 160), -1)
-                    cv2.putText(imgOut, button.text, (x + 20, y + 70), cv2.FONT_HERSHEY_PLAIN, 5, (255, 255, 255), 3)
+                    cv2.rectangle(img, button.pos, (x + w, y + h), (0, 255, 160), -1)
+                    cv2.putText(img, button.text, (x + 20, y + 70), cv2.FONT_HERSHEY_PLAIN, 5, (255, 255, 255), 3)
 
-                    # Simulate key press if finger close enough
+                    # Simulate Key Press if Finger Close Enough
                     if (distance / np.sqrt(bbox[2] ** 2 + bbox[3] ** 2)) * 100 < click_threshold:
                         if time.time() - prev_key_time[0] > 2:
                             prev_key_time[0] = time.time()
@@ -125,7 +116,7 @@ def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
 
     result_queue.put(detections)
    st.session_state["output_text"] = output_text
-    return av.VideoFrame.from_ndarray(imgOut, format="bgr24")
+    return av.VideoFrame.from_ndarray(img, format="bgr24")
 
 # WebRTC Streamer
 webrtc_streamer(
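
Note: the last hunk cuts off at the opening of the webrtc_streamer(...) call, which is unchanged by this commit. A minimal sketch of how the callback is typically wired up with streamlit-webrtc follows; the key name and media constraints are assumptions for illustration, not taken from this commit:

webrtc_streamer(
    key="virtual-keyboard",  # hypothetical component key
    mode=WebRtcMode.SENDRECV,  # send webcam frames, receive processed frames back
    rtc_configuration={"iceServers": get_ice_servers()},  # STUN/TURN servers from sample_utils.turn
    media_stream_constraints={"video": True, "audio": False},
    video_frame_callback=video_frame_callback,  # per-frame hand detection defined above
    async_processing=True,
)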