Pratyush101 committed
Commit 0ebb9ed · verified · 1 Parent(s): cb8aa2b

Update app.py

Files changed (1): app.py (+66 -29)
app.py CHANGED
@@ -96,37 +96,74 @@ detector = HandDetector(maxHands=1, detectionCon=0.8)
 if "output_text" not in st.session_state:
     st.session_state["output_text"] = ""
 
-# Create a thread-safe queue for passing results from callback
-result_queue = queue.Queue()
+class Detection(NamedTuple):
+    label: str
+    score: float
+    box: np.ndarray
 
+
+@st.cache_resource  # Cache label colors
+def generate_label_colors():
+    return np.random.uniform(0, 255, size=(2, 3))  # Two classes: Left and Right Hand
+
+
+COLORS = generate_label_colors()
+
+# Initialize MediaPipe Hands
+mp_hands = mp.solutions.hands
+detector = mp_hands.Hands(static_image_mode=False, max_num_hands=2, min_detection_confidence=0.5)
+
+# Session-specific caching
+result_queue: "queue.Queue[List[Detection]]" = queue.Queue()
+
+# Hand detection callback
 def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
-    img = frame.to_ndarray(format="bgr24")
-
-    # Ensure dimensions are provided or preprocess image
-    height, width, _ = img.shape
-
-    # Try passing image dimensions explicitly
-    hands, img = detector.findHands(img, flipType=False, imgDim=(width, height))
-
-    if hands:
-        hand = hands[0]
-        bbox = hand["bbox"]
-        cv2.rectangle(img,
-                      (bbox[0], bbox[1]),
-                      (bbox[0] + bbox[2], bbox[1] + bbox[3]),
-                      (255, 0, 0), 2)
-
-        # Render text once
-        font = cv2.FONT_HERSHEY_SIMPLEX
-        fontScale = 2
-        color = (255, 255, 255)
-        thickness = 2
-        cv2.putText(img, 'OpenCV', (50, 50), font, fontScale, color, thickness, cv2.LINE_AA)
-
-        # Pass simplified results to the queue
-        result_queue.put({"bbox": bbox, "landmarks": hand["landmarks"]})
-
-    return av.VideoFrame.from_ndarray(img, format="bgr24")
+    image = frame.to_ndarray(format="bgr24")
+    h, w = image.shape[:2]
+
+    # Process image with MediaPipe Hands
+    results = detector.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
+
+    detections = []
+    if results.multi_hand_landmarks:
+        for hand_landmarks, hand_class in zip(results.multi_hand_landmarks, results.multi_handedness):
+            # Extract bounding box
+            x_min, y_min = 1, 1
+            x_max, y_max = 0, 0
+            for lm in hand_landmarks.landmark:
+                x_min = min(x_min, lm.x)
+                y_min = min(y_min, lm.y)
+                x_max = max(x_max, lm.x)
+                y_max = max(y_max, lm.y)
+
+            # Scale bbox to image size
+            box = np.array([x_min * w, y_min * h, x_max * w, y_max * h]).astype("int")
+
+            # Label and score
+            label = hand_class.classification[0].label
+            score = hand_class.classification[0].score
+
+            detections.append(Detection(label=label, score=score, box=box))
+
+            # Draw bounding box and label
+            color = COLORS[0 if label == "Left" else 1]
+            cv2.rectangle(image, (box[0], box[1]), (box[2], box[3]), color, 2)
+            caption = f"{label}: {round(score * 100, 2)}%"
+            cv2.putText(
+                image,
+                caption,
+                (box[0], box[1] - 15 if box[1] - 15 > 15 else box[1] + 15),
+                cv2.FONT_HERSHEY_SIMPLEX,
+                0.5,
+                color,
+                2,
+            )
+
+    # Put results in the queue
+    result_queue.put(detections)
+
+    return av.VideoFrame.from_ndarray(image, format="bgr24")
 
 
 webrtc_ctx = webrtc_streamer(
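
The diff cuts off at the webrtc_streamer( call, so only the producer half of the queue pattern is visible. For context, below is a minimal sketch of the consumer side that typically pairs with a result_queue like this one, modeled on the streamlit-webrtc object-detection example rather than taken from this commit; webrtc_ctx.state.playing is part of the streamlit-webrtc API, while the checkbox label and the labels_placeholder name are illustrative.

# Sketch only, not part of this commit: drain result_queue on the main
# script thread while the WebRTC stream is playing.
if st.checkbox("Show detected hands", value=True):
    if webrtc_ctx.state.playing:
        labels_placeholder = st.empty()
        while True:
            # Blocks until video_frame_callback pushes the next frame's detections
            detections = result_queue.get()
            labels_placeholder.table(
                [{"label": d.label, "score": f"{d.score:.2f}"} for d in detections]
            )

The queue is what makes this design thread-safe: video_frame_callback runs on a worker thread managed by streamlit-webrtc, which cannot safely touch st.session_state or Streamlit widgets directly, so per-frame results cross the thread boundary through queue.Queue instead.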