Pratyush101 committed on
Commit 89f5596 · verified · 1 Parent(s): 5fdbc61

Update app.py

Files changed (1)
  1. app.py +46 -31
app.py CHANGED
@@ -269,39 +269,54 @@ if "output_text" not in st.session_state:
 # return av.VideoFrame.from_ndarray(img, format="bgr24")
 
 
-# Initialize result queue
-result_queue = queue.Queue()
+result_queue: "queue.Queue[List[Detection]]" = queue.Queue()
 
-def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
-    img = frame.to_ndarray(format="bgr24")
-
-    # Ensure square input for MediaPipe
-    h, w, _ = img.shape
-    size = min(h, w)
-    img_cropped = img[:size, :size]
-
-    # Detect hands
-    hands, img_cropped = detector.findHands(img_cropped, flipType=False)
-
-    # Collect detections
-    detections = []
-    if hands:
-        for hand in hands:
-            bbox = hand["bbox"]
-            label = hand["type"]
-            score = hand["score"]
 
-            # Draw bounding box
-            cv2.rectangle(img_cropped, (bbox[0], bbox[1]),
-                          (bbox[0] + bbox[2], bbox[1] + bbox[3]), (255, 0, 0), 2)
-
-            # Append detection details
-            detections.append({"label": label, "score": score, "bbox": bbox})
-
-    # Put detections into result queue
-    result_queue.put(detections)
-
-    return av.VideoFrame.from_ndarray(img_cropped, format="bgr24")
+def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
+    image = frame.to_ndarray(format="bgr24")
+
+    # Run inference
+    blob = cv2.dnn.blobFromImage(
+        cv2.resize(image, (300, 300)), 0.007843, (300, 300), 127.5
+    )
+    net.setInput(blob)
+    output = net.forward()
+
+    h, w = image.shape[:2]
+
+    # Convert the output array into a structured form.
+    output = output.squeeze()  # (1, 1, N, 7) -> (N, 7)
+    output = output[output[:, 2] >= score_threshold]
+    detections = [
+        Detection(
+            class_id=int(detection[1]),
+            label=CLASSES[int(detection[1])],
+            score=float(detection[2]),
+            box=(detection[3:7] * np.array([w, h, w, h])),
+        )
+        for detection in output
+    ]
+
+    # Render bounding boxes and captions
+    for detection in detections:
+        caption = f"{detection.label}: {round(detection.score * 100, 2)}%"
+        color = COLORS[detection.class_id]
+        xmin, ymin, xmax, ymax = detection.box.astype("int")
+
+        cv2.rectangle(image, (xmin, ymin), (xmax, ymax), color, 2)
+        cv2.putText(
+            image,
+            caption,
+            (xmin, ymin - 15 if ymin - 15 > 15 else ymin + 15),
+            cv2.FONT_HERSHEY_SIMPLEX,
+            0.5,
+            color,
+            2,
+        )
+
+    result_queue.put(detections)
+
+    return av.VideoFrame.from_ndarray(image, format="bgr24")
 
 
 # def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
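The added callback relies on names defined elsewhere in app.py that this hunk does not show: Detection, CLASSES, COLORS, net, score_threshold, np, queue, and List. A minimal sketch of what that surrounding context could look like, assuming the standard 21-class Caffe MobileNet-SSD model (the 0.007843 scale and 127.5 mean passed to blobFromImage match that model); the model file paths and the threshold value below are hypothetical, not taken from the commit:

import queue
from typing import List, NamedTuple

import cv2
import numpy as np


class Detection(NamedTuple):
    class_id: int
    label: str
    score: float
    box: np.ndarray


# Class list for the Caffe MobileNet-SSD detector (index 0 is background).
CLASSES = [
    "background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus",
    "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike",
    "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor",
]
# One random BGR color per class for drawing boxes and captions.
COLORS = np.random.uniform(0, 255, size=(len(CLASSES), 3))

# Hypothetical model paths; the real locations are not visible in this diff.
net = cv2.dnn.readNetFromCaffe(
    "MobileNetSSD_deploy.prototxt.txt", "MobileNetSSD_deploy.caffemodel"
)

# Hypothetical confidence threshold; the app may expose this via a slider.
score_threshold = 0.5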
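video_frame_callback runs in a streamlit-webrtc worker thread, so it cannot call Streamlit APIs directly; pushing each frame's detections into result_queue lets the main script render them. How the queue is drained is outside this hunk; a sketch of one common streamlit-webrtc pattern, with a hypothetical webrtc_streamer key and table layout:

import streamlit as st
from streamlit_webrtc import webrtc_streamer

# Hypothetical wiring; the actual webrtc_streamer call in app.py is not shown here.
ctx = webrtc_streamer(
    key="object-detection",
    video_frame_callback=video_frame_callback,
    media_stream_constraints={"video": True, "audio": False},
)

if ctx.state.playing:
    labels_placeholder = st.empty()
    while True:
        # Blocks until the callback publishes the next frame's detections.
        detections = result_queue.get()
        labels_placeholder.table(
            [{"label": d.label, "score": d.score} for d in detections]
        )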