Update app.py
app.py
CHANGED
@@ -269,39 +269,54 @@ if "output_text" not in st.session_state:
 # return av.VideoFrame.from_ndarray(img, format="bgr24")


-
-result_queue = queue.Queue()
-
-def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
-    img = frame.to_ndarray(format="bgr24")
-
-    # Ensure square input for MediaPipe
-    h, w, _ = img.shape
-    size = min(h, w)
-    img_cropped = img[:size, :size]
-
-    # Detect hands
-    hands, img_cropped = detector.findHands(img_cropped, flipType=False)
-
-    # Collect detections
-    detections = []
-    if hands:
-        for hand in hands:
-            bbox = hand["bbox"]
-            label = hand["type"]
-            score = hand["score"]
-
-
-
-
-
-
-
-
-
-
-
-
+result_queue: "queue.Queue[List[Detection]]" = queue.Queue()
+
+
+def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
+    image = frame.to_ndarray(format="bgr24")
+
+    # Run inference
+    blob = cv2.dnn.blobFromImage(
+        cv2.resize(image, (300, 300)), 0.007843, (300, 300), 127.5
+    )
+    net.setInput(blob)
+    output = net.forward()
+
+    h, w = image.shape[:2]
+
+    # Convert the output array into a structured form.
+    output = output.squeeze()  # (1, 1, N, 7) -> (N, 7)
+    output = output[output[:, 2] >= score_threshold]
+    detections = [
+        Detection(
+            class_id=int(detection[1]),
+            label=CLASSES[int(detection[1])],
+            score=float(detection[2]),
+            box=(detection[3:7] * np.array([w, h, w, h])),
+        )
+        for detection in output
+    ]
+
+    # Render bounding boxes and captions
+    for detection in detections:
+        caption = f"{detection.label}: {round(detection.score * 100, 2)}%"
+        color = COLORS[detection.class_id]
+        xmin, ymin, xmax, ymax = detection.box.astype("int")
+
+        cv2.rectangle(image, (xmin, ymin), (xmax, ymax), color, 2)
+        cv2.putText(
+            image,
+            caption,
+            (xmin, ymin - 15 if ymin - 15 > 15 else ymin + 15),
+            cv2.FONT_HERSHEY_SIMPLEX,
+            0.5,
+            color,
+            2,
+        )
+
+    result_queue.put(detections)
+
+    return av.VideoFrame.from_ndarray(image, format="bgr24")


 # def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
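The new callback refers to several names that are defined elsewhere in app.py and do not appear in this hunk: Detection, CLASSES, COLORS, net, and score_threshold. Below is a minimal sketch of plausible definitions for them, following the usual MobileNet-SSD (Caffe) setup for cv2.dnn; the model paths, class list, and threshold value are assumptions for illustration, not taken from this commit.

# Sketch only: the real app.py defines these elsewhere, so the paths and
# values below are placeholders, not part of this change.
from pathlib import Path
from typing import List, NamedTuple  # List is also needed by the queue annotation above

import cv2
import numpy as np


class Detection(NamedTuple):
    class_id: int
    label: str
    score: float
    box: np.ndarray  # (xmin, ymin, xmax, ymax) in pixels


# Pascal VOC label set that the Caffe MobileNet-SSD model was trained on
CLASSES = [
    "background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus",
    "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike",
    "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor",
]
COLORS = np.random.uniform(0, 255, size=(len(CLASSES), 3))  # one colour per class

# Hypothetical model file locations; readNetFromCaffe takes the prototxt and weights
net = cv2.dnn.readNetFromCaffe(
    str(Path("./models/MobileNetSSD_deploy.prototxt.txt")),
    str(Path("./models/MobileNetSSD_deploy.caffemodel")),
)

score_threshold = 0.5  # in the app this would typically come from a slider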
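For context, the sketch below shows how a callback like this is typically wired into streamlit_webrtc, assuming the app uses webrtc_streamer with WebRtcMode.SENDRECV; the key name and the checkbox/table UI are illustrative, not taken from this commit. Because the callback runs on a worker thread, detections are handed back to the Streamlit script through the thread-safe result_queue rather than by calling Streamlit APIs from inside the callback.

# Minimal wiring sketch; assumes video_frame_callback and result_queue from the diff above.
import streamlit as st
from streamlit_webrtc import WebRtcMode, webrtc_streamer

webrtc_ctx = webrtc_streamer(
    key="object-detection",  # hypothetical key
    mode=WebRtcMode.SENDRECV,
    video_frame_callback=video_frame_callback,
    media_stream_constraints={"video": True, "audio": False},
    async_processing=True,
)

# Read results pushed by the callback and show the latest detections in the page.
if st.checkbox("Show detected labels", value=True) and webrtc_ctx.state.playing:
    labels_placeholder = st.empty()
    while True:
        detections = result_queue.get()  # blocks until the callback publishes a frame's result
        labels_placeholder.table(detections)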