Update app.py
app.py
CHANGED
@@ -96,37 +96,74 @@ detector = HandDetector(maxHands=1, detectionCon=0.8)
 if "output_text" not in st.session_state:
     st.session_state["output_text"] = ""
 
+class Detection(NamedTuple):
+    label: str
+    score: float
+    box: np.ndarray
 
+
+@st.cache_resource  # Cache label colors
+def generate_label_colors():
+    return np.random.uniform(0, 255, size=(2, 3))  # Two classes: Left and Right Hand
+
+
+COLORS = generate_label_colors()
+
+# Initialize MediaPipe Hands
+mp_hands = mp.solutions.hands
+detector = mp_hands.Hands(static_image_mode=False, max_num_hands=2, min_detection_confidence=0.5)
+
+# Session-specific caching
+result_queue: "queue.Queue[List[Detection]]" = queue.Queue()
+
+# Hand detection callback
 def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
+    image = frame.to_ndarray(format="bgr24")
+    h, w = image.shape[:2]
+
+    # Process image with MediaPipe Hands
+    results = detector.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
+
+    detections = []
+    if results.multi_hand_landmarks:
+        for hand_landmarks, hand_class in zip(results.multi_hand_landmarks, results.multi_handedness):
+            # Extract bounding box
+            x_min, y_min = 1, 1
+            x_max, y_max = 0, 0
+            for lm in hand_landmarks.landmark:
+                x_min = min(x_min, lm.x)
+                y_min = min(y_min, lm.y)
+                x_max = max(x_max, lm.x)
+                y_max = max(y_max, lm.y)
+
+            # Scale bbox to image size
+            box = np.array([x_min * w, y_min * h, x_max * w, y_max * h]).astype("int")
+
+            # Label and score
+            label = hand_class.classification[0].label
+            score = hand_class.classification[0].score
+
+            detections.append(Detection(label=label, score=score, box=box))
+
+            # Draw bounding box and label
+            color = COLORS[0 if label == "Left" else 1]
+            cv2.rectangle(image, (box[0], box[1]), (box[2], box[3]), color, 2)
+            caption = f"{label}: {round(score * 100, 2)}%"
+            cv2.putText(
+                image,
+                caption,
+                (box[0], box[1] - 15 if box[1] - 15 > 15 else box[1] + 15),
+                cv2.FONT_HERSHEY_SIMPLEX,
+                0.5,
+                color,
+                2,
+            )
+
+    # Put results in the queue
+    result_queue.put(detections)
+
+    return av.VideoFrame.from_ndarray(image, format="bgr24")
+
 
 
 webrtc_ctx = webrtc_streamer(
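The visible hunk ends just as the `webrtc_streamer(` call begins, so the code that drains `result_queue` is not shown. Below is a minimal sketch of how the Streamlit main thread could consume those detections, following the usual streamlit-webrtc pattern; the `key`, the streamer arguments, and the `labels_placeholder` name are assumptions, not taken from app.py.

# Sketch only (not part of this commit): consume detections published by
# video_frame_callback via result_queue and render them in the main thread.
import queue

import streamlit as st
from streamlit_webrtc import WebRtcMode, webrtc_streamer

webrtc_ctx = webrtc_streamer(
    key="hand-detection",  # assumed key; the real arguments are not shown in this hunk
    mode=WebRtcMode.SENDRECV,
    video_frame_callback=video_frame_callback,
    media_stream_constraints={"video": True, "audio": False},
)

labels_placeholder = st.empty()  # assumed placeholder for the detection table
while webrtc_ctx.state.playing:
    try:
        detections = result_queue.get(timeout=1.0)  # filled by the callback thread
    except queue.Empty:
        continue
    labels_placeholder.table(
        [{"label": d.label, "score": round(d.score, 2)} for d in detections]
    )

The timeout keeps the loop from blocking forever once the stream stops.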