Update app.py
app.py
CHANGED
@@ -1166,7 +1166,210 @@ def process_video_file(video_file, analysis_types, processing_mode="Hybrid (Goog
 cv2.putText(frame, f"Mode: {processing_mode}",
             (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)

+# Convert frame to grayscale for motion detection
+current_frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+current_frame_gray = cv2.GaussianBlur(current_frame_gray, (21, 21), 0)
+
+if previous_frame_gray is not None:
+    # Calculate frame difference for activity detection
+    frame_diff = cv2.absdiff(current_frame_gray, previous_frame_gray)
+    activity_level = np.mean(frame_diff)
+    detection_stats["activity_metrics"].append((frame_count/fps, activity_level))
+
+    # Scene change detection
+    if activity_level > scene_change_threshold:
+        detection_stats["scene_changes"].append(frame_count/fps)
+        # Mark scene change on frame
+        cv2.putText(frame, "SCENE CHANGE",
+                    (width // 2 - 100, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 255), 2)
+
+    # Add optical flow tracking if enabled
+    if use_advanced_tracking and prev_points is not None:
+        try:
+            # Calculate optical flow
+            next_points, status, _ = cv2.calcOpticalFlowPyrLK(previous_frame_gray,
+                                                              current_frame_gray,
+                                                              prev_points,
+                                                              None,
+                                                              **lk_params)
+
+            # Select good points
+            if next_points is not None:
+                good_new = next_points[status==1]
+                good_old = prev_points[status==1]
+
+                # Draw motion tracks
+                for i, (new, old) in enumerate(zip(good_new, good_old)):
+                    a, b = new.ravel()
+                    c, d = old.ravel()
+                    # Draw motion lines
+                    cv2.line(frame, (int(c), int(d)), (int(a), int(b)), (0, 255, 255), 2)
+                    cv2.circle(frame, (int(a), int(b)), 3, (0, 255, 0), -1)
+        except Exception as e:
+            # If optical flow fails, just continue without it
+            pass
+
+# Update tracking points periodically if enabled
+if use_advanced_tracking and (frame_count % 5 == 0 or prev_points is None or (prev_points is not None and len(prev_points) < 10)):
+    try:
+        prev_points = cv2.goodFeaturesToTrack(current_frame_gray, **feature_params)
+    except Exception:
+        # If feature tracking fails, just continue without it
+        prev_points = None
+
+previous_frame_gray = current_frame_gray
+
+# Process frames with Vision API if using Google Vision
+if (processing_mode == "Google Vision API Only" or processing_mode == "Hybrid (Google Vision + OpenCV)") and \
+        frame_count % process_every_n_frames == 0 and client is not None:
+
+    # Convert frame to JPEG for Vision API
+    success, jpeg_frame = cv2.imencode('.jpg', frame)
+    if success:
+        image_content = jpeg_frame.tobytes()
+
+        # Create vision image
+        vision_image = vision.Image(content=image_content)
+
+        try:
+            # Perform detection based on selected types
+            if "Objects" in analysis_types:
+                objects = client.object_localization(image=vision_image)
+                # Filter objects by confidence threshold
+                filtered_objects = [obj for obj in objects.localized_object_annotations
+                                    if obj.score >= confidence_threshold]
+
+                # Update object counts in stats
+                for obj in filtered_objects:
+                    if obj.name in detection_stats["objects"]:
+                        detection_stats["objects"][obj.name] += 1
+                    else:
+                        detection_stats["objects"][obj.name] = 1
+
+                    # Draw object boundaries
+                    box = [(vertex.x * frame.shape[1], vertex.y * frame.shape[0])
+                           for vertex in obj.bounding_poly.normalized_vertices]
+                    points = np.array([[int(p[0]), int(p[1])] for p in box])
+                    cv2.polylines(frame, [points], True, (0, 255, 0), 2)
+
+                    # Add label with confidence
+                    cv2.putText(frame, f"{obj.name}: {int(obj.score * 100)}%",
+                                (int(box[0][0]), int(box[0][1] - 10)),
+                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
+
+                    # Add to trackers for future frames
+                    # Calculate bounding box
+                    x_values = [p[0] for p in box]
+                    y_values = [p[1] for p in box]
+                    x_min, x_max = min(x_values), max(x_values)
+                    y_min, y_max = min(y_values), max(y_values)
+
+                    object_trackers[obj.name] = {
+                        "bbox": (int(x_min), int(y_min), int(x_max - x_min), int(y_max - y_min)),
+                        "last_seen": frame_count,
+                        "score": obj.score
+                    }
+
+            # Process faces if selected
+            if "Face Detection" in analysis_types:
+                faces = client.face_detection(image=vision_image)
+                # Count faces and draw boundaries
+                face_count = 0
+                for face in faces.face_annotations:
+                    if face.detection_confidence >= confidence_threshold:
+                        face_count += 1
+
+                        # Draw face boundary
+                        vertices = face.bounding_poly.vertices
+                        points = [(vertex.x, vertex.y) for vertex in vertices]
+                        points = np.array([[p[0], p[1]] for p in points])
+                        cv2.polylines(frame, [points], True, (0, 0, 255), 2)
+
+                        # Add confidence score
+                        cv2.putText(frame, f"Face: {int(face.detection_confidence * 100)}%",
+                                    (points[0][0], points[0][1] - 10),
+                                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
+
+                        # Draw facial landmarks if enabled
+                        if enable_face_landmarks:
+                            for landmark in face.landmarks:
+                                px = landmark.position.x
+                                py = landmark.position.y
+                                cv2.circle(frame, (int(px), int(py)), 2, (255, 255, 0), -1)
+
+                # Update face count
+                detection_stats["faces"] += face_count
+
+            # Process text if selected
+            if "Text" in analysis_types:
+                text = client.text_detection(image=vision_image)
+                if text.text_annotations:
+                    # Count text blocks
+                    text_blocks = len(text.text_annotations) - 1  # Subtract 1 for the full text annotation
+                    detection_stats["text_blocks"] += text_blocks
+
+                    # Draw text bounding boxes
+                    for text_annot in text.text_annotations[1:]:  # Skip the first one (full text)
+                        box = [(vertex.x, vertex.y) for vertex in text_annot.bounding_poly.vertices]
+                        points = np.array([[int(p[0]), int(p[1])] for p in box])
+                        cv2.polylines(frame, [points], True, (255, 0, 0), 2)
+
+                        # Add recognized text
+                        cv2.putText(frame, text_annot.description,
+                                    (points[0][0], points[0][1] - 10),
+                                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)
+        except Exception as e:
+            # Handle API errors gracefully
+            error_msg = f"API Error: {str(e)}"
+            cv2.putText(frame, error_msg, (10, 70),
+                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
+
+# Process with OpenCV object detection if enabled
+if (processing_mode == "OpenCV Only" or processing_mode == "Hybrid (Google Vision + OpenCV)") and \
+        opencv_detector is not None and \
+        (frame_count % process_every_n_frames == 0):
+
+    # The OpenCV detection code goes here...
+    # This would be similar to what's in the VideoProcessor.transform method
+
+    try:
+        # If using HOG detector (the fallback)
+        if isinstance(opencv_detector, cv2.HOGDescriptor):
+            # Detect people
+            boxes, weights = opencv_detector.detectMultiScale(
+                frame, winStride=(8, 8), padding=(4, 4), scale=1.05
+            )
+
+            # Draw bounding boxes
+            for i, (x, y, w, h) in enumerate(boxes):
+                if weights[i] > 0.3:  # Confidence threshold
+                    cv2.rectangle(frame, (x, y), (x+w, y+h), (255, 0, 0), 2)
+                    cv2.putText(frame, f"Person: {int(weights[i] * 100)}%",
+                                (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)
+
+                    # Add to trackers
+                    object_trackers["person"] = {
+                        "bbox": (x, y, w, h),
+                        "last_seen": frame_count,
+                        "score": weights[i]
+                    }
+
+                    # Update count in stats
+                    if "person" in detection_stats["objects"]:
+                        detection_stats["objects"]["person"] += 1
+                    else:
+                        detection_stats["objects"]["person"] = 1
+        else:
+            # Using YOLO or another DNN-based detector code would go here
+            pass
+
+    except Exception as e:
+        cv2.putText(frame, f"OpenCV Error: {str(e)}", (10, 110),
+                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
+
+# Add hint about slowed down speed
+cv2.putText(frame, "Playback: 60% speed for better visualization",
+            (width - 400, height - 30), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 200, 0), 2)

 # Write the frame to output video
 out.write(frame)
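
The added frame-loop code above relies on state that is initialized earlier in process_video_file and does not appear in this hunk: detection_stats, object_trackers, previous_frame_gray, prev_points, scene_change_threshold, process_every_n_frames, lk_params and feature_params. A minimal sketch of how that state might be set up before the loop, assuming typical Shi-Tomasi and Lucas-Kanade defaults; the actual definitions and values in app.py may differ:

# Hypothetical initialization for the per-video state used in the loop above.
# The real definitions live earlier in app.py and may use different values.
detection_stats = {
    "objects": {},           # object name -> occurrence count
    "faces": 0,              # running total of detected faces
    "text_blocks": 0,        # running total of detected text blocks
    "scene_changes": [],     # timestamps (seconds) of detected scene changes
    "activity_metrics": [],  # (timestamp, mean frame difference) pairs
}
object_trackers = {}         # object name -> {"bbox", "last_seen", "score"}

scene_change_threshold = 30.0   # mean frame difference that flags a scene change (assumed)
process_every_n_frames = 15     # send every Nth frame to the detectors (assumed)

# Shi-Tomasi corner parameters for cv2.goodFeaturesToTrack (assumed defaults)
feature_params = dict(maxCorners=100, qualityLevel=0.3, minDistance=7, blockSize=7)
# Lucas-Kanade parameters for cv2.calcOpticalFlowPyrLK (assumed defaults)
lk_params = dict(winSize=(15, 15), maxLevel=2,
                 criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03))

previous_frame_gray = None   # grayscale copy of the prior frame for differencing
prev_points = None           # feature points carried between frames for optical flow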

@@ -2092,35 +2295,49 @@ def main():

                 st.markdown("**Top Objects:**")
                 for obj, count in list(sorted_objects.items())[:10]:
                     st.markdown(f"- {obj}: {count} occurrences")
+            else:
+                st.info("No objects were detected in the video.")

             # Display face detection summary
+            if "Face Detection" in analysis_types:
                 st.markdown("#### π€ Face Analysis")
+                if results["detection_stats"]["faces"] > 0:
+                    st.markdown(f"Total faces detected: {results['detection_stats']['faces']}")
+                else:
+                    st.info("No faces were detected in the video.")

             # Display text detection summary
+            if "Text" in analysis_types:
                 st.markdown("#### π Text Analysis")
+                if results["detection_stats"]["text_blocks"] > 0:
+                    st.markdown(f"Total text blocks detected: {results['detection_stats']['text_blocks']}")
+                else:
+                    st.info("No text was detected in the video.")

+            # Display scene analysis
+            if "Motion" in analysis_types:
+                st.markdown("#### π¬ Scene Analysis")

+                # Display scene changes
+                if results["detection_stats"]["scene_changes"]:
+                    st.markdown(f"**Scene Changes:** {len(results['detection_stats']['scene_changes'])} detected")
+                    st.markdown("Scene changes at time points (seconds):")
+                    scene_times = [f"{t:.2f}s" for t in results["detection_stats"]["scene_changes"]]
+                    st.write(", ".join(scene_times))

+                # Activity metrics visualization
+                if results["detection_stats"]["activity_metrics"]:
+                    st.markdown("**Activity Level Over Time:**")
+                    activity_data = results["detection_stats"]["activity_metrics"]
+                    times = [point[0] for point in activity_data]
+                    levels = [point[1] for point in activity_data]
+
+                    fig, ax = plt.subplots(figsize=(10, 4))
+                    ax.plot(times, levels, 'r-')
+                    ax.set_xlabel('Time (seconds)')
+                    ax.set_ylabel('Activity Level')
+                    ax.set_title('Motion Activity Throughout Video')
+                    ax.grid(True, alpha=0.3)
                     st.pyplot(fig)

     except Exception as e:
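
Both hunks also assume module-level imports and a Google Cloud Vision client that are created elsewhere in app.py. A minimal sketch of that setup, assuming default application credentials rather than whatever credential handling app.py actually uses:

# Presumed module-level setup for the names used in the diff (cv2, np, st, plt,
# vision, client). app.py's real setup may differ, e.g. it may build the client
# from Streamlit secrets instead of default credentials.
import cv2
import numpy as np
import streamlit as st
import matplotlib.pyplot as plt
from google.cloud import vision

# Requires GOOGLE_APPLICATION_CREDENTIALS or equivalent to be configured.
client = vision.ImageAnnotatorClient()

Judging from the second hunk, process_video_file presumably returns these counters under results["detection_stats"], which main() then summarizes in the Streamlit UI.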