CosmickVisions committed
Commit bc7057b · verified · 1 Parent(s): 6dc88fc

Update app.py

Files changed (1)
  1. app.py +238 -21
app.py CHANGED
@@ -1166,7 +1166,210 @@ def process_video_file(video_file, analysis_types, processing_mode="Hybrid (Goog
        cv2.putText(frame, f"Mode: {processing_mode}",
                    (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)

-       # ... rest of the processing code ...
+       # Convert frame to grayscale for motion detection
+       current_frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+       current_frame_gray = cv2.GaussianBlur(current_frame_gray, (21, 21), 0)
+
+       if previous_frame_gray is not None:
+           # Calculate frame difference for activity detection
+           frame_diff = cv2.absdiff(current_frame_gray, previous_frame_gray)
+           activity_level = np.mean(frame_diff)
+           detection_stats["activity_metrics"].append((frame_count/fps, activity_level))
+
+           # Scene change detection
+           if activity_level > scene_change_threshold:
+               detection_stats["scene_changes"].append(frame_count/fps)
+               # Mark scene change on frame
+               cv2.putText(frame, "SCENE CHANGE",
+                           (width // 2 - 100, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 255), 2)
+
+           # Add optical flow tracking if enabled
+           if use_advanced_tracking and prev_points is not None:
+               try:
+                   # Calculate optical flow
+                   next_points, status, _ = cv2.calcOpticalFlowPyrLK(previous_frame_gray,
+                                                                     current_frame_gray,
+                                                                     prev_points,
+                                                                     None,
+                                                                     **lk_params)
+
+                   # Select good points
+                   if next_points is not None:
+                       good_new = next_points[status == 1]
+                       good_old = prev_points[status == 1]
+
+                       # Draw motion tracks
+                       for i, (new, old) in enumerate(zip(good_new, good_old)):
+                           a, b = new.ravel()
+                           c, d = old.ravel()
+                           # Draw motion lines
+                           cv2.line(frame, (int(c), int(d)), (int(a), int(b)), (0, 255, 255), 2)
+                           cv2.circle(frame, (int(a), int(b)), 3, (0, 255, 0), -1)
+               except Exception as e:
+                   # If optical flow fails, just continue without it
+                   pass
+
+       # Update tracking points periodically if enabled
+       if use_advanced_tracking and (frame_count % 5 == 0 or prev_points is None or (prev_points is not None and len(prev_points) < 10)):
+           try:
+               prev_points = cv2.goodFeaturesToTrack(current_frame_gray, **feature_params)
+           except Exception:
+               # If feature tracking fails, just continue without it
+               prev_points = None
+
+       previous_frame_gray = current_frame_gray
+
+       # Process frames with Vision API if using Google Vision
+       if (processing_mode == "Google Vision API Only" or processing_mode == "Hybrid (Google Vision + OpenCV)") and \
+               frame_count % process_every_n_frames == 0 and client is not None:
+
+           # Convert frame to JPEG for Vision API
+           success, jpeg_frame = cv2.imencode('.jpg', frame)
+           if success:
+               image_content = jpeg_frame.tobytes()
+
+               # Create vision image
+               vision_image = vision.Image(content=image_content)
+
+               try:
+                   # Perform detection based on selected types
+                   if "Objects" in analysis_types:
+                       objects = client.object_localization(image=vision_image)
+                       # Filter objects by confidence threshold
+                       filtered_objects = [obj for obj in objects.localized_object_annotations
+                                           if obj.score >= confidence_threshold]
+
+                       # Update object counts in stats
+                       for obj in filtered_objects:
+                           if obj.name in detection_stats["objects"]:
+                               detection_stats["objects"][obj.name] += 1
+                           else:
+                               detection_stats["objects"][obj.name] = 1
+
+                           # Draw object boundaries
+                           box = [(vertex.x * frame.shape[1], vertex.y * frame.shape[0])
+                                  for vertex in obj.bounding_poly.normalized_vertices]
+                           points = np.array([[int(p[0]), int(p[1])] for p in box])
+                           cv2.polylines(frame, [points], True, (0, 255, 0), 2)
+
+                           # Add label with confidence
+                           cv2.putText(frame, f"{obj.name}: {int(obj.score * 100)}%",
+                                       (int(box[0][0]), int(box[0][1] - 10)),
+                                       cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
+
+                           # Add to trackers for future frames
+                           # Calculate bounding box
+                           x_values = [p[0] for p in box]
+                           y_values = [p[1] for p in box]
+                           x_min, x_max = min(x_values), max(x_values)
+                           y_min, y_max = min(y_values), max(y_values)
+
+                           object_trackers[obj.name] = {
+                               "bbox": (int(x_min), int(y_min), int(x_max - x_min), int(y_max - y_min)),
+                               "last_seen": frame_count,
+                               "score": obj.score
+                           }
+
+                   # Process faces if selected
+                   if "Face Detection" in analysis_types:
+                       faces = client.face_detection(image=vision_image)
+                       # Count faces and draw boundaries
+                       face_count = 0
+                       for face in faces.face_annotations:
+                           if face.detection_confidence >= confidence_threshold:
+                               face_count += 1
+
+                               # Draw face boundary
+                               vertices = face.bounding_poly.vertices
+                               points = [(vertex.x, vertex.y) for vertex in vertices]
+                               points = np.array([[p[0], p[1]] for p in points])
+                               cv2.polylines(frame, [points], True, (0, 0, 255), 2)
+
+                               # Add confidence score
+                               cv2.putText(frame, f"Face: {int(face.detection_confidence * 100)}%",
+                                           (points[0][0], points[0][1] - 10),
+                                           cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
+
+                               # Draw facial landmarks if enabled
+                               if enable_face_landmarks:
+                                   for landmark in face.landmarks:
+                                       px = landmark.position.x
+                                       py = landmark.position.y
+                                       cv2.circle(frame, (int(px), int(py)), 2, (255, 255, 0), -1)
+
+                       # Update face count
+                       detection_stats["faces"] += face_count
+
+                   # Process text if selected
+                   if "Text" in analysis_types:
+                       text = client.text_detection(image=vision_image)
+                       if text.text_annotations:
+                           # Count text blocks
+                           text_blocks = len(text.text_annotations) - 1  # Subtract 1 for the full text annotation
+                           detection_stats["text_blocks"] += text_blocks
+
+                           # Draw text bounding boxes
+                           for text_annot in text.text_annotations[1:]:  # Skip the first one (full text)
+                               box = [(vertex.x, vertex.y) for vertex in text_annot.bounding_poly.vertices]
+                               points = np.array([[int(p[0]), int(p[1])] for p in box])
+                               cv2.polylines(frame, [points], True, (255, 0, 0), 2)
+
+                               # Add recognized text
+                               cv2.putText(frame, text_annot.description,
+                                           (points[0][0], points[0][1] - 10),
+                                           cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)
+               except Exception as e:
+                   # Handle API errors gracefully
+                   error_msg = f"API Error: {str(e)}"
+                   cv2.putText(frame, error_msg, (10, 70),
+                               cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
+
+       # Process with OpenCV object detection if enabled
+       if (processing_mode == "OpenCV Only" or processing_mode == "Hybrid (Google Vision + OpenCV)") and \
+               opencv_detector is not None and \
+               (frame_count % process_every_n_frames == 0):
+
+           # The OpenCV detection code goes here...
+           # This would be similar to what's in the VideoProcessor.transform method
+
+           try:
+               # If using HOG detector (the fallback)
+               if isinstance(opencv_detector, cv2.HOGDescriptor):
+                   # Detect people
+                   boxes, weights = opencv_detector.detectMultiScale(
+                       frame, winStride=(8, 8), padding=(4, 4), scale=1.05
+                   )
+
+                   # Draw bounding boxes
+                   for i, (x, y, w, h) in enumerate(boxes):
+                       if weights[i] > 0.3:  # Confidence threshold
+                           cv2.rectangle(frame, (x, y), (x+w, y+h), (255, 0, 0), 2)
+                           cv2.putText(frame, f"Person: {int(weights[i] * 100)}%",
+                                       (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)
+
+                           # Add to trackers
+                           object_trackers["person"] = {
+                               "bbox": (x, y, w, h),
+                               "last_seen": frame_count,
+                               "score": weights[i]
+                           }
+
+                           # Update count in stats
+                           if "person" in detection_stats["objects"]:
+                               detection_stats["objects"]["person"] += 1
+                           else:
+                               detection_stats["objects"]["person"] = 1
+               else:
+                   # Using YOLO or another DNN-based detector code would go here
+                   pass
+
+           except Exception as e:
+               cv2.putText(frame, f"OpenCV Error: {str(e)}", (10, 110),
+                           cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
+
+       # Add hint about slowed down speed
+       cv2.putText(frame, "Playback: 60% speed for better visualization",
+                   (width - 400, height - 30), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 200, 0), 2)

        # Write the frame to output video
        out.write(frame)
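
The tracking logic in the hunk above relies on state that is initialized earlier in `process_video_file` and does not appear in this diff: `previous_frame_gray`, `prev_points`, `scene_change_threshold`, and the `lk_params` / `feature_params` dictionaries passed to `cv2.calcOpticalFlowPyrLK` and `cv2.goodFeaturesToTrack`. A minimal sketch of that setup, assuming OpenCV's standard Lucas-Kanade parameters (the app's actual values may differ):

```python
import cv2

# Assumed initialization (not shown in this diff): corner-detection parameters
# for cv2.goodFeaturesToTrack and pyramidal Lucas-Kanade parameters for
# cv2.calcOpticalFlowPyrLK, following OpenCV's standard example values.
feature_params = dict(
    maxCorners=100,    # track at most 100 corners per refresh
    qualityLevel=0.3,  # minimum accepted corner quality
    minDistance=7,     # minimum pixel distance between corners
    blockSize=7,
)
lk_params = dict(
    winSize=(15, 15),  # search window around each tracked point
    maxLevel=2,        # pyramid levels, to follow larger motions
    criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03),
)

# Per-video state mutated by the frame loop above.
previous_frame_gray = None      # grayscale copy of the previous frame
prev_points = None              # feature points, refreshed every 5 frames
scene_change_threshold = 30.0   # mean abs-diff that flags a scene change (illustrative value)
```

With this setup, `goodFeaturesToTrack` re-seeds the tracked corners every few frames and `calcOpticalFlowPyrLK` propagates them between consecutive grayscale frames, which is what the yellow motion tracks drawn above visualize.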
 
@@ -2092,35 +2295,49 @@ def main():
                st.markdown("**Top Objects:**")
                for obj, count in list(sorted_objects.items())[:10]:
                    st.markdown(f"- {obj}: {count} occurrences")
+           else:
+               st.info("No objects were detected in the video.")

            # Display face detection summary
-           if "Face Detection" in analysis_types and results["detection_stats"]["faces"] > 0:
+           if "Face Detection" in analysis_types:
                st.markdown("#### 👀 Face Analysis")
-               st.markdown(f"Total faces detected: {results['detection_stats']['faces']}")
+               if results["detection_stats"]["faces"] > 0:
+                   st.markdown(f"Total faces detected: {results['detection_stats']['faces']}")
+               else:
+                   st.info("No faces were detected in the video.")

            # Display text detection summary
-           if "Text" in analysis_types and results["detection_stats"]["text_blocks"] > 0:
+           if "Text" in analysis_types:
                st.markdown("#### 📝 Text Analysis")
-               st.markdown(f"Total text blocks detected: {results['detection_stats']['text_blocks']}")
+               if results["detection_stats"]["text_blocks"] > 0:
+                   st.markdown(f"Total text blocks detected: {results['detection_stats']['text_blocks']}")
+               else:
+                   st.info("No text was detected in the video.")

-           # Display label detection summary
-           if "Labels" in analysis_types and results["detection_stats"]["labels"]:
-               st.markdown("#### 🏷️ Scene Labels")
+           # Display scene analysis
+           if "Motion" in analysis_types:
+               st.markdown("#### 🎬 Scene Analysis")

-               # Sort labels by frequency
-               sorted_labels = dict(sorted(results["detection_stats"]["labels"].items(),
-                                           key=lambda x: x[1], reverse=True))
+               # Display scene changes
+               if results["detection_stats"]["scene_changes"]:
+                   st.markdown(f"**Scene Changes:** {len(results['detection_stats']['scene_changes'])} detected")
+                   st.markdown("Scene changes at time points (seconds):")
+                   scene_times = [f"{t:.2f}s" for t in results["detection_stats"]["scene_changes"]]
+                   st.write(", ".join(scene_times))

-               # Create pie chart for top labels
-               if sorted_labels:
-                   fig, ax = plt.subplots(figsize=(8, 8))
-                   top_labels = dict(list(sorted_labels.items())[:7])
-                   if len(sorted_labels) > 7:
-                       other_count = sum(list(sorted_labels.values())[7:])
-                       top_labels["Other"] = other_count
-
-                   ax.pie(top_labels.values(), labels=top_labels.keys(), autopct='%1.1f%%')
-                   ax.set_title('Distribution of Scene Labels')
+               # Activity metrics visualization
+               if results["detection_stats"]["activity_metrics"]:
+                   st.markdown("**Activity Level Over Time:**")
+                   activity_data = results["detection_stats"]["activity_metrics"]
+                   times = [point[0] for point in activity_data]
+                   levels = [point[1] for point in activity_data]
+
+                   fig, ax = plt.subplots(figsize=(10, 4))
+                   ax.plot(times, levels, 'r-')
+                   ax.set_xlabel('Time (seconds)')
+                   ax.set_ylabel('Activity Level')
+                   ax.set_title('Motion Activity Throughout Video')
+                   ax.grid(True, alpha=0.3)
                    st.pyplot(fig)

    except Exception as e:
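
The summary code in `main()` reads `results["detection_stats"]` for the counters and time series that the frame loop accumulates. Inferred from how this diff writes and reads that dictionary, it has roughly the following shape; the sample values are illustrative only:

```python
# Expected shape of results["detection_stats"], inferred from this diff.
# The values below are placeholders, not real output.
detection_stats = {
    "objects": {"person": 12, "car": 3},           # per-label occurrence counts
    "faces": 4,                                    # total faces across sampled frames
    "text_blocks": 2,                              # total text blocks across sampled frames
    "scene_changes": [4.20, 17.85],                # timestamps (seconds) of detected scene cuts
    "activity_metrics": [(0.5, 2.1), (1.0, 3.4)],  # (time in seconds, mean frame difference)
}
results = {"detection_stats": detection_stats}
```

Note that the removed "Labels" pie-chart branch indexed `detection_stats["labels"]`; that key is no longer read by this summary section, since the label chart was replaced by the motion/scene analysis shown above.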