Update app.py
app.py
CHANGED
@@ -1166,7 +1166,210 @@ def process_video_file(video_file, analysis_types, processing_mode="Hybrid (Goog
 cv2.putText(frame, f"Mode: {processing_mode}",
             (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)

+# Convert frame to grayscale for motion detection
+current_frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+current_frame_gray = cv2.GaussianBlur(current_frame_gray, (21, 21), 0)
+
+if previous_frame_gray is not None:
+    # Calculate frame difference for activity detection
+    frame_diff = cv2.absdiff(current_frame_gray, previous_frame_gray)
+    activity_level = np.mean(frame_diff)
+    detection_stats["activity_metrics"].append((frame_count/fps, activity_level))
+
+    # Scene change detection
+    if activity_level > scene_change_threshold:
+        detection_stats["scene_changes"].append(frame_count/fps)
+        # Mark scene change on frame
+        cv2.putText(frame, "SCENE CHANGE",
+                    (width // 2 - 100, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 255), 2)
+
+    # Add optical flow tracking if enabled
+    if use_advanced_tracking and prev_points is not None:
+        try:
+            # Calculate optical flow
+            next_points, status, _ = cv2.calcOpticalFlowPyrLK(previous_frame_gray,
+                                                              current_frame_gray,
+                                                              prev_points,
+                                                              None,
+                                                              **lk_params)
+
+            # Select good points
+            if next_points is not None:
+                good_new = next_points[status==1]
+                good_old = prev_points[status==1]
+
+                # Draw motion tracks
+                for i, (new, old) in enumerate(zip(good_new, good_old)):
+                    a, b = new.ravel()
+                    c, d = old.ravel()
+                    # Draw motion lines
+                    cv2.line(frame, (int(c), int(d)), (int(a), int(b)), (0, 255, 255), 2)
+                    cv2.circle(frame, (int(a), int(b)), 3, (0, 255, 0), -1)
+        except Exception as e:
+            # If optical flow fails, just continue without it
+            pass
+
+# Update tracking points periodically if enabled
+if use_advanced_tracking and (frame_count % 5 == 0 or prev_points is None or (prev_points is not None and len(prev_points) < 10)):
+    try:
+        prev_points = cv2.goodFeaturesToTrack(current_frame_gray, **feature_params)
+    except Exception:
+        # If feature tracking fails, just continue without it
+        prev_points = None
+
+previous_frame_gray = current_frame_gray
+
+# Process frames with Vision API if using Google Vision
+if (processing_mode == "Google Vision API Only" or processing_mode == "Hybrid (Google Vision + OpenCV)") and \
+        frame_count % process_every_n_frames == 0 and client is not None:
+
+    # Convert frame to JPEG for Vision API
+    success, jpeg_frame = cv2.imencode('.jpg', frame)
+    if success:
+        image_content = jpeg_frame.tobytes()
+
+        # Create vision image
+        vision_image = vision.Image(content=image_content)
+
+        try:
+            # Perform detection based on selected types
+            if "Objects" in analysis_types:
+                objects = client.object_localization(image=vision_image)
+                # Filter objects by confidence threshold
+                filtered_objects = [obj for obj in objects.localized_object_annotations
+                                    if obj.score >= confidence_threshold]
+
+                # Update object counts in stats
+                for obj in filtered_objects:
+                    if obj.name in detection_stats["objects"]:
+                        detection_stats["objects"][obj.name] += 1
+                    else:
+                        detection_stats["objects"][obj.name] = 1
+
+                    # Draw object boundaries
+                    box = [(vertex.x * frame.shape[1], vertex.y * frame.shape[0])
+                           for vertex in obj.bounding_poly.normalized_vertices]
+                    points = np.array([[int(p[0]), int(p[1])] for p in box])
+                    cv2.polylines(frame, [points], True, (0, 255, 0), 2)
+
+                    # Add label with confidence
+                    cv2.putText(frame, f"{obj.name}: {int(obj.score * 100)}%",
+                                (int(box[0][0]), int(box[0][1] - 10)),
+                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
+
+                    # Add to trackers for future frames
+                    # Calculate bounding box
+                    x_values = [p[0] for p in box]
+                    y_values = [p[1] for p in box]
+                    x_min, x_max = min(x_values), max(x_values)
+                    y_min, y_max = min(y_values), max(y_values)
+
+                    object_trackers[obj.name] = {
+                        "bbox": (int(x_min), int(y_min), int(x_max - x_min), int(y_max - y_min)),
+                        "last_seen": frame_count,
+                        "score": obj.score
+                    }
+
+            # Process faces if selected
+            if "Face Detection" in analysis_types:
+                faces = client.face_detection(image=vision_image)
+                # Count faces and draw boundaries
+                face_count = 0
+                for face in faces.face_annotations:
+                    if face.detection_confidence >= confidence_threshold:
+                        face_count += 1
+
+                        # Draw face boundary
+                        vertices = face.bounding_poly.vertices
+                        points = [(vertex.x, vertex.y) for vertex in vertices]
+                        points = np.array([[p[0], p[1]] for p in points])
+                        cv2.polylines(frame, [points], True, (0, 0, 255), 2)
+
+                        # Add confidence score
+                        cv2.putText(frame, f"Face: {int(face.detection_confidence * 100)}%",
+                                    (points[0][0], points[0][1] - 10),
+                                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
+
+                        # Draw facial landmarks if enabled
+                        if enable_face_landmarks:
+                            for landmark in face.landmarks:
+                                px = landmark.position.x
+                                py = landmark.position.y
+                                cv2.circle(frame, (int(px), int(py)), 2, (255, 255, 0), -1)
+
+                # Update face count
+                detection_stats["faces"] += face_count
+
+            # Process text if selected
+            if "Text" in analysis_types:
+                text = client.text_detection(image=vision_image)
+                if text.text_annotations:
+                    # Count text blocks
+                    text_blocks = len(text.text_annotations) - 1  # Subtract 1 for the full text annotation
+                    detection_stats["text_blocks"] += text_blocks
+
+                    # Draw text bounding boxes
+                    for text_annot in text.text_annotations[1:]:  # Skip the first one (full text)
+                        box = [(vertex.x, vertex.y) for vertex in text_annot.bounding_poly.vertices]
+                        points = np.array([[int(p[0]), int(p[1])] for p in box])
+                        cv2.polylines(frame, [points], True, (255, 0, 0), 2)
+
+                        # Add recognized text
+                        cv2.putText(frame, text_annot.description,
+                                    (points[0][0], points[0][1] - 10),
+                                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)
+        except Exception as e:
+            # Handle API errors gracefully
+            error_msg = f"API Error: {str(e)}"
+            cv2.putText(frame, error_msg, (10, 70),
+                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
+
+# Process with OpenCV object detection if enabled
+if (processing_mode == "OpenCV Only" or processing_mode == "Hybrid (Google Vision + OpenCV)") and \
+        opencv_detector is not None and \
+        (frame_count % process_every_n_frames == 0):
+
+    # The OpenCV detection code goes here...
+    # This would be similar to what's in the VideoProcessor.transform method
+
+    try:
+        # If using HOG detector (the fallback)
+        if isinstance(opencv_detector, cv2.HOGDescriptor):
+            # Detect people
+            boxes, weights = opencv_detector.detectMultiScale(
+                frame, winStride=(8, 8), padding=(4, 4), scale=1.05
+            )
+
+            # Draw bounding boxes
+            for i, (x, y, w, h) in enumerate(boxes):
+                if weights[i] > 0.3:  # Confidence threshold
+                    cv2.rectangle(frame, (x, y), (x+w, y+h), (255, 0, 0), 2)
+                    cv2.putText(frame, f"Person: {int(weights[i] * 100)}%",
+                                (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)
+
+                    # Add to trackers
+                    object_trackers["person"] = {
+                        "bbox": (x, y, w, h),
+                        "last_seen": frame_count,
+                        "score": weights[i]
+                    }
+
+                    # Update count in stats
+                    if "person" in detection_stats["objects"]:
+                        detection_stats["objects"]["person"] += 1
+                    else:
+                        detection_stats["objects"]["person"] = 1
+        else:
+            # Using YOLO or another DNN-based detector code would go here
+            pass
+
+    except Exception as e:
+        cv2.putText(frame, f"OpenCV Error: {str(e)}", (10, 110),
+                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
+
+# Add hint about slowed down speed
+cv2.putText(frame, "Playback: 60% speed for better visualization",
+            (width - 400, height - 30), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 200, 0), 2)

 # Write the frame to output video
 out.write(frame)
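
The added frame-loop code above relies on state that is initialized earlier in process_video_file and does not appear in this hunk: detection_stats, object_trackers, previous_frame_gray, prev_points, scene_change_threshold, process_every_n_frames, lk_params and feature_params. A minimal sketch of how that state might be set up before the loop, assuming typical Shi-Tomasi and Lucas-Kanade defaults; the actual definitions and values in app.py may differ:

# Hypothetical initialization for the per-video state used in the loop above.
# The real definitions live earlier in app.py and may use different values.
detection_stats = {
    "objects": {},           # object name -> occurrence count
    "faces": 0,              # running total of detected faces
    "text_blocks": 0,        # running total of detected text blocks
    "scene_changes": [],     # timestamps (seconds) of detected scene changes
    "activity_metrics": [],  # (timestamp, mean frame difference) pairs
}
object_trackers = {}         # object name -> {"bbox", "last_seen", "score"}

scene_change_threshold = 30.0   # mean frame difference that flags a scene change (assumed)
process_every_n_frames = 15     # send every Nth frame to the detectors (assumed)

# Shi-Tomasi corner parameters for cv2.goodFeaturesToTrack (assumed defaults)
feature_params = dict(maxCorners=100, qualityLevel=0.3, minDistance=7, blockSize=7)
# Lucas-Kanade parameters for cv2.calcOpticalFlowPyrLK (assumed defaults)
lk_params = dict(winSize=(15, 15), maxLevel=2,
                 criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03))

previous_frame_gray = None   # grayscale copy of the prior frame for differencing
prev_points = None           # feature points carried between frames for optical flow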

@@ -2092,35 +2295,49 @@ def main():

                 st.markdown("**Top Objects:**")
                 for obj, count in list(sorted_objects.items())[:10]:
                     st.markdown(f"- {obj}: {count} occurrences")
+            else:
+                st.info("No objects were detected in the video.")

             # Display face detection summary
+            if "Face Detection" in analysis_types:
                 st.markdown("#### π€ Face Analysis")
+                if results["detection_stats"]["faces"] > 0:
+                    st.markdown(f"Total faces detected: {results['detection_stats']['faces']}")
+                else:
+                    st.info("No faces were detected in the video.")

             # Display text detection summary
+            if "Text" in analysis_types:
                 st.markdown("#### π Text Analysis")
+                if results["detection_stats"]["text_blocks"] > 0:
+                    st.markdown(f"Total text blocks detected: {results['detection_stats']['text_blocks']}")
+                else:
+                    st.info("No text was detected in the video.")

+            # Display scene analysis
+            if "Motion" in analysis_types:
+                st.markdown("#### π¬ Scene Analysis")

+                # Display scene changes
+                if results["detection_stats"]["scene_changes"]:
+                    st.markdown(f"**Scene Changes:** {len(results['detection_stats']['scene_changes'])} detected")
+                    st.markdown("Scene changes at time points (seconds):")
+                    scene_times = [f"{t:.2f}s" for t in results["detection_stats"]["scene_changes"]]
+                    st.write(", ".join(scene_times))

+                # Activity metrics visualization
+                if results["detection_stats"]["activity_metrics"]:
+                    st.markdown("**Activity Level Over Time:**")
+                    activity_data = results["detection_stats"]["activity_metrics"]
+                    times = [point[0] for point in activity_data]
+                    levels = [point[1] for point in activity_data]
+
+                    fig, ax = plt.subplots(figsize=(10, 4))
+                    ax.plot(times, levels, 'r-')
+                    ax.set_xlabel('Time (seconds)')
+                    ax.set_ylabel('Activity Level')
+                    ax.set_title('Motion Activity Throughout Video')
+                    ax.grid(True, alpha=0.3)
                     st.pyplot(fig)

     except Exception as e:
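
Both hunks also assume module-level imports and a Google Cloud Vision client that are created elsewhere in app.py. A minimal sketch of that setup, assuming default application credentials rather than whatever credential handling app.py actually uses:

# Presumed module-level setup for the names used in the diff (cv2, np, st, plt,
# vision, client). app.py's real setup may differ, e.g. it may build the client
# from Streamlit secrets instead of default credentials.
import cv2
import numpy as np
import streamlit as st
import matplotlib.pyplot as plt
from google.cloud import vision

# Requires GOOGLE_APPLICATION_CREDENTIALS or equivalent to be configured.
client = vision.ImageAnnotatorClient()

Judging from the second hunk, process_video_file presumably returns these counters under results["detection_stats"], which main() then summarizes in the Streamlit UI.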