CosmickVisions committed on
Commit 87a3a89 · verified · 1 Parent(s): 3ce4149

Update app.py

Files changed (1)
  1. app.py +14 -364
app.py CHANGED
@@ -812,374 +812,24 @@ def process_video_file(video_file, analysis_types):
     max_frames = int(fps * 10)
     total_frames = min(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)), max_frames)
 
-    # Inform user if video is being truncated
-    if int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) > max_frames:
-        st.info("⚠️ Video is longer than 10 seconds. Only the first 10 seconds will be processed.")
-
-    # Slow down the output video by reducing the fps (60% of original speed)
-    output_fps = fps * 0.6
-    st.info(f"Output video will be slowed down to {output_fps:.1f} FPS (60% of original speed) for better visualization.")
-
-    # Create video writer with higher quality settings
-    try:
-        # Try XVID first (widely available)
-        fourcc = cv2.VideoWriter_fourcc(*'XVID')
-    except Exception:
-        # If that fails, try Motion JPEG
-        try:
-            fourcc = cv2.VideoWriter_fourcc(*'MJPG')
-        except Exception:
-            # Last resort - use uncompressed
-            fourcc = cv2.VideoWriter_fourcc(*'DIB ')  # Uncompressed RGB
-    out = cv2.VideoWriter(output_path, fourcc, output_fps, (width, height), isColor=True)
-
-    # Process every Nth frame to reduce API calls but increase from 10 to 5 for more detail
+    # Define all configuration values at the beginning of the function
+    # ----------------- Key Parameters -----------------
+    # Scene change detection threshold
+    scene_change_threshold = 40.0  # Adjust as needed: lower = more sensitive
+    # Process every Nth frame to reduce API calls
     process_every_n_frames = 5
-
-    # Create a progress bar
-    progress_bar = st.progress(0)
-    status_text = st.empty()
-
-    # Enhanced statistics tracking
-    detection_stats = {
-        "objects": {},
-        "faces": 0,
-        "text_blocks": 0,
-        "labels": {},
-        # New advanced tracking
-        "object_tracking": {},   # Track object appearances by frame
-        "activity_metrics": [],  # Track frame-to-frame differences
-        "scene_changes": []      # Track major scene transitions
-    }
-
-    # For scene change detection and motion tracking
-    previous_frame_gray = None
-    prev_points = None
+    # Optical flow parameters
     lk_params = dict(winSize=(15, 15),
-                     maxLevel=2,
-                     criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03))
-
-    # Feature detection params for tracking
+                     maxLevel=2,
+                     criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03))
+    # Feature detection parameters
     feature_params = dict(maxCorners=100,
-                          qualityLevel=0.3,
-                          minDistance=7,
-                          blockSize=7)
+                          qualityLevel=0.3,
+                          minDistance=7,
+                          blockSize=7)
+    # ----------------- End Parameters -----------------
 
-    try:
-        frame_count = 0
-        while frame_count < max_frames:  # Limit to 10 seconds
-            ret, frame = cap.read()
-            if not ret:
-                break
-
-            frame_count += 1
-
-            # Update progress
-            progress = int(frame_count / total_frames * 100)
-            progress_bar.progress(progress)
-            status_text.text(f"Processing frame {frame_count}/{total_frames} ({progress}%) - {frame_count/fps:.1f}s of 10s")
-
-            # Add timestamp to frame
-            cv2.putText(frame, f"Time: {frame_count/fps:.2f}s",
-                        (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
-
-            # Activity detection and scene change detection
-            current_frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
-            current_frame_gray = cv2.GaussianBlur(current_frame_gray, (21, 21), 0)
-
-            if previous_frame_gray is not None:
-                # Calculate frame difference for activity detection
-                frame_diff = cv2.absdiff(current_frame_gray, previous_frame_gray)
-                _, thresh = cv2.threshold(frame_diff, 25, 255, cv2.THRESH_BINARY)
-                thresh = cv2.dilate(thresh, None, iterations=2)
-                contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-
-                # Better activity metric using contour area
-                activity_level = sum(cv2.contourArea(c) for c in contours) / (frame.shape[0] * frame.shape[1])
-                activity_level *= 100  # Convert to percentage
-                detection_stats["activity_metrics"].append((frame_count/fps, activity_level))
-
-                # Add optical flow for better motion tracking
-                if "Objects" in analysis_types and prev_points is not None:
-                    # Calculate optical flow
-                    next_points, status, _ = cv2.calcOpticalFlowPyrLK(previous_frame_gray,
-                                                                      current_frame_gray,
-                                                                      prev_points,
-                                                                      None,
-                                                                      **lk_params)
-
-                    # Select good points
-                    if next_points is not None:
-                        good_new = next_points[status==1]
-                        good_old = prev_points[status==1]
-
-                        # Draw motion tracks
-                        for i, (new, old) in enumerate(zip(good_new, good_old)):
-                            a, b = new.ravel()
-                            c, d = old.ravel()
-                            # Draw motion lines
-                            cv2.line(frame, (int(c), int(d)), (int(a), int(b)), (0, 255, 255), 2)
-                            cv2.circle(frame, (int(a), int(b)), 3, (0, 255, 0), -1)
-
-                # Scene change detection using contour analysis for more robust results
-                if activity_level > scene_change_threshold:
-                    detection_stats["scene_changes"].append(frame_count/fps)
-                    # Mark scene change on frame
-                    cv2.putText(frame, "SCENE CHANGE",
-                                (width // 2 - 100, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 255), 2)
-
-                    # Reset tracking points on scene change
-                    prev_points = None
-
-            # Update tracking points periodically
-            if frame_count % 5 == 0 or prev_points is None or len(prev_points) < 10:
-                prev_points = cv2.goodFeaturesToTrack(current_frame_gray, **feature_params)
-
-            previous_frame_gray = current_frame_gray
-
-            # Process frames with Vision API
-            if frame_count % process_every_n_frames == 0:
-                try:
-                    # Convert OpenCV frame to PIL Image for Vision API
-                    pil_img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
-
-                    # Create vision image
-                    img_byte_arr = io.BytesIO()
-                    pil_img.save(img_byte_arr, format='PNG')
-                    content = img_byte_arr.getvalue()
-                    vision_image = vision.Image(content=content)
-
-                    # Apply analysis based on selected types with enhanced detail
-                    if "Objects" in analysis_types:
-                        objects = client.object_localization(image=vision_image)
-                        # Draw boxes around detected objects with enhanced info
-                        for obj in objects.localized_object_annotations:
-                            obj_name = obj.name
-                            # Update basic stats
-                            if obj_name in detection_stats["objects"]:
-                                detection_stats["objects"][obj_name] += 1
-                            else:
-                                detection_stats["objects"][obj_name] = 1
-
-                            # Enhanced object tracking
-                            timestamp = frame_count/fps
-                            if obj_name not in detection_stats["object_tracking"]:
-                                detection_stats["object_tracking"][obj_name] = {
-                                    "first_seen": timestamp,
-                                    "last_seen": timestamp,
-                                    "frames_present": 1,
-                                    "timestamps": [timestamp]
-                                }
-                            else:
-                                tracking = detection_stats["object_tracking"][obj_name]
-                                tracking["frames_present"] += 1
-                                tracking["last_seen"] = timestamp
-                                tracking["timestamps"].append(timestamp)
-
-                            # Calculate box coordinates
-                            box = [(vertex.x * frame.shape[1], vertex.y * frame.shape[0])
-                                   for vertex in obj.bounding_poly.normalized_vertices]
-                            box = np.array(box, np.int32).reshape((-1, 1, 2))
-
-                            # Draw more noticeable box with thicker lines
-                            cv2.polylines(frame, [box], True, (0, 255, 0), 3)
-
-                            # Calculate box size for better placement of labels
-                            x_min = min([p[0][0] for p in box])
-                            y_min = min([p[0][1] for p in box])
-
-                            # Draw filled box with opacity for better label visibility
-                            overlay = frame.copy()
-                            box_np = np.array(box)
-                            hull = cv2.convexHull(box_np)
-                            cv2.fillConvexPoly(overlay, hull, (0, 255, 0, 64))
-                            # Apply overlay with transparency
-                            alpha = 0.3
-                            cv2.addWeighted(overlay, alpha, frame, 1 - alpha, 0, frame)
-
-                            # Enhanced label with confidence and border
-                            confidence = int(obj.score * 100)
-                            label_text = f"{obj.name}: {confidence}%"
-                            text_size = cv2.getTextSize(label_text, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)[0]
-
-                            # Create better text background with rounded rectangle
-                            text_bg_pts = np.array([
-                                [x_min, y_min - text_size[1] - 10],
-                                [x_min + text_size[0] + 10, y_min - text_size[1] - 10],
-                                [x_min + text_size[0] + 10, y_min],
-                                [x_min, y_min]
-                            ], np.int32)
-
-                            cv2.fillPoly(frame, [text_bg_pts], (0, 0, 0))
-                            cv2.putText(frame, label_text,
-                                        (int(x_min) + 5, int(y_min) - 5),
-                                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
-
-                    if "Face Detection" in analysis_types:
-                        faces = client.face_detection(image=vision_image)
-                        # Track statistics
-                        detection_stats["faces"] += len(faces.face_annotations)
-
-                        for face in faces.face_annotations:
-                            vertices = face.bounding_poly.vertices
-                            points = [(vertex.x, vertex.y) for vertex in vertices]
-                            # Draw face box with thicker lines
-                            pts = np.array(points, np.int32).reshape((-1, 1, 2))
-                            cv2.polylines(frame, [pts], True, (0, 0, 255), 3)
-
-                            # Enhanced face info visualization
-                            emotions = []
-                            if face.joy_likelihood >= 3:
-                                emotions.append("Joy")
-                            if face.anger_likelihood >= 3:
-                                emotions.append("Anger")
-                            if face.surprise_likelihood >= 3:
-                                emotions.append("Surprise")
-                            if face.sorrow_likelihood >= 3:
-                                emotions.append("Sorrow")
-
-                            emotion_text = ", ".join(emotions) if emotions else "Neutral"
-                            x_min = min([p[0] for p in points])
-                            y_min = min([p[1] for p in points])
-
-                            # Add emotion gauge bar for better visualization
-                            emotions_map = {
-                                "Joy": (0, 255, 0),         # Green
-                                "Anger": (0, 0, 255),       # Red
-                                "Surprise": (255, 255, 0),  # Yellow
-                                "Sorrow": (255, 0, 0)       # Blue
-                            }
-
-                            # Add detailed emotion text with colored background
-                            text_size = cv2.getTextSize(emotion_text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)[0]
-                            cv2.rectangle(frame,
-                                          (int(x_min), int(y_min) - text_size[1] - 8),
-                                          (int(x_min) + text_size[0] + 8, int(y_min)),
-                                          (0, 0, 0), -1)
-
-                            cv2.putText(frame, emotion_text,
-                                        (int(x_min) + 4, int(y_min) - 4),
-                                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
-
-                            # Draw enhanced landmarks with connections
-                            if len(face.landmarks) > 0:
-                                landmarks = [(int(landmark.position.x), int(landmark.position.y))
-                                             for landmark in face.landmarks]
-
-                                # Draw each landmark
-                                for landmark in landmarks:
-                                    cv2.circle(frame, landmark, 3, (255, 255, 0), -1)
-
-                                # Connect landmarks for eyes, nose, mouth if there are enough points
-                                if len(landmarks) >= 8:
-                                    # These indices are approximate - adjust based on your actual data
-                                    eye_indices = [0, 1, 2, 3]
-                                    for i in range(len(eye_indices)-1):
-                                        cv2.line(frame, landmarks[eye_indices[i]],
-                                                 landmarks[eye_indices[i+1]], (255, 255, 0), 1)
-
-                    if "Text" in analysis_types:
-                        text = client.text_detection(image=vision_image)
-                        # Update stats
-                        if len(text.text_annotations) > 1:
-                            detection_stats["text_blocks"] += len(text.text_annotations) - 1
-
-                        # Add overall text summary to the frame
-                        if text.text_annotations:
-                            full_text = text.text_annotations[0].description
-                            words = full_text.split()
-                            short_text = " ".join(words[:5])
-                            if len(words) > 5:
-                                short_text += "..."
-
-                            # Add text summary to top of frame with better visibility
-                            cv2.rectangle(frame, (10, 60), (10 + len(short_text)*10, 90), (0, 0, 0), -1)
-                            cv2.putText(frame, f"Text: {short_text}",
-                                        (10, 80), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
-
-                        # Draw text boxes with improved visibility
-                        for text_annot in text.text_annotations[1:]:
-                            box = [(vertex.x, vertex.y) for vertex in text_annot.bounding_poly.vertices]
-                            pts = np.array(box, np.int32).reshape((-1, 1, 2))
-                            cv2.polylines(frame, [pts], True, (255, 0, 0), 2)  # Thicker lines
-
-                    # Add Labels analysis for more detail
-                    if "Labels" in analysis_types:
-                        labels = client.label_detection(image=vision_image, max_results=5)
-
-                        # Add labels to the frame with better visibility
-                        y_pos = 120
-                        cv2.rectangle(frame, (10, y_pos-20), (250, y_pos+20*len(labels.label_annotations)), (0, 0, 0), -1)
-                        cv2.putText(frame, "Scene labels:", (15, y_pos), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
-
-                        # Track stats and show labels
-                        for i, label in enumerate(labels.label_annotations):
-                            # Update stats
-                            if label.description in detection_stats["labels"]:
-                                detection_stats["labels"][label.description] += 1
-                            else:
-                                detection_stats["labels"][label.description] = 1
-
-                            # Display on frame with larger text
-                            cv2.putText(frame, f"- {label.description}: {int(label.score*100)}%",
-                                        (15, y_pos + 20*(i+1)), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
-
-                except Exception as e:
-                    # Show error on frame
-                    cv2.putText(frame, f"API Error: {str(e)[:30]}",
-                                (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
-
-            # Add hint about slowed down speed
-            cv2.putText(frame, "Playback: 60% speed for better visualization",
-                        (width - 400, height - 30), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 200, 0), 2)
-
-            # Write the frame to output video
-            out.write(frame)
-
-        # Release resources
-        cap.release()
-        out.release()
-
-        # Clear progress indicators
-        progress_bar.empty()
-        status_text.empty()
-
-        # Read the processed video as bytes for download
-        with open(output_path, 'rb') as file:
-            processed_video_bytes = file.read()
-
-        # Clean up temporary files
-        os.unlink(temp_video_path)
-        os.unlink(output_path)
-
-        # Calculate additional statistics
-        for obj_name, tracking in detection_stats["object_tracking"].items():
-            # Calculate total screen time
-            tracking["screen_time"] = round(tracking["frames_present"] * (1/fps) * process_every_n_frames, 2)
-            # Calculate average confidence if available
-            if "confidences" in tracking and tracking["confidences"]:
-                tracking["avg_confidence"] = sum(tracking["confidences"]) / len(tracking["confidences"])
-
-        # Return enhanced results
-        results = {"detection_stats": detection_stats}
-
-        # Store results in session state for chatbot context
-        st.session_state.analysis_results = results
-
-        # Update vectorstore with new results
-        update_vectorstore_with_results(results)
-
-        return processed_video_bytes, results
-
-    except Exception as e:
-        # Clean up on error
-        cap.release()
-        if 'out' in locals():
-            out.release()
-        os.unlink(temp_video_path)
-        if os.path.exists(output_path):
-            os.unlink(output_path)
-        raise e
+    # Rest of the function continues as before...
 
 def load_bigquery_table(dataset_id, table_id, limit=1000):
     """Load data directly from an existing BigQuery table"""
 