CosmickVisions committed on
Commit 87a3a89 · verified · 1 Parent(s): 3ce4149

Update app.py

Files changed (1)
  1. app.py +14 -364
app.py CHANGED
@@ -812,374 +812,24 @@ def process_video_file(video_file, analysis_types):
     max_frames = int(fps * 10)
     total_frames = min(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)), max_frames)
 
-    # Inform user if video is being truncated
-    if int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) > max_frames:
-        st.info("⚠️ Video is longer than 10 seconds. Only the first 10 seconds will be processed.")
-
-    # Slow down the output video by reducing the fps (60% of original speed)
-    output_fps = fps * 0.6
-    st.info(f"Output video will be slowed down to {output_fps:.1f} FPS (60% of original speed) for better visualization.")
-
-    # Create video writer with higher quality settings
-    try:
-        # Try XVID first (widely available)
-        fourcc = cv2.VideoWriter_fourcc(*'XVID')
-    except Exception:
-        # If that fails, try Motion JPEG
-        try:
-            fourcc = cv2.VideoWriter_fourcc(*'MJPG')
-        except Exception:
-            # Last resort - use uncompressed
-            fourcc = cv2.VideoWriter_fourcc(*'DIB ')  # Uncompressed RGB
-    out = cv2.VideoWriter(output_path, fourcc, output_fps, (width, height), isColor=True)
-
-    # Process every Nth frame to reduce API calls but increase from 10 to 5 for more detail
+    # Define all configuration values at the beginning of the function
+    # ----------------- Key Parameters -----------------
+    # Scene change detection threshold
+    scene_change_threshold = 40.0  # Adjust as needed: lower = more sensitive
+    # Process every Nth frame to reduce API calls
     process_every_n_frames = 5
-
-    # Create a progress bar
-    progress_bar = st.progress(0)
-    status_text = st.empty()
-
-    # Enhanced statistics tracking
-    detection_stats = {
-        "objects": {},
-        "faces": 0,
-        "text_blocks": 0,
-        "labels": {},
-        # New advanced tracking
-        "object_tracking": {},   # Track object appearances by frame
-        "activity_metrics": [],  # Track frame-to-frame differences
-        "scene_changes": []      # Track major scene transitions
-    }
-
-    # For scene change detection and motion tracking
-    previous_frame_gray = None
-    prev_points = None
+    # Optical flow parameters
     lk_params = dict(winSize=(15, 15),
-                     maxLevel=2,
-                     criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03))
-
-    # Feature detection params for tracking
+                     maxLevel=2,
+                     criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03))
+    # Feature detection parameters
     feature_params = dict(maxCorners=100,
-                          qualityLevel=0.3,
-                          minDistance=7,
-                          blockSize=7)
+                          qualityLevel=0.3,
+                          minDistance=7,
+                          blockSize=7)
+    # ----------------- End Parameters -----------------
 
-    try:
-        frame_count = 0
-        while frame_count < max_frames:  # Limit to 10 seconds
-            ret, frame = cap.read()
-            if not ret:
-                break
-
-            frame_count += 1
-
-            # Update progress
-            progress = int(frame_count / total_frames * 100)
-            progress_bar.progress(progress)
-            status_text.text(f"Processing frame {frame_count}/{total_frames} ({progress}%) - {frame_count/fps:.1f}s of 10s")
-
-            # Add timestamp to frame
-            cv2.putText(frame, f"Time: {frame_count/fps:.2f}s",
-                        (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
-
-            # Activity detection and scene change detection
-            current_frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
-            current_frame_gray = cv2.GaussianBlur(current_frame_gray, (21, 21), 0)
-
-            if previous_frame_gray is not None:
-                # Calculate frame difference for activity detection
-                frame_diff = cv2.absdiff(current_frame_gray, previous_frame_gray)
-                _, thresh = cv2.threshold(frame_diff, 25, 255, cv2.THRESH_BINARY)
-                thresh = cv2.dilate(thresh, None, iterations=2)
-                contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-
-                # Better activity metric using contour area
-                activity_level = sum(cv2.contourArea(c) for c in contours) / (frame.shape[0] * frame.shape[1])
-                activity_level *= 100  # Convert to percentage
-                detection_stats["activity_metrics"].append((frame_count/fps, activity_level))
-
-                # Add optical flow for better motion tracking
-                if "Objects" in analysis_types and prev_points is not None:
-                    # Calculate optical flow
-                    next_points, status, _ = cv2.calcOpticalFlowPyrLK(previous_frame_gray,
-                                                                      current_frame_gray,
-                                                                      prev_points,
-                                                                      None,
-                                                                      **lk_params)
-
-                    # Select good points
-                    if next_points is not None:
-                        good_new = next_points[status==1]
-                        good_old = prev_points[status==1]
-
-                        # Draw motion tracks
-                        for i, (new, old) in enumerate(zip(good_new, good_old)):
-                            a, b = new.ravel()
-                            c, d = old.ravel()
-                            # Draw motion lines
-                            cv2.line(frame, (int(c), int(d)), (int(a), int(b)), (0, 255, 255), 2)
-                            cv2.circle(frame, (int(a), int(b)), 3, (0, 255, 0), -1)
-
-                # Scene change detection using contour analysis for more robust results
-                if activity_level > scene_change_threshold:
-                    detection_stats["scene_changes"].append(frame_count/fps)
-                    # Mark scene change on frame
-                    cv2.putText(frame, "SCENE CHANGE",
-                                (width // 2 - 100, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 255), 2)
-
-                    # Reset tracking points on scene change
-                    prev_points = None
-
-            # Update tracking points periodically
-            if frame_count % 5 == 0 or prev_points is None or len(prev_points) < 10:
-                prev_points = cv2.goodFeaturesToTrack(current_frame_gray, **feature_params)
-
-            previous_frame_gray = current_frame_gray
-
-            # Process frames with Vision API
-            if frame_count % process_every_n_frames == 0:
-                try:
-                    # Convert OpenCV frame to PIL Image for Vision API
-                    pil_img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
-
-                    # Create vision image
-                    img_byte_arr = io.BytesIO()
-                    pil_img.save(img_byte_arr, format='PNG')
-                    content = img_byte_arr.getvalue()
-                    vision_image = vision.Image(content=content)
-
-                    # Apply analysis based on selected types with enhanced detail
-                    if "Objects" in analysis_types:
-                        objects = client.object_localization(image=vision_image)
-                        # Draw boxes around detected objects with enhanced info
-                        for obj in objects.localized_object_annotations:
-                            obj_name = obj.name
-                            # Update basic stats
-                            if obj_name in detection_stats["objects"]:
-                                detection_stats["objects"][obj_name] += 1
-                            else:
-                                detection_stats["objects"][obj_name] = 1
-
-                            # Enhanced object tracking
-                            timestamp = frame_count/fps
-                            if obj_name not in detection_stats["object_tracking"]:
-                                detection_stats["object_tracking"][obj_name] = {
-                                    "first_seen": timestamp,
-                                    "last_seen": timestamp,
-                                    "frames_present": 1,
-                                    "timestamps": [timestamp]
-                                }
-                            else:
-                                tracking = detection_stats["object_tracking"][obj_name]
-                                tracking["frames_present"] += 1
-                                tracking["last_seen"] = timestamp
-                                tracking["timestamps"].append(timestamp)
-
-                            # Calculate box coordinates
-                            box = [(vertex.x * frame.shape[1], vertex.y * frame.shape[0])
-                                   for vertex in obj.bounding_poly.normalized_vertices]
-                            box = np.array(box, np.int32).reshape((-1, 1, 2))
-
-                            # Draw more noticeable box with thicker lines
-                            cv2.polylines(frame, [box], True, (0, 255, 0), 3)
-
-                            # Calculate box size for better placement of labels
-                            x_min = min([p[0][0] for p in box])
-                            y_min = min([p[0][1] for p in box])
-
-                            # Draw filled box with opacity for better label visibility
-                            overlay = frame.copy()
-                            box_np = np.array(box)
-                            hull = cv2.convexHull(box_np)
-                            cv2.fillConvexPoly(overlay, hull, (0, 255, 0, 64))
-                            # Apply overlay with transparency
-                            alpha = 0.3
-                            cv2.addWeighted(overlay, alpha, frame, 1 - alpha, 0, frame)
-
-                            # Enhanced label with confidence and border
-                            confidence = int(obj.score * 100)
-                            label_text = f"{obj.name}: {confidence}%"
-                            text_size = cv2.getTextSize(label_text, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)[0]
-
-                            # Create better text background with rounded rectangle
-                            text_bg_pts = np.array([
-                                [x_min, y_min - text_size[1] - 10],
-                                [x_min + text_size[0] + 10, y_min - text_size[1] - 10],
-                                [x_min + text_size[0] + 10, y_min],
-                                [x_min, y_min]
-                            ], np.int32)
-
-                            cv2.fillPoly(frame, [text_bg_pts], (0, 0, 0))
-                            cv2.putText(frame, label_text,
-                                        (int(x_min) + 5, int(y_min) - 5),
-                                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
-
-                    if "Face Detection" in analysis_types:
-                        faces = client.face_detection(image=vision_image)
-                        # Track statistics
-                        detection_stats["faces"] += len(faces.face_annotations)
-
-                        for face in faces.face_annotations:
-                            vertices = face.bounding_poly.vertices
-                            points = [(vertex.x, vertex.y) for vertex in vertices]
-                            # Draw face box with thicker lines
-                            pts = np.array(points, np.int32).reshape((-1, 1, 2))
-                            cv2.polylines(frame, [pts], True, (0, 0, 255), 3)
-
-                            # Enhanced face info visualization
-                            emotions = []
-                            if face.joy_likelihood >= 3:
-                                emotions.append("Joy")
-                            if face.anger_likelihood >= 3:
-                                emotions.append("Anger")
-                            if face.surprise_likelihood >= 3:
-                                emotions.append("Surprise")
-                            if face.sorrow_likelihood >= 3:
-                                emotions.append("Sorrow")
-
-                            emotion_text = ", ".join(emotions) if emotions else "Neutral"
-                            x_min = min([p[0] for p in points])
-                            y_min = min([p[1] for p in points])
-
-                            # Add emotion gauge bar for better visualization
-                            emotions_map = {
-                                "Joy": (0, 255, 0),         # Green
-                                "Anger": (0, 0, 255),       # Red
-                                "Surprise": (255, 255, 0),  # Yellow
-                                "Sorrow": (255, 0, 0)       # Blue
-                            }
-
-                            # Add detailed emotion text with colored background
-                            text_size = cv2.getTextSize(emotion_text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)[0]
-                            cv2.rectangle(frame,
-                                          (int(x_min), int(y_min) - text_size[1] - 8),
-                                          (int(x_min) + text_size[0] + 8, int(y_min)),
-                                          (0, 0, 0), -1)
-
-                            cv2.putText(frame, emotion_text,
-                                        (int(x_min) + 4, int(y_min) - 4),
-                                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
-
-                            # Draw enhanced landmarks with connections
-                            if len(face.landmarks) > 0:
-                                landmarks = [(int(landmark.position.x), int(landmark.position.y))
-                                             for landmark in face.landmarks]
-
-                                # Draw each landmark
-                                for landmark in landmarks:
-                                    cv2.circle(frame, landmark, 3, (255, 255, 0), -1)
-
-                                # Connect landmarks for eyes, nose, mouth if there are enough points
-                                if len(landmarks) >= 8:
-                                    # These indices are approximate - adjust based on your actual data
-                                    eye_indices = [0, 1, 2, 3]
-                                    for i in range(len(eye_indices)-1):
-                                        cv2.line(frame, landmarks[eye_indices[i]],
-                                                 landmarks[eye_indices[i+1]], (255, 255, 0), 1)
-
-                    if "Text" in analysis_types:
-                        text = client.text_detection(image=vision_image)
-                        # Update stats
-                        if len(text.text_annotations) > 1:
-                            detection_stats["text_blocks"] += len(text.text_annotations) - 1
-
-                        # Add overall text summary to the frame
-                        if text.text_annotations:
-                            full_text = text.text_annotations[0].description
-                            words = full_text.split()
-                            short_text = " ".join(words[:5])
-                            if len(words) > 5:
-                                short_text += "..."
-
-                            # Add text summary to top of frame with better visibility
-                            cv2.rectangle(frame, (10, 60), (10 + len(short_text)*10, 90), (0, 0, 0), -1)
-                            cv2.putText(frame, f"Text: {short_text}",
-                                        (10, 80), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
-
-                        # Draw text boxes with improved visibility
-                        for text_annot in text.text_annotations[1:]:
-                            box = [(vertex.x, vertex.y) for vertex in text_annot.bounding_poly.vertices]
-                            pts = np.array(box, np.int32).reshape((-1, 1, 2))
-                            cv2.polylines(frame, [pts], True, (255, 0, 0), 2)  # Thicker lines
-
-                    # Add Labels analysis for more detail
-                    if "Labels" in analysis_types:
-                        labels = client.label_detection(image=vision_image, max_results=5)
-
-                        # Add labels to the frame with better visibility
-                        y_pos = 120
-                        cv2.rectangle(frame, (10, y_pos-20), (250, y_pos+20*len(labels.label_annotations)), (0, 0, 0), -1)
-                        cv2.putText(frame, "Scene labels:", (15, y_pos), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
-
-                        # Track stats and show labels
-                        for i, label in enumerate(labels.label_annotations):
-                            # Update stats
-                            if label.description in detection_stats["labels"]:
-                                detection_stats["labels"][label.description] += 1
-                            else:
-                                detection_stats["labels"][label.description] = 1
-
-                            # Display on frame with larger text
-                            cv2.putText(frame, f"- {label.description}: {int(label.score*100)}%",
-                                        (15, y_pos + 20*(i+1)), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
-
-                except Exception as e:
-                    # Show error on frame
-                    cv2.putText(frame, f"API Error: {str(e)[:30]}",
-                                (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
-
-            # Add hint about slowed down speed
-            cv2.putText(frame, "Playback: 60% speed for better visualization",
-                        (width - 400, height - 30), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 200, 0), 2)
-
-            # Write the frame to output video
-            out.write(frame)
-
-        # Release resources
-        cap.release()
-        out.release()
-
-        # Clear progress indicators
-        progress_bar.empty()
-        status_text.empty()
-
-        # Read the processed video as bytes for download
-        with open(output_path, 'rb') as file:
-            processed_video_bytes = file.read()
-
-        # Clean up temporary files
-        os.unlink(temp_video_path)
-        os.unlink(output_path)
-
-        # Calculate additional statistics
-        for obj_name, tracking in detection_stats["object_tracking"].items():
-            # Calculate total screen time
-            tracking["screen_time"] = round(tracking["frames_present"] * (1/fps) * process_every_n_frames, 2)
-            # Calculate average confidence if available
-            if "confidences" in tracking and tracking["confidences"]:
-                tracking["avg_confidence"] = sum(tracking["confidences"]) / len(tracking["confidences"])
-
-        # Return enhanced results
-        results = {"detection_stats": detection_stats}
-
-        # Store results in session state for chatbot context
-        st.session_state.analysis_results = results
-
-        # Update vectorstore with new results
-        update_vectorstore_with_results(results)
-
-        return processed_video_bytes, results
-
-    except Exception as e:
-        # Clean up on error
-        cap.release()
-        if 'out' in locals():
-            out.release()
-        os.unlink(temp_video_path)
-        if os.path.exists(output_path):
-            os.unlink(output_path)
-        raise e
+    # Rest of the function continues as before...
 
 def load_bigquery_table(dataset_id, table_id, limit=1000):
     """Load data directly from an existing BigQuery table"""
 