Update app.py
app.py CHANGED
@@ -21,7 +21,6 @@ import time
 import matplotlib.pyplot as plt
 from pathlib import Path
 import plotly.express as px
-from google.cloud import videointelligence
 
 # Set page config
 st.set_page_config(
@@ -516,33 +515,6 @@ def process_video_file(video_file, analysis_types):
         "labels": {}
     }
 
-    # Add Video Intelligence API integration for semantic analysis
-    if "Semantic" in analysis_types:
-        # Initialize video intelligence client
-        video_client = videointelligence.VideoIntelligenceServiceClient(credentials=credentials)
-
-        # Set up features for semantic analysis
-        features = [
-            videointelligence.Feature.LABEL_DETECTION,
-            videointelligence.Feature.SHOT_CHANGE_DETECTION,
-            videointelligence.Feature.ACTION_RECOGNITION
-        ]
-
-        # Process video for semantic understanding
-        operation = video_client.annotate_video(
-            request={"features": features, "input_content": video_file.read()}
-        )
-
-        # Get semantic results
-        semantic_results = operation.result()
-
-        # Store semantic insights for visualization
-        semantic_insights = {
-            "actions": extract_actions(semantic_results),
-            "segments": extract_segments(semantic_results),
-            "scene_labels": extract_labels(semantic_results)
-        }
-
     try:
         frame_count = 0
         while frame_count < max_frames:  # Limit to 10 seconds
@@ -561,7 +533,7 @@ def process_video_file(video_file, analysis_types):
             cv2.putText(frame, f"Time: {frame_count/fps:.2f}s",
                         (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
 
-            # Process frames with Vision
+            # Process frames with Vision API (more frequently for detail)
             if frame_count % process_every_n_frames == 0:
                 try:
                     # Convert OpenCV frame to PIL Image for Vision API
@@ -725,10 +697,7 @@ def process_video_file(video_file, analysis_types):
         os.unlink(output_path)
 
         # Return both the video and the detection statistics
-        return processed_video_bytes,
-            "detection_stats": detection_stats,
-            "semantic_insights": semantic_insights if "Semantic" in analysis_types else None
-        }
+        return processed_video_bytes, detection_stats
 
     except Exception as e:
         # Clean up on error
@@ -990,9 +959,7 @@ def main():
            analysis_types.append("Face Detection")
        if st.sidebar.checkbox("Text Recognition"):
            analysis_types.append("Text")
-
-        analysis_types.append("Semantic")
-
+
        st.sidebar.markdown("---")
        st.sidebar.warning("⚠️ Video analysis may use a significant amount of API calls. Use responsibly.")
 
@@ -1101,25 +1068,6 @@ def main():
                ax.pie(top_labels.values(), labels=top_labels.keys(), autopct='%1.1f%%')
                ax.set_title('Distribution of Scene Labels')
                st.pyplot(fig)
-
-                # Display semantic analysis results
-                if "Semantic" in analysis_types and results["semantic_insights"]:
-                    st.markdown("### 🧠 Semantic Understanding")
-
-                    # Display scene context
-                    st.markdown("#### Video Context")
-                    for label in results["semantic_insights"]["scene_labels"][:10]:
-                        st.markdown(f"- {label['description']}: {label['confidence']:.1%}")
-
-                    # Display activities detected
-                    st.markdown("#### Activities")
-                    for action in results["semantic_insights"]["actions"]:
-                        st.markdown(f"- {action['description']} at {action['time_segment']}")
-
-                    # Visualize relationships and context
-                    st.markdown("#### Scene Segments")
-                    fig = create_segment_visualization(results["semantic_insights"]["segments"])
-                    st.plotly_chart(fig)
 
            except Exception as e:
                st.error(f"Error processing video: {str(e)}")
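With this commit, process_video_file returns a plain (processed_video_bytes, detection_stats) tuple rather than the earlier malformed dict-style return, and the sidebar no longer force-appends "Semantic" to analysis_types. Below is a minimal sketch of how a caller might consume the new return value; the uploader label, file-type list, and variable names are illustrative assumptions, not taken from the rest of app.py.

import streamlit as st

# Hypothetical caller; assumes process_video_file is the function shown in the diff above.
uploaded_video = st.file_uploader("Upload a video", type=["mp4", "mov", "avi"])
analysis_types = ["Face Detection", "Text"]  # "Semantic" is no longer added automatically

if uploaded_video is not None:
    # The function now returns a (bytes, dict) tuple instead of a dict payload.
    processed_video_bytes, detection_stats = process_video_file(uploaded_video, analysis_types)
    st.video(processed_video_bytes)  # play the annotated video
    st.json(detection_stats)         # show the collected detection statistics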