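# Video-journal demo: run YOLOv10 object detection on sampled video frames,
# group the detections into activity categories, and display the journal text
# alongside the annotated frames in a Gradio app.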
import gradio as gr
from ultralytics import YOLOv10
from skimage.metrics import structural_similarity as ssim
import cv2
import torch
import os
import spaces
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = YOLOv10.from_pretrained('jameslahm/yolov10x').to(device)
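# YOLOv10-X weights from the Hugging Face Hub; a smaller checkpoint such as
# 'jameslahm/yolov10n' can likely be swapped in for faster CPU inference.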
# Define activity categories based on detected objects
activity_categories = {
    "Working": ["laptop", "computer", "keyboard", "office chair"],
    "Meal Time": ["fork", "spoon", "plate", "food"],
    "Exercise": ["dumbbell", "bicycle", "yoga mat", "treadmill"],
    "Outdoors": ["car", "tree", "bicycle", "road"],
    # Add more categories and objects as needed
}
# Function to map detected objects to categorized activities
def categorize_activity(detected_objects):
    categorized_activities = {}
    for activity, objects in activity_categories.items():
        if any(obj in detected_objects for obj in objects):
            categorized_activities.setdefault(activity, []).append(detected_objects)
    return categorized_activities
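# Example: ["laptop", "fork"] yields
# {"Working": [["laptop", "fork"]], "Meal Time": [["laptop", "fork"]]};
# each matched activity records the full list of detected objects.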
# Function to compare frames using SSIM to avoid repeated frames
def is_frame_different(frame1, frame2, threshold=0.9):
    gray_frame1 = cv2.cvtColor(frame1, cv2.COLOR_BGR2GRAY)
    gray_frame2 = cv2.cvtColor(frame2, cv2.COLOR_BGR2GRAY)
    score, _ = ssim(gray_frame1, gray_frame2, full=True)
    return score < threshold
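# SSIM is 1.0 for identical frames; raising `threshold` toward 1.0 treats
# smaller changes as "different" and therefore processes more frames.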
# Function to process the video, detect objects, and generate a categorized journal with images
@spaces.GPU
def generate_journal_with_images(video_path, frame_interval=30):
    cap = cv2.VideoCapture(video_path)
    journal_entries = []
    saved_images = []
    frame_count = 0
    last_processed_frame = None
    output_folder = "detected_frames"
    os.makedirs(output_folder, exist_ok=True)  # Create folder to store images
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        # Process every Nth frame or if the current frame is different from the last processed frame
        if frame_count % frame_interval == 0 or (last_processed_frame is not None and is_frame_different(last_processed_frame, frame)):
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            # Make predictions using YOLOv10 on the current frame
            results = model.predict(source=frame_rgb, device=device)
            # Plot bounding boxes and labels on the image
            annotated_frame = results[0].plot()  # Plot detection results on the frame
            # Save the annotated image
            frame_filename = os.path.join(output_folder, f"frame_{frame_count}.jpg")
            cv2.imwrite(frame_filename, annotated_frame[:, :, ::-1])  # Convert back to BGR for saving
            saved_images.append(frame_filename)
            # Extract labels (class indices) and map them to class names
            detected_objects = [model.names[int(box.cls)] for box in results[0].boxes]  # Access the first result
            # Get the current timestamp in the video
            timestamp = cap.get(cv2.CAP_PROP_POS_MSEC) / 1000  # Convert ms to seconds
            # Categorize the detected objects into activities
            activity_summary = categorize_activity(detected_objects)
            # Store the activities with their timestamp
            for activity, objects in activity_summary.items():
journal_entries.append((f"At {timestamp:.2f} seconds: {', '.join(objects[0])}", frame_filename))
            last_processed_frame = frame  # Update the last processed frame
        frame_count += 1
    cap.release()
    # Return the journal lines and the annotated-frame paths as separate lists
    return journal_entries, saved_images
def display_journal_with_images(video):
    journal_entries, image_paths = generate_journal_with_images(video, frame_interval=30)
    # Return journal text and the list of images separately
    journal_text = "\n".join(journal_entries)
    return journal_text, image_paths
# Define Gradio Blocks for custom display
with gr.Blocks() as iface:
    video_input = gr.Video(label="Upload Video", height=300)
    journal_output = gr.Textbox(label="Generated Daily Journal", lines=10)
    image_gallery = gr.Gallery(label="Annotated Frames", columns=2, height="auto")
    run_button = gr.Button("Generate Journal")
    run_button.click(fn=display_journal_with_images, inputs=video_input, outputs=[journal_output, image_gallery])
iface.launch()