import gradio as gr
from ultralytics import YOLOv10
import cv2
import torch
import os
import spaces


# Load the pretrained YOLOv10-X weights from the Hugging Face Hub
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = YOLOv10.from_pretrained('jameslahm/yolov10x').to(device)

# Define activity categories based on detected objects
activity_categories = {
    "Working": ["laptop", "computer", "keyboard", "office chair"],
    "Meal Time": ["fork", "spoon", "plate", "food"],
    "Exercise": ["dumbbell", "bicycle", "yoga mat", "treadmill"],
    "Outdoors": ["car", "tree", "bicycle", "road"],
    # Add more categories and objects as needed
}

# Function to map detected objects to categorized activities
def categorize_activity(detected_objects):
    categorized_activities = {}

    for activity, objects in activity_categories.items():
        # Keep only the detected objects that belong to this activity
        matched = [obj for obj in detected_objects if obj in objects]
        if matched:
            categorized_activities.setdefault(activity, []).extend(matched)

    return categorized_activities
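
# Example (hypothetical input): categorize_activity(["laptop", "fork", "car"])
# -> {"Working": ["laptop"], "Meal Time": ["fork"], "Outdoors": ["car"]}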

# Function to process the video, detect objects, and generate a categorized journal with images
@spaces.GPU
def generate_journal_with_images(video_path):
    cap = cv2.VideoCapture(video_path)
    journal_entries = {}
    saved_images = []
    frame_count = 0
    output_folder = "detected_frames"
    os.makedirs(output_folder, exist_ok=True)  # Create folder to store images
    
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        
        # Make predictions using YOLOv10 on the current frame
        results = model.predict(source=frame_rgb, device=device)
        
        # Draw bounding boxes and labels on the frame
        annotated_frame = results[0].plot()
        
        # Save the annotated image
        frame_filename = os.path.join(output_folder, f"frame_{frame_count}.jpg")
        cv2.imwrite(frame_filename, annotated_frame[:, :, ::-1])  # Convert back to BGR for saving
        saved_images.append(frame_filename)
        
        # Extract labels (class indices) and map them to class names
        detected_objects = [model.names[int(box.cls)] for box in results[0].boxes]  # Access the first result
        
        # Get current timestamp in the video
        timestamp = cap.get(cv2.CAP_PROP_POS_MSEC) / 1000  # Convert ms to seconds
        
        # Categorize the detected objects into activities
        activity_summary = categorize_activity(detected_objects)
        
        # Store the activities with their timestamp and the annotated frame
        for activity, objects in activity_summary.items():
            journal_entries.setdefault(activity, []).append(
                (f"At {timestamp:.2f} seconds: {', '.join(objects)}", frame_filename)
            )
        
        frame_count += 1
    
    cap.release()
    
    # Flatten the journal into (image, caption) pairs, the format gr.Gallery expects
    formatted_journal = []
    for activity, entries in journal_entries.items():
        for entry, image_path in entries:
            formatted_journal.append((image_path, f"{activity}: {entry}"))

    return formatted_journal
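
# The returned list looks like (hypothetical values):
# [("detected_frames/frame_0.jpg", "Working: At 0.03 seconds: laptop"), ...]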

# Gradio callback: run the journal generator on the uploaded video
def display_journal_with_images(video):
    # The journal is already a list of (image, caption) pairs for the gallery
    return generate_journal_with_images(video)

with gr.Blocks() as iface:
    video_input = gr.Video(label="Upload Video", height=300)
    output_gallery = gr.Gallery(label="Generated Daily Journal with Images")
    run_button = gr.Button("Generate Journal")
    
    run_button.click(fn=display_journal_with_images, inputs=video_input, outputs=output_gallery)

iface.launch()
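
# Local run sketch (assumptions: the THU-MIG yolov10 fork of ultralytics, which
# provides YOLOv10.from_pretrained, plus gradio, opencv-python, and spaces):
#   pip install gradio opencv-python spaces git+https://github.com/THU-MIG/yolov10.git
#   python app.py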