File size: 1,876 Bytes
6932abb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import cv2
import numpy as np
from ultralytics import YOLO, SAM
import argparse

def process_video(video_path, output_path, background_color=(0, 255, 0)):
    """Write a copy of a video in which everything except people is one color.

    Each frame is run through a YOLO detector; the boxes whose class index is
    0 are passed to SAM 2 as box prompts, the resulting masks are OR-ed
    together, and every pixel outside the combined mask is overwritten with
    ``background_color``. Frames with no such detection are written as a
    solid ``background_color`` frame.

    Args:
        video_path: Path of the video file to read.
        output_path: Path of the ``mp4v``-encoded video file to write.
        background_color: Three channel values assigned to non-person pixels,
            in the channel order of the frames returned by OpenCV. The
            default reproduces the original green fill.

    Raises:
        IOError: If the input video cannot be opened or the output writer
            cannot be created.
    """
    cap = cv2.VideoCapture(video_path)
    out = None
    try:
        # Fail loudly instead of silently producing an empty output file.
        if not cap.isOpened():
            raise IOError(f"Could not open input video: {video_path}")

        # Load models - Ultralytics will handle caching automatically
        yolo_model = YOLO("yolo11n.pt")
        sam2_model = SAM("sam2_b.pt")

        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            # Some containers report 0/NaN; a writer needs a positive rate.
            fps = 30.0

        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
        if not out.isOpened():
            raise IOError(f"Could not open output video for writing: {output_path}")

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Detect people using YOLO model
            boxes = yolo_model(frame)[0].boxes

            # Filter for person class (adjust the class index if needed)
            person_boxes = boxes[boxes.cls == 0].xyxy.cpu().numpy()

            # Start from "nothing is a person"; stays all-False when no
            # person was detected, so the whole frame becomes background.
            combined_mask = np.zeros(frame.shape[:2], dtype=bool)

            # Only prompt SAM 2 when there is at least one box: an empty
            # prompt set does not yield usable masks.
            if len(person_boxes) > 0:
                sam_results = sam2_model(frame, bboxes=person_boxes)
                masks = sam_results[0].masks
                if masks is not None:
                    for mask in masks.data:
                        # astype(bool) keeps the in-place OR valid even if
                        # the masks are not already boolean.
                        combined_mask |= mask.cpu().numpy().astype(bool)

            # Apply the mask to the original frame
            segmented_frame = frame.copy()
            segmented_frame[~combined_mask] = background_color

            out.write(segmented_frame)
    finally:
        # Always release native handles, even if inference raised mid-video.
        cap.release()
        if out is not None:
            out.release()


def main():
    """Command-line entry point: read two positional paths and process the video."""
    positional_args = (
        ("input_video", "Path to the input video file"),
        ("output_video", "Path to the output video file"),
    )

    cli = argparse.ArgumentParser(description="Process video with YOLO and SAM2")
    for arg_name, arg_help in positional_args:
        cli.add_argument(arg_name, help=arg_help)

    options = cli.parse_args()
    process_video(options.input_video, options.output_video)

# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()