import cv2
import numpy as np
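
# Person detection with YOLOv3-tiny via OpenCV's DNN module. Expects
# models/yolov3-tiny.weights, configs/yolov3-tiny.cfg, and coco.names
# relative to the working directory.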

# TODO: tune these offsets to pad or shift the detected boxes
X1_OFFSET, X2_OFFSET = 0, 0
Y1_OFFSET, Y2_OFFSET = 0, 0

class YOLO:
    def __init__(self):
        # Load the YOLOv3-tiny network and resolve its unconnected output layers.
        # Flattening handles both old (Nx1) and new (1-D) getUnconnectedOutLayers returns.
        self.net = cv2.dnn.readNet("models/yolov3-tiny.weights", "configs/yolov3-tiny.cfg")
        self.layer_names = self.net.getLayerNames()
        self.output_layers = [self.layer_names[int(i) - 1] for i in np.array(self.net.getUnconnectedOutLayers()).flatten()]
        # Class labels, one per line, in COCO order
        with open("coco.names", "r") as f:
            self.classes = [line.strip() for line in f]

    def get_patches(self, img):
        # Crop and return the image region for each detected person
        patches = []
        for (x1, y1), (x2, y2), color, confidence, label in self.forward(img):
            # Skip degenerate (zero-width or zero-height) boxes
            if x1 == x2 or y1 == y2:
                continue
            patches.append(img[y1:y2, x1:x2])
        return patches

    def forward(self, img):
        # Yield (top-left, bottom-right, color, confidence, label) per person
        height, width, _ = img.shape

        # Prepare the image for YOLO
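        # blobFromImage scales pixels by 1/255 (0.00392), resizes to the
        # 416x416 network input, subtracts no mean, and swaps BGR to RGB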
        blob = cv2.dnn.blobFromImage(img, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
        self.net.setInput(blob)

        # Run the forward pass
        outs = self.net.forward(self.output_layers)

        # Processing the output
        class_ids = []
        confidences = []
        boxes = []
        for out in outs:
            for detection in out:
                # detection = [center_x, center_y, width, height, object confidence, class confidence scores...]
                scores = detection[5:]
                class_id = np.argmax(scores)
                class_confidence = scores[class_id]
                object_confidence = detection[4]
                if object_confidence > 0.5:
                    # Get the coordinates for the bounding box
                    center_x = int(detection[0] * width)
                    center_y = int(detection[1] * height)
                    w = int(detection[2] * width)
                    h = int(detection[3] * height)

                    # Rectangle coordinates
                    x = int(center_x - w / 2)
                    y = int(center_y - h / 2)
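                    # Boxes that start off-frame are dropped rather than clamped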
                    if x < 0 or y < 0:
                        continue
                    boxes.append([x, y, w, h])
                    confidences.append(float(class_confidence))
                    class_ids.append(class_id)

        # Apply non-max suppression to remove overlapping boxes
        indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
        # Flatten to cope with both old (Nx1) and new (1-D) NMSBoxes returns
        for i in np.array(indexes).flatten():
            x, y, w, h = boxes[i]
            label = str(self.classes[class_ids[i]])
            confidence = confidences[i]
            color = (0, 255, 0)  # Green box
            if label == "person":
                yield (x + X1_OFFSET, y + Y1_OFFSET), (x + w + X2_OFFSET, y + h + Y2_OFFSET), color, confidence, label
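    # Calling an instance directly is shorthand for get_patches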
    __call__ = get_patches
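
# A minimal usage sketch added for illustration (not part of the original
# script): crop person patches from a single image on disk. The file paths
# below are assumptions, not fixed names.
def save_person_patches(yolo_model: YOLO, image_path: str, out_prefix: str = "person") -> int:
    img = cv2.imread(image_path)
    if img is None:
        raise FileNotFoundError(image_path)
    patches = yolo_model(img)  # __call__ aliases get_patches
    for i, patch in enumerate(patches):
        cv2.imwrite(f"{out_prefix}_{i}.png", patch)
    return len(patches)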
    
def display(yolo_model: YOLO):
    # Stream from the default camera (index 0) and draw detections live
    cam = cv2.VideoCapture(0)
    while True:
        ret, img = cam.read()
        if not ret:
            # Bail out rather than spin forever if the camera is unavailable
            print("unable to read a frame from the camera")
            break
        for (x1, y1), (x2, y2), color, confidence, label in yolo_model.forward(img):
            cv2.rectangle(img, (x1, y1), (x2, y2), color, 2)
            cv2.putText(img, f"{label} {confidence:.2f}", (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
        # Show the annotated frame; press 'q' to quit
        cv2.imshow("Camera Feed", img)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cam.release()
    cv2.destroyAllWindows()

if __name__ == "__main__":
    yolo_model = YOLO()
    display(yolo_model=yolo_model)
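
    # Illustrative alternative (commented out): run the one-shot sketch above
    # on a saved frame instead of the live camera feed.
    # save_person_patches(yolo_model, "frame.jpg")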