import datetime

import torch
import gradio as gr
import cv2

from transformers import AutoFeatureExtractor, AutoModelForObjectDetection

# YOLOS-tiny: a lightweight object detector fine-tuned on COCO. The feature
# extractor handles image resizing/normalization and prediction post-processing.
extractor = AutoFeatureExtractor.from_pretrained("hustvl/yolos-tiny")
model = AutoModelForObjectDetection.from_pretrained("hustvl/yolos-tiny")

BBOX_COLOR = [255, 0, 0]  # red (Gradio supplies frames as RGB arrays)
PRED_THRESHOLD = 0.90     # keep detections with confidence >= 0.90

def composite_predictions(img, processed_predictions, show_video=False):
    # Keep only "person" detections (COCO label id 1).
    interested_labels = processed_predictions["labels"] == 1
    scores = processed_predictions["scores"][interested_labels].tolist()
    # Boxes come back as (xmin, ymin, xmax, ymax) floats; cast to int pixel coordinates.
    boxes = [[int(j) for j in x] for x in processed_predictions["boxes"][interested_labels].tolist()]
    labels = [model.config.id2label[x] for x in processed_predictions["labels"][interested_labels].tolist()]

    # Draw each detection: bounding box plus "label: score" text near the top-left corner.
    for score, box, label in zip(scores, boxes, labels):
        cv2.rectangle(img, (box[0], box[1]), (box[2], box[3]), BBOX_COLOR, 1)
        cv2.putText(img, f"{label}: {score:0.2f}", (box[0] + 2, box[1] + 10), cv2.FONT_HERSHEY_SIMPLEX, 0.33, BBOX_COLOR, 1, cv2.LINE_AA)
    return img, len(boxes), datetime.datetime.now()

def process(img):
    inputs = extractor(images=img, return_tensors="pt")
    with torch.no_grad():  # inference only; no gradients needed
        outputs = model(**inputs)
    # Rescale predictions back to the original frame size (height, width).
    h, w, _ = img.shape
    img_size = torch.tensor([(h, w)])
    processed = extractor.post_process_object_detection(outputs, threshold=PRED_THRESHOLD, target_sizes=img_size)

    # Composite the bounding boxes and labels onto the frame.
    return composite_predictions(img, processed[0])


with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column(scale=1, min_width=600):
            last_refresh_box = gr.Textbox(label="Last updated")
            attendance_label = gr.Label(label="Current Attendance")
        with gr.Row():
            with gr.Column(scale=1, min_width=600):
                webcam = gr.Webcam(streaming=True)
                output = gr.Image(label="Composite", visible=True)
    # Every streamed webcam frame is run through the detector; the outputs update
    # the composite image, the attendance count, and the last-refresh timestamp.
    webcam.stream(process, [webcam], [output, attendance_label, last_refresh_box])

if __name__ == "__main__":
    demo.queue().launch()
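
# Optional quick check (a minimal sketch, assuming a local test image at the
# hypothetical path "test_frame.jpg"): run the detector on a single frame
# without launching the Gradio UI.
#
# frame = cv2.cvtColor(cv2.imread("test_frame.jpg"), cv2.COLOR_BGR2RGB)
# composite, count, timestamp = process(frame)
# print(f"{count} people detected at {timestamp}")
# cv2.imwrite("composite.jpg", cv2.cvtColor(composite, cv2.COLOR_RGB2BGR))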



# import gradio as gr
# import numpy as np
# import time

# def add_to_stream(audio, instream):
#     time.sleep(1)
#     if audio is None:
#         return gr.update(), instream
#     if instream is None:
#         ret = audio
#     else:
#         ret = (audio[0], np.concatenate((instream[1], audio[1])))
#     return ret, ret


# with gr.Blocks() as demo:
#     inp = gr.Audio(source="microphone")
#     out = gr.Audio()
#     stream = gr.State()
#     clear = gr.Button("Clear")

#     inp.stream(add_to_stream, [inp, stream], [out, stream])
#     clear.click(lambda: [None, None, None], None, [inp, out, stream])


# if __name__ == "__main__":
#     demo.launch()