import datetime

import cv2
import gradio as gr
import torch
from transformers import AutoFeatureExtractor, AutoModelForObjectDetection

# Load the YOLOS-tiny detector once at startup; weights are fetched
# from the Hugging Face Hub on first run.
extractor = AutoFeatureExtractor.from_pretrained("hustvl/yolos-tiny")
model = AutoModelForObjectDetection.from_pretrained("hustvl/yolos-tiny")
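
# Note (not in the original): newer transformers releases favor AutoImageProcessor
# over AutoFeatureExtractor for vision models; if you swap it in, the
# from_pretrained and post_process_object_detection calls below stay the same.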

BBOX_COLOR = [255, 0, 0]  # red in RGB (Gradio streams RGB frames)
PRED_THRESHOLD = 0.90  # keep detections scoring at or above 0.90

def composite_predictions(img, processed_predictions, show_video=False):
    # Keep only "person" detections (COCO label id 1).
    interested_labels = processed_predictions["labels"] == 1
    scores = processed_predictions["scores"][interested_labels].tolist()
    boxes = [[int(j) for j in x] for x in processed_predictions["boxes"][interested_labels].tolist()]
    labels = [model.config.id2label[x] for x in processed_predictions["labels"][interested_labels].tolist()]
    # Draw each box and its "label: score" caption onto the frame.
    for score, box, label in zip(scores, boxes, labels):
        # Boxes arrive as (xmin, ymin, xmax, ymax), so pass explicit corner
        # points; passing the raw 4-tuple would be read as (x, y, w, h).
        cv2.rectangle(img, (box[0], box[1]), (box[2], box[3]), BBOX_COLOR, 1)
        cv2.putText(img, f"{label}: {score:0.2f}", (box[0] + 2, box[1] + 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.33, BBOX_COLOR, 1, cv2.LINE_AA)
    return img, len(boxes), datetime.datetime.now()

def process(img):
    # Run detection on one RGB webcam frame; no gradients needed at inference.
    inputs = extractor(images=img, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
    # Post-processing rescales boxes back to the original (height, width).
    h, w, _ = img.shape
    img_size = torch.tensor([(h, w)])
    # Returns one dict per image with "scores", "labels", and "boxes"
    # (xmin, ymin, xmax, ymax in pixels), already filtered by PRED_THRESHOLD.
    processed = extractor.post_process_object_detection(outputs, PRED_THRESHOLD, img_size)
    # Composite the bounding boxes and labels onto the frame.
    return composite_predictions(img, processed[0])

with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column(scale=1, min_width=600):
            last_refresh_box = gr.Textbox(label="Last updated")
            attendance_label = gr.Label(label="Current Attendance")
    with gr.Row():
        with gr.Column(scale=1, min_width=600):
            webcam = gr.Webcam(streaming=True)
            output = gr.Image(label="Composite", visible=True)
    # Re-run detection on every new webcam frame; the three outputs map to the
    # three return values of process().
    webcam.stream(process, [webcam], [output, attendance_label, last_refresh_box])

if __name__ == "__main__":
    demo.queue().launch()  # queue() is needed for streaming events
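
# Quick offline sanity check -- a sketch, not part of the app; it assumes a
# local "sample.jpg" (hypothetical path) and Pillow installed:
#
#     import numpy as np
#     from PIL import Image
#     frame = np.array(Image.open("sample.jpg").convert("RGB"))
#     annotated, count, stamp = process(frame)
#     print(f"{count} people detected at {stamp}")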

# --- Alternative demo, kept for reference: accumulate streaming microphone audio ---
# import gradio as gr
# import numpy as np
# import time
#
# def add_to_stream(audio, instream):
#     time.sleep(1)
#     if audio is None:
#         return gr.update(), instream
#     if instream is None:
#         ret = audio
#     else:
#         # Append the new chunk to the accumulated samples (same sample rate).
#         ret = (audio[0], np.concatenate((instream[1], audio[1])))
#     return ret, ret
#
# with gr.Blocks() as demo:
#     inp = gr.Audio(source="microphone")
#     out = gr.Audio()
#     stream = gr.State()
#     clear = gr.Button("Clear")
#     inp.stream(add_to_stream, [inp, stream], [out, stream])
#     clear.click(lambda: [None, None, None], None, [inp, out, stream])
#
# if __name__ == "__main__":
#     demo.launch()