# tech-assessment-kecilin / pipelines.py
# (uploaded by mrneuralnet — "Upload 14 files", commit 4142b1b verified)
from PIL import Image
from ultralytics import YOLO
from utils import readb64, img2base64
# Module-level detector, loaded once at import time from the exported
# TorchScript weights; shared by the standalone inference helpers below.
model_int8 = YOLO('weights/best.torchscript', task='detect')
def inference_on_image(path):
    """Run the detector on one image, draw the detections, and preview them.

    Blocks until a key is pressed in the preview window.

    Args:
        path: filesystem path of the image to run inference on.

    Returns:
        The raw ultralytics results list (one entry for the single image).

    NOTE(review): relies on module globals `model_int8` and `labels`, and on
    `cv2` being imported (it is imported in the __main__ guard below), so this
    only works when the file is run as a script.
    """
    results = model_int8(path)
    # BUG FIX: cv2.COLOR_BGR2RGB is a color-CONVERSION code, not an imread
    # flag (its value, 4, is interpreted as IMREAD_ANYCOLOR there). Load the
    # image normally; it stays BGR, which is what cv2.imshow expects anyway.
    img = cv2.imread(path)
    for box in results[0].boxes:
        cls = box.cls.item()
        confidence = box.conf.item()
        label = labels[int(cls)]
        x1, y1, x2, y2 = map(int, list(box.xyxy.numpy()[0]))
        # Box outline, filled label background strip, then the label text.
        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 102, 255), 2)
        img = cv2.rectangle(img, (x1, y1 - 20), (x2, y1), (0, 102, 255), -1)
        img = cv2.putText(img, "{}: {:.3f}".format(label, confidence), (x1, y1 - 5),
                          cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1)
    cv2.imshow('Detected Image', img)
    cv2.waitKey(0)
    # Close the preview window instead of leaking it.
    cv2.destroyAllWindows()
    return results
def inference_on_video(path, vid_stride=10):
    """Run the detector on a video, overlaying boxes on every Nth frame.

    Args:
        path: filesystem path of the video.
        vid_stride: run inference on every `vid_stride`-th frame.

    Returns:
        The (streamed, possibly exhausted) ultralytics results generator.

    NOTE(review): like inference_on_image, this depends on the module globals
    `model_int8`, `labels` and the `cv2` import from the __main__ guard.
    """
    # BUG FIX: the vid_stride parameter was previously ignored — 10 was
    # hard-coded both here and in the frame-skip test inside the loop.
    results = model_int8(path, vid_stride=vid_stride, stream=True)
    cap = cv2.VideoCapture(path)
    # BUG FIX: removed a dead cap.read() before the loop that silently
    # consumed (dropped) the first frame of the video.
    frame_counter = 0
    while True:
        ret, img = cap.read()
        if not ret:
            # End of stream: release and close windows (window previously leaked).
            cap.release()
            cv2.destroyAllWindows()
            break
        if frame_counter % vid_stride == 0:
            # Advance the streamed predictions in lockstep with the strided frames.
            result = next(results)
            for box in result.boxes:
                cls = box.cls.item()
                confidence = box.conf.item()
                label = labels[int(cls)]
                x1, y1, x2, y2 = map(int, list(box.xyxy.numpy()[0]))
                cv2.rectangle(img, (x1, y1), (x2, y2), (0, 102, 255), 2)
                img = cv2.rectangle(img, (x1, y1 - 20), (x2, y1), (0, 102, 255), -1)
                img = cv2.putText(img, "{}: {:.3f}".format(label, confidence), (x1, y1 - 5),
                                  cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1)
        cv2.imshow('Detected Image', img)
        frame_counter += 1
        k = cv2.waitKey(5) & 0xFF
        if k == 27:  # ESC quits early
            cap.release()
            cv2.destroyAllWindows()
            break
    return results
class ImagePipeline:
    """Single-frame detection pipeline: base64 image in, JSON-style dict out."""

    def __init__(self, device='cpu', gpu_id=0, weights='weights/best.torchscript'):
        # device/gpu_id are accepted for interface compatibility but unused;
        # the TorchScript model runs as exported.
        self.model = YOLO(weights, task='detect')

    def preprocess(self, data):
        """Extract and decode the single base64 image from the payload.

        Accepts either ``{'images': <b64 or [b64]>}`` or the raw value itself
        (``dict.pop`` falls back to ``data`` when the key is absent).

        Raises:
            Exception: if more than one image is supplied.
        """
        image_base64 = data.pop("images", data)
        if not isinstance(image_base64, list):
            image_base64 = [image_base64]
        elif len(image_base64) > 1:
            raise Exception("ImagePipeline only accepts 1 image/frame")
        return [readb64(image) for image in image_base64]

    def inference(self, images):
        """Run detection on the single decoded image."""
        return self.model(images[0])

    def get_response(self, inference_result):
        """Serialize detections to plain dicts; class id 0 means 'ambulance'."""
        response = []
        # Message keys off the first (only) result's class ids.
        if 0 in inference_result[0].boxes.cls.numpy():
            # FIX: dropped the stray trailing space (was "An ambulance is
            # found ") so the message matches VideoPipeline's.
            message = "An ambulance is found"
        else:
            message = "There is no ambulance"
        for result in inference_result:
            for xywhn, cls, conf in zip(
                result.boxes.xywhn,
                result.boxes.cls,
                result.boxes.conf,
            ):
                xywhn = list(xywhn.numpy())
                response.append({
                    'xywhn': {
                        'x': float(xywhn[0]),
                        'y': float(xywhn[1]),
                        'w': float(xywhn[2]),
                        'h': float(xywhn[3]),
                    },
                    'class': cls.item(),
                    'confidence': conf.item(),
                })
        return {'results': response,
                'message': message}

    def __call__(self, data, config_payload=None):
        images = self.preprocess(data)
        inference_result = self.inference(images)
        return self.get_response(inference_result)
class VideoPipeline:
    """Video detection pipeline: path in, JSON-style dict of all detections out."""

    def __init__(self, device='cpu', gpu_id=0, weights='weights/best.torchscript'):
        # device/gpu_id are accepted for interface compatibility but unused;
        # the TorchScript model runs as exported.
        self.model = YOLO(weights, task='detect')

    def preprocess(self, data):
        """Pass-through: the model accepts the video path directly."""
        return data

    def inference(self, video_path, vid_stride=30):
        """Run strided detection over the video (every `vid_stride`-th frame)."""
        return self.model(video_path, vid_stride=vid_stride)

    def get_response(self, inference_result):
        """Serialize detections from all sampled frames; class id 0 == 'ambulance'."""
        response = []
        # default message; upgraded as soon as any frame contains class 0
        message = "There is no ambulance"
        for result in inference_result:  # FIX: dropped unused enumerate index
            if 0 in result.boxes.cls.numpy():
                message = "An ambulance is found"
            for xywhn, cls, conf in zip(
                result.boxes.xywhn,
                result.boxes.cls,
                result.boxes.conf,
            ):
                xywhn = list(xywhn.numpy())
                response.append({
                    'xywhn': {
                        'x': float(xywhn[0]),
                        'y': float(xywhn[1]),
                        'w': float(xywhn[2]),
                        'h': float(xywhn[3]),
                    },
                    'class': cls.item(),
                    'confidence': conf.item(),
                })
        return {'results': response,
                'message': message}

    def __call__(self, data, config_payload=None):
        data = self.preprocess(data)
        inference_result = self.inference(data)
        return self.get_response(inference_result)
if __name__ == '__main__':
    # CLI entry point: run detection on a single image or a video file.
    import cv2
    import argparse

    arg_parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    arg_parser.add_argument('--input_type',
                            default='image',
                            const='image',
                            nargs='?',
                            choices=['image', 'video'],
                            help='type of input (default: %(default)s)')
    arg_parser.add_argument("-p", "--path", help="filepath")
    args = arg_parser.parse_args()

    # Class-id -> human-readable name; read as a global by the helpers above.
    labels = {
        0: 'ambulance',
        1: 'truck',
    }

    if args.input_type == 'image':
        results = inference_on_image(args.path)
    elif args.input_type == 'video':
        results = inference_on_video(args.path)
    print(results)

    # Examples
    # python pipelines.py --input_type image --path sample_files/ambulance-2.jpeg
    # python pipelines.py --input_type video --path sample_files/ambulance.mp4