import cv2
from ultralytics import YOLO

# Class labels, positioned so that list index == class id used for lookup.
# NOTE(review): the order presumably matches the training label order of
# 'best.pt' -- confirm against the dataset config.
classes = [
    'ain', 'al', 'aleff', 'bb', 'dal', 'dha', 'dhad', 'fa', 'gaaf', 'ghain',
    'ha', 'haa', 'jeem', 'kaaf', 'khaa', 'la', 'laam', 'meem', 'nun', 'ra',
    'saad', 'seen', 'sheen', 'ta', 'taa', 'thaa', 'thal', 'toot', 'waw', 'ya',
    'yaa', 'zay',
]

# Maps integer class id -> label string.
TargetMapper = dict(enumerate(classes))

# Loaded once at import time and shared by both inference entry points.
model = YOLO('best.pt')

# Drawing colours are BGR tuples because frames are annotated before the
# final BGR -> RGB conversion.
_BOX_COLOR = (0, 0, 255)
_LABEL_COLOR = (36, 255, 12)


def _draw_detections(image, results):
    """Draw every detection in *results* onto *image* in place.

    Args:
        image: BGR image array to annotate (modified in place).
        results: A single result object from ``model.predict``; its
            ``boxes`` attribute supplies ``xyxy`` corners and ``cls`` ids.

    Returns:
        The same *image* object, for convenience.
    """
    # Move to CPU and convert to numpy once, instead of per detection; the
    # explicit .cpu() also makes this work when inference ran on a GPU.
    boxes = results.boxes.cpu().numpy()
    for det, cls_idx in zip(boxes.xyxy, boxes.cls):
        # Class ids arrive as floats; cast so the dict lookup uses an int key.
        label = TargetMapper[int(cls_idx)]
        x1, y1, x2, y2 = (int(v) for v in det[:4])
        cv2.rectangle(
            image,
            (x1, y1),
            (x2, y2),
            color=_BOX_COLOR,
            thickness=2,
            lineType=cv2.LINE_AA,
        )
        # Place the label slightly above the top-left corner of the box.
        cv2.putText(
            image,
            label,
            (x1, y1 - 10),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.9,
            _LABEL_COLOR,
            2,
        )
    return image


def image_inference(image_path):
    """Run detection on one image file and return the annotated image.

    Args:
        image_path: Path of the image to read with ``cv2.imread``.

    Returns:
        The image with boxes and class labels drawn, converted to RGB.

    Raises:
        FileNotFoundError: If the image cannot be read from *image_path*.
    """
    print(image_path)
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise FileNotFoundError(f"Could not read image: {image_path}")

    # Predict on the decoded array so the boxes refer to exactly the pixels
    # being annotated, and the file is not decoded a second time.
    outputs = model.predict(source=image)
    _draw_detections(image, outputs[0])
    return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)


def video_inference(video_path):
    """Yield annotated RGB frames for every frame of a video.

    Args:
        video_path: Source passed to ``cv2.VideoCapture``.

    Yields:
        Each successfully read frame with boxes and class labels drawn,
        converted to RGB.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # End of stream or read failure: stop instead of looping
                # forever on a capture that still reports itself as open.
                break
            outputs = model.predict(source=frame)
            _draw_detections(frame, outputs[0])
            yield cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    finally:
        # Runs on normal exhaustion and when the consumer closes the
        # generator early, so the capture handle is never leaked.
        cap.release()