"""Ambulance-detection pipelines.

Runs a TorchScript-exported YOLO detector over images or videos, either
interactively (OpenCV windows, script mode) or as request/response
pipelines (`ImagePipeline`, `VideoPipeline`) over base64 inputs.
"""
import cv2  # hoisted from __main__: module-level functions depend on it

from PIL import Image  # kept: part of the file's original import surface
from ultralytics import YOLO

from utils import readb64, img2base64  # img2base64 kept for API parity

# Class-index -> human-readable label for the trained detector.
# Hoisted from __main__ so the drawing helpers work when this module is
# imported, not only when it is run as a script.
labels = {
    0: 'ambulance',
    1: 'truck',
}

# TorchScript (int8-quantized) detector, loaded once at import time.
model_int8 = YOLO('weights/best.torchscript', task='detect')


def _draw_box(img, box):
    """Draw one detection (bounding box + filled label banner) on *img*.

    Mutates *img* in place and also returns it for call-chaining.
    """
    # box.cls.item() is a float; cast so the labels dict is keyed cleanly.
    cls = int(box.cls.item())
    confidence = box.conf.item()
    label = labels[cls]
    x1, y1, x2, y2 = map(int, list(box.xyxy.numpy()[0]))
    cv2.rectangle(img, (x1, y1), (x2, y2), (0, 102, 255), 2)
    # Filled strip above the box as a background for the label text.
    cv2.rectangle(img, (x1, y1 - 20), (x2, y1), (0, 102, 255), -1)
    cv2.putText(img, "{}: {:.3f}".format(label, confidence),
                (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6,
                (255, 255, 255), 1)
    return img


def inference_on_image(path):
    """Detect objects in the image at *path* and display the annotated result.

    Blocks until a key is pressed in the preview window.

    Args:
        path: Filesystem path of the image.

    Returns:
        The ultralytics ``Results`` list produced by the model.
    """
    results = model_int8(path)
    # BUG FIX: imread's second argument is an imread flag, not a color
    # conversion code — cv2.COLOR_BGR2RGB (== 4) was silently treated as
    # IMREAD_ANYCOLOR. Load as a standard 3-channel BGR image instead.
    img = cv2.imread(path, cv2.IMREAD_COLOR)
    for box in results[0].boxes:
        img = _draw_box(img, box)
    cv2.imshow('Detected Image', img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()  # FIX: window was previously never closed
    return results


def inference_on_video(path, vid_stride=10):
    """Detect objects in the video at *path*, previewing annotated frames.

    The model is run on every ``vid_stride``-th frame (streaming); other
    frames are shown unannotated. Press ESC to stop early.

    Args:
        path: Filesystem path of the video.
        vid_stride: Frame stride between model inferences.

    Returns:
        The (consumed) ultralytics streaming results generator.
    """
    # BUG FIX: vid_stride was hard-coded to 10 here and in the modulo
    # below, silently ignoring the caller's argument.
    results = model_int8(path, vid_stride=vid_stride, stream=True)
    cap = cv2.VideoCapture(path)
    frame_counter = 0
    try:
        # BUG FIX: a stray cap.read() before the loop discarded frame 0,
        # desynchronizing the preview from the model's strided frames.
        while True:
            ret, img = cap.read()
            if not ret:
                break
            if frame_counter % vid_stride == 0:
                try:
                    result = next(results)
                except StopIteration:
                    # Model stream exhausted before the capture did.
                    break
                for box in result.boxes:
                    img = _draw_box(img, box)
            cv2.imshow('Detected Image', img)
            frame_counter += 1
            k = cv2.waitKey(5) & 0xFF
            if k == 27:  # ESC aborts playback
                break
    finally:
        # FIX: release resources on every exit path, not just ESC.
        cap.release()
        cv2.destroyAllWindows()
    return results


class ImagePipeline:
    """Request/response pipeline: one base64-encoded image in, detections out."""

    def __init__(self, device='cpu', gpu_id=0, weights='weights/best.torchscript'):
        # device/gpu_id are kept for interface compatibility; the
        # TorchScript model currently runs wherever ultralytics places it.
        self.model = YOLO(weights, task='detect')

    def preprocess(self, data):
        """Decode ``{'images': <b64> | [<b64>]}`` into a one-image list.

        Raises:
            ValueError: If more than one image is supplied.
        """
        image_base64 = data.pop("images", data)
        if not isinstance(image_base64, list):
            image_base64 = [image_base64]
        elif len(image_base64) > 1:
            raise ValueError("ImagePipeline only accepts 1 image/frame")
        return [readb64(image) for image in image_base64]

    def inference(self, images):
        """Run the detector on the single decoded image."""
        return self.model(images[0])

    def get_response(self, inference_result):
        """Serialize detections to JSON-friendly dicts plus a summary message."""
        response = []
        # Class 0 is 'ambulance' in the labels map.
        if 0 in inference_result[0].boxes.cls.numpy():
            message = "An ambulance is found "
        else:
            message = "There is no ambulance"
        for i, result in enumerate(inference_result):
            for xywhn, cls, conf in zip(
                result.boxes.xywhn, result.boxes.cls, result.boxes.conf
            ):
                xywhn = list(xywhn.numpy())
                response.append({
                    'xywhn': {
                        'x': float(xywhn[0]),
                        'y': float(xywhn[1]),
                        'w': float(xywhn[2]),
                        'h': float(xywhn[3]),
                    },
                    'class': cls.item(),
                    'confidence': conf.item(),
                })
        return {'results': response, 'message': message}

    def __call__(self, data, config_payload=None):
        """preprocess -> inference -> get_response."""
        images = self.preprocess(data)
        inference_result = self.inference(images)
        return self.get_response(inference_result)


class VideoPipeline:
    """Request/response pipeline: a video path in, strided detections out."""

    def __init__(self, device='cpu', gpu_id=0, weights='weights/best.torchscript'):
        # device/gpu_id are kept for interface compatibility; the
        # TorchScript model currently runs wherever ultralytics places it.
        self.model = YOLO(weights, task='detect')

    def preprocess(self, data):
        """No-op: *data* is already a video path the model can consume."""
        return data

    def inference(self, video_path, vid_stride=30):
        """Run the detector over the video, sampling every vid_stride frames."""
        return self.model(video_path, vid_stride=vid_stride)

    def get_response(self, inference_result):
        """Serialize per-frame detections plus an ambulance-seen summary."""
        response = []
        # Default until any frame contains class 0 ('ambulance').
        message = "There is no ambulance"
        for i, result in enumerate(inference_result):
            if 0 in result.boxes.cls.numpy():
                message = "An ambulance is found"
            for xywhn, cls, conf in zip(
                result.boxes.xywhn, result.boxes.cls, result.boxes.conf
            ):
                xywhn = list(xywhn.numpy())
                response.append({
                    'xywhn': {
                        'x': float(xywhn[0]),
                        'y': float(xywhn[1]),
                        'w': float(xywhn[2]),
                        'h': float(xywhn[3]),
                    },
                    'class': cls.item(),
                    'confidence': conf.item(),
                })
        return {'results': response, 'message': message}

    def __call__(self, data, config_payload=None):
        """preprocess -> inference -> get_response."""
        data = self.preprocess(data)
        inference_result = self.inference(data)
        return self.get_response(inference_result)


if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--input_type', default='image', const='image',
                        nargs='?', choices=['image', 'video'],
                        help='type of input (default: %(default)s)')
    parser.add_argument("-p", "--path", help="filepath")
    args = parser.parse_args()

    if args.input_type == 'image':
        results = inference_on_image(args.path)
    elif args.input_type == 'video':
        results = inference_on_video(args.path)
    print(results)

    # Examples
    # python pipelines.py --input_type image --path sample_files/ambulance-2.jpeg
    # python pipelines.py --input_type video --path sample_files/ambulance.mp4