import copy

import cv2
import numpy as np
import onnxruntime as ort


class Yolov8onnx:

    def __init__(self,
                 onnx_model,
                 input_width,
                 input_height,
                 confidence_thres,
                 iou_thres,
                 device='cpu'):
        """
        Initializes an instance of the Yolov8onnx class.

        Args:
            onnx_model: Path to the ONNX model.
            input_width: Width the input image is resized to before inference.
            input_height: Height the input image is resized to before inference.
            confidence_thres: Confidence threshold for filtering detections.
            iou_thres: IoU (Intersection over Union) threshold for non-maximum suppression.
            device: Inference device, 'cpu' or 'cuda'.
        """
        self.onnx_model = onnx_model
        self.confidence_thres = confidence_thres
        self.iou_thres = iou_thres
        self.input_width = input_width
        self.input_height = input_height

        # Select execution providers based on the requested device.
        if device == 'cuda':
            providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
        else:
            providers = ['CPUExecutionProvider']

        self.onnx_session = ort.InferenceSession(
            onnx_model,
            providers=providers
        )

        # Cache the model's input and output tensor names.
        self.input_name = self.onnx_session.get_inputs()[0].name
        self.output_name = self.onnx_session.get_outputs()[0].name

    def preprocess(self, input_image):
        """
        Preprocesses the input image before performing inference.

        Returns:
            image_data: Preprocessed image data ready for inference.
        """
        self.img = input_image

        # Remember the original size so detections can be scaled back in postprocess().
        self.img_height, self.img_width = self.img.shape[:2]

        # Convert from OpenCV's BGR layout to the RGB layout the model expects.
        img = cv2.cvtColor(self.img, cv2.COLOR_BGR2RGB)

        # Resize to the model's input resolution.
        img = cv2.resize(img, (self.input_width, self.input_height))

        # Normalize pixel values to [0, 1].
        image_data = np.array(img) / 255.0

        # HWC -> CHW, then add a batch dimension: (1, 3, input_height, input_width).
        image_data = np.transpose(image_data, (2, 0, 1))
        image_data = np.expand_dims(image_data, axis=0).astype(np.float32)

        return image_data

    def postprocess(self, output):
        """
        Performs post-processing on the model's output to extract bounding boxes, scores, and class IDs.

        Returns:
            output_boxes: Bounding boxes in (x1, y1, x2, y2) pixel coordinates of the original image.
            output_scores: Confidence scores of the kept detections.
            output_classes: Class IDs of the kept detections.
        """
        # The raw output has shape (1, 4 + num_classes, num_candidates); squeeze the batch
        # dimension and transpose so each row holds one candidate detection.
        outputs = np.transpose(np.squeeze(output[0]))

        rows = outputs.shape[0]

        boxes = []
        scores = []
        class_ids = []

        # Scale factors to map coordinates from the model input size back to the original image.
        x_factor = self.img_width / self.input_width
        y_factor = self.img_height / self.input_height

        for i in range(rows):
            # Class scores follow the first four box values (cx, cy, w, h).
            classes_scores = outputs[i][4:]
            max_score = np.amax(classes_scores)

            if max_score >= self.confidence_thres:
                class_id = np.argmax(classes_scores)

                x, y, w, h = outputs[i][0], outputs[i][1], outputs[i][2], outputs[i][3]

                # Convert from center coordinates to the top-left corner, scaled to the original image.
                left = int((x - w / 2) * x_factor)
                top = int((y - h / 2) * y_factor)
                width = int(w * x_factor)
                height = int(h * y_factor)

                class_ids.append(int(class_id))
                scores.append(float(max_score))
                # cv2.dnn.NMSBoxes expects boxes in (x, y, width, height) format.
                boxes.append([left, top, width, height])

        # Apply non-maximum suppression to drop overlapping detections.
        indices = cv2.dnn.NMSBoxes(boxes, scores, self.confidence_thres, self.iou_thres)

        # Convert the kept boxes to (x1, y1, x2, y2) for downstream use.
        output_boxes = [[boxes[i][0], boxes[i][1],
                         boxes[i][0] + boxes[i][2], boxes[i][1] + boxes[i][3]] for i in indices]
        output_scores = [scores[i] for i in indices]
        output_classes = [class_ids[i] for i in indices]

        return output_boxes, output_scores, output_classes

    def inference(self, image):
        """
        Runs the ONNX model on an image and returns the post-processed detections.

        Returns:
            bboxes: Bounding boxes in (x1, y1, x2, y2) pixel coordinates.
            scores: Confidence scores for each detection.
            class_ids: Class IDs for each detection.
        """
        # Work on a copy so the caller's image is left untouched.
        temp_image = copy.deepcopy(image)

        img_data = self.preprocess(temp_image)

        outputs = self.onnx_session.run(None, {self.input_name: img_data})

        bboxes, scores, class_ids = self.postprocess(outputs)

        return bboxes, scores, class_ids
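

# Minimal usage sketch. The model path, image path, and 640x640 input size below are
# assumptions for illustration only; adjust them to match your own exported model.
if __name__ == '__main__':
    detector = Yolov8onnx(
        onnx_model='yolov8n.onnx',   # hypothetical model path
        input_width=640,
        input_height=640,
        confidence_thres=0.5,
        iou_thres=0.45,
        device='cpu',
    )

    frame = cv2.imread('sample.jpg')  # hypothetical test image
    bboxes, scores, class_ids = detector.inference(frame)

    # Draw the surviving detections and save the result.
    for (x1, y1, x2, y2), score, class_id in zip(bboxes, scores, class_ids):
        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(frame, f'{class_id}: {score:.2f}', (x1, y1 - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
    cv2.imwrite('result.jpg', frame)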