|
|
|
|
|
|
|
import os |
|
import sys |
|
import cv2 |
|
import pathlib |
|
import argparse |
|
import numpy as np |
|
import onnxruntime as ort |
|
|
|
CURRENT_DIR = pathlib.Path(__file__).parent |
|
sys.path.append(str(CURRENT_DIR)) |
|
|
|
from coco import COCO_CLASSES |
|
from demo_utils import mkdir, multiclass_nms, demo_postprocess, vis |
|
|
|
|
|
def make_parser():
    """Build the command-line parser for the ONNX Runtime inference sample.

    Options (in registration order):
        -m / --model         path of the ONNX model file
        -i / --image_path    path of the input image
        -o / --output_dir    directory that receives the annotated image
        -s / --score_thr     score threshold used when drawing results
        --input_shape        comma-separated network input shape
        --ipu                flag selecting the IPU code path
        --provider_config    config file handed to the provider options

    Returns:
        argparse.ArgumentParser: parser with all options registered.
    """
    # Each entry is (flag names, keyword arguments for add_argument).
    # The order is kept so that the generated --help output is unchanged.
    option_table = (
        (
            ("-m", "--model"),
            dict(type=str, default="yolox-s-int8.onnx", help="Input your onnx model."),
        ),
        (
            ("-i", "--image_path"),
            dict(type=str, default="test_image.png", help="Path to your input image."),
        ),
        (
            ("-o", "--output_dir"),
            dict(type=str, default="demo_output", help="Path to your output directory."),
        ),
        (
            ("-s", "--score_thr"),
            dict(type=float, default=0.3, help="Score threshold to filter the result."),
        ),
        (
            ("--input_shape",),
            dict(type=str, default="640,640", help="Specify an input shape for inference."),
        ),
        (
            ("--ipu",),
            dict(action="store_true", help="Use IPU for inference."),
        ),
        (
            ("--provider_config",),
            dict(
                type=str,
                default="vaip_config.json",
                help="Path of the config file for setting provider_options.",
            ),
        ),
    )

    cli = argparse.ArgumentParser("onnxruntime inference sample")
    for flags, settings in option_table:
        cli.add_argument(*flags, **settings)
    return cli
|
|
|
|
|
def preprocess(img, input_shape, swap=(2, 0, 1)):
    """Resize an image with preserved aspect ratio and pad it to the network size.

    The image is scaled by a single factor so that it fits inside
    ``input_shape``, pasted into the top-left corner of a canvas filled with
    the value 114, reordered with ``swap`` and converted to float32. Pixel
    values are not normalised.

    Args:
        img (numpy.ndarray): H x W x C, image read with OpenCV
        input_shape (tuple(int)): input shape of the network for inference
        swap (tuple(int)): new order of axes to transpose the input image

    Returns:
        padded_img (numpy.ndarray): preprocessed image to be fed to the network
        ratio (float): ratio for scaling the image to the input shape
    """
    src_h, src_w = img.shape[0], img.shape[1]

    # Images with a channel axis get a 3-channel canvas; anything else uses
    # input_shape as the canvas shape directly.
    if len(img.shape) == 3:
        canvas_shape = (input_shape[0], input_shape[1], 3)
    else:
        canvas_shape = input_shape
    canvas = np.full(canvas_shape, 114, dtype=np.uint8)

    # The smaller of the two per-axis factors keeps the whole image inside the canvas.
    ratio = min(input_shape[0] / src_h, input_shape[1] / src_w)
    scaled_h = int(src_h * ratio)
    scaled_w = int(src_w * ratio)

    # cv2.resize takes the target size as (width, height).
    scaled = cv2.resize(img, (scaled_w, scaled_h), interpolation=cv2.INTER_LINEAR)
    canvas[:scaled_h, :scaled_w] = scaled.astype(np.uint8)

    reordered = canvas.transpose(swap)
    return np.ascontiguousarray(reordered, dtype=np.float32), ratio
|
|
|
|
|
def postprocess(outputs, input_shape, ratio, nms_thr=0.45, score_thr=0.1):
    """Turn raw detection-head outputs into final detections.

    Each head output is flattened over its trailing axes and transposed so the
    second axis indexes predictions, then all heads are concatenated along that
    axis. Channels from index 4 onward are passed through a sigmoid before the
    result is decoded by ``demo_postprocess`` (called with ``p6=False``).
    Boxes are converted from centre/size to corner form, divided by ``ratio``
    to map them back to the original image, and filtered with
    ``multiclass_nms``.

    Args:
        outputs (tuple(numpy.ndarray)): outputs of the detection heads with onnxruntime session
        input_shape (tuple(int)): input shape of the network for inference
        ratio (float): ratio for scaling the image to the input shape
        nms_thr (float): threshold forwarded to ``multiclass_nms`` as ``nms_thr``.
            Defaults to 0.45, the value that used to be hard-coded.
        score_thr (float): threshold forwarded to ``multiclass_nms`` as ``score_thr``.
            Defaults to 0.1, the value that used to be hard-coded.

    Returns:
        dets (numpy.ndarray): n x 6, dets[:,:4] -> boxes, dets[:,4] -> scores, dets[:,5] -> class indices.
        NOTE(review): the caller checks the result against None, so
        ``multiclass_nms`` presumably returns None when nothing survives.
    """
    # (N, C, ...) -> (N, C, P) -> (N, P, C) for every head, then stack the heads
    # along the prediction axis.
    flattened = [
        out.reshape(*out.shape[:2], -1).transpose(0, 2, 1) for out in outputs
    ]
    merged = np.concatenate(flattened, axis=1)

    # np.concatenate returned a fresh array, so the in-place write is safe.
    merged[..., 4:] = sigmoid(merged[..., 4:])

    predictions = demo_postprocess(merged, input_shape, p6=False)[0]
    boxes = predictions[:, :4]
    # Objectness (column 4) times the per-class columns gives per-class scores.
    scores = predictions[:, 4:5] * predictions[:, 5:]

    # (cx, cy, w, h) -> (x1, y1, x2, y2)
    centers = boxes[:, :2]
    half_sizes = boxes[:, 2:4] / 2.0
    boxes_xyxy = np.concatenate((centers - half_sizes, centers + half_sizes), axis=1)

    # Undo the preprocessing scale so boxes are in original-image coordinates.
    boxes_xyxy /= ratio

    return multiclass_nms(boxes_xyxy, scores, nms_thr=nms_thr, score_thr=score_thr)
|
|
|
|
|
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` without overflow.

    The naive formula evaluates ``exp(-x)``, which overflows (and emits a
    RuntimeWarning) for large negative ``x``. Here ``exp`` is only applied to
    non-positive values, so it stays in ``[0, 1]``.

    Args:
        x (numpy.ndarray or scalar): input value(s).

    Returns:
        numpy.ndarray or numpy scalar: element-wise sigmoid of ``x``, with the
        same shape as ``x`` (a scalar input yields a scalar).
    """
    # decay = exp(-|x|) lies in (0, 1] and can at worst underflow to 0.
    decay = np.exp(-np.abs(x))
    upper = 1.0 / (1.0 + decay)
    # x >= 0: 1 / (1 + e^-x)         == upper
    # x <  0: e^x / (1 + e^x)        == decay * upper
    result = np.where(np.asarray(x) >= 0, upper, decay * upper)
    # Indexing with () unwraps a 0-d result to a scalar and leaves arrays as-is.
    return result[()]
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: load an image, run the ONNX model on it, draw the
    # detections and write the annotated image into the output directory.
    parser = make_parser()
    args = parser.parse_args()

    # --input_shape is "height,width": preprocess() compares index 0 with the
    # image height and index 1 with the image width.
    try:
        input_shape = tuple(int(dim) for dim in args.input_shape.split(','))
    except ValueError:
        parser.error("--input_shape must be two comma-separated integers, e.g. 640,640")
    if len(input_shape) != 2:
        parser.error("--input_shape must be two comma-separated integers, e.g. 640,640")

    # cv2.imread returns None rather than raising when the file is missing or
    # cannot be decoded; fail here with a clear message.
    origin_img = cv2.imread(args.image_path)
    if origin_img is None:
        sys.exit(f"Failed to read image: {args.image_path}")

    img, ratio = preprocess(origin_img, input_shape)

    if args.ipu:
        providers = ["VitisAIExecutionProvider"]
        provider_options = [{"config_file": args.provider_config}]
    else:
        providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
        provider_options = None
    session = ort.InferenceSession(args.model, providers=providers, provider_options=provider_options)

    # preprocess() moved the channel axis to the front; the transpose below moves
    # it back to the end after adding a batch axis (the model presumably takes
    # channels-last input — confirm against the model).
    ort_inputs = {session.get_inputs()[0].name: np.transpose(img[None, :, :, :], (0, 2, 3, 1))}
    outputs = session.run(None, ort_inputs)
    # Move the last axis of each output to position 1, which is the layout
    # postprocess() reshapes from.
    outputs = [np.transpose(out, (0, 3, 1, 2)) for out in outputs]

    dets = postprocess(outputs, input_shape, ratio)
    if dets is not None:
        final_boxes, final_scores, final_cls_inds = dets[:, :4], dets[:, 4], dets[:, 5]
        origin_img = vis(origin_img, final_boxes, final_scores, final_cls_inds,
                         conf=args.score_thr, class_names=COCO_CLASSES)

    mkdir(args.output_dir)
    output_path = os.path.join(args.output_dir, os.path.basename(args.image_path))
    # cv2.imwrite reports failure through its return value instead of raising.
    if not cv2.imwrite(output_path, origin_img):
        sys.exit(f"Failed to write output image: {output_path}")
|
|