# Copyright (c) OpenMMLab. All rights reserved.
import asyncio
import os
import shutil
import urllib.request

import mmcv
import torch

from mmdet.apis import (async_inference_detector, inference_detector,
                        init_detector)
from mmdet.utils.contextmanagers import concurrent
from mmdet.utils.profiling import profile_time


async def main():
    """Benchmark between async and synchronous inference interfaces.

    Sample runs for 20 demo images on a K80 GPU, model mask_rcnn_r50_fpn_1x:

        async       sync
        7981.79 ms  9660.82 ms
        8074.52 ms  9660.94 ms
        7976.44 ms  9406.83 ms

    The async variant takes about 0.83-0.85 of the time of the synchronous
    interface.
    """
    project_dir = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
    project_dir = os.path.join(project_dir, '..')

    config_file = os.path.join(
        project_dir, 'configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py')
    checkpoint_file = os.path.join(
        project_dir,
        'checkpoints/mask_rcnn_r50_fpn_1x_coco_20200205-d4b0c5d6.pth')
    if not os.path.exists(checkpoint_file):
        url = ('https://download.openmmlab.com/mmdetection/v2.0'
               '/mask_rcnn/mask_rcnn_r50_fpn_1x_coco'
               '/mask_rcnn_r50_fpn_1x_coco_20200205-d4b0c5d6.pth')
        print(f'Downloading {url} ...')
        local_filename, _ = urllib.request.urlretrieve(url)
        os.makedirs(os.path.dirname(checkpoint_file), exist_ok=True)
        shutil.move(local_filename, checkpoint_file)
        print(f'Saved as {checkpoint_file}')
    else:
        print(f'Using existing checkpoint {checkpoint_file}')

    device = 'cuda:0'
    model = init_detector(
        config_file, checkpoint=checkpoint_file, device=device)

    # A queue of CUDA streams enables concurrent inference of multiple
    # images; the queue size defines the concurrency level.
    streamqueue = asyncio.Queue()
    streamqueue_size = 4

    for _ in range(streamqueue_size):
        streamqueue.put_nowait(torch.cuda.Stream(device=device))

    # Load a single test image; it is reused for every benchmark iteration.
    img = mmcv.imread(os.path.join(project_dir, 'demo/demo.jpg'))

    # Warm up so that one-time CUDA initialization costs do not skew
    # the timings below.
    await async_inference_detector(model, img)

    async def detect(img):
        # Borrow a CUDA stream from the queue for the duration of this
        # inference call, then return it for the next task.
        async with concurrent(streamqueue):
            return await async_inference_detector(model, img)

    num_of_images = 20

    with profile_time('benchmark', 'async'):
        tasks = [
            asyncio.create_task(detect(img)) for _ in range(num_of_images)
        ]
        async_results = await asyncio.gather(*tasks)

    with torch.cuda.stream(torch.cuda.default_stream()):
        with profile_time('benchmark', 'sync'):
            sync_results = [
                inference_detector(model, img) for _ in range(num_of_images)
            ]

    # Save the detections from both interfaces for visual comparison.
    result_dir = os.path.join(project_dir, 'demo')
    model.show_result(
        img,
        async_results[0],
        score_thr=0.5,
        show=False,
        out_file=os.path.join(result_dir, 'result_async.jpg'))
    model.show_result(
        img,
        sync_results[0],
        score_thr=0.5,
        show=False,
        out_file=os.path.join(result_dir, 'result_sync.jpg'))


if __name__ == '__main__':
    asyncio.run(main())