import asyncio
import os
import shutil
import urllib.request

import mmcv
import torch

from mmdet.apis import (async_inference_detector, inference_detector,
                        init_detector)
from mmdet.utils.contextmanagers import concurrent
from mmdet.utils.profiling import profile_time


async def main():
    """Benchmark between async and synchronous inference interfaces.

    Sample runs for 20 demo images on K80 GPU, model - mask_rcnn_r50_fpn_1x:

    async       sync

    7981.79 ms  9660.82 ms
    8074.52 ms  9660.94 ms
    7976.44 ms  9406.83 ms

    Async variant takes about 0.83-0.85 of the time of the synchronous
    interface.
    """
    project_dir = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
    project_dir = os.path.join(project_dir, '..')
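
    # model config and checkpoint paths, resolved against the project root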
    config_file = os.path.join(
        project_dir, 'configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py')
    checkpoint_file = os.path.join(
        project_dir,
        'checkpoints/mask_rcnn_r50_fpn_1x_coco_20200205-d4b0c5d6.pth')
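
    # download the checkpoint from the MMDetection model zoo on first use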
    if not os.path.exists(checkpoint_file):
        url = ('http://download.openmmlab.com/mmdetection/v2.0'
               '/mask_rcnn/mask_rcnn_r50_fpn_1x_coco'
               '/mask_rcnn_r50_fpn_1x_coco_20200205-d4b0c5d6.pth')
        print(f'Downloading {url} ...')
        local_filename, _ = urllib.request.urlretrieve(url)
        os.makedirs(os.path.dirname(checkpoint_file), exist_ok=True)
        shutil.move(local_filename, checkpoint_file)
        print(f'Saved as {checkpoint_file}')
    else:
        print(f'Using existing checkpoint {checkpoint_file}')

    device = 'cuda:0'
    model = init_detector(
        config_file, checkpoint=checkpoint_file, device=device)
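
    # queue of CUDA streams used for concurrent inference of multiple images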
    streamqueue = asyncio.Queue()
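    # queue size defines the concurrency level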
    streamqueue_size = 4

    for _ in range(streamqueue_size):
        streamqueue.put_nowait(torch.cuda.Stream(device=device))
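
    # read the demo image once; the same image is reused for every run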
    img = mmcv.imread(os.path.join(project_dir, 'demo/demo.jpg'))
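
    # warmup run so one-time CUDA initialization costs are not benchmarked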
    await async_inference_detector(model, img)
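
    # concurrent() checks a CUDA stream out of the queue for the duration of
    # one inference, which caps concurrency at streamqueue_size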
    async def detect(img):
        async with concurrent(streamqueue):
            return await async_inference_detector(model, img)
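
    # async benchmark: launch all detections concurrently and await the batch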
    num_of_images = 20
    with profile_time('benchmark', 'async'):
        tasks = [
            asyncio.create_task(detect(img)) for _ in range(num_of_images)
        ]
        async_results = await asyncio.gather(*tasks)
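
    # sync baseline: the same detections issued one after another on the
    # default CUDA stream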
    with torch.cuda.stream(torch.cuda.default_stream()):
        with profile_time('benchmark', 'sync'):
            sync_results = [
                inference_detector(model, img) for _ in range(num_of_images)
            ]
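
    # save one visualization from each variant as a sanity check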
    result_dir = os.path.join(project_dir, 'demo')
    model.show_result(
        img,
        async_results[0],
        score_thr=0.5,
        show=False,
        out_file=os.path.join(result_dir, 'result_async.jpg'))
    model.show_result(
        img,
        sync_results[0],
        score_thr=0.5,
        show=False,
        out_file=os.path.join(result_dir, 'result_sync.jpg'))


if __name__ == '__main__':
    asyncio.run(main())