File size: 2,600 Bytes
ca19df5 6e46617 ca19df5 eb1217f ca19df5 eb1217f ca19df5 6e46617 80cfaf4 ca19df5 80cfaf4 ca19df5 6e46617 e6e36aa ca19df5 cb18cac 6e46617 80cfaf4 ca19df5 6e46617 ca19df5 6e46617 ca19df5 80cfaf4 ca19df5 6e46617 ca19df5 6e46617 ca19df5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 |
# YOLOv5 π by Ultralytics, GPL-3.0 license
"""
Auto-batch utils
"""
from copy import deepcopy
import numpy as np
import torch
from utils.general import LOGGER, colorstr, emojis
from utils.torch_utils import profile
def check_train_batch_size(model, imgsz=640, amp=True):
# Check YOLOv5 training batch size
with torch.cuda.amp.autocast(amp):
return autobatch(deepcopy(model).train(), imgsz) # compute optimal batch size
def autobatch(model, imgsz=640, fraction=0.9, batch_size=16):
# Automatically estimate best batch size to use `fraction` of available CUDA memory
# Usage:
# import torch
# from utils.autobatch import autobatch
# model = torch.hub.load('ultralytics/yolov5', 'yolov5s', autoshape=False)
# print(autobatch(model))
# Check device
prefix = colorstr('AutoBatch: ')
LOGGER.info(f'{prefix}Computing optimal batch size for --imgsz {imgsz}')
device = next(model.parameters()).device # get model device
if device.type == 'cpu':
LOGGER.info(f'{prefix}CUDA not detected, using default CPU batch-size {batch_size}')
return batch_size
# Inspect CUDA memory
gb = 1 << 30 # bytes to GiB (1024 ** 3)
d = str(device).upper() # 'CUDA:0'
properties = torch.cuda.get_device_properties(device) # device properties
t = properties.total_memory / gb # GiB total
r = torch.cuda.memory_reserved(device) / gb # GiB reserved
a = torch.cuda.memory_allocated(device) / gb # GiB allocated
f = t - (r + a) # GiB free
LOGGER.info(f'{prefix}{d} ({properties.name}) {t:.2f}G total, {r:.2f}G reserved, {a:.2f}G allocated, {f:.2f}G free')
# Profile batch sizes
batch_sizes = [1, 2, 4, 8, 16]
try:
img = [torch.zeros(b, 3, imgsz, imgsz) for b in batch_sizes]
results = profile(img, model, n=3, device=device)
except Exception as e:
LOGGER.warning(f'{prefix}{e}')
# Fit a solution
y = [x[2] for x in results if x] # memory [2]
p = np.polyfit(batch_sizes[:len(y)], y, deg=1) # first degree polynomial fit
b = int((f * fraction - p[1]) / p[0]) # y intercept (optimal batch size)
if None in results: # some sizes failed
i = results.index(None) # first fail index
if b >= batch_sizes[i]: # y intercept above failure point
b = batch_sizes[max(i - 1, 0)] # select prior safe point
fraction = np.polyval(p, b) / t # actual fraction predicted
LOGGER.info(emojis(f'{prefix}Using batch-size {b} for {d} {t * fraction:.2f}G/{t:.2f}G ({fraction * 100:.0f}%) β
'))
return b
|