Commit • 596de6d — 1 Parent(s): 7e98b48
Default FP16 TensorRT export (#6798)
Browse files
* Assert engine precision #6777
* Default to FP32 inputs for TensorRT engines
* Default to FP16 TensorRT exports #6777
* Remove wrong line #6777
* Automatically adjust detect.py input precision #6777
* Automatically adjust val.py input precision #6777
* Add missing colon
* Cleanup
* Cleanup
* Remove default trt_fp16_input definition
* Experiment
* Reorder detect.py if statement to after half checks
* Update common.py
* Update export.py
* Cleanup
Co-authored-by: Glenn Jocher <[email protected]>
detect.py
CHANGED
@@ -97,6 +97,10 @@ def run(weights=ROOT / 'yolov5s.pt', # model.pt path(s)
|
|
97 |
half &= (pt or jit or onnx or engine) and device.type != 'cpu' # FP16 supported on limited backends with CUDA
|
98 |
if pt or jit:
|
99 |
model.model.half() if half else model.model.float()
|
|
|
|
|
|
|
|
|
100 |
|
101 |
# Dataloader
|
102 |
if webcam:
|
|
|
97 |
half &= (pt or jit or onnx or engine) and device.type != 'cpu' # FP16 supported on limited backends with CUDA
|
98 |
if pt or jit:
|
99 |
model.model.half() if half else model.model.float()
|
100 +    elif engine and model.trt_fp16_input != half:
101 +        LOGGER.info('model ' + (
102 +            'requires' if model.trt_fp16_input else 'incompatible with') + ' --half. Adjusting automatically.')
103 +        half = model.trt_fp16_input
|
104 |
|
105 |
# Dataloader
|
106 |
if webcam:
|
export.py
CHANGED
@@ -233,9 +233,8 @@ def export_engine(model, im, file, train, half, simplify, workspace=4, verbose=F
|
|
233 |
for out in outputs:
|
234 |
LOGGER.info(f'{prefix}\toutput "{out.name}" with shape {out.shape} and dtype {out.dtype}')
|
235 |
|
236 |
-
|
237 |
-
|
238 |
-
if half:
|
239 |
config.set_flag(trt.BuilderFlag.FP16)
|
240 |
with builder.build_engine(network, config) as engine, open(f, 'wb') as t:
|
241 |
t.write(engine.serialize())
|
|
|
233 |
for out in outputs:
|
234 |
LOGGER.info(f'{prefix}\toutput "{out.name}" with shape {out.shape} and dtype {out.dtype}')
|
235 |
|
236 +    LOGGER.info(f'{prefix} building FP{16 if builder.platform_has_fast_fp16 else 32} engine in {f}')
237 +    if builder.platform_has_fast_fp16:
|
|
|
238 |
config.set_flag(trt.BuilderFlag.FP16)
|
239 |
with builder.build_engine(network, config) as engine, open(f, 'wb') as t:
|
240 |
t.write(engine.serialize())
|
models/common.py
CHANGED
@@ -338,6 +338,7 @@ class DetectMultiBackend(nn.Module):
|
|
338 |
import tensorrt as trt # https://developer.nvidia.com/nvidia-tensorrt-download
|
339 |
check_version(trt.__version__, '7.0.0', hard=True) # require tensorrt>=7.0.0
|
340 |
Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))
|
|
|
341 |
logger = trt.Logger(trt.Logger.INFO)
|
342 |
with open(w, 'rb') as f, trt.Runtime(logger) as runtime:
|
343 |
model = runtime.deserialize_cuda_engine(f.read())
|
@@ -348,6 +349,8 @@ class DetectMultiBackend(nn.Module):
|
|
348 |
shape = tuple(model.get_binding_shape(index))
|
349 |
data = torch.from_numpy(np.empty(shape, dtype=np.dtype(dtype))).to(device)
|
350 |
bindings[name] = Binding(name, dtype, shape, data, int(data.data_ptr()))
|
|
|
|
|
351 |
binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items())
|
352 |
context = model.create_execution_context()
|
353 |
batch_size = bindings['images'].shape[0]
|
|
|
338 |
import tensorrt as trt # https://developer.nvidia.com/nvidia-tensorrt-download
|
339 |
check_version(trt.__version__, '7.0.0', hard=True) # require tensorrt>=7.0.0
|
340 |
Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))
|
341 +            trt_fp16_input = False
|
342 |
logger = trt.Logger(trt.Logger.INFO)
|
343 |
with open(w, 'rb') as f, trt.Runtime(logger) as runtime:
|
344 |
model = runtime.deserialize_cuda_engine(f.read())
|
|
|
349 |
shape = tuple(model.get_binding_shape(index))
|
350 |
data = torch.from_numpy(np.empty(shape, dtype=np.dtype(dtype))).to(device)
|
351 |
bindings[name] = Binding(name, dtype, shape, data, int(data.data_ptr()))
|
352 +                if model.binding_is_input(index) and dtype == np.float16:
353 +                    trt_fp16_input = True
|
354 |
binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items())
|
355 |
context = model.create_execution_context()
|
356 |
batch_size = bindings['images'].shape[0]
|
val.py
CHANGED
@@ -144,6 +144,10 @@ def run(data,
|
|
144 |
model.model.half() if half else model.model.float()
|
145 |
elif engine:
|
146 |
batch_size = model.batch_size
|
|
|
|
|
|
|
|
|
147 |
else:
|
148 |
half = False
|
149 |
batch_size = 1 # export.py models default to batch-size 1
|
|
|
144 |
model.model.half() if half else model.model.float()
|
145 |
elif engine:
|
146 |
batch_size = model.batch_size
|
147 +        if model.trt_fp16_input != half:
148 +            LOGGER.info('model ' + (
149 +                'requires' if model.trt_fp16_input else 'incompatible with') + ' --half. Adjusting automatically.')
150 +            half = model.trt_fp16_input
|
151 |
else:
|
152 |
half = False
|
153 |
batch_size = 1 # export.py models default to batch-size 1
|