Currently I'm tryin to convert given onnx file to tensorrt file, and do inference on the generated tensorrt file. To do so, I used tensorrt python binding API, but "Error Code 1: Cuda Driver (invalid resource handle)" happens and there is no kind description about this. Can anyone help me to overcome this situation? Thx in advance, and below is my code snippet.
def trt_export(self):
fp_16_mode = True
## Obviously, I provided appropriate file names
trt_file_name = "PATH_TO_TRT_FILE"
onnx_name = "PATH_TO_ONNX_FILE"
TRT_LOGGER = trt.Logger(trt.Logger.VERBOSE)
EXPLICIT_BATCH = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
builder = trt.Builder(TRT_LOGGER)
network = builder.create_network(EXPLICIT_BATCH)
parser = trt.OnnxParser(network, TRT_LOGGER)
config = builder.create_builder_config()
config.max_workspace_size = (1<<30)
config.set_flag(trt.BuilderFlag.FP16)
config.default_device_type = trt.DeviceType.GPU
profile = builder.create_optimization_profile()
profile.set_shape('input', (1, 3, IMG_SIZE, IMG_SIZE), (12, 3, IMG_SIZE, IMG_SIZE), (32, 3, IMG_SIZE, IMG_SIZE)) # random nubmers for min. opt. max batch
config.add_optimization_profile(profile)
with open(onnx_name, 'rb') as model:
if not parser.parse(model.read()):
for error in range(parser.num_errors):
print(parser.get_error(error))
engine = builder.build_engine(network, config)
buf = engine.serialize()
with open(trt_file_name, 'wb') as f:
f.write(buf)
def validate_trt_result(self, input_path):
TRT_LOGGER = trt.Logger(trt.Logger.VERBOSE)
trt_file_name = "PATH_TO_TRT_FILE"
trt_runtime = trt.Runtime(TRT_LOGGER)
with open(trt_file_name, 'rb') as f:
engine_data = f.read()
engine = trt_runtime.deserialize_cuda_engine(engine_data)
cuda.init()
device = cuda.Device(0)
ctx = device.make_context()
inputs, outputs, bindings = [], [], []
context = engine.create_execution_context()
stream = cuda.Stream()
index = 0
for binding in engine:
size = trt.volume(engine.get_binding_shape(binding)) * -1 # assuming one batch
dtype = trt.nptype(engine.get_binding_dtype(binding))
host_mem = cuda.pagelocked_empty(size, dtype)
device_mem = cuda.mem_alloc(host_mem.nbytes)
bindings.append(int(device_mem))
if engine.binding_is_input(binding):
inputs.append(HostDeviceMem(host_mem, device_mem))
context.set_binding_shape(index, [1, 3, IMG_SIZE, IMG_SIZE])
else:
outputs.append(HostDeviceMem(host_mem, device_mem))
index += 1
print(context.all_binding_shapes_specified)
input_img = cv2.imread(input_path)
input_r = cv2.resize(input_img, dsize = (256, 256))
input_p = np.transpose(input_r, (2, 0, 1))
input_e = np.expand_dims(input_p, axis = 0)
input_f = input_e.astype(np.float32)
input_f /= 255
numpy_array_input = [input_f]
hosts = [input.host for input in inputs]
trt_types = [trt.int32]
for numpy_array, host, trt_types in zip(numpy_array_input, hosts, trt_types):
numpy_array = np.asarray(numpy_array).astype(trt.nptype(trt_types)).ravel()
print(numpy_array.shape)
np.copyto(host, numpy_array)
[cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs]
#### ERROR HAPPENS HERE ####
context.execute_async_v2(bindings=bindings, stream_handle=stream.handle)
#### ERROR HAPPENS HERE ####
[cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs]
stream.synchronize()
print("TRT model inference result : ")
output = outputs[0].host
for one in output :
print(one)
ctx.pop()
Looks like ctx.push()
line is missing before a line with memcpy_htod_async
.
Such a error can happen if TensorFlow / PyTorch is also using CUDA in parallel with TensorRT.
See the related question/answer: https://stackoverflow.com/a/73996477/5655977