# Triton Inference Server model configuration for the "deberta" model,
# served through the ONNX Runtime backend.

name: "deberta"
platform: "onnxruntime_onnx"

# A max_batch_size greater than 0 tells Triton that the model supports
# batching along its first dimension. In that mode Triton adds the batch
# dimension implicitly, so every `dims` entry below describes a single
# request WITHOUT the batch dimension.
max_batch_size: 8

# NOTE(review): assumes the exported ONNX graph takes rank-2 inputs of shape
# [batch, 512] and produces rank-2 logits of shape [batch, 2] -- confirm
# against the model file. The previous `dims: [ -1, 512 ]` / `[ -1, 2 ]`
# declared an extra leading dimension on top of the implicit batch dimension
# (i.e. full shapes [-1, -1, 512] and [-1, -1, 2]).
input [
  {
    name: "input_ids"
    data_type: TYPE_INT64
    dims: [ 512 ]
  },
  {
    name: "attention_mask"
    data_type: TYPE_INT64
    dims: [ 512 ]
  }
]

output [
  {
    name: "logits"
    data_type: TYPE_FP32
    dims: [ 2 ]
  }
]

# One model instance, placed on GPU.
instance_group [
  {
    count: 1
    kind: KIND_GPU
  }
]

# An empty block enables dynamic batching with Triton's default settings;
# batches are still capped by max_batch_size above.
dynamic_batching { }

# Ask ONNX Runtime to use its TensorRT execution accelerator on GPU,
# running in FP32 precision.
optimization {
  execution_accelerators {
    gpu_execution_accelerator : [
      {
        name : "tensorrt"
        parameters { key: "precision_mode" value: "FP32" }
      }
    ]
  }
}