# Triton Inference Server model configuration for the "deberta" model,
# served through the ONNX Runtime platform.
name: "deberta"
platform: "onnxruntime_onnx"

# A non-zero max_batch_size tells Triton that the model supports batching
# along its first dimension. Triton adds that batch dimension implicitly, so
# the `dims` of every input/output below must NOT include it.
max_batch_size: 8

# NOTE(review): shapes assume the exported ONNX graph takes [batch, 512]
# token tensors and returns [batch, 2] logits -- confirm against the model.
input [
  {
    name: "input_ids"
    data_type: TYPE_INT64
    # Per-request shape without the batch axis (full shape: [batch, 512]).
    dims: [ 512 ]
  },
  {
    name: "attention_mask"
    data_type: TYPE_INT64
    # Same per-request shape as input_ids.
    dims: [ 512 ]
  }
]

output [
  {
    name: "logits"
    data_type: TYPE_FP32
    # Per-request shape without the batch axis (full shape: [batch, 2]).
    dims: [ 2 ]
  }
]

# Run a single model instance on GPU.
instance_group [
  {
    count: 1
    kind: KIND_GPU
  }
]

# Enable the dynamic batcher with its default settings; requests are combined
# into batches of at most max_batch_size.
dynamic_batching { }

# Ask the ONNX Runtime backend to use the TensorRT execution accelerator on
# GPU, keeping full FP32 precision.
optimization {
  execution_accelerators {
    gpu_execution_accelerator : [
      {
        name : "tensorrt"
        parameters { key: "precision_mode" value: "FP32" }
      }
    ]
  }
}