# Copyright 2021 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import warnings
from inspect import signature
from itertools import chain
from pathlib import Path
from typing import TYPE_CHECKING, Iterable, List, Tuple, Union

import numpy as np
from packaging.version import Version, parse

from ..tokenization_utils_base import PreTrainedTokenizerBase
from ..utils import (
    TensorType,
    is_tf_available,
    is_torch_available,
    logging,
)
from .config import OnnxConfig


if is_torch_available():
    from ..modeling_utils import PreTrainedModel
    from ..pytorch_utils import is_torch_less_than_1_11

if is_tf_available():
    from ..modeling_tf_utils import TFPreTrainedModel

if TYPE_CHECKING:
    from ..feature_extraction_utils import FeatureExtractionMixin
    from ..processing_utils import ProcessorMixin
    from ..tokenization_utils import PreTrainedTokenizer


logger = logging.get_logger(__name__)  # pylint: disable=invalid-name


# This is the minimal required version to support some ONNX Runtime features
ORT_QUANTIZE_MINIMUM_VERSION = parse("1.4.0")
def check_onnxruntime_requirements(minimum_version: Version):
    """
    Check that onnxruntime is installed and that the installed version is recent enough.

    Raises:
        ImportError: If onnxruntime is not installed or the installed version is too old.
    """
    try:
        import onnxruntime

        # Parse the version of the installed onnxruntime
        ort_version = parse(onnxruntime.__version__)

        # We require 1.4.0 minimum
        if ort_version < ORT_QUANTIZE_MINIMUM_VERSION:
            raise ImportError(
                f"We found an older version of onnxruntime ({onnxruntime.__version__}) "
                f"but we require onnxruntime to be >= {minimum_version} to enable all the conversions options.\n"
                "Please update onnxruntime by running `pip install --upgrade onnxruntime`"
            )

    except ImportError:
        raise ImportError(
            "onnxruntime doesn't seem to be currently installed. "
            "Please install the onnxruntime by running `pip install onnxruntime`"
            " and relaunch the conversion."
        )
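# A minimal usage sketch of the check above (the import path assumes this module lives at
# `transformers.onnx.convert`, which is where the rest of the library imports it from):
#
#     from transformers.onnx.convert import ORT_QUANTIZE_MINIMUM_VERSION, check_onnxruntime_requirements
#
#     # Raises an actionable ImportError if onnxruntime is missing or older than the minimum version
#     check_onnxruntime_requirements(ORT_QUANTIZE_MINIMUM_VERSION)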
def export_pytorch(
    preprocessor: Union["PreTrainedTokenizer", "FeatureExtractionMixin", "ProcessorMixin"],
    model: "PreTrainedModel",
    config: OnnxConfig,
    opset: int,
    output: Path,
    tokenizer: "PreTrainedTokenizer" = None,
    device: str = "cpu",
) -> Tuple[List[str], List[str]]:
    """
    Export a PyTorch model to an ONNX Intermediate Representation (IR).

    Args:
        preprocessor ([`PreTrainedTokenizer`], [`FeatureExtractionMixin`] or [`ProcessorMixin`]):
            The preprocessor used for encoding the data.
        model ([`PreTrainedModel`]):
            The model to export.
        config ([`~onnx.config.OnnxConfig`]):
            The ONNX configuration associated with the exported model.
        opset (`int`):
            The version of the ONNX operator set to use.
        output (`Path`):
            Directory to store the exported ONNX model.
        device (`str`, *optional*, defaults to `cpu`):
            The device on which the ONNX model will be exported. Either `cpu` or `cuda`.

    Returns:
        `Tuple[List[str], List[str]]`: A tuple with an ordered list of the model's inputs, and the named inputs from
        the ONNX configuration.
    """
    if isinstance(preprocessor, PreTrainedTokenizerBase) and tokenizer is not None:
        raise ValueError("You cannot provide both a tokenizer and a preprocessor to export the model.")
    if tokenizer is not None:
        warnings.warn(
            "The `tokenizer` argument is deprecated and will be removed in version 5 of Transformers. Use"
            " `preprocessor` instead.",
            FutureWarning,
        )
        logger.info("Overwriting the `preprocessor` argument with `tokenizer` to generate dummy inputs.")
        preprocessor = tokenizer

    if issubclass(type(model), PreTrainedModel):
        import torch
        from torch.onnx import export as onnx_export

        logger.info(f"Using framework PyTorch: {torch.__version__}")
        with torch.no_grad():
            model.config.return_dict = True
            model.eval()

            # Check if we need to override certain configuration items
            if config.values_override is not None:
                logger.info(f"Overriding {len(config.values_override)} configuration item(s)")
                for override_config_key, override_config_value in config.values_override.items():
                    logger.info(f"\t- {override_config_key} -> {override_config_value}")
                    setattr(model.config, override_config_key, override_config_value)

            # Ensure inputs match
            # TODO: Check when exporting QA we provide "is_pair=True"
            model_inputs = config.generate_dummy_inputs(preprocessor, framework=TensorType.PYTORCH)
            device = torch.device(device)
            if device.type == "cuda" and torch.cuda.is_available():
                model.to(device)
                model_inputs_device = {}
                for k, v in model_inputs.items():
                    if isinstance(v, tuple):
                        model_inputs_device[k] = tuple(
                            x.to(device) if isinstance(x, torch.Tensor) else None for x in v
                        )
                    elif isinstance(v, list):
                        model_inputs_device[k] = [
                            tuple(x.to(device) if isinstance(x, torch.Tensor) else None for x in t) for t in v
                        ]
                    else:
                        model_inputs_device[k] = v.to(device)

                model_inputs = model_inputs_device

            inputs_match, matched_inputs = ensure_model_and_config_inputs_match(model, model_inputs.keys())
            onnx_outputs = list(config.outputs.keys())

            if not inputs_match:
                raise ValueError("Model and config inputs don't match")

            config.patch_ops()

            # PyTorch deprecated the `enable_onnx_checker` and `use_external_data_format` arguments in v1.11,
            # so we check the torch version for backwards compatibility
            if is_torch_less_than_1_11:
                # export can work with named args but the dict containing named args
                # has to be the last element of the args tuple.
                try:
                    onnx_export(
                        model,
                        (model_inputs,),
                        f=output.as_posix(),
                        input_names=list(config.inputs.keys()),
                        output_names=onnx_outputs,
                        dynamic_axes=dict(chain(config.inputs.items(), config.outputs.items())),
                        do_constant_folding=True,
                        use_external_data_format=config.use_external_data_format(model.num_parameters()),
                        enable_onnx_checker=True,
                        opset_version=opset,
                    )
                except RuntimeError as err:
                    message = str(err)
                    if (
                        message
                        == "Exporting model exceed maximum protobuf size of 2GB. Please call torch.onnx.export without"
                        " setting use_external_data_format parameter."
                    ):
                        message = (
                            "Exporting model exceed maximum protobuf size of 2GB. Please call torch.onnx.export"
                            " without setting use_external_data_format parameter or try with torch 1.10+."
                        )
                        raise RuntimeError(message)
                    else:
                        raise err
            else:
                onnx_export(
                    model,
                    (model_inputs,),
                    f=output.as_posix(),
                    input_names=list(config.inputs.keys()),
                    output_names=onnx_outputs,
                    dynamic_axes=dict(chain(config.inputs.items(), config.outputs.items())),
                    do_constant_folding=True,
                    opset_version=opset,
                )

            config.restore_ops()

    return matched_inputs, onnx_outputs
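# A minimal sketch of calling `export_pytorch` directly (most callers go through `export` below). The checkpoint
# name and `DistilBertOnnxConfig` are illustrative assumptions, not requirements of this function:
#
#     from pathlib import Path
#     from transformers import AutoModel, AutoTokenizer
#     from transformers.models.distilbert import DistilBertOnnxConfig
#
#     model = AutoModel.from_pretrained("distilbert-base-uncased")
#     tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
#     onnx_config = DistilBertOnnxConfig(model.config)
#
#     inputs, outputs = export_pytorch(
#         tokenizer, model, onnx_config, onnx_config.default_onnx_opset, Path("model.onnx"), device="cpu"
#     )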
def export_tensorflow(
    preprocessor: Union["PreTrainedTokenizer", "FeatureExtractionMixin"],
    model: "TFPreTrainedModel",
    config: OnnxConfig,
    opset: int,
    output: Path,
    tokenizer: "PreTrainedTokenizer" = None,
) -> Tuple[List[str], List[str]]:
    """
    Export a TensorFlow model to an ONNX Intermediate Representation (IR).

    Args:
        preprocessor ([`PreTrainedTokenizer`] or [`FeatureExtractionMixin`]):
            The preprocessor used for encoding the data.
        model ([`TFPreTrainedModel`]):
            The model to export.
        config ([`~onnx.config.OnnxConfig`]):
            The ONNX configuration associated with the exported model.
        opset (`int`):
            The version of the ONNX operator set to use.
        output (`Path`):
            Directory to store the exported ONNX model.

    Returns:
        `Tuple[List[str], List[str]]`: A tuple with an ordered list of the model's inputs, and the named inputs from
        the ONNX configuration.
    """
    import onnx
    import tensorflow as tf
    import tf2onnx

    if isinstance(preprocessor, PreTrainedTokenizerBase) and tokenizer is not None:
        raise ValueError("You cannot provide both a tokenizer and a preprocessor to export the model.")
    if tokenizer is not None:
        warnings.warn(
            "The `tokenizer` argument is deprecated and will be removed in version 5 of Transformers. Use"
            " `preprocessor` instead.",
            FutureWarning,
        )
        logger.info("Overwriting the `preprocessor` argument with `tokenizer` to generate dummy inputs.")
        preprocessor = tokenizer

    model.config.return_dict = True

    # Check if we need to override certain configuration items
    if config.values_override is not None:
        logger.info(f"Overriding {len(config.values_override)} configuration item(s)")
        for override_config_key, override_config_value in config.values_override.items():
            logger.info(f"\t- {override_config_key} -> {override_config_value}")
            setattr(model.config, override_config_key, override_config_value)

    # Ensure inputs match
    model_inputs = config.generate_dummy_inputs(preprocessor, framework=TensorType.TENSORFLOW)
    inputs_match, matched_inputs = ensure_model_and_config_inputs_match(model, model_inputs.keys())
    onnx_outputs = list(config.outputs.keys())

    input_signature = [
        tf.TensorSpec([None] * tensor.ndim, dtype=tensor.dtype, name=key) for key, tensor in model_inputs.items()
    ]
    onnx_model, _ = tf2onnx.convert.from_keras(model, input_signature, opset=opset)
    onnx.save(onnx_model, output.as_posix())
    config.restore_ops()

    return matched_inputs, onnx_outputs
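# A minimal sketch of the TensorFlow path, mirroring the PyTorch sketch above. `TFAutoModel` and the checkpoint
# name are illustrative assumptions; `tf2onnx` and `onnx` must be installed for this function to run:
#
#     from pathlib import Path
#     from transformers import AutoTokenizer, TFAutoModel
#
#     tf_model = TFAutoModel.from_pretrained("distilbert-base-uncased")
#     tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
#     # `onnx_config` built from the model's config, as in the PyTorch sketch
#     inputs, outputs = export_tensorflow(
#         tokenizer, tf_model, onnx_config, onnx_config.default_onnx_opset, Path("tf_model.onnx")
#     )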
def export(
    preprocessor: Union["PreTrainedTokenizer", "FeatureExtractionMixin", "ProcessorMixin"],
    model: Union["PreTrainedModel", "TFPreTrainedModel"],
    config: OnnxConfig,
    opset: int,
    output: Path,
    tokenizer: "PreTrainedTokenizer" = None,
    device: str = "cpu",
) -> Tuple[List[str], List[str]]:
    """
    Export a PyTorch or TensorFlow model to an ONNX Intermediate Representation (IR).

    Args:
        preprocessor ([`PreTrainedTokenizer`], [`FeatureExtractionMixin`] or [`ProcessorMixin`]):
            The preprocessor used for encoding the data.
        model ([`PreTrainedModel`] or [`TFPreTrainedModel`]):
            The model to export.
        config ([`~onnx.config.OnnxConfig`]):
            The ONNX configuration associated with the exported model.
        opset (`int`):
            The version of the ONNX operator set to use.
        output (`Path`):
            Directory to store the exported ONNX model.
        device (`str`, *optional*, defaults to `cpu`):
            The device on which the ONNX model will be exported. Either `cpu` or `cuda`. Only PyTorch is supported
            for export on CUDA devices.

    Returns:
        `Tuple[List[str], List[str]]`: A tuple with an ordered list of the model's inputs, and the named inputs from
        the ONNX configuration.
    """
    if not (is_torch_available() or is_tf_available()):
        raise ImportError(
            "Cannot convert because neither PyTorch nor TensorFlow is installed. "
            "Please install torch or tensorflow first."
        )

    if is_tf_available() and isinstance(model, TFPreTrainedModel) and device == "cuda":
        raise RuntimeError("`tf2onnx` does not support export on CUDA device.")

    if isinstance(preprocessor, PreTrainedTokenizerBase) and tokenizer is not None:
        raise ValueError("You cannot provide both a tokenizer and a preprocessor to export the model.")
    if tokenizer is not None:
        warnings.warn(
            "The `tokenizer` argument is deprecated and will be removed in version 5 of Transformers. Use"
            " `preprocessor` instead.",
            FutureWarning,
        )
        logger.info("Overwriting the `preprocessor` argument with `tokenizer` to generate dummy inputs.")
        preprocessor = tokenizer

    if is_torch_available():
        from ..utils import get_torch_version

        if not config.is_torch_support_available:
            logger.warning(
                f"Unsupported PyTorch version for this model. Minimum required is {config.torch_onnx_minimum_version},"
                f" got: {get_torch_version()}"
            )

    if is_torch_available() and issubclass(type(model), PreTrainedModel):
        return export_pytorch(preprocessor, model, config, opset, output, tokenizer=tokenizer, device=device)
    elif is_tf_available() and issubclass(type(model), TFPreTrainedModel):
        return export_tensorflow(preprocessor, model, config, opset, output, tokenizer=tokenizer)
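# `export` is the framework-agnostic entry point: it dispatches to `export_pytorch` or `export_tensorflow`
# based on the model class. A hedged sketch reusing `model`, `tokenizer` and `onnx_config` from the earlier
# sketches; `device="cuda"` is only valid for PyTorch models:
#
#     from pathlib import Path
#
#     onnx_path = Path("model.onnx")
#     onnx_inputs, onnx_outputs = export(
#         tokenizer, model, onnx_config, onnx_config.default_onnx_opset, onnx_path, device="cpu"
#     )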
def validate_model_outputs(
    config: OnnxConfig,
    preprocessor: Union["PreTrainedTokenizer", "FeatureExtractionMixin", "ProcessorMixin"],
    reference_model: Union["PreTrainedModel", "TFPreTrainedModel"],
    onnx_model: Path,
    onnx_named_outputs: List[str],
    atol: float,
    tokenizer: "PreTrainedTokenizer" = None,
):
    """
    Validate that the outputs of the exported ONNX model match those of the reference model on dummy inputs,
    within an absolute tolerance of `atol`.
    """
    from onnxruntime import InferenceSession, SessionOptions

    logger.info("Validating ONNX model...")

    if isinstance(preprocessor, PreTrainedTokenizerBase) and tokenizer is not None:
        raise ValueError("You cannot provide both a tokenizer and a preprocessor to validate the model outputs.")
    if tokenizer is not None:
        warnings.warn(
            "The `tokenizer` argument is deprecated and will be removed in version 5 of Transformers. Use"
            " `preprocessor` instead.",
            FutureWarning,
        )
        logger.info("Overwriting the `preprocessor` argument with `tokenizer` to generate dummy inputs.")
        preprocessor = tokenizer

    # Generate inputs with a different batch_size and seq_len than were used for the conversion to properly test
    # dynamic input shapes.
    if is_torch_available() and issubclass(type(reference_model), PreTrainedModel):
        reference_model_inputs = config.generate_dummy_inputs(
            preprocessor,
            batch_size=config.default_fixed_batch + 1,
            seq_length=config.default_fixed_sequence + 1,
            framework=TensorType.PYTORCH,
        )
    else:
        reference_model_inputs = config.generate_dummy_inputs(
            preprocessor,
            batch_size=config.default_fixed_batch + 1,
            seq_length=config.default_fixed_sequence + 1,
            framework=TensorType.TENSORFLOW,
        )

    # Create ONNX Runtime session
    options = SessionOptions()
    session = InferenceSession(onnx_model.as_posix(), options, providers=["CPUExecutionProvider"])

    # Compute outputs from the reference model
    if is_torch_available() and issubclass(type(reference_model), PreTrainedModel):
        reference_model.to("cpu")
    ref_outputs = reference_model(**reference_model_inputs)
    ref_outputs_dict = {}

    # We flatten potential collections of outputs (e.g. past_key_values) to a flat structure
    for name, value in ref_outputs.items():
        # Overwriting the output name as "present" since it is the name used for the ONNX outputs
        # ("past_key_values" being taken for the ONNX inputs)
        if name == "past_key_values":
            name = "present"
        if isinstance(value, (list, tuple)):
            value = config.flatten_output_collection_property(name, value)
            ref_outputs_dict.update(value)
        else:
            ref_outputs_dict[name] = value

    # Create onnxruntime inputs from the reference model inputs
    reference_model_inputs_onnxruntime = config.generate_dummy_inputs_onnxruntime(reference_model_inputs)

    # We flatten potential collections of inputs (e.g. past_key_values)
    onnx_inputs = {}
    for name, value in reference_model_inputs_onnxruntime.items():
        if isinstance(value, (list, tuple)):
            value = config.flatten_output_collection_property(name, value)
            onnx_inputs.update({tensor_name: pt_tensor.numpy() for tensor_name, pt_tensor in value.items()})
        else:
            onnx_inputs[name] = value.numpy()

    # Compute outputs from the ONNX model
    onnx_outputs = session.run(onnx_named_outputs, onnx_inputs)

    # Check that the ONNX output names are a subset of the reference model's output names
    ref_outputs_set, onnx_outputs_set = set(ref_outputs_dict.keys()), set(onnx_named_outputs)
    if not onnx_outputs_set.issubset(ref_outputs_set):
        logger.info(
            f"\t-[x] ONNX model output names {onnx_outputs_set} do not match reference model {ref_outputs_set}"
        )
        raise ValueError(
            "Output names don't match between reference model and ONNX exported model: "
            f"{onnx_outputs_set.difference(ref_outputs_set)}"
        )
    else:
        logger.info(f"\t-[✓] ONNX model output names match reference model ({onnx_outputs_set})")

    # Check that the shapes and values match
    for name, ort_value in zip(onnx_named_outputs, onnx_outputs):
        if is_torch_available() and issubclass(type(reference_model), PreTrainedModel):
            ref_value = ref_outputs_dict[name].detach().numpy()
        else:
            ref_value = ref_outputs_dict[name].numpy()
        logger.info(f'\t- Validating ONNX Model output "{name}":')

        # Shape
        if not ort_value.shape == ref_value.shape:
            logger.info(f"\t\t-[x] shape {ort_value.shape} doesn't match {ref_value.shape}")
            raise ValueError(
                "Output shapes don't match between reference model and ONNX exported model: "
                f"Got {ref_value.shape} (reference) and {ort_value.shape} (ONNX)"
            )
        else:
            logger.info(f"\t\t-[✓] {ort_value.shape} matches {ref_value.shape}")

        # Values
        if not np.allclose(ref_value, ort_value, atol=atol):
            bad_indices = np.logical_not(np.isclose(ref_value, ort_value, atol=atol))
            logger.info(f"\t\t-[x] values not close enough (atol: {atol})")
            raise ValueError(
                "Output values don't match between reference model and ONNX exported model: "
                f"Got max absolute difference of: {np.amax(np.abs(ref_value - ort_value))} for "
                f"{ref_value[bad_indices]} vs {ort_value[bad_indices]}"
            )
        else:
            logger.info(f"\t\t-[✓] all values close (atol: {atol})")
def ensure_model_and_config_inputs_match(
    model: Union["PreTrainedModel", "TFPreTrainedModel"], model_inputs: Iterable[str]
) -> Tuple[bool, List[str]]:
    """
    Check that the inputs generated from the ONNX config are all accepted by the model's forward (or call)
    signature.

    Returns:
        `Tuple[bool, List[str]]`: Whether all config inputs are accepted by the model, and the matching input names
        ordered as in the model's signature.
    """
    if is_torch_available() and issubclass(type(model), PreTrainedModel):
        forward_parameters = signature(model.forward).parameters
    else:
        forward_parameters = signature(model.call).parameters
    model_inputs_set = set(model_inputs)

    # The model's signature may accept more inputs than the config generates; that is fine.
    forward_inputs_set = set(forward_parameters.keys())
    is_ok = model_inputs_set.issubset(forward_inputs_set)

    # Make sure the input order matches the model's signature (very important!)
    matching_inputs = forward_inputs_set.intersection(model_inputs_set)
    ordered_inputs = [parameter for parameter in forward_parameters.keys() if parameter in matching_inputs]

    return is_ok, ordered_inputs
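# Rough illustration of the contract above, assuming a PyTorch model whose forward signature accepts
# `input_ids` and `attention_mask` (names here are illustrative):
#
#     ok, ordered = ensure_model_and_config_inputs_match(model, ["attention_mask", "input_ids"])
#     # ok is True because both names appear in the signature; `ordered` follows the signature order,
#     # e.g. ["input_ids", "attention_mask"], which is what the positional ONNX export relies on.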