{
  "compression": [
    {
      "algorithm": "movement_sparsity",
      "ignored_scopes": [
        "{re}.*NNCFEmbedding.*",
        "{re}.*qa_outputs.*",
        "{re}.*LayerNorm.*"
      ],
      "params": {
        "enable_structured_masking": true,
        "importance_regularization_factor": 0.02,
        "warmup_end_epoch": 4,
        "warmup_start_epoch": 1
      },
      "sparse_structure_by_scopes": [
        {
          "mode": "block",
          "sparse_factors": [
            32,
            32
          ],
          "target_scopes": "{re}.*BertAttention.*"
        },
        {
          "axis": 0,
          "mode": "per_dim",
          "target_scopes": "{re}.*BertIntermediate.*"
        },
        {
          "axis": 1,
          "mode": "per_dim",
          "target_scopes": "{re}.*BertOutput.*"
        }
      ]
    },
    {
      "algorithm": "quantization",
      "export_to_onnx_standard_ops": false,
      "ignored_scopes": [
        "{re}.*__add___[0-1]",
        "{re}.*layer_norm_0",
        "{re}.*matmul_1",
        "{re}.*__truediv__*"
      ],
      "initializer": {
        "batchnorm_adaptation": {
          "num_bn_adaptation_samples": 200
        },
        "range": {
          "num_init_samples": 32,
          "params": {
            "max_percentile": 99.99,
            "min_percentile": 0.01
          },
          "type": "percentile"
        }
      },
      "overflow_fix": "enable",
      "preset": "mixed",
      "scope_overrides": {
        "activations": {
          "{re}.*matmul_0": {
            "mode": "symmetric"
          }
        }
      }
    }
  ],
  "input_info": [
    {
      "keyword": "input_ids",
      "sample_size": [
        16,
        384
      ],
      "type": "long"
    },
    {
      "keyword": "token_type_ids",
      "sample_size": [
        16,
        384
      ],
      "type": "long"
    },
    {
      "keyword": "attention_mask",
      "sample_size": [
        16,
        384
      ],
      "type": "long"
    }
  ],
  "optimum_version": "1.6.4",
  "save_onnx_model": false,
  "transformers_version": "4.26.1"
}