Mixtral_ether / cluster_preserve_quantize_registry.py

Upload 22 files

516a027 verified 6 months ago

19.2 kB

	# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	# ==============================================================================
	"""Registry responsible for built-in keras classes."""

	import logging
	import warnings

	import tensorflow as tf

	from tensorflow_model_optimization.python.core.clustering.keras import cluster_config
	from tensorflow_model_optimization.python.core.clustering.keras import clustering_registry
	from tensorflow_model_optimization.python.core.keras.compat import keras
	from tensorflow_model_optimization.python.core.quantization.keras import quant_ops
	from tensorflow_model_optimization.python.core.quantization.keras import quantizers
	from tensorflow_model_optimization.python.core.quantization.keras.default_8bit import default_8bit_quantize_registry
	from tensorflow_model_optimization.python.core.quantization.keras.default_8bit import default_8bit_quantizers


	layers = keras.layers
	K = keras.backend

	CLUSTER_CENTROIDS = 'cluster_centroids_tf'
	PULLING_INDICES = 'pulling_indices_tf'
	ORIGINAL_WEIGHTS = 'ori_weights_vars_tf'
	WEIGHT_NAME = 'weight_name'
	CLUSTERING_IMPL = 'clst_impl'
	CENTROIDS_MASK = 'centroids_mask'
	SPARSITY_MASK = 'sparsity_mask'


	def get_unique(t):
	"""Get unique values and lookup index from N-D tensor.

	Args:
	t: tensor
	Returns:
	centroids (unique values), lookup index (same shape as input tensor)
	Example:
	t:
	([[1.0, 2.0],
	[2.0, 3.0],
	[3.0, 3.0],
	[1.0, 2.0]]
	)
	centroids(unique values):
	([1.0, 2.0, 3.0])
	output final index:
	([[0, 1],
	[1, 2],
	[2, 2],
	[0, 1]]
	)
	"""
	t_flatten = tf.reshape(t, shape=(-1,))
	uniques, index = tf.unique(t_flatten)
	return uniques, tf.reshape(index, shape=tf.shape(t))


	def get_centroids(layer, weight, data_format):
	"""Gets centroid infos from the weights of a layer.

	Args:
	layer: The Keras layer from which the weight belong.
	weight: The weight tensor to get the centroids info from.
	data_format: string to indicate format: "channels_first" or "channels_last".
	Returns:
	A 4-tuple of centroids (unique values), number of centroids, lookup index,
	whether to cluster per channel (boolean).
	"""
	cluster_per_channel = layer.layer and isinstance(
	layer.layer, keras.layers.Conv2D
	)

	if not cluster_per_channel:
	centroids, index = get_unique(weight)
	return centroids, tf.size(centroids), index, False

	# In case of cluster_per_channel we need to extract
	# unique values (centroids) for each channel.
	num_channels = weight.shape[1 if data_format == 'channels_first' else -1]
	channel_centroids = []
	channel_indices = []
	num_centroids = []

	for channel in range(num_channels):
	channel_weights = weight[:, :, :, channel]
	centroids, indices = get_unique(channel_weights)

	channel_centroids.append(centroids)
	channel_indices.append(indices)
	num_centroids.append(tf.size(centroids))

	max_centroid = max(num_centroids)
	max_diff = max_centroid - min(num_centroids)

	if max_diff > 1:
	centroids, index = get_unique(weight)
	return centroids, tf.size(centroids), index, False

	for i, centroid in enumerate(channel_centroids):
	if num_centroids[i] != max_centroid:
	one_padding = tf.ones([max_centroid - num_centroids[i]])
	channel_centroids[i] = tf.concat([centroid, one_padding], 0)

	centroids = tf.convert_to_tensor(channel_centroids)
	lookup = tf.convert_to_tensor(channel_indices)

	lookup = tf.transpose(
	lookup,
	perm=(1, 0, 2, 3) if data_format == 'channels_first' else (1, 2, 3, 0))

	return centroids, max_centroid, lookup, True


	class _ClusterPreserveInfo(object):
	"""ClusterPreserveInfo."""

	def __init__(self, weight_attrs, quantize_config_attrs):
	"""ClusterPreserveInfo.

	Args:
	weight_attrs: list of cluster preservable weight attributes of layer.
	quantize_config_attrs: list of quantization configuration class name.
	"""
	self.weight_attrs = weight_attrs
	self.quantize_config_attrs = quantize_config_attrs


	class ClusterPreserveQuantizeRegistry(object):
	"""ClusterPreserveQuantizeRegistry is for built-in keras layers."""
	# The keys represent built-in keras layers; the first values represent the
	# the variables within the layers which hold the kernel weights, second
	# values represent the class name of quantization configuration for layers.
	# This decide the weights of layers with quantization configurations are
	# cluster preservable.
	_LAYERS_CONFIG_MAP = {
	layers.Conv2D:
	_ClusterPreserveInfo(['kernel'], ['Default8BitConvQuantizeConfig']),
	layers.Dense:
	_ClusterPreserveInfo(['kernel'], ['Default8BitQuantizeConfig']),

	# DepthwiseConv2D is supported with 8bit qat, but not with
	# clustering, thus for DepthwiseConv2D CQAT,
	# preserving clustered weights is disabled.
	layers.DepthwiseConv2D:
	_ClusterPreserveInfo(['depthwise_kernel'],
	['Default8BitQuantizeConfig']),

	# layers that are supported with clustering, but not yet with qat
	# layers.Conv1D:
	# _ClusterPreserveInfo(['kernel'], []),
	# layers.Conv2DTranspose:
	# _ClusterPreserveInfo(['kernel'], []),
	# layers.Conv3D:
	# _ClusterPreserveInfo(['kernel'], []),
	# layers.Conv3DTranspose:
	# _ClusterPreserveInfo(['kernel'], []),
	# layers.LocallyConnected1D:
	# _ClusterPreserveInfo(['kernel'], ['Default8BitQuantizeConfig']),
	# layers.LocallyConnected2D:
	# _ClusterPreserveInfo(['kernel'], ['Default8BitQuantizeConfig']),

	# SeparableConv need verify from 8bit qat
	# layers.SeparableConv1D:
	# _ClusterPreserveInfo(['pointwise_kernel'],
	# ['Default8BitConvQuantizeConfig']),
	# layers.SeparableConv2D:
	# _ClusterPreserveInfo(['pointwise_kernel'],
	# ['Default8BitConvQuantizeConfig']),

	# Embedding need verify from 8bit qat
	# layers.Embedding: _ClusterPreserveInfo(['embeddings'], []),
	}

	_DISABLE_CLUSTER_PRESERVE = frozenset({
	layers.DepthwiseConv2D,
	})

	def __init__(self, preserve_sparsity):
	self._config_quantizer_map = {
	'Default8BitQuantizeConfig':
	ClusterPreserveDefault8BitWeightsQuantizer(preserve_sparsity),
	'Default8BitConvQuantizeConfig':
	ClusterPreserveDefault8BitConvWeightsQuantizer(preserve_sparsity),
	}

	@classmethod
	def _no_trainable_weights(cls, layer):
	"""Returns whether this layer has trainable weights.

	Args:
	layer: The layer to check for trainable weights.
	Returns:
	True/False whether the layer has trainable weights.
	"""
	return not layer.trainable_weights

	@classmethod
	def _disable_cluster_preserve(cls, layer):
	"""Returns whether to disable this layer for preserving clusters.

	Args:
	layer: The layer to check for disabling.
	Returns:
	True/False whether disabling this layer for preserving clusters.
	"""
	return layer.__class__ in cls._DISABLE_CLUSTER_PRESERVE

	@classmethod
	def supports(cls, layer):
	"""Returns whether the registry supports this layer type.

	Args:
	layer: The layer to check for support.
	Returns:
	True/False whether the layer type is supported.
	"""
	# layers without trainable weights are consider supported,
	# e.g., ReLU, Softmax, and AveragePooling2D.
	if cls._no_trainable_weights(layer):
	return True

	if layer.__class__ in cls._LAYERS_CONFIG_MAP:
	return True

	return False

	@classmethod
	def _weight_names(cls, layer):

	if cls._no_trainable_weights(layer):
	return []

	return cls._LAYERS_CONFIG_MAP[layer.__class__].weight_attrs

	def apply_cluster_preserve_quantize_config(self, layer, quantize_config):
	"""Applies cluster-preserve weight quantizer.

	Args:
	layer: The layer to check for support.
	quantize_config: quantization config for supporting cluster preservation
	on clustered weights
	Returns:
	The quantize_config with addon cluster preserve weight_quantizer.
	"""
	if not self.supports(layer):
	raise ValueError('Layer ' + str(layer.__class__) + ' is not supported.')

	# Example: ReLU, Softmax, and AveragePooling2D (without trainable weights)
	# DepthwiseConv2D (cluster_preserve is disabled)
	if self._no_trainable_weights(layer) or self._disable_cluster_preserve(
	layer):
	return quantize_config

	# Example: Conv2D, Dense layers
	if quantize_config.__class__.__name__ in self._LAYERS_CONFIG_MAP[
	layer.__class__].quantize_config_attrs:
	quantize_config.weight_quantizer = self._config_quantizer_map[
	quantize_config.__class__.__name__]
	else:
	raise ValueError('Configuration ' +
	str(quantize_config.__class__.__name__) +
	' is not supported for Layer ' + str(layer.__class__) +
	'.')

	return quantize_config


	class Default8bitClusterPreserveQuantizeRegistry(
	ClusterPreserveQuantizeRegistry):
	"""Default 8 bit ClusterPreserveQuantizeRegistry."""

	def get_quantize_config(self, layer):
	"""Returns the quantization config with weight_quantizer for a given layer.

	Args:
	layer: input layer to return quantize config for.
	Returns:
	Returns the quantization config for cluster preserve weight_quantizer.
	"""
	quantize_config = (default_8bit_quantize_registry.
	Default8BitQuantizeRegistry().
	get_quantize_config(layer))
	cluster_aware_quantize_config = super(
	Default8bitClusterPreserveQuantizeRegistry,
	self).apply_cluster_preserve_quantize_config(layer, quantize_config)

	return cluster_aware_quantize_config


	class ClusterPreserveDefaultWeightsQuantizer(quantizers.LastValueQuantizer):
	"""Quantize weights while preserving clusters."""

	def __init__(
	self, num_bits, per_axis, symmetric, narrow_range, preserve_sparsity):
	"""ClusterPreserveDefaultWeightsQuantizer.

	Args:
	num_bits: Number of bits for quantization
	per_axis: Whether to apply per_axis quantization. The last dimension is
	used as the axis.
	symmetric: If true, use symmetric quantization limits instead of training
	the minimum and maximum of each quantization range separately.
	narrow_range: In case of 8 bits, narrow_range nudges the quantized range
	to be [-127, 127] instead of [-128, 127]. This ensures symmetric
	range has 0 as the centre.
	preserve_sparsity: Whether to apply prune-cluster-preserving quantization
	aware training.
	"""
	super(ClusterPreserveDefaultWeightsQuantizer, self).__init__(
	num_bits=num_bits,
	per_axis=per_axis,
	symmetric=symmetric,
	narrow_range=narrow_range,
	)
	self.preserve_sparsity = preserve_sparsity

	def _build_clusters(self, name, layer):
	"""Extracts the cluster centroids and cluster indices.

	Extracts cluster centroids and cluster indices from the pretrained
	clustered model when the input layer is clustered.

	Args:
	name: Name of weights in layer.
	layer: Quantization wrapped keras layer.
	Returns:
	A dictionary of the initial values of the
	cluster centroids, cluster indices, original weights,
	the pretrained flag for marking the first training
	epoch, and weight name.
	"""
	result = {}
	weights = getattr(layer.layer, name)
	if self.preserve_sparsity and not tf.reduce_any(weights == 0):
	self.preserve_sparsity = False
	logging.warning(
	'Input layer does not contain zero weights, so apply CQAT instead.')
	centroids_mask = None

	# Detects whether layer is convolutional and is clustered per channel
	data_format = getattr(layer.layer, 'data_format', None)
	centroids, num_centroids, lookup, cluster_per_channel = get_centroids(
	layer, weights, data_format)

	if self.preserve_sparsity:
	sparsity_mask = tf.math.divide_no_nan(weights, weights)
	zero_idx = tf.argmin(tf.abs(centroids), axis=-1)
	centroids_mask = 1.0 - tf.one_hot(zero_idx, num_centroids)
	result = {SPARSITY_MASK: sparsity_mask}

	# Prepare clustering variables for the Keras graph when clusters
	# exist, assuming we do not use number_of_clusters larger than 1024
	if num_centroids > 1024:
	warnings.warn(f'No clustering performed on layer {layer.name}.\n'
	f'Too many centroids to cluster.')
	return result
	# If not enough clusters, we do not preserve clustering
	elif num_centroids <= 1:
	warnings.warn(f'No clustering performed on layer {layer.name}.\n'
	f'Perhaps too many clusters requested for this layer?')
	return result
	else:
	clst_centroids_tf = layer.add_weight(
	CLUSTER_CENTROIDS,
	shape=centroids.shape,
	initializer=keras.initializers.Constant(
	value=K.batch_get_value([centroids])[0]
	),
	dtype=centroids.dtype,
	trainable=True,
	)

	ori_weights_tf = layer.add_weight(
	ORIGINAL_WEIGHTS,
	shape=weights.shape,
	initializer=keras.initializers.Constant(
	value=K.batch_get_value([weights])[0]
	),
	dtype=weights.dtype,
	trainable=True,
	)

	# Get clustering implementation according to layer type
	clustering_impl_cls = clustering_registry.ClusteringLookupRegistry(
	).get_clustering_impl(
	layer.layer, name, cluster_per_channel=cluster_per_channel)
	clustering_impl = clustering_impl_cls(
	clst_centroids_tf, cluster_config.GradientAggregation.SUM,
	data_format)

	pulling_indices = tf.dtypes.cast(
	clustering_impl.get_pulling_indices(ori_weights_tf),
	lookup.dtype
	)

	pulling_indices_tf = layer.add_weight(
	PULLING_INDICES,
	shape=lookup.shape,
	initializer=keras.initializers.Constant(
	value=K.batch_get_value([pulling_indices])[0]
	),
	dtype=lookup.dtype,
	trainable=False,
	)

	result_clst = {
	CLUSTER_CENTROIDS: clst_centroids_tf,
	PULLING_INDICES: pulling_indices_tf,
	ORIGINAL_WEIGHTS: ori_weights_tf,
	WEIGHT_NAME: name,
	CLUSTERING_IMPL: clustering_impl,
	CENTROIDS_MASK: centroids_mask,
	}
	result.update(result_clst)
	return result

	def build(self, tensor_shape, name, layer):
	"""Build (P)CQAT wrapper.

	When preserve_sparsity is true and the input is clustered.

	Args:
	tensor_shape: Shape of weights which needs to be quantized.
	name: Name of weights in layer.
	layer: Quantization wrapped keras layer.
	Returns:
	Dictionary of centroids, indices and
	quantization params, the dictionary will be passed
	to __call__ function.
	"""
	# To get all the initial values from pretrained clustered model
	result = self._build_clusters(name, layer)
	# Result can have clustering nodes, then this is CQAT
	# Result can have both clustering nodes and sparsity mask, then
	# this will be PCQAT
	result.update(
	super(ClusterPreserveDefaultWeightsQuantizer,
	self).build(tensor_shape, name, layer))

	return result

	def __call__(self, inputs, training, weights, **kwargs):
	"""Apply cluster preserved quantization to the input tensor.

	Args:
	inputs: Input tensor (layer's weights) to be quantized.
	training: Whether the graph is currently training.
	weights: Dictionary of weights (params) the quantizer can use to
	quantize the tensor (layer's weights). This contains the weights
	created in the `build` function.
	**kwargs: Additional variables which may be passed to the quantizer.
	Returns:
	quantized tensor.
	"""
	if training:
	if CLUSTER_CENTROIDS in weights:
	if self.preserve_sparsity:
	weights[ORIGINAL_WEIGHTS].assign(
	tf.multiply(weights[ORIGINAL_WEIGHTS],
	weights[SPARSITY_MASK]))
	weights[CLUSTERING_IMPL].cluster_centroids.assign(
	weights[CLUSTERING_IMPL].
	cluster_centroids * weights[CENTROIDS_MASK]
	)
	weights[CLUSTER_CENTROIDS].assign(
	weights[CLUSTERING_IMPL].cluster_centroids
	)
	# Insert clustering variables
	weights[PULLING_INDICES].assign(tf.dtypes.cast(
	weights[CLUSTERING_IMPL].get_pulling_indices(
	weights[ORIGINAL_WEIGHTS]),
	weights[PULLING_INDICES].dtype
	))

	output = weights[CLUSTERING_IMPL].get_clustered_weight(
	weights[PULLING_INDICES], weights[ORIGINAL_WEIGHTS])
	inputs.assign(output)
	else:
	if self.preserve_sparsity:
	inputs = tf.multiply(inputs, weights[SPARSITY_MASK])
	output = inputs
	else:
	output = inputs

	return quant_ops.LastValueQuantize(
	output,
	weights['min_var'],
	weights['max_var'],
	is_training=training,
	num_bits=self.num_bits,
	per_channel=self.per_axis,
	symmetric=self.symmetric,
	narrow_range=self.narrow_range
	)


	class ClusterPreserveDefault8BitWeightsQuantizer(
	ClusterPreserveDefaultWeightsQuantizer):
	"""ClusterPreserveWeightsQuantizer for default 8bit weights."""

	def __init__(self, preserve_sparsity):
	super(ClusterPreserveDefault8BitWeightsQuantizer,
	self).__init__(num_bits=8,
	per_axis=False,
	symmetric=True,
	narrow_range=True,
	preserve_sparsity=preserve_sparsity)
	self.preserve_sparsity = preserve_sparsity


	class ClusterPreserveDefault8BitConvWeightsQuantizer(
	ClusterPreserveDefaultWeightsQuantizer,
	default_8bit_quantizers.Default8BitConvWeightsQuantizer):
	"""ClusterPreserveWeightsQuantizer for default 8bit Conv2D weights."""

	def __init__(self, preserve_sparsity): # pylint: disable=super-init-not-called
	default_8bit_quantizers.Default8BitConvWeightsQuantizer.__init__(self)
	self.preserve_sparsity = preserve_sparsity

	def build(self, tensor_shape, name, layer):
	result = ClusterPreserveDefaultWeightsQuantizer._build_clusters(
	self, name, layer)
	result.update(
	default_8bit_quantizers.Default8BitConvWeightsQuantizer.build(
	self, tensor_shape, name, layer))
	return result