ntt123's picture
add sparse matmul
21f3d42
raw
history blame
8.01 kB
// Copyright 2021 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "sparse_matmul/layers/utils.h"
#include <algorithm>
#include <cmath>
#include <random>
#include <type_traits>
#include <vector>
#include "absl/flags/flag.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "include/ghc/filesystem.hpp"
#include "sparse_matmul/layers/csr_blocksparse_matrix.h"
#include "sparse_matmul/layers/errno_mapping.h"
#include "sparse_matmul/layers/sparse_linear_layer.h"
#include "sparse_matmul/numerics/fast_transcendentals.h"
#include "sparse_matmul/numerics/fixed_types.h"
#include "sparse_matmul/numerics/float16_types.h"
#include "sparse_matmul/numerics/test_utils.h"
#include "sparse_matmul/numerics/type_utils.h"
#include "sparse_matmul/vector/cache_aligned_vector.h"
namespace csrblocksparse {
namespace {
// Sub-directory that the test fixture appends to testing::TempDir() to form
// the scratch directory its files are written to.
constexpr char kTempOutputDir[] =
    "third_party/lyra_codec/sparse_matmul/layers/testdata/";

// Number of exponent bits used for the fixed16 type under test; also bounds
// the magnitude of the random test data and the bfloat16 tolerance.
constexpr int kTestExponentBits = 5;
// Typed test fixture parameterized on the compute type.
//
// Each test gets a scratch directory under testing::TempDir() (created in
// SetUp(), removed again in TearDown()) and a comparison tolerance that is
// selected according to |ComputeType|.
template <typename ComputeType>
class CsrBlockSparseMatrixUtilsTest : public testing::Test {
 protected:
  CsrBlockSparseMatrixUtilsTest()
      : output_dir_(
            (ghc::filesystem::path(testing::TempDir()) / kTempOutputDir)
                .string()) {
    // Built-in floating point types use a small fixed tolerance.
    if (std::is_floating_point<ComputeType>::value) {
      tolerance_ = 1e-5;
      return;
    }

    // Types that are neither built-in floating point nor a custom float type
    // use a tolerance of 2^-(number of mantissa bits).
    if (!csrblocksparse::IsCustomFloatType<ComputeType>::value) {
      tolerance_ = std::pow<float>(2, -MantissaBitsOf<ComputeType>::value);
      return;
    }

    // Custom float (bfloat) case. Casting float --> bfloat truncates the
    // least significant 16 bits of the mantissa, so the rounding error grows
    // with the exponent. The exponent for the largest value is 2^4, meaning
    // the maximum rounding error for the weight input is ~ 0.124. The
    // tolerance is twice the measured error because, although the
    // intermediate multiplications are accumulated in float, the output is
    // cast to bfloat.
    const float largest_value =
        std::pow<float>(2, kTestExponentBits) -
        std::pow<float>(2, -fixed16<kTestExponentBits>::kMantissaBits);
    const float rounding_error =
        largest_value - static_cast<float>(ComputeType(largest_value));
    tolerance_ = 2 * rounding_error;
  }

  // Creates the scratch directory; the test is aborted if that fails.
  void SetUp() override {
    std::error_code ec;
    ghc::filesystem::create_directories(output_dir_, ec);
    ASSERT_FALSE(ec);
  }

  // Removes the scratch directory and everything inside it.
  void TearDown() override {
    std::error_code ec;
    ghc::filesystem::remove_all(output_dir_, ec);
    ASSERT_FALSE(ec);
  }

  // Directory the test files are written to and loaded from.
  const std::string output_dir_;
  // Maximum absolute difference accepted when comparing results.
  float tolerance_;
};
// Fills the output vectors with pseudo-random test data drawn from an engine
// seeded with a fixed value.
//
// |weight_vector|, |mask_vector| and |masked_weight_vector| are resized to
// |weight_vector_size| elements and |bias_vector| to |bias_vector_size|
// elements. Weights and biases are drawn from a uniform distribution, every
// mask entry is 1 or 0 with probability 0.5 each, and |masked_weight_vector|
// receives the element-wise product of the mask and the weights.
void GenerateRandomWeightBiasMaskVectors(
    int weight_vector_size, int bias_vector_size,
    std::vector<float>* weight_vector, std::vector<float>* bias_vector,
    std::vector<float>* mask_vector, std::vector<float>* masked_weight_vector) {
  weight_vector->resize(weight_vector_size);
  bias_vector->resize(bias_vector_size);
  mask_vector->resize(weight_vector_size);
  masked_weight_vector->resize(weight_vector_size);

  // Weights and biases are limited to +/-(2^(kTestExponentBits - 1) - 0.5) to
  // prevent clipping in the fixed16 case when a weight and a bias are added.
  const float value_limit = std::pow<float>(2, kTestExponentBits - 1) - 0.5;
  std::uniform_real_distribution<float> value_dist(-value_limit, value_limit);
  std::bernoulli_distribution keep_dist(0.5);
  std::default_random_engine rng(1337);

  // The draw order (weights, then biases, then mask) fixes the generated
  // sequence, so it must not be changed.
  for (float& weight : *weight_vector) {
    weight = value_dist(rng);
  }
  for (float& bias : *bias_vector) {
    bias = value_dist(rng);
  }
  for (float& mask : *mask_vector) {
    mask = keep_dist(rng) ? 1 : 0;
  }

  // Combine mask and weights element by element.
  auto weight_it = weight_vector->begin();
  auto masked_it = masked_weight_vector->begin();
  for (const float mask : *mask_vector) {
    *masked_it = mask * (*weight_it);
    ++masked_it;
    ++weight_it;
  }
}
// Compute types the typed test suite below is instantiated with: built-in
// float, fixed16 with |kTestExponentBits| exponent bits, and bfloat16.
using ComputeTypes =
testing::Types<float, csrblocksparse::fixed16<kTestExponentBits>,
csrblocksparse::bfloat16>;
// Registers the fixture so that every TYPED_TEST runs once per type above.
TYPED_TEST_SUITE(CsrBlockSparseMatrixUtilsTest, ComputeTypes);
// Round-trip test for LoadGenericLayer: random weights, bias and mask are
// written to files in |output_dir_|, loaded back into a SparseLinearLayer,
// and the layer is multiplied with an identity matrix. After subtracting the
// bias, the result must match the masked reference weights within
// |tolerance_|.
TYPED_TEST(CsrBlockSparseMatrixUtilsTest, LoadLayer) {
  const int kWeightVectorSize = 16;
  const int kBiasVectorSize = 4;

  std::vector<float> ref_weight_vector;
  std::vector<float> ref_bias_vector;
  std::vector<float> ref_mask_vector;
  std::vector<float> ref_masked_weight_vector;
  GenerateRandomWeightBiasMaskVectors(
      kWeightVectorSize, kBiasVectorSize, &ref_weight_vector, &ref_bias_vector,
      &ref_mask_vector, &ref_masked_weight_vector);

  // This fixed16_weights.raw vector should only be read by LoadGenericLayer
  // when |TypeParam| is a fixed16_type.
  std::vector<int16_t> fixed_weight_vector(ref_weight_vector.size());
  std::transform(ref_weight_vector.begin(), ref_weight_vector.end(),
                 fixed_weight_vector.begin(), [](float weight) {
                   return fixed16<kTestExponentBits>(weight).raw_val();
                 });
  ASSERT_TRUE(WriteArrayToFile(fixed_weight_vector, "fixed16_weights.raw",
                               this->output_dir_)
                  .ok());
  ASSERT_TRUE(
      WriteArrayToFile(ref_weight_vector, "weights.raw", this->output_dir_)
          .ok());
  ASSERT_TRUE(
      WriteArrayToFile(ref_bias_vector, "bias.raw", this->output_dir_).ok());
  ASSERT_TRUE(
      WriteArrayToFile(ref_mask_vector, "mask.raw", this->output_dir_).ok());

  // Read in the weights, mask, and bias to a layer.
  SparseLinearLayer<TypeParam, TypeParam> actual_layer;
  using DiskWeightType =
      typename std::conditional<csrblocksparse::IsFixed16Type<TypeParam>::value,
                                csrblocksparse::fixed16_type, TypeParam>::type;
  auto status = LoadGenericLayer<TypeParam, TypeParam, DiskWeightType>(
      /*prefix=*/"", /*zipped=*/false, this->output_dir_,
      /*default_bias=*/0.f, &actual_layer);
  ASSERT_TRUE(status.ok());

  // Multiply the read in layer with an identity matrix so we just get
  // the weights added with bias.
  std::vector<TypeParam> identity(kBiasVectorSize * kBiasVectorSize,
                                  TypeParam(0.f));
  // Set the diagonal (flat indices 0, N+1, 2(N+1), ...). Iterating over the
  // diagonal index avoids comparing a signed counter against size().
  for (int diag = 0; diag < kBiasVectorSize; ++diag) {
    identity.at(diag * (kBiasVectorSize + 1)) = TypeParam(1.f);
  }
  FatCacheAlignedVector<TypeParam> masked_weights_plus_bias(kBiasVectorSize,
                                                            kBiasVectorSize);
  actual_layer.SpMM_bias(
      VectorView<TypeParam>(identity.data(), /*rows=*/kBiasVectorSize,
                            /*cols=*/kBiasVectorSize),
      &masked_weights_plus_bias);

  // |masked_weights_plus_bias| - bias = masked weights.
  for (int col = 0; col < masked_weights_plus_bias.cols(); col++) {
    MutableVectorView<TypeParam> col_data = masked_weights_plus_bias.slice(col);
    for (int row = 0; row < masked_weights_plus_bias.rows(); row++) {
      const int flat_index = row * masked_weights_plus_bias.cols() + col;
      // Report the position so a failing element can be identified.
      EXPECT_NEAR(static_cast<float>(col_data[row]) - ref_bias_vector.at(row),
                  ref_masked_weight_vector.at(flat_index), this->tolerance_)
          << "row " << row << ", col " << col;
    }
  }
}
} // namespace
} // namespace csrblocksparse