|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#include <array>
#include <cmath>
#include <cstdint>
#include <tuple>
#include <vector>

#include "absl/status/status.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/string_view.h"
#include "absl/types/span.h"
#include "gtest/gtest.h"
#include "include/ghc/filesystem.hpp"
#include "sparse_matmul/compute/matmul.h"
#include "sparse_matmul/layers/utils.h"
#include "sparse_matmul/numerics/test_utils.h"
#include "sparse_matmul/os/coop_threads.h"
|
|
|
namespace csrblocksparse { |
|
namespace { |
|
|
|
// Location of the layer files used by the ModelMatrices tests, relative to
// the current working directory (non-Android builds only).
inline constexpr absl::string_view kTestdataPath = "layers/testdata";
|
|
|
// Serializes a CSR block-sparse matrix to a flat buffer, reads it back, and
// verifies the deserialized copy produces the same SpMM result as the dense
// masked reference.
TEST(CSRBlockSparseMatrix, FlatBufferSerialization) {
  const int kRows = 8;
  const int kCols = 8;
  // Block-diagonal mask: rows 0-3 cover columns 0-3, rows 4-7 cover
  // columns 4-7.
  std::vector<int> mask = {1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0,
                           1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0,
                           0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1,
                           0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1};
  std::vector<float> values(kRows * kCols, 1.f);
  // A few distinct weights so that serialization errors cannot cancel out.
  values[1] = 2.f;
  values[3] = 3.f;
  values[36] = -1.f;
  values[45] = -2.f;

  csrblocksparse::CacheAlignedVector<float> bias(kRows);
  csrblocksparse::CacheAlignedVector<float> rhs(kCols);
  csrblocksparse::CacheAlignedVector<float> out_ref(kRows);
  csrblocksparse::CacheAlignedVector<float> out_test(kRows);

  bias.FillZero();
  rhs.FillOnes();

  csrblocksparse::MaskedSparseMatrix<float> matrix(kRows, kCols, mask.data(),
                                                   values.data());

  // Reference result from the dense masked implementation.
  matrix.SpMM_bias(rhs, bias, &out_ref);

  csrblocksparse::CsrBlockSparseMatrix<csrblocksparse::bfloat16, float, int16_t>
      block_sparse_matrix(matrix);

  // Round-trip the sparse matrix through its flat-buffer representation.
  std::string buffer;
  std::size_t num_bytes = block_sparse_matrix.WriteToFlatBuffer(&buffer);

  csrblocksparse::CsrBlockSparseMatrix<csrblocksparse::bfloat16, float, int16_t>
      new_block_sparse_matrix(reinterpret_cast<const uint8_t*>(buffer.c_str()),
                              num_bytes);

  new_block_sparse_matrix.SpMM_bias(rhs, bias, &out_test);

  CheckResult(out_ref, out_test, kCols);
}
|
|
|
// Compares the CSR block-sparse implementation (SpMM_bias and, optionally,
// MatVec) against the dense masked reference for one configuration.
//
// Args:
//   rows, cols:               matrix shape.
//   block_height, block_width: sparsity block shape.
//   sparsity:                 fraction of zeroed blocks in the random mask.
//   use_relu:                 clamp negative outputs to zero.
//   num_threads:              requested thread count; the layer may reduce
//                             it, and every thread shard is executed
//                             serially on the calling thread here.
//   fatness:                  number of right-hand-side columns.
//   test_matmul:              additionally exercise the MatVec path.
template <typename ComputeType, typename RhsType, typename OutType>
void CorrectnessCheckBlockSpMM(int rows, int cols, int block_height,
                               int block_width, float sparsity,
                               bool use_relu = false, int num_threads = 1,
                               int fatness = 1, bool test_matmul = false) {
  using BiasType = typename TypeOfProduct<ComputeType, RhsType>::type;
  MaskedSparseMatrix<float> matrix(rows, cols, sparsity, block_height,
                                   block_width);
  matrix.CastWeights<ComputeType>();
  FatCacheAlignedVector<RhsType> rhs(cols, fatness);
  CacheAlignedVector<BiasType> bias(rows);
  FatCacheAlignedVector<OutType> out(rows, fatness);

  bias.FillRandom();
  rhs.FillRandom();
  out.FillZero();
  FatCacheAlignedVector<OutType> out_reference = out;

  // Reference result is computed before |bias| is moved into the layer.
  matrix.SpMM_bias(rhs, bias, &out_reference, use_relu);

  CsrBlockSparseMatrix<ComputeType, RhsType> sparse_matrix(matrix);

  SparseLinearLayer<ComputeType, RhsType> sparse_linear_layer(
      std::move(sparse_matrix), std::move(bias));
  num_threads = sparse_linear_layer.PrepareForThreads(num_threads);

  // Each thread id computes its own shard of the output, so running all ids
  // back-to-back on this thread still fills the whole output vector.
  for (int thread_id = 0; thread_id < num_threads; ++thread_id) {
    sparse_linear_layer.SpMM_bias(rhs, &out, use_relu, thread_id);
  }

  CheckResult(out_reference, out, sparse_linear_layer.cols());

  if (test_matmul) {
    for (int thread_id = 0; thread_id < num_threads; ++thread_id) {
      // NOTE(review): the trailing 1, 0 arguments look like replicas and
      // output stride -- confirm against the MatVec API.
      sparse_linear_layer.MatVec(rhs, use_relu, thread_id,
                                 1, 0, &out);
    }

    CheckResult(out_reference, out, sparse_linear_layer.cols());
  }
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Worker body for the multi-threaded tests: three chained applications of the
// layer with a barrier between each, so every thread observes the complete
// previous product before it is consumed as the next input.  The ping-pong
// between *rhs and *out requires a square layer.
template <typename ComputeType, typename RhsType, typename OutType>
void ThreadBody(
    SpinBarrier* spin_barrier, int tid,
    const SparseLinearLayer<ComputeType, RhsType>& sparse_linear_layer,
    FatCacheAlignedVector<RhsType>* rhs, FatCacheAlignedVector<OutType>* out,
    bool use_relu) {
  sparse_linear_layer.SpMM_bias(*rhs, out, use_relu, tid);
  spin_barrier->barrier();
  sparse_linear_layer.SpMM_bias(*out, rhs, use_relu, tid);
  spin_barrier->barrier();
  sparse_linear_layer.SpMM_bias(*rhs, out, use_relu, tid);
}
|
|
|
template <typename ComputeType, typename RhsType, typename OutType> |
|
void CorrectnessCheckBlockSpMM_MultiThread(int rows, int cols, int block_height, |
|
int block_width, float sparsity, |
|
bool use_relu = false, |
|
int num_threads = 1, |
|
int fatness = 1) { |
|
typedef typename TypeOfProduct<ComputeType, RhsType>::type BiasType; |
|
CHECK(rows == cols); |
|
MaskedSparseMatrix<float> matrix(rows, cols, sparsity, block_height, |
|
block_width); |
|
matrix.CastWeights<ComputeType>(); |
|
FatCacheAlignedVector<RhsType> rhs(cols, fatness); |
|
FatCacheAlignedVector<RhsType> rhs_mt(cols, fatness); |
|
CacheAlignedVector<BiasType> bias(rows); |
|
FatCacheAlignedVector<OutType> out(rows, fatness); |
|
|
|
bias.FillOnes(); |
|
rhs.FillOnes(); |
|
rhs_mt.FillOnes(); |
|
out.FillZero(); |
|
FatCacheAlignedVector<OutType> out_reference = out; |
|
|
|
matrix.SpMM_bias(rhs, bias, &out_reference, use_relu); |
|
matrix.SpMM_bias(out_reference, bias, &rhs, use_relu); |
|
matrix.SpMM_bias(rhs, bias, &out_reference, use_relu); |
|
|
|
CsrBlockSparseMatrix<ComputeType, RhsType> sparse_matrix(matrix); |
|
|
|
num_threads = sparse_matrix.PrepareForThreads(num_threads, |
|
1); |
|
|
|
SparseLinearLayer<ComputeType, RhsType> sparse_linear_layer( |
|
std::move(sparse_matrix), std::move(bias)); |
|
|
|
csrblocksparse::LaunchOnThreadsWithBarrier( |
|
num_threads, ThreadBody<ComputeType, RhsType, OutType>, |
|
sparse_linear_layer, &rhs_mt, &out, use_relu); |
|
|
|
CheckResult(out_reference, out, cols); |
|
} |
|
|
|
} |
|
|
|
// Multiplies a hand-written 8x8 mask (all weights one, bias one, rhs one) and
// verifies each output element equals its mask row-sum plus the bias.
TEST(MaskedSparseCorrectness, HandCoded) {
  const int kRows = 8;
  const int kCols = 8;

  std::vector<int> mask = {1, 1, 0, 0, 0, 1, 1, 1,  //
                           0, 1, 0, 1, 0, 1, 0, 1,  //
                           1, 0, 0, 1, 1, 1, 1, 0,  //
                           0, 0, 0, 0, 0, 0, 0, 0,  //
                           1, 1, 1, 1, 1, 1, 1, 1,  //
                           0, 0, 0, 0, 1, 1, 0, 0,  //
                           1, 1, 0, 0, 1, 1, 0, 0,  //
                           1, 0, 0, 0, 0, 1, 0, 1};
  std::vector<float> weights(kRows * kCols, 1.f);
  // Row sums of the mask plus the bias of one.
  std::vector<float> expected = {6.f, 5.f, 6.f, 1.f, 9.f, 3.f, 5.f, 4.f};

  MaskedSparseMatrix<float> matrix(kRows, kCols, mask.data(), weights.data());

  CacheAlignedVector<float> rhs(kCols);
  CacheAlignedVector<float> bias(kRows);
  CacheAlignedVector<float> result(kRows);
  rhs.FillOnes();
  bias.FillOnes();
  result.FillZero();

  MaskedLinearLayer<float> layer(std::move(matrix), std::move(bias));
  layer.SpMM_bias(rhs, &result);

  for (int row = 0; row < kRows; ++row) {
    EXPECT_EQ(expected[row], result[row]);
  }
}
|
|
|
// Same hand-coded problem as HandCoded, but with a fat (multi-column)
// right-hand side: every column of the fat output must equal the
// single-vector answer.
TEST(MaskedSparseCorrectness, HandCodedFatVector) {
  const int kRows = 8;
  const int kCols = 8;

  std::vector<int> mask = {1, 1, 0, 0, 0, 1, 1, 1,
                           0, 1, 0, 1, 0, 1, 0, 1,
                           1, 0, 0, 1, 1, 1, 1, 0,
                           0, 0, 0, 0, 0, 0, 0, 0,
                           1, 1, 1, 1, 1, 1, 1, 1,
                           0, 0, 0, 0, 1, 1, 0, 0,
                           1, 1, 0, 0, 1, 1, 0, 0,
                           1, 0, 0, 0, 0, 1, 0, 1};

  std::vector<float> values(kRows * kCols, 1.f);
  // Mask row-sums plus the bias of one.
  std::vector<float> answer = {6.f, 5.f, 6.f, 1.f, 9.f, 3.f, 5.f, 4.f};

  const int kMaxWidth = 5;
  for (int width = 5; width <= kMaxWidth; ++width) {
    // Build the matrix inside the loop: MaskedLinearLayer takes ownership of
    // it via std::move, so a loop-external matrix would be used after move on
    // any second iteration.
    MaskedSparseMatrix<float> matrix(kRows, kCols, mask.data(), values.data());
    FatCacheAlignedVector<float> rhs(kCols, width);
    CacheAlignedVector<float> bias(kRows);
    FatCacheAlignedVector<float> out(kRows, width);

    bias.FillOnes();
    rhs.FillOnes();
    out.FillZero();

    MaskedLinearLayer<float> masked_linear_layer(std::move(matrix),
                                                 std::move(bias));

    masked_linear_layer.SpMM_bias(rhs, &out);

    // Check every column of the fat output.  The inner loop is bounded by
    // |width| (the actual number of columns in |out|); the previous version
    // shadowed |width| and iterated to kMaxWidth, which would read out of
    // bounds if the outer loop ever ran with width < kMaxWidth.
    for (int i = 0; i < kRows; ++i) {
      for (int col = 0; col < width; ++col) {
        EXPECT_EQ(answer[i], out[i + col * kRows]);
      }
    }
  }
}
|
|
|
// Runs the hand-coded 8x8 problem through a CSR layer prepared for two
// threads; both thread ids are executed back-to-back on the calling thread
// (presumably each id writes only its own shard of the output -- the final
// comparison covers all rows).
TEST(CsrBlockSparseMatrix, HandCodedMultiThread) {
  const int kRows = 8;
  const int kCols = 8;

  std::vector<int> mask = {1, 1, 0, 0, 0, 1, 1, 1,
                           0, 1, 0, 1, 0, 1, 0, 1,
                           1, 0, 0, 1, 1, 1, 1, 0,
                           0, 0, 0, 0, 0, 0, 0, 0,
                           1, 1, 1, 1, 1, 1, 1, 1,
                           0, 0, 0, 0, 1, 1, 0, 0,
                           1, 1, 0, 0, 1, 1, 0, 0,
                           1, 0, 0, 0, 0, 1, 0, 1};

  std::vector<float> values(kRows * kCols, 1.f);

  // Mask row-sums plus the bias of one.
  std::vector<float> answer = {6.f, 5.f, 6.f, 1.f, 9.f, 3.f, 5.f, 4.f};

  MaskedSparseMatrix<float> matrix(kRows, kCols, mask.data(), values.data());
  CacheAlignedVector<float> rhs(kCols);
  CacheAlignedVector<float> bias(kRows);
  CacheAlignedVector<float> out(kRows);

  bias.FillOnes();
  rhs.FillOnes();
  out.FillZero();

  // Separate copy of the bias: both layers below take ownership of theirs.
  CacheAlignedVector<float> bias_csr = bias;

  // Build the CSR copy before |matrix| is moved into the masked layer.
  CsrBlockSparseMatrix<bfloat16, float> sparse_matrix(matrix);

  MaskedLinearLayer<float> masked_linear_layer(std::move(matrix),
                                               std::move(bias));

  masked_linear_layer.SpMM_bias(rhs, &out);

  SparseLinearLayer<bfloat16, float> sparse_linear_layer(
      std::move(sparse_matrix), std::move(bias_csr));
  sparse_linear_layer.PrepareForThreads(2, 1);

  CacheAlignedVector<float> out_tmp(kRows);
  const bool kUseRelu = false;
  // Run both thread shards serially on this thread.
  sparse_linear_layer.SpMM_bias(rhs, &out_tmp, kUseRelu, 0);
  sparse_linear_layer.SpMM_bias(rhs, &out_tmp, kUseRelu, 1);

  for (int i = 0; i < kRows; ++i) {
    EXPECT_EQ(answer[i], out_tmp[i]);
  }
}
|
|
|
// Round-trips dense random weights through bfloat16 and checks that they stay
// within the tolerance allowed by CheckResult.
TEST(TestCasts, TestBfloat16) {
  const int kRows = 1000;
  const int kCols = 100;
  const float kSparsity = 0.f;

  MaskedSparseMatrix<float> reference(kRows, kCols, kSparsity);
  // Copy built from the same mask and values, then quantized in place.
  MaskedSparseMatrix<float> cast_copy(kRows, kCols, reference.mask().data(),
                                      reference.values().data());
  cast_copy.CastWeights<bfloat16>();

  CheckResult(reference.values(), cast_copy.values(), kCols);
}
|
|
|
// Round-trips dense random weights through fp16 and checks them against the
// originals.
TEST(TestCasts, TestFP16) {
  const int kRows = 1000;
  const int kCols = 100;
  const float kSparsity = 0.f;

  MaskedSparseMatrix<float> matrix(kRows, kCols, kSparsity);
#if !defined __arm__ && !defined __aarch64__
  // Flush weights whose magnitude is below 2^-14 to zero before the cast, as
  // such values are not represented accurately enough to pass CheckResult.
  // Note: the comparison uses std::abs (mirroring TestFixed16); the previous
  // `value < threshold` check also zeroed every negative weight, silently
  // removing negative values from the test's coverage.
  for (int i = 0; i < matrix.values().size(); ++i) {
    if (std::abs(matrix.data()[i]) < 1. / static_cast<float>(1 << 14))
      matrix.data()[i] = 0.f;
  }
#endif
  MaskedSparseMatrix<float> matrix_fp16(kRows, kCols, matrix.mask().data(),
                                        matrix.values().data());

  matrix_fp16.CastWeights<csrblocksparse::fp16>();

  CheckResult(matrix.values(), matrix_fp16.values(), kCols);
}
|
|
|
// Round-trips dense random weights through fixed16 with zero exponent bits
// and checks them against the originals.
TEST(TestCasts, TestFixed16) {
  const int kRows = 100000;
  const int kCols = 1;
  const float kSparsity = 0.f;

  MaskedSparseMatrix<float> original(kRows, kCols, kSparsity);

  // Weights with magnitude below this threshold are flushed to zero first --
  // presumably they are below the resolution of fixed16<0> and would not
  // survive the cast within CheckResult's tolerance.
  for (int i = 0; i < original.values().size(); ++i) {
    if (std::abs(original.data()[i]) < 1.1e-3) {
      original.data()[i] = 0.f;
    }
  }

  MaskedSparseMatrix<float> quantized = original;
  quantized.CastWeights<csrblocksparse::fixed16</*ExponentBits=*/0>>();

  CheckResult(original.values(), quantized.values(), kCols);
}
|
|
|
// Round-trips dense random weights through fixed32 with zero exponent bits.
// Unlike TestFixed16, no small-magnitude flushing is applied before the
// comparison.
TEST(TestCasts, TestFixed32) {
  const int kRows = 100000;
  const int kCols = 1;
  const float kSparsity = 0.f;

  MaskedSparseMatrix<float> original(kRows, kCols, kSparsity);
  MaskedSparseMatrix<float> quantized = original;
  quantized.CastWeights<csrblocksparse::fixed32</*ExponentBits=*/0>>();

  CheckResult(original.values(), quantized.values(), kCols);
}
|
|
|
template <typename ComputeType, typename RhsType, typename OutType> |
|
void TestSpMM(int block_width, int block_height, int fatness, |
|
bool test_matmul = false) { |
|
std::array<bool, 2> use_relu = {false, true}; |
|
std::vector<float> sparsity_levels = {.5, .8, .9, .95, .98}; |
|
std::vector<std::pair<int, int>> sizes = {{8, 8}, {128, 128}, {128, 64}, |
|
{256, 192}, {512, 512}, {1024, 512}, |
|
{384, 384}, {512, 384}}; |
|
for (int num_threads = 1; num_threads < 2 + test_matmul; ++num_threads) { |
|
for (const auto& relu : use_relu) { |
|
for (const auto& sparsity : sparsity_levels) { |
|
for (const auto& size : sizes) { |
|
int rows, cols; |
|
std::tie(rows, cols) = size; |
|
CorrectnessCheckBlockSpMM<ComputeType, RhsType, OutType>( |
|
rows, cols, block_height, block_width, sparsity, relu, |
|
num_threads, fatness, test_matmul); |
|
} |
|
} |
|
} |
|
} |
|
} |
|
|
|
template <typename ComputeType, typename RhsType, typename OutType> |
|
void TestSpMM_MultiThread(int block_width, int block_height, int fatness) { |
|
std::array<bool, 2> use_relu = {false, true}; |
|
std::vector<float> sparsity_levels = {.5, .8, .9, .95, .98}; |
|
std::vector<std::pair<int, int>> sizes = { |
|
{48, 48}, {128, 128}, {512, 512}, {384, 384}}; |
|
for (int num_threads = 1; num_threads < 5; ++num_threads) { |
|
for (const auto& relu : use_relu) { |
|
for (const auto& sparsity : sparsity_levels) { |
|
for (const auto& size : sizes) { |
|
int rows, cols; |
|
std::tie(rows, cols) = size; |
|
CorrectnessCheckBlockSpMM_MultiThread<ComputeType, RhsType, OutType>( |
|
rows, cols, block_height, block_width, sparsity, relu, |
|
num_threads, fatness); |
|
} |
|
} |
|
} |
|
} |
|
} |
|
|
|
template <typename DataType> |
|
void TestSumVectors(int start = 0, int end = -1, int size = 6) { |
|
std::vector<DataType> values; |
|
std::vector<DataType> answer; |
|
|
|
for (int i = 1; i < size + 1; ++i) { |
|
const float x = static_cast<float>(i); |
|
values.push_back(static_cast<DataType>(x)); |
|
answer.push_back(static_cast<DataType>(x * 2)); |
|
} |
|
|
|
if (end == -1) { |
|
end = values.size(); |
|
} |
|
|
|
csrblocksparse::CacheAlignedVector<DataType> result(values.size()); |
|
csrblocksparse::CacheAlignedVector<DataType> values_aligned(values); |
|
detail::SumVectors(start, end, values_aligned.data(), values_aligned.data(), |
|
result.data()); |
|
for (int i = start; i < end; ++i) { |
|
EXPECT_EQ(static_cast<float>(answer[i]), static_cast<float>(result[i])); |
|
} |
|
} |
|
|
|
// Full range, suffix, and interior sub-range, all in float.
TEST(CsrBlockSparseMatrix, SumVectors_Generic) {
  TestSumVectors<float>();
  TestSumVectors<float>(/*start=*/1);
  TestSumVectors<float>(/*start=*/1, /*end=*/4);
}
|
|
|
// Full range, suffix, and interior sub-range in bfloat16.
TEST(CsrBlockSparseMatrix, SumVectors_Bfloat16) {
  TestSumVectors<csrblocksparse::bfloat16>();
  TestSumVectors<csrblocksparse::bfloat16>(/*start=*/1);
  TestSumVectors<csrblocksparse::bfloat16>(/*start=*/1, /*end=*/4);
}
|
|
|
|
|
|
|
|
|
|
|
// fixed16 sums over a larger (100-element) vector: full range and a prefix.
TEST(CsrBlockSparseMatrix, SumVectors_Fixed16) {
  TestSumVectors<csrblocksparse::fixed16<8>>(/*start=*/0, /*end=*/-1,
                                             /*size=*/100);
  TestSumVectors<csrblocksparse::fixed16<8>>(/*start=*/0, /*end=*/4,
                                             /*size=*/100);
}
|
|
|
// fixed32 sums over a larger (100-element) vector: full range and a prefix.
TEST(CsrBlockSparseMatrix, SumVectors_Fixed32) {
  TestSumVectors<csrblocksparse::fixed32<11>>(/*start=*/0, /*end=*/-1,
                                              /*size=*/100);
  TestSumVectors<csrblocksparse::fixed32<11>>(/*start=*/0, /*end=*/4,
                                              /*size=*/100);
}
|
|
|
// 4x4 blocks, bfloat16 weights, fat (7-column) right-hand side.
TEST(CsrBlockSparseMatrix, SpMM_Block4x4_Bfloat16) {
  TestSpMM<csrblocksparse::bfloat16, float, float>(
      /*block_width=*/4, /*block_height=*/4, /*fatness=*/7);
}
|
|
|
|
|
|
|
|
|
// Multi-threaded 4x4-block bfloat16 matrix-vector product.
TEST(CsrBlockSparseMatrix, SpMV_4x4MultiThreading_Bfloat16) {
  TestSpMM_MultiThread<csrblocksparse::bfloat16, float, float>(
      /*block_width=*/4, /*block_height=*/4, /*fatness=*/1);
}
|
|
|
// Multi-threaded 4x4-block bfloat16 product with a fat right-hand side.
TEST(CsrBlockSparseMatrix, SpMM_4x4MultiThreading_Bfloat16) {
  TestSpMM_MultiThread<csrblocksparse::bfloat16, float, float>(
      /*block_width=*/4, /*block_height=*/4, /*fatness=*/7);
}
|
|
|
// Unblocked (1x1) bfloat16 matrix-vector product.
TEST(CsrBlockSparseMatrix, SpMV_Block1x1_Bfloat16) {
  TestSpMM<csrblocksparse::bfloat16, float, float>(
      /*block_width=*/1, /*block_height=*/1, /*fatness=*/1);
}
|
|
|
// Unblocked (1x1) bfloat16 product with a fat right-hand side.
TEST(CsrBlockSparseMatrix, SpMM_Block1x1_Bfloat16) {
  TestSpMM<csrblocksparse::bfloat16, float, float>(
      /*block_width=*/1, /*block_height=*/1, /*fatness=*/7);
}
|
|
|
|
|
|
|
|
|
// Multi-threaded unblocked bfloat16 matrix-vector product.
TEST(CsrBlockSparseMatrix, SpMV_1x1MultiThreading_Bfloat16) {
  TestSpMM_MultiThread<csrblocksparse::bfloat16, float, float>(
      /*block_width=*/1, /*block_height=*/1, /*fatness=*/1);
}
|
|
|
// Multi-threaded unblocked bfloat16 product with a fat right-hand side.
TEST(CsrBlockSparseMatrix, SpMM_1x1MultiThreading_Bfloat16) {
  TestSpMM_MultiThread<csrblocksparse::bfloat16, float, float>(
      /*block_width=*/1, /*block_height=*/1, /*fatness=*/7);
}
|
|
|
// 4x4-block float matrix-vector product; also exercises the MatVec path.
TEST(CsrBlockSparseMatrix, SpMV_Block4x4_float) {
  TestSpMM<float, float, float>(/*block_width=*/4, /*block_height=*/4,
                                /*fatness=*/1, /*test_matmul=*/true);
}
|
|
|
// 4x4-block float product with a fat right-hand side.
TEST(CsrBlockSparseMatrix, SpMM_Block4x4_float) {
  TestSpMM<float, float, float>(/*block_width=*/4, /*block_height=*/4,
                                /*fatness=*/7);
}
|
|
|
|
|
|
|
|
|
// Multi-threaded 4x4-block float matrix-vector product.
TEST(CsrBlockSparseMatrix, SpMV_4x4MultiThreading_float) {
  TestSpMM_MultiThread<float, float, float>(/*block_width=*/4,
                                            /*block_height=*/4, /*fatness=*/1);
}
|
|
|
// Multi-threaded 4x4-block float product with a fat right-hand side.
TEST(CsrBlockSparseMatrix, SpMM_4x4MultiThreading_float) {
  TestSpMM_MultiThread<float, float, float>(/*block_width=*/4,
                                            /*block_height=*/4, /*fatness=*/7);
}
|
|
|
// Unblocked (1x1) float matrix-vector product.
TEST(CsrBlockSparseMatrix, SpMV_Block1x1_float) {
  TestSpMM<float, float, float>(/*block_width=*/1, /*block_height=*/1,
                                /*fatness=*/1);
}
|
|
|
// Unblocked (1x1) float product with a fat right-hand side.
TEST(CsrBlockSparseMatrix, SpMM_Block1x1_float) {
  TestSpMM<float, float, float>(/*block_width=*/1, /*block_height=*/1,
                                /*fatness=*/7);
}
|
|
|
|
|
|
|
|
|
// Multi-threaded unblocked float matrix-vector product.
TEST(CsrBlockSparseMatrix, SpMV_1x1MultiThreading_float) {
  TestSpMM_MultiThread<float, float, float>(/*block_width=*/1,
                                            /*block_height=*/1, /*fatness=*/1);
}
|
|
|
// Multi-threaded unblocked float product with a fat right-hand side.
TEST(CsrBlockSparseMatrix, SpMM_1x1MultiThreading_float) {
  TestSpMM_MultiThread<float, float, float>(/*block_width=*/1,
                                            /*block_height=*/1, /*fatness=*/7);
}
|
|
|
// 4x4-block fixed16 x fixed16 product accumulated in the TypeOfProduct type;
// also exercises the MatVec path.
TEST(CsrBlockSparseMatrix, SpMV_Block4x4_fixed16x16_32) {
  TestSpMM<csrblocksparse::fixed16<4>, csrblocksparse::fixed16<4>,
           typename csrblocksparse::TypeOfProduct<
               csrblocksparse::fixed16<4>, csrblocksparse::fixed16<4>>::type>(
      /*block_width=*/4, /*block_height=*/4, /*fatness=*/1,
      /*test_matmul=*/true);
}
|
|
|
// 4x4-block fixed16 x fixed16 product, fat right-hand side.
TEST(CsrBlockSparseMatrix, SpMM_Block4x4_fixed16x16_32) {
  TestSpMM<csrblocksparse::fixed16<4>, csrblocksparse::fixed16<4>,
           typename csrblocksparse::TypeOfProduct<
               csrblocksparse::fixed16<4>, csrblocksparse::fixed16<4>>::type>(
      /*block_width=*/4, /*block_height=*/4, /*fatness=*/7);
}
|
|
|
// Unblocked fixed16 x fixed16 matrix-vector product.
TEST(CsrBlockSparseMatrix, SpMV_Block1x1_fixed16x16_32) {
  TestSpMM<csrblocksparse::fixed16<4>, csrblocksparse::fixed16<4>,
           typename csrblocksparse::TypeOfProduct<
               csrblocksparse::fixed16<4>, csrblocksparse::fixed16<4>>::type>(
      /*block_width=*/1, /*block_height=*/1, /*fatness=*/1);
}
|
|
|
// Unblocked fixed16 x fixed16 product, fat right-hand side.
TEST(CsrBlockSparseMatrix, SpMM_Block1x1_fixed16x16_32) {
  TestSpMM<csrblocksparse::fixed16<4>, csrblocksparse::fixed16<4>,
           typename csrblocksparse::TypeOfProduct<
               csrblocksparse::fixed16<4>, csrblocksparse::fixed16<4>>::type>(
      /*block_width=*/1, /*block_height=*/1, /*fatness=*/7);
}
|
|
|
// 4x4-block fixed16 x fixed16 product with a 16-bit output type; also
// exercises the MatVec path.
TEST(CsrBlockSparseMatrix, SpMV_Block4x4_fixed16x16_16) {
  TestSpMM<csrblocksparse::fixed16<5>, csrblocksparse::fixed16<5>,
           csrblocksparse::fixed16<8>>(
      /*block_width=*/4, /*block_height=*/4, /*fatness=*/1,
      /*test_matmul=*/true);
}
|
|
|
// 4x4-block fixed16 x fixed16 product with a 16-bit output type, fat rhs.
TEST(CsrBlockSparseMatrix, SpMM_Block4x4_fixed16x16_16) {
  TestSpMM<csrblocksparse::fixed16<5>, csrblocksparse::fixed16<5>,
           csrblocksparse::fixed16<8>>(
      /*block_width=*/4, /*block_height=*/4, /*fatness=*/7);
}
|
|
|
// Unblocked fixed16 x fixed16 product with a 16-bit output type.
TEST(CsrBlockSparseMatrix, SpMV_Block1x1_fixed16x16_16) {
  TestSpMM<csrblocksparse::fixed16<5>, csrblocksparse::fixed16<5>,
           csrblocksparse::fixed16<8>>(
      /*block_width=*/1, /*block_height=*/1, /*fatness=*/1);
}
|
|
|
// Unblocked fixed16 x fixed16 product with a 16-bit output type, fat rhs.
TEST(CsrBlockSparseMatrix, SpMM_Block1x1_fixed16x16_16) {
  TestSpMM<csrblocksparse::fixed16<5>, csrblocksparse::fixed16<5>,
           csrblocksparse::fixed16<8>>(
      /*block_width=*/1, /*block_height=*/1, /*fatness=*/7);
}
|
|
|
// 4x4-block fixed16 x fixed16 product with a 32-bit output whose mantissa
// layout does not match TypeOfProduct; also exercises the MatVec path.
TEST(CsrBlockSparseMatrix, SpMV_Block4x4_fixed16x16_32_unmatched) {
  TestSpMM<csrblocksparse::fixed16<5>, csrblocksparse::fixed16<5>,
           csrblocksparse::fixed32<13>>(
      /*block_width=*/4, /*block_height=*/4, /*fatness=*/1,
      /*test_matmul=*/true);
}
|
|
|
// 4x4-block fixed16 x fixed16 product with an unmatched 32-bit output, fat
// right-hand side.
TEST(CsrBlockSparseMatrix, SpMM_Block4x4_fixed16x16_32_unmatched) {
  TestSpMM<csrblocksparse::fixed16<5>, csrblocksparse::fixed16<5>,
           csrblocksparse::fixed32<13>>(
      /*block_width=*/4, /*block_height=*/4, /*fatness=*/7);
}
|
|
|
// Unblocked fixed16 x fixed16 product with an unmatched 32-bit output.
TEST(CsrBlockSparseMatrix, SpMV_Block1x1_fixed16x16_32_unmatched) {
  TestSpMM<csrblocksparse::fixed16<5>, csrblocksparse::fixed16<5>,
           csrblocksparse::fixed32<13>>(
      /*block_width=*/1, /*block_height=*/1, /*fatness=*/1);
}
|
|
|
// Unblocked fixed16 x fixed16 product with an unmatched 32-bit output, fat
// right-hand side.
TEST(CsrBlockSparseMatrix, SpMM_Block1x1_fixed16x16_32_unmatched) {
  TestSpMM<csrblocksparse::fixed16<5>, csrblocksparse::fixed16<5>,
           csrblocksparse::fixed32<13>>(
      /*block_width=*/1, /*block_height=*/1, /*fatness=*/7);
}
|
|
|
// rhs_indices and col_deltas are alternative encodings of the same column
// access pattern: recomputing deltas from the indices and then indices from
// the deltas must reproduce the original indices.  The recomputed vector may
// be longer (hence EXPECT_LE on size); only the original prefix is compared.
TEST(CsrBlockSparseMatrix, RhsIndicesDeltasRoundTrip) {
  MaskedSparseMatrix<float> matrix(256, 256,
                                   0.9, 4,
                                   4);
  CsrBlockSparseMatrix<float, float> sparse_matrix(matrix);
  // Snapshot the original indices before they are recomputed in place.
  CacheAlignedVector<int16_t> copy_indices = sparse_matrix.rhs_indices();
  sparse_matrix.ComputeColDeltas();
  sparse_matrix.ComputeRHSIndices();

  EXPECT_LE(copy_indices.size(), sparse_matrix.rhs_indices().size());
  for (int i = 0; i < copy_indices.size(); ++i) {
    EXPECT_EQ(copy_indices[i], sparse_matrix.rhs_indices()[i]) << "i=" << i;
  }
}
|
|
|
|
|
|
|
// Splits a layer into two column-halves with SplitInputs; feeding each part
// its half of the input and summing the two partial outputs must reproduce
// the unsplit product to within 2e-5 (the bias is presumably apportioned
// between the parts by SplitInputs so it is not double counted -- confirm
// against the SplitInputs API).
TEST(CsrBlockSparseMatrix, SplitByCol) {
  int kRows = 1024;
  int kCols = 1024;
  MaskedSparseMatrix<float> matrix(kRows, kCols, 0.95, 4,
                                   4);
  FatCacheAlignedVector<float> rhs(kCols, 1);
  CacheAlignedVector<float> bias(kRows);
  FatCacheAlignedVector<float> out1(kRows, 1);
  FatCacheAlignedVector<float> out2(kRows, 1);

  bias.FillRandom();
  rhs.FillRandom();
  out1.FillZero();
  out2.FillZero();
  FatCacheAlignedVector<float> out_reference = out1;

  CsrBlockSparseMatrix<float, float> sparse_matrix(matrix);

  SparseLinearLayer<float, float> sparse_linear_layer(std::move(sparse_matrix),
                                                      std::move(bias));
  sparse_linear_layer.PrepareForThreads(1);
  // Unsplit reference product.
  sparse_linear_layer.SpMM_bias(rhs, &out_reference, false,
                                0);

  SparseLinearLayer<float, float> part1, part2;
  sparse_linear_layer.SplitInputs(&part1, &part2);
  part1.PrepareForThreads(1);
  part2.PrepareForThreads(1);
  // Both parts keep all rows but take half of the columns each.
  EXPECT_EQ(kRows, part1.rows());
  EXPECT_EQ(kCols / 2, part1.cols());
  EXPECT_EQ(kRows, part2.rows());
  EXPECT_EQ(kCols / 2, part2.cols());
  // Views over the two halves of the input vector.
  MutableVectorView<float> rhs1(&rhs, 0, kCols / 2);
  MutableVectorView<float> rhs2(&rhs, kCols / 2, kCols / 2);
  for (int i = 0; i < kCols / 2; ++i) {
    EXPECT_FLOAT_EQ(rhs[i], rhs1.data()[i]);
    EXPECT_FLOAT_EQ(rhs[i + kCols / 2], rhs2.data()[i]);
  }
  part1.SpMM_bias(rhs1, &out1, false, 0);
  part2.SpMM_bias(rhs2, &out2, false, 0);

  for (int i = 0; i < kRows; ++i) {
    EXPECT_NEAR(out_reference[i], out1[i] + out2[i], 2e-5)
        << " i=" << i << " out1=" << out1[i] << " out2=" << out2[i];
  }
}
|
|
|
|
|
// Splits a layer into two row-halves with SplitOutputs; each part writes its
// half of the output through a MutableVectorView and the concatenation must
// match the unsplit product to within 2e-5.
TEST(CsrBlockSparseMatrix, SplitByRow) {
  int kRows = 1024;
  int kCols = 1024;
  MaskedSparseMatrix<float> matrix(kRows, kCols, 0.95, 4,
                                   4);
  FatCacheAlignedVector<float> rhs(kCols, 1);
  CacheAlignedVector<float> bias(kRows);
  FatCacheAlignedVector<float> out1(kRows, 1);
  FatCacheAlignedVector<float> out2(kRows, 1);

  bias.FillRandom();
  rhs.FillRandom();
  out1.FillZero();
  out2.FillZero();
  FatCacheAlignedVector<float> out_reference = out1;

  CsrBlockSparseMatrix<float, float> sparse_matrix(matrix);

  SparseLinearLayer<float, float> sparse_linear_layer(std::move(sparse_matrix),
                                                      std::move(bias));
  sparse_linear_layer.PrepareForThreads(1);
  // Unsplit reference product.
  sparse_linear_layer.SpMM_bias(rhs, &out_reference, false,
                                0);

  SparseLinearLayer<float, float> part1, part2;
  sparse_linear_layer.SplitOutputs(&part1, &part2);
  part1.PrepareForThreads(1);
  part2.PrepareForThreads(1);
  // Both parts keep all columns but produce half of the rows each.
  EXPECT_EQ(kRows / 2, part1.rows());
  EXPECT_EQ(kCols, part1.cols());
  EXPECT_EQ(kRows / 2, part2.rows());
  EXPECT_EQ(kCols, part2.cols());
  // Each part writes into its half of |out2| through a view.
  MutableVectorView<float> out2a(&out2, 0, kRows / 2);
  MutableVectorView<float> out2b(&out2, kRows / 2, kRows / 2);
  part1.SpMM_bias(rhs, &out2a, false, 0);
  part2.SpMM_bias(rhs, &out2b, false, 0);

  for (int i = 0; i < kRows; ++i) {
    EXPECT_NEAR(out_reference[i], out2[i], 2e-5)
        << " i=" << i << " out1=" << out_reference[i] << " out2=" << out2[i];
  }
}
|
|
|
// Verifies that MutableVectorView windows over bias/rhs/output vectors let a
// sub-matrix product read from or write into a slice of a larger vector:
// top and bottom halves computed separately must reproduce the full product,
// and a quarter-sized product using offset input views must match a
// hand-computed constant.
TEST(CsrBlockSparseMatrix, MutableVectorView) {
  const int kRows = 1024;
  const int kCols = 1024;
  const int kFatness = 2;

  std::vector<float> values(kRows * kCols, 1.f);
  // Checkerboard mask: every other element present, i.e. kCols / 2 ones per
  // row.
  std::vector<int> mask(kRows * kCols);
  for (int i = 0; i < mask.size(); ++i) mask[i] = i % 2;

  auto masked_matrix =
      MaskedSparseMatrix<float>(kRows, kCols, mask.data(), values.data());
  auto sparse_matrix = CsrBlockSparseMatrix<bfloat16, float>(masked_matrix);
  FatCacheAlignedVector<float> x(kCols, kFatness);
  x.FillOnes();

  CacheAlignedVector<float> bias(kRows);
  bias.FillZero();

  FatCacheAlignedVector<float> out(kRows, kFatness);
  FatCacheAlignedVector<float> out_view(kRows, kFatness);

  // Views over the top and bottom halves of the output vector.
  MutableVectorView<float> out_view_top(&out_view, 0, kRows / 2);
  MutableVectorView<float> out_view_bottom(&out_view, kRows / 2, kRows / 2);

  // Full-matrix reference result.
  sparse_matrix.SpMM_bias(x, bias, &out);

  // Split the matrix into top and bottom halves and route each half's product
  // through the corresponding output view.
  auto masked_matrix_top =
      MaskedSparseMatrix<float>(kRows / 2, kCols, mask.data(), values.data());
  auto masked_matrix_bottom = MaskedSparseMatrix<float>(
      kRows / 2, kCols, mask.data() + kRows * kCols / 2,
      values.data() + kRows * kCols / 2);
  auto sparse_matrix_top =
      CsrBlockSparseMatrix<bfloat16, float>(masked_matrix_top);
  auto sparse_matrix_bottom =
      CsrBlockSparseMatrix<bfloat16, float>(masked_matrix_bottom);

  sparse_matrix_top.SpMM_bias(x, bias, &out_view_top);
  sparse_matrix_bottom.SpMM_bias(x, bias, &out_view_bottom);

  CheckResult(out, out_view, kCols);

  // Quarter-sized product (kRows/2 x kCols/2) using views over the tops of
  // the rhs and bias as inputs.
  auto masked_matrix_quarter = MaskedSparseMatrix<float>(
      kRows / 2, kCols / 2, mask.data(), values.data());
  auto sparse_matrix_quarter =
      CsrBlockSparseMatrix<bfloat16, float>(masked_matrix_quarter);

  MutableVectorView<float> x_top(&x, 0, kCols / 2);
  // Each output element sums (kCols / 2) / 2 = 256 ones: checkerboard mask,
  // all-ones weights and rhs, zero bias.  (Was a magic literal 2 for the
  // fatness; use kFatness for consistency.)
  FatCacheAlignedVector<float> out_correct(kRows / 2, kFatness);
  for (int i = 0; i < kFatness * (kRows / 2); ++i) out_correct[i] = 256.f;

  MutableVectorView<float> bias_top(&bias, 0, kRows / 2);
  FatCacheAlignedVector<float> out_quarter(kRows / 2, kFatness);

  sparse_matrix_quarter.SpMM_bias(x_top, bias_top, &out_quarter);

  CheckResult(out_correct, out_quarter, kCols / 2);
}
|
|
|
namespace { |
|
|
|
// Returns true (after logging) when |status| shows the named test input could
// not be loaded; callers use this to skip the test instead of failing.
bool skip_test(const absl::Status& status, absl::string_view msg) {
  if (status.ok()) return false;
  LOG(INFO) << "Couldn't load " << msg << ", skipping test " << status;
  return true;
}
|
|
|
} |
|
|
|
// Loads real model layer files from testdata (or /data/local/tmp on Android)
// and checks the sparse bfloat16 layer against the masked reference, with and
// without relu.  Skips (after logging) when the files are not present.
TEST(CsrBlockSparseMatrix, ModelMatrices_Bfloat16) {
  std::vector<std::string> names = {
      "768_512_95_4x4_wavernn_gru_", "768_512_95_4x4_coarseproj_",
      "768_512_95_4x4_coarselogit_", "768_512_95_4x4_fineproj_",
      "768_512_95_4x4_finelogit_", "lyra_conv1d_"};
  const std::string kPath =
#if defined __arm__ || defined __aarch64__
      "/data/local/tmp/";
#else
      (ghc::filesystem::current_path() / kTestdataPath).string();
#endif
  for (auto& layer_name : names) {
    SparseLinearLayer<bfloat16, float> sparse_linear_layer;
    auto status = LoadSparseLayer<bfloat16, float>(layer_name, true,
                                                   &sparse_linear_layer, kPath);

    if (skip_test(status, layer_name)) return;

    int rows = sparse_linear_layer.rows();
    int cols = sparse_linear_layer.cols();

    // Load the same layer in masked (dense reference) form and quantize its
    // weights the same way so the two implementations are comparable.
    MaskedLinearLayer<float> masked_linear_layer;
    status = LoadMaskedLayer<float>(layer_name, true,
                                    &masked_linear_layer, kPath);
    if (skip_test(status, layer_name)) return;
    masked_linear_layer.CastWeights<csrblocksparse::bfloat16>();

    CacheAlignedVector<float> rhs(cols);
    CacheAlignedVector<float> out_ref(rows);
    CacheAlignedVector<float> out_spmv(rows);

    rhs.FillRandom();
    out_ref.FillZero();
    out_spmv.FillZero();

    std::array<bool, 2> use_relus = {false, true};
    for (bool use_relu : use_relus) {
      masked_linear_layer.SpMM_bias(rhs, &out_ref, use_relu);
      sparse_linear_layer.SpMM_bias(rhs, &out_spmv, use_relu);

      CheckResult(out_ref, out_spmv, cols);
    }
  }
}
|
|
|
// Same as ModelMatrices_Bfloat16 but with float weights in the sparse layer,
// so no weight quantization of the masked reference is needed.
TEST(CsrBlockSparseMatrix, ModelMatrices_float) {
  std::vector<std::string> names = {
      "768_512_95_4x4_wavernn_gru_", "768_512_95_4x4_coarseproj_",
      "768_512_95_4x4_coarselogit_", "768_512_95_4x4_fineproj_",
      "768_512_95_4x4_finelogit_", "lyra_conv1d_"};
  const std::string kPath =
#if defined __arm__ || defined __aarch64__
      "/data/local/tmp/";
#else
      (ghc::filesystem::current_path() / kTestdataPath).string();
#endif
  for (auto& layer_name : names) {
    SparseLinearLayer<float, float> sparse_linear_layer;
    auto status = LoadSparseLayer<float, float>(layer_name, true,
                                                &sparse_linear_layer, kPath);

    if (skip_test(status, layer_name)) return;

    int rows = sparse_linear_layer.rows();
    int cols = sparse_linear_layer.cols();

    MaskedLinearLayer<float> masked_linear_layer;
    status = LoadMaskedLayer<float>(layer_name, true,
                                    &masked_linear_layer, kPath);
    if (skip_test(status, layer_name)) return;

    CacheAlignedVector<float> rhs(cols);
    CacheAlignedVector<float> out_ref(rows);
    CacheAlignedVector<float> out_spmv(rows);

    rhs.FillRandom();
    out_ref.FillZero();
    out_spmv.FillZero();

    std::array<bool, 2> use_relus = {false, true};
    for (bool use_relu : use_relus) {
      masked_linear_layer.SpMM_bias(rhs, &out_ref, use_relu);
      sparse_linear_layer.SpMM_bias(rhs, &out_spmv, use_relu);

      CheckResult(out_ref, out_spmv, cols);
    }
  }
}
|
|
|
#undef SKIP_TEST |
|
|
|
} |
|
|