rocsparse: add spack build test (#31529)

* rocsparse: add spack build test

* Fix Navi 1x patch for ROCm 5.2

* Remove bench variant and other cleanup

* Fix style
This commit is contained in:
Cory Bloor 2022-09-13 09:42:42 -06:00 committed by GitHub
parent 3e331c7397
commit 633a4cbd46
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 364 additions and 7 deletions

View file

@ -0,0 +1,32 @@
--- a/clients/tests/CMakeLists.txt
+++ b/clients/tests/CMakeLists.txt
@@ -33,6 +33,7 @@ if(NOT EXISTS "${CMAKE_MATRICES_DIR}")
# Download.
#
set(CMAKE_MATRICES_DIR ${PROJECT_BINARY_DIR}/matrices CACHE STRING "Matrices directory.")
+ file(MAKE_DIRECTORY ${CMAKE_MATRICES_DIR})
if(NOT TARGET rocsparse)
set(CONVERT_SOURCE ${CMAKE_SOURCE_DIR}/../deps/convert.cpp CACHE STRING "Convert tool mtx2csr.")
diff --git a/cmake/ClientMatrices.cmake b/cmake/ClientMatrices.cmake
index 871ebc4..b7fb7bc 100644
--- a/cmake/ClientMatrices.cmake
+++ b/cmake/ClientMatrices.cmake
@@ -99,6 +99,7 @@ foreach(i RANGE 0 ${len1})
# Download test matrices if not already downloaded
if(NOT EXISTS "${CMAKE_MATRICES_DIR}/${mat}.csr")
+ if(NOT ROCSPARSE_MTX_DIR)
# First try user specified mirror, if available
if(DEFINED ENV{ROCSPARSE_TEST_MIRROR} AND NOT $ENV{ROCSPARSE_TEST_MIRROR} STREQUAL "")
message("-- Downloading and extracting test matrix ${m}.tar.gz from user specified test mirror: $ENV{ROCSPARSE_TEST_MIRROR}")
@@ -149,6 +150,9 @@ foreach(i RANGE 0 ${len1})
WORKING_DIRECTORY ${CMAKE_MATRICES_DIR})
file(RENAME ${CMAKE_MATRICES_DIR}/${mat}/${mat}.mtx ${CMAKE_MATRICES_DIR}/${mat}.mtx)
+ else()
+ file(RENAME ${ROCSPARSE_MTX_DIR}/${mat}/${mat}.mtx ${CMAKE_MATRICES_DIR}/${mat}.mtx)
+ endif()
execute_process(COMMAND ${PROJECT_BINARY_DIR}/mtx2csr.exe ${mat}.mtx ${mat}.csr
WORKING_DIRECTORY ${CMAKE_MATRICES_DIR})
# TODO: add 'COMMAND_ERROR_IS_FATAL ANY' once cmake supported version is 3.19

View file

@ -0,0 +1,8 @@
--- a/clients/common/rocsparse_gentest.py
+++ b/clients/common/rocsparse_gentest.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python3
+#!/usr/bin/env python3
# ########################################################################
# Copyright (c) 2019-2021 Advanced Micro Devices, Inc.

View file

@ -0,0 +1,78 @@
From f9446b8f4c3cb3a3c6d38734f9980712a82b9db9 Mon Sep 17 00:00:00 2001
From: Cory Bloor <Cordell.Bloor@amd.com>
Date: Fri, 8 Jul 2022 20:53:32 -0600
Subject: [PATCH] Improve guards for arch-specific instructions (#368)
When choosing between a specialized implementation that uses
architecture-specific functionality and a generic fallback, it is
usually preferable to make the fallback the default. This will give the
software the best possible chance of functioning without modification
on future hardware.
Of course, the library will still need code updates to function
optimally on hardware released after the software was written.
rocSPARSE can also be compiled with CXXFLAGS=-DROCSPARSE_USE_MOVE_DPP=0
to force the use of the fallback implementation. Or with the value 1 to
force the use of the specialized __hip_move_dpp implementation.
This change fixes the compilation error:
Illegal instruction detected: Invalid dpp_ctrl value: broadcasts are not supported on GFX10+
when building for unsupported Navi 1x and Navi 2x GPUs as was
reported in https://github.com/ROCmSoftwarePlatform/rocSPARSE/issues/250
---
library/src/include/common.h | 18 +++++++++++++++---
1 file changed, 15 insertions(+), 3 deletions(-)
diff --git a/library/src/include/common.h b/library/src/include/common.h
index 6a4654af..975c5f7d 100644
--- a/library/src/include/common.h
+++ b/library/src/include/common.h
@@ -34,6 +34,18 @@
// clang-format off
+#ifndef ROCSPARSE_USE_MOVE_DPP
+#if defined(__gfx803__) || \
+ defined(__gfx900__) || \
+ defined(__gfx906__) || \
+ defined(__gfx908__) || \
+ defined(__gfx90a__)
+#define ROCSPARSE_USE_MOVE_DPP 1
+#else
+#define ROCSPARSE_USE_MOVE_DPP 0
+#endif
+#endif
+
// BSR indexing macros
#define BSR_IND(j, bi, bj, dir) ((dir == rocsparse_direction_row) ? BSR_IND_R(j, bi, bj) : BSR_IND_C(j, bi, bj))
#define BSR_IND_R(j, bi, bj) (block_dim * block_dim * (j) + (bi) * block_dim + (bj))
@@ -233,7 +245,7 @@ __device__ __forceinline__ void rocsparse_blockreduce_min(int i, T* data)
if(BLOCKSIZE > 1) { if(i < 1 && i + 1 < BLOCKSIZE) { data[i] = min(data[i], data[i + 1]); } __syncthreads(); }
}
-#ifndef __gfx1030__
+#if ROCSPARSE_USE_MOVE_DPP
// DPP-based wavefront reduction maximum
template <unsigned int WFSIZE>
__device__ __forceinline__ void rocsparse_wfreduce_max(int* maximum)
@@ -499,7 +511,7 @@ __device__ __forceinline__ double rocsparse_wfreduce_sum(double sum)
sum = temp_sum.val;
return sum;
}
-#else
+#else /* ROCSPARSE_USE_MOVE_DPP */
template <unsigned int WFSIZE>
__device__ __forceinline__ void rocsparse_wfreduce_max(int* maximum)
{
@@ -566,7 +578,7 @@ __device__ __forceinline__ double rocsparse_wfreduce_sum(double sum)
return sum;
}
-#endif
+#endif /* ROCSPARSE_USE_MOVE_DPP */
// DPP-based complex float wavefront reduction sum
template <unsigned int WFSIZE>

View file

@ -0,0 +1,78 @@
From f9446b8f4c3cb3a3c6d38734f9980712a82b9db9 Mon Sep 17 00:00:00 2001
From: Cory Bloor <Cordell.Bloor@amd.com>
Date: Fri, 8 Jul 2022 20:53:32 -0600
Subject: [PATCH] Improve guards for arch-specific instructions (#368)
When choosing between a specialized implementation that uses
architecture-specific functionality and a generic fallback, it is
usually preferable to make the fallback the default. This will give the
software the best possible chance of functioning without modification
on future hardware.
Of course, the library will still need code updates to function
optimally on hardware released after the software was written.
rocSPARSE can also be compiled with CXXFLAGS=-DROCSPARSE_USE_MOVE_DPP=0
to force the use of the fallback implementation. Or with the value 1 to
force the use of the specialized __hip_move_dpp implementation.
This change fixes the compilation error:
Illegal instruction detected: Invalid dpp_ctrl value: broadcasts are not supported on GFX10+
when building for unsupported Navi 1x and Navi 2x GPUs as was
reported in https://github.com/ROCmSoftwarePlatform/rocSPARSE/issues/250
---
library/src/include/common.h | 18 +++++++++++++++---
1 file changed, 15 insertions(+), 3 deletions(-)
diff --git a/library/src/include/common.h b/library/src/include/common.h
index 6a4654af..975c5f7d 100644
--- a/library/src/include/common.h
+++ b/library/src/include/common.h
@@ -34,6 +34,18 @@
// clang-format off
+#ifndef ROCSPARSE_USE_MOVE_DPP
+#if defined(__gfx803__) || \
+ defined(__gfx900__) || \
+ defined(__gfx906__) || \
+ defined(__gfx908__) || \
+ defined(__gfx90a__)
+#define ROCSPARSE_USE_MOVE_DPP 1
+#else
+#define ROCSPARSE_USE_MOVE_DPP 0
+#endif
+#endif
+
// BSR indexing macros
#define BSR_IND(j, bi, bj, dir) ((dir == rocsparse_direction_row) ? BSR_IND_R(j, bi, bj) : BSR_IND_C(j, bi, bj))
#define BSR_IND_R(j, bi, bj) (block_dim * block_dim * (j) + (bi) * block_dim + (bj))
@@ -233,7 +245,7 @@ __device__ __forceinline__ void rocsparse_blockreduce_min(int i, T* data)
if(BLOCKSIZE > 1) { if(i < 1 && i + 1 < BLOCKSIZE) { data[i] = min(data[i], data[i + 1]); } __syncthreads(); }
}
-#if (!defined(__gfx1030__)) && (!defined(__gfx1011__))
+#if ROCSPARSE_USE_MOVE_DPP
// DPP-based wavefront reduction maximum
template <unsigned int WFSIZE>
__device__ __forceinline__ void rocsparse_wfreduce_max(int* maximum)
@@ -499,7 +511,7 @@ __device__ __forceinline__ double rocsparse_wfreduce_sum(double sum)
sum = temp_sum.val;
return sum;
}
-#else
+#else /* ROCSPARSE_USE_MOVE_DPP */
template <unsigned int WFSIZE>
__device__ __forceinline__ void rocsparse_wfreduce_max(int* maximum)
{
@@ -566,7 +578,7 @@ __device__ __forceinline__ double rocsparse_wfreduce_sum(double sum)
return sum;
}
-#endif
+#endif /* ROCSPARSE_USE_MOVE_DPP */
// DPP-based complex float wavefront reduction sum
template <unsigned int WFSIZE>

View file

@ -3,7 +3,6 @@
# #
# SPDX-License-Identifier: (Apache-2.0 OR MIT) # SPDX-License-Identifier: (Apache-2.0 OR MIT)
import itertools
import re import re
from spack.package import * from spack.package import *
@ -33,6 +32,7 @@ class Rocsparse(CMakePackage):
values=("Release", "Debug", "RelWithDebInfo"), values=("Release", "Debug", "RelWithDebInfo"),
description="CMake build type", description="CMake build type",
) )
variant("test", default=False, description="Build rocsparse-test client")
version("5.2.0", sha256="7ed929af16d2502135024a6463997d9a95f03899b8a33aa95db7029575c89572") version("5.2.0", sha256="7ed929af16d2502135024a6463997d9a95f03899b8a33aa95db7029575c89572")
version("5.1.3", sha256="ef9641045b36c9aacc87e4fe7717b41b1e29d97e21432678dce7aca633a8edc2") version("5.1.3", sha256="ef9641045b36c9aacc87e4fe7717b41b1e29d97e21432678dce7aca633a8edc2")
@ -114,13 +114,173 @@ class Rocsparse(CMakePackage):
"5.2.0", "5.2.0",
]: ]:
depends_on("hip@" + ver, when="@" + ver) depends_on("hip@" + ver, when="@" + ver)
for tgt in itertools.chain(["auto"], amdgpu_targets): depends_on("rocprim@" + ver, when="@" + ver)
depends_on(
"rocprim@{0} amdgpu_target={1}".format(ver, tgt),
when="@{0} amdgpu_target={1}".format(ver, tgt),
)
depends_on("rocm-cmake@%s:" % ver, type="build", when="@" + ver) depends_on("rocm-cmake@%s:" % ver, type="build", when="@" + ver)
# Add option so Spack can manage downloaded test matrices as resources.
patch("0001-set-mtx-directory.patch", when="@4.5.0: +test")
# Enable use of Spack-provided Python.
patch("0002-fix-gentest-shebang.patch", when="@4.5.0: +test")
# Fix build for most Radeon 5000 and Radeon 6000 series GPUs.
patch("0003-fix-navi-1x-rocm-4.5.patch", when="@4.5.0:5.1")
patch("0003-fix-navi-1x-rocm-5.2.patch", when="@5.2.0:")
depends_on("googletest@1.11.0:", when="@5.1.0: +test")
depends_on("googletest@1.10.0:", when="+test")
depends_on("python@3:", type="build", when="+test")
depends_on("py-pyyaml", type="build", when="+test")
# Test matrix archives, declared only for the +test variant.
# Each resource is a SuiteSparse Matrix Collection tarball from sparse.tamu.edu,
# pinned by sha256 and given destination="mtx". cmake_args passes
# join_path(self.stage.source_path, "mtx") as ROCSPARSE_MTX_DIR, and
# 0001-set-mtx-directory.patch makes the build read
# ${ROCSPARSE_MTX_DIR}/<name>/<name>.mtx instead of downloading it.
# NOTE(review): presumably Spack unpacks each archive to mtx/<name>/ under the
# stage source path, matching what the patch expects -- confirm.
with when("+test"):
resource(
name="amazon0312",
url="https://sparse.tamu.edu/MM/SNAP/amazon0312.tar.gz",
sha256="75ffd36b33675856f370f508d53e6197caa972ac52929991db7dc4198bd64910",
destination="mtx",
)
resource(
name="Chebyshev4",
url="https://sparse.tamu.edu/MM/Muite/Chebyshev4.tar.gz",
sha256="82553d73281587ea70e5faa427910e979524412c89e59ada7fa47a97142ae8a6",
destination="mtx",
)
resource(
name="sme3Dc",
url="https://sparse.tamu.edu/MM/FEMLAB/sme3Dc.tar.gz",
sha256="82f03904849cceea0af1b9975942717527ecc5e87a98cfddea78ffbe7e7c076d",
destination="mtx",
)
resource(
name="webbase-1M",
url="https://sparse.tamu.edu/MM/Williams/webbase-1M.tar.gz",
sha256="17a0391cdd966350b2b41f32aaf8e6684f3c55f25eb68c4be088f44f728a3ed4",
destination="mtx",
)
resource(
name="rma10",
url="https://sparse.tamu.edu/MM/Bova/rma10.tar.gz",
sha256="50db8d278d371531b3dd0638444d47a77f3a3e189663993a857861dbc34c5e3f",
destination="mtx",
)
resource(
name="bibd_22_8",
url="https://sparse.tamu.edu/MM/JGD_BIBD/bibd_22_8.tar.gz",
sha256="534b5210662d1b5b14a3938671501189685d12abf9f2a206778508345181014c",
destination="mtx",
)
resource(
name="mac_econ_fwd500",
url="https://sparse.tamu.edu/MM/Williams/mac_econ_fwd500.tar.gz",
sha256="0dec2952b2908e3d59e4179289245db7f2c84f9e5e6543e818491deed5978f82",
destination="mtx",
)
resource(
name="mc2depi",
url="https://sparse.tamu.edu/MM/Williams/mc2depi.tar.gz",
sha256="c02fef86efdd4f4322487e7472697a3d30b084ede1021e6d6889b347d3f6b268",
destination="mtx",
)
resource(
name="scircuit",
url="https://sparse.tamu.edu/MM/Hamm/scircuit.tar.gz",
sha256="227d4c98e51c8af49c07f89929c62f2523e115e81b672e7f306185ea92c2996f",
destination="mtx",
)
resource(
name="ASIC_320k",
url="https://sparse.tamu.edu/MM/Sandia/ASIC_320k.tar.gz",
sha256="d0d4ac477f641c8372d7347bc262ffcbde017f50fb17bb1a1539c98dd3440145",
destination="mtx",
)
resource(
name="bmwcra_1",
url="https://sparse.tamu.edu/MM/GHS_psdef/bmwcra_1.tar.gz",
sha256="31467b319f3d4e8a8fc3a320344650bee14b285755b13ee29264b7a488b3d222",
destination="mtx",
)
resource(
name="nos1",
url="https://sparse.tamu.edu/MM/HB/nos1.tar.gz",
sha256="7e64dc2408890e85a60dbd2ad048963c74625cc3037dbdff9647d30844a52674",
destination="mtx",
)
resource(
name="nos2",
url="https://sparse.tamu.edu/MM/HB/nos2.tar.gz",
sha256="7439318b969e8cad0e96f154937a35256374bb8f0e16ed7ecc3a5219f8dc903b",
destination="mtx",
)
resource(
name="nos3",
url="https://sparse.tamu.edu/MM/HB/nos3.tar.gz",
sha256="7dd62179bbcaeb693c774712a8d70b97316364983f1cbf06cecb3900da8954a5",
destination="mtx",
)
resource(
name="nos4",
url="https://sparse.tamu.edu/MM/HB/nos4.tar.gz",
sha256="ec2323a5195db153fd6ae32ff537b22eb47f08e73949754b71f8d4104358f10f",
destination="mtx",
)
resource(
name="nos5",
url="https://sparse.tamu.edu/MM/HB/nos5.tar.gz",
sha256="dd67e906b0392cfbbe5a01a1f1a569c50875cbf88249a31721fb87519666a342",
destination="mtx",
)
resource(
name="nos6",
url="https://sparse.tamu.edu/MM/HB/nos6.tar.gz",
sha256="a0301c38ed91b849571303db581205cfae113565a7938eaa1a7466320f0d03c4",
destination="mtx",
)
resource(
name="nos7",
url="https://sparse.tamu.edu/MM/HB/nos7.tar.gz",
sha256="c5d8d99bf4b54ee45e2f45d78530e3787f2e9670c000a68ad986a3b923e9e5ae",
destination="mtx",
)
resource(
name="shipsec1",
url="https://sparse.tamu.edu/MM/DNVS/shipsec1.tar.gz",
sha256="d021889affed5429f85b606900f76870d0b1b1aefd92529cc6f43bf9d7ef0eb1",
destination="mtx",
)
resource(
name="mplate",
url="https://sparse.tamu.edu/MM/Cote/mplate.tar.gz",
sha256="647b848343e423a24e05d3a3d462fa6b77958e362aadf70e9bb51bd420730df2",
destination="mtx",
)
resource(
name="qc2534",
url="https://sparse.tamu.edu/MM/Bai/qc2534.tar.gz",
sha256="591c54ceee70222909353d2a400dd9819e3432143b2c25b6c4ffa262b8e397c8",
destination="mtx",
)
resource(
name="Chevron2",
url="https://sparse.tamu.edu/MM/Chevron/Chevron2.tar.gz",
sha256="9334b61c25958f5221fd114e9698c11ac0ec57a0432150731d3fe80033da3026",
destination="mtx",
)
resource(
name="Chevron3",
url="https://sparse.tamu.edu/MM/Chevron/Chevron3.tar.gz",
sha256="5679292ba86defedb0a6afc25274948521ace7ca90fc765265be11ca6eaaaee4",
destination="mtx",
)
resource(
name="Chevron4",
url="https://sparse.tamu.edu/MM/Chevron/Chevron4.tar.gz",
sha256="2ac9dc0d8d38cbf4a62089c74e53aea87edbb3f0b553b77b27c70df70e1d17d5",
destination="mtx",
)
def check(self):
    """Run the rocsparse-test client from the build tree.

    Does nothing unless the spec was built with the ``+test`` variant.
    The binary is looked up under ``<build_directory>/clients/staging``.
    The GoogleTest filter selects tests whose names match ``*quick*`` or
    ``*pre_checkin*`` and excludes any matching ``*known_bug*``.
    """
    if not self.spec.satisfies("+test"):
        return

    test_binary = join_path(self.build_directory, "clients", "staging", "rocsparse-test")
    gtest_filter = "--gtest_filter=*quick*:*pre_checkin*-*known_bug*"
    self.run_test(test_binary, options=[gtest_filter])
def setup_build_environment(self, env): def setup_build_environment(self, env):
env.set("CXX", self.spec["hip"].hipcc) env.set("CXX", self.spec["hip"].hipcc)
@ -138,8 +298,9 @@ def determine_version(cls, lib):
def cmake_args(self): def cmake_args(self):
args = [ args = [
self.define("BUILD_CLIENTS_SAMPLES", "OFF"), self.define("BUILD_CLIENTS_SAMPLES", "OFF"),
self.define("BUILD_CLIENTS_TESTS", "OFF"), self.define_from_variant("BUILD_CLIENTS_TESTS", "test"),
self.define("BUILD_CLIENTS_BENCHMARKS", "OFF"), self.define("BUILD_CLIENTS_BENCHMARKS", "OFF"),
self.define("ROCSPARSE_MTX_DIR", join_path(self.stage.source_path, "mtx")),
] ]
if "auto" not in self.spec.variants["amdgpu_target"]: if "auto" not in self.spec.variants["amdgpu_target"]: