rocsparse: add spack build test (#31529)
* rocsparse: add spack build test * Fix Navi 1x patch for ROCm 5.2 * Remove bench variant and other cleanup * Fix style
This commit is contained in:
parent
3e331c7397
commit
633a4cbd46
5 changed files with 364 additions and 7 deletions
|
@ -0,0 +1,32 @@
|
|||
--- a/clients/tests/CMakeLists.txt
|
||||
+++ b/clients/tests/CMakeLists.txt
|
||||
@@ -33,6 +33,7 @@ if(NOT EXISTS "${CMAKE_MATRICES_DIR}")
|
||||
# Download.
|
||||
#
|
||||
set(CMAKE_MATRICES_DIR ${PROJECT_BINARY_DIR}/matrices CACHE STRING "Matrices directory.")
|
||||
+ file(MAKE_DIRECTORY ${CMAKE_MATRICES_DIR})
|
||||
|
||||
if(NOT TARGET rocsparse)
|
||||
set(CONVERT_SOURCE ${CMAKE_SOURCE_DIR}/../deps/convert.cpp CACHE STRING "Convert tool mtx2csr.")
|
||||
diff --git a/cmake/ClientMatrices.cmake b/cmake/ClientMatrices.cmake
|
||||
index 871ebc4..b7fb7bc 100644
|
||||
--- a/cmake/ClientMatrices.cmake
|
||||
+++ b/cmake/ClientMatrices.cmake
|
||||
@@ -99,6 +99,7 @@ foreach(i RANGE 0 ${len1})
|
||||
|
||||
# Download test matrices if not already downloaded
|
||||
if(NOT EXISTS "${CMAKE_MATRICES_DIR}/${mat}.csr")
|
||||
+ if(NOT ROCSPARSE_MTX_DIR)
|
||||
# First try user specified mirror, if available
|
||||
if(DEFINED ENV{ROCSPARSE_TEST_MIRROR} AND NOT $ENV{ROCSPARSE_TEST_MIRROR} STREQUAL "")
|
||||
message("-- Downloading and extracting test matrix ${m}.tar.gz from user specified test mirror: $ENV{ROCSPARSE_TEST_MIRROR}")
|
||||
@@ -149,6 +150,9 @@ foreach(i RANGE 0 ${len1})
|
||||
WORKING_DIRECTORY ${CMAKE_MATRICES_DIR})
|
||||
|
||||
file(RENAME ${CMAKE_MATRICES_DIR}/${mat}/${mat}.mtx ${CMAKE_MATRICES_DIR}/${mat}.mtx)
|
||||
+ else()
|
||||
+ file(RENAME ${ROCSPARSE_MTX_DIR}/${mat}/${mat}.mtx ${CMAKE_MATRICES_DIR}/${mat}.mtx)
|
||||
+ endif()
|
||||
execute_process(COMMAND ${PROJECT_BINARY_DIR}/mtx2csr.exe ${mat}.mtx ${mat}.csr
|
||||
WORKING_DIRECTORY ${CMAKE_MATRICES_DIR})
|
||||
# TODO: add 'COMMAND_ERROR_IS_FATAL ANY' once cmake supported version is 3.19
|
|
@ -0,0 +1,8 @@
|
|||
--- a/clients/common/rocsparse_gentest.py
|
||||
+++ b/clients/common/rocsparse_gentest.py
|
||||
@@ -1,4 +1,4 @@
|
||||
-#!/usr/bin/python3
|
||||
+#!/usr/bin/env python3
|
||||
|
||||
# ########################################################################
|
||||
# Copyright (c) 2019-2021 Advanced Micro Devices, Inc.
|
|
@ -0,0 +1,78 @@
|
|||
From f9446b8f4c3cb3a3c6d38734f9980712a82b9db9 Mon Sep 17 00:00:00 2001
|
||||
From: Cory Bloor <Cordell.Bloor@amd.com>
|
||||
Date: Fri, 8 Jul 2022 20:53:32 -0600
|
||||
Subject: [PATCH] Improve guards for arch-specific instructions (#368)
|
||||
|
||||
When choosing between a specialized implementation that uses
|
||||
architecture-specific functionality and a generic fallback, it is
|
||||
usually preferable to make the fallback the default. This will give the
|
||||
software the best possible chance of functioning without modification
|
||||
on future hardware.
|
||||
|
||||
Of course, the library will still need code updates to function
|
||||
optimally on hardware released after the software was written.
|
||||
|
||||
rocSPARSE can also be compiled with CXXFLAGS=-DROCSPARSE_USE_MOVE_DPP=0
|
||||
to force the use of the fallback implementation. Or with the value 1 to
|
||||
force the use of the specialized __hip_move_dpp implementation.
|
||||
|
||||
This change fixes the compilation error:
|
||||
|
||||
Illegal instruction detected: Invalid dpp_ctrl value: broadcasts are not supported on GFX10+
|
||||
|
||||
when building for unsupported Navi 1x and Navi 2x GPUs as was
|
||||
reported in https://github.com/ROCmSoftwarePlatform/rocSPARSE/issues/250
|
||||
---
|
||||
library/src/include/common.h | 18 +++++++++++++++---
|
||||
1 file changed, 15 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/library/src/include/common.h b/library/src/include/common.h
|
||||
index 6a4654af..975c5f7d 100644
|
||||
--- a/library/src/include/common.h
|
||||
+++ b/library/src/include/common.h
|
||||
@@ -34,6 +34,18 @@
|
||||
|
||||
// clang-format off
|
||||
|
||||
+#ifndef ROCSPARSE_USE_MOVE_DPP
|
||||
+#if defined(__gfx803__) || \
|
||||
+ defined(__gfx900__) || \
|
||||
+ defined(__gfx906__) || \
|
||||
+ defined(__gfx908__) || \
|
||||
+ defined(__gfx90a__)
|
||||
+#define ROCSPARSE_USE_MOVE_DPP 1
|
||||
+#else
|
||||
+#define ROCSPARSE_USE_MOVE_DPP 0
|
||||
+#endif
|
||||
+#endif
|
||||
+
|
||||
// BSR indexing macros
|
||||
#define BSR_IND(j, bi, bj, dir) ((dir == rocsparse_direction_row) ? BSR_IND_R(j, bi, bj) : BSR_IND_C(j, bi, bj))
|
||||
#define BSR_IND_R(j, bi, bj) (block_dim * block_dim * (j) + (bi) * block_dim + (bj))
|
||||
@@ -233,7 +245,7 @@ __device__ __forceinline__ void rocsparse_blockreduce_min(int i, T* data)
|
||||
if(BLOCKSIZE > 1) { if(i < 1 && i + 1 < BLOCKSIZE) { data[i] = min(data[i], data[i + 1]); } __syncthreads(); }
|
||||
}
|
||||
|
||||
-#ifndef __gfx1030__
|
||||
+#if ROCSPARSE_USE_MOVE_DPP
|
||||
// DPP-based wavefront reduction maximum
|
||||
template <unsigned int WFSIZE>
|
||||
__device__ __forceinline__ void rocsparse_wfreduce_max(int* maximum)
|
||||
@@ -499,7 +511,7 @@ __device__ __forceinline__ double rocsparse_wfreduce_sum(double sum)
|
||||
sum = temp_sum.val;
|
||||
return sum;
|
||||
}
|
||||
-#else
|
||||
+#else /* ROCSPARSE_USE_MOVE_DPP */
|
||||
template <unsigned int WFSIZE>
|
||||
__device__ __forceinline__ void rocsparse_wfreduce_max(int* maximum)
|
||||
{
|
||||
@@ -566,7 +578,7 @@ __device__ __forceinline__ double rocsparse_wfreduce_sum(double sum)
|
||||
|
||||
return sum;
|
||||
}
|
||||
-#endif
|
||||
+#endif /* ROCSPARSE_USE_MOVE_DPP */
|
||||
|
||||
// DPP-based complex float wavefront reduction sum
|
||||
template <unsigned int WFSIZE>
|
|
@ -0,0 +1,78 @@
|
|||
From f9446b8f4c3cb3a3c6d38734f9980712a82b9db9 Mon Sep 17 00:00:00 2001
|
||||
From: Cory Bloor <Cordell.Bloor@amd.com>
|
||||
Date: Fri, 8 Jul 2022 20:53:32 -0600
|
||||
Subject: [PATCH] Improve guards for arch-specific instructions (#368)
|
||||
|
||||
When choosing between a specialized implementation that uses
|
||||
architecture-specific functionality and a generic fallback, it is
|
||||
usually preferable to make the fallback the default. This will give the
|
||||
software the best possible chance of functioning without modification
|
||||
on future hardware.
|
||||
|
||||
Of course, the library will still need code updates to function
|
||||
optimally on hardware released after the software was written.
|
||||
|
||||
rocSPARSE can also be compiled with CXXFLAGS=-DROCSPARSE_USE_MOVE_DPP=0
|
||||
to force the use of the fallback implementation. Or with the value 1 to
|
||||
force the use of the specialized __hip_move_dpp implementation.
|
||||
|
||||
This change fixes the compilation error:
|
||||
|
||||
Illegal instruction detected: Invalid dpp_ctrl value: broadcasts are not supported on GFX10+
|
||||
|
||||
when building for unsupported Navi 1x and Navi 2x GPUs as was
|
||||
reported in https://github.com/ROCmSoftwarePlatform/rocSPARSE/issues/250
|
||||
---
|
||||
library/src/include/common.h | 18 +++++++++++++++---
|
||||
1 file changed, 15 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/library/src/include/common.h b/library/src/include/common.h
|
||||
index 6a4654af..975c5f7d 100644
|
||||
--- a/library/src/include/common.h
|
||||
+++ b/library/src/include/common.h
|
||||
@@ -34,6 +34,18 @@
|
||||
|
||||
// clang-format off
|
||||
|
||||
+#ifndef ROCSPARSE_USE_MOVE_DPP
|
||||
+#if defined(__gfx803__) || \
|
||||
+ defined(__gfx900__) || \
|
||||
+ defined(__gfx906__) || \
|
||||
+ defined(__gfx908__) || \
|
||||
+ defined(__gfx90a__)
|
||||
+#define ROCSPARSE_USE_MOVE_DPP 1
|
||||
+#else
|
||||
+#define ROCSPARSE_USE_MOVE_DPP 0
|
||||
+#endif
|
||||
+#endif
|
||||
+
|
||||
// BSR indexing macros
|
||||
#define BSR_IND(j, bi, bj, dir) ((dir == rocsparse_direction_row) ? BSR_IND_R(j, bi, bj) : BSR_IND_C(j, bi, bj))
|
||||
#define BSR_IND_R(j, bi, bj) (block_dim * block_dim * (j) + (bi) * block_dim + (bj))
|
||||
@@ -233,7 +245,7 @@ __device__ __forceinline__ void rocsparse_blockreduce_min(int i, T* data)
|
||||
if(BLOCKSIZE > 1) { if(i < 1 && i + 1 < BLOCKSIZE) { data[i] = min(data[i], data[i + 1]); } __syncthreads(); }
|
||||
}
|
||||
|
||||
-#if (!defined(__gfx1030__)) && (!defined(__gfx1011__))
|
||||
+#if ROCSPARSE_USE_MOVE_DPP
|
||||
// DPP-based wavefront reduction maximum
|
||||
template <unsigned int WFSIZE>
|
||||
__device__ __forceinline__ void rocsparse_wfreduce_max(int* maximum)
|
||||
@@ -499,7 +511,7 @@ __device__ __forceinline__ double rocsparse_wfreduce_sum(double sum)
|
||||
sum = temp_sum.val;
|
||||
return sum;
|
||||
}
|
||||
-#else
|
||||
+#else /* ROCSPARSE_USE_MOVE_DPP */
|
||||
template <unsigned int WFSIZE>
|
||||
__device__ __forceinline__ void rocsparse_wfreduce_max(int* maximum)
|
||||
{
|
||||
@@ -566,7 +578,7 @@ __device__ __forceinline__ double rocsparse_wfreduce_sum(double sum)
|
||||
|
||||
return sum;
|
||||
}
|
||||
-#endif
|
||||
+#endif /* ROCSPARSE_USE_MOVE_DPP */
|
||||
|
||||
// DPP-based complex float wavefront reduction sum
|
||||
template <unsigned int WFSIZE>
|
|
@ -3,7 +3,6 @@
|
|||
#
|
||||
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
|
||||
|
||||
import itertools
|
||||
import re
|
||||
|
||||
from spack.package import *
|
||||
|
@ -33,6 +32,7 @@ class Rocsparse(CMakePackage):
|
|||
values=("Release", "Debug", "RelWithDebInfo"),
|
||||
description="CMake build type",
|
||||
)
|
||||
variant("test", default=False, description="Build rocsparse-test client")
|
||||
|
||||
version("5.2.0", sha256="7ed929af16d2502135024a6463997d9a95f03899b8a33aa95db7029575c89572")
|
||||
version("5.1.3", sha256="ef9641045b36c9aacc87e4fe7717b41b1e29d97e21432678dce7aca633a8edc2")
|
||||
|
@ -114,13 +114,173 @@ class Rocsparse(CMakePackage):
|
|||
"5.2.0",
|
||||
]:
|
||||
depends_on("hip@" + ver, when="@" + ver)
|
||||
for tgt in itertools.chain(["auto"], amdgpu_targets):
|
||||
depends_on(
|
||||
"rocprim@{0} amdgpu_target={1}".format(ver, tgt),
|
||||
when="@{0} amdgpu_target={1}".format(ver, tgt),
|
||||
)
|
||||
depends_on("rocprim@" + ver, when="@" + ver)
|
||||
depends_on("rocm-cmake@%s:" % ver, type="build", when="@" + ver)
|
||||
|
||||
# Add option so Spack can manage downloaded test matrices as resources.
|
||||
patch("0001-set-mtx-directory.patch", when="@4.5.0: +test")
|
||||
# Enable use of Spack-provided Python.
|
||||
patch("0002-fix-gentest-shebang.patch", when="@4.5.0: +test")
|
||||
# Fix build for most Radeon 5000 and Radeon 6000 series GPUs.
|
||||
patch("0003-fix-navi-1x-rocm-4.5.patch", when="@4.5.0:5.1")
|
||||
patch("0003-fix-navi-1x-rocm-5.2.patch", when="@5.2.0:")
|
||||
|
||||
depends_on("googletest@1.11.0:", when="@5.1.0: +test")
|
||||
depends_on("googletest@1.10.0:", when="+test")
|
||||
depends_on("python@3:", type="build", when="+test")
|
||||
depends_on("py-pyyaml", type="build", when="+test")
|
||||
|
||||
with when("+test"):
|
||||
resource(
|
||||
name="amazon0312",
|
||||
url="https://sparse.tamu.edu/MM/SNAP/amazon0312.tar.gz",
|
||||
sha256="75ffd36b33675856f370f508d53e6197caa972ac52929991db7dc4198bd64910",
|
||||
destination="mtx",
|
||||
)
|
||||
resource(
|
||||
name="Chebyshev4",
|
||||
url="https://sparse.tamu.edu/MM/Muite/Chebyshev4.tar.gz",
|
||||
sha256="82553d73281587ea70e5faa427910e979524412c89e59ada7fa47a97142ae8a6",
|
||||
destination="mtx",
|
||||
)
|
||||
resource(
|
||||
name="sme3Dc",
|
||||
url="https://sparse.tamu.edu/MM/FEMLAB/sme3Dc.tar.gz",
|
||||
sha256="82f03904849cceea0af1b9975942717527ecc5e87a98cfddea78ffbe7e7c076d",
|
||||
destination="mtx",
|
||||
)
|
||||
resource(
|
||||
name="webbase-1M",
|
||||
url="https://sparse.tamu.edu/MM/Williams/webbase-1M.tar.gz",
|
||||
sha256="17a0391cdd966350b2b41f32aaf8e6684f3c55f25eb68c4be088f44f728a3ed4",
|
||||
destination="mtx",
|
||||
)
|
||||
resource(
|
||||
name="rma10",
|
||||
url="https://sparse.tamu.edu/MM/Bova/rma10.tar.gz",
|
||||
sha256="50db8d278d371531b3dd0638444d47a77f3a3e189663993a857861dbc34c5e3f",
|
||||
destination="mtx",
|
||||
)
|
||||
resource(
|
||||
name="bibd_22_8",
|
||||
url="https://sparse.tamu.edu/MM/JGD_BIBD/bibd_22_8.tar.gz",
|
||||
sha256="534b5210662d1b5b14a3938671501189685d12abf9f2a206778508345181014c",
|
||||
destination="mtx",
|
||||
)
|
||||
resource(
|
||||
name="mac_econ_fwd500",
|
||||
url="https://sparse.tamu.edu/MM/Williams/mac_econ_fwd500.tar.gz",
|
||||
sha256="0dec2952b2908e3d59e4179289245db7f2c84f9e5e6543e818491deed5978f82",
|
||||
destination="mtx",
|
||||
)
|
||||
resource(
|
||||
name="mc2depi",
|
||||
url="https://sparse.tamu.edu/MM/Williams/mc2depi.tar.gz",
|
||||
sha256="c02fef86efdd4f4322487e7472697a3d30b084ede1021e6d6889b347d3f6b268",
|
||||
destination="mtx",
|
||||
)
|
||||
resource(
|
||||
name="scircuit",
|
||||
url="https://sparse.tamu.edu/MM/Hamm/scircuit.tar.gz",
|
||||
sha256="227d4c98e51c8af49c07f89929c62f2523e115e81b672e7f306185ea92c2996f",
|
||||
destination="mtx",
|
||||
)
|
||||
resource(
|
||||
name="ASIC_320k",
|
||||
url="https://sparse.tamu.edu/MM/Sandia/ASIC_320k.tar.gz",
|
||||
sha256="d0d4ac477f641c8372d7347bc262ffcbde017f50fb17bb1a1539c98dd3440145",
|
||||
destination="mtx",
|
||||
)
|
||||
resource(
|
||||
name="bmwcra_1",
|
||||
url="https://sparse.tamu.edu/MM/GHS_psdef/bmwcra_1.tar.gz",
|
||||
sha256="31467b319f3d4e8a8fc3a320344650bee14b285755b13ee29264b7a488b3d222",
|
||||
destination="mtx",
|
||||
)
|
||||
resource(
|
||||
name="nos1",
|
||||
url="https://sparse.tamu.edu/MM/HB/nos1.tar.gz",
|
||||
sha256="7e64dc2408890e85a60dbd2ad048963c74625cc3037dbdff9647d30844a52674",
|
||||
destination="mtx",
|
||||
)
|
||||
resource(
|
||||
name="nos2",
|
||||
url="https://sparse.tamu.edu/MM/HB/nos2.tar.gz",
|
||||
sha256="7439318b969e8cad0e96f154937a35256374bb8f0e16ed7ecc3a5219f8dc903b",
|
||||
destination="mtx",
|
||||
)
|
||||
resource(
|
||||
name="nos3",
|
||||
url="https://sparse.tamu.edu/MM/HB/nos3.tar.gz",
|
||||
sha256="7dd62179bbcaeb693c774712a8d70b97316364983f1cbf06cecb3900da8954a5",
|
||||
destination="mtx",
|
||||
)
|
||||
resource(
|
||||
name="nos4",
|
||||
url="https://sparse.tamu.edu/MM/HB/nos4.tar.gz",
|
||||
sha256="ec2323a5195db153fd6ae32ff537b22eb47f08e73949754b71f8d4104358f10f",
|
||||
destination="mtx",
|
||||
)
|
||||
resource(
|
||||
name="nos5",
|
||||
url="https://sparse.tamu.edu/MM/HB/nos5.tar.gz",
|
||||
sha256="dd67e906b0392cfbbe5a01a1f1a569c50875cbf88249a31721fb87519666a342",
|
||||
destination="mtx",
|
||||
)
|
||||
resource(
|
||||
name="nos6",
|
||||
url="https://sparse.tamu.edu/MM/HB/nos6.tar.gz",
|
||||
sha256="a0301c38ed91b849571303db581205cfae113565a7938eaa1a7466320f0d03c4",
|
||||
destination="mtx",
|
||||
)
|
||||
resource(
|
||||
name="nos7",
|
||||
url="https://sparse.tamu.edu/MM/HB/nos7.tar.gz",
|
||||
sha256="c5d8d99bf4b54ee45e2f45d78530e3787f2e9670c000a68ad986a3b923e9e5ae",
|
||||
destination="mtx",
|
||||
)
|
||||
resource(
|
||||
name="shipsec1",
|
||||
url="https://sparse.tamu.edu/MM/DNVS/shipsec1.tar.gz",
|
||||
sha256="d021889affed5429f85b606900f76870d0b1b1aefd92529cc6f43bf9d7ef0eb1",
|
||||
destination="mtx",
|
||||
)
|
||||
resource(
|
||||
name="mplate",
|
||||
url="https://sparse.tamu.edu/MM/Cote/mplate.tar.gz",
|
||||
sha256="647b848343e423a24e05d3a3d462fa6b77958e362aadf70e9bb51bd420730df2",
|
||||
destination="mtx",
|
||||
)
|
||||
resource(
|
||||
name="qc2534",
|
||||
url="https://sparse.tamu.edu/MM/Bai/qc2534.tar.gz",
|
||||
sha256="591c54ceee70222909353d2a400dd9819e3432143b2c25b6c4ffa262b8e397c8",
|
||||
destination="mtx",
|
||||
)
|
||||
resource(
|
||||
name="Chevron2",
|
||||
url="https://sparse.tamu.edu/MM/Chevron/Chevron2.tar.gz",
|
||||
sha256="9334b61c25958f5221fd114e9698c11ac0ec57a0432150731d3fe80033da3026",
|
||||
destination="mtx",
|
||||
)
|
||||
resource(
|
||||
name="Chevron3",
|
||||
url="https://sparse.tamu.edu/MM/Chevron/Chevron3.tar.gz",
|
||||
sha256="5679292ba86defedb0a6afc25274948521ace7ca90fc765265be11ca6eaaaee4",
|
||||
destination="mtx",
|
||||
)
|
||||
resource(
|
||||
name="Chevron4",
|
||||
url="https://sparse.tamu.edu/MM/Chevron/Chevron4.tar.gz",
|
||||
sha256="2ac9dc0d8d38cbf4a62089c74e53aea87edbb3f0b553b77b27c70df70e1d17d5",
|
||||
destination="mtx",
|
||||
)
|
||||
|
||||
def check(self):
|
||||
if self.spec.satisfies("+test"):
|
||||
exe = join_path(self.build_directory, "clients", "staging", "rocsparse-test")
|
||||
self.run_test(exe, options=["--gtest_filter=*quick*:*pre_checkin*-*known_bug*"])
|
||||
|
||||
def setup_build_environment(self, env):
|
||||
env.set("CXX", self.spec["hip"].hipcc)
|
||||
|
||||
|
@ -138,8 +298,9 @@ def determine_version(cls, lib):
|
|||
def cmake_args(self):
|
||||
args = [
|
||||
self.define("BUILD_CLIENTS_SAMPLES", "OFF"),
|
||||
self.define("BUILD_CLIENTS_TESTS", "OFF"),
|
||||
self.define_from_variant("BUILD_CLIENTS_TESTS", "test"),
|
||||
self.define("BUILD_CLIENTS_BENCHMARKS", "OFF"),
|
||||
self.define("ROCSPARSE_MTX_DIR", join_path(self.stage.source_path, "mtx")),
|
||||
]
|
||||
|
||||
if "auto" not in self.spec.variants["amdgpu_target"]:
|
||||
|
|
Loading…
Reference in a new issue