Compare commits

...

6 commits
main ... hipify

13 changed files with 201 additions and 18 deletions

View file

@ -140,6 +140,44 @@ if("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
endif()
endif()
#----------------------------------------------------------#
# Setup GPU runtime (works for HIP)                        #
#----------------------------------------------------------#
# GPU_RUNTIME selects the GPU language enabled for this build. It is seeded
# from CMAKE_GPU_RUNTIME when that variable is set and defaults to ROCM
# otherwise. An already cached GPU_RUNTIME value is left untouched.
if(CMAKE_GPU_RUNTIME)
  set(GPU_RUNTIME "${CMAKE_GPU_RUNTIME}" CACHE STRING "Switches between ROCM and CUDA")
else()
  set(GPU_RUNTIME "ROCM" CACHE STRING "Switches between ROCM and CUDA")
endif()

# Really should only be ROCM or CUDA, but HIP is accepted as well because it
# is the language name CMake itself uses.
set(GPU_RUNTIMES "ROCM" "CUDA" "HIP")
set_property(CACHE GPU_RUNTIME PROPERTY STRINGS ${GPU_RUNTIMES})
if(NOT "${GPU_RUNTIME}" IN_LIST GPU_RUNTIMES)
  message(FATAL_ERROR
    "GPU_RUNTIME is set to \"${GPU_RUNTIME}\".\nGPU_RUNTIME must be either HIP, ROCM, or CUDA.")
endif()

# Users select AMD GPUs with ROCM, but the language handed to
# enable_language() is called HIP. Shadow the cache entry with a normal
# variable so that the rest of this file only ever sees HIP or CUDA.
# The comparison is quoted and exact: an unquoted ${GPU_RUNTIME} would be
# dereferenced a second time by if() when a variable named after its value
# happens to exist.
if("${GPU_RUNTIME}" STREQUAL "ROCM")
  set(GPU_RUNTIME "HIP")
endif()

enable_language(${GPU_RUNTIME})
set(CMAKE_${GPU_RUNTIME}_EXTENSIONS OFF)
set(CMAKE_${GPU_RUNTIME}_STANDARD_REQUIRED ON)

# Locate the HIP installation: the HIP_PATH environment variable wins,
# otherwise hipconfig is asked. Trailing whitespace is stripped so that the
# result can be embedded in a compiler flag.
if(DEFINED ENV{HIP_PATH})
  set(HIP_PATH "$ENV{HIP_PATH}")
else()
  execute_process(COMMAND hipconfig --path
                  OUTPUT_VARIABLE HIP_PATH
                  OUTPUT_STRIP_TRAILING_WHITESPACE
                  ERROR_QUIET)
endif()

string(APPEND ROCMCC_FLAGS " -munsafe-fp-atomics")

if("${GPU_RUNTIME}" STREQUAL "HIP")
  set(HIPCC_FLAGS "${ROCMCC_FLAGS}")
elseif(HIP_PATH)
  # CUDA builds need the HIP headers on the include path. HIP_PATH is used
  # as-is (no extra leading slash).
  set(HIPCC_FLAGS "${CUDACC_FLAGS} -I${HIP_PATH}/include")
else()
  # Neither the environment nor hipconfig produced a path; do not emit a
  # bogus include flag.
  message(WARNING "HIP_PATH could not be determined; the HIP headers are not added to HIPCC_FLAGS.")
  set(HIPCC_FLAGS "${CUDACC_FLAGS}")
endif()
#----------------------------------------------------------#
# Add all necessary compiler warnings for debugging. #
#----------------------------------------------------------#

View file

@ -0,0 +1,10 @@
/*
 * Declaration of bwc_saxpy(), the entry point defined in saxpy.hip.
 *
 * The function is declared with C linkage. The linkage specification is only
 * emitted when the header is read by a C++ compiler, so the header can be
 * included from both C and C++/HIP translation units.
 */
#ifndef SAXPY_H
#define SAXPY_H

#ifdef __cplusplus
extern "C" {
#endif

void bwc_saxpy(void);

#ifdef __cplusplus
}
#endif

#endif /* SAXPY_H */

View file

@ -91,7 +91,7 @@ destination = current_path.joinpath('include/library/public')
if os.path.isdir(destination) == False:
os.mkdir(destination)
include_files = ['macros.h', 'constants.h', 'dwt.h', 'tagtree.h', 'mq_types.h', 'mq.h',
include_files = ['macros.h', 'constants.h', 'saxpy.h', 'dwt.h', 'tagtree.h', 'mq_types.h', 'mq.h',
'bitstream.h', 'codestream.h', 'tier1.h', 'tier2.h', 'types.h', 'libbwc.h']
exclude_files = ["prim_types_double.h", "prim_types_single.h"]
all_files = [f for f in os.listdir(source) if os.path.isfile(os.path.join(source, f))]

View file

@ -80,6 +80,7 @@ add_library(bwclib ${BWC_LINK} bitstream.c
codestream.c
dwt.c
mq.c
saxpy.hip
tier1.c
tier2.c
tagtree.c)

View file

@ -637,7 +637,7 @@ parse_main_header(bwc_data *const data,bitstream *const stream)
info->precision = codec_prec = (uint8)get_symbol(stream, 1);
buffer_char = (char*)get_chunck(stream, 10);
strncpy(info->f_ext, buffer_char, sizeof(buffer_char)/sizeof(*buffer_char));
strncpy(info->f_ext, buffer_char, 10);
free(buffer_char);
for(p = 0; p < nPar; ++p)

View file

@ -484,6 +484,8 @@ whole_point_symmetric_extend(bwc_sample *const working_buffer, uint64 res0, uint
! 25.06.2018 Patrick Vogler B87D120 V 0.1.0 function created !
! !
\*----------------------------------------------------------------------------------------------------------*/
// INCLUDE HIP SAXPY
//#include "saxpy.h"
static void
forward_9x7_CDF_wavelet_transform(bwc_sample *const working_buffer, uint64 res0, uint64 res1)
{
@ -535,6 +537,9 @@ forward_9x7_CDF_wavelet_transform(bwc_sample *const working_buffer, uint64 res0,
working_buffer[i].f += DELTA * (working_buffer[i - 1].f + working_buffer[i + 1].f);
working_buffer[i].f = KAPPA_L * working_buffer[i].f;
}
// INCLUDE HIP SAXPY
//bwc_saxpy();
}
/*----------------------------------------------------------------------------------------------------------*\

View file

@ -3563,7 +3563,7 @@ bwc_set_tiles(bwc_field *const field, uint64 tilesX, uint64 tilesY, uint64 tiles
! Check if the number of tiles exceeds its maximum allowa- !
! ble value. !
\*--------------------------------------------------------*/
if(((double)num_tiles_X * num_tiles_Y * num_tiles_Z * num_tiles_TS) > 0xFFFFFFFFFFFFFFFF)
if(((double)num_tiles_X * num_tiles_Y * num_tiles_Z * num_tiles_TS) > (double)0xFFFFFFFFFFFFFFFF)
{
fprintf(stderr,"o==========================================================o\n"\
"| WARNING: Invalid Tile Dimensions |\n"\
@ -3593,7 +3593,7 @@ bwc_set_tiles(bwc_field *const field, uint64 tilesX, uint64 tilesY, uint64 tiles
! Check if the number of tiles exceeds its maximum allowa- !
! ble value. !
\*--------------------------------------------------------*/
if(((double)tilesX * tilesY * tilesZ * tilesTS) > 0xFFFFFFFFFFFFFFFF)
if(((double)tilesX * tilesY * tilesZ * tilesTS) > (double)0xFFFFFFFFFFFFFFFF)
{
fprintf(stderr,"o==========================================================o\n"\
"| WARNING: Invalid Number Of Tiles |\n"\

53
src/library/saxpy.hip Normal file
View file

@ -0,0 +1,53 @@
#include <hip/hip_runtime.h>
#include <iostream>
#include "saxpy.h"
// Scale factor of the update y += a*x performed by the saxpy kernel in this
// file. Declared __constant__, i.e. it lives in device constant memory.
__constant__ float a = 1.0f;
/*
 * Fills the first n entries of x with 1.0f and the first n entries of y
 * with 0.0f. Both arrays must provide room for at least n floats.
 * A non-positive n leaves both arrays untouched.
 */
void init (int n, float *x, float *y)
{
  // The counter is signed on purpose: an unsigned counter compared against a
  // negative n would see n converted to a huge value and run off the arrays.
  for (int i = 0; i < n; ++i)
  {
    x[i] = 1.0f;
    y[i] = 0.0f;
  }
}
/*
 * Kernel computing y <- a*x + y for n logical elements, one element per
 * thread; threads whose global index is >= n do nothing. `a` is the
 * file-level __constant__ scale factor.
 *
 *   n     number of logical elements
 *   x     input vector
 *   incx  distance between consecutive elements of x
 *   y     vector that is updated in place
 *   incy  distance between consecutive elements of y
 *
 * The increments follow the BLAS convention: a negative increment walks the
 * vector backwards, starting at its far end. With incx == incy == 1 this is
 * the plain y[i] += a*x[i]. An incy of 0 would make every thread update y[0]
 * without synchronization and must not be used.
 */
__global__
void saxpy (int n, float const* x, int incx, float* y, int incy)
{
  int const i = blockDim.x * blockIdx.x + threadIdx.x;
  if (i >= n)
    return;

  // Offsets are computed in 64 bit so that i * inc cannot overflow an int.
  long long const ix = (incx >= 0) ? (long long)i * incx
                                   : (long long)(i - (n - 1)) * incx;
  long long const iy = (incy >= 0) ? (long long)i * incy
                                   : (long long)(i - (n - 1)) * incy;

  y[iy] += a * x[ix];
}
void bwc_saxpy()
{
int n = 256;
std::size_t size = sizeof(float)*n;
std::cout << "2 Doing saxpy\n";
float *h_x = new float [n];
float *h_y = new float [n];
init(n, h_x, h_y);
float* d_x;
float *d_y;
hipMalloc(&d_x, size);
hipMalloc(&d_y, size);
hipMemcpy(d_x, h_x, size, hipMemcpyHostToDevice);
hipMemcpy(d_y, h_y, size, hipMemcpyHostToDevice);
int num_groups = 2;
int group_size = 128;
saxpy<<<num_groups, group_size>>>(n, d_x, 1, d_y, 1);
hipDeviceSynchronize();
hipMemcpy(h_y, d_y, size, hipMemcpyDeviceToHost);
std::cout << "Done saxpy\n";
}

View file

@ -2191,7 +2191,7 @@ compute_convex_hull(bwc_encoded_cblk *const encoded_codeblock, double *const mse
h = hull;
hlast = 0;
lambda [0] = 0xFFFFFFFFFFFFFFFF;
lambda [0] = (double)0xFFFFFFFFFFFFFFFF;
for(i = 0; i < encoded_codeblock->Z; ++i)
{
@ -2233,7 +2233,7 @@ compute_convex_hull(bwc_encoded_cblk *const encoded_codeblock, double *const mse
}
else
{
lambda[hlast] = 0xFFFFFFFFFFFFFFFF;
lambda[hlast] = (double)0xFFFFFFFFFFFFFFFF;
}
}
else

View file

@ -48,6 +48,10 @@
# Command line tool whose entry point lives in the C source bwccmdl.c.
add_executable(bwccmd
  bwccmdl.c
  ../interfaces/reader/eas3.c
)

# Variant whose entry point lives in the HIP source bwccmdl.hip.
add_executable(bwccmdhip
  bwccmdl.hip
  ../interfaces/reader/eas3.c
)
#----------------------------------------------------------#
# Set the target compile definition for the requested file #
# format support. #
@ -56,16 +60,18 @@ MESSAGE(STATUS "EAS3 file format support: ${BUILD_EAS3}")
# Pass the requested file format support on to both command line tools.
# The options are tested by name: if() dereferences the variable itself,
# whereas if(${BUILD_EAS3}) would expand the value first and then treat that
# value as yet another variable name.
foreach(bwc_tool IN ITEMS bwccmd bwccmdhip)
  if(BUILD_EAS3)
    target_compile_definitions(${bwc_tool} PRIVATE BWC_EAS3)
  endif()
  if(BUILD_NETCDF)
    target_compile_definitions(${bwc_tool} PRIVATE BWC_NETCDF)
  endif()
endforeach()
#----------------------------------------------------------#
# Name the installed binaries: bwccmd -> bwc,              #
# bwccmdhip -> bwchip.                                     #
#----------------------------------------------------------#
set_target_properties(bwccmd    PROPERTIES OUTPUT_NAME bwc)
set_target_properties(bwccmdhip PROPERTIES OUTPUT_NAME bwchip)
#----------------------------------------------------------#
# Setup up the include directory for the bwc utilities. #
@ -74,12 +80,19 @@ target_include_directories(bwccmd PRIVATE ${CMAKE_SOURCE_DIR}/include/tools)
target_include_directories(bwccmd PRIVATE
  ${CMAKE_SOURCE_DIR}/include/library/public
  ${CMAKE_SOURCE_DIR}/include/interfaces/reader
)
target_include_directories(bwccmdhip PRIVATE
  ${CMAKE_SOURCE_DIR}/include/tools
  ${CMAKE_SOURCE_DIR}/include/library/public
  ${CMAKE_SOURCE_DIR}/include/interfaces/reader
)

#----------------------------------------------------------#
# Install both utilities into the binary directory.        #
#----------------------------------------------------------#
install(TARGETS bwccmd bwccmdhip DESTINATION ${CMAKE_INSTALL_BINDIR})

#----------------------------------------------------------#
# Link both utilities against the bwc library and libm.    #
#----------------------------------------------------------#
foreach(bwc_tool IN ITEMS bwccmd bwccmdhip)
  target_link_libraries(${bwc_tool} PRIVATE bwclib m)
endforeach()

View file

@ -802,6 +802,9 @@ parse_arguments(int argc,
}
args->root = args;
// INCLUDE HIP SAXPY
bwc_saxpy();
/*--------------------------------------------------------*\
! Walk through all the command-line arguments passed to !
! main. !
@ -2951,11 +2954,11 @@ main(int argc,
printf("----------------- Compression Parameters -----------------\n\n");
if((control->CSsgc &0x200) != 0)
{
printf(" Number of Tiles: %27d\n", control->nTiles);
printf(" Number of Tiles: %27lu\n", control->nTiles);
printf(" - Samples in 1.D: %27ld\n", control->tileSizeX);
printf(" - Samples in 2.D: %27ld\n", control->tileSizeY);
printf(" - Samples in 3.D: %27ld\n", control->tileSizeZ);
printf(" - Timesteps: %27d\n", control->tileSizeTS);
printf(" - Timesteps: %27lu\n", control->tileSizeTS);
printf(" ..........................................................\n");
printf("\n");
}

60
src/tools/bwccmdl.hip Normal file
View file

@ -0,0 +1,60 @@
#include <bwc.h>
#include <stdlib.h>
#include "bwccmdl.h"
#include <hip/hip_runtime.h>
#include <iostream>
// Scale factor of the update y += a*x performed by the saxpy kernel in this
// file. Declared __constant__, i.e. it lives in device constant memory.
__constant__ float a = 1.0f;
/*
 * Fills the first n entries of x with 1.0f and the first n entries of y
 * with 0.0f. Both arrays must provide room for at least n floats.
 * A non-positive n leaves both arrays untouched.
 */
void init (int n, float *x, float *y)
{
  // The counter is signed on purpose: an unsigned counter compared against a
  // negative n would see n converted to a huge value and run off the arrays.
  for (int i = 0; i < n; ++i)
  {
    x[i] = 1.0f;
    y[i] = 0.0f;
  }
}
/*
 * Kernel computing y <- a*x + y for n logical elements, one element per
 * thread; threads whose global index is >= n do nothing. `a` is the
 * file-level __constant__ scale factor.
 *
 *   n     number of logical elements
 *   x     input vector
 *   incx  distance between consecutive elements of x
 *   y     vector that is updated in place
 *   incy  distance between consecutive elements of y
 *
 * The increments follow the BLAS convention: a negative increment walks the
 * vector backwards, starting at its far end. With incx == incy == 1 this is
 * the plain y[i] += a*x[i]. An incy of 0 would make every thread update y[0]
 * without synchronization and must not be used.
 */
__global__
void saxpy (int n, float const* x, int incx, float* y, int incy)
{
  int const i = blockDim.x * blockIdx.x + threadIdx.x;
  if (i >= n)
    return;

  // Offsets are computed in 64 bit so that i * inc cannot overflow an int.
  long long const ix = (incx >= 0) ? (long long)i * incx
                                   : (long long)(i - (n - 1)) * incx;
  long long const iy = (incy >= 0) ? (long long)i * incy
                                   : (long long)(i - (n - 1)) * incy;

  y[iy] += a * x[ix];
}
int
main(int argc,
char *argv[])
{
// INCLUDE HIP SAXPY
bwc_saxpy();
int n = 256;
std::size_t size = sizeof(float)*n;
std::cout << "Doing saxpy\n";
float *h_x = new float [n];
float *h_y = new float [n];
init(n, h_x, h_y);
float* d_x;
float *d_y;
hipMalloc(&d_x, size);
hipMalloc(&d_y, size);
hipMemcpy(d_x, h_x, size, hipMemcpyHostToDevice);
hipMemcpy(d_y, h_y, size, hipMemcpyHostToDevice);
int num_groups = 2;
int group_size = 128;
saxpy<<<num_groups, group_size>>>(n, d_x, 1, d_y, 1);
hipDeviceSynchronize();
hipMemcpy(h_y, d_y, size, hipMemcpyDeviceToHost);
std::cout << "Done saxpy\n";
return 0;
}