Compare commits
6 commits
Author | SHA1 | Date | |
---|---|---|---|
3b212bb940 | |||
bdff484f1e | |||
fdb313b0bf | |||
b5244d2817 | |||
515685ce18 | |||
15cc9628d6 |
13 changed files with 201 additions and 18 deletions
|
@ -140,6 +140,44 @@ if("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
|
|||
endif()
|
||||
endif()
|
||||
|
||||
#----------------------------------------------------------#
|
||||
# Setup GPU runtime (works for HIP) #
|
||||
#----------------------------------------------------------#
|
||||
# Seed the GPU_RUNTIME cache entry. A value supplied as CMAKE_GPU_RUNTIME is
# used as the initial value; otherwise the entry defaults to ROCM.
if(NOT CMAKE_GPU_RUNTIME)
  set(GPU_RUNTIME "ROCM" CACHE STRING "Switches between ROCM and CUDA")
else()
  set(GPU_RUNTIME "${CMAKE_GPU_RUNTIME}" CACHE STRING "Switches between ROCM and CUDA")
endif()

# Really should only be ROCM or CUDA, but HIP is accepted as well because it is
# the currently built-in option.
set(GPU_RUNTIMES "ROCM" "CUDA" "HIP")

# Publish the allowed values on the cache entry before validating, so that the
# choices are recorded even when the validation below aborts the configure.
set_property(CACHE GPU_RUNTIME PROPERTY STRINGS ${GPU_RUNTIMES})

if(NOT "${GPU_RUNTIME}" IN_LIST GPU_RUNTIMES)
  set(ERROR_MESSAGE "GPU_RUNTIME is set to \"${GPU_RUNTIME}\".\nGPU_RUNTIME must be either HIP, ROCM, or CUDA.")
  message(FATAL_ERROR "${ERROR_MESSAGE}")
endif()

# GPU_RUNTIME for AMD GPUs should really be ROCM, but the value is used below
# as a language name, so ROCM is mapped to HIP. This sets a normal variable;
# the cache entry keeps the value it was given.
if("${GPU_RUNTIME}" STREQUAL "ROCM")
  set(GPU_RUNTIME "HIP")
endif()

enable_language(${GPU_RUNTIME})
set(CMAKE_${GPU_RUNTIME}_EXTENSIONS OFF)
set(CMAKE_${GPU_RUNTIME}_STANDARD_REQUIRED ON)

# Locate the HIP installation: the HIP_PATH environment variable wins,
# otherwise ask hipconfig. Trailing whitespace is stripped so the result can be
# embedded in a flag string; HIP_PATH stays empty when hipconfig is unavailable.
if(DEFINED ENV{HIP_PATH})
  set(HIP_PATH "$ENV{HIP_PATH}")
else()
  execute_process(COMMAND hipconfig --path
                  OUTPUT_VARIABLE HIP_PATH
                  OUTPUT_STRIP_TRAILING_WHITESPACE
                  ERROR_QUIET)
endif()

set(ROCMCC_FLAGS "${ROCMCC_FLAGS} -munsafe-fp-atomics")

if("${GPU_RUNTIME}" STREQUAL "HIP")
  set(HIPCC_FLAGS "${ROCMCC_FLAGS}")
elseif(NOT "${HIP_PATH}" STREQUAL "")
  # CUDA build: add the HIP include directory to the CUDA flags.
  set(HIPCC_FLAGS "${CUDACC_FLAGS} -I${HIP_PATH}/include")
else()
  message(WARNING "HIP_PATH could not be determined; the HIP include directory is not added to HIPCC_FLAGS.")
  set(HIPCC_FLAGS "${CUDACC_FLAGS}")
endif()
|
||||
|
||||
#----------------------------------------------------------#
|
||||
# Add all necessary compiler warnings for debugging. #
|
||||
#----------------------------------------------------------#
|
||||
|
|
10
include/library/private/saxpy.h
Normal file
10
include/library/private/saxpy.h
Normal file
|
@ -0,0 +1,10 @@
|
|||
|
||||
/* Declaration of bwc_saxpy with C linkage. The extern "C" wrapper is only   */
/* emitted for C++ translation units, so the header can also be included     */
/* from plain C sources.                                                     */
#ifdef __cplusplus
extern "C" {
#endif

void bwc_saxpy(void);

#ifdef __cplusplus
}
#endif
|
|
@ -91,7 +91,7 @@ destination = current_path.joinpath('include/library/public')
|
|||
if os.path.isdir(destination) == False:
|
||||
os.mkdir(destination)
|
||||
|
||||
include_files = ['macros.h', 'constants.h', 'dwt.h', 'tagtree.h', 'mq_types.h', 'mq.h',
|
||||
include_files = ['macros.h', 'constants.h', 'saxpy.h', 'dwt.h', 'tagtree.h', 'mq_types.h', 'mq.h',
|
||||
'bitstream.h', 'codestream.h', 'tier1.h', 'tier2.h', 'types.h', 'libbwc.h']
|
||||
exclude_files = ["prim_types_double.h", "prim_types_single.h"]
|
||||
all_files = [f for f in os.listdir(source) if os.path.isfile(os.path.join(source, f))]
|
||||
|
|
|
@ -80,6 +80,7 @@ add_library(bwclib ${BWC_LINK} bitstream.c
|
|||
codestream.c
|
||||
dwt.c
|
||||
mq.c
|
||||
saxpy.hip
|
||||
tier1.c
|
||||
tier2.c
|
||||
tagtree.c)
|
||||
|
|
|
@ -637,7 +637,7 @@ parse_main_header(bwc_data *const data,bitstream *const stream)
|
|||
info->precision = codec_prec = (uint8)get_symbol(stream, 1);
|
||||
|
||||
buffer_char = (char*)get_chunck(stream, 10);
|
||||
strncpy(info->f_ext, buffer_char, sizeof(buffer_char)/sizeof(*buffer_char));
|
||||
strncpy(info->f_ext, buffer_char, 10);
|
||||
free(buffer_char);
|
||||
|
||||
for(p = 0; p < nPar; ++p)
|
||||
|
|
|
@ -484,6 +484,8 @@ whole_point_symmetric_extend(bwc_sample *const working_buffer, uint64 res0, uint
|
|||
! 25.06.2018 Patrick Vogler B87D120 V 0.1.0 function created !
|
||||
! !
|
||||
\*----------------------------------------------------------------------------------------------------------*/
|
||||
// INCLUDE HIP SAXPY
|
||||
//#include "saxpy.h"
|
||||
static void
|
||||
forward_9x7_CDF_wavelet_transform(bwc_sample *const working_buffer, uint64 res0, uint64 res1)
|
||||
{
|
||||
|
@ -535,6 +537,9 @@ forward_9x7_CDF_wavelet_transform(bwc_sample *const working_buffer, uint64 res0,
|
|||
working_buffer[i].f += DELTA * (working_buffer[i - 1].f + working_buffer[i + 1].f);
|
||||
working_buffer[i].f = KAPPA_L * working_buffer[i].f;
|
||||
}
|
||||
|
||||
// INCLUDE HIP SAXPY
|
||||
//bwc_saxpy();
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------------------------------------*\
|
||||
|
|
|
@ -3563,7 +3563,7 @@ bwc_set_tiles(bwc_field *const field, uint64 tilesX, uint64 tilesY, uint64 tiles
|
|||
! Check if the number of tiles exceeds its maximum allowa- !
|
||||
! ble value. !
|
||||
\*--------------------------------------------------------*/
|
||||
if(((double)num_tiles_X * num_tiles_Y * num_tiles_Z * num_tiles_TS) > 0xFFFFFFFFFFFFFFFF)
|
||||
if(((double)num_tiles_X * num_tiles_Y * num_tiles_Z * num_tiles_TS) > (double)0xFFFFFFFFFFFFFFFF)
|
||||
{
|
||||
fprintf(stderr,"o==========================================================o\n"\
|
||||
"| WARNING: Invalid Tile Dimensions |\n"\
|
||||
|
@ -3593,7 +3593,7 @@ bwc_set_tiles(bwc_field *const field, uint64 tilesX, uint64 tilesY, uint64 tiles
|
|||
! Check if the number of tiles exceeds its maximum allowa- !
|
||||
! ble value. !
|
||||
\*--------------------------------------------------------*/
|
||||
if(((double)tilesX * tilesY * tilesZ * tilesTS) > 0xFFFFFFFFFFFFFFFF)
|
||||
if(((double)tilesX * tilesY * tilesZ * tilesTS) > (double)0xFFFFFFFFFFFFFFFF)
|
||||
{
|
||||
fprintf(stderr,"o==========================================================o\n"\
|
||||
"| WARNING: Invalid Number Of Tiles |\n"\
|
||||
|
|
53
src/library/saxpy.hip
Normal file
53
src/library/saxpy.hip
Normal file
|
@ -0,0 +1,53 @@
|
|||
|
||||
#include <hip/hip_runtime.h>
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include "saxpy.h"
|
||||
|
||||
__constant__ float a = 1.0f;
|
||||
|
||||
// Set the first n entries of x to 1.0f and the first n entries of y to 0.0f.
// The index is a signed int like n, so a zero or negative n leaves both arrays
// untouched instead of being converted to a huge unsigned bound.
void init (int n, float *x, float *y)
{
  for (int i = 0; i < n; ++i)
  {
    x[i] = 1.0f;
    y[i] = 0.0f;
  }
}
|
||||
|
||||
// Kernel: the thread with global index idx adds a * x[idx] to y[idx], where a
// is the file-level __constant__ scalar. Threads with idx >= n do nothing.
// NOTE(review): incx and incy are accepted but not used by the body.
__global__
void saxpy (int n, float const* x, int incx, float* y, int incy)
{
  int const idx = threadIdx.x + blockIdx.x * blockDim.x;

  if (idx >= n)
    return;

  y[idx] = y[idx] + a * x[idx];
}
|
||||
|
||||
void bwc_saxpy()
|
||||
{
|
||||
int n = 256;
|
||||
std::size_t size = sizeof(float)*n;
|
||||
std::cout << "2 Doing saxpy\n";
|
||||
|
||||
float *h_x = new float [n];
|
||||
float *h_y = new float [n];
|
||||
init(n, h_x, h_y);
|
||||
|
||||
float* d_x;
|
||||
float *d_y;
|
||||
hipMalloc(&d_x, size);
|
||||
hipMalloc(&d_y, size);
|
||||
hipMemcpy(d_x, h_x, size, hipMemcpyHostToDevice);
|
||||
hipMemcpy(d_y, h_y, size, hipMemcpyHostToDevice);
|
||||
|
||||
int num_groups = 2;
|
||||
int group_size = 128;
|
||||
saxpy<<<num_groups, group_size>>>(n, d_x, 1, d_y, 1);
|
||||
hipDeviceSynchronize();
|
||||
hipMemcpy(h_y, d_y, size, hipMemcpyDeviceToHost);
|
||||
|
||||
std::cout << "Done saxpy\n";
|
||||
}
|
||||
|
|
@ -2191,7 +2191,7 @@ compute_convex_hull(bwc_encoded_cblk *const encoded_codeblock, double *const mse
|
|||
|
||||
h = hull;
|
||||
hlast = 0;
|
||||
lambda [0] = 0xFFFFFFFFFFFFFFFF;
|
||||
lambda [0] = (double)0xFFFFFFFFFFFFFFFF;
|
||||
|
||||
for(i = 0; i < encoded_codeblock->Z; ++i)
|
||||
{
|
||||
|
@ -2233,7 +2233,7 @@ compute_convex_hull(bwc_encoded_cblk *const encoded_codeblock, double *const mse
|
|||
}
|
||||
else
|
||||
{
|
||||
lambda[hlast] = 0xFFFFFFFFFFFFFFFF;
|
||||
lambda[hlast] = (double)0xFFFFFFFFFFFFFFFF;
|
||||
}
|
||||
}
|
||||
else
|
||||
|
|
|
@ -48,6 +48,10 @@
|
|||
add_executable(bwccmd bwccmdl.c
|
||||
../interfaces/reader/eas3.c)
|
||||
|
||||
add_executable(bwccmdhip bwccmdl.hip
|
||||
../interfaces/reader/eas3.c)
|
||||
|
||||
|
||||
#----------------------------------------------------------#
|
||||
# Set the target compile definition for the requested file #
|
||||
# format support. #
|
||||
|
@ -56,16 +60,18 @@ MESSAGE(STATUS "EAS3 file format support: ${BUILD_EAS3}")
|
|||
|
||||
if(${BUILD_EAS3})
|
||||
target_compile_definitions(bwccmd PRIVATE -DBWC_EAS3)
|
||||
target_compile_definitions(bwccmdhip PRIVATE -DBWC_EAS3)
|
||||
endif()
|
||||
|
||||
if(${BUILD_NETCDF})
|
||||
target_compile_definitions(bwccmd PRIVATE -DBWC_NETCDF)
|
||||
target_compile_definitions(bwccmdhip PRIVATE -DBWC_NETCDF)
|
||||
endif()
|
||||
|
||||
#----------------------------------------------------------#
|
||||
# Define the output name for the utility binaries. #
|
||||
#----------------------------------------------------------#
|
||||
set_property(TARGET bwccmd PROPERTY OUTPUT_NAME bwc)
|
||||
set_property(TARGET bwccmdhip PROPERTY OUTPUT_NAME bwchip)
|
||||
|
||||
#----------------------------------------------------------#
|
||||
# Setup up the include directory for the bwc utilities. #
|
||||
|
@ -74,12 +80,19 @@ target_include_directories(bwccmd PRIVATE ${CMAKE_SOURCE_DIR}/include/tools)
|
|||
target_include_directories(bwccmd PRIVATE ${CMAKE_SOURCE_DIR}/include/library/public)
|
||||
target_include_directories(bwccmd PRIVATE ${CMAKE_SOURCE_DIR}/include/interfaces/reader)
|
||||
|
||||
# Header search paths for the HIP command-line tool, in the same order as for
# bwccmd: tool headers, public library headers, reader interface headers.
target_include_directories(bwccmdhip PRIVATE
                           ${CMAKE_SOURCE_DIR}/include/tools
                           ${CMAKE_SOURCE_DIR}/include/library/public
                           ${CMAKE_SOURCE_DIR}/include/interfaces/reader)
|
||||
|
||||
|
||||
#----------------------------------------------------------#
|
||||
# Setup the install directories. #
|
||||
#----------------------------------------------------------#
|
||||
install(TARGETS bwccmd DESTINATION ${CMAKE_INSTALL_BINDIR})
|
||||
install(TARGETS bwccmdhip DESTINATION ${CMAKE_INSTALL_BINDIR})
|
||||
|
||||
#----------------------------------------------------------#
|
||||
# Link the bwc utility to the bwc library. #
|
||||
#----------------------------------------------------------#
|
||||
target_link_libraries(bwccmd PRIVATE bwclib m)
|
||||
target_link_libraries(bwccmdhip PRIVATE bwclib m)
|
||||
|
|
|
@ -802,6 +802,9 @@ parse_arguments(int argc,
|
|||
}
|
||||
args->root = args;
|
||||
|
||||
// INCLUDE HIP SAXPY
|
||||
bwc_saxpy();
|
||||
|
||||
/*--------------------------------------------------------*\
|
||||
! Walk through all the command-line arguments passed to !
|
||||
! main. !
|
||||
|
@ -2951,11 +2954,11 @@ main(int argc,
|
|||
printf("----------------- Compression Parameters -----------------\n\n");
|
||||
if((control->CSsgc &0x200) != 0)
|
||||
{
|
||||
printf(" Number of Tiles: %27d\n", control->nTiles);
|
||||
printf(" Number of Tiles: %27lu\n", control->nTiles);
|
||||
printf(" - Samples in 1.D: %27ld\n", control->tileSizeX);
|
||||
printf(" - Samples in 2.D: %27ld\n", control->tileSizeY);
|
||||
printf(" - Samples in 3.D: %27ld\n", control->tileSizeZ);
|
||||
printf(" - Timesteps: %27d\n", control->tileSizeTS);
|
||||
printf(" - Timesteps: %27lu\n", control->tileSizeTS);
|
||||
printf(" ..........................................................\n");
|
||||
printf("\n");
|
||||
}
|
||||
|
|
60
src/tools/bwccmdl.hip
Normal file
60
src/tools/bwccmdl.hip
Normal file
|
@ -0,0 +1,60 @@
|
|||
#include <bwc.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "bwccmdl.h"
|
||||
|
||||
#include <hip/hip_runtime.h>
|
||||
#include <iostream>
|
||||
|
||||
__constant__ float a = 1.0f;
|
||||
|
||||
// Write 1.0f into x[0..n-1] and 0.0f into y[0..n-1].
// n and the loop counter share the same signed type, so n <= 0 results in no
// writes rather than an out-of-range loop caused by an unsigned conversion.
void init (int n, float *x, float *y)
{
  for (int k = 0; k < n; ++k)
  {
    x[k] = 1.0f;
    y[k] = 0.0f;
  }
}
|
||||
|
||||
// Kernel: for its global index, each thread updates one element of y by
// adding the product of the file-level __constant__ scalar a and the matching
// element of x. Indices at or beyond n are left alone.
// NOTE(review): the incx and incy parameters are not read by the body.
__global__
void saxpy (int n, float const* x, int incx, float* y, int incy)
{
  int const gid = blockIdx.x * blockDim.x + threadIdx.x;
  bool const in_range = gid < n;

  if (in_range)
  {
    y[gid] = y[gid] + a * x[gid];
  }
}
|
||||
|
||||
|
||||
int
|
||||
main(int argc,
|
||||
char *argv[])
|
||||
{
|
||||
// INCLUDE HIP SAXPY
|
||||
bwc_saxpy();
|
||||
|
||||
int n = 256;
|
||||
std::size_t size = sizeof(float)*n;
|
||||
std::cout << "Doing saxpy\n";
|
||||
|
||||
float *h_x = new float [n];
|
||||
float *h_y = new float [n];
|
||||
init(n, h_x, h_y);
|
||||
|
||||
float* d_x;
|
||||
float *d_y;
|
||||
hipMalloc(&d_x, size);
|
||||
hipMalloc(&d_y, size);
|
||||
hipMemcpy(d_x, h_x, size, hipMemcpyHostToDevice);
|
||||
hipMemcpy(d_y, h_y, size, hipMemcpyHostToDevice);
|
||||
|
||||
int num_groups = 2;
|
||||
int group_size = 128;
|
||||
saxpy<<<num_groups, group_size>>>(n, d_x, 1, d_y, 1);
|
||||
hipDeviceSynchronize();
|
||||
hipMemcpy(h_y, d_y, size, hipMemcpyDeviceToHost);
|
||||
|
||||
std::cout << "Done saxpy\n";
|
||||
return 0;
|
||||
}
|
Loading…
Reference in a new issue