Unverified Commit d6624fe4 authored by Axel Kohlmeyer's avatar Axel Kohlmeyer Committed by GitHub
Browse files

Merge pull request #1863 from Vsevak/gpu_hip

HIP back end to GPU package to support AMD GPUs via the ROCm toolkit
parents 4acdd161 d2c4ade6
Loading
Loading
Loading
Loading
+8 −0
Original line number Diff line number Diff line
@@ -733,6 +733,14 @@ if(PKG_GPU)
    message(STATUS "GPU architecture: ${GPU_ARCH}")
  elseif(GPU_API STREQUAL "OPENCL")
    message(STATUS "OpenCL tuning:    ${OCL_TUNE}")
  elseif(GPU_API STREQUAL "HIP")
    message(STATUS "HIP platform:     ${HIP_PLATFORM}")
    message(STATUS "HIP architecture: ${HIP_ARCH}")
    if(HIP_USE_DEVICE_SORT)
      message(STATUS "HIP GPU sorting: on")
    else()
      message(STATUS "HIP GPU sorting: off")
    endif()
  endif()
  message(STATUS "GPU precision:    ${GPU_PREC}")
endif()
+16 −0
Original line number Diff line number Diff line
# - Find CUB
# Find the CUB header library
#
#  CUB_INCLUDE_DIRS - where to find cub/cub.cuh
#  CUB_FOUND        - True if CUB found.
#

find_path(CUB_INCLUDE_DIR cub.cuh PATH_SUFFIXES cub)

include(FindPackageHandleStandardArgs)
# handle the QUIETLY and REQUIRED arguments and set CUB_FOUND to TRUE
# if all listed variables are TRUE

find_package_handle_standard_args(CUB DEFAULT_MSG CUB_INCLUDE_DIR)

mark_as_advanced(CUB_INCLUDE_DIR)
+3 −0
Original line number Diff line number Diff line
# utility script to call GenerateBinaryHeader function
include(${SOURCE_DIR}/Modules/LAMMPSUtils.cmake)
GenerateBinaryHeader(${VARNAME} ${HEADER_FILE} ${SOURCE_FILES})
+16 −0
Original line number Diff line number Diff line
@@ -69,3 +69,19 @@ macro(pkg_depends PKG1 PKG2)
    message(FATAL_ERROR "${PKG1} package needs LAMMPS to be build with ${PKG2}")
  endif()
endmacro()

# CMake-only replacement for bin2c and xxd
function(GenerateBinaryHeader varname outfile files)
    message("Creating ${outfile}...")
    file(WRITE ${outfile} "// CMake generated file\n")
    math(EXPR ARG_END   "${ARGC}-1")

    foreach(IDX RANGE 2 ${ARG_END})
        list(GET ARGV ${IDX} filename)
        file(READ ${filename} content HEX)
        string(REGEX REPLACE "([0-9a-f][0-9a-f])" "0x\\1," content "${content}")
        string(REGEX REPLACE ",$" "" content "${content}")
        file(APPEND ${outfile} "const unsigned char ${varname}[] = { ${content} };\n")
        file(APPEND ${outfile} "const unsigned int ${varname}_size = sizeof(${varname});\n")
    endforeach()
endfunction(GenerateBinaryHeader)
+331 −175
Original line number Diff line number Diff line
@@ -4,7 +4,7 @@
                ${GPU_SOURCES_DIR}/fix_gpu.cpp)

set(GPU_API "opencl" CACHE STRING "API used by GPU package")
  set(GPU_API_VALUES opencl cuda)
set(GPU_API_VALUES opencl cuda hip)
set_property(CACHE GPU_API PROPERTY STRINGS ${GPU_API_VALUES})
validate_option(GPU_API GPU_API_VALUES)
string(TOUPPER ${GPU_API} GPU_API)
@@ -187,6 +187,162 @@
  add_executable(ocl_get_devices ${LAMMPS_LIB_SOURCE_DIR}/gpu/geryon/ucl_get_devices.cpp)
  target_compile_definitions(ocl_get_devices PRIVATE -DUCL_OPENCL)
  target_link_libraries(ocl_get_devices PRIVATE OpenCL::OpenCL)
elseif(GPU_API STREQUAL "HIP")
  if(NOT DEFINED HIP_PATH)
      if(NOT DEFINED ENV{HIP_PATH})
          set(HIP_PATH "/opt/rocm/hip" CACHE PATH "Path to which HIP has been installed")
      else()
          set(HIP_PATH $ENV{HIP_PATH} CACHE PATH "Path to which HIP has been installed")
      endif()
  endif()
  set(CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH})
  find_package(HIP REQUIRED)
  option(HIP_USE_DEVICE_SORT "Use GPU sorting" ON)

  if(NOT DEFINED HIP_PLATFORM)
      if(NOT DEFINED ENV{HIP_PLATFORM})
          set(HIP_PLATFORM "hcc" CACHE PATH "HIP Platform to be used during compilation")
      else()
          set(HIP_PLATFORM $ENV{HIP_PLATFORM} CACHE PATH "HIP Platform used during compilation")
      endif()
  endif()

  set(ENV{HIP_PLATFORM} ${HIP_PLATFORM})

  if(HIP_PLATFORM STREQUAL "hcc")
    set(HIP_ARCH "gfx906" CACHE STRING "HIP target architecture")
  elseif(HIP_PLATFORM STREQUAL "nvcc")
    find_package(CUDA REQUIRED)
    set(HIP_ARCH "sm_30" CACHE STRING "HIP primary CUDA architecture (e.g. sm_60)")

    # build arch/gencode commands for nvcc based on CUDA toolkit version and use choice
    # --arch translates directly instead of JIT, so this should be for the preferred or most common architecture
    set(HIP_CUDA_GENCODE "-arch=${HIP_ARCH} ")
    # Fermi (GPU Arch 2.x) is supported by CUDA 3.2 to CUDA 8.0
    if((CUDA_VERSION VERSION_GREATER "3.1") AND (CUDA_VERSION VERSION_LESS "9.0"))
      string(APPEND HIP_CUDA_GENCODE "-gencode arch=compute_20,code=[sm_20,compute_20] ")
    endif()
    # Kepler (GPU Arch 3.x) is supported by CUDA 5 and later
    if(CUDA_VERSION VERSION_GREATER "4.9")
      string(APPEND HIP_CUDA_GENCODE "-gencode arch=compute_30,code=[sm_30,compute_30] -gencode arch=compute_35,code=[sm_35,compute_35] ")
    endif()
    # Maxwell (GPU Arch 5.x) is supported by CUDA 6 and later
    if(CUDA_VERSION VERSION_GREATER "5.9")
      string(APPEND HIP_CUDA_GENCODE "-gencode arch=compute_50,code=[sm_50,compute_50] -gencode arch=compute_52,code=[sm_52,compute_52] ")
    endif()
    # Pascal (GPU Arch 6.x) is supported by CUDA 8 and later
    if(CUDA_VERSION VERSION_GREATER "7.9")
      string(APPEND HIP_CUDA_GENCODE "-gencode arch=compute_60,code=[sm_60,compute_60] -gencode arch=compute_61,code=[sm_61,compute_61] ")
    endif()
    # Volta (GPU Arch 7.0) is supported by CUDA 9 and later
    if(CUDA_VERSION VERSION_GREATER "8.9")
      string(APPEND HIP_CUDA_GENCODE "-gencode arch=compute_70,code=[sm_70,compute_70] ")
    endif()
    # Turing (GPU Arch 7.5) is supported by CUDA 10 and later
    if(CUDA_VERSION VERSION_GREATER "9.9")
      string(APPEND HIP_CUDA_GENCODE "-gencode arch=compute_75,code=[sm_75,compute_75] ")
    endif()
  endif()

  file(GLOB GPU_LIB_CU ${LAMMPS_LIB_SOURCE_DIR}/gpu/[^.]*.cu ${CMAKE_CURRENT_SOURCE_DIR}/gpu/[^.]*.cu)
  list(REMOVE_ITEM GPU_LIB_CU ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_pppm.cu)

  set(GPU_LIB_CU_HIP "")
  foreach(CU_FILE ${GPU_LIB_CU})
    get_filename_component(CU_NAME ${CU_FILE} NAME_WE)
    string(REGEX REPLACE "^.*lal_" "" CU_NAME "${CU_NAME}")

    set(CU_CPP_FILE  "${LAMMPS_LIB_BINARY_DIR}/gpu/${CU_NAME}.cu.cpp")
    set(CUBIN_FILE   "${LAMMPS_LIB_BINARY_DIR}/gpu/${CU_NAME}.cubin")
    set(CUBIN_H_FILE "${LAMMPS_LIB_BINARY_DIR}/gpu/${CU_NAME}_cubin.h")

    if(HIP_PLATFORM STREQUAL "hcc")
        configure_file(${CU_FILE} ${CU_CPP_FILE} COPYONLY)

        add_custom_command(OUTPUT ${CUBIN_FILE}
          VERBATIM COMMAND ${HIP_HIPCC_EXECUTABLE} --genco -t="${HIP_ARCH}" -f=\"-O3 -ffast-math -DUSE_HIP -D_${GPU_PREC_SETTING} -I${LAMMPS_LIB_SOURCE_DIR}/gpu\" -o ${CUBIN_FILE} ${CU_CPP_FILE}
          DEPENDS ${CU_CPP_FILE}
          COMMENT "Generating ${CU_NAME}.cubin")
    elseif(HIP_PLATFORM STREQUAL "nvcc")
        add_custom_command(OUTPUT ${CUBIN_FILE}
          VERBATIM COMMAND ${HIP_HIPCC_EXECUTABLE} --fatbin --use_fast_math -DUSE_HIP -D_${GPU_PREC_SETTING} ${HIP_CUDA_GENCODE} -I${LAMMPS_LIB_SOURCE_DIR}/gpu -o ${CUBIN_FILE} ${CU_FILE}
          DEPENDS ${CU_FILE}
          COMMENT "Generating ${CU_NAME}.cubin")
    endif()

    add_custom_command(OUTPUT ${CUBIN_H_FILE}
      COMMAND ${CMAKE_COMMAND} -D SOURCE_DIR=${CMAKE_CURRENT_SOURCE_DIR} -D VARNAME=${CU_NAME} -D HEADER_FILE=${CUBIN_H_FILE} -D SOURCE_FILES=${CUBIN_FILE} -P ${CMAKE_CURRENT_SOURCE_DIR}/Modules/GenerateBinaryHeader.cmake
      DEPENDS ${CUBIN_FILE}
      COMMENT "Generating ${CU_NAME}_cubin.h")

    list(APPEND GPU_LIB_SOURCES ${CUBIN_H_FILE})
  endforeach()

  set_directory_properties(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES "${LAMMPS_LIB_BINARY_DIR}/gpu/*_cubin.h ${LAMMPS_LIB_BINARY_DIR}/gpu/*.cu.cpp")

  hip_add_library(gpu STATIC ${GPU_LIB_SOURCES})
  target_include_directories(gpu PRIVATE ${LAMMPS_LIB_BINARY_DIR}/gpu)
  target_compile_definitions(gpu PRIVATE -D_${GPU_PREC_SETTING} -DMPI_GERYON -DUCL_NO_EXIT)
  target_compile_definitions(gpu PRIVATE -DUSE_HIP)

  if(HIP_USE_DEVICE_SORT)
    # add hipCUB
    target_include_directories(gpu PRIVATE ${HIP_ROOT_DIR}/../include)
    target_compile_definitions(gpu PRIVATE -DUSE_HIP_DEVICE_SORT)

    if(HIP_PLATFORM STREQUAL "nvcc")
      find_package(CUB)

      if(CUB_FOUND)
        set(DOWNLOAD_CUB_DEFAULT OFF)
      else()
        set(DOWNLOAD_CUB_DEFAULT ON)
      endif()

      option(DOWNLOAD_CUB "Download and compile the CUB library instead of using an already installed one" ${DOWNLOAD_CUB_DEFAULT})

      if(DOWNLOAD_CUB)
        message(STATUS "CUB download requested")
        include(ExternalProject)

        ExternalProject_Add(CUB
          GIT_REPOSITORY https://github.com/NVlabs/cub
          TIMEOUT 5
          PREFIX "${CMAKE_CURRENT_BINARY_DIR}"
          CONFIGURE_COMMAND ""
          BUILD_COMMAND ""
          INSTALL_COMMAND ""
          UPDATE_COMMAND ""
        )
        ExternalProject_get_property(CUB SOURCE_DIR)
        set(CUB_INCLUDE_DIR ${SOURCE_DIR})
      else()
        find_package(CUB)
        if(NOT CUB_FOUND)
          message(FATAL_ERROR "CUB library not found. Help CMake to find it by setting CUB_INCLUDE_DIR, or set DOWNLOAD_VORO=ON to download it")
        endif()
      endif()

      target_include_directories(gpu PRIVATE ${CUB_INCLUDE_DIR})
    endif()
  endif()

  hip_add_executable(hip_get_devices ${LAMMPS_LIB_SOURCE_DIR}/gpu/geryon/ucl_get_devices.cpp)
  target_compile_definitions(hip_get_devices PRIVATE -DUCL_HIP)

  if(HIP_PLATFORM STREQUAL "nvcc")
    target_compile_definitions(gpu PRIVATE -D__HIP_PLATFORM_NVCC__)
    target_include_directories(gpu PRIVATE ${HIP_ROOT_DIR}/include)
    target_include_directories(gpu PRIVATE ${CUDA_INCLUDE_DIRS})
    target_link_libraries(gpu PRIVATE ${CUDA_LIBRARIES} ${CUDA_CUDA_LIBRARY})

    target_compile_definitions(hip_get_devices PRIVATE -D__HIP_PLATFORM_NVCC__)
    target_include_directories(hip_get_devices PRIVATE ${HIP_ROOT_DIR}/include)
    target_include_directories(hip_get_devices PRIVATE ${CUDA_INCLUDE_DIRS})
    target_link_libraries(hip_get_devices PRIVATE ${CUDA_LIBRARIES} ${CUDA_CUDA_LIBRARY})
  endif()

  target_link_libraries(lammps PRIVATE gpu)
endif()

# GPU package
Loading