Merge pull request #2234 from akohlmey/collected-small-fixes (4cbe0954) · Commits · 郑智淋 / lammps

cmake/Modules/Packages/GPU.cmake

+50 −28

Original line number	Diff line number	Diff line
		@@ -66,29 +66,40 @@ if(GPU_API STREQUAL "CUDA")
		# --arch translates directly instead of JIT, so this should be for the preferred or most common architecture
		set(GPU_CUDA_GENCODE "-arch=${GPU_ARCH}")
		# Fermi (GPU Arch 2.x) is supported by CUDA 3.2 to CUDA 8.0
		if((CUDA_VERSION VERSION_GREATER "3.1") AND (CUDA_VERSION VERSION_LESS "9.0"))
		if((CUDA_VERSION VERSION_GREATER_EQUAL "3.2") AND (CUDA_VERSION VERSION_LESS "9.0"))
		string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_20,code=[sm_20,compute_20] ")
		endif()
		# Kepler (GPU Arch 3.x) is supported by CUDA 5 and later
		if(CUDA_VERSION VERSION_GREATER "4.9")
		# Kepler (GPU Arch 3.0) is supported by CUDA 5 to CUDA 10.2
		if((CUDA_VERSION VERSION_GREATER_EQUAL "5.0") AND (CUDA_VERSION VERSION_LESS "11.0"))
		string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_30,code=[sm_30,compute_30] ")
		endif()
		# Kepler (GPU Arch 3.5) is supported by CUDA 5 to CUDA 11.x
		# Only sm_35 is added here: sm_30 is handled by its own block (CUDA < 11.0),
		# so repeating it would duplicate the target there and request an
		# architecture that CUDA 11.x no longer accepts.
		if((CUDA_VERSION VERSION_GREATER_EQUAL "5.0") AND (CUDA_VERSION VERSION_LESS "12.0"))
		string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_35,code=[sm_35,compute_35]")
		endif()
		# Maxwell (GPU Arch 5.x) is supported by CUDA 6 and later
		if(CUDA_VERSION VERSION_GREATER "5.9")
		if(CUDA_VERSION VERSION_GREATER_EQUAL "6.0")
		string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_50,code=[sm_50,compute_50] -gencode arch=compute_52,code=[sm_52,compute_52]")
		endif()
		# Pascal (GPU Arch 6.x) is supported by CUDA 8 and later
		if(CUDA_VERSION VERSION_GREATER "7.9")
		if(CUDA_VERSION VERSION_GREATER_EQUAL "8.0")
		string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_60,code=[sm_60,compute_60] -gencode arch=compute_61,code=[sm_61,compute_61]")
		endif()
		# Volta (GPU Arch 7.0) is supported by CUDA 9 and later
		if(CUDA_VERSION VERSION_GREATER "8.9")
		if(CUDA_VERSION VERSION_GREATER_EQUAL "9.0")
		string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_70,code=[sm_70,compute_70]")
		endif()
		# Turing (GPU Arch 7.5) is supported by CUDA 10 and later
		if(CUDA_VERSION VERSION_GREATER "9.9")
		if(CUDA_VERSION VERSION_GREATER_EQUAL "10.0")
		string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_75,code=[sm_75,compute_75]")
		endif()
		# Ampere (GPU Arch 8.0) is supported by CUDA 11 and later
		if(CUDA_VERSION VERSION_GREATER_EQUAL "11.0")
		string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_80,code=[sm_80,compute_80]")
		endif()
		if(CUDA_VERSION VERSION_GREATER_EQUAL "12.0")
		message(WARNING "Unsupported CUDA version. Use at your own risk.")
		endif()

		cuda_compile_fatbin(GPU_GEN_OBJS ${GPU_LIB_CU} OPTIONS ${CUDA_REQUEST_PIC}
		-DUNIX -O3 --use_fast_math -Wno-deprecated-gpu-targets -DNV_KERNEL -DUCL_CUDADR ${GPU_CUDA_GENCODE} -D_${GPU_PREC_SETTING})
		@@ -228,29 +239,40 @@ elseif(GPU_API STREQUAL "HIP")
		# --arch translates directly instead of JIT, so this should be for the preferred or most common architecture
		set(HIP_CUDA_GENCODE "-arch=${HIP_ARCH}")
		# Fermi (GPU Arch 2.x) is supported by CUDA 3.2 to CUDA 8.0
		if((CUDA_VERSION VERSION_GREATER "3.1") AND (CUDA_VERSION VERSION_LESS "9.0"))
		if((CUDA_VERSION VERSION_GREATER_EQUAL "3.2") AND (CUDA_VERSION VERSION_LESS "9.0"))
		string(APPEND HIP_CUDA_GENCODE " -gencode arch=compute_20,code=[sm_20,compute_20]")
		endif()
		# Kepler (GPU Arch 3.x) is supported by CUDA 5 and later
		if(CUDA_VERSION VERSION_GREATER "4.9")
		string(APPEND HIP_CUDA_GENCODE "-gencode arch=compute_30,code=[sm_30,compute_30] -gencode arch=compute_35,code=[sm_35,compute_35] ")
		# Kepler (GPU Arch 3.0) is supported by CUDA 5 to CUDA 10.2
		if((CUDA_VERSION VERSION_GREATER_EQUAL "5.0") AND (CUDA_VERSION VERSION_LESS "11.0"))
		string(APPEND HIP_CUDA_GENCODE " -gencode arch=compute_30,code=[sm_30,compute_30]")
		endif()
		# Kepler (GPU Arch 3.5) is supported by CUDA 5 to CUDA 11.x
		if((CUDA_VERSION VERSION_GREATER_EQUAL "5.0") AND (CUDA_VERSION VERSION_LESS "12.0"))
		string(APPEND HIP_CUDA_GENCODE " -gencode arch=compute_35,code=[sm_35,compute_35]")
		endif()
		# Maxwell (GPU Arch 5.x) is supported by CUDA 6 and later
		if(CUDA_VERSION VERSION_GREATER "5.9")
		if(CUDA_VERSION VERSION_GREATER_EQUAL "6.0")
		string(APPEND HIP_CUDA_GENCODE " -gencode arch=compute_50,code=[sm_50,compute_50] -gencode arch=compute_52,code=[sm_52,compute_52]")
		endif()
		# Pascal (GPU Arch 6.x) is supported by CUDA 8 and later
		if(CUDA_VERSION VERSION_GREATER "7.9")
		if(CUDA_VERSION VERSION_GREATER_EQUAL "8.0")
		string(APPEND HIP_CUDA_GENCODE " -gencode arch=compute_60,code=[sm_60,compute_60] -gencode arch=compute_61,code=[sm_61,compute_61]")
		endif()
		# Volta (GPU Arch 7.0) is supported by CUDA 9 and later
		if(CUDA_VERSION VERSION_GREATER "8.9")
		if(CUDA_VERSION VERSION_GREATER_EQUAL "9.0")
		string(APPEND HIP_CUDA_GENCODE " -gencode arch=compute_70,code=[sm_70,compute_70]")
		endif()
		# Turing (GPU Arch 7.5) is supported by CUDA 10 and later
		if(CUDA_VERSION VERSION_GREATER "9.9")
		if(CUDA_VERSION VERSION_GREATER_EQUAL "10.0")
		string(APPEND HIP_CUDA_GENCODE " -gencode arch=compute_75,code=[sm_75,compute_75]")
		endif()
		# Ampere (GPU Arch 8.0) is supported by CUDA 11 and later
		if(CUDA_VERSION VERSION_GREATER_EQUAL "11.0")
		string(APPEND HIP_CUDA_GENCODE " -gencode arch=compute_80,code=[sm_80,compute_80]")
		endif()
		if(CUDA_VERSION VERSION_GREATER_EQUAL "12.0")
		message(WARNING "Unsupported CUDA version. Use at your own risk.")
		endif()
		endif()

		# Collect every .cu source whose file name does not start with a dot
		# (the leading [^.] skips hidden files; the * matches the rest of the name).
		file(GLOB GPU_LIB_CU ${LAMMPS_LIB_SOURCE_DIR}/gpu/[^.]*.cu ${CMAKE_CURRENT_SOURCE_DIR}/gpu/[^.]*.cu)

doc/src/Build_extras.rst

+11 −2

Original line number	Diff line number	Diff line
		@@ -105,10 +105,10 @@ CMake build
		# generic (default) or intel (Intel CPU) or fermi, kepler, cypress (NVIDIA)
		-D GPU_ARCH=value # primary GPU hardware choice for GPU_API=cuda
		# value = sm_XX, see below
		# default is sm_30
		# default is sm_50
		-D HIP_ARCH=value # primary GPU hardware choice for GPU_API=hip
		# value depends on selected HIP_PLATFORM
		# default is 'gfx906' for HIP_PLATFORM=hcc and 'sm_30' for HIP_PLATFORM=nvcc
		# default is 'gfx906' for HIP_PLATFORM=hcc and 'sm_50' for HIP_PLATFORM=nvcc
		-D HIP_USE_DEVICE_SORT=value # enables GPU sorting
		# value = yes (default) or no
		-D CUDPP_OPT=value # optimization setting for GPU_API=cuda
		@@ -1255,6 +1255,15 @@ also typically :ref:`install the USER-OMP package <user-omp>`, as it can be
		used in tandem with the USER-INTEL package to good effect, as explained
		on the :doc:`Speed intel <Speed_intel>` doc page.

		When using Intel compilers, version 16.0 or later is required. You can
		also use the GNU or Clang compilers and they will provide performance
		improvements over regular styles and USER-OMP styles, but less so than
		with the Intel compilers. Please also note that some compilers have
		been found to apply memory alignment constraints incompletely or
		incorrectly and thus can cause segmentation faults in otherwise correct
		code when using features from the USER-INTEL package.


		CMake build
		^^^^^^^^^^^

doc/src/Packages_details.rst

+2 −1

Original line number	Diff line number	Diff line
		@@ -306,7 +306,8 @@ gpu" or "-suffix gpu" :doc:`command-line switches <Run_options>`. See
		also the :ref:`KOKKOS <PKG-KOKKOS>` package, which has GPU-enabled styles.

		Authors: Mike Brown (Intel) while at Sandia and ORNL and Trung Nguyen
		(Northwestern U) while at ORNL.
		(Northwestern U) while at ORNL and later. AMD HIP support by Evgeny
		Kuznetsov, Vladimir Stegailov, and Vsevolod Nikolskiy (HSE University).

		Install:

doc/src/Speed_gpu.rst

+4 −0

Original line number	Diff line number	Diff line
		@@ -50,6 +50,10 @@ but this can be overridden using the device option of the :doc:`package <package
		command. run lammps/lib/gpu/ocl_get_devices to get a list of available
		platforms and devices with a suitable ICD available.

		To compile and use this package in HIP mode, you have to have the AMD ROCm
		software installed. Versions of ROCm older than 3.5 are currently deprecated
		by AMD.

		Building LAMMPS with the GPU package:

		See the :ref:`Build extras <gpu>` doc page for

doc/src/Speed_intel.rst

+3 −3

Original line number	Diff line number	Diff line
		@@ -138,10 +138,10 @@ For Intel Xeon Phi co-processors (Offload):

		Required hardware/software:

		When using Intel compilers, version 16.0 or later is required.

		In order to use offload to co-processors, an Intel Xeon Phi
		co-processor and an Intel compiler are required. For this, the
		recommended version of the Intel compiler is 14.0.1.106 or
		versions 15.0.2.044 and higher.
		co-processor and an Intel compiler are required.

		Although any compiler can be used with the USER-INTEL package,
		currently, vectorization directives are disabled by default when

Admin message