Unverified Commit 94464f53 authored by Axel Kohlmeyer's avatar Axel Kohlmeyer
Browse files

Merge branch 'master' into widom

parents 581d47a1 438346a9
Loading
Loading
Loading
Loading
+50 −28
Original line number Diff line number Diff line
@@ -66,29 +66,40 @@ if(GPU_API STREQUAL "CUDA")
  # --arch translates directly instead of JIT, so this should be for the preferred or most common architecture
  set(GPU_CUDA_GENCODE "-arch=${GPU_ARCH}")
  # Fermi (GPU Arch 2.x) is supported by CUDA 3.2 to CUDA 8.0
  if((CUDA_VERSION VERSION_GREATER "3.1") AND (CUDA_VERSION VERSION_LESS "9.0"))
  if((CUDA_VERSION VERSION_GREATER_EQUAL "3.2") AND (CUDA_VERSION VERSION_LESS "9.0"))
    string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_20,code=[sm_20,compute_20] ")
  endif()
  # Kepler (GPU Arch 3.x) is supported by CUDA 5 and later
  if(CUDA_VERSION VERSION_GREATER "4.9")
  # Kepler (GPU Arch 3.0) is supported by CUDA 5 to CUDA 10.2
  if((CUDA_VERSION VERSION_GREATER_EQUAL "5.0") AND (CUDA_VERSION VERSION_LESS "11.0"))
    string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_30,code=[sm_30,compute_30] ")
  endif()
  # Kepler (GPU Arch 3.5) is supported by CUDA 5 to CUDA 11
  if((CUDA_VERSION VERSION_GREATER_EQUAL "5.0") AND (CUDA_VERSION VERSION_LESS "12.0"))
    string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_30,code=[sm_30,compute_30] -gencode arch=compute_35,code=[sm_35,compute_35]")
  endif()
  # Maxwell (GPU Arch 5.x) is supported by CUDA 6 and later
  if(CUDA_VERSION VERSION_GREATER "5.9")
  if(CUDA_VERSION VERSION_GREATER_EQUAL "6.0")
    string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_50,code=[sm_50,compute_50] -gencode arch=compute_52,code=[sm_52,compute_52]")
  endif()
  # Pascal (GPU Arch 6.x) is supported by CUDA 8 and later
  if(CUDA_VERSION VERSION_GREATER "7.9")
  if(CUDA_VERSION VERSION_GREATER_EQUAL "8.0")
    string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_60,code=[sm_60,compute_60] -gencode arch=compute_61,code=[sm_61,compute_61]")
  endif()
  # Volta (GPU Arch 7.0) is supported by CUDA 9 and later
  if(CUDA_VERSION VERSION_GREATER "8.9")
  if(CUDA_VERSION VERSION_GREATER_EQUAL "9.0")
    string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_70,code=[sm_70,compute_70]")
  endif()
  # Turing (GPU Arch 7.5) is supported by CUDA 10 and later
  if(CUDA_VERSION VERSION_GREATER "9.9")
  if(CUDA_VERSION VERSION_GREATER_EQUAL "10.0")
    string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_75,code=[sm_75,compute_75]")
  endif()
  # Ampere (GPU Arch 8.0) is supported by CUDA 11 and later
  if(CUDA_VERSION VERSION_GREATER_EQUAL "11.0")
    string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_80,code=[sm_80,compute_80]")
  endif()
  if(CUDA_VERSION VERSION_GREATER_EQUAL "12.0")
    message(WARNING "Unsupported CUDA version. Use at your own risk.")
  endif()

  cuda_compile_fatbin(GPU_GEN_OBJS ${GPU_LIB_CU} OPTIONS ${CUDA_REQUEST_PIC}
          -DUNIX -O3 --use_fast_math -Wno-deprecated-gpu-targets -DNV_KERNEL -DUCL_CUDADR ${GPU_CUDA_GENCODE} -D_${GPU_PREC_SETTING})
@@ -228,29 +239,40 @@ elseif(GPU_API STREQUAL "HIP")
    # --arch translates directly instead of JIT, so this should be for the preferred or most common architecture
    set(HIP_CUDA_GENCODE "-arch=${HIP_ARCH}")
    # Fermi (GPU Arch 2.x) is supported by CUDA 3.2 to CUDA 8.0
    if((CUDA_VERSION VERSION_GREATER "3.1") AND (CUDA_VERSION VERSION_LESS "9.0"))
    if((CUDA_VERSION VERSION_GREATER_EQUAL "3.2") AND (CUDA_VERSION VERSION_LESS "9.0"))
      string(APPEND HIP_CUDA_GENCODE " -gencode arch=compute_20,code=[sm_20,compute_20]")
    endif()
    # Kepler (GPU Arch 3.x) is supported by CUDA 5 and later
    if(CUDA_VERSION VERSION_GREATER "4.9")
      string(APPEND HIP_CUDA_GENCODE "-gencode arch=compute_30,code=[sm_30,compute_30] -gencode arch=compute_35,code=[sm_35,compute_35] ")
    # Kepler (GPU Arch 3.0) is supported by CUDA 5 to CUDA 10.2
    if((CUDA_VERSION VERSION_GREATER_EQUAL "5.0") AND (CUDA_VERSION VERSION_LESS "11.0"))
      string(APPEND HIP_CUDA_GENCODE " -gencode arch=compute_30,code=[sm_30,compute_30]")
    endif()
    # Kepler (GPU Arch 3.5) is supported by CUDA 5 to CUDA 11.0
    if((CUDA_VERSION VERSION_GREATER_EQUAL "5.0") AND (CUDA_VERSION VERSION_LESS "12.0"))
      string(APPEND HIP_CUDA_GENCODE " -gencode arch=compute_35,code=[sm_35,compute_35]")
    endif()
    # Maxwell (GPU Arch 5.x) is supported by CUDA 6 and later
    if(CUDA_VERSION VERSION_GREATER "5.9")
    if(CUDA_VERSION VERSION_GREATER_EQUAL "6.0")
      string(APPEND HIP_CUDA_GENCODE " -gencode arch=compute_50,code=[sm_50,compute_50] -gencode arch=compute_52,code=[sm_52,compute_52]")
    endif()
    # Pascal (GPU Arch 6.x) is supported by CUDA 8 and later
    if(CUDA_VERSION VERSION_GREATER "7.9")
    if(CUDA_VERSION VERSION_GREATER_EQUAL "8.0")
      string(APPEND HIP_CUDA_GENCODE " -gencode arch=compute_60,code=[sm_60,compute_60] -gencode arch=compute_61,code=[sm_61,compute_61]")
    endif()
    # Volta (GPU Arch 7.0) is supported by CUDA 9 and later
    if(CUDA_VERSION VERSION_GREATER "8.9")
    if(CUDA_VERSION VERSION_GREATER_EQUAL "9.0")
      string(APPEND HIP_CUDA_GENCODE " -gencode arch=compute_70,code=[sm_70,compute_70]")
    endif()
    # Turing (GPU Arch 7.5) is supported by CUDA 10 and later
    if(CUDA_VERSION VERSION_GREATER "9.9")
    if(CUDA_VERSION VERSION_GREATER_EQUAL "10.0")
      string(APPEND HIP_CUDA_GENCODE " -gencode arch=compute_75,code=[sm_75,compute_75]")
    endif()
    # Ampere (GPU Arch 8.0) is supported by CUDA 11 and later
    if(CUDA_VERSION VERSION_GREATER_EQUAL "11.0")
      string(APPEND HIP_CUDA_GENCODE " -gencode arch=compute_80,code=[sm_80,compute_80]")
    endif()
    if(CUDA_VERSION VERSION_GREATER_EQUAL "12.0")
      message(WARNING "Unsupported CUDA version. Use at your own risk.")
    endif()
  endif()

  file(GLOB GPU_LIB_CU ${LAMMPS_LIB_SOURCE_DIR}/gpu/[^.]*.cu ${CMAKE_CURRENT_SOURCE_DIR}/gpu/[^.]*.cu)
+11 −2
Original line number Diff line number Diff line
@@ -105,10 +105,10 @@ CMake build
                                # generic (default) or intel (Intel CPU) or fermi, kepler, cypress (NVIDIA)
   -D GPU_ARCH=value            # primary GPU hardware choice for GPU_API=cuda
                                # value = sm_XX, see below
                                # default is sm_30
                                # default is sm_50
   -D HIP_ARCH=value            # primary GPU hardware choice for GPU_API=hip
                                # value depends on selected HIP_PLATFORM
                                # default is 'gfx906' for HIP_PLATFORM=hcc and 'sm_30' for HIP_PLATFORM=nvcc
                                # default is 'gfx906' for HIP_PLATFORM=hcc and 'sm_50' for HIP_PLATFORM=nvcc
   -D HIP_USE_DEVICE_SORT=value # enables GPU sorting
                                # value = yes (default) or no
   -D CUDPP_OPT=value           # optimization setting for GPU_API=cuda
@@ -1255,6 +1255,15 @@ also typically :ref:`install the USER-OMP package <user-omp>`, as it can be
used in tandem with the USER-INTEL package to good effect, as explained
on the :doc:`Speed intel <Speed_intel>` doc page.

When using Intel compilers, version 16.0 or later is required.  You can
also use the GNU or Clang compilers and they will provide performance
improvements over regular styles and USER-OMP styles, but less so than
with the Intel compilers.  Please also note that some compilers have
been found to apply memory alignment constraints incompletely or
incorrectly and thus can cause segmentation faults in otherwise correct
code when using features from the USER-INTEL package.


CMake build
^^^^^^^^^^^

+2 −1
Original line number Diff line number Diff line
@@ -101,7 +101,8 @@ An alphabetic list of all general LAMMPS commands.
   * :doc:`region <region>`
   * :doc:`replicate <replicate>`
   * :doc:`rerun <rerun>`
   * :doc:`reset_ids <reset_ids>`
   * :doc:`reset_atom_ids <reset_atom_ids>`
   * :doc:`reset_mol_ids <reset_mol_ids>`
   * :doc:`reset_timestep <reset_timestep>`
   * :doc:`restart <restart>`
   * :doc:`run <run>`
+35 −16
Original line number Diff line number Diff line
@@ -3,13 +3,15 @@ Output from LAMMPS (thermo, dumps, computes, fixes, variables)

There are four basic kinds of LAMMPS output:

* :doc:`Thermodynamic output <thermo_style>`, which is a list
  of quantities printed every few timesteps to the screen and logfile.
* :doc:`Thermodynamic output <thermo_style>`, which is a list of
  quantities printed every few timesteps to the screen and logfile.
* :doc:`Dump files <dump>`, which contain snapshots of atoms and various
  per-atom values and are written at a specified frequency.
* Certain fixes can output user-specified quantities to files: :doc:`fix ave/time <fix_ave_time>` for time averaging, :doc:`fix ave/chunk <fix_ave_chunk>` for spatial or other averaging, and :doc:`fix print <fix_print>` for single-line output of
  :doc:`variables <variable>`.  Fix print can also output to the
  screen.
* Certain fixes can output user-specified quantities to files:
  :doc:`fix ave/time <fix_ave_time>` for time averaging,
  :doc:`fix ave/chunk <fix_ave_chunk>` for spatial or other averaging, and
  :doc:`fix print <fix_print>` for single-line output of
  :doc:`variables <variable>`.  Fix print can also output to the screen.
* :doc:`Restart files <restart>`.

A simulation prints one set of thermodynamic output and (optionally)
@@ -41,7 +43,7 @@ to output and the kind of data they operate on and produce:
.. _global:

Global/per-atom/local data
---------------------------------------
--------------------------

Various output-related commands work with three different styles of
data: global, per-atom, or local.  A global datum is one or more
@@ -54,7 +56,7 @@ bond distances.
.. _scalar:

Scalar/vector/array data
-------------------------------------
------------------------

Global, per-atom, and local datums can each come in three kinds: a
single scalar value, a vector of values, or a 2d array of values.  The
@@ -81,10 +83,27 @@ the dimension twice (array -> scalar). Thus a command that uses
scalar values as input can typically also process elements of a vector
or array.

.. _disambiguation:

Disambiguation
--------------

Some computes and fixes produce data in multiple styles, e.g. a global
scalar and a per-atom vector. Usually the context in which the input
script references the data determines which style is meant. Example: if
a compute provides both a global scalar and a per-atom vector, the
former will be accessed by using ``c_ID`` in an equal-style variable,
while the latter will be accessed by using ``c_ID`` in an atom-style
variable.  Note that atom-style variable formulas can also access global
scalars, but in this case it is not possible to do so directly because of
the ambiguity.  Instead, an equal-style variable can be defined which
accesses the global scalar, and that variable used in the atom-style
variable formula in place of ``c_ID``.

.. _thermo:

Thermodynamic output
---------------------------------
--------------------

The frequency and format of thermodynamic output is set by the
:doc:`thermo <thermo>`, :doc:`thermo_style <thermo_style>`, and
@@ -112,7 +131,7 @@ intensive result.
.. _dump:

Dump file output
---------------------------
----------------

Dump file output is specified by the :doc:`dump <dump>` and
:doc:`dump_modify <dump_modify>` commands.  There are several
@@ -138,7 +157,7 @@ command.
.. _fixoutput:

Fixes that write output files
---------------------------------------------
-----------------------------

Several fixes take various quantities as input and can write output
files: :doc:`fix ave/time <fix_ave_time>`, :doc:`fix ave/chunk <fix_ave_chunk>`, :doc:`fix ave/histo <fix_ave_histo>`,
@@ -192,7 +211,7 @@ from normal thermodynamic or dump file output.
.. _computeoutput:

Computes that process output quantities
-----------------------------------------------------------
---------------------------------------

The :doc:`compute reduce <compute_reduce>` and :doc:`compute reduce/region <compute_reduce>` commands take one or more per-atom
or local vector quantities as inputs and "reduce" them (sum, min, max,
@@ -219,7 +238,7 @@ output commands.
.. _fixprocoutput:

Fixes that process output quantities
--------------------------------------------------------
------------------------------------

The :doc:`fix vector <fix_vector>` command can create global vectors as
output from global scalars as input, accumulating them one element at
@@ -244,7 +263,7 @@ The output of this fix can be used as input to other output commands.
.. _compute:

Computes that generate values to output
-----------------------------------------------------
---------------------------------------

Every :doc:`compute <compute>` in LAMMPS produces either global or
per-atom or local values.  The values can be scalars or vectors or
@@ -257,7 +276,7 @@ without the word "atom" or "local" produce global values.
.. _fix:

Fixes that generate values to output
----------------------------------------------
------------------------------------

Some :doc:`fixes <fix>` in LAMMPS produce either global or per-atom or
local values which can be accessed by other commands.  The values can
@@ -269,7 +288,7 @@ describes them.
.. _variable:

Variables that generate values to output
-------------------------------------------------------
----------------------------------------

:doc:`Variables <variable>` defined in an input script can store one or
more strings.  But equal-style, vector-style, and atom-style or
@@ -284,7 +303,7 @@ commands described in this section.
.. _table:

Summary table of output options and data flow between commands
--------------------------------------------------------------------------
--------------------------------------------------------------

This table summarizes the various commands that can be used for
generating output from LAMMPS.  Each command produces output data of
+2 −1
Original line number Diff line number Diff line
@@ -306,7 +306,8 @@ gpu" or "-suffix gpu" :doc:`command-line switches <Run_options>`. See
also the :ref:`KOKKOS <PKG-KOKKOS>` package, which has GPU-enabled styles.

**Authors:** Mike Brown (Intel) while at Sandia and ORNL and Trung Nguyen
(Northwestern U) while at ORNL.
(Northwestern U) while at ORNL and later. AMD HIP support by Evgeny
Kuznetsov, Vladimir Stegailov, and Vsevolod Nikolskiy (HSE University).

**Install:**

Loading