Unverified Commit f2ee88d1 authored by Axel Kohlmeyer's avatar Axel Kohlmeyer
Browse files

Merge branch 'master' into progguide

# Conflicts:
#	doc/utils/sphinx-config/false_positives.txt
#	python/lammps.py
parents 10b830d6 39cefc4e
Loading
Loading
Loading
Loading
+2 −10
Original line number Diff line number Diff line
@@ -69,18 +69,10 @@ if(${CMAKE_CXX_COMPILER_ID} STREQUAL "Intel")
  endif()
endif()

if(${CMAKE_CXX_COMPILER_ID} STREQUAL "GNU")
  set(CMAKE_TUNE_DEFAULT "-march=native")
endif()

if(${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang")
  set(CMAKE_TUNE_DEFAULT "-march=native")
endif()

# we require C++11 without extensions
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_CXX_EXTENSIONS OFF CACHE BOOL "Use compiler extensions")

########################################################################
# User input options                                                   #
@@ -366,7 +358,7 @@ foreach(PKG_WITH_INCL KSPACE PYTHON VORONOI USER-COLVARS USER-MOLFILE USER-NETCD
  endif()
endforeach()

set(CMAKE_TUNE_FLAGS "${CMAKE_TUNE_DEFAULT}" CACHE STRING "Compiler specific optimization or instrumentation")
set(CMAKE_TUNE_FLAGS "${CMAKE_TUNE_DEFAULT}" CACHE STRING "Compiler and machine specific optimization flags (compilation only)")
separate_arguments(CMAKE_TUNE_FLAGS)
foreach(_FLAG ${CMAKE_TUNE_FLAGS})
  target_compile_options(lammps PRIVATE ${_FLAG})
+4 −4
Original line number Diff line number Diff line
@@ -52,26 +52,26 @@ add_library(GTest::GTest UNKNOWN IMPORTED)
set_target_properties(GTest::GTest PROPERTIES
        IMPORTED_LOCATION ${GTEST_LIBRARY_PATH}
        INTERFACE_INCLUDE_DIRECTORIES ${GTEST_INCLUDE_DIR}
        IMPORTED_LINK_INTERFACE_LIBRARIES ${CMAKE_THREAD_LIBS_INIT})
        INTERFACE_LINK_LIBRARIES "${CMAKE_THREAD_LIBS_INIT}")
add_dependencies(GTest::GTest googletest)

add_library(GTest::GMock UNKNOWN IMPORTED)
set_target_properties(GTest::GMock PROPERTIES
        IMPORTED_LOCATION ${GMOCK_LIBRARY_PATH}
        INTERFACE_INCLUDE_DIRECTORIES ${GMOCK_INCLUDE_DIR}
        IMPORTED_LINK_INTERFACE_LIBRARIES ${CMAKE_THREAD_LIBS_INIT})
        INTERFACE_LINK_LIBRARIES "${CMAKE_THREAD_LIBS_INIT}")
add_dependencies(GTest::GMock googletest)

add_library(GTest::GTestMain UNKNOWN IMPORTED)
set_target_properties(GTest::GTestMain PROPERTIES
        IMPORTED_LOCATION ${GTEST_MAIN_LIBRARY_PATH}
        INTERFACE_INCLUDE_DIRECTORIES ${GTEST_INCLUDE_DIR}
        IMPORTED_LINK_INTERFACE_LIBRARIES ${CMAKE_THREAD_LIBS_INIT})
        INTERFACE_LINK_LIBRARIES "${CMAKE_THREAD_LIBS_INIT}")
add_dependencies(GTest::GTestMain googletest)

add_library(GTest::GMockMain UNKNOWN IMPORTED)
set_target_properties(GTest::GMockMain PROPERTIES
        IMPORTED_LOCATION ${GMOCK_MAIN_LIBRARY_PATH}
        INTERFACE_INCLUDE_DIRECTORIES ${GMOCK_INCLUDE_DIR}
        IMPORTED_LINK_INTERFACE_LIBRARIES ${CMAKE_THREAD_LIBS_INIT})
        INTERFACE_LINK_LIBRARIES "${CMAKE_THREAD_LIBS_INIT}")
add_dependencies(GTest::GMockMain googletest)
+50 −28
Original line number Diff line number Diff line
@@ -66,29 +66,40 @@ if(GPU_API STREQUAL "CUDA")
  # --arch translates directly instead of JIT, so this should be for the preferred or most common architecture
  set(GPU_CUDA_GENCODE "-arch=${GPU_ARCH}")
  # Fermi (GPU Arch 2.x) is supported by CUDA 3.2 to CUDA 8.0
  if((CUDA_VERSION VERSION_GREATER "3.1") AND (CUDA_VERSION VERSION_LESS "9.0"))
  if((CUDA_VERSION VERSION_GREATER_EQUAL "3.2") AND (CUDA_VERSION VERSION_LESS "9.0"))
    string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_20,code=[sm_20,compute_20] ")
  endif()
  # Kepler (GPU Arch 3.x) is supported by CUDA 5 and later
  if(CUDA_VERSION VERSION_GREATER "4.9")
  # Kepler (GPU Arch 3.0) is supported by CUDA 5 to CUDA 10.2
  if((CUDA_VERSION VERSION_GREATER_EQUAL "5.0") AND (CUDA_VERSION VERSION_LESS "11.0"))
    string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_30,code=[sm_30,compute_30] ")
  endif()
  # Kepler (GPU Arch 3.5) is supported by CUDA 5 to CUDA 11
  if((CUDA_VERSION VERSION_GREATER_EQUAL "5.0") AND (CUDA_VERSION VERSION_LESS "12.0"))
    string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_30,code=[sm_30,compute_30] -gencode arch=compute_35,code=[sm_35,compute_35]")
  endif()
  # Maxwell (GPU Arch 5.x) is supported by CUDA 6 and later
  if(CUDA_VERSION VERSION_GREATER "5.9")
  if(CUDA_VERSION VERSION_GREATER_EQUAL "6.0")
    string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_50,code=[sm_50,compute_50] -gencode arch=compute_52,code=[sm_52,compute_52]")
  endif()
  # Pascal (GPU Arch 6.x) is supported by CUDA 8 and later
  if(CUDA_VERSION VERSION_GREATER "7.9")
  if(CUDA_VERSION VERSION_GREATER_EQUAL "8.0")
    string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_60,code=[sm_60,compute_60] -gencode arch=compute_61,code=[sm_61,compute_61]")
  endif()
  # Volta (GPU Arch 7.0) is supported by CUDA 9 and later
  if(CUDA_VERSION VERSION_GREATER "8.9")
  if(CUDA_VERSION VERSION_GREATER_EQUAL "9.0")
    string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_70,code=[sm_70,compute_70]")
  endif()
  # Turing (GPU Arch 7.5) is supported by CUDA 10 and later
  if(CUDA_VERSION VERSION_GREATER "9.9")
  if(CUDA_VERSION VERSION_GREATER_EQUAL "10.0")
    string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_75,code=[sm_75,compute_75]")
  endif()
  # Ampere (GPU Arch 8.0) is supported by CUDA 11 and later
  if(CUDA_VERSION VERSION_GREATER_EQUAL "11.0")
    string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_80,code=[sm_80,compute_80]")
  endif()
  if(CUDA_VERSION VERSION_GREATER_EQUAL "12.0")
    message(WARNING "Unsupported CUDA version. Use at your own risk.")
  endif()

  cuda_compile_fatbin(GPU_GEN_OBJS ${GPU_LIB_CU} OPTIONS ${CUDA_REQUEST_PIC}
          -DUNIX -O3 --use_fast_math -Wno-deprecated-gpu-targets -DNV_KERNEL -DUCL_CUDADR ${GPU_CUDA_GENCODE} -D_${GPU_PREC_SETTING})
@@ -228,29 +239,40 @@ elseif(GPU_API STREQUAL "HIP")
    # --arch translates directly instead of JIT, so this should be for the preferred or most common architecture
    set(HIP_CUDA_GENCODE "-arch=${HIP_ARCH}")
    # Fermi (GPU Arch 2.x) is supported by CUDA 3.2 to CUDA 8.0
    if((CUDA_VERSION VERSION_GREATER "3.1") AND (CUDA_VERSION VERSION_LESS "9.0"))
    if((CUDA_VERSION VERSION_GREATER_EQUAL "3.2") AND (CUDA_VERSION VERSION_LESS "9.0"))
      string(APPEND HIP_CUDA_GENCODE " -gencode arch=compute_20,code=[sm_20,compute_20]")
    endif()
    # Kepler (GPU Arch 3.x) is supported by CUDA 5 and later
    if(CUDA_VERSION VERSION_GREATER "4.9")
      string(APPEND HIP_CUDA_GENCODE "-gencode arch=compute_30,code=[sm_30,compute_30] -gencode arch=compute_35,code=[sm_35,compute_35] ")
    # Kepler (GPU Arch 3.0) is supported by CUDA 5 to CUDA 10.2
    if((CUDA_VERSION VERSION_GREATER_EQUAL "5.0") AND (CUDA_VERSION VERSION_LESS "11.0"))
      string(APPEND HIP_CUDA_GENCODE " -gencode arch=compute_30,code=[sm_30,compute_30]")
    endif()
    # Kepler (GPU Arch 3.5) is supported by CUDA 5 to CUDA 11.0
    if((CUDA_VERSION VERSION_GREATER_EQUAL "5.0") AND (CUDA_VERSION VERSION_LESS "12.0"))
      string(APPEND HIP_CUDA_GENCODE " -gencode arch=compute_35,code=[sm_35,compute_35]")
    endif()
    # Maxwell (GPU Arch 5.x) is supported by CUDA 6 and later
    if(CUDA_VERSION VERSION_GREATER "5.9")
    if(CUDA_VERSION VERSION_GREATER_EQUAL "6.0")
      string(APPEND HIP_CUDA_GENCODE " -gencode arch=compute_50,code=[sm_50,compute_50] -gencode arch=compute_52,code=[sm_52,compute_52]")
    endif()
    # Pascal (GPU Arch 6.x) is supported by CUDA 8 and later
    if(CUDA_VERSION VERSION_GREATER "7.9")
    if(CUDA_VERSION VERSION_GREATER_EQUAL "8.0")
      string(APPEND HIP_CUDA_GENCODE " -gencode arch=compute_60,code=[sm_60,compute_60] -gencode arch=compute_61,code=[sm_61,compute_61]")
    endif()
    # Volta (GPU Arch 7.0) is supported by CUDA 9 and later
    if(CUDA_VERSION VERSION_GREATER "8.9")
    if(CUDA_VERSION VERSION_GREATER_EQUAL "9.0")
      string(APPEND HIP_CUDA_GENCODE " -gencode arch=compute_70,code=[sm_70,compute_70]")
    endif()
    # Turing (GPU Arch 7.5) is supported by CUDA 10 and later
    if(CUDA_VERSION VERSION_GREATER "9.9")
    if(CUDA_VERSION VERSION_GREATER_EQUAL "10.0")
      string(APPEND HIP_CUDA_GENCODE " -gencode arch=compute_75,code=[sm_75,compute_75]")
    endif()
    # Ampere (GPU Arch 8.0) is supported by CUDA 11 and later
    if(CUDA_VERSION VERSION_GREATER_EQUAL "11.0")
      string(APPEND HIP_CUDA_GENCODE " -gencode arch=compute_80,code=[sm_80,compute_80]")
    endif()
    if(CUDA_VERSION VERSION_GREATER_EQUAL "12.0")
      message(WARNING "Unsupported CUDA version. Use at your own risk.")
    endif()
  endif()

  file(GLOB GPU_LIB_CU ${LAMMPS_LIB_SOURCE_DIR}/gpu/[^.]*.cu ${CMAKE_CURRENT_SOURCE_DIR}/gpu/[^.]*.cu)
+12 −13
Original line number Diff line number Diff line
@@ -193,14 +193,17 @@ compiler and any :doc:`accelerator packages <Speed_packages>` you have
included in the build.

You can tell CMake to look for a specific compiler with setting CMake
variable during configuration.  For a few common choices, there are also
presets in the ``cmake/presets`` folder.  For convenience, there is a
``CMAKE_TUNE_FLAGS`` variable that can be set to apply global compiler
options.  More on that below, but you can also specify the corresponding
``CMAKE_*_FLAGS`` variables individually if you want to experiment with
alternate optimization flags.  You should specify all 3 compilers, so
that the (few) LAMMPS source files written in C or Fortran are built
with a compiler consistent with the one used for the C++ files:
variables (listed below) during configuration.  For a few common
choices, there are also presets in the ``cmake/presets`` folder.  For
convenience, there is a ``CMAKE_TUNE_FLAGS`` variable that can be set to
apply global compiler options (applied to compilation only), to be used
for adding compiler or host specific optimization flags in addition to
the "flags" variables listed below. You may also specify the
corresponding ``CMAKE_*_FLAGS`` variables individually, if you want to
experiment with alternate optimization flags.  You should specify all 3
compilers, so that the (few) LAMMPS source files written in C or Fortran
are built with a compiler consistent with the one used for the C++
files:

.. code-block:: bash

@@ -229,11 +232,7 @@ can be loaded with `-C ../cmake/presets/clang.cmake`. Similarly,

In addition you can set ``CMAKE_TUNE_FLAGS`` to specifically add
compiler flags to tune for optimal performance on given hosts. By
default these are initialized to some compiler specific flags, to
optimize the LAMMPS executable with optimizations and instructions
available on the host where LAMMPS is compiled. For example, for Intel
compilers this would be ``-xHost`` and for GNU compilers this would be
``-march=native``. To turn these flags off, do ``-D CMAKE_TUNE_FLAGS=``.
default this variable is empty.

.. note::

+11 −2
Original line number Diff line number Diff line
@@ -105,10 +105,10 @@ CMake build
                                # generic (default) or intel (Intel CPU) or fermi, kepler, cypress (NVIDIA)
   -D GPU_ARCH=value            # primary GPU hardware choice for GPU_API=cuda
                                # value = sm_XX, see below
                                # default is sm_30
                                # default is sm_50
   -D HIP_ARCH=value            # primary GPU hardware choice for GPU_API=hip
                                # value depends on selected HIP_PLATFORM
                                # default is 'gfx906' for HIP_PLATFORM=hcc and 'sm_30' for HIP_PLATFORM=nvcc
                                # default is 'gfx906' for HIP_PLATFORM=hcc and 'sm_50' for HIP_PLATFORM=nvcc
   -D HIP_USE_DEVICE_SORT=value # enables GPU sorting
                                # value = yes (default) or no
   -D CUDPP_OPT=value           # optimization setting for GPU_API=cuda
@@ -1255,6 +1255,15 @@ also typically :ref:`install the USER-OMP package <user-omp>`, as it can be
used in tandem with the USER-INTEL package to good effect, as explained
on the :doc:`Speed intel <Speed_intel>` doc page.

When using Intel compilers version 16.0 or later is required.  You can
also use the GNU or Clang compilers and they will provide performance
improvements over regular styles and USER-OMP styles, but less so than
with the Intel compilers.  Please also note, that some compilers have
been found to apply memory alignment constraints incompletely or
incorrectly and thus can cause segmentation faults in otherwise correct
code when using features from the USER-INTEL package.


CMake build
^^^^^^^^^^^

Loading