Unverified Commit e08080ca authored by Axel Kohlmeyer's avatar Axel Kohlmeyer Committed by GitHub
Browse files

Merge pull request #1436 from rbberger/gpu_opencl_bugfixes

GPU package bugfixes
parents 0005ee3e 7b7f405d
Loading
Loading
Loading
Loading
+24 −2
Original line number Diff line number Diff line
@@ -1370,7 +1370,15 @@ if(PKG_GPU)
      set(OCL_COMMON_HEADERS ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_preprocessor.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_aux_fun1.h)

      file(GLOB GPU_LIB_CU ${LAMMPS_LIB_SOURCE_DIR}/gpu/[^.]*.cu)
      list(REMOVE_ITEM GPU_LIB_CU ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_gayberne.cu ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_gayberne_lj.cu)
      list(REMOVE_ITEM GPU_LIB_CU
        ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_gayberne.cu
        ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_gayberne_lj.cu
        ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_re_squared.cu
        ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_re_squared_lj.cu
        ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff.cu
        ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_zbl.cu
        ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_mod.cu
      )

      foreach(GPU_KERNEL ${GPU_LIB_CU})
          get_filename_component(basename ${GPU_KERNEL} NAME_WE)
@@ -1381,7 +1389,21 @@ if(PKG_GPU)

      GenerateOpenCLHeader(gayberne ${CMAKE_CURRENT_BINARY_DIR}/gpu/gayberne_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_ellipsoid_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_gayberne.cu)
      GenerateOpenCLHeader(gayberne_lj ${CMAKE_CURRENT_BINARY_DIR}/gpu/gayberne_lj_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_ellipsoid_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_gayberne_lj.cu)
      list(APPEND GPU_LIB_SOURCES ${CMAKE_CURRENT_BINARY_DIR}/gpu/gayberne_cl.h ${CMAKE_CURRENT_BINARY_DIR}/gpu/gayberne_lj_cl.h)
      GenerateOpenCLHeader(re_squared ${CMAKE_CURRENT_BINARY_DIR}/gpu/re_squared_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_ellipsoid_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_re_squared.cu)
      GenerateOpenCLHeader(re_squared_lj ${CMAKE_CURRENT_BINARY_DIR}/gpu/re_squared_lj_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_ellipsoid_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_re_squared_lj.cu)
      GenerateOpenCLHeader(tersoff ${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff.cu)
      GenerateOpenCLHeader(tersoff_zbl ${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_zbl_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_zbl_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_zbl.cu)
      GenerateOpenCLHeader(tersoff_mod ${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_mod_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_mod_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_mod.cu)

      list(APPEND GPU_LIB_SOURCES
        ${CMAKE_CURRENT_BINARY_DIR}/gpu/gayberne_cl.h
        ${CMAKE_CURRENT_BINARY_DIR}/gpu/gayberne_lj_cl.h
        ${CMAKE_CURRENT_BINARY_DIR}/gpu/re_squared_cl.h
        ${CMAKE_CURRENT_BINARY_DIR}/gpu/re_squared_lj_cl.h
        ${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_cl.h
        ${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_zbl_cl.h
        ${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_mod_cl.h
      )

      add_library(gpu STATIC ${GPU_LIB_SOURCES})
      target_link_libraries(gpu ${OpenCL_LIBRARIES})
+2 −2
Original line number Diff line number Diff line
@@ -42,9 +42,9 @@ texture<int4,1> vel_tex;
// 2. C. L. Phillips, J. A. Anderson, S. C. Glotzer, J. Comput. Phys. 230 (2011), 7191-7201.
// PRNG period = 3666320093*2^32 ~ 2^64 ~ 10^19

#define LCGA 0x4beb5d59 // Full period 32 bit LCG
#define LCGA 0x4beb5d59 /* Full period 32 bit LCG */
#define LCGC 0x2600e1f7
#define oWeylPeriod 0xda879add // Prime period 3666320093
#define oWeylPeriod 0xda879add /* Prime period 3666320093 */
#define oWeylOffset 0x8009d14b
#define TWO_N32 0.232830643653869628906250e-9f /* 2^-32 */

+3 −3
Original line number Diff line number Diff line
@@ -27,9 +27,9 @@ texture<int4,1> pos_tex;
// LJ quantities scaled by epsilon and rmin = sigma*2^(1/6) (see src/pair_lj_cubic.h)

#define _RT6TWO (numtyp)1.1224621
#define _PHIS (numtyp)-0.7869823   // energy at s
#define _DPHIDS (numtyp)2.6899009  // gradient at s
#define _A3 (numtyp)27.93357       // cubic coefficient
#define _PHIS (numtyp)-0.7869823  /* energy at s */
#define _DPHIDS (numtyp)2.6899009 /* gradient at s */
#define _A3 (numtyp)27.93357 /* cubic coefficient */

__kernel void k_lj_cubic(const __global numtyp4 *restrict x_,
                         const __global numtyp4 *restrict lj1,