Unverified Commit 66184813 authored by Steve Plimpton's avatar Steve Plimpton Committed by GitHub
Browse files

Merge pull request #926 from ndtrung81/gpu-updates

some updates to the GPU package
parents 29e55521 3c781afa
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -760,7 +760,7 @@ if(PKG_GPU)
      add_library(gpu STATIC ${GPU_LIB_SOURCES})
      target_link_libraries(gpu ${OpenCL_LIBRARIES})
      target_include_directories(gpu PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/gpu ${OpenCL_INCLUDE_DIRS})
      target_compile_definitions(gpu PRIVATE -D_${GPU_PREC} -DMPI_GERYON -DUCL_NO_EXIT)
      target_compile_definitions(gpu PRIVATE -D_${GPU_PREC} -D${OCL_TUNE}_OCL -DMPI_GERYON -DUCL_NO_EXIT)
      target_compile_definitions(gpu PRIVATE -DUSE_OPENCL)

      list(APPEND LAMMPS_LINK_LIBS gpu)
+53 −0
Original line number Diff line number Diff line
# /* ----------------------------------------------------------------------   
#  Generic Linux Makefile for CUDA 
#     - Change CUDA_ARCH for your GPU
# ------------------------------------------------------------------------- */

# which file will be copied to Makefile.lammps

EXTRAMAKE = Makefile.lammps.standard

ifeq ($(CUDA_HOME),)
CUDA_HOME = /usr/local/cuda
endif

NVCC = nvcc

# Kepler CUDA
#CUDA_ARCH = -arch=sm_35
# newer CUDA
#CUDA_ARCH = -arch=sm_13
# older CUDA
#CUDA_ARCH = -arch=sm_10 -DCUDA_PRE_THREE

CUDA_ARCH = -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61

# this setting should match LAMMPS Makefile
# one of LAMMPS_SMALLBIG (default), LAMMPS_BIGBIG and LAMMPS_SMALLSMALL

LMP_INC = -DLAMMPS_SMALLBIG

# precision for GPU calculations
# -D_SINGLE_SINGLE  # Single precision for all calculations
# -D_DOUBLE_DOUBLE  # Double precision for all calculations
# -D_SINGLE_DOUBLE  # Accumulation of forces, etc. in double

CUDA_PRECISION = -D_SINGLE_DOUBLE

CUDA_INCLUDE = -I$(CUDA_HOME)/include
CUDA_LIB = -L$(CUDA_HOME)/lib64
CUDA_OPTS = -DUNIX -O3 -Xptxas -v --use_fast_math $(LMP_INC) -Xcompiler "-fPIC -std=c++98"

CUDR_CPP = mpicxx -DMPI_GERYON -DUCL_NO_EXIT -DMPICH_IGNORE_CXX_SEEK -DOMPI_SKIP_MPICXX=1 -fPIC
CUDR_OPTS = -O2 $(LMP_INC) # -xHost -no-prec-div -ansi-alias

BIN_DIR = ./
OBJ_DIR = ./
LIB_DIR = ./
AR = ar
BSH = /bin/sh

CUDPP_OPT = -DUSE_CUDPP -Icudpp_mini

include Nvidia.makefile_multi
+30 −2
Original line number Diff line number Diff line
@@ -77,7 +77,9 @@ OBJS = $(OBJ_DIR)/lal_atom.o $(OBJ_DIR)/lal_ans.o \
       $(OBJ_DIR)/lal_coul_debye.o $(OBJ_DIR)/lal_coul_debye_ext.o \
       $(OBJ_DIR)/lal_zbl.o $(OBJ_DIR)/lal_zbl_ext.o \
       $(OBJ_DIR)/lal_lj_cubic.o $(OBJ_DIR)/lal_lj_cubic_ext.o \
       $(OBJ_DIR)/lal_ufm.o $(OBJ_DIR)/lal_ufm_ext.o 
       $(OBJ_DIR)/lal_ufm.o $(OBJ_DIR)/lal_ufm_ext.o \
       $(OBJ_DIR)/lal_dipole_long_lj.o $(OBJ_DIR)/lal_dipole_long_lj_ext.o \
       $(OBJ_DIR)/lal_lj_expand_coul_long.o $(OBJ_DIR)/lal_lj_expand_coul_long_ext.o

CBNS = $(OBJ_DIR)/device.cubin $(OBJ_DIR)/device_cubin.h \
       $(OBJ_DIR)/atom.cubin $(OBJ_DIR)/atom_cubin.h \
@@ -133,7 +135,9 @@ CBNS = $(OBJ_DIR)/device.cubin $(OBJ_DIR)/device_cubin.h \
       $(OBJ_DIR)/coul_debye.cubin $(OBJ_DIR)/coul_debye_cubin.h \
       $(OBJ_DIR)/zbl.cubin $(OBJ_DIR)/zbl_cubin.h \
       $(OBJ_DIR)/lj_cubic.cubin $(OBJ_DIR)/lj_cubic_cubin.h \
       $(OBJ_DIR)/ufm.cubin $(OBJ_DIR)/ufm_cubin.h
       $(OBJ_DIR)/ufm.cubin $(OBJ_DIR)/ufm_cubin.h \
       $(OBJ_DIR)/dipole_long_lj.cubin $(OBJ_DIR)/dipole_long_lj_cubin.h \
       $(OBJ_DIR)/lj_expand_coul_long.cubin $(OBJ_DIR)/lj_expand_coul_long_cubin.h

all: $(OBJ_DIR) $(GPU_LIB) $(EXECS)

@@ -809,6 +813,30 @@ $(OBJ_DIR)/lal_lj_cubic.o: $(ALL_H) lal_lj_cubic.h lal_lj_cubic.cpp $(OBJ_DIR)/l
$(OBJ_DIR)/lal_lj_cubic_ext.o: $(ALL_H) lal_lj_cubic.h lal_lj_cubic_ext.cpp lal_base_atomic.h
	$(CUDR) -o $@ -c lal_lj_cubic_ext.cpp -I$(OBJ_DIR)

$(OBJ_DIR)/dipole_long_lj.cubin: lal_dipole_long_lj.cu lal_precision.h lal_preprocessor.h
	$(CUDA) --cubin -DNV_KERNEL -o $@ lal_dipole_long_lj.cu

$(OBJ_DIR)/dipole_long_lj_cubin.h: $(OBJ_DIR)/dipole_long_lj.cubin $(OBJ_DIR)/dipole_long_lj.cubin
	$(BIN2C) -c -n dipole_long_lj $(OBJ_DIR)/dipole_long_lj.cubin > $(OBJ_DIR)/dipole_long_lj_cubin.h

$(OBJ_DIR)/lal_dipole_long_lj.o: $(ALL_H) lal_dipole_long_lj.h lal_dipole_long_lj.cpp $(OBJ_DIR)/dipole_long_lj_cubin.h $(OBJ_DIR)/lal_base_dipole.o
	$(CUDR) -o $@ -c lal_dipole_long_lj.cpp -I$(OBJ_DIR)

$(OBJ_DIR)/lal_dipole_long_lj_ext.o: $(ALL_H) lal_dipole_long_lj.h lal_dipole_long_lj_ext.cpp lal_base_dipole.h
	$(CUDR) -o $@ -c lal_dipole_long_lj_ext.cpp -I$(OBJ_DIR)

$(OBJ_DIR)/lj_expand_coul_long.cubin: lal_lj_expand_coul_long.cu lal_precision.h lal_preprocessor.h
	$(CUDA) --cubin -DNV_KERNEL -o $@ lal_lj_expand_coul_long.cu

$(OBJ_DIR)/lj_expand_coul_long_cubin.h: $(OBJ_DIR)/lj_expand_coul_long.cubin $(OBJ_DIR)/lj_expand_coul_long.cubin
	$(BIN2C) -c -n lj_expand_coul_long $(OBJ_DIR)/lj_expand_coul_long.cubin > $(OBJ_DIR)/lj_expand_coul_long_cubin.h

$(OBJ_DIR)/lal_lj_expand_coul_long.o: $(ALL_H) lal_lj_expand_coul_long.h lal_lj_expand_coul_long.cpp $(OBJ_DIR)/lj_expand_coul_long_cubin.h $(OBJ_DIR)/lal_base_charge.o
	$(CUDR) -o $@ -c lal_lj_expand_coul_long.cpp -I$(OBJ_DIR)

$(OBJ_DIR)/lal_lj_expand_coul_long_ext.o: $(ALL_H) lal_lj_expand_coul_long.h lal_lj_expand_coul_long_ext.cpp lal_base_charge.h
	$(CUDR) -o $@ -c lal_lj_expand_coul_long_ext.cpp -I$(OBJ_DIR)

$(BIN_DIR)/nvc_get_devices: ./geryon/ucl_get_devices.cpp $(NVD_H)
	$(CUDR) -o $@ ./geryon/ucl_get_devices.cpp -DUCL_CUDADR $(CUDA_LIB) -lcuda 

+854 −0

File added.

Preview size limit exceeded, changes collapsed.

+24 −3
Original line number Diff line number Diff line
@@ -66,7 +66,9 @@ OBJS = $(OBJ_DIR)/lal_atom.o $(OBJ_DIR)/lal_answer.o \
       $(OBJ_DIR)/lal_coul_debye.o $(OBJ_DIR)/lal_coul_debye_ext.o \
       $(OBJ_DIR)/lal_zbl.o $(OBJ_DIR)/lal_zbl_ext.o \
       $(OBJ_DIR)/lal_lj_cubic.o $(OBJ_DIR)/lal_lj_cubic_ext.o \
       $(OBJ_DIR)/lal_ufm.o $(OBJ_DIR)/lal_ufm_ext.o
       $(OBJ_DIR)/lal_ufm.o $(OBJ_DIR)/lal_ufm_ext.o \
       $(OBJ_DIR)/lal_dipole_long_lj.o $(OBJ_DIR)/lal_dipole_long_lj_ext.o \
       $(OBJ_DIR)/lal_lj_expand_coul_long.o $(OBJ_DIR)/lal_lj_expand_coul_long_ext.o

KERS = $(OBJ_DIR)/device_cl.h $(OBJ_DIR)/atom_cl.h \
       $(OBJ_DIR)/neighbor_cpu_cl.h $(OBJ_DIR)/pppm_cl.h \
@@ -95,7 +97,8 @@ KERS = $(OBJ_DIR)/device_cl.h $(OBJ_DIR)/atom_cl.h \
       $(OBJ_DIR)/tersoff_mod_cl.h $(OBJ_DIR)/coul_cl.h \
       $(OBJ_DIR)/coul_debye_cl.h $(OBJ_DIR)/zbl_cl.h \
       $(OBJ_DIR)/lj_cubic_cl.h $(OBJ_DIR)/vashishta_cl.h \
       $(OBJ_DIR)/ufm_cl.h
       $(OBJ_DIR)/ufm_cl.h  $(OBJ_DIR)/dipole_long_lj_cl.h \
       $(OBJ_DIR)/lj_expand_coul_long_cl.h


OCL_EXECS = $(BIN_DIR)/ocl_get_devices
@@ -588,7 +591,25 @@ $(OBJ_DIR)/lal_ufm.o: $(ALL_H) lal_ufm.h lal_ufm.cpp $(OBJ_DIR)/ufm_cl.h $(OBJ_
$(OBJ_DIR)/lal_ufm_ext.o: $(ALL_H) lal_ufm.h lal_ufm_ext.cpp lal_base_atomic.h
	$(OCL) -o $@ -c lal_ufm_ext.cpp -I$(OBJ_DIR)

$(BIN_DIR)/ocl_get_devices: ./geryon/ucl_get_devices.cpp
$(OBJ_DIR)/dipole_long_lj_cl.h: lal_dipole_long_lj.cu $(PRE1_H)
	$(BSH) ./geryon/file_to_cstr.sh dipole_long_lj $(PRE1_H) lal_dipole_long_lj.cu $(OBJ_DIR)/dipole_long_lj_cl.h;

$(OBJ_DIR)/lal_dipole_long_lj.o: $(ALL_H) lal_dipole_long_lj.h lal_dipole_long_lj.cpp  $(OBJ_DIR)/dipole_long_lj_cl.h $(OBJ_DIR)/lj_expand_coul_long_cl.h $(OBJ_DIR)/lal_base_charge.o
	$(OCL) -o $@ -c lal_dipole_long_lj.cpp -I$(OBJ_DIR)

$(OBJ_DIR)/lal_dipole_long_lj_ext.o: $(ALL_H) lal_dipole_long_lj.h lal_dipole_long_lj_ext.cpp lal_base_dipole.h
	$(OCL) -o $@ -c lal_dipole_long_lj_ext.cpp -I$(OBJ_DIR)

$(OBJ_DIR)/lj_expand_coul_long_cl.h: lal_lj_expand_coul_long.cu $(PRE1_H)
	$(BSH) ./geryon/file_to_cstr.sh lj_expand_coul_long $(PRE1_H) lal_lj_expand_coul_long.cu $(OBJ_DIR)/lj_expand_coul_long_cl.h;

$(OBJ_DIR)/lal_lj_expand_coul_long.o: $(ALL_H) lal_lj_expand_coul_long.h lal_lj_expand_coul_long.cpp  $(OBJ_DIR)/lj_expand_coul_long_cl.h $(OBJ_DIR)/lj_expand_coul_long_cl.h $(OBJ_DIR)/lal_base_charge.o
	$(OCL) -o $@ -c lal_lj_expand_coul_long.cpp -I$(OBJ_DIR)

$(OBJ_DIR)/lal_lj_expand_coul_long_ext.o: $(ALL_H) lal_lj_expand_coul_long.h lal_lj_expand_coul_long_ext.cpp lal_base_charge.h
	$(OCL) -o $@ -c lal_lj_expand_coul_long_ext.cpp -I$(OBJ_DIR)

$(BIN_DIR)/ocl_get_devices: ./geryon/ucl_get_devices.cpp $(OCL_H)
	$(OCL) -o $@ ./geryon/ucl_get_devices.cpp -DUCL_OPENCL $(OCL_LINK) 

$(OCL_LIB): $(OBJS) $(PTXS)
Loading