From 66c5fa2abd78c0de8fce4caafd4629b670cfc7c0 Mon Sep 17 00:00:00 2001 From: Vsevak Date: Tue, 28 Jan 2020 20:09:40 +0300 Subject: [PATCH 001/328] Merge 'gpu_hip_port' into master --- lib/gpu/Makefile.hip | 148 ++++++++ lib/gpu/geryon/hip_device.h | 519 +++++++++++++++++++++++++++++ lib/gpu/geryon/hip_kernel.h | 298 +++++++++++++++++ lib/gpu/geryon/hip_macros.h | 83 +++++ lib/gpu/geryon/hip_mat.h | 43 +++ lib/gpu/geryon/hip_memory.h | 279 ++++++++++++++++ lib/gpu/geryon/hip_texture.h | 113 +++++++ lib/gpu/geryon/hip_timer.h | 107 ++++++ lib/gpu/geryon/ucl_get_devices.cpp | 5 + lib/gpu/lal_answer.cpp | 17 +- lib/gpu/lal_answer.h | 4 + lib/gpu/lal_atom.cpp | 75 +++++ lib/gpu/lal_atom.cu | 2 +- lib/gpu/lal_atom.h | 13 + lib/gpu/lal_aux_fun1.h | 2 +- lib/gpu/lal_base_atomic.h | 2 + lib/gpu/lal_base_charge.h | 2 + lib/gpu/lal_base_dipole.h | 2 + lib/gpu/lal_base_dpd.h | 2 + lib/gpu/lal_base_ellipsoid.h | 2 + lib/gpu/lal_base_three.h | 2 + lib/gpu/lal_beck.cu | 6 +- lib/gpu/lal_born.cu | 6 +- lib/gpu/lal_born_coul_long.cu | 10 +- lib/gpu/lal_born_coul_long_cs.cu | 11 +- lib/gpu/lal_born_coul_wolf.cu | 10 +- lib/gpu/lal_born_coul_wolf_cs.cu | 10 +- lib/gpu/lal_buck.cu | 6 +- lib/gpu/lal_buck_coul.cu | 10 +- lib/gpu/lal_buck_coul_long.cu | 10 +- lib/gpu/lal_charmm_long.cu | 10 +- lib/gpu/lal_colloid.cu | 6 +- lib/gpu/lal_coul.cu | 10 +- lib/gpu/lal_coul_debye.cu | 10 +- lib/gpu/lal_coul_dsf.cu | 10 +- lib/gpu/lal_coul_long.cu | 10 +- lib/gpu/lal_coul_long_cs.cu | 10 +- lib/gpu/lal_device.cpp | 6 +- lib/gpu/lal_device.cu | 2 +- lib/gpu/lal_dipole_lj.cu | 14 +- lib/gpu/lal_dipole_lj_sf.cu | 14 +- lib/gpu/lal_dipole_long_lj.cu | 14 +- lib/gpu/lal_dpd.cu | 10 +- lib/gpu/lal_eam.cu | 34 +- lib/gpu/lal_ellipsoid_extra.h | 8 +- lib/gpu/lal_ellipsoid_nbor.cu | 6 +- lib/gpu/lal_gauss.cu | 6 +- lib/gpu/lal_gayberne.cu | 2 +- lib/gpu/lal_gayberne_lj.cu | 2 +- lib/gpu/lal_lj.cu | 6 +- lib/gpu/lal_lj96.cu | 6 +- lib/gpu/lal_lj_class2_long.cu | 10 +- lib/gpu/lal_lj_coul.cu | 10 +- 
lib/gpu/lal_lj_coul_debye.cu | 10 +- lib/gpu/lal_lj_coul_long.cu | 10 +- lib/gpu/lal_lj_coul_msm.cu | 18 +- lib/gpu/lal_lj_cubic.cu | 6 +- lib/gpu/lal_lj_dsf.cu | 10 +- lib/gpu/lal_lj_expand.cu | 6 +- lib/gpu/lal_lj_expand_coul_long.cu | 10 +- lib/gpu/lal_lj_gromacs.cu | 6 +- lib/gpu/lal_lj_sdk.cu | 6 +- lib/gpu/lal_lj_sdk_long.cu | 10 +- lib/gpu/lal_lj_tip4p_long.cpp | 5 +- lib/gpu/lal_lj_tip4p_long.cu | 10 +- lib/gpu/lal_mie.cu | 6 +- lib/gpu/lal_morse.cu | 6 +- lib/gpu/lal_neighbor_cpu.cu | 2 +- lib/gpu/lal_neighbor_gpu.cu | 6 +- lib/gpu/lal_neighbor_shared.h | 4 + lib/gpu/lal_pppm.cu | 10 +- lib/gpu/lal_pppm.h | 2 + lib/gpu/lal_precision.h | 2 + lib/gpu/lal_preprocessor.h | 146 +++++++- lib/gpu/lal_re_squared.cu | 2 +- lib/gpu/lal_re_squared_lj.cu | 2 +- lib/gpu/lal_soft.cu | 6 +- lib/gpu/lal_sw.cu | 18 +- lib/gpu/lal_table.cu | 6 +- lib/gpu/lal_tersoff.cu | 26 +- lib/gpu/lal_tersoff_extra.h | 2 +- lib/gpu/lal_tersoff_mod.cu | 26 +- lib/gpu/lal_tersoff_mod_extra.h | 2 +- lib/gpu/lal_tersoff_zbl.cu | 30 +- lib/gpu/lal_tersoff_zbl_extra.h | 2 +- lib/gpu/lal_ufm.cu | 6 +- lib/gpu/lal_vashishta.cu | 26 +- lib/gpu/lal_yukawa.cu | 6 +- lib/gpu/lal_yukawa_colloid.cu | 10 +- lib/gpu/lal_zbl.cu | 6 +- src/MAKE/OPTIONS/Makefile.hip | 120 +++++++ 91 files changed, 2290 insertions(+), 312 deletions(-) create mode 100644 lib/gpu/Makefile.hip create mode 100644 lib/gpu/geryon/hip_device.h create mode 100644 lib/gpu/geryon/hip_kernel.h create mode 100644 lib/gpu/geryon/hip_macros.h create mode 100644 lib/gpu/geryon/hip_mat.h create mode 100644 lib/gpu/geryon/hip_memory.h create mode 100644 lib/gpu/geryon/hip_texture.h create mode 100644 lib/gpu/geryon/hip_timer.h create mode 100644 src/MAKE/OPTIONS/Makefile.hip diff --git a/lib/gpu/Makefile.hip b/lib/gpu/Makefile.hip new file mode 100644 index 0000000000..5c9f251004 --- /dev/null +++ b/lib/gpu/Makefile.hip @@ -0,0 +1,148 @@ +# /* ---------------------------------------------------------------------- +# Generic Linux Makefile 
for HIP +# - export HIP_PLATFORM=hcc (or nvcc) before execution +# - change HIP_ARCH for your GPU +# ------------------------------------------------------------------------- */ + +# this setting should match LAMMPS Makefile +# one of LAMMPS_SMALLBIG (default), LAMMPS_BIGBIG and LAMMPS_SMALLSMALL + +LMP_INC = -DLAMMPS_SMALLBIG + +# precision for GPU calculations +# -D_SINGLE_SINGLE # Single precision for all calculations +# -D_DOUBLE_DOUBLE # Double precision for all calculations +# -D_SINGLE_DOUBLE # Accumulation of forces, etc. in double + +HIP_PRECISION = -D_SINGLE_DOUBLE + +HIP_OPTS = -O3 +HIP_HOST_OPTS = -Wno-deprecated-declarations +HIP_HOST_INCLUDE = + +# use device sort +# requires linking with hipcc and hipCUB + (rocPRIM or CUB for AMD or Nvidia respectively) +HIP_HOST_OPTS += -DUSE_HIP_DEVICE_SORT +# path to cub +HIP_HOST_INCLUDE += -I./ +# path to hipcub +HIP_HOST_INCLUDE += -I$(HIP_PATH)/../include + +# use mpi +HIP_HOST_OPTS += -DMPI_GERYON -DUCL_NO_EXIT +# these settings should match LAMMPS Makefile +MPI_COMP_OPTS = $(shell mpicxx --showme:compile) +MPI_LINK_OPTS = $(shell mpicxx --showme:link) +#MPI_COMP_OPTS += -I/usr/include/mpi -DMPICH_IGNORE_CXX_SEEK -DOMPI_SKIP_MPICXX=1 + +HIP_PATH ?= $(wildcard /opt/rocm/hip) +HIP_PLATFORM=$(shell $(HIP_PATH)/bin/hipconfig --compiler) + +ifeq (hcc,$(HIP_PLATFORM)) + HIP_OPTS += -ffast-math + # possible values: gfx803,gfx900,gfx906 + HIP_ARCH = gfx906 +else ifeq (nvcc,$(HIP_PLATFORM)) + HIP_OPTS += --use_fast_math + HIP_ARCH = -gencode arch=compute_30,code=[sm_30,compute_30] -gencode arch=compute_32,code=[sm_32,compute_32] -gencode arch=compute_35,code=[sm_35,compute_35] \ + -gencode arch=compute_50,code=[sm_50,compute_50] -gencode arch=compute_52,code=[sm_52,compute_52] -gencode arch=compute_53,code=[sm_53,compute_53]\ + -gencode arch=compute_60,code=[sm_60,compute_60] -gencode arch=compute_61,code=[sm_61,compute_61] -gencode arch=compute_62,code=[sm_62,compute_62]\ + -gencode
arch=compute_70,code=[sm_70,compute_70] -gencode arch=compute_72,code=[sm_72,compute_72] -gencode arch=compute_75,code=[sm_75,compute_75] +else + $(error Specify HIP platform using 'export HIP_PLATFORM=(hcc,nvcc)') +endif + +BIN_DIR = . +OBJ_DIR = ./obj +LIB_DIR = . +AR = ar +BSH = /bin/sh + + +# /* ---------------------------------------------------------------------- +# don't change section below without need +# ------------------------------------------------------------------------- */ + +HIP_OPTS += -DUSE_HIP $(HIP_PRECISION) +HIP_GPU_OPTS += $(HIP_OPTS) -I./ + +ifeq (hcc,$(HIP_PLATFORM)) + HIP_HOST_OPTS += -fPIC + HIP_GPU_CC = $(HIP_PATH)/bin/hipcc --genco + HIP_GPU_OPTS_S = -t="$(HIP_ARCH)" -f=\" + HIP_GPU_OPTS_E = \" + HIP_KERNEL_SUFFIX = .cpp + HIP_LIBS_TARGET = export HCC_AMDGPU_TARGET := $(HIP_ARCH) + export HCC_AMDGPU_TARGET := $(HIP_ARCH) +else ifeq (nvcc,$(HIP_PLATFORM)) + HIP_GPU_CC = $(HIP_PATH)/bin/hipcc --fatbin + HIP_GPU_OPTS += $(HIP_ARCH) + HIP_GPU_SORT_ARCH = $(HIP_ARCH) + # workaround: nvcc cannot handle the -pthread flag directly + MPI_COMP_OPTS := $(subst -pthread,-Xcompiler -pthread,$(MPI_COMP_OPTS)) + MPI_LINK_OPTS := $(subst -pthread,-Xcompiler -pthread,$(MPI_LINK_OPTS)) +endif + +# hipcc is essential for device sort, because hipcub is a header-only library and ROCm gpu code generation is deferred to the linking stage +HIP_HOST_CC = $(HIP_PATH)/bin/hipcc +HIP_HOST_OPTS += $(HIP_OPTS) $(MPI_COMP_OPTS) $(LMP_INC) +HIP_HOST_CC_CMD = $(HIP_HOST_CC) $(HIP_HOST_OPTS) $(HIP_HOST_INCLUDE) + +# sources + +ALL_H = $(wildcard ./geryon/ucl*.h) $(wildcard ./geryon/hip*.h) $(wildcard ./lal_*.h) +SRCS := $(wildcard ./lal_*.cpp) +OBJS := $(subst ./,$(OBJ_DIR)/,$(SRCS:%.cpp=%.o)) +CUS := $(wildcard lal_*.cu) +CUHS := $(filter-out pppm_cubin.h, $(CUS:lal_%.cu=%_cubin.h)) pppm_f_cubin.h pppm_d_cubin.h +CUHS := $(addprefix $(OBJ_DIR)/, $(CUHS)) + +all: $(OBJ_DIR) $(CUHS) $(LIB_DIR)/libgpu.a $(BIN_DIR)/hip_get_devices + +$(OBJ_DIR): + mkdir -p $@ + +# GPU kernels compilation +
+$(OBJ_DIR)/pppm_f_cubin.h: lal_pppm.cu $(ALL_H) + @cp $< $(OBJ_DIR)/temp_pppm_f.cu$(HIP_KERNEL_SUFFIX) + $(HIP_GPU_CC) $(HIP_GPU_OPTS_S) $(HIP_GPU_OPTS) -Dgrdtyp=float -Dgrdtyp4=float4 $(HIP_GPU_OPTS_E) -o $(OBJ_DIR)/pppm_f.cubin $(OBJ_DIR)/temp_pppm_f.cu$(HIP_KERNEL_SUFFIX) + @xxd -i $(OBJ_DIR)/pppm_f.cubin $@ + @sed -i "s/[a-zA-Z0-9_]*pppm_f_cubin/pppm_f/g" $@ + @rm $(OBJ_DIR)/temp_pppm_f.cu$(HIP_KERNEL_SUFFIX) $(OBJ_DIR)/pppm_f.cubin + +$(OBJ_DIR)/pppm_d_cubin.h: lal_pppm.cu $(ALL_H) + @cp $< $(OBJ_DIR)/temp_pppm_d.cu$(HIP_KERNEL_SUFFIX) + $(HIP_GPU_CC) $(HIP_GPU_OPTS_S) $(HIP_GPU_OPTS) -Dgrdtyp=double -Dgrdtyp4=double4 $(HIP_GPU_OPTS_E) -o $(OBJ_DIR)/pppm_d.cubin $(OBJ_DIR)/temp_pppm_d.cu$(HIP_KERNEL_SUFFIX) + @xxd -i $(OBJ_DIR)/pppm_d.cubin $@ + @sed -i "s/[a-zA-Z0-9_]*pppm_d_cubin/pppm_d/g" $@ + @rm $(OBJ_DIR)/temp_pppm_d.cu$(HIP_KERNEL_SUFFIX) $(OBJ_DIR)/pppm_d.cubin + +$(OBJ_DIR)/%_cubin.h: lal_%.cu $(ALL_H) + @cp $< $(OBJ_DIR)/temp_$*.cu$(HIP_KERNEL_SUFFIX) + $(HIP_GPU_CC) $(HIP_GPU_OPTS_S) $(HIP_GPU_OPTS) $(HIP_GPU_OPTS_E) -o $(OBJ_DIR)/$*.cubin $(OBJ_DIR)/temp_$*.cu$(HIP_KERNEL_SUFFIX) + @xxd -i $(OBJ_DIR)/$*.cubin $@ + @sed -i "s/[a-zA-Z0-9_]*$*_cubin/$*/g" $@ + @rm $(OBJ_DIR)/temp_$*.cu$(HIP_KERNEL_SUFFIX) $(OBJ_DIR)/$*.cubin + +# host sources compilation + +$(OBJ_DIR)/lal_atom.o: lal_atom.cpp $(CUHS) $(ALL_H) + $(HIP_HOST_CC_CMD) -o $@ -c $< -I$(OBJ_DIR) $(HIP_GPU_SORT_ARCH) + +$(OBJ_DIR)/lal_%.o: lal_%.cpp $(CUHS) $(ALL_H) + $(HIP_HOST_CC_CMD) -o $@ -c $< -I$(OBJ_DIR) + +# libgpu building (also writes Makefile.lammps; printf is used because echo's handling of "\n" differs between shells) + +$(LIB_DIR)/libgpu.a: $(OBJS) + $(AR) -crs $@ $(OBJS) + printf "export HIP_PLATFORM := %s\n%s\n" "$(HIP_PLATFORM)" "$(HIP_LIBS_TARGET)" > 'Makefile.lammps' + +# test app building + +$(BIN_DIR)/hip_get_devices: ./geryon/ucl_get_devices.cpp $(ALL_H) + $(HIP_HOST_CC_CMD) -o $@ $< -DUCL_HIP $(MPI_LINK_OPTS) + +clean: + -rm -f $(BIN_DIR)/hip_get_devices $(LIB_DIR)/libgpu.a $(OBJS) $(OBJ_DIR)/temp_* $(CUHS) diff --git a/lib/gpu/geryon/hip_device.h b/lib/gpu/geryon/hip_device.h new
file mode 100644 index 0000000000..93f38d28bb --- /dev/null +++ b/lib/gpu/geryon/hip_device.h @@ -0,0 +1,519 @@ +/* ----------------------------------------------------------------------- + Copyright (2009) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the Simplified BSD License. + ----------------------------------------------------------------------- */ + +#ifndef HIP_DEVICE +#define HIP_DEVICE + + +#include +#include +#include +#include +#include +#include "hip_macros.h" +#include "ucl_types.h" + +namespace ucl_hip { + +// -------------------------------------------------------------------------- +// - COMMAND QUEUE STUFF +// -------------------------------------------------------------------------- +typedef hipStream_t command_queue; + +inline void ucl_sync(hipStream_t &stream) { + CU_SAFE_CALL(hipStreamSynchronize(stream)); +} + +struct NVDProperties { + int device_id; + std::string name; + int major; + int minor; + CUDA_INT_TYPE totalGlobalMem; + int multiProcessorCount; + + int maxThreadsPerBlock; + int maxThreadsDim[3]; + int maxGridSize[3]; + int sharedMemPerBlock; + int totalConstantMemory; + int SIMDWidth; + int memPitch; + int regsPerBlock; + int clockRate; + int textureAlign; + + int kernelExecTimeoutEnabled; + int integrated; + int canMapHostMemory; + int concurrentKernels; + int ECCEnabled; + int computeMode; +}; + +/// Class for looking at device properties +/** \note Calls to change the device outside of the class results in incorrect + * behavior + * \note There is no error checking for indexing past the number of devices **/ +class UCL_Device { + public: + /// Collect properties for every GPU on the node + /** \note You must set the active GPU with set() before using the device **/ + inline UCL_Device(); + + inline ~UCL_Device(); + + /// Returns 1 (For compatibility with OpenCL) + inline int 
num_platforms() { return 1; } + + /// Return a string with name and info of the current platform + inline std::string platform_name() + { return "HIP platform"; } + + /// Delete any contexts/data and set the platform number to be used + inline int set_platform(const int pid); + + /// Return the number of devices that support CUDA + inline int num_devices() { return _properties.size(); } + + /// Set the CUDA device to the specified device number + /** A context and default command queue will be created for the device + * Returns UCL_SUCCESS if successful or UCL_ERROR if the device could not + * be allocated for use. clear() is called to delete any contexts and + * associated data from previous calls to set(). **/ + inline int set(int num); + + /// Delete any context and associated data stored from a call to set() + inline void clear(); + + /// Get the current device number + inline int device_num() { return _device; } + + /// Returns the default stream for the current device + inline command_queue & cq() { return cq(0); } + + /// Returns the stream indexed by i + inline command_queue & cq(const int i) { return _cq[i]; } + + /// Block until all commands in the default stream have completed + inline void sync() { sync(0); } + + /// Block until all commands in the specified stream have completed + inline void sync(const int i) { ucl_sync(cq(i)); } + + /// Get the number of command queues currently available on device + inline int num_queues() + { return _cq.size(); } + + /// Add a stream for device computations + inline void push_command_queue() { + _cq.push_back(hipStream_t()); + CU_SAFE_CALL(hipStreamCreateWithFlags(&_cq.back(),0)); + } + + /// Remove a stream for device computations + /** \note You cannot delete the default stream **/ + inline void pop_command_queue() { + if (_cq.size()<2) return; + CU_SAFE_CALL_NS(hipStreamDestroy(_cq.back())); + _cq.pop_back(); + } + + /// Set the default command queue (by default this is the null stream) + /** \param i index of 
the command queue (as added by push_command_queue()) + If i is 0, the default command queue is set to the null stream **/ + inline void set_command_queue(const int i) { + if (i==0) _cq[0]=0; + else _cq[0]=_cq[i]; + } + + /// Get the current CUDA device name + inline std::string name() { return name(_device); } + /// Get the CUDA device name + inline std::string name(const int i) + { return std::string(_properties[i].name); } + + /// Get a string telling the type of the current device + inline std::string device_type_name() { return device_type_name(_device); } + /// Get a string telling the type of the device + inline std::string device_type_name(const int i) { return "GPU"; } + + /// Get current device type (UCL_CPU, UCL_GPU, UCL_ACCELERATOR, UCL_DEFAULT) + inline int device_type() { return device_type(_device); } + /// Get device type (UCL_CPU, UCL_GPU, UCL_ACCELERATOR, UCL_DEFAULT) + inline int device_type(const int i) { return UCL_GPU; } + + /// Returns true if host memory is efficiently addressable from device + inline bool shared_memory() { return shared_memory(_device); } + /// Returns true if host memory is efficiently addressable from device + inline bool shared_memory(const int i) { return device_type(i)==UCL_CPU; } + + /// Returns true if double precision is support for the current device + inline bool double_precision() { return double_precision(_device); } + /// Returns true if double precision is support for the device + inline bool double_precision(const int i) {return arch(i)>=1.3;} + + /// Get the number of compute units on the current device + inline unsigned cus() { return cus(_device); } + /// Get the number of compute units + inline unsigned cus(const int i) + { return _properties[i].multiProcessorCount; } + + /// Get the number of cores in the current device + inline unsigned cores() { return cores(_device); } + /// Get the number of cores + inline unsigned cores(const int i) + { if (arch(i)<2.0) return _properties[i].multiProcessorCount*8; + 
else if (arch(i)<2.1) return _properties[i].multiProcessorCount*32; + else if (arch(i)<3.0) return _properties[i].multiProcessorCount*48; + else return _properties[i].multiProcessorCount*192; } + + /// Get the gigabytes of global memory in the current device + inline double gigabytes() { return gigabytes(_device); } + /// Get the gigabytes of global memory + inline double gigabytes(const int i) + { return static_cast(_properties[i].totalGlobalMem)/1073741824; } + + /// Get the bytes of global memory in the current device + inline size_t bytes() { return bytes(_device); } + /// Get the bytes of global memory + inline size_t bytes(const int i) { return _properties[i].totalGlobalMem; } + + // Get the gigabytes of free memory in the current device + inline double free_gigabytes() { return free_gigabytes(_device); } + // Get the gigabytes of free memory + inline double free_gigabytes(const int i) + { return static_cast(free_bytes(i))/1073741824; } + + // Get the bytes of free memory in the current device + inline size_t free_bytes() { return free_bytes(_device); } + // Get the bytes of free memory + inline size_t free_bytes(const int i) { + CUDA_INT_TYPE dfree, dtotal; + CU_SAFE_CALL_NS(hipMemGetInfo(&dfree, &dtotal)); + return static_cast(dfree); + } + + /// Return the GPGPU compute capability for current device + inline double arch() { return arch(_device); } + /// Return the GPGPU compute capability + inline double arch(const int i) + { return static_cast(_properties[i].minor)/10+_properties[i].major;} + + /// Clock rate in GHz for current device + inline double clock_rate() { return clock_rate(_device); } + /// Clock rate in GHz + inline double clock_rate(const int i) + { return _properties[i].clockRate*1e-6;} + + /// Get the maximum number of threads per block + inline size_t group_size() { return group_size(_device); } + /// Get the maximum number of threads per block + inline size_t group_size(const int i) + { return _properties[i].maxThreadsPerBlock; } + + /// 
Return the maximum memory pitch in bytes for current device + inline size_t max_pitch() { return max_pitch(_device); } + /// Return the maximum memory pitch in bytes + inline size_t max_pitch(const int i) { return _properties[i].memPitch; } + + /// Returns false if accelerator cannot be shared by multiple processes + /** If it cannot be determined, true is returned **/ + inline bool sharing_supported() { return sharing_supported(_device); } + /// Returns false if accelerator cannot be shared by multiple processes + /** If it cannot be determined, true is returned **/ + inline bool sharing_supported(const int i) + { return (_properties[i].computeMode == hipComputeModeDefault); } + + /// True if splitting device into equal subdevices supported + inline bool fission_equal() + { return fission_equal(_device); } + /// True if splitting device into equal subdevices supported + inline bool fission_equal(const int i) + { return false; } + /// True if splitting device into subdevices by specified counts supported + inline bool fission_by_counts() + { return fission_by_counts(_device); } + /// True if splitting device into subdevices by specified counts supported + inline bool fission_by_counts(const int i) + { return false; } + /// True if splitting device into subdevices by affinity domains supported + inline bool fission_by_affinity() + { return fission_by_affinity(_device); } + /// True if splitting device into subdevices by affinity domains supported + inline bool fission_by_affinity(const int i) + { return false; } + + /// Maximum number of subdevices allowed from device fission + inline int max_sub_devices() + { return max_sub_devices(_device); } + /// Maximum number of subdevices allowed from device fission + inline int max_sub_devices(const int i) + { return 0; } + + /// List all devices along with all properties + inline void print_all(std::ostream &out); + + /// Select the platform that has accelerators (for compatibility with OpenCL) + inline int 
set_platform_accelerator(int pid=-1) { return UCL_SUCCESS; } + /// Load a module from a program image; a module already loaded for the same program pointer is reused. The JIT info log is copied to *log when log is not NULL. + inline int load_module(const void* program, hipModule_t& module, std::string *log=NULL){ + auto it = _loaded_modules.emplace(program, hipModule_t()); + if(!it.second){ + module = it.first->second; + return UCL_SUCCESS; + } + const unsigned int num_opts=2; + hipJitOption options[num_opts]; + void *values[num_opts]; + + // set up size of compilation log buffer + options[0] = hipJitOptionInfoLogBufferSizeBytes; + values[0] = (void *)(int)10240; + // set up pointer to the compilation log buffer + options[1] = hipJitOptionInfoLogBuffer; + char clog[10240] = { 0 }; + values[1] = clog; + + hipError_t err=hipModuleLoadDataEx(&module,program,num_opts, options,(void **)values); + + if (log!=NULL) + *log=std::string(clog); + + if (err != hipSuccess) { + #ifndef UCL_NO_EXIT + std::cerr << std::endl + << "----------------------------------------------------------\n" + << " UCL Error: Error compiling PTX Program...\n" + << "----------------------------------------------------------\n"; + std::cerr << clog << std::endl; // print the log text itself; 'log' is a std::string* (possibly NULL) and would print as an address + #endif + _loaded_modules.erase(it.first); + return UCL_COMPILE_ERROR; + } + it.first->second = module; + return UCL_SUCCESS; + } + private: + std::unordered_map _loaded_modules; + int _device, _num_devices; + std::vector _properties; + std::vector _cq; + hipDevice_t _cu_device; +}; + +// Grabs the properties for all devices +UCL_Device::UCL_Device() { + CU_SAFE_CALL_NS(hipInit(0)); + CU_SAFE_CALL_NS(hipGetDeviceCount(&_num_devices)); + for (int i=0; i<_num_devices; ++i) { + hipDevice_t dev; + CU_SAFE_CALL_NS(hipDeviceGet(&dev,i)); + int major, minor; + CU_SAFE_CALL_NS(hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor, dev)); + CU_SAFE_CALL_NS(hipDeviceGetAttribute(&minor, hipDeviceAttributeComputeCapabilityMinor, dev)); + if (major==9999) + continue; + + NVDProperties prop; + prop.device_id = i; + prop.major=major; + prop.minor=minor; + + char namecstr[1024]; +
CU_SAFE_CALL_NS(hipDeviceGetName(namecstr,1024,dev)); + prop.name=namecstr; + + CU_SAFE_CALL_NS(hipDeviceTotalMem(&prop.totalGlobalMem,dev)); + CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.multiProcessorCount, hipDeviceAttributeMultiprocessorCount, dev)); + + CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.maxThreadsPerBlock, hipDeviceAttributeMaxThreadsPerBlock, dev)); + CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.maxThreadsDim[0], hipDeviceAttributeMaxBlockDimX, dev)); + CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.maxThreadsDim[1], hipDeviceAttributeMaxBlockDimY, dev)); + CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.maxThreadsDim[2], hipDeviceAttributeMaxBlockDimZ, dev)); + CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.maxGridSize[0], hipDeviceAttributeMaxGridDimX, dev)); + CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.maxGridSize[1], hipDeviceAttributeMaxGridDimY, dev)); + CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.maxGridSize[2], hipDeviceAttributeMaxGridDimZ, dev)); + CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.sharedMemPerBlock, hipDeviceAttributeMaxSharedMemoryPerBlock, dev)); + CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.totalConstantMemory, hipDeviceAttributeTotalConstantMemory, dev)); + CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.SIMDWidth, hipDeviceAttributeWarpSize, dev)); + //CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.memPitch, CU_DEVICE_ATTRIBUTE_MAX_PITCH, dev)); + CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.regsPerBlock, hipDeviceAttributeMaxRegistersPerBlock, dev)); + CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.clockRate, hipDeviceAttributeClockRate, dev)); + //CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.textureAlign, CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT, dev)); + + //#if CUDA_VERSION >= 2020 + //CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.kernelExecTimeoutEnabled, CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT,dev)); + CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.integrated, hipDeviceAttributeIntegrated, dev)); + 
//CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.canMapHostMemory, CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, dev)); + CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.computeMode, hipDeviceAttributeComputeMode,dev)); + //#endif + //#if CUDA_VERSION >= 3010 + CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.concurrentKernels, hipDeviceAttributeConcurrentKernels, dev)); + //CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.ECCEnabled, CU_DEVICE_ATTRIBUTE_ECC_ENABLED, dev)); + //#endif + + _properties.push_back(prop); + } + _device=-1; + _cq.push_back(hipStream_t()); + _cq.back()=0; +} + +UCL_Device::~UCL_Device() { + clear(); +} + +int UCL_Device::set_platform(const int pid) { + clear(); + #ifdef UCL_DEBUG + assert(pid-1) { + for (int i=1; i= 2020 + int driver_version; + hipDriverGetVersion(&driver_version); + out << "Driver Version: " + << driver_version/1000 << "." << driver_version%100 + << std::endl; + //#endif + + if (num_devices() == 0) + out << "There is no device supporting HIP\n"; + for (int i=0; i= 2000 + out << " Number of compute units/multiprocessors: " + << _properties[i].multiProcessorCount << std::endl; + out << " Number of cores: " + << cores(i) << std::endl; + //#endif + out << " Total amount of constant memory: " + << _properties[i].totalConstantMemory << " bytes\n"; + out << " Total amount of local/shared memory per block: " + << _properties[i].sharedMemPerBlock << " bytes\n"; + out << " Total number of registers available per block: " + << _properties[i].regsPerBlock << std::endl; + out << " Warp size: " + << _properties[i].SIMDWidth << std::endl; + out << " Maximum number of threads per block: " + << _properties[i].maxThreadsPerBlock << std::endl; + out << " Maximum group size (# of threads per block) " + << _properties[i].maxThreadsDim[0] << " x " + << _properties[i].maxThreadsDim[1] << " x " + << _properties[i].maxThreadsDim[2] << std::endl; + out << " Maximum item sizes (# threads for each dim) " + << _properties[i].maxGridSize[0] << " x " + << 
_properties[i].maxGridSize[1] << " x " + << _properties[i].maxGridSize[2] << std::endl; + //out << " Maximum memory pitch: " + // << max_pitch(i) << " bytes\n"; + //out << " Texture alignment: " + // << _properties[i].textureAlign << " bytes\n"; + out << " Clock rate: " + << clock_rate(i) << " GHz\n"; + //#if CUDA_VERSION >= 2020 + //out << " Run time limit on kernels: "; + //if (_properties[i].kernelExecTimeoutEnabled) + // out << "Yes\n"; + //else + // out << "No\n"; + out << " Integrated: "; + if (_properties[i].integrated) + out << "Yes\n"; + else + out << "No\n"; + //out << " Support host page-locked memory mapping: "; + //if (_properties[i].canMapHostMemory) + // out << "Yes\n"; + //else + // out << "No\n"; + out << " Compute mode: "; + if (_properties[i].computeMode == hipComputeModeDefault) + out << "Default\n"; // multiple threads can use device +//#if CUDA_VERSION >= 8000 +// else if (_properties[i].computeMode == hipComputeModeExclusiveProcess) +//#else + else if (_properties[i].computeMode == hipComputeModeExclusive) +//#endif + out << "Exclusive\n"; // only thread can use device + else if (_properties[i].computeMode == hipComputeModeProhibited) + out << "Prohibited\n"; // no thread can use device + //#if CUDART_VERSION >= 4000 + else if (_properties[i].computeMode == hipComputeModeExclusiveProcess) + out << "Exclusive Process\n"; // multiple threads 1 process + //#endif + else + out << "Unknown\n"; + //#endif + //#if CUDA_VERSION >= 3010 + out << " Concurrent kernel execution: "; + if (_properties[i].concurrentKernels) + out << "Yes\n"; + else + out << "No\n"; + //out << " Device has ECC support enabled: "; + //if (_properties[i].ECCEnabled) + // out << "Yes\n"; + //else + // out << "No\n"; + //#endif + } +} + +} + +#endif diff --git a/lib/gpu/geryon/hip_kernel.h b/lib/gpu/geryon/hip_kernel.h new file mode 100644 index 0000000000..654eb44772 --- /dev/null +++ b/lib/gpu/geryon/hip_kernel.h @@ -0,0 +1,298 @@ +/* 
----------------------------------------------------------------------- + Copyright (2010) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the Simplified BSD License. + ----------------------------------------------------------------------- */ + +#ifndef HIP_KERNEL +#define HIP_KERNEL + + +#include +#include "hip_device.h" +#include +#include +#include + +namespace ucl_hip { + +class UCL_Texture; +template class UCL_D_Vec; +template class UCL_D_Mat; +template class UCL_Vector; +template class UCL_Matrix; +#define UCL_MAX_KERNEL_ARGS 256 + +/// Class storing 1 or more kernel functions from a single string or file +class UCL_Program { + UCL_Device* _device_ptr; + public: + inline UCL_Program(UCL_Device &device) { _device_ptr = &device; _cq=device.cq(); } + inline UCL_Program(UCL_Device &device, const void *program, + const char *flags="", std::string *log=NULL) { + _device_ptr = &device; _cq=device.cq(); + init(device); + load_string(program,flags,log); + } + + inline ~UCL_Program() {} + + /// Initialize the program with a device + inline void init(UCL_Device &device) { _device_ptr = &device; _cq=device.cq(); } + + /// Clear any data associated with program + /** \note Must call init() after each clear **/ + inline void clear() { } + + /// Load a program from a file and compile with flags + inline int load(const char *filename, const char *flags="", std::string *log=NULL) { + std::ifstream in(filename); + if (!in || in.is_open()==false) { + #ifndef UCL_NO_EXIT + std::cerr << "UCL Error: Could not open kernel file: " + << filename << std::endl; + UCL_GERYON_EXIT; + #endif + return UCL_FILE_NOT_FOUND; + } + + std::string program((std::istreambuf_iterator(in)), + std::istreambuf_iterator()); + in.close(); + return load_string(program.c_str(),flags,log); + } + + /// Load a program from a string and compile with flags + inline 
int load_string(const void *program, const char *flags="", std::string *log=NULL) { + return _device_ptr->load_module(program, _module, log); + } + + friend class UCL_Kernel; + private: + hipModule_t _module; + hipStream_t _cq; + friend class UCL_Texture; +}; + +/// Class for dealing with HIP module kernels +class UCL_Kernel { + public: + UCL_Kernel() : _dimensions(1), _num_args(0) { + _num_blocks[0]=0; + } + + UCL_Kernel(UCL_Program &program, const char *function) : + _dimensions(1), _num_args(0) { + _num_blocks[0]=0; + set_function(program,function); + _cq=program._cq; + } + + ~UCL_Kernel() {} + + /// Clear any function associated with the kernel (currently a no-op) + inline void clear() { } + + /// Get the kernel function from a program and adopt the program's command queue + /** \ret UCL_ERROR_FLAG (UCL_SUCCESS, UCL_FUNCTION_NOT_FOUND) **/ + inline int set_function(UCL_Program &program, const char *function) { + hipError_t err=hipModuleGetFunction(&_kernel,program._module,function); + if (err!=hipSuccess) { + #ifndef UCL_NO_EXIT + std::cerr << "UCL Error: Could not find function: " << function + << " in program.\n"; + UCL_GERYON_EXIT; + #endif + return UCL_FUNCTION_NOT_FOUND; + } + _cq=program._cq; + return UCL_SUCCESS; + } + + /// Set the kernel argument. + /** Only appending is implemented: index must equal the number of arguments + * already added, any other index asserts + * \note To set kernel parameter i (i>0), parameter i-1 must be set **/ + template + inline void set_arg(const unsigned index, const dtype * const arg) { + if (index==_num_args) + add_arg(arg); + else if (index<_num_args){ + assert(0==1); // overwriting an already-added argument is not implemented + } + else + assert(0==1); // Must add kernel parameters in sequential order + } + + /// Set a geryon container as a kernel argument. + template + inline void set_arg(const UCL_D_Vec * const arg) + { set_arg(&arg->begin()); } + + /// Set a geryon container as a kernel argument. 
+ template + inline void set_arg(const UCL_D_Mat * const arg) + { set_arg(&arg->begin()); } + + /// Set a geryon container as a kernel argument. + template + inline void set_arg(const UCL_Vector * const arg) + { set_arg(&arg->device.begin()); } + + /// Set a geryon container as a kernel argument. + template + inline void set_arg(const UCL_Matrix * const arg) + { set_arg(&arg->device.begin()); } + + /// Add a device pointer as a kernel argument (forwarded to the generic add_arg through a void** cast). + inline void add_arg(const hipDeviceptr_t* const arg) { + add_arg((void**)arg); + } + + /// Add a kernel argument by value: *arg is copied into the packed argument buffer at the next offset that is a multiple of alignof(dtype); asserts once more than UCL_MAX_KERNEL_ARGS arguments have been added. + template + inline void add_arg(const dtype* const arg) { + const auto old_size = _hip_kernel_args.size(); + const auto aligned_size = (old_size+alignof(dtype)-1) & ~(alignof(dtype)-1); + const auto arg_size = sizeof(dtype); + _hip_kernel_args.resize(aligned_size + arg_size); + *((dtype*)(&_hip_kernel_args[aligned_size])) = *arg; + _num_args++; + if (_num_args>UCL_MAX_KERNEL_ARGS) assert(0==1); + } + + /// Add a geryon container as a kernel argument (adds the value returned by its begin()). + template + inline void add_arg(const UCL_D_Vec * const arg) + { add_arg(&arg->begin()); } + + /// Add a geryon container as a kernel argument (adds the value returned by its begin()). + template + inline void add_arg(const UCL_D_Mat * const arg) + { add_arg(&arg->begin()); } + + /// Add a geryon container as a kernel argument (adds the value returned by its device.begin()). + template + inline void add_arg(const UCL_Vector * const arg) + { add_arg(&arg->device.begin()); } + + /// Add a geryon container as a kernel argument (adds the value returned by its device.begin()). 
+ template + inline void add_arg(const UCL_Matrix * const arg) + { add_arg(&arg->device.begin()); } + + /// Set the number of thread blocks and the number of threads in each block (1-D grid, 1-D block) + /** \note This should be called before any arguments have been added + \note The default command queue is used for the kernel execution **/ + inline void set_size(const size_t num_blocks, const size_t block_size) { + _dimensions=1; + _num_blocks[0]=num_blocks; + _num_blocks[1]=1; + _num_blocks[2]=1; + + _block_size[0]=block_size; + _block_size[1]=1; + _block_size[2]=1; + } + + /// Set the number of thread blocks and the number of threads in each block (1-D grid, 1-D block) + /** \note This should be called before any arguments have been added + \note The default command queue for the kernel is changed to cq **/ + inline void set_size(const size_t num_blocks, const size_t block_size, + command_queue &cq) + { _cq=cq; set_size(num_blocks,block_size); } + + /// Set the number of thread blocks and the number of threads in each block (2-D grid, 2-D block) + /** \note This should be called before any arguments have been added + \note The default command queue is used for the kernel execution **/ + inline void set_size(const size_t num_blocks_x, const size_t num_blocks_y, + const size_t block_size_x, const size_t block_size_y) { + _dimensions=2; + _num_blocks[0]=num_blocks_x; + _num_blocks[1]=num_blocks_y; + _num_blocks[2]=1; + + _block_size[0]=block_size_x; + _block_size[1]=block_size_y; + _block_size[2]=1; + } + + /// Set the number of thread blocks and the number of threads in each block (2-D grid, 2-D block) + /** \note This should be called before any arguments have been added + \note The default command queue for the kernel is changed to cq **/ + inline void set_size(const size_t num_blocks_x, const size_t num_blocks_y, + const size_t block_size_x, const size_t block_size_y, + command_queue &cq) + {_cq=cq; set_size(num_blocks_x, num_blocks_y, block_size_x, block_size_y);} + + /// Set the number of thread blocks and the number of threads in each block (2-D grid, 3-D block) + 
/** \note This should be called before any arguments have been added + \note The default command queue is used for the kernel execution **/ + inline void set_size(const size_t num_blocks_x, const size_t num_blocks_y, + const size_t block_size_x, + const size_t block_size_y, const size_t block_size_z) { + _dimensions=2; /* NOTE(review): stays 2 although the block has a z extent; the grid z extent is fixed at 1 */ + _num_blocks[0]=num_blocks_x; + _num_blocks[1]=num_blocks_y; + _num_blocks[2]=1; + + _block_size[0]=block_size_x; + _block_size[1]=block_size_y; + _block_size[2]=block_size_z; + } + + /// Set the number of thread blocks and the number of threads in each block (2-D grid, 3-D block) + /** \note This should be called before any arguments have been added + \note The default command queue for the kernel is changed to cq **/ + inline void set_size(const size_t num_blocks_x, const size_t num_blocks_y, + const size_t block_size_x, const size_t block_size_y, + const size_t block_size_z, command_queue &cq) { + _cq=cq; + set_size(num_blocks_x, num_blocks_y, block_size_x, block_size_y, + block_size_z); + } + + /// Launch the kernel on the stored command queue (_cq); the packed argument buffer is passed through the HIP_LAUNCH_PARAM_* config and no dynamic shared memory is requested + inline void run() { + size_t args_size = _hip_kernel_args.size(); + void *config[] = { + HIP_LAUNCH_PARAM_BUFFER_POINTER, (void*)_hip_kernel_args.data(), + HIP_LAUNCH_PARAM_BUFFER_SIZE, &args_size, + HIP_LAUNCH_PARAM_END + }; + const auto res = hipModuleLaunchKernel(_kernel,_num_blocks[0],_num_blocks[1], + _num_blocks[2],_block_size[0],_block_size[1], + _block_size[2],0,_cq, NULL, config); + CU_SAFE_CALL(res); +//#endif + } + + /// Clear any arguments associated with the kernel (resets the count and empties the packed buffer) + inline void clear_args() { + _num_args=0; + _hip_kernel_args.clear(); + } + + /// Return the default command queue/stream associated with this kernel + inline command_queue & cq() { return _cq; } + /// Change the default command queue associated with this kernel + inline void cq(command_queue &cq_in) { _cq=cq_in; } + #include "ucl_arg_kludge.h" + + private: + hipFunction_t _kernel; + hipStream_t _cq; + unsigned _dimensions; + unsigned _num_blocks[3]; + 
unsigned _num_args; + friend class UCL_Texture; + + unsigned _block_size[3]; + std::vector _hip_kernel_args; +}; + +} // namespace ucl_hip + +#endif + diff --git a/lib/gpu/geryon/hip_macros.h b/lib/gpu/geryon/hip_macros.h new file mode 100644 index 0000000000..9c9971b896 --- /dev/null +++ b/lib/gpu/geryon/hip_macros.h @@ -0,0 +1,83 @@ +#ifndef HIP_MACROS_H +#define HIP_MACROS_H + +#include +#include +#include + +//#if CUDA_VERSION >= 3020 +#define CUDA_INT_TYPE size_t +//#else +//#define CUDA_INT_TYPE unsigned +//#endif + +#ifdef MPI_GERYON +#include "mpi.h" +#define NVD_GERYON_EXIT do { \ + int is_final; \ + MPI_Finalized(&is_final); \ + if (!is_final) \ + MPI_Abort(MPI_COMM_WORLD,-1); \ + } while(0) +#else // no MPI: stop through assert (which is a no-op when NDEBUG is defined) +#define NVD_GERYON_EXIT assert(0==1) +#endif + +#ifndef UCL_GERYON_EXIT +#define UCL_GERYON_EXIT NVD_GERYON_EXIT +#endif + +#ifdef UCL_DEBUG +#define UCL_SYNC_DEBUG +#define UCL_DESTRUCT_CHECK +#endif + +#ifndef UCL_NO_API_CHECK + +#define CU_SAFE_CALL_NS( call ) do { \ + hipError_t err = call; \ + if( hipSuccess != err) { \ + fprintf(stderr, "HIP runtime error %d in call at file '%s' in line %i.\n", \ + err, __FILE__, __LINE__ ); \ + NVD_GERYON_EXIT; \ + } } while (0) + +#ifdef UCL_SYNC_DEBUG + +#define CU_SAFE_CALL( call ) do { \ + CU_SAFE_CALL_NS( call ); \ + hipError_t err=hipCtxSynchronize(); \ + if( hipSuccess != err) { \ + fprintf(stderr, "HIP runtime error %d in file '%s' in line %i.\n", \ + err, __FILE__, __LINE__ ); \ + NVD_GERYON_EXIT; \ + } } while (0) + +#else // no UCL_SYNC_DEBUG: check the return code only, without synchronizing + +#define CU_SAFE_CALL( call ) CU_SAFE_CALL_NS( call ) + +#endif + +#else // UCL_NO_API_CHECK defined + +// pass-through macros for performance reasons: the call is made but its return code is not checked +#define CU_SAFE_CALL_NS( call ) call +#define CU_SAFE_CALL( call) call + +#endif + +#ifdef UCL_DESTRUCT_CHECK + +#define CU_DESTRUCT_CALL( call) CU_SAFE_CALL( call) +#define CU_DESTRUCT_CALL_NS( call) CU_SAFE_CALL_NS( call) + +#else + +#define CU_DESTRUCT_CALL( call) call +#define CU_DESTRUCT_CALL_NS( call) call + +#endif + +#endif + diff --git a/lib/gpu/geryon/hip_mat.h 
b/lib/gpu/geryon/hip_mat.h new file mode 100644 index 0000000000..d9bbb4e521 --- /dev/null +++ b/lib/gpu/geryon/hip_mat.h @@ -0,0 +1,43 @@ +/* ----------------------------------------------------------------------- + Copyright (2010) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the Simplified BSD License. + ----------------------------------------------------------------------- */ + +/*! \file */ + +#ifndef HIP_MAT_H +#define HIP_MAT_H + + +#include +#include "hip_memory.h" + +/// Namespace for HIP routines +namespace ucl_hip { + +#define _UCL_MAT_ALLOW +#define _UCL_DEVICE_PTR_MAT +#include "ucl_basemat.h" +#include "ucl_h_vec.h" +#include "ucl_h_mat.h" +#include "ucl_d_vec.h" +#include "ucl_d_mat.h" +#include "ucl_s_obj_help.h" +#include "ucl_vector.h" +#include "ucl_matrix.h" +#undef _UCL_DEVICE_PTR_MAT +#undef _UCL_MAT_ALLOW + +#define UCL_COPY_ALLOW +#include "ucl_copy.h" +#undef UCL_COPY_ALLOW + +#define UCL_PRINT_ALLOW +#include "ucl_print.h" +#undef UCL_PRINT_ALLOW + +} // namespace ucl_hip + +#endif diff --git a/lib/gpu/geryon/hip_memory.h b/lib/gpu/geryon/hip_memory.h new file mode 100644 index 0000000000..13f60ad939 --- /dev/null +++ b/lib/gpu/geryon/hip_memory.h @@ -0,0 +1,279 @@ +/* ----------------------------------------------------------------------- + Copyright (2010) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the Simplified BSD License. 
+ ----------------------------------------------------------------------- */ + +#ifndef HIP_MEMORY_H +#define HIP_MEMORY_H + + +#include +#include +#include +#include +#include "hip_macros.h" +#include "hip_device.h" +#include "ucl_types.h" + +namespace ucl_hip { + +// -------------------------------------------------------------------------- +// - API Specific Types +// -------------------------------------------------------------------------- +//typedef dim3 ucl_kernel_dim; + +#ifdef __HIP_PLATFORM_NVCC__ // array-format enum declared locally for the NVCC platform +typedef enum hipArray_Format { + HIP_AD_FORMAT_UNSIGNED_INT8 = 0x01, + HIP_AD_FORMAT_UNSIGNED_INT16 = 0x02, + HIP_AD_FORMAT_UNSIGNED_INT32 = 0x03, + HIP_AD_FORMAT_SIGNED_INT8 = 0x08, + HIP_AD_FORMAT_SIGNED_INT16 = 0x09, + HIP_AD_FORMAT_SIGNED_INT32 = 0x0a, + HIP_AD_FORMAT_HALF = 0x10, + HIP_AD_FORMAT_FLOAT = 0x20 +}hipArray_Format; +#endif + +// -------------------------------------------------------------------------- +// - API SPECIFIC DEVICE POINTERS +// -------------------------------------------------------------------------- +typedef hipDeviceptr_t device_ptr; + +// -------------------------------------------------------------------------- +// - HOST MEMORY ALLOCATION ROUTINES (n is a byte count; UCL_NOT_PINNED uses malloc, UCL_WRITE_ONLY uses write-combined hipHostMalloc, any other kind uses default hipHostMalloc; kind2 is unused; the command queue is copied from cm/dev on success) +// -------------------------------------------------------------------------- +template +inline int _host_alloc(mat_type &mat, copy_type &cm, const size_t n, + const enum UCL_MEMOPT kind, const enum UCL_MEMOPT kind2){ + hipError_t err=hipSuccess; + if (kind==UCL_NOT_PINNED) + *(mat.host_ptr())=(typename mat_type::data_type*)malloc(n); + else if (kind==UCL_WRITE_ONLY) + err=hipHostMalloc((void **)mat.host_ptr(),n,hipHostMallocWriteCombined); + else + err=hipHostMalloc((void **)mat.host_ptr(),n,hipHostMallocDefault); + if (err!=hipSuccess || *(mat.host_ptr())==NULL) + return UCL_MEMORY_ERROR; + mat.cq()=cm.cq(); + return UCL_SUCCESS; +} + +template +inline int _host_alloc(mat_type &mat, UCL_Device &dev, const size_t n, + const enum UCL_MEMOPT kind, const enum UCL_MEMOPT kind2){ + 
hipError_t err=hipSuccess; + if (kind==UCL_NOT_PINNED) + *(mat.host_ptr())=(typename mat_type::data_type*)malloc(n); + else if (kind==UCL_WRITE_ONLY) + err=hipHostMalloc((void **)mat.host_ptr(),n,hipHostMallocWriteCombined); + else + err=hipHostMalloc((void **)mat.host_ptr(),n,hipHostMallocDefault); + if (err!=hipSuccess || *(mat.host_ptr())==NULL) + return UCL_MEMORY_ERROR; + mat.cq()=dev.cq(); + return UCL_SUCCESS; +} + +template +inline void _host_free(mat_type &mat) { + if (mat.kind()==UCL_VIEW) /* nothing is freed for a view */ + return; + else if (mat.kind()!=UCL_NOT_PINNED) + CU_DESTRUCT_CALL(hipHostFree(mat.begin())); + else + free(mat.begin()); +} + +template +inline int _host_resize(mat_type &mat, const size_t n) { + _host_free(mat); /* the old buffer is released first; its contents are not carried over */ + hipError_t err=hipSuccess; + if (mat.kind()==UCL_NOT_PINNED) + *(mat.host_ptr())=(typename mat_type::data_type*)malloc(n); + else if (mat.kind()==UCL_WRITE_ONLY) + err=hipHostMalloc((void **)mat.host_ptr(),n,hipHostMallocWriteCombined); + else + err=hipHostMalloc((void **)mat.host_ptr(),n,hipHostMallocDefault); + if (err!=hipSuccess || *(mat.host_ptr())==NULL) + return UCL_MEMORY_ERROR; + return UCL_SUCCESS; +} + +// -------------------------------------------------------------------------- +// - DEVICE MEMORY ALLOCATION ROUTINES (kind is unused; the linear overloads call hipMalloc with n bytes, the rows/cols overloads call hipMallocPitch with a row width of cols*sizeof(data_type) bytes and report the resulting pitch through the pitch argument) +// -------------------------------------------------------------------------- +template +inline int _device_alloc(mat_type &mat, copy_type &cm, const size_t n, + const enum UCL_MEMOPT kind) { + hipError_t err=hipMalloc((void**)&mat.cbegin(),n); + if (err!=hipSuccess) + return UCL_MEMORY_ERROR; + mat.cq()=cm.cq(); + return UCL_SUCCESS; +} + +template +inline int _device_alloc(mat_type &mat, UCL_Device &dev, const size_t n, + const enum UCL_MEMOPT kind) { + hipError_t err=hipMalloc((void**)&mat.cbegin(),n); + if (err!=hipSuccess) + return UCL_MEMORY_ERROR; + mat.cq()=dev.cq(); + return UCL_SUCCESS; +} + +template +inline int _device_alloc(mat_type &mat, copy_type &cm, const size_t rows, + const size_t cols, size_t &pitch, + const 
enum UCL_MEMOPT kind) { + hipError_t err; + size_t upitch; + err=hipMallocPitch((void**)&mat.cbegin(),&upitch, + cols*sizeof(typename mat_type::data_type),rows); + pitch=static_cast(upitch); /* assigned before the error check, so it is written even when the allocation failed */ + if (err!=hipSuccess) + return UCL_MEMORY_ERROR; + mat.cq()=cm.cq(); + return UCL_SUCCESS; +} + +template +inline int _device_alloc(mat_type &mat, UCL_Device &d, const size_t rows, + const size_t cols, size_t &pitch, + const enum UCL_MEMOPT kind) { + hipError_t err; + size_t upitch; + err=hipMallocPitch((void**)&mat.cbegin(),&upitch, + cols*sizeof(typename mat_type::data_type),rows); + pitch=static_cast(upitch); + if (err!=hipSuccess) + return UCL_MEMORY_ERROR; + mat.cq()=d.cq(); + return UCL_SUCCESS; +} + +template +inline void _device_free(mat_type &mat) { + if (mat.kind()!=UCL_VIEW){ /* nothing is freed for a view */ + CU_DESTRUCT_CALL(hipFree((void*)mat.cbegin())); + } +} + +template +inline int _device_resize(mat_type &mat, const size_t n) { + _device_free(mat); /* the old allocation is released before the new one is requested; contents are not copied */ + hipError_t err=hipMalloc((void**)&mat.cbegin(),n); + if (err!=hipSuccess) + return UCL_MEMORY_ERROR; + return UCL_SUCCESS; +} + +template +inline int _device_resize(mat_type &mat, const size_t rows, + const size_t cols, size_t &pitch) { + _device_free(mat); + hipError_t err; + size_t upitch; + err=hipMallocPitch((void**)&mat.cbegin(),&upitch, + cols*sizeof(typename mat_type::data_type),rows); + pitch=static_cast(upitch); + if (err!=hipSuccess) + return UCL_MEMORY_ERROR; + return UCL_SUCCESS; +} + +inline void _device_view(hipDeviceptr_t *ptr, hipDeviceptr_t &in) { + *ptr=in; +} + +template +inline void _device_view(hipDeviceptr_t *ptr, numtyp *in) { + *ptr=0; /* a typed pointer input yields a null device pointer; presumably a host pointer cannot be viewed as device memory here - TODO confirm */ +} + +inline void _device_view(hipDeviceptr_t *ptr, hipDeviceptr_t &in, + const size_t offset, const size_t numsize) { + *ptr=(hipDeviceptr_t)(((char*)in)+offset*numsize); /* advances the device pointer by offset*numsize bytes */ +} + +template +inline void _device_view(hipDeviceptr_t *ptr, numtyp *in, + const size_t offset, const size_t numsize) { + *ptr=0; +} + +// -------------------------------------------------------------------------- +// - 
DEVICE IMAGE ALLOCATION ROUTINES +// -------------------------------------------------------------------------- +template +inline void _device_image_alloc(mat_type &mat, copy_type &cm, const size_t rows, + const size_t cols) { + assert(0==1); /* image allocation is not implemented in this backend: all three image routines only assert */ +} + +template +inline void _device_image_alloc(mat_type &mat, UCL_Device &d, const size_t rows, + const size_t cols) { + assert(0==1); +} + +template +inline void _device_image_free(mat_type &mat) { + assert(0==1); +} + +// -------------------------------------------------------------------------- +// - ZERO ROUTINES (n is a byte count; the device version enqueues hipMemsetAsync on cq) +// -------------------------------------------------------------------------- +inline void _host_zero(void *ptr, const size_t n) { + memset(ptr,0,n); +} + +template +inline void _device_zero(mat_type &mat, const size_t n, command_queue &cq) { + CU_SAFE_CALL(hipMemsetAsync((void*)mat.cbegin(),0,n,cq)); +} + + +// -------------------------------------------------------------------------- +// - MEMCPY ROUTINES +// - _memcpy_kind computes (1 - src MEM_TYPE)*2 + (1 - dst MEM_TYPE) and casts the result to hipMemcpyKind. NOTE(review): this assumes MEM_TYPE is 1 for host and 0 for device containers, so that 0..3 line up with HostToHost, HostToDevice, DeviceToHost, DeviceToDevice - confirm against the container headers and the hipMemcpyKind definition. +// -------------------------------------------------------------------------- + + +template +hipMemcpyKind _memcpy_kind(mat1 &dst, const mat2 &src){ + assert(mat1::MEM_TYPE < 2 && mat2::MEM_TYPE < 2); + return (hipMemcpyKind)((1 - mat2::MEM_TYPE)*2 + (1 - mat1::MEM_TYPE)); +} + +template +inline void ucl_mv_cpy(mat1 &dst, const mat2 &src, const size_t n) { + CU_SAFE_CALL(hipMemcpy((void*)dst.begin(), (void*)src.begin(), n, _memcpy_kind(dst, src))); +} + +template +inline void ucl_mv_cpy(mat1 &dst, const mat2 &src, const size_t n, hipStream_t &cq) { + CU_SAFE_CALL(hipMemcpyAsync((void*)dst.begin(), (void*)src.begin(), n, _memcpy_kind(dst, src), cq)); +} + +template +inline void ucl_mv_cpy(mat1 &dst, const size_t dpitch, const mat2 &src, + const size_t spitch, const size_t cols, + const size_t rows) { + CU_SAFE_CALL(hipMemcpy2D((void*)dst.begin(), dpitch, (void*)src.begin(), spitch, cols, rows, _memcpy_kind(dst, src))); +} + +template +inline void ucl_mv_cpy(mat1 &dst, const size_t dpitch, const mat2 &src, + 
const size_t spitch, const size_t cols, + const size_t rows,hipStream_t &cq) { + CU_SAFE_CALL(hipMemcpy2DAsync((void*)dst.begin(), dpitch, (void*)src.begin(), spitch, cols, rows, _memcpy_kind(dst, src), cq)); +} + +} // namespace ucl_hip + +#endif + diff --git a/lib/gpu/geryon/hip_texture.h b/lib/gpu/geryon/hip_texture.h new file mode 100644 index 0000000000..e7aa4e1461 --- /dev/null +++ b/lib/gpu/geryon/hip_texture.h @@ -0,0 +1,113 @@ +/* ----------------------------------------------------------------------- + Copyright (2010) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the Simplified BSD License. + ----------------------------------------------------------------------- */ + +#ifndef HIP_TEXTURE +#define HIP_TEXTURE + + +#include +#include "hip_kernel.h" +#include "hip_mat.h" + +namespace ucl_hip { + +#ifdef __HIP_PLATFORM_NVCC__ // on the NVCC platform these wrappers forward to the CUDA driver texture-reference calls and convert the result code +inline hipError_t hipModuleGetTexRef(CUtexref* texRef, hipModule_t hmod, const char* name){ + return hipCUResultTohipError(cuModuleGetTexRef(texRef, hmod, name)); +} +inline hipError_t hipTexRefSetFormat(CUtexref tex, hipArray_Format fmt, int NumPackedComponents) { + return hipCUResultTohipError(cuTexRefSetFormat(tex, (CUarray_format)fmt, NumPackedComponents )); +} +inline hipError_t hipTexRefSetAddress(size_t* offset, CUtexref tex, hipDeviceptr_t devPtr, size_t size) { + return hipCUResultTohipError(cuTexRefSetAddress(offset, tex, devPtr, size)); +} +#endif + +/// Class storing a texture reference (NVCC platform) or the device address of a module global variable (other platforms) +class UCL_Texture { + public: + UCL_Texture() {} + ~UCL_Texture() {} + /// Construct with a specified texture reference + inline UCL_Texture(UCL_Program &prog, const char *texture_name) + { get_texture(prog,texture_name); } + /// Look up texture_name in the program's module and store the resulting handle in this object + inline void get_texture(UCL_Program &prog, const char *texture_name) + { + #ifdef __HIP_PLATFORM_NVCC__ + 
CU_SAFE_CALL(hipModuleGetTexRef(&_tex, prog._module, texture_name)); + #else /* other platforms: the name is resolved as a module global variable instead of a texture reference */ + size_t _global_var_size; + CU_SAFE_CALL(hipModuleGetGlobal(&_device_ptr_to_global_var, &_global_var_size, prog._module, texture_name)); + #endif + } + + /// Bind a float array where each fetch grabs a vector of length numel + template + inline void bind_float(UCL_D_Vec &vec, const unsigned numel) + { _bind_float(vec,numel); } + + /// Bind a float array where each fetch grabs a vector of length numel + template + inline void bind_float(UCL_D_Mat &vec, const unsigned numel) + { _bind_float(vec,numel); } + + /// Bind a float array where each fetch grabs a vector of length numel (uses the container's device member) + template + inline void bind_float(UCL_Vector &vec, const unsigned numel) + { _bind_float(vec.device,numel); } + + /// Bind a float array where each fetch grabs a vector of length numel (uses the container's device member) + template + inline void bind_float(UCL_Matrix &vec, const unsigned numel) + { _bind_float(vec.device,numel); } + + /// Unbind the texture reference from the memory allocation (currently a no-op) + inline void unbind() { } + + /// Make a texture reference available to kernel (currently a no-op; the body is commented out) + inline void allow(UCL_Kernel &kernel) { + //#if CUDA_VERSION < 4000 + //CU_SAFE_CALL(cuParamSetTexRef(kernel._kernel, CU_PARAM_TR_DEFAULT, _tex)); + //#endif + } + + private: +#ifdef __HIP_PLATFORM_NVCC__ + CUtexref _tex; +#else + void* _device_ptr_to_global_var; +#endif + friend class UCL_Kernel; + + template + inline void _bind_float(mat_typ &vec, const unsigned numel) { + #ifdef UCL_DEBUG + assert(numel!=0 && numel<5); + #endif + +#ifdef __HIP_PLATFORM_NVCC__ + if (vec.element_size()==sizeof(float)) + CU_SAFE_CALL(hipTexRefSetFormat(_tex, HIP_AD_FORMAT_FLOAT, numel)); + else { // elements are not float-sized: the texture is declared as packed signed 32-bit ints + if (numel>2) + CU_SAFE_CALL(hipTexRefSetFormat(_tex, HIP_AD_FORMAT_SIGNED_INT32, numel)); + else + CU_SAFE_CALL(hipTexRefSetFormat(_tex,HIP_AD_FORMAT_SIGNED_INT32,numel*2)); + } + CU_SAFE_CALL(hipTexRefSetAddress(NULL, _tex, vec.cbegin(), vec.numel()*vec.element_size())); +#else // other platforms: the device address of vec is written into the module global variable + void* data_ptr = 
(void*)vec.cbegin(); + CU_SAFE_CALL(hipMemcpyHtoD(hipDeviceptr_t(_device_ptr_to_global_var), &data_ptr, sizeof(void*))); +#endif + } +}; + +} // namespace ucl_hip + +#endif + diff --git a/lib/gpu/geryon/hip_timer.h b/lib/gpu/geryon/hip_timer.h new file mode 100644 index 0000000000..3be0b8cfd6 --- /dev/null +++ b/lib/gpu/geryon/hip_timer.h @@ -0,0 +1,107 @@ +/* ----------------------------------------------------------------------- + Copyright (2010) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the Simplified BSD License. + ----------------------------------------------------------------------- */ + +#ifndef HIP_TIMER_H +#define HIP_TIMER_H + + +#include +#include "hip_macros.h" +#include "hip_device.h" + +namespace ucl_hip { + +/// Class for timing with HIP events +class UCL_Timer { + public: + inline UCL_Timer() : _total_time(0.0f), _initialized(false) { } + inline UCL_Timer(UCL_Device &dev) : _total_time(0.0f), _initialized(false) + { init(dev); } + + inline ~UCL_Timer() { clear(); } + + /// Destroy the events and reset the total time (does nothing if the timer is not initialized) + /** \note init() must be called to reuse timer after a clear() **/ + inline void clear() { + if (_initialized) { + CU_DESTRUCT_CALL(hipEventDestroy(start_event)); + CU_DESTRUCT_CALL(hipEventDestroy(stop_event)); + _initialized=false; + _total_time=0.0; + } + } + + /// Initialize default command queue for timing + inline void init(UCL_Device &dev) { init(dev, dev.cq()); } + + /// (Re)initialize for timing on cq: clears any previous state, stores cq and creates the start/stop events + inline void init(UCL_Device &dev, command_queue &cq) { + clear(); + _cq=cq; + _initialized=true; + CU_SAFE_CALL( hipEventCreateWithFlags(&start_event,0) ); + CU_SAFE_CALL( hipEventCreateWithFlags(&stop_event,0) ); + } + + /// Start timing on command queue (records the start event) + inline void start() { CU_SAFE_CALL(hipEventRecord(start_event,_cq)); } + + /// Stop timing on command queue (records the stop event) + inline void stop() { 
CU_SAFE_CALL(hipEventRecord(stop_event,_cq)); } + + /// Block until the start event has been reached on device + inline void sync_start() + { CU_SAFE_CALL(hipEventSynchronize(start_event)); } + + /// Block until the stop event has been reached on device + inline void sync_stop() + { CU_SAFE_CALL(hipEventSynchronize(stop_event)); } + + /// Set the time elapsed to zero (not the total_time) by recording the start and stop events back to back + inline void zero() { + CU_SAFE_CALL(hipEventRecord(start_event,_cq)); + CU_SAFE_CALL(hipEventRecord(stop_event,_cq)); + } + + /// Set the total time to zero + inline void zero_total() { _total_time=0.0; } + + /// Add time (ms) from previous start and stop to total; the return value is that same time in seconds + /** Forces synchronization **/ + inline double add_to_total() + { double t=time(); _total_time+=t; return t/1000.0; } + + /// Add a user specified time to the total (ms) + inline void add_time_to_total(const double t) { _total_time+=t; } + + /// Return the time (ms) of last start to stop - Forces synchronization + inline double time() { + float timer; + CU_SAFE_CALL(hipEventSynchronize(stop_event)); + CU_SAFE_CALL( hipEventElapsedTime(&timer,start_event,stop_event) ); + return timer; + } + + /// Return the time (s) of last start to stop - Forces synchronization + inline double seconds() { return time()/1000.0; } + + /// Return the total time in ms + inline double total_time() { return _total_time; } + + /// Return the total time in seconds + inline double total_seconds() { return _total_time/1000.0; } + + private: + hipEvent_t start_event, stop_event; + hipStream_t _cq; + double _total_time; + bool _initialized; +}; + +} // namespace ucl_hip + +#endif diff --git a/lib/gpu/geryon/ucl_get_devices.cpp b/lib/gpu/geryon/ucl_get_devices.cpp index 1fa758fb46..b8dfc6f7b1 100644 --- a/lib/gpu/geryon/ucl_get_devices.cpp +++ b/lib/gpu/geryon/ucl_get_devices.cpp @@ -36,6 +36,11 @@ using namespace ucl_cudadr; using namespace ucl_cudart; #endif +#ifdef UCL_HIP +#include "hip_device.h" +using namespace ucl_hip; +#endif + int main(int argc, 
char** argv) { UCL_Device cop; std::cout << "Found " << cop.num_platforms() << " platform(s).\n"; diff --git a/lib/gpu/lal_answer.cpp b/lib/gpu/lal_answer.cpp index aa6d33d334..95d40c0d0a 100644 --- a/lib/gpu/lal_answer.cpp +++ b/lib/gpu/lal_answer.cpp @@ -179,13 +179,15 @@ double AnswerT::energy_virial(double *eatom, double **vatom, if (_eflag) { for (int i=0; i<_inum; i++) evdwl+=engv[i]; - if (_ef_atom) - if (_ilist==NULL) + if (_ef_atom) { + if (_ilist==NULL) { for (int i=0; i<_inum; i++) eatom[i]+=engv[i]; - else + } else { for (int i=0; i<_inum; i++) eatom[_ilist[i]]+=engv[i]; + } + } vstart=_inum; } if (_vflag) { @@ -193,7 +195,7 @@ double AnswerT::energy_virial(double *eatom, double **vatom, for (int j=0; j<6; j++) { for (int i=vstart; i +#include +#endif + namespace LAMMPS_AL { #define AtomT Atom @@ -70,6 +75,26 @@ bool AtomT::alloc(const int nall) { } #endif + #ifdef USE_HIP_DEVICE_SORT + if (_gpu_nbor==1) { + size_t temp_storage_bytes = 0; + if(hipSuccess != hipcub::DeviceRadixSort::SortPairs(nullptr, temp_storage_bytes, sort_out_keys, sort_out_keys, sort_out_values, sort_out_values, _max_atoms)) + return false; + if(sort_out_size < _max_atoms){ + if (sort_out_keys ) hipFree(sort_out_keys); + if (sort_out_values) hipFree(sort_out_values); + hipMalloc(&sort_out_keys , _max_atoms * sizeof(unsigned)); + hipMalloc(&sort_out_values, _max_atoms * sizeof(int )); + sort_out_size = _max_atoms; + } + if(temp_storage_bytes > sort_temp_storage_size){ + if(sort_temp_storage) hipFree(sort_temp_storage); + hipMalloc(&sort_temp_storage, temp_storage_bytes); + sort_temp_storage_size = temp_storage_bytes; + } + } + #endif + // --------------------------- Device allocations int gpu_bytes=0; success=success && (x.alloc(_max_atoms*4,*dev,UCL_WRITE_ONLY, @@ -184,6 +209,27 @@ bool AtomT::add_fields(const bool charge, const bool rot, return false; } #endif + + #ifdef USE_HIP_DEVICE_SORT + if (_gpu_nbor==1) { + size_t temp_storage_bytes = 0; + if(hipSuccess != 
hipcub::DeviceRadixSort::SortPairs(nullptr, temp_storage_bytes, sort_out_keys, sort_out_keys, sort_out_values, sort_out_values, _max_atoms)) + return false; + if(sort_out_size < _max_atoms){ + if (sort_out_keys ) hipFree(sort_out_keys); + if (sort_out_values) hipFree(sort_out_values); + hipMalloc(&sort_out_keys , _max_atoms * sizeof(unsigned)); + hipMalloc(&sort_out_values, _max_atoms * sizeof(int )); + sort_out_size = _max_atoms; + } + if(temp_storage_bytes > sort_temp_storage_size){ + if(sort_temp_storage) hipFree(sort_temp_storage); + hipMalloc(&sort_temp_storage, temp_storage_bytes); + sort_temp_storage_size = temp_storage_bytes; + } + } + #endif + success=success && (dev_particle_id.alloc(_max_atoms,*dev, UCL_READ_ONLY)==UCL_SUCCESS); gpu_bytes+=dev_particle_id.row_bytes(); @@ -275,6 +321,19 @@ void AtomT::clear_resize() { if (_gpu_nbor==1) cudppDestroyPlan(sort_plan); #endif + #ifdef USE_HIP_DEVICE_SORT + if (_gpu_nbor==1) { + if(sort_out_keys) hipFree(sort_out_keys); + if(sort_out_values) hipFree(sort_out_values); + if(sort_temp_storage) hipFree(sort_temp_storage); + sort_out_keys = nullptr; + sort_out_values = nullptr; + sort_temp_storage = nullptr; + sort_temp_storage_size = 0; + sort_out_size = 0; + } + #endif + if (_gpu_nbor==2) { host_particle_id.clear(); host_cell_id.clear(); @@ -326,6 +385,22 @@ void AtomT::sort_neighbor(const int num_atoms) { UCL_GERYON_EXIT; } #endif + + #ifdef USE_HIP_DEVICE_SORT + if(sort_out_size < num_atoms){ + printf("AtomT::sort_neighbor: invalid temp buffer size\n"); + UCL_GERYON_EXIT; + } + if(hipSuccess != hipcub::DeviceRadixSort::SortPairs(sort_temp_storage, sort_temp_storage_size, (unsigned *)dev_cell_id.begin(), sort_out_keys, (int *)dev_particle_id.begin(), sort_out_values, num_atoms)){ + printf("AtomT::sort_neighbor: DeviceRadixSort error\n"); + UCL_GERYON_EXIT; + } + if(hipSuccess != hipMemcpy((unsigned *)dev_cell_id.begin(), sort_out_keys , num_atoms*sizeof(unsigned), hipMemcpyDeviceToDevice) || + hipSuccess != 
hipMemcpy((int *) dev_particle_id.begin(), sort_out_values, num_atoms*sizeof(int ), hipMemcpyDeviceToDevice)){ + printf("AtomT::sort_neighbor: copy output error\n"); + UCL_GERYON_EXIT; + } + #endif } #ifdef GPU_CAST diff --git a/lib/gpu/lal_atom.cu b/lib/gpu/lal_atom.cu index 28ff31c566..99c76ba625 100644 --- a/lib/gpu/lal_atom.cu +++ b/lib/gpu/lal_atom.cu @@ -13,7 +13,7 @@ // email : brownw@ornl.gov // ***************************************************************************/ -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_preprocessor.h" #endif diff --git a/lib/gpu/lal_atom.h b/lib/gpu/lal_atom.h index 57880d7ca9..e39740d6c8 100644 --- a/lib/gpu/lal_atom.h +++ b/lib/gpu/lal_atom.h @@ -29,6 +29,11 @@ using namespace ucl_opencl; #include "geryon/nvc_mat.h" #include "geryon/nvc_kernel.h" using namespace ucl_cudart; +#elif defined(USE_HIP) +#include "geryon/hip_timer.h" +#include "geryon/hip_mat.h" +#include "geryon/hip_kernel.h" +using namespace ucl_hip; #else #include "geryon/nvd_timer.h" #include "geryon/nvd_mat.h" @@ -477,6 +482,14 @@ class Atom { CUDPPConfiguration sort_config; CUDPPHandle sort_plan; #endif + + #ifdef USE_HIP_DEVICE_SORT + unsigned* sort_out_keys = nullptr; + int* sort_out_values = nullptr; + void* sort_temp_storage = nullptr; + size_t sort_temp_storage_size = 0; + size_t sort_out_size = 0; + #endif }; } diff --git a/lib/gpu/lal_aux_fun1.h b/lib/gpu/lal_aux_fun1.h index 47a216ff6f..5b7150d950 100644 --- a/lib/gpu/lal_aux_fun1.h +++ b/lib/gpu/lal_aux_fun1.h @@ -13,7 +13,7 @@ // email : brownw@ornl.gov // ***************************************************************************/ -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_preprocessor.h" #endif diff --git a/lib/gpu/lal_base_atomic.h b/lib/gpu/lal_base_atomic.h index e3e9829abc..fef810b17b 100644 --- a/lib/gpu/lal_base_atomic.h +++ b/lib/gpu/lal_base_atomic.h @@ -24,6 +24,8 @@ #include "geryon/ocl_texture.h" #elif defined(USE_CUDART) 
#include "geryon/nvc_texture.h" +#elif defined(USE_HIP) +#include "geryon/hip_texture.h" #else #include "geryon/nvd_texture.h" #endif diff --git a/lib/gpu/lal_base_charge.h b/lib/gpu/lal_base_charge.h index 64c19554b9..ea81dcdc4e 100644 --- a/lib/gpu/lal_base_charge.h +++ b/lib/gpu/lal_base_charge.h @@ -25,6 +25,8 @@ #include "geryon/ocl_texture.h" #elif defined(USE_CUDART) #include "geryon/nvc_texture.h" +#elif defined(USE_HIP) +#include "geryon/hip_texture.h" #else #include "geryon/nvd_texture.h" #endif diff --git a/lib/gpu/lal_base_dipole.h b/lib/gpu/lal_base_dipole.h index b51c4303cf..31a2a2d5f7 100644 --- a/lib/gpu/lal_base_dipole.h +++ b/lib/gpu/lal_base_dipole.h @@ -23,6 +23,8 @@ #ifdef USE_OPENCL #include "geryon/ocl_texture.h" +#elif defined(USE_HIP) +#include "geryon/hip_texture.h" #else #include "geryon/nvd_texture.h" #endif diff --git a/lib/gpu/lal_base_dpd.h b/lib/gpu/lal_base_dpd.h index 7a75282d0a..1e6f2ab1f2 100644 --- a/lib/gpu/lal_base_dpd.h +++ b/lib/gpu/lal_base_dpd.h @@ -23,6 +23,8 @@ #ifdef USE_OPENCL #include "geryon/ocl_texture.h" +#elif defined(USE_HIP) +#include "geryon/hip_texture.h" #else #include "geryon/nvd_texture.h" #endif diff --git a/lib/gpu/lal_base_ellipsoid.h b/lib/gpu/lal_base_ellipsoid.h index 7deeccbf44..061baac5b6 100644 --- a/lib/gpu/lal_base_ellipsoid.h +++ b/lib/gpu/lal_base_ellipsoid.h @@ -24,6 +24,8 @@ #include "geryon/ocl_texture.h" #elif defined(USE_CUDART) #include "geryon/nvc_texture.h" +#elif defined(USE_HIP) +#include "geryon/hip_texture.h" #else #include "geryon/nvd_texture.h" #endif diff --git a/lib/gpu/lal_base_three.h b/lib/gpu/lal_base_three.h index f5f36863c4..75589f705d 100644 --- a/lib/gpu/lal_base_three.h +++ b/lib/gpu/lal_base_three.h @@ -24,6 +24,8 @@ #include "geryon/ocl_texture.h" #elif defined(USE_CUDART) #include "geryon/nvc_texture.h" +#elif defined(USE_HIP) +#include "geryon/hip_texture.h" #else #include "geryon/nvd_texture.h" #endif diff --git a/lib/gpu/lal_beck.cu b/lib/gpu/lal_beck.cu index 
7d72128b5f..bdfa57a0ce 100644 --- a/lib/gpu/lal_beck.cu +++ b/lib/gpu/lal_beck.cu @@ -13,12 +13,12 @@ // email : nguyentd@ornl.gov // ***************************************************************************/ -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_aux_fun1.h" #ifndef _DOUBLE_DOUBLE -texture pos_tex; +_texture( pos_tex,float4); #else -texture pos_tex; +_texture_2d( pos_tex,int4); #endif #else #define pos_tex x_ diff --git a/lib/gpu/lal_born.cu b/lib/gpu/lal_born.cu index 0ca7fea5fe..6e1d7d95a0 100644 --- a/lib/gpu/lal_born.cu +++ b/lib/gpu/lal_born.cu @@ -13,12 +13,12 @@ // email : nguyentd@ornl.gov // ***************************************************************************/ -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_aux_fun1.h" #ifndef _DOUBLE_DOUBLE -texture pos_tex; +_texture( pos_tex,float4); #else -texture pos_tex; +_texture_2d( pos_tex,int4); #endif #else #define pos_tex x_ diff --git a/lib/gpu/lal_born_coul_long.cu b/lib/gpu/lal_born_coul_long.cu index 71e5e0ae50..441ce4beb5 100644 --- a/lib/gpu/lal_born_coul_long.cu +++ b/lib/gpu/lal_born_coul_long.cu @@ -13,15 +13,15 @@ // email : nguyentd@ornl.gov // ***************************************************************************/ -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_aux_fun1.h" #ifndef _DOUBLE_DOUBLE -texture pos_tex; -texture q_tex; +_texture( pos_tex,float4); +_texture( q_tex,float); #else -texture pos_tex; -texture q_tex; +_texture_2d( pos_tex,int4); +_texture( q_tex,int2); #endif #else diff --git a/lib/gpu/lal_born_coul_long_cs.cu b/lib/gpu/lal_born_coul_long_cs.cu index b3e79d9ec8..f4b6da2d0d 100644 --- a/lib/gpu/lal_born_coul_long_cs.cu +++ b/lib/gpu/lal_born_coul_long_cs.cu @@ -13,15 +13,16 @@ // email : ndactrung@gmail.com // ***************************************************************************/ -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_aux_fun1.h" + 
#ifndef _DOUBLE_DOUBLE -texture pos_tex; -texture q_tex; +_texture( pos_tex,float4); +_texture( q_tex,float); #else -texture pos_tex; -texture q_tex; +_texture_2d( pos_tex,int4); +_texture( q_tex,int2); #endif #else diff --git a/lib/gpu/lal_born_coul_wolf.cu b/lib/gpu/lal_born_coul_wolf.cu index 2c2249feeb..e34367e18b 100644 --- a/lib/gpu/lal_born_coul_wolf.cu +++ b/lib/gpu/lal_born_coul_wolf.cu @@ -13,15 +13,15 @@ // email : nguyentd@ornl.gov // ***************************************************************************/ -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_aux_fun1.h" #ifndef _DOUBLE_DOUBLE -texture pos_tex; -texture q_tex; +_texture( pos_tex,float4); +_texture( q_tex,float); #else -texture pos_tex; -texture q_tex; +_texture_2d( pos_tex,int4); +_texture( q_tex,int2); #endif #else diff --git a/lib/gpu/lal_born_coul_wolf_cs.cu b/lib/gpu/lal_born_coul_wolf_cs.cu index 847387bfe8..1a02420736 100644 --- a/lib/gpu/lal_born_coul_wolf_cs.cu +++ b/lib/gpu/lal_born_coul_wolf_cs.cu @@ -13,15 +13,15 @@ // email : ndactrung@gmail.com // ***************************************************************************/ -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_aux_fun1.h" #ifndef _DOUBLE_DOUBLE -texture pos_tex; -texture q_tex; +_texture( pos_tex,float4); +_texture( q_tex,float); #else -texture pos_tex; -texture q_tex; +_texture_2d( pos_tex,int4); +_texture( q_tex,int2); #endif #else diff --git a/lib/gpu/lal_buck.cu b/lib/gpu/lal_buck.cu index c1e1c7d7e2..c23186f2d8 100644 --- a/lib/gpu/lal_buck.cu +++ b/lib/gpu/lal_buck.cu @@ -13,12 +13,12 @@ // email : nguyentd@ornl.gov // ***************************************************************************/ -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_aux_fun1.h" #ifndef _DOUBLE_DOUBLE -texture pos_tex; +_texture( pos_tex,float4); #else -texture pos_tex; +_texture_2d( pos_tex,int4); #endif #else #define pos_tex x_ diff --git 
a/lib/gpu/lal_buck_coul.cu b/lib/gpu/lal_buck_coul.cu index 6f0d414825..2282532f4a 100644 --- a/lib/gpu/lal_buck_coul.cu +++ b/lib/gpu/lal_buck_coul.cu @@ -13,15 +13,15 @@ // email : nguyentd@ornl.gov // ***************************************************************************/ -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_aux_fun1.h" #ifndef _DOUBLE_DOUBLE -texture pos_tex; -texture q_tex; +_texture( pos_tex,float4); +_texture( q_tex,float); #else -texture pos_tex; -texture q_tex; +_texture_2d( pos_tex,int4); +_texture( q_tex,int2); #endif #else diff --git a/lib/gpu/lal_buck_coul_long.cu b/lib/gpu/lal_buck_coul_long.cu index da3237a31f..469c235571 100644 --- a/lib/gpu/lal_buck_coul_long.cu +++ b/lib/gpu/lal_buck_coul_long.cu @@ -13,15 +13,15 @@ // email : nguyentd@ornl.gov // ***************************************************************************/ -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_aux_fun1.h" #ifndef _DOUBLE_DOUBLE -texture pos_tex; -texture q_tex; +_texture( pos_tex,float4); +_texture( q_tex,float); #else -texture pos_tex; -texture q_tex; +_texture_2d( pos_tex,int4); +_texture( q_tex,int2); #endif #else diff --git a/lib/gpu/lal_charmm_long.cu b/lib/gpu/lal_charmm_long.cu index 244131f833..a797707057 100644 --- a/lib/gpu/lal_charmm_long.cu +++ b/lib/gpu/lal_charmm_long.cu @@ -13,15 +13,15 @@ // email : brownw@ornl.gov // ***************************************************************************/ -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_aux_fun1.h" #ifndef _DOUBLE_DOUBLE -texture pos_tex; -texture q_tex; +_texture( pos_tex,float4); +_texture( q_tex,float); #else -texture pos_tex; -texture q_tex; +_texture_2d( pos_tex,int4); +_texture( q_tex,int2); #endif #else diff --git a/lib/gpu/lal_colloid.cu b/lib/gpu/lal_colloid.cu index 28a9809b19..437faff25b 100644 --- a/lib/gpu/lal_colloid.cu +++ b/lib/gpu/lal_colloid.cu @@ -13,12 +13,12 @@ // email : 
nguyentd@ornl.gov // ***************************************************************************/ -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_aux_fun1.h" #ifndef _DOUBLE_DOUBLE -texture pos_tex; +_texture( pos_tex,float4); #else -texture pos_tex; +_texture_2d( pos_tex,int4); #endif #else #define pos_tex x_ diff --git a/lib/gpu/lal_coul.cu b/lib/gpu/lal_coul.cu index 503e674c81..21d849bb6f 100644 --- a/lib/gpu/lal_coul.cu +++ b/lib/gpu/lal_coul.cu @@ -13,15 +13,15 @@ // email : ndtrung@umich.edu // ***************************************************************************/ -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_aux_fun1.h" #ifndef _DOUBLE_DOUBLE -texture pos_tex; -texture q_tex; +_texture( pos_tex,float4); +_texture( q_tex,float); #else -texture pos_tex; -texture q_tex; +_texture_2d( pos_tex,int4); +_texture( q_tex,int2); #endif #else diff --git a/lib/gpu/lal_coul_debye.cu b/lib/gpu/lal_coul_debye.cu index 464a1b18de..ab8bc5b961 100644 --- a/lib/gpu/lal_coul_debye.cu +++ b/lib/gpu/lal_coul_debye.cu @@ -13,15 +13,15 @@ // email : ndtrung@umich.edu // ***************************************************************************/ -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_aux_fun1.h" #ifndef _DOUBLE_DOUBLE -texture pos_tex; -texture q_tex; +_texture( pos_tex,float4); +_texture( q_tex,float); #else -texture pos_tex; -texture q_tex; +_texture_2d( pos_tex,int4); +_texture( q_tex,int2); #endif #else diff --git a/lib/gpu/lal_coul_dsf.cu b/lib/gpu/lal_coul_dsf.cu index 82c44cd382..147ac68552 100644 --- a/lib/gpu/lal_coul_dsf.cu +++ b/lib/gpu/lal_coul_dsf.cu @@ -13,15 +13,15 @@ // email : nguyentd@ornl.gov // ***************************************************************************/ -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_aux_fun1.h" #ifndef _DOUBLE_DOUBLE -texture pos_tex; -texture q_tex; +_texture( pos_tex,float4); +_texture( 
q_tex,float); #else -texture pos_tex; -texture q_tex; +_texture_2d( pos_tex,int4); +_texture( q_tex,int2); #endif #else diff --git a/lib/gpu/lal_coul_long.cu b/lib/gpu/lal_coul_long.cu index 365195e00c..f97a039629 100644 --- a/lib/gpu/lal_coul_long.cu +++ b/lib/gpu/lal_coul_long.cu @@ -13,15 +13,15 @@ // email : a.kohlmeyer@temple.edu // ***************************************************************************/ -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_aux_fun1.h" #ifndef _DOUBLE_DOUBLE -texture pos_tex; -texture q_tex; +_texture( pos_tex,float4); +_texture( q_tex,float); #else -texture pos_tex; -texture q_tex; +_texture_2d( pos_tex,int4); +_texture( q_tex,int2); #endif #else diff --git a/lib/gpu/lal_coul_long_cs.cu b/lib/gpu/lal_coul_long_cs.cu index 3c34666131..1479157944 100644 --- a/lib/gpu/lal_coul_long_cs.cu +++ b/lib/gpu/lal_coul_long_cs.cu @@ -13,15 +13,15 @@ // email : ndactrung@gmail.com // ***************************************************************************/ -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_aux_fun1.h" #ifndef _DOUBLE_DOUBLE -texture pos_tex; -texture q_tex; +_texture( pos_tex,float4); +_texture( q_tex,float); #else -texture pos_tex; -texture q_tex; +_texture_2d( pos_tex,int4); +_texture( q_tex,int2); #endif #else diff --git a/lib/gpu/lal_device.cpp b/lib/gpu/lal_device.cpp index 5bd306ea5b..aaf74ed28c 100644 --- a/lib/gpu/lal_device.cpp +++ b/lib/gpu/lal_device.cpp @@ -268,7 +268,7 @@ int DeviceT::init(Answer &ans, const bool charge, gpu_nbor=1; else if (_gpu_mode==Device::GPU_HYB_NEIGH) gpu_nbor=2; - #ifndef USE_CUDPP + #if !defined(USE_CUDPP) && !defined(USE_HIP_DEVICE_SORT) if (gpu_nbor==1) gpu_nbor=2; #endif @@ -341,7 +341,7 @@ int DeviceT::init_nbor(Neighbor *nbor, const int nlocal, gpu_nbor=1; else if (_gpu_mode==Device::GPU_HYB_NEIGH) gpu_nbor=2; - #ifndef USE_CUDPP + #if !defined(USE_CUDPP) && !defined(USE_HIP_DEVICE_SORT) if (gpu_nbor==1) gpu_nbor=2; #endif @@ 
-712,7 +712,7 @@ int DeviceT::compile_kernels() { gpu_lib_data.update_host(false); _ptx_arch=static_cast<double>(gpu_lib_data[0])/100.0; - #ifndef USE_OPENCL + #if !(defined(USE_OPENCL) || defined(USE_HIP)) if (_ptx_arch>gpu->arch() || floor(_ptx_arch)<floor(gpu->arch())) return -4; #endif diff --git a/lib/gpu/lal_device.cu b/lib/gpu/lal_device.cu index 37d0758845..afc7a0b988 100644 --- a/lib/gpu/lal_device.cu +++ b/lib/gpu/lal_device.cu @@ -13,7 +13,7 @@ // email : brownw@ornl.gov // *************************************************************************** -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_preprocessor.h" #endif diff --git a/lib/gpu/lal_dipole_lj.cu b/lib/gpu/lal_dipole_lj.cu index 745bdb7f27..8ea49e7f60 100644 --- a/lib/gpu/lal_dipole_lj.cu +++ b/lib/gpu/lal_dipole_lj.cu @@ -13,16 +13,16 @@ // email : nguyentd@ornl.gov // ***************************************************************************/ -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_aux_fun1.h" #ifndef _DOUBLE_DOUBLE -texture<float4> pos_tex; -texture<float> q_tex; -texture<float4> mu_tex; +_texture( pos_tex,float4); +_texture( q_tex,float); +_texture( mu_tex,float4); #else -texture<int4,1> pos_tex; -texture<int2> q_tex; -texture<int4,1> mu_tex; +_texture_2d( pos_tex,int4); +_texture( q_tex,int2); +_texture_2d( mu_tex,int4); #endif #else diff --git a/lib/gpu/lal_dipole_lj_sf.cu b/lib/gpu/lal_dipole_lj_sf.cu index 9847e84823..9d753d9b63 100644 --- a/lib/gpu/lal_dipole_lj_sf.cu +++ b/lib/gpu/lal_dipole_lj_sf.cu @@ -13,17 +13,17 @@ // email : nguyentd@ornl.gov // ***************************************************************************/ -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_aux_fun1.h" #ifndef _DOUBLE_DOUBLE -texture<float4> pos_tex; -texture<float> q_tex; -texture<float4> mu_tex; +_texture( pos_tex,float4); +_texture( q_tex,float); +_texture( mu_tex,float4); #else -texture<int4,1> pos_tex; -texture<int2> q_tex; -texture<int4,1> mu_tex; +_texture_2d( pos_tex,int4); +_texture( q_tex,int2); +_texture_2d(
mu_tex,int4); #endif #else diff --git a/lib/gpu/lal_dipole_long_lj.cu b/lib/gpu/lal_dipole_long_lj.cu index f888dece9b..95c3b7a3db 100644 --- a/lib/gpu/lal_dipole_long_lj.cu +++ b/lib/gpu/lal_dipole_long_lj.cu @@ -13,16 +13,16 @@ // email : nguyentd@ornl.gov // ***************************************************************************/ -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_aux_fun1.h" #ifndef _DOUBLE_DOUBLE -texture pos_tex; -texture q_tex; -texture mu_tex; +_texture( pos_tex,float4); +_texture( q_tex,float); +_texture( mu_tex,float4); #else -texture pos_tex; -texture q_tex; -texture mu_tex; +_texture_2d( pos_tex,int4); +_texture( q_tex,int2); +_texture_2d( mu_tex,int4); #endif #else diff --git a/lib/gpu/lal_dpd.cu b/lib/gpu/lal_dpd.cu index 462401ad70..d97f430f77 100644 --- a/lib/gpu/lal_dpd.cu +++ b/lib/gpu/lal_dpd.cu @@ -13,14 +13,14 @@ // email : nguyentd@ornl.gov // ***************************************************************************/ -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_aux_fun1.h" #ifndef _DOUBLE_DOUBLE -texture pos_tex; -texture vel_tex; +_texture( pos_tex,float4); +_texture( vel_tex,float4); #else -texture pos_tex; -texture vel_tex; +_texture_2d( pos_tex,int4); +_texture_2d( vel_tex,int4); #endif #else #define pos_tex x_ diff --git a/lib/gpu/lal_eam.cu b/lib/gpu/lal_eam.cu index 13440b7d45..9427b1832f 100644 --- a/lib/gpu/lal_eam.cu +++ b/lib/gpu/lal_eam.cu @@ -13,27 +13,27 @@ // email : brownw@ornl.gov nguyentd@ornl.gov // ***************************************************************************/ -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_aux_fun1.h" #ifndef _DOUBLE_DOUBLE -texture pos_tex; -texture fp_tex; -texture rhor_sp1_tex; -texture rhor_sp2_tex; -texture frho_sp1_tex; -texture frho_sp2_tex; -texture z2r_sp1_tex; -texture z2r_sp2_tex; +_texture( pos_tex,float4); +_texture( fp_tex,float); +_texture( rhor_sp1_tex,float4); +_texture( 
rhor_sp2_tex,float4); +_texture( frho_sp1_tex,float4); +_texture( frho_sp2_tex,float4); +_texture( z2r_sp1_tex,float4); +_texture( z2r_sp2_tex,float4); #else -texture pos_tex; -texture fp_tex; -texture rhor_sp1_tex; -texture rhor_sp2_tex; -texture frho_sp1_tex; -texture frho_sp2_tex; -texture z2r_sp1_tex; -texture z2r_sp2_tex; +_texture( pos_tex,int4); +_texture( fp_tex,int2); +_texture( rhor_sp1_tex,int4); +_texture( rhor_sp2_tex,int4); +_texture( frho_sp1_tex,int4); +_texture( frho_sp2_tex,int4); +_texture( z2r_sp1_tex,int4); +_texture( z2r_sp2_tex,int4); #endif #else diff --git a/lib/gpu/lal_ellipsoid_extra.h b/lib/gpu/lal_ellipsoid_extra.h index 71668f5e02..e6122c7404 100644 --- a/lib/gpu/lal_ellipsoid_extra.h +++ b/lib/gpu/lal_ellipsoid_extra.h @@ -18,12 +18,14 @@ enum{SPHERE_SPHERE,SPHERE_ELLIPSE,ELLIPSE_SPHERE,ELLIPSE_ELLIPSE}; -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_aux_fun1.h" #ifndef _DOUBLE_DOUBLE -texture pos_tex, quat_tex; +_texture( pos_tex, float4); +_texture( quat_tex,float4); #else -texture pos_tex, quat_tex; +_texture_2d( pos_tex,int4); +_texture_2d( quat_tex,int4); #endif #else #define pos_tex x_ diff --git a/lib/gpu/lal_ellipsoid_nbor.cu b/lib/gpu/lal_ellipsoid_nbor.cu index cac77f5dd3..e6eedc7159 100644 --- a/lib/gpu/lal_ellipsoid_nbor.cu +++ b/lib/gpu/lal_ellipsoid_nbor.cu @@ -13,12 +13,12 @@ // email : brownw@ornl.gov // ***************************************************************************/ -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_preprocessor.h" #ifndef _DOUBLE_DOUBLE -texture pos_tex; +_texture( pos_tex,float4); #else -texture pos_tex; +_texture_2d( pos_tex,int4); #endif #else #define pos_tex x_ diff --git a/lib/gpu/lal_gauss.cu b/lib/gpu/lal_gauss.cu index 98e71ea413..f9d3741537 100644 --- a/lib/gpu/lal_gauss.cu +++ b/lib/gpu/lal_gauss.cu @@ -13,12 +13,12 @@ // email : nguyentd@ornl.gov // 
***************************************************************************/ -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_aux_fun1.h" #ifndef _DOUBLE_DOUBLE -texture pos_tex; +_texture( pos_tex,float4); #else -texture pos_tex; +_texture_2d( pos_tex,int4); #endif #else #define pos_tex x_ diff --git a/lib/gpu/lal_gayberne.cu b/lib/gpu/lal_gayberne.cu index cd1ee59fc6..5c035da004 100644 --- a/lib/gpu/lal_gayberne.cu +++ b/lib/gpu/lal_gayberne.cu @@ -13,7 +13,7 @@ // email : brownw@ornl.gov // ***************************************************************************/ -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_ellipsoid_extra.h" #endif diff --git a/lib/gpu/lal_gayberne_lj.cu b/lib/gpu/lal_gayberne_lj.cu index 7925b72784..eb9c797dc7 100644 --- a/lib/gpu/lal_gayberne_lj.cu +++ b/lib/gpu/lal_gayberne_lj.cu @@ -13,7 +13,7 @@ // email : brownw@ornl.gov // ***************************************************************************/ -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_ellipsoid_extra.h" #endif diff --git a/lib/gpu/lal_lj.cu b/lib/gpu/lal_lj.cu index 5838ac95cf..716346a83d 100644 --- a/lib/gpu/lal_lj.cu +++ b/lib/gpu/lal_lj.cu @@ -13,12 +13,12 @@ // email : brownw@ornl.gov // ***************************************************************************/ -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_aux_fun1.h" #ifndef _DOUBLE_DOUBLE -texture pos_tex; +_texture( pos_tex,float4); #else -texture pos_tex; +_texture_2d( pos_tex,int4); #endif #else #define pos_tex x_ diff --git a/lib/gpu/lal_lj96.cu b/lib/gpu/lal_lj96.cu index 8dd63ef920..aa06caa4ae 100644 --- a/lib/gpu/lal_lj96.cu +++ b/lib/gpu/lal_lj96.cu @@ -13,12 +13,12 @@ // email : brownw@ornl.gov // ***************************************************************************/ -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_aux_fun1.h" #ifndef _DOUBLE_DOUBLE -texture 
pos_tex; +_texture( pos_tex,float4); #else -texture pos_tex; +_texture_2d( pos_tex,int4); #endif #else #define pos_tex x_ diff --git a/lib/gpu/lal_lj_class2_long.cu b/lib/gpu/lal_lj_class2_long.cu index 41ceca35d7..4e1bf9c1f7 100644 --- a/lib/gpu/lal_lj_class2_long.cu +++ b/lib/gpu/lal_lj_class2_long.cu @@ -13,15 +13,15 @@ // email : brownw@ornl.gov // ***************************************************************************/ -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_aux_fun1.h" #ifndef _DOUBLE_DOUBLE -texture pos_tex; -texture q_tex; +_texture( pos_tex,float4); +_texture( q_tex,float); #else -texture pos_tex; -texture q_tex; +_texture_2d( pos_tex,int4); +_texture( q_tex,int2); #endif #else diff --git a/lib/gpu/lal_lj_coul.cu b/lib/gpu/lal_lj_coul.cu index 5c7f0da46f..cd72f72d97 100644 --- a/lib/gpu/lal_lj_coul.cu +++ b/lib/gpu/lal_lj_coul.cu @@ -13,15 +13,15 @@ // email : brownw@ornl.gov // ***************************************************************************/ -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_aux_fun1.h" #ifndef _DOUBLE_DOUBLE -texture pos_tex; -texture q_tex; +_texture( pos_tex,float4); +_texture( q_tex,float); #else -texture pos_tex; -texture q_tex; +_texture_2d( pos_tex,int4); +_texture( q_tex,int2); #endif #else diff --git a/lib/gpu/lal_lj_coul_debye.cu b/lib/gpu/lal_lj_coul_debye.cu index 91b105b3da..40b7046623 100644 --- a/lib/gpu/lal_lj_coul_debye.cu +++ b/lib/gpu/lal_lj_coul_debye.cu @@ -13,15 +13,15 @@ // email : nguyentd@ornl.gov // ***************************************************************************/ -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_aux_fun1.h" #ifndef _DOUBLE_DOUBLE -texture pos_tex; -texture q_tex; +_texture( pos_tex,float4); +_texture( q_tex,float); #else -texture pos_tex; -texture q_tex; +_texture_2d( pos_tex,int4); +_texture( q_tex,int2); #endif #else diff --git a/lib/gpu/lal_lj_coul_long.cu 
b/lib/gpu/lal_lj_coul_long.cu index 0e25bb2dbc..6a09cc4b75 100644 --- a/lib/gpu/lal_lj_coul_long.cu +++ b/lib/gpu/lal_lj_coul_long.cu @@ -13,15 +13,15 @@ // email : brownw@ornl.gov // ***************************************************************************/ -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_aux_fun1.h" #ifndef _DOUBLE_DOUBLE -texture pos_tex; -texture q_tex; +_texture( pos_tex,float4); +_texture( q_tex,float); #else -texture pos_tex; -texture q_tex; +_texture_2d( pos_tex,int4); +_texture( q_tex,int2); #endif #else diff --git a/lib/gpu/lal_lj_coul_msm.cu b/lib/gpu/lal_lj_coul_msm.cu index 3f73c6f47d..c8eaa47b3d 100644 --- a/lib/gpu/lal_lj_coul_msm.cu +++ b/lib/gpu/lal_lj_coul_msm.cu @@ -13,19 +13,19 @@ // email : nguyentd@ornl.gov // ***************************************************************************/ -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_aux_fun1.h" #ifndef _DOUBLE_DOUBLE -texture pos_tex; -texture q_tex; -texture gcons_tex; -texture dgcons_tex; +_texture( pos_tex,float4); +_texture( q_tex,float); +_texture( gcons_tex,float); +_texture( dgcons_tex,float); #else -texture pos_tex; -texture q_tex; -texture gcons_tex; -texture dgcons_tex; +_texture_2d( pos_tex,int4); +_texture( q_tex,int2); +_texture( gcons_tex,int2); +_texture( dgcons_tex,int2); #endif #else diff --git a/lib/gpu/lal_lj_cubic.cu b/lib/gpu/lal_lj_cubic.cu index 683c6b2aac..b6a0768a36 100644 --- a/lib/gpu/lal_lj_cubic.cu +++ b/lib/gpu/lal_lj_cubic.cu @@ -13,12 +13,12 @@ // email : ndactrung@gmail.com // ***************************************************************************/ -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_aux_fun1.h" #ifndef _DOUBLE_DOUBLE -texture pos_tex; +_texture( pos_tex,float4); #else -texture pos_tex; +_texture_2d( pos_tex,int4); #endif #else #define pos_tex x_ diff --git a/lib/gpu/lal_lj_dsf.cu b/lib/gpu/lal_lj_dsf.cu index 323576fe77..2475743ccc 100644 --- 
a/lib/gpu/lal_lj_dsf.cu +++ b/lib/gpu/lal_lj_dsf.cu @@ -13,15 +13,15 @@ // email : brownw@ornl.gov // ***************************************************************************/ -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_aux_fun1.h" #ifndef _DOUBLE_DOUBLE -texture pos_tex; -texture q_tex; +_texture( pos_tex,float4); +_texture( q_tex,float); #else -texture pos_tex; -texture q_tex; +_texture_2d( pos_tex,int4); +_texture( q_tex,int2); #endif #else diff --git a/lib/gpu/lal_lj_expand.cu b/lib/gpu/lal_lj_expand.cu index 9281ad27bd..4496835588 100644 --- a/lib/gpu/lal_lj_expand.cu +++ b/lib/gpu/lal_lj_expand.cu @@ -13,13 +13,13 @@ // email : ibains@nvidia.com // ***************************************************************************/ -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_aux_fun1.h" #ifndef _DOUBLE_DOUBLE -texture pos_tex; +_texture( pos_tex,float4); #else -texture pos_tex; +_texture_2d( pos_tex,int4); #endif #else diff --git a/lib/gpu/lal_lj_expand_coul_long.cu b/lib/gpu/lal_lj_expand_coul_long.cu index aa8f02be8c..e9de9bab27 100644 --- a/lib/gpu/lal_lj_expand_coul_long.cu +++ b/lib/gpu/lal_lj_expand_coul_long.cu @@ -13,15 +13,15 @@ // email : ndactrung@gmail.com // ***************************************************************************/ -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_aux_fun1.h" #ifndef _DOUBLE_DOUBLE -texture pos_tex; -texture q_tex; +_texture( pos_tex,float4); +_texture( q_tex,float); #else -texture pos_tex; -texture q_tex; +_texture_2d( pos_tex,int4); +_texture( q_tex,int2); #endif #else diff --git a/lib/gpu/lal_lj_gromacs.cu b/lib/gpu/lal_lj_gromacs.cu index 93dc3d9456..dcef79dc90 100644 --- a/lib/gpu/lal_lj_gromacs.cu +++ b/lib/gpu/lal_lj_gromacs.cu @@ -13,13 +13,13 @@ // email : nguyentd@ornl.gov // ***************************************************************************/ -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) 
#include "lal_aux_fun1.h" #ifndef _DOUBLE_DOUBLE -texture<float4> pos_tex; +_texture( pos_tex,float4); #else -texture<int4,1> pos_tex; +_texture_2d( pos_tex,int4); #endif #else diff --git a/lib/gpu/lal_lj_sdk.cu b/lib/gpu/lal_lj_sdk.cu index 01b2cdd18d..a11b1c7887 100644 --- a/lib/gpu/lal_lj_sdk.cu +++ b/lib/gpu/lal_lj_sdk.cu @@ -13,12 +13,12 @@ // email : brownw@ornl.gov // ***************************************************************************/ -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_aux_fun1.h" #ifndef _DOUBLE_DOUBLE -texture<float4> pos_tex; +_texture( pos_tex,float4); #else -texture<int4,1> pos_tex; +_texture_2d( pos_tex,int4); #endif #else #define pos_tex x_ diff --git a/lib/gpu/lal_lj_sdk_long.cu b/lib/gpu/lal_lj_sdk_long.cu index 5ff64b2254..e28fa19db4 100644 --- a/lib/gpu/lal_lj_sdk_long.cu +++ b/lib/gpu/lal_lj_sdk_long.cu @@ -13,15 +13,15 @@ // email : brownw@ornl.gov // ***************************************************************************/ -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_aux_fun1.h" #ifndef _DOUBLE_DOUBLE -texture<float4> pos_tex; -texture<float> q_tex; +_texture( pos_tex,float4); +_texture( q_tex,float); #else -texture<int4,1> pos_tex; -texture<int2> q_tex; +_texture_2d( pos_tex,int4); +_texture( q_tex,int2); #endif #else diff --git a/lib/gpu/lal_lj_tip4p_long.cpp b/lib/gpu/lal_lj_tip4p_long.cpp index d44edc8cbd..0b781300c7 100644 --- a/lib/gpu/lal_lj_tip4p_long.cpp +++ b/lib/gpu/lal_lj_tip4p_long.cpp @@ -23,7 +23,7 @@ const char *lj_tip4p=0; #include "lal_lj_tip4p_long.h" #include <cassert> -using namespace LAMMPS_AL; +namespace LAMMPS_AL { #define LJTIP4PLongT LJ_TIP4PLong<numtyp, acctyp> extern Device<PRECISION,ACC_PRECISION> device; @@ -370,6 +370,5 @@ int** LJTIP4PLongT::compute(const int ago, const int inum_full, } - - template class LJ_TIP4PLong<PRECISION,ACC_PRECISION>; +} diff --git a/lib/gpu/lal_lj_tip4p_long.cu b/lib/gpu/lal_lj_tip4p_long.cu index 147c460795..092513da4d 100644 --- a/lib/gpu/lal_lj_tip4p_long.cu +++ b/lib/gpu/lal_lj_tip4p_long.cu @@ -13,7 +13,7 @@ // email :
thevsevak@gmail.com // *************************************************************************** -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_aux_fun1.h" #ifdef LAMMPS_SMALLBIG @@ -27,11 +27,11 @@ #define tagint int #endif #ifndef _DOUBLE_DOUBLE -texture pos_tex; -texture q_tex; +_texture( pos_tex,float4); +_texture( q_tex,float); #else -texture pos_tex; -texture q_tex; +_texture_2d( pos_tex,int4); +_texture( q_tex,int2); #endif #else diff --git a/lib/gpu/lal_mie.cu b/lib/gpu/lal_mie.cu index 33018566eb..e2ede4d3a1 100644 --- a/lib/gpu/lal_mie.cu +++ b/lib/gpu/lal_mie.cu @@ -13,12 +13,12 @@ // email : nguyentd@ornl.gov // ***************************************************************************/ -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_aux_fun1.h" #ifndef _DOUBLE_DOUBLE -texture pos_tex; +_texture( pos_tex,float4); #else -texture pos_tex; +_texture_2d( pos_tex,int4); #endif #else #define pos_tex x_ diff --git a/lib/gpu/lal_morse.cu b/lib/gpu/lal_morse.cu index 0a14071d19..7e4e0e54fa 100644 --- a/lib/gpu/lal_morse.cu +++ b/lib/gpu/lal_morse.cu @@ -13,13 +13,13 @@ // email : brownw@ornl.gov // ***************************************************************************/ -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_aux_fun1.h" #ifndef _DOUBLE_DOUBLE -texture pos_tex; +_texture( pos_tex,float4); #else -texture pos_tex; +_texture_2d( pos_tex,int4); #endif #else diff --git a/lib/gpu/lal_neighbor_cpu.cu b/lib/gpu/lal_neighbor_cpu.cu index d005eb9f97..29141a8b90 100644 --- a/lib/gpu/lal_neighbor_cpu.cu +++ b/lib/gpu/lal_neighbor_cpu.cu @@ -13,7 +13,7 @@ // email : brownw@ornl.gov // ***************************************************************************/ -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_preprocessor.h" #endif diff --git a/lib/gpu/lal_neighbor_gpu.cu b/lib/gpu/lal_neighbor_gpu.cu index 83692a24e4..8a2b603217 100644 --- 
a/lib/gpu/lal_neighbor_gpu.cu +++ b/lib/gpu/lal_neighbor_gpu.cu @@ -14,7 +14,7 @@ // email : penwang@nvidia.com, brownw@ornl.gov // ***************************************************************************/ -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_preprocessor.h" #ifdef LAMMPS_SMALLBIG #define tagint int @@ -27,9 +27,9 @@ #define tagint int #endif #ifndef _DOUBLE_DOUBLE -texture<float4> pos_tex; +_texture( pos_tex,float4); #else -texture<int4,1> pos_tex; +_texture_2d( pos_tex,int4); #endif __kernel void calc_cell_id(const numtyp4 *restrict pos, diff --git a/lib/gpu/lal_neighbor_shared.h b/lib/gpu/lal_neighbor_shared.h index 834ee8406d..5cfc4e4767 100644 --- a/lib/gpu/lal_neighbor_shared.h +++ b/lib/gpu/lal_neighbor_shared.h @@ -24,6 +24,10 @@ using namespace ucl_opencl; #include "geryon/nvc_kernel.h" #include "geryon/nvc_texture.h" using namespace ucl_cudart; +#elif defined(USE_HIP) +#include "geryon/hip_kernel.h" +#include "geryon/hip_texture.h" +using namespace ucl_hip; #else #include "geryon/nvd_kernel.h" #include "geryon/nvd_texture.h" diff --git a/lib/gpu/lal_pppm.cu b/lib/gpu/lal_pppm.cu index 24636b9a93..6a7408c720 100644 --- a/lib/gpu/lal_pppm.cu +++ b/lib/gpu/lal_pppm.cu @@ -13,15 +13,15 @@ // email : brownw@ornl.gov // ***************************************************************************/ -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_preprocessor.h" #ifndef _DOUBLE_DOUBLE -texture<float4> pos_tex; -texture<float> q_tex; +_texture( pos_tex,float4); +_texture( q_tex,float); #else -texture<int4,1> pos_tex; -texture<int2> q_tex; +_texture_2d( pos_tex,int4); +_texture( q_tex,int2); #endif // Allow PPPM to compile without atomics for NVIDIA 1.0 cards, error diff --git a/lib/gpu/lal_pppm.h b/lib/gpu/lal_pppm.h index 045423e079..bc5f216076 100644 --- a/lib/gpu/lal_pppm.h +++ b/lib/gpu/lal_pppm.h @@ -23,6 +23,8 @@ #include "geryon/ocl_texture.h" #elif defined(USE_CUDART) #include "geryon/nvc_texture.h" +#elif defined(USE_HIP) +#include
"geryon/hip_texture.h" #else #include "geryon/nvd_texture.h" #endif diff --git a/lib/gpu/lal_precision.h b/lib/gpu/lal_precision.h index d5b1b9b6c0..7f82ba18aa 100644 --- a/lib/gpu/lal_precision.h +++ b/lib/gpu/lal_precision.h @@ -24,9 +24,11 @@ struct _lgpu_int2 { int x; int y; }; +#ifndef USE_HIP #ifndef int2 #define int2 _lgpu_int2 #endif +#endif struct _lgpu_float2 { float x; float y; diff --git a/lib/gpu/lal_preprocessor.h b/lib/gpu/lal_preprocessor.h index 566a451c21..cd95355ee4 100644 --- a/lib/gpu/lal_preprocessor.h +++ b/lib/gpu/lal_preprocessor.h @@ -1,4 +1,4 @@ -// ************************************************************************** +// ************************************************************************** // preprocessor.cu // ------------------- // W. Michael Brown (ORNL) @@ -60,6 +60,150 @@ // //*************************************************************************/ +#define _texture(name, type) texture<type> name +#define _texture_2d(name, type) texture<type,1> name + +// ------------------------------------------------------------------------- +// HIP DEFINITIONS +// ------------------------------------------------------------------------- + +#ifdef USE_HIP + #include <hip/hip_runtime.h> + #ifdef __HIP_PLATFORM_HCC__ + #define mul24(x, y) __mul24(x, y) + #undef _texture + #undef _texture_2d + #define _texture(name, type) __device__ type* name + #define _texture_2d(name, type) __device__ type* name + #endif + #define GLOBAL_ID_X threadIdx.x+mul24(blockIdx.x,blockDim.x) + #define GLOBAL_ID_Y threadIdx.y+mul24(blockIdx.y,blockDim.y) + #define GLOBAL_SIZE_X mul24(gridDim.x,blockDim.x); + #define GLOBAL_SIZE_Y mul24(gridDim.y,blockDim.y); + #define THREAD_ID_X threadIdx.x + #define THREAD_ID_Y threadIdx.y + #define BLOCK_ID_X blockIdx.x + #define BLOCK_ID_Y blockIdx.y + #define BLOCK_SIZE_X blockDim.x + #define BLOCK_SIZE_Y blockDim.y + #define __kernel extern "C" __global__ + #ifdef __local + #undef __local + #endif + #define __local __shared__ + #define __global + #define
restrict __restrict__ + #define atom_add atomicAdd + #define ucl_inline static __inline__ __device__ + + #define THREADS_PER_ATOM 4 + #define THREADS_PER_CHARGE 8 + #define BLOCK_NBOR_BUILD 128 + #define BLOCK_PAIR 256 + #define BLOCK_BIO_PAIR 256 + #define BLOCK_ELLIPSE 128 + #define MAX_SHARED_TYPES 11 + + #ifdef _SINGLE_SINGLE + ucl_inline double shfl_xor(double var, int laneMask, int width) { + #ifdef __HIP_PLATFORM_HCC__ + return __shfl_xor(var, laneMask, width); + #else + return __shfl_xor_sync(0xffffffff, var, laneMask, width); + #endif + } + #else + ucl_inline double shfl_xor(double var, int laneMask, int width) { + int2 tmp; + tmp.x = __double2hiint(var); + tmp.y = __double2loint(var); + #ifdef __HIP_PLATFORM_HCC__ + tmp.x = __shfl_xor(tmp.x,laneMask,width); + tmp.y = __shfl_xor(tmp.y,laneMask,width); + #else + tmp.x = __shfl_xor_sync(0xffffffff, tmp.x,laneMask,width); + tmp.y = __shfl_xor_sync(0xffffffff, tmp.y,laneMask,width); + #endif + return __hiloint2double(tmp.x,tmp.y); + } + #endif + + #ifdef __HIP_PLATFORM_HCC__ + #define ARCH 600 + #define WARP_SIZE 64 + #endif + + #ifdef __HIP_PLATFORM_NVCC__ + #define ARCH __CUDA_ARCH__ + #define WARP_SIZE 32 + #endif + + #define fast_mul(X,Y) (X)*(Y) + + #define MEM_THREADS WARP_SIZE + #define PPPM_BLOCK_1D 64 + #define BLOCK_CELL_2D 8 + #define BLOCK_CELL_ID 128 + #define MAX_BIO_SHARED_TYPES 128 + + #ifdef __HIP_PLATFORM_NVCC__ + #ifdef _DOUBLE_DOUBLE + #define fetch4(ans,i,pos_tex) { \ + int4 xy = tex1Dfetch(pos_tex,i*2); \ + int4 zt = tex1Dfetch(pos_tex,i*2+1); \ + ans.x=__hiloint2double(xy.y, xy.x); \ + ans.y=__hiloint2double(xy.w, xy.z); \ + ans.z=__hiloint2double(zt.y, zt.x); \ + ans.w=__hiloint2double(zt.w, zt.z); \ + } + #define fetch(ans,i,q_tex) { \ + int2 qt = tex1Dfetch(q_tex,i); \ + ans=__hiloint2double(qt.y, qt.x); \ + } + #else + #define fetch4(ans,i,pos_tex) ans=tex1Dfetch(pos_tex, i); + #define fetch(ans,i,q_tex) ans=tex1Dfetch(q_tex,i); + #endif + #else + #ifdef _DOUBLE_DOUBLE + #define 
fetch4(ans,i,pos_tex) (ans=*(((double4*)pos_tex) + i)) + #define fetch(ans,i,q_tex) (ans=*(((double *) q_tex) + i)) + #else + #define fetch4(ans,i,pos_tex) (ans=*(((float4*)pos_tex) + i)) + #define fetch(ans,i,q_tex) (ans=*(((float *) q_tex) + i)) + #endif + #endif + + #ifdef _DOUBLE_DOUBLE + #define ucl_exp exp + #define ucl_powr pow + #define ucl_atan atan + #define ucl_cbrt cbrt + #define ucl_ceil ceil + #define ucl_abs fabs + #define ucl_rsqrt rsqrt + #define ucl_sqrt sqrt + #define ucl_recip(x) ((numtyp)1.0/(x)) + + #else + #define ucl_atan atanf + #define ucl_cbrt cbrtf + #define ucl_ceil ceilf + #define ucl_abs fabsf + #define ucl_recip(x) ((numtyp)1.0/(x)) + #define ucl_rsqrt rsqrtf + #define ucl_sqrt sqrtf + + #ifdef NO_HARDWARE_TRANSCENDENTALS + #define ucl_exp expf + #define ucl_powr powf + #else + #define ucl_exp __expf + #define ucl_powr __powf + #endif + #endif +#endif + // ------------------------------------------------------------------------- // CUDA DEFINITIONS // ------------------------------------------------------------------------- diff --git a/lib/gpu/lal_re_squared.cu b/lib/gpu/lal_re_squared.cu index e238734074..cd525a1ade 100644 --- a/lib/gpu/lal_re_squared.cu +++ b/lib/gpu/lal_re_squared.cu @@ -13,7 +13,7 @@ // email : brownw@ornl.gov // ***************************************************************************/ -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_ellipsoid_extra.h" #endif diff --git a/lib/gpu/lal_re_squared_lj.cu b/lib/gpu/lal_re_squared_lj.cu index d69dae2461..b3c44febe7 100644 --- a/lib/gpu/lal_re_squared_lj.cu +++ b/lib/gpu/lal_re_squared_lj.cu @@ -13,7 +13,7 @@ // email : brownw@ornl.gov // ***************************************************************************/ -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_ellipsoid_extra.h" #endif diff --git a/lib/gpu/lal_soft.cu b/lib/gpu/lal_soft.cu index 831b986725..bccfa85ed5 100644 --- a/lib/gpu/lal_soft.cu +++ 
b/lib/gpu/lal_soft.cu @@ -13,12 +13,12 @@ // email : nguyentd@ornl.gov // ***************************************************************************/ -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_aux_fun1.h" #ifndef _DOUBLE_DOUBLE -texture pos_tex; +_texture( pos_tex,float4); #else -texture pos_tex; +_texture_2d( pos_tex,int4); #endif #else #define pos_tex x_ diff --git a/lib/gpu/lal_sw.cu b/lib/gpu/lal_sw.cu index 3b6de5a683..de36d29efb 100644 --- a/lib/gpu/lal_sw.cu +++ b/lib/gpu/lal_sw.cu @@ -13,19 +13,19 @@ // email : brownw@ornl.gov // ***************************************************************************/ -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_aux_fun1.h" #ifndef _DOUBLE_DOUBLE -texture pos_tex; -texture sw1_tex; -texture sw2_tex; -texture sw3_tex; +_texture( pos_tex,float4); +_texture( sw1_tex,float4); +_texture( sw2_tex,float4); +_texture( sw3_tex,float4); #else -texture pos_tex; -texture sw1_tex; -texture sw2_tex; -texture sw3_tex; +_texture_2d( pos_tex,int4); +_texture( sw1_tex,int4); +_texture( sw2_tex,int4); +_texture( sw3_tex,int4); #endif #else diff --git a/lib/gpu/lal_table.cu b/lib/gpu/lal_table.cu index 971b56d96e..8c0b52e05f 100644 --- a/lib/gpu/lal_table.cu +++ b/lib/gpu/lal_table.cu @@ -13,12 +13,12 @@ // email : nguyentd@ornl.gov // ***************************************************************************/ -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_aux_fun1.h" #ifndef _DOUBLE_DOUBLE -texture pos_tex; +_texture( pos_tex,float4); #else -texture pos_tex; +_texture_2d( pos_tex,int4); #endif #else #define pos_tex x_ diff --git a/lib/gpu/lal_tersoff.cu b/lib/gpu/lal_tersoff.cu index 2e29ca721b..d57efaf15c 100644 --- a/lib/gpu/lal_tersoff.cu +++ b/lib/gpu/lal_tersoff.cu @@ -13,23 +13,23 @@ // email : ndactrung@gmail.com // ***************************************************************************/ -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || 
defined(USE_HIP) #include "lal_tersoff_extra.h" #ifndef _DOUBLE_DOUBLE -texture pos_tex; -texture ts1_tex; -texture ts2_tex; -texture ts3_tex; -texture ts4_tex; -texture ts5_tex; +_texture( pos_tex,float4); +_texture( ts1_tex,float4); +_texture( ts2_tex,float4); +_texture( ts3_tex,float4); +_texture( ts4_tex,float4); +_texture( ts5_tex,float4); #else -texture pos_tex; -texture ts1_tex; -texture ts2_tex; -texture ts3_tex; -texture ts4_tex; -texture ts5_tex; +_texture_2d( pos_tex,int4); +_texture( ts1_tex,int4); +_texture( ts2_tex,int4); +_texture( ts3_tex,int4); +_texture( ts4_tex,int4); +_texture( ts5_tex,int4); #endif #else diff --git a/lib/gpu/lal_tersoff_extra.h b/lib/gpu/lal_tersoff_extra.h index 47d16678f0..7ee29751b7 100644 --- a/lib/gpu/lal_tersoff_extra.h +++ b/lib/gpu/lal_tersoff_extra.h @@ -16,7 +16,7 @@ #ifndef LAL_TERSOFF_EXTRA_H #define LAL_TERSOFF_EXTRA_H -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_aux_fun1.h" #else #endif diff --git a/lib/gpu/lal_tersoff_mod.cu b/lib/gpu/lal_tersoff_mod.cu index c85f5e08ca..da284f39ee 100644 --- a/lib/gpu/lal_tersoff_mod.cu +++ b/lib/gpu/lal_tersoff_mod.cu @@ -13,23 +13,23 @@ // email : ndactrung@gmail.com // ***************************************************************************/ -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_tersoff_mod_extra.h" #ifndef _DOUBLE_DOUBLE -texture pos_tex; -texture ts1_tex; -texture ts2_tex; -texture ts3_tex; -texture ts4_tex; -texture ts5_tex; +_texture( pos_tex,float4); +_texture( ts1_tex,float4); +_texture( ts2_tex,float4); +_texture( ts3_tex,float4); +_texture( ts4_tex,float4); +_texture( ts5_tex,float4); #else -texture pos_tex; -texture ts1_tex; -texture ts2_tex; -texture ts3_tex; -texture ts4_tex; -texture ts5_tex; +_texture_2d( pos_tex,int4); +_texture( ts1_tex,int4); +_texture( ts2_tex,int4); +_texture( ts3_tex,int4); +_texture( ts4_tex,int4); +_texture( ts5_tex,int4); #endif #else diff --git 
a/lib/gpu/lal_tersoff_mod_extra.h b/lib/gpu/lal_tersoff_mod_extra.h index a130d98488..fb658cb0da 100644 --- a/lib/gpu/lal_tersoff_mod_extra.h +++ b/lib/gpu/lal_tersoff_mod_extra.h @@ -16,7 +16,7 @@ #ifndef LAL_TERSOFF_MOD_EXTRA_H #define LAL_TERSOFF_MOD_EXTRA_H -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_aux_fun1.h" #else #endif diff --git a/lib/gpu/lal_tersoff_zbl.cu b/lib/gpu/lal_tersoff_zbl.cu index b574a529c0..a170715f57 100644 --- a/lib/gpu/lal_tersoff_zbl.cu +++ b/lib/gpu/lal_tersoff_zbl.cu @@ -13,25 +13,25 @@ // email : ndactrung@gmail.com // ***************************************************************************/ -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_tersoff_zbl_extra.h" #ifndef _DOUBLE_DOUBLE -texture pos_tex; -texture ts1_tex; -texture ts2_tex; -texture ts3_tex; -texture ts4_tex; -texture ts5_tex; -texture ts6_tex; +_texture( pos_tex,float4); +_texture( ts1_tex,float4); +_texture( ts2_tex,float4); +_texture( ts3_tex,float4); +_texture( ts4_tex,float4); +_texture( ts5_tex,float4); +_texture( ts6_tex,float4); #else -texture pos_tex; -texture ts1_tex; -texture ts2_tex; -texture ts3_tex; -texture ts4_tex; -texture ts5_tex; -texture ts6_tex; +_texture_2d( pos_tex,int4); +_texture( ts1_tex,int4); +_texture( ts2_tex,int4); +_texture( ts3_tex,int4); +_texture( ts4_tex,int4); +_texture( ts5_tex,int4); +_texture( ts6_tex,int4); #endif #else diff --git a/lib/gpu/lal_tersoff_zbl_extra.h b/lib/gpu/lal_tersoff_zbl_extra.h index 32c05a3716..9e5bcb10b4 100644 --- a/lib/gpu/lal_tersoff_zbl_extra.h +++ b/lib/gpu/lal_tersoff_zbl_extra.h @@ -16,7 +16,7 @@ #ifndef LAL_TERSOFF_ZBL_EXTRA_H #define LAL_TERSOFF_ZBL_EXTRA_H -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_aux_fun1.h" #else #endif diff --git a/lib/gpu/lal_ufm.cu b/lib/gpu/lal_ufm.cu index 51c4df3b5b..33d0f3c956 100644 --- a/lib/gpu/lal_ufm.cu +++ b/lib/gpu/lal_ufm.cu @@ -15,12 +15,12 @@ dekoning@ifi.unicamp.br 
***************************************************************************/ -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_aux_fun1.h" #ifndef _DOUBLE_DOUBLE -texture pos_tex; +_texture( pos_tex,float4); #else -texture pos_tex; +_texture_2d( pos_tex,int4); #endif #else #define pos_tex x_ diff --git a/lib/gpu/lal_vashishta.cu b/lib/gpu/lal_vashishta.cu index 0da46c3b53..d13bc659e7 100644 --- a/lib/gpu/lal_vashishta.cu +++ b/lib/gpu/lal_vashishta.cu @@ -13,23 +13,23 @@ // email : andershaf@gmail.com // ***************************************************************************/ -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_aux_fun1.h" #ifndef _DOUBLE_DOUBLE -texture pos_tex; -texture param1_tex; -texture param2_tex; -texture param3_tex; -texture param4_tex; -texture param5_tex; +_texture( pos_tex,float4); +_texture( param1_tex,float4); +_texture( param2_tex,float4); +_texture( param3_tex,float4); +_texture( param4_tex,float4); +_texture( param5_tex,float4); #else -texture pos_tex; -texture param1_tex; -texture param2_tex; -texture param3_tex; -texture param4_tex; -texture param5_tex; +_texture_2d( pos_tex,int4); +_texture( param1_tex,int4); +_texture( param2_tex,int4); +_texture( param3_tex,int4); +_texture( param4_tex,int4); +_texture( param5_tex,int4); #endif #else diff --git a/lib/gpu/lal_yukawa.cu b/lib/gpu/lal_yukawa.cu index a8d637ec97..5237549b0a 100644 --- a/lib/gpu/lal_yukawa.cu +++ b/lib/gpu/lal_yukawa.cu @@ -13,12 +13,12 @@ // email : nguyentd@ornl.gov // ***************************************************************************/ -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_aux_fun1.h" #ifndef _DOUBLE_DOUBLE -texture pos_tex; +_texture( pos_tex,float4); #else -texture pos_tex; +_texture_2d( pos_tex,int4); #endif #else #define pos_tex x_ diff --git a/lib/gpu/lal_yukawa_colloid.cu b/lib/gpu/lal_yukawa_colloid.cu index a3cbbbc11c..8c006a09be 100644 --- 
a/lib/gpu/lal_yukawa_colloid.cu +++ b/lib/gpu/lal_yukawa_colloid.cu @@ -13,15 +13,15 @@ // email : nguyentd@ornl.gov // ***************************************************************************/ -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_aux_fun1.h" #ifndef _DOUBLE_DOUBLE -texture pos_tex; -texture rad_tex; +_texture( pos_tex,float4); +_texture( rad_tex,float); #else -texture pos_tex; -texture rad_tex; +_texture_2d( pos_tex,int4); +_texture( rad_tex,int2); #endif #else diff --git a/lib/gpu/lal_zbl.cu b/lib/gpu/lal_zbl.cu index 33c850e134..fbedfe2de2 100644 --- a/lib/gpu/lal_zbl.cu +++ b/lib/gpu/lal_zbl.cu @@ -13,12 +13,12 @@ // email : ndactrung@gmail.com // ***************************************************************************/ -#ifdef NV_KERNEL +#if defined(NV_KERNEL) || defined(USE_HIP) #include "lal_aux_fun1.h" #ifndef _DOUBLE_DOUBLE -texture pos_tex; +_texture( pos_tex,float4); #else -texture pos_tex; +_texture_2d( pos_tex,int4); #endif #else #define pos_tex x_ diff --git a/src/MAKE/OPTIONS/Makefile.hip b/src/MAKE/OPTIONS/Makefile.hip new file mode 100644 index 0000000000..12158a32cb --- /dev/null +++ b/src/MAKE/OPTIONS/Makefile.hip @@ -0,0 +1,120 @@ +# hip = MPI with HIP(clang) + +SHELL = /bin/sh + +# --------------------------------------------------------------------- +# compiler/linker settings +# specify flags and libraries needed for your compiler + +CC = mpicxx +CCFLAGS = -g -O3 +SHFLAGS = -fPIC +DEPFLAGS = -M + +HIP_PATH ?= $(wildcard /opt/rocm/hip) +LINK = $(HIP_PATH)/bin/hipcc +LINKFLAGS = -g -O3 $(shell mpicxx --showme:link) +LIB = +SIZE = size + +ARCHIVE = ar +ARFLAGS = -rc +SHLIBFLAGS = -shared + +# --------------------------------------------------------------------- +# LAMMPS-specific settings, all OPTIONAL +# specify settings for LAMMPS features you will use +# if you change any -D setting, do full re-compile after "make clean" + +# LAMMPS ifdef settings +# see possible settings in Section 2.2 (step 4) of 
manual + +LMP_INC = -DLAMMPS_GZIP -DLAMMPS_MEMALIGN=64 + +# MPI library +# see discussion in Section 2.2 (step 5) of manual +# MPI wrapper compiler/linker can provide this info +# can point to dummy MPI library in src/STUBS as in Makefile.serial +# use -D MPICH and OMPI settings in INC to avoid C++ lib conflicts +# INC = path for mpi.h, MPI compiler settings +# PATH = path for MPI library +# LIB = name of MPI library + +MPI_INC = -DMPICH_SKIP_MPICXX -DOMPI_SKIP_MPICXX=1 +MPI_PATH = +MPI_LIB = + +# FFT library +# see discussion in Section 2.2 (step 6) of manual +# can be left blank to use provided KISS FFT library +# INC = -DFFT setting, e.g. -DFFT_FFTW, FFT compiler settings +# PATH = path for FFT library +# LIB = name of FFT library + +FFT_INC = +FFT_PATH = +FFT_LIB = + +# JPEG and/or PNG library +# see discussion in Section 2.2 (step 7) of manual +# only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC +# INC = path(s) for jpeglib.h and/or png.h +# PATH = path(s) for JPEG library and/or PNG library +# LIB = name(s) of JPEG library and/or PNG library + +JPG_INC = +JPG_PATH = +JPG_LIB = + +# --------------------------------------------------------------------- +# build rules and dependencies +# do not edit this section + +include Makefile.package.settings +include Makefile.package + +ifeq (nvcc,${HIP_PLATFORM}) + # fix nvcc can't handle -pthread flag + LINKFLAGS := $(subst -pthread,-Xcompiler -pthread,$(LINKFLAGS)) +endif + +EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC) +EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH) +EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB) +EXTRA_CPP_DEPENDS = $(PKG_CPP_DEPENDS) +EXTRA_LINK_DEPENDS = $(PKG_LINK_DEPENDS) + +# Path to src files + +vpath %.cpp .. +vpath %.h .. 
+ +# Link target + +$(EXE): $(OBJ) $(EXTRA_LINK_DEPENDS) + $(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $(EXE) + $(SIZE) $(EXE) + +# Library targets + +lib: $(OBJ) $(EXTRA_LINK_DEPENDS) + $(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ) + +shlib: $(OBJ) $(EXTRA_LINK_DEPENDS) + $(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \ + $(OBJ) $(EXTRA_LIB) $(LIB) + +# Compilation rules + +%.o:%.cpp + $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $< + +# Individual dependencies + +depend : fastdep.exe $(SRC) + @./fastdep.exe $(EXTRA_INC) -- $^ > .depend || exit 1 + +fastdep.exe: ../DEPEND/fastdep.c + cc -O -o $@ $< + +sinclude .depend -- GitLab From 9e8806bd2239d88a83b1c15b1724541b0e8f4d6d Mon Sep 17 00:00:00 2001 From: Vsevak Date: Fri, 31 Jan 2020 21:26:52 +0300 Subject: [PATCH 002/328] Addition to lib/gpu/README for HIP --- lib/gpu/README | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/lib/gpu/README b/lib/gpu/README index 2d98749a40..969417a865 100644 --- a/lib/gpu/README +++ b/lib/gpu/README @@ -67,8 +67,8 @@ library requires installing the CUDA GPU driver and CUDA toolkit for your operating system. Installation of the CUDA SDK is not necessary. In addition to the LAMMPS library, the binary nvc_get_devices will also be built. This can be used to query the names and properties of GPU -devices on your system. A Makefile for OpenCL compilation is provided, -but support for OpenCL use is not currently provided by the developers. +devices on your system. Makefiles for OpenCL and ROCm HIP compilation +are provided, but support for them is not currently provided by the developers. Details of the implementation are provided in: ---- @@ -169,6 +169,25 @@ NOTE: The system-specific setting LAMMPS_SMALLBIG (default), LAMMPS_BIGBIG, src/MAKE/Makefile.foo) should be consistent with that specified when building libgpu.a (i.e. by LMP_INC in the lib/gpu/Makefile.bar).
+ BUILDING FOR HIP FRAMEWORK + -------------------------------- +1. Install the latest ROCm framework (https://github.com/RadeonOpenCompute/ROCm). +2. GPU sorting requires installing hipcub +(https://github.com/ROCmSoftwarePlatform/hipCUB). The HIP CUDA-backend +additionally requires cub (https://nvlabs.github.io/cub). Download and +extract the cub directory to lammps/lib/gpu/ or specify an appropriate +path in lammps/lib/gpu/Makefile.hip. +3. In Makefile.hip it is possible to specify the target platform via +export HIP_PLATFORM=hcc or HIP_PLATFORM=nvcc as well as the target +architecture (gfx803, gfx900, gfx906, etc.). +4. If your MPI implementation does not support the `mpicxx --showme` command, +you must specify the corresponding MPI compiler and linker flags +in lammps/lib/gpu/Makefile.hip and in lammps/src/MAKE/OPTIONS/Makefile.hip. +5. Building the GPU library (libgpu.a): + cd lammps/lib/gpu; make -f Makefile.hip -j +6. Building the LAMMPS executable (lmp_hip): + cd ../../src; make hip -j + EXAMPLE CONVENTIONAL BUILD PROCESS -------------------------------- -- GitLab From 33fc22b752c3652df832662a91a1e5fc887c9200 Mon Sep 17 00:00:00 2001 From: Vsevak Date: Mon, 3 Feb 2020 01:55:20 +0300 Subject: [PATCH 003/328] Fix echo in Makefile --- lib/gpu/Makefile.hip | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/gpu/Makefile.hip b/lib/gpu/Makefile.hip index 5c9f251004..1c8e0683ca 100644 --- a/lib/gpu/Makefile.hip +++ b/lib/gpu/Makefile.hip @@ -137,7 +137,7 @@ $(OBJ_DIR)/lal_%.o: lal_%.cpp $(CUHS) $(ALL_H) $(LIB_DIR)/libgpu.a: $(OBJS) $(AR) -crs $@ $(OBJS) - echo "export HIP_PLATFORM := $(HIP_PLATFORM)\n$(HIP_LIBS_TARGET)" > 'Makefile.lammps' + echo -e "export HIP_PLATFORM := $(HIP_PLATFORM)\n$(HIP_LIBS_TARGET)" > 'Makefile.lammps' # test app building -- GitLab From 5f68f3006f1eafaffdb13b3e85220b5d8ac0dd2a Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sat, 8 Feb 2020 15:12:26 -0500 Subject: [PATCH 004/328] replace Fix::box_change_xxx
variables with Fix::box_change bitmask and add check to Domain::init() This allows a more specific tracking of whether multiple fixes are modifying the same box parameter and error out in that case. --- src/RIGID/fix_rigid_nh.cpp | 4 ++++ src/RIGID/fix_rigid_nh_small.cpp | 4 ++++ src/RIGID/fix_rigid_nph.cpp | 1 - src/RIGID/fix_rigid_nph_small.cpp | 1 - src/RIGID/fix_rigid_npt.cpp | 1 - src/RIGID/fix_rigid_npt_small.cpp | 1 - src/SHOCK/fix_msst.cpp | 15 +++++++++----- src/SRD/fix_srd.cpp | 11 ++++++---- src/USER-BOCS/fix_bocs.cpp | 8 ++++++-- src/USER-MISC/fix_npt_cauchy.cpp | 8 ++++++-- src/USER-OMP/fix_rigid_nph_omp.cpp | 1 - src/USER-OMP/fix_rigid_npt_omp.cpp | 1 - src/USER-QTB/fix_qbmsst.cpp | 12 ++++++----- src/USER-UEF/fix_nh_uef.cpp | 2 +- src/domain.cpp | 33 +++++++++++++++++++++++++++--- src/fix.cpp | 2 +- src/fix.h | 11 +++++++--- src/fix_balance.cpp | 2 +- src/fix_box_relax.cpp | 9 ++++++-- src/fix_deform.cpp | 8 ++++++-- src/fix_nh.cpp | 8 ++++++-- src/fix_press_berendsen.cpp | 6 ++++-- 22 files changed, 108 insertions(+), 41 deletions(-) diff --git a/src/RIGID/fix_rigid_nh.cpp b/src/RIGID/fix_rigid_nh.cpp index 6ffb997ffa..1df7f34532 100644 --- a/src/RIGID/fix_rigid_nh.cpp +++ b/src/RIGID/fix_rigid_nh.cpp @@ -106,6 +106,10 @@ FixRigidNH::FixRigidNH(LAMMPS *lmp, int narg, char **arg) : p_period[0] != p_period[2])) error->all(FLERR,"Invalid fix rigid npt/nph command pressure settings"); + if (p_flag[0]) box_change |= BOX_CHANGE_X; + if (p_flag[1]) box_change |= BOX_CHANGE_Y; + if (p_flag[2]) box_change |= BOX_CHANGE_Z; + if ((tstat_flag && t_period <= 0.0) || (p_flag[0] && p_period[0] <= 0.0) || (p_flag[1] && p_period[1] <= 0.0) || diff --git a/src/RIGID/fix_rigid_nh_small.cpp b/src/RIGID/fix_rigid_nh_small.cpp index 136796ce18..5b75640549 100644 --- a/src/RIGID/fix_rigid_nh_small.cpp +++ b/src/RIGID/fix_rigid_nh_small.cpp @@ -120,6 +120,10 @@ FixRigidNHSmall::FixRigidNHSmall(LAMMPS *lmp, int narg, char **arg) : p_period[0] != p_period[2])) 
error->all(FLERR,"Invalid fix rigid/small npt/nph command pressure settings"); + if (p_flag[0]) box_change |= BOX_CHANGE_X; + if (p_flag[1]) box_change |= BOX_CHANGE_Y; + if (p_flag[2]) box_change |= BOX_CHANGE_Z; + if ((tstat_flag && t_period <= 0.0) || (p_flag[0] && p_period[0] <= 0.0) || (p_flag[1] && p_period[1] <= 0.0) || diff --git a/src/RIGID/fix_rigid_nph.cpp b/src/RIGID/fix_rigid_nph.cpp index 706e08ec12..5c3b5390aa 100644 --- a/src/RIGID/fix_rigid_nph.cpp +++ b/src/RIGID/fix_rigid_nph.cpp @@ -33,7 +33,6 @@ FixRigidNPH::FixRigidNPH(LAMMPS *lmp, int narg, char **arg) : scalar_flag = 1; restart_global = 1; - box_change_size = 1; extscalar = 1; // error checks diff --git a/src/RIGID/fix_rigid_nph_small.cpp b/src/RIGID/fix_rigid_nph_small.cpp index 32ac58220d..7769ced807 100644 --- a/src/RIGID/fix_rigid_nph_small.cpp +++ b/src/RIGID/fix_rigid_nph_small.cpp @@ -33,7 +33,6 @@ FixRigidNPHSmall::FixRigidNPHSmall(LAMMPS *lmp, int narg, char **arg) : scalar_flag = 1; restart_global = 1; - box_change_size = 1; extscalar = 1; // error checks diff --git a/src/RIGID/fix_rigid_npt.cpp b/src/RIGID/fix_rigid_npt.cpp index 1f19fb4ec7..5f3fde669c 100644 --- a/src/RIGID/fix_rigid_npt.cpp +++ b/src/RIGID/fix_rigid_npt.cpp @@ -33,7 +33,6 @@ FixRigidNPT::FixRigidNPT(LAMMPS *lmp, int narg, char **arg) : scalar_flag = 1; restart_global = 1; - box_change_size = 1; extscalar = 1; // error checks diff --git a/src/RIGID/fix_rigid_npt_small.cpp b/src/RIGID/fix_rigid_npt_small.cpp index e8924be79f..d3bd5b8f57 100644 --- a/src/RIGID/fix_rigid_npt_small.cpp +++ b/src/RIGID/fix_rigid_npt_small.cpp @@ -33,7 +33,6 @@ FixRigidNPTSmall::FixRigidNPTSmall(LAMMPS *lmp, int narg, char **arg) : scalar_flag = 1; restart_global = 1; - box_change_size = 1; extscalar = 1; // error checks diff --git a/src/SHOCK/fix_msst.cpp b/src/SHOCK/fix_msst.cpp index 2e9f751824..67b2a7fb0f 100644 --- a/src/SHOCK/fix_msst.cpp +++ b/src/SHOCK/fix_msst.cpp @@ -46,7 +46,6 @@ FixMSST::FixMSST(LAMMPS *lmp, int narg, char 
**arg) : if (narg < 4) error->all(FLERR,"Illegal fix msst command"); restart_global = 1; - box_change_size = 1; time_integrate = 1; scalar_flag = 1; vector_flag = 1; @@ -78,10 +77,16 @@ FixMSST::FixMSST(LAMMPS *lmp, int narg, char **arg) : dftb = 0; beta = 0.0; - if (strcmp(arg[3],"x") == 0) direction = 0; - else if (strcmp(arg[3],"y") == 0) direction = 1; - else if (strcmp(arg[3],"z") == 0) direction = 2; - else error->all(FLERR,"Illegal fix msst command"); + if (strcmp(arg[3],"x") == 0) { + direction = 0; + box_change |= BOX_CHANGE_X; + } else if (strcmp(arg[3],"y") == 0) { + direction = 1; + box_change |= BOX_CHANGE_Y; + } else if (strcmp(arg[3],"z") == 0) { + direction = 2; + box_change |= BOX_CHANGE_Z; + } else error->all(FLERR,"Illegal fix msst command"); velocity = force->numeric(FLERR,arg[4]); if (velocity < 0) error->all(FLERR,"Illegal fix msst command"); diff --git a/src/SRD/fix_srd.cpp b/src/SRD/fix_srd.cpp index d5eec91f50..d87851381f 100644 --- a/src/SRD/fix_srd.cpp +++ b/src/SRD/fix_srd.cpp @@ -375,13 +375,16 @@ void FixSRD::init() change_size = change_shape = deformflag = 0; if (domain->nonperiodic == 2) change_size = 1; + + Fix **fixes = modify->fix; for (int i = 0; i < modify->nfix; i++) { - if (modify->fix[i]->box_change_size) change_size = 1; - if (modify->fix[i]->box_change_shape) change_shape = 1; - if (strcmp(modify->fix[i]->style,"deform") == 0) { + if (fixes[i]->box_change & BOX_CHANGE_SIZE) change_size = 1; + if (fixes[i]->box_change & BOX_CHANGE_SHAPE) change_shape = 1; + if (strcmp(fixes[i]->style,"deform") == 0) { deformflag = 1; FixDeform *deform = (FixDeform *) modify->fix[i]; - if (deform->box_change_shape && deform->remapflag != Domain::V_REMAP) + if ((deform->box_change & BOX_CHANGE_SHAPE) + && deform->remapflag != Domain::V_REMAP) error->all(FLERR,"Using fix srd with inconsistent " "fix deform remap option"); } diff --git a/src/USER-BOCS/fix_bocs.cpp b/src/USER-BOCS/fix_bocs.cpp index adce231bf1..3543c2f156 100644 --- 
a/src/USER-BOCS/fix_bocs.cpp +++ b/src/USER-BOCS/fix_bocs.cpp @@ -292,8 +292,12 @@ FixBocs::FixBocs(LAMMPS *lmp, int narg, char **arg) : if (p_flag[i]) pstat_flag = 1; if (pstat_flag) { - if (p_flag[0] || p_flag[1] || p_flag[2]) box_change_size = 1; - if (p_flag[3] || p_flag[4] || p_flag[5]) box_change_shape = 1; + if (p_flag[0]) box_change |= BOX_CHANGE_X; + if (p_flag[1]) box_change |= BOX_CHANGE_Y; + if (p_flag[2]) box_change |= BOX_CHANGE_Z; + if (p_flag[3]) box_change |= BOX_CHANGE_YZ; + if (p_flag[4]) box_change |= BOX_CHANGE_XZ; + if (p_flag[5]) box_change |= BOX_CHANGE_XY; no_change_box = 1; if (allremap == 0) restart_pbc = 1; diff --git a/src/USER-MISC/fix_npt_cauchy.cpp b/src/USER-MISC/fix_npt_cauchy.cpp index 1b0deb5ae3..7a4a34bd00 100644 --- a/src/USER-MISC/fix_npt_cauchy.cpp +++ b/src/USER-MISC/fix_npt_cauchy.cpp @@ -492,8 +492,12 @@ FixNPTCauchy::FixNPTCauchy(LAMMPS *lmp, int narg, char **arg) : if (p_flag[i]) pstat_flag = 1; if (pstat_flag) { - if (p_flag[0] || p_flag[1] || p_flag[2]) box_change_size = 1; - if (p_flag[3] || p_flag[4] || p_flag[5]) box_change_shape = 1; + if (p_flag[0]) box_change |= BOX_CHANGE_X; + if (p_flag[1]) box_change |= BOX_CHANGE_Y; + if (p_flag[2]) box_change |= BOX_CHANGE_Z; + if (p_flag[3]) box_change |= BOX_CHANGE_YZ; + if (p_flag[4]) box_change |= BOX_CHANGE_XZ; + if (p_flag[5]) box_change |= BOX_CHANGE_XY; no_change_box = 1; if (allremap == 0) restart_pbc = 1; diff --git a/src/USER-OMP/fix_rigid_nph_omp.cpp b/src/USER-OMP/fix_rigid_nph_omp.cpp index 31d53868c8..ff192341e4 100644 --- a/src/USER-OMP/fix_rigid_nph_omp.cpp +++ b/src/USER-OMP/fix_rigid_nph_omp.cpp @@ -33,7 +33,6 @@ FixRigidNPHOMP::FixRigidNPHOMP(LAMMPS *lmp, int narg, char **arg) : scalar_flag = 1; restart_global = 1; - box_change_size = 1; extscalar = 1; // error checks diff --git a/src/USER-OMP/fix_rigid_npt_omp.cpp b/src/USER-OMP/fix_rigid_npt_omp.cpp index 1e7c139d52..6da51e6220 100644 --- a/src/USER-OMP/fix_rigid_npt_omp.cpp +++ 
b/src/USER-OMP/fix_rigid_npt_omp.cpp @@ -33,7 +33,6 @@ FixRigidNPTOMP::FixRigidNPTOMP(LAMMPS *lmp, int narg, char **arg) : scalar_flag = 1; restart_global = 1; - box_change_size = 1; extscalar = 1; // error checks diff --git a/src/USER-QTB/fix_qbmsst.cpp b/src/USER-QTB/fix_qbmsst.cpp index abbf1701b8..27b1dcd4df 100644 --- a/src/USER-QTB/fix_qbmsst.cpp +++ b/src/USER-QTB/fix_qbmsst.cpp @@ -45,13 +45,16 @@ FixQBMSST::FixQBMSST(LAMMPS *lmp, int narg, char **arg) : { if (narg < 5) error->all(FLERR,"Illegal fix qbmsst command"); - if ( strcmp(arg[3],"x") == 0 ) + if ( strcmp(arg[3],"x") == 0 ) { direction = 0; - else if ( strcmp(arg[3],"y") == 0 ) + box_change |= BOX_CHANGE_X; + } else if ( strcmp(arg[3],"y") == 0 ) { direction = 1; - else if ( strcmp(arg[3],"z") == 0 ) + box_change |= BOX_CHANGE_Y; + } else if ( strcmp(arg[3],"z") == 0 ) { direction = 2; - else { + box_change |= BOX_CHANGE_Z; + } else { error->all(FLERR,"Illegal fix qbmsst command"); } velocity = atof(arg[4]); @@ -64,7 +67,6 @@ FixQBMSST::FixQBMSST(LAMMPS *lmp, int narg, char **arg) : extvector = 0; nevery = 1; restart_global = 1; - box_change_size = 1; time_integrate = 1; scalar_flag = 1; vector_flag = 1; diff --git a/src/USER-UEF/fix_nh_uef.cpp b/src/USER-UEF/fix_nh_uef.cpp index 8873688eb7..5c98a2dc85 100644 --- a/src/USER-UEF/fix_nh_uef.cpp +++ b/src/USER-UEF/fix_nh_uef.cpp @@ -163,7 +163,7 @@ FixNHUef::FixNHUef(LAMMPS *lmp, int narg, char **arg) : // flag that I change the box here (in case of nvt) - box_change_shape = 1; + box_change |= BOX_CHANGE_SHAPE; // initialize the UEFBox class which computes the box at each step diff --git a/src/domain.cpp b/src/domain.cpp index e894682556..efa2ef00cf 100644 --- a/src/domain.cpp +++ b/src/domain.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include "style_region.h" #include "atom.h" #include "atom_vec.h" @@ -133,13 +134,39 @@ void Domain::init() box_change_size = box_change_shape = box_change_domain = 0; + // flags for detecting, if 
multiple fixes try to change the + // same box size or shape parameter + + int box_change_x=0, box_change_y=0, box_change_z=0; + int box_change_yz=0, box_change_xz=0, box_change_xy=0; + Fix **fixes = modify->fix; + if (nonperiodic == 2) box_change_size = 1; for (int i = 0; i < modify->nfix; i++) { - if (modify->fix[i]->box_change_size) box_change_size = 1; - if (modify->fix[i]->box_change_shape) box_change_shape = 1; - if (modify->fix[i]->box_change_domain) box_change_domain = 1; + if (fixes[i]->box_change & Fix::BOX_CHANGE_SIZE) box_change_size = 1; + if (fixes[i]->box_change & Fix::BOX_CHANGE_SHAPE) box_change_shape = 1; + if (fixes[i]->box_change & Fix::BOX_CHANGE_DOMAIN) box_change_domain = 1; + if (fixes[i]->box_change & Fix::BOX_CHANGE_X) box_change_x++; + if (fixes[i]->box_change & Fix::BOX_CHANGE_Y) box_change_y++; + if (fixes[i]->box_change & Fix::BOX_CHANGE_Z) box_change_z++; + if (fixes[i]->box_change & Fix::BOX_CHANGE_YZ) box_change_yz++; + if (fixes[i]->box_change & Fix::BOX_CHANGE_XZ) box_change_xz++; + if (fixes[i]->box_change & Fix::BOX_CHANGE_XY) box_change_xy++; } + std::string mesg = "Must not have multiple fixes change box parameter "; + +#define CHECK_BOX_FIX_ERROR(par) \ + if (box_change_ ## par > 1) error->all(FLERR,(mesg + #par).c_str()) + + CHECK_BOX_FIX_ERROR(x); + CHECK_BOX_FIX_ERROR(y); + CHECK_BOX_FIX_ERROR(z); + CHECK_BOX_FIX_ERROR(yz); + CHECK_BOX_FIX_ERROR(xz); + CHECK_BOX_FIX_ERROR(xy); +#undef CHECK_BOX_FIX_ERROR + box_change = 0; if (box_change_size || box_change_shape || box_change_domain) box_change = 1; diff --git a/src/fix.cpp b/src/fix.cpp index d86acf0ae4..9c883a6897 100644 --- a/src/fix.cpp +++ b/src/fix.cpp @@ -58,7 +58,7 @@ Fix::Fix(LAMMPS *lmp, int /*narg*/, char **arg) : restart_global = restart_peratom = restart_file = 0; force_reneighbor = 0; - box_change_size = box_change_shape = box_change_domain = 0; + box_change = NO_BOX_CHANGE; thermo_energy = 0; thermo_virial = 0; rigid_flag = 0; diff --git a/src/fix.h 
b/src/fix.h index bcab6f289e..921778ae98 100644 --- a/src/fix.h +++ b/src/fix.h @@ -30,9 +30,14 @@ class Fix : protected Pointers { int restart_file; // 1 if Fix writes own restart file, 0 if not int force_reneighbor; // 1 if Fix forces reneighboring, 0 if not - int box_change_size; // 1 if Fix changes box size, 0 if not - int box_change_shape; // 1 if Fix changes box shape, 0 if not - int box_change_domain; // 1 if Fix changes proc sub-domains, 0 if not + int box_change; // >0 if Fix changes box size, shape, or sub-domains, 0 if not + enum { + NO_BOX_CHANGE = 0, BOX_CHANGE_ANY = 1<<0, BOX_CHANGE_DOMAIN = 1<<1, + BOX_CHANGE_X = 1<<2, BOX_CHANGE_Y = 1<<3, BOX_CHANGE_Z = 1<<4, + BOX_CHANGE_YZ = 1<<5, BOX_CHANGE_XZ = 1<<6, BOX_CHANGE_XY = 1<<7, + BOX_CHANGE_SIZE = BOX_CHANGE_X | BOX_CHANGE_Y | BOX_CHANGE_Z, + BOX_CHANGE_SHAPE = BOX_CHANGE_YZ | BOX_CHANGE_XZ | BOX_CHANGE_XY + }; bigint next_reneighbor; // next timestep to force a reneighboring int thermo_energy; // 1 if fix_modify enabled ThEng, 0 if not diff --git a/src/fix_balance.cpp b/src/fix_balance.cpp index 5ca1ec124a..b178cca83b 100644 --- a/src/fix_balance.cpp +++ b/src/fix_balance.cpp @@ -39,7 +39,7 @@ FixBalance::FixBalance(LAMMPS *lmp, int narg, char **arg) : { if (narg < 6) error->all(FLERR,"Illegal fix balance command"); - box_change_domain = 1; + box_change = BOX_CHANGE_DOMAIN; scalar_flag = 1; extscalar = 0; vector_flag = 1; diff --git a/src/fix_box_relax.cpp b/src/fix_box_relax.cpp index c19ea918b4..8d6111c0a0 100644 --- a/src/fix_box_relax.cpp +++ b/src/fix_box_relax.cpp @@ -218,8 +218,13 @@ FixBoxRelax::FixBoxRelax(LAMMPS *lmp, int narg, char **arg) : } else error->all(FLERR,"Illegal fix box/relax command"); } - if (p_flag[0] || p_flag[1] || p_flag[2]) box_change_size = 1; - if (p_flag[3] || p_flag[4] || p_flag[5]) box_change_shape = 1; + if (p_flag[0]) box_change |= BOX_CHANGE_X; + if (p_flag[1]) box_change |= BOX_CHANGE_Y; + if (p_flag[2]) box_change |= BOX_CHANGE_Z; + if (p_flag[3]) box_change |= 
BOX_CHANGE_YZ; + if (p_flag[4]) box_change |= BOX_CHANGE_XZ; + if (p_flag[5]) box_change |= BOX_CHANGE_XY; + if (allremap == 0) restart_pbc = 1; // error checks diff --git a/src/fix_deform.cpp b/src/fix_deform.cpp index 9d84c4bb62..4ecfa6f433 100644 --- a/src/fix_deform.cpp +++ b/src/fix_deform.cpp @@ -211,8 +211,12 @@ rfix(NULL), irregular(NULL), set(NULL) if (set[i].style == NONE) dimflag[i] = 0; else dimflag[i] = 1; - if (dimflag[0] || dimflag[1] || dimflag[2]) box_change_size = 1; - if (dimflag[3] || dimflag[4] || dimflag[5]) box_change_shape = 1; + if (dimflag[0]) box_change |= BOX_CHANGE_X; + if (dimflag[1]) box_change |= BOX_CHANGE_Y; + if (dimflag[2]) box_change |= BOX_CHANGE_Z; + if (dimflag[3]) box_change |= BOX_CHANGE_YZ; + if (dimflag[4]) box_change |= BOX_CHANGE_XZ; + if (dimflag[5]) box_change |= BOX_CHANGE_XY; // no tensile deformation on shrink-wrapped dims // b/c shrink wrap will change box-length diff --git a/src/fix_nh.cpp b/src/fix_nh.cpp index bb3fe7559c..e397e3367a 100644 --- a/src/fix_nh.cpp +++ b/src/fix_nh.cpp @@ -476,8 +476,12 @@ FixNH::FixNH(LAMMPS *lmp, int narg, char **arg) : if (p_flag[i]) pstat_flag = 1; if (pstat_flag) { - if (p_flag[0] || p_flag[1] || p_flag[2]) box_change_size = 1; - if (p_flag[3] || p_flag[4] || p_flag[5]) box_change_shape = 1; + if (p_flag[0]) box_change |= BOX_CHANGE_X; + if (p_flag[1]) box_change |= BOX_CHANGE_Y; + if (p_flag[2]) box_change |= BOX_CHANGE_Z; + if (p_flag[3]) box_change |= BOX_CHANGE_YZ; + if (p_flag[4]) box_change |= BOX_CHANGE_XZ; + if (p_flag[5]) box_change |= BOX_CHANGE_XY; no_change_box = 1; if (allremap == 0) restart_pbc = 1; diff --git a/src/fix_press_berendsen.cpp b/src/fix_press_berendsen.cpp index c090c554e2..184f02c440 100644 --- a/src/fix_press_berendsen.cpp +++ b/src/fix_press_berendsen.cpp @@ -40,8 +40,6 @@ FixPressBerendsen::FixPressBerendsen(LAMMPS *lmp, int narg, char **arg) : { if (narg < 5) error->all(FLERR,"Illegal fix press/berendsen command"); - box_change_size = 1; - // 
Berendsen barostat applied every step nevery = 1; @@ -204,6 +202,10 @@ FixPressBerendsen::FixPressBerendsen(LAMMPS *lmp, int narg, char **arg) : (p_flag[2] && p_period[2] <= 0.0)) error->all(FLERR,"Fix press/berendsen damping parameters must be > 0.0"); + if (p_flag[0]) box_change |= BOX_CHANGE_X; + if (p_flag[1]) box_change |= BOX_CHANGE_Y; + if (p_flag[2]) box_change |= BOX_CHANGE_Z; + // pstyle = ISO if XYZ coupling or XY coupling in 2d -> 1 dof // else pstyle = ANISO -> 3 dof -- GitLab From 8f3c94f33b3eb1fc2a6b4e722a3c30eece4fa831 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sat, 8 Feb 2020 15:22:54 -0500 Subject: [PATCH 005/328] document new error message --- doc/src/Errors_messages.rst | 3 +++ src/domain.h | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/doc/src/Errors_messages.rst b/doc/src/Errors_messages.rst index bbe84de065..f89b5f5b88 100644 --- a/doc/src/Errors_messages.rst +++ b/doc/src/Errors_messages.rst @@ -5815,6 +5815,9 @@ Doc page with :doc:`WARNING messages ` Cannot use the temper command with only one processor partition. Use the -partition command-line option. +*Must not have multiple fixes change box parameter ...* + Self-explanatory. + *Must read Atoms before Angles* The Atoms section of a data file must come before an Angles section. diff --git a/src/domain.h b/src/domain.h index a0bda8ae72..0ce31a8b4a 100644 --- a/src/domain.h +++ b/src/domain.h @@ -282,6 +282,10 @@ E: Both sides of boundary must be periodic Cannot specify a boundary as periodic only on the lo or hi side. Must be periodic on both sides. +E: Must not have multiple fixes change box parameter ... + +Self-explanatory. + U: Box bounds are invalid The box boundaries specified in the read_data file are invalid. 
The -- GitLab From f82892c45a5ca45fa01856f4e031e3e9cec855da Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sat, 8 Feb 2020 15:28:10 -0500 Subject: [PATCH 006/328] refactor one more fix --- src/SHOCK/fix_append_atoms.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/SHOCK/fix_append_atoms.cpp b/src/SHOCK/fix_append_atoms.cpp index 5e85b39076..1a6b128cd5 100644 --- a/src/SHOCK/fix_append_atoms.cpp +++ b/src/SHOCK/fix_append_atoms.cpp @@ -40,7 +40,6 @@ FixAppendAtoms::FixAppendAtoms(LAMMPS *lmp, int narg, char **arg) : { force_reneighbor = 1; next_reneighbor = -1; - box_change_size = 1; time_depend = 1; if (narg < 4) error->all(FLERR,"Illegal fix append/atoms command"); @@ -75,35 +74,41 @@ FixAppendAtoms::FixAppendAtoms(LAMMPS *lmp, int narg, char **arg) : if (strcmp(arg[iarg],"xlo") == 0) { error->all(FLERR,"Only zhi currently implemented for fix append/atoms"); xloflag = 1; + box_change |= BOX_CHANGE_X; iarg++; if (domain->boundary[0][0] != 3) error->all(FLERR,"Append boundary must be shrink/minimum"); } else if (strcmp(arg[iarg],"xhi") == 0) { error->all(FLERR,"Only zhi currently implemented for fix append/atoms"); xhiflag = 1; + box_change |= BOX_CHANGE_X; iarg++; if (domain->boundary[0][1] != 3) error->all(FLERR,"Append boundary must be shrink/minimum"); } else if (strcmp(arg[iarg],"ylo") == 0) { error->all(FLERR,"Only zhi currently implemented for fix append/atoms"); yloflag = 1; + box_change |= BOX_CHANGE_Y; iarg++; if (domain->boundary[1][0] != 3) error->all(FLERR,"Append boundary must be shrink/minimum"); } else if (strcmp(arg[iarg],"yhi") == 0) { error->all(FLERR,"Only zhi currently implemented for fix append/atoms"); yhiflag = 1; + box_change |= BOX_CHANGE_Y; iarg++; if (domain->boundary[1][1] != 3) error->all(FLERR,"Append boundary must be shrink/minimum"); } else if (strcmp(arg[iarg],"zlo") == 0) { error->all(FLERR,"Only zhi currently implemented for fix append/atoms"); zloflag = 1; + box_change |= BOX_CHANGE_Z; iarg++; 
if (domain->boundary[2][0] != 3) error->all(FLERR,"Append boundary must be shrink/minimum"); } else if (strcmp(arg[iarg],"zhi") == 0) { zhiflag = 1; + box_change |= BOX_CHANGE_Z; iarg++; if (domain->boundary[2][1] != 3) error->all(FLERR,"Append boundary must be shrink/minimum"); -- GitLab From 1430d1cb126fa7df1bf35527a82760923e1cbb93 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sat, 8 Feb 2020 15:30:13 -0500 Subject: [PATCH 007/328] ...and one more --- src/USER-UEF/fix_nh_uef.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/USER-UEF/fix_nh_uef.cpp b/src/USER-UEF/fix_nh_uef.cpp index 5c98a2dc85..01e2081aca 100644 --- a/src/USER-UEF/fix_nh_uef.cpp +++ b/src/USER-UEF/fix_nh_uef.cpp @@ -244,7 +244,7 @@ void FixNHUef::init() for (int i=0; i < modify->nfix; i++) { if (strcmp(modify->fix[i]->id,id) != 0) - if (modify->fix[i]->box_change_shape != 0) + if ((modify->fix[i]->box_change & BOX_CHANGE_SHAPE) != 0) error->all(FLERR,"Can't use another fix which changes box shape with fix/nvt/npt/uef"); } -- GitLab From 994e1318dd1866201546e98f76064853edeb5731 Mon Sep 17 00:00:00 2001 From: julient31 Date: Mon, 17 Feb 2020 16:59:01 -0700 Subject: [PATCH 008/328] Commit JT 021720 - initial commit - added corrected Neel, new E and w calc. 
--- src/SPIN/pair_spin_neel.cpp | 127 +++++++++++++++++++++++++----------- src/SPIN/pair_spin_neel.h | 1 + 2 files changed, 89 insertions(+), 39 deletions(-) diff --git a/src/SPIN/pair_spin_neel.cpp b/src/SPIN/pair_spin_neel.cpp index 4a5d453de2..811276cac5 100644 --- a/src/SPIN/pair_spin_neel.cpp +++ b/src/SPIN/pair_spin_neel.cpp @@ -259,7 +259,8 @@ void PairSpinNeel::compute(int eflag, int vflag) } if (eflag) { - evdwl = (spi[0]*fmi[0] + spi[1]*fmi[1] + spi[2]*fmi[2]); + // evdwl = (spi[0]*fmi[0] + spi[1]*fmi[1] + spi[2]*fmi[2]); + evdwl = compute_neel_energy(i,j,rsq,eij,spi,spj); evdwl *= 0.5*hbar; } else evdwl = 0.0; @@ -365,65 +366,69 @@ void PairSpinNeel::compute_single_pair(int ii, double fmi[3]) /* ---------------------------------------------------------------------- */ -void PairSpinNeel::compute_neel(int i, int j, double rsq, double eij[3], double fmi[3], double spi[3], double spj[3]) +void PairSpinNeel::compute_neel(int i, int j, double rsq, double eij[3], double fmi[3], double spi[3], double spj[3]) { int *type = atom->type; int itype, jtype; itype = type[i]; jtype = type[j]; - double gij, q1ij, q2ij, ra; + double qr,gr,g1r,q1r,q2r,ra; double pdx, pdy, pdz; double pq1x, pq1y, pq1z; double pq2x, pq2y, pq2z; + double eij_si,eij_sj,si_sj,eij_si_2,eij_sj_3,coeff1; - // pseudo-dipolar component + // compute Neel's functions ra = rsq/g3[itype][jtype]/g3[itype][jtype]; - gij = 4.0*g1[itype][jtype]*ra; - gij *= (1.0-g2[itype][jtype]*ra); - gij *= exp(-ra); + gr = 4.0*g1[itype][jtype]*ra; + gr *= (1.0-g2[itype][jtype]*ra); + gr *= exp(-ra); - double scalar_eij_si = eij[0]*spi[0] + eij[1]*spi[1] + eij[2]*spi[2]; - double scalar_eij_sj = eij[0]*spj[0] + eij[1]*spj[1] + eij[2]*spj[2]; - double scalar_si_sj = spi[0]*spj[0] + spi[1]*spj[1] + spi[2]*spj[2]; + ra = rsq/q3[itype][jtype]/q3[itype][jtype]; + qr = 4.0*q1[itype][jtype]*ra; + qr *= (1.0-q2[itype][jtype]*ra); + qr *= exp(-ra); - double gij_eij_sj = gij*scalar_eij_sj; - double gij_3 = gij/3.0; - pdx = 
gij_eij_sj*eij[0] - gij_3*spj[0]; - pdy = gij_eij_sj*eij[1] - gij_3*spj[1]; - pdz = gij_eij_sj*eij[2] - gij_3*spj[2]; + g1r = (gr + 12.0*qr/35.0); + q1r = 9.0*qr/5.0; + q2r = -2.0*qr/5.0; - // pseudo-quadrupolar component + // pseudo-dipolar component + + eij_si = eij[0]*spi[0] + eij[1]*spi[1] + eij[2]*spi[2]; + eij_sj = eij[0]*spj[0] + eij[1]*spj[1] + eij[2]*spj[2]; + si_sj = spi[0]*spj[0] + spi[1]*spj[1] + spi[2]*spj[2]; - ra = rsq/q3[itype][jtype]/q3[itype][jtype]; - q1ij = 4.0*q1[itype][jtype]*ra; - q1ij *= (1.0-q2[itype][jtype]*ra); - q1ij *= exp(-ra); - q2ij = (-2.0*q1ij/9.0); + pdx = g1r*(eij_sj*eij[0] - spj[0]/3.0); + pdy = g1r*(eij_sj*eij[1] - spj[1]/3.0); + pdz = g1r*(eij_sj*eij[2] - spj[2]/3.0); - double scalar_eij_si_2 = scalar_eij_si*scalar_eij_si; - pq1x = -(scalar_eij_si_2*scalar_eij_si_2 - scalar_si_sj/3.0)*spj[0]/3.0; - pq1y = -(scalar_eij_si_2*scalar_eij_si_2 - scalar_si_sj/3.0)*spj[1]/3.0; - pq1z = -(scalar_eij_si_2*scalar_eij_si_2 - scalar_si_sj/3.0)*spj[2]/3.0; + // pseudo-quadrupolar components - double pqt1 = (scalar_eij_sj*scalar_eij_sj-scalar_si_sj/3.0); - pq1x += pqt1*(2.0*scalar_eij_si*eij[0] - spj[0]/3.0); - pq1y += pqt1*(2.0*scalar_eij_si*eij[1] - spj[1]/3.0); - pq1z += pqt1*(2.0*scalar_eij_si*eij[2] - spj[2]/3.0); + eij_si_2 = eij_si*eij_si; + pq1x = -(eij_si_2 - si_sj/3.0)*spj[0]/3.0; + pq1y = -(eij_si_2 - si_sj/3.0)*spj[1]/3.0; + pq1z = -(eij_si_2 - si_sj/3.0)*spj[2]/3.0; - pq1x *= q1ij; - pq1y *= q1ij; - pq1z *= q1ij; + coeff1 = (eij_sj*eij_sj-si_sj/3.0); + pq1x += coeff1*(2.0*eij_si*eij[0] - spj[0]/3.0); + pq1y += coeff1*(2.0*eij_si*eij[1] - spj[1]/3.0); + pq1z += coeff1*(2.0*eij_si*eij[2] - spj[2]/3.0); - double scalar_eij_sj_3 = scalar_eij_sj*scalar_eij_sj*scalar_eij_sj; - pq2x = 3.0*scalar_eij_si_2*scalar_eij_sj*eij[0] + scalar_eij_sj_3*eij[0]; - pq2y = 3.0*scalar_eij_si_2*scalar_eij_sj*eij[1] + scalar_eij_sj_3*eij[1]; - pq2z = 3.0*scalar_eij_si_2*scalar_eij_sj*eij[2] + scalar_eij_sj_3*eij[2]; + pq1x *= q1r; + pq1y *= q1r; + 
pq1z *= q1r; - pq2x *= q2ij; - pq2y *= q2ij; - pq2z *= q2ij; + eij_sj_3 = eij_sj*eij_sj*eij_sj; + pq2x = 3.0*eij_si_2*eij_sj*eij[0] + eij_sj_3*eij[0]; + pq2y = 3.0*eij_si_2*eij_sj*eij[1] + eij_sj_3*eij[1]; + pq2z = 3.0*eij_si_2*eij_sj*eij[2] + eij_sj_3*eij[2]; + + pq2x *= q2r; + pq2y *= q2r; + pq2z *= q2r; // adding three contributions @@ -563,6 +568,50 @@ void PairSpinNeel::compute_neel_mech(int i, int j, double rsq, double eij[3], do fi[2] = pdz + pq1z + pq2z; } +/* ---------------------------------------------------------------------- */ + +double PairSpinNeel::compute_neel_energy(int i, int j, double rsq, double eij[3], double spi[3], double spj[3]) +{ + int *type = atom->type; + int itype, jtype; + itype = type[i]; + jtype = type[j]; + + double qr,gr,g1r,q1r,q2r,ra; + double epd,epq1,epq2; + double eij_si,eij_sj,si_sj; + double eij_si_2,eij_sj_2,eij_si_3,eij_sj_3; + + // compute Neel's functions + + ra = rsq/g3[itype][jtype]/g3[itype][jtype]; + gr = 4.0*g1[itype][jtype]*ra; + gr *= (1.0-g2[itype][jtype]*ra); + gr *= exp(-ra); + + ra = rsq/q3[itype][jtype]/q3[itype][jtype]; + qr = 4.0*q1[itype][jtype]*ra; + qr *= (1.0-q2[itype][jtype]*ra); + qr *= exp(-ra); + + g1r = (gr + 12.0*qr/35.0); + q1r = 9.0*qr/5.0; + q2r = -2.0*qr/5.0; + + eij_si = eij[0]*spi[0] + eij[1]*spi[1] + eij[2]*spi[2]; + eij_sj = eij[0]*spj[0] + eij[1]*spj[1] + eij[2]*spj[2]; + si_sj = spi[0]*spj[0] + spi[1]*spj[1] + spi[2]*spj[2]; + epd = g1r*(eij_si*eij_sj-si_sj/3.0); + eij_si_2 = eij_si*eij_si; + eij_sj_2 = eij_sj*eij_sj; + epq1 = q1r*(eij_si_2-si_sj/3.0)*(eij_sj_2-si_sj/3.0); + eij_si_3 = eij_si*eij_si*eij_si; + eij_sj_3 = eij_sj*eij_sj*eij_sj; + epq2 = q2r*(eij_si*eij_sj_3+eij_sj*eij_si_3); + + return (epd+epq1+epq2); +} + /* ---------------------------------------------------------------------- allocate all arrays ------------------------------------------------------------------------- */ diff --git a/src/SPIN/pair_spin_neel.h b/src/SPIN/pair_spin_neel.h index 5261a7f746..a39cf839c9 
100644 --- a/src/SPIN/pair_spin_neel.h +++ b/src/SPIN/pair_spin_neel.h @@ -38,6 +38,7 @@ class PairSpinNeel : public PairSpin { void compute_neel(int, int, double, double *, double *, double *, double *); void compute_neel_mech(int, int, double, double *, double *, double *, double *); + double compute_neel_energy(int, int, double, double *, double *, double *); void write_restart(FILE *); void read_restart(FILE *); -- GitLab From a7878096f0e48c4fc9bce241094bc45225564462 Mon Sep 17 00:00:00 2001 From: julient31 Date: Mon, 17 Feb 2020 17:35:59 -0700 Subject: [PATCH 009/328] Commit2 JT 021720 - small change in energy - to do: check w and rework F --- src/SPIN/pair_spin_neel.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/SPIN/pair_spin_neel.cpp b/src/SPIN/pair_spin_neel.cpp index 811276cac5..6cf5d4843f 100644 --- a/src/SPIN/pair_spin_neel.cpp +++ b/src/SPIN/pair_spin_neel.cpp @@ -605,8 +605,8 @@ double PairSpinNeel::compute_neel_energy(int i, int j, double rsq, double eij[3] eij_si_2 = eij_si*eij_si; eij_sj_2 = eij_sj*eij_sj; epq1 = q1r*(eij_si_2-si_sj/3.0)*(eij_sj_2-si_sj/3.0); - eij_si_3 = eij_si*eij_si*eij_si; - eij_sj_3 = eij_sj*eij_sj*eij_sj; + eij_si_3 = eij_si*eij_si_2; + eij_sj_3 = eij_sj*eij_sj_2; epq2 = q2r*(eij_si*eij_sj_3+eij_sj*eij_si_3); return (epd+epq1+epq2); -- GitLab From 361f7bb0fd7e386d91a47436ec4dcf65a0b4eaac Mon Sep 17 00:00:00 2001 From: julient31 Date: Fri, 21 Feb 2020 12:07:42 -0700 Subject: [PATCH 010/328] Commit JT 022120 - added precession_spin management in compute_spin - to do: add it for pairs - make sure users only declare 1 precession/spin --- src/SPIN/compute_spin.cpp | 70 +++++++++++++++++++++++++++++++- src/SPIN/compute_spin.h | 14 +++++++ src/SPIN/fix_precession_spin.cpp | 10 +++++ src/SPIN/fix_precession_spin.h | 4 ++ 4 files changed, 96 insertions(+), 2 deletions(-) diff --git a/src/SPIN/compute_spin.cpp b/src/SPIN/compute_spin.cpp index 7ee2b5bcfc..0612e5720e 100644 --- a/src/SPIN/compute_spin.cpp 
+++ b/src/SPIN/compute_spin.cpp @@ -24,11 +24,15 @@ #include "compute_spin.h" #include #include +#include #include "atom.h" #include "error.h" +#include "fix_precession_spin.h" #include "force.h" #include "math_const.h" #include "memory.h" +#include "modify.h" +#include "pair_spin.h" #include "update.h" using namespace LAMMPS_NS; @@ -64,6 +68,50 @@ void ComputeSpin::init() { hbar = force->hplanck/MY_2PI; kb = force->boltz; + + // init length of vector of ptrs to Pair/Spin styles + + if (npairspin > 0) { + spin_pairs = new PairSpin*[npairspin]; + } + + // loop 2: fill vector with ptrs to Pair/Spin styles + + int count = 0; + if (npairspin == 1) { + count = 1; + spin_pairs[0] = (PairSpin *) force->pair_match("spin",0,0); + } else if (npairspin > 1) { + for (int i = 0; i<npairs; i++) { + if (force->pair_match("spin",0,i)) { + spin_pairs[count] = (PairSpin *) force->pair_match("spin",0,i); + count++; + } + } + } + + if (count != npairspin) + error->all(FLERR,"Incorrect number of spin pairs"); + + // set pair/spin and long/spin flags + + if (npairspin >= 1) pair_spin_flag = 1; + + for (int i = 0; i<npairs; i++) { + if (force->pair_match("spin/long",0,i)) { + long_spin_flag = 1; + } + } + + // ptrs FixPrecessionSpin classes + + int iforce; + for (iforce = 0; iforce < modify->nfix; iforce++) { + if (strstr(modify->fix[iforce]->style,"precession/spin")) { + precession_spin_flag = 1; + lockprecessionspin = (FixPrecessionSpin *) modify->fix[iforce]; + } + } } /* ---------------------------------------------------------------------- */ @@ -104,7 +152,24 @@ void ComputeSpin::compute_vector() mag[0] += sp[i][0]; mag[1] += sp[i][1]; mag[2] += sp[i][2]; - magenergy -= (sp[i][0]*fm[i][0] + sp[i][1]*fm[i][1] + sp[i][2]*fm[i][2]); + // magenergy -= (sp[i][0]*fm[i][0] + sp[i][1]*fm[i][1] + sp[i][2]*fm[i][2]); + + // update magnetic precession energies + + if (precession_spin_flag) { + magenergy -= lockprecessionspin->compute_zeeman_energy(sp[i]); + magenergy -= lockprecessionspin->compute_anisotropy_energy(sp[i]); + magenergy -=
lockprecessionspin->compute_cubic_energy(sp[i]); + } + + // update magnetic pair interactions + + if (pair_spin_flag) { + for (int k = 0; k < npairspin; k++) { + // spin_pairs[k]->compute_single_pair(i,fmi); + } + } + tx = sp[i][1]*fm[i][2]-sp[i][2]*fm[i][1]; ty = sp[i][2]*fm[i][0]-sp[i][0]*fm[i][2]; tz = sp[i][0]*fm[i][1]-sp[i][1]*fm[i][0]; @@ -134,7 +199,8 @@ void ComputeSpin::compute_vector() vector[1] = magtot[1]; vector[2] = magtot[2]; vector[3] = magtot[3]; - vector[4] = magenergytot*hbar; + // vector[4] = magenergytot*hbar; + vector[4] = magenergytot; vector[5] = spintemperature; } diff --git a/src/SPIN/compute_spin.h b/src/SPIN/compute_spin.h index c5d55b84cb..2ff0ba8eee 100644 --- a/src/SPIN/compute_spin.h +++ b/src/SPIN/compute_spin.h @@ -32,7 +32,21 @@ class ComputeSpin : public Compute { void compute_vector(); private: + int pair_spin_flag; // magnetic pair flags + int long_spin_flag; // magnetic long-range flag + int precession_spin_flag; // magnetic precession flags + double kb,hbar; + + // pointers to magnetic fixes + + class FixPrecessionSpin *lockprecessionspin; + + // pointers to magnetic pair styles + + int npairs, npairspin; // # of pairs, and # of spin pairs + class Pair *pair; + class PairSpin **spin_pairs; // vector of spin pairs void allocate(); }; diff --git a/src/SPIN/fix_precession_spin.cpp b/src/SPIN/fix_precession_spin.cpp index 2d55de33ea..f9307d7ad0 100644 --- a/src/SPIN/fix_precession_spin.cpp +++ b/src/SPIN/fix_precession_spin.cpp @@ -302,6 +302,16 @@ void FixPrecessionSpin::compute_zeeman(int i, double fmi[3]) /* ---------------------------------------------------------------------- */ +double FixPrecessionSpin::compute_zeeman_energy(double spi[4]) +{ + double energy = 0.0; + double scalar = nhx*spi[0]+nhy*spi[1]+nhz*spi[2]; + energy = hbar*H_field*spi[3]*scalar; + return energy; +} + +/* ---------------------------------------------------------------------- */ + void FixPrecessionSpin::compute_anisotropy(double spi[3], double 
fmi[3]) { double scalar = nax*spi[0] + nay*spi[1] + naz*spi[2]; diff --git a/src/SPIN/fix_precession_spin.h b/src/SPIN/fix_precession_spin.h index 6ece653ca7..3c809506c1 100644 --- a/src/SPIN/fix_precession_spin.h +++ b/src/SPIN/fix_precession_spin.h @@ -41,7 +41,11 @@ class FixPrecessionSpin : public Fix { int zeeman_flag, aniso_flag, cubic_flag; void compute_single_precession(int, double *, double *); + + // zeeman calculations + void compute_zeeman(int, double *); + double compute_zeeman_energy(double *); // uniaxial aniso calculations -- GitLab From 09d0df43e215e6ce646334b5b60c3e8e53ff85f0 Mon Sep 17 00:00:00 2001 From: julient31 Date: Fri, 21 Feb 2020 17:53:14 -0700 Subject: [PATCH 011/328] Commit JT 022120 - added message for only one precession/spin (+doc) - added a per pair/spin class emag table --- doc/src/fix_precession_spin.rst | 7 +++++- src/SPIN/compute_spin.cpp | 18 +++++++++++++- src/SPIN/fix_precession_spin.cpp | 9 +++++++ src/SPIN/pair_spin.cpp | 6 +++++ src/SPIN/pair_spin.h | 4 +++ src/SPIN/pair_spin_dipole_cut.cpp | 10 ++++++++ src/SPIN/pair_spin_dipole_long.cpp | 24 ++++++++++++++---- src/SPIN/pair_spin_dmi.cpp | 15 ++++++++++- src/SPIN/pair_spin_exchange.cpp | 40 ++++++++++++++++++++++++++++++ src/SPIN/pair_spin_exchange.h | 2 ++ src/SPIN/pair_spin_magelec.cpp | 14 +++++++++++ src/SPIN/pair_spin_neel.cpp | 14 +++++++++++ 12 files changed, 155 insertions(+), 8 deletions(-) diff --git a/doc/src/fix_precession_spin.rst b/doc/src/fix_precession_spin.rst index 9cd15119bd..2d23ed1037 100644 --- a/doc/src/fix_precession_spin.rst +++ b/doc/src/fix_precession_spin.rst @@ -82,6 +82,7 @@ function for the same parameters. .. 
image:: JPG/zeeman_langevin.jpg :align: center + :width: 600 The temperature effects are accounted for by connecting the spin :math:`i` to a thermal bath using a Langevin thermostat (see @@ -159,11 +160,15 @@ No information about this fix is written to :doc:`binary restart files Restrictions """""""""""" - The *precession/spin* style is part of the SPIN package. This style is only enabled if LAMMPS was built with this package, and if the atom\_style "spin" was declared. See the :doc:`Build package <Build_package>` doc page for more info. +The *precession/spin* style can only be declared once. If more +than one precession type (for example combining an anisotropy and a Zeeman interaction) +has to be declared, they have to be chained in the same command +line (as shown in the examples above). + Related commands """""""""""""""" diff --git a/src/SPIN/compute_spin.cpp b/src/SPIN/compute_spin.cpp index 0612e5720e..8a71be019b 100644 --- a/src/SPIN/compute_spin.cpp +++ b/src/SPIN/compute_spin.cpp @@ -69,6 +69,22 @@ void ComputeSpin::init() hbar = force->hplanck/MY_2PI; kb = force->boltz; + // loop 1: obtain # of Pairs, and # of Pair/Spin styles + + if (force->pair_match("spin",0,0)) { // only one Pair/Spin style + pair = force->pair_match("spin",0,0); + npairs = pair->instance_total; + npairspin = 1; + } else if (force->pair_match("spin",0,1)) { // more than one Pair/Spin style + pair = force->pair_match("spin",0,1); + npairs = pair->instance_total; + for (int i = 0; i<npairs; i++) { + if (force->pair_match("spin",0,i)) { + npairspin ++; + } + } + } + // init length of vector of ptrs to Pair/Spin styles if (npairspin > 0) { @@ -166,7 +182,7 @@ void ComputeSpin::compute_vector() if (pair_spin_flag) { for (int k = 0; k < npairspin; k++) { - // spin_pairs[k]->compute_single_pair(i,fmi); + magenergy += spin_pairs[k]->emag[i]; } } diff --git a/src/SPIN/fix_precession_spin.cpp b/src/SPIN/fix_precession_spin.cpp index f9307d7ad0..57e4549718 100644 --- a/src/SPIN/fix_precession_spin.cpp +++
b/src/SPIN/fix_precession_spin.cpp @@ -197,6 +197,15 @@ void FixPrecessionSpin::init() error->all(FLERR,"Illegal precession/spin command"); } + // check that fix precession/spin is only declared once + + int iprec = 0; + for (int iforce = 0; iforce < modify->nfix; iforce++) + if (strstr(modify->fix[iforce]->style,"precession/spin")) iprec++; + if (iprec > 1) + error->all(FLERR,"precession/spin command can only be declared once"); + + varflag = CONSTANT; if (magfieldstyle != CONSTANT) varflag = EQUAL; diff --git a/src/SPIN/pair_spin.cpp b/src/SPIN/pair_spin.cpp index f167e3455c..01b8775eab 100644 --- a/src/SPIN/pair_spin.cpp +++ b/src/SPIN/pair_spin.cpp @@ -29,6 +29,7 @@ #include "fix.h" #include "force.h" #include "math_const.h" +#include "memory.h" #include "modify.h" #include "neighbor.h" #include "neigh_request.h" @@ -98,4 +99,9 @@ void PairSpin::init_style() if (ifix >=0) lattice_flag = ((FixNVESpin *) modify->fix[ifix])->lattice_flag; + // test emag list storing mag energies + // init. 
size of energy stacking lists + + nlocal_max = atom->nlocal; + memory->grow(emag,nlocal_max,"pair/spin:emag"); } diff --git a/src/SPIN/pair_spin.h b/src/SPIN/pair_spin.h index 34f12d8d59..c8292236a3 100644 --- a/src/SPIN/pair_spin.h +++ b/src/SPIN/pair_spin.h @@ -31,6 +31,10 @@ friend class FixNVESpin; virtual void compute(int, int) {} virtual void compute_single_pair(int, double *) {} + + // test emag list storing mag energies + int nlocal_max; // max value of nlocal (for size of lists) + double *emag; // energy list protected: double hbar; // Planck constant (eV.ps.rad-1) diff --git a/src/SPIN/pair_spin_dipole_cut.cpp b/src/SPIN/pair_spin_dipole_cut.cpp index a7372b480d..6029f8bdbb 100644 --- a/src/SPIN/pair_spin_dipole_cut.cpp +++ b/src/SPIN/pair_spin_dipole_cut.cpp @@ -64,6 +64,9 @@ PairSpinDipoleCut::~PairSpinDipoleCut() memory->destroy(setflag); memory->destroy(cut_spin_long); memory->destroy(cutsq); + + // test emag list storing mag energies + memory->destroy(emag); } } @@ -185,6 +188,13 @@ void PairSpinDipoleCut::compute(int eflag, int vflag) numneigh = list->numneigh; firstneigh = list->firstneigh; + // test emag list storing mag energies + // checking size of emag + if (nlocal_max < nlocal) { // grow emag lists if necessary + nlocal_max = nlocal; + memory->grow(emag,nlocal_max,"pair/spin:emag"); + } + // computation of the exchange interaction // loop over atoms and their neighbors diff --git a/src/SPIN/pair_spin_dipole_long.cpp b/src/SPIN/pair_spin_dipole_long.cpp index 124522a9b9..7856035159 100644 --- a/src/SPIN/pair_spin_dipole_long.cpp +++ b/src/SPIN/pair_spin_dipole_long.cpp @@ -69,6 +69,9 @@ PairSpinDipoleLong::~PairSpinDipoleLong() memory->destroy(setflag); memory->destroy(cut_spin_long); memory->destroy(cutsq); + + // test emag list storing mag energies + memory->destroy(emag); } } @@ -212,6 +215,13 @@ void PairSpinDipoleLong::compute(int eflag, int vflag) numneigh = list->numneigh; firstneigh = list->firstneigh; + // test emag list storing mag 
energies + // checking size of emag + if (nlocal_max < nlocal) { // grow emag lists if necessary + nlocal_max = nlocal; + memory->grow(emag,nlocal_max,"pair/spin:emag"); + } + pre1 = 2.0 * g_ewald / MY_PIS; pre2 = 4.0 * pow(g_ewald,3.0) / MY_PIS; pre3 = 8.0 * pow(g_ewald,5.0) / MY_PIS; @@ -221,16 +231,20 @@ void PairSpinDipoleLong::compute(int eflag, int vflag) for (ii = 0; ii < inum; ii++) { i = ilist[ii]; + itype = type[i]; + + jlist = firstneigh[i]; + jnum = numneigh[i]; xi[0] = x[i][0]; xi[1] = x[i][1]; xi[2] = x[i][2]; - jlist = firstneigh[i]; - jnum = numneigh[i]; spi[0] = sp[i][0]; spi[1] = sp[i][1]; spi[2] = sp[i][2]; spi[3] = sp[i][3]; - itype = type[i]; + + // test emag list storing mag energies + emag[i] = 0.0; for (jj = 0; jj < jnum; jj++) { j = jlist[jj]; @@ -294,9 +308,9 @@ void PairSpinDipoleLong::compute(int eflag, int vflag) if (eflag) { if (rsq <= local_cut2) { - evdwl -= spi[0]*fmi[0] + spi[1]*fmi[1] + - spi[2]*fmi[2]; + evdwl -= (spi[0]*fmi[0] + spi[1]*fmi[1] + spi[2]*fmi[2]); evdwl *= 0.5*hbar; + emag[i] += evdwl; } } else evdwl = 0.0; diff --git a/src/SPIN/pair_spin_dmi.cpp b/src/SPIN/pair_spin_dmi.cpp index 04c2dc408d..266bc05da4 100644 --- a/src/SPIN/pair_spin_dmi.cpp +++ b/src/SPIN/pair_spin_dmi.cpp @@ -53,6 +53,9 @@ PairSpinDmi::~PairSpinDmi() memory->destroy(vmech_dmy); memory->destroy(vmech_dmz); memory->destroy(cutsq); + + // test emag list storing mag energies + memory->destroy(emag); } } @@ -191,6 +194,13 @@ void PairSpinDmi::compute(int eflag, int vflag) numneigh = list->numneigh; firstneigh = list->firstneigh; + // test emag list storing mag energies + // checking size of emag + if (nlocal_max < nlocal) { // grow emag lists if necessary + nlocal_max = nlocal; + memory->grow(emag,nlocal_max,"pair/spin:emag"); + } + // dmi computation // loop over all atoms @@ -206,7 +216,9 @@ void PairSpinDmi::compute(int eflag, int vflag) spi[0] = sp[i][0]; spi[1] = sp[i][1]; spi[2] = sp[i][2]; - + + // test emag list storing mag energies + emag[i] 
= 0.0; // loop on neighbors @@ -260,6 +272,7 @@ void PairSpinDmi::compute(int eflag, int vflag) if (eflag) { evdwl -= (spi[0]*fmi[0] + spi[1]*fmi[1] + spi[2]*fmi[2]); evdwl *= 0.5*hbar; + emag[i] += evdwl; } else evdwl = 0.0; if (evflag) ev_tally_xyz(i,j,nlocal,newton_pair, diff --git a/src/SPIN/pair_spin_exchange.cpp b/src/SPIN/pair_spin_exchange.cpp index 6eacb04ee3..d645515506 100644 --- a/src/SPIN/pair_spin_exchange.cpp +++ b/src/SPIN/pair_spin_exchange.cpp @@ -50,6 +50,9 @@ PairSpinExchange::~PairSpinExchange() memory->destroy(J2); memory->destroy(J3); memory->destroy(cutsq); // to be implemented + + // test emag list storing mag energies + memory->destroy(emag); } } @@ -176,6 +179,13 @@ void PairSpinExchange::compute(int eflag, int vflag) numneigh = list->numneigh; firstneigh = list->firstneigh; + // test emag list storing mag energies + // checking size of emag + if (nlocal_max < nlocal) { // grow emag lists if necessary + nlocal_max = nlocal; + memory->grow(emag,nlocal_max,"pair/spin:emag"); + } + // computation of the exchange interaction // loop over atoms and their neighbors @@ -191,6 +201,9 @@ void PairSpinExchange::compute(int eflag, int vflag) spi[0] = sp[i][0]; spi[1] = sp[i][1]; spi[2] = sp[i][2]; + + // test emag list storing mag energies + emag[i] = 0.0; // loop on neighbors @@ -243,6 +256,10 @@ void PairSpinExchange::compute(int eflag, int vflag) if (eflag) { evdwl -= (spi[0]*fmi[0] + spi[1]*fmi[1] + spi[2]*fmi[2]); evdwl *= 0.5*hbar; + // printf("test ex energy: %g \n",evdwl); + // evdwl = -0.5*compute_energy(i,j,rsq,spi,spj); + // printf("test ex energy: %g \n",evdwl); + emag[i] += evdwl; // evdwl *= hbar; } else evdwl = 0.0; @@ -385,6 +402,29 @@ void PairSpinExchange::compute_exchange_mech(int i, int j, double rsq, double ei fi[2] -= Jex_mech*eij[2]; } +/* ---------------------------------------------------------------------- + compute energy of spin pair i and j +------------------------------------------------------------------------- */ + 
+// double PairSpinExchange::compute_energy(int i, int j, double rsq, double spi[3], double spj[3]) +// { +// int *type = atom->type; +// int itype, jtype; +// double Jex, ra; +// double energy = 0.0; +// itype = type[i]; +// jtype = type[j]; +// +// Jex = J1_mech[itype][jtype]; +// ra = rsq/J3[itype][jtype]/J3[itype][jtype]; +// Jex = 4.0*Jex*ra; +// Jex *= (1.0-J2[itype][jtype]*ra); +// Jex *= exp(-ra); +// +// energy = Jex*(spi[0]*spj[0]+spi[1]*spj[1]+spi[2]*spj[2]); +// return energy; +// } + /* ---------------------------------------------------------------------- allocate all arrays ------------------------------------------------------------------------- */ diff --git a/src/SPIN/pair_spin_exchange.h b/src/SPIN/pair_spin_exchange.h index 19eafeb5ca..4e9e6bfac8 100644 --- a/src/SPIN/pair_spin_exchange.h +++ b/src/SPIN/pair_spin_exchange.h @@ -39,6 +39,8 @@ class PairSpinExchange : public PairSpin { void compute_exchange(int, int, double, double *, double *); void compute_exchange_mech(int, int, double, double *, double *, double *, double *); + // double compute_energy(int , int , double , double *, double *); + void write_restart(FILE *); void read_restart(FILE *); void write_restart_settings(FILE *); diff --git a/src/SPIN/pair_spin_magelec.cpp b/src/SPIN/pair_spin_magelec.cpp index fabad4ae4d..ef91ab764a 100644 --- a/src/SPIN/pair_spin_magelec.cpp +++ b/src/SPIN/pair_spin_magelec.cpp @@ -51,6 +51,9 @@ PairSpinMagelec::~PairSpinMagelec() memory->destroy(v_mey); memory->destroy(v_mez); memory->destroy(cutsq); // to be deteled + + // test emag list storing mag energies + memory->destroy(emag); } } @@ -185,6 +188,13 @@ void PairSpinMagelec::compute(int eflag, int vflag) numneigh = list->numneigh; firstneigh = list->firstneigh; + // test emag list storing mag energies + // checking size of emag + if (nlocal_max < nlocal) { // grow emag lists if necessary + nlocal_max = nlocal; + memory->grow(emag,nlocal_max,"pair/spin:emag"); + } + // magneto-electric computation 
// loop over atoms and their neighbors @@ -200,6 +210,9 @@ void PairSpinMagelec::compute(int eflag, int vflag) spi[0] = sp[i][0]; spi[1] = sp[i][1]; spi[2] = sp[i][2]; + + // test emag list storing mag energies + emag[i] = 0.0; // loop on neighbors @@ -252,6 +265,7 @@ void PairSpinMagelec::compute(int eflag, int vflag) if (eflag) { evdwl -= (spi[0]*fmi[0] + spi[1]*fmi[1] + spi[2]*fmi[2]); evdwl *= 0.5*hbar; + emag[i] += evdwl; } else evdwl = 0.0; if (evflag) ev_tally_xyz(i,j,nlocal,newton_pair, diff --git a/src/SPIN/pair_spin_neel.cpp b/src/SPIN/pair_spin_neel.cpp index 6cf5d4843f..e158906b75 100644 --- a/src/SPIN/pair_spin_neel.cpp +++ b/src/SPIN/pair_spin_neel.cpp @@ -54,6 +54,9 @@ PairSpinNeel::~PairSpinNeel() memory->destroy(q2); memory->destroy(q3); memory->destroy(cutsq); // to be deleted + + // test emag list storing mag energies + memory->destroy(emag); } } @@ -190,6 +193,13 @@ void PairSpinNeel::compute(int eflag, int vflag) numneigh = list->numneigh; firstneigh = list->firstneigh; + // test emag list storing mag energies + // checking size of emag + if (nlocal_max < nlocal) { // grow emag lists if necessary + nlocal_max = nlocal; + memory->grow(emag,nlocal_max,"pair/spin:emag"); + } + // computation of the neel interaction // loop over atoms and their neighbors @@ -206,6 +216,9 @@ void PairSpinNeel::compute(int eflag, int vflag) spi[1] = sp[i][1]; spi[2] = sp[i][2]; + // test emag list storing mag energies + emag[i] = 0.0; + // loop on neighbors for (jj = 0; jj < jnum; jj++) { @@ -262,6 +275,7 @@ void PairSpinNeel::compute(int eflag, int vflag) // evdwl = (spi[0]*fmi[0] + spi[1]*fmi[1] + spi[2]*fmi[2]); evdwl = compute_neel_energy(i,j,rsq,eij,spi,spj); evdwl *= 0.5*hbar; + emag[i] += evdwl; } else evdwl = 0.0; if (evflag) ev_tally_xyz(i,j,nlocal,newton_pair, -- GitLab From a739b8c6b766709a67265966753c9b401f9180af Mon Sep 17 00:00:00 2001 From: julient31 Date: Tue, 3 Mar 2020 07:32:13 -0700 Subject: [PATCH 012/328] Commit JT 030320 - modified 
fix/precession for correct mag energy calc. - reran all benchmark / examples in serial for verif - to do: rerun mpi examples, and clean code --- examples/SPIN/bfo/in.spin.bfo | 8 +- examples/SPIN/cobalt_fcc/in.spin.cobalt_fcc | 4 +- examples/SPIN/cobalt_hcp/in.spin.cobalt_hcp | 9 +- examples/SPIN/iron/in.spin.iron | 1 + examples/SPIN/nickel/in.spin.nickel | 3 +- examples/SPIN/nickel/in.spin.nickel_cubic | 2 +- examples/SPIN/read_restart/in.spin.read_data | 4 +- examples/SPIN/read_restart/in.spin.restart | 4 +- .../SPIN/read_restart/in.spin.write_restart | 3 +- examples/SPIN/run_spin_examples.sh | 2 +- examples/SPIN/run_spin_examples_serial.sh | 120 ++++++++++++++++++ examples/SPIN/setforce_spin/in.spin.setforce | 2 +- examples/SPIN/test_problems/README | 22 ++-- .../run-test-exchange.sh | 2 +- ...-precession.in => test-spin-precession.in} | 0 .../validation_damped_exchange/two_spins.data | 22 ++++ .../run-test-prec.sh | 2 +- ...-precession.in => test-spin-precession.in} | 1 + .../bench-prec-spin.in | 46 +++++++ .../run-test-prec.sh | 7 +- ...-spin.template => test-prec-spin.template} | 0 src/SPIN/compute_spin.cpp | 21 ++- src/SPIN/fix_precession_spin.cpp | 28 +++- src/SPIN/fix_precession_spin.h | 4 + src/SPIN/pair_spin.cpp | 2 +- 25 files changed, 272 insertions(+), 47 deletions(-) create mode 100755 examples/SPIN/run_spin_examples_serial.sh rename examples/SPIN/test_problems/validation_damped_exchange/{bench-spin-precession.in => test-spin-precession.in} (100%) create mode 100644 examples/SPIN/test_problems/validation_damped_exchange/two_spins.data rename examples/SPIN/test_problems/validation_damped_precession/{bench-spin-precession.in => test-spin-precession.in} (92%) create mode 100644 examples/SPIN/test_problems/validation_langevin_precession/bench-prec-spin.in rename examples/SPIN/test_problems/validation_langevin_precession/{bench-prec-spin.template => test-prec-spin.template} (100%) diff --git a/examples/SPIN/bfo/in.spin.bfo b/examples/SPIN/bfo/in.spin.bfo 
index b97f7e2d61..47ba535ab6 100644 --- a/examples/SPIN/bfo/in.spin.bfo +++ b/examples/SPIN/bfo/in.spin.bfo @@ -27,7 +27,8 @@ pair_coeff * * spin/dmi dmi 4.5 0.00005 1.0 1.0 1.0 neighbor 0.1 bin neigh_modify every 10 check yes delay 20 -fix 1 all precession/spin anisotropy 0.0000033 0.0 0.0 1.0 +fix 1 all precession/spin zeeman 0.0 0.0 0.0 1.0 anisotropy 0.00033 0.0 0.0 1.0 +fix_modify 1 energy yes fix 2 all langevin/spin 0.0 0.1 21 fix 3 all nve/spin lattice frozen @@ -43,9 +44,8 @@ variable magnorm equal c_out_mag[4] variable emag equal c_out_mag[5] variable tmag equal c_out_mag[6] -#thermo_style custom step time v_magnorm v_emag temp etotal -thermo_style custom step time v_magnorm pe ke v_emag temp etotal -thermo 10 +thermo_style custom step time v_magnorm pe v_emag temp etotal +thermo 50 compute outsp all property/atom spx spy spz sp fmx fmy fmz dump 1 all custom 100 dump_bfo.lammpstrj type x y z c_outsp[1] c_outsp[2] c_outsp[3] diff --git a/examples/SPIN/cobalt_fcc/in.spin.cobalt_fcc b/examples/SPIN/cobalt_fcc/in.spin.cobalt_fcc index dd9ed890ee..f81a962e3f 100644 --- a/examples/SPIN/cobalt_fcc/in.spin.cobalt_fcc +++ b/examples/SPIN/cobalt_fcc/in.spin.cobalt_fcc @@ -45,8 +45,6 @@ compute out_pe all pe compute out_ke all ke compute out_temp all temp -thermo_style custom f_1 - variable magx equal c_out_mag[1] variable magy equal c_out_mag[2] variable magz equal c_out_mag[3] @@ -54,7 +52,7 @@ variable magnorm equal c_out_mag[4] variable emag equal c_out_mag[5] variable tmag equal c_out_mag[6] -thermo_style custom step time f_1 v_magx v_magy v_magnorm v_emag temp etotal +thermo_style custom step time v_magx v_magy v_magnorm pe v_emag temp etotal thermo 50 # compute outsp all property/atom spx spy spz sp fmx fmy fmz diff --git a/examples/SPIN/cobalt_hcp/in.spin.cobalt_hcp b/examples/SPIN/cobalt_hcp/in.spin.cobalt_hcp index dd114202cb..2bfa8393f3 100644 --- a/examples/SPIN/cobalt_hcp/in.spin.cobalt_hcp +++ b/examples/SPIN/cobalt_hcp/in.spin.cobalt_hcp @@ -32,10 
+32,9 @@ pair_coeff * * spin/exchange exchange 4.0 -0.3593 1.135028015e-05 1.064568567 neighbor 0.1 bin neigh_modify every 10 check yes delay 20 -#fix 1 all precession/spin zeeman 1.0 0.0 0.0 1.0 fix 1 all precession/spin anisotropy 0.01 0.0 0.0 1.0 -#fix 2 all langevin/spin 0.0 0.0 21 -fix 2 all langevin/spin 0.0 0.1 21 +fix_modify 1 energy yes +fix 2 all langevin/spin 0.0 0.0 21 fix 3 all nve/spin lattice moving timestep 0.0001 @@ -51,8 +50,8 @@ variable magnorm equal c_out_mag[4] variable emag equal c_out_mag[5] variable tmag equal c_out_mag[6] -thermo_style custom step time v_magnorm v_emag temp press etotal -thermo 10 +thermo_style custom step time v_magnorm pe v_emag temp press etotal +thermo 50 compute outsp all property/atom spx spy spz sp fmx fmy fmz dump 1 all custom 100 dump_cobalt_hcp.lammpstrj type x y z c_outsp[1] c_outsp[2] c_outsp[3] diff --git a/examples/SPIN/iron/in.spin.iron b/examples/SPIN/iron/in.spin.iron index d60e6b86f5..58c0537af7 100644 --- a/examples/SPIN/iron/in.spin.iron +++ b/examples/SPIN/iron/in.spin.iron @@ -31,6 +31,7 @@ neighbor 0.1 bin neigh_modify every 10 check yes delay 20 fix 1 all precession/spin zeeman 0.0 0.0 0.0 1.0 +fix_modify 1 energy yes fix 2 all langevin/spin 0.0 0.0 21 fix 3 all nve/spin lattice moving diff --git a/examples/SPIN/nickel/in.spin.nickel b/examples/SPIN/nickel/in.spin.nickel index 1d62188d8f..0fd2e5f345 100644 --- a/examples/SPIN/nickel/in.spin.nickel +++ b/examples/SPIN/nickel/in.spin.nickel @@ -31,6 +31,7 @@ neighbor 0.1 bin neigh_modify every 10 check yes delay 20 fix 1 all precession/spin zeeman 0.0 0.0 0.0 1.0 +fix_modify 1 energy yes fix 2 all langevin/spin 0.0 0.0 21 fix 3 all nve/spin lattice moving @@ -48,7 +49,7 @@ variable magnorm equal c_out_mag[4] variable emag equal c_out_mag[5] variable tmag equal c_out_mag[6] -thermo_style custom step time v_magnorm v_emag temp v_tmag etotal +thermo_style custom step time v_magnorm pe v_emag temp v_tmag etotal thermo 50 compute outsp all property/atom 
spx spy spz sp fmx fmy fmz diff --git a/examples/SPIN/nickel/in.spin.nickel_cubic b/examples/SPIN/nickel/in.spin.nickel_cubic index 1ae069a64f..88c477132e 100644 --- a/examples/SPIN/nickel/in.spin.nickel_cubic +++ b/examples/SPIN/nickel/in.spin.nickel_cubic @@ -50,7 +50,7 @@ variable magnorm equal c_out_mag[4] variable emag equal c_out_mag[5] variable tmag equal c_out_mag[6] -thermo_style custom step time v_magnorm v_emag temp v_tmag etotal +thermo_style custom step time v_magnorm pe v_emag temp v_tmag etotal thermo 50 compute outsp all property/atom spx spy spz sp fmx fmy fmz diff --git a/examples/SPIN/read_restart/in.spin.read_data b/examples/SPIN/read_restart/in.spin.read_data index e788ecf67e..b2b55a9fcb 100644 --- a/examples/SPIN/read_restart/in.spin.read_data +++ b/examples/SPIN/read_restart/in.spin.read_data @@ -35,8 +35,8 @@ variable magnorm equal c_out_mag[4] variable emag equal c_out_mag[5] variable tmag equal c_out_mag[6] -thermo 10 -thermo_style custom step time v_magnorm v_emag v_tmag temp etotal +thermo 20 +thermo_style custom step time v_magnorm pe v_emag v_tmag temp etotal thermo_modify format float %20.15g compute outsp all property/atom spx spy spz sp fmx fmy fmz diff --git a/examples/SPIN/read_restart/in.spin.restart b/examples/SPIN/read_restart/in.spin.restart index ccce25b254..985da65eb4 100644 --- a/examples/SPIN/read_restart/in.spin.restart +++ b/examples/SPIN/read_restart/in.spin.restart @@ -39,8 +39,8 @@ variable magnorm equal c_out_mag[4] variable emag equal c_out_mag[5] variable tmag equal c_out_mag[6] -thermo 10 -thermo_style custom step time v_magnorm v_emag v_tmag temp etotal +thermo 20 +thermo_style custom step time v_magnorm pe v_emag v_tmag temp etotal thermo_modify format float %20.15g compute outsp all property/atom spx spy spz sp fmx fmy fmz diff --git a/examples/SPIN/read_restart/in.spin.write_restart b/examples/SPIN/read_restart/in.spin.write_restart index c127101093..19ab8e6b30 100644 --- 
a/examples/SPIN/read_restart/in.spin.write_restart +++ b/examples/SPIN/read_restart/in.spin.write_restart @@ -44,7 +44,7 @@ variable magnorm equal c_out_mag[4] variable emag equal c_out_mag[5] variable tmag equal c_out_mag[6] -thermo_style custom step time v_magnorm v_emag temp etotal +thermo_style custom step time v_magnorm pe v_emag temp etotal thermo 100 compute outsp all property/atom spx spy spz sp fmx fmy fmz @@ -52,4 +52,3 @@ dump 100 all custom 1 dump.lammpstrj type x y z c_outsp[1] c_outsp[2] c_outsp[ run 1000 write_restart restart_hcp_cobalt.equil - diff --git a/examples/SPIN/run_spin_examples.sh b/examples/SPIN/run_spin_examples.sh index a71da82a04..b2188503ee 100755 --- a/examples/SPIN/run_spin_examples.sh +++ b/examples/SPIN/run_spin_examples.sh @@ -1,6 +1,6 @@ #!/bin/bash -DATE=19Nov19 +DATE=21Fev20 # bfo cd bfo/ diff --git a/examples/SPIN/run_spin_examples_serial.sh b/examples/SPIN/run_spin_examples_serial.sh new file mode 100755 index 0000000000..f367dfd8f4 --- /dev/null +++ b/examples/SPIN/run_spin_examples_serial.sh @@ -0,0 +1,120 @@ +#!/bin/bash + +DATE=21Fev20 + +# bfo +cd bfo/ +../../../src/lmp_serial -in in.spin.bfo +cp log.lammps log.${DATE}.spin.bfo.g++.1 +../../../src/lmp_serial -in in.spin.bfo +cp log.lammps log.${DATE}.spin.bfo.g++.4 +rm log.lammps log.cite dump*.lammpstrj +cd .. + +# fcc cobalt +cd cobalt_fcc/ +../../../src/lmp_serial -in in.spin.cobalt_fcc +cp log.lammps log.${DATE}.spin.cobalt_fcc.g++.1 +../../../src/lmp_serial -in in.spin.cobalt_fcc +cp log.lammps log.${DATE}.spin.cobalt_fcc.g++.4 +rm log.lammps log.cite dump*.lammpstrj +cd .. + +# hcp cobalt +cd cobalt_hcp/ +../../../src/lmp_serial -in in.spin.cobalt_hcp +cp log.lammps log.${DATE}.spin.cobalt_hcp.g++.1 +../../../src/lmp_serial -in in.spin.cobalt_hcp +cp log.lammps log.${DATE}.spin.cobalt_hcp.g++.4 +rm log.lammps log.cite dump*.lammpstrj +cd .. 
+ +# dipole spin +cd dipole_spin/ +../../../src/lmp_serial -in in.spin.iron_dipole_cut +cp log.lammps log.${DATE}.spin.iron_dipole_cut.g++.1 +../../../src/lmp_serial -in in.spin.iron_dipole_cut +cp log.lammps log.${DATE}.spin.iron_dipole_cut.g++.4 +../../../src/lmp_serial -in in.spin.iron_dipole_ewald +cp log.lammps log.${DATE}.spin.iron_dipole_ewald.g++.1 +../../../src/lmp_serial -in in.spin.iron_dipole_ewald +cp log.lammps log.${DATE}.spin.iron_dipole_ewald.g++.4 +../../../src/lmp_serial -in in.spin.iron_dipole_pppm +cp log.lammps log.${DATE}.spin.iron_dipole_pppm.g++.1 +../../../src/lmp_serial -in in.spin.iron_dipole_pppm +cp log.lammps log.${DATE}.spin.iron_dipole_pppm.g++.4 +rm log.lammps log.cite dump*.lammpstrj +cd .. + +# bcc iron +cd iron/ +../../../src/lmp_serial -in in.spin.iron +cp log.lammps log.${DATE}.spin.iron.g++.1 +../../../src/lmp_serial -in in.spin.iron +cp log.lammps log.${DATE}.spin.iron.g++.4 +../../../src/lmp_serial -in in.spin.iron_cubic +cp log.lammps log.${DATE}.spin.iron_cubic.g++.1 +../../../src/lmp_serial -in in.spin.iron_cubic +cp log.lammps log.${DATE}.spin.iron_cubic.g++.4 +rm log.lammps log.cite dump*.lammpstrj +cd .. + +# fcc nickel +cd nickel/ +../../../src/lmp_serial -in in.spin.nickel +cp log.lammps log.${DATE}.spin.nickel.g++.1 +../../../src/lmp_serial -in in.spin.nickel +cp log.lammps log.${DATE}.spin.nickel.g++.4 +../../../src/lmp_serial -in in.spin.nickel_cubic +cp log.lammps log.${DATE}.spin.nickel_cubic.g++.1 +../../../src/lmp_serial -in in.spin.nickel_cubic +cp log.lammps log.${DATE}.spin.nickel_cubic.g++.4 +rm log.lammps log.cite dump*.lammpstrj +cd .. 
+ +# read restart +cd read_restart/ +../../../src/lmp_serial -in in.spin.write_restart +cp log.lammps log.${DATE}.spin.write_restart.g++.1 +../../../src/lmp_serial -in in.spin.write_restart +cp log.lammps log.${DATE}.spin.write_restart.g++.4 +../../../src/lmp_serial -in in.spin.restart +cp log.lammps log.${DATE}.spin.restart.g++.1 +../../../src/lmp_serial -in in.spin.restart +cp log.lammps log.${DATE}.spin.restart.g++.4 +../../../src/lmp_serial -in in.spin.read_data +cp log.lammps log.${DATE}.spin.read_data.g++.1 +../../../src/lmp_serial -in in.spin.read_data +cp log.lammps log.${DATE}.spin.read_data.g++.4 +rm log.lammps log.cite dump*.lammpstrj +cd .. + +# setforce +cd setforce_spin/ +../../../src/lmp_serial -in in.spin.setforce +cp log.lammps log.${DATE}.spin.setforce.g++.1 +../../../src/lmp_serial -in in.spin.setforce +cp log.lammps log.${DATE}.spin.setforce.g++.4 +rm log.lammps log.cite dump*.lammpstrj +cd .. + +# spin minimizers +cd spinmin/ +../../../src/lmp_serial -in in.spin.bfo_min +cp log.lammps log.${DATE}.spin.bfo_min.g++.1 +../../../src/lmp_serial -in in.spin.bfo_min +cp log.lammps log.${DATE}.spin.bfo_min.g++.4 +../../../src/lmp_serial -in in.spin.bfo_min_cg +cp log.lammps log.${DATE}.spin.bfo_min_cg.g++.1 +../../../src/lmp_serial -in in.spin.bfo_min_cg +cp log.lammps log.${DATE}.spin.bfo_min_cg.g++.4 +../../../src/lmp_serial -in in.spin.bfo_min_lbfgs +cp log.lammps log.${DATE}.spin.bfo_min_lbfgs.g++.1 +../../../src/lmp_serial -in in.spin.bfo_min_lbfgs +cp log.lammps log.${DATE}.spin.bfo_min_lbfgs.g++.4 +../../../src/lmp_serial -in in.spin.iron_min +cp log.lammps log.${DATE}.spin.iron_min.g++.1 +../../../src/lmp_serial -in in.spin.iron_min +cp log.lammps log.${DATE}.spin.iron_min.g++.4 +rm log.lammps log.cite dump*.lammpstrj +cd .. 
diff --git a/examples/SPIN/setforce_spin/in.spin.setforce b/examples/SPIN/setforce_spin/in.spin.setforce index 0d65955a29..4edf70eb52 100644 --- a/examples/SPIN/setforce_spin/in.spin.setforce +++ b/examples/SPIN/setforce_spin/in.spin.setforce @@ -48,7 +48,7 @@ variable emag equal c_out_mag[5] variable tmag equal c_out_mag[6] thermo 100 -thermo_style custom step time v_magx v_magz v_magnorm v_tmag etotal +thermo_style custom step time v_magx v_magz v_magnorm v_tmag pe v_emag etotal thermo_modify format float %20.15g compute outsp all property/atom spx spy spz sp fmx fmy fmz diff --git a/examples/SPIN/test_problems/README b/examples/SPIN/test_problems/README index 5557e3d42b..0a1362ec9c 100644 --- a/examples/SPIN/test_problems/README +++ b/examples/SPIN/test_problems/README @@ -1,4 +1,4 @@ -** The objective of the benchmark examples in this directory +** The objective of the test problems in this directory is the following twofold: - verify the implementation of the LAMMPS' SPIN package by comparing its results to well-known analytic results, or @@ -6,39 +6,39 @@ - provide users with simple comparisons, allowing them to better understand what is implemented in the code. -The LAMMPS input file (bench-*) can be modified, as well as the +The LAMMPS input file (test-*) can be modified, as well as the associated python script, in order to try different comparisons. All scripts can be run by executing the shell script from its directory. Example: -./run-bench-exchange.sh from the benchmarck_damped_exchange/ +./run-test-exchange.sh from the validation_damped_exchange/ directory. -** Below a brief description of the different benchmark +** Below a brief description of the different validation problems: -- benchmarck_damped_precession: +- validation_damped_precession: simulates the damped precession of a single spin in a magnetic field. - Run as: ./run-bench-prec.sh + Run as: ./run-test-prec.sh Output: x, y and z components of the magnetization, and magnetic energy. 
-- benchmarck_damped_exchange: +- validation_damped_exchange: simulates two spins interacting through the exchange interaction. The parameters in the LAMMPS input script - (bench-spin-precession.in) are calibrated to match the + (test-spin-precession.in) are calibrated to match the exchange definition in the python script (llg_exchange.py). - Run as: ./run-bench-exchange.sh + Run as: ./run-test-exchange.sh Output: average magnetization resulting from the damped precession of the two interacting spins. Also plots the evolution of the magnetic energy. -- benchmarck_langevin_precession: +- validation_langevin_precession: simulates a single spin in a magnetic field and in contact with a thermal bath, and compares the statistical averages of the output to the analytical result of the Langevin function. - Run as: ./run-bench-prec.sh + Run as: ./run-test-prec.sh Output: statistical average of the z-component of the magnetization (along the applied field) and of the magnetic energy versus temperature. 
Comparison to the Langevin function diff --git a/examples/SPIN/test_problems/validation_damped_exchange/run-test-exchange.sh b/examples/SPIN/test_problems/validation_damped_exchange/run-test-exchange.sh index 15de8d350e..599730fe7b 100755 --- a/examples/SPIN/test_problems/validation_damped_exchange/run-test-exchange.sh +++ b/examples/SPIN/test_problems/validation_damped_exchange/run-test-exchange.sh @@ -5,7 +5,7 @@ rm res_*.dat # compute Lammps ./../../../../src/lmp_serial \ - -in bench-spin-precession.in + -in test-spin-precession.in in="$(grep -n Step log.lammps | awk -F ':' '{print $1}')" en="$(grep -n Loop log.lammps | awk -F ':' '{print $1}')" in="$(echo "$in+1" | bc -l)" diff --git a/examples/SPIN/test_problems/validation_damped_exchange/bench-spin-precession.in b/examples/SPIN/test_problems/validation_damped_exchange/test-spin-precession.in similarity index 100% rename from examples/SPIN/test_problems/validation_damped_exchange/bench-spin-precession.in rename to examples/SPIN/test_problems/validation_damped_exchange/test-spin-precession.in diff --git a/examples/SPIN/test_problems/validation_damped_exchange/two_spins.data b/examples/SPIN/test_problems/validation_damped_exchange/two_spins.data new file mode 100644 index 0000000000..013f813751 --- /dev/null +++ b/examples/SPIN/test_problems/validation_damped_exchange/two_spins.data @@ -0,0 +1,22 @@ +LAMMPS data file via write_data, version 19 Sep 2019, timestep = 0 + +2 atoms +1 atom types + +0.0 6.0 xlo xhi +0.0 3.0 ylo yhi +0.0 3.0 zlo zhi + +Masses + +1 1 + +Atoms # spin + +1 1 2.0 0.0 0.0 0.0 1.0 0.0 0.0 0 0 0 +2 1 2.0 3.0 0.0 0.0 0.0 1.0 0.0 0 0 0 + +Velocities + +1 0.0 0.0 0.0 +2 0.0 0.0 0.0 diff --git a/examples/SPIN/test_problems/validation_damped_precession/run-test-prec.sh b/examples/SPIN/test_problems/validation_damped_precession/run-test-prec.sh index 49ebc2ac4e..e21f28521a 100755 --- a/examples/SPIN/test_problems/validation_damped_precession/run-test-prec.sh +++ 
b/examples/SPIN/test_problems/validation_damped_precession/run-test-prec.sh @@ -5,7 +5,7 @@ rm res_*.dat # compute Lammps ./../../../../src/lmp_serial \ - -in bench-spin-precession.in + -in test-spin-precession.in in="$(grep -n Step log.lammps | awk -F ':' '{print $1}')" en="$(grep -n Loop log.lammps | awk -F ':' '{print $1}')" in="$(echo "$in+1" | bc -l)" diff --git a/examples/SPIN/test_problems/validation_damped_precession/bench-spin-precession.in b/examples/SPIN/test_problems/validation_damped_precession/test-spin-precession.in similarity index 92% rename from examples/SPIN/test_problems/validation_damped_precession/bench-spin-precession.in rename to examples/SPIN/test_problems/validation_damped_precession/test-spin-precession.in index ed8a5caeaf..6722e2a5ed 100644 --- a/examples/SPIN/test_problems/validation_damped_precession/bench-spin-precession.in +++ b/examples/SPIN/test_problems/validation_damped_precession/test-spin-precession.in @@ -26,6 +26,7 @@ variable Temperature equal 0.0 variable Nsteps equal 500000 fix 1 all nve/spin lattice no +# fix 2 all precession/spin zeeman ${H} 0.0 0.0 1.0 anisotropy ${Kan} 0.0 0.0 1.0 fix 2 all precession/spin zeeman ${H} 0.0 0.0 1.0 anisotropy ${Kan} 0.0 0.0 1.0 fix_modify 2 energy yes fix 3 all langevin/spin ${Temperature} 0.01 12345 diff --git a/examples/SPIN/test_problems/validation_langevin_precession/bench-prec-spin.in b/examples/SPIN/test_problems/validation_langevin_precession/bench-prec-spin.in new file mode 100644 index 0000000000..45da087477 --- /dev/null +++ b/examples/SPIN/test_problems/validation_langevin_precession/bench-prec-spin.in @@ -0,0 +1,46 @@ +#LAMMPS in.run + +units metal +atom_style spin +atom_modify map array +boundary p p p + +# read_data singlespin.data + +lattice sc 3.0 +region box block 0.0 1.0 0.0 1.0 0.0 1.0 +create_box 1 box +create_atoms 1 box + +mass 1 1.0 +set type 1 spin 1.0 0.0 0.0 1.0 + +# defines a pair/style for neighbor list, but do not use it +pair_style spin/exchange 4.0 
+pair_coeff * * exchange 1.0 0.0 0.0 1.0 + +group bead type 1 + +variable H equal 10.0 +variable Kan equal 0.0 +variable Temperature equal 19.00000000000000000000 +variable RUN equal 1000000 + +fix 1 all nve/spin lattice no +fix 2 all precession/spin zeeman ${H} 0.0 0.0 1.0 anisotropy ${Kan} 0.0 0.0 1.0 +fix_modify 2 energy yes +fix 3 all langevin/spin ${Temperature} 0.01 12345 + +compute compute_spin all spin +compute outsp all property/atom spx spy spz sp +compute magsz all reduce ave c_outsp[3] + +thermo 50000 +thermo_style custom step time temp vol pe c_compute_spin[5] etotal + +variable magnetic_energy equal c_compute_spin[5] + +fix avespin all ave/time 1 ${RUN} ${RUN} v_Temperature v_H v_Kan c_magsz v_magnetic_energy file average_spin + +timestep 0.1 +run ${RUN} diff --git a/examples/SPIN/test_problems/validation_langevin_precession/run-test-prec.sh b/examples/SPIN/test_problems/validation_langevin_precession/run-test-prec.sh index 98fceeca95..2427e20095 100755 --- a/examples/SPIN/test_problems/validation_langevin_precession/run-test-prec.sh +++ b/examples/SPIN/test_problems/validation_langevin_precession/run-test-prec.sh @@ -10,10 +10,9 @@ N=20 for (( i=0; i<$N; i++ )) do temp="$(echo "$tempi+$i*($tempf-$tempi)/$N" | bc -l)" - sed s/temperature/${temp}/g bench-prec-spin.template > \ - bench-prec-spin.in - ./../../../../src/lmp_serial \ - -in bench-prec-spin.in + sed s/temperature/${temp}/g test-prec-spin.template > \ + test-prec-spin.in + ./../../../../src/lmp_serial -in test-prec-spin.in Hz="$(tail -n 1 average_spin | awk -F " " '{print $3}')" sz="$(tail -n 1 average_spin | awk -F " " '{print $5}')" en="$(tail -n 1 average_spin | awk -F " " '{print $6}')" diff --git a/examples/SPIN/test_problems/validation_langevin_precession/bench-prec-spin.template b/examples/SPIN/test_problems/validation_langevin_precession/test-prec-spin.template similarity index 100% rename from examples/SPIN/test_problems/validation_langevin_precession/bench-prec-spin.template rename 
to examples/SPIN/test_problems/validation_langevin_precession/test-prec-spin.template diff --git a/src/SPIN/compute_spin.cpp b/src/SPIN/compute_spin.cpp index 8a71be019b..9a759134b4 100644 --- a/src/SPIN/compute_spin.cpp +++ b/src/SPIN/compute_spin.cpp @@ -41,13 +41,20 @@ using namespace MathConst; /* ---------------------------------------------------------------------- */ ComputeSpin::ComputeSpin(LAMMPS *lmp, int narg, char **arg) : - Compute(lmp, narg, arg) + Compute(lmp, narg, arg), pair(NULL), spin_pairs(NULL) { if ((narg != 3) && (narg != 4)) error->all(FLERR,"Illegal compute compute/spin command"); vector_flag = 1; size_vector = 6; extvector = 0; + // npairs = npairspin = 0; + + // initialize the magnetic interaction flags + + pair_spin_flag = 0; + long_spin_flag = 0; + precession_spin_flag = 0; init(); @@ -60,6 +67,7 @@ ComputeSpin::ComputeSpin(LAMMPS *lmp, int narg, char **arg) : ComputeSpin::~ComputeSpin() { memory->destroy(vector); + delete [] spin_pairs; } /* ---------------------------------------------------------------------- */ @@ -68,7 +76,11 @@ void ComputeSpin::init() { hbar = force->hplanck/MY_2PI; kb = force->boltz; + npairs = npairspin = 0; + precession_spin_flag = 0; + // set ptrs on Pair/Spin styles + // loop 1: obtain # of Pairs, and # of Pair/Spin styles if (force->pair_match("spin",0,0)) { // only one Pair/Spin style @@ -173,9 +185,10 @@ void ComputeSpin::compute_vector() // update magnetic precession energies if (precession_spin_flag) { - magenergy -= lockprecessionspin->compute_zeeman_energy(sp[i]); - magenergy -= lockprecessionspin->compute_anisotropy_energy(sp[i]); - magenergy -= lockprecessionspin->compute_cubic_energy(sp[i]); + magenergy += lockprecessionspin->emag[i]; + // magenergy -= lockprecessionspin->compute_zeeman_energy(sp[i]); + // magenergy -= lockprecessionspin->compute_anisotropy_energy(sp[i]); + // magenergy -= lockprecessionspin->compute_cubic_energy(sp[i]); } // update magnetic pair interactions diff --git 
a/src/SPIN/fix_precession_spin.cpp b/src/SPIN/fix_precession_spin.cpp index 57e4549718..ffe3fc838c 100644 --- a/src/SPIN/fix_precession_spin.cpp +++ b/src/SPIN/fix_precession_spin.cpp @@ -30,6 +30,7 @@ #include "force.h" #include "input.h" #include "math_const.h" +#include "memory.h" #include "modify.h" #include "respa.h" #include "update.h" @@ -43,7 +44,7 @@ enum{CONSTANT,EQUAL}; /* ---------------------------------------------------------------------- */ -FixPrecessionSpin::FixPrecessionSpin(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg) +FixPrecessionSpin::FixPrecessionSpin(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg), emag(NULL) { if (narg < 7) error->all(FLERR,"Illegal precession/spin command"); @@ -154,6 +155,9 @@ FixPrecessionSpin::FixPrecessionSpin(LAMMPS *lmp, int narg, char **arg) : Fix(lm FixPrecessionSpin::~FixPrecessionSpin() { delete [] magstr; + + // test emag list storing mag energies + memory->destroy(emag); } /* ---------------------------------------------------------------------- */ @@ -213,6 +217,11 @@ void FixPrecessionSpin::init() if (varflag == CONSTANT) set_magneticprecession(); + // test emag list storing mag energies + // init. 
size of energy stacking lists + + nlocal_max = atom->nlocal; + memory->grow(emag,nlocal_max,"pair/spin:emag"); } /* ---------------------------------------------------------------------- */ @@ -252,21 +261,33 @@ void FixPrecessionSpin::post_force(int /* vflag */) double **fm = atom->fm; double **sp = atom->sp; const int nlocal = atom->nlocal; - double spi[3], fmi[3], epreci; + double spi[4], fmi[3], epreci; + // test emag list storing mag energies + // checking size of emag + if (nlocal_max < nlocal) { // grow emag lists if necessary + nlocal_max = nlocal; + memory->grow(emag,nlocal_max,"pair/spin:emag"); + } + eflag = 0; eprec = 0.0; for (int i = 0; i < nlocal; i++) { + + // test emag list storing mag energies + emag[i] = 0.0; + if (mask[i] & groupbit) { epreci = 0.0; spi[0] = sp[i][0]; spi[1] = sp[i][1]; spi[2] = sp[i][2]; + spi[3] = sp[i][3]; fmi[0] = fmi[1] = fmi[2] = 0.0; if (zeeman_flag) { // compute Zeeman interaction compute_zeeman(i,fmi); - epreci -= hbar*(spi[0]*fmi[0] + spi[1]*fmi[1] + spi[2]*fmi[2]); + epreci -= compute_zeeman_energy(spi); } if (aniso_flag) { // compute magnetic anisotropy @@ -279,6 +300,7 @@ void FixPrecessionSpin::post_force(int /* vflag */) epreci -= compute_cubic_energy(spi); } + emag[i] += epreci; eprec += epreci; fm[i][0] += fmi[0]; fm[i][1] += fmi[1]; diff --git a/src/SPIN/fix_precession_spin.h b/src/SPIN/fix_precession_spin.h index 3c809506c1..7767864655 100644 --- a/src/SPIN/fix_precession_spin.h +++ b/src/SPIN/fix_precession_spin.h @@ -57,6 +57,10 @@ class FixPrecessionSpin : public Fix { void compute_cubic(double *, double *); double compute_cubic_energy(double *); + // test emag list storing mag energies + int nlocal_max; // max value of nlocal (for size of lists) + double *emag; // energy list + protected: int style; // style of the magnetic precession diff --git a/src/SPIN/pair_spin.cpp b/src/SPIN/pair_spin.cpp index 01b8775eab..112f404bc0 100644 --- a/src/SPIN/pair_spin.cpp +++ b/src/SPIN/pair_spin.cpp @@ -42,7 +42,7 @@ 
using namespace MathConst; /* ---------------------------------------------------------------------- */ -PairSpin::PairSpin(LAMMPS *lmp) : Pair(lmp) +PairSpin::PairSpin(LAMMPS *lmp) : Pair(lmp), emag(NULL) { hbar = force->hplanck/MY_2PI; single_enable = 0; -- GitLab From 437055f9130b1e083ec78d9a6cceb37503f14189 Mon Sep 17 00:00:00 2001 From: "Jibril B. Coulibaly" Date: Thu, 12 Mar 2020 11:11:38 -0500 Subject: [PATCH 013/328] implement the `scale` keyword of `fix adapt` for diameter and charge --- src/fix_adapt.cpp | 51 ++++++++++++++++++++++++----------------------- 1 file changed, 26 insertions(+), 25 deletions(-) diff --git a/src/fix_adapt.cpp b/src/fix_adapt.cpp index 8668690e1d..10ac290d02 100644 --- a/src/fix_adapt.cpp +++ b/src/fix_adapt.cpp @@ -235,7 +235,7 @@ int FixAdapt::setmask() void FixAdapt::post_constructor() { - if (!resetflag) return; + // Create local Fix Store even when ressetflag == false, to be able to use `scale` keyword for charge and diameter if (!diamflag && !chgflag) return; // new id = fix-ID + FIX_STORE_ATTRIBUTE @@ -251,7 +251,7 @@ void FixAdapt::post_constructor() newarg[4] = (char *) "1"; newarg[5] = (char *) "1"; - if (diamflag) { + if (diamflag && atom->radius_flag) {// Previously unsafe! The radius_flag was not checked, could run an atom_style w/o radius attribute and get here without a previous check / error ! int n = strlen(id) + strlen("_FIX_STORE_DIAM") + 1; id_fix_diam = new char[n]; strcpy(id_fix_diam,id); @@ -274,7 +274,7 @@ void FixAdapt::post_constructor() } } - if (chgflag) { + if (chgflag && atom->q_flag) {// Previously unsafe! The q_flag was not checked, could run an atom_style w/o charge attribute and get here without a previous check / error ! int n = strlen(id) + strlen("_FIX_STORE_CHG") + 1; id_fix_chg = new char[n]; strcpy(id_fix_chg,id); @@ -455,7 +455,7 @@ void FixAdapt::init() } // fixes that store initial per-atom values - + /* Unnecessary ? 
`fix_diam` and `fix_chg` seem to be already defined in FixAdapt::post_constructor(), commenting them out does not crash my MWE if (id_fix_diam) { int ifix = modify->find_fix(id_fix_diam); if (ifix < 0) error->all(FLERR,"Could not find fix adapt storage fix ID"); @@ -465,7 +465,7 @@ void FixAdapt::init() int ifix = modify->find_fix(id_fix_chg); if (ifix < 0) error->all(FLERR,"Could not find fix adapt storage fix ID"); fix_chg = (FixStore *) modify->fix[ifix]; - } + }*/ if (strstr(update->integrate_style,"respa")) nlevels_respa = ((Respa *) update->integrate)->nlevels; @@ -568,38 +568,39 @@ void FixAdapt::change_settings() // also scale rmass to new value if (ad->aparam == DIAMETER) { - int mflag = 0; - if (atom->rmass_flag) mflag = 1; + /* `mflag` unnecessary ? the test if (!atom->radius_flag) in FixAdapt::init() should perevent `atom->rmass_flag == false`. Unless there can be combinations of atoms with `radius` but without `rmass` + It could also be unsafe since rmass_flag could be added using `fix property/atom` even for an atom_style that does not have radius attributes */ double density; - double *radius = atom->radius; + double *vec = fix_diam->vstore; // Get initial radius to use `scale` keyword + double *radius = atom->radius; double *rmass = atom->rmass; int *mask = atom->mask; int nlocal = atom->nlocal; int nall = nlocal + atom->nghost; - if (mflag == 0) { - for (i = 0; i < nall; i++) - if (mask[i] & groupbit) - radius[i] = 0.5*value; - } else { - for (i = 0; i < nall; i++) - if (mask[i] & groupbit) { - density = rmass[i] / (4.0*MY_PI/3.0 * - radius[i]*radius[i]*radius[i]); - radius[i] = 0.5*value; - rmass[i] = 4.0*MY_PI/3.0 * - radius[i]*radius[i]*radius[i] * density; - } - } + for (i = 0; i < nall; i++) + if (mask[i] & groupbit) { + density = rmass[i] / (4.0*MY_PI/3.0 * + radius[i]*radius[i]*radius[i]); + if (scaleflag) radius[i] = value * vec[i]; + else radius[i] = 0.5*value; + rmass[i] = 4.0*MY_PI/3.0 * + radius[i]*radius[i]*radius[i] * density; + } + } 
else if (ad->aparam == CHARGE) { - double *q = atom->q; + double *vec = fix_chg->vstore; // Get initial charge to use `scale` keyword + double *q = atom->q; int *mask = atom->mask; int nlocal = atom->nlocal; int nall = nlocal + atom->nghost; for (i = 0; i < nall; i++) - if (mask[i] & groupbit) q[i] = value; + if (mask[i] & groupbit) { + if (scaleflag) q[i] = value * vec[i]; + else q[i] = value; + } } } } @@ -607,7 +608,7 @@ void FixAdapt::change_settings() modify->addstep_compute(update->ntimestep + nevery); // re-initialize pair styles if any PAIR settings were changed - // ditto for bond styles if any BOND setitings were changes + // ditto for bond styles if any BOND settings were changed // this resets other coeffs that may depend on changed values, // and also offset and tail corrections -- GitLab From 14bade977e8064c9cc5b09f3701940832585d080 Mon Sep 17 00:00:00 2001 From: "Jibril B. Coulibaly" Date: Fri, 13 Mar 2020 10:59:11 -0500 Subject: [PATCH 014/328] implement diameter/disc option for 2d simulations --- src/fix_adapt.cpp | 30 ++++++++++++++++++++---------- src/fix_adapt.h | 1 + 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/src/fix_adapt.cpp b/src/fix_adapt.cpp index 10ac290d02..ee85d054c8 100644 --- a/src/fix_adapt.cpp +++ b/src/fix_adapt.cpp @@ -15,6 +15,7 @@ #include #include "atom.h" #include "bond.h" +#include "domain.h" #include "update.h" #include "group.h" #include "modify.h" @@ -139,9 +140,11 @@ nadapt(0), id_fix_diam(NULL), id_fix_chg(NULL), adapt(NULL) } else if (strcmp(arg[iarg],"atom") == 0) { if (iarg+3 > narg) error->all(FLERR,"Illegal fix adapt command"); adapt[nadapt].which = ATOM; - if (strcmp(arg[iarg+1],"diameter") == 0) { + if (strcmp(arg[iarg+1],"diameter") == 0 || strcmp(arg[iarg+1],"diameter/disc") == 0) { adapt[nadapt].aparam = DIAMETER; diamflag = 1; + discflag = 0; + if(strcmp(arg[iarg+1],"diameter/disc") == 0) discflag = 1; } else if (strcmp(arg[iarg+1],"charge") == 0) { adapt[nadapt].aparam = CHARGE; chgflag = 
1; @@ -428,6 +431,8 @@ void FixAdapt::init() if (ad->aparam == DIAMETER) { if (!atom->radius_flag) error->all(FLERR,"Fix adapt requires atom attribute diameter"); + if(discflag && domain->dimension!=2) + error->all(FLERR,"Fix adapt requires 2d simulation"); } if (ad->aparam == CHARGE) { if (!atom->q_flag) @@ -568,8 +573,8 @@ void FixAdapt::change_settings() // also scale rmass to new value if (ad->aparam == DIAMETER) { - /* `mflag` unnecessary ? the test if (!atom->radius_flag) in FixAdapt::init() should perevent `atom->rmass_flag == false`. Unless there can be combinations of atoms with `radius` but without `rmass` - It could also be unsafe since rmass_flag could be added using `fix property/atom` even for an atom_style that does not have radius attributes */ + /* `mflag` unnecessary ? the test `if(!atom->radius_flag)` in `FixAdapt::init()` should perevent `atom->rmass_flag == false`. Unless there can be combinations of atom styles with `radius` but without `rmass` + It could also be unsafe since rmass_flag could be added using `fix property/atom` even for an atom_style that does not have radius attribute, although that possibility should be avoided as well with the test `if(!atom->radius_flag)` in `FixAdapt::init()` */ double density; double *vec = fix_diam->vstore; // Get initial radius to use `scale` keyword @@ -581,12 +586,14 @@ void FixAdapt::change_settings() for (i = 0; i < nall; i++) if (mask[i] & groupbit) { - density = rmass[i] / (4.0*MY_PI/3.0 * - radius[i]*radius[i]*radius[i]); + if(discflag) density = rmass[i] / (MY_PI * radius[i]*radius[i]); + else density = rmass[i] / (4.0*MY_PI/3.0 * + radius[i]*radius[i]*radius[i]); if (scaleflag) radius[i] = value * vec[i]; else radius[i] = 0.5*value; - rmass[i] = 4.0*MY_PI/3.0 * - radius[i]*radius[i]*radius[i] * density; + if(discflag) rmass[i] = MY_PI * radius[i]*radius[i] * density; + else rmass[i] = 4.0*MY_PI/3.0 * + radius[i]*radius[i]*radius[i] * density; } } else if (ad->aparam == CHARGE) { @@ -671,10 
+678,13 @@ void FixAdapt::restore_settings() for (int i = 0; i < nlocal; i++) if (mask[i] & groupbit) { - density = rmass[i] / (4.0*MY_PI/3.0 * - radius[i]*radius[i]*radius[i]); + if(discflag) density = rmass[i] / (MY_PI * radius[i]*radius[i]); + else density = rmass[i] / (4.0*MY_PI/3.0 * + radius[i]*radius[i]*radius[i]); radius[i] = vec[i]; - rmass[i] = 4.0*MY_PI/3.0 * radius[i]*radius[i]*radius[i] * density; + if(discflag) rmass[i] = MY_PI * radius[i]*radius[i] * density; + else rmass[i] = 4.0*MY_PI/3.0 * + radius[i]*radius[i]*radius[i] * density; } } if (chgflag) { diff --git a/src/fix_adapt.h b/src/fix_adapt.h index 0bb594b7a4..dbf8f5f792 100644 --- a/src/fix_adapt.h +++ b/src/fix_adapt.h @@ -47,6 +47,7 @@ class FixAdapt : public Fix { int nlevels_respa; char *id_fix_diam,*id_fix_chg; class FixStore *fix_diam,*fix_chg; + int discflag; struct Adapt { int which,ivar; -- GitLab From 8b8f928347812d959d940f9d9e0132e187df16fc Mon Sep 17 00:00:00 2001 From: "Jibril B. Coulibaly" Date: Fri, 13 Mar 2020 12:19:16 -0500 Subject: [PATCH 015/328] update documentation --- doc/src/fix_adapt.rst | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/doc/src/fix_adapt.rst b/doc/src/fix_adapt.rst index 0e57ec98c1..e96a4ed654 100644 --- a/doc/src/fix_adapt.rst +++ b/doc/src/fix_adapt.rst @@ -15,7 +15,7 @@ Syntax * adapt = style name of this fix command * N = adapt simulation settings every this many timesteps * one or more attribute/arg pairs may be appended -* attribute = *pair* or *kspace* or *atom* +* attribute = *pair* or *bond* or *kspace* or *atom* .. parsed-literal:: @@ -90,8 +90,8 @@ the end of a simulation. Even if *reset* is specified as *yes*\ , a restart file written during a simulation will contain the modified settings. -If the *scale* keyword is set to *no*\ , then the value the parameter is -set to will be whatever the variable generates. 
If the *scale* +If the *scale* keyword is set to *no*\ , then the value of the altered +parameter will be whatever the variable generates. If the *scale* keyword is set to *yes*\ , then the value of the altered parameter will be the initial value of that parameter multiplied by whatever the variable generates. I.e. the variable is now a "scale factor" applied @@ -324,26 +324,23 @@ The *atom* keyword enables various atom properties to be changed. The current list of atom parameters that can be varied by this fix: * charge = charge on particle -* diameter = diameter of particle +* diameter or diameter/disc = diameter of particle The *v\_name* argument of the *atom* keyword is the name of an :doc:`equal-style variable <variable>` which will be evaluated each time -this fix is invoked to set the parameter to a new value. It should be -specified as v\_name, where name is the variable name. See the +this fix is invoked to set, or scale, the parameter to a new value. +It should be specified as v\_name, where name is the variable name. See the discussion above describing the formulas associated with equal-style variables. The new value is assigned to the corresponding attribute for all atoms in the fix group. -.. note:: - - The *atom* keyword works this way whether the *scale* keyword is - set to *no* or *yes*\ . I.e. the use of scale yes is not yet supported - by the *atom* keyword. - If the atom parameter is *diameter* and per-atom density and per-atom mass are defined for particles (e.g. :doc:`atom_style granular <atom_style>`), then the mass of each particle is also -changed when the diameter changes (density is assumed to stay -constant). +changed when the diameter changes. The mass is set from the particle volume +for 3d systems (density is assumed to stay constant). For 2d, the default is +for LAMMPS to model particles with a radius attribute as spheres.
+However, if the atom parameter is *diameter/disc*, then the mass is +set from the particle area (the density is assumed to be in mass/distance^2 units). For example, these commands would shrink the diameter of all granular particles in the "center" group from 1.0 to 0.1 in a linear fashion -- GitLab From 05b273d73176a11bfb3911cb0b00e99ad33b43e2 Mon Sep 17 00:00:00 2001 From: Yaser Afshar Date: Mon, 16 Mar 2020 08:11:58 -0500 Subject: [PATCH 016/328] kim_property command A new KIM command to make it as easy as possible to write material properties computed in LAMMPS to standard KIM property instance format. --- src/KIM/kim_property.cpp | 489 +++++++++++++++++++++++++++++++++++++++ src/KIM/kim_property.h | 85 +++++++ 2 files changed, 574 insertions(+) create mode 100644 src/KIM/kim_property.cpp create mode 100644 src/KIM/kim_property.h diff --git a/src/KIM/kim_property.cpp b/src/KIM/kim_property.cpp new file mode 100644 index 0000000000..888aa4da0c --- /dev/null +++ b/src/KIM/kim_property.cpp @@ -0,0 +1,489 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Yaser Afshar (UMN) +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, see . + + Linking LAMMPS statically or dynamically with other modules is making a + combined work based on LAMMPS. Thus, the terms and conditions of the GNU + General Public License cover the whole combination. + + In addition, as a special exception, the copyright holders of LAMMPS give + you permission to combine LAMMPS with free software programs or libraries + that are released under the GNU LGPL and with code included in the standard + release of the "kim-api" under the CDDL (or modified versions of such code, + with unchanged license). You may copy and distribute such a system following + the terms of the GNU GPL for LAMMPS and the licenses of the other code + concerned, provided that you include the source code of that other code + when and as the GNU GPL requires distribution of source code. + + Note that people who make modified versions of LAMMPS are not obligated to + grant this special exception for their modified versions; it is their choice + whether to do so. 
The GNU General Public License gives permission to release + a modified version without this exception; this exception also makes it + possible to release a modified version which carries forward this exception. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Designed for use with the kim-api-2.1.0 (and newer) package +------------------------------------------------------------------------- */ + +#if LMP_PYTHON +#define PY_SSIZE_T_CLEAN +#include +#endif + +#include "kim_property.h" + +#include "comm.h" +#include "input.h" +#include "variable.h" +#include "utils.h" +#include "error.h" + +#include + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +kimProperty::kimProperty(LAMMPS *lmp) : Pointers(lmp) +{ +#if LMP_PYTHON +#if PY_MAJOR_VERSION != 3 + error->all(FLERR, "Invalid Python version.\n" + "The kim-property Python package requires Python " + "3 >= 3.6 support."); +#endif + // one-time initialization of Python interpreter + if (!Py_IsInitialized()) { + Py_Initialize(); + PyEval_InitThreads(); + } +#else + error->all(FLERR, "Error Python support missing! 
Compile with PYTHON " + "package installed!"); +#endif // LMP_PYTHON +} + +void kimProperty::command(int narg, char **arg) +{ +#if LMP_PYTHON +#if PY_MAJOR_VERSION == 3 + if (narg < 2) + error->all(FLERR, "Invalid `kim_property` command."); + + if (!(strcmp(arg[0], "create") == 0) && + !(strcmp(arg[0], "destroy") == 0) && + !(strcmp(arg[0], "modify") == 0) && + !(strcmp(arg[0], "remove") == 0) && + !(strcmp(arg[0], "dump") == 0)) { + std::string msg("Error incorrect arguments in kim_property command.\n"); + msg += "`kim_property create/destroy/modify/remove/dump` "; + msg += "is mandatory."; + error->all(FLERR, msg.c_str()); + } + + if (comm->me == 0) { + std::string msg; + msg = "#=== kim-property ===========================================\n"; + input->write_echo(msg.c_str()); + } + + // Get the kim_str ptr to the data associated with a kim_property_str + // variable + char *kim_str = + input->variable->retrieve(const_cast("kim_property_str")); + + char **kim_str_cmd = new char *[3]; + kim_str_cmd[0] = const_cast("kim_property_str"); + + PyGILState_STATE gstate = PyGILState_Ensure(); + + // kim_property module + PyObject *kim_property = NULL; + + // import kim_property + { + PyObject *obj = PyUnicode_FromString("kim_property"); + if (!obj) { + PyGILState_Release(gstate); + error->all(FLERR, "Error creating a `PyObject`!"); + } + + kim_property = PyImport_Import(obj); + if (!kim_property) { + PyGILState_Release(gstate); + error->all(FLERR, "Error unable to import Python `kim_property` module!" 
+ "\nkim-property Python package can be installed " + "with pip:\n`pip install kim-property`\n" + "See the installation instructions at\n" + "https://github.com/openkim/kim-property#installing-kim-property\n" + "for detailed information."); + } + + // Decrementing of the reference count + Py_XDECREF(obj); + } + + // kim_property create 1 cohesive-potential-energy-cubic-crystal + if (strcmp(arg[0], "create") == 0) { + if (narg != 3) { + PyGILState_Release(gstate); + error->all(FLERR, "Error invalid `kim_property create` command."); + } + + int const ID = utils::inumeric(FLERR, arg[1], true, lmp); + + Py_ssize_t const nSize = (kim_str ? 3 : 2); + + // Python function + // This is the equivalent of the Python expression: + // kim_property.kim_property_create. + PyObject *pFunc = + PyObject_GetAttrString(kim_property, "kim_property_create"); + if (!pFunc) { + PyGILState_Release(gstate); + error->all(FLERR, "Error unable to get an attribute named " + "`kim_property_create` from a kim_property object!"); + } + + // Decrementing of the reference count + Py_XDECREF(kim_property); + + // create Python tuple of input arguments + PyObject *pArgs = PyTuple_New(nSize); + if (!pArgs) { + PyGILState_Release(gstate); + error->all(FLERR, "Error could not create Python function arguments."); + } + + // Python object to set the tuple + // Property ID + PyObject *pValue = PyLong_FromLong(ID); + PyTuple_SetItem(pArgs, 0, pValue); + + // Property name + pValue = PyUnicode_FromString(arg[2]); + PyTuple_SetItem(pArgs, 1, pValue); + + if (nSize == 3) { + pValue = PyUnicode_FromString(kim_str); + PyTuple_SetItem(pArgs, 2, pValue); + } + + // call the Python kim_property_create function + // error check with one() since only some procs may fail + pValue = PyObject_CallObject(pFunc, pArgs); + if (!pValue) { + PyErr_Print(); + PyGILState_Release(gstate); + error->one(FLERR, "Error Python `kim_property_create` function " + "evaluation failed!"); + } + + // Python function returned a string
value + const char *pystr = PyUnicode_AsUTF8(pValue); + + kim_str_cmd[2] = const_cast(pystr); + + if (kim_str) { + input->variable->set_string(kim_str_cmd[0], kim_str_cmd[2]); + } else { + kim_str_cmd[1] = const_cast("string"); + input->variable->set(3, kim_str_cmd); + } + + Py_XDECREF(pArgs); + Py_XDECREF(pFunc); + Py_XDECREF(pValue); + } + else if (strcmp(arg[0], "destroy") == 0) { + if (narg != 2) { + PyGILState_Release(gstate); + error->all(FLERR, "Error invalid `kim_property destroy` command."); + } + + if (!kim_str) { + PyGILState_Release(gstate); + return; + } + + int const ID = utils::inumeric(FLERR, arg[1], true, lmp); + + // Python function + // This is the equivalent of the Python expression kim_property.kim_property_destroy + PyObject *pFunc = + PyObject_GetAttrString(kim_property, "kim_property_destroy"); + if (!pFunc) { + PyGILState_Release(gstate); + error->all(FLERR, "Error unable to get an attribute named " + "`kim_property_destroy` from a kim_property object!"); + } + + // Decrementing of the reference count + Py_XDECREF(kim_property); + + // create Python tuple of input arguments + PyObject *pArgs = PyTuple_New(2); + if (!pArgs) { + PyGILState_Release(gstate); + error->all(FLERR, "Error could not create Python function arguments."); + } + + // Python object to set the tuple + PyObject *pValue = PyUnicode_FromString(kim_str); + PyTuple_SetItem(pArgs, 0, pValue); + + pValue = PyLong_FromLong(ID); + PyTuple_SetItem(pArgs, 1, pValue); + + // call the Python kim_property_destroy function + // error check with one() since only some procs may fail + pValue = PyObject_CallObject(pFunc, pArgs); + if (!pValue) { + PyErr_Print(); + PyGILState_Release(gstate); + error->one(FLERR, "Error Python `kim_property_destroy` function " + "evaluation failed!"); + } + + // Python function returned a string value + const char *pystr = PyUnicode_AsUTF8(pValue); + + kim_str_cmd[2] = const_cast(pystr); + + input->variable->set_string(kim_str_cmd[0], kim_str_cmd[2]); + + 
Py_XDECREF(pArgs); + Py_XDECREF(pFunc); + Py_XDECREF(pValue); + } + else if (strcmp(arg[0], "modify") == 0) { + if (narg < 6) { + PyGILState_Release(gstate); + error->all(FLERR, "Error invalid `kim_property modify` command."); + } + + if (!kim_str) { + PyGILState_Release(gstate); + error->all(FLERR, "Error There is no property instance to modify " + "the content."); + } + + int const ID = utils::inumeric(FLERR, arg[1], true, lmp); + + // Python function + // This is the equivalent of the Python expression + // kim_property.kim_property_modify + PyObject *pFunc = + PyObject_GetAttrString(kim_property, "kim_property_modify"); + if (!pFunc) { + PyGILState_Release(gstate); + error->all(FLERR, "Error unable to get an attribute named " + "`kim_property_modify` from a kim_property object!"); + } + + // Decrementing of the reference count + Py_XDECREF(kim_property); + + // create Python tuple of input arguments + PyObject *pArgs = PyTuple_New(static_cast(narg)); + if (!pArgs) { + PyGILState_Release(gstate); + error->all(FLERR, "Error could not create Python function arguments."); + } + + // Python object to set the tuple + PyObject *pValue = PyUnicode_FromString(kim_str); + PyTuple_SetItem(pArgs, 0, pValue); + + pValue = PyLong_FromLong(ID); + PyTuple_SetItem(pArgs, 1, pValue); + + for (Py_ssize_t i = 2; i < static_cast(narg); ++i) { + pValue = PyUnicode_FromString(arg[i]); + PyTuple_SetItem(pArgs, i, pValue); + } + + // call the Python kim_property_modify function + // error check with one() since only some procs may fail + pValue = PyObject_CallObject(pFunc, pArgs); + if (!pValue) { + PyErr_Print(); + PyGILState_Release(gstate); + error->one(FLERR, "Error Python `kim_property_modify` function " + "evaluation failed!"); + } + + // Python function returned a string value + const char *pystr = PyUnicode_AsUTF8(pValue); + + kim_str_cmd[2] = const_cast(pystr); + + input->variable->set_string(kim_str_cmd[0], kim_str_cmd[2]); + + Py_XDECREF(pArgs); + Py_XDECREF(pFunc); + 
Py_XDECREF(pValue); + } + else if (strcmp(arg[0], "remove") == 0) { + if (narg < 4) { + PyGILState_Release(gstate); + error->all(FLERR, "Error invalid `kim_property remove` command."); + } + + if (!kim_str) { + PyGILState_Release(gstate); + error->all(FLERR, "Error There is no property instance to remove " + "the content."); + } + + int const ID = utils::inumeric(FLERR, arg[1], true, lmp); + + // Python function + // This is the equivalent of the Python expression + // kim_property.kim_property_remove + PyObject *pFunc = + PyObject_GetAttrString(kim_property, "kim_property_remove"); + if (!pFunc) { + PyGILState_Release(gstate); + error->all(FLERR, "Error unable to get an attribute named " + "`kim_property_remove` from a kim_property object!"); + } + + // Decrementing of the reference count + Py_XDECREF(kim_property); + + // create Python tuple of input arguments + PyObject *pArgs = PyTuple_New(static_cast(narg)); + if (!pArgs) { + PyGILState_Release(gstate); + error->all(FLERR, "Error could not create Python function arguments."); + } + + // Python object to set the tuple + PyObject *pValue = PyUnicode_FromString(kim_str); + PyTuple_SetItem(pArgs, 0, pValue); + + pValue = PyLong_FromLong(ID); + PyTuple_SetItem(pArgs, 1, pValue); + + for (Py_ssize_t i = 2; i < static_cast(narg); ++i) { + pValue = PyUnicode_FromString(arg[i]); + PyTuple_SetItem(pArgs, i, pValue); + } + + // call the Python kim_property_remove function + // error check with one() since only some procs may fail + pValue = PyObject_CallObject(pFunc, pArgs); + if (!pValue) { + PyErr_Print(); + PyGILState_Release(gstate); + error->one(FLERR, "Error Python `kim_property_remove` function " + "evaluation failed!"); + } + + // Python function returned a string value + const char *pystr = PyUnicode_AsUTF8(pValue); + + kim_str_cmd[2] = const_cast(pystr); + + input->variable->set_string(kim_str_cmd[0], kim_str_cmd[2]); + + Py_XDECREF(pArgs); + Py_XDECREF(pFunc); + Py_XDECREF(pValue); + } + else if 
(strcmp(arg[0], "dump") == 0) { + if (narg != 2) { + PyGILState_Release(gstate); + error->all(FLERR, "Error invalid `kim_property dump` command."); + } + + if (!kim_str) { + PyGILState_Release(gstate); + error->all(FLERR, "Error There is no property instance to dump " + "the content."); + } + + // Python function + // This is the equivalent of the Python expression + // kim_property.kim_property_dump + PyObject *pFunc = + PyObject_GetAttrString(kim_property, "kim_property_dump"); + if (!pFunc) { + PyGILState_Release(gstate); + error->all(FLERR, "Error unable to get an attribute named " + "`kim_property_dump` from a kim_property object!"); + } + + // Decrementing of the reference count + Py_XDECREF(kim_property); + + // create Python tuple of input arguments + PyObject *pArgs = PyTuple_New(2); + if (!pArgs) { + PyGILState_Release(gstate); + error->all(FLERR, "Could not create Python function arguments."); + } + + // Python object to set the tuple + PyObject *pValue = PyUnicode_FromString(kim_str); + PyTuple_SetItem(pArgs, 0, pValue); + + pValue = PyUnicode_FromString(arg[1]); + PyTuple_SetItem(pArgs, 1, pValue); + + if (comm->me == 0) { + // call the Python kim_property_dump function + // error check with one() since only some procs may fail + pValue = PyObject_CallObject(pFunc, pArgs); + if (!pValue) { + PyErr_Print(); + PyGILState_Release(gstate); + error->one(FLERR, "Error Python `kim_property_dump` function " + "evaluation failed!"); + } + } + + // Destroy the variable + kim_str_cmd[1] = const_cast("delete"); + input->variable->set(2, kim_str_cmd); + + Py_XDECREF(pArgs); + Py_XDECREF(pFunc); + Py_XDECREF(pValue); + } + + PyGILState_Release(gstate); + + delete[] kim_str_cmd; +#endif // PY_MAJOR_VERSION +#endif // LMP_PYTHON +} diff --git a/src/KIM/kim_property.h b/src/KIM/kim_property.h new file mode 100644 index 0000000000..ff5faa6781 --- /dev/null +++ b/src/KIM/kim_property.h @@ -0,0 +1,85 @@ +/* -*- c++ -*- 
---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Yaser Afshar (UMN) +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, see . + + Linking LAMMPS statically or dynamically with other modules is making a + combined work based on LAMMPS. Thus, the terms and conditions of the GNU + General Public License cover the whole combination. 
+ + In addition, as a special exception, the copyright holders of LAMMPS give + you permission to combine LAMMPS with free software programs or libraries + that are released under the GNU LGPL and with code included in the standard + release of the "kim-api" under the CDDL (or modified versions of such code, + with unchanged license). You may copy and distribute such a system following + the terms of the GNU GPL for LAMMPS and the licenses of the other code + concerned, provided that you include the source code of that other code + when and as the GNU GPL requires distribution of source code. + + Note that people who make modified versions of LAMMPS are not obligated to + grant this special exception for their modified versions; it is their choice + whether to do so. The GNU General Public License gives permission to release + a modified version without this exception; this exception also makes it + possible to release a modified version which carries forward this exception. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Designed for use with the kim-api-2.1.0 (and newer) package +------------------------------------------------------------------------- */ + +#ifdef COMMAND_CLASS + +CommandStyle(kim_property, kimProperty) + +#else + +#ifndef LMP_KIM_PROPERTY_H +#define LMP_KIM_PROPERTY_H + +#include "pointers.h" + +namespace LAMMPS_NS +{ + +class kimProperty : protected Pointers +{ +public: + kimProperty(class LAMMPS *lmp); + + void command(int, char **); +}; + +} // namespace LAMMPS_NS + +#endif // LMP_KIM_PROPERTY_H +#endif // COMMAND_CLASS + +/* ERROR/WARNING messages: + +*/ -- GitLab From dc373dbdeb23b2dd1fd23cd3900dd034915da008 Mon Sep 17 00:00:00 2001 From: Yaser Afshar Date: Mon, 16 Mar 2020 08:15:59 -0500 Subject: [PATCH 017/328] updating the kim_commands doc --- doc/src/kim_commands.rst | 550 +++++++++++++++++++++++++++++++++++---- 1 file changed, 
503 insertions(+), 47 deletions(-) diff --git a/doc/src/kim_commands.rst b/doc/src/kim_commands.rst index c710ddfe5d..aca2529ec7 100644 --- a/doc/src/kim_commands.rst +++ b/doc/src/kim_commands.rst @@ -1,16 +1,19 @@ -.. index:: kim_init, kim_interactions, kim_query, kim_param +.. index:: kim_init, kim_interactions, kim_query, kim_param, kim_property -kim_init command -================= +:ref:`kim_init<kim_init command>` command +========================================= -kim_interactions command -========================= +:ref:`kim_interactions<kim_interactions command>` command +========================================================= -kim_query command -================== +:ref:`kim_query<kim_query command>` command +=========================================== -kim_param command -================== +:ref:`kim_param<kim_param command>` command +=========================================== + +:ref:`kim_property<kim_property command>` command +================================================= Syntax """""" @@ -22,6 +25,11 @@ Syntax kim_query variable formatarg query_function queryargs kim_param get param_name index_range variables formatarg kim_param set param_name index_range values + kim_property create instance_id property_id + kim_property modify instance_id key key_name key_name_key key_name_value + kim_property remove instance_id key key_name + kim_property destroy instance_id + kim_property dump file .. _formatarg_options: @@ -41,13 +49,19 @@ Syntax on the prefix specified in *variable* and a number appended to indicate which element in the list of values is in the variable. *explicit* = returns the values separately in one more more variable names - provided as arguments that precede *formatarg*\ . [default for *kim_param*] + provided as arguments that precede *formatarg*\ .
[default for *kim_param*] * query_function = name of the OpenKIM web API query function to be used * queryargs = a series of *keyword=value* pairs that represent the web query; supported keywords depend on the query function * param_name = name of a KIM portable model parameter * index_range = KIM portable model parameter index range (an integer for a single element, or pair of integers separated by a colon for a range of elements) * values = new value(s) to replace the current value(s) of a KIM portable model parameter +* instance_id = a positive integer identifying the KIM property instance +* property_id = identifier of a `KIM Property Definition `_, which can be (1) a property short name, (2) the full unique ID of the property (including the contributor and date), (3) a file name corresponding to a local property definition file +* key_name = one of the keys belonging to the specified KIM property definition +* key_name_key = a key belonging to a key-value pair (standardized in the `KIM Properties Framework `__) +* key_name_value = value to be associated with a key_name_key in a key-value pair +* file = name of a file to write the currently defined set of KIM property instances to Examples """""""" @@ -64,6 +78,15 @@ Examples kim_query a0 get_lattice_constant_cubic crystal=["fcc"] species=["Al"] units=["angstrom"] kim_param get gamma 1 varGamma kim_param set gamma 1 3.0 + kim_property create 1 atomic-mass + kim_property modify 1 key mass source-value 26.98154 + kim_property modify 1 key species source-value Al + kim_property remove 1 key species + kim_property destroy 1 + kim_property dump results.edn + + +.. _kim_description: Description """"""""""" @@ -157,11 +180,10 @@ See the `current list of KIM PMs and SMs archived in OpenKIM `_ to -learn how to install a pre-build binary of the OpenKIM Repository of Models. +See `Obtaining KIM Models `_ to +learn how to install a pre-built binary of the OpenKIM Repository of Models. .. 
note:: - It is also possible to locally install IMs not archived in OpenKIM, in which case their names do not have to conform to the KIM ID format. @@ -169,15 +191,17 @@ Using OpenKIM IMs with LAMMPS ----------------------------- Two commands are employed when using OpenKIM IMs, one to select the -IM and perform necessary initialization (*kim_init*), and the second +IM and perform necessary initialization (\ *kim_init*\ ), and the second to set up the IM for use by executing any necessary LAMMPS commands -(*kim_interactions*). Both are required. +(\ *kim_interactions*\ ). Both are required. See the *examples/kim* directory for example input scripts that use KIM PMs and KIM SMs. +.. _kim_init command: + OpenKIM IM Initialization (*kim_init*) -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The *kim_init* mode command must be issued **before** the simulation box is created (normally at the top of the file). @@ -219,7 +243,7 @@ either match the required units of the IM or the IM must be able to adjust its units to match. (The latter is only possible with some KIM PMs; SMs can never adjust their units.) If a match is possible, the LAMMPS :doc:`units ` command is called to set the units to -*user_units*. If the match fails, the simulation is terminated with +*user_units*\ . If the match fails, the simulation is terminated with an error. Here is an example of a LAMMPS script to compute the cohesive energy @@ -324,8 +348,10 @@ be done to convert the box and all atomic positions to the correct units: all appropriate places in the input script. It is up to the user to do this correctly. +.. _kim_interactions command: + OpenKIM IM Execution (*kim_interactions*) -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The second and final step in using an OpenKIM IM is to execute the *kim_interactions* command. 
This command must be preceded by a *kim_init* @@ -399,12 +425,17 @@ the *kim_interactions* command executes the following LAMMPS input commands: pair_coeff * * ffield.reax.rdx C H N O fix reaxqeq all qeq/reax 1 0.0 10.0 1.0e-6 param.qeq -Note that the files *lmp_control*, *ffield.reax.rdx* and *param.qeq* -are specific to the Strachan et al. (2003) ReaxFF parameterization -and are archived as part of the SM package in OpenKIM. -Note also that parameters like cutoff radii and charge tolerances, -which have an effect on IM predictions, are also included in the -SM definition ensuring reproducibility. +.. note:: + + The files *lmp_control*, *ffield.reax.rdx* and *param.qeq* + are specific to the Strachan et al. (2003) ReaxFF parameterization + and are archived as part of the SM package in OpenKIM. + +.. note:: + + Parameters like cutoff radii and charge tolerances, + which have an effect on IM predictions, are also included in the + SM definition ensuring reproducibility. .. note:: @@ -414,8 +445,10 @@ SM definition ensuring reproducibility. bond_coeff, fixes related to charge equilibration, etc.) should normally not appear in the input script. +.. _kim_query command: + Using OpenKIM Web Queries in LAMMPS (*kim_query*) -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The *kim_query* command performs a web query to retrieve the predictions of an IM set by *kim_init* for material properties archived in @@ -427,6 +460,7 @@ of an IM set by *kim_init* for material properties archived in The syntax for the *kim_query* command is as follows: + .. code-block:: LAMMPS kim_query variable formatarg query_function queryargs @@ -442,7 +476,7 @@ individual variables of the form *prefix_I*, where *prefix* is set to the *kim_query* *variable* argument and *I* ranges from 1 to the number of returned values. The number and order of the returned values is determined by the type of query performed. 
(Note that the "explicit" setting of -*formatarg* is not supported by *kim_query*.) +*formatarg* is not supported by *kim_query*\ .) .. note:: @@ -452,7 +486,7 @@ by the type of query performed. (Note that the "explicit" setting of cases will generate an error. The second required argument *query_function* is the name of the -query function to be called (e.g. *get_lattice_constant_cubic*). +query function to be called (e.g. *get_lattice_constant_cubic*\ ). All following :doc:`arguments ` are parameters handed over to the web query in the format *keyword=value*\ , where *value* is always an array of one or more comma-separated items in brackets. @@ -466,7 +500,7 @@ is available on the OpenKIM webpage at All query functions require the *model* keyword, which identifies the IM whose predictions are being queried. This keyword is automatically generated by *kim_query* based on the IM set in *kim_init* and must not - be specified as an argument to *kim_query*. + be specified as an argument to *kim_query*\ . .. note:: @@ -475,11 +509,11 @@ is available on the OpenKIM webpage at used to compute this property. In cases where there are multiple methods in OpenKIM for computing a property, a *method* keyword can be provided to select the method of choice. See the - `query documentation `_ - to see which methods are available for a given *query function*\ . + `query documentation `_ + to see which methods are available for a given *query_function*\ . *kim_query* Usage Examples and Further Clarifications -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The data obtained by *kim_query* commands can be used as part of the setup or analysis phases of LAMMPS simulations. Some examples are given below. @@ -502,10 +536,12 @@ crystal. By using *kim_query*, the user is saved the trouble and possible error of tracking this value down, or of having to perform an energy minimization to find the equilibrium lattice constant. 
-Note that in *unit_conversion_mode* the results obtained from a -*kim_query* would need to be converted to the appropriate units system. -For example, in the above script, the lattice command would need to be -changed to: "lattice fcc ${a0}\*${_u_distance}". +.. note:: + + In *unit_conversion_mode* the results obtained from a + *kim_query* would need to be converted to the appropriate units system. + For example, in the above script, the lattice command would need to be + changed to: "lattice fcc ${a0}*${_u_distance}". **Define an equilibrium hcp crystal** @@ -524,7 +560,7 @@ changed to: "lattice fcc ${a0}\*${_u_distance}". In this case the *kim_query* returns two arguments (since the hexagonal close packed (hcp) structure has two independent lattice constants). The *formatarg* keyword "split" places the two values into -the variables *latconst_1* and *latconst_2*. (These variables are +the variables *latconst_1* and *latconst_2*\ . (These variables are created if they do not already exist.) For convenience the variables *a0* and *c0* are created in order to make the remainder of the input script more readable. @@ -555,9 +591,9 @@ potential. If no tolerance is passed a default value is used. If multiple results are returned (indicating that the tolerance is too large), *kim_query* will return an error. See the - `query documentation `_ + `query documentation `_ to see which numerical arguments and tolerances are available for a - given *query function*\ . + given *query_function*\ . **Compute defect formation energy** @@ -586,8 +622,10 @@ ideal fcc cohesive energy of the atoms in the system obtained from from these programs are queried is tracked. No other information about the nature of the query or its source is recorded. +.. 
_kim_param command: + Accessing KIM Model Parameters from LAMMPS (*kim_param*) -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ All IMs are functional forms containing a set of parameters. The values of these parameters are typically @@ -620,7 +658,7 @@ for details). .. note:: - The *kim_param get/set* commands must be preceded by *kim_init*. + The *kim_param get/set* commands must be preceded by *kim_init*\ . The *kim_param set* command must additionally be preceded by a *kim_interactions* command (or alternatively by a *pair_style kim* and *pair_coeff* commands). The *kim_param set* command may be used wherever a *pair_coeff* command may occur. @@ -674,13 +712,13 @@ Multiple parameters can be retrieved with a single call to *kim_param get* by repeating the argument list following *get*\ . For a *set* operation, the *values* argument contains the new value(s) -for the element(s) of the parameter specified by *index_range*. For the case +for the element(s) of the parameter specified by *index_range*\ . For the case where multiple values are being set, *values* contains a set of values separated by spaces. Multiple parameters can be set with a single call to *kim_param set* by repeating the argument list following *set*\ . *kim_param* Usage Examples and Further Clarifications -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Examples of getting and setting KIM PM parameters with further clarifications are provided below. @@ -722,7 +760,7 @@ determined by the *formatarg* argument. In this case, *formatarg* is not specified and therefore the default "explicit" mode is used. (The behavior would be the same if the word -*explicit* were added after *LAM_TeSe*.) Elements 7, 8 and 9 of parameter +*explicit* were added after *LAM_TeSe*\ .) 
Elements 7, 8 and 9 of parameter lambda retrieved by the *get* operation are placed in the LAMMPS variables *LAM_TeTe*, *LAM_TeZn* and *LAM_TeSe*, respectively. @@ -765,7 +803,7 @@ contains the current value of lambda. In this case, the "split" mode of *formatarg* is used. The three values retrieved by the *get* operation are stored in -the three LAMMPS variables *LAM_15*, *LAM_16* and *LAM_17*. +the three LAMMPS variables *LAM_15*, *LAM_16* and *LAM_17*\ . The provided name "LAM" is used as prefix and the location in the lambda array is appended to create the variable names. @@ -797,7 +835,7 @@ potential, while *NEW_GAMMA* will contain the value 2.6. **Setting multiple scalar parameters with a single call** -.. parsed-literal:: +.. code-block:: LAMMPS kim_init SW_ZhouWardMartin_2013_CdTeZnSeHgS__MO_503261197030_002 metal ... @@ -824,6 +862,421 @@ In this case, elements 2 through 6 of the parameter *sigma* are set to the values 2.35214, 2.23869, 2.04516, 2.43269 and 1.80415 in order. +.. _kim_property command: + +Writing material properties computed in LAMMPS to standard KIM property instance format (*kim_property*) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +As explained :ref:`above`, +The OpenKIM system includes a collection of Tests (material property calculation codes), +Models (interatomic potentials), Predictions, and Reference Data (DFT or experiments). +Specifically, a KIM Test is a computation that when coupled with a KIM Model generates +the prediction of that model for a specific material property rigorously defined +by a KIM Property Definition (see the +`KIM Properties Framework `__ +for further details). A prediction of a material property for a given model is a specific +numerical realization of a property definition, referred to as a "Property +Instance." 
The objective of the *kim_property* command is to make it easy to +output material properties in a standardized, machine-readable format that can be easily +ingested by other programs. +Additionally, it aims to make it as easy as possible to convert a LAMMPS script that computes a +material property into a KIM Test that can then be uploaded to `openkim.org `_. + +A developer interested in creating a KIM Test using a LAMMPS script should +first determine whether a property definition that applies to their calculation +already exists in OpenKIM by searching the `properties page +`_. If none exists, it is possible to use a +locally defined property definition contained in a file until it can be +uploaded to the official repository (see below). Once one or more applicable +property definitions have been identified, the *kim_property create*, +*kim_property modify*, *kim_property remove*, and *kim_property destroy* +commands provide an interface to create, set, modify, remove, and destroy +instances of them within a LAMMPS script. Their general syntax is as follows: + +.. code-block:: LAMMPS + + kim_property create instance_id property_id + kim_property modify instance_id key key_name key_name_key key_name_value + kim_property remove instance_id key key_name + kim_property destroy instance_id + kim_property dump file + +Here, *instance_id* is a positive integer used to uniquely identify each +property instance (note that the results file can contain multiple property +instances). A *property_id* is an identifier of a +`KIM Property Definition `_, +which can be (1) a property short name, (2) the full unique ID of the property +(including the contributor and date), (3) a file name corresponding to a local +property definition file. Examples of each of these cases are shown below: + +.. code-block:: LAMMPS + + kim_property create 1 atomic-mass + kim_property create 2 cohesive-energy-relation-cubic-crystal + +.. 
code-block:: LAMMPS + + kim_property create 1 tag:brunnels@noreply.openkim.org,2016-05-11:property/atomic-mass + kim_property create 2 tag:staff@noreply.openkim.org,2014-04-15:property/cohesive-energy-relation-cubic-crystal + +.. code-block:: LAMMPS + + kim_property create 1 new-property.edn + kim_property create 2 /home/mary/marys-kim-properties/dissociation-energy.edn + +In the last example, "new-property.edn" and "/home/mary/marys-kim-properties/dissociation-energy.edn" are the +names of files that contain user-defined (local) property definitions. + +A KIM property instance takes the form of a "map," i.e. a set of key-value +pairs akin to Perl's hash, Python's dictionary, or Java's Hashtable. It +consists of a set of property key names, each of which is referred to here by +the *key_name* argument, that are defined as part of the relevant KIM Property +Definition and include only lowercase alphanumeric characters and dashes. The +value paired with each property key is itself a map whose possible keys are +defined as part of the `KIM Properties Framework +`__; these keys are +referred to by the *key_name_key* argument and their associated values by the +*key_name_value* argument. These values may either be scalars or arrays, +as stipulated in the property definition. + +.. note:: + + Each map assigned to a *key_name* must contain the *key_name_key* + "source-value" and an associated *key_name_value* of the appropriate + type (as defined in the relevant KIM Property Definition). For keys that are + defined as having physical units, the + "source-unit" *key_name_key* must also be given a string value recognized + by `GNU units `_. + +Once a *kim_property create* command has been given to instantiate a property +instance, maps associated with the property's keys can be edited using the +*kim_property modify* command. 
In using this command, the special keyword +"key" should be given, followed by the property key name and the key-value pair +in the map associated with the key that is to be set. For example, the +`atomic-mass `_ +property definition consists of two property keys named "mass" and "species." +An instance of this property could be created like so: + +.. code-block:: LAMMPS + + kim_property create 1 atomic-mass + kim_property modify 1 key species source-value Al + kim_property modify 1 key mass source-value 26.98154 + kim_property modify 1 key mass source-unit amu + +or, equivalently, + +.. code-block:: LAMMPS + + kim_property create 1 atomic-mass + kim_property modify 1 key species source-value Al & + key mass source-value 26.98154 & + source-unit amu + +*kim_property* Usage Examples and Further Clarifications +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +**Create** + +.. code-block:: LAMMPS + + kim_property create instance_id property_id + +The *kim_property create* command takes as input a property instance ID and the +property definition name, and creates an initial empty property instance data +structure. For example, + +.. code-block:: LAMMPS + + kim_property create 1 atomic-mass + kim_property create 2 cohesive-energy-relation-cubic-crystal + +creates an empty property instance of the "atomic-mass" property definition +with instance ID 1 and an empty instance of the +"cohesive-energy-relation-cubic-crystal" property with ID 2. A list of +published property definitions in OpenKIM can be found on the `properties page +`_. + +One can also provide the name of a file in the current working directory or the +path of a file containing a valid property definition. For example, + +.. code-block:: LAMMPS + + kim_property create 1 new-property.edn + +where "new-property.edn" refers to a file name containing a new property +definition that does not exist in OpenKIM. 
+ +If the *property_id* given cannot be found in OpenKIM and no file of this name +containing a valid property definition can be found, this command will produce +an error with an appropriate message. Calling *kim_property create* with the +same instance ID multiple times will also produce an error. + +**Modify** + +.. code-block:: LAMMPS + + kim_property modify instance_id key key_name key_name_key key_name_value + +The *kim_property modify* command incrementally builds the property instance +by receiving property definition keys along with associated arguments. Each +*key_name* is associated with a map containing one or more key-value pairs (in +the form of *key_name_key*-*key_name_value* pairs). For example, + +.. code-block:: LAMMPS + + kim_property modify 1 key species source-value Al + kim_property modify 1 key mass source-value 26.98154 + kim_property modify 1 key mass source-unit amu + +where the special keyword "key" is followed by a *key_name* ("species" or +"mass" in the above) and one or more key-value pairs. These key-value pairs +may continue until either another "key" keyword is given or the end of the +command line is reached. Thus, the above could equivalently be written as + +.. code-block:: LAMMPS + + kim_property modify 1 key species source-value Al & + key mass source-value 26.98154 & + key mass source-unit amu + +As an example of modifying multiple key-value pairs belonging to the map of a +single property key, the following command modifies the map of the +"cohesive-potential-energy" property key to contain the key "source-unit" which +is assigned a value of "eV" and the key "digits" which is assigned a value of +5: + +.. code-block:: LAMMPS + + kim_property modify 2 key cohesive-potential-energy source-unit eV digits 5 + +.. note:: + + The relevant data types of the values in the map are handled + automatically based on the specification of the key in the + KIM Property Definition. 
In the example above, + this means that the value "eV" will automatically be interpreted as a string + while the value 5 will be interpreted as an integer. + +The values contained in maps can either be scalars, as in all of the examples +above, or arrays depending on which is stipulated in the corresponding Property +Definition. For one-dimensional arrays, a single one-based index must be +supplied that indicates which element of the array is to be modified. For +multidimensional arrays, multiple indices must be given depending on the +dimensionality of the array. + +.. note:: + + All array indexing used by *kim_property modify* is one-based, i.e. the + indices are enumerated 1, 2, 3, ... + +.. note:: + + The dimensionality of arrays is defined in the corresponding Property + Definition. The extent of each dimension of an array can either be a + specific finite number or indefinite and determined at run time. If + an array has a fixed extent, attempting to modify an out-of-range index will + fail with an error message. + +For example, the "species" property key of the +`cohesive-energy-relation-cubic-crystal +`_ +property is a one-dimensional array that can contain any number of entries +based on the number of atoms in the unit cell of a given cubic crystal. To +assign an array containing the string "Al" four times to the "source-value" key +of the "species" property key, we can do so by issuing: + +.. code-block:: LAMMPS + + kim_property modify 2 key species source-value 1 Al + kim_property modify 2 key species source-value 2 Al + kim_property modify 2 key species source-value 3 Al + kim_property modify 2 key species source-value 4 Al + +.. note:: + + No declaration of the number of elements in this array was given; + *kim_property modify* will automatically handle memory management to allow + an arbitrary number of elements to be added to the array. + +.. 
note:: + + In the event that *kim_property modify* is used to set the value of an + array index without having set the values of all lesser indices, they will + be assigned default values based on the data type associated with the key in + the map: + + .. table_from_list:: + :columns: 2 + + * Data type + * Default value + * int + * 0 + * float + * 0.0 + * string + * \"\" + * file + * \"\" + + For example, doing the following: + + .. code-block:: LAMMPS + + kim_property create 2 cohesive-energy-relation-cubic-crystal + kim_property modify 2 key species source-value 4 Al + + will result in the "source-value" key in the map for the property key + "species" being assigned the array ["", "", "", "Al"]. + +For convenience, the index argument provided may refer to an inclusive range of +indices by specifying two integers separated by a colon (the first integer must +be less than or equal to the second integer, and no whitespace should be +included). Thus, the snippet above could equivalently be written: + +.. code-block:: LAMMPS + + kim_property modify 2 key species source-value 1:4 Al Al Al Al + +Calling this command with a non-positive index, e.g. +``kim_property modify 2 key species source-value 0 Al``, or an incorrect +number of input arguments, e.g. +``kim_property modify 2 key species source-value 1:4 Al Al``, will result in an +error. + +As an example of modifying multidimensional arrays, consider the "basis-atom-coordinates" +key in the `cohesive-energy-relation-cubic-crystal +`_ +property definition. This is a two-dimensional array containing the fractional +coordinates of atoms in the unit cell of the cubic crystal. In the case of, +e.g. a conventional fcc unit cell, the "source-value" key in the map associated +with this key should be assigned the following value: + +.. 
code-block:: LAMMPS + + [[0.0, 0.0, 0.0], + [0.5, 0.5, 0.0], + [0.5, 0.0, 0.5], + [0.0, 0.5, 0.5]] + +While each of the twelve components could be set individually, we can instead set +each row at a time using colon notation: + +.. code-block:: LAMMPS + + kim_property modify 2 key basis-atom-coordinates source-value 1 1:3 0.0 0.0 0.0 + kim_property modify 2 key basis-atom-coordinates source-value 2 1:3 0.5 0.5 0.0 + kim_property modify 2 key basis-atom-coordinates source-value 3 1:3 0.5 0.0 0.5 + kim_property modify 2 key basis-atom-coordinates source-value 4 1:3 0.0 0.5 0.5 + +Where the first index given refers to a row and the second index refers to a +column. We could, instead, choose to set each column at a time like so: + +.. code-block:: LAMMPS + + kim_property modify 2 key basis-atom-coordinates source-value 1:4 1 0.0 0.5 0.5 0.0 & + key basis-atom-coordinates source-value 1:4 2 0.0 0.5 0.0 0.5 & + key basis-atom-coordinates source-value 1:4 3 0.0 0.0 0.5 0.5 + +.. note:: + + Multiple calls of *kim_property modify* made for the same instance ID + can be combined into a single invocation, meaning the following are + both valid: + + .. code-block:: LAMMPS + + kim_property modify 2 key basis-atom-coordinates source-value 1 1:3 0.0 0.0 0.0 & + key basis-atom-coordinates source-value 2 1:3 0.5 0.5 0.0 & + key basis-atom-coordinates source-value 3 1:3 0.5 0.0 0.5 & + key basis-atom-coordinates source-value 4 1:3 0.0 0.5 0.5 + + .. code-block:: LAMMPS + + kim_property modify 2 key short-name source-value 1 fcc & + key species source-value 1:4 Al Al Al Al & + key a source-value 1:5 3.9149 4.0000 4.032 4.0817 4.1602 & + source-unit angstrom & + digits 5 & + key basis-atom-coordinates source-value 1 1:3 0.0 0.0 0.0 & + key basis-atom-coordinates source-value 2 1:3 0.5 0.5 0.0 & + key basis-atom-coordinates source-value 3 1:3 0.5 0.0 0.5 & + key basis-atom-coordinates source-value 4 1:3 0.0 0.5 0.5 + +.. 
note:: + + For multidimensional arrays, only one colon-separated range is allowed + in the index listing. Therefore, + + .. code-block:: LAMMPS + + kim_property modify 2 key basis-atom-coordinates source-value 1 1:3 0.0 0.0 0.0 + + is valid but + + .. code-block:: LAMMPS + + kim_property modify 2 key basis-atom-coordinates source-value 1:2 1:3 0.0 0.0 0.0 0.0 0.0 0.0 + + is not. + +.. note:: + + After one sets a value in a map with the *kim_property modify* command, + additional calls will overwrite the previous value. + +**Remove** + +.. code-block:: LAMMPS + + kim_property remove instance_id key key_name + +The *kim_property remove* command can be used to remove a property key from a +property instance. For example, + +.. code-block:: LAMMPS + + kim_property remove 2 key basis-atom-coordinates + +**Destroy** + +.. code-block:: LAMMPS + + kim_property destroy instance_id + +The *kim_property destroy* command deletes a previously created property +instance ID. For example, + +.. code-block:: LAMMPS + + kim_property destroy 2 + +.. note:: + + If this command is called with an instance ID that does not exist, no + error is raised. + +**Dump** + +The *kim_property dump* command can be used to write the content of all +currently defined property instances to a file: + +.. code-block:: LAMMPS + + kim_property dump file + +For example, + +.. code-block:: LAMMPS + + kim_property dump results.edn + +.. note:: + + Issuing the *kim_property dump* command clears all existing property + instances from memory. + Citation of OpenKIM IMs ----------------------- @@ -847,8 +1300,11 @@ LAMMPS is built with that package. A requirement for the KIM package, is the KIM API library that must be downloaded from the `OpenKIM website `_ and installed before LAMMPS is compiled. When installing LAMMPS from binary, the kim-api package -is a dependency that is automatically downloaded and installed. See the KIM -section of the :doc:`Packages details ` for details. 
+is a dependency that is automatically downloaded and installed. The *kim_query* +command requires the *libcurl* library to be installed. The *kim_property* +command requires *Python* 3.6 or later and the *kim-property* python package to +be installed. See the KIM section of the :doc:`Packages details ` +for details. Furthermore, when using *kim_commands* to run KIM SMs, any packages required by the native potential being used or other commands or fixes that it invokes -- GitLab From f203258ad59c6d6462e11c3907d17d398dd339df Mon Sep 17 00:00:00 2001 From: Yaser Afshar Date: Mon, 16 Mar 2020 08:17:27 -0500 Subject: [PATCH 018/328] updating the Build_extras, KIM package --- doc/src/Build_extras.rst | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/doc/src/Build_extras.rst b/doc/src/Build_extras.rst index 4589015e35..45c876d84a 100644 --- a/doc/src/Build_extras.rst +++ b/doc/src/Build_extras.rst @@ -189,13 +189,27 @@ KIM package --------------------- To build with this package, the KIM library with API v2 must be downloaded -and built on your system. It must include the KIM models that you want to -use with LAMMPS. If you want to use the :doc:`kim_query ` +and built on your system. It must include the KIM models that you want to +use with LAMMPS. + +If you would like to use the :doc:`kim_query ` command, you also need to have libcurl installed with the matching development headers and the curl-config tool. -See the `Obtaining KIM Models `_ -web page to +If you would like to use the :doc:`kim_property ` +command, you need to build LAMMPS with the Python 3.6 or later package +installed. See the :doc:`Python ` doc page for more info on building +LAMMPS with the version of Python on your system. 
+After successfully building LAMMPS with Python, you need to +install the kim-property Python package, which can be easily done using +*pip* as ``pip install kim-property``, or from the *conda-forge* channel as +``conda install kim-property`` if LAMMPS is built in Conda. More detailed +information is available at: +`kim-property installation `_. + +In addition to installing the KIM API, it is also necessary to install the +library of KIM models (interatomic potentials). +See `Obtaining KIM Models `_ to learn how to install a pre-build binary of the OpenKIM Repository of Models. See the list of all KIM models here: https://openkim.org/browse/models -- GitLab From cfc9fe7e39a451109b4f32a47fd1d11dd082e0ae Mon Sep 17 00:00:00 2001 From: Yaser Afshar Date: Mon, 16 Mar 2020 08:18:19 -0500 Subject: [PATCH 019/328] updating the Commands_all, adding kim_property command --- doc/src/Commands_all.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/src/Commands_all.rst b/doc/src/Commands_all.rst index 36ae112527..92c0ba107a 100644 --- a/doc/src/Commands_all.rst +++ b/doc/src/Commands_all.rst @@ -70,6 +70,7 @@ An alphabetic list of all general LAMMPS commands. 
* :doc:`kim_init ` * :doc:`kim_interactions ` * :doc:`kim_param ` + * :doc:`kim_property ` * :doc:`kim_query ` * :doc:`kspace_modify ` * :doc:`kspace_style ` -- GitLab From 2d3423a49309830e6969f5870248e4bc01f3e0e9 Mon Sep 17 00:00:00 2001 From: Yaser Afshar Date: Mon, 16 Mar 2020 08:19:13 -0500 Subject: [PATCH 020/328] updating the Packages_details, KIM package --- doc/src/Packages_details.rst | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/doc/src/Packages_details.rst b/doc/src/Packages_details.rst index e20c2886ed..8251c5301e 100644 --- a/doc/src/Packages_details.rst +++ b/doc/src/Packages_details.rst @@ -365,12 +365,17 @@ KIM package This package contains a set of commands that serve as a wrapper on the `Open Knowledgebase of Interatomic Models (OpenKIM) `_ -repository of interatomic models (IMs) -enabling compatible ones to be used in LAMMPS simulations. -This includes :doc:`kim_init and kim_interactions ` -commands to select, initialize and instantiate the IM, and a -:doc:`kim_query ` command to perform web queries -for material property predictions of OpenKIM IMs. +repository of interatomic models (IMs) enabling compatible ones to be used in +LAMMPS simulations. + +This includes :doc:`kim_init `, and +:doc:`kim_interactions ` commands to select, initialize and +instantiate the IM, a :doc:`kim_query ` command to perform web +queries for material property predictions of OpenKIM IMs, a +:doc:`kim_param ` command to access KIM Model Parameters from +LAMMPS, and a :doc:`kim_property ` command to write material +properties computed in LAMMPS to standard KIM property instance format. + Support for KIM IMs that conform to the `KIM Application Programming Interface (API) `_ is provided by the :doc:`pair_style kim ` command. @@ -392,13 +397,16 @@ The KIM project is led by Ellad Tadmor and Ryan Elliott (U Minnesota) and is funded by the `National Science Foundation `_. 
**Authors:** Ryan Elliott (U Minnesota) is the main developer for the KIM -API and the *pair_style kim* command. Axel Kohlmeyer (Temple U) and -Ellad Tadmor (U Minnesota) contributed to the :doc:`kim_commands ` -interface in close collaboration with Ryan Elliott. +API and the *pair_style kim* command. Yaser Afshar (U Minnesota), +Axel Kohlmeyer (Temple U), Ellad Tadmor (U Minnesota), and +Daniel Karls (U Minnesota) contributed to the +:doc:`kim_commands ` interface in close collaboration with +Ryan Elliott. **Install:** -This package has :ref:`specific installation instructions ` on the :doc:`Build extras ` doc page. +This package has :ref:`specific installation instructions ` on the +:doc:`Build extras ` doc page. **Supporting info:** -- GitLab From 3da1e127bf82049d220ae2f6cfe21f9837b3977e Mon Sep 17 00:00:00 2001 From: Yaser Afshar Date: Mon, 16 Mar 2020 08:42:11 -0500 Subject: [PATCH 021/328] updating the false_positives with names causing warning --- doc/utils/sphinx-config/false_positives.txt | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/doc/utils/sphinx-config/false_positives.txt b/doc/utils/sphinx-config/false_positives.txt index 264c3fab50..4110682e6c 100644 --- a/doc/utils/sphinx-config/false_positives.txt +++ b/doc/utils/sphinx-config/false_positives.txt @@ -714,6 +714,7 @@ edgeIDs edihed edim edip +edn edpd eDPD edu @@ -1087,6 +1088,7 @@ Harting Hartree Hartrees Hasan +Hashtable Haswell Haugk Hayoun @@ -1344,6 +1346,7 @@ Kai Kalia Kamberaj Kapfer +Karls Karlsruhe Karniadakis Karplus @@ -1630,6 +1633,8 @@ Marroquin Marsaglia Marseille Martyna +mary +marys Masaglia Mashayak Massimilliano @@ -3135,6 +3140,7 @@ Westview wget Whelan whitesmoke +whitespace Wi Wicaksono Wijk @@ -3201,6 +3207,7 @@ xzhou yaff YAFF Yamada +Yaser Yazdani Ybar ybox -- GitLab From f1e03aefc6503423993ddcd329df6944ccbb7c64 Mon Sep 17 00:00:00 2001 From: Yaser Afshar Date: Tue, 17 Mar 2020 16:07:22 -0500 Subject: [PATCH 022/328] updating the pair potential docs, adding 
reference to OpenKIM --- doc/src/pair_adp.rst | 6 ++++-- doc/src/pair_eam.rst | 6 ++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/doc/src/pair_adp.rst b/doc/src/pair_adp.rst index 6c4796fb85..bbfd98f677 100644 --- a/doc/src/pair_adp.rst +++ b/doc/src/pair_adp.rst @@ -56,8 +56,10 @@ command to specify them. * The NIST WWW site at http://www.ctcms.nist.gov/potentials. Note that ADP potentials obtained from NIST must be converted into the extended DYNAMO *setfl* format discussed below. -* The OpenKIM Project at https://openkim.org/browse/models/by-type provides - ADP potentials that can be used directly in LAMMPS with the :doc:`kim_commands interface `. +* The OpenKIM Project at + `https://openkim.org/browse/models/by-type `_ + provides ADP potentials that can be used directly in LAMMPS with the + :doc:`kim_commands ` interface. ---------- diff --git a/doc/src/pair_eam.rst b/doc/src/pair_eam.rst index 4710f8a9a1..7384a4d54c 100644 --- a/doc/src/pair_eam.rst +++ b/doc/src/pair_eam.rst @@ -149,6 +149,7 @@ potentials stored in DYNAMO or other formats: http://www.ctcms.nist.gov/potentials http://cst-www.nrl.navy.mil/ccm6/ap http://enpub.fulton.asu.edu/cms/potentials/main/main.htm + https://openkim.org These potentials should be usable with LAMMPS, though the alternate formats would need to be converted to the DYNAMO format used by LAMMPS @@ -156,6 +157,11 @@ and described on this page. The NIST site is maintained by Chandler Becker (cbecker at nist.gov) who is good resource for info on interatomic potentials and file formats. +The OpenKIM Project at +`https://openkim.org/browse/models/by-type `_ +provides EAM potentials that can be used directly in LAMMPS with the +:doc:`kim_commands ` interface. 
+ ---------- For style *eam*\ , potential values are read from a file that is in the -- GitLab From d8a948d9b75bc21cb174e85a91070438a20416db Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Tue, 17 Mar 2020 21:13:03 -0400 Subject: [PATCH 023/328] correct spelling --- doc/src/kim_commands.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/src/kim_commands.rst b/doc/src/kim_commands.rst index aca2529ec7..99f7efffd5 100644 --- a/doc/src/kim_commands.rst +++ b/doc/src/kim_commands.rst @@ -49,7 +49,7 @@ Syntax on the prefix specified in *variable* and a number appended to indicate which element in the list of values is in the variable. *explicit* = returns the values separately in one more more variable names - provided as arguments that preceed *formatarg*\ . [default for *kim_param*] + provided as arguments that precede *formatarg*\ . [default for *kim_param*] * query_function = name of the OpenKIM web API query function to be used * queryargs = a series of *keyword=value* pairs that represent the web query; supported keywords depend on the query function -- GitLab From 8b75fb295039f005a4eeb073817cdaa792c56920 Mon Sep 17 00:00:00 2001 From: Yaser Afshar Date: Wed, 18 Mar 2020 16:56:47 -0500 Subject: [PATCH 024/328] initialize the python interpreter instance with python->init() --- src/KIM/kim_property.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/KIM/kim_property.cpp b/src/KIM/kim_property.cpp index 888aa4da0c..0a55d6aae9 100644 --- a/src/KIM/kim_property.cpp +++ b/src/KIM/kim_property.cpp @@ -65,6 +65,7 @@ #include "variable.h" #include "utils.h" #include "error.h" +#include "lmppython.h" #include @@ -81,10 +82,7 @@ kimProperty::kimProperty(LAMMPS *lmp) : Pointers(lmp) "3 >= 3.6 support."); #endif // one-time initialization of Python interpreter - if (!Py_IsInitialized()) { - Py_Initialize(); - PyEval_InitThreads(); - } + python->init(); #else error->all(FLERR, "Error Python support missing! 
Compile with PYTHON " "package installed!"); @@ -143,7 +141,7 @@ void kimProperty::command(int narg, char **arg) "\nkim-property Python package can be installed " "with pip:\n`pip install kim-property`\n" "See the installation instructions at\n" - "https://github.com/openkim/kim-property#installing-kim-property\n") + "https://github.com/openkim/kim-property#installing-kim-property\n" "for detailed information."); } -- GitLab From 9a1b4a8edb9f1d19f79b0f6ab65a9ec4b1ebc9eb Mon Sep 17 00:00:00 2001 From: Michael Lamparski Date: Wed, 28 Aug 2019 11:09:57 -0400 Subject: [PATCH 025/328] Add infrastructure for openmp4 compat --- cmake/CMakeLists.txt | 18 ++++++------ doc/src/Build_basics.rst | 8 ++---- src/USER-OMP/README | 8 ------ src/USER-OMP/hack_openmp_for_pgi_gcc9.sh | 12 -------- src/omp_compat.h | 35 ++++++++++++++++++++++++ 5 files changed, 46 insertions(+), 35 deletions(-) delete mode 100755 src/USER-OMP/hack_openmp_for_pgi_gcc9.sh create mode 100644 src/omp_compat.h diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 4dd079eaae..3dea0db8b0 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -229,16 +229,7 @@ pkg_depends(USER-SCAFACOS MPI) find_package(OpenMP QUIET) -# TODO: this is a temporary workaround until a better solution is found. AK 2019-05-30 -# GNU GCC 9.x uses settings incompatible with our use of 'default(none)' in OpenMP pragmas -# where we assume older GCC semantics. For the time being, we disable OpenMP by default -# for GCC 9.x and beyond. People may manually turn it on, but need to run the script -# src/USER-OMP/hack_openmp_for_pgi_gcc9.sh on all sources to make it compatible with gcc 9. 
-if ((CMAKE_CXX_COMPILER_ID STREQUAL "GNU") AND (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 8.99.9)) - option(BUILD_OMP "Build with OpenMP support" OFF) -else() - option(BUILD_OMP "Build with OpenMP support" ${OpenMP_FOUND}) -endif() +option(BUILD_OMP "Build with OpenMP support" ${OpenMP_FOUND}) if(BUILD_OMP) find_package(OpenMP REQUIRED) @@ -248,6 +239,13 @@ if(BUILD_OMP) endif() set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") + + if ((CMAKE_CXX_COMPILER_ID STREQUAL "GNU") AND (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 8.99.9)) + # GCC 9.x strictly implements OpenMP 4.0 semantics for consts. + add_definitions(-DLMP_OMP_COMPAT=4) + else() + add_definitions(-DLMP_OMP_COMPAT=3) + endif() endif() if(PKG_MSCG OR PKG_USER-ATC OR PKG_USER-AWPMD OR PKG_USER-QUIP OR PKG_LATTE) diff --git a/doc/src/Build_basics.rst b/doc/src/Build_basics.rst index 8ad48b0810..96bc0f5bd1 100644 --- a/doc/src/Build_basics.rst +++ b/doc/src/Build_basics.rst @@ -145,11 +145,9 @@ Some compilers do not fully support the ``default(none)`` directive and others (e.g. GCC version 9 and beyond) may implement OpenMP 4.0 semantics, which are incompatible with the OpenMP 3.1 semantics used in LAMMPS (for maximal compatibility with compiler versions in use). -In those case, all ``default(none)`` directives (which aid in detecting -incorrect and unwanted sharing) can be replaced with ``default(shared)`` -while dropping all ``shared()`` directives. The script -'src/USER-OMP/hack_openmp_for_pgi_gcc9.sh' can be used to automate -this conversion. +LAMMPS will try to detect compilers that use OpenMP 4.0 semantics and +change the directives accordingly, but if your compiler is not +detected, you may set the CMake variable ``-D LMP_OMP_COMPAT=4``. 
---------- diff --git a/src/USER-OMP/README b/src/USER-OMP/README index 46f63f646b..0aef853bca 100644 --- a/src/USER-OMP/README +++ b/src/USER-OMP/README @@ -9,11 +9,3 @@ doc/Section_accelerate.html, sub-section 5.2 The person who created this package is Axel Kohlmeyer at Temple U (akohlmey at gmail.com). Contact him directly if you have questions. --------------------------- - -This directory also contains a shell script: - -hack_openmp_for_pgi.sh - -which will convert OpenMP directives in src files -into a form compatible with the PGI compiler. diff --git a/src/USER-OMP/hack_openmp_for_pgi_gcc9.sh b/src/USER-OMP/hack_openmp_for_pgi_gcc9.sh deleted file mode 100755 index 6f9f30cedd..0000000000 --- a/src/USER-OMP/hack_openmp_for_pgi_gcc9.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/sh - -# convert default(none) directives for OpenMP pragmas to default(shared) and remove shared() directive -# this allows compiling OpenMP pragmas in LAMMPS with compilers that don't support default(none) properly -# or require backward incompatible OpenMP 4 and OpenMP 5 semantics - -for f in *.h *.cpp -do \ - sed -e '/#pragma omp/s/^\(.*default\)(none)\(.*\)$/\1(shared)\2/' \ - -e '/#pragma omp/s/shared([a-z0-9,_]\+)//' \ - -i.bak $f -done diff --git a/src/omp_compat.h b/src/omp_compat.h new file mode 100644 index 0000000000..8abf1c54bc --- /dev/null +++ b/src/omp_compat.h @@ -0,0 +1,35 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2020) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +// There is no way to annotate an OpenMP construct that +// (a) accesses const variables, (b) has default(none), +// and (c) is valid in both OpenMP 3.0 and 4.0. +// +// (in OpenMP 3.0, the const variables have a predetermined +// sharing attribute and are *forbidden* from being declared +// in the omp construct. In OpenMP 4.0, this predetermined +// sharing attribute is removed, and thus they are *required* +// to be declared in the omp construct) +// +// To date, most compilers still accept the OpenMP 3.0 form, +// so this is what LAMMPS primarily uses. For those compilers +// that strictly implement OpenMP 4.0 (such as GCC 9.0), we +// give up default(none). +#if LMP_OMP_COMPAT == 4 +# define LMP_SHARED(...) +# define LMP_DEFAULT_NONE default(shared) +#else +# define LMP_SHARED(...) shared(__VA_ARGS__) +# define LMP_DEFAULT_NONE default(none) +#endif + -- GitLab From 0bedebc083d326a4a8c32554dc46c98d2c379016 Mon Sep 17 00:00:00 2001 From: Michael Lamparski Date: Fri, 20 Mar 2020 12:16:22 -0400 Subject: [PATCH 026/328] migrate omp constructs to new macros These changes were automatically generated using the script posted in https://github.com/lammps/lammps/pull/1651#issuecomment-525814475 --- src/MPIIO/dump_atom_mpiio.cpp | 5 ++- src/MPIIO/dump_cfg_mpiio.cpp | 3 +- src/MPIIO/dump_custom_mpiio.cpp | 3 +- src/MPIIO/dump_xyz_mpiio.cpp | 3 +- src/USER-DIFFRACTION/compute_saed.cpp | 3 +- src/USER-DIFFRACTION/compute_xrd.cpp | 3 +- src/USER-INTEL/angle_charmm_intel.cpp | 3 +- src/USER-INTEL/angle_harmonic_intel.cpp | 3 +- src/USER-INTEL/bond_fene_intel.cpp | 3 +- src/USER-INTEL/bond_harmonic_intel.cpp | 3 +- src/USER-INTEL/dihedral_charmm_intel.cpp | 5 ++- src/USER-INTEL/dihedral_fourier_intel.cpp | 3 +- src/USER-INTEL/dihedral_harmonic_intel.cpp | 3 +- src/USER-INTEL/dihedral_opls_intel.cpp | 3 +- src/USER-INTEL/fix_intel.cpp | 3 +- src/USER-INTEL/improper_cvff_intel.cpp | 3 +- 
src/USER-INTEL/improper_harmonic_intel.cpp | 3 +- src/USER-INTEL/npair_intel.cpp | 3 +- src/USER-INTEL/pppm_disp_intel.cpp | 31 ++++++++-------- src/USER-INTEL/pppm_intel.cpp | 11 +++--- src/USER-OMP/angle_charmm_omp.cpp | 3 +- src/USER-OMP/angle_class2_omp.cpp | 3 +- src/USER-OMP/angle_cosine_delta_omp.cpp | 3 +- src/USER-OMP/angle_cosine_omp.cpp | 3 +- src/USER-OMP/angle_cosine_periodic_omp.cpp | 3 +- src/USER-OMP/angle_cosine_shift_exp_omp.cpp | 3 +- src/USER-OMP/angle_cosine_shift_omp.cpp | 3 +- src/USER-OMP/angle_cosine_squared_omp.cpp | 3 +- src/USER-OMP/angle_dipole_omp.cpp | 3 +- src/USER-OMP/angle_fourier_omp.cpp | 3 +- src/USER-OMP/angle_fourier_simple_omp.cpp | 3 +- src/USER-OMP/angle_harmonic_omp.cpp | 3 +- src/USER-OMP/angle_quartic_omp.cpp | 3 +- src/USER-OMP/angle_sdk_omp.cpp | 3 +- src/USER-OMP/angle_table_omp.cpp | 3 +- src/USER-OMP/bond_class2_omp.cpp | 3 +- src/USER-OMP/bond_fene_expand_omp.cpp | 3 +- src/USER-OMP/bond_fene_omp.cpp | 3 +- src/USER-OMP/bond_gromos_omp.cpp | 3 +- src/USER-OMP/bond_harmonic_omp.cpp | 3 +- src/USER-OMP/bond_harmonic_shift_cut_omp.cpp | 3 +- src/USER-OMP/bond_harmonic_shift_omp.cpp | 3 +- src/USER-OMP/bond_morse_omp.cpp | 3 +- src/USER-OMP/bond_nonlinear_omp.cpp | 3 +- src/USER-OMP/bond_quartic_omp.cpp | 3 +- src/USER-OMP/bond_table_omp.cpp | 3 +- src/USER-OMP/dihedral_charmm_omp.cpp | 3 +- src/USER-OMP/dihedral_class2_omp.cpp | 3 +- .../dihedral_cosine_shift_exp_omp.cpp | 3 +- src/USER-OMP/dihedral_fourier_omp.cpp | 3 +- src/USER-OMP/dihedral_harmonic_omp.cpp | 3 +- src/USER-OMP/dihedral_helix_omp.cpp | 3 +- src/USER-OMP/dihedral_multi_harmonic_omp.cpp | 3 +- src/USER-OMP/dihedral_nharmonic_omp.cpp | 3 +- src/USER-OMP/dihedral_opls_omp.cpp | 3 +- src/USER-OMP/dihedral_quadratic_omp.cpp | 3 +- src/USER-OMP/dihedral_table_omp.cpp | 3 +- src/USER-OMP/domain_omp.cpp | 7 ++-- src/USER-OMP/ewald_omp.cpp | 5 ++- src/USER-OMP/fix_gravity_omp.cpp | 5 ++- src/USER-OMP/fix_neigh_history_omp.cpp | 9 +++-- 
src/USER-OMP/fix_nh_asphere_omp.cpp | 9 +++-- src/USER-OMP/fix_nh_omp.cpp | 19 +++++----- src/USER-OMP/fix_nh_sphere_omp.cpp | 7 ++-- src/USER-OMP/fix_nve_omp.cpp | 9 +++-- src/USER-OMP/fix_nve_sphere_omp.cpp | 9 +++-- src/USER-OMP/fix_nvt_sllod_omp.cpp | 3 +- src/USER-OMP/fix_omp.cpp | 9 +++-- src/USER-OMP/fix_rigid_nh_omp.cpp | 23 ++++++------ src/USER-OMP/fix_rigid_omp.cpp | 19 +++++----- src/USER-OMP/fix_rigid_small_omp.cpp | 17 +++++---- src/USER-OMP/improper_class2_omp.cpp | 3 +- src/USER-OMP/improper_cossq_omp.cpp | 3 +- src/USER-OMP/improper_cvff_omp.cpp | 3 +- src/USER-OMP/improper_fourier_omp.cpp | 3 +- src/USER-OMP/improper_harmonic_omp.cpp | 3 +- src/USER-OMP/improper_ring_omp.cpp | 3 +- src/USER-OMP/improper_umbrella_omp.cpp | 3 +- src/USER-OMP/msm_cg_omp.cpp | 3 +- src/USER-OMP/msm_omp.cpp | 5 ++- src/USER-OMP/npair_full_bin_atomonly_omp.cpp | 3 +- src/USER-OMP/npair_full_bin_ghost_omp.cpp | 3 +- src/USER-OMP/npair_full_bin_omp.cpp | 3 +- src/USER-OMP/npair_full_multi_omp.cpp | 3 +- src/USER-OMP/npair_full_nsq_ghost_omp.cpp | 3 +- src/USER-OMP/npair_full_nsq_omp.cpp | 3 +- .../npair_half_bin_atomonly_newton_omp.cpp | 3 +- .../npair_half_bin_newtoff_ghost_omp.cpp | 3 +- src/USER-OMP/npair_half_bin_newtoff_omp.cpp | 3 +- src/USER-OMP/npair_half_bin_newton_omp.cpp | 3 +- .../npair_half_bin_newton_tri_omp.cpp | 3 +- src/USER-OMP/npair_half_multi_newtoff_omp.cpp | 3 +- src/USER-OMP/npair_half_multi_newton_omp.cpp | 3 +- .../npair_half_multi_newton_tri_omp.cpp | 3 +- .../npair_half_nsq_newtoff_ghost_omp.cpp | 3 +- src/USER-OMP/npair_half_nsq_newtoff_omp.cpp | 3 +- src/USER-OMP/npair_half_nsq_newton_omp.cpp | 3 +- .../npair_half_respa_bin_newtoff_omp.cpp | 3 +- .../npair_half_respa_bin_newton_omp.cpp | 3 +- .../npair_half_respa_bin_newton_tri_omp.cpp | 3 +- .../npair_half_respa_nsq_newtoff_omp.cpp | 3 +- .../npair_half_respa_nsq_newton_omp.cpp | 3 +- .../npair_half_size_bin_newtoff_omp.cpp | 3 +- .../npair_half_size_bin_newton_omp.cpp | 3 +- 
.../npair_half_size_bin_newton_tri_omp.cpp | 3 +- .../npair_half_size_nsq_newtoff_omp.cpp | 3 +- .../npair_half_size_nsq_newton_omp.cpp | 3 +- src/USER-OMP/npair_halffull_newtoff_omp.cpp | 3 +- src/USER-OMP/npair_halffull_newton_omp.cpp | 3 +- src/USER-OMP/pair_adp_omp.cpp | 3 +- src/USER-OMP/pair_agni_omp.cpp | 3 +- src/USER-OMP/pair_airebo_omp.cpp | 5 ++- src/USER-OMP/pair_beck_omp.cpp | 3 +- src/USER-OMP/pair_born_coul_long_omp.cpp | 3 +- src/USER-OMP/pair_born_coul_msm_omp.cpp | 3 +- src/USER-OMP/pair_born_coul_wolf_omp.cpp | 3 +- src/USER-OMP/pair_born_omp.cpp | 3 +- src/USER-OMP/pair_brownian_omp.cpp | 3 +- src/USER-OMP/pair_brownian_poly_omp.cpp | 3 +- src/USER-OMP/pair_buck_coul_cut_omp.cpp | 3 +- src/USER-OMP/pair_buck_coul_long_omp.cpp | 3 +- src/USER-OMP/pair_buck_coul_msm_omp.cpp | 3 +- src/USER-OMP/pair_buck_long_coul_long_omp.cpp | 9 +++-- src/USER-OMP/pair_buck_omp.cpp | 3 +- src/USER-OMP/pair_colloid_omp.cpp | 3 +- src/USER-OMP/pair_comb_omp.cpp | 7 ++-- src/USER-OMP/pair_coul_cut_omp.cpp | 3 +- src/USER-OMP/pair_coul_cut_soft_omp.cpp | 3 +- src/USER-OMP/pair_coul_debye_omp.cpp | 3 +- src/USER-OMP/pair_coul_diel_omp.cpp | 3 +- src/USER-OMP/pair_coul_dsf_omp.cpp | 3 +- src/USER-OMP/pair_coul_long_omp.cpp | 3 +- src/USER-OMP/pair_coul_long_soft_omp.cpp | 3 +- src/USER-OMP/pair_coul_msm_omp.cpp | 3 +- src/USER-OMP/pair_coul_wolf_omp.cpp | 3 +- src/USER-OMP/pair_dpd_omp.cpp | 3 +- src/USER-OMP/pair_dpd_tstat_omp.cpp | 3 +- src/USER-OMP/pair_eam_cd_omp.cpp | 3 +- src/USER-OMP/pair_eam_omp.cpp | 3 +- src/USER-OMP/pair_edip_omp.cpp | 3 +- src/USER-OMP/pair_eim_omp.cpp | 3 +- src/USER-OMP/pair_gauss_cut_omp.cpp | 3 +- src/USER-OMP/pair_gauss_omp.cpp | 3 +- src/USER-OMP/pair_gayberne_omp.cpp | 3 +- src/USER-OMP/pair_gran_hertz_history_omp.cpp | 3 +- src/USER-OMP/pair_gran_hooke_history_omp.cpp | 3 +- src/USER-OMP/pair_gran_hooke_omp.cpp | 3 +- src/USER-OMP/pair_hbond_dreiding_lj_omp.cpp | 3 +- .../pair_hbond_dreiding_morse_omp.cpp | 3 +- 
src/USER-OMP/pair_lj96_cut_omp.cpp | 3 +- ...air_lj_charmm_coul_charmm_implicit_omp.cpp | 3 +- .../pair_lj_charmm_coul_charmm_omp.cpp | 3 +- src/USER-OMP/pair_lj_charmm_coul_long_omp.cpp | 3 +- .../pair_lj_charmm_coul_long_soft_omp.cpp | 3 +- src/USER-OMP/pair_lj_charmm_coul_msm_omp.cpp | 3 +- src/USER-OMP/pair_lj_class2_coul_cut_omp.cpp | 3 +- src/USER-OMP/pair_lj_class2_coul_long_omp.cpp | 3 +- src/USER-OMP/pair_lj_class2_omp.cpp | 3 +- src/USER-OMP/pair_lj_cubic_omp.cpp | 3 +- src/USER-OMP/pair_lj_cut_coul_cut_omp.cpp | 3 +- .../pair_lj_cut_coul_cut_soft_omp.cpp | 3 +- src/USER-OMP/pair_lj_cut_coul_debye_omp.cpp | 3 +- src/USER-OMP/pair_lj_cut_coul_dsf_omp.cpp | 3 +- src/USER-OMP/pair_lj_cut_coul_long_omp.cpp | 3 +- .../pair_lj_cut_coul_long_soft_omp.cpp | 3 +- src/USER-OMP/pair_lj_cut_coul_msm_omp.cpp | 3 +- src/USER-OMP/pair_lj_cut_coul_wolf_omp.cpp | 3 +- src/USER-OMP/pair_lj_cut_dipole_cut_omp.cpp | 3 +- src/USER-OMP/pair_lj_cut_omp.cpp | 3 +- src/USER-OMP/pair_lj_cut_soft_omp.cpp | 3 +- src/USER-OMP/pair_lj_cut_thole_long_omp.cpp | 3 +- src/USER-OMP/pair_lj_cut_tip4p_cut_omp.cpp | 3 +- src/USER-OMP/pair_lj_cut_tip4p_long_omp.cpp | 3 +- .../pair_lj_cut_tip4p_long_soft_omp.cpp | 3 +- src/USER-OMP/pair_lj_expand_omp.cpp | 3 +- .../pair_lj_gromacs_coul_gromacs_omp.cpp | 3 +- src/USER-OMP/pair_lj_gromacs_omp.cpp | 3 +- src/USER-OMP/pair_lj_long_coul_long_omp.cpp | 9 +++-- src/USER-OMP/pair_lj_long_tip4p_long_omp.cpp | 9 +++-- src/USER-OMP/pair_lj_sdk_coul_long_omp.cpp | 3 +- src/USER-OMP/pair_lj_sdk_coul_msm_omp.cpp | 3 +- src/USER-OMP/pair_lj_sdk_omp.cpp | 3 +- src/USER-OMP/pair_lj_sf_dipole_sf_omp.cpp | 3 +- src/USER-OMP/pair_lj_smooth_linear_omp.cpp | 3 +- src/USER-OMP/pair_lj_smooth_omp.cpp | 3 +- src/USER-OMP/pair_lubricate_omp.cpp | 3 +- src/USER-OMP/pair_lubricate_poly_omp.cpp | 3 +- src/USER-OMP/pair_meam_spline_omp.cpp | 3 +- src/USER-OMP/pair_morse_omp.cpp | 3 +- src/USER-OMP/pair_morse_smooth_linear_omp.cpp | 3 +- 
src/USER-OMP/pair_nm_cut_coul_cut_omp.cpp | 3 +- src/USER-OMP/pair_nm_cut_coul_long_omp.cpp | 3 +- src/USER-OMP/pair_nm_cut_omp.cpp | 3 +- src/USER-OMP/pair_peri_lps_omp.cpp | 3 +- src/USER-OMP/pair_peri_pmb_omp.cpp | 3 +- src/USER-OMP/pair_resquared_omp.cpp | 3 +- src/USER-OMP/pair_soft_omp.cpp | 3 +- src/USER-OMP/pair_sw_omp.cpp | 3 +- src/USER-OMP/pair_table_omp.cpp | 3 +- src/USER-OMP/pair_tersoff_mod_c_omp.cpp | 3 +- src/USER-OMP/pair_tersoff_mod_omp.cpp | 3 +- src/USER-OMP/pair_tersoff_omp.cpp | 3 +- src/USER-OMP/pair_tersoff_table_omp.cpp | 3 +- src/USER-OMP/pair_tip4p_cut_omp.cpp | 3 +- src/USER-OMP/pair_tip4p_long_omp.cpp | 3 +- src/USER-OMP/pair_tip4p_long_soft_omp.cpp | 3 +- src/USER-OMP/pair_ufm_omp.cpp | 3 +- src/USER-OMP/pair_vashishta_omp.cpp | 3 +- src/USER-OMP/pair_vashishta_table_omp.cpp | 3 +- src/USER-OMP/pair_yukawa_colloid_omp.cpp | 3 +- src/USER-OMP/pair_yukawa_omp.cpp | 3 +- src/USER-OMP/pair_zbl_omp.cpp | 3 +- src/USER-OMP/pppm_cg_omp.cpp | 19 +++++----- src/USER-OMP/pppm_disp_omp.cpp | 37 ++++++++++--------- src/USER-OMP/pppm_disp_tip4p_omp.cpp | 37 ++++++++++--------- src/USER-OMP/pppm_omp.cpp | 19 +++++----- src/USER-OMP/pppm_tip4p_omp.cpp | 19 +++++----- src/USER-OMP/reaxc_forces_omp.cpp | 3 +- src/USER-OMP/reaxc_hydrogen_bonds_omp.cpp | 3 +- src/USER-OMP/respa_omp.cpp | 7 ++-- 220 files changed, 587 insertions(+), 367 deletions(-) diff --git a/src/MPIIO/dump_atom_mpiio.cpp b/src/MPIIO/dump_atom_mpiio.cpp index 2b663554cc..d59ed7bde8 100644 --- a/src/MPIIO/dump_atom_mpiio.cpp +++ b/src/MPIIO/dump_atom_mpiio.cpp @@ -15,6 +15,7 @@ Contributing author: Paul Coffman (IBM) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "dump_atom_mpiio.h" #include #include @@ -587,7 +588,7 @@ int DumpAtomMPIIO::convert_image_omp(int n, double *mybuf) mpifh_buffer_line_per_thread[i] = (char *) malloc(DUMP_BUF_CHUNK_SIZE * sizeof(char)); mpifh_buffer_line_per_thread[i][0] = '\0'; -#pragma omp 
parallel default(none) shared(bufOffset, bufRange, bufLength, mpifhStringCountPerThread, mpifh_buffer_line_per_thread, mybuf) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(bufOffset, bufRange, bufLength, mpifhStringCountPerThread, mpifh_buffer_line_per_thread, mybuf) { int tid = omp_get_thread_num(); int m=0; @@ -677,7 +678,7 @@ int DumpAtomMPIIO::convert_noimage_omp(int n, double *mybuf) mpifh_buffer_line_per_thread[i] = (char *) malloc(DUMP_BUF_CHUNK_SIZE * sizeof(char)); mpifh_buffer_line_per_thread[i][0] = '\0'; -#pragma omp parallel default(none) shared(bufOffset, bufRange, bufLength, mpifhStringCountPerThread, mpifh_buffer_line_per_thread, mybuf) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(bufOffset, bufRange, bufLength, mpifhStringCountPerThread, mpifh_buffer_line_per_thread, mybuf) { int tid = omp_get_thread_num(); int m=0; diff --git a/src/MPIIO/dump_cfg_mpiio.cpp b/src/MPIIO/dump_cfg_mpiio.cpp index 31848b550c..18368db330 100644 --- a/src/MPIIO/dump_cfg_mpiio.cpp +++ b/src/MPIIO/dump_cfg_mpiio.cpp @@ -15,6 +15,7 @@ Contributing author: Paul Coffman (IBM) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "dump_cfg_mpiio.h" #include #include @@ -365,7 +366,7 @@ int DumpCFGMPIIO::convert_string_omp(int n, double *mybuf) mpifh_buffer_line_per_thread[i] = (char *) malloc(DUMP_BUF_CHUNK_SIZE * sizeof(char)); mpifh_buffer_line_per_thread[i][0] = '\0'; -#pragma omp parallel default(none) shared(bufOffset, bufRange, bufLength, mpifhStringCountPerThread, mpifh_buffer_line_per_thread, mybuf) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(bufOffset, bufRange, bufLength, mpifhStringCountPerThread, mpifh_buffer_line_per_thread, mybuf) { int tid = omp_get_thread_num(); int m=0; diff --git a/src/MPIIO/dump_custom_mpiio.cpp b/src/MPIIO/dump_custom_mpiio.cpp index dca5833c5b..526021d895 100644 --- a/src/MPIIO/dump_custom_mpiio.cpp +++ b/src/MPIIO/dump_custom_mpiio.cpp @@ -15,6 +15,7 @@ Contributing 
author: Paul Coffman (IBM) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "dump_custom_mpiio.h" #include #include @@ -612,7 +613,7 @@ int DumpCustomMPIIO::convert_string_omp(int n, double *mybuf) mpifh_buffer_line_per_thread[i] = (char *) malloc(DUMP_BUF_CHUNK_SIZE * sizeof(char)); mpifh_buffer_line_per_thread[i][0] = '\0'; -#pragma omp parallel default(none) shared(bufOffset, bufRange, bufLength, mpifhStringCountPerThread, mpifh_buffer_line_per_thread, mybuf) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(bufOffset, bufRange, bufLength, mpifhStringCountPerThread, mpifh_buffer_line_per_thread, mybuf) { int tid = omp_get_thread_num(); int m=0; diff --git a/src/MPIIO/dump_xyz_mpiio.cpp b/src/MPIIO/dump_xyz_mpiio.cpp index f5caab3a9c..a1e4f21a79 100644 --- a/src/MPIIO/dump_xyz_mpiio.cpp +++ b/src/MPIIO/dump_xyz_mpiio.cpp @@ -15,6 +15,7 @@ Contributing author: Paul Coffman (IBM) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "dump_xyz_mpiio.h" #include #include @@ -350,7 +351,7 @@ int DumpXYZMPIIO::convert_string_omp(int n, double *mybuf) mpifh_buffer_line_per_thread[i] = (char *) malloc(DUMP_BUF_CHUNK_SIZE * sizeof(char)); mpifh_buffer_line_per_thread[i][0] = '\0'; -#pragma omp parallel default(none) shared(bufOffset, bufRange, bufLength, mpifhStringCountPerThread, mpifh_buffer_line_per_thread, mybuf) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(bufOffset, bufRange, bufLength, mpifhStringCountPerThread, mpifh_buffer_line_per_thread, mybuf) { int tid = omp_get_thread_num(); int m=0; diff --git a/src/USER-DIFFRACTION/compute_saed.cpp b/src/USER-DIFFRACTION/compute_saed.cpp index 7b5b2f4f5b..5dd47b28bd 100644 --- a/src/USER-DIFFRACTION/compute_saed.cpp +++ b/src/USER-DIFFRACTION/compute_saed.cpp @@ -15,6 +15,7 @@ Contributing authors: Shawn Coleman & Douglas Spearot (Arkansas) 
------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "compute_saed.h" #include #include @@ -418,7 +419,7 @@ void ComputeSAED::compute_vector() double frac = 0.1; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(offset,ASFSAED,typelocal,xlocal,Fvec,m,frac) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(offset,ASFSAED,typelocal,xlocal,Fvec,m,frac) #endif { double *f = new double[ntypes]; // atomic structure factor by type diff --git a/src/USER-DIFFRACTION/compute_xrd.cpp b/src/USER-DIFFRACTION/compute_xrd.cpp index 7f69449282..e75546b548 100644 --- a/src/USER-DIFFRACTION/compute_xrd.cpp +++ b/src/USER-DIFFRACTION/compute_xrd.cpp @@ -16,6 +16,7 @@ Updated: 06/17/2015-2 ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "compute_xrd.h" #include #include @@ -353,7 +354,7 @@ void ComputeXRD::compute_array() double frac = 0.1; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(typelocal,xlocal,Fvec,m,frac,ASFXRD) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(typelocal,xlocal,Fvec,m,frac,ASFXRD) #endif { double *f = new double[ntypes]; // atomic structure factor by type diff --git a/src/USER-INTEL/angle_charmm_intel.cpp b/src/USER-INTEL/angle_charmm_intel.cpp index 43de50e7fa..9275e82f1c 100644 --- a/src/USER-INTEL/angle_charmm_intel.cpp +++ b/src/USER-INTEL/angle_charmm_intel.cpp @@ -15,6 +15,7 @@ Contributing author: W. 
Michael Brown (Intel) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include #include "angle_charmm_intel.h" @@ -134,7 +135,7 @@ void AngleCharmmIntel::eval(const int vflag, } #if defined(_OPENMP) - #pragma omp parallel default(none) \ + #pragma omp parallel LMP_DEFAULT_NONE \ shared(f_start,f_stride,fc) \ reduction(+:oeangle,ov0,ov1,ov2,ov3,ov4,ov5) #endif diff --git a/src/USER-INTEL/angle_harmonic_intel.cpp b/src/USER-INTEL/angle_harmonic_intel.cpp index d073e7bc56..49a71038da 100644 --- a/src/USER-INTEL/angle_harmonic_intel.cpp +++ b/src/USER-INTEL/angle_harmonic_intel.cpp @@ -15,6 +15,7 @@ Contributing author: W. Michael Brown (Intel) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include #include "angle_harmonic_intel.h" @@ -134,7 +135,7 @@ void AngleHarmonicIntel::eval(const int vflag, } #if defined(_OPENMP) - #pragma omp parallel default(none) \ + #pragma omp parallel LMP_DEFAULT_NONE \ shared(f_start,f_stride,fc) \ reduction(+:oeangle,ov0,ov1,ov2,ov3,ov4,ov5) #endif diff --git a/src/USER-INTEL/bond_fene_intel.cpp b/src/USER-INTEL/bond_fene_intel.cpp index 6578706a9b..5c58e7bf10 100644 --- a/src/USER-INTEL/bond_fene_intel.cpp +++ b/src/USER-INTEL/bond_fene_intel.cpp @@ -15,6 +15,7 @@ Contributing author: Stan Moore (Sandia) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include #include "bond_fene_intel.h" @@ -127,7 +128,7 @@ void BondFENEIntel::eval(const int vflag, } #if defined(_OPENMP) - #pragma omp parallel default(none) \ + #pragma omp parallel LMP_DEFAULT_NONE \ shared(f_start,f_stride,fc) \ reduction(+:oebond,ov0,ov1,ov2,ov3,ov4,ov5) #endif diff --git a/src/USER-INTEL/bond_harmonic_intel.cpp b/src/USER-INTEL/bond_harmonic_intel.cpp index 55dda9fa15..8bf0a82218 100644 --- a/src/USER-INTEL/bond_harmonic_intel.cpp +++ b/src/USER-INTEL/bond_harmonic_intel.cpp @@ 
-15,6 +15,7 @@ Contributing author: W. Michael Brown (Intel) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include #include "bond_harmonic_intel.h" @@ -127,7 +128,7 @@ void BondHarmonicIntel::eval(const int vflag, } #if defined(_OPENMP) - #pragma omp parallel default(none) \ + #pragma omp parallel LMP_DEFAULT_NONE \ shared(f_start,f_stride,fc) \ reduction(+:oebond,ov0,ov1,ov2,ov3,ov4,ov5) #endif diff --git a/src/USER-INTEL/dihedral_charmm_intel.cpp b/src/USER-INTEL/dihedral_charmm_intel.cpp index 2ea4eb6d21..61fc1dfbd5 100644 --- a/src/USER-INTEL/dihedral_charmm_intel.cpp +++ b/src/USER-INTEL/dihedral_charmm_intel.cpp @@ -15,6 +15,7 @@ Contributing author: W. Michael Brown (Intel) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include #include "dihedral_charmm_intel.h" @@ -148,7 +149,7 @@ void DihedralCharmmIntel::eval(const int vflag, } #if defined(_OPENMP) - #pragma omp parallel default(none) \ + #pragma omp parallel LMP_DEFAULT_NONE \ shared(f_start,f_stride,fc) \ reduction(+:oevdwl,oecoul,oedihedral,ov0,ov1,ov2,ov3,ov4,ov5, \ opv0,opv1,opv2,opv3,opv4,opv5) @@ -522,7 +523,7 @@ void DihedralCharmmIntel::eval(const int vflag, } #if defined(_OPENMP) - #pragma omp parallel default(none) \ + #pragma omp parallel LMP_DEFAULT_NONE \ shared(f_start,f_stride,fc) \ reduction(+:oevdwl,oecoul,oedihedral,ov0,ov1,ov2,ov3,ov4,ov5, \ opv0,opv1,opv2,opv3,opv4,opv5) diff --git a/src/USER-INTEL/dihedral_fourier_intel.cpp b/src/USER-INTEL/dihedral_fourier_intel.cpp index 8c4a2ce921..fb8051474e 100644 --- a/src/USER-INTEL/dihedral_fourier_intel.cpp +++ b/src/USER-INTEL/dihedral_fourier_intel.cpp @@ -15,6 +15,7 @@ Contributing author: W. 
Michael Brown (Intel) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include #include "dihedral_fourier_intel.h" @@ -127,7 +128,7 @@ void DihedralFourierIntel::eval(const int vflag, } #if defined(_OPENMP) - #pragma omp parallel default(none) \ + #pragma omp parallel LMP_DEFAULT_NONE \ shared(f_start,f_stride,fc) \ reduction(+:oedihedral,ov0,ov1,ov2,ov3,ov4,ov5) #endif diff --git a/src/USER-INTEL/dihedral_harmonic_intel.cpp b/src/USER-INTEL/dihedral_harmonic_intel.cpp index 60655f2618..f6c4ea0fde 100644 --- a/src/USER-INTEL/dihedral_harmonic_intel.cpp +++ b/src/USER-INTEL/dihedral_harmonic_intel.cpp @@ -15,6 +15,7 @@ Contributing author: W. Michael Brown (Intel) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include #include "dihedral_harmonic_intel.h" @@ -127,7 +128,7 @@ void DihedralHarmonicIntel::eval(const int vflag, } #if defined(_OPENMP) - #pragma omp parallel default(none) \ + #pragma omp parallel LMP_DEFAULT_NONE \ shared(f_start,f_stride,fc) \ reduction(+:oedihedral,ov0,ov1,ov2,ov3,ov4,ov5) #endif diff --git a/src/USER-INTEL/dihedral_opls_intel.cpp b/src/USER-INTEL/dihedral_opls_intel.cpp index 5b580cd7d9..79a8874a72 100644 --- a/src/USER-INTEL/dihedral_opls_intel.cpp +++ b/src/USER-INTEL/dihedral_opls_intel.cpp @@ -15,6 +15,7 @@ Contributing author: W. 
Michael Brown (Intel) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include #include "dihedral_opls_intel.h" @@ -131,7 +132,7 @@ void DihedralOPLSIntel::eval(const int vflag, } #if defined(_OPENMP) - #pragma omp parallel default(none) \ + #pragma omp parallel LMP_DEFAULT_NONE \ shared(f_start,f_stride,fc) \ reduction(+:oedihedral,ov0,ov1,ov2,ov3,ov4,ov5) #endif diff --git a/src/USER-INTEL/fix_intel.cpp b/src/USER-INTEL/fix_intel.cpp index 5ecae79b30..c62a6a432a 100644 --- a/src/USER-INTEL/fix_intel.cpp +++ b/src/USER-INTEL/fix_intel.cpp @@ -16,6 +16,7 @@ Anupama Kurpad (Intel) - Host Affinitization ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "fix_intel.h" #include "comm.h" #include "error.h" @@ -220,7 +221,7 @@ FixIntel::FixIntel(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg) comm->nthreads = nomp; } else { int nthreads; - #pragma omp parallel default(none) shared(nthreads) + #pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(nthreads) nthreads = omp_get_num_threads(); comm->nthreads = nthreads; } diff --git a/src/USER-INTEL/improper_cvff_intel.cpp b/src/USER-INTEL/improper_cvff_intel.cpp index f198e47d5c..4562c63cdb 100644 --- a/src/USER-INTEL/improper_cvff_intel.cpp +++ b/src/USER-INTEL/improper_cvff_intel.cpp @@ -15,6 +15,7 @@ Contributing author: W. 
Michael Brown (Intel) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include #include @@ -138,7 +139,7 @@ void ImproperCvffIntel::eval(const int vflag, } #if defined(_OPENMP) - #pragma omp parallel default(none) \ + #pragma omp parallel LMP_DEFAULT_NONE \ shared(f_start,f_stride,fc) \ reduction(+:oeimproper,ov0,ov1,ov2,ov3,ov4,ov5) #endif diff --git a/src/USER-INTEL/improper_harmonic_intel.cpp b/src/USER-INTEL/improper_harmonic_intel.cpp index d638e6a66e..fc5cf08c52 100644 --- a/src/USER-INTEL/improper_harmonic_intel.cpp +++ b/src/USER-INTEL/improper_harmonic_intel.cpp @@ -15,6 +15,7 @@ Contributing author: W. Michael Brown (Intel) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include #include @@ -139,7 +140,7 @@ void ImproperHarmonicIntel::eval(const int vflag, } #if defined(_OPENMP) - #pragma omp parallel default(none) \ + #pragma omp parallel LMP_DEFAULT_NONE \ shared(f_start,f_stride,fc) \ reduction(+:oeimproper,ov0,ov1,ov2,ov3,ov4,ov5) #endif diff --git a/src/USER-INTEL/npair_intel.cpp b/src/USER-INTEL/npair_intel.cpp index 4256e03b3c..f7a233efc9 100644 --- a/src/USER-INTEL/npair_intel.cpp +++ b/src/USER-INTEL/npair_intel.cpp @@ -15,6 +15,7 @@ Contributing author: W. 
Michael Brown (Intel) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "comm.h" #include "domain.h" #include "timer.h" @@ -263,7 +264,7 @@ void NPairIntel::bin_newton(const int offload, NeighList *list, } #if defined(_OPENMP) - #pragma omp parallel default(none) \ + #pragma omp parallel LMP_DEFAULT_NONE \ shared(overflow, nstencilp, binstart, binend) #endif { diff --git a/src/USER-INTEL/pppm_disp_intel.cpp b/src/USER-INTEL/pppm_disp_intel.cpp index fd1302da98..75fdc3b1df 100644 --- a/src/USER-INTEL/pppm_disp_intel.cpp +++ b/src/USER-INTEL/pppm_disp_intel.cpp @@ -15,6 +15,7 @@ Contributing authors: William McDoniel (RWTH Aachen University) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include #include @@ -729,7 +730,7 @@ void PPPMDispIntel::particle_map(double delx, double dely, double delz, int flag = 0; #if defined(_OPENMP) - #pragma omp parallel default(none) \ + #pragma omp parallel LMP_DEFAULT_NONE \ shared(nlocal, nthr, delx, dely, delz, sft, p2g, nup, nlow, nxlo,\ nylo, nzlo, nxhi, nyhi, nzhi) reduction(+:flag) if(!_use_lrt) #endif @@ -803,7 +804,7 @@ void PPPMDispIntel::make_rho_c(IntelBuffers * /*buffers*/) int nthr = comm->nthreads; #if defined(_OPENMP) - #pragma omp parallel default(none) \ + #pragma omp parallel LMP_DEFAULT_NONE \ shared(nthr, nlocal, global_density) if(!_use_lrt) #endif { @@ -909,7 +910,7 @@ void PPPMDispIntel::make_rho_c(IntelBuffers * /*buffers*/) // reduce all the perthread_densities into global_density #if defined(_OPENMP) - #pragma omp parallel default(none) \ + #pragma omp parallel LMP_DEFAULT_NONE \ shared(nthr, global_density) if(!_use_lrt) #endif { @@ -951,7 +952,7 @@ void PPPMDispIntel::make_rho_g(IntelBuffers * /*buffers*/) int nthr = comm->nthreads; #if defined(_OPENMP) - #pragma omp parallel default(none) \ + #pragma omp parallel LMP_DEFAULT_NONE \ shared(nthr, nlocal, global_density) if(!_use_lrt) 
#endif { @@ -1059,7 +1060,7 @@ void PPPMDispIntel::make_rho_g(IntelBuffers * /*buffers*/) // reduce all the perthread_densities into global_density #if defined(_OPENMP) - #pragma omp parallel default(none) \ + #pragma omp parallel LMP_DEFAULT_NONE \ shared(nthr, global_density) if(!_use_lrt) #endif { @@ -1234,7 +1235,7 @@ void PPPMDispIntel::make_rho_none(IntelBuffers * /*buffers*/) int nthr = comm->nthreads; #if defined(_OPENMP) - #pragma omp parallel default(none) \ + #pragma omp parallel LMP_DEFAULT_NONE \ shared(nthr, nlocal, global_density) if(!_use_lrt) #endif { @@ -1343,7 +1344,7 @@ void PPPMDispIntel::make_rho_none(IntelBuffers * /*buffers*/) // reduce all the perthread_densities into global_density #if defined(_OPENMP) - #pragma omp parallel default(none) \ + #pragma omp parallel LMP_DEFAULT_NONE \ shared(nthr, global_density) if(!_use_lrt) #endif { @@ -1386,7 +1387,7 @@ void PPPMDispIntel::fieldforce_c_ik(IntelBuffers * /*buffers*/) #if defined(_OPENMP) - #pragma omp parallel default(none) \ + #pragma omp parallel LMP_DEFAULT_NONE \ shared(nlocal, nthr) if(!_use_lrt) #endif { @@ -1536,7 +1537,7 @@ void PPPMDispIntel::fieldforce_c_ad(IntelBuffers * /*buffers*/) FFT_SCALAR * _noalias const particle_ekz = this->particle_ekz; #if defined(_OPENMP) - #pragma omp parallel default(none) \ + #pragma omp parallel LMP_DEFAULT_NONE \ shared(nlocal, nthr) if(!_use_lrt) #endif { @@ -1734,7 +1735,7 @@ void PPPMDispIntel::fieldforce_g_ik(IntelBuffers * /*buffers*/) #if defined(_OPENMP) - #pragma omp parallel default(none) \ + #pragma omp parallel LMP_DEFAULT_NONE \ shared(nlocal, nthr) if(!_use_lrt) #endif { @@ -1881,7 +1882,7 @@ void PPPMDispIntel::fieldforce_g_ad(IntelBuffers * /*buffers*/) FFT_SCALAR * _noalias const particle_ekz = this->particle_ekz; #if defined(_OPENMP) - #pragma omp parallel default(none) \ + #pragma omp parallel LMP_DEFAULT_NONE \ shared(nlocal, nthr) if(!_use_lrt) #endif { @@ -2078,7 +2079,7 @@ void PPPMDispIntel::fieldforce_a_ik(IntelBuffers * 
/*buffers*/) #if defined(_OPENMP) - #pragma omp parallel default(none) \ + #pragma omp parallel LMP_DEFAULT_NONE \ shared(nlocal, nthr) if(!_use_lrt) #endif { @@ -2312,7 +2313,7 @@ void PPPMDispIntel::fieldforce_a_ad(IntelBuffers * /*buffers*/) FFT_SCALAR * _noalias const particle_ekz6 = this->particle_ekz6; #if defined(_OPENMP) - #pragma omp parallel default(none) \ + #pragma omp parallel LMP_DEFAULT_NONE \ shared(nlocal, nthr) if(!_use_lrt) #endif { @@ -2603,7 +2604,7 @@ void PPPMDispIntel::fieldforce_none_ik(IntelBuffers * /*buffers*/) #if defined(_OPENMP) - #pragma omp parallel default(none) \ + #pragma omp parallel LMP_DEFAULT_NONE \ shared(nlocal, nthr) if(!_use_lrt) #endif { @@ -2762,7 +2763,7 @@ void PPPMDispIntel::fieldforce_none_ad(IntelBuffers * /*buffers*/) int nthr = comm->nthreads; #if defined(_OPENMP) - #pragma omp parallel default(none) \ + #pragma omp parallel LMP_DEFAULT_NONE \ shared(nlocal, nthr) if(!_use_lrt) #endif { diff --git a/src/USER-INTEL/pppm_intel.cpp b/src/USER-INTEL/pppm_intel.cpp index e3bf779cc1..d643da96b2 100644 --- a/src/USER-INTEL/pppm_intel.cpp +++ b/src/USER-INTEL/pppm_intel.cpp @@ -18,6 +18,7 @@ W. 
Michael Brown (Intel) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include #include @@ -372,7 +373,7 @@ void PPPMIntel::particle_map(IntelBuffers *buffers) error->one(FLERR,"Non-numeric box dimensions - simulation unstable"); #if defined(_OPENMP) - #pragma omp parallel default(none) \ + #pragma omp parallel LMP_DEFAULT_NONE \ shared(nlocal, nthr) reduction(+:flag) if(!_use_lrt) #endif { @@ -446,7 +447,7 @@ void PPPMIntel::make_rho(IntelBuffers *buffers) nthr = comm->nthreads; #if defined(_OPENMP) - #pragma omp parallel default(none) \ + #pragma omp parallel LMP_DEFAULT_NONE \ shared(nthr, nlocal, global_density) if(!_use_lrt) #endif { @@ -549,7 +550,7 @@ void PPPMIntel::make_rho(IntelBuffers *buffers) // reduce all the perthread_densities into global_density if (nthr > 1) { #if defined(_OPENMP) - #pragma omp parallel default(none) \ + #pragma omp parallel LMP_DEFAULT_NONE \ shared(nthr, global_density) if(!_use_lrt) #endif { @@ -598,7 +599,7 @@ void PPPMIntel::fieldforce_ik(IntelBuffers *buffers) } #if defined(_OPENMP) - #pragma omp parallel default(none) \ + #pragma omp parallel LMP_DEFAULT_NONE \ shared(nlocal, nthr) if(!_use_lrt) #endif { @@ -749,7 +750,7 @@ void PPPMIntel::fieldforce_ad(IntelBuffers *buffers) } #if defined(_OPENMP) - #pragma omp parallel default(none) \ + #pragma omp parallel LMP_DEFAULT_NONE \ shared(nlocal, nthr) if(!_use_lrt) #endif { diff --git a/src/USER-OMP/angle_charmm_omp.cpp b/src/USER-OMP/angle_charmm_omp.cpp index 116c937788..6933afdcd9 100644 --- a/src/USER-OMP/angle_charmm_omp.cpp +++ b/src/USER-OMP/angle_charmm_omp.cpp @@ -15,6 +15,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "angle_charmm_omp.h" #include #include "atom.h" @@ -47,7 +48,7 @@ void AngleCharmmOMP::compute(int eflag, int vflag) const int inum = neighbor->nanglelist; #if defined(_OPENMP) 
-#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/angle_class2_omp.cpp b/src/USER-OMP/angle_class2_omp.cpp index 9aab1d73b3..09ee59d5af 100644 --- a/src/USER-OMP/angle_class2_omp.cpp +++ b/src/USER-OMP/angle_class2_omp.cpp @@ -15,6 +15,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "angle_class2_omp.h" #include #include "atom.h" @@ -47,7 +48,7 @@ void AngleClass2OMP::compute(int eflag, int vflag) const int inum = neighbor->nanglelist; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/angle_cosine_delta_omp.cpp b/src/USER-OMP/angle_cosine_delta_omp.cpp index 44326c124e..ca5afe1449 100644 --- a/src/USER-OMP/angle_cosine_delta_omp.cpp +++ b/src/USER-OMP/angle_cosine_delta_omp.cpp @@ -15,6 +15,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "angle_cosine_delta_omp.h" #include #include "atom.h" @@ -47,7 +48,7 @@ void AngleCosineDeltaOMP::compute(int eflag, int vflag) const int inum = neighbor->nanglelist; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/angle_cosine_omp.cpp b/src/USER-OMP/angle_cosine_omp.cpp index 3bfa2aa39c..48fdd9ba60 100644 --- a/src/USER-OMP/angle_cosine_omp.cpp +++ b/src/USER-OMP/angle_cosine_omp.cpp @@ -15,6 +15,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "angle_cosine_omp.h" #include #include 
"atom.h" @@ -47,7 +48,7 @@ void AngleCosineOMP::compute(int eflag, int vflag) const int inum = neighbor->nanglelist; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/angle_cosine_periodic_omp.cpp b/src/USER-OMP/angle_cosine_periodic_omp.cpp index 700179119c..a0e45fe131 100644 --- a/src/USER-OMP/angle_cosine_periodic_omp.cpp +++ b/src/USER-OMP/angle_cosine_periodic_omp.cpp @@ -15,6 +15,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "angle_cosine_periodic_omp.h" #include #include "atom.h" @@ -49,7 +50,7 @@ void AngleCosinePeriodicOMP::compute(int eflag, int vflag) const int inum = neighbor->nanglelist; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/angle_cosine_shift_exp_omp.cpp b/src/USER-OMP/angle_cosine_shift_exp_omp.cpp index 1e37688425..21019336b3 100644 --- a/src/USER-OMP/angle_cosine_shift_exp_omp.cpp +++ b/src/USER-OMP/angle_cosine_shift_exp_omp.cpp @@ -15,6 +15,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "angle_cosine_shift_exp_omp.h" #include #include "atom.h" @@ -47,7 +48,7 @@ void AngleCosineShiftExpOMP::compute(int eflag, int vflag) const int inum = neighbor->nanglelist; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/angle_cosine_shift_omp.cpp b/src/USER-OMP/angle_cosine_shift_omp.cpp index 35b409edf1..1e6d712b62 100644 --- a/src/USER-OMP/angle_cosine_shift_omp.cpp +++ 
b/src/USER-OMP/angle_cosine_shift_omp.cpp @@ -15,6 +15,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "angle_cosine_shift_omp.h" #include #include "atom.h" @@ -47,7 +48,7 @@ void AngleCosineShiftOMP::compute(int eflag, int vflag) const int inum = neighbor->nanglelist; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/angle_cosine_squared_omp.cpp b/src/USER-OMP/angle_cosine_squared_omp.cpp index 9da5a0fce3..6df1e028a0 100644 --- a/src/USER-OMP/angle_cosine_squared_omp.cpp +++ b/src/USER-OMP/angle_cosine_squared_omp.cpp @@ -15,6 +15,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "angle_cosine_squared_omp.h" #include #include "atom.h" @@ -47,7 +48,7 @@ void AngleCosineSquaredOMP::compute(int eflag, int vflag) const int inum = neighbor->nanglelist; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/angle_dipole_omp.cpp b/src/USER-OMP/angle_dipole_omp.cpp index 33ec216f6a..26d8a7817e 100644 --- a/src/USER-OMP/angle_dipole_omp.cpp +++ b/src/USER-OMP/angle_dipole_omp.cpp @@ -15,6 +15,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "angle_dipole_omp.h" #include #include "atom.h" @@ -51,7 +52,7 @@ void AngleDipoleOMP::compute(int eflag, int vflag) const int inum = neighbor->nanglelist; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, 
tid; diff --git a/src/USER-OMP/angle_fourier_omp.cpp b/src/USER-OMP/angle_fourier_omp.cpp index 64ff99e6c6..b6ddea3ff9 100644 --- a/src/USER-OMP/angle_fourier_omp.cpp +++ b/src/USER-OMP/angle_fourier_omp.cpp @@ -15,6 +15,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "angle_fourier_omp.h" #include #include "atom.h" @@ -47,7 +48,7 @@ void AngleFourierOMP::compute(int eflag, int vflag) const int inum = neighbor->nanglelist; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/angle_fourier_simple_omp.cpp b/src/USER-OMP/angle_fourier_simple_omp.cpp index 2ae8c5561d..992acf7c99 100644 --- a/src/USER-OMP/angle_fourier_simple_omp.cpp +++ b/src/USER-OMP/angle_fourier_simple_omp.cpp @@ -15,6 +15,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "angle_fourier_simple_omp.h" #include #include "atom.h" @@ -47,7 +48,7 @@ void AngleFourierSimpleOMP::compute(int eflag, int vflag) const int inum = neighbor->nanglelist; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/angle_harmonic_omp.cpp b/src/USER-OMP/angle_harmonic_omp.cpp index 66c0602396..0cfc6f95af 100644 --- a/src/USER-OMP/angle_harmonic_omp.cpp +++ b/src/USER-OMP/angle_harmonic_omp.cpp @@ -15,6 +15,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "angle_harmonic_omp.h" #include #include "atom.h" @@ -47,7 +48,7 @@ void AngleHarmonicOMP::compute(int eflag, int vflag) const int inum = 
neighbor->nanglelist; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/angle_quartic_omp.cpp b/src/USER-OMP/angle_quartic_omp.cpp index 903b0e4225..a774edb8c4 100644 --- a/src/USER-OMP/angle_quartic_omp.cpp +++ b/src/USER-OMP/angle_quartic_omp.cpp @@ -15,6 +15,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "angle_quartic_omp.h" #include #include "atom.h" @@ -47,7 +48,7 @@ void AngleQuarticOMP::compute(int eflag, int vflag) const int inum = neighbor->nanglelist; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/angle_sdk_omp.cpp b/src/USER-OMP/angle_sdk_omp.cpp index 3c8ee9cde8..e0bc021f1b 100644 --- a/src/USER-OMP/angle_sdk_omp.cpp +++ b/src/USER-OMP/angle_sdk_omp.cpp @@ -15,6 +15,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "angle_sdk_omp.h" #include #include "atom.h" @@ -49,7 +50,7 @@ void AngleSDKOMP::compute(int eflag, int vflag) const int inum = neighbor->nanglelist; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/angle_table_omp.cpp b/src/USER-OMP/angle_table_omp.cpp index de36d09980..1af60f85f6 100644 --- a/src/USER-OMP/angle_table_omp.cpp +++ b/src/USER-OMP/angle_table_omp.cpp @@ -15,6 +15,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "angle_table_omp.h" #include #include "atom.h" 
@@ -47,7 +48,7 @@ void AngleTableOMP::compute(int eflag, int vflag) const int inum = neighbor->nanglelist; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/bond_class2_omp.cpp b/src/USER-OMP/bond_class2_omp.cpp index 9da50d1aa0..06edf46024 100644 --- a/src/USER-OMP/bond_class2_omp.cpp +++ b/src/USER-OMP/bond_class2_omp.cpp @@ -15,6 +15,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "bond_class2_omp.h" #include "atom.h" #include "comm.h" @@ -47,7 +48,7 @@ void BondClass2OMP::compute(int eflag, int vflag) const int inum = neighbor->nbondlist; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/bond_fene_expand_omp.cpp b/src/USER-OMP/bond_fene_expand_omp.cpp index a937cb6c34..5f3dd48841 100644 --- a/src/USER-OMP/bond_fene_expand_omp.cpp +++ b/src/USER-OMP/bond_fene_expand_omp.cpp @@ -15,6 +15,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "bond_fene_expand_omp.h" #include "atom.h" #include "comm.h" @@ -48,7 +49,7 @@ void BondFENEExpandOMP::compute(int eflag, int vflag) const int inum = neighbor->nbondlist; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/bond_fene_omp.cpp b/src/USER-OMP/bond_fene_omp.cpp index 3c653fedae..ebf5afdc57 100644 --- a/src/USER-OMP/bond_fene_omp.cpp +++ b/src/USER-OMP/bond_fene_omp.cpp @@ -15,6 +15,7 @@ Contributing author: Axel Kohlmeyer (Temple U) 
------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "bond_fene_omp.h" #include "atom.h" #include "comm.h" @@ -48,7 +49,7 @@ void BondFENEOMP::compute(int eflag, int vflag) const int inum = neighbor->nbondlist; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/bond_gromos_omp.cpp b/src/USER-OMP/bond_gromos_omp.cpp index cedd5f96b7..e5eebfebfc 100644 --- a/src/USER-OMP/bond_gromos_omp.cpp +++ b/src/USER-OMP/bond_gromos_omp.cpp @@ -15,6 +15,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "bond_gromos_omp.h" #include "atom.h" #include "comm.h" @@ -44,7 +45,7 @@ void BondGromosOMP::compute(int eflag, int vflag) const int inum = neighbor->nbondlist; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/bond_harmonic_omp.cpp b/src/USER-OMP/bond_harmonic_omp.cpp index 4144f02a39..9160ee7023 100644 --- a/src/USER-OMP/bond_harmonic_omp.cpp +++ b/src/USER-OMP/bond_harmonic_omp.cpp @@ -15,6 +15,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "bond_harmonic_omp.h" #include "atom.h" #include "comm.h" @@ -46,7 +47,7 @@ void BondHarmonicOMP::compute(int eflag, int vflag) const int inum = neighbor->nbondlist; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/bond_harmonic_shift_cut_omp.cpp b/src/USER-OMP/bond_harmonic_shift_cut_omp.cpp index 10e4cdcd1c..7a6c616e4a 100644 
--- a/src/USER-OMP/bond_harmonic_shift_cut_omp.cpp +++ b/src/USER-OMP/bond_harmonic_shift_cut_omp.cpp @@ -15,6 +15,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "bond_harmonic_shift_cut_omp.h" #include "atom.h" #include "comm.h" @@ -46,7 +47,7 @@ void BondHarmonicShiftCutOMP::compute(int eflag, int vflag) const int inum = neighbor->nbondlist; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/bond_harmonic_shift_omp.cpp b/src/USER-OMP/bond_harmonic_shift_omp.cpp index a0a7750890..19f0a08510 100644 --- a/src/USER-OMP/bond_harmonic_shift_omp.cpp +++ b/src/USER-OMP/bond_harmonic_shift_omp.cpp @@ -15,6 +15,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "bond_harmonic_shift_omp.h" #include "atom.h" #include "comm.h" @@ -46,7 +47,7 @@ void BondHarmonicShiftOMP::compute(int eflag, int vflag) const int inum = neighbor->nbondlist; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/bond_morse_omp.cpp b/src/USER-OMP/bond_morse_omp.cpp index 32361cb5de..90318c8faf 100644 --- a/src/USER-OMP/bond_morse_omp.cpp +++ b/src/USER-OMP/bond_morse_omp.cpp @@ -15,6 +15,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "bond_morse_omp.h" #include "atom.h" #include "comm.h" @@ -46,7 +47,7 @@ void BondMorseOMP::compute(int eflag, int vflag) const int inum = neighbor->nbondlist; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp 
parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/bond_nonlinear_omp.cpp b/src/USER-OMP/bond_nonlinear_omp.cpp index f21772435f..809950f2b7 100644 --- a/src/USER-OMP/bond_nonlinear_omp.cpp +++ b/src/USER-OMP/bond_nonlinear_omp.cpp @@ -15,6 +15,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "bond_nonlinear_omp.h" #include "atom.h" #include "comm.h" @@ -46,7 +47,7 @@ void BondNonlinearOMP::compute(int eflag, int vflag) const int inum = neighbor->nbondlist; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/bond_quartic_omp.cpp b/src/USER-OMP/bond_quartic_omp.cpp index 46ee4ab96b..0efaa10e37 100644 --- a/src/USER-OMP/bond_quartic_omp.cpp +++ b/src/USER-OMP/bond_quartic_omp.cpp @@ -15,6 +15,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "bond_quartic_omp.h" #include "atom.h" #include "comm.h" @@ -52,7 +53,7 @@ void BondQuarticOMP::compute(int eflag, int vflag) const int inum = neighbor->nbondlist; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/bond_table_omp.cpp b/src/USER-OMP/bond_table_omp.cpp index db1599fbcc..f503bcab26 100644 --- a/src/USER-OMP/bond_table_omp.cpp +++ b/src/USER-OMP/bond_table_omp.cpp @@ -15,6 +15,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "bond_table_omp.h" #include "atom.h" #include "comm.h" @@ -46,7 +47,7 @@ void BondTableOMP::compute(int eflag, int vflag) 
const int inum = neighbor->nbondlist; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/dihedral_charmm_omp.cpp b/src/USER-OMP/dihedral_charmm_omp.cpp index 242a0a1d86..afd3071434 100644 --- a/src/USER-OMP/dihedral_charmm_omp.cpp +++ b/src/USER-OMP/dihedral_charmm_omp.cpp @@ -15,6 +15,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "dihedral_charmm_omp.h" #include #include "atom.h" @@ -56,7 +57,7 @@ void DihedralCharmmOMP::compute(int eflag, int vflag) const int inum = neighbor->ndihedrallist; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/dihedral_class2_omp.cpp b/src/USER-OMP/dihedral_class2_omp.cpp index 215677612b..49b8659e2f 100644 --- a/src/USER-OMP/dihedral_class2_omp.cpp +++ b/src/USER-OMP/dihedral_class2_omp.cpp @@ -15,6 +15,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "dihedral_class2_omp.h" #include "atom.h" @@ -50,7 +51,7 @@ void DihedralClass2OMP::compute(int eflag, int vflag) const int inum = neighbor->ndihedrallist; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/dihedral_cosine_shift_exp_omp.cpp b/src/USER-OMP/dihedral_cosine_shift_exp_omp.cpp index f42121f8a9..cea22adf1b 100644 --- a/src/USER-OMP/dihedral_cosine_shift_exp_omp.cpp +++ b/src/USER-OMP/dihedral_cosine_shift_exp_omp.cpp @@ -15,6 +15,7 @@ Contributing author: Axel Kohlmeyer (Temple U) 
------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "dihedral_cosine_shift_exp_omp.h" #include "atom.h" @@ -50,7 +51,7 @@ void DihedralCosineShiftExpOMP::compute(int eflag, int vflag) const int inum = neighbor->ndihedrallist; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/dihedral_fourier_omp.cpp b/src/USER-OMP/dihedral_fourier_omp.cpp index cd12b3630e..756931774a 100644 --- a/src/USER-OMP/dihedral_fourier_omp.cpp +++ b/src/USER-OMP/dihedral_fourier_omp.cpp @@ -15,6 +15,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "dihedral_fourier_omp.h" #include #include "atom.h" @@ -49,7 +50,7 @@ void DihedralFourierOMP::compute(int eflag, int vflag) const int inum = neighbor->ndihedrallist; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/dihedral_harmonic_omp.cpp b/src/USER-OMP/dihedral_harmonic_omp.cpp index c3adb113e2..d1f511954e 100644 --- a/src/USER-OMP/dihedral_harmonic_omp.cpp +++ b/src/USER-OMP/dihedral_harmonic_omp.cpp @@ -15,6 +15,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "dihedral_harmonic_omp.h" #include "atom.h" @@ -50,7 +51,7 @@ void DihedralHarmonicOMP::compute(int eflag, int vflag) const int inum = neighbor->ndihedrallist; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/dihedral_helix_omp.cpp 
b/src/USER-OMP/dihedral_helix_omp.cpp index b38ff2739a..5289fc2c62 100644 --- a/src/USER-OMP/dihedral_helix_omp.cpp +++ b/src/USER-OMP/dihedral_helix_omp.cpp @@ -15,6 +15,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "dihedral_helix_omp.h" #include "atom.h" @@ -53,7 +54,7 @@ void DihedralHelixOMP::compute(int eflag, int vflag) const int inum = neighbor->ndihedrallist; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/dihedral_multi_harmonic_omp.cpp b/src/USER-OMP/dihedral_multi_harmonic_omp.cpp index 7b79a63722..edf8a20899 100644 --- a/src/USER-OMP/dihedral_multi_harmonic_omp.cpp +++ b/src/USER-OMP/dihedral_multi_harmonic_omp.cpp @@ -15,6 +15,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "dihedral_multi_harmonic_omp.h" #include "atom.h" @@ -50,7 +51,7 @@ void DihedralMultiHarmonicOMP::compute(int eflag, int vflag) const int inum = neighbor->ndihedrallist; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/dihedral_nharmonic_omp.cpp b/src/USER-OMP/dihedral_nharmonic_omp.cpp index f3d8471c95..bf51a01713 100644 --- a/src/USER-OMP/dihedral_nharmonic_omp.cpp +++ b/src/USER-OMP/dihedral_nharmonic_omp.cpp @@ -15,6 +15,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "dihedral_nharmonic_omp.h" #include "atom.h" @@ -50,7 +51,7 @@ void DihedralNHarmonicOMP::compute(int eflag, int vflag) const int inum = 
neighbor->ndihedrallist; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/dihedral_opls_omp.cpp b/src/USER-OMP/dihedral_opls_omp.cpp index 24cc4cd064..8814d3f3f0 100644 --- a/src/USER-OMP/dihedral_opls_omp.cpp +++ b/src/USER-OMP/dihedral_opls_omp.cpp @@ -15,6 +15,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "dihedral_opls_omp.h" #include "atom.h" @@ -51,7 +52,7 @@ void DihedralOPLSOMP::compute(int eflag, int vflag) const int inum = neighbor->ndihedrallist; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/dihedral_quadratic_omp.cpp b/src/USER-OMP/dihedral_quadratic_omp.cpp index 6f82c1e6b0..e61c5f0d85 100644 --- a/src/USER-OMP/dihedral_quadratic_omp.cpp +++ b/src/USER-OMP/dihedral_quadratic_omp.cpp @@ -15,6 +15,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "dihedral_quadratic_omp.h" #include "atom.h" @@ -51,7 +52,7 @@ void DihedralQuadraticOMP::compute(int eflag, int vflag) const int inum = neighbor->ndihedrallist; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/dihedral_table_omp.cpp b/src/USER-OMP/dihedral_table_omp.cpp index a760fc6094..bf5332a1c1 100644 --- a/src/USER-OMP/dihedral_table_omp.cpp +++ b/src/USER-OMP/dihedral_table_omp.cpp @@ -15,6 +15,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ 
+#include "omp_compat.h" #include "dihedral_table_omp.h" #include #include "atom.h" @@ -113,7 +114,7 @@ void DihedralTableOMP::compute(int eflag, int vflag) const int inum = neighbor->ndihedrallist; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/domain_omp.cpp b/src/USER-OMP/domain_omp.cpp index 18d2a587ca..dfd3d3590f 100644 --- a/src/USER-OMP/domain_omp.cpp +++ b/src/USER-OMP/domain_omp.cpp @@ -15,6 +15,7 @@ Contributing author : Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "accelerator_omp.h" #include "atom.h" @@ -45,7 +46,7 @@ void DomainOMP::pbc() const int nlocal = atom->nlocal; #if defined(_OPENMP) -#pragma omp parallel for default(none) schedule(static) +#pragma omp parallel for LMP_DEFAULT_NONE schedule(static) #endif for (int i = 0; i < nlocal; i++) { imageint idim,otherdims; @@ -143,7 +144,7 @@ void DomainOMP::lamda2x(int n) const int num = n; #if defined(_OPENMP) -#pragma omp parallel for default(none) schedule(static) +#pragma omp parallel for LMP_DEFAULT_NONE schedule(static) #endif for (int i = 0; i < num; i++) { x[i].x = h[0]*x[i].x + h[5]*x[i].y + h[4]*x[i].z + boxlo[0]; @@ -163,7 +164,7 @@ void DomainOMP::x2lamda(int n) const int num = n; #if defined(_OPENMP) -#pragma omp parallel for default(none) schedule(static) +#pragma omp parallel for LMP_DEFAULT_NONE schedule(static) #endif for (int i = 0; i < num; i++) { double delta0 = x[i].x - boxlo[0]; diff --git a/src/USER-OMP/ewald_omp.cpp b/src/USER-OMP/ewald_omp.cpp index a539394f69..fd776e46a6 100644 --- a/src/USER-OMP/ewald_omp.cpp +++ b/src/USER-OMP/ewald_omp.cpp @@ -15,6 +15,7 @@ Contributing authors: Roy Pollock (LLNL), Paul Crozier (SNL) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include 
"ewald_omp.h" #include #include @@ -104,7 +105,7 @@ void EwaldOMP::compute(int eflag, int vflag) v0=v1=v2=v3=v4=v5=0.0; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) reduction(+:eng_tmp,v0,v1,v2,v3,v4,v5) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) reduction(+:eng_tmp,v0,v1,v2,v3,v4,v5) #endif { @@ -234,7 +235,7 @@ void EwaldOMP::eik_dot_r() const int nthreads = comm->nthreads; #if defined(_OPENMP) -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { int i,ifrom,ito,k,l,m,n,ic,tid; diff --git a/src/USER-OMP/fix_gravity_omp.cpp b/src/USER-OMP/fix_gravity_omp.cpp index 5bc1085f34..830fa0c4eb 100644 --- a/src/USER-OMP/fix_gravity_omp.cpp +++ b/src/USER-OMP/fix_gravity_omp.cpp @@ -15,6 +15,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "fix_gravity_omp.h" #include "atom.h" #include "update.h" @@ -69,7 +70,7 @@ void FixGravityOMP::post_force(int /* vflag */) if (rmass) { #if defined(_OPENMP) -#pragma omp parallel for default(none) reduction(-:grav) +#pragma omp parallel for LMP_DEFAULT_NONE reduction(-:grav) #endif for (int i = 0; i < nlocal; i++) if (mask[i] & groupbit) { @@ -81,7 +82,7 @@ void FixGravityOMP::post_force(int /* vflag */) } } else { #if defined(_OPENMP) -#pragma omp parallel for default(none) reduction(-:grav) +#pragma omp parallel for LMP_DEFAULT_NONE reduction(-:grav) #endif for (int i = 0; i < nlocal; i++) if (mask[i] & groupbit) { diff --git a/src/USER-OMP/fix_neigh_history_omp.cpp b/src/USER-OMP/fix_neigh_history_omp.cpp index 22d3fa944c..7acded0ab2 100644 --- a/src/USER-OMP/fix_neigh_history_omp.cpp +++ b/src/USER-OMP/fix_neigh_history_omp.cpp @@ -11,6 +11,7 @@ See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "fix_neigh_history_omp.h" #include #include "my_page.h" @@ -73,7 +74,7 @@ void FixNeighHistoryOMP::pre_exchange_onesided() maxpartner = 0; #if defined(_OPENMP) -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { @@ -199,7 +200,7 @@ void FixNeighHistoryOMP::pre_exchange_newton() for (int i = 0; i < nall_neigh; i++) npartner[i] = 0; #if defined(_OPENMP) -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { @@ -373,7 +374,7 @@ void FixNeighHistoryOMP::pre_exchange_no_newton() maxpartner = 0; #if defined(_OPENMP) -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { @@ -525,7 +526,7 @@ void FixNeighHistoryOMP::post_neighbor() #if defined(_OPENMP) -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { diff --git a/src/USER-OMP/fix_nh_asphere_omp.cpp b/src/USER-OMP/fix_nh_asphere_omp.cpp index 185eab5f47..9c7317bebd 100644 --- a/src/USER-OMP/fix_nh_asphere_omp.cpp +++ b/src/USER-OMP/fix_nh_asphere_omp.cpp @@ -15,6 +15,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include #include @@ -82,7 +83,7 @@ void FixNHAsphereOMP::nve_v() // merged with FixNHOMP instead of calling it for the COM update. 
#if defined(_OPENMP) -#pragma omp parallel for default(none) schedule(static) +#pragma omp parallel for LMP_DEFAULT_NONE schedule(static) #endif for (int i = 0; i < nlocal; i++) { if (mask[i] & groupbit) { @@ -122,7 +123,7 @@ void FixNHAsphereOMP::nve_x() // principal moments of inertia #if defined(_OPENMP) -#pragma omp parallel for default(none) schedule(static) +#pragma omp parallel for LMP_DEFAULT_NONE schedule(static) #endif for (int i = 0; i < nlocal; i++) if (mask[i] & groupbit) { @@ -163,7 +164,7 @@ void FixNHAsphereOMP::nh_v_temp() if (which == NOBIAS) { #if defined(_OPENMP) -#pragma omp parallel for default(none) schedule(static) +#pragma omp parallel for LMP_DEFAULT_NONE schedule(static) #endif for (int i = 0; i < nlocal; i++) { if (mask[i] & groupbit) { @@ -177,7 +178,7 @@ void FixNHAsphereOMP::nh_v_temp() } } else if (which == BIAS) { #if defined(_OPENMP) -#pragma omp parallel for default(none) schedule(static) +#pragma omp parallel for LMP_DEFAULT_NONE schedule(static) #endif for (int i = 0; i < nlocal; i++) { double buf[3]; diff --git a/src/USER-OMP/fix_nh_omp.cpp b/src/USER-OMP/fix_nh_omp.cpp index d584bcd11f..b30169b2dc 100644 --- a/src/USER-OMP/fix_nh_omp.cpp +++ b/src/USER-OMP/fix_nh_omp.cpp @@ -15,6 +15,7 @@ Contributing authors: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "fix_nh_omp.h" #include #include "atom.h" @@ -57,7 +58,7 @@ void FixNHOMP::remap() if (allremap) domain->x2lamda(nlocal); else { #if defined (_OPENMP) -#pragma omp parallel for default(none) schedule(static) +#pragma omp parallel for LMP_DEFAULT_NONE schedule(static) #endif for (int i = 0; i < nlocal; i++) if (mask[i] & dilate_group_bit) @@ -207,7 +208,7 @@ void FixNHOMP::remap() if (allremap) domain->lamda2x(nlocal); else { #if defined (_OPENMP) -#pragma omp parallel for default(none) schedule(static) +#pragma omp parallel for LMP_DEFAULT_NONE schedule(static) #endif for (int i = 0; i 
< nlocal; i++) if (mask[i] & dilate_group_bit) @@ -235,7 +236,7 @@ void FixNHOMP::nh_v_press() if (which == NOBIAS) { #if defined(_OPENMP) -#pragma omp parallel for default(none) schedule(static) +#pragma omp parallel for LMP_DEFAULT_NONE schedule(static) #endif for (int i = 0; i < nlocal; i++) { if (mask[i] & groupbit) { @@ -253,7 +254,7 @@ void FixNHOMP::nh_v_press() } } else if (which == BIAS) { #if defined(_OPENMP) -#pragma omp parallel for default(none) schedule(static) +#pragma omp parallel for LMP_DEFAULT_NONE schedule(static) #endif for (int i = 0; i < nlocal; i++) { double buf[3]; @@ -289,7 +290,7 @@ void FixNHOMP::nve_v() if (atom->rmass) { const double * _noalias const rmass = atom->rmass; #if defined(_OPENMP) -#pragma omp parallel for default(none) schedule(static) +#pragma omp parallel for LMP_DEFAULT_NONE schedule(static) #endif for (int i = 0; i < nlocal; i++) { if (mask[i] & groupbit) { @@ -303,7 +304,7 @@ void FixNHOMP::nve_v() const double *_noalias const mass = atom->mass; const int * _noalias const type = atom->type; #if defined(_OPENMP) -#pragma omp parallel for default(none) schedule(static) +#pragma omp parallel for LMP_DEFAULT_NONE schedule(static) #endif for (int i = 0; i < nlocal; i++) { if (mask[i] & groupbit) { @@ -330,7 +331,7 @@ void FixNHOMP::nve_x() // x update by full step only for atoms in group #if defined(_OPENMP) -#pragma omp parallel for default(none) schedule(static) +#pragma omp parallel for LMP_DEFAULT_NONE schedule(static) #endif for (int i = 0; i < nlocal; i++) { if (mask[i] & groupbit) { @@ -352,7 +353,7 @@ void FixNHOMP::nh_v_temp() if (which == NOBIAS) { #if defined(_OPENMP) -#pragma omp parallel for default(none) schedule(static) +#pragma omp parallel for LMP_DEFAULT_NONE schedule(static) #endif for (int i = 0; i < nlocal; i++) { if (mask[i] & groupbit) { @@ -363,7 +364,7 @@ void FixNHOMP::nh_v_temp() } } else if (which == BIAS) { #if defined(_OPENMP) -#pragma omp parallel for default(none) schedule(static) +#pragma 
omp parallel for LMP_DEFAULT_NONE schedule(static) #endif for (int i = 0; i < nlocal; i++) { double buf[3]; diff --git a/src/USER-OMP/fix_nh_sphere_omp.cpp b/src/USER-OMP/fix_nh_sphere_omp.cpp index 0048ae7ff7..dd9a8addf4 100644 --- a/src/USER-OMP/fix_nh_sphere_omp.cpp +++ b/src/USER-OMP/fix_nh_sphere_omp.cpp @@ -15,6 +15,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "fix_nh_sphere_omp.h" #include "atom.h" #include "compute.h" @@ -85,7 +86,7 @@ void FixNHSphereOMP::nve_v() // 4 cases depending on radius vs shape and rmass vs mass #if defined(_OPENMP) -#pragma omp parallel for default(none) schedule(static) +#pragma omp parallel for LMP_DEFAULT_NONE schedule(static) #endif for (int i = 0; i < nlocal; i++) { if (mask[i] & groupbit) { @@ -115,7 +116,7 @@ void FixNHSphereOMP::nh_v_temp() if (which == NOBIAS) { #if defined(_OPENMP) -#pragma omp parallel for default(none) schedule(static) +#pragma omp parallel for LMP_DEFAULT_NONE schedule(static) #endif for (int i = 0; i < nlocal; i++) { if (mask[i] & groupbit) { @@ -129,7 +130,7 @@ void FixNHSphereOMP::nh_v_temp() } } else if (which == BIAS) { #if defined(_OPENMP) -#pragma omp parallel for default(none) schedule(static) +#pragma omp parallel for LMP_DEFAULT_NONE schedule(static) #endif for (int i = 0; i < nlocal; i++) { double buf[3]; diff --git a/src/USER-OMP/fix_nve_omp.cpp b/src/USER-OMP/fix_nve_omp.cpp index f693c2fa20..13c1dfb642 100644 --- a/src/USER-OMP/fix_nve_omp.cpp +++ b/src/USER-OMP/fix_nve_omp.cpp @@ -11,6 +11,7 @@ See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "fix_nve_omp.h" #include "atom.h" @@ -41,7 +42,7 @@ void FixNVEOMP::initial_integrate(int /* vflag */) if (atom->rmass) { const double * const rmass = atom->rmass; #if defined (_OPENMP) -#pragma omp parallel for default(none) schedule(static) +#pragma omp parallel for LMP_DEFAULT_NONE schedule(static) #endif for (int i = 0; i < nlocal; i++) if (mask[i] & groupbit) { @@ -58,7 +59,7 @@ void FixNVEOMP::initial_integrate(int /* vflag */) const double * const mass = atom->mass; const int * const type = atom->type; #if defined (_OPENMP) -#pragma omp parallel for default(none) schedule(static) +#pragma omp parallel for LMP_DEFAULT_NONE schedule(static) #endif for (int i = 0; i < nlocal; i++) if (mask[i] & groupbit) { @@ -87,7 +88,7 @@ void FixNVEOMP::final_integrate() if (atom->rmass) { const double * const rmass = atom->rmass; #if defined (_OPENMP) -#pragma omp parallel for default(none) schedule(static) +#pragma omp parallel for LMP_DEFAULT_NONE schedule(static) #endif for (int i = 0; i < nlocal; i++) if (mask[i] & groupbit) { @@ -101,7 +102,7 @@ void FixNVEOMP::final_integrate() const double * const mass = atom->mass; const int * const type = atom->type; #if defined (_OPENMP) -#pragma omp parallel for default(none) schedule(static) +#pragma omp parallel for LMP_DEFAULT_NONE schedule(static) #endif for (int i = 0; i < nlocal; i++) if (mask[i] & groupbit) { diff --git a/src/USER-OMP/fix_nve_sphere_omp.cpp b/src/USER-OMP/fix_nve_sphere_omp.cpp index bc7be4019c..4a1bd4dfda 100644 --- a/src/USER-OMP/fix_nve_sphere_omp.cpp +++ b/src/USER-OMP/fix_nve_sphere_omp.cpp @@ -11,6 +11,7 @@ See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "fix_nve_sphere_omp.h" #include #include "atom.h" @@ -49,7 +50,7 @@ void FixNVESphereOMP::initial_integrate(int /* vflag */) // update v,x,omega for all particles // d_omega/dt = torque / inertia #if defined(_OPENMP) -#pragma omp parallel for default(none) +#pragma omp parallel for LMP_DEFAULT_NONE #endif for (int i = 0; i < nlocal; i++) { if (mask[i] & groupbit) { @@ -76,7 +77,7 @@ void FixNVESphereOMP::initial_integrate(int /* vflag */) double * const * const mu = atom->mu; if (dlm == NODLM) { #if defined(_OPENMP) -#pragma omp parallel for default(none) +#pragma omp parallel for LMP_DEFAULT_NONE #endif for (int i = 0; i < nlocal; i++) { double g0,g1,g2,msq,scale; @@ -95,7 +96,7 @@ void FixNVESphereOMP::initial_integrate(int /* vflag */) } } else { #if defined(_OPENMP) -#pragma omp parallel for default(none) +#pragma omp parallel for LMP_DEFAULT_NONE #endif // Integrate orientation following Dullweber-Leimkuhler-Maclachlan scheme for (int i = 0; i < nlocal; i++) { @@ -223,7 +224,7 @@ void FixNVESphereOMP::final_integrate() // d_omega/dt = torque / inertia #if defined(_OPENMP) -#pragma omp parallel for default(none) +#pragma omp parallel for LMP_DEFAULT_NONE #endif for (int i = 0; i < nlocal; i++) if (mask[i] & groupbit) { diff --git a/src/USER-OMP/fix_nvt_sllod_omp.cpp b/src/USER-OMP/fix_nvt_sllod_omp.cpp index 9b3b515415..0d23d6c03f 100644 --- a/src/USER-OMP/fix_nvt_sllod_omp.cpp +++ b/src/USER-OMP/fix_nvt_sllod_omp.cpp @@ -15,6 +15,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "fix_nvt_sllod_omp.h" #include #include "math_extra.h" @@ -114,7 +115,7 @@ void FixNVTSllodOMP::nh_v_temp() MathExtra::multiply_shape_shape(domain->h_rate,domain->h_inv,h_two); #if defined(_OPENMP) -#pragma omp parallel for default(none) shared(h_two) 
schedule(static) +#pragma omp parallel for LMP_DEFAULT_NONE LMP_SHARED(h_two) schedule(static) #endif for (int i = 0; i < nlocal; i++) { double vdelu0,vdelu1,vdelu2,buf[3]; diff --git a/src/USER-OMP/fix_omp.cpp b/src/USER-OMP/fix_omp.cpp index 1fca1e739b..25a3a09816 100644 --- a/src/USER-OMP/fix_omp.cpp +++ b/src/USER-OMP/fix_omp.cpp @@ -16,6 +16,7 @@ OpenMP based threading support for LAMMPS ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "atom.h" #include "comm.h" #include "error.h" @@ -70,7 +71,7 @@ FixOMP::FixOMP(LAMMPS *lmp, int narg, char **arg) if (narg > 3) { #if defined(_OPENMP) if (strcmp(arg[3],"0") == 0) -#pragma omp parallel default(none) shared(nthreads) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(nthreads) nthreads = omp_get_num_threads(); else nthreads = force->inumeric(FLERR,arg[3]); @@ -134,7 +135,7 @@ FixOMP::FixOMP(LAMMPS *lmp, int narg, char **arg) thr = new ThrData *[nthreads]; _nthr = nthreads; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(lmp) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(lmp) #endif { const int tid = get_tid(); @@ -186,7 +187,7 @@ void FixOMP::init() thr = new ThrData *[nthreads]; _nthr = nthreads; #if defined(_OPENMP) -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { const int tid = get_tid(); @@ -350,7 +351,7 @@ void FixOMP::pre_force(int) double *drho = atom->drho; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(f,torque,erforce,de,drho) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(f,torque,erforce,de,drho) #endif { const int tid = get_tid(); diff --git a/src/USER-OMP/fix_rigid_nh_omp.cpp b/src/USER-OMP/fix_rigid_nh_omp.cpp index da512cb428..63084fcc9e 100644 --- a/src/USER-OMP/fix_rigid_nh_omp.cpp +++ b/src/USER-OMP/fix_rigid_nh_omp.cpp @@ -15,6 +15,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ 
+#include "omp_compat.h" #include "fix_rigid_nh_omp.h" #include #include @@ -89,7 +90,7 @@ void FixRigidNHOMP::initial_integrate(int vflag) double akt=0.0, akr=0.0; #if defined(_OPENMP) -#pragma omp parallel for default(none) shared(scale_r,scale_t,scale_v) schedule(static) reduction(+:akt,akr) +#pragma omp parallel for LMP_DEFAULT_NONE LMP_SHARED(scale_r,scale_t,scale_v) schedule(static) reduction(+:akt,akr) #endif for (int ibody = 0; ibody < nbody; ibody++) { double mbody[3],tbody[3],fquat[4]; @@ -250,7 +251,7 @@ void FixRigidNHOMP::compute_forces_and_torques() int i; #if defined(_OPENMP) -#pragma omp parallel for default(none) private(i) reduction(+:s0,s1,s2,s3,s4,s5) +#pragma omp parallel for LMP_DEFAULT_NONE private(i) reduction(+:s0,s1,s2,s3,s4,s5) #endif for (i = 0; i < nlocal; i++) { const int ibody = body[i]; @@ -289,7 +290,7 @@ void FixRigidNHOMP::compute_forces_and_torques() int i; #if defined(_OPENMP) -#pragma omp parallel for default(none) private(i) shared(ib) reduction(+:s0,s1,s2,s3,s4,s5) +#pragma omp parallel for LMP_DEFAULT_NONE private(i) LMP_SHARED(ib) reduction(+:s0,s1,s2,s3,s4,s5) #endif for (i = 0; i < nlocal; i++) { const int ibody = body[i]; @@ -330,7 +331,7 @@ void FixRigidNHOMP::compute_forces_and_torques() memset(&sum[0][0],0,6*nbody*sizeof(double)); #if defined(_OPENMP) -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { #if defined(_OPENMP) @@ -373,7 +374,7 @@ void FixRigidNHOMP::compute_forces_and_torques() MPI_Allreduce(sum[0],all[0],6*nbody,MPI_DOUBLE,MPI_SUM,world); #if defined(_OPENMP) -#pragma omp parallel for default(none) private(ibody) schedule(static) +#pragma omp parallel for LMP_DEFAULT_NONE private(ibody) schedule(static) #endif for (ibody = 0; ibody < nbody; ibody++) { fcm[ibody][0] = all[ibody][0] + langextra[ibody][0]; @@ -388,7 +389,7 @@ void FixRigidNHOMP::compute_forces_and_torques() if (id_gravity) { #if defined(_OPENMP) -#pragma omp parallel for default(none) private(ibody) 
schedule(static) +#pragma omp parallel for LMP_DEFAULT_NONE private(ibody) schedule(static) #endif for (ibody = 0; ibody < nbody; ibody++) { fcm[ibody][0] += gvec[0]*masstotal[ibody]; @@ -433,7 +434,7 @@ void FixRigidNHOMP::final_integrate() const double dtf2 = dtf * 2.0; #if defined(_OPENMP) -#pragma omp parallel for default(none) shared(scale_t,scale_r) schedule(static) reduction(+:akt,akr) +#pragma omp parallel for LMP_DEFAULT_NONE LMP_SHARED(scale_t,scale_r) schedule(static) reduction(+:akt,akr) #endif for (int ibody = 0; ibody < nbody; ibody++) { double mbody[3],tbody[3],fquat[4]; @@ -554,7 +555,7 @@ void FixRigidNHOMP::remap() if (allremap) domain->x2lamda(nlocal); else { #if defined (_OPENMP) -#pragma omp parallel for default(none) schedule(static) +#pragma omp parallel for LMP_DEFAULT_NONE schedule(static) #endif for (int i = 0; i < nlocal; i++) if (mask[i] & dilate_group_bit) @@ -586,7 +587,7 @@ void FixRigidNHOMP::remap() if (allremap) domain->lamda2x(nlocal); else { #if defined (_OPENMP) -#pragma omp parallel for default(none) schedule(static) +#pragma omp parallel for LMP_DEFAULT_NONE schedule(static) #endif for (int i = 0; i < nlocal; i++) if (mask[i] & dilate_group_bit) @@ -631,7 +632,7 @@ void FixRigidNHOMP::set_xv_thr() int i; #if defined(_OPENMP) -#pragma omp parallel for default(none) private(i) reduction(+:v0,v1,v2,v3,v4,v5) +#pragma omp parallel for LMP_DEFAULT_NONE private(i) reduction(+:v0,v1,v2,v3,v4,v5) #endif for (i = 0; i < nlocal; i++) { const int ibody = body[i]; @@ -832,7 +833,7 @@ void FixRigidNHOMP::set_v_thr() int i; #if defined(_OPENMP) -#pragma omp parallel for default(none) private(i) reduction(+:v0,v1,v2,v3,v4,v5) +#pragma omp parallel for LMP_DEFAULT_NONE private(i) reduction(+:v0,v1,v2,v3,v4,v5) #endif for (i = 0; i < nlocal; i++) { const int ibody = body[i]; diff --git a/src/USER-OMP/fix_rigid_omp.cpp b/src/USER-OMP/fix_rigid_omp.cpp index 770361d557..9f78f6dc26 100644 --- a/src/USER-OMP/fix_rigid_omp.cpp +++ 
b/src/USER-OMP/fix_rigid_omp.cpp @@ -15,6 +15,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "fix_rigid_omp.h" #include #include @@ -47,7 +48,7 @@ typedef struct { double x,y,z; } dbl3_t; void FixRigidOMP::initial_integrate(int vflag) { #if defined(_OPENMP) -#pragma omp parallel for default(none) schedule(static) +#pragma omp parallel for LMP_DEFAULT_NONE schedule(static) #endif for (int ibody = 0; ibody < nbody; ibody++) { @@ -120,7 +121,7 @@ void FixRigidOMP::compute_forces_and_torques() double s0=0.0,s1=0.0,s2=0.0,s3=0.0,s4=0.0,s5=0.0; #if defined(_OPENMP) -#pragma omp parallel for default(none) reduction(+:s0,s1,s2,s3,s4,s5) +#pragma omp parallel for LMP_DEFAULT_NONE reduction(+:s0,s1,s2,s3,s4,s5) #endif for (int i = 0; i < nlocal; i++) { const int ibody = body[i]; @@ -158,7 +159,7 @@ void FixRigidOMP::compute_forces_and_torques() double s0=0.0,s1=0.0,s2=0.0,s3=0.0,s4=0.0,s5=0.0; #if defined(_OPENMP) -#pragma omp parallel for default(none) shared(ib) reduction(+:s0,s1,s2,s3,s4,s5) +#pragma omp parallel for LMP_DEFAULT_NONE LMP_SHARED(ib) reduction(+:s0,s1,s2,s3,s4,s5) #endif for (int i = 0; i < nlocal; i++) { const int ibody = body[i]; @@ -199,7 +200,7 @@ void FixRigidOMP::compute_forces_and_torques() memset(&sum[0][0],0,6*nbody*sizeof(double)); #if defined(_OPENMP) -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { #if defined(_OPENMP) @@ -246,7 +247,7 @@ void FixRigidOMP::compute_forces_and_torques() // fflag,tflag = 0 for some dimensions in 2d #if defined(_OPENMP) -#pragma omp parallel for default(none) schedule(static) +#pragma omp parallel for LMP_DEFAULT_NONE schedule(static) #endif for (int ibody = 0; ibody < nbody; ibody++) { fcm[ibody][0] = all[ibody][0] + langextra[ibody][0]; @@ -261,7 +262,7 @@ void FixRigidOMP::compute_forces_and_torques() if (id_gravity) { #if defined(_OPENMP) -#pragma omp parallel for 
default(none) schedule(static) +#pragma omp parallel for LMP_DEFAULT_NONE schedule(static) #endif for (int ibody = 0; ibody < nbody; ibody++) { fcm[ibody][0] += gvec[0]*masstotal[ibody]; @@ -280,7 +281,7 @@ void FixRigidOMP::final_integrate() // update vcm and angmom #if defined(_OPENMP) -#pragma omp parallel for default(none) schedule(static) +#pragma omp parallel for LMP_DEFAULT_NONE schedule(static) #endif for (int ibody = 0; ibody < nbody; ibody++) { @@ -346,7 +347,7 @@ void FixRigidOMP::set_xv_thr() const int nlocal = atom->nlocal; #if defined(_OPENMP) -#pragma omp parallel for default(none) reduction(+:v0,v1,v2,v3,v4,v5) +#pragma omp parallel for LMP_DEFAULT_NONE reduction(+:v0,v1,v2,v3,v4,v5) #endif for (int i = 0; i < nlocal; i++) { const int ibody = body[i]; @@ -546,7 +547,7 @@ void FixRigidOMP::set_v_thr() const int nlocal = atom->nlocal; #if defined(_OPENMP) -#pragma omp parallel for default(none) reduction(+:v0,v1,v2,v3,v4,v5) +#pragma omp parallel for LMP_DEFAULT_NONE reduction(+:v0,v1,v2,v3,v4,v5) #endif for (int i = 0; i < nlocal; i++) { const int ibody = body[i]; diff --git a/src/USER-OMP/fix_rigid_small_omp.cpp b/src/USER-OMP/fix_rigid_small_omp.cpp index fc6b6fa57a..227b0e1f8a 100644 --- a/src/USER-OMP/fix_rigid_small_omp.cpp +++ b/src/USER-OMP/fix_rigid_small_omp.cpp @@ -15,6 +15,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "fix_rigid_small_omp.h" #include #include "atom.h" @@ -46,7 +47,7 @@ void FixRigidSmallOMP::initial_integrate(int vflag) { #if defined(_OPENMP) -#pragma omp parallel for default(none) schedule(static) +#pragma omp parallel for LMP_DEFAULT_NONE schedule(static) #endif for (int ibody = 0; ibody < nlocal_body; ibody++) { @@ -117,7 +118,7 @@ void FixRigidSmallOMP::compute_forces_and_torques() const int nthreads=comm->nthreads; #if defined(_OPENMP) -#pragma omp parallel for default(none) schedule(static) +#pragma 
omp parallel for LMP_DEFAULT_NONE schedule(static) #endif for (int ibody = 0; ibody < nlocal_body+nghost_body; ibody++) { double * _noalias const fcm = body[ibody].fcm; @@ -132,7 +133,7 @@ void FixRigidSmallOMP::compute_forces_and_torques() // and then each thread only processes some bodies. #if defined(_OPENMP) -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { #if defined(_OPENMP) @@ -183,7 +184,7 @@ void FixRigidSmallOMP::compute_forces_and_torques() if (langflag) { #if defined(_OPENMP) -#pragma omp parallel for default(none) schedule(static) +#pragma omp parallel for LMP_DEFAULT_NONE schedule(static) #endif for (int ibody = 0; ibody < nlocal_body; ibody++) { double * _noalias const fcm = body[ibody].fcm; @@ -201,7 +202,7 @@ void FixRigidSmallOMP::compute_forces_and_torques() if (id_gravity) { #if defined(_OPENMP) -#pragma omp parallel for default(none) schedule(static) +#pragma omp parallel for LMP_DEFAULT_NONE schedule(static) #endif for (int ibody = 0; ibody < nbody; ibody++) { double * _noalias const fcm = body[ibody].fcm; @@ -222,7 +223,7 @@ void FixRigidSmallOMP::final_integrate() // update vcm and angmom, recompute omega #if defined(_OPENMP) -#pragma omp parallel for default(none) schedule(static) +#pragma omp parallel for LMP_DEFAULT_NONE schedule(static) #endif for (int ibody = 0; ibody < nlocal_body; ibody++) { Body &b = body[ibody]; @@ -294,7 +295,7 @@ void FixRigidSmallOMP::set_xv_thr() const int nlocal = atom->nlocal; #if defined(_OPENMP) -#pragma omp parallel for default(none) reduction(+:v0,v1,v2,v3,v4,v5) +#pragma omp parallel for LMP_DEFAULT_NONE reduction(+:v0,v1,v2,v3,v4,v5) #endif for (int i = 0; i < nlocal; i++) { const int ibody = atom2body[i]; @@ -489,7 +490,7 @@ void FixRigidSmallOMP::set_v_thr() const int nlocal = atom->nlocal; #if defined(_OPENMP) -#pragma omp parallel for default(none) reduction(+:v0,v1,v2,v3,v4,v5) +#pragma omp parallel for LMP_DEFAULT_NONE reduction(+:v0,v1,v2,v3,v4,v5) #endif for 
(int i = 0; i < nlocal; i++) { const int ibody = atom2body[i]; diff --git a/src/USER-OMP/improper_class2_omp.cpp b/src/USER-OMP/improper_class2_omp.cpp index 32c7406ada..8927980951 100644 --- a/src/USER-OMP/improper_class2_omp.cpp +++ b/src/USER-OMP/improper_class2_omp.cpp @@ -15,6 +15,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "improper_class2_omp.h" #include "atom.h" @@ -50,7 +51,7 @@ void ImproperClass2OMP::compute(int eflag, int vflag) const int inum = neighbor->nimproperlist; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/improper_cossq_omp.cpp b/src/USER-OMP/improper_cossq_omp.cpp index 230f13eac7..72d76e8c42 100644 --- a/src/USER-OMP/improper_cossq_omp.cpp +++ b/src/USER-OMP/improper_cossq_omp.cpp @@ -15,6 +15,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "improper_cossq_omp.h" #include "atom.h" @@ -50,7 +51,7 @@ void ImproperCossqOMP::compute(int eflag, int vflag) const int inum = neighbor->nimproperlist; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/improper_cvff_omp.cpp b/src/USER-OMP/improper_cvff_omp.cpp index f5ff590775..e9ff4bfc73 100644 --- a/src/USER-OMP/improper_cvff_omp.cpp +++ b/src/USER-OMP/improper_cvff_omp.cpp @@ -15,6 +15,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "improper_cvff_omp.h" #include "atom.h" @@ -50,7 +51,7 @@ void ImproperCvffOMP::compute(int eflag, int vflag) 
const int inum = neighbor->nimproperlist; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/improper_fourier_omp.cpp b/src/USER-OMP/improper_fourier_omp.cpp index 4e83c025d0..0671bdc375 100644 --- a/src/USER-OMP/improper_fourier_omp.cpp +++ b/src/USER-OMP/improper_fourier_omp.cpp @@ -15,6 +15,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "improper_fourier_omp.h" #include "atom.h" @@ -50,7 +51,7 @@ void ImproperFourierOMP::compute(int eflag, int vflag) const int inum = neighbor->nimproperlist; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/improper_harmonic_omp.cpp b/src/USER-OMP/improper_harmonic_omp.cpp index f2f19557d9..8bef42bf23 100644 --- a/src/USER-OMP/improper_harmonic_omp.cpp +++ b/src/USER-OMP/improper_harmonic_omp.cpp @@ -15,6 +15,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "improper_harmonic_omp.h" #include "atom.h" @@ -50,7 +51,7 @@ void ImproperHarmonicOMP::compute(int eflag, int vflag) const int inum = neighbor->nimproperlist; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/improper_ring_omp.cpp b/src/USER-OMP/improper_ring_omp.cpp index 1d0cc6cc46..4ba67aab70 100644 --- a/src/USER-OMP/improper_ring_omp.cpp +++ b/src/USER-OMP/improper_ring_omp.cpp @@ -15,6 +15,7 @@ Contributing author: Axel Kohlmeyer (Temple U) 
------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "improper_ring_omp.h" #include #include "atom.h" @@ -50,7 +51,7 @@ void ImproperRingOMP::compute(int eflag, int vflag) const int inum = neighbor->nimproperlist; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/improper_umbrella_omp.cpp b/src/USER-OMP/improper_umbrella_omp.cpp index 4cea8a4b63..bf466afb19 100644 --- a/src/USER-OMP/improper_umbrella_omp.cpp +++ b/src/USER-OMP/improper_umbrella_omp.cpp @@ -15,6 +15,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "improper_umbrella_omp.h" #include "atom.h" @@ -50,7 +51,7 @@ void ImproperUmbrellaOMP::compute(int eflag, int vflag) const int inum = neighbor->nimproperlist; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/msm_cg_omp.cpp b/src/USER-OMP/msm_cg_omp.cpp index 7ca01dbd6a..16db4857d6 100644 --- a/src/USER-OMP/msm_cg_omp.cpp +++ b/src/USER-OMP/msm_cg_omp.cpp @@ -16,6 +16,7 @@ Original MSM class by: Paul Crozier, Stan Moore, Stephen Bond, (all SNL) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "msm_cg_omp.h" #include #include @@ -310,7 +311,7 @@ void MSMCGOMP::compute(int eflag, int vflag) } #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { #if defined(_OPENMP) diff --git a/src/USER-OMP/msm_omp.cpp b/src/USER-OMP/msm_omp.cpp index 81f84e8f6f..2689226725 100644 --- a/src/USER-OMP/msm_omp.cpp +++ b/src/USER-OMP/msm_omp.cpp @@ -15,6 
+15,7 @@ Contributing authors: Axel Kohlmeyer (Temple U), Stan Moore (SNL) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "msm_omp.h" #include #include "comm.h" @@ -52,7 +53,7 @@ void MSMOMP::compute(int eflag, int vflag) MSM::compute(eflag,vflag); #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { #if defined(_OPENMP) @@ -158,7 +159,7 @@ void MSMOMP::direct_eval(const int nn) const int n=nn; #if defined(_OPENMP) -#pragma omp parallel default(none) reduction(+:v0,v1,v2,v3,v4,v5,emsm) +#pragma omp parallel LMP_DEFAULT_NONE reduction(+:v0,v1,v2,v3,v4,v5,emsm) #endif { double esum,v0sum,v1sum,v2sum,v3sum,v4sum,v5sum; diff --git a/src/USER-OMP/npair_full_bin_atomonly_omp.cpp b/src/USER-OMP/npair_full_bin_atomonly_omp.cpp index 3bda2e4c5a..226b2dcf6d 100644 --- a/src/USER-OMP/npair_full_bin_atomonly_omp.cpp +++ b/src/USER-OMP/npair_full_bin_atomonly_omp.cpp @@ -11,6 +11,7 @@ See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "npair_full_bin_atomonly_omp.h" #include "npair_omp.h" #include "neigh_list.h" @@ -36,7 +37,7 @@ void NPairFullBinAtomonlyOmp::build(NeighList *list) NPAIR_OMP_INIT; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(list) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list) #endif NPAIR_OMP_SETUP(nlocal); diff --git a/src/USER-OMP/npair_full_bin_ghost_omp.cpp b/src/USER-OMP/npair_full_bin_ghost_omp.cpp index b0b0070df5..e9297538d9 100644 --- a/src/USER-OMP/npair_full_bin_ghost_omp.cpp +++ b/src/USER-OMP/npair_full_bin_ghost_omp.cpp @@ -11,6 +11,7 @@ See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "npair_full_bin_ghost_omp.h" #include "npair_omp.h" #include "neigh_list.h" @@ -42,7 +43,7 @@ void NPairFullBinGhostOmp::build(NeighList *list) NPAIR_OMP_INIT; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(list) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list) #endif NPAIR_OMP_SETUP(nall); diff --git a/src/USER-OMP/npair_full_bin_omp.cpp b/src/USER-OMP/npair_full_bin_omp.cpp index d3e30b4932..35835cae1d 100644 --- a/src/USER-OMP/npair_full_bin_omp.cpp +++ b/src/USER-OMP/npair_full_bin_omp.cpp @@ -11,6 +11,7 @@ See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "npair_full_bin_omp.h" #include "npair_omp.h" #include "neigh_list.h" @@ -40,7 +41,7 @@ void NPairFullBinOmp::build(NeighList *list) NPAIR_OMP_INIT; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(list) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list) #endif NPAIR_OMP_SETUP(nlocal); diff --git a/src/USER-OMP/npair_full_multi_omp.cpp b/src/USER-OMP/npair_full_multi_omp.cpp index 707db2edcf..b4b0ea3033 100644 --- a/src/USER-OMP/npair_full_multi_omp.cpp +++ b/src/USER-OMP/npair_full_multi_omp.cpp @@ -11,6 +11,7 @@ See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "npair_full_multi_omp.h" #include "npair_omp.h" #include "neigh_list.h" @@ -41,7 +42,7 @@ void NPairFullMultiOmp::build(NeighList *list) NPAIR_OMP_INIT; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(list) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list) #endif NPAIR_OMP_SETUP(nlocal); diff --git a/src/USER-OMP/npair_full_nsq_ghost_omp.cpp b/src/USER-OMP/npair_full_nsq_ghost_omp.cpp index 527df58fd6..8b940f3724 100644 --- a/src/USER-OMP/npair_full_nsq_ghost_omp.cpp +++ b/src/USER-OMP/npair_full_nsq_ghost_omp.cpp @@ -11,6 +11,7 @@ See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "npair_full_nsq_ghost_omp.h" #include "npair_omp.h" #include "neigh_list.h" @@ -42,7 +43,7 @@ void NPairFullNsqGhostOmp::build(NeighList *list) NPAIR_OMP_INIT; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(list) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list) #endif NPAIR_OMP_SETUP(nall); diff --git a/src/USER-OMP/npair_full_nsq_omp.cpp b/src/USER-OMP/npair_full_nsq_omp.cpp index 2719f5dc2c..95c5caa148 100644 --- a/src/USER-OMP/npair_full_nsq_omp.cpp +++ b/src/USER-OMP/npair_full_nsq_omp.cpp @@ -11,6 +11,7 @@ See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "npair_full_nsq_omp.h" #include "npair_omp.h" #include "neigh_list.h" @@ -42,7 +43,7 @@ void NPairFullNsqOmp::build(NeighList *list) NPAIR_OMP_INIT; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(list) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list) #endif NPAIR_OMP_SETUP(nlocal); diff --git a/src/USER-OMP/npair_half_bin_atomonly_newton_omp.cpp b/src/USER-OMP/npair_half_bin_atomonly_newton_omp.cpp index a69779d96d..230d10a267 100644 --- a/src/USER-OMP/npair_half_bin_atomonly_newton_omp.cpp +++ b/src/USER-OMP/npair_half_bin_atomonly_newton_omp.cpp @@ -11,6 +11,7 @@ See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "npair_half_bin_atomonly_newton_omp.h" #include "npair_omp.h" #include "neigh_list.h" @@ -37,7 +38,7 @@ void NPairHalfBinAtomonlyNewtonOmp::build(NeighList *list) NPAIR_OMP_INIT; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(list) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list) #endif NPAIR_OMP_SETUP(nlocal); diff --git a/src/USER-OMP/npair_half_bin_newtoff_ghost_omp.cpp b/src/USER-OMP/npair_half_bin_newtoff_ghost_omp.cpp index 5f8ffdab29..33fa4ed685 100644 --- a/src/USER-OMP/npair_half_bin_newtoff_ghost_omp.cpp +++ b/src/USER-OMP/npair_half_bin_newtoff_ghost_omp.cpp @@ -11,6 +11,7 @@ See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "npair_half_bin_newtoff_ghost_omp.h" #include "npair_omp.h" #include "neigh_list.h" @@ -46,7 +47,7 @@ void NPairHalfBinNewtoffGhostOmp::build(NeighList *list) NPAIR_OMP_INIT; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(list) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list) #endif NPAIR_OMP_SETUP(nall); diff --git a/src/USER-OMP/npair_half_bin_newtoff_omp.cpp b/src/USER-OMP/npair_half_bin_newtoff_omp.cpp index 35807645cf..8e756d5072 100644 --- a/src/USER-OMP/npair_half_bin_newtoff_omp.cpp +++ b/src/USER-OMP/npair_half_bin_newtoff_omp.cpp @@ -11,6 +11,7 @@ See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "npair_half_bin_newtoff_omp.h" #include "npair_omp.h" #include "neigh_list.h" @@ -42,7 +43,7 @@ void NPairHalfBinNewtoffOmp::build(NeighList *list) NPAIR_OMP_INIT; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(list) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list) #endif NPAIR_OMP_SETUP(nlocal); diff --git a/src/USER-OMP/npair_half_bin_newton_omp.cpp b/src/USER-OMP/npair_half_bin_newton_omp.cpp index 2700d6863b..e27a58de46 100644 --- a/src/USER-OMP/npair_half_bin_newton_omp.cpp +++ b/src/USER-OMP/npair_half_bin_newton_omp.cpp @@ -11,6 +11,7 @@ See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "npair_half_bin_newton_omp.h" #include "npair_omp.h" #include "neigh_list.h" @@ -41,7 +42,7 @@ void NPairHalfBinNewtonOmp::build(NeighList *list) NPAIR_OMP_INIT; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(list) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list) #endif NPAIR_OMP_SETUP(nlocal); diff --git a/src/USER-OMP/npair_half_bin_newton_tri_omp.cpp b/src/USER-OMP/npair_half_bin_newton_tri_omp.cpp index d94845898c..f88df4aed4 100644 --- a/src/USER-OMP/npair_half_bin_newton_tri_omp.cpp +++ b/src/USER-OMP/npair_half_bin_newton_tri_omp.cpp @@ -11,6 +11,7 @@ See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "npair_half_bin_newton_tri_omp.h" #include "npair_omp.h" #include "neigh_list.h" @@ -41,7 +42,7 @@ void NPairHalfBinNewtonTriOmp::build(NeighList *list) NPAIR_OMP_INIT; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(list) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list) #endif NPAIR_OMP_SETUP(nlocal); diff --git a/src/USER-OMP/npair_half_multi_newtoff_omp.cpp b/src/USER-OMP/npair_half_multi_newtoff_omp.cpp index c06737dc1c..586809c174 100644 --- a/src/USER-OMP/npair_half_multi_newtoff_omp.cpp +++ b/src/USER-OMP/npair_half_multi_newtoff_omp.cpp @@ -11,6 +11,7 @@ See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "npair_half_multi_newtoff_omp.h" #include "npair_omp.h" #include "neigh_list.h" @@ -43,7 +44,7 @@ void NPairHalfMultiNewtoffOmp::build(NeighList *list) NPAIR_OMP_INIT; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(list) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list) #endif NPAIR_OMP_SETUP(nlocal); diff --git a/src/USER-OMP/npair_half_multi_newton_omp.cpp b/src/USER-OMP/npair_half_multi_newton_omp.cpp index 50df756be4..9b8fc78f09 100644 --- a/src/USER-OMP/npair_half_multi_newton_omp.cpp +++ b/src/USER-OMP/npair_half_multi_newton_omp.cpp @@ -11,6 +11,7 @@ See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "npair_half_multi_newton_omp.h" #include "npair_omp.h" #include "neigh_list.h" @@ -42,7 +43,7 @@ void NPairHalfMultiNewtonOmp::build(NeighList *list) NPAIR_OMP_INIT; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(list) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list) #endif NPAIR_OMP_SETUP(nlocal); diff --git a/src/USER-OMP/npair_half_multi_newton_tri_omp.cpp b/src/USER-OMP/npair_half_multi_newton_tri_omp.cpp index 8b78b311dd..fec687d075 100644 --- a/src/USER-OMP/npair_half_multi_newton_tri_omp.cpp +++ b/src/USER-OMP/npair_half_multi_newton_tri_omp.cpp @@ -11,6 +11,7 @@ See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "npair_half_multi_newton_tri_omp.h" #include "npair_omp.h" #include "neigh_list.h" @@ -43,7 +44,7 @@ void NPairHalfMultiNewtonTriOmp::build(NeighList *list) NPAIR_OMP_INIT; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(list) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list) #endif NPAIR_OMP_SETUP(nlocal); diff --git a/src/USER-OMP/npair_half_nsq_newtoff_ghost_omp.cpp b/src/USER-OMP/npair_half_nsq_newtoff_ghost_omp.cpp index f0eb211425..316c63d541 100644 --- a/src/USER-OMP/npair_half_nsq_newtoff_ghost_omp.cpp +++ b/src/USER-OMP/npair_half_nsq_newtoff_ghost_omp.cpp @@ -11,6 +11,7 @@ See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "npair_half_nsq_newtoff_ghost_omp.h" #include "npair_omp.h" #include "neigh_list.h" @@ -47,7 +48,7 @@ void NPairHalfNsqNewtoffGhostOmp::build(NeighList *list) NPAIR_OMP_INIT; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(list) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list) #endif NPAIR_OMP_SETUP(nall); diff --git a/src/USER-OMP/npair_half_nsq_newtoff_omp.cpp b/src/USER-OMP/npair_half_nsq_newtoff_omp.cpp index 55b9f9e512..c937b5bc68 100644 --- a/src/USER-OMP/npair_half_nsq_newtoff_omp.cpp +++ b/src/USER-OMP/npair_half_nsq_newtoff_omp.cpp @@ -11,6 +11,7 @@ See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "npair_half_nsq_newtoff_omp.h" #include "npair_omp.h" #include "neigh_list.h" @@ -44,7 +45,7 @@ void NPairHalfNsqNewtoffOmp::build(NeighList *list) NPAIR_OMP_INIT; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(list) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list) #endif NPAIR_OMP_SETUP(nlocal); diff --git a/src/USER-OMP/npair_half_nsq_newton_omp.cpp b/src/USER-OMP/npair_half_nsq_newton_omp.cpp index 223da622e8..6baab97aa0 100644 --- a/src/USER-OMP/npair_half_nsq_newton_omp.cpp +++ b/src/USER-OMP/npair_half_nsq_newton_omp.cpp @@ -11,6 +11,7 @@ See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "npair_half_nsq_newton_omp.h" #include "npair_omp.h" #include "neigh_list.h" @@ -43,7 +44,7 @@ void NPairHalfNsqNewtonOmp::build(NeighList *list) NPAIR_OMP_INIT; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(list) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list) #endif NPAIR_OMP_SETUP(nlocal); diff --git a/src/USER-OMP/npair_half_respa_bin_newtoff_omp.cpp b/src/USER-OMP/npair_half_respa_bin_newtoff_omp.cpp index 9bb4d277fe..c8dc37f978 100644 --- a/src/USER-OMP/npair_half_respa_bin_newtoff_omp.cpp +++ b/src/USER-OMP/npair_half_respa_bin_newtoff_omp.cpp @@ -11,6 +11,7 @@ See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "npair_half_respa_bin_newtoff_omp.h" #include "npair_omp.h" #include "neigh_list.h" @@ -47,7 +48,7 @@ void NPairHalfRespaBinNewtoffOmp::build(NeighList *list) const int respamiddle = list->respamiddle; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(list) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list) #endif NPAIR_OMP_SETUP(nlocal); diff --git a/src/USER-OMP/npair_half_respa_bin_newton_omp.cpp b/src/USER-OMP/npair_half_respa_bin_newton_omp.cpp index 9ed0ae482d..98732a62ea 100644 --- a/src/USER-OMP/npair_half_respa_bin_newton_omp.cpp +++ b/src/USER-OMP/npair_half_respa_bin_newton_omp.cpp @@ -11,6 +11,7 @@ See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "npair_half_respa_bin_newton_omp.h" #include "npair_omp.h" #include "neigh_list.h" @@ -46,7 +47,7 @@ void NPairHalfRespaBinNewtonOmp::build(NeighList *list) const int respamiddle = list->respamiddle; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(list) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list) #endif NPAIR_OMP_SETUP(nlocal); diff --git a/src/USER-OMP/npair_half_respa_bin_newton_tri_omp.cpp b/src/USER-OMP/npair_half_respa_bin_newton_tri_omp.cpp index cd03684940..65315a2905 100644 --- a/src/USER-OMP/npair_half_respa_bin_newton_tri_omp.cpp +++ b/src/USER-OMP/npair_half_respa_bin_newton_tri_omp.cpp @@ -11,6 +11,7 @@ See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "npair_half_respa_bin_newton_tri_omp.h" #include "npair_omp.h" #include "neigh_list.h" @@ -46,7 +47,7 @@ void NPairHalfRespaBinNewtonTriOmp::build(NeighList *list) const int respamiddle = list->respamiddle; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(list) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list) #endif NPAIR_OMP_SETUP(nlocal); diff --git a/src/USER-OMP/npair_half_respa_nsq_newtoff_omp.cpp b/src/USER-OMP/npair_half_respa_nsq_newtoff_omp.cpp index b1e7467ec7..45d81069fc 100644 --- a/src/USER-OMP/npair_half_respa_nsq_newtoff_omp.cpp +++ b/src/USER-OMP/npair_half_respa_nsq_newtoff_omp.cpp @@ -11,6 +11,7 @@ See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "npair_half_respa_nsq_newtoff_omp.h" #include "npair_omp.h" #include "neigh_list.h" @@ -48,7 +49,7 @@ void NPairHalfRespaNsqNewtoffOmp::build(NeighList *list) const int respamiddle = list->respamiddle; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(list) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list) #endif NPAIR_OMP_SETUP(nlocal); diff --git a/src/USER-OMP/npair_half_respa_nsq_newton_omp.cpp b/src/USER-OMP/npair_half_respa_nsq_newton_omp.cpp index c22965895d..1237ce7858 100644 --- a/src/USER-OMP/npair_half_respa_nsq_newton_omp.cpp +++ b/src/USER-OMP/npair_half_respa_nsq_newton_omp.cpp @@ -11,6 +11,7 @@ See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "npair_half_respa_nsq_newton_omp.h" #include "npair_omp.h" #include "neigh_list.h" @@ -49,7 +50,7 @@ void NPairHalfRespaNsqNewtonOmp::build(NeighList *list) const int respamiddle = list->respamiddle; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(list) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list) #endif NPAIR_OMP_SETUP(nlocal); diff --git a/src/USER-OMP/npair_half_size_bin_newtoff_omp.cpp b/src/USER-OMP/npair_half_size_bin_newtoff_omp.cpp index 1b437482c4..c979c1384d 100644 --- a/src/USER-OMP/npair_half_size_bin_newtoff_omp.cpp +++ b/src/USER-OMP/npair_half_size_bin_newtoff_omp.cpp @@ -11,6 +11,7 @@ See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "npair_half_size_bin_newtoff_omp.h" #include "npair_omp.h" #include "neigh_list.h" @@ -43,7 +44,7 @@ void NPairHalfSizeBinNewtoffOmp::build(NeighList *list) NPAIR_OMP_INIT; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(list) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list) #endif NPAIR_OMP_SETUP(nlocal); diff --git a/src/USER-OMP/npair_half_size_bin_newton_omp.cpp b/src/USER-OMP/npair_half_size_bin_newton_omp.cpp index 8e0581d4ce..c83cf77e50 100644 --- a/src/USER-OMP/npair_half_size_bin_newton_omp.cpp +++ b/src/USER-OMP/npair_half_size_bin_newton_omp.cpp @@ -11,6 +11,7 @@ See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "npair_half_size_bin_newton_omp.h" #include "npair_omp.h" #include "neigh_list.h" @@ -42,7 +43,7 @@ void NPairHalfSizeBinNewtonOmp::build(NeighList *list) NPAIR_OMP_INIT; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(list) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list) #endif NPAIR_OMP_SETUP(nlocal); diff --git a/src/USER-OMP/npair_half_size_bin_newton_tri_omp.cpp b/src/USER-OMP/npair_half_size_bin_newton_tri_omp.cpp index 38a2c0d61f..a615b11d93 100644 --- a/src/USER-OMP/npair_half_size_bin_newton_tri_omp.cpp +++ b/src/USER-OMP/npair_half_size_bin_newton_tri_omp.cpp @@ -11,6 +11,7 @@ See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "npair_half_size_bin_newton_tri_omp.h" #include "npair_omp.h" #include "neigh_list.h" @@ -41,7 +42,7 @@ void NPairHalfSizeBinNewtonTriOmp::build(NeighList *list) NPAIR_OMP_INIT; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(list) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list) #endif NPAIR_OMP_SETUP(nlocal); diff --git a/src/USER-OMP/npair_half_size_nsq_newtoff_omp.cpp b/src/USER-OMP/npair_half_size_nsq_newtoff_omp.cpp index d1505e1b2e..f42cc3e9bf 100644 --- a/src/USER-OMP/npair_half_size_nsq_newtoff_omp.cpp +++ b/src/USER-OMP/npair_half_size_nsq_newtoff_omp.cpp @@ -11,6 +11,7 @@ See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "npair_half_size_nsq_newtoff_omp.h" #include "npair_omp.h" #include "neigh_list.h" @@ -45,7 +46,7 @@ void NPairHalfSizeNsqNewtoffOmp::build(NeighList *list) NPAIR_OMP_INIT; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(list) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list) #endif NPAIR_OMP_SETUP(nlocal); diff --git a/src/USER-OMP/npair_half_size_nsq_newton_omp.cpp b/src/USER-OMP/npair_half_size_nsq_newton_omp.cpp index 9027b0728d..10d7d26b3d 100644 --- a/src/USER-OMP/npair_half_size_nsq_newton_omp.cpp +++ b/src/USER-OMP/npair_half_size_nsq_newton_omp.cpp @@ -11,6 +11,7 @@ See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "npair_half_size_nsq_newton_omp.h" #include "npair_omp.h" #include "neigh_list.h" @@ -46,7 +47,7 @@ void NPairHalfSizeNsqNewtonOmp::build(NeighList *list) NPAIR_OMP_INIT; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(list) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list) #endif NPAIR_OMP_SETUP(nlocal); diff --git a/src/USER-OMP/npair_halffull_newtoff_omp.cpp b/src/USER-OMP/npair_halffull_newtoff_omp.cpp index 7d2fe4f109..7ad0157076 100644 --- a/src/USER-OMP/npair_halffull_newtoff_omp.cpp +++ b/src/USER-OMP/npair_halffull_newtoff_omp.cpp @@ -11,6 +11,7 @@ See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "npair_halffull_newtoff_omp.h" #include "npair_omp.h" #include "neigh_list.h" @@ -38,7 +39,7 @@ void NPairHalffullNewtoffOmp::build(NeighList *list) NPAIR_OMP_INIT; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(list) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list) #endif NPAIR_OMP_SETUP(inum_full); diff --git a/src/USER-OMP/npair_halffull_newton_omp.cpp b/src/USER-OMP/npair_halffull_newton_omp.cpp index 3fcc8c2e98..9e248ee609 100644 --- a/src/USER-OMP/npair_halffull_newton_omp.cpp +++ b/src/USER-OMP/npair_halffull_newton_omp.cpp @@ -11,6 +11,7 @@ See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "npair_halffull_newton_omp.h" #include "npair_omp.h" #include "neigh_list.h" @@ -38,7 +39,7 @@ void NPairHalffullNewtonOmp::build(NeighList *list) NPAIR_OMP_INIT; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(list) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list) #endif NPAIR_OMP_SETUP(inum_full); diff --git a/src/USER-OMP/pair_adp_omp.cpp b/src/USER-OMP/pair_adp_omp.cpp index 0a48de453e..63a539c93c 100644 --- a/src/USER-OMP/pair_adp_omp.cpp +++ b/src/USER-OMP/pair_adp_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include @@ -62,7 +63,7 @@ void PairADPOMP::compute(int eflag, int vflag) } #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_agni_omp.cpp b/src/USER-OMP/pair_agni_omp.cpp index 1580256e35..b61bf52e4e 100644 --- a/src/USER-OMP/pair_agni_omp.cpp +++ b/src/USER-OMP/pair_agni_omp.cpp @@ -12,6 +12,7 @@ 
Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include #include // requires C++-11 @@ -49,7 +50,7 @@ void PairAGNIOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_airebo_omp.cpp b/src/USER-OMP/pair_airebo_omp.cpp index aedf5056a6..25d6c35174 100644 --- a/src/USER-OMP/pair_airebo_omp.cpp +++ b/src/USER-OMP/pair_airebo_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_airebo_omp.h" #include "atom.h" @@ -58,7 +59,7 @@ void PairAIREBOOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) reduction(+:pv0,pv1,pv2) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) reduction(+:pv0,pv1,pv2) #endif { int ifrom, ito, tid; @@ -104,7 +105,7 @@ void PairAIREBOOMP::REBO_neigh_thr() } #if defined(_OPENMP) -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { int i,j,ii,jj,n,jnum,itype,jtype; diff --git a/src/USER-OMP/pair_beck_omp.cpp b/src/USER-OMP/pair_beck_omp.cpp index 72e41f074c..48e6f9b0fe 100644 --- a/src/USER-OMP/pair_beck_omp.cpp +++ b/src/USER-OMP/pair_beck_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "pair_beck_omp.h" #include #include "atom.h" @@ -45,7 +46,7 @@ void PairBeckOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel 
LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_born_coul_long_omp.cpp b/src/USER-OMP/pair_born_coul_long_omp.cpp index f7e3fced46..7f92ab6734 100644 --- a/src/USER-OMP/pair_born_coul_long_omp.cpp +++ b/src/USER-OMP/pair_born_coul_long_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_born_coul_long_omp.h" #include "atom.h" @@ -51,7 +52,7 @@ void PairBornCoulLongOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_born_coul_msm_omp.cpp b/src/USER-OMP/pair_born_coul_msm_omp.cpp index b057cbc706..eec1765859 100644 --- a/src/USER-OMP/pair_born_coul_msm_omp.cpp +++ b/src/USER-OMP/pair_born_coul_msm_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_born_coul_msm_omp.h" #include "atom.h" @@ -48,7 +49,7 @@ void PairBornCoulMSMOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_born_coul_wolf_omp.cpp b/src/USER-OMP/pair_born_coul_wolf_omp.cpp index bac38ae43a..ffa069ec4e 100644 --- a/src/USER-OMP/pair_born_coul_wolf_omp.cpp +++ b/src/USER-OMP/pair_born_coul_wolf_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "pair_born_coul_wolf_omp.h" #include #include "atom.h" @@ -45,7 +46,7 
@@ void PairBornCoulWolfOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_born_omp.cpp b/src/USER-OMP/pair_born_omp.cpp index 6cacd5625b..aaac28d07c 100644 --- a/src/USER-OMP/pair_born_omp.cpp +++ b/src/USER-OMP/pair_born_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "pair_born_omp.h" #include #include "atom.h" @@ -43,7 +44,7 @@ void PairBornOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_brownian_omp.cpp b/src/USER-OMP/pair_brownian_omp.cpp index 907a447332..ca28fa14bb 100644 --- a/src/USER-OMP/pair_brownian_omp.cpp +++ b/src/USER-OMP/pair_brownian_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "pair_brownian_omp.h" #include #include "atom.h" @@ -135,7 +136,7 @@ void PairBrownianOMP::compute(int eflag, int vflag) } #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_brownian_poly_omp.cpp b/src/USER-OMP/pair_brownian_poly_omp.cpp index f6f5f269a5..939bc223eb 100644 --- a/src/USER-OMP/pair_brownian_poly_omp.cpp +++ b/src/USER-OMP/pair_brownian_poly_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" 
#include "pair_brownian_poly_omp.h" #include #include "atom.h" @@ -135,7 +136,7 @@ void PairBrownianPolyOMP::compute(int eflag, int vflag) } #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_buck_coul_cut_omp.cpp b/src/USER-OMP/pair_buck_coul_cut_omp.cpp index c9ccb12447..2cff0d749d 100644 --- a/src/USER-OMP/pair_buck_coul_cut_omp.cpp +++ b/src/USER-OMP/pair_buck_coul_cut_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_buck_coul_cut_omp.h" #include "atom.h" @@ -43,7 +44,7 @@ void PairBuckCoulCutOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_buck_coul_long_omp.cpp b/src/USER-OMP/pair_buck_coul_long_omp.cpp index 0929157895..759a8b2118 100644 --- a/src/USER-OMP/pair_buck_coul_long_omp.cpp +++ b/src/USER-OMP/pair_buck_coul_long_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_buck_coul_long_omp.h" #include "atom.h" @@ -51,7 +52,7 @@ void PairBuckCoulLongOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_buck_coul_msm_omp.cpp b/src/USER-OMP/pair_buck_coul_msm_omp.cpp index a4d322890a..b26243b19a 100644 --- a/src/USER-OMP/pair_buck_coul_msm_omp.cpp +++ b/src/USER-OMP/pair_buck_coul_msm_omp.cpp @@ -12,6 
+12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_buck_coul_msm_omp.h" #include "atom.h" @@ -48,7 +49,7 @@ void PairBuckCoulMSMOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_buck_long_coul_long_omp.cpp b/src/USER-OMP/pair_buck_long_coul_long_omp.cpp index d30e8949b3..a2e34b9a1b 100644 --- a/src/USER-OMP/pair_buck_long_coul_long_omp.cpp +++ b/src/USER-OMP/pair_buck_long_coul_long_omp.cpp @@ -12,6 +12,7 @@ ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "math_vector.h" #include "pair_buck_long_coul_long_omp.h" @@ -56,7 +57,7 @@ void PairBuckLongCoulLongOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; @@ -320,7 +321,7 @@ void PairBuckLongCoulLongOMP::compute_inner() const int nthreads = comm->nthreads; const int inum = list->inum_inner; #if defined(_OPENMP) -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { int ifrom, ito, tid; @@ -345,7 +346,7 @@ void PairBuckLongCoulLongOMP::compute_middle() const int inum = list->inum_middle; #if defined(_OPENMP) -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { int ifrom, ito, tid; @@ -375,7 +376,7 @@ void PairBuckLongCoulLongOMP::compute_outer(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff 
--git a/src/USER-OMP/pair_buck_omp.cpp b/src/USER-OMP/pair_buck_omp.cpp index 563133e1cd..fc85d79c81 100644 --- a/src/USER-OMP/pair_buck_omp.cpp +++ b/src/USER-OMP/pair_buck_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "pair_buck_omp.h" #include #include "atom.h" @@ -43,7 +44,7 @@ void PairBuckOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_colloid_omp.cpp b/src/USER-OMP/pair_colloid_omp.cpp index 0fc4b1bdf2..cce588f516 100644 --- a/src/USER-OMP/pair_colloid_omp.cpp +++ b/src/USER-OMP/pair_colloid_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_colloid_omp.h" #include "atom.h" @@ -46,7 +47,7 @@ void PairColloidOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_comb_omp.cpp b/src/USER-OMP/pair_comb_omp.cpp index 4b5f0ea6c7..8912cbc243 100644 --- a/src/USER-OMP/pair_comb_omp.cpp +++ b/src/USER-OMP/pair_comb_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_comb_omp.h" #include "atom.h" @@ -52,7 +53,7 @@ void PairCombOMP::compute(int eflag, int vflag) Short_neigh_thr(); #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif 
{ int ifrom, ito, tid; @@ -411,7 +412,7 @@ double PairCombOMP::yasu_char(double *qf_fix, int &igroup) // loop over full neighbor list of my atoms #if defined(_OPENMP) -#pragma omp parallel for private(ii) default(none) shared(potal,fac11e) +#pragma omp parallel for private(ii) LMP_DEFAULT_NONE LMP_SHARED(potal,fac11e) #endif for (ii = 0; ii < inum; ii ++) { double fqi,fqj,fqij,fqji,fqjj,delr1[3]; @@ -564,7 +565,7 @@ void PairCombOMP::Short_neigh_thr() const int nthreads = comm->nthreads; #if defined(_OPENMP) -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { int nj,*neighptrj; diff --git a/src/USER-OMP/pair_coul_cut_omp.cpp b/src/USER-OMP/pair_coul_cut_omp.cpp index ce858666cb..69eb9ac0f3 100644 --- a/src/USER-OMP/pair_coul_cut_omp.cpp +++ b/src/USER-OMP/pair_coul_cut_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "pair_coul_cut_omp.h" #include #include "atom.h" @@ -43,7 +44,7 @@ void PairCoulCutOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_coul_cut_soft_omp.cpp b/src/USER-OMP/pair_coul_cut_soft_omp.cpp index 7b6ed22f0c..89222903ec 100644 --- a/src/USER-OMP/pair_coul_cut_soft_omp.cpp +++ b/src/USER-OMP/pair_coul_cut_soft_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_coul_cut_soft_omp.h" #include "atom.h" @@ -43,7 +44,7 @@ void PairCoulCutSoftOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE 
LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_coul_debye_omp.cpp b/src/USER-OMP/pair_coul_debye_omp.cpp index 270770ee5d..c22f7340c4 100644 --- a/src/USER-OMP/pair_coul_debye_omp.cpp +++ b/src/USER-OMP/pair_coul_debye_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "pair_coul_debye_omp.h" #include #include "atom.h" @@ -43,7 +44,7 @@ void PairCoulDebyeOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_coul_diel_omp.cpp b/src/USER-OMP/pair_coul_diel_omp.cpp index 67e09690de..656cdc9421 100644 --- a/src/USER-OMP/pair_coul_diel_omp.cpp +++ b/src/USER-OMP/pair_coul_diel_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_coul_diel_omp.h" #include "atom.h" @@ -43,7 +44,7 @@ void PairCoulDielOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_coul_dsf_omp.cpp b/src/USER-OMP/pair_coul_dsf_omp.cpp index 40e285e7c8..ddcc8dbeb0 100644 --- a/src/USER-OMP/pair_coul_dsf_omp.cpp +++ b/src/USER-OMP/pair_coul_dsf_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "pair_coul_dsf_omp.h" #include #include "atom.h" @@ -52,7 +53,7 @@ void PairCoulDSFOMP::compute(int eflag, int vflag) const int inum = list->inum; #if 
defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_coul_long_omp.cpp b/src/USER-OMP/pair_coul_long_omp.cpp index b135ffa7e6..6c9b9d034d 100644 --- a/src/USER-OMP/pair_coul_long_omp.cpp +++ b/src/USER-OMP/pair_coul_long_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_coul_long_omp.h" #include "atom.h" @@ -52,7 +53,7 @@ void PairCoulLongOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_coul_long_soft_omp.cpp b/src/USER-OMP/pair_coul_long_soft_omp.cpp index 891123b8b3..ff3267fb38 100644 --- a/src/USER-OMP/pair_coul_long_soft_omp.cpp +++ b/src/USER-OMP/pair_coul_long_soft_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_coul_long_soft_omp.h" #include "atom.h" @@ -51,7 +52,7 @@ void PairCoulLongSoftOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_coul_msm_omp.cpp b/src/USER-OMP/pair_coul_msm_omp.cpp index 9417df7ac6..32a657e286 100644 --- a/src/USER-OMP/pair_coul_msm_omp.cpp +++ b/src/USER-OMP/pair_coul_msm_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include 
"pair_coul_msm_omp.h" #include "atom.h" @@ -49,7 +50,7 @@ void PairCoulMSMOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_coul_wolf_omp.cpp b/src/USER-OMP/pair_coul_wolf_omp.cpp index 9163eff086..d0f6fdb125 100644 --- a/src/USER-OMP/pair_coul_wolf_omp.cpp +++ b/src/USER-OMP/pair_coul_wolf_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "pair_coul_wolf_omp.h" #include #include "atom.h" @@ -45,7 +46,7 @@ void PairCoulWolfOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_dpd_omp.cpp b/src/USER-OMP/pair_dpd_omp.cpp index 7c265b2b7b..f3a1c29a70 100644 --- a/src/USER-OMP/pair_dpd_omp.cpp +++ b/src/USER-OMP/pair_dpd_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "pair_dpd_omp.h" #include #include "atom.h" @@ -80,7 +81,7 @@ void PairDPDOMP::compute(int eflag, int vflag) } #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_dpd_tstat_omp.cpp b/src/USER-OMP/pair_dpd_tstat_omp.cpp index 076e27a299..06e80274bd 100644 --- a/src/USER-OMP/pair_dpd_tstat_omp.cpp +++ b/src/USER-OMP/pair_dpd_tstat_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ 
+#include "omp_compat.h" #include #include "pair_dpd_tstat_omp.h" #include "atom.h" @@ -79,7 +80,7 @@ void PairDPDTstatOMP::compute(int eflag, int vflag) random_thr[0] = random; } #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_eam_cd_omp.cpp b/src/USER-OMP/pair_eam_cd_omp.cpp index 1d945e06a8..874a2fa252 100644 --- a/src/USER-OMP/pair_eam_cd_omp.cpp +++ b/src/USER-OMP/pair_eam_cd_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include @@ -77,7 +78,7 @@ void PairEAMCDOMP::compute(int eflag, int vflag) } #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_eam_omp.cpp b/src/USER-OMP/pair_eam_omp.cpp index 899323a680..60ae65def5 100644 --- a/src/USER-OMP/pair_eam_omp.cpp +++ b/src/USER-OMP/pair_eam_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include @@ -59,7 +60,7 @@ void PairEAMOMP::compute(int eflag, int vflag) } #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_edip_omp.cpp b/src/USER-OMP/pair_edip_omp.cpp index d1fa4c1c7a..efba197153 100644 --- a/src/USER-OMP/pair_edip_omp.cpp +++ b/src/USER-OMP/pair_edip_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_edip_omp.h" #include "atom.h" @@ -50,7 +51,7 @@ 
void PairEDIPOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_eim_omp.cpp b/src/USER-OMP/pair_eim_omp.cpp index dd590b75e2..02b765a956 100644 --- a/src/USER-OMP/pair_eim_omp.cpp +++ b/src/USER-OMP/pair_eim_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include @@ -57,7 +58,7 @@ void PairEIMOMP::compute(int eflag, int vflag) } #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_gauss_cut_omp.cpp b/src/USER-OMP/pair_gauss_cut_omp.cpp index e14a85fc95..6d5344701d 100644 --- a/src/USER-OMP/pair_gauss_cut_omp.cpp +++ b/src/USER-OMP/pair_gauss_cut_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_gauss_cut_omp.h" #include "atom.h" @@ -43,7 +44,7 @@ void PairGaussCutOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_gauss_omp.cpp b/src/USER-OMP/pair_gauss_omp.cpp index de212c9213..106d520fa1 100644 --- a/src/USER-OMP/pair_gauss_omp.cpp +++ b/src/USER-OMP/pair_gauss_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_gauss_omp.h" #include "atom.h" @@ -45,7 +46,7 @@ void 
PairGaussOMP::compute(int eflag, int vflag) double occ = 0.0; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) reduction(+:occ) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) reduction(+:occ) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_gayberne_omp.cpp b/src/USER-OMP/pair_gayberne_omp.cpp index a58c16eafc..f0fd60a309 100644 --- a/src/USER-OMP/pair_gayberne_omp.cpp +++ b/src/USER-OMP/pair_gayberne_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_gayberne_omp.h" #include "math_extra.h" @@ -45,7 +46,7 @@ void PairGayBerneOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_gran_hertz_history_omp.cpp b/src/USER-OMP/pair_gran_hertz_history_omp.cpp index 1e3d86a1a5..9ad84afb92 100644 --- a/src/USER-OMP/pair_gran_hertz_history_omp.cpp +++ b/src/USER-OMP/pair_gran_hertz_history_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_gran_hertz_history_omp.h" #include "fix_neigh_history.h" @@ -69,7 +70,7 @@ void PairGranHertzHistoryOMP::compute(int eflag, int vflag) } #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_gran_hooke_history_omp.cpp b/src/USER-OMP/pair_gran_hooke_history_omp.cpp index d0e44cc430..3e519fc0d0 100644 --- a/src/USER-OMP/pair_gran_hooke_history_omp.cpp +++ b/src/USER-OMP/pair_gran_hooke_history_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel 
Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include #include "pair_gran_hooke_history_omp.h" @@ -70,7 +71,7 @@ void PairGranHookeHistoryOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_gran_hooke_omp.cpp b/src/USER-OMP/pair_gran_hooke_omp.cpp index 33296e22fa..8de959a91d 100644 --- a/src/USER-OMP/pair_gran_hooke_omp.cpp +++ b/src/USER-OMP/pair_gran_hooke_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_gran_hooke_omp.h" #include "atom.h" @@ -65,7 +66,7 @@ void PairGranHookeOMP::compute(int eflag, int vflag) } #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_hbond_dreiding_lj_omp.cpp b/src/USER-OMP/pair_hbond_dreiding_lj_omp.cpp index 77cc60e437..bb8bbcc984 100644 --- a/src/USER-OMP/pair_hbond_dreiding_lj_omp.cpp +++ b/src/USER-OMP/pair_hbond_dreiding_lj_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_hbond_dreiding_lj_omp.h" #include "atom.h" @@ -74,7 +75,7 @@ void PairHbondDreidingLJOMP::compute(int eflag, int vflag) } #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_hbond_dreiding_morse_omp.cpp b/src/USER-OMP/pair_hbond_dreiding_morse_omp.cpp index 47b2818be8..4ad3a8f057 
100644 --- a/src/USER-OMP/pair_hbond_dreiding_morse_omp.cpp +++ b/src/USER-OMP/pair_hbond_dreiding_morse_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_hbond_dreiding_morse_omp.h" #include "atom.h" @@ -74,7 +75,7 @@ void PairHbondDreidingMorseOMP::compute(int eflag, int vflag) } #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_lj96_cut_omp.cpp b/src/USER-OMP/pair_lj96_cut_omp.cpp index b48946b3f9..adf0aac9b2 100644 --- a/src/USER-OMP/pair_lj96_cut_omp.cpp +++ b/src/USER-OMP/pair_lj96_cut_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_lj96_cut_omp.h" #include "atom.h" @@ -44,7 +45,7 @@ void PairLJ96CutOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_lj_charmm_coul_charmm_implicit_omp.cpp b/src/USER-OMP/pair_lj_charmm_coul_charmm_implicit_omp.cpp index 10a253de6c..1c8e4c3e6b 100644 --- a/src/USER-OMP/pair_lj_charmm_coul_charmm_implicit_omp.cpp +++ b/src/USER-OMP/pair_lj_charmm_coul_charmm_implicit_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_lj_charmm_coul_charmm_implicit_omp.h" #include "atom.h" @@ -43,7 +44,7 @@ void PairLJCharmmCoulCharmmImplicitOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel 
default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_lj_charmm_coul_charmm_omp.cpp b/src/USER-OMP/pair_lj_charmm_coul_charmm_omp.cpp index c4c5a9650e..55227e2e10 100644 --- a/src/USER-OMP/pair_lj_charmm_coul_charmm_omp.cpp +++ b/src/USER-OMP/pair_lj_charmm_coul_charmm_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_lj_charmm_coul_charmm_omp.h" #include "atom.h" @@ -43,7 +44,7 @@ void PairLJCharmmCoulCharmmOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_lj_charmm_coul_long_omp.cpp b/src/USER-OMP/pair_lj_charmm_coul_long_omp.cpp index 949ea3ded7..437bd183ed 100644 --- a/src/USER-OMP/pair_lj_charmm_coul_long_omp.cpp +++ b/src/USER-OMP/pair_lj_charmm_coul_long_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_lj_charmm_coul_long_omp.h" #include "atom.h" @@ -44,7 +45,7 @@ void PairLJCharmmCoulLongOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_lj_charmm_coul_long_soft_omp.cpp b/src/USER-OMP/pair_lj_charmm_coul_long_soft_omp.cpp index 2a41b0690b..b32f108098 100644 --- a/src/USER-OMP/pair_lj_charmm_coul_long_soft_omp.cpp +++ b/src/USER-OMP/pair_lj_charmm_coul_long_soft_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) 
------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_lj_charmm_coul_long_soft_omp.h" #include "atom.h" @@ -44,7 +45,7 @@ void PairLJCharmmCoulLongSoftOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_lj_charmm_coul_msm_omp.cpp b/src/USER-OMP/pair_lj_charmm_coul_msm_omp.cpp index cecd27bfdc..9a0b44c1ec 100644 --- a/src/USER-OMP/pair_lj_charmm_coul_msm_omp.cpp +++ b/src/USER-OMP/pair_lj_charmm_coul_msm_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_lj_charmm_coul_msm_omp.h" #include "atom.h" @@ -49,7 +50,7 @@ void PairLJCharmmCoulMSMOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_lj_class2_coul_cut_omp.cpp b/src/USER-OMP/pair_lj_class2_coul_cut_omp.cpp index 9e4dc08fd5..59a6841c9e 100644 --- a/src/USER-OMP/pair_lj_class2_coul_cut_omp.cpp +++ b/src/USER-OMP/pair_lj_class2_coul_cut_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_lj_class2_coul_cut_omp.h" #include "atom.h" @@ -43,7 +44,7 @@ void PairLJClass2CoulCutOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git 
a/src/USER-OMP/pair_lj_class2_coul_long_omp.cpp b/src/USER-OMP/pair_lj_class2_coul_long_omp.cpp index 4fd371e2eb..c0734e39d6 100644 --- a/src/USER-OMP/pair_lj_class2_coul_long_omp.cpp +++ b/src/USER-OMP/pair_lj_class2_coul_long_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_lj_class2_coul_long_omp.h" #include "atom.h" @@ -51,7 +52,7 @@ void PairLJClass2CoulLongOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_lj_class2_omp.cpp b/src/USER-OMP/pair_lj_class2_omp.cpp index fa9b6ae703..2b91e10cfa 100644 --- a/src/USER-OMP/pair_lj_class2_omp.cpp +++ b/src/USER-OMP/pair_lj_class2_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_lj_class2_omp.h" #include "atom.h" @@ -43,7 +44,7 @@ void PairLJClass2OMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_lj_cubic_omp.cpp b/src/USER-OMP/pair_lj_cubic_omp.cpp index 3fe6fab5de..778c25393d 100644 --- a/src/USER-OMP/pair_lj_cubic_omp.cpp +++ b/src/USER-OMP/pair_lj_cubic_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_lj_cubic_omp.h" #include "atom.h" @@ -44,7 +45,7 @@ void PairLJCubicOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) 
-#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_lj_cut_coul_cut_omp.cpp b/src/USER-OMP/pair_lj_cut_coul_cut_omp.cpp index 6c0a3706f7..d560b803f1 100644 --- a/src/USER-OMP/pair_lj_cut_coul_cut_omp.cpp +++ b/src/USER-OMP/pair_lj_cut_coul_cut_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_lj_cut_coul_cut_omp.h" #include "atom.h" @@ -43,7 +44,7 @@ void PairLJCutCoulCutOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_lj_cut_coul_cut_soft_omp.cpp b/src/USER-OMP/pair_lj_cut_coul_cut_soft_omp.cpp index 742565d19c..1c88600e7a 100644 --- a/src/USER-OMP/pair_lj_cut_coul_cut_soft_omp.cpp +++ b/src/USER-OMP/pair_lj_cut_coul_cut_soft_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_lj_cut_coul_cut_soft_omp.h" #include "atom.h" @@ -43,7 +44,7 @@ void PairLJCutCoulCutSoftOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_lj_cut_coul_debye_omp.cpp b/src/USER-OMP/pair_lj_cut_coul_debye_omp.cpp index 413758cc4a..79754e704b 100644 --- a/src/USER-OMP/pair_lj_cut_coul_debye_omp.cpp +++ b/src/USER-OMP/pair_lj_cut_coul_debye_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) 
------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_lj_cut_coul_debye_omp.h" #include "atom.h" @@ -43,7 +44,7 @@ void PairLJCutCoulDebyeOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_lj_cut_coul_dsf_omp.cpp b/src/USER-OMP/pair_lj_cut_coul_dsf_omp.cpp index 3295ede132..ec69a1a1ca 100644 --- a/src/USER-OMP/pair_lj_cut_coul_dsf_omp.cpp +++ b/src/USER-OMP/pair_lj_cut_coul_dsf_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_lj_cut_coul_dsf_omp.h" #include "atom.h" @@ -53,7 +54,7 @@ void PairLJCutCoulDSFOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_lj_cut_coul_long_omp.cpp b/src/USER-OMP/pair_lj_cut_coul_long_omp.cpp index d32ec58607..618986389c 100644 --- a/src/USER-OMP/pair_lj_cut_coul_long_omp.cpp +++ b/src/USER-OMP/pair_lj_cut_coul_long_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_lj_cut_coul_long_omp.h" #include "atom.h" @@ -52,7 +53,7 @@ void PairLJCutCoulLongOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_lj_cut_coul_long_soft_omp.cpp 
b/src/USER-OMP/pair_lj_cut_coul_long_soft_omp.cpp index d1f97941f5..ce84ba01c3 100644 --- a/src/USER-OMP/pair_lj_cut_coul_long_soft_omp.cpp +++ b/src/USER-OMP/pair_lj_cut_coul_long_soft_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_lj_cut_coul_long_soft_omp.h" #include "atom.h" @@ -52,7 +53,7 @@ void PairLJCutCoulLongSoftOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_lj_cut_coul_msm_omp.cpp b/src/USER-OMP/pair_lj_cut_coul_msm_omp.cpp index 9c48e03a13..58e5cee0c2 100644 --- a/src/USER-OMP/pair_lj_cut_coul_msm_omp.cpp +++ b/src/USER-OMP/pair_lj_cut_coul_msm_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_lj_cut_coul_msm_omp.h" #include "atom.h" @@ -49,7 +50,7 @@ void PairLJCutCoulMSMOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_lj_cut_coul_wolf_omp.cpp b/src/USER-OMP/pair_lj_cut_coul_wolf_omp.cpp index 09403b893c..4111c5b22c 100644 --- a/src/USER-OMP/pair_lj_cut_coul_wolf_omp.cpp +++ b/src/USER-OMP/pair_lj_cut_coul_wolf_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_lj_cut_coul_wolf_omp.h" #include "atom.h" @@ -45,7 +46,7 @@ void PairLJCutCoulWolfOMP::compute(int eflag, int vflag) const int 
inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_lj_cut_dipole_cut_omp.cpp b/src/USER-OMP/pair_lj_cut_dipole_cut_omp.cpp index c259b006c5..7d5d25c39b 100644 --- a/src/USER-OMP/pair_lj_cut_dipole_cut_omp.cpp +++ b/src/USER-OMP/pair_lj_cut_dipole_cut_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_lj_cut_dipole_cut_omp.h" #include "atom.h" @@ -43,7 +44,7 @@ void PairLJCutDipoleCutOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_lj_cut_omp.cpp index f8670b7c28..3e7e92de21 100644 --- a/src/USER-OMP/pair_lj_cut_omp.cpp +++ b/src/USER-OMP/pair_lj_cut_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_lj_cut_omp.h" #include "atom.h" @@ -44,7 +45,7 @@ void PairLJCutOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_lj_cut_soft_omp.cpp b/src/USER-OMP/pair_lj_cut_soft_omp.cpp index 988af13938..5064c4705e 100644 --- a/src/USER-OMP/pair_lj_cut_soft_omp.cpp +++ b/src/USER-OMP/pair_lj_cut_soft_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include 
"omp_compat.h" #include #include "pair_lj_cut_soft_omp.h" #include "atom.h" @@ -44,7 +45,7 @@ void PairLJCutSoftOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_lj_cut_thole_long_omp.cpp b/src/USER-OMP/pair_lj_cut_thole_long_omp.cpp index a8f2d2a081..72a7d5f16a 100644 --- a/src/USER-OMP/pair_lj_cut_thole_long_omp.cpp +++ b/src/USER-OMP/pair_lj_cut_thole_long_omp.cpp @@ -15,6 +15,7 @@ Contributing author: Paul Crozier (SNL) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "pair_lj_cut_thole_long_omp.h" #include #include @@ -70,7 +71,7 @@ void PairLJCutTholeLongOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_lj_cut_tip4p_cut_omp.cpp b/src/USER-OMP/pair_lj_cut_tip4p_cut_omp.cpp index 40cfcb6bc2..2d1b828822 100644 --- a/src/USER-OMP/pair_lj_cut_tip4p_cut_omp.cpp +++ b/src/USER-OMP/pair_lj_cut_tip4p_cut_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_lj_cut_tip4p_cut_omp.h" #include "atom.h" @@ -93,7 +94,7 @@ void PairLJCutTIP4PCutOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_lj_cut_tip4p_long_omp.cpp b/src/USER-OMP/pair_lj_cut_tip4p_long_omp.cpp index d51382c0f1..a60f9d6a57 100644 --- 
a/src/USER-OMP/pair_lj_cut_tip4p_long_omp.cpp +++ b/src/USER-OMP/pair_lj_cut_tip4p_long_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "pair_lj_cut_tip4p_long_omp.h" #include #include "atom.h" @@ -93,7 +94,7 @@ void PairLJCutTIP4PLongOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_lj_cut_tip4p_long_soft_omp.cpp b/src/USER-OMP/pair_lj_cut_tip4p_long_soft_omp.cpp index 350bea884d..f4a75b9af8 100644 --- a/src/USER-OMP/pair_lj_cut_tip4p_long_soft_omp.cpp +++ b/src/USER-OMP/pair_lj_cut_tip4p_long_soft_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_lj_cut_tip4p_long_soft_omp.h" #include "atom.h" @@ -93,7 +94,7 @@ void PairLJCutTIP4PLongSoftOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_lj_expand_omp.cpp b/src/USER-OMP/pair_lj_expand_omp.cpp index d3d1da263c..70b5e436fa 100644 --- a/src/USER-OMP/pair_lj_expand_omp.cpp +++ b/src/USER-OMP/pair_lj_expand_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_lj_expand_omp.h" #include "atom.h" @@ -43,7 +44,7 @@ void PairLJExpandOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma 
omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_lj_gromacs_coul_gromacs_omp.cpp b/src/USER-OMP/pair_lj_gromacs_coul_gromacs_omp.cpp index 2fe7c39afe..0f0a8de2ff 100644 --- a/src/USER-OMP/pair_lj_gromacs_coul_gromacs_omp.cpp +++ b/src/USER-OMP/pair_lj_gromacs_coul_gromacs_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_lj_gromacs_coul_gromacs_omp.h" #include "atom.h" @@ -43,7 +44,7 @@ void PairLJGromacsCoulGromacsOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_lj_gromacs_omp.cpp b/src/USER-OMP/pair_lj_gromacs_omp.cpp index 4a4e68ec6e..ea2c9e8f55 100644 --- a/src/USER-OMP/pair_lj_gromacs_omp.cpp +++ b/src/USER-OMP/pair_lj_gromacs_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_lj_gromacs_omp.h" #include "atom.h" @@ -43,7 +44,7 @@ void PairLJGromacsOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_lj_long_coul_long_omp.cpp b/src/USER-OMP/pair_lj_long_coul_long_omp.cpp index 7286771c26..e74f487129 100644 --- a/src/USER-OMP/pair_lj_long_coul_long_omp.cpp +++ b/src/USER-OMP/pair_lj_long_coul_long_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include 
"pair_lj_long_coul_long_omp.h" #include "atom.h" @@ -56,7 +57,7 @@ void PairLJLongCoulLongOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; @@ -318,7 +319,7 @@ void PairLJLongCoulLongOMP::compute_inner() const int nthreads = comm->nthreads; const int inum = list->inum_inner; #if defined(_OPENMP) -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { int ifrom, ito, tid; @@ -343,7 +344,7 @@ void PairLJLongCoulLongOMP::compute_middle() const int inum = list->inum_middle; #if defined(_OPENMP) -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { int ifrom, ito, tid; @@ -373,7 +374,7 @@ void PairLJLongCoulLongOMP::compute_outer(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_lj_long_tip4p_long_omp.cpp b/src/USER-OMP/pair_lj_long_tip4p_long_omp.cpp index d8eedc6c0b..9c8de110d7 100644 --- a/src/USER-OMP/pair_lj_long_tip4p_long_omp.cpp +++ b/src/USER-OMP/pair_lj_long_tip4p_long_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_lj_long_tip4p_long_omp.h" #include "atom.h" @@ -96,7 +97,7 @@ void PairLJLongTIP4PLongOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; @@ -379,7 +380,7 @@ void PairLJLongTIP4PLongOMP::compute_inner() const int nthreads = comm->nthreads; const int inum = list->inum_inner; #if 
defined(_OPENMP) -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { int ifrom, ito, tid; @@ -404,7 +405,7 @@ void PairLJLongTIP4PLongOMP::compute_middle() const int inum = list->inum_middle; #if defined(_OPENMP) -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { int ifrom, ito, tid; @@ -458,7 +459,7 @@ void PairLJLongTIP4PLongOMP::compute_outer(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_lj_sdk_coul_long_omp.cpp b/src/USER-OMP/pair_lj_sdk_coul_long_omp.cpp index 15fea32f53..4ad082cde1 100644 --- a/src/USER-OMP/pair_lj_sdk_coul_long_omp.cpp +++ b/src/USER-OMP/pair_lj_sdk_coul_long_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_lj_sdk_coul_long_omp.h" #include "atom.h" @@ -45,7 +46,7 @@ void PairLJSDKCoulLongOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_lj_sdk_coul_msm_omp.cpp b/src/USER-OMP/pair_lj_sdk_coul_msm_omp.cpp index 3326034da8..4871356b18 100644 --- a/src/USER-OMP/pair_lj_sdk_coul_msm_omp.cpp +++ b/src/USER-OMP/pair_lj_sdk_coul_msm_omp.cpp @@ -13,6 +13,7 @@ This style is a simplified re-implementation of the CG/CMM pair style ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_lj_sdk_coul_msm_omp.h" #include "atom.h" @@ -51,7 +52,7 @@ void PairLJSDKCoulMSMOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp 
parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_lj_sdk_omp.cpp b/src/USER-OMP/pair_lj_sdk_omp.cpp index 9ba90a2937..9f0671c61c 100644 --- a/src/USER-OMP/pair_lj_sdk_omp.cpp +++ b/src/USER-OMP/pair_lj_sdk_omp.cpp @@ -13,6 +13,7 @@ This style is a simplified re-implementation of the CG/CMM pair style ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_lj_sdk_omp.h" #include "atom.h" @@ -47,7 +48,7 @@ void PairLJSDKOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_lj_sf_dipole_sf_omp.cpp b/src/USER-OMP/pair_lj_sf_dipole_sf_omp.cpp index ca08f817dc..1adaf93953 100644 --- a/src/USER-OMP/pair_lj_sf_dipole_sf_omp.cpp +++ b/src/USER-OMP/pair_lj_sf_dipole_sf_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_lj_sf_dipole_sf_omp.h" #include "atom.h" @@ -43,7 +44,7 @@ void PairLJSFDipoleSFOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_lj_smooth_linear_omp.cpp b/src/USER-OMP/pair_lj_smooth_linear_omp.cpp index 0b29a8fc30..497c2c3a43 100644 --- a/src/USER-OMP/pair_lj_smooth_linear_omp.cpp +++ b/src/USER-OMP/pair_lj_smooth_linear_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include 
"pair_lj_smooth_linear_omp.h" #include "atom.h" @@ -43,7 +44,7 @@ void PairLJSmoothLinearOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_lj_smooth_omp.cpp b/src/USER-OMP/pair_lj_smooth_omp.cpp index fe3d64cbf6..bdb9b3141e 100644 --- a/src/USER-OMP/pair_lj_smooth_omp.cpp +++ b/src/USER-OMP/pair_lj_smooth_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_lj_smooth_omp.h" #include "atom.h" @@ -43,7 +44,7 @@ void PairLJSmoothOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_lubricate_omp.cpp b/src/USER-OMP/pair_lubricate_omp.cpp index dc6be0b96a..9db4239587 100644 --- a/src/USER-OMP/pair_lubricate_omp.cpp +++ b/src/USER-OMP/pair_lubricate_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_lubricate_omp.h" #include "atom.h" @@ -109,7 +110,7 @@ void PairLubricateOMP::compute(int eflag, int vflag) #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_lubricate_poly_omp.cpp b/src/USER-OMP/pair_lubricate_poly_omp.cpp index 648b10b114..dc143a3160 100644 --- a/src/USER-OMP/pair_lubricate_poly_omp.cpp +++ b/src/USER-OMP/pair_lubricate_poly_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) 
------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "pair_lubricate_poly_omp.h" #include #include "atom.h" @@ -106,7 +107,7 @@ void PairLubricatePolyOMP::compute(int eflag, int vflag) #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_meam_spline_omp.cpp b/src/USER-OMP/pair_meam_spline_omp.cpp index f4aff69881..19fb09dd7b 100644 --- a/src/USER-OMP/pair_meam_spline_omp.cpp +++ b/src/USER-OMP/pair_meam_spline_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include @@ -57,7 +58,7 @@ void PairMEAMSplineOMP::compute(int eflag, int vflag) } #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_morse_omp.cpp b/src/USER-OMP/pair_morse_omp.cpp index c77196d1c5..1f566dbd42 100644 --- a/src/USER-OMP/pair_morse_omp.cpp +++ b/src/USER-OMP/pair_morse_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_morse_omp.h" #include "atom.h" @@ -43,7 +44,7 @@ void PairMorseOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_morse_smooth_linear_omp.cpp b/src/USER-OMP/pair_morse_smooth_linear_omp.cpp index e30a774bf2..ac73344c67 100644 --- a/src/USER-OMP/pair_morse_smooth_linear_omp.cpp +++ b/src/USER-OMP/pair_morse_smooth_linear_omp.cpp @@ -13,6 +13,7 
@@ Most code borrowed from pair_morse_omp.cpp ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_morse_smooth_linear_omp.h" #include "atom.h" @@ -47,7 +48,7 @@ void PairMorseSmoothLinearOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_nm_cut_coul_cut_omp.cpp b/src/USER-OMP/pair_nm_cut_coul_cut_omp.cpp index 7ffd189a5b..4326434450 100644 --- a/src/USER-OMP/pair_nm_cut_coul_cut_omp.cpp +++ b/src/USER-OMP/pair_nm_cut_coul_cut_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_nm_cut_coul_cut_omp.h" #include "atom.h" @@ -43,7 +44,7 @@ void PairNMCutCoulCutOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_nm_cut_coul_long_omp.cpp b/src/USER-OMP/pair_nm_cut_coul_long_omp.cpp index 0a0a861b6f..970b383f7e 100644 --- a/src/USER-OMP/pair_nm_cut_coul_long_omp.cpp +++ b/src/USER-OMP/pair_nm_cut_coul_long_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_nm_cut_coul_long_omp.h" #include "atom.h" @@ -51,7 +52,7 @@ void PairNMCutCoulLongOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git 
a/src/USER-OMP/pair_nm_cut_omp.cpp b/src/USER-OMP/pair_nm_cut_omp.cpp index cb74eed3f1..ef68071b68 100644 --- a/src/USER-OMP/pair_nm_cut_omp.cpp +++ b/src/USER-OMP/pair_nm_cut_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_nm_cut_omp.h" #include "atom.h" @@ -43,7 +44,7 @@ void PairNMCutOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_peri_lps_omp.cpp b/src/USER-OMP/pair_peri_lps_omp.cpp index 3ea41321a7..cf29b5cab8 100644 --- a/src/USER-OMP/pair_peri_lps_omp.cpp +++ b/src/USER-OMP/pair_peri_lps_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include #include "pair_peri_lps_omp.h" @@ -62,7 +63,7 @@ void PairPeriLPSOMP::compute(int eflag, int vflag) } #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_peri_pmb_omp.cpp b/src/USER-OMP/pair_peri_pmb_omp.cpp index 95e50df50d..1990b46fe5 100644 --- a/src/USER-OMP/pair_peri_pmb_omp.cpp +++ b/src/USER-OMP/pair_peri_pmb_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include #include "pair_peri_pmb_omp.h" @@ -58,7 +59,7 @@ void PairPeriPMBOMP::compute(int eflag, int vflag) } #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff 
--git a/src/USER-OMP/pair_resquared_omp.cpp b/src/USER-OMP/pair_resquared_omp.cpp index 65f1c7289f..557d212531 100644 --- a/src/USER-OMP/pair_resquared_omp.cpp +++ b/src/USER-OMP/pair_resquared_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_resquared_omp.h" #include "math_extra.h" @@ -45,7 +46,7 @@ void PairRESquaredOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_soft_omp.cpp b/src/USER-OMP/pair_soft_omp.cpp index 7de94a2cd1..85425974cc 100644 --- a/src/USER-OMP/pair_soft_omp.cpp +++ b/src/USER-OMP/pair_soft_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_soft_omp.h" #include "atom.h" @@ -47,7 +48,7 @@ void PairSoftOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_sw_omp.cpp b/src/USER-OMP/pair_sw_omp.cpp index 9812cf06ba..ebe501ff8e 100644 --- a/src/USER-OMP/pair_sw_omp.cpp +++ b/src/USER-OMP/pair_sw_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_sw_omp.h" #include "atom.h" @@ -44,7 +45,7 @@ void PairSWOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE 
LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_table_omp.cpp b/src/USER-OMP/pair_table_omp.cpp index bf18d53d2e..2546bfdc9a 100644 --- a/src/USER-OMP/pair_table_omp.cpp +++ b/src/USER-OMP/pair_table_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_table_omp.h" #include "atom.h" @@ -44,7 +45,7 @@ void PairTableOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_tersoff_mod_c_omp.cpp b/src/USER-OMP/pair_tersoff_mod_c_omp.cpp index 6b6b130c65..5e1e6b1b0e 100644 --- a/src/USER-OMP/pair_tersoff_mod_c_omp.cpp +++ b/src/USER-OMP/pair_tersoff_mod_c_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_tersoff_mod_c_omp.h" #include "atom.h" @@ -43,7 +44,7 @@ void PairTersoffMODCOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_tersoff_mod_omp.cpp b/src/USER-OMP/pair_tersoff_mod_omp.cpp index 634676ee49..aa90b88375 100644 --- a/src/USER-OMP/pair_tersoff_mod_omp.cpp +++ b/src/USER-OMP/pair_tersoff_mod_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_tersoff_mod_omp.h" #include "atom.h" @@ -43,7 +44,7 @@ void PairTersoffMODOMP::compute(int eflag, int vflag) const int inum = 
list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_tersoff_omp.cpp b/src/USER-OMP/pair_tersoff_omp.cpp index 9735ccaa1f..34dbfb73b6 100644 --- a/src/USER-OMP/pair_tersoff_omp.cpp +++ b/src/USER-OMP/pair_tersoff_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_tersoff_omp.h" #include "atom.h" @@ -44,7 +45,7 @@ void PairTersoffOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_tersoff_table_omp.cpp b/src/USER-OMP/pair_tersoff_table_omp.cpp index fa9512bf71..a0a7f4c810 100644 --- a/src/USER-OMP/pair_tersoff_table_omp.cpp +++ b/src/USER-OMP/pair_tersoff_table_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_tersoff_table_omp.h" #include "atom.h" @@ -68,7 +69,7 @@ void PairTersoffTableOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_tip4p_cut_omp.cpp b/src/USER-OMP/pair_tip4p_cut_omp.cpp index c8584b4fe3..5b983d3db5 100644 --- a/src/USER-OMP/pair_tip4p_cut_omp.cpp +++ b/src/USER-OMP/pair_tip4p_cut_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include 
"pair_tip4p_cut_omp.h" #include "atom.h" @@ -92,7 +93,7 @@ void PairTIP4PCutOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_tip4p_long_omp.cpp b/src/USER-OMP/pair_tip4p_long_omp.cpp index 89c3c17684..86bebdbeff 100644 --- a/src/USER-OMP/pair_tip4p_long_omp.cpp +++ b/src/USER-OMP/pair_tip4p_long_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_tip4p_long_omp.h" #include "atom.h" @@ -93,7 +94,7 @@ void PairTIP4PLongOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_tip4p_long_soft_omp.cpp b/src/USER-OMP/pair_tip4p_long_soft_omp.cpp index 88da3d02a8..26e0420955 100644 --- a/src/USER-OMP/pair_tip4p_long_soft_omp.cpp +++ b/src/USER-OMP/pair_tip4p_long_soft_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_tip4p_long_soft_omp.h" #include "atom.h" @@ -93,7 +94,7 @@ void PairTIP4PLongSoftOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_ufm_omp.cpp b/src/USER-OMP/pair_ufm_omp.cpp index 23e76186c0..605d6dd2c6 100644 --- a/src/USER-OMP/pair_ufm_omp.cpp +++ b/src/USER-OMP/pair_ufm_omp.cpp @@ -14,6 +14,7 @@ Maurice de Koning (Unicamp/Brazil) - 
dekoning@ifi.unicamp.br ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_ufm_omp.h" #include "atom.h" @@ -45,7 +46,7 @@ void PairUFMOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_vashishta_omp.cpp b/src/USER-OMP/pair_vashishta_omp.cpp index a218ddf3ed..425a06c296 100644 --- a/src/USER-OMP/pair_vashishta_omp.cpp +++ b/src/USER-OMP/pair_vashishta_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_vashishta_omp.h" #include "atom.h" @@ -44,7 +45,7 @@ void PairVashishtaOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_vashishta_table_omp.cpp b/src/USER-OMP/pair_vashishta_table_omp.cpp index 0986128bf6..36c86a8995 100644 --- a/src/USER-OMP/pair_vashishta_table_omp.cpp +++ b/src/USER-OMP/pair_vashishta_table_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_vashishta_table_omp.h" #include "atom.h" @@ -44,7 +45,7 @@ void PairVashishtaTableOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_yukawa_colloid_omp.cpp b/src/USER-OMP/pair_yukawa_colloid_omp.cpp index 
9b8428c648..e6ac3fa9fb 100644 --- a/src/USER-OMP/pair_yukawa_colloid_omp.cpp +++ b/src/USER-OMP/pair_yukawa_colloid_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_yukawa_colloid_omp.h" #include "atom.h" @@ -43,7 +44,7 @@ void PairYukawaColloidOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_yukawa_omp.cpp b/src/USER-OMP/pair_yukawa_omp.cpp index 20d557e605..f222876cb5 100644 --- a/src/USER-OMP/pair_yukawa_omp.cpp +++ b/src/USER-OMP/pair_yukawa_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_yukawa_omp.h" #include "atom.h" @@ -43,7 +44,7 @@ void PairYukawaOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git a/src/USER-OMP/pair_zbl_omp.cpp b/src/USER-OMP/pair_zbl_omp.cpp index 3a300ce477..284ebbf09c 100644 --- a/src/USER-OMP/pair_zbl_omp.cpp +++ b/src/USER-OMP/pair_zbl_omp.cpp @@ -12,6 +12,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include #include "pair_zbl_omp.h" #include "atom.h" @@ -44,7 +45,7 @@ void PairZBLOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { int ifrom, ito, tid; diff --git 
a/src/USER-OMP/pppm_cg_omp.cpp b/src/USER-OMP/pppm_cg_omp.cpp index f9967bf52a..31098d2675 100644 --- a/src/USER-OMP/pppm_cg_omp.cpp +++ b/src/USER-OMP/pppm_cg_omp.cpp @@ -15,6 +15,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "pppm_cg_omp.h" #include #include @@ -59,7 +60,7 @@ PPPMCGOMP::PPPMCGOMP(LAMMPS *lmp) : PPPMCG(lmp), ThrOMP(lmp, THR_KSPACE) PPPMCGOMP::~PPPMCGOMP() { #if defined(_OPENMP) -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { #if defined(_OPENMP) @@ -81,7 +82,7 @@ void PPPMCGOMP::allocate() PPPMCG::allocate(); #if defined(_OPENMP) -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { #if defined(_OPENMP) @@ -122,7 +123,7 @@ void PPPMCGOMP::compute_gf_ik() const int twoorder = 2*order; #if defined(_OPENMP) -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { double snx,sny,snz; @@ -216,7 +217,7 @@ void PPPMCGOMP::compute_gf_ad() double sf0=0.0,sf1=0.0,sf2=0.0,sf3=0.0,sf4=0.0,sf5=0.0; #if defined(_OPENMP) -#pragma omp parallel default(none) reduction(+:sf0,sf1,sf2,sf3,sf4,sf5) +#pragma omp parallel LMP_DEFAULT_NONE reduction(+:sf0,sf1,sf2,sf3,sf4,sf5) #endif { double snx,sny,snz,sqk; @@ -314,7 +315,7 @@ void PPPMCGOMP::compute(int eflag, int vflag) PPPMCG::compute(eflag,vflag); #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { #if defined(_OPENMP) @@ -351,7 +352,7 @@ void PPPMCGOMP::make_rho() const int iy = nyhi_out - nylo_out + 1; #if defined(_OPENMP) -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { const double * _noalias const q = atom->q; @@ -443,7 +444,7 @@ void PPPMCGOMP::fieldforce_ik() const int nthreads = comm->nthreads; #if defined(_OPENMP) -#pragma omp parallel default(none) +#pragma omp parallel 
LMP_DEFAULT_NONE #endif { FFT_SCALAR dx,dy,dz,x0,y0,z0,ekx,eky,ekz; @@ -524,7 +525,7 @@ void PPPMCGOMP::fieldforce_ad() const int nthreads = comm->nthreads; #if defined(_OPENMP) -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { int i,ifrom,ito,tid,l,m,n,nx,ny,nz,mx,my,mz; @@ -617,7 +618,7 @@ void PPPMCGOMP::fieldforce_peratom() const int nthreads = comm->nthreads; #if defined(_OPENMP) -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { FFT_SCALAR dx,dy,dz,x0,y0,z0; diff --git a/src/USER-OMP/pppm_disp_omp.cpp b/src/USER-OMP/pppm_disp_omp.cpp index 6b2c180a3f..aad77cffc7 100644 --- a/src/USER-OMP/pppm_disp_omp.cpp +++ b/src/USER-OMP/pppm_disp_omp.cpp @@ -16,6 +16,7 @@ Rolf Isele-Holder (RWTH Aachen University) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "pppm_disp_omp.h" #include #include @@ -59,7 +60,7 @@ PPPMDispOMP::PPPMDispOMP(LAMMPS *lmp) : PPPMDisp(lmp), ThrOMP(lmp, THR_KSPACE) PPPMDispOMP::~PPPMDispOMP() { #if defined(_OPENMP) -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { #if defined(_OPENMP) @@ -87,7 +88,7 @@ void PPPMDispOMP::allocate() PPPMDisp::allocate(); #if defined(_OPENMP) -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { #if defined(_OPENMP) @@ -114,7 +115,7 @@ void PPPMDispOMP::allocate() void PPPMDispOMP::compute_gf() { #if defined(_OPENMP) -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { @@ -204,7 +205,7 @@ void PPPMDispOMP::compute_gf() void PPPMDispOMP::compute_gf_6() { #if defined(_OPENMP) -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { double *prd; @@ -311,7 +312,7 @@ void PPPMDispOMP::compute(int eflag, int vflag) PPPMDisp::compute(eflag,vflag); #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE 
LMP_SHARED(eflag,vflag) #endif { #if defined(_OPENMP) @@ -366,7 +367,7 @@ void PPPMDispOMP::particle_map(double dxinv, double dyinv, int flag = 0; #if defined(_OPENMP) -#pragma omp parallel for default(none) reduction(+:flag) schedule(static) +#pragma omp parallel for LMP_DEFAULT_NONE reduction(+:flag) schedule(static) #endif for (int i = 0; i < nlocal; i++) { @@ -419,7 +420,7 @@ void PPPMDispOMP::make_rho_c() const int iy = nyhi_out - nylo_out + 1; #if defined(_OPENMP) -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { const double * _noalias const q = atom->q; @@ -509,7 +510,7 @@ void PPPMDispOMP::make_rho_g() const int iy = nyhi_out_6 - nylo_out_6 + 1; #if defined(_OPENMP) -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0]; @@ -613,7 +614,7 @@ void PPPMDispOMP::make_rho_a() const int iy = nyhi_out_6 - nylo_out_6 + 1; #if defined(_OPENMP) -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0]; @@ -723,7 +724,7 @@ void PPPMDispOMP::fieldforce_c_ik() #if defined(_OPENMP) const int nthreads = comm->nthreads; -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { #if defined(_OPENMP) @@ -828,7 +829,7 @@ void PPPMDispOMP::fieldforce_c_ad() #if defined(_OPENMP) const int nthreads = comm->nthreads; -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { #if defined(_OPENMP) @@ -935,7 +936,7 @@ void PPPMDispOMP::fieldforce_c_peratom() #if defined(_OPENMP) const int nthreads = comm->nthreads; -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { #if defined(_OPENMP) @@ -1034,7 +1035,7 @@ void PPPMDispOMP::fieldforce_g_ik() #if defined(_OPENMP) const int nthreads = comm->nthreads; -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { #if defined(_OPENMP) 
@@ -1138,7 +1139,7 @@ void PPPMDispOMP::fieldforce_g_ad() #if defined(_OPENMP) const int nthreads = comm->nthreads; -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { #if defined(_OPENMP) @@ -1248,7 +1249,7 @@ void PPPMDispOMP::fieldforce_g_peratom() #if defined(_OPENMP) const int nthreads = comm->nthreads; -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { #if defined(_OPENMP) @@ -1350,7 +1351,7 @@ void PPPMDispOMP::fieldforce_a_ik() #if defined(_OPENMP) const int nthreads = comm->nthreads; -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { #if defined(_OPENMP) @@ -1486,7 +1487,7 @@ void PPPMDispOMP::fieldforce_a_ad() #if defined(_OPENMP) const int nthreads = comm->nthreads; -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { #if defined(_OPENMP) @@ -1663,7 +1664,7 @@ void PPPMDispOMP::fieldforce_a_peratom() #if defined(_OPENMP) const int nthreads = comm->nthreads; -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { #if defined(_OPENMP) diff --git a/src/USER-OMP/pppm_disp_tip4p_omp.cpp b/src/USER-OMP/pppm_disp_tip4p_omp.cpp index ec294cd56d..7da4257e07 100644 --- a/src/USER-OMP/pppm_disp_tip4p_omp.cpp +++ b/src/USER-OMP/pppm_disp_tip4p_omp.cpp @@ -15,6 +15,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "pppm_disp_tip4p_omp.h" #include #include @@ -56,7 +57,7 @@ PPPMDispTIP4POMP::~PPPMDispTIP4POMP() { #if defined(_OPENMP) -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { #if defined(_OPENMP) @@ -84,7 +85,7 @@ void PPPMDispTIP4POMP::allocate() PPPMDispTIP4P::allocate(); #if defined(_OPENMP) -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { #if defined(_OPENMP) @@ -111,7 +112,7 @@ void PPPMDispTIP4POMP::allocate() void 
PPPMDispTIP4POMP::compute_gf() { #if defined(_OPENMP) -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { @@ -198,7 +199,7 @@ void PPPMDispTIP4POMP::compute_gf() void PPPMDispTIP4POMP::compute_gf_6() { #if defined(_OPENMP) -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { double *prd; @@ -302,7 +303,7 @@ void PPPMDispTIP4POMP::compute(int eflag, int vflag) PPPMDispTIP4P::compute(eflag,vflag); #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { #if defined(_OPENMP) @@ -358,7 +359,7 @@ void PPPMDispTIP4POMP::particle_map_c(double dxinv, double dyinv, int flag = 0; #if defined(_OPENMP) -#pragma omp parallel for default(none) reduction(+:flag) schedule(static) +#pragma omp parallel for LMP_DEFAULT_NONE reduction(+:flag) schedule(static) #endif for (int i = 0; i < nlocal; i++) { dbl3_t xM; @@ -434,7 +435,7 @@ void PPPMDispTIP4POMP::particle_map(double dxinv, double dyinv, int flag = 0; #if defined(_OPENMP) -#pragma omp parallel for default(none) reduction(+:flag) schedule(static) +#pragma omp parallel for LMP_DEFAULT_NONE reduction(+:flag) schedule(static) #endif for (int i = 0; i < nlocal; i++) { @@ -487,7 +488,7 @@ void PPPMDispTIP4POMP::make_rho_c() const int iy = nyhi_out - nylo_out + 1; #if defined(_OPENMP) -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { const double * _noalias const q = atom->q; @@ -582,7 +583,7 @@ void PPPMDispTIP4POMP::make_rho_g() const int iy = nyhi_out_6 - nylo_out_6 + 1; #if defined(_OPENMP) -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0]; @@ -684,7 +685,7 @@ void PPPMDispTIP4POMP::make_rho_a() const int iy = nyhi_out_6 - nylo_out_6 + 1; #if defined(_OPENMP) -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { const dbl3_t * 
_noalias const x = (dbl3_t *) atom->x[0]; @@ -795,7 +796,7 @@ void PPPMDispTIP4POMP::fieldforce_c_ik() const double boxloz = boxlo[2]; #if defined(_OPENMP) -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { dbl3_t xM; @@ -903,7 +904,7 @@ void PPPMDispTIP4POMP::fieldforce_c_ad() const double boxloz = boxlo[2]; #if defined(_OPENMP) -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { double s1,s2,s3,sf; @@ -1018,7 +1019,7 @@ void PPPMDispTIP4POMP::fieldforce_g_ik() const double * const * const x = atom->x; #if defined(_OPENMP) -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { #if defined(_OPENMP) @@ -1119,7 +1120,7 @@ void PPPMDispTIP4POMP::fieldforce_g_ad() const double hz_inv = nz_pppm_6/zprd_slab; #if defined(_OPENMP) -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { #if defined(_OPENMP) @@ -1226,7 +1227,7 @@ void PPPMDispTIP4POMP::fieldforce_g_peratom() const double * const * const x = atom->x; #if defined(_OPENMP) -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { #if defined(_OPENMP) @@ -1325,7 +1326,7 @@ void PPPMDispTIP4POMP::fieldforce_a_ik() const double * const * const x = atom->x; #if defined(_OPENMP) -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { #if defined(_OPENMP) @@ -1458,7 +1459,7 @@ void PPPMDispTIP4POMP::fieldforce_a_ad() const double hz_inv = nz_pppm_6/zprd_slab; #if defined(_OPENMP) -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { #if defined(_OPENMP) @@ -1632,7 +1633,7 @@ void PPPMDispTIP4POMP::fieldforce_a_peratom() const double * const * const x = atom->x; #if defined(_OPENMP) -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { #if defined(_OPENMP) diff --git a/src/USER-OMP/pppm_omp.cpp b/src/USER-OMP/pppm_omp.cpp index b9b39826ff..e3e46f4de0 100644 --- 
a/src/USER-OMP/pppm_omp.cpp +++ b/src/USER-OMP/pppm_omp.cpp @@ -15,6 +15,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "pppm_omp.h" #include #include @@ -61,7 +62,7 @@ void PPPMOMP::allocate() PPPM::allocate(); #if defined(_OPENMP) -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { #if defined(_OPENMP) @@ -81,7 +82,7 @@ void PPPMOMP::allocate() PPPMOMP::~PPPMOMP() { #if defined(_OPENMP) -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { #if defined(_OPENMP) @@ -122,7 +123,7 @@ void PPPMOMP::compute_gf_ik() const int twoorder = 2*order; #if defined(_OPENMP) -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { double snx,sny,snz; @@ -216,7 +217,7 @@ void PPPMOMP::compute_gf_ad() double sf0=0.0,sf1=0.0,sf2=0.0,sf3=0.0,sf4=0.0,sf5=0.0; #if defined(_OPENMP) -#pragma omp parallel default(none) reduction(+:sf0,sf1,sf2,sf3,sf4,sf5) +#pragma omp parallel LMP_DEFAULT_NONE reduction(+:sf0,sf1,sf2,sf3,sf4,sf5) #endif { double snx,sny,snz,sqk; @@ -314,7 +315,7 @@ void PPPMOMP::compute(int eflag, int vflag) PPPM::compute(eflag,vflag); #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { #if defined(_OPENMP) @@ -352,7 +353,7 @@ void PPPMOMP::make_rho() const int iy = nyhi_out - nylo_out + 1; #if defined(_OPENMP) -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { const double * _noalias const q = atom->q; @@ -449,7 +450,7 @@ void PPPMOMP::fieldforce_ik() const double boxloz = boxlo[2]; #if defined(_OPENMP) -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { FFT_SCALAR x0,y0,z0,ekx,eky,ekz; @@ -534,7 +535,7 @@ void PPPMOMP::fieldforce_ad() const double boxloz = boxlo[2]; #if defined(_OPENMP) -#pragma omp parallel 
default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { double s1,s2,s3,sf; @@ -627,7 +628,7 @@ void PPPMOMP::fieldforce_peratom() const double * _noalias const q = atom->q; #if defined(_OPENMP) -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { FFT_SCALAR dx,dy,dz,x0,y0,z0; diff --git a/src/USER-OMP/pppm_tip4p_omp.cpp b/src/USER-OMP/pppm_tip4p_omp.cpp index 359b5dcc8d..6b3316943e 100644 --- a/src/USER-OMP/pppm_tip4p_omp.cpp +++ b/src/USER-OMP/pppm_tip4p_omp.cpp @@ -15,6 +15,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "pppm_tip4p_omp.h" #include #include @@ -61,7 +62,7 @@ PPPMTIP4POMP::PPPMTIP4POMP(LAMMPS *lmp) : PPPMTIP4POMP::~PPPMTIP4POMP() { #if defined(_OPENMP) -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { #if defined(_OPENMP) @@ -83,7 +84,7 @@ void PPPMTIP4POMP::allocate() PPPMTIP4P::allocate(); #if defined(_OPENMP) -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { #if defined(_OPENMP) @@ -124,7 +125,7 @@ void PPPMTIP4POMP::compute_gf_ik() const int twoorder = 2*order; #if defined(_OPENMP) -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { double snx,sny,snz; @@ -218,7 +219,7 @@ void PPPMTIP4POMP::compute_gf_ad() double sf0=0.0,sf1=0.0,sf2=0.0,sf3=0.0,sf4=0.0,sf5=0.0; #if defined(_OPENMP) -#pragma omp parallel default(none) reduction(+:sf0,sf1,sf2,sf3,sf4,sf5) +#pragma omp parallel LMP_DEFAULT_NONE reduction(+:sf0,sf1,sf2,sf3,sf4,sf5) #endif { double snx,sny,snz,sqk; @@ -316,7 +317,7 @@ void PPPMTIP4POMP::compute(int eflag, int vflag) PPPMTIP4P::compute(eflag,vflag); #if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag) #endif { #if defined(_OPENMP) @@ -355,7 +356,7 @@ void PPPMTIP4POMP::particle_map() int flag = 0; 
#if defined(_OPENMP) -#pragma omp parallel for default(none) reduction(+:flag) schedule(static) +#pragma omp parallel for LMP_DEFAULT_NONE reduction(+:flag) schedule(static) #endif for (int i = 0; i < nlocal; i++) { dbl3_t xM; @@ -416,7 +417,7 @@ void PPPMTIP4POMP::make_rho() const int iy = nyhi_out - nylo_out + 1; #if defined(_OPENMP) -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { const double * _noalias const q = atom->q; @@ -521,7 +522,7 @@ void PPPMTIP4POMP::fieldforce_ik() const double boxloz = boxlo[2]; #if defined(_OPENMP) -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { dbl3_t xM; @@ -632,7 +633,7 @@ void PPPMTIP4POMP::fieldforce_ad() const double boxloz = boxlo[2]; #if defined(_OPENMP) -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { double s1,s2,s3,sf; diff --git a/src/USER-OMP/reaxc_forces_omp.cpp b/src/USER-OMP/reaxc_forces_omp.cpp index e48a5c11d4..381b2e5525 100644 --- a/src/USER-OMP/reaxc_forces_omp.cpp +++ b/src/USER-OMP/reaxc_forces_omp.cpp @@ -26,6 +26,7 @@ . ----------------------------------------------------------------------*/ +#include "omp_compat.h" #include "reaxc_forces_omp.h" #include #include @@ -146,7 +147,7 @@ void Compute_Total_ForceOMP( reax_system *system, control_params *control, reax_list *bonds = (*lists) + BONDS; #if defined(_OPENMP) -#pragma omp parallel default(shared) //default(none) +#pragma omp parallel default(shared) //LMP_DEFAULT_NONE #endif { int i, j, k, pj, pk, start_j, end_j; diff --git a/src/USER-OMP/reaxc_hydrogen_bonds_omp.cpp b/src/USER-OMP/reaxc_hydrogen_bonds_omp.cpp index d06966a92d..22d9df7702 100644 --- a/src/USER-OMP/reaxc_hydrogen_bonds_omp.cpp +++ b/src/USER-OMP/reaxc_hydrogen_bonds_omp.cpp @@ -26,6 +26,7 @@ . 
----------------------------------------------------------------------*/ +#include "omp_compat.h" #include "reaxc_hydrogen_bonds_omp.h" #include #include @@ -57,7 +58,7 @@ void Hydrogen_BondsOMP( reax_system *system, control_params *control, const int nthreads = control->nthreads; #if defined(_OPENMP) -#pragma omp parallel default(shared) //default(none) +#pragma omp parallel default(shared) //LMP_DEFAULT_NONE #endif { int i, j, k, pi, pk; diff --git a/src/USER-OMP/respa_omp.cpp b/src/USER-OMP/respa_omp.cpp index 5add419253..b5e5293aa4 100644 --- a/src/USER-OMP/respa_omp.cpp +++ b/src/USER-OMP/respa_omp.cpp @@ -15,6 +15,7 @@ Contributing authors: Mark Stevens (SNL), Paul Crozier (SNL) ------------------------------------------------------------------------- */ +#include "omp_compat.h" #include "respa_omp.h" #include "neighbor.h" #include "comm.h" @@ -146,7 +147,7 @@ void RespaOMP::setup(int flag) const int nall = atom->nlocal + atom->nghost; const int nthreads = comm->nthreads; #if defined(_OPENMP) -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { #if defined(_OPENMP) @@ -241,7 +242,7 @@ void RespaOMP::setup_minimal(int flag) const int nall = atom->nlocal + atom->nghost; const int nthreads = comm->nthreads; #if defined(_OPENMP) -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { #if defined(_OPENMP) @@ -394,7 +395,7 @@ void RespaOMP::recurse(int ilevel) const int nall = atom->nlocal + atom->nghost; const int nthreads = comm->nthreads; #if defined(_OPENMP) -#pragma omp parallel default(none) +#pragma omp parallel LMP_DEFAULT_NONE #endif { #if defined(_OPENMP) -- GitLab From 6563331d6e11da18a763143562a3b80b61ee9761 Mon Sep 17 00:00:00 2001 From: Michael Lamparski Date: Fri, 20 Mar 2020 13:21:21 -0400 Subject: [PATCH 027/328] rename to LAMMPS_OMP_COMPAT, improve docs --- cmake/CMakeLists.txt | 4 ++-- cmake/README.md | 10 ++++++++++ doc/src/Build_basics.rst | 3 ++- src/omp_compat.h | 2 +- 4 files 
changed, 15 insertions(+), 4 deletions(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 3dea0db8b0..60cbc8e5c0 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -242,9 +242,9 @@ if(BUILD_OMP) if ((CMAKE_CXX_COMPILER_ID STREQUAL "GNU") AND (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 8.99.9)) # GCC 9.x strictly implements OpenMP 4.0 semantics for consts. - add_definitions(-DLMP_OMP_COMPAT=4) + add_definitions(-DLAMMPS_OMP_COMPAT=4) else() - add_definitions(-DLMP_OMP_COMPAT=3) + add_definitions(-DLAMMPS_OMP_COMPAT=3) endif() endif() diff --git a/cmake/README.md b/cmake/README.md index b9dd6d4373..cd90bc7020 100644 --- a/cmake/README.md +++ b/cmake/README.md @@ -264,6 +264,16 @@ cmake -C ../cmake/presets/all_on.cmake -C ../cmake/presets/nolib.cmake -D PKG_GP + + LAMMPS_OMP_COMPAT + Workaround for backwards-incompatible changes regarding predetermined sharing modes in OpenMP 4.x. A value of 3 or 4 should be used, reflecting the version of the OpenMP spec that is implemented by the compiler. + +
+
3 (default except for known OMP 4.0 compilers)
+
4
+
+ + LAMMPS_MEMALIGN controls the alignment of blocks of memory allocated by LAMMPS diff --git a/doc/src/Build_basics.rst b/doc/src/Build_basics.rst index 96bc0f5bd1..62b16d9a03 100644 --- a/doc/src/Build_basics.rst +++ b/doc/src/Build_basics.rst @@ -147,7 +147,8 @@ semantics, which are incompatible with the OpenMP 3.1 semantics used in LAMMPS (for maximal compatibility with compiler versions in use). LAMMPS will try to detect compilers that use OpenMP 4.0 semantics and change the directives accordingly, but if your compiler is not -detected, you may set the CMake variable ``-D LMP_OMP_COMPAT=4``. +detected, you may set the define ``-D LAMMPS_OMP_COMPAT=4`` in ``LMP_INC`` +or the CMake build command. ---------- diff --git a/src/omp_compat.h b/src/omp_compat.h index 8abf1c54bc..add429eea8 100644 --- a/src/omp_compat.h +++ b/src/omp_compat.h @@ -25,7 +25,7 @@ // so this is what LAMMPS primarily uses. For those compilers // that strictly implement OpenMP 4.0 (such as GCC 9.0), we // give up default(none). -#if LMP_OMP_COMPAT == 4 +#if LAMMPS_OMP_COMPAT == 4 # define LMP_SHARED(...) # define LMP_DEFAULT_NONE default(shared) #else -- GitLab From f560cd6dd596857d14d3663e3c225fbaec2096d0 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sat, 21 Mar 2020 01:08:09 -0400 Subject: [PATCH 028/328] make certain, the molecular flag is always initialized --- src/atom_vec.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/atom_vec.cpp b/src/atom_vec.cpp index 7b89c2fd79..c4dd53ad18 100644 --- a/src/atom_vec.cpp +++ b/src/atom_vec.cpp @@ -36,6 +36,7 @@ AtomVec::AtomVec(LAMMPS *lmp) : Pointers(lmp) forceclearflag = 0; size_data_bonus = 0; maxexchange = 0; + molecular = 0; kokkosable = 0; -- GitLab From 79b84c0847d0f9504527ce0b5dcb99688af2943c Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sun, 22 Mar 2020 15:44:28 -0400 Subject: [PATCH 029/328] more thorough checking if BUILD_OMP may be enabled by default. we need the OpenMP runtime, too. 
--- cmake/CMakeLists.txt | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 4dd079eaae..36bed2d649 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -227,7 +227,15 @@ pkg_depends(USER-LB MPI) pkg_depends(USER-PHONON KSPACE) pkg_depends(USER-SCAFACOS MPI) +# detect if we may enable OpenMP support by default +set(BUILD_OMP_DEFAULT OFF) find_package(OpenMP QUIET) +if(OpenMP_FOUND) + check_include_file_cxx(omp.h HAVE_OMP_H_INCLUDE) + if(HAVE_OMP_H_INCLUDE) + set(BUILD_OMP_DEFAULT ON) + endif() +endif() # TODO: this is a temporary workaround until a better solution is found. AK 2019-05-30 # GNU GCC 9.x uses settings incompatible with our use of 'default(none)' in OpenMP pragmas @@ -237,14 +245,14 @@ find_package(OpenMP QUIET) if ((CMAKE_CXX_COMPILER_ID STREQUAL "GNU") AND (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 8.99.9)) option(BUILD_OMP "Build with OpenMP support" OFF) else() - option(BUILD_OMP "Build with OpenMP support" ${OpenMP_FOUND}) + option(BUILD_OMP "Build with OpenMP support" ${BUILD_OMP_DEFAULT}) endif() if(BUILD_OMP) find_package(OpenMP REQUIRED) check_include_file_cxx(omp.h HAVE_OMP_H_INCLUDE) if(NOT HAVE_OMP_H_INCLUDE) - message(FATAL_ERROR "Cannot find required 'omp.h' header file") + message(FATAL_ERROR "Cannot find the 'omp.h' header file required for full OpenMP support") endif() set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") -- GitLab From 62cb760ee2ea8172f621201f04417c9d60bf9474 Mon Sep 17 00:00:00 2001 From: Christoph Junghans Date: Sun, 22 Mar 2020 14:42:29 -0600 Subject: [PATCH 030/328] cmake: remove LIB_SOURCES and LMP_SOURCES --- cmake/CMakeLists.txt | 30 ++++++++++++------------- cmake/Modules/Packages/CORESHELL.cmake | 2 +- cmake/Modules/Packages/GPU.cmake | 2 +- cmake/Modules/Packages/KOKKOS.cmake | 2 +- cmake/Modules/Packages/OPT.cmake | 2 +- cmake/Modules/Packages/QEQ.cmake 
| 2 +- cmake/Modules/Packages/USER-INTEL.cmake | 2 +- cmake/Modules/Packages/USER-OMP.cmake | 2 +- cmake/Modules/Packages/USER-SDPD.cmake | 4 +++- 9 files changed, 25 insertions(+), 23 deletions(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 4dd079eaae..eb5700b19d 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -21,11 +21,6 @@ if (CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) set(CMAKE_INSTALL_PREFIX "$ENV{HOME}/.local" CACHE PATH "default install path" FORCE ) endif() -# To avoid conflicts with the conventional Makefile build system, we build everything here -file(GLOB LIB_SOURCES ${LAMMPS_SOURCE_DIR}/[^.]*.cpp) -file(GLOB LMP_SOURCES ${LAMMPS_SOURCE_DIR}/main.cpp) -list(REMOVE_ITEM LIB_SOURCES ${LMP_SOURCES}) - # Cmake modules/macros are in a subdirectory to keep this file cleaner set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/Modules) @@ -110,8 +105,19 @@ endif() option(BUILD_TOOLS "Build and install LAMMPS tools (msi2lmp, binary2txt, chain)" OFF) -if(NOT BUILD_EXE AND NOT BUILD_LIB) - message(FATAL_ERROR "You need to at least enable one of two following options: BUILD_LIB or BUILD_EXE") +if(BUILD_LIB) + file(GLOB MAIN_SOURCES ${LAMMPS_SOURCE_DIR}/main.cpp) + list(REMOVE_ITEM ALL_SOURCES ${MAIN_SOURCES}) + add_library(lammps ${ALL_SOURCES}) + if(BUILD_EXE) + add_executable(lmp ${MAIN_SOURCES}) + target_link_libraries(lmp PRIVATE lammps) + endif() +else() + if(NOT BUILD_EXE) + message(FATAL_ERROR "You need to at least enable one of two following options: BUILD_LIB or BUILD_EXE") + endif() + add_executable(lammps ${ALL_SOURCES}) endif() option(CMAKE_VERBOSE_MAKEFILE "Generate verbose Makefiles" OFF) @@ -391,7 +397,7 @@ foreach(PKG ${DEFAULT_PACKAGES}) # detects styles in package and adds them to global list RegisterStyles(${${PKG}_SOURCES_DIR}) - list(APPEND LIB_SOURCES ${${PKG}_SOURCES}) + target_sources(lammps PRIVATE ${${PKG}_SOURCES}) include_directories(${${PKG}_SOURCES_DIR}) endif() @@ -533,7 +539,6 @@ if (${_index} 
GREATER -1) endif() list(REMOVE_DUPLICATES LAMMPS_LINK_LIBS) if(BUILD_LIB) - add_library(lammps ${LIB_SOURCES}) target_link_libraries(lammps ${LAMMPS_LINK_LIBS}) if(LAMMPS_DEPS) add_dependencies(lammps ${LAMMPS_DEPS}) @@ -579,15 +584,10 @@ if(BUILD_LIB) install(FILES ${CMAKE_CURRENT_BINARY_DIR}/liblammps${LAMMPS_LIB_SUFFIX}.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig) configure_file(FindLAMMPS.cmake.in ${CMAKE_CURRENT_BINARY_DIR}/FindLAMMPS${LAMMPS_LIB_SUFFIX}.cmake @ONLY) install(FILES ${CMAKE_CURRENT_BINARY_DIR}/FindLAMMPS${LAMMPS_LIB_SUFFIX}.cmake DESTINATION ${CMAKE_INSTALL_DATADIR}/cmake/Modules) -else() - list(APPEND LMP_SOURCES ${LIB_SOURCES}) endif() if(BUILD_EXE) - add_executable(lmp ${LMP_SOURCES}) - if(BUILD_LIB) - target_link_libraries(lmp lammps) - else() + if(NOT BUILD_LIB) target_link_libraries(lmp ${LAMMPS_LINK_LIBS}) if(LAMMPS_DEPS) add_dependencies(lmp ${LAMMPS_DEPS}) diff --git a/cmake/Modules/Packages/CORESHELL.cmake b/cmake/Modules/Packages/CORESHELL.cmake index 591477c899..2afe2b8c1b 100644 --- a/cmake/Modules/Packages/CORESHELL.cmake +++ b/cmake/Modules/Packages/CORESHELL.cmake @@ -8,6 +8,6 @@ if(PKG_CORESHELL) get_property(CORESHELL_SOURCES GLOBAL PROPERTY CORESHELL_SOURCES) - list(APPEND LIB_SOURCES ${CORESHELL_SOURCES}) + target_sources(lammps PRIVATE ${CORESHELL_SOURCES}) include_directories(${CORESHELL_SOURCES_DIR}) endif() diff --git a/cmake/Modules/Packages/GPU.cmake b/cmake/Modules/Packages/GPU.cmake index abbcb1f495..e8ca4d35ab 100644 --- a/cmake/Modules/Packages/GPU.cmake +++ b/cmake/Modules/Packages/GPU.cmake @@ -192,6 +192,6 @@ if(PKG_GPU) get_property(GPU_SOURCES GLOBAL PROPERTY GPU_SOURCES) - list(APPEND LIB_SOURCES ${GPU_SOURCES}) + target_sources(lammps PRIVATE ${GPU_SOURCES}) include_directories(${GPU_SOURCES_DIR}) endif() diff --git a/cmake/Modules/Packages/KOKKOS.cmake b/cmake/Modules/Packages/KOKKOS.cmake index 29beaca957..4b871ccb46 100644 --- a/cmake/Modules/Packages/KOKKOS.cmake +++ 
b/cmake/Modules/Packages/KOKKOS.cmake @@ -69,6 +69,6 @@ if(PKG_KOKKOS) get_property(KOKKOS_PKG_SOURCES GLOBAL PROPERTY KOKKOS_PKG_SOURCES) - list(APPEND LIB_SOURCES ${KOKKOS_PKG_SOURCES}) + target_sources(lammps PRIVATE ${KOKKOS_PKG_SOURCES}) include_directories(${KOKKOS_PKG_SOURCES_DIR}) endif() diff --git a/cmake/Modules/Packages/OPT.cmake b/cmake/Modules/Packages/OPT.cmake index f2802c757b..02e3877c59 100644 --- a/cmake/Modules/Packages/OPT.cmake +++ b/cmake/Modules/Packages/OPT.cmake @@ -8,6 +8,6 @@ if(PKG_OPT) get_property(OPT_SOURCES GLOBAL PROPERTY OPT_SOURCES) - list(APPEND LIB_SOURCES ${OPT_SOURCES}) + target_sources(lammps PRIVATE ${OPT_SOURCES}) include_directories(${OPT_SOURCES_DIR}) endif() diff --git a/cmake/Modules/Packages/QEQ.cmake b/cmake/Modules/Packages/QEQ.cmake index 94cca30540..9b151c2610 100644 --- a/cmake/Modules/Packages/QEQ.cmake +++ b/cmake/Modules/Packages/QEQ.cmake @@ -15,6 +15,6 @@ if(PKG_QEQ) endforeach() get_property(QEQ_SOURCES GLOBAL PROPERTY QEQ_SOURCES) - list(APPEND LIB_SOURCES ${QEQ_SOURCES}) + target_sources(lammps PRIVATE ${QEQ_SOURCES}) include_directories(${QEQ_SOURCES_DIR}) endif() diff --git a/cmake/Modules/Packages/USER-INTEL.cmake b/cmake/Modules/Packages/USER-INTEL.cmake index d0941a0a12..7b08a7b459 100644 --- a/cmake/Modules/Packages/USER-INTEL.cmake +++ b/cmake/Modules/Packages/USER-INTEL.cmake @@ -108,6 +108,6 @@ if(PKG_USER-INTEL) RegisterIntegrateStyle(${USER-INTEL_SOURCES_DIR}/verlet_lrt_intel.h) endif() - list(APPEND LIB_SOURCES ${USER-INTEL_SOURCES}) + target_sources(lammps PRIVATE ${USER-INTEL_SOURCES}) include_directories(${USER-INTEL_SOURCES_DIR}) endif() diff --git a/cmake/Modules/Packages/USER-OMP.cmake b/cmake/Modules/Packages/USER-OMP.cmake index 668f42f10a..4b1a4b1571 100644 --- a/cmake/Modules/Packages/USER-OMP.cmake +++ b/cmake/Modules/Packages/USER-OMP.cmake @@ -37,6 +37,6 @@ if(PKG_USER-OMP) ${USER-OMP_SOURCES_DIR}/reaxc_valence_angles_omp.cpp) endif() - list(APPEND LIB_SOURCES ${USER-OMP_SOURCES}) 
+ target_sources(lammps PRIVATE ${USER-OMP_SOURCES}) include_directories(${USER-OMP_SOURCES_DIR}) endif() diff --git a/cmake/Modules/Packages/USER-SDPD.cmake b/cmake/Modules/Packages/USER-SDPD.cmake index 530dcf2bd9..1105fbd7ce 100644 --- a/cmake/Modules/Packages/USER-SDPD.cmake +++ b/cmake/Modules/Packages/USER-SDPD.cmake @@ -5,7 +5,9 @@ if(PKG_USER-SDPD) get_property(hlist GLOBAL PROPERTY FIX) if(NOT PKG_RIGID) list(REMOVE_ITEM hlist ${USER-SDPD_SOURCES_DIR}/fix_rigid_meso.h) - list(REMOVE_ITEM LIB_SOURCES ${USER-SDPD_SOURCES_DIR}/fix_rigid_meso.cpp) + get_target_property(LAMMPS_SOURCES lammps SOURCES) + list(REMOVE_ITEM LAMMPS_SOURCES ${USER-SDPD_SOURCES_DIR}/fix_rigid_meso.cpp) + set_property(TARGET lammps PROPERTY SOURCES ${LAMMPS_SOURCES}) endif() set_property(GLOBAL PROPERTY FIX "${hlist}") -- GitLab From cd89a7c4b7edb877694e07a91edc558a94845f12 Mon Sep 17 00:00:00 2001 From: Christoph Junghans Date: Sun, 22 Mar 2020 18:03:07 -0600 Subject: [PATCH 031/328] cmake: fix renamed target for exe --- cmake/CMakeLists.txt | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index eb5700b19d..0fae7f55bf 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -105,12 +105,14 @@ endif() option(BUILD_TOOLS "Build and install LAMMPS tools (msi2lmp, binary2txt, chain)" OFF) +file(GLOB ALL_SOURCES ${LAMMPS_SOURCE_DIR}/[^.]*.cpp) if(BUILD_LIB) file(GLOB MAIN_SOURCES ${LAMMPS_SOURCE_DIR}/main.cpp) list(REMOVE_ITEM ALL_SOURCES ${MAIN_SOURCES}) add_library(lammps ${ALL_SOURCES}) if(BUILD_EXE) add_executable(lmp ${MAIN_SOURCES}) + set(LAMMPS_EXE lmp) target_link_libraries(lmp PRIVATE lammps) endif() else() @@ -118,6 +120,7 @@ else() message(FATAL_ERROR "You need to at least enable one of two following options: BUILD_LIB or BUILD_EXE") endif() add_executable(lammps ${ALL_SOURCES}) + set(LAMMPS_EXE lammps) endif() option(CMAKE_VERBOSE_MAKEFILE "Generate verbose Makefiles" OFF) @@ -588,14 +591,14 @@ 
endif() if(BUILD_EXE) if(NOT BUILD_LIB) - target_link_libraries(lmp ${LAMMPS_LINK_LIBS}) + target_link_libraries(${LAMMPS_EXE} ${LAMMPS_LINK_LIBS}) if(LAMMPS_DEPS) - add_dependencies(lmp ${LAMMPS_DEPS}) + add_dependencies(${LAMMPS_EXE} ${LAMMPS_DEPS}) endif() endif() - set_target_properties(lmp PROPERTIES OUTPUT_NAME ${LAMMPS_BINARY}) - install(TARGETS lmp DESTINATION ${CMAKE_INSTALL_BINDIR}) + set_target_properties(${LAMMPS_EXE} PROPERTIES OUTPUT_NAME ${LAMMPS_BINARY}) + install(TARGETS ${LAMMPS_EXE} DESTINATION ${CMAKE_INSTALL_BINDIR}) install(FILES ${LAMMPS_DOC_DIR}/lammps.1 DESTINATION ${CMAKE_INSTALL_MANDIR}/man1 RENAME ${LAMMPS_BINARY}.1) endif() -- GitLab From 5828815b3ec50845be1a312056de559c05bbff1a Mon Sep 17 00:00:00 2001 From: Christoph Junghans Date: Sun, 22 Mar 2020 18:55:53 -0600 Subject: [PATCH 032/328] cmake: remove LAMMPS_LINK_LIBS --- cmake/CMakeLists.txt | 28 ++++++---------- cmake/Modules/Packages/COMPRESS.cmake | 3 +- cmake/Modules/Packages/GPU.cmake | 4 +-- cmake/Modules/Packages/KIM.cmake | 5 ++- cmake/Modules/Packages/KOKKOS.cmake | 6 ++-- cmake/Modules/Packages/KSPACE.cmake | 6 ++-- cmake/Modules/Packages/LATTE.cmake | 2 +- cmake/Modules/Packages/MESSAGE.cmake | 2 +- cmake/Modules/Packages/MSCG.cmake | 2 +- cmake/Modules/Packages/PYTHON.cmake | 2 +- cmake/Modules/Packages/USER-COLVARS.cmake | 4 +-- cmake/Modules/Packages/USER-INTEL.cmake | 6 ++-- cmake/Modules/Packages/USER-MOLFILE.cmake | 2 +- cmake/Modules/Packages/USER-NETCDF.cmake | 4 +-- cmake/Modules/Packages/USER-PLUMED.cmake | 8 ++--- cmake/Modules/Packages/USER-QMMM.cmake | 2 +- cmake/Modules/Packages/USER-QUIP.cmake | 2 +- cmake/Modules/Packages/USER-SCAFACOS.cmake | 38 +++++++++++----------- cmake/Modules/Packages/USER-VTK.cmake | 2 +- cmake/Modules/Packages/VORONOI.cmake | 2 +- 20 files changed, 60 insertions(+), 70 deletions(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 0fae7f55bf..9190374a2e 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ 
-126,7 +126,6 @@ endif() option(CMAKE_VERBOSE_MAKEFILE "Generate verbose Makefiles" OFF) include(GNUInstallDirs) -set(LAMMPS_LINK_LIBS) set(LAMMPS_DEPS) set(LAMMPS_API_DEFINES) @@ -173,7 +172,7 @@ if(PKG_USER-ADIOS) # script that defines the MPI::MPI_C target enable_language(C) find_package(ADIOS2 REQUIRED) - list(APPEND LAMMPS_LINK_LIBS adios2::adios2) + target_link_libraries(lammps PRIVATE adios2::adios2) endif() # do MPI detection after language activation, @@ -188,9 +187,8 @@ if(BUILD_MPI) include(MPI4WIN) else() find_package(MPI REQUIRED) - include_directories(${MPI_CXX_INCLUDE_PATH}) add_definitions(-DMPICH_SKIP_MPICXX -DOMPI_SKIP_MPICXX=1) - list(APPEND LAMMPS_LINK_LIBS ${MPI_CXX_LIBRARIES}) + target_link_libraries(lammps PRIVATE MPI::MPI_CXX) option(LAMMPS_LONGLONG_TO_LONG "Workaround if your system or MPI version does not recognize 'long long' data types" OFF) if(LAMMPS_LONGLONG_TO_LONG) add_definitions(-DLAMMPS_LONGLONG_TO_LONG) @@ -201,7 +199,7 @@ else() file(GLOB MPI_SOURCES ${LAMMPS_SOURCE_DIR}/STUBS/mpi.c) add_library(mpi_stubs STATIC ${MPI_SOURCES}) include_directories(${LAMMPS_SOURCE_DIR}/STUBS) - list(APPEND LAMMPS_LINK_LIBS mpi_stubs) + target_link_libraries(lammps PRIVATE mpi_stubs) endif() set(LAMMPS_SIZES "smallbig" CACHE STRING "LAMMPS integer sizes (smallsmall: all 32-bit, smallbig: 64-bit #atoms #timesteps, bigbig: also 64-bit imageint, 64-bit atom ids)") @@ -284,8 +282,7 @@ option(WITH_JPEG "Enable JPEG support" ${JPEG_FOUND}) if(WITH_JPEG) find_package(JPEG REQUIRED) add_definitions(-DLAMMPS_JPEG) - include_directories(${JPEG_INCLUDE_DIR}) - list(APPEND LAMMPS_LINK_LIBS ${JPEG_LIBRARIES}) + target_link_libraries(lammps PRIVATE JPEG::JPEG) endif() find_package(PNG QUIET) @@ -298,8 +295,7 @@ endif() if(WITH_PNG) find_package(PNG REQUIRED) find_package(ZLIB REQUIRED) - include_directories(${PNG_INCLUDE_DIRS} ${ZLIB_INCLUDE_DIRS}) - list(APPEND LAMMPS_LINK_LIBS ${PNG_LIBRARIES} ${ZLIB_LIBRARIES}) + target_link_libraries(lammps PRIVATE PNG::PNG 
ZLIB::ZLIB) add_definitions(-DLAMMPS_PNG) endif() @@ -376,7 +372,7 @@ include(CheckLibraryExists) # message(FATAL_ERROR "Could not find needed math function - ${FUNC}") # endif(NOT FOUND_${FUNC}_${MATH_LIBRARIES}) #endforeach(FUNC) -list(APPEND LAMMPS_LINK_LIBS ${MATH_LIBRARIES}) +target_link_libraries(lammps PRIVATE ${MATH_LIBRARIES}) ###################################### # Generate Basic Style files @@ -442,7 +438,7 @@ foreach(SIMPLE_LIB POEMS USER-ATC USER-AWPMD USER-H5MD) if(LAMMPS_USE_MPI4WIN) add_dependencies(${PKG_LIB} mpi4win_build) endif() - list(APPEND LAMMPS_LINK_LIBS ${PKG_LIB}) + target_link_libraries(lammps PRIVATE ${PKG_LIB}) if(PKG_LIB STREQUAL awpmd) target_include_directories(awpmd PUBLIC ${LAMMPS_LIB_SOURCE_DIR}/awpmd/systems/interact ${LAMMPS_LIB_SOURCE_DIR}/awpmd/ivutils/include) elseif(PKG_LIB STREQUAL h5md) @@ -486,9 +482,9 @@ include(Packages/GPU) ###################################################################### if(${CMAKE_SYSTEM_NAME} STREQUAL "Windows") if(LAMMPS_USE_MPI4WIN) - list(APPEND LAMMPS_LINK_LIBS ${MPI4WIN_LIBRARIES}) + target_link_libraries(lammps PRIVATE ${MPI4WIN_LIBRARIES}) endif() - list(APPEND LAMMPS_LINK_LIBS -lwsock32 -lpsapi) + target_link_libraries(lammps PRIVATE -lwsock32 -lpsapi) endif() ###################################################### @@ -538,11 +534,9 @@ list(APPEND LAMMPS_DEPS gitversion) get_property(LANGUAGES GLOBAL PROPERTY ENABLED_LANGUAGES) list (FIND LANGUAGES "Fortran" _index) if (${_index} GREATER -1) - list(APPEND LAMMPS_LINK_LIBS ${CMAKE_Fortran_IMPLICIT_LINK_LIBRARIES}) + target_link_libraries(lammps PRIVATE ${CMAKE_Fortran_IMPLICIT_LINK_LIBRARIES}) endif() -list(REMOVE_DUPLICATES LAMMPS_LINK_LIBS) if(BUILD_LIB) - target_link_libraries(lammps ${LAMMPS_LINK_LIBS}) if(LAMMPS_DEPS) add_dependencies(lammps ${LAMMPS_DEPS}) endif() @@ -591,7 +585,6 @@ endif() if(BUILD_EXE) if(NOT BUILD_LIB) - target_link_libraries(${LAMMPS_EXE} ${LAMMPS_LINK_LIBS}) if(LAMMPS_DEPS) add_dependencies(${LAMMPS_EXE} 
${LAMMPS_DEPS}) endif() @@ -736,7 +729,6 @@ if(BUILD_SHARED_LIBS) else() message(STATUS "Static library flags: ${CMAKE_STATIC_LINKER_FLAGS}") endif() -message(STATUS "Link libraries: ${LAMMPS_LINK_LIBS}") if(BUILD_MPI) message(STATUS "Using MPI with headers in ${MPI_CXX_INCLUDE_PATH} and these libraries: ${MPI_CXX_LIBRARIES};${MPI_Fortran_LIBRARIES}") endif() diff --git a/cmake/Modules/Packages/COMPRESS.cmake b/cmake/Modules/Packages/COMPRESS.cmake index 864b868865..ea5d5e37bd 100644 --- a/cmake/Modules/Packages/COMPRESS.cmake +++ b/cmake/Modules/Packages/COMPRESS.cmake @@ -1,5 +1,4 @@ if(PKG_COMPRESS) find_package(ZLIB REQUIRED) - include_directories(${ZLIB_INCLUDE_DIRS}) - list(APPEND LAMMPS_LINK_LIBS ${ZLIB_LIBRARIES}) + target_link_libraries(lammps PRIVATE ZLIB::ZLIB) endif() diff --git a/cmake/Modules/Packages/GPU.cmake b/cmake/Modules/Packages/GPU.cmake index e8ca4d35ab..427644f9c7 100644 --- a/cmake/Modules/Packages/GPU.cmake +++ b/cmake/Modules/Packages/GPU.cmake @@ -107,7 +107,7 @@ if(PKG_GPU) target_compile_definitions(gpu PRIVATE -DUSE_CUDPP) endif() - list(APPEND LAMMPS_LINK_LIBS gpu) + target_link_libraries(lammps PRIVATE gpu) if(LAMMPS_USE_MPI4WIN) add_dependencies(gpu mpi4win_build) endif() @@ -171,7 +171,7 @@ if(PKG_GPU) target_compile_definitions(gpu PRIVATE -D_${GPU_PREC_SETTING} -D${OCL_TUNE}_OCL -DMPI_GERYON -DUCL_NO_EXIT) target_compile_definitions(gpu PRIVATE -DUSE_OPENCL) - list(APPEND LAMMPS_LINK_LIBS gpu) + target_link_libraries(lammps PRIVATE gpu) if(LAMMPS_USE_MPI4WIN) add_dependencies(gpu mpi4win_build) endif() diff --git a/cmake/Modules/Packages/KIM.cmake b/cmake/Modules/Packages/KIM.cmake index 617068ce13..002fbbcaed 100644 --- a/cmake/Modules/Packages/KIM.cmake +++ b/cmake/Modules/Packages/KIM.cmake @@ -2,8 +2,7 @@ if(PKG_KIM) set(KIM-API_MIN_VERSION 2.1) find_package(CURL) if(CURL_FOUND) - include_directories(${CURL_INCLUDE_DIRS}) - list(APPEND LAMMPS_LINK_LIBS ${CURL_LIBRARIES}) + target_link_libraries(lammps PRIVATE CURL::libcurl) 
add_definitions(-DLMP_KIM_CURL) set(LMP_DEBUG_CURL OFF CACHE STRING "Set libcurl verbose mode on/off. If on, it displays a lot of verbose information about its operations.") mark_as_advanced(LMP_DEBUG_CURL) @@ -62,6 +61,6 @@ if(PKG_KIM) else() find_package(KIM-API ${KIM-API_MIN_VERSION} REQUIRED) endif() - list(APPEND LAMMPS_LINK_LIBS "${KIM-API_LDFLAGS}") + target_link_libraries(lammps PRIVATE "${KIM-API_LDFLAGS}") include_directories(${KIM-API_INCLUDE_DIRS}) endif() diff --git a/cmake/Modules/Packages/KOKKOS.cmake b/cmake/Modules/Packages/KOKKOS.cmake index 4b871ccb46..58d2c139f6 100644 --- a/cmake/Modules/Packages/KOKKOS.cmake +++ b/cmake/Modules/Packages/KOKKOS.cmake @@ -5,7 +5,7 @@ if(PKG_KOKKOS) option(EXTERNAL_KOKKOS "Build against external kokkos library") if(EXTERNAL_KOKKOS) find_package(Kokkos REQUIRED) - list(APPEND LAMMPS_LINK_LIBS Kokkos::kokkos) + target_link_libraries(lammps PRIVATE Kokkos::kokkos) else() set(LAMMPS_LIB_KOKKOS_SRC_DIR ${LAMMPS_LIB_SOURCE_DIR}/kokkos) set(LAMMPS_LIB_KOKKOS_BIN_DIR ${LAMMPS_LIB_BINARY_DIR}/kokkos) @@ -16,7 +16,7 @@ if(PKG_KOKKOS) ${LAMMPS_LIB_KOKKOS_SRC_DIR}/algorithms/src ${LAMMPS_LIB_KOKKOS_BIN_DIR}) include_directories(${Kokkos_INCLUDE_DIRS}) - list(APPEND LAMMPS_LINK_LIBS kokkos) + target_link_libraries(lammps PRIVATE kokkos) endif() add_definitions(-DLMP_KOKKOS) @@ -45,7 +45,7 @@ if(PKG_KOKKOS) if(KOKKOS_ENABLE_CUDA) if(NOT ${FFT} STREQUAL "KISS") add_definitions(-DFFT_CUFFT) - list(APPEND LAMMPS_LINK_LIBS cufft) + target_link_libraries(lammps PRIVATE cufft) endif() endif() endif() diff --git a/cmake/Modules/Packages/KSPACE.cmake b/cmake/Modules/Packages/KSPACE.cmake index 07612447f9..4f92a6963c 100644 --- a/cmake/Modules/Packages/KSPACE.cmake +++ b/cmake/Modules/Packages/KSPACE.cmake @@ -20,7 +20,7 @@ if(PKG_KSPACE) find_package(${FFTW} REQUIRED) add_definitions(-DFFT_FFTW3) include_directories(${${FFTW}_INCLUDE_DIRS}) - list(APPEND LAMMPS_LINK_LIBS ${${FFTW}_LIBRARIES}) + target_link_libraries(lammps PRIVATE 
${${FFTW}_LIBRARIES}) if(FFTW3_OMP_LIBRARY OR FFTW3F_OMP_LIBRARY) option(FFT_FFTW_THREADS "Use threaded FFTW library" ON) else() @@ -30,7 +30,7 @@ if(PKG_KSPACE) if(FFT_FFTW_THREADS) if(FFTW3_OMP_LIBRARY OR FFTW3F_OMP_LIBRARY) add_definitions(-DFFT_FFTW_THREADS) - list(APPEND LAMMPS_LINK_LIBS ${${FFTW}_OMP_LIBRARIES}) + target_link_libraries(lammps PRIVATE ${${FFTW}_OMP_LIBRARIES}) else() message(FATAL_ERROR "Need OpenMP enabled FFTW3 library for FFT_THREADS") endif() @@ -43,7 +43,7 @@ if(PKG_KSPACE) add_definitions(-DFFT_MKL_THREADS) endif() include_directories(${MKL_INCLUDE_DIRS}) - list(APPEND LAMMPS_LINK_LIBS ${MKL_LIBRARIES}) + target_link_libraries(lammps PRIVATE ${MKL_LIBRARIES}) else() # last option is KISSFFT add_definitions(-DFFT_KISS) diff --git a/cmake/Modules/Packages/LATTE.cmake b/cmake/Modules/Packages/LATTE.cmake index 9f665d7f0e..55d9b31797 100644 --- a/cmake/Modules/Packages/LATTE.cmake +++ b/cmake/Modules/Packages/LATTE.cmake @@ -36,5 +36,5 @@ if(PKG_LATTE) if(NOT LAPACK_FOUND) add_dependencies(latte_build linalg) endif() - list(APPEND LAMMPS_LINK_LIBS ${LATTE_LIBRARIES} ${LAPACK_LIBRARIES}) + target_link_libraries(lammps PRIVATE ${LATTE_LIBRARIES} ${LAPACK_LIBRARIES}) endif() diff --git a/cmake/Modules/Packages/MESSAGE.cmake b/cmake/Modules/Packages/MESSAGE.cmake index aff9c2964a..c28c50c507 100644 --- a/cmake/Modules/Packages/MESSAGE.cmake +++ b/cmake/Modules/Packages/MESSAGE.cmake @@ -27,6 +27,6 @@ if(PKG_MESSAGE) target_include_directories(cslib PRIVATE ${LAMMPS_LIB_SOURCE_DIR}/message/cslib/src/STUBS_ZMQ) endif() - list(APPEND LAMMPS_LINK_LIBS cslib) + target_link_libraries(lammps PRIVATE cslib) include_directories(${LAMMPS_LIB_SOURCE_DIR}/message/cslib/src) endif() diff --git a/cmake/Modules/Packages/MSCG.cmake b/cmake/Modules/Packages/MSCG.cmake index 35f0c57449..e300ed6ae2 100644 --- a/cmake/Modules/Packages/MSCG.cmake +++ b/cmake/Modules/Packages/MSCG.cmake @@ -43,6 +43,6 @@ if(PKG_MSCG) message(FATAL_ERROR "MSCG not found, help CMake to 
find it by setting MSCG_LIBRARY and MSCG_INCLUDE_DIRS, or set DOWNLOAD_MSCG=ON to download it") endif() endif() - list(APPEND LAMMPS_LINK_LIBS ${MSCG_LIBRARIES} ${GSL_LIBRARIES} ${LAPACK_LIBRARIES}) + target_link_libraries(lammps PRIVATE ${MSCG_LIBRARIES} GSL::gsl ${LAPACK_LIBRARIES}) include_directories(${MSCG_INCLUDE_DIRS}) endif() diff --git a/cmake/Modules/Packages/PYTHON.cmake b/cmake/Modules/Packages/PYTHON.cmake index 4f8959ae38..7d6e2999f7 100644 --- a/cmake/Modules/Packages/PYTHON.cmake +++ b/cmake/Modules/Packages/PYTHON.cmake @@ -2,5 +2,5 @@ if(PKG_PYTHON) find_package(PythonLibs REQUIRED) add_definitions(-DLMP_PYTHON) include_directories(${PYTHON_INCLUDE_DIR}) - list(APPEND LAMMPS_LINK_LIBS ${PYTHON_LIBRARY}) + target_link_libraries(lammps PRIVATE ${PYTHON_LIBRARY}) endif() diff --git a/cmake/Modules/Packages/USER-COLVARS.cmake b/cmake/Modules/Packages/USER-COLVARS.cmake index a112fbb6aa..2a337b9c8b 100644 --- a/cmake/Modules/Packages/USER-COLVARS.cmake +++ b/cmake/Modules/Packages/USER-COLVARS.cmake @@ -16,10 +16,10 @@ if(PKG_USER-COLVARS) add_library(colvars STATIC ${COLVARS_SOURCES}) target_include_directories(colvars PUBLIC ${LAMMPS_LIB_SOURCE_DIR}/colvars) - list(APPEND LAMMPS_LINK_LIBS colvars) + target_link_libraries(lammps PRIVATE colvars) if(COLVARS_LEPTON) - list(APPEND LAMMPS_LINK_LIBS lepton) + target_link_libraries(lammps PRIVATE lepton) target_compile_options(colvars PRIVATE -DLEPTON) target_include_directories(colvars PUBLIC ${LEPTON_DIR}/include) endif() diff --git a/cmake/Modules/Packages/USER-INTEL.cmake b/cmake/Modules/Packages/USER-INTEL.cmake index 7b08a7b459..0a9b634bc7 100644 --- a/cmake/Modules/Packages/USER-INTEL.cmake +++ b/cmake/Modules/Packages/USER-INTEL.cmake @@ -25,7 +25,7 @@ if(PKG_USER-INTEL) if(INTEL_LRT_MODE STREQUAL "THREADS") if(Threads_FOUND) add_definitions(-DLMP_INTEL_USELRT) - list(APPEND LAMMPS_LINK_LIBS ${CMAKE_THREAD_LIBS_INIT}) + target_link_libraries(lammps PRIVATE Threads::Threads) else() 
message(FATAL_ERROR "Must have working threads library for Long-range thread support") endif() @@ -44,7 +44,7 @@ if(PKG_USER-INTEL) find_package(TBB QUIET) if(TBB_FOUND) - list(APPEND LAMMPS_LINK_LIBS ${TBB_MALLOC_LIBRARIES}) + target_link_libraries(lammps PRIVATE ${TBB_MALLOC_LIBRARIES}) else() add_definitions(-DLMP_INTEL_NO_TBB) if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel") @@ -55,7 +55,7 @@ if(PKG_USER-INTEL) find_package(MKL QUIET) if(MKL_FOUND) add_definitions(-DLMP_USE_MKL_RNG) - list(APPEND LAMMPS_LINK_LIBS ${MKL_LIBRARIES}) + target_link_libraries(lammps PRIVATE ${MKL_LIBRARIES}) else() message(STATUS "Pair style dpd/intel will be faster with MKL libraries") endif() diff --git a/cmake/Modules/Packages/USER-MOLFILE.cmake b/cmake/Modules/Packages/USER-MOLFILE.cmake index 16ffc34994..cbba1eee7b 100644 --- a/cmake/Modules/Packages/USER-MOLFILE.cmake +++ b/cmake/Modules/Packages/USER-MOLFILE.cmake @@ -6,5 +6,5 @@ if(PKG_USER-MOLFILE) if(NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Windows") target_link_libraries(molfile INTERFACE ${CMAKE_DL_LIBS}) endif() - list(APPEND LAMMPS_LINK_LIBS molfile) + target_link_libraries(lammps PRIVATE molfile) endif() diff --git a/cmake/Modules/Packages/USER-NETCDF.cmake b/cmake/Modules/Packages/USER-NETCDF.cmake index 921156f1e0..8d62f5f7ec 100644 --- a/cmake/Modules/Packages/USER-NETCDF.cmake +++ b/cmake/Modules/Packages/USER-NETCDF.cmake @@ -10,13 +10,13 @@ if(PKG_USER-NETCDF) if(NETCDF_FOUND) include_directories(${NETCDF_INCLUDE_DIRS}) - list(APPEND LAMMPS_LINK_LIBS ${NETCDF_LIBRARIES}) + target_link_libraries(lammps PRIVATE ${NETCDF_LIBRARIES}) add_definitions(-DLMP_HAS_NETCDF) endif(NETCDF_FOUND) if(PNETCDF_FOUND) include_directories(${PNETCDF_INCLUDES}) - list(APPEND LAMMPS_LINK_LIBS ${PNETCDF_LIBRARIES}) + target_link_libraries(lammps PRIVATE ${PNETCDF_LIBRARIES}) add_definitions(-DLMP_HAS_PNETCDF) endif(PNETCDF_FOUND) diff --git a/cmake/Modules/Packages/USER-PLUMED.cmake b/cmake/Modules/Packages/USER-PLUMED.cmake index 
426ae2df2a..9669f1955c 100644 --- a/cmake/Modules/Packages/USER-PLUMED.cmake +++ b/cmake/Modules/Packages/USER-PLUMED.cmake @@ -70,12 +70,12 @@ if(PKG_USER-PLUMED) list(APPEND LAMMPS_DEPS plumed_build) if(PLUMED_MODE STREQUAL "STATIC") add_definitions(-D__PLUMED_WRAPPER_CXX=1) - list(APPEND LAMMPS_LINK_LIBS ${PLUMED_INSTALL_DIR}/lib/libplumed.a ${PLUMED_LINK_LIBS} ${CMAKE_DL_LIBS}) + target_link_libraries(lammps ${PLUMED_INSTALL_DIR}/lib/libplumed.a ${PLUMED_LINK_LIBS} ${CMAKE_DL_LIBS}) elseif(PLUMED_MODE STREQUAL "SHARED") - list(APPEND LAMMPS_LINK_LIBS ${PLUMED_INSTALL_DIR}/lib/libplumed${CMAKE_SHARED_LIBRARY_SUFFIX} ${PLUMED_INSTALL_DIR}/lib/libplumedKernel${CMAKE_SHARED_LIBRARY_SUFFIX} ${CMAKE_DL_LIBS}) + target_link_libraries(lammps PRIVATE ${PLUMED_INSTALL_DIR}/lib/libplumed${CMAKE_SHARED_LIBRARY_SUFFIX} ${PLUMED_INSTALL_DIR}/lib/libplumedKernel${CMAKE_SHARED_LIBRARY_SUFFIX} ${CMAKE_DL_LIBS}) elseif(PLUMED_MODE STREQUAL "RUNTIME") add_definitions(-D__PLUMED_HAS_DLOPEN=1 -D__PLUMED_DEFAULT_KERNEL=${PLUMED_INSTALL_DIR}/lib/libplumedKernel${CMAKE_SHARED_LIBRARY_SUFFIX}) - list(APPEND LAMMPS_LINK_LIBS ${PLUMED_INSTALL_DIR}/lib/libplumedWrapper.a -rdynamic ${CMAKE_DL_LIBS}) + target_link_libraries(lammps PRIVATE ${PLUMED_INSTALL_DIR}/lib/libplumedWrapper.a -rdynamic ${CMAKE_DL_LIBS}) endif() set(PLUMED_INCLUDE_DIRS "${PLUMED_INSTALL_DIR}/include") else() @@ -90,7 +90,7 @@ if(PKG_USER-PLUMED) add_definitions(-D__PLUMED_HAS_DLOPEN=1 -D__PLUMED_DEFAULT_KERNEL=${PLUMED_LIBDIR}/libplumedKernel${CMAKE_SHARED_LIBRARY_SUFFIX}) include(${PLUMED_LIBDIR}/plumed/src/lib/Plumed.cmake.runtime) endif() - list(APPEND LAMMPS_LINK_LIBS ${PLUMED_LOAD}) + target_link_libraries(lammps PRIVATE ${PLUMED_LOAD}) endif() include_directories(${PLUMED_INCLUDE_DIRS}) endif() diff --git a/cmake/Modules/Packages/USER-QMMM.cmake b/cmake/Modules/Packages/USER-QMMM.cmake index 544455868e..0f3fa93b2a 100644 --- a/cmake/Modules/Packages/USER-QMMM.cmake +++ b/cmake/Modules/Packages/USER-QMMM.cmake @@ 
-8,6 +8,6 @@ if(PKG_USER-QMMM) message(WARNING "It is recommended to use BUILD_SHARED_LIBS=yes with USER-QMMM") endif() add_library(qmmm STATIC ${LAMMPS_LIB_SOURCE_DIR}/qmmm/libqmmm.c) - list(APPEND LAMMPS_LINK_LIBS qmmm) + target_link_libraries(lammps PRIVATE qmmm) target_include_directories(qmmm PUBLIC ${LAMMPS_LIB_SOURCE_DIR}/qmmm) endif() diff --git a/cmake/Modules/Packages/USER-QUIP.cmake b/cmake/Modules/Packages/USER-QUIP.cmake index 93096a2f54..52ba7e9c47 100644 --- a/cmake/Modules/Packages/USER-QUIP.cmake +++ b/cmake/Modules/Packages/USER-QUIP.cmake @@ -1,5 +1,5 @@ if(PKG_USER-QUIP) enable_language(Fortran) find_package(QUIP REQUIRED) - list(APPEND LAMMPS_LINK_LIBS ${QUIP_LIBRARIES} ${LAPACK_LIBRARIES}) + target_link_libraries(lammps PRIVATE ${LAPACK_LIBRARIES}) endif() diff --git a/cmake/Modules/Packages/USER-SCAFACOS.cmake b/cmake/Modules/Packages/USER-SCAFACOS.cmake index 8bb9e63605..0fac1fe919 100644 --- a/cmake/Modules/Packages/USER-SCAFACOS.cmake +++ b/cmake/Modules/Packages/USER-SCAFACOS.cmake @@ -49,28 +49,28 @@ if(PKG_USER-SCAFACOS) set(SCAFACOS_INCLUDE_DIRS ${SCAFACOS_BUILD_DIR}/include) list(APPEND LAMMPS_DEPS scafacos_build) # list and order from pkg_config file of ScaFaCoS build - list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs.a) - list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_direct.a) - list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_ewald.a) - list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_fmm.a) - list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_p2nfft.a) - list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_p3m.a) - list(APPEND LAMMPS_LINK_LIBS ${GSL_LIBRARIES}) - list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_near.a) - list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_gridsort.a) - list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_resort.a) - list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_redist.a) - 
list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_common.a) - list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_pnfft.a) - list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_pfft.a) - list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_fftw3_mpi.a) - list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_fftw3.a) - list(APPEND LAMMPS_LINK_LIBS ${MPI_Fortran_LIBRARIES}) - list(APPEND LAMMPS_LINK_LIBS ${MPI_C_LIBRARIES}) + target_link_libraries(lammps PRIVATE ${SCAFACOS_BUILD_DIR}/lib/libfcs.a) + target_link_libraries(lammps PRIVATE ${SCAFACOS_BUILD_DIR}/lib/libfcs_direct.a) + target_link_libraries(lammps PRIVATE ${SCAFACOS_BUILD_DIR}/lib/libfcs_ewald.a) + target_link_libraries(lammps PRIVATE ${SCAFACOS_BUILD_DIR}/lib/libfcs_fmm.a) + target_link_libraries(lammps PRIVATE ${SCAFACOS_BUILD_DIR}/lib/libfcs_p2nfft.a) + target_link_libraries(lammps PRIVATE ${SCAFACOS_BUILD_DIR}/lib/libfcs_p3m.a) + target_link_libraries(lammps PRIVATE ${GSL_LIBRARIES}) + target_link_libraries(lammps PRIVATE ${SCAFACOS_BUILD_DIR}/lib/libfcs_near.a) + target_link_libraries(lammps PRIVATE ${SCAFACOS_BUILD_DIR}/lib/libfcs_gridsort.a) + target_link_libraries(lammps PRIVATE ${SCAFACOS_BUILD_DIR}/lib/libfcs_resort.a) + target_link_libraries(lammps PRIVATE ${SCAFACOS_BUILD_DIR}/lib/libfcs_redist.a) + target_link_libraries(lammps PRIVATE ${SCAFACOS_BUILD_DIR}/lib/libfcs_common.a) + target_link_libraries(lammps PRIVATE ${SCAFACOS_BUILD_DIR}/lib/libfcs_pnfft.a) + target_link_libraries(lammps PRIVATE ${SCAFACOS_BUILD_DIR}/lib/libfcs_pfft.a) + target_link_libraries(lammps PRIVATE ${SCAFACOS_BUILD_DIR}/lib/libfcs_fftw3_mpi.a) + target_link_libraries(lammps PRIVATE ${SCAFACOS_BUILD_DIR}/lib/libfcs_fftw3.a) + target_link_libraries(lammps PRIVATE ${MPI_Fortran_LIBRARIES}) + target_link_libraries(lammps PRIVATE ${MPI_C_LIBRARIES}) else() find_package(PkgConfig REQUIRED) pkg_check_modules(SCAFACOS REQUIRED scafacos) - list(APPEND LAMMPS_LINK_LIBS 
${SCAFACOS_LDFLAGS}) + target_link_libraries(lammps PRIVATE ${SCAFACOS_LDFLAGS}) endif() include_directories(${SCAFACOS_INCLUDE_DIRS}) endif() diff --git a/cmake/Modules/Packages/USER-VTK.cmake b/cmake/Modules/Packages/USER-VTK.cmake index d264577ca2..fb69f115b2 100644 --- a/cmake/Modules/Packages/USER-VTK.cmake +++ b/cmake/Modules/Packages/USER-VTK.cmake @@ -2,5 +2,5 @@ if(PKG_USER-VTK) find_package(VTK REQUIRED NO_MODULE) include(${VTK_USE_FILE}) add_definitions(-DLAMMPS_VTK) - list(APPEND LAMMPS_LINK_LIBS ${VTK_LIBRARIES}) + target_link_libraries(lammps PRIVATE ${VTK_LIBRARIES}) endif() diff --git a/cmake/Modules/Packages/VORONOI.cmake b/cmake/Modules/Packages/VORONOI.cmake index 5418132034..f567c35597 100644 --- a/cmake/Modules/Packages/VORONOI.cmake +++ b/cmake/Modules/Packages/VORONOI.cmake @@ -39,5 +39,5 @@ if(PKG_VORONOI) endif() endif() include_directories(${VORO_INCLUDE_DIRS}) - list(APPEND LAMMPS_LINK_LIBS ${VORO_LIBRARIES}) + target_link_libraries(lammps PRIVATE ${VORO_LIBRARIES}) endif() -- GitLab From 0e3f4f3de2bd7ad50a70353e9e749758796cb10d Mon Sep 17 00:00:00 2001 From: Christoph Junghans Date: Sun, 22 Mar 2020 19:07:23 -0600 Subject: [PATCH 033/328] cmake: remove LAMMPS_DEPS --- cmake/CMakeLists.txt | 12 +----------- cmake/Modules/MPI4WIN.cmake | 2 +- cmake/Modules/Packages/KIM.cmake | 2 +- cmake/Modules/Packages/LATTE.cmake | 2 +- cmake/Modules/Packages/MSCG.cmake | 2 +- cmake/Modules/Packages/USER-PLUMED.cmake | 2 +- cmake/Modules/Packages/USER-SCAFACOS.cmake | 2 +- cmake/Modules/Packages/USER-SMD.cmake | 2 +- cmake/Modules/Packages/VORONOI.cmake | 2 +- 9 files changed, 9 insertions(+), 19 deletions(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 9190374a2e..b9e8d5b919 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -126,7 +126,6 @@ endif() option(CMAKE_VERBOSE_MAKEFILE "Generate verbose Makefiles" OFF) include(GNUInstallDirs) -set(LAMMPS_DEPS) set(LAMMPS_API_DEFINES) set(DEFAULT_PACKAGES ASPHERE BODY CLASS2 
COLLOID COMPRESS CORESHELL DIPOLE @@ -526,7 +525,7 @@ add_custom_target(gitversion COMMAND ${CMAKE_COMMAND} -DLAMMPS_STYLE_HEADERS_DIR="${LAMMPS_STYLE_HEADERS_DIR}" -P ${CMAKE_CURRENT_SOURCE_DIR}/Modules/generate_lmpgitversion.cmake) set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${LAMMPS_STYLE_HEADERS_DIR}/gitversion.h) -list(APPEND LAMMPS_DEPS gitversion) +add_dependencies(lammps gitversion) ########################################### # Actually add executable and lib to build @@ -537,9 +536,6 @@ if (${_index} GREATER -1) target_link_libraries(lammps PRIVATE ${CMAKE_Fortran_IMPLICIT_LINK_LIBRARIES}) endif() if(BUILD_LIB) - if(LAMMPS_DEPS) - add_dependencies(lammps ${LAMMPS_DEPS}) - endif() set(LAMMPS_CXX_HEADERS ${LAMMPS_SOURCE_DIR}/angle.h ${LAMMPS_SOURCE_DIR}/atom.h @@ -584,12 +580,6 @@ if(BUILD_LIB) endif() if(BUILD_EXE) - if(NOT BUILD_LIB) - if(LAMMPS_DEPS) - add_dependencies(${LAMMPS_EXE} ${LAMMPS_DEPS}) - endif() - endif() - set_target_properties(${LAMMPS_EXE} PROPERTIES OUTPUT_NAME ${LAMMPS_BINARY}) install(TARGETS ${LAMMPS_EXE} DESTINATION ${CMAKE_INSTALL_BINDIR}) install(FILES ${LAMMPS_DOC_DIR}/lammps.1 DESTINATION ${CMAKE_INSTALL_MANDIR}/man1 RENAME ${LAMMPS_BINARY}.1) diff --git a/cmake/Modules/MPI4WIN.cmake b/cmake/Modules/MPI4WIN.cmake index 1f3195041a..1fe6daada3 100644 --- a/cmake/Modules/MPI4WIN.cmake +++ b/cmake/Modules/MPI4WIN.cmake @@ -19,5 +19,5 @@ ExternalProject_get_property(mpi4win_build SOURCE_DIR) add_definitions(-DMPICH_SKIP_MPICXX) include_directories("${SOURCE_DIR}/include") set(MPI4WIN_LIBRARIES "${SOURCE_DIR}/lib/libmpi.a") -list(APPEND LAMMPS_DEPS mpi4win_build) +add_dependencies(lammps mpi4win_build) set(LAMMPS_USE_MPI4WIN ON) diff --git a/cmake/Modules/Packages/KIM.cmake b/cmake/Modules/Packages/KIM.cmake index 002fbbcaed..56fc1449fa 100644 --- a/cmake/Modules/Packages/KIM.cmake +++ b/cmake/Modules/Packages/KIM.cmake @@ -57,7 +57,7 @@ if(PKG_KIM) ExternalProject_get_property(kim_build INSTALL_DIR) 
set(KIM-API_INCLUDE_DIRS ${INSTALL_DIR}/include/kim-api) set(KIM-API_LDFLAGS ${INSTALL_DIR}/${_KIM_LIBDIR}/libkim-api${CMAKE_SHARED_LIBRARY_SUFFIX}) - list(APPEND LAMMPS_DEPS kim_build) + add_dependencies(lammps kim_build) else() find_package(KIM-API ${KIM-API_MIN_VERSION} REQUIRED) endif() diff --git a/cmake/Modules/Packages/LATTE.cmake b/cmake/Modules/Packages/LATTE.cmake index 55d9b31797..abadd1cd97 100644 --- a/cmake/Modules/Packages/LATTE.cmake +++ b/cmake/Modules/Packages/LATTE.cmake @@ -24,7 +24,7 @@ if(PKG_LATTE) -DCMAKE_MAKE_PROGRAM=${CMAKE_MAKE_PROGRAM} -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE} BUILD_BYPRODUCTS /${_LATTE_LIBDIR}/liblatte.a ) - list(APPEND LAMMPS_DEPS latte_build) + add_dependencies(lammps latte_build) ExternalProject_get_property(latte_build INSTALL_DIR) set(LATTE_LIBRARIES ${INSTALL_DIR}/${_LATTE_LIBDIR}/liblatte.a) else() diff --git a/cmake/Modules/Packages/MSCG.cmake b/cmake/Modules/Packages/MSCG.cmake index e300ed6ae2..2c63e69fa7 100644 --- a/cmake/Modules/Packages/MSCG.cmake +++ b/cmake/Modules/Packages/MSCG.cmake @@ -32,7 +32,7 @@ if(PKG_MSCG) set(MSCG_LIBRARIES ${BINARY_DIR}/libmscg.a) ExternalProject_get_property(mscg_build SOURCE_DIR) set(MSCG_INCLUDE_DIRS ${SOURCE_DIR}/src) - list(APPEND LAMMPS_DEPS mscg_build) + add_dependencies(lammps mscg_build) if(NOT LAPACK_FOUND) file(MAKE_DIRECTORY ${MSCG_INCLUDE_DIRS}) add_dependencies(mscg_build linalg) diff --git a/cmake/Modules/Packages/USER-PLUMED.cmake b/cmake/Modules/Packages/USER-PLUMED.cmake index 9669f1955c..2bca281fa8 100644 --- a/cmake/Modules/Packages/USER-PLUMED.cmake +++ b/cmake/Modules/Packages/USER-PLUMED.cmake @@ -67,7 +67,7 @@ if(PKG_USER-PLUMED) ) ExternalProject_get_property(plumed_build INSTALL_DIR) set(PLUMED_INSTALL_DIR ${INSTALL_DIR}) - list(APPEND LAMMPS_DEPS plumed_build) + add_dependencies(lammps plumed_build) if(PLUMED_MODE STREQUAL "STATIC") add_definitions(-D__PLUMED_WRAPPER_CXX=1) target_link_libraries(lammps ${PLUMED_INSTALL_DIR}/lib/libplumed.a 
${PLUMED_LINK_LIBS} ${CMAKE_DL_LIBS}) diff --git a/cmake/Modules/Packages/USER-SCAFACOS.cmake b/cmake/Modules/Packages/USER-SCAFACOS.cmake index 0fac1fe919..ebd750c04b 100644 --- a/cmake/Modules/Packages/USER-SCAFACOS.cmake +++ b/cmake/Modules/Packages/USER-SCAFACOS.cmake @@ -47,7 +47,7 @@ if(PKG_USER-SCAFACOS) ExternalProject_get_property(scafacos_build INSTALL_DIR) set(SCAFACOS_BUILD_DIR ${INSTALL_DIR}) set(SCAFACOS_INCLUDE_DIRS ${SCAFACOS_BUILD_DIR}/include) - list(APPEND LAMMPS_DEPS scafacos_build) + add_dependencies(lammps scafacos_build) # list and order from pkg_config file of ScaFaCoS build target_link_libraries(lammps PRIVATE ${SCAFACOS_BUILD_DIR}/lib/libfcs.a) target_link_libraries(lammps PRIVATE ${SCAFACOS_BUILD_DIR}/lib/libfcs_direct.a) diff --git a/cmake/Modules/Packages/USER-SMD.cmake b/cmake/Modules/Packages/USER-SMD.cmake index a868918e37..eed60ae8bd 100644 --- a/cmake/Modules/Packages/USER-SMD.cmake +++ b/cmake/Modules/Packages/USER-SMD.cmake @@ -16,7 +16,7 @@ if(PKG_USER-SMD) ) ExternalProject_get_property(Eigen3_build SOURCE_DIR) set(EIGEN3_INCLUDE_DIR ${SOURCE_DIR}) - list(APPEND LAMMPS_DEPS Eigen3_build) + add_dependencies(lammps Eigen3_build) else() find_package(Eigen3 NO_MODULE) mark_as_advanced(Eigen3_DIR) diff --git a/cmake/Modules/Packages/VORONOI.cmake b/cmake/Modules/Packages/VORONOI.cmake index f567c35597..d2bb185228 100644 --- a/cmake/Modules/Packages/VORONOI.cmake +++ b/cmake/Modules/Packages/VORONOI.cmake @@ -31,7 +31,7 @@ if(PKG_VORONOI) ExternalProject_get_property(voro_build SOURCE_DIR) set(VORO_LIBRARIES ${SOURCE_DIR}/src/libvoro++.a) set(VORO_INCLUDE_DIRS ${SOURCE_DIR}/src) - list(APPEND LAMMPS_DEPS voro_build) + add_dependencies(lammps voro_build) else() find_package(VORO) if(NOT VORO_FOUND) -- GitLab From e1f01d3e6514eecb83584c1e2b14c88569db70bb Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sun, 22 Mar 2020 21:14:26 -0400 Subject: [PATCH 034/328] use consistent naming for c++11 style kspace thread --- 
src/USER-INTEL/verlet_lrt_intel.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/USER-INTEL/verlet_lrt_intel.cpp b/src/USER-INTEL/verlet_lrt_intel.cpp index a3c05c46bc..bd143c4c94 100644 --- a/src/USER-INTEL/verlet_lrt_intel.cpp +++ b/src/USER-INTEL/verlet_lrt_intel.cpp @@ -185,7 +185,7 @@ void VerletLRTIntel::setup(int flag) _kspace_done = 0; pthread_mutex_unlock(&_kmutex); #elif defined(_LMP_INTEL_LRT_11) - kspace_thread.join(); + _kspace_thread.join(); #endif if (kspace_compute_flag) _intel_kspace->compute_second(eflag,vflag); @@ -298,9 +298,9 @@ void VerletLRTIntel::run(int n) pthread_cond_signal(&_kcond); pthread_mutex_unlock(&_kmutex); #elif defined(_LMP_INTEL_LRT_11) - std::thread kspace_thread; + std::thread _kspace_thread; if (kspace_compute_flag) - kspace_thread=std::thread([=] { + _kspace_thread=std::thread([=] { _intel_kspace->compute_first(eflag, vflag); timer->stamp(Timer::KSPACE); } ); @@ -332,7 +332,7 @@ void VerletLRTIntel::run(int n) pthread_mutex_unlock(&_kmutex); #elif defined(_LMP_INTEL_LRT_11) if (kspace_compute_flag) - kspace_thread.join(); + _kspace_thread.join(); #endif if (kspace_compute_flag) { -- GitLab From 3076e267073f7501467933cb846a1b129373e2fb Mon Sep 17 00:00:00 2001 From: Christoph Junghans Date: Sun, 22 Mar 2020 19:20:00 -0600 Subject: [PATCH 035/328] cmake: create imported target for FFTW --- cmake/Modules/FindFFTW3.cmake | 28 +++++++++++++++++++++++---- cmake/Modules/FindFFTW3F.cmake | 30 ++++++++++++++++++++++++----- cmake/Modules/Packages/KSPACE.cmake | 5 ++--- 3 files changed, 51 insertions(+), 12 deletions(-) diff --git a/cmake/Modules/FindFFTW3.cmake b/cmake/Modules/FindFFTW3.cmake index 63752f85df..708ec49509 100644 --- a/cmake/Modules/FindFFTW3.cmake +++ b/cmake/Modules/FindFFTW3.cmake @@ -14,14 +14,34 @@ find_path(FFTW3_INCLUDE_DIR fftw3.h HINTS ${PC_FFTW3_INCLUDE_DIRS}) find_library(FFTW3_LIBRARY NAMES fftw3 HINTS ${PC_FFTW3_LIBRARY_DIRS}) find_library(FFTW3_OMP_LIBRARY NAMES fftw3_omp 
HINTS ${PC_FFTW3_LIBRARY_DIRS}) -set(FFTW3_INCLUDE_DIRS ${FFTW3_INCLUDE_DIR}) -set(FFTW3_LIBRARIES ${FFTW3_LIBRARY}) -set(FFTW3_OMP_LIBRARIES ${FFTW3_OMP_LIBRARY}) - include(FindPackageHandleStandardArgs) # handle the QUIETLY and REQUIRED arguments and set FFTW3_FOUND to TRUE # if all listed variables are TRUE find_package_handle_standard_args(FFTW3 DEFAULT_MSG FFTW3_LIBRARY FFTW3_INCLUDE_DIR) +# Copy the results to the output variables and target. +if(FFTW3_FOUND) + set(FFTW3_LIBRARIES ${FFTW3_LIBRARY} ) + set(FFTW3_INCLUDE_DIRS ${FFTW3_INCLUDE_DIR} ) + + if(NOT TARGET FFTW3::FFTW3) + add_library(FFTW3::FFTW3 UNKNOWN IMPORTED) + set_target_properties(FFTW3::FFTW3 PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES "C" + IMPORTED_LOCATION "${FFTW3_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES "${FFTW3_INCLUDE_DIRS}") + endif() + if(FFTW3_OMP_LIBRARY) + set(FFTW3_OMP_LIBRARIES ${FFTW3_OMP_LIBRARY}) + if(NOT TARGET FFTW3::FFTW3_OMP) + add_library(FFTW3::FFTW3_OMP UNKNOWN IMPORTED) + set_target_properties(FFTW3::FFTW3_OMP PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES "C" + IMPORTED_LOCATION "${FFTW3_OMP_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES "${FFTW3_INCLUDE_DIRS}") + endif() + endif() +endif() + mark_as_advanced(FFTW3_INCLUDE_DIR FFTW3_LIBRARY FFTW3_OMP_LIBRARY) diff --git a/cmake/Modules/FindFFTW3F.cmake b/cmake/Modules/FindFFTW3F.cmake index c67aa5faf1..3dbcdaa04e 100644 --- a/cmake/Modules/FindFFTW3F.cmake +++ b/cmake/Modules/FindFFTW3F.cmake @@ -13,14 +13,34 @@ find_path(FFTW3F_INCLUDE_DIR fftw3.h HINTS ${PC_FFTW3F_INCLUDE_DIRS}) find_library(FFTW3F_LIBRARY NAMES fftw3f HINTS ${PC_FFTW3F_LIBRARY_DIRS}) find_library(FFTW3F_OMP_LIBRARY NAMES fftw3f_omp HINTS ${PC_FFTW3F_LIBRARY_DIRS}) -set(FFTW3F_INCLUDE_DIRS ${FFTW3F_INCLUDE_DIR}) -set(FFTW3F_LIBRARIES ${FFTW3F_LIBRARY}) -set(FFTW3F_OMP_LIBRARIES ${FFTW3F_OMP_LIBRARY}) - include(FindPackageHandleStandardArgs) # handle the QUIETLY and REQUIRED arguments and set FFTW3F_FOUND to TRUE # if all listed variables are TRUE 
find_package_handle_standard_args(FFTW3F DEFAULT_MSG FFTW3F_LIBRARY FFTW3F_INCLUDE_DIR) -mark_as_advanced(FFTW3F_INCLUDE_DIR FFTW3F_LIBRARY) +# Copy the results to the output variables and target. +if(FFTW3F_FOUND) + set(FFTW3F_LIBRARIES ${FFTW3F_LIBRARY} ) + set(FFTW3F_INCLUDE_DIRS ${FFTW3F_INCLUDE_DIR} ) + + if(NOT TARGET FFTW3F::FFTW3F) + add_library(FFTW3F::FFTW3F UNKNOWN IMPORTED) + set_target_properties(FFTW3F::FFTW3F PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES "C" + IMPORTED_LOCATION "${FFTW3F_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES "${FFTW3F_INCLUDE_DIRS}") + endif() + if(FFTW3F_OMP_LIBRARY) + set(FFTW3F_OMP_LIBRARIES ${FFTW3F_OMP_LIBRARY}) + if(NOT TARGET FFTW3F::FFTW3F_OMP) + add_library(FFTW3F::FFTW3F_OMP UNKNOWN IMPORTED) + set_target_properties(FFTW3F::FFTW3F_OMP PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES "C" + IMPORTED_LOCATION "${FFTW3F_OMP_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES "${FFTW3F_INCLUDE_DIRS}") + endif() + endif() +endif() + +mark_as_advanced(FFTW3F_INCLUDE_DIR FFTW3F_LIBRARY FFTW3F_OMP_LIBRARY) diff --git a/cmake/Modules/Packages/KSPACE.cmake b/cmake/Modules/Packages/KSPACE.cmake index 4f92a6963c..db12787013 100644 --- a/cmake/Modules/Packages/KSPACE.cmake +++ b/cmake/Modules/Packages/KSPACE.cmake @@ -19,8 +19,7 @@ if(PKG_KSPACE) if(FFT STREQUAL "FFTW3") find_package(${FFTW} REQUIRED) add_definitions(-DFFT_FFTW3) - include_directories(${${FFTW}_INCLUDE_DIRS}) - target_link_libraries(lammps PRIVATE ${${FFTW}_LIBRARIES}) + target_link_libraries(lammps PRIVATE ${FFTW}::${FFTW}) if(FFTW3_OMP_LIBRARY OR FFTW3F_OMP_LIBRARY) option(FFT_FFTW_THREADS "Use threaded FFTW library" ON) else() @@ -30,7 +29,7 @@ if(PKG_KSPACE) if(FFT_FFTW_THREADS) if(FFTW3_OMP_LIBRARY OR FFTW3F_OMP_LIBRARY) add_definitions(-DFFT_FFTW_THREADS) - target_link_libraries(lammps PRIVATE ${${FFTW}_OMP_LIBRARIES}) + target_link_libraries(lammps PRIVATE ${FFTW}::${FFTW}_OMP) else() message(FATAL_ERROR "Need OpenMP enabled FFTW3 library for FFT_THREADS") endif() -- 
GitLab From 591212af3ac3a6c0d5f9fce59199b39b279929e8 Mon Sep 17 00:00:00 2001 From: Christoph Junghans Date: Sun, 22 Mar 2020 19:57:48 -0600 Subject: [PATCH 036/328] cmake: add back include path for now --- cmake/CMakeLists.txt | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index b9e8d5b919..c6d149e7d6 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -112,15 +112,17 @@ if(BUILD_LIB) add_library(lammps ${ALL_SOURCES}) if(BUILD_EXE) add_executable(lmp ${MAIN_SOURCES}) - set(LAMMPS_EXE lmp) target_link_libraries(lmp PRIVATE lammps) + set_target_properties(lmp PROPERTIES OUTPUT_NAME ${LAMMPS_BINARY}) + install(TARGETS lmp DESTINATION ${CMAKE_INSTALL_BINDIR}) endif() else() if(NOT BUILD_EXE) message(FATAL_ERROR "You need to at least enable one of two following options: BUILD_LIB or BUILD_EXE") endif() add_executable(lammps ${ALL_SOURCES}) - set(LAMMPS_EXE lammps) + set_target_properties(lammps PROPERTIES OUTPUT_NAME ${LAMMPS_BINARY}) + install(TARGETS lammps DESTINATION ${CMAKE_INSTALL_BINDIR}) endif() option(CMAKE_VERBOSE_MAKEFILE "Generate verbose Makefiles" OFF) @@ -186,6 +188,7 @@ if(BUILD_MPI) include(MPI4WIN) else() find_package(MPI REQUIRED) + include_directories(${MPI_CXX_INCLUDE_PATH}) add_definitions(-DMPICH_SKIP_MPICXX -DOMPI_SKIP_MPICXX=1) target_link_libraries(lammps PRIVATE MPI::MPI_CXX) option(LAMMPS_LONGLONG_TO_LONG "Workaround if your system or MPI version does not recognize 'long long' data types" OFF) @@ -580,8 +583,6 @@ if(BUILD_LIB) endif() if(BUILD_EXE) - set_target_properties(${LAMMPS_EXE} PROPERTIES OUTPUT_NAME ${LAMMPS_BINARY}) - install(TARGETS ${LAMMPS_EXE} DESTINATION ${CMAKE_INSTALL_BINDIR}) install(FILES ${LAMMPS_DOC_DIR}/lammps.1 DESTINATION ${CMAKE_INSTALL_MANDIR}/man1 RENAME ${LAMMPS_BINARY}.1) endif() -- GitLab From 98bfbbd57630fb21421d73c2478571068e343a5c Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sun, 22 Mar 2020 22:21:12 -0400 Subject: 
[PATCH 037/328] fix typo in CMake module --- cmake/Modules/Packages/USER-INTEL.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/Modules/Packages/USER-INTEL.cmake b/cmake/Modules/Packages/USER-INTEL.cmake index d0941a0a12..9ae4333ee2 100644 --- a/cmake/Modules/Packages/USER-INTEL.cmake +++ b/cmake/Modules/Packages/USER-INTEL.cmake @@ -31,7 +31,7 @@ if(PKG_USER-INTEL) endif() endif() if(INTEL_LRT_MODE STREQUAL "C++11") - add_definitions(-DLMP_INTEL_USERLRT -DLMP_INTEL_LRT11) + add_definitions(-DLMP_INTEL_USELRT -DLMP_INTEL_LRT11) endif() if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel") -- GitLab From a42f7163d222367003c291e7c307521966c9a266 Mon Sep 17 00:00:00 2001 From: Christoph Junghans Date: Sun, 22 Mar 2020 21:29:15 -0600 Subject: [PATCH 038/328] cmake: move include up --- cmake/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index c6d149e7d6..ae80996196 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -105,6 +105,7 @@ endif() option(BUILD_TOOLS "Build and install LAMMPS tools (msi2lmp, binary2txt, chain)" OFF) +include(GNUInstallDirs) file(GLOB ALL_SOURCES ${LAMMPS_SOURCE_DIR}/[^.]*.cpp) if(BUILD_LIB) file(GLOB MAIN_SOURCES ${LAMMPS_SOURCE_DIR}/main.cpp) @@ -126,7 +127,6 @@ else() endif() option(CMAKE_VERBOSE_MAKEFILE "Generate verbose Makefiles" OFF) -include(GNUInstallDirs) set(LAMMPS_API_DEFINES) -- GitLab From 6e1f18961c87595058f2f8c83414ba0717eb1e13 Mon Sep 17 00:00:00 2001 From: Richard Berger Date: Mon, 23 Mar 2020 09:42:46 -0400 Subject: [PATCH 039/328] Convert characters to UTF-8 --- src/KSPACE/msm.cpp | 4 ++-- src/USER-INTEL/pair_tersoff_intel.cpp | 2 +- src/USER-SMTBQ/pair_smtbq.cpp | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/KSPACE/msm.cpp b/src/KSPACE/msm.cpp index 126236a328..1520c2c607 100644 --- a/src/KSPACE/msm.cpp +++ b/src/KSPACE/msm.cpp @@ -2920,7 +2920,7 @@ void MSM::compute_phis_and_dphis(const 
double &dx, const double &dy, /* ---------------------------------------------------------------------- compute phi using interpolating polynomial - see Eq 7 from Parallel Computing 35 (2009) 164–177 + see Eq 7 from Parallel Computing 35 (2009) 164-177 and Hardy's thesis ------------------------------------------------------------------------- */ @@ -2999,7 +2999,7 @@ inline double MSM::compute_phi(const double &xi) /* ---------------------------------------------------------------------- compute the derivative of phi phi is an interpolating polynomial - see Eq 7 from Parallel Computing 35 (2009) 164–177 + see Eq 7 from Parallel Computing 35 (2009) 164-177 and Hardy's thesis ------------------------------------------------------------------------- */ diff --git a/src/USER-INTEL/pair_tersoff_intel.cpp b/src/USER-INTEL/pair_tersoff_intel.cpp index 76d06b02dd..8784029320 100644 --- a/src/USER-INTEL/pair_tersoff_intel.cpp +++ b/src/USER-INTEL/pair_tersoff_intel.cpp @@ -12,7 +12,7 @@ ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- - Contributing author: Markus Höhnerbach (RWTH) + Contributing author: Markus Höhnerbach (RWTH) ------------------------------------------------------------------------- */ #include diff --git a/src/USER-SMTBQ/pair_smtbq.cpp b/src/USER-SMTBQ/pair_smtbq.cpp index f61fc1a72e..fcf6d141f2 100644 --- a/src/USER-SMTBQ/pair_smtbq.cpp +++ b/src/USER-SMTBQ/pair_smtbq.cpp @@ -13,7 +13,7 @@ /* ---------------------------------------------------------------------- The SMTBQ code has been developed with the financial support of CNRS and - of the Regional Council of Burgundy (Convention n¡ 2010-9201AAO037S03129) + of the Regional Council of Burgundy (Convention n¡ 2010-9201AAO037S03129) Copyright (2015) Universite de Bourgogne : Nicolas SALLES, Olivier POLITANO @@ -943,7 +943,7 @@ void PairSMTBQ::compute(int eflag, int vflag) 3 -> Short int. 
Ox-Ox 4 -> Short int. SMTB (repulsion) 5 -> Covalent energy SMTB - 6 -> Somme des Q(i)² + 6 -> Somme des Q(i)² ------------------------------------------------------------------------- */ /* -------------- N-body forces Calcul --------------- */ @@ -3022,7 +3022,7 @@ void PairSMTBQ::groupQEqAllParallel_QEq() ngp = igp = 0; nelt[ngp] = 0; - // On prend un oxygène + // On prend un oxygène // printf ("[me %d] On prend un oxygene\n",me); for (ii = 0; ii < inum; ii++) { -- GitLab From 6ee25db32a46c80da89a69ccfa667e283404ff3a Mon Sep 17 00:00:00 2001 From: Christoph Junghans Date: Mon, 23 Mar 2020 08:00:51 -0600 Subject: [PATCH 040/328] cmake: fftw needs to be public due to DSO --- cmake/Modules/Packages/KSPACE.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/Modules/Packages/KSPACE.cmake b/cmake/Modules/Packages/KSPACE.cmake index db12787013..2a586dccf3 100644 --- a/cmake/Modules/Packages/KSPACE.cmake +++ b/cmake/Modules/Packages/KSPACE.cmake @@ -19,7 +19,7 @@ if(PKG_KSPACE) if(FFT STREQUAL "FFTW3") find_package(${FFTW} REQUIRED) add_definitions(-DFFT_FFTW3) - target_link_libraries(lammps PRIVATE ${FFTW}::${FFTW}) + target_link_libraries(lammps PUBLIC ${FFTW}::${FFTW}) if(FFTW3_OMP_LIBRARY OR FFTW3F_OMP_LIBRARY) option(FFT_FFTW_THREADS "Use threaded FFTW library" ON) else() -- GitLab From 9b8266173faff9ab80e77461be42e566e658491e Mon Sep 17 00:00:00 2001 From: Christoph Junghans Date: Mon, 23 Mar 2020 08:23:16 -0600 Subject: [PATCH 041/328] cmake: JPEG imported target is >=cmake-3.12 only --- cmake/CMakeLists.txt | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index ae80996196..6dd80d3253 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -284,7 +284,12 @@ option(WITH_JPEG "Enable JPEG support" ${JPEG_FOUND}) if(WITH_JPEG) find_package(JPEG REQUIRED) add_definitions(-DLAMMPS_JPEG) - target_link_libraries(lammps PRIVATE JPEG::JPEG) + if(CMAKE_VERSION VERSION_LESS 
3.12) + include_directories(${JPEG_INCLUDE_DIR}) + target_link_libraries(lammps PRIVATE ${JPEG_LIBRARIES}) + else() + target_link_libraries(lammps PRIVATE JPEG::JPEG) + endif() endif() find_package(PNG QUIET) -- GitLab From d92b9ba8d0c38de37c1699dbcaecfce7931766db Mon Sep 17 00:00:00 2001 From: Christoph Junghans Date: Mon, 23 Mar 2020 08:51:29 -0600 Subject: [PATCH 042/328] cmake: mpi needs to be public due to DSO --- cmake/CMakeLists.txt | 2 +- cmake/Modules/Packages/USER-PLUMED.cmake | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 6dd80d3253..362cce94db 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -190,7 +190,7 @@ if(BUILD_MPI) find_package(MPI REQUIRED) include_directories(${MPI_CXX_INCLUDE_PATH}) add_definitions(-DMPICH_SKIP_MPICXX -DOMPI_SKIP_MPICXX=1) - target_link_libraries(lammps PRIVATE MPI::MPI_CXX) + target_link_libraries(lammps PUBLIC MPI::MPI_CXX) option(LAMMPS_LONGLONG_TO_LONG "Workaround if your system or MPI version does not recognize 'long long' data types" OFF) if(LAMMPS_LONGLONG_TO_LONG) add_definitions(-DLAMMPS_LONGLONG_TO_LONG) diff --git a/cmake/Modules/Packages/USER-PLUMED.cmake b/cmake/Modules/Packages/USER-PLUMED.cmake index 2bca281fa8..a6cd20adbb 100644 --- a/cmake/Modules/Packages/USER-PLUMED.cmake +++ b/cmake/Modules/Packages/USER-PLUMED.cmake @@ -70,7 +70,7 @@ if(PKG_USER-PLUMED) add_dependencies(lammps plumed_build) if(PLUMED_MODE STREQUAL "STATIC") add_definitions(-D__PLUMED_WRAPPER_CXX=1) - target_link_libraries(lammps ${PLUMED_INSTALL_DIR}/lib/libplumed.a ${PLUMED_LINK_LIBS} ${CMAKE_DL_LIBS}) + target_link_libraries(lammps PRIVATE ${PLUMED_INSTALL_DIR}/lib/libplumed.a ${PLUMED_LINK_LIBS} ${CMAKE_DL_LIBS}) elseif(PLUMED_MODE STREQUAL "SHARED") target_link_libraries(lammps PRIVATE ${PLUMED_INSTALL_DIR}/lib/libplumed${CMAKE_SHARED_LIBRARY_SUFFIX} ${PLUMED_INSTALL_DIR}/lib/libplumedKernel${CMAKE_SHARED_LIBRARY_SUFFIX} ${CMAKE_DL_LIBS}) 
elseif(PLUMED_MODE STREQUAL "RUNTIME") -- GitLab From e76afb33edc939ee63d2033e750d186bc2799c5a Mon Sep 17 00:00:00 2001 From: Christoph Junghans Date: Mon, 23 Mar 2020 12:06:58 -0600 Subject: [PATCH 043/328] cmake: update to new target_link_libraries() signature --- cmake/CMakeLists.txt | 8 ++++---- cmake/Modules/Packages/GPU.cmake | 4 ++-- cmake/Modules/Packages/USER-H5MD.cmake | 2 +- cmake/pkgconfig/liblammps.pc.in | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 362cce94db..cc9b3fc56a 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -457,14 +457,14 @@ foreach(SIMPLE_LIB POEMS USER-ATC USER-AWPMD USER-H5MD) endforeach() if(PKG_USER-AWPMD) - target_link_libraries(awpmd ${LAPACK_LIBRARIES}) + target_link_libraries(awpmd PRIVATE ${LAPACK_LIBRARIES}) endif() if(PKG_USER-ATC) if(LAMMPS_SIZES STREQUAL BIGBIG) message(FATAL_ERROR "The USER-ATC Package is not compatible with -DLAMMPS_BIGBIG") endif() - target_link_libraries(atc ${LAPACK_LIBRARIES}) + target_link_libraries(atc PRIVATE ${LAPACK_LIBRARIES}) endif() include(Packages/USER-H5MD) @@ -599,7 +599,7 @@ if(BUILD_TOOLS) if(CMAKE_GENERATOR_SUPPORT_FORTRAN) enable_language(Fortran) add_executable(chain.x ${LAMMPS_TOOLS_DIR}/chain.f) - target_link_libraries(chain.x ${CMAKE_Fortran_IMPLICIT_LINK_LIBRARIES}) + target_link_libraries(chain.x PRIVATE ${CMAKE_Fortran_IMPLICIT_LINK_LIBRARIES}) install(TARGETS chain.x DESTINATION ${CMAKE_INSTALL_BINDIR}) else() message(WARNING "CMake build doesn't support fortran, skipping building 'chain.x'") @@ -609,7 +609,7 @@ if(BUILD_TOOLS) get_filename_component(MSI2LMP_SOURCE_DIR ${LAMMPS_TOOLS_DIR}/msi2lmp/src ABSOLUTE) file(GLOB MSI2LMP_SOURCES ${MSI2LMP_SOURCE_DIR}/[^.]*.c) add_executable(msi2lmp ${MSI2LMP_SOURCES}) - target_link_libraries(msi2lmp m) + target_link_libraries(msi2lmp PRIVATE ${MATH_LIBRARIES}) install(TARGETS msi2lmp DESTINATION ${CMAKE_INSTALL_BINDIR}) install(FILES 
${LAMMPS_DOC_DIR}/msi2lmp.1 DESTINATION ${CMAKE_INSTALL_MANDIR}/man1) endif() diff --git a/cmake/Modules/Packages/GPU.cmake b/cmake/Modules/Packages/GPU.cmake index 427644f9c7..95bb525dd5 100644 --- a/cmake/Modules/Packages/GPU.cmake +++ b/cmake/Modules/Packages/GPU.cmake @@ -99,7 +99,7 @@ if(PKG_GPU) add_library(gpu STATIC ${GPU_LIB_SOURCES} ${GPU_LIB_CUDPP_SOURCES} ${GPU_OBJS}) - target_link_libraries(gpu ${CUDA_LIBRARIES} ${CUDA_CUDA_LIBRARY}) + target_link_libraries(gpu PRIVATE ${CUDA_LIBRARIES} ${CUDA_CUDA_LIBRARY}) target_include_directories(gpu PRIVATE ${LAMMPS_LIB_BINARY_DIR}/gpu ${CUDA_INCLUDE_DIRS}) target_compile_definitions(gpu PRIVATE -D_${GPU_PREC_SETTING} -DMPI_GERYON -DUCL_NO_EXIT ${GPU_CUDA_MPS_FLAGS}) if(CUDPP_OPT) @@ -166,7 +166,7 @@ if(PKG_GPU) ) add_library(gpu STATIC ${GPU_LIB_SOURCES}) - target_link_libraries(gpu ${OpenCL_LIBRARIES}) + target_link_libraries(gpu PRIVATE ${OpenCL_LIBRARIES}) target_include_directories(gpu PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/gpu ${OpenCL_INCLUDE_DIRS}) target_compile_definitions(gpu PRIVATE -D_${GPU_PREC_SETTING} -D${OCL_TUNE}_OCL -DMPI_GERYON -DUCL_NO_EXIT) target_compile_definitions(gpu PRIVATE -DUSE_OPENCL) diff --git a/cmake/Modules/Packages/USER-H5MD.cmake b/cmake/Modules/Packages/USER-H5MD.cmake index 40ea7b7444..1e3fcf82ce 100644 --- a/cmake/Modules/Packages/USER-H5MD.cmake +++ b/cmake/Modules/Packages/USER-H5MD.cmake @@ -2,7 +2,7 @@ if(PKG_USER-H5MD) enable_language(C) find_package(HDF5 REQUIRED) - target_link_libraries(h5md ${HDF5_LIBRARIES}) + target_link_libraries(h5md PRIVATE ${HDF5_LIBRARIES}) target_include_directories(h5md PRIVATE ${HDF5_INCLUDE_DIRS}) include_directories(${HDF5_INCLUDE_DIRS}) endif() diff --git a/cmake/pkgconfig/liblammps.pc.in b/cmake/pkgconfig/liblammps.pc.in index a8710ca224..a89f992c4a 100644 --- a/cmake/pkgconfig/liblammps.pc.in +++ b/cmake/pkgconfig/liblammps.pc.in @@ -22,7 +22,7 @@ # CMakeLists.txt: # find_package(PkgConfig) # pkg_check_modules(LAMMPS IMPORTED_TARGET 
lammps) -# target_link_libraries( PkgConfig::LAMMPS) +# target_link_libraries( PRIVATE PkgConfig::LAMMPS) prefix=@CMAKE_INSTALL_PREFIX@ libdir=@CMAKE_INSTALL_FULL_LIBDIR@ -- GitLab From bcfc606efb2622e9afdfae6984f6b01a187f0997 Mon Sep 17 00:00:00 2001 From: Evan Weinberg Date: Mon, 23 Mar 2020 13:20:56 -0700 Subject: [PATCH 044/328] SNAP optimizations, kernel fusion, large reduction of memory usage on the GPU, misc. performance optimizations. --- src/KOKKOS/pair_snap_kokkos.h | 12 +- src/KOKKOS/pair_snap_kokkos_impl.h | 111 ++--- src/KOKKOS/sna_kokkos.h | 20 +- src/KOKKOS/sna_kokkos_impl.h | 676 ++++++++++++++--------------- 4 files changed, 349 insertions(+), 470 deletions(-) diff --git a/src/KOKKOS/pair_snap_kokkos.h b/src/KOKKOS/pair_snap_kokkos.h index e53dec4d86..b57ef2d9e5 100644 --- a/src/KOKKOS/pair_snap_kokkos.h +++ b/src/KOKKOS/pair_snap_kokkos.h @@ -37,15 +37,13 @@ struct TagPairSNAPBeta{}; struct TagPairSNAPComputeNeigh{}; struct TagPairSNAPPreUi{}; struct TagPairSNAPComputeUi{}; -struct TagPairSNAPComputeUiTot{}; // accumulate ulist into ulisttot separately struct TagPairSNAPComputeUiCPU{}; struct TagPairSNAPComputeZi{}; struct TagPairSNAPComputeBi{}; struct TagPairSNAPZeroYi{}; struct TagPairSNAPComputeYi{}; -struct TagPairSNAPComputeDuidrj{}; +struct TagPairSNAPComputeFusedDeidrj{}; struct TagPairSNAPComputeDuidrjCPU{}; -struct TagPairSNAPComputeDeidrj{}; struct TagPairSNAPComputeDeidrjCPU{}; template @@ -83,9 +81,6 @@ public: KOKKOS_INLINE_FUNCTION void operator() (TagPairSNAPComputeUi,const typename Kokkos::TeamPolicy::member_type& team) const; - KOKKOS_INLINE_FUNCTION - void operator() (TagPairSNAPComputeUiTot,const typename Kokkos::TeamPolicy::member_type& team) const; - KOKKOS_INLINE_FUNCTION void operator() (TagPairSNAPComputeUiCPU,const typename Kokkos::TeamPolicy::member_type& team) const; @@ -102,14 +97,11 @@ public: void operator() (TagPairSNAPComputeYi,const int& ii) const; KOKKOS_INLINE_FUNCTION - void operator() 
(TagPairSNAPComputeDuidrj,const typename Kokkos::TeamPolicy::member_type& team) const; + void operator() (TagPairSNAPComputeFusedDeidrj,const typename Kokkos::TeamPolicy::member_type& team) const; KOKKOS_INLINE_FUNCTION void operator() (TagPairSNAPComputeDuidrjCPU,const typename Kokkos::TeamPolicy::member_type& team) const; - KOKKOS_INLINE_FUNCTION - void operator() (TagPairSNAPComputeDeidrj,const typename Kokkos::TeamPolicy::member_type& team) const; - KOKKOS_INLINE_FUNCTION void operator() (TagPairSNAPComputeDeidrjCPU,const typename Kokkos::TeamPolicy::member_type& team) const; diff --git a/src/KOKKOS/pair_snap_kokkos_impl.h b/src/KOKKOS/pair_snap_kokkos_impl.h index 1156d11c31..d807f149a9 100644 --- a/src/KOKKOS/pair_snap_kokkos_impl.h +++ b/src/KOKKOS/pair_snap_kokkos_impl.h @@ -30,7 +30,6 @@ #include "kokkos.h" #include "sna.h" - #define MAXLINE 1024 #define MAXWORD 3 @@ -255,26 +254,19 @@ void PairSNAPKokkos::compute(int eflag_in, int vflag_in) // scratch size: 2 * team_size * (twojmax+1)^2, to cover all `m1`,`m2` values // 2 is for double buffer - typename Kokkos::TeamPolicy policy_ui(((chunk_size+team_size-1)/team_size)*max_neighs,team_size,vector_length); + const int tile_size = (twojmax+1)*(twojmax+1); typedef Kokkos::View< SNAcomplex*, Kokkos::DefaultExecutionSpace::scratch_memory_space, Kokkos::MemoryTraits > ScratchViewType; - int scratch_size = ScratchViewType::shmem_size( 2 * team_size * (twojmax+1)*(twojmax+1)); + int scratch_size = ScratchViewType::shmem_size( 2 * team_size * tile_size ); + + typename Kokkos::TeamPolicy policy_ui(((chunk_size+team_size-1)/team_size)*max_neighs,team_size,vector_length); policy_ui = policy_ui.set_scratch_size(0, Kokkos::PerTeam( scratch_size )); Kokkos::parallel_for("ComputeUi",policy_ui,*this); - // ComputeUitot - vector_length = 1; - team_size = 128; - team_size_max = Kokkos::TeamPolicy::team_size_max(*this); - if (team_size*vector_length > team_size_max) - team_size = team_size_max/vector_length; - - typename 
Kokkos::TeamPolicy policy_ui_tot(((idxu_max+team_size-1)/team_size)*chunk_size,team_size,vector_length); - Kokkos::parallel_for("ComputeUiTot",policy_ui_tot,*this); } @@ -316,7 +308,7 @@ void PairSNAPKokkos::compute(int eflag_in, int vflag_in) typename Kokkos::RangePolicy policy_yi(0,chunk_size*idxz_max); Kokkos::parallel_for("ComputeYi",policy_yi,*this); - //ComputeDuidrj + //ComputeDuidrj and Deidrj if (lmp->kokkos->ngpus == 0) { // CPU int vector_length = 1; int team_size = 1; @@ -324,53 +316,37 @@ void PairSNAPKokkos::compute(int eflag_in, int vflag_in) typename Kokkos::TeamPolicy policy_duidrj_cpu(((chunk_size+team_size-1)/team_size)*max_neighs,team_size,vector_length); snaKK.set_dir(-1); // technically doesn't do anything Kokkos::parallel_for("ComputeDuidrjCPU",policy_duidrj_cpu,*this); - } else { // GPU, utilize scratch memory and splitting over dimensions - int team_size_max = Kokkos::TeamPolicy::team_size_max(*this); + typename Kokkos::TeamPolicy policy_deidrj_cpu(((chunk_size+team_size-1)/team_size)*max_neighs,team_size,vector_length); + + Kokkos::parallel_for("ComputeDeidrjCPU",policy_deidrj_cpu,*this); + } else { // GPU, utilize scratch memory and splitting over dimensions, fused dui and dei + + int team_size_max = Kokkos::TeamPolicy::team_size_max(*this); int vector_length = 32; int team_size = 2; // need to cap b/c of shared memory reqs if (team_size*vector_length > team_size_max) team_size = team_size_max/vector_length; - // scratch size: 2 * 2 * team_size * (twojmax+1)^2, to cover all `m1`,`m2` values + // scratch size: 2 * 2 * team_size * (twojmax+1)*(twojmax/2+1), to cover half `m1`,`m2` values due to symmetry // 2 is for double buffer + const int tile_size = (twojmax+1)*(twojmax/2+1); + typedef Kokkos::View< SNAcomplex*, - Kokkos::DefaultExecutionSpace::scratch_memory_space, - Kokkos::MemoryTraits > - ScratchViewType; - - int scratch_size = ScratchViewType::shmem_size( 4 * team_size * (twojmax+1)*(twojmax+1)); - typename Kokkos::TeamPolicy 
policy_duidrj(((chunk_size+team_size-1)/team_size)*max_neighs,team_size,vector_length); - policy_duidrj = policy_duidrj.set_scratch_size(0, Kokkos::PerTeam( scratch_size )); - // Need to call three times, once for each direction + Kokkos::DefaultExecutionSpace::scratch_memory_space, + Kokkos::MemoryTraits > + ScratchViewType; + int scratch_size = ScratchViewType::shmem_size( 4 * team_size * tile_size); + + typename Kokkos::TeamPolicy policy_fused_deidrj(((chunk_size+team_size-1)/team_size)*max_neighs,team_size,vector_length); + policy_fused_deidrj = policy_fused_deidrj.set_scratch_size(0, Kokkos::PerTeam( scratch_size )); + for (int k = 0; k < 3; k++) { snaKK.set_dir(k); - Kokkos::parallel_for("ComputeDuidrj",policy_duidrj,*this); + Kokkos::parallel_for("ComputeFusedDeidrj",policy_fused_deidrj,*this); } } - //ComputeDeidrj - if (lmp->kokkos->ngpus == 0) { // CPU - int vector_length = 1; - int team_size = 1; - - typename Kokkos::TeamPolicy policy_deidrj_cpu(((chunk_size+team_size-1)/team_size)*max_neighs,team_size,vector_length); - - Kokkos::parallel_for("ComputeDeidrjCPU",policy_deidrj_cpu,*this); - - } else { // GPU, different loop strategy internally - - int team_size_max = Kokkos::TeamPolicy::team_size_max(*this); - int vector_length = 32; // coalescing disaster right now, will fix later - int team_size = 8; - if (team_size*vector_length > team_size_max) - team_size = team_size_max/vector_length; - - typename Kokkos::TeamPolicy policy_deidrj(((chunk_size+team_size-1)/team_size)*max_neighs,team_size,vector_length); - - Kokkos::parallel_for("ComputeDeidrj",policy_deidrj,*this); - } - //ComputeForce if (eflag) { if (neighflag == HALF) { @@ -642,25 +618,6 @@ void PairSNAPKokkos::operator() (TagPairSNAPComputeUi,const typename my_sna.compute_ui(team,ii,jj); } -template -KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPComputeUiTot,const typename Kokkos::TeamPolicy::member_type& team) const { - SNAKokkos my_sna = snaKK; - - // Extract the quantum 
number - const int idx = team.team_rank() + team.team_size() * (team.league_rank() % ((my_sna.idxu_max+team.team_size()-1)/team.team_size())); - if (idx >= my_sna.idxu_max) return; - - // Extract the atomic index - const int ii = team.league_rank() / ((my_sna.idxu_max+team.team_size()-1)/team.team_size()); - if (ii >= chunk_size) return; - - // Extract the number of neighbors neighbor number - const int ninside = d_ninside(ii); - - my_sna.compute_uitot(team,idx,ii,ninside); -} - template KOKKOS_INLINE_FUNCTION void PairSNAPKokkos::operator() (TagPairSNAPComputeUiCPU,const typename Kokkos::TeamPolicy::member_type& team) const { @@ -718,7 +675,7 @@ void PairSNAPKokkos::operator() (TagPairSNAPComputeBi,const typename template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPComputeDuidrj,const typename Kokkos::TeamPolicy::member_type& team) const { +void PairSNAPKokkos::operator() (TagPairSNAPComputeFusedDeidrj,const typename Kokkos::TeamPolicy::member_type& team) const { SNAKokkos my_sna = snaKK; // Extract the atom number @@ -730,7 +687,7 @@ void PairSNAPKokkos::operator() (TagPairSNAPComputeDuidrj,const type const int ninside = d_ninside(ii); if (jj >= ninside) return; - my_sna.compute_duidrj(team,ii,jj); + my_sna.compute_fused_deidrj(team,ii,jj); } template @@ -750,24 +707,6 @@ void PairSNAPKokkos::operator() (TagPairSNAPComputeDuidrjCPU,const t my_sna.compute_duidrj_cpu(team,ii,jj); } - -template -KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPComputeDeidrj,const typename Kokkos::TeamPolicy::member_type& team) const { - SNAKokkos my_sna = snaKK; - - // Extract the atom number - int ii = team.team_rank() + team.team_size() * (team.league_rank() % ((chunk_size+team.team_size()-1)/team.team_size())); - if (ii >= chunk_size) return; - - // Extract the neighbor number - const int jj = team.league_rank() / ((chunk_size+team.team_size()-1)/team.team_size()); - const int ninside = d_ninside(ii); - if (jj >= ninside) return; - - 
my_sna.compute_deidrj(team,ii,jj); -} - template KOKKOS_INLINE_FUNCTION void PairSNAPKokkos::operator() (TagPairSNAPComputeDeidrjCPU,const typename Kokkos::TeamPolicy::member_type& team) const { diff --git a/src/KOKKOS/sna_kokkos.h b/src/KOKKOS/sna_kokkos.h index 48d9114fbf..a6d9db3218 100644 --- a/src/KOKKOS/sna_kokkos.h +++ b/src/KOKKOS/sna_kokkos.h @@ -135,14 +135,10 @@ inline KOKKOS_INLINE_FUNCTION void pre_ui(const typename Kokkos::TeamPolicy::member_type& team,const int&); // ForceSNAP KOKKOS_INLINE_FUNCTION - void compute_ui(const typename Kokkos::TeamPolicy::member_type& team, int, int); // ForceSNAP + void compute_ui(const typename Kokkos::TeamPolicy::member_type& team, const int, const int); // ForceSNAP KOKKOS_INLINE_FUNCTION void compute_ui_cpu(const typename Kokkos::TeamPolicy::member_type& team, int, int); // ForceSNAP KOKKOS_INLINE_FUNCTION - void compute_ui_orig(const typename Kokkos::TeamPolicy::member_type& team, int, int); // ForceSNAP - KOKKOS_INLINE_FUNCTION - void compute_uitot(const typename Kokkos::TeamPolicy::member_type& team, int, int, int); // ForceSNAP - KOKKOS_INLINE_FUNCTION void compute_zi(const int&); // ForceSNAP KOKKOS_INLINE_FUNCTION void zero_yi(const int&,const int&); // ForceSNAP @@ -155,12 +151,10 @@ inline // functions for derivatives KOKKOS_INLINE_FUNCTION - void compute_duidrj(const typename Kokkos::TeamPolicy::member_type& team, int, int); //ForceSNAP + void compute_fused_deidrj(const typename Kokkos::TeamPolicy::member_type& team, const int, const int); //ForceSNAP KOKKOS_INLINE_FUNCTION void compute_duidrj_cpu(const typename Kokkos::TeamPolicy::member_type& team, int, int); //ForceSNAP KOKKOS_INLINE_FUNCTION - void compute_deidrj(const typename Kokkos::TeamPolicy::member_type& team, int, int); // ForceSNAP - KOKKOS_INLINE_FUNCTION void compute_deidrj_cpu(const typename Kokkos::TeamPolicy::member_type& team, int, int); // ForceSNAP KOKKOS_INLINE_FUNCTION double compute_sfac(double, double); // add_uarraytot, 
compute_duarray @@ -251,10 +245,6 @@ inline KOKKOS_INLINE_FUNCTION void add_uarraytot(const typename Kokkos::TeamPolicy::member_type& team, int, int, double, double, double); // compute_ui - KOKKOS_INLINE_FUNCTION - void compute_uarray(const typename Kokkos::TeamPolicy::member_type& team, int, int, - double, double, double, - double, double); // compute_ui KOKKOS_INLINE_FUNCTION void compute_uarray_cpu(const typename Kokkos::TeamPolicy::member_type& team, int, int, double, double, double, @@ -267,12 +257,8 @@ inline inline int compute_ncoeff(); // SNAKokkos() KOKKOS_INLINE_FUNCTION - void compute_duarray(const typename Kokkos::TeamPolicy::member_type& team, int, int, - double, double, double, // compute_duidrj - double, double, double, double, double); - KOKKOS_INLINE_FUNCTION void compute_duarray_cpu(const typename Kokkos::TeamPolicy::member_type& team, int, int, - double, double, double, // compute_duidrj + double, double, double, // compute_duidrj_cpu double, double, double, double, double); // Sets the style for the switching function diff --git a/src/KOKKOS/sna_kokkos_impl.h b/src/KOKKOS/sna_kokkos_impl.h index ef3312bd16..1daf8fd05c 100644 --- a/src/KOKKOS/sna_kokkos_impl.h +++ b/src/KOKKOS/sna_kokkos_impl.h @@ -19,6 +19,7 @@ #include #include #include +#include namespace LAMMPS_NS { @@ -231,11 +232,22 @@ void SNAKokkos::grow_rij(int newnatom, int newnmax) zlist = t_sna_2c_ll("sna:zlist",idxz_max,natom); //ulist = t_sna_3c("sna:ulist",natom,nmax,idxu_max); - ulist = t_sna_3c_ll("sna:ulist",idxu_max,natom,nmax); +#ifdef KOKKOS_ENABLE_CUDA + if (std::is_same::value) { + // dummy allocation + ulist = t_sna_3c_ll("sna:ulist",1,1,1); + dulist = t_sna_4c_ll("sna:dulist",1,1,1); + } else { +#endif + ulist = t_sna_3c_ll("sna:ulist",idxu_max,natom,nmax); + dulist = t_sna_4c_ll("sna:dulist",idxu_max,natom,nmax); +#ifdef KOKKOS_ENABLE_CUDA + } +#endif + //ylist = t_sna_2c_lr("sna:ylist",natom,idxu_max); ylist = t_sna_2c_ll("sna:ylist",idxu_max,natom); - //dulist = 
t_sna_4c("sna:dulist",natom,nmax,idxu_max); dulist = t_sna_4c_ll("sna:dulist",idxu_max,natom,nmax); } @@ -269,14 +281,14 @@ void SNAKokkos::pre_ui(const typename Kokkos::TeamPolicy } /* ---------------------------------------------------------------------- - compute Ui by summing over bispectrum components + compute Ui by computing Wigner U-functions for one neighbor and + accumulating to the total. GPU only. ------------------------------------------------------------------------- */ template KOKKOS_INLINE_FUNCTION -void SNAKokkos::compute_ui(const typename Kokkos::TeamPolicy::member_type& team, int iatom, int jnbor) +void SNAKokkos::compute_ui(const typename Kokkos::TeamPolicy::member_type& team, const int iatom, const int jnbor) { - double rsq, r, x, y, z, z0, theta0; // utot(j,ma,mb) = 0 for all j,ma,ma // utot(j,ma,ma) = 1 for all j,ma @@ -284,22 +296,143 @@ void SNAKokkos::compute_ui(const typename Kokkos::TeamPolicy 0)?rootpq2*buf1[jjup_shared_idx-1]:SNAcomplex(0.,0.); + //const SNAcomplex u_up2 = (ma > 0)?rootpq2*ulist(jjup_index-1,iatom,jnbor):SNAcomplex(0.,0.); + caconjxpy(b, u_up2, u_accum); + + // VMK recursion relation: grab contribution which is multiplied by a* + const double rootpq1 = rootpqarray(j - ma, j - mb); + const SNAcomplex u_up1 = (ma < j)?rootpq1*buf1[jjup_shared_idx]:SNAcomplex(0.,0.); + //const SNAcomplex u_up1 = (ma < j)?rootpq1*ulist(jjup_index,iatom,jnbor):SNAcomplex(0.,0.); + caconjxpy(a, u_up1, u_accum); + + //ulist(jju_index,iatom,jnbor) = u_accum; + // back up into shared memory for next iter + buf2[jju_shared_idx] = u_accum; + + Kokkos::atomic_add(&(ulisttot(jju_index,iatom).re), sfac * u_accum.re); + Kokkos::atomic_add(&(ulisttot(jju_index,iatom).im), sfac * u_accum.im); + + // copy left side to right side with inversion symmetry VMK 4.4(2) + // u[ma-j,mb-j] = (-1)^(ma-mb)*Conj([u[ma,mb)) + // if j is even (-> physical j integer), last element maps to self, skip + //if (!(m == total_iters - 1 && j % 2 == 0)) { + if (m < 
total_iters - 1 || j % 2 == 1) { + const int sign_factor = (((ma+mb)%2==0)?1:-1); + const int jju_shared_flip = (j+1-mb)*(j+1)-(ma+1); + const int jjup_flip = jju + jju_shared_flip; // jju+(j+1-mb)*(j+1)-(ma+1); + + + if (sign_factor == 1) { + u_accum.im = -u_accum.im; + } else { + u_accum.re = -u_accum.re; + } + //ulist(jjup_flip,iatom,jnbor) = u_accum; + buf2[jju_shared_flip] = u_accum; + + Kokkos::atomic_add(&(ulisttot(jjup_flip,iatom).re), sfac * u_accum.re); + Kokkos::atomic_add(&(ulisttot(jjup_flip,iatom).im), sfac * u_accum.im); + } + }); + // In CUDA backend, + // ThreadVectorRange has a __syncwarp (appropriately masked for + // vector lengths < 32) implict at the end + + // swap double buffers + auto tmp = buf1; buf1 = buf2; buf2 = tmp; + + + } } +/* ---------------------------------------------------------------------- + compute Ui by summing over bispectrum components. CPU only. +------------------------------------------------------------------------- */ + template KOKKOS_INLINE_FUNCTION void SNAKokkos::compute_ui_cpu(const typename Kokkos::TeamPolicy::member_type& team, int iatom, int jnbor) @@ -327,37 +460,6 @@ void SNAKokkos::compute_ui_cpu(const typename Kokkos::TeamPolicy -KOKKOS_INLINE_FUNCTION -void SNAKokkos::compute_uitot(const typename Kokkos::TeamPolicy::member_type& team, int idx, int iatom, int ninside) -{ - // fuse initialize in, avoid this load? 
- SNAcomplex utot = ulisttot(idx, iatom); - for (int jnbor = 0; jnbor < ninside; jnbor++) { - - const auto x = rij(iatom,jnbor,0); - const auto y = rij(iatom,jnbor,1); - const auto z = rij(iatom,jnbor,2); - const auto rsq = x * x + y * y + z * z; - const auto r = sqrt(rsq); - - const double wj_local = wj(iatom, jnbor); - const double rcut = rcutij(iatom, jnbor); - const double sfac = compute_sfac(r, rcut) * wj_local; - - auto ulist_local = ulist(idx, iatom, jnbor); - utot.re += sfac * ulist_local.re; - utot.im += sfac * ulist_local.im; - } - - ulisttot(idx, iatom) = utot; - -} - /* ---------------------------------------------------------------------- compute Zi by summing over products of Ui not updated yet @@ -509,72 +611,203 @@ void SNAKokkos::compute_yi(int iter, } /* ---------------------------------------------------------------------- - compute dEidRj + Fused calculation of the derivative of Ui w.r.t. atom j + and of dEidRj. GPU only. ------------------------------------------------------------------------- */ template KOKKOS_INLINE_FUNCTION -void SNAKokkos::compute_deidrj(const typename Kokkos::TeamPolicy::member_type& team, int iatom, int jnbor) +void SNAKokkos::compute_fused_deidrj(const typename Kokkos::TeamPolicy::member_type& team, const int iatom, const int jnbor) { - t_scalar3 final_sum; + // get shared memory offset + const int max_m_tile = (twojmax+1)*(twojmax/2+1); + const int team_rank = team.team_rank(); + const int scratch_shift = team_rank * max_m_tile; - // Like in ComputeUi/ComputeDuidrj, regular loop over j. 
- for (int j = 0; j <= twojmax; j++) { - int jju = idxu_block(j); + // double buffer for ulist + SNAcomplex* ulist_buf1 = (SNAcomplex*)team.team_shmem( ).get_shmem(team.team_size()*max_m_tile*sizeof(SNAcomplex), 0) + scratch_shift; + SNAcomplex* ulist_buf2 = (SNAcomplex*)team.team_shmem( ).get_shmem(team.team_size()*max_m_tile*sizeof(SNAcomplex), 0) + scratch_shift; + + // double buffer for dulist + SNAcomplex* dulist_buf1 = (SNAcomplex*)team.team_shmem( ).get_shmem(team.team_size()*max_m_tile*sizeof(SNAcomplex), 0) + scratch_shift; + SNAcomplex* dulist_buf2 = (SNAcomplex*)team.team_shmem( ).get_shmem(team.team_size()*max_m_tile*sizeof(SNAcomplex), 0) + scratch_shift; + + const double x = rij(iatom,jnbor,0); + const double y = rij(iatom,jnbor,1); + const double z = rij(iatom,jnbor,2); + const double rsq = x * x + y * y + z * z; + const double r = sqrt(rsq); + const double rcut = rcutij(iatom, jnbor); + const double rscale0 = rfac0 * MY_PI / (rcut - rmin0); + const double theta0 = (r - rmin0) * rscale0; + const double cs = cos(theta0); + const double sn = sin(theta0); + const double z0 = r * cs / sn; + const double dz0dr = z0 / r - (r*rscale0) * (rsq + z0 * z0) / rsq; + + const double wj_local = wj(iatom, jnbor); + const double sfac = wj_local * compute_sfac(r, rcut); + const double dsfac = wj_local * compute_dsfac(r, rcut); + + const double rinv = 1.0 / r; + + // extract a single unit vector + const double u = (dir == 0 ? x * rinv : dir == 1 ? y * rinv : z * rinv); + + // Compute Cayley-Klein parameters for unit quaternion + const double r0inv = 1.0 / sqrt(r * r + z0 * z0); + + const SNAcomplex a = { r0inv * z0, -r0inv * z }; + const SNAcomplex b = { r0inv * y, -r0inv * x }; + + const double dr0invdr = -r0inv * r0inv * r0inv * (r + z0 * dz0dr); + const double dr0inv = dr0invdr * u; + const double dz0 = dz0dr * u; + + const SNAcomplex da = { dz0 * r0inv + z0 * dr0inv, + - z * dr0inv + (dir == 2 ? - r0inv : 0.) 
}; + + const SNAcomplex db = { y * dr0inv + (dir==1?r0inv:0.), + -x * dr0inv + (dir==0?-r0inv:0.) }; + + // Accumulate the full contribution to dedr on the fly + const double du_prod = dsfac * u; // chain rule + const SNAcomplex y_local = ylist(0, iatom); - // Flatten loop over ma, mb, reduce w/in + // Symmetry factor of 0.5 b/c 0 element is on diagonal for even j==0 + double dedr_full_sum = 0.5 * du_prod * y_local.re; + // single has a warp barrier at the end + Kokkos::single(Kokkos::PerThread(team), [=]() { + //dulist(0,iatom,jnbor,dir) = { dsfac * u, 0. }; // fold in chain rule here + ulist_buf1[0] = {1., 0.}; + dulist_buf1[0] = {0., 0.}; + }); + + for (int j = 1; j <= twojmax; j++) { + int jju = idxu_block[j]; + int jjup = idxu_block[j-1]; + + // flatten the loop over ma,mb + + // for (int ma = 0; ma <= j; ma++) const int n_ma = j+1; // for (int mb = 0; 2*mb <= j; mb++) const int n_mb = j/2+1; const int total_iters = n_ma * n_mb; - t_scalar3 sum; + double dedr_sum = 0.; // j-local sum //for (int m = 0; m < total_iters; m++) { Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team, total_iters), - [&] (const int m, t_scalar3& sum_tmp) { + [&] (const int m, double& sum_tmp) { // ma fast, mb slow int ma = m % n_ma; int mb = m / n_ma; - // get index - const int jju_index = jju+mb+mb*j+ma; - - // get ylist, rescale last element by 0.5 - SNAcomplex y_local = ylist(jju_index,iatom); - - const SNAcomplex du_x = dulist(jju_index,iatom,jnbor,0); - const SNAcomplex du_y = dulist(jju_index,iatom,jnbor,1); - const SNAcomplex du_z = dulist(jju_index,iatom,jnbor,2); + const int jju_index = jju+m; + // Load y_local, apply the symmetry scaling factor + // The "secret" of the shared memory optimization is it eliminates + // all global memory reads to duidrj in lieu of caching values in + // shared memory and otherwise always writing, making the kernel + // ultimately compute bound. We take advantage of that by adding + // some reads back in. 
+ auto y_local = ylist(jju_index,iatom); if (j % 2 == 0 && 2*mb == j) { if (ma == mb) { y_local = 0.5*y_local; } - else if (ma > mb) { y_local = { 0., 0. }; } + else if (ma > mb) { y_local = { 0., 0. }; } // can probably avoid this outright // else the ma < mb gets "double counted", cancelling the 0.5. } - sum_tmp.x += du_x.re * y_local.re + du_x.im * y_local.im; - sum_tmp.y += du_y.re * y_local.re + du_y.im * y_local.im; - sum_tmp.z += du_z.re * y_local.re + du_z.im * y_local.im; + // index into shared memory + const int jju_shared_idx = m; + const int jjup_shared_idx = jju_shared_idx - mb; - }, sum); // end loop over flattened ma,mb + // Need to compute and accumulate both u and du (mayhaps, we could probably + // balance some read and compute by reading u each time). + SNAcomplex u_accum = { 0., 0. }; + SNAcomplex du_accum = { 0., 0. }; - final_sum.x += sum.x; - final_sum.y += sum.y; - final_sum.z += sum.z; + const double rootpq2 = -rootpqarray(ma, j - mb); + const SNAcomplex u_up2 = (ma > 0)?rootpq2*ulist_buf1[jjup_shared_idx-1]:SNAcomplex(0.,0.); + caconjxpy(b, u_up2, u_accum); + + const double rootpq1 = rootpqarray(j - ma, j - mb); + const SNAcomplex u_up1 = (ma < j)?rootpq1*ulist_buf1[jjup_shared_idx]:SNAcomplex(0.,0.); + caconjxpy(a, u_up1, u_accum); + + // Next, spin up du_accum + const SNAcomplex du_up1 = (ma < j) ? rootpq1*dulist_buf1[jjup_shared_idx] : SNAcomplex(0.,0.); + caconjxpy(da, u_up1, du_accum); + caconjxpy(a, du_up1, du_accum); + + const SNAcomplex du_up2 = (ma > 0) ? rootpq2*dulist_buf1[jjup_shared_idx-1] : SNAcomplex(0.,0.); + caconjxpy(db, u_up2, du_accum); + caconjxpy(b, du_up2, du_accum); + + // No need to save u_accum to global memory + // Cache u_accum, du_accum to scratch memory. 
+ ulist_buf2[jju_shared_idx] = u_accum; + dulist_buf2[jju_shared_idx] = du_accum; + + // Directly accumulate deidrj into sum_tmp + //dulist(jju_index,iatom,jnbor,dir) = ((dsfac * u)*u_accum) + (sfac*du_accum); + const SNAcomplex du_prod = ((dsfac * u)*u_accum) + (sfac*du_accum); + sum_tmp += du_prod.re * y_local.re + du_prod.im * y_local.im; + + // copy left side to right side with inversion symmetry VMK 4.4(2) + // u[ma-j][mb-j] = (-1)^(ma-mb)*Conj([u[ma][mb]) + if (j%2==1 && mb+1==n_mb) { + int sign_factor = (((ma+mb)%2==0)?1:-1); + //const int jjup_flip = jju+(j+1-mb)*(j+1)-(ma+1); // no longer needed b/c we don't update dulist + const int jju_shared_flip = (j+1-mb)*(j+1)-(ma+1); + + if (sign_factor == 1) { + u_accum.im = -u_accum.im; + du_accum.im = -du_accum.im; + } else { + u_accum.re = -u_accum.re; + du_accum.re = -du_accum.re; + } + + // We don't need the second half of the tile for the deidrj accumulation. + // That's taken care of by the symmetry factor above. + //dulist(jjup_flip,iatom,jnbor,dir) = ((dsfac * u)*u_accum) + (sfac*du_accum); + + // We do need it for ortho polynomial generation, though + ulist_buf2[jju_shared_flip] = u_accum; + dulist_buf2[jju_shared_flip] = du_accum; + } + + }, dedr_sum); + + // swap buffers + auto tmp = ulist_buf1; ulist_buf1 = ulist_buf2; ulist_buf2 = tmp; + tmp = dulist_buf1; dulist_buf1 = dulist_buf2; dulist_buf2 = tmp; + + // Accumulate dedr. This "should" be in a single, but + // a Kokkos::single call implies a warp sync, and we may + // as well avoid that. This does no harm as long as the + // final assignment is in a single block. + //Kokkos::single(Kokkos::PerThread(team), [=]() { + dedr_full_sum += dedr_sum; + //}); } + // Store the accumulated dedr. 
Kokkos::single(Kokkos::PerThread(team), [&] () { - dedr(iatom,jnbor,0) = final_sum.x*2.0; - dedr(iatom,jnbor,1) = final_sum.y*2.0; - dedr(iatom,jnbor,2) = final_sum.z*2.0; + dedr(iatom,jnbor,dir) = dedr_full_sum*2.0; }); - } +/* ---------------------------------------------------------------------- + compute dEidRj, CPU path only. +------------------------------------------------------------------------- */ + + template KOKKOS_INLINE_FUNCTION void SNAKokkos::compute_deidrj_cpu(const typename Kokkos::TeamPolicy::member_type& team, int iatom, int jnbor) @@ -624,6 +857,7 @@ void SNAKokkos::compute_deidrj_cpu(const typename Kokkos::TeamPolicy /* ---------------------------------------------------------------------- compute Bi by summing conj(Ui)*Zi + not updated yet ------------------------------------------------------------------------- */ template @@ -708,28 +942,6 @@ void SNAKokkos::compute_bi(const typename Kokkos::TeamPolicy -KOKKOS_INLINE_FUNCTION -void SNAKokkos::compute_duidrj(const typename Kokkos::TeamPolicy::member_type& team, int iatom, int jnbor) -{ - double rsq, r, x, y, z, z0, theta0, cs, sn; - double dz0dr; - - x = rij(iatom,jnbor,0); - y = rij(iatom,jnbor,1); - z = rij(iatom,jnbor,2); - rsq = x * x + y * y + z * z; - r = sqrt(rsq); - double rscale0 = rfac0 * MY_PI / (rcutij(iatom,jnbor) - rmin0); - theta0 = (r - rmin0) * rscale0; - cs = cos(theta0); - sn = sin(theta0); - z0 = r * cs / sn; - dz0dr = z0 / r - (r*rscale0) * (rsq + z0 * z0) / rsq; - - compute_duarray(team, iatom, jnbor, x, y, z, z0, r, dz0dr, wj(iatom,jnbor), rcutij(iatom,jnbor)); -} - template KOKKOS_INLINE_FUNCTION void SNAKokkos::compute_duidrj_cpu(const typename Kokkos::TeamPolicy::member_type& team, int iatom, int jnbor) @@ -774,119 +986,6 @@ void SNAKokkos::add_uarraytot(const typename Kokkos::TeamPolicy -KOKKOS_INLINE_FUNCTION -void SNAKokkos::compute_uarray(const typename Kokkos::TeamPolicy::member_type& team, int iatom, int jnbor, - double x, double y, double z, - double z0, 
double r) -{ - // define size of scratch memory buffer - const int max_m_tile = (twojmax+1)*(twojmax+1); - const int team_rank = team.team_rank(); - - // get scratch memory double buffer - SNAcomplex* buf1 = (SNAcomplex*)team.team_shmem( ).get_shmem(team.team_size()*max_m_tile*sizeof(SNAcomplex), 0); - SNAcomplex* buf2 = (SNAcomplex*)team.team_shmem( ).get_shmem(team.team_size()*max_m_tile*sizeof(SNAcomplex), 0); - - // compute Cayley-Klein parameters for unit quaternion, - // pack into complex number - double r0inv = 1.0 / sqrt(r * r + z0 * z0); - SNAcomplex a = { r0inv * z0, -r0inv * z }; - SNAcomplex b = { r0inv * y, -r0inv * x }; - - // VMK Section 4.8.2 - - // All writes go to global memory and shared memory - // so we can avoid all global memory reads - Kokkos::single(Kokkos::PerThread(team), [=]() { - ulist(0,iatom,jnbor) = { 1.0, 0.0 }; - buf1[max_m_tile*team_rank] = {1.,0.}; - }); - - for (int j = 1; j <= twojmax; j++) { - const int jju = idxu_block[j]; - int jjup = idxu_block[j-1]; - - // fill in left side of matrix layer from previous layer - - // Flatten loop over ma, mb, need to figure out total - // number of iterations - - // for (int ma = 0; ma <= j; ma++) - const int n_ma = j+1; - // for (int mb = 0; 2*mb <= j; mb++) - const int n_mb = j/2+1; - - const int total_iters = n_ma * n_mb; - - //for (int m = 0; m < total_iters; m++) { - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, total_iters), - [&] (const int m) { - - // ma fast, mb slow - int ma = m % n_ma; - int mb = m / n_ma; - - // index into global memory array - const int jju_index = jju+mb+mb*j+ma; - - // index into shared memory buffer for previous level - const int jju_shared_idx = max_m_tile*team_rank+mb+mb*j+ma; - - // index into shared memory buffer for next level - const int jjup_shared_idx = max_m_tile*team_rank+mb*j+ma; - - SNAcomplex u_accum = {0., 0.}; - - // VMK recursion relation: grab contribution which is multiplied by a* - const double rootpq1 = rootpqarray(j - ma, j - mb); 
- const SNAcomplex u_up1 = (ma < j)?rootpq1*buf1[jjup_shared_idx]:SNAcomplex(0.,0.); - caconjxpy(a, u_up1, u_accum); - - // VMK recursion relation: grab contribution which is multiplied by b* - const double rootpq2 = -rootpqarray(ma, j - mb); - const SNAcomplex u_up2 = (ma > 0)?rootpq2*buf1[jjup_shared_idx-1]:SNAcomplex(0.,0.); - caconjxpy(b, u_up2, u_accum); - - ulist(jju_index,iatom,jnbor) = u_accum; - - // We no longer accumulate into ulisttot in this kernel. - // Instead, we have a separate kernel which avoids atomics. - // Running two separate kernels is net faster. - - // back up into shared memory for next iter - if (j != twojmax) buf2[jju_shared_idx] = u_accum; - - // copy left side to right side with inversion symmetry VMK 4.4(2) - // u[ma-j,mb-j] = (-1)^(ma-mb)*Conj([u[ma,mb)) - // We can avoid this if we're on the last row for an integer j - if (!(n_ma % 2 == 1 && (mb+1) == n_mb)) { - - int sign_factor = ((ma%2==0)?1:-1)*(mb%2==0?1:-1); - const int jjup_flip = jju+(j+1-mb)*(j+1)-(ma+1); - const int jju_shared_flip = max_m_tile*team_rank+(j+1-mb)*(j+1)-(ma+1); - - if (sign_factor == 1) { - u_accum.im = -u_accum.im; - } else { - u_accum.re = -u_accum.re; - } - ulist(jjup_flip,iatom,jnbor) = u_accum; - if (j != twojmax) buf2[jju_shared_flip] = u_accum; - } - }); - // In CUDA backend, - // ThreadVectorRange has a __syncwarp (appropriately masked for - // vector lengths < 32) implicit at the end - - // swap double buffers - auto tmp = buf1; buf1 = buf2; buf2 = tmp; - //std::swap(buf1, buf2); // throws warnings - - } -} - -// CPU version template KOKKOS_INLINE_FUNCTION void SNAKokkos::compute_uarray_cpu(const typename Kokkos::TeamPolicy::member_type& team, int iatom, int jnbor, @@ -976,152 +1075,9 @@ void SNAKokkos::compute_uarray_cpu(const typename Kokkos::TeamPolicy /* ---------------------------------------------------------------------- compute derivatives of Wigner U-functions for one neighbor - see comments in compute_uarray() + see comments in 
compute_uarray_cpu() ------------------------------------------------------------------------- */ -template -KOKKOS_INLINE_FUNCTION -void SNAKokkos::compute_duarray(const typename Kokkos::TeamPolicy::member_type& team, int iatom, int jnbor, - double x, double y, double z, - double z0, double r, double dz0dr, - double wj, double rcut) -{ - - // get shared memory offset - const int max_m_tile = (twojmax+1)*(twojmax+1); - const int team_rank = team.team_rank(); - - // double buffer for ulist - SNAcomplex* ulist_buf1 = (SNAcomplex*)team.team_shmem( ).get_shmem(team.team_size()*max_m_tile*sizeof(SNAcomplex), 0); - SNAcomplex* ulist_buf2 = (SNAcomplex*)team.team_shmem( ).get_shmem(team.team_size()*max_m_tile*sizeof(SNAcomplex), 0); - - // double buffer for dulist - SNAcomplex* dulist_buf1 = (SNAcomplex*)team.team_shmem( ).get_shmem(team.team_size()*max_m_tile*sizeof(SNAcomplex), 0); - SNAcomplex* dulist_buf2 = (SNAcomplex*)team.team_shmem( ).get_shmem(team.team_size()*max_m_tile*sizeof(SNAcomplex), 0); - - const double sfac = wj * compute_sfac(r, rcut); - const double dsfac = wj * compute_dsfac(r, rcut); - - const double rinv = 1.0 / r; - - // extract a single unit vector - const double u = (dir == 0 ? x * rinv : dir == 1 ? y * rinv : z * rinv); - - // Compute Cayley-Klein parameters for unit quaternion - - const double r0inv = 1.0 / sqrt(r * r + z0 * z0); - - const SNAcomplex a = { r0inv * z0, -r0inv * z }; - const SNAcomplex b = { r0inv * y, -r0inv * x }; - - const double dr0invdr = -r0inv * r0inv * r0inv * (r + z0 * dz0dr); - const double dr0inv = dr0invdr * u; - const double dz0 = dz0dr * u; - - const SNAcomplex da = { dz0 * r0inv + z0 * dr0inv, - - z * dr0inv + (dir == 2 ? - r0inv : 0.) }; - - const SNAcomplex db = { y * dr0inv + (dir==1?r0inv:0.), - -x * dr0inv + (dir==0?-r0inv:0.) }; - - // single has a warp barrier at the end - Kokkos::single(Kokkos::PerThread(team), [=]() { - dulist(0,iatom,jnbor,dir) = { dsfac * u, 0. 
}; // fold in chain rule here - ulist_buf1[max_m_tile*team_rank] = {1., 0.}; - dulist_buf1[max_m_tile*team_rank] = {0., 0.}; - }); - - - for (int j = 1; j <= twojmax; j++) { - int jju = idxu_block[j]; - int jjup = idxu_block[j-1]; - - // flatten the loop over ma,mb - - // for (int ma = 0; ma <= j; ma++) - const int n_ma = j+1; - // for (int mb = 0; 2*mb <= j; mb++) - const int n_mb = j/2+1; - - const int total_iters = n_ma * n_mb; - - //for (int m = 0; m < total_iters; m++) { - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, total_iters), - [&] (const int m) { - - // ma fast, mb slow - int ma = m % n_ma; - int mb = m / n_ma; - - const int jju_index = jju+mb+mb*j+ma; - - // index into shared memory - const int jju_shared_idx = max_m_tile*team_rank+mb+mb*j+ma; - const int jjup_shared_idx = max_m_tile*team_rank+mb*j+ma; - - // Need to compute and accumulate both u and du (mayhaps, we could probably - // balance some read and compute by reading u each time). - SNAcomplex u_accum = { 0., 0. }; - SNAcomplex du_accum = { 0., 0. }; - - const double rootpq1 = rootpqarray(j - ma, j - mb); - const SNAcomplex u_up1 = (ma < j)?rootpq1*ulist_buf1[jjup_shared_idx]:SNAcomplex(0.,0.); - caconjxpy(a, u_up1, u_accum); - - const double rootpq2 = -rootpqarray(ma, j - mb); - const SNAcomplex u_up2 = (ma > 0)?rootpq2*ulist_buf1[jjup_shared_idx-1]:SNAcomplex(0.,0.); - caconjxpy(b, u_up2, u_accum); - - // No need to save u_accum to global memory - if (j != twojmax) ulist_buf2[jju_shared_idx] = u_accum; - - // Next, spin up du_accum - const SNAcomplex du_up1 = (ma < j) ? rootpq1*dulist_buf1[jjup_shared_idx] : SNAcomplex(0.,0.); - caconjxpy(da, u_up1, du_accum); - caconjxpy(a, du_up1, du_accum); - - const SNAcomplex du_up2 = (ma > 0) ? 
rootpq2*dulist_buf1[jjup_shared_idx-1] : SNAcomplex(0.,0.); - caconjxpy(db, u_up2, du_accum); - caconjxpy(b, du_up2, du_accum); - - dulist(jju_index,iatom,jnbor,dir) = ((dsfac * u)*u_accum) + (sfac*du_accum); - - if (j != twojmax) dulist_buf2[jju_shared_idx] = du_accum; - - // copy left side to right side with inversion symmetry VMK 4.4(2) - // u[ma-j][mb-j] = (-1)^(ma-mb)*Conj([u[ma][mb]) - - int sign_factor = ((ma%2==0)?1:-1)*(mb%2==0?1:-1); - const int jjup_flip = jju+(j+1-mb)*(j+1)-(ma+1); - const int jju_shared_flip = max_m_tile*team_rank+(j+1-mb)*(j+1)-(ma+1); - - if (sign_factor == 1) { - //ulist_alt(iatom,jnbor,jjup_flip).re = u_accum.re; - //ulist_alt(iatom,jnbor,jjup_flip).im = -u_accum.im; - u_accum.im = -u_accum.im; - du_accum.im = -du_accum.im; - } else { - //ulist_alt(iatom,jnbor,jjup_flip).re = -u_accum.re; - //ulist_alt(iatom,jnbor,jjup_flip).im = u_accum.im; - u_accum.re = -u_accum.re; - du_accum.re = -du_accum.re; - } - - dulist(jjup_flip,iatom,jnbor,dir) = ((dsfac * u)*u_accum) + (sfac*du_accum); - - if (j != twojmax) { - ulist_buf2[jju_shared_flip] = u_accum; - dulist_buf2[jju_shared_flip] = du_accum; - } - - }); - - // swap buffers - auto tmp = ulist_buf1; ulist_buf1 = ulist_buf2; ulist_buf2 = tmp; - tmp = dulist_buf1; dulist_buf1 = dulist_buf2; dulist_buf2 = tmp; - } -} - template KOKKOS_INLINE_FUNCTION void SNAKokkos::compute_duarray_cpu(const typename Kokkos::TeamPolicy::member_type& team, int iatom, int jnbor, @@ -1680,11 +1636,17 @@ double SNAKokkos::memory_usage() bytes += jdimpq*jdimpq * sizeof(double); // pqarray bytes += idxcg_max * sizeof(double); // cglist - bytes += natom * idxu_max * sizeof(double) * 2; // ulist +#ifdef KOKKOS_ENABLE_CUDA + if (!std::is_same::value) { +#endif + bytes += natom * idxu_max * sizeof(double) * 2; // ulist + bytes += natom * idxu_max * 3 * sizeof(double) * 2; // dulist +#ifdef KOKKOS_ENABLE_CUDA + } +#endif bytes += natom * idxu_max * sizeof(double) * 2; // ulisttot if (!Kokkos::Impl::is_same::value) 
bytes += natom * idxu_max * sizeof(double) * 2; // ulisttot_lr - bytes += natom * idxu_max * 3 * sizeof(double) * 2; // dulist bytes += natom * idxz_max * sizeof(double) * 2; // zlist bytes += natom * idxb_max * sizeof(double); // blist -- GitLab From 36095bbfdf39c85b6932bd0ec03ac062209e497e Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Mon, 23 Mar 2020 21:15:00 -0600 Subject: [PATCH 045/328] Tweak comment --- src/KOKKOS/sna_kokkos_impl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/KOKKOS/sna_kokkos_impl.h b/src/KOKKOS/sna_kokkos_impl.h index 1daf8fd05c..182a49bfb1 100644 --- a/src/KOKKOS/sna_kokkos_impl.h +++ b/src/KOKKOS/sna_kokkos_impl.h @@ -333,7 +333,7 @@ void SNAKokkos::compute_ui(const typename Kokkos::TeamPolicy Date: Mon, 23 Mar 2020 23:31:12 -0400 Subject: [PATCH 046/328] Cleaned up comment. --- src/KOKKOS/sna_kokkos_impl.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/KOKKOS/sna_kokkos_impl.h b/src/KOKKOS/sna_kokkos_impl.h index 182a49bfb1..e7b7087951 100644 --- a/src/KOKKOS/sna_kokkos_impl.h +++ b/src/KOKKOS/sna_kokkos_impl.h @@ -462,7 +462,6 @@ void SNAKokkos::compute_ui_cpu(const typename Kokkos::TeamPolicy -- GitLab From 5fa99cb07281675ebde5ada638d6597ba23a95f5 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Mon, 23 Mar 2020 21:33:11 -0600 Subject: [PATCH 047/328] Comment cleanup --- src/KOKKOS/sna_kokkos_impl.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/KOKKOS/sna_kokkos_impl.h b/src/KOKKOS/sna_kokkos_impl.h index e7b7087951..e6c34a245b 100644 --- a/src/KOKKOS/sna_kokkos_impl.h +++ b/src/KOKKOS/sna_kokkos_impl.h @@ -856,7 +856,6 @@ void SNAKokkos::compute_deidrj_cpu(const typename Kokkos::TeamPolicy /* ---------------------------------------------------------------------- compute Bi by summing conj(Ui)*Zi - not updated yet ------------------------------------------------------------------------- */ template -- GitLab From 0060473cee2824527c44a1839ae7a870f42f9b08 Mon Sep 17 00:00:00 2001 From: Axel 
Kohlmeyer Date: Tue, 24 Mar 2020 11:35:21 -0400 Subject: [PATCH 048/328] fix up some escaped '*' characters in "code-block" sections that do not need to be escaped --- doc/src/bond_oxdna.rst | 2 +- doc/src/pair_bop.rst | 4 ++-- doc/src/pair_class2.rst | 8 ++++---- doc/src/pair_coeff.rst | 12 ++++++------ doc/src/pair_cosine_squared.rst | 2 +- doc/src/pair_meam_sw_spline.rst | 4 +++- doc/src/pair_nb3b_harmonic.rst | 4 +++- doc/src/pair_polymorphic.rst | 4 ++-- doc/src/pair_python.rst | 12 ++++++------ doc/src/pair_spin_magelec.rst | 2 +- 10 files changed, 29 insertions(+), 25 deletions(-) diff --git a/doc/src/bond_oxdna.rst b/doc/src/bond_oxdna.rst index 8e69b298bf..71e5105436 100644 --- a/doc/src/bond_oxdna.rst +++ b/doc/src/bond_oxdna.rst @@ -32,7 +32,7 @@ Examples bond_coeff * 2.0 0.25 0.7564 bond_style oxrna2/fene - bond_coeff \* 2.0 0.25 0.76107 + bond_coeff * 2.0 0.25 0.76107 Description """"""""""" diff --git a/doc/src/pair_bop.rst b/doc/src/pair_bop.rst index ce7b69f41f..5cd045931a 100644 --- a/doc/src/pair_bop.rst +++ b/doc/src/pair_bop.rst @@ -132,9 +132,9 @@ and Te. If your LAMMPS simulation has 4 atoms types and you want the 1st 3 to be Cd, and the 4th to be Te, you would use the following pair_coeff command: -.. parsed-literal:: +.. code-block:: LAMMPS - pair_coeff \* \* CdTe Cd Cd Cd Te + pair_coeff * * CdTe Cd Cd Cd Te The 1st 2 arguments must be \* \* so as to span all LAMMPS atom types. The first three Cd arguments map LAMMPS atom types 1,2,3 to the Cd diff --git a/doc/src/pair_class2.rst b/doc/src/pair_class2.rst index d1c673ab97..8131799181 100644 --- a/doc/src/pair_class2.rst +++ b/doc/src/pair_class2.rst @@ -60,18 +60,18 @@ Examples .. 
code-block:: LAMMPS pair_style lj/class2 10.0 - pair_coeff \* \* 100.0 2.5 - pair_coeff 1 2\* 100.0 2.5 9.0 + pair_coeff * * 100.0 2.5 + pair_coeff 1 2* 100.0 2.5 9.0 pair_style lj/class2/coul/cut 10.0 pair_style lj/class2/coul/cut 10.0 8.0 - pair_coeff \* \* 100.0 3.0 + pair_coeff * * 100.0 3.0 pair_coeff 1 1 100.0 3.5 9.0 pair_coeff 1 1 100.0 3.5 9.0 9.0 pair_style lj/class2/coul/long 10.0 pair_style lj/class2/coul/long 10.0 8.0 - pair_coeff \* \* 100.0 3.0 + pair_coeff * * 100.0 3.0 pair_coeff 1 1 100.0 3.5 9.0 Description diff --git a/doc/src/pair_coeff.rst b/doc/src/pair_coeff.rst index 26910c1746..1886ce1118 100644 --- a/doc/src/pair_coeff.rst +++ b/doc/src/pair_coeff.rst @@ -19,11 +19,11 @@ Examples .. code-block:: LAMMPS pair_coeff 1 2 1.0 1.0 2.5 - pair_coeff 2 \* 1.0 1.0 - pair_coeff 3\* 1\*2 1.0 1.0 2.5 - pair_coeff \* \* 1.0 1.0 - pair_coeff \* \* nialhjea 1 1 2 - pair_coeff \* 3 morse.table ENTRY1 + pair_coeff 2 * 1.0 1.0 + pair_coeff 3* 1*2 1.0 1.0 2.5 + pair_coeff * * 1.0 1.0 + pair_coeff * * nialhjea 1 1 2 + pair_coeff * 3 morse.table ENTRY1 pair_coeff 1 2 lj/cut 1.0 1.0 2.5 (for pair_style hybrid) Description @@ -55,7 +55,7 @@ pairs, then overwrite the coeffs for just the I,J = 2,3 pair: .. code-block:: LAMMPS - pair_coeff \* \* 1.0 1.0 2.5 + pair_coeff * * 1.0 1.0 2.5 pair_coeff 2 3 2.0 1.0 1.12 A line in a data file that specifies pair coefficients uses the exact diff --git a/doc/src/pair_cosine_squared.rst b/doc/src/pair_cosine_squared.rst index 4f19ddd1e4..b7fa29bbd5 100644 --- a/doc/src/pair_cosine_squared.rst +++ b/doc/src/pair_cosine_squared.rst @@ -31,7 +31,7 @@ Examples .. 
code-block:: LAMMPS pair_style cosine/squared 3.0 - pair_coeff \* \* 1.0 1.3 + pair_coeff * * 1.0 1.3 pair_coeff 1 3 1.0 1.3 2.0 pair_coeff 1 3 1.0 1.3 wca pair_coeff 1 3 1.0 1.3 2.0 wca diff --git a/doc/src/pair_meam_sw_spline.rst b/doc/src/pair_meam_sw_spline.rst index ebb51795c2..0cf4c57975 100644 --- a/doc/src/pair_meam_sw_spline.rst +++ b/doc/src/pair_meam_sw_spline.rst @@ -75,7 +75,9 @@ If your LAMMPS simulation has 3 atoms types and they are all to be treated with this potential, you would use the following pair_coeff command: -pair_coeff \* \* Ti.meam.sw.spline Ti Ti Ti +.. code-block:: LAMMPS + + pair_coeff * * Ti.meam.sw.spline Ti Ti Ti The 1st 2 arguments must be \* \* so as to span all LAMMPS atom types. The three Ti arguments map LAMMPS atom types 1,2,3 to the Ti element diff --git a/doc/src/pair_nb3b_harmonic.rst b/doc/src/pair_nb3b_harmonic.rst index c4f3f3c3e8..9d63df65d5 100644 --- a/doc/src/pair_nb3b_harmonic.rst +++ b/doc/src/pair_nb3b_harmonic.rst @@ -64,7 +64,9 @@ NULL values are placeholders for atom types that will be used with other potentials. An example of a pair_coeff command for use with the *hybrid* pair style is: -pair_coeff \* \* nb3b/harmonic MgOH.nb3b.harmonic Mg O H +.. code-block:: LAMMPS + + pair_coeff * * nb3b/harmonic MgOH.nb3b.harmonic Mg O H Three-body non-bonded harmonic files in the *potentials* directory of the LAMMPS distribution have a ".nb3b.harmonic" suffix. Lines that diff --git a/doc/src/pair_polymorphic.rst b/doc/src/pair_polymorphic.rst index e06e9e7855..c0db3f10a4 100644 --- a/doc/src/pair_polymorphic.rst +++ b/doc/src/pair_polymorphic.rst @@ -180,9 +180,9 @@ functions for Si-C tersoff potential. If your LAMMPS simulation has 4 atoms types and you want the 1st 3 to be Si, and the 4th to be C, you would use the following pair_coeff command: -.. parsed-literal:: +.. 
code-block:: LAMMPS - pair_coeff \* \* SiC_tersoff.poly Si Si Si C + pair_coeff * * SiC_tersoff.poly Si Si Si C The 1st 2 arguments must be \* \* so as to span all LAMMPS atom types. The first three Si arguments map LAMMPS atom types 1,2,3 to the diff --git a/doc/src/pair_python.rst b/doc/src/pair_python.rst index fa76d4c16c..e654a6025f 100644 --- a/doc/src/pair_python.rst +++ b/doc/src/pair_python.rst @@ -113,8 +113,8 @@ which the parameters epsilon and sigma are both 1.0: class LJCutMelt(LAMMPSPairPotential): def __init__(self): super(LJCutMelt,self).__init__() - # set coeffs: 48\*eps\*sig\*\*12, 24\*eps\*sig\*\*6, - # 4\*eps\*sig\*\*12, 4\*eps\*sig\*\*6 + # set coeffs: 48*eps*sig**12, 24*eps*sig**6, + # 4*eps*sig**12, 4*eps*sig**6 self.units = 'lj' self.coeff = {'lj' : {'lj' : (48.0,24.0,4.0,4.0)}} @@ -137,18 +137,18 @@ the *LJCutMelt* example, here are the two functions: def compute_force(self,rsq,itype,jtype): coeff = self.coeff[self.pmap[itype]][self.pmap[jtype]] r2inv = 1.0/rsq - r6inv = r2inv\*r2inv\*r2inv + r6inv = r2inv*r2inv*r2inv lj1 = coeff[0] lj2 = coeff[1] - return (r6inv \* (lj1\*r6inv - lj2))\*r2inv + return (r6inv * (lj1*r6inv - lj2))*r2inv def compute_energy(self,rsq,itype,jtype): coeff = self.coeff[self.pmap[itype]][self.pmap[jtype]] r2inv = 1.0/rsq - r6inv = r2inv\*r2inv\*r2inv + r6inv = r2inv*r2inv*r2inv lj3 = coeff[2] lj4 = coeff[3] - return (r6inv \* (lj3\*r6inv - lj4)) + return (r6inv * (lj3*r6inv - lj4)) .. note:: diff --git a/doc/src/pair_spin_magelec.rst b/doc/src/pair_spin_magelec.rst index a220299b07..e4d6b81b8a 100644 --- a/doc/src/pair_spin_magelec.rst +++ b/doc/src/pair_spin_magelec.rst @@ -18,7 +18,7 @@ Examples .. 
code-block:: LAMMPS pair_style spin/magelec 4.5 - pair_coeff \* \* magelec 4.5 0.00109 1.0 1.0 1.0 + pair_coeff * * magelec 4.5 0.00109 1.0 1.0 1.0 Description """"""""""" -- GitLab From 572502b33d86a7b834dbbe5d646dee42c65e00e9 Mon Sep 17 00:00:00 2001 From: Christoph Junghans Date: Tue, 24 Mar 2020 09:35:57 -0600 Subject: [PATCH 049/328] cmake: add_definitions() -> target_compile_definitions() --- cmake/CMakeLists.txt | 20 ++++++++++---------- cmake/Modules/MPI4WIN.cmake | 2 +- cmake/Modules/Packages/KIM.cmake | 6 +++--- cmake/Modules/Packages/KOKKOS.cmake | 4 ++-- cmake/Modules/Packages/KSPACE.cmake | 14 +++++++------- cmake/Modules/Packages/PYTHON.cmake | 2 +- cmake/Modules/Packages/USER-INTEL.cmake | 12 ++++++------ cmake/Modules/Packages/USER-NETCDF.cmake | 6 +++--- cmake/Modules/Packages/USER-OMP.cmake | 2 +- cmake/Modules/Packages/USER-PLUMED.cmake | 8 ++++---- cmake/Modules/Packages/USER-VTK.cmake | 2 +- 11 files changed, 39 insertions(+), 39 deletions(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index cc9b3fc56a..b1d90af045 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -189,11 +189,11 @@ if(BUILD_MPI) else() find_package(MPI REQUIRED) include_directories(${MPI_CXX_INCLUDE_PATH}) - add_definitions(-DMPICH_SKIP_MPICXX -DOMPI_SKIP_MPICXX=1) + target_compile_definitions(lammps PRIVATE -DMPICH_SKIP_MPICXX -DOMPI_SKIP_MPICXX=1) target_link_libraries(lammps PUBLIC MPI::MPI_CXX) option(LAMMPS_LONGLONG_TO_LONG "Workaround if your system or MPI version does not recognize 'long long' data types" OFF) if(LAMMPS_LONGLONG_TO_LONG) - add_definitions(-DLAMMPS_LONGLONG_TO_LONG) + target_compile_definitions(lammps PRIVATE -DLAMMPS_LONGLONG_TO_LONG) endif() endif() else() @@ -209,7 +209,7 @@ set(LAMMPS_SIZES_VALUES smallbig bigbig smallsmall) set_property(CACHE LAMMPS_SIZES PROPERTY STRINGS ${LAMMPS_SIZES_VALUES}) validate_option(LAMMPS_SIZES LAMMPS_SIZES_VALUES) string(TOUPPER ${LAMMPS_SIZES} LAMMPS_SIZES) 
-add_definitions(-DLAMMPS_${LAMMPS_SIZES}) +target_compile_definitions(lammps PRIVATE -DLAMMPS_${LAMMPS_SIZES}) set(LAMMPS_API_DEFINES "${LAMMPS_API_DEFINES} -DLAMMPS_${LAMMPS_SIZES}") # posix_memalign is not available on Windows @@ -219,12 +219,12 @@ else() set(LAMMPS_MEMALIGN "64" CACHE STRING "enables the use of the posix_memalign() call instead of malloc() when large chunks or memory are allocated by LAMMPS. Set to 0 to disable") endif() if(NOT ${LAMMPS_MEMALIGN} STREQUAL "0") - add_definitions(-DLAMMPS_MEMALIGN=${LAMMPS_MEMALIGN}) + target_compile_definitions(lammps PRIVATE -DLAMMPS_MEMALIGN=${LAMMPS_MEMALIGN}) endif() option(LAMMPS_EXCEPTIONS "enable the use of C++ exceptions for error messages (useful for library interface)" OFF) if(LAMMPS_EXCEPTIONS) - add_definitions(-DLAMMPS_EXCEPTIONS) + target_compile_definitions(lammps PUBLIC -DLAMMPS_EXCEPTIONS) set(LAMMPS_API_DEFINES "${LAMMPS_API_DEFINES} -DLAMMPS_EXCEPTIONS") endif() @@ -283,7 +283,7 @@ find_package(JPEG QUIET) option(WITH_JPEG "Enable JPEG support" ${JPEG_FOUND}) if(WITH_JPEG) find_package(JPEG REQUIRED) - add_definitions(-DLAMMPS_JPEG) + target_compile_definitions(lammps PRIVATE -DLAMMPS_JPEG) if(CMAKE_VERSION VERSION_LESS 3.12) include_directories(${JPEG_INCLUDE_DIR}) target_link_libraries(lammps PRIVATE ${JPEG_LIBRARIES}) @@ -303,7 +303,7 @@ if(WITH_PNG) find_package(PNG REQUIRED) find_package(ZLIB REQUIRED) target_link_libraries(lammps PRIVATE PNG::PNG ZLIB::ZLIB) - add_definitions(-DLAMMPS_PNG) + target_compile_definitions(lammps PRIVATE -DLAMMPS_PNG) endif() find_program(GZIP_EXECUTABLE gzip) @@ -313,7 +313,7 @@ if(WITH_GZIP) if(NOT GZIP_FOUND) message(FATAL_ERROR "gzip executable not found") endif() - add_definitions(-DLAMMPS_GZIP) + target_compile_definitions(lammps PRIVATE -DLAMMPS_GZIP) endif() find_program(FFMPEG_EXECUTABLE ffmpeg) @@ -323,7 +323,7 @@ if(WITH_FFMPEG) if(NOT FFMPEG_FOUND) message(FATAL_ERROR "ffmpeg executable not found") endif() - add_definitions(-DLAMMPS_FFMPEG) + 
target_compile_definitions(lammps PRIVATE -DLAMMPS_FFMPEG) endif() if(BUILD_SHARED_LIBS) @@ -413,7 +413,7 @@ endforeach() # packages that need defines set foreach(PKG MPIIO) if(PKG_${PKG}) - add_definitions(-DLMP_${PKG}) + target_compile_definitions(lammps PRIVATE -DLMP_${PKG}) endif() endforeach() diff --git a/cmake/Modules/MPI4WIN.cmake b/cmake/Modules/MPI4WIN.cmake index 1fe6daada3..a02adca77d 100644 --- a/cmake/Modules/MPI4WIN.cmake +++ b/cmake/Modules/MPI4WIN.cmake @@ -16,7 +16,7 @@ else() endif() ExternalProject_get_property(mpi4win_build SOURCE_DIR) -add_definitions(-DMPICH_SKIP_MPICXX) +target_compile_definitions(lammps PRIVATE -DMPICH_SKIP_MPICXX) include_directories("${SOURCE_DIR}/include") set(MPI4WIN_LIBRARIES "${SOURCE_DIR}/lib/libmpi.a") add_dependencies(lammps mpi4win_build) diff --git a/cmake/Modules/Packages/KIM.cmake b/cmake/Modules/Packages/KIM.cmake index 56fc1449fa..7d349f496b 100644 --- a/cmake/Modules/Packages/KIM.cmake +++ b/cmake/Modules/Packages/KIM.cmake @@ -3,16 +3,16 @@ if(PKG_KIM) find_package(CURL) if(CURL_FOUND) target_link_libraries(lammps PRIVATE CURL::libcurl) - add_definitions(-DLMP_KIM_CURL) + target_compile_definitions(lammps PRIVATE -DLMP_KIM_CURL) set(LMP_DEBUG_CURL OFF CACHE STRING "Set libcurl verbose mode on/off. If on, it displays a lot of verbose information about its operations.") mark_as_advanced(LMP_DEBUG_CURL) if(LMP_DEBUG_CURL) - add_definitions(-DLMP_DEBUG_CURL) + target_compile_definitions(lammps PRIVATE -DLMP_DEBUG_CURL) endif() set(LMP_NO_SSL_CHECK OFF CACHE STRING "Tell libcurl to not verify the peer. If on, the connection succeeds regardless of the names in the certificate. 
Insecure - Use with caution!") mark_as_advanced(LMP_NO_SSL_CHECK) if(LMP_NO_SSL_CHECK) - add_definitions(-DLMP_NO_SSL_CHECK) + target_compile_definitions(lammps PRIVATE -DLMP_NO_SSL_CHECK) endif() endif() find_package(KIM-API QUIET) diff --git a/cmake/Modules/Packages/KOKKOS.cmake b/cmake/Modules/Packages/KOKKOS.cmake index 58d2c139f6..34c924790e 100644 --- a/cmake/Modules/Packages/KOKKOS.cmake +++ b/cmake/Modules/Packages/KOKKOS.cmake @@ -18,7 +18,7 @@ if(PKG_KOKKOS) include_directories(${Kokkos_INCLUDE_DIRS}) target_link_libraries(lammps PRIVATE kokkos) endif() - add_definitions(-DLMP_KOKKOS) + target_compile_definitions(lammps PRIVATE -DLMP_KOKKOS) set(KOKKOS_PKG_SOURCES_DIR ${LAMMPS_SOURCE_DIR}/KOKKOS) set(KOKKOS_PKG_SOURCES ${KOKKOS_PKG_SOURCES_DIR}/kokkos.cpp @@ -44,7 +44,7 @@ if(PKG_KOKKOS) ${KOKKOS_PKG_SOURCES_DIR}/remap_kokkos.cpp) if(KOKKOS_ENABLE_CUDA) if(NOT ${FFT} STREQUAL "KISS") - add_definitions(-DFFT_CUFFT) + target_compile_definitions(lammps PRIVATE -DFFT_CUFFT) target_link_libraries(lammps PRIVATE cufft) endif() endif() diff --git a/cmake/Modules/Packages/KSPACE.cmake b/cmake/Modules/Packages/KSPACE.cmake index 2a586dccf3..e2a42f3981 100644 --- a/cmake/Modules/Packages/KSPACE.cmake +++ b/cmake/Modules/Packages/KSPACE.cmake @@ -3,7 +3,7 @@ if(PKG_KSPACE) set(FFTW "FFTW3") if(FFT_SINGLE) set(FFTW "FFTW3F") - add_definitions(-DFFT_SINGLE) + target_compile_definitions(lammps PUBLIC -DFFT_SINGLE) endif() find_package(${FFTW} QUIET) if(${FFTW}_FOUND) @@ -18,7 +18,7 @@ if(PKG_KSPACE) if(FFT STREQUAL "FFTW3") find_package(${FFTW} REQUIRED) - add_definitions(-DFFT_FFTW3) + target_compile_definitions(lammps PUBLIC -DFFT_FFTW3) target_link_libraries(lammps PUBLIC ${FFTW}::${FFTW}) if(FFTW3_OMP_LIBRARY OR FFTW3F_OMP_LIBRARY) option(FFT_FFTW_THREADS "Use threaded FFTW library" ON) @@ -28,7 +28,7 @@ if(PKG_KSPACE) if(FFT_FFTW_THREADS) if(FFTW3_OMP_LIBRARY OR FFTW3F_OMP_LIBRARY) - add_definitions(-DFFT_FFTW_THREADS) + target_compile_definitions(lammps PRIVATE 
-DFFT_FFTW_THREADS) target_link_libraries(lammps PRIVATE ${FFTW}::${FFTW}_OMP) else() message(FATAL_ERROR "Need OpenMP enabled FFTW3 library for FFT_THREADS") @@ -36,16 +36,16 @@ if(PKG_KSPACE) endif() elseif(FFT STREQUAL "MKL") find_package(MKL REQUIRED) - add_definitions(-DFFT_MKL) + target_compile_definitions(lammps PRIVATE -DFFT_MKL) option(FFT_MKL_THREADS "Use threaded MKL FFT" ON) if(FFT_MKL_THREADS) - add_definitions(-DFFT_MKL_THREADS) + target_compile_definitions(lammps PRIVATE -DFFT_MKL_THREADS) endif() include_directories(${MKL_INCLUDE_DIRS}) target_link_libraries(lammps PRIVATE ${MKL_LIBRARIES}) else() # last option is KISSFFT - add_definitions(-DFFT_KISS) + target_compile_definitions(lammps PRIVATE -DFFT_KISS) endif() set(FFT_PACK "array" CACHE STRING "Optimization for FFT") @@ -54,6 +54,6 @@ if(PKG_KSPACE) validate_option(FFT_PACK FFT_PACK_VALUES) if(NOT FFT_PACK STREQUAL "array") string(TOUPPER ${FFT_PACK} FFT_PACK) - add_definitions(-DFFT_PACK_${FFT_PACK}) + target_compile_definitions(lammps PRIVATE -DFFT_PACK_${FFT_PACK}) endif() endif() diff --git a/cmake/Modules/Packages/PYTHON.cmake b/cmake/Modules/Packages/PYTHON.cmake index 7d6e2999f7..4611fddb98 100644 --- a/cmake/Modules/Packages/PYTHON.cmake +++ b/cmake/Modules/Packages/PYTHON.cmake @@ -1,6 +1,6 @@ if(PKG_PYTHON) find_package(PythonLibs REQUIRED) - add_definitions(-DLMP_PYTHON) + target_compile_definitions(lammps PRIVATE -DLMP_PYTHON) include_directories(${PYTHON_INCLUDE_DIR}) target_link_libraries(lammps PRIVATE ${PYTHON_LIBRARY}) endif() diff --git a/cmake/Modules/Packages/USER-INTEL.cmake b/cmake/Modules/Packages/USER-INTEL.cmake index 0a9b634bc7..96a897297f 100644 --- a/cmake/Modules/Packages/USER-INTEL.cmake +++ b/cmake/Modules/Packages/USER-INTEL.cmake @@ -4,7 +4,7 @@ if(PKG_USER-INTEL) message(FATAL_ERROR "immintrin.h header not found, Intel package won't work without it") endif() - add_definitions(-DLMP_USER_INTEL) + target_compile_definitions(lammps PRIVATE -DLMP_USER_INTEL) 
set(INTEL_ARCH "cpu" CACHE STRING "Architectures used by USER-INTEL (cpu or knl)") set(INTEL_ARCH_VALUES cpu knl) @@ -24,14 +24,14 @@ if(PKG_USER-INTEL) string(TOUPPER ${INTEL_LRT_MODE} INTEL_LRT_MODE) if(INTEL_LRT_MODE STREQUAL "THREADS") if(Threads_FOUND) - add_definitions(-DLMP_INTEL_USELRT) + target_compile_definitions(lammps PRIVATE -DLMP_INTEL_USELRT) target_link_libraries(lammps PRIVATE Threads::Threads) else() message(FATAL_ERROR "Must have working threads library for Long-range thread support") endif() endif() if(INTEL_LRT_MODE STREQUAL "C++11") - add_definitions(-DLMP_INTEL_USERLRT -DLMP_INTEL_LRT11) + target_compile_definitions(lammps PRIVATE -DLMP_INTEL_USERLRT -DLMP_INTEL_LRT11) endif() if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel") @@ -46,7 +46,7 @@ if(PKG_USER-INTEL) if(TBB_FOUND) target_link_libraries(lammps PRIVATE ${TBB_MALLOC_LIBRARIES}) else() - add_definitions(-DLMP_INTEL_NO_TBB) + target_compile_definitions(lammps PRIVATE -DLMP_INTEL_NO_TBB) if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel") message(WARNING "USER-INTEL with Intel compilers should use TBB malloc libraries") endif() @@ -54,7 +54,7 @@ if(PKG_USER-INTEL) find_package(MKL QUIET) if(MKL_FOUND) - add_definitions(-DLMP_USE_MKL_RNG) + target_compile_definitions(lammps PRIVATE -DLMP_USE_MKL_RNG) target_link_libraries(lammps PRIVATE ${MKL_LIBRARIES}) else() message(STATUS "Pair style dpd/intel will be faster with MKL libraries") @@ -71,7 +71,7 @@ if(PKG_USER-INTEL) set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -xHost -qopenmp -qoffload") set(MIC_OPTIONS "-qoffload-option,mic,compiler,\"-fp-model fast=2 -mGLOB_default_function_attrs=\\\"gather_scatter_loop_unroll=4\\\"\"") add_compile_options(-xMIC-AVX512 -qoffload -fno-alias -ansi-alias -restrict -qoverride-limits ${MIC_OPTIONS}) - add_definitions(-DLMP_INTEL_OFFLOAD) + target_compile_definitions(lammps PRIVATE -DLMP_INTEL_OFFLOAD) else() if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel") include(CheckCXXCompilerFlag) diff --git 
a/cmake/Modules/Packages/USER-NETCDF.cmake b/cmake/Modules/Packages/USER-NETCDF.cmake index 8d62f5f7ec..2c12d2b799 100644 --- a/cmake/Modules/Packages/USER-NETCDF.cmake +++ b/cmake/Modules/Packages/USER-NETCDF.cmake @@ -11,14 +11,14 @@ if(PKG_USER-NETCDF) if(NETCDF_FOUND) include_directories(${NETCDF_INCLUDE_DIRS}) target_link_libraries(lammps PRIVATE ${NETCDF_LIBRARIES}) - add_definitions(-DLMP_HAS_NETCDF) + target_compile_definitions(lammps PRIVATE -DLMP_HAS_NETCDF) endif(NETCDF_FOUND) if(PNETCDF_FOUND) include_directories(${PNETCDF_INCLUDES}) target_link_libraries(lammps PRIVATE ${PNETCDF_LIBRARIES}) - add_definitions(-DLMP_HAS_PNETCDF) + target_compile_definitions(lammps PRIVATE -DLMP_HAS_PNETCDF) endif(PNETCDF_FOUND) - add_definitions(-DNC_64BIT_DATA=0x0020) + target_compile_definitions(lammps PRIVATE -DNC_64BIT_DATA=0x0020) endif() diff --git a/cmake/Modules/Packages/USER-OMP.cmake b/cmake/Modules/Packages/USER-OMP.cmake index 4b1a4b1571..91f0467a17 100644 --- a/cmake/Modules/Packages/USER-OMP.cmake +++ b/cmake/Modules/Packages/USER-OMP.cmake @@ -6,7 +6,7 @@ if(PKG_USER-OMP) ${USER-OMP_SOURCES_DIR}/fix_nh_omp.cpp ${USER-OMP_SOURCES_DIR}/fix_nh_sphere_omp.cpp ${USER-OMP_SOURCES_DIR}/domain_omp.cpp) - add_definitions(-DLMP_USER_OMP) + target_compile_definitions(lammps PRIVATE -DLMP_USER_OMP) set_property(GLOBAL PROPERTY "OMP_SOURCES" "${USER-OMP_SOURCES}") # detects styles which have USER-OMP version diff --git a/cmake/Modules/Packages/USER-PLUMED.cmake b/cmake/Modules/Packages/USER-PLUMED.cmake index a6cd20adbb..cfd187ed9d 100644 --- a/cmake/Modules/Packages/USER-PLUMED.cmake +++ b/cmake/Modules/Packages/USER-PLUMED.cmake @@ -69,12 +69,12 @@ if(PKG_USER-PLUMED) set(PLUMED_INSTALL_DIR ${INSTALL_DIR}) add_dependencies(lammps plumed_build) if(PLUMED_MODE STREQUAL "STATIC") - add_definitions(-D__PLUMED_WRAPPER_CXX=1) + target_compile_definitions(lammps PRIVATE -D__PLUMED_WRAPPER_CXX=1) target_link_libraries(lammps PRIVATE ${PLUMED_INSTALL_DIR}/lib/libplumed.a 
${PLUMED_LINK_LIBS} ${CMAKE_DL_LIBS}) elseif(PLUMED_MODE STREQUAL "SHARED") target_link_libraries(lammps PRIVATE ${PLUMED_INSTALL_DIR}/lib/libplumed${CMAKE_SHARED_LIBRARY_SUFFIX} ${PLUMED_INSTALL_DIR}/lib/libplumedKernel${CMAKE_SHARED_LIBRARY_SUFFIX} ${CMAKE_DL_LIBS}) elseif(PLUMED_MODE STREQUAL "RUNTIME") - add_definitions(-D__PLUMED_HAS_DLOPEN=1 -D__PLUMED_DEFAULT_KERNEL=${PLUMED_INSTALL_DIR}/lib/libplumedKernel${CMAKE_SHARED_LIBRARY_SUFFIX}) + target_compile_definitions(lammps PRIVATE -D__PLUMED_HAS_DLOPEN=1 -D__PLUMED_DEFAULT_KERNEL=${PLUMED_INSTALL_DIR}/lib/libplumedKernel${CMAKE_SHARED_LIBRARY_SUFFIX}) target_link_libraries(lammps PRIVATE ${PLUMED_INSTALL_DIR}/lib/libplumedWrapper.a -rdynamic ${CMAKE_DL_LIBS}) endif() set(PLUMED_INCLUDE_DIRS "${PLUMED_INSTALL_DIR}/include") @@ -82,12 +82,12 @@ if(PKG_USER-PLUMED) find_package(PkgConfig REQUIRED) pkg_check_modules(PLUMED REQUIRED plumed) if(PLUMED_MODE STREQUAL "STATIC") - add_definitions(-D__PLUMED_WRAPPER_CXX=1) + target_compile_definitions(lammps PRIVATE -D__PLUMED_WRAPPER_CXX=1) include(${PLUMED_LIBDIR}/plumed/src/lib/Plumed.cmake.static) elseif(PLUMED_MODE STREQUAL "SHARED") include(${PLUMED_LIBDIR}/plumed/src/lib/Plumed.cmake.shared) elseif(PLUMED_MODE STREQUAL "RUNTIME") - add_definitions(-D__PLUMED_HAS_DLOPEN=1 -D__PLUMED_DEFAULT_KERNEL=${PLUMED_LIBDIR}/libplumedKernel${CMAKE_SHARED_LIBRARY_SUFFIX}) + target_compile_definitions(lammps PRIVATE -D__PLUMED_HAS_DLOPEN=1 -D__PLUMED_DEFAULT_KERNEL=${PLUMED_LIBDIR}/libplumedKernel${CMAKE_SHARED_LIBRARY_SUFFIX}) include(${PLUMED_LIBDIR}/plumed/src/lib/Plumed.cmake.runtime) endif() target_link_libraries(lammps PRIVATE ${PLUMED_LOAD}) diff --git a/cmake/Modules/Packages/USER-VTK.cmake b/cmake/Modules/Packages/USER-VTK.cmake index fb69f115b2..61defcbf82 100644 --- a/cmake/Modules/Packages/USER-VTK.cmake +++ b/cmake/Modules/Packages/USER-VTK.cmake @@ -1,6 +1,6 @@ if(PKG_USER-VTK) find_package(VTK REQUIRED NO_MODULE) include(${VTK_USE_FILE}) - 
add_definitions(-DLAMMPS_VTK) + target_compile_definitions(lammps PRIVATE -DLAMMPS_VTK) target_link_libraries(lammps PRIVATE ${VTK_LIBRARIES}) endif() -- GitLab From f2ab654662fbba0a43a3919d13bb48590d8d08a9 Mon Sep 17 00:00:00 2001 From: Christoph Junghans Date: Tue, 24 Mar 2020 10:41:19 -0600 Subject: [PATCH 050/328] cmake: some include_directories() -> target_include_directories() --- cmake/CMakeLists.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index b1d90af045..aaafd9c3f6 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -165,7 +165,7 @@ endif() ###################################################### # packages with special compiler needs or external libs ###################################################### -include_directories(${LAMMPS_SOURCE_DIR}) +target_include_directories(lammps PRIVATE ${LAMMPS_SOURCE_DIR}) if(PKG_USER-ADIOS) # The search for ADIOS2 must come before MPI because @@ -200,7 +200,7 @@ else() enable_language(C) file(GLOB MPI_SOURCES ${LAMMPS_SOURCE_DIR}/STUBS/mpi.c) add_library(mpi_stubs STATIC ${MPI_SOURCES}) - include_directories(${LAMMPS_SOURCE_DIR}/STUBS) + target_include_directories(mpi_stubs PUBLIC ${LAMMPS_SOURCE_DIR}/STUBS) target_link_libraries(lammps PRIVATE mpi_stubs) endif() @@ -285,7 +285,7 @@ if(WITH_JPEG) find_package(JPEG REQUIRED) target_compile_definitions(lammps PRIVATE -DLAMMPS_JPEG) if(CMAKE_VERSION VERSION_LESS 3.12) - include_directories(${JPEG_INCLUDE_DIR}) + target_include_directories(lammps PRIVATE ${JPEG_INCLUDE_DIR}) target_link_libraries(lammps PRIVATE ${JPEG_LIBRARIES}) else() target_link_libraries(lammps PRIVATE JPEG::JPEG) @@ -404,7 +404,7 @@ foreach(PKG ${DEFAULT_PACKAGES}) RegisterStyles(${${PKG}_SOURCES_DIR}) target_sources(lammps PRIVATE ${${PKG}_SOURCES}) - include_directories(${${PKG}_SOURCES_DIR}) + target_include_directories(lammps PRIVATE ${${PKG}_SOURCES_DIR}) endif() RegisterPackages(${${PKG}_SOURCES_DIR}) 
@@ -504,7 +504,7 @@ set(LAMMPS_STYLE_HEADERS_DIR ${CMAKE_CURRENT_BINARY_DIR}/styles) GenerateStyleHeaders(${LAMMPS_STYLE_HEADERS_DIR}) GeneratePackagesHeaders(${LAMMPS_STYLE_HEADERS_DIR}) -include_directories(${LAMMPS_STYLE_HEADERS_DIR}) +target_include_directories(lammps PRIVATE ${LAMMPS_STYLE_HEADERS_DIR}) ###################################### # Generate lmpinstalledpkgs.h -- GitLab From 31930139c6a5ffcb956ec7da462df330590b3a77 Mon Sep 17 00:00:00 2001 From: Christoph Junghans Date: Tue, 24 Mar 2020 10:49:13 -0600 Subject: [PATCH 051/328] USER-SMD.cmake: use Eigen3 imported target --- cmake/Modules/Packages/USER-SMD.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/Modules/Packages/USER-SMD.cmake b/cmake/Modules/Packages/USER-SMD.cmake index eed60ae8bd..9d2c48fe1d 100644 --- a/cmake/Modules/Packages/USER-SMD.cmake +++ b/cmake/Modules/Packages/USER-SMD.cmake @@ -15,7 +15,7 @@ if(PKG_USER-SMD) CONFIGURE_COMMAND "" BUILD_COMMAND "" INSTALL_COMMAND "" ) ExternalProject_get_property(Eigen3_build SOURCE_DIR) - set(EIGEN3_INCLUDE_DIR ${SOURCE_DIR}) + target_include_directories(lammps ${SOURCE_DIR}) add_dependencies(lammps Eigen3_build) else() find_package(Eigen3 NO_MODULE) @@ -23,6 +23,6 @@ if(PKG_USER-SMD) if(NOT EIGEN3_FOUND) message(FATAL_ERROR "Eigen3 not found, help CMake to find it by setting EIGEN3_INCLUDE_DIR, or set DOWNLOAD_EIGEN3=ON to download it") endif() + target_link_libraries(lammps PRIVATE Eigen3::Eigen) endif() - include_directories(${EIGEN3_INCLUDE_DIR}) endif() -- GitLab From 9b244b14a6f00f14237e51b3124cda8b420d63a8 Mon Sep 17 00:00:00 2001 From: Christoph Junghans Date: Tue, 24 Mar 2020 10:56:49 -0600 Subject: [PATCH 052/328] atc: needs lammps.h --- cmake/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index aaafd9c3f6..a93f1ee42a 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -465,6 +465,7 @@ if(PKG_USER-ATC) message(FATAL_ERROR "The 
USER-ATC Package is not compatible with -DLAMMPS_BIGBIG") endif() target_link_libraries(atc PRIVATE ${LAPACK_LIBRARIES}) + target_include_directories(atc PRIVATE ${LAMMPS_SOURCE_DIR}) endif() include(Packages/USER-H5MD) -- GitLab From 51eb865d6342ec32cd1262635a86d78802f75c7c Mon Sep 17 00:00:00 2001 From: Christoph Junghans Date: Tue, 24 Mar 2020 11:05:25 -0600 Subject: [PATCH 053/328] VORONOI.cmake: use VORO imported target --- cmake/Modules/FindVORO.cmake | 16 +++++++++++++--- cmake/Modules/Packages/VORONOI.cmake | 7 +++---- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/cmake/Modules/FindVORO.cmake b/cmake/Modules/FindVORO.cmake index b0cccbcd1d..3f0fe98ff1 100644 --- a/cmake/Modules/FindVORO.cmake +++ b/cmake/Modules/FindVORO.cmake @@ -10,13 +10,23 @@ find_path(VORO_INCLUDE_DIR voro++.hh PATH_SUFFIXES voro++) find_library(VORO_LIBRARY NAMES voro++) -set(VORO_LIBRARIES ${VORO_LIBRARY}) -set(VORO_INCLUDE_DIRS ${VORO_INCLUDE_DIR}) - include(FindPackageHandleStandardArgs) # handle the QUIETLY and REQUIRED arguments and set VORO_FOUND to TRUE # if all listed variables are TRUE find_package_handle_standard_args(VORO DEFAULT_MSG VORO_LIBRARY VORO_INCLUDE_DIR) +# Copy the results to the output variables and target. 
+if(VORO_FOUND) + set(VORO_LIBRARIES ${VORO_LIBRARY}) + set(VORO_INCLUDE_DIRS ${VORO_INCLUDE_DIR}) + + if(NOT TARGET VORO::VORO) + add_library(VORO::VORO UNKNOWN IMPORTED) + set_target_properties(VORO::VORO PROPERTIES + IMPORTED_LOCATION "${VORO_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES "${VORO_INCLUDE_DIR}") + endif() +endif() + mark_as_advanced(VORO_INCLUDE_DIR VORO_LIBRARY ) diff --git a/cmake/Modules/Packages/VORONOI.cmake b/cmake/Modules/Packages/VORONOI.cmake index d2bb185228..e7f2b6b46d 100644 --- a/cmake/Modules/Packages/VORONOI.cmake +++ b/cmake/Modules/Packages/VORONOI.cmake @@ -29,15 +29,14 @@ if(PKG_VORONOI) BUILD_BYPRODUCTS /src/libvoro++.a ) ExternalProject_get_property(voro_build SOURCE_DIR) - set(VORO_LIBRARIES ${SOURCE_DIR}/src/libvoro++.a) - set(VORO_INCLUDE_DIRS ${SOURCE_DIR}/src) + target_link_libraries(lammps PRIVATE ${SOURCE_DIR}/src/libvoro++.a) + target_include_directories(lammps PRIVATE ${SOURCE_DIR}/src) add_dependencies(lammps voro_build) else() find_package(VORO) if(NOT VORO_FOUND) message(FATAL_ERROR "Voro++ library not found. 
Help CMake to find it by setting VORO_LIBRARY and VORO_INCLUDE_DIR, or set DOWNLOAD_VORO=ON to download it") endif() + target_link_libraries(lammps PRIVATE VORO::VORO) endif() - include_directories(${VORO_INCLUDE_DIRS}) - target_link_libraries(lammps PRIVATE ${VORO_LIBRARIES}) endif() -- GitLab From 4736402dd944d773a081fbf7cc6933010644da64 Mon Sep 17 00:00:00 2001 From: Christoph Junghans Date: Tue, 24 Mar 2020 11:07:45 -0600 Subject: [PATCH 054/328] cmake: atc needs mpi itself --- cmake/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index a93f1ee42a..0001f37fce 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -202,6 +202,7 @@ else() add_library(mpi_stubs STATIC ${MPI_SOURCES}) target_include_directories(mpi_stubs PUBLIC ${LAMMPS_SOURCE_DIR}/STUBS) target_link_libraries(lammps PRIVATE mpi_stubs) + add_library(MPI::MPI_CXX ALIAS mpi_stubs) endif() set(LAMMPS_SIZES "smallbig" CACHE STRING "LAMMPS integer sizes (smallsmall: all 32-bit, smallbig: 64-bit #atoms #timesteps, bigbig: also 64-bit imageint, 64-bit atom ids)") @@ -464,7 +465,7 @@ if(PKG_USER-ATC) if(LAMMPS_SIZES STREQUAL BIGBIG) message(FATAL_ERROR "The USER-ATC Package is not compatible with -DLAMMPS_BIGBIG") endif() - target_link_libraries(atc PRIVATE ${LAPACK_LIBRARIES}) + target_link_libraries(atc PRIVATE ${LAPACK_LIBRARIES} MPI::MPI_CXX) target_include_directories(atc PRIVATE ${LAMMPS_SOURCE_DIR}) endif() -- GitLab From 33a57ed21592615a752021c789df4db142747edd Mon Sep 17 00:00:00 2001 From: Christoph Junghans Date: Tue, 24 Mar 2020 11:36:21 -0600 Subject: [PATCH 055/328] cmake: some more include_directories() -> target_include_directories() --- cmake/Modules/MPI4WIN.cmake | 2 +- cmake/Modules/Packages/CORESHELL.cmake | 2 +- cmake/Modules/Packages/GPU.cmake | 2 +- cmake/Modules/Packages/KIM.cmake | 2 +- cmake/Modules/Packages/KOKKOS.cmake | 4 ++-- cmake/Modules/Packages/KSPACE.cmake | 2 +- 
cmake/Modules/Packages/MESSAGE.cmake | 2 +- cmake/Modules/Packages/OPT.cmake | 2 +- cmake/Modules/Packages/PYTHON.cmake | 2 +- cmake/Modules/Packages/QEQ.cmake | 2 +- cmake/Modules/Packages/USER-H5MD.cmake | 1 - cmake/Modules/Packages/USER-INTEL.cmake | 2 +- cmake/Modules/Packages/USER-OMP.cmake | 2 +- cmake/Modules/Packages/USER-PLUMED.cmake | 2 +- cmake/Modules/Packages/USER-SDPD.cmake | 2 +- 15 files changed, 15 insertions(+), 16 deletions(-) diff --git a/cmake/Modules/MPI4WIN.cmake b/cmake/Modules/MPI4WIN.cmake index a02adca77d..4447289668 100644 --- a/cmake/Modules/MPI4WIN.cmake +++ b/cmake/Modules/MPI4WIN.cmake @@ -17,7 +17,7 @@ endif() ExternalProject_get_property(mpi4win_build SOURCE_DIR) target_compile_definitions(lammps PRIVATE -DMPICH_SKIP_MPICXX) -include_directories("${SOURCE_DIR}/include") +target_include_directories(lammps PRIVATE "${SOURCE_DIR}/include") set(MPI4WIN_LIBRARIES "${SOURCE_DIR}/lib/libmpi.a") add_dependencies(lammps mpi4win_build) set(LAMMPS_USE_MPI4WIN ON) diff --git a/cmake/Modules/Packages/CORESHELL.cmake b/cmake/Modules/Packages/CORESHELL.cmake index 2afe2b8c1b..e550b1989f 100644 --- a/cmake/Modules/Packages/CORESHELL.cmake +++ b/cmake/Modules/Packages/CORESHELL.cmake @@ -9,5 +9,5 @@ if(PKG_CORESHELL) get_property(CORESHELL_SOURCES GLOBAL PROPERTY CORESHELL_SOURCES) target_sources(lammps PRIVATE ${CORESHELL_SOURCES}) - include_directories(${CORESHELL_SOURCES_DIR}) + target_include_directories(lammps PRIVATE ${CORESHELL_SOURCES_DIR}) endif() diff --git a/cmake/Modules/Packages/GPU.cmake b/cmake/Modules/Packages/GPU.cmake index 95bb525dd5..cd359068e7 100644 --- a/cmake/Modules/Packages/GPU.cmake +++ b/cmake/Modules/Packages/GPU.cmake @@ -193,5 +193,5 @@ if(PKG_GPU) get_property(GPU_SOURCES GLOBAL PROPERTY GPU_SOURCES) target_sources(lammps PRIVATE ${GPU_SOURCES}) - include_directories(${GPU_SOURCES_DIR}) + target_include_directories(lammps PRIVATE ${GPU_SOURCES_DIR}) endif() diff --git a/cmake/Modules/Packages/KIM.cmake 
b/cmake/Modules/Packages/KIM.cmake index 7d349f496b..5a4b12956e 100644 --- a/cmake/Modules/Packages/KIM.cmake +++ b/cmake/Modules/Packages/KIM.cmake @@ -62,5 +62,5 @@ if(PKG_KIM) find_package(KIM-API ${KIM-API_MIN_VERSION} REQUIRED) endif() target_link_libraries(lammps PRIVATE "${KIM-API_LDFLAGS}") - include_directories(${KIM-API_INCLUDE_DIRS}) + target_include_directories(lammps PRIVATE ${KIM-API_INCLUDE_DIRS}) endif() diff --git a/cmake/Modules/Packages/KOKKOS.cmake b/cmake/Modules/Packages/KOKKOS.cmake index 34c924790e..083b55840c 100644 --- a/cmake/Modules/Packages/KOKKOS.cmake +++ b/cmake/Modules/Packages/KOKKOS.cmake @@ -15,7 +15,7 @@ if(PKG_KOKKOS) ${LAMMPS_LIB_KOKKOS_SRC_DIR}/containers/src ${LAMMPS_LIB_KOKKOS_SRC_DIR}/algorithms/src ${LAMMPS_LIB_KOKKOS_BIN_DIR}) - include_directories(${Kokkos_INCLUDE_DIRS}) + target_include_directories(lammps PRIVATE ${Kokkos_INCLUDE_DIRS}) target_link_libraries(lammps PRIVATE kokkos) endif() target_compile_definitions(lammps PRIVATE -DLMP_KOKKOS) @@ -70,5 +70,5 @@ if(PKG_KOKKOS) get_property(KOKKOS_PKG_SOURCES GLOBAL PROPERTY KOKKOS_PKG_SOURCES) target_sources(lammps PRIVATE ${KOKKOS_PKG_SOURCES}) - include_directories(${KOKKOS_PKG_SOURCES_DIR}) + target_include_directories(lammps PRIVATE ${KOKKOS_PKG_SOURCES_DIR}) endif() diff --git a/cmake/Modules/Packages/KSPACE.cmake b/cmake/Modules/Packages/KSPACE.cmake index e2a42f3981..184a70f746 100644 --- a/cmake/Modules/Packages/KSPACE.cmake +++ b/cmake/Modules/Packages/KSPACE.cmake @@ -41,7 +41,7 @@ if(PKG_KSPACE) if(FFT_MKL_THREADS) target_compile_definitions(lammps PRIVATE -DFFT_MKL_THREADS) endif() - include_directories(${MKL_INCLUDE_DIRS}) + target_include_directories(lammps PRIVATE ${MKL_INCLUDE_DIRS}) target_link_libraries(lammps PRIVATE ${MKL_LIBRARIES}) else() # last option is KISSFFT diff --git a/cmake/Modules/Packages/MESSAGE.cmake b/cmake/Modules/Packages/MESSAGE.cmake index c28c50c507..54d9f3eef7 100644 --- a/cmake/Modules/Packages/MESSAGE.cmake +++ 
b/cmake/Modules/Packages/MESSAGE.cmake @@ -28,5 +28,5 @@ if(PKG_MESSAGE) endif() target_link_libraries(lammps PRIVATE cslib) - include_directories(${LAMMPS_LIB_SOURCE_DIR}/message/cslib/src) + target_include_directories(lammps PRIVATE ${LAMMPS_LIB_SOURCE_DIR}/message/cslib/src) endif() diff --git a/cmake/Modules/Packages/OPT.cmake b/cmake/Modules/Packages/OPT.cmake index 02e3877c59..5d006bfd58 100644 --- a/cmake/Modules/Packages/OPT.cmake +++ b/cmake/Modules/Packages/OPT.cmake @@ -9,5 +9,5 @@ if(PKG_OPT) get_property(OPT_SOURCES GLOBAL PROPERTY OPT_SOURCES) target_sources(lammps PRIVATE ${OPT_SOURCES}) - include_directories(${OPT_SOURCES_DIR}) + target_include_directories(lammps PRIVATE ${OPT_SOURCES_DIR}) endif() diff --git a/cmake/Modules/Packages/PYTHON.cmake b/cmake/Modules/Packages/PYTHON.cmake index 4611fddb98..b2451ebbdf 100644 --- a/cmake/Modules/Packages/PYTHON.cmake +++ b/cmake/Modules/Packages/PYTHON.cmake @@ -1,6 +1,6 @@ if(PKG_PYTHON) find_package(PythonLibs REQUIRED) target_compile_definitions(lammps PRIVATE -DLMP_PYTHON) - include_directories(${PYTHON_INCLUDE_DIR}) + target_include_directories(lammps PRIVATE ${PYTHON_INCLUDE_DIR}) target_link_libraries(lammps PRIVATE ${PYTHON_LIBRARY}) endif() diff --git a/cmake/Modules/Packages/QEQ.cmake b/cmake/Modules/Packages/QEQ.cmake index 9b151c2610..d69fb23440 100644 --- a/cmake/Modules/Packages/QEQ.cmake +++ b/cmake/Modules/Packages/QEQ.cmake @@ -16,5 +16,5 @@ if(PKG_QEQ) get_property(QEQ_SOURCES GLOBAL PROPERTY QEQ_SOURCES) target_sources(lammps PRIVATE ${QEQ_SOURCES}) - include_directories(${QEQ_SOURCES_DIR}) + target_include_directories(lammps PRIVATE ${QEQ_SOURCES_DIR}) endif() diff --git a/cmake/Modules/Packages/USER-H5MD.cmake b/cmake/Modules/Packages/USER-H5MD.cmake index 1e3fcf82ce..cf5623f46d 100644 --- a/cmake/Modules/Packages/USER-H5MD.cmake +++ b/cmake/Modules/Packages/USER-H5MD.cmake @@ -4,5 +4,4 @@ if(PKG_USER-H5MD) find_package(HDF5 REQUIRED) target_link_libraries(h5md PRIVATE 
${HDF5_LIBRARIES}) target_include_directories(h5md PRIVATE ${HDF5_INCLUDE_DIRS}) - include_directories(${HDF5_INCLUDE_DIRS}) endif() diff --git a/cmake/Modules/Packages/USER-INTEL.cmake b/cmake/Modules/Packages/USER-INTEL.cmake index 96a897297f..d81f793f50 100644 --- a/cmake/Modules/Packages/USER-INTEL.cmake +++ b/cmake/Modules/Packages/USER-INTEL.cmake @@ -109,5 +109,5 @@ if(PKG_USER-INTEL) endif() target_sources(lammps PRIVATE ${USER-INTEL_SOURCES}) - include_directories(${USER-INTEL_SOURCES_DIR}) + target_include_directories(lammps PRIVATE ${USER-INTEL_SOURCES_DIR}) endif() diff --git a/cmake/Modules/Packages/USER-OMP.cmake b/cmake/Modules/Packages/USER-OMP.cmake index 91f0467a17..42f91495d5 100644 --- a/cmake/Modules/Packages/USER-OMP.cmake +++ b/cmake/Modules/Packages/USER-OMP.cmake @@ -38,5 +38,5 @@ if(PKG_USER-OMP) endif() target_sources(lammps PRIVATE ${USER-OMP_SOURCES}) - include_directories(${USER-OMP_SOURCES_DIR}) + target_include_directories(lammps PRIVATE ${USER-OMP_SOURCES_DIR}) endif() diff --git a/cmake/Modules/Packages/USER-PLUMED.cmake b/cmake/Modules/Packages/USER-PLUMED.cmake index cfd187ed9d..75b2db2f93 100644 --- a/cmake/Modules/Packages/USER-PLUMED.cmake +++ b/cmake/Modules/Packages/USER-PLUMED.cmake @@ -92,5 +92,5 @@ if(PKG_USER-PLUMED) endif() target_link_libraries(lammps PRIVATE ${PLUMED_LOAD}) endif() - include_directories(${PLUMED_INCLUDE_DIRS}) + target_include_directories(lammps PRIVATE ${PLUMED_INCLUDE_DIRS}) endif() diff --git a/cmake/Modules/Packages/USER-SDPD.cmake b/cmake/Modules/Packages/USER-SDPD.cmake index 1105fbd7ce..750194bcc6 100644 --- a/cmake/Modules/Packages/USER-SDPD.cmake +++ b/cmake/Modules/Packages/USER-SDPD.cmake @@ -11,5 +11,5 @@ if(PKG_USER-SDPD) endif() set_property(GLOBAL PROPERTY FIX "${hlist}") - include_directories(${USER-SDPD_SOURCES_DIR}) + target_include_directories(lammps PRIVATE ${USER-SDPD_SOURCES_DIR}) endif() -- GitLab From d8b6d10f3b9d65f99232980510011ace2db068a6 Mon Sep 17 00:00:00 2001 From: 
Christoph Junghans Date: Tue, 24 Mar 2020 11:42:36 -0600 Subject: [PATCH 056/328] MSCG.cmake: create an use imported target --- cmake/Modules/FindMSCG.cmake | 16 +++++++++++++--- cmake/Modules/Packages/MSCG.cmake | 12 ++++++++---- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/cmake/Modules/FindMSCG.cmake b/cmake/Modules/FindMSCG.cmake index 311ff78038..b301303dbe 100644 --- a/cmake/Modules/FindMSCG.cmake +++ b/cmake/Modules/FindMSCG.cmake @@ -10,13 +10,23 @@ find_path(MSCG_INCLUDE_DIR mscg.h PATH_SUFFIXES mscg) find_library(MSCG_LIBRARY NAMES mscg) -set(MSCG_LIBRARIES ${MSCG_LIBRARY}) -set(MSCG_INCLUDE_DIRS ${MSCG_INCLUDE_DIR}) - include(FindPackageHandleStandardArgs) # handle the QUIETLY and REQUIRED arguments and set MSCG_FOUND to TRUE # if all listed variables are TRUE find_package_handle_standard_args(MSCG DEFAULT_MSG MSCG_LIBRARY MSCG_INCLUDE_DIR) +# Copy the results to the output variables and target. +if(MSCG_FOUND) + set(MSCG_LIBRARIES ${MSCG_LIBRARY}) + set(MSCG_INCLUDE_DIRS ${MSCG_INCLUDE_DIR}) + + if(NOT TARGET MSCG::MSCG) + add_library(MSCG::MSCG UNKNOWN IMPORTED) + set_target_properties(MSCG::MSCG PROPERTIES + IMPORTED_LOCATION "${MSCG_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES "${MSCG_INCLUDE_DIR}") + endif() +endif() + mark_as_advanced(MSCG_INCLUDE_DIR MSCG_LIBRARY ) diff --git a/cmake/Modules/Packages/MSCG.cmake b/cmake/Modules/Packages/MSCG.cmake index 2c63e69fa7..750a1a8c37 100644 --- a/cmake/Modules/Packages/MSCG.cmake +++ b/cmake/Modules/Packages/MSCG.cmake @@ -29,12 +29,17 @@ if(PKG_MSCG) BUILD_BYPRODUCTS /libmscg.a ) ExternalProject_get_property(mscg_build BINARY_DIR) - set(MSCG_LIBRARIES ${BINARY_DIR}/libmscg.a) ExternalProject_get_property(mscg_build SOURCE_DIR) set(MSCG_INCLUDE_DIRS ${SOURCE_DIR}/src) + if(NOT TARGET MSCG::MSCG) + add_library(MSCG::MSCG UNKNOWN IMPORTED) + set_target_properties(MSCG::MSCG PROPERTIES + IMPORTED_LOCATION "${BINARY_DIR}/libmscg.a" + INTERFACE_INCLUDE_DIRECTORIES "${MSCG_INCLUDE_DIRS}") + 
endif() add_dependencies(lammps mscg_build) if(NOT LAPACK_FOUND) - file(MAKE_DIRECTORY ${MSCG_INCLUDE_DIRS}) + file(MAKE_DIRECTORY ) add_dependencies(mscg_build linalg) endif() else() @@ -43,6 +48,5 @@ if(PKG_MSCG) message(FATAL_ERROR "MSCG not found, help CMake to find it by setting MSCG_LIBRARY and MSCG_INCLUDE_DIRS, or set DOWNLOAD_MSCG=ON to download it") endif() endif() - target_link_libraries(lammps PRIVATE ${MSCG_LIBRARIES} GSL::gsl ${LAPACK_LIBRARIES}) - include_directories(${MSCG_INCLUDE_DIRS}) + target_link_libraries(lammps PRIVATE MSCG::MSCG GSL::gsl ${LAPACK_LIBRARIES}) endif() -- GitLab From 72c94f0fbf7c70c18fb44820247fe2b83667efb6 Mon Sep 17 00:00:00 2001 From: Christoph Junghans Date: Tue, 24 Mar 2020 11:49:24 -0600 Subject: [PATCH 057/328] USER-SCAFACOS.cmake: use imported pkg-config target --- cmake/Modules/Packages/USER-SCAFACOS.cmake | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/cmake/Modules/Packages/USER-SCAFACOS.cmake b/cmake/Modules/Packages/USER-SCAFACOS.cmake index ebd750c04b..6725472c07 100644 --- a/cmake/Modules/Packages/USER-SCAFACOS.cmake +++ b/cmake/Modules/Packages/USER-SCAFACOS.cmake @@ -46,7 +46,7 @@ if(PKG_USER-SCAFACOS) ) ExternalProject_get_property(scafacos_build INSTALL_DIR) set(SCAFACOS_BUILD_DIR ${INSTALL_DIR}) - set(SCAFACOS_INCLUDE_DIRS ${SCAFACOS_BUILD_DIR}/include) + target_include_directories(lammps PRIVATE ${SCAFACOS_BUILD_DIR}/include) add_dependencies(lammps scafacos_build) # list and order from pkg_config file of ScaFaCoS build target_link_libraries(lammps PRIVATE ${SCAFACOS_BUILD_DIR}/lib/libfcs.a) @@ -69,8 +69,7 @@ if(PKG_USER-SCAFACOS) target_link_libraries(lammps PRIVATE ${MPI_C_LIBRARIES}) else() find_package(PkgConfig REQUIRED) - pkg_check_modules(SCAFACOS REQUIRED scafacos) - target_link_libraries(lammps PRIVATE ${SCAFACOS_LDFLAGS}) + pkg_check_modules(SCAFACOS REQUIRED IMPORTED_TARGET scafacos) + target_link_libraries(lammps PRIVATE PkgConfig::SCAFACOS) endif() - 
include_directories(${SCAFACOS_INCLUDE_DIRS}) endif() -- GitLab From d19f112c754e8345813e23d1743bca837c87d27b Mon Sep 17 00:00:00 2001 From: Christoph Junghans Date: Tue, 24 Mar 2020 12:18:10 -0600 Subject: [PATCH 058/328] MSCG.cmake: fix up imported target --- cmake/Modules/Packages/MSCG.cmake | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/cmake/Modules/Packages/MSCG.cmake b/cmake/Modules/Packages/MSCG.cmake index 750a1a8c37..f4bac7d422 100644 --- a/cmake/Modules/Packages/MSCG.cmake +++ b/cmake/Modules/Packages/MSCG.cmake @@ -31,15 +31,11 @@ if(PKG_MSCG) ExternalProject_get_property(mscg_build BINARY_DIR) ExternalProject_get_property(mscg_build SOURCE_DIR) set(MSCG_INCLUDE_DIRS ${SOURCE_DIR}/src) - if(NOT TARGET MSCG::MSCG) - add_library(MSCG::MSCG UNKNOWN IMPORTED) - set_target_properties(MSCG::MSCG PROPERTIES - IMPORTED_LOCATION "${BINARY_DIR}/libmscg.a" - INTERFACE_INCLUDE_DIRECTORIES "${MSCG_INCLUDE_DIRS}") - endif() + target_include_directories(lammps PRIVATE ${MSCG_INCLUDE_DIRS}) + target_link_libraries(lammps PRIVATE "${BINARY_DIR}/libmscg.a") add_dependencies(lammps mscg_build) if(NOT LAPACK_FOUND) - file(MAKE_DIRECTORY ) + file(MAKE_DIRECTORY ${MSCG_INCLUDE_DIRS}) add_dependencies(mscg_build linalg) endif() else() @@ -47,6 +43,7 @@ if(PKG_MSCG) if(NOT MSCG_FOUND) message(FATAL_ERROR "MSCG not found, help CMake to find it by setting MSCG_LIBRARY and MSCG_INCLUDE_DIRS, or set DOWNLOAD_MSCG=ON to download it") endif() + target_link_libraries(lammps PRIVATE MSCG::MSCG) endif() - target_link_libraries(lammps PRIVATE MSCG::MSCG GSL::gsl ${LAPACK_LIBRARIES}) + target_link_libraries(lammps PRIVATE GSL::gsl ${LAPACK_LIBRARIES}) endif() -- GitLab From f5279a291e93195054e4099ef0fec0ccb5f61093 Mon Sep 17 00:00:00 2001 From: Christoph Junghans Date: Tue, 24 Mar 2020 12:23:39 -0600 Subject: [PATCH 059/328] USER-QUIP.cmake: create and use imported target --- cmake/Modules/FindQUIP.cmake | 13 +++++++++++-- 
cmake/Modules/Packages/USER-QUIP.cmake | 2 +- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/cmake/Modules/FindQUIP.cmake b/cmake/Modules/FindQUIP.cmake index b6d87d11fa..277cfae49e 100644 --- a/cmake/Modules/FindQUIP.cmake +++ b/cmake/Modules/FindQUIP.cmake @@ -7,12 +7,21 @@ find_library(QUIP_LIBRARY NAMES quip) -set(QUIP_LIBRARIES ${QUIP_LIBRARY}) - include(FindPackageHandleStandardArgs) # handle the QUIETLY and REQUIRED arguments and set QUIP_FOUND to TRUE # if all listed variables are TRUE find_package_handle_standard_args(QUIP DEFAULT_MSG QUIP_LIBRARY) +# Copy the results to the output variables and target. +if(QUIP_FOUND) + set(QUIP_LIBRARIES ${QUIP_LIBRARY}) + + if(NOT TARGET QUIP::QUIP) + add_library(QUIP::QUIP UNKNOWN IMPORTED) + set_target_properties(QUIP::QUIP PROPERTIES + IMPORTED_LOCATION "${QUIP_LIBRARY}") + endif() +endif() + mark_as_advanced(QUIP_LIBRARY) diff --git a/cmake/Modules/Packages/USER-QUIP.cmake b/cmake/Modules/Packages/USER-QUIP.cmake index 52ba7e9c47..4f41f9ef98 100644 --- a/cmake/Modules/Packages/USER-QUIP.cmake +++ b/cmake/Modules/Packages/USER-QUIP.cmake @@ -1,5 +1,5 @@ if(PKG_USER-QUIP) enable_language(Fortran) find_package(QUIP REQUIRED) - target_link_libraries(lammps PRIVATE ${LAPACK_LIBRARIES}) + target_link_libraries(lammps PRIVATE QUIP::QUIP ${LAPACK_LIBRARIES}) endif() -- GitLab From 99d4a226b5d0f74927b118dbd1bb4b90edaa7606 Mon Sep 17 00:00:00 2001 From: Christoph Junghans Date: Tue, 24 Mar 2020 12:33:36 -0600 Subject: [PATCH 060/328] USER-NETCDF.cmake: create and use imported targets --- cmake/Modules/FindNetCDF.cmake | 16 ++++++++++++++-- cmake/Modules/FindPNetCDF.cmake | 9 +++++++++ cmake/Modules/Packages/USER-NETCDF.cmake | 6 ++---- 3 files changed, 25 insertions(+), 6 deletions(-) diff --git a/cmake/Modules/FindNetCDF.cmake b/cmake/Modules/FindNetCDF.cmake index 2a992b6b3b..7f0ee93520 100644 --- a/cmake/Modules/FindNetCDF.cmake +++ b/cmake/Modules/FindNetCDF.cmake @@ -112,11 +112,23 @@ 
NetCDF_check_interface (F90 netcdf.mod netcdff) #export accumulated results to internal varS that rest of project can depend on list (APPEND NetCDF_libs "${NETCDF_C_LIBRARIES}") -set (NETCDF_LIBRARIES ${NetCDF_libs}) -set (NETCDF_INCLUDE_DIRS ${NetCDF_includes}) # handle the QUIETLY and REQUIRED arguments and set NETCDF_FOUND to TRUE if # all listed variables are TRUE include (FindPackageHandleStandardArgs) find_package_handle_standard_args (NetCDF DEFAULT_MSG NETCDF_LIBRARIES NETCDF_INCLUDE_DIRS NETCDF_HAS_INTERFACES) + +# Copy the results to the output variables and target. +if(NetCDF_FOUND) + set (NETCDF_LIBRARIES ${NetCDF_libs}) + set (NETCDF_INCLUDE_DIRS ${NetCDF_includes}) + + if(NOT TARGET NetCDF::NetCDF) + add_library(NetCDF::NetCDF UNKNOWN IMPORTED) + set_target_properties(NetCDF::NetCDF PROPERTIES + IMPORTED_LOCATION "${NETCDF_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES "${NetCDF_includes}" + INTERFACE_LINK_LIBRARIES "${NETCDF_LIBRARIES}") + endif() +endif() diff --git a/cmake/Modules/FindPNetCDF.cmake b/cmake/Modules/FindPNetCDF.cmake index bc3a5f9538..e501e9af49 100644 --- a/cmake/Modules/FindPNetCDF.cmake +++ b/cmake/Modules/FindPNetCDF.cmake @@ -53,3 +53,12 @@ include (FindPackageHandleStandardArgs) find_package_handle_standard_args (PNetCDF DEFAULT_MSG PNETCDF_LIBRARIES PNETCDF_INCLUDES) mark_as_advanced (PNETCDF_LIBRARIES PNETCDF_INCLUDES) + +if(PNetCDF_FOUND) + if(NOT TARGET PNetCDF::PNetCDF) + add_library(PNetCDF::PNetCDF UNKNOWN IMPORTED) + set_target_properties(PNetCDF::PNetCDF PROPERTIES + IMPORTED_LOCATION "${PNETCDF_LIBRARIES}" + INTERFACE_INCLUDE_DIRECTORIES "${PNETCDF_INCLUDES}") + endif() +endif() diff --git a/cmake/Modules/Packages/USER-NETCDF.cmake b/cmake/Modules/Packages/USER-NETCDF.cmake index 2c12d2b799..a149c7357d 100644 --- a/cmake/Modules/Packages/USER-NETCDF.cmake +++ b/cmake/Modules/Packages/USER-NETCDF.cmake @@ -9,14 +9,12 @@ if(PKG_USER-NETCDF) endif(NETCDF_FOUND) if(NETCDF_FOUND) - include_directories(${NETCDF_INCLUDE_DIRS}) - 
target_link_libraries(lammps PRIVATE ${NETCDF_LIBRARIES}) + target_link_libraries(lammps PRIVATE NetCDF::NetCDF) target_compile_definitions(lammps PRIVATE -DLMP_HAS_NETCDF) endif(NETCDF_FOUND) if(PNETCDF_FOUND) - include_directories(${PNETCDF_INCLUDES}) - target_link_libraries(lammps PRIVATE ${PNETCDF_LIBRARIES}) + target_link_libraries(lammps PRIVATE PNetCDF::PNetCDF) target_compile_definitions(lammps PRIVATE -DLMP_HAS_PNETCDF) endif(PNETCDF_FOUND) -- GitLab From 5eb77f2e9a2c3045f0b5c080ce7ef07a6eb903d2 Mon Sep 17 00:00:00 2001 From: Christoph Junghans Date: Tue, 24 Mar 2020 14:29:14 -0600 Subject: [PATCH 061/328] FindNetCDF.cmake: fix target --- cmake/Modules/FindNetCDF.cmake | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/cmake/Modules/FindNetCDF.cmake b/cmake/Modules/FindNetCDF.cmake index 7f0ee93520..5c8caa9208 100644 --- a/cmake/Modules/FindNetCDF.cmake +++ b/cmake/Modules/FindNetCDF.cmake @@ -112,6 +112,8 @@ NetCDF_check_interface (F90 netcdf.mod netcdff) #export accumulated results to internal varS that rest of project can depend on list (APPEND NetCDF_libs "${NETCDF_C_LIBRARIES}") +set (NETCDF_LIBRARIES ${NetCDF_libs}) +set (NETCDF_INCLUDE_DIRS ${NetCDF_includes}) # handle the QUIETLY and REQUIRED arguments and set NETCDF_FOUND to TRUE if # all listed variables are TRUE @@ -121,14 +123,11 @@ find_package_handle_standard_args (NetCDF # Copy the results to the output variables and target. 
if(NetCDF_FOUND) - set (NETCDF_LIBRARIES ${NetCDF_libs}) - set (NETCDF_INCLUDE_DIRS ${NetCDF_includes}) - if(NOT TARGET NetCDF::NetCDF) add_library(NetCDF::NetCDF UNKNOWN IMPORTED) set_target_properties(NetCDF::NetCDF PROPERTIES IMPORTED_LOCATION "${NETCDF_LIBRARY}" - INTERFACE_INCLUDE_DIRECTORIES "${NetCDF_includes}" + INTERFACE_INCLUDE_DIRECTORIES "${NETCDF_INCLUDE_DIRS}" INTERFACE_LINK_LIBRARIES "${NETCDF_LIBRARIES}") endif() endif() -- GitLab From 0da108f06fe2b7372a0ade40378b4368edbaa257 Mon Sep 17 00:00:00 2001 From: Christoph Junghans Date: Tue, 24 Mar 2020 14:50:38 -0600 Subject: [PATCH 062/328] cmake: remove last include_directories() call --- cmake/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 0001f37fce..3569ffc555 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -188,7 +188,6 @@ if(BUILD_MPI) include(MPI4WIN) else() find_package(MPI REQUIRED) - include_directories(${MPI_CXX_INCLUDE_PATH}) target_compile_definitions(lammps PRIVATE -DMPICH_SKIP_MPICXX -DOMPI_SKIP_MPICXX=1) target_link_libraries(lammps PUBLIC MPI::MPI_CXX) option(LAMMPS_LONGLONG_TO_LONG "Workaround if your system or MPI version does not recognize 'long long' data types" OFF) -- GitLab From 0f35c1d009b70258c3c967e51abbd11e923c6f30 Mon Sep 17 00:00:00 2001 From: Richard Berger Date: Tue, 24 Mar 2020 18:43:20 -0400 Subject: [PATCH 063/328] Update ubuntu package name to libpng-dev --- doc/src/Howto_bash.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/src/Howto_bash.rst b/doc/src/Howto_bash.rst index 02322e5e1c..98cc6c0dff 100644 --- a/doc/src/Howto_bash.rst +++ b/doc/src/Howto_bash.rst @@ -103,7 +103,7 @@ needed for various LAMMPS features: .. 
code-block:: bash - sudo apt install -y build-essential ccache gfortran openmpi-bin libopenmpi-dev libfftw3-dev libjpeg-dev libpng12-dev python-dev python-virtualenv libblas-dev liblapack-dev libhdf5-serial-dev hdf5-tools + sudo apt install -y build-essential ccache gfortran openmpi-bin libopenmpi-dev libfftw3-dev libjpeg-dev libpng-dev python-dev python-virtualenv libblas-dev liblapack-dev libhdf5-serial-dev hdf5-tools Files in Ubuntu on Windows ^^^^^^^^^^^^^^^^^^^^^^^^^^ -- GitLab From 431647d943d64001e2e2d6a6e01294c2903c42d7 Mon Sep 17 00:00:00 2001 From: Richard Berger Date: Tue, 24 Mar 2020 18:52:05 -0400 Subject: [PATCH 064/328] Add link to official WSL docs --- doc/src/Howto_bash.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/src/Howto_bash.rst b/doc/src/Howto_bash.rst index 98cc6c0dff..b452f579d0 100644 --- a/doc/src/Howto_bash.rst +++ b/doc/src/Howto_bash.rst @@ -12,6 +12,10 @@ via apt-get and all files are accessible in both the Windows Explorer and your Linux shell (bash). This avoids switching to a different operating system or installing a virtual machine. Everything runs on Windows. +.. seealso:: + + You can find more detailed information at the `Windows Subsystem for Linux Installation Guide for Windows 10 <https://docs.microsoft.com/en-us/windows/wsl/install-win10>`_.
+ Installing Bash on Windows -------------------------- -- GitLab From 398c030925e8175f6e15177f2d9b3194fca9524a Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Wed, 25 Mar 2020 06:52:37 -0400 Subject: [PATCH 065/328] whitespace cleanup --- src/KOKKOS/sna_kokkos_impl.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/KOKKOS/sna_kokkos_impl.h b/src/KOKKOS/sna_kokkos_impl.h index e6c34a245b..dcedf333e5 100644 --- a/src/KOKKOS/sna_kokkos_impl.h +++ b/src/KOKKOS/sna_kokkos_impl.h @@ -382,7 +382,7 @@ void SNAKokkos::compute_ui(const typename Kokkos::TeamPolicy 0)?rootpq2*buf1[jjup_shared_idx-1]:SNAcomplex(0.,0.); //const SNAcomplex u_up2 = (ma > 0)?rootpq2*ulist(jjup_index-1,iatom,jnbor):SNAcomplex(0.,0.); caconjxpy(b, u_up2, u_accum); - + // VMK recursion relation: grab contribution which is multiplied by a* const double rootpq1 = rootpqarray(j - ma, j - mb); const SNAcomplex u_up1 = (ma < j)?rootpq1*buf1[jjup_shared_idx]:SNAcomplex(0.,0.); @@ -399,12 +399,12 @@ void SNAKokkos::compute_ui(const typename Kokkos::TeamPolicy physical j integer), last element maps to self, skip - //if (!(m == total_iters - 1 && j % 2 == 0)) { + //if (!(m == total_iters - 1 && j % 2 == 0)) { if (m < total_iters - 1 || j % 2 == 1) { const int sign_factor = (((ma+mb)%2==0)?1:-1); const int jju_shared_flip = (j+1-mb)*(j+1)-(ma+1); const int jjup_flip = jju + jju_shared_flip; // jju+(j+1-mb)*(j+1)-(ma+1); - + if (sign_factor == 1) { u_accum.im = -u_accum.im; @@ -419,12 +419,12 @@ void SNAKokkos::compute_ui(const typename Kokkos::TeamPolicy::compute_fused_deidrj(const typename Kokkos::TeamPoli // copy left side to right side with inversion symmetry VMK 4.4(2) // u[ma-j][mb-j] = (-1)^(ma-mb)*Conj([u[ma][mb]) if (j%2==1 && mb+1==n_mb) { - int sign_factor = (((ma+mb)%2==0)?1:-1); + int sign_factor = (((ma+mb)%2==0)?1:-1); //const int jjup_flip = jju+(j+1-mb)*(j+1)-(ma+1); // no longer needed b/c we don't update dulist const int jju_shared_flip = 
(j+1-mb)*(j+1)-(ma+1); @@ -787,18 +787,18 @@ void SNAKokkos::compute_fused_deidrj(const typename Kokkos::TeamPoli auto tmp = ulist_buf1; ulist_buf1 = ulist_buf2; ulist_buf2 = tmp; tmp = dulist_buf1; dulist_buf1 = dulist_buf2; dulist_buf2 = tmp; - // Accumulate dedr. This "should" be in a single, but + // Accumulate dedr. This "should" be in a single, but // a Kokkos::single call implies a warp sync, and we may // as well avoid that. This does no harm as long as the // final assignment is in a single block. //Kokkos::single(Kokkos::PerThread(team), [=]() { - dedr_full_sum += dedr_sum; + dedr_full_sum += dedr_sum; //}); } // Store the accumulated dedr. Kokkos::single(Kokkos::PerThread(team), [&] () { - dedr(iatom,jnbor,dir) = dedr_full_sum*2.0; + dedr(iatom,jnbor,dir) = dedr_full_sum*2.0; }); } -- GitLab From 1192845ad58327cee3c3e4608596baf8f848b1e7 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Wed, 25 Mar 2020 08:19:24 -0400 Subject: [PATCH 066/328] avoid segmentation faults in universe/uloop variable increment --- src/variable.cpp | 33 +++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/src/variable.cpp b/src/variable.cpp index 1093ce9066..0fc53f8074 100644 --- a/src/variable.cpp +++ b/src/variable.cpp @@ -661,6 +661,8 @@ int Variable::next(int narg, char **arg) } else if (istyle == UNIVERSE || istyle == ULOOP) { + uloop_again: + // wait until lock file can be created and owned by proc 0 of this world // rename() is not atomic in practice, but no known simple fix // means multiple procs can read/write file at the same time (bad!) 
@@ -669,7 +671,7 @@ int Variable::next(int narg, char **arg) // delay for random fraction of 1 second before subsequent tries // when successful, read next available index and Bcast it within my world - int nextindex; + int nextindex = -1; if (me == 0) { int seed = 12345 + universe->me + which[find(arg[0])]; RanMars *random = new RanMars(lmp,seed); @@ -682,10 +684,33 @@ int Variable::next(int narg, char **arg) } delete random; - FILE *fp = fopen("tmp.lammps.variable.lock","r"); - fscanf(fp,"%d",&nextindex); + // if the file cannot be found, we may have a race with some + // other MPI rank that has called rename at the same time + // and we have to start over. + // if the read is short (we need at least one byte) we try reading again. + + FILE *fp; + char buf[64]; + for (int loopmax = 0; loopmax < 100; ++loopmax) { + fp = fopen("tmp.lammps.variable.lock","r"); + if (fp == NULL) goto uloop_again; + + buf[0] = buf[1] = '\0'; + fread(buf,1,64,fp); + fclose(fp); + + if (strlen(buf) > 0) { + nextindex = atoi(buf); + break; + } + delay = (int) (1000000*random->uniform()); + usleep(delay); + } + if (nextindex < 0) + error->one(FLERR,"Unexpected error while incrementing uloop " + "style variable. 
Please contact LAMMPS developers."); + //printf("READ %d %d\n",universe->me,nextindex); - fclose(fp); fp = fopen("tmp.lammps.variable.lock","w"); fprintf(fp,"%d\n",nextindex+1); //printf("WRITE %d %d\n",universe->me,nextindex+1); -- GitLab From b8464da71ffbe6c1697b2c2071924d75c2ea30d0 Mon Sep 17 00:00:00 2001 From: Christoph Junghans Date: Wed, 25 Mar 2020 07:45:21 -0600 Subject: [PATCH 067/328] cmake: remove FindLAMMPS.cmake --- cmake/CMakeLists.txt | 2 -- cmake/FindLAMMPS.cmake.in | 48 --------------------------------------- 2 files changed, 50 deletions(-) delete mode 100644 cmake/FindLAMMPS.cmake.in diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 3569ffc555..02d7b50f1b 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -584,8 +584,6 @@ if(BUILD_LIB) install(FILES ${LAMMPS_CXX_HEADERS} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/lammps) configure_file(pkgconfig/liblammps.pc.in ${CMAKE_CURRENT_BINARY_DIR}/liblammps${LAMMPS_LIB_SUFFIX}.pc @ONLY) install(FILES ${CMAKE_CURRENT_BINARY_DIR}/liblammps${LAMMPS_LIB_SUFFIX}.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig) - configure_file(FindLAMMPS.cmake.in ${CMAKE_CURRENT_BINARY_DIR}/FindLAMMPS${LAMMPS_LIB_SUFFIX}.cmake @ONLY) - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/FindLAMMPS${LAMMPS_LIB_SUFFIX}.cmake DESTINATION ${CMAKE_INSTALL_DATADIR}/cmake/Modules) endif() if(BUILD_EXE) diff --git a/cmake/FindLAMMPS.cmake.in b/cmake/FindLAMMPS.cmake.in deleted file mode 100644 index 586df83c2d..0000000000 --- a/cmake/FindLAMMPS.cmake.in +++ /dev/null @@ -1,48 +0,0 @@ -# - Find liblammps -# Find the native liblammps headers and libraries. -# -# The following variables will set: -# LAMMPS_INCLUDE_DIRS - where to find lammps/library.h, etc. -# LAMMPS_LIBRARIES - List of libraries when using lammps. -# LAMMPS_API_DEFINES - lammps library api defines -# LAMMPS_VERSION - lammps library version -# LAMMPS_FOUND - True if liblammps found. -# -# In addition a LAMMPS::LAMMPS imported target is getting created. 
-# -# LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator -# http://lammps.sandia.gov, Sandia National Laboratories -# Steve Plimpton, sjplimp@sandia.gov -# -# Copyright (2003) Sandia Corporation. Under the terms of Contract -# DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains -# certain rights in this software. This software is distributed under -# the GNU General Public License. -# -# See the README file in the top-level LAMMPS directory. -# - -find_package(PkgConfig) - -pkg_check_modules(PC_LAMMPS liblammps@LAMMPS_LIB_SUFFIX@) -find_path(LAMMPS_INCLUDE_DIR lammps/library.h HINTS ${PC_LAMMPS_INCLUDE_DIRS} @CMAKE_INSTALL_FULL_INCLUDEDIR@) - -set(LAMMPS_VERSION @LAMMPS_VERSION@) -set(LAMMPS_API_DEFINES @LAMMPS_API_DEFINES@) - -find_library(LAMMPS_LIBRARY NAMES lammps@LAMMPS_LIB_SUFFIX@ HINTS ${PC_LAMMPS_LIBRARY_DIRS} @CMAKE_INSTALL_FULL_LIBDIR@) - -set(LAMMPS_INCLUDE_DIRS "${LAMMPS_INCLUDE_DIR}") -set(LAMMPS_LIBRARIES "${LAMMPS_LIBRARY}") - -include(FindPackageHandleStandardArgs) -# handle the QUIETLY and REQUIRED arguments and set LAMMPS_FOUND to TRUE -# if all listed variables are TRUE -find_package_handle_standard_args(LAMMPS REQUIRED_VARS LAMMPS_LIBRARY LAMMPS_INCLUDE_DIR VERSION_VAR LAMMPS_VERSION) - -mark_as_advanced(LAMMPS_INCLUDE_DIR LAMMPS_LIBRARY) - -if(LAMMPS_FOUND AND NOT TARGET LAMMPS::LAMMPS) - add_library(LAMMPS::LAMMPS UNKNOWN IMPORTED) - set_target_properties(LAMMPS::LAMMPS PROPERTIES IMPORTED_LOCATION "${LAMMPS_LIBRARY}" INTERFACE_INCLUDE_DIRECTORIES "${LAMMPS_INCLUDE_DIR}" INTERFACE_COMPILE_DEFINITIONS "${LAMMPS_API_DEFINES}") -endif() -- GitLab From 0e9f65e0219b86d959ba66c948a6951c8003d80b Mon Sep 17 00:00:00 2001 From: Christoph Junghans Date: Wed, 25 Mar 2020 08:43:57 -0600 Subject: [PATCH 068/328] cmake: add exported target --- cmake/CMakeLists.txt | 9 +++++++-- cmake/LAMMPSConfig.cmake.in | 5 +++++ cmake/pkgconfig/liblammps.pc.in | 8 +------- 3 files changed, 13 insertions(+), 9 deletions(-) create mode 
100644 cmake/LAMMPSConfig.cmake.in diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 02d7b50f1b..97bb00e250 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -26,7 +26,7 @@ set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/Modules) include(LAMMPSUtils) -get_lammps_version(${LAMMPS_SOURCE_DIR}/version.h LAMMPS_VERSION) +get_lammps_version(${LAMMPS_SOURCE_DIR}/version.h PROJECT_VERSION) include(PreventInSourceBuilds) @@ -579,11 +579,16 @@ if(BUILD_LIB) set_target_properties(lammps PROPERTIES OUTPUT_NAME lammps${LAMMPS_LIB_SUFFIX}) set_target_properties(lammps PROPERTIES SOVERSION ${SOVERSION}) - install(TARGETS lammps LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) + install(TARGETS lammps EXPORT LAMMPS_Targets LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) install(FILES ${LAMMPS_SOURCE_DIR}/library.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/lammps) install(FILES ${LAMMPS_CXX_HEADERS} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/lammps) configure_file(pkgconfig/liblammps.pc.in ${CMAKE_CURRENT_BINARY_DIR}/liblammps${LAMMPS_LIB_SUFFIX}.pc @ONLY) install(FILES ${CMAKE_CURRENT_BINARY_DIR}/liblammps${LAMMPS_LIB_SUFFIX}.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig) + install(EXPORT LAMMPS_Targets FILE LAMMPS_Targets.cmake NAMESPACE LAMMPS:: DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/LAMMPS) + include(CMakePackageConfigHelpers) + configure_file(LAMMPSConfig.cmake.in ${CMAKE_CURRENT_BINARY_DIR}/LAMMPSConfig.cmake @ONLY) + write_basic_package_version_file("LAMMPSConfigVersion.cmake" VERSION ${PROJECT_VERSION} COMPATIBILITY ExactVersion) + install(FILES "${CMAKE_CURRENT_BINARY_DIR}/LAMMPSConfig.cmake" "${CMAKE_CURRENT_BINARY_DIR}/LAMMPSConfigVersion.cmake" DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/LAMMPS) endif() if(BUILD_EXE) diff --git a/cmake/LAMMPSConfig.cmake.in b/cmake/LAMMPSConfig.cmake.in new file mode 100644 index 0000000000..0dacfc2089 --- /dev/null +++ 
b/cmake/LAMMPSConfig.cmake.in @@ -0,0 +1,5 @@ +include(CMakeFindDependencyMacro) +if(@BUILD_MPI@) + find_dependency(MPI REQUIRED CXX) +endif() +include("${CMAKE_CURRENT_LIST_DIR}/LAMMPS_Targets.cmake") diff --git a/cmake/pkgconfig/liblammps.pc.in b/cmake/pkgconfig/liblammps.pc.in index a89f992c4a..96dab89161 100644 --- a/cmake/pkgconfig/liblammps.pc.in +++ b/cmake/pkgconfig/liblammps.pc.in @@ -18,12 +18,6 @@ # myapp_CFLAGS = $(LAMMPS_CFLAGS) # myapp_LDADD = $(LAMMPS_LIBS) -# Use this in CMake: -# CMakeLists.txt: -# find_package(PkgConfig) -# pkg_check_modules(LAMMPS IMPORTED_TARGET lammps) -# target_link_libraries( PRIVATE PkgConfig::LAMMPS) - prefix=@CMAKE_INSTALL_PREFIX@ libdir=@CMAKE_INSTALL_FULL_LIBDIR@ includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@ @@ -31,7 +25,7 @@ includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@ Name: liblammps@LAMMPS_MACHINE@ Description: Large-scale Atomic/Molecular Massively Parallel Simulator Library URL: http://lammps.sandia.gov -Version: @LAMMPS_VERSION@ +Version: @PROJECT_VERSION@ Requires: Libs: -L${libdir} -llammps@LAMMPS_LIB_SUFFIX@ Libs.private: -lm -- GitLab From ee3249676e726d5173d8b1efdf1f63c6b72ea2e3 Mon Sep 17 00:00:00 2001 From: Christoph Junghans Date: Wed, 25 Mar 2020 09:09:59 -0600 Subject: [PATCH 069/328] cmake: use OpenMP imported target --- cmake/CMakeLists.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 97bb00e250..befbe4ecda 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -255,8 +255,7 @@ if(BUILD_OMP) if(NOT HAVE_OMP_H_INCLUDE) message(FATAL_ERROR "Cannot find required 'omp.h' header file") endif() - set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") + target_link_libraries(lammps PRIVATE OpenMP::OpenMP_CXX) endif() if(PKG_MSCG OR PKG_USER-ATC OR PKG_USER-AWPMD OR PKG_USER-QUIP OR PKG_LATTE) -- GitLab From 58c9c4c64bc02d279b163cfb766ca6a4060f107d Mon Sep 17 00:00:00 
2001 From: Christoph Junghans Date: Wed, 25 Mar 2020 10:14:42 -0600 Subject: [PATCH 070/328] USER-H5MD.cmake: make hdf5 incldir public --- cmake/Modules/Packages/USER-H5MD.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/Modules/Packages/USER-H5MD.cmake b/cmake/Modules/Packages/USER-H5MD.cmake index cf5623f46d..2893f7903c 100644 --- a/cmake/Modules/Packages/USER-H5MD.cmake +++ b/cmake/Modules/Packages/USER-H5MD.cmake @@ -3,5 +3,5 @@ if(PKG_USER-H5MD) find_package(HDF5 REQUIRED) target_link_libraries(h5md PRIVATE ${HDF5_LIBRARIES}) - target_include_directories(h5md PRIVATE ${HDF5_INCLUDE_DIRS}) + target_include_directories(h5md PUBLIC ${HDF5_INCLUDE_DIRS}) endif() -- GitLab From 2571b6058e82315fc7031b083b34fb05b9da06cc Mon Sep 17 00:00:00 2001 From: Christoph Junghans Date: Wed, 25 Mar 2020 11:26:19 -0600 Subject: [PATCH 071/328] LATTE.cmake: create and use imported target --- cmake/Modules/FindLATTE.cmake | 13 +++++++++++-- cmake/Modules/Packages/LATTE.cmake | 10 +++++----- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/cmake/Modules/FindLATTE.cmake b/cmake/Modules/FindLATTE.cmake index 74d5173bf0..ac5c639b79 100644 --- a/cmake/Modules/FindLATTE.cmake +++ b/cmake/Modules/FindLATTE.cmake @@ -7,12 +7,21 @@ find_library(LATTE_LIBRARY NAMES latte) -set(LATTE_LIBRARIES ${LATTE_LIBRARY}) - include(FindPackageHandleStandardArgs) # handle the QUIETLY and REQUIRED arguments and set LATTE_FOUND to TRUE # if all listed variables are TRUE find_package_handle_standard_args(LATTE DEFAULT_MSG LATTE_LIBRARY) +# Copy the results to the output variables and target. 
+if(LATTE_FOUND) + set(LATTE_LIBRARIES ${LATTE_LIBRARY}) + + if(NOT TARGET LATTE::latte) + add_library(LATTE::latte UNKNOWN IMPORTED) + set_target_properties(LATTE::latte PROPERTIES + IMPORTED_LOCATION "${LATTE_LIBRARY}") + endif() +endif() + mark_as_advanced(LATTE_LIBRARY) diff --git a/cmake/Modules/Packages/LATTE.cmake b/cmake/Modules/Packages/LATTE.cmake index abadd1cd97..20944db1a9 100644 --- a/cmake/Modules/Packages/LATTE.cmake +++ b/cmake/Modules/Packages/LATTE.cmake @@ -26,15 +26,15 @@ if(PKG_LATTE) ) add_dependencies(lammps latte_build) ExternalProject_get_property(latte_build INSTALL_DIR) - set(LATTE_LIBRARIES ${INSTALL_DIR}/${_LATTE_LIBDIR}/liblatte.a) + target_link_libraries(lammps PRIVATE ${INSTALL_DIR}/${_LATTE_LIBDIR}/liblatte.a ${LAPACK_LIBRARIES}) + if(NOT LAPACK_FOUND) + add_dependencies(latte_build linalg) + endif() else() find_package(LATTE) if(NOT LATTE_FOUND) message(FATAL_ERROR "LATTE library not found, help CMake to find it by setting LATTE_LIBRARY, or set DOWNLOAD_LATTE=ON to download it") endif() + target_link_libraries(lammps PRIVATE LATTE::latte) endif() - if(NOT LAPACK_FOUND) - add_dependencies(latte_build linalg) - endif() - target_link_libraries(lammps PRIVATE ${LATTE_LIBRARIES} ${LAPACK_LIBRARIES}) endif() -- GitLab From 299f79c91932edbb870ae8f14df5f5f1610344ff Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Wed, 25 Mar 2020 13:55:31 -0600 Subject: [PATCH 072/328] Remove deprecated Kokkos code --- src/KOKKOS/comm_kokkos.cpp | 42 +++++++++++++++--------------- src/KOKKOS/fft3d_kokkos.cpp | 8 +++--- src/KOKKOS/gridcomm_kokkos.cpp | 8 +++--- src/KOKKOS/kokkos_type.h | 2 +- src/KOKKOS/pack_kokkos.h | 16 ++++++------ src/KOKKOS/pair_kokkos.h | 18 ++++++++----- src/KOKKOS/pair_snap_kokkos_impl.h | 7 +++-- src/KOKKOS/remap_kokkos.cpp | 4 +-- 8 files changed, 57 insertions(+), 48 deletions(-) diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index a1ece37efd..628b0b668a 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ 
b/src/KOKKOS/comm_kokkos.cpp @@ -205,7 +205,7 @@ void CommKokkos::forward_comm_device(int dummy) } n = avec->pack_comm_kokkos(sendnum[iswap],k_sendlist, iswap,k_buf_send,pbc_flag[iswap],pbc[iswap]); - DeviceType::fence(); + DeviceType().fence(); if (n) { MPI_Send(k_buf_send.view().data(), n,MPI_DOUBLE,sendproc[iswap],0,world); @@ -224,14 +224,14 @@ void CommKokkos::forward_comm_device(int dummy) } n = avec->pack_comm_vel_kokkos(sendnum[iswap],k_sendlist,iswap, k_buf_send,pbc_flag[iswap],pbc[iswap]); - DeviceType::fence(); + DeviceType().fence(); if (n) { MPI_Send(k_buf_send.view().data(),n, MPI_DOUBLE,sendproc[iswap],0,world); } if (size_forward_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE); avec->unpack_comm_vel_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_recv); - DeviceType::fence(); + DeviceType().fence(); } else { if (size_forward_recv[iswap]) MPI_Irecv(k_buf_recv.view().data(), @@ -239,26 +239,26 @@ void CommKokkos::forward_comm_device(int dummy) recvproc[iswap],0,world,&request); n = avec->pack_comm_kokkos(sendnum[iswap],k_sendlist,iswap, k_buf_send,pbc_flag[iswap],pbc[iswap]); - DeviceType::fence(); + DeviceType().fence(); if (n) MPI_Send(k_buf_send.view().data(),n, MPI_DOUBLE,sendproc[iswap],0,world); if (size_forward_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE); avec->unpack_comm_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_recv); - DeviceType::fence(); + DeviceType().fence(); } } else { if (!ghost_velocity) { if (sendnum[iswap]) n = avec->pack_comm_self(sendnum[iswap],k_sendlist,iswap, firstrecv[iswap],pbc_flag[iswap],pbc[iswap]); - DeviceType::fence(); + DeviceType().fence(); } else { n = avec->pack_comm_vel_kokkos(sendnum[iswap],k_sendlist,iswap, k_buf_send,pbc_flag[iswap],pbc[iswap]); - DeviceType::fence(); + DeviceType().fence(); avec->unpack_comm_vel_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_send); - DeviceType::fence(); + DeviceType().fence(); } } } @@ -334,7 +334,7 @@ void CommKokkos::reverse_comm_device() 
size_reverse_recv[iswap],MPI_DOUBLE, sendproc[iswap],0,world,&request); n = avec->pack_reverse_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_send); - DeviceType::fence(); + DeviceType().fence(); if (n) MPI_Send(k_buf_send.view().data(),n, MPI_DOUBLE,recvproc[iswap],0,world); @@ -342,7 +342,7 @@ void CommKokkos::reverse_comm_device() } avec->unpack_reverse_kokkos(sendnum[iswap],k_sendlist,iswap, k_buf_recv); - DeviceType::fence(); + DeviceType().fence(); } else { if (sendnum[iswap]) n = avec->unpack_reverse_self(sendnum[iswap],k_sendlist,iswap, @@ -410,7 +410,7 @@ void CommKokkos::forward_comm_pair_device(Pair *pair) n = pairKKBase->pack_forward_comm_kokkos(sendnum[iswap],k_sendlist, iswap,k_buf_send_pair,pbc_flag[iswap],pbc[iswap]); - DeviceType::fence(); + DeviceType().fence(); // exchange with another proc // if self, set recv buffer to send buffer @@ -445,7 +445,7 @@ void CommKokkos::forward_comm_pair_device(Pair *pair) // unpack buffer pairKKBase->unpack_forward_comm_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_recv_pair); - DeviceType::fence(); + DeviceType().fence(); } } @@ -647,7 +647,7 @@ void CommKokkos::exchange_device() k_exchange_sendlist,k_exchange_copylist, ExecutionSpaceFromDevice::space, dim,lo,hi); - DeviceType::fence(); + DeviceType().fence(); } else { while (i < nlocal) { if (x[i][dim] < lo || x[i][dim] >= hi) { @@ -671,7 +671,7 @@ void CommKokkos::exchange_device() atom->nlocal=avec-> unpack_exchange_kokkos(k_buf_send,nrecv,atom->nlocal,dim,lo,hi, ExecutionSpaceFromDevice::space); - DeviceType::fence(); + DeviceType().fence(); } } else { MPI_Sendrecv(&nsend,1,MPI_INT,procneigh[dim][0],0, @@ -704,7 +704,7 @@ void CommKokkos::exchange_device() atom->nlocal = avec-> unpack_exchange_kokkos(k_buf_recv,nrecv,atom->nlocal,dim,lo,hi, ExecutionSpaceFromDevice::space); - DeviceType::fence(); + DeviceType().fence(); } } @@ -964,13 +964,13 @@ void CommKokkos::borders_device() { n = avec-> pack_border_vel_kokkos(nsend,k_sendlist,k_buf_send,iswap, 
pbc_flag[iswap],pbc[iswap],exec_space); - DeviceType::fence(); + DeviceType().fence(); } else { n = avec-> pack_border_kokkos(nsend,k_sendlist,k_buf_send,iswap, pbc_flag[iswap],pbc[iswap],exec_space); - DeviceType::fence(); + DeviceType().fence(); } // swap atoms with other proc @@ -1000,21 +1000,21 @@ void CommKokkos::borders_device() { if (sendproc[iswap] != me) { avec->unpack_border_vel_kokkos(nrecv,atom->nlocal+atom->nghost, k_buf_recv,exec_space); - DeviceType::fence(); + DeviceType().fence(); } else { avec->unpack_border_vel_kokkos(nrecv,atom->nlocal+atom->nghost, k_buf_send,exec_space); - DeviceType::fence(); + DeviceType().fence(); } } else { if (sendproc[iswap] != me) { avec->unpack_border_kokkos(nrecv,atom->nlocal+atom->nghost, k_buf_recv,exec_space); - DeviceType::fence(); + DeviceType().fence(); } else { avec->unpack_border_kokkos(nrecv,atom->nlocal+atom->nghost, k_buf_send,exec_space); - DeviceType::fence(); + DeviceType().fence(); } } // set all pointers & counters diff --git a/src/KOKKOS/fft3d_kokkos.cpp b/src/KOKKOS/fft3d_kokkos.cpp index 26e6e93ad9..bedd15df93 100644 --- a/src/KOKKOS/fft3d_kokkos.cpp +++ b/src/KOKKOS/fft3d_kokkos.cpp @@ -230,7 +230,7 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, cufftExec(plan->plan_fast,d_data.data(),d_data.data(),flag); #else typename FFT_AT::t_FFT_DATA_1d d_tmp = - typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_in.dimension_0()); + typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_in.extent(0)); kiss_fft_functor f; if (flag == -1) f = kiss_fft_functor(d_data,d_tmp,plan->cfg_fast_forward,length); @@ -238,7 +238,7 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, f = kiss_fft_functor(d_data,d_tmp,plan->cfg_fast_backward,length); Kokkos::parallel_for(total/length,f); d_data = d_tmp; - d_tmp = typename 
FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_in.dimension_0()); + d_tmp = typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_in.extent(0)); #endif @@ -281,7 +281,7 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, f = kiss_fft_functor(d_data,d_tmp,plan->cfg_mid_backward,length); Kokkos::parallel_for(total/length,f); d_data = d_tmp; - d_tmp = typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_in.dimension_0()); + d_tmp = typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_in.extent(0)); #endif // 2nd mid-remap to prepare for 3rd FFTs @@ -864,7 +864,7 @@ void FFT3dKokkos::fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_DATA_ cufftExec(plan->plan_slow,d_data.data(),d_data.data(),flag); #else kiss_fft_functor f; - typename FFT_AT::t_FFT_DATA_1d d_tmp = typename FFT_AT::t_FFT_DATA_1d("fft_3d:tmp",d_data.dimension_0()); + typename FFT_AT::t_FFT_DATA_1d d_tmp = typename FFT_AT::t_FFT_DATA_1d("fft_3d:tmp",d_data.extent(0)); if (flag == -1) { f = kiss_fft_functor(d_data,d_tmp,plan->cfg_fast_forward,length1); Kokkos::parallel_for(total1/length1,f); diff --git a/src/KOKKOS/gridcomm_kokkos.cpp b/src/KOKKOS/gridcomm_kokkos.cpp index f1ccffe20d..bdf816b647 100644 --- a/src/KOKKOS/gridcomm_kokkos.cpp +++ b/src/KOKKOS/gridcomm_kokkos.cpp @@ -524,7 +524,7 @@ void GridCommKokkos::forward_comm(KSpace *kspace, int which) kspaceKKBase->pack_forward_kspace_kokkos(which,k_buf2,swap[m].npack,k_packlist,m); else kspaceKKBase->pack_forward_kspace_kokkos(which,k_buf1,swap[m].npack,k_packlist,m); - DeviceType::fence(); + DeviceType().fence(); if (swap[m].sendproc != me) { FFT_SCALAR* buf1; @@ -552,7 +552,7 @@ void GridCommKokkos::forward_comm(KSpace *kspace, int which) } kspaceKKBase->unpack_forward_kspace_kokkos(which,k_buf2,swap[m].nunpack,k_unpacklist,m); - DeviceType::fence(); + DeviceType().fence(); } } 
@@ -574,7 +574,7 @@ void GridCommKokkos::reverse_comm(KSpace *kspace, int which) kspaceKKBase->pack_reverse_kspace_kokkos(which,k_buf2,swap[m].nunpack,k_unpacklist,m); else kspaceKKBase->pack_reverse_kspace_kokkos(which,k_buf1,swap[m].nunpack,k_unpacklist,m); - DeviceType::fence(); + DeviceType().fence(); if (swap[m].recvproc != me) { FFT_SCALAR* buf1; @@ -602,7 +602,7 @@ void GridCommKokkos::reverse_comm(KSpace *kspace, int which) } kspaceKKBase->unpack_reverse_kspace_kokkos(which,k_buf2,swap[m].npack,k_packlist,m); - DeviceType::fence(); + DeviceType().fence(); } } diff --git a/src/KOKKOS/kokkos_type.h b/src/KOKKOS/kokkos_type.h index d21b9eecd2..2e68cc0405 100644 --- a/src/KOKKOS/kokkos_type.h +++ b/src/KOKKOS/kokkos_type.h @@ -1012,7 +1012,7 @@ void memset_kokkos (ViewType &view) { #else Kokkos::parallel_for(view.span()*sizeof(typename ViewType::value_type)/4, f); #endif - ViewType::execution_space::fence(); + ViewType::execution_space().fence(); } struct params_lj_coul { diff --git a/src/KOKKOS/pack_kokkos.h b/src/KOKKOS/pack_kokkos.h index 62e7960999..400048b1f0 100644 --- a/src/KOKKOS/pack_kokkos.h +++ b/src/KOKKOS/pack_kokkos.h @@ -86,7 +86,7 @@ static void pack_3d(typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, const int nfast = plan->nfast; pack_3d_functor f(d_buf,buf_offset,d_data,data_offset,plan); Kokkos::parallel_for(nslow*nmid*nfast,f); - DeviceType::fence(); + DeviceType().fence(); } /* ---------------------------------------------------------------------- @@ -140,7 +140,7 @@ static void unpack_3d(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, const int nfast = plan->nfast; unpack_3d_functor f(d_buf,buf_offset,d_data,data_offset,plan); Kokkos::parallel_for(nslow*nmid*nfast,f); - DeviceType::fence(); + DeviceType().fence(); } /* ---------------------------------------------------------------------- @@ -195,7 +195,7 @@ static void unpack_3d_permute1_1(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int const int nfast = 
plan->nfast; unpack_3d_permute1_1_functor f(d_buf,buf_offset,d_data,data_offset,plan); Kokkos::parallel_for(nslow*nmid*nfast,f); - DeviceType::fence(); + DeviceType().fence(); } /* ---------------------------------------------------------------------- unpack from buf -> data, one axis permutation, 2 values/element @@ -249,7 +249,7 @@ static void unpack_3d_permute1_2(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int const int nfast = plan->nfast; unpack_3d_permute1_2_functor f(d_buf,buf_offset,d_data,data_offset,plan); Kokkos::parallel_for(nslow*nmid*nfast,f); - DeviceType::fence(); + DeviceType().fence(); } /* ---------------------------------------------------------------------- @@ -305,7 +305,7 @@ static void unpack_3d_permute1_n(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int const int nfast = plan->nfast; unpack_3d_permute1_n_functor f(d_buf,buf_offset,d_data,data_offset,plan); Kokkos::parallel_for(nslow*nmid*nfast,f); - DeviceType::fence(); + DeviceType().fence(); } /* ---------------------------------------------------------------------- @@ -358,7 +358,7 @@ static void unpack_3d_permute2_1(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int const int nfast = plan->nfast; unpack_3d_permute2_1_functor f(d_buf,buf_offset,d_data,data_offset,plan); Kokkos::parallel_for(nslow*nmid*nfast,f); - DeviceType::fence(); + DeviceType().fence(); } /* ---------------------------------------------------------------------- @@ -412,7 +412,7 @@ static void unpack_3d_permute2_2(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int const int nfast = plan->nfast; unpack_3d_permute2_2_functor f(d_buf,buf_offset,d_data,data_offset,plan); Kokkos::parallel_for(nslow*nmid*nfast,f); - DeviceType::fence(); + DeviceType().fence(); } /* ---------------------------------------------------------------------- unpack from buf -> data, two axis permutation, nqty values/element @@ -466,7 +466,7 @@ static void unpack_3d_permute2_n(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int const int nfast = plan->nfast; 
unpack_3d_permute2_n_functor f(d_buf,buf_offset,d_data,data_offset,plan); Kokkos::parallel_for(nslow*nmid*nfast,f); - DeviceType::fence(); + DeviceType().fence(); } }; diff --git a/src/KOKKOS/pair_kokkos.h b/src/KOKKOS/pair_kokkos.h index 52a05b3991..d501324960 100644 --- a/src/KOKKOS/pair_kokkos.h +++ b/src/KOKKOS/pair_kokkos.h @@ -444,7 +444,7 @@ struct PairComputeFunctor { ev.evdwl += fev.evdwl; if (c.eflag_atom) - d_eatom(i,0) += fev.evdwl; + d_eatom(i) += fev.evdwl; if (c.vflag_global) { ev.v[0] += fev.v[0]; @@ -554,7 +554,7 @@ struct PairComputeFunctor { } if (c.eflag_atom) - d_eatom(i,0) += fev.evdwl + fev.ecoul; + d_eatom(i) += fev.evdwl + fev.ecoul; if (c.vflag_global) { ev.v[0] += fev.v[0]; @@ -850,8 +850,14 @@ EV_FLOAT pair_compute_neighlist (PairStyle* fpair, typename Kokkos::Impl::enable } template -int GetTeamSize(FunctorStyle& functor, int team_size, int vector_length) { - int team_size_max = Kokkos::TeamPolicy<>::team_size_max(functor); +int GetTeamSize(FunctorStyle& functor, int inum, int reduce_flag, int team_size, int vector_length) { + int team_size_max; + if (reduce_flag) { + EV_FLOAT ev; + team_size_max = Kokkos::TeamPolicy<>(inum,Kokkos::AUTO).team_size_max(functor,ev,Kokkos::ParallelReduceTag()); + } else { + team_size_max = Kokkos::TeamPolicy<>(inum,Kokkos::AUTO).team_size_max(functor,Kokkos::ParallelForTag()); + } #ifdef KOKKOS_ENABLE_CUDA if(team_size*vector_length > team_size_max) @@ -877,13 +883,13 @@ EV_FLOAT pair_compute_neighlist (PairStyle* fpair, typename Kokkos::Impl::enable if(fpair->atom->ntypes > MAX_TYPES_STACKPARAMS) { PairComputeFunctor ff(fpair,list); - atoms_per_team = GetTeamSize(ff, atoms_per_team, vector_length); + atoms_per_team = GetTeamSize(ff, list->inum, (fpair->eflag || fpair->vflag), atoms_per_team, vector_length); Kokkos::TeamPolicy > policy(list->inum,atoms_per_team,vector_length); if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(policy,ff,ev); else Kokkos::parallel_for(policy,ff); } else { 
PairComputeFunctor ff(fpair,list); - atoms_per_team = GetTeamSize(ff, atoms_per_team, vector_length); + atoms_per_team = GetTeamSize(ff, list->inum, (fpair->eflag || fpair->vflag), atoms_per_team, vector_length); Kokkos::TeamPolicy > policy(list->inum,atoms_per_team,vector_length); if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(policy,ff,ev); else Kokkos::parallel_for(policy,ff); diff --git a/src/KOKKOS/pair_snap_kokkos_impl.h b/src/KOKKOS/pair_snap_kokkos_impl.h index d807f149a9..d4e5535614 100644 --- a/src/KOKKOS/pair_snap_kokkos_impl.h +++ b/src/KOKKOS/pair_snap_kokkos_impl.h @@ -182,11 +182,14 @@ void PairSNAPKokkos::compute(int eflag_in, int vflag_in) if (max_neighs(k_list), Kokkos::Experimental::Max(max_neighs)); + Kokkos::parallel_reduce("PairSNAPKokkos::find_max_neighs",inum, FindMaxNumNeighs(k_list), Kokkos::Max(max_neighs)); + + int chunk_size = MIN(2000,inum); + chunk_offset = 0; int vector_length = 1; int team_size = 1; - int team_size_max = Kokkos::TeamPolicy::team_size_max(*this); + int team_size_max = Kokkos::TeamPolicy(chunk_size,Kokkos::AUTO).team_size_max(*this,Kokkos::ParallelForTag()); #ifdef KOKKOS_ENABLE_CUDA team_size = 32;//max_neighs; if (team_size*vector_length > team_size_max) diff --git a/src/KOKKOS/remap_kokkos.cpp b/src/KOKKOS/remap_kokkos.cpp index c809aa034c..0d1ce46d7d 100644 --- a/src/KOKKOS/remap_kokkos.cpp +++ b/src/KOKKOS/remap_kokkos.cpp @@ -120,7 +120,7 @@ void RemapKokkos::remap_3d_kokkos(typename FFT_AT::t_FFT_SCALAR_1d d // post all recvs into scratch space for (irecv = 0; irecv < plan->nrecv; irecv++) { - FFT_SCALAR* scratch = d_scratch.ptr_on_device() + plan->recv_bufloc[irecv]; + FFT_SCALAR* scratch = d_scratch.data() + plan->recv_bufloc[irecv]; MPI_Irecv(scratch,plan->recv_size[irecv], MPI_FFT_SCALAR,plan->recv_proc[irecv],0, plan->comm,&plan->request[irecv]); @@ -132,7 +132,7 @@ void RemapKokkos::remap_3d_kokkos(typename FFT_AT::t_FFT_SCALAR_1d d int in_offset = plan->send_offset[isend]; 
plan->pack(d_in,in_offset, plan->d_sendbuf,0,&plan->packplan[isend]); - MPI_Send(plan->d_sendbuf.ptr_on_device(),plan->send_size[isend],MPI_FFT_SCALAR, + MPI_Send(plan->d_sendbuf.data(),plan->send_size[isend],MPI_FFT_SCALAR, plan->send_proc[isend],0,plan->comm); } -- GitLab From 7a09636f9a80c486aa7f7afbc55c78d28664ce03 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Wed, 5 Feb 2020 14:36:39 -0700 Subject: [PATCH 073/328] Fix compile issue in pair_kokkos.h --- src/KOKKOS/pair_kokkos.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/KOKKOS/pair_kokkos.h b/src/KOKKOS/pair_kokkos.h index d501324960..c9f375de4b 100644 --- a/src/KOKKOS/pair_kokkos.h +++ b/src/KOKKOS/pair_kokkos.h @@ -852,12 +852,11 @@ EV_FLOAT pair_compute_neighlist (PairStyle* fpair, typename Kokkos::Impl::enable template int GetTeamSize(FunctorStyle& functor, int inum, int reduce_flag, int team_size, int vector_length) { int team_size_max; - if (reduce_flag) { - EV_FLOAT ev; - team_size_max = Kokkos::TeamPolicy<>(inum,Kokkos::AUTO).team_size_max(functor,ev,Kokkos::ParallelReduceTag()); - } else { + + if (reduce_flag) + team_size_max = Kokkos::TeamPolicy<>(inum,Kokkos::AUTO).team_size_max(functor,Kokkos::ParallelReduceTag()); + else team_size_max = Kokkos::TeamPolicy<>(inum,Kokkos::AUTO).team_size_max(functor,Kokkos::ParallelForTag()); - } #ifdef KOKKOS_ENABLE_CUDA if(team_size*vector_length > team_size_max) -- GitLab From 4eebcdfc0db5b6afbf6fa9a7dffb5f6ebd0e11d3 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Wed, 5 Feb 2020 16:35:43 -0700 Subject: [PATCH 074/328] Fix runtime issue in Kokkos --- src/KOKKOS/atom_vec_angle_kokkos.cpp | 6 +++--- src/KOKKOS/atom_vec_atomic_kokkos.cpp | 6 +++--- src/KOKKOS/atom_vec_bond_kokkos.cpp | 6 +++--- src/KOKKOS/atom_vec_charge_kokkos.cpp | 6 +++--- src/KOKKOS/atom_vec_dpd_kokkos.cpp | 6 +++--- src/KOKKOS/atom_vec_full_kokkos.cpp | 6 +++--- src/KOKKOS/atom_vec_molecular_kokkos.cpp | 6 +++--- src/KOKKOS/atom_vec_sphere_kokkos.cpp | 6 
+++--- 8 files changed, 24 insertions(+), 24 deletions(-) diff --git a/src/KOKKOS/atom_vec_angle_kokkos.cpp b/src/KOKKOS/atom_vec_angle_kokkos.cpp index 736e1c1fca..5b934e2434 100644 --- a/src/KOKKOS/atom_vec_angle_kokkos.cpp +++ b/src/KOKKOS/atom_vec_angle_kokkos.cpp @@ -74,9 +74,9 @@ void AtomVecAngleKokkos::grow(int n) memoryKK->grow_kokkos(atomKK->k_mask,atomKK->mask,nmax,"atom:mask"); memoryKK->grow_kokkos(atomKK->k_image,atomKK->image,nmax,"atom:image"); - memoryKK->grow_kokkos(atomKK->k_x,atomKK->x,nmax,3,"atom:x"); - memoryKK->grow_kokkos(atomKK->k_v,atomKK->v,nmax,3,"atom:v"); - memoryKK->grow_kokkos(atomKK->k_f,atomKK->f,nmax,3,"atom:f"); + memoryKK->grow_kokkos(atomKK->k_x,atomKK->x,nmax,"atom:x"); + memoryKK->grow_kokkos(atomKK->k_v,atomKK->v,nmax,"atom:v"); + memoryKK->grow_kokkos(atomKK->k_f,atomKK->f,nmax,"atom:f"); memoryKK->grow_kokkos(atomKK->k_molecule,atomKK->molecule,nmax,"atom:molecule"); memoryKK->grow_kokkos(atomKK->k_nspecial,atomKK->nspecial,nmax,3,"atom:nspecial"); diff --git a/src/KOKKOS/atom_vec_atomic_kokkos.cpp b/src/KOKKOS/atom_vec_atomic_kokkos.cpp index 4fec5740d6..df30b50dd0 100644 --- a/src/KOKKOS/atom_vec_atomic_kokkos.cpp +++ b/src/KOKKOS/atom_vec_atomic_kokkos.cpp @@ -70,9 +70,9 @@ void AtomVecAtomicKokkos::grow(int n) memoryKK->grow_kokkos(atomKK->k_mask,atomKK->mask,nmax,"atom:mask"); memoryKK->grow_kokkos(atomKK->k_image,atomKK->image,nmax,"atom:image"); - memoryKK->grow_kokkos(atomKK->k_x,atomKK->x,nmax,3,"atom:x"); - memoryKK->grow_kokkos(atomKK->k_v,atomKK->v,nmax,3,"atom:v"); - memoryKK->grow_kokkos(atomKK->k_f,atomKK->f,nmax,3,"atom:f"); + memoryKK->grow_kokkos(atomKK->k_x,atomKK->x,nmax,"atom:x"); + memoryKK->grow_kokkos(atomKK->k_v,atomKK->v,nmax,"atom:v"); + memoryKK->grow_kokkos(atomKK->k_f,atomKK->f,nmax,"atom:f"); grow_reset(); atomKK->sync(Host,ALL_MASK); diff --git a/src/KOKKOS/atom_vec_bond_kokkos.cpp b/src/KOKKOS/atom_vec_bond_kokkos.cpp index 4475131d77..b614d94bce 100644 --- 
a/src/KOKKOS/atom_vec_bond_kokkos.cpp +++ b/src/KOKKOS/atom_vec_bond_kokkos.cpp @@ -73,9 +73,9 @@ void AtomVecBondKokkos::grow(int n) memoryKK->grow_kokkos(atomKK->k_mask,atomKK->mask,nmax,"atom:mask"); memoryKK->grow_kokkos(atomKK->k_image,atomKK->image,nmax,"atom:image"); - memoryKK->grow_kokkos(atomKK->k_x,atomKK->x,nmax,3,"atom:x"); - memoryKK->grow_kokkos(atomKK->k_v,atomKK->v,nmax,3,"atom:v"); - memoryKK->grow_kokkos(atomKK->k_f,atomKK->f,nmax,3,"atom:f"); + memoryKK->grow_kokkos(atomKK->k_x,atomKK->x,nmax,"atom:x"); + memoryKK->grow_kokkos(atomKK->k_v,atomKK->v,nmax,"atom:v"); + memoryKK->grow_kokkos(atomKK->k_f,atomKK->f,nmax,"atom:f"); memoryKK->grow_kokkos(atomKK->k_molecule,atomKK->molecule,nmax,"atom:molecule"); memoryKK->grow_kokkos(atomKK->k_nspecial,atomKK->nspecial,nmax,3,"atom:nspecial"); diff --git a/src/KOKKOS/atom_vec_charge_kokkos.cpp b/src/KOKKOS/atom_vec_charge_kokkos.cpp index 3f26b1e9ea..a9cee3aca8 100644 --- a/src/KOKKOS/atom_vec_charge_kokkos.cpp +++ b/src/KOKKOS/atom_vec_charge_kokkos.cpp @@ -73,9 +73,9 @@ void AtomVecChargeKokkos::grow(int n) memoryKK->grow_kokkos(atomKK->k_mask,atomKK->mask,nmax,"atom:mask"); memoryKK->grow_kokkos(atomKK->k_image,atomKK->image,nmax,"atom:image"); - memoryKK->grow_kokkos(atomKK->k_x,atomKK->x,nmax,3,"atom:x"); - memoryKK->grow_kokkos(atomKK->k_v,atomKK->v,nmax,3,"atom:v"); - memoryKK->grow_kokkos(atomKK->k_f,atomKK->f,nmax,3,"atom:f"); + memoryKK->grow_kokkos(atomKK->k_x,atomKK->x,nmax,"atom:x"); + memoryKK->grow_kokkos(atomKK->k_v,atomKK->v,nmax,"atom:v"); + memoryKK->grow_kokkos(atomKK->k_f,atomKK->f,nmax,"atom:f"); memoryKK->grow_kokkos(atomKK->k_q,atomKK->q,nmax,"atom:q"); diff --git a/src/KOKKOS/atom_vec_dpd_kokkos.cpp b/src/KOKKOS/atom_vec_dpd_kokkos.cpp index 144ef26f19..dfc122b1ef 100644 --- a/src/KOKKOS/atom_vec_dpd_kokkos.cpp +++ b/src/KOKKOS/atom_vec_dpd_kokkos.cpp @@ -75,9 +75,9 @@ void AtomVecDPDKokkos::grow(int n) memoryKK->grow_kokkos(atomKK->k_mask,atomKK->mask,nmax,"atom:mask"); 
memoryKK->grow_kokkos(atomKK->k_image,atomKK->image,nmax,"atom:image"); - memoryKK->grow_kokkos(atomKK->k_x,atomKK->x,nmax,3,"atom:x"); - memoryKK->grow_kokkos(atomKK->k_v,atomKK->v,nmax,3,"atom:v"); - memoryKK->grow_kokkos(atomKK->k_f,atomKK->f,nmax,3,"atom:f"); + memoryKK->grow_kokkos(atomKK->k_x,atomKK->x,nmax,"atom:x"); + memoryKK->grow_kokkos(atomKK->k_v,atomKK->v,nmax,"atom:v"); + memoryKK->grow_kokkos(atomKK->k_f,atomKK->f,nmax,"atom:f"); memoryKK->grow_kokkos(atomKK->k_rho,atomKK->rho,nmax,"atom:rho"); diff --git a/src/KOKKOS/atom_vec_full_kokkos.cpp b/src/KOKKOS/atom_vec_full_kokkos.cpp index 1fdbcbec8c..b5239867fb 100644 --- a/src/KOKKOS/atom_vec_full_kokkos.cpp +++ b/src/KOKKOS/atom_vec_full_kokkos.cpp @@ -73,9 +73,9 @@ void AtomVecFullKokkos::grow(int n) memoryKK->grow_kokkos(atomKK->k_mask,atomKK->mask,nmax,"atom:mask"); memoryKK->grow_kokkos(atomKK->k_image,atomKK->image,nmax,"atom:image"); - memoryKK->grow_kokkos(atomKK->k_x,atomKK->x,nmax,3,"atom:x"); - memoryKK->grow_kokkos(atomKK->k_v,atomKK->v,nmax,3,"atom:v"); - memoryKK->grow_kokkos(atomKK->k_f,atomKK->f,nmax,3,"atom:f"); + memoryKK->grow_kokkos(atomKK->k_x,atomKK->x,nmax,"atom:x"); + memoryKK->grow_kokkos(atomKK->k_v,atomKK->v,nmax,"atom:v"); + memoryKK->grow_kokkos(atomKK->k_f,atomKK->f,nmax,"atom:f"); memoryKK->grow_kokkos(atomKK->k_q,atomKK->q,nmax,"atom:q"); memoryKK->grow_kokkos(atomKK->k_molecule,atomKK->molecule,nmax,"atom:molecule"); diff --git a/src/KOKKOS/atom_vec_molecular_kokkos.cpp b/src/KOKKOS/atom_vec_molecular_kokkos.cpp index f3b4ae98ca..fec0183971 100644 --- a/src/KOKKOS/atom_vec_molecular_kokkos.cpp +++ b/src/KOKKOS/atom_vec_molecular_kokkos.cpp @@ -73,9 +73,9 @@ void AtomVecMolecularKokkos::grow(int n) memoryKK->grow_kokkos(atomKK->k_mask,atomKK->mask,nmax,"atom:mask"); memoryKK->grow_kokkos(atomKK->k_image,atomKK->image,nmax,"atom:image"); - memoryKK->grow_kokkos(atomKK->k_x,atomKK->x,nmax,3,"atom:x"); - memoryKK->grow_kokkos(atomKK->k_v,atomKK->v,nmax,3,"atom:v"); - 
memoryKK->grow_kokkos(atomKK->k_f,atomKK->f,nmax,3,"atom:f"); + memoryKK->grow_kokkos(atomKK->k_x,atomKK->x,nmax,"atom:x"); + memoryKK->grow_kokkos(atomKK->k_v,atomKK->v,nmax,"atom:v"); + memoryKK->grow_kokkos(atomKK->k_f,atomKK->f,nmax,"atom:f"); memoryKK->grow_kokkos(atomKK->k_molecule,atomKK->molecule,nmax,"atom:molecule"); memoryKK->grow_kokkos(atomKK->k_nspecial,atomKK->nspecial,nmax,3,"atom:nspecial"); diff --git a/src/KOKKOS/atom_vec_sphere_kokkos.cpp b/src/KOKKOS/atom_vec_sphere_kokkos.cpp index 67aaa32c21..31f8180b4c 100644 --- a/src/KOKKOS/atom_vec_sphere_kokkos.cpp +++ b/src/KOKKOS/atom_vec_sphere_kokkos.cpp @@ -107,9 +107,9 @@ void AtomVecSphereKokkos::grow(int n) memoryKK->grow_kokkos(atomKK->k_mask,atomKK->mask,nmax,"atom:mask"); memoryKK->grow_kokkos(atomKK->k_image,atomKK->image,nmax,"atom:image"); - memoryKK->grow_kokkos(atomKK->k_x,atomKK->x,nmax,3,"atom:x"); - memoryKK->grow_kokkos(atomKK->k_v,atomKK->v,nmax,3,"atom:v"); - memoryKK->grow_kokkos(atomKK->k_f,atomKK->f,nmax,3,"atom:f"); + memoryKK->grow_kokkos(atomKK->k_x,atomKK->x,nmax,"atom:x"); + memoryKK->grow_kokkos(atomKK->k_v,atomKK->v,nmax,"atom:v"); + memoryKK->grow_kokkos(atomKK->k_f,atomKK->f,nmax,"atom:f"); memoryKK->grow_kokkos(atomKK->k_radius,atomKK->radius,nmax,"atom:radius"); memoryKK->grow_kokkos(atomKK->k_rmass,atomKK->rmass,nmax,"atom:rmass"); memoryKK->grow_kokkos(atomKK->k_omega,atomKK->omega,nmax,3,"atom:omega"); -- GitLab From 1e7e9369dab0d5310d80e7c26458fa9fa78911e6 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Thu, 6 Feb 2020 08:44:58 -0700 Subject: [PATCH 075/328] Fix runtime error in Kokkos package --- src/KOKKOS/kokkos_type.h | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/src/KOKKOS/kokkos_type.h b/src/KOKKOS/kokkos_type.h index 2e68cc0405..3ba6318d41 100644 --- a/src/KOKKOS/kokkos_type.h +++ b/src/KOKKOS/kokkos_type.h @@ -980,17 +980,9 @@ typedef struct ArrayTypes HAT; template void buffer_view(BufferView &buf, DualView &view, const 
size_t n0, - const size_t n1 = 0, - const size_t n2 = 0, - const size_t n3 = 0, - const size_t n4 = 0, - const size_t n5 = 0, - const size_t n6 = 0, - const size_t n7 = 0) { - - buf = BufferView( - view.template view().data(), - n0,n1,n2,n3,n4,n5,n6,n7); + const size_t n1) { + + buf = BufferView(view.template view().data(),n0,n1); } -- GitLab From 0025dfe1e3fd79bb557048143ccb48296549337c Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Thu, 6 Feb 2020 09:35:04 -0700 Subject: [PATCH 076/328] Update Kokkos CUDA minimum version --- doc/src/Speed_kokkos.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/src/Speed_kokkos.rst b/doc/src/Speed_kokkos.rst index 6658957006..ab8444b845 100644 --- a/doc/src/Speed_kokkos.rst +++ b/doc/src/Speed_kokkos.rst @@ -38,7 +38,7 @@ compatible with specific hardware. .. note:: To build with Kokkos support for NVIDIA GPUs, NVIDIA CUDA - software version 7.5 or later must be installed on your system. See + software version 9.0 or later must be installed on your system. See the discussion for the :doc:`GPU package ` for details of how to check and do this.
-- GitLab From e298978da0f758b44644d82859cd701ae7923469 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Wed, 25 Mar 2020 13:58:12 -0600 Subject: [PATCH 077/328] Change Kokkos::Impl to std namespace --- src/KOKKOS/atom_kokkos.h | 16 ++++++++-------- src/KOKKOS/fix_qeq_reax_kokkos.cpp | 6 +++--- src/KOKKOS/fix_rx_kokkos.cpp | 6 +++--- src/KOKKOS/fix_shardlow_kokkos.cpp | 6 +++--- src/KOKKOS/pair_buck_coul_cut_kokkos.cpp | 6 +++--- src/KOKKOS/pair_buck_coul_long_kokkos.cpp | 6 +++--- src/KOKKOS/pair_buck_kokkos.cpp | 6 +++--- src/KOKKOS/pair_coul_cut_kokkos.cpp | 6 +++--- src/KOKKOS/pair_coul_debye_kokkos.cpp | 6 +++--- src/KOKKOS/pair_coul_dsf_kokkos.cpp | 6 +++--- src/KOKKOS/pair_coul_long_kokkos.cpp | 6 +++--- src/KOKKOS/pair_coul_wolf_kokkos.cpp | 6 +++--- src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp | 12 ++++++------ src/KOKKOS/pair_eam_alloy_kokkos.cpp | 6 +++--- src/KOKKOS/pair_eam_fs_kokkos.cpp | 6 +++--- src/KOKKOS/pair_eam_kokkos.cpp | 6 +++--- src/KOKKOS/pair_exp6_rx_kokkos.cpp | 6 +++--- src/KOKKOS/pair_gran_hooke_history_kokkos.cpp | 6 +++--- src/KOKKOS/pair_kokkos.h | 4 ++-- ...air_lj_charmm_coul_charmm_implicit_kokkos.cpp | 6 +++--- src/KOKKOS/pair_lj_charmm_coul_charmm_kokkos.cpp | 6 +++--- src/KOKKOS/pair_lj_charmm_coul_long_kokkos.cpp | 6 +++--- src/KOKKOS/pair_lj_class2_coul_cut_kokkos.cpp | 6 +++--- src/KOKKOS/pair_lj_class2_coul_long_kokkos.cpp | 6 +++--- src/KOKKOS/pair_lj_class2_kokkos.cpp | 6 +++--- src/KOKKOS/pair_lj_cut_coul_cut_kokkos.cpp | 6 +++--- src/KOKKOS/pair_lj_cut_coul_debye_kokkos.cpp | 6 +++--- src/KOKKOS/pair_lj_cut_coul_dsf_kokkos.cpp | 6 +++--- src/KOKKOS/pair_lj_cut_coul_long_kokkos.cpp | 6 +++--- src/KOKKOS/pair_lj_cut_kokkos.cpp | 6 +++--- src/KOKKOS/pair_lj_expand_kokkos.cpp | 6 +++--- .../pair_lj_gromacs_coul_gromacs_kokkos.cpp | 6 +++--- src/KOKKOS/pair_lj_gromacs_kokkos.cpp | 6 +++--- src/KOKKOS/pair_lj_sdk_kokkos.cpp | 6 +++--- src/KOKKOS/pair_morse_kokkos.cpp | 6 +++--- src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp | 6 +++--- 
src/KOKKOS/pair_reaxc_kokkos.cpp | 6 +++--- src/KOKKOS/pair_snap_kokkos_impl.h | 6 +++--- src/KOKKOS/pair_sw_kokkos.cpp | 6 +++--- src/KOKKOS/pair_table_kokkos.cpp | 6 +++--- src/KOKKOS/pair_table_rx_kokkos.cpp | 6 +++--- src/KOKKOS/pair_tersoff_kokkos.cpp | 6 +++--- src/KOKKOS/pair_tersoff_mod_kokkos.cpp | 6 +++--- src/KOKKOS/pair_tersoff_zbl_kokkos.cpp | 6 +++--- src/KOKKOS/pair_vashishta_kokkos.cpp | 6 +++--- src/KOKKOS/pair_yukawa_kokkos.cpp | 6 +++--- src/KOKKOS/pair_zbl_kokkos.cpp | 6 +++--- src/KOKKOS/sna_kokkos_impl.h | 2 +- 48 files changed, 149 insertions(+), 149 deletions(-) diff --git a/src/KOKKOS/atom_kokkos.h b/src/KOKKOS/atom_kokkos.h index a83b299ebd..0ae032032a 100644 --- a/src/KOKKOS/atom_kokkos.h +++ b/src/KOKKOS/atom_kokkos.h @@ -83,32 +83,32 @@ class SortFunctor { ViewType source; Kokkos::View dest; IndexView index; - SortFunctor(ViewType src, typename Kokkos::Impl::enable_if::type ind):source(src),index(ind){ + SortFunctor(ViewType src, typename std::enable_if::type ind):source(src),index(ind){ dest = Kokkos::View("",src.extent(0)); } - SortFunctor(ViewType src, typename Kokkos::Impl::enable_if::type ind):source(src),index(ind){ + SortFunctor(ViewType src, typename std::enable_if::type ind):source(src),index(ind){ dest = Kokkos::View("",src.extent(0),src.extent(1)); } - SortFunctor(ViewType src, typename Kokkos::Impl::enable_if::type ind):source(src),index(ind){ + SortFunctor(ViewType src, typename std::enable_if::type ind):source(src),index(ind){ dest = Kokkos::View("",src.extent(0),src.extent(1),src.extent(2)); } - SortFunctor(ViewType src, typename Kokkos::Impl::enable_if::type ind):source(src),index(ind){ + SortFunctor(ViewType src, typename std::enable_if::type ind):source(src),index(ind){ dest = Kokkos::View("",src.extent(0),src.extent(1),src.extent(2),src.extent(3)); } KOKKOS_INLINE_FUNCTION - void operator()(const typename Kokkos::Impl::enable_if::type& i) { + void operator()(const typename std::enable_if::type& i) { dest(i) = 
source(index(i)); } - void operator()(const typename Kokkos::Impl::enable_if::type& i) { + void operator()(const typename std::enable_if::type& i) { for(int j=0;j::type& i) { + void operator()(const typename std::enable_if::type& i) { for(int j=0;j::type& i) { + void operator()(const typename std::enable_if::type& i) { for(int j=0;j::init() int irequest = neighbor->nrequest - 1; neighbor->requests[irequest]-> - kokkos_host = Kokkos::Impl::is_same::value && - !Kokkos::Impl::is_same::value; + kokkos_host = std::is_same::value && + !std::is_same::value; neighbor->requests[irequest]-> - kokkos_device = Kokkos::Impl::is_same::value; + kokkos_device = std::is_same::value; if (neighflag == FULL) { neighbor->requests[irequest]->fix = 1; diff --git a/src/KOKKOS/fix_rx_kokkos.cpp b/src/KOKKOS/fix_rx_kokkos.cpp index e06fc14585..dcb1ac0b71 100644 --- a/src/KOKKOS/fix_rx_kokkos.cpp +++ b/src/KOKKOS/fix_rx_kokkos.cpp @@ -146,10 +146,10 @@ void FixRxKokkos::init() int neighflag = lmp->kokkos->neighflag; neighbor->requests[irequest]-> - kokkos_host = Kokkos::Impl::is_same::value && - !Kokkos::Impl::is_same::value; + kokkos_host = std::is_same::value && + !std::is_same::value; neighbor->requests[irequest]-> - kokkos_device = Kokkos::Impl::is_same::value; + kokkos_device = std::is_same::value; if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; diff --git a/src/KOKKOS/fix_shardlow_kokkos.cpp b/src/KOKKOS/fix_shardlow_kokkos.cpp index 9bd8594341..c6ad47501a 100644 --- a/src/KOKKOS/fix_shardlow_kokkos.cpp +++ b/src/KOKKOS/fix_shardlow_kokkos.cpp @@ -132,10 +132,10 @@ void FixShardlowKokkos::init() int irequest = neighbor->nrequest - 1; neighbor->requests[irequest]-> - kokkos_host = Kokkos::Impl::is_same::value && - !Kokkos::Impl::is_same::value; + kokkos_host = std::is_same::value && + !std::is_same::value; neighbor->requests[irequest]-> - kokkos_device = Kokkos::Impl::is_same::value; + kokkos_device = std::is_same::value; // neighbor->requests[irequest]->pair = 0; // 
neighbor->requests[irequest]->fix = 1; diff --git a/src/KOKKOS/pair_buck_coul_cut_kokkos.cpp b/src/KOKKOS/pair_buck_coul_cut_kokkos.cpp index 2a72617525..97154f7604 100644 --- a/src/KOKKOS/pair_buck_coul_cut_kokkos.cpp +++ b/src/KOKKOS/pair_buck_coul_cut_kokkos.cpp @@ -296,10 +296,10 @@ void PairBuckCoulCutKokkos::init_style() int irequest = neighbor->nrequest - 1; neighbor->requests[irequest]-> - kokkos_host = Kokkos::Impl::is_same::value && - !Kokkos::Impl::is_same::value; + kokkos_host = std::is_same::value && + !std::is_same::value; neighbor->requests[irequest]-> - kokkos_device = Kokkos::Impl::is_same::value; + kokkos_device = std::is_same::value; if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; diff --git a/src/KOKKOS/pair_buck_coul_long_kokkos.cpp b/src/KOKKOS/pair_buck_coul_long_kokkos.cpp index fdf395684a..a55c6b25f6 100644 --- a/src/KOKKOS/pair_buck_coul_long_kokkos.cpp +++ b/src/KOKKOS/pair_buck_coul_long_kokkos.cpp @@ -456,10 +456,10 @@ void PairBuckCoulLongKokkos::init_style() int irequest = neighbor->nrequest - 1; neighbor->requests[irequest]-> - kokkos_host = Kokkos::Impl::is_same::value && - !Kokkos::Impl::is_same::value; + kokkos_host = std::is_same::value && + !std::is_same::value; neighbor->requests[irequest]-> - kokkos_device = Kokkos::Impl::is_same::value; + kokkos_device = std::is_same::value; if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; diff --git a/src/KOKKOS/pair_buck_kokkos.cpp b/src/KOKKOS/pair_buck_kokkos.cpp index 375d0dc1ea..76ff246be4 100644 --- a/src/KOKKOS/pair_buck_kokkos.cpp +++ b/src/KOKKOS/pair_buck_kokkos.cpp @@ -218,10 +218,10 @@ void PairBuckKokkos::init_style() int irequest = neighbor->nrequest - 1; neighbor->requests[irequest]-> - kokkos_host = Kokkos::Impl::is_same::value && - !Kokkos::Impl::is_same::value; + kokkos_host = std::is_same::value && + !std::is_same::value; neighbor->requests[irequest]-> - kokkos_device = Kokkos::Impl::is_same::value; + kokkos_device = std::is_same::value; if 
(neighflag == FULL) { neighbor->requests[irequest]->full = 1; diff --git a/src/KOKKOS/pair_coul_cut_kokkos.cpp b/src/KOKKOS/pair_coul_cut_kokkos.cpp index 5a1a6eefac..210144040c 100644 --- a/src/KOKKOS/pair_coul_cut_kokkos.cpp +++ b/src/KOKKOS/pair_coul_cut_kokkos.cpp @@ -220,10 +220,10 @@ void PairCoulCutKokkos::init_style() int irequest = neighbor->nrequest - 1; neighbor->requests[irequest]-> - kokkos_host = Kokkos::Impl::is_same::value && - !Kokkos::Impl::is_same::value; + kokkos_host = std::is_same::value && + !std::is_same::value; neighbor->requests[irequest]-> - kokkos_device = Kokkos::Impl::is_same::value; + kokkos_device = std::is_same::value; if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; diff --git a/src/KOKKOS/pair_coul_debye_kokkos.cpp b/src/KOKKOS/pair_coul_debye_kokkos.cpp index 8dd7e4f3d2..46a7df7cb1 100644 --- a/src/KOKKOS/pair_coul_debye_kokkos.cpp +++ b/src/KOKKOS/pair_coul_debye_kokkos.cpp @@ -265,10 +265,10 @@ void PairCoulDebyeKokkos::init_style() int irequest = neighbor->nrequest - 1; neighbor->requests[irequest]-> - kokkos_host = Kokkos::Impl::is_same::value && - !Kokkos::Impl::is_same::value; + kokkos_host = std::is_same::value && + !std::is_same::value; neighbor->requests[irequest]-> - kokkos_device = Kokkos::Impl::is_same::value; + kokkos_device = std::is_same::value; if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; diff --git a/src/KOKKOS/pair_coul_dsf_kokkos.cpp b/src/KOKKOS/pair_coul_dsf_kokkos.cpp index 836b12ba39..f7bf8fb5d1 100644 --- a/src/KOKKOS/pair_coul_dsf_kokkos.cpp +++ b/src/KOKKOS/pair_coul_dsf_kokkos.cpp @@ -202,10 +202,10 @@ void PairCoulDSFKokkos::init_style() int irequest = neighbor->nrequest - 1; neighbor->requests[irequest]-> - kokkos_host = Kokkos::Impl::is_same::value && - !Kokkos::Impl::is_same::value; + kokkos_host = std::is_same::value && + !std::is_same::value; neighbor->requests[irequest]-> - kokkos_device = Kokkos::Impl::is_same::value; + kokkos_device = std::is_same::value; if 
(neighflag == FULL) { neighbor->requests[irequest]->full = 1; diff --git a/src/KOKKOS/pair_coul_long_kokkos.cpp b/src/KOKKOS/pair_coul_long_kokkos.cpp index 84b89c6373..19a40dad0b 100644 --- a/src/KOKKOS/pair_coul_long_kokkos.cpp +++ b/src/KOKKOS/pair_coul_long_kokkos.cpp @@ -416,10 +416,10 @@ void PairCoulLongKokkos::init_style() int irequest = neighbor->nrequest - 1; neighbor->requests[irequest]-> - kokkos_host = Kokkos::Impl::is_same::value && - !Kokkos::Impl::is_same::value; + kokkos_host = std::is_same::value && + !std::is_same::value; neighbor->requests[irequest]-> - kokkos_device = Kokkos::Impl::is_same::value; + kokkos_device = std::is_same::value; if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; diff --git a/src/KOKKOS/pair_coul_wolf_kokkos.cpp b/src/KOKKOS/pair_coul_wolf_kokkos.cpp index 3ca8f16a79..f3ffdc6069 100644 --- a/src/KOKKOS/pair_coul_wolf_kokkos.cpp +++ b/src/KOKKOS/pair_coul_wolf_kokkos.cpp @@ -203,10 +203,10 @@ void PairCoulWolfKokkos::init_style() int irequest = neighbor->nrequest - 1; neighbor->requests[irequest]-> - kokkos_host = Kokkos::Impl::is_same::value && - !Kokkos::Impl::is_same::value; + kokkos_host = std::is_same::value && + !std::is_same::value; neighbor->requests[irequest]-> - kokkos_device = Kokkos::Impl::is_same::value; + kokkos_device = std::is_same::value; if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; diff --git a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp index 21fd32a2c8..1ddf950fd7 100644 --- a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp +++ b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp @@ -87,10 +87,10 @@ void PairDPDfdtEnergyKokkos::init_style() int irequest = neighbor->nrequest - 1; neighbor->requests[irequest]-> - kokkos_host = Kokkos::Impl::is_same::value && - !Kokkos::Impl::is_same::value; + kokkos_host = std::is_same::value && + !std::is_same::value; neighbor->requests[irequest]-> - kokkos_device = Kokkos::Impl::is_same::value; + kokkos_device 
= std::is_same::value; if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; @@ -125,10 +125,10 @@ void PairDPDfdtEnergyKokkos::init_style() int irequest = neighbor->nrequest - 1; neighbor->requests[irequest]-> - kokkos_host = Kokkos::Impl::is_same::value && - !Kokkos::Impl::is_same::value; + kokkos_host = std::is_same::value && + !std::is_same::value; neighbor->requests[irequest]-> - kokkos_device = Kokkos::Impl::is_same::value; + kokkos_device = std::is_same::value; if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; diff --git a/src/KOKKOS/pair_eam_alloy_kokkos.cpp b/src/KOKKOS/pair_eam_alloy_kokkos.cpp index 48bf63386a..87bb5dddf2 100644 --- a/src/KOKKOS/pair_eam_alloy_kokkos.cpp +++ b/src/KOKKOS/pair_eam_alloy_kokkos.cpp @@ -302,10 +302,10 @@ void PairEAMAlloyKokkos::init_style() int irequest = neighbor->nrequest - 1; neighbor->requests[irequest]-> - kokkos_host = Kokkos::Impl::is_same::value && - !Kokkos::Impl::is_same::value; + kokkos_host = std::is_same::value && + !std::is_same::value; neighbor->requests[irequest]-> - kokkos_device = Kokkos::Impl::is_same::value; + kokkos_device = std::is_same::value; if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; diff --git a/src/KOKKOS/pair_eam_fs_kokkos.cpp b/src/KOKKOS/pair_eam_fs_kokkos.cpp index 6536dd745a..37fab42d4b 100644 --- a/src/KOKKOS/pair_eam_fs_kokkos.cpp +++ b/src/KOKKOS/pair_eam_fs_kokkos.cpp @@ -302,10 +302,10 @@ void PairEAMFSKokkos::init_style() int irequest = neighbor->nrequest - 1; neighbor->requests[irequest]-> - kokkos_host = Kokkos::Impl::is_same::value && - !Kokkos::Impl::is_same::value; + kokkos_host = std::is_same::value && + !std::is_same::value; neighbor->requests[irequest]-> - kokkos_device = Kokkos::Impl::is_same::value; + kokkos_device = std::is_same::value; if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; diff --git a/src/KOKKOS/pair_eam_kokkos.cpp b/src/KOKKOS/pair_eam_kokkos.cpp index 3358fe709c..06e26301a4 100644 --- 
a/src/KOKKOS/pair_eam_kokkos.cpp +++ b/src/KOKKOS/pair_eam_kokkos.cpp @@ -296,10 +296,10 @@ void PairEAMKokkos::init_style() int irequest = neighbor->nrequest - 1; neighbor->requests[irequest]-> - kokkos_host = Kokkos::Impl::is_same::value && - !Kokkos::Impl::is_same::value; + kokkos_host = std::is_same::value && + !std::is_same::value; neighbor->requests[irequest]-> - kokkos_device = Kokkos::Impl::is_same::value; + kokkos_device = std::is_same::value; if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; diff --git a/src/KOKKOS/pair_exp6_rx_kokkos.cpp b/src/KOKKOS/pair_exp6_rx_kokkos.cpp index f3f63c98b2..4d72c85029 100644 --- a/src/KOKKOS/pair_exp6_rx_kokkos.cpp +++ b/src/KOKKOS/pair_exp6_rx_kokkos.cpp @@ -118,10 +118,10 @@ void PairExp6rxKokkos::init_style() int irequest = neighbor->nrequest - 1; neighbor->requests[irequest]-> - kokkos_host = Kokkos::Impl::is_same::value && - !Kokkos::Impl::is_same::value; + kokkos_host = std::is_same::value && + !std::is_same::value; neighbor->requests[irequest]-> - kokkos_device = Kokkos::Impl::is_same::value; + kokkos_device = std::is_same::value; if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; diff --git a/src/KOKKOS/pair_gran_hooke_history_kokkos.cpp b/src/KOKKOS/pair_gran_hooke_history_kokkos.cpp index 5071bae32f..9e65c0589e 100644 --- a/src/KOKKOS/pair_gran_hooke_history_kokkos.cpp +++ b/src/KOKKOS/pair_gran_hooke_history_kokkos.cpp @@ -86,10 +86,10 @@ void PairGranHookeHistoryKokkos::init_style() int irequest = neighbor->nrequest - 1; neighbor->requests[irequest]-> - kokkos_host = Kokkos::Impl::is_same::value && - !Kokkos::Impl::is_same::value; + kokkos_host = std::is_same::value && + !std::is_same::value; neighbor->requests[irequest]-> - kokkos_device = Kokkos::Impl::is_same::value; + kokkos_device = std::is_same::value; if (neighflag == HALF || neighflag == HALFTHREAD) { neighbor->requests[irequest]->full = 0; diff --git a/src/KOKKOS/pair_kokkos.h b/src/KOKKOS/pair_kokkos.h index 
c9f375de4b..41922b7349 100644 --- a/src/KOKKOS/pair_kokkos.h +++ b/src/KOKKOS/pair_kokkos.h @@ -841,7 +841,7 @@ struct PairComputeFunctor { // pair_compute_neighlist will match - either the dummy version // or the real one further below. template -EV_FLOAT pair_compute_neighlist (PairStyle* fpair, typename Kokkos::Impl::enable_if*>::type list) { +EV_FLOAT pair_compute_neighlist (PairStyle* fpair, typename std::enable_if*>::type list) { EV_FLOAT ev; (void) fpair; (void) list; @@ -869,7 +869,7 @@ int GetTeamSize(FunctorStyle& functor, int inum, int reduce_flag, int team_size, // Submit ParallelFor for NEIGHFLAG=HALF,HALFTHREAD,FULL,N2 template -EV_FLOAT pair_compute_neighlist (PairStyle* fpair, typename Kokkos::Impl::enable_if<(NEIGHFLAG&PairStyle::EnabledNeighFlags) != 0, NeighListKokkos*>::type list) { +EV_FLOAT pair_compute_neighlist (PairStyle* fpair, typename std::enable_if<(NEIGHFLAG&PairStyle::EnabledNeighFlags) != 0, NeighListKokkos*>::type list) { EV_FLOAT ev; if (!fpair->lmp->kokkos->neigh_thread_set) diff --git a/src/KOKKOS/pair_lj_charmm_coul_charmm_implicit_kokkos.cpp b/src/KOKKOS/pair_lj_charmm_coul_charmm_implicit_kokkos.cpp index ae6cb61b60..86786be6f2 100644 --- a/src/KOKKOS/pair_lj_charmm_coul_charmm_implicit_kokkos.cpp +++ b/src/KOKKOS/pair_lj_charmm_coul_charmm_implicit_kokkos.cpp @@ -462,10 +462,10 @@ void PairLJCharmmCoulCharmmImplicitKokkos::init_style() int irequest = neighbor->nrequest - 1; neighbor->requests[irequest]-> - kokkos_host = Kokkos::Impl::is_same::value && - !Kokkos::Impl::is_same::value; + kokkos_host = std::is_same::value && + !std::is_same::value; neighbor->requests[irequest]-> - kokkos_device = Kokkos::Impl::is_same::value; + kokkos_device = std::is_same::value; if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; diff --git a/src/KOKKOS/pair_lj_charmm_coul_charmm_kokkos.cpp b/src/KOKKOS/pair_lj_charmm_coul_charmm_kokkos.cpp index 9cdef267e2..3bc7cf5425 100644 --- a/src/KOKKOS/pair_lj_charmm_coul_charmm_kokkos.cpp 
+++ b/src/KOKKOS/pair_lj_charmm_coul_charmm_kokkos.cpp @@ -464,10 +464,10 @@ void PairLJCharmmCoulCharmmKokkos::init_style() int irequest = neighbor->nrequest - 1; neighbor->requests[irequest]-> - kokkos_host = Kokkos::Impl::is_same::value && - !Kokkos::Impl::is_same::value; + kokkos_host = std::is_same::value && + !std::is_same::value; neighbor->requests[irequest]-> - kokkos_device = Kokkos::Impl::is_same::value; + kokkos_device = std::is_same::value; if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; diff --git a/src/KOKKOS/pair_lj_charmm_coul_long_kokkos.cpp b/src/KOKKOS/pair_lj_charmm_coul_long_kokkos.cpp index 441070248d..4285d16bed 100644 --- a/src/KOKKOS/pair_lj_charmm_coul_long_kokkos.cpp +++ b/src/KOKKOS/pair_lj_charmm_coul_long_kokkos.cpp @@ -471,10 +471,10 @@ void PairLJCharmmCoulLongKokkos::init_style() int irequest = neighbor->nrequest - 1; neighbor->requests[irequest]-> - kokkos_host = Kokkos::Impl::is_same::value && - !Kokkos::Impl::is_same::value; + kokkos_host = std::is_same::value && + !std::is_same::value; neighbor->requests[irequest]-> - kokkos_device = Kokkos::Impl::is_same::value; + kokkos_device = std::is_same::value; if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; diff --git a/src/KOKKOS/pair_lj_class2_coul_cut_kokkos.cpp b/src/KOKKOS/pair_lj_class2_coul_cut_kokkos.cpp index 1f7642e965..f338575ff6 100644 --- a/src/KOKKOS/pair_lj_class2_coul_cut_kokkos.cpp +++ b/src/KOKKOS/pair_lj_class2_coul_cut_kokkos.cpp @@ -298,10 +298,10 @@ void PairLJClass2CoulCutKokkos::init_style() int irequest = neighbor->nrequest - 1; neighbor->requests[irequest]-> - kokkos_host = Kokkos::Impl::is_same::value && - !Kokkos::Impl::is_same::value; + kokkos_host = std::is_same::value && + !std::is_same::value; neighbor->requests[irequest]-> - kokkos_device = Kokkos::Impl::is_same::value; + kokkos_device = std::is_same::value; if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; diff --git 
a/src/KOKKOS/pair_lj_class2_coul_long_kokkos.cpp b/src/KOKKOS/pair_lj_class2_coul_long_kokkos.cpp index c88ff9378e..3e62b57fdc 100644 --- a/src/KOKKOS/pair_lj_class2_coul_long_kokkos.cpp +++ b/src/KOKKOS/pair_lj_class2_coul_long_kokkos.cpp @@ -453,10 +453,10 @@ void PairLJClass2CoulLongKokkos::init_style() int irequest = neighbor->nrequest - 1; neighbor->requests[irequest]-> - kokkos_host = Kokkos::Impl::is_same::value && - !Kokkos::Impl::is_same::value; + kokkos_host = std::is_same::value && + !std::is_same::value; neighbor->requests[irequest]-> - kokkos_device = Kokkos::Impl::is_same::value; + kokkos_device = std::is_same::value; if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; diff --git a/src/KOKKOS/pair_lj_class2_kokkos.cpp b/src/KOKKOS/pair_lj_class2_kokkos.cpp index 9900e7361f..76406c4410 100644 --- a/src/KOKKOS/pair_lj_class2_kokkos.cpp +++ b/src/KOKKOS/pair_lj_class2_kokkos.cpp @@ -236,10 +236,10 @@ void PairLJClass2Kokkos::init_style() int irequest = neighbor->nrequest - 1; neighbor->requests[irequest]-> - kokkos_host = Kokkos::Impl::is_same::value && - !Kokkos::Impl::is_same::value; + kokkos_host = std::is_same::value && + !std::is_same::value; neighbor->requests[irequest]-> - kokkos_device = Kokkos::Impl::is_same::value; + kokkos_device = std::is_same::value; if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; diff --git a/src/KOKKOS/pair_lj_cut_coul_cut_kokkos.cpp b/src/KOKKOS/pair_lj_cut_coul_cut_kokkos.cpp index 1601e4a4b2..094c25471c 100644 --- a/src/KOKKOS/pair_lj_cut_coul_cut_kokkos.cpp +++ b/src/KOKKOS/pair_lj_cut_coul_cut_kokkos.cpp @@ -289,10 +289,10 @@ void PairLJCutCoulCutKokkos::init_style() int irequest = neighbor->nrequest - 1; neighbor->requests[irequest]-> - kokkos_host = Kokkos::Impl::is_same::value && - !Kokkos::Impl::is_same::value; + kokkos_host = std::is_same::value && + !std::is_same::value; neighbor->requests[irequest]-> - kokkos_device = Kokkos::Impl::is_same::value; + kokkos_device = 
std::is_same::value; if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; diff --git a/src/KOKKOS/pair_lj_cut_coul_debye_kokkos.cpp b/src/KOKKOS/pair_lj_cut_coul_debye_kokkos.cpp index 6e7d1eeb8b..8d1f650061 100644 --- a/src/KOKKOS/pair_lj_cut_coul_debye_kokkos.cpp +++ b/src/KOKKOS/pair_lj_cut_coul_debye_kokkos.cpp @@ -318,10 +318,10 @@ void PairLJCutCoulDebyeKokkos::init_style() int irequest = neighbor->nrequest - 1; neighbor->requests[irequest]-> - kokkos_host = Kokkos::Impl::is_same::value && - !Kokkos::Impl::is_same::value; + kokkos_host = std::is_same::value && + !std::is_same::value; neighbor->requests[irequest]-> - kokkos_device = Kokkos::Impl::is_same::value; + kokkos_device = std::is_same::value; if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; diff --git a/src/KOKKOS/pair_lj_cut_coul_dsf_kokkos.cpp b/src/KOKKOS/pair_lj_cut_coul_dsf_kokkos.cpp index b7dc7cc26d..bd1754df9a 100644 --- a/src/KOKKOS/pair_lj_cut_coul_dsf_kokkos.cpp +++ b/src/KOKKOS/pair_lj_cut_coul_dsf_kokkos.cpp @@ -311,10 +311,10 @@ void PairLJCutCoulDSFKokkos::init_style() int irequest = neighbor->nrequest - 1; neighbor->requests[irequest]-> - kokkos_host = Kokkos::Impl::is_same::value && - !Kokkos::Impl::is_same::value; + kokkos_host = std::is_same::value && + !std::is_same::value; neighbor->requests[irequest]-> - kokkos_device = Kokkos::Impl::is_same::value; + kokkos_device = std::is_same::value; if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; diff --git a/src/KOKKOS/pair_lj_cut_coul_long_kokkos.cpp b/src/KOKKOS/pair_lj_cut_coul_long_kokkos.cpp index 122d59af82..fa53850b07 100644 --- a/src/KOKKOS/pair_lj_cut_coul_long_kokkos.cpp +++ b/src/KOKKOS/pair_lj_cut_coul_long_kokkos.cpp @@ -452,10 +452,10 @@ void PairLJCutCoulLongKokkos::init_style() int irequest = neighbor->nrequest - 1; neighbor->requests[irequest]-> - kokkos_host = Kokkos::Impl::is_same::value && - !Kokkos::Impl::is_same::value; + kokkos_host = std::is_same::value && + 
!std::is_same::value; neighbor->requests[irequest]-> - kokkos_device = Kokkos::Impl::is_same::value; + kokkos_device = std::is_same::value; if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; diff --git a/src/KOKKOS/pair_lj_cut_kokkos.cpp b/src/KOKKOS/pair_lj_cut_kokkos.cpp index df750b7524..3770e8f816 100644 --- a/src/KOKKOS/pair_lj_cut_kokkos.cpp +++ b/src/KOKKOS/pair_lj_cut_kokkos.cpp @@ -230,10 +230,10 @@ void PairLJCutKokkos::init_style() int irequest = neighbor->nrequest - 1; neighbor->requests[irequest]-> - kokkos_host = Kokkos::Impl::is_same::value && - !Kokkos::Impl::is_same::value; + kokkos_host = std::is_same::value && + !std::is_same::value; neighbor->requests[irequest]-> - kokkos_device = Kokkos::Impl::is_same::value; + kokkos_device = std::is_same::value; if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; diff --git a/src/KOKKOS/pair_lj_expand_kokkos.cpp b/src/KOKKOS/pair_lj_expand_kokkos.cpp index 38bebc364f..c46e0d47e4 100644 --- a/src/KOKKOS/pair_lj_expand_kokkos.cpp +++ b/src/KOKKOS/pair_lj_expand_kokkos.cpp @@ -238,10 +238,10 @@ void PairLJExpandKokkos::init_style() int irequest = neighbor->nrequest - 1; neighbor->requests[irequest]-> - kokkos_host = Kokkos::Impl::is_same::value && - !Kokkos::Impl::is_same::value; + kokkos_host = std::is_same::value && + !std::is_same::value; neighbor->requests[irequest]-> - kokkos_device = Kokkos::Impl::is_same::value; + kokkos_device = std::is_same::value; if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; diff --git a/src/KOKKOS/pair_lj_gromacs_coul_gromacs_kokkos.cpp b/src/KOKKOS/pair_lj_gromacs_coul_gromacs_kokkos.cpp index a46a5c0441..1bef3f0a27 100644 --- a/src/KOKKOS/pair_lj_gromacs_coul_gromacs_kokkos.cpp +++ b/src/KOKKOS/pair_lj_gromacs_coul_gromacs_kokkos.cpp @@ -447,10 +447,10 @@ void PairLJGromacsCoulGromacsKokkos::init_style() int irequest = neighbor->nrequest - 1; neighbor->requests[irequest]-> - kokkos_host = Kokkos::Impl::is_same::value && - 
!Kokkos::Impl::is_same::value; + kokkos_host = std::is_same::value && + !std::is_same::value; neighbor->requests[irequest]-> - kokkos_device = Kokkos::Impl::is_same::value; + kokkos_device = std::is_same::value; if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; diff --git a/src/KOKKOS/pair_lj_gromacs_kokkos.cpp b/src/KOKKOS/pair_lj_gromacs_kokkos.cpp index 23ed5e5595..cfc65c883b 100644 --- a/src/KOKKOS/pair_lj_gromacs_kokkos.cpp +++ b/src/KOKKOS/pair_lj_gromacs_kokkos.cpp @@ -285,10 +285,10 @@ void PairLJGromacsKokkos::init_style() int irequest = neighbor->nrequest - 1; neighbor->requests[irequest]-> - kokkos_host = Kokkos::Impl::is_same::value && - !Kokkos::Impl::is_same::value; + kokkos_host = std::is_same::value && + !std::is_same::value; neighbor->requests[irequest]-> - kokkos_device = Kokkos::Impl::is_same::value; + kokkos_device = std::is_same::value; if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; diff --git a/src/KOKKOS/pair_lj_sdk_kokkos.cpp b/src/KOKKOS/pair_lj_sdk_kokkos.cpp index 25f081d255..cb99de3cd9 100644 --- a/src/KOKKOS/pair_lj_sdk_kokkos.cpp +++ b/src/KOKKOS/pair_lj_sdk_kokkos.cpp @@ -268,10 +268,10 @@ void PairLJSDKKokkos::init_style() int irequest = neighbor->nrequest - 1; neighbor->requests[irequest]-> - kokkos_host = Kokkos::Impl::is_same::value && - !Kokkos::Impl::is_same::value; + kokkos_host = std::is_same::value && + !std::is_same::value; neighbor->requests[irequest]-> - kokkos_device = Kokkos::Impl::is_same::value; + kokkos_device = std::is_same::value; if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; diff --git a/src/KOKKOS/pair_morse_kokkos.cpp b/src/KOKKOS/pair_morse_kokkos.cpp index d3e3042a34..799278bf9e 100644 --- a/src/KOKKOS/pair_morse_kokkos.cpp +++ b/src/KOKKOS/pair_morse_kokkos.cpp @@ -247,10 +247,10 @@ void PairMorseKokkos::init_style() int irequest = neighbor->nrequest - 1; neighbor->requests[irequest]-> - kokkos_host = Kokkos::Impl::is_same::value && - 
!Kokkos::Impl::is_same::value; + kokkos_host = std::is_same::value && + !std::is_same::value; neighbor->requests[irequest]-> - kokkos_device = Kokkos::Impl::is_same::value; + kokkos_device = std::is_same::value; if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; diff --git a/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp b/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp index 75247859ed..3b7a738026 100644 --- a/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp +++ b/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp @@ -99,10 +99,10 @@ void PairMultiLucyRXKokkos::init_style() int irequest = neighbor->nrequest - 1; neighbor->requests[irequest]-> - kokkos_host = Kokkos::Impl::is_same::value && - !Kokkos::Impl::is_same::value; + kokkos_host = std::is_same::value && + !std::is_same::value; neighbor->requests[irequest]-> - kokkos_device = Kokkos::Impl::is_same::value; + kokkos_device = std::is_same::value; if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; diff --git a/src/KOKKOS/pair_reaxc_kokkos.cpp b/src/KOKKOS/pair_reaxc_kokkos.cpp index d0ad1f1b09..18144bb653 100644 --- a/src/KOKKOS/pair_reaxc_kokkos.cpp +++ b/src/KOKKOS/pair_reaxc_kokkos.cpp @@ -147,10 +147,10 @@ void PairReaxCKokkos::init_style() int irequest = neighbor->nrequest - 1; neighbor->requests[irequest]-> - kokkos_host = Kokkos::Impl::is_same::value && - !Kokkos::Impl::is_same::value; + kokkos_host = std::is_same::value && + !std::is_same::value; neighbor->requests[irequest]-> - kokkos_device = Kokkos::Impl::is_same::value; + kokkos_device = std::is_same::value; if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; diff --git a/src/KOKKOS/pair_snap_kokkos_impl.h b/src/KOKKOS/pair_snap_kokkos_impl.h index d4e5535614..ad850981cc 100644 --- a/src/KOKKOS/pair_snap_kokkos_impl.h +++ b/src/KOKKOS/pair_snap_kokkos_impl.h @@ -91,10 +91,10 @@ void PairSNAPKokkos::init_style() int irequest = neighbor->request(this,instance_me); neighbor->requests[irequest]-> - kokkos_host = Kokkos::Impl::is_same::value && 
- !Kokkos::Impl::is_same::value; + kokkos_host = std::is_same::value && + !std::is_same::value; neighbor->requests[irequest]-> - kokkos_device = Kokkos::Impl::is_same::value; + kokkos_device = std::is_same::value; if (neighflag == HALF || neighflag == HALFTHREAD) { // still need atomics, even though using a full neigh list neighbor->requests[irequest]->full = 1; diff --git a/src/KOKKOS/pair_sw_kokkos.cpp b/src/KOKKOS/pair_sw_kokkos.cpp index e85afa362f..3ce99fe629 100644 --- a/src/KOKKOS/pair_sw_kokkos.cpp +++ b/src/KOKKOS/pair_sw_kokkos.cpp @@ -610,10 +610,10 @@ void PairSWKokkos::init_style() int irequest = neighbor->nrequest - 1; neighbor->requests[irequest]-> - kokkos_host = Kokkos::Impl::is_same::value && - !Kokkos::Impl::is_same::value; + kokkos_host = std::is_same::value && + !std::is_same::value; neighbor->requests[irequest]-> - kokkos_device = Kokkos::Impl::is_same::value; + kokkos_device = std::is_same::value; // always request a full neighbor list diff --git a/src/KOKKOS/pair_table_kokkos.cpp b/src/KOKKOS/pair_table_kokkos.cpp index 5ee24ad479..7311e4ec05 100644 --- a/src/KOKKOS/pair_table_kokkos.cpp +++ b/src/KOKKOS/pair_table_kokkos.cpp @@ -514,10 +514,10 @@ void PairTableKokkos::init_style() int irequest = neighbor->nrequest - 1; neighbor->requests[irequest]-> - kokkos_host = Kokkos::Impl::is_same::value && - !Kokkos::Impl::is_same::value; + kokkos_host = std::is_same::value && + !std::is_same::value; neighbor->requests[irequest]-> - kokkos_device = Kokkos::Impl::is_same::value; + kokkos_device = std::is_same::value; if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; diff --git a/src/KOKKOS/pair_table_rx_kokkos.cpp b/src/KOKKOS/pair_table_rx_kokkos.cpp index daebeda8db..cd7aa373ed 100644 --- a/src/KOKKOS/pair_table_rx_kokkos.cpp +++ b/src/KOKKOS/pair_table_rx_kokkos.cpp @@ -1268,10 +1268,10 @@ void PairTableRXKokkos::init_style() int irequest = neighbor->nrequest - 1; neighbor->requests[irequest]-> - kokkos_host = 
Kokkos::Impl::is_same::value && - !Kokkos::Impl::is_same::value; + kokkos_host = std::is_same::value && + !std::is_same::value; neighbor->requests[irequest]-> - kokkos_device = Kokkos::Impl::is_same::value; + kokkos_device = std::is_same::value; if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; diff --git a/src/KOKKOS/pair_tersoff_kokkos.cpp b/src/KOKKOS/pair_tersoff_kokkos.cpp index b360b20ef3..d068ac0412 100644 --- a/src/KOKKOS/pair_tersoff_kokkos.cpp +++ b/src/KOKKOS/pair_tersoff_kokkos.cpp @@ -88,10 +88,10 @@ void PairTersoffKokkos::init_style() int irequest = neighbor->nrequest - 1; neighbor->requests[irequest]-> - kokkos_host = Kokkos::Impl::is_same::value && - !Kokkos::Impl::is_same::value; + kokkos_host = std::is_same::value && + !std::is_same::value; neighbor->requests[irequest]-> - kokkos_device = Kokkos::Impl::is_same::value; + kokkos_device = std::is_same::value; if (neighflag == FULL) error->all(FLERR,"Cannot (yet) use full neighbor list style with tersoff/kk"); diff --git a/src/KOKKOS/pair_tersoff_mod_kokkos.cpp b/src/KOKKOS/pair_tersoff_mod_kokkos.cpp index 81ef486999..5eb23d498a 100644 --- a/src/KOKKOS/pair_tersoff_mod_kokkos.cpp +++ b/src/KOKKOS/pair_tersoff_mod_kokkos.cpp @@ -88,10 +88,10 @@ void PairTersoffMODKokkos::init_style() int irequest = neighbor->nrequest - 1; neighbor->requests[irequest]-> - kokkos_host = Kokkos::Impl::is_same::value && - !Kokkos::Impl::is_same::value; + kokkos_host = std::is_same::value && + !std::is_same::value; neighbor->requests[irequest]-> - kokkos_device = Kokkos::Impl::is_same::value; + kokkos_device = std::is_same::value; if (neighflag == FULL) error->all(FLERR,"Cannot (yet) use full neighbor list style with tersoff/mod/kk"); diff --git a/src/KOKKOS/pair_tersoff_zbl_kokkos.cpp b/src/KOKKOS/pair_tersoff_zbl_kokkos.cpp index 4593f32e36..2648689fad 100644 --- a/src/KOKKOS/pair_tersoff_zbl_kokkos.cpp +++ b/src/KOKKOS/pair_tersoff_zbl_kokkos.cpp @@ -101,10 +101,10 @@ void 
PairTersoffZBLKokkos::init_style() int irequest = neighbor->nrequest - 1; neighbor->requests[irequest]-> - kokkos_host = Kokkos::Impl::is_same::value && - !Kokkos::Impl::is_same::value; + kokkos_host = std::is_same::value && + !std::is_same::value; neighbor->requests[irequest]-> - kokkos_device = Kokkos::Impl::is_same::value; + kokkos_device = std::is_same::value; if (neighflag == FULL) error->all(FLERR,"Cannot (yet) use full neighbor list style with tersoff/zbl/kk"); diff --git a/src/KOKKOS/pair_vashishta_kokkos.cpp b/src/KOKKOS/pair_vashishta_kokkos.cpp index 614d3334d5..ddb0688e03 100644 --- a/src/KOKKOS/pair_vashishta_kokkos.cpp +++ b/src/KOKKOS/pair_vashishta_kokkos.cpp @@ -585,10 +585,10 @@ void PairVashishtaKokkos::init_style() int irequest = neighbor->nrequest - 1; neighbor->requests[irequest]-> - kokkos_host = Kokkos::Impl::is_same::value && - !Kokkos::Impl::is_same::value; + kokkos_host = std::is_same::value && + !std::is_same::value; neighbor->requests[irequest]-> - kokkos_device = Kokkos::Impl::is_same::value; + kokkos_device = std::is_same::value; // always request a full neighbor list diff --git a/src/KOKKOS/pair_yukawa_kokkos.cpp b/src/KOKKOS/pair_yukawa_kokkos.cpp index 6dfffd4a54..a1838c9ef6 100644 --- a/src/KOKKOS/pair_yukawa_kokkos.cpp +++ b/src/KOKKOS/pair_yukawa_kokkos.cpp @@ -120,10 +120,10 @@ void PairYukawaKokkos::init_style() int irequest = neighbor->nrequest - 1; neighbor->requests[irequest]-> - kokkos_host = Kokkos::Impl::is_same::value && - !Kokkos::Impl::is_same::value; + kokkos_host = std::is_same::value && + !std::is_same::value; neighbor->requests[irequest]-> - kokkos_device = Kokkos::Impl::is_same::value; + kokkos_device = std::is_same::value; if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; diff --git a/src/KOKKOS/pair_zbl_kokkos.cpp b/src/KOKKOS/pair_zbl_kokkos.cpp index 5697dd5b00..7a476e4a14 100644 --- a/src/KOKKOS/pair_zbl_kokkos.cpp +++ b/src/KOKKOS/pair_zbl_kokkos.cpp @@ -92,10 +92,10 @@ void 
PairZBLKokkos::init_style() int irequest = neighbor->nrequest - 1; neighbor->requests[irequest]-> - kokkos_host = Kokkos::Impl::is_same::value && - !Kokkos::Impl::is_same::value; + kokkos_host = std::is_same::value && + !std::is_same::value; neighbor->requests[irequest]-> - kokkos_device = Kokkos::Impl::is_same::value; + kokkos_device = std::is_same::value; if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; diff --git a/src/KOKKOS/sna_kokkos_impl.h b/src/KOKKOS/sna_kokkos_impl.h index dcedf333e5..c6c8bbb421 100644 --- a/src/KOKKOS/sna_kokkos_impl.h +++ b/src/KOKKOS/sna_kokkos_impl.h @@ -1643,7 +1643,7 @@ double SNAKokkos::memory_usage() } #endif bytes += natom * idxu_max * sizeof(double) * 2; // ulisttot - if (!Kokkos::Impl::is_same::value) + if (!std::is_same::value) bytes += natom * idxu_max * sizeof(double) * 2; // ulisttot_lr bytes += natom * idxz_max * sizeof(double) * 2; // zlist -- GitLab From 0252d8c21073512b6dd8b85d8c21cef8eeabd175 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Wed, 11 Mar 2020 12:17:53 -0600 Subject: [PATCH 078/328] Fix compile for UVM --- src/KOKKOS/angle_charmm_kokkos.h | 6 +-- src/KOKKOS/angle_class2_kokkos.cpp | 6 +-- src/KOKKOS/angle_cosine_kokkos.cpp | 6 +-- src/KOKKOS/angle_harmonic_kokkos.cpp | 6 +-- src/KOKKOS/bond_class2_kokkos.h | 6 +-- src/KOKKOS/bond_fene_kokkos.cpp | 6 +-- src/KOKKOS/bond_harmonic_kokkos.h | 6 +-- src/KOKKOS/dihedral_charmm_kokkos.cpp | 2 +- src/KOKKOS/dihedral_charmm_kokkos.h | 8 ++-- src/KOKKOS/dihedral_class2_kokkos.cpp | 6 +-- src/KOKKOS/dihedral_harmonic_kokkos.cpp | 6 +-- src/KOKKOS/dihedral_opls_kokkos.cpp | 6 +-- src/KOKKOS/fix_qeq_reax_kokkos.h | 4 +- src/KOKKOS/fix_rx_kokkos.cpp | 6 +-- src/KOKKOS/improper_class2_kokkos.cpp | 8 ++-- src/KOKKOS/improper_class2_kokkos.h | 2 +- src/KOKKOS/improper_harmonic_kokkos.h | 6 +-- src/KOKKOS/kokkos_type.h | 14 +++++++ src/KOKKOS/pair_coul_dsf_kokkos.cpp | 8 ++-- src/KOKKOS/pair_coul_wolf_kokkos.cpp | 8 ++-- 
src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp | 12 +++--- src/KOKKOS/pair_eam_alloy_kokkos.h | 16 ++++---- src/KOKKOS/pair_eam_fs_kokkos.h | 16 ++++---- src/KOKKOS/pair_eam_kokkos.h | 16 ++++---- src/KOKKOS/pair_exp6_rx_kokkos.cpp | 16 ++++---- src/KOKKOS/pair_gran_hooke_history_kokkos.cpp | 6 +-- src/KOKKOS/pair_kokkos.h | 12 +++--- src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp | 8 ++-- src/KOKKOS/pair_reaxc_kokkos.cpp | 10 ++--- src/KOKKOS/pair_reaxc_kokkos.h | 38 +++++++++---------- src/KOKKOS/pair_snap_kokkos.h | 8 ++-- src/KOKKOS/pair_sw_kokkos.h | 12 +++--- src/KOKKOS/pair_table_rx_kokkos.cpp | 26 +++++++------ src/KOKKOS/pair_tersoff_kokkos.h | 12 +++--- src/KOKKOS/pair_tersoff_mod_kokkos.h | 12 +++--- src/KOKKOS/pair_tersoff_zbl_kokkos.h | 12 +++--- src/KOKKOS/pair_vashishta_kokkos.cpp | 10 ++--- src/KOKKOS/pppm_kokkos.cpp | 2 +- src/KOKKOS/sna_kokkos.h | 4 +- 39 files changed, 195 insertions(+), 179 deletions(-) diff --git a/src/KOKKOS/angle_charmm_kokkos.h b/src/KOKKOS/angle_charmm_kokkos.h index e168160562..865439b83a 100644 --- a/src/KOKKOS/angle_charmm_kokkos.h +++ b/src/KOKKOS/angle_charmm_kokkos.h @@ -63,13 +63,13 @@ class AngleCharmmKokkos : public AngleCharmm { typedef ArrayTypes AT; typename AT::t_x_array_randomread x; - typename Kokkos::View > f; + typename Kokkos::View::value,Kokkos::MemoryTraits > f; typename AT::t_int_2d anglelist; Kokkos::DualView k_eatom; Kokkos::DualView k_vatom; - Kokkos::View > d_eatom; - Kokkos::View > d_vatom; + Kokkos::View::value,Kokkos::MemoryTraits > d_eatom; + Kokkos::View::value,Kokkos::MemoryTraits > d_vatom; int nlocal,newton_bond; int eflag,vflag; diff --git a/src/KOKKOS/angle_class2_kokkos.cpp b/src/KOKKOS/angle_class2_kokkos.cpp index 809ce7e7dd..57563b959f 100644 --- a/src/KOKKOS/angle_class2_kokkos.cpp +++ b/src/KOKKOS/angle_class2_kokkos.cpp @@ -158,7 +158,7 @@ KOKKOS_INLINE_FUNCTION void AngleClass2Kokkos::operator()(TagAngleClass2Compute, const int &n, EV_FLOAT& ev) const { // The f array is atomic - Kokkos::View 
> a_f = f; + Kokkos::View::value,Kokkos::MemoryTraits > a_f = f; const int i1 = anglelist(n,0); const int i2 = anglelist(n,1); @@ -495,8 +495,8 @@ void AngleClass2Kokkos::ev_tally(EV_FLOAT &ev, const int i, const in F_FLOAT v[6]; // The eatom and vatom arrays are atomic - Kokkos::View > v_eatom = k_eatom.template view(); - Kokkos::View > v_vatom = k_vatom.template view(); + Kokkos::View::value,Kokkos::MemoryTraits > v_eatom = k_eatom.template view(); + Kokkos::View::value,Kokkos::MemoryTraits > v_vatom = k_vatom.template view(); if (eflag_either) { if (eflag_global) { diff --git a/src/KOKKOS/angle_cosine_kokkos.cpp b/src/KOKKOS/angle_cosine_kokkos.cpp index da0ff398f1..65c4c1475a 100644 --- a/src/KOKKOS/angle_cosine_kokkos.cpp +++ b/src/KOKKOS/angle_cosine_kokkos.cpp @@ -141,7 +141,7 @@ KOKKOS_INLINE_FUNCTION void AngleCosineKokkos::operator()(TagAngleCosineCompute, const int &n, EV_FLOAT& ev) const { // The f array is atomic - Kokkos::View > a_f = f; + Kokkos::View::value,Kokkos::MemoryTraits > a_f = f; const int i1 = anglelist(n,0); const int i2 = anglelist(n,1); @@ -284,8 +284,8 @@ void AngleCosineKokkos::ev_tally(EV_FLOAT &ev, const int i, const in F_FLOAT v[6]; // The eatom and vatom arrays are atomic - Kokkos::View > v_eatom = k_eatom.template view(); - Kokkos::View > v_vatom = k_vatom.template view(); + Kokkos::View::value,Kokkos::MemoryTraits > v_eatom = k_eatom.template view(); + Kokkos::View::value,Kokkos::MemoryTraits > v_vatom = k_vatom.template view(); if (eflag_either) { if (eflag_global) { diff --git a/src/KOKKOS/angle_harmonic_kokkos.cpp b/src/KOKKOS/angle_harmonic_kokkos.cpp index fc274bb894..4b8a87ece7 100644 --- a/src/KOKKOS/angle_harmonic_kokkos.cpp +++ b/src/KOKKOS/angle_harmonic_kokkos.cpp @@ -142,7 +142,7 @@ KOKKOS_INLINE_FUNCTION void AngleHarmonicKokkos::operator()(TagAngleHarmonicCompute, const int &n, EV_FLOAT& ev) const { // The f array is atomic - Kokkos::View > a_f = f; + Kokkos::View::value,Kokkos::MemoryTraits > a_f = f; const int i1 
= anglelist(n,0); const int i2 = anglelist(n,1); @@ -302,8 +302,8 @@ void AngleHarmonicKokkos::ev_tally(EV_FLOAT &ev, const int i, const F_FLOAT v[6]; // The eatom and vatom arrays are atomic - Kokkos::View > v_eatom = k_eatom.template view(); - Kokkos::View > v_vatom = k_vatom.template view(); + Kokkos::View::value,Kokkos::MemoryTraits > v_eatom = k_eatom.template view(); + Kokkos::View::value,Kokkos::MemoryTraits > v_vatom = k_vatom.template view(); if (eflag_either) { if (eflag_global) { diff --git a/src/KOKKOS/bond_class2_kokkos.h b/src/KOKKOS/bond_class2_kokkos.h index a31ae4b8ae..b3c1d5f682 100644 --- a/src/KOKKOS/bond_class2_kokkos.h +++ b/src/KOKKOS/bond_class2_kokkos.h @@ -63,13 +63,13 @@ class BondClass2Kokkos : public BondClass2 { class NeighborKokkos *neighborKK; typename AT::t_x_array_randomread x; - typename Kokkos::View > f; + typename Kokkos::View::value,Kokkos::MemoryTraits > f; typename AT::t_int_2d bondlist; Kokkos::DualView k_eatom; Kokkos::DualView k_vatom; - Kokkos::View > d_eatom; - Kokkos::View > d_vatom; + Kokkos::View::value,Kokkos::MemoryTraits > d_eatom; + Kokkos::View::value,Kokkos::MemoryTraits > d_vatom; int nlocal,newton_bond; int eflag,vflag; diff --git a/src/KOKKOS/bond_fene_kokkos.cpp b/src/KOKKOS/bond_fene_kokkos.cpp index 361bb61f7e..5f1e9d3ddc 100644 --- a/src/KOKKOS/bond_fene_kokkos.cpp +++ b/src/KOKKOS/bond_fene_kokkos.cpp @@ -166,7 +166,7 @@ void BondFENEKokkos::operator()(TagBondFENECompute > a_f = f; + Kokkos::View::value,Kokkos::MemoryTraits > a_f = f; const int i1 = bondlist(n,0); const int i2 = bondlist(n,1); @@ -320,8 +320,8 @@ void BondFENEKokkos::ev_tally(EV_FLOAT &ev, const int &i, const int F_FLOAT v[6]; // The eatom and vatom arrays are atomic - Kokkos::View > v_eatom = k_eatom.view(); - Kokkos::View > v_vatom = k_vatom.view(); + Kokkos::View::value,Kokkos::MemoryTraits > v_eatom = k_eatom.view(); + Kokkos::View::value,Kokkos::MemoryTraits > v_vatom = k_vatom.view(); if (eflag_either) { if (eflag_global) { diff 
--git a/src/KOKKOS/bond_harmonic_kokkos.h b/src/KOKKOS/bond_harmonic_kokkos.h index 2bf12f3766..b5bee7e909 100644 --- a/src/KOKKOS/bond_harmonic_kokkos.h +++ b/src/KOKKOS/bond_harmonic_kokkos.h @@ -63,13 +63,13 @@ class BondHarmonicKokkos : public BondHarmonic { typedef ArrayTypes AT; typename AT::t_x_array_randomread x; - typename Kokkos::View > f; + typename Kokkos::View::value,Kokkos::MemoryTraits > f; typename AT::t_int_2d bondlist; Kokkos::DualView k_eatom; Kokkos::DualView k_vatom; - Kokkos::View > d_eatom; - Kokkos::View > d_vatom; + Kokkos::View::value,Kokkos::MemoryTraits > d_eatom; + Kokkos::View::value,Kokkos::MemoryTraits > d_vatom; int nlocal,newton_bond; int eflag,vflag; diff --git a/src/KOKKOS/dihedral_charmm_kokkos.cpp b/src/KOKKOS/dihedral_charmm_kokkos.cpp index 94fd0b9bb7..939834d096 100644 --- a/src/KOKKOS/dihedral_charmm_kokkos.cpp +++ b/src/KOKKOS/dihedral_charmm_kokkos.cpp @@ -201,7 +201,7 @@ KOKKOS_INLINE_FUNCTION void DihedralCharmmKokkos::operator()(TagDihedralCharmmCompute, const int &n, EVM_FLOAT& evm) const { // The f array is atomic - Kokkos::View > a_f = f; + Kokkos::View::value,Kokkos::MemoryTraits > a_f = f; const int i1 = dihedrallist(n,0); const int i2 = dihedrallist(n,1); diff --git a/src/KOKKOS/dihedral_charmm_kokkos.h b/src/KOKKOS/dihedral_charmm_kokkos.h index 449f934533..21bb6fd2e1 100644 --- a/src/KOKKOS/dihedral_charmm_kokkos.h +++ b/src/KOKKOS/dihedral_charmm_kokkos.h @@ -134,13 +134,13 @@ class DihedralCharmmKokkos : public DihedralCharmm { Kokkos::DualView k_eatom; Kokkos::DualView k_vatom; - Kokkos::View > d_eatom; - Kokkos::View > d_vatom; + Kokkos::View::value,Kokkos::MemoryTraits > d_eatom; + Kokkos::View::value,Kokkos::MemoryTraits > d_vatom; Kokkos::DualView k_eatom_pair; Kokkos::DualView k_vatom_pair; - Kokkos::View > d_eatom_pair; - Kokkos::View > d_vatom_pair; + Kokkos::View::value,Kokkos::MemoryTraits > d_eatom_pair; + Kokkos::View::value,Kokkos::MemoryTraits > d_vatom_pair; int nlocal,newton_bond; int 
eflag,vflag; diff --git a/src/KOKKOS/dihedral_class2_kokkos.cpp b/src/KOKKOS/dihedral_class2_kokkos.cpp index 0310053b5e..60daca3137 100644 --- a/src/KOKKOS/dihedral_class2_kokkos.cpp +++ b/src/KOKKOS/dihedral_class2_kokkos.cpp @@ -197,7 +197,7 @@ KOKKOS_INLINE_FUNCTION void DihedralClass2Kokkos::operator()(TagDihedralClass2Compute, const int &n, EV_FLOAT& ev) const { // The f array is atomic - Kokkos::View > a_f = f; + Kokkos::View::value,Kokkos::MemoryTraits > a_f = f; const int i1 = dihedrallist(n,0); const int i2 = dihedrallist(n,1); @@ -1015,8 +1015,8 @@ void DihedralClass2Kokkos::ev_tally(EV_FLOAT &ev, const int i1, cons F_FLOAT v[6]; // The eatom and vatom arrays are atomic - Kokkos::View > v_eatom = k_eatom.view(); - Kokkos::View > v_vatom = k_vatom.view(); + Kokkos::View::value,Kokkos::MemoryTraits > v_eatom = k_eatom.view(); + Kokkos::View::value,Kokkos::MemoryTraits > v_vatom = k_vatom.view(); if (eflag_either) { if (eflag_global) { diff --git a/src/KOKKOS/dihedral_harmonic_kokkos.cpp b/src/KOKKOS/dihedral_harmonic_kokkos.cpp index dd77bc605b..0ed739ef38 100644 --- a/src/KOKKOS/dihedral_harmonic_kokkos.cpp +++ b/src/KOKKOS/dihedral_harmonic_kokkos.cpp @@ -158,7 +158,7 @@ KOKKOS_INLINE_FUNCTION void DihedralHarmonicKokkos::operator()(TagDihedralHarmonicCompute, const int &n, EV_FLOAT& ev) const { // The f array is atomic - Kokkos::View > a_f = f; + Kokkos::View::value,Kokkos::MemoryTraits > a_f = f; const int i1 = dihedrallist(n,0); const int i2 = dihedrallist(n,1); @@ -414,8 +414,8 @@ void DihedralHarmonicKokkos::ev_tally(EV_FLOAT &ev, const int i1, co F_FLOAT v[6]; // The eatom and vatom arrays are atomic - Kokkos::View > v_eatom = k_eatom.view(); - Kokkos::View > v_vatom = k_vatom.view(); + Kokkos::View::value,Kokkos::MemoryTraits > v_eatom = k_eatom.view(); + Kokkos::View::value,Kokkos::MemoryTraits > v_vatom = k_vatom.view(); if (eflag_either) { if (eflag_global) { diff --git a/src/KOKKOS/dihedral_opls_kokkos.cpp b/src/KOKKOS/dihedral_opls_kokkos.cpp 
index 825d106e04..0f510e01ad 100644 --- a/src/KOKKOS/dihedral_opls_kokkos.cpp +++ b/src/KOKKOS/dihedral_opls_kokkos.cpp @@ -157,7 +157,7 @@ KOKKOS_INLINE_FUNCTION void DihedralOPLSKokkos::operator()(TagDihedralOPLSCompute, const int &n, EV_FLOAT& ev) const { // The f array is atomic - Kokkos::View > a_f = f; + Kokkos::View::value,Kokkos::MemoryTraits > a_f = f; const int i1 = dihedrallist(n,0); const int i2 = dihedrallist(n,1); @@ -419,8 +419,8 @@ void DihedralOPLSKokkos::ev_tally(EV_FLOAT &ev, const int i1, const F_FLOAT v[6]; // The eatom and vatom arrays are atomic - Kokkos::View > v_eatom = k_eatom.view(); - Kokkos::View > v_vatom = k_vatom.view(); + Kokkos::View::value,Kokkos::MemoryTraits > v_eatom = k_eatom.view(); + Kokkos::View::value,Kokkos::MemoryTraits > v_vatom = k_vatom.view(); if (eflag_either) { if (eflag_global) { diff --git a/src/KOKKOS/fix_qeq_reax_kokkos.h b/src/KOKKOS/fix_qeq_reax_kokkos.h index cd69aa9283..55dec64d33 100644 --- a/src/KOKKOS/fix_qeq_reax_kokkos.h +++ b/src/KOKKOS/fix_qeq_reax_kokkos.h @@ -200,8 +200,8 @@ class FixQEqReaxKokkos : public FixQEqReax { HAT::t_ffloat_2d h_s_hist, h_t_hist; typename AT::t_ffloat_2d_randomread r_s_hist, r_t_hist; - Kokkos::Experimental::ScatterView dup_o; - Kokkos::Experimental::ScatterView ndup_o; + Kokkos::Experimental::ScatterView::value, Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated> dup_o; + Kokkos::Experimental::ScatterView::value, Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated> ndup_o; void init_shielding_k(); void init_hist(); diff --git a/src/KOKKOS/fix_rx_kokkos.cpp b/src/KOKKOS/fix_rx_kokkos.cpp index dcb1ac0b71..9271cf1b88 100644 --- a/src/KOKKOS/fix_rx_kokkos.cpp +++ b/src/KOKKOS/fix_rx_kokkos.cpp @@ -1908,7 +1908,7 @@ void FixRxKokkos::operator()(Tag_FixRxKokkos_firstPairOperator::value> > AtomicViewType; + typedef Kokkos::View< E_FLOAT*, typename DAT::t_efloat_1d::array_layout, typename KKDevice::value, Kokkos::MemoryTraits< 
AtomicF< NEIGHFLAG >::value> > AtomicViewType; AtomicViewType a_dpdThetaLocal = d_dpdThetaLocal; AtomicViewType a_sumWeights = d_sumWeights; @@ -2083,8 +2083,8 @@ void FixRxKokkos::computeLocalTemperature() { // Create an atomic view of sumWeights and dpdThetaLocal. Only needed // for Half/thread scenarios. - //typedef Kokkos::View< E_FLOAT*, typename DAT::t_efloat_1d::array_layout, DeviceType, Kokkos::MemoryTraits< AtomicF< NEIGHFLAG >::value> > AtomicViewType; - typedef Kokkos::View< E_FLOAT*, typename DAT::t_efloat_1d::array_layout, DeviceType, Kokkos::MemoryTraits< AtomicF< NEIGHFLAG >::value> > AtomicViewType; + //typedef Kokkos::View< E_FLOAT*, typename DAT::t_efloat_1d::array_layout, typename KKDevice::value, Kokkos::MemoryTraits< AtomicF< NEIGHFLAG >::value> > AtomicViewType; + typedef Kokkos::View< E_FLOAT*, typename DAT::t_efloat_1d::array_layout, typename KKDevice::value, Kokkos::MemoryTraits< AtomicF< NEIGHFLAG >::value> > AtomicViewType; AtomicViewType a_dpdThetaLocal = d_dpdThetaLocal; AtomicViewType a_sumWeights = d_sumWeights; diff --git a/src/KOKKOS/improper_class2_kokkos.cpp b/src/KOKKOS/improper_class2_kokkos.cpp index defd5e16f5..888f526c76 100644 --- a/src/KOKKOS/improper_class2_kokkos.cpp +++ b/src/KOKKOS/improper_class2_kokkos.cpp @@ -188,7 +188,7 @@ KOKKOS_INLINE_FUNCTION void ImproperClass2Kokkos::operator()(TagImproperClass2Compute, const int &n, EV_FLOAT& ev) const { // The f array is atomic - Kokkos::View > a_f = f; + Kokkos::View::value,Kokkos::MemoryTraits > a_f = f; int i, j, k; F_FLOAT delr[3][3],rmag[3],rinvmag[3],rmag2[3]; @@ -660,7 +660,7 @@ KOKKOS_INLINE_FUNCTION void ImproperClass2Kokkos::operator()(TagImproperClass2AngleAngle, const int &n, EV_FLOAT& ev) const { // The f array is atomic - Kokkos::View > a_f = f; + Kokkos::View::value,Kokkos::MemoryTraits > a_f = f; int i,j,k; F_FLOAT eimproper; @@ -1010,8 +1010,8 @@ void ImproperClass2Kokkos::ev_tally(EV_FLOAT &ev, const int i1, cons F_FLOAT v[6]; // The eatom and vatom arrays 
are atomic - Kokkos::View > v_eatom = k_eatom.view(); - Kokkos::View > v_vatom = k_vatom.view(); + Kokkos::View::value,Kokkos::MemoryTraits > v_eatom = k_eatom.view(); + Kokkos::View::value,Kokkos::MemoryTraits > v_vatom = k_vatom.view(); if (eflag_either) { if (eflag_global) { diff --git a/src/KOKKOS/improper_class2_kokkos.h b/src/KOKKOS/improper_class2_kokkos.h index 0fbfab2beb..11212249e7 100644 --- a/src/KOKKOS/improper_class2_kokkos.h +++ b/src/KOKKOS/improper_class2_kokkos.h @@ -75,7 +75,7 @@ class ImproperClass2Kokkos : public ImproperClass2 { class NeighborKokkos *neighborKK; typename AT::t_x_array_randomread x; - typename Kokkos::View > f; + typename Kokkos::View::value,Kokkos::MemoryTraits > f; typename AT::t_int_2d improperlist; DAT::tdual_efloat_1d k_eatom; diff --git a/src/KOKKOS/improper_harmonic_kokkos.h b/src/KOKKOS/improper_harmonic_kokkos.h index 23ae0c7110..fb44081928 100644 --- a/src/KOKKOS/improper_harmonic_kokkos.h +++ b/src/KOKKOS/improper_harmonic_kokkos.h @@ -64,13 +64,13 @@ class ImproperHarmonicKokkos : public ImproperHarmonic { class NeighborKokkos *neighborKK; typename AT::t_x_array_randomread x; - typename Kokkos::View > f; + typename Kokkos::View::value,Kokkos::MemoryTraits > f; typename AT::t_int_2d improperlist; Kokkos::DualView k_eatom; Kokkos::DualView k_vatom; - Kokkos::View > d_eatom; - Kokkos::View > d_vatom; + Kokkos::View::value,Kokkos::MemoryTraits > d_eatom; + Kokkos::View::value,Kokkos::MemoryTraits > d_vatom; int nlocal,newton_bond; int eflag,vflag; diff --git a/src/KOKKOS/kokkos_type.h b/src/KOKKOS/kokkos_type.h index 3ba6318d41..b1d17b45c3 100644 --- a/src/KOKKOS/kokkos_type.h +++ b/src/KOKKOS/kokkos_type.h @@ -163,6 +163,20 @@ t_scalar3 operator * typedef Kokkos::DefaultExecutionSpace LMPDeviceType; typedef Kokkos::HostSpace::execution_space LMPHostType; + +// Need to use Cuda UVM memory space for Host execution space + +template +class KKDevice { +public: +#if defined(KOKKOS_ENABLE_CUDA) && 
defined(KOKKOS_ENABLE_CUDA_UVM) + typedef Kokkos::Device value; +#else + typedef Kokkos::Device value; +#endif +}; + + // set ExecutionSpace stuct with variable "space" template diff --git a/src/KOKKOS/pair_coul_dsf_kokkos.cpp b/src/KOKKOS/pair_coul_dsf_kokkos.cpp index f7bf8fb5d1..cabdcfd455 100644 --- a/src/KOKKOS/pair_coul_dsf_kokkos.cpp +++ b/src/KOKKOS/pair_coul_dsf_kokkos.cpp @@ -227,8 +227,8 @@ KOKKOS_INLINE_FUNCTION void PairCoulDSFKokkos::operator()(TagPairCoulDSFKernelA, const int &ii, EV_FLOAT& ev) const { // The f array is atomic for Half/Thread neighbor style - Kokkos::View::value> > a_f = f; - Kokkos::View::value> > v_eatom = k_eatom.view(); + Kokkos::View::value,Kokkos::MemoryTraits::value> > a_f = f; + Kokkos::View::value,Kokkos::MemoryTraits::value> > v_eatom = k_eatom.view(); const int i = d_ilist[ii]; const X_FLOAT xtmp = x(i,0); @@ -323,8 +323,8 @@ void PairCoulDSFKokkos::ev_tally(EV_FLOAT &ev, const int &i, const i const int VFLAG = vflag_either; // The eatom and vatom arrays are atomic for Half/Thread neighbor style - Kokkos::View::value> > v_eatom = k_eatom.view(); - Kokkos::View::value> > v_vatom = k_vatom.view(); + Kokkos::View::value,Kokkos::MemoryTraits::value> > v_eatom = k_eatom.view(); + Kokkos::View::value,Kokkos::MemoryTraits::value> > v_vatom = k_vatom.view(); if (EFLAG) { if (eflag_atom) { diff --git a/src/KOKKOS/pair_coul_wolf_kokkos.cpp b/src/KOKKOS/pair_coul_wolf_kokkos.cpp index f3ffdc6069..45c4ec3f22 100644 --- a/src/KOKKOS/pair_coul_wolf_kokkos.cpp +++ b/src/KOKKOS/pair_coul_wolf_kokkos.cpp @@ -228,8 +228,8 @@ KOKKOS_INLINE_FUNCTION void PairCoulWolfKokkos::operator()(TagPairCoulWolfKernelA, const int &ii, EV_FLOAT& ev) const { // The f array is atomic for Half/Thread neighbor style - Kokkos::View::value> > a_f = f; - Kokkos::View::value> > v_eatom = k_eatom.view(); + Kokkos::View::value,Kokkos::MemoryTraits::value> > a_f = f; + Kokkos::View::value,Kokkos::MemoryTraits::value> > v_eatom = k_eatom.view(); const int i = 
d_ilist[ii]; const X_FLOAT xtmp = x(i,0); @@ -325,8 +325,8 @@ void PairCoulWolfKokkos::ev_tally(EV_FLOAT &ev, const int &i, const const int VFLAG = vflag_either; // The eatom and vatom arrays are atomic for Half/Thread neighbor style - Kokkos::View::value> > v_eatom = k_eatom.view(); - Kokkos::View::value> > v_vatom = k_vatom.view(); + Kokkos::View::value,Kokkos::MemoryTraits::value> > v_eatom = k_eatom.view(); + Kokkos::View::value,Kokkos::MemoryTraits::value> > v_vatom = k_vatom.view(); if (EFLAG) { if (eflag_atom) { diff --git a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp index 1ddf950fd7..3a1e02037a 100644 --- a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp +++ b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp @@ -389,7 +389,7 @@ KOKKOS_INLINE_FUNCTION void PairDPDfdtEnergyKokkos::operator()(TagPairDPDfdtEnergyComputeSplit, const int &ii, EV_FLOAT& ev) const { // The f array is atomic for Half/Thread neighbor style - Kokkos::View::value> > a_f = f; + Kokkos::View::value,Kokkos::MemoryTraits::value> > a_f = f; int i,j,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; @@ -473,9 +473,9 @@ KOKKOS_INLINE_FUNCTION void PairDPDfdtEnergyKokkos::operator()(TagPairDPDfdtEnergyComputeNoSplit, const int &ii, EV_FLOAT& ev) const { // These array are atomic for Half/Thread neighbor style - Kokkos::View::value> > a_f = f; - Kokkos::View::value> > a_duCond = d_duCond; - Kokkos::View::value> > a_duMech = d_duMech; + Kokkos::View::value,Kokkos::MemoryTraits::value> > a_f = f; + Kokkos::View::value,Kokkos::MemoryTraits::value> > a_duCond = d_duCond; + Kokkos::View::value,Kokkos::MemoryTraits::value> > a_duMech = d_duMech; int i,j,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; @@ -697,8 +697,8 @@ void PairDPDfdtEnergyKokkos::ev_tally(EV_FLOAT &ev, const int &i, co const int VFLAG = vflag_either; // The eatom and vatom arrays are atomic for Half/Thread neighbor style - Kokkos::View::value> > v_eatom = 
k_eatom.view(); - Kokkos::View::value> > v_vatom = k_vatom.view(); + Kokkos::View::value,Kokkos::MemoryTraits::value> > v_eatom = k_eatom.view(); + Kokkos::View::value,Kokkos::MemoryTraits::value> > v_vatom = k_vatom.view(); if (EFLAG) { if (eflag_atom) { diff --git a/src/KOKKOS/pair_eam_alloy_kokkos.h b/src/KOKKOS/pair_eam_alloy_kokkos.h index e1dd9ab47d..5796bdd1d4 100644 --- a/src/KOKKOS/pair_eam_alloy_kokkos.h +++ b/src/KOKKOS/pair_eam_alloy_kokkos.h @@ -129,14 +129,14 @@ class PairEAMAlloyKokkos : public PairEAM, public KokkosBase { typename ArrayTypes::t_virial_array d_vatom; int need_dup; - Kokkos::Experimental::ScatterView dup_rho; - Kokkos::Experimental::ScatterView dup_f; - Kokkos::Experimental::ScatterView dup_eatom; - Kokkos::Experimental::ScatterView dup_vatom; - Kokkos::Experimental::ScatterView ndup_rho; - Kokkos::Experimental::ScatterView ndup_f; - Kokkos::Experimental::ScatterView ndup_eatom; - Kokkos::Experimental::ScatterView ndup_vatom; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_rho; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_f; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_eatom; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_vatom; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_rho; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_f; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_eatom; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_vatom; 
DAT::tdual_ffloat_1d k_rho; DAT::tdual_ffloat_1d k_fp; diff --git a/src/KOKKOS/pair_eam_fs_kokkos.h b/src/KOKKOS/pair_eam_fs_kokkos.h index e93977869e..64e1c78d56 100644 --- a/src/KOKKOS/pair_eam_fs_kokkos.h +++ b/src/KOKKOS/pair_eam_fs_kokkos.h @@ -129,14 +129,14 @@ class PairEAMFSKokkos : public PairEAM, public KokkosBase { typename ArrayTypes::t_virial_array d_vatom; int need_dup; - Kokkos::Experimental::ScatterView dup_rho; - Kokkos::Experimental::ScatterView dup_f; - Kokkos::Experimental::ScatterView dup_eatom; - Kokkos::Experimental::ScatterView dup_vatom; - Kokkos::Experimental::ScatterView ndup_rho; - Kokkos::Experimental::ScatterView ndup_f; - Kokkos::Experimental::ScatterView ndup_eatom; - Kokkos::Experimental::ScatterView ndup_vatom; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_rho; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_f; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_eatom; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_vatom; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_rho; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_f; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_eatom; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_vatom; DAT::tdual_ffloat_1d k_rho; DAT::tdual_ffloat_1d k_fp; diff --git a/src/KOKKOS/pair_eam_kokkos.h b/src/KOKKOS/pair_eam_kokkos.h index 3bf89c549a..20bac4ed16 100644 --- a/src/KOKKOS/pair_eam_kokkos.h +++ 
b/src/KOKKOS/pair_eam_kokkos.h @@ -126,14 +126,14 @@ class PairEAMKokkos : public PairEAM, public KokkosBase { typename ArrayTypes::t_virial_array d_vatom; int need_dup; - Kokkos::Experimental::ScatterView dup_rho; - Kokkos::Experimental::ScatterView dup_f; - Kokkos::Experimental::ScatterView dup_eatom; - Kokkos::Experimental::ScatterView dup_vatom; - Kokkos::Experimental::ScatterView ndup_rho; - Kokkos::Experimental::ScatterView ndup_f; - Kokkos::Experimental::ScatterView ndup_eatom; - Kokkos::Experimental::ScatterView ndup_vatom; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_rho; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_f; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_eatom; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_vatom; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_rho; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_f; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_eatom; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_vatom; DAT::tdual_ffloat_1d k_rho; DAT::tdual_ffloat_1d k_fp; diff --git a/src/KOKKOS/pair_exp6_rx_kokkos.cpp b/src/KOKKOS/pair_exp6_rx_kokkos.cpp index 4d72c85029..800ea81fa5 100644 --- a/src/KOKKOS/pair_exp6_rx_kokkos.cpp +++ b/src/KOKKOS/pair_exp6_rx_kokkos.cpp @@ -442,9 +442,9 @@ void PairExp6rxKokkos::operator()(TagPairExp6rxCompute::value> > a_f = f; - Kokkos::View::value> > a_uCG = uCG; - Kokkos::View::value> > a_uCGnew = uCGnew; 
+ Kokkos::View::value,Kokkos::MemoryTraits::value> > a_f = f; + Kokkos::View::value,Kokkos::MemoryTraits::value> > a_uCG = uCG; + Kokkos::View::value,Kokkos::MemoryTraits::value> > a_uCGnew = uCGnew; int i,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,evdwlOld,fpair; @@ -1183,9 +1183,9 @@ KOKKOS_INLINE_FUNCTION void PairExp6rxKokkos::vectorized_operator(const int &ii, EV_FLOAT& ev) const { // These arrays are atomic for Half/Thread neighbor style - Kokkos::View::value> > a_f = f; - Kokkos::View::value> > a_uCG = uCG; - Kokkos::View::value> > a_uCGnew = uCGnew; + Kokkos::View::value,Kokkos::MemoryTraits::value> > a_f = f; + Kokkos::View::value,Kokkos::MemoryTraits::value> > a_uCG = uCG; + Kokkos::View::value,Kokkos::MemoryTraits::value> > a_uCGnew = uCGnew; int tid = 0; #ifndef KOKKOS_ENABLE_CUDA @@ -2562,8 +2562,8 @@ void PairExp6rxKokkos::ev_tally(EV_FLOAT &ev, const int &i, const in const int VFLAG = vflag_either; // The eatom and vatom arrays are atomic for Half/Thread neighbor style - Kokkos::View::value> > v_eatom = k_eatom.view(); - Kokkos::View::value> > v_vatom = k_vatom.view(); + Kokkos::View::value,Kokkos::MemoryTraits::value> > v_eatom = k_eatom.view(); + Kokkos::View::value,Kokkos::MemoryTraits::value> > v_vatom = k_vatom.view(); if (EFLAG) { if (eflag_atom) { diff --git a/src/KOKKOS/pair_gran_hooke_history_kokkos.cpp b/src/KOKKOS/pair_gran_hooke_history_kokkos.cpp index 9e65c0589e..8797aab71e 100644 --- a/src/KOKKOS/pair_gran_hooke_history_kokkos.cpp +++ b/src/KOKKOS/pair_gran_hooke_history_kokkos.cpp @@ -320,8 +320,8 @@ KOKKOS_INLINE_FUNCTION void PairGranHookeHistoryKokkos::operator()(TagPairGranHookeHistoryCompute, const int ii, EV_FLOAT &ev) const { // The f and torque arrays are atomic for Half/Thread neighbor style - Kokkos::View::value> > a_f = f; - Kokkos::View::value> > a_torque = torque; + Kokkos::View::value,Kokkos::MemoryTraits::value> > a_f = f; + Kokkos::View::value,Kokkos::MemoryTraits::value> > a_torque = torque; const 
int i = d_ilist[ii]; const X_FLOAT xtmp = x(i,0); @@ -549,7 +549,7 @@ void PairGranHookeHistoryKokkos::ev_tally_xyz_atom(EV_FLOAT &ev, int F_FLOAT fx, F_FLOAT fy, F_FLOAT fz, X_FLOAT delx, X_FLOAT dely, X_FLOAT delz) const { - Kokkos::View::value> > v_vatom = k_vatom.view(); + Kokkos::View::value,Kokkos::MemoryTraits::value> > v_vatom = k_vatom.view(); F_FLOAT v[6]; diff --git a/src/KOKKOS/pair_kokkos.h b/src/KOKKOS/pair_kokkos.h index 41922b7349..54035c54eb 100644 --- a/src/KOKKOS/pair_kokkos.h +++ b/src/KOKKOS/pair_kokkos.h @@ -66,17 +66,17 @@ struct PairComputeFunctor { // The force array is atomic for Half/Thread neighbor style //Kokkos::View::value> > f; - Kokkos::Experimental::ScatterView::value > dup_f; + // typename KKDevice::value,Kokkos::MemoryTraits::value> > f; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,NeedDup::value > dup_f; // The eatom and vatom arrays are atomic for Half/Thread neighbor style //Kokkos::View::value> > eatom; - Kokkos::Experimental::ScatterView::value > dup_eatom; + // typename KKDevice::value,Kokkos::MemoryTraits::value> > eatom; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,NeedDup::value > dup_eatom; //Kokkos::View::value> > vatom; - Kokkos::Experimental::ScatterView::value > dup_vatom; + // typename KKDevice::value,Kokkos::MemoryTraits::value> > vatom; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,NeedDup::value > dup_vatom; diff --git a/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp b/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp index 3b7a738026..1c125b4dc0 100644 --- a/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp +++ b/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp @@ -270,7 +270,7 @@ KOKKOS_INLINE_FUNCTION void PairMultiLucyRXKokkos::operator()(TagPairMultiLucyRXCompute, const int &ii, EV_FLOAT& ev) const { // The f array is atomic for Half/Thread neighbor style - Kokkos::View::value> > a_f = f; + Kokkos::View::value,Kokkos::MemoryTraits::value> > a_f = 
f; int i,jj,jnum,itype,jtype,itable; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,evdwlOld,fpair; @@ -532,7 +532,7 @@ void PairMultiLucyRXKokkos::operator()(TagPairMultiLucyRXComputeLoca // The rho array is atomic for Half/Thread neighbor style - Kokkos::View::value> > a_rho = rho; + Kokkos::View::value,Kokkos::MemoryTraits::value> > a_rho = rho; const int i = d_ilist[ii]; @@ -771,8 +771,8 @@ void PairMultiLucyRXKokkos::ev_tally(EV_FLOAT &ev, const int &i, con const int VFLAG = vflag_either; // The eatom and vatom arrays are atomic for Half/Thread neighbor style - Kokkos::View::value> > v_eatom = k_eatom.view(); - Kokkos::View::value> > v_vatom = k_vatom.view(); + Kokkos::View::value,Kokkos::MemoryTraits::value> > v_eatom = k_eatom.view(); + Kokkos::View::value,Kokkos::MemoryTraits::value> > v_vatom = k_vatom.view(); if (EFLAG) { if (eflag_atom) { diff --git a/src/KOKKOS/pair_reaxc_kokkos.cpp b/src/KOKKOS/pair_reaxc_kokkos.cpp index 18144bb653..302ecbafd9 100644 --- a/src/KOKKOS/pair_reaxc_kokkos.cpp +++ b/src/KOKKOS/pair_reaxc_kokkos.cpp @@ -2477,7 +2477,7 @@ void PairReaxCKokkos::operator()(PairReaxComputeAngular::value,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); auto a_f = v_f.template access::value>(); - Kokkos::View::value> > a_Cdbo = d_Cdbo; + Kokkos::View::value,Kokkos::MemoryTraits::value> > a_Cdbo = d_Cdbo; auto v_CdDelta = ScatterViewHelper::value,decltype(dup_CdDelta),decltype(ndup_CdDelta)>::get(dup_CdDelta,ndup_CdDelta); auto a_CdDelta = v_CdDelta.template access::value>(); @@ -2792,7 +2792,7 @@ void PairReaxCKokkos::operator()(PairReaxComputeTorsion::value,decltype(dup_CdDelta),decltype(ndup_CdDelta)>::get(dup_CdDelta,ndup_CdDelta); auto a_CdDelta = v_CdDelta.template access::value>(); - Kokkos::View::value> > a_Cdbo = d_Cdbo; + Kokkos::View::value,Kokkos::MemoryTraits::value> > a_Cdbo = d_Cdbo; //auto a_Cdbo = dup_Cdbo.template access::value>(); // in reaxc_torsion_angles: j = i, k = j, i = k; @@ -3311,9 +3311,9 @@ template 
KOKKOS_INLINE_FUNCTION void PairReaxCKokkos::operator()(PairReaxUpdateBond, const int &ii) const { - Kokkos::View::value> > a_Cdbo = d_Cdbo; - Kokkos::View::value> > a_Cdbopi = d_Cdbopi; - Kokkos::View::value> > a_Cdbopi2 = d_Cdbopi2; + Kokkos::View::value,Kokkos::MemoryTraits::value> > a_Cdbo = d_Cdbo; + Kokkos::View::value,Kokkos::MemoryTraits::value> > a_Cdbopi = d_Cdbopi; + Kokkos::View::value,Kokkos::MemoryTraits::value> > a_Cdbopi2 = d_Cdbopi2; //auto a_Cdbo = dup_Cdbo.template access::value>(); //auto a_Cdbopi = dup_Cdbopi.template access::value>(); //auto a_Cdbopi2 = dup_Cdbopi2.template access::value>(); diff --git a/src/KOKKOS/pair_reaxc_kokkos.h b/src/KOKKOS/pair_reaxc_kokkos.h index 783ea33c4e..93ca4468ec 100644 --- a/src/KOKKOS/pair_reaxc_kokkos.h +++ b/src/KOKKOS/pair_reaxc_kokkos.h @@ -399,25 +399,25 @@ class PairReaxCKokkos : public PairReaxC { typename AT::t_ffloat_2d_dl d_C1dbopi2, d_C2dbopi2, d_C3dbopi2, d_C4dbopi2; typename AT::t_ffloat_2d_dl d_Cdbo, d_Cdbopi, d_Cdbopi2, d_dDeltap_self; - Kokkos::Experimental::ScatterView dup_total_bo; - Kokkos::Experimental::ScatterView dup_CdDelta; - Kokkos::Experimental::ScatterView dup_eatom; - Kokkos::Experimental::ScatterView dup_f; - Kokkos::Experimental::ScatterView dup_vatom; - Kokkos::Experimental::ScatterView dup_dDeltap_self; - Kokkos::Experimental::ScatterView dup_Cdbo; - Kokkos::Experimental::ScatterView dup_Cdbopi; - Kokkos::Experimental::ScatterView dup_Cdbopi2; - - Kokkos::Experimental::ScatterView ndup_total_bo; - Kokkos::Experimental::ScatterView ndup_CdDelta; - Kokkos::Experimental::ScatterView ndup_eatom; - Kokkos::Experimental::ScatterView ndup_f; - Kokkos::Experimental::ScatterView ndup_vatom; - Kokkos::Experimental::ScatterView ndup_dDeltap_self; - Kokkos::Experimental::ScatterView ndup_Cdbo; - Kokkos::Experimental::ScatterView ndup_Cdbopi; - Kokkos::Experimental::ScatterView ndup_Cdbopi2; + 
Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_total_bo; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_CdDelta; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_eatom; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_f; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_vatom; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_dDeltap_self; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_Cdbo; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_Cdbopi; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_Cdbopi2; + + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_total_bo; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_CdDelta; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_eatom; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_f; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_vatom; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_dDeltap_self; + 
Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_Cdbo; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_Cdbopi; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_Cdbopi2; int need_dup; diff --git a/src/KOKKOS/pair_snap_kokkos.h b/src/KOKKOS/pair_snap_kokkos.h index b57ef2d9e5..1fbb537f35 100644 --- a/src/KOKKOS/pair_snap_kokkos.h +++ b/src/KOKKOS/pair_snap_kokkos.h @@ -181,10 +181,10 @@ inline double dist2(double* x,double* y); typename AT::t_int_1d_randomread type; int need_dup; - Kokkos::Experimental::ScatterView dup_f; - Kokkos::Experimental::ScatterView dup_vatom; - Kokkos::Experimental::ScatterView ndup_f; - Kokkos::Experimental::ScatterView ndup_vatom; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_f; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_vatom; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_f; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_vatom; friend void pair_virial_fdotr_compute(PairSNAPKokkos*); diff --git a/src/KOKKOS/pair_sw_kokkos.h b/src/KOKKOS/pair_sw_kokkos.h index 1a3f0b862f..2fc7f93c12 100644 --- a/src/KOKKOS/pair_sw_kokkos.h +++ b/src/KOKKOS/pair_sw_kokkos.h @@ -135,12 +135,12 @@ class PairSWKokkos : public PairSW { typename AT::t_virial_array d_vatom; int need_dup; - Kokkos::Experimental::ScatterView dup_f; - Kokkos::Experimental::ScatterView dup_eatom; - Kokkos::Experimental::ScatterView dup_vatom; - Kokkos::Experimental::ScatterView ndup_f; - Kokkos::Experimental::ScatterView ndup_eatom; - 
Kokkos::Experimental::ScatterView ndup_vatom; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_f; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_eatom; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_vatom; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_f; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_eatom; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_vatom; typename AT::t_int_1d_randomread d_type2frho; typename AT::t_int_2d_randomread d_type2rhor; diff --git a/src/KOKKOS/pair_table_rx_kokkos.cpp b/src/KOKKOS/pair_table_rx_kokkos.cpp index cd7aa373ed..687489791e 100644 --- a/src/KOKKOS/pair_table_rx_kokkos.cpp +++ b/src/KOKKOS/pair_table_rx_kokkos.cpp @@ -284,11 +284,11 @@ ev_tally( F_FLOAT delx, F_FLOAT dely, F_FLOAT delz, Kokkos::View::t_virial_array::array_layout, - DeviceType, + typename KKDevice::value, Kokkos::MemoryTraits::value> > const& v_vatom, Kokkos::View::t_efloat_1d::array_layout, - DeviceType, + typename KKDevice::value, Kokkos::MemoryTraits::value> > const& v_eatom) { if (eflag) { @@ -399,15 +399,15 @@ compute_item( typename ArrayTypes::t_ffloat_2d const& d_cutsq, Kokkos::View::t_f_array::array_layout, - DeviceType, + typename KKDevice::value, Kokkos::MemoryTraits::value> > const& f, Kokkos::View::t_efloat_1d::array_layout, - DeviceType, + typename KKDevice::value, Kokkos::MemoryTraits::value> > const& uCG, Kokkos::View::t_efloat_1d::array_layout, - DeviceType, + typename KKDevice::value, Kokkos::MemoryTraits::value> > const& uCGnew, int isite1, int isite2, typename PairTableRXKokkos::TableDeviceConst 
const& d_table_const, @@ -418,11 +418,11 @@ compute_item( int vflag_atom, Kokkos::View::t_virial_array::array_layout, - DeviceType, + typename KKDevice::value, Kokkos::MemoryTraits::value> > const& v_vatom, Kokkos::View::t_efloat_1d::array_layout, - DeviceType, + typename KKDevice::value, Kokkos::MemoryTraits::value> > const& v_eatom) { EV_FLOAT ev; auto i = d_ilist(ii); @@ -544,14 +544,16 @@ static void compute_all_items( typename ArrayTypes::t_ffloat_2d d_cutsq, Kokkos::View::t_f_array::array_layout, - DeviceType, + typename KKDevice::value, Kokkos::MemoryTraits::value> > f, Kokkos::View::t_efloat_1d::array_layout, - DeviceType,Kokkos::MemoryTraits::value> > uCG, + typename KKDevice::value, + Kokkos::MemoryTraits::value> > uCG, Kokkos::View::t_efloat_1d::array_layout, - DeviceType,Kokkos::MemoryTraits::value> > uCGnew, + typename KKDevice::value, + Kokkos::MemoryTraits::value> > uCGnew, int isite1, int isite2, typename PairTableRXKokkos::TableDeviceConst d_table_const, int eflag, @@ -561,11 +563,11 @@ static void compute_all_items( int vflag_atom, Kokkos::View::t_virial_array::array_layout, - DeviceType, + typename KKDevice::value, Kokkos::MemoryTraits::value> > v_vatom, Kokkos::View::t_efloat_1d::array_layout, - DeviceType, + typename KKDevice::value, Kokkos::MemoryTraits::value> > v_eatom) { if (eflag || vflag) { Kokkos::parallel_reduce(inum, diff --git a/src/KOKKOS/pair_tersoff_kokkos.h b/src/KOKKOS/pair_tersoff_kokkos.h index 7d41fe2346..0c57e21a6c 100644 --- a/src/KOKKOS/pair_tersoff_kokkos.h +++ b/src/KOKKOS/pair_tersoff_kokkos.h @@ -202,12 +202,12 @@ class PairTersoffKokkos : public PairTersoff { typename ArrayTypes::t_virial_array d_vatom; int need_dup; - Kokkos::Experimental::ScatterView dup_f; - Kokkos::Experimental::ScatterView dup_eatom; - Kokkos::Experimental::ScatterView dup_vatom; - Kokkos::Experimental::ScatterView ndup_f; - Kokkos::Experimental::ScatterView ndup_eatom; - Kokkos::Experimental::ScatterView ndup_vatom; + 
Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_f; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_eatom; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_vatom; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_f; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_eatom; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_vatom; typedef Kokkos::DualView tdual_ffloat_2d_n7; typedef typename tdual_ffloat_2d_n7::t_dev_const_randomread t_ffloat_2d_n7_randomread; diff --git a/src/KOKKOS/pair_tersoff_mod_kokkos.h b/src/KOKKOS/pair_tersoff_mod_kokkos.h index 889e1eadfa..b47f11e029 100644 --- a/src/KOKKOS/pair_tersoff_mod_kokkos.h +++ b/src/KOKKOS/pair_tersoff_mod_kokkos.h @@ -202,12 +202,12 @@ class PairTersoffMODKokkos : public PairTersoffMOD { typename ArrayTypes::t_virial_array d_vatom; int need_dup; - Kokkos::Experimental::ScatterView dup_f; - Kokkos::Experimental::ScatterView dup_eatom; - Kokkos::Experimental::ScatterView dup_vatom; - Kokkos::Experimental::ScatterView ndup_f; - Kokkos::Experimental::ScatterView ndup_eatom; - Kokkos::Experimental::ScatterView ndup_vatom; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_f; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_eatom; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_vatom; + 
Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_f; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_eatom; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_vatom; typedef Kokkos::DualView tdual_ffloat_2d_n7; typedef typename tdual_ffloat_2d_n7::t_dev_const_randomread t_ffloat_2d_n7_randomread; diff --git a/src/KOKKOS/pair_tersoff_zbl_kokkos.h b/src/KOKKOS/pair_tersoff_zbl_kokkos.h index 0c7fa2e963..bed2564da5 100644 --- a/src/KOKKOS/pair_tersoff_zbl_kokkos.h +++ b/src/KOKKOS/pair_tersoff_zbl_kokkos.h @@ -207,12 +207,12 @@ class PairTersoffZBLKokkos : public PairTersoffZBL { typename ArrayTypes::t_virial_array d_vatom; int need_dup; - Kokkos::Experimental::ScatterView dup_f; - Kokkos::Experimental::ScatterView dup_eatom; - Kokkos::Experimental::ScatterView dup_vatom; - Kokkos::Experimental::ScatterView ndup_f; - Kokkos::Experimental::ScatterView ndup_eatom; - Kokkos::Experimental::ScatterView ndup_vatom; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_f; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_eatom; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterDuplicated> dup_vatom; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_f; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_eatom; + Kokkos::Experimental::ScatterView::value,Kokkos::Experimental::ScatterSum,Kokkos::Experimental::ScatterNonDuplicated> ndup_vatom; typedef Kokkos::DualView tdual_ffloat_2d_n7; typedef typename 
tdual_ffloat_2d_n7::t_dev_const_randomread t_ffloat_2d_n7_randomread; diff --git a/src/KOKKOS/pair_vashishta_kokkos.cpp b/src/KOKKOS/pair_vashishta_kokkos.cpp index ddb0688e03..84887fa1f0 100644 --- a/src/KOKKOS/pair_vashishta_kokkos.cpp +++ b/src/KOKKOS/pair_vashishta_kokkos.cpp @@ -234,7 +234,7 @@ void PairVashishtaKokkos::operator()(TagPairVashishtaComputeHalf::value> > a_f = f; + Kokkos::View::value,Kokkos::MemoryTraits::value> > a_f = f; F_FLOAT delr1[3],delr2[3],fj[3],fk[3]; F_FLOAT evdwl = 0.0; @@ -780,8 +780,8 @@ void PairVashishtaKokkos::ev_tally(EV_FLOAT &ev, const int &i, const // The eatom and vatom arrays are atomic for half/thread neighbor list - Kokkos::View::value> > v_eatom = k_eatom.view(); - Kokkos::View::value> > v_vatom = k_vatom.view(); + Kokkos::View::value,Kokkos::MemoryTraits::value> > v_eatom = k_eatom.view(); + Kokkos::View::value,Kokkos::MemoryTraits::value> > v_vatom = k_vatom.view(); if (eflag_atom) { @@ -856,8 +856,8 @@ void PairVashishtaKokkos::ev_tally3(EV_FLOAT &ev, const int &i, cons // The eatom and vatom arrays are atomic for half/thread neighbor list - Kokkos::View::value> > v_eatom = k_eatom.view(); - Kokkos::View::value> > v_vatom = k_vatom.view(); + Kokkos::View::value,Kokkos::MemoryTraits::value> > v_eatom = k_eatom.view(); + Kokkos::View::value,Kokkos::MemoryTraits::value> > v_vatom = k_vatom.view(); if (eflag_atom) { epairthird = THIRD * (evdwl + ecoul); diff --git a/src/KOKKOS/pppm_kokkos.cpp b/src/KOKKOS/pppm_kokkos.cpp index 08a0c18f9c..b0f6f393cf 100644 --- a/src/KOKKOS/pppm_kokkos.cpp +++ b/src/KOKKOS/pppm_kokkos.cpp @@ -1670,7 +1670,7 @@ KOKKOS_INLINE_FUNCTION void PPPMKokkos::operator()(TagPPPM_make_rho_atomic, const int &i) const { // The density_brick array is atomic for Half/Thread neighbor style - Kokkos::View > a_density_brick = d_density_brick; + Kokkos::View::value,Kokkos::MemoryTraits > a_density_brick = d_density_brick; int nx = d_part2grid(i,0); int ny = d_part2grid(i,1); diff --git 
a/src/KOKKOS/sna_kokkos.h b/src/KOKKOS/sna_kokkos.h index a6d9db3218..b7162cf8d6 100644 --- a/src/KOKKOS/sna_kokkos.h +++ b/src/KOKKOS/sna_kokkos.h @@ -89,7 +89,7 @@ class SNAKokkos { public: typedef Kokkos::View t_sna_1i; typedef Kokkos::View t_sna_1d; - typedef Kokkos::View > t_sna_1d_atomic; + typedef Kokkos::View::value, Kokkos::MemoryTraits > t_sna_1d_atomic; typedef Kokkos::View t_sna_2i; typedef Kokkos::View t_sna_2d; typedef Kokkos::View t_sna_2d_ll; @@ -99,7 +99,7 @@ public: typedef Kokkos::View t_sna_5d; typedef Kokkos::View t_sna_1c; - typedef Kokkos::View > t_sna_1c_atomic; + typedef Kokkos::View::value, Kokkos::MemoryTraits > t_sna_1c_atomic; typedef Kokkos::View t_sna_2c; typedef Kokkos::View t_sna_2c_ll; typedef Kokkos::View t_sna_2c_lr; -- GitLab From 60864e38d1d5f34db0ff379bf71eab0da0ca1ed0 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Wed, 25 Mar 2020 14:08:39 -0600 Subject: [PATCH 079/328] Update Kokkos library in LAMMPS to v3.0 --- lib/kokkos/BUILD.md | 323 + lib/kokkos/CHANGELOG.md | 40 + lib/kokkos/CMakeLists.txt | 327 +- lib/kokkos/CONTRIBUTING.md | 14 + lib/kokkos/Copyright.txt | 11 +- lib/kokkos/LICENSE | 11 +- lib/kokkos/Makefile.kokkos | 132 +- lib/kokkos/Makefile.targets | 2 + lib/kokkos/README | 193 - lib/kokkos/README.md | 299 + lib/kokkos/algorithms/CMakeLists.txt | 24 +- lib/kokkos/algorithms/src/CMakeLists.txt | 27 +- lib/kokkos/algorithms/src/Kokkos_Random.hpp | 2683 ++++---- lib/kokkos/algorithms/src/Kokkos_Sort.hpp | 667 +- .../algorithms/unit_tests/CMakeLists.txt | 39 +- lib/kokkos/algorithms/unit_tests/TestCuda.cpp | 55 +- lib/kokkos/algorithms/unit_tests/TestHPX.cpp | 53 +- .../algorithms/unit_tests/TestOpenMP.cpp | 51 +- lib/kokkos/algorithms/unit_tests/TestROCm.cpp | 62 +- .../algorithms/unit_tests/TestRandom.hpp | 511 +- .../algorithms/unit_tests/TestSerial.cpp | 54 +- lib/kokkos/algorithms/unit_tests/TestSort.hpp | 298 +- .../algorithms/unit_tests/TestThreads.cpp | 54 +- .../algorithms/unit_tests/UnitTestMain.cpp | 16 +- 
lib/kokkos/benchmarks/atomic/main.cpp | 202 +- .../benchmarks/bytes_and_flops/bench.hpp | 72 +- .../bytes_and_flops/bench_stride.hpp | 121 +- .../bytes_and_flops/bench_unroll_stride.hpp | 181 +- .../benchmarks/bytes_and_flops/main.cpp | 54 +- lib/kokkos/benchmarks/gather/gather.hpp | 55 +- .../benchmarks/gather/gather_unroll.hpp | 202 +- lib/kokkos/benchmarks/gather/main.cpp | 54 +- lib/kokkos/benchmarks/gups/gups-kokkos.cc | 11 +- .../benchmarks/policy_performance/main.cpp | 179 +- .../policy_performance/policy_perf_test.hpp | 570 +- .../policy_performance/script_sample_usage.sh | 2 +- lib/kokkos/benchmarks/stream/stream-kokkos.cc | 11 +- lib/kokkos/bin/hpcbind | 2 +- lib/kokkos/bin/nvcc_wrapper | 77 +- lib/kokkos/cm_generate_makefile.bash | 339 + lib/kokkos/cmake/KokkosConfig.cmake.in | 26 +- lib/kokkos/cmake/KokkosConfigCommon.cmake.in | 87 + lib/kokkos/cmake/KokkosCore_config.h.in | 89 + .../cmake/Makefile.generate_cmake_settings | 8 - lib/kokkos/cmake/Modules/FindHWLOC.cmake | 20 - lib/kokkos/cmake/Modules/FindMemkind.cmake | 20 - lib/kokkos/cmake/Modules/FindQthreads.cmake | 20 - lib/kokkos/cmake/Modules/FindTPLCUDA.cmake | 13 + lib/kokkos/cmake/Modules/FindTPLHPX.cmake | 15 + lib/kokkos/cmake/Modules/FindTPLHWLOC.cmake | 1 + lib/kokkos/cmake/Modules/FindTPLLIBDL.cmake | 1 + lib/kokkos/cmake/Modules/FindTPLLIBNUMA.cmake | 1 + lib/kokkos/cmake/Modules/FindTPLLIBRT.cmake | 1 + lib/kokkos/cmake/Modules/FindTPLMEMKIND.cmake | 1 + lib/kokkos/cmake/Modules/FindTPLPTHREAD.cmake | 17 + lib/kokkos/cmake/README.md | 331 + lib/kokkos/cmake/compile_tests/clang_omp.cpp | 9 + lib/kokkos/cmake/compile_tests/pthread.cpp | 10 + lib/kokkos/cmake/cray.cmake | 9 + lib/kokkos/cmake/deps/CUDA.cmake | 2 +- lib/kokkos/cmake/deps/CUSPARSE.cmake | 2 +- lib/kokkos/cmake/deps/HWLOC.cmake | 2 +- lib/kokkos/cmake/deps/Pthread.cmake | 4 +- lib/kokkos/cmake/deps/QTHREADS.cmake | 69 - lib/kokkos/cmake/fake_tribits.cmake | 338 + lib/kokkos/cmake/gnu.cmake | 23 + lib/kokkos/cmake/intel.cmake | 
30 + lib/kokkos/cmake/kokkos_arch.cmake | 438 ++ lib/kokkos/cmake/kokkos_build.cmake | 261 - lib/kokkos/cmake/kokkos_compiler_id.cmake | 80 + lib/kokkos/cmake/kokkos_corner_cases.cmake | 35 + lib/kokkos/cmake/kokkos_enable_devices.cmake | 61 + lib/kokkos/cmake/kokkos_enable_options.cmake | 92 + lib/kokkos/cmake/kokkos_functions.cmake | 1033 ++- lib/kokkos/cmake/kokkos_install.cmake | 42 + lib/kokkos/cmake/kokkos_options.cmake | 419 -- lib/kokkos/cmake/kokkos_pick_cxx_std.cmake | 46 + lib/kokkos/cmake/kokkos_settings.cmake | 259 - lib/kokkos/cmake/kokkos_test_cxx_std.cmake | 144 + lib/kokkos/cmake/kokkos_tpls.cmake | 47 + lib/kokkos/cmake/kokkos_tribits.cmake | 392 ++ lib/kokkos/cmake/pgi.cmake | 8 + lib/kokkos/cmake/tpls/FindTPLCUSPARSE.cmake | 2 +- lib/kokkos/cmake/tpls/FindTPLHWLOC.cmake | 2 +- lib/kokkos/cmake/tpls/FindTPLPthread.cmake | 2 +- lib/kokkos/cmake/tpls/FindTPLQTHREADS.cmake | 69 - lib/kokkos/cmake/tribits.cmake | 531 -- lib/kokkos/containers/CMakeLists.txt | 23 +- .../performance_tests/CMakeLists.txt | 91 +- .../containers/performance_tests/TestCuda.cpp | 48 +- .../performance_tests/TestDynRankView.hpp | 159 +- .../performance_tests/TestGlobal2LocalIds.hpp | 138 +- .../containers/performance_tests/TestHPX.cpp | 68 +- .../containers/performance_tests/TestMain.cpp | 14 +- .../performance_tests/TestOpenMP.cpp | 79 +- .../containers/performance_tests/TestROCm.cpp | 38 +- .../performance_tests/TestScatterView.hpp | 112 +- .../performance_tests/TestThreads.cpp | 55 +- .../TestUnorderedMapPerformance.hpp | 100 +- lib/kokkos/containers/src/CMakeLists.txt | 81 +- lib/kokkos/containers/src/Kokkos_Bitset.hpp | 348 +- lib/kokkos/containers/src/Kokkos_DualView.hpp | 830 +-- .../containers/src/Kokkos_DynRankView.hpp | 3176 +++++----- .../containers/src/Kokkos_DynamicView.hpp | 719 +-- .../containers/src/Kokkos_ErrorReporter.hpp | 111 +- .../containers/src/Kokkos_Functional.hpp | 85 +- .../containers/src/Kokkos_OffsetView.hpp | 3802 +++++------ 
.../containers/src/Kokkos_ScatterView.hpp | 1535 +++-- .../containers/src/Kokkos_StaticCrsGraph.hpp | 430 +- .../containers/src/Kokkos_UnorderedMap.hpp | 621 +- lib/kokkos/containers/src/Kokkos_Vector.hpp | 294 +- .../src/impl/Kokkos_Bitset_impl.hpp | 60 +- .../src/impl/Kokkos_Functional_impl.hpp | 113 +- .../impl/Kokkos_StaticCrsGraph_factory.hpp | 253 +- .../src/impl/Kokkos_UnorderedMap_impl.cpp | 105 +- .../src/impl/Kokkos_UnorderedMap_impl.hpp | 169 +- .../containers/unit_tests/CMakeLists.txt | 171 +- .../containers/unit_tests/TestBitset.hpp | 183 +- .../containers/unit_tests/TestDualView.hpp | 271 +- .../containers/unit_tests/TestDynViewAPI.hpp | 2425 +++---- .../unit_tests/TestDynViewAPI_generic.hpp | 20 +- .../unit_tests/TestDynViewAPI_rank12345.hpp | 20 +- .../unit_tests/TestDynViewAPI_rank67.hpp | 20 +- .../containers/unit_tests/TestDynamicView.hpp | 259 +- .../unit_tests/TestErrorReporter.hpp | 146 +- .../containers/unit_tests/TestOffsetView.hpp | 933 ++- .../containers/unit_tests/TestScatterView.hpp | 672 +- .../unit_tests/TestStaticCrsGraph.hpp | 306 +- .../unit_tests/TestUnorderedMap.hpp | 276 +- .../containers/unit_tests/TestVector.hpp | 235 +- .../TestViewCtorPropEmbeddedDim.hpp | 173 +- .../containers/unit_tests/UnitTestMain.cpp | 17 +- .../unit_tests/cuda/TestCuda_BitSet.cpp | 16 +- .../unit_tests/cuda/TestCuda_Category.hpp | 26 +- .../unit_tests/cuda/TestCuda_DualView.cpp | 16 +- .../cuda/TestCuda_DynRankViewAPI_generic.cpp | 16 +- .../TestCuda_DynRankViewAPI_rank12345.cpp | 16 +- .../cuda/TestCuda_DynRankViewAPI_rank67.cpp | 16 +- .../unit_tests/cuda/TestCuda_DynamicView.cpp | 16 +- .../cuda/TestCuda_ErrorReporter.cpp | 16 +- .../unit_tests/cuda/TestCuda_OffsetView.cpp | 16 +- .../unit_tests/cuda/TestCuda_ScatterView.cpp | 16 +- .../cuda/TestCuda_StaticCrsGraph.cpp | 16 +- .../unit_tests/cuda/TestCuda_UnorderedMap.cpp | 16 +- .../unit_tests/cuda/TestCuda_Vector.cpp | 16 +- .../cuda/TestCuda_ViewCtorPropEmbeddedDim.cpp | 16 +- 
.../unit_tests/hpx/TestHPX_BitSet.cpp | 16 +- .../unit_tests/hpx/TestHPX_Category.hpp | 26 +- .../unit_tests/hpx/TestHPX_DualView.cpp | 16 +- .../hpx/TestHPX_DynRankViewAPI_generic.cpp | 16 +- .../hpx/TestHPX_DynRankViewAPI_rank12345.cpp | 16 +- .../hpx/TestHPX_DynRankViewAPI_rank67.cpp | 16 +- .../unit_tests/hpx/TestHPX_DynamicView.cpp | 16 +- .../unit_tests/hpx/TestHPX_ErrorReporter.cpp | 16 +- .../unit_tests/hpx/TestHPX_OffsetView.cpp | 16 +- .../unit_tests/hpx/TestHPX_ScatterView.cpp | 16 +- .../unit_tests/hpx/TestHPX_StaticCrsGraph.cpp | 16 +- .../unit_tests/hpx/TestHPX_UnorderedMap.cpp | 16 +- .../unit_tests/hpx/TestHPX_Vector.cpp | 16 +- .../hpx/TestHPX_ViewCtorPropEmbeddedDim.cpp | 16 +- .../unit_tests/openmp/TestOpenMP_BitSet.cpp | 16 +- .../unit_tests/openmp/TestOpenMP_Category.hpp | 26 +- .../unit_tests/openmp/TestOpenMP_DualView.cpp | 16 +- .../TestOpenMP_DynRankViewAPI_generic.cpp | 16 +- .../TestOpenMP_DynRankViewAPI_rank12345.cpp | 16 +- .../TestOpenMP_DynRankViewAPI_rank67.cpp | 16 +- .../openmp/TestOpenMP_DynamicView.cpp | 16 +- .../openmp/TestOpenMP_ErrorReporter.cpp | 16 +- .../openmp/TestOpenMP_OffsetView.cpp | 16 +- .../openmp/TestOpenMP_ScatterView.cpp | 16 +- .../openmp/TestOpenMP_StaticCrsGraph.cpp | 16 +- .../openmp/TestOpenMP_UnorderedMap.cpp | 16 +- .../unit_tests/openmp/TestOpenMP_Vector.cpp | 16 +- .../TestOpenMP_ViewCtorPropEmbeddedDim.cpp | 16 +- .../unit_tests/rocm/TestROCm_BitSet.cpp | 16 +- .../unit_tests/rocm/TestROCm_Category.hpp | 26 +- .../unit_tests/rocm/TestROCm_DualView.cpp | 16 +- .../rocm/TestROCm_DynRankViewAPI_generic.cpp | 16 +- .../TestROCm_DynRankViewAPI_rank12345.cpp | 16 +- .../rocm/TestROCm_DynRankViewAPI_rank67.cpp | 16 +- .../unit_tests/rocm/TestROCm_DynamicView.cpp | 16 +- .../rocm/TestROCm_ErrorReporter.cpp | 16 +- .../unit_tests/rocm/TestROCm_ScatterView.cpp | 16 +- .../rocm/TestROCm_StaticCrsGraph.cpp | 16 +- .../unit_tests/rocm/TestROCm_UnorderedMap.cpp | 16 +- .../unit_tests/rocm/TestROCm_Vector.cpp | 16 +- 
.../rocm/TestROCm_ViewCtorPropEmbeddedDim.cpp | 16 +- .../unit_tests/serial/TestSerial_BitSet.cpp | 16 +- .../unit_tests/serial/TestSerial_Category.hpp | 26 +- .../unit_tests/serial/TestSerial_DualView.cpp | 16 +- .../TestSerial_DynRankViewAPI_generic.cpp | 16 +- .../TestSerial_DynRankViewAPI_rank12345.cpp | 16 +- .../TestSerial_DynRankViewAPI_rank67.cpp | 16 +- .../serial/TestSerial_DynamicView.cpp | 16 +- .../serial/TestSerial_ErrorReporter.cpp | 16 +- .../serial/TestSerial_OffsetView.cpp | 16 +- .../serial/TestSerial_ScatterView.cpp | 16 +- .../serial/TestSerial_StaticCrsGraph.cpp | 16 +- .../serial/TestSerial_UnorderedMap.cpp | 16 +- .../unit_tests/serial/TestSerial_Vector.cpp | 16 +- .../TestSerial_ViewCtorPropEmbeddedDim.cpp | 16 +- .../unit_tests/threads/TestThreads_BitSet.cpp | 16 +- .../threads/TestThreads_Category.hpp | 26 +- .../threads/TestThreads_DualView.cpp | 16 +- .../TestThreads_DynRankViewAPI_generic.cpp | 16 +- .../TestThreads_DynRankViewAPI_rank12345.cpp | 16 +- .../TestThreads_DynRankViewAPI_rank67.cpp | 16 +- .../threads/TestThreads_DynamicView.cpp | 16 +- .../threads/TestThreads_ErrorReporter.cpp | 16 +- .../threads/TestThreads_OffsetView.cpp | 16 +- .../threads/TestThreads_ScatterView.cpp | 16 +- .../threads/TestThreads_StaticCrsGraph.cpp | 16 +- .../threads/TestThreads_UnorderedMap.cpp | 16 +- .../unit_tests/threads/TestThreads_Vector.cpp | 16 +- .../TestThreads_ViewCtorPropEmbeddedDim.cpp | 16 +- lib/kokkos/core/CMakeLists.txt | 25 +- lib/kokkos/core/cmake/KokkosCore_config.h.in | 3 +- lib/kokkos/core/perf_test/CMakeLists.txt | 80 +- .../core/perf_test/PerfTestBlasKernels.hpp | 254 +- lib/kokkos/core/perf_test/PerfTestDriver.hpp | 545 +- .../core/perf_test/PerfTestGramSchmidt.cpp | 282 +- lib/kokkos/core/perf_test/PerfTestHexGrad.cpp | 437 +- lib/kokkos/core/perf_test/PerfTestMDRange.hpp | 762 +-- lib/kokkos/core/perf_test/PerfTestMain.cpp | 31 +- .../core/perf_test/PerfTest_Category.hpp | 22 +- .../perf_test/PerfTest_CustomReduction.cpp | 
128 +- .../PerfTest_ExecSpacePartitioning.cpp | 1046 ++-- .../core/perf_test/PerfTest_ViewAllocate.cpp | 125 +- .../core/perf_test/PerfTest_ViewCopy.hpp | 268 +- .../core/perf_test/PerfTest_ViewCopy_a123.cpp | 19 +- .../core/perf_test/PerfTest_ViewCopy_a45.cpp | 19 +- .../core/perf_test/PerfTest_ViewCopy_a6.cpp | 19 +- .../core/perf_test/PerfTest_ViewCopy_a7.cpp | 19 +- .../core/perf_test/PerfTest_ViewCopy_a8.cpp | 19 +- .../core/perf_test/PerfTest_ViewCopy_b123.cpp | 19 +- .../core/perf_test/PerfTest_ViewCopy_b45.cpp | 19 +- .../core/perf_test/PerfTest_ViewCopy_b6.cpp | 19 +- .../core/perf_test/PerfTest_ViewCopy_b7.cpp | 19 +- .../core/perf_test/PerfTest_ViewCopy_b8.cpp | 19 +- .../core/perf_test/PerfTest_ViewCopy_c123.cpp | 19 +- .../core/perf_test/PerfTest_ViewCopy_c45.cpp | 19 +- .../core/perf_test/PerfTest_ViewCopy_c6.cpp | 19 +- .../core/perf_test/PerfTest_ViewCopy_c7.cpp | 19 +- .../core/perf_test/PerfTest_ViewCopy_c8.cpp | 19 +- .../core/perf_test/PerfTest_ViewCopy_d123.cpp | 19 +- .../core/perf_test/PerfTest_ViewCopy_d45.cpp | 19 +- .../core/perf_test/PerfTest_ViewCopy_d6.cpp | 19 +- .../core/perf_test/PerfTest_ViewCopy_d7.cpp | 19 +- .../core/perf_test/PerfTest_ViewCopy_d8.cpp | 19 +- .../core/perf_test/PerfTest_ViewFill.hpp | 232 +- .../core/perf_test/PerfTest_ViewFill_123.cpp | 19 +- .../core/perf_test/PerfTest_ViewFill_45.cpp | 19 +- .../core/perf_test/PerfTest_ViewFill_6.cpp | 19 +- .../core/perf_test/PerfTest_ViewFill_7.cpp | 19 +- .../core/perf_test/PerfTest_ViewFill_8.cpp | 19 +- .../core/perf_test/PerfTest_ViewResize.hpp | 381 +- .../perf_test/PerfTest_ViewResize_123.cpp | 19 +- .../core/perf_test/PerfTest_ViewResize_45.cpp | 19 +- .../core/perf_test/PerfTest_ViewResize_6.cpp | 19 +- .../core/perf_test/PerfTest_ViewResize_7.cpp | 19 +- .../core/perf_test/PerfTest_ViewResize_8.cpp | 19 +- lib/kokkos/core/perf_test/test_atomic.cpp | 460 +- lib/kokkos/core/perf_test/test_mempool.cpp | 360 +- lib/kokkos/core/perf_test/test_taskdag.cpp | 264 +- 
lib/kokkos/core/src/CMakeLists.txt | 205 +- .../src/Cuda/KokkosExp_Cuda_IterateTile.hpp | 1757 +++--- .../KokkosExp_Cuda_IterateTile_Refactor.hpp | 3352 +++++----- lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp | 1054 ++-- .../core/src/Cuda/Kokkos_Cuda_Alloc.hpp | 119 +- .../Cuda/Kokkos_Cuda_Atomic_Intrinsics.hpp | 1289 ++-- ...uda_Atomic_Intrinsics_Restore_Builtins.hpp | 10 +- .../Cuda/Kokkos_Cuda_BlockSize_Deduction.hpp | 704 ++- .../core/src/Cuda/Kokkos_Cuda_Error.hpp | 91 +- .../core/src/Cuda/Kokkos_Cuda_Instance.cpp | 848 +-- .../core/src/Cuda/Kokkos_Cuda_Instance.hpp | 213 +- .../src/Cuda/Kokkos_Cuda_KernelLaunch.hpp | 599 +- .../core/src/Cuda/Kokkos_Cuda_Locks.cpp | 45 +- .../core/src/Cuda/Kokkos_Cuda_Locks.hpp | 92 +- .../core/src/Cuda/Kokkos_Cuda_Parallel.hpp | 3816 ++++++----- .../core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp | 1183 ++-- lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.cpp | 28 +- lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.hpp | 1115 ++-- lib/kokkos/core/src/Cuda/Kokkos_Cuda_Team.hpp | 1119 ++-- .../core/src/Cuda/Kokkos_Cuda_UniqueToken.hpp | 123 +- .../src/Cuda/Kokkos_Cuda_Vectorization.hpp | 484 +- .../Kokkos_Cuda_Version_9_8_Compatibility.hpp | 123 +- lib/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp | 330 +- .../src/Cuda/Kokkos_Cuda_ViewCopyETIAvail.hpp | 18 +- .../src/Cuda/Kokkos_Cuda_ViewCopyETIDecl.hpp | 18 +- .../src/Cuda/Kokkos_Cuda_WorkGraphPolicy.hpp | 109 +- .../core/src/Cuda/Kokkos_Cuda_abort.hpp | 40 +- lib/kokkos/core/src/HPX/Kokkos_HPX.cpp | 31 +- .../Kokkos_HPX_ChunkedRoundRobinExecutor.hpp | 208 + lib/kokkos/core/src/HPX/Kokkos_HPX_Task.cpp | 17 +- lib/kokkos/core/src/HPX/Kokkos_HPX_Task.hpp | 81 +- .../src/HPX/Kokkos_HPX_ViewCopyETIAvail.hpp | 18 +- .../src/HPX/Kokkos_HPX_ViewCopyETIDecl.hpp | 18 +- .../src/HPX/Kokkos_HPX_WorkGraphPolicy.hpp | 36 +- .../core/src/KokkosExp_MDRangePolicy.hpp | 727 +-- lib/kokkos/core/src/Kokkos_AnonymousSpace.hpp | 74 +- lib/kokkos/core/src/Kokkos_Array.hpp | 426 +- lib/kokkos/core/src/Kokkos_Atomic.hpp | 
118 +- lib/kokkos/core/src/Kokkos_Complex.hpp | 1000 ++- lib/kokkos/core/src/Kokkos_Concepts.hpp | 422 +- lib/kokkos/core/src/Kokkos_CopyViews.hpp | 4466 +++++++------ lib/kokkos/core/src/Kokkos_Core.hpp | 149 +- lib/kokkos/core/src/Kokkos_Core_fwd.hpp | 330 +- lib/kokkos/core/src/Kokkos_Crs.hpp | 331 +- lib/kokkos/core/src/Kokkos_Cuda.hpp | 175 +- lib/kokkos/core/src/Kokkos_CudaSpace.hpp | 936 ++- lib/kokkos/core/src/Kokkos_ExecPolicy.hpp | 1080 ++-- lib/kokkos/core/src/Kokkos_Extents.hpp | 110 +- lib/kokkos/core/src/Kokkos_Future.hpp | 495 +- lib/kokkos/core/src/Kokkos_HBWSpace.hpp | 272 +- lib/kokkos/core/src/Kokkos_HPX.hpp | 1016 +-- lib/kokkos/core/src/Kokkos_HostSpace.hpp | 245 +- lib/kokkos/core/src/Kokkos_Layout.hpp | 325 +- lib/kokkos/core/src/Kokkos_Macros.hpp | 722 +-- lib/kokkos/core/src/Kokkos_MasterLock.hpp | 20 +- lib/kokkos/core/src/Kokkos_MemoryPool.hpp | 1138 ++-- lib/kokkos/core/src/Kokkos_MemoryTraits.hpp | 99 +- lib/kokkos/core/src/Kokkos_NumericTraits.hpp | 424 +- lib/kokkos/core/src/Kokkos_OpenMP.hpp | 139 +- lib/kokkos/core/src/Kokkos_OpenMPTarget.hpp | 83 +- .../core/src/Kokkos_OpenMPTargetSpace.hpp | 215 +- lib/kokkos/core/src/Kokkos_Pair.hpp | 319 +- lib/kokkos/core/src/Kokkos_Parallel.hpp | 404 +- .../core/src/Kokkos_Parallel_Reduce.hpp | 1143 ++-- .../core/src/Kokkos_PointerOwnership.hpp | 16 +- .../src/Kokkos_Profiling_ProfileSection.hpp | 175 +- lib/kokkos/core/src/Kokkos_Qthreads.hpp | 77 +- lib/kokkos/core/src/Kokkos_ROCm.hpp | 169 +- lib/kokkos/core/src/Kokkos_ROCmSpace.hpp | 617 +- lib/kokkos/core/src/Kokkos_ScratchSpace.hpp | 186 +- lib/kokkos/core/src/Kokkos_Serial.hpp | 1495 +++-- lib/kokkos/core/src/Kokkos_TaskPolicy.hpp | 20 +- lib/kokkos/core/src/Kokkos_TaskScheduler.hpp | 749 +-- .../core/src/Kokkos_TaskScheduler_fwd.hpp | 159 +- lib/kokkos/core/src/Kokkos_Threads.hpp | 117 +- lib/kokkos/core/src/Kokkos_Timer.hpp | 41 +- lib/kokkos/core/src/Kokkos_UniqueToken.hpp | 44 +- lib/kokkos/core/src/Kokkos_Vectorization.hpp | 14 +- 
lib/kokkos/core/src/Kokkos_View.hpp | 3779 ++++++----- .../core/src/Kokkos_WorkGraphPolicy.hpp | 200 +- lib/kokkos/core/src/Kokkos_hwloc.hpp | 34 +- lib/kokkos/core/src/Makefile | 117 - .../core/src/Makefile.generate_build_files | 125 - .../core/src/Makefile.generate_header_lists | 32 - .../core/src/OpenMP/Kokkos_OpenMP_Exec.cpp | 406 +- .../core/src/OpenMP/Kokkos_OpenMP_Exec.hpp | 326 +- .../src/OpenMP/Kokkos_OpenMP_Parallel.hpp | 1706 +++-- .../core/src/OpenMP/Kokkos_OpenMP_Task.cpp | 80 +- .../core/src/OpenMP/Kokkos_OpenMP_Task.hpp | 306 +- .../core/src/OpenMP/Kokkos_OpenMP_Team.hpp | 404 +- .../OpenMP/Kokkos_OpenMP_ViewCopyETIAvail.hpp | 18 +- .../OpenMP/Kokkos_OpenMP_ViewCopyETIDecl.hpp | 18 +- .../OpenMP/Kokkos_OpenMP_WorkGraphPolicy.hpp | 80 +- .../OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp | 296 +- .../OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp | 249 +- .../OpenMPTarget/Kokkos_OpenMPTarget_Exec.hpp | 872 +-- .../Kokkos_OpenMPTarget_Parallel.hpp | 1150 ++-- .../OpenMPTarget/Kokkos_OpenMPTarget_Task.cpp | 235 +- .../OpenMPTarget/Kokkos_OpenMPTarget_Task.hpp | 340 +- .../core/src/Qthreads/Kokkos_QthreadsExec.cpp | 454 +- .../core/src/Qthreads/Kokkos_QthreadsExec.hpp | 614 +- .../src/Qthreads/Kokkos_Qthreads_Parallel.hpp | 945 +-- .../src/Qthreads/Kokkos_Qthreads_Task.cpp | 224 +- .../src/Qthreads/Kokkos_Qthreads_Task.hpp | 122 +- .../Kokkos_Qthreads_TaskPolicy.hpp.old | 2 +- .../Qthreads/Kokkos_Qthreads_TaskQueue.hpp | 290 +- .../Kokkos_Qthreads_TaskQueue_impl.hpp | 311 +- .../KokkosExp_ROCm_IterateTile_Refactor.hpp | 3256 +++++----- .../core/src/ROCm/Kokkos_ROCm_Atomic.hpp | 826 +-- .../core/src/ROCm/Kokkos_ROCm_Config.hpp | 11 +- lib/kokkos/core/src/ROCm/Kokkos_ROCm_Exec.cpp | 76 +- lib/kokkos/core/src/ROCm/Kokkos_ROCm_Exec.hpp | 212 +- lib/kokkos/core/src/ROCm/Kokkos_ROCm_Impl.cpp | 748 ++- .../core/src/ROCm/Kokkos_ROCm_Invoke.hpp | 122 +- lib/kokkos/core/src/ROCm/Kokkos_ROCm_Join.hpp | 32 +- .../core/src/ROCm/Kokkos_ROCm_Parallel.hpp | 2543 ++++---- 
.../core/src/ROCm/Kokkos_ROCm_Reduce.hpp | 162 +- .../core/src/ROCm/Kokkos_ROCm_ReduceScan.hpp | 501 +- lib/kokkos/core/src/ROCm/Kokkos_ROCm_Scan.hpp | 329 +- .../core/src/ROCm/Kokkos_ROCm_Space.cpp | 745 ++- lib/kokkos/core/src/ROCm/Kokkos_ROCm_Task.cpp | 118 +- lib/kokkos/core/src/ROCm/Kokkos_ROCm_Task.hpp | 489 +- lib/kokkos/core/src/ROCm/Kokkos_ROCm_Tile.hpp | 628 +- .../src/ROCm/Kokkos_ROCm_Vectorization.hpp | 512 +- .../src/ROCm/Kokkos_ROCm_ViewCopyETIAvail.hpp | 18 +- .../src/ROCm/Kokkos_ROCm_ViewCopyETIDecl.hpp | 18 +- lib/kokkos/core/src/ROCm/hc_math_std.hpp | 426 +- .../Serial/Kokkos_Serial_ViewCopyETIAvail.hpp | 18 +- .../Serial/Kokkos_Serial_ViewCopyETIDecl.hpp | 18 +- .../core/src/Threads/Kokkos_ThreadsExec.cpp | 828 ++- .../core/src/Threads/Kokkos_ThreadsExec.hpp | 791 +-- .../src/Threads/Kokkos_ThreadsExec_base.cpp | 158 +- .../core/src/Threads/Kokkos_ThreadsTeam.hpp | 1389 ++-- .../src/Threads/Kokkos_Threads_Parallel.hpp | 1412 ++--- .../Kokkos_Threads_ViewCopyETIAvail.hpp | 18 +- .../Kokkos_Threads_ViewCopyETIDecl.hpp | 18 +- .../Kokkos_Threads_WorkGraphPolicy.hpp | 103 +- lib/kokkos/core/src/dummy.cpp | 10 + ...TIInst_int64_t_double_LayoutLeft_Rank1.cpp | 33 +- ...TIInst_int64_t_double_LayoutLeft_Rank2.cpp | 33 +- ...TIInst_int64_t_double_LayoutLeft_Rank3.cpp | 34 +- ...TIInst_int64_t_double_LayoutLeft_Rank4.cpp | 35 +- ...TIInst_int64_t_double_LayoutLeft_Rank5.cpp | 36 +- ...TIInst_int64_t_double_LayoutLeft_Rank8.cpp | 36 +- ...IInst_int64_t_double_LayoutRight_Rank1.cpp | 33 +- ...IInst_int64_t_double_LayoutRight_Rank2.cpp | 34 +- ...IInst_int64_t_double_LayoutRight_Rank3.cpp | 35 +- ...IInst_int64_t_double_LayoutRight_Rank4.cpp | 36 +- ...IInst_int64_t_double_LayoutRight_Rank5.cpp | 36 +- ...IInst_int64_t_double_LayoutRight_Rank8.cpp | 36 +- ...Inst_int64_t_double_LayoutStride_Rank1.cpp | 34 +- ...Inst_int64_t_double_LayoutStride_Rank2.cpp | 35 +- ...Inst_int64_t_double_LayoutStride_Rank3.cpp | 36 +- ...Inst_int64_t_double_LayoutStride_Rank4.cpp 
| 36 +- ...Inst_int64_t_double_LayoutStride_Rank5.cpp | 36 +- ...Inst_int64_t_double_LayoutStride_Rank8.cpp | 36 +- ...ETIInst_int64_t_float_LayoutLeft_Rank1.cpp | 33 +- ...ETIInst_int64_t_float_LayoutLeft_Rank2.cpp | 33 +- ...ETIInst_int64_t_float_LayoutLeft_Rank3.cpp | 33 +- ...ETIInst_int64_t_float_LayoutLeft_Rank4.cpp | 34 +- ...ETIInst_int64_t_float_LayoutLeft_Rank5.cpp | 35 +- ...ETIInst_int64_t_float_LayoutLeft_Rank8.cpp | 36 +- ...TIInst_int64_t_float_LayoutRight_Rank1.cpp | 33 +- ...TIInst_int64_t_float_LayoutRight_Rank2.cpp | 33 +- ...TIInst_int64_t_float_LayoutRight_Rank3.cpp | 34 +- ...TIInst_int64_t_float_LayoutRight_Rank4.cpp | 35 +- ...TIInst_int64_t_float_LayoutRight_Rank5.cpp | 36 +- ...TIInst_int64_t_float_LayoutRight_Rank8.cpp | 36 +- ...IInst_int64_t_float_LayoutStride_Rank1.cpp | 33 +- ...IInst_int64_t_float_LayoutStride_Rank2.cpp | 34 +- ...IInst_int64_t_float_LayoutStride_Rank3.cpp | 35 +- ...IInst_int64_t_float_LayoutStride_Rank4.cpp | 36 +- ...IInst_int64_t_float_LayoutStride_Rank5.cpp | 36 +- ...IInst_int64_t_float_LayoutStride_Rank8.cpp | 36 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank1.cpp | 33 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank2.cpp | 34 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank3.cpp | 35 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank4.cpp | 36 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank5.cpp | 36 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank8.cpp | 36 +- ...Inst_int64_t_int64_t_LayoutRight_Rank1.cpp | 34 +- ...Inst_int64_t_int64_t_LayoutRight_Rank2.cpp | 35 +- ...Inst_int64_t_int64_t_LayoutRight_Rank3.cpp | 36 +- ...Inst_int64_t_int64_t_LayoutRight_Rank4.cpp | 36 +- ...Inst_int64_t_int64_t_LayoutRight_Rank5.cpp | 36 +- ...Inst_int64_t_int64_t_LayoutRight_Rank8.cpp | 36 +- ...nst_int64_t_int64_t_LayoutStride_Rank1.cpp | 35 +- ...nst_int64_t_int64_t_LayoutStride_Rank2.cpp | 36 +- ...nst_int64_t_int64_t_LayoutStride_Rank3.cpp | 36 +- ...nst_int64_t_int64_t_LayoutStride_Rank4.cpp | 36 +- ...nst_int64_t_int64_t_LayoutStride_Rank5.cpp | 
36 +- ...nst_int64_t_int64_t_LayoutStride_Rank8.cpp | 36 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank1.cpp | 33 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank2.cpp | 33 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank3.cpp | 33 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank4.cpp | 33 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank5.cpp | 33 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank8.cpp | 36 +- ...yETIInst_int64_t_int_LayoutRight_Rank1.cpp | 33 +- ...yETIInst_int64_t_int_LayoutRight_Rank2.cpp | 33 +- ...yETIInst_int64_t_int_LayoutRight_Rank3.cpp | 33 +- ...yETIInst_int64_t_int_LayoutRight_Rank4.cpp | 33 +- ...yETIInst_int64_t_int_LayoutRight_Rank5.cpp | 34 +- ...yETIInst_int64_t_int_LayoutRight_Rank8.cpp | 36 +- ...ETIInst_int64_t_int_LayoutStride_Rank1.cpp | 33 +- ...ETIInst_int64_t_int_LayoutStride_Rank2.cpp | 33 +- ...ETIInst_int64_t_int_LayoutStride_Rank3.cpp | 33 +- ...ETIInst_int64_t_int_LayoutStride_Rank4.cpp | 34 +- ...ETIInst_int64_t_int_LayoutStride_Rank5.cpp | 35 +- ...ETIInst_int64_t_int_LayoutStride_Rank8.cpp | 36 +- ...opyETIInst_int_double_LayoutLeft_Rank1.cpp | 33 +- ...opyETIInst_int_double_LayoutLeft_Rank2.cpp | 33 +- ...opyETIInst_int_double_LayoutLeft_Rank3.cpp | 33 +- ...opyETIInst_int_double_LayoutLeft_Rank4.cpp | 33 +- ...opyETIInst_int_double_LayoutLeft_Rank5.cpp | 33 +- ...opyETIInst_int_double_LayoutLeft_Rank8.cpp | 35 +- ...pyETIInst_int_double_LayoutRight_Rank1.cpp | 33 +- ...pyETIInst_int_double_LayoutRight_Rank2.cpp | 33 +- ...pyETIInst_int_double_LayoutRight_Rank3.cpp | 33 +- ...pyETIInst_int_double_LayoutRight_Rank4.cpp | 33 +- ...pyETIInst_int_double_LayoutRight_Rank5.cpp | 33 +- ...pyETIInst_int_double_LayoutRight_Rank8.cpp | 36 +- ...yETIInst_int_double_LayoutStride_Rank1.cpp | 33 +- ...yETIInst_int_double_LayoutStride_Rank2.cpp | 33 +- ...yETIInst_int_double_LayoutStride_Rank3.cpp | 33 +- ...yETIInst_int_double_LayoutStride_Rank4.cpp | 33 +- ...yETIInst_int_double_LayoutStride_Rank5.cpp | 34 +- ...yETIInst_int_double_LayoutStride_Rank8.cpp | 
36 +- ...CopyETIInst_int_float_LayoutLeft_Rank1.cpp | 33 +- ...CopyETIInst_int_float_LayoutLeft_Rank2.cpp | 33 +- ...CopyETIInst_int_float_LayoutLeft_Rank3.cpp | 33 +- ...CopyETIInst_int_float_LayoutLeft_Rank4.cpp | 33 +- ...CopyETIInst_int_float_LayoutLeft_Rank5.cpp | 33 +- ...CopyETIInst_int_float_LayoutLeft_Rank8.cpp | 34 +- ...opyETIInst_int_float_LayoutRight_Rank1.cpp | 33 +- ...opyETIInst_int_float_LayoutRight_Rank2.cpp | 33 +- ...opyETIInst_int_float_LayoutRight_Rank3.cpp | 33 +- ...opyETIInst_int_float_LayoutRight_Rank4.cpp | 33 +- ...opyETIInst_int_float_LayoutRight_Rank5.cpp | 33 +- ...opyETIInst_int_float_LayoutRight_Rank8.cpp | 35 +- ...pyETIInst_int_float_LayoutStride_Rank1.cpp | 33 +- ...pyETIInst_int_float_LayoutStride_Rank2.cpp | 33 +- ...pyETIInst_int_float_LayoutStride_Rank3.cpp | 33 +- ...pyETIInst_int_float_LayoutStride_Rank4.cpp | 33 +- ...pyETIInst_int_float_LayoutStride_Rank5.cpp | 33 +- ...pyETIInst_int_float_LayoutStride_Rank8.cpp | 36 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank1.cpp | 33 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank2.cpp | 33 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank3.cpp | 33 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank4.cpp | 33 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank5.cpp | 33 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank8.cpp | 36 +- ...yETIInst_int_int64_t_LayoutRight_Rank1.cpp | 33 +- ...yETIInst_int_int64_t_LayoutRight_Rank2.cpp | 33 +- ...yETIInst_int_int64_t_LayoutRight_Rank3.cpp | 33 +- ...yETIInst_int_int64_t_LayoutRight_Rank4.cpp | 33 +- ...yETIInst_int_int64_t_LayoutRight_Rank5.cpp | 34 +- ...yETIInst_int_int64_t_LayoutRight_Rank8.cpp | 36 +- ...ETIInst_int_int64_t_LayoutStride_Rank1.cpp | 33 +- ...ETIInst_int_int64_t_LayoutStride_Rank2.cpp | 33 +- ...ETIInst_int_int64_t_LayoutStride_Rank3.cpp | 33 +- ...ETIInst_int_int64_t_LayoutStride_Rank4.cpp | 34 +- ...ETIInst_int_int64_t_LayoutStride_Rank5.cpp | 35 +- ...ETIInst_int_int64_t_LayoutStride_Rank8.cpp | 36 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank1.cpp | 
33 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank2.cpp | 33 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank3.cpp | 33 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank4.cpp | 33 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank5.cpp | 33 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank8.cpp | 33 +- ...wCopyETIInst_int_int_LayoutRight_Rank1.cpp | 33 +- ...wCopyETIInst_int_int_LayoutRight_Rank2.cpp | 33 +- ...wCopyETIInst_int_int_LayoutRight_Rank3.cpp | 33 +- ...wCopyETIInst_int_int_LayoutRight_Rank4.cpp | 33 +- ...wCopyETIInst_int_int_LayoutRight_Rank5.cpp | 33 +- ...wCopyETIInst_int_int_LayoutRight_Rank8.cpp | 33 +- ...CopyETIInst_int_int_LayoutStride_Rank1.cpp | 33 +- ...CopyETIInst_int_int_LayoutStride_Rank2.cpp | 33 +- ...CopyETIInst_int_int_LayoutStride_Rank3.cpp | 33 +- ...CopyETIInst_int_int_LayoutStride_Rank4.cpp | 33 +- ...CopyETIInst_int_int_LayoutStride_Rank5.cpp | 33 +- ...CopyETIInst_int_int_LayoutStride_Rank8.cpp | 34 +- ...TIInst_int64_t_double_LayoutLeft_Rank1.cpp | 36 +- ...TIInst_int64_t_double_LayoutLeft_Rank2.cpp | 36 +- ...TIInst_int64_t_double_LayoutLeft_Rank3.cpp | 36 +- ...TIInst_int64_t_double_LayoutLeft_Rank4.cpp | 37 +- ...TIInst_int64_t_double_LayoutLeft_Rank5.cpp | 37 +- ...TIInst_int64_t_double_LayoutLeft_Rank8.cpp | 37 +- ...IInst_int64_t_double_LayoutRight_Rank1.cpp | 36 +- ...IInst_int64_t_double_LayoutRight_Rank2.cpp | 36 +- ...IInst_int64_t_double_LayoutRight_Rank3.cpp | 37 +- ...IInst_int64_t_double_LayoutRight_Rank4.cpp | 37 +- ...IInst_int64_t_double_LayoutRight_Rank5.cpp | 37 +- ...IInst_int64_t_double_LayoutRight_Rank8.cpp | 37 +- ...Inst_int64_t_double_LayoutStride_Rank1.cpp | 36 +- ...Inst_int64_t_double_LayoutStride_Rank2.cpp | 37 +- ...Inst_int64_t_double_LayoutStride_Rank3.cpp | 37 +- ...Inst_int64_t_double_LayoutStride_Rank4.cpp | 37 +- ...Inst_int64_t_double_LayoutStride_Rank5.cpp | 37 +- ...Inst_int64_t_double_LayoutStride_Rank8.cpp | 37 +- ...ETIInst_int64_t_float_LayoutLeft_Rank1.cpp | 36 +- ...ETIInst_int64_t_float_LayoutLeft_Rank2.cpp | 
36 +- ...ETIInst_int64_t_float_LayoutLeft_Rank3.cpp | 36 +- ...ETIInst_int64_t_float_LayoutLeft_Rank4.cpp | 36 +- ...ETIInst_int64_t_float_LayoutLeft_Rank5.cpp | 37 +- ...ETIInst_int64_t_float_LayoutLeft_Rank8.cpp | 37 +- ...TIInst_int64_t_float_LayoutRight_Rank1.cpp | 36 +- ...TIInst_int64_t_float_LayoutRight_Rank2.cpp | 36 +- ...TIInst_int64_t_float_LayoutRight_Rank3.cpp | 36 +- ...TIInst_int64_t_float_LayoutRight_Rank4.cpp | 37 +- ...TIInst_int64_t_float_LayoutRight_Rank5.cpp | 37 +- ...TIInst_int64_t_float_LayoutRight_Rank8.cpp | 37 +- ...IInst_int64_t_float_LayoutStride_Rank1.cpp | 36 +- ...IInst_int64_t_float_LayoutStride_Rank2.cpp | 36 +- ...IInst_int64_t_float_LayoutStride_Rank3.cpp | 37 +- ...IInst_int64_t_float_LayoutStride_Rank4.cpp | 37 +- ...IInst_int64_t_float_LayoutStride_Rank5.cpp | 37 +- ...IInst_int64_t_float_LayoutStride_Rank8.cpp | 37 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank1.cpp | 36 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank2.cpp | 36 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank3.cpp | 37 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank4.cpp | 37 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank5.cpp | 37 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank8.cpp | 37 +- ...Inst_int64_t_int64_t_LayoutRight_Rank1.cpp | 36 +- ...Inst_int64_t_int64_t_LayoutRight_Rank2.cpp | 37 +- ...Inst_int64_t_int64_t_LayoutRight_Rank3.cpp | 37 +- ...Inst_int64_t_int64_t_LayoutRight_Rank4.cpp | 37 +- ...Inst_int64_t_int64_t_LayoutRight_Rank5.cpp | 37 +- ...Inst_int64_t_int64_t_LayoutRight_Rank8.cpp | 37 +- ...nst_int64_t_int64_t_LayoutStride_Rank1.cpp | 37 +- ...nst_int64_t_int64_t_LayoutStride_Rank2.cpp | 37 +- ...nst_int64_t_int64_t_LayoutStride_Rank3.cpp | 37 +- ...nst_int64_t_int64_t_LayoutStride_Rank4.cpp | 37 +- ...nst_int64_t_int64_t_LayoutStride_Rank5.cpp | 37 +- ...nst_int64_t_int64_t_LayoutStride_Rank8.cpp | 37 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank1.cpp | 36 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank2.cpp | 36 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank3.cpp | 
36 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank4.cpp | 36 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank5.cpp | 36 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank8.cpp | 37 +- ...yETIInst_int64_t_int_LayoutRight_Rank1.cpp | 36 +- ...yETIInst_int64_t_int_LayoutRight_Rank2.cpp | 36 +- ...yETIInst_int64_t_int_LayoutRight_Rank3.cpp | 36 +- ...yETIInst_int64_t_int_LayoutRight_Rank4.cpp | 36 +- ...yETIInst_int64_t_int_LayoutRight_Rank5.cpp | 36 +- ...yETIInst_int64_t_int_LayoutRight_Rank8.cpp | 37 +- ...ETIInst_int64_t_int_LayoutStride_Rank1.cpp | 36 +- ...ETIInst_int64_t_int_LayoutStride_Rank2.cpp | 36 +- ...ETIInst_int64_t_int_LayoutStride_Rank3.cpp | 36 +- ...ETIInst_int64_t_int_LayoutStride_Rank4.cpp | 36 +- ...ETIInst_int64_t_int_LayoutStride_Rank5.cpp | 37 +- ...ETIInst_int64_t_int_LayoutStride_Rank8.cpp | 37 +- ...opyETIInst_int_double_LayoutLeft_Rank1.cpp | 36 +- ...opyETIInst_int_double_LayoutLeft_Rank2.cpp | 36 +- ...opyETIInst_int_double_LayoutLeft_Rank3.cpp | 36 +- ...opyETIInst_int_double_LayoutLeft_Rank4.cpp | 36 +- ...opyETIInst_int_double_LayoutLeft_Rank5.cpp | 36 +- ...opyETIInst_int_double_LayoutLeft_Rank8.cpp | 37 +- ...pyETIInst_int_double_LayoutRight_Rank1.cpp | 36 +- ...pyETIInst_int_double_LayoutRight_Rank2.cpp | 36 +- ...pyETIInst_int_double_LayoutRight_Rank3.cpp | 36 +- ...pyETIInst_int_double_LayoutRight_Rank4.cpp | 36 +- ...pyETIInst_int_double_LayoutRight_Rank5.cpp | 36 +- ...pyETIInst_int_double_LayoutRight_Rank8.cpp | 37 +- ...yETIInst_int_double_LayoutStride_Rank1.cpp | 36 +- ...yETIInst_int_double_LayoutStride_Rank2.cpp | 36 +- ...yETIInst_int_double_LayoutStride_Rank3.cpp | 36 +- ...yETIInst_int_double_LayoutStride_Rank4.cpp | 36 +- ...yETIInst_int_double_LayoutStride_Rank5.cpp | 36 +- ...yETIInst_int_double_LayoutStride_Rank8.cpp | 37 +- ...CopyETIInst_int_float_LayoutLeft_Rank1.cpp | 36 +- ...CopyETIInst_int_float_LayoutLeft_Rank2.cpp | 36 +- ...CopyETIInst_int_float_LayoutLeft_Rank3.cpp | 36 +- ...CopyETIInst_int_float_LayoutLeft_Rank4.cpp | 
36 +- ...CopyETIInst_int_float_LayoutLeft_Rank5.cpp | 36 +- ...CopyETIInst_int_float_LayoutLeft_Rank8.cpp | 36 +- ...opyETIInst_int_float_LayoutRight_Rank1.cpp | 36 +- ...opyETIInst_int_float_LayoutRight_Rank2.cpp | 36 +- ...opyETIInst_int_float_LayoutRight_Rank3.cpp | 36 +- ...opyETIInst_int_float_LayoutRight_Rank4.cpp | 36 +- ...opyETIInst_int_float_LayoutRight_Rank5.cpp | 36 +- ...opyETIInst_int_float_LayoutRight_Rank8.cpp | 37 +- ...pyETIInst_int_float_LayoutStride_Rank1.cpp | 36 +- ...pyETIInst_int_float_LayoutStride_Rank2.cpp | 36 +- ...pyETIInst_int_float_LayoutStride_Rank3.cpp | 36 +- ...pyETIInst_int_float_LayoutStride_Rank4.cpp | 36 +- ...pyETIInst_int_float_LayoutStride_Rank5.cpp | 36 +- ...pyETIInst_int_float_LayoutStride_Rank8.cpp | 37 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank1.cpp | 36 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank2.cpp | 36 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank3.cpp | 36 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank4.cpp | 36 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank5.cpp | 36 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank8.cpp | 37 +- ...yETIInst_int_int64_t_LayoutRight_Rank1.cpp | 36 +- ...yETIInst_int_int64_t_LayoutRight_Rank2.cpp | 36 +- ...yETIInst_int_int64_t_LayoutRight_Rank3.cpp | 36 +- ...yETIInst_int_int64_t_LayoutRight_Rank4.cpp | 36 +- ...yETIInst_int_int64_t_LayoutRight_Rank5.cpp | 36 +- ...yETIInst_int_int64_t_LayoutRight_Rank8.cpp | 37 +- ...ETIInst_int_int64_t_LayoutStride_Rank1.cpp | 36 +- ...ETIInst_int_int64_t_LayoutStride_Rank2.cpp | 36 +- ...ETIInst_int_int64_t_LayoutStride_Rank3.cpp | 36 +- ...ETIInst_int_int64_t_LayoutStride_Rank4.cpp | 36 +- ...ETIInst_int_int64_t_LayoutStride_Rank5.cpp | 37 +- ...ETIInst_int_int64_t_LayoutStride_Rank8.cpp | 37 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank1.cpp | 36 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank2.cpp | 36 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank3.cpp | 36 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank4.cpp | 36 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank5.cpp | 
36 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank8.cpp | 36 +- ...wCopyETIInst_int_int_LayoutRight_Rank1.cpp | 36 +- ...wCopyETIInst_int_int_LayoutRight_Rank2.cpp | 36 +- ...wCopyETIInst_int_int_LayoutRight_Rank3.cpp | 36 +- ...wCopyETIInst_int_int_LayoutRight_Rank4.cpp | 36 +- ...wCopyETIInst_int_int_LayoutRight_Rank5.cpp | 36 +- ...wCopyETIInst_int_int_LayoutRight_Rank8.cpp | 36 +- ...CopyETIInst_int_int_LayoutStride_Rank1.cpp | 36 +- ...CopyETIInst_int_int_LayoutStride_Rank2.cpp | 36 +- ...CopyETIInst_int_int_LayoutStride_Rank3.cpp | 36 +- ...CopyETIInst_int_int_LayoutStride_Rank4.cpp | 36 +- ...CopyETIInst_int_int_LayoutStride_Rank5.cpp | 36 +- ...CopyETIInst_int_int_LayoutStride_Rank8.cpp | 36 +- ...TIInst_int64_t_double_LayoutLeft_Rank1.cpp | 34 +- ...TIInst_int64_t_double_LayoutLeft_Rank2.cpp | 35 +- ...TIInst_int64_t_double_LayoutLeft_Rank3.cpp | 36 +- ...TIInst_int64_t_double_LayoutLeft_Rank4.cpp | 36 +- ...TIInst_int64_t_double_LayoutLeft_Rank5.cpp | 36 +- ...TIInst_int64_t_double_LayoutLeft_Rank8.cpp | 36 +- ...IInst_int64_t_double_LayoutRight_Rank1.cpp | 35 +- ...IInst_int64_t_double_LayoutRight_Rank2.cpp | 36 +- ...IInst_int64_t_double_LayoutRight_Rank3.cpp | 36 +- ...IInst_int64_t_double_LayoutRight_Rank4.cpp | 36 +- ...IInst_int64_t_double_LayoutRight_Rank5.cpp | 36 +- ...IInst_int64_t_double_LayoutRight_Rank8.cpp | 36 +- ...Inst_int64_t_double_LayoutStride_Rank1.cpp | 36 +- ...Inst_int64_t_double_LayoutStride_Rank2.cpp | 36 +- ...Inst_int64_t_double_LayoutStride_Rank3.cpp | 36 +- ...Inst_int64_t_double_LayoutStride_Rank4.cpp | 36 +- ...Inst_int64_t_double_LayoutStride_Rank5.cpp | 36 +- ...Inst_int64_t_double_LayoutStride_Rank8.cpp | 36 +- ...ETIInst_int64_t_float_LayoutLeft_Rank1.cpp | 33 +- ...ETIInst_int64_t_float_LayoutLeft_Rank2.cpp | 34 +- ...ETIInst_int64_t_float_LayoutLeft_Rank3.cpp | 35 +- ...ETIInst_int64_t_float_LayoutLeft_Rank4.cpp | 36 +- ...ETIInst_int64_t_float_LayoutLeft_Rank5.cpp | 36 +- ...ETIInst_int64_t_float_LayoutLeft_Rank8.cpp | 
36 +- ...TIInst_int64_t_float_LayoutRight_Rank1.cpp | 34 +- ...TIInst_int64_t_float_LayoutRight_Rank2.cpp | 35 +- ...TIInst_int64_t_float_LayoutRight_Rank3.cpp | 36 +- ...TIInst_int64_t_float_LayoutRight_Rank4.cpp | 36 +- ...TIInst_int64_t_float_LayoutRight_Rank5.cpp | 36 +- ...TIInst_int64_t_float_LayoutRight_Rank8.cpp | 36 +- ...IInst_int64_t_float_LayoutStride_Rank1.cpp | 35 +- ...IInst_int64_t_float_LayoutStride_Rank2.cpp | 36 +- ...IInst_int64_t_float_LayoutStride_Rank3.cpp | 36 +- ...IInst_int64_t_float_LayoutStride_Rank4.cpp | 36 +- ...IInst_int64_t_float_LayoutStride_Rank5.cpp | 36 +- ...IInst_int64_t_float_LayoutStride_Rank8.cpp | 36 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank1.cpp | 35 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank2.cpp | 36 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank3.cpp | 36 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank4.cpp | 36 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank5.cpp | 36 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank8.cpp | 36 +- ...Inst_int64_t_int64_t_LayoutRight_Rank1.cpp | 36 +- ...Inst_int64_t_int64_t_LayoutRight_Rank2.cpp | 36 +- ...Inst_int64_t_int64_t_LayoutRight_Rank3.cpp | 36 +- ...Inst_int64_t_int64_t_LayoutRight_Rank4.cpp | 36 +- ...Inst_int64_t_int64_t_LayoutRight_Rank5.cpp | 36 +- ...Inst_int64_t_int64_t_LayoutRight_Rank8.cpp | 36 +- ...nst_int64_t_int64_t_LayoutStride_Rank1.cpp | 36 +- ...nst_int64_t_int64_t_LayoutStride_Rank2.cpp | 36 +- ...nst_int64_t_int64_t_LayoutStride_Rank3.cpp | 36 +- ...nst_int64_t_int64_t_LayoutStride_Rank4.cpp | 36 +- ...nst_int64_t_int64_t_LayoutStride_Rank5.cpp | 36 +- ...nst_int64_t_int64_t_LayoutStride_Rank8.cpp | 36 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank1.cpp | 33 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank2.cpp | 33 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank3.cpp | 33 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank4.cpp | 34 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank5.cpp | 35 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank8.cpp | 36 +- ...yETIInst_int64_t_int_LayoutRight_Rank1.cpp | 
33 +- ...yETIInst_int64_t_int_LayoutRight_Rank2.cpp | 33 +- ...yETIInst_int64_t_int_LayoutRight_Rank3.cpp | 34 +- ...yETIInst_int64_t_int_LayoutRight_Rank4.cpp | 35 +- ...yETIInst_int64_t_int_LayoutRight_Rank5.cpp | 36 +- ...yETIInst_int64_t_int_LayoutRight_Rank8.cpp | 36 +- ...ETIInst_int64_t_int_LayoutStride_Rank1.cpp | 33 +- ...ETIInst_int64_t_int_LayoutStride_Rank2.cpp | 34 +- ...ETIInst_int64_t_int_LayoutStride_Rank3.cpp | 35 +- ...ETIInst_int64_t_int_LayoutStride_Rank4.cpp | 36 +- ...ETIInst_int64_t_int_LayoutStride_Rank5.cpp | 36 +- ...ETIInst_int64_t_int_LayoutStride_Rank8.cpp | 36 +- ...opyETIInst_int_double_LayoutLeft_Rank1.cpp | 33 +- ...opyETIInst_int_double_LayoutLeft_Rank2.cpp | 33 +- ...opyETIInst_int_double_LayoutLeft_Rank3.cpp | 33 +- ...opyETIInst_int_double_LayoutLeft_Rank4.cpp | 33 +- ...opyETIInst_int_double_LayoutLeft_Rank5.cpp | 34 +- ...opyETIInst_int_double_LayoutLeft_Rank8.cpp | 36 +- ...pyETIInst_int_double_LayoutRight_Rank1.cpp | 33 +- ...pyETIInst_int_double_LayoutRight_Rank2.cpp | 33 +- ...pyETIInst_int_double_LayoutRight_Rank3.cpp | 33 +- ...pyETIInst_int_double_LayoutRight_Rank4.cpp | 34 +- ...pyETIInst_int_double_LayoutRight_Rank5.cpp | 35 +- ...pyETIInst_int_double_LayoutRight_Rank8.cpp | 36 +- ...yETIInst_int_double_LayoutStride_Rank1.cpp | 33 +- ...yETIInst_int_double_LayoutStride_Rank2.cpp | 33 +- ...yETIInst_int_double_LayoutStride_Rank3.cpp | 34 +- ...yETIInst_int_double_LayoutStride_Rank4.cpp | 35 +- ...yETIInst_int_double_LayoutStride_Rank5.cpp | 36 +- ...yETIInst_int_double_LayoutStride_Rank8.cpp | 36 +- ...CopyETIInst_int_float_LayoutLeft_Rank1.cpp | 33 +- ...CopyETIInst_int_float_LayoutLeft_Rank2.cpp | 33 +- ...CopyETIInst_int_float_LayoutLeft_Rank3.cpp | 33 +- ...CopyETIInst_int_float_LayoutLeft_Rank4.cpp | 33 +- ...CopyETIInst_int_float_LayoutLeft_Rank5.cpp | 33 +- ...CopyETIInst_int_float_LayoutLeft_Rank8.cpp | 36 +- ...opyETIInst_int_float_LayoutRight_Rank1.cpp | 33 +- ...opyETIInst_int_float_LayoutRight_Rank2.cpp | 
33 +- ...opyETIInst_int_float_LayoutRight_Rank3.cpp | 33 +- ...opyETIInst_int_float_LayoutRight_Rank4.cpp | 33 +- ...opyETIInst_int_float_LayoutRight_Rank5.cpp | 34 +- ...opyETIInst_int_float_LayoutRight_Rank8.cpp | 36 +- ...pyETIInst_int_float_LayoutStride_Rank1.cpp | 33 +- ...pyETIInst_int_float_LayoutStride_Rank2.cpp | 33 +- ...pyETIInst_int_float_LayoutStride_Rank3.cpp | 33 +- ...pyETIInst_int_float_LayoutStride_Rank4.cpp | 34 +- ...pyETIInst_int_float_LayoutStride_Rank5.cpp | 35 +- ...pyETIInst_int_float_LayoutStride_Rank8.cpp | 36 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank1.cpp | 33 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank2.cpp | 33 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank3.cpp | 33 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank4.cpp | 34 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank5.cpp | 35 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank8.cpp | 36 +- ...yETIInst_int_int64_t_LayoutRight_Rank1.cpp | 33 +- ...yETIInst_int_int64_t_LayoutRight_Rank2.cpp | 33 +- ...yETIInst_int_int64_t_LayoutRight_Rank3.cpp | 34 +- ...yETIInst_int_int64_t_LayoutRight_Rank4.cpp | 35 +- ...yETIInst_int_int64_t_LayoutRight_Rank5.cpp | 36 +- ...yETIInst_int_int64_t_LayoutRight_Rank8.cpp | 36 +- ...ETIInst_int_int64_t_LayoutStride_Rank1.cpp | 33 +- ...ETIInst_int_int64_t_LayoutStride_Rank2.cpp | 34 +- ...ETIInst_int_int64_t_LayoutStride_Rank3.cpp | 35 +- ...ETIInst_int_int64_t_LayoutStride_Rank4.cpp | 36 +- ...ETIInst_int_int64_t_LayoutStride_Rank5.cpp | 36 +- ...ETIInst_int_int64_t_LayoutStride_Rank8.cpp | 36 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank1.cpp | 33 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank2.cpp | 33 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank3.cpp | 33 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank4.cpp | 33 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank5.cpp | 33 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank8.cpp | 34 +- ...wCopyETIInst_int_int_LayoutRight_Rank1.cpp | 33 +- ...wCopyETIInst_int_int_LayoutRight_Rank2.cpp | 33 +- ...wCopyETIInst_int_int_LayoutRight_Rank3.cpp | 
33 +- ...wCopyETIInst_int_int_LayoutRight_Rank4.cpp | 33 +- ...wCopyETIInst_int_int_LayoutRight_Rank5.cpp | 33 +- ...wCopyETIInst_int_int_LayoutRight_Rank8.cpp | 35 +- ...CopyETIInst_int_int_LayoutStride_Rank1.cpp | 33 +- ...CopyETIInst_int_int_LayoutStride_Rank2.cpp | 33 +- ...CopyETIInst_int_int_LayoutStride_Rank3.cpp | 33 +- ...CopyETIInst_int_int_LayoutStride_Rank4.cpp | 33 +- ...CopyETIInst_int_int_LayoutStride_Rank5.cpp | 33 +- ...CopyETIInst_int_int_LayoutStride_Rank8.cpp | 36 +- ...TIInst_int64_t_double_LayoutLeft_Rank1.cpp | 36 +- ...TIInst_int64_t_double_LayoutLeft_Rank2.cpp | 36 +- ...TIInst_int64_t_double_LayoutLeft_Rank3.cpp | 37 +- ...TIInst_int64_t_double_LayoutLeft_Rank4.cpp | 37 +- ...TIInst_int64_t_double_LayoutLeft_Rank5.cpp | 37 +- ...TIInst_int64_t_double_LayoutLeft_Rank8.cpp | 37 +- ...IInst_int64_t_double_LayoutRight_Rank1.cpp | 36 +- ...IInst_int64_t_double_LayoutRight_Rank2.cpp | 37 +- ...IInst_int64_t_double_LayoutRight_Rank3.cpp | 37 +- ...IInst_int64_t_double_LayoutRight_Rank4.cpp | 37 +- ...IInst_int64_t_double_LayoutRight_Rank5.cpp | 37 +- ...IInst_int64_t_double_LayoutRight_Rank8.cpp | 37 +- ...Inst_int64_t_double_LayoutStride_Rank1.cpp | 37 +- ...Inst_int64_t_double_LayoutStride_Rank2.cpp | 37 +- ...Inst_int64_t_double_LayoutStride_Rank3.cpp | 37 +- ...Inst_int64_t_double_LayoutStride_Rank4.cpp | 37 +- ...Inst_int64_t_double_LayoutStride_Rank5.cpp | 37 +- ...Inst_int64_t_double_LayoutStride_Rank8.cpp | 37 +- ...ETIInst_int64_t_float_LayoutLeft_Rank1.cpp | 36 +- ...ETIInst_int64_t_float_LayoutLeft_Rank2.cpp | 36 +- ...ETIInst_int64_t_float_LayoutLeft_Rank3.cpp | 36 +- ...ETIInst_int64_t_float_LayoutLeft_Rank4.cpp | 37 +- ...ETIInst_int64_t_float_LayoutLeft_Rank5.cpp | 37 +- ...ETIInst_int64_t_float_LayoutLeft_Rank8.cpp | 37 +- ...TIInst_int64_t_float_LayoutRight_Rank1.cpp | 36 +- ...TIInst_int64_t_float_LayoutRight_Rank2.cpp | 36 +- ...TIInst_int64_t_float_LayoutRight_Rank3.cpp | 37 +- ...TIInst_int64_t_float_LayoutRight_Rank4.cpp | 
37 +- ...TIInst_int64_t_float_LayoutRight_Rank5.cpp | 37 +- ...TIInst_int64_t_float_LayoutRight_Rank8.cpp | 37 +- ...IInst_int64_t_float_LayoutStride_Rank1.cpp | 36 +- ...IInst_int64_t_float_LayoutStride_Rank2.cpp | 37 +- ...IInst_int64_t_float_LayoutStride_Rank3.cpp | 37 +- ...IInst_int64_t_float_LayoutStride_Rank4.cpp | 37 +- ...IInst_int64_t_float_LayoutStride_Rank5.cpp | 37 +- ...IInst_int64_t_float_LayoutStride_Rank8.cpp | 37 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank1.cpp | 36 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank2.cpp | 37 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank3.cpp | 37 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank4.cpp | 37 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank5.cpp | 37 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank8.cpp | 37 +- ...Inst_int64_t_int64_t_LayoutRight_Rank1.cpp | 37 +- ...Inst_int64_t_int64_t_LayoutRight_Rank2.cpp | 37 +- ...Inst_int64_t_int64_t_LayoutRight_Rank3.cpp | 37 +- ...Inst_int64_t_int64_t_LayoutRight_Rank4.cpp | 37 +- ...Inst_int64_t_int64_t_LayoutRight_Rank5.cpp | 37 +- ...Inst_int64_t_int64_t_LayoutRight_Rank8.cpp | 37 +- ...nst_int64_t_int64_t_LayoutStride_Rank1.cpp | 37 +- ...nst_int64_t_int64_t_LayoutStride_Rank2.cpp | 37 +- ...nst_int64_t_int64_t_LayoutStride_Rank3.cpp | 37 +- ...nst_int64_t_int64_t_LayoutStride_Rank4.cpp | 37 +- ...nst_int64_t_int64_t_LayoutStride_Rank5.cpp | 37 +- ...nst_int64_t_int64_t_LayoutStride_Rank8.cpp | 37 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank1.cpp | 36 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank2.cpp | 36 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank3.cpp | 36 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank4.cpp | 36 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank5.cpp | 36 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank8.cpp | 37 +- ...yETIInst_int64_t_int_LayoutRight_Rank1.cpp | 36 +- ...yETIInst_int64_t_int_LayoutRight_Rank2.cpp | 36 +- ...yETIInst_int64_t_int_LayoutRight_Rank3.cpp | 36 +- ...yETIInst_int64_t_int_LayoutRight_Rank4.cpp | 36 +- ...yETIInst_int64_t_int_LayoutRight_Rank5.cpp | 
37 +- ...yETIInst_int64_t_int_LayoutRight_Rank8.cpp | 37 +- ...ETIInst_int64_t_int_LayoutStride_Rank1.cpp | 36 +- ...ETIInst_int64_t_int_LayoutStride_Rank2.cpp | 36 +- ...ETIInst_int64_t_int_LayoutStride_Rank3.cpp | 36 +- ...ETIInst_int64_t_int_LayoutStride_Rank4.cpp | 37 +- ...ETIInst_int64_t_int_LayoutStride_Rank5.cpp | 37 +- ...ETIInst_int64_t_int_LayoutStride_Rank8.cpp | 37 +- ...opyETIInst_int_double_LayoutLeft_Rank1.cpp | 36 +- ...opyETIInst_int_double_LayoutLeft_Rank2.cpp | 36 +- ...opyETIInst_int_double_LayoutLeft_Rank3.cpp | 36 +- ...opyETIInst_int_double_LayoutLeft_Rank4.cpp | 36 +- ...opyETIInst_int_double_LayoutLeft_Rank5.cpp | 36 +- ...opyETIInst_int_double_LayoutLeft_Rank8.cpp | 37 +- ...pyETIInst_int_double_LayoutRight_Rank1.cpp | 36 +- ...pyETIInst_int_double_LayoutRight_Rank2.cpp | 36 +- ...pyETIInst_int_double_LayoutRight_Rank3.cpp | 36 +- ...pyETIInst_int_double_LayoutRight_Rank4.cpp | 36 +- ...pyETIInst_int_double_LayoutRight_Rank5.cpp | 36 +- ...pyETIInst_int_double_LayoutRight_Rank8.cpp | 37 +- ...yETIInst_int_double_LayoutStride_Rank1.cpp | 36 +- ...yETIInst_int_double_LayoutStride_Rank2.cpp | 36 +- ...yETIInst_int_double_LayoutStride_Rank3.cpp | 36 +- ...yETIInst_int_double_LayoutStride_Rank4.cpp | 36 +- ...yETIInst_int_double_LayoutStride_Rank5.cpp | 37 +- ...yETIInst_int_double_LayoutStride_Rank8.cpp | 37 +- ...CopyETIInst_int_float_LayoutLeft_Rank1.cpp | 36 +- ...CopyETIInst_int_float_LayoutLeft_Rank2.cpp | 36 +- ...CopyETIInst_int_float_LayoutLeft_Rank3.cpp | 36 +- ...CopyETIInst_int_float_LayoutLeft_Rank4.cpp | 36 +- ...CopyETIInst_int_float_LayoutLeft_Rank5.cpp | 36 +- ...CopyETIInst_int_float_LayoutLeft_Rank8.cpp | 37 +- ...opyETIInst_int_float_LayoutRight_Rank1.cpp | 36 +- ...opyETIInst_int_float_LayoutRight_Rank2.cpp | 36 +- ...opyETIInst_int_float_LayoutRight_Rank3.cpp | 36 +- ...opyETIInst_int_float_LayoutRight_Rank4.cpp | 36 +- ...opyETIInst_int_float_LayoutRight_Rank5.cpp | 36 +- ...opyETIInst_int_float_LayoutRight_Rank8.cpp | 
37 +- ...pyETIInst_int_float_LayoutStride_Rank1.cpp | 36 +- ...pyETIInst_int_float_LayoutStride_Rank2.cpp | 36 +- ...pyETIInst_int_float_LayoutStride_Rank3.cpp | 36 +- ...pyETIInst_int_float_LayoutStride_Rank4.cpp | 36 +- ...pyETIInst_int_float_LayoutStride_Rank5.cpp | 36 +- ...pyETIInst_int_float_LayoutStride_Rank8.cpp | 37 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank1.cpp | 36 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank2.cpp | 36 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank3.cpp | 36 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank4.cpp | 36 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank5.cpp | 36 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank8.cpp | 37 +- ...yETIInst_int_int64_t_LayoutRight_Rank1.cpp | 36 +- ...yETIInst_int_int64_t_LayoutRight_Rank2.cpp | 36 +- ...yETIInst_int_int64_t_LayoutRight_Rank3.cpp | 36 +- ...yETIInst_int_int64_t_LayoutRight_Rank4.cpp | 36 +- ...yETIInst_int_int64_t_LayoutRight_Rank5.cpp | 37 +- ...yETIInst_int_int64_t_LayoutRight_Rank8.cpp | 37 +- ...ETIInst_int_int64_t_LayoutStride_Rank1.cpp | 36 +- ...ETIInst_int_int64_t_LayoutStride_Rank2.cpp | 36 +- ...ETIInst_int_int64_t_LayoutStride_Rank3.cpp | 36 +- ...ETIInst_int_int64_t_LayoutStride_Rank4.cpp | 37 +- ...ETIInst_int_int64_t_LayoutStride_Rank5.cpp | 37 +- ...ETIInst_int_int64_t_LayoutStride_Rank8.cpp | 37 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank1.cpp | 36 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank2.cpp | 36 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank3.cpp | 36 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank4.cpp | 36 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank5.cpp | 36 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank8.cpp | 36 +- ...wCopyETIInst_int_int_LayoutRight_Rank1.cpp | 36 +- ...wCopyETIInst_int_int_LayoutRight_Rank2.cpp | 36 +- ...wCopyETIInst_int_int_LayoutRight_Rank3.cpp | 36 +- ...wCopyETIInst_int_int_LayoutRight_Rank4.cpp | 36 +- ...wCopyETIInst_int_int_LayoutRight_Rank5.cpp | 36 +- ...wCopyETIInst_int_int_LayoutRight_Rank8.cpp | 36 +- ...CopyETIInst_int_int_LayoutStride_Rank1.cpp | 
36 +- ...CopyETIInst_int_int_LayoutStride_Rank2.cpp | 36 +- ...CopyETIInst_int_int_LayoutStride_Rank3.cpp | 36 +- ...CopyETIInst_int_int_LayoutStride_Rank4.cpp | 36 +- ...CopyETIInst_int_int_LayoutStride_Rank5.cpp | 36 +- ...CopyETIInst_int_int_LayoutStride_Rank8.cpp | 37 +- ...TIInst_int64_t_double_LayoutLeft_Rank1.cpp | 34 +- ...TIInst_int64_t_double_LayoutLeft_Rank2.cpp | 35 +- ...TIInst_int64_t_double_LayoutLeft_Rank3.cpp | 36 +- ...TIInst_int64_t_double_LayoutLeft_Rank4.cpp | 36 +- ...TIInst_int64_t_double_LayoutLeft_Rank5.cpp | 36 +- ...TIInst_int64_t_double_LayoutLeft_Rank8.cpp | 36 +- ...IInst_int64_t_double_LayoutRight_Rank1.cpp | 35 +- ...IInst_int64_t_double_LayoutRight_Rank2.cpp | 36 +- ...IInst_int64_t_double_LayoutRight_Rank3.cpp | 36 +- ...IInst_int64_t_double_LayoutRight_Rank4.cpp | 36 +- ...IInst_int64_t_double_LayoutRight_Rank5.cpp | 36 +- ...IInst_int64_t_double_LayoutRight_Rank8.cpp | 36 +- ...Inst_int64_t_double_LayoutStride_Rank1.cpp | 36 +- ...Inst_int64_t_double_LayoutStride_Rank2.cpp | 36 +- ...Inst_int64_t_double_LayoutStride_Rank3.cpp | 36 +- ...Inst_int64_t_double_LayoutStride_Rank4.cpp | 36 +- ...Inst_int64_t_double_LayoutStride_Rank5.cpp | 36 +- ...Inst_int64_t_double_LayoutStride_Rank8.cpp | 36 +- ...ETIInst_int64_t_float_LayoutLeft_Rank1.cpp | 33 +- ...ETIInst_int64_t_float_LayoutLeft_Rank2.cpp | 34 +- ...ETIInst_int64_t_float_LayoutLeft_Rank3.cpp | 35 +- ...ETIInst_int64_t_float_LayoutLeft_Rank4.cpp | 36 +- ...ETIInst_int64_t_float_LayoutLeft_Rank5.cpp | 36 +- ...ETIInst_int64_t_float_LayoutLeft_Rank8.cpp | 36 +- ...TIInst_int64_t_float_LayoutRight_Rank1.cpp | 34 +- ...TIInst_int64_t_float_LayoutRight_Rank2.cpp | 35 +- ...TIInst_int64_t_float_LayoutRight_Rank3.cpp | 36 +- ...TIInst_int64_t_float_LayoutRight_Rank4.cpp | 36 +- ...TIInst_int64_t_float_LayoutRight_Rank5.cpp | 36 +- ...TIInst_int64_t_float_LayoutRight_Rank8.cpp | 36 +- ...IInst_int64_t_float_LayoutStride_Rank1.cpp | 35 +- ...IInst_int64_t_float_LayoutStride_Rank2.cpp | 
36 +- ...IInst_int64_t_float_LayoutStride_Rank3.cpp | 36 +- ...IInst_int64_t_float_LayoutStride_Rank4.cpp | 36 +- ...IInst_int64_t_float_LayoutStride_Rank5.cpp | 36 +- ...IInst_int64_t_float_LayoutStride_Rank8.cpp | 36 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank1.cpp | 35 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank2.cpp | 36 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank3.cpp | 36 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank4.cpp | 36 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank5.cpp | 36 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank8.cpp | 36 +- ...Inst_int64_t_int64_t_LayoutRight_Rank1.cpp | 36 +- ...Inst_int64_t_int64_t_LayoutRight_Rank2.cpp | 36 +- ...Inst_int64_t_int64_t_LayoutRight_Rank3.cpp | 36 +- ...Inst_int64_t_int64_t_LayoutRight_Rank4.cpp | 36 +- ...Inst_int64_t_int64_t_LayoutRight_Rank5.cpp | 36 +- ...Inst_int64_t_int64_t_LayoutRight_Rank8.cpp | 36 +- ...nst_int64_t_int64_t_LayoutStride_Rank1.cpp | 36 +- ...nst_int64_t_int64_t_LayoutStride_Rank2.cpp | 36 +- ...nst_int64_t_int64_t_LayoutStride_Rank3.cpp | 36 +- ...nst_int64_t_int64_t_LayoutStride_Rank4.cpp | 36 +- ...nst_int64_t_int64_t_LayoutStride_Rank5.cpp | 36 +- ...nst_int64_t_int64_t_LayoutStride_Rank8.cpp | 36 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank1.cpp | 33 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank2.cpp | 33 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank3.cpp | 33 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank4.cpp | 34 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank5.cpp | 35 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank8.cpp | 36 +- ...yETIInst_int64_t_int_LayoutRight_Rank1.cpp | 33 +- ...yETIInst_int64_t_int_LayoutRight_Rank2.cpp | 33 +- ...yETIInst_int64_t_int_LayoutRight_Rank3.cpp | 34 +- ...yETIInst_int64_t_int_LayoutRight_Rank4.cpp | 35 +- ...yETIInst_int64_t_int_LayoutRight_Rank5.cpp | 36 +- ...yETIInst_int64_t_int_LayoutRight_Rank8.cpp | 36 +- ...ETIInst_int64_t_int_LayoutStride_Rank1.cpp | 33 +- ...ETIInst_int64_t_int_LayoutStride_Rank2.cpp | 34 +- ...ETIInst_int64_t_int_LayoutStride_Rank3.cpp | 
35 +- ...ETIInst_int64_t_int_LayoutStride_Rank4.cpp | 36 +- ...ETIInst_int64_t_int_LayoutStride_Rank5.cpp | 36 +- ...ETIInst_int64_t_int_LayoutStride_Rank8.cpp | 36 +- ...opyETIInst_int_double_LayoutLeft_Rank1.cpp | 33 +- ...opyETIInst_int_double_LayoutLeft_Rank2.cpp | 33 +- ...opyETIInst_int_double_LayoutLeft_Rank3.cpp | 33 +- ...opyETIInst_int_double_LayoutLeft_Rank4.cpp | 33 +- ...opyETIInst_int_double_LayoutLeft_Rank5.cpp | 34 +- ...opyETIInst_int_double_LayoutLeft_Rank8.cpp | 36 +- ...pyETIInst_int_double_LayoutRight_Rank1.cpp | 33 +- ...pyETIInst_int_double_LayoutRight_Rank2.cpp | 33 +- ...pyETIInst_int_double_LayoutRight_Rank3.cpp | 33 +- ...pyETIInst_int_double_LayoutRight_Rank4.cpp | 34 +- ...pyETIInst_int_double_LayoutRight_Rank5.cpp | 35 +- ...pyETIInst_int_double_LayoutRight_Rank8.cpp | 36 +- ...yETIInst_int_double_LayoutStride_Rank1.cpp | 33 +- ...yETIInst_int_double_LayoutStride_Rank2.cpp | 33 +- ...yETIInst_int_double_LayoutStride_Rank3.cpp | 34 +- ...yETIInst_int_double_LayoutStride_Rank4.cpp | 35 +- ...yETIInst_int_double_LayoutStride_Rank5.cpp | 36 +- ...yETIInst_int_double_LayoutStride_Rank8.cpp | 36 +- ...CopyETIInst_int_float_LayoutLeft_Rank1.cpp | 33 +- ...CopyETIInst_int_float_LayoutLeft_Rank2.cpp | 33 +- ...CopyETIInst_int_float_LayoutLeft_Rank3.cpp | 33 +- ...CopyETIInst_int_float_LayoutLeft_Rank4.cpp | 33 +- ...CopyETIInst_int_float_LayoutLeft_Rank5.cpp | 33 +- ...CopyETIInst_int_float_LayoutLeft_Rank8.cpp | 36 +- ...opyETIInst_int_float_LayoutRight_Rank1.cpp | 33 +- ...opyETIInst_int_float_LayoutRight_Rank2.cpp | 33 +- ...opyETIInst_int_float_LayoutRight_Rank3.cpp | 33 +- ...opyETIInst_int_float_LayoutRight_Rank4.cpp | 33 +- ...opyETIInst_int_float_LayoutRight_Rank5.cpp | 34 +- ...opyETIInst_int_float_LayoutRight_Rank8.cpp | 36 +- ...pyETIInst_int_float_LayoutStride_Rank1.cpp | 33 +- ...pyETIInst_int_float_LayoutStride_Rank2.cpp | 33 +- ...pyETIInst_int_float_LayoutStride_Rank3.cpp | 33 +- ...pyETIInst_int_float_LayoutStride_Rank4.cpp | 
34 +- ...pyETIInst_int_float_LayoutStride_Rank5.cpp | 35 +- ...pyETIInst_int_float_LayoutStride_Rank8.cpp | 36 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank1.cpp | 33 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank2.cpp | 33 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank3.cpp | 33 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank4.cpp | 34 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank5.cpp | 35 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank8.cpp | 36 +- ...yETIInst_int_int64_t_LayoutRight_Rank1.cpp | 33 +- ...yETIInst_int_int64_t_LayoutRight_Rank2.cpp | 33 +- ...yETIInst_int_int64_t_LayoutRight_Rank3.cpp | 34 +- ...yETIInst_int_int64_t_LayoutRight_Rank4.cpp | 35 +- ...yETIInst_int_int64_t_LayoutRight_Rank5.cpp | 36 +- ...yETIInst_int_int64_t_LayoutRight_Rank8.cpp | 36 +- ...ETIInst_int_int64_t_LayoutStride_Rank1.cpp | 33 +- ...ETIInst_int_int64_t_LayoutStride_Rank2.cpp | 34 +- ...ETIInst_int_int64_t_LayoutStride_Rank3.cpp | 35 +- ...ETIInst_int_int64_t_LayoutStride_Rank4.cpp | 36 +- ...ETIInst_int_int64_t_LayoutStride_Rank5.cpp | 36 +- ...ETIInst_int_int64_t_LayoutStride_Rank8.cpp | 36 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank1.cpp | 33 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank2.cpp | 33 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank3.cpp | 33 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank4.cpp | 33 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank5.cpp | 33 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank8.cpp | 34 +- ...wCopyETIInst_int_int_LayoutRight_Rank1.cpp | 33 +- ...wCopyETIInst_int_int_LayoutRight_Rank2.cpp | 33 +- ...wCopyETIInst_int_int_LayoutRight_Rank3.cpp | 33 +- ...wCopyETIInst_int_int_LayoutRight_Rank4.cpp | 33 +- ...wCopyETIInst_int_int_LayoutRight_Rank5.cpp | 33 +- ...wCopyETIInst_int_int_LayoutRight_Rank8.cpp | 35 +- ...CopyETIInst_int_int_LayoutStride_Rank1.cpp | 33 +- ...CopyETIInst_int_int_LayoutStride_Rank2.cpp | 33 +- ...CopyETIInst_int_int_LayoutStride_Rank3.cpp | 33 +- ...CopyETIInst_int_int_LayoutStride_Rank4.cpp | 33 +- ...CopyETIInst_int_int_LayoutStride_Rank5.cpp | 
33 +- ...CopyETIInst_int_int_LayoutStride_Rank8.cpp | 36 +- ...TIInst_int64_t_double_LayoutLeft_Rank1.cpp | 35 +- ...TIInst_int64_t_double_LayoutLeft_Rank2.cpp | 36 +- ...TIInst_int64_t_double_LayoutLeft_Rank3.cpp | 36 +- ...TIInst_int64_t_double_LayoutLeft_Rank4.cpp | 36 +- ...TIInst_int64_t_double_LayoutLeft_Rank5.cpp | 36 +- ...TIInst_int64_t_double_LayoutLeft_Rank8.cpp | 36 +- ...IInst_int64_t_double_LayoutRight_Rank1.cpp | 36 +- ...IInst_int64_t_double_LayoutRight_Rank2.cpp | 36 +- ...IInst_int64_t_double_LayoutRight_Rank3.cpp | 36 +- ...IInst_int64_t_double_LayoutRight_Rank4.cpp | 36 +- ...IInst_int64_t_double_LayoutRight_Rank5.cpp | 36 +- ...IInst_int64_t_double_LayoutRight_Rank8.cpp | 36 +- ...Inst_int64_t_double_LayoutStride_Rank1.cpp | 36 +- ...Inst_int64_t_double_LayoutStride_Rank2.cpp | 36 +- ...Inst_int64_t_double_LayoutStride_Rank3.cpp | 36 +- ...Inst_int64_t_double_LayoutStride_Rank4.cpp | 36 +- ...Inst_int64_t_double_LayoutStride_Rank5.cpp | 36 +- ...Inst_int64_t_double_LayoutStride_Rank8.cpp | 36 +- ...ETIInst_int64_t_float_LayoutLeft_Rank1.cpp | 34 +- ...ETIInst_int64_t_float_LayoutLeft_Rank2.cpp | 35 +- ...ETIInst_int64_t_float_LayoutLeft_Rank3.cpp | 36 +- ...ETIInst_int64_t_float_LayoutLeft_Rank4.cpp | 36 +- ...ETIInst_int64_t_float_LayoutLeft_Rank5.cpp | 36 +- ...ETIInst_int64_t_float_LayoutLeft_Rank8.cpp | 36 +- ...TIInst_int64_t_float_LayoutRight_Rank1.cpp | 35 +- ...TIInst_int64_t_float_LayoutRight_Rank2.cpp | 36 +- ...TIInst_int64_t_float_LayoutRight_Rank3.cpp | 36 +- ...TIInst_int64_t_float_LayoutRight_Rank4.cpp | 36 +- ...TIInst_int64_t_float_LayoutRight_Rank5.cpp | 36 +- ...TIInst_int64_t_float_LayoutRight_Rank8.cpp | 36 +- ...IInst_int64_t_float_LayoutStride_Rank1.cpp | 36 +- ...IInst_int64_t_float_LayoutStride_Rank2.cpp | 36 +- ...IInst_int64_t_float_LayoutStride_Rank3.cpp | 36 +- ...IInst_int64_t_float_LayoutStride_Rank4.cpp | 36 +- ...IInst_int64_t_float_LayoutStride_Rank5.cpp | 36 +- ...IInst_int64_t_float_LayoutStride_Rank8.cpp | 
36 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank1.cpp | 36 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank2.cpp | 36 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank3.cpp | 36 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank4.cpp | 36 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank5.cpp | 36 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank8.cpp | 36 +- ...Inst_int64_t_int64_t_LayoutRight_Rank1.cpp | 36 +- ...Inst_int64_t_int64_t_LayoutRight_Rank2.cpp | 36 +- ...Inst_int64_t_int64_t_LayoutRight_Rank3.cpp | 36 +- ...Inst_int64_t_int64_t_LayoutRight_Rank4.cpp | 36 +- ...Inst_int64_t_int64_t_LayoutRight_Rank5.cpp | 36 +- ...Inst_int64_t_int64_t_LayoutRight_Rank8.cpp | 36 +- ...nst_int64_t_int64_t_LayoutStride_Rank1.cpp | 36 +- ...nst_int64_t_int64_t_LayoutStride_Rank2.cpp | 36 +- ...nst_int64_t_int64_t_LayoutStride_Rank3.cpp | 36 +- ...nst_int64_t_int64_t_LayoutStride_Rank4.cpp | 36 +- ...nst_int64_t_int64_t_LayoutStride_Rank5.cpp | 36 +- ...nst_int64_t_int64_t_LayoutStride_Rank8.cpp | 36 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank1.cpp | 33 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank2.cpp | 33 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank3.cpp | 34 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank4.cpp | 35 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank5.cpp | 36 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank8.cpp | 36 +- ...yETIInst_int64_t_int_LayoutRight_Rank1.cpp | 33 +- ...yETIInst_int64_t_int_LayoutRight_Rank2.cpp | 34 +- ...yETIInst_int64_t_int_LayoutRight_Rank3.cpp | 35 +- ...yETIInst_int64_t_int_LayoutRight_Rank4.cpp | 36 +- ...yETIInst_int64_t_int_LayoutRight_Rank5.cpp | 36 +- ...yETIInst_int64_t_int_LayoutRight_Rank8.cpp | 36 +- ...ETIInst_int64_t_int_LayoutStride_Rank1.cpp | 34 +- ...ETIInst_int64_t_int_LayoutStride_Rank2.cpp | 35 +- ...ETIInst_int64_t_int_LayoutStride_Rank3.cpp | 36 +- ...ETIInst_int64_t_int_LayoutStride_Rank4.cpp | 36 +- ...ETIInst_int64_t_int_LayoutStride_Rank5.cpp | 36 +- ...ETIInst_int64_t_int_LayoutStride_Rank8.cpp | 36 +- ...opyETIInst_int_double_LayoutLeft_Rank1.cpp | 
33 +- ...opyETIInst_int_double_LayoutLeft_Rank2.cpp | 33 +- ...opyETIInst_int_double_LayoutLeft_Rank3.cpp | 33 +- ...opyETIInst_int_double_LayoutLeft_Rank4.cpp | 34 +- ...opyETIInst_int_double_LayoutLeft_Rank5.cpp | 35 +- ...opyETIInst_int_double_LayoutLeft_Rank8.cpp | 36 +- ...pyETIInst_int_double_LayoutRight_Rank1.cpp | 33 +- ...pyETIInst_int_double_LayoutRight_Rank2.cpp | 33 +- ...pyETIInst_int_double_LayoutRight_Rank3.cpp | 34 +- ...pyETIInst_int_double_LayoutRight_Rank4.cpp | 35 +- ...pyETIInst_int_double_LayoutRight_Rank5.cpp | 36 +- ...pyETIInst_int_double_LayoutRight_Rank8.cpp | 36 +- ...yETIInst_int_double_LayoutStride_Rank1.cpp | 33 +- ...yETIInst_int_double_LayoutStride_Rank2.cpp | 34 +- ...yETIInst_int_double_LayoutStride_Rank3.cpp | 35 +- ...yETIInst_int_double_LayoutStride_Rank4.cpp | 36 +- ...yETIInst_int_double_LayoutStride_Rank5.cpp | 36 +- ...yETIInst_int_double_LayoutStride_Rank8.cpp | 36 +- ...CopyETIInst_int_float_LayoutLeft_Rank1.cpp | 33 +- ...CopyETIInst_int_float_LayoutLeft_Rank2.cpp | 33 +- ...CopyETIInst_int_float_LayoutLeft_Rank3.cpp | 33 +- ...CopyETIInst_int_float_LayoutLeft_Rank4.cpp | 33 +- ...CopyETIInst_int_float_LayoutLeft_Rank5.cpp | 34 +- ...CopyETIInst_int_float_LayoutLeft_Rank8.cpp | 36 +- ...opyETIInst_int_float_LayoutRight_Rank1.cpp | 33 +- ...opyETIInst_int_float_LayoutRight_Rank2.cpp | 33 +- ...opyETIInst_int_float_LayoutRight_Rank3.cpp | 33 +- ...opyETIInst_int_float_LayoutRight_Rank4.cpp | 34 +- ...opyETIInst_int_float_LayoutRight_Rank5.cpp | 35 +- ...opyETIInst_int_float_LayoutRight_Rank8.cpp | 36 +- ...pyETIInst_int_float_LayoutStride_Rank1.cpp | 33 +- ...pyETIInst_int_float_LayoutStride_Rank2.cpp | 33 +- ...pyETIInst_int_float_LayoutStride_Rank3.cpp | 34 +- ...pyETIInst_int_float_LayoutStride_Rank4.cpp | 35 +- ...pyETIInst_int_float_LayoutStride_Rank5.cpp | 36 +- ...pyETIInst_int_float_LayoutStride_Rank8.cpp | 36 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank1.cpp | 33 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank2.cpp | 
33 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank3.cpp | 34 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank4.cpp | 35 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank5.cpp | 36 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank8.cpp | 36 +- ...yETIInst_int_int64_t_LayoutRight_Rank1.cpp | 33 +- ...yETIInst_int_int64_t_LayoutRight_Rank2.cpp | 34 +- ...yETIInst_int_int64_t_LayoutRight_Rank3.cpp | 35 +- ...yETIInst_int_int64_t_LayoutRight_Rank4.cpp | 36 +- ...yETIInst_int_int64_t_LayoutRight_Rank5.cpp | 36 +- ...yETIInst_int_int64_t_LayoutRight_Rank8.cpp | 36 +- ...ETIInst_int_int64_t_LayoutStride_Rank1.cpp | 34 +- ...ETIInst_int_int64_t_LayoutStride_Rank2.cpp | 35 +- ...ETIInst_int_int64_t_LayoutStride_Rank3.cpp | 36 +- ...ETIInst_int_int64_t_LayoutStride_Rank4.cpp | 36 +- ...ETIInst_int_int64_t_LayoutStride_Rank5.cpp | 36 +- ...ETIInst_int_int64_t_LayoutStride_Rank8.cpp | 36 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank1.cpp | 33 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank2.cpp | 33 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank3.cpp | 33 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank4.cpp | 33 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank5.cpp | 33 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank8.cpp | 35 +- ...wCopyETIInst_int_int_LayoutRight_Rank1.cpp | 33 +- ...wCopyETIInst_int_int_LayoutRight_Rank2.cpp | 33 +- ...wCopyETIInst_int_int_LayoutRight_Rank3.cpp | 33 +- ...wCopyETIInst_int_int_LayoutRight_Rank4.cpp | 33 +- ...wCopyETIInst_int_int_LayoutRight_Rank5.cpp | 33 +- ...wCopyETIInst_int_int_LayoutRight_Rank8.cpp | 36 +- ...CopyETIInst_int_int_LayoutStride_Rank1.cpp | 33 +- ...CopyETIInst_int_int_LayoutStride_Rank2.cpp | 33 +- ...CopyETIInst_int_int_LayoutStride_Rank3.cpp | 33 +- ...CopyETIInst_int_int_LayoutStride_Rank4.cpp | 33 +- ...CopyETIInst_int_int_LayoutStride_Rank5.cpp | 34 +- ...CopyETIInst_int_int_LayoutStride_Rank8.cpp | 36 +- .../Kokkos_ViewFillCopyETIAvail_Macros.hpp | 2016 ++++-- .../Kokkos_ViewFillCopyETIDecl_Macros.hpp | 1728 +++-- lib/kokkos/core/src/impl/CMakeLists.txt | 36 
+- .../src/impl/KokkosExp_Host_IterateTile.hpp | 3833 ++++++------ .../core/src/impl/KokkosExp_ViewMapping.hpp | 19 +- .../core/src/impl/Kokkos_AnalyzePolicy.hpp | 309 +- .../core/src/impl/Kokkos_Atomic_Assembly.hpp | 113 +- .../Kokkos_Atomic_Compare_Exchange_Strong.hpp | 496 +- .../Kokkos_Atomic_Compare_Exchange_Weak.hpp | 343 +- .../core/src/impl/Kokkos_Atomic_Decrement.hpp | 122 +- .../core/src/impl/Kokkos_Atomic_Exchange.hpp | 436 +- .../core/src/impl/Kokkos_Atomic_Fetch_Add.hpp | 389 +- .../core/src/impl/Kokkos_Atomic_Fetch_And.hpp | 117 +- .../core/src/impl/Kokkos_Atomic_Fetch_Or.hpp | 117 +- .../core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp | 316 +- .../core/src/impl/Kokkos_Atomic_Generic.hpp | 428 +- .../core/src/impl/Kokkos_Atomic_Increment.hpp | 128 +- .../core/src/impl/Kokkos_Atomic_Load.hpp | 185 +- .../src/impl/Kokkos_Atomic_Memory_Order.hpp | 55 +- .../core/src/impl/Kokkos_Atomic_Store.hpp | 189 +- .../core/src/impl/Kokkos_Atomic_View.hpp | 340 +- .../core/src/impl/Kokkos_Atomic_Windows.hpp | 340 +- lib/kokkos/core/src/impl/Kokkos_BitOps.hpp | 139 +- .../core/src/impl/Kokkos_CPUDiscovery.cpp | 55 +- .../core/src/impl/Kokkos_CPUDiscovery.hpp | 15 +- lib/kokkos/core/src/impl/Kokkos_ChaseLev.hpp | 205 +- lib/kokkos/core/src/impl/Kokkos_ClockTic.hpp | 46 +- .../core/src/impl/Kokkos_ConcurrentBitset.hpp | 338 +- lib/kokkos/core/src/impl/Kokkos_Core.cpp | 940 +-- lib/kokkos/core/src/impl/Kokkos_EBO.hpp | 223 +- lib/kokkos/core/src/impl/Kokkos_Error.cpp | 173 +- lib/kokkos/core/src/impl/Kokkos_Error.hpp | 199 +- .../core/src/impl/Kokkos_ExecPolicy.cpp | 28 +- .../src/impl/Kokkos_FixedBufferMemoryPool.hpp | 142 +- .../core/src/impl/Kokkos_FunctorAdapter.hpp | 3126 ++++----- .../core/src/impl/Kokkos_FunctorAnalysis.hpp | 1203 ++-- lib/kokkos/core/src/impl/Kokkos_HBWSpace.cpp | 328 +- .../core/src/impl/Kokkos_HostBarrier.cpp | 94 +- .../core/src/impl/Kokkos_HostBarrier.hpp | 237 +- lib/kokkos/core/src/impl/Kokkos_HostSpace.cpp | 543 +- 
.../src/impl/Kokkos_HostSpace_deepcopy.cpp | 101 +- .../src/impl/Kokkos_HostSpace_deepcopy.hpp | 20 +- .../core/src/impl/Kokkos_HostThreadTeam.cpp | 289 +- .../core/src/impl/Kokkos_HostThreadTeam.hpp | 1322 ++-- lib/kokkos/core/src/impl/Kokkos_LIFO.hpp | 171 +- .../core/src/impl/Kokkos_LinkedListNode.hpp | 109 +- .../core/src/impl/Kokkos_MemoryPool.cpp | 95 +- .../src/impl/Kokkos_MemoryPoolAllocator.hpp | 53 +- .../core/src/impl/Kokkos_MemorySpace.hpp | 110 + .../core/src/impl/Kokkos_Memory_Fence.hpp | 69 +- .../src/impl/Kokkos_MultipleTaskQueue.hpp | 515 +- lib/kokkos/core/src/impl/Kokkos_OldMacros.hpp | 70 +- .../core/src/impl/Kokkos_OptionalRef.hpp | 87 +- .../core/src/impl/Kokkos_PhysicalLayout.hpp | 43 +- .../src/impl/Kokkos_Profiling_DeviceInfo.hpp | 85 +- .../src/impl/Kokkos_Profiling_Interface.cpp | 380 +- .../src/impl/Kokkos_Profiling_Interface.hpp | 170 +- lib/kokkos/core/src/impl/Kokkos_Serial.cpp | 167 +- .../core/src/impl/Kokkos_Serial_Task.cpp | 20 +- .../core/src/impl/Kokkos_Serial_Task.hpp | 195 +- .../impl/Kokkos_Serial_WorkGraphPolicy.hpp | 83 +- .../core/src/impl/Kokkos_SharedAlloc.cpp | 388 +- .../core/src/impl/Kokkos_SharedAlloc.hpp | 437 +- .../src/impl/Kokkos_SimpleTaskScheduler.hpp | 574 +- .../core/src/impl/Kokkos_SingleTaskQueue.hpp | 123 +- lib/kokkos/core/src/impl/Kokkos_Spinwait.cpp | 155 +- lib/kokkos/core/src/impl/Kokkos_Spinwait.hpp | 78 +- .../core/src/impl/Kokkos_Stacktrace.cpp | 247 + .../core/src/impl/Kokkos_Stacktrace.hpp | 45 + lib/kokkos/core/src/impl/Kokkos_Tags.hpp | 44 +- lib/kokkos/core/src/impl/Kokkos_TaskBase.hpp | 235 +- lib/kokkos/core/src/impl/Kokkos_TaskNode.hpp | 550 +- .../core/src/impl/Kokkos_TaskPolicyData.hpp | 103 +- lib/kokkos/core/src/impl/Kokkos_TaskQueue.hpp | 151 +- .../core/src/impl/Kokkos_TaskQueueCommon.hpp | 366 +- .../impl/Kokkos_TaskQueueMemoryManager.hpp | 173 +- .../src/impl/Kokkos_TaskQueueMultiple.hpp | 165 +- .../impl/Kokkos_TaskQueueMultiple_impl.hpp | 20 +- 
.../core/src/impl/Kokkos_TaskQueue_impl.hpp | 443 +- .../core/src/impl/Kokkos_TaskResult.hpp | 101 +- .../core/src/impl/Kokkos_TaskTeamMember.hpp | 43 +- lib/kokkos/core/src/impl/Kokkos_Timer.hpp | 28 +- lib/kokkos/core/src/impl/Kokkos_Traits.hpp | 539 +- lib/kokkos/core/src/impl/Kokkos_Utilities.hpp | 460 +- .../core/src/impl/Kokkos_VLAEmulation.hpp | 164 +- lib/kokkos/core/src/impl/Kokkos_ViewArray.hpp | 882 +-- lib/kokkos/core/src/impl/Kokkos_ViewCtor.hpp | 260 +- .../src/impl/Kokkos_ViewFillCopyETIAvail.hpp | 96 +- .../src/impl/Kokkos_ViewFillCopyETIDecl.hpp | 115 +- .../core/src/impl/Kokkos_ViewLayoutTiled.hpp | 1806 ++++-- .../core/src/impl/Kokkos_ViewMapping.hpp | 5558 +++++++++-------- lib/kokkos/core/src/impl/Kokkos_ViewTile.hpp | 262 +- .../core/src/impl/Kokkos_ViewUniformType.hpp | 116 +- .../core/src/impl/Kokkos_Volatile_Load.hpp | 163 +- lib/kokkos/core/src/impl/Kokkos_hwloc.cpp | 731 +-- lib/kokkos/core/src/kokkos.pc.in | 18 +- lib/kokkos/core/unit_test/CMakeLists.txt | 915 +-- lib/kokkos/core/unit_test/Makefile | 57 +- lib/kokkos/core/unit_test/TestAggregate.hpp | 132 +- lib/kokkos/core/unit_test/TestAtomic.hpp | 431 +- .../core/unit_test/TestAtomicOperations.hpp | 728 +-- .../TestAtomicOperations_complexdouble.hpp | 33 +- .../TestAtomicOperations_complexfloat.hpp | 33 +- .../unit_test/TestAtomicOperations_double.hpp | 37 +- .../unit_test/TestAtomicOperations_float.hpp | 37 +- .../unit_test/TestAtomicOperations_int.hpp | 58 +- .../TestAtomicOperations_longint.hpp | 58 +- .../TestAtomicOperations_longlongint.hpp | 58 +- .../TestAtomicOperations_unsignedint.hpp | 58 +- .../TestAtomicOperations_unsignedlongint.hpp | 58 +- lib/kokkos/core/unit_test/TestAtomicViews.hpp | 1529 ++--- lib/kokkos/core/unit_test/TestCXX11.hpp | 399 +- .../core/unit_test/TestCXX11Deduction.hpp | 65 +- .../core/unit_test/TestCompilerMacros.hpp | 74 +- lib/kokkos/core/unit_test/TestComplex.hpp | 364 +- .../core/unit_test/TestConcurrentBitset.hpp | 156 +- 
lib/kokkos/core/unit_test/TestCrs.hpp | 163 +- lib/kokkos/core/unit_test/TestDeepCopy.hpp | 223 +- .../unit_test/TestDefaultDeviceTypeInit.hpp | 380 +- .../core/unit_test/TestFunctorAnalysis.hpp | 171 +- lib/kokkos/core/unit_test/TestHWLOC.cpp | 25 +- lib/kokkos/core/unit_test/TestHostBarrier.cpp | 13 +- lib/kokkos/core/unit_test/TestInit.hpp | 35 +- .../core/unit_test/TestLocalDeepCopy.hpp | 1541 ++--- lib/kokkos/core/unit_test/TestMDRange.hpp | 3629 ++++++----- lib/kokkos/core/unit_test/TestMDRange_a.hpp | 24 +- lib/kokkos/core/unit_test/TestMDRange_b.hpp | 24 +- lib/kokkos/core/unit_test/TestMDRange_c.hpp | 30 +- lib/kokkos/core/unit_test/TestMDRange_d.hpp | 40 +- lib/kokkos/core/unit_test/TestMDRange_e.hpp | 24 +- lib/kokkos/core/unit_test/TestMemoryPool.hpp | 679 +- .../core/unit_test/TestPolicyConstruction.hpp | 1299 ++-- lib/kokkos/core/unit_test/TestRange.hpp | 412 +- lib/kokkos/core/unit_test/TestReduce.hpp | 465 +- .../unit_test/TestReduceCombinatorical.hpp | 599 +- .../core/unit_test/TestReduceDeviceView.hpp | 194 +- lib/kokkos/core/unit_test/TestReducers.hpp | 925 +-- lib/kokkos/core/unit_test/TestReducers_a.hpp | 18 +- lib/kokkos/core/unit_test/TestReducers_b.hpp | 18 +- lib/kokkos/core/unit_test/TestReducers_c.hpp | 18 +- lib/kokkos/core/unit_test/TestReducers_d.hpp | 19 +- lib/kokkos/core/unit_test/TestResize.hpp | 397 +- lib/kokkos/core/unit_test/TestScan.hpp | 114 +- lib/kokkos/core/unit_test/TestSharedAlloc.hpp | 154 +- .../unit_test/TestStackTrace.cpp} | 42 +- lib/kokkos/core/unit_test/TestStackTrace.hpp | 169 + .../unit_test/TestStackTrace_f0.cpp} | 45 +- .../unit_test/TestStackTrace_f1.cpp} | 45 +- .../unit_test/TestStackTrace_f2.cpp} | 49 +- .../core/unit_test/TestStackTrace_f3.cpp | 62 + .../core/unit_test/TestStackTrace_f4.cpp | 53 + .../core/unit_test/TestTaskScheduler.hpp | 910 ++- .../unit_test/TestTaskScheduler_single.hpp | 62 +- lib/kokkos/core/unit_test/TestTeam.hpp | 1237 ++-- .../core/unit_test/TestTeamTeamSize.hpp | 243 +- 
lib/kokkos/core/unit_test/TestTeamVector.hpp | 1037 +-- .../core/unit_test/TestTeamVectorRange.hpp | 421 +- .../unit_test/TestTemplateMetaFunctions.hpp | 169 +- lib/kokkos/core/unit_test/TestTile.hpp | 149 +- lib/kokkos/core/unit_test/TestUniqueToken.hpp | 120 +- lib/kokkos/core/unit_test/TestUtilities.hpp | 455 +- lib/kokkos/core/unit_test/TestViewAPI.hpp | 1899 +++--- lib/kokkos/core/unit_test/TestViewAPI_a.hpp | 20 +- lib/kokkos/core/unit_test/TestViewAPI_b.hpp | 24 +- lib/kokkos/core/unit_test/TestViewAPI_c.hpp | 22 +- lib/kokkos/core/unit_test/TestViewAPI_d.hpp | 30 +- lib/kokkos/core/unit_test/TestViewAPI_e.hpp | 212 +- lib/kokkos/core/unit_test/TestViewCopy.hpp | 202 +- .../unit_test/TestViewCtorPropEmbeddedDim.hpp | 121 +- .../TestViewLayoutStrideAssignment.hpp | 1092 ++-- .../core/unit_test/TestViewLayoutTiled.hpp | 2157 ++++--- .../core/unit_test/TestViewMapping_a.hpp | 1711 ++--- .../core/unit_test/TestViewMapping_b.hpp | 241 +- .../unit_test/TestViewMapping_subview.hpp | 259 +- lib/kokkos/core/unit_test/TestViewOfClass.hpp | 91 +- lib/kokkos/core/unit_test/TestViewResize.hpp | 58 + .../core/unit_test/TestViewSpaceAssign.hpp | 42 +- lib/kokkos/core/unit_test/TestViewSubview.hpp | 2511 +++++--- lib/kokkos/core/unit_test/TestView_64bit.hpp | 119 +- lib/kokkos/core/unit_test/TestWorkGraph.hpp | 99 +- lib/kokkos/core/unit_test/UnitTestMain.cpp | 15 +- .../core/unit_test/UnitTestMainInit.cpp | 19 +- .../unit_test/UnitTest_PushFinalizeHook.cpp | 67 +- .../UnitTest_PushFinalizeHook_terminate.cpp | 33 +- .../unit_test/config/cmaketest/CMakeLists.txt | 1 - .../configuration/test-code/CMakeLists.txt | 44 + .../configuration/test-code/Makefile | 46 + .../configuration/test-code/main.cpp | 6 + .../configuration/test-code/test_config.bash | 7 + .../test-code/test_config_arch_list.bash | 45 + .../test-code/test_config_device_list.bash | 45 + .../test-code/test_config_options_list.bash | 49 + .../test-code/test_config_run.bash | 111 + 
.../cuda/TestCudaHostPinned_Category.hpp | 28 +- .../cuda/TestCudaHostPinned_SharedAlloc.cpp | 19 +- .../cuda/TestCudaHostPinned_ViewAPI_a.cpp | 11 +- .../cuda/TestCudaHostPinned_ViewAPI_b.cpp | 11 +- .../cuda/TestCudaHostPinned_ViewAPI_c.cpp | 11 +- .../cuda/TestCudaHostPinned_ViewAPI_d.cpp | 11 +- .../cuda/TestCudaHostPinned_ViewAPI_e.cpp | 11 +- .../cuda/TestCudaHostPinned_ViewCopy.cpp | 11 +- .../cuda/TestCudaHostPinned_ViewMapping_a.cpp | 12 +- .../cuda/TestCudaHostPinned_ViewMapping_b.cpp | 12 +- ...TestCudaHostPinned_ViewMapping_subview.cpp | 12 +- .../unit_test/cuda/TestCudaUVM_Category.hpp | 25 +- .../cuda/TestCudaUVM_SharedAlloc.cpp | 19 +- .../unit_test/cuda/TestCudaUVM_ViewAPI_a.cpp | 11 +- .../unit_test/cuda/TestCudaUVM_ViewAPI_b.cpp | 11 +- .../unit_test/cuda/TestCudaUVM_ViewAPI_c.cpp | 11 +- .../unit_test/cuda/TestCudaUVM_ViewAPI_d.cpp | 11 +- .../unit_test/cuda/TestCudaUVM_ViewAPI_e.cpp | 11 +- .../unit_test/cuda/TestCudaUVM_ViewCopy.cpp | 11 +- .../cuda/TestCudaUVM_ViewMapping_a.cpp | 12 +- .../cuda/TestCudaUVM_ViewMapping_b.cpp | 12 +- .../cuda/TestCudaUVM_ViewMapping_subview.cpp | 12 +- ...estCuda_AtomicOperations_complexdouble.cpp | 16 +- ...TestCuda_AtomicOperations_complexfloat.cpp | 16 +- .../cuda/TestCuda_AtomicOperations_double.cpp | 16 +- .../cuda/TestCuda_AtomicOperations_float.cpp | 16 +- .../cuda/TestCuda_AtomicOperations_int.cpp | 16 +- .../TestCuda_AtomicOperations_longint.cpp | 16 +- .../TestCuda_AtomicOperations_longlongint.cpp | 16 +- .../TestCuda_AtomicOperations_unsignedint.cpp | 16 +- ...tCuda_AtomicOperations_unsignedlongint.cpp | 16 +- .../unit_test/cuda/TestCuda_AtomicViews.cpp | 16 +- .../core/unit_test/cuda/TestCuda_Atomics.cpp | 12 +- .../core/unit_test/cuda/TestCuda_Category.hpp | 25 +- .../core/unit_test/cuda/TestCuda_Complex.cpp | 16 +- .../core/unit_test/cuda/TestCuda_Crs.cpp | 11 +- .../cuda/TestCuda_DebugPinUVMSpace.cpp | 131 + .../cuda/TestCuda_DebugSerialExecution.cpp | 197 + .../cuda/TestCuda_DeepCopyAlignment.cpp 
| 14 +- .../cuda/TestCuda_FunctorAnalysis.cpp | 47 + .../core/unit_test/cuda/TestCuda_Init.cpp | 21 +- .../unit_test/cuda/TestCuda_InterOp_Init.cpp | 50 +- .../cuda/TestCuda_InterOp_Streams.cpp | 256 +- .../unit_test/cuda/TestCuda_LocalDeepCopy.cpp | 12 +- .../unit_test/cuda/TestCuda_MDRange_a.cpp | 16 +- .../unit_test/cuda/TestCuda_MDRange_b.cpp | 16 +- .../unit_test/cuda/TestCuda_MDRange_c.cpp | 16 +- .../unit_test/cuda/TestCuda_MDRange_d.cpp | 16 +- .../unit_test/cuda/TestCuda_MDRange_e.cpp | 16 +- .../core/unit_test/cuda/TestCuda_Other.cpp | 27 +- .../unit_test/cuda/TestCuda_RangePolicy.cpp | 16 +- .../unit_test/cuda/TestCuda_Reducers_a.cpp | 11 +- .../unit_test/cuda/TestCuda_Reducers_b.cpp | 11 +- .../unit_test/cuda/TestCuda_Reducers_c.cpp | 11 +- .../unit_test/cuda/TestCuda_Reducers_d.cpp | 11 +- .../unit_test/cuda/TestCuda_Reductions.cpp | 11 +- .../cuda/TestCuda_Reductions_DeviceView.cpp | 11 +- .../core/unit_test/cuda/TestCuda_Scan.cpp | 16 +- .../unit_test/cuda/TestCuda_SharedAlloc.cpp | 19 +- .../core/unit_test/cuda/TestCuda_Spaces.cpp | 365 +- .../unit_test/cuda/TestCuda_SubView_a.cpp | 77 +- .../unit_test/cuda/TestCuda_SubView_b.cpp | 35 +- .../unit_test/cuda/TestCuda_SubView_c01.cpp | 18 +- .../unit_test/cuda/TestCuda_SubView_c02.cpp | 19 +- .../unit_test/cuda/TestCuda_SubView_c03.cpp | 19 +- .../unit_test/cuda/TestCuda_SubView_c04.cpp | 18 +- .../unit_test/cuda/TestCuda_SubView_c05.cpp | 19 +- .../unit_test/cuda/TestCuda_SubView_c06.cpp | 19 +- .../unit_test/cuda/TestCuda_SubView_c07.cpp | 18 +- .../unit_test/cuda/TestCuda_SubView_c08.cpp | 19 +- .../unit_test/cuda/TestCuda_SubView_c09.cpp | 19 +- .../unit_test/cuda/TestCuda_SubView_c10.cpp | 18 +- .../unit_test/cuda/TestCuda_SubView_c11.cpp | 19 +- .../unit_test/cuda/TestCuda_SubView_c12.cpp | 19 +- .../unit_test/cuda/TestCuda_SubView_c13.cpp | 18 +- .../core/unit_test/cuda/TestCuda_Task.cpp | 16 +- .../core/unit_test/cuda/TestCuda_Team.cpp | 85 +- .../cuda/TestCuda_TeamReductionScan.cpp | 63 +- 
.../unit_test/cuda/TestCuda_TeamScratch.cpp | 59 +- .../unit_test/cuda/TestCuda_TeamTeamSize.cpp | 11 +- .../cuda/TestCuda_TeamVectorRange.cpp | 17 +- .../unit_test/cuda/TestCuda_UniqueToken.cpp | 16 +- .../unit_test/cuda/TestCuda_ViewAPI_a.cpp | 11 +- .../unit_test/cuda/TestCuda_ViewAPI_b.cpp | 11 +- .../unit_test/cuda/TestCuda_ViewAPI_c.cpp | 11 +- .../unit_test/cuda/TestCuda_ViewAPI_d.cpp | 11 +- .../unit_test/cuda/TestCuda_ViewAPI_e.cpp | 11 +- .../TestCuda_ViewLayoutStrideAssignment.cpp | 12 +- .../unit_test/cuda/TestCuda_ViewMapping_a.cpp | 12 +- .../unit_test/cuda/TestCuda_ViewMapping_b.cpp | 12 +- .../cuda/TestCuda_ViewMapping_subview.cpp | 12 +- .../unit_test/cuda/TestCuda_ViewOfClass.cpp | 12 +- .../unit_test/cuda/TestCuda_ViewResize.cpp | 46 + .../unit_test/cuda/TestCuda_View_64bit.cpp | 11 +- .../unit_test/cuda/TestCuda_WorkGraph.cpp | 15 +- .../default/TestDefaultDeviceType.cpp | 39 +- .../default/TestDefaultDeviceTypeInit_1.cpp | 2 +- .../default/TestDefaultDeviceTypeInit_10.cpp | 2 +- .../default/TestDefaultDeviceTypeInit_11.cpp | 2 +- .../default/TestDefaultDeviceTypeInit_12.cpp | 2 +- .../default/TestDefaultDeviceTypeInit_13.cpp | 2 +- .../default/TestDefaultDeviceTypeInit_14.cpp | 2 +- .../default/TestDefaultDeviceTypeInit_15.cpp | 2 +- .../default/TestDefaultDeviceTypeInit_16.cpp | 2 +- .../default/TestDefaultDeviceTypeInit_2.cpp | 2 +- .../default/TestDefaultDeviceTypeInit_3.cpp | 2 +- .../default/TestDefaultDeviceTypeInit_4.cpp | 2 +- .../default/TestDefaultDeviceTypeInit_5.cpp | 2 +- .../default/TestDefaultDeviceTypeInit_6.cpp | 2 +- .../default/TestDefaultDeviceTypeInit_7.cpp | 2 +- .../default/TestDefaultDeviceTypeInit_8.cpp | 2 +- .../default/TestDefaultDeviceTypeInit_9.cpp | 2 +- .../default/TestDefaultDeviceTypeResize.cpp | 18 +- .../TestDefaultDeviceType_Category.hpp | 27 +- .../default/TestDefaultDeviceType_a1.cpp | 19 +- .../default/TestDefaultDeviceType_a2.cpp | 19 +- .../default/TestDefaultDeviceType_a3.cpp | 19 +- 
.../default/TestDefaultDeviceType_b1.cpp | 18 +- .../default/TestDefaultDeviceType_b2.cpp | 18 +- .../default/TestDefaultDeviceType_b3.cpp | 18 +- .../default/TestDefaultDeviceType_c1.cpp | 20 +- .../default/TestDefaultDeviceType_c2.cpp | 20 +- .../default/TestDefaultDeviceType_c3.cpp | 20 +- .../default/TestDefaultDeviceType_d.cpp | 35 +- ...TestHPX_AtomicOperations_complexdouble.cpp | 46 + .../TestHPX_AtomicOperations_complexfloat.cpp | 46 + .../hpx/TestHPX_AtomicOperations_double.cpp | 16 +- .../hpx/TestHPX_AtomicOperations_float.cpp | 16 +- .../hpx/TestHPX_AtomicOperations_int.cpp | 16 +- .../hpx/TestHPX_AtomicOperations_longint.cpp | 16 +- .../TestHPX_AtomicOperations_longlongint.cpp | 16 +- .../TestHPX_AtomicOperations_unsignedint.cpp | 16 +- ...stHPX_AtomicOperations_unsignedlongint.cpp | 16 +- .../unit_test/hpx/TestHPX_AtomicViews.cpp | 16 +- .../core/unit_test/hpx/TestHPX_Atomics.cpp | 12 +- .../core/unit_test/hpx/TestHPX_Category.hpp | 25 +- .../core/unit_test/hpx/TestHPX_Complex.cpp | 16 +- lib/kokkos/core/unit_test/hpx/TestHPX_Crs.cpp | 11 +- .../hpx/TestHPX_DeepCopyAlignment.cpp | 46 + .../unit_test/hpx/TestHPX_FunctorAnalysis.cpp | 47 + .../core/unit_test/hpx/TestHPX_Init.cpp | 21 +- .../core/unit_test/hpx/TestHPX_InterOp.cpp | 21 +- .../unit_test/hpx/TestHPX_LocalDeepCopy.cpp | 46 + .../core/unit_test/hpx/TestHPX_MDRange_a.cpp | 16 +- .../core/unit_test/hpx/TestHPX_MDRange_b.cpp | 16 +- .../core/unit_test/hpx/TestHPX_MDRange_c.cpp | 16 +- .../core/unit_test/hpx/TestHPX_MDRange_d.cpp | 16 +- .../core/unit_test/hpx/TestHPX_MDRange_e.cpp | 16 +- .../core/unit_test/hpx/TestHPX_Other.cpp | 11 +- .../unit_test/hpx/TestHPX_RangePolicy.cpp | 16 +- .../core/unit_test/hpx/TestHPX_Reducers_a.cpp | 11 +- .../core/unit_test/hpx/TestHPX_Reducers_b.cpp | 11 +- .../core/unit_test/hpx/TestHPX_Reducers_c.cpp | 11 +- .../core/unit_test/hpx/TestHPX_Reducers_d.cpp | 11 +- .../core/unit_test/hpx/TestHPX_Reductions.cpp | 11 +- .../hpx/TestHPX_Reductions_DeviceView.cpp | 46 
+ .../core/unit_test/hpx/TestHPX_Scan.cpp | 16 +- .../unit_test/hpx/TestHPX_SharedAlloc.cpp | 19 +- .../core/unit_test/hpx/TestHPX_SubView_a.cpp | 68 +- .../core/unit_test/hpx/TestHPX_SubView_b.cpp | 35 +- .../unit_test/hpx/TestHPX_SubView_c01.cpp | 18 +- .../unit_test/hpx/TestHPX_SubView_c02.cpp | 19 +- .../unit_test/hpx/TestHPX_SubView_c03.cpp | 19 +- .../unit_test/hpx/TestHPX_SubView_c04.cpp | 18 +- .../unit_test/hpx/TestHPX_SubView_c05.cpp | 19 +- .../unit_test/hpx/TestHPX_SubView_c06.cpp | 19 +- .../unit_test/hpx/TestHPX_SubView_c07.cpp | 18 +- .../unit_test/hpx/TestHPX_SubView_c08.cpp | 19 +- .../unit_test/hpx/TestHPX_SubView_c09.cpp | 19 +- .../unit_test/hpx/TestHPX_SubView_c10.cpp | 18 +- .../unit_test/hpx/TestHPX_SubView_c11.cpp | 19 +- .../unit_test/hpx/TestHPX_SubView_c12.cpp | 19 +- .../unit_test/hpx/TestHPX_SubView_c13.cpp | 18 +- .../core/unit_test/hpx/TestHPX_Task.cpp | 16 +- .../core/unit_test/hpx/TestHPX_Team.cpp | 58 +- .../hpx/TestHPX_TeamReductionScan.cpp | 64 +- .../unit_test/hpx/TestHPX_TeamScratch.cpp | 55 +- .../unit_test/hpx/TestHPX_TeamTeamSize.cpp | 46 + .../unit_test/hpx/TestHPX_TeamVectorRange.cpp | 17 +- .../unit_test/hpx/TestHPX_UniqueToken.cpp | 16 +- .../core/unit_test/hpx/TestHPX_ViewAPI_a.cpp | 11 +- .../core/unit_test/hpx/TestHPX_ViewAPI_b.cpp | 11 +- .../core/unit_test/hpx/TestHPX_ViewAPI_c.cpp | 11 +- .../core/unit_test/hpx/TestHPX_ViewAPI_d.cpp | 11 +- .../core/unit_test/hpx/TestHPX_ViewAPI_e.cpp | 11 +- .../TestHPX_ViewLayoutStrideAssignment.cpp | 46 + .../unit_test/hpx/TestHPX_ViewMapping_a.cpp | 12 +- .../unit_test/hpx/TestHPX_ViewMapping_b.cpp | 12 +- .../hpx/TestHPX_ViewMapping_subview.cpp | 12 +- .../unit_test/hpx/TestHPX_ViewOfClass.cpp | 12 +- .../core/unit_test/hpx/TestHPX_ViewResize.cpp | 46 + .../core/unit_test/hpx/TestHPX_View_64bit.cpp | 11 +- .../core/unit_test/hpx/TestHPX_WorkGraph.cpp | 15 +- .../core/unit_test/openmp/TestOpenMP.hpp | 33 +- ...tOpenMP_AtomicOperations_complexdouble.cpp | 16 +- 
...stOpenMP_AtomicOperations_complexfloat.cpp | 16 +- .../TestOpenMP_AtomicOperations_double.cpp | 16 +- .../TestOpenMP_AtomicOperations_float.cpp | 16 +- .../TestOpenMP_AtomicOperations_int.cpp | 16 +- .../TestOpenMP_AtomicOperations_longint.cpp | 16 +- ...estOpenMP_AtomicOperations_longlongint.cpp | 16 +- ...estOpenMP_AtomicOperations_unsignedint.cpp | 16 +- ...penMP_AtomicOperations_unsignedlongint.cpp | 16 +- .../openmp/TestOpenMP_AtomicViews.cpp | 16 +- .../unit_test/openmp/TestOpenMP_Atomics.cpp | 12 +- .../unit_test/openmp/TestOpenMP_Category.hpp | 25 +- .../unit_test/openmp/TestOpenMP_Complex.cpp | 16 +- .../core/unit_test/openmp/TestOpenMP_Crs.cpp | 11 +- .../openmp/TestOpenMP_DeepCopyAlignment.cpp | 11 +- .../openmp/TestOpenMP_FunctorAnalysis.cpp | 47 + .../core/unit_test/openmp/TestOpenMP_Init.cpp | 21 +- .../unit_test/openmp/TestOpenMP_InterOp.cpp | 45 +- .../openmp/TestOpenMP_LocalDeepCopy.cpp | 12 +- .../unit_test/openmp/TestOpenMP_MDRange_a.cpp | 16 +- .../unit_test/openmp/TestOpenMP_MDRange_b.cpp | 16 +- .../unit_test/openmp/TestOpenMP_MDRange_c.cpp | 16 +- .../unit_test/openmp/TestOpenMP_MDRange_d.cpp | 16 +- .../unit_test/openmp/TestOpenMP_MDRange_e.cpp | 16 +- .../unit_test/openmp/TestOpenMP_Other.cpp | 119 +- .../openmp/TestOpenMP_RangePolicy.cpp | 16 +- .../openmp/TestOpenMP_Reducers_a.cpp | 11 +- .../openmp/TestOpenMP_Reducers_b.cpp | 11 +- .../openmp/TestOpenMP_Reducers_c.cpp | 11 +- .../openmp/TestOpenMP_Reducers_d.cpp | 11 +- .../openmp/TestOpenMP_Reductions.cpp | 11 +- .../TestOpenMP_Reductions_DeviceView.cpp | 11 +- .../core/unit_test/openmp/TestOpenMP_Scan.cpp | 16 +- .../openmp/TestOpenMP_SharedAlloc.cpp | 19 +- .../unit_test/openmp/TestOpenMP_SubView_a.cpp | 77 +- .../unit_test/openmp/TestOpenMP_SubView_b.cpp | 35 +- .../openmp/TestOpenMP_SubView_c01.cpp | 18 +- .../openmp/TestOpenMP_SubView_c02.cpp | 19 +- .../openmp/TestOpenMP_SubView_c03.cpp | 19 +- .../openmp/TestOpenMP_SubView_c04.cpp | 18 +- .../openmp/TestOpenMP_SubView_c05.cpp 
| 19 +- .../openmp/TestOpenMP_SubView_c06.cpp | 19 +- .../openmp/TestOpenMP_SubView_c07.cpp | 18 +- .../openmp/TestOpenMP_SubView_c08.cpp | 19 +- .../openmp/TestOpenMP_SubView_c09.cpp | 19 +- .../openmp/TestOpenMP_SubView_c10.cpp | 18 +- .../openmp/TestOpenMP_SubView_c11.cpp | 19 +- .../openmp/TestOpenMP_SubView_c12.cpp | 19 +- .../openmp/TestOpenMP_SubView_c13.cpp | 18 +- .../core/unit_test/openmp/TestOpenMP_Task.cpp | 16 +- .../core/unit_test/openmp/TestOpenMP_Team.cpp | 85 +- .../openmp/TestOpenMP_TeamReductionScan.cpp | 64 +- .../openmp/TestOpenMP_TeamScratch.cpp | 59 +- .../openmp/TestOpenMP_TeamTeamSize.cpp | 12 +- .../openmp/TestOpenMP_TeamVectorRange.cpp | 17 +- .../openmp/TestOpenMP_UniqueToken.cpp | 16 +- .../unit_test/openmp/TestOpenMP_ViewAPI_a.cpp | 11 +- .../unit_test/openmp/TestOpenMP_ViewAPI_b.cpp | 11 +- .../unit_test/openmp/TestOpenMP_ViewAPI_c.cpp | 11 +- .../unit_test/openmp/TestOpenMP_ViewAPI_d.cpp | 11 +- .../unit_test/openmp/TestOpenMP_ViewAPI_e.cpp | 11 +- .../TestOpenMP_ViewLayoutStrideAssignment.cpp | 12 +- .../openmp/TestOpenMP_ViewMapping_a.cpp | 12 +- .../openmp/TestOpenMP_ViewMapping_b.cpp | 12 +- .../openmp/TestOpenMP_ViewMapping_subview.cpp | 12 +- .../openmp/TestOpenMP_ViewOfClass.cpp | 12 +- .../openmp/TestOpenMP_ViewResize.cpp | 46 + .../openmp/TestOpenMP_View_64bit.cpp | 11 +- .../unit_test/openmp/TestOpenMP_WorkGraph.cpp | 15 +- .../openmptarget/TestOpenMPTarget.hpp | 41 +- ...PTarget_AtomicOperations_complexdouble.cpp | 16 +- ...MPTarget_AtomicOperations_complexfloat.cpp | 16 +- ...stOpenMPTarget_AtomicOperations_double.cpp | 16 +- ...estOpenMPTarget_AtomicOperations_float.cpp | 16 +- .../TestOpenMPTarget_AtomicOperations_int.cpp | 16 +- ...tOpenMPTarget_AtomicOperations_longint.cpp | 16 +- ...nMPTarget_AtomicOperations_longlongint.cpp | 16 +- ...nMPTarget_AtomicOperations_unsignedint.cpp | 16 +- ...arget_AtomicOperations_unsignedlongint.cpp | 16 +- .../TestOpenMPTarget_AtomicViews.cpp | 16 +- 
.../openmptarget/TestOpenMPTarget_Atomics.cpp | 12 +- .../TestOpenMPTarget_Category.hpp | 25 +- .../openmptarget/TestOpenMPTarget_Complex.cpp | 16 +- .../TestOpenMPTarget_DeepCopyAlignment.cpp | 11 +- .../openmptarget/TestOpenMPTarget_Init.cpp | 21 +- .../TestOpenMPTarget_MDRange_a.cpp | 16 +- .../TestOpenMPTarget_MDRange_b.cpp | 16 +- .../TestOpenMPTarget_MDRange_c.cpp | 16 +- .../TestOpenMPTarget_MDRange_d.cpp | 16 +- .../TestOpenMPTarget_MDRange_e.cpp | 16 +- .../openmptarget/TestOpenMPTarget_Other.cpp | 23 +- .../TestOpenMPTarget_RangePolicy.cpp | 16 +- .../TestOpenMPTarget_Reducers_a.cpp | 11 +- .../TestOpenMPTarget_Reducers_b.cpp | 11 +- .../TestOpenMPTarget_Reducers_c.cpp | 11 +- .../TestOpenMPTarget_Reducers_d.cpp | 11 +- .../TestOpenMPTarget_Reductions.cpp | 11 +- .../openmptarget/TestOpenMPTarget_Scan.cpp | 16 +- .../TestOpenMPTarget_SharedAlloc.cpp | 20 +- .../TestOpenMPTarget_SubView_a.cpp | 77 +- .../TestOpenMPTarget_SubView_b.cpp | 35 +- .../TestOpenMPTarget_SubView_c01.cpp | 18 +- .../TestOpenMPTarget_SubView_c02.cpp | 19 +- .../TestOpenMPTarget_SubView_c03.cpp | 19 +- .../TestOpenMPTarget_SubView_c04.cpp | 18 +- .../TestOpenMPTarget_SubView_c05.cpp | 19 +- .../TestOpenMPTarget_SubView_c06.cpp | 19 +- .../TestOpenMPTarget_SubView_c07.cpp | 18 +- .../TestOpenMPTarget_SubView_c08.cpp | 19 +- .../TestOpenMPTarget_SubView_c09.cpp | 19 +- .../TestOpenMPTarget_SubView_c10.cpp | 18 +- .../TestOpenMPTarget_SubView_c11.cpp | 19 +- .../TestOpenMPTarget_SubView_c12.cpp | 19 +- .../openmptarget/TestOpenMPTarget_Team.cpp | 58 +- .../TestOpenMPTarget_TeamReductionScan.cpp | 64 +- .../TestOpenMPTarget_TeamScratch.cpp | 55 +- .../TestOpenMPTarget_ViewAPI_a.cpp | 11 +- .../TestOpenMPTarget_ViewAPI_b.cpp | 11 +- .../TestOpenMPTarget_ViewAPI_c.cpp | 11 +- .../TestOpenMPTarget_ViewAPI_d.cpp | 11 +- .../TestOpenMPTarget_ViewAPI_e.cpp | 11 +- .../TestOpenMPTarget_ViewMapping_a.cpp | 12 +- .../TestOpenMPTarget_ViewMapping_b.cpp | 12 +- 
.../TestOpenMPTarget_ViewMapping_subview.cpp | 12 +- .../TestOpenMPTarget_ViewOfClass.cpp | 12 +- ...threads_AtomicOperations_complexdouble.cpp | 16 +- ...qthreads_AtomicOperations_complexfloat.cpp | 16 +- .../TestQqthreads_AtomicOperations_double.cpp | 16 +- .../TestQqthreads_AtomicOperations_float.cpp | 16 +- .../TestQqthreads_AtomicOperations_int.cpp | 16 +- ...TestQqthreads_AtomicOperations_longint.cpp | 16 +- ...Qqthreads_AtomicOperations_longlongint.cpp | 16 +- ...Qqthreads_AtomicOperations_unsignedint.cpp | 16 +- ...reads_AtomicOperations_unsignedlongint.cpp | 16 +- .../qthreads/TestQqthreads_MDRange_a.cpp | 16 +- .../qthreads/TestQqthreads_MDRange_b.cpp | 16 +- .../qthreads/TestQqthreads_MDRange_c.cpp | 16 +- .../qthreads/TestQqthreads_MDRange_d.cpp | 16 +- .../qthreads/TestQqthreads_MDRange_e.cpp | 16 +- .../qthreads/TestQqthreads_ViewAPI_a.cpp | 11 +- .../qthreads/TestQqthreads_ViewAPI_b.cpp | 11 +- .../qthreads/TestQqthreads_ViewAPI_c.cpp | 11 +- .../qthreads/TestQqthreads_ViewAPI_d.cpp | 11 +- .../qthreads/TestQqthreads_ViewAPI_e.cpp | 11 +- .../core/unit_test/qthreads/TestQthreads.hpp | 42 +- .../qthreads/TestQthreads_Atomics.cpp | 28 +- .../qthreads/TestQthreads_Category.hpp | 25 +- .../qthreads/TestQthreads_Complex.cpp | 5 +- .../TestQthreads_DeepCopyAlignment.cpp | 11 +- .../unit_test/qthreads/TestQthreads_Other.cpp | 57 +- .../qthreads/TestQthreads_Reductions.cpp | 49 +- .../qthreads/TestQthreads_SubView_a.cpp | 46 +- .../qthreads/TestQthreads_SubView_b.cpp | 19 +- .../qthreads/TestQthreads_SubView_c01.cpp | 16 +- .../qthreads/TestQthreads_SubView_c02.cpp | 16 +- .../qthreads/TestQthreads_SubView_c03.cpp | 16 +- .../qthreads/TestQthreads_SubView_c04.cpp | 16 +- .../qthreads/TestQthreads_SubView_c05.cpp | 16 +- .../qthreads/TestQthreads_SubView_c06.cpp | 16 +- .../qthreads/TestQthreads_SubView_c07.cpp | 16 +- .../qthreads/TestQthreads_SubView_c08.cpp | 16 +- .../qthreads/TestQthreads_SubView_c09.cpp | 16 +- .../qthreads/TestQthreads_SubView_c10.cpp 
| 16 +- .../qthreads/TestQthreads_SubView_c11.cpp | 16 +- .../qthreads/TestQthreads_SubView_c12.cpp | 16 +- .../qthreads/TestQthreads_SubView_c13.cpp | 16 +- .../unit_test/qthreads/TestQthreads_Team.cpp | 41 +- .../qthreads/TestQthreads_ViewAPI_a.cpp | 16 +- .../qthreads/TestQthreads_ViewAPI_b.cpp | 34 +- .../rocm/TestROCmHostPinned_Category.hpp | 25 +- .../rocm/TestROCmHostPinned_SharedAlloc.cpp | 19 +- .../rocm/TestROCmHostPinned_ViewAPI_a.cpp | 11 +- .../rocm/TestROCmHostPinned_ViewAPI_b.cpp | 11 +- .../rocm/TestROCmHostPinned_ViewAPI_c.cpp | 11 +- .../rocm/TestROCmHostPinned_ViewAPI_d.cpp | 11 +- .../rocm/TestROCmHostPinned_ViewAPI_e.cpp | 11 +- .../rocm/TestROCmHostPinned_ViewCopy.cpp | 11 +- .../rocm/TestROCmHostPinned_ViewMapping_a.cpp | 12 +- .../rocm/TestROCmHostPinned_ViewMapping_b.cpp | 12 +- ...TestROCmHostPinned_ViewMapping_subview.cpp | 12 +- .../rocm/TestROCmHostPinned_View_64bit.cpp | 11 +- .../rocm/TestROCm_AtomicOperations_double.cpp | 16 +- .../rocm/TestROCm_AtomicOperations_float.cpp | 16 +- .../rocm/TestROCm_AtomicOperations_int.cpp | 16 +- .../TestROCm_AtomicOperations_longint.cpp | 16 +- .../TestROCm_AtomicOperations_longlongint.cpp | 16 +- .../TestROCm_AtomicOperations_unsignedint.cpp | 16 +- ...tROCm_AtomicOperations_unsignedlongint.cpp | 16 +- .../unit_test/rocm/TestROCm_AtomicViews.cpp | 16 +- .../core/unit_test/rocm/TestROCm_Atomics.cpp | 12 +- .../core/unit_test/rocm/TestROCm_Category.hpp | 25 +- .../core/unit_test/rocm/TestROCm_Complex.cpp | 16 +- .../core/unit_test/rocm/TestROCm_Crs.cpp | 16 +- .../rocm/TestROCm_DeepCopyAlignment.cpp | 11 +- .../core/unit_test/rocm/TestROCm_Init.cpp | 21 +- .../rocm/TestROCm_MDRangeReduce_a.cpp | 22 +- .../rocm/TestROCm_MDRangeReduce_b.cpp | 22 +- .../rocm/TestROCm_MDRangeReduce_c.cpp | 22 +- .../rocm/TestROCm_MDRangeReduce_d.cpp | 22 +- .../rocm/TestROCm_MDRangeReduce_e.cpp | 22 +- .../unit_test/rocm/TestROCm_MDRange_a.cpp | 16 +- .../unit_test/rocm/TestROCm_MDRange_b.cpp | 16 +- 
.../unit_test/rocm/TestROCm_MDRange_c.cpp | 16 +- .../unit_test/rocm/TestROCm_MDRange_d.cpp | 16 +- .../unit_test/rocm/TestROCm_MDRange_e.cpp | 16 +- .../core/unit_test/rocm/TestROCm_Other.cpp | 25 +- .../unit_test/rocm/TestROCm_RangePolicy.cpp | 16 +- .../unit_test/rocm/TestROCm_Reducers_a.cpp | 11 +- .../unit_test/rocm/TestROCm_Reducers_b.cpp | 11 +- .../unit_test/rocm/TestROCm_Reducers_c.cpp | 11 +- .../unit_test/rocm/TestROCm_Reducers_d.cpp | 11 +- .../unit_test/rocm/TestROCm_Reductions.cpp | 11 +- .../core/unit_test/rocm/TestROCm_Scan.cpp | 16 +- .../unit_test/rocm/TestROCm_SharedAlloc.cpp | 20 +- .../core/unit_test/rocm/TestROCm_Spaces.cpp | 205 +- .../unit_test/rocm/TestROCm_SubView_a.cpp | 77 +- .../unit_test/rocm/TestROCm_SubView_b.cpp | 35 +- .../unit_test/rocm/TestROCm_SubView_c01.cpp | 18 +- .../unit_test/rocm/TestROCm_SubView_c02.cpp | 19 +- .../unit_test/rocm/TestROCm_SubView_c03.cpp | 19 +- .../unit_test/rocm/TestROCm_SubView_c04.cpp | 18 +- .../unit_test/rocm/TestROCm_SubView_c05.cpp | 19 +- .../unit_test/rocm/TestROCm_SubView_c06.cpp | 19 +- .../unit_test/rocm/TestROCm_SubView_c07.cpp | 18 +- .../unit_test/rocm/TestROCm_SubView_c08.cpp | 19 +- .../unit_test/rocm/TestROCm_SubView_c09.cpp | 19 +- .../unit_test/rocm/TestROCm_SubView_c10.cpp | 18 +- .../unit_test/rocm/TestROCm_SubView_c11.cpp | 19 +- .../unit_test/rocm/TestROCm_SubView_c12.cpp | 19 +- .../unit_test/rocm/TestROCm_SubView_c13.cpp | 18 +- .../core/unit_test/rocm/TestROCm_Team.cpp | 58 +- .../rocm/TestROCm_TeamReductionScan.cpp | 63 +- .../unit_test/rocm/TestROCm_TeamScratch.cpp | 59 +- .../unit_test/rocm/TestROCm_TeamTeamSize.cpp | 11 +- .../unit_test/rocm/TestROCm_ViewAPI_a.cpp | 11 +- .../unit_test/rocm/TestROCm_ViewAPI_b.cpp | 11 +- .../unit_test/rocm/TestROCm_ViewAPI_c.cpp | 11 +- .../unit_test/rocm/TestROCm_ViewAPI_d.cpp | 11 +- .../unit_test/rocm/TestROCm_ViewAPI_e.cpp | 11 +- .../unit_test/rocm/TestROCm_ViewMapping_a.cpp | 12 +- .../unit_test/rocm/TestROCm_ViewMapping_b.cpp | 12 +- 
.../rocm/TestROCm_ViewMapping_subview.cpp | 12 +- .../unit_test/rocm/TestROCm_ViewOfClass.cpp | 12 +- ...tSerial_AtomicOperations_complexdouble.cpp | 16 +- ...stSerial_AtomicOperations_complexfloat.cpp | 16 +- .../TestSerial_AtomicOperations_double.cpp | 16 +- .../TestSerial_AtomicOperations_float.cpp | 16 +- .../TestSerial_AtomicOperations_int.cpp | 16 +- .../TestSerial_AtomicOperations_longint.cpp | 16 +- ...estSerial_AtomicOperations_longlongint.cpp | 16 +- ...estSerial_AtomicOperations_unsignedint.cpp | 16 +- ...erial_AtomicOperations_unsignedlongint.cpp | 16 +- .../serial/TestSerial_AtomicViews.cpp | 16 +- .../unit_test/serial/TestSerial_Atomics.cpp | 12 +- .../unit_test/serial/TestSerial_Category.hpp | 25 +- .../unit_test/serial/TestSerial_Complex.cpp | 16 +- .../core/unit_test/serial/TestSerial_Crs.cpp | 11 +- .../serial/TestSerial_DeepCopyAlignment.cpp | 11 +- .../serial/TestSerial_FunctorAnalysis.cpp | 47 + .../core/unit_test/serial/TestSerial_Init.cpp | 21 +- .../serial/TestSerial_LocalDeepCopy.cpp | 12 +- .../unit_test/serial/TestSerial_MDRange_a.cpp | 16 +- .../unit_test/serial/TestSerial_MDRange_b.cpp | 16 +- .../unit_test/serial/TestSerial_MDRange_c.cpp | 16 +- .../unit_test/serial/TestSerial_MDRange_d.cpp | 16 +- .../unit_test/serial/TestSerial_MDRange_e.cpp | 16 +- .../unit_test/serial/TestSerial_Other.cpp | 27 +- .../serial/TestSerial_RangePolicy.cpp | 16 +- .../serial/TestSerial_Reducers_a.cpp | 11 +- .../serial/TestSerial_Reducers_b.cpp | 11 +- .../serial/TestSerial_Reducers_c.cpp | 11 +- .../serial/TestSerial_Reducers_d.cpp | 11 +- .../serial/TestSerial_Reductions.cpp | 11 +- .../TestSerial_Reductions_DeviceView.cpp | 11 +- .../core/unit_test/serial/TestSerial_Scan.cpp | 16 +- .../serial/TestSerial_SharedAlloc.cpp | 19 +- .../unit_test/serial/TestSerial_SubView_a.cpp | 77 +- .../unit_test/serial/TestSerial_SubView_b.cpp | 35 +- .../serial/TestSerial_SubView_c01.cpp | 18 +- .../serial/TestSerial_SubView_c02.cpp | 19 +- 
.../serial/TestSerial_SubView_c03.cpp | 19 +- .../serial/TestSerial_SubView_c04.cpp | 18 +- .../serial/TestSerial_SubView_c05.cpp | 19 +- .../serial/TestSerial_SubView_c06.cpp | 19 +- .../serial/TestSerial_SubView_c07.cpp | 18 +- .../serial/TestSerial_SubView_c08.cpp | 19 +- .../serial/TestSerial_SubView_c09.cpp | 19 +- .../serial/TestSerial_SubView_c10.cpp | 18 +- .../serial/TestSerial_SubView_c11.cpp | 19 +- .../serial/TestSerial_SubView_c12.cpp | 19 +- .../serial/TestSerial_SubView_c13.cpp | 18 +- .../core/unit_test/serial/TestSerial_Task.cpp | 16 +- .../core/unit_test/serial/TestSerial_Team.cpp | 85 +- .../serial/TestSerial_TeamReductionScan.cpp | 64 +- .../serial/TestSerial_TeamScratch.cpp | 59 +- .../serial/TestSerial_TeamTeamSize.cpp | 11 +- .../serial/TestSerial_TeamVectorRange.cpp | 17 +- .../serial/TestSerial_UniqueToken.cpp | 46 + .../unit_test/serial/TestSerial_ViewAPI_a.cpp | 11 +- .../unit_test/serial/TestSerial_ViewAPI_b.cpp | 11 +- .../unit_test/serial/TestSerial_ViewAPI_c.cpp | 11 +- .../unit_test/serial/TestSerial_ViewAPI_d.cpp | 11 +- .../unit_test/serial/TestSerial_ViewAPI_e.cpp | 12 +- .../TestSerial_ViewLayoutStrideAssignment.cpp | 12 +- .../serial/TestSerial_ViewMapping_a.cpp | 12 +- .../serial/TestSerial_ViewMapping_b.cpp | 12 +- .../serial/TestSerial_ViewMapping_subview.cpp | 12 +- .../serial/TestSerial_ViewOfClass.cpp | 12 +- .../serial/TestSerial_ViewResize.cpp | 46 + .../serial/TestSerial_View_64bit.cpp | 11 +- .../unit_test/serial/TestSerial_WorkGraph.cpp | 15 +- lib/kokkos/core/unit_test/standalone/Makefile | 7 +- .../unit_test/standalone/UnitTestMainInit.cpp | 23 +- ...Threads_AtomicOperations_complexdouble.cpp | 16 +- ...tThreads_AtomicOperations_complexfloat.cpp | 16 +- .../TestThreads_AtomicOperations_double.cpp | 16 +- .../TestThreads_AtomicOperations_float.cpp | 16 +- .../TestThreads_AtomicOperations_int.cpp | 16 +- .../TestThreads_AtomicOperations_longint.cpp | 16 +- ...stThreads_AtomicOperations_longlongint.cpp | 16 +- 
...stThreads_AtomicOperations_unsignedint.cpp | 16 +- ...reads_AtomicOperations_unsignedlongint.cpp | 16 +- .../threads/TestThreads_AtomicViews.cpp | 16 +- .../unit_test/threads/TestThreads_Atomics.cpp | 12 +- .../threads/TestThreads_Category.hpp | 25 +- .../unit_test/threads/TestThreads_Complex.cpp | 16 +- .../unit_test/threads/TestThreads_Crs.cpp | 15 +- .../threads/TestThreads_DeepCopyAlignment.cpp | 11 +- .../threads/TestThreads_FunctorAnalysis.cpp | 47 + .../unit_test/threads/TestThreads_Init.cpp | 21 +- .../threads/TestThreads_LocalDeepCopy.cpp | 12 +- .../threads/TestThreads_MDRange_a.cpp | 16 +- .../threads/TestThreads_MDRange_b.cpp | 16 +- .../threads/TestThreads_MDRange_c.cpp | 16 +- .../threads/TestThreads_MDRange_d.cpp | 16 +- .../threads/TestThreads_MDRange_e.cpp | 16 +- .../unit_test/threads/TestThreads_Other.cpp | 27 +- .../threads/TestThreads_RangePolicy.cpp | 16 +- .../threads/TestThreads_Reducers_a.cpp | 11 +- .../threads/TestThreads_Reducers_b.cpp | 11 +- .../threads/TestThreads_Reducers_c.cpp | 11 +- .../threads/TestThreads_Reducers_d.cpp | 11 +- .../threads/TestThreads_Reductions.cpp | 11 +- .../TestThreads_Reductions_DeviceView.cpp | 11 +- .../unit_test/threads/TestThreads_Scan.cpp | 16 +- .../threads/TestThreads_SharedAlloc.cpp | 19 +- .../threads/TestThreads_SubView_a.cpp | 77 +- .../threads/TestThreads_SubView_b.cpp | 35 +- .../threads/TestThreads_SubView_c01.cpp | 18 +- .../threads/TestThreads_SubView_c02.cpp | 19 +- .../threads/TestThreads_SubView_c03.cpp | 19 +- .../threads/TestThreads_SubView_c04.cpp | 18 +- .../threads/TestThreads_SubView_c05.cpp | 19 +- .../threads/TestThreads_SubView_c06.cpp | 19 +- .../threads/TestThreads_SubView_c07.cpp | 18 +- .../threads/TestThreads_SubView_c08.cpp | 19 +- .../threads/TestThreads_SubView_c09.cpp | 19 +- .../threads/TestThreads_SubView_c10.cpp | 18 +- .../threads/TestThreads_SubView_c11.cpp | 19 +- .../threads/TestThreads_SubView_c12.cpp | 19 +- .../threads/TestThreads_SubView_c13.cpp | 18 +- 
.../unit_test/threads/TestThreads_Team.cpp | 85 +- .../threads/TestThreads_TeamReductionScan.cpp | 64 +- .../threads/TestThreads_TeamScratch.cpp | 59 +- .../threads/TestThreads_TeamTeamSize.cpp | 13 +- .../threads/TestThreads_TeamVectorRange.cpp | 17 +- .../threads/TestThreads_UniqueToken.cpp | 46 + .../threads/TestThreads_ViewAPI_a.cpp | 11 +- .../threads/TestThreads_ViewAPI_b.cpp | 11 +- .../threads/TestThreads_ViewAPI_c.cpp | 11 +- .../threads/TestThreads_ViewAPI_d.cpp | 11 +- .../threads/TestThreads_ViewAPI_e.cpp | 11 +- ...TestThreads_ViewLayoutStrideAssignment.cpp | 12 +- .../threads/TestThreads_ViewMapping_a.cpp | 12 +- .../threads/TestThreads_ViewMapping_b.cpp | 12 +- .../TestThreads_ViewMapping_subview.cpp | 12 +- .../threads/TestThreads_ViewOfClass.cpp | 12 +- .../threads/TestThreads_ViewResize.cpp | 46 + .../threads/TestThreads_View_64bit.cpp | 11 +- .../threads/TestThreads_WorkGraph.cpp | 15 +- lib/kokkos/example/CMakeLists.txt | 28 +- .../CMakeLists.txt | 0 .../cmake_example.cpp | 17 +- .../foo.f | 0 .../build_cmake_installed/CMakeLists.txt | 42 + .../cmake_example.cpp} | 68 +- .../example/build_cmake_installed/foo.f | 4 + lib/kokkos/example/common/VectorImport.hpp | 294 - lib/kokkos/example/common/WrapMPI.hpp | 103 - lib/kokkos/example/feint/CMakeLists.txt | 18 - lib/kokkos/example/feint/ElemFunctor.hpp | 485 -- lib/kokkos/example/feint/Makefile | 71 - lib/kokkos/example/feint/feint.hpp | 165 - lib/kokkos/example/feint/feint_cuda.cpp | 67 - lib/kokkos/example/feint/feint_hpx.cpp | 67 - lib/kokkos/example/feint/feint_rocm.cpp | 67 - lib/kokkos/example/feint/feint_threads.cpp | 67 - lib/kokkos/example/fenl/CGSolve.hpp | 300 - lib/kokkos/example/fenl/CMakeLists.txt | 17 - lib/kokkos/example/fenl/Makefile | 50 - lib/kokkos/example/fenl/fenl.cpp | 181 - lib/kokkos/example/fenl/fenl.hpp | 89 - lib/kokkos/example/fenl/fenl_functors.hpp | 1173 ---- lib/kokkos/example/fenl/fenl_impl.hpp | 598 -- lib/kokkos/example/fenl/main.cpp | 367 -- 
lib/kokkos/example/fixture/BoxElemFixture.hpp | 355 -- lib/kokkos/example/fixture/BoxElemPart.cpp | 413 -- lib/kokkos/example/fixture/BoxElemPart.hpp | 320 - lib/kokkos/example/fixture/CMakeLists.txt | 13 - lib/kokkos/example/fixture/HexElement.hpp | 270 - lib/kokkos/example/fixture/Main.cpp | 307 - lib/kokkos/example/fixture/Makefile | 46 - lib/kokkos/example/fixture/TestFixture.hpp | 156 - .../example/global_2_local_ids/CMakeLists.txt | 17 - lib/kokkos/example/global_2_local_ids/G2L.hpp | 266 - .../example/global_2_local_ids/G2L_Main.cpp | 158 - .../example/global_2_local_ids/Makefile | 46 - lib/kokkos/example/grow_array/CMakeLists.txt | 14 - lib/kokkos/example/grow_array/Makefile | 46 - lib/kokkos/example/grow_array/grow_array.hpp | 257 - lib/kokkos/example/grow_array/main.cpp | 110 - lib/kokkos/example/make_buildlink/main.cpp | 13 +- lib/kokkos/example/md_skeleton/CMakeLists.txt | 16 - lib/kokkos/example/md_skeleton/Makefile | 46 - lib/kokkos/example/md_skeleton/README | 3 - lib/kokkos/example/md_skeleton/force.cpp | 184 - lib/kokkos/example/md_skeleton/main.cpp | 205 - lib/kokkos/example/md_skeleton/neighbor.cpp | 430 -- lib/kokkos/example/md_skeleton/setup.cpp | 271 - lib/kokkos/example/md_skeleton/system.h | 92 - lib/kokkos/example/md_skeleton/types.h | 118 - .../example/multi_fem/BoxMeshFixture.hpp | 610 -- .../example/multi_fem/BoxMeshPartition.cpp | 381 -- .../example/multi_fem/BoxMeshPartition.hpp | 210 - lib/kokkos/example/multi_fem/CMakeLists.txt | 16 - lib/kokkos/example/multi_fem/Explicit.hpp | 452 -- .../example/multi_fem/ExplicitFunctors.hpp | 1471 ----- lib/kokkos/example/multi_fem/FEMesh.hpp | 86 - lib/kokkos/example/multi_fem/HexElement.hpp | 268 - .../multi_fem/HexExplicitFunctions.hpp | 443 -- lib/kokkos/example/multi_fem/Implicit.hpp | 341 - .../example/multi_fem/ImplicitFunctors.hpp | 585 -- lib/kokkos/example/multi_fem/LinAlgBLAS.hpp | 567 -- lib/kokkos/example/multi_fem/Makefile | 49 - lib/kokkos/example/multi_fem/Nonlinear.hpp | 573 -- 
.../multi_fem/NonlinearElement_Cuda.hpp | 390 -- .../example/multi_fem/NonlinearFunctors.hpp | 482 -- lib/kokkos/example/multi_fem/ParallelComm.hpp | 167 - .../example/multi_fem/ParallelDataMap.hpp | 517 -- .../example/multi_fem/ParallelMachine.cpp | 178 - .../example/multi_fem/ParallelMachine.hpp | 118 - .../example/multi_fem/SparseLinearSystem.hpp | 404 -- .../multi_fem/SparseLinearSystemFill.hpp | 276 - .../multi_fem/SparseLinearSystem_Cuda.hpp | 164 - .../example/multi_fem/TestBoxMeshFixture.hpp | 242 - .../multi_fem/TestBoxMeshPartition.cpp | 172 - lib/kokkos/example/multi_fem/TestCuda.cpp | 188 - lib/kokkos/example/multi_fem/TestHost.cpp | 142 - .../example/multi_fem/TestHybridFEM.cpp | 348 -- .../example/query_device/CMakeLists.txt | 10 +- .../example/query_device/query_device.cpp | 58 +- lib/kokkos/example/sort_array/CMakeLists.txt | 14 - lib/kokkos/example/sort_array/Makefile | 46 - lib/kokkos/example/sort_array/main.cpp | 95 - lib/kokkos/example/sort_array/sort_array.hpp | 190 - .../tutorial/01_hello_world/CMakeLists.txt | 9 +- .../tutorial/01_hello_world/hello_world.cpp | 36 +- .../01_hello_world_lambda/CMakeLists.txt | 17 +- .../hello_world_lambda.cpp | 31 +- .../tutorial/02_simple_reduce/CMakeLists.txt | 9 +- .../02_simple_reduce/simple_reduce.cpp | 46 +- .../02_simple_reduce_lambda/CMakeLists.txt | 17 +- .../simple_reduce_lambda.cpp | 49 +- .../tutorial/03_simple_view/CMakeLists.txt | 9 +- .../example/tutorial/03_simple_view/Makefile | 2 +- .../tutorial/03_simple_view/simple_view.cpp | 56 +- .../03_simple_view_lambda/CMakeLists.txt | 6 +- .../simple_view_lambda.cpp | 78 +- .../04_simple_memoryspaces/CMakeLists.txt | 5 +- .../simple_memoryspaces.cpp | 44 +- .../tutorial/05_simple_atomics/CMakeLists.txt | 6 +- .../05_simple_atomics/simple_atomics.cpp | 76 +- .../06_simple_mdrangepolicy/CMakeLists.txt | 6 +- .../simple_mdrangepolicy.cpp | 130 +- .../01_data_layouts/CMakeLists.txt | 5 +- .../01_data_layouts/data_layouts.cpp | 72 +- 
.../02_memory_traits/CMakeLists.txt | 5 +- .../02_memory_traits/memory_traits.cpp | 86 +- .../Advanced_Views/03_subviews/CMakeLists.txt | 5 +- .../Advanced_Views/03_subviews/subviews.cpp | 108 +- .../04_dualviews/CMakeLists.txt | 5 +- .../Advanced_Views/04_dualviews/dual_view.cpp | 209 +- .../05_NVIDIA_UVM/CMakeLists.txt | 14 +- .../05_NVIDIA_UVM/uvm_example.cpp | 91 +- .../overlapping_deepcopy.cpp | 129 +- .../tutorial/Advanced_Views/CMakeLists.txt | 12 +- .../01_random_numbers/random_numbers.cpp | 182 +- lib/kokkos/example/tutorial/CMakeLists.txt | 22 +- .../01_thread_teams/CMakeLists.txt | 5 +- .../01_thread_teams/thread_teams.cpp | 67 +- .../01_thread_teams_lambda/CMakeLists.txt | 13 +- .../thread_teams_lambda.cpp | 53 +- .../02_nested_parallel_for/CMakeLists.txt | 5 +- .../nested_parallel_for.cpp | 81 +- .../03_vectorization/CMakeLists.txt | 10 +- .../03_vectorization/vectorization.cpp | 156 +- .../04_team_scan/CMakeLists.txt | 6 +- .../04_team_scan/team_scan.cpp | 151 +- .../Hierarchical_Parallelism/CMakeLists.txt | 10 +- .../tutorial/launch_bounds/CMakeLists.txt | 5 +- .../launch_bounds/launch_bounds_reduce.cpp | 199 +- .../example/virtual_functions/classes.cpp | 22 +- .../example/virtual_functions/classes.hpp | 48 +- lib/kokkos/example/virtual_functions/main.cpp | 52 +- lib/kokkos/generate_makefile.bash | 100 +- lib/kokkos/master_history.txt | 1 + 2169 files changed, 121961 insertions(+), 127047 deletions(-) create mode 100644 lib/kokkos/BUILD.md create mode 100644 lib/kokkos/CONTRIBUTING.md delete mode 100644 lib/kokkos/README create mode 100644 lib/kokkos/README.md create mode 100755 lib/kokkos/cm_generate_makefile.bash create mode 100644 lib/kokkos/cmake/KokkosConfigCommon.cmake.in create mode 100644 lib/kokkos/cmake/KokkosCore_config.h.in delete mode 100644 lib/kokkos/cmake/Makefile.generate_cmake_settings delete mode 100644 lib/kokkos/cmake/Modules/FindHWLOC.cmake delete mode 100644 lib/kokkos/cmake/Modules/FindMemkind.cmake delete mode 100644 
lib/kokkos/cmake/Modules/FindQthreads.cmake create mode 100644 lib/kokkos/cmake/Modules/FindTPLCUDA.cmake create mode 100644 lib/kokkos/cmake/Modules/FindTPLHPX.cmake create mode 100644 lib/kokkos/cmake/Modules/FindTPLHWLOC.cmake create mode 100644 lib/kokkos/cmake/Modules/FindTPLLIBDL.cmake create mode 100644 lib/kokkos/cmake/Modules/FindTPLLIBNUMA.cmake create mode 100644 lib/kokkos/cmake/Modules/FindTPLLIBRT.cmake create mode 100644 lib/kokkos/cmake/Modules/FindTPLMEMKIND.cmake create mode 100644 lib/kokkos/cmake/Modules/FindTPLPTHREAD.cmake create mode 100644 lib/kokkos/cmake/README.md create mode 100644 lib/kokkos/cmake/compile_tests/clang_omp.cpp create mode 100644 lib/kokkos/cmake/compile_tests/pthread.cpp create mode 100644 lib/kokkos/cmake/cray.cmake delete mode 100644 lib/kokkos/cmake/deps/QTHREADS.cmake create mode 100644 lib/kokkos/cmake/fake_tribits.cmake create mode 100644 lib/kokkos/cmake/gnu.cmake create mode 100644 lib/kokkos/cmake/intel.cmake create mode 100644 lib/kokkos/cmake/kokkos_arch.cmake delete mode 100644 lib/kokkos/cmake/kokkos_build.cmake create mode 100644 lib/kokkos/cmake/kokkos_compiler_id.cmake create mode 100644 lib/kokkos/cmake/kokkos_corner_cases.cmake create mode 100644 lib/kokkos/cmake/kokkos_enable_devices.cmake create mode 100644 lib/kokkos/cmake/kokkos_enable_options.cmake create mode 100644 lib/kokkos/cmake/kokkos_install.cmake delete mode 100644 lib/kokkos/cmake/kokkos_options.cmake create mode 100644 lib/kokkos/cmake/kokkos_pick_cxx_std.cmake delete mode 100644 lib/kokkos/cmake/kokkos_settings.cmake create mode 100644 lib/kokkos/cmake/kokkos_test_cxx_std.cmake create mode 100644 lib/kokkos/cmake/kokkos_tpls.cmake create mode 100644 lib/kokkos/cmake/kokkos_tribits.cmake create mode 100644 lib/kokkos/cmake/pgi.cmake delete mode 100644 lib/kokkos/cmake/tpls/FindTPLQTHREADS.cmake delete mode 100644 lib/kokkos/cmake/tribits.cmake create mode 100644 lib/kokkos/core/src/HPX/Kokkos_HPX_ChunkedRoundRobinExecutor.hpp delete mode 
100644 lib/kokkos/core/src/Makefile delete mode 100644 lib/kokkos/core/src/Makefile.generate_build_files delete mode 100644 lib/kokkos/core/src/Makefile.generate_header_lists create mode 100644 lib/kokkos/core/src/dummy.cpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_MemorySpace.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_Stacktrace.cpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_Stacktrace.hpp rename lib/kokkos/{example/feint/feint_fwd.hpp => core/unit_test/TestStackTrace.cpp} (69%) create mode 100644 lib/kokkos/core/unit_test/TestStackTrace.hpp rename lib/kokkos/{example/feint/feint_openmp.cpp => core/unit_test/TestStackTrace_f0.cpp} (67%) rename lib/kokkos/{example/fixture/TestFixture.cpp => core/unit_test/TestStackTrace_f1.cpp} (69%) rename lib/kokkos/{example/feint/feint_serial.cpp => core/unit_test/TestStackTrace_f2.cpp} (67%) create mode 100644 lib/kokkos/core/unit_test/TestStackTrace_f3.cpp create mode 100644 lib/kokkos/core/unit_test/TestStackTrace_f4.cpp create mode 100644 lib/kokkos/core/unit_test/TestViewResize.hpp create mode 100644 lib/kokkos/core/unit_test/configuration/test-code/CMakeLists.txt create mode 100644 lib/kokkos/core/unit_test/configuration/test-code/Makefile create mode 100644 lib/kokkos/core/unit_test/configuration/test-code/main.cpp create mode 100755 lib/kokkos/core/unit_test/configuration/test-code/test_config.bash create mode 100755 lib/kokkos/core/unit_test/configuration/test-code/test_config_arch_list.bash create mode 100755 lib/kokkos/core/unit_test/configuration/test-code/test_config_device_list.bash create mode 100755 lib/kokkos/core/unit_test/configuration/test-code/test_config_options_list.bash create mode 100755 lib/kokkos/core/unit_test/configuration/test-code/test_config_run.bash create mode 100644 lib/kokkos/core/unit_test/cuda/TestCuda_DebugPinUVMSpace.cpp create mode 100644 lib/kokkos/core/unit_test/cuda/TestCuda_DebugSerialExecution.cpp create mode 100644 
lib/kokkos/core/unit_test/cuda/TestCuda_FunctorAnalysis.cpp create mode 100644 lib/kokkos/core/unit_test/cuda/TestCuda_ViewResize.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_AtomicOperations_complexdouble.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_AtomicOperations_complexfloat.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_DeepCopyAlignment.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_FunctorAnalysis.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_LocalDeepCopy.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_Reductions_DeviceView.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_TeamTeamSize.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_ViewLayoutStrideAssignment.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_ViewResize.cpp create mode 100644 lib/kokkos/core/unit_test/openmp/TestOpenMP_FunctorAnalysis.cpp create mode 100644 lib/kokkos/core/unit_test/openmp/TestOpenMP_ViewResize.cpp create mode 100644 lib/kokkos/core/unit_test/serial/TestSerial_FunctorAnalysis.cpp create mode 100644 lib/kokkos/core/unit_test/serial/TestSerial_UniqueToken.cpp create mode 100644 lib/kokkos/core/unit_test/serial/TestSerial_ViewResize.cpp create mode 100644 lib/kokkos/core/unit_test/threads/TestThreads_FunctorAnalysis.cpp create mode 100644 lib/kokkos/core/unit_test/threads/TestThreads_UniqueToken.cpp create mode 100644 lib/kokkos/core/unit_test/threads/TestThreads_ViewResize.cpp rename lib/kokkos/example/{cmake_build => build_cmake_in_tree}/CMakeLists.txt (100%) rename lib/kokkos/example/{cmake_build => build_cmake_in_tree}/cmake_example.cpp (85%) rename lib/kokkos/example/{cmake_build => build_cmake_in_tree}/foo.f (100%) create mode 100644 lib/kokkos/example/build_cmake_installed/CMakeLists.txt rename lib/kokkos/example/{feint/main.cpp => build_cmake_installed/cmake_example.cpp} (55%) create mode 100644 lib/kokkos/example/build_cmake_installed/foo.f delete 
mode 100644 lib/kokkos/example/common/VectorImport.hpp delete mode 100644 lib/kokkos/example/common/WrapMPI.hpp delete mode 100644 lib/kokkos/example/feint/CMakeLists.txt delete mode 100644 lib/kokkos/example/feint/ElemFunctor.hpp delete mode 100644 lib/kokkos/example/feint/Makefile delete mode 100644 lib/kokkos/example/feint/feint.hpp delete mode 100644 lib/kokkos/example/feint/feint_cuda.cpp delete mode 100644 lib/kokkos/example/feint/feint_hpx.cpp delete mode 100644 lib/kokkos/example/feint/feint_rocm.cpp delete mode 100644 lib/kokkos/example/feint/feint_threads.cpp delete mode 100644 lib/kokkos/example/fenl/CGSolve.hpp delete mode 100644 lib/kokkos/example/fenl/CMakeLists.txt delete mode 100644 lib/kokkos/example/fenl/Makefile delete mode 100644 lib/kokkos/example/fenl/fenl.cpp delete mode 100644 lib/kokkos/example/fenl/fenl.hpp delete mode 100644 lib/kokkos/example/fenl/fenl_functors.hpp delete mode 100644 lib/kokkos/example/fenl/fenl_impl.hpp delete mode 100644 lib/kokkos/example/fenl/main.cpp delete mode 100644 lib/kokkos/example/fixture/BoxElemFixture.hpp delete mode 100644 lib/kokkos/example/fixture/BoxElemPart.cpp delete mode 100644 lib/kokkos/example/fixture/BoxElemPart.hpp delete mode 100644 lib/kokkos/example/fixture/CMakeLists.txt delete mode 100644 lib/kokkos/example/fixture/HexElement.hpp delete mode 100644 lib/kokkos/example/fixture/Main.cpp delete mode 100644 lib/kokkos/example/fixture/Makefile delete mode 100644 lib/kokkos/example/fixture/TestFixture.hpp delete mode 100644 lib/kokkos/example/global_2_local_ids/CMakeLists.txt delete mode 100644 lib/kokkos/example/global_2_local_ids/G2L.hpp delete mode 100644 lib/kokkos/example/global_2_local_ids/G2L_Main.cpp delete mode 100644 lib/kokkos/example/global_2_local_ids/Makefile delete mode 100644 lib/kokkos/example/grow_array/CMakeLists.txt delete mode 100644 lib/kokkos/example/grow_array/Makefile delete mode 100644 lib/kokkos/example/grow_array/grow_array.hpp delete mode 100644 
lib/kokkos/example/grow_array/main.cpp delete mode 100644 lib/kokkos/example/md_skeleton/CMakeLists.txt delete mode 100644 lib/kokkos/example/md_skeleton/Makefile delete mode 100644 lib/kokkos/example/md_skeleton/README delete mode 100644 lib/kokkos/example/md_skeleton/force.cpp delete mode 100644 lib/kokkos/example/md_skeleton/main.cpp delete mode 100644 lib/kokkos/example/md_skeleton/neighbor.cpp delete mode 100644 lib/kokkos/example/md_skeleton/setup.cpp delete mode 100644 lib/kokkos/example/md_skeleton/system.h delete mode 100644 lib/kokkos/example/md_skeleton/types.h delete mode 100644 lib/kokkos/example/multi_fem/BoxMeshFixture.hpp delete mode 100644 lib/kokkos/example/multi_fem/BoxMeshPartition.cpp delete mode 100644 lib/kokkos/example/multi_fem/BoxMeshPartition.hpp delete mode 100644 lib/kokkos/example/multi_fem/CMakeLists.txt delete mode 100644 lib/kokkos/example/multi_fem/Explicit.hpp delete mode 100644 lib/kokkos/example/multi_fem/ExplicitFunctors.hpp delete mode 100644 lib/kokkos/example/multi_fem/FEMesh.hpp delete mode 100644 lib/kokkos/example/multi_fem/HexElement.hpp delete mode 100644 lib/kokkos/example/multi_fem/HexExplicitFunctions.hpp delete mode 100644 lib/kokkos/example/multi_fem/Implicit.hpp delete mode 100644 lib/kokkos/example/multi_fem/ImplicitFunctors.hpp delete mode 100644 lib/kokkos/example/multi_fem/LinAlgBLAS.hpp delete mode 100644 lib/kokkos/example/multi_fem/Makefile delete mode 100644 lib/kokkos/example/multi_fem/Nonlinear.hpp delete mode 100644 lib/kokkos/example/multi_fem/NonlinearElement_Cuda.hpp delete mode 100644 lib/kokkos/example/multi_fem/NonlinearFunctors.hpp delete mode 100644 lib/kokkos/example/multi_fem/ParallelComm.hpp delete mode 100644 lib/kokkos/example/multi_fem/ParallelDataMap.hpp delete mode 100644 lib/kokkos/example/multi_fem/ParallelMachine.cpp delete mode 100644 lib/kokkos/example/multi_fem/ParallelMachine.hpp delete mode 100644 lib/kokkos/example/multi_fem/SparseLinearSystem.hpp delete mode 100644 
lib/kokkos/example/multi_fem/SparseLinearSystemFill.hpp delete mode 100644 lib/kokkos/example/multi_fem/SparseLinearSystem_Cuda.hpp delete mode 100644 lib/kokkos/example/multi_fem/TestBoxMeshFixture.hpp delete mode 100644 lib/kokkos/example/multi_fem/TestBoxMeshPartition.cpp delete mode 100644 lib/kokkos/example/multi_fem/TestCuda.cpp delete mode 100644 lib/kokkos/example/multi_fem/TestHost.cpp delete mode 100644 lib/kokkos/example/multi_fem/TestHybridFEM.cpp delete mode 100644 lib/kokkos/example/sort_array/CMakeLists.txt delete mode 100644 lib/kokkos/example/sort_array/Makefile delete mode 100644 lib/kokkos/example/sort_array/main.cpp delete mode 100644 lib/kokkos/example/sort_array/sort_array.hpp diff --git a/lib/kokkos/BUILD.md b/lib/kokkos/BUILD.md new file mode 100644 index 0000000000..c4d6c98365 --- /dev/null +++ b/lib/kokkos/BUILD.md @@ -0,0 +1,323 @@ +![Kokkos](https://avatars2.githubusercontent.com/u/10199860?s=200&v=4) + +# Installing and Using Kokkos + +## Kokkos Philosophy +Kokkos provides a modern CMake style build system. +As C++ continues to develop for C++20 and beyond, CMake is likely to provide the most robust support +for C++. Applications heavily leveraging Kokkos are strongly encouraged to use a CMake build system. + +You can either use Kokkos as an installed package (encouraged) or use Kokkos in-tree in your project. +Modern CMake is exceedingly simple at a high-level (with the devil in the details). +Once Kokkos is installed, in your `CMakeLists.txt` simply use: +```` +find_package(Kokkos REQUIRED) +```` +Then for every executable or library in your project: +```` +target_link_libraries(myTarget Kokkos::kokkos) +```` +That's it! There is no checking Kokkos preprocessor, compiler, or linker flags. +Kokkos propagates all the necessary flags to your project. +This means not only is linking to Kokkos easy, but Kokkos itself can actually configure compiler and linker flags for *your* +project.
If building in-tree, there is no `find_package` and you link with `target_link_libraries(kokkos)`. + + +## Configuring CMake +A very basic installation is done with: +```` +cmake ${srcdir} \ + -DCMAKE_CXX_COMPILER=g++ \ + -DCMAKE_INSTALL_PREFIX=${my_install_folder} +```` +which builds and installs a default Kokkos when you run `make install`. +There are numerous device backends, options, and architecture-specific optimizations that can be configured, e.g. +```` +cmake ${srcdir} \ + -DCMAKE_CXX_COMPILER=g++ \ + -DCMAKE_INSTALL_PREFIX=${my_install_folder} \ + -DKokkos_ENABLE_OPENMP=On +```` +which activates the OpenMP backend. All of the options controlling device backends, options, architectures, and third-party libraries (TPLs) are given below. + +## Spack +An alternative to manually building with CMake is to use the Spack package manager. +To do so, download the `kokkos-spack` git repo and add to the package list: +```` +spack repo add $path-to-kokkos-spack +```` +A basic installation would be done as: +```` +spack install kokkos +```` +Spack allows options and compilers to be tuned in the install command. +```` +spack install kokkos@3.0 %gcc@7.3.0 +openmp +```` +This example illustrates the three most common parameters to Spack: +* Variants: specified with, e.g. `+openmp`, this activates (or deactivates with, e.g. `~openmp`) certain options. +* Version: immediately following `kokkos` the `@version` can specify a particular Kokkos to build +* Compiler: a default compiler will be chosen if not specified, but an exact compiler version can be given with the `%` option. + +For a complete list of Kokkos options, run: +```` +spack info kokkos +```` + +#### Spack Development +Spack currently installs packages to a location determined by a unique hash. This hash name is not really "human readable". +Generally, Spack usage should never really require you to reference the computer-generated unique install folder.
+If you must know, you can locate Spack Kokkos installations with: +```` +spack find -p kokkos ... +```` +where `...` is the unique spec identifying the particular Kokkos configuration and version. + +A better way to use Spack for doing Kokkos development is the DIY feature of Spack. +If you wish to develop Kokkos itself, go to the Kokkos source folder: +```` +spack diy -u cmake kokkos@diy ... +```` +where `...` is a Spack spec identifying the exact Kokkos configuration. +This then creates a `spack-build` directory where you can run `make`. + +If doing development on a downstream project, you can do almost exactly the same thing. +```` +spack diy -u cmake ${myproject}@${myversion} ... ^kokkos... +```` +where the `...` are the specs for your project and the desired Kokkos configuration. +Again, a `spack-build` directory will be created where you can run `make`. + +Spack has a few idiosyncrasies that make building outside of Spack annoying, related to Spack forcing use of a compiler wrapper. This can be worked around by passing `-DSpack_WORKAROUND=On` to your CMake. Then add the following block of code to your CMakeLists.txt: + +```` +if (Spack_WORKAROUND) + set(SPACK_CXX $ENV{SPACK_CXX}) + if(SPACK_CXX) + set(CMAKE_CXX_COMPILER ${SPACK_CXX} CACHE STRING "the C++ compiler" FORCE) + set(ENV{CXX} ${SPACK_CXX}) + endif() +endif() +```` + +# Kokkos Keyword Listing + +## Device Backends +Device backends can be enabled by specifying `-DKokkos_ENABLE_X`. + +* Kokkos_ENABLE_CUDA + * Whether to build CUDA backend + * BOOL Default: OFF +* Kokkos_ENABLE_HPX + * Whether to build HPX backend (experimental) + * BOOL Default: OFF +* Kokkos_ENABLE_OPENMP + * Whether to build OpenMP backend + * BOOL Default: OFF +* Kokkos_ENABLE_PTHREAD + * Whether to build Pthread backend + * BOOL Default: OFF +* Kokkos_ENABLE_SERIAL + * Whether to build serial backend + * BOOL Default: ON + +## Enable Options +Options can be enabled by specifying `-DKokkos_ENABLE_X`.
+ +* Kokkos_ENABLE_AGGRESSIVE_VECTORIZATION + * Whether to aggressively vectorize loops + * BOOL Default: OFF +* Kokkos_ENABLE_COMPILER_WARNINGS + * Whether to print all compiler warnings + * BOOL Default: OFF +* Kokkos_ENABLE_CUDA_CONSTEXPR + * Whether to activate experimental relaxed constexpr functions + * BOOL Default: OFF +* Kokkos_ENABLE_CUDA_LAMBDA + * Whether to activate experimental lambda features + * BOOL Default: OFF +* Kokkos_ENABLE_CUDA_LDG_INTRINSIC + * Whether to use CUDA LDG intrinsics + * BOOL Default: OFF +* Kokkos_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE + * Whether to enable relocatable device code (RDC) for CUDA + * BOOL Default: OFF +* Kokkos_ENABLE_CUDA_UVM + * Whether to use unified memory (UM) by default for CUDA + * BOOL Default: OFF +* Kokkos_ENABLE_DEBUG + * Whether to activate extra debug features - may increase compile times + * BOOL Default: OFF +* Kokkos_ENABLE_DEBUG_BOUNDS_CHECK + * Whether to use bounds checking - will increase runtime + * BOOL Default: OFF +* Kokkos_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK + * Debug check on dual views + * BOOL Default: OFF +* Kokkos_ENABLE_DEPRECATED_CODE + * Whether to enable deprecated code + * BOOL Default: OFF +* Kokkos_ENABLE_HPX_ASYNC_DISPATCH + * Whether HPX supports asynchronous dispatch + * BOOL Default: OFF +* Kokkos_ENABLE_LARGE_MEM_TESTS + * Whether to perform extra large memory tests + * BOOL Default: OFF +* Kokkos_ENABLE_PROFILING + * Whether to create bindings for profiling tools + * BOOL Default: ON +* Kokkos_ENABLE_PROFILING_LOAD_PRINT + * Whether to print information about which profiling tools got loaded + * BOOL Default: OFF +* Kokkos_ENABLE_TESTS + * Whether to build tests + * BOOL Default: OFF + +## Other Options +* Kokkos_CXX_STANDARD + * The C++ standard for Kokkos to use: c++11, c++14, c++17, or c++20. This should be given in CMake style as 11, 14, 17, or 20.
+ * STRING Default: 11 + +## Third-party Libraries (TPLs) +The following options control enabling TPLs: +* Kokkos_ENABLE_HPX + * Whether to enable the HPX library + * BOOL Default: OFF +* Kokkos_ENABLE_HWLOC + * Whether to enable the HWLOC library + * BOOL Default: Off +* Kokkos_ENABLE_LIBNUMA + * Whether to enable the LIBNUMA library + * BOOL Default: Off +* Kokkos_ENABLE_MEMKIND + * Whether to enable the MEMKIND library + * BOOL Default: Off +* Kokkos_ENABLE_LIBDL + * Whether to enable the LIBDL library + * BOOL Default: On +* Kokkos_ENABLE_LIBRT + * Whether to enable the LIBRT library + * BOOL Default: Off + +The following options control finding and configuring non-CMake TPLs: +* Kokkos_CUDA_DIR or CUDA_ROOT + * Location of CUDA install prefix for libraries + * PATH Default: +* Kokkos_HWLOC_DIR or HWLOC_ROOT + * Location of HWLOC install prefix + * PATH Default: +* Kokkos_LIBNUMA_DIR or LIBNUMA_ROOT + * Location of LIBNUMA install prefix + * PATH Default: +* Kokkos_MEMKIND_DIR or MEMKIND_ROOT + * Location of MEMKIND install prefix + * PATH Default: +* Kokkos_LIBDL_DIR or LIBDL_ROOT + * Location of LIBDL install prefix + * PATH Default: +* Kokkos_LIBRT_DIR or LIBRT_ROOT + * Location of LIBRT install prefix + * PATH Default: + +The following options control `find_package` paths for CMake-based TPLs: +* HPX_DIR or HPX_ROOT + * Location of HPX prefix (ROOT) or CMake config file (DIR) + * PATH Default: + +## Architecture Keywords +Architecture-specific optimizations can be enabled by specifying `-DKokkos_ARCH_X`.
+ +* Kokkos_ARCH_AMDAVX + * Whether to optimize for the AMDAVX architecture + * BOOL Default: OFF +* Kokkos_ARCH_ARMV80 + * Whether to optimize for the ARMV80 architecture + * BOOL Default: OFF +* Kokkos_ARCH_ARMV81 + * Whether to optimize for the ARMV81 architecture + * BOOL Default: OFF +* Kokkos_ARCH_ARMV8_THUNDERX + * Whether to optimize for the ARMV8_THUNDERX architecture + * BOOL Default: OFF +* Kokkos_ARCH_ARMV8_TX2 + * Whether to optimize for the ARMV8_TX2 architecture + * BOOL Default: OFF +* Kokkos_ARCH_BDW + * Whether to optimize for the BDW architecture + * BOOL Default: OFF +* Kokkos_ARCH_BGQ + * Whether to optimize for the BGQ architecture + * BOOL Default: OFF +* Kokkos_ARCH_EPYC + * Whether to optimize for the EPYC architecture + * BOOL Default: OFF +* Kokkos_ARCH_HSW + * Whether to optimize for the HSW architecture + * BOOL Default: OFF +* Kokkos_ARCH_KEPLER30 + * Whether to optimize for the KEPLER30 architecture + * BOOL Default: OFF +* Kokkos_ARCH_KEPLER32 + * Whether to optimize for the KEPLER32 architecture + * BOOL Default: OFF +* Kokkos_ARCH_KEPLER35 + * Whether to optimize for the KEPLER35 architecture + * BOOL Default: OFF +* Kokkos_ARCH_KEPLER37 + * Whether to optimize for the KEPLER37 architecture + * BOOL Default: OFF +* Kokkos_ARCH_KNC + * Whether to optimize for the KNC architecture + * BOOL Default: OFF +* Kokkos_ARCH_KNL + * Whether to optimize for the KNL architecture + * BOOL Default: OFF +* Kokkos_ARCH_MAXWELL50 + * Whether to optimize for the MAXWELL50 architecture + * BOOL Default: OFF +* Kokkos_ARCH_MAXWELL52 + * Whether to optimize for the MAXWELL52 architecture + * BOOL Default: OFF +* Kokkos_ARCH_MAXWELL53 + * Whether to optimize for the MAXWELL53 architecture + * BOOL Default: OFF +* Kokkos_ARCH_PASCAL60 + * Whether to optimize for the PASCAL60 architecture + * BOOL Default: OFF +* Kokkos_ARCH_PASCAL61 + * Whether to optimize for the PASCAL61 architecture + * BOOL Default: OFF +* Kokkos_ARCH_POWER7 + * Whether to optimize 
for the POWER7 architecture + * BOOL Default: OFF +* Kokkos_ARCH_POWER8 + * Whether to optimize for the POWER8 architecture + * BOOL Default: OFF +* Kokkos_ARCH_POWER9 + * Whether to optimize for the POWER9 architecture + * BOOL Default: OFF +* Kokkos_ARCH_SKX + * Whether to optimize for the SKX architecture + * BOOL Default: OFF +* Kokkos_ARCH_SNB + * Whether to optimize for the SNB architecture + * BOOL Default: OFF +* Kokkos_ARCH_TURING75 + * Whether to optimize for the TURING75 architecture + * BOOL Default: OFF +* Kokkos_ARCH_VOLTA70 + * Whether to optimize for the VOLTA70 architecture + * BOOL Default: OFF +* Kokkos_ARCH_VOLTA72 + * Whether to optimize for the VOLTA72 architecture + * BOOL Default: OFF +* Kokkos_ARCH_WSM + * Whether to optimize for the WSM architecture + * BOOL Default: OFF + +##### [LICENSE](https://github.com/kokkos/kokkos/blob/devel/LICENSE) + +[![License](https://img.shields.io/badge/License-BSD%203--Clause-blue.svg)](https://opensource.org/licenses/BSD-3-Clause) + +Under the terms of Contract DE-NA0003525 with NTESS, +the U.S. Government retains certain rights in this software. 
diff --git a/lib/kokkos/CHANGELOG.md b/lib/kokkos/CHANGELOG.md index 8d196e2c35..459aeb3d2e 100644 --- a/lib/kokkos/CHANGELOG.md +++ b/lib/kokkos/CHANGELOG.md @@ -1,5 +1,45 @@ # Change Log +## [3.0.00](https://github.com/kokkos/kokkos/tree/3.0.00) (2020-01-27) +[Full Changelog](https://github.com/kokkos/kokkos/compare/2.9.00...3.0.00) + +**Implemented enhancements:** + +- BuildSystem: Standalone Modern CMake Support [\#2104](https://github.com/kokkos/kokkos/issues/2104) +- StyleFormat: ClangFormat Style [\#2157](https://github.com/kokkos/kokkos/issues/2157) +- Documentation: Document build system and CMake philosophy [\#2263](https://github.com/kokkos/kokkos/issues/2263) +- BuildSystem: Add Alias with Namespace Kokkos:: to Interal Libraries [\#2530](https://github.com/kokkos/kokkos/issues/2530) +- BuildSystem: Universal Kokkos find\_package [\#2099](https://github.com/kokkos/kokkos/issues/2099) +- BuildSystem: Dropping support for Kokkos\_{DEVICES,OPTIONS,ARCH} in CMake [\#2329](https://github.com/kokkos/kokkos/issues/2329) +- BuildSystem: Set Kokkos\_DEVICES and Kokkos\_ARCH variables in exported CMake configuration [\#2193](https://github.com/kokkos/kokkos/issues/2193) +- BuildSystem: Drop support for CUDA 7 and CUDA 8 [\#2489](https://github.com/kokkos/kokkos/issues/2489) +- BuildSystem: Drop CMake option SEPARATE\_TESTS [\#2266](https://github.com/kokkos/kokkos/issues/2266) +- BuildSystem: Support expt-relaxed-constexpr same as expt-extended-lambda [\#2411](https://github.com/kokkos/kokkos/issues/2411) +- BuildSystem: Add Xnvlink to command line options allowed in nvcc\_wrapper [\#2197](https://github.com/kokkos/kokkos/issues/2197) +- BuildSystem: Install Kokkos config files and target files to lib/cmake/Kokkos [\#2162](https://github.com/kokkos/kokkos/issues/2162) +- BuildSystem: nvcc\_wrappers and c++ 14 [\#2035](https://github.com/kokkos/kokkos/issues/2035) +- BuildSystem: Kokkos version major/version minor \(Feature request\) 
[\#1930](https://github.com/kokkos/kokkos/issues/1930) +- BuildSystem: CMake namespaces \(and other modern cmake cleanup\) [\#1924](https://github.com/kokkos/kokkos/issues/1924) +- BuildSystem: Remove capability to install Kokkos via GNU Makefiles [\#2332](https://github.com/kokkos/kokkos/issues/2332) +- Documentation: Remove PDF ProgrammingGuide in Kokkos replace with link [\#2244](https://github.com/kokkos/kokkos/issues/2244) +- View: Add Method to Resize View without Initialization [\#2048](https://github.com/kokkos/kokkos/issues/2048) +- Vector: implement "insert" method for Kokkos\_Vector \(as a serial function on host\) [\#2437](https://github.com/kokkos/kokkos/issues/2437) + +**Fixed bugs:** + +- ParallelScan: Kokkos::parallel\_scan fix race condition seen in inter-block fence [\#2681](https://github.com/kokkos/kokkos/issues/2681) +- OffsetView: Kokkos::OffsetView missing constructor which takes pointer [\#2247](https://github.com/kokkos/kokkos/issues/2247) +- OffsetView: Kokkos::OffsetView: allow offset=0 [\#2246](https://github.com/kokkos/kokkos/issues/2246) +- DeepCopy: Missing DeepCopy instrumentation in Kokkos [\#2522](https://github.com/kokkos/kokkos/issues/2522) +- nvcc\_wrapper: --host-only fails with multiple -W\* flags [\#2484](https://github.com/kokkos/kokkos/issues/2484) +- nvcc\_wrapper: taking first -std option is counterintuitive [\#2553](https://github.com/kokkos/kokkos/issues/2553) +- Subview: Error taking subviews of views with static_extents of min rank [\#2448](https://github.com/kokkos/kokkos/issues/2448) +- TeamPolicy: reducers with valuetypes without += broken on CUDA [\#2410](https://github.com/kokkos/kokkos/issues/2410) +- Libs: Fix inconsistency of Kokkos library names in Kokkos and Trilinos [\#1902](https://github.com/kokkos/kokkos/issues/1902) +- Complex: operator\>\> for complex\<T\> uses std::ostream, not std::istream [\#2313](https://github.com/kokkos/kokkos/issues/2313) +- Macros: Restrict not honored for non-intel compilers
[\#1922](https://github.com/kokkos/kokkos/issues/1922) + + ## [2.9.00](https://github.com/kokkos/kokkos/tree/2.9.00) (2019-06-24) [Full Changelog](https://github.com/kokkos/kokkos/compare/2.8.00...2.9.00) diff --git a/lib/kokkos/CMakeLists.txt b/lib/kokkos/CMakeLists.txt index 236f523aec..6a4451b2e7 100644 --- a/lib/kokkos/CMakeLists.txt +++ b/lib/kokkos/CMakeLists.txt @@ -1,128 +1,218 @@ -# Is this a build as part of Trilinos? +# We want to determine if options are given with the wrong case +# In order to detect which arguments are given to compare against +# the list of valid arguments, at the beginning here we need to +# form a list of all the given variables. If it begins with any +# case of KoKkOS, we add it to the list. + + +GET_CMAKE_PROPERTY(_variableNames VARIABLES) +SET(KOKKOS_GIVEN_VARIABLES) +FOREACH (var ${_variableNames}) + STRING(TOUPPER ${var} UC_VAR) + STRING(FIND ${UC_VAR} KOKKOS IDX) + IF (${IDX} EQUAL 0) + LIST(APPEND KOKKOS_GIVEN_VARIABLES ${var}) + ENDIF() +ENDFOREACH() + +# Basic initialization (Used in KOKKOS_SETTINGS) +SET(Kokkos_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) +SET(KOKKOS_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) +SET(KOKKOS_SRC_PATH ${Kokkos_SOURCE_DIR}) +SET(KOKKOS_PATH ${Kokkos_SOURCE_DIR}) +SET(KOKKOS_TOP_BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}) + +# Needed to simplify syntax of if statements +CMAKE_POLICY(SET CMP0054 NEW) + +# Is this a build as part of Trilinos? 
IF(COMMAND TRIBITS_PACKAGE_DECL) - SET(KOKKOS_HAS_TRILINOS ON CACHE BOOL "") + SET(KOKKOS_HAS_TRILINOS ON) ELSE() - SET(KOKKOS_HAS_TRILINOS OFF CACHE BOOL "") + SET(KOKKOS_HAS_TRILINOS OFF) ENDIF() -IF(NOT KOKKOS_HAS_TRILINOS) - cmake_minimum_required(VERSION 3.3 FATAL_ERROR) +INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_functions.cmake) +INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_pick_cxx_std.cmake) + +SET(KOKKOS_ENABLED_OPTIONS) #exported in config file +SET(KOKKOS_ENABLED_DEVICES) #exported in config file +SET(KOKKOS_ENABLED_TPLS) #exported in config file +SET(KOKKOS_ENABLED_ARCH_LIST) #exported in config file + +#These are helper flags used for sanity checks during config +#Certain features should depend on other features being configured first +SET(KOKKOS_CFG_DAG_NONE On) #sentinel to indicate no dependencies +SET(KOKKOS_CFG_DAG_DEVICES_DONE Off) +SET(KOKKOS_CFG_DAG_OPTIONS_DONE Off) +SET(KOKKOS_CFG_DAG_ARCH_DONE Off) +SET(KOKKOS_CFG_DAG_CXX_STD_DONE Off) +SET(KOKKOS_CFG_DAG_COMPILER_ID_DONE Off) +FUNCTION(KOKKOS_CFG_DEPENDS SUCCESSOR PRECURSOR) + SET(PRE_FLAG KOKKOS_CFG_DAG_${PRECURSOR}) + SET(POST_FLAG KOKKOS_CFG_DAG_${SUCCESSOR}) + IF (NOT ${PRE_FLAG}) + MESSAGE(FATAL_ERROR "Bad CMake refactor: feature ${SUCCESSOR} cannot be configured until ${PRECURSOR} is configured") + ENDIF() + GLOBAL_SET(${POST_FLAG} On) +ENDFUNCTION() - # Define Project Name if this is a standalone build + +LIST(APPEND CMAKE_MODULE_PATH cmake/Modules) + +IF(NOT KOKKOS_HAS_TRILINOS) + cmake_minimum_required(VERSION 3.10 FATAL_ERROR) + set(CMAKE_DISABLE_SOURCE_CHANGES ON) + set(CMAKE_DISABLE_IN_SOURCE_BUILD ON) + IF (Spack_WORKAROUND) + #if we are explicitly using Spack for development, + #nuke the Spack compiler + SET(SPACK_CXX $ENV{SPACK_CXX}) + IF(SPACK_CXX) + SET(CMAKE_CXX_COMPILER ${SPACK_CXX} CACHE STRING "the C++ compiler" FORCE) + SET(ENV{CXX} ${SPACK_CXX}) + ENDIF() + ENDif() IF(NOT DEFINED ${PROJECT_NAME}) - project(Kokkos CXX) + PROJECT(Kokkos CXX) ENDIF() +ENDIF() - # Basic 
initialization (Used in KOKKOS_SETTINGS) - set(KOKKOS_SRC_PATH ${Kokkos_SOURCE_DIR}) - set(KOKKOS_PATH ${KOKKOS_SRC_PATH}) - - #------------ COMPILER AND FEATURE CHECKS ------------------------------------ - include(${KOKKOS_SRC_PATH}/cmake/kokkos_functions.cmake) - set_kokkos_cxx_compiler() - set_kokkos_cxx_standard() - - #------------ GET OPTIONS AND KOKKOS_SETTINGS -------------------------------- - # Add Kokkos' modules to CMake's module path. - set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${Kokkos_SOURCE_DIR}/cmake/Modules/") - - set(KOKKOS_CMAKE_VERBOSE True) - include(${KOKKOS_SRC_PATH}/cmake/kokkos_options.cmake) - - include(${KOKKOS_SRC_PATH}/cmake/kokkos_settings.cmake) - - #------------ GENERATE HEADER AND SOURCE FILES ------------------------------- - execute_process( - COMMAND ${KOKKOS_SETTINGS} make -f ${KOKKOS_SRC_PATH}/cmake/Makefile.generate_cmake_settings CXX=${CMAKE_CXX_COMPILER} PREFIX=${CMAKE_INSTALL_PREFIX} generate_build_settings - WORKING_DIRECTORY "${Kokkos_BINARY_DIR}" - OUTPUT_FILE ${Kokkos_BINARY_DIR}/core_src_make.out - RESULT_VARIABLE GEN_SETTINGS_RESULT - ) - if (GEN_SETTINGS_RESULT) - message(FATAL_ERROR "Kokkos settings generation failed:\n" - "${KOKKOS_SETTINGS} make -f ${KOKKOS_SRC_PATH}/cmake/Makefile.generate_cmake_settings CXX=${CMAKE_CXX_COMPILER} generate_build_settings") - endif() - include(${Kokkos_BINARY_DIR}/kokkos_generated_settings.cmake) - install(FILES ${Kokkos_BINARY_DIR}/kokkos_generated_settings.cmake DESTINATION lib/cmake/Kokkos) - install(FILES ${Kokkos_BINARY_DIR}/kokkos_generated_settings.cmake DESTINATION ${CMAKE_INSTALL_PREFIX}) - string(REPLACE " " ";" KOKKOS_TPL_INCLUDE_DIRS "${KOKKOS_GMAKE_TPL_INCLUDE_DIRS}") - string(REPLACE " " ";" KOKKOS_TPL_LIBRARY_DIRS "${KOKKOS_GMAKE_TPL_LIBRARY_DIRS}") - string(REPLACE " " ";" KOKKOS_TPL_LIBRARY_NAMES "${KOKKOS_GMAKE_TPL_LIBRARY_NAMES}") - list(REMOVE_ITEM KOKKOS_TPL_INCLUDE_DIRS "") - list(REMOVE_ITEM KOKKOS_TPL_LIBRARY_DIRS "") - list(REMOVE_ITEM 
KOKKOS_TPL_LIBRARY_NAMES "") - set_kokkos_srcs(KOKKOS_SRC ${KOKKOS_SRC}) - - #------------ NOW BUILD ------------------------------------------------------ - include(${KOKKOS_SRC_PATH}/cmake/kokkos_build.cmake) - - #------------ Add in Fake Tribits Handling to allow unit test builds- -------- - - include(${KOKKOS_SRC_PATH}/cmake/tribits.cmake) - - TRIBITS_PACKAGE_DECL(Kokkos) - - ADD_SUBDIRECTORY(core) - ADD_SUBDIRECTORY(containers) - ADD_SUBDIRECTORY(algorithms) +IF (NOT CMAKE_SIZEOF_VOID_P) + STRING(FIND ${CMAKE_CXX_COMPILER} nvcc_wrapper FIND_IDX) + IF (NOT FIND_IDX STREQUAL -1) + MESSAGE(FATAL_ERROR "Kokkos did not configure correctly and failed to validate compiler. The most likely cause is CUDA linkage using nvcc_wrapper. Please ensure your CUDA environment is correctly configured.") + ELSE() + MESSAGE(FATAL_ERROR "Kokkos did not configure correctly and failed to validate compiler. The most likely cause is linkage errors during CMake compiler validation. Please consult the CMake error log shown below for the exact error during compiler validation") + ENDIF() +ELSEIF (NOT CMAKE_SIZEOF_VOID_P EQUAL 8) + MESSAGE(FATAL_ERROR "Kokkos assumes a 64-bit build; i.e., 8-byte pointers, but found ${CMAKE_SIZEOF_VOID_P}-byte pointers instead") +ENDIF() -ELSE() -#------------------------------------------------------------------------------ -# -# A) Forward declare the package so that certain options are also defined for -# subpackages -# -TRIBITS_PACKAGE_DECL(Kokkos) # ENABLE_SHADOWING_WARNINGS) +set(Kokkos_VERSION_MAJOR 3) +set(Kokkos_VERSION_MINOR 0) +set(Kokkos_VERSION_PATCH 0) +set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}") +IF(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.12.0") + MESSAGE(STATUS "Setting policy CMP0074 to use _ROOT variables") + CMAKE_POLICY(SET CMP0074 NEW) +ENDIF() + +# Load either the real TriBITS or a TriBITS wrapper +# for certain utility functions that are universal (like GLOBAL_SET) 
+INCLUDE(${KOKKOS_SRC_PATH}/cmake/fake_tribits.cmake) + +IF (Kokkos_ENABLE_CUDA AND ${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.14.0") + #If we are building CUDA, we have tricked CMake because we declare a CXX project + #If the default C++ standard for a given compiler matches the requested + #standard, then CMake just omits the -std flag in later versions of CMake + #This breaks CUDA compilation (CUDA compiler can have a different default + #-std then the underlying host compiler by itself). Setting this variable + #forces CMake to always add the -std flag even if it thinks it doesn't need it + GLOBAL_SET(CMAKE_CXX_STANDARD_DEFAULT 98) +ENDIF() + +# These are the variables we will append to as we go +# I really wish these were regular variables +# but scoping issues can make it difficult +GLOBAL_RESET(KOKKOS_COMPILE_OPTIONS) +GLOBAL_RESET(KOKKOS_LINK_OPTIONS) +GLOBAL_RESET(KOKKOS_CUDA_OPTIONS) +GLOBAL_RESET(KOKKOS_CUDAFE_OPTIONS) +GLOBAL_RESET(KOKKOS_XCOMPILER_OPTIONS) +# We need to append text here for making sure TPLs +# we import are available for an installed Kokkos +GLOBAL_RESET(KOKKOS_TPL_EXPORTS) +# We need these for controlling the exact -std flag +GLOBAL_RESET(KOKKOS_DONT_ALLOW_EXTENSIONS) +GLOBAL_RESET(KOKKOS_USE_CXX_EXTENSIONS) +GLOBAL_RESET(KOKKOS_CXX_STANDARD_FEATURE) + +# Include a set of Kokkos-specific wrapper functions that +# will either call raw CMake or TriBITS +# These are functions like KOKKOS_INCLUDE_DIRECTORIES +INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_tribits.cmake) + + +# The build environment setup goes in the following steps +# 1) Check all the enable options. 
This includes checking Kokkos_DEVICES +# 2) Check the compiler ID (type and version) +# 3) Check the CXX standard and select important CXX flags +# 4) Check for any third-party libraries (TPLs) like hwloc +# 5) Check if optimizing for a particular architecture and add arch-specific flags +KOKKOS_SETUP_BUILD_ENVIRONMENT() + +# Finish off the build +# 6) Recurse into subdirectories and configure individual libraries +# 7) Export and install targets + +OPTION(BUILD_SHARED_LIBS "Build shared libraries" OFF) +# Workaround for building position independent code. +IF(BUILD_SHARED_LIBS) + SET(CMAKE_POSITION_INDEPENDENT_CODE ON) +ENDIF() + +SET(KOKKOS_EXT_LIBRARIES Kokkos::kokkos Kokkos::kokkoscore Kokkos::kokkoscontainers Kokkos::kokkosalgorithms) +SET(KOKKOS_INT_LIBRARIES kokkos kokkoscore kokkoscontainers kokkosalgorithms) +SET_PROPERTY(GLOBAL PROPERTY KOKKOS_INT_LIBRARIES ${KOKKOS_INT_LIBRARIES}) + +GET_DIRECTORY_PROPERTY(HAS_PARENT PARENT_DIRECTORY) +IF (KOKKOS_HAS_TRILINOS) + SET(TRILINOS_INCDIR ${CMAKE_INSTALL_PREFIX}/${${PROJECT_NAME}_INSTALL_INCLUDE_DIR}) + SET(KOKKOS_HEADER_DIR ${TRILINOS_INCDIR}) + SET(KOKKOS_IS_SUBDIRECTORY TRUE) +ELSEIF(HAS_PARENT) + SET(KOKKOS_HEADER_DIR "include/kokkos") + SET(KOKKOS_IS_SUBDIRECTORY TRUE) +ELSE() + SET(KOKKOS_HEADER_DIR "${CMAKE_INSTALL_INCLUDEDIR}") + SET(KOKKOS_IS_SUBDIRECTORY FALSE) +ENDIF() -#------------------------------------------------------------------------------ -# -# B) Install Kokkos' build files -# -# If using the Makefile-generated files, then need to set things up. 
-# Here, assume that TriBITS has been run from ProjectCompilerPostConfig.cmake -# and already generated KokkosCore_config.h and kokkos_generated_settings.cmake -# in the previously define Kokkos_GEN_DIR -# We need to copy them over to the correct place and source the cmake file - -if(NOT KOKKOS_LEGACY_TRIBITS) - set(Kokkos_GEN_DIR ${CMAKE_BINARY_DIR}) - file(COPY "${Kokkos_GEN_DIR}/KokkosCore_config.h" - DESTINATION "${CMAKE_CURRENT_BINARY_DIR}" USE_SOURCE_PERMISSIONS) - install(FILES "${Kokkos_GEN_DIR}/KokkosCore_config.h" - DESTINATION include) - file(COPY "${Kokkos_GEN_DIR}/kokkos_generated_settings.cmake" - DESTINATION "${CMAKE_CURRENT_BINARY_DIR}" USE_SOURCE_PERMISSIONS) - - include(${CMAKE_CURRENT_BINARY_DIR}/kokkos_generated_settings.cmake) - # Sources come from makefile-generated kokkos_generated_settings.cmake file - # Enable using the individual sources if needed - set_kokkos_srcs(KOKKOS_SRC ${KOKKOS_SRC}) -endif () #------------------------------------------------------------------------------ # -# C) Install Kokkos' executable scripts -# +# A) Forward declare the package so that certain options are also defined for +# subpackages -# nvcc_wrapper is Kokkos' wrapper for NVIDIA's NVCC CUDA compiler. -# Kokkos needs nvcc_wrapper in order to build. Other libraries and -# executables also need nvcc_wrapper. Thus, we need to install it. -# If the argument of DESTINATION is a relative path, CMake computes it -# as relative to ${CMAKE_INSTALL_PATH}. 
+## This restores the old behavior of ProjectCompilerPostConfig.cmake +# It sets the CMAKE_CXX_FLAGS globally to those used by Kokkos +# We must do this before KOKKOS_PACKAGE_DECL +IF (KOKKOS_HAS_TRILINOS) + # Overwrite the old flags at the top-level + # Because Tribits doesn't use lists, it uses spaces for the list of CXX flags + # we have to match the annoying behavior + STRING(REPLACE ";" " " KOKKOSCORE_COMPILE_OPTIONS "${KOKKOS_COMPILE_OPTIONS}") + STRING(REPLACE ";" " " KOKKOSCORE_CUDA_OPTIONS "${KOKKOS_CUDA_OPTIONS}") + FOREACH(CUDAFE_FLAG ${KOKKOS_CUDAFE_OPTIONS}) + SET(KOKKOSCORE_CUDAFE_OPTIONS "${KOKKOSCORE_CUDAFE_OPTIONS} -Xcudafe ${CUDAFE_FLAG}") + ENDFOREACH() + FOREACH(XCOMP_FLAG ${KOKKOS_XCOMPILER_OPTIONS}) + SET(KOKKOSCORE_XCOMPILER_OPTIONS "${KOKKOSCORE_XCOMPILER_OPTIONS} -Xcompiler ${XCOMP_FLAG}") + ENDFOREACH() + SET(KOKKOSCORE_CXX_FLAGS "${KOKKOSCORE_COMPILE_OPTIONS} ${CMAKE_CXX${KOKKOS_CXX_STANDARD}_STANDARD_COMPILE_OPTION} ${KOKKOSCORE_CUDA_OPTIONS} ${KOKKOSCORE_CUDAFE_OPTIONS} ${KOKKOSCORE_XCOMPILER_OPTIONS}") + # Both parent scope and this package + # In ProjectCompilerPostConfig.cmake, we capture the "global" flags Trilinos wants in + # TRILINOS_TOPLEVEL_CXX_FLAGS + SET(CMAKE_CXX_FLAGS "${TRILINOS_TOPLEVEL_CXX_FLAGS} ${KOKKOSCORE_CXX_FLAGS}" PARENT_SCOPE) + SET(CMAKE_CXX_FLAGS "${TRILINOS_TOPLEVEL_CXX_FLAGS} ${KOKKOSCORE_CXX_FLAGS}") + #CMAKE_CXX_FLAGS will get added to Kokkos and Kokkos dependencies automatically here + #These flags get set up in KOKKOS_PACKAGE_DECL, which means they + #must be configured before KOKKOS_PACKAGE_DECL +ENDIF() -INSTALL(PROGRAMS ${CMAKE_CURRENT_SOURCE_DIR}/bin/nvcc_wrapper DESTINATION bin) +KOKKOS_PACKAGE_DECL() #------------------------------------------------------------------------------ # -# D) Process the subpackages for Kokkos +# D) Process the subpackages (subdirectories) for Kokkos # - -TRIBITS_PROCESS_SUBPACKAGES() +KOKKOS_PROCESS_SUBPACKAGES() 
#------------------------------------------------------------------------------ @@ -130,10 +220,39 @@ TRIBITS_PROCESS_SUBPACKAGES() # E) If Kokkos itself is enabled, process the Kokkos package # -TRIBITS_PACKAGE_DEF() +KOKKOS_PACKAGE_DEF() +KOKKOS_EXCLUDE_AUTOTOOLS_FILES() +KOKKOS_PACKAGE_POSTPROCESS() -TRIBITS_EXCLUDE_AUTOTOOLS_FILES() +#We are ready to configure the header +CONFIGURE_FILE(cmake/KokkosCore_config.h.in KokkosCore_config.h @ONLY) -TRIBITS_PACKAGE_POSTPROCESS() +IF (NOT KOKKOS_HAS_TRILINOS) + ADD_LIBRARY(kokkos INTERFACE) + #Make sure in-tree projects can reference this as Kokkos:: + #to match the installed target names + ADD_LIBRARY(Kokkos::kokkos ALIAS kokkos) + TARGET_LINK_LIBRARIES(kokkos INTERFACE kokkoscore kokkoscontainers kokkosalgorithms) + KOKKOS_INTERNAL_ADD_LIBRARY_INSTALL(kokkos) +ENDIF() +INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_install.cmake) +# nvcc_wrapper is Kokkos' wrapper for NVIDIA's NVCC CUDA compiler. +# Kokkos needs nvcc_wrapper in order to build. Other libraries and +# executables also need nvcc_wrapper. Thus, we need to install it. +# If the argument of DESTINATION is a relative path, CMake computes it +# as relative to ${CMAKE_INSTALL_PATH}. 
+INSTALL(PROGRAMS ${CMAKE_CURRENT_SOURCE_DIR}/bin/nvcc_wrapper DESTINATION ${CMAKE_INSTALL_BINDIR}) +INSTALL(FILES "${CMAKE_CURRENT_BINARY_DIR}/KokkosCore_config.h" DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) + + +# Finally - if we are a subproject - make sure the enabled devices are visible +IF (HAS_PARENT) + FOREACH(DEV Kokkos_ENABLED_DEVICES) + #I would much rather not make these cache variables or global properties, but I can't + #make any guarantees on whether PARENT_SCOPE is good enough to make + #these variables visible where I need them + SET(Kokkos_ENABLE_${DEV} ON PARENT_SCOPE) + SET_PROPERTY(GLOBAL PROPERTY Kokkos_ENABLE_${DEV} ON) + ENDFOREACH() ENDIF() diff --git a/lib/kokkos/CONTRIBUTING.md b/lib/kokkos/CONTRIBUTING.md new file mode 100644 index 0000000000..b4f3057cef --- /dev/null +++ b/lib/kokkos/CONTRIBUTING.md @@ -0,0 +1,14 @@ +# Contributing to Kokkos + +## Pull Requests +We actively welcome pull requests. +1. Fork the repo and create your branch from `develop`. +2. If you've added code that should be tested, add tests. +3. If you've changed APIs, update the documentation. +4. Ensure the test suite passes. + +## Issues +We use GitHub issues to track public bugs. Please ensure your description is clear and has sufficient instructions to be able to reproduce the issue. + +## License +By contributing to Kokkos, you agree that your contributions will be licensed under the LICENSE file in the root directory of this source tree. diff --git a/lib/kokkos/Copyright.txt b/lib/kokkos/Copyright.txt index 50b76995af..06184796b2 100644 --- a/lib/kokkos/Copyright.txt +++ b/lib/kokkos/Copyright.txt @@ -1,10 +1,11 @@ //@HEADER // ************************************************************************ // -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). 
// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Redistribution and use in source and binary forms, with or without @@ -22,10 +23,10 @@ // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR diff --git a/lib/kokkos/LICENSE b/lib/kokkos/LICENSE index c68a8a2a9f..c6f17087d5 100644 --- a/lib/kokkos/LICENSE +++ b/lib/kokkos/LICENSE @@ -1,10 +1,11 @@ //@HEADER // ************************************************************************ // -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). // -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Kokkos is licensed under 3-clause BSD terms of use: @@ -24,10 +25,10 @@ // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. 
// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR diff --git a/lib/kokkos/Makefile.kokkos b/lib/kokkos/Makefile.kokkos index e9ad57f0ae..fd96e14bb5 100644 --- a/lib/kokkos/Makefile.kokkos +++ b/lib/kokkos/Makefile.kokkos @@ -23,14 +23,16 @@ KOKKOS_DEBUG ?= "no" KOKKOS_USE_TPLS ?= "" # Options: c++11,c++14,c++1y,c++17,c++1z,c++2a KOKKOS_CXX_STANDARD ?= "c++11" -# Options: aggressive_vectorization,disable_profiling,enable_deprecated_code,disable_deprecated_code,enable_large_mem_tests +# Options: aggressive_vectorization,disable_profiling,enable_deprecated_code,disable_deprecated_code,enable_large_mem_tests,disable_complex_align KOKKOS_OPTIONS ?= "" # Option for setting ETI path KOKKOS_ETI_PATH ?= ${KOKKOS_PATH}/core/src/eti KOKKOS_CMAKE ?= "no" +KOKKOS_TRIBITS ?= "no" +KOKKOS_STANDALONE_CMAKE ?= "no" # Default settings specific options. -# Options: force_uvm,use_ldg,rdc,enable_lambda +# Options: force_uvm,use_ldg,rdc,enable_lambda,enable_constexpr KOKKOS_CUDA_OPTIONS ?= "enable_lambda" # Default settings specific options. @@ -47,7 +49,8 @@ kokkos_has_string=$(if $(findstring $2,$1),1,0) # Will return a 1 if /path/to/file exists kokkos_path_exists=$(if $(wildcard $1),1,0) -# Check for general settings. 
+# Check for general settings + KOKKOS_INTERNAL_ENABLE_DEBUG := $(call kokkos_has_string,$(KOKKOS_DEBUG),yes) KOKKOS_INTERNAL_ENABLE_CXX11 := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++11) KOKKOS_INTERNAL_ENABLE_CXX14 := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++14) @@ -67,6 +70,7 @@ KOKKOS_INTERNAL_OPT_RANGE_AGGRESSIVE_VECTORIZATION := $(call kokkos_has_string,$ KOKKOS_INTERNAL_DISABLE_PROFILING := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_profiling) KOKKOS_INTERNAL_DISABLE_DEPRECATED_CODE := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_deprecated_code) KOKKOS_INTERNAL_ENABLE_DEPRECATED_CODE := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_deprecated_code) +KOKKOS_INTERNAL_DISABLE_COMPLEX_ALIGN := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_complex_align) KOKKOS_INTERNAL_DISABLE_DUALVIEW_MODIFY_CHECK := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_dualview_modify_check) KOKKOS_INTERNAL_ENABLE_PROFILING_LOAD_PRINT := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_profile_load_print) KOKKOS_INTERNAL_ENABLE_LARGE_MEM_TESTS := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_large_mem_tests) @@ -74,6 +78,7 @@ KOKKOS_INTERNAL_CUDA_USE_LDG := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS), KOKKOS_INTERNAL_CUDA_USE_UVM := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),force_uvm) KOKKOS_INTERNAL_CUDA_USE_RELOC := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),rdc) KOKKOS_INTERNAL_CUDA_USE_LAMBDA := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),enable_lambda) +KOKKOS_INTERNAL_CUDA_USE_CONSTEXPR := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),enable_constexpr) KOKKOS_INTERNAL_HPX_ENABLE_ASYNC_DISPATCH := $(call kokkos_has_string,$(KOKKOS_HPX_OPTIONS),enable_async_dispatch) KOKKOS_INTERNAL_ENABLE_ETI := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_eti) @@ -123,7 +128,7 @@ KOKKOS_INTERNAL_COMPILER_INTEL := $(call kokkos_has_string,$(KOKKOS_CXX_VE KOKKOS_INTERNAL_COMPILER_PGI := $(call 
kokkos_has_string,$(KOKKOS_CXX_VERSION),PGI) KOKKOS_INTERNAL_COMPILER_XL := $(strip $(shell $(CXX) -qversion 2>&1 | grep XL | wc -l)) KOKKOS_INTERNAL_COMPILER_CRAY := $(strip $(shell $(CXX) -craype-verbose 2>&1 | grep "CC-" | wc -l)) -KOKKOS_INTERNAL_COMPILER_NVCC := $(strip $(shell export OMPI_CXX=$(OMPI_CXX); export MPICH_CXX=$(MPICH_CXX); $(CXX) --version 2>&1 | grep nvcc | wc -l)) +KOKKOS_INTERNAL_COMPILER_NVCC := $(strip $(shell export OMPI_CXX=$(OMPI_CXX); export MPICH_CXX=$(MPICH_CXX); echo "$(shell $(CXX) --version 2>&1 | grep nvcc | wc -l)>0" | bc)) KOKKOS_INTERNAL_COMPILER_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),clang) KOKKOS_INTERNAL_COMPILER_APPLE_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),Apple LLVM) KOKKOS_INTERNAL_COMPILER_HCC := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),HCC) @@ -383,10 +388,10 @@ endif # Generating the list of Flags. -#CPPFLAGS is now unused KOKKOS_CPPFLAGS = +KOKKOS_LIBDIRS = ifneq ($(KOKKOS_CMAKE), yes) - KOKKOS_CXXFLAGS = -I./ -I$(KOKKOS_PATH)/core/src -I$(KOKKOS_PATH)/containers/src -I$(KOKKOS_PATH)/algorithms/src -I$(KOKKOS_ETI_PATH) + KOKKOS_CPPFLAGS = -I./ -I$(KOKKOS_PATH)/core/src -I$(KOKKOS_PATH)/containers/src -I$(KOKKOS_PATH)/algorithms/src -I$(KOKKOS_ETI_PATH) endif KOKKOS_TPL_INCLUDE_DIRS = KOKKOS_TPL_LIBRARY_DIRS = @@ -399,7 +404,7 @@ endif KOKKOS_LIBS = -ldl KOKKOS_TPL_LIBRARY_NAMES += dl ifneq ($(KOKKOS_CMAKE), yes) - KOKKOS_LDFLAGS = -L$(shell pwd) + KOKKOS_LIBDIRS = -L$(shell pwd) # CXXLDFLAGS is used together with CXXFLAGS in a combined compile/link command KOKKOS_CXXLDFLAGS = -L$(shell pwd) endif @@ -492,28 +497,38 @@ ifeq ($(KOKKOS_INTERNAL_USE_ISA_POWERPCBE), 1) tmp := $(call kokkos_append_header,"\#endif") endif +#only add the c++ standard flags if this is not CMake tmp := $(call kokkos_append_header,"/* General Settings */") ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX11), 1) +ifneq ($(KOKKOS_STANDALONE_CMAKE), yes) KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX11_FLAG) +endif tmp := $(call 
kokkos_append_header,"\#define KOKKOS_ENABLE_CXX11") endif ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX14), 1) +ifneq ($(KOKKOS_STANDALONE_CMAKE), yes) KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX14_FLAG) +endif tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX14") endif ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX1Y), 1) + #I cannot make CMake add this in a good way - so add it here KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX1Y_FLAG) tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX14") endif ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX17), 1) +ifneq ($(KOKKOS_STANDALONE_CMAKE), yes) KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX17_FLAG) +endif tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX17") endif ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX1Z), 1) + #I cannot make CMake add this in a good way - so add it here KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX1Z_FLAG) tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX17") endif ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX2A), 1) + #I cannot make CMake add this in a good way - so add it here KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX2A_FLAG) tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX20") endif @@ -531,23 +546,26 @@ ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1) tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK") endif endif +ifeq ($(KOKKOS_INTERNAL_DISABLE_COMPLEX_ALIGN), 0) + tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_COMPLEX_ALIGN") +endif ifeq ($(KOKKOS_INTERNAL_ENABLE_PROFILING_LOAD_PRINT), 1) tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_PROFILING_LOAD_PRINT") endif ifeq ($(KOKKOS_INTERNAL_USE_HWLOC), 1) - ifneq ($(HWLOC_PATH),) - ifneq ($(KOKKOS_CMAKE), yes) - KOKKOS_CXXFLAGS += -I$(HWLOC_PATH)/include + ifneq ($(KOKKOS_CMAKE), yes) + ifneq ($(HWLOC_PATH),) + KOKKOS_CPPFLAGS += -I$(HWLOC_PATH)/include + KOKKOS_LIBDIRS += -L$(HWLOC_PATH)/lib + KOKKOS_CXXLDFLAGS += -L$(HWLOC_PATH)/lib + KOKKOS_TPL_INCLUDE_DIRS += $(HWLOC_PATH)/include + 
KOKKOS_TPL_LIBRARY_DIRS += $(HWLOC_PATH)/lib endif - KOKKOS_LDFLAGS += -L$(HWLOC_PATH)/lib - KOKKOS_CXXLDFLAGS += -L$(HWLOC_PATH)/lib - KOKKOS_TPL_INCLUDE_DIRS += $(HWLOC_PATH)/include - KOKKOS_TPL_LIBRARY_DIRS += $(HWLOC_PATH)/lib + KOKKOS_LIBS += -lhwloc + KOKKOS_TPL_LIBRARY_NAMES += hwloc endif - KOKKOS_LIBS += -lhwloc - KOKKOS_TPL_LIBRARY_NAMES += hwloc tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_HWLOC") endif @@ -558,17 +576,17 @@ ifeq ($(KOKKOS_INTERNAL_USE_LIBRT), 1) endif ifeq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1) - ifneq ($(MEMKIND_PATH),) - ifneq ($(KOKKOS_CMAKE), yes) - KOKKOS_CXXFLAGS += -I$(MEMKIND_PATH)/include + ifneq ($(KOKKOS_CMAKE), yes) + ifneq ($(MEMKIND_PATH),) + KOKKOS_CPPFLAGS += -I$(MEMKIND_PATH)/include + KOKKOS_LIBDIRS += -L$(MEMKIND_PATH)/lib + KOKKOS_CXXLDFLAGS += -L$(MEMKIND_PATH)/lib + KOKKOS_TPL_INCLUDE_DIRS += $(MEMKIND_PATH)/include + KOKKOS_TPL_LIBRARY_DIRS += $(MEMKIND_PATH)/lib endif - KOKKOS_LDFLAGS += -L$(MEMKIND_PATH)/lib - KOKKOS_CXXLDFLAGS += -L$(MEMKIND_PATH)/lib - KOKKOS_TPL_INCLUDE_DIRS += $(MEMKIND_PATH)/include - KOKKOS_TPL_LIBRARY_DIRS += $(MEMKIND_PATH)/lib + KOKKOS_LIBS += -lmemkind -lnuma + KOKKOS_TPL_LIBRARY_NAMES += memkind numa endif - KOKKOS_LIBS += -lmemkind -lnuma - KOKKOS_TPL_LIBRARY_NAMES += memkind numa tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_HBWSPACE") endif @@ -580,9 +598,6 @@ ifeq ($(KOKKOS_INTERNAL_USE_HPX), 0) ifeq ($(KOKKOS_INTERNAL_ENABLE_DEPRECATED_CODE), 1) tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_DEPRECATED_CODE") endif - ifeq ($(KOKKOS_INTERNAL_DISABLE_DEPRECATED_CODE), 0) - tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_DEPRECATED_CODE") - endif endif ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1) @@ -648,6 +663,21 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) endif endif + ifeq ($(KOKKOS_INTERNAL_CUDA_USE_CONSTEXPR), 1) + ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) + ifeq ($(shell test $(KOKKOS_INTERNAL_COMPILER_NVCC_VERSION) -ge 80; echo $$?),0) 
+ tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CUDA_CONSTEXPR") + KOKKOS_CXXFLAGS += -expt-relaxed-constexpr + else + $(warning Warning: Cuda relaxed constexpr support was requested but NVCC version is too low. This requires NVCC for Cuda version 8.0 or higher. Disabling relaxed constexpr support now.) + endif + endif + + ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) + tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CUDA_CONSTEXPR") + endif + endif + ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) tmp := $(call kokkos_append_header,"\#define KOKKOS_IMPL_CUDA_CLANG_WORKAROUND") endif @@ -1089,15 +1119,13 @@ ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1) endif KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp) ifneq ($(CUDA_PATH),) - ifneq ($(KOKKOS_CMAKE), yes) - KOKKOS_CXXFLAGS += -I$(CUDA_PATH)/include - endif + KOKKOS_CPPFLAGS += -I$(CUDA_PATH)/include ifeq ($(call kokkos_path_exists,$(CUDA_PATH)/lib64), 1) - KOKKOS_LDFLAGS += -L$(CUDA_PATH)/lib64 + KOKKOS_LIBDIRS += -L$(CUDA_PATH)/lib64 KOKKOS_CXXLDFLAGS += -L$(CUDA_PATH)/lib64 KOKKOS_TPL_LIBRARY_DIRS += $(CUDA_PATH)/lib64 else ifeq ($(call kokkos_path_exists,$(CUDA_PATH)/lib), 1) - KOKKOS_LDFLAGS += -L$(CUDA_PATH)/lib + KOKKOS_LIBDIRS += -L$(CUDA_PATH)/lib KOKKOS_CXXLDFLAGS += -L$(CUDA_PATH)/lib KOKKOS_TPL_LIBRARY_DIRS += $(CUDA_PATH)/lib else @@ -1153,17 +1181,17 @@ endif ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 1) KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Qthreads/*.cpp) KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Qthreads/*.hpp) - ifneq ($(QTHREADS_PATH),) - ifneq ($(KOKKOS_CMAKE), yes) - KOKKOS_CXXFLAGS += -I$(QTHREADS_PATH)/include + ifneq ($(KOKKOS_CMAKE), yes) + ifneq ($(QTHREADS_PATH),) + KOKKOS_CPPFLAGS += -I$(QTHREADS_PATH)/include + KOKKOS_LIBDIRS += -L$(QTHREADS_PATH)/lib + KOKKOS_CXXLDFLAGS += -L$(QTHREADS_PATH)/lib + KOKKOS_TPL_INCLUDE_DIRS += $(QTHREADS_PATH)/include + KOKKOS_TPL_LIBRARY_DIRS += $(QTHREADS_PATH)/lib64 endif - KOKKOS_LDFLAGS += 
-L$(QTHREADS_PATH)/lib - KOKKOS_CXXLDFLAGS += -L$(QTHREADS_PATH)/lib - KOKKOS_TPL_INCLUDE_DIRS += $(QTHREADS_PATH)/include - KOKKOS_TPL_LIBRARY_DIRS += $(QTHREADS_PATH)/lib64 + KOKKOS_LIBS += -lqthread + KOKKOS_TPL_LIBRARY_NAMES += qthread endif - KOKKOS_LIBS += -lqthread - KOKKOS_TPL_LIBRARY_NAMES += qthread endif ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) @@ -1173,21 +1201,21 @@ ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1) KOKKOS_CXXFLAGS += $(shell PKG_CONFIG_PATH=$(HPX_PATH)/lib64/pkgconfig pkg-config --cflags hpx_application_debug) KOKKOS_CXXLDFLAGS += $(shell PKG_CONFIG_PATH=$(HPX_PATH)/lib64/pkgconfig pkg-config --libs hpx_application_debug) - KOKKOS_LDFLAGS += $(shell PKG_CONFIG_PATH=$(HPX_PATH)/lib64/pkgconfig pkg-config --libs hpx_application_debug) + KOKKOS_LIBS += $(shell PKG_CONFIG_PATH=$(HPX_PATH)/lib64/pkgconfig pkg-config --libs hpx_application_debug) else KOKKOS_CXXFLAGS += $(shell PKG_CONFIG_PATH=$(HPX_PATH)/lib64/pkgconfig pkg-config --cflags hpx_application) KOKKOS_CXXLDFLAGS += $(shell PKG_CONFIG_PATH=$(HPX_PATH)/lib64/pkgconfig pkg-config --libs hpx_application) - KOKKOS_LDFLAGS += $(shell PKG_CONFIG_PATH=$(HPX_PATH)/lib64/pkgconfig pkg-config --libs hpx_application) + KOKKOS_LIBS += $(shell PKG_CONFIG_PATH=$(HPX_PATH)/lib64/pkgconfig pkg-config --libs hpx_application) endif else ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1) KOKKOS_CXXFLAGS += $(shell pkg-config --cflags hpx_application_debug) KOKKOS_CXXLDFLAGS += $(shell pkg-config --libs hpx_application_debug) - KOKKOS_LDFLAGS += $(shell pkg-config --libs hpx_application_debug) + KOKKOS_LIBS += $(shell pkg-config --libs hpx_application_debug) else KOKKOS_CXXFLAGS += $(shell pkg-config --cflags hpx_application) KOKKOS_CXXLDFLAGS += $(shell pkg-config --libs hpx_application) - KOKKOS_LDFLAGS += $(shell pkg-config --libs hpx_application) + KOKKOS_LIBS += $(shell pkg-config --libs hpx_application) endif endif KOKKOS_TPL_LIBRARY_NAMES += hpx @@ -1248,4 +1276,16 @@ 
libkokkos.a: $(KOKKOS_OBJ_LINK) $(KOKKOS_SRC) $(KOKKOS_HEADERS) ar cr libkokkos.a $(KOKKOS_OBJ_LINK) ranlib libkokkos.a +print-cxx-flags: + echo "$(KOKKOS_CXXFLAGS)" + KOKKOS_LINK_DEPENDS=libkokkos.a + +#we have carefully separated LDFLAGS from LIBS and LIBDIRS +#we have also separated CPPFLAGS from CXXFLAGS +#if this is not cmake, for backwards compatibility +#we just jam everything together into the CXXFLAGS and LDFLAGS +ifneq ($(KOKKOS_CMAKE), yes) + KOKKOS_CXXFLAGS += $(KOKKOS_CPPFLAGS) + KOKKOS_LDFLAGS += $(KOKKOS_LIBDIRS) +endif diff --git a/lib/kokkos/Makefile.targets b/lib/kokkos/Makefile.targets index e7d5a3c907..0a1f522016 100644 --- a/lib/kokkos/Makefile.targets +++ b/lib/kokkos/Makefile.targets @@ -6,6 +6,8 @@ Kokkos_CPUDiscovery.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_CPUDiscovery.cpp Kokkos_Error.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Error.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Error.cpp +Kokkos_Stacktrace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Stacktrace.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Stacktrace.cpp Kokkos_ExecPolicy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_ExecPolicy.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_ExecPolicy.cpp Kokkos_HostSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace.cpp diff --git a/lib/kokkos/README b/lib/kokkos/README deleted file mode 100644 index cb6ceb5581..0000000000 --- a/lib/kokkos/README +++ /dev/null @@ -1,193 +0,0 @@ -Kokkos Core implements a programming model in C++ for writing performance portable -applications targeting all major HPC platforms. 
For that purpose it provides -abstractions for both parallel execution of code and data management. -Kokkos is designed to target complex node architectures with N-level memory -hierarchies and multiple types of execution resources. It currently can use -OpenMP, Pthreads and CUDA as backend programming models. - -Kokkos Core is part of the Kokkos C++ Performance Portability Programming EcoSystem, -which also provides math kernels (https://github.com/kokkos/kokkos-kernels), as well as -profiling and debugging tools (https://github.com/kokkos/kokkos-tools). - -# Learning about Kokkos - -A programming guide can be found on the Wiki, the API reference is under development. - -For questions find us on Slack: https://kokkosteam.slack.com or open a github issue. - -For non-public questions send an email to -crtrott(at)sandia.gov - -A separate repository with extensive tutorial material can be found under -https://github.com/kokkos/kokkos-tutorials. - -Furthermore, the 'example/tutorial' directory provides step by step tutorial -examples which explain many of the features of Kokkos. They work with -simple Makefiles. To build with g++ and OpenMP simply type 'make' -in the 'example/tutorial' directory. This will build all examples in the -subfolders. To change the build options refer to the Programming Guide -in the compilation section. - -To learn more about Kokkos consider watching one of our presentations: -* GTC 2015: - - http://on-demand.gputechconf.com/gtc/2015/video/S5166.html - - http://on-demand.gputechconf.com/gtc/2015/presentation/S5166-H-Carter-Edwards.pdf - - -# Contributing to Kokkos - -We are open and try to encourage contributions from external developers. -To do so please first open an issue describing the contribution and then issue -a pull request against the develop branch. For larger features it may be good -to get guidance from the core development team first through the github issue. 
- -Note that Kokkos Core is licensed under standard 3-clause BSD terms of use. -Which means contributing to Kokkos allows anyone else to use your contributions -not just for public purposes but also for closed source commercial projects. -For specifics see the LICENSE file contained in the repository or distribution. - -# Requirements - -### Primary tested compilers on X86 are: - * GCC 4.8.4 - * GCC 4.9.3 - * GCC 5.1.0 - * GCC 5.5.0 - * GCC 6.1.0 - * GCC 7.2.0 - * GCC 7.3.0 - * GCC 8.1.0 - * Intel 15.0.2 - * Intel 16.0.1 - * Intel 17.0.1 - * Intel 17.4.196 - * Intel 18.2.128 - * Clang 3.6.1 - * Clang 3.7.1 - * Clang 3.8.1 - * Clang 3.9.0 - * Clang 4.0.0 - * Clang 6.0.0 for CUDA (CUDA Toolkit 9.0) - * Clang 7.0.0 for CUDA (CUDA Toolkit 9.1) - * PGI 18.7 - * NVCC 7.5 for CUDA (with gcc 4.8.4) - * NVCC 8.0.44 for CUDA (with gcc 5.3.0) - * NVCC 9.1 for CUDA (with gcc 6.1.0) - * NVCC 9.2 for CUDA (with gcc 7.2.0) - * NVCC 10.0 for CUDA (with gcc 7.4.0) - -### Primary tested compilers on Power 8 are: - * GCC 6.4.0 (OpenMP,Serial) - * GCC 7.2.0 (OpenMP,Serial) - * IBM XL 16.1.0 (OpenMP, Serial) - * NVCC 9.2.88 for CUDA (with gcc 7.2.0 and XL 16.1.0) - -### Primary tested compilers on Intel KNL are: - * Intel 16.4.258 (with gcc 4.7.2) - * Intel 17.2.174 (with gcc 4.9.3) - * Intel 18.2.199 (with gcc 4.9.3) - -### Primary tested compilers on ARM (Cavium ThunderX2) - * GCC 7.2.0 - * ARM/Clang 18.4.0 - -### Other compilers working: - * X86: - - Cygwin 2.1.0 64bit with gcc 4.9.3 - - GCC 8.1.0 (not warning free) - -### Known non-working combinations: - * Power8: - - Pthreads backend - * ARM - - Pthreads backend - - -Primary tested compiler are passing in release mode -with warnings as errors. They also are tested with a comprehensive set of -backend combinations (i.e. OpenMP, Pthreads, Serial, OpenMP+Serial, ...). 
-We are using the following set of flags: -GCC: -Wall -Wshadow -pedantic -Werror -Wsign-compare -Wtype-limits - -Wignored-qualifiers -Wempty-body -Wclobbered -Wuninitialized -Intel: -Wall -Wshadow -pedantic -Werror -Wsign-compare -Wtype-limits -Wuninitialized -Clang: -Wall -Wshadow -pedantic -Werror -Wsign-compare -Wtype-limits -Wuninitialized -NVCC: -Wall -Wshadow -pedantic -Werror -Wsign-compare -Wtype-limits -Wuninitialized - -Other compilers are tested occasionally, in particular when pushing from develop to -master branch, without -Werror and only for a select set of backends. - -# Running Unit Tests - -To run the unit tests create a build directory and run the following commands - -KOKKOS_PATH/generate_makefile.bash -make build-test -make test - -Run KOKKOS_PATH/generate_makefile.bash --help for more detailed options such as -changing the device type for which to build. - -# Installing the library - -To install Kokkos as a library create a build directory and run the following - -KOKKOS_PATH/generate_makefile.bash --prefix=INSTALL_PATH -make kokkoslib -make install - -KOKKOS_PATH/generate_makefile.bash --help for more detailed options such as -changing the device type for which to build. - -Note that in many cases it is preferable to build Kokkos inline with an -application. The main reason is that you may otherwise need many different -configurations of Kokkos installed depending on the required compile time -features an application needs. For example there is only one default -execution space, which means you need different installations to have OpenMP -or Pthreads as the default space. Also for the CUDA backend there are certain -choices, such as allowing relocatable device code, which must be made at -installation time. Building Kokkos inline uses largely the same process -as compiling an application against an installed Kokkos library. 
See for -example benchmarks/bytes_and_flops/Makefile which can be used with an installed -library and for an inline build. - -### CMake - -Kokkos supports being build as part of a CMake applications. An example can -be found in example/cmake_build. - -# Kokkos and CUDA UVM - -Kokkos does support UVM as a specific memory space called CudaUVMSpace. -Allocations made with that space are accessible from host and device. -You can tell Kokkos to use that as the default space for Cuda allocations. -In either case UVM comes with a number of restrictions: -(i) You can't access allocations on the host while a kernel is potentially -running. This will lead to segfaults. To avoid that you either need to -call Kokkos::Cuda::fence() (or just Kokkos::fence()), after kernels, or -you can set the environment variable CUDA_LAUNCH_BLOCKING=1. -Furthermore in multi socket multi GPU machines without NVLINK, UVM defaults -to using zero copy allocations for technical reasons related to using multiple -GPUs from the same process. If an executable doesn't do that (e.g. each -MPI rank of an application uses a single GPU [can be the same GPU for -multiple MPI ranks]) you can set CUDA_MANAGED_FORCE_DEVICE_ALLOC=1. -This will enforce proper UVM allocations, but can lead to errors if -more than a single GPU is used by a single process. - - -# Citing Kokkos - -If you publish work which mentions Kokkos, please cite the following paper: - -@article{CarterEdwards20143202, -title = "Kokkos: Enabling manycore performance portability through polymorphic memory access patterns ", -journal = "Journal of Parallel and Distributed Computing ", -volume = "74", -number = "12", -pages = "3202 - 3216", -year = "2014", -note = "Domain-Specific Languages and High-Level Frameworks for High-Performance Computing ", -issn = "0743-7315", -doi = "https://doi.org/10.1016/j.jpdc.2014.07.003", -url = "http://www.sciencedirect.com/science/article/pii/S0743731514001257", -author = "H. Carter Edwards and Christian R. 
Trott and Daniel Sunderland" -} diff --git a/lib/kokkos/README.md b/lib/kokkos/README.md new file mode 100644 index 0000000000..322dabfdab --- /dev/null +++ b/lib/kokkos/README.md @@ -0,0 +1,299 @@ +![Kokkos](https://avatars2.githubusercontent.com/u/10199860?s=200&v=4) + +# Kokkos: Core Libraries + +Kokkos Core implements a programming model in C++ for writing performance portable +applications targeting all major HPC platforms. For that purpose it provides +abstractions for both parallel execution of code and data management. +Kokkos is designed to target complex node architectures with N-level memory +hierarchies and multiple types of execution resources. It currently can use +CUDA, HPX, OpenMP and Pthreads as backend programming models with several other +backends in development. + +Kokkos Core is part of the Kokkos C++ Performance Portability Programming EcoSystem, +which also provides math kernels (https://github.com/kokkos/kokkos-kernels), as well as +profiling and debugging tools (https://github.com/kokkos/kokkos-tools). + +# Learning about Kokkos + +A programming guide can be found on the Wiki, the API reference is under development. + +For questions find us on Slack: https://kokkosteam.slack.com or open a github issue. + +For non-public questions send an email to +crtrott(at)sandia.gov + +A separate repository with extensive tutorial material can be found under +https://github.com/kokkos/kokkos-tutorials. + +Furthermore, the 'example/tutorial' directory provides step by step tutorial +examples which explain many of the features of Kokkos. They work with +simple Makefiles. To build with g++ and OpenMP simply type 'make' +in the 'example/tutorial' directory. This will build all examples in the +subfolders. To change the build options refer to the Programming Guide +in the compilation section. 
+ +To learn more about Kokkos consider watching one of our presentations: +* GTC 2015: + - http://on-demand.gputechconf.com/gtc/2015/video/S5166.html + - http://on-demand.gputechconf.com/gtc/2015/presentation/S5166-H-Carter-Edwards.pdf + + +# Contributing to Kokkos + +We are open and try to encourage contributions from external developers. +To do so please first open an issue describing the contribution and then issue +a pull request against the develop branch. For larger features it may be good +to get guidance from the core development team first through the github issue. + +Note that Kokkos Core is licensed under standard 3-clause BSD terms of use. +Which means contributing to Kokkos allows anyone else to use your contributions +not just for public purposes but also for closed source commercial projects. +For specifics see the LICENSE file contained in the repository or distribution. + +# Requirements + +### Primary tested compilers on X86 are: +* GCC 4.8.4 +* GCC 4.9.3 +* GCC 5.1.0 +* GCC 5.4.0 +* GCC 5.5.0 +* GCC 6.1.0 +* GCC 7.2.0 +* GCC 7.3.0 +* GCC 8.1.0 +* Intel 15.0.2 +* Intel 16.0.1 +* Intel 17.0.1 +* Intel 17.4.196 +* Intel 18.2.128 +* Clang 3.6.1 +* Clang 3.7.1 +* Clang 3.8.1 +* Clang 3.9.0 +* Clang 4.0.0 +* Clang 6.0.0 for CUDA (CUDA Toolkit 9.0) +* Clang 7.0.0 for CUDA (CUDA Toolkit 9.1) +* Clang 8.0.0 for CUDA (CUDA Toolkit 9.2) +* PGI 18.7 +* NVCC 9.1 for CUDA (with gcc 6.1.0) +* NVCC 9.2 for CUDA (with gcc 7.2.0) +* NVCC 10.0 for CUDA (with gcc 7.4.0) +* NVCC 10.1 for CUDA (with gcc 7.4.0) + +### Primary tested compilers on Power 8 are: +* GCC 6.4.0 (OpenMP,Serial) +* GCC 7.2.0 (OpenMP,Serial) +* IBM XL 16.1.0 (OpenMP, Serial) +* NVCC 9.2.88 for CUDA (with gcc 7.2.0 and XL 16.1.0) + +### Primary tested compilers on Intel KNL are: +* Intel 16.4.258 (with gcc 4.7.2) +* Intel 17.2.174 (with gcc 4.9.3) +* Intel 18.2.199 (with gcc 4.9.3) + +### Primary tested compilers on ARM (Cavium ThunderX2) +* GCC 7.2.0 +* ARM/Clang 18.4.0 + +### Other compilers 
working: +* X86: + * Cygwin 2.1.0 64bit with gcc 4.9.3 + * GCC 8.1.0 (not warning free) + +### Known non-working combinations: +* Power8: + * Pthreads backend +* ARM + * Pthreads backend + + +Primary tested compilers are passing in release mode +with warnings as errors. They also are tested with a comprehensive set of +backend combinations (i.e. OpenMP, Pthreads, Serial, OpenMP+Serial, ...). +We are using the following set of flags: +* GCC: + ```` + -Wall -Wshadow -pedantic + -Werror -Wsign-compare -Wtype-limits + -Wignored-qualifiers -Wempty-body + -Wclobbered -Wuninitialized + ```` +* Intel: + ```` + -Wall -Wshadow -pedantic + -Werror -Wsign-compare -Wtype-limits + -Wuninitialized + ```` +* Clang: + ```` + -Wall -Wshadow -pedantic + -Werror -Wsign-compare -Wtype-limits + -Wuninitialized + ```` + +* NVCC: + ```` + -Wall -Wshadow -pedantic + -Werror -Wsign-compare -Wtype-limits + -Wuninitialized + ```` + +Other compilers are tested occasionally, in particular when pushing from develop to +master branch. These are tested less rigorously without `-Werror` and only for a select set of backends. + +# Building and Installing Kokkos +Kokkos provides a CMake build system and a raw Makefile build system. +The CMake build system is strongly encouraged and will be the most rigorously supported in future releases. +Full details are given in the [build instructions](BUILD.md). Basic setups are shown here: + +## CMake + +The best way to install Kokkos is using the CMake build system. Assuming Kokkos lives in `$srcdir`: +```` +cmake $srcdir \ + -DCMAKE_CXX_COMPILER=$path_to_compiler \ + -DCMAKE_INSTALL_PREFIX=$path_to_install \ + -DKokkos_ENABLE_OPENMP=On \ + -DKokkos_ARCH_HSW=On \ + -DKokkos_ENABLE_HWLOC=On \ + -DKokkos_HWLOC_DIR=$path_to_hwloc +```` +then simply type `make install`. The Kokkos CMake package will then be installed in `$path_to_install` to be used by downstream packages. 
+ +To validate the Kokkos build, configure with +```` + -DKokkos_ENABLE_TESTS=On +```` +and run `make test` after completing the build. + +For your CMake project using Kokkos, code such as the following: + +```` +find_package(Kokkos) +... +target_link_libraries(myTarget Kokkos::kokkos) +```` +should be added to your CMakeLists.txt. Your configure should additionally include +```` +-DKokkos_DIR=$path_to_install/cmake/lib/Kokkos +```` +or +```` +-DKokkos_ROOT=$path_to_install +```` +for the install location given above. + +## Spack +An alternative to manually building with CMake is to use the Spack package manager. +To do so, download the `kokkos-spack` git repo and add to the package list: +```` +spack repo add $path-to-kokkos-spack +```` +A basic installation would be done as: +```` +spack install kokkos +```` +Spack allows options and compilers to be tuned in the install command. +```` +spack install kokkos@3.0 %gcc@7.3.0 +openmp +```` +This example illustrates the three most common parameters to Spack: +* Variants: specified with, e.g. `+openmp`, this activates (or deactivates with, e.g. `~openmp`) certain options. +* Version: immediately following `kokkos` the `@version` can specify a particular Kokkos to build +* Compiler: a default compiler will be chosen if not specified, but an exact compiler version can be given with the `%` option. + +For a complete list of Kokkos options, run: +```` +spack info kokkos +```` +Spack currently installs packages to a location determined by a unique hash. This hash name is not really "human readable". +Generally, Spack usage should never really require you to reference the computer-generated unique install folder. +More details are given in the [build instructions](BUILD.md). If you must know, you can locate Spack Kokkos installations with: +```` +spack find -p kokkos ... +```` +where `...` is the unique spec identifying the particular Kokkos configuration and version. 
+ + +## Raw Makefile +A bash script is provided to generate raw makefiles. +To install Kokkos as a library create a build directory and run the following +```` +$KOKKOS_PATH/generate_makefile.bash --prefix=$path_to_install +```` +Once the Makefile is generated, run: +```` +make kokkoslib +make install +```` +To additionally run the unit tests: +```` +make build-test +make test +```` +Run `generate_makefile.bash --help` for more detailed options such as +changing the device type for which to build. + +## Inline Builds vs. Installed Package +For individual projects, it may be preferable to build Kokkos inline rather than link to an installed package. +The main reason is that you may otherwise need many different +configurations of Kokkos installed depending on the required compile time +features an application needs. For example there is only one default +execution space, which means you need different installations to have OpenMP +or Pthreads as the default space. Also for the CUDA backend there are certain +choices, such as allowing relocatable device code, which must be made at +installation time. Building Kokkos inline uses largely the same process +as compiling an application against an installed Kokkos library. + +For CMake, this means copying over the Kokkos source code into your project and adding `add_subdirectory(kokkos)` to your CMakeLists.txt. + +For raw Makefiles, see the example benchmarks/bytes_and_flops/Makefile which can be used with an installed library or an inline build. + +# Kokkos and CUDA UVM + +Kokkos does support UVM as a specific memory space called CudaUVMSpace. +Allocations made with that space are accessible from host and device. +You can tell Kokkos to use that as the default space for Cuda allocations. +In either case UVM comes with a number of restrictions: +* You can't access allocations on the host while a kernel is potentially +running. This will lead to segfaults. 
To avoid that you either need to +call Kokkos::Cuda::fence() (or just Kokkos::fence()), after kernels, or +you can set the environment variable CUDA_LAUNCH_BLOCKING=1. +* In multi socket multi GPU machines without NVLINK, UVM defaults +to using zero copy allocations for technical reasons related to using multiple +GPUs from the same process. If an executable doesn't do that (e.g. each +MPI rank of an application uses a single GPU [can be the same GPU for +multiple MPI ranks]) you can set CUDA_MANAGED_FORCE_DEVICE_ALLOC=1. +This will enforce proper UVM allocations, but can lead to errors if +more than a single GPU is used by a single process. + + +# Citing Kokkos + +If you publish work which mentions Kokkos, please cite the following paper: + +```` +@article{CarterEdwards20143202, + title = "Kokkos: Enabling manycore performance portability through polymorphic memory access patterns ", + journal = "Journal of Parallel and Distributed Computing ", + volume = "74", + number = "12", + pages = "3202 - 3216", + year = "2014", + note = "Domain-Specific Languages and High-Level Frameworks for High-Performance Computing ", + issn = "0743-7315", + doi = "https://doi.org/10.1016/j.jpdc.2014.07.003", + url = "http://www.sciencedirect.com/science/article/pii/S0743731514001257", + author = "H. Carter Edwards and Christian R. Trott and Daniel Sunderland" +} +```` + +##### [LICENSE](https://github.com/kokkos/kokkos/blob/master/LICENSE) + +[![License](https://img.shields.io/badge/License-BSD%203--Clause-blue.svg)](https://opensource.org/licenses/BSD-3-Clause) + +Under the terms of Contract DE-NA0003525 with NTESS, +the U.S. Government retains certain rights in this software. 
+ diff --git a/lib/kokkos/algorithms/CMakeLists.txt b/lib/kokkos/algorithms/CMakeLists.txt index 507c9f2fdb..38747c152c 100644 --- a/lib/kokkos/algorithms/CMakeLists.txt +++ b/lib/kokkos/algorithms/CMakeLists.txt @@ -1,12 +1,12 @@ - - -TRIBITS_SUBPACKAGE(Algorithms) - -IF(KOKKOS_HAS_TRILINOS) - ADD_SUBDIRECTORY(src) -ENDIF() - -TRIBITS_ADD_TEST_DIRECTORIES(unit_tests) -#TRIBITS_ADD_TEST_DIRECTORIES(performance_tests) - -TRIBITS_SUBPACKAGE_POSTPROCESS() + + +KOKKOS_SUBPACKAGE(Algorithms) + +ADD_SUBDIRECTORY(src) + +KOKKOS_ADD_TEST_DIRECTORIES(unit_tests) + +KOKKOS_SUBPACKAGE_POSTPROCESS() + + + diff --git a/lib/kokkos/algorithms/src/CMakeLists.txt b/lib/kokkos/algorithms/src/CMakeLists.txt index dfbf3323c2..5afd319fcc 100644 --- a/lib/kokkos/algorithms/src/CMakeLists.txt +++ b/lib/kokkos/algorithms/src/CMakeLists.txt @@ -1,8 +1,9 @@ -TRIBITS_CONFIGURE_FILE(${PACKAGE_NAME}_config.h) +KOKKOS_CONFIGURE_FILE(${PACKAGE_NAME}_config.h) -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) +#I have to leave these here for tribits +KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) #----------------------------------------------------------------------------- @@ -12,10 +13,18 @@ LIST(APPEND HEADERS ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_NAME}_config.h) #----------------------------------------------------------------------------- -TRIBITS_ADD_LIBRARY( - kokkosalgorithms - HEADERS ${HEADERS} - SOURCES ${SOURCES} - DEPLIBS - ) +# We have to pass the sources in here for Tribits +# These will get ignored for standalone CMake and a true interface library made +KOKKOS_ADD_INTERFACE_LIBRARY( + kokkosalgorithms + HEADERS ${HEADERS} + SOURCES ${SOURCES} +) +KOKKOS_LIB_INCLUDE_DIRECTORIES(kokkosalgorithms + ${KOKKOS_TOP_BUILD_DIR} + ${CMAKE_CURRENT_BINARY_DIR} + ${CMAKE_CURRENT_SOURCE_DIR} +) + + diff --git a/lib/kokkos/algorithms/src/Kokkos_Random.hpp 
b/lib/kokkos/algorithms/src/Kokkos_Random.hpp index da781de4fe..078db18edd 100644 --- a/lib/kokkos/algorithms/src/Kokkos_Random.hpp +++ b/lib/kokkos/algorithms/src/Kokkos_Random.hpp @@ -1,13 +1,14 @@ /* //@HEADER // ************************************************************************ -// -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -23,10 +24,10 @@ // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR @@ -36,7 +37,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -59,6 +60,7 @@ namespace Kokkos { +// clang-format off /*Template functions to get equidistributed random numbers from a generator for a specific Scalar type template @@ -229,1019 +231,979 @@ namespace Kokkos { ViewType::value_type start, ViewType::value_type end); */ +// clang-format on - template - struct rand; - - - template - struct rand { - - KOKKOS_INLINE_FUNCTION - static short max(){return 127;} - KOKKOS_INLINE_FUNCTION - static short draw(Generator& gen) - {return short((gen.rand()&0xff+256)%256);} - KOKKOS_INLINE_FUNCTION - static short draw(Generator& gen, const char& range) - {return char(gen.rand(range));} - KOKKOS_INLINE_FUNCTION - static short draw(Generator& gen, const char& start, const char& end) - {return char(gen.rand(start,end));} - - }; - - template - struct rand { - KOKKOS_INLINE_FUNCTION - static short max(){return 32767;} - KOKKOS_INLINE_FUNCTION - static short draw(Generator& gen) - {return short((gen.rand()&0xffff+65536)%32768);} - KOKKOS_INLINE_FUNCTION - static short draw(Generator& gen, const short& range) - {return short(gen.rand(range));} - KOKKOS_INLINE_FUNCTION - static short draw(Generator& gen, const short& start, const short& end) - {return short(gen.rand(start,end));} - - }; - - template - struct rand { - KOKKOS_INLINE_FUNCTION - static int max(){return Generator::MAX_RAND;} - KOKKOS_INLINE_FUNCTION - static int draw(Generator& gen) - {return gen.rand();} - KOKKOS_INLINE_FUNCTION - static int draw(Generator& gen, const int& range) - {return gen.rand(range);} - KOKKOS_INLINE_FUNCTION - static int draw(Generator& gen, const int& start, const int& end) - {return gen.rand(start,end);} +template +struct rand; - }; +template +struct rand { + KOKKOS_INLINE_FUNCTION + static short max() { return 127; } + KOKKOS_INLINE_FUNCTION + static short draw(Generator& gen) { + return short((gen.rand() & 0xff + 256) % 
256); + } + KOKKOS_INLINE_FUNCTION + static short draw(Generator& gen, const char& range) { + return char(gen.rand(range)); + } + KOKKOS_INLINE_FUNCTION + static short draw(Generator& gen, const char& start, const char& end) { + return char(gen.rand(start, end)); + } +}; - template - struct rand { - KOKKOS_INLINE_FUNCTION - static unsigned int max () { - return Generator::MAX_URAND; - } - KOKKOS_INLINE_FUNCTION - static unsigned int draw (Generator& gen) { - return gen.urand (); - } - KOKKOS_INLINE_FUNCTION - static unsigned int draw(Generator& gen, const unsigned int& range) { - return gen.urand (range); - } - KOKKOS_INLINE_FUNCTION - static unsigned int - draw (Generator& gen, const unsigned int& start, const unsigned int& end) { - return gen.urand (start, end); - } - }; +template +struct rand { + KOKKOS_INLINE_FUNCTION + static short max() { return 32767; } + KOKKOS_INLINE_FUNCTION + static short draw(Generator& gen) { + return short((gen.rand() & 0xffff + 65536) % 32768); + } + KOKKOS_INLINE_FUNCTION + static short draw(Generator& gen, const short& range) { + return short(gen.rand(range)); + } + KOKKOS_INLINE_FUNCTION + static short draw(Generator& gen, const short& start, const short& end) { + return short(gen.rand(start, end)); + } +}; - template - struct rand { - KOKKOS_INLINE_FUNCTION - static long max () { - // FIXME (mfh 26 Oct 2014) It would be better to select the - // return value at compile time, using something like enable_if. - return sizeof (long) == 4 ? - static_cast (Generator::MAX_RAND) : - static_cast (Generator::MAX_RAND64); - } - KOKKOS_INLINE_FUNCTION - static long draw (Generator& gen) { - // FIXME (mfh 26 Oct 2014) It would be better to select the - // return value at compile time, using something like enable_if. - return sizeof (long) == 4 ? 
- static_cast (gen.rand ()) : - static_cast (gen.rand64 ()); - } - KOKKOS_INLINE_FUNCTION - static long draw (Generator& gen, const long& range) { - // FIXME (mfh 26 Oct 2014) It would be better to select the - // return value at compile time, using something like enable_if. - return sizeof (long) == 4 ? - static_cast (gen.rand (static_cast (range))) : - static_cast (gen.rand64 (range)); - } - KOKKOS_INLINE_FUNCTION - static long draw (Generator& gen, const long& start, const long& end) { - // FIXME (mfh 26 Oct 2014) It would be better to select the - // return value at compile time, using something like enable_if. - return sizeof (long) == 4 ? - static_cast (gen.rand (static_cast (start), - static_cast (end))) : - static_cast (gen.rand64 (start, end)); - } - }; +template +struct rand { + KOKKOS_INLINE_FUNCTION + static int max() { return Generator::MAX_RAND; } + KOKKOS_INLINE_FUNCTION + static int draw(Generator& gen) { return gen.rand(); } + KOKKOS_INLINE_FUNCTION + static int draw(Generator& gen, const int& range) { return gen.rand(range); } + KOKKOS_INLINE_FUNCTION + static int draw(Generator& gen, const int& start, const int& end) { + return gen.rand(start, end); + } +}; - template - struct rand { - KOKKOS_INLINE_FUNCTION - static unsigned long max () { - // FIXME (mfh 26 Oct 2014) It would be better to select the - // return value at compile time, using something like enable_if. - return sizeof (unsigned long) == 4 ? - static_cast (Generator::MAX_URAND) : - static_cast (Generator::MAX_URAND64); - } - KOKKOS_INLINE_FUNCTION - static unsigned long draw (Generator& gen) { - // FIXME (mfh 26 Oct 2014) It would be better to select the - // return value at compile time, using something like enable_if. - return sizeof (unsigned long) == 4 ? 
- static_cast (gen.urand ()) : - static_cast (gen.urand64 ()); - } - KOKKOS_INLINE_FUNCTION - static unsigned long draw(Generator& gen, const unsigned long& range) { - // FIXME (mfh 26 Oct 2014) It would be better to select the - // return value at compile time, using something like enable_if. - return sizeof (unsigned long) == 4 ? - static_cast (gen.urand (static_cast (range))) : - static_cast (gen.urand64 (range)); - } - KOKKOS_INLINE_FUNCTION - static unsigned long - draw (Generator& gen, const unsigned long& start, const unsigned long& end) { - // FIXME (mfh 26 Oct 2014) It would be better to select the - // return value at compile time, using something like enable_if. - return sizeof (unsigned long) == 4 ? - static_cast (gen.urand (static_cast (start), - static_cast (end))) : - static_cast (gen.urand64 (start, end)); - } - }; +template +struct rand { + KOKKOS_INLINE_FUNCTION + static unsigned int max() { return Generator::MAX_URAND; } + KOKKOS_INLINE_FUNCTION + static unsigned int draw(Generator& gen) { return gen.urand(); } + KOKKOS_INLINE_FUNCTION + static unsigned int draw(Generator& gen, const unsigned int& range) { + return gen.urand(range); + } + KOKKOS_INLINE_FUNCTION + static unsigned int draw(Generator& gen, const unsigned int& start, + const unsigned int& end) { + return gen.urand(start, end); + } +}; - // NOTE (mfh 26 oct 2014) This is a partial specialization for long - // long, a C99 / C++11 signed type which is guaranteed to be at - // least 64 bits. Do NOT write a partial specialization for - // int64_t!!! This is just a typedef! It could be either long or - // long long. We don't know which a priori, and I've seen both. - // The types long and long long are guaranteed to differ, so it's - // always safe to specialize for both. - template - struct rand { - KOKKOS_INLINE_FUNCTION - static long long max () { - // FIXME (mfh 26 Oct 2014) It's legal for long long to be > 64 bits. 
- return Generator::MAX_RAND64; - } - KOKKOS_INLINE_FUNCTION - static long long draw (Generator& gen) { - // FIXME (mfh 26 Oct 2014) It's legal for long long to be > 64 bits. - return gen.rand64 (); - } - KOKKOS_INLINE_FUNCTION - static long long draw (Generator& gen, const long long& range) { - // FIXME (mfh 26 Oct 2014) It's legal for long long to be > 64 bits. - return gen.rand64 (range); - } - KOKKOS_INLINE_FUNCTION - static long long draw (Generator& gen, const long long& start, const long long& end) { - // FIXME (mfh 26 Oct 2014) It's legal for long long to be > 64 bits. - return gen.rand64 (start, end); - } - }; +template +struct rand { + KOKKOS_INLINE_FUNCTION + static long max() { + // FIXME (mfh 26 Oct 2014) It would be better to select the + // return value at compile time, using something like enable_if. + return sizeof(long) == 4 ? static_cast(Generator::MAX_RAND) + : static_cast(Generator::MAX_RAND64); + } + KOKKOS_INLINE_FUNCTION + static long draw(Generator& gen) { + // FIXME (mfh 26 Oct 2014) It would be better to select the + // return value at compile time, using something like enable_if. + return sizeof(long) == 4 ? static_cast(gen.rand()) + : static_cast(gen.rand64()); + } + KOKKOS_INLINE_FUNCTION + static long draw(Generator& gen, const long& range) { + // FIXME (mfh 26 Oct 2014) It would be better to select the + // return value at compile time, using something like enable_if. + return sizeof(long) == 4 + ? static_cast(gen.rand(static_cast(range))) + : static_cast(gen.rand64(range)); + } + KOKKOS_INLINE_FUNCTION + static long draw(Generator& gen, const long& start, const long& end) { + // FIXME (mfh 26 Oct 2014) It would be better to select the + // return value at compile time, using something like enable_if. + return sizeof(long) == 4 + ? 
static_cast( + gen.rand(static_cast(start), static_cast(end))) + : static_cast(gen.rand64(start, end)); + } +}; - // NOTE (mfh 26 oct 2014) This is a partial specialization for - // unsigned long long, a C99 / C++11 unsigned type which is - // guaranteed to be at least 64 bits. Do NOT write a partial - // specialization for uint64_t!!! This is just a typedef! It could - // be either unsigned long or unsigned long long. We don't know - // which a priori, and I've seen both. The types unsigned long and - // unsigned long long are guaranteed to differ, so it's always safe - // to specialize for both. - template - struct rand { - KOKKOS_INLINE_FUNCTION - static unsigned long long max () { - // FIXME (mfh 26 Oct 2014) It's legal for unsigned long long to be > 64 bits. - return Generator::MAX_URAND64; - } - KOKKOS_INLINE_FUNCTION - static unsigned long long draw (Generator& gen) { - // FIXME (mfh 26 Oct 2014) It's legal for unsigned long long to be > 64 bits. - return gen.urand64 (); - } - KOKKOS_INLINE_FUNCTION - static unsigned long long draw (Generator& gen, const unsigned long long& range) { - // FIXME (mfh 26 Oct 2014) It's legal for long long to be > 64 bits. - return gen.urand64 (range); - } - KOKKOS_INLINE_FUNCTION - static unsigned long long - draw (Generator& gen, const unsigned long long& start, const unsigned long long& end) { - // FIXME (mfh 26 Oct 2014) It's legal for long long to be > 64 bits. - return gen.urand64 (start, end); - } - }; +template +struct rand { + KOKKOS_INLINE_FUNCTION + static unsigned long max() { + // FIXME (mfh 26 Oct 2014) It would be better to select the + // return value at compile time, using something like enable_if. + return sizeof(unsigned long) == 4 + ? static_cast(Generator::MAX_URAND) + : static_cast(Generator::MAX_URAND64); + } + KOKKOS_INLINE_FUNCTION + static unsigned long draw(Generator& gen) { + // FIXME (mfh 26 Oct 2014) It would be better to select the + // return value at compile time, using something like enable_if. 
+ return sizeof(unsigned long) == 4 + ? static_cast(gen.urand()) + : static_cast(gen.urand64()); + } + KOKKOS_INLINE_FUNCTION + static unsigned long draw(Generator& gen, const unsigned long& range) { + // FIXME (mfh 26 Oct 2014) It would be better to select the + // return value at compile time, using something like enable_if. + return sizeof(unsigned long) == 4 + ? static_cast( + gen.urand(static_cast(range))) + : static_cast(gen.urand64(range)); + } + KOKKOS_INLINE_FUNCTION + static unsigned long draw(Generator& gen, const unsigned long& start, + const unsigned long& end) { + // FIXME (mfh 26 Oct 2014) It would be better to select the + // return value at compile time, using something like enable_if. + return sizeof(unsigned long) == 4 + ? static_cast( + gen.urand(static_cast(start), + static_cast(end))) + : static_cast(gen.urand64(start, end)); + } +}; - template - struct rand { - KOKKOS_INLINE_FUNCTION - static float max(){return 1.0f;} - KOKKOS_INLINE_FUNCTION - static float draw(Generator& gen) - {return gen.frand();} - KOKKOS_INLINE_FUNCTION - static float draw(Generator& gen, const float& range) - {return gen.frand(range);} - KOKKOS_INLINE_FUNCTION - static float draw(Generator& gen, const float& start, const float& end) - {return gen.frand(start,end);} +// NOTE (mfh 26 oct 2014) This is a partial specialization for long +// long, a C99 / C++11 signed type which is guaranteed to be at +// least 64 bits. Do NOT write a partial specialization for +// int64_t!!! This is just a typedef! It could be either long or +// long long. We don't know which a priori, and I've seen both. +// The types long and long long are guaranteed to differ, so it's +// always safe to specialize for both. +template +struct rand { + KOKKOS_INLINE_FUNCTION + static long long max() { + // FIXME (mfh 26 Oct 2014) It's legal for long long to be > 64 bits. 
+ return Generator::MAX_RAND64; + } + KOKKOS_INLINE_FUNCTION + static long long draw(Generator& gen) { + // FIXME (mfh 26 Oct 2014) It's legal for long long to be > 64 bits. + return gen.rand64(); + } + KOKKOS_INLINE_FUNCTION + static long long draw(Generator& gen, const long long& range) { + // FIXME (mfh 26 Oct 2014) It's legal for long long to be > 64 bits. + return gen.rand64(range); + } + KOKKOS_INLINE_FUNCTION + static long long draw(Generator& gen, const long long& start, + const long long& end) { + // FIXME (mfh 26 Oct 2014) It's legal for long long to be > 64 bits. + return gen.rand64(start, end); + } +}; - }; +// NOTE (mfh 26 oct 2014) This is a partial specialization for +// unsigned long long, a C99 / C++11 unsigned type which is +// guaranteed to be at least 64 bits. Do NOT write a partial +// specialization for uint64_t!!! This is just a typedef! It could +// be either unsigned long or unsigned long long. We don't know +// which a priori, and I've seen both. The types unsigned long and +// unsigned long long are guaranteed to differ, so it's always safe +// to specialize for both. +template +struct rand { + KOKKOS_INLINE_FUNCTION + static unsigned long long max() { + // FIXME (mfh 26 Oct 2014) It's legal for unsigned long long to be > 64 + // bits. + return Generator::MAX_URAND64; + } + KOKKOS_INLINE_FUNCTION + static unsigned long long draw(Generator& gen) { + // FIXME (mfh 26 Oct 2014) It's legal for unsigned long long to be > 64 + // bits. + return gen.urand64(); + } + KOKKOS_INLINE_FUNCTION + static unsigned long long draw(Generator& gen, + const unsigned long long& range) { + // FIXME (mfh 26 Oct 2014) It's legal for long long to be > 64 bits. + return gen.urand64(range); + } + KOKKOS_INLINE_FUNCTION + static unsigned long long draw(Generator& gen, + const unsigned long long& start, + const unsigned long long& end) { + // FIXME (mfh 26 Oct 2014) It's legal for long long to be > 64 bits. 
+ return gen.urand64(start, end); + } +}; - template - struct rand { - KOKKOS_INLINE_FUNCTION - static double max(){return 1.0;} - KOKKOS_INLINE_FUNCTION - static double draw(Generator& gen) - {return gen.drand();} - KOKKOS_INLINE_FUNCTION - static double draw(Generator& gen, const double& range) - {return gen.drand(range);} - KOKKOS_INLINE_FUNCTION - static double draw(Generator& gen, const double& start, const double& end) - {return gen.drand(start,end);} +template +struct rand { + KOKKOS_INLINE_FUNCTION + static float max() { return 1.0f; } + KOKKOS_INLINE_FUNCTION + static float draw(Generator& gen) { return gen.frand(); } + KOKKOS_INLINE_FUNCTION + static float draw(Generator& gen, const float& range) { + return gen.frand(range); + } + KOKKOS_INLINE_FUNCTION + static float draw(Generator& gen, const float& start, const float& end) { + return gen.frand(start, end); + } +}; - }; +template +struct rand { + KOKKOS_INLINE_FUNCTION + static double max() { return 1.0; } + KOKKOS_INLINE_FUNCTION + static double draw(Generator& gen) { return gen.drand(); } + KOKKOS_INLINE_FUNCTION + static double draw(Generator& gen, const double& range) { + return gen.drand(range); + } + KOKKOS_INLINE_FUNCTION + static double draw(Generator& gen, const double& start, const double& end) { + return gen.drand(start, end); + } +}; - template - struct rand > { - KOKKOS_INLINE_FUNCTION - static Kokkos::complex max () { - return Kokkos::complex (1.0, 1.0); - } - KOKKOS_INLINE_FUNCTION - static Kokkos::complex draw (Generator& gen) { - const float re = gen.frand (); - const float im = gen.frand (); - return Kokkos::complex (re, im); - } - KOKKOS_INLINE_FUNCTION - static Kokkos::complex draw (Generator& gen, const Kokkos::complex& range) { - const float re = gen.frand (real (range)); - const float im = gen.frand (imag (range)); - return Kokkos::complex (re, im); - } - KOKKOS_INLINE_FUNCTION - static Kokkos::complex draw (Generator& gen, const Kokkos::complex& start, const Kokkos::complex& end) 
{ - const float re = gen.frand (real (start), real (end)); - const float im = gen.frand (imag (start), imag (end)); - return Kokkos::complex (re, im); - } - }; +template +struct rand > { + KOKKOS_INLINE_FUNCTION + static Kokkos::complex max() { + return Kokkos::complex(1.0, 1.0); + } + KOKKOS_INLINE_FUNCTION + static Kokkos::complex draw(Generator& gen) { + const float re = gen.frand(); + const float im = gen.frand(); + return Kokkos::complex(re, im); + } + KOKKOS_INLINE_FUNCTION + static Kokkos::complex draw(Generator& gen, + const Kokkos::complex& range) { + const float re = gen.frand(real(range)); + const float im = gen.frand(imag(range)); + return Kokkos::complex(re, im); + } + KOKKOS_INLINE_FUNCTION + static Kokkos::complex draw(Generator& gen, + const Kokkos::complex& start, + const Kokkos::complex& end) { + const float re = gen.frand(real(start), real(end)); + const float im = gen.frand(imag(start), imag(end)); + return Kokkos::complex(re, im); + } +}; - template - struct rand > { - KOKKOS_INLINE_FUNCTION - static Kokkos::complex max () { - return Kokkos::complex (1.0, 1.0); - } - KOKKOS_INLINE_FUNCTION - static Kokkos::complex draw (Generator& gen) { - const double re = gen.drand (); - const double im = gen.drand (); - return Kokkos::complex (re, im); - } - KOKKOS_INLINE_FUNCTION - static Kokkos::complex draw (Generator& gen, const Kokkos::complex& range) { - const double re = gen.drand (real (range)); - const double im = gen.drand (imag (range)); - return Kokkos::complex (re, im); - } - KOKKOS_INLINE_FUNCTION - static Kokkos::complex draw (Generator& gen, const Kokkos::complex& start, const Kokkos::complex& end) { - const double re = gen.drand (real (start), real (end)); - const double im = gen.drand (imag (start), imag (end)); - return Kokkos::complex (re, im); - } - }; +template +struct rand > { + KOKKOS_INLINE_FUNCTION + static Kokkos::complex max() { + return Kokkos::complex(1.0, 1.0); + } + KOKKOS_INLINE_FUNCTION + static Kokkos::complex 
draw(Generator& gen) { + const double re = gen.drand(); + const double im = gen.drand(); + return Kokkos::complex(re, im); + } + KOKKOS_INLINE_FUNCTION + static Kokkos::complex draw(Generator& gen, + const Kokkos::complex& range) { + const double re = gen.drand(real(range)); + const double im = gen.drand(imag(range)); + return Kokkos::complex(re, im); + } + KOKKOS_INLINE_FUNCTION + static Kokkos::complex draw(Generator& gen, + const Kokkos::complex& start, + const Kokkos::complex& end) { + const double re = gen.drand(real(start), real(end)); + const double im = gen.drand(imag(start), imag(end)); + return Kokkos::complex(re, im); + } +}; - template - class Random_XorShift64_Pool; +template +class Random_XorShift64_Pool; - template - class Random_XorShift64 { - private: - uint64_t state_; - const int state_idx_; - friend class Random_XorShift64_Pool; - public: +template +class Random_XorShift64 { + private: + uint64_t state_; + const int state_idx_; + friend class Random_XorShift64_Pool; - typedef DeviceType device_type; + public: + typedef DeviceType device_type; - enum {MAX_URAND = 0xffffffffU}; - enum {MAX_URAND64 = 0xffffffffffffffffULL-1}; - enum {MAX_RAND = static_cast(0xffffffff/2)}; - enum {MAX_RAND64 = static_cast(0xffffffffffffffffLL/2-1)}; + enum { MAX_URAND = 0xffffffffU }; + enum { MAX_URAND64 = 0xffffffffffffffffULL - 1 }; + enum { MAX_RAND = static_cast(0xffffffff / 2) }; + enum { MAX_RAND64 = static_cast(0xffffffffffffffffLL / 2 - 1) }; - KOKKOS_INLINE_FUNCTION - Random_XorShift64 (uint64_t state, int state_idx = 0) - : state_(state==0?uint64_t(1318319):state),state_idx_(state_idx){} + KOKKOS_INLINE_FUNCTION + Random_XorShift64(uint64_t state, int state_idx = 0) + : state_(state == 0 ? 
uint64_t(1318319) : state), state_idx_(state_idx) {} - KOKKOS_INLINE_FUNCTION - uint32_t urand() { - state_ ^= state_ >> 12; - state_ ^= state_ << 25; - state_ ^= state_ >> 27; - - uint64_t tmp = state_ * 2685821657736338717ULL; - tmp = tmp>>16; - return static_cast(tmp&MAX_URAND); - } + KOKKOS_INLINE_FUNCTION + uint32_t urand() { + state_ ^= state_ >> 12; + state_ ^= state_ << 25; + state_ ^= state_ >> 27; + + uint64_t tmp = state_ * 2685821657736338717ULL; + tmp = tmp >> 16; + return static_cast(tmp & MAX_URAND); + } - KOKKOS_INLINE_FUNCTION - uint64_t urand64() { - state_ ^= state_ >> 12; - state_ ^= state_ << 25; - state_ ^= state_ >> 27; - return (state_ * 2685821657736338717ULL) - 1; - } + KOKKOS_INLINE_FUNCTION + uint64_t urand64() { + state_ ^= state_ >> 12; + state_ ^= state_ << 25; + state_ ^= state_ >> 27; + return (state_ * 2685821657736338717ULL) - 1; + } - KOKKOS_INLINE_FUNCTION - uint32_t urand(const uint32_t& range) { - const uint32_t max_val = (MAX_URAND/range)*range; - uint32_t tmp = urand(); - while(tmp>=max_val) - tmp = urand(); - return tmp%range; - } + KOKKOS_INLINE_FUNCTION + uint32_t urand(const uint32_t& range) { + const uint32_t max_val = (MAX_URAND / range) * range; + uint32_t tmp = urand(); + while (tmp >= max_val) tmp = urand(); + return tmp % range; + } - KOKKOS_INLINE_FUNCTION - uint32_t urand(const uint32_t& start, const uint32_t& end ) { - return urand(end-start)+start; - } + KOKKOS_INLINE_FUNCTION + uint32_t urand(const uint32_t& start, const uint32_t& end) { + return urand(end - start) + start; + } - KOKKOS_INLINE_FUNCTION - uint64_t urand64(const uint64_t& range) { - const uint64_t max_val = (MAX_URAND64/range)*range; - uint64_t tmp = urand64(); - while(tmp>=max_val) - tmp = urand64(); - return tmp%range; - } + KOKKOS_INLINE_FUNCTION + uint64_t urand64(const uint64_t& range) { + const uint64_t max_val = (MAX_URAND64 / range) * range; + uint64_t tmp = urand64(); + while (tmp >= max_val) tmp = urand64(); + return tmp % range; + } - 
KOKKOS_INLINE_FUNCTION - uint64_t urand64(const uint64_t& start, const uint64_t& end ) { - return urand64(end-start)+start; - } + KOKKOS_INLINE_FUNCTION + uint64_t urand64(const uint64_t& start, const uint64_t& end) { + return urand64(end - start) + start; + } - KOKKOS_INLINE_FUNCTION - int rand() { - return static_cast(urand()/2); - } + KOKKOS_INLINE_FUNCTION + int rand() { return static_cast(urand() / 2); } - KOKKOS_INLINE_FUNCTION - int rand(const int& range) { - const int max_val = (MAX_RAND/range)*range; - int tmp = rand(); - while(tmp>=max_val) - tmp = rand(); - return tmp%range; - } + KOKKOS_INLINE_FUNCTION + int rand(const int& range) { + const int max_val = (MAX_RAND / range) * range; + int tmp = rand(); + while (tmp >= max_val) tmp = rand(); + return tmp % range; + } - KOKKOS_INLINE_FUNCTION - int rand(const int& start, const int& end ) { - return rand(end-start)+start; - } + KOKKOS_INLINE_FUNCTION + int rand(const int& start, const int& end) { + return rand(end - start) + start; + } - KOKKOS_INLINE_FUNCTION - int64_t rand64() { - return static_cast(urand64()/2); - } + KOKKOS_INLINE_FUNCTION + int64_t rand64() { return static_cast(urand64() / 2); } - KOKKOS_INLINE_FUNCTION - int64_t rand64(const int64_t& range) { - const int64_t max_val = (MAX_RAND64/range)*range; - int64_t tmp = rand64(); - while(tmp>=max_val) - tmp = rand64(); - return tmp%range; - } + KOKKOS_INLINE_FUNCTION + int64_t rand64(const int64_t& range) { + const int64_t max_val = (MAX_RAND64 / range) * range; + int64_t tmp = rand64(); + while (tmp >= max_val) tmp = rand64(); + return tmp % range; + } - KOKKOS_INLINE_FUNCTION - int64_t rand64(const int64_t& start, const int64_t& end ) { - return rand64(end-start)+start; - } + KOKKOS_INLINE_FUNCTION + int64_t rand64(const int64_t& start, const int64_t& end) { + return rand64(end - start) + start; + } - KOKKOS_INLINE_FUNCTION - float frand() { - return 1.0f * urand64()/MAX_URAND64; - } + KOKKOS_INLINE_FUNCTION + float frand() { return 1.0f * 
urand64() / MAX_URAND64; } - KOKKOS_INLINE_FUNCTION - float frand(const float& range) { - return range * urand64()/MAX_URAND64; - } + KOKKOS_INLINE_FUNCTION + float frand(const float& range) { return range * urand64() / MAX_URAND64; } - KOKKOS_INLINE_FUNCTION - float frand(const float& start, const float& end ) { - return frand(end-start)+start; - } + KOKKOS_INLINE_FUNCTION + float frand(const float& start, const float& end) { + return frand(end - start) + start; + } - KOKKOS_INLINE_FUNCTION - double drand() { - return 1.0 * urand64()/MAX_URAND64; - } + KOKKOS_INLINE_FUNCTION + double drand() { return 1.0 * urand64() / MAX_URAND64; } - KOKKOS_INLINE_FUNCTION - double drand(const double& range) { - return range * urand64()/MAX_URAND64; - } + KOKKOS_INLINE_FUNCTION + double drand(const double& range) { return range * urand64() / MAX_URAND64; } - KOKKOS_INLINE_FUNCTION - double drand(const double& start, const double& end ) { - return drand(end-start)+start; - } + KOKKOS_INLINE_FUNCTION + double drand(const double& start, const double& end) { + return drand(end - start) + start; + } - //Marsaglia polar method for drawing a standard normal distributed random number - KOKKOS_INLINE_FUNCTION - double normal() { - double S = 2.0; - double U; - while(S>=1.0) { - U = 2.0*drand() - 1.0; - const double V = 2.0*drand() - 1.0; - S = U*U+V*V; - } - return U*std::sqrt(-2.0*log(S)/S); - } + // Marsaglia polar method for drawing a standard normal distributed random + // number + KOKKOS_INLINE_FUNCTION + double normal() { + double S = 2.0; + double U; + while (S >= 1.0) { + U = 2.0 * drand() - 1.0; + const double V = 2.0 * drand() - 1.0; + S = U * U + V * V; + } + return U * std::sqrt(-2.0 * log(S) / S); + } - KOKKOS_INLINE_FUNCTION - double normal(const double& mean, const double& std_dev=1.0) { - return mean + normal()*std_dev; - } + KOKKOS_INLINE_FUNCTION + double normal(const double& mean, const double& std_dev = 1.0) { + return mean + normal() * std_dev; + } +}; - }; +template 
+class Random_XorShift64_Pool { + private: + typedef View lock_type; + typedef View state_data_type; + lock_type locks_; + state_data_type state_; + int num_states_; - template - class Random_XorShift64_Pool { - private: - typedef View lock_type; - typedef View state_data_type; - lock_type locks_; - state_data_type state_; - int num_states_; - - public: - typedef Random_XorShift64 generator_type; - typedef DeviceType device_type; + public: + typedef Random_XorShift64 generator_type; + typedef DeviceType device_type; - KOKKOS_INLINE_FUNCTION - Random_XorShift64_Pool() { - num_states_ = 0; - } - Random_XorShift64_Pool(uint64_t seed) { - num_states_ = 0; + KOKKOS_INLINE_FUNCTION + Random_XorShift64_Pool() { num_states_ = 0; } + Random_XorShift64_Pool(uint64_t seed) { + num_states_ = 0; #ifdef KOKKOS_ENABLE_DEPRECATED_CODE - init(seed,DeviceType::max_hardware_threads()); + init(seed, DeviceType::max_hardware_threads()); #else - init(seed,DeviceType::impl_max_hardware_threads()); + init(seed, DeviceType::impl_max_hardware_threads()); #endif - } + } - KOKKOS_INLINE_FUNCTION - Random_XorShift64_Pool(const Random_XorShift64_Pool& src): - locks_(src.locks_), - state_(src.state_), - num_states_(src.num_states_) - {} + KOKKOS_INLINE_FUNCTION + Random_XorShift64_Pool(const Random_XorShift64_Pool& src) + : locks_(src.locks_), state_(src.state_), num_states_(src.num_states_) {} - KOKKOS_INLINE_FUNCTION - Random_XorShift64_Pool operator = (const Random_XorShift64_Pool& src) { - locks_ = src.locks_; - state_ = src.state_; - num_states_ = src.num_states_; - return *this; - } + KOKKOS_INLINE_FUNCTION + Random_XorShift64_Pool operator=(const Random_XorShift64_Pool& src) { + locks_ = src.locks_; + state_ = src.state_; + num_states_ = src.num_states_; + return *this; + } - void init(uint64_t seed, int num_states) { - if(seed==0) - seed = uint64_t(1318319); - - num_states_ = num_states; - - locks_ = lock_type("Kokkos::Random_XorShift64::locks",num_states_); - state_ = 
state_data_type("Kokkos::Random_XorShift64::state",num_states_); - - typename state_data_type::HostMirror h_state = create_mirror_view(state_); - typename lock_type::HostMirror h_lock = create_mirror_view(locks_); - - // Execute on the HostMirror's default execution space. - Random_XorShift64 gen(seed,0); - for(int i = 0; i < 17; i++) - gen.rand(); - for(int i = 0; i < num_states_; i++) { - int n1 = gen.rand(); - int n2 = gen.rand(); - int n3 = gen.rand(); - int n4 = gen.rand(); - h_state(i) = (((static_cast(n1)) & 0xffff)<<00) | - (((static_cast(n2)) & 0xffff)<<16) | - (((static_cast(n3)) & 0xffff)<<32) | - (((static_cast(n4)) & 0xffff)<<48); - h_lock(i) = 0; - } - deep_copy(state_,h_state); - deep_copy(locks_,h_lock); - } + void init(uint64_t seed, int num_states) { + if (seed == 0) seed = uint64_t(1318319); + + num_states_ = num_states; + + locks_ = lock_type("Kokkos::Random_XorShift64::locks", num_states_); + state_ = state_data_type("Kokkos::Random_XorShift64::state", num_states_); + + typename state_data_type::HostMirror h_state = create_mirror_view(state_); + typename lock_type::HostMirror h_lock = create_mirror_view(locks_); + + // Execute on the HostMirror's default execution space. 
+ Random_XorShift64 + gen(seed, 0); + for (int i = 0; i < 17; i++) gen.rand(); + for (int i = 0; i < num_states_; i++) { + int n1 = gen.rand(); + int n2 = gen.rand(); + int n3 = gen.rand(); + int n4 = gen.rand(); + h_state(i) = (((static_cast(n1)) & 0xffff) << 00) | + (((static_cast(n2)) & 0xffff) << 16) | + (((static_cast(n3)) & 0xffff) << 32) | + (((static_cast(n4)) & 0xffff) << 48); + h_lock(i) = 0; + } + deep_copy(state_, h_state); + deep_copy(locks_, h_lock); + } - KOKKOS_INLINE_FUNCTION - Random_XorShift64 get_state() const { + KOKKOS_INLINE_FUNCTION + Random_XorShift64 get_state() const { #ifdef KOKKOS_ENABLE_DEPRECATED_CODE - const int i = DeviceType::hardware_thread_id();; + const int i = DeviceType::hardware_thread_id(); + ; #else - const int i = DeviceType::impl_hardware_thread_id();; + const int i = DeviceType::impl_hardware_thread_id(); + ; #endif - return Random_XorShift64(state_(i),i); - } - - // NOTE: state_idx MUST be unique and less than num_states - KOKKOS_INLINE_FUNCTION - Random_XorShift64 get_state(const int state_idx) const { - return Random_XorShift64(state_(state_idx),state_idx); - } - - KOKKOS_INLINE_FUNCTION - void free_state(const Random_XorShift64& state) const { - state_(state.state_idx_) = state.state_; - } - }; + return Random_XorShift64(state_(i), i); + } + // NOTE: state_idx MUST be unique and less than num_states + KOKKOS_INLINE_FUNCTION + Random_XorShift64 get_state(const int state_idx) const { + return Random_XorShift64(state_(state_idx), state_idx); + } - template - class Random_XorShift1024_Pool; + KOKKOS_INLINE_FUNCTION + void free_state(const Random_XorShift64& state) const { + state_(state.state_idx_) = state.state_; + } +}; - template - class Random_XorShift1024 { - private: - int p_; - const int state_idx_; - uint64_t state_[16]; - friend class Random_XorShift1024_Pool; - public: +template +class Random_XorShift1024_Pool; - typedef Random_XorShift1024_Pool pool_type; - typedef DeviceType device_type; +template +class 
Random_XorShift1024 { + private: + int p_; + const int state_idx_; + uint64_t state_[16]; + friend class Random_XorShift1024_Pool; - enum {MAX_URAND = 0xffffffffU}; - enum {MAX_URAND64 = 0xffffffffffffffffULL-1}; - enum {MAX_RAND = static_cast(0xffffffffU/2)}; - enum {MAX_RAND64 = static_cast(0xffffffffffffffffULL/2-1)}; + public: + typedef Random_XorShift1024_Pool pool_type; + typedef DeviceType device_type; - KOKKOS_INLINE_FUNCTION - Random_XorShift1024 (const typename pool_type::state_data_type& state, int p, int state_idx = 0): - p_(p),state_idx_(state_idx){ - for(int i=0 ; i<16; i++) - state_[i] = state(state_idx,i); - } + enum { MAX_URAND = 0xffffffffU }; + enum { MAX_URAND64 = 0xffffffffffffffffULL - 1 }; + enum { MAX_RAND = static_cast(0xffffffffU / 2) }; + enum { MAX_RAND64 = static_cast(0xffffffffffffffffULL / 2 - 1) }; - KOKKOS_INLINE_FUNCTION - uint32_t urand() { - uint64_t state_0 = state_[ p_ ]; - uint64_t state_1 = state_[ p_ = ( p_ + 1 ) & 15 ]; - state_1 ^= state_1 << 31; - state_1 ^= state_1 >> 11; - state_0 ^= state_0 >> 30; - uint64_t tmp = ( state_[ p_ ] = state_0 ^ state_1 ) * 1181783497276652981ULL; - tmp = tmp>>16; - return static_cast(tmp&MAX_URAND); - } + KOKKOS_INLINE_FUNCTION + Random_XorShift1024(const typename pool_type::state_data_type& state, int p, + int state_idx = 0) + : p_(p), state_idx_(state_idx) { + for (int i = 0; i < 16; i++) state_[i] = state(state_idx, i); + } - KOKKOS_INLINE_FUNCTION - uint64_t urand64() { - uint64_t state_0 = state_[ p_ ]; - uint64_t state_1 = state_[ p_ = ( p_ + 1 ) & 15 ]; - state_1 ^= state_1 << 31; - state_1 ^= state_1 >> 11; - state_0 ^= state_0 >> 30; - return (( state_[ p_ ] = state_0 ^ state_1 ) * 1181783497276652981LL) - 1; - } + KOKKOS_INLINE_FUNCTION + uint32_t urand() { + uint64_t state_0 = state_[p_]; + uint64_t state_1 = state_[p_ = (p_ + 1) & 15]; + state_1 ^= state_1 << 31; + state_1 ^= state_1 >> 11; + state_0 ^= state_0 >> 30; + uint64_t tmp = (state_[p_] = state_0 ^ state_1) * 
1181783497276652981ULL; + tmp = tmp >> 16; + return static_cast(tmp & MAX_URAND); + } - KOKKOS_INLINE_FUNCTION - uint32_t urand(const uint32_t& range) { - const uint32_t max_val = (MAX_URAND/range)*range; - uint32_t tmp = urand(); - while(tmp>=max_val) - tmp = urand(); - return tmp%range; - } + KOKKOS_INLINE_FUNCTION + uint64_t urand64() { + uint64_t state_0 = state_[p_]; + uint64_t state_1 = state_[p_ = (p_ + 1) & 15]; + state_1 ^= state_1 << 31; + state_1 ^= state_1 >> 11; + state_0 ^= state_0 >> 30; + return ((state_[p_] = state_0 ^ state_1) * 1181783497276652981LL) - 1; + } - KOKKOS_INLINE_FUNCTION - uint32_t urand(const uint32_t& start, const uint32_t& end ) { - return urand(end-start)+start; - } + KOKKOS_INLINE_FUNCTION + uint32_t urand(const uint32_t& range) { + const uint32_t max_val = (MAX_URAND / range) * range; + uint32_t tmp = urand(); + while (tmp >= max_val) tmp = urand(); + return tmp % range; + } - KOKKOS_INLINE_FUNCTION - uint64_t urand64(const uint64_t& range) { - const uint64_t max_val = (MAX_URAND64/range)*range; - uint64_t tmp = urand64(); - while(tmp>=max_val) - tmp = urand64(); - return tmp%range; - } + KOKKOS_INLINE_FUNCTION + uint32_t urand(const uint32_t& start, const uint32_t& end) { + return urand(end - start) + start; + } - KOKKOS_INLINE_FUNCTION - uint64_t urand64(const uint64_t& start, const uint64_t& end ) { - return urand64(end-start)+start; - } + KOKKOS_INLINE_FUNCTION + uint64_t urand64(const uint64_t& range) { + const uint64_t max_val = (MAX_URAND64 / range) * range; + uint64_t tmp = urand64(); + while (tmp >= max_val) tmp = urand64(); + return tmp % range; + } - KOKKOS_INLINE_FUNCTION - int rand() { - return static_cast(urand()/2); - } + KOKKOS_INLINE_FUNCTION + uint64_t urand64(const uint64_t& start, const uint64_t& end) { + return urand64(end - start) + start; + } - KOKKOS_INLINE_FUNCTION - int rand(const int& range) { - const int max_val = (MAX_RAND/range)*range; - int tmp = rand(); - while(tmp>=max_val) - tmp = rand(); - 
return tmp%range; - } + KOKKOS_INLINE_FUNCTION + int rand() { return static_cast(urand() / 2); } - KOKKOS_INLINE_FUNCTION - int rand(const int& start, const int& end ) { - return rand(end-start)+start; - } + KOKKOS_INLINE_FUNCTION + int rand(const int& range) { + const int max_val = (MAX_RAND / range) * range; + int tmp = rand(); + while (tmp >= max_val) tmp = rand(); + return tmp % range; + } - KOKKOS_INLINE_FUNCTION - int64_t rand64() { - return static_cast(urand64()/2); - } + KOKKOS_INLINE_FUNCTION + int rand(const int& start, const int& end) { + return rand(end - start) + start; + } - KOKKOS_INLINE_FUNCTION - int64_t rand64(const int64_t& range) { - const int64_t max_val = (MAX_RAND64/range)*range; - int64_t tmp = rand64(); - while(tmp>=max_val) - tmp = rand64(); - return tmp%range; - } + KOKKOS_INLINE_FUNCTION + int64_t rand64() { return static_cast(urand64() / 2); } - KOKKOS_INLINE_FUNCTION - int64_t rand64(const int64_t& start, const int64_t& end ) { - return rand64(end-start)+start; - } + KOKKOS_INLINE_FUNCTION + int64_t rand64(const int64_t& range) { + const int64_t max_val = (MAX_RAND64 / range) * range; + int64_t tmp = rand64(); + while (tmp >= max_val) tmp = rand64(); + return tmp % range; + } - KOKKOS_INLINE_FUNCTION - float frand() { - return 1.0f * urand64()/MAX_URAND64; - } + KOKKOS_INLINE_FUNCTION + int64_t rand64(const int64_t& start, const int64_t& end) { + return rand64(end - start) + start; + } - KOKKOS_INLINE_FUNCTION - float frand(const float& range) { - return range * urand64()/MAX_URAND64; - } + KOKKOS_INLINE_FUNCTION + float frand() { return 1.0f * urand64() / MAX_URAND64; } - KOKKOS_INLINE_FUNCTION - float frand(const float& start, const float& end ) { - return frand(end-start)+start; - } + KOKKOS_INLINE_FUNCTION + float frand(const float& range) { return range * urand64() / MAX_URAND64; } - KOKKOS_INLINE_FUNCTION - double drand() { - return 1.0 * urand64()/MAX_URAND64; - } + KOKKOS_INLINE_FUNCTION + float frand(const float& start, const 
float& end) { + return frand(end - start) + start; + } - KOKKOS_INLINE_FUNCTION - double drand(const double& range) { - return range * urand64()/MAX_URAND64; - } + KOKKOS_INLINE_FUNCTION + double drand() { return 1.0 * urand64() / MAX_URAND64; } - KOKKOS_INLINE_FUNCTION - double drand(const double& start, const double& end ) { - return frand(end-start)+start; - } + KOKKOS_INLINE_FUNCTION + double drand(const double& range) { return range * urand64() / MAX_URAND64; } - //Marsaglia polar method for drawing a standard normal distributed random number - KOKKOS_INLINE_FUNCTION - double normal() { - double S = 2.0; - double U; - while(S>=1.0) { - U = 2.0*drand() - 1.0; - const double V = 2.0*drand() - 1.0; - S = U*U+V*V; - } - return U*std::sqrt(-2.0*log(S)/S); - } + KOKKOS_INLINE_FUNCTION + double drand(const double& start, const double& end) { + return frand(end - start) + start; + } - KOKKOS_INLINE_FUNCTION - double normal(const double& mean, const double& std_dev=1.0) { - return mean + normal()*std_dev; - } - }; + // Marsaglia polar method for drawing a standard normal distributed random + // number + KOKKOS_INLINE_FUNCTION + double normal() { + double S = 2.0; + double U; + while (S >= 1.0) { + U = 2.0 * drand() - 1.0; + const double V = 2.0 * drand() - 1.0; + S = U * U + V * V; + } + return U * std::sqrt(-2.0 * log(S) / S); + } + KOKKOS_INLINE_FUNCTION + double normal(const double& mean, const double& std_dev = 1.0) { + return mean + normal() * std_dev; + } +}; - template - class Random_XorShift1024_Pool { - private: - typedef View int_view_type; - typedef View state_data_type; +template +class Random_XorShift1024_Pool { + private: + typedef View int_view_type; + typedef View state_data_type; - int_view_type locks_; - state_data_type state_; - int_view_type p_; - int num_states_; - friend class Random_XorShift1024; + int_view_type locks_; + state_data_type state_; + int_view_type p_; + int num_states_; + friend class Random_XorShift1024; - public: - typedef 
Random_XorShift1024 generator_type; + public: + typedef Random_XorShift1024 generator_type; - typedef DeviceType device_type; + typedef DeviceType device_type; - KOKKOS_INLINE_FUNCTION - Random_XorShift1024_Pool() { - num_states_ = 0; - } + KOKKOS_INLINE_FUNCTION + Random_XorShift1024_Pool() { num_states_ = 0; } - inline - Random_XorShift1024_Pool(uint64_t seed){ - num_states_ = 0; + inline Random_XorShift1024_Pool(uint64_t seed) { + num_states_ = 0; #ifdef KOKKOS_ENABLE_DEPRECATED_CODE - init(seed,DeviceType::max_hardware_threads()); + init(seed, DeviceType::max_hardware_threads()); #else - init(seed,DeviceType::impl_max_hardware_threads()); + init(seed, DeviceType::impl_max_hardware_threads()); #endif - } + } - KOKKOS_INLINE_FUNCTION - Random_XorShift1024_Pool(const Random_XorShift1024_Pool& src): - locks_(src.locks_), - state_(src.state_), - p_(src.p_), - num_states_(src.num_states_) - {} + KOKKOS_INLINE_FUNCTION + Random_XorShift1024_Pool(const Random_XorShift1024_Pool& src) + : locks_(src.locks_), + state_(src.state_), + p_(src.p_), + num_states_(src.num_states_) {} - KOKKOS_INLINE_FUNCTION - Random_XorShift1024_Pool operator = (const Random_XorShift1024_Pool& src) { - locks_ = src.locks_; - state_ = src.state_; - p_ = src.p_; - num_states_ = src.num_states_; - return *this; - } + KOKKOS_INLINE_FUNCTION + Random_XorShift1024_Pool operator=(const Random_XorShift1024_Pool& src) { + locks_ = src.locks_; + state_ = src.state_; + p_ = src.p_; + num_states_ = src.num_states_; + return *this; + } - inline - void init(uint64_t seed, int num_states) { - if(seed==0) - seed = uint64_t(1318319); - num_states_ = num_states; - locks_ = int_view_type("Kokkos::Random_XorShift1024::locks",num_states_); - state_ = state_data_type("Kokkos::Random_XorShift1024::state",num_states_); - p_ = int_view_type("Kokkos::Random_XorShift1024::p",num_states_); - - typename state_data_type::HostMirror h_state = create_mirror_view(state_); - typename int_view_type::HostMirror h_lock = 
create_mirror_view(locks_); - typename int_view_type::HostMirror h_p = create_mirror_view(p_); - - // Execute on the HostMirror's default execution space. - Random_XorShift64 gen(seed,0); - for(int i = 0; i < 17; i++) - gen.rand(); - for(int i = 0; i < num_states_; i++) { - for(int j = 0; j < 16 ; j++) { - int n1 = gen.rand(); - int n2 = gen.rand(); - int n3 = gen.rand(); - int n4 = gen.rand(); - h_state(i,j) = (((static_cast(n1)) & 0xffff)<<00) | - (((static_cast(n2)) & 0xffff)<<16) | - (((static_cast(n3)) & 0xffff)<<32) | - (((static_cast(n4)) & 0xffff)<<48); - } - h_p(i) = 0; - h_lock(i) = 0; + inline void init(uint64_t seed, int num_states) { + if (seed == 0) seed = uint64_t(1318319); + num_states_ = num_states; + locks_ = int_view_type("Kokkos::Random_XorShift1024::locks", num_states_); + state_ = state_data_type("Kokkos::Random_XorShift1024::state", num_states_); + p_ = int_view_type("Kokkos::Random_XorShift1024::p", num_states_); + + typename state_data_type::HostMirror h_state = create_mirror_view(state_); + typename int_view_type::HostMirror h_lock = create_mirror_view(locks_); + typename int_view_type::HostMirror h_p = create_mirror_view(p_); + + // Execute on the HostMirror's default execution space. 
+ Random_XorShift64 + gen(seed, 0); + for (int i = 0; i < 17; i++) gen.rand(); + for (int i = 0; i < num_states_; i++) { + for (int j = 0; j < 16; j++) { + int n1 = gen.rand(); + int n2 = gen.rand(); + int n3 = gen.rand(); + int n4 = gen.rand(); + h_state(i, j) = (((static_cast(n1)) & 0xffff) << 00) | + (((static_cast(n2)) & 0xffff) << 16) | + (((static_cast(n3)) & 0xffff) << 32) | + (((static_cast(n4)) & 0xffff) << 48); } - deep_copy(state_,h_state); - deep_copy(locks_,h_lock); + h_p(i) = 0; + h_lock(i) = 0; } + deep_copy(state_, h_state); + deep_copy(locks_, h_lock); + } - KOKKOS_INLINE_FUNCTION - Random_XorShift1024 get_state() const { + KOKKOS_INLINE_FUNCTION + Random_XorShift1024 get_state() const { #ifdef KOKKOS_ENABLE_DEPRECATED_CODE - const int i = DeviceType::hardware_thread_id(); + const int i = DeviceType::hardware_thread_id(); #else - const int i = DeviceType::impl_hardware_thread_id(); + const int i = DeviceType::impl_hardware_thread_id(); #endif - return Random_XorShift1024(state_,p_(i),i); - }; + return Random_XorShift1024(state_, p_(i), i); + }; - // NOTE: state_idx MUST be unique and less than num_states - KOKKOS_INLINE_FUNCTION - Random_XorShift1024 get_state(const int state_idx) const { - return Random_XorShift1024(state_,p_(state_idx),state_idx); - } + // NOTE: state_idx MUST be unique and less than num_states + KOKKOS_INLINE_FUNCTION + Random_XorShift1024 get_state(const int state_idx) const { + return Random_XorShift1024(state_, p_(state_idx), state_idx); + } - KOKKOS_INLINE_FUNCTION - void free_state(const Random_XorShift1024& state) const { - for(int i = 0; i<16; i++) - state_(state.state_idx_,i) = state.state_[i]; - p_(state.state_idx_) = state.p_; - } - }; + KOKKOS_INLINE_FUNCTION + void free_state(const Random_XorShift1024& state) const { + for (int i = 0; i < 16; i++) state_(state.state_idx_, i) = state.state_[i]; + p_(state.state_idx_) = state.p_; + } +}; #if defined(KOKKOS_ENABLE_CUDA) && defined(__CUDACC__) - template<> - class 
Random_XorShift1024 { - private: - int p_; - const int state_idx_; - uint64_t* state_; - const int stride_; - friend class Random_XorShift1024_Pool; - public: +template <> +class Random_XorShift1024 { + private: + int p_; + const int state_idx_; + uint64_t* state_; + const int stride_; + friend class Random_XorShift1024_Pool; - typedef Kokkos::Cuda device_type; - typedef Random_XorShift1024_Pool pool_type; + public: + typedef Kokkos::Cuda device_type; + typedef Random_XorShift1024_Pool pool_type; - enum {MAX_URAND = 0xffffffffU}; - enum {MAX_URAND64 = 0xffffffffffffffffULL-1}; - enum {MAX_RAND = static_cast(0xffffffffU/2)}; - enum {MAX_RAND64 = static_cast(0xffffffffffffffffULL/2-1)}; + enum { MAX_URAND = 0xffffffffU }; + enum { MAX_URAND64 = 0xffffffffffffffffULL - 1 }; + enum { MAX_RAND = static_cast(0xffffffffU / 2) }; + enum { MAX_RAND64 = static_cast(0xffffffffffffffffULL / 2 - 1) }; - KOKKOS_INLINE_FUNCTION - Random_XorShift1024 (const typename pool_type::state_data_type& state, int p, int state_idx = 0): - p_(p),state_idx_(state_idx),state_(&state(state_idx,0)),stride_(state.stride_1()){ - } + KOKKOS_INLINE_FUNCTION + Random_XorShift1024(const typename pool_type::state_data_type& state, int p, + int state_idx = 0) + : p_(p), + state_idx_(state_idx), + state_(&state(state_idx, 0)), + stride_(state.stride_1()) {} - KOKKOS_INLINE_FUNCTION - uint32_t urand() { - uint64_t state_0 = state_[ p_ * stride_ ]; - uint64_t state_1 = state_[ (p_ = ( p_ + 1 ) & 15) * stride_ ]; - state_1 ^= state_1 << 31; - state_1 ^= state_1 >> 11; - state_0 ^= state_0 >> 30; - uint64_t tmp = ( state_[ p_ * stride_ ] = state_0 ^ state_1 ) * 1181783497276652981ULL; - tmp = tmp>>16; - return static_cast(tmp&MAX_URAND); - } + KOKKOS_INLINE_FUNCTION + uint32_t urand() { + uint64_t state_0 = state_[p_ * stride_]; + uint64_t state_1 = state_[(p_ = (p_ + 1) & 15) * stride_]; + state_1 ^= state_1 << 31; + state_1 ^= state_1 >> 11; + state_0 ^= state_0 >> 30; + uint64_t tmp = + (state_[p_ * 
stride_] = state_0 ^ state_1) * 1181783497276652981ULL; + tmp = tmp >> 16; + return static_cast(tmp & MAX_URAND); + } - KOKKOS_INLINE_FUNCTION - uint64_t urand64() { - uint64_t state_0 = state_[ p_ * stride_ ]; - uint64_t state_1 = state_[ (p_ = ( p_ + 1 ) & 15) * stride_ ]; - state_1 ^= state_1 << 31; - state_1 ^= state_1 >> 11; - state_0 ^= state_0 >> 30; - return (( state_[ p_ * stride_ ] = state_0 ^ state_1 ) * 1181783497276652981LL) - 1; - } + KOKKOS_INLINE_FUNCTION + uint64_t urand64() { + uint64_t state_0 = state_[p_ * stride_]; + uint64_t state_1 = state_[(p_ = (p_ + 1) & 15) * stride_]; + state_1 ^= state_1 << 31; + state_1 ^= state_1 >> 11; + state_0 ^= state_0 >> 30; + return ((state_[p_ * stride_] = state_0 ^ state_1) * + 1181783497276652981LL) - + 1; + } - KOKKOS_INLINE_FUNCTION - uint32_t urand(const uint32_t& range) { - const uint32_t max_val = (MAX_URAND/range)*range; - uint32_t tmp = urand(); - while(tmp>=max_val) - urand(); - return tmp%range; - } + KOKKOS_INLINE_FUNCTION + uint32_t urand(const uint32_t& range) { + const uint32_t max_val = (MAX_URAND / range) * range; + uint32_t tmp = urand(); + while (tmp >= max_val) urand(); + return tmp % range; + } - KOKKOS_INLINE_FUNCTION - uint32_t urand(const uint32_t& start, const uint32_t& end ) { - return urand(end-start)+start; - } + KOKKOS_INLINE_FUNCTION + uint32_t urand(const uint32_t& start, const uint32_t& end) { + return urand(end - start) + start; + } - KOKKOS_INLINE_FUNCTION - uint64_t urand64(const uint64_t& range) { - const uint64_t max_val = (MAX_URAND64/range)*range; - uint64_t tmp = urand64(); - while(tmp>=max_val) - urand64(); - return tmp%range; - } + KOKKOS_INLINE_FUNCTION + uint64_t urand64(const uint64_t& range) { + const uint64_t max_val = (MAX_URAND64 / range) * range; + uint64_t tmp = urand64(); + while (tmp >= max_val) urand64(); + return tmp % range; + } - KOKKOS_INLINE_FUNCTION - uint64_t urand64(const uint64_t& start, const uint64_t& end ) { - return urand64(end-start)+start; - 
} + KOKKOS_INLINE_FUNCTION + uint64_t urand64(const uint64_t& start, const uint64_t& end) { + return urand64(end - start) + start; + } - KOKKOS_INLINE_FUNCTION - int rand() { - return static_cast(urand()/2); - } + KOKKOS_INLINE_FUNCTION + int rand() { return static_cast(urand() / 2); } - KOKKOS_INLINE_FUNCTION - int rand(const int& range) { - const int max_val = (MAX_RAND/range)*range; - int tmp = rand(); - while(tmp>=max_val) - rand(); - return tmp%range; - } + KOKKOS_INLINE_FUNCTION + int rand(const int& range) { + const int max_val = (MAX_RAND / range) * range; + int tmp = rand(); + while (tmp >= max_val) rand(); + return tmp % range; + } - KOKKOS_INLINE_FUNCTION - int rand(const int& start, const int& end ) { - return rand(end-start)+start; - } + KOKKOS_INLINE_FUNCTION + int rand(const int& start, const int& end) { + return rand(end - start) + start; + } - KOKKOS_INLINE_FUNCTION - int64_t rand64() { - return static_cast(urand64()/2); - } + KOKKOS_INLINE_FUNCTION + int64_t rand64() { return static_cast(urand64() / 2); } - KOKKOS_INLINE_FUNCTION - int64_t rand64(const int64_t& range) { - const int64_t max_val = (MAX_RAND64/range)*range; - int64_t tmp = rand64(); - while(tmp>=max_val) - rand64(); - return tmp%range; - } + KOKKOS_INLINE_FUNCTION + int64_t rand64(const int64_t& range) { + const int64_t max_val = (MAX_RAND64 / range) * range; + int64_t tmp = rand64(); + while (tmp >= max_val) rand64(); + return tmp % range; + } - KOKKOS_INLINE_FUNCTION - int64_t rand64(const int64_t& start, const int64_t& end ) { - return rand64(end-start)+start; - } + KOKKOS_INLINE_FUNCTION + int64_t rand64(const int64_t& start, const int64_t& end) { + return rand64(end - start) + start; + } - KOKKOS_INLINE_FUNCTION - float frand() { - return 1.0f * urand64()/MAX_URAND64; - } + KOKKOS_INLINE_FUNCTION + float frand() { return 1.0f * urand64() / MAX_URAND64; } - KOKKOS_INLINE_FUNCTION - float frand(const float& range) { - return range * urand64()/MAX_URAND64; - } + 
KOKKOS_INLINE_FUNCTION + float frand(const float& range) { return range * urand64() / MAX_URAND64; } - KOKKOS_INLINE_FUNCTION - float frand(const float& start, const float& end ) { - return frand(end-start)+start; - } + KOKKOS_INLINE_FUNCTION + float frand(const float& start, const float& end) { + return frand(end - start) + start; + } - KOKKOS_INLINE_FUNCTION - double drand() { - return 1.0 * urand64()/MAX_URAND64; - } + KOKKOS_INLINE_FUNCTION + double drand() { return 1.0 * urand64() / MAX_URAND64; } - KOKKOS_INLINE_FUNCTION - double drand(const double& range) { - return range * urand64()/MAX_URAND64; - } + KOKKOS_INLINE_FUNCTION + double drand(const double& range) { return range * urand64() / MAX_URAND64; } - KOKKOS_INLINE_FUNCTION - double drand(const double& start, const double& end ) { - return frand(end-start)+start; - } + KOKKOS_INLINE_FUNCTION + double drand(const double& start, const double& end) { + return frand(end - start) + start; + } - //Marsaglia polar method for drawing a standard normal distributed random number - KOKKOS_INLINE_FUNCTION - double normal() { - double S = 2.0; - double U; - while(S>=1.0) { - U = 2.0*drand() - 1.0; - const double V = 2.0*drand() - 1.0; - S = U*U+V*V; - } - return U*std::sqrt(-2.0*log(S)/S); - } + // Marsaglia polar method for drawing a standard normal distributed random + // number + KOKKOS_INLINE_FUNCTION + double normal() { + double S = 2.0; + double U; + while (S >= 1.0) { + U = 2.0 * drand() - 1.0; + const double V = 2.0 * drand() - 1.0; + S = U * U + V * V; + } + return U * std::sqrt(-2.0 * log(S) / S); + } - KOKKOS_INLINE_FUNCTION - double normal(const double& mean, const double& std_dev=1.0) { - return mean + normal()*std_dev; - } - }; + KOKKOS_INLINE_FUNCTION + double normal(const double& mean, const double& std_dev = 1.0) { + return mean + normal() * std_dev; + } +}; -template<> -inline -Random_XorShift64_Pool::Random_XorShift64_Pool(uint64_t seed) { +template <> +inline 
Random_XorShift64_Pool::Random_XorShift64_Pool( + uint64_t seed) { num_states_ = 0; - init(seed,4*32768); + init(seed, 4 * 32768); } -template<> -KOKKOS_INLINE_FUNCTION -Random_XorShift64 Random_XorShift64_Pool::get_state() const { +template <> +KOKKOS_INLINE_FUNCTION Random_XorShift64 +Random_XorShift64_Pool::get_state() const { #ifdef __CUDA_ARCH__ - const int i_offset = (threadIdx.x*blockDim.y + threadIdx.y)*blockDim.z+threadIdx.z; - int i = (((blockIdx.x*gridDim.y+blockIdx.y)*gridDim.z + blockIdx.z) * - blockDim.x*blockDim.y*blockDim.z + i_offset)%num_states_; - while(Kokkos::atomic_compare_exchange(&locks_(i),0,1)) { - i+=blockDim.x*blockDim.y*blockDim.z; - if(i>=num_states_) {i = i_offset;} + const int i_offset = + (threadIdx.x * blockDim.y + threadIdx.y) * blockDim.z + threadIdx.z; + int i = (((blockIdx.x * gridDim.y + blockIdx.y) * gridDim.z + blockIdx.z) * + blockDim.x * blockDim.y * blockDim.z + + i_offset) % + num_states_; + while (Kokkos::atomic_compare_exchange(&locks_(i), 0, 1)) { + i += blockDim.x * blockDim.y * blockDim.z; + if (i >= num_states_) { + i = i_offset; + } } - return Random_XorShift64(state_(i),i); + return Random_XorShift64(state_(i), i); #else - return Random_XorShift64(state_(0),0); + return Random_XorShift64(state_(0), 0); #endif } -template<> -KOKKOS_INLINE_FUNCTION -void Random_XorShift64_Pool::free_state(const Random_XorShift64 &state) const { +template <> +KOKKOS_INLINE_FUNCTION void Random_XorShift64_Pool::free_state( + const Random_XorShift64& state) const { state_(state.state_idx_) = state.state_; #ifdef __CUDA_ARCH__ locks_(state.state_idx_) = 0; @@ -1249,24 +1211,28 @@ void Random_XorShift64_Pool::free_state(const Random_XorShift64 -inline -Random_XorShift1024_Pool::Random_XorShift1024_Pool(uint64_t seed) { +template <> +inline Random_XorShift1024_Pool::Random_XorShift1024_Pool( + uint64_t seed) { num_states_ = 0; - init(seed,4*32768); + init(seed, 4 * 32768); } -template<> -KOKKOS_INLINE_FUNCTION -Random_XorShift1024 
Random_XorShift1024_Pool::get_state() const { +template <> +KOKKOS_INLINE_FUNCTION Random_XorShift1024 +Random_XorShift1024_Pool::get_state() const { #ifdef __CUDA_ARCH__ - const int i_offset = (threadIdx.x*blockDim.y + threadIdx.y)*blockDim.z+threadIdx.z; - int i = (((blockIdx.x*gridDim.y+blockIdx.y)*gridDim.z + blockIdx.z) * - blockDim.x*blockDim.y*blockDim.z + i_offset)%num_states_; - while(Kokkos::atomic_compare_exchange(&locks_(i),0,1)) { - i+=blockDim.x*blockDim.y*blockDim.z; - if(i>=num_states_) {i = i_offset;} + const int i_offset = + (threadIdx.x * blockDim.y + threadIdx.y) * blockDim.z + threadIdx.z; + int i = (((blockIdx.x * gridDim.y + blockIdx.y) * gridDim.z + blockIdx.z) * + blockDim.x * blockDim.y * blockDim.z + + i_offset) % + num_states_; + while (Kokkos::atomic_compare_exchange(&locks_(i), 0, 1)) { + i += blockDim.x * blockDim.y * blockDim.z; + if (i >= num_states_) { + i = i_offset; + } } return Random_XorShift1024(state_, p_(i), i); @@ -1275,210 +1241,205 @@ Random_XorShift1024 Random_XorShift1024_Pool::get_st #endif } -template<> -KOKKOS_INLINE_FUNCTION -void Random_XorShift1024_Pool::free_state(const Random_XorShift1024 &state) const { - for(int i=0; i<16; i++) - state_(state.state_idx_,i) = state.state_[i]; +template <> +KOKKOS_INLINE_FUNCTION void Random_XorShift1024_Pool::free_state( + const Random_XorShift1024& state) const { + for (int i = 0; i < 16; i++) state_(state.state_idx_, i) = state.state_[i]; #ifdef __CUDA_ARCH__ locks_(state.state_idx_) = 0; return; #endif } - #endif -#if defined(KOKKOS_ENABLE_ROCM) +#if defined(KOKKOS_ENABLE_ROCM) - template<> - class Random_XorShift1024 { - private: - int p_; - const int state_idx_; - uint64_t* state_; - const int stride_; - friend class Random_XorShift1024_Pool; - public: +template <> +class Random_XorShift1024 { + private: + int p_; + const int state_idx_; + uint64_t* state_; + const int stride_; + friend class Random_XorShift1024_Pool; - typedef Kokkos::Experimental::ROCm device_type; - 
typedef Random_XorShift1024_Pool pool_type; + public: + typedef Kokkos::Experimental::ROCm device_type; + typedef Random_XorShift1024_Pool pool_type; - enum {MAX_URAND = 0xffffffffU}; - enum {MAX_URAND64 = 0xffffffffffffffffULL-1}; - enum {MAX_RAND = static_cast(0xffffffffU/2)}; - enum {MAX_RAND64 = static_cast(0xffffffffffffffffULL/2-1)}; + enum { MAX_URAND = 0xffffffffU }; + enum { MAX_URAND64 = 0xffffffffffffffffULL - 1 }; + enum { MAX_RAND = static_cast(0xffffffffU / 2) }; + enum { MAX_RAND64 = static_cast(0xffffffffffffffffULL / 2 - 1) }; - KOKKOS_INLINE_FUNCTION - Random_XorShift1024 (const typename pool_type::state_data_type& state, int p, int state_idx = 0): - p_(p),state_idx_(state_idx),state_(&state(state_idx,0)),stride_(state.stride_1()){ - } + KOKKOS_INLINE_FUNCTION + Random_XorShift1024(const typename pool_type::state_data_type& state, int p, + int state_idx = 0) + : p_(p), + state_idx_(state_idx), + state_(&state(state_idx, 0)), + stride_(state.stride_1()) {} - KOKKOS_INLINE_FUNCTION - uint32_t urand() { - uint64_t state_0 = state_[ p_ * stride_ ]; - uint64_t state_1 = state_[ (p_ = ( p_ + 1 ) & 15) * stride_ ]; - state_1 ^= state_1 << 31; - state_1 ^= state_1 >> 11; - state_0 ^= state_0 >> 30; - uint64_t tmp = ( state_[ p_ * stride_ ] = state_0 ^ state_1 ) * 1181783497276652981ULL; - tmp = tmp>>16; - return static_cast(tmp&MAX_URAND); - } + KOKKOS_INLINE_FUNCTION + uint32_t urand() { + uint64_t state_0 = state_[p_ * stride_]; + uint64_t state_1 = state_[(p_ = (p_ + 1) & 15) * stride_]; + state_1 ^= state_1 << 31; + state_1 ^= state_1 >> 11; + state_0 ^= state_0 >> 30; + uint64_t tmp = + (state_[p_ * stride_] = state_0 ^ state_1) * 1181783497276652981ULL; + tmp = tmp >> 16; + return static_cast(tmp & MAX_URAND); + } - KOKKOS_INLINE_FUNCTION - uint64_t urand64() { - uint64_t state_0 = state_[ p_ * stride_ ]; - uint64_t state_1 = state_[ (p_ = ( p_ + 1 ) & 15) * stride_ ]; - state_1 ^= state_1 << 31; - state_1 ^= state_1 >> 11; - state_0 ^= state_0 >> 
30; - return (( state_[ p_ * stride_ ] = state_0 ^ state_1 ) * 1181783497276652981LL) - 1; - } + KOKKOS_INLINE_FUNCTION + uint64_t urand64() { + uint64_t state_0 = state_[p_ * stride_]; + uint64_t state_1 = state_[(p_ = (p_ + 1) & 15) * stride_]; + state_1 ^= state_1 << 31; + state_1 ^= state_1 >> 11; + state_0 ^= state_0 >> 30; + return ((state_[p_ * stride_] = state_0 ^ state_1) * + 1181783497276652981LL) - + 1; + } - KOKKOS_INLINE_FUNCTION - uint32_t urand(const uint32_t& range) { - const uint32_t max_val = (MAX_URAND/range)*range; - uint32_t tmp = urand(); - while(tmp>=max_val) - urand(); - return tmp%range; - } + KOKKOS_INLINE_FUNCTION + uint32_t urand(const uint32_t& range) { + const uint32_t max_val = (MAX_URAND / range) * range; + uint32_t tmp = urand(); + while (tmp >= max_val) urand(); + return tmp % range; + } - KOKKOS_INLINE_FUNCTION - uint32_t urand(const uint32_t& start, const uint32_t& end ) { - return urand(end-start)+start; - } + KOKKOS_INLINE_FUNCTION + uint32_t urand(const uint32_t& start, const uint32_t& end) { + return urand(end - start) + start; + } - KOKKOS_INLINE_FUNCTION - uint64_t urand64(const uint64_t& range) { - const uint64_t max_val = (MAX_URAND64/range)*range; - uint64_t tmp = urand64(); - while(tmp>=max_val) - urand64(); - return tmp%range; - } + KOKKOS_INLINE_FUNCTION + uint64_t urand64(const uint64_t& range) { + const uint64_t max_val = (MAX_URAND64 / range) * range; + uint64_t tmp = urand64(); + while (tmp >= max_val) urand64(); + return tmp % range; + } - KOKKOS_INLINE_FUNCTION - uint64_t urand64(const uint64_t& start, const uint64_t& end ) { - return urand64(end-start)+start; - } + KOKKOS_INLINE_FUNCTION + uint64_t urand64(const uint64_t& start, const uint64_t& end) { + return urand64(end - start) + start; + } - KOKKOS_INLINE_FUNCTION - int rand() { - return static_cast(urand()/2); - } + KOKKOS_INLINE_FUNCTION + int rand() { return static_cast(urand() / 2); } - KOKKOS_INLINE_FUNCTION - int rand(const int& range) { - const int 
max_val = (MAX_RAND/range)*range; - int tmp = rand(); - while(tmp>=max_val) - rand(); - return tmp%range; - } + KOKKOS_INLINE_FUNCTION + int rand(const int& range) { + const int max_val = (MAX_RAND / range) * range; + int tmp = rand(); + while (tmp >= max_val) rand(); + return tmp % range; + } - KOKKOS_INLINE_FUNCTION - int rand(const int& start, const int& end ) { - return rand(end-start)+start; - } + KOKKOS_INLINE_FUNCTION + int rand(const int& start, const int& end) { + return rand(end - start) + start; + } - KOKKOS_INLINE_FUNCTION - int64_t rand64() { - return static_cast(urand64()/2); - } + KOKKOS_INLINE_FUNCTION + int64_t rand64() { return static_cast(urand64() / 2); } - KOKKOS_INLINE_FUNCTION - int64_t rand64(const int64_t& range) { - const int64_t max_val = (MAX_RAND64/range)*range; - int64_t tmp = rand64(); - while(tmp>=max_val) - rand64(); - return tmp%range; - } + KOKKOS_INLINE_FUNCTION + int64_t rand64(const int64_t& range) { + const int64_t max_val = (MAX_RAND64 / range) * range; + int64_t tmp = rand64(); + while (tmp >= max_val) rand64(); + return tmp % range; + } - KOKKOS_INLINE_FUNCTION - int64_t rand64(const int64_t& start, const int64_t& end ) { - return rand64(end-start)+start; - } + KOKKOS_INLINE_FUNCTION + int64_t rand64(const int64_t& start, const int64_t& end) { + return rand64(end - start) + start; + } - KOKKOS_INLINE_FUNCTION - float frand() { - return 1.0f * urand64()/MAX_URAND64; - } + KOKKOS_INLINE_FUNCTION + float frand() { return 1.0f * urand64() / MAX_URAND64; } - KOKKOS_INLINE_FUNCTION - float frand(const float& range) { - return range * urand64()/MAX_URAND64; - } + KOKKOS_INLINE_FUNCTION + float frand(const float& range) { return range * urand64() / MAX_URAND64; } - KOKKOS_INLINE_FUNCTION - float frand(const float& start, const float& end ) { - return frand(end-start)+start; - } + KOKKOS_INLINE_FUNCTION + float frand(const float& start, const float& end) { + return frand(end - start) + start; + } - KOKKOS_INLINE_FUNCTION - double 
drand() { - return 1.0 * urand64()/MAX_URAND64; - } + KOKKOS_INLINE_FUNCTION + double drand() { return 1.0 * urand64() / MAX_URAND64; } - KOKKOS_INLINE_FUNCTION - double drand(const double& range) { - return range * urand64()/MAX_URAND64; - } + KOKKOS_INLINE_FUNCTION + double drand(const double& range) { return range * urand64() / MAX_URAND64; } - KOKKOS_INLINE_FUNCTION - double drand(const double& start, const double& end ) { - return frand(end-start)+start; - } + KOKKOS_INLINE_FUNCTION + double drand(const double& start, const double& end) { + return frand(end - start) + start; + } - //Marsaglia polar method for drawing a standard normal distributed random number - KOKKOS_INLINE_FUNCTION - double normal() { - double S = 2.0; - double U; - while(S>=1.0) { - U = 2.0*drand() - 1.0; - const double V = 2.0*drand() - 1.0; - S = U*U+V*V; - } - return U*std::sqrt(-2.0*log(S)/S); - } + // Marsaglia polar method for drawing a standard normal distributed random + // number + KOKKOS_INLINE_FUNCTION + double normal() { + double S = 2.0; + double U; + while (S >= 1.0) { + U = 2.0 * drand() - 1.0; + const double V = 2.0 * drand() - 1.0; + S = U * U + V * V; + } + return U * std::sqrt(-2.0 * log(S) / S); + } - KOKKOS_INLINE_FUNCTION - double normal(const double& mean, const double& std_dev=1.0) { - return mean + normal()*std_dev; - } - }; + KOKKOS_INLINE_FUNCTION + double normal(const double& mean, const double& std_dev = 1.0) { + return mean + normal() * std_dev; + } +}; -template<> -inline -Random_XorShift64_Pool::Random_XorShift64_Pool(uint64_t seed) { +template <> +inline Random_XorShift64_Pool< + Kokkos::Experimental::ROCm>::Random_XorShift64_Pool(uint64_t seed) { num_states_ = 0; - init(seed,4*32768); + init(seed, 4 * 32768); } -template<> -KOKKOS_INLINE_FUNCTION -Random_XorShift64 Random_XorShift64_Pool::get_state() const { +template <> +KOKKOS_INLINE_FUNCTION Random_XorShift64 +Random_XorShift64_Pool::get_state() const { #ifdef __HCC_ACCELERATOR__ - const int i_offset = 
(threadIdx_x*blockDim_y + threadIdx_y)*blockDim_z+threadIdx_z; - int i = (((blockIdx_x*gridDim_y+blockIdx_y)*gridDim_z + blockIdx_z) * - blockDim_x*blockDim_y*blockDim_z + i_offset)%num_states_; - while(Kokkos::atomic_compare_exchange(&locks_(i),0,1)) { - i+=blockDim_x*blockDim_y*blockDim_z; - if(i>=num_states_) {i = i_offset;} + const int i_offset = + (threadIdx_x * blockDim_y + threadIdx_y) * blockDim_z + threadIdx_z; + int i = (((blockIdx_x * gridDim_y + blockIdx_y) * gridDim_z + blockIdx_z) * + blockDim_x * blockDim_y * blockDim_z + + i_offset) % + num_states_; + while (Kokkos::atomic_compare_exchange(&locks_(i), 0, 1)) { + i += blockDim_x * blockDim_y * blockDim_z; + if (i >= num_states_) { + i = i_offset; + } } - return Random_XorShift64(state_(i),i); + return Random_XorShift64(state_(i), i); #else - return Random_XorShift64(state_(0),0); + return Random_XorShift64(state_(0), 0); #endif } -template<> -KOKKOS_INLINE_FUNCTION -void Random_XorShift64_Pool::free_state(const Random_XorShift64 &state) const { +template <> +KOKKOS_INLINE_FUNCTION void +Random_XorShift64_Pool::free_state( + const Random_XorShift64& state) const { #ifdef __HCC_ACCELERATOR__ state_(state.state_idx_) = state.state_; locks_(state.state_idx_) = 0; @@ -1486,24 +1447,28 @@ void Random_XorShift64_Pool::free_state(const Random #endif } - -template<> -inline -Random_XorShift1024_Pool::Random_XorShift1024_Pool(uint64_t seed) { +template <> +inline Random_XorShift1024_Pool< + Kokkos::Experimental::ROCm>::Random_XorShift1024_Pool(uint64_t seed) { num_states_ = 0; - init(seed,4*32768); + init(seed, 4 * 32768); } -template<> -KOKKOS_INLINE_FUNCTION -Random_XorShift1024 Random_XorShift1024_Pool::get_state() const { +template <> +KOKKOS_INLINE_FUNCTION Random_XorShift1024 +Random_XorShift1024_Pool::get_state() const { #ifdef __HCC_ACCELERATOR__ - const int i_offset = (threadIdx_x*blockDim_y + threadIdx_y)*blockDim_z+threadIdx_z; - int i = (((blockIdx_x*gridDim_y+blockIdx_y)*gridDim_z + blockIdx_z) * 
- blockDim_x*blockDim_y*blockDim_z + i_offset)%num_states_; - while(Kokkos::atomic_compare_exchange(&locks_(i),0,1)) { - i+=blockDim_x*blockDim_y*blockDim_z; - if(i>=num_states_) {i = i_offset;} + const int i_offset = + (threadIdx_x * blockDim_y + threadIdx_y) * blockDim_z + threadIdx_z; + int i = (((blockIdx_x * gridDim_y + blockIdx_y) * gridDim_z + blockIdx_z) * + blockDim_x * blockDim_y * blockDim_z + + i_offset) % + num_states_; + while (Kokkos::atomic_compare_exchange(&locks_(i), 0, 1)) { + i += blockDim_x * blockDim_y * blockDim_z; + if (i >= num_states_) { + i = i_offset; + } } return Random_XorShift1024(state_, p_(i), i); @@ -1512,515 +1477,589 @@ Random_XorShift1024 Random_XorShift1024_Pool -KOKKOS_INLINE_FUNCTION -void Random_XorShift1024_Pool::free_state(const Random_XorShift1024 &state) const { +template <> +KOKKOS_INLINE_FUNCTION void +Random_XorShift1024_Pool::free_state( + const Random_XorShift1024& state) const { #ifdef __HCC_ACCELERATOR__ - for(int i=0; i<16; i++) - state_(state.state_idx_,i) = state.state_[i]; + for (int i = 0; i < 16; i++) state_(state.state_idx_, i) = state.state_[i]; locks_(state.state_idx_) = 0; return; #endif } - #endif - namespace Impl { -template +template struct fill_random_functor_range; -template +template struct fill_random_functor_begin_end; -template -struct fill_random_functor_range{ +template +struct fill_random_functor_range { typedef typename ViewType::execution_space execution_space; ViewType a; RandomPool rand_pool; typename ViewType::const_value_type range; - typedef rand Rand; + typedef rand + Rand; fill_random_functor_range(ViewType a_, RandomPool rand_pool_, - typename ViewType::const_value_type range_): - a(a_),rand_pool(rand_pool_),range(range_) {} + typename ViewType::const_value_type range_) + : a(a_), rand_pool(rand_pool_), range(range_) {} KOKKOS_INLINE_FUNCTION - void operator() (const IndexType& i) const { + void operator()(const IndexType& i) const { typename RandomPool::generator_type gen = 
rand_pool.get_state(); - for(IndexType j=0;j(a.extent(0))) - a(idx) = Rand::draw(gen,range); + for (IndexType j = 0; j < loops; j++) { + const IndexType idx = i * loops + j; + if (idx < static_cast(a.extent(0))) + a(idx) = Rand::draw(gen, range); } rand_pool.free_state(gen); } }; -template -struct fill_random_functor_range{ +template +struct fill_random_functor_range { typedef typename ViewType::execution_space execution_space; ViewType a; RandomPool rand_pool; typename ViewType::const_value_type range; - typedef rand Rand; + typedef rand + Rand; fill_random_functor_range(ViewType a_, RandomPool rand_pool_, - typename ViewType::const_value_type range_): - a(a_),rand_pool(rand_pool_),range(range_) {} + typename ViewType::const_value_type range_) + : a(a_), rand_pool(rand_pool_), range(range_) {} KOKKOS_INLINE_FUNCTION - void operator() (IndexType i) const { + void operator()(IndexType i) const { typename RandomPool::generator_type gen = rand_pool.get_state(); - for(IndexType j=0;j(a.extent(0))) { - for(IndexType k=0;k(a.extent(1));k++) - a(idx,k) = Rand::draw(gen,range); + for (IndexType j = 0; j < loops; j++) { + const IndexType idx = i * loops + j; + if (idx < static_cast(a.extent(0))) { + for (IndexType k = 0; k < static_cast(a.extent(1)); k++) + a(idx, k) = Rand::draw(gen, range); } } rand_pool.free_state(gen); } }; - -template -struct fill_random_functor_range{ +template +struct fill_random_functor_range { typedef typename ViewType::execution_space execution_space; ViewType a; RandomPool rand_pool; typename ViewType::const_value_type range; - typedef rand Rand; + typedef rand + Rand; fill_random_functor_range(ViewType a_, RandomPool rand_pool_, - typename ViewType::const_value_type range_): - a(a_),rand_pool(rand_pool_),range(range_) {} + typename ViewType::const_value_type range_) + : a(a_), rand_pool(rand_pool_), range(range_) {} KOKKOS_INLINE_FUNCTION - void operator() (IndexType i) const { + void operator()(IndexType i) const { typename 
RandomPool::generator_type gen = rand_pool.get_state(); - for(IndexType j=0;j(a.extent(0))) { - for(IndexType k=0;k(a.extent(1));k++) - for(IndexType l=0;l(a.extent(2));l++) - a(idx,k,l) = Rand::draw(gen,range); + for (IndexType j = 0; j < loops; j++) { + const IndexType idx = i * loops + j; + if (idx < static_cast(a.extent(0))) { + for (IndexType k = 0; k < static_cast(a.extent(1)); k++) + for (IndexType l = 0; l < static_cast(a.extent(2)); l++) + a(idx, k, l) = Rand::draw(gen, range); } } rand_pool.free_state(gen); } }; -template -struct fill_random_functor_range{ +template +struct fill_random_functor_range { typedef typename ViewType::execution_space execution_space; ViewType a; RandomPool rand_pool; typename ViewType::const_value_type range; - typedef rand Rand; + typedef rand + Rand; fill_random_functor_range(ViewType a_, RandomPool rand_pool_, - typename ViewType::const_value_type range_): - a(a_),rand_pool(rand_pool_),range(range_) {} + typename ViewType::const_value_type range_) + : a(a_), rand_pool(rand_pool_), range(range_) {} KOKKOS_INLINE_FUNCTION - void operator() (IndexType i) const { + void operator()(IndexType i) const { typename RandomPool::generator_type gen = rand_pool.get_state(); - for(IndexType j=0;j(a.extent(0))) { - for(IndexType k=0;k(a.extent(1));k++) - for(IndexType l=0;l(a.extent(2));l++) - for(IndexType m=0;m(a.extent(3));m++) - a(idx,k,l,m) = Rand::draw(gen,range); + for (IndexType j = 0; j < loops; j++) { + const IndexType idx = i * loops + j; + if (idx < static_cast(a.extent(0))) { + for (IndexType k = 0; k < static_cast(a.extent(1)); k++) + for (IndexType l = 0; l < static_cast(a.extent(2)); l++) + for (IndexType m = 0; m < static_cast(a.extent(3)); m++) + a(idx, k, l, m) = Rand::draw(gen, range); } } rand_pool.free_state(gen); } }; -template -struct fill_random_functor_range{ +template +struct fill_random_functor_range { typedef typename ViewType::execution_space execution_space; ViewType a; RandomPool rand_pool; typename 
ViewType::const_value_type range; - typedef rand Rand; + typedef rand + Rand; fill_random_functor_range(ViewType a_, RandomPool rand_pool_, - typename ViewType::const_value_type range_): - a(a_),rand_pool(rand_pool_),range(range_) {} + typename ViewType::const_value_type range_) + : a(a_), rand_pool(rand_pool_), range(range_) {} KOKKOS_INLINE_FUNCTION - void operator() (IndexType i) const { + void operator()(IndexType i) const { typename RandomPool::generator_type gen = rand_pool.get_state(); - for(IndexType j=0;j(a.extent(0))) { - for(IndexType k=0;k(a.extent(1));k++) - for(IndexType l=0;l(a.extent(2));l++) - for(IndexType m=0;m(a.extent(3));m++) - for(IndexType n=0;n(a.extent(4));n++) - a(idx,k,l,m,n) = Rand::draw(gen,range); + for (IndexType j = 0; j < loops; j++) { + const IndexType idx = i * loops + j; + if (idx < static_cast(a.extent(0))) { + for (IndexType k = 0; k < static_cast(a.extent(1)); k++) + for (IndexType l = 0; l < static_cast(a.extent(2)); l++) + for (IndexType m = 0; m < static_cast(a.extent(3)); m++) + for (IndexType n = 0; n < static_cast(a.extent(4)); + n++) + a(idx, k, l, m, n) = Rand::draw(gen, range); } } rand_pool.free_state(gen); } }; -template -struct fill_random_functor_range{ +template +struct fill_random_functor_range { typedef typename ViewType::execution_space execution_space; ViewType a; RandomPool rand_pool; typename ViewType::const_value_type range; - typedef rand Rand; + typedef rand + Rand; fill_random_functor_range(ViewType a_, RandomPool rand_pool_, - typename ViewType::const_value_type range_): - a(a_),rand_pool(rand_pool_),range(range_) {} + typename ViewType::const_value_type range_) + : a(a_), rand_pool(rand_pool_), range(range_) {} KOKKOS_INLINE_FUNCTION - void operator() (IndexType i) const { + void operator()(IndexType i) const { typename RandomPool::generator_type gen = rand_pool.get_state(); - for(IndexType j=0;j(a.extent(0))) { - for(IndexType k=0;k(a.extent(1));k++) - for(IndexType l=0;l(a.extent(2));l++) - 
for(IndexType m=0;m(a.extent(3));m++) - for(IndexType n=0;n(a.extent(4));n++) - for(IndexType o=0;o(a.extent(5));o++) - a(idx,k,l,m,n,o) = Rand::draw(gen,range); + for (IndexType j = 0; j < loops; j++) { + const IndexType idx = i * loops + j; + if (idx < static_cast(a.extent(0))) { + for (IndexType k = 0; k < static_cast(a.extent(1)); k++) + for (IndexType l = 0; l < static_cast(a.extent(2)); l++) + for (IndexType m = 0; m < static_cast(a.extent(3)); m++) + for (IndexType n = 0; n < static_cast(a.extent(4)); + n++) + for (IndexType o = 0; o < static_cast(a.extent(5)); + o++) + a(idx, k, l, m, n, o) = Rand::draw(gen, range); } } rand_pool.free_state(gen); } }; -template -struct fill_random_functor_range{ +template +struct fill_random_functor_range { typedef typename ViewType::execution_space execution_space; ViewType a; RandomPool rand_pool; typename ViewType::const_value_type range; - typedef rand Rand; + typedef rand + Rand; fill_random_functor_range(ViewType a_, RandomPool rand_pool_, - typename ViewType::const_value_type range_): - a(a_),rand_pool(rand_pool_),range(range_) {} + typename ViewType::const_value_type range_) + : a(a_), rand_pool(rand_pool_), range(range_) {} KOKKOS_INLINE_FUNCTION - void operator() (IndexType i) const { + void operator()(IndexType i) const { typename RandomPool::generator_type gen = rand_pool.get_state(); - for(IndexType j=0;j(a.extent(0))) { - for(IndexType k=0;k(a.extent(1));k++) - for(IndexType l=0;l(a.extent(2));l++) - for(IndexType m=0;m(a.extent(3));m++) - for(IndexType n=0;n(a.extent(4));n++) - for(IndexType o=0;o(a.extent(5));o++) - for(IndexType p=0;p(a.extent(6));p++) - a(idx,k,l,m,n,o,p) = Rand::draw(gen,range); + for (IndexType j = 0; j < loops; j++) { + const IndexType idx = i * loops + j; + if (idx < static_cast(a.extent(0))) { + for (IndexType k = 0; k < static_cast(a.extent(1)); k++) + for (IndexType l = 0; l < static_cast(a.extent(2)); l++) + for (IndexType m = 0; m < static_cast(a.extent(3)); m++) + for (IndexType 
n = 0; n < static_cast(a.extent(4)); + n++) + for (IndexType o = 0; o < static_cast(a.extent(5)); + o++) + for (IndexType p = 0; p < static_cast(a.extent(6)); + p++) + a(idx, k, l, m, n, o, p) = Rand::draw(gen, range); } } rand_pool.free_state(gen); } }; -template -struct fill_random_functor_range{ +template +struct fill_random_functor_range { typedef typename ViewType::execution_space execution_space; ViewType a; RandomPool rand_pool; typename ViewType::const_value_type range; - typedef rand Rand; + typedef rand + Rand; fill_random_functor_range(ViewType a_, RandomPool rand_pool_, - typename ViewType::const_value_type range_): - a(a_),rand_pool(rand_pool_),range(range_) {} + typename ViewType::const_value_type range_) + : a(a_), rand_pool(rand_pool_), range(range_) {} KOKKOS_INLINE_FUNCTION - void operator() (IndexType i) const { + void operator()(IndexType i) const { typename RandomPool::generator_type gen = rand_pool.get_state(); - for(IndexType j=0;j(a.extent(0))) { - for(IndexType k=0;k(a.extent(1));k++) - for(IndexType l=0;l(a.extent(2));l++) - for(IndexType m=0;m(a.extent(3));m++) - for(IndexType n=0;n(a.extent(4));n++) - for(IndexType o=0;o(a.extent(5));o++) - for(IndexType p=0;p(a.extent(6));p++) - for(IndexType q=0;q(a.extent(7));q++) - a(idx,k,l,m,n,o,p,q) = Rand::draw(gen,range); + for (IndexType j = 0; j < loops; j++) { + const IndexType idx = i * loops + j; + if (idx < static_cast(a.extent(0))) { + for (IndexType k = 0; k < static_cast(a.extent(1)); k++) + for (IndexType l = 0; l < static_cast(a.extent(2)); l++) + for (IndexType m = 0; m < static_cast(a.extent(3)); m++) + for (IndexType n = 0; n < static_cast(a.extent(4)); + n++) + for (IndexType o = 0; o < static_cast(a.extent(5)); + o++) + for (IndexType p = 0; p < static_cast(a.extent(6)); + p++) + for (IndexType q = 0; + q < static_cast(a.extent(7)); q++) + a(idx, k, l, m, n, o, p, q) = Rand::draw(gen, range); } } rand_pool.free_state(gen); } }; -template -struct fill_random_functor_begin_end{ 
+template +struct fill_random_functor_begin_end { typedef typename ViewType::execution_space execution_space; ViewType a; RandomPool rand_pool; - typename ViewType::const_value_type begin,end; + typename ViewType::const_value_type begin, end; - typedef rand Rand; + typedef rand + Rand; fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_, - typename ViewType::const_value_type begin_, typename ViewType::const_value_type end_): - a(a_),rand_pool(rand_pool_),begin(begin_),end(end_) {} + typename ViewType::const_value_type begin_, + typename ViewType::const_value_type end_) + : a(a_), rand_pool(rand_pool_), begin(begin_), end(end_) {} KOKKOS_INLINE_FUNCTION - void operator() (IndexType i) const { + void operator()(IndexType i) const { typename RandomPool::generator_type gen = rand_pool.get_state(); - for(IndexType j=0;j(a.extent(0))) - a(idx) = Rand::draw(gen,begin,end); + for (IndexType j = 0; j < loops; j++) { + const IndexType idx = i * loops + j; + if (idx < static_cast(a.extent(0))) + a(idx) = Rand::draw(gen, begin, end); } rand_pool.free_state(gen); } }; -template -struct fill_random_functor_begin_end{ +template +struct fill_random_functor_begin_end { typedef typename ViewType::execution_space execution_space; ViewType a; RandomPool rand_pool; - typename ViewType::const_value_type begin,end; + typename ViewType::const_value_type begin, end; - typedef rand Rand; + typedef rand + Rand; fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_, - typename ViewType::const_value_type begin_, typename ViewType::const_value_type end_): - a(a_),rand_pool(rand_pool_),begin(begin_),end(end_) {} + typename ViewType::const_value_type begin_, + typename ViewType::const_value_type end_) + : a(a_), rand_pool(rand_pool_), begin(begin_), end(end_) {} KOKKOS_INLINE_FUNCTION - void operator() (IndexType i) const { + void operator()(IndexType i) const { typename RandomPool::generator_type gen = rand_pool.get_state(); - for(IndexType j=0;j(a.extent(0))) { - 
for(IndexType k=0;k(a.extent(1));k++) - a(idx,k) = Rand::draw(gen,begin,end); + for (IndexType j = 0; j < loops; j++) { + const IndexType idx = i * loops + j; + if (idx < static_cast(a.extent(0))) { + for (IndexType k = 0; k < static_cast(a.extent(1)); k++) + a(idx, k) = Rand::draw(gen, begin, end); } } rand_pool.free_state(gen); } }; - -template -struct fill_random_functor_begin_end{ +template +struct fill_random_functor_begin_end { typedef typename ViewType::execution_space execution_space; ViewType a; RandomPool rand_pool; - typename ViewType::const_value_type begin,end; + typename ViewType::const_value_type begin, end; - typedef rand Rand; + typedef rand + Rand; fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_, - typename ViewType::const_value_type begin_, typename ViewType::const_value_type end_): - a(a_),rand_pool(rand_pool_),begin(begin_),end(end_) {} + typename ViewType::const_value_type begin_, + typename ViewType::const_value_type end_) + : a(a_), rand_pool(rand_pool_), begin(begin_), end(end_) {} KOKKOS_INLINE_FUNCTION - void operator() (IndexType i) const { + void operator()(IndexType i) const { typename RandomPool::generator_type gen = rand_pool.get_state(); - for(IndexType j=0;j(a.extent(0))) { - for(IndexType k=0;k(a.extent(1));k++) - for(IndexType l=0;l(a.extent(2));l++) - a(idx,k,l) = Rand::draw(gen,begin,end); + for (IndexType j = 0; j < loops; j++) { + const IndexType idx = i * loops + j; + if (idx < static_cast(a.extent(0))) { + for (IndexType k = 0; k < static_cast(a.extent(1)); k++) + for (IndexType l = 0; l < static_cast(a.extent(2)); l++) + a(idx, k, l) = Rand::draw(gen, begin, end); } } rand_pool.free_state(gen); } }; -template -struct fill_random_functor_begin_end{ +template +struct fill_random_functor_begin_end { typedef typename ViewType::execution_space execution_space; ViewType a; RandomPool rand_pool; - typename ViewType::const_value_type begin,end; + typename ViewType::const_value_type begin, end; - typedef rand Rand; 
+ typedef rand + Rand; fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_, - typename ViewType::const_value_type begin_, typename ViewType::const_value_type end_): - a(a_),rand_pool(rand_pool_),begin(begin_),end(end_) {} + typename ViewType::const_value_type begin_, + typename ViewType::const_value_type end_) + : a(a_), rand_pool(rand_pool_), begin(begin_), end(end_) {} KOKKOS_INLINE_FUNCTION - void operator() (IndexType i) const { + void operator()(IndexType i) const { typename RandomPool::generator_type gen = rand_pool.get_state(); - for(IndexType j=0;j(a.extent(0))) { - for(IndexType k=0;k(a.extent(1));k++) - for(IndexType l=0;l(a.extent(2));l++) - for(IndexType m=0;m(a.extent(3));m++) - a(idx,k,l,m) = Rand::draw(gen,begin,end); + for (IndexType j = 0; j < loops; j++) { + const IndexType idx = i * loops + j; + if (idx < static_cast(a.extent(0))) { + for (IndexType k = 0; k < static_cast(a.extent(1)); k++) + for (IndexType l = 0; l < static_cast(a.extent(2)); l++) + for (IndexType m = 0; m < static_cast(a.extent(3)); m++) + a(idx, k, l, m) = Rand::draw(gen, begin, end); } } rand_pool.free_state(gen); } }; -template -struct fill_random_functor_begin_end{ +template +struct fill_random_functor_begin_end { typedef typename ViewType::execution_space execution_space; ViewType a; RandomPool rand_pool; - typename ViewType::const_value_type begin,end; + typename ViewType::const_value_type begin, end; - typedef rand Rand; + typedef rand + Rand; fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_, - typename ViewType::const_value_type begin_, typename ViewType::const_value_type end_): - a(a_),rand_pool(rand_pool_),begin(begin_),end(end_) {} + typename ViewType::const_value_type begin_, + typename ViewType::const_value_type end_) + : a(a_), rand_pool(rand_pool_), begin(begin_), end(end_) {} KOKKOS_INLINE_FUNCTION - void operator() (IndexType i) const { + void operator()(IndexType i) const { typename RandomPool::generator_type gen = 
rand_pool.get_state(); - for(IndexType j=0;j(a.extent(0))){ - for(IndexType l=0;l(a.extent(1));l++) - for(IndexType m=0;m(a.extent(2));m++) - for(IndexType n=0;n(a.extent(3));n++) - for(IndexType o=0;o(a.extent(4));o++) - a(idx,l,m,n,o) = Rand::draw(gen,begin,end); + for (IndexType j = 0; j < loops; j++) { + const IndexType idx = i * loops + j; + if (idx < static_cast(a.extent(0))) { + for (IndexType l = 0; l < static_cast(a.extent(1)); l++) + for (IndexType m = 0; m < static_cast(a.extent(2)); m++) + for (IndexType n = 0; n < static_cast(a.extent(3)); n++) + for (IndexType o = 0; o < static_cast(a.extent(4)); + o++) + a(idx, l, m, n, o) = Rand::draw(gen, begin, end); } } rand_pool.free_state(gen); } }; -template -struct fill_random_functor_begin_end{ +template +struct fill_random_functor_begin_end { typedef typename ViewType::execution_space execution_space; ViewType a; RandomPool rand_pool; - typename ViewType::const_value_type begin,end; + typename ViewType::const_value_type begin, end; - typedef rand Rand; + typedef rand + Rand; fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_, - typename ViewType::const_value_type begin_, typename ViewType::const_value_type end_): - a(a_),rand_pool(rand_pool_),begin(begin_),end(end_) {} + typename ViewType::const_value_type begin_, + typename ViewType::const_value_type end_) + : a(a_), rand_pool(rand_pool_), begin(begin_), end(end_) {} KOKKOS_INLINE_FUNCTION - void operator() (IndexType i) const { + void operator()(IndexType i) const { typename RandomPool::generator_type gen = rand_pool.get_state(); - for(IndexType j=0;j(a.extent(0))) { - for(IndexType k=0;k(a.extent(1));k++) - for(IndexType l=0;l(a.extent(2));l++) - for(IndexType m=0;m(a.extent(3));m++) - for(IndexType n=0;n(a.extent(4));n++) - for(IndexType o=0;o(a.extent(5));o++) - a(idx,k,l,m,n,o) = Rand::draw(gen,begin,end); + for (IndexType j = 0; j < loops; j++) { + const IndexType idx = i * loops + j; + if (idx < static_cast(a.extent(0))) { + for 
(IndexType k = 0; k < static_cast(a.extent(1)); k++) + for (IndexType l = 0; l < static_cast(a.extent(2)); l++) + for (IndexType m = 0; m < static_cast(a.extent(3)); m++) + for (IndexType n = 0; n < static_cast(a.extent(4)); + n++) + for (IndexType o = 0; o < static_cast(a.extent(5)); + o++) + a(idx, k, l, m, n, o) = Rand::draw(gen, begin, end); } } rand_pool.free_state(gen); } }; - -template -struct fill_random_functor_begin_end{ +template +struct fill_random_functor_begin_end { typedef typename ViewType::execution_space execution_space; ViewType a; RandomPool rand_pool; - typename ViewType::const_value_type begin,end; + typename ViewType::const_value_type begin, end; - typedef rand Rand; + typedef rand + Rand; fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_, - typename ViewType::const_value_type begin_, typename ViewType::const_value_type end_): - a(a_),rand_pool(rand_pool_),begin(begin_),end(end_) {} + typename ViewType::const_value_type begin_, + typename ViewType::const_value_type end_) + : a(a_), rand_pool(rand_pool_), begin(begin_), end(end_) {} KOKKOS_INLINE_FUNCTION - void operator() (IndexType i) const { + void operator()(IndexType i) const { typename RandomPool::generator_type gen = rand_pool.get_state(); - for(IndexType j=0;j(a.extent(0))) { - for(IndexType k=0;k(a.extent(1));k++) - for(IndexType l=0;l(a.extent(2));l++) - for(IndexType m=0;m(a.extent(3));m++) - for(IndexType n=0;n(a.extent(4));n++) - for(IndexType o=0;o(a.extent(5));o++) - for(IndexType p=0;p(a.extent(6));p++) - a(idx,k,l,m,n,o,p) = Rand::draw(gen,begin,end); + for (IndexType j = 0; j < loops; j++) { + const IndexType idx = i * loops + j; + if (idx < static_cast(a.extent(0))) { + for (IndexType k = 0; k < static_cast(a.extent(1)); k++) + for (IndexType l = 0; l < static_cast(a.extent(2)); l++) + for (IndexType m = 0; m < static_cast(a.extent(3)); m++) + for (IndexType n = 0; n < static_cast(a.extent(4)); + n++) + for (IndexType o = 0; o < static_cast(a.extent(5)); + 
o++) + for (IndexType p = 0; p < static_cast(a.extent(6)); + p++) + a(idx, k, l, m, n, o, p) = Rand::draw(gen, begin, end); } } rand_pool.free_state(gen); } }; -template -struct fill_random_functor_begin_end{ +template +struct fill_random_functor_begin_end { typedef typename ViewType::execution_space execution_space; ViewType a; RandomPool rand_pool; - typename ViewType::const_value_type begin,end; + typename ViewType::const_value_type begin, end; - typedef rand Rand; + typedef rand + Rand; fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_, - typename ViewType::const_value_type begin_, typename ViewType::const_value_type end_): - a(a_),rand_pool(rand_pool_),begin(begin_),end(end_) {} + typename ViewType::const_value_type begin_, + typename ViewType::const_value_type end_) + : a(a_), rand_pool(rand_pool_), begin(begin_), end(end_) {} KOKKOS_INLINE_FUNCTION - void operator() (IndexType i) const { + void operator()(IndexType i) const { typename RandomPool::generator_type gen = rand_pool.get_state(); - for(IndexType j=0;j(a.extent(0))) { - for(IndexType k=0;k(a.extent(1));k++) - for(IndexType l=0;l(a.extent(2));l++) - for(IndexType m=0;m(a.extent(3));m++) - for(IndexType n=0;n(a.extent(4));n++) - for(IndexType o=0;o(a.extent(5));o++) - for(IndexType p=0;p(a.extent(6));p++) - for(IndexType q=0;q(a.extent(7));q++) - a(idx,k,l,m,n,o,p,q) = Rand::draw(gen,begin,end); + for (IndexType j = 0; j < loops; j++) { + const IndexType idx = i * loops + j; + if (idx < static_cast(a.extent(0))) { + for (IndexType k = 0; k < static_cast(a.extent(1)); k++) + for (IndexType l = 0; l < static_cast(a.extent(2)); l++) + for (IndexType m = 0; m < static_cast(a.extent(3)); m++) + for (IndexType n = 0; n < static_cast(a.extent(4)); + n++) + for (IndexType o = 0; o < static_cast(a.extent(5)); + o++) + for (IndexType p = 0; p < static_cast(a.extent(6)); + p++) + for (IndexType q = 0; + q < static_cast(a.extent(7)); q++) + a(idx, k, l, m, n, o, p, q) = Rand::draw(gen, begin, end); 
} } rand_pool.free_state(gen); } }; -} +} // namespace Impl -template -void fill_random(ViewType a, RandomPool g, typename ViewType::const_value_type range) { +template +void fill_random(ViewType a, RandomPool g, + typename ViewType::const_value_type range) { int64_t LDA = a.extent(0); - if(LDA>0) - parallel_for((LDA+127)/128,Impl::fill_random_functor_range(a,g,range)); + if (LDA > 0) + parallel_for((LDA + 127) / 128, + Impl::fill_random_functor_range( + a, g, range)); } -template -void fill_random(ViewType a, RandomPool g, typename ViewType::const_value_type begin,typename ViewType::const_value_type end ) { +template +void fill_random(ViewType a, RandomPool g, + typename ViewType::const_value_type begin, + typename ViewType::const_value_type end) { int64_t LDA = a.extent(0); - if(LDA>0) - parallel_for((LDA+127)/128,Impl::fill_random_functor_begin_end(a,g,begin,end)); -} + if (LDA > 0) + parallel_for((LDA + 127) / 128, + Impl::fill_random_functor_begin_end( + a, g, begin, end)); } +} // namespace Kokkos #endif diff --git a/lib/kokkos/algorithms/src/Kokkos_Sort.hpp b/lib/kokkos/algorithms/src/Kokkos_Sort.hpp index 7fb8505fe5..b7a988361f 100644 --- a/lib/kokkos/algorithms/src/Kokkos_Sort.hpp +++ b/lib/kokkos/algorithms/src/Kokkos_Sort.hpp @@ -1,13 +1,14 @@ /* //@HEADER // ************************************************************************ -// -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. 
-// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -23,10 +24,10 @@ // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR @@ -36,12 +37,11 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) -// +// // ************************************************************************ //@HEADER */ - #ifndef KOKKOS_SORT_HPP_ #define KOKKOS_SORT_HPP_ @@ -51,125 +51,107 @@ namespace Kokkos { - namespace Impl { +namespace Impl { - template< class DstViewType , class SrcViewType - , int Rank = DstViewType::Rank > - struct CopyOp; +template +struct CopyOp; - template< class DstViewType , class SrcViewType > - struct CopyOp { - KOKKOS_INLINE_FUNCTION - static void copy(DstViewType const& dst, size_t i_dst, - SrcViewType const& src, size_t i_src ) { - dst(i_dst) = src(i_src); - } - }; +template +struct CopyOp { + KOKKOS_INLINE_FUNCTION + static void copy(DstViewType const& dst, size_t i_dst, SrcViewType const& src, + size_t i_src) { + dst(i_dst) = src(i_src); + } +}; - template< class DstViewType , class SrcViewType > - struct CopyOp { - KOKKOS_INLINE_FUNCTION - static void copy(DstViewType const& dst, size_t i_dst, - SrcViewType const& src, size_t i_src ) { - for(int j = 0;j< (int) dst.extent(1); j++) - dst(i_dst,j) = src(i_src,j); - } - }; +template +struct CopyOp { + KOKKOS_INLINE_FUNCTION + static void copy(DstViewType const& dst, size_t i_dst, SrcViewType const& src, + size_t i_src) { + for (int j = 0; j < (int)dst.extent(1); j++) dst(i_dst, j) = src(i_src, j); + } +}; - template< class DstViewType , class SrcViewType > - struct CopyOp { - KOKKOS_INLINE_FUNCTION - static void copy(DstViewType const& dst, size_t i_dst, - SrcViewType const& src, size_t i_src ) { - for(int j = 0; j +struct CopyOp { + KOKKOS_INLINE_FUNCTION + static void copy(DstViewType const& dst, size_t i_dst, SrcViewType const& src, + size_t i_src) { + for (int j = 0; j < dst.extent(1); j++) + for (int k = 0; k < dst.extent(2); k++) + dst(i_dst, j, k) = src(i_src, j, k); } +}; +} // namespace Impl //---------------------------------------------------------------------------- -template< class KeyViewType - , class BinSortOp - , class Space = typename 
KeyViewType::device_type - , class SizeType = typename KeyViewType::memory_space::size_type - > +template class BinSort { -public: - - template< class DstViewType , class SrcViewType > + public: + template struct copy_functor { + typedef typename SrcViewType::const_type src_view_type; - typedef typename SrcViewType::const_type src_view_type ; + typedef Impl::CopyOp copy_op; - typedef Impl::CopyOp< DstViewType , src_view_type > copy_op ; + DstViewType dst_values; + src_view_type src_values; + int dst_offset; - DstViewType dst_values ; - src_view_type src_values ; - int dst_offset ; - - copy_functor( DstViewType const & dst_values_ - , int const & dst_offset_ - , SrcViewType const & src_values_ - ) - : dst_values( dst_values_ ) - , src_values( src_values_ ) - , dst_offset( dst_offset_ ) - {} + copy_functor(DstViewType const& dst_values_, int const& dst_offset_, + SrcViewType const& src_values_) + : dst_values(dst_values_), + src_values(src_values_), + dst_offset(dst_offset_) {} KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - copy_op::copy(dst_values,i+dst_offset,src_values,i); + void operator()(const int& i) const { + copy_op::copy(dst_values, i + dst_offset, src_values, i); } }; - template< class DstViewType - , class PermuteViewType - , class SrcViewType - > + template struct copy_permute_functor { - // If a Kokkos::View then can generate constant random access // otherwise can only use the constant type. 
- typedef typename std::conditional - < Kokkos::is_view< SrcViewType >::value - , Kokkos::View< typename SrcViewType::const_data_type - , typename SrcViewType::array_layout - , typename SrcViewType::device_type - , Kokkos::MemoryTraits - > - , typename SrcViewType::const_type - >::type src_view_type ; - - typedef typename PermuteViewType::const_type perm_view_type ; - - typedef Impl::CopyOp< DstViewType , src_view_type > copy_op ; - - DstViewType dst_values ; - perm_view_type sort_order ; - src_view_type src_values ; - int src_offset ; - - copy_permute_functor( DstViewType const & dst_values_ - , PermuteViewType const & sort_order_ - , SrcViewType const & src_values_ - , int const & src_offset_ - ) - : dst_values( dst_values_ ) - , sort_order( sort_order_ ) - , src_values( src_values_ ) - , src_offset( src_offset_ ) - {} + typedef typename std::conditional< + Kokkos::is_view::value, + Kokkos::View >, + typename SrcViewType::const_type>::type src_view_type; + + typedef typename PermuteViewType::const_type perm_view_type; + + typedef Impl::CopyOp copy_op; + + DstViewType dst_values; + perm_view_type sort_order; + src_view_type src_values; + int src_offset; + + copy_permute_functor(DstViewType const& dst_values_, + PermuteViewType const& sort_order_, + SrcViewType const& src_values_, int const& src_offset_) + : dst_values(dst_values_), + sort_order(sort_order_), + src_values(src_values_), + src_offset(src_offset_) {} KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - copy_op::copy(dst_values,i,src_values,src_offset+sort_order(i)); + void operator()(const int& i) const { + copy_op::copy(dst_values, i, src_values, src_offset + sort_order(i)); } }; - typedef typename Space::execution_space execution_space; + typedef typename Space::execution_space execution_space; typedef BinSortOp bin_op_type; struct bin_count_tag {}; @@ -177,221 +159,236 @@ public: struct bin_binning_tag {}; struct bin_sort_bins_tag {}; -public: - + public: typedef SizeType size_type; 
typedef size_type value_type; typedef Kokkos::View offset_type; typedef Kokkos::View bin_count_type; - typedef typename KeyViewType::const_type const_key_view_type ; + typedef typename KeyViewType::const_type const_key_view_type; // If a Kokkos::View then can generate constant random access // otherwise can only use the constant type. - typedef typename std::conditional - < Kokkos::is_view< KeyViewType >::value - , Kokkos::View< typename KeyViewType::const_data_type, - typename KeyViewType::array_layout, - typename KeyViewType::device_type, - Kokkos::MemoryTraits > - , const_key_view_type - >::type const_rnd_key_view_type; + typedef typename std::conditional< + Kokkos::is_view::value, + Kokkos::View >, + const_key_view_type>::type const_rnd_key_view_type; typedef typename KeyViewType::non_const_value_type non_const_key_scalar; - typedef typename KeyViewType::const_value_type const_key_scalar; - - typedef Kokkos::View > bin_count_atomic_type ; + typedef typename KeyViewType::const_value_type const_key_scalar; -private: + typedef Kokkos::View > + bin_count_atomic_type; + private: const_key_view_type keys; const_rnd_key_view_type keys_rnd; -public: - - BinSortOp bin_op ; - offset_type bin_offsets ; - bin_count_atomic_type bin_count_atomic ; - bin_count_type bin_count_const ; - offset_type sort_order ; + public: + BinSortOp bin_op; + offset_type bin_offsets; + bin_count_atomic_type bin_count_atomic; + bin_count_type bin_count_const; + offset_type sort_order; - int range_begin ; - int range_end ; - bool sort_within_bins ; - -public: + int range_begin; + int range_end; + bool sort_within_bins; + public: BinSort() {} //---------------------------------------- - // Constructor: takes the keys, the binning_operator and optionally whether to sort within bins (default false) - BinSort( const_key_view_type keys_ - , int range_begin_ - , int range_end_ - , BinSortOp bin_op_ - , bool sort_within_bins_ = false - ) - : keys(keys_) - , keys_rnd(keys_) - , bin_op(bin_op_) - , 
bin_offsets() - , bin_count_atomic() - , bin_count_const() - , sort_order() - , range_begin( range_begin_ ) - , range_end( range_end_ ) - , sort_within_bins( sort_within_bins_ ) - { - bin_count_atomic = Kokkos::View("Kokkos::SortImpl::BinSortFunctor::bin_count",bin_op.max_bins()); - bin_count_const = bin_count_atomic; - bin_offsets = offset_type(ViewAllocateWithoutInitializing("Kokkos::SortImpl::BinSortFunctor::bin_offsets"),bin_op.max_bins()); - sort_order = offset_type(ViewAllocateWithoutInitializing("Kokkos::SortImpl::BinSortFunctor::sort_order"),range_end-range_begin); + // Constructor: takes the keys, the binning_operator and optionally whether to + // sort within bins (default false) + BinSort(const_key_view_type keys_, int range_begin_, int range_end_, + BinSortOp bin_op_, bool sort_within_bins_ = false) + : keys(keys_), + keys_rnd(keys_), + bin_op(bin_op_), + bin_offsets(), + bin_count_atomic(), + bin_count_const(), + sort_order(), + range_begin(range_begin_), + range_end(range_end_), + sort_within_bins(sort_within_bins_) { + bin_count_atomic = Kokkos::View( + "Kokkos::SortImpl::BinSortFunctor::bin_count", bin_op.max_bins()); + bin_count_const = bin_count_atomic; + bin_offsets = + offset_type(ViewAllocateWithoutInitializing( + "Kokkos::SortImpl::BinSortFunctor::bin_offsets"), + bin_op.max_bins()); + sort_order = + offset_type(ViewAllocateWithoutInitializing( + "Kokkos::SortImpl::BinSortFunctor::sort_order"), + range_end - range_begin); } - BinSort( const_key_view_type keys_ - , BinSortOp bin_op_ - , bool sort_within_bins_ = false - ) - : BinSort( keys_ , 0 , keys_.extent(0), bin_op_ , sort_within_bins_ ) {} + BinSort(const_key_view_type keys_, BinSortOp bin_op_, + bool sort_within_bins_ = false) + : BinSort(keys_, 0, keys_.extent(0), bin_op_, sort_within_bins_) {} //---------------------------------------- - // Create the permutation vector, the bin_offset array and the bin_count array. 
Can be called again if keys changed + // Create the permutation vector, the bin_offset array and the bin_count + // array. Can be called again if keys changed void create_permute_vector() { - const size_t len = range_end - range_begin ; - Kokkos::parallel_for ("Kokkos::Sort::BinCount",Kokkos::RangePolicy (0,len),*this); - Kokkos::parallel_scan("Kokkos::Sort::BinOffset",Kokkos::RangePolicy (0,bin_op.max_bins()) ,*this); - - Kokkos::deep_copy(bin_count_atomic,0); - Kokkos::parallel_for ("Kokkos::Sort::BinBinning",Kokkos::RangePolicy (0,len),*this); - - if(sort_within_bins) - Kokkos::parallel_for ("Kokkos::Sort::BinSort",Kokkos::RangePolicy(0,bin_op.max_bins()) ,*this); + const size_t len = range_end - range_begin; + Kokkos::parallel_for( + "Kokkos::Sort::BinCount", + Kokkos::RangePolicy(0, len), *this); + Kokkos::parallel_scan("Kokkos::Sort::BinOffset", + Kokkos::RangePolicy( + 0, bin_op.max_bins()), + *this); + + Kokkos::deep_copy(bin_count_atomic, 0); + Kokkos::parallel_for( + "Kokkos::Sort::BinBinning", + Kokkos::RangePolicy(0, len), *this); + + if (sort_within_bins) + Kokkos::parallel_for( + "Kokkos::Sort::BinSort", + Kokkos::RangePolicy( + 0, bin_op.max_bins()), + *this); } - // Sort a subset of a view with respect to the first dimension using the permutation array - template - void sort( ValuesViewType const & values - , int values_range_begin - , int values_range_end) const - { - typedef - Kokkos::View< typename ValuesViewType::data_type, - typename ValuesViewType::array_layout, - typename ValuesViewType::device_type > - scratch_view_type ; - - const size_t len = range_end - range_begin ; - const size_t values_len = values_range_end - values_range_begin ; + // Sort a subset of a view with respect to the first dimension using the + // permutation array + template + void sort(ValuesViewType const& values, int values_range_begin, + int values_range_end) const { + typedef Kokkos::View + scratch_view_type; + + const size_t len = range_end - range_begin; + const 
size_t values_len = values_range_end - values_range_begin; if (len != values_len) { - Kokkos::abort("BinSort::sort: values range length != permutation vector length"); + Kokkos::abort( + "BinSort::sort: values range length != permutation vector length"); } #ifdef KOKKOS_ENABLE_DEPRECATED_CODE - scratch_view_type - sorted_values(ViewAllocateWithoutInitializing("Kokkos::SortImpl::BinSortFunctor::sorted_values"), - len, - values.extent(1), - values.extent(2), - values.extent(3), - values.extent(4), - values.extent(5), - values.extent(6), - values.extent(7)); + scratch_view_type sorted_values( + ViewAllocateWithoutInitializing( + "Kokkos::SortImpl::BinSortFunctor::sorted_values"), + len, values.extent(1), values.extent(2), values.extent(3), + values.extent(4), values.extent(5), values.extent(6), values.extent(7)); #else - scratch_view_type - sorted_values(ViewAllocateWithoutInitializing("Kokkos::SortImpl::BinSortFunctor::sorted_values"), - values.rank_dynamic > 0 ? len : KOKKOS_IMPL_CTOR_DEFAULT_ARG, - values.rank_dynamic > 1 ? values.extent(1) : KOKKOS_IMPL_CTOR_DEFAULT_ARG , - values.rank_dynamic > 2 ? values.extent(2) : KOKKOS_IMPL_CTOR_DEFAULT_ARG, - values.rank_dynamic > 3 ? values.extent(3) : KOKKOS_IMPL_CTOR_DEFAULT_ARG, - values.rank_dynamic > 4 ? values.extent(4) : KOKKOS_IMPL_CTOR_DEFAULT_ARG, - values.rank_dynamic > 5 ? values.extent(5) : KOKKOS_IMPL_CTOR_DEFAULT_ARG, - values.rank_dynamic > 6 ? values.extent(6) : KOKKOS_IMPL_CTOR_DEFAULT_ARG, - values.rank_dynamic > 7 ? values.extent(7) : KOKKOS_IMPL_CTOR_DEFAULT_ARG); + scratch_view_type sorted_values( + ViewAllocateWithoutInitializing( + "Kokkos::SortImpl::BinSortFunctor::sorted_values"), + values.rank_dynamic > 0 ? len : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + values.rank_dynamic > 1 ? values.extent(1) + : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + values.rank_dynamic > 2 ? values.extent(2) + : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + values.rank_dynamic > 3 ? 
values.extent(3) + : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + values.rank_dynamic > 4 ? values.extent(4) + : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + values.rank_dynamic > 5 ? values.extent(5) + : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + values.rank_dynamic > 6 ? values.extent(6) + : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + values.rank_dynamic > 7 ? values.extent(7) + : KOKKOS_IMPL_CTOR_DEFAULT_ARG); #endif { - copy_permute_functor< scratch_view_type /* DstViewType */ - , offset_type /* PermuteViewType */ - , ValuesViewType /* SrcViewType */ - > - functor( sorted_values , sort_order , values, values_range_begin - range_begin ); - - parallel_for("Kokkos::Sort::CopyPermute", Kokkos::RangePolicy(0,len),functor); + copy_permute_functor + functor(sorted_values, sort_order, values, + values_range_begin - range_begin); + + parallel_for("Kokkos::Sort::CopyPermute", + Kokkos::RangePolicy(0, len), functor); } { - copy_functor< ValuesViewType , scratch_view_type > - functor( values , range_begin , sorted_values ); + copy_functor functor( + values, range_begin, sorted_values); - parallel_for("Kokkos::Sort::Copy", Kokkos::RangePolicy(0,len),functor); + parallel_for("Kokkos::Sort::Copy", + Kokkos::RangePolicy(0, len), functor); } Kokkos::fence(); } - template - void sort( ValuesViewType const & values ) const - { - this->sort( values, 0, /*values.extent(0)*/ range_end - range_begin ); + template + void sort(ValuesViewType const& values) const { + this->sort(values, 0, /*values.extent(0)*/ range_end - range_begin); } // Get the permutation vector KOKKOS_INLINE_FUNCTION - offset_type get_permute_vector() const { return sort_order;} + offset_type get_permute_vector() const { return sort_order; } // Get the start offsets for each bin KOKKOS_INLINE_FUNCTION - offset_type get_bin_offsets() const { return bin_offsets;} + offset_type get_bin_offsets() const { return bin_offsets; } // Get the count for each bin KOKKOS_INLINE_FUNCTION - bin_count_type get_bin_count() const {return bin_count_const;} - -public: + 
bin_count_type get_bin_count() const { return bin_count_const; } + public: KOKKOS_INLINE_FUNCTION - void operator() (const bin_count_tag& tag, const int& i) const { - const int j = range_begin + i ; + void operator()(const bin_count_tag& tag, const int& i) const { + const int j = range_begin + i; bin_count_atomic(bin_op.bin(keys, j))++; } KOKKOS_INLINE_FUNCTION - void operator() (const bin_offset_tag& tag, const int& i, value_type& offset, const bool& final) const { - if(final) { + void operator()(const bin_offset_tag& tag, const int& i, value_type& offset, + const bool& final) const { + if (final) { bin_offsets(i) = offset; } - offset+=bin_count_const(i); + offset += bin_count_const(i); } KOKKOS_INLINE_FUNCTION - void operator() (const bin_binning_tag& tag, const int& i) const { - const int j = range_begin + i ; - const int bin = bin_op.bin(keys,j); + void operator()(const bin_binning_tag& tag, const int& i) const { + const int j = range_begin + i; + const int bin = bin_op.bin(keys, j); const int count = bin_count_atomic(bin)++; - sort_order(bin_offsets(bin) + count) = j ; + sort_order(bin_offsets(bin) + count) = j; } KOKKOS_INLINE_FUNCTION - void operator() (const bin_sort_bins_tag& tag, const int&i ) const { + void operator()(const bin_sort_bins_tag& tag, const int& i) const { auto bin_size = bin_count_const(i); if (bin_size <= 1) return; - int upper_bound = bin_offsets(i)+bin_size; - bool sorted = false; - while(!sorted) { - sorted = true; + int upper_bound = bin_offsets(i) + bin_size; + bool sorted = false; + while (!sorted) { + sorted = true; int old_idx = sort_order(bin_offsets(i)); int new_idx; - for(int k=bin_offsets(i)+1; k +template struct BinOp1D { int max_bins_; double mul_; typename KeyViewType::const_value_type range_; typename KeyViewType::const_value_type min_; - BinOp1D():max_bins_(0),mul_(0.0), - range_(typename KeyViewType::const_value_type()), - min_(typename KeyViewType::const_value_type()) {} + BinOp1D() + : max_bins_(0), + mul_(0.0), + 
range_(typename KeyViewType::const_value_type()), + min_(typename KeyViewType::const_value_type()) {} - //Construct BinOp with number of bins, minimum value and maxuimum value + // Construct BinOp with number of bins, minimum value and maxuimum value BinOp1D(int max_bins__, typename KeyViewType::const_value_type min, - typename KeyViewType::const_value_type max ) - :max_bins_(max_bins__+1),mul_(1.0*max_bins__/(max-min)),range_(max-min),min_(min) {} - - //Determine bin index from key value - template - KOKKOS_INLINE_FUNCTION - int bin(ViewType& keys, const int& i) const { - return int(mul_*(keys(i)-min_)); + typename KeyViewType::const_value_type max) + : max_bins_(max_bins__ + 1), + mul_(1.0 * max_bins__ / (max - min)), + range_(max - min), + min_(min) {} + + // Determine bin index from key value + template + KOKKOS_INLINE_FUNCTION int bin(ViewType& keys, const int& i) const { + return int(mul_ * (keys(i) - min_)); } - //Return maximum bin index + 1 + // Return maximum bin index + 1 KOKKOS_INLINE_FUNCTION - int max_bins() const { - return max_bins_; - } + int max_bins() const { return max_bins_; } - //Compare to keys within a bin if true new_val will be put before old_val - template - KOKKOS_INLINE_FUNCTION - bool operator()(ViewType& keys, iType1& i1, iType2& i2) const { - return keys(i1) + KOKKOS_INLINE_FUNCTION bool operator()(ViewType& keys, iType1& i1, + iType2& i2) const { + return keys(i1) < keys(i2); } }; -template +template struct BinOp3D { int max_bins_[3]; double mul_[3]; @@ -450,43 +449,42 @@ struct BinOp3D { BinOp3D() {} BinOp3D(int max_bins__[], typename KeyViewType::const_value_type min[], - typename KeyViewType::const_value_type max[] ) - { + typename KeyViewType::const_value_type max[]) { max_bins_[0] = max_bins__[0]; max_bins_[1] = max_bins__[1]; max_bins_[2] = max_bins__[2]; - mul_[0] = 1.0*max_bins__[0]/(max[0]-min[0]); - mul_[1] = 1.0*max_bins__[1]/(max[1]-min[1]); - mul_[2] = 1.0*max_bins__[2]/(max[2]-min[2]); - range_[0] = max[0]-min[0]; - 
range_[1] = max[1]-min[1]; - range_[2] = max[2]-min[2]; - min_[0] = min[0]; - min_[1] = min[1]; - min_[2] = min[2]; + mul_[0] = 1.0 * max_bins__[0] / (max[0] - min[0]); + mul_[1] = 1.0 * max_bins__[1] / (max[1] - min[1]); + mul_[2] = 1.0 * max_bins__[2] / (max[2] - min[2]); + range_[0] = max[0] - min[0]; + range_[1] = max[1] - min[1]; + range_[2] = max[2] - min[2]; + min_[0] = min[0]; + min_[1] = min[1]; + min_[2] = min[2]; } - template - KOKKOS_INLINE_FUNCTION - int bin(ViewType& keys, const int& i) const { - return int( (((int(mul_[0]*(keys(i,0)-min_[0]))*max_bins_[1]) + - int(mul_[1]*(keys(i,1)-min_[1])))*max_bins_[2]) + - int(mul_[2]*(keys(i,2)-min_[2]))); - } - - KOKKOS_INLINE_FUNCTION - int max_bins() const { - return max_bins_[0]*max_bins_[1]*max_bins_[2]; + template + KOKKOS_INLINE_FUNCTION int bin(ViewType& keys, const int& i) const { + return int((((int(mul_[0] * (keys(i, 0) - min_[0])) * max_bins_[1]) + + int(mul_[1] * (keys(i, 1) - min_[1]))) * + max_bins_[2]) + + int(mul_[2] * (keys(i, 2) - min_[2]))); } - template KOKKOS_INLINE_FUNCTION - bool operator()(ViewType& keys, iType1& i1 , iType2& i2) const { - if (keys(i1,0)>keys(i2,0)) return true; - else if (keys(i1,0)==keys(i2,0)) { - if (keys(i1,1)>keys(i2,1)) return true; - else if (keys(i1,1)==keys(i2,1)) { - if (keys(i1,2)>keys(i2,2)) return true; + int max_bins() const { return max_bins_[0] * max_bins_[1] * max_bins_[2]; } + + template + KOKKOS_INLINE_FUNCTION bool operator()(ViewType& keys, iType1& i1, + iType2& i2) const { + if (keys(i1, 0) > keys(i2, 0)) + return true; + else if (keys(i1, 0) == keys(i2, 0)) { + if (keys(i1, 1) > keys(i2, 1)) + return true; + else if (keys(i1, 1) == keys(i2, 1)) { + if (keys(i1, 2) > keys(i2, 2)) return true; } } return false; @@ -495,85 +493,80 @@ struct BinOp3D { namespace Impl { -template +template bool try_std_sort(ViewType view) { - bool possible = true; - size_t stride[8] = { view.stride_0() - , view.stride_1() - , view.stride_2() - , view.stride_3() - , 
view.stride_4() - , view.stride_5() - , view.stride_6() - , view.stride_7() - }; - possible = possible && std::is_same::value; - possible = possible && (ViewType::Rank == 1); - possible = possible && (stride[0] == 1); - if(possible) { - std::sort(view.data(),view.data()+view.extent(0)); + bool possible = true; + size_t stride[8] = {view.stride_0(), view.stride_1(), view.stride_2(), + view.stride_3(), view.stride_4(), view.stride_5(), + view.stride_6(), view.stride_7()}; + possible = possible && + std::is_same::value; + possible = possible && (ViewType::Rank == 1); + possible = possible && (stride[0] == 1); + if (possible) { + std::sort(view.data(), view.data() + view.extent(0)); } return possible; } -template +template struct min_max_functor { - typedef Kokkos::MinMaxScalar minmax_scalar; + typedef Kokkos::MinMaxScalar + minmax_scalar; ViewType view; - min_max_functor(const ViewType& view_):view(view_) {} + min_max_functor(const ViewType& view_) : view(view_) {} KOKKOS_INLINE_FUNCTION - void operator() (const size_t& i, minmax_scalar& minmax) const { - if(view(i) < minmax.min_val) minmax.min_val = view(i); - if(view(i) > minmax.max_val) minmax.max_val = view(i); + void operator()(const size_t& i, minmax_scalar& minmax) const { + if (view(i) < minmax.min_val) minmax.min_val = view(i); + if (view(i) > minmax.max_val) minmax.max_val = view(i); } }; -} +} // namespace Impl -template -void sort( ViewType const & view , bool const always_use_kokkos_sort = false) -{ - if(!always_use_kokkos_sort) { - if(Impl::try_std_sort(view)) return; +template +void sort(ViewType const& view, bool const always_use_kokkos_sort = false) { + if (!always_use_kokkos_sort) { + if (Impl::try_std_sort(view)) return; } typedef BinOp1D CompType; Kokkos::MinMaxScalar result; Kokkos::MinMax reducer(result); - parallel_reduce("Kokkos::Sort::FindExtent",Kokkos::RangePolicy(0,view.extent(0)), - Impl::min_max_functor(view),reducer); - if(result.min_val == result.max_val) return; - BinSort 
bin_sort(view,CompType(view.extent(0)/2,result.min_val,result.max_val),true); + parallel_reduce("Kokkos::Sort::FindExtent", + Kokkos::RangePolicy( + 0, view.extent(0)), + Impl::min_max_functor(view), reducer); + if (result.min_val == result.max_val) return; + BinSort bin_sort( + view, CompType(view.extent(0) / 2, result.min_val, result.max_val), true); bin_sort.create_permute_vector(); bin_sort.sort(view); } -template -void sort( ViewType view - , size_t const begin - , size_t const end - ) -{ - typedef Kokkos::RangePolicy range_policy ; +template +void sort(ViewType view, size_t const begin, size_t const end) { + typedef Kokkos::RangePolicy range_policy; typedef BinOp1D CompType; Kokkos::MinMaxScalar result; Kokkos::MinMax reducer(result); - parallel_reduce("Kokkos::Sort::FindExtent", range_policy( begin , end ) - , Impl::min_max_functor(view),reducer ); + parallel_reduce("Kokkos::Sort::FindExtent", range_policy(begin, end), + Impl::min_max_functor(view), reducer); - if(result.min_val == result.max_val) return; + if (result.min_val == result.max_val) return; - BinSort - bin_sort(view,begin,end,CompType((end-begin)/2,result.min_val,result.max_val),true); + BinSort bin_sort( + view, begin, end, + CompType((end - begin) / 2, result.min_val, result.max_val), true); bin_sort.create_permute_vector(); - bin_sort.sort(view,begin,end); + bin_sort.sort(view, begin, end); } -} +} // namespace Kokkos #endif diff --git a/lib/kokkos/algorithms/unit_tests/CMakeLists.txt b/lib/kokkos/algorithms/unit_tests/CMakeLists.txt index e238b37c8e..6fb08ce2ed 100644 --- a/lib/kokkos/algorithms/unit_tests/CMakeLists.txt +++ b/lib/kokkos/algorithms/unit_tests/CMakeLists.txt @@ -1,18 +1,12 @@ -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) -INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR}) -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src ) +#Leave these here for now - I don't need transitive deps anyway 
+KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +KOKKOS_INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR}) +KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src ) -IF(NOT KOKKOS_HAS_TRILINOS) - IF(KOKKOS_SEPARATE_LIBS) - set(TEST_LINK_TARGETS kokkoscore) - ELSE() - set(TEST_LINK_TARGETS kokkos) - ENDIF() -ENDIF() SET(GTEST_SOURCE_DIR ${${PARENT_PACKAGE_NAME}_SOURCE_DIR}/tpls/gtest) -INCLUDE_DIRECTORIES(${GTEST_SOURCE_DIR}) +KOKKOS_INCLUDE_DIRECTORIES(${GTEST_SOURCE_DIR}) # mfh 03 Nov 2017: The gtest library used here must have a different # name than that of the gtest library built in KokkosCore. We can't @@ -20,23 +14,20 @@ INCLUDE_DIRECTORIES(${GTEST_SOURCE_DIR}) # possible to build only (e.g.,) KokkosAlgorithms tests, without # building KokkosCore tests. -SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DGTEST_HAS_PTHREAD=0") -TRIBITS_ADD_LIBRARY( +KOKKOS_ADD_TEST_LIBRARY( kokkosalgorithms_gtest HEADERS ${GTEST_SOURCE_DIR}/gtest/gtest.h SOURCES ${GTEST_SOURCE_DIR}/gtest/gtest-all.cc - TESTONLY - ) +) +KOKKOS_TARGET_COMPILE_DEFINITIONS(kokkosalgorithms_gtest PUBLIC "-DGTEST_HAS_PTHREAD=0") SET(SOURCES UnitTestMain.cpp TestCuda.cpp ) -SET(LIBRARIES kokkoscore) - -IF(Kokkos_ENABLE_OpenMP) +IF(Kokkos_ENABLE_OPENMP) LIST( APPEND SOURCES TestOpenMP.cpp ) @@ -48,23 +39,19 @@ IF(Kokkos_ENABLE_HPX) ) ENDIF() -IF(Kokkos_ENABLE_Serial) +IF(Kokkos_ENABLE_SERIAL) LIST( APPEND SOURCES TestSerial.cpp ) ENDIF() -IF(Kokkos_ENABLE_Pthread) +IF(Kokkos_ENABLE_PTHREAD) LIST( APPEND SOURCES TestThreads.cpp ) ENDIF() -TRIBITS_ADD_EXECUTABLE_AND_TEST( +KOKKOS_ADD_EXECUTABLE_AND_TEST( UnitTest SOURCES ${SOURCES} - COMM serial mpi - NUM_MPI_PROCS 1 - FAIL_REGULAR_EXPRESSION " FAILED " - TESTONLYLIBS kokkosalgorithms_gtest ${TEST_LINK_TARGETS} - ) +) diff --git a/lib/kokkos/algorithms/unit_tests/TestCuda.cpp b/lib/kokkos/algorithms/unit_tests/TestCuda.cpp index 86fdccd0e7..ab727b0326 100644 --- a/lib/kokkos/algorithms/unit_tests/TestCuda.cpp 
+++ b/lib/kokkos/algorithms/unit_tests/TestCuda.cpp @@ -2,10 +2,11 @@ //@HEADER // ************************************************************************ // -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). // -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Redistribution and use in source and binary forms, with or without @@ -23,10 +24,10 @@ // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR @@ -57,51 +58,31 @@ namespace Test { -class cuda : public ::testing::Test { -protected: - static void SetUpTestCase() - { - } - static void TearDownTestCase() - { - } -}; - -void cuda_test_random_xorshift64( int num_draws ) -{ +void cuda_test_random_xorshift64(int num_draws) { Impl::test_random >(num_draws); } -void cuda_test_random_xorshift1024( int num_draws ) -{ +void cuda_test_random_xorshift1024(int num_draws) { Impl::test_random >(num_draws); } +#define CUDA_RANDOM_XORSHIFT64(num_draws) \ + TEST(cuda, Random_XorShift64) { cuda_test_random_xorshift64(num_draws); } -#define CUDA_RANDOM_XORSHIFT64( num_draws ) \ - TEST_F( cuda, Random_XorShift64 ) { \ - cuda_test_random_xorshift64(num_draws); \ - } - -#define CUDA_RANDOM_XORSHIFT1024( num_draws ) \ - TEST_F( cuda, Random_XorShift1024 ) { \ - cuda_test_random_xorshift1024(num_draws); \ - } +#define CUDA_RANDOM_XORSHIFT1024(num_draws) \ + TEST(cuda, Random_XorShift1024) { cuda_test_random_xorshift1024(num_draws); } -#define CUDA_SORT_UNSIGNED( size ) \ - TEST_F( cuda, SortUnsigned ) { \ - Impl::test_sort< Kokkos::Cuda, unsigned >(size); \ - } +#define CUDA_SORT_UNSIGNED(size) \ + TEST(cuda, SortUnsigned) { Impl::test_sort(size); } -CUDA_RANDOM_XORSHIFT64( 132141141 ) -CUDA_RANDOM_XORSHIFT1024( 52428813 ) +CUDA_RANDOM_XORSHIFT64(132141141) +CUDA_RANDOM_XORSHIFT1024(52428813) CUDA_SORT_UNSIGNED(171) #undef CUDA_RANDOM_XORSHIFT64 #undef CUDA_RANDOM_XORSHIFT1024 #undef CUDA_SORT_UNSIGNED -} +} // namespace Test #else void KOKKOS_ALGORITHMS_UNITTESTS_TESTCUDA_PREVENT_LINK_ERROR() {} -#endif /* #ifdef KOKKOS_ENABLE_CUDA */ - +#endif /* #ifdef KOKKOS_ENABLE_CUDA */ diff --git a/lib/kokkos/algorithms/unit_tests/TestHPX.cpp b/lib/kokkos/algorithms/unit_tests/TestHPX.cpp index 
e5b7dbdb7a..2981e97945 100644 --- a/lib/kokkos/algorithms/unit_tests/TestHPX.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestHPX.cpp @@ -2,10 +2,11 @@ //@HEADER // ************************************************************************ // -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). // -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Redistribution and use in source and binary forms, with or without @@ -23,10 +24,10 @@ // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR @@ -41,7 +42,6 @@ //@HEADER */ - #include #ifdef KOKKOS_ENABLE_HPX @@ -55,42 +55,33 @@ namespace Test { -class hpx : public ::testing::Test { -protected: - static void SetUpTestCase() - { - std::cout << std::setprecision(5) << std::scientific; - } - - static void TearDownTestCase() - { +#define HPX_RANDOM_XORSHIFT64(num_draws) \ + TEST(hpx, Random_XorShift64) { \ + Impl::test_random< \ + Kokkos::Random_XorShift64_Pool >( \ + num_draws); \ } -}; -#define HPX_RANDOM_XORSHIFT64( num_draws ) \ - TEST_F( hpx, Random_XorShift64 ) { \ - Impl::test_random >(num_draws); \ +#define HPX_RANDOM_XORSHIFT1024(num_draws) \ + TEST(hpx, Random_XorShift1024) { \ + Impl::test_random< \ + Kokkos::Random_XorShift1024_Pool >( \ + num_draws); \ } -#define HPX_RANDOM_XORSHIFT1024( num_draws ) \ - TEST_F( hpx, Random_XorShift1024 ) { \ - Impl::test_random >(num_draws); \ +#define HPX_SORT_UNSIGNED(size) \ + TEST(hpx, SortUnsigned) { \ + Impl::test_sort(size); \ } -#define HPX_SORT_UNSIGNED( size ) \ - TEST_F( hpx, SortUnsigned ) { \ - Impl::test_sort< Kokkos::Experimental::HPX, unsigned >(size); \ - } - -HPX_RANDOM_XORSHIFT64( 10240000 ) -HPX_RANDOM_XORSHIFT1024( 10130144 ) +HPX_RANDOM_XORSHIFT64(10240000) +HPX_RANDOM_XORSHIFT1024(10130144) HPX_SORT_UNSIGNED(171) #undef HPX_RANDOM_XORSHIFT64 #undef HPX_RANDOM_XORSHIFT1024 #undef HPX_SORT_UNSIGNED -} // namespace test +} // namespace Test #else void KOKKOS_ALGORITHMS_UNITTESTS_TESTHPX_PREVENT_LINK_ERROR() {} #endif - diff --git a/lib/kokkos/algorithms/unit_tests/TestOpenMP.cpp b/lib/kokkos/algorithms/unit_tests/TestOpenMP.cpp index c4ddde7b7f..3a9e306014 100644 --- a/lib/kokkos/algorithms/unit_tests/TestOpenMP.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestOpenMP.cpp @@ -2,10 +2,11 @@ //@HEADER // 
************************************************************************ // -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). // -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Redistribution and use in source and binary forms, with or without @@ -23,10 +24,10 @@ // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR @@ -41,7 +42,6 @@ //@HEADER */ - #include #ifdef KOKKOS_ENABLE_OPENMP @@ -55,42 +55,31 @@ namespace Test { -class openmp : public ::testing::Test { -protected: - static void SetUpTestCase() - { - std::cout << std::setprecision(5) << std::scientific; - } - - static void TearDownTestCase() - { +#define OPENMP_RANDOM_XORSHIFT64(num_draws) \ + TEST(openmp, Random_XorShift64) { \ + Impl::test_random >( \ + num_draws); \ } -}; -#define OPENMP_RANDOM_XORSHIFT64( num_draws ) \ - TEST_F( openmp, Random_XorShift64 ) { \ - Impl::test_random >(num_draws); \ +#define OPENMP_RANDOM_XORSHIFT1024(num_draws) \ + TEST(openmp, Random_XorShift1024) { \ + Impl::test_random >( \ + num_draws); \ } -#define OPENMP_RANDOM_XORSHIFT1024( num_draws ) \ - TEST_F( openmp, Random_XorShift1024 ) { \ - Impl::test_random >(num_draws); \ +#define OPENMP_SORT_UNSIGNED(size) \ + TEST(openmp, SortUnsigned) { \ + Impl::test_sort(size); \ } -#define OPENMP_SORT_UNSIGNED( size ) \ - TEST_F( openmp, SortUnsigned ) { \ - Impl::test_sort< Kokkos::OpenMP, unsigned >(size); \ - } - -OPENMP_RANDOM_XORSHIFT64( 10240000 ) -OPENMP_RANDOM_XORSHIFT1024( 10130144 ) +OPENMP_RANDOM_XORSHIFT64(10240000) +OPENMP_RANDOM_XORSHIFT1024(10130144) OPENMP_SORT_UNSIGNED(171) #undef OPENMP_RANDOM_XORSHIFT64 #undef OPENMP_RANDOM_XORSHIFT1024 #undef OPENMP_SORT_UNSIGNED -} // namespace test +} // namespace Test #else void KOKKOS_ALGORITHMS_UNITTESTS_TESTOPENMP_PREVENT_LINK_ERROR() {} #endif - diff --git a/lib/kokkos/algorithms/unit_tests/TestROCm.cpp b/lib/kokkos/algorithms/unit_tests/TestROCm.cpp index 15179509bb..29814cca3e 100644 --- a/lib/kokkos/algorithms/unit_tests/TestROCm.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestROCm.cpp @@ -2,10 +2,11 @@ //@HEADER // 
************************************************************************ // -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). // -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Redistribution and use in source and binary forms, with or without @@ -23,10 +24,10 @@ // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR @@ -57,52 +58,35 @@ namespace Test { -class rocm : public ::testing::Test { -protected: - static void SetUpTestCase() - { - std::cout << std::setprecision(5) << std::scientific; - } - static void TearDownTestCase() - { - } -}; - -void rocm_test_random_xorshift64( int num_draws ) -{ - Impl::test_random >(num_draws); +void rocm_test_random_xorshift64(int num_draws) { + Impl::test_random< + Kokkos::Random_XorShift64_Pool >(num_draws); } -void rocm_test_random_xorshift1024( int num_draws ) -{ - Impl::test_random >(num_draws); +void rocm_test_random_xorshift1024(int num_draws) { + Impl::test_random< + Kokkos::Random_XorShift1024_Pool >(num_draws); } +#define ROCM_RANDOM_XORSHIFT64(num_draws) \ + TEST(rocm, Random_XorShift64) { rocm_test_random_xorshift64(num_draws); } -#define ROCM_RANDOM_XORSHIFT64( num_draws ) \ - TEST_F( rocm, Random_XorShift64 ) { \ - rocm_test_random_xorshift64(num_draws); \ - } - -#define ROCM_RANDOM_XORSHIFT1024( num_draws ) \ - TEST_F( rocm, Random_XorShift1024 ) { \ - rocm_test_random_xorshift1024(num_draws); \ - } +#define ROCM_RANDOM_XORSHIFT1024(num_draws) \ + TEST(rocm, Random_XorShift1024) { rocm_test_random_xorshift1024(num_draws); } -#define ROCM_SORT_UNSIGNED( size ) \ - TEST_F( rocm, SortUnsigned ) { \ - Impl::test_sort< Kokkos::Experimental::ROCm, unsigned >(size); \ +#define ROCM_SORT_UNSIGNED(size) \ + TEST(rocm, SortUnsigned) { \ + Impl::test_sort(size); \ } -ROCM_RANDOM_XORSHIFT64( 132141141 ) -ROCM_RANDOM_XORSHIFT1024( 52428813 ) +ROCM_RANDOM_XORSHIFT64(132141141) +ROCM_RANDOM_XORSHIFT1024(52428813) ROCM_SORT_UNSIGNED(171) #undef ROCM_RANDOM_XORSHIFT64 #undef ROCM_RANDOM_XORSHIFT1024 #undef ROCM_SORT_UNSIGNED -} +} // namespace Test #else void 
KOKKOS_ALGORITHMS_UNITTESTS_TESTROCM_PREVENT_LINK_ERROR() {} -#endif /* #ifdef KOKKOS_ENABLE_ROCM */ - +#endif /* #ifdef KOKKOS_ENABLE_ROCM */ diff --git a/lib/kokkos/algorithms/unit_tests/TestRandom.hpp b/lib/kokkos/algorithms/unit_tests/TestRandom.hpp index 73bd416f2a..bc55ebfad3 100644 --- a/lib/kokkos/algorithms/unit_tests/TestRandom.hpp +++ b/lib/kokkos/algorithms/unit_tests/TestRandom.hpp @@ -1,10 +1,11 @@ //@HEADER // ************************************************************************ // -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). // -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Redistribution and use in source and binary forms, with or without @@ -22,10 +23,10 @@ // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR @@ -54,18 +55,19 @@ namespace Test { -namespace Impl{ +namespace Impl { // This test runs the random number generators and uses some statistic tests to // check the 'goodness' of the random numbers: // (i) mean: the mean is expected to be 0.5*RAND_MAX // (ii) variance: the variance is 1/3*mean*mean // (iii) covariance: the covariance is 0 -// (iv) 1-tupledistr: the mean, variance and covariance of a 1D Histrogram of random numbers -// (v) 3-tupledistr: the mean, variance and covariance of a 3D Histrogram of random numbers +// (iv) 1-tupledistr: the mean, variance and covariance of a 1D Histrogram +// of random numbers (v) 3-tupledistr: the mean, variance and covariance of +// a 3D Histrogram of random numbers #define HIST_DIM3D 24 -#define HIST_DIM1D (HIST_DIM3D*HIST_DIM3D*HIST_DIM3D) +#define HIST_DIM1D (HIST_DIM3D * HIST_DIM3D * HIST_DIM3D) struct RandomProperties { uint64_t count; @@ -77,37 +79,37 @@ struct RandomProperties { KOKKOS_INLINE_FUNCTION RandomProperties() { - count = 0; - mean = 0.0; - variance = 0.0; + count = 0; + mean = 0.0; + variance = 0.0; covariance = 0.0; - min = 1e64; - max = -1e64; + min = 1e64; + max = -1e64; } KOKKOS_INLINE_FUNCTION RandomProperties& operator+=(const RandomProperties& add) { - count += add.count; - mean += add.mean; - variance += add.variance; + count += add.count; + mean += add.mean; + variance += add.variance; covariance += add.covariance; - min = add.minmax?add.max:max; + min = add.min < min ? add.min : min; + max = add.max > max ? 
add.max : max; return *this; } KOKKOS_INLINE_FUNCTION void operator+=(const volatile RandomProperties& add) volatile { - count += add.count; - mean += add.mean; - variance += add.variance; + count += add.count; + mean += add.mean; + variance += add.variance; covariance += add.covariance; - min = add.minmax?add.max:max; + min = add.min < min ? add.min : min; + max = add.max > max ? add.max : max; } }; -template +template struct test_random_functor { typedef typename GeneratorPool::generator_type rnd_type; @@ -123,38 +125,40 @@ struct test_random_functor { // implementations might violate this upper bound, due to rounding // error. Just in case, we leave an extra space at the end of each // dimension, in the View types below. - typedef Kokkos::View type_1d; + typedef Kokkos::View + type_1d; type_1d density_1d; - typedef Kokkos::View type_3d; + typedef Kokkos::View + type_3d; type_3d density_3d; - test_random_functor (GeneratorPool rand_pool_, type_1d d1d, type_3d d3d) : - rand_pool (rand_pool_), - mean (0.5*Kokkos::rand::max ()), - density_1d (d1d), - density_3d (d3d) - {} + test_random_functor(GeneratorPool rand_pool_, type_1d d1d, type_3d d3d) + : rand_pool(rand_pool_), + mean(0.5 * Kokkos::rand::max()), + density_1d(d1d), + density_3d(d3d) {} KOKKOS_INLINE_FUNCTION - void operator() (int i, RandomProperties& prop) const { + void operator()(int i, RandomProperties& prop) const { using Kokkos::atomic_fetch_add; rnd_type rand_gen = rand_pool.get_state(); for (int k = 0; k < 1024; ++k) { - const Scalar tmp = Kokkos::rand::draw(rand_gen); + const Scalar tmp = Kokkos::rand::draw(rand_gen); prop.count++; prop.mean += tmp; - prop.variance += (tmp-mean)*(tmp-mean); - const Scalar tmp2 = Kokkos::rand::draw(rand_gen); + prop.variance += (tmp - mean) * (tmp - mean); + const Scalar tmp2 = Kokkos::rand::draw(rand_gen); prop.count++; prop.mean += tmp2; - prop.variance += (tmp2-mean)*(tmp2-mean); - prop.covariance += (tmp-mean)*(tmp2-mean); - const Scalar tmp3 = 
Kokkos::rand::draw(rand_gen); + prop.variance += (tmp2 - mean) * (tmp2 - mean); + prop.covariance += (tmp - mean) * (tmp2 - mean); + const Scalar tmp3 = Kokkos::rand::draw(rand_gen); prop.count++; prop.mean += tmp3; - prop.variance += (tmp3-mean)*(tmp3-mean); - prop.covariance += (tmp2-mean)*(tmp3-mean); + prop.variance += (tmp3 - mean) * (tmp3 - mean); + prop.covariance += (tmp2 - mean) * (tmp3 - mean); // NOTE (mfh 03 Nov 2014): Kokkos::rand::max() is supposed to // define an exclusive upper bound on the range of random @@ -169,26 +173,32 @@ struct test_random_functor { // returns values of max(), the histograms will still catch this // indirectly, since none of the other values will be filled in. - const Scalar theMax = Kokkos::rand::max (); - - const uint64_t ind1_1d = static_cast (1.0 * HIST_DIM1D * tmp / theMax); - const uint64_t ind2_1d = static_cast (1.0 * HIST_DIM1D * tmp2 / theMax); - const uint64_t ind3_1d = static_cast (1.0 * HIST_DIM1D * tmp3 / theMax); - - const uint64_t ind1_3d = static_cast (1.0 * HIST_DIM3D * tmp / theMax); - const uint64_t ind2_3d = static_cast (1.0 * HIST_DIM3D * tmp2 / theMax); - const uint64_t ind3_3d = static_cast (1.0 * HIST_DIM3D * tmp3 / theMax); - - atomic_fetch_add (&density_1d(ind1_1d), 1); - atomic_fetch_add (&density_1d(ind2_1d), 1); - atomic_fetch_add (&density_1d(ind3_1d), 1); - atomic_fetch_add (&density_3d(ind1_3d, ind2_3d, ind3_3d), 1); + const Scalar theMax = Kokkos::rand::max(); + + const uint64_t ind1_1d = + static_cast(1.0 * HIST_DIM1D * tmp / theMax); + const uint64_t ind2_1d = + static_cast(1.0 * HIST_DIM1D * tmp2 / theMax); + const uint64_t ind3_1d = + static_cast(1.0 * HIST_DIM1D * tmp3 / theMax); + + const uint64_t ind1_3d = + static_cast(1.0 * HIST_DIM3D * tmp / theMax); + const uint64_t ind2_3d = + static_cast(1.0 * HIST_DIM3D * tmp2 / theMax); + const uint64_t ind3_3d = + static_cast(1.0 * HIST_DIM3D * tmp3 / theMax); + + atomic_fetch_add(&density_1d(ind1_1d), 1); + 
atomic_fetch_add(&density_1d(ind2_1d), 1); + atomic_fetch_add(&density_1d(ind3_1d), 1); + atomic_fetch_add(&density_3d(ind1_3d, ind2_3d, ind3_3d), 1); } rand_pool.free_state(rand_gen); } }; -template +template struct test_histogram1d_functor { typedef RandomProperties value_type; typedef typename DeviceType::execution_space execution_space; @@ -200,34 +210,29 @@ struct test_histogram1d_functor { // implementations might violate this upper bound, due to rounding // error. Just in case, we leave an extra space at the end of each // dimension, in the View type below. - typedef Kokkos::View type_1d; + typedef Kokkos::View type_1d; type_1d density_1d; double mean; - test_histogram1d_functor (type_1d d1d, int num_draws) : - density_1d (d1d), - mean (1.0*num_draws/HIST_DIM1D*3) - { - } + test_histogram1d_functor(type_1d d1d, int num_draws) + : density_1d(d1d), mean(1.0 * num_draws / HIST_DIM1D * 3) {} - KOKKOS_INLINE_FUNCTION void - operator() (const typename memory_space::size_type i, - RandomProperties& prop) const - { + KOKKOS_INLINE_FUNCTION void operator()( + const typename memory_space::size_type i, RandomProperties& prop) const { typedef typename memory_space::size_type size_type; const double count = density_1d(i); prop.mean += count; prop.variance += 1.0 * (count - mean) * (count - mean); - //prop.covariance += 1.0*count*count; + // prop.covariance += 1.0*count*count; prop.min = count < prop.min ? count : prop.min; prop.max = count > prop.max ? count : prop.max; - if (i < static_cast (HIST_DIM1D-1)) { - prop.covariance += (count - mean) * (density_1d(i+1) - mean); + if (i < static_cast(HIST_DIM1D - 1)) { + prop.covariance += (count - mean) * (density_1d(i + 1) - mean); } } }; -template +template struct test_histogram3d_functor { typedef RandomProperties value_type; typedef typename DeviceType::execution_space execution_space; @@ -239,29 +244,28 @@ struct test_histogram3d_functor { // implementations might violate this upper bound, due to rounding // error. 
Just in case, we leave an extra space at the end of each // dimension, in the View type below. - typedef Kokkos::View type_3d; + typedef Kokkos::View + type_3d; type_3d density_3d; double mean; - test_histogram3d_functor (type_3d d3d, int num_draws) : - density_3d (d3d), - mean (1.0*num_draws/HIST_DIM1D) - {} + test_histogram3d_functor(type_3d d3d, int num_draws) + : density_3d(d3d), mean(1.0 * num_draws / HIST_DIM1D) {} - KOKKOS_INLINE_FUNCTION void - operator() (const typename memory_space::size_type i, - RandomProperties& prop) const - { + KOKKOS_INLINE_FUNCTION void operator()( + const typename memory_space::size_type i, RandomProperties& prop) const { typedef typename memory_space::size_type size_type; - const double count = density_3d(i/(HIST_DIM3D*HIST_DIM3D), - (i % (HIST_DIM3D*HIST_DIM3D))/HIST_DIM3D, - i % HIST_DIM3D); + const double count = density_3d( + i / (HIST_DIM3D * HIST_DIM3D), + (i % (HIST_DIM3D * HIST_DIM3D)) / HIST_DIM3D, i % HIST_DIM3D); prop.mean += count; prop.variance += (count - mean) * (count - mean); - if (i < static_cast (HIST_DIM1D-1)) { - const double count_next = density_3d((i+1)/(HIST_DIM3D*HIST_DIM3D), - ((i+1)%(HIST_DIM3D*HIST_DIM3D))/HIST_DIM3D, - (i+1)%HIST_DIM3D); + if (i < static_cast(HIST_DIM1D - 1)) { + const double count_next = + density_3d((i + 1) / (HIST_DIM3D * HIST_DIM3D), + ((i + 1) % (HIST_DIM3D * HIST_DIM3D)) / HIST_DIM3D, + (i + 1) % HIST_DIM3D); prop.covariance += (count - mean) * (count_next - mean); } } @@ -270,212 +274,223 @@ struct test_histogram3d_functor { // // Templated test that uses the above functors. 
// -template +template struct test_random_scalar { typedef typename RandomGenerator::generator_type rnd_type; - int pass_mean,pass_var,pass_covar; - int pass_hist1d_mean,pass_hist1d_var,pass_hist1d_covar; - int pass_hist3d_mean,pass_hist3d_var,pass_hist3d_covar; + int pass_mean, pass_var, pass_covar; + int pass_hist1d_mean, pass_hist1d_var, pass_hist1d_covar; + int pass_hist3d_mean, pass_hist3d_var, pass_hist3d_covar; - test_random_scalar (typename test_random_functor::type_1d& density_1d, - typename test_random_functor::type_3d& density_3d, - RandomGenerator& pool, - unsigned int num_draws) - { + test_random_scalar( + typename test_random_functor::type_1d& density_1d, + typename test_random_functor::type_3d& density_3d, + RandomGenerator& pool, unsigned int num_draws) { + using Kokkos::parallel_reduce; using std::cout; using std::endl; - using Kokkos::parallel_reduce; { cout << " -- Testing randomness properties" << endl; RandomProperties result; typedef test_random_functor functor_type; - parallel_reduce (num_draws/1024, functor_type (pool, density_1d, density_3d), result); - - //printf("Result: %lf %lf %lf\n",result.mean/num_draws/3,result.variance/num_draws/3,result.covariance/num_draws/2); - double tolerance = 1.6*std::sqrt(1.0/num_draws); - double mean_expect = 0.5*Kokkos::rand::max(); - double variance_expect = 1.0/3.0*mean_expect*mean_expect; - double mean_eps = mean_expect/(result.mean/num_draws/3)-1.0; - double variance_eps = variance_expect/(result.variance/num_draws/3)-1.0; - double covariance_eps = result.covariance/num_draws/2/variance_expect; - pass_mean = ((-tolerance < mean_eps) && - ( tolerance > mean_eps)) ? 1:0; - pass_var = ((-1.5*tolerance < variance_eps) && - ( 1.5*tolerance > variance_eps)) ? 1:0; - pass_covar = ((-2.0*tolerance < covariance_eps) && - ( 2.0*tolerance > covariance_eps)) ? 
1:0; - cout << "Pass: " << pass_mean - << " " << pass_var - << " " << mean_eps - << " " << variance_eps - << " " << covariance_eps - << " || " << tolerance << endl; + parallel_reduce(num_draws / 1024, + functor_type(pool, density_1d, density_3d), result); + + // printf("Result: %lf %lf + // %lf\n",result.mean/num_draws/3,result.variance/num_draws/3,result.covariance/num_draws/2); + double tolerance = 1.6 * std::sqrt(1.0 / num_draws); + double mean_expect = 0.5 * Kokkos::rand::max(); + double variance_expect = 1.0 / 3.0 * mean_expect * mean_expect; + double mean_eps = mean_expect / (result.mean / num_draws / 3) - 1.0; + double variance_eps = + variance_expect / (result.variance / num_draws / 3) - 1.0; + double covariance_eps = + result.covariance / num_draws / 2 / variance_expect; + pass_mean = ((-tolerance < mean_eps) && (tolerance > mean_eps)) ? 1 : 0; + pass_var = ((-1.5 * tolerance < variance_eps) && + (1.5 * tolerance > variance_eps)) + ? 1 + : 0; + pass_covar = ((-2.0 * tolerance < covariance_eps) && + (2.0 * tolerance > covariance_eps)) + ? 1 + : 0; + cout << "Pass: " << pass_mean << " " << pass_var << " " << mean_eps << " " + << variance_eps << " " << covariance_eps << " || " << tolerance + << endl; } { cout << " -- Testing 1-D histogram" << endl; RandomProperties result; - typedef test_histogram1d_functor functor_type; - parallel_reduce (HIST_DIM1D, functor_type (density_1d, num_draws), result); - - double tolerance = 6*std::sqrt(1.0/HIST_DIM1D); - double mean_expect = 1.0*num_draws*3/HIST_DIM1D; - double variance_expect = 1.0*num_draws*3/HIST_DIM1D*(1.0-1.0/HIST_DIM1D); - double covariance_expect = -1.0*num_draws*3/HIST_DIM1D/HIST_DIM1D; - double mean_eps = mean_expect/(result.mean/HIST_DIM1D)-1.0; - double variance_eps = variance_expect/(result.variance/HIST_DIM1D)-1.0; - double covariance_eps = (result.covariance/HIST_DIM1D - covariance_expect)/mean_expect; - pass_hist1d_mean = ((-0.0001 < mean_eps) && - ( 0.0001 > mean_eps)) ? 
1:0; - pass_hist1d_var = ((-0.07 < variance_eps) && - ( 0.07 > variance_eps)) ? 1:0; - pass_hist1d_covar = ((-0.06 < covariance_eps) && - ( 0.06 > covariance_eps)) ? 1:0; - - cout << "Density 1D: " << mean_eps - << " " << variance_eps - << " " << (result.covariance/HIST_DIM1D/HIST_DIM1D) - << " || " << tolerance - << " " << result.min - << " " << result.max - << " || " << result.variance/HIST_DIM1D - << " " << 1.0*num_draws*3/HIST_DIM1D*(1.0-1.0/HIST_DIM1D) - << " || " << result.covariance/HIST_DIM1D - << " " << -1.0*num_draws*3/HIST_DIM1D/HIST_DIM1D - << endl; + typedef test_histogram1d_functor + functor_type; + parallel_reduce(HIST_DIM1D, functor_type(density_1d, num_draws), result); + + double tolerance = 6 * std::sqrt(1.0 / HIST_DIM1D); + double mean_expect = 1.0 * num_draws * 3 / HIST_DIM1D; + double variance_expect = + 1.0 * num_draws * 3 / HIST_DIM1D * (1.0 - 1.0 / HIST_DIM1D); + double covariance_expect = -1.0 * num_draws * 3 / HIST_DIM1D / HIST_DIM1D; + double mean_eps = mean_expect / (result.mean / HIST_DIM1D) - 1.0; + double variance_eps = + variance_expect / (result.variance / HIST_DIM1D) - 1.0; + double covariance_eps = + (result.covariance / HIST_DIM1D - covariance_expect) / mean_expect; + pass_hist1d_mean = ((-0.0001 < mean_eps) && (0.0001 > mean_eps)) ? 1 : 0; + pass_hist1d_var = + ((-0.07 < variance_eps) && (0.07 > variance_eps)) ? 1 : 0; + pass_hist1d_covar = + ((-0.06 < covariance_eps) && (0.06 > covariance_eps)) ? 
1 : 0; + + cout << "Density 1D: " << mean_eps << " " << variance_eps << " " + << (result.covariance / HIST_DIM1D / HIST_DIM1D) << " || " + << tolerance << " " << result.min << " " << result.max << " || " + << result.variance / HIST_DIM1D << " " + << 1.0 * num_draws * 3 / HIST_DIM1D * (1.0 - 1.0 / HIST_DIM1D) + << " || " << result.covariance / HIST_DIM1D << " " + << -1.0 * num_draws * 3 / HIST_DIM1D / HIST_DIM1D << endl; } { cout << " -- Testing 3-D histogram" << endl; RandomProperties result; - typedef test_histogram3d_functor functor_type; - parallel_reduce (HIST_DIM1D, functor_type (density_3d, num_draws), result); - - double tolerance = 6*std::sqrt(1.0/HIST_DIM1D); - double mean_expect = 1.0*num_draws/HIST_DIM1D; - double variance_expect = 1.0*num_draws/HIST_DIM1D*(1.0-1.0/HIST_DIM1D); - double covariance_expect = -1.0*num_draws/HIST_DIM1D/HIST_DIM1D; - double mean_eps = mean_expect/(result.mean/HIST_DIM1D)-1.0; - double variance_eps = variance_expect/(result.variance/HIST_DIM1D)-1.0; - double covariance_eps = (result.covariance/HIST_DIM1D - covariance_expect)/mean_expect; - pass_hist3d_mean = ((-tolerance < mean_eps) && - ( tolerance > mean_eps)) ? 1:0; - pass_hist3d_var = ((-1.2*tolerance < variance_eps) && - ( 1.2*tolerance > variance_eps)) ? 1:0; - pass_hist3d_covar = ((-tolerance < covariance_eps) && - ( tolerance > covariance_eps)) ? 
1:0; - - cout << "Density 3D: " << mean_eps - << " " << variance_eps - << " " << result.covariance/HIST_DIM1D/HIST_DIM1D - << " || " << tolerance - << " " << result.min - << " " << result.max << endl; + typedef test_histogram3d_functor + functor_type; + parallel_reduce(HIST_DIM1D, functor_type(density_3d, num_draws), result); + + double tolerance = 6 * std::sqrt(1.0 / HIST_DIM1D); + double mean_expect = 1.0 * num_draws / HIST_DIM1D; + double variance_expect = + 1.0 * num_draws / HIST_DIM1D * (1.0 - 1.0 / HIST_DIM1D); + double covariance_expect = -1.0 * num_draws / HIST_DIM1D / HIST_DIM1D; + double mean_eps = mean_expect / (result.mean / HIST_DIM1D) - 1.0; + double variance_eps = + variance_expect / (result.variance / HIST_DIM1D) - 1.0; + double covariance_eps = + (result.covariance / HIST_DIM1D - covariance_expect) / mean_expect; + pass_hist3d_mean = + ((-tolerance < mean_eps) && (tolerance > mean_eps)) ? 1 : 0; + pass_hist3d_var = ((-1.2 * tolerance < variance_eps) && + (1.2 * tolerance > variance_eps)) + ? 1 + : 0; + pass_hist3d_covar = + ((-tolerance < covariance_eps) && (tolerance > covariance_eps)) ? 
1 + : 0; + + cout << "Density 3D: " << mean_eps << " " << variance_eps << " " + << result.covariance / HIST_DIM1D / HIST_DIM1D << " || " << tolerance + << " " << result.min << " " << result.max << endl; } } }; template -void test_random(unsigned int num_draws) -{ +void test_random(unsigned int num_draws) { using std::cout; using std::endl; - typename test_random_functor::type_1d density_1d("D1d"); - typename test_random_functor::type_3d density_3d("D3d"); + typename test_random_functor::type_1d density_1d("D1d"); + typename test_random_functor::type_3d density_3d("D3d"); - - uint64_t ticks = std::chrono::high_resolution_clock::now().time_since_epoch().count(); + uint64_t ticks = + std::chrono::high_resolution_clock::now().time_since_epoch().count(); cout << "Test Seed:" << ticks << endl; RandomGenerator pool(ticks); cout << "Test Scalar=int" << endl; - test_random_scalar test_int(density_1d,density_3d,pool,num_draws); - ASSERT_EQ( test_int.pass_mean,1); - ASSERT_EQ( test_int.pass_var,1); - ASSERT_EQ( test_int.pass_covar,1); - ASSERT_EQ( test_int.pass_hist1d_mean,1); - ASSERT_EQ( test_int.pass_hist1d_var,1); - ASSERT_EQ( test_int.pass_hist1d_covar,1); - ASSERT_EQ( test_int.pass_hist3d_mean,1); - ASSERT_EQ( test_int.pass_hist3d_var,1); - ASSERT_EQ( test_int.pass_hist3d_covar,1); - deep_copy(density_1d,0); - deep_copy(density_3d,0); + test_random_scalar test_int(density_1d, density_3d, + pool, num_draws); + ASSERT_EQ(test_int.pass_mean, 1); + ASSERT_EQ(test_int.pass_var, 1); + ASSERT_EQ(test_int.pass_covar, 1); + ASSERT_EQ(test_int.pass_hist1d_mean, 1); + ASSERT_EQ(test_int.pass_hist1d_var, 1); + ASSERT_EQ(test_int.pass_hist1d_covar, 1); + ASSERT_EQ(test_int.pass_hist3d_mean, 1); + ASSERT_EQ(test_int.pass_hist3d_var, 1); + ASSERT_EQ(test_int.pass_hist3d_covar, 1); + deep_copy(density_1d, 0); + deep_copy(density_3d, 0); cout << "Test Scalar=unsigned int" << endl; - test_random_scalar test_uint(density_1d,density_3d,pool,num_draws); - ASSERT_EQ( test_uint.pass_mean,1); 
- ASSERT_EQ( test_uint.pass_var,1); - ASSERT_EQ( test_uint.pass_covar,1); - ASSERT_EQ( test_uint.pass_hist1d_mean,1); - ASSERT_EQ( test_uint.pass_hist1d_var,1); - ASSERT_EQ( test_uint.pass_hist1d_covar,1); - ASSERT_EQ( test_uint.pass_hist3d_mean,1); - ASSERT_EQ( test_uint.pass_hist3d_var,1); - ASSERT_EQ( test_uint.pass_hist3d_covar,1); - deep_copy(density_1d,0); - deep_copy(density_3d,0); + test_random_scalar test_uint( + density_1d, density_3d, pool, num_draws); + ASSERT_EQ(test_uint.pass_mean, 1); + ASSERT_EQ(test_uint.pass_var, 1); + ASSERT_EQ(test_uint.pass_covar, 1); + ASSERT_EQ(test_uint.pass_hist1d_mean, 1); + ASSERT_EQ(test_uint.pass_hist1d_var, 1); + ASSERT_EQ(test_uint.pass_hist1d_covar, 1); + ASSERT_EQ(test_uint.pass_hist3d_mean, 1); + ASSERT_EQ(test_uint.pass_hist3d_var, 1); + ASSERT_EQ(test_uint.pass_hist3d_covar, 1); + deep_copy(density_1d, 0); + deep_copy(density_3d, 0); cout << "Test Scalar=int64_t" << endl; - test_random_scalar test_int64(density_1d,density_3d,pool,num_draws); - ASSERT_EQ( test_int64.pass_mean,1); - ASSERT_EQ( test_int64.pass_var,1); - ASSERT_EQ( test_int64.pass_covar,1); - ASSERT_EQ( test_int64.pass_hist1d_mean,1); - ASSERT_EQ( test_int64.pass_hist1d_var,1); - ASSERT_EQ( test_int64.pass_hist1d_covar,1); - ASSERT_EQ( test_int64.pass_hist3d_mean,1); - ASSERT_EQ( test_int64.pass_hist3d_var,1); - ASSERT_EQ( test_int64.pass_hist3d_covar,1); - deep_copy(density_1d,0); - deep_copy(density_3d,0); + test_random_scalar test_int64( + density_1d, density_3d, pool, num_draws); + ASSERT_EQ(test_int64.pass_mean, 1); + ASSERT_EQ(test_int64.pass_var, 1); + ASSERT_EQ(test_int64.pass_covar, 1); + ASSERT_EQ(test_int64.pass_hist1d_mean, 1); + ASSERT_EQ(test_int64.pass_hist1d_var, 1); + ASSERT_EQ(test_int64.pass_hist1d_covar, 1); + ASSERT_EQ(test_int64.pass_hist3d_mean, 1); + ASSERT_EQ(test_int64.pass_hist3d_var, 1); + ASSERT_EQ(test_int64.pass_hist3d_covar, 1); + deep_copy(density_1d, 0); + deep_copy(density_3d, 0); cout << "Test Scalar=uint64_t" << 
endl; - test_random_scalar test_uint64(density_1d,density_3d,pool,num_draws); - ASSERT_EQ( test_uint64.pass_mean,1); - ASSERT_EQ( test_uint64.pass_var,1); - ASSERT_EQ( test_uint64.pass_covar,1); - ASSERT_EQ( test_uint64.pass_hist1d_mean,1); - ASSERT_EQ( test_uint64.pass_hist1d_var,1); - ASSERT_EQ( test_uint64.pass_hist1d_covar,1); - ASSERT_EQ( test_uint64.pass_hist3d_mean,1); - ASSERT_EQ( test_uint64.pass_hist3d_var,1); - ASSERT_EQ( test_uint64.pass_hist3d_covar,1); - deep_copy(density_1d,0); - deep_copy(density_3d,0); + test_random_scalar test_uint64( + density_1d, density_3d, pool, num_draws); + ASSERT_EQ(test_uint64.pass_mean, 1); + ASSERT_EQ(test_uint64.pass_var, 1); + ASSERT_EQ(test_uint64.pass_covar, 1); + ASSERT_EQ(test_uint64.pass_hist1d_mean, 1); + ASSERT_EQ(test_uint64.pass_hist1d_var, 1); + ASSERT_EQ(test_uint64.pass_hist1d_covar, 1); + ASSERT_EQ(test_uint64.pass_hist3d_mean, 1); + ASSERT_EQ(test_uint64.pass_hist3d_var, 1); + ASSERT_EQ(test_uint64.pass_hist3d_covar, 1); + deep_copy(density_1d, 0); + deep_copy(density_3d, 0); cout << "Test Scalar=float" << endl; - test_random_scalar test_float(density_1d,density_3d,pool,num_draws); - ASSERT_EQ( test_float.pass_mean,1); - ASSERT_EQ( test_float.pass_var,1); - ASSERT_EQ( test_float.pass_covar,1); - ASSERT_EQ( test_float.pass_hist1d_mean,1); - ASSERT_EQ( test_float.pass_hist1d_var,1); - ASSERT_EQ( test_float.pass_hist1d_covar,1); - ASSERT_EQ( test_float.pass_hist3d_mean,1); - ASSERT_EQ( test_float.pass_hist3d_var,1); - ASSERT_EQ( test_float.pass_hist3d_covar,1); - deep_copy(density_1d,0); - deep_copy(density_3d,0); + test_random_scalar test_float(density_1d, density_3d, + pool, num_draws); + ASSERT_EQ(test_float.pass_mean, 1); + ASSERT_EQ(test_float.pass_var, 1); + ASSERT_EQ(test_float.pass_covar, 1); + ASSERT_EQ(test_float.pass_hist1d_mean, 1); + ASSERT_EQ(test_float.pass_hist1d_var, 1); + ASSERT_EQ(test_float.pass_hist1d_covar, 1); + ASSERT_EQ(test_float.pass_hist3d_mean, 1); + 
ASSERT_EQ(test_float.pass_hist3d_var, 1); + ASSERT_EQ(test_float.pass_hist3d_covar, 1); + deep_copy(density_1d, 0); + deep_copy(density_3d, 0); cout << "Test Scalar=double" << endl; - test_random_scalar test_double(density_1d,density_3d,pool,num_draws); - ASSERT_EQ( test_double.pass_mean,1); - ASSERT_EQ( test_double.pass_var,1); - ASSERT_EQ( test_double.pass_covar,1); - ASSERT_EQ( test_double.pass_hist1d_mean,1); - ASSERT_EQ( test_double.pass_hist1d_var,1); - ASSERT_EQ( test_double.pass_hist1d_covar,1); - ASSERT_EQ( test_double.pass_hist3d_mean,1); - ASSERT_EQ( test_double.pass_hist3d_var,1); - ASSERT_EQ( test_double.pass_hist3d_covar,1); -} + test_random_scalar test_double( + density_1d, density_3d, pool, num_draws); + ASSERT_EQ(test_double.pass_mean, 1); + ASSERT_EQ(test_double.pass_var, 1); + ASSERT_EQ(test_double.pass_covar, 1); + ASSERT_EQ(test_double.pass_hist1d_mean, 1); + ASSERT_EQ(test_double.pass_hist1d_var, 1); + ASSERT_EQ(test_double.pass_hist1d_covar, 1); + ASSERT_EQ(test_double.pass_hist3d_mean, 1); + ASSERT_EQ(test_double.pass_hist3d_var, 1); + ASSERT_EQ(test_double.pass_hist3d_covar, 1); } +} // namespace Impl -} // namespace Test +} // namespace Test -#endif //KOKKOS_TEST_UNORDERED_MAP_HPP +#endif // KOKKOS_TEST_UNORDERED_MAP_HPP diff --git a/lib/kokkos/algorithms/unit_tests/TestSerial.cpp b/lib/kokkos/algorithms/unit_tests/TestSerial.cpp index 9cf998f773..2eacdc2677 100644 --- a/lib/kokkos/algorithms/unit_tests/TestSerial.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestSerial.cpp @@ -2,10 +2,11 @@ //@HEADER // ************************************************************************ // -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). // -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. 
// // Redistribution and use in source and binary forms, with or without @@ -23,10 +24,10 @@ // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR @@ -52,49 +53,36 @@ #include #include - //---------------------------------------------------------------------------- - namespace Test { -class serial : public ::testing::Test { -protected: - static void SetUpTestCase() - { - } - - static void TearDownTestCase () - { +#define SERIAL_RANDOM_XORSHIFT64(num_draws) \ + TEST(serial, Random_XorShift64) { \ + Impl::test_random >( \ + num_draws); \ } -}; -#define SERIAL_RANDOM_XORSHIFT64( num_draws ) \ - TEST_F( serial, Random_XorShift64 ) { \ - Impl::test_random >(num_draws); \ +#define SERIAL_RANDOM_XORSHIFT1024(num_draws) \ + TEST(serial, Random_XorShift1024) { \ + Impl::test_random >( \ + num_draws); \ } -#define SERIAL_RANDOM_XORSHIFT1024( num_draws ) \ - TEST_F( serial, Random_XorShift1024 ) { \ - Impl::test_random >(num_draws); \ +#define SERIAL_SORT_UNSIGNED(size) \ + TEST(serial, SortUnsigned) { \ + Impl::test_sort(size); \ } -#define SERIAL_SORT_UNSIGNED( size ) \ - TEST_F( serial, SortUnsigned ) { \ - Impl::test_sort< Kokkos::Serial, unsigned >(size); \ - } - -SERIAL_RANDOM_XORSHIFT64( 10240000 ) -SERIAL_RANDOM_XORSHIFT1024( 10130144 ) +SERIAL_RANDOM_XORSHIFT64(10240000) 
+SERIAL_RANDOM_XORSHIFT1024(10130144) SERIAL_SORT_UNSIGNED(171) #undef SERIAL_RANDOM_XORSHIFT64 #undef SERIAL_RANDOM_XORSHIFT1024 #undef SERIAL_SORT_UNSIGNED -} // namespace Test +} // namespace Test #else void KOKKOS_ALGORITHMS_UNITTESTS_TESTSERIAL_PREVENT_LINK_ERROR() {} -#endif // KOKKOS_ENABLE_SERIAL - - +#endif // KOKKOS_ENABLE_SERIAL diff --git a/lib/kokkos/algorithms/unit_tests/TestSort.hpp b/lib/kokkos/algorithms/unit_tests/TestSort.hpp index 5fd7f09b50..310a93c93d 100644 --- a/lib/kokkos/algorithms/unit_tests/TestSort.hpp +++ b/lib/kokkos/algorithms/unit_tests/TestSort.hpp @@ -1,10 +1,11 @@ //@HEADER // ************************************************************************ // -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). // -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Redistribution and use in source and binary forms, with or without @@ -22,10 +23,10 @@ // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR @@ -43,235 +44,248 @@ #define KOKKOS_ALGORITHMS_UNITTESTS_TESTSORT_HPP #include -#include -#include -#include -#include +#include +#include +#include +#include namespace Test { -namespace Impl{ +namespace Impl { -template +template struct is_sorted_struct { typedef unsigned int value_type; typedef ExecutionSpace execution_space; - Kokkos::View keys; + Kokkos::View keys; - is_sorted_struct(Kokkos::View keys_):keys(keys_) {} + is_sorted_struct(Kokkos::View keys_) : keys(keys_) {} KOKKOS_INLINE_FUNCTION - void operator() (int i, unsigned int& count) const { - if(keys(i)>keys(i+1)) count++; + void operator()(int i, unsigned int& count) const { + if (keys(i) > keys(i + 1)) count++; } }; -template +template struct sum { typedef double value_type; typedef ExecutionSpace execution_space; - Kokkos::View keys; + Kokkos::View keys; - sum(Kokkos::View keys_):keys(keys_) {} + sum(Kokkos::View keys_) : keys(keys_) {} KOKKOS_INLINE_FUNCTION - void operator() (int i, double& count) const { - count+=keys(i); - } + void operator()(int i, double& count) const { count += keys(i); } }; -template +template struct bin3d_is_sorted_struct { typedef unsigned int value_type; typedef ExecutionSpace execution_space; - Kokkos::View keys; + Kokkos::View keys; int max_bins; Scalar min; Scalar max; - bin3d_is_sorted_struct(Kokkos::View keys_,int max_bins_,Scalar min_,Scalar max_): - keys(keys_),max_bins(max_bins_),min(min_),max(max_) { - } + bin3d_is_sorted_struct(Kokkos::View keys_, + int max_bins_, Scalar min_, Scalar max_) + : keys(keys_), max_bins(max_bins_), min(min_), max(max_) {} KOKKOS_INLINE_FUNCTION - void operator() (int i, unsigned int& count) const { - int ix1 = int ((keys(i,0)-min)/max * max_bins); - int iy1 = int ((keys(i,1)-min)/max * 
max_bins); - int iz1 = int ((keys(i,2)-min)/max * max_bins); - int ix2 = int ((keys(i+1,0)-min)/max * max_bins); - int iy2 = int ((keys(i+1,1)-min)/max * max_bins); - int iz2 = int ((keys(i+1,2)-min)/max * max_bins); - - if (ix1>ix2) count++; - else if(ix1==ix2) { - if (iy1>iy2) count++; - else if ((iy1==iy2) && (iz1>iz2)) count++; + void operator()(int i, unsigned int& count) const { + int ix1 = int((keys(i, 0) - min) / max * max_bins); + int iy1 = int((keys(i, 1) - min) / max * max_bins); + int iz1 = int((keys(i, 2) - min) / max * max_bins); + int ix2 = int((keys(i + 1, 0) - min) / max * max_bins); + int iy2 = int((keys(i + 1, 1) - min) / max * max_bins); + int iz2 = int((keys(i + 1, 2) - min) / max * max_bins); + + if (ix1 > ix2) + count++; + else if (ix1 == ix2) { + if (iy1 > iy2) + count++; + else if ((iy1 == iy2) && (iz1 > iz2)) + count++; } } }; -template +template struct sum3D { typedef double value_type; typedef ExecutionSpace execution_space; - Kokkos::View keys; + Kokkos::View keys; - sum3D(Kokkos::View keys_):keys(keys_) {} + sum3D(Kokkos::View keys_) : keys(keys_) {} KOKKOS_INLINE_FUNCTION - void operator() (int i, double& count) const { - count+=keys(i,0); - count+=keys(i,1); - count+=keys(i,2); + void operator()(int i, double& count) const { + count += keys(i, 0); + count += keys(i, 1); + count += keys(i, 2); } }; -template -void test_1D_sort(unsigned int n,bool force_kokkos) { - typedef Kokkos::View KeyViewType; - KeyViewType keys("Keys",n); +template +void test_1D_sort(unsigned int n, bool force_kokkos) { + typedef Kokkos::View KeyViewType; + KeyViewType keys("Keys", n); // Test sorting array with all numbers equal - Kokkos::deep_copy(keys,KeyType(1)); - Kokkos::sort(keys,force_kokkos); + Kokkos::deep_copy(keys, KeyType(1)); + Kokkos::sort(keys, force_kokkos); Kokkos::Random_XorShift64_Pool g(1931); - Kokkos::fill_random(keys,g,Kokkos::Random_XorShift64_Pool::generator_type::MAX_URAND); + Kokkos::fill_random(keys, g, + 
Kokkos::Random_XorShift64_Pool< + ExecutionSpace>::generator_type::MAX_URAND); - double sum_before = 0.0; - double sum_after = 0.0; + double sum_before = 0.0; + double sum_after = 0.0; unsigned int sort_fails = 0; - Kokkos::parallel_reduce(n,sum(keys),sum_before); + Kokkos::parallel_reduce(n, sum(keys), sum_before); - Kokkos::sort(keys,force_kokkos); + Kokkos::sort(keys, force_kokkos); - Kokkos::parallel_reduce(n,sum(keys),sum_after); - Kokkos::parallel_reduce(n-1,is_sorted_struct(keys),sort_fails); + Kokkos::parallel_reduce(n, sum(keys), sum_after); + Kokkos::parallel_reduce( + n - 1, is_sorted_struct(keys), sort_fails); - double ratio = sum_before/sum_after; + double ratio = sum_before / sum_after; double epsilon = 1e-10; - unsigned int equal_sum = (ratio > (1.0-epsilon)) && (ratio < (1.0+epsilon)) ? 1 : 0; + unsigned int equal_sum = + (ratio > (1.0 - epsilon)) && (ratio < (1.0 + epsilon)) ? 1 : 0; - ASSERT_EQ(sort_fails,0); - ASSERT_EQ(equal_sum,1); + ASSERT_EQ(sort_fails, 0); + ASSERT_EQ(equal_sum, 1); } -template +template void test_3D_sort(unsigned int n) { - typedef Kokkos::View KeyViewType; + typedef Kokkos::View KeyViewType; - KeyViewType keys("Keys",n*n*n); + KeyViewType keys("Keys", n * n * n); Kokkos::Random_XorShift64_Pool g(1931); - Kokkos::fill_random(keys,g,100.0); + Kokkos::fill_random(keys, g, 100.0); - double sum_before = 0.0; - double sum_after = 0.0; + double sum_before = 0.0; + double sum_after = 0.0; unsigned int sort_fails = 0; - Kokkos::parallel_reduce(keys.extent(0),sum3D(keys),sum_before); + Kokkos::parallel_reduce(keys.extent(0), sum3D(keys), + sum_before); int bin_1d = 1; - while( bin_1d*bin_1d*bin_1d*4< (int) keys.extent(0) ) bin_1d*=2; - int bin_max[3] = {bin_1d,bin_1d,bin_1d}; - typename KeyViewType::value_type min[3] = {0,0,0}; - typename KeyViewType::value_type max[3] = {100,100,100}; - - typedef Kokkos::BinOp3D< KeyViewType > BinOp; - BinOp bin_op(bin_max,min,max); - Kokkos::BinSort< KeyViewType , BinOp > - 
Sorter(keys,bin_op,false); + while (bin_1d * bin_1d * bin_1d * 4 < (int)keys.extent(0)) bin_1d *= 2; + int bin_max[3] = {bin_1d, bin_1d, bin_1d}; + typename KeyViewType::value_type min[3] = {0, 0, 0}; + typename KeyViewType::value_type max[3] = {100, 100, 100}; + + typedef Kokkos::BinOp3D BinOp; + BinOp bin_op(bin_max, min, max); + Kokkos::BinSort Sorter(keys, bin_op, false); Sorter.create_permute_vector(); - Sorter.template sort< KeyViewType >(keys); + Sorter.template sort(keys); - Kokkos::parallel_reduce(keys.extent(0),sum3D(keys),sum_after); - Kokkos::parallel_reduce(keys.extent(0)-1,bin3d_is_sorted_struct(keys,bin_1d,min[0],max[0]),sort_fails); + Kokkos::parallel_reduce(keys.extent(0), sum3D(keys), + sum_after); + Kokkos::parallel_reduce(keys.extent(0) - 1, + bin3d_is_sorted_struct( + keys, bin_1d, min[0], max[0]), + sort_fails); - double ratio = sum_before/sum_after; + double ratio = sum_before / sum_after; double epsilon = 1e-10; - unsigned int equal_sum = (ratio > (1.0-epsilon)) && (ratio < (1.0+epsilon)) ? 1 : 0; + unsigned int equal_sum = + (ratio > (1.0 - epsilon)) && (ratio < (1.0 + epsilon)) ? 
1 : 0; - if ( sort_fails ) - printf("3D Sort Sum: %f %f Fails: %u\n",sum_before,sum_after,sort_fails); + if (sort_fails) + printf("3D Sort Sum: %f %f Fails: %u\n", sum_before, sum_after, sort_fails); - ASSERT_EQ(sort_fails,0); - ASSERT_EQ(equal_sum,1); + ASSERT_EQ(sort_fails, 0); + ASSERT_EQ(equal_sum, 1); } //---------------------------------------------------------------------------- -template -void test_dynamic_view_sort(unsigned int n ) -{ - typedef Kokkos::Experimental::DynamicView KeyDynamicViewType; - typedef Kokkos::View KeyViewType; +template +void test_dynamic_view_sort(unsigned int n) { + typedef Kokkos::Experimental::DynamicView + KeyDynamicViewType; + typedef Kokkos::View KeyViewType; - const size_t upper_bound = 2 * n ; + const size_t upper_bound = 2 * n; const size_t min_chunk_size = 1024; KeyDynamicViewType keys("Keys", min_chunk_size, upper_bound); keys.resize_serial(n); - KeyViewType keys_view("KeysTmp", n ); + KeyViewType keys_view("KeysTmp", n); // Test sorting array with all numbers equal - Kokkos::deep_copy(keys_view,KeyType(1)); - Kokkos::deep_copy(keys,keys_view); - Kokkos::sort(keys, 0 /* begin */ , n /* end */ ); + Kokkos::deep_copy(keys_view, KeyType(1)); + Kokkos::deep_copy(keys, keys_view); + Kokkos::sort(keys, 0 /* begin */, n /* end */); Kokkos::Random_XorShift64_Pool g(1931); - Kokkos::fill_random(keys_view,g,Kokkos::Random_XorShift64_Pool::generator_type::MAX_URAND); + Kokkos::fill_random(keys_view, g, + Kokkos::Random_XorShift64_Pool< + ExecutionSpace>::generator_type::MAX_URAND); ExecutionSpace().fence(); - Kokkos::deep_copy(keys,keys_view); - //ExecutionSpace().fence(); + Kokkos::deep_copy(keys, keys_view); + // ExecutionSpace().fence(); - double sum_before = 0.0; - double sum_after = 0.0; + double sum_before = 0.0; + double sum_after = 0.0; unsigned int sort_fails = 0; - Kokkos::parallel_reduce(n,sum(keys_view),sum_before); + Kokkos::parallel_reduce(n, sum(keys_view), + sum_before); - Kokkos::sort(keys, 0 /* begin */ , n /* end 
*/ ); + Kokkos::sort(keys, 0 /* begin */, n /* end */); - ExecutionSpace().fence(); // Need this fence to prevent BusError with Cuda - Kokkos::deep_copy( keys_view , keys ); - //ExecutionSpace().fence(); + ExecutionSpace().fence(); // Need this fence to prevent BusError with Cuda + Kokkos::deep_copy(keys_view, keys); + // ExecutionSpace().fence(); - Kokkos::parallel_reduce(n,sum(keys_view),sum_after); - Kokkos::parallel_reduce(n-1,is_sorted_struct(keys_view),sort_fails); + Kokkos::parallel_reduce(n, sum(keys_view), + sum_after); + Kokkos::parallel_reduce( + n - 1, is_sorted_struct(keys_view), sort_fails); - double ratio = sum_before/sum_after; + double ratio = sum_before / sum_after; double epsilon = 1e-10; - unsigned int equal_sum = (ratio > (1.0-epsilon)) && (ratio < (1.0+epsilon)) ? 1 : 0; - - if ( sort_fails != 0 || equal_sum != 1 ) { - std::cout << " N = " << n - << " ; sum_before = " << sum_before - << " ; sum_after = " << sum_after - << " ; ratio = " << ratio - << std::endl ; + unsigned int equal_sum = + (ratio > (1.0 - epsilon)) && (ratio < (1.0 + epsilon)) ? 
1 : 0; + + if (sort_fails != 0 || equal_sum != 1) { + std::cout << " N = " << n << " ; sum_before = " << sum_before + << " ; sum_after = " << sum_after << " ; ratio = " << ratio + << std::endl; } - ASSERT_EQ(sort_fails,0); - ASSERT_EQ(equal_sum,1); + ASSERT_EQ(sort_fails, 0); + ASSERT_EQ(equal_sum, 1); } //---------------------------------------------------------------------------- -template -void test_issue_1160() -{ +template +void test_issue_1160() { Kokkos::View element_("element", 10); Kokkos::View x_("x", 10); Kokkos::View v_("y", 10); auto h_element = Kokkos::create_mirror_view(element_); - auto h_x = Kokkos::create_mirror_view(x_); - auto h_v = Kokkos::create_mirror_view(v_); + auto h_x = Kokkos::create_mirror_view(x_); + auto h_v = Kokkos::create_mirror_view(v_); h_element(0) = 9; h_element(1) = 8; @@ -292,20 +306,21 @@ void test_issue_1160() Kokkos::deep_copy(v_, h_v); typedef decltype(element_) KeyViewType; - typedef Kokkos::BinOp1D< KeyViewType > BinOp; + typedef Kokkos::BinOp1D BinOp; int begin = 3; - int end = 8; - auto max = h_element(begin); - auto min = h_element(end - 1); + int end = 8; + auto max = h_element(begin); + auto min = h_element(end - 1); BinOp binner(end - begin, min, max); - Kokkos::BinSort Sorter(element_,begin,end,binner,false); + Kokkos::BinSort Sorter(element_, begin, end, binner, + false); Sorter.create_permute_vector(); - Sorter.sort(element_,begin,end); + Sorter.sort(element_, begin, end); - Sorter.sort(x_,begin,end); - Sorter.sort(v_,begin,end); + Sorter.sort(x_, begin, end); + Sorter.sort(v_, begin, end); Kokkos::deep_copy(h_element, element_); Kokkos::deep_copy(h_x, x_); @@ -330,18 +345,17 @@ void test_issue_1160() //---------------------------------------------------------------------------- -template -void test_sort(unsigned int N) -{ - test_1D_sort(N*N*N, true); - test_1D_sort(N*N*N, false); +template +void test_sort(unsigned int N) { + test_1D_sort(N * N * N, true); + test_1D_sort(N * N * N, false); #if 
!defined(KOKKOS_ENABLE_ROCM) - test_3D_sort(N); - test_dynamic_view_sort(N*N); + test_3D_sort(N); + test_dynamic_view_sort(N * N); #endif test_issue_1160(); } -} -} +} // namespace Impl +} // namespace Test #endif /* KOKKOS_ALGORITHMS_UNITTESTS_TESTSORT_HPP */ diff --git a/lib/kokkos/algorithms/unit_tests/TestThreads.cpp b/lib/kokkos/algorithms/unit_tests/TestThreads.cpp index 99cdb7da92..c75e6e8dfb 100644 --- a/lib/kokkos/algorithms/unit_tests/TestThreads.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestThreads.cpp @@ -2,10 +2,11 @@ //@HEADER // ************************************************************************ // -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). // -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Redistribution and use in source and binary forms, with or without @@ -23,10 +24,10 @@ // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR @@ -52,51 +53,36 @@ #include #include - //---------------------------------------------------------------------------- - namespace Test { -class threads : public ::testing::Test { -protected: - static void SetUpTestCase() - { - std::cout << std::setprecision(5) << std::scientific; +#define THREADS_RANDOM_XORSHIFT64(num_draws) \ + TEST(threads, Random_XorShift64) { \ + Impl::test_random >( \ + num_draws); \ } - static void TearDownTestCase() - { +#define THREADS_RANDOM_XORSHIFT1024(num_draws) \ + TEST(threads, Random_XorShift1024) { \ + Impl::test_random >( \ + num_draws); \ } -}; -#define THREADS_RANDOM_XORSHIFT64( num_draws ) \ - TEST_F( threads, Random_XorShift64 ) { \ - Impl::test_random >(num_draws); \ +#define THREADS_SORT_UNSIGNED(size) \ + TEST(threads, SortUnsigned) { \ + Impl::test_sort(size); \ } -#define THREADS_RANDOM_XORSHIFT1024( num_draws ) \ - TEST_F( threads, Random_XorShift1024 ) { \ - Impl::test_random >(num_draws); \ - } - -#define THREADS_SORT_UNSIGNED( size ) \ - TEST_F( threads, SortUnsigned ) { \ - Impl::test_sort< Kokkos::Threads, double >(size); \ - } - - -THREADS_RANDOM_XORSHIFT64( 10240000 ) -THREADS_RANDOM_XORSHIFT1024( 10130144 ) +THREADS_RANDOM_XORSHIFT64(10240000) +THREADS_RANDOM_XORSHIFT1024(10130144) THREADS_SORT_UNSIGNED(171) #undef THREADS_RANDOM_XORSHIFT64 #undef THREADS_RANDOM_XORSHIFT1024 #undef THREADS_SORT_UNSIGNED -} // namespace Test +} // namespace Test #else void KOKKOS_ALGORITHMS_UNITTESTS_TESTTHREADS_PREVENT_LINK_ERROR() {} #endif - - diff --git a/lib/kokkos/algorithms/unit_tests/UnitTestMain.cpp b/lib/kokkos/algorithms/unit_tests/UnitTestMain.cpp index 8feb08332f..e245aad35f 100644 --- a/lib/kokkos/algorithms/unit_tests/UnitTestMain.cpp +++ 
b/lib/kokkos/algorithms/unit_tests/UnitTestMain.cpp @@ -2,10 +2,11 @@ //@HEADER // ************************************************************************ // -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). // -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Redistribution and use in source and binary forms, with or without @@ -23,10 +24,10 @@ // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR @@ -45,10 +46,9 @@ #include int main(int argc, char *argv[]) { - Kokkos::initialize(argc,argv); - ::testing::InitGoogleTest(&argc,argv); + Kokkos::initialize(argc, argv); + ::testing::InitGoogleTest(&argc, argv); int result = RUN_ALL_TESTS(); Kokkos::finalize(); return result; } - diff --git a/lib/kokkos/benchmarks/atomic/main.cpp b/lib/kokkos/benchmarks/atomic/main.cpp index d86d196249..5f0977f754 100644 --- a/lib/kokkos/benchmarks/atomic/main.cpp +++ b/lib/kokkos/benchmarks/atomic/main.cpp @@ -1,124 +1,120 @@ -#include -#include -#include +#include +#include +#include -template -double test_atomic(int L, int N, int M,int K,int R,Kokkos::View offsets) { - Kokkos::View output("Output",N); +template +double test_atomic(int L, int N, int M, int K, int R, + Kokkos::View offsets) { + Kokkos::View output("Output", N); Kokkos::Impl::Timer timer; - for(int r = 0; r -double test_no_atomic(int L, int N, int M,int K,int R,Kokkos::View offsets) { - Kokkos::View output("Output",N); +template +double test_no_atomic(int L, int N, int M, int K, int R, + Kokkos::View offsets) { + Kokkos::View output("Output", N); Kokkos::Impl::Timer timer; - for(int r = 0; r\n"); - printf("Example Input GPU:\n"); - printf(" Histogram : 1000000 1000 1 1000 1 10 1\n"); - printf(" MD Force : 100000 100000 100 1000 20 10 4\n"); - printf(" Matrix Assembly : 100000 1000000 50 1000 20 10 4\n"); - Kokkos::finalize(); - return 0; - } + Kokkos::initialize(argc, argv); + { + if (argc < 8) { + printf("Arguments: L N M D K R T\n"); + printf(" L: Number of iterations to run\n"); + printf(" N: Length of array to do atomics into\n"); + printf(" M: Number of atomics per iteration to do\n"); + printf(" D: Distance from index i to do atomics into (randomly)\n"); + printf(" K: 
Number of FMAD per atomic\n"); + printf(" R: Number of repeats of the experiments\n"); + printf(" T: Type of atomic\n"); + printf(" 1 - int\n"); + printf(" 2 - long\n"); + printf(" 3 - float\n"); + printf(" 4 - double\n"); + printf(" 5 - complex\n"); + printf("Example Input GPU:\n"); + printf(" Histogram : 1000000 1000 1 1000 1 10 1\n"); + printf(" MD Force : 100000 100000 100 1000 20 10 4\n"); + printf(" Matrix Assembly : 100000 1000000 50 1000 20 10 4\n"); + Kokkos::finalize(); + return 0; + } + int L = atoi(argv[1]); + int N = atoi(argv[2]); + int M = atoi(argv[3]); + int D = atoi(argv[4]); + int K = atoi(argv[5]); + int R = atoi(argv[6]); + int type = atoi(argv[7]); - int L = atoi(argv[1]); - int N = atoi(argv[2]); - int M = atoi(argv[3]); - int D = atoi(argv[4]); - int K = atoi(argv[5]); - int R = atoi(argv[6]); - int type = atoi(argv[7]); - - Kokkos::View offsets("Offsets",L,M); - Kokkos::Random_XorShift64_Pool<> pool(12371); - Kokkos::fill_random(offsets,pool,D); - double time = 0; - if(type==1) - time = test_atomic(L,N,M,K,R,offsets); - if(type==2) - time = test_atomic(L,N,M,K,R,offsets); - if(type==3) - time = test_atomic(L,N,M,K,R,offsets); - if(type==4) - time = test_atomic(L,N,M,K,R,offsets); - if(type==5) - time = test_atomic >(L,N,M,K,R,offsets); + Kokkos::View offsets("Offsets", L, M); + Kokkos::Random_XorShift64_Pool<> pool(12371); + Kokkos::fill_random(offsets, pool, D); + double time = 0; + if (type == 1) time = test_atomic(L, N, M, K, R, offsets); + if (type == 2) time = test_atomic(L, N, M, K, R, offsets); + if (type == 3) time = test_atomic(L, N, M, K, R, offsets); + if (type == 4) time = test_atomic(L, N, M, K, R, offsets); + if (type == 5) + time = test_atomic >(L, N, M, K, R, offsets); - double time2 = 1; - if(type==1) - time2 = test_no_atomic(L,N,M,K,R,offsets); - if(type==2) - time2 = test_no_atomic(L,N,M,K,R,offsets); - if(type==3) - time2 = test_no_atomic(L,N,M,K,R,offsets); - if(type==4) - time2 = test_no_atomic(L,N,M,K,R,offsets); - 
if(type==5) - time2 = test_no_atomic >(L,N,M,K,R,offsets); + double time2 = 1; + if (type == 1) time2 = test_no_atomic(L, N, M, K, R, offsets); + if (type == 2) time2 = test_no_atomic(L, N, M, K, R, offsets); + if (type == 3) time2 = test_no_atomic(L, N, M, K, R, offsets); + if (type == 4) time2 = test_no_atomic(L, N, M, K, R, offsets); + if (type == 5) + time2 = test_no_atomic >(L, N, M, K, R, offsets); - int size = 0; - if(type==1) size = sizeof(int); - if(type==2) size = sizeof(long); - if(type==3) size = sizeof(float); - if(type==4) size = sizeof(double); - if(type==5) size = sizeof(Kokkos::complex); + int size = 0; + if (type == 1) size = sizeof(int); + if (type == 2) size = sizeof(long); + if (type == 3) size = sizeof(float); + if (type == 4) size = sizeof(double); + if (type == 5) size = sizeof(Kokkos::complex); - printf("%i\n",size); - printf("Time: %s %i %i %i %i %i %i (t_atomic: %e t_nonatomic: %e ratio: %lf )( GUpdates/s: %lf GB/s: %lf )\n", - (type==1)?"int": ( - (type==2)?"long": ( - (type==3)?"float": ( - (type==4)?"double":"complex"))), - L,N,M,D,K,R,time,time2,time/time2, - 1.e-9*L*R*M/time, 1.0*L*R*M*2*size/time/1024/1024/1024); -} + printf("%i\n", size); + printf( + "Time: %s %i %i %i %i %i %i (t_atomic: %e t_nonatomic: %e ratio: %lf " + ")( GUpdates/s: %lf GB/s: %lf )\n", + (type == 1) + ? "int" + : ((type == 2) + ? "long" + : ((type == 3) ? "float" + : ((type == 4) ? "double" : "complex"))), + L, N, M, D, K, R, time, time2, time / time2, 1.e-9 * L * R * M / time, + 1.0 * L * R * M * 2 * size / time / 1024 / 1024 / 1024); + } Kokkos::finalize(); } - diff --git a/lib/kokkos/benchmarks/bytes_and_flops/bench.hpp b/lib/kokkos/benchmarks/bytes_and_flops/bench.hpp index 59b4d50c44..62d7ef4a4c 100644 --- a/lib/kokkos/benchmarks/bytes_and_flops/bench.hpp +++ b/lib/kokkos/benchmarks/bytes_and_flops/bench.hpp @@ -2,10 +2,11 @@ //@HEADER // ************************************************************************ // -// Kokkos v. 
2.0 -// Copyright (2014) Sandia Corporation +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). // -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Redistribution and use in source and binary forms, with or without @@ -23,10 +24,10 @@ // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR @@ -41,59 +42,52 @@ //@HEADER */ -#include -#include +#include +#include -template +template struct Run { -static void run(int N, int K, int R, int F, int T, int S); + static void run(int N, int K, int R, int F, int T, int S); }; -template +template struct RunStride { -static void run_1(int N, int K, int R, int F, int T, int S); -static void run_2(int N, int K, int R, int F, int T, int S); -static void run_3(int N, int K, int R, int F, int T, int S); -static void run_4(int N, int K, int R, int F, int T, int S); -static void run_5(int N, int K, int R, int F, int T, int S); -static void run_6(int N, int K, int R, int F, int T, int S); -static void run_7(int N, int K, int R, int F, int T, int S); -static void run_8(int N, int K, int R, int F, int T, int S); -static void run(int N, int K, int R, int U, 
int F, int T, int S); + static void run_1(int N, int K, int R, int F, int T, int S); + static void run_2(int N, int K, int R, int F, int T, int S); + static void run_3(int N, int K, int R, int F, int T, int S); + static void run_4(int N, int K, int R, int F, int T, int S); + static void run_5(int N, int K, int R, int F, int T, int S); + static void run_6(int N, int K, int R, int F, int T, int S); + static void run_7(int N, int K, int R, int F, int T, int S); + static void run_8(int N, int K, int R, int F, int T, int S); + static void run(int N, int K, int R, int U, int F, int T, int S); }; #define STRIDE 1 -#include +#include #undef STRIDE #define STRIDE 2 -#include +#include #undef STRIDE #define STRIDE 4 -#include +#include #undef STRIDE #define STRIDE 8 -#include +#include #undef STRIDE #define STRIDE 16 -#include +#include #undef STRIDE #define STRIDE 32 -#include +#include #undef STRIDE -template +template void run_stride_unroll(int N, int K, int R, int D, int U, int F, int T, int S) { - if(D == 1) - RunStride::run(N,K,R,U,F,T,S); - if(D == 2) - RunStride::run(N,K,R,U,F,T,S); - if(D == 4) - RunStride::run(N,K,R,U,F,T,S); - if(D == 8) - RunStride::run(N,K,R,U,F,T,S); - if(D == 16) - RunStride::run(N,K,R,U,F,T,S); - if(D == 32) - RunStride::run(N,K,R,U,F,T,S); + if (D == 1) RunStride::run(N, K, R, U, F, T, S); + if (D == 2) RunStride::run(N, K, R, U, F, T, S); + if (D == 4) RunStride::run(N, K, R, U, F, T, S); + if (D == 8) RunStride::run(N, K, R, U, F, T, S); + if (D == 16) RunStride::run(N, K, R, U, F, T, S); + if (D == 32) RunStride::run(N, K, R, U, F, T, S); } - diff --git a/lib/kokkos/benchmarks/bytes_and_flops/bench_stride.hpp b/lib/kokkos/benchmarks/bytes_and_flops/bench_stride.hpp index 6509c654e7..64817fe9dc 100644 --- a/lib/kokkos/benchmarks/bytes_and_flops/bench_stride.hpp +++ b/lib/kokkos/benchmarks/bytes_and_flops/bench_stride.hpp @@ -2,10 +2,11 @@ //@HEADER // ************************************************************************ // -// Kokkos v. 
2.0 -// Copyright (2014) Sandia Corporation +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). // -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Redistribution and use in source and binary forms, with or without @@ -23,10 +24,10 @@ // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR @@ -41,84 +42,82 @@ //@HEADER */ - #define UNROLL 1 -#include +#include #undef UNROLL #define UNROLL 2 -#include +#include #undef UNROLL #define UNROLL 3 -#include +#include #undef UNROLL #define UNROLL 4 -#include +#include #undef UNROLL #define UNROLL 5 -#include +#include #undef UNROLL #define UNROLL 6 -#include +#include #undef UNROLL #define UNROLL 7 -#include +#include #undef UNROLL #define UNROLL 8 -#include +#include #undef UNROLL -template -struct RunStride { -static void run_1(int N, int K, int R, int F, int T, int S) { - Run::run(N,K,R,F,T,S); -} -static void run_2(int N, int K, int R, int F, int T, int S) { - Run::run(N,K,R,F,T,S); -} -static void run_3(int N, int K, int R, int F, int T, int S) { - Run::run(N,K,R,F,T,S); -} -static void run_4(int N, int K, int R, int F, int T, int S) { - 
Run::run(N,K,R,F,T,S); -} -static void run_5(int N, int K, int R, int F, int T, int S) { - Run::run(N,K,R,F,T,S); -} -static void run_6(int N, int K, int R, int F, int T, int S) { - Run::run(N,K,R,F,T,S); -} -static void run_7(int N, int K, int R, int F, int T, int S) { - Run::run(N,K,R,F,T,S); -} -static void run_8(int N, int K, int R, int F, int T, int S) { - Run::run(N,K,R,F,T,S); -} - -static void run(int N, int K, int R, int U, int F, int T, int S) { - if(U==1) { - run_1(N,K,R,F,T,S); +template +struct RunStride { + static void run_1(int N, int K, int R, int F, int T, int S) { + Run::run(N, K, R, F, T, S); } - if(U==2) { - run_2(N,K,R,F,T,S); + static void run_2(int N, int K, int R, int F, int T, int S) { + Run::run(N, K, R, F, T, S); } - if(U==3) { - run_3(N,K,R,F,T,S); + static void run_3(int N, int K, int R, int F, int T, int S) { + Run::run(N, K, R, F, T, S); } - if(U==4) { - run_4(N,K,R,F,T,S); + static void run_4(int N, int K, int R, int F, int T, int S) { + Run::run(N, K, R, F, T, S); } - if(U==5) { - run_5(N,K,R,F,T,S); + static void run_5(int N, int K, int R, int F, int T, int S) { + Run::run(N, K, R, F, T, S); } - if(U==6) { - run_6(N,K,R,F,T,S); + static void run_6(int N, int K, int R, int F, int T, int S) { + Run::run(N, K, R, F, T, S); } - if(U==7) { - run_7(N,K,R,F,T,S); + static void run_7(int N, int K, int R, int F, int T, int S) { + Run::run(N, K, R, F, T, S); + } + static void run_8(int N, int K, int R, int F, int T, int S) { + Run::run(N, K, R, F, T, S); } - if(U==8) { - run_8(N,K,R,F,T,S); - } -} -}; + static void run(int N, int K, int R, int U, int F, int T, int S) { + if (U == 1) { + run_1(N, K, R, F, T, S); + } + if (U == 2) { + run_2(N, K, R, F, T, S); + } + if (U == 3) { + run_3(N, K, R, F, T, S); + } + if (U == 4) { + run_4(N, K, R, F, T, S); + } + if (U == 5) { + run_5(N, K, R, F, T, S); + } + if (U == 6) { + run_6(N, K, R, F, T, S); + } + if (U == 7) { + run_7(N, K, R, F, T, S); + } + if (U == 8) { + run_8(N, K, R, F, T, S); + } + } 
+}; diff --git a/lib/kokkos/benchmarks/bytes_and_flops/bench_unroll_stride.hpp b/lib/kokkos/benchmarks/bytes_and_flops/bench_unroll_stride.hpp index c6651da1e7..00ce635a48 100644 --- a/lib/kokkos/benchmarks/bytes_and_flops/bench_unroll_stride.hpp +++ b/lib/kokkos/benchmarks/bytes_and_flops/bench_unroll_stride.hpp @@ -2,10 +2,11 @@ //@HEADER // ************************************************************************ // -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). // -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Redistribution and use in source and binary forms, with or without @@ -23,10 +24,10 @@ // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR @@ -41,108 +42,110 @@ //@HEADER */ -template -struct Run { -static void run(int N, int K, int R, int F, int T, int S) { - Kokkos::View A("A",N,K); - Kokkos::View B("B",N,K); - Kokkos::View C("C",N,K); +template +struct Run { + static void run(int N, int K, int R, int F, int T, int S) { + Kokkos::View A("A", N, K); + Kokkos::View B("B", N, K); + Kokkos::View C("C", N, K); - Kokkos::deep_copy(A,Scalar(1.5)); - Kokkos::deep_copy(B,Scalar(2.5)); - Kokkos::deep_copy(C,Scalar(3.5)); + Kokkos::deep_copy(A, Scalar(1.5)); + Kokkos::deep_copy(B, Scalar(2.5)); + Kokkos::deep_copy(C, Scalar(3.5)); - Kokkos::Timer timer; - Kokkos::parallel_for("BenchmarkKernel",Kokkos::TeamPolicy<>(N,T).set_scratch_size(0,Kokkos::PerTeam(S)), - KOKKOS_LAMBDA ( const Kokkos::TeamPolicy<>::member_type& team) { - const int n = team.league_rank(); - for(int r=0; r1) - Scalar a2 = a1*1.3; -#endif -#if(UNROLL>2) - Scalar a3 = a2*1.1; -#endif -#if(UNROLL>3) - Scalar a4 = a3*1.1; -#endif -#if(UNROLL>4) - Scalar a5 = a4*1.3; -#endif -#if(UNROLL>5) - Scalar a6 = a5*1.1; -#endif -#if(UNROLL>6) - Scalar a7 = a6*1.1; -#endif -#if(UNROLL>7) - Scalar a8 = a7*1.1; + Kokkos::Timer timer; + Kokkos::parallel_for( + "BenchmarkKernel", + Kokkos::TeamPolicy<>(N, T).set_scratch_size(0, Kokkos::PerTeam(S)), + KOKKOS_LAMBDA(const Kokkos::TeamPolicy<>::member_type& team) { + const int n = team.league_rank(); + for (int r = 0; r < R; r++) { + Kokkos::parallel_for( + Kokkos::TeamThreadRange(team, 0, K), [&](const int& i) { + Scalar a1 = A(n, i, 0); + const Scalar b = B(n, i, 0); +#if (UNROLL > 1) + Scalar a2 = a1 * 1.3; +#endif +#if (UNROLL > 2) + Scalar a3 = a2 * 1.1; +#endif +#if (UNROLL > 3) + Scalar a4 = a3 * 1.1; +#endif +#if (UNROLL > 4) + Scalar a5 = a4 * 1.3; +#endif +#if 
(UNROLL > 5) + Scalar a6 = a5 * 1.1; +#endif +#if (UNROLL > 6) + Scalar a7 = a6 * 1.1; +#endif +#if (UNROLL > 7) + Scalar a8 = a7 * 1.1; #endif - - for(int f = 0; f1) - a2 += b*a2; + for (int f = 0; f < F; f++) { + a1 += b * a1; +#if (UNROLL > 1) + a2 += b * a2; #endif -#if(UNROLL>2) - a3 += b*a3; +#if (UNROLL > 2) + a3 += b * a3; #endif -#if(UNROLL>3) - a4 += b*a4; +#if (UNROLL > 3) + a4 += b * a4; #endif -#if(UNROLL>4) - a5 += b*a5; +#if (UNROLL > 4) + a5 += b * a5; #endif -#if(UNROLL>5) - a6 += b*a6; +#if (UNROLL > 5) + a6 += b * a6; #endif -#if(UNROLL>6) - a7 += b*a7; +#if (UNROLL > 6) + a7 += b * a7; #endif -#if(UNROLL>7) - a8 += b*a8; +#if (UNROLL > 7) + a8 += b * a8; #endif - - - } -#if(UNROLL==1) - C(n,i,0) = a1; + } +#if (UNROLL == 1) + C(n, i, 0) = a1; #endif -#if(UNROLL==2) - C(n,i,0) = a1+a2; +#if (UNROLL == 2) + C(n, i, 0) = a1 + a2; #endif -#if(UNROLL==3) - C(n,i,0) = a1+a2+a3; +#if (UNROLL == 3) + C(n, i, 0) = a1 + a2 + a3; #endif -#if(UNROLL==4) - C(n,i,0) = a1+a2+a3+a4; +#if (UNROLL == 4) + C(n, i, 0) = a1 + a2 + a3 + a4; #endif -#if(UNROLL==5) - C(n,i,0) = a1+a2+a3+a4+a5; +#if (UNROLL == 5) + C(n, i, 0) = a1 + a2 + a3 + a4 + a5; #endif -#if(UNROLL==6) - C(n,i,0) = a1+a2+a3+a4+a5+a6; +#if (UNROLL == 6) + C(n, i, 0) = a1 + a2 + a3 + a4 + a5 + a6; #endif -#if(UNROLL==7) - C(n,i,0) = a1+a2+a3+a4+a5+a6+a7; +#if (UNROLL == 7) + C(n, i, 0) = a1 + a2 + a3 + a4 + a5 + a6 + a7; #endif -#if(UNROLL==8) - C(n,i,0) = a1+a2+a3+a4+a5+a6+a7+a8; +#if (UNROLL == 8) + C(n, i, 0) = a1 + a2 + a3 + a4 + a5 + a6 + a7 + a8; #endif + }); + } + }); + Kokkos::fence(); + double seconds = timer.seconds(); - }); - } - }); - Kokkos::fence(); - double seconds = timer.seconds(); - - double bytes = 1.0*N*K*R*3*sizeof(Scalar); - double flops = 1.0*N*K*R*(F*2*UNROLL + 2*(UNROLL-1)); - printf("NKRUFTS: %i %i %i %i %i %i %i Time: %lfs Bandwidth: %lfGiB/s GFlop/s: %lf\n",N,K,R,UNROLL,F,T,S,seconds,1.0*bytes/seconds/1024/1024/1024,1.e-9*flops/seconds); -} + double bytes = 1.0 * N * K * R 
* 3 * sizeof(Scalar); + double flops = 1.0 * N * K * R * (F * 2 * UNROLL + 2 * (UNROLL - 1)); + printf( + "NKRUFTS: %i %i %i %i %i %i %i Time: %lfs Bandwidth: %lfGiB/s GFlop/s: " + "%lf\n", + N, K, R, UNROLL, F, T, S, seconds, + 1.0 * bytes / seconds / 1024 / 1024 / 1024, 1.e-9 * flops / seconds); + } }; - diff --git a/lib/kokkos/benchmarks/bytes_and_flops/main.cpp b/lib/kokkos/benchmarks/bytes_and_flops/main.cpp index 4f46b38717..c21a16200e 100644 --- a/lib/kokkos/benchmarks/bytes_and_flops/main.cpp +++ b/lib/kokkos/benchmarks/bytes_and_flops/main.cpp @@ -2,10 +2,11 @@ //@HEADER // ************************************************************************ // -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). // -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Redistribution and use in source and binary forms, with or without @@ -23,10 +24,10 @@ // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR @@ -41,25 +42,27 @@ //@HEADER */ -#include -#include -#include -#include +#include +#include +#include +#include int main(int argc, char* argv[]) { Kokkos::initialize(); - - if(argc<10) { + if (argc < 10) { printf("Arguments: N K R D U F T S\n"); printf(" P: Precision (1==float, 2==double)\n"); printf(" N,K: dimensions of the 2D array to allocate\n"); printf(" R: how often to loop through the K dimension with each team\n"); printf(" D: distance between loaded elements (stride)\n"); printf(" U: how many independent flops to do per load\n"); - printf(" F: how many times to repeat the U unrolled operations before reading next element\n"); + printf( + " F: how many times to repeat the U unrolled operations before " + "reading next element\n"); printf(" T: team size\n"); - printf(" S: shared memory per team (used to control occupancy on GPUs)\n"); + printf( + " S: shared memory per team (used to control occupancy on GPUs)\n"); printf("Example Input GPU:\n"); printf(" Bandwidth Bound : 2 100000 1024 1 1 1 1 256 6000\n"); printf(" Cache Bound : 2 100000 1024 64 1 1 1 512 20000\n"); @@ -70,7 +73,6 @@ int main(int argc, char* argv[]) { return 0; } - int P = atoi(argv[1]); int N = atoi(argv[2]); int K = atoi(argv[3]); @@ -81,17 +83,25 @@ int main(int argc, char* argv[]) { int T = atoi(argv[8]); int S = atoi(argv[9]); - if(U>8) {printf("U must be 1-8\n"); return 0;} - if( (D!=1) && (D!=2) && (D!=4) && (D!=8) && (D!=16) && (D!=32)) {printf("D must be one of 1,2,4,8,16,32\n"); return 0;} - if( (P!=1) && (P!=2) ) {printf("P must be one of 1,2\n"); return 0;} + if (U > 8) { + printf("U must be 1-8\n"); + return 0; + } + if ((D != 1) && (D != 2) && (D != 4) && (D != 8) && (D != 16) && (D != 32)) { + printf("D must be one of 
1,2,4,8,16,32\n"); + return 0; + } + if ((P != 1) && (P != 2)) { + printf("P must be one of 1,2\n"); + return 0; + } - if(P==1) { - run_stride_unroll(N,K,R,D,U,F,T,S); + if (P == 1) { + run_stride_unroll(N, K, R, D, U, F, T, S); } - if(P==2) { - run_stride_unroll(N,K,R,D,U,F,T,S); + if (P == 2) { + run_stride_unroll(N, K, R, D, U, F, T, S); } Kokkos::finalize(); } - diff --git a/lib/kokkos/benchmarks/gather/gather.hpp b/lib/kokkos/benchmarks/gather/gather.hpp index bbbd65850f..239614184b 100644 --- a/lib/kokkos/benchmarks/gather/gather.hpp +++ b/lib/kokkos/benchmarks/gather/gather.hpp @@ -2,10 +2,11 @@ //@HEADER // ************************************************************************ // -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). // -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Redistribution and use in source and binary forms, with or without @@ -23,10 +24,10 @@ // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR @@ -41,52 +42,44 @@ //@HEADER */ -template +template struct RunGather { static void run(int N, int K, int D, int R, int F); }; #define UNROLL 1 -#include +#include #undef UNROLL #define UNROLL 2 -#include +#include #undef UNROLL #define UNROLL 3 -#include +#include #undef UNROLL #define UNROLL 4 -#include +#include #undef UNROLL #define UNROLL 5 -#include +#include #undef UNROLL #define UNROLL 6 -#include +#include #undef UNROLL #define UNROLL 7 -#include +#include #undef UNROLL #define UNROLL 8 -#include +#include #undef UNROLL -template +template void run_gather_test(int N, int K, int D, int R, int U, int F) { - if(U == 1) - RunGather::run(N,K,D,R,F); - if(U == 2) - RunGather::run(N,K,D,R,F); - if(U == 3) - RunGather::run(N,K,D,R,F); - if(U == 4) - RunGather::run(N,K,D,R,F); - if(U == 5) - RunGather::run(N,K,D,R,F); - if(U == 6) - RunGather::run(N,K,D,R,F); - if(U == 7) - RunGather::run(N,K,D,R,F); - if(U == 8) - RunGather::run(N,K,D,R,F); + if (U == 1) RunGather::run(N, K, D, R, F); + if (U == 2) RunGather::run(N, K, D, R, F); + if (U == 3) RunGather::run(N, K, D, R, F); + if (U == 4) RunGather::run(N, K, D, R, F); + if (U == 5) RunGather::run(N, K, D, R, F); + if (U == 6) RunGather::run(N, K, D, R, F); + if (U == 7) RunGather::run(N, K, D, R, F); + if (U == 8) RunGather::run(N, K, D, R, F); } diff --git a/lib/kokkos/benchmarks/gather/gather_unroll.hpp b/lib/kokkos/benchmarks/gather/gather_unroll.hpp index 1d9c99adf9..4dc046f99c 100644 --- a/lib/kokkos/benchmarks/gather/gather_unroll.hpp +++ b/lib/kokkos/benchmarks/gather/gather_unroll.hpp @@ -2,10 +2,11 @@ //@HEADER // ************************************************************************ // -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation +// Kokkos v. 
3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). // -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Redistribution and use in source and binary forms, with or without @@ -23,10 +24,10 @@ // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR @@ -41,129 +42,132 @@ //@HEADER */ -#include -#include +#include +#include -template -struct RunGather { -static void run(int N, int K, int D, int R, int F) { - Kokkos::View connectivity("Connectivity",N,K); - Kokkos::View A_in("Input",N); - Kokkos::View B_in("Input",N); - Kokkos::View C("Output",N); +template +struct RunGather { + static void run(int N, int K, int D, int R, int F) { + Kokkos::View connectivity("Connectivity", N, K); + Kokkos::View A_in("Input", N); + Kokkos::View B_in("Input", N); + Kokkos::View C("Output", N); - Kokkos::Random_XorShift64_Pool<> rand_pool(12313); + Kokkos::Random_XorShift64_Pool<> rand_pool(12313); - Kokkos::deep_copy(A_in,1.5); - Kokkos::deep_copy(B_in,2.0); + Kokkos::deep_copy(A_in, 1.5); + Kokkos::deep_copy(B_in, 2.0); - Kokkos::View > A(A_in); - Kokkos::View > B(B_in); - - Kokkos::parallel_for("InitKernel",N, - 
KOKKOS_LAMBDA (const int& i) { - auto rand_gen = rand_pool.get_state(); - for( int jj=0; jj > A( + A_in); + Kokkos::View > B( + B_in); + Kokkos::parallel_for( + "InitKernel", N, KOKKOS_LAMBDA(const int& i) { + auto rand_gen = rand_pool.get_state(); + for (int jj = 0; jj < K; jj++) { + connectivity(i, jj) = (rand_gen.rand(D) + i - D / 2 + N) % N; + } + rand_pool.free_state(rand_gen); + }); + Kokkos::fence(); - Kokkos::Timer timer; - for(int r = 0; r1) - Scalar a2 = a1*Scalar(1.3); + Kokkos::Timer timer; + for (int r = 0; r < R; r++) { + Kokkos::parallel_for( + "BenchmarkKernel", N, KOKKOS_LAMBDA(const int& i) { + Scalar c = Scalar(0.0); + for (int jj = 0; jj < K; jj++) { + const int j = connectivity(i, jj); + Scalar a1 = A(j); + const Scalar b = B(j); +#if (UNROLL > 1) + Scalar a2 = a1 * Scalar(1.3); #endif -#if(UNROLL>2) - Scalar a3 = a2*Scalar(1.1); +#if (UNROLL > 2) + Scalar a3 = a2 * Scalar(1.1); #endif -#if(UNROLL>3) - Scalar a4 = a3*Scalar(1.1); +#if (UNROLL > 3) + Scalar a4 = a3 * Scalar(1.1); #endif -#if(UNROLL>4) - Scalar a5 = a4*Scalar(1.3); +#if (UNROLL > 4) + Scalar a5 = a4 * Scalar(1.3); #endif -#if(UNROLL>5) - Scalar a6 = a5*Scalar(1.1); +#if (UNROLL > 5) + Scalar a6 = a5 * Scalar(1.1); #endif -#if(UNROLL>6) - Scalar a7 = a6*Scalar(1.1); +#if (UNROLL > 6) + Scalar a7 = a6 * Scalar(1.1); #endif -#if(UNROLL>7) - Scalar a8 = a7*Scalar(1.1); +#if (UNROLL > 7) + Scalar a8 = a7 * Scalar(1.1); #endif - - for(int f = 0; f1) - a2 += b*a2; + for (int f = 0; f < F; f++) { + a1 += b * a1; +#if (UNROLL > 1) + a2 += b * a2; #endif -#if(UNROLL>2) - a3 += b*a3; +#if (UNROLL > 2) + a3 += b * a3; #endif -#if(UNROLL>3) - a4 += b*a4; +#if (UNROLL > 3) + a4 += b * a4; #endif -#if(UNROLL>4) - a5 += b*a5; +#if (UNROLL > 4) + a5 += b * a5; #endif -#if(UNROLL>5) - a6 += b*a6; +#if (UNROLL > 5) + a6 += b * a6; #endif -#if(UNROLL>6) - a7 += b*a7; +#if (UNROLL > 6) + a7 += b * a7; #endif -#if(UNROLL>7) - a8 += b*a8; +#if (UNROLL > 7) + a8 += b * a8; #endif - - - } -#if(UNROLL==1) 
- c += a1; + } +#if (UNROLL == 1) + c += a1; #endif -#if(UNROLL==2) - c += a1+a2; +#if (UNROLL == 2) + c += a1 + a2; #endif -#if(UNROLL==3) - c += a1+a2+a3; +#if (UNROLL == 3) + c += a1 + a2 + a3; #endif -#if(UNROLL==4) - c += a1+a2+a3+a4; +#if (UNROLL == 4) + c += a1 + a2 + a3 + a4; #endif -#if(UNROLL==5) - c += a1+a2+a3+a4+a5; +#if (UNROLL == 5) + c += a1 + a2 + a3 + a4 + a5; #endif -#if(UNROLL==6) - c += a1+a2+a3+a4+a5+a6; +#if (UNROLL == 6) + c += a1 + a2 + a3 + a4 + a5 + a6; #endif -#if(UNROLL==7) - c += a1+a2+a3+a4+a5+a6+a7; +#if (UNROLL == 7) + c += a1 + a2 + a3 + a4 + a5 + a6 + a7; #endif -#if(UNROLL==8) - c += a1+a2+a3+a4+a5+a6+a7+a8; +#if (UNROLL == 8) + c += a1 + a2 + a3 + a4 + a5 + a6 + a7 + a8; #endif + } + C(i) = c; + }); + Kokkos::fence(); + } + double seconds = timer.seconds(); - } - C(i) = c ; - }); - Kokkos::fence(); + double bytes = 1.0 * N * K * R * (2 * sizeof(Scalar) + sizeof(int)) + + 1.0 * N * R * sizeof(Scalar); + double flops = 1.0 * N * K * R * (F * 2 * UNROLL + 2 * (UNROLL - 1)); + double gather_ops = 1.0 * N * K * R * 2; + printf( + "SNKDRUF: %i %i %i %i %i %i %i Time: %lfs Bandwidth: %lfGiB/s GFlop/s: " + "%lf GGather/s: %lf\n", + sizeof(Scalar) / 4, N, K, D, R, UNROLL, F, seconds, + 1.0 * bytes / seconds / 1024 / 1024 / 1024, 1.e-9 * flops / seconds, + 1.e-9 * gather_ops / seconds); } - double seconds = timer.seconds(); - - double bytes = 1.0*N*K*R*(2*sizeof(Scalar)+sizeof(int)) + 1.0*N*R*sizeof(Scalar); - double flops = 1.0*N*K*R*(F*2*UNROLL + 2*(UNROLL-1)); - double gather_ops = 1.0*N*K*R*2; - printf("SNKDRUF: %i %i %i %i %i %i %i Time: %lfs Bandwidth: %lfGiB/s GFlop/s: %lf GGather/s: %lf\n",sizeof(Scalar)/4,N,K,D,R,UNROLL,F,seconds,1.0*bytes/seconds/1024/1024/1024,1.e-9*flops/seconds,1.e-9*gather_ops/seconds); -} }; diff --git a/lib/kokkos/benchmarks/gather/main.cpp b/lib/kokkos/benchmarks/gather/main.cpp index ca5238e7fd..6a2db3e024 100644 --- a/lib/kokkos/benchmarks/gather/main.cpp +++ b/lib/kokkos/benchmarks/gather/main.cpp @@ 
-2,10 +2,11 @@ //@HEADER // ************************************************************************ // -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). // -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Redistribution and use in source and binary forms, with or without @@ -23,10 +24,10 @@ // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR @@ -41,23 +42,26 @@ //@HEADER */ -#include -#include -#include -#include +#include +#include +#include +#include int main(int argc, char* argv[]) { - Kokkos::initialize(argc,argv); + Kokkos::initialize(argc, argv); - if(argc<8) { + if (argc < 8) { printf("Arguments: S N K D\n"); - printf(" S: Scalar Type Size (1==float, 2==double, 4=complex)\n"); + printf( + " S: Scalar Type Size (1==float, 2==double, 4=complex)\n"); printf(" N: Number of entities\n"); printf(" K: Number of things to gather per entity\n"); printf(" D: Max distance of gathered things of an entity\n"); printf(" R: how often to loop through the K dimension with each team\n"); printf(" U: how many independent flops to do per load\n"); - printf(" F: how many times to repeat the U unrolled operations before reading next element\n"); + printf( + " F: how many times to repeat the U unrolled operations before " + "reading next element\n"); printf("Example Input GPU:\n"); printf(" Bandwidth Bound : 2 10000000 1 1 10 1 1\n"); printf(" Cache Bound : 2 10000000 64 1 10 1 1\n"); @@ -68,7 +72,6 @@ int main(int argc, char* argv[]) { return 0; } - int S = atoi(argv[1]); int N = atoi(argv[2]); int K = atoi(argv[3]); @@ -77,17 +80,22 @@ int main(int argc, char* argv[]) { int U = atoi(argv[6]); int F = atoi(argv[7]); - if( (S!=1) && (S!=2) && (S!=4)) {printf("S must be one of 1,2,4\n"); return 0;} - if( N(N,K,D,R,U,F); + if ((S != 1) && (S != 2) && (S != 4)) { + printf("S must be one of 1,2,4\n"); + return 0; + } + if (N < D) { + printf("N must be larger or equal to D\n"); + return 0; + } + if (S == 1) { + run_gather_test(N, K, D, R, U, F); } - if(S==2) { - run_gather_test(N,K,D,R,U,F); + if (S == 2) { + run_gather_test(N, K, D, R, U, F); } - if(S==4) { - run_gather_test 
>(N,K,D,R,U,F); + if (S == 4) { + run_gather_test >(N, K, D, R, U, F); } Kokkos::finalize(); } - diff --git a/lib/kokkos/benchmarks/gups/gups-kokkos.cc b/lib/kokkos/benchmarks/gups/gups-kokkos.cc index 4602adda79..9ac59be4a6 100644 --- a/lib/kokkos/benchmarks/gups/gups-kokkos.cc +++ b/lib/kokkos/benchmarks/gups/gups-kokkos.cc @@ -2,10 +2,11 @@ //@HEADER // ************************************************************************ // -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). // -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Redistribution and use in source and binary forms, with or without @@ -23,10 +24,10 @@ // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR diff --git a/lib/kokkos/benchmarks/policy_performance/main.cpp b/lib/kokkos/benchmarks/policy_performance/main.cpp index 2f5395734a..332e5574da 100644 --- a/lib/kokkos/benchmarks/policy_performance/main.cpp +++ b/lib/kokkos/benchmarks/policy_performance/main.cpp @@ -2,10 +2,11 @@ //@HEADER // ************************************************************************ // -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). // -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Redistribution and use in source and binary forms, with or without @@ -23,10 +24,10 @@ // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR @@ -44,67 +45,86 @@ #include #include "policy_perf_test.hpp" -int main(int argc, char* argv[] ) { - Kokkos::initialize(argc,argv); +int main(int argc, char* argv[]) { + Kokkos::initialize(argc, argv); - if(argc<10) { + if (argc < 10) { printf(" Ten arguments are needed to run this program:\n"); - printf(" (1)team_range, (2)thread_range, (3)vector_range, (4)outer_repeat, (5)thread_repeat, (6)vector_repeat, (7)team_size, (8)vector_size, (9)schedule, (10)test_type\n"); + printf( + " (1)team_range, (2)thread_range, (3)vector_range, (4)outer_repeat, " + "(5)thread_repeat, (6)vector_repeat, (7)team_size, (8)vector_size, " + "(9)schedule, (10)test_type\n"); printf(" team_range: number of teams (league_size)\n"); printf(" thread_range: range for nested TeamThreadRange parallel_*\n"); printf(" vector_range: range for nested ThreadVectorRange parallel_*\n"); printf(" outer_repeat: number of repeats for outer parallel_* call\n"); - printf(" thread_repeat: number of repeats for TeamThreadRange parallel_* call\n"); - printf(" vector_repeat: number of repeats for ThreadVectorRange parallel_* call\n"); + printf( + " thread_repeat: number of repeats for TeamThreadRange parallel_* " + "call\n"); + printf( + " vector_repeat: number of repeats for ThreadVectorRange parallel_* " + "call\n"); printf(" team_size: number of team members (team_size)\n"); printf(" vector_size: desired vectorization (if possible)\n"); printf(" schedule: 1 == Static 2 == Dynamic\n"); - printf(" test_type: 3-digit code XYZ for testing (nested) parallel_*\n"); - printf(" code key: XYZ X in {1,2,3,4,5}, Y in {0,1,2}, Z in {0,1,2}\n"); + printf( + " test_type: 3-digit code XYZ for testing (nested) parallel_*\n"); + printf( + " code key: XYZ X in {1,2,3,4,5}, Y in {0,1,2}, 
Z in " + "{0,1,2}\n"); printf(" TeamPolicy:\n"); - printf(" X: 0 = none (never used, makes no sense); 1 = parallel_for; 2 = parallel_reduce\n"); - printf(" Y: 0 = none; 1 = parallel_for; 2 = parallel_reduce\n"); - printf(" Z: 0 = none; 1 = parallel_for; 2 = parallel_reduce\n"); + printf( + " X: 0 = none (never used, makes no sense); 1 = " + "parallel_for; 2 = parallel_reduce\n"); + printf( + " Y: 0 = none; 1 = parallel_for; 2 = " + "parallel_reduce\n"); + printf( + " Z: 0 = none; 1 = parallel_for; 2 = " + "parallel_reduce\n"); printf(" RangePolicy:\n"); - printf(" X: 3 = parallel_for; 4 = parallel_reduce; 5 = parallel_scan\n"); + printf( + " X: 3 = parallel_for; 4 = parallel_reduce; 5 = " + "parallel_scan\n"); printf(" Y: 0 = none\n"); printf(" Z: 0 = none\n"); printf(" Example Input:\n"); - printf(" 100000 32 32 100 100 100 8 1 1 100\n"); + printf(" 100000 32 32 100 100 100 8 1 1 100\n"); Kokkos::finalize(); return 0; } - int team_range = atoi(argv[1]); + int team_range = atoi(argv[1]); int thread_range = atoi(argv[2]); int vector_range = atoi(argv[3]); - int outer_repeat = atoi(argv[4]); + int outer_repeat = atoi(argv[4]); int thread_repeat = atoi(argv[5]); int vector_repeat = atoi(argv[6]); - int team_size = atoi(argv[7]); + int team_size = atoi(argv[7]); int vector_size = atoi(argv[8]); - int schedule = atoi(argv[9]); - int test_type = atoi(argv[10]); + int schedule = atoi(argv[9]); + int test_type = atoi(argv[10]); - int disable_verbose_output = 0; - if ( argc > 11 ) { + int disable_verbose_output = 0; + if (argc > 11) { disable_verbose_output = atoi(argv[11]); } - if ( schedule != 1 && schedule != 2 ) { + if (schedule != 1 && schedule != 2) { printf("schedule: %d\n", schedule); printf("Options for schedule are: 1 == Static 2 == Dynamic\n"); Kokkos::finalize(); return -1; } - if ( test_type != 100 && test_type != 110 && test_type != 111 && test_type != 112 && test_type != 120 && test_type != 121 && test_type != 122 - && test_type != 200 && test_type != 210 && 
test_type != 211 && test_type != 212 && test_type != 220 && test_type != 221 && test_type != 222 - && test_type != 300 && test_type != 400 && test_type != 500 - ) - { + if (test_type != 100 && test_type != 110 && test_type != 111 && + test_type != 112 && test_type != 120 && test_type != 121 && + test_type != 122 && test_type != 200 && test_type != 210 && + test_type != 211 && test_type != 212 && test_type != 220 && + test_type != 221 && test_type != 222 && test_type != 300 && + test_type != 400 && test_type != 500) { printf("Incorrect test_type option\n"); Kokkos::finalize(); return -2; @@ -112,56 +132,85 @@ int main(int argc, char* argv[] ) { double result = 0.0; - Kokkos::parallel_reduce( "parallel_reduce warmup", Kokkos::TeamPolicy<>(10,1), - KOKKOS_LAMBDA(const Kokkos::TeamPolicy<>::member_type team, double& lval) { - lval += 1; - }, result); + Kokkos::parallel_reduce( + "parallel_reduce warmup", Kokkos::TeamPolicy<>(10, 1), + KOKKOS_LAMBDA(const Kokkos::TeamPolicy<>::member_type team, + double& lval) { lval += 1; }, + result); - typedef Kokkos::View view_type_1d; - typedef Kokkos::View view_type_2d; + typedef Kokkos::View view_type_1d; + typedef Kokkos::View view_type_2d; typedef Kokkos::View view_type_3d; // Allocate view without initializing - // Call a 'warmup' test with 1 repeat - this will initialize the corresponding view appropriately for test and should obey first-touch etc - // Second call to test is the one we actually care about and time - view_type_1d v_1( Kokkos::ViewAllocateWithoutInitializing("v_1"), team_range*team_size); - view_type_2d v_2( Kokkos::ViewAllocateWithoutInitializing("v_2"), team_range*team_size, thread_range); - view_type_3d v_3( Kokkos::ViewAllocateWithoutInitializing("v_3"), team_range*team_size, thread_range, vector_range); + // Call a 'warmup' test with 1 repeat - this will initialize the corresponding + // view appropriately for test and should obey first-touch etc Second call to + // test is the one we actually care about 
and time + view_type_1d v_1(Kokkos::ViewAllocateWithoutInitializing("v_1"), + team_range * team_size); + view_type_2d v_2(Kokkos::ViewAllocateWithoutInitializing("v_2"), + team_range * team_size, thread_range); + view_type_3d v_3(Kokkos::ViewAllocateWithoutInitializing("v_3"), + team_range * team_size, thread_range, vector_range); double result_computed = 0.0; - double result_expect = 0.0; - double time = 0.0; + double result_expect = 0.0; + double time = 0.0; - if(schedule==1) { - if ( test_type != 500 ) { + if (schedule == 1) { + if (test_type != 500) { // warmup - no repeat of loops - test_policy,int>(team_range,thread_range,vector_range,1,1,1,team_size,vector_size,test_type,v_1,v_2,v_3,result_computed,result_expect,time); - test_policy,int>(team_range,thread_range,vector_range,outer_repeat,thread_repeat,vector_repeat,team_size,vector_size,test_type,v_1,v_2,v_3,result_computed,result_expect,time); - } - else { + test_policy, int>( + team_range, thread_range, vector_range, 1, 1, 1, team_size, + vector_size, test_type, v_1, v_2, v_3, result_computed, result_expect, + time); + test_policy, int>( + team_range, thread_range, vector_range, outer_repeat, thread_repeat, + vector_repeat, team_size, vector_size, test_type, v_1, v_2, v_3, + result_computed, result_expect, time); + } else { // parallel_scan: initialize 1d view for parallel_scan - test_policy,int>(team_range,thread_range,vector_range,1,1,1,team_size,vector_size,100,v_1,v_2,v_3,result_computed,result_expect,time); - test_policy,int>(team_range,thread_range,vector_range,outer_repeat,thread_repeat,vector_repeat,team_size,vector_size,test_type,v_1,v_2,v_3,result_computed,result_expect,time); + test_policy, int>( + team_range, thread_range, vector_range, 1, 1, 1, team_size, + vector_size, 100, v_1, v_2, v_3, result_computed, result_expect, + time); + test_policy, int>( + team_range, thread_range, vector_range, outer_repeat, thread_repeat, + vector_repeat, team_size, vector_size, test_type, v_1, v_2, v_3, + 
result_computed, result_expect, time); } } - if(schedule==2) { - if ( test_type != 500 ) { + if (schedule == 2) { + if (test_type != 500) { // warmup - no repeat of loops - test_policy,int>(team_range,thread_range,vector_range,1,1,1,team_size,vector_size,test_type,v_1,v_2,v_3,result_computed,result_expect,time); - test_policy,int>(team_range,thread_range,vector_range,outer_repeat,thread_repeat,vector_repeat,team_size,vector_size,test_type,v_1,v_2,v_3,result_computed,result_expect,time); - } - else { + test_policy, int>( + team_range, thread_range, vector_range, 1, 1, 1, team_size, + vector_size, test_type, v_1, v_2, v_3, result_computed, result_expect, + time); + test_policy, int>( + team_range, thread_range, vector_range, outer_repeat, thread_repeat, + vector_repeat, team_size, vector_size, test_type, v_1, v_2, v_3, + result_computed, result_expect, time); + } else { // parallel_scan: initialize 1d view for parallel_scan - test_policy,int>(team_range,thread_range,vector_range,1,1,1,team_size,vector_size,100,v_1,v_2,v_3,result_computed,result_expect,time); - test_policy,int>(team_range,thread_range,vector_range,outer_repeat,thread_repeat,vector_repeat,team_size,vector_size,test_type,v_1,v_2,v_3,result_computed,result_expect,time); + test_policy, int>( + team_range, thread_range, vector_range, 1, 1, 1, team_size, + vector_size, 100, v_1, v_2, v_3, result_computed, result_expect, + time); + test_policy, int>( + team_range, thread_range, vector_range, outer_repeat, thread_repeat, + vector_repeat, team_size, vector_size, test_type, v_1, v_2, v_3, + result_computed, result_expect, time); } } - if ( disable_verbose_output == 0 ) { - printf("%7i %4i %2i %9i %4i %4i %4i %2i %1i %3i %e %e %lf\n",team_range,thread_range,vector_range,outer_repeat,thread_repeat,vector_repeat,team_size,vector_size,schedule,test_type,result_computed,result_expect,time); - } - else { - printf("%lf\n",time); + if (disable_verbose_output == 0) { + printf("%7i %4i %2i %9i %4i %4i %4i %2i %1i %3i %e 
%e %lf\n", team_range, + thread_range, vector_range, outer_repeat, thread_repeat, + vector_repeat, team_size, vector_size, schedule, test_type, + result_computed, result_expect, time); + } else { + printf("%lf\n", time); } Kokkos::finalize(); diff --git a/lib/kokkos/benchmarks/policy_performance/policy_perf_test.hpp b/lib/kokkos/benchmarks/policy_performance/policy_perf_test.hpp index 1ab437928d..7a1500891f 100644 --- a/lib/kokkos/benchmarks/policy_performance/policy_perf_test.hpp +++ b/lib/kokkos/benchmarks/policy_performance/policy_perf_test.hpp @@ -2,10 +2,11 @@ //@HEADER // ************************************************************************ // -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). // -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Redistribution and use in source and binary forms, with or without @@ -23,10 +24,10 @@ // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR @@ -43,297 +44,375 @@ #include -template < class ViewType > +template struct ParallelScanFunctor { using value_type = double; ViewType v; - ParallelScanFunctor( const ViewType & v_ ) - : v(v_) - {} + ParallelScanFunctor(const ViewType& v_) : v(v_) {} KOKKOS_INLINE_FUNCTION - void operator()( const int idx, value_type& val, const bool& final ) const - { - // inclusive scan - val += v(idx); - if ( final ) { - v(idx) = val; - } + void operator()(const int idx, value_type& val, const bool& final) const { + // inclusive scan + val += v(idx); + if (final) { + v(idx) = val; } + } }; -template +template void test_policy(int team_range, int thread_range, int vector_range, - int outer_repeat, int thread_repeat, int inner_repeat, - int team_size, int vector_size, int test_type, - ViewType1 &v1, ViewType2 &v2, ViewType3 &v3, - double &result, double &result_expect, double &time) { - - typedef Kokkos::TeamPolicy t_policy; + int outer_repeat, int thread_repeat, int inner_repeat, + int team_size, int vector_size, int test_type, ViewType1& v1, + ViewType2& v2, ViewType3& v3, double& result, + double& result_expect, double& time) { + typedef Kokkos::TeamPolicy t_policy; typedef typename t_policy::member_type t_team; Kokkos::Timer timer; - for(int orep = 0; orep(v1) + Kokkos::parallel_scan("500 outer scan", team_size * team_range, + ParallelScanFunctor(v1) #if 0 // This does not compile with pre Cuda 8.0 - see Github Issue #913 for explanation KOKKOS_LAMBDA (const int idx, double& val, const bool& final) { @@ -345,11 +424,12 @@ void test_policy(int team_range, int thread_range, int vector_range, } #endif ); - // result = v1( team_size*team_range - 1 ); // won't work with Cuda - need to copy result back to host to print - // result_expect = 
0.5*(team_size*team_range)*(team_size*team_range-1); + // result = v1( team_size*team_range - 1 ); // won't work with Cuda - need + // to copy result back to host to print result_expect = + // 0.5*(team_size*team_range)*(team_size*team_range-1); } - } // end outer for loop + } // end outer for loop time = timer.seconds(); -} //end test_policy +} // end test_policy diff --git a/lib/kokkos/benchmarks/policy_performance/script_sample_usage.sh b/lib/kokkos/benchmarks/policy_performance/script_sample_usage.sh index 1c2db56648..f4bfb87f8f 100755 --- a/lib/kokkos/benchmarks/policy_performance/script_sample_usage.sh +++ b/lib/kokkos/benchmarks/policy_performance/script_sample_usage.sh @@ -2,7 +2,7 @@ # Sample script for benchmarking policy performance -# Suggested environment variables to export prior to executing script: +# Suggested enviroment variables to export prior to executing script: # KNL: # OMP_NUM_THREADS=256 KMP_AFFINITY=compact # Power: diff --git a/lib/kokkos/benchmarks/stream/stream-kokkos.cc b/lib/kokkos/benchmarks/stream/stream-kokkos.cc index 370995432e..6ce789dd82 100644 --- a/lib/kokkos/benchmarks/stream/stream-kokkos.cc +++ b/lib/kokkos/benchmarks/stream/stream-kokkos.cc @@ -2,10 +2,11 @@ //@HEADER // ************************************************************************ // -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). // -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Redistribution and use in source and binary forms, with or without @@ -23,10 +24,10 @@ // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. 
// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR diff --git a/lib/kokkos/bin/hpcbind b/lib/kokkos/bin/hpcbind index 6af091a7d8..b185a92821 100755 --- a/lib/kokkos/bin/hpcbind +++ b/lib/kokkos/bin/hpcbind @@ -383,7 +383,7 @@ fi # Check unknown arguments ################################################################################ if [[ ${#UNKNOWN_ARGS[*]} > 0 ]]; then - echo "HPCBIND Unknown options: ${UNKNOWN_ARGS[*]}" > >(tee -a ${HPCBIND_LOG}) + echo "HPCBIND Uknown options: ${UNKNOWN_ARGS[*]}" > >(tee -a ${HPCBIND_LOG}) exit 1 fi diff --git a/lib/kokkos/bin/nvcc_wrapper b/lib/kokkos/bin/nvcc_wrapper index 94bc72854e..8a23d0d620 100755 --- a/lib/kokkos/bin/nvcc_wrapper +++ b/lib/kokkos/bin/nvcc_wrapper @@ -85,11 +85,11 @@ first_xcompiler_arg=1 temp_dir=${TMPDIR:-/tmp} -# Check if we have an optimization argument already -optimization_applied=0 +# optimization flag added as a command-line argument +optimization_flag="" -# Check if we have -std=c++X or --std=c++X already -stdcxx_applied=0 +# std standard flag added as a command-line argument +std_flag="" # Run nvcc a second time to generate dependencies if needed depfile_separate=0 @@ -99,6 +99,10 @@ depfile_target_arg="" # Option to remove duplicate libraries and object files remove_duplicate_link_files=0 +function warn_std_flag() { + echo "nvcc_wrapper - *warning* you have set multiple standard flags (-std=c++1* or --std=c++1*), only the last is used because 
nvcc can only accept a single std setting" +} + #echo "Arguments: $# $@" while [ $# -gt 0 ] @@ -130,12 +134,16 @@ do ;; # Ensure we only have one optimization flag because NVCC doesn't allow muliple -O*) - if [ $optimization_applied -eq 1 ]; then - echo "nvcc_wrapper - *warning* you have set multiple optimization flags (-O*), only the first is used because nvcc can only accept a single optimization setting." + if [ -n "$optimization_flag" ]; then + echo "nvcc_wrapper - *warning* you have set multiple optimization flags (-O*), only the last is used because nvcc can only accept a single optimization setting." + shared_args=${shared_args/ $optimization_flag/} + fi + if [ "$1" = "-O" ]; then + optimization_flag="-O2" else - shared_args="$shared_args $1" - optimization_applied=1 + optimization_flag=$1 fi + shared_args="$shared_args $optimization_flag" ;; #Handle shared args (valid for both nvcc and the host compiler) -D*) @@ -171,7 +179,7 @@ do shift ;; #Handle known nvcc args - --dryrun|--verbose|--keep|--keep-dir*|-G|--relocatable-device-code*|-lineinfo|-expt-extended-lambda|--resource-usage|-Xptxas*) + --dryrun|--verbose|--keep|--keep-dir*|-G|--relocatable-device-code*|-lineinfo|-expt-extended-lambda|--resource-usage|-Xptxas*|--fmad*) cuda_args="$cuda_args $1" ;; #Handle more known nvcc args @@ -179,21 +187,43 @@ do cuda_args="$cuda_args $1" ;; #Handle known nvcc args that have an argument - -rdc|-maxrregcount|--default-stream) + -rdc|-maxrregcount|--default-stream|-Xnvlink|--fmad) cuda_args="$cuda_args $1 $2" shift ;; -rdc=*|-maxrregcount*|--maxrregcount*) cuda_args="$cuda_args $1" ;; - #Handle c++11 - --std=c++11|-std=c++11|--std=c++14|-std=c++14|--std=c++1y|-std=c++1y|--std=c++17|-std=c++17|--std=c++1z|-std=c++1z) - if [ $stdcxx_applied -eq 1 ]; then - echo "nvcc_wrapper - *warning* you have set multiple optimization flags (-std=c++1* or --std=c++1*), only the first is used because nvcc can only accept a single std setting" - else - shared_args="$shared_args $1" - 
stdcxx_applied=1 + #Handle unsupported standard flags + --std=c++1y|-std=c++1y|--std=c++1z|-std=c++1z|--std=gnu++1y|-std=gnu++1y|--std=gnu++1z|-std=gnu++1z|--std=c++2a|-std=c++2a|--std=c++17|-std=c++17) + fallback_std_flag="-std=c++14" + # this is hopefully just occurring in a downstream project during CMake feature tests + # we really have no choice here but to accept the flag and change to an accepted C++ standard + echo "nvcc_wrapper does not accept standard flags $1 since partial standard flags and standards after C++14 are not supported. nvcc_wrapper will use $fallback_std_flag instead. It is undefined behavior to use this flag. This should only be occurring during CMake configuration." + if [ -n "$std_flag" ]; then + warn_std_flag + shared_args=${shared_args/ $std_flag/} + fi + std_flag=$fallback_std_flag + shared_args="$shared_args $std_flag" + ;; + -std=gnu*) + corrected_std_flag=${1/gnu/c} + echo "nvcc_wrapper has been given GNU extension standard flag $1 - reverting flag to $corrected_std_flag" + if [ -n "$std_flag" ]; then + warn_std_flag + shared_args=${shared_args/ $std_flag/} + fi + std_flag=$corrected_std_flag + shared_args="$shared_args $std_flag" + ;; + --std=c++11|-std=c++11|--std=c++14|-std=c++14) + if [ -n "$std_flag" ]; then + warn_std_flag + shared_args=${shared_args/ $std_flag/} fi + std_flag=$1 + shared_args="$shared_args $std_flag" ;; #strip of -std=c++98 due to nvcc warnings and Tribits will place both -std=c++11 and -std=c++98 @@ -308,16 +338,6 @@ do shift done -#Check if nvcc exists -if [ $host_only -ne 1 ]; then - var=$(which nvcc ) - if [ $? -gt 0 ]; then - echo "Could not find nvcc in PATH" - exit $? 
- fi -fi - - # Only print host compiler version if [ $get_host_version -eq 1 ]; then $host_compiler --version @@ -372,6 +392,9 @@ if [ $first_xcompiler_arg -eq 0 ]; then nvcc_command="$nvcc_command -Xcompiler $xcompiler_args" fi +#Replace all commas in xcompiler_args with a space for the host only command +xcompiler_args=${xcompiler_args//,/" "} + #Compose host only command host_command="$host_compiler $shared_args $host_only_args $compile_arg $output_arg $xcompiler_args $host_linker_args $shared_versioned_libraries_host" diff --git a/lib/kokkos/cm_generate_makefile.bash b/lib/kokkos/cm_generate_makefile.bash new file mode 100755 index 0000000000..fd7cfe2d32 --- /dev/null +++ b/lib/kokkos/cm_generate_makefile.bash @@ -0,0 +1,339 @@ +#!/bin/bash + +update_kokkos_devices() { + SEARCH_TEXT="*$1*" + if [[ $KOKKOS_DEVICES == $SEARCH_TEXT ]]; then + echo kokkos devices already includes $SEARCH_TEXT + else + if [ "$KOKKOS_DEVICES" = "" ]; then + KOKKOS_DEVICES="$1" + echo reseting kokkos devices to $KOKKOS_DEVICES + else + KOKKOS_DEVICES="${KOKKOS_DEVICES},$1" + echo appending to kokkos devices $KOKKOS_DEVICES + fi + fi +} + +get_kokkos_device_list() { + KOKKOS_DEVICE_CMD= + PARSE_DEVICES_LST=$(echo $KOKKOS_DEVICES | tr "," "\n") + for DEVICE_ in $PARSE_DEVICES_LST + do + UC_DEVICE=$(echo $DEVICE_ | tr "[:lower:]" "[:upper:]") + KOKKOS_DEVICE_CMD="-DKokkos_ENABLE_${UC_DEVICE}=ON ${KOKKOS_DEVICE_CMD}" + done +} + +get_kokkos_arch_list() { + KOKKOS_ARCH_CMD= + PARSE_ARCH_LST=$(echo $KOKKOS_ARCH | tr "," "\n") + for ARCH_ in $PARSE_ARCH_LST + do + UC_ARCH=$(echo $ARCH_ | tr "[:lower:]" "[:upper:]") + KOKKOS_ARCH_CMD="-DKokkos_ARCH_${UC_ARCH}=ON ${KOKKOS_ARCH_CMD}" + done +} + +get_kokkos_cuda_option_list() { + echo parsing KOKKOS_CUDA_OPTIONS=$KOKKOS_CUDA_OPTIONS + KOKKOS_CUDA_OPTION_CMD= + PARSE_CUDA_LST=$(echo $KOKKOS_CUDA_OPTIONS | tr "," "\n") + for CUDA_ in $PARSE_CUDA_LST + do + CUDA_OPT_NAME= + if [ "${CUDA_}" == "enable_lambda" ]; then + CUDA_OPT_NAME=CUDA_LAMBDA + 
elif [ "${CUDA_}" == "rdc" ]; then + CUDA_OPT_NAME=CUDA_RELOCATABLE_DEVICE_CODE + elif [ "${CUDA_}" == "force_uvm" ]; then + CUDA_OPT_NAME=CUDA_UVM + elif [ "${CUDA_}" == "use_ldg" ]; then + CUDA_OPT_NAME=CUDA_LDG_INTRINSIC + else + echo "${CUDA_} is not a valid cuda options..." + fi + if [ "${CUDA_OPT_NAME}" != "" ]; then + KOKKOS_CUDA_OPTION_CMD="-DKokkos_ENABLE_${CUDA_OPT_NAME}=ON ${KOKKOS_CUDA_OPTION_CMD}" + fi + done +} + +get_kokkos_option_list() { + echo parsing KOKKOS_OPTIONS=$KOKKOS_OPTIONS + KOKKOS_OPTION_CMD= + PARSE_OPTIONS_LST=$(echo $KOKKOS_OPTIONS | tr "," "\n") + for OPT_ in $PARSE_OPTIONS_LST + do + UC_OPT_=$(echo $OPT_ | tr "[:lower:]" "[:upper:]") + if [[ "$UC_OPT_" == *DISABLE* ]]; then + FLIP_OPT_=${UC_OPT_/DISABLE/ENABLE} + KOKKOS_OPTION_CMD="-DKokkos_${FLIP_OPT_}=OFF ${KOKKOS_OPTION_CMD}" + elif [[ "$UC_OPT_" == *ENABLE* ]]; then + KOKKOS_OPTION_CMD="-DKokkos_${UC_OPT_}=ON ${KOKKOS_OPTION_CMD}" + else + KOKKOS_OPTION_CMD="-DKokkos_ENABLE_${UC_OPT_}=ON ${KOKKOS_OPTION_CMD}" + fi + done +} + +display_help_text() { + + echo "Kokkos configure options:" + echo "" + echo "--kokkos-path=/Path/To/Kokkos: Path to the Kokkos root directory." + echo "--prefix=/Install/Path: Path to install the Kokkos library." + echo "" + echo "--with-cuda[=/Path/To/Cuda]: Enable Cuda and set path to Cuda Toolkit." + echo "--with-openmp: Enable OpenMP backend." + echo "--with-pthread: Enable Pthreads backend." + echo "--with-serial: Enable Serial backend." + echo "--with-devices: Explicitly add a set of backends." + echo "" + echo "--arch=[OPT]: Set target architectures. 
Options are:" + echo " [AMD]" + echo " AMDAVX = AMD CPU" + echo " EPYC = AMD EPYC Zen-Core CPU" + echo " [ARM]" + echo " ARMv80 = ARMv8.0 Compatible CPU" + echo " ARMv81 = ARMv8.1 Compatible CPU" + echo " ARMv8-ThunderX = ARMv8 Cavium ThunderX CPU" + echo " ARMv8-TX2 = ARMv8 Cavium ThunderX2 CPU" + echo " [IBM]" + echo " BGQ = IBM Blue Gene Q" + echo " Power7 = IBM POWER7 and POWER7+ CPUs" + echo " Power8 = IBM POWER8 CPUs" + echo " Power9 = IBM POWER9 CPUs" + echo " [Intel]" + echo " WSM = Intel Westmere CPUs" + echo " SNB = Intel Sandy/Ivy Bridge CPUs" + echo " HSW = Intel Haswell CPUs" + echo " BDW = Intel Broadwell Xeon E-class CPUs" + echo " SKX = Intel Sky Lake Xeon E-class HPC CPUs (AVX512)" + echo " [Intel Xeon Phi]" + echo " KNC = Intel Knights Corner Xeon Phi" + echo " KNL = Intel Knights Landing Xeon Phi" + echo " [NVIDIA]" + echo " Kepler30 = NVIDIA Kepler generation CC 3.0" + echo " Kepler32 = NVIDIA Kepler generation CC 3.2" + echo " Kepler35 = NVIDIA Kepler generation CC 3.5" + echo " Kepler37 = NVIDIA Kepler generation CC 3.7" + echo " Maxwell50 = NVIDIA Maxwell generation CC 5.0" + echo " Maxwell52 = NVIDIA Maxwell generation CC 5.2" + echo " Maxwell53 = NVIDIA Maxwell generation CC 5.3" + echo " Pascal60 = NVIDIA Pascal generation CC 6.0" + echo " Pascal61 = NVIDIA Pascal generation CC 6.1" + echo " Volta70 = NVIDIA Volta generation CC 7.0" + echo " Volta72 = NVIDIA Volta generation CC 7.2" + echo "" + echo "--compiler=/Path/To/Compiler Set the compiler." + echo "--debug,-dbg: Enable Debugging." + echo "--cxxflags=[FLAGS] Overwrite CXXFLAGS for library build and test" + echo " build. This will still set certain required" + echo " flags via KOKKOS_CXXFLAGS (such as -fopenmp," + echo " --std=c++11, etc.)." + echo "--cxxstandard=[FLAGS] Overwrite KOKKOS_CXX_STANDARD for library build and test" + echo " c++11 (default), c++14, c++17, c++1y, c++1z, c++2a" + echo "--ldflags=[FLAGS] Overwrite LDFLAGS for library build and test" + echo " build. 
This will still set certain required" + echo " flags via KOKKOS_LDFLAGS (such as -fopenmp," + echo " -lpthread, etc.)." + echo "--with-gtest=/Path/To/Gtest: Set path to gtest. (Used in unit and performance" + echo " tests.)" + echo "--with-hwloc=/Path/To/Hwloc: Set path to hwloc library." + echo "--with-memkind=/Path/To/MemKind: Set path to memkind library." + echo "--with-options=[OPT]: Additional options to Kokkos:" + echo " compiler_warnings" + echo " aggressive_vectorization = add ivdep on loops" + echo " disable_profiling = do not compile with profiling hooks" + echo " " + echo "--with-cuda-options=[OPT]: Additional options to CUDA:" + echo " force_uvm, use_ldg, enable_lambda, rdc" + echo "--with-hpx-options=[OPT]: Additional options to HPX:" + echo " enable_async_dispatch" + echo "--gcc-toolchain=/Path/To/GccRoot: Set the gcc toolchain to use with clang (e.g. /usr)" + echo "--make-j=[NUM]: DEPRECATED: call make with appropriate" + echo " -j flag" + +} + +while [[ $# > 0 ]] +do + key="$1" + + case $key in + --kokkos-path*) + KOKKOS_PATH="${key#*=}" + ;; + --hpx-path*) + HPX_PATH="${key#*=}" + ;; + --prefix*) + PREFIX="${key#*=}" + ;; + --with-cuda) + update_kokkos_devices Cuda + CUDA_PATH_NVCC=$(command -v nvcc) + CUDA_PATH=${CUDA_PATH_NVCC%/bin/nvcc} + ;; + # Catch this before '--with-cuda*' + --with-cuda-options*) + KOKKOS_CUDA_OPTIONS="${key#*=}" + ;; + --with-cuda*) + update_kokkos_devices Cuda + CUDA_PATH="${key#*=}" + ;; + --with-openmp) + update_kokkos_devices OpenMP + ;; + --with-pthread) + update_kokkos_devices Pthread + ;; + --with-serial) + update_kokkos_devices Serial + ;; + --with-hpx-options*) + KOKKOS_HPX_OPT="${key#*=}" + ;; + --with-hpx*) + update_kokkos_devices HPX + if [ -z "$HPX_PATH" ]; then + HPX_PATH="${key#*=}" + fi + ;; + --with-devices*) + DEVICES="${key#*=}" + PARSE_DEVICES=$(echo $DEVICES | tr "," "\n") + for DEVICE_ in $PARSE_DEVICES + do + update_kokkos_devices $DEVICE_ + done + ;; + --with-gtest*) + GTEST_PATH="${key#*=}" + ;; + 
--with-hwloc*) + HWLOC_PATH="${key#*=}" + ;; + --with-memkind*) + MEMKIND_PATH="${key#*=}" + ;; + --arch*) + KOKKOS_ARCH="${key#*=}" + ;; + --cxxflags*) + KOKKOS_CXXFLAGS="${key#*=}" + KOKKOS_CXXFLAGS=${KOKKOS_CXXFLAGS//,/ } + ;; + --cxxstandard*) + KOKKOS_CXX_STANDARD="${key#*=}" + ;; + --ldflags*) + KOKKOS_LDFLAGS="${key#*=}" + ;; + --debug|-dbg) + KOKKOS_DEBUG=yes + ;; + --make-j*) + echo "Warning: ${key} is deprecated" + echo "Call make with appropriate -j flag" + ;; + --compiler*) + COMPILER="${key#*=}" + CNUM=$(command -v ${COMPILER} 2>&1 >/dev/null | grep "no ${COMPILER}" | wc -l) + if [ ${CNUM} -gt 0 ]; then + echo "Invalid compiler by --compiler command: '${COMPILER}'" + exit + fi + if [[ ! -n ${COMPILER} ]]; then + echo "Empty compiler specified by --compiler command." + exit + fi + CNUM=$(command -v ${COMPILER} | grep ${COMPILER} | wc -l) + if [ ${CNUM} -eq 0 ]; then + echo "Invalid compiler by --compiler command: '${COMPILER}'" + exit + fi + # ... valid compiler, ensure absolute path set + WCOMPATH=$(command -v $COMPILER) + COMPDIR=$(dirname $WCOMPATH) + COMPNAME=$(basename $WCOMPATH) + COMPILER=${COMPDIR}/${COMPNAME} + ;; + --with-options*) + KOKKOS_OPTIONS="${key#*=}" + ;; + --gcc-toolchain*) + KOKKOS_GCC_TOOLCHAIN="${key#*=}" + ;; + --help) + display_help_text + exit 0 + ;; + *) + echo "warning: ignoring unknown option $key" + ;; + esac + + shift +done + + +if [ "$COMPILER" == "" ]; then + COMPILER_CMD= +else + COMPILER_CMD=-DCMAKE_CXX_COMPILER=$COMPILER +fi + +if [ "$KOKKOS_DEBUG" == "" ]; then + KOKKOS_DEBUG_CMD=-DCMAKE_BUILD_TYPE=RELEASE +else + KOKKOS_DEBUG_CMD=-DCMAKE_BUILD_TYPE=DEBUG +fi + +if [ ! -e ${KOKKOS_PATH}/CMakeLists.txt ]; then + if [ "${KOKKOS_PATH}" == "" ]; then + CM_SCRIPT=$0 + KOKKOS_PATH=`dirname $CM_SCRIPT` + if [ ! -e ${KOKKOS_PATH}/CMakeLists.txt ]; then + echo "${KOKKOS_PATH} repository appears to not be complete. please verify and try again" + exit 0 + fi + else + echo "KOKKOS_PATH does not appear to be set properly. 
please specify in location of CMakeLists.txt" + display_help_text + exit 0 + fi +fi + +get_kokkos_device_list +get_kokkos_option_list +get_kokkos_arch_list +get_kokkos_cuda_option_list + +## if HPX is enabled, we need to enforce cxx standard = 14 +if [[ ${KOKKOS_DEVICE_CMD} == *Kokkos_ENABLE_HPX* ]]; then + if [ "${KOKKOS_CXX_STANDARD}" == "" ] || [ ${#KOKKOS_CXX_STANDARD} -lt 14 ]; then + echo CXX Standard must be 14 or higher for HPX to work. + KOKKOS_CXX_STANDARD=14 + fi +fi + +if [ "$KOKKOS_CXX_STANDARD" == "" ]; then + STANDARD_CMD= +else + STANDARD_CMD=-DKokkos_CXX_STANDARD=${KOKKOS_CXX_STANDARD} +fi + +if [[ ${COMPILER} == *clang* ]]; then + gcc_path=$(which g++ | awk --field-separator='/bin/g++' '{printf $1}' ) + KOKKOS_CXXFLAGS="${KOKKOS_CXXFLAGS} --gcc-toolchain=${gcc_path}" + + if [ ! "${CUDA_PATH}" == "" ]; then + KOKKOS_CXXFLAGS="${KOKKOS_CXXFLAGS} --cuda-path=${CUDA_PATH}" + fi +fi + +echo cmake $COMPILER_CMD -DCMAKE_CXX_FLAGS="${KOKKOS_CXXFLAGS}" -DCMAKE_EXE_LINKER_FLAGS="${KOKKOS_LDFLAGS}" -DCMAKE_INSTALL_PREFIX=${PREFIX} ${KOKKOS_DEVICE_CMD} ${KOKKOS_ARCH_CMD} -DKokkos_ENABLE_TESTS=ON ${KOKKOS_OPTION_CMD} ${KOKKOS_CUDA_OPTION_CMD} -DCMAKE_VERBOSE_MAKEFILE=ON -DCMAKE_CXX_EXTENSIONS=OFF ${STANDARD_CMD} ${KOKKOS_DEBUG_CMD} ${KOKKOS_PATH} +cmake $COMPILER_CMD -DCMAKE_CXX_FLAGS="${KOKKOS_CXXFLAGS//\"}" -DCMAKE_EXE_LINKER_FLAGS="${KOKKOS_LDFLAGS//\"}" -DCMAKE_INSTALL_PREFIX=${PREFIX} ${KOKKOS_DEVICE_CMD} ${KOKKOS_ARCH_CMD} -DKokkos_ENABLE_TESTS=ON ${KOKKOS_OPTION_CMD} ${KOKKOS_CUDA_OPTION_CMD} -DCMAKE_VERBOSE_MAKEFILE=ON -DCMAKE_CXX_EXTENSIONS=OFF ${STANDARD_CMD} ${KOKKOS_DEBUG_CMD} ${KOKKOS_PATH} diff --git a/lib/kokkos/cmake/KokkosConfig.cmake.in b/lib/kokkos/cmake/KokkosConfig.cmake.in index fc099a494c..6f4607687e 100644 --- a/lib/kokkos/cmake/KokkosConfig.cmake.in +++ b/lib/kokkos/cmake/KokkosConfig.cmake.in @@ -1,18 +1,14 @@ -# - Config file for the Kokkos package -# It defines the following variables -# Kokkos_INCLUDE_DIRS - include directories for 
Kokkos -# Kokkos_LIBRARIES - libraries to link against - # Compute paths -GET_FILENAME_COMPONENT(Kokkos_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) -SET(Kokkos_INCLUDE_DIRS "@CONF_INCLUDE_DIRS@") +@PACKAGE_INIT@ -# Our library dependencies (contains definitions for IMPORTED targets) -IF(NOT TARGET kokkos AND NOT Kokkos_BINARY_DIR) - INCLUDE("${Kokkos_CMAKE_DIR}/KokkosTargets.cmake") -ENDIF() +#Find dependencies +INCLUDE(CMakeFindDependencyMacro) -# These are IMPORTED targets created by KokkosTargets.cmake -SET(Kokkos_LIBRARY_DIRS @INSTALL_LIB_DIR@) -SET(Kokkos_LIBRARIES @Kokkos_LIBRARIES_NAMES@) -SET(Kokkos_TPL_LIBRARIES @KOKKOS_LIBS@) +#This needs to go above the KokkosTargets in case +#the Kokkos targets depend in some way on the TPL imports +@KOKKOS_TPL_EXPORTS@ + +GET_FILENAME_COMPONENT(Kokkos_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) +INCLUDE("${Kokkos_CMAKE_DIR}/KokkosTargets.cmake") +INCLUDE("${Kokkos_CMAKE_DIR}/KokkosConfigCommon.cmake") +UNSET(Kokkos_CMAKE_DIR) diff --git a/lib/kokkos/cmake/KokkosConfigCommon.cmake.in b/lib/kokkos/cmake/KokkosConfigCommon.cmake.in new file mode 100644 index 0000000000..da9c61976c --- /dev/null +++ b/lib/kokkos/cmake/KokkosConfigCommon.cmake.in @@ -0,0 +1,87 @@ +SET(Kokkos_DEVICES @KOKKOS_ENABLED_DEVICES@) +SET(Kokkos_OPTIONS @KOKKOS_ENABLED_OPTIONS@) +SET(Kokkos_TPLS @KOKKOS_ENABLED_TPLS@) +SET(Kokkos_ARCH @KOKKOS_ENABLED_ARCH_LIST@) + +# These are needed by KokkosKernels +FOREACH(DEV ${Kokkos_DEVICES}) + SET(Kokkos_ENABLE_${DEV} ON) +ENDFOREACH() + +IF(NOT Kokkos_FIND_QUIETLY) + MESSAGE(STATUS "Enabled Kokkos devices: ${Kokkos_DEVICES}") +ENDIF() + +IF (Kokkos_ENABLE_CUDA AND ${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.14.0") + #If we are building CUDA, we have tricked CMake because we declare a CXX project + #If the default C++ standard for a given compiler matches the requested + #standard, then CMake just omits the -std flag in later versions of CMake + #This breaks CUDA compilation (CUDA compiler can have a 
different default + #-std then the underlying host compiler by itself). Setting this variable + #forces CMake to always add the -std flag even if it thinks it doesn't need it + SET(CMAKE_CXX_STANDARD_DEFAULT 98 CACHE INTERNAL "" FORCE) +ENDIF() + +SET(KOKKOS_USE_CXX_EXTENSIONS @KOKKOS_USE_CXX_EXTENSIONS@) +IF (NOT DEFINED CMAKE_CXX_EXTENSIONS OR CMAKE_CXX_EXTENSIONS) + IF (NOT KOKKOS_USE_CXX_EXTENSIONS) + MESSAGE(WARNING "The installed Kokkos configuration does not support CXX extensions. Forcing -DCMAKE_CXX_EXTENSIONS=Off") + SET(CMAKE_CXX_EXTENSIONS OFF CACHE BOOL "" FORCE) + ENDIF() +ENDIF() + +include(FindPackageHandleStandardArgs) + +# This function makes sure that Kokkos was built with the requested backends +# and target architectures and generates a fatal error if it was not. +# +# kokkos_check( +# [DEVICES ...] # Set of backends (e.g. "OpenMP" and/or "Cuda") +# [ARCH ...] # Target architectures (e.g. "Power9" and/or "Volta70") +# [OPTIONS ...] # Optional settings (e.g. "PROFILING") +# [TPLS ...] # Third party libraries +# [RETURN_VALUE ] # Set a variable that indicates the result of the +# # check instead of a fatal error +# ) +function(kokkos_check) + set(ALLOWED_ARGS DEVICES ARCH OPTIONS TPLS) + cmake_parse_arguments(KOKKOS_CHECK "" "RETURN_VALUE" "${ALLOWED_ARGS}" ${ARGN}) + foreach(_arg ${KOKKOS_CHECK_UNPARSED_ARGUMENTS}) + message(SEND_ERROR "Argument '${_arg}' passed to kokkos_check() was not recognized") + endforeach() + # Get the list of keywords that were actually passed to the function. + set(REQUESTED_ARGS) + foreach(arg ${ALLOWED_ARGS}) + if(KOKKOS_CHECK_${arg}) + list(APPEND REQUESTED_ARGS ${arg}) + endif() + endforeach() + set(KOKKOS_CHECK_SUCCESS TRUE) + foreach(arg ${REQUESTED_ARGS}) + # Define variables named after the required arguments that are provided by + # the Kokkos install. 
+ foreach(requested ${KOKKOS_CHECK_${arg}}) + foreach(provided ${Kokkos_${arg}}) + STRING(TOUPPER ${requested} REQUESTED_UC) + STRING(TOUPPER ${provided} PROVIDED_UC) + if(PROVIDED_UC STREQUAL REQUESTED_UC) + string(REPLACE ";" " " ${requested} "${KOKKOS_CHECK_${arg}}") + endif() + endforeach() + endforeach() + # Somewhat divert the CMake function below from its original purpose and + # use it to check that there are variables defined for all required + # arguments. Success or failure messages will be displayed but we are + # responsible for signaling failure and skip the build system generation. + find_package_handle_standard_args("Kokkos_${arg}" DEFAULT_MSG + ${KOKKOS_CHECK_${arg}}) + if(NOT Kokkos_${arg}_FOUND) + set(KOKKOS_CHECK_SUCCESS FALSE) + endif() + endforeach() + if(NOT KOKKOS_CHECK_SUCCESS AND NOT KOKKOS_CHECK_RETURN_VALUE) + message(FATAL_ERROR "Kokkos does NOT provide all backends and/or architectures requested") + else() + set(${KOKKOS_CHECK_RETURN_VALUE} ${KOKKOS_CHECK_SUCCESS} PARENT_SCOPE) + endif() +endfunction() diff --git a/lib/kokkos/cmake/KokkosCore_config.h.in b/lib/kokkos/cmake/KokkosCore_config.h.in new file mode 100644 index 0000000000..084afba8a8 --- /dev/null +++ b/lib/kokkos/cmake/KokkosCore_config.h.in @@ -0,0 +1,89 @@ + +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif + +/* Execution Spaces */ +#cmakedefine KOKKOS_ENABLE_SERIAL +#cmakedefine KOKKOS_ENABLE_OPENMP +#cmakedefine KOKKOS_ENABLE_THREADS +#cmakedefine KOKKOS_ENABLE_CUDA +#cmakedefine KOKKOS_ENABLE_HPX +#cmakedefine KOKKOS_ENABLE_MEMKIND +#cmakedefine KOKKOS_ENABLE_LIBRT + +#ifndef __CUDA_ARCH__ +#cmakedefine KOKKOS_ENABLE_TM +#cmakedefine KOKKOS_USE_ISA_X86_64 +#cmakedefine KOKKOS_USE_ISA_KNC +#cmakedefine KOKKOS_USE_ISA_POWERPCLE +#cmakedefine KOKKOS_USE_ISA_POWERPCBE +#endif + +/* General Settings */ +#cmakedefine KOKKOS_ENABLE_CXX11 +#cmakedefine KOKKOS_ENABLE_CXX14 +#cmakedefine KOKKOS_ENABLE_CXX17 +#cmakedefine KOKKOS_ENABLE_CXX20 + +#cmakedefine KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE +#cmakedefine KOKKOS_ENABLE_CUDA_UVM +#cmakedefine KOKKOS_ENABLE_CUDA_LAMBDA +#cmakedefine KOKKOS_ENABLE_CUDA_CONSTEXPR +#cmakedefine KOKKOS_ENABLE_CUDA_LDG_INTRINSIC +#cmakedefine KOKKOS_ENABLE_HPX_ASYNC_DISPATCH +#cmakedefine KOKKOS_ENABLE_DEBUG +#cmakedefine KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK +#cmakedefine KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK +#cmakedefine KOKKOS_ENABLE_COMPILER_WARNINGS +#cmakedefine KOKKOS_ENABLE_PROFILING +#cmakedefine KOKKOS_ENABLE_PROFILING_LOAD_PRINT +#cmakedefine KOKKOS_ENABLE_DEPRECATED_CODE +#cmakedefine KOKKOS_ENABLE_ETI +#cmakedefine KOKKOS_ENABLE_LARGE_MEM_TESTS +#cmakedefine KOKKOS_ENABLE_DUALVIEW_MODIFY_CHECK +#cmakedefine KOKKOS_ENABLE_COMPLEX_ALIGN +#cmakedefine KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION + +/* TPL Settings */ +#cmakedefine KOKKOS_ENABLE_HWLOC +#cmakedefine KOKKOS_USE_LIBRT +#cmakedefine KOKKOS_ENABLE_HWBSPACE + +#cmakedefine KOKKOS_IMPL_CUDA_CLANG_WORKAROUND + +#cmakedefine KOKKOS_COMPILER_CUDA_VERSION @KOKKOS_COMPILER_CUDA_VERSION@ + +#cmakedefine KOKKOS_ARCH_SSE42 +#cmakedefine KOKKOS_ARCH_ARMV80 +#cmakedefine KOKKOS_ARCH_ARMV8_THUNDERX +#cmakedefine KOKKOS_ARCH_ARMV81 +#cmakedefine KOKKOS_ARCH_ARMV8_THUNDERX2 +#cmakedefine KOKKOS_ARCH_AMD_AVX2 +#cmakedefine KOKKOS_ARCH_AVX 
+#cmakedefine KOKKOS_ARCH_AVX2 +#cmakedefine KOKKOS_ARCH_AVX512XEON +#cmakedefine KOKKOS_ARCH_KNC +#cmakedefine KOKKOS_ARCH_AVX512MIC +#cmakedefine KOKKOS_ARCH_POWER7 +#cmakedefine KOKKOS_ARCH_POWER8 +#cmakedefine KOKKOS_ARCH_POWER9 +#cmakedefine KOKKOS_ARCH_KEPLER +#cmakedefine KOKKOS_ARCH_KEPLER30 +#cmakedefine KOKKOS_ARCH_KEPLER32 +#cmakedefine KOKKOS_ARCH_KEPLER35 +#cmakedefine KOKKOS_ARCH_KEPLER37 +#cmakedefine KOKKOS_ARCH_MAXWELL +#cmakedefine KOKKOS_ARCH_MAXWELL50 +#cmakedefine KOKKOS_ARCH_MAXWELL52 +#cmakedefine KOKKOS_ARCH_MAXWELL53 +#cmakedefine KOKKOS_ARCH_PASCAL +#cmakedefine KOKKOS_ARCH_PASCAL60 +#cmakedefine KOKKOS_ARCH_PASCAL61 +#cmakedefine KOKKOS_ARCH_VOLTA +#cmakedefine KOKKOS_ARCH_VOLTA70 +#cmakedefine KOKKOS_ARCH_VOLTA72 +#cmakedefine KOKKOS_ARCH_TURING75 +#cmakedefine KOKKOS_ARCH_AMD_EPYC diff --git a/lib/kokkos/cmake/Makefile.generate_cmake_settings b/lib/kokkos/cmake/Makefile.generate_cmake_settings deleted file mode 100644 index da076b23db..0000000000 --- a/lib/kokkos/cmake/Makefile.generate_cmake_settings +++ /dev/null @@ -1,8 +0,0 @@ -ifndef KOKKOS_PATH - MAKEFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST))) - KOKKOS_PATH = $(subst Makefile,,$(MAKEFILE_PATH)).. 
-endif - -include $(KOKKOS_PATH)/Makefile.kokkos -include $(KOKKOS_PATH)/core/src/Makefile.generate_header_lists -include $(KOKKOS_PATH)/core/src/Makefile.generate_build_files diff --git a/lib/kokkos/cmake/Modules/FindHWLOC.cmake b/lib/kokkos/cmake/Modules/FindHWLOC.cmake deleted file mode 100644 index 60df8084d8..0000000000 --- a/lib/kokkos/cmake/Modules/FindHWLOC.cmake +++ /dev/null @@ -1,20 +0,0 @@ -#.rst: -# FindHWLOC -# ---------- -# -# Try to find HWLOC, based on KOKKOS_HWLOC_DIR -# -# The following variables are defined: -# -# HWLOC_FOUND - System has HWLOC -# HWLOC_INCLUDE_DIR - HWLOC include directory -# HWLOC_LIBRARIES - Libraries needed to use HWLOC - -find_path(HWLOC_INCLUDE_DIR hwloc.h PATHS "${KOKKOS_HWLOC_DIR}/include") -find_library(HWLOC_LIBRARIES hwloc PATHS "${KOKKOS_HWLOC_DIR}/lib") - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(HWLOC DEFAULT_MSG - HWLOC_INCLUDE_DIR HWLOC_LIBRARIES) - -mark_as_advanced(HWLOC_INCLUDE_DIR HWLOC_LIBRARIES) diff --git a/lib/kokkos/cmake/Modules/FindMemkind.cmake b/lib/kokkos/cmake/Modules/FindMemkind.cmake deleted file mode 100644 index 245fb44c19..0000000000 --- a/lib/kokkos/cmake/Modules/FindMemkind.cmake +++ /dev/null @@ -1,20 +0,0 @@ -#.rst: -# FindMemkind -# ---------- -# -# Try to find Memkind. 
-# -# The following variables are defined: -# -# MEMKIND_FOUND - System has Memkind -# MEMKIND_INCLUDE_DIR - Memkind include directory -# MEMKIND_LIBRARIES - Libraries needed to use Memkind - -find_path(MEMKIND_INCLUDE_DIR memkind.h) -find_library(MEMKIND_LIBRARIES memkind) - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(Memkind DEFAULT_MSG - MEMKIND_INCLUDE_DIR MEMKIND_LIBRARIES) - -mark_as_advanced(MEMKIND_INCLUDE_DIR MEMKIND_LIBRARIES) diff --git a/lib/kokkos/cmake/Modules/FindQthreads.cmake b/lib/kokkos/cmake/Modules/FindQthreads.cmake deleted file mode 100644 index a254b0e996..0000000000 --- a/lib/kokkos/cmake/Modules/FindQthreads.cmake +++ /dev/null @@ -1,20 +0,0 @@ -#.rst: -# FindQthreads -# ---------- -# -# Try to find Qthreads. -# -# The following variables are defined: -# -# QTHREADS_FOUND - System has Qthreads -# QTHREADS_INCLUDE_DIR - Qthreads include directory -# QTHREADS_LIBRARIES - Libraries needed to use Qthreads - -find_path(QTHREADS_INCLUDE_DIR qthread.h) -find_library(QTHREADS_LIBRARIES qthread) - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(Qthreads DEFAULT_MSG - QTHREADS_INCLUDE_DIR QTHREADS_LIBRARIES) - -mark_as_advanced(QTHREADS_INCLUDE_DIR QTHREADS_LIBRARIES) diff --git a/lib/kokkos/cmake/Modules/FindTPLCUDA.cmake b/lib/kokkos/cmake/Modules/FindTPLCUDA.cmake new file mode 100644 index 0000000000..36aefcdb44 --- /dev/null +++ b/lib/kokkos/cmake/Modules/FindTPLCUDA.cmake @@ -0,0 +1,13 @@ + +IF (KOKKOS_CXX_COMPILER_ID STREQUAL Clang) + KOKKOS_FIND_IMPORTED(CUDA INTERFACE + LIBRARIES cudart cuda + LIBRARY_PATHS ENV LD_LIBRARY_PATH ENV CUDA_PATH + ALLOW_SYSTEM_PATH_FALLBACK + ) +ELSE() + KOKKOS_CREATE_IMPORTED_TPL(CUDA INTERFACE + LINK_LIBRARIES cuda + ) +ENDIF() + diff --git a/lib/kokkos/cmake/Modules/FindTPLHPX.cmake b/lib/kokkos/cmake/Modules/FindTPLHPX.cmake new file mode 100644 index 0000000000..c8b3bc4c9b --- /dev/null +++ b/lib/kokkos/cmake/Modules/FindTPLHPX.cmake @@ -0,0 +1,15 @@ 
+ +FIND_PACKAGE(HPX REQUIRED) +#as of right now, HPX doesn't export correctly +#so let's convert it to an interface target +KOKKOS_CREATE_IMPORTED_TPL(HPX INTERFACE + LINK_LIBRARIES ${HPX_LIBRARIES} + INCLUDES ${HPX_INCLUDE_DIRS} +) +#this is a bit funky since this is a CMake target +#but HPX doesn't export itself correctly +KOKKOS_EXPORT_CMAKE_TPL(HPX) + +#I would prefer all of this gets replaced with +#KOKKOS_IMPORT_CMAKE_TPL(HPX) + diff --git a/lib/kokkos/cmake/Modules/FindTPLHWLOC.cmake b/lib/kokkos/cmake/Modules/FindTPLHWLOC.cmake new file mode 100644 index 0000000000..cf763b7e5b --- /dev/null +++ b/lib/kokkos/cmake/Modules/FindTPLHWLOC.cmake @@ -0,0 +1 @@ +KOKKOS_FIND_IMPORTED(HWLOC HEADER hwloc.h LIBRARY hwloc) diff --git a/lib/kokkos/cmake/Modules/FindTPLLIBDL.cmake b/lib/kokkos/cmake/Modules/FindTPLLIBDL.cmake new file mode 100644 index 0000000000..5fc6a69303 --- /dev/null +++ b/lib/kokkos/cmake/Modules/FindTPLLIBDL.cmake @@ -0,0 +1 @@ +KOKKOS_FIND_IMPORTED(LIBDL HEADER dlfcn.h LIBRARY dl) diff --git a/lib/kokkos/cmake/Modules/FindTPLLIBNUMA.cmake b/lib/kokkos/cmake/Modules/FindTPLLIBNUMA.cmake new file mode 100644 index 0000000000..811db5851b --- /dev/null +++ b/lib/kokkos/cmake/Modules/FindTPLLIBNUMA.cmake @@ -0,0 +1 @@ +KOKKOS_FIND_IMPORTED(LIBNUMA HEADER numa.h LIBRARY numa) diff --git a/lib/kokkos/cmake/Modules/FindTPLLIBRT.cmake b/lib/kokkos/cmake/Modules/FindTPLLIBRT.cmake new file mode 100644 index 0000000000..e75da56b5b --- /dev/null +++ b/lib/kokkos/cmake/Modules/FindTPLLIBRT.cmake @@ -0,0 +1 @@ +KOKKOS_FIND_IMPORTED(LIBRT HEADER time.h LIBRARY rt) diff --git a/lib/kokkos/cmake/Modules/FindTPLMEMKIND.cmake b/lib/kokkos/cmake/Modules/FindTPLMEMKIND.cmake new file mode 100644 index 0000000000..20aaff2295 --- /dev/null +++ b/lib/kokkos/cmake/Modules/FindTPLMEMKIND.cmake @@ -0,0 +1 @@ +KOKKOS_FIND_IMPORTED(MEMKIND HEADER memkind.h LIBRARY memkind) diff --git a/lib/kokkos/cmake/Modules/FindTPLPTHREAD.cmake 
b/lib/kokkos/cmake/Modules/FindTPLPTHREAD.cmake new file mode 100644 index 0000000000..b4b8c34122 --- /dev/null +++ b/lib/kokkos/cmake/Modules/FindTPLPTHREAD.cmake @@ -0,0 +1,17 @@ + +TRY_COMPILE(KOKKOS_HAS_PTHREAD_ARG + ${KOKKOS_TOP_BUILD_DIR}/tpl_tests + ${KOKKOS_SOURCE_DIR}/cmake/compile_tests/pthread.cpp + LINK_LIBRARIES -pthread + COMPILE_DEFINITIONS -pthread) + +INCLUDE(FindPackageHandleStandardArgs) +FIND_PACKAGE_HANDLE_STANDARD_ARGS(PTHREAD DEFAULT_MSG KOKKOS_HAS_PTHREAD_ARG) + +KOKKOS_CREATE_IMPORTED_TPL(PTHREAD + INTERFACE #this is not a real library with a real location + COMPILE_OPTIONS -pthread + LINK_OPTIONS -pthread) + + + diff --git a/lib/kokkos/cmake/README.md b/lib/kokkos/cmake/README.md new file mode 100644 index 0000000000..2ac8731586 --- /dev/null +++ b/lib/kokkos/cmake/README.md @@ -0,0 +1,331 @@ +![Kokkos](https://avatars2.githubusercontent.com/u/10199860?s=200&v=4) + +# Developing Kokkos + +This document contains a build system overview for developers with information on adding new CMake options that could influence +* Header configuration macros +* Optional features +* Third-party libraries +* Compiler and linker flags +For build system details for users, refer to the [build instructions](../BUILD.md). + +## Build System + +Kokkos uses CMake to configure, build, and install. +Rather than being a completely straightforward use of modern CMake, +Kokkos has several extra complications, primarily due to: +* Kokkos must support linking to an installed version or in-tree builds as a subdirectory of a larger project. +* Kokkos must configure a special compiler `nvcc_wrapper` that allows `nvcc` to accept all C++ flags (which `nvcc` currently does not). +* Kokkos must work as a part of TriBITS, a CMake library providing a particular build idiom for Trilinos. +* Kokkos has many pre-existing users. We need to be careful about breaking previous versions or generating meaningful error messages if we do break backwards compatibility.
+ +If you are looking at the build system code wondering why certain decisions were made: we have had to balance many competing requirements and certain technical debt. Everything in the build system was done for a reason, trying to adhere as closely as possible to modern CMake best practices while meeting all pre-existing customer requirements. + +### Modern CMake Philosophy + +Modern CMake relies on understanding the principle of *building* and *using* a code project. +What preprocessor, compiler, and linker flags do I need to *build* my project? +What flags does a downstream project that links to me need to *use* my project? +In CMake terms, flags that are only needed for building are `PRIVATE`. +Only Kokkos needs these flags, not a package that depends on Kokkos. +Flags that must be used in a downstream project are `PUBLIC`. +Kokkos must tell other projects to use them. + +In Kokkos, almost everything is a public flag since Kokkos is driven by headers and Kokkos is in charge of optimizing your code to achieve performance portability! +Include paths, C++ standard flags, architecture-specific optimizations, or OpenMP and CUDA flags are all examples of flags that Kokkos configures and adds to your project. + +Modern CMake now automatically propagates flags through the `target_link_libraries` command. +Suppose you have a library `stencil` that needs to build with Kokkos. +Consider the following CMake code: + +```` +find_package(Kokkos) +add_library(stencil stencil.cpp) +target_link_libraries(stencil Kokkos::kokkos) +```` + +This locates the Kokkos package, adds your library, and tells CMake to link Kokkos to your library. +All public build flags get added automatically through the `target_link_libraries` command. +There is nothing to do. You can be happily oblivious to how Kokkos was configured. +Everything should just work. + +As a Kokkos developer who wants to add new public compiler flags, how do you ensure that CMake does this properly?
Modern CMake works through targets and properties. +Each target has a set of standard properties: +* `INTERFACE_COMPILE_OPTIONS` contains all the compiler options that Kokkos should add to downstream projects +* `INTERFACE_INCLUDE_DIRECTORIES` contains all the directories downstream projects must include from Kokkos +* `INTERFACE_COMPILE_DEFINITIONS` contains the list of preprocessor `-D` flags +* `INTERFACE_LINK_LIBRARIES` contains all the libraries downstream projects need to link +* `INTERFACE_COMPILE_FEATURES` essentially adds compiler flags, but with extra complications. Feature names are specific to CMake. More later. + +CMake makes it easy to append to these properties using: +* `target_compile_options(kokkos PUBLIC -fmyflag)` +* `target_include_directories(kokkos PUBLIC mySpecialFolder)` +* `target_compile_definitions(kokkos PUBLIC -DmySpecialFlag=0)` +* `target_link_libraries(kokkos PUBLIC mySpecialLibrary)` +* `target_compile_features(kokkos PUBLIC mySpecialFeature)` +Note that all of these use `PUBLIC`! Almost every Kokkos flag is not private to Kokkos, but must also be used by downstream projects. + + +### Compiler Features and Compiler Options +Compiler options are flags like `-fopenmp` that do not need to be "resolved." +The flag is either on or off. +Compiler features are more fine-grained and require conflicting requests to be resolved. +Suppose I have +```` +add_library(A a.cpp) +target_compile_features(A PUBLIC cxx_std_11) +```` +then another target +```` +add_library(B b.cpp) +target_compile_features(B PUBLIC cxx_std_14) +target_link_libraries(A B) +```` +I have requested two different features. +CMake understands the requests and knows that `cxx_std_11` is a subset of `cxx_std_14`. +CMake then picks C++14 for library `A`, since it links to `B`.
+CMake would not have been able to do feature resolution if we had directly done: +```` +target_compile_options(A PUBLIC -std=c++11) +```` + +### Adding Kokkos Options +After configuring for the first time, +CMake creates a cache of configure variables in `CMakeCache.txt`. +Reconfiguring in the folder "restarts" from those variables. +All flags passed as `-DKokkos_SOME_OPTION=X` to `cmake` become variables in the cache. +All Kokkos options begin with camel case `Kokkos_` followed by an upper case option name. + +CMake best practice is to avoid cache variables, if possible. +In essence, you want the minimal amount of state cached between configurations. +And never, ever have behavior influenced by multiple cache variables. +If you want to change the Kokkos configuration, have a single unique variable that needs to be changed. +Never require two cache variables to be changed. + +Kokkos provides a function `KOKKOS_OPTION` for defining valid cache-level variables, +proofreading them, and defining local project variables. +The most common variables are called `Kokkos_ENABLE_X`, +for which a helper function `KOKKOS_ENABLE_OPTION` is provided, e.g. +```` +KOKKOS_ENABLE_OPTION(TESTS OFF "Whether to build tests") +```` +The function checks if `-DKokkos_ENABLE_TESTS` was given, +whether it was given with the wrong case, e.g. `-DKokkos_Enable_Tests`, +and then defines a regular (non-cache) variable `KOKKOS_ENABLE_TESTS` to `ON` or `OFF` +depending on the given default and whether the option was specified. + +### Defining Kokkos Config Macros + +Sometimes you may want to add `#define Kokkos_X` macros to the config header. +This is straightforward with CMake. +Suppose you want to define an optional macro `KOKKOS_SUPER_SCIENCE`. +Simply go into `KokkosCore_config.h.in` and add +```` +#cmakedefine KOKKOS_SUPER_SCIENCE +```` +I can either add +```` +KOKKOS_OPTION(SUPER_SCIENCE ON "Whether to do some super science") +```` +to directly set the variable as a command-line `-D` option. 
+Alternatively, based on other logic, I could add to a `CMakeLists.txt` +```` +SET(KOKKOS_SUPER_SCIENCE ON) +```` +If not set as a command-line option (cache variable), you must make sure the variable is visible in the top-level scope. +If set in a function, you would need: +```` +SET(KOKKOS_SUPER_SCIENCE ON PARENT_SCOPE) +```` + +### Third-Party Libraries +In much the same way that compiler flags transitively propagate to dependent projects, +modern CMake allows us to propagate dependent libraries. +If Kokkos depends on, e.g., `hwloc`, the downstream project will also need to link `hwloc`. +There are three stages in adding a new third-party library (TPL): +* Finding: find the desired library on the system and verify the installation is correct +* Importing: create a CMake target, if necessary, that is compatible with `target_link_libraries`. This is mostly relevant for TPLs not installed with CMake. +* Exporting: make the desired library visible to downstream projects + +TPLs are somewhat complicated by whether the library was installed with CMake or some other build system. +If CMake, our lives are greatly simplified. We simply use `find_package` to locate the installed CMake project then call `target_link_libraries(kokkoscore PUBLIC/PRIVATE TPL)`. For libraries not installed with CMake, the process is a bit more complex. +It is up to the Kokkos developers to "convert" the library into a CMake target as if it had been installed as a valid modern CMake target with properties. +There are helper functions for simplifying the process of importing TPLs in Kokkos, but we walk through the process in detail to clearly illustrate the steps involved. + +#### TPL Search Order + +There are several options for where CMake could try to find a TPL. +If there are multiple installations of the same TPL on the system, +the search order is critical for making sure the correct TPL is found. +There are 3 possibilities that could be used: + +1. Default system paths like /usr +1.
User-provided paths through options `<NAME>_ROOT` and `Kokkos_<NAME>_DIR` +1. Additional paths not in the CMake default list or provided by the user that Kokkos decides to add. For example, Kokkos may query `nvcc` or `LD_LIBRARY_PATH` for where to find CUDA libraries. + +The following is the search order that Kokkos follows. Note: This differs from the default search order used by CMake `find_library` and `find_path`. CMake prefers default system paths over user-provided paths. +For Kokkos (and package managers in general), it is better to prefer user-provided paths since this usually indicates a specific version we want. + +1. `<NAME>_ROOT` +1. `Kokkos_<NAME>_DIR` +1. Paths added by Kokkos CMake logic +1. Default system paths (if allowed) + +Default system paths are allowed in two cases. First, none of the other options are given so the only place to look is system paths. Second, if explicitly given permission, configure will look in system paths. +The rationale for this logic is that if you specify a custom location, you usually *only* want to look in that location. +If you do not find the TPL where you expect it, you should error out rather than grab another random match. + + +#### Finding TPLs + +If finding a TPL that is not a modern CMake project, refer to the `FindHWLOC.cmake` file in `cmake/Modules` for an example. +You will usually need to verify expected headers with `find_path` +```` +find_path(TPL_INCLUDE_DIR mytpl.h PATHS "${KOKKOS_MYTPL_DIR}/include") +```` +This ensures that the library header is in the expected include directory and defines the variable `TPL_INCLUDE_DIR` with a valid path if successful. +Similarly, you can verify a library +```` +find_library(TPL_LIBRARY mytpl PATHS "${KOKKOS_MYTPL_DIR}/lib") +```` +that then defines the variable `TPL_LIBRARY` with a valid path if successful. +CMake provides a utility for checking if the `find_path` and `find_library` calls were successful that emulates the behavior of `find_package` for a CMake target.
+```` +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(MYTPL DEFAULT_MSG + MYTPL_INCLUDE_DIR MYTPL_LIBRARY) +```` +If the find failed, CMake will print standard error messages explaining the failure. + +#### Importing TPLs + +The installed TPL must be adapted into a CMake target. +CMake allows libraries to be added that are built externally as follows: +```` +add_library(Kokkos::mytpl UNKNOWN IMPORTED) +```` +Importantly, we use a `Kokkos::` namespace to avoid name conflicts and identify this specifically as the version imported by Kokkos. +Because we are importing a non-CMake target, we must populate all the target properties that would have been automatically populated for a CMake target. +```` +set_target_properties(Kokkos::mytpl PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${MYTPL_INCLUDE_DIR}" + IMPORTED_LOCATION "${MYTPL_LIBRARY}" +) +```` + +#### Exporting TPLs + +Kokkos may now depend on the target `Kokkos::mytpl` as a `PUBLIC` library (remember building and using). +This means that downstream projects must also know about `Kokkos::mytpl` - so Kokkos must export it. +In the `KokkosConfig.cmake.in` file, we need to add code like the following: +```` +set(MYTPL_LIBRARY @MYTPL_LIBRARY@) +set(MYTPL_INCLUDE_DIR @MYTPL_INCLUDE_DIR@) +add_library(Kokkos::mytpl UNKNOWN IMPORTED) +set_target_properties(Kokkos::mytpl PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${MYTPL_INCLUDE_DIR}" + IMPORTED_LOCATION "${MYTPL_LIBRARY}" +) +```` +If this looks familiar, that's because it is exactly the same code as above for importing the TPL. +Exporting a TPL really just means importing the TPL when Kokkos is loaded by an external project. +We will describe helper functions that simplify this process. + +#### Interface TPLs + +If a TPL is just a library and set of headers, we can make a simple `IMPORTED` target. +However, a TPL is actually completely flexible and need not be limited to just headers and libraries.
+TPLs can configure compiler flags, linker flags, or multiple different libraries. +For this, we use a special type of CMake target: `INTERFACE` libraries. +These libraries don't build anything. +They simply populate properties that will configure flags for dependent targets. +We consider the example: +```` +add_library(PTHREAD INTERFACE) +target_compile_options(PTHREAD PUBLIC -pthread) +```` +Kokkos uses the compiler flag `-pthread` to define compiler macros for re-entrant functions rather than treating it simply as a library with header `pthread.h` and library `-lpthread`. +Any property can be configured, e.g. +```` +target_link_libraries(MYTPL ...) +```` +In contrast to imported TPLs which require direct modification of `KokkosConfig.cmake.in`, +we can use CMake's built-in export functions: +```` +INSTALL( + TARGETS MYTPL + EXPORT KokkosTargets + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} +) +```` +These interface targets will be automatically populated in the config file. + +#### Linking the TPL +After finishing the import process, it still remains to link the imported target as needed. +For example, +```` +target_link_libraries(kokkoscore PUBLIC Kokkos::HWLOC) +```` +The complexity of which includes, options, and libraries the TPL requires +should be encapsulated in the CMake target. + +#### TPL Helper Functions +##### KOKKOS_IMPORT_TPL +This function can be invoked as, e.g. +```` +KOKKOS_IMPORT_TPL(HWLOC) +```` +This function checks if the TPL was enabled by a `-DKokkos_ENABLE_HWLOC=On` flag. +If so, it calls `find_package(TPLHWLOC)`. +This invokes the file `FindTPLHWLOC.cmake` which should be contained in the `cmake/Modules` folder. +If successful, another function `KOKKOS_EXPORT_CMAKE_TPL` gets invoked. +This automatically adds all the necessary import commands to `KokkosConfig.cmake`. 
+ +##### KOKKOS_FIND_IMPORTED +Inside a `FindTPLX.cmake` file, the simplest way to import a library is to call, e.g. +```` +KOKKOS_FIND_IMPORTED(HWLOC LIBRARY hwloc HEADER hwloc.h) +```` +This finds the location of the library and header and creates an imported target `Kokkos::HWLOC` +that can be linked against. +The library/header find can be guided with `-DHWLOC_ROOT=` or `-DKokkos_HWLOC_DIR=` during CMake configure. +These both specify the install prefix. + +##### KOKKOS_LINK_TPL +This function checks if the TPL has been enabled. +If so, it links a given library against the imported (or interface) TPL target. + +##### KOKKOS_CREATE_IMPORTED_TPL +This helper function is best understood by reading the actual code. +This function takes arguments specifying the properties and creates the actual TPL target. +The most important thing to understand for this function is whether you call this function with the optional `INTERFACE` keyword. +This tells the project to either create the target as an imported target or interface target, as discussed above. + +##### KOKKOS_EXPORT_CMAKE_TPL +Even if the TPL just loads a valid CMake target, we still must "export" it into the config file. +When Kokkos is loaded by a downstream project, this TPL must be loaded. +Calling this function simply appends text recording the location where the TPL was found +and adding a `find_dependency(...)` call that will reload the CMake target. + +### The Great TriBITS Compromise + +TriBITS was a masterpiece of CMake version 2 before the modern CMake idioms of building and using. +TriBITS greatly limited verbosity of CMake files, handled complicated dependency trees between packages, and handled automatically setting up include and linker paths for dependent libraries. + +Kokkos is now used by numerous projects that don't (and won't) depend on TriBITS for their build systems. +Kokkos has to work outside of TriBITS and provide a standard CMake 3+ build system. 
+At the same time, Kokkos is used by numerous projects that depend on TriBITS and don't (and won't) switch to a standard CMake 3+ build system. + +Instead of calling functions `TRIBITS_X(...)`, the CMake calls wrapper functions `KOKKOS_X(...)`. +If TriBITS is available (as in Trilinos), `KOKKOS_X` will just be a thin wrapper around `TRIBITS_X`. +If TriBITS is not available, Kokkos maps `KOKKOS_X` calls to native CMake that complies with CMake 3 idioms. +For the time being, this seems the most sensible way to handle the competing requirements of a standalone modern CMake and TriBITS build system. + +##### [LICENSE](https://github.com/kokkos/kokkos/blob/devel/LICENSE) + +[![License](https://img.shields.io/badge/License-BSD%203--Clause-blue.svg)](https://opensource.org/licenses/BSD-3-Clause) + +Under the terms of Contract DE-NA0003525 with NTESS, +the U.S. Government retains certain rights in this software. diff --git a/lib/kokkos/cmake/compile_tests/clang_omp.cpp b/lib/kokkos/cmake/compile_tests/clang_omp.cpp new file mode 100644 index 0000000000..ce3bbfb262 --- /dev/null +++ b/lib/kokkos/cmake/compile_tests/clang_omp.cpp @@ -0,0 +1,9 @@ +#include + +int main(int argc, char** argv) { + int thr = omp_get_num_threads(); + if (thr > 0) + return thr; + else + return 0; +} diff --git a/lib/kokkos/cmake/compile_tests/pthread.cpp b/lib/kokkos/cmake/compile_tests/pthread.cpp new file mode 100644 index 0000000000..3b13f7ba35 --- /dev/null +++ b/lib/kokkos/cmake/compile_tests/pthread.cpp @@ -0,0 +1,10 @@ +#include + +void* kokkos_test(void* args) { return args; } + +int main(void) { + pthread_t thread; + pthread_create(&thread, NULL, kokkos_test, NULL); + pthread_join(thread, NULL); + return 0; +} diff --git a/lib/kokkos/cmake/cray.cmake b/lib/kokkos/cmake/cray.cmake new file mode 100644 index 0000000000..08912f5130 --- /dev/null +++ b/lib/kokkos/cmake/cray.cmake @@ -0,0 +1,9 @@ + + +function(kokkos_set_cray_flags full_standard int_standard) + STRING(TOLOWER ${full_standard} 
FULL_LC_STANDARD) + STRING(TOLOWER ${int_standard} INT_LC_STANDARD) + SET(KOKKOS_CXX_STANDARD_FLAG "-hstd=c++${FULL_LC_STANDARD}", PARENT_SCOPE) + SET(KOKKOS_CXX_INTERMDIATE_STANDARD_FLAG "-hstd=c++${INT_LC_STANDARD}" PARENT_SCOPE) +endfunction() + diff --git a/lib/kokkos/cmake/deps/CUDA.cmake b/lib/kokkos/cmake/deps/CUDA.cmake index 801c20067b..4876bca259 100644 --- a/lib/kokkos/cmake/deps/CUDA.cmake +++ b/lib/kokkos/cmake/deps/CUDA.cmake @@ -73,7 +73,7 @@ IF(NOT _CUDA_FAILURE) GLOBAL_SET(TPL_CUDA_LIBRARY_DIRS) GLOBAL_SET(TPL_CUDA_INCLUDE_DIRS ${CUDA_TOOLKIT_INCLUDE}) GLOBAL_SET(TPL_CUDA_LIBRARIES ${CUDA_CUDART_LIBRARY} ${CUDA_cublas_LIBRARY} ${CUDA_cufft_LIBRARY}) - TIBITS_CREATE_IMPORTED_TPL_LIBRARY(CUSPARSE) + KOKKOS_CREATE_IMPORTED_TPL_LIBRARY(CUSPARSE) ELSE() SET(TPL_ENABLE_CUDA OFF) ENDIF() diff --git a/lib/kokkos/cmake/deps/CUSPARSE.cmake b/lib/kokkos/cmake/deps/CUSPARSE.cmake index 6f26d857c0..b2420d1168 100644 --- a/lib/kokkos/cmake/deps/CUSPARSE.cmake +++ b/lib/kokkos/cmake/deps/CUSPARSE.cmake @@ -59,6 +59,6 @@ # GLOBAL_SET(TPL_CUSPARSE_LIBRARY_DIRS) # GLOBAL_SET(TPL_CUSPARSE_INCLUDE_DIRS ${TPL_CUDA_INCLUDE_DIRS}) # GLOBAL_SET(TPL_CUSPARSE_LIBRARIES ${CUDA_cusparse_LIBRARY}) -# TIBITS_CREATE_IMPORTED_TPL_LIBRARY(CUSPARSE) +# KOKKOS_CREATE_IMPORTED_TPL_LIBRARY(CUSPARSE) #ENDIF() diff --git a/lib/kokkos/cmake/deps/HWLOC.cmake b/lib/kokkos/cmake/deps/HWLOC.cmake index 275abd3a5d..ed89c8c1e5 100644 --- a/lib/kokkos/cmake/deps/HWLOC.cmake +++ b/lib/kokkos/cmake/deps/HWLOC.cmake @@ -64,7 +64,7 @@ # Version: 1.3 # -TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( HWLOC +KOKKOS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( HWLOC REQUIRED_HEADERS hwloc.h REQUIRED_LIBS_NAMES "hwloc" ) diff --git a/lib/kokkos/cmake/deps/Pthread.cmake b/lib/kokkos/cmake/deps/Pthread.cmake index 46d0a939ca..5f835fc300 100644 --- a/lib/kokkos/cmake/deps/Pthread.cmake +++ b/lib/kokkos/cmake/deps/Pthread.cmake @@ -74,9 +74,9 @@ IF(USE_THREADS) SET(TPL_Pthread_INCLUDE_DIRS "") 
SET(TPL_Pthread_LIBRARIES "${CMAKE_THREAD_LIBS_INIT}") SET(TPL_Pthread_LIBRARY_DIRS "") - TIBITS_CREATE_IMPORTED_TPL_LIBRARY(Pthread) + KOKKOS_CREATE_IMPORTED_TPL_LIBRARY(Pthread) ELSE() - TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( Pthread + KOKKOS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( Pthread REQUIRED_HEADERS pthread.h REQUIRED_LIBS_NAMES pthread ) diff --git a/lib/kokkos/cmake/deps/QTHREADS.cmake b/lib/kokkos/cmake/deps/QTHREADS.cmake deleted file mode 100644 index c312f2590b..0000000000 --- a/lib/kokkos/cmake/deps/QTHREADS.cmake +++ /dev/null @@ -1,69 +0,0 @@ -# @HEADER -# ************************************************************************ -# -# Trilinos: An Object-Oriented Solver Framework -# Copyright (2001) Sandia Corporation -# -# -# Copyright (2001) Sandia Corporation. Under the terms of Contract -# DE-AC04-94AL85000, there is a non-exclusive license for use of this -# work by or on behalf of the U.S. Government. Export of this program -# may require a license from the United States Government. -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# NOTICE: The United States Government is granted for itself and others -# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide -# license in this data to reproduce, prepare derivative works, and -# perform publicly and display publicly. Beginning five (5) years from -# July 25, 2001, the United States Government is granted for itself and -# others acting on its behalf a paid-up, nonexclusive, irrevocable -# worldwide license in this data to reproduce, prepare derivative works, -# distribute copies to the public, perform publicly and display -# publicly, and to permit others to do so. -# -# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT -# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES -# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR -# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY -# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS -# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. -# -# ************************************************************************ -# @HEADER - - -#----------------------------------------------------------------------------- -# Hardware locality detection and control library. -# -# Acquisition information: -# Date checked: July 2014 -# Checked by: H. 
Carter Edwards -# Source: https://code.google.com/p/qthreads -# - -TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( QTHREADS - REQUIRED_HEADERS qthread.h - REQUIRED_LIBS_NAMES "qthread" - ) diff --git a/lib/kokkos/cmake/fake_tribits.cmake b/lib/kokkos/cmake/fake_tribits.cmake new file mode 100644 index 0000000000..26948d2cfb --- /dev/null +++ b/lib/kokkos/cmake/fake_tribits.cmake @@ -0,0 +1,338 @@ +#These are tribits wrappers used by all projects in the Kokkos ecosystem + +INCLUDE(CMakeParseArguments) +INCLUDE(CTest) + +cmake_policy(SET CMP0054 NEW) + +FUNCTION(ASSERT_DEFINED VARS) + FOREACH(VAR ${VARS}) + IF(NOT DEFINED ${VAR}) + MESSAGE(SEND_ERROR "Error, the variable ${VAR} is not defined!") + ENDIF() + ENDFOREACH() +ENDFUNCTION() + +MACRO(KOKKOS_ADD_OPTION_AND_DEFINE USER_OPTION_NAME MACRO_DEFINE_NAME DOCSTRING DEFAULT_VALUE ) +SET( ${USER_OPTION_NAME} "${DEFAULT_VALUE}" CACHE BOOL "${DOCSTRING}" ) +IF(NOT ${MACRO_DEFINE_NAME} STREQUAL "") + IF(${USER_OPTION_NAME}) + GLOBAL_SET(${MACRO_DEFINE_NAME} ON) + ELSE() + GLOBAL_SET(${MACRO_DEFINE_NAME} OFF) + ENDIF() +ENDIF() +ENDMACRO() + +MACRO(GLOBAL_RESET VARNAME) + SET(${VARNAME} "" CACHE INTERNAL "" FORCE) +ENDMACRO() + +MACRO(GLOBAL_OVERWRITE VARNAME VALUE TYPE) + SET(${VARNAME} ${VALUE} CACHE ${TYPE} "" FORCE) +ENDMACRO() + +IF (NOT KOKKOS_HAS_TRILINOS) +MACRO(APPEND_GLOB VAR) + FILE(GLOB LOCAL_TMP_VAR ${ARGN}) + LIST(APPEND ${VAR} ${LOCAL_TMP_VAR}) +ENDMACRO() + +MACRO(GLOBAL_SET VARNAME) + SET(${VARNAME} ${ARGN} CACHE INTERNAL "" FORCE) +ENDMACRO() + +FUNCTION(VERIFY_EMPTY CONTEXT) +if(${ARGN}) +MESSAGE(FATAL_ERROR "Kokkos does not support all of Tribits. 
Unhandled arguments in ${CONTEXT}:\n${ARGN}") +endif() +ENDFUNCTION() + +MACRO(PREPEND_GLOBAL_SET VARNAME) + ASSERT_DEFINED(${VARNAME}) + GLOBAL_SET(${VARNAME} ${ARGN} ${${VARNAME}}) +ENDMACRO() + +MACRO(PREPEND_TARGET_SET VARNAME TARGET_NAME TYPE) + IF(TYPE STREQUAL "REQUIRED") + SET(REQUIRED TRUE) + ELSE() + SET(REQUIRED FALSE) + ENDIF() + IF(TARGET ${TARGET_NAME}) + PREPEND_GLOBAL_SET(${VARNAME} ${TARGET_NAME}) + ELSE() + IF(REQUIRED) + MESSAGE(FATAL_ERROR "Missing dependency ${TARGET_NAME}") + ENDIF() + ENDIF() +ENDMACRO() +endif() + + +FUNCTION(KOKKOS_CONFIGURE_FILE PACKAGE_NAME_CONFIG_FILE) + if (KOKKOS_HAS_TRILINOS) + TRIBITS_CONFIGURE_FILE(${PACKAGE_NAME_CONFIG_FILE}) + else() + # Configure the file + CONFIGURE_FILE( + ${PACKAGE_SOURCE_DIR}/cmake/${PACKAGE_NAME_CONFIG_FILE}.in + ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_NAME_CONFIG_FILE} + ) + endif() +ENDFUNCTION() + +MACRO(ADD_INTERFACE_LIBRARY LIB_NAME) + FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/dummy.cpp "") + ADD_LIBRARY(${LIB_NAME} STATIC ${CMAKE_CURRENT_BINARY_DIR}/dummy.cpp) + SET_TARGET_PROPERTIES(${LIB_NAME} PROPERTIES INTERFACE TRUE) +ENDMACRO() + +IF(NOT TARGET check) + ADD_CUSTOM_TARGET(check COMMAND ${CMAKE_CTEST_COMMAND} -VV -C ${CMAKE_CFG_INTDIR}) +ENDIF() + +FUNCTION(KOKKOS_ADD_TEST) + if (KOKKOS_HAS_TRILINOS) + CMAKE_PARSE_ARGUMENTS(TEST + "" + "EXE;NAME" + "" + ${ARGN}) + IF(TEST_EXE) + SET(EXE_ROOT ${TEST_EXE}) + ELSE() + SET(EXE_ROOT ${TEST_NAME}) + ENDIF() + + TRIBITS_ADD_TEST( + ${EXE_ROOT} + NAME ${TEST_NAME} + ${ARGN} + COMM serial mpi + NUM_MPI_PROCS 1 + ${TEST_UNPARSED_ARGUMENTS} + ) + else() + CMAKE_PARSE_ARGUMENTS(TEST + "WILL_FAIL" + "FAIL_REGULAR_EXPRESSION;PASS_REGULAR_EXPRESSION;EXE;NAME" + "CATEGORIES;CMD_ARGS" + ${ARGN}) + IF(TEST_EXE) + SET(EXE ${TEST_EXE}) + ELSE() + SET(EXE ${TEST_NAME}) + ENDIF() + IF(WIN32) + ADD_TEST(NAME ${TEST_NAME} WORKING_DIRECTORY ${LIBRARY_OUTPUT_PATH} COMMAND ${EXE}${CMAKE_EXECUTABLE_SUFFIX} ${TEST_CMD_ARGS}) + ELSE() + ADD_TEST(NAME ${TEST_NAME} 
COMMAND ${EXE} ${TEST_CMD_ARGS}) + ENDIF() + IF(TEST_WILL_FAIL) + SET_TESTS_PROPERTIES(${TEST_NAME} PROPERTIES WILL_FAIL ${TEST_WILL_FAIL}) + ENDIF() + IF(TEST_FAIL_REGULAR_EXPRESSION) + SET_TESTS_PROPERTIES(${TEST_NAME} PROPERTIES FAIL_REGULAR_EXPRESSION ${TEST_FAIL_REGULAR_EXPRESSION}) + ENDIF() + IF(TEST_PASS_REGULAR_EXPRESSION) + SET_TESTS_PROPERTIES(${TEST_NAME} PROPERTIES PASS_REGULAR_EXPRESSION ${TEST_PASS_REGULAR_EXPRESSION}) + ENDIF() + VERIFY_EMPTY(KOKKOS_ADD_TEST ${TEST_UNPARSED_ARGUMENTS}) + endif() +ENDFUNCTION() + +FUNCTION(KOKKOS_ADD_ADVANCED_TEST) + if (KOKKOS_HAS_TRILINOS) + TRIBITS_ADD_ADVANCED_TEST(${ARGN}) + else() + # TODO Write this + endif() +ENDFUNCTION() + +MACRO(KOKKOS_CREATE_IMPORTED_TPL_LIBRARY TPL_NAME) + ADD_INTERFACE_LIBRARY(TPL_LIB_${TPL_NAME}) + TARGET_LINK_LIBRARIES(TPL_LIB_${TPL_NAME} LINK_PUBLIC ${TPL_${TPL_NAME}_LIBRARIES}) + TARGET_INCLUDE_DIRECTORIES(TPL_LIB_${TPL_NAME} INTERFACE ${TPL_${TPL_NAME}_INCLUDE_DIRS}) +ENDMACRO() + +FUNCTION(KOKKOS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES TPL_NAME) + if (KOKKOS_HAS_TRILINOS) + TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES(${TPL_NAME} ${ARGN}) + else() + CMAKE_PARSE_ARGUMENTS(PARSE + "" + "" + "REQUIRED_HEADERS;REQUIRED_LIBS_NAMES" + ${ARGN}) + + SET(_${TPL_NAME}_ENABLE_SUCCESS TRUE) + IF (PARSE_REQUIRED_LIBS_NAMES) + FIND_LIBRARY(TPL_${TPL_NAME}_LIBRARIES NAMES ${PARSE_REQUIRED_LIBS_NAMES}) + IF(NOT TPL_${TPL_NAME}_LIBRARIES) + SET(_${TPL_NAME}_ENABLE_SUCCESS FALSE) + ENDIF() + ENDIF() + IF (PARSE_REQUIRED_HEADERS) + FIND_PATH(TPL_${TPL_NAME}_INCLUDE_DIRS NAMES ${PARSE_REQUIRED_HEADERS}) + IF(NOT TPL_${TPL_NAME}_INCLUDE_DIRS) + SET(_${TPL_NAME}_ENABLE_SUCCESS FALSE) + ENDIF() + ENDIF() + IF (_${TPL_NAME}_ENABLE_SUCCESS) + KOKKOS_CREATE_IMPORTED_TPL_LIBRARY(${TPL_NAME}) + ENDIF() + VERIFY_EMPTY(KOKKOS_CREATE_IMPORTED_TPL_LIBRARY ${PARSE_UNPARSED_ARGUMENTS}) + endif() +ENDFUNCTION() + +MACRO(KOKKOS_TARGET_COMPILE_OPTIONS TARGET) +if(KOKKOS_HAS_TRILINOS) + TARGET_COMPILE_OPTIONS(${TARGET} 
${ARGN}) +else() + TARGET_COMPILE_OPTIONS(${TARGET} ${ARGN}) +endif() +ENDMACRO() + + +MACRO(KOKKOS_EXCLUDE_AUTOTOOLS_FILES) + if (KOKKOS_HAS_TRILINOS) + TRIBITS_EXCLUDE_AUTOTOOLS_FILES() + else() + #do nothing + endif() +ENDMACRO() + +FUNCTION(KOKKOS_LIB_TYPE LIB RET) +GET_TARGET_PROPERTY(PROP ${LIB} TYPE) +IF (${PROP} STREQUAL "INTERFACE_LIBRARY") + SET(${RET} "INTERFACE" PARENT_SCOPE) +ELSE() + SET(${RET} "PUBLIC" PARENT_SCOPE) +ENDIF() +ENDFUNCTION() + +FUNCTION(KOKKOS_TARGET_INCLUDE_DIRECTORIES TARGET) +IF(KOKKOS_HAS_TRILINOS) + KOKKOS_LIB_TYPE(${TARGET} INCTYPE) + #don't trust tribits to do this correctly - but need to add package name + TARGET_INCLUDE_DIRECTORIES(${TARGET} ${INCTYPE} ${ARGN}) +ELSEIF(TARGET ${TARGET}) + #the target actually exists - this means we are doing separate libs + #or this a test library + KOKKOS_LIB_TYPE(${TARGET} INCTYPE) + TARGET_INCLUDE_DIRECTORIES(${TARGET} ${INCTYPE} ${ARGN}) +ELSE() + GET_PROPERTY(LIBS GLOBAL PROPERTY KOKKOS_LIBRARIES_NAMES) + IF (${TARGET} IN_LIST LIBS) + SET_PROPERTY(GLOBAL APPEND PROPERTY KOKKOS_LIBRARY_INCLUDES ${ARGN}) + ELSE() + MESSAGE(FATAL_ERROR "Trying to set include directories on unknown target ${TARGET}") + ENDIF() +ENDIF() +ENDFUNCTION() + +FUNCTION(KOKKOS_LINK_INTERNAL_LIBRARY TARGET DEPLIB) +IF(KOKKOS_HAS_TRILINOS) + #do nothing +ELSE() + SET(options INTERFACE) + SET(oneValueArgs) + SET(multiValueArgs) + CMAKE_PARSE_ARGUMENTS(PARSE + "INTERFACE" + "" + "" + ${ARGN}) + SET(LINK_TYPE) + IF(PARSE_INTERFACE) + SET(LINK_TYPE INTERFACE) + ELSE() + SET(LINK_TYPE PUBLIC) + ENDIF() + TARGET_LINK_LIBRARIES(${TARGET} ${LINK_TYPE} ${DEPLIB}) + VERIFY_EMPTY(KOKKOS_LINK_INTERNAL_LIBRARY ${PARSE_UNPARSED_ARGUMENTS}) +ENDIF() +ENDFUNCTION() + +FUNCTION(KOKKOS_ADD_TEST_LIBRARY NAME) +IF (KOKKOS_HAS_TRILINOS) + TRIBITS_ADD_LIBRARY(${NAME} ${ARGN} TESTONLY + ADDED_LIB_TARGET_NAME_OUT ${NAME} + ) +ELSE() + SET(oneValueArgs) + SET(multiValueArgs HEADERS SOURCES) + + CMAKE_PARSE_ARGUMENTS(PARSE + "STATIC;SHARED" + 
"" + "HEADERS;SOURCES" + ${ARGN}) + + IF(PARSE_HEADERS) + LIST(REMOVE_DUPLICATES PARSE_HEADERS) + ENDIF() + IF(PARSE_SOURCES) + LIST(REMOVE_DUPLICATES PARSE_SOURCES) + ENDIF() + ADD_LIBRARY(${NAME} ${PARSE_SOURCES}) + target_link_libraries( + ${NAME} + PUBLIC kokkos + ) +ENDIF() +ENDFUNCTION() + + +FUNCTION(KOKKOS_TARGET_COMPILE_DEFINITIONS) + IF (KOKKOS_HAS_TRILINOS) + TARGET_COMPILE_DEFINITIONS(${TARGET} ${ARGN}) + ELSE() + TARGET_COMPILE_DEFINITIONS(${TARGET} ${ARGN}) + ENDIF() +ENDFUNCTION() + +FUNCTION(KOKKOS_INCLUDE_DIRECTORIES) +IF(KOKKOS_HAS_TRILINOS) + TRIBITS_INCLUDE_DIRECTORIES(${ARGN}) +ELSE() + CMAKE_PARSE_ARGUMENTS( + INC + "REQUIRED_DURING_INSTALLATION_TESTING" + "" + "" + ${ARGN} + ) + INCLUDE_DIRECTORIES(${INC_UNPARSED_ARGUMENTS}) +ENDIF() +ENDFUNCTION() + + +MACRO(KOKKOS_ADD_COMPILE_OPTIONS) +ADD_COMPILE_OPTIONS(${ARGN}) +ENDMACRO() + +MACRO(PRINTALL match) +get_cmake_property(_variableNames VARIABLES) +list (SORT _variableNames) +foreach (_variableName ${_variableNames}) + if("${_variableName}" MATCHES "${match}") + message(STATUS "${_variableName}=${${_variableName}}") + endif() +endforeach() +ENDMACRO() + +MACRO(SET_GLOBAL_REPLACE SUBSTR VARNAME) + STRING(REPLACE ${SUBSTR} ${${VARNAME}} TEMP) + GLOBAL_SET(${VARNAME} ${TEMP}) +ENDMACRO() + +FUNCTION(GLOBAL_APPEND VARNAME) + #We make this a function since we are setting variables + #and want to use scope to avoid overwriting local variables + SET(TEMP ${${VARNAME}}) + LIST(APPEND TEMP ${ARGN}) + GLOBAL_SET(${VARNAME} ${TEMP}) +ENDFUNCTION() + diff --git a/lib/kokkos/cmake/gnu.cmake b/lib/kokkos/cmake/gnu.cmake new file mode 100644 index 0000000000..aa11fe87b1 --- /dev/null +++ b/lib/kokkos/cmake/gnu.cmake @@ -0,0 +1,23 @@ + +FUNCTION(kokkos_set_gnu_flags full_standard int_standard) + STRING(TOLOWER ${full_standard} FULL_LC_STANDARD) + STRING(TOLOWER ${int_standard} INT_LC_STANDARD) + # The following three blocks of code were copied from + # /Modules/Compiler/Intel-CXX.cmake from CMake 3.7.2 and 
then modified. + IF(CMAKE_CXX_SIMULATE_ID STREQUAL MSVC) + SET(_std -Qstd) + SET(_ext c++) + ELSE() + SET(_std -std) + SET(_ext gnu++) + ENDIF() + + IF (CMAKE_CXX_EXTENSIONS) + SET(KOKKOS_CXX_STANDARD_FLAG "-std=gnu++${FULL_LC_STANDARD}" PARENT_SCOPE) + SET(KOKKOS_CXX_INTERMEDIATE_STANDARD_FLAG "-std=gnu++${INT_LC_STANDARD}" PARENT_SCOPE) + ELSE() + SET(KOKKOS_CXX_STANDARD_FLAG "-std=c++${FULL_LC_STANDARD}" PARENT_SCOPE) + SET(KOKKOS_CXX_INTERMEDIATE_STANDARD_FLAG "-std=c++${INT_LC_STANDARD}" PARENT_SCOPE) + ENDIF() +ENDFUNCTION() + diff --git a/lib/kokkos/cmake/intel.cmake b/lib/kokkos/cmake/intel.cmake new file mode 100644 index 0000000000..f36f01d8ca --- /dev/null +++ b/lib/kokkos/cmake/intel.cmake @@ -0,0 +1,30 @@ + +FUNCTION(kokkos_set_intel_flags full_standard int_standard) + STRING(TOLOWER ${full_standard} FULL_LC_STANDARD) + STRING(TOLOWER ${int_standard} INT_LC_STANDARD) + # The following three blocks of code were copied from + # /Modules/Compiler/Intel-CXX.cmake from CMake 3.7.2 and then modified. + IF(CMAKE_CXX_SIMULATE_ID STREQUAL MSVC) + SET(_std -Qstd) + SET(_ext c++) + ELSE() + SET(_std -std) + SET(_ext gnu++) + ENDIF() + + IF(NOT KOKKOS_CXX_STANDARD STREQUAL 11 AND NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 15.0.2) + #There is no gnu++14 value supported; figure out what to do. 
+ SET(KOKKOS_CXX_STANDARD_FLAG "${_std}=c++${FULL_LC_STANDARD}" PARENT_SCOPE) + SET(KOKKOS_CXX_INTERMEDIATE_STANDARD_FLAG "${_std}=c++${INT_LC_STANDARD}" PARENT_SCOPE) + ELSEIF(KOKKOS_CXX_STANDARD STREQUAL 11 AND NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 13.0) + IF (CMAKE_CXX_EXTENSIONS) + SET(KOKKOS_CXX_STANDARD_FLAG "${_std}=${_ext}11" PARENT_SCOPE) + ELSE() + SET(KOKKOS_CXX_STANDARD_FLAG "${_std}=c++11" PARENT_SCOPE) + ENDIF() + ELSE() + MESSAGE(FATAL_ERROR "Intel compiler version too low - need 13.0 for C++11 and 15.0 for C++14") + ENDIF() + +ENDFUNCTION() + diff --git a/lib/kokkos/cmake/kokkos_arch.cmake b/lib/kokkos/cmake/kokkos_arch.cmake new file mode 100644 index 0000000000..c33247c955 --- /dev/null +++ b/lib/kokkos/cmake/kokkos_arch.cmake @@ -0,0 +1,438 @@ + +FUNCTION(KOKKOS_ARCH_OPTION SUFFIX DEV_TYPE DESCRIPTION) + #all optimizations off by default + KOKKOS_OPTION(ARCH_${SUFFIX} OFF BOOL "Optimize for ${DESCRIPTION} (${DEV_TYPE})") + IF (KOKKOS_ARCH_${SUFFIX}) + LIST(APPEND KOKKOS_ENABLED_ARCH_LIST ${SUFFIX}) + SET(KOKKOS_ENABLED_ARCH_LIST ${KOKKOS_ENABLED_ARCH_LIST} PARENT_SCOPE) + ENDIF() + SET(KOKKOS_ARCH_${SUFFIX} ${KOKKOS_ARCH_${SUFFIX}} PARENT_SCOPE) +ENDFUNCTION() + +FUNCTION(ARCH_FLAGS) + SET(COMPILERS NVIDIA PGI XL DEFAULT Cray Intel Clang AppleClang GNU) + CMAKE_PARSE_ARGUMENTS( + PARSE + "LINK_ONLY;COMPILE_ONLY" + "" + "${COMPILERS}" + ${ARGN}) + + SET(COMPILER ${KOKKOS_CXX_COMPILER_ID}) + + SET(FLAGS) + SET(NEW_COMPILE_OPTIONS) + SET(NEW_XCOMPILER_OPTIONS) + SET(NEW_LINK_OPTIONS) + LIST(APPEND NEW_XCOMPILER_OPTIONS ${KOKKOS_XCOMPILER_OPTIONS}) + LIST(APPEND NEW_COMPILE_OPTIONS ${KOKKOS_COMPILE_OPTIONS}) + LIST(APPEND NEW_LINK_OPTIONS ${KOKKOS_LINK_OPTIONS}) + FOREACH(COMP ${COMPILERS}) + IF (COMPILER STREQUAL "${COMP}") + IF (PARSE_${COMPILER}) + IF (NOT "${PARSE_${COMPILER}}" STREQUAL "NO-VALUE-SPECIFIED") + SET(FLAGS ${PARSE_${COMPILER}}) + ENDIF() + ELSEIF(PARSE_DEFAULT) + SET(FLAGS ${PARSE_DEFAULT}) + ENDIF() + ENDIF() + 
ENDFOREACH() + + IF (NOT PARSE_LINK_ONLY) + # The funky logic here is for future handling of argument deduplication + # If we naively pass multiple -Xcompiler flags to target_compile_options + # -Xcompiler will get deduplicated and break the build + IF ("-Xcompiler" IN_LIST FLAGS) + LIST(REMOVE_ITEM FLAGS "-Xcompiler") + GLOBAL_APPEND(KOKKOS_XCOMPILER_OPTIONS ${FLAGS}) + ELSE() + GLOBAL_APPEND(KOKKOS_COMPILE_OPTIONS ${FLAGS}) + ENDIF() + ENDIF() + + IF (NOT PARSE_COMPILE_ONLY) + GLOBAL_APPEND(KOKKOS_LINK_OPTIONS ${FLAGS}) + ENDIF() +ENDFUNCTION() + +# Make sure devices and compiler ID are done +KOKKOS_CFG_DEPENDS(ARCH COMPILER_ID) +KOKKOS_CFG_DEPENDS(ARCH DEVICES) +KOKKOS_CFG_DEPENDS(ARCH OPTIONS) + + +#------------------------------------------------------------------------------- +# List of possible host architectures. +#------------------------------------------------------------------------------- +SET(KOKKOS_ARCH_LIST) + + +KOKKOS_DEPRECATED_LIST(ARCH ARCH) +KOKKOS_ARCH_OPTION(AMDAVX HOST "AMD chip") +KOKKOS_ARCH_OPTION(ARMV80 HOST "ARMv8.0 Compatible CPU") +KOKKOS_ARCH_OPTION(ARMV81 HOST "ARMv8.1 Compatible CPU") +KOKKOS_ARCH_OPTION(ARMV8_THUNDERX HOST "ARMv8 Cavium ThunderX CPU") +KOKKOS_ARCH_OPTION(ARMV8_THUNDERX2 HOST "ARMv8 Cavium ThunderX2 CPU") +KOKKOS_ARCH_OPTION(WSM HOST "Intel Westmere CPU") +KOKKOS_ARCH_OPTION(SNB HOST "Intel Sandy/Ivy Bridge CPUs") +KOKKOS_ARCH_OPTION(HSW HOST "Intel Haswell CPUs") +KOKKOS_ARCH_OPTION(BDW HOST "Intel Broadwell Xeon E-class CPUs") +KOKKOS_ARCH_OPTION(SKX HOST "Intel Sky Lake Xeon E-class HPC CPUs (AVX512)") +KOKKOS_ARCH_OPTION(KNC HOST "Intel Knights Corner Xeon Phi") +KOKKOS_ARCH_OPTION(KNL HOST "Intel Knights Landing Xeon Phi") +KOKKOS_ARCH_OPTION(BGQ HOST "IBM Blue Gene Q") +KOKKOS_ARCH_OPTION(POWER7 HOST "IBM POWER7 CPUs") +KOKKOS_ARCH_OPTION(POWER8 HOST "IBM POWER8 CPUs") +KOKKOS_ARCH_OPTION(POWER9 HOST "IBM POWER9 CPUs") +KOKKOS_ARCH_OPTION(KEPLER30 GPU "NVIDIA Kepler generation CC 3.0") +KOKKOS_ARCH_OPTION(KEPLER32 GPU 
"NVIDIA Kepler generation CC 3.2") +KOKKOS_ARCH_OPTION(KEPLER35 GPU "NVIDIA Kepler generation CC 3.5") +KOKKOS_ARCH_OPTION(KEPLER37 GPU "NVIDIA Kepler generation CC 3.7") +KOKKOS_ARCH_OPTION(MAXWELL50 GPU "NVIDIA Maxwell generation CC 5.0") +KOKKOS_ARCH_OPTION(MAXWELL52 GPU "NVIDIA Maxwell generation CC 5.2") +KOKKOS_ARCH_OPTION(MAXWELL53 GPU "NVIDIA Maxwell generation CC 5.3") +KOKKOS_ARCH_OPTION(PASCAL60 GPU "NVIDIA Pascal generation CC 6.0") +KOKKOS_ARCH_OPTION(PASCAL61 GPU "NVIDIA Pascal generation CC 6.1") +KOKKOS_ARCH_OPTION(VOLTA70 GPU "NVIDIA Volta generation CC 7.0") +KOKKOS_ARCH_OPTION(VOLTA72 GPU "NVIDIA Volta generation CC 7.2") +KOKKOS_ARCH_OPTION(TURING75 GPU "NVIDIA Turing generation CC 7.5") +KOKKOS_ARCH_OPTION(EPYC HOST "AMD Epyc architecture") + + +IF (KOKKOS_ENABLE_CUDA) + #Regardless of version, make sure we define the general architecture name + IF (KOKKOS_ARCH_KEPLER30 OR KOKKOS_ARCH_KEPLER32 OR KOKKOS_ARCH_KEPLER35 OR KOKKOS_ARCH_KEPLER37) + SET(KOKKOS_ARCH_KEPLER ON) + ENDIF() + + #Regardless of version, make sure we define the general architecture name + IF (KOKKOS_ARCH_MAXWELL50 OR KOKKOS_ARCH_MAXWELL52 OR KOKKOS_ARCH_MAXWELL53) + SET(KOKKOS_ARCH_MAXWELL ON) + ENDIF() + + #Regardless of version, make sure we define the general architecture name + IF (KOKKOS_ARCH_PASCAL60 OR KOKKOS_ARCH_PASCAL61) + SET(KOKKOS_ARCH_PASCAL ON) + ENDIF() + + #Regardless of version, make sure we define the general architecture name + IF (KOKKOS_ARCH_VOLTA70 OR KOKKOS_ARCH_VOLTA72) + SET(KOKKOS_ARCH_VOLTA ON) + ENDIF() +ENDIF() + + + +IF(KOKKOS_ENABLE_COMPILER_WARNINGS) + SET(COMMON_WARNINGS + "-Wall" "-Wshadow" "-pedantic" + "-Wsign-compare" "-Wtype-limits" "-Wuninitialized") + + SET(GNU_WARNINGS "-Wempty-body" "-Wclobbered" "-Wignored-qualifiers" + ${COMMON_WARNINGS}) + + ARCH_FLAGS( + PGI NO-VALUE-SPECIFIED + GNU ${GNU_WARNINGS} + DEFAULT ${COMMON_WARNINGS} + ) +ENDIF() + + +#------------------------------- KOKKOS_CUDA_OPTIONS --------------------------- 
+GLOBAL_RESET(KOKKOS_CUDA_OPTIONS) +# Construct the Makefile options +IF (KOKKOS_ENABLE_CUDA_LAMBDA) + IF(KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA) + GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "-expt-extended-lambda") + ENDIF() +ENDIF() + +IF (KOKKOS_ENABLE_CUDA_CONSTEXPR) + IF(KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA) + GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "-expt-relaxed-constexpr") + ENDIF() +ENDIF() + +IF (KOKKOS_CXX_COMPILER_ID STREQUAL Clang) + SET(CUDA_ARCH_FLAG "--cuda-gpu-arch") + GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS -x cuda) + IF (KOKKOS_ENABLE_CUDA) + SET(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND ON CACHE BOOL "enable CUDA Clang workarounds" FORCE) + ENDIF() +ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA) + SET(CUDA_ARCH_FLAG "-arch") +ENDIF() + +IF (KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA) + STRING(TOUPPER "${CMAKE_BUILD_TYPE}" _UPPERCASE_CMAKE_BUILD_TYPE) + IF (KOKKOS_ENABLE_DEBUG OR _UPPERCASE_CMAKE_BUILD_TYPE STREQUAL "DEBUG") + GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS -lineinfo) + ENDIF() + UNSET(_UPPERCASE_CMAKE_BUILD_TYPE) + IF (KOKKOS_CXX_COMPILER_VERSION VERSION_GREATER 9.0 OR KOKKOS_CXX_COMPILER_VERSION VERSION_EQUAL 9.0) + GLOBAL_APPEND(KOKKOS_CUDAFE_OPTIONS --diag_suppress=esa_on_defaulted_function_ignored) + ENDIF() +ENDIF() + +IF(KOKKOS_ENABLE_OPENMP) + IF (KOKKOS_CXX_COMPILER_ID STREQUAL AppleClang) + MESSAGE(FATAL_ERROR "Apple Clang does not support OpenMP. 
Use native Clang instead") + ENDIF() + ARCH_FLAGS( + Clang -fopenmp=libomp + PGI -mp + NVIDIA -Xcompiler -fopenmp + Cray NO-VALUE-SPECIFIED + XL -qsmp=omp + DEFAULT -fopenmp + ) +ENDIF() + +IF (KOKKOS_ARCH_ARMV80) + ARCH_FLAGS( + Cray NO-VALUE-SPECIFIED + PGI NO-VALUE-SPECIFIED + DEFAULT -march=armv8-a + ) +ENDIF() + +IF (KOKKOS_ARCH_ARMV81) + ARCH_FLAGS( + Cray NO-VALUE-SPECIFIED + PGI NO-VALUE-SPECIFIED + DEFAULT -march=armv8.1-a + ) +ENDIF() + +IF (KOKKOS_ARCH_ARMV8_THUNDERX) + SET(KOKKOS_ARCH_ARMV80 ON) #Not a cache variable + ARCH_FLAGS( + Cray NO-VALUE-SPECIFIED + PGI NO-VALUE-SPECIFIED + DEFAULT -march=armv8-a -mtune=thunderx + ) +ENDIF() + +IF (KOKKOS_ARCH_ARMV8_THUNDERX2) + SET(KOKKOS_ARCH_ARMV81 ON) #Not a cache variable + ARCH_FLAGS( + Cray NO-VALUE-SPECIFIED + PGI NO-VALUE-SPECIFIED + DEFAULT -mcpu=thunderx2t99 -mtune=thunderx2t99 + ) +ENDIF() + +IF (KOKKOS_ARCH_EPYC) + ARCH_FLAGS( + Intel -mavx2 + DEFAULT -march=znver1 -mtune=znver1 + ) + SET(KOKKOS_ARCH_AMD_EPYC ON) + SET(KOKKOS_ARCH_AMD_AVX2 ON) +ENDIF() + +IF (KOKKOS_ARCH_WSM) + ARCH_FLAGS( + Intel -xSSE4.2 + PGI -tp=nehalem + Cray NO-VALUE-SPECIFIED + DEFAULT -msse4.2 + ) + SET(KOKKOS_ARCH_SSE42 ON) +ENDIF() + +IF (KOKKOS_ARCH_SNB OR KOKKOS_ARCH_AMDAVX) + SET(KOKKOS_ARCH_AVX ON) + ARCH_FLAGS( + Intel -mavx + PGI -tp=sandybridge + Cray NO-VALUE-SPECIFIED + DEFAULT -mavx + ) +ENDIF() + +IF (KOKKOS_ARCH_HSW) + SET(KOKKOS_ARCH_AVX2 ON) + ARCH_FLAGS( + Intel -xCORE-AVX2 + PGI -tp=haswell + Cray NO-VALUE-SPECIFIED + DEFAULT -march=core-avx2 -mtune=core-avx2 + ) +ENDIF() + +IF (KOKKOS_ARCH_BDW) + SET(KOKKOS_ARCH_AVX2 ON) + ARCH_FLAGS( + Intel -xCORE-AVX2 + PGI -tp=haswell + Cray NO-VALUE-SPECIFIED + DEFAULT -march=core-avx2 -mtune=core-avx2 -mrtm + ) +ENDIF() + +IF (KOKKOS_ARCH_EPYC) + SET(KOKKOS_ARCH_AMD_AVX2 ON) + ARCH_FLAGS( + Intel -mavx2 + DEFAULT -march=znver1 -mtune=znver1 + ) +ENDIF() + +IF (KOKKOS_ARCH_KNL) + #avx512-mic + SET(KOKKOS_ARCH_AVX512MIC ON) #not a cache variable + ARCH_FLAGS( + Intel 
-xMIC-AVX512 + PGI NO-VALUE-SPECIFIED + Cray NO-VALUE-SPECIFIED + DEFAULT -march=knl -mtune=knl + ) +ENDIF() + +IF (KOKKOS_ARCH_KNC) + SET(KOKKOS_USE_ISA_KNC ON) + ARCH_FLAGS( + DEFAULT -mmic + ) +ENDIF() + +IF (KOKKOS_ARCH_SKX) + #avx512-xeon + SET(KOKKOS_ARCH_AVX512XEON ON) + ARCH_FLAGS( + Intel -xCORE-AVX512 + PGI NO-VALUE-SPECIFIED + Cray NO-VALUE-SPECIFIED + DEFAULT -march=skylake-avx512 -mtune=skylake-avx512 -mrtm + ) +ENDIF() + +IF (KOKKOS_ARCH_WSM OR KOKKOS_ARCH_SNB OR KOKKOS_ARCH_HSW OR KOKKOS_ARCH_BDW OR KOKKOS_ARCH_KNL OR KOKKOS_ARCH_SKX OR KOKKOS_ARCH_EPYC) + SET(KOKKOS_USE_ISA_X86_64 ON) +ENDIF() + +IF (KOKKOS_ARCH_BDW OR KOKKOS_ARCH_SKX) + SET(KOKKOS_ENABLE_TM ON) #not a cache variable +ENDIF() + +IF (KOKKOS_ARCH_POWER7) + ARCH_FLAGS( + PGI NO-VALUE-SPECIFIED + DEFAULT -mcpu=power7 -mtune=power7 + ) + SET(KOKKOS_USE_ISA_POWERPCBE ON) +ENDIF() + +IF (KOKKOS_ARCH_POWER8) + ARCH_FLAGS( + PGI NO-VALUE-SPECIFIED + NVIDIA NO-VALUE-SPECIFIED + DEFAULT -mcpu=power8 -mtune=power8 + ) +ENDIF() + +IF (KOKKOS_ARCH_POWER9) + ARCH_FLAGS( + PGI NO-VALUE-SPECIFIED + NVIDIA NO-VALUE-SPECIFIED + DEFAULT -mcpu=power9 -mtune=power9 + ) +ENDIF() + +IF (KOKKOS_ARCH_POWER8 OR KOKKOS_ARCH_POWER9) + SET(KOKKOS_USE_ISA_POWERPCLE ON) +ENDIF() + +IF (Kokkos_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE) + ARCH_FLAGS( + Clang -fcuda-rdc + NVIDIA --relocatable-device-code=true + ) +ENDIF() + + +SET(CUDA_ARCH_ALREADY_SPECIFIED "") +FUNCTION(CHECK_CUDA_ARCH ARCH FLAG) +IF(KOKKOS_ARCH_${ARCH}) + IF(CUDA_ARCH_ALREADY_SPECIFIED) + MESSAGE(FATAL_ERROR "Multiple GPU architectures given! Already have ${CUDA_ARCH_ALREADY_SPECIFIED}, but trying to add ${ARCH}. If you are re-running CMake, try clearing the cache and running again.") + ENDIF() + SET(CUDA_ARCH_ALREADY_SPECIFIED ${ARCH} PARENT_SCOPE) + IF (NOT KOKKOS_ENABLE_CUDA) + MESSAGE(WARNING "Given CUDA arch ${ARCH}, but Kokkos_ENABLE_CUDA is OFF. 
Option will be ignored.") + UNSET(KOKKOS_ARCH_${ARCH} PARENT_SCOPE) + ELSE() + GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=${FLAG}") + IF(KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE) + GLOBAL_APPEND(KOKKOS_LINK_OPTIONS "${CUDA_ARCH_FLAG}=${FLAG}") + ENDIF() + ENDIF() +ENDIF() +ENDFUNCTION() + + +CHECK_CUDA_ARCH(KEPLER30 sm_30) +CHECK_CUDA_ARCH(KEPLER32 sm_32) +CHECK_CUDA_ARCH(KEPLER35 sm_35) +CHECK_CUDA_ARCH(KEPLER37 sm_37) +CHECK_CUDA_ARCH(MAXWELL50 sm_50) +CHECK_CUDA_ARCH(MAXWELL52 sm_52) +CHECK_CUDA_ARCH(MAXWELL53 sm_53) +CHECK_CUDA_ARCH(PASCAL60 sm_60) +CHECK_CUDA_ARCH(PASCAL61 sm_61) +CHECK_CUDA_ARCH(VOLTA70 sm_70) +CHECK_CUDA_ARCH(VOLTA72 sm_72) +CHECK_CUDA_ARCH(TURING75 sm_75) + +#CMake verbose is kind of pointless +#Let's just always print things +MESSAGE(STATUS "Execution Spaces:") +IF(KOKKOS_ENABLE_CUDA) + MESSAGE(STATUS " Device Parallel: CUDA") +ELSE() + MESSAGE(STATUS " Device Parallel: NONE") +ENDIF() + +FOREACH (_BACKEND OPENMP PTHREAD HPX) + IF(KOKKOS_ENABLE_${_BACKEND}) + IF(_HOST_PARALLEL) + MESSAGE(FATAL_ERROR "Multiple host parallel execution spaces are not allowed! " + "Trying to enable execution space ${_BACKEND}, " + "but execution space ${_HOST_PARALLEL} is already enabled. 
" + "Remove the CMakeCache.txt file and re-configure.") + ENDIF() + SET(_HOST_PARALLEL ${_BACKEND}) + ENDIF() +ENDFOREACH() + +IF(NOT _HOST_PARALLEL AND NOT KOKKOS_ENABLE_SERIAL) + MESSAGE(FATAL_ERROR "At least one host execution space must be enabled, " + "but no host parallel execution space was requested " + "and Kokkos_ENABLE_SERIAL=OFF.") +ENDIF() + +IF(NOT _HOST_PARALLEL) + SET(_HOST_PARALLEL "NONE") +ENDIF() +MESSAGE(STATUS " Host Parallel: ${_HOST_PARALLEL}") +UNSET(_HOST_PARALLEL) + +IF(KOKKOS_ENABLE_PTHREAD) + SET(KOKKOS_ENABLE_THREADS ON) +ENDIF() + +IF(KOKKOS_ENABLE_SERIAL) + MESSAGE(STATUS " Host Serial: SERIAL") +ELSE() + MESSAGE(STATUS " Host Serial: NONE") +ENDIF() + +MESSAGE(STATUS "") +MESSAGE(STATUS "Architectures:") +FOREACH(Arch ${KOKKOS_ENABLED_ARCH_LIST}) + MESSAGE(STATUS " ${Arch}") +ENDFOREACH() + diff --git a/lib/kokkos/cmake/kokkos_build.cmake b/lib/kokkos/cmake/kokkos_build.cmake deleted file mode 100644 index f9b995baae..0000000000 --- a/lib/kokkos/cmake/kokkos_build.cmake +++ /dev/null @@ -1,261 +0,0 @@ -############################ Detect if submodule ############################### -# -# With thanks to StackOverflow: -# http://stackoverflow.com/questions/25199677/how-to-detect-if-current-scope-has-a-parent-in-cmake -# -get_directory_property(HAS_PARENT PARENT_DIRECTORY) -if(HAS_PARENT) - message(STATUS "Submodule build") - SET(KOKKOS_HEADER_DIR "include/kokkos") -else() - message(STATUS "Standalone build") - SET(KOKKOS_HEADER_DIR "include") -endif() - -################################ Handle the actual build ####################### - -SET(INSTALL_LIB_DIR lib CACHE PATH "Installation directory for libraries") -SET(INSTALL_BIN_DIR bin CACHE PATH "Installation directory for executables") -SET(INSTALL_INCLUDE_DIR ${KOKKOS_HEADER_DIR} CACHE PATH - "Installation directory for header files") -IF(WIN32 AND NOT CYGWIN) - SET(DEF_INSTALL_CMAKE_DIR CMake) -ELSE() - SET(DEF_INSTALL_CMAKE_DIR lib/CMake/Kokkos) -ENDIF() - -SET(INSTALL_CMAKE_DIR 
${DEF_INSTALL_CMAKE_DIR} CACHE PATH - "Installation directory for CMake files") - -# Make relative paths absolute (needed later on) -FOREACH(p LIB BIN INCLUDE CMAKE) - SET(var INSTALL_${p}_DIR) - IF(NOT IS_ABSOLUTE "${${var}}") - SET(${var} "${CMAKE_INSTALL_PREFIX}/${${var}}") - ENDIF() -ENDFOREACH() - -# set up include-directories -SET (Kokkos_INCLUDE_DIRS - ${Kokkos_SOURCE_DIR}/core/src - ${Kokkos_SOURCE_DIR}/containers/src - ${Kokkos_SOURCE_DIR}/algorithms/src - ${Kokkos_BINARY_DIR} # to find KokkosCore_config.h - ${KOKKOS_INCLUDE_DIRS} -) - -# pass include dirs back to parent scope -if(HAS_PARENT) -SET(Kokkos_INCLUDE_DIRS_RET ${Kokkos_INCLUDE_DIRS} PARENT_SCOPE) -else() -SET(Kokkos_INCLUDE_DIRS_RET ${Kokkos_INCLUDE_DIRS}) -endif() - -INCLUDE_DIRECTORIES(${Kokkos_INCLUDE_DIRS}) - -IF(KOKKOS_SEPARATE_LIBS) - # Sources come from makefile-generated kokkos_generated_settings.cmake file - # Separate libs need to separate the sources - set_kokkos_srcs(KOKKOS_SRC ${KOKKOS_SRC}) - - # kokkoscore - ADD_LIBRARY( - kokkoscore - ${KOKKOS_CORE_SRCS} - ) - - target_compile_options( - kokkoscore - PUBLIC $<$:${KOKKOS_CXX_FLAGS}> - ) - - target_include_directories( - kokkoscore - PUBLIC - ${KOKKOS_TPL_INCLUDE_DIRS} - ) - - foreach(lib IN LISTS KOKKOS_TPL_LIBRARY_NAMES) - if (("${lib}" STREQUAL "cuda") AND (NOT "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")) - set(LIB_cuda "-lcuda") - elseif ("${lib}" STREQUAL "hpx") - find_package(HPX REQUIRED) - if(${HPX_FOUND}) - target_link_libraries(kokkoscore PUBLIC ${HPX_LIBRARIES}) - target_link_libraries(kokkoscontainers PUBLIC ${HPX_LIBRARIES}) - target_link_libraries(kokkosalgorithms PUBLIC ${HPX_LIBRARIES}) - target_include_directories(kokkoscore PUBLIC ${HPX_INCLUDE_DIRS}) - target_include_directories(kokkoscontainers PUBLIC ${HPX_INCLUDE_DIRS}) - target_include_directories(kokkosalgorithms PUBLIC ${HPX_INCLUDE_DIRS}) - else() - message(ERROR "HPX not found. 
Check the value of HPX_DIR (= ${HPX_DIR}) or CMAKE_PREFIX_PATH (= ${CMAKE_PREFIX_PATH}).") - endif() - else() - find_library(LIB_${lib} ${lib} PATHS ${KOKKOS_TPL_LIBRARY_DIRS}) - endif() - target_link_libraries(kokkoscore PUBLIC ${LIB_${lib}}) - endforeach() - - target_link_libraries(kokkoscore PUBLIC "${KOKKOS_LINK_FLAGS}") - - # Install the kokkoscore library - INSTALL (TARGETS kokkoscore - EXPORT KokkosTargets - ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/lib - LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/lib - RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin - ) - - # kokkoscontainers - if (DEFINED KOKKOS_CONTAINERS_SRCS) - ADD_LIBRARY( - kokkoscontainers - ${KOKKOS_CONTAINERS_SRCS} - ) - endif() - - TARGET_LINK_LIBRARIES( - kokkoscontainers - kokkoscore - ) - - # Install the kokkocontainers library - INSTALL (TARGETS kokkoscontainers - EXPORT KokkosTargets - ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/lib - LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/lib - RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin) - - # kokkosalgorithms - Build as interface library since no source files. 
- ADD_LIBRARY( - kokkosalgorithms - INTERFACE - ) - - target_include_directories( - kokkosalgorithms - INTERFACE ${Kokkos_SOURCE_DIR}/algorithms/src - ) - - TARGET_LINK_LIBRARIES( - kokkosalgorithms - INTERFACE kokkoscore - ) - - # Install the kokkoalgorithms library - INSTALL (TARGETS kokkosalgorithms - ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/lib - LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/lib - RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin) - - SET (Kokkos_LIBRARIES_NAMES kokkoscore kokkoscontainers kokkosalgorithms) - -ELSE() - # kokkos - ADD_LIBRARY( - kokkos - ${KOKKOS_CORE_SRCS} - ${KOKKOS_CONTAINERS_SRCS} - ) - - target_compile_options( - kokkos - PUBLIC $<$:${KOKKOS_CXX_FLAGS}> - ) - - target_include_directories( - kokkos - PUBLIC - ${KOKKOS_TPL_INCLUDE_DIRS} - ) - - foreach(lib IN LISTS KOKKOS_TPL_LIBRARY_NAMES) - if (("${lib}" STREQUAL "cuda") AND (NOT "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")) - set(LIB_cuda "-lcuda") - elseif ("${lib}" STREQUAL "hpx") - find_package(HPX REQUIRED) - if(${HPX_FOUND}) - target_link_libraries(kokkos PUBLIC ${HPX_LIBRARIES}) - target_include_directories(kokkos PUBLIC ${HPX_INCLUDE_DIRS}) - else() - message(ERROR "HPX not found. 
Check the value of HPX_DIR (= ${HPX_DIR}) or CMAKE_PREFIX_PATH (= ${CMAKE_PREFIX_PATH}).") - endif() - else() - find_library(LIB_${lib} ${lib} PATHS ${KOKKOS_TPL_LIBRARY_DIRS}) - endif() - target_link_libraries(kokkos PUBLIC ${LIB_${lib}}) - endforeach() - - target_link_libraries(kokkos PUBLIC "${KOKKOS_LINK_FLAGS}") - - # Install the kokkos library - INSTALL (TARGETS kokkos - EXPORT KokkosTargets - ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/lib - LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/lib - RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin) - - - SET (Kokkos_LIBRARIES_NAMES kokkos) - -endif() # KOKKOS_SEPARATE_LIBS - -# Install the kokkos headers -INSTALL (DIRECTORY - EXPORT KokkosTargets - ${Kokkos_SOURCE_DIR}/core/src/ - DESTINATION ${KOKKOS_HEADER_DIR} - FILES_MATCHING PATTERN "*.hpp" -) -INSTALL (DIRECTORY - EXPORT KokkosTargets - ${Kokkos_SOURCE_DIR}/containers/src/ - DESTINATION ${KOKKOS_HEADER_DIR} - FILES_MATCHING PATTERN "*.hpp" -) -INSTALL (DIRECTORY - EXPORT KokkosTargets - ${Kokkos_SOURCE_DIR}/algorithms/src/ - DESTINATION ${KOKKOS_HEADER_DIR} - FILES_MATCHING PATTERN "*.hpp" -) - -INSTALL (FILES - ${Kokkos_BINARY_DIR}/KokkosCore_config.h - DESTINATION ${KOKKOS_HEADER_DIR} -) - -# Add all targets to the build-tree export set -export(TARGETS ${Kokkos_LIBRARIES_NAMES} - FILE "${Kokkos_BINARY_DIR}/KokkosTargets.cmake") - -# Export the package for use from the build-tree -# (this registers the build-tree with a global CMake-registry) -export(PACKAGE Kokkos) - -# Create the KokkosConfig.cmake and KokkosConfigVersion files -file(RELATIVE_PATH REL_INCLUDE_DIR "${INSTALL_CMAKE_DIR}" - "${INSTALL_INCLUDE_DIR}") -# ... for the build tree -set(CONF_INCLUDE_DIRS "${Kokkos_SOURCE_DIR}" "${Kokkos_BINARY_DIR}") -configure_file(${Kokkos_SOURCE_DIR}/cmake/KokkosConfig.cmake.in - "${Kokkos_BINARY_DIR}/KokkosConfig.cmake" @ONLY) -# ... 
for the install tree -set(CONF_INCLUDE_DIRS "\${Kokkos_CMAKE_DIR}/${REL_INCLUDE_DIR}") -configure_file(${Kokkos_SOURCE_DIR}/cmake/KokkosConfig.cmake.in - "${Kokkos_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/KokkosConfig.cmake" @ONLY) - -# Install the KokkosConfig.cmake and KokkosConfigVersion.cmake -install(FILES - "${Kokkos_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/KokkosConfig.cmake" - DESTINATION "${INSTALL_CMAKE_DIR}") - -#This seems not to do anything? -#message(STATUS "KokkosTargets: " ${KokkosTargets}) -# Install the export set for use with the install-tree -INSTALL(EXPORT KokkosTargets DESTINATION - "${INSTALL_CMAKE_DIR}") - -# build and install pkgconfig file -CONFIGURE_FILE(core/src/kokkos.pc.in kokkos.pc @ONLY) -INSTALL(FILES ${CMAKE_CURRENT_BINARY_DIR}/kokkos.pc DESTINATION lib/pkgconfig) diff --git a/lib/kokkos/cmake/kokkos_compiler_id.cmake b/lib/kokkos/cmake/kokkos_compiler_id.cmake new file mode 100644 index 0000000000..d239c3b32e --- /dev/null +++ b/lib/kokkos/cmake/kokkos_compiler_id.cmake @@ -0,0 +1,80 @@ +KOKKOS_CFG_DEPENDS(COMPILER_ID NONE) + +SET(KOKKOS_CXX_COMPILER ${CMAKE_CXX_COMPILER}) +SET(KOKKOS_CXX_COMPILER_ID ${CMAKE_CXX_COMPILER_ID}) +SET(KOKKOS_CXX_COMPILER_VERSION ${CMAKE_CXX_COMPILER_VERSION}) + +# Check if the compiler is nvcc (which really means nvcc_wrapper). +EXECUTE_PROCESS(COMMAND ${CMAKE_CXX_COMPILER} --version + COMMAND grep nvcc + COMMAND wc -l + OUTPUT_VARIABLE INTERNAL_HAVE_COMPILER_NVCC + OUTPUT_STRIP_TRAILING_WHITESPACE) + + +STRING(REGEX REPLACE "^ +" "" + INTERNAL_HAVE_COMPILER_NVCC ${INTERNAL_HAVE_COMPILER_NVCC}) + + +IF(INTERNAL_HAVE_COMPILER_NVCC) + # SET the compiler id to nvcc. We use the value used by CMake 3.8. + SET(KOKKOS_CXX_COMPILER_ID NVIDIA CACHE STRING INTERNAL FORCE) + + # SET nvcc's compiler version. 
+ EXECUTE_PROCESS(COMMAND ${CMAKE_CXX_COMPILER} --version + COMMAND grep release + OUTPUT_VARIABLE INTERNAL_CXX_COMPILER_VERSION + OUTPUT_STRIP_TRAILING_WHITESPACE) + + STRING(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+$" + TEMP_CXX_COMPILER_VERSION ${INTERNAL_CXX_COMPILER_VERSION}) + SET(KOKKOS_CXX_COMPILER_VERSION ${TEMP_CXX_COMPILER_VERSION} CACHE STRING INTERNAL FORCE) +ENDIF() + +IF(KOKKOS_CXX_COMPILER_ID STREQUAL Cray) + + # SET nvcc's compiler version. + EXECUTE_PROCESS(COMMAND ${CMAKE_CXX_COMPILER} --version + OUTPUT_VARIABLE INTERNAL_CXX_COMPILER_VERSION + OUTPUT_STRIP_TRAILING_WHITESPACE) + + STRING(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+$" + TEMP_CXX_COMPILER_VERSION ${INTERNAL_CXX_COMPILER_VERSION}) + SET(KOKKOS_CXX_COMPILER_VERSION ${TEMP_CXX_COMPILER_VERSION} CACHE STRING INTERNAL FORCE) +ENDIF() + +# Enforce the minimum compilers supported by Kokkos. +SET(KOKKOS_MESSAGE_TEXT "Compiler not supported by Kokkos. Required compiler versions:") +SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Clang 3.5.2 or higher") +SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n GCC 4.8.4 or higher") +SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Intel 15.0.2 or higher") +SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n NVCC 9.0.69 or higher") +SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n PGI 17.1 or higher\n") + +IF(KOKKOS_CXX_COMPILER_ID STREQUAL Clang) + IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 3.5.2) + MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") + ENDIF() +ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL GNU) + IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 4.8.4) + MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") + ENDIF() +ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL Intel) + IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 15.0.2) + MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") + ENDIF() +ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA) + IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 9.0.69) + MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") + ENDIF() + SET(CMAKE_CXX_EXTENSIONS OFF 
CACHE BOOL "Kokkos turns off CXX extensions" FORCE) +ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL PGI) + IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 17.1) + MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") + ENDIF() +ENDIF() + +STRING(REPLACE "." ";" VERSION_LIST ${KOKKOS_CXX_COMPILER_VERSION}) +LIST(GET VERSION_LIST 0 KOKKOS_COMPILER_VERSION_MAJOR) +LIST(GET VERSION_LIST 1 KOKKOS_COMPILER_VERSION_MINOR) +LIST(GET VERSION_LIST 2 KOKKOS_COMPILER_VERSION_PATCH) diff --git a/lib/kokkos/cmake/kokkos_corner_cases.cmake b/lib/kokkos/cmake/kokkos_corner_cases.cmake new file mode 100644 index 0000000000..c03c385faf --- /dev/null +++ b/lib/kokkos/cmake/kokkos_corner_cases.cmake @@ -0,0 +1,35 @@ +IF(KOKKOS_CXX_COMPILER_ID STREQUAL Clang AND KOKKOS_ENABLE_OPENMP) + # The clang "version" doesn't actually tell you what runtimes and tools + # were built into Clang. We should therefore make sure that libomp + # was actually built into Clang. Otherwise the user will get nonsensical + # errors when they try to build. + + #Try compile is the height of CMake nonsense + #I can't just give it compiler and link flags + #I have to hackily pretend that compiler flags are compiler definitions + #and that linker flags are libraries + #also - this is easier to use than CMakeCheckCXXSourceCompiles + TRY_COMPILE(CLANG_HAS_OMP + ${KOKKOS_TOP_BUILD_DIR}/corner_cases + ${KOKKOS_SOURCE_DIR}/cmake/compile_tests/clang_omp.cpp + COMPILE_DEFINITIONS -fopenmp=libomp + LINK_LIBRARIES -fopenmp=libomp + ) + IF (NOT CLANG_HAS_OMP) + UNSET(CLANG_HAS_OMP CACHE) #make sure CMake always re-runs this + MESSAGE(FATAL_ERROR "Clang failed OpenMP check. 
You have requested -DKokkos_ENABLE_OPENMP=ON, but the Clang compiler does not appear to have been built with OpenMP support") + ENDIF() + UNSET(CLANG_HAS_OMP CACHE) #make sure CMake always re-runs this +ENDIF() + + +IF (KOKKOS_CXX_STANDARD STREQUAL 17) + IF (KOKKOS_CXX_COMPILER_ID STREQUAL GNU AND KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 7) + MESSAGE(FATAL_ERROR "You have requested c++17 support for GCC ${KOKKOS_CXX_COMPILER_VERSION}. Although CMake has allowed this and GCC accepts -std=c++1z/c++17, GCC <= 6 does not properly support *this capture. Please reduce the C++ standard to 14 or upgrade the compiler if you do need 17 support") + ENDIF() + + IF (KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA) + MESSAGE(FATAL_ERROR "You have requested c++17 support for NVCC. Please reduce the C++ standard to 14. No versions of NVCC currently support 17.") + ENDIF() +ENDIF() + diff --git a/lib/kokkos/cmake/kokkos_enable_devices.cmake b/lib/kokkos/cmake/kokkos_enable_devices.cmake new file mode 100644 index 0000000000..ff09876673 --- /dev/null +++ b/lib/kokkos/cmake/kokkos_enable_devices.cmake @@ -0,0 +1,61 @@ + +FUNCTION(KOKKOS_DEVICE_OPTION SUFFIX DEFAULT DEV_TYPE DOCSTRING) + KOKKOS_OPTION(ENABLE_${SUFFIX} ${DEFAULT} BOOL ${DOCSTRING}) + STRING(TOUPPER ${SUFFIX} UC_NAME) + IF (KOKKOS_ENABLE_${UC_NAME}) + LIST(APPEND KOKKOS_ENABLED_DEVICES ${SUFFIX}) + #I hate that CMake makes me do this + SET(KOKKOS_ENABLED_DEVICES ${KOKKOS_ENABLED_DEVICES} PARENT_SCOPE) + ENDIF() + SET(KOKKOS_ENABLE_${UC_NAME} ${KOKKOS_ENABLE_${UC_NAME}} PARENT_SCOPE) + IF (KOKKOS_ENABLE_${UC_NAME} AND DEV_TYPE STREQUAL "HOST") + SET(KOKKOS_HAS_HOST ON PARENT_SCOPE) + ENDIF() +ENDFUNCTION() + +KOKKOS_CFG_DEPENDS(DEVICES NONE) + +# Put a check in just in case people are using this option +KOKKOS_DEPRECATED_LIST(DEVICES ENABLE) + + +KOKKOS_DEVICE_OPTION(PTHREAD OFF HOST "Whether to build Pthread backend") +IF (KOKKOS_ENABLE_PTHREAD) + #patch the naming here + SET(KOKKOS_ENABLE_THREADS ON) +ENDIF() + 
+IF(Trilinos_ENABLE_Kokkos AND Trilinos_ENABLE_OpenMP) + SET(OMP_DEFAULT ON) +ELSE() + SET(OMP_DEFAULT OFF) +ENDIF() +KOKKOS_DEVICE_OPTION(OPENMP ${OMP_DEFAULT} HOST "Whether to build OpenMP backend") + +IF(Trilinos_ENABLE_Kokkos AND TPL_ENABLE_CUDA) + SET(CUDA_DEFAULT ON) +ELSE() + SET(CUDA_DEFAULT OFF) +ENDIF() +KOKKOS_DEVICE_OPTION(CUDA ${CUDA_DEFAULT} DEVICE "Whether to build CUDA backend") + +IF (KOKKOS_ENABLE_CUDA) + GLOBAL_SET(KOKKOS_DONT_ALLOW_EXTENSIONS "CUDA enabled") +ENDIF() + +# We want this to default to OFF for cache reasons, but if no +# host space is given, then activate serial +IF (KOKKOS_HAS_TRILINOS) + #However, Trilinos always wants Serial ON + SET(SERIAL_DEFAULT ON) +ELSEIF (KOKKOS_HAS_HOST) + SET(SERIAL_DEFAULT OFF) +ELSE() + SET(SERIAL_DEFAULT ON) + IF (NOT DEFINED Kokkos_ENABLE_SERIAL) + MESSAGE(STATUS "SERIAL backend is being turned on to ensure there is at least one Host space. To change this, you must enable another host execution space and configure with -DKokkos_ENABLE_SERIAL=OFF or change CMakeCache.txt") + ENDIF() +ENDIF() +KOKKOS_DEVICE_OPTION(SERIAL ${SERIAL_DEFAULT} HOST "Whether to build serial backend") + +KOKKOS_DEVICE_OPTION(HPX OFF HOST "Whether to build HPX backend (experimental)") diff --git a/lib/kokkos/cmake/kokkos_enable_options.cmake b/lib/kokkos/cmake/kokkos_enable_options.cmake new file mode 100644 index 0000000000..c0e49482b6 --- /dev/null +++ b/lib/kokkos/cmake/kokkos_enable_options.cmake @@ -0,0 +1,92 @@ +########################## NOTES ############################################### +# List the options for configuring kokkos using CMake method of doing it. +# These options then get mapped onto KOKKOS_SETTINGS environment variable by +# kokkos_settings.cmake. It is separate to allow other packages to override +# these variables (e.g., TriBITS). 
+ +########################## AVAILABLE OPTIONS ################################### +# Use lists for documentation, verification, and programming convenience + + +FUNCTION(KOKKOS_ENABLE_OPTION SUFFIX DEFAULT DOCSTRING) + KOKKOS_OPTION(ENABLE_${SUFFIX} ${DEFAULT} BOOL ${DOCSTRING}) + STRING(TOUPPER ${SUFFIX} UC_NAME) + IF (KOKKOS_ENABLE_${UC_NAME}) + LIST(APPEND KOKKOS_ENABLED_OPTIONS ${UC_NAME}) + #I hate that CMake makes me do this + SET(KOKKOS_ENABLED_OPTIONS ${KOKKOS_ENABLED_OPTIONS} PARENT_SCOPE) + ENDIF() + SET(KOKKOS_ENABLE_${UC_NAME} ${KOKKOS_ENABLE_${UC_NAME}} PARENT_SCOPE) +ENDFUNCTION() + +# Certain defaults will depend on knowing the enabled devices +KOKKOS_CFG_DEPENDS(OPTIONS DEVICES) + +# Put a check in just in case people are using this option +KOKKOS_DEPRECATED_LIST(OPTIONS ENABLE) + +KOKKOS_ENABLE_OPTION(CUDA_RELOCATABLE_DEVICE_CODE OFF "Whether to enable relocatable device code (RDC) for CUDA") +KOKKOS_ENABLE_OPTION(CUDA_UVM OFF "Whether to use unified memory (UM) for CUDA by default") +KOKKOS_ENABLE_OPTION(CUDA_LDG_INTRINSIC OFF "Whether to use CUDA LDG intrinsics") +KOKKOS_ENABLE_OPTION(HPX_ASYNC_DISPATCH OFF "Whether HPX supports asynchronous dispatch") +KOKKOS_ENABLE_OPTION(TESTS OFF "Whether to build the unit tests") +STRING(TOUPPER "${CMAKE_BUILD_TYPE}" UPPERCASE_CMAKE_BUILD_TYPE) +IF(UPPERCASE_CMAKE_BUILD_TYPE STREQUAL "DEBUG") + KOKKOS_ENABLE_OPTION(DEBUG ON "Whether to activate extra debug features - may increase compile times") + KOKKOS_ENABLE_OPTION(DEBUG_DUALVIEW_MODIFY_CHECK ON "Debug check on dual views") +ELSE() + KOKKOS_ENABLE_OPTION(DEBUG OFF "Whether to activate extra debug features - may increase compile times") + KOKKOS_ENABLE_OPTION(DEBUG_DUALVIEW_MODIFY_CHECK OFF "Debug check on dual views") +ENDIF() +UNSET(_UPPERCASE_CMAKE_BUILD_TYPE) +KOKKOS_ENABLE_OPTION(LARGE_MEM_TESTS OFF "Whether to perform extra large memory tests") +KOKKOS_ENABLE_OPTION(DEBUG_BOUNDS_CHECK OFF "Whether to use bounds checking - will increase runtime") 
+KOKKOS_ENABLE_OPTION(COMPILER_WARNINGS OFF "Whether to print all compiler warnings") +KOKKOS_ENABLE_OPTION(PROFILING ON "Whether to create bindings for profiling tools") +KOKKOS_ENABLE_OPTION(PROFILING_LOAD_PRINT OFF "Whether to print information about which profiling tools got loaded") +KOKKOS_ENABLE_OPTION(AGGRESSIVE_VECTORIZATION OFF "Whether to aggressively vectorize loops") +KOKKOS_ENABLE_OPTION(DEPRECATED_CODE OFF "Whether to enable deprecated code") + +IF (KOKKOS_ENABLE_CUDA) + SET(KOKKOS_COMPILER_CUDA_VERSION "${KOKKOS_COMPILER_VERSION_MAJOR}${KOKKOS_COMPILER_VERSION_MINOR}") +ENDIF() + +IF (Trilinos_ENABLE_Kokkos AND TPL_ENABLE_CUDA AND DEFINED KOKKOS_COMPILER_CUDA_VERSION AND KOKKOS_COMPILER_CUDA_VERSION GREATER 70) + SET(LAMBDA_DEFAULT ON) +ELSE() + SET(LAMBDA_DEFAULT OFF) +ENDIF() +KOKKOS_ENABLE_OPTION(CUDA_LAMBDA ${LAMBDA_DEFAULT} "Whether to activate experimental lambda features") +IF (Trilinos_ENABLE_Kokkos) + SET(COMPLEX_ALIGN_DEFAULT OFF) +ELSE() + SET(COMPLEX_ALIGN_DEFAULT ON) +ENDIF() +KOKKOS_ENABLE_OPTION(COMPLEX_ALIGN ${COMPLEX_ALIGN_DEFAULT} "Whether to align Kokkos::complex to 2*alignof(RealType)") + +KOKKOS_ENABLE_OPTION(CUDA_CONSTEXPR OFF "Whether to activate experimental relaxed constexpr functions") + +FUNCTION(check_device_specific_options) + CMAKE_PARSE_ARGUMENTS(SOME "" "DEVICE" "OPTIONS" ${ARGN}) + IF(NOT KOKKOS_ENABLE_${SOME_DEVICE}) + FOREACH(OPTION ${SOME_OPTIONS}) + IF(CMAKE_VERSION VERSION_GREATER_EQUAL 3.14) + IF(NOT DEFINED CACHE{Kokkos_ENABLE_${OPTION}} OR NOT DEFINED CACHE{Kokkos_ENABLE_${SOME_DEVICE}}) + MESSAGE(FATAL_ERROR "Internal logic error: option '${OPTION}' or device '${SOME_DEVICE}' not recognized.") + ENDIF() + ENDIF() + IF(KOKKOS_ENABLE_${OPTION}) + MESSAGE(WARNING "Kokkos_ENABLE_${OPTION} is ON but ${SOME_DEVICE} backend is not enabled. 
Option will be ignored.") + UNSET(KOKKOS_ENABLE_${OPTION} PARENT_SCOPE) + ENDIF() + ENDFOREACH() + ENDIF() +ENDFUNCTION() + +CHECK_DEVICE_SPECIFIC_OPTIONS(DEVICE CUDA OPTIONS CUDA_UVM CUDA_RELOCATABLE_DEVICE_CODE CUDA_LAMBDA CUDA_CONSTEXPR CUDA_LDG_INTRINSIC) +CHECK_DEVICE_SPECIFIC_OPTIONS(DEVICE HPX OPTIONS HPX_ASYNC_DISPATCH) + +# Needed due to change from deprecated name to new header define name +IF (KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION) + SET(KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION ON) +ENDIF() diff --git a/lib/kokkos/cmake/kokkos_functions.cmake b/lib/kokkos/cmake/kokkos_functions.cmake index 616618753b..3644c48ddd 100644 --- a/lib/kokkos/cmake/kokkos_functions.cmake +++ b/lib/kokkos/cmake/kokkos_functions.cmake @@ -1,345 +1,700 @@ ################################### FUNCTIONS ################################## # List of functions -# set_kokkos_cxx_compiler -# set_kokkos_cxx_standard -# set_kokkos_srcs - -#------------------------------------------------------------------------------- -# function(set_kokkos_cxx_compiler) -# Sets the following compiler variables that are analogous to the CMAKE_* -# versions. We add the ability to detect NVCC (really nvcc_wrapper). -# KOKKOS_CXX_COMPILER -# KOKKOS_CXX_COMPILER_ID -# KOKKOS_CXX_COMPILER_VERSION -# -# Inputs: -# KOKKOS_ENABLE_CUDA -# CMAKE_CXX_COMPILER -# CMAKE_CXX_COMPILER_ID -# CMAKE_CXX_COMPILER_VERSION -# -# Also verifies the compiler version meets the minimum required by Kokkos. -function(set_kokkos_cxx_compiler) - # Since CMake doesn't recognize the nvcc compiler until 3.8, we use our own - # version of the CMake variables and detect nvcc ourselves. Initially set to - # the CMake variable values. - set(INTERNAL_CXX_COMPILER ${CMAKE_CXX_COMPILER}) - set(INTERNAL_CXX_COMPILER_ID ${CMAKE_CXX_COMPILER_ID}) - set(INTERNAL_CXX_COMPILER_VERSION ${CMAKE_CXX_COMPILER_VERSION}) - - # Check if the compiler is nvcc (which really means nvcc_wrapper). 
- execute_process(COMMAND ${INTERNAL_CXX_COMPILER} --version - COMMAND grep nvcc - COMMAND wc -l - OUTPUT_VARIABLE INTERNAL_HAVE_COMPILER_NVCC - OUTPUT_STRIP_TRAILING_WHITESPACE) - - string(REGEX REPLACE "^ +" "" - INTERNAL_HAVE_COMPILER_NVCC ${INTERNAL_HAVE_COMPILER_NVCC}) - - if(INTERNAL_HAVE_COMPILER_NVCC) - # Set the compiler id to nvcc. We use the value used by CMake 3.8. - set(INTERNAL_CXX_COMPILER_ID NVIDIA) - - # Set nvcc's compiler version. - execute_process(COMMAND ${INTERNAL_CXX_COMPILER} --version - COMMAND grep release - OUTPUT_VARIABLE INTERNAL_CXX_COMPILER_VERSION - OUTPUT_STRIP_TRAILING_WHITESPACE) - - string(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+$" - INTERNAL_CXX_COMPILER_VERSION ${INTERNAL_CXX_COMPILER_VERSION}) - endif() - - # Enforce the minimum compilers supported by Kokkos. - set(KOKKOS_MESSAGE_TEXT "Compiler not supported by Kokkos. Required compiler versions:") - set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Clang 3.5.2 or higher") - set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n GCC 4.8.4 or higher") - set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Intel 15.0.2 or higher") - set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n NVCC 7.0.28 or higher") - set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n PGI 17.1 or higher\n") - - if(INTERNAL_CXX_COMPILER_ID STREQUAL Clang) - if(INTERNAL_CXX_COMPILER_VERSION VERSION_LESS 3.5.2) - message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") - endif() - elseif(INTERNAL_CXX_COMPILER_ID STREQUAL GNU) - if(INTERNAL_CXX_COMPILER_VERSION VERSION_LESS 4.8.4) - message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") - endif() - elseif(INTERNAL_CXX_COMPILER_ID STREQUAL Intel) - if(INTERNAL_CXX_COMPILER_VERSION VERSION_LESS 15.0.2) - message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") - endif() - elseif(INTERNAL_CXX_COMPILER_ID STREQUAL NVIDIA) - if(INTERNAL_CXX_COMPILER_VERSION VERSION_LESS 7.0.28) - message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") - endif() - elseif(INTERNAL_CXX_COMPILER_ID STREQUAL PGI) - 
if(INTERNAL_CXX_COMPILER_VERSION VERSION_LESS 17.1) - message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") - endif() - endif() - - # Enforce that extensions are turned off for nvcc_wrapper. - if(INTERNAL_CXX_COMPILER_ID STREQUAL NVIDIA) - if(DEFINED CMAKE_CXX_EXTENSIONS AND CMAKE_CXX_EXTENSIONS STREQUAL ON) - message(FATAL_ERROR "NVCC doesn't support C++ extensions. Set CMAKE_CXX_EXTENSIONS to OFF in your CMakeLists.txt.") - endif() - endif() - - if(KOKKOS_ENABLE_CUDA) - # Enforce that the compiler can compile CUDA code. - if(INTERNAL_CXX_COMPILER_ID STREQUAL Clang) - if(INTERNAL_CXX_COMPILER_VERSION VERSION_LESS 4.0.0) - message(FATAL_ERROR "Compiling CUDA code directly with Clang requires version 4.0.0 or higher.") - endif() - elseif(NOT INTERNAL_CXX_COMPILER_ID STREQUAL NVIDIA) - message(FATAL_ERROR "Invalid compiler for CUDA. The compiler must be nvcc_wrapper or Clang, but compiler ID was ${INTERNAL_CXX_COMPILER_ID}") - endif() - endif() - - set(KOKKOS_CXX_COMPILER ${INTERNAL_CXX_COMPILER} PARENT_SCOPE) - set(KOKKOS_CXX_COMPILER_ID ${INTERNAL_CXX_COMPILER_ID} PARENT_SCOPE) - set(KOKKOS_CXX_COMPILER_VERSION ${INTERNAL_CXX_COMPILER_VERSION} PARENT_SCOPE) -endfunction() - -#------------------------------------------------------------------------------- -# function(set_kokkos_cxx_standard) -# Transitively enforces that the appropriate CXX standard compile flags (C++11 -# or above) are added to targets that use the Kokkos library. Compile features -# are used if possible. Otherwise, the appropriate flags are added to -# KOKKOS_CXX_FLAGS. Values set by the user to CMAKE_CXX_STANDARD and -# CMAKE_CXX_EXTENSIONS are honored. -# -# Outputs: -# KOKKOS_CXX11_FEATURES -# KOKKOS_CXX_FLAGS -# -# Inputs: -# KOKKOS_CXX_COMPILER -# KOKKOS_CXX_COMPILER_ID -# KOKKOS_CXX_COMPILER_VERSION -# -function(set_kokkos_cxx_standard) - # The following table lists the versions of CMake that supports CXX_STANDARD - # and the CXX compile features for different compilers. 
The versions are - # based on CMake documentation, looking at CMake code, and verifying by - # testing with specific CMake versions. - # - # COMPILER CXX_STANDARD Compile Features - # --------------------------------------------------------------- - # Clang 3.1 3.1 - # GNU 3.1 3.2 - # AppleClang 3.2 3.2 - # Intel 3.6 3.6 - # Cray No No - # PGI No No - # XL No No - # - # For compiling CUDA code using nvcc_wrapper, we will use the host compiler's - # flags for turning on C++11. Since for compiler ID and versioning purposes - # CMake recognizes the host compiler when calling nvcc_wrapper, this just - # works. Both NVCC and nvcc_wrapper only recognize '-std=c++11' which means - # that we can only use host compilers for CUDA builds that use those flags. - # It also means that extensions (gnu++11) can't be turned on for CUDA builds. - - # Check if we can use compile features. - if(NOT KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA) - if(CMAKE_CXX_COMPILER_ID STREQUAL Clang) - if(NOT CMAKE_VERSION VERSION_LESS 3.1) - set(INTERNAL_USE_COMPILE_FEATURES ON) - endif() - elseif(CMAKE_CXX_COMPILER_ID STREQUAL AppleClang OR CMAKE_CXX_COMPILER_ID STREQUAL GNU) - if(NOT CMAKE_VERSION VERSION_LESS 3.2) - set(INTERNAL_USE_COMPILE_FEATURES ON) - endif() - elseif(CMAKE_CXX_COMPILER_ID STREQUAL Intel) - if(NOT CMAKE_VERSION VERSION_LESS 3.6) - set(INTERNAL_USE_COMPILE_FEATURES ON) - endif() - endif() - endif() - - if(INTERNAL_USE_COMPILE_FEATURES) - # Use the compile features aspect of CMake to transitively cause C++ flags - # to populate to user code. - - # I'm using a hack by requiring features that I know force the lowest version - # of the compilers we want to support. Clang 3.3 and later support all of - # the C++11 standard. With CMake 3.8 and higher, we could switch to using - # cxx_std_11. - set(KOKKOS_CXX11_FEATURES - cxx_nonstatic_member_init # Forces GCC 4.7 or later and Intel 14.0 or later. 
- PARENT_SCOPE - ) - else() - # CXX compile features are not yet implemented for this combination of - # compiler and version of CMake. - - if(CMAKE_CXX_COMPILER_ID STREQUAL AppleClang) - # Versions of CMAKE before 3.2 don't support CXX_STANDARD or C++ compile - # features for the AppleClang compiler. Set compiler flags transitively - # here such that they trickle down to a call to target_compile_options(). - - # The following two blocks of code were copied from - # /Modules/Compiler/AppleClang-CXX.cmake from CMake 3.7.2 and then - # modified. - if(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.0) - set(INTERNAL_CXX11_STANDARD_COMPILE_OPTION "-std=c++11") - set(INTERNAL_CXX11_EXTENSION_COMPILE_OPTION "-std=gnu++11") - endif() - - if(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 6.1) - set(INTERNAL_CXX14_STANDARD_COMPILE_OPTION "-std=c++14") - set(INTERNAL_CXX14_EXTENSION_COMPILE_OPTION "-std=gnu++14") - elseif(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.1) - # AppleClang 5.0 knows this flag, but does not set a __cplusplus macro - # greater than 201103L. - set(INTERNAL_CXX14_STANDARD_COMPILE_OPTION "-std=c++1y") - set(INTERNAL_CXX14_EXTENSION_COMPILE_OPTION "-std=gnu++1y") - endif() - elseif(CMAKE_CXX_COMPILER_ID STREQUAL Intel) - # Versions of CMAKE before 3.6 don't support CXX_STANDARD or C++ compile - # features for the Intel compiler. Set compiler flags transitively here - # such that they trickle down to a call to target_compile_options(). - - # The following three blocks of code were copied from - # /Modules/Compiler/Intel-CXX.cmake from CMake 3.7.2 and then modified. - if("x${CMAKE_CXX_SIMULATE_ID}" STREQUAL "xMSVC") - set(_std -Qstd) - set(_ext c++) - else() - set(_std -std) - set(_ext gnu++) - endif() - - if(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 15.0.2) - set(INTERNAL_CXX14_STANDARD_COMPILE_OPTION "${_std}=c++14") - # TODO: There is no gnu++14 value supported; figure out what to do. 
- set(INTERNAL_CXX14_EXTENSION_COMPILE_OPTION "${_std}=c++14") - elseif(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 15.0.0) - set(INTERNAL_CXX14_STANDARD_COMPILE_OPTION "${_std}=c++1y") - # TODO: There is no gnu++14 value supported; figure out what to do. - set(INTERNAL_CXX14_EXTENSION_COMPILE_OPTION "${_std}=c++1y") - endif() - - if(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 13.0) - set(INTERNAL_CXX11_STANDARD_COMPILE_OPTION "${_std}=c++11") - set(INTERNAL_CXX11_EXTENSION_COMPILE_OPTION "${_std}=${_ext}11") - elseif(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 12.1) - set(INTERNAL_CXX11_STANDARD_COMPILE_OPTION "${_std}=c++0x") - set(INTERNAL_CXX11_EXTENSION_COMPILE_OPTION "${_std}=${_ext}0x") - endif() - elseif(CMAKE_CXX_COMPILER_ID STREQUAL Cray) - # CMAKE doesn't support CXX_STANDARD or C++ compile features for the Cray - # compiler. Set compiler options transitively here such that they trickle - # down to a call to target_compile_options(). - set(INTERNAL_CXX11_STANDARD_COMPILE_OPTION "-hstd=c++11") - set(INTERNAL_CXX11_EXTENSION_COMPILE_OPTION "-hstd=c++11") - set(INTERNAL_CXX14_STANDARD_COMPILE_OPTION "-hstd=c++11") - set(INTERNAL_CXX14_EXTENSION_COMPILE_OPTION "-hstd=c++11") - elseif(CMAKE_CXX_COMPILER_ID STREQUAL PGI) - # CMAKE doesn't support CXX_STANDARD or C++ compile features for the PGI - # compiler. Set compiler options transitively here such that they trickle - # down to a call to target_compile_options(). - set(INTERNAL_CXX11_STANDARD_COMPILE_OPTION "--c++11") - set(INTERNAL_CXX11_EXTENSION_COMPILE_OPTION "--c++11") - set(INTERNAL_CXX14_STANDARD_COMPILE_OPTION "--c++11") - set(INTERNAL_CXX14_EXTENSION_COMPILE_OPTION "--c++11") - elseif(CMAKE_CXX_COMPILER_ID STREQUAL XL) - # CMAKE doesn't support CXX_STANDARD or C++ compile features for the XL - # compiler. Set compiler options transitively here such that they trickle - # down to a call to target_compile_options(). 
- set(INTERNAL_CXX11_STANDARD_COMPILE_OPTION "-std=c++11") - set(INTERNAL_CXX11_EXTENSION_COMPILE_OPTION "-std=c++11") - set(INTERNAL_CXX14_STANDARD_COMPILE_OPTION "-std=c++11") - set(INTERNAL_CXX14_EXTENSION_COMPILE_OPTION "-std=c++11") - else() - # Assume GNU. CMAKE_CXX_STANDARD is handled correctly by CMake 3.1 and - # above for this compiler. If the user explicitly requests a C++ - # standard, CMake takes care of it. If not, transitively require C++11. - if(NOT CMAKE_CXX_STANDARD) - set(INTERNAL_CXX11_STANDARD_COMPILE_OPTION ${CMAKE_CXX11_STANDARD_COMPILE_OPTION}) - set(INTERNAL_CXX11_EXTENSION_COMPILE_OPTION ${CMAKE_CXX11_EXTENSION_COMPILE_OPTION}) - endif() - endif() - - # Set the C++ standard info for Kokkos respecting user set values for - # CMAKE_CXX_STANDARD and CMAKE_CXX_EXTENSIONS. - # Only use cxx extension if explicitly requested - if(CMAKE_CXX_STANDARD EQUAL 14) - if(DEFINED CMAKE_CXX_EXTENSIONS AND CMAKE_CXX_EXTENSIONS STREQUAL ON) - set(INTERNAL_CXX_FLAGS ${INTERNAL_CXX14_EXTENSION_COMPILE_OPTION}) - else() - set(INTERNAL_CXX_FLAGS ${INTERNAL_CXX14_STANDARD_COMPILE_OPTION}) - endif() - elseif(CMAKE_CXX_STANDARD EQUAL 11) - if(DEFINED CMAKE_CXX_EXTENSIONS AND CMAKE_CXX_EXTENSIONS STREQUAL ON) - set(INTERNAL_CXX_FLAGS ${INTERNAL_CXX11_EXTENSION_COMPILE_OPTION}) - else() - set(INTERNAL_CXX_FLAGS ${INTERNAL_CXX11_STANDARD_COMPILE_OPTION}) - endif() - else() - # The user didn't explicitly request a standard, transitively require - # C++11 respecting CMAKE_CXX_EXTENSIONS. 
- if(DEFINED CMAKE_CXX_EXTENSIONS AND CMAKE_CXX_EXTENSIONS STREQUAL ON) - set(INTERNAL_CXX_FLAGS ${INTERNAL_CXX11_EXTENSION_COMPILE_OPTION}) - else() - set(INTERNAL_CXX_FLAGS ${INTERNAL_CXX11_STANDARD_COMPILE_OPTION}) - endif() - endif() - - set(KOKKOS_CXX_FLAGS ${INTERNAL_CXX_FLAGS} PARENT_SCOPE) - endif() -endfunction() - - -#------------------------------------------------------------------------------- -# function(set_kokkos_sources) -# Takes a list of sources for kokkos (e.g., KOKKOS_SRC from Makefile.kokkos and -# put it into kokkos_generated_settings.cmake) and sorts the files into the subpackages or -# separate_libraries. This is core and containers (algorithms is pure header -# files). -# -# Inputs: -# KOKKOS_SRC +# kokkos_option + +# Validate options are given with correct case and define an internal +# upper-case version for use within + # -# Outputs: -# KOKKOS_CORE_SRCS -# KOKKOS_CONTAINERS_SRCS -# -function(set_kokkos_srcs) - set(opts ) # no-value args - set(oneValArgs ) - set(multValArgs KOKKOS_SRC) # e.g., lists - cmake_parse_arguments(IN "${opts}" "${oneValArgs}" "${multValArgs}" ${ARGN}) - - foreach(sfile ${IN_KOKKOS_SRC}) - string(REPLACE "${CMAKE_CURRENT_SOURCE_DIR}/" "" stripfile "${sfile}") - string(REPLACE "/" ";" striplist "${stripfile}") - list(GET striplist 0 firstdir) - if(${firstdir} STREQUAL "core") - list(APPEND KOKKOS_CORE_SRCS ${sfile}) - else() - list(APPEND KOKKOS_CONTAINERS_SRCS ${sfile}) - endif() - endforeach() - set(KOKKOS_CORE_SRCS ${KOKKOS_CORE_SRCS} PARENT_SCOPE) - set(KOKKOS_CONTAINERS_SRCS ${KOKKOS_CONTAINERS_SRCS} PARENT_SCOPE) - return() -endfunction() - -# Setting a default value if it is not already set -macro(set_kokkos_default_default VARIABLE DEFAULT) - IF( "${KOKKOS_INTERNAL_ENABLE_${VARIABLE}_DEFAULT}" STREQUAL "" ) - IF( "${KOKKOS_ENABLE_${VARIABLE}}" STREQUAL "" ) - set(KOKKOS_INTERNAL_ENABLE_${VARIABLE}_DEFAULT ${DEFAULT}) - # MESSAGE(WARNING "Set: KOKKOS_INTERNAL_ENABLE_${VARIABLE}_DEFAULT to 
${KOKKOS_INTERNAL_ENABLE_${VARIABLE}_DEFAULT}") +# +# @FUNCTION: kokkos_deprecated_list +# +# Function that checks if a deprecated list option like Kokkos_ARCH was given. +# This prints an error and prevents configure from completing. +# It attempts to print a helpful message about updating the options for the new CMake. +# Kokkos_${SUFFIX} is the name of the option (like Kokkos_ARCH) being checked. +# Kokkos_${PREFIX}_X is the name of new option to be defined from a list X,Y,Z,... +FUNCTION(kokkos_deprecated_list SUFFIX PREFIX) + SET(CAMEL_NAME Kokkos_${SUFFIX}) + STRING(TOUPPER ${CAMEL_NAME} UC_NAME) + + #I don't love doing it this way but better to be safe + FOREACH(opt ${KOKKOS_GIVEN_VARIABLES}) + STRING(TOUPPER ${opt} OPT_UC) + IF ("${OPT_UC}" STREQUAL "${UC_NAME}") + STRING(REPLACE "," ";" optlist "${${opt}}") + SET(ERROR_MSG "Given deprecated option list ${opt}. This must now be given as separate -D options, which assuming you spelled options correctly would be:") + FOREACH(entry ${optlist}) + STRING(TOUPPER ${entry} ENTRY_UC) + STRING(APPEND ERROR_MSG "\n -DKokkos_${PREFIX}_${ENTRY_UC}=ON") + ENDFOREACH() + STRING(APPEND ERROR_MSG "\nRemove CMakeCache.txt and re-run. 
For a list of valid options, refer to BUILD.md or even look at CMakeCache.txt (before deleting it).") + IF (KOKKOS_HAS_TRILINOS) + MESSAGE(WARNING ${ERROR_MSG}) + FOREACH(entry ${optlist}) + STRING(TOUPPER ${entry} ENTRY_UC) + SET(${CAMEL_NAME}_${ENTRY_UC} ON CACHE BOOL "Deprecated Trilinos translation") + ENDFOREACH() + UNSET(${opt} CACHE) + ELSE() + MESSAGE(SEND_ERROR ${ERROR_MSG}) + ENDIF() + ENDIF() + ENDFOREACH() +ENDFUNCTION() + +FUNCTION(kokkos_option CAMEL_SUFFIX DEFAULT TYPE DOCSTRING) + SET(CAMEL_NAME Kokkos_${CAMEL_SUFFIX}) + STRING(TOUPPER ${CAMEL_NAME} UC_NAME) + + # Make sure this appears in the cache with the appropriate DOCSTRING + SET(${CAMEL_NAME} ${DEFAULT} CACHE ${TYPE} ${DOCSTRING}) + + #I don't love doing it this way because it's N^2 in number options, but cest la vie + FOREACH(opt ${KOKKOS_GIVEN_VARIABLES}) + STRING(TOUPPER ${opt} OPT_UC) + IF ("${OPT_UC}" STREQUAL "${UC_NAME}") + IF (NOT "${opt}" STREQUAL "${CAMEL_NAME}") + IF (KOKKOS_HAS_TRILINOS) + #Allow this for now if Trilinos... we need to bootstrap our way to integration + MESSAGE(WARNING "Deprecated option ${opt} found - please change spelling to ${CAMEL_NAME}") + SET(${CAMEL_NAME} "${${opt}}" CACHE ${TYPE} ${DOCSTRING} FORCE) + UNSET(${opt} CACHE) + ELSE() + MESSAGE(FATAL_ERROR "Matching option found for ${CAMEL_NAME} with the wrong case ${opt}. Please delete your CMakeCache.txt and change option to -D${CAMEL_NAME}=${${opt}}. 
This is now enforced to avoid hard-to-debug CMake cache inconsistencies.") + ENDIF() + ENDIF() + ENDIF() + ENDFOREACH() + + #okay, great, we passed the validation test - use the default + IF (DEFINED ${CAMEL_NAME}) + SET(${UC_NAME} ${${CAMEL_NAME}} PARENT_SCOPE) + ELSE() + SET(${UC_NAME} ${DEFAULT} PARENT_SCOPE) + ENDIF() + +ENDFUNCTION() + +FUNCTION(kokkos_append_config_line LINE) + GLOBAL_APPEND(KOKKOS_TPL_EXPORTS "${LINE}") +ENDFUNCTION() + +MACRO(kokkos_export_cmake_tpl NAME) + #CMake TPLs are located with a call to find_package + #find_package locates XConfig.cmake files through + #X_DIR or X_ROOT variables set prior to calling find_package + + #If Kokkos was configured to find the TPL through a _DIR variable + #make sure thar DIR variable is available to downstream packages + IF (DEFINED ${NAME}_DIR) + #The downstream project may override the TPL location that Kokkos used + #Check if the downstream project chose its own TPL location + #If not, make the Kokkos found location available + KOKKOS_APPEND_CONFIG_LINE("IF(NOT DEFINED ${NAME}_DIR)") + KOKKOS_APPEND_CONFIG_LINE(" SET(${NAME}_DIR ${${NAME}_DIR})") + KOKKOS_APPEND_CONFIG_LINE("ENDIF()") + ENDIF() + + IF (DEFINED ${NAME}_ROOT) + #The downstream project may override the TPL location that Kokkos used + #Check if the downstream project chose its own TPL location + #If not, make the Kokkos found location available + KOKKOS_APPEND_CONFIG_LINE("IF(NOT DEFINED ${NAME}_ROOT)") + KOKKOS_APPEND_CONFIG_LINE(" SET(${NAME}_ROOT ${${NAME}_ROOT})") + KOKKOS_APPEND_CONFIG_LINE("ENDIF()") + ENDIF() + KOKKOS_APPEND_CONFIG_LINE("FIND_DEPENDENCY(${NAME})") +ENDMACRO() + +MACRO(kokkos_export_imported_tpl NAME) + IF (NOT KOKKOS_HAS_TRILINOS) + GET_TARGET_PROPERTY(LIB_TYPE ${NAME} TYPE) + IF (${LIB_TYPE} STREQUAL "INTERFACE_LIBRARY") + # This is not an imported target + # This an interface library that we created + INSTALL( + TARGETS ${NAME} + EXPORT KokkosTargets + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + LIBRARY 
DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + ) ELSE() - set(KOKKOS_INTERNAL_ENABLE_${VARIABLE}_DEFAULT ${KOKKOS_ENABLE_${VARIABLE}}) - # MESSAGE(WARNING "Set: KOKKOS_INTERNAL_ENABLE_${VARIABLE}_DEFAULT to ${KOKKOS_INTERNAL_ENABLE_${VARIABLE}_DEFAULT}") + #make sure this also gets "exported" in the config file + KOKKOS_APPEND_CONFIG_LINE("IF(NOT TARGET ${NAME})") + KOKKOS_APPEND_CONFIG_LINE("ADD_LIBRARY(${NAME} UNKNOWN IMPORTED)") + KOKKOS_APPEND_CONFIG_LINE("SET_TARGET_PROPERTIES(${NAME} PROPERTIES") + + GET_TARGET_PROPERTY(TPL_LIBRARY ${NAME} IMPORTED_LOCATION) + IF(TPL_LIBRARY) + KOKKOS_APPEND_CONFIG_LINE("IMPORTED_LOCATION ${TPL_LIBRARY}") + ENDIF() + + GET_TARGET_PROPERTY(TPL_INCLUDES ${NAME} INTERFACE_INCLUDE_DIRECTORIES) + IF(TPL_INCLUDES) + KOKKOS_APPEND_CONFIG_LINE("INTERFACE_INCLUDE_DIRECTORIES ${TPL_INCLUDES}") + ENDIF() + + GET_TARGET_PROPERTY(TPL_COMPILE_OPTIONS ${NAME} INTERFACE_COMPILE_OPTIONS) + IF(TPL_COMPILE_OPTIONS) + KOKKOS_APPEND_CONFIG_LINE("INTERFACE_COMPILE_OPTIONS ${TPL_COMPILE_OPTIONS}") + ENDIF() + + SET(TPL_LINK_OPTIONS) + IF(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.13.0") + GET_TARGET_PROPERTY(TPL_LINK_OPTIONS ${NAME} INTERFACE_LINK_OPTIONS) + ENDIF() + IF(TPL_LINK_OPTIONS) + KOKKOS_APPEND_CONFIG_LINE("INTERFACE_LINK_OPTIONS ${TPL_LINK_OPTIONS}") + ENDIF() + + GET_TARGET_PROPERTY(TPL_LINK_LIBRARIES ${NAME} INTERFACE_LINK_LIBRARIES) + IF(TPL_LINK_LIBRARIES) + KOKKOS_APPEND_CONFIG_LINE("INTERFACE_LINK_LIBRARIES ${TPL_LINK_LIBRARIES}") + ENDIF() + KOKKOS_APPEND_CONFIG_LINE(")") + KOKKOS_APPEND_CONFIG_LINE("ENDIF()") ENDIF() ENDIF() - UNSET(KOKKOS_ENABLE_${VARIABLE} CACHE) -endmacro() +ENDMACRO() + + +# +# @MACRO: KOKKOS_IMPORT_TPL() +# +# Function that checks if a third-party library (TPL) has been enabled and calls `find_package` +# to create an imported target encapsulating all the flags and libraries +# needed to use the TPL +# +# Usage:: +# +# KOKKOS_IMPORT_TPL( +# +# NO_EXPORT +# INTERFACE +# +# 
``NO_EXPORT``
+#
+#     If specified, this TPL will not be added to KokkosConfig.cmake as an export
+#
+#   ``INTERFACE``
+#
+#     If specified, the find module must define the INTERFACE library <NAME>
+#     rather than the IMPORTED target Kokkos::<NAME>
+MACRO(kokkos_import_tpl NAME)
+  CMAKE_PARSE_ARGUMENTS(TPL
+   "NO_EXPORT;INTERFACE"
+   ""
+   ""
+   ${ARGN})
+  IF (TPL_INTERFACE)
+    SET(TPL_IMPORTED_NAME ${NAME})
+  ELSE()
+    SET(TPL_IMPORTED_NAME Kokkos::${NAME})
+  ENDIF()
+
+  # CMP0074 makes find_package() honor <PackageName>_ROOT variables.
+  # Even though this policy gets set in the top-level CMakeLists.txt,
+  # errors about ROOT variables being ignored still showed up (possibly a
+  # scope issue), so set it again before we do any find_package calls
+  IF(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.12.0")
+    CMAKE_POLICY(SET CMP0074 NEW)
+  ENDIF()
+
+  IF (KOKKOS_ENABLE_${NAME})
+    #Search for TPL<NAME> in MODULE mode to make sure we avoid using anyone else's find
+    FIND_PACKAGE(TPL${NAME} REQUIRED MODULE)
+    IF(NOT TARGET ${TPL_IMPORTED_NAME})
+      MESSAGE(FATAL_ERROR "Find module succeeded for ${NAME}, but did not produce valid target ${TPL_IMPORTED_NAME}")
+    ENDIF()
+    IF(NOT TPL_NO_EXPORT)
+      KOKKOS_EXPORT_IMPORTED_TPL(${TPL_IMPORTED_NAME})
+    ENDIF()
+    LIST(APPEND KOKKOS_ENABLED_TPLS ${NAME})
+  ENDIF()
+ENDMACRO(kokkos_import_tpl)
+
+MACRO(kokkos_import_cmake_tpl MODULE_NAME)
+  kokkos_import_tpl(${MODULE_NAME} ${ARGN} NO_EXPORT)
+  CMAKE_PARSE_ARGUMENTS(TPL
+   "NO_EXPORT"
+   "OPTION_NAME"
+   ""
+   ${ARGN})
+
+  IF (NOT TPL_OPTION_NAME)
+    SET(TPL_OPTION_NAME ${MODULE_NAME})
+  ENDIF()
+
+  IF (NOT TPL_NO_EXPORT)
+    KOKKOS_EXPORT_CMAKE_TPL(${MODULE_NAME})
+  ENDIF()
+ENDMACRO()
+
+#
+# @MACRO: KOKKOS_CREATE_IMPORTED_TPL()
+#
+# Macro that creates an imported (or INTERFACE) target encapsulating all the
+# flags and libraries needed to use the TPL
+#
+# Usage::
+#
+#   KOKKOS_CREATE_IMPORTED_TPL(
+#     <NAME>
+#     INTERFACE
+#     LIBRARY <path_to_library>
+#     LINK_LIBRARIES <lib1> <lib2> ...
+#     INCLUDES <dir1> ...  COMPILE_OPTIONS <opt1> ...
+#     LINK_OPTIONS <opt1> <opt2> ...
+#
+#   ``INTERFACE``
+#
+#     If specified, this TPL will build an INTERFACE library rather than an
+#     IMPORTED target
+#
+#   ``LIBRARY <path_to_library>``
+#
+#     If specified, this gives the IMPORTED_LOCATION of the library (not valid with INTERFACE).
+#
+#   ``LINK_LIBRARIES <lib1> <lib2> ...``
+#
+#     If specified, this gives a list of dependent libraries that also
+#     need to be linked against. Each entry can be a library path or
+#     the name of a valid CMake target.
+#
+#   ``INCLUDES <dir1> <dir2> ...``
+#
+#     If specified, this gives a list of directories that must be added
+#     to the include path for using this library.
+#
+#   ``COMPILE_OPTIONS <opt1> <opt2> ...``
+#
+#     If specified, this gives a list of compiler flags that must be used
+#     for using this library.
+#
+#   ``LINK_OPTIONS <opt1> <opt2> ...``
+#
+#     If specified, this gives a list of linker flags needed to use this library.
+#     They are appended to the link interface and never replace LINK_LIBRARIES.
+MACRO(kokkos_create_imported_tpl NAME)
+  CMAKE_PARSE_ARGUMENTS(TPL
+   "INTERFACE"
+   "LIBRARY"
+   "LINK_LIBRARIES;INCLUDES;COMPILE_OPTIONS;LINK_OPTIONS"
+   ${ARGN})
+
+
+  IF (KOKKOS_HAS_TRILINOS)
+    #TODO: we need to set a bunch of cache variables here
+  ELSEIF (TPL_INTERFACE)
+    ADD_LIBRARY(${NAME} INTERFACE)
+    #Give this an importy-looking name
+    ADD_LIBRARY(Kokkos::${NAME} ALIAS ${NAME})
+    IF (TPL_LIBRARY)
+      MESSAGE(SEND_ERROR "TPL Interface library ${NAME} should not have an IMPORTED_LOCATION")
+    ENDIF()
+    #The target_* commands accept multiple list entries and accumulate across calls
+    IF(TPL_LINK_LIBRARIES)
+      TARGET_LINK_LIBRARIES(${NAME} INTERFACE ${TPL_LINK_LIBRARIES})
+    ENDIF()
+    IF(TPL_INCLUDES)
+      TARGET_INCLUDE_DIRECTORIES(${NAME} INTERFACE ${TPL_INCLUDES})
+    ENDIF()
+    IF(TPL_COMPILE_OPTIONS)
+      TARGET_COMPILE_OPTIONS(${NAME} INTERFACE ${TPL_COMPILE_OPTIONS})
+    ENDIF()
+    IF(TPL_LINK_OPTIONS)
+      TARGET_LINK_LIBRARIES(${NAME} INTERFACE ${TPL_LINK_OPTIONS})
+    ENDIF()
+  ELSE()
+    ADD_LIBRARY(${NAME} UNKNOWN IMPORTED)
+    IF(TPL_LIBRARY)
+      SET_TARGET_PROPERTIES(${NAME} PROPERTIES
+        IMPORTED_LOCATION ${TPL_LIBRARY})
+    ENDIF()
+    #Things have to go in quoted in case we have multiple list entries
+    IF(TPL_LINK_LIBRARIES)
+      SET_TARGET_PROPERTIES(${NAME} PROPERTIES
+        INTERFACE_LINK_LIBRARIES "${TPL_LINK_LIBRARIES}")
+    ENDIF()
+    IF(TPL_INCLUDES)
+      SET_TARGET_PROPERTIES(${NAME} PROPERTIES
+        INTERFACE_INCLUDE_DIRECTORIES "${TPL_INCLUDES}")
+    ENDIF()
+    IF(TPL_COMPILE_OPTIONS)
+      SET_TARGET_PROPERTIES(${NAME} PROPERTIES
+        INTERFACE_COMPILE_OPTIONS "${TPL_COMPILE_OPTIONS}")
+    ENDIF()
+    IF(TPL_LINK_OPTIONS)
+      SET_PROPERTY(TARGET ${NAME} APPEND PROPERTY
+        INTERFACE_LINK_LIBRARIES "${TPL_LINK_OPTIONS}")
+    ENDIF()
+  ENDIF()
+ENDMACRO()
+
+#
+# @MACRO: KOKKOS_FIND_HEADER
+#
+# Macro that finds a particular header. This searches custom paths
+# or default system paths depending on options. In contrast to CMake
+# default, custom paths are prioritized over system paths. The search
+# order is:
+# 1. <TPL_NAME>_ROOT variable
+# 2. KOKKOS_<TPL_NAME>_DIR variable
+# 3. Locations in the PATHS option
+# 4. Default system paths, if allowed.
+#
+# Default system paths are allowed if none of options (1)-(3) are specified
+# or if default paths are specifically allowed via ALLOW_SYSTEM_PATH_FALLBACK
+#
+# Usage::
+#
+#   KOKKOS_FIND_HEADER(
+#     <VAR_NAME>
+#     <HEADER>
+#     <TPL_NAME>
+#     [ALLOW_SYSTEM_PATH_FALLBACK]
+#     [PATHS path1 [path2 ...]]
+#   )
+#
+#   ``<VAR_NAME>``
+#
+#     The variable that receives the directory containing the header (or <VAR_NAME>-NOTFOUND)
+#
+#   ``<HEADER>``
+#
+#     The name of the header to find
+#
+#   ``<TPL_NAME>``
+#
+#     The name of the TPL the header corresponds to
+#
+#   ``[ALLOW_SYSTEM_PATH_FALLBACK]``
+#
+#     If custom paths are given and the header is not found
+#     should we be allowed to search default system paths
+#     or error out if not found in given paths
+#
+#   ``[PATHS path1 [path2 ...]]``
+#
+#     Custom paths to search for the header
+#
+MACRO(kokkos_find_header VAR_NAME HEADER TPL_NAME)
+  CMAKE_PARSE_ARGUMENTS(TPL
+   "ALLOW_SYSTEM_PATH_FALLBACK"
+   ""
+   "PATHS"
+   ${ARGN})
+
+  SET(${HEADER}_FOUND FALSE)
+  SET(HAVE_CUSTOM_PATHS FALSE)
+  IF(NOT ${HEADER}_FOUND AND DEFINED ${TPL_NAME}_ROOT)
+    #<TPL_NAME>_ROOT was given: ONLY look in its include directory
+    FIND_PATH(${VAR_NAME} ${HEADER} PATHS ${${TPL_NAME}_ROOT}/include NO_DEFAULT_PATH)
+    SET(HAVE_CUSTOM_PATHS TRUE)
+  ENDIF()
+
+  IF(NOT ${HEADER}_FOUND AND DEFINED KOKKOS_${TPL_NAME}_DIR)
+    #KOKKOS_<TPL_NAME>_DIR was given: ONLY look in its include directory
+    FIND_PATH(${VAR_NAME} ${HEADER} PATHS ${KOKKOS_${TPL_NAME}_DIR}/include NO_DEFAULT_PATH)
+    SET(HAVE_CUSTOM_PATHS TRUE)
+  ENDIF()
+
+  IF (NOT ${HEADER}_FOUND AND TPL_PATHS)
+    #we got custom paths: ONLY look in these paths and nowhere else
+    #NOTE(review): ${HEADER}_FOUND is never set TRUE here; ordering presumably relies on FIND_PATH not re-searching once ${VAR_NAME} is set
+    FIND_PATH(${VAR_NAME} ${HEADER} PATHS ${TPL_PATHS} NO_DEFAULT_PATH)
+    SET(HAVE_CUSTOM_PATHS TRUE)
+  ENDIF()
+
+  IF (NOT HAVE_CUSTOM_PATHS OR TPL_ALLOW_SYSTEM_PATH_FALLBACK)
+    #Now go ahead and look in system paths (no custom location given, or fallback allowed)
+    IF (NOT ${HEADER}_FOUND)
+      FIND_PATH(${VAR_NAME} ${HEADER})
+    ENDIF()
+  ENDIF()
+ENDMACRO()
+
+#
+# @MACRO: KOKKOS_FIND_LIBRARY
+#
+# Macro that finds a particular library. This searches custom paths
+# or default system paths depending on options. In contrast to CMake
+# default, custom paths are prioritized over system paths. The search
+# order is:
+# 1. <TPL_NAME>_ROOT variable
+# 2. KOKKOS_<TPL_NAME>_DIR variable
+# 3. Locations in the PATHS option
+# 4. Default system paths, if allowed.
+#
+# Default system paths are allowed if none of options (1)-(3) are specified
+# or if default paths are specifically allowed via ALLOW_SYSTEM_PATH_FALLBACK
+#
+# Usage::
+#
+#   KOKKOS_FIND_LIBRARY(
+#     <VAR_NAME>
+#     <LIB>
+#     <TPL_NAME>
+#     [ALLOW_SYSTEM_PATH_FALLBACK]
+#     [PATHS path1 [path2 ...]]
+#   )
+#
+#   ``<VAR_NAME>``
+#
+#     The variable that receives the full path of the library (or <VAR_NAME>-NOTFOUND)
+#
+#   ``<LIB>``
+#
+#     The name of the library to find (NOT prefixed with -l)
+#
+#   ``<TPL_NAME>``
+#
+#     The name of the TPL the library corresponds to
+#
+#   ``ALLOW_SYSTEM_PATH_FALLBACK``
+#
+#     If custom paths are given and the library is not found
+#     should we be allowed to search default system paths
+#     or error out if not found in given paths
+#
+#   ``PATHS``
+#
+#     Custom paths to search for the library
+#
+MACRO(kokkos_find_library VAR_NAME LIB TPL_NAME)
+  CMAKE_PARSE_ARGUMENTS(TPL
+   "ALLOW_SYSTEM_PATH_FALLBACK"
+   ""
+   "PATHS"
+   ${ARGN})
+
+  SET(${LIB}_FOUND FALSE)
+  SET(HAVE_CUSTOM_PATHS FALSE)
+  IF(NOT ${LIB}_FOUND AND DEFINED ${TPL_NAME}_ROOT)
+    FIND_LIBRARY(${VAR_NAME} ${LIB} PATHS ${${TPL_NAME}_ROOT}/lib ${${TPL_NAME}_ROOT}/lib64 NO_DEFAULT_PATH)
+    SET(HAVE_CUSTOM_PATHS TRUE)
+  ENDIF()
+
+  IF(NOT ${LIB}_FOUND AND DEFINED KOKKOS_${TPL_NAME}_DIR)
+    #KOKKOS_<TPL_NAME>_DIR was given: only look in its lib and lib64 and nowhere else
+    FIND_LIBRARY(${VAR_NAME} ${LIB} PATHS ${KOKKOS_${TPL_NAME}_DIR}/lib ${KOKKOS_${TPL_NAME}_DIR}/lib64 NO_DEFAULT_PATH)
+    SET(HAVE_CUSTOM_PATHS TRUE)
+  ENDIF()
+
+  IF (NOT ${LIB}_FOUND AND TPL_PATHS)
+    #we got custom paths, only look in these paths and nowhere else
+    FIND_LIBRARY(${VAR_NAME} ${LIB} PATHS ${TPL_PATHS} NO_DEFAULT_PATH)
+    SET(HAVE_CUSTOM_PATHS TRUE)
+  ENDIF()
+
+
+  IF (NOT HAVE_CUSTOM_PATHS OR TPL_ALLOW_SYSTEM_PATH_FALLBACK)
+    IF (NOT ${LIB}_FOUND)
+      #Now go ahead and look in system paths (no custom location given, or fallback allowed)
+      FIND_LIBRARY(${VAR_NAME} ${LIB})
+    ENDIF()
+  ENDIF()
+ENDMACRO()
+
+#
+# @MACRO: KOKKOS_FIND_IMPORTED
+#
+# Macro that finds all libraries and headers needed for the tpl
+# and creates an imported target encapsulating all the flags and libraries
+#
+# Usage::
+#
+#   KOKKOS_FIND_IMPORTED(
+#     <NAME>
+#     INTERFACE
+#     ALLOW_SYSTEM_PATH_FALLBACK
+#     LIBRARY <lib>  LIBRARIES <lib1> <lib2> ...
+#     HEADER <header>  HEADERS <hdr1> <hdr2> ...
+#     LIBRARY_PATHS <path1> ...  HEADER_PATHS <path1> ...
+#     MODULE_NAME <name>  IMPORTED_NAME <target>
+#
+#   ``INTERFACE``
+#
+#     If specified, this TPL will build an INTERFACE library rather than an
+#     IMPORTED target
+#
+#   ``ALLOW_SYSTEM_PATH_FALLBACK``
+#
+#     If custom paths are given and the library is not found
+#     should we be allowed to search default system paths
+#     or error out if not found in given paths.
+#
+#   ``LIBRARY <name>``
+#
+#     If specified, this gives the name of the library to look for
+#
+#   ``MODULE_NAME <name>``
+#
+#     If specified, the name of the enclosing module passed to
+#     FIND_PACKAGE(<MODULE_NAME>). Defaults to TPL${NAME} if not
+#     given.
+#
+#   ``IMPORTED_NAME <name>``
+#
+#     If specified, this gives the name of the target to build.
+#     Defaults to Kokkos::<NAME> (or plain <NAME> when INTERFACE is given)
+#
+#   ``LIBRARY_PATHS <path1> ...``
+#
+#     If specified, this gives a list of paths to search for the library
+#     If not given, <NAME>_ROOT/lib and <NAME>_ROOT/lib64 will be searched.
+#
+#   ``HEADER_PATHS <path1> ...``
+#
+#     If specified, this gives a list of paths to search for the headers
+#     If not given, <NAME>_ROOT/include and KOKKOS_<NAME>_DIR/include will be searched.
+#
+#   ``HEADERS <hdr1> ...``
+#
+#     List of headers to find; each is searched into its own variable so results never alias
+#
+#   ``LIBRARIES <lib1> ...``
+#
+#     If specified, this gives a list of libraries to find for the package
+#
+MACRO(kokkos_find_imported NAME)
+  CMAKE_PARSE_ARGUMENTS(TPL
+   "INTERFACE;ALLOW_SYSTEM_PATH_FALLBACK"
+   "HEADER;LIBRARY;IMPORTED_NAME;MODULE_NAME"
+   "HEADER_PATHS;LIBRARY_PATHS;HEADERS;LIBRARIES"
+   ${ARGN})
+
+  IF(NOT TPL_MODULE_NAME)
+    SET(TPL_MODULE_NAME TPL${NAME})
+  ENDIF()
+
+  IF (TPL_ALLOW_SYSTEM_PATH_FALLBACK)
+    SET(ALLOW_PATH_FALLBACK_OPT ALLOW_SYSTEM_PATH_FALLBACK)
+  ELSE()
+    SET(ALLOW_PATH_FALLBACK_OPT)
+  ENDIF()
+
+  IF (NOT TPL_IMPORTED_NAME)
+    IF (TPL_INTERFACE)
+      SET(TPL_IMPORTED_NAME ${NAME})
+    ELSE()
+      SET(TPL_IMPORTED_NAME Kokkos::${NAME})
+    ENDIF()
+  ENDIF()
+
+  SET(${NAME}_INCLUDE_DIRS)
+  IF (TPL_HEADER)
+    KOKKOS_FIND_HEADER(${NAME}_INCLUDE_DIRS ${TPL_HEADER} ${NAME} ${ALLOW_PATH_FALLBACK_OPT} PATHS ${TPL_HEADER_PATHS})
+  ENDIF()
+
+  FOREACH(HEADER ${TPL_HEADERS})
+    KOKKOS_FIND_HEADER(${NAME}_${HEADER}_INCLUDE_DIR ${HEADER} ${NAME} ${ALLOW_PATH_FALLBACK_OPT} PATHS ${TPL_HEADER_PATHS})
+    IF(${NAME}_${HEADER}_INCLUDE_DIR)
+      LIST(APPEND ${NAME}_INCLUDE_DIRS ${${NAME}_${HEADER}_INCLUDE_DIR})
+    ENDIF()
+  ENDFOREACH()
+
+  SET(${NAME}_LIBRARY)
+  IF(TPL_LIBRARY)
+    KOKKOS_FIND_LIBRARY(${NAME}_LIBRARY ${TPL_LIBRARY} ${NAME} ${ALLOW_PATH_FALLBACK_OPT} PATHS ${TPL_LIBRARY_PATHS})
+  ENDIF()
+
+  SET(${NAME}_FOUND_LIBRARIES)
+  FOREACH(LIB ${TPL_LIBRARIES})
+    KOKKOS_FIND_LIBRARY(${LIB}_LOCATION ${LIB} ${NAME} ${ALLOW_PATH_FALLBACK_OPT} PATHS ${TPL_LIBRARY_PATHS})
+    IF(${LIB}_LOCATION)
+      LIST(APPEND ${NAME}_FOUND_LIBRARIES ${${LIB}_LOCATION})
+    ELSE()
+      SET(${NAME}_FOUND_LIBRARIES ${${LIB}_LOCATION})
+      BREAK()
+    ENDIF()
+  ENDFOREACH()
+
+  INCLUDE(FindPackageHandleStandardArgs)
+  #Collect all the variables we need to be valid for
+  #find_package to have succeeded
+  SET(TPL_VARS_NEEDED)
+  IF (TPL_LIBRARY)
+    LIST(APPEND TPL_VARS_NEEDED ${NAME}_LIBRARY)
+  ENDIF()
+  IF(TPL_HEADER)
+    LIST(APPEND TPL_VARS_NEEDED ${NAME}_INCLUDE_DIRS)
+  ENDIF()
+  IF(TPL_LIBRARIES)
+    LIST(APPEND TPL_VARS_NEEDED ${NAME}_FOUND_LIBRARIES)
+  ENDIF()
+  FIND_PACKAGE_HANDLE_STANDARD_ARGS(${TPL_MODULE_NAME} REQUIRED_VARS ${TPL_VARS_NEEDED})
+
+  MARK_AS_ADVANCED(${NAME}_INCLUDE_DIRS ${NAME}_FOUND_LIBRARIES ${NAME}_LIBRARY)
+
+  IF (${TPL_MODULE_NAME}_FOUND)
+    SET(IMPORT_TYPE)
+    IF (TPL_INTERFACE)
+      SET(IMPORT_TYPE "INTERFACE")
+    ENDIF()
+    KOKKOS_CREATE_IMPORTED_TPL(${TPL_IMPORTED_NAME}
+      ${IMPORT_TYPE}
+      INCLUDES "${${NAME}_INCLUDE_DIRS}"
+      LIBRARY  "${${NAME}_LIBRARY}"
+      LINK_LIBRARIES "${${NAME}_FOUND_LIBRARIES}")
+  ENDIF()
+ENDMACRO(kokkos_find_imported)
+
+#
+# @MACRO: KOKKOS_LINK_TPL()
+#
+# Function that checks if a third-party library (TPL) has been enabled and
+# calls target_link_libraries on the given target
+#
+# Usage::
+#
+#   KOKKOS_LINK_TPL(
+#     <TARGET>
+#     PUBLIC
+#     PRIVATE
+#     INTERFACE
+#     IMPORTED_NAME  <name>
+#     <TPL_NAME>
+#
+#   Checks if Kokkos_ENABLE_<TPL_NAME>=ON and if so links the library
+#
+#   ``PUBLIC/PRIVATE/INTERFACE``
+#
+#
Specifies the linkage mode. One of these arguments should be given. +# This will then invoke target_link_libraries( PUBLIC/PRIVATE/INTERFACE ) +# +# ``IMPORTED_NAME `` +# +# If specified, this gives the exact name of the target to link against +# target_link_libraries( ) +# +FUNCTION(kokkos_link_tpl TARGET) + CMAKE_PARSE_ARGUMENTS(TPL + "PUBLIC;PRIVATE;INTERFACE" + "IMPORTED_NAME" + "" + ${ARGN}) + #the name of the TPL + SET(TPL ${TPL_UNPARSED_ARGUMENTS}) + IF (KOKKOS_HAS_TRILINOS) + #Do nothing, they will have already been linked + ELSE() + IF (NOT TPL_IMPORTED_NAME) + SET(TPL_IMPORTED_NAME Kokkos::${TPL}) + ENDIF() + IF (KOKKOS_ENABLE_${TPL}) + IF (TPL_PUBLIC) + TARGET_LINK_LIBRARIES(${TARGET} PUBLIC ${TPL_IMPORTED_NAME}) + ELSEIF (TPL_PRIVATE) + TARGET_LINK_LIBRARIES(${TARGET} PRIVATE ${TPL_IMPORTED_NAME}) + ELSEIF (TPL_INTERFACE) + TARGET_LINK_LIBRARIES(${TARGET} INTERFACE ${TPL_IMPORTED_NAME}) + ELSE() + TARGET_LINK_LIBRARIES(${TARGET} ${TPL_IMPORTED_NAME}) + ENDIF() + ENDIF() + ENDIF() +ENDFUNCTION() + diff --git a/lib/kokkos/cmake/kokkos_install.cmake b/lib/kokkos/cmake/kokkos_install.cmake new file mode 100644 index 0000000000..1e4a5a2aad --- /dev/null +++ b/lib/kokkos/cmake/kokkos_install.cmake @@ -0,0 +1,42 @@ +IF (NOT KOKKOS_HAS_TRILINOS) + INCLUDE(GNUInstallDirs) + + #Set all the variables needed for KokkosConfig.cmake + GET_PROPERTY(KOKKOS_PROP_LIBS GLOBAL PROPERTY KOKKOS_LIBRARIES_NAMES) + SET(KOKKOS_LIBRARIES ${KOKKOS_PROP_LIBS}) + + INCLUDE(CMakePackageConfigHelpers) + CONFIGURE_PACKAGE_CONFIG_FILE( + cmake/KokkosConfig.cmake.in + "${Kokkos_BINARY_DIR}/KokkosConfig.cmake" + INSTALL_DESTINATION ${CMAKE_INSTALL_FULL_LIBDIR}/cmake) + + INCLUDE(CMakePackageConfigHelpers) + CONFIGURE_PACKAGE_CONFIG_FILE( + cmake/KokkosConfigCommon.cmake.in + "${Kokkos_BINARY_DIR}/KokkosConfigCommon.cmake" + INSTALL_DESTINATION ${CMAKE_INSTALL_FULL_LIBDIR}/cmake) + + WRITE_BASIC_PACKAGE_VERSION_FILE("${Kokkos_BINARY_DIR}/KokkosConfigVersion.cmake" + VERSION 
"${Kokkos_VERSION}" + COMPATIBILITY SameMajorVersion) + + # Install the KokkosConfig*.cmake files + install(FILES + "${Kokkos_BINARY_DIR}/KokkosConfig.cmake" + "${Kokkos_BINARY_DIR}/KokkosConfigCommon.cmake" + "${Kokkos_BINARY_DIR}/KokkosConfigVersion.cmake" + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/Kokkos) + install(EXPORT KokkosTargets NAMESPACE Kokkos:: DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/Kokkos) +ELSE() + CONFIGURE_FILE(cmake/KokkosConfigCommon.cmake.in ${Kokkos_BINARY_DIR}/KokkosConfigCommon.cmake @ONLY) + file(READ ${Kokkos_BINARY_DIR}/KokkosConfigCommon.cmake KOKKOS_CONFIG_COMMON) + file(APPEND "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/KokkosConfig_install.cmake" ${KOKKOS_CONFIG_COMMON}) +ENDIF() + +# build and install pkgconfig file +CONFIGURE_FILE(core/src/kokkos.pc.in kokkos.pc @ONLY) +INSTALL(FILES ${CMAKE_CURRENT_BINARY_DIR}/kokkos.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig) + +INSTALL(FILES ${CMAKE_CURRENT_BINARY_DIR}/KokkosCore_config.h DESTINATION ${KOKKOS_HEADER_DIR}) + diff --git a/lib/kokkos/cmake/kokkos_options.cmake b/lib/kokkos/cmake/kokkos_options.cmake deleted file mode 100644 index 239301925c..0000000000 --- a/lib/kokkos/cmake/kokkos_options.cmake +++ /dev/null @@ -1,419 +0,0 @@ -########################## NOTES ############################################### -# List the options for configuring kokkos using CMake method of doing it. -# These options then get mapped onto KOKKOS_SETTINGS environment variable by -# kokkos_settings.cmake. It is separate to allow other packages to override -# these variables (e.g., TriBITS). 
- -########################## AVAILABLE OPTIONS ################################### -# Use lists for documentation, verification, and programming convenience - -# All CMake options of the type KOKKOS_ENABLE_* -set(KOKKOS_INTERNAL_ENABLE_OPTIONS_LIST) -list(APPEND KOKKOS_INTERNAL_ENABLE_OPTIONS_LIST - Serial - OpenMP - Pthread - Qthread - HPX - Cuda - ROCm - HWLOC - MEMKIND - LIBRT - Cuda_Lambda - Cuda_Relocatable_Device_Code - Cuda_UVM - Cuda_LDG_Intrinsic - HPX_ASYNC_DISPATCH - Debug - Debug_DualView_Modify_Check - Debug_Bounds_Check - Compiler_Warnings - Profiling - Profiling_Load_Print - Aggressive_Vectorization - Deprecated_Code - Explicit_Instantiation - ) - -#------------------------------------------------------------------------------- -#------------------------------- Recognize CamelCase Options --------------------------- -#------------------------------------------------------------------------------- - -foreach(opt ${KOKKOS_INTERNAL_ENABLE_OPTIONS_LIST}) - string(TOUPPER ${opt} OPT ) - IF(DEFINED Kokkos_ENABLE_${opt}) - IF(DEFINED KOKKOS_ENABLE_${OPT}) - IF(NOT ("${KOKKOS_ENABLE_${OPT}}" STREQUAL "${Kokkos_ENABLE_${opt}}")) - IF(DEFINED KOKKOS_ENABLE_${OPT}_INTERNAL) - MESSAGE(WARNING "Defined both Kokkos_ENABLE_${opt}=[${Kokkos_ENABLE_${opt}}] and KOKKOS_ENABLE_${OPT}=[${KOKKOS_ENABLE_${OPT}}] and they differ! Could be caused by old CMakeCache Variable. Run CMake again and warning should disappear. 
If not you are truly setting both variables.") - IF(NOT ("${Kokkos_ENABLE_${opt}}" STREQUAL "${KOKKOS_ENABLE_${OPT}_INTERNAL}")) - UNSET(KOKKOS_ENABLE_${OPT} CACHE) - SET(KOKKOS_ENABLE_${OPT} ${Kokkos_ENABLE_${opt}}) - MESSAGE(WARNING "SET BOTH VARIABLES KOKKOS_ENABLE_${OPT}: ${KOKKOS_ENABLE_${OPT}}") - ELSE() - SET(Kokkos_ENABLE_${opt} ${KOKKOS_ENABLE_${OPT}}) - ENDIF() - ELSE() - MESSAGE(FATAL_ERROR "Defined both Kokkos_ENABLE_${opt}=[${Kokkos_ENABLE_${opt}}] and KOKKOS_ENABLE_${OPT}=[${KOKKOS_ENABLE_${OPT}}] and they differ!") - ENDIF() - ENDIF() - ELSE() - SET(KOKKOS_INTERNAL_ENABLE_${OPT}_DEFAULT ${Kokkos_ENABLE_${opt}}) - ENDIF() - ENDIF() -endforeach() - -IF(DEFINED Kokkos_ARCH) - MESSAGE(FATAL_ERROR "Defined Kokkos_ARCH, use KOKKOS_ARCH instead!") -ENDIF() -IF(DEFINED Kokkos_Arch) - MESSAGE(FATAL_ERROR "Defined Kokkos_Arch, use KOKKOS_ARCH instead!") -ENDIF() - -#------------------------------------------------------------------------------- -# List of possible host architectures. 
-#------------------------------------------------------------------------------- -set(KOKKOS_ARCH_LIST) -list(APPEND KOKKOS_ARCH_LIST - None # No architecture optimization - AMDAVX # (HOST) AMD chip - EPYC # (HOST) AMD EPYC Zen-Core CPU - ARMv80 # (HOST) ARMv8.0 Compatible CPU - ARMv81 # (HOST) ARMv8.1 Compatible CPU - ARMv8-ThunderX # (HOST) ARMv8 Cavium ThunderX CPU - ARMv8-TX2 # (HOST) ARMv8 Cavium ThunderX2 CPU - WSM # (HOST) Intel Westmere CPU - SNB # (HOST) Intel Sandy/Ivy Bridge CPUs - HSW # (HOST) Intel Haswell CPUs - BDW # (HOST) Intel Broadwell Xeon E-class CPUs - SKX # (HOST) Intel Sky Lake Xeon E-class HPC CPUs (AVX512) - KNC # (HOST) Intel Knights Corner Xeon Phi - KNL # (HOST) Intel Knights Landing Xeon Phi - BGQ # (HOST) IBM Blue Gene Q - Power7 # (HOST) IBM POWER7 CPUs - Power8 # (HOST) IBM POWER8 CPUs - Power9 # (HOST) IBM POWER9 CPUs - Kepler # (GPU) NVIDIA Kepler default (generation CC 3.5) - Kepler30 # (GPU) NVIDIA Kepler generation CC 3.0 - Kepler32 # (GPU) NVIDIA Kepler generation CC 3.2 - Kepler35 # (GPU) NVIDIA Kepler generation CC 3.5 - Kepler37 # (GPU) NVIDIA Kepler generation CC 3.7 - Maxwell # (GPU) NVIDIA Maxwell default (generation CC 5.0) - Maxwell50 # (GPU) NVIDIA Maxwell generation CC 5.0 - Maxwell52 # (GPU) NVIDIA Maxwell generation CC 5.2 - Maxwell53 # (GPU) NVIDIA Maxwell generation CC 5.3 - Pascal60 # (GPU) NVIDIA Pascal generation CC 6.0 - Pascal61 # (GPU) NVIDIA Pascal generation CC 6.1 - Volta70 # (GPU) NVIDIA Volta generation CC 7.0 - Volta72 # (GPU) NVIDIA Volta generation CC 7.2 - Turing75 # (GPU) NVIDIA Turing generation CC 7.5 - ) - -# List of possible device architectures. 
-# The case and spelling here needs to match Makefile.kokkos -set(KOKKOS_DEVICES_LIST) -# Options: Cuda,ROCm,OpenMP,Pthread,Qthreads,Serial -list(APPEND KOKKOS_DEVICES_LIST - Cuda # NVIDIA GPU -- see below - OpenMP # OpenMP - Pthread # pthread - Qthreads # qthreads - HPX # HPX - Serial # serial - ROCm # Relocatable device code - ) - -# List of possible TPLs for Kokkos -# From Makefile.kokkos: Options: hwloc,librt,experimental_memkind -set(KOKKOS_USE_TPLS_LIST) -if(APPLE) -list(APPEND KOKKOS_USE_TPLS_LIST - HWLOC # hwloc - MEMKIND # experimental_memkind - ) -else() -list(APPEND KOKKOS_USE_TPLS_LIST - HWLOC # hwloc - LIBRT # librt - MEMKIND # experimental_memkind - ) -endif() -# Map of cmake variables to Makefile variables -set(KOKKOS_INTERNAL_HWLOC hwloc) -set(KOKKOS_INTERNAL_LIBRT librt) -set(KOKKOS_INTERNAL_MEMKIND experimental_memkind) - -# List of possible Advanced options -set(KOKKOS_OPTIONS_LIST) -list(APPEND KOKKOS_OPTIONS_LIST - AGGRESSIVE_VECTORIZATION - DISABLE_PROFILING - DISABLE_DUALVIEW_MODIFY_CHECK - ENABLE_PROFILE_LOAD_PRINT - ) -# Map of cmake variables to Makefile variables -set(KOKKOS_INTERNAL_LDG_INTRINSIC use_ldg) -set(KOKKOS_INTERNAL_UVM librt) -set(KOKKOS_INTERNAL_RELOCATABLE_DEVICE_CODE rdc) - - -#------------------------------------------------------------------------------- -# List of possible Options for CUDA -#------------------------------------------------------------------------------- -# From Makefile.kokkos: Options: use_ldg,force_uvm,rdc -set(KOKKOS_CUDA_OPTIONS_LIST) -list(APPEND KOKKOS_CUDA_OPTIONS_LIST - LDG_INTRINSIC # use_ldg - UVM # force_uvm - RELOCATABLE_DEVICE_CODE # rdc - LAMBDA # enable_lambda - ) - -# Map of cmake variables to Makefile variables -set(KOKKOS_INTERNAL_LDG_INTRINSIC use_ldg) -set(KOKKOS_INTERNAL_UVM force_uvm) -set(KOKKOS_INTERNAL_RELOCATABLE_DEVICE_CODE rdc) -set(KOKKOS_INTERNAL_LAMBDA enable_lambda) - - -#------------------------------------------------------------------------------- -# List of possible 
Options for HPX -#------------------------------------------------------------------------------- -# From Makefile.kokkos: Options: enable_async_dispatch -set(KOKKOS_HPX_OPTIONS_LIST) -list(APPEND KOKKOS_HPX_OPTIONS_LIST - ASYNC_DISPATCH # enable_async_dispatch - ) - -# Map of cmake variables to Makefile variables -set(KOKKOS_INTERNAL_ENABLE_ASYNC_DISPATCH enable_async_dispatch) - - -#------------------------------------------------------------------------------- -#------------------------------- Create doc strings ---------------------------- -#------------------------------------------------------------------------------- - -set(tmpr "\n ") -string(REPLACE ";" ${tmpr} KOKKOS_INTERNAL_ARCH_DOCSTR "${KOKKOS_ARCH_LIST}") -set(KOKKOS_INTERNAL_ARCH_DOCSTR "${tmpr}${KOKKOS_INTERNAL_ARCH_DOCSTR}") -# This would be useful, but we use Foo_ENABLE mechanisms -#string(REPLACE ";" ${tmpr} KOKKOS_INTERNAL_DEVICES_DOCSTR "${KOKKOS_DEVICES_LIST}") -#string(REPLACE ";" ${tmpr} KOKKOS_INTERNAL_USE_TPLS_DOCSTR "${KOKKOS_USE_TPLS_LIST}") -#string(REPLACE ";" ${tmpr} KOKKOS_INTERNAL_CUDA_OPTIONS_DOCSTR "${KOKKOS_CUDA_OPTIONS_LIST}") - -#------------------------------------------------------------------------------- -#------------------------------- GENERAL OPTIONS ------------------------------- -#------------------------------------------------------------------------------- - -# Setting this variable to a value other than "None" can improve host -# performance by turning on architecture specific code. -# NOT SET is used to determine if the option is passed in. It is reset to -# default "None" down below. -set(KOKKOS_ARCH "NOT_SET" CACHE STRING - "Optimize for specific host architecture. Options are: ${KOKKOS_INTERNAL_ARCH_DOCSTR}") - -# Whether to build separate libraries or now -set(KOKKOS_SEPARATE_LIBS OFF CACHE BOOL "OFF = kokkos. ON = kokkoscore, kokkoscontainers, and kokkosalgorithms.") - -# Qthreads options. 
-set(KOKKOS_QTHREADS_DIR "" CACHE PATH "Location of Qthreads library.") - -# HPX options. -set(KOKKOS_HPX_DIR "" CACHE PATH "Location of HPX library.") - -# Whether to build separate libraries or now -set(KOKKOS_SEPARATE_TESTS OFF CACHE BOOL "Provide unit test targets with finer granularity.") - -#------------------------------------------------------------------------------- -#------------------------------- KOKKOS_DEVICES -------------------------------- -#------------------------------------------------------------------------------- -# Figure out default settings -IF(Trilinos_ENABLE_Kokkos) - set_kokkos_default_default(SERIAL ON) - set_kokkos_default_default(PTHREAD OFF) - IF(TPL_ENABLE_QTHREAD) - set_kokkos_default_default(QTHREADS ${TPL_ENABLE_QTHREAD}) - ELSE() - set_kokkos_default_default(QTHREADS OFF) - ENDIF() - IF(TPL_ENABLE_HPX) - set_kokkos_default_default(HPX ON) - ELSE() - set_kokkos_default_default(HPX OFF) - ENDIF() - IF(Trilinos_ENABLE_OpenMP) - set_kokkos_default_default(OPENMP ${Trilinos_ENABLE_OpenMP}) - ELSE() - set_kokkos_default_default(OPENMP OFF) - ENDIF() - IF(TPL_ENABLE_CUDA) - set_kokkos_default_default(CUDA ${TPL_ENABLE_CUDA}) - ELSE() - set_kokkos_default_default(CUDA OFF) - ENDIF() - set_kokkos_default_default(ROCM OFF) -ELSE() - set_kokkos_default_default(SERIAL ON) - set_kokkos_default_default(OPENMP OFF) - set_kokkos_default_default(PTHREAD OFF) - set_kokkos_default_default(QTHREAD OFF) - set_kokkos_default_default(HPX OFF) - set_kokkos_default_default(CUDA OFF) - set_kokkos_default_default(ROCM OFF) -ENDIF() - -# Set which Kokkos backend to use. -# These are the actual options that define the settings. -set(KOKKOS_ENABLE_SERIAL ${KOKKOS_INTERNAL_ENABLE_SERIAL_DEFAULT} CACHE BOOL "Whether to enable the Kokkos::Serial device. This device executes \"parallel\" kernels sequentially on a single CPU thread. It is enabled by default. 
If you disable this device, please enable at least one other CPU device, such as Kokkos::OpenMP or Kokkos::Threads.") -set(KOKKOS_ENABLE_OPENMP ${KOKKOS_INTERNAL_ENABLE_OPENMP_DEFAULT} CACHE BOOL "Enable OpenMP support in Kokkos." FORCE) -set(KOKKOS_ENABLE_PTHREAD ${KOKKOS_INTERNAL_ENABLE_PTHREAD_DEFAULT} CACHE BOOL "Enable Pthread support in Kokkos.") -set(KOKKOS_ENABLE_QTHREADS ${KOKKOS_INTERNAL_ENABLE_QTHREADS_DEFAULT} CACHE BOOL "Enable Qthreads support in Kokkos.") -set(KOKKOS_ENABLE_HPX ${KOKKOS_INTERNAL_ENABLE_HPX_DEFAULT} CACHE BOOL "Enable HPX support in Kokkos.") -set(KOKKOS_ENABLE_CUDA ${KOKKOS_INTERNAL_ENABLE_CUDA_DEFAULT} CACHE BOOL "Enable CUDA support in Kokkos.") -set(KOKKOS_ENABLE_ROCM ${KOKKOS_INTERNAL_ENABLE_ROCM_DEFAULT} CACHE BOOL "Enable ROCm support in Kokkos.") - - - -#------------------------------------------------------------------------------- -#------------------------------- KOKKOS DEBUG and PROFILING -------------------- -#------------------------------------------------------------------------------- - -# Debug related options enable compiler warnings - -set_kokkos_default_default(DEBUG OFF) -set(KOKKOS_ENABLE_DEBUG ${KOKKOS_INTERNAL_ENABLE_DEBUG_DEFAULT} CACHE BOOL "Enable Kokkos Debug.") - -# From Makefile.kokkos: Advanced Options: -#compiler_warnings, aggressive_vectorization, disable_profiling, disable_dualview_modify_check, enable_profile_load_print -set_kokkos_default_default(COMPILER_WARNINGS OFF) -set(KOKKOS_ENABLE_COMPILER_WARNINGS ${KOKKOS_INTERNAL_ENABLE_COMPILER_WARNINGS_DEFAULT} CACHE BOOL "Enable compiler warnings.") - -set_kokkos_default_default(DEBUG_DUALVIEW_MODIFY_CHECK OFF) -set(KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK ${KOKKOS_INTERNAL_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK_DEFAULT} CACHE BOOL "Enable dualview modify check.") - -# Enable aggressive vectorization. 
-set_kokkos_default_default(AGGRESSIVE_VECTORIZATION OFF) -set(KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION ${KOKKOS_INTERNAL_ENABLE_AGGRESSIVE_VECTORIZATION_DEFAULT} CACHE BOOL "Enable aggressive vectorization.") - -# Enable profiling. -set_kokkos_default_default(PROFILING ON) -set(KOKKOS_ENABLE_PROFILING ${KOKKOS_INTERNAL_ENABLE_PROFILING_DEFAULT} CACHE BOOL "Enable profiling.") - -set_kokkos_default_default(PROFILING_LOAD_PRINT OFF) -set(KOKKOS_ENABLE_PROFILING_LOAD_PRINT ${KOKKOS_INTERNAL_ENABLE_PROFILING_LOAD_PRINT_DEFAULT} CACHE BOOL "Enable profile load print.") - -set_kokkos_default_default(DEPRECATED_CODE ON) -set(KOKKOS_ENABLE_DEPRECATED_CODE ${KOKKOS_INTERNAL_ENABLE_DEPRECATED_CODE_DEFAULT} CACHE BOOL "Enable deprecated code.") - -set_kokkos_default_default(EXPLICIT_INSTANTIATION OFF) -set(KOKKOS_ENABLE_EXPLICIT_INSTANTIATION ${KOKKOS_INTERNAL_ENABLE_EXPLICIT_INSTANTIATION_DEFAULT} CACHE BOOL "Enable explicit template instantiation.") - -#------------------------------------------------------------------------------- -#------------------------------- KOKKOS_USE_TPLS ------------------------------- -#------------------------------------------------------------------------------- -# Enable hwloc library. -# Figure out default: -IF(Trilinos_ENABLE_Kokkos AND TPL_ENABLE_HWLOC) - set_kokkos_default_default(HWLOC ON) -ELSE() - set_kokkos_default_default(HWLOC OFF) -ENDIF() -set(KOKKOS_ENABLE_HWLOC ${KOKKOS_INTERNAL_ENABLE_HWLOC_DEFAULT} CACHE BOOL "Enable hwloc for better process placement.") -set(KOKKOS_HWLOC_DIR "" CACHE PATH "Location of hwloc library. (kokkos tpl)") - -# Enable memkind library. -set_kokkos_default_default(MEMKIND OFF) -set(KOKKOS_ENABLE_MEMKIND ${KOKKOS_INTERNAL_ENABLE_MEMKIND_DEFAULT} CACHE BOOL "Enable memkind. (kokkos tpl)") -set(KOKKOS_MEMKIND_DIR "" CACHE PATH "Location of memkind library. (kokkos tpl)") - -# Enable rt library. 
-IF(Trilinos_ENABLE_Kokkos) - IF(DEFINED TPL_ENABLE_LIBRT) - set_kokkos_default_default(LIBRT ${TPL_ENABLE_LIBRT}) - ELSE() - set_kokkos_default_default(LIBRT OFF) - ENDIF() -ELSE() - set_kokkos_default_default(LIBRT ON) -ENDIF() -set(KOKKOS_ENABLE_LIBRT ${KOKKOS_INTERNAL_ENABLE_LIBRT_DEFAULT} CACHE BOOL "Enable librt for more precise timer. (kokkos tpl)") - - -#------------------------------------------------------------------------------- -#------------------------------- KOKKOS_CUDA_OPTIONS --------------------------- -#------------------------------------------------------------------------------- - -# CUDA options. -# Set Defaults -set_kokkos_default_default(CUDA_LDG_INTRINSIC_DEFAULT OFF) -set_kokkos_default_default(CUDA_UVM_DEFAULT OFF) -set_kokkos_default_default(CUDA_RELOCATABLE_DEVICE_CODE OFF) -IF(Trilinos_ENABLE_Kokkos) - IF(KOKKOS_ENABLE_CUDA) - find_package(CUDA) - ENDIF() - IF (DEFINED CUDA_VERSION) - IF (CUDA_VERSION VERSION_GREATER "7.0") - set_kokkos_default_default(CUDA_LAMBDA ON) - ELSE() - set_kokkos_default_default(CUDA_LAMBDA OFF) - ENDIF() - ENDIF() -ELSE() - set_kokkos_default_default(CUDA_LAMBDA OFF) -ENDIF() - -# Set actual options -set(KOKKOS_CUDA_DIR "" CACHE PATH "Location of CUDA library. Defaults to where nvcc installed.") -set(KOKKOS_ENABLE_CUDA_LDG_INTRINSIC ${KOKKOS_INTERNAL_ENABLE_CUDA_LDG_INTRINSIC_DEFAULT} CACHE BOOL "Enable CUDA LDG. (cuda option)") -set(KOKKOS_ENABLE_CUDA_UVM ${KOKKOS_INTERNAL_ENABLE_CUDA_UVM_DEFAULT} CACHE BOOL "Enable CUDA unified virtual memory.") -set(KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE ${KOKKOS_INTERNAL_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE_DEFAULT} CACHE BOOL "Enable relocatable device code for CUDA. (cuda option)") -set(KOKKOS_ENABLE_CUDA_LAMBDA ${KOKKOS_INTERNAL_ENABLE_CUDA_LAMBDA_DEFAULT} CACHE BOOL "Enable lambdas for CUDA. 
(cuda option)") - - -#------------------------------------------------------------------------------- -#------------------------------- KOKKOS_HPX_OPTIONS ---------------------------- -#------------------------------------------------------------------------------- - -# HPX options. -# Set Defaults -set_kokkos_default_default(HPX_ASYNC_DISPATCH OFF) - -# Set actual options -set(KOKKOS_ENABLE_HPX_ASYNC_DISPATCH ${KOKKOS_INTERNAL_ENABLE_HPX_ASYNC_DISPATCH_DEFAULT} CACHE BOOL "Enable HPX async dispatch.") - - -#------------------------------------------------------------------------------- -#----------------------- HOST ARCH AND LEGACY TRIBITS -------------------------- -#------------------------------------------------------------------------------- - -# This defines the previous legacy TriBITS builds. -set(KOKKOS_LEGACY_TRIBITS False) -IF ("${KOKKOS_ARCH}" STREQUAL "NOT_SET") - set(KOKKOS_ARCH "None") - IF(KOKKOS_HAS_TRILINOS) - set(KOKKOS_LEGACY_TRIBITS True) - ENDIF() -ENDIF() -IF (KOKKOS_HAS_TRILINOS) - IF (KOKKOS_LEGACY_TRIBITS) - message(STATUS "Using the legacy tribits build because KOKKOS_ARCH not set") - ELSE() - message(STATUS "NOT using the legacy tribits build because KOKKOS_ARCH *is* set") - ENDIF() -ENDIF() - -#------------------------------------------------------------------------------- -#----------------------- Set CamelCase Options if they are not yet set --------- -#------------------------------------------------------------------------------- - -foreach(opt ${KOKKOS_INTERNAL_ENABLE_OPTIONS_LIST}) - string(TOUPPER ${opt} OPT ) - UNSET(KOKKOS_ENABLE_${OPT}_INTERNAL CACHE) - SET(KOKKOS_ENABLE_${OPT}_INTERNAL ${KOKKOS_ENABLE_${OPT}} CACHE BOOL INTERNAL) - IF(DEFINED KOKKOS_ENABLE_${OPT}) - UNSET(Kokkos_ENABLE_${opt} CACHE) - SET(Kokkos_ENABLE_${opt} ${KOKKOS_ENABLE_${OPT}} CACHE BOOL "CamelCase Compatibility setting for KOKKOS_ENABLE_${OPT}") - ENDIF() -endforeach() diff --git a/lib/kokkos/cmake/kokkos_pick_cxx_std.cmake 
b/lib/kokkos/cmake/kokkos_pick_cxx_std.cmake new file mode 100644 index 0000000000..cf14948f43 --- /dev/null +++ b/lib/kokkos/cmake/kokkos_pick_cxx_std.cmake @@ -0,0 +1,46 @@ +# From CMake 3.10 documentation + +#This can run at any time +KOKKOS_OPTION(CXX_STANDARD "" STRING "The C++ standard for Kokkos to use: 11, 14, 17, or 20. If empty, this will default to CMAKE_CXX_STANDARD. If both CMAKE_CXX_STANDARD and Kokkos_CXX_STANDARD are empty, this will default to 11") + +# Set CXX standard flags +SET(KOKKOS_ENABLE_CXX11 OFF) +SET(KOKKOS_ENABLE_CXX14 OFF) +SET(KOKKOS_ENABLE_CXX17 OFF) +SET(KOKKOS_ENABLE_CXX20 OFF) +IF (KOKKOS_CXX_STANDARD) + IF (${KOKKOS_CXX_STANDARD} STREQUAL "c++98") + MESSAGE(FATAL_ERROR "Kokkos no longer supports C++98 - minimum C++11") + ELSEIF (${KOKKOS_CXX_STANDARD} STREQUAL "c++11") + MESSAGE(WARNING "Deprecated Kokkos C++ standard set as 'c++11'. Use '11' instead.") + SET(KOKKOS_CXX_STANDARD "11") + ELSEIF(${KOKKOS_CXX_STANDARD} STREQUAL "c++14") + MESSAGE(WARNING "Deprecated Kokkos C++ standard set as 'c++14'. Use '14' instead.") + SET(KOKKOS_CXX_STANDARD "14") + ELSEIF(${KOKKOS_CXX_STANDARD} STREQUAL "c++17") + MESSAGE(WARNING "Deprecated Kokkos C++ standard set as 'c++17'. Use '17' instead.") + SET(KOKKOS_CXX_STANDARD "17") + ELSEIF(${KOKKOS_CXX_STANDARD} STREQUAL "c++1y") + MESSAGE(WARNING "Deprecated Kokkos C++ standard set as 'c++1y'. Use '14' instead.") + SET(KOKKOS_CXX_STANDARD "14") #kokkos_test_cxx_std.cmake accepts only 11, 14, 17, 20 + ELSEIF(${KOKKOS_CXX_STANDARD} STREQUAL "c++1z") + MESSAGE(WARNING "Deprecated Kokkos C++ standard set as 'c++1z'. Use '17' instead.") + SET(KOKKOS_CXX_STANDARD "17") + ELSEIF(${KOKKOS_CXX_STANDARD} STREQUAL "c++2a") + MESSAGE(WARNING "Deprecated Kokkos C++ standard set as 'c++2a'. 
Use '20' instead.") + SET(KOKKOS_CXX_STANDARD "20") + ENDIF() +ENDIF() + +IF (NOT KOKKOS_CXX_STANDARD AND NOT CMAKE_CXX_STANDARD) + MESSAGE(STATUS "Setting default Kokkos CXX standard to 11") + SET(KOKKOS_CXX_STANDARD "11") +ELSEIF(NOT KOKKOS_CXX_STANDARD) + MESSAGE(STATUS "Setting default Kokkos CXX standard to ${CMAKE_CXX_STANDARD}") + SET(KOKKOS_CXX_STANDARD ${CMAKE_CXX_STANDARD}) +ENDIF() + + + + + diff --git a/lib/kokkos/cmake/kokkos_settings.cmake b/lib/kokkos/cmake/kokkos_settings.cmake deleted file mode 100644 index 2c622d0de9..0000000000 --- a/lib/kokkos/cmake/kokkos_settings.cmake +++ /dev/null @@ -1,259 +0,0 @@ -########################## NOTES ############################################### -# This files goal is to take CMake options found in kokkos_options.cmake but -# possibly set from elsewhere -# (see: trilinos/cmake/ProjectCOmpilerPostConfig.cmake) -# using CMake idioms and map them onto the KOKKOS_SETTINGS variables that gets -# passed to the kokkos makefile configuration: -# make -f ${CMAKE_SOURCE_DIR}/core/src/Makefile ${KOKKOS_SETTINGS} build-makefile-cmake-kokkos -# that generates KokkosCore_config.h and kokkos_generated_settings.cmake -# To understand how to form KOKKOS_SETTINGS, see -# /Makefile.kokkos - -#------------------------------------------------------------------------------- -#------------------------------- GENERAL OPTIONS ------------------------------- -#------------------------------------------------------------------------------- - -# Ensure that KOKKOS_ARCH is in the ARCH_LIST -if (KOKKOS_ARCH MATCHES ",") - message("-- Detected a comma in: KOKKOS_ARCH=`${KOKKOS_ARCH}`") - message("-- Although we prefer KOKKOS_ARCH to be semicolon-delimited, we do allow") - message("-- comma-delimited values for compatibility with scripts (see github.com/trilinos/Trilinos/issues/2330)") - string(REPLACE "," ";" KOKKOS_ARCH "${KOKKOS_ARCH}") - message("-- Commas were changed to semicolons, now KOKKOS_ARCH=`${KOKKOS_ARCH}`") -endif() 
-foreach(arch ${KOKKOS_ARCH}) - list(FIND KOKKOS_ARCH_LIST ${arch} indx) - if (indx EQUAL -1) - message(FATAL_ERROR "`${arch}` is not an accepted value in KOKKOS_ARCH=`${KOKKOS_ARCH}`." - " Please pick from these choices: ${KOKKOS_INTERNAL_ARCH_DOCSTR}") - endif () -endforeach() - -# KOKKOS_SETTINGS uses KOKKOS_ARCH -string(REPLACE ";" "," KOKKOS_GMAKE_ARCH "${KOKKOS_ARCH}") - -# From Makefile.kokkos: Options: yes,no -if(${KOKKOS_ENABLE_DEBUG}) - set(KOKKOS_GMAKE_DEBUG yes) -else() - set(KOKKOS_GMAKE_DEBUG no) -endif() - -#------------------------------- KOKKOS_DEVICES -------------------------------- -# Can have multiple devices -set(KOKKOS_DEVICESl) -foreach(devopt ${KOKKOS_DEVICES_LIST}) - string(TOUPPER ${devopt} devoptuc) - if (${KOKKOS_ENABLE_${devoptuc}}) - list(APPEND KOKKOS_DEVICESl ${devopt}) - endif () -endforeach() -# List needs to be comma-delmitted -string(REPLACE ";" "," KOKKOS_GMAKE_DEVICES "${KOKKOS_DEVICESl}") - -#------------------------------- KOKKOS_OPTIONS -------------------------------- -# From Makefile.kokkos: Options: aggressive_vectorization,disable_profiling,disable_deprecated_code -#compiler_warnings, aggressive_vectorization, disable_profiling, disable_dualview_modify_check, enable_profile_load_print - -set(KOKKOS_OPTIONSl) -if(${KOKKOS_ENABLE_COMPILER_WARNINGS}) - list(APPEND KOKKOS_OPTIONSl compiler_warnings) -endif() -if(${KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION}) - list(APPEND KOKKOS_OPTIONSl aggressive_vectorization) -endif() -if(NOT ${KOKKOS_ENABLE_PROFILING}) - list(APPEND KOKKOS_OPTIONSl disable_profiling) -endif() -if(NOT ${KOKKOS_ENABLE_DEPRECATED_CODE}) - list(APPEND KOKKOS_OPTIONSl disable_deprecated_code) -endif() -if(NOT ${KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK}) - list(APPEND KOKKOS_OPTIONSl disable_dualview_modify_check) -endif() -if(${KOKKOS_ENABLE_PROFILING_LOAD_PRINT}) - list(APPEND KOKKOS_OPTIONSl enable_profile_load_print) -endif() -if(${KOKKOS_ENABLE_EXPLICIT_INSTANTIATION}) - list(APPEND KOKKOS_OPTIONSl 
enable_eti) -endif() -# List needs to be comma-delimitted -string(REPLACE ";" "," KOKKOS_GMAKE_OPTIONS "${KOKKOS_OPTIONSl}") - - -#------------------------------- KOKKOS_USE_TPLS ------------------------------- -# Construct the Makefile options -set(KOKKOS_USE_TPLSl) -foreach(tplopt ${KOKKOS_USE_TPLS_LIST}) - if (${KOKKOS_ENABLE_${tplopt}}) - list(APPEND KOKKOS_USE_TPLSl ${KOKKOS_INTERNAL_${tplopt}}) - endif () -endforeach() -# List needs to be comma-delimitted -string(REPLACE ";" "," KOKKOS_GMAKE_USE_TPLS "${KOKKOS_USE_TPLSl}") - - -#------------------------------- KOKKOS_CUDA_OPTIONS --------------------------- -# Construct the Makefile options -set(KOKKOS_CUDA_OPTIONSl) -foreach(cudaopt ${KOKKOS_CUDA_OPTIONS_LIST}) - if (${KOKKOS_ENABLE_CUDA_${cudaopt}}) - list(APPEND KOKKOS_CUDA_OPTIONSl ${KOKKOS_INTERNAL_${cudaopt}}) - endif () -endforeach() -# List needs to be comma-delmitted -string(REPLACE ";" "," KOKKOS_GMAKE_CUDA_OPTIONS "${KOKKOS_CUDA_OPTIONSl}") - -#------------------------------- PATH VARIABLES -------------------------------- -# Want makefile to use same executables specified which means modifying -# the path so the $(shell ...) commands in the makefile see the right exec -# Also, the Makefile's use FOO_PATH naming scheme for -I/-L construction -#TODO: Makefile.kokkos allows this to be overwritten? 
ROCM_HCC_PATH - -set(KOKKOS_INTERNAL_PATHS) -set(addpathl) -foreach(kvar IN LISTS KOKKOS_USE_TPLS_LIST ITEMS CUDA QTHREADS) - if(${KOKKOS_ENABLE_${kvar}}) - if(DEFINED KOKKOS_${kvar}_DIR) - set(KOKKOS_INTERNAL_PATHS ${KOKKOS_INTERNAL_PATHS} "${kvar}_PATH=${KOKKOS_${kvar}_DIR}") - if(IS_DIRECTORY ${KOKKOS_${kvar}_DIR}/bin) - list(APPEND addpathl ${KOKKOS_${kvar}_DIR}/bin) - endif() - endif() - endif() -endforeach() -# Path env is : delimitted -string(REPLACE ";" ":" KOKKOS_INTERNAL_ADDTOPATH "${addpathl}") - - -######################### SET KOKKOS_SETTINGS ################################## -# Set the KOKKOS_SETTINGS String -- this is the primary communication with the -# makefile configuration. See Makefile.kokkos - -set(KOKKOS_SETTINGS KOKKOS_CMAKE=yes) -set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} KOKKOS_SRC_PATH=${KOKKOS_SRC_PATH}) -set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} KOKKOS_PATH=${KOKKOS_PATH}) -set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} KOKKOS_INSTALL_PATH=${CMAKE_INSTALL_PREFIX}) - -# Form of KOKKOS_foo=$KOKKOS_foo -foreach(kvar ARCH;DEVICES;DEBUG;OPTIONS;CUDA_OPTIONS;USE_TPLS) - if(DEFINED KOKKOS_GMAKE_${kvar}) - if (NOT "${KOKKOS_GMAKE_${kvar}}" STREQUAL "") - set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} KOKKOS_${kvar}=${KOKKOS_GMAKE_${kvar}}) - endif() - endif() -endforeach() - -# Form of VAR=VAL -#TODO: Makefile supports MPICH_CXX, OMPI_CXX as well -foreach(ovar CXX;CXXFLAGS;LDFLAGS) - if(DEFINED ${ovar}) - if (NOT "${${ovar}}" STREQUAL "") - set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} ${ovar}=${${ovar}}) - endif() - endif() -endforeach() - -# Finally, do the paths -if (NOT "${KOKKOS_INTERNAL_PATHS}" STREQUAL "") - set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} ${KOKKOS_INTERNAL_PATHS}) -endif() -if (NOT "${KOKKOS_INTERNAL_ADDTOPATH}" STREQUAL "") - set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} "PATH=${KOKKOS_INTERNAL_ADDTOPATH}:$ENV{PATH}") -endif() - -if (CMAKE_CXX_STANDARD) - if (CMAKE_CXX_STANDARD STREQUAL "98") - message(FATAL_ERROR "Kokkos requires C++11 or newer!") - endif() - 
set(KOKKOS_CXX_STANDARD "c++${CMAKE_CXX_STANDARD}") - if (CMAKE_CXX_EXTENSIONS) - if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - set(KOKKOS_CXX_STANDARD "gnu++${CMAKE_CXX_STANDARD}") - endif() - endif() - set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} "KOKKOS_CXX_STANDARD=\"${KOKKOS_CXX_STANDARD}\"") -endif() - -# Final form that gets passed to make -set(KOKKOS_SETTINGS env ${KOKKOS_SETTINGS}) - - -############################ PRINT CONFIGURE STATUS ############################ - -if(KOKKOS_CMAKE_VERBOSE) - message(STATUS "") - message(STATUS "****************** Kokkos Settings ******************") - message(STATUS "Execution Spaces") - - if(KOKKOS_ENABLE_CUDA) - message(STATUS " Device Parallel: Cuda") - else() - message(STATUS " Device Parallel: None") - endif() - - if(KOKKOS_ENABLE_OPENMP) - message(STATUS " Host Parallel: OpenMP") - elseif(KOKKOS_ENABLE_PTHREAD) - message(STATUS " Host Parallel: Pthread") - elseif(KOKKOS_ENABLE_QTHREADS) - message(STATUS " Host Parallel: Qthreads") - elseif(KOKKOS_ENABLE_HPX) - message(STATUS " Host Parallel: HPX") - else() - message(STATUS " Host Parallel: None") - endif() - - if(KOKKOS_ENABLE_SERIAL) - message(STATUS " Host Serial: Serial") - else() - message(STATUS " Host Serial: None") - endif() - - message(STATUS "") - message(STATUS "Architectures:") - message(STATUS " ${KOKKOS_GMAKE_ARCH}") - - message(STATUS "") - message(STATUS "Enabled options") - - if(KOKKOS_SEPARATE_LIBS) - message(STATUS " KOKKOS_SEPARATE_LIBS") - endif() - - foreach(opt IN LISTS KOKKOS_INTERNAL_ENABLE_OPTIONS_LIST) - string(TOUPPER ${opt} OPT) - if (KOKKOS_ENABLE_${OPT}) - message(STATUS " KOKKOS_ENABLE_${OPT}") - endif() - endforeach() - - if(KOKKOS_ENABLE_CUDA) - if(KOKKOS_CUDA_DIR) - message(STATUS " KOKKOS_CUDA_DIR: ${KOKKOS_CUDA_DIR}") - endif() - endif() - - if(KOKKOS_QTHREADS_DIR) - message(STATUS " KOKKOS_QTHREADS_DIR: ${KOKKOS_QTHREADS_DIR}") - endif() - - if(KOKKOS_HWLOC_DIR) - message(STATUS " KOKKOS_HWLOC_DIR: ${KOKKOS_HWLOC_DIR}") - endif() - - 
if(KOKKOS_MEMKIND_DIR) - message(STATUS " KOKKOS_MEMKIND_DIR: ${KOKKOS_MEMKIND_DIR}") - endif() - - if(KOKKOS_HPX_DIR) - message(STATUS " KOKKOS_HPX_DIR: ${KOKKOS_HPX_DIR}") - endif() - - message(STATUS "") - message(STATUS "Final kokkos settings variable:") - message(STATUS " ${KOKKOS_SETTINGS}") - - message(STATUS "*****************************************************") - message(STATUS "") -endif() diff --git a/lib/kokkos/cmake/kokkos_test_cxx_std.cmake b/lib/kokkos/cmake/kokkos_test_cxx_std.cmake new file mode 100644 index 0000000000..c264517abe --- /dev/null +++ b/lib/kokkos/cmake/kokkos_test_cxx_std.cmake @@ -0,0 +1,144 @@ +KOKKOS_CFG_DEPENDS(CXX_STD COMPILER_ID) + +FUNCTION(kokkos_set_cxx_standard_feature standard) + SET(EXTENSION_NAME CMAKE_CXX${standard}_EXTENSION_COMPILE_OPTION) + SET(STANDARD_NAME CMAKE_CXX${standard}_STANDARD_COMPILE_OPTION) + SET(FEATURE_NAME cxx_std_${standard}) + #CMake's way of telling us that the standard (or extension) + #flags are supported is the extension/standard variables + IF (NOT DEFINED CMAKE_CXX_EXTENSIONS) + IF(KOKKOS_DONT_ALLOW_EXTENSIONS) + GLOBAL_SET(KOKKOS_USE_CXX_EXTENSIONS OFF) + ELSE() + GLOBAL_SET(KOKKOS_USE_CXX_EXTENSIONS ON) + ENDIF() + ELSEIF(CMAKE_CXX_EXTENSIONS) + IF(KOKKOS_DONT_ALLOW_EXTENSIONS) + MESSAGE(FATAL_ERROR "The chosen configuration does not support CXX extensions flags: ${KOKKOS_DONT_ALLOW_EXTENSIONS}. 
Must set CMAKE_CXX_EXTENSIONS=OFF to continue") + ELSE() + GLOBAL_SET(KOKKOS_USE_CXX_EXTENSIONS ON) + ENDIF() + ELSE() + #For trilinos, we need to make sure downstream projects + GLOBAL_SET(KOKKOS_USE_CXX_EXTENSIONS OFF) + ENDIF() + + IF (KOKKOS_USE_CXX_EXTENSIONS AND ${EXTENSION_NAME}) + MESSAGE(STATUS "Using ${${EXTENSION_NAME}} for C++${standard} extensions as feature") + GLOBAL_SET(KOKKOS_CXX_STANDARD_FEATURE ${FEATURE_NAME}) + ELSEIF(NOT KOKKOS_USE_CXX_EXTENSIONS AND ${STANDARD_NAME}) + MESSAGE(STATUS "Using ${${STANDARD_NAME}} for C++${standard} standard as feature") + GLOBAL_SET(KOKKOS_CXX_STANDARD_FEATURE ${FEATURE_NAME}) + ELSE() + #nope, we can't do anything here + MESSAGE(WARNING "C++${standard} is not supported as a compiler feature. We will choose custom flags for now, but this behavior has been deprecated. Please open an issue at https://github.com/kokkos/kokkos/issues reporting that ${KOKKOS_CXX_COMPILER_ID} ${KOKKOS_CXX_COMPILER_VERSION} failed for ${KOKKOS_CXX_STANDARD}, preferrably including your CMake command.") + GLOBAL_SET(KOKKOS_CXX_STANDARD_FEATURE "") + ENDIF() + + IF(NOT ${FEATURE_NAME} IN_LIST CMAKE_CXX_COMPILE_FEATURES) + MESSAGE(FATAL_ERROR "Compiler ${KOKKOS_CXX_COMPILER_ID} should support ${FEATURE_NAME}, but CMake reports feature not supported") + ENDIF() +ENDFUNCTION() + + +IF (KOKKOS_CXX_STANDARD AND CMAKE_CXX_STANDARD) + #make sure these are consistent + IF (NOT KOKKOS_CXX_STANDARD STREQUAL CMAKE_CXX_STANDARD) + MESSAGE(WARNING "Specified both CMAKE_CXX_STANDARD=${CMAKE_CXX_STANDARD} and KOKKOS_CXX_STANDARD=${KOKKOS_CXX_STANDARD}, but they don't match") + SET(CMAKE_CXX_STANDARD ${KOKKOS_CXX_STANDARD} CACHE STRING "C++ standard" FORCE) + ENDIF() +ENDIF() + + +IF (KOKKOS_CXX_STANDARD STREQUAL "11" ) + kokkos_set_cxx_standard_feature(11) + SET(KOKKOS_ENABLE_CXX11 ON) + SET(KOKKOS_CXX_INTERMEDIATE_STANDARD "11") +ELSEIF(KOKKOS_CXX_STANDARD STREQUAL "14") + kokkos_set_cxx_standard_feature(14) + SET(KOKKOS_CXX_INTERMEDIATE_STANDARD "1Y") 
+ SET(KOKKOS_ENABLE_CXX14 ON) +ELSEIF(KOKKOS_CXX_STANDARD STREQUAL "17") + kokkos_set_cxx_standard_feature(17) + SET(KOKKOS_CXX_INTERMEDIATE_STANDARD "1Z") + SET(KOKKOS_ENABLE_CXX17 ON) +ELSEIF(KOKKOS_CXX_STANDARD STREQUAL "20") + kokkos_set_cxx_standard_feature(20) + SET(KOKKOS_CXX_INTERMEDIATE_STANDARD "2A") + SET(KOKKOS_ENABLE_CXX20 ON) +ELSEIF(KOKKOS_CXX_STANDARD STREQUAL "98") + MESSAGE(FATAL_ERROR "Kokkos requires C++11 or newer!") +ELSE() + MESSAGE(FATAL_ERROR "Unknown C++ standard ${KOKKOS_CXX_STANDARD} - must be 11, 14, 17, or 20") +ENDIF() + + + +# Enforce that extensions are turned off for nvcc_wrapper. +# For compiling CUDA code using nvcc_wrapper, we will use the host compiler's +# flags for turning on C++11. Since for compiler ID and versioning purposes +# CMake recognizes the host compiler when calling nvcc_wrapper, this just +# works. Both NVCC and nvcc_wrapper only recognize '-std=c++11' which means +# that we can only use host compilers for CUDA builds that use those flags. +# It also means that extensions (gnu++11) can't be turned on for CUDA builds. + +IF(KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA) + IF(NOT DEFINED CMAKE_CXX_EXTENSIONS) + SET(CMAKE_CXX_EXTENSIONS OFF) + ELSEIF(CMAKE_CXX_EXTENSIONS) + MESSAGE(FATAL_ERROR "NVCC doesn't support C++ extensions. Set -DCMAKE_CXX_EXTENSIONS=OFF") + ENDIF() +ENDIF() + +IF(KOKKOS_ENABLE_CUDA) + # ENFORCE that the compiler can compile CUDA code. + IF(KOKKOS_CXX_COMPILER_ID STREQUAL Clang) + IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 4.0.0) + MESSAGE(FATAL_ERROR "Compiling CUDA code directly with Clang requires version 4.0.0 or higher.") + ENDIF() + IF(NOT DEFINED CMAKE_CXX_EXTENSIONS) + SET(CMAKE_CXX_EXTENSIONS OFF) + ELSEIF(CMAKE_CXX_EXTENSIONS) + MESSAGE(FATAL_ERROR "Compiling CUDA code with clang doesn't support C++ extensions. Set -DCMAKE_CXX_EXTENSIONS=OFF") + ENDIF() + ELSEIF(NOT KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA) + MESSAGE(FATAL_ERROR "Invalid compiler for CUDA. 
The compiler must be nvcc_wrapper or Clang, but compiler ID was ${KOKKOS_CXX_COMPILER_ID}") + ENDIF() +ENDIF() + +IF (NOT KOKKOS_CXX_STANDARD_FEATURE) + #we need to pick the C++ flags ourselves + UNSET(CMAKE_CXX_STANDARD) + UNSET(CMAKE_CXX_STANDARD CACHE) + IF(KOKKOS_CXX_COMPILER_ID STREQUAL Cray) + INCLUDE(${KOKKOS_SRC_PATH}/cmake/cray.cmake) + kokkos_set_cray_flags(${KOKKOS_CXX_STANDARD} ${KOKKOS_CXX_INTERMEDIATE_STANDARD}) + ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL PGI) + INCLUDE(${KOKKOS_SRC_PATH}/cmake/pgi.cmake) + kokkos_set_pgi_flags(${KOKKOS_CXX_STANDARD} ${KOKKOS_CXX_INTERMEDIATE_STANDARD}) + ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL Intel) + INCLUDE(${KOKKOS_SRC_PATH}/cmake/intel.cmake) + kokkos_set_intel_flags(${KOKKOS_CXX_STANDARD} ${KOKKOS_CXX_INTERMEDIATE_STANDARD}) + ELSE() + INCLUDE(${KOKKOS_SRC_PATH}/cmake/gnu.cmake) + kokkos_set_gnu_flags(${KOKKOS_CXX_STANDARD} ${KOKKOS_CXX_INTERMEDIATE_STANDARD}) + ENDIF() + #check that the compiler accepts the C++ standard flag + INCLUDE(CheckCXXCompilerFlag) + IF (DEFINED CXX_STD_FLAGS_ACCEPTED) + UNSET(CXX_STD_FLAGS_ACCEPTED CACHE) + ENDIF() + CHECK_CXX_COMPILER_FLAG(${KOKKOS_CXX_STANDARD_FLAG} CXX_STD_FLAGS_ACCEPTED) + IF (NOT CXX_STD_FLAGS_ACCEPTED) + CHECK_CXX_COMPILER_FLAG(${KOKKOS_CXX_INTERMEDIATE_STANDARD_FLAG} CXX_INT_STD_FLAGS_ACCEPTED) + IF (NOT CXX_INT_STD_FLAGS_ACCEPTED) + MESSAGE(FATAL_ERROR "${KOKKOS_CXX_COMPILER_ID} did not accept ${KOKKOS_CXX_STANDARD_FLAG} or ${KOKKOS_CXX_INTERMEDIATE_STANDARD_FLAG}. 
You likely need to reduce the level of the C++ standard from ${KOKKOS_CXX_STANDARD}") + ENDIF() + SET(KOKKOS_CXX_STANDARD_FLAG ${KOKKOS_CXX_INTERMEDIATE_STANDARD_FLAG}) + ENDIF() + MESSAGE(STATUS "Compiler features not supported, but ${KOKKOS_CXX_COMPILER_ID} accepts ${KOKKOS_CXX_STANDARD_FLAG}") +ENDIF() + + + + diff --git a/lib/kokkos/cmake/kokkos_tpls.cmake b/lib/kokkos/cmake/kokkos_tpls.cmake new file mode 100644 index 0000000000..181a497d52 --- /dev/null +++ b/lib/kokkos/cmake/kokkos_tpls.cmake @@ -0,0 +1,47 @@ +KOKKOS_CFG_DEPENDS(TPLS OPTIONS) +KOKKOS_CFG_DEPENDS(TPLS DEVICES) + +FUNCTION(KOKKOS_TPL_OPTION PKG DEFAULT) + KOKKOS_ENABLE_OPTION(${PKG} ${DEFAULT} "Whether to enable the ${PKG} library") + KOKKOS_OPTION(${PKG}_DIR "" PATH "Location of ${PKG} library") + SET(KOKKOS_ENABLE_${PKG} ${KOKKOS_ENABLE_${PKG}} PARENT_SCOPE) + SET(KOKKOS_${PKG}_DIR ${KOKKOS_${PKG}_DIR} PARENT_SCOPE) +ENDFUNCTION() + +KOKKOS_TPL_OPTION(HWLOC Off) +KOKKOS_TPL_OPTION(LIBNUMA Off) +KOKKOS_TPL_OPTION(MEMKIND Off) +KOKKOS_TPL_OPTION(CUDA Off) +KOKKOS_TPL_OPTION(LIBRT Off) +KOKKOS_TPL_OPTION(LIBDL On) + +IF(Trilinos_ENABLE_Kokkos AND TPL_ENABLE_HPX) +SET(HPX_DEFAULT ON) +ELSE() +SET(HPX_DEFAULT OFF) +ENDIF() +KOKKOS_TPL_OPTION(HPX ${HPX_DEFAULT}) + +IF(Trilinos_ENABLE_Kokkos AND TPL_ENABLE_PTHREAD) +SET(PTHREAD_DEFAULT ON) +ELSE() +SET(PTHREAD_DEFAULT OFF) +ENDIF() +KOKKOS_TPL_OPTION(PTHREAD ${PTHREAD_DEFAULT}) + + +#Make sure we use our local FindKokkosCuda.cmake +KOKKOS_IMPORT_TPL(HPX INTERFACE) +KOKKOS_IMPORT_TPL(CUDA INTERFACE) +KOKKOS_IMPORT_TPL(HWLOC) +KOKKOS_IMPORT_TPL(LIBNUMA) +KOKKOS_IMPORT_TPL(LIBRT) +KOKKOS_IMPORT_TPL(LIBDL) +KOKKOS_IMPORT_TPL(MEMKIND) +KOKKOS_IMPORT_TPL(PTHREAD INTERFACE) + +#Convert list to newlines (which CMake doesn't always like in cache variables) +STRING(REPLACE ";" "\n" KOKKOS_TPL_EXPORT_TEMP "${KOKKOS_TPL_EXPORTS}") +#Convert to a regular variable +UNSET(KOKKOS_TPL_EXPORTS CACHE) +SET(KOKKOS_TPL_EXPORTS ${KOKKOS_TPL_EXPORT_TEMP}) diff --git 
a/lib/kokkos/cmake/kokkos_tribits.cmake b/lib/kokkos/cmake/kokkos_tribits.cmake new file mode 100644 index 0000000000..d2317d2446 --- /dev/null +++ b/lib/kokkos/cmake/kokkos_tribits.cmake @@ -0,0 +1,392 @@ +#These are tribits wrappers only ever called by Kokkos itself + +INCLUDE(CMakeParseArguments) +INCLUDE(CTest) +INCLUDE(GNUInstallDirs) + +MESSAGE(STATUS "The project name is: ${PROJECT_NAME}") + +#Leave this here for now - but only do for tribits +#This breaks the standalone CMake +IF (KOKKOS_HAS_TRILINOS) + IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_OpenMP) + SET(${PROJECT_NAME}_ENABLE_OpenMP OFF) + ENDIF() + + IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_HPX) + SET(${PROJECT_NAME}_ENABLE_HPX OFF) + ENDIF() + + IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_DEBUG) + SET(${PROJECT_NAME}_ENABLE_DEBUG OFF) + ENDIF() + + IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_CXX11) + SET(${PROJECT_NAME}_ENABLE_CXX11 ON) + ENDIF() + + IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_TESTS) + SET(${PROJECT_NAME}_ENABLE_TESTS OFF) + ENDIF() + + IF(NOT DEFINED TPL_ENABLE_Pthread) + SET(TPL_ENABLE_Pthread OFF) + ENDIF() +ENDIF() + +MACRO(KOKKOS_SUBPACKAGE NAME) + if (KOKKOS_HAS_TRILINOS) + TRIBITS_SUBPACKAGE(${NAME}) + else() + SET(PACKAGE_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) + SET(PARENT_PACKAGE_NAME ${PACKAGE_NAME}) + SET(PACKAGE_NAME ${PACKAGE_NAME}${NAME}) + STRING(TOUPPER ${PACKAGE_NAME} PACKAGE_NAME_UC) + SET(${PACKAGE_NAME}_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) + endif() +ENDMACRO() + +MACRO(KOKKOS_SUBPACKAGE_POSTPROCESS) + if (KOKKOS_HAS_TRILINOS) + TRIBITS_SUBPACKAGE_POSTPROCESS() + endif() +ENDMACRO() + +MACRO(KOKKOS_PACKAGE_DECL) + + if (KOKKOS_HAS_TRILINOS) + TRIBITS_PACKAGE_DECL(Kokkos) + else() + SET(PACKAGE_NAME Kokkos) + SET(${PACKAGE_NAME}_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) + STRING(TOUPPER ${PACKAGE_NAME} PACKAGE_NAME_UC) + endif() + + #SET(TRIBITS_DEPS_DIR "${CMAKE_SOURCE_DIR}/cmake/deps") + #FILE(GLOB TPLS_FILES "${TRIBITS_DEPS_DIR}/*.cmake") + #FOREACH(TPL_FILE ${TPLS_FILES}) + # 
TRIBITS_PROCESS_TPL_DEP_FILE(${TPL_FILE}) + #ENDFOREACH() + +ENDMACRO() + + +MACRO(KOKKOS_PROCESS_SUBPACKAGES) + if (KOKKOS_HAS_TRILINOS) + TRIBITS_PROCESS_SUBPACKAGES() + else() + ADD_SUBDIRECTORY(core) + ADD_SUBDIRECTORY(containers) + ADD_SUBDIRECTORY(algorithms) + ADD_SUBDIRECTORY(example) + endif() +ENDMACRO() + +MACRO(KOKKOS_PACKAGE_DEF) + if (KOKKOS_HAS_TRILINOS) + TRIBITS_PACKAGE_DEF() + else() + #do nothing + endif() +ENDMACRO() + +MACRO(KOKKOS_INTERNAL_ADD_LIBRARY_INSTALL LIBRARY_NAME) + KOKKOS_LIB_TYPE(${LIBRARY_NAME} INCTYPE) + TARGET_INCLUDE_DIRECTORIES(${LIBRARY_NAME} ${INCTYPE} $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>) + + INSTALL( + TARGETS ${LIBRARY_NAME} + EXPORT ${PROJECT_NAME} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + COMPONENT ${PACKAGE_NAME} + ) + + INSTALL( + TARGETS ${LIBRARY_NAME} + EXPORT KokkosTargets + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + ) + + VERIFY_EMPTY(KOKKOS_ADD_LIBRARY ${PARSE_UNPARSED_ARGUMENTS}) +ENDMACRO() + +FUNCTION(KOKKOS_ADD_EXECUTABLE EXE_NAME) + if (KOKKOS_HAS_TRILINOS) + TRIBITS_ADD_EXECUTABLE(${EXE_NAME} ${ARGN}) + else() + CMAKE_PARSE_ARGUMENTS(PARSE + "TESTONLY" + "" + "SOURCES;TESTONLYLIBS" + ${ARGN}) + + ADD_EXECUTABLE(${EXE_NAME} ${PARSE_SOURCES}) + IF (PARSE_TESTONLYLIBS) + TARGET_LINK_LIBRARIES(${EXE_NAME} ${PARSE_TESTONLYLIBS}) + ENDIF() + VERIFY_EMPTY(KOKKOS_ADD_EXECUTABLE ${PARSE_UNPARSED_ARGUMENTS}) + endif() +ENDFUNCTION() + +IF(NOT TARGET check) + ADD_CUSTOM_TARGET(check COMMAND ${CMAKE_CTEST_COMMAND} -VV -C ${CMAKE_CFG_INTDIR}) +ENDIF() + + +FUNCTION(KOKKOS_ADD_EXECUTABLE_AND_TEST ROOT_NAME) +IF (KOKKOS_HAS_TRILINOS) + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ROOT_NAME} + TESTONLYLIBS kokkos_gtest + ${ARGN} + NUM_MPI_PROCS 1 + COMM serial mpi + FAIL_REGULAR_EXPRESSION " FAILED " + ) +ELSE() + CMAKE_PARSE_ARGUMENTS(PARSE + "" + "" + "SOURCES;CATEGORIES" + 
${ARGN}) + VERIFY_EMPTY(KOKKOS_ADD_EXECUTABLE_AND_TEST ${PARSE_UNPARSED_ARGUMENTS}) + SET(EXE_NAME ${PACKAGE_NAME}_${ROOT_NAME}) + KOKKOS_ADD_TEST_EXECUTABLE(${EXE_NAME} + SOURCES ${PARSE_SOURCES} + ) + KOKKOS_ADD_TEST(NAME ${ROOT_NAME} + EXE ${EXE_NAME} + FAIL_REGULAR_EXPRESSION " FAILED " + ) +ENDIF() +ENDFUNCTION() + +MACRO(KOKKOS_SETUP_BUILD_ENVIRONMENT) + INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_compiler_id.cmake) + INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_enable_devices.cmake) + INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_enable_options.cmake) + INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_test_cxx_std.cmake) + INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_arch.cmake) + IF (NOT KOKKOS_HAS_TRILINOS) + SET(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${Kokkos_SOURCE_DIR}/cmake/Modules/") + INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_tpls.cmake) + ENDIF() + INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_corner_cases.cmake) +ENDMACRO() + +MACRO(KOKKOS_ADD_TEST_EXECUTABLE EXE_NAME) + CMAKE_PARSE_ARGUMENTS(PARSE + "" + "" + "SOURCES" + ${ARGN}) + KOKKOS_ADD_EXECUTABLE(${EXE_NAME} + SOURCES ${PARSE_SOURCES} + ${PARSE_UNPARSED_ARGUMENTS} + TESTONLYLIBS kokkos_gtest + ) + IF (NOT KOKKOS_HAS_TRILINOS) + ADD_DEPENDENCIES(check ${EXE_NAME}) + ENDIF() +ENDMACRO() + +MACRO(KOKKOS_PACKAGE_POSTPROCESS) + if (KOKKOS_HAS_TRILINOS) + TRIBITS_PACKAGE_POSTPROCESS() + endif() +ENDMACRO() + +FUNCTION(KOKKOS_SET_LIBRARY_PROPERTIES LIBRARY_NAME) + CMAKE_PARSE_ARGUMENTS(PARSE + "PLAIN_STYLE" + "" + "" + ${ARGN}) + + IF(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.13") + #great, this works the "right" way + TARGET_LINK_OPTIONS( + ${LIBRARY_NAME} PUBLIC ${KOKKOS_LINK_OPTIONS} + ) + ELSE() + IF (PARSE_PLAIN_STYLE) + TARGET_LINK_LIBRARIES( + ${LIBRARY_NAME} ${KOKKOS_LINK_OPTIONS} + ) + ELSE() + #well, have to do it the wrong way for now + TARGET_LINK_LIBRARIES( + ${LIBRARY_NAME} PUBLIC ${KOKKOS_LINK_OPTIONS} + ) + ENDIF() + ENDIF() + + TARGET_COMPILE_OPTIONS( + ${LIBRARY_NAME} PUBLIC + $<$<COMPILE_LANGUAGE:CXX>:${KOKKOS_COMPILE_OPTIONS}> + ) + 
IF (KOKKOS_ENABLE_CUDA) + TARGET_COMPILE_OPTIONS( + ${LIBRARY_NAME} + PUBLIC $<$<COMPILE_LANGUAGE:CXX>:${KOKKOS_CUDA_OPTIONS}> + ) + SET(NODEDUP_CUDAFE_OPTIONS) + FOREACH(OPT ${KOKKOS_CUDAFE_OPTIONS}) + LIST(APPEND NODEDUP_CUDAFE_OPTIONS -Xcudafe ${OPT}) + ENDFOREACH() + TARGET_COMPILE_OPTIONS( + ${LIBRARY_NAME} + PUBLIC $<$<COMPILE_LANGUAGE:CXX>:${NODEDUP_CUDAFE_OPTIONS}> + ) + ENDIF() + + LIST(LENGTH KOKKOS_XCOMPILER_OPTIONS XOPT_LENGTH) + IF (XOPT_LENGTH GREATER 1) + MESSAGE(FATAL_ERROR "CMake deduplication does not allow multiple -Xcompiler flags (${KOKKOS_XCOMPILER_OPTIONS}): will require Kokkos to upgrade to minimum 3.12") + ENDIF() + IF(KOKKOS_XCOMPILER_OPTIONS) + SET(NODEDUP_XCOMPILER_OPTIONS) + FOREACH(OPT ${KOKKOS_XCOMPILER_OPTIONS}) + #I have to do this for now because we can't guarantee 3.12 support + #I really should do this with the shell option + LIST(APPEND NODEDUP_XCOMPILER_OPTIONS -Xcompiler) + LIST(APPEND NODEDUP_XCOMPILER_OPTIONS ${OPT}) + ENDFOREACH() + TARGET_COMPILE_OPTIONS( + ${LIBRARY_NAME} + PUBLIC $<$<COMPILE_LANGUAGE:CXX>:${NODEDUP_XCOMPILER_OPTIONS}> + ) + ENDIF() + + IF (KOKKOS_CXX_STANDARD_FEATURE) + #GREAT! 
I can do this the right way + TARGET_COMPILE_FEATURES(${LIBRARY_NAME} PUBLIC ${KOKKOS_CXX_STANDARD_FEATURE}) + IF (NOT KOKKOS_USE_CXX_EXTENSIONS) + SET_TARGET_PROPERTIES(${LIBRARY_NAME} PROPERTIES CXX_EXTENSIONS OFF) + ENDIF() + ELSE() + #OH, well, no choice but the wrong way + TARGET_COMPILE_OPTIONS(${LIBRARY_NAME} PUBLIC ${KOKKOS_CXX_STANDARD_FLAG}) + ENDIF() +ENDFUNCTION() + +FUNCTION(KOKKOS_INTERNAL_ADD_LIBRARY LIBRARY_NAME) + CMAKE_PARSE_ARGUMENTS(PARSE + "STATIC;SHARED" + "" + "HEADERS;SOURCES" + ${ARGN}) + + IF(PARSE_HEADERS) + LIST(REMOVE_DUPLICATES PARSE_HEADERS) + ENDIF() + IF(PARSE_SOURCES) + LIST(REMOVE_DUPLICATES PARSE_SOURCES) + ENDIF() + + ADD_LIBRARY( + ${LIBRARY_NAME} + ${PARSE_HEADERS} + ${PARSE_SOURCES} + ) + + KOKKOS_INTERNAL_ADD_LIBRARY_INSTALL(${LIBRARY_NAME}) + + INSTALL( + FILES ${PARSE_HEADERS} + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} + COMPONENT ${PACKAGE_NAME} + ) + + #In case we are building in-tree, add an alias name + #that matches the install Kokkos:: name + ADD_LIBRARY(Kokkos::${LIBRARY_NAME} ALIAS ${LIBRARY_NAME}) +ENDFUNCTION() + +FUNCTION(KOKKOS_ADD_LIBRARY LIBRARY_NAME) + IF (KOKKOS_HAS_TRILINOS) + TRIBITS_ADD_LIBRARY(${LIBRARY_NAME} ${ARGN}) + #Stolen from Tribits - it can add prefixes + SET(TRIBITS_LIBRARY_NAME_PREFIX "${${PROJECT_NAME}_LIBRARY_NAME_PREFIX}") + SET(TRIBITS_LIBRARY_NAME ${TRIBITS_LIBRARY_NAME_PREFIX}${LIBRARY_NAME}) + #Tribits has way too much techinical debt and baggage to even + #allow PUBLIC target_compile_options to be used. It forces C++ flags on projects + #as a giant blob of space-separated strings. We end up with duplicated + #flags between the flags implicitly forced on Kokkos-dependent and those Kokkos + #has in its public INTERFACE_COMPILE_OPTIONS. 
+ #These do NOT get de-deduplicated because Tribits + #creates flags as a giant monolithic space-separated string + #Do not set any transitive properties and keep everything working as before + #KOKKOS_SET_LIBRARY_PROPERTIES(${TRIBITS_LIBRARY_NAME} PLAIN_STYLE) + ELSE() + KOKKOS_INTERNAL_ADD_LIBRARY( + ${LIBRARY_NAME} ${ARGN}) + KOKKOS_SET_LIBRARY_PROPERTIES(${LIBRARY_NAME}) + ENDIF() +ENDFUNCTION() + +FUNCTION(KOKKOS_ADD_INTERFACE_LIBRARY NAME) +IF (KOKKOS_HAS_TRILINOS) + TRIBITS_ADD_LIBRARY(${NAME} ${ARGN}) +ELSE() + CMAKE_PARSE_ARGUMENTS(PARSE + "" + "" + "HEADERS;SOURCES" + ${ARGN} + ) + + ADD_LIBRARY(${NAME} INTERFACE) + KOKKOS_INTERNAL_ADD_LIBRARY_INSTALL(${NAME}) + + INSTALL( + FILES ${PARSE_HEADERS} + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} + ) + + INSTALL( + FILES ${PARSE_HEADERS} + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} + COMPONENT ${PACKAGE_NAME} + ) +ENDIF() +ENDFUNCTION() + +FUNCTION(KOKKOS_LIB_INCLUDE_DIRECTORIES TARGET) + IF(KOKKOS_HAS_TRILINOS) + #ignore the target, tribits doesn't do anything directly with targets + TRIBITS_INCLUDE_DIRECTORIES(${ARGN}) + ELSE() #append to a list for later + KOKKOS_LIB_TYPE(${TARGET} INCTYPE) + FOREACH(DIR ${ARGN}) + TARGET_INCLUDE_DIRECTORIES(${TARGET} ${INCTYPE} $) + ENDFOREACH() + ENDIF() +ENDFUNCTION() + +FUNCTION(KOKKOS_LIB_COMPILE_OPTIONS TARGET) + IF(KOKKOS_HAS_TRILINOS) + #don't trust tribits to do this correctly + KOKKOS_TARGET_COMPILE_OPTIONS(${TARGET} ${ARGN}) + ELSE() + KOKKOS_LIB_TYPE(${TARGET} INCTYPE) + KOKKOS_TARGET_COMPILE_OPTIONS(${${PROJECT_NAME}_LIBRARY_NAME_PREFIX}${TARGET} ${INCTYPE} ${ARGN}) + ENDIF() +ENDFUNCTION() + +MACRO(KOKKOS_ADD_TEST_DIRECTORIES) + IF (KOKKOS_HAS_TRILINOS) + TRIBITS_ADD_TEST_DIRECTORIES(${ARGN}) + ELSE() + IF(KOKKOS_ENABLE_TESTS) + FOREACH(TEST_DIR ${ARGN}) + ADD_SUBDIRECTORY(${TEST_DIR}) + ENDFOREACH() + ENDIF() + ENDIF() +ENDMACRO() diff --git a/lib/kokkos/cmake/pgi.cmake b/lib/kokkos/cmake/pgi.cmake new file mode 100644 index 0000000000..e98e849558 --- /dev/null +++ 
b/lib/kokkos/cmake/pgi.cmake @@ -0,0 +1,8 @@ + +function(kokkos_set_pgi_flags full_standard int_standard) + STRING(TOLOWER ${full_standard} FULL_LC_STANDARD) + STRING(TOLOWER ${int_standard} INT_LC_STANDARD) + SET(KOKKOS_CXX_STANDARD_FLAG "--c++${FULL_LC_STANDARD}" PARENT_SCOPE) + SET(KOKKOS_CXX_INTERMDIATE_STANDARD_FLAG "--c++${INT_LC_STANDARD}" PARENT_SCOPE) +endfunction() + diff --git a/lib/kokkos/cmake/tpls/FindTPLCUSPARSE.cmake b/lib/kokkos/cmake/tpls/FindTPLCUSPARSE.cmake index aad1e2bad7..b8cee04804 100644 --- a/lib/kokkos/cmake/tpls/FindTPLCUSPARSE.cmake +++ b/lib/kokkos/cmake/tpls/FindTPLCUSPARSE.cmake @@ -67,7 +67,7 @@ ELSE() IF(CUDA_cusparse_LIBRARY STREQUAL "CUDA_cusparse_LIBRARY-NOTFOUND") MESSAGE(FATAL_ERROR "\nCUSPARSE: could not find cuspasre library.") ENDIF() - ENDIF(CMAKE_VERSION VERSION_LESS "2.8.8") + ENDIF() GLOBAL_SET(TPL_CUSPARSE_LIBRARY_DIRS) GLOBAL_SET(TPL_CUSPARSE_INCLUDE_DIRS ${TPL_CUDA_INCLUDE_DIRS}) GLOBAL_SET(TPL_CUSPARSE_LIBRARIES ${CUDA_cusparse_LIBRARY}) diff --git a/lib/kokkos/cmake/tpls/FindTPLHWLOC.cmake b/lib/kokkos/cmake/tpls/FindTPLHWLOC.cmake index 715b3e9bde..a4c55e1d7b 100644 --- a/lib/kokkos/cmake/tpls/FindTPLHWLOC.cmake +++ b/lib/kokkos/cmake/tpls/FindTPLHWLOC.cmake @@ -64,7 +64,7 @@ # Version: 1.3 # -TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( HWLOC +KOKKOS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( HWLOC REQUIRED_HEADERS hwloc.h REQUIRED_LIBS_NAMES "hwloc" ) diff --git a/lib/kokkos/cmake/tpls/FindTPLPthread.cmake b/lib/kokkos/cmake/tpls/FindTPLPthread.cmake index fc401d7543..4dc1a87e18 100644 --- a/lib/kokkos/cmake/tpls/FindTPLPthread.cmake +++ b/lib/kokkos/cmake/tpls/FindTPLPthread.cmake @@ -75,7 +75,7 @@ IF(USE_THREADS) SET(TPL_Pthread_LIBRARIES "${CMAKE_THREAD_LIBS_INIT}") SET(TPL_Pthread_LIBRARY_DIRS "") ELSE() - TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( Pthread + KOKKOS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( Pthread REQUIRED_HEADERS pthread.h REQUIRED_LIBS_NAMES pthread ) diff --git 
a/lib/kokkos/cmake/tpls/FindTPLQTHREADS.cmake b/lib/kokkos/cmake/tpls/FindTPLQTHREADS.cmake deleted file mode 100644 index c312f2590b..0000000000 --- a/lib/kokkos/cmake/tpls/FindTPLQTHREADS.cmake +++ /dev/null @@ -1,69 +0,0 @@ -# @HEADER -# ************************************************************************ -# -# Trilinos: An Object-Oriented Solver Framework -# Copyright (2001) Sandia Corporation -# -# -# Copyright (2001) Sandia Corporation. Under the terms of Contract -# DE-AC04-94AL85000, there is a non-exclusive license for use of this -# work by or on behalf of the U.S. Government. Export of this program -# may require a license from the United States Government. -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# NOTICE: The United States Government is granted for itself and others -# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide -# license in this data to reproduce, prepare derivative works, and -# perform publicly and display publicly. Beginning five (5) years from -# July 25, 2001, the United States Government is granted for itself and -# others acting on its behalf a paid-up, nonexclusive, irrevocable -# worldwide license in this data to reproduce, prepare derivative works, -# distribute copies to the public, perform publicly and display -# publicly, and to permit others to do so. -# -# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT -# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES -# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR -# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY -# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS -# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. -# -# ************************************************************************ -# @HEADER - - -#----------------------------------------------------------------------------- -# Hardware locality detection and control library. -# -# Acquisition information: -# Date checked: July 2014 -# Checked by: H. 
Carter Edwards -# Source: https://code.google.com/p/qthreads -# - -TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( QTHREADS - REQUIRED_HEADERS qthread.h - REQUIRED_LIBS_NAMES "qthread" - ) diff --git a/lib/kokkos/cmake/tribits.cmake b/lib/kokkos/cmake/tribits.cmake deleted file mode 100644 index 1f467f0662..0000000000 --- a/lib/kokkos/cmake/tribits.cmake +++ /dev/null @@ -1,531 +0,0 @@ -INCLUDE(CMakeParseArguments) -INCLUDE(CTest) - -cmake_policy(SET CMP0054 NEW) - -MESSAGE(STATUS "The project name is: ${PROJECT_NAME}") - -IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_OpenMP) - SET(${PROJECT_NAME}_ENABLE_OpenMP OFF) -ENDIF() - -IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_HPX) - SET(${PROJECT_NAME}_ENABLE_HPX OFF) -ENDIF() - -IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_DEBUG) - SET(${PROJECT_NAME}_ENABLE_DEBUG OFF) -ENDIF() - -IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_CXX11) - SET(${PROJECT_NAME}_ENABLE_CXX11 ON) -ENDIF() - -IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_TESTS) - SET(${PROJECT_NAME}_ENABLE_TESTS OFF) -ENDIF() - -IF(NOT DEFINED TPL_ENABLE_Pthread) - SET(TPL_ENABLE_Pthread OFF) -ENDIF() - -FUNCTION(ASSERT_DEFINED VARS) - FOREACH(VAR ${VARS}) - IF(NOT DEFINED ${VAR}) - MESSAGE(SEND_ERROR "Error, the variable ${VAR} is not defined!") - ENDIF() - ENDFOREACH() -ENDFUNCTION() - -MACRO(GLOBAL_SET VARNAME) - SET(${VARNAME} ${ARGN} CACHE INTERNAL "") -ENDMACRO() - -MACRO(PREPEND_GLOBAL_SET VARNAME) - ASSERT_DEFINED(${VARNAME}) - GLOBAL_SET(${VARNAME} ${ARGN} ${${VARNAME}}) -ENDMACRO() - -#FUNCTION(REMOVE_GLOBAL_DUPLICATES VARNAME) -# ASSERT_DEFINED(${VARNAME}) -# IF (${VARNAME}) -# SET(TMP ${${VARNAME}}) -# LIST(REMOVE_DUPLICATES TMP) -# GLOBAL_SET(${VARNAME} ${TMP}) -# ENDIF() -#ENDFUNCTION() - -#MACRO(TRIBITS_ADD_OPTION_AND_DEFINE USER_OPTION_NAME MACRO_DEFINE_NAME DOCSTRING DEFAULT_VALUE) -# MESSAGE(STATUS "TRIBITS_ADD_OPTION_AND_DEFINE: '${USER_OPTION_NAME}' '${MACRO_DEFINE_NAME}' '${DEFAULT_VALUE}'") -# SET( ${USER_OPTION_NAME} "${DEFAULT_VALUE}" CACHE BOOL "${DOCSTRING}" ) -# IF(NOT 
${MACRO_DEFINE_NAME} STREQUAL "") -# IF(${USER_OPTION_NAME}) -# GLOBAL_SET(${MACRO_DEFINE_NAME} ON) -# ELSE() -# GLOBAL_SET(${MACRO_DEFINE_NAME} OFF) -# ENDIF() -# ENDIF() -#ENDMACRO() - -FUNCTION(TRIBITS_CONFIGURE_FILE PACKAGE_NAME_CONFIG_FILE) - - # Configure the file - CONFIGURE_FILE( - ${PACKAGE_SOURCE_DIR}/cmake/${PACKAGE_NAME_CONFIG_FILE}.in - ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_NAME_CONFIG_FILE} - ) - -ENDFUNCTION() - -#MACRO(TRIBITS_ADD_DEBUG_OPTION) -# TRIBITS_ADD_OPTION_AND_DEFINE( -# ${PROJECT_NAME}_ENABLE_DEBUG -# HAVE_${PROJECT_NAME_UC}_DEBUG -# "Enable a host of runtime debug checking." -# OFF -# ) -#ENDMACRO() - - -MACRO(TRIBITS_ADD_TEST_DIRECTORIES) - IF(${${PROJECT_NAME}_ENABLE_TESTS}) - FOREACH(TEST_DIR ${ARGN}) - ADD_SUBDIRECTORY(${TEST_DIR}) - ENDFOREACH() - ENDIF() -ENDMACRO() - -MACRO(TRIBITS_ADD_EXAMPLE_DIRECTORIES) - IF(${PACKAGE_NAME}_ENABLE_EXAMPLES OR ${PARENT_PACKAGE_NAME}_ENABLE_EXAMPLES) - FOREACH(EXAMPLE_DIR ${ARGN}) - ADD_SUBDIRECTORY(${EXAMPLE_DIR}) - ENDFOREACH() - ENDIF() -ENDMACRO() - - -function(INCLUDE_DIRECTORIES) - cmake_parse_arguments(INCLUDE_DIRECTORIES "REQUIRED_DURING_INSTALLATION_TESTING" "" "" ${ARGN}) - _INCLUDE_DIRECTORIES(${INCLUDE_DIRECTORIES_UNPARSED_ARGUMENTS}) -endfunction() - - -MACRO(TARGET_TRANSFER_PROPERTY TARGET_NAME PROP_IN PROP_OUT) - SET(PROP_VALUES) - FOREACH(TARGET_X ${ARGN}) - LIST(APPEND PROP_VALUES "$") - ENDFOREACH() - SET_TARGET_PROPERTIES(${TARGET_NAME} PROPERTIES ${PROP_OUT} "${PROP_VALUES}") -ENDMACRO() - -MACRO(ADD_INTERFACE_LIBRARY LIB_NAME) - FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/dummy.cpp "") - ADD_LIBRARY(${LIB_NAME} STATIC ${CMAKE_CURRENT_BINARY_DIR}/dummy.cpp) - SET_TARGET_PROPERTIES(${LIB_NAME} PROPERTIES INTERFACE TRUE) -ENDMACRO() - -# Older versions of cmake does not make include directories transitive -MACRO(TARGET_LINK_AND_INCLUDE_LIBRARIES TARGET_NAME) - TARGET_LINK_LIBRARIES(${TARGET_NAME} LINK_PUBLIC ${ARGN}) - FOREACH(DEP_LIB ${ARGN}) - 
TARGET_INCLUDE_DIRECTORIES(${TARGET_NAME} PUBLIC $) - TARGET_INCLUDE_DIRECTORIES(${TARGET_NAME} PUBLIC $) - ENDFOREACH() -ENDMACRO() - -FUNCTION(TRIBITS_ADD_LIBRARY LIBRARY_NAME) - - SET(options STATIC SHARED TESTONLY NO_INSTALL_LIB_OR_HEADERS CUDALIBRARY) - SET(oneValueArgs) - SET(multiValueArgs HEADERS HEADERS_INSTALL_SUBDIR NOINSTALLHEADERS SOURCES DEPLIBS IMPORTEDLIBS DEFINES ADDED_LIB_TARGET_NAME_OUT) - - CMAKE_PARSE_ARGUMENTS(PARSE "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) - - IF(PARSE_HEADERS) - LIST(REMOVE_DUPLICATES PARSE_HEADERS) - ENDIF() - IF(PARSE_SOURCES) - LIST(REMOVE_DUPLICATES PARSE_SOURCES) - ENDIF() - - # Local variable to hold all of the libraries that will be directly linked - # to this library. - SET(LINK_LIBS ${${PACKAGE_NAME}_DEPS}) - - # Add dependent libraries passed directly in - - IF (PARSE_IMPORTEDLIBS) - LIST(APPEND LINK_LIBS ${PARSE_IMPORTEDLIBS}) - ENDIF() - - IF (PARSE_DEPLIBS) - LIST(APPEND LINK_LIBS ${PARSE_DEPLIBS}) - ENDIF() - - # Add the library and all the dependencies - - IF (PARSE_DEFINES) - ADD_DEFINITIONS(${PARSE_DEFINES}) - ENDIF() - - IF (PARSE_STATIC) - SET(STATIC_KEYWORD "STATIC") - ELSE() - SET(STATIC_KEYWORD) - ENDIF() - - IF (PARSE_SHARED) - SET(SHARED_KEYWORD "SHARED") - ELSE() - SET(SHARED_KEYWORD) - ENDIF() - - IF (PARSE_TESTONLY) - SET(EXCLUDE_FROM_ALL_KEYWORD "EXCLUDE_FROM_ALL") - ELSE() - SET(EXCLUDE_FROM_ALL_KEYWORD) - ENDIF() - IF (NOT PARSE_CUDALIBRARY) - ADD_LIBRARY( - ${LIBRARY_NAME} - ${STATIC_KEYWORD} - ${SHARED_KEYWORD} - ${EXCLUDE_FROM_ALL_KEYWORD} - ${PARSE_HEADERS} - ${PARSE_NOINSTALLHEADERS} - ${PARSE_SOURCES} - ) - ELSE() - CUDA_ADD_LIBRARY( - ${LIBRARY_NAME} - ${PARSE_HEADERS} - ${PARSE_NOINSTALLHEADERS} - ${PARSE_SOURCES} - ) - ENDIF() - - TARGET_LINK_AND_INCLUDE_LIBRARIES(${LIBRARY_NAME} ${LINK_LIBS}) - - IF (NOT PARSE_TESTONLY OR PARSE_NO_INSTALL_LIB_OR_HEADERS) - - INSTALL( - TARGETS ${LIBRARY_NAME} - EXPORT ${PROJECT_NAME} - RUNTIME DESTINATION bin - LIBRARY DESTINATION lib 
- ARCHIVE DESTINATION lib - COMPONENT ${PACKAGE_NAME} - ) - - INSTALL( - FILES ${PARSE_HEADERS} - EXPORT ${PROJECT_NAME} - DESTINATION include - COMPONENT ${PACKAGE_NAME} - ) - - INSTALL( - DIRECTORY ${PARSE_HEADERS_INSTALL_SUBDIR} - EXPORT ${PROJECT_NAME} - DESTINATION include - COMPONENT ${PACKAGE_NAME} - ) - - ENDIF() - - IF (NOT PARSE_TESTONLY) - PREPEND_GLOBAL_SET(${PACKAGE_NAME}_LIBS ${LIBRARY_NAME}) - REMOVE_GLOBAL_DUPLICATES(${PACKAGE_NAME}_LIBS) - ENDIF() - -ENDFUNCTION() - -FUNCTION(TRIBITS_ADD_EXECUTABLE EXE_NAME) - - SET(options NOEXEPREFIX NOEXESUFFIX ADD_DIR_TO_NAME INSTALLABLE TESTONLY) - SET(oneValueArgs ADDED_EXE_TARGET_NAME_OUT) - SET(multiValueArgs SOURCES CATEGORIES HOST XHOST HOSTTYPE XHOSTTYPE DIRECTORY TESTONLYLIBS IMPORTEDLIBS DEPLIBS COMM LINKER_LANGUAGE TARGET_DEFINES DEFINES) - - CMAKE_PARSE_ARGUMENTS(PARSE "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) - - IF (PARSE_TARGET_DEFINES) - TARGET_COMPILE_DEFINITIONS(${EXE_NAME} PUBLIC ${PARSE_TARGET_DEFINES}) - ENDIF() - - SET(LINK_LIBS PACKAGE_${PACKAGE_NAME}) - - IF (PARSE_TESTONLYLIBS) - LIST(APPEND LINK_LIBS ${PARSE_TESTONLYLIBS}) - ENDIF() - - IF (PARSE_IMPORTEDLIBS) - LIST(APPEND LINK_LIBS ${PARSE_IMPORTEDLIBS}) - ENDIF() - - SET (EXE_SOURCES) - IF(PARSE_DIRECTORY) - FOREACH( SOURCE_FILE ${PARSE_SOURCES} ) - IF(IS_ABSOLUTE ${SOURCE_FILE}) - SET (EXE_SOURCES ${EXE_SOURCES} ${SOURCE_FILE}) - ELSE() - SET (EXE_SOURCES ${EXE_SOURCES} ${PARSE_DIRECTORY}/${SOURCE_FILE}) - ENDIF() - ENDFOREACH( ) - ELSE() - FOREACH( SOURCE_FILE ${PARSE_SOURCES} ) - SET (EXE_SOURCES ${EXE_SOURCES} ${SOURCE_FILE}) - ENDFOREACH( ) - ENDIF() - - SET(EXE_BINARY_NAME ${EXE_NAME}) - IF(DEFINED PACKAGE_NAME AND NOT PARSE_NOEXEPREFIX) - SET(EXE_BINARY_NAME ${PACKAGE_NAME}_${EXE_BINARY_NAME}) - ENDIF() - - # IF (PARSE_TESTONLY) - # SET(EXCLUDE_FROM_ALL_KEYWORD "EXCLUDE_FROM_ALL") - # ELSE() - # SET(EXCLUDE_FROM_ALL_KEYWORD) - # ENDIF() - ADD_EXECUTABLE(${EXE_BINARY_NAME} ${EXCLUDE_FROM_ALL_KEYWORD} 
${EXE_SOURCES}) - - TARGET_LINK_AND_INCLUDE_LIBRARIES(${EXE_BINARY_NAME} ${LINK_LIBS}) - - IF(PARSE_ADDED_EXE_TARGET_NAME_OUT) - SET(${PARSE_ADDED_EXE_TARGET_NAME_OUT} ${EXE_BINARY_NAME} PARENT_SCOPE) - ENDIF() - - IF(PARSE_INSTALLABLE) - INSTALL( - TARGETS ${EXE_BINARY_NAME} - EXPORT ${PROJECT_NAME} - DESTINATION bin - ) - ENDIF() -ENDFUNCTION() - -IF(NOT TARGET check) - ADD_CUSTOM_TARGET(check COMMAND ${CMAKE_CTEST_COMMAND} -VV -C ${CMAKE_CFG_INTDIR}) -ENDIF() - -FUNCTION(TRIBITS_ADD_TEST) -ENDFUNCTION() -FUNCTION(TRIBITS_TPL_TENTATIVELY_ENABLE) -ENDFUNCTION() - -FUNCTION(TRIBITS_ADD_ADVANCED_TEST) - # TODO Write this -ENDFUNCTION() - -FUNCTION(TRIBITS_ADD_EXECUTABLE_AND_TEST EXE_NAME) - - SET(options STANDARD_PASS_OUTPUT WILL_FAIL) - SET(oneValueArgs PASS_REGULAR_EXPRESSION FAIL_REGULAR_EXPRESSION ENVIRONMENT TIMEOUT CATEGORIES ADDED_TESTS_NAMES_OUT ADDED_EXE_TARGET_NAME_OUT) - SET(multiValueArgs) - - CMAKE_PARSE_ARGUMENTS(PARSE "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) - - TRIBITS_ADD_EXECUTABLE(${EXE_NAME} TESTONLY ADDED_EXE_TARGET_NAME_OUT TEST_NAME ${PARSE_UNPARSED_ARGUMENTS}) - - IF(WIN32) - ADD_TEST(NAME ${TEST_NAME} WORKING_DIRECTORY ${LIBRARY_OUTPUT_PATH} COMMAND ${TEST_NAME}${CMAKE_EXECUTABLE_SUFFIX}) - ELSE() - ADD_TEST(NAME ${TEST_NAME} COMMAND ${TEST_NAME}) - ENDIF() - ADD_DEPENDENCIES(check ${TEST_NAME}) - - IF(PARSE_FAIL_REGULAR_EXPRESSION) - SET_TESTS_PROPERTIES(${TEST_NAME} PROPERTIES FAIL_REGULAR_EXPRESSION ${PARSE_FAIL_REGULAR_EXPRESSION}) - ENDIF() - - IF(PARSE_PASS_REGULAR_EXPRESSION) - SET_TESTS_PROPERTIES(${TEST_NAME} PROPERTIES PASS_REGULAR_EXPRESSION ${PARSE_PASS_REGULAR_EXPRESSION}) - ENDIF() - - IF(PARSE_WILL_FAIL) - SET_TESTS_PROPERTIES(${TEST_NAME} PROPERTIES WILL_FAIL ${PARSE_WILL_FAIL}) - ENDIF() - - IF(PARSE_ADDED_TESTS_NAMES_OUT) - SET(${PARSE_ADDED_TESTS_NAMES_OUT} ${TEST_NAME} PARENT_SCOPE) - ENDIF() - - IF(PARSE_ADDED_EXE_TARGET_NAME_OUT) - SET(${PARSE_ADDED_EXE_TARGET_NAME_OUT} ${TEST_NAME} PARENT_SCOPE) - 
ENDIF() - -ENDFUNCTION() - -MACRO(TIBITS_CREATE_IMPORTED_TPL_LIBRARY TPL_NAME) - ADD_INTERFACE_LIBRARY(TPL_LIB_${TPL_NAME}) - TARGET_LINK_LIBRARIES(TPL_LIB_${TPL_NAME} LINK_PUBLIC ${TPL_${TPL_NAME}_LIBRARIES}) - TARGET_INCLUDE_DIRECTORIES(TPL_LIB_${TPL_NAME} INTERFACE ${TPL_${TPL_NAME}_INCLUDE_DIRS}) -ENDMACRO() - -FUNCTION(TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES TPL_NAME) - - SET(options MUST_FIND_ALL_LIBS MUST_FIND_ALL_HEADERS NO_PRINT_ENABLE_SUCCESS_FAIL) - SET(oneValueArgs) - SET(multiValueArgs REQUIRED_HEADERS REQUIRED_LIBS_NAMES) - - CMAKE_PARSE_ARGUMENTS(PARSE "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) - - SET(_${TPL_NAME}_ENABLE_SUCCESS TRUE) - IF (PARSE_REQUIRED_LIBS_NAMES) - FIND_LIBRARY(TPL_${TPL_NAME}_LIBRARIES NAMES ${PARSE_REQUIRED_LIBS_NAMES}) - IF(NOT TPL_${TPL_NAME}_LIBRARIES) - SET(_${TPL_NAME}_ENABLE_SUCCESS FALSE) - ENDIF() - ENDIF() - IF (PARSE_REQUIRED_HEADERS) - FIND_PATH(TPL_${TPL_NAME}_INCLUDE_DIRS NAMES ${PARSE_REQUIRED_HEADERS}) - IF(NOT TPL_${TPL_NAME}_INCLUDE_DIRS) - SET(_${TPL_NAME}_ENABLE_SUCCESS FALSE) - ENDIF() - ENDIF() - - - IF (_${TPL_NAME}_ENABLE_SUCCESS) - TIBITS_CREATE_IMPORTED_TPL_LIBRARY(${TPL_NAME}) - ENDIF() - -ENDFUNCTION() - -#MACRO(TRIBITS_PROCESS_TPL_DEP_FILE TPL_FILE) -# GET_FILENAME_COMPONENT(TPL_NAME ${TPL_FILE} NAME_WE) -# INCLUDE("${TPL_FILE}") -# IF(TARGET TPL_LIB_${TPL_NAME}) -# MESSAGE(STATUS "Found tpl library: ${TPL_NAME}") -# SET(TPL_ENABLE_${TPL_NAME} TRUE) -# ELSE() -# MESSAGE(STATUS "Tpl library not found: ${TPL_NAME}") -# SET(TPL_ENABLE_${TPL_NAME} FALSE) -# ENDIF() -#ENDMACRO() - -MACRO(PREPEND_TARGET_SET VARNAME TARGET_NAME TYPE) - IF(TYPE STREQUAL "REQUIRED") - SET(REQUIRED TRUE) - ELSE() - SET(REQUIRED FALSE) - ENDIF() - IF(TARGET ${TARGET_NAME}) - PREPEND_GLOBAL_SET(${VARNAME} ${TARGET_NAME}) - ELSE() - IF(REQUIRED) - MESSAGE(FATAL_ERROR "Missing dependency ${TARGET_NAME}") - ENDIF() - ENDIF() -ENDMACRO() - -MACRO(TRIBITS_APPEND_PACKAGE_DEPS DEP_LIST TYPE) - FOREACH(DEP ${ARGN}) 
- PREPEND_GLOBAL_SET(${DEP_LIST} PACKAGE_${DEP}) - ENDFOREACH() -ENDMACRO() - -MACRO(TRIBITS_APPEND_TPLS_DEPS DEP_LIST TYPE) - FOREACH(DEP ${ARGN}) - PREPEND_TARGET_SET(${DEP_LIST} TPL_LIB_${DEP} ${TYPE}) - ENDFOREACH() -ENDMACRO() - -MACRO(TRIBITS_ENABLE_TPLS) - FOREACH(TPL ${ARGN}) - IF(TARGET ${TPL}) - GLOBAL_SET(${PACKAGE_NAME}_ENABLE_${TPL} TRUE) - ELSE() - GLOBAL_SET(${PACKAGE_NAME}_ENABLE_${TPL} FALSE) - ENDIF() - ENDFOREACH() -ENDMACRO() - -MACRO(TRIBITS_PACKAGE_DEFINE_DEPENDENCIES) - - SET(options) - SET(oneValueArgs) - SET(multiValueArgs - LIB_REQUIRED_PACKAGES - LIB_OPTIONAL_PACKAGES - TEST_REQUIRED_PACKAGES - TEST_OPTIONAL_PACKAGES - LIB_REQUIRED_TPLS - LIB_OPTIONAL_TPLS - TEST_REQUIRED_TPLS - TEST_OPTIONAL_TPLS - REGRESSION_EMAIL_LIST - SUBPACKAGES_DIRS_CLASSIFICATIONS_OPTREQS - ) - CMAKE_PARSE_ARGUMENTS(PARSE "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) - - GLOBAL_SET(${PACKAGE_NAME}_DEPS "") - TRIBITS_APPEND_PACKAGE_DEPS(${PACKAGE_NAME}_DEPS REQUIRED ${PARSE_LIB_REQUIRED_PACKAGES}) - TRIBITS_APPEND_PACKAGE_DEPS(${PACKAGE_NAME}_DEPS OPTIONAL ${PARSE_LIB_OPTIONAL_PACKAGES}) - TRIBITS_APPEND_TPLS_DEPS(${PACKAGE_NAME}_DEPS REQUIRED ${PARSE_LIB_REQUIRED_TPLS}) - TRIBITS_APPEND_TPLS_DEPS(${PACKAGE_NAME}_DEPS OPTIONAL ${PARSE_LIB_OPTIONAL_TPLS}) - - GLOBAL_SET(${PACKAGE_NAME}_TEST_DEPS "") - TRIBITS_APPEND_PACKAGE_DEPS(${PACKAGE_NAME}_TEST_DEPS REQUIRED ${PARSE_TEST_REQUIRED_PACKAGES}) - TRIBITS_APPEND_PACKAGE_DEPS(${PACKAGE_NAME}_TEST_DEPS OPTIONAL ${PARSE_TEST_OPTIONAL_PACKAGES}) - TRIBITS_APPEND_TPLS_DEPS(${PACKAGE_NAME}_TEST_DEPS REQUIRED ${PARSE_TEST_REQUIRED_TPLS}) - TRIBITS_APPEND_TPLS_DEPS(${PACKAGE_NAME}_TEST_DEPS OPTIONAL ${PARSE_TEST_OPTIONAL_TPLS}) - - TRIBITS_ENABLE_TPLS(${PARSE_LIB_REQUIRED_TPLS} ${PARSE_LIB_OPTIONAL_TPLS} ${PARSE_TEST_REQUIRED_TPLS} ${PARSE_TEST_OPTIONAL_TPLS}) - -ENDMACRO() - -MACRO(TRIBITS_SUBPACKAGE NAME) - SET(PACKAGE_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) - SET(PARENT_PACKAGE_NAME ${PACKAGE_NAME}) - 
SET(PACKAGE_NAME ${PACKAGE_NAME}${NAME}) - STRING(TOUPPER ${PACKAGE_NAME} PACKAGE_NAME_UC) - SET(${PACKAGE_NAME}_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) - - ADD_INTERFACE_LIBRARY(PACKAGE_${PACKAGE_NAME}) - - GLOBAL_SET(${PACKAGE_NAME}_LIBS "") - - INCLUDE(${PACKAGE_SOURCE_DIR}/cmake/Dependencies.cmake) - -ENDMACRO(TRIBITS_SUBPACKAGE) - -MACRO(TRIBITS_SUBPACKAGE_POSTPROCESS) - TARGET_LINK_AND_INCLUDE_LIBRARIES(PACKAGE_${PACKAGE_NAME} ${${PACKAGE_NAME}_LIBS}) -ENDMACRO(TRIBITS_SUBPACKAGE_POSTPROCESS) - -MACRO(TRIBITS_PACKAGE_DECL NAME) - - SET(PACKAGE_NAME ${NAME}) - SET(${PACKAGE_NAME}_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) - STRING(TOUPPER ${PACKAGE_NAME} PACKAGE_NAME_UC) - - #SET(TRIBITS_DEPS_DIR "${CMAKE_SOURCE_DIR}/cmake/deps") - #FILE(GLOB TPLS_FILES "${TRIBITS_DEPS_DIR}/*.cmake") - #FOREACH(TPL_FILE ${TPLS_FILES}) - # TRIBITS_PROCESS_TPL_DEP_FILE(${TPL_FILE}) - #ENDFOREACH() - -ENDMACRO() - - -MACRO(TRIBITS_PROCESS_SUBPACKAGES) - FILE(GLOB SUBPACKAGES RELATIVE ${CMAKE_SOURCE_DIR} */cmake/Dependencies.cmake) - FOREACH(SUBPACKAGE ${SUBPACKAGES}) - GET_FILENAME_COMPONENT(SUBPACKAGE_CMAKE ${SUBPACKAGE} DIRECTORY) - GET_FILENAME_COMPONENT(SUBPACKAGE_DIR ${SUBPACKAGE_CMAKE} DIRECTORY) - ADD_SUBDIRECTORY(${CMAKE_BINARY_DIR}/../${SUBPACKAGE_DIR}) - ENDFOREACH() -ENDMACRO(TRIBITS_PROCESS_SUBPACKAGES) - -MACRO(TRIBITS_PACKAGE_DEF) -ENDMACRO(TRIBITS_PACKAGE_DEF) - -MACRO(TRIBITS_EXCLUDE_AUTOTOOLS_FILES) -ENDMACRO(TRIBITS_EXCLUDE_AUTOTOOLS_FILES) - -MACRO(TRIBITS_EXCLUDE_FILES) -ENDMACRO(TRIBITS_EXCLUDE_FILES) - -MACRO(TRIBITS_PACKAGE_POSTPROCESS) -ENDMACRO(TRIBITS_PACKAGE_POSTPROCESS) - diff --git a/lib/kokkos/containers/CMakeLists.txt b/lib/kokkos/containers/CMakeLists.txt index c37aa3e3e2..2bfaea7a13 100644 --- a/lib/kokkos/containers/CMakeLists.txt +++ b/lib/kokkos/containers/CMakeLists.txt @@ -1,13 +1,10 @@ - - -TRIBITS_SUBPACKAGE(Containers) - - -IF(KOKKOS_HAS_TRILINOS) - ADD_SUBDIRECTORY(src) -ENDIF() - -TRIBITS_ADD_TEST_DIRECTORIES(unit_tests) 
-TRIBITS_ADD_TEST_DIRECTORIES(performance_tests) - -TRIBITS_SUBPACKAGE_POSTPROCESS() + + +KOKKOS_SUBPACKAGE(Containers) + +ADD_SUBDIRECTORY(src) + +KOKKOS_ADD_TEST_DIRECTORIES(unit_tests) +KOKKOS_ADD_TEST_DIRECTORIES(performance_tests) + +KOKKOS_SUBPACKAGE_POSTPROCESS() diff --git a/lib/kokkos/containers/performance_tests/CMakeLists.txt b/lib/kokkos/containers/performance_tests/CMakeLists.txt index 3c6584bc34..ca76808190 100644 --- a/lib/kokkos/containers/performance_tests/CMakeLists.txt +++ b/lib/kokkos/containers/performance_tests/CMakeLists.txt @@ -1,49 +1,62 @@ -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) -INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR}) -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src ) - -IF(NOT KOKKOS_HAS_TRILINOS) - IF(KOKKOS_SEPARATE_LIBS) - set(TEST_LINK_TARGETS kokkoscore) - ELSE() - set(TEST_LINK_TARGETS kokkos) - ENDIF() +KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +KOKKOS_INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR}) +KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src ) + +IF(Kokkos_ENABLE_CUDA) + SET(SOURCES + TestMain.cpp + TestCuda.cpp + ) + + KOKKOS_ADD_TEST_EXECUTABLE( PerfTestExec_Cuda + SOURCES ${SOURCES} + ) + + KOKKOS_ADD_TEST( NAME PerformanceTest_Cuda + EXE PerfTestExec_Cuda + ) ENDIF() -SET(SOURCES - TestMain.cpp - TestCuda.cpp - ) - -IF(Kokkos_ENABLE_Pthread) - LIST( APPEND SOURCES TestThreads.cpp) +IF(Kokkos_ENABLE_PTHREAD) + SET(SOURCES + TestMain.cpp + TestThreads.cpp + ) + KOKKOS_ADD_TEST_EXECUTABLE( PerfTestExec_Threads + SOURCES ${SOURCES} + ) + + KOKKOS_ADD_TEST( NAME PerformanceTest_Threads + EXE PerfTestExec_Threads + ) ENDIF() -IF(Kokkos_ENABLE_OpenMP) - LIST( APPEND SOURCES TestOpenMP.cpp) +IF(Kokkos_ENABLE_OPENMP) + SET(SOURCES + TestMain.cpp + TestOpenMP.cpp + ) + KOKKOS_ADD_TEST_EXECUTABLE( PerfTestExec_OpenMP + SOURCES ${SOURCES} + ) + + KOKKOS_ADD_TEST( NAME PerformanceTest_OpenMP + EXE 
PerfTestExec_OpenMP + ) ENDIF() IF(Kokkos_ENABLE_HPX) - LIST( APPEND SOURCES TestHPX.cpp) + SET(SOURCES + TestMain.cpp + TestHPX.cpp + ) + KOKKOS_ADD_TEST_EXECUTABLE( PerfTestExec_HPX + SOURCES ${SOURCES} + ) + + KOKKOS_ADD_TEST( NAME PerformanceTest_HPX + EXE PerfTestExec_HPX + ) ENDIF() -# Per #374, we always want to build this test, but we only want to run -# it as a PERFORMANCE test. That's why we separate building the test -# from running the test. - -TRIBITS_ADD_EXECUTABLE( - PerfTestExec - SOURCES ${SOURCES} - COMM serial mpi - TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} - ) - -TRIBITS_ADD_TEST( - PerformanceTest - NAME PerfTestExec - COMM serial mpi - NUM_MPI_PROCS 1 - CATEGORIES PERFORMANCE - FAIL_REGULAR_EXPRESSION " FAILED " - ) diff --git a/lib/kokkos/containers/performance_tests/TestCuda.cpp b/lib/kokkos/containers/performance_tests/TestCuda.cpp index 351fb86df3..697a006c3c 100644 --- a/lib/kokkos/containers/performance_tests/TestCuda.cpp +++ b/lib/kokkos/containers/performance_tests/TestCuda.cpp @@ -2,10 +2,11 @@ //@HEADER // ************************************************************************ // -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). // -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Redistribution and use in source and binary forms, with or without @@ -23,10 +24,10 @@ // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. 
// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR @@ -42,7 +43,7 @@ */ #include -#if defined( KOKKOS_ENABLE_CUDA ) +#if defined(KOKKOS_ENABLE_CUDA) #include #include @@ -66,45 +67,38 @@ namespace Performance { class cuda : public ::testing::Test { -protected: - static void SetUpTestCase() - { + protected: + static void SetUpTestCase() { std::cout << std::setprecision(5) << std::scientific; Kokkos::InitArguments args(-1, -1, 0); Kokkos::initialize(args); } - static void TearDownTestCase() - { - Kokkos::finalize(); - } + static void TearDownTestCase() { Kokkos::finalize(); } }; -TEST_F( cuda, dynrankview_perf ) -{ +TEST_F(cuda, dynrankview_perf) { std::cout << "Cuda" << std::endl; std::cout << " DynRankView vs View: Initialization Only " << std::endl; - test_dynrankview_op_perf( 40960 ); + test_dynrankview_op_perf(40960); } -TEST_F( cuda, global_2_local) -{ +TEST_F(cuda, global_2_local) { std::cout << "Cuda" << std::endl; std::cout << "size, create, generate, fill, find" << std::endl; - for (unsigned i=Performance::begin_id_size; i<=Performance::end_id_size; i *= Performance::id_step) + for (unsigned i = Performance::begin_id_size; i <= Performance::end_id_size; + i *= Performance::id_step) test_global_to_local_ids(i); } -TEST_F( cuda, unordered_map_performance_near) -{ - Perf::run_performance_tests("cuda-near"); +TEST_F(cuda, unordered_map_performance_near) { + Perf::run_performance_tests("cuda-near"); } -TEST_F( 
cuda, unordered_map_performance_far) -{ - Perf::run_performance_tests("cuda-far"); +TEST_F(cuda, unordered_map_performance_far) { + Perf::run_performance_tests("cuda-far"); } -} +} // namespace Performance #else void KOKKOS_CONTAINERS_PERFORMANCE_TESTS_TESTCUDA_PREVENT_EMPTY_LINK_ERROR() {} -#endif /* #if defined( KOKKOS_ENABLE_CUDA ) */ +#endif /* #if defined( KOKKOS_ENABLE_CUDA ) */ diff --git a/lib/kokkos/containers/performance_tests/TestDynRankView.hpp b/lib/kokkos/containers/performance_tests/TestDynRankView.hpp index db6274e057..ee13f7e58b 100644 --- a/lib/kokkos/containers/performance_tests/TestDynRankView.hpp +++ b/lib/kokkos/containers/performance_tests/TestDynRankView.hpp @@ -2,10 +2,11 @@ //@HEADER // ************************************************************************ // -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). // -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Redistribution and use in source and binary forms, with or without @@ -23,10 +24,10 @@ // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR @@ -49,109 +50,102 @@ #include -// Compare performance of DynRankView to View, specific focus on the parenthesis operators +// Compare performance of DynRankView to View, specific focus on the parenthesis +// operators namespace Performance { -//View functor +// View functor template struct InitViewFunctor { - typedef Kokkos::View inviewtype; + typedef Kokkos::View inviewtype; inviewtype _inview; - InitViewFunctor( inviewtype &inview_ ) : _inview(inview_) - {} + InitViewFunctor(inviewtype &inview_) : _inview(inview_) {} KOKKOS_INLINE_FUNCTION void operator()(const int i) const { for (unsigned j = 0; j < _inview.extent(1); ++j) { for (unsigned k = 0; k < _inview.extent(2); ++k) { - _inview(i,j,k) = i/2 -j*j + k/3; + _inview(i, j, k) = i / 2 - j * j + k / 3; } } } - struct SumComputationTest - { - typedef Kokkos::View inviewtype; + struct SumComputationTest { + typedef Kokkos::View inviewtype; inviewtype _inview; - typedef Kokkos::View outviewtype; + typedef Kokkos::View outviewtype; outviewtype _outview; KOKKOS_INLINE_FUNCTION - SumComputationTest(inviewtype &inview_ , outviewtype &outview_) : _inview(inview_), _outview(outview_) {} + SumComputationTest(inviewtype &inview_, outviewtype &outview_) + : _inview(inview_), _outview(outview_) {} KOKKOS_INLINE_FUNCTION void operator()(const int i) const { for (unsigned j = 0; j < _inview.extent(1); ++j) { for (unsigned k = 0; k < _inview.extent(2); ++k) { - _outview(i) += _inview(i,j,k) ; + _outview(i) += _inview(i, j, k); } } } }; - }; template struct InitStrideViewFunctor { - typedef Kokkos::View inviewtype; + typedef Kokkos::View inviewtype; inviewtype _inview; - InitStrideViewFunctor( inviewtype &inview_ ) : _inview(inview_) - {} + InitStrideViewFunctor(inviewtype &inview_) : 
_inview(inview_) {} KOKKOS_INLINE_FUNCTION void operator()(const int i) const { for (unsigned j = 0; j < _inview.extent(1); ++j) { for (unsigned k = 0; k < _inview.extent(2); ++k) { - _inview(i,j,k) = i/2 -j*j + k/3; + _inview(i, j, k) = i / 2 - j * j + k / 3; } } } - }; template struct InitViewRank7Functor { - typedef Kokkos::View inviewtype; + typedef Kokkos::View inviewtype; inviewtype _inview; - InitViewRank7Functor( inviewtype &inview_ ) : _inview(inview_) - {} + InitViewRank7Functor(inviewtype &inview_) : _inview(inview_) {} KOKKOS_INLINE_FUNCTION void operator()(const int i) const { for (unsigned j = 0; j < _inview.extent(1); ++j) { for (unsigned k = 0; k < _inview.extent(2); ++k) { - _inview(i,j,k,0,0,0,0) = i/2 -j*j + k/3; + _inview(i, j, k, 0, 0, 0, 0) = i / 2 - j * j + k / 3; } } } - }; -//DynRankView functor +// DynRankView functor template struct InitDynRankViewFunctor { typedef Kokkos::DynRankView inviewtype; inviewtype _inview; - InitDynRankViewFunctor( inviewtype &inview_ ) : _inview(inview_) - {} + InitDynRankViewFunctor(inviewtype &inview_) : _inview(inview_) {} KOKKOS_INLINE_FUNCTION void operator()(const int i) const { for (unsigned j = 0; j < _inview.extent(1); ++j) { for (unsigned k = 0; k < _inview.extent(2); ++k) { - _inview(i,j,k) = i/2 -j*j + k/3; + _inview(i, j, k) = i / 2 - j * j + k / 3; } } } - struct SumComputationTest - { + struct SumComputationTest { typedef Kokkos::DynRankView inviewtype; inviewtype _inview; @@ -159,108 +153,121 @@ struct InitDynRankViewFunctor { outviewtype _outview; KOKKOS_INLINE_FUNCTION - SumComputationTest(inviewtype &inview_ , outviewtype &outview_) : _inview(inview_), _outview(outview_) {} + SumComputationTest(inviewtype &inview_, outviewtype &outview_) + : _inview(inview_), _outview(outview_) {} KOKKOS_INLINE_FUNCTION void operator()(const int i) const { for (unsigned j = 0; j < _inview.extent(1); ++j) { for (unsigned k = 0; k < _inview.extent(2); ++k) { - _outview(i) += _inview(i,j,k) ; + _outview(i) += 
_inview(i, j, k); } } } }; - }; - template -void test_dynrankview_op_perf( const int par_size ) -{ - +void test_dynrankview_op_perf(const int par_size) { typedef DeviceType execution_space; typedef typename execution_space::size_type size_type; const size_type dim_2 = 90; const size_type dim_3 = 30; - double elapsed_time_view = 0; - double elapsed_time_compview = 0; + double elapsed_time_view = 0; + double elapsed_time_compview = 0; double elapsed_time_strideview = 0; double elapsed_time_view_rank7 = 0; - double elapsed_time_drview = 0; + double elapsed_time_drview = 0; double elapsed_time_compdrview = 0; Kokkos::Timer timer; { - Kokkos::View testview("testview",par_size,dim_2,dim_3); + Kokkos::View testview("testview", par_size, dim_2, + dim_3); typedef InitViewFunctor FunctorType; timer.reset(); - Kokkos::RangePolicy policy(0,par_size); - Kokkos::parallel_for( policy , FunctorType(testview) ); + Kokkos::RangePolicy policy(0, par_size); + Kokkos::parallel_for(policy, FunctorType(testview)); DeviceType().fence(); elapsed_time_view = timer.seconds(); std::cout << " View time (init only): " << elapsed_time_view << std::endl; - timer.reset(); - Kokkos::View sumview("sumview",par_size); - Kokkos::parallel_for( policy , typename FunctorType::SumComputationTest(testview, sumview) ); + Kokkos::View sumview("sumview", par_size); + Kokkos::parallel_for( + policy, typename FunctorType::SumComputationTest(testview, sumview)); DeviceType().fence(); elapsed_time_compview = timer.seconds(); - std::cout << " View sum computation time: " << elapsed_time_view << std::endl; - + std::cout << " View sum computation time: " << elapsed_time_view + << std::endl; - Kokkos::View teststrideview = Kokkos::subview(testview, Kokkos::ALL, Kokkos::ALL,Kokkos::ALL); + Kokkos::View teststrideview = + Kokkos::subview(testview, Kokkos::ALL, Kokkos::ALL, Kokkos::ALL); typedef InitStrideViewFunctor FunctorStrideType; timer.reset(); - Kokkos::parallel_for( policy , FunctorStrideType(teststrideview) ); 
+ Kokkos::parallel_for(policy, FunctorStrideType(teststrideview)); DeviceType().fence(); elapsed_time_strideview = timer.seconds(); - std::cout << " Strided View time (init only): " << elapsed_time_strideview << std::endl; + std::cout << " Strided View time (init only): " << elapsed_time_strideview + << std::endl; } { - Kokkos::View testview("testview",par_size,dim_2,dim_3,1,1,1,1); + Kokkos::View testview("testview", par_size, + dim_2, dim_3, 1, 1, 1, 1); typedef InitViewRank7Functor FunctorType; timer.reset(); - Kokkos::RangePolicy policy(0,par_size); - Kokkos::parallel_for( policy , FunctorType(testview) ); + Kokkos::RangePolicy policy(0, par_size); + Kokkos::parallel_for(policy, FunctorType(testview)); DeviceType().fence(); elapsed_time_view_rank7 = timer.seconds(); - std::cout << " View Rank7 time (init only): " << elapsed_time_view_rank7 << std::endl; + std::cout << " View Rank7 time (init only): " << elapsed_time_view_rank7 + << std::endl; } { - Kokkos::DynRankView testdrview("testdrview",par_size,dim_2,dim_3); + Kokkos::DynRankView testdrview("testdrview", par_size, + dim_2, dim_3); typedef InitDynRankViewFunctor FunctorType; timer.reset(); - Kokkos::RangePolicy policy(0,par_size); - Kokkos::parallel_for( policy , FunctorType(testdrview) ); + Kokkos::RangePolicy policy(0, par_size); + Kokkos::parallel_for(policy, FunctorType(testdrview)); DeviceType().fence(); elapsed_time_drview = timer.seconds(); - std::cout << " DynRankView time (init only): " << elapsed_time_drview << std::endl; + std::cout << " DynRankView time (init only): " << elapsed_time_drview + << std::endl; timer.reset(); - Kokkos::DynRankView sumview("sumview",par_size); - Kokkos::parallel_for( policy , typename FunctorType::SumComputationTest(testdrview, sumview) ); + Kokkos::DynRankView sumview("sumview", par_size); + Kokkos::parallel_for( + policy, typename FunctorType::SumComputationTest(testdrview, sumview)); DeviceType().fence(); elapsed_time_compdrview = timer.seconds(); - std::cout << " 
DynRankView sum computation time: " << elapsed_time_compdrview << std::endl; - + std::cout << " DynRankView sum computation time: " + << elapsed_time_compdrview << std::endl; } - std::cout << " Ratio of View to DynRankView time: " << elapsed_time_view / elapsed_time_drview << std::endl; //expect < 1 - std::cout << " Ratio of View to DynRankView sum computation time: " << elapsed_time_compview / elapsed_time_compdrview << std::endl; //expect < 1 - std::cout << " Ratio of View to View Rank7 time: " << elapsed_time_view / elapsed_time_view_rank7 << std::endl; //expect < 1 - std::cout << " Ratio of StrideView to DynRankView time: " << elapsed_time_strideview / elapsed_time_drview << std::endl; //expect < 1 - std::cout << " Ratio of DynRankView to View Rank7 time: " << elapsed_time_drview / elapsed_time_view_rank7 << std::endl; //expect ? + std::cout << " Ratio of View to DynRankView time: " + << elapsed_time_view / elapsed_time_drview + << std::endl; // expect < 1 + std::cout << " Ratio of View to DynRankView sum computation time: " + << elapsed_time_compview / elapsed_time_compdrview + << std::endl; // expect < 1 + std::cout << " Ratio of View to View Rank7 time: " + << elapsed_time_view / elapsed_time_view_rank7 + << std::endl; // expect < 1 + std::cout << " Ratio of StrideView to DynRankView time: " + << elapsed_time_strideview / elapsed_time_drview + << std::endl; // expect < 1 + std::cout << " Ratio of DynRankView to View Rank7 time: " + << elapsed_time_drview / elapsed_time_view_rank7 + << std::endl; // expect ? 
timer.reset(); -} //end test_dynrankview - +} // end test_dynrankview -} //end Performance +} // namespace Performance #endif - diff --git a/lib/kokkos/containers/performance_tests/TestGlobal2LocalIds.hpp b/lib/kokkos/containers/performance_tests/TestGlobal2LocalIds.hpp index 98997b3239..0d2ee4bc8d 100644 --- a/lib/kokkos/containers/performance_tests/TestGlobal2LocalIds.hpp +++ b/lib/kokkos/containers/performance_tests/TestGlobal2LocalIds.hpp @@ -1,12 +1,13 @@ //@HEADER // ************************************************************************ -// -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -22,10 +23,10 @@ // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR @@ -35,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) -// +// // ************************************************************************ //@HEADER @@ -54,153 +55,137 @@ namespace Performance { static const unsigned begin_id_size = 256u; -static const unsigned end_id_size = 1u << 22; -static const unsigned id_step = 2u; +static const unsigned end_id_size = 1u << 22; +static const unsigned id_step = 2u; -union helper -{ +union helper { uint32_t word; uint8_t byte[4]; }; - template -struct generate_ids -{ +struct generate_ids { typedef Device execution_space; typedef typename execution_space::size_type size_type; - typedef Kokkos::View local_id_view; + typedef Kokkos::View local_id_view; local_id_view local_2_global; - generate_ids( local_id_view & ids) - : local_2_global(ids) - { + generate_ids(local_id_view& ids) : local_2_global(ids) { Kokkos::parallel_for(local_2_global.extent(0), *this); } - KOKKOS_INLINE_FUNCTION - void operator()(size_type i) const - { - + void operator()(size_type i) const { helper x = {static_cast(i)}; // shuffle the bytes of i to create a unique, semi-random global_id x.word = ~x.word; uint8_t tmp = x.byte[3]; - x.byte[3] = x.byte[1]; - x.byte[1] = tmp; + x.byte[3] = x.byte[1]; + x.byte[1] = tmp; - tmp = x.byte[2]; + tmp = x.byte[2]; x.byte[2] = x.byte[0]; x.byte[0] = tmp; local_2_global[i] = x.word; } - }; template -struct fill_map -{ +struct fill_map { typedef Device execution_space; typedef typename execution_space::size_type size_type; - typedef Kokkos::View local_id_view; - typedef Kokkos::UnorderedMap global_id_view; + typedef Kokkos::View + local_id_view; + typedef Kokkos::UnorderedMap + global_id_view; global_id_view global_2_local; local_id_view local_2_global; - fill_map( global_id_view gIds, local_id_view lIds) - : global_2_local(gIds) , local_2_global(lIds) - { + fill_map(global_id_view gIds, local_id_view lIds) + : global_2_local(gIds), local_2_global(lIds) { Kokkos::parallel_for(local_2_global.extent(0), *this); } KOKKOS_INLINE_FUNCTION - void 
operator()(size_type i) const - { - global_2_local.insert( local_2_global[i], i); + void operator()(size_type i) const { + global_2_local.insert(local_2_global[i], i); } - }; template -struct find_test -{ +struct find_test { typedef Device execution_space; typedef typename execution_space::size_type size_type; - typedef Kokkos::View local_id_view; - typedef Kokkos::UnorderedMap global_id_view; + typedef Kokkos::View + local_id_view; + typedef Kokkos::UnorderedMap + global_id_view; global_id_view global_2_local; local_id_view local_2_global; typedef size_t value_type; - find_test( global_id_view gIds, local_id_view lIds, value_type & num_errors) - : global_2_local(gIds) , local_2_global(lIds) - { + find_test(global_id_view gIds, local_id_view lIds, value_type& num_errors) + : global_2_local(gIds), local_2_global(lIds) { Kokkos::parallel_reduce(local_2_global.extent(0), *this, num_errors); } KOKKOS_INLINE_FUNCTION - void init(value_type & v) const - { v = 0; } + void init(value_type& v) const { v = 0; } KOKKOS_INLINE_FUNCTION - void join(volatile value_type & dst, volatile value_type const & src) const - { dst += src; } + void join(volatile value_type& dst, volatile value_type const& src) const { + dst += src; + } KOKKOS_INLINE_FUNCTION - void operator()(size_type i, value_type & num_errors) const - { - uint32_t index = global_2_local.find( local_2_global[i] ); + void operator()(size_type i, value_type& num_errors) const { + uint32_t index = global_2_local.find(local_2_global[i]); - if ( global_2_local.value_at(index) != i) ++num_errors; + if (global_2_local.value_at(index) != i) ++num_errors; } - }; template -void test_global_to_local_ids(unsigned num_ids) -{ - +void test_global_to_local_ids(unsigned num_ids) { typedef Device execution_space; typedef typename execution_space::size_type size_type; - typedef Kokkos::View local_id_view; - typedef Kokkos::UnorderedMap global_id_view; + typedef Kokkos::View local_id_view; + typedef Kokkos::UnorderedMap + global_id_view; 
- //size + // size std::cout << num_ids << ", "; double elasped_time = 0; Kokkos::Timer timer; local_id_view local_2_global("local_ids", num_ids); - global_id_view global_2_local((3u*num_ids)/2u); + global_id_view global_2_local((3u * num_ids) / 2u); - //create + // create elasped_time = timer.seconds(); std::cout << elasped_time << ", "; timer.reset(); // generate unique ids - { - generate_ids gen(local_2_global); - } + { generate_ids gen(local_2_global); } Device().fence(); // generate elasped_time = timer.seconds(); std::cout << elasped_time << ", "; timer.reset(); - { - fill_map fill(global_2_local, local_2_global); - } + { fill_map fill(global_2_local, local_2_global); } Device().fence(); // fill @@ -208,11 +193,9 @@ void test_global_to_local_ids(unsigned num_ids) std::cout << elasped_time << ", "; timer.reset(); - size_t num_errors = 0; - for (int i=0; i<100; ++i) - { - find_test find(global_2_local, local_2_global,num_errors); + for (int i = 0; i < 100; ++i) { + find_test find(global_2_local, local_2_global, num_errors); } Device().fence(); @@ -220,12 +203,9 @@ void test_global_to_local_ids(unsigned num_ids) elasped_time = timer.seconds(); std::cout << elasped_time << std::endl; - ASSERT_EQ( num_errors, 0u); + ASSERT_EQ(num_errors, 0u); } +} // namespace Performance -} // namespace Performance - - -#endif //KOKKOS_TEST_GLOBAL_TO_LOCAL_IDS_HPP - +#endif // KOKKOS_TEST_GLOBAL_TO_LOCAL_IDS_HPP diff --git a/lib/kokkos/containers/performance_tests/TestHPX.cpp b/lib/kokkos/containers/performance_tests/TestHPX.cpp index 0f43377cee..48be466bfa 100644 --- a/lib/kokkos/containers/performance_tests/TestHPX.cpp +++ b/lib/kokkos/containers/performance_tests/TestHPX.cpp @@ -2,10 +2,11 @@ //@HEADER // ************************************************************************ // -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). 
// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Redistribution and use in source and binary forms, with or without @@ -23,10 +24,10 @@ // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR @@ -42,7 +43,7 @@ */ #include -#if defined( KOKKOS_ENABLE_HPX ) +#if defined(KOKKOS_ENABLE_HPX) #include @@ -61,70 +62,63 @@ #include #include - namespace Performance { class hpx : public ::testing::Test { -protected: - static void SetUpTestCase() - { + protected: + static void SetUpTestCase() { std::cout << std::setprecision(5) << std::scientific; Kokkos::initialize(); - Kokkos::print_configuration( std::cout ); + Kokkos::print_configuration(std::cout); } - static void TearDownTestCase() - { - Kokkos::finalize(); - } + static void TearDownTestCase() { Kokkos::finalize(); } }; -TEST_F( hpx, dynrankview_perf ) -{ +TEST_F(hpx, dynrankview_perf) { std::cout << "HPX" << std::endl; std::cout << " DynRankView vs View: Initialization Only " << std::endl; - test_dynrankview_op_perf( 8192 ); + test_dynrankview_op_perf(8192); } -TEST_F( hpx, global_2_local) -{ +TEST_F(hpx, global_2_local) { std::cout << "HPX" << std::endl; std::cout << "size, create, generate, 
fill, find" << std::endl; - for (unsigned i=Performance::begin_id_size; i<=Performance::end_id_size; i *= Performance::id_step) + for (unsigned i = Performance::begin_id_size; i <= Performance::end_id_size; + i *= Performance::id_step) test_global_to_local_ids(i); } -TEST_F( hpx, unordered_map_performance_near) -{ +TEST_F(hpx, unordered_map_performance_near) { unsigned num_hpx = 4; std::ostringstream base_file_name; base_file_name << "hpx-" << num_hpx << "-near"; - Perf::run_performance_tests(base_file_name.str()); + Perf::run_performance_tests( + base_file_name.str()); } -TEST_F( hpx, unordered_map_performance_far) -{ +TEST_F(hpx, unordered_map_performance_far) { unsigned num_hpx = 4; std::ostringstream base_file_name; base_file_name << "hpx-" << num_hpx << "-far"; - Perf::run_performance_tests(base_file_name.str()); + Perf::run_performance_tests( + base_file_name.str()); } -TEST_F( hpx, scatter_view) -{ +TEST_F(hpx, scatter_view) { std::cout << "ScatterView data-duplicated test:\n"; Perf::test_scatter_view(10, 1000 * 1000); -//std::cout << "ScatterView atomics test:\n"; -//Perf::test_scatter_view(10, 1000 * 1000); + Kokkos::Experimental::ScatterDuplicated, + Kokkos::Experimental::ScatterNonAtomic>(10, + 1000 * 1000); + // std::cout << "ScatterView atomics test:\n"; + // Perf::test_scatter_view(10, 1000 * 1000); } -} // namespace test +} // namespace Performance #else void KOKKOS_CONTAINERS_PERFORMANCE_TESTS_TESTHPX_PREVENT_EMPTY_LINK_ERROR() {} #endif - diff --git a/lib/kokkos/containers/performance_tests/TestMain.cpp b/lib/kokkos/containers/performance_tests/TestMain.cpp index 217b01a57a..e3c8edb045 100644 --- a/lib/kokkos/containers/performance_tests/TestMain.cpp +++ b/lib/kokkos/containers/performance_tests/TestMain.cpp @@ -2,10 +2,11 @@ //@HEADER // ************************************************************************ // -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation +// Kokkos v. 
3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). // -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Redistribution and use in source and binary forms, with or without @@ -23,10 +24,10 @@ // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR @@ -47,7 +48,6 @@ #include int main(int argc, char *argv[]) { - ::testing::InitGoogleTest(&argc,argv); + ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } - diff --git a/lib/kokkos/containers/performance_tests/TestOpenMP.cpp b/lib/kokkos/containers/performance_tests/TestOpenMP.cpp index e6218074ea..a9c8639ed4 100644 --- a/lib/kokkos/containers/performance_tests/TestOpenMP.cpp +++ b/lib/kokkos/containers/performance_tests/TestOpenMP.cpp @@ -2,10 +2,11 @@ //@HEADER // ************************************************************************ // -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). 
// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Redistribution and use in source and binary forms, with or without @@ -23,10 +24,10 @@ // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR @@ -42,7 +43,7 @@ */ #include -#if defined( KOKKOS_ENABLE_OPENMP ) +#if defined(KOKKOS_ENABLE_OPENMP) #include @@ -61,82 +62,72 @@ #include #include - namespace Performance { class openmp : public ::testing::Test { -protected: - static void SetUpTestCase() - { + protected: + static void SetUpTestCase() { std::cout << std::setprecision(5) << std::scientific; Kokkos::initialize(); - Kokkos::OpenMP::print_configuration( std::cout ); + Kokkos::OpenMP::print_configuration(std::cout); } - static void TearDownTestCase() - { - Kokkos::finalize(); - } + static void TearDownTestCase() { Kokkos::finalize(); } }; -TEST_F( openmp, dynrankview_perf ) -{ +TEST_F(openmp, dynrankview_perf) { std::cout << "OpenMP" << std::endl; std::cout << " DynRankView vs View: Initialization Only " << std::endl; - test_dynrankview_op_perf( 8192 ); + test_dynrankview_op_perf(8192); } -TEST_F( openmp, global_2_local) -{ +TEST_F(openmp, global_2_local) { std::cout << "OpenMP" << 
std::endl; std::cout << "size, create, generate, fill, find" << std::endl; - for (unsigned i=Performance::begin_id_size; i<=Performance::end_id_size; i *= Performance::id_step) + for (unsigned i = Performance::begin_id_size; i <= Performance::end_id_size; + i *= Performance::id_step) test_global_to_local_ids(i); } -TEST_F( openmp, unordered_map_performance_near) -{ +TEST_F(openmp, unordered_map_performance_near) { unsigned num_openmp = 4; if (Kokkos::hwloc::available()) { num_openmp = Kokkos::hwloc::get_available_numa_count() * - Kokkos::hwloc::get_available_cores_per_numa() * - Kokkos::hwloc::get_available_threads_per_core(); - + Kokkos::hwloc::get_available_cores_per_numa() * + Kokkos::hwloc::get_available_threads_per_core(); } std::ostringstream base_file_name; base_file_name << "openmp-" << num_openmp << "-near"; - Perf::run_performance_tests(base_file_name.str()); + Perf::run_performance_tests(base_file_name.str()); } -TEST_F( openmp, unordered_map_performance_far) -{ +TEST_F(openmp, unordered_map_performance_far) { unsigned num_openmp = 4; if (Kokkos::hwloc::available()) { num_openmp = Kokkos::hwloc::get_available_numa_count() * - Kokkos::hwloc::get_available_cores_per_numa() * - Kokkos::hwloc::get_available_threads_per_core(); - + Kokkos::hwloc::get_available_cores_per_numa() * + Kokkos::hwloc::get_available_threads_per_core(); } std::ostringstream base_file_name; base_file_name << "openmp-" << num_openmp << "-far"; - Perf::run_performance_tests(base_file_name.str()); + Perf::run_performance_tests(base_file_name.str()); } -TEST_F( openmp, scatter_view) -{ +TEST_F(openmp, scatter_view) { std::cout << "ScatterView data-duplicated test:\n"; Perf::test_scatter_view(10, 1000 * 1000); -//std::cout << "ScatterView atomics test:\n"; -//Perf::test_scatter_view(10, 1000 * 1000); + Kokkos::Experimental::ScatterDuplicated, + Kokkos::Experimental::ScatterNonAtomic>(10, + 1000 * 1000); + // std::cout << "ScatterView atomics test:\n"; + // Perf::test_scatter_view(10, 1000 
* 1000); } -} // namespace test +} // namespace Performance #else -void KOKKOS_CONTAINERS_PERFORMANCE_TESTS_TESTOPENMP_PREVENT_EMPTY_LINK_ERROR() {} +void KOKKOS_CONTAINERS_PERFORMANCE_TESTS_TESTOPENMP_PREVENT_EMPTY_LINK_ERROR() { +} #endif - diff --git a/lib/kokkos/containers/performance_tests/TestROCm.cpp b/lib/kokkos/containers/performance_tests/TestROCm.cpp index 3cf9f3bd14..55b770b49c 100644 --- a/lib/kokkos/containers/performance_tests/TestROCm.cpp +++ b/lib/kokkos/containers/performance_tests/TestROCm.cpp @@ -2,10 +2,11 @@ //@HEADER // ************************************************************************ // -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). // -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Redistribution and use in source and binary forms, with or without @@ -23,10 +24,10 @@ // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR @@ -42,7 +43,7 @@ */ #include -#if defined( KOKKOS_ENABLE_ROCM ) +#if defined(KOKKOS_ENABLE_ROCM) #include #include @@ -66,15 +67,14 @@ namespace Performance { class rocm : public ::testing::Test { -protected: - static void SetUpTestCase() - { + protected: + static void SetUpTestCase() { std::cout << std::setprecision(5) << std::scientific; Kokkos::HostSpace::execution_space::initialize(); - Kokkos::Experimental::ROCm::initialize( Kokkos::Experimental::ROCm::SelectDevice(0) ); + Kokkos::Experimental::ROCm::initialize( + Kokkos::Experimental::ROCm::SelectDevice(0)); } - static void TearDownTestCase() - { + static void TearDownTestCase() { Kokkos::Experimental::ROCm::finalize(); Kokkos::HostSpace::execution_space::finalize(); } @@ -97,17 +97,15 @@ TEST_F( rocm, global_2_local) } #endif -TEST_F( rocm, unordered_map_performance_near) -{ - Perf::run_performance_tests("rocm-near"); +TEST_F(rocm, unordered_map_performance_near) { + Perf::run_performance_tests("rocm-near"); } -TEST_F( rocm, unordered_map_performance_far) -{ - Perf::run_performance_tests("rocm-far"); +TEST_F(rocm, unordered_map_performance_far) { + Perf::run_performance_tests("rocm-far"); } -} +} // namespace Performance #else void KOKKOS_CONTAINERS_PERFORMANCE_TESTS_TESTROCM_PREVENT_EMPTY_LINK_ERROR() {} -#endif /* #if defined( KOKKOS_ENABLE_ROCM ) */ +#endif /* #if defined( KOKKOS_ENABLE_ROCM ) */ diff --git a/lib/kokkos/containers/performance_tests/TestScatterView.hpp b/lib/kokkos/containers/performance_tests/TestScatterView.hpp index bd9121bb82..3d4c57f3e2 100644 --- a/lib/kokkos/containers/performance_tests/TestScatterView.hpp +++ b/lib/kokkos/containers/performance_tests/TestScatterView.hpp @@ -2,10 +2,11 @@ //@HEADER // 
************************************************************************ // -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). // -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Redistribution and use in source and binary forms, with or without @@ -23,10 +24,10 @@ // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR @@ -49,67 +50,68 @@ namespace Perf { -template -void test_scatter_view(int m, int n) -{ - Kokkos::View original_view("original_view", n); +template +void test_scatter_view(int m, int n) { + Kokkos::View original_view("original_view", + n); { - auto scatter_view = Kokkos::Experimental::create_scatter_view - < Kokkos::Experimental::ScatterSum - , duplication - , contribution - > (original_view); + auto scatter_view = Kokkos::Experimental::create_scatter_view< + Kokkos::Experimental::ScatterSum, duplication, contribution>( + original_view); Kokkos::Experimental::UniqueToken< - ExecSpace, Kokkos::Experimental::UniqueTokenScope::Global> - unique_token{ExecSpace()}; - //auto internal_view = scatter_view.internal_view; + ExecSpace, Kokkos::Experimental::UniqueTokenScope::Global> + unique_token{ExecSpace()}; + // auto internal_view = scatter_view.internal_view; auto policy = Kokkos::RangePolicy(0, n); for (int foo = 0; foo < 5; ++foo) { - { - auto num_threads = unique_token.size(); - std::cout << "num_threads " << num_threads << '\n'; - Kokkos::View hand_coded_duplicate_view("hand_coded_duplicate", num_threads, n); - auto f2 = KOKKOS_LAMBDA(int i) { - auto thread_id = unique_token.acquire(); - for (int j = 0; j < 10; ++j) { - auto k = (i + j) % n; - hand_coded_duplicate_view(thread_id, k, 0) += 4.2; - hand_coded_duplicate_view(thread_id, k, 1) += 2.0; - hand_coded_duplicate_view(thread_id, k, 2) += 1.0; + { + auto num_threads = unique_token.size(); + std::cout << "num_threads " << num_threads << '\n'; + Kokkos::View + hand_coded_duplicate_view("hand_coded_duplicate", num_threads, n); + auto f2 = KOKKOS_LAMBDA(int i) { + auto thread_id = unique_token.acquire(); + for (int j = 0; j < 10; ++j) { + auto k = (i + j) % n; + 
hand_coded_duplicate_view(thread_id, k, 0) += 4.2; + hand_coded_duplicate_view(thread_id, k, 1) += 2.0; + hand_coded_duplicate_view(thread_id, k, 2) += 1.0; + } + }; + Kokkos::Timer timer; + timer.reset(); + for (int k = 0; k < m; ++k) { + Kokkos::parallel_for(policy, f2, + "hand_coded_duplicate_scatter_view_test"); } - }; - Kokkos::Timer timer; - timer.reset(); - for (int k = 0; k < m; ++k) { - Kokkos::parallel_for(policy, f2, "hand_coded_duplicate_scatter_view_test"); + Kokkos::fence(); + auto t = timer.seconds(); + std::cout << "hand-coded test took " << t << " seconds\n"; } - Kokkos::fence(); - auto t = timer.seconds(); - std::cout << "hand-coded test took " << t << " seconds\n"; - } - { - auto f = KOKKOS_LAMBDA(int i) { - auto scatter_access = scatter_view.access(); - for (int j = 0; j < 10; ++j) { - auto k = (i + j) % n; - scatter_access(k, 0) += 4.2; - scatter_access(k, 1) += 2.0; - scatter_access(k, 2) += 1.0; + { + auto f = KOKKOS_LAMBDA(int i) { + auto scatter_access = scatter_view.access(); + for (int j = 0; j < 10; ++j) { + auto k = (i + j) % n; + scatter_access(k, 0) += 4.2; + scatter_access(k, 1) += 2.0; + scatter_access(k, 2) += 1.0; + } + }; + Kokkos::Timer timer; + timer.reset(); + for (int k = 0; k < m; ++k) { + Kokkos::parallel_for(policy, f, "scatter_view_test"); } - }; - Kokkos::Timer timer; - timer.reset(); - for (int k = 0; k < m; ++k) { - Kokkos::parallel_for(policy, f, "scatter_view_test"); + Kokkos::fence(); + auto t = timer.seconds(); + std::cout << "test took " << t << " seconds\n"; } - Kokkos::fence(); - auto t = timer.seconds(); - std::cout << "test took " << t << " seconds\n"; } } - } } -} +} // namespace Perf #endif diff --git a/lib/kokkos/containers/performance_tests/TestThreads.cpp b/lib/kokkos/containers/performance_tests/TestThreads.cpp index 6a02e67b25..2f37404539 100644 --- a/lib/kokkos/containers/performance_tests/TestThreads.cpp +++ b/lib/kokkos/containers/performance_tests/TestThreads.cpp @@ -2,10 +2,11 @@ //@HEADER // 
************************************************************************ // -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). // -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Redistribution and use in source and binary forms, with or without @@ -23,10 +24,10 @@ // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR @@ -42,7 +43,7 @@ */ #include -#if defined( KOKKOS_ENABLE_THREADS ) +#if defined(KOKKOS_ENABLE_THREADS) #include @@ -65,9 +66,8 @@ namespace Performance { class threads : public ::testing::Test { -protected: - static void SetUpTestCase() - { + protected: + static void SetUpTestCase() { std::cout << std::setprecision(5) << std::scientific; unsigned num_threads = 4; @@ -76,66 +76,57 @@ protected: num_threads = Kokkos::hwloc::get_available_numa_count() * Kokkos::hwloc::get_available_cores_per_numa() * Kokkos::hwloc::get_available_threads_per_core(); - } std::cout << "Threads: " << num_threads << std::endl; - Kokkos::initialize( Kokkos::InitArguments(num_threads) ); + Kokkos::initialize(Kokkos::InitArguments(num_threads)); } - static void TearDownTestCase() - { - Kokkos::finalize(); - } + static void TearDownTestCase() { Kokkos::finalize(); } }; -TEST_F( threads, dynrankview_perf ) -{ +TEST_F(threads, dynrankview_perf) { std::cout << "Threads" << std::endl; std::cout << " DynRankView vs View: Initialization Only " << std::endl; - test_dynrankview_op_perf( 8192 ); + test_dynrankview_op_perf(8192); } -TEST_F( threads, global_2_local) -{ +TEST_F(threads, global_2_local) { std::cout << "Threads" << std::endl; std::cout << "size, create, generate, fill, find" << std::endl; - for (unsigned i=Performance::begin_id_size; i<=Performance::end_id_size; i *= Performance::id_step) + for (unsigned i = Performance::begin_id_size; i <= Performance::end_id_size; + i *= Performance::id_step) test_global_to_local_ids(i); } -TEST_F( threads, unordered_map_performance_near) -{ +TEST_F(threads, unordered_map_performance_near) { unsigned num_threads = 4; if (Kokkos::hwloc::available()) { num_threads = Kokkos::hwloc::get_available_numa_count() * 
Kokkos::hwloc::get_available_cores_per_numa() * Kokkos::hwloc::get_available_threads_per_core(); - } std::ostringstream base_file_name; base_file_name << "threads-" << num_threads << "-near"; - Perf::run_performance_tests(base_file_name.str()); + Perf::run_performance_tests(base_file_name.str()); } -TEST_F( threads, unordered_map_performance_far) -{ +TEST_F(threads, unordered_map_performance_far) { unsigned num_threads = 4; if (Kokkos::hwloc::available()) { num_threads = Kokkos::hwloc::get_available_numa_count() * Kokkos::hwloc::get_available_cores_per_numa() * Kokkos::hwloc::get_available_threads_per_core(); - } std::ostringstream base_file_name; base_file_name << "threads-" << num_threads << "-far"; - Perf::run_performance_tests(base_file_name.str()); + Perf::run_performance_tests(base_file_name.str()); } -} // namespace Performance +} // namespace Performance #else -void KOKKOS_CONTAINERS_PERFORMANCE_TESTS_TESTTHREADS_PREVENT_EMPTY_LINK_ERROR() {} +void KOKKOS_CONTAINERS_PERFORMANCE_TESTS_TESTTHREADS_PREVENT_EMPTY_LINK_ERROR() { +} #endif - diff --git a/lib/kokkos/containers/performance_tests/TestUnorderedMapPerformance.hpp b/lib/kokkos/containers/performance_tests/TestUnorderedMapPerformance.hpp index 8d09281ed3..9057842340 100644 --- a/lib/kokkos/containers/performance_tests/TestUnorderedMapPerformance.hpp +++ b/lib/kokkos/containers/performance_tests/TestUnorderedMapPerformance.hpp @@ -1,10 +1,11 @@ //@HEADER // ************************************************************************ // -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). // -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. 
// // Redistribution and use in source and binary forms, with or without @@ -22,10 +23,10 @@ // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR @@ -50,12 +51,10 @@ #include #include - namespace Perf { template -struct UnorderedMapTest -{ +struct UnorderedMapTest { typedef Device execution_space; typedef Kokkos::UnorderedMap map_type; typedef typename map_type::histogram_type histogram_type; @@ -68,22 +67,22 @@ struct UnorderedMapTest uint32_t capacity; uint32_t inserts; uint32_t collisions; - double seconds; + double seconds; map_type map; histogram_type histogram; - UnorderedMapTest( uint32_t arg_capacity, uint32_t arg_inserts, uint32_t arg_collisions) - : capacity(arg_capacity) - , inserts(arg_inserts) - , collisions(arg_collisions) - , seconds(0) - , map(capacity) - , histogram(map.get_histogram()) - { - Kokkos::Timer wall_clock ; + UnorderedMapTest(uint32_t arg_capacity, uint32_t arg_inserts, + uint32_t arg_collisions) + : capacity(arg_capacity), + inserts(arg_inserts), + collisions(arg_collisions), + seconds(0), + map(capacity), + histogram(map.get_histogram()) { + Kokkos::Timer wall_clock; wall_clock.reset(); - value_type v = {}; + value_type v = {}; int loop_count = 0; do { ++loop_count; @@ -92,81 +91,79 @@ struct UnorderedMapTest Kokkos::parallel_reduce(inserts, *this, v); if 
(v.failed_count > 0u) { - const uint32_t new_capacity = map.capacity() + ((map.capacity()*3ull)/20u) + v.failed_count/collisions ; - map.rehash( new_capacity ); + const uint32_t new_capacity = map.capacity() + + ((map.capacity() * 3ull) / 20u) + + v.failed_count / collisions; + map.rehash(new_capacity); } } while (v.failed_count > 0u); seconds = wall_clock.seconds(); - switch (loop_count) - { - case 1u: std::cout << " \033[0;32m" << loop_count << "\033[0m "; break; - case 2u: std::cout << " \033[1;31m" << loop_count << "\033[0m "; break; - default: std::cout << " \033[0;31m" << loop_count << "\033[0m "; break; + switch (loop_count) { + case 1u: std::cout << " \033[0;32m" << loop_count << "\033[0m "; break; + case 2u: std::cout << " \033[1;31m" << loop_count << "\033[0m "; break; + default: std::cout << " \033[0;31m" << loop_count << "\033[0m "; break; } - std::cout << std::setprecision(2) << std::fixed << std::setw(5) << (1e9*(seconds/(inserts))) << "; " << std::flush; + std::cout << std::setprecision(2) << std::fixed << std::setw(5) + << (1e9 * (seconds / (inserts))) << "; " << std::flush; histogram.calculate(); Device().fence(); } - void print(std::ostream & metrics_out, std::ostream & length_out, std::ostream & distance_out, std::ostream & block_distance_out) - { + void print(std::ostream& metrics_out, std::ostream& length_out, + std::ostream& distance_out, std::ostream& block_distance_out) { metrics_out << map.capacity() << " , "; - metrics_out << inserts/collisions << " , "; - metrics_out << (100.0 * inserts/collisions) / map.capacity() << " , "; + metrics_out << inserts / collisions << " , "; + metrics_out << (100.0 * inserts / collisions) / map.capacity() << " , "; metrics_out << inserts << " , "; metrics_out << (map.failed_insert() ? 
"true" : "false") << " , "; metrics_out << collisions << " , "; - metrics_out << 1e9*(seconds/inserts) << " , "; + metrics_out << 1e9 * (seconds / inserts) << " , "; metrics_out << seconds << std::endl; length_out << map.capacity() << " , "; - length_out << ((100.0 *inserts/collisions) / map.capacity()) << " , "; + length_out << ((100.0 * inserts / collisions) / map.capacity()) << " , "; length_out << collisions << " , "; histogram.print_length(length_out); distance_out << map.capacity() << " , "; - distance_out << ((100.0 *inserts/collisions) / map.capacity()) << " , "; + distance_out << ((100.0 * inserts / collisions) / map.capacity()) << " , "; distance_out << collisions << " , "; histogram.print_distance(distance_out); block_distance_out << map.capacity() << " , "; - block_distance_out << ((100.0 *inserts/collisions) / map.capacity()) << " , "; + block_distance_out << ((100.0 * inserts / collisions) / map.capacity()) + << " , "; block_distance_out << collisions << " , "; histogram.print_block_distance(block_distance_out); } - KOKKOS_INLINE_FUNCTION - void init( value_type & v ) const - { + void init(value_type& v) const { v.failed_count = 0; - v.max_list = 0; + v.max_list = 0; } KOKKOS_INLINE_FUNCTION - void join( volatile value_type & dst, const volatile value_type & src ) const - { + void join(volatile value_type& dst, const volatile value_type& src) const { dst.failed_count += src.failed_count; dst.max_list = src.max_list < dst.max_list ? dst.max_list : src.max_list; } KOKKOS_INLINE_FUNCTION - void operator()(uint32_t i, value_type & v) const - { - const uint32_t key = Near ? i/collisions : i%(inserts/collisions); - typename map_type::insert_result result = map.insert(key,i); + void operator()(uint32_t i, value_type& v) const { + const uint32_t key = Near ? i / collisions : i % (inserts / collisions); + typename map_type::insert_result result = map.insert(key, i); v.failed_count += !result.failed() ? 
0 : 1; - v.max_list = result.list_position() < v.max_list ? v.max_list : result.list_position(); + v.max_list = result.list_position() < v.max_list ? v.max_list + : result.list_position(); } - }; template -void run_performance_tests(std::string const & base_file_name) -{ +void run_performance_tests(std::string const& base_file_name) { #if 0 std::string metrics_file_name = base_file_name + std::string("-metrics.csv"); std::string length_file_name = base_file_name + std::string("-length.csv"); @@ -254,7 +251,6 @@ void run_performance_tests(std::string const & base_file_name) #endif } +} // namespace Perf -} // namespace Perf - -#endif //KOKKOS_TEST_UNORDERED_MAP_PERFORMANCE_HPP +#endif // KOKKOS_TEST_UNORDERED_MAP_PERFORMANCE_HPP diff --git a/lib/kokkos/containers/src/CMakeLists.txt b/lib/kokkos/containers/src/CMakeLists.txt index e68fcad5e9..0c9d24d641 100644 --- a/lib/kokkos/containers/src/CMakeLists.txt +++ b/lib/kokkos/containers/src/CMakeLists.txt @@ -1,47 +1,34 @@ - -TRIBITS_CONFIGURE_FILE(${PACKAGE_NAME}_config.h) - -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) - -#----------------------------------------------------------------------------- - -SET(TRILINOS_INCDIR ${CMAKE_INSTALL_PREFIX}/${${PROJECT_NAME}_INSTALL_INCLUDE_DIR}) - -if(KOKKOS_LEGACY_TRIBITS) - - SET(HEADERS "") - SET(SOURCES "") - - SET(HEADERS_IMPL "") - - FILE(GLOB HEADERS *.hpp) - FILE(GLOB HEADERS_IMPL impl/*.hpp) - FILE(GLOB SOURCES impl/*.cpp) - - INSTALL(FILES ${HEADERS_IMPL} DESTINATION ${TRILINOS_INCDIR}/impl/) - - TRIBITS_ADD_LIBRARY( - kokkoscontainers - HEADERS ${HEADERS} - NOINSTALLHEADERS ${HEADERS_IMPL} - SOURCES ${SOURCES} - DEPLIBS - ) - -else() - - INSTALL ( - DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/" - DESTINATION ${TRILINOS_INCDIR} - FILES_MATCHING PATTERN "*.hpp" - ) - - TRIBITS_ADD_LIBRARY( - kokkoscontainers - SOURCES ${KOKKOS_CONTAINERS_SRCS} - DEPLIBS - ) - -endif() 
-#----------------------------------------------------------------------------- + +KOKKOS_CONFIGURE_FILE(${PACKAGE_NAME}_config.h) + +#need these here for now +KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +#----------------------------------------------------------------------------- + +SET(KOKKOS_CONTAINERS_SRCS) +APPEND_GLOB(KOKKOS_CONTAINERS_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/impl/*.cpp) + +INSTALL ( + DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/" + DESTINATION ${KOKKOS_HEADER_DIR} + FILES_MATCHING PATTERN "*.hpp" +) + +KOKKOS_ADD_LIBRARY( + kokkoscontainers + SOURCES ${KOKKOS_CONTAINERS_SRCS} +) + +SET_TARGET_PROPERTIES(kokkoscontainers PROPERTIES VERSION ${Kokkos_VERSION}) + +KOKKOS_LIB_INCLUDE_DIRECTORIES(kokkoscontainers + ${KOKKOS_TOP_BUILD_DIR} + ${CMAKE_CURRENT_BINARY_DIR} + ${CMAKE_CURRENT_SOURCE_DIR} +) +KOKKOS_LINK_INTERNAL_LIBRARY(kokkoscontainers kokkoscore) + +#----------------------------------------------------------------------------- + diff --git a/lib/kokkos/containers/src/Kokkos_Bitset.hpp b/lib/kokkos/containers/src/Kokkos_Bitset.hpp index 4d78430fc6..3596c7653a 100644 --- a/lib/kokkos/containers/src/Kokkos_Bitset.hpp +++ b/lib/kokkos/containers/src/Kokkos_Bitset.hpp @@ -2,10 +2,11 @@ //@HEADER // ************************************************************************ // -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). // -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Redistribution and use in source and binary forms, with or without @@ -23,10 +24,10 @@ // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. 
// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR @@ -53,27 +54,25 @@ namespace Kokkos { -template +template class Bitset; -template +template class ConstBitset; template -void deep_copy( Bitset & dst, Bitset const& src); +void deep_copy(Bitset& dst, Bitset const& src); template -void deep_copy( Bitset & dst, ConstBitset const& src); +void deep_copy(Bitset& dst, ConstBitset const& src); template -void deep_copy( ConstBitset & dst, ConstBitset const& src); - +void deep_copy(ConstBitset& dst, ConstBitset const& src); /// A thread safe view to a bitset template -class Bitset -{ -public: +class Bitset { + public: typedef Device execution_space; typedef unsigned size_type; @@ -81,98 +80,88 @@ public: enum { MOVE_HINT_BACKWARD = 2u }; enum { - BIT_SCAN_FORWARD_MOVE_HINT_FORWARD = 0u - , BIT_SCAN_REVERSE_MOVE_HINT_FORWARD = BIT_SCAN_REVERSE - , BIT_SCAN_FORWARD_MOVE_HINT_BACKWARD = MOVE_HINT_BACKWARD - , BIT_SCAN_REVERSE_MOVE_HINT_BACKWARD = BIT_SCAN_REVERSE | MOVE_HINT_BACKWARD + BIT_SCAN_FORWARD_MOVE_HINT_FORWARD = 0u, + BIT_SCAN_REVERSE_MOVE_HINT_FORWARD = BIT_SCAN_REVERSE, + BIT_SCAN_FORWARD_MOVE_HINT_BACKWARD = MOVE_HINT_BACKWARD, + BIT_SCAN_REVERSE_MOVE_HINT_BACKWARD = BIT_SCAN_REVERSE | MOVE_HINT_BACKWARD }; -private: - enum { block_size = static_cast(sizeof(unsigned)*CHAR_BIT) }; - enum { block_mask = block_size-1u }; + private: + enum { block_size = static_cast(sizeof(unsigned) * CHAR_BIT) }; + enum { block_mask = 
block_size - 1u }; enum { block_shift = Kokkos::Impl::integral_power_of_two(block_size) }; -public: - - + public: /// constructor /// arg_size := number of bit in set Bitset(unsigned arg_size = 0u) - : m_size(arg_size) - , m_last_block_mask(0u) - , m_blocks("Bitset", ((m_size + block_mask) >> block_shift) ) - { - for (int i=0, end = static_cast(m_size & block_mask); i < end; ++i) { + : m_size(arg_size), + m_last_block_mask(0u), + m_blocks("Bitset", ((m_size + block_mask) >> block_shift)) { + for (int i = 0, end = static_cast(m_size & block_mask); i < end; ++i) { m_last_block_mask |= 1u << i; } } KOKKOS_INLINE_FUNCTION - Bitset (const Bitset&) = default; + Bitset(const Bitset&) = default; KOKKOS_INLINE_FUNCTION - Bitset& operator= (const Bitset&) = default; + Bitset& operator=(const Bitset&) = default; KOKKOS_INLINE_FUNCTION - Bitset (Bitset&&) = default; + Bitset(Bitset&&) = default; KOKKOS_INLINE_FUNCTION - Bitset& operator= (Bitset&&) = default; - + Bitset& operator=(Bitset&&) = default; + KOKKOS_INLINE_FUNCTION - ~Bitset () = default; + ~Bitset() = default; /// number of bits in the set /// can be call from the host or the device KOKKOS_FORCEINLINE_FUNCTION - unsigned size() const - { return m_size; } + unsigned size() const { return m_size; } /// number of bits which are set to 1 /// can only be called from the host - unsigned count() const - { - Impl::BitsetCount< Bitset > f(*this); + unsigned count() const { + Impl::BitsetCount > f(*this); return f.apply(); } /// set all bits to 1 /// can only be called from the host - void set() - { - Kokkos::deep_copy(m_blocks, ~0u ); + void set() { + Kokkos::deep_copy(m_blocks, ~0u); if (m_last_block_mask) { - //clear the unused bits in the last block - typedef Kokkos::Impl::DeepCopy< typename execution_space::memory_space, Kokkos::HostSpace > raw_deep_copy; - raw_deep_copy( m_blocks.data() + (m_blocks.extent(0) -1u), &m_last_block_mask, sizeof(unsigned)); + // clear the unused bits in the last block + typedef 
Kokkos::Impl::DeepCopy + raw_deep_copy; + raw_deep_copy(m_blocks.data() + (m_blocks.extent(0) - 1u), + &m_last_block_mask, sizeof(unsigned)); } } /// set all bits to 0 /// can only be called from the host - void reset() - { - Kokkos::deep_copy(m_blocks, 0u ); - } + void reset() { Kokkos::deep_copy(m_blocks, 0u); } /// set all bits to 0 /// can only be called from the host - void clear() - { - Kokkos::deep_copy(m_blocks, 0u ); - } + void clear() { Kokkos::deep_copy(m_blocks, 0u); } /// set i'th bit to 1 /// can only be called from the device KOKKOS_FORCEINLINE_FUNCTION - bool set( unsigned i ) const - { - if ( i < m_size ) { - unsigned * block_ptr = &m_blocks[ i >> block_shift ]; - const unsigned mask = 1u << static_cast( i & block_mask ); + bool set(unsigned i) const { + if (i < m_size) { + unsigned* block_ptr = &m_blocks[i >> block_shift]; + const unsigned mask = 1u << static_cast(i & block_mask); - return !( atomic_fetch_or( block_ptr, mask ) & mask ); + return !(atomic_fetch_or(block_ptr, mask) & mask); } return false; } @@ -180,13 +169,12 @@ public: /// set i'th bit to 0 /// can only be called from the device KOKKOS_FORCEINLINE_FUNCTION - bool reset( unsigned i ) const - { - if ( i < m_size ) { - unsigned * block_ptr = &m_blocks[ i >> block_shift ]; - const unsigned mask = 1u << static_cast( i & block_mask ); + bool reset(unsigned i) const { + if (i < m_size) { + unsigned* block_ptr = &m_blocks[i >> block_shift]; + const unsigned mask = 1u << static_cast(i & block_mask); - return atomic_fetch_and( block_ptr, ~mask ) & mask; + return atomic_fetch_and(block_ptr, ~mask) & mask; } return false; } @@ -194,11 +182,10 @@ public: /// return true if the i'th bit set to 1 /// can only be called from the device KOKKOS_FORCEINLINE_FUNCTION - bool test( unsigned i ) const - { - if ( i < m_size ) { - const unsigned block = volatile_load(&m_blocks[ i >> block_shift ]); - const unsigned mask = 1u << static_cast( i & block_mask ); + bool test(unsigned i) const { + if (i < 
m_size) { + const unsigned block = volatile_load(&m_blocks[i >> block_shift]); + const unsigned mask = 1u << static_cast(i & block_mask); return block & mask; } return false; @@ -208,90 +195,93 @@ public: /// returns the max number of times those functions should be call /// when searching for an available bit KOKKOS_FORCEINLINE_FUNCTION - unsigned max_hint() const - { - return m_blocks.extent(0); - } + unsigned max_hint() const { return m_blocks.extent(0); } /// find a bit set to 1 near the hint - /// returns a pair< bool, unsigned> where if result.first is true then result.second is the bit found - /// and if result.first is false the result.second is a new hint + /// returns a pair< bool, unsigned> where if result.first is true then + /// result.second is the bit found and if result.first is false the + /// result.second is a new hint KOKKOS_INLINE_FUNCTION - Kokkos::pair find_any_set_near( unsigned hint , unsigned scan_direction = BIT_SCAN_FORWARD_MOVE_HINT_FORWARD ) const - { - const unsigned block_idx = (hint >> block_shift) < m_blocks.extent(0) ? (hint >> block_shift) : 0; + Kokkos::pair find_any_set_near( + unsigned hint, + unsigned scan_direction = BIT_SCAN_FORWARD_MOVE_HINT_FORWARD) const { + const unsigned block_idx = + (hint >> block_shift) < m_blocks.extent(0) ? (hint >> block_shift) : 0; const unsigned offset = hint & block_mask; - unsigned block = volatile_load(&m_blocks[ block_idx ]); - block = !m_last_block_mask || (block_idx < (m_blocks.extent(0)-1)) ? block : block & m_last_block_mask ; + unsigned block = volatile_load(&m_blocks[block_idx]); + block = !m_last_block_mask || (block_idx < (m_blocks.extent(0) - 1)) + ? 
block + : block & m_last_block_mask; return find_any_helper(block_idx, offset, block, scan_direction); } /// find a bit set to 0 near the hint - /// returns a pair< bool, unsigned> where if result.first is true then result.second is the bit found - /// and if result.first is false the result.second is a new hint + /// returns a pair< bool, unsigned> where if result.first is true then + /// result.second is the bit found and if result.first is false the + /// result.second is a new hint KOKKOS_INLINE_FUNCTION - Kokkos::pair find_any_unset_near( unsigned hint , unsigned scan_direction = BIT_SCAN_FORWARD_MOVE_HINT_FORWARD ) const - { + Kokkos::pair find_any_unset_near( + unsigned hint, + unsigned scan_direction = BIT_SCAN_FORWARD_MOVE_HINT_FORWARD) const { const unsigned block_idx = hint >> block_shift; - const unsigned offset = hint & block_mask; - unsigned block = volatile_load(&m_blocks[ block_idx ]); - block = !m_last_block_mask || (block_idx < (m_blocks.extent(0)-1) ) ? ~block : ~block & m_last_block_mask ; + const unsigned offset = hint & block_mask; + unsigned block = volatile_load(&m_blocks[block_idx]); + block = !m_last_block_mask || (block_idx < (m_blocks.extent(0) - 1)) + ? 
~block + : ~block & m_last_block_mask; return find_any_helper(block_idx, offset, block, scan_direction); } -private: - + private: KOKKOS_FORCEINLINE_FUNCTION - Kokkos::pair find_any_helper(unsigned block_idx, unsigned offset, unsigned block, unsigned scan_direction) const - { - Kokkos::pair result( block > 0u, 0); + Kokkos::pair find_any_helper(unsigned block_idx, + unsigned offset, unsigned block, + unsigned scan_direction) const { + Kokkos::pair result(block > 0u, 0); if (!result.first) { - result.second = update_hint( block_idx, offset, scan_direction ); - } - else { - result.second = scan_block( (block_idx << block_shift) - , offset - , block - , scan_direction - ); + result.second = update_hint(block_idx, offset, scan_direction); + } else { + result.second = + scan_block((block_idx << block_shift), offset, block, scan_direction); } return result; } - KOKKOS_FORCEINLINE_FUNCTION - unsigned scan_block(unsigned block_start, int offset, unsigned block, unsigned scan_direction ) const - { - offset = !(scan_direction & BIT_SCAN_REVERSE) ? offset : (offset + block_mask) & block_mask; + unsigned scan_block(unsigned block_start, int offset, unsigned block, + unsigned scan_direction) const { + offset = !(scan_direction & BIT_SCAN_REVERSE) + ? offset + : (offset + block_mask) & block_mask; block = Impl::rotate_right(block, offset); - return ((( !(scan_direction & BIT_SCAN_REVERSE) ? - Impl::bit_scan_forward(block) : - ::Kokkos::log2(block) - ) + offset - ) & block_mask - ) + block_start; + return (((!(scan_direction & BIT_SCAN_REVERSE) + ? Impl::bit_scan_forward(block) + : ::Kokkos::log2(block)) + + offset) & + block_mask) + + block_start; } KOKKOS_FORCEINLINE_FUNCTION - unsigned update_hint( long long block_idx, unsigned offset, unsigned scan_direction ) const - { + unsigned update_hint(long long block_idx, unsigned offset, + unsigned scan_direction) const { block_idx += scan_direction & MOVE_HINT_BACKWARD ? -1 : 1; block_idx = block_idx >= 0 ? 
block_idx : m_blocks.extent(0) - 1; - block_idx = block_idx < static_cast(m_blocks.extent(0)) ? block_idx : 0; + block_idx = + block_idx < static_cast(m_blocks.extent(0)) ? block_idx : 0; - return static_cast(block_idx)*block_size + offset; + return static_cast(block_idx) * block_size + offset; } -private: - + private: unsigned m_size; unsigned m_last_block_mask; - View< unsigned *, execution_space, MemoryTraits > m_blocks; + View > m_blocks; -private: + private: template friend class Bitset; @@ -302,87 +292,72 @@ private: friend struct Impl::BitsetCount; template - friend void deep_copy( Bitset & dst, Bitset const& src); + friend void deep_copy(Bitset& dst, Bitset const& src); template - friend void deep_copy( Bitset & dst, ConstBitset const& src); + friend void deep_copy(Bitset& dst, + ConstBitset const& src); }; /// a thread-safe view to a const bitset /// i.e. can only test bits template -class ConstBitset -{ -public: +class ConstBitset { + public: typedef Device execution_space; typedef unsigned size_type; -private: - enum { block_size = static_cast(sizeof(unsigned)*CHAR_BIT) }; - enum { block_mask = block_size -1u }; + private: + enum { block_size = static_cast(sizeof(unsigned) * CHAR_BIT) }; + enum { block_mask = block_size - 1u }; enum { block_shift = Kokkos::Impl::integral_power_of_two(block_size) }; -public: - ConstBitset() - : m_size (0) - {} + public: + ConstBitset() : m_size(0) {} ConstBitset(Bitset const& rhs) - : m_size(rhs.m_size) - , m_blocks(rhs.m_blocks) - {} + : m_size(rhs.m_size), m_blocks(rhs.m_blocks) {} ConstBitset(ConstBitset const& rhs) - : m_size( rhs.m_size ) - , m_blocks( rhs.m_blocks ) - {} + : m_size(rhs.m_size), m_blocks(rhs.m_blocks) {} - ConstBitset & operator = (Bitset const & rhs) - { - this->m_size = rhs.m_size; + ConstBitset& operator=(Bitset const& rhs) { + this->m_size = rhs.m_size; this->m_blocks = rhs.m_blocks; return *this; } - ConstBitset & operator = (ConstBitset const & rhs) - { - this->m_size = rhs.m_size; + 
ConstBitset& operator=(ConstBitset const& rhs) { + this->m_size = rhs.m_size; this->m_blocks = rhs.m_blocks; return *this; } - KOKKOS_FORCEINLINE_FUNCTION - unsigned size() const - { - return m_size; - } + unsigned size() const { return m_size; } - unsigned count() const - { - Impl::BitsetCount< ConstBitset > f(*this); + unsigned count() const { + Impl::BitsetCount > f(*this); return f.apply(); } KOKKOS_FORCEINLINE_FUNCTION - bool test( unsigned i ) const - { - if ( i < m_size ) { - const unsigned block = m_blocks[ i >> block_shift ]; - const unsigned mask = 1u << static_cast( i & block_mask ); + bool test(unsigned i) const { + if (i < m_size) { + const unsigned block = m_blocks[i >> block_shift]; + const unsigned mask = 1u << static_cast(i & block_mask); return block & mask; } return false; } -private: - + private: unsigned m_size; - View< const unsigned *, execution_space, MemoryTraits > m_blocks; + View > m_blocks; -private: + private: template friend class ConstBitset; @@ -390,47 +365,56 @@ private: friend struct Impl::BitsetCount; template - friend void deep_copy( Bitset & dst, ConstBitset const& src); + friend void deep_copy(Bitset& dst, + ConstBitset const& src); template - friend void deep_copy( ConstBitset & dst, ConstBitset const& src); + friend void deep_copy(ConstBitset& dst, + ConstBitset const& src); }; - template -void deep_copy( Bitset & dst, Bitset const& src) -{ +void deep_copy(Bitset& dst, Bitset const& src) { if (dst.size() != src.size()) { - throw std::runtime_error("Error: Cannot deep_copy bitsets of different sizes!"); + throw std::runtime_error( + "Error: Cannot deep_copy bitsets of different sizes!"); } - typedef Kokkos::Impl::DeepCopy< typename DstDevice::memory_space, typename SrcDevice::memory_space > raw_deep_copy; - raw_deep_copy(dst.m_blocks.data(), src.m_blocks.data(), sizeof(unsigned)*src.m_blocks.extent(0)); + typedef Kokkos::Impl::DeepCopy + raw_deep_copy; + raw_deep_copy(dst.m_blocks.data(), src.m_blocks.data(), + 
sizeof(unsigned) * src.m_blocks.extent(0)); } template -void deep_copy( Bitset & dst, ConstBitset const& src) -{ +void deep_copy(Bitset& dst, ConstBitset const& src) { if (dst.size() != src.size()) { - throw std::runtime_error("Error: Cannot deep_copy bitsets of different sizes!"); + throw std::runtime_error( + "Error: Cannot deep_copy bitsets of different sizes!"); } - typedef Kokkos::Impl::DeepCopy< typename DstDevice::memory_space, typename SrcDevice::memory_space > raw_deep_copy; - raw_deep_copy(dst.m_blocks.data(), src.m_blocks.data(), sizeof(unsigned)*src.m_blocks.extent(0)); + typedef Kokkos::Impl::DeepCopy + raw_deep_copy; + raw_deep_copy(dst.m_blocks.data(), src.m_blocks.data(), + sizeof(unsigned) * src.m_blocks.extent(0)); } template -void deep_copy( ConstBitset & dst, ConstBitset const& src) -{ +void deep_copy(ConstBitset& dst, ConstBitset const& src) { if (dst.size() != src.size()) { - throw std::runtime_error("Error: Cannot deep_copy bitsets of different sizes!"); + throw std::runtime_error( + "Error: Cannot deep_copy bitsets of different sizes!"); } - typedef Kokkos::Impl::DeepCopy< typename DstDevice::memory_space, typename SrcDevice::memory_space > raw_deep_copy; - raw_deep_copy(dst.m_blocks.data(), src.m_blocks.data(), sizeof(unsigned)*src.m_blocks.extent(0)); + typedef Kokkos::Impl::DeepCopy + raw_deep_copy; + raw_deep_copy(dst.m_blocks.data(), src.m_blocks.data(), + sizeof(unsigned) * src.m_blocks.extent(0)); } -} // namespace Kokkos - -#endif //KOKKOS_BITSET_HPP +} // namespace Kokkos +#endif // KOKKOS_BITSET_HPP diff --git a/lib/kokkos/containers/src/Kokkos_DualView.hpp b/lib/kokkos/containers/src/Kokkos_DualView.hpp index d9b14d67a2..d8a3ebc1ae 100644 --- a/lib/kokkos/containers/src/Kokkos_DualView.hpp +++ b/lib/kokkos/containers/src/Kokkos_DualView.hpp @@ -2,10 +2,11 @@ //@HEADER // ************************************************************************ // -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation +// Kokkos v. 
3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). // -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Redistribution and use in source and binary forms, with or without @@ -23,10 +24,10 @@ // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR @@ -90,47 +91,41 @@ namespace Kokkos { * behavior. Please see the documentation of Kokkos::View for * examples. The default suffices for most users. */ -template< class DataType , - class Arg1Type = void , - class Arg2Type = void , +template -class DualView : public ViewTraits< DataType , Arg1Type , Arg2Type, Arg3Type > -{ -template< class , class , class , class > friend class DualView ; -public: +class DualView : public ViewTraits { + template + friend class DualView; + + public: //! \name Typedefs for device types and various Kokkos::View specializations. //@{ - typedef ViewTraits< DataType , Arg1Type , Arg2Type, Arg3Type > traits ; + typedef ViewTraits traits; //! The Kokkos Host Device type; - typedef typename traits::host_mirror_space host_mirror_space ; + typedef typename traits::host_mirror_space host_mirror_space; //! 
The type of a Kokkos::View on the device. - typedef View< typename traits::data_type , - Arg1Type , - Arg2Type , - Arg3Type > t_dev ; + typedef View t_dev; /// \typedef t_host /// \brief The type of a Kokkos::View host mirror of \c t_dev. - typedef typename t_dev::HostMirror t_host ; + typedef typename t_dev::HostMirror t_host; //! The type of a const View on the device. //! The type of a Kokkos::View on the device. - typedef View< typename traits::const_data_type , - Arg1Type , - Arg2Type , - Arg3Type > t_dev_const ; + typedef View + t_dev_const; /// \typedef t_host_const /// \brief The type of a const View host mirror of \c t_dev_const. typedef typename t_dev_const::HostMirror t_host_const; //! The type of a const, random-access View on the device. - typedef View< typename traits::const_data_type , - typename traits::array_layout , - typename traits::device_type , - Kokkos::MemoryTraits > t_dev_const_randomread ; + typedef View > + t_dev_const_randomread; /// \typedef t_host_const_randomread /// \brief The type of a const, random-access View host mirror of @@ -138,39 +133,36 @@ public: typedef typename t_dev_const_randomread::HostMirror t_host_const_randomread; //! The type of an unmanaged View on the device. - typedef View< typename traits::data_type , - typename traits::array_layout , - typename traits::device_type , - MemoryUnmanaged> t_dev_um; + typedef View + t_dev_um; //! The type of an unmanaged View host mirror of \c t_dev_um. - typedef View< typename t_host::data_type , - typename t_host::array_layout , - typename t_host::device_type , - MemoryUnmanaged> t_host_um; + typedef View + t_host_um; //! The type of a const unmanaged View on the device. - typedef View< typename traits::const_data_type , - typename traits::array_layout , - typename traits::device_type , - MemoryUnmanaged> t_dev_const_um; + typedef View + t_dev_const_um; //! The type of a const unmanaged View host mirror of \c t_dev_const_um. 
- typedef View t_host_const_um; + typedef View + t_host_const_um; //! The type of a const, random-access View on the device. - typedef View< typename t_host::const_data_type , - typename t_host::array_layout , - typename t_host::device_type , - Kokkos::MemoryTraits > t_dev_const_randomread_um ; + typedef View > + t_dev_const_randomread_um; /// \typedef t_host_const_randomread /// \brief The type of a const, random-access View host mirror of /// \c t_dev_const_randomread. - typedef typename t_dev_const_randomread::HostMirror t_host_const_randomread_um; + typedef + typename t_dev_const_randomread::HostMirror t_host_const_randomread_um; //@} //! \name The two View instances. @@ -184,18 +176,20 @@ public: //@{ #ifndef KOKKOS_ENABLE_DEPRECATED_CODE -protected: + protected: // modified_flags[0] -> host // modified_flags[1] -> device - typedef View t_modified_flags; + typedef View t_modified_flags; t_modified_flags modified_flags; -public: + public: #else - typedef View t_modified_flags; - typedef View t_modified_flag; + typedef View + t_modified_flags; + typedef View + t_modified_flag; t_modified_flags modified_flags; - t_modified_flag modified_host,modified_device; + t_modified_flag modified_host, modified_device; #endif //@} @@ -208,11 +202,11 @@ public: /// default constructors. The "modified" flags are both initialized /// to "unmodified." #ifndef KOKKOS_ENABLE_DEPRECATED_CODE - DualView () = default; + DualView() = default; #else - DualView ():modified_flags (t_modified_flags("DualView::modified_flags")) { - modified_host = t_modified_flag(modified_flags,0); - modified_device = t_modified_flag(modified_flags,1); + DualView() : modified_flags(t_modified_flags("DualView::modified_flags")) { + modified_host = t_modified_flag(modified_flags, 0); + modified_device = t_modified_flag(modified_flags, 1); } #endif @@ -225,52 +219,52 @@ public: /// View objects. 
For example, if the View has three dimensions, /// the first three integer arguments will be nonzero, and you may /// omit the integer arguments that follow. - DualView (const std::string& label, - const size_t n0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, - const size_t n1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, - const size_t n2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, - const size_t n3 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, - const size_t n4 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, - const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, - const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, - const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) - : d_view (label, n0, n1, n2, n3, n4, n5, n6, n7) - , h_view (create_mirror_view (d_view)) // without UVM, host View mirrors - , modified_flags (t_modified_flags("DualView::modified_flags")) - { + DualView(const std::string& label, + const size_t n0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n3 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n4 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) + : d_view(label, n0, n1, n2, n3, n4, n5, n6, n7), + h_view(create_mirror_view(d_view)) // without UVM, host View mirrors + , + modified_flags(t_modified_flags("DualView::modified_flags")) { #ifdef KOKKOS_ENABLE_DEPRECATED_CODE - modified_host = t_modified_flag(modified_flags,0); - modified_device = t_modified_flag(modified_flags,1); + modified_host = t_modified_flag(modified_flags, 0); + modified_device = t_modified_flag(modified_flags, 1); #endif } //! 
Copy constructor (shallow copy) - template - DualView (const DualView& src) : - d_view (src.d_view), - h_view (src.h_view), - modified_flags (src.modified_flags) + template + DualView(const DualView& src) + : d_view(src.d_view), + h_view(src.h_view), + modified_flags(src.modified_flags) #ifdef KOKKOS_ENABLE_DEPRECATED_CODE - , modified_host(src.modified_host) - , modified_device(src.modified_device) + , + modified_host(src.modified_host), + modified_device(src.modified_device) #endif - {} + { + } //! Subview constructor - template< class SD, class S1 , class S2 , class S3 - , class Arg0 , class ... Args > - DualView( const DualView & src - , const Arg0 & arg0 - , Args ... args - ) - : d_view( Kokkos::subview( src.d_view , arg0 , args ... ) ) - , h_view( Kokkos::subview( src.h_view , arg0 , args ... ) ) - , modified_flags (src.modified_flags) + template + DualView(const DualView& src, const Arg0& arg0, Args... args) + : d_view(Kokkos::subview(src.d_view, arg0, args...)), + h_view(Kokkos::subview(src.h_view, arg0, args...)), + modified_flags(src.modified_flags) #ifdef KOKKOS_ENABLE_DEPRECATED_CODE - , modified_host(src.modified_host) - , modified_device(src.modified_device) + , + modified_host(src.modified_host), + modified_device(src.modified_device) #endif - {} + { + } /// \brief Create DualView from existing device and host View objects. 
/// @@ -282,34 +276,34 @@ public: /// /// \param d_view_ Device View /// \param h_view_ Host View (must have type t_host = t_dev::HostMirror) - DualView (const t_dev& d_view_, const t_host& h_view_) : - d_view (d_view_), - h_view (h_view_), - modified_flags (t_modified_flags("DualView::modified_flags")) - { - if ( int(d_view.rank) != int(h_view.rank) || - d_view.extent(0) != h_view.extent(0) || - d_view.extent(1) != h_view.extent(1) || - d_view.extent(2) != h_view.extent(2) || - d_view.extent(3) != h_view.extent(3) || - d_view.extent(4) != h_view.extent(4) || - d_view.extent(5) != h_view.extent(5) || - d_view.extent(6) != h_view.extent(6) || - d_view.extent(7) != h_view.extent(7) || - d_view.stride_0() != h_view.stride_0() || - d_view.stride_1() != h_view.stride_1() || - d_view.stride_2() != h_view.stride_2() || - d_view.stride_3() != h_view.stride_3() || - d_view.stride_4() != h_view.stride_4() || - d_view.stride_5() != h_view.stride_5() || - d_view.stride_6() != h_view.stride_6() || - d_view.stride_7() != h_view.stride_7() || - d_view.span() != h_view.span() ) { - Kokkos::Impl::throw_runtime_exception("DualView constructed with incompatible views"); + DualView(const t_dev& d_view_, const t_host& h_view_) + : d_view(d_view_), + h_view(h_view_), + modified_flags(t_modified_flags("DualView::modified_flags")) { + if (int(d_view.rank) != int(h_view.rank) || + d_view.extent(0) != h_view.extent(0) || + d_view.extent(1) != h_view.extent(1) || + d_view.extent(2) != h_view.extent(2) || + d_view.extent(3) != h_view.extent(3) || + d_view.extent(4) != h_view.extent(4) || + d_view.extent(5) != h_view.extent(5) || + d_view.extent(6) != h_view.extent(6) || + d_view.extent(7) != h_view.extent(7) || + d_view.stride_0() != h_view.stride_0() || + d_view.stride_1() != h_view.stride_1() || + d_view.stride_2() != h_view.stride_2() || + d_view.stride_3() != h_view.stride_3() || + d_view.stride_4() != h_view.stride_4() || + d_view.stride_5() != h_view.stride_5() || + d_view.stride_6() != 
h_view.stride_6() || + d_view.stride_7() != h_view.stride_7() || + d_view.span() != h_view.span()) { + Kokkos::Impl::throw_runtime_exception( + "DualView constructed with incompatible views"); } #ifdef KOKKOS_ENABLE_DEPRECATED_CODE - modified_host = t_modified_flag(modified_flags,0); - modified_device = t_modified_flag(modified_flags,1); + modified_host = t_modified_flag(modified_flags, 0); + modified_device = t_modified_flag(modified_flags, 1); #endif } @@ -326,119 +320,133 @@ public: /// /// For example, suppose you create a DualView on Cuda, like this: /// \code - /// typedef Kokkos::DualView dual_view_type; - /// dual_view_type DV ("my dual view", 100); - /// \endcode - /// If you want to get the CUDA device View, do this: - /// \code - /// typename dual_view_type::t_dev cudaView = DV.view (); - /// \endcode - /// and if you want to get the host mirror of that View, do this: - /// \code - /// typedef typename Kokkos::HostSpace::execution_space host_device_type; - /// typename dual_view_type::t_host hostView = DV.view (); - /// \endcode - template< class Device > - KOKKOS_INLINE_FUNCTION - const typename Impl::if_c< - std::is_same::value, - t_dev, - t_host>::type& view () const - { - #ifndef KOKKOS_ENABLE_DEPRECATED_CODE - constexpr bool device_is_memspace = std::is_same::value; - constexpr bool device_is_execspace = std::is_same::value; - constexpr bool device_exec_is_t_dev_exec = std::is_same::value; - constexpr bool device_mem_is_t_dev_mem = std::is_same::value; - constexpr bool device_exec_is_t_host_exec = std::is_same::value; - constexpr bool device_mem_is_t_host_mem = std::is_same::value; - constexpr bool device_is_t_host_device = std::is_same::value; - constexpr bool device_is_t_dev_device = std::is_same::value; + /// typedef Kokkos::DualView + /// dual_view_type; dual_view_type DV ("my dual view", 100); \endcode If you + /// want to get the CUDA device View, do this: \code typename + /// dual_view_type::t_dev cudaView = DV.view (); \endcode and if + /// 
you want to get the host mirror of that View, do this: \code typedef + /// typename Kokkos::HostSpace::execution_space host_device_type; typename + /// dual_view_type::t_host hostView = DV.view (); \endcode + template + KOKKOS_INLINE_FUNCTION const typename Impl::if_c< + std::is_same::value, + t_dev, t_host>::type& + view() const { +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE + constexpr bool device_is_memspace = + std::is_same::value; + constexpr bool device_is_execspace = + std::is_same::value; + constexpr bool device_exec_is_t_dev_exec = + std::is_same::value; + constexpr bool device_mem_is_t_dev_mem = + std::is_same::value; + constexpr bool device_exec_is_t_host_exec = + std::is_same::value; + constexpr bool device_mem_is_t_host_mem = + std::is_same::value; + constexpr bool device_is_t_host_device = + std::is_same::value; + constexpr bool device_is_t_dev_device = + std::is_same::value; static_assert( device_is_t_dev_device || device_is_t_host_device || - (device_is_memspace && (device_mem_is_t_dev_mem || device_mem_is_t_host_mem) ) || - (device_is_execspace && (device_exec_is_t_dev_exec || device_exec_is_t_host_exec) ) || - ( - (!device_is_execspace && !device_is_memspace) && ( - (device_mem_is_t_dev_mem || device_mem_is_t_host_mem) || - (device_exec_is_t_dev_exec || device_exec_is_t_host_exec) - ) - ) - , - "Template parameter to .view() must exactly match one of the DualView's device types or one of the execution or memory spaces"); - #endif - - return Impl::if_c< - std::is_same< - typename t_dev::memory_space, - typename Device::memory_space>::value, - t_dev, - t_host >::select (d_view , h_view); + (device_is_memspace && + (device_mem_is_t_dev_mem || device_mem_is_t_host_mem)) || + (device_is_execspace && + (device_exec_is_t_dev_exec || device_exec_is_t_host_exec)) || + ((!device_is_execspace && !device_is_memspace) && + ((device_mem_is_t_dev_mem || device_mem_is_t_host_mem) || + (device_exec_is_t_dev_exec || device_exec_is_t_host_exec))), + "Template parameter to 
.view() must exactly match one of the " + "DualView's device types or one of the execution or memory spaces"); +#endif + + return Impl::if_c::value, + t_dev, t_host>::select(d_view, h_view); } KOKKOS_INLINE_FUNCTION - t_host view_host() const { - return h_view; - } + t_host view_host() const { return h_view; } KOKKOS_INLINE_FUNCTION - t_dev view_device() const { - return d_view; - } + t_dev view_device() const { return d_view; } - template + template static int get_device_side() { - constexpr bool device_is_memspace = std::is_same::value; - constexpr bool device_is_execspace = std::is_same::value; - constexpr bool device_exec_is_t_dev_exec = std::is_same::value; - constexpr bool device_mem_is_t_dev_mem = std::is_same::value; - constexpr bool device_exec_is_t_host_exec = std::is_same::value; - constexpr bool device_mem_is_t_host_mem = std::is_same::value; - constexpr bool device_is_t_host_device = std::is_same::value; - constexpr bool device_is_t_dev_device = std::is_same::value; - - #ifndef KOKKOS_ENABLE_DEPRECATED_CODE + constexpr bool device_is_memspace = + std::is_same::value; + constexpr bool device_is_execspace = + std::is_same::value; + constexpr bool device_exec_is_t_dev_exec = + std::is_same::value; + constexpr bool device_mem_is_t_dev_mem = + std::is_same::value; + constexpr bool device_exec_is_t_host_exec = + std::is_same::value; + constexpr bool device_mem_is_t_host_mem = + std::is_same::value; + constexpr bool device_is_t_host_device = + std::is_same::value; + constexpr bool device_is_t_dev_device = + std::is_same::value; + +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE static_assert( device_is_t_dev_device || device_is_t_host_device || - (device_is_memspace && (device_mem_is_t_dev_mem || device_mem_is_t_host_mem) ) || - (device_is_execspace && (device_exec_is_t_dev_exec || device_exec_is_t_host_exec) ) || - ( - (!device_is_execspace && !device_is_memspace) && ( - (device_mem_is_t_dev_mem || device_mem_is_t_host_mem) || - (device_exec_is_t_dev_exec || 
device_exec_is_t_host_exec) - ) - ) - , - "Template parameter to .sync() must exactly match one of the DualView's device types or one of the execution or memory spaces"); - #endif + (device_is_memspace && + (device_mem_is_t_dev_mem || device_mem_is_t_host_mem)) || + (device_is_execspace && + (device_exec_is_t_dev_exec || device_exec_is_t_host_exec)) || + ((!device_is_execspace && !device_is_memspace) && + ((device_mem_is_t_dev_mem || device_mem_is_t_host_mem) || + (device_exec_is_t_dev_exec || device_exec_is_t_host_exec))), + "Template parameter to .sync() must exactly match one of the " + "DualView's device types or one of the execution or memory spaces"); +#endif - #ifndef KOKKOS_ENABLE_DEPRECATED_CODE +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE int dev = -1; - #else +#else int dev = 0; - #endif - if(device_is_t_dev_device) dev = 1; - else if(device_is_t_host_device) dev = 0; +#endif + if (device_is_t_dev_device) + dev = 1; + else if (device_is_t_host_device) + dev = 0; else { - if(device_is_memspace) { - if(device_mem_is_t_dev_mem) dev = 1; - if(device_mem_is_t_host_mem) dev = 0; - if(device_mem_is_t_host_mem && device_mem_is_t_dev_mem) dev = -1; + if (device_is_memspace) { + if (device_mem_is_t_dev_mem) dev = 1; + if (device_mem_is_t_host_mem) dev = 0; + if (device_mem_is_t_host_mem && device_mem_is_t_dev_mem) dev = -1; } - if(device_is_execspace) { - if(device_exec_is_t_dev_exec) dev = 1; - if(device_exec_is_t_host_exec) dev = 0; - if(device_exec_is_t_host_exec && device_exec_is_t_dev_exec) dev = -1; + if (device_is_execspace) { + if (device_exec_is_t_dev_exec) dev = 1; + if (device_exec_is_t_host_exec) dev = 0; + if (device_exec_is_t_host_exec && device_exec_is_t_dev_exec) dev = -1; } - if(!device_is_execspace && !device_is_memspace) { - if(device_mem_is_t_dev_mem) dev = 1; - if(device_mem_is_t_host_mem) dev = 0; - if(device_mem_is_t_host_mem && device_mem_is_t_dev_mem) dev = -1; - if(device_exec_is_t_dev_exec) dev = 1; - if(device_exec_is_t_host_exec) dev = 0; - 
if(device_exec_is_t_host_exec && device_exec_is_t_dev_exec) dev = -1; + if (!device_is_execspace && !device_is_memspace) { + if (device_mem_is_t_dev_mem) dev = 1; + if (device_mem_is_t_host_mem) dev = 0; + if (device_mem_is_t_host_mem && device_mem_is_t_dev_mem) dev = -1; + if (device_exec_is_t_dev_exec) dev = 1; + if (device_exec_is_t_host_exec) dev = 0; + if (device_exec_is_t_host_exec && device_exec_is_t_dev_exec) dev = -1; } } return dev; @@ -461,88 +469,94 @@ public: /// the data in either View. You must manually mark modified data /// as modified, by calling the modify() method with the /// appropriate template parameter. - template - void sync( const typename Impl::enable_if< - ( std::is_same< typename traits::data_type , typename traits::non_const_data_type>::value) || - ( std::is_same< Device , int>::value) - , int >::type& = 0) - { - if(modified_flags.data()==NULL) return; + template + void sync(const typename Impl::enable_if< + (std::is_same::value) || + (std::is_same::value), + int>::type& = 0) { + if (modified_flags.data() == NULL) return; int dev = get_device_side(); - if (dev == 1) { // if Device is the same as DualView's device type + if (dev == 1) { // if Device is the same as DualView's device type if ((modified_flags(0) > 0) && (modified_flags(0) >= modified_flags(1))) { - deep_copy (d_view, h_view); + deep_copy(d_view, h_view); modified_flags(0) = modified_flags(1) = 0; } } - if (dev == 0) { // hopefully Device is the same as DualView's host type + if (dev == 0) { // hopefully Device is the same as DualView's host type if ((modified_flags(1) > 0) && (modified_flags(1) >= modified_flags(0))) { - deep_copy (h_view, d_view); + deep_copy(h_view, d_view); modified_flags(0) = modified_flags(1) = 0; } } - if(std::is_same::value) { + if (std::is_same::value) { typename t_dev::execution_space().fence(); typename t_host::execution_space().fence(); } } - template - void sync ( const typename Impl::enable_if< - ( ! 
std::is_same< typename traits::data_type , typename traits::non_const_data_type>::value ) || - ( std::is_same< Device , int>::value) - , int >::type& = 0 ) - { - if(modified_flags.data()==NULL) return; + template + void sync(const typename Impl::enable_if< + (!std::is_same::value) || + (std::is_same::value), + int>::type& = 0) { + if (modified_flags.data() == NULL) return; int dev = get_device_side(); - if (dev == 1) { // if Device is the same as DualView's device type + if (dev == 1) { // if Device is the same as DualView's device type if ((modified_flags(0) > 0) && (modified_flags(0) >= modified_flags(1))) { - Impl::throw_runtime_exception("Calling sync on a DualView with a const datatype."); + Impl::throw_runtime_exception( + "Calling sync on a DualView with a const datatype."); } } - if (dev == 0){ // hopefully Device is the same as DualView's host type + if (dev == 0) { // hopefully Device is the same as DualView's host type if ((modified_flags(1) > 0) && (modified_flags(1) >= modified_flags(0))) { - Impl::throw_runtime_exception("Calling sync on a DualView with a const datatype."); + Impl::throw_runtime_exception( + "Calling sync on a DualView with a const datatype."); } } } void sync_host() { - if( ! std::is_same< typename traits::data_type , typename traits::non_const_data_type>::value ) - Impl::throw_runtime_exception("Calling sync_host on a DualView with a const datatype."); - if(modified_flags.data()==NULL) return; - if(modified_flags(1) > modified_flags(0)) { - deep_copy (h_view, d_view); + if (!std::is_same::value) + Impl::throw_runtime_exception( + "Calling sync_host on a DualView with a const datatype."); + if (modified_flags.data() == NULL) return; + if (modified_flags(1) > modified_flags(0)) { + deep_copy(h_view, d_view); modified_flags(1) = modified_flags(0) = 0; } } void sync_device() { - if( ! 
std::is_same< typename traits::data_type , typename traits::non_const_data_type>::value ) - Impl::throw_runtime_exception("Calling sync_device on a DualView with a const datatype."); - if(modified_flags.data()==NULL) return; - if(modified_flags(0) > modified_flags(1)) { - deep_copy (d_view, h_view); + if (!std::is_same::value) + Impl::throw_runtime_exception( + "Calling sync_device on a DualView with a const datatype."); + if (modified_flags.data() == NULL) return; + if (modified_flags(0) > modified_flags(1)) { + deep_copy(d_view, h_view); modified_flags(1) = modified_flags(0) = 0; } } - template - bool need_sync() const - { - if(modified_flags.data()==NULL) return false; + template + bool need_sync() const { + if (modified_flags.data() == NULL) return false; int dev = get_device_side(); - if (dev == 1) { // if Device is the same as DualView's device type + if (dev == 1) { // if Device is the same as DualView's device type if ((modified_flags(0) > 0) && (modified_flags(0) >= modified_flags(1))) { return true; } } - if (dev == 0){ // hopefully Device is the same as DualView's host type + if (dev == 0) { // hopefully Device is the same as DualView's host type if ((modified_flags(1) > 0) && (modified_flags(1) >= modified_flags(0))) { return true; } @@ -551,13 +565,13 @@ public: } inline bool need_sync_host() const { - if(modified_flags.data()==NULL) return false; - return modified_flags(0) - void modify () { - if(modified_flags.data()==NULL) return; + template + void modify() { + if (modified_flags.data() == NULL) return; int dev = get_device_side(); - if (dev == 1) { // if Device is the same as DualView's device type + if (dev == 1) { // if Device is the same as DualView's device type // Increment the device's modified count. - modified_flags(1) = (modified_flags(1) > modified_flags(0) ? - modified_flags(1) : modified_flags(0)) + 1; + modified_flags(1) = + (modified_flags(1) > modified_flags(0) ? 
modified_flags(1) + : modified_flags(0)) + + 1; } - if (dev == 0) { // hopefully Device is the same as DualView's host type + if (dev == 0) { // hopefully Device is the same as DualView's host type // Increment the host's modified count. - modified_flags(0) = (modified_flags(1) > modified_flags(0) ? - modified_flags(1) : modified_flags(0)) + 1; + modified_flags(0) = + (modified_flags(1) > modified_flags(0) ? modified_flags(1) + : modified_flags(0)) + + 1; } #ifdef KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK @@ -594,10 +612,12 @@ public: } inline void modify_host() { - if(modified_flags.data()!=NULL) { - modified_flags(0) = (modified_flags(1) > modified_flags(0) ? - modified_flags(1) : modified_flags(0)) + 1; - #ifdef KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK + if (modified_flags.data() != NULL) { + modified_flags(0) = + (modified_flags(1) > modified_flags(0) ? modified_flags(1) + : modified_flags(0)) + + 1; +#ifdef KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK if (modified_flags(0) && modified_flags(1)) { std::string msg = "Kokkos::DualView::modify_host ERROR: "; msg += "Concurrent modification of host and device views "; @@ -606,15 +626,17 @@ public: msg += "\"\n"; Kokkos::abort(msg.c_str()); } - #endif +#endif } } inline void modify_device() { - if(modified_flags.data()!=NULL) { - modified_flags(1) = (modified_flags(1) > modified_flags(0) ? - modified_flags(1) : modified_flags(0)) + 1; - #ifdef KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK + if (modified_flags.data() != NULL) { + modified_flags(1) = + (modified_flags(1) > modified_flags(0) ? 
modified_flags(1) + : modified_flags(0)) + + 1; +#ifdef KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK if (modified_flags(0) && modified_flags(1)) { std::string msg = "Kokkos::DualView::modify_device ERROR: "; msg += "Concurrent modification of host and device views "; @@ -623,12 +645,12 @@ public: msg += "\"\n"; Kokkos::abort(msg.c_str()); } - #endif +#endif } } inline void clear_sync_state() { - if(modified_flags.data()!=NULL) + if (modified_flags.data() != NULL) modified_flags(1) = modified_flags(0) = 0; } @@ -641,75 +663,72 @@ public: /// This discards any existing contents of the objects, and resets /// their modified flags. It does not copy the old contents /// of either View into the new View objects. - void realloc( const size_t n0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG , - const size_t n1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG , - const size_t n2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG , - const size_t n3 = KOKKOS_IMPL_CTOR_DEFAULT_ARG , - const size_t n4 = KOKKOS_IMPL_CTOR_DEFAULT_ARG , - const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG , - const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG , - const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG ) { - ::Kokkos::realloc(d_view,n0,n1,n2,n3,n4,n5,n6,n7); - h_view = create_mirror_view( d_view ); - - /* Reset dirty flags */ - if(modified_flags.data()==NULL) { - modified_flags = t_modified_flags("DualView::modified_flags"); - } else - modified_flags(1) = modified_flags(0) = 0; + void realloc(const size_t n0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n3 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n4 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) { + ::Kokkos::realloc(d_view, n0, n1, n2, n3, n4, n5, n6, n7); + h_view = create_mirror_view(d_view); + + /* Reset dirty flags */ + if (modified_flags.data() == NULL) { + 
modified_flags = t_modified_flags("DualView::modified_flags"); + } else + modified_flags(1) = modified_flags(0) = 0; } /// \brief Resize both views, copying old contents into new if necessary. /// /// This method only copies the old contents into the new View /// objects for the device which was last marked as modified. - void resize( const size_t n0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG , - const size_t n1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG , - const size_t n2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG , - const size_t n3 = KOKKOS_IMPL_CTOR_DEFAULT_ARG , - const size_t n4 = KOKKOS_IMPL_CTOR_DEFAULT_ARG , - const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG , - const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG , - const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG ) { - if(modified_flags.data()==NULL) { - modified_flags = t_modified_flags("DualView::modified_flags"); - } - if(modified_flags(1) >= modified_flags(0)) { - /* Resize on Device */ - ::Kokkos::resize(d_view,n0,n1,n2,n3,n4,n5,n6,n7); - h_view = create_mirror_view( d_view ); - - /* Mark Device copy as modified */ - modified_flags(1) = modified_flags(1)+1; - - } else { - /* Realloc on Device */ - - ::Kokkos::realloc(d_view,n0,n1,n2,n3,n4,n5,n6,n7); - - const bool sizeMismatch = ( h_view.extent(0) != n0 ) || - ( h_view.extent(1) != n1 ) || - ( h_view.extent(2) != n2 ) || - ( h_view.extent(3) != n3 ) || - ( h_view.extent(4) != n4 ) || - ( h_view.extent(5) != n5 ) || - ( h_view.extent(6) != n6 ) || - ( h_view.extent(7) != n7 ); - if ( sizeMismatch ) - ::Kokkos::resize(h_view,n0,n1,n2,n3,n4,n5,n6,n7); - - t_host temp_view = create_mirror_view( d_view ); - - /* Remap on Host */ - Kokkos::deep_copy( temp_view , h_view ); - - h_view = temp_view; - - d_view = create_mirror_view( typename t_dev::execution_space(), h_view ); - - /* Mark Host copy as modified */ - modified_flags(0) = modified_flags(0)+1; - } + void resize(const size_t n0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n2 = 
KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n3 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n4 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) { + if (modified_flags.data() == NULL) { + modified_flags = t_modified_flags("DualView::modified_flags"); + } + if (modified_flags(1) >= modified_flags(0)) { + /* Resize on Device */ + ::Kokkos::resize(d_view, n0, n1, n2, n3, n4, n5, n6, n7); + h_view = create_mirror_view(d_view); + + /* Mark Device copy as modified */ + modified_flags(1) = modified_flags(1) + 1; + + } else { + /* Realloc on Device */ + + ::Kokkos::realloc(d_view, n0, n1, n2, n3, n4, n5, n6, n7); + + const bool sizeMismatch = + (h_view.extent(0) != n0) || (h_view.extent(1) != n1) || + (h_view.extent(2) != n2) || (h_view.extent(3) != n3) || + (h_view.extent(4) != n4) || (h_view.extent(5) != n5) || + (h_view.extent(6) != n6) || (h_view.extent(7) != n7); + if (sizeMismatch) + ::Kokkos::resize(h_view, n0, n1, n2, n3, n4, n5, n6, n7); + + t_host temp_view = create_mirror_view(d_view); + + /* Remap on Host */ + Kokkos::deep_copy(temp_view, h_view); + + h_view = temp_view; + + d_view = create_mirror_view(typename t_dev::execution_space(), h_view); + + /* Mark Host copy as modified */ + modified_flags(0) = modified_flags(0) + 1; + } } //@} @@ -718,37 +737,35 @@ public: #ifdef KOKKOS_ENABLE_DEPRECATED_CODE //! The allocation size (same as Kokkos::View::capacity). - size_t capacity() const { - return d_view.span(); - } + size_t capacity() const { return d_view.span(); } #endif //! The allocation size (same as Kokkos::View::span). 
- KOKKOS_INLINE_FUNCTION constexpr size_t span() const { - return d_view.span(); - } + KOKKOS_INLINE_FUNCTION constexpr size_t span() const { return d_view.span(); } - KOKKOS_INLINE_FUNCTION bool span_is_contiguous() const { - return d_view.span_is_contiguous(); + KOKKOS_INLINE_FUNCTION bool span_is_contiguous() const { + return d_view.span_is_contiguous(); } //! Get stride(s) for each dimension. - template< typename iType> + template void stride(iType* stride_) const { d_view.stride(stride_); } - template< typename iType > - KOKKOS_INLINE_FUNCTION constexpr - typename std::enable_if< std::is_integral::value , size_t >::type - extent( const iType & r ) const - { return d_view.extent(r); } + template + KOKKOS_INLINE_FUNCTION constexpr + typename std::enable_if::value, size_t>::type + extent(const iType& r) const { + return d_view.extent(r); + } - template< typename iType > - KOKKOS_INLINE_FUNCTION constexpr - typename std::enable_if< std::is_integral::value , int >::type - extent_int( const iType & r ) const - { return static_cast(d_view.extent(r)); } + template + KOKKOS_INLINE_FUNCTION constexpr + typename std::enable_if::value, int>::type + extent_int(const iType& r) const { + return static_cast(d_view.extent(r)); + } #ifdef KOKKOS_ENABLE_DEPRECATED_CODE /* Deprecate all 'dimension' functions in favor of @@ -756,27 +773,27 @@ public: */ /* \brief return size of dimension 0 */ - size_t dimension_0() const {return d_view.extent(0);} + size_t dimension_0() const { return d_view.extent(0); } /* \brief return size of dimension 1 */ - size_t dimension_1() const {return d_view.extent(1);} + size_t dimension_1() const { return d_view.extent(1); } /* \brief return size of dimension 2 */ - size_t dimension_2() const {return d_view.extent(2);} + size_t dimension_2() const { return d_view.extent(2); } /* \brief return size of dimension 3 */ - size_t dimension_3() const {return d_view.extent(3);} + size_t dimension_3() const { return d_view.extent(3); } /* \brief return size 
of dimension 4 */ - size_t dimension_4() const {return d_view.extent(4);} + size_t dimension_4() const { return d_view.extent(4); } /* \brief return size of dimension 5 */ - size_t dimension_5() const {return d_view.extent(5);} + size_t dimension_5() const { return d_view.extent(5); } /* \brief return size of dimension 6 */ - size_t dimension_6() const {return d_view.extent(6);} + size_t dimension_6() const { return d_view.extent(6); } /* \brief return size of dimension 7 */ - size_t dimension_7() const {return d_view.extent(7);} + size_t dimension_7() const { return d_view.extent(7); } #endif //@} }; -} // namespace Kokkos +} // namespace Kokkos //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- @@ -787,32 +804,24 @@ public: namespace Kokkos { namespace Impl { -template< class D, class A1, class A2, class A3, class ... Args > +template struct DualViewSubview { + typedef typename Kokkos::Impl::ViewMapping< + void, Kokkos::ViewTraits, Args...>::traits_type dst_traits; - typedef typename Kokkos::Impl::ViewMapping - < void - , Kokkos::ViewTraits< D, A1, A2, A3 > - , Args ... - >::traits_type dst_traits ; - - typedef Kokkos::DualView - < typename dst_traits::data_type - , typename dst_traits::array_layout - , typename dst_traits::device_type - , typename dst_traits::memory_traits - > type ; + typedef Kokkos::DualView< + typename dst_traits::data_type, typename dst_traits::array_layout, + typename dst_traits::device_type, typename dst_traits::memory_traits> + type; }; } /* namespace Impl */ - -template< class D , class A1 , class A2 , class A3 , class ... Args > -typename Impl::DualViewSubview::type -subview( const DualView & src , Args ... args ) -{ - return typename - Impl::DualViewSubview::type( src , args ... ); +template +typename Impl::DualViewSubview::type subview( + const DualView& src, Args... 
args) { + return typename Impl::DualViewSubview::type(src, + args...); } } /* namespace Kokkos */ @@ -826,40 +835,35 @@ namespace Kokkos { // Partial specialization of Kokkos::deep_copy() for DualView objects. // -template< class DT , class DL , class DD , class DM , - class ST , class SL , class SD , class SM > -void -deep_copy (DualView dst, // trust me, this must not be a reference - const DualView& src ) -{ - if ( src.need_sync_device() ) { - deep_copy (dst.h_view, src.h_view); +template +void deep_copy( + DualView dst, // trust me, this must not be a reference + const DualView& src) { + if (src.need_sync_device()) { + deep_copy(dst.h_view, src.h_view); dst.modify_host(); - } - else { - deep_copy (dst.d_view, src.d_view); + } else { + deep_copy(dst.d_view, src.d_view); dst.modify_device(); - } + } } -template< class ExecutionSpace , - class DT , class DL , class DD , class DM , - class ST , class SL , class SD , class SM > -void -deep_copy (const ExecutionSpace& exec , - DualView dst, // trust me, this must not be a reference - const DualView& src ) -{ - if ( src.need_sync_device() ) { - deep_copy (exec, dst.h_view, src.h_view); +template +void deep_copy( + const ExecutionSpace& exec, + DualView dst, // trust me, this must not be a reference + const DualView& src) { + if (src.need_sync_device()) { + deep_copy(exec, dst.h_view, src.h_view); dst.modify_host(); } else { - deep_copy (exec, dst.d_view, src.d_view); + deep_copy(exec, dst.d_view, src.d_view); dst.modify_device(); } } -} // namespace Kokkos +} // namespace Kokkos #endif - diff --git a/lib/kokkos/containers/src/Kokkos_DynRankView.hpp b/lib/kokkos/containers/src/Kokkos_DynRankView.hpp index d1e6704a57..0ceb9d5d39 100644 --- a/lib/kokkos/containers/src/Kokkos_DynRankView.hpp +++ b/lib/kokkos/containers/src/Kokkos_DynRankView.hpp @@ -2,10 +2,11 @@ //@HEADER // ************************************************************************ // -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation +// Kokkos v. 
3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). // -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Redistribution and use in source and binary forms, with or without @@ -23,10 +24,10 @@ // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR @@ -56,255 +57,240 @@ namespace Kokkos { -template< typename DataType , class ... Properties > -class DynRankView; //forward declare +template +class DynRankView; // forward declare namespace Impl { template struct DynRankDimTraits { - - enum : size_t{unspecified = KOKKOS_INVALID_INDEX}; + enum : size_t { unspecified = KOKKOS_INVALID_INDEX }; // Compute the rank of the view from the nonzero dimension arguments. KOKKOS_INLINE_FUNCTION - static size_t computeRank( const size_t N0 - , const size_t N1 - , const size_t N2 - , const size_t N3 - , const size_t N4 - , const size_t N5 - , const size_t N6 - , const size_t /* N7 */) - { - return - ( (N6 == unspecified && N5 == unspecified && N4 == unspecified && N3 == unspecified && N2 == unspecified && N1 == unspecified && N0 == unspecified) ? 
0 - : ( (N6 == unspecified && N5 == unspecified && N4 == unspecified && N3 == unspecified && N2 == unspecified && N1 == unspecified) ? 1 - : ( (N6 == unspecified && N5 == unspecified && N4 == unspecified && N3 == unspecified && N2 == unspecified) ? 2 - : ( (N6 == unspecified && N5 == unspecified && N4 == unspecified && N3 == unspecified) ? 3 - : ( (N6 == unspecified && N5 == unspecified && N4 == unspecified) ? 4 - : ( (N6 == unspecified && N5 == unspecified) ? 5 - : ( (N6 == unspecified) ? 6 - : 7 ) ) ) ) ) ) ); + static size_t computeRank(const size_t N0, const size_t N1, const size_t N2, + const size_t N3, const size_t N4, const size_t N5, + const size_t N6, const size_t /* N7 */) { + return ( + (N6 == unspecified && N5 == unspecified && N4 == unspecified && + N3 == unspecified && N2 == unspecified && N1 == unspecified && + N0 == unspecified) + ? 0 + : ((N6 == unspecified && N5 == unspecified && N4 == unspecified && + N3 == unspecified && N2 == unspecified && N1 == unspecified) + ? 1 + : ((N6 == unspecified && N5 == unspecified && + N4 == unspecified && N3 == unspecified && + N2 == unspecified) + ? 2 + : ((N6 == unspecified && N5 == unspecified && + N4 == unspecified && N3 == unspecified) + ? 3 + : ((N6 == unspecified && N5 == unspecified && + N4 == unspecified) + ? 4 + : ((N6 == unspecified && + N5 == unspecified) + ? 5 + : ((N6 == unspecified) + ? 6 + : 7))))))); } // Compute the rank of the view from the nonzero layout arguments. 
template - KOKKOS_INLINE_FUNCTION - static size_t computeRank( const Layout& layout ) - { - return computeRank( layout.dimension[0] - , layout.dimension[1] - , layout.dimension[2] - , layout.dimension[3] - , layout.dimension[4] - , layout.dimension[5] - , layout.dimension[6] - , layout.dimension[7] ); + KOKKOS_INLINE_FUNCTION static size_t computeRank(const Layout& layout) { + return computeRank(layout.dimension[0], layout.dimension[1], + layout.dimension[2], layout.dimension[3], + layout.dimension[4], layout.dimension[5], + layout.dimension[6], layout.dimension[7]); } // Extra overload to match that for specialize types v2 - template - KOKKOS_INLINE_FUNCTION - static size_t computeRank( const Kokkos::Impl::ViewCtorProp& /* prop */, const Layout& layout ) - { + template + KOKKOS_INLINE_FUNCTION static size_t computeRank( + const Kokkos::Impl::ViewCtorProp& /* prop */, + const Layout& layout) { return computeRank(layout); } // Create the layout for the rank-7 view. // Non-strided Layout template - KOKKOS_INLINE_FUNCTION - static typename std::enable_if< (std::is_same::value || std::is_same::value) , Layout >::type createLayout( const Layout& layout ) - { - return Layout( layout.dimension[0] != unspecified ? layout.dimension[0] : 1 - , layout.dimension[1] != unspecified ? layout.dimension[1] : 1 - , layout.dimension[2] != unspecified ? layout.dimension[2] : 1 - , layout.dimension[3] != unspecified ? layout.dimension[3] : 1 - , layout.dimension[4] != unspecified ? layout.dimension[4] : 1 - , layout.dimension[5] != unspecified ? layout.dimension[5] : 1 - , layout.dimension[6] != unspecified ? layout.dimension[6] : 1 - , layout.dimension[7] != unspecified ? layout.dimension[7] : 1 - ); + KOKKOS_INLINE_FUNCTION static typename std::enable_if< + (std::is_same::value || + std::is_same::value), + Layout>::type + createLayout(const Layout& layout) { + return Layout(layout.dimension[0] != unspecified ? layout.dimension[0] : 1, + layout.dimension[1] != unspecified ? 
layout.dimension[1] : 1, + layout.dimension[2] != unspecified ? layout.dimension[2] : 1, + layout.dimension[3] != unspecified ? layout.dimension[3] : 1, + layout.dimension[4] != unspecified ? layout.dimension[4] : 1, + layout.dimension[5] != unspecified ? layout.dimension[5] : 1, + layout.dimension[6] != unspecified ? layout.dimension[6] : 1, + layout.dimension[7] != unspecified ? layout.dimension[7] : 1); } // LayoutStride template - KOKKOS_INLINE_FUNCTION - static typename std::enable_if< (std::is_same::value) , Layout>::type createLayout( const Layout& layout ) - { - return Layout( layout.dimension[0] != unspecified ? layout.dimension[0] : 1 - , layout.stride[0] - , layout.dimension[1] != unspecified ? layout.dimension[1] : 1 - , layout.stride[1] - , layout.dimension[2] != unspecified ? layout.dimension[2] : 1 - , layout.stride[2] - , layout.dimension[3] != unspecified ? layout.dimension[3] : 1 - , layout.stride[3] - , layout.dimension[4] != unspecified ? layout.dimension[4] : 1 - , layout.stride[4] - , layout.dimension[5] != unspecified ? layout.dimension[5] : 1 - , layout.stride[5] - , layout.dimension[6] != unspecified ? layout.dimension[6] : 1 - , layout.stride[6] - , layout.dimension[7] != unspecified ? layout.dimension[7] : 1 - , layout.stride[7] - ); + KOKKOS_INLINE_FUNCTION static typename std::enable_if< + (std::is_same::value), Layout>::type + createLayout(const Layout& layout) { + return Layout(layout.dimension[0] != unspecified ? layout.dimension[0] : 1, + layout.stride[0], + layout.dimension[1] != unspecified ? layout.dimension[1] : 1, + layout.stride[1], + layout.dimension[2] != unspecified ? layout.dimension[2] : 1, + layout.stride[2], + layout.dimension[3] != unspecified ? layout.dimension[3] : 1, + layout.stride[3], + layout.dimension[4] != unspecified ? layout.dimension[4] : 1, + layout.stride[4], + layout.dimension[5] != unspecified ? layout.dimension[5] : 1, + layout.stride[5], + layout.dimension[6] != unspecified ? 
layout.dimension[6] : 1, + layout.stride[6], + layout.dimension[7] != unspecified ? layout.dimension[7] : 1, + layout.stride[7]); } // Extra overload to match that for specialize types - template - KOKKOS_INLINE_FUNCTION - static typename std::enable_if< (std::is_same::value || std::is_same::value || std::is_same::value) , typename Traits::array_layout >::type createLayout( const Kokkos::Impl::ViewCtorProp& /* prop */, const typename Traits::array_layout& layout ) - { - return createLayout( layout ); + template + KOKKOS_INLINE_FUNCTION static typename std::enable_if< + (std::is_same::value || + std::is_same::value || + std::is_same::value), + typename Traits::array_layout>::type + createLayout(const Kokkos::Impl::ViewCtorProp& /* prop */, + const typename Traits::array_layout& layout) { + return createLayout(layout); } // Create a view from the given dimension arguments. // This is only necessary because the shmem constructor doesn't take a layout. - // NDE shmem View's are not compatible with the added view_alloc value_type / fad_dim deduction functionality + // NDE shmem View's are not compatible with the added view_alloc value_type + // / fad_dim deduction functionality template - static ViewType createView( const ViewArg& arg - , const size_t N0 - , const size_t N1 - , const size_t N2 - , const size_t N3 - , const size_t N4 - , const size_t N5 - , const size_t N6 - , const size_t N7 ) - { - return ViewType( arg - , N0 != unspecified ? N0 : 1 - , N1 != unspecified ? N1 : 1 - , N2 != unspecified ? N2 : 1 - , N3 != unspecified ? N3 : 1 - , N4 != unspecified ? N4 : 1 - , N5 != unspecified ? N5 : 1 - , N6 != unspecified ? N6 : 1 - , N7 != unspecified ? N7 : 1 ); + static ViewType createView(const ViewArg& arg, const size_t N0, + const size_t N1, const size_t N2, const size_t N3, + const size_t N4, const size_t N5, const size_t N6, + const size_t N7) { + return ViewType(arg, N0 != unspecified ? N0 : 1, N1 != unspecified ? N1 : 1, + N2 != unspecified ? 
N2 : 1, N3 != unspecified ? N3 : 1, + N4 != unspecified ? N4 : 1, N5 != unspecified ? N5 : 1, + N6 != unspecified ? N6 : 1, N7 != unspecified ? N7 : 1); } }; - // Non-strided Layout - template - KOKKOS_INLINE_FUNCTION - static typename std::enable_if< (std::is_same::value || std::is_same::value) && std::is_integral::value , Layout >::type - reconstructLayout( const Layout& layout , iType dynrank ) - { - return Layout( dynrank > 0 ? layout.dimension[0] :KOKKOS_INVALID_INDEX - , dynrank > 1 ? layout.dimension[1] :KOKKOS_INVALID_INDEX - , dynrank > 2 ? layout.dimension[2] :KOKKOS_INVALID_INDEX - , dynrank > 3 ? layout.dimension[3] :KOKKOS_INVALID_INDEX - , dynrank > 4 ? layout.dimension[4] :KOKKOS_INVALID_INDEX - , dynrank > 5 ? layout.dimension[5] :KOKKOS_INVALID_INDEX - , dynrank > 6 ? layout.dimension[6] :KOKKOS_INVALID_INDEX - , dynrank > 7 ? layout.dimension[7] :KOKKOS_INVALID_INDEX - ); - } - - // LayoutStride - template - KOKKOS_INLINE_FUNCTION - static typename std::enable_if< (std::is_same::value) && std::is_integral::value , Layout >::type - reconstructLayout( const Layout& layout , iType dynrank ) - { - return Layout( dynrank > 0 ? layout.dimension[0] :KOKKOS_INVALID_INDEX - , dynrank > 0 ? layout.stride[0] : (0) - , dynrank > 1 ? layout.dimension[1] :KOKKOS_INVALID_INDEX - , dynrank > 1 ? layout.stride[1] : (0) - , dynrank > 2 ? layout.dimension[2] :KOKKOS_INVALID_INDEX - , dynrank > 2 ? layout.stride[2] : (0) - , dynrank > 3 ? layout.dimension[3] :KOKKOS_INVALID_INDEX - , dynrank > 3 ? layout.stride[3] : (0) - , dynrank > 4 ? layout.dimension[4] :KOKKOS_INVALID_INDEX - , dynrank > 4 ? layout.stride[4] : (0) - , dynrank > 5 ? layout.dimension[5] :KOKKOS_INVALID_INDEX - , dynrank > 5 ? layout.stride[5] : (0) - , dynrank > 6 ? layout.dimension[6] :KOKKOS_INVALID_INDEX - , dynrank > 6 ? layout.stride[6] : (0) - , dynrank > 7 ? layout.dimension[7] :KOKKOS_INVALID_INDEX - , dynrank > 7 ? 
layout.stride[7] : (0) - ); - } +// Non-strided Layout +template +KOKKOS_INLINE_FUNCTION static + typename std::enable_if<(std::is_same::value || + std::is_same::value) && + std::is_integral::value, + Layout>::type + reconstructLayout(const Layout& layout, iType dynrank) { + return Layout(dynrank > 0 ? layout.dimension[0] : KOKKOS_INVALID_INDEX, + dynrank > 1 ? layout.dimension[1] : KOKKOS_INVALID_INDEX, + dynrank > 2 ? layout.dimension[2] : KOKKOS_INVALID_INDEX, + dynrank > 3 ? layout.dimension[3] : KOKKOS_INVALID_INDEX, + dynrank > 4 ? layout.dimension[4] : KOKKOS_INVALID_INDEX, + dynrank > 5 ? layout.dimension[5] : KOKKOS_INVALID_INDEX, + dynrank > 6 ? layout.dimension[6] : KOKKOS_INVALID_INDEX, + dynrank > 7 ? layout.dimension[7] : KOKKOS_INVALID_INDEX); +} +// LayoutStride +template +KOKKOS_INLINE_FUNCTION static typename std::enable_if< + (std::is_same::value) && + std::is_integral::value, + Layout>::type +reconstructLayout(const Layout& layout, iType dynrank) { + return Layout(dynrank > 0 ? layout.dimension[0] : KOKKOS_INVALID_INDEX, + dynrank > 0 ? layout.stride[0] : (0), + dynrank > 1 ? layout.dimension[1] : KOKKOS_INVALID_INDEX, + dynrank > 1 ? layout.stride[1] : (0), + dynrank > 2 ? layout.dimension[2] : KOKKOS_INVALID_INDEX, + dynrank > 2 ? layout.stride[2] : (0), + dynrank > 3 ? layout.dimension[3] : KOKKOS_INVALID_INDEX, + dynrank > 3 ? layout.stride[3] : (0), + dynrank > 4 ? layout.dimension[4] : KOKKOS_INVALID_INDEX, + dynrank > 4 ? layout.stride[4] : (0), + dynrank > 5 ? layout.dimension[5] : KOKKOS_INVALID_INDEX, + dynrank > 5 ? layout.stride[5] : (0), + dynrank > 6 ? layout.dimension[6] : KOKKOS_INVALID_INDEX, + dynrank > 6 ? layout.stride[6] : (0), + dynrank > 7 ? layout.dimension[7] : KOKKOS_INVALID_INDEX, + dynrank > 7 ? 
layout.stride[7] : (0)); +} /** \brief Debug bounds-checking routines */ // Enhanced debug checking - most infrastructure matches that of functions in // Kokkos_ViewMapping; additional checks for extra arguments beyond rank are 0 -template< unsigned , typename iType0 , class MapType > -KOKKOS_INLINE_FUNCTION -bool dyn_rank_view_verify_operator_bounds( const iType0 & , const MapType & ) -{ return true ; } - -template< unsigned R , typename iType0 , class MapType , typename iType1 , class ... Args > -KOKKOS_INLINE_FUNCTION -bool dyn_rank_view_verify_operator_bounds - ( const iType0 & rank - , const MapType & map - , const iType1 & i - , Args ... args - ) -{ - if ( static_cast(R) < rank ) { - return ( size_t(i) < map.extent(R) ) - && dyn_rank_view_verify_operator_bounds( rank , map , args ... ); - } - else if ( i != 0 ) { - printf("DynRankView Debug Bounds Checking Error: at rank %u\n Extra arguments beyond the rank must be zero \n",R); - return ( false ) - && dyn_rank_view_verify_operator_bounds( rank , map , args ... ); - } - else { - return ( true ) - && dyn_rank_view_verify_operator_bounds( rank , map , args ... ); +template +KOKKOS_INLINE_FUNCTION bool dyn_rank_view_verify_operator_bounds( + const iType0&, const MapType&) { + return true; +} + +template +KOKKOS_INLINE_FUNCTION bool dyn_rank_view_verify_operator_bounds( + const iType0& rank, const MapType& map, const iType1& i, Args... 
args) { + if (static_cast(R) < rank) { + return (size_t(i) < map.extent(R)) && + dyn_rank_view_verify_operator_bounds(rank, map, args...); + } else if (i != 0) { + printf( + "DynRankView Debug Bounds Checking Error: at rank %u\n Extra " + "arguments beyond the rank must be zero \n", + R); + return (false) && + dyn_rank_view_verify_operator_bounds(rank, map, args...); + } else { + return (true) && + dyn_rank_view_verify_operator_bounds(rank, map, args...); } } -template< unsigned , class MapType > -inline -void dyn_rank_view_error_operator_bounds( char * , int , const MapType & ) -{} - -template< unsigned R , class MapType , class iType , class ... Args > -inline -void dyn_rank_view_error_operator_bounds - ( char * buf - , int len - , const MapType & map - , const iType & i - , Args ... args - ) -{ - const int n = - snprintf(buf,len," %ld < %ld %c" - , static_cast(i) - , static_cast( map.extent(R) ) - , ( sizeof...(Args) ? ',' : ')' ) - ); - dyn_rank_view_error_operator_bounds(buf+n,len-n,map,args...); +template +inline void dyn_rank_view_error_operator_bounds(char*, int, const MapType&) {} + +template +inline void dyn_rank_view_error_operator_bounds(char* buf, int len, + const MapType& map, + const iType& i, Args... args) { + const int n = snprintf( + buf, len, " %ld < %ld %c", static_cast(i), + static_cast(map.extent(R)), (sizeof...(Args) ? ',' : ')')); + dyn_rank_view_error_operator_bounds(buf + n, len - n, map, args...); } // op_rank = rank of the operator version that was called -template< typename MemorySpace - , typename iType0 , typename iType1 , class MapType , class ... Args > -KOKKOS_INLINE_FUNCTION -void dyn_rank_view_verify_operator_bounds - ( const iType0 & op_rank , const iType1 & rank - , const Kokkos::Impl::SharedAllocationTracker & tracker - , const MapType & map , Args ... args ) -{ - if ( static_cast(rank) > op_rank ) { - Kokkos::abort( "DynRankView Bounds Checking Error: Need at least rank arguments to the operator()" ); - } - - if ( ! 
dyn_rank_view_verify_operator_bounds<0>( rank , map , args ... ) ) { -#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) +template +KOKKOS_INLINE_FUNCTION void dyn_rank_view_verify_operator_bounds( + const iType0& op_rank, const iType1& rank, + const Kokkos::Impl::SharedAllocationTracker& tracker, const MapType& map, + Args... args) { + if (static_cast(rank) > op_rank) { + Kokkos::abort( + "DynRankView Bounds Checking Error: Need at least rank arguments to " + "the operator()"); + } + + if (!dyn_rank_view_verify_operator_bounds<0>(rank, map, args...)) { +#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST) enum { LEN = 1024 }; - char buffer[ LEN ]; + char buffer[LEN]; const std::string label = tracker.template get_label(); - int n = snprintf(buffer,LEN,"DynRankView bounds error of view %s (", label.c_str()); - dyn_rank_view_error_operator_bounds<0>( buffer + n , LEN - n , map , args ... ); + int n = snprintf(buffer, LEN, "DynRankView bounds error of view %s (", + label.c_str()); + dyn_rank_view_error_operator_bounds<0>(buffer + n, LEN - n, map, args...); Kokkos::Impl::throw_runtime_exception(std::string(buffer)); #else Kokkos::abort("DynRankView bounds error"); @@ -312,86 +298,84 @@ void dyn_rank_view_verify_operator_bounds } } - /** \brief Assign compatible default mappings */ struct ViewToDynRankViewTag {}; -} // namespace Impl +} // namespace Impl namespace Impl { -template< class DstTraits , class SrcTraits > -class ViewMapping< DstTraits , SrcTraits , - typename std::enable_if<( - std::is_same< typename DstTraits::memory_space , typename SrcTraits::memory_space >::value - && - std::is_same< typename DstTraits::specialize , void >::value - && - std::is_same< typename SrcTraits::specialize , void >::value - && - ( - std::is_same< typename DstTraits::array_layout , typename SrcTraits::array_layout >::value - || - ( - ( - std::is_same< typename DstTraits::array_layout , Kokkos::LayoutLeft >::value || - std::is_same< typename DstTraits::array_layout , 
Kokkos::LayoutRight >::value || - std::is_same< typename DstTraits::array_layout , Kokkos::LayoutStride >::value - ) - && - ( - std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutLeft >::value || - std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutRight >::value || - std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutStride >::value - ) - ) - ) - ) , Kokkos::Impl::ViewToDynRankViewTag >::type > -{ -private: - - enum { is_assignable_value_type = - std::is_same< typename DstTraits::value_type - , typename SrcTraits::value_type >::value || - std::is_same< typename DstTraits::value_type - , typename SrcTraits::const_value_type >::value }; - - enum { is_assignable_layout = - std::is_same< typename DstTraits::array_layout - , typename SrcTraits::array_layout >::value || - std::is_same< typename DstTraits::array_layout - , Kokkos::LayoutStride >::value - }; +template +class ViewMapping< + DstTraits, SrcTraits, + typename std::enable_if< + (std::is_same::value && + std::is_same::value && + std::is_same::value && + (std::is_same::value || + ((std::is_same::value || + std::is_same::value || + std::is_same::value) && + (std::is_same::value || + std::is_same::value || + std::is_same::value)))), + Kokkos::Impl::ViewToDynRankViewTag>::type> { + private: + enum { + is_assignable_value_type = + std::is_same::value || + std::is_same::value + }; -public: + enum { + is_assignable_layout = + std::is_same::value || + std::is_same::value + }; - enum { is_assignable = is_assignable_value_type && - is_assignable_layout }; + public: + enum { is_assignable = is_assignable_value_type && is_assignable_layout }; - typedef ViewMapping< DstTraits , typename DstTraits::specialize > DstType ; - typedef ViewMapping< SrcTraits , typename SrcTraits::specialize > SrcType ; + typedef ViewMapping DstType; + typedef ViewMapping SrcType; - template < typename DT , typename ... DP , typename ST , typename ... 
SP > - KOKKOS_INLINE_FUNCTION - static void assign( Kokkos::DynRankView< DT , DP...> & dst , const Kokkos::View< ST , SP... > & src ) - { - static_assert( is_assignable_value_type - , "View assignment must have same value type or const = non-const" ); + template + KOKKOS_INLINE_FUNCTION static void assign( + Kokkos::DynRankView& dst, const Kokkos::View& src) { + static_assert( + is_assignable_value_type, + "View assignment must have same value type or const = non-const"); - static_assert( is_assignable_layout - , "View assignment must have compatible layout or have rank <= 1" ); + static_assert( + is_assignable_layout, + "View assignment must have compatible layout or have rank <= 1"); // Removed dimension checks... - typedef typename DstType::offset_type dst_offset_type ; - dst.m_map.m_impl_offset = dst_offset_type(std::integral_constant() , src.layout() ); //Check this for integer input1 for padding, etc - dst.m_map.m_impl_handle = Kokkos::Impl::ViewDataHandle< DstTraits >::assign( src.m_map.m_impl_handle , src.m_track ); - dst.m_track.assign( src.m_track , DstTraits::is_managed ); - dst.m_rank = src.Rank ; - } + typedef typename DstType::offset_type dst_offset_type; + dst.m_map.m_impl_offset = dst_offset_type( + std::integral_constant(), + src.layout()); // Check this for integer input1 for padding, etc + dst.m_map.m_impl_handle = Kokkos::Impl::ViewDataHandle::assign( + src.m_map.m_impl_handle, src.m_track); + dst.m_track.assign(src.m_track, DstTraits::is_managed); + dst.m_rank = src.Rank; + } }; -} //end Impl +} // namespace Impl /* \class DynRankView * \brief Container that creates a Kokkos view with rank determined at runtime. @@ -400,7 +384,8 @@ public: * Changes from View * 1. The rank of the DynRankView is returned by the method rank() * 2. Max rank of a DynRankView is 7 - * 3. subview called with 'subview(...)' or 'subdynrankview(...)' (backward compatibility) + * 3. 
subview called with 'subview(...)' or 'subdynrankview(...)' (backward + * compatibility) * 4. Every subview is returned with LayoutStride * 5. Copy and Copy-Assign View to DynRankView * 6. deep_copy between Views and DynRankViews @@ -408,93 +393,99 @@ public: * */ -template< class > struct is_dyn_rank_view : public std::false_type {}; - -template< class D, class ... P > -struct is_dyn_rank_view< Kokkos::DynRankView > : public std::true_type {}; +template +struct is_dyn_rank_view : public std::false_type {}; +template +struct is_dyn_rank_view > : public std::true_type { +}; -template< typename DataType , class ... Properties > -class DynRankView : public ViewTraits< DataType , Properties ... > -{ - static_assert( !std::is_array::value && !std::is_pointer::value , "Cannot template DynRankView with array or pointer datatype - must be pod" ); - -private: - template < class , class ... > friend class DynRankView ; - template < class , class ... > friend class Kokkos::Impl::ViewMapping ; +template +class DynRankView : public ViewTraits { + static_assert(!std::is_array::value && + !std::is_pointer::value, + "Cannot template DynRankView with array or pointer datatype - " + "must be pod"); -public: - typedef ViewTraits< DataType , Properties ... > drvtraits ; + private: + template + friend class DynRankView; + template + friend class Kokkos::Impl::ViewMapping; - typedef View< DataType******* , Properties...> view_type ; + public: + typedef ViewTraits drvtraits; - typedef ViewTraits< DataType******* , Properties ... 
> traits ; + typedef View view_type; + typedef ViewTraits traits; -private: - typedef Kokkos::Impl::ViewMapping< traits , typename traits::specialize > map_type ; - typedef Kokkos::Impl::SharedAllocationTracker track_type ; + private: + typedef Kokkos::Impl::ViewMapping + map_type; + typedef Kokkos::Impl::SharedAllocationTracker track_type; - track_type m_track ; - map_type m_map ; + track_type m_track; + map_type m_map; unsigned m_rank; -public: + public: KOKKOS_INLINE_FUNCTION - view_type & DownCast() const { return ( view_type & ) (*this); } + view_type& DownCast() const { return (view_type&)(*this); } KOKKOS_INLINE_FUNCTION - const view_type & ConstDownCast() const { return (const view_type & ) (*this); } + const view_type& ConstDownCast() const { return (const view_type&)(*this); } - //Types below - at least the HostMirror requires the value_type, NOT the rank 7 data_type of the traits + // Types below - at least the HostMirror requires the value_type, NOT the rank + // 7 data_type of the traits /** \brief Compatible view of array of scalar types */ - typedef DynRankView< typename drvtraits::scalar_array_type , - typename drvtraits::array_layout , - typename drvtraits::device_type , - typename drvtraits::memory_traits > - array_type ; + typedef DynRankView< + typename drvtraits::scalar_array_type, typename drvtraits::array_layout, + typename drvtraits::device_type, typename drvtraits::memory_traits> + array_type; /** \brief Compatible view of const data type */ - typedef DynRankView< typename drvtraits::const_data_type , - typename drvtraits::array_layout , - typename drvtraits::device_type , - typename drvtraits::memory_traits > - const_type ; + typedef DynRankView< + typename drvtraits::const_data_type, typename drvtraits::array_layout, + typename drvtraits::device_type, typename drvtraits::memory_traits> + const_type; /** \brief Compatible view of non-const data type */ - typedef DynRankView< typename drvtraits::non_const_data_type , - typename 
drvtraits::array_layout , - typename drvtraits::device_type , - typename drvtraits::memory_traits > - non_const_type ; + typedef DynRankView< + typename drvtraits::non_const_data_type, typename drvtraits::array_layout, + typename drvtraits::device_type, typename drvtraits::memory_traits> + non_const_type; /** \brief Compatible HostMirror view */ - typedef DynRankView< typename drvtraits::non_const_data_type , - typename drvtraits::array_layout , - typename drvtraits::host_mirror_space > - HostMirror ; - + typedef DynRankView + HostMirror; //---------------------------------------- // Domain rank and extents -// enum { Rank = map_type::Rank }; //Will be dyn rank of 7 always, keep the enum? + // enum { Rank = map_type::Rank }; //Will be dyn rank of 7 always, keep the + // enum? - template< typename iType > + template KOKKOS_INLINE_FUNCTION constexpr - typename std::enable_if< std::is_integral::value , size_t >::type - extent( const iType & r ) const - { return m_map.extent(r); } + typename std::enable_if::value, size_t>::type + extent(const iType& r) const { + return m_map.extent(r); + } - template< typename iType > + template KOKKOS_INLINE_FUNCTION constexpr - typename std::enable_if< std::is_integral::value , int >::type - extent_int( const iType & r ) const - { return static_cast(m_map.extent(r)); } + typename std::enable_if::value, int>::type + extent_int(const iType& r) const { + return static_cast(m_map.extent(r)); + } - KOKKOS_INLINE_FUNCTION constexpr - typename traits::array_layout layout() const - { return m_map.layout(); } + KOKKOS_INLINE_FUNCTION constexpr typename traits::array_layout layout() + const { + return m_map.layout(); + } //---------------------------------------- /* Deprecate all 'dimension' functions in favor of @@ -502,421 +493,572 @@ public: */ #ifdef KOKKOS_ENABLE_DEPRECATED_CODE - template< typename iType > + template KOKKOS_INLINE_FUNCTION constexpr - typename std::enable_if< std::is_integral::value , size_t >::type - dimension( const 
iType & r ) const { return extent( r ); } - - KOKKOS_INLINE_FUNCTION constexpr size_t dimension_0() const { return m_map.dimension_0(); } - KOKKOS_INLINE_FUNCTION constexpr size_t dimension_1() const { return m_map.dimension_1(); } - KOKKOS_INLINE_FUNCTION constexpr size_t dimension_2() const { return m_map.dimension_2(); } - KOKKOS_INLINE_FUNCTION constexpr size_t dimension_3() const { return m_map.dimension_3(); } - KOKKOS_INLINE_FUNCTION constexpr size_t dimension_4() const { return m_map.dimension_4(); } - KOKKOS_INLINE_FUNCTION constexpr size_t dimension_5() const { return m_map.dimension_5(); } - KOKKOS_INLINE_FUNCTION constexpr size_t dimension_6() const { return m_map.dimension_6(); } - KOKKOS_INLINE_FUNCTION constexpr size_t dimension_7() const { return m_map.dimension_7(); } + typename std::enable_if::value, size_t>::type + dimension(const iType& r) const { + return extent(r); + } + + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_0() const { + return m_map.dimension_0(); + } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_1() const { + return m_map.dimension_1(); + } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_2() const { + return m_map.dimension_2(); + } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_3() const { + return m_map.dimension_3(); + } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_4() const { + return m_map.dimension_4(); + } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_5() const { + return m_map.dimension_5(); + } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_6() const { + return m_map.dimension_6(); + } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_7() const { + return m_map.dimension_7(); + } #endif //---------------------------------------- - KOKKOS_INLINE_FUNCTION constexpr size_t size() const { return m_map.extent(0) * - m_map.extent(1) * - m_map.extent(2) * - m_map.extent(3) * - m_map.extent(4) * - m_map.extent(5) * - m_map.extent(6) * - m_map.extent(7); } - - KOKKOS_INLINE_FUNCTION constexpr 
size_t stride_0() const { return m_map.stride_0(); } - KOKKOS_INLINE_FUNCTION constexpr size_t stride_1() const { return m_map.stride_1(); } - KOKKOS_INLINE_FUNCTION constexpr size_t stride_2() const { return m_map.stride_2(); } - KOKKOS_INLINE_FUNCTION constexpr size_t stride_3() const { return m_map.stride_3(); } - KOKKOS_INLINE_FUNCTION constexpr size_t stride_4() const { return m_map.stride_4(); } - KOKKOS_INLINE_FUNCTION constexpr size_t stride_5() const { return m_map.stride_5(); } - KOKKOS_INLINE_FUNCTION constexpr size_t stride_6() const { return m_map.stride_6(); } - KOKKOS_INLINE_FUNCTION constexpr size_t stride_7() const { return m_map.stride_7(); } - - template< typename iType > - KOKKOS_INLINE_FUNCTION void stride( iType * const s ) const { m_map.stride(s); } + KOKKOS_INLINE_FUNCTION constexpr size_t size() const { + return m_map.extent(0) * m_map.extent(1) * m_map.extent(2) * + m_map.extent(3) * m_map.extent(4) * m_map.extent(5) * + m_map.extent(6) * m_map.extent(7); + } + + KOKKOS_INLINE_FUNCTION constexpr size_t stride_0() const { + return m_map.stride_0(); + } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_1() const { + return m_map.stride_1(); + } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_2() const { + return m_map.stride_2(); + } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_3() const { + return m_map.stride_3(); + } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_4() const { + return m_map.stride_4(); + } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_5() const { + return m_map.stride_5(); + } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_6() const { + return m_map.stride_6(); + } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_7() const { + return m_map.stride_7(); + } + + template + KOKKOS_INLINE_FUNCTION void stride(iType* const s) const { + m_map.stride(s); + } //---------------------------------------- // Range span is the span which contains all members. 
- typedef typename map_type::reference_type reference_type ; - typedef typename map_type::pointer_type pointer_type ; + typedef typename map_type::reference_type reference_type; + typedef typename map_type::pointer_type pointer_type; - enum { reference_type_is_lvalue_reference = std::is_lvalue_reference< reference_type >::value }; + enum { + reference_type_is_lvalue_reference = + std::is_lvalue_reference::value + }; KOKKOS_INLINE_FUNCTION constexpr size_t span() const { return m_map.span(); } #ifdef KOKKOS_ENABLE_DEPRECATED_CODE // Deprecated, use 'span()' instead - KOKKOS_INLINE_FUNCTION constexpr size_t capacity() const { return m_map.span(); } + KOKKOS_INLINE_FUNCTION constexpr size_t capacity() const { + return m_map.span(); + } #endif - KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const { return m_map.span_is_contiguous(); } - KOKKOS_INLINE_FUNCTION constexpr pointer_type data() const { return m_map.data(); } + KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const { + return m_map.span_is_contiguous(); + } + KOKKOS_INLINE_FUNCTION constexpr pointer_type data() const { + return m_map.data(); + } #ifdef KOKKOS_ENABLE_DEPRECATED_CODE // Deprecated, use 'span_is_contigous()' instead - KOKKOS_INLINE_FUNCTION constexpr bool is_contiguous() const { return m_map.span_is_contiguous(); } + KOKKOS_INLINE_FUNCTION constexpr bool is_contiguous() const { + return m_map.span_is_contiguous(); + } // Deprecated, use 'data()' instead - KOKKOS_INLINE_FUNCTION constexpr pointer_type ptr_on_device() const { return m_map.data(); } + KOKKOS_INLINE_FUNCTION constexpr pointer_type ptr_on_device() const { + return m_map.data(); + } #endif //---------------------------------------- // Allow specializations to query their specialized map #ifdef KOKKOS_ENABLE_DEPRECATED_CODE KOKKOS_INLINE_FUNCTION - const Kokkos::Impl::ViewMapping< traits , typename traits::specialize > & - implementation_map() const { return m_map ; } + const Kokkos::Impl::ViewMapping& + 
implementation_map() const { + return m_map; + } #endif KOKKOS_INLINE_FUNCTION - const Kokkos::Impl::ViewMapping< traits , typename traits::specialize > & - impl_map() const { return m_map ; } + const Kokkos::Impl::ViewMapping& + impl_map() const { + return m_map; + } //---------------------------------------- -private: - + private: enum { - is_layout_left = std::is_same< typename traits::array_layout - , Kokkos::LayoutLeft >::value , + is_layout_left = + std::is_same::value, - is_layout_right = std::is_same< typename traits::array_layout - , Kokkos::LayoutRight >::value , + is_layout_right = + std::is_same::value, - is_layout_stride = std::is_same< typename traits::array_layout - , Kokkos::LayoutStride >::value , + is_layout_stride = std::is_same::value, - is_default_map = - std::is_same< typename traits::specialize , void >::value && - ( is_layout_left || is_layout_right || is_layout_stride ) + is_default_map = std::is_same::value && + (is_layout_left || is_layout_right || is_layout_stride) }; - template< class Space , bool = Kokkos::Impl::MemorySpaceAccess< Space , typename traits::memory_space >::accessible > struct verify_space - { KOKKOS_FORCEINLINE_FUNCTION static void check() {} }; + template ::accessible> + struct verify_space { + KOKKOS_FORCEINLINE_FUNCTION static void check() {} + }; - template< class Space > struct verify_space - { KOKKOS_FORCEINLINE_FUNCTION static void check() - { Kokkos::abort("Kokkos::DynRankView ERROR: attempt to access inaccessible memory space"); }; + template + struct verify_space { + KOKKOS_FORCEINLINE_FUNCTION static void check() { + Kokkos::abort( + "Kokkos::DynRankView ERROR: attempt to access inaccessible memory " + "space"); }; + }; // Bounds checking macros -#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK ) +#if defined(KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK) // rank of the calling operator - included as first argument in ARG -#define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( ARG ) \ - DynRankView::template verify_space< 
Kokkos::Impl::ActiveExecutionMemorySpace >::check(); \ - Kokkos::Impl::dyn_rank_view_verify_operator_bounds< typename traits::memory_space > ARG ; +#define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(ARG) \ + DynRankView::template verify_space< \ + Kokkos::Impl::ActiveExecutionMemorySpace>::check(); \ + Kokkos::Impl::dyn_rank_view_verify_operator_bounds< \ + typename traits::memory_space> \ + ARG; #else -#define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( ARG ) \ - DynRankView::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check(); +#define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(ARG) \ + DynRankView::template verify_space< \ + Kokkos::Impl::ActiveExecutionMemorySpace>::check(); #endif -public: - + public: KOKKOS_INLINE_FUNCTION constexpr unsigned rank() const { return m_rank; } - - //operators () + // operators () // Rank 0 KOKKOS_INLINE_FUNCTION - reference_type operator()() const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (0 , this->rank(), m_track, m_map) ) - return impl_map().reference(); - //return m_map.reference(0,0,0,0,0,0,0); - } + reference_type operator()() const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY((0, this->rank(), m_track, m_map)) + return impl_map().reference(); + // return m_map.reference(0,0,0,0,0,0,0); + } // Rank 1 - // This assumes a contiguous underlying memory (i.e. no padding, no striding...) - template< typename iType > - KOKKOS_INLINE_FUNCTION - typename std::enable_if< std::is_same::value && std::is_integral::value, reference_type>::type - operator[](const iType & i0) const - { - //Phalanx is violating this, since they use the operator to access ALL elements in the allocation - //KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (1 , this->rank(), m_track, m_map) ) - return data()[i0]; - } + // This assumes a contiguous underlying memory (i.e. no padding, no + // striding...) 
+ template + KOKKOS_INLINE_FUNCTION typename std::enable_if< + std::is_same::value && + std::is_integral::value, + reference_type>::type + operator[](const iType& i0) const { + // Phalanx is violating this, since they use the operator to access ALL + // elements in the allocation KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (1 , + // this->rank(), m_track, m_map) ) + return data()[i0]; + } - // This assumes a contiguous underlying memory (i.e. no padding, no striding... - // AND a Trilinos/Sacado scalar type ) - template< typename iType > - KOKKOS_INLINE_FUNCTION - typename std::enable_if< !std::is_same::value && std::is_integral::value, reference_type>::type - operator[](const iType & i0) const - { -// auto map = impl_map(); - const size_t dim_scalar = m_map.dimension_scalar(); - const size_t bytes = this->span() / dim_scalar; - - typedef Kokkos::View > tmp_view_type; - tmp_view_type rankone_view(this->data(), bytes, dim_scalar); - return rankone_view(i0); - } + // This assumes a contiguous underlying memory (i.e. no padding, no + // striding... 
AND a Trilinos/Sacado scalar type ) + template + KOKKOS_INLINE_FUNCTION typename std::enable_if< + !std::is_same::value && + std::is_integral::value, + reference_type>::type + operator[](const iType& i0) const { + // auto map = impl_map(); + const size_t dim_scalar = m_map.dimension_scalar(); + const size_t bytes = this->span() / dim_scalar; + + typedef Kokkos::View< + DataType*, typename traits::array_layout, typename traits::device_type, + Kokkos::MemoryTraits > + tmp_view_type; + tmp_view_type rankone_view(this->data(), bytes, dim_scalar); + return rankone_view(i0); + } // Rank 1 parenthesis - template< typename iType > - KOKKOS_INLINE_FUNCTION - typename std::enable_if< (std::is_same::value && std::is_integral::value), reference_type>::type - operator()(const iType & i0 ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (1 , this->rank(), m_track, m_map, i0) ) - return m_map.reference(i0); - } + template + KOKKOS_INLINE_FUNCTION typename std::enable_if< + (std::is_same::value && + std::is_integral::value), + reference_type>::type + operator()(const iType& i0) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY((1, this->rank(), m_track, m_map, i0)) + return m_map.reference(i0); + } - template< typename iType > - KOKKOS_INLINE_FUNCTION - typename std::enable_if< !(std::is_same::value && std::is_integral::value), reference_type>::type - operator()(const iType & i0 ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (1 , this->rank(), m_track, m_map, i0) ) - return m_map.reference(i0,0,0,0,0,0,0); - } + template + KOKKOS_INLINE_FUNCTION typename std::enable_if< + !(std::is_same::value && + std::is_integral::value), + reference_type>::type + operator()(const iType& i0) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY((1, this->rank(), m_track, m_map, i0)) + return m_map.reference(i0, 0, 0, 0, 0, 0, 0); + } // Rank 2 - template< typename iType0 , typename iType1 > - KOKKOS_INLINE_FUNCTION - typename std::enable_if< (std::is_same::value && std::is_integral::value && 
std::is_integral::value), reference_type>::type - operator()(const iType0 & i0 , const iType1 & i1 ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (2 , this->rank(), m_track, m_map, i0, i1) ) - return m_map.reference(i0,i1); - } + template + KOKKOS_INLINE_FUNCTION typename std::enable_if< + (std::is_same::value && + std::is_integral::value && std::is_integral::value), + reference_type>::type + operator()(const iType0& i0, const iType1& i1) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY((2, this->rank(), m_track, m_map, i0, i1)) + return m_map.reference(i0, i1); + } - template< typename iType0 , typename iType1 > - KOKKOS_INLINE_FUNCTION - typename std::enable_if< !(std::is_same::value && std::is_integral::value), reference_type>::type - operator()(const iType0 & i0 , const iType1 & i1 ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (2 , this->rank(), m_track, m_map, i0, i1) ) - return m_map.reference(i0,i1,0,0,0,0,0); - } + template + KOKKOS_INLINE_FUNCTION typename std::enable_if< + !(std::is_same::value && + std::is_integral::value), + reference_type>::type + operator()(const iType0& i0, const iType1& i1) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY((2, this->rank(), m_track, m_map, i0, i1)) + return m_map.reference(i0, i1, 0, 0, 0, 0, 0); + } // Rank 3 - template< typename iType0 , typename iType1 , typename iType2 > - KOKKOS_INLINE_FUNCTION - typename std::enable_if< (std::is_same::value && std::is_integral::value && std::is_integral::value && std::is_integral::value), reference_type>::type - operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (3 , this->rank(), m_track, m_map, i0, i1, i2) ) - return m_map.reference(i0,i1,i2); - } + template + KOKKOS_INLINE_FUNCTION typename std::enable_if< + (std::is_same::value && + std::is_integral::value && std::is_integral::value && + std::is_integral::value), + reference_type>::type + operator()(const iType0& i0, const iType1& i1, const iType2& i2) const { + 
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( + (3, this->rank(), m_track, m_map, i0, i1, i2)) + return m_map.reference(i0, i1, i2); + } - template< typename iType0 , typename iType1 , typename iType2 > - KOKKOS_INLINE_FUNCTION - typename std::enable_if< !(std::is_same::value && std::is_integral::value), reference_type>::type - operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (3 , this->rank(), m_track, m_map, i0, i1, i2) ) - return m_map.reference(i0,i1,i2,0,0,0,0); - } + template + KOKKOS_INLINE_FUNCTION typename std::enable_if< + !(std::is_same::value && + std::is_integral::value), + reference_type>::type + operator()(const iType0& i0, const iType1& i1, const iType2& i2) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( + (3, this->rank(), m_track, m_map, i0, i1, i2)) + return m_map.reference(i0, i1, i2, 0, 0, 0, 0); + } // Rank 4 - template< typename iType0 , typename iType1 , typename iType2 , typename iType3 > - KOKKOS_INLINE_FUNCTION - typename std::enable_if< (std::is_same::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value), reference_type>::type - operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (4 , this->rank(), m_track, m_map, i0, i1, i2, i3) ) - return m_map.reference(i0,i1,i2,i3); - } + template + KOKKOS_INLINE_FUNCTION typename std::enable_if< + (std::is_same::value && + std::is_integral::value && std::is_integral::value && + std::is_integral::value && std::is_integral::value), + reference_type>::type + operator()(const iType0& i0, const iType1& i1, const iType2& i2, + const iType3& i3) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( + (4, this->rank(), m_track, m_map, i0, i1, i2, i3)) + return m_map.reference(i0, i1, i2, i3); + } - template< typename iType0 , typename iType1 , typename iType2 , typename iType3 > - KOKKOS_INLINE_FUNCTION - typename 
std::enable_if< !(std::is_same::value && std::is_integral::value), reference_type>::type - operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (4 , this->rank(), m_track, m_map, i0, i1, i2, i3) ) - return m_map.reference(i0,i1,i2,i3,0,0,0); - } + template + KOKKOS_INLINE_FUNCTION typename std::enable_if< + !(std::is_same::value && + std::is_integral::value), + reference_type>::type + operator()(const iType0& i0, const iType1& i1, const iType2& i2, + const iType3& i3) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( + (4, this->rank(), m_track, m_map, i0, i1, i2, i3)) + return m_map.reference(i0, i1, i2, i3, 0, 0, 0); + } // Rank 5 - template< typename iType0 , typename iType1 , typename iType2 , typename iType3, typename iType4 > - KOKKOS_INLINE_FUNCTION - typename std::enable_if< (std::is_same::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value), reference_type>::type - operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (5 , this->rank(), m_track, m_map, i0, i1, i2, i3, i4) ) - return m_map.reference(i0,i1,i2,i3,i4); - } + template + KOKKOS_INLINE_FUNCTION typename std::enable_if< + (std::is_same::value && + std::is_integral::value && std::is_integral::value && + std::is_integral::value && std::is_integral::value && + std::is_integral::value), + reference_type>::type + operator()(const iType0& i0, const iType1& i1, const iType2& i2, + const iType3& i3, const iType4& i4) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( + (5, this->rank(), m_track, m_map, i0, i1, i2, i3, i4)) + return m_map.reference(i0, i1, i2, i3, i4); + } - template< typename iType0 , typename iType1 , typename iType2 , typename iType3, typename iType4 > - KOKKOS_INLINE_FUNCTION - typename std::enable_if< !(std::is_same::value && 
std::is_integral::value), reference_type>::type - operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (5 , this->rank(), m_track, m_map, i0, i1, i2, i3, i4) ) - return m_map.reference(i0,i1,i2,i3,i4,0,0); - } + template + KOKKOS_INLINE_FUNCTION typename std::enable_if< + !(std::is_same::value && + std::is_integral::value), + reference_type>::type + operator()(const iType0& i0, const iType1& i1, const iType2& i2, + const iType3& i3, const iType4& i4) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( + (5, this->rank(), m_track, m_map, i0, i1, i2, i3, i4)) + return m_map.reference(i0, i1, i2, i3, i4, 0, 0); + } // Rank 6 - template< typename iType0 , typename iType1 , typename iType2 , typename iType3, typename iType4 , typename iType5 > - KOKKOS_INLINE_FUNCTION - typename std::enable_if< (std::is_same::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value), reference_type>::type - operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (6 , this->rank(), m_track, m_map, i0, i1, i2, i3, i4, i5) ) - return m_map.reference(i0,i1,i2,i3,i4,i5); - } + template + KOKKOS_INLINE_FUNCTION typename std::enable_if< + (std::is_same::value && + std::is_integral::value && std::is_integral::value && + std::is_integral::value && std::is_integral::value && + std::is_integral::value && std::is_integral::value), + reference_type>::type + operator()(const iType0& i0, const iType1& i1, const iType2& i2, + const iType3& i3, const iType4& i4, const iType5& i5) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( + (6, this->rank(), m_track, m_map, i0, i1, i2, i3, i4, i5)) + return m_map.reference(i0, i1, i2, i3, i4, i5); + } - template< typename iType0 , typename iType1 
, typename iType2 , typename iType3, typename iType4 , typename iType5 > - KOKKOS_INLINE_FUNCTION - typename std::enable_if< !(std::is_same::value && std::is_integral::value), reference_type>::type - operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (6 , this->rank(), m_track, m_map, i0, i1, i2, i3, i4, i5) ) - return m_map.reference(i0,i1,i2,i3,i4,i5,0); - } + template + KOKKOS_INLINE_FUNCTION typename std::enable_if< + !(std::is_same::value && + std::is_integral::value), + reference_type>::type + operator()(const iType0& i0, const iType1& i1, const iType2& i2, + const iType3& i3, const iType4& i4, const iType5& i5) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( + (6, this->rank(), m_track, m_map, i0, i1, i2, i3, i4, i5)) + return m_map.reference(i0, i1, i2, i3, i4, i5, 0); + } // Rank 7 - template< typename iType0 , typename iType1 , typename iType2 , typename iType3, typename iType4 , typename iType5 , typename iType6 > - KOKKOS_INLINE_FUNCTION - typename std::enable_if< (std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value), reference_type>::type - operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 , const iType6 & i6 ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (7 , this->rank(), m_track, m_map, i0, i1, i2, i3, i4, i5, i6) ) - return m_map.reference(i0,i1,i2,i3,i4,i5,i6); - } + template + KOKKOS_INLINE_FUNCTION typename std::enable_if< + (std::is_integral::value && std::is_integral::value && + std::is_integral::value && std::is_integral::value && + std::is_integral::value && std::is_integral::value && + std::is_integral::value), + reference_type>::type + operator()(const iType0& i0, const iType1& i1, const iType2& i2, + const 
iType3& i3, const iType4& i4, const iType5& i5, + const iType6& i6) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( + (7, this->rank(), m_track, m_map, i0, i1, i2, i3, i4, i5, i6)) + return m_map.reference(i0, i1, i2, i3, i4, i5, i6); + } // Rank 0 KOKKOS_INLINE_FUNCTION - reference_type access() const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (0 , this->rank(), m_track, m_map) ) - return impl_map().reference(); - //return m_map.reference(0,0,0,0,0,0,0); - } + reference_type access() const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY((0, this->rank(), m_track, m_map)) + return impl_map().reference(); + // return m_map.reference(0,0,0,0,0,0,0); + } // Rank 1 - // Rank 1 parenthesis - template< typename iType > - KOKKOS_INLINE_FUNCTION - typename std::enable_if< (std::is_same::value && std::is_integral::value), reference_type>::type - access(const iType & i0 ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (1 , this->rank(), m_track, m_map, i0) ) - return m_map.reference(i0); - } + // Rank 1 parenthesis + template + KOKKOS_INLINE_FUNCTION typename std::enable_if< + (std::is_same::value && + std::is_integral::value), + reference_type>::type + access(const iType& i0) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY((1, this->rank(), m_track, m_map, i0)) + return m_map.reference(i0); + } - template< typename iType > - KOKKOS_INLINE_FUNCTION - typename std::enable_if< !(std::is_same::value && std::is_integral::value), reference_type>::type - access(const iType & i0 ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (1 , this->rank(), m_track, m_map, i0) ) - return m_map.reference(i0,0,0,0,0,0,0); - } + template + KOKKOS_INLINE_FUNCTION typename std::enable_if< + !(std::is_same::value && + std::is_integral::value), + reference_type>::type + access(const iType& i0) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY((1, this->rank(), m_track, m_map, i0)) + return m_map.reference(i0, 0, 0, 0, 0, 0, 0); + } // Rank 2 - template< typename iType0 , typename iType1 > - KOKKOS_INLINE_FUNCTION - typename 
std::enable_if< (std::is_same::value && std::is_integral::value && std::is_integral::value), reference_type>::type - access(const iType0 & i0 , const iType1 & i1 ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (2 , this->rank(), m_track, m_map, i0, i1) ) - return m_map.reference(i0,i1); - } + template + KOKKOS_INLINE_FUNCTION typename std::enable_if< + (std::is_same::value && + std::is_integral::value && std::is_integral::value), + reference_type>::type + access(const iType0& i0, const iType1& i1) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY((2, this->rank(), m_track, m_map, i0, i1)) + return m_map.reference(i0, i1); + } - template< typename iType0 , typename iType1 > - KOKKOS_INLINE_FUNCTION - typename std::enable_if< !(std::is_same::value && std::is_integral::value), reference_type>::type - access(const iType0 & i0 , const iType1 & i1 ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (2 , this->rank(), m_track, m_map, i0, i1) ) - return m_map.reference(i0,i1,0,0,0,0,0); - } + template + KOKKOS_INLINE_FUNCTION typename std::enable_if< + !(std::is_same::value && + std::is_integral::value), + reference_type>::type + access(const iType0& i0, const iType1& i1) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY((2, this->rank(), m_track, m_map, i0, i1)) + return m_map.reference(i0, i1, 0, 0, 0, 0, 0); + } // Rank 3 - template< typename iType0 , typename iType1 , typename iType2 > - KOKKOS_INLINE_FUNCTION - typename std::enable_if< (std::is_same::value && std::is_integral::value && std::is_integral::value && std::is_integral::value), reference_type>::type - access(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (3 , this->rank(), m_track, m_map, i0, i1, i2) ) - return m_map.reference(i0,i1,i2); - } + template + KOKKOS_INLINE_FUNCTION typename std::enable_if< + (std::is_same::value && + std::is_integral::value && std::is_integral::value && + std::is_integral::value), + reference_type>::type + access(const iType0& i0, const 
iType1& i1, const iType2& i2) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( + (3, this->rank(), m_track, m_map, i0, i1, i2)) + return m_map.reference(i0, i1, i2); + } - template< typename iType0 , typename iType1 , typename iType2 > - KOKKOS_INLINE_FUNCTION - typename std::enable_if< !(std::is_same::value && std::is_integral::value), reference_type>::type - access(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (3 , this->rank(), m_track, m_map, i0, i1, i2) ) - return m_map.reference(i0,i1,i2,0,0,0,0); - } + template + KOKKOS_INLINE_FUNCTION typename std::enable_if< + !(std::is_same::value && + std::is_integral::value), + reference_type>::type + access(const iType0& i0, const iType1& i1, const iType2& i2) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( + (3, this->rank(), m_track, m_map, i0, i1, i2)) + return m_map.reference(i0, i1, i2, 0, 0, 0, 0); + } // Rank 4 - template< typename iType0 , typename iType1 , typename iType2 , typename iType3 > - KOKKOS_INLINE_FUNCTION - typename std::enable_if< (std::is_same::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value), reference_type>::type - access(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (4 , this->rank(), m_track, m_map, i0, i1, i2, i3) ) - return m_map.reference(i0,i1,i2,i3); - } + template + KOKKOS_INLINE_FUNCTION typename std::enable_if< + (std::is_same::value && + std::is_integral::value && std::is_integral::value && + std::is_integral::value && std::is_integral::value), + reference_type>::type + access(const iType0& i0, const iType1& i1, const iType2& i2, + const iType3& i3) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( + (4, this->rank(), m_track, m_map, i0, i1, i2, i3)) + return m_map.reference(i0, i1, i2, i3); + } - template< typename iType0 , typename iType1 , typename iType2 , typename iType3 > - 
KOKKOS_INLINE_FUNCTION - typename std::enable_if< !(std::is_same::value && std::is_integral::value), reference_type>::type - access(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (4 , this->rank(), m_track, m_map, i0, i1, i2, i3) ) - return m_map.reference(i0,i1,i2,i3,0,0,0); - } + template + KOKKOS_INLINE_FUNCTION typename std::enable_if< + !(std::is_same::value && + std::is_integral::value), + reference_type>::type + access(const iType0& i0, const iType1& i1, const iType2& i2, + const iType3& i3) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( + (4, this->rank(), m_track, m_map, i0, i1, i2, i3)) + return m_map.reference(i0, i1, i2, i3, 0, 0, 0); + } // Rank 5 - template< typename iType0 , typename iType1 , typename iType2 , typename iType3, typename iType4 > - KOKKOS_INLINE_FUNCTION - typename std::enable_if< (std::is_same::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value), reference_type>::type - access(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (5 , this->rank(), m_track, m_map, i0, i1, i2, i3, i4) ) - return m_map.reference(i0,i1,i2,i3,i4); - } + template + KOKKOS_INLINE_FUNCTION typename std::enable_if< + (std::is_same::value && + std::is_integral::value && std::is_integral::value && + std::is_integral::value && std::is_integral::value && + std::is_integral::value), + reference_type>::type + access(const iType0& i0, const iType1& i1, const iType2& i2, const iType3& i3, + const iType4& i4) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( + (5, this->rank(), m_track, m_map, i0, i1, i2, i3, i4)) + return m_map.reference(i0, i1, i2, i3, i4); + } - template< typename iType0 , typename iType1 , typename iType2 , typename iType3, typename iType4 > - KOKKOS_INLINE_FUNCTION - typename std::enable_if< 
!(std::is_same::value && std::is_integral::value), reference_type>::type - access(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (5 , this->rank(), m_track, m_map, i0, i1, i2, i3, i4) ) - return m_map.reference(i0,i1,i2,i3,i4,0,0); - } + template + KOKKOS_INLINE_FUNCTION typename std::enable_if< + !(std::is_same::value && + std::is_integral::value), + reference_type>::type + access(const iType0& i0, const iType1& i1, const iType2& i2, const iType3& i3, + const iType4& i4) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( + (5, this->rank(), m_track, m_map, i0, i1, i2, i3, i4)) + return m_map.reference(i0, i1, i2, i3, i4, 0, 0); + } // Rank 6 - template< typename iType0 , typename iType1 , typename iType2 , typename iType3, typename iType4 , typename iType5 > - KOKKOS_INLINE_FUNCTION - typename std::enable_if< (std::is_same::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value), reference_type>::type - access(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (6 , this->rank(), m_track, m_map, i0, i1, i2, i3, i4, i5) ) - return m_map.reference(i0,i1,i2,i3,i4,i5); - } + template + KOKKOS_INLINE_FUNCTION typename std::enable_if< + (std::is_same::value && + std::is_integral::value && std::is_integral::value && + std::is_integral::value && std::is_integral::value && + std::is_integral::value && std::is_integral::value), + reference_type>::type + access(const iType0& i0, const iType1& i1, const iType2& i2, const iType3& i3, + const iType4& i4, const iType5& i5) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( + (6, this->rank(), m_track, m_map, i0, i1, i2, i3, i4, i5)) + return m_map.reference(i0, i1, i2, i3, i4, i5); + } - template< typename iType0 , 
typename iType1 , typename iType2 , typename iType3, typename iType4 , typename iType5 > - KOKKOS_INLINE_FUNCTION - typename std::enable_if< !(std::is_same::value && std::is_integral::value), reference_type>::type - access(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (6 , this->rank(), m_track, m_map, i0, i1, i2, i3, i4, i5) ) - return m_map.reference(i0,i1,i2,i3,i4,i5,0); - } + template + KOKKOS_INLINE_FUNCTION typename std::enable_if< + !(std::is_same::value && + std::is_integral::value), + reference_type>::type + access(const iType0& i0, const iType1& i1, const iType2& i2, const iType3& i3, + const iType4& i4, const iType5& i5) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( + (6, this->rank(), m_track, m_map, i0, i1, i2, i3, i4, i5)) + return m_map.reference(i0, i1, i2, i3, i4, i5, 0); + } // Rank 7 - template< typename iType0 , typename iType1 , typename iType2 , typename iType3, typename iType4 , typename iType5 , typename iType6 > - KOKKOS_INLINE_FUNCTION - typename std::enable_if< (std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value), reference_type>::type - access(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 , const iType6 & i6 ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (7 , this->rank(), m_track, m_map, i0, i1, i2, i3, i4, i5, i6) ) - return m_map.reference(i0,i1,i2,i3,i4,i5,i6); - } + template + KOKKOS_INLINE_FUNCTION typename std::enable_if< + (std::is_integral::value && std::is_integral::value && + std::is_integral::value && std::is_integral::value && + std::is_integral::value && std::is_integral::value && + std::is_integral::value), + reference_type>::type + access(const iType0& i0, const iType1& i1, const iType2& i2, const 
iType3& i3, + const iType4& i4, const iType5& i5, const iType6& i6) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( + (7, this->rank(), m_track, m_map, i0, i1, i2, i3, i4, i5, i6)) + return m_map.reference(i0, i1, i2, i3, i4, i5, i6); + } #undef KOKKOS_IMPL_VIEW_OPERATOR_VERIFY @@ -927,405 +1069,393 @@ public: ~DynRankView() {} KOKKOS_INLINE_FUNCTION - DynRankView() : m_track(), m_map(), m_rank() {} //Default ctor + DynRankView() : m_track(), m_map(), m_rank() {} // Default ctor KOKKOS_INLINE_FUNCTION - DynRankView( const DynRankView & rhs ) : m_track( rhs.m_track ), m_map( rhs.m_map ), m_rank(rhs.m_rank) {} + DynRankView(const DynRankView& rhs) + : m_track(rhs.m_track), m_map(rhs.m_map), m_rank(rhs.m_rank) {} KOKKOS_INLINE_FUNCTION - DynRankView( DynRankView && rhs ) : m_track( rhs.m_track ), m_map( rhs.m_map ), m_rank(rhs.m_rank) {} + DynRankView(DynRankView&& rhs) + : m_track(rhs.m_track), m_map(rhs.m_map), m_rank(rhs.m_rank) {} KOKKOS_INLINE_FUNCTION - DynRankView & operator = ( const DynRankView & rhs ) { m_track = rhs.m_track; m_map = rhs.m_map; m_rank = rhs.m_rank; return *this; } + DynRankView& operator=(const DynRankView& rhs) { + m_track = rhs.m_track; + m_map = rhs.m_map; + m_rank = rhs.m_rank; + return *this; + } KOKKOS_INLINE_FUNCTION - DynRankView & operator = ( DynRankView && rhs ) { m_track = rhs.m_track; m_map = rhs.m_map; m_rank = rhs.m_rank; return *this; } + DynRankView& operator=(DynRankView&& rhs) { + m_track = rhs.m_track; + m_map = rhs.m_map; + m_rank = rhs.m_rank; + return *this; + } //---------------------------------------- // Compatible view copy constructor and assignment // may assign unmanaged from managed. - template< class RT , class ... 
RP > - KOKKOS_INLINE_FUNCTION - DynRankView( const DynRankView & rhs ) - : m_track( rhs.m_track , traits::is_managed ) - , m_map() - , m_rank(rhs.m_rank) - { - typedef typename DynRankView ::traits SrcTraits ; - typedef Kokkos::Impl::ViewMapping< traits , SrcTraits , typename traits::specialize > Mapping ; - static_assert( Mapping::is_assignable , "Incompatible DynRankView copy construction" ); - Mapping::assign( m_map , rhs.m_map , rhs.m_track ); - } + template + KOKKOS_INLINE_FUNCTION DynRankView(const DynRankView& rhs) + : m_track(rhs.m_track, traits::is_managed), m_map(), m_rank(rhs.m_rank) { + typedef typename DynRankView::traits SrcTraits; + typedef Kokkos::Impl::ViewMapping + Mapping; + static_assert(Mapping::is_assignable, + "Incompatible DynRankView copy construction"); + Mapping::assign(m_map, rhs.m_map, rhs.m_track); + } - template< class RT , class ... RP > - KOKKOS_INLINE_FUNCTION - DynRankView & operator = (const DynRankView & rhs ) - { - typedef typename DynRankView ::traits SrcTraits ; - typedef Kokkos::Impl::ViewMapping< traits , SrcTraits , typename traits::specialize > Mapping ; - static_assert( Mapping::is_assignable , "Incompatible DynRankView copy construction" ); - Mapping::assign( m_map , rhs.m_map , rhs.m_track ); - m_track.assign( rhs.m_track , traits::is_managed ); - m_rank = rhs.rank(); - return *this; - } + template + KOKKOS_INLINE_FUNCTION DynRankView& operator=( + const DynRankView& rhs) { + typedef typename DynRankView::traits SrcTraits; + typedef Kokkos::Impl::ViewMapping + Mapping; + static_assert(Mapping::is_assignable, + "Incompatible DynRankView copy construction"); + Mapping::assign(m_map, rhs.m_map, rhs.m_track); + m_track.assign(rhs.m_track, traits::is_managed); + m_rank = rhs.rank(); + return *this; + } -// Copy/Assign View to DynRankView - template< class RT , class ... 
RP > - KOKKOS_INLINE_FUNCTION - DynRankView( const View & rhs ) - : m_track() - , m_map() - , m_rank( rhs.Rank ) - { - typedef typename View::traits SrcTraits ; - typedef Kokkos::Impl::ViewMapping< traits , SrcTraits , Kokkos::Impl::ViewToDynRankViewTag > Mapping ; - static_assert( Mapping::is_assignable , "Incompatible View to DynRankView copy construction" ); - Mapping::assign( *this , rhs ); - } + // Copy/Assign View to DynRankView + template + KOKKOS_INLINE_FUNCTION DynRankView(const View& rhs) + : m_track(), m_map(), m_rank(rhs.Rank) { + typedef typename View::traits SrcTraits; + typedef Kokkos::Impl::ViewMapping + Mapping; + static_assert(Mapping::is_assignable, + "Incompatible View to DynRankView copy construction"); + Mapping::assign(*this, rhs); + } - template< class RT , class ... RP > - KOKKOS_INLINE_FUNCTION - DynRankView & operator = ( const View & rhs ) - { - typedef typename View::traits SrcTraits ; - typedef Kokkos::Impl::ViewMapping< traits , SrcTraits , Kokkos::Impl::ViewToDynRankViewTag > Mapping ; - static_assert( Mapping::is_assignable , "Incompatible View to DynRankView copy assignment" ); - Mapping::assign( *this , rhs ); - return *this ; - } + template + KOKKOS_INLINE_FUNCTION DynRankView& operator=(const View& rhs) { + typedef typename View::traits SrcTraits; + typedef Kokkos::Impl::ViewMapping + Mapping; + static_assert(Mapping::is_assignable, + "Incompatible View to DynRankView copy assignment"); + Mapping::assign(*this, rhs); + return *this; + } //---------------------------------------- // Allocation tracking properties KOKKOS_INLINE_FUNCTION - int use_count() const - { return m_track.use_count(); } + int use_count() const { return m_track.use_count(); } - inline - const std::string label() const - { return m_track.template get_label< typename traits::memory_space >(); } + inline const std::string label() const { + return m_track.template get_label(); + } //---------------------------------------- // Allocation according to allocation 
properties and array layout - // unused arg_layout dimensions must be set to KOKKOS_INVALID_INDEX so that rank deduction can properly take place - template< class ... P > - explicit inline - DynRankView( const Kokkos::Impl::ViewCtorProp< P ... > & arg_prop - , typename std::enable_if< ! Kokkos::Impl::ViewCtorProp< P... >::has_pointer - , typename traits::array_layout - >::type const & arg_layout - ) - : m_track() - , m_map() - , m_rank( Impl::DynRankDimTraits::template computeRank< typename traits::array_layout, P...>(arg_prop, arg_layout) ) - { - // Append layout and spaces if not input - typedef Kokkos::Impl::ViewCtorProp< P ... > alloc_prop_input ; - - // use 'std::integral_constant' for non-types - // to avoid duplicate class error. - typedef Kokkos::Impl::ViewCtorProp - < P ... - , typename std::conditional - < alloc_prop_input::has_label - , std::integral_constant - , typename std::string - >::type - , typename std::conditional - < alloc_prop_input::has_memory_space - , std::integral_constant - , typename traits::device_type::memory_space - >::type - , typename std::conditional - < alloc_prop_input::has_execution_space - , std::integral_constant - , typename traits::device_type::execution_space - >::type - > alloc_prop ; - - static_assert( traits::is_managed - , "View allocation constructor requires managed memory" ); - - if ( alloc_prop::initialize && + // unused arg_layout dimensions must be set to KOKKOS_INVALID_INDEX so that + // rank deduction can properly take place + template + explicit inline DynRankView( + const Kokkos::Impl::ViewCtorProp& arg_prop, + typename std::enable_if::has_pointer, + typename traits::array_layout>::type const& + arg_layout) + : m_track(), + m_map(), + m_rank(Impl::DynRankDimTraits:: + template computeRank( + arg_prop, arg_layout)) { + // Append layout and spaces if not input + typedef Kokkos::Impl::ViewCtorProp alloc_prop_input; + + // use 'std::integral_constant' for non-types + // to avoid duplicate class error. 
+ typedef Kokkos::Impl::ViewCtorProp< + P..., + typename std::conditional, + typename std::string>::type, + typename std::conditional< + alloc_prop_input::has_memory_space, + std::integral_constant, + typename traits::device_type::memory_space>::type, + typename std::conditional< + alloc_prop_input::has_execution_space, + std::integral_constant, + typename traits::device_type::execution_space>::type> + alloc_prop; + + static_assert(traits::is_managed, + "View allocation constructor requires managed memory"); + + if (alloc_prop::initialize && #ifdef KOKKOS_ENABLE_DEPRECATED_CODE - ! alloc_prop::execution_space::is_initialized() + !alloc_prop::execution_space::is_initialized() #else - ! alloc_prop::execution_space::impl_is_initialized() + !alloc_prop::execution_space::impl_is_initialized() #endif - ) { - // If initializing view data then - // the execution space must be initialized. - Kokkos::Impl::throw_runtime_exception("Constructing DynRankView and initializing data with uninitialized execution space"); - } + ) { + // If initializing view data then + // the execution space must be initialized. + Kokkos::Impl::throw_runtime_exception( + "Constructing DynRankView and initializing data with uninitialized " + "execution space"); + } - // Copy the input allocation properties with possibly defaulted properties - alloc_prop prop_copy( arg_prop ); + // Copy the input allocation properties with possibly defaulted properties + alloc_prop prop_copy(arg_prop); //------------------------------------------------------------ -#if defined( KOKKOS_ENABLE_CUDA ) - // If allocating in CudaUVMSpace must fence before and after - // the allocation to protect against possible concurrent access - // on the CPU and the GPU. - // Fence using the trait's executon space (which will be Kokkos::Cuda) - // to avoid incomplete type errors from usng Kokkos::Cuda directly. 
- if ( std::is_same< Kokkos::CudaUVMSpace , typename traits::device_type::memory_space >::value ) { - typename traits::device_type::memory_space::execution_space().fence(); - } +#if defined(KOKKOS_ENABLE_CUDA) + // If allocating in CudaUVMSpace must fence before and after + // the allocation to protect against possible concurrent access + // on the CPU and the GPU. + // Fence using the trait's executon space (which will be Kokkos::Cuda) + // to avoid incomplete type errors from usng Kokkos::Cuda directly. + if (std::is_same::value) { + typename traits::device_type::memory_space::execution_space().fence(); + } #endif -//------------------------------------------------------------ + //------------------------------------------------------------ - Kokkos::Impl::SharedAllocationRecord<> * - record = m_map.allocate_shared( prop_copy, Impl::DynRankDimTraits::template createLayout(arg_prop, arg_layout) ); + Kokkos::Impl::SharedAllocationRecord<>* record = m_map.allocate_shared( + prop_copy, + Impl::DynRankDimTraits:: + template createLayout(arg_prop, arg_layout)); //------------------------------------------------------------ -#if defined( KOKKOS_ENABLE_CUDA ) - if ( std::is_same< Kokkos::CudaUVMSpace , typename traits::device_type::memory_space >::value ) { - typename traits::device_type::memory_space::execution_space().fence(); - } -#endif -//------------------------------------------------------------ - - // Setup and initialization complete, start tracking - m_track.assign_allocated_record_to_uninitialized( record ); +#if defined(KOKKOS_ENABLE_CUDA) + if (std::is_same::value) { + typename traits::device_type::memory_space::execution_space().fence(); } +#endif + //------------------------------------------------------------ + // Setup and initialization complete, start tracking + m_track.assign_allocated_record_to_uninitialized(record); + } // Wrappers - template< class ... P > - explicit KOKKOS_INLINE_FUNCTION - DynRankView( const Kokkos::Impl::ViewCtorProp< P ... 
> & arg_prop - , typename std::enable_if< Kokkos::Impl::ViewCtorProp< P... >::has_pointer - , typename traits::array_layout - >::type const & arg_layout - ) - : m_track() // No memory tracking - , m_map( arg_prop , Impl::DynRankDimTraits::template createLayout(arg_prop, arg_layout) ) - , m_rank( Impl::DynRankDimTraits::template computeRank< typename traits::array_layout, P...>(arg_prop, arg_layout) ) - { - static_assert( - std::is_same< pointer_type - , typename Impl::ViewCtorProp< P... >::pointer_type - >::value , - "Constructing DynRankView to wrap user memory must supply matching pointer type" ); - } + template + explicit KOKKOS_INLINE_FUNCTION DynRankView( + const Kokkos::Impl::ViewCtorProp& arg_prop, + typename std::enable_if::has_pointer, + typename traits::array_layout>::type const& + arg_layout) + : m_track() // No memory tracking + , + m_map(arg_prop, + Impl::DynRankDimTraits:: + template createLayout(arg_prop, arg_layout)), + m_rank(Impl::DynRankDimTraits:: + template computeRank( + arg_prop, arg_layout)) { + static_assert( + std::is_same::pointer_type>::value, + "Constructing DynRankView to wrap user memory must supply matching " + "pointer type"); + } //---------------------------------------- - //Constructor(s) + // Constructor(s) // Simple dimension-only layout - template< class ... P > - explicit inline - DynRankView( const Kokkos::Impl::ViewCtorProp< P ... > & arg_prop - , typename std::enable_if< ! Kokkos::Impl::ViewCtorProp< P... 
>::has_pointer - , size_t - >::type const arg_N0 =KOKKOS_INVALID_INDEX - , const size_t arg_N1 =KOKKOS_INVALID_INDEX - , const size_t arg_N2 =KOKKOS_INVALID_INDEX - , const size_t arg_N3 =KOKKOS_INVALID_INDEX - , const size_t arg_N4 =KOKKOS_INVALID_INDEX - , const size_t arg_N5 =KOKKOS_INVALID_INDEX - , const size_t arg_N6 =KOKKOS_INVALID_INDEX - , const size_t arg_N7 =KOKKOS_INVALID_INDEX - ) - : DynRankView( arg_prop - , typename traits::array_layout - ( arg_N0 , arg_N1 , arg_N2 , arg_N3 , arg_N4 , arg_N5 , arg_N6 , arg_N7 ) - ) - {} - - template< class ... P > - explicit KOKKOS_INLINE_FUNCTION - DynRankView( const Kokkos::Impl::ViewCtorProp< P ... > & arg_prop - , typename std::enable_if< Kokkos::Impl::ViewCtorProp< P... >::has_pointer - , size_t - >::type const arg_N0 =KOKKOS_INVALID_INDEX - , const size_t arg_N1 =KOKKOS_INVALID_INDEX - , const size_t arg_N2 =KOKKOS_INVALID_INDEX - , const size_t arg_N3 =KOKKOS_INVALID_INDEX - , const size_t arg_N4 =KOKKOS_INVALID_INDEX - , const size_t arg_N5 =KOKKOS_INVALID_INDEX - , const size_t arg_N6 =KOKKOS_INVALID_INDEX - , const size_t arg_N7 =KOKKOS_INVALID_INDEX - ) - : DynRankView( arg_prop - , typename traits::array_layout - ( arg_N0 , arg_N1 , arg_N2 , arg_N3 , arg_N4 , arg_N5 , arg_N6 , arg_N7 ) - ) - {} + template + explicit inline DynRankView( + const Kokkos::Impl::ViewCtorProp& arg_prop, + typename std::enable_if::has_pointer, + size_t>::type const arg_N0 = KOKKOS_INVALID_INDEX, + const size_t arg_N1 = KOKKOS_INVALID_INDEX, + const size_t arg_N2 = KOKKOS_INVALID_INDEX, + const size_t arg_N3 = KOKKOS_INVALID_INDEX, + const size_t arg_N4 = KOKKOS_INVALID_INDEX, + const size_t arg_N5 = KOKKOS_INVALID_INDEX, + const size_t arg_N6 = KOKKOS_INVALID_INDEX, + const size_t arg_N7 = KOKKOS_INVALID_INDEX) + : DynRankView(arg_prop, typename traits::array_layout( + arg_N0, arg_N1, arg_N2, arg_N3, arg_N4, + arg_N5, arg_N6, arg_N7)) {} + + template + explicit KOKKOS_INLINE_FUNCTION DynRankView( + const 
Kokkos::Impl::ViewCtorProp& arg_prop, + typename std::enable_if::has_pointer, + size_t>::type const arg_N0 = KOKKOS_INVALID_INDEX, + const size_t arg_N1 = KOKKOS_INVALID_INDEX, + const size_t arg_N2 = KOKKOS_INVALID_INDEX, + const size_t arg_N3 = KOKKOS_INVALID_INDEX, + const size_t arg_N4 = KOKKOS_INVALID_INDEX, + const size_t arg_N5 = KOKKOS_INVALID_INDEX, + const size_t arg_N6 = KOKKOS_INVALID_INDEX, + const size_t arg_N7 = KOKKOS_INVALID_INDEX) + : DynRankView(arg_prop, typename traits::array_layout( + arg_N0, arg_N1, arg_N2, arg_N3, arg_N4, + arg_N5, arg_N6, arg_N7)) {} // Allocate with label and layout - template< typename Label > - explicit inline - DynRankView( const Label & arg_label - , typename std::enable_if< - Kokkos::Impl::is_view_label