memory allocation bugfix for USER-INTEL pppm from M Brown (e634c5a2) · Commits · 郑智淋 / lammps

doc/src/accelerate_intel.txt

+6 −0

Original line number	Diff line number	Diff line
		@@ -106,6 +106,8 @@ $t should be 2 for Intel Xeon CPUs and 2 or 4 for Intel Xeon Phi :l
		For some of the simple 2-body potentials without long-range
		electrostatics, performance and scalability can be better with
		the "newton off" setting added to the input script :l
		For simulations on higher node counts, add "processors * * * grid
		numa" to the beginning of the input script for better scalability :l
		If using {kspace_style pppm} in the input script, add
		"kspace_modify diff ad" for better performance :l
		:ule
		@@ -392,6 +394,10 @@ hybrid intel omp"_suffix.html command can also be used within the
		input script to automatically append the "omp" suffix to styles when
		USER-INTEL styles are not available.

		NOTE: For simulations on higher node counts, add "processors * * *
		grid numa"_processors.html to the beginning of the input script for
		better scalability.

		When running on many nodes, performance might be better when using
		fewer OpenMP threads and more MPI tasks. This will depend on the
		simulation and the machine. Using the "verlet/split"_run_style.html

doc/src/read_data.txt

+1 −1

Original line number	Diff line number	Diff line
		@@ -14,7 +14,7 @@ read_data file keyword args ... :pre

		file = name of data file to read in :ulb,l
		zero or more keyword/arg pairs may be appended :l
		keyword = {add} or {offset} or {shift} or {extra/atom/types} or {extra/bond/types} or {extra/angle/types} or {extra/dihedral/types} or {extra/improper/types} or {group} or {nocoeff} or {fix} :l
		keyword = {add} or {offset} or {shift} or {extra/atom/types} or {extra/bond/types} or {extra/angle/types} or {extra/dihedral/types} or {extra/improper/types} or {extra/bond/per/atom} or {extra/angle/per/atom} or {extra/dihedral/per/atom} or {extra/improper/per/atom} or {group} or {nocoeff} or {fix} :l
		{add} arg = {append} or {Nstart} or {merge}
		append = add new atoms with IDs appended to current IDs
		Nstart = add new atoms with IDs starting with Nstart

src/USER-INTEL/fix_intel.cpp

+8 −4

Original line number	Diff line number	Diff line
		@@ -748,7 +748,8 @@ void FixIntel::add_oresults(const ft * _noalias const f_in,
		if (eatom) {
		double * _noalias const lmp_eatom = force->pair->eatom + out_offset;
		#if defined(LMP_SIMD_COMPILER)
		#pragma novector
		#pragma vector aligned
		#pragma ivdep
		#endif
		for (int i = ifrom; i < ito; i++) {
		f[i].x += f_in[ii].x;
		@@ -762,7 +763,8 @@ void FixIntel::add_oresults(const ft * _noalias const f_in,
		}
		} else {
		#if defined(LMP_SIMD_COMPILER)
		#pragma novector
		#pragma vector aligned
		#pragma ivdep
		#endif
		for (int i = ifrom; i < ito; i++) {
		f[i].x += f_in[ii].x;
		@@ -778,7 +780,8 @@ void FixIntel::add_oresults(const ft * _noalias const f_in,
		if (eatom) {
		double * _noalias const lmp_eatom = force->pair->eatom + out_offset;
		#if defined(LMP_SIMD_COMPILER)
		#pragma novector
		#pragma vector aligned
		#pragma ivdep
		#endif
		for (int i = ifrom; i < ito; i++) {
		f[i].x += f_in[i].x;
		@@ -788,7 +791,8 @@ void FixIntel::add_oresults(const ft * _noalias const f_in,
		}
		} else {
		#if defined(LMP_SIMD_COMPILER)
		#pragma novector
		#pragma vector aligned
		#pragma ivdep
		#endif
		for (int i = ifrom; i < ito; i++) {
		f[i].x += f_in[i].x;

src/USER-INTEL/intel_buffers.h

+24 −2

Original line number	Diff line number	Diff line
		@@ -172,6 +172,10 @@ class IntelBuffers {

		inline void thr_pack(const int ifrom, const int ito, const int ago) {
		if (ago == 0) {
		#if defined(LMP_SIMD_COMPILER)
		#pragma vector aligned
		#pragma ivdep
		#endif
		for (int i = ifrom; i < ito; i++) {
		_x[i].x = lmp->atom->x[i][0];
		_x[i].y = lmp->atom->x[i][1];
		@@ -179,9 +183,17 @@ class IntelBuffers {
		_x[i].w = lmp->atom->type[i];
		}
		if (lmp->atom->q != NULL)
		#if defined(LMP_SIMD_COMPILER)
		#pragma vector aligned
		#pragma ivdep
		#endif
		for (int i = ifrom; i < ito; i++)
		_q[i] = lmp->atom->q[i];
		} else {
		#if defined(LMP_SIMD_COMPILER)
		#pragma vector aligned
		#pragma ivdep
		#endif
		for (int i = ifrom; i < ito; i++) {
		_x[i].x = lmp->atom->x[i][0];
		_x[i].y = lmp->atom->x[i][1];
		@@ -204,7 +216,10 @@ class IntelBuffers {
		const int offset, const bool dotype = false) {
		double ** x = lmp->atom->x + offset;
		if (dotype == false) {
		#pragma vector nontemporal
		#if defined(LMP_SIMD_COMPILER)
		#pragma vector aligned
		#pragma ivdep
		#endif
		for (int i = ifrom; i < ito; i++) {
		_x[i].x = x[i][0];
		_x[i].y = x[i][1];
		@@ -212,7 +227,10 @@ class IntelBuffers {
		}
		} else {
		int *type = lmp->atom->type + offset;
		#pragma vector nontemporal
		#if defined(LMP_SIMD_COMPILER)
		#pragma vector aligned
		#pragma ivdep
		#endif
		for (int i = ifrom; i < ito; i++) {
		_x[i].x = x[i][0];
		_x[i].y = x[i][1];
		@@ -225,6 +243,10 @@ class IntelBuffers {
		inline void thr_pack_host(const int ifrom, const int ito,
		const int offset) {
		double ** x = lmp->atom->x + offset;
		#if defined(LMP_SIMD_COMPILER)
		#pragma vector aligned
		#pragma ivdep
		#endif
		for (int i = ifrom; i < ito; i++) {
		_host_x[i].x = x[i][0];
		_host_x[i].y = x[i][1];

src/USER-INTEL/intel_preprocess.h

+16 −1

Original line number	Diff line number	Diff line
		@@ -68,7 +68,7 @@ enum {TIME_PACK, TIME_HOST_NEIGHBOR, TIME_HOST_PAIR, TIME_OFFLOAD_NEIGHBOR,
		#define INTEL_MAX_STENCIL 256
		// INTEL_MAX_STENCIL * sqrt(INTEL_MAX_STENCIL)
		#define INTEL_MAX_STENCIL_CHECK 4096
		#define INTEL_P3M_MAXORDER 7
		#define INTEL_P3M_MAXORDER 8
		#define INTEL_P3M_ALIGNED_MAXORDER 8
		// PRECOMPUTE VALUES IN TABLE (DOESN'T AFFECT ACCURACY)
		#define INTEL_P3M_TABLE 1
		@@ -248,6 +248,12 @@ enum {TIME_PACK, TIME_HOST_NEIGHBOR, TIME_HOST_PAIR, TIME_OFFLOAD_NEIGHBOR,

		#else

		// Assign the full iteration range [0, inum) to the caller's variables:
		// ifrom is set to 0 and ito is set to inum.  The tid and nthreads
		// arguments are accepted but not referenced by this variant.
		// NOTE(review): this definition sits in the #else branch of a
		// conditional whose test is not visible here - presumably the build
		// without OpenMP threading; confirm against the matching #if.
		#define IP_PRE_omp_range(ifrom, ito, tid, inum, nthreads) \
		{ \
		ifrom = 0; \
		ito = inum; \
		}

		#define IP_PRE_omp_range_id(ifrom, ito, tid, inum, nthreads) \
		{ \
		tid = 0; \
		@@ -293,6 +299,15 @@ enum {TIME_PACK, TIME_HOST_NEIGHBOR, TIME_HOST_PAIR, TIME_OFFLOAD_NEIGHBOR,
		ito = inum; \
		}

		// Assign a single-thread iteration range to the caller's variables:
		// tid = 0, ifrom = 0, ito = inum, and ip = vecsize.  The nthreads
		// argument is accepted but not referenced by this variant.
		// NOTE(review): ip looks like the loop increment/stride used by
		// callers - confirm against the threaded definition of this macro,
		// which is not visible in this excerpt.
		#define IP_PRE_omp_range_id_vec(ifrom, ip, ito, tid, inum, \
		nthreads, vecsize) \
		{ \
		tid = 0; \
		ifrom = 0; \
		ito = inum; \
		ip = vecsize; \
		}

		#endif

		#define IP_PRE_fdotr_acc_force_l5(lf, lt, minlocal, nthreads, f_start, \

Admin message