Merge pull request #1156 from wmbrownIntel/user-intel-hybrid (382e91cf) · Commits · 郑智淋 / lammps

doc/src/Speed_intel.txt

+1 −1

Original line number	Diff line number	Diff line
		@@ -499,7 +499,7 @@ MPI task.
		When offloading to a coprocessor, "hybrid"_pair_hybrid.html styles
		that require skip lists for neighbor builds cannot be offloaded.
		Using "hybrid/overlay"_pair_hybrid.html is allowed. Only one intel
		accelerated style may be used with hybrid styles.
		accelerated style may be used with hybrid styles when offloading.
		"Special_bonds"_special_bonds.html exclusion lists are not currently
		supported with offload; however, the same effect can often be
		accomplished by setting cutoffs for excluded atom types to 0. None of

src/USER-INTEL/fix_intel.cpp

+85 −22

Original line number	Diff line number	Diff line
		@@ -65,6 +65,7 @@ FixIntel::FixIntel(LAMMPS lmp, int narg, char *arg) : Fix(lmp, narg, arg)

		_nbor_pack_width = 1;
		_three_body_neighbor = 0;
		_hybrid_nonpair = 0;

		_precision_mode = PREC_MODE_MIXED;
		_offload_balance = -1.0;
		@@ -266,8 +267,7 @@ FixIntel::~FixIntel()
		double *time1 = off_watch_pair();
		double *time2 = off_watch_neighbor();
		int *overflow = get_off_overflow_flag();
		if (_offload_balance != 0.0 && time1 != NULL && time2 != NULL &&
		overflow != NULL) {
		if (_offload_balance != 0.0) {
		#pragma offload_transfer target(mic:_cop) \
		nocopy(time1,time2,overflow:alloc_if(0) free_if(1))
		}
		@@ -314,34 +314,63 @@ void FixIntel::init()

		int nstyles = 0;
		if (force->pair_match("hybrid", 1) != NULL) {
		_pair_hybrid_flag = 1;
		PairHybrid hybrid = (PairHybrid ) force->pair;
		for (int i = 0; i < hybrid->nstyles; i++)
		if (strstr(hybrid->keywords[i], "/intel") != NULL)
		nstyles++;
		if (force->newton_pair != 0 && force->pair->no_virial_fdotr_compute)
		error->all(FLERR,
		"Intel package requires fdotr virial with newton on.");
		} else if (force->pair_match("hybrid/overlay", 1) != NULL) {
		_pair_hybrid_flag = 1;
		PairHybridOverlay hybrid = (PairHybridOverlay ) force->pair;
		for (int i = 0; i < hybrid->nstyles; i++)
		if (strstr(hybrid->keywords[i], "/intel") != NULL)
		nstyles++;
		else
		force->pair->no_virial_fdotr_compute = 1;
		}
		if (force->newton_pair != 0 && force->pair->no_virial_fdotr_compute)
		error->all(FLERR,
		"Intel package requires fdotr virial with newton on.");
		} else
		_pair_hybrid_flag = 0;

		if (nstyles > 1 && _pair_hybrid_flag) _pair_hybrid_flag = 2;
		else if (force->newton_pair == 0) _pair_hybrid_flag = 0;

		_pair_hybrid_zero = 0;
		_zero_master = 0;

		if (_pair_hybrid_flag && _hybrid_nonpair)
		if (_pair_hybrid_flag > 1 \|\| force->newton_pair == 0)
		_pair_hybrid_zero = 1;
		_hybrid_nonpair = 0;

		#ifdef _LMP_INTEL_OFFLOAD
		if (offload_balance() != 0.0) {
		_pair_hybrid_zero = 0;
		if (force->newton_pair == 0) _pair_hybrid_flag = 0;
		if (nstyles > 1)
		error->all(FLERR,
		"Currently, cannot use more than one intel style with hybrid.");
		"Currently, cannot offload more than one intel style with hybrid.");
		}
		#endif

		check_neighbor_intel();

		int off_mode = 0;
		if (_offload_balance != 0.0) off_mode = 1;
		if (_precision_mode == PREC_MODE_SINGLE) {
		_single_buffers->zero_ev();
		_single_buffers->grow_ncache(off_mode,_nthreads);
		_single_buffers->free_list_ptrs();
		} else if (_precision_mode == PREC_MODE_MIXED) {
		_mixed_buffers->zero_ev();
		_mixed_buffers->grow_ncache(off_mode,_nthreads);
		_mixed_buffers->free_list_ptrs();
		} else {
		_double_buffers->zero_ev();
		_double_buffers->grow_ncache(off_mode,_nthreads);
		_double_buffers->free_list_ptrs();
		}

		_need_reduce = 0;
		@@ -349,7 +378,7 @@ void FixIntel::init()

		/* ---------------------------------------------------------------------- */

		void FixIntel::setup(int /vflag/)
		void FixIntel::setup(int vflag)
		{
		if (neighbor->style != Neighbor::BIN)
		error->all(FLERR,
		@@ -395,8 +424,7 @@ void FixIntel::pair_init_check(const bool cdmessage)
		double *time1 = off_watch_pair();
		double *time2 = off_watch_neighbor();
		int *overflow = get_off_overflow_flag();
		if (_offload_balance !=0.0 && time1 != NULL && time2 != NULL &&
		overflow != NULL) {
		if (_offload_balance !=0.0) {
		#pragma offload_transfer target(mic:_cop) \
		nocopy(time1,time2:length(1) alloc_if(1) free_if(0)) \
		in(overflow:length(5) alloc_if(1) free_if(0))
		@@ -419,6 +447,21 @@ void FixIntel::pair_init_check(const bool cdmessage)
		#endif
		}

		#ifndef LMP_INTEL_NBOR_COMPAT
		// Only compiled when LMP_INTEL_NBOR_COMPAT is NOT defined: abort if a
		// manybody pair style is used on a molecular system where, for some
		// topology kind that is actually present (bonds -> index 1, angles ->
		// index 2, dihedrals -> index 3), both special_lj and special_coul at
		// that index are exactly zero.
		if (force->pair->manybody_flag && atom->molecular) {
		  int flag = 0;
		  if (atom->nbonds > 0 && force->special_lj[1] == 0.0 &&
		      force->special_coul[1] == 0.0) flag = 1;
		  if (atom->nangles > 0 && force->special_lj[2] == 0.0 &&
		      force->special_coul[2] == 0.0) flag = 1;
		  if (atom->ndihedrals > 0 && force->special_lj[3] == 0.0 &&
		      force->special_coul[3] == 0.0) flag = 1;
		  // Adjacent string literals are concatenated verbatim, so the first
		  // piece must end with a space to yield "special_bond exclusions".
		  if (flag)
		    error->all(FLERR,"Add -DLMP_INTEL_NBOR_COMPAT to build for special_bond "
		               "exclusions with Intel");
		}
		#endif

		int need_tag = 0;
		if (atom->molecular) need_tag = 1;

		@@ -477,11 +520,13 @@ void FixIntel::bond_init_check()
		if (force->pair_match("/intel", 0) != NULL)
		intel_pair = 1;
		else if (force->pair_match("hybrid", 1) != NULL) {
		_hybrid_nonpair = 1;
		PairHybrid hybrid = (PairHybrid ) force->pair;
		for (int i = 0; i < hybrid->nstyles; i++)
		if (strstr(hybrid->keywords[i], "/intel") != NULL)
		intel_pair = 1;
		} else if (force->pair_match("hybrid/overlay", 1) != NULL) {
		_hybrid_nonpair = 1;
		PairHybridOverlay hybrid = (PairHybridOverlay ) force->pair;
		for (int i = 0; i < hybrid->nstyles; i++)
		if (strstr(hybrid->keywords[i], "/intel") != NULL)
		@@ -501,11 +546,13 @@ void FixIntel::kspace_init_check()
		if (force->pair_match("/intel", 0) != NULL)
		intel_pair = 1;
		else if (force->pair_match("hybrid", 1) != NULL) {
		_hybrid_nonpair = 1;
		PairHybrid hybrid = (PairHybrid ) force->pair;
		for (int i = 0; i < hybrid->nstyles; i++)
		if (strstr(hybrid->keywords[i], "/intel") != NULL)
		intel_pair = 1;
		} else if (force->pair_match("hybrid/overlay", 1) != NULL) {
		_hybrid_nonpair = 1;
		PairHybridOverlay hybrid = (PairHybridOverlay ) force->pair;
		for (int i = 0; i < hybrid->nstyles; i++)
		if (strstr(hybrid->keywords[i], "/intel") != NULL)
		@@ -522,51 +569,60 @@ void FixIntel::check_neighbor_intel()
		{
		#ifdef _LMP_INTEL_OFFLOAD
		_full_host_list = 0;
		#endif
		const int nrequest = neighbor->nrequest;

		const int nrequest = neighbor->nrequest;
		for (int i = 0; i < nrequest; ++i) {
		#ifdef _LMP_INTEL_OFFLOAD
		if (_offload_balance != 0.0 && neighbor->requests[i]->intel == 0) {
		_full_host_list = 1;
		_offload_noghost = 0;
		}
		#endif
		if (neighbor->requests[i]->skip && _offload_balance != 0.0)
		error->all(FLERR, "Cannot yet use hybrid styles with Intel offload.");

		// avoid flagging a neighbor list as both USER-INTEL and USER-OMP
		if (neighbor->requests[i]->intel)
		neighbor->requests[i]->omp = 0;

		if (neighbor->requests[i]->skip)
		error->all(FLERR, "Hybrid styles with Intel package are unsupported.");
		}
		#else
		// avoid flagging a neighbor list as both USER-INTEL and USER-OMP
		const int nrequest = neighbor->nrequest;
		for (int i = 0; i < nrequest; ++i)
		if (neighbor->requests[i]->intel)
		neighbor->requests[i]->omp = 0;
		#endif
		}

		/* ---------------------------------------------------------------------- */

		void FixIntel::pre_reverse(int /eflag/, int /vflag/)
		void FixIntel::_sync_main_arrays(const int prereverse)
		{
		if (!prereverse) _zero_master = 1;
		int done_this_step = prereverse;
		if (_pair_hybrid_zero == 0) done_this_step = 1;
		if (_force_array_m != 0) {
		if (_need_reduce) {
		reduce_results(&_force_array_m[0].x);
		_need_reduce = 0;
		}
		add_results(_force_array_m, _ev_array_d, _results_eatom, _results_vatom,0);
		_force_array_m = 0;
		if (done_this_step) _force_array_m = 0;
		else _ev_array_d = 0;
		} else if (_force_array_d != 0) {
		if (_need_reduce) {
		reduce_results(&_force_array_d[0].x);
		_need_reduce = 0;
		}
		add_results(_force_array_d, _ev_array_d, _results_eatom, _results_vatom,0);
		_force_array_d = 0;
		if (done_this_step) _force_array_d = 0;
		else _ev_array_d = 0;
		} else if (_force_array_s != 0) {
		if (_need_reduce) {
		reduce_results(&_force_array_s[0].x);
		_need_reduce = 0;
		}
		add_results(_force_array_s, _ev_array_s, _results_eatom, _results_vatom,0);
		_force_array_s = 0;
		if (done_this_step) _force_array_s = 0;
		else _ev_array_s = 0;
		}

		#ifdef _LMP_INTEL_OFFLOAD
		@@ -576,6 +632,13 @@ void FixIntel::pre_reverse(int /eflag/, int /vflag/)

		/* ---------------------------------------------------------------------- */

		// Forwards to _sync_main_arrays() with prereverse = 1.
		// Both integer arguments are unused, so their names are commented out.
		void FixIntel::pre_reverse(int /*eflag*/, int /*vflag*/)
		{
		  _sync_main_arrays(1);
		}

		/* ---------------------------------------------------------------------- */

		template <class acc_t>
		void FixIntel::reduce_results(acc_t * _noalias const f_scalar)
		{
		@@ -657,7 +720,7 @@ template <class ft, class acc_t>
		void FixIntel::add_results(const ft * _noalias const f_in,
		const acc_t * _noalias const ev_global,
		const int eatom, const int vatom,
		const int /offload/) {
		const int offload) {
		start_watch(TIME_PACK);
		int f_length;
		#ifdef _LMP_INTEL_OFFLOAD

src/USER-INTEL/fix_intel.h

+49 −14

Original line number	Diff line number	Diff line
		@@ -74,11 +74,12 @@ class FixIntel : public Fix {
		inline int nbor_pack_width() const { return _nbor_pack_width; }
		inline void nbor_pack_width(const int w) { _nbor_pack_width = w; }
		inline int three_body_neighbor() { return _three_body_neighbor; }
		inline void three_body_neighbor(const int /i/) { _three_body_neighbor = 1; }
		inline void three_body_neighbor(const int i) { _three_body_neighbor = i; }

		inline int need_zero(const int tid) {
		if (_need_reduce == 0 && tid > 0) return 1;
		return 0;
		else if (_zero_master && tid == 0) { _zero_master = 0; return 1; }
		else return 0;
		}
		inline void set_reduce_flag() { if (_nthreads > 1) _need_reduce = 1; }
		inline int lrt() {
		@@ -100,6 +101,9 @@ class FixIntel : public Fix {
		IntelBuffers<double,double> *_double_buffers;

		int _precision_mode, _nthreads, _nbor_pack_width, _three_body_neighbor;
		int _pair_hybrid_flag;
		// These should be removed in subsequent update w/ simpler hybrid arch
		int _pair_hybrid_zero, _hybrid_nonpair, _zero_master;

		public:
		inline int* get_overflow_flag() { return _overflow_flag; }
		@@ -210,6 +214,8 @@ class FixIntel : public Fix {
		_alignvar(double _stopwatch_offload_neighbor[1],64);
		_alignvar(double _stopwatch_offload_pair[1],64);

		void _sync_main_arrays(const int prereverse);

		template <class ft>
		void reduce_results(ft * _noalias const f_in);

		@@ -238,7 +244,7 @@ class FixIntel : public Fix {

		/* ---------------------------------------------------------------------- */

		void FixIntel::get_buffern(const int /offload/, int &nlocal, int &nall,
		void FixIntel::get_buffern(const int offload, int &nlocal, int &nall,
		int &minlocal) {
		#ifdef _LMP_INTEL_OFFLOAD
		if (_separate_buffers) {
		@@ -273,7 +279,7 @@ void FixIntel::get_buffern(const int /offload/, int &nlocal, int &nall,
		/* ---------------------------------------------------------------------- */

		void FixIntel::add_result_array(IntelBuffers<double,double>::vec3_acc_t *f_in,
		double ev_in, const int /offload*/,
		double *ev_in, const int offload,
		const int eatom, const int vatom,
		const int rflag) {
		#ifdef _LMP_INTEL_OFFLOAD
		@@ -282,6 +288,8 @@ void FixIntel::add_result_array(IntelBuffers<double,double>::vec3_acc_t *f_in,
		_off_results_vatom = vatom;
		_off_force_array_d = f_in;
		_off_ev_array_d = ev_in;
		if (_pair_hybrid_flag && force->pair->fdotr_is_set())
		_sync_main_arrays(1);
		return;
		}
		#endif
		@@ -296,12 +304,15 @@ void FixIntel::add_result_array(IntelBuffers<double,double>::vec3_acc_t *f_in,

		if (_overflow_flag[LMP_OVERFLOW])
		error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");

		if (_pair_hybrid_flag > 1 \|\|
		(_pair_hybrid_flag && force->pair->fdotr_is_set())) _sync_main_arrays(0);
		}

		/* ---------------------------------------------------------------------- */

		void FixIntel::add_result_array(IntelBuffers<float,double>::vec3_acc_t *f_in,
		double ev_in, const int /offload*/,
		double *ev_in, const int offload,
		const int eatom, const int vatom,
		const int rflag) {
		#ifdef _LMP_INTEL_OFFLOAD
		@@ -310,6 +321,8 @@ void FixIntel::add_result_array(IntelBuffers<float,double>::vec3_acc_t *f_in,
		_off_results_vatom = vatom;
		_off_force_array_m = f_in;
		_off_ev_array_d = ev_in;
		if (_pair_hybrid_flag && force->pair->fdotr_is_set())
		_sync_main_arrays(1);
		return;
		}
		#endif
		@@ -324,12 +337,16 @@ void FixIntel::add_result_array(IntelBuffers<float,double>::vec3_acc_t *f_in,

		if (_overflow_flag[LMP_OVERFLOW])
		error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");

		if (_pair_hybrid_flag > 1 \|\|
		(_pair_hybrid_flag && force->pair->fdotr_is_set()))
		_sync_main_arrays(0);
		}

		/* ---------------------------------------------------------------------- */

		void FixIntel::add_result_array(IntelBuffers<float,float>::vec3_acc_t *f_in,
		float ev_in, const int /offload*/,
		float *ev_in, const int offload,
		const int eatom, const int vatom,
		const int rflag) {
		#ifdef _LMP_INTEL_OFFLOAD
		@@ -338,6 +355,8 @@ void FixIntel::add_result_array(IntelBuffers<float,float>::vec3_acc_t *f_in,
		_off_results_vatom = vatom;
		_off_force_array_s = f_in;
		_off_ev_array_s = ev_in;
		if (_pair_hybrid_flag && force->pair->fdotr_is_set())
		_sync_main_arrays(1);
		return;
		}
		#endif
		@@ -352,6 +371,10 @@ void FixIntel::add_result_array(IntelBuffers<float,float>::vec3_acc_t *f_in,

		if (_overflow_flag[LMP_OVERFLOW])
		error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");

		if (_pair_hybrid_flag > 1 \|\|
		(_pair_hybrid_flag && force->pair->fdotr_is_set()))
		_sync_main_arrays(0);
		}

		/* ---------------------------------------------------------------------- */
		@@ -487,16 +510,16 @@ The compiler version used to build LAMMPS is not supported when using
		offload to a coprocessor. There could be performance or correctness
		issues. Please use 14.0.1.106 or 15.1.133 or later.

		E: Currently, cannot use more than one intel style with hybrid.
		E: Currently, cannot offload more than one intel style with hybrid.

		Currently, hybrid pair styles can only use the intel suffix for one of the
		pair styles.
		Currently, when using offload, hybrid pair styles can only use the intel
		suffix for one of the pair styles.

		E: Cannot yet use hybrid styles with Intel package.
		E: Cannot yet use hybrid styles with Intel offload.

		The hybrid pair style configuration is not yet supported by the Intel
		package. Support is limited to hybrid/overlay or a hybrid style that does
		not require a skip list.
		The hybrid pair style configuration is not yet supported when using offload
		within the Intel package. Support is limited to hybrid/overlay or a hybrid
		style that does not require a skip list.

		W: Leaving a core/node free can improve performance for offload

		@@ -538,4 +561,16 @@ E: Too few atoms for load balancing offload.
		When using offload to a coprocessor, each MPI task must have at least 2
		atoms throughout the simulation.

		E: Intel package requires fdotr virial with newton on.

		This error can occur with a hybrid pair style that mixes styles that are
		incompatible with the newton pair setting turned on. Try turning the
		newton pair setting off.

		E: Add -DLMP_INTEL_NBOR_COMPAT to build for special_bond exclusions with Intel

		When using a manybody pair style, bonds/angles/dihedrals, and special_bond
		exclusions, LAMMPS should be built with the above compile flag for compatible
		results.

		*/

src/USER-INTEL/intel_buffers.cpp

+102 −28

Original line number	Diff line number	Diff line
		@@ -24,7 +24,9 @@ using namespace LAMMPS_NS;
		template <class flt_t, class acc_t>
		IntelBuffers<flt_t, acc_t>::IntelBuffers(class LAMMPS *lmp_in) :
		lmp(lmp_in), _x(0), _q(0), _quat(0), _f(0), _off_threads(0),
		_buf_size(0), _buf_local_size(0) {
		_buf_size(0), _buf_local_size(0), _n_list_ptrs(1), _max_list_ptrs(4) {
		_neigh_list_ptrs = new IntelNeighListPtrs[_max_list_ptrs];
		_neigh_list_ptrs[0].cnumneigh = 0;
		_list_alloc_atoms = 0;
		_ntypes = 0;
		_off_map_listlocal = 0;
		@@ -55,6 +57,7 @@ IntelBuffers<flt_t, acc_t>::~IntelBuffers()
		free_all_nbor_buffers();
		free_ccache();
		set_ntypes(0);
		delete []_neigh_list_ptrs;
		}

		/* ---------------------------------------------------------------------- */
		@@ -109,7 +112,7 @@ void IntelBuffers<flt_t, acc_t>::free_buffers()
		template <class flt_t, class acc_t>
		void IntelBuffers<flt_t, acc_t>::_grow(const int nall, const int nlocal,
		const int nthreads,
		const int /offload_end/)
		const int offload_end)
		{
		free_buffers();
		_buf_size = static_cast<double>(nall) * 1.1 + 1;
		@@ -186,11 +189,9 @@ void IntelBuffers<flt_t, acc_t>::free_nmax()
		const int * tag = _off_map_tag;
		const int * special = _off_map_special;
		const int * nspecial = _off_map_nspecial;
		if (tag != 0 && special != 0 && nspecial !=0) {
		#pragma offload_transfer target(mic:_cop) \
		nocopy(tag:alloc_if(0) free_if(1)) \
		nocopy(special,nspecial:alloc_if(0) free_if(1))
		}
		_off_map_nmax = 0;
		_host_nmax = 0;
		}
		@@ -200,7 +201,7 @@ void IntelBuffers<flt_t, acc_t>::free_nmax()
		/* ---------------------------------------------------------------------- */

		template <class flt_t, class acc_t>
		void IntelBuffers<flt_t, acc_t>::_grow_nmax(const int /offload_end/)
		void IntelBuffers<flt_t, acc_t>::_grow_nmax(const int offload_end)
		{
		#ifdef _LMP_INTEL_OFFLOAD
		free_nmax();
		@@ -243,46 +244,117 @@ template <class flt_t, class acc_t>
		void IntelBuffers<flt_t, acc_t>::free_list_local()
		{
		if (_off_map_listlocal > 0) {
		int * cnumneigh = _cnumneigh;
		if (_neigh_list_ptrs[0].cnumneigh) {
		int * cnumneigh = _neigh_list_ptrs[0].cnumneigh;
		_neigh_list_ptrs[0].cnumneigh = 0;
		#ifdef _LMP_INTEL_OFFLOAD
		if (_off_map_ilist != NULL) {
		#pragma offload_transfer target(mic:_cop) \
		nocopy(cnumneigh:alloc_if(0) free_if(1))
		}
		#endif
		lmp->memory->destroy(cnumneigh);
		}

		#ifdef _LMP_INTEL_OFFLOAD
		if (_off_map_ilist != NULL) {
		const int * ilist = _off_map_ilist;
		const int * numneigh = _off_map_numneigh;
		const int firstneigh = (const int )_off_map_firstneigh;
		_off_map_ilist = NULL;
		if (numneigh != 0 && ilist != 0) {
		#pragma offload_transfer target(mic:_cop) \
		nocopy(ilist,numneigh,cnumneigh:alloc_if(0) free_if(1))
		}
		nocopy(ilist,firstneigh,numneigh:alloc_if(0) free_if(1))
		}
		#endif
		lmp->memory->destroy(cnumneigh);
		_off_map_listlocal = 0;
		}
		}

		/* ---------------------------------------------------------------------- */

		template <class flt_t, class acc_t>
		void IntelBuffers<flt_t, acc_t>::free_list_ptrs()
		{
		  // Release the per-list arrays of every registered entry except slot 0,
		  // which this routine never touches, then shrink the registry back to
		  // that single slot.
		  for (int idx = 1; idx < _n_list_ptrs; ++idx) {
		    // Arrays exist only for entries whose recorded size is non-zero.
		    if (_neigh_list_ptrs[idx].size != 0) {
		      lmp->memory->destroy(_neigh_list_ptrs[idx].cnumneigh);
		      lmp->memory->destroy(_neigh_list_ptrs[idx].numneighhalf);
		    }
		    // Clear the entry so a later lookup cannot match it.
		    _neigh_list_ptrs[idx].size = 0;
		    _neigh_list_ptrs[idx].list_ptr = 0;
		  }
		  _n_list_ptrs = 1;
		}

		/* ---------------------------------------------------------------------- */

		// Find (or register) the bookkeeping entry for `list` and make sure its
		// cnumneigh / numneighhalf arrays hold at least list->get_maxlocal()
		// elements.
		//   list         - neighbor list used as the lookup key (compared by address)
		//   numneighhalf - set on return to the entry's numneighhalf array
		//   cnumneigh    - set on return to the entry's cnumneigh array
		// Existing array contents are not preserved when an entry is regrown.
		template <class flt_t, class acc_t>
		void IntelBuffers<flt_t, acc_t>::grow_data3(NeighList *list,
		                                            int *&numneighhalf,
		                                            int *&cnumneigh)
		{
		  const int size = list->get_maxlocal();

		  // Linear search over the registered entries for this list pointer.
		  // NOTE(review): the search includes slot 0, whose list_ptr is not set
		  // anywhere in the visible code - confirm it is initialized elsewhere.
		  int list_num;
		  for (list_num = 0; list_num < _n_list_ptrs; list_num++)
		    if (_neigh_list_ptrs[list_num].list_ptr == (void*)list) break;

		  if (list_num == _n_list_ptrs) {
		    // Not registered yet: double the table when it is full, copying the
		    // existing entries across, then claim the next free slot.
		    if (_n_list_ptrs == _max_list_ptrs) {
		      _max_list_ptrs *= 2;
		      IntelNeighListPtrs *new_list = new IntelNeighListPtrs[_max_list_ptrs];
		      for (int i = 0; i < _n_list_ptrs; i++) new_list[i] = _neigh_list_ptrs[i];
		      delete []_neigh_list_ptrs;
		      _neigh_list_ptrs = new_list;
		    }
		    _neigh_list_ptrs[list_num].list_ptr = (void *)list;
		    _neigh_list_ptrs[list_num].size = 0;
		    _n_list_ptrs++;
		  }

		  if (size > _neigh_list_ptrs[list_num].size) {
		    // Arrays only exist when the recorded size is non-zero.
		    if (_neigh_list_ptrs[list_num].size) {
		      lmp->memory->destroy(_neigh_list_ptrs[list_num].cnumneigh);
		      lmp->memory->destroy(_neigh_list_ptrs[list_num].numneighhalf);
		    }
		    lmp->memory->create(_neigh_list_ptrs[list_num].cnumneigh, size,
		                        "_cnumneigh");
		    // Distinct label so this array is not reported under the name of
		    // the cnumneigh array.
		    lmp->memory->create(_neigh_list_ptrs[list_num].numneighhalf, size,
		                        "_numneighhalf");
		    _neigh_list_ptrs[list_num].size = size;
		  }

		  numneighhalf = _neigh_list_ptrs[list_num].numneighhalf;
		  cnumneigh = _neigh_list_ptrs[list_num].cnumneigh;
		}

		/* ---------------------------------------------------------------------- */

		template <class flt_t, class acc_t>
		void IntelBuffers<flt_t, acc_t>::_grow_list_local(NeighList *list,
		const int /offload_end/)
		const int three_body,
		const int offload_end)
		{
		free_list_local();
		int size = list->get_maxlocal();
		lmp->memory->create(_cnumneigh, size, "_cnumneigh");
		_off_map_listlocal = size;
		if (three_body)
		lmp->memory->create(_neigh_list_ptrs[0].cnumneigh, size, "_cnumneigh");

		#ifdef _LMP_INTEL_OFFLOAD
		if (offload_end > 0) {
		int tb_size = size;
		if (three_body == 0) {
		lmp->memory->create(_neigh_list_ptrs[0].cnumneigh, 16, "_cnumneigh");
		tb_size = 16;
		}
		int ** firstneigh = list->firstneigh;
		int * numneigh = list->numneigh;
		int * ilist = list->ilist;
		int * cnumneigh = _cnumneigh;
		if (cnumneigh != 0) {
		int * cnumneigh = _neigh_list_ptrs[0].cnumneigh;
		#pragma offload_transfer target(mic:_cop) \
		nocopy(ilist:length(size) alloc_if(1) free_if(0)) \
		nocopy(firstneigh:length(size) alloc_if(1) free_if(0)) \
		nocopy(numneigh:length(size) alloc_if(1) free_if(0)) \
		nocopy(cnumneigh:length(size) alloc_if(1) free_if(0))
		}
		nocopy(cnumneigh:length(tb_size) alloc_if(1) free_if(0))
		_off_map_ilist = ilist;
		_off_map_firstneigh = firstneigh;
		_off_map_numneigh = numneigh;
		}
		#endif
		@@ -313,7 +385,7 @@ template <class flt_t, class acc_t>
		void IntelBuffers<flt_t, acc_t>::_grow_nbor_list(NeighList * /list/,
		const int nlocal,
		const int nthreads,
		const int /offload_end/,
		const int offload_end,
		const int pack_width)
		{
		free_nbor_list();
		@@ -382,7 +454,7 @@ void IntelBuffers<flt_t, acc_t>::free_ccache()
		/* ---------------------------------------------------------------------- */

		template <class flt_t, class acc_t>
		void IntelBuffers<flt_t, acc_t>::grow_ccache(const int /off_flag/,
		void IntelBuffers<flt_t, acc_t>::grow_ccache(const int off_flag,
		const int nthreads,
		const int width)
		{
		@@ -481,7 +553,7 @@ void IntelBuffers<flt_t, acc_t>::free_ncache()
		/* ---------------------------------------------------------------------- */

		template <class flt_t, class acc_t>
		void IntelBuffers<flt_t, acc_t>::grow_ncache(const int /off_flag/,
		void IntelBuffers<flt_t, acc_t>::grow_ncache(const int off_flag,
		const int nthreads)
		{
		const int nsize = get_max_nbors() * 3;
		@@ -576,12 +648,12 @@ void IntelBuffers<flt_t, acc_t>::set_ntypes(const int ntypes,
		if (_ntypes > 0) {
		#ifdef _LMP_INTEL_OFFLOAD
		flt_t * cutneighsqo = _cutneighsq[0];
		if (_off_threads > 0 && cutneighsqo != 0) {
		if (_off_threads > 0) {
		#pragma offload_transfer target(mic:_cop) \
		nocopy(cutneighsqo:alloc_if(0) free_if(1))
		}
		flt_t * cutneighghostsqo;
		if (_cutneighghostsq && _off_threads > 0 && cutneighghostsqo != 0) {
		if (_cutneighghostsq && _off_threads > 0) {
		cutneighghostsqo = _cutneighghostsq[0];
		#pragma offload_transfer target(mic:_cop) \
		nocopy(cutneighghostsqo:alloc_if(0) free_if(1))
		@@ -637,6 +709,8 @@ double IntelBuffers<flt_t, acc_t>::memory_usage(const int nthreads)
		tmem += (_list_alloc_atoms + _off_threads) * get_max_nbors() * sizeof(int);
		tmem += _ntypes * _ntypes * sizeof(int);

		tmem += _buf_local_size + (_n_list_ptrs - 1) * _buf_local_size * 2;

		return tmem;
		}

src/USER-INTEL/intel_buffers.h

+44 −14

File changed.

Preview size limit exceeded, changes collapsed.

Admin message