Unverified Commit 382e91cf authored by Axel Kohlmeyer's avatar Axel Kohlmeyer Committed by GitHub
Browse files

Merge pull request #1156 from wmbrownIntel/user-intel-hybrid

Adding hybrid support to USER-INTEL package + EAM/intel bug fix
parents ce63a227 c37deebf
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -499,7 +499,7 @@ MPI task.
When offloading to a coprocessor, "hybrid"_pair_hybrid.html styles
that require skip lists for neighbor builds cannot be offloaded.
Using "hybrid/overlay"_pair_hybrid.html is allowed.  Only one intel
accelerated style may be used with hybrid styles.
accelerated style may be used with hybrid styles when offloading.
"Special_bonds"_special_bonds.html exclusion lists are not currently
supported with offload, however, the same effect can often be
accomplished by setting cutoffs for excluded atom types to 0.  None of
+85 −22
Original line number Diff line number Diff line
@@ -65,6 +65,7 @@ FixIntel::FixIntel(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg)

  _nbor_pack_width = 1;
  _three_body_neighbor = 0;
  _hybrid_nonpair = 0;

  _precision_mode = PREC_MODE_MIXED;
  _offload_balance = -1.0;
@@ -266,8 +267,7 @@ FixIntel::~FixIntel()
    double *time1 = off_watch_pair();
    double *time2 = off_watch_neighbor();
    int *overflow = get_off_overflow_flag();
    if (_offload_balance != 0.0 && time1 != NULL && time2 != NULL &&
        overflow != NULL) {
    if (_offload_balance != 0.0) {
      #pragma offload_transfer target(mic:_cop) \
        nocopy(time1,time2,overflow:alloc_if(0) free_if(1))
    }
@@ -314,34 +314,63 @@ void FixIntel::init()

  int nstyles = 0;
  if (force->pair_match("hybrid", 1) != NULL) {
    _pair_hybrid_flag = 1;
    PairHybrid *hybrid = (PairHybrid *) force->pair;
    for (int i = 0; i < hybrid->nstyles; i++)
      if (strstr(hybrid->keywords[i], "/intel") != NULL)
        nstyles++;
    if (force->newton_pair != 0 && force->pair->no_virial_fdotr_compute)
      error->all(FLERR,
                 "Intel package requires fdotr virial with newton on.");
  } else if (force->pair_match("hybrid/overlay", 1) != NULL) {
    _pair_hybrid_flag = 1;
    PairHybridOverlay *hybrid = (PairHybridOverlay *) force->pair;
    for (int i = 0; i < hybrid->nstyles; i++)
      if (strstr(hybrid->keywords[i], "/intel") != NULL)
        nstyles++;
      else
        force->pair->no_virial_fdotr_compute = 1;
  }
    if (force->newton_pair != 0 && force->pair->no_virial_fdotr_compute)
      error->all(FLERR,
                 "Intel package requires fdotr virial with newton on.");
  } else
    _pair_hybrid_flag = 0;

  if (nstyles > 1 && _pair_hybrid_flag) _pair_hybrid_flag = 2;
  else if (force->newton_pair == 0) _pair_hybrid_flag = 0;

  _pair_hybrid_zero = 0;
  _zero_master = 0;

  if (_pair_hybrid_flag && _hybrid_nonpair)
    if (_pair_hybrid_flag > 1 || force->newton_pair == 0)
      _pair_hybrid_zero = 1;
  _hybrid_nonpair = 0;

  #ifdef _LMP_INTEL_OFFLOAD
  if (offload_balance() != 0.0) {
    _pair_hybrid_zero = 0;
    if (force->newton_pair == 0) _pair_hybrid_flag = 0;
    if (nstyles > 1)
      error->all(FLERR,
               "Currently, cannot use more than one intel style with hybrid.");
        "Currently, cannot offload more than one intel style with hybrid.");
  }
  #endif

  check_neighbor_intel();

  int off_mode = 0;
  if (_offload_balance != 0.0) off_mode = 1;
  if (_precision_mode == PREC_MODE_SINGLE) {
    _single_buffers->zero_ev();
    _single_buffers->grow_ncache(off_mode,_nthreads);
    _single_buffers->free_list_ptrs();
  } else if (_precision_mode == PREC_MODE_MIXED) {
    _mixed_buffers->zero_ev();
    _mixed_buffers->grow_ncache(off_mode,_nthreads);
    _mixed_buffers->free_list_ptrs();
  } else {
    _double_buffers->zero_ev();
    _double_buffers->grow_ncache(off_mode,_nthreads);
    _double_buffers->free_list_ptrs();
  }

  _need_reduce = 0;
@@ -349,7 +378,7 @@ void FixIntel::init()

/* ---------------------------------------------------------------------- */

void FixIntel::setup(int /*vflag*/)
void FixIntel::setup(int vflag)
{
  if (neighbor->style != Neighbor::BIN)
    error->all(FLERR,
@@ -395,8 +424,7 @@ void FixIntel::pair_init_check(const bool cdmessage)
    double *time1 = off_watch_pair();
    double *time2 = off_watch_neighbor();
    int *overflow = get_off_overflow_flag();
    if (_offload_balance !=0.0 && time1 != NULL && time2 != NULL &&
        overflow != NULL) {
    if (_offload_balance !=0.0) {
      #pragma offload_transfer target(mic:_cop)  \
        nocopy(time1,time2:length(1) alloc_if(1) free_if(0)) \
        in(overflow:length(5) alloc_if(1) free_if(0))
@@ -419,6 +447,21 @@ void FixIntel::pair_init_check(const bool cdmessage)
    #endif
  }

  #ifndef LMP_INTEL_NBOR_COMPAT
  // Built without LMP_INTEL_NBOR_COMPAT: abort when a manybody pair style is
  // used on a molecular system and, for a topology class that is present
  // (bonds -> index 1, angles -> index 2, dihedrals -> index 3), both the
  // special_lj and special_coul weights at that index are exactly zero.
  if (force->pair->manybody_flag && atom->molecular) {
    int flag = 0;
    if (atom->nbonds > 0 && force->special_lj[1] == 0.0 &&
        force->special_coul[1] == 0.0) flag = 1;
    if (atom->nangles > 0 && force->special_lj[2] == 0.0 &&
        force->special_coul[2] == 0.0) flag = 1;
    if (atom->ndihedrals > 0 && force->special_lj[3] == 0.0 &&
        force->special_coul[3] == 0.0) flag = 1;
    // Adjacent literals are concatenated verbatim, so the separating space
    // must be part of the first literal.
    if (flag)
      error->all(FLERR,"Add -DLMP_INTEL_NBOR_COMPAT to build for special_bond "
                 "exclusions with Intel");
  }
  #endif
  
  int need_tag = 0;
  if (atom->molecular) need_tag = 1;

@@ -477,11 +520,13 @@ void FixIntel::bond_init_check()
  if (force->pair_match("/intel", 0) != NULL)
    intel_pair = 1;
  else if (force->pair_match("hybrid", 1) != NULL) {
    _hybrid_nonpair = 1;
    PairHybrid *hybrid = (PairHybrid *) force->pair;
    for (int i = 0; i < hybrid->nstyles; i++)
      if (strstr(hybrid->keywords[i], "/intel") != NULL)
        intel_pair = 1;
  } else if (force->pair_match("hybrid/overlay", 1) != NULL) {
    _hybrid_nonpair = 1;
    PairHybridOverlay *hybrid = (PairHybridOverlay *) force->pair;
    for (int i = 0; i < hybrid->nstyles; i++)
      if (strstr(hybrid->keywords[i], "/intel") != NULL)
@@ -501,11 +546,13 @@ void FixIntel::kspace_init_check()
  if (force->pair_match("/intel", 0) != NULL)
    intel_pair = 1;
  else if (force->pair_match("hybrid", 1) != NULL) {
    _hybrid_nonpair = 1;
    PairHybrid *hybrid = (PairHybrid *) force->pair;
    for (int i = 0; i < hybrid->nstyles; i++)
      if (strstr(hybrid->keywords[i], "/intel") != NULL)
        intel_pair = 1;
  } else if (force->pair_match("hybrid/overlay", 1) != NULL) {
    _hybrid_nonpair = 1;
    PairHybridOverlay *hybrid = (PairHybridOverlay *) force->pair;
    for (int i = 0; i < hybrid->nstyles; i++)
      if (strstr(hybrid->keywords[i], "/intel") != NULL)
@@ -522,51 +569,60 @@ void FixIntel::check_neighbor_intel()
{
  #ifdef _LMP_INTEL_OFFLOAD
  _full_host_list = 0;
  #endif
  const int nrequest = neighbor->nrequest;

  const int nrequest = neighbor->nrequest;
  for (int i = 0; i < nrequest; ++i) {
    #ifdef _LMP_INTEL_OFFLOAD
    if (_offload_balance != 0.0 && neighbor->requests[i]->intel == 0) {
      _full_host_list = 1;
      _offload_noghost = 0;
    }
    #endif
    if (neighbor->requests[i]->skip && _offload_balance != 0.0)
      error->all(FLERR, "Cannot yet use hybrid styles with Intel offload.");

    // avoid flagging a neighbor list as both USER-INTEL and USER-OMP
    if (neighbor->requests[i]->intel)
      neighbor->requests[i]->omp = 0;

    if (neighbor->requests[i]->skip)
      error->all(FLERR, "Hybrid styles with Intel package are unsupported.");
  }
  #else
  // avoid flagging a neighbor list as both USER-INTEL and USER-OMP
  const int nrequest = neighbor->nrequest;
  for (int i = 0; i < nrequest; ++i)
    if (neighbor->requests[i]->intel)
      neighbor->requests[i]->omp = 0;
  #endif
}

/* ---------------------------------------------------------------------- */

void FixIntel::pre_reverse(int /*eflag*/, int /*vflag*/)
void FixIntel::_sync_main_arrays(const int prereverse)
{
  if (!prereverse) _zero_master = 1;
  int done_this_step = prereverse;
  if (_pair_hybrid_zero == 0) done_this_step = 1;
  if (_force_array_m != 0) {
    if (_need_reduce) {
      reduce_results(&_force_array_m[0].x);
      _need_reduce = 0;
    }
    add_results(_force_array_m, _ev_array_d, _results_eatom, _results_vatom,0);
    _force_array_m = 0;
    if (done_this_step) _force_array_m = 0;
    else _ev_array_d = 0;
  } else if (_force_array_d != 0) {
    if (_need_reduce) {
      reduce_results(&_force_array_d[0].x);
      _need_reduce = 0;
    }
    add_results(_force_array_d, _ev_array_d, _results_eatom, _results_vatom,0);
    _force_array_d = 0;
    if (done_this_step) _force_array_d = 0;
    else _ev_array_d = 0;
  } else if (_force_array_s != 0) {
    if (_need_reduce) {
      reduce_results(&_force_array_s[0].x);
      _need_reduce = 0;
    }
    add_results(_force_array_s, _ev_array_s, _results_eatom, _results_vatom,0);
    _force_array_s = 0;
    if (done_this_step) _force_array_s = 0;
    else _ev_array_s = 0;
  }

  #ifdef _LMP_INTEL_OFFLOAD
@@ -576,6 +632,13 @@ void FixIntel::pre_reverse(int /*eflag*/, int /*vflag*/)

/* ---------------------------------------------------------------------- */

void FixIntel::pre_reverse(int /*eflag*/, int /*vflag*/)
{
  // Both arguments are unused; forward to _sync_main_arrays() with its
  // prereverse argument set to 1.
  _sync_main_arrays(1);
}

/* ---------------------------------------------------------------------- */

template <class acc_t>
void FixIntel::reduce_results(acc_t * _noalias const f_scalar)
{
@@ -657,7 +720,7 @@ template <class ft, class acc_t>
void FixIntel::add_results(const ft * _noalias const f_in,
                           const acc_t * _noalias const ev_global,
                           const int eatom, const int vatom,
                           const int /*offload*/) {
                           const int offload) {
  start_watch(TIME_PACK);
  int f_length;
  #ifdef _LMP_INTEL_OFFLOAD
+49 −14
Original line number Diff line number Diff line
@@ -74,11 +74,12 @@ class FixIntel : public Fix {
  inline int nbor_pack_width() const { return _nbor_pack_width; }
  inline void nbor_pack_width(const int w) { _nbor_pack_width = w; }
  inline int three_body_neighbor() { return _three_body_neighbor; }
  inline void three_body_neighbor(const int /*i*/) { _three_body_neighbor = 1; }
  inline void three_body_neighbor(const int i) { _three_body_neighbor = i; }

  inline int need_zero(const int tid) {
    if (_need_reduce == 0 && tid > 0) return 1;
    return 0;
    else if (_zero_master && tid == 0) { _zero_master = 0; return 1; }
    else return 0;
  }
  inline void set_reduce_flag() { if (_nthreads > 1) _need_reduce = 1; }
  inline int lrt() {
@@ -100,6 +101,9 @@ class FixIntel : public Fix {
  IntelBuffers<double,double> *_double_buffers;

  int _precision_mode, _nthreads, _nbor_pack_width, _three_body_neighbor;
  int _pair_hybrid_flag;
  // These should be removed in subsequent update w/ simpler hybrid arch
  int _pair_hybrid_zero, _hybrid_nonpair, _zero_master;
  
 public:
  inline int* get_overflow_flag() { return _overflow_flag; }
@@ -210,6 +214,8 @@ class FixIntel : public Fix {
  _alignvar(double _stopwatch_offload_neighbor[1],64);
  _alignvar(double _stopwatch_offload_pair[1],64);

  void _sync_main_arrays(const int prereverse);
  
  template <class ft>
  void reduce_results(ft * _noalias const f_in);

@@ -238,7 +244,7 @@ class FixIntel : public Fix {

/* ---------------------------------------------------------------------- */

void FixIntel::get_buffern(const int /*offload*/, int &nlocal, int &nall,
void FixIntel::get_buffern(const int offload, int &nlocal, int &nall,
                           int &minlocal) {
  #ifdef _LMP_INTEL_OFFLOAD
  if (_separate_buffers) {
@@ -273,7 +279,7 @@ void FixIntel::get_buffern(const int /*offload*/, int &nlocal, int &nall,
/* ---------------------------------------------------------------------- */

void FixIntel::add_result_array(IntelBuffers<double,double>::vec3_acc_t *f_in,
                                double *ev_in, const int /*offload*/,
                                double *ev_in, const int offload,
                                const int eatom, const int vatom,
                                const int rflag) {
  #ifdef _LMP_INTEL_OFFLOAD
@@ -282,6 +288,8 @@ void FixIntel::add_result_array(IntelBuffers<double,double>::vec3_acc_t *f_in,
    _off_results_vatom = vatom;
    _off_force_array_d = f_in;
    _off_ev_array_d = ev_in;
    if (_pair_hybrid_flag && force->pair->fdotr_is_set())
       _sync_main_arrays(1);
    return;
  }
  #endif
@@ -296,12 +304,15 @@ void FixIntel::add_result_array(IntelBuffers<double,double>::vec3_acc_t *f_in,

  if (_overflow_flag[LMP_OVERFLOW])
    error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");

  if (_pair_hybrid_flag > 1 ||
      (_pair_hybrid_flag && force->pair->fdotr_is_set())) _sync_main_arrays(0);
}

/* ---------------------------------------------------------------------- */

void FixIntel::add_result_array(IntelBuffers<float,double>::vec3_acc_t *f_in,
                                double *ev_in, const int /*offload*/,
                                double *ev_in, const int offload,
                                const int eatom, const int vatom,
                                const int rflag) {
  #ifdef _LMP_INTEL_OFFLOAD
@@ -310,6 +321,8 @@ void FixIntel::add_result_array(IntelBuffers<float,double>::vec3_acc_t *f_in,
    _off_results_vatom = vatom;
    _off_force_array_m = f_in;
    _off_ev_array_d = ev_in;
    if (_pair_hybrid_flag && force->pair->fdotr_is_set())
       _sync_main_arrays(1);
    return;
  }
  #endif
@@ -324,12 +337,16 @@ void FixIntel::add_result_array(IntelBuffers<float,double>::vec3_acc_t *f_in,

  if (_overflow_flag[LMP_OVERFLOW])
    error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");

  if (_pair_hybrid_flag > 1 ||
      (_pair_hybrid_flag && force->pair->fdotr_is_set()))
    _sync_main_arrays(0);
}

/* ---------------------------------------------------------------------- */

void FixIntel::add_result_array(IntelBuffers<float,float>::vec3_acc_t *f_in,
                                float *ev_in, const int /*offload*/,
                                float *ev_in, const int offload,
                                const int eatom, const int vatom,
                                const int rflag) {
  #ifdef _LMP_INTEL_OFFLOAD
@@ -338,6 +355,8 @@ void FixIntel::add_result_array(IntelBuffers<float,float>::vec3_acc_t *f_in,
    _off_results_vatom = vatom;
    _off_force_array_s = f_in;
    _off_ev_array_s = ev_in;
    if (_pair_hybrid_flag && force->pair->fdotr_is_set())
       _sync_main_arrays(1);
    return;
  }
  #endif
@@ -352,6 +371,10 @@ void FixIntel::add_result_array(IntelBuffers<float,float>::vec3_acc_t *f_in,

  if (_overflow_flag[LMP_OVERFLOW])
    error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");

  if (_pair_hybrid_flag > 1 ||
      (_pair_hybrid_flag && force->pair->fdotr_is_set()))
    _sync_main_arrays(0);
}

/* ---------------------------------------------------------------------- */
@@ -487,16 +510,16 @@ The compiler version used to build LAMMPS is not supported when using
offload to a coprocessor. There could be performance or correctness
issues. Please use 14.0.1.106 or 15.1.133 or later.

E: Currently, cannot use more than one intel style with hybrid.
E: Currently, cannot offload more than one intel style with hybrid.

Currently, hybrid pair styles can only use the intel suffix for one of the
pair styles.
Currently, when using offload, hybrid pair styles can only use the intel 
suffix for one of the pair styles.

E: Cannot yet use hybrid styles with Intel package.
E: Cannot yet use hybrid styles with Intel offload.

The hybrid pair style configuration is not yet supported by the Intel
package. Support is limited to hybrid/overlay or a hybrid style that does
not require a skip list.
The hybrid pair style configuration is not yet supported when using offload
within the Intel package. Support is limited to hybrid/overlay or a hybrid 
style that does not require a skip list.

W: Leaving a core/node free can improve performance for offload

@@ -538,4 +561,16 @@ E: Too few atoms for load balancing offload.
When using offload to a coprocessor, each MPI task must have at least 2
atoms throughout the simulation.

E: Intel package requires fdotr virial with newton on.

This error can occur with a hybrid pair style that mixes styles that are
incompatible with the newton pair setting turned on. Try turning the 
newton pair setting off.

E: Add -DLMP_INTEL_NBOR_COMPAT to build for special_bond exclusions with Intel

When using a manybody pair style, bonds/angles/dihedrals, and special_bond
exclusions, LAMMPS should be built with the above compile flag for compatible
results.

*/
+102 −28
Original line number Diff line number Diff line
@@ -24,7 +24,9 @@ using namespace LAMMPS_NS;
template <class flt_t, class acc_t>
IntelBuffers<flt_t, acc_t>::IntelBuffers(class LAMMPS *lmp_in) :
    lmp(lmp_in), _x(0), _q(0), _quat(0), _f(0), _off_threads(0),
    _buf_size(0), _buf_local_size(0) {
    _buf_size(0), _buf_local_size(0), _n_list_ptrs(1), _max_list_ptrs(4) {
  _neigh_list_ptrs = new IntelNeighListPtrs[_max_list_ptrs];
  _neigh_list_ptrs[0].cnumneigh = 0;
  _list_alloc_atoms = 0;
  _ntypes = 0;
  _off_map_listlocal = 0;
@@ -55,6 +57,7 @@ IntelBuffers<flt_t, acc_t>::~IntelBuffers()
  free_all_nbor_buffers();
  free_ccache();
  set_ntypes(0);
  delete []_neigh_list_ptrs;
}

/* ---------------------------------------------------------------------- */
@@ -109,7 +112,7 @@ void IntelBuffers<flt_t, acc_t>::free_buffers()
template <class flt_t, class acc_t>
void IntelBuffers<flt_t, acc_t>::_grow(const int nall, const int nlocal,
                                       const int nthreads,
                                       const int /*offload_end*/)
                                       const int offload_end)
{
  free_buffers();
  _buf_size = static_cast<double>(nall) * 1.1 + 1;
@@ -186,11 +189,9 @@ void IntelBuffers<flt_t, acc_t>::free_nmax()
    const int * tag = _off_map_tag;
    const int * special = _off_map_special;
    const int * nspecial = _off_map_nspecial;
    if (tag != 0 && special != 0 && nspecial !=0) {
    #pragma offload_transfer target(mic:_cop) \
      nocopy(tag:alloc_if(0) free_if(1)) \
      nocopy(special,nspecial:alloc_if(0) free_if(1))
    }
    _off_map_nmax = 0;
    _host_nmax = 0;
  }
@@ -200,7 +201,7 @@ void IntelBuffers<flt_t, acc_t>::free_nmax()
/* ---------------------------------------------------------------------- */

template <class flt_t, class acc_t>
void IntelBuffers<flt_t, acc_t>::_grow_nmax(const int /*offload_end*/)
void IntelBuffers<flt_t, acc_t>::_grow_nmax(const int offload_end)
{
  #ifdef _LMP_INTEL_OFFLOAD
  free_nmax();
@@ -243,46 +244,117 @@ template <class flt_t, class acc_t>
void IntelBuffers<flt_t, acc_t>::free_list_local()
{
  if (_off_map_listlocal > 0) {
    int * cnumneigh = _cnumneigh;
    if (_neigh_list_ptrs[0].cnumneigh) {
      int * cnumneigh = _neigh_list_ptrs[0].cnumneigh;
      _neigh_list_ptrs[0].cnumneigh = 0;
      #ifdef _LMP_INTEL_OFFLOAD
      if (_off_map_ilist != NULL) {
        #pragma offload_transfer target(mic:_cop) \
          nocopy(cnumneigh:alloc_if(0) free_if(1))
      }
      #endif
      lmp->memory->destroy(cnumneigh);
    }
      
    #ifdef _LMP_INTEL_OFFLOAD
    if (_off_map_ilist != NULL) {
      const int * ilist = _off_map_ilist;
      const int * numneigh = _off_map_numneigh;
      const int ** firstneigh = (const int **)_off_map_firstneigh;
      _off_map_ilist = NULL;
      if (numneigh != 0 && ilist != 0) {
      #pragma offload_transfer target(mic:_cop) \
          nocopy(ilist,numneigh,cnumneigh:alloc_if(0) free_if(1))
      }
        nocopy(ilist,firstneigh,numneigh:alloc_if(0) free_if(1))
    }
    #endif
    lmp->memory->destroy(cnumneigh);
    _off_map_listlocal = 0;
  }
}

/* ---------------------------------------------------------------------- */

template <class flt_t, class acc_t>
void IntelBuffers<flt_t, acc_t>::free_list_ptrs()
{
  // Entry 0 is left untouched by this routine. Every other registered entry
  // has its two per-list arrays released (only if its recorded size is
  // nonzero) and its bookkeeping fields cleared.
  int slot = 1;
  while (slot < _n_list_ptrs) {
    if (_neigh_list_ptrs[slot].size != 0) {
      lmp->memory->destroy(_neigh_list_ptrs[slot].cnumneigh);
      lmp->memory->destroy(_neigh_list_ptrs[slot].numneighhalf);
    }
    _neigh_list_ptrs[slot].list_ptr = 0;
    _neigh_list_ptrs[slot].size = 0;
    ++slot;
  }

  // From here on only entry 0 counts as registered.
  _n_list_ptrs = 1;
}

/* ---------------------------------------------------------------------- */

template <class flt_t, class acc_t>
void IntelBuffers<flt_t, acc_t>::grow_data3(NeighList *list,
                                            int *&numneighhalf,
                                            int *&cnumneigh)
{
  // Hands back, through the two reference parameters, the numneighhalf and
  // cnumneigh arrays associated with `list`, (re)allocating them when the
  // list's get_maxlocal() exceeds the size recorded for that entry.
  const int size = list->get_maxlocal();

  // Search the registered entries for one already bound to this list.
  int list_num;
  for (list_num = 0; list_num < _n_list_ptrs; list_num++)
    if (_neigh_list_ptrs[list_num].list_ptr == (void*)list) break;

  if (list_num == _n_list_ptrs) {
    // No entry yet: append one, doubling the table first when it is full.
    if (_n_list_ptrs == _max_list_ptrs) {
      _max_list_ptrs *= 2;
      IntelNeighListPtrs *new_list = new IntelNeighListPtrs[_max_list_ptrs];
      for (int i = 0; i < _n_list_ptrs; i++) new_list[i] = _neigh_list_ptrs[i];
      delete []_neigh_list_ptrs;
      _neigh_list_ptrs = new_list;
    }
    _neigh_list_ptrs[list_num].list_ptr = (void *)list;
    _neigh_list_ptrs[list_num].size = 0;
    // Start from null so the pointers handed back below are well defined
    // even if no allocation happens (size == 0).
    _neigh_list_ptrs[list_num].cnumneigh = 0;
    _neigh_list_ptrs[list_num].numneighhalf = 0;
    _n_list_ptrs++;
  }

  if (size > _neigh_list_ptrs[list_num].size) {
    // Existing arrays are too small: release them (if any) and allocate
    // fresh ones of the new size. Contents are not preserved.
    if (_neigh_list_ptrs[list_num].size) {
      lmp->memory->destroy(_neigh_list_ptrs[list_num].cnumneigh);
      lmp->memory->destroy(_neigh_list_ptrs[list_num].numneighhalf);
    }
    lmp->memory->create(_neigh_list_ptrs[list_num].cnumneigh, size,
                        "_cnumneigh");
    // Distinct label so an allocation failure names the right array.
    lmp->memory->create(_neigh_list_ptrs[list_num].numneighhalf, size,
                        "_numneighhalf");
    _neigh_list_ptrs[list_num].size = size;
  }

  numneighhalf = _neigh_list_ptrs[list_num].numneighhalf;
  cnumneigh = _neigh_list_ptrs[list_num].cnumneigh;
}

/* ---------------------------------------------------------------------- */

template <class flt_t, class acc_t>
void IntelBuffers<flt_t, acc_t>::_grow_list_local(NeighList *list,
                                                  const int /*offload_end*/)
                                                  const int three_body,
                                                  const int offload_end)
{
  free_list_local();
  int size = list->get_maxlocal();
  lmp->memory->create(_cnumneigh, size, "_cnumneigh");
  _off_map_listlocal = size;
  if (three_body)
    lmp->memory->create(_neigh_list_ptrs[0].cnumneigh, size, "_cnumneigh");

  #ifdef _LMP_INTEL_OFFLOAD
  if (offload_end > 0) {
    int tb_size = size;
    if (three_body == 0) {
      lmp->memory->create(_neigh_list_ptrs[0].cnumneigh, 16, "_cnumneigh");
      tb_size = 16;
    }
    int ** firstneigh = list->firstneigh;
    int * numneigh = list->numneigh;
    int * ilist = list->ilist;
    int * cnumneigh = _cnumneigh;
    if (cnumneigh != 0) {
    int * cnumneigh = _neigh_list_ptrs[0].cnumneigh;
    #pragma offload_transfer target(mic:_cop) \
      nocopy(ilist:length(size) alloc_if(1) free_if(0)) \
      nocopy(firstneigh:length(size) alloc_if(1) free_if(0)) \
      nocopy(numneigh:length(size) alloc_if(1) free_if(0)) \
        nocopy(cnumneigh:length(size) alloc_if(1) free_if(0))
    }
      nocopy(cnumneigh:length(tb_size) alloc_if(1) free_if(0))
    _off_map_ilist = ilist;
    _off_map_firstneigh = firstneigh;
    _off_map_numneigh = numneigh;
  }
  #endif
@@ -313,7 +385,7 @@ template <class flt_t, class acc_t>
void IntelBuffers<flt_t, acc_t>::_grow_nbor_list(NeighList * /*list*/,
                                                 const int nlocal,
                                                 const int nthreads,
                                                 const int /*offload_end*/,
                                                 const int offload_end,
                                                 const int pack_width)
{
  free_nbor_list();
@@ -382,7 +454,7 @@ void IntelBuffers<flt_t, acc_t>::free_ccache()
/* ---------------------------------------------------------------------- */

template <class flt_t, class acc_t>
void IntelBuffers<flt_t, acc_t>::grow_ccache(const int /*off_flag*/,
void IntelBuffers<flt_t, acc_t>::grow_ccache(const int off_flag,
        const int nthreads,
        const int width)
{
@@ -481,7 +553,7 @@ void IntelBuffers<flt_t, acc_t>::free_ncache()
/* ---------------------------------------------------------------------- */

template <class flt_t, class acc_t>
void IntelBuffers<flt_t, acc_t>::grow_ncache(const int /*off_flag*/,
void IntelBuffers<flt_t, acc_t>::grow_ncache(const int off_flag,
                                             const int nthreads)
{
  const int nsize = get_max_nbors() * 3;
@@ -576,12 +648,12 @@ void IntelBuffers<flt_t, acc_t>::set_ntypes(const int ntypes,
    if (_ntypes > 0) {
      #ifdef _LMP_INTEL_OFFLOAD
      flt_t * cutneighsqo = _cutneighsq[0];
      if (_off_threads > 0 && cutneighsqo != 0) {
      if (_off_threads > 0) {
        #pragma offload_transfer target(mic:_cop) \
          nocopy(cutneighsqo:alloc_if(0) free_if(1))
      }
      flt_t * cutneighghostsqo;
      if (_cutneighghostsq && _off_threads > 0 && cutneighghostsqo != 0) {
      if (_cutneighghostsq && _off_threads > 0) {
        cutneighghostsqo = _cutneighghostsq[0];
        #pragma offload_transfer target(mic:_cop) \
          nocopy(cutneighghostsqo:alloc_if(0) free_if(1))
@@ -637,6 +709,8 @@ double IntelBuffers<flt_t, acc_t>::memory_usage(const int nthreads)
  tmem += (_list_alloc_atoms + _off_threads) * get_max_nbors() * sizeof(int);
  tmem += _ntypes * _ntypes * sizeof(int);

  tmem += _buf_local_size + (_n_list_ptrs - 1) * _buf_local_size * 2;

  return tmem;
}

+44 −14

File changed.

Preview size limit exceeded, changes collapsed.

Loading