Commit 591e7824 authored by Denis Taniguchi's avatar Denis Taniguchi
Browse files

Optimizing PairGranHookeHistoryKokkos to be less divergent.

parent 406aaf01
Loading
Loading
Loading
Loading
+158 −122
Original line number Diff line number Diff line
@@ -159,9 +159,17 @@ void PairGranHookeHistoryKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
  d_neighbors = k_list->d_neighbors;
  d_ilist = k_list->d_ilist;

  if (d_numneigh.extent(0) != d_numneigh_touch.extent(0))
    d_numneigh_touch = typename AT::t_int_1d("pair:numneigh_touch",d_numneigh.extent(0));
  if (d_neighbors.extent(0) != d_neighbors_touch.extent(0) ||
      d_neighbors.extent(1) != d_neighbors_touch.extent(1))
    d_neighbors_touch = typename AT::t_neighbors_2d("pair:neighbors_touch",d_neighbors.extent(0),d_neighbors.extent(1));

  d_firsttouch = fix_historyKK->d_firstflag;
  d_firstshear = fix_historyKK->d_firstvalue;

  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryReduce>(0,inum),*this);

  EV_FLOAT ev;

  if (lmp->kokkos->neighflag == HALF) {
@@ -269,6 +277,44 @@ void PairGranHookeHistoryKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
  copymode = 0;
}

template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void PairGranHookeHistoryKokkos<DeviceType>::operator()(TagPairGranHookeHistoryReduce, const int ii) const {
  const int i = d_ilist[ii];
  const X_FLOAT xtmp = x(i,0);
  const X_FLOAT ytmp = x(i,1);
  const X_FLOAT ztmp = x(i,2);
  const LMP_FLOAT imass = rmass[i];
  const LMP_FLOAT irad = radius[i];
  const int jnum = d_numneigh[i];
  int count = 0;

  for (int jj = 0; jj < jnum; jj++) {
    const int j = d_neighbors(i,jj) & NEIGHMASK;

    const X_FLOAT delx = xtmp - x(j,0);
    const X_FLOAT dely = ytmp - x(j,1);
    const X_FLOAT delz = ztmp - x(j,2);
    const X_FLOAT rsq = delx*delx + dely*dely + delz*delz;
    const LMP_FLOAT jmass = rmass[j];
    const LMP_FLOAT jrad = radius[j];
    const LMP_FLOAT radsum = irad + jrad;

    // check for touching neighbors

    if (rsq >= radsum * radsum) {
      d_firsttouch(i,jj) = 0;
      d_firstshear(i,3*jj) = 0;
      d_firstshear(i,3*jj+1) = 0;
      d_firstshear(i,3*jj+2) = 0;
    } else {
      d_firsttouch(i,jj) = 1;
      d_neighbors_touch(i,count++) = jj;
    }
  }
  d_numneigh_touch[i] = count;
}

template<class DeviceType>
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, int SHEARUPDATE>
KOKKOS_INLINE_FUNCTION
@@ -282,10 +328,9 @@ void PairGranHookeHistoryKokkos<DeviceType>::operator()(TagPairGranHookeHistoryC
  const X_FLOAT xtmp = x(i,0);
  const X_FLOAT ytmp = x(i,1);
  const X_FLOAT ztmp = x(i,2);
  const int itype = type[i];
  const LMP_FLOAT imass = rmass[i];
  const LMP_FLOAT irad = radius[i];
  const int jnum = d_numneigh[i];
  const int jnum = d_numneigh_touch[i];
  
  F_FLOAT fx_i = 0.0;
  F_FLOAT fy_i = 0.0;
@@ -296,26 +341,19 @@ void PairGranHookeHistoryKokkos<DeviceType>::operator()(TagPairGranHookeHistoryC
  F_FLOAT torquez_i = 0.0;

  for (int jj = 0; jj < jnum; jj++) {
    int j = d_neighbors(i,jj);
    j &= NEIGHMASK;
    const int m = d_neighbors_touch(i, jj);
    const int j = d_neighbors(i, m) & NEIGHMASK;
    
    const X_FLOAT delx = xtmp - x(j,0);
    const X_FLOAT dely = ytmp - x(j,1);
    const X_FLOAT delz = ztmp - x(j,2);
    const X_FLOAT rsq = delx*delx + dely*dely + delz*delz;
    const int jtype = type[j];
    const LMP_FLOAT jmass = rmass[j];
    const LMP_FLOAT jrad = radius[j];
    const LMP_FLOAT radsum = irad + jrad;

    // check for touching neighbors

    if (rsq >= radsum * radsum) {
      d_firsttouch(i,jj) = 0;
      d_firstshear(i,3*jj) = 0;
      d_firstshear(i,3*jj+1) = 0;
      d_firstshear(i,3*jj+2) = 0;
    } else {
    const LMP_FLOAT r = sqrt(rsq);
    const LMP_FLOAT rinv = 1.0/r;
    const LMP_FLOAT rsqinv = 1/rsq;
@@ -362,10 +400,9 @@ void PairGranHookeHistoryKokkos<DeviceType>::operator()(TagPairGranHookeHistoryC

    // shear history effects

      d_firsttouch(i,jj) = 1;
      X_FLOAT shear1 = d_firstshear(i,3*jj);
      X_FLOAT shear2 = d_firstshear(i,3*jj+1);
      X_FLOAT shear3 = d_firstshear(i,3*jj+2);
    X_FLOAT shear1 = d_firstshear(i,3*m);
    X_FLOAT shear2 = d_firstshear(i,3*m+1);
    X_FLOAT shear3 = d_firstshear(i,3*m+2);
    if (SHEARUPDATE) {
      shear1 += vtr1*dt;
      shear2 += vtr2*dt;
@@ -410,9 +447,9 @@ void PairGranHookeHistoryKokkos<DeviceType>::operator()(TagPairGranHookeHistoryC
    }

    if (SHEARUPDATE) {
  	d_firstshear(i,3*jj) = shear1;
  	d_firstshear(i,3*jj+1) = shear2;
  	d_firstshear(i,3*jj+2) = shear3;
      d_firstshear(i,3*m) = shear1;
      d_firstshear(i,3*m+1) = shear2;
      d_firstshear(i,3*m+2) = shear3;
    }

    // forces & torques
@@ -445,7 +482,6 @@ void PairGranHookeHistoryKokkos<DeviceType>::operator()(TagPairGranHookeHistoryC
    if (EVFLAG == 1)
      ev_tally_xyz<NEWTON_PAIR>(ev, i, j, fx_i, fy_i, fz_i, delx, dely, delz);
  }
  }

  a_f(i,0) += fx_i;
  a_f(i,1) += fy_i;
+9 −1
Original line number Diff line number Diff line
@@ -34,6 +34,8 @@ class FixNeighHistoryKokkos;
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, int SHEARUPDATE>
struct TagPairGranHookeHistoryCompute {};

struct TagPairGranHookeHistoryReduce {};

template <class DeviceType>
class PairGranHookeHistoryKokkos : public PairGranHookeHistory {
 public:
@@ -46,6 +48,9 @@ class PairGranHookeHistoryKokkos : public PairGranHookeHistory {
  virtual void compute(int, int);
  void init_style();

  KOKKOS_INLINE_FUNCTION
  void operator()(TagPairGranHookeHistoryReduce, const int ii) const;

  template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, int SHEARUPDATE>
  KOKKOS_INLINE_FUNCTION
  void operator()(TagPairGranHookeHistoryCompute<NEIGHFLAG,NEWTON_PAIR,EVFLAG,SHEARUPDATE>, const int, EV_FLOAT &ev) const;
@@ -89,6 +94,9 @@ class PairGranHookeHistoryKokkos : public PairGranHookeHistory {
  typename Kokkos::View<int**> d_firsttouch;
  typename Kokkos::View<LMP_FLOAT**> d_firstshear;

  typename AT::t_neighbors_2d d_neighbors_touch;
  typename AT::t_int_1d d_numneigh_touch;
  
  int newton_pair;
  double special_lj[4];