Commit ee83b755 authored by Tim Mattox's avatar Tim Mattox
Browse files

USER-DPD: Split the SSA stencil and neighbor list into subphases.

NOTE: pair evaluation order changes, causing numerical differences!
This enables processing neighbors in subphase groups that enforce
a geometrical separation of pairs, allowing greater parallelism
once fix_shardlow (SSA) is converted to Kokkos.
parent ce2da506
Loading
Loading
Loading
Loading
+31 −27
Original line number Diff line number Diff line
@@ -74,7 +74,7 @@ void NPairHalfBinNewtonSSA::build(NeighList *list)

  NStencilSSA *ns_ssa = dynamic_cast<NStencilSSA*>(ns);
  if (!ns_ssa) error->one(FLERR, "NStencil wasn't a NStencilSSA object");
  int nstencil_half = ns_ssa->nstencil_half;
  int *nstencil_ssa = &(ns_ssa->nstencil_ssa[0]);
  int nstencil_full = ns_ssa->nstencil;

  NBinSSA *nb_ssa = dynamic_cast<NBinSSA*>(nb);
@@ -150,7 +150,9 @@ void NPairHalfBinNewtonSSA::build(NeighList *list)

    // loop over all local atoms in other bins in "half" stencil

    for (k = 0; k < nstencil_half; k++) {
    k = 0;
    for (int subphase = 0; subphase < 4; subphase++) {
      for (; k < nstencil_ssa[subphase]; k++) {
        for (j = binhead[ibin+stencil[k]]; j >= 0;
             j = bins[j]) {

@@ -179,6 +181,8 @@ void NPairHalfBinNewtonSSA::build(NeighList *list)
          }
        }
      }
      list->ndxAIR_ssa[i][subphase] = n; // record end of this subphase
    }

    if (n > 0) {
      ilist[inum++] = i;
+46 −8
Original line number Diff line number Diff line
@@ -42,31 +42,69 @@ NStencilHalfBin2dNewtonSSA::NStencilHalfBin2dNewtonSSA(LAMMPS *lmp) :
// Builds the 2-D SSA stencil one subphase at a time.
// For every accepted bin offset the components are stored in stencilxyz[]
// and the offset j*mbinx + i is appended to stencil[]. After each subphase
// the running count `pos` is written to nstencil_ssa[], so subphase s spans
// the entries from nstencil_ssa[s-1] (0 for s == 0) up to, but not
// including, nstencil_ssa[s]. Only bins whose bin_distance() is below
// cutneighmaxsq are kept.
// NOTE(review): this listing looks like a rendered diff in which removed and
// added lines are interleaved without +/- markers (back-to-back loop headers,
// an unclosed brace further down) - confirm against the real file before
// relying on the exact statement sequence shown here.
void NStencilHalfBin2dNewtonSSA::create()
{
  int i,j,pos = 0;

  // Subphase 0: upper right front bins (red)
  for (j = 0; j <= sy; j++)
    // NOTE(review): the next two lines are presumably the pre-change loop
    // header and condition that the two lines after them replace - confirm.
    for (i = -sx; i <= sx; i++)
      if (j > 0 || (j == 0 && i > 0))
    for (i = 0; i <= sx; i++)
      if (j > 0 || i > 0) // skip the centroid
        if (bin_distance(i,j,0) < cutneighmaxsq) {
          stencilxyz[pos][0] = i;
          stencilxyz[pos][1] = j;
          stencilxyz[pos][2] = 0;
          stencil[pos++] = j*mbinx + i;
        }
  nstencil_ssa[0] = pos;

  // Subphase 1: upper left front bins (light blue)
  // j starts at 1 and i stays negative, so none of these offsets repeat
  // the i >= 0 range scanned for subphase 0.
  for (j = 1; j <= sy; j++)
    for (i = -sx; i < 0; i++)
      if (bin_distance(i,j,0) < cutneighmaxsq) {
        stencilxyz[pos][0] = i;
        stencilxyz[pos][1] = j;
        stencilxyz[pos][2] = 0;
        stencil[pos++] = j*mbinx + i;
      }
  nstencil_ssa[1] = pos;

  // Subphase 2: lower left front bins (blue)
  // No bins are appended here, so this end index equals the previous one.
  nstencil_ssa[2] = pos;

  // NOTE(review): the nstencil_half assignment below is presumably a removed
  // line of the diff (the header hunk drops that member) - confirm.
  nstencil_half = pos; // record where normal half stencil ends
  // Subphase 3: lower right front bins (yellow)
  // Empty as well: no bins are appended before the index is recorded.
  nstencil_ssa[3] = pos;

  // include additional bins for AIR ghosts only
  // Now include additional bins for AIR ghosts, and impure-to-pure locals
  // Subphase 4: upper right back bins (pink)
  // Empty: no bins are appended before the index is recorded.
  nstencil_ssa[4] = pos;

  // Subphase 5: upper left back bins (light green)
  // Empty: no bins are appended before the index is recorded.
  nstencil_ssa[5] = pos;

  // Subphase 6: lower left back bins (purple)
  for (j = -sy; j <= 0; j++)
    // NOTE(review): the next two lines (a full-width i loop that opens a
    // brace, plus its `continue`) are presumably the pre-change version of
    // the loop that follows them; the brace is never closed in this
    // listing - confirm.
    for (i = -sx; i <= sx; i++) {
      if (j == 0 && i > 0) continue;
    for (i = -sx; i < 0; i++)
      if (bin_distance(i,j,0) < cutneighmaxsq) {
        stencilxyz[pos][0] = i;
        stencilxyz[pos][1] = j;
        stencilxyz[pos][2] = 0;
        stencil[pos++] = j*mbinx + i;
      }
  nstencil_ssa[6] = pos;

  // Subphase 7: lower right back bins (white)
  // j stops before 0, so the i >= 0 part of row j == 0 is not revisited.
  for (j = -sy; j < 0; j++)
    for (i = 0; i <= sx; i++)
      if (bin_distance(i,j,0) < cutneighmaxsq) {
        stencilxyz[pos][0] = i;
        stencilxyz[pos][1] = j;
        stencilxyz[pos][2] = 0;
        stencil[pos++] = j*mbinx + i;
      }
  nstencil_ssa[7] = pos;

  // Also, include the centroid for the AIR ghosts.
  // It is appended after nstencil_ssa[7] was recorded, so it lies past the
  // end of the last subphase and is only counted by nstencil.
  stencilxyz[pos][0] = 0;
  stencilxyz[pos][1] = 0;
  stencilxyz[pos][2] = 0;
  stencil[pos++] = 0;

  nstencil = pos; // record where full stencil ends
}
+85 −21
Original line number Diff line number Diff line
@@ -42,45 +42,109 @@ NStencilHalfBin3dNewtonSSA::NStencilHalfBin3dNewtonSSA(LAMMPS *lmp) :
// Builds the 3-D SSA stencil one subphase at a time.
// For every accepted bin offset the components are stored in stencilxyz[]
// and the offset k*mbiny*mbinx + j*mbinx + i is appended to stencil[]. After
// each subphase the running count `pos` is written to nstencil_ssa[], so
// subphase s spans the entries from nstencil_ssa[s-1] (0 for s == 0) up to,
// but not including, nstencil_ssa[s]. Only bins whose bin_distance() is
// below cutneighmaxsq are kept.
// NOTE(review): this listing looks like a rendered diff in which removed and
// added lines are interleaved without +/- markers (back-to-back loop headers,
// an unclosed brace further down) - confirm against the real file before
// relying on the exact statement sequence shown here.
void NStencilHalfBin3dNewtonSSA::create()
{
  int i,j,k,pos = 0;
  // Subphase 0: upper right front bins (red)
  for (k = 0; k <= sz; k++)
    for (j = 0; j <= sy; j++)
      for (i = 0; i <= sx; i++)
        if (k > 0 || j > 0 || i > 0) // skip the centroid
          if (bin_distance(i,j,k) < cutneighmaxsq) {
            stencilxyz[pos][0] = i;
            stencilxyz[pos][1] = j;
            stencilxyz[pos][2] = k;
            stencil[pos++] = k*mbiny*mbinx + j*mbinx + i;
          }
  nstencil_ssa[0] = pos;

  // Subphase 1: upper left front bins (light blue)
  for (k = 0; k <= sz; k++)
    // NOTE(review): the next three lines (full-range j and i loops plus the
    // old half-stencil condition) are presumably the pre-change code that
    // the two loop headers after them replace - confirm.
    for (j = -sy; j <= sy; j++)
      for (i = -sx; i <= sx; i++)
        if (k > 0 || j > 0 || (j == 0 && i > 0))
    for (j = 1; j <= sy; j++)
      for (i = -sx; i < 0; i++)
        if (bin_distance(i,j,k) < cutneighmaxsq) {
          stencilxyz[pos][0] = i;
          stencilxyz[pos][1] = j;
          stencilxyz[pos][2] = k;
          stencil[pos++] = k*mbiny*mbinx + j*mbinx + i;
        }
  nstencil_ssa[1] = pos;

  // NOTE(review): the nstencil_half assignment below is presumably a removed
  // line of the diff (the header hunk drops that member) - confirm.
  nstencil_half = pos; // record where normal half stencil ends
  // Subphase 2: lower left front bins (blue)
  // k starts at 1 here; the k == 0 bins with j <= 0 and i < 0 are scanned
  // in subphase 6 instead.
  for (k = 1; k <= sz; k++)
    for (j = -sy; j <= 0; j++)
      for (i = -sx; i < 0; i++)
        if (bin_distance(i,j,k) < cutneighmaxsq) {
          stencilxyz[pos][0] = i;
          stencilxyz[pos][1] = j;
          stencilxyz[pos][2] = k;
          stencil[pos++] = k*mbiny*mbinx + j*mbinx + i;
        }
  nstencil_ssa[2] = pos;

  // include additional bins for AIR ghosts only
  // Subphase 3: lower right front bins (yellow)
  // k starts at 1 here; the k == 0 bins with j < 0 and i >= 0 are scanned
  // in subphase 7 instead.
  for (k = 1; k <= sz; k++)
    for (j = -sy; j < 0; j++)
      for (i = 0; i <= sx; i++)
        if (bin_distance(i,j,k) < cutneighmaxsq) {
          stencilxyz[pos][0] = i;
          stencilxyz[pos][1] = j;
          stencilxyz[pos][2] = k;
          stencil[pos++] = k*mbiny*mbinx + j*mbinx + i;
        }
  nstencil_ssa[3] = pos;

  // Now include additional bins for AIR ghosts, and impure-to-pure locals
  // Subphase 4: upper right back bins (pink)
  for (k = -sz; k < 0; k++)
    // NOTE(review): the next two lines (full-range j and i loops) are
    // presumably the pre-change headers replaced by the two after them -
    // confirm.
    for (j = -sy; j <= sy; j++)
      for (i = -sx; i <= sx; i++)
    for (j = 0; j <= sy; j++)
      for (i = 0; i <= sx; i++)
        if (bin_distance(i,j,k) < cutneighmaxsq) {
          stencilxyz[pos][0] = i;
          stencilxyz[pos][1] = j;
          stencilxyz[pos][2] = k;
          stencil[pos++] = k*mbiny*mbinx + j*mbinx + i;
        }
  nstencil_ssa[4] = pos;

  // For k==0, make sure to skip already included bins
  // Subphase 5: upper left back bins (light green)
  for (k = -sz; k < 0; k++)
    for (j = 1; j <= sy; j++)
      for (i = -sx; i < 0; i++)
        if (bin_distance(i,j,k) < cutneighmaxsq) {
          stencilxyz[pos][0] = i;
          stencilxyz[pos][1] = j;
          stencilxyz[pos][2] = k;
          stencil[pos++] = k*mbiny*mbinx + j*mbinx + i;
        }
  nstencil_ssa[5] = pos;

  // NOTE(review): `k = 0;` below is presumably a removed line; the loop that
  // follows re-initialises k to -sz - confirm.
  k = 0;
  // Subphase 6: lower left back bins (purple)
  // k runs up to and including 0, so this subphase also takes the k == 0
  // bins with j <= 0 and i < 0.
  for (k = -sz; k <= 0; k++)
    for (j = -sy; j <= 0; j++)
    // NOTE(review): the next two lines (a full-width i loop that opens a
    // brace, plus its `continue`) are presumably the pre-change version of
    // the loop that follows them; the brace is never closed in this
    // listing - confirm.
    for (i = -sx; i <= sx; i++) {
      if (j == 0 && i > 0) continue;
      for (i = -sx; i < 0; i++)
        if (bin_distance(i,j,k) < cutneighmaxsq) {
          stencilxyz[pos][0] = i;
          stencilxyz[pos][1] = j;
          stencilxyz[pos][2] = k;
          stencil[pos++] = k*mbiny*mbinx + j*mbinx + i;
        }
  nstencil_ssa[6] = pos;

  // Subphase 7: lower right back bins (white)
  // k runs up to and including 0 while j stays negative, so the k == 0 bins
  // with j < 0 and i >= 0 are taken here.
  for (k = -sz; k <= 0; k++)
    for (j = -sy; j < 0; j++)
      for (i = 0; i <= sx; i++)
        if (bin_distance(i,j,k) < cutneighmaxsq) {
          stencilxyz[pos][0] = i;
          stencilxyz[pos][1] = j;
          stencilxyz[pos][2] = k;
          stencil[pos++] = k*mbiny*mbinx + j*mbinx + i;
        }
  nstencil_ssa[7] = pos;

  // Also, include the centroid for the AIR ghosts.
  // It is appended after nstencil_ssa[7] was recorded, so it lies past the
  // end of the last subphase and is only counted by nstencil.
  stencilxyz[pos][0] = 0;
  stencilxyz[pos][1] = 0;
  stencilxyz[pos][2] = 0;
  stencil[pos++] = 0;

  nstencil = pos; // record where full stencil ends
}
+1 −1
Original line number Diff line number Diff line
@@ -24,7 +24,7 @@ class NStencilSSA : public NStencil {
  ~NStencilSSA() {}
  virtual void create() = 0;

  int nstencil_half;   // where the half stencil ends
  int nstencil_ssa[8];  // last stencil index for each subphase
};

}