Commit b0114310 authored by Axel Kohlmeyer's avatar Axel Kohlmeyer
Browse files

refactoring of USER-TALLY computes to handle sparse and hybrid systems

with sparse and hybrid systems, Pair::ev_tally() may not be called on
every processor and thus the computes in USER-TALLY may hang during
reverse communication because of the error->all() call after checking
whether callback from Pair::ev_tally() has been called at least once.
To address this cleanly, a second callback function needs to be added,
which is run during Pair::ev_setup() and will now handle all memory
re-allocation and clearing of accumulators, just like it is done for
regular tallied data.
parent e530ba46
Loading
Loading
Loading
Loading
+13 −39
Original line number Diff line number Diff line
@@ -44,8 +44,7 @@ ComputeForceTally::ComputeForceTally(LAMMPS *lmp, int narg, char **arg) :
  extscalar = 1;
  peflag = 1;                   // we need Pair::ev_tally() to be run

  did_compute = 0;
  invoked_peratom = invoked_scalar = -1;
  did_setup = invoked_peratom = invoked_scalar = -1;
  nmax = -1;
  fatom = NULL;
  vector = new double[size_peratom_cols];
@@ -77,15 +76,17 @@ void ComputeForceTally::init()
                    || force->improper || force->kspace)
      error->warning(FLERR,"Compute force/tally only called from pair style");
  }
  did_compute = -1;
  did_setup = -1;
}

/* ---------------------------------------------------------------------- */

void ComputeForceTally::setup()
void ComputeForceTally::pair_setup_callback(int, int)
{
  const int ntotal = atom->nlocal + atom->nghost;

  // grow per-atom storage, if needed

  if (atom->nmax > nmax) {
    memory->destroy(fatom);
    nmax = atom->nmax;
@@ -93,7 +94,7 @@ void ComputeForceTally::setup()
    array_atom = fatom;
  }

  // clear storage as needed
  // clear storage

  for (int i=0; i < ntotal; ++i)
    for (int j=0; j < size_peratom_cols; ++j)
@@ -101,10 +102,11 @@ void ComputeForceTally::setup()

  for (int i=0; i < size_peratom_cols; ++i)
    vector[i] = ftotal[i] = 0.0;

  did_setup = update->ntimestep;
}

/* ---------------------------------------------------------------------- */

void ComputeForceTally::pair_tally_callback(int i, int j, int nlocal, int newton,
                                            double, double, double fpair,
                                            double dx, double dy, double dz)
@@ -112,37 +114,6 @@ void ComputeForceTally::pair_tally_callback(int i, int j, int nlocal, int newton
  const int ntotal = atom->nlocal + atom->nghost;
  const int * const mask = atom->mask;

  // do setup work that needs to be done only once per timestep

  if (did_compute != update->ntimestep) {
    did_compute = update->ntimestep;

    // grow local force array if necessary
    // needs to be atom->nmax in length

    if (atom->nmax > nmax) {
      memory->destroy(fatom);
      nmax = atom->nmax;
      memory->create(fatom,nmax,size_peratom_cols,"force/tally:fatom");
      array_atom = fatom;
    }

    // clear storage as needed

    if (newton) {
      for (int i=0; i < ntotal; ++i)
        for (int j=0; j < size_peratom_cols; ++j)
          fatom[i][j] = 0.0;
    } else {
      for (int i=0; i < atom->nlocal; ++i)
        for (int j=0; j < size_peratom_cols; ++j)
          fatom[i][j] = 0.0;
    }

    for (int i=0; i < size_peratom_cols; ++i)
      vector[i] = ftotal[i] = 0.0;
  }

  if ( ((mask[i] & groupbit) && (mask[j] & groupbit2))
       || ((mask[i] & groupbit2) && (mask[j] & groupbit)) ) {

@@ -205,7 +176,8 @@ void ComputeForceTally::unpack_reverse_comm(int n, int *list, double *buf)
double ComputeForceTally::compute_scalar()
{
  invoked_scalar = update->ntimestep;
  if ((did_compute != invoked_scalar) || (update->eflag_global != invoked_scalar))
  if ((did_setup != invoked_scalar)
      || (update->eflag_global != invoked_scalar))
    error->all(FLERR,"Energy was not tallied on needed timestep");

  // sum accumulated forces across procs
@@ -221,7 +193,8 @@ double ComputeForceTally::compute_scalar()
void ComputeForceTally::compute_peratom()
{
  invoked_peratom = update->ntimestep;
  if (((atom->nlocal > 0) && (did_compute != invoked_peratom)) || (update->eflag_global != invoked_peratom))
  if ((did_setup != invoked_peratom)
      || (update->eflag_global != invoked_peratom))
    error->all(FLERR,"Energy was not tallied on needed timestep");

  // collect contributions from ghost atoms
@@ -229,6 +202,7 @@ void ComputeForceTally::compute_peratom()
  if (force->newton_pair) {
    comm->reverse_comm_compute(this);

    // clear out ghost atom data after it has been collected to local atoms
    const int nall = atom->nlocal + atom->nghost;
    for (int i = atom->nlocal; i < nall; ++i)
      for (int j = 0; j < size_peratom_cols; ++j)
+2 −3
Original line number Diff line number Diff line
@@ -31,7 +31,6 @@ class ComputeForceTally : public Compute {
  virtual ~ComputeForceTally();

  void init();
  void setup();

  double compute_scalar();
  void compute_peratom();
@@ -40,12 +39,12 @@ class ComputeForceTally : public Compute {
  void unpack_reverse_comm(int, int *, double *);
  double memory_usage();

  void pair_setup_callback(int, int);
  void pair_tally_callback(int, int, int, int,
                           double, double, double,
                           double, double, double);

 private:
  bigint did_compute;
  bigint did_setup;
  int nmax,igroup2,groupbit2;
  double **fatom;
  double ftotal[3];
+33 −47
Original line number Diff line number Diff line
@@ -43,7 +43,7 @@ ComputeHeatFluxTally::ComputeHeatFluxTally(LAMMPS *lmp, int narg, char **arg) :
  size_vector = 6;
  peflag = 1;                   // we need Pair::ev_tally() to be run

  did_compute = 0;
  did_setup = 0;
  invoked_peratom = invoked_scalar = -1;
  nmax = -1;
  stress = NULL;
@@ -80,39 +80,26 @@ void ComputeHeatFluxTally::init()
                    || force->improper || force->kspace)
      error->warning(FLERR,"Compute heat/flux/tally only called from pair style");
  }
  did_compute = -1;
  did_setup = -1;
}


/* ---------------------------------------------------------------------- */
void ComputeHeatFluxTally::pair_tally_callback(int i, int j, int nlocal, int newton,
                                             double evdwl, double ecoul, double fpair,
                                             double dx, double dy, double dz)
void ComputeHeatFluxTally::pair_setup_callback(int, int)
{
  const int ntotal = atom->nlocal + atom->nghost;
  const int * const mask = atom->mask;

  // do setup work that needs to be done only once per timestep

  if (did_compute != update->ntimestep) {
    did_compute = update->ntimestep;

    // grow local stress and eatom arrays if necessary
    // needs to be atom->nmax in length
  // grow per-atom storage, if needed

  if (atom->nmax > nmax) {
    memory->destroy(stress);
      nmax = atom->nmax;
      memory->create(stress,nmax,6,"heat/flux/tally:stress");

    memory->destroy(eatom);
    nmax = atom->nmax;
    memory->create(stress,nmax,6,"heat/flux/tally:stress");
    memory->create(eatom,nmax,"heat/flux/tally:eatom");
  }

    // clear storage as needed
  // clear storage

    if (newton) {
  for (int i=0; i < ntotal; ++i) {
    eatom[i] = 0.0;
    stress[i][0] = 0.0;
@@ -122,22 +109,21 @@ void ComputeHeatFluxTally::pair_tally_callback(int i, int j, int nlocal, int new
    stress[i][4] = 0.0;
    stress[i][5] = 0.0;
  }
    } else {
      for (int i=0; i < atom->nlocal; ++i) {
        eatom[i] = 0.0;
        stress[i][0] = 0.0;
        stress[i][1] = 0.0;
        stress[i][2] = 0.0;
        stress[i][3] = 0.0;
        stress[i][4] = 0.0;
        stress[i][5] = 0.0;
      }
    }

  for (int i=0; i < size_vector; ++i)
    vector[i] = heatj[i] = 0.0;

  did_setup = update->ntimestep;
}

/* ---------------------------------------------------------------------- */
void ComputeHeatFluxTally::pair_tally_callback(int i, int j, int nlocal, int newton,
                                             double evdwl, double ecoul, double fpair,
                                             double dx, double dy, double dz)
{
  const int ntotal = atom->nlocal + atom->nghost;
  const int * const mask = atom->mask;

  if ( ((mask[i] & groupbit) && (mask[j] & groupbit2))
       || ((mask[i] & groupbit2) && (mask[j] & groupbit)) ) {

@@ -215,7 +201,7 @@ void ComputeHeatFluxTally::unpack_reverse_comm(int n, int *list, double *buf)
void ComputeHeatFluxTally::compute_vector()
{
  invoked_vector = update->ntimestep;
  if ((did_compute != invoked_vector) || (update->eflag_global != invoked_vector))
  if ((did_setup != invoked_vector) || (update->eflag_global != invoked_vector))
    error->all(FLERR,"Energy was not tallied on needed timestep");

  // collect contributions from ghost atoms
+2 −1
Original line number Diff line number Diff line
@@ -38,12 +38,13 @@ class ComputeHeatFluxTally : public Compute {
  void unpack_reverse_comm(int, int *, double *);
  double memory_usage();

  void pair_setup_callback(int, int);
  void pair_tally_callback(int, int, int, int,
                           double, double, double,
                           double, double, double);

 private:
  bigint did_compute;
  bigint did_setup;
  int nmax,igroup2,groupbit2;
  double **stress,*eatom;
  double *heatj;
+10 −11
Original line number Diff line number Diff line
@@ -42,7 +42,7 @@ ComputePEMolTally::ComputePEMolTally(LAMMPS *lmp, int narg, char **arg) :
  extvector = 1;
  peflag = 1;                   // we need Pair::ev_tally() to be run

  did_compute = invoked_vector = -1;
  did_setup = invoked_vector = -1;
  vector = new double[size_vector];
}

@@ -74,9 +74,16 @@ void ComputePEMolTally::init()
                    || force->improper || force->kspace)
      error->warning(FLERR,"Compute pe/mol/tally only called from pair style");
  }
  did_compute = -1;
  did_setup = -1;
}

/* ---------------------------------------------------------------------- */

void ComputePEMolTally::pair_setup_callback(int, int)
{
  etotal[0] = etotal[1] = etotal[2] = etotal[3] = 0.0;
  did_setup = update->ntimestep;
}

/* ---------------------------------------------------------------------- */
void ComputePEMolTally::pair_tally_callback(int i, int j, int nlocal, int newton,
@@ -86,14 +93,6 @@ void ComputePEMolTally::pair_tally_callback(int i, int j, int nlocal, int newton
  const int * const mask = atom->mask;
  const tagint * const molid = atom->molecule;

  // do setup work that needs to be done only once per timestep

  if (did_compute != update->ntimestep) {
    did_compute = update->ntimestep;

    etotal[0] = etotal[1] = etotal[2] = etotal[3] = 0.0;
  }

  if ( ((mask[i] & groupbit) && (mask[j] & groupbit2))
     || ((mask[i] & groupbit2) && (mask[j] & groupbit)) ){

@@ -120,7 +119,7 @@ void ComputePEMolTally::pair_tally_callback(int i, int j, int nlocal, int newton
void ComputePEMolTally::compute_vector()
{
  invoked_vector = update->ntimestep;
  if ((did_compute != invoked_vector) || (update->eflag_global != invoked_vector))
  if ((did_setup != invoked_vector) || (update->eflag_global != invoked_vector))
    error->all(FLERR,"Energy was not tallied on needed timestep");

  // sum accumulated energies across procs
Loading