Commit cc776798 authored by Axel Kohlmeyer's avatar Axel Kohlmeyer
Browse files

implement wall clock based load balancing cost function support

(cherry picked from commit 2a57dc6db46079836cc5aecbe1a938d27fb9f989)
parent b8ae885d
Loading
Loading
Loading
Loading
+13 −1
Original line number Diff line number Diff line
@@ -33,9 +33,11 @@ style = {x} or {y} or {z} or {shift} or {rcb} :l
    stopthresh = stop balancing when this imbalance threshhold is reached
  {rcb} args = none :pre
zero or more keyword/value pairs may be appended :l
keyword = {out} or {group} :l
keyword = {out} or {clock} or {group} :l
  {out} value = filename
    filename = write each processor's sub-domain to a file
  {clock} value = weight
    weight = weight factor between 0 and 1 for including wall clock data
  {group} args = Ngroup groupID-1 weight-1 groupID-2 weight-2...
    Ngroup = number of groups with assigned weights
    groupID-1, groupID-2, ... = group names
@@ -49,6 +51,7 @@ balance 1.2 shift xz 5 1.1
balance 1.0 shift xz 5 1.1
balance 1.1 rcb
balance 1.0 shift x 10 1.1 group 2 fast 0.5 slow 2.0
balance 1.0 shift x 10 1.1 clock 0.8
balance 1.0 shift x 20 1.0 out tmp.balance :pre

[Description:]
@@ -74,6 +77,15 @@ An atom with a total weight of 5 then be will be considered to
have 5x the computational cost than an atom with the default weight
of 1.0.

With the optional {clock} keyword, "timer data"_timer.html is
incorporated into the load balancing cost function. The required
weight factor argument (a number between 0 and 1) determines to
which degree timing information is included. The timer information
is taken from the preceding run. If no such information is
available, e.g. at the beginning of an input, of when the 
"timer"_timer.html level is set to either {loop} or {off},
the clock keyword has no effect.

Load-balancing is typically most useful if the particles in the
simulation box have a spatially-varying density distribution or
where the computational cost varies signficantly between different
+13 −0
Original line number Diff line number Diff line
@@ -26,6 +26,8 @@ zero or more keyword/value pairs may be appended :l
keyword = {out} or {group} :l
  {out} value = filename
    filename = write each processor's sub-domain to a file, at each re-balancing
  {clock} value = weight
    weight = weight factor between 0 and 1 for including wall clock data
  {group} args = Ngroup groupID-1 weight-1 groupID-2 weight-2...
    Ngroup = number of groups with assigned weights
    groupID-1, groupID-2, ... = group names
@@ -37,6 +39,8 @@ keyword = {out} or {group} :l
fix 2 all balance 1000 1.05 shift x 10 1.05
fix 2 all balance 100 0.9 shift xy 20 1.1 out tmp.balance
fix 2 all balance 100 0.9 shift xy 20 1.1 group 3 substrate 3.0 solvent 1.0 solute 0.8 out tmp.balance
fix 2 all balance 100 1.0 shift x 10 1.1 clock 0.8
fix 2 all balance 100 1.0 shift xy 5 1.1 group 2 fast 0.8 slow 2.0 clock 0.8
fix 2 all balance 1000 1.1 rcb :pre

[Description:]
@@ -59,6 +63,15 @@ An atom with a total weight of 5 then be will be considered to
have 5x the computational cost than an atom with the default weight
of 1.0.

With the optional {clock} keyword, "timer data"_timer.html is
incorporated into the load balancing cost function. The required
weight factor argument (a number between 0 and 1) determines to
which degree timing information is included. The timer information
is taken from the preceding run. If no such information is
available, e.g. at the beginning of an input, of when the 
"timer"_timer.html level is set to either {loop} or {off},
the clock keyword has no effect.

Load-balancing is typically most useful if the particles in the
simulation box have a spatially-varying density distribution or
where the computational cost varies signficantly between different
+72 −8
Original line number Diff line number Diff line
@@ -28,6 +28,7 @@
#include "memory.h"
#include "error.h"
#include "group.h"
#include "timer.h"

using namespace LAMMPS_NS;

@@ -57,6 +58,9 @@ Balance::Balance(LAMMPS *lmp) : Pointers(lmp)
  ngroup = 0;
  group_id = NULL;
  group_weight = NULL;

  last_clock = 0.0;
  clock_imbalance = NULL;
}

/* ---------------------------------------------------------------------- */
@@ -86,6 +90,7 @@ Balance::~Balance()

  delete [] group_id;
  delete [] group_weight;
  delete [] clock_imbalance;

  if (fp) fclose(fp);
}
@@ -209,6 +214,13 @@ void Balance::command(int narg, char **arg)
        if (fp == NULL) error->one(FLERR,"Cannot open balance output file");
      }
      iarg += 2;
    } else if (strcmp(arg[iarg],"clock") == 0) {
      if (iarg+2 > narg) error->all(FLERR,"Illegal balance command");
      double factor = force->numeric(FLERR,arg[iarg+1]);
      if (factor < 0.0 || factor > 1.0)
        error->all(FLERR,"Illegal balance command");
      imbalance_clock(factor);
      iarg += 2;
    } else if (strcmp(arg[iarg],"group") == 0) {
      group_setup(narg-iarg-1,arg+iarg+1);
      iarg += 2*ngroup + 2;
@@ -451,6 +463,49 @@ double Balance::getcost(int i)
   return cost;
}

/* ----------------------------------------------------------------------
   calculate imbalance based on timer for Pair+Neighbor
------------------------------------------------------------------------- */

void Balance::imbalance_clock(double factor)
{

  // Compute the cost function of based on relevant timers
  if (timer->has_normal()) {
    double cost = -last_clock;
    if (!clock_imbalance) clock_imbalance = new double[nprocs+1];
    double *clock_cost = new double[nprocs+1];
    for (int i = 0; i <= nprocs; ++i) clock_cost[i] = 0.0;
    cost += timer->get_wall(Timer::PAIR);
    cost += timer->get_wall(Timer::NEIGH);
    if (force->kspace) cost += timer->get_wall(Timer::KSPACE);

    clock_cost[me] = cost;
    clock_cost[nprocs] = cost;
    MPI_Allreduce(clock_cost,clock_imbalance,nprocs+1,MPI_DOUBLE,MPI_SUM,world);
    
    const double avg_cost = clock_imbalance[nprocs]/nprocs;
    if (cost > 0.0) {
      for (int i = 0; i < nprocs; ++i)
        clock_imbalance[i] = (1.0-factor) + factor*clock_imbalance[i]/avg_cost;
    } else {
      for (int i = 0; i < nprocs; ++i)
        clock_imbalance[i] = 1.0;
    }

#if 1 // BALANCE_DEBUG    
    if (me == 0) {
      printf("clock imbalance scaled using factor %g\n",factor);
      for (int i = 0; i < nprocs; ++i)
        printf(" % 2d: %4.2f",i,clock_imbalance[i]);
      puts("");
    }
#endif

    delete [] clock_cost;
  }
}

/* ----------------------------------------------------------------------
   calculate imbalance based on (weighted) nlocal
   return max = max atom per proc
@@ -465,6 +520,7 @@ double Balance::imbalance_nlocal(int &maxcost)
  for (int i=0; i < atom->nlocal; ++i) {
    cost += getcost(i);
  }
  if (clock_imbalance) cost *= clock_imbalance[me];

  double imbalance = 1.0;
  int intcost = (int)cost;
@@ -511,8 +567,12 @@ double Balance::imbalance_splits(int &max)
    proccost[iz*nx*ny + iy*nx + ix] += getcost(i);
  }

  for (int i = 0; i < nprocs; i++)
  for (int i = 0; i < nprocs; i++) {
    if (clock_imbalance)
      proccount[i] = static_cast<int>(proccost[i]*clock_imbalance[i]);
    else
      proccount[i] = static_cast<int>(proccost[i]);
  }

  MPI_Allreduce(proccount,allproccount,nprocs,MPI_INT,MPI_SUM,world);
  bigint sum = 0;
@@ -577,11 +637,12 @@ int *Balance::bisection(int sortflag)
  // then invert() to create list of proc assignements for my atoms
  // Use specified weightings for each atom rather than atom count

  double weights[nlocal];
  double factor = 1.0;
  if (clock_imbalance) factor = clock_imbalance[me];

  for (int i = 0; i < nlocal; i++) {
    weights[i] = getcost(i);
  }
  double weights[nlocal];
  for (int i = 0; i < nlocal; i++)
    weights[i] = getcost(i)*factor;

  //rcb->compute(dim,atom->nlocal,atom->x,NULL,boxlo,boxhi);
  rcb->compute(dim,atom->nlocal,atom->x,weights,shrinklo,shrinkhi);
@@ -763,6 +824,7 @@ int Balance::shift()
    for (i=0; i < atom->nlocal; i++)
      cost += getcost(i);

    if (clock_imbalance) cost *= clock_imbalance[me];
    int intcost = (int)cost;
    int totalcost;
    MPI_Allreduce(&intcost,&totalcost,1,MPI_INT,MPI_SUM,world);
@@ -906,10 +968,12 @@ void Balance::tally(int dim, int n, double *split)
  int nlocal = atom->nlocal;
  int index;

  double factor = 1.0;
  if (clock_imbalance) factor = clock_imbalance[me];

  for (int i = 0; i < nlocal; i++) {
    index = binary(x[i][dim],n,split);
    onecost[index] += getcost(i);
    onecost[index] += getcost(i)*factor;
  }

  for (int i = 0; i < n; i++) onecount[i] = static_cast<bigint>(onecost[i]);
+4 −0
Original line number Diff line number Diff line
@@ -37,6 +37,7 @@ class Balance : protected Pointers {
  int shift();
  int *bisection(int sortflag = 0);
  double imbalance_nlocal(int &);
  void imbalance_clock(double);
  void dumpout(bigint, FILE *);

 private:
@@ -70,6 +71,9 @@ class Balance : protected Pointers {
  int    *group_id;          // group ids for weights
  double *group_weight;      // weights of groups

  double *clock_imbalance;   // computed wall clock imbalance
  double last_clock;         // accumulated clock at previous balancing step

  int outflag;               // for output of balance results to file
  FILE *fp;
  int firststep;
+7 −0
Original line number Diff line number Diff line
@@ -89,6 +89,12 @@ FixBalance::FixBalance(LAMMPS *lmp, int narg, char **arg) :
      outflag = 1;
      outarg = iarg+1;
      iarg += 2;
    } else if (strcmp(arg[iarg],"clock") == 0) {
      if (iarg+2 > narg) error->all(FLERR,"Illegal fix balance command");
      clock_factor = force->numeric(FLERR,arg[iarg+1]);
      if (clock_factor < 0.0 || clock_factor > 1.0)
        error->all(FLERR,"Illegal fix balance command");
      iarg += 2;
    } else if (strcmp(arg[iarg],"group") == 0) {
      int ngroup = balance->group_setup(narg-iarg-1,arg+iarg+1);
      iarg += 2 + 2*ngroup;
@@ -221,6 +227,7 @@ void FixBalance::pre_exchange()

  // return if imbalance < threshhold

  balance->imbalance_clock(clock_factor);
  imbnow = balance->imbalance_nlocal(maxperproc);
  if (imbnow <= thresh) {
    if (nevery) next_reneighbor = (update->ntimestep/nevery)*nevery + nevery;
Loading