Commit 4161202e authored by knight's avatar knight
Browse files

Merge branch 'comm-nprocs-opt' of https://github.com/lammps/lammps into comm-nprocs-opt

parents 347819a4 23f6f5ea
Loading
Loading
Loading
Loading
+11 −3
Original line number Diff line number Diff line
@@ -13,7 +13,8 @@ comm_modify command :h3
comm_modify keyword value ... :pre

zero or more keyword/value pairs may be appended :ulb,l
keyword = {mode} or {cutoff} or {cutoff/multi} or {group} or {vel} :l
keyword = {mode} or {cutoff} or {cutoff/multi} or {group} or {vel} or
{ring_neighbors} :l
  {mode} value = {single} or {multi} = communicate atoms within a single or multiple distances
  {cutoff} value = Rcut (distance units) = communicate atoms from this far away
  {cutoff/multi} type value
@@ -21,6 +22,8 @@ keyword = {mode} or {cutoff} or {cutoff/multi} or {group} or {vel} :l
     value = Rcut (distance units) = communicate atoms for selected types from this far away
  {group} value = group-ID = only communicate atoms in the group
  {vel} value = {yes} or {no} = do or do not communicate velocity info with ghost atoms :pre
  {ring_neighbors} value = {yes} or {no} = do or do not use optimized ring
communication to only nearest neighbors :pre
:ule

[Examples:]
@@ -143,6 +146,11 @@ with its "remap v" option enabled, then the velocities for ghost atoms
also include components due to any velocity shift that occurs across
that boundary (e.g. due to dilation or shear).

The {ring_neighbors} keyword enables an optimization to perform ring
communications only to nearest neighbor processors for supported
communication operations. This can result in substantial speedups for
calculations using a very large number of processors.

[Restrictions:]

Communication mode {multi} is currently only available for
@@ -155,5 +163,5 @@ Communication mode {multi} is currently only available for
[Default:]

The option defaults are mode = single, group = all, cutoff = 0.0, vel =
no.  The cutoff default of 0.0 means that ghost cutoff = neighbor
cutoff = pairwise force cutoff + neighbor skin.
no, ring_neighbors = no.  The cutoff default of 0.0 means that ghost
cutoff = neighbor cutoff = pairwise force cutoff + neighbor skin.
+10 −2
Original line number Diff line number Diff line
@@ -10,9 +10,11 @@ replicate command :h3

[Syntax:]

replicate nx ny nz :pre
replicate nx ny nz {keyword} :pre

nx,ny,nz = replication factors in each dimension :ul
nx,ny,nz = replication factors in each dimension :ulb
optional {keyword} = {bbox} :l
  {bbox} = only check atoms in replicas that overlap with a processor's subdomain :ule

[Examples:]

@@ -43,6 +45,12 @@ file that crosses a periodic boundary should be between two atoms with
image flags that differ by 1.  This will allow the bond to be
unwrapped appropriately.

The optional keyword {bbox} uses a bounding box to only check atoms
in replicas that overlap with a processor's subdomain when assigning
atoms to processors, and thus can result in substantial speedups for
calculations using a large number of processors. It does require
temporarily using more memory.

[Restrictions:]

A 2d simulation cannot be replicated in the z dimension.
+6 −3
Original line number Diff line number Diff line
@@ -309,9 +309,12 @@ void Comm::modify_params(int narg, char **arg)
      else if (strcmp(arg[iarg+1],"no") == 0) ghost_velocity = 0;
      else error->all(FLERR,"Illegal comm_modify command");
      iarg += 2;
    } else if (strcmp(arg[iarg],"ring_neighbor") == 0) {
      neighborflag = 1;
      iarg++;
    } else if (strcmp(arg[iarg],"ring_neighbors") == 0) {
      if (iarg+2 > narg) error->all(FLERR,"Illegal comm_modify command");
      if (strcmp(arg[iarg+1],"yes") == 0) neighborflag = 1;
      else if (strcmp(arg[iarg+1],"no") == 0) neighborflag = 0;
      else error->all(FLERR,"Illegal comm_modify command");
      iarg += 2;
    } else error->all(FLERR,"Illegal comm_modify command");
  }
}
+90 −85
Original line number Diff line number Diff line
@@ -58,9 +58,9 @@ void Replicate::command(int narg, char **arg)
  int nz = force->inumeric(FLERR,arg[2]);
  int nrep = nx*ny*nz;

  int use_more_memory = 0;
  int bbox_flag = 0;
  if (narg == 4)
    if(strcmp(arg[3],"memory") == 0) use_more_memory = 1;
    if (strcmp(arg[3],"bbox") == 0) bbox_flag = 1;

  // error and warning checks

@@ -113,7 +113,7 @@ void Replicate::command(int narg, char **arg)
  _imagehi[1] = 0;
  _imagehi[2] = 0;

  if(use_more_memory) {
  if (bbox_flag) {

    for (i=0; i<atom->nlocal; ++i) {
      imageint image = atom->image[i];
@@ -315,7 +315,7 @@ void Replicate::command(int narg, char **arg)
  double *coord;
  int tag_enable = atom->tag_enable;

  if(use_more_memory) {
  if (bbox_flag) {

    // allgather size of buf on each proc

@@ -333,9 +333,9 @@ void Replicate::command(int narg, char **arg)
    MPI_Allreduce(&n, &size_buf_all, 1, MPI_INT, MPI_SUM, world);

    if (me == 0 && screen) {
      fprintf(screen,"Replicate::bounding box image: lo= %i %i %i  hi= %i %i %i\n",
      fprintf(screen,"  bounding box image = (%i %i %i) to (%i %i %i)\n",
              _imagelo[0],_imagelo[1],_imagelo[2],_imagehi[0],_imagehi[1],_imagehi[2]);
      fprintf(screen,"Replicate:: buf_all memory allocating %10.2f MB\n",
      fprintf(screen,"  bounding box extra memory = %.2f MB\n",
              (double)size_buf_all*sizeof(double)/1024/1024);
    }

@@ -344,7 +344,7 @@ void Replicate::command(int narg, char **arg)
    int * disp_buf_rnk;
    memory->create(disp_buf_rnk, nprocs, "replicate:disp_buf_rnk");
    disp_buf_rnk[0] = 0;
    for (int i=1; i<nprocs; ++i) disp_buf_rnk[i] = disp_buf_rnk[i-1] + size_buf_rnk[i-1];
    for (i=1; i<nprocs; ++i) disp_buf_rnk[i] = disp_buf_rnk[i-1] + size_buf_rnk[i-1];

    // allgather buf_all

@@ -416,13 +416,19 @@ void Replicate::command(int narg, char **arg)
            double _lhi[3];
            domain->x2lamda(_hi,_lhi);

	    if( _llo[0] > (subhi[0] - EPSILON) || _lhi[0] < (sublo[0] + EPSILON) ) xoverlap = 0;
	    if( _llo[1] > (subhi[1] - EPSILON) || _lhi[1] < (sublo[1] + EPSILON) ) yoverlap = 0;
	    if( _llo[2] > (subhi[2] - EPSILON) || _lhi[2] < (sublo[2] + EPSILON) ) zoverlap = 0;
            if (_llo[0] > (subhi[0] - EPSILON)
                || _lhi[0] < (sublo[0] + EPSILON) ) xoverlap = 0;
            if (_llo[1] > (subhi[1] - EPSILON)
                || _lhi[1] < (sublo[1] + EPSILON) ) yoverlap = 0;
            if (_llo[2] > (subhi[2] - EPSILON)
                || _lhi[2] < (sublo[2] + EPSILON) ) zoverlap = 0;
          } else {
	    if( _lo[0] > (subhi[0] - EPSILON) || _hi[0] < (sublo[0] + EPSILON) ) xoverlap = 0;
	    if( _lo[1] > (subhi[1] - EPSILON) || _hi[1] < (sublo[1] + EPSILON) ) yoverlap = 0;
	    if( _lo[2] > (subhi[2] - EPSILON) || _hi[2] < (sublo[2] + EPSILON) ) zoverlap = 0;
            if (_lo[0] > (subhi[0] - EPSILON)
                || _hi[0] < (sublo[0] + EPSILON) ) xoverlap = 0;
            if (_lo[1] > (subhi[1] - EPSILON)
                || _hi[1] < (sublo[1] + EPSILON) ) yoverlap = 0;
            if (_lo[2] > (subhi[2] - EPSILON)
                || _hi[2] < (sublo[2] + EPSILON) ) zoverlap = 0;
          }

          int overlap = 0;
@@ -602,7 +608,7 @@ void Replicate::command(int narg, char **arg)
    MPI_Reduce(&num_replicas_added, &sum, 1, MPI_INT, MPI_SUM, 0, world);
    double avg = (double) sum / nprocs;
    if (me == 0 && screen)
      fprintf(screen,"Replicate: average # of replicas added to proc= %f out of %i (%f %%)\n",
      fprintf(screen,"  average # of replicas added to proc = %.2f out of %i (%.2f %%)\n",
              avg,nx*ny*nz,avg/(nx*ny*nz)*100.0);

  } else {
@@ -694,8 +700,7 @@ void Replicate::command(int narg, char **arg)
        }
      }
    }

  } // if(use_more_memory)
  } // if (bbox_flag)

  // free communication buffer and old atom class