Merge branch 'comm-nprocs-opt' of https://github.com/lammps/lammps into comm-nprocs-opt (4161202e) · Commits · 郑智淋 / lammps

doc/src/comm_modify.txt

+11 −3

Original line number	Diff line number	Diff line
		@@ -13,7 +13,8 @@ comm_modify command :h3
		comm_modify keyword value ... :pre

		zero or more keyword/value pairs may be appended :ulb,l
		keyword = {mode} or {cutoff} or {cutoff/multi} or {group} or {vel} :l
		keyword = {mode} or {cutoff} or {cutoff/multi} or {group} or {vel} or
		{ring_neighbors} :l
		{mode} value = {single} or {multi} = communicate atoms within a single or multiple distances
		{cutoff} value = Rcut (distance units) = communicate atoms from this far away
		{cutoff/multi} type value
		@@ -21,6 +22,8 @@ keyword = {mode} or {cutoff} or {cutoff/multi} or {group} or {vel} :l
		value = Rcut (distance units) = communicate atoms for selected types from this far away
		{group} value = group-ID = only communicate atoms in the group
		{vel} value = {yes} or {no} = do or do not communicate velocity info with ghost atoms :pre
		{ring_neighbors} value = {yes} or {no} = do or do not use optimized ring
		communication to only nearest neighbors :pre
		:ule

		[Examples:]
		@@ -143,6 +146,11 @@ with its "remap v" option enabled, then the velocities for ghost atoms
		also include components due to any velocity shift that occurs across
		that boundary (e.g. due to dilation or shear).

		The {ring_neighbors} keyword enables an optimization to perform ring
		communications only to nearest neighbor processors for supported
		communication operations. This can result in substantial speedups for
		calculations using a very large number of processors.

		[Restrictions:]

		Communication mode {multi} is currently only available for
		@@ -155,5 +163,5 @@ Communication mode {multi} is currently only available for
		[Default:]

		The option defaults are mode = single, group = all, cutoff = 0.0, vel =
		no. The cutoff default of 0.0 means that ghost cutoff = neighbor
		cutoff = pairwise force cutoff + neighbor skin.
		no, ring_neighbors = no. The cutoff default of 0.0 means that ghost
		cutoff = neighbor cutoff = pairwise force cutoff + neighbor skin.

doc/src/replicate.txt

+10 −2

Original line number	Diff line number	Diff line
		@@ -10,9 +10,11 @@ replicate command :h3

		[Syntax:]

		replicate nx ny nz :pre
		replicate nx ny nz {keyword} :pre

		nx,ny,nz = replication factors in each dimension :ul
		nx,ny,nz = replication factors in each dimension :ulb
		optional {keyword} = {bbox} :l
		{bbox} = only check atoms in replicas that overlap with a processor's subdomain :ule

		[Examples:]

		@@ -43,6 +45,12 @@ file that crosses a periodic boundary should be between two atoms with
		image flags that differ by 1. This will allow the bond to be
		unwrapped appropriately.

		The optional keyword {bbox} uses a bounding box to only check atoms
		in replicas that overlap with a processor's subdomain when assigning
		atoms to processors, and thus can result in substantial speedups for
		calculations using a large number of processors. It does require
		temporarily using more memory.

		[Restrictions:]

		A 2d simulation cannot be replicated in the z dimension.

src/comm.cpp

+6 −3

Original line number	Diff line number	Diff line
		@@ -309,9 +309,12 @@ void Comm::modify_params(int narg, char **arg)
		else if (strcmp(arg[iarg+1],"no") == 0) ghost_velocity = 0;
		else error->all(FLERR,"Illegal comm_modify command");
		iarg += 2;
		} else if (strcmp(arg[iarg],"ring_neighbor") == 0) {
		neighborflag = 1;
		iarg++;
		} else if (strcmp(arg[iarg],"ring_neighbors") == 0) {
		if (iarg+2 > narg) error->all(FLERR,"Illegal comm_modify command");
		if (strcmp(arg[iarg+1],"yes") == 0) neighborflag = 1;
		else if (strcmp(arg[iarg+1],"no") == 0) neighborflag = 0;
		else error->all(FLERR,"Illegal comm_modify command");
		iarg += 2;
		} else error->all(FLERR,"Illegal comm_modify command");
		}
		}

src/replicate.cpp

+90 −85

Original line number	Diff line number	Diff line
		@@ -58,9 +58,9 @@ void Replicate::command(int narg, char **arg)
		int nz = force->inumeric(FLERR,arg[2]);
		int nrep = nxnynz;

		int use_more_memory = 0;
		int bbox_flag = 0;
		if (narg == 4)
		if(strcmp(arg[3],"memory") == 0) use_more_memory = 1;
		if (strcmp(arg[3],"bbox") == 0) bbox_flag = 1;

		// error and warning checks

		@@ -113,7 +113,7 @@ void Replicate::command(int narg, char **arg)
		_imagehi[1] = 0;
		_imagehi[2] = 0;

		if(use_more_memory) {
		if (bbox_flag) {

		for (i=0; i<atom->nlocal; ++i) {
		imageint image = atom->image[i];
		@@ -315,7 +315,7 @@ void Replicate::command(int narg, char **arg)
		double *coord;
		int tag_enable = atom->tag_enable;

		if(use_more_memory) {
		if (bbox_flag) {

		// allgather size of buf on each proc

		@@ -333,9 +333,9 @@ void Replicate::command(int narg, char **arg)
		MPI_Allreduce(&n, &size_buf_all, 1, MPI_INT, MPI_SUM, world);

		if (me == 0 && screen) {
		fprintf(screen,"Replicate::bounding box image: lo= %i %i %i hi= %i %i %i\n",
		fprintf(screen," bounding box image = (%i %i %i) to (%i %i %i)\n",
		_imagelo[0],_imagelo[1],_imagelo[2],_imagehi[0],_imagehi[1],_imagehi[2]);
		fprintf(screen,"Replicate:: buf_all memory allocating %10.2f MB\n",
		fprintf(screen," bounding box extra memory = %.2f MB\n",
		(double)size_buf_all*sizeof(double)/1024/1024);
		}

		@@ -344,7 +344,7 @@ void Replicate::command(int narg, char **arg)
		int * disp_buf_rnk;
		memory->create(disp_buf_rnk, nprocs, "replicate:disp_buf_rnk");
		disp_buf_rnk[0] = 0;
		for (int i=1; i<nprocs; ++i) disp_buf_rnk[i] = disp_buf_rnk[i-1] + size_buf_rnk[i-1];
		for (i=1; i<nprocs; ++i) disp_buf_rnk[i] = disp_buf_rnk[i-1] + size_buf_rnk[i-1];

		// allgather buf_all

		@@ -416,13 +416,19 @@ void Replicate::command(int narg, char **arg)
		double _lhi[3];
		domain->x2lamda(_hi,_lhi);

		if( _llo[0] > (subhi[0] - EPSILON) \|\| _lhi[0] < (sublo[0] + EPSILON) ) xoverlap = 0;
		if( _llo[1] > (subhi[1] - EPSILON) \|\| _lhi[1] < (sublo[1] + EPSILON) ) yoverlap = 0;
		if( _llo[2] > (subhi[2] - EPSILON) \|\| _lhi[2] < (sublo[2] + EPSILON) ) zoverlap = 0;
		if (_llo[0] > (subhi[0] - EPSILON)
		\|\| _lhi[0] < (sublo[0] + EPSILON) ) xoverlap = 0;
		if (_llo[1] > (subhi[1] - EPSILON)
		\|\| _lhi[1] < (sublo[1] + EPSILON) ) yoverlap = 0;
		if (_llo[2] > (subhi[2] - EPSILON)
		\|\| _lhi[2] < (sublo[2] + EPSILON) ) zoverlap = 0;
		} else {
		if( _lo[0] > (subhi[0] - EPSILON) \|\| _hi[0] < (sublo[0] + EPSILON) ) xoverlap = 0;
		if( _lo[1] > (subhi[1] - EPSILON) \|\| _hi[1] < (sublo[1] + EPSILON) ) yoverlap = 0;
		if( _lo[2] > (subhi[2] - EPSILON) \|\| _hi[2] < (sublo[2] + EPSILON) ) zoverlap = 0;
		if (_lo[0] > (subhi[0] - EPSILON)
		\|\| _hi[0] < (sublo[0] + EPSILON) ) xoverlap = 0;
		if (_lo[1] > (subhi[1] - EPSILON)
		\|\| _hi[1] < (sublo[1] + EPSILON) ) yoverlap = 0;
		if (_lo[2] > (subhi[2] - EPSILON)
		\|\| _hi[2] < (sublo[2] + EPSILON) ) zoverlap = 0;
		}

		int overlap = 0;
		@@ -602,7 +608,7 @@ void Replicate::command(int narg, char **arg)
		MPI_Reduce(&num_replicas_added, &sum, 1, MPI_INT, MPI_SUM, 0, world);
		double avg = (double) sum / nprocs;
		if (me == 0 && screen)
		fprintf(screen,"Replicate: average # of replicas added to proc= %f out of %i (%f %%)\n",
		fprintf(screen," average # of replicas added to proc = %.2f out of %i (%.2f %%)\n",
		avg,nxnynz,avg/(nxnynz)*100.0);

		} else {
		@@ -694,8 +700,7 @@ void Replicate::command(int narg, char **arg)
		}
		}
		}

		} // if(use_more_memory)
		} // if (bbox_flag)

		// free communication buffer and old atom class

Admin message