Unverified Commit 040cbe37 authored by Axel Kohlmeyer's avatar Axel Kohlmeyer Committed by GitHub
Browse files

Merge pull request #2080 from akohlmey/collected-small-changes

Collected small changes for the next patch release
parents af1aa034 3d018698
Loading
Loading
Loading
Loading
+15 −9
Original line number Diff line number Diff line
@@ -320,12 +320,13 @@ to have an executable that will run on this and newer architectures.

.. note::

   If you run Kokkos on a newer GPU architecture than what LAMMPS was
   compiled with, there will be a delay during device initialization
   since the just-in-time compiler has to recompile all GPU kernels
   for the new hardware.  This is, however, not possible when compiled
   for NVIDIA GPUs with CC 3.x (Kepler) for GPUs with CC 5.0 (Maxwell)
   and newer as they are not compatible.
   If you run Kokkos on a different GPU architecture than what LAMMPS
   was compiled with, there will be a delay during device initialization
   while the just-in-time compiler is recompiling all GPU kernels for
   the new hardware.  This is, however, only supported for GPUs of the
   **same** major hardware version and different minor hardware versions,
   e.g. 5.0 and 5.2 but not 5.2 and 6.0.  LAMMPS will abort with an
   error message indicating a mismatch if that happens.

The settings discussed below have been tested with LAMMPS and are
confirmed to work.  Kokkos is an active project with ongoing improvements
@@ -580,9 +581,14 @@ recommended when developing a Kokkos-enabled style in LAMMPS.

The CMake option ``-DKokkos_ENABLE_CUDA_UVM=on`` or the makefile
setting ``KOKKOS_CUDA_OPTIONS=enable_lambda,force_uvm`` enables the
use of CUDA "Unified Virtual Memory" in Kokkos.  Please note, that
the LAMMPS KOKKOS package must **always** be compiled with the
*enable_lambda* option when using GPUs.
use of CUDA "Unified Virtual Memory" (UVM) in Kokkos.  UVM makes it
possible to transparently use RAM on the host to supplement the memory
used on the GPU (with some performance penalty) and thus enables running
larger problems that would otherwise not fit into the RAM on the GPU.

Please note that the LAMMPS KOKKOS package must **always** be compiled
with the *enable_lambda* option when using GPUs.  The CMake configuration
will thus always enable it.

----------

+3 −3
Original line number Diff line number Diff line
@@ -353,7 +353,7 @@ int MPI_Get_count(MPI_Status *status, MPI_Datatype datatype, int *count)

int MPI_Comm_split(MPI_Comm comm, int color, int key, MPI_Comm *comm_out)
{
  *comm_out = comm;
  *comm_out = comm+1;
  return 0;
}

@@ -361,7 +361,7 @@ int MPI_Comm_split(MPI_Comm comm, int color, int key, MPI_Comm *comm_out)

int MPI_Comm_dup(MPI_Comm comm, MPI_Comm *comm_out)
{
  *comm_out = comm;
  *comm_out = comm+1;
  return 0;
}

@@ -377,7 +377,7 @@ MPI_Fint MPI_Comm_c2f(MPI_Comm comm) { return comm; };

MPI_Comm MPI_Comm_f2c(MPI_Fint comm) { return comm; };

//* ---------------------------------------------------------------------- */
/* ---------------------------------------------------------------------- */

int MPI_Comm_group(MPI_Comm comm, MPI_Group *group)
{
+7 −5
Original line number Diff line number Diff line
@@ -603,10 +603,12 @@ void Atom::add_peratom(const char *name, void *address,

void Atom::add_peratom_change_columns(const char *name, int cols)
{
  int i;
  for (int i = 0; i < nperatom; i++)
    if (strcmp(name,peratom[i].name) == 0) peratom[i].cols = cols;
  if (i == nperatom)
  for (int i = 0; i < nperatom; i++) {
    if (strcmp(name,peratom[i].name) == 0) {
            peratom[i].cols = cols;
            return;
    }
  }
  error->all(FLERR,"Could not find name of peratom array for column change");
}

+26 −33
Original line number Diff line number Diff line
@@ -53,7 +53,6 @@ AtomVec::AtomVec(LAMMPS *lmp) : Pointers(lmp)
  argcopy = NULL;

  threads = NULL;
  nthreads = comm->nthreads;

  // peratom variables auto-included in corresponding child style fields string
  // these fields cannot be specified in the fields string
@@ -192,7 +191,7 @@ int AtomVec::grow_nmax_bonus(int nmax_bonus)
void AtomVec::grow(int n)
{
  int datatype,cols,maxcols;
  void *pdata,*plength;
  void *pdata;

  if (n == 0) grow_nmax();
  else nmax = n;
@@ -206,38 +205,39 @@ void AtomVec::grow(int n)
  image = memory->grow(atom->image,nmax,"atom:image");
  x = memory->grow(atom->x,nmax,3,"atom:x");
  v = memory->grow(atom->v,nmax,3,"atom:v");
  f = memory->grow(atom->f,nmax*nthreads,3,"atom:f");
  f = memory->grow(atom->f,nmax*comm->nthreads,3,"atom:f");

  for (int i = 0; i < ngrow; i++) {
    pdata = mgrow.pdata[i];
    datatype = mgrow.datatype[i];
    cols = mgrow.cols[i];
    const int nthreads = threads[i] ? comm->nthreads : 1;
    if (datatype == DOUBLE) {
      if (cols == 0)
        memory->grow(*((double **) pdata),nmax*threads[i],"atom:dvec");
        memory->grow(*((double **) pdata),nmax*nthreads,"atom:dvec");
      else if (cols > 0)
        memory->grow(*((double ***) pdata),nmax*threads[i],cols,"atom:darray");
        memory->grow(*((double ***) pdata),nmax*nthreads,cols,"atom:darray");
      else {
        maxcols = *(mgrow.maxcols[i]);
        memory->grow(*((double ***) pdata),nmax*threads[i],maxcols,"atom:darray");
        memory->grow(*((double ***) pdata),nmax*nthreads,maxcols,"atom:darray");
      }
    } else if (datatype == INT) {
      if (cols == 0)
        memory->grow(*((int **) pdata),nmax*threads[i],"atom:ivec");
        memory->grow(*((int **) pdata),nmax*nthreads,"atom:ivec");
      else if (cols > 0)
        memory->grow(*((int ***) pdata),nmax*threads[i],cols,"atom:iarray");
        memory->grow(*((int ***) pdata),nmax*nthreads,cols,"atom:iarray");
      else {
        maxcols = *(mgrow.maxcols[i]);
        memory->grow(*((int ***) pdata),nmax*threads[i],maxcols,"atom:iarray");
        memory->grow(*((int ***) pdata),nmax*nthreads,maxcols,"atom:iarray");
      }
    } else if (datatype == BIGINT) {
      if (cols == 0)
        memory->grow(*((bigint **) pdata),nmax*threads[i],"atom:bvec");
        memory->grow(*((bigint **) pdata),nmax*nthreads,"atom:bvec");
      else if (cols > 0)
        memory->grow(*((bigint ***) pdata),nmax*threads[i],cols,"atom:barray");
        memory->grow(*((bigint ***) pdata),nmax*nthreads,cols,"atom:barray");
      else {
        maxcols = *(mgrow.maxcols[i]);
        memory->grow(*((int ***) pdata),nmax*threads[i],maxcols,"atom:barray");
        memory->grow(*((int ***) pdata),nmax*nthreads,maxcols,"atom:barray");
      }
    }
  }
@@ -1775,7 +1775,7 @@ void AtomVec::pack_data(double **buf)

  int nlocal = atom->nlocal;

  for (int i = 0; i < nlocal; i++) {
  for (i = 0; i < nlocal; i++) {

    // if needed, change values before packing

@@ -1841,33 +1841,26 @@ void AtomVec::write_data(FILE *fp, int n, double **buf)

    j = 1;
    for (nn = 1; nn < ndata_atom; nn++) {
      pdata = mdata_atom.pdata[nn];
      datatype = mdata_atom.datatype[nn];
      cols = mdata_atom.cols[nn];
      if (datatype == DOUBLE) {
        if (cols == 0) {
          double *vec = *((double **) pdata);
          fprintf(fp," %-1.16e",buf[i][j++]);
        } else {
          double **array = *((double ***) pdata);
          for (m = 0; m < cols; m++)
            fprintf(fp," %-1.16e",buf[i][j++]);
        }
      } else if (datatype == INT) {
        if (cols == 0) {
          int *vec = *((int **) pdata);
          fprintf(fp," %d",(int) ubuf(buf[i][j++]).i);
        } else {
          int **array = *((int ***) pdata);
          for (m = 0; m < cols; m++)
            fprintf(fp," %d",(int) ubuf(buf[i][j++]).i);
        }
      } else if (datatype == BIGINT) {
        if (cols == 0) {
          bigint *vec = *((bigint **) pdata);
          fprintf(fp," " BIGINT_FORMAT,(bigint) ubuf(buf[i][j++]).i);
        } else {
          bigint **array = *((bigint ***) pdata);
          for (m = 0; m < cols; m++)
            fprintf(fp," " BIGINT_FORMAT,(bigint) ubuf(buf[i][j++]).i);
        }
@@ -2297,39 +2290,40 @@ bigint AtomVec::memory_usage()
  bytes += memory->usage(image,nmax);
  bytes += memory->usage(x,nmax,3);
  bytes += memory->usage(v,nmax,3);
  bytes += memory->usage(f,nmax*nthreads,3);
  bytes += memory->usage(f,nmax*comm->nthreads,3);

  for (int i = 0; i < ngrow; i++) {
    pdata = mgrow.pdata[i];
    datatype = mgrow.datatype[i];
    cols = mgrow.cols[i];
    index = mgrow.index[i];
    const int nthreads = threads[i] ? comm->nthreads : 1;
    if (datatype == DOUBLE) {
      if (cols == 0) {
        bytes += memory->usage(*((double **) pdata),nmax*threads[i]);
        bytes += memory->usage(*((double **) pdata),nmax*nthreads);
      } else if (cols > 0) {
        bytes += memory->usage(*((double ***) pdata),nmax*threads[i],cols);
        bytes += memory->usage(*((double ***) pdata),nmax*nthreads,cols);
      } else {
        maxcols = *(mgrow.maxcols[i]);
        bytes += memory->usage(*((double ***) pdata),nmax*threads[i],maxcols);
        bytes += memory->usage(*((double ***) pdata),nmax*nthreads,maxcols);
      }
    } else if (datatype == INT) {
      if (cols == 0) {
        bytes += memory->usage(*((int **) pdata),nmax*threads[i]);
        bytes += memory->usage(*((int **) pdata),nmax*nthreads);
      } else if (cols > 0) {
        bytes += memory->usage(*((int ***) pdata),nmax*threads[i],cols);
        bytes += memory->usage(*((int ***) pdata),nmax*nthreads,cols);
      } else {
        maxcols = *(mgrow.maxcols[i]);
        bytes += memory->usage(*((int ***) pdata),nmax*threads[i],maxcols);
        bytes += memory->usage(*((int ***) pdata),nmax*nthreads,maxcols);
      }
    } else if (datatype == BIGINT) {
      if (cols == 0) {
        bytes += memory->usage(*((bigint **) pdata),nmax*threads[i]);
        bytes += memory->usage(*((bigint **) pdata),nmax*nthreads);
      } else if (cols > 0) {
        bytes += memory->usage(*((bigint ***) pdata),nmax*threads[i],cols);
        bytes += memory->usage(*((bigint ***) pdata),nmax*nthreads,cols);
      } else {
        maxcols = *(mgrow.maxcols[i]);
        bytes += memory->usage(*((bigint ***) pdata),nmax*threads[i],maxcols);
        bytes += memory->usage(*((bigint ***) pdata),nmax*nthreads,maxcols);
      }
    }
  }
@@ -2390,11 +2384,10 @@ void AtomVec::setup_fields()

  // create threads data struct for grow and memory_usage to use

  threads = new int[ngrow];
  threads = new bool[ngrow];
  for (int i = 0; i < ngrow; i++) {
    Atom::PerAtom *field = &atom->peratom[mgrow.index[i]];
    if (field->threadflag) threads[i] = nthreads;
    else threads[i] = 1;
    threads[i] = (field->threadflag) ? true : false;
  }

  // set style-specific sizes
+1 −2
Original line number Diff line number Diff line
@@ -207,8 +207,7 @@ class AtomVec : protected Pointers {
  // thread info for fields that are duplicated over threads
  // used by fields in grow() and memory_usage()

  int nthreads;
  int *threads;
  bool *threads;

  // union data struct for packing 32-bit and 64-bit ints into double bufs
  // this avoids aliasing issues by having 2 pointers (double,int)
Loading