Commit eae73126 authored by Steve Plimpton's avatar Steve Plimpton
Browse files

changes to more PPPM variants for tiled support

parent 5caeb2c2
Loading
Loading
Loading
Loading
+31 −17
Original line number Diff line number Diff line
@@ -203,11 +203,7 @@ void PPPMGPU::compute(int eflag, int vflag)
  // If need per-atom energies/virials, allocate per-atom arrays here
  // so that particle map on host can be done concurrently with GPU calculations

  if (evflag_atom && !peratom_allocate_flag) {
    allocate_peratom();
    cg_peratom->ghost_notify();
    cg_peratom->setup();
  }
  if (evflag_atom && !peratom_allocate_flag) allocate_peratom();

  if (triclinic == 0) {
    bool success = true;
@@ -258,10 +254,12 @@ void PPPMGPU::compute(int eflag, int vflag)
  // remap from 3d decomposition to FFT decomposition

  if (triclinic == 0) {
    cg->reverse_comm(this,REVERSE_RHO_GPU);
    gc->reverse_comm_kspace(this,1,sizeof(FFT_SCALAR),REVERSE_RHO_GPU,
			    gc_buf1,gc_buf2,MPI_FFT_SCALAR);
    brick2fft_gpu();
  } else {
    cg->reverse_comm(this,REVERSE_RHO);
    gc->reverse_comm_kspace(this,1,sizeof(FFT_SCALAR),REVERSE_RHO,
			    gc_buf1,gc_buf2,MPI_FFT_SCALAR);
    PPPM::brick2fft();
  }

@@ -274,16 +272,22 @@ void PPPMGPU::compute(int eflag, int vflag)
  // all procs communicate E-field values
  // to fill ghost cells surrounding their 3d bricks

  if (differentiation_flag == 1) cg->forward_comm(this,FORWARD_AD);
  else cg->forward_comm(this,FORWARD_IK);
  if (differentiation_flag == 1)
    gc->forward_comm_kspace(this,1,sizeof(FFT_SCALAR),FORWARD_AD,
			    gc_buf1,gc_buf2,MPI_FFT_SCALAR);
  else
    gc->forward_comm_kspace(this,3,sizeof(FFT_SCALAR),FORWARD_IK,
			    gc_buf1,gc_buf2,MPI_FFT_SCALAR);

  // extra per-atom energy/virial communication

  if (evflag_atom) {
    if (differentiation_flag == 1 && vflag_atom)
      cg_peratom->forward_comm(this,FORWARD_AD_PERATOM);
      gc->forward_comm_kspace(this,6,sizeof(FFT_SCALAR),FORWARD_AD_PERATOM,
			      gc_buf1,gc_buf2,MPI_FFT_SCALAR);
    else if (differentiation_flag == 0)
      cg_peratom->forward_comm(this,FORWARD_IK_PERATOM);
      gc->forward_comm_kspace(this,7,sizeof(FFT_SCALAR),FORWARD_IK_PERATOM,
			      gc_buf1,gc_buf2,MPI_FFT_SCALAR);
  }

  poisson_time += MPI_Wtime()-t3;
@@ -510,8 +514,10 @@ void PPPMGPU::poisson_ik()
   pack own values to buf to send to another proc
------------------------------------------------------------------------- */

void PPPMGPU::pack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
void PPPMGPU::pack_forward_grid(int flag, void *vbuf, int nlist, int *list)
{
  FFT_SCALAR *buf = (FFT_SCALAR *) vbuf;

  int n = 0;

  if (flag == FORWARD_IK) {
@@ -568,8 +574,10 @@ void PPPMGPU::pack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
   unpack another proc's own values from buf and set own ghost values
------------------------------------------------------------------------- */

void PPPMGPU::unpack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
void PPPMGPU::unpack_forward_grid(int flag, void *vbuf, int nlist, int *list)
{
  FFT_SCALAR *buf = (FFT_SCALAR *) vbuf;

  int n = 0;

  if (flag == FORWARD_IK) {
@@ -626,8 +634,10 @@ void PPPMGPU::unpack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
   pack ghost values into buf to send to another proc
------------------------------------------------------------------------- */

void PPPMGPU::pack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list)
void PPPMGPU::pack_reverse_grid(int flag, void *vbuf, int nlist, int *list)
{
  FFT_SCALAR *buf = (FFT_SCALAR *) vbuf;

  if (flag == REVERSE_RHO_GPU) {
    FFT_SCALAR *src = &density_brick_gpu[nzlo_out][nylo_out][nxlo_out];
    for (int i = 0; i < nlist; i++)
@@ -643,8 +653,10 @@ void PPPMGPU::pack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list)
   unpack another proc's ghost values from buf and add to own values
------------------------------------------------------------------------- */

void PPPMGPU::unpack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list)
void PPPMGPU::unpack_reverse_grid(int flag, void *vbuf, int nlist, int *list)
{
  FFT_SCALAR *buf = (FFT_SCALAR *) vbuf;

  if (flag == REVERSE_RHO_GPU) {
    FFT_SCALAR *dest = &density_brick_gpu[nzlo_out][nylo_out][nxlo_out];
    for (int i = 0; i < nlist; i++)
@@ -818,7 +830,8 @@ void PPPMGPU::compute_group_group(int groupbit_A, int groupbit_B, int AA_flag)
  density_brick = density_A_brick;
  density_fft = density_A_fft;

  cg->reverse_comm(this,REVERSE_RHO);
  gc->reverse_comm_kspace(this,1,sizeof(FFT_SCALAR),REVERSE_RHO,
			  gc_buf1,gc_buf2,MPI_FFT_SCALAR);
  brick2fft();

  // group B
@@ -826,7 +839,8 @@ void PPPMGPU::compute_group_group(int groupbit_A, int groupbit_B, int AA_flag)
  density_brick = density_B_brick;
  density_fft = density_B_fft;

  cg->reverse_comm(this,REVERSE_RHO);
  gc->reverse_comm_kspace(this,1,sizeof(FFT_SCALAR),REVERSE_RHO,
			  gc_buf1,gc_buf2,MPI_FFT_SCALAR);
  brick2fft();

  // switch back pointers
+4 −4
Original line number Diff line number Diff line
@@ -46,10 +46,10 @@ class PPPMGPU : public PPPM {
  void brick2fft_gpu();
  virtual void poisson_ik();

  void pack_forward(int, FFT_SCALAR *, int, int *);
  void unpack_forward(int, FFT_SCALAR *, int, int *);
  void pack_reverse(int, FFT_SCALAR *, int, int *);
  void unpack_reverse(int, FFT_SCALAR *, int, int *);
  void pack_forward_grid(int, void *, int, int *);
  void unpack_forward_grid(int, void *, int, int *);
  void pack_reverse_grid(int, void *, int, int *);
  void unpack_reverse_grid(int, void *, int, int *);

  FFT_SCALAR ***create_3d_offset(int, int, int, int, int, int, const char *,
                                 FFT_SCALAR *, int);
+681 −150

File changed.

Preview size limit exceeded, changes collapsed.

+159 −42
Original line number Diff line number Diff line
@@ -16,55 +16,56 @@

#include "pointers.h"

#ifdef FFT_SINGLE
typedef float FFT_SCALAR;
#define MPI_FFT_SCALAR MPI_FLOAT
#else
typedef double FFT_SCALAR;
#define MPI_FFT_SCALAR MPI_DOUBLE
#endif

namespace LAMMPS_NS {

class GridComm : protected Pointers {
 public:
  GridComm(class LAMMPS *, MPI_Comm, int, int,
           int, int, int, int, int, int,
  GridComm(class LAMMPS *, MPI_Comm, int, int, int,
	   int, int, int, int, int, int,
	   int, int, int, int, int, int);
  GridComm(class LAMMPS *, MPI_Comm, int, int,
           int, int, int, int, int, int,
  GridComm(class LAMMPS *, MPI_Comm, int, int, int,
	   int, int, int, int, int, int,
	   int, int, int, int, int, int,
	   int, int, int, int, int, int);
  ~GridComm();
  void ghost_notify();
  int ghost_overlap();
  void setup();
  void forward_comm(class KSpace *, int);
  void reverse_comm(class KSpace *, int);
  double memory_usage();
  void setup(int &, int &);
  int ghost_adjacent();
  void forward_comm_kspace(class KSpace *, int, int, int,
			   void *, void *, MPI_Datatype);
  void reverse_comm_kspace(class KSpace *, int, int, int,
			   void *, void *, MPI_Datatype);

 private:
  int me;
  int nforward,nreverse;
  int me,nprocs;
  int layout;                 // REGULAR or TILED
  MPI_Comm gridcomm;
  MPI_Request request;

  // in = inclusive indices of 3d grid chunk that I own
  // out = inclusive indices of 3d grid chunk I own plus ghosts I use
  // proc = 6 neighbor procs that surround me
  // ghost = # of my owned grid planes needed from me
  //         by each of 6 neighbor procs to become their ghost planes
  // inputs from caller via constructor

  int nx,ny,nz;               // size of global grid in all 3 dims
  int inxlo,inxhi;            // inclusive extent of my grid chunk
  int inylo,inyhi;            //   0 <= in <= N-1
  int inzlo,inzhi;   
  int outxlo,outxhi;          // inclusive extent of my grid chunk plus
  int outylo,outyhi;          //   ghost cells in all 6 directions
  int outzlo,outzhi;          // lo indices can be < 0, hi indices can be >= N
  int outxlo_max,outxhi_max;  // ??
  int outylo_max,outyhi_max;
  int outzlo_max,outzhi_max;

  int inxlo,inxhi,inylo,inyhi,inzlo,inzhi;
  int outxlo,outxhi,outylo,outyhi,outzlo,outzhi;
  int outxlo_max,outxhi_max,outylo_max,outyhi_max,outzlo_max,outzhi_max;
  int procxlo,procxhi,procylo,procyhi,proczlo,proczhi;
  int ghostxlo,ghostxhi,ghostylo,ghostyhi,ghostzlo,ghostzhi;
  // -------------------------------------------
  // internal variables for REGULAR layout
  // -------------------------------------------

  int nbuf;
  FFT_SCALAR *buf1,*buf2;
  int procxlo,procxhi;     // 6 neighbor procs that adjoin me
  int procylo,procyhi;     //   not used for comm_style = tiled
  int proczlo,proczhi;
  
  int ghostxlo,ghostxhi;   // # of my owned grid planes needed
  int ghostylo,ghostyhi;   //   by neighbor procs in each dir as their ghost planes
  int ghostzlo,ghostzhi;

  // swap = exchange of owned and ghost grid cells between 2 procs, including self
  
  struct Swap {
    int sendproc;       // proc to send to for forward comm
@@ -75,9 +76,125 @@ class GridComm : protected Pointers {
    int *unpacklist;    // 3d array offsets to unpack
  };

  int nswap;
  int nswap,maxswap;
  Swap *swap;

  // -------------------------------------------
  // internal variables for TILED layout
  // -------------------------------------------

  int *overlap_procs;
  MPI_Request *requests;

  // RCB tree of cut info
  // each proc contributes one value, except proc 0
  
  // one entry of the RCB cut tree: which dimension was cut and where
  // NOTE(review): how entries are indexed/gathered across procs is not
  //   visible in this header -- confirm against the .cpp
  struct RCBinfo {
    int dim;        // 0,1,2 = which dim the cut is in
    int cut;        // grid index of lowest cell in upper half of cut
  };

  RCBinfo *rcbinfo;
    
  // overlap = a proc whose owned cells overlap with my extended ghost box
  // includes overlaps across periodic boundaries, can also be self
  
  // one overlap between my ghost box and the owned cells of a proc
  // box is expressed inside the global grid; pbc maps it back onto my
  //   (possibly out-of-grid) ghost box
  // NOTE(review): box[6] presumably holds lo/hi index pairs for the
  //   3 dims -- confirm the ordering against the .cpp
  struct Overlap {
    int proc;            // proc whose owned cells overlap my ghost cells
    int box[6];          // box that overlaps otherproc's owned cells
                         // this box is wholly contained within global grid
    int pbc[3];          // PBC offsets to convert box to a portion of my ghost box
                         // my ghost box may extend beyond global grid
  };

  int noverlap,maxoverlap;
  Overlap *overlap;
  
  // request = sent to each proc whose owned cells overlap my ghost cells
  
  // payload of a request message: identifies the sender, which of the
  //   sender's overlaps it refers to, and the overlapping box
  // NOTE(review): box[6] presumably holds lo/hi index pairs for the
  //   3 dims -- confirm the ordering against the .cpp
  struct Request {
    int sender;          // sending proc
    int index;           // index of overlap on sender
    int box[6];          // box that overlaps receiver's owned cells
                         // wholly contained within global grid
  };

  Request *srequest,*rrequest;
  
  // response = reply from each proc whose owned cells overlap my ghost cells
  
  // payload of a reply message: echoes the overlap index of the initial
  //   request and returns the box that overlaps the responder's owned cells
  // NOTE(review): box[6] presumably holds lo/hi index pairs for the
  //   3 dims -- confirm the ordering against the .cpp
  struct Response {
    int index;           // index of my overlap for the initial request
    int box[6];          // box that overlaps responder's owned cells
                         // wholly contained within global grid
                         // has to be unwrapped by PBC to map to my ghost cells
  };

  Response *sresponse,*rresponse;
  
  // send = proc to send a subset of my owned cells to, for forward comm
  // for reverse comm, proc I receive ghost overlaps with my owned cells from
  // offset used in reverse comm to recv a message in middle of a large buffer

  // one message partner on the "owned cells" side of an exchange
  struct Send {
    int proc;           // forward comm: proc to send my owned cells to
                        // reverse comm: proc I receive ghost overlaps from
    int npack;          // presumably # of entries in packlist -- TODO confirm
    int *packlist;      // presumably 3d array offsets of cells to pack
                        //   -- TODO confirm
    int offset;         // used in reverse comm to recv a message
                        //   in middle of a large buffer
  };

  // recv = proc to recv a subset of my ghost cells from, for forward comm
  // for reverse comm, proc I send a subset of my ghost cells to
  // offset used in forward comm to recv a message in middle of a large buffer
  
  // one message partner on the "ghost cells" side of an exchange
  struct Recv {
    int proc;           // forward comm: proc to recv my ghost cells from
                        // reverse comm: proc I send my ghost cells to
    int nunpack;        // presumably # of entries in unpacklist -- TODO confirm
    int *unpacklist;    // presumably 3d array offsets of cells to unpack
                        //   -- TODO confirm
    int offset;         // used in forward comm to recv a message
                        //   in middle of a large buffer
  };

  int adjacent;      // 0 on a proc who receives ghosts from a non-neighbor proc

  // copy = subset of my owned cells to copy into subset of my ghost cells
  // that describes forward comm, for reverse comm it is the opposite
  
  // one on-processor copy between my owned cells and my ghost cells
  // forward comm copies owned -> ghost, reverse comm is the opposite
  struct Copy {
    int npack;          // presumably # of entries in packlist -- TODO confirm
    int nunpack;        // presumably # of entries in unpacklist -- TODO confirm
    int *packlist;      // presumably 3d array offsets of cells to pack
                        //   -- TODO confirm
    int *unpacklist;    // presumably 3d array offsets of cells to unpack
                        //   -- TODO confirm
  };

  int nsend,nrecv,ncopy;
  Send *send;
  Recv *recv;
  Copy *copy;

  // -------------------------------------------
  // internal methods
  // -------------------------------------------
  
  void setup_regular(int &, int &);
  void setup_tiled(int &, int &);
  void ghost_box_drop(int *, int *);
  void box_drop_grid(int *, int, int, int &, int *);
  
  int ghost_adjacent_regular();
  int ghost_adjacent_tiled();
  
  void forward_comm_kspace_regular(class KSpace *, int, int, int,
				   void *, void *, MPI_Datatype);
  void forward_comm_kspace_tiled(class KSpace *, int, int, int,
				 void *, void *, MPI_Datatype);
  void reverse_comm_kspace_regular(class KSpace *, int, int, int,
				   void *, void *, MPI_Datatype);
  void reverse_comm_kspace_tiled(class KSpace *, int, int, int,
				 void *, void *, MPI_Datatype);

  void grow_swap();
  void grow_overlap();
  
  int indices(int *&, int, int, int, int, int, int);
};

+175 −167

File changed.

Preview size limit exceeded, changes collapsed.

Loading