Unverified commit 4c46119a, authored by Axel Kohlmeyer and committed by GitHub
Browse files

Merge pull request #2280 from lammps/gridcomm-tiled

Support for tiled decompositions in PPPM
parents e7639d49 42018d3b
Loading
Loading
Loading
Loading
+31 −17
Original line number Diff line number Diff line
@@ -203,11 +203,7 @@ void PPPMGPU::compute(int eflag, int vflag)
  // If per-atom energies/virials are needed, allocate the per-atom arrays here
  // so that the particle map on the host can be done concurrently with GPU calculations

  if (evflag_atom && !peratom_allocate_flag) {
    allocate_peratom();
    cg_peratom->ghost_notify();
    cg_peratom->setup();
  }
  if (evflag_atom && !peratom_allocate_flag) allocate_peratom();

  if (triclinic == 0) {
    bool success = true;
@@ -258,10 +254,12 @@ void PPPMGPU::compute(int eflag, int vflag)
  // remap from 3d decomposition to FFT decomposition

  if (triclinic == 0) {
    cg->reverse_comm(this,REVERSE_RHO_GPU);
    gc->reverse_comm_kspace(this,1,sizeof(FFT_SCALAR),REVERSE_RHO_GPU,
			    gc_buf1,gc_buf2,MPI_FFT_SCALAR);
    brick2fft_gpu();
  } else {
    cg->reverse_comm(this,REVERSE_RHO);
    gc->reverse_comm_kspace(this,1,sizeof(FFT_SCALAR),REVERSE_RHO,
			    gc_buf1,gc_buf2,MPI_FFT_SCALAR);
    PPPM::brick2fft();
  }

@@ -274,16 +272,22 @@ void PPPMGPU::compute(int eflag, int vflag)
  // all procs communicate E-field values
  // to fill ghost cells surrounding their 3d bricks

  if (differentiation_flag == 1) cg->forward_comm(this,FORWARD_AD);
  else cg->forward_comm(this,FORWARD_IK);
  if (differentiation_flag == 1)
    gc->forward_comm_kspace(this,1,sizeof(FFT_SCALAR),FORWARD_AD,
			    gc_buf1,gc_buf2,MPI_FFT_SCALAR);
  else
    gc->forward_comm_kspace(this,3,sizeof(FFT_SCALAR),FORWARD_IK,
			    gc_buf1,gc_buf2,MPI_FFT_SCALAR);

  // extra per-atom energy/virial communication

  if (evflag_atom) {
    if (differentiation_flag == 1 && vflag_atom)
      cg_peratom->forward_comm(this,FORWARD_AD_PERATOM);
      gc->forward_comm_kspace(this,6,sizeof(FFT_SCALAR),FORWARD_AD_PERATOM,
			      gc_buf1,gc_buf2,MPI_FFT_SCALAR);
    else if (differentiation_flag == 0)
      cg_peratom->forward_comm(this,FORWARD_IK_PERATOM);
      gc->forward_comm_kspace(this,7,sizeof(FFT_SCALAR),FORWARD_IK_PERATOM,
			      gc_buf1,gc_buf2,MPI_FFT_SCALAR);
  }

  poisson_time += MPI_Wtime()-t3;
@@ -510,8 +514,10 @@ void PPPMGPU::poisson_ik()
   pack own values to buf to send to another proc
------------------------------------------------------------------------- */

void PPPMGPU::pack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
void PPPMGPU::pack_forward_grid(int flag, void *vbuf, int nlist, int *list)
{
  FFT_SCALAR *buf = (FFT_SCALAR *) vbuf;

  int n = 0;

  if (flag == FORWARD_IK) {
@@ -568,8 +574,10 @@ void PPPMGPU::pack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
   unpack another proc's own values from buf and set own ghost values
------------------------------------------------------------------------- */

void PPPMGPU::unpack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
void PPPMGPU::unpack_forward_grid(int flag, void *vbuf, int nlist, int *list)
{
  FFT_SCALAR *buf = (FFT_SCALAR *) vbuf;

  int n = 0;

  if (flag == FORWARD_IK) {
@@ -626,8 +634,10 @@ void PPPMGPU::unpack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
   pack ghost values into buf to send to another proc
------------------------------------------------------------------------- */

void PPPMGPU::pack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list)
void PPPMGPU::pack_reverse_grid(int flag, void *vbuf, int nlist, int *list)
{
  FFT_SCALAR *buf = (FFT_SCALAR *) vbuf;

  if (flag == REVERSE_RHO_GPU) {
    FFT_SCALAR *src = &density_brick_gpu[nzlo_out][nylo_out][nxlo_out];
    for (int i = 0; i < nlist; i++)
@@ -643,8 +653,10 @@ void PPPMGPU::pack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list)
   unpack another proc's ghost values from buf and add to own values
------------------------------------------------------------------------- */

void PPPMGPU::unpack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list)
void PPPMGPU::unpack_reverse_grid(int flag, void *vbuf, int nlist, int *list)
{
  FFT_SCALAR *buf = (FFT_SCALAR *) vbuf;

  if (flag == REVERSE_RHO_GPU) {
    FFT_SCALAR *dest = &density_brick_gpu[nzlo_out][nylo_out][nxlo_out];
    for (int i = 0; i < nlist; i++)
@@ -818,7 +830,8 @@ void PPPMGPU::compute_group_group(int groupbit_A, int groupbit_B, int AA_flag)
  density_brick = density_A_brick;
  density_fft = density_A_fft;

  cg->reverse_comm(this,REVERSE_RHO);
  gc->reverse_comm_kspace(this,1,sizeof(FFT_SCALAR),REVERSE_RHO,
			  gc_buf1,gc_buf2,MPI_FFT_SCALAR);
  brick2fft();

  // group B
@@ -826,7 +839,8 @@ void PPPMGPU::compute_group_group(int groupbit_A, int groupbit_B, int AA_flag)
  density_brick = density_B_brick;
  density_fft = density_B_fft;

  cg->reverse_comm(this,REVERSE_RHO);
  gc->reverse_comm_kspace(this,1,sizeof(FFT_SCALAR),REVERSE_RHO,
			  gc_buf1,gc_buf2,MPI_FFT_SCALAR);
  brick2fft();

  // switch back pointers
+4 −4
Original line number Diff line number Diff line
@@ -46,10 +46,10 @@ class PPPMGPU : public PPPM {
  void brick2fft_gpu();
  virtual void poisson_ik();

  void pack_forward(int, FFT_SCALAR *, int, int *);
  void unpack_forward(int, FFT_SCALAR *, int, int *);
  void pack_reverse(int, FFT_SCALAR *, int, int *);
  void unpack_reverse(int, FFT_SCALAR *, int, int *);
  void pack_forward_grid(int, void *, int, int *);
  void unpack_forward_grid(int, void *, int, int *);
  void pack_reverse_grid(int, void *, int, int *);
  void unpack_reverse_grid(int, void *, int, int *);

  FFT_SCALAR ***create_3d_offset(int, int, int, int, int, int, const char *,
                                 FFT_SCALAR *, int);
+596 −263

File changed.

Preview size limit exceeded, changes collapsed.

+36 −58
Original line number Diff line number Diff line
@@ -14,81 +14,59 @@
#ifndef LMP_GRIDCOMM_KOKKOS_H
#define LMP_GRIDCOMM_KOKKOS_H

#include "pointers.h"
#include "gridcomm.h"
#include "kokkos_type.h"
#include "fftdata_kokkos.h"

#ifdef FFT_SINGLE
typedef float FFT_SCALAR;
#define MPI_FFT_SCALAR MPI_FLOAT
#else
typedef double FFT_SCALAR;
#define MPI_FFT_SCALAR MPI_DOUBLE
#endif

namespace LAMMPS_NS {

template<class DeviceType>
class GridCommKokkos : protected Pointers {
class GridCommKokkos : public GridComm {
 public:
  typedef DeviceType device_type;
  typedef ArrayTypes<DeviceType> AT;
  typedef FFTArrayTypes<DeviceType> FFT_AT;

  GridCommKokkos(class LAMMPS *, MPI_Comm, int, int,
           int, int, int, int, int, int,
  GridCommKokkos(class LAMMPS *, MPI_Comm, int, int, int,
           int, int, int, int, int, int,
           int, int, int, int, int, int);
  GridCommKokkos(class LAMMPS *, MPI_Comm, int, int,
           int, int, int, int, int, int,
  GridCommKokkos(class LAMMPS *, MPI_Comm, int, int, int, int,
           int, int, int, int, int, int,
           int, int, int, int, int, int,
           int, int, int, int, int, int);
  ~GridCommKokkos();
  void ghost_notify();
  int ghost_overlap();
  void setup();
  void forward_comm(class KSpace *, int);
  void reverse_comm(class KSpace *, int);
  double memory_usage();
  virtual ~GridCommKokkos();
  void forward_comm_kspace(class KSpace *, int, int,
                           FFT_DAT::tdual_FFT_SCALAR_1d &, FFT_DAT::tdual_FFT_SCALAR_1d &, MPI_Datatype);
  void reverse_comm_kspace(class KSpace *, int, int,
                           FFT_DAT::tdual_FFT_SCALAR_1d &, FFT_DAT::tdual_FFT_SCALAR_1d &, MPI_Datatype);

 private:
  int me;
  int nforward,nreverse;
  MPI_Comm gridcomm;
  MPI_Request request;

  // in = inclusive indices of 3d grid chunk that I own
  // out = inclusive indices of 3d grid chunk I own plus ghosts I use
  // proc = 6 neighbor procs that surround me
  // ghost = # of my owned grid planes needed from me
  //         by each of 6 neighbor procs to become their ghost planes

  int inxlo,inxhi,inylo,inyhi,inzlo,inzhi;
  int outxlo,outxhi,outylo,outyhi,outzlo,outzhi;
  int outxlo_max,outxhi_max,outylo_max,outyhi_max,outzlo_max,outzhi_max;
  int procxlo,procxhi,procylo,procyhi,proczlo,proczhi;
  int ghostxlo,ghostxhi,ghostylo,ghostyhi,ghostzlo,ghostzhi;

  int nbuf;
  //FFT_SCALAR *buf1,*buf2;
  FFT_DAT::tdual_FFT_SCALAR_1d k_buf1;
  FFT_DAT::tdual_FFT_SCALAR_1d k_buf2;

  struct Swap {
    int sendproc;       // proc to send to for forward comm
    int recvproc;       // proc to recv from for forward comm
    int npack;          // # of datums to pack
    int nunpack;        // # of datums to unpack
    //int *packlist;      // 3d array offsets to pack
    //int *unpacklist;    // 3d array offsets to unpack
  };
  DAT::tdual_int_2d k_swap_packlist;
  DAT::tdual_int_2d k_swap_unpacklist;

  DAT::tdual_int_2d k_send_packlist;

  DAT::tdual_int_2d k_recv_unpacklist;

  DAT::tdual_int_2d k_copy_packlist;
  DAT::tdual_int_2d k_copy_unpacklist;

  // -------------------------------------------
  // internal methods
  // -------------------------------------------

  void setup_regular(int &, int &);
  void setup_tiled(int &, int &);

  DAT::tdual_int_2d k_packlist;
  DAT::tdual_int_2d k_unpacklist;
  void forward_comm_kspace_regular(class KSpace *, int, int,
                                   FFT_DAT::tdual_FFT_SCALAR_1d &, FFT_DAT::tdual_FFT_SCALAR_1d &, MPI_Datatype);
  void forward_comm_kspace_tiled(class KSpace *, int, int,
                                 FFT_DAT::tdual_FFT_SCALAR_1d &, FFT_DAT::tdual_FFT_SCALAR_1d &, MPI_Datatype);
  void reverse_comm_kspace_regular(class KSpace *, int, int,
                                   FFT_DAT::tdual_FFT_SCALAR_1d &, FFT_DAT::tdual_FFT_SCALAR_1d &, MPI_Datatype);
  void reverse_comm_kspace_tiled(class KSpace *, int, int,
                                 FFT_DAT::tdual_FFT_SCALAR_1d &, FFT_DAT::tdual_FFT_SCALAR_1d &, MPI_Datatype);

  int nswap;
  Swap *swap;
  void grow_swap();

  int indices(DAT::tdual_int_2d &, int, int, int, int, int, int, int);
};
+4 −4
Original line number Diff line number Diff line
@@ -23,10 +23,10 @@ class KokkosBaseFFT {
  KokkosBaseFFT() {}

  //Kspace
  virtual void pack_forward_kspace_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {};
  virtual void unpack_forward_kspace_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {};
  virtual void pack_reverse_kspace_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {};
  virtual void unpack_reverse_kspace_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {};
  virtual void pack_forward_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {};
  virtual void unpack_forward_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, int, DAT::tdual_int_2d &, int) {};
  virtual void pack_reverse_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {};
  virtual void unpack_reverse_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, int, DAT::tdual_int_2d &, int) {};
};

}
Loading