Unverified commit deb24626, authored by Axel Kohlmeyer
Browse files

fix gc vs. gc6 issues in fix pppm/disp

parent 0440b13a
Loading
Loading
Loading
Loading
+61 −57
Original line number Diff line number Diff line
@@ -376,7 +376,7 @@ void PPPMDispIntel::compute(int eflag, int vflag)
    }

    gc6->reverse_comm_kspace(this,1,sizeof(FFT_SCALAR),REVERSE_RHO_G,
                             gc_buf1,gc_buf2,MPI_FFT_SCALAR);
                             gc6_buf1,gc6_buf2,MPI_FFT_SCALAR);

    brick2fft(nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6,
              density_brick_g, density_fft_g, work1_6,remap_6);
@@ -391,7 +391,7 @@ void PPPMDispIntel::compute(int eflag, int vflag)
                 v2_brick_g, v3_brick_g, v4_brick_g, v5_brick_g);

      gc6->forward_comm_kspace(this,1,sizeof(FFT_SCALAR),FORWARD_AD_G,
                               gc_buf1,gc_buf2,MPI_FFT_SCALAR);
                               gc6_buf1,gc6_buf2,MPI_FFT_SCALAR);

      if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
        fieldforce_g_ad<float,double>(fix->get_mixed_buffers());
@@ -403,7 +403,7 @@ void PPPMDispIntel::compute(int eflag, int vflag)

      if (vflag_atom)
        gc6->forward_comm_kspace(this,7,sizeof(FFT_SCALAR),FORWARD_AD_PERATOM_G,
                                 gc_buf1,gc_buf2,MPI_FFT_SCALAR);
                                 gc6_buf1,gc6_buf2,MPI_FFT_SCALAR);

    } else {
      poisson_ik(work1_6, work2_6, density_fft_g, fft1_6, fft2_6,
@@ -416,7 +416,7 @@ void PPPMDispIntel::compute(int eflag, int vflag)
                 v1_brick_g, v2_brick_g, v3_brick_g, v4_brick_g, v5_brick_g);

      gc6->forward_comm_kspace(this,3,sizeof(FFT_SCALAR),FORWARD_IK_G,
                               gc_buf1,gc_buf2,MPI_FFT_SCALAR);
                               gc6_buf1,gc6_buf2,MPI_FFT_SCALAR);

      if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
        fieldforce_g_ik<float,double>(fix->get_mixed_buffers());
@@ -428,7 +428,7 @@ void PPPMDispIntel::compute(int eflag, int vflag)

      if (evflag_atom)
        gc6->forward_comm_kspace(this,6,sizeof(FFT_SCALAR),FORWARD_IK_PERATOM_G,
                                 gc_buf1,gc_buf2,MPI_FFT_SCALAR);
                                 gc6_buf1,gc6_buf2,MPI_FFT_SCALAR);
    }

    if (evflag_atom) fieldforce_g_peratom();
@@ -487,7 +487,7 @@ void PPPMDispIntel::compute(int eflag, int vflag)
                    v2_brick_a4, v3_brick_a4, v4_brick_a4, v5_brick_a4);

      gc6->forward_comm_kspace(this,7,sizeof(FFT_SCALAR),FORWARD_AD_A,
                               gc_buf1,gc_buf2,MPI_FFT_SCALAR);
                               gc6_buf1,gc6_buf2,MPI_FFT_SCALAR);

      if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
        fieldforce_a_ad<float,double>(fix->get_mixed_buffers());
@@ -499,7 +499,7 @@ void PPPMDispIntel::compute(int eflag, int vflag)

      if (evflag_atom)
        gc6->forward_comm_kspace(this,42,sizeof(FFT_SCALAR),FORWARD_AD_PERATOM_A,
                                 gc_buf1,gc_buf2,MPI_FFT_SCALAR);
                                 gc6_buf1,gc6_buf2,MPI_FFT_SCALAR);

    }  else {
      poisson_ik(work1_6, work2_6, density_fft_a3, fft1_6, fft2_6,
@@ -530,7 +530,7 @@ void PPPMDispIntel::compute(int eflag, int vflag)
                    v3_brick_a4, v4_brick_a4, v5_brick_a4);

      gc6->forward_comm_kspace(this,18,sizeof(FFT_SCALAR),FORWARD_IK_A,
                               gc_buf1,gc_buf2,MPI_FFT_SCALAR);
                               gc6_buf1,gc6_buf2,MPI_FFT_SCALAR);

      if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
        fieldforce_a_ik<float,double>(fix->get_mixed_buffers());
@@ -542,7 +542,7 @@ void PPPMDispIntel::compute(int eflag, int vflag)

      if (evflag_atom)
        gc6->forward_comm_kspace(this,49,sizeof(FFT_SCALAR),FORWARD_IK_PERATOM_A,
                                 gc_buf1,gc_buf2,MPI_FFT_SCALAR);
                                 gc6_buf1,gc6_buf2,MPI_FFT_SCALAR);
    }

    if (evflag_atom) fieldforce_a_peratom();
@@ -592,7 +592,7 @@ void PPPMDispIntel::compute(int eflag, int vflag)
      }

      gc6->forward_comm_kspace(this,1,sizeof(FFT_SCALAR),FORWARD_AD_NONE,
                               gc_buf1,gc_buf2,MPI_FFT_SCALAR);
                               gc6_buf1,gc6_buf2,MPI_FFT_SCALAR);

      if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
        fieldforce_none_ad<float,double>(fix->get_mixed_buffers());
@@ -604,7 +604,7 @@ void PPPMDispIntel::compute(int eflag, int vflag)

      if (vflag_atom)
        gc6->forward_comm_kspace(this,6,sizeof(FFT_SCALAR),FORWARD_AD_PERATOM_NONE,
                                 gc_buf1,gc_buf2,MPI_FFT_SCALAR);
                                 gc6_buf1,gc6_buf2,MPI_FFT_SCALAR);

    } else {
      int n = 0;
@@ -621,7 +621,7 @@ void PPPMDispIntel::compute(int eflag, int vflag)
      }

      gc6->forward_comm_kspace(this,3,sizeof(FFT_SCALAR),FORWARD_IK_NONE,
                               gc_buf1,gc_buf2,MPI_FFT_SCALAR);
                               gc6_buf1,gc6_buf2,MPI_FFT_SCALAR);

      if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
        fieldforce_none_ik<float,double>(fix->get_mixed_buffers());
@@ -633,7 +633,7 @@ void PPPMDispIntel::compute(int eflag, int vflag)

      if (evflag_atom)
        gc6->forward_comm_kspace(this,7,sizeof(FFT_SCALAR),FORWARD_IK_PERATOM_NONE,
                                 gc_buf1,gc_buf2,MPI_FFT_SCALAR);
                                 gc6_buf1,gc6_buf2,MPI_FFT_SCALAR);
    }

    if (evflag_atom) fieldforce_none_peratom();
@@ -2983,10 +2983,10 @@ void PPPMDispIntel::fieldforce_none_ad(IntelBuffers<flt_t,acc_t> * /*buffers*/)

void PPPMDispIntel::precompute_rho()
{

  half_rho_scale = (rho_points - 1.)/2.;
  half_rho_scale_plus = half_rho_scale + 0.5;

  if (function[0]) {
    for (int i = 0; i < rho_points; i++) {
      FFT_SCALAR dx = -1. + 1./half_rho_scale * (FFT_SCALAR)i;
      #if defined(LMP_SIMD_COMPILER)
@@ -3018,6 +3018,9 @@ void PPPMDispIntel::precompute_rho()
        }
      }
    }
  }

  if (function[1]+function[2]+function[3]) {
    for (int i = 0; i < rho_points; i++) {
      FFT_SCALAR dx = -1. + 1./half_rho_scale * (FFT_SCALAR)i;
      #if defined(LMP_SIMD_COMPILER)
@@ -3050,6 +3053,7 @@ void PPPMDispIntel::precompute_rho()
      }
    }
  }
}

/* ----------------------------------------------------------------------
   Returns 0 if Intel optimizations for PPPM ignored due to offload