Commit eb78f884 authored by Steve Plimpton's avatar Steve Plimpton
Browse files

another change to pppm/stagger

parent 6225a7d4
Loading
Loading
Loading
Loading
+122 −117
Original line number Diff line number Diff line
@@ -271,10 +271,16 @@ void PPPMStagger::compute(int eflag, int vflag)

double PPPMStagger::compute_qopt()
{
  if (differentiation_flag == 1)
    return compute_qopt_ad();
  if (differentiation_flag == 1) return compute_qopt_ad();

  int k,l,m,nx,ny,nz,kper,lper,mper;
  double snx,sny,snz;
  double cnx,cny,cnz;
  double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
  double sum1,sum2,dot1,dot2;
  double numerator,denominator;
  double u1,u2,u3,sqk;

  double qopt = 0.0;
  const double * const prd = domain->prd;

  const double xprd = prd[0];
@@ -285,43 +291,43 @@ double PPPMStagger::compute_qopt()
  const double unitky = (MY_2PI/yprd);
  const double unitkz = (MY_2PI/zprd_slab);

  double snx,sny,snz;
  double cnx,cny,cnz;
  double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
  double sum1,sum2,dot1,dot2;
  double numerator,denominator;
  double u1,u2,u3,sqk;

  int k,l,m,nx,ny,nz,kper,lper,mper;

  const int nbx = 2;
  const int nby = 2;
  const int nbz = 2;

  const int twoorder = 2*order;

  for (m = nzlo_fft; m <= nzhi_fft; m++) {
    mper = m - nz_pppm*(2*m/nz_pppm);
    snz = square(sin(0.5*unitkz*mper*zprd_slab/nz_pppm));
    cnz = cos(0.5*unitkz*mper*zprd_slab/nz_pppm);
  // loop over entire FFT grid
  // each proc calculates contributions from every Pth grid point
  
    for (l = nylo_fft; l <= nyhi_fft; l++) {
      lper = l - ny_pppm*(2*l/ny_pppm);
      sny = square(sin(0.5*unitky*lper*yprd/ny_pppm));
      cny = cos(0.5*unitky*lper*yprd/ny_pppm);
  bigint ngridtotal = (bigint) nx_pppm * ny_pppm * nz_pppm;
  int nxy_pppm = nx_pppm * ny_pppm;
  
      for (k = nxlo_fft; k <= nxhi_fft; k++) {
        kper = k - nx_pppm*(2*k/nx_pppm);
        snx = square(sin(0.5*unitkx*kper*xprd/nx_pppm));
        cnx = cos(0.5*unitkx*kper*xprd/nx_pppm);
  double qopt = 0.0;

  for (bigint i = me; i < ngridtotal; i += nprocs) {
    k = i % nx_pppm;
    l = (i/nx_pppm) % ny_pppm;
    m = i / nxy_pppm;

    const int kper = k - nx_pppm*(2*k/nx_pppm);
    const int lper = l - ny_pppm*(2*l/ny_pppm);
    const int mper = m - nz_pppm*(2*m/nz_pppm);

    sqk = square(unitkx*kper) + square(unitky*lper) + square(unitkz*mper);
    if (sqk == 0.0) continue;

    snx = square(sin(0.5*unitkx*kper*xprd/nx_pppm));
    cnx = cos(0.5*unitkx*kper*xprd/nx_pppm);
    sny = square(sin(0.5*unitky*lper*yprd/ny_pppm));
    cny = cos(0.5*unitky*lper*yprd/ny_pppm);
    snz = square(sin(0.5*unitkz*mper*zprd_slab/nz_pppm));
    cnz = cos(0.5*unitkz*mper*zprd_slab/nz_pppm);

        if (sqk != 0.0) {
    numerator = MY_4PI/sqk;
    denominator = 0.5*(gf_denom(snx,sny,snz) + gf_denom2(cnx,cny,cnz));
          sum1 = 0.0;
          sum2 = 0.0;

    sum1 = sum2 = 0.0;

    for (nx = -nbx; nx <= nbx; nx++) {
      qx = unitkx*(kper+nx_pppm*nx);
@@ -351,11 +357,10 @@ double PPPMStagger::compute_qopt()
	}
      }
    }
      
    qopt += sum1 - sum2/denominator;
  }
      }
    }
  }
  
  double qopt_all;
  MPI_Allreduce(&qopt,&qopt_all,1,MPI_DOUBLE,MPI_SUM,world);
  return qopt_all;
@@ -367,7 +372,11 @@ double PPPMStagger::compute_qopt()

double PPPMStagger::compute_qopt_ad()
{
  double qopt = 0.0;
  int k,l,m,nx,ny,nz,kper,lper,mper;
  double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
  double sum1,sum2,sum3,sum4,sum5,sum6,dot2;
  double u1,u2,sqk;

  const double * const prd = domain->prd;

  const double xprd = prd[0];
@@ -378,36 +387,33 @@ double PPPMStagger::compute_qopt_ad()
  const double unitky = (MY_2PI/yprd);
  const double unitkz = (MY_2PI/zprd_slab);

  double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
  double sum1,sum2,sum3,sum4,sum5,sum6,dot2;
  double u1,u2,sqk;

  int k,l,m,nx,ny,nz,kper,lper,mper;

  const int nbx = 2;
  const int nby = 2;
  const int nbz = 2;

  const int twoorder = 2*order;

  for (m = nzlo_fft; m <= nzhi_fft; m++) {
    mper = m - nz_pppm*(2*m/nz_pppm);
  // loop over entire FFT grid
  // each proc calculates contributions from every Pth grid point
  
    for (l = nylo_fft; l <= nyhi_fft; l++) {
      lper = l - ny_pppm*(2*l/ny_pppm);
  bigint ngridtotal = (bigint) nx_pppm * ny_pppm * nz_pppm;
  int nxy_pppm = nx_pppm * ny_pppm;
  
      for (k = nxlo_fft; k <= nxhi_fft; k++) {
        kper = k - nx_pppm*(2*k/nx_pppm);
  double qopt = 0.0;

  for (bigint i = me; i < ngridtotal; i += nprocs) {
    k = i % nx_pppm;
    l = (i/nx_pppm) % ny_pppm;
    m = i / nxy_pppm;

    const int kper = k - nx_pppm*(2*k/nx_pppm);
    const int lper = l - ny_pppm*(2*l/ny_pppm);
    const int mper = m - nz_pppm*(2*m/nz_pppm);

    sqk = square(unitkx*kper) + square(unitky*lper) + square(unitkz*mper);
    if (sqk == 0.0) continue;

        if (sqk != 0.0) {
          sum1 = 0.0;
          sum2 = 0.0;
          sum3 = 0.0;
          sum4 = 0.0;
          sum5 = 0.0;
          sum6 = 0.0;
    sum1 = sum2 = sum3 = sum4 = sum5 = sum6 = 0.0;
    
    for (nx = -nbx; nx <= nbx; nx++) {
      qx = unitkx*(kper+nx_pppm*nx);
@@ -439,11 +445,10 @@ double PPPMStagger::compute_qopt_ad()
	}
      }
    }
    
    qopt += sum1 - sum2/(0.5*(sum3*sum4 + sum5*sum6));
  }
      }
    }
  }

  double qopt_all;
  MPI_Allreduce(&qopt,&qopt_all,1,MPI_DOUBLE,MPI_SUM,world);
  return qopt_all;