Commit 265b6c26 authored by Trung Nguyen's avatar Trung Nguyen
Browse files

Fixed bugs with lj/expand/coul/long and its gpu version

parent 3c781afa
Loading
Loading
Loading
Loading
+19 −12
Original line number Diff line number Diff line
@@ -94,17 +94,21 @@ __kernel void k_lj_expand_coul_long(const __global numtyp4 *restrict x_,

      int mtype=itype*lj_types+jtype;
      if (rsq<lj1[mtype].z) {
        numtyp r2inv=ucl_recip(rsq);
        numtyp forcecoul, force_lj, force, r6inv, prefactor, _erfc;
        numtyp r2inv=ucl_recip(rsq);
        numtyp r = ucl_sqrt(rsq);

        if (rsq < lj1[mtype].w) {
          r6inv = r2inv*r2inv*r2inv;
          force_lj = factor_lj*r6inv*(lj1[mtype].x*r6inv-lj1[mtype].y);
          numtyp rshift = r - lj3[mtype].w;
          numtyp rshiftsq = rshift*rshift;
          numtyp rshift2inv = ucl_recip(rshiftsq);
          r6inv = rshift2inv*rshift2inv*rshift2inv;
          force_lj = r6inv*(lj1[mtype].x*r6inv-lj1[mtype].y);
          force_lj *= factor_lj/rshift/r;
        } else
          force_lj = (numtyp)0.0;

        if (rsq < cut_coulsq) {
          numtyp r = ucl_rsqrt(r2inv);
          numtyp grij = g_ewald * r;
          numtyp expm2 = ucl_exp(-grij*grij);
          numtyp t = ucl_recip((numtyp)1.0 + EWALD_P*grij);
@@ -115,7 +119,7 @@ __kernel void k_lj_expand_coul_long(const __global numtyp4 *restrict x_,
        } else
          forcecoul = (numtyp)0.0;

        force = (force_lj + forcecoul) * r2inv;
        force = force_lj + forcecoul*r2inv;

        f.x+=delx*force;
        f.y+=dely*force;
@@ -168,7 +172,6 @@ __kernel void k_lj_expand_coul_long_fast(const __global numtyp4 *restrict x_,
    sp_lj[tid]=sp_lj_in[tid];
  if (tid<MAX_SHARED_TYPES*MAX_SHARED_TYPES) {
    lj1[tid]=lj1_in[tid];
    if (eflag>0)
    lj3[tid]=lj3_in[tid];
  }

@@ -212,17 +215,21 @@ __kernel void k_lj_expand_coul_long_fast(const __global numtyp4 *restrict x_,
      numtyp rsq = delx*delx+dely*dely+delz*delz;

      if (rsq<lj1[mtype].z) {
        numtyp r2inv=ucl_recip(rsq);
        numtyp forcecoul, force_lj, force, r6inv, prefactor, _erfc;
        numtyp r2inv=ucl_recip(rsq);
        numtyp r = ucl_sqrt(rsq);

        if (rsq < lj1[mtype].w) {
          r6inv = r2inv*r2inv*r2inv;
          force_lj = factor_lj*r6inv*(lj1[mtype].x*r6inv-lj1[mtype].y);
          numtyp rshift = r - lj3[mtype].w;
          numtyp rshiftsq = rshift*rshift;
          numtyp rshift2inv = ucl_recip(rshiftsq);
          r6inv = rshift2inv*rshift2inv*rshift2inv;
          force_lj = r6inv*(lj1[mtype].x*r6inv-lj1[mtype].y);
          force_lj *= factor_lj/rshift/r;
        } else
          force_lj = (numtyp)0.0;

        if (rsq < cut_coulsq) {
          numtyp r = ucl_rsqrt(r2inv);
          numtyp grij = g_ewald * r;
          numtyp expm2 = ucl_exp(-grij*grij);
          numtyp t = ucl_recip((numtyp)1.0 + EWALD_P*grij);
@@ -233,7 +240,7 @@ __kernel void k_lj_expand_coul_long_fast(const __global numtyp4 *restrict x_,
        } else
          forcecoul = (numtyp)0.0;

        force = (force_lj + forcecoul) * r2inv;
        force = force_lj + forcecoul*r2inv;

        f.x+=delx*force;
        f.y+=dely*force;
+9 −3
Original line number Diff line number Diff line
@@ -223,8 +223,9 @@ void PairLJExpandCoulLongGPU::cpu_compute(int start, int inum, int eflag,
  double fraction,table;
  double r,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
  double grij,expm2,prefactor,t,erfc;
  double rsq,rshift,rshiftsq,rshift2inv;

  int *jlist;
  double rsq;

  evdwl = ecoul = 0.0;

@@ -290,11 +291,16 @@ void PairLJExpandCoulLongGPU::cpu_compute(int start, int inum, int eflag,
        } else forcecoul = 0.0;

        if (rsq < cut_ljsq[itype][jtype]) {
          r6inv = r2inv*r2inv*r2inv;
          r = sqrt(rsq);
          rshift = r - shift[itype][jtype];
          rshiftsq = rshift*rshift;
          rshift2inv = 1.0/rshiftsq;
          r6inv = rshift2inv*rshift2inv*rshift2inv;
          forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
          forcelj = factor_lj*forcelj/rshift/r;
        } else forcelj = 0.0;

        fpair = (forcecoul + factor_lj*forcelj) * r2inv;
        fpair = forcecoul*r2inv + forcelj;

        f[i][0] += delx*fpair;
        f[i][1] += dely*fpair;
+16 −16
Original line number Diff line number Diff line
@@ -88,7 +88,7 @@ void PairLJExpandCoulLong::compute(int eflag, int vflag)
  double r,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
  double grij,expm2,prefactor,t,erfc;
  int *ilist,*jlist,*numneigh,**firstneigh;
  double rsq,rshift,rshiftsq;
  double rsq,rshift,rshiftsq,rshift2inv;

  evdwl = ecoul = 0.0;
  if (eflag || vflag) ev_setup(eflag,vflag);
@@ -166,8 +166,8 @@ void PairLJExpandCoulLong::compute(int eflag, int vflag)
          r = sqrt(rsq);
          rshift = r - shift[itype][jtype];
          rshiftsq = rshift*rshift;
          r2inv = 1.0/rshiftsq;
          r6inv = r2inv*r2inv*r2inv;
          rshift2inv = 1.0/rshiftsq;
          r6inv = rshift2inv*rshift2inv*rshift2inv;
          forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
          forcelj = factor_lj*forcelj/rshift/r;
        } else forcelj = 0.0;
@@ -217,7 +217,7 @@ void PairLJExpandCoulLong::compute_inner()
  int i,j,ii,jj,inum,jnum,itype,jtype;
  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,fpair;
  double rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
  double rsw,r,rshift,rshiftsq;
  double rsw,rsq,rshift,rshiftsq,rshift2inv;
  int *ilist,*jlist,*numneigh,**firstneigh;

  double **x = atom->x;
@@ -275,8 +275,8 @@ void PairLJExpandCoulLong::compute_inner()
          r = sqrt(rsq);
          rshift = r - shift[itype][jtype];
          rshiftsq = rshift*rshift;
          r2inv = 1.0/rshiftsq;
          r6inv = r2inv*r2inv*r2inv;
          rshift2inv = 1.0/rshiftsq;
          r6inv = rshift2inv*rshift2inv*rshift2inv;
          forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
          forcelj = factor_lj*forcelj/rshift/r;
        } else forcelj = 0.0;
@@ -307,7 +307,7 @@ void PairLJExpandCoulLong::compute_middle()
  int i,j,ii,jj,inum,jnum,itype,jtype;
  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,fpair;
  double rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
  double rsw,r,rshift,rshiftsq;
  double rsw,rsq,rshift,rshiftsq,rshift2inv;
  int *ilist,*jlist,*numneigh,**firstneigh;

  double **x = atom->x;
@@ -370,8 +370,8 @@ void PairLJExpandCoulLong::compute_middle()
          r = sqrt(rsq);
          rshift = r - shift[itype][jtype];
          rshiftsq = rshift*rshift;
          r2inv = 1.0/rshiftsq;
          r6inv = r2inv*r2inv*r2inv;
          rshift2inv = 1.0/rshiftsq;
          r6inv = rshift2inv*rshift2inv*rshift2inv;
          forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
          forcelj = factor_lj*forcelj/rshift/r;
        } else forcelj = 0.0;
@@ -408,9 +408,8 @@ void PairLJExpandCoulLong::compute_outer(int eflag, int vflag)
  double fraction,table;
  double r,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
  double grij,expm2,prefactor,t,erfc;
  double rsw,rshift,rshiftsq;
  double rsw,rsq,rshift,rshiftsq,rshift2inv;
  int *ilist,*jlist,*numneigh,**firstneigh;
  double rsq;

  evdwl = ecoul = 0.0;
  if (eflag || vflag) ev_setup(eflag,vflag);
@@ -507,8 +506,8 @@ void PairLJExpandCoulLong::compute_outer(int eflag, int vflag)
          r = sqrt(rsq);
          rshift = r - shift[itype][jtype];
          rshiftsq = rshift*rshift;
          r2inv = 1.0/rshiftsq;
          r6inv = r2inv*r2inv*r2inv;
          rshift2inv = 1.0/rshiftsq;
          r6inv = rshift2inv*rshift2inv*rshift2inv;
          forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
          forcelj = factor_lj*forcelj/rshift/r;
          if (rsq < cut_in_on_sq) {
@@ -680,7 +679,7 @@ void PairLJExpandCoulLong::init_style()
  if (!atom->q_flag)
    error->all(FLERR,"Pair style lj/cut/coul/long requires atom attribute q");

  // request regular or rRESPA neighbor lists
  // request regular or rRESPA neighbor list

  int irequest;
  int respa = 0;
@@ -729,12 +728,13 @@ double PairLJExpandCoulLong::init_one(int i, int j)
                               sigma[i][i],sigma[j][j]);
    sigma[i][j] = mix_distance(sigma[i][i],sigma[j][j]);
    cut_lj[i][j] = mix_distance(cut_lj[i][i],cut_lj[j][j]);
    shift[i][j] = 0.5 * (shift[i][i] + shift[j][j]);
  }

  // include TIP4P qdist in full cutoff, qdist = 0.0 if not TIP4P

  double cut = MAX(cut_lj[i][j],cut_coul+2.0*qdist);
  cut_ljsq[i][j] = cut_lj[i][j] * cut_lj[i][j];
  double cut = MAX(cut_lj[i][j]+shift[i][j],cut_coul+2.0*qdist);
  cut_ljsq[i][j] = (cut_lj[i][j]+shift[i][j]) *(cut_lj[i][j]+shift[i][j]);

  lj1[i][j] = 48.0 * epsilon[i][j] * pow(sigma[i][j],12.0);
  lj2[i][j] = 24.0 * epsilon[i][j] * pow(sigma[i][j],6.0);