Commit 70a7b376 authored by Trung Nguyen's avatar Trung Nguyen
Browse files

Updated pair coul/long/cs and born/coul/long/cs; updated gpu neighbor builds...

Updated pair coul/long/cs and born/coul/long/cs; updated gpu neighbor builds to support core-shell styles where r2 can be tiny.
parent 6842a527
Loading
Loading
Loading
Loading
+42 −41
Original line number Diff line number Diff line
@@ -9,7 +9,7 @@
//    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
// __________________________________________________________________________
//
//    begin                :
//    begin                : June 2018
//    email                : trung.nguyen@northwestern.edu
// ***************************************************************************/

@@ -29,15 +29,19 @@ texture<int2> q_tex;
#define q_tex q_
#endif

#define B0 (numtyp)-0.1335096380159268
#define B1 (numtyp)-2.57839507e-1
#define B2 (numtyp)-1.37203639e-1
#define B3 (numtyp)-8.88822059e-3
#define B4 (numtyp)-5.80844129e-3
#define B5 (numtyp)1.14652755e-1
#define EPSILON (numtyp)1.0e-20
#define EPS_EWALD (numtyp)1.0e-6
#define EPS_EWALD_SQR (numtyp)1.0e-12
// Note: EWALD_P is different from that in lal_preprocessor.h
//       acctyp is needed for these parameters
#define CS_EWALD_P (acctyp)9.95473818e-1
#define B0        (acctyp)-0.1335096380159268
#define B1        (acctyp)-2.57839507e-1
#define B2        (acctyp)-1.37203639e-1
#define B3        (acctyp)-8.88822059e-3
#define B4        (acctyp)-5.80844129e-3
#define B5        (acctyp)1.14652755e-1

#define EPSILON (acctyp)(1.0e-20)
#define EPS_EWALD (acctyp)(1.0e-6)
#define EPS_EWALD_SQR (acctyp)(1.0e-12)

__kernel void k_born_coul_long_cs(const __global numtyp4 *restrict x_,
                          const __global numtyp4 *restrict coeff1,
@@ -105,39 +109,37 @@ __kernel void k_born_coul_long_cs(const __global numtyp4 *restrict x_,

      int mtype=itype*lj_types+jtype;
      if (rsq<cutsq_sigma[mtype].x) { // cutsq 
        numtyp forcecoul, forceborn, force, r6inv, prefactor, _erfc;
        numtyp rexp = (numtyp)0.0;

        if (rsq < cut_coulsq) {
          rsq += EPSILON;
        numtyp forcecoul,forceborn,force,r6inv,prefactor,_erfc,rexp;

        rsq += EPSILON; // Add Epsilon for case: r = 0; Interaction must be removed by special bond;
        numtyp r2inv = ucl_recip(rsq);
          numtyp r = ucl_rsqrt(r2inv);

        if (rsq < cut_coulsq) {
          numtyp r = ucl_sqrt(rsq);
          fetch(prefactor,j,q_tex);
          prefactor *= qqrd2e * qtmp;
          if (factor_coul<(numtyp)1.0) {
            numtyp grij = g_ewald * (r+EPS_EWALD);
            numtyp expm2 = ucl_exp(-grij*grij);
            numtyp t = ucl_recip((numtyp)1.0 + EWALD_P*grij);
            numtyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij);
            numtyp u = (numtyp)1.0 - t;
            _erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
            prefactor /= (r+EPS_EWALD);
            forcecoul = prefactor * (_erfc + EWALD_F*grij*expm2 - ((numtyp)1.0-factor_coul));
            // Additionally r2inv needs to be accordingly modified since the later
            // scaling of the overall force shall be consistent
            r2inv = ucl_recip(rsq + EPS_EWALD_SQR);
            forcecoul *= r2inv;
          } else {
            numtyp grij = g_ewald * r;
            numtyp expm2 = ucl_exp(-grij*grij);
            numtyp t = ucl_recip((numtyp)1.0 + EWALD_P*grij);
            numtyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij);
            numtyp u = (numtyp)1.0 - t;
            _erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
            prefactor /= r;
            forcecoul = prefactor*(_erfc + EWALD_F*grij*expm2);
            forcecoul *= r2inv;
          }
        } else forcecoul = (numtyp)0.0;

        numtyp r2inv = ucl_recip(rsq);
        if (rsq < cutsq_sigma[mtype].y) { // cut_ljsq
          numtyp r = ucl_sqrt(rsq);
          rexp = ucl_exp((cutsq_sigma[mtype].z-r)*coeff1[mtype].x);
@@ -146,7 +148,7 @@ __kernel void k_born_coul_long_cs(const __global numtyp4 *restrict x_,
            + coeff1[mtype].w*r2inv*r6inv)*factor_lj;
        } else forceborn = (numtyp)0.0;

        force = forceborn*r2inv + forcecoul;
        force = (forcecoul + forceborn) * r2inv;

        f.x+=delx*force;
        f.y+=dely*force;
@@ -154,9 +156,9 @@ __kernel void k_born_coul_long_cs(const __global numtyp4 *restrict x_,

        if (eflag>0) {
          if (rsq < cut_coulsq) {
            e_coul += prefactor*_erfc;
            if (factor_coul<(numtyp)1.0)
              e_coul -= ((numtyp)1.0-factor_coul)*prefactor;
            numtyp e = prefactor*_erfc;
            if (factor_coul<(numtyp)1.0) e -= ((numtyp)1.0-factor_coul)*prefactor;
            e_coul += e;
          }
          if (rsq < cutsq_sigma[mtype].y) {
            numtyp e=coeff2[mtype].x*rexp - coeff2[mtype].y*r6inv
@@ -248,39 +250,38 @@ __kernel void k_born_coul_long_cs_fast(const __global numtyp4 *restrict x_,
      numtyp rsq = delx*delx+dely*dely+delz*delz;

      if (rsq<cutsq_sigma[mtype].x) { // cutsq 
        numtyp forcecoul, forceborn, force, r6inv, prefactor, _erfc;
        numtyp rexp = (numtyp)0.0;

        if (rsq < cut_coulsq) {
          rsq += EPSILON;
        numtyp forcecoul,forceborn,force,r6inv,prefactor,_erfc,rexp;

        rsq += EPSILON; // Add Epsilon for case: r = 0; Interaction must be removed by special bond;
        numtyp r2inv = ucl_recip(rsq);
          numtyp r = ucl_rsqrt(r2inv);

        if (rsq < cut_coulsq) {
          numtyp r = ucl_sqrt(rsq);
          fetch(prefactor,j,q_tex);
          prefactor *= qqrd2e * qtmp;
          if (factor_coul<(numtyp)1.0) {
            numtyp grij = g_ewald * (r+EPS_EWALD);
            numtyp expm2 = ucl_exp(-grij*grij);
            numtyp t = ucl_recip((numtyp)1.0 + EWALD_P*grij);
            numtyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij);
            numtyp u = (numtyp)1.0 - t;
            _erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
            prefactor /= (r+EPS_EWALD);
            forcecoul = prefactor * (_erfc + EWALD_F*grij*expm2 - ((numtyp)1.0-factor_coul));
            // Additionally r2inv needs to be accordingly modified since the later
            // scaling of the overall force shall be consistent
            r2inv = ucl_recip(rsq + EPS_EWALD_SQR);
            forcecoul *= r2inv;
          } else {

            numtyp grij = g_ewald * r;
            numtyp expm2 = ucl_exp(-grij*grij);
            numtyp t = ucl_recip((numtyp)1.0 + EWALD_P*grij);
            numtyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij);
            numtyp u = (numtyp)1.0 - t;
            _erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
            prefactor /= r;
            forcecoul = prefactor*(_erfc + EWALD_F*grij*expm2);
            forcecoul *= r2inv;
          }
        } else forcecoul = (numtyp)0.0;

        numtyp r2inv = ucl_recip(rsq);
        if (rsq < cutsq_sigma[mtype].y) { // cut_ljsq
          numtyp r = ucl_sqrt(rsq);
          rexp = ucl_exp((cutsq_sigma[mtype].z-r)*coeff1[mtype].x);
@@ -289,7 +290,7 @@ __kernel void k_born_coul_long_cs_fast(const __global numtyp4 *restrict x_,
            + coeff1[mtype].w*r2inv*r6inv)*factor_lj;
        } else forceborn = (numtyp)0.0;

        force = forceborn*r2inv + forcecoul;
        force = (forcecoul + forceborn) * r2inv;

        f.x+=delx*force;
        f.y+=dely*force;
@@ -297,9 +298,9 @@ __kernel void k_born_coul_long_cs_fast(const __global numtyp4 *restrict x_,

        if (eflag>0) {
          if (rsq < cut_coulsq) {
            e_coul += prefactor*_erfc;
            if (factor_coul<(numtyp)1.0)
              e_coul -= ((numtyp)1.0-factor_coul)*prefactor;
            numtyp e = prefactor*_erfc;
            if (factor_coul<(numtyp)1.0) e -= ((numtyp)1.0-factor_coul)*prefactor;
            e_coul += e;
          }
          if (rsq < cutsq_sigma[mtype].y) {
            numtyp e=coeff2[mtype].x*rexp - coeff2[mtype].y*r6inv
+32 −24
Original line number Diff line number Diff line
@@ -29,6 +29,20 @@ texture<int2> q_tex;
#define q_tex q_
#endif

// Note: EWALD_P is different from that in lal_preprocessor.h
//       acctyp is needed for these parameters
#define CS_EWALD_P (acctyp)9.95473818e-1
#define B0        (acctyp)-0.1335096380159268
#define B1        (acctyp)-2.57839507e-1
#define B2        (acctyp)-1.37203639e-1
#define B3        (acctyp)-8.88822059e-3
#define B4        (acctyp)-5.80844129e-3
#define B5        (acctyp)1.14652755e-1

#define EPSILON (acctyp)(1.0e-20)
#define EPS_EWALD (acctyp)(1.0e-6)
#define EPS_EWALD_SQR (acctyp)(1.0e-12)

#if (ARCH < 300)

#define store_answers_lq(f, e_coul, virial, ii, inum, tid,                  \
@@ -123,16 +137,6 @@ texture<int2> q_tex;

#endif

#define B0 (numtyp)-0.1335096380159268
#define B1 (numtyp)-2.57839507e-1
#define B2 (numtyp)-1.37203639e-1
#define B3 (numtyp)-8.88822059e-3
#define B4 (numtyp)-5.80844129e-3
#define B5 (numtyp)1.14652755e-1
#define EPSILON (numtyp)1.0e-20
#define EPS_EWALD (numtyp)1.0e-6
#define EPS_EWALD_SQR (numtyp)1.0e-12

__kernel void k_coul_long_cs(const __global numtyp4 *restrict x_,
                          const __global numtyp *restrict scale,
                          const int lj_types,
@@ -191,7 +195,7 @@ __kernel void k_coul_long_cs(const __global numtyp4 *restrict x_,

      int mtype=itype*lj_types+jtype;
      if (rsq < cut_coulsq) {
        rsq += EPSILON;
        rsq += EPSILON; // Add Epsilon for case: r = 0; Interaction must be removed by special bond;

        numtyp force,prefactor,_erfc;
        numtyp r2inv = ucl_recip(rsq);
@@ -201,17 +205,19 @@ __kernel void k_coul_long_cs(const __global numtyp4 *restrict x_,
        if (factor_coul<(numtyp)1.0) {
          numtyp grij = g_ewald * (r+EPS_EWALD);
          numtyp expm2 = ucl_exp(-grij*grij);
          numtyp t = ucl_recip((numtyp)1.0 + EWALD_P*grij);
          numtyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij);
          numtyp u = (numtyp)1.0 - t;
          _erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
          prefactor /= (r+EPS_EWALD);
          force = prefactor * (_erfc + EWALD_F*grij*expm2 - ((numtyp)1.0-factor_coul));
          // Additionally r2inv needs to be accordingly modified since the later
          // scaling of the overall force shall be consistent
          r2inv = ucl_recip(rsq + EPS_EWALD_SQR);
          force *= r2inv;
        } else {
          numtyp grij = g_ewald * r;
          numtyp expm2 = ucl_exp(-grij*grij);
          numtyp t = ucl_recip((numtyp)1.0 + EWALD_P*grij);
          numtyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij);
          numtyp u = (numtyp)1.0 - t;
          _erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
          prefactor /= r;
@@ -224,9 +230,9 @@ __kernel void k_coul_long_cs(const __global numtyp4 *restrict x_,
        f.z+=delz*force;

        if (eflag>0) {
          e_coul += prefactor*_erfc;
          if (factor_coul<(numtyp)1.0)
            e_coul -= ((numtyp)1.0-factor_coul)*prefactor;
          numtyp e = prefactor*_erfc;
          if (factor_coul<(numtyp)1.0) e -= ((numtyp)1.0-factor_coul)*prefactor;
          e_coul += e;
        }
        if (vflag>0) {
          virial[0] += delx*delx*force;
@@ -304,7 +310,7 @@ __kernel void k_coul_long_cs_fast(const __global numtyp4 *restrict x_,
      numtyp rsq = delx*delx+dely*dely+delz*delz;

      if (rsq < cut_coulsq) {
        rsq += EPSILON;
        rsq += EPSILON; // Add Epsilon for case: r = 0; Interaction must be removed by special bond;

        numtyp force,prefactor,_erfc;
        numtyp r2inv = ucl_recip(rsq);
@@ -314,32 +320,34 @@ __kernel void k_coul_long_cs_fast(const __global numtyp4 *restrict x_,
        if (factor_coul<(numtyp)1.0) {
          numtyp grij = g_ewald * (r+EPS_EWALD);
          numtyp expm2 = ucl_exp(-grij*grij);
          numtyp t = ucl_recip((numtyp)1.0 + EWALD_P*grij);
          numtyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij);
          numtyp u = (numtyp)1.0 - t;
          _erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
          prefactor /= (r+EPS_EWALD);
          force = prefactor * (_erfc + EWALD_F*grij*expm2 - ((numtyp)1.0-factor_coul));
          // Additionally r2inv needs to be accordingly modified since the later
          // scaling of the overall force shall be consistent
          r2inv = ucl_recip(rsq + EPS_EWALD_SQR);
          force *= r2inv;
        } else {
          numtyp grij = g_ewald * r;
          numtyp expm2 = ucl_exp(-grij*grij);
          numtyp t = ucl_recip((numtyp)1.0 + EWALD_P*grij);
          numtyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij);
          numtyp u = (numtyp)1.0 - t;
          _erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
          prefactor /= r;
          force = prefactor * (_erfc + EWALD_F*grij*expm2);
          force *= r2inv;
        }

        force *= r2inv;

        f.x+=delx*force;
        f.y+=dely*force;
        f.z+=delz*force;

        if (eflag>0) {
          e_coul += prefactor*_erfc;
          if (factor_coul<(numtyp)1.0)
            e_coul -= ((numtyp)1.0-factor_coul)*prefactor;
          numtyp e = prefactor*_erfc;
          if (factor_coul<(numtyp)1.0) e -= ((numtyp)1.0-factor_coul)*prefactor;
          e_coul += e;
        }
        if (vflag>0) {
          virial[0] += delx*delx*force;
+18 −18
Original line number Diff line number Diff line
@@ -232,7 +232,7 @@ __kernel void calc_neigh_list_cell(const __global numtyp4 *restrict x_,
                diff.z = atom_i.z - pos_sh[j].z;

                r2 = diff.x*diff.x + diff.y*diff.y + diff.z*diff.z;
                if (r2 < cell_size*cell_size && r2 > 1e-5) {
                if (r2 < cell_size*cell_size && pid_j != pid_i) { //  && r2 > 1e-5
                  cnt++;
                  if (cnt <= neigh_bin_size) {
                    *neigh_list = pid_j;