Unverified commit e123b6f0, authored by Axel Kohlmeyer and committed by GitHub
Browse files

Merge pull request #1550 from ndtrung81/lj96-fixes

Fixed a bug in lj96/cut/gpu where the pair force was not scaled by the special_lj factor
parents b727f0b1 46a9772a
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -174,6 +174,7 @@ __kernel void k_lj96_fast(const __global numtyp4 *restrict x_,
        numtyp r6inv = r2inv*r2inv*r2inv;
        numtyp r3inv = ucl_sqrt(r6inv);
        numtyp force = r2inv*r6inv*(lj1[mtype].x*r3inv-lj1[mtype].y);
        force*=factor_lj;

        f.x+=delx*force;
        f.y+=dely*force;
+7 −33
Original line number Diff line number Diff line
@@ -308,8 +308,6 @@ __kernel void k_tersoff_zeta(const __global numtyp4 *restrict x_,
      delr1.z = jx.z-ix.z;
      numtyp rsq1 = delr1.x*delr1.x+delr1.y*delr1.y+delr1.z*delr1.z;

//      if (rsq1 > cutsq[ijparam]) continue;

      // compute zeta_ij
      z = (acctyp)0;

@@ -355,13 +353,9 @@ __kernel void k_tersoff_zeta(const __global numtyp4 *restrict x_,
                  rsq1, rsq2, delr1, delr2);
      }

      //int jj = (nbor_j-offset_j-2*nbor_pitch)/n_stride;
      //int idx = jj*n_stride + i*t_per_atom + offset_j;
      // idx to zetaij is shifted by n_stride relative to nbor_j in dev_short_nbor
      int idx = nbor_j;
      if (dev_packed==dev_nbor) idx -= n_stride;
//      zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
//               i, nbor_j, offset_j, idx);
      acc_zeta(z, tid, t_per_atom, offset_k);

      numtyp4 ts1_ijparam = ts1[ijparam]; //fetch4(ts1_ijparam,ijparam,ts1_tex);
@@ -585,14 +579,9 @@ __kernel void k_tersoff_three_center(const __global numtyp4 *restrict x_,
      numtyp r1inv = ucl_rsqrt(rsq1);

      // look up for zeta_ij

      //int jj = (nbor_j-offset_j-2*nbor_pitch) / n_stride;
      //int idx = jj*n_stride + i*t_per_atom + offset_j;
      // idx to zetaij is shifted by n_stride relative to nbor_j in dev_short_nbor
      int idx = nbor_j;
      if (dev_packed==dev_nbor) idx -= n_stride;
//      zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
//               i, nbor_j, offset_j, idx);
      acctyp4 zeta_ij = zetaij[idx]; // fetch(zeta_ij,idx,zeta_tex);
      numtyp force = zeta_ij.x*tpainv;
      numtyp prefactor = zeta_ij.y;
@@ -823,13 +812,9 @@ __kernel void k_tersoff_three_end(const __global numtyp4 *restrict x_,
        offset_kf = red_acc[2*m+1];
      }

      //int iix = (ijnum - offset_kf - 2*nbor_pitch) / n_stride;
      //int idx = iix*n_stride + j*t_per_atom + offset_kf;
      // idx to zetaij is shifted by n_stride relative to ijnum in dev_short_nbor
      int idx = ijnum;
      if (dev_packed==dev_nbor) idx -= n_stride;
//      zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
//               j, ijnum, offset_kf, idx);
      acctyp4 zeta_ji = zetaij[idx]; // fetch(zeta_ji,idx,zeta_tex);
      numtyp force = zeta_ji.x*tpainv;
      numtyp prefactor_ji = zeta_ji.y;
@@ -891,13 +876,10 @@ __kernel void k_tersoff_three_end(const __global numtyp4 *restrict x_,
        f.y += fi[1];
        f.z += fi[2];

        //int kk = (nbor_k - offset_k - 2*nbor_pitch) / n_stride;
        //int idx = kk*n_stride + j*t_per_atom + offset_k;
        // idx to zetaij is shifted by n_stride relative to nbor_k in dev_short_nbor
        int idx = nbor_k;
        if (dev_packed==dev_nbor) idx -= n_stride;
//        zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
//                 j, nbor_k, offset_k, idx);

        acctyp4 zeta_jk = zetaij[idx]; // fetch(zeta_jk,idx,zeta_tex);
        numtyp prefactor_jk = zeta_jk.y;
        int jkiparam=elem2param[jtype*nelements*nelements+ktype*nelements+itype];
@@ -1068,13 +1050,9 @@ __kernel void k_tersoff_three_end_vatom(const __global numtyp4 *restrict x_,
        offset_kf = red_acc[2*m+1];
      }

      //int iix = (ijnum - offset_kf - 2*nbor_pitch) / n_stride;
      //int idx = iix*n_stride + j*t_per_atom + offset_kf;
      // idx to zetaij is shifted by n_stride relative to ijnum in dev_short_nbor
      int idx = ijnum;
      if (dev_packed==dev_nbor) idx -= n_stride;
//      zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
//               j, ijnum, offset_kf, idx);
      acctyp4 zeta_ji = zetaij[idx]; //  fetch(zeta_ji,idx,zeta_tex);
      numtyp force = zeta_ji.x*tpainv;
      numtyp prefactor_ji = zeta_ji.y;
@@ -1143,13 +1121,9 @@ __kernel void k_tersoff_three_end_vatom(const __global numtyp4 *restrict x_,
        virial[4] += TWOTHIRD*(mdelr1[0]*fj[2] + delr2[0]*fk[2]);
        virial[5] += TWOTHIRD*(mdelr1[1]*fj[2] + delr2[1]*fk[2]);

        //int kk = (nbor_k - offset_k - 2*nbor_pitch) / n_stride;
        //int idx = kk*n_stride + j*t_per_atom + offset_k;
        // idx to zetaij is shifted by n_stride relative to nbor_k in dev_short_nbor
        int idx = nbor_k;
        if (dev_packed==dev_nbor) idx -= n_stride;
//        zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
//                 j, nbor_k, offset_k, idx);
        acctyp4 zeta_jk = zetaij[idx]; // fetch(zeta_jk,idx,zeta_tex);
        numtyp prefactor_jk = zeta_jk.y;

+6 −31
Original line number Diff line number Diff line
@@ -356,13 +356,9 @@ __kernel void k_tersoff_mod_zeta(const __global numtyp4 *restrict x_,
                  ijkparam_c5, rsq1, rsq2, delr1, delr2);
      }

      //int jj = (nbor_j-offset_j-2*nbor_pitch)/n_stride;
      //int idx = jj*n_stride + i*t_per_atom + offset_j;
      // idx to zetaij is shifted by n_stride relative to nbor_j in dev_short_nbor
      int idx = nbor_j;
      if (dev_packed==dev_nbor) idx -= n_stride;
//      zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
//               i, nbor_j, offset_j, idx);
      acc_zeta(z, tid, t_per_atom, offset_k);

      numtyp4 ts1_ijparam = ts1[ijparam]; //fetch4(ts1_ijparam,ijparam,ts1_tex);
@@ -587,14 +583,9 @@ __kernel void k_tersoff_mod_three_center(const __global numtyp4 *restrict x_,
      numtyp r1inv = ucl_rsqrt(rsq1);

      // look up for zeta_ij

      //int jj = (nbor_j-offset_j-2*nbor_pitch) / n_stride;
      //int idx = jj*n_stride + i*t_per_atom + offset_j;
      // idx to zetaij is shifted by n_stride relative to nbor_j in dev_short_nbor
      int idx = nbor_j;
      if (dev_packed==dev_nbor) idx -= n_stride;
//      zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
//               i, nbor_j, offset_j, idx);
      acctyp4 zeta_ij = zetaij[idx]; // fetch(zeta_ij,idx,zeta_tex);
      numtyp force = zeta_ij.x*tpainv;
      numtyp prefactor = zeta_ij.y;
@@ -831,13 +822,9 @@ __kernel void k_tersoff_mod_three_end(const __global numtyp4 *restrict x_,
        offset_kf = red_acc[2*m+1];
      }

      //int iix = (ijnum - offset_kf - 2*nbor_pitch) / n_stride;
      //int idx = iix*n_stride + j*t_per_atom + offset_kf;
      // idx to zetaij is shifted by n_stride relative to ijnum in dev_short_nbor
      int idx = ijnum;
      if (dev_packed==dev_nbor) idx -= n_stride;
//      zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
//               j, ijnum, offset_kf, idx);
      acctyp4 zeta_ji = zetaij[idx]; // fetch(zeta_ji,idx,zeta_tex);
      numtyp force = zeta_ji.x*tpainv;
      numtyp prefactor_ji = zeta_ji.y;
@@ -902,13 +889,9 @@ __kernel void k_tersoff_mod_three_end(const __global numtyp4 *restrict x_,
        f.y += fi[1];
        f.z += fi[2];

        //int kk = (nbor_k - offset_k - 2*nbor_pitch) / n_stride;
        //int idx = kk*n_stride + j*t_per_atom + offset_k;
        // idx to zetaij is shifted by n_stride relative to nbor_k in dev_short_nbor
        int idx = nbor_k;
        if (dev_packed==dev_nbor) idx -= n_stride;
//        zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
//                 j, nbor_k, offset_k, idx);
        acctyp4 zeta_jk = zetaij[idx]; // fetch(zeta_jk,idx,zeta_tex);
        numtyp prefactor_jk = zeta_jk.y;
        int jkiparam=elem2param[jtype*nelements*nelements+ktype*nelements+itype];
@@ -1085,13 +1068,9 @@ __kernel void k_tersoff_mod_three_end_vatom(const __global numtyp4 *restrict x_,
        offset_kf = red_acc[2*m+1];
      }

      //int iix = (ijnum - offset_kf - 2*nbor_pitch) / n_stride;
      //int idx = iix*n_stride + j*t_per_atom + offset_kf;
      // idx to zetaij is shifted by n_stride relative to ijnum in dev_short_nbor
      int idx = ijnum;
      if (dev_packed==dev_nbor) idx -= n_stride;
//      zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
//               j, ijnum, offset_kf, idx);
      acctyp4 zeta_ji = zetaij[idx]; //  fetch(zeta_ji,idx,zeta_tex);
      numtyp force = zeta_ji.x*tpainv;
      numtyp prefactor_ji = zeta_ji.y;
@@ -1163,13 +1142,9 @@ __kernel void k_tersoff_mod_three_end_vatom(const __global numtyp4 *restrict x_,
        virial[4] += TWOTHIRD*(mdelr1[0]*fj[2] + delr2[0]*fk[2]);
        virial[5] += TWOTHIRD*(mdelr1[1]*fj[2] + delr2[1]*fk[2]);

        //int kk = (nbor_k - offset_k - 2*nbor_pitch) / n_stride;
        //int idx = kk*n_stride + j*t_per_atom + offset_k;
        // idx to zetaij is shifted by n_stride relative to nbor_k in dev_short_nbor
        int idx = nbor_k;
        if (dev_packed==dev_nbor) idx -= n_stride;
//        zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
//                 j, nbor_k, offset_k, idx);
        acctyp4 zeta_jk = zetaij[idx]; // fetch(zeta_jk,idx,zeta_tex);
        numtyp prefactor_jk = zeta_jk.y;

+6 −31
Original line number Diff line number Diff line
@@ -359,13 +359,9 @@ __kernel void k_tersoff_zbl_zeta(const __global numtyp4 *restrict x_,
                  rsq1, rsq2, delr1, delr2);
      }

      //int jj = (nbor_j-offset_j-2*nbor_pitch)/n_stride;
      //int idx = jj*n_stride + i*t_per_atom + offset_j;
      // idx to zetaij is shifted by n_stride relative to nbor_j in dev_short_nbor
      int idx = nbor_j;
      if (dev_packed==dev_nbor) idx -= n_stride;
//      zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
//               i, nbor_j, offset_j, idx);
      acc_zeta(z, tid, t_per_atom, offset_k);

      numtyp4 ts1_ijparam = ts1[ijparam]; //fetch4(ts1_ijparam,ijparam,ts1_tex);
@@ -603,14 +599,9 @@ __kernel void k_tersoff_zbl_three_center(const __global numtyp4 *restrict x_,
      numtyp r1inv = ucl_rsqrt(rsq1);

      // look up for zeta_ij

      //int jj = (nbor_j-offset_j-2*nbor_pitch) / n_stride;
      //int idx = jj*n_stride + i*t_per_atom + offset_j;
      // idx to zetaij is shifted by n_stride relative to nbor_j in dev_short_nbor
      int idx = nbor_j;
      if (dev_packed==dev_nbor) idx -= n_stride;
//      zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
//               i, nbor_j, offset_j, idx);
      acctyp4 zeta_ij = zetaij[idx]; // fetch(zeta_ij,idx,zeta_tex);
      numtyp force = zeta_ij.x*tpainv;
      numtyp prefactor = zeta_ij.y;
@@ -841,13 +832,9 @@ __kernel void k_tersoff_zbl_three_end(const __global numtyp4 *restrict x_,
        offset_kf = red_acc[2*m+1];
      }

      //int iix = (ijnum - offset_kf - 2*nbor_pitch) / n_stride;
      //int idx = iix*n_stride + j*t_per_atom + offset_kf;
      // idx to zetaij is shifted by n_stride relative to ijnum in dev_short_nbor
      int idx = ijnum;
      if (dev_packed==dev_nbor) idx -= n_stride;
//      zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
//               j, ijnum, offset_kf, idx);
      acctyp4 zeta_ji = zetaij[idx]; // fetch(zeta_ji,idx,zeta_tex);
      numtyp force = zeta_ji.x*tpainv;
      numtyp prefactor_ji = zeta_ji.y;
@@ -909,13 +896,9 @@ __kernel void k_tersoff_zbl_three_end(const __global numtyp4 *restrict x_,
        f.y += fi[1];
        f.z += fi[2];

        //int kk = (nbor_k - offset_k - 2*nbor_pitch) / n_stride;
        //int idx = kk*n_stride + j*t_per_atom + offset_k;
        // idx to zetaij is shifted by n_stride relative to nbor_k in dev_short_nbor
        int idx = nbor_k;
        if (dev_packed==dev_nbor) idx -= n_stride;
//        zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
//                 j, nbor_k, offset_k, idx);
        acctyp4 zeta_jk = zetaij[idx]; // fetch(zeta_jk,idx,zeta_tex);
        numtyp prefactor_jk = zeta_jk.y;
        int jkiparam=elem2param[jtype*nelements*nelements+ktype*nelements+itype];
@@ -1086,13 +1069,9 @@ __kernel void k_tersoff_zbl_three_end_vatom(const __global numtyp4 *restrict x_,
        offset_kf = red_acc[2*m+1];
      }

      //int iix = (ijnum - offset_kf - 2*nbor_pitch) / n_stride;
      //int idx = iix*n_stride + j*t_per_atom + offset_kf;
      // idx to zetaij is shifted by n_stride relative to ijnum in dev_short_nbor
      int idx = ijnum;
      if (dev_packed==dev_nbor) idx -= n_stride;
//      zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
//               j, ijnum, offset_kf, idx);
      acctyp4 zeta_ji = zetaij[idx]; //  fetch(zeta_ji,idx,zeta_tex);
      numtyp force = zeta_ji.x*tpainv;
      numtyp prefactor_ji = zeta_ji.y;
@@ -1161,13 +1140,9 @@ __kernel void k_tersoff_zbl_three_end_vatom(const __global numtyp4 *restrict x_,
        virial[4] += TWOTHIRD*(mdelr1[0]*fj[2] + delr2[0]*fk[2]);
        virial[5] += TWOTHIRD*(mdelr1[1]*fj[2] + delr2[1]*fk[2]);

        //int kk = (nbor_k - offset_k - 2*nbor_pitch) / n_stride;
        //int idx = kk*n_stride + j*t_per_atom + offset_k;
        // idx to zetaij is shifted by n_stride relative to nbor_k in dev_short_nbor
        int idx = nbor_k;
        if (dev_packed==dev_nbor) idx -= n_stride;
//        zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
//                 j, nbor_k, offset_k, idx);
        acctyp4 zeta_jk = zetaij[idx]; // fetch(zeta_jk,idx,zeta_tex);
        numtyp prefactor_jk = zeta_jk.y;

+16 −23
Original line number Diff line number Diff line
@@ -129,16 +129,13 @@ __kernel void k_zbl(const __global numtyp4 *restrict x_,
      int mtype=itype*lj_types+jtype;
      if (rsq<cut_globalsq) {
        numtyp r, t, force;

        r = ucl_sqrt(rsq);
        force = dzbldr(r, coeff2[mtype].x, coeff2[mtype].y,
                       coeff2[mtype].z, coeff2[mtype].w, coeff1[mtype].z);

        if (rsq>cut_innersq) {
          t = r - cut_inner;
          force = t*t * (coeff1[mtype].x + coeff1[mtype].y*t);
        }

        force *= (numtyp)-1.0*ucl_recip(r);

        f.x+=delx*force;
@@ -152,7 +149,6 @@ __kernel void k_zbl(const __global numtyp4 *restrict x_,
          if (rsq > cut_innersq) {
            e += t*t*t * (coeff3[mtype].x + coeff3[mtype].y*t);
          }

          energy+=e;
        }
        if (vflag>0) {
@@ -232,11 +228,9 @@ __kernel void k_zbl_fast(const __global numtyp4 *restrict x_,

      if (rsq<cut_globalsq) {
        numtyp r, t, force;

        r = ucl_sqrt(rsq);
        force = dzbldr(r, coeff2[mtype].x, coeff2[mtype].y,
                       coeff2[mtype].z, coeff2[mtype].w, coeff1[mtype].z);

        if (rsq>cut_innersq) {
          t = r - cut_inner;
          force += t*t * (coeff1[mtype].x + coeff1[mtype].y*t);
@@ -255,7 +249,6 @@ __kernel void k_zbl_fast(const __global numtyp4 *restrict x_,
          if (rsq > cut_innersq) {
            e += t*t*t * (coeff3[mtype].x + coeff3[mtype].y*t);
          }

          energy+=e;
        }
        if (vflag>0) {
+6 −6

File changed.

Contains only whitespace changes.

Loading