Commit 8c9db3ea authored by Trung Nguyen's avatar Trung Nguyen
Browse files

Built 2-body short neighbor list and used for 2-body kernels in tersoff gpu styles

parent ea2b01e8
Loading
Loading
Loading
Loading
+3 −1
Original line number Diff line number Diff line
@@ -250,7 +250,8 @@ void TersoffT::loop(const bool _eflag, const bool _vflag, const int evatom) {
                               (BX/this->_threads_per_atom)));

  this->k_short_nbor.set_size(GX,BX);
  this->k_short_nbor.run(&this->atom->x, &_cutshortsq,
  this->k_short_nbor.run(&this->atom->x, &cutsq, &map,
                 &elem2param, &_nelements, &_nparams,
                 &this->nbor->dev_nbor, &this->_nbor_data->begin(),
                 &this->dev_short_nbor, &ainum,
                 &nbor_pitch, &this->_threads_per_atom);
@@ -283,6 +284,7 @@ void TersoffT::loop(const bool _eflag, const bool _vflag, const int evatom) {
  this->k_pair.run(&this->atom->x, &ts1, &ts2, &cutsq,
                   &map, &elem2param, &_nelements, &_nparams,
                   &this->nbor->dev_nbor, &this->_nbor_data->begin(),
                   &this->dev_short_nbor,
                   &this->ans->force, &this->ans->engv,
                   &eflag, &vflag, &ainum, &nbor_pitch,
                   &this->_threads_per_atom);
+47 −34
Original line number Diff line number Diff line
@@ -165,7 +165,10 @@ texture<int4> ts5_tex;
#endif

__kernel void k_tersoff_short_nbor(const __global numtyp4 *restrict x_,
                                   const numtyp cutshortsq,
                                   const __global numtyp *restrict cutsq,
                                   const __global int *restrict map,
                                   const __global int *restrict elem2param,
                                   const int nelements, const int nparams,
                                   const __global int * dev_nbor,
                                   const __global int * dev_packed,
                                   __global int * dev_short_nbor,
@@ -182,6 +185,8 @@ __kernel void k_tersoff_short_nbor(const __global numtyp4 *restrict x_,
              n_stride,nbor_end,nbor);

    numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
    int itype=ix.w;
    itype=map[itype];

    int ncount = 0;
    int m = nbor;
@@ -195,6 +200,9 @@ __kernel void k_tersoff_short_nbor(const __global numtyp4 *restrict x_,
      j &= NEIGHMASK;

      numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
      int jtype=jx.w;
      jtype=map[jtype];
      int ijparam=elem2param[itype*nelements*nelements+jtype*nelements+jtype];

      // Compute r12
      numtyp delx = ix.x-jx.x;
@@ -202,7 +210,7 @@ __kernel void k_tersoff_short_nbor(const __global numtyp4 *restrict x_,
      numtyp delz = ix.z-jx.z;
      numtyp rsq = delx*delx+dely*dely+delz*delz;

      if (rsq<cutshortsq) {
      if (rsq<cutsq[ijparam]) {
        dev_short_nbor[nbor_short] = nj;
        nbor_short += n_stride;
        ncount++;
@@ -298,7 +306,7 @@ __kernel void k_tersoff_zeta(const __global numtyp4 *restrict x_,
      delr1.z = jx.z-ix.z;
      numtyp rsq1 = delr1.x*delr1.x+delr1.y*delr1.y+delr1.z*delr1.z;

      if (rsq1 > cutsq[ijparam]) continue;
//      if (rsq1 > cutsq[ijparam]) continue;

      // compute zeta_ij
      z = (acctyp)0;
@@ -391,6 +399,7 @@ __kernel void k_tersoff_repulsive(const __global numtyp4 *restrict x_,
                                  const int nelements, const int nparams,
                                  const __global int * dev_nbor,
                                  const __global int * dev_packed,
                                  const __global int * dev_short_nbor,
                                  __global acctyp4 *restrict ans,
                                  __global acctyp *restrict engv,
                                  const int eflag, const int vflag,
@@ -426,9 +435,14 @@ __kernel void k_tersoff_repulsive(const __global numtyp4 *restrict x_,
    int itype=ix.w;
    itype=map[itype];

    // recalculate numj and nbor_end for use of the short nbor list
    numj = dev_short_nbor[nbor];
    nbor += n_stride;
    nbor_end = nbor+fast_mul(numj,n_stride);

    for ( ; nbor<nbor_end; nbor+=n_stride) {

      int j=dev_packed[nbor];
      int j=dev_short_nbor[nbor];
      j &= NEIGHMASK;

      numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
@@ -443,7 +457,7 @@ __kernel void k_tersoff_repulsive(const __global numtyp4 *restrict x_,
      numtyp delz = ix.z-jx.z;
      numtyp rsq = delx*delx+dely*dely+delz*delz;

      if (rsq<cutsq[ijparam]) {
      // rsq<cutsq[ijparam]
      numtyp feng[2];
      numtyp ijparam_lam1 = ts1[ijparam].x;
      numtyp4 ts2_ijparam = ts2[ijparam];
@@ -469,7 +483,6 @@ __kernel void k_tersoff_repulsive(const __global numtyp4 *restrict x_,
        virial[4] += delx*delz*force;
        virial[5] += dely*delz*force;
      }
      }
    } // for nbor

    store_answers(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag,
@@ -556,7 +569,7 @@ __kernel void k_tersoff_three_center(const __global numtyp4 *restrict x_,
      delr1[2] = jx.z-ix.z;
      numtyp rsq1 = delr1[0]*delr1[0] + delr1[1]*delr1[1] + delr1[2]*delr1[2];

      if (rsq1 > cutsq[ijparam]) continue;
//      if (rsq1 > cutsq[ijparam]) continue;
      numtyp r1 = ucl_sqrt(rsq1);
      numtyp r1inv = ucl_rsqrt(rsq1);

@@ -738,7 +751,7 @@ __kernel void k_tersoff_three_end(const __global numtyp4 *restrict x_,
      delr1[2] = jx.z-ix.z;
      numtyp rsq1 = delr1[0]*delr1[0] + delr1[1]*delr1[1] + delr1[2]*delr1[2];

      if (rsq1 > cutsq[ijparam]) continue;
//      if (rsq1 > cutsq[ijparam]) continue;

      numtyp mdelr1[3];
      mdelr1[0] = -delr1[0];
@@ -978,7 +991,7 @@ __kernel void k_tersoff_three_end_vatom(const __global numtyp4 *restrict x_,
      delr1[2] = jx.z-ix.z;
      numtyp rsq1 = delr1[0]*delr1[0] + delr1[1]*delr1[1] + delr1[2]*delr1[2];

      if (rsq1 > cutsq[ijparam]) continue;
//      if (rsq1 > cutsq[ijparam]) continue;

      numtyp mdelr1[3];
      mdelr1[0] = -delr1[0];
+3 −1
Original line number Diff line number Diff line
@@ -250,7 +250,8 @@ void TersoffMT::loop(const bool _eflag, const bool _vflag, const int evatom) {
                               (BX/this->_threads_per_atom)));

  this->k_short_nbor.set_size(GX,BX);
  this->k_short_nbor.run(&this->atom->x, &_cutshortsq,
  this->k_short_nbor.run(&this->atom->x, &cutsq, &map,
                 &elem2param, &_nelements, &_nparams,
                 &this->nbor->dev_nbor, &this->_nbor_data->begin(),
                 &this->dev_short_nbor, &ainum,
                 &nbor_pitch, &this->_threads_per_atom);
@@ -283,6 +284,7 @@ void TersoffMT::loop(const bool _eflag, const bool _vflag, const int evatom) {
  this->k_pair.run(&this->atom->x, &ts1, &ts2, &cutsq,
                   &map, &elem2param, &_nelements, &_nparams,
                   &this->nbor->dev_nbor, &this->_nbor_data->begin(),
                   &this->dev_short_nbor,
                   &this->ans->force, &this->ans->engv,
                   &eflag, &vflag, &ainum, &nbor_pitch,
                   &this->_threads_per_atom);
+46 −33
Original line number Diff line number Diff line
@@ -165,7 +165,10 @@ texture<int4> ts5_tex;
#endif

__kernel void k_tersoff_mod_short_nbor(const __global numtyp4 *restrict x_,
                                   const numtyp cutshortsq,
                                   const __global numtyp *restrict cutsq,
                                   const __global int *restrict map,
                                   const __global int *restrict elem2param,
                                   const int nelements, const int nparams,
                                   const __global int * dev_nbor,
                                   const __global int * dev_packed,
                                   __global int * dev_short_nbor,
@@ -182,6 +185,8 @@ __kernel void k_tersoff_mod_short_nbor(const __global numtyp4 *restrict x_,
              n_stride,nbor_end,nbor);

    numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
    int itype=ix.w;
    itype=map[itype];

    int ncount = 0;
    int m = nbor;
@@ -195,6 +200,9 @@ __kernel void k_tersoff_mod_short_nbor(const __global numtyp4 *restrict x_,
      j &= NEIGHMASK;

      numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
      int jtype=jx.w;
      jtype=map[jtype];
      int ijparam=elem2param[itype*nelements*nelements+jtype*nelements+jtype];

      // Compute r12
      numtyp delx = ix.x-jx.x;
@@ -202,7 +210,7 @@ __kernel void k_tersoff_mod_short_nbor(const __global numtyp4 *restrict x_,
      numtyp delz = ix.z-jx.z;
      numtyp rsq = delx*delx+dely*dely+delz*delz;

      if (rsq<cutshortsq) {
      if (rsq<cutsq[ijparam]) {
        dev_short_nbor[nbor_short] = nj;
        nbor_short += n_stride;
        ncount++;
@@ -298,7 +306,7 @@ __kernel void k_tersoff_mod_zeta(const __global numtyp4 *restrict x_,
      delr1.z = jx.z-ix.z;
      numtyp rsq1 = delr1.x*delr1.x+delr1.y*delr1.y+delr1.z*delr1.z;

      if (rsq1 > cutsq[ijparam]) continue;
//      if (rsq1 > cutsq[ijparam]) continue;

      // compute zeta_ij
      z = (acctyp)0;
@@ -392,6 +400,7 @@ __kernel void k_tersoff_mod_repulsive(const __global numtyp4 *restrict x_,
                                  const int nelements, const int nparams,
                                  const __global int * dev_nbor,
                                  const __global int * dev_packed,
                                  const __global int * dev_short_nbor,
                                  __global acctyp4 *restrict ans,
                                  __global acctyp *restrict engv,
                                  const int eflag, const int vflag,
@@ -427,9 +436,14 @@ __kernel void k_tersoff_mod_repulsive(const __global numtyp4 *restrict x_,
    int itype=ix.w;
    itype=map[itype];

    // recalculate numj and nbor_end for use of the short nbor list
    numj = dev_short_nbor[nbor];
    nbor += n_stride;
    nbor_end = nbor+fast_mul(numj,n_stride);

    for ( ; nbor<nbor_end; nbor+=n_stride) {

      int j=dev_packed[nbor];
      int j=dev_short_nbor[nbor];
      j &= NEIGHMASK;

      numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
@@ -444,7 +458,7 @@ __kernel void k_tersoff_mod_repulsive(const __global numtyp4 *restrict x_,
      numtyp delz = ix.z-jx.z;
      numtyp rsq = delx*delx+dely*dely+delz*delz;

      if (rsq<cutsq[ijparam]) {
      // rsq<cutsq[ijparam]
      numtyp feng[2];
      numtyp ijparam_lam1 = ts1[ijparam].x;
      numtyp4 ts2_ijparam = ts2[ijparam];
@@ -470,7 +484,6 @@ __kernel void k_tersoff_mod_repulsive(const __global numtyp4 *restrict x_,
        virial[4] += delx*delz*force;
        virial[5] += dely*delz*force;
      }
      }
    } // for nbor

    store_answers(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag,
@@ -560,7 +573,7 @@ __kernel void k_tersoff_mod_three_center(const __global numtyp4 *restrict x_,
      delr1[2] = jx.z-ix.z;
      numtyp rsq1 = delr1[0]*delr1[0] + delr1[1]*delr1[1] + delr1[2]*delr1[2];

      if (rsq1 > cutsq[ijparam]) continue;
//      if (rsq1 > cutsq[ijparam]) continue;
      numtyp r1 = ucl_sqrt(rsq1);
      numtyp r1inv = ucl_rsqrt(rsq1);

@@ -748,7 +761,7 @@ __kernel void k_tersoff_mod_three_end(const __global numtyp4 *restrict x_,
      delr1[2] = jx.z-ix.z;
      numtyp rsq1 = delr1[0]*delr1[0] + delr1[1]*delr1[1] + delr1[2]*delr1[2];

      if (rsq1 > cutsq[ijparam]) continue;
//      if (rsq1 > cutsq[ijparam]) continue;

      numtyp mdelr1[3];
      mdelr1[0] = -delr1[0];
@@ -997,7 +1010,7 @@ __kernel void k_tersoff_mod_three_end_vatom(const __global numtyp4 *restrict x_,
      delr1[2] = jx.z-ix.z;
      numtyp rsq1 = delr1[0]*delr1[0] + delr1[1]*delr1[1] + delr1[2]*delr1[2];

      if (rsq1 > cutsq[ijparam]) continue;
//      if (rsq1 > cutsq[ijparam]) continue;

      numtyp mdelr1[3];
      mdelr1[0] = -delr1[0];
+3 −1
Original line number Diff line number Diff line
@@ -275,7 +275,8 @@ void TersoffZT::loop(const bool _eflag, const bool _vflag, const int evatom) {
                               (BX/this->_threads_per_atom)));

  this->k_short_nbor.set_size(GX,BX);
  this->k_short_nbor.run(&this->atom->x, &_cutshortsq,
  this->k_short_nbor.run(&this->atom->x, &cutsq, &map,
                 &elem2param, &_nelements, &_nparams,
                 &this->nbor->dev_nbor, &this->_nbor_data->begin(),
                 &this->dev_short_nbor, &ainum,
                 &nbor_pitch, &this->_threads_per_atom);
@@ -309,6 +310,7 @@ void TersoffZT::loop(const bool _eflag, const bool _vflag, const int evatom) {
                   &_global_e, &_global_a_0, &_global_epsilon_0, &cutsq,
                   &map, &elem2param, &_nelements, &_nparams,
                   &this->nbor->dev_nbor, &this->_nbor_data->begin(),
                   &this->dev_short_nbor,
                   &this->ans->force, &this->ans->engv,
                   &eflag, &vflag, &ainum, &nbor_pitch,
                   &this->_threads_per_atom);
Loading