Commit 77c60189 authored by Trung Nguyen
Browse files

Minor cleanups for tersoff/gpu

parent 1c6533e5
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -163,10 +163,11 @@ int TersoffT::init(const int ntypes, const int nlocal, const int nall, const int
    cutsq_view[i]=static_cast<numtyp>(host_cutsq[i]);
    if (cutsqmax < host_cutsq[i]) cutsqmax = host_cutsq[i];
  }
  _cutshortsq = static_cast<numtyp>(cutsqmax);
  cutsq.alloc(nparams,*(this->ucl_device),UCL_READ_ONLY);
  ucl_copy(cutsq,cutsq_view,false);

  _cutshortsq = static_cast<numtyp>(cutsqmax);

  UCL_H_Vec<int> dview_elem2param(nelements*nelements*nelements,
                           *(this->ucl_device), UCL_WRITE_ONLY);

+3 −3
Original line number Diff line number Diff line
@@ -106,7 +106,7 @@ texture<int4> ts5_tex;
    ans[ii]=old;                                                            \
  }

#define store_zeta(z, tid, t_per_atom, offset)                              \
#define acc_zeta(z, tid, t_per_atom, offset)                                \
  if (t_per_atom>1) {                                                       \
    __local acctyp red_acc[BLOCK_PAIR];                                     \
    red_acc[tid]=z;                                                         \
@@ -155,7 +155,7 @@ texture<int4> ts5_tex;
    ans[ii]=old;                                                            \
  }

#define store_zeta(z, tid, t_per_atom, offset)                              \
#define acc_zeta(z, tid, t_per_atom, offset)                                \
  if (t_per_atom>1) {                                                       \
    for (unsigned int s=t_per_atom/2; s>0; s>>=1) {                         \
      z += shfl_xor(z, s, t_per_atom);                                      \
@@ -348,7 +348,7 @@ __kernel void k_tersoff_zeta(const __global numtyp4 *restrict x_,
      int idx = nbor_j - n_stride;
//      zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom,
//               i, nbor_j, offset_j, idx);
      store_zeta(z, tid, t_per_atom, offset_k);
      acc_zeta(z, tid, t_per_atom, offset_k);

      numtyp4 ts1_ijparam = ts1[ijparam]; //fetch4(ts1_ijparam,ijparam,ts1_tex);
      numtyp ijparam_lam2 = ts1_ijparam.y;