Unverified Commit f7329c0f authored by Axel Kohlmeyer's avatar Axel Kohlmeyer Committed by GitHub
Browse files

Merge pull request #1540 from stanmoore1/kk_snap

Port Recent SNAP changes to Kokkos
parents 33d3bd7a 559c1879
Loading
Loading
Loading
Loading
+20 −5
Original line number Diff line number Diff line
@@ -31,7 +31,10 @@ PairStyle(snap/kk/host,PairSNAPKokkos<LMPHostType>)
namespace LAMMPS_NS {

template<int NEIGHFLAG, int EVFLAG>
struct TagPairSNAP{};
struct TagPairSNAPCompute{};

struct TagPairSNAPBeta{};
struct TagPairSNAPBispectrum{};

template<class DeviceType>
class PairSNAPKokkos : public PairSNAP {
@@ -53,11 +56,17 @@ public:

  template<int NEIGHFLAG, int EVFLAG>
  KOKKOS_INLINE_FUNCTION
  void operator() (TagPairSNAP<NEIGHFLAG,EVFLAG>,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAP<NEIGHFLAG,EVFLAG> >::member_type& team) const;
  void operator() (TagPairSNAPCompute<NEIGHFLAG,EVFLAG>,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPCompute<NEIGHFLAG,EVFLAG> >::member_type& team) const;

  template<int NEIGHFLAG, int EVFLAG>
  KOKKOS_INLINE_FUNCTION
  void operator() (TagPairSNAP<NEIGHFLAG,EVFLAG>,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAP<NEIGHFLAG,EVFLAG> >::member_type& team, EV_FLOAT&) const;
  void operator() (TagPairSNAPCompute<NEIGHFLAG,EVFLAG>,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPCompute<NEIGHFLAG,EVFLAG> >::member_type& team, EV_FLOAT&) const;

  KOKKOS_INLINE_FUNCTION
  void operator() (TagPairSNAPBeta,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPBeta>::member_type& team) const;

  KOKKOS_INLINE_FUNCTION
  void operator() (TagPairSNAPBispectrum,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPBispectrum>::member_type& team) const;

  template<int NEIGHFLAG>
  KOKKOS_INLINE_FUNCTION
@@ -82,10 +91,14 @@ protected:
  SNAKokkos<DeviceType> snaKK;

  // How much parallelism to use within an interaction
  int vector_length;
  int vector_length,team_size;
  int team_scratch_size;
  int thread_scratch_size;

  int eflag,vflag;

  void compute_beta();
  void compute_bispectrum();
  void allocate();
  //void read_files(char *, char *);
  /*template<class DeviceType>
@@ -118,6 +131,8 @@ inline double dist2(double* x,double* y);
  Kokkos::View<F_FLOAT*, DeviceType> d_wjelem;               // elements weights
  Kokkos::View<F_FLOAT**, Kokkos::LayoutRight, DeviceType> d_coeffelem;           // element bispectrum coefficients
  Kokkos::View<T_INT*, DeviceType> d_map;                    // mapping from atom types to elements
  Kokkos::View<F_FLOAT**, DeviceType> d_beta;                // betas for all atoms in list
  Kokkos::View<F_FLOAT**, DeviceType> d_bispectrum;          // bispectrum components for all atoms in list

  typedef Kokkos::DualView<F_FLOAT**, DeviceType> tdual_fparams;
  tdual_fparams k_cutsq;
+230 −150
Original line number Diff line number Diff line
@@ -186,31 +186,45 @@ void PairSNAPKokkos<DeviceType>::compute(int eflag_in, int vflag_in)

  snaKK.nmax = max_neighs;

  T_INT team_scratch_size = snaKK.size_team_scratch_arrays();
  T_INT thread_scratch_size = snaKK.size_thread_scratch_arrays();
  team_scratch_size = snaKK.size_team_scratch_arrays();
  thread_scratch_size = snaKK.size_thread_scratch_arrays();

  //printf("Sizes: %i %i\n",team_scratch_size/1024,thread_scratch_size/1024);
  int team_size_max = Kokkos::TeamPolicy<DeviceType>::team_size_max(*this);
  int vector_length = 8;
  vector_length = 8;
#ifdef KOKKOS_ENABLE_CUDA
  int team_size = 32;//max_neighs;
  team_size = 32;//max_neighs;
  if (team_size*vector_length > team_size_max)
    team_size = team_size_max/vector_length;
#else
  int team_size = 1;
  team_size = 1;
#endif

  if (beta_max < list->inum) {
    d_beta = Kokkos::View<F_FLOAT**, DeviceType>("PairSNAPKokkos:beta",
     list->inum,ncoeff);
    d_bispectrum = Kokkos::View<F_FLOAT**, DeviceType>("PairSNAPKokkos:bispectrum",
     list->inum,ncoeff);
    beta_max = list->inum;
  }

  // compute dE_i/dB_i = beta_i for all i in list

  if (quadraticflag || eflag) 
    compute_bispectrum();
  compute_beta();

  EV_FLOAT ev;

  if (eflag) {
    if (neighflag == HALF) {
      typename Kokkos::TeamPolicy<DeviceType, TagPairSNAP<HALF,1> > policy(inum,team_size,vector_length);
      typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPCompute<HALF,1> > policy(inum,team_size,vector_length);
      Kokkos::parallel_reduce(policy
          .set_scratch_size(1,Kokkos::PerThread(thread_scratch_size))
          .set_scratch_size(1,Kokkos::PerTeam(team_scratch_size))
        ,*this,ev);
    } else if (neighflag == HALFTHREAD) {
      typename Kokkos::TeamPolicy<DeviceType, TagPairSNAP<HALFTHREAD,1> > policy(inum,team_size,vector_length);
      typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPCompute<HALFTHREAD,1> > policy(inum,team_size,vector_length);
      Kokkos::parallel_reduce(policy
          .set_scratch_size(1,Kokkos::PerThread(thread_scratch_size))
          .set_scratch_size(1,Kokkos::PerTeam(team_scratch_size))
@@ -218,13 +232,13 @@ void PairSNAPKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
    }
  } else {
    if (neighflag == HALF) {
      typename Kokkos::TeamPolicy<DeviceType, TagPairSNAP<HALF,0> > policy(inum,team_size,vector_length);
      typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPCompute<HALF,0> > policy(inum,team_size,vector_length);
      Kokkos::parallel_for(policy
          .set_scratch_size(1,Kokkos::PerThread(thread_scratch_size))
          .set_scratch_size(1,Kokkos::PerTeam(team_scratch_size))
        ,*this);
    } else if (neighflag == HALFTHREAD) {
      typename Kokkos::TeamPolicy<DeviceType, TagPairSNAP<HALFTHREAD,0> > policy(inum,team_size,vector_length);
      typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPCompute<HALFTHREAD,0> > policy(inum,team_size,vector_length);
      Kokkos::parallel_for(policy
          .set_scratch_size(1,Kokkos::PerThread(thread_scratch_size))
          .set_scratch_size(1,Kokkos::PerTeam(team_scratch_size))
@@ -232,11 +246,6 @@ void PairSNAPKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
    }
  }

//static int step =0;
//step++;
//if (step%10==0)
//        printf(" %e %e %e %e %e (%e %e): %e\n",t1,t2,t3,t4,t5,t6,t7,t1+t2+t3+t4+t5);

  if (need_dup)
    Kokkos::Experimental::contribute(f, dup_f);

@@ -275,6 +284,153 @@ void PairSNAPKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
  }
}

/* ----------------------------------------------------------------------
   compute beta
------------------------------------------------------------------------- */

template<class DeviceType>
void PairSNAPKokkos<DeviceType>::compute_beta()
{
  // TODO: use RangePolicy instead, or thread over ncoeff?
  int inum = list->inum;
  typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPBeta> policy(inum,team_size,vector_length);
  Kokkos::parallel_for(policy
      .set_scratch_size(1,Kokkos::PerThread(thread_scratch_size))
      .set_scratch_size(1,Kokkos::PerTeam(team_scratch_size))
    ,*this);
}

/* ---------------------------------------------------------------------- */

template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPBeta,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPBeta>::member_type& team) const {

  const int ii = team.league_rank();
  const int i = d_ilist[ii];
  const int itype = type[i];
  const int ielem = d_map[itype];
  Kokkos::View<double*,Kokkos::LayoutRight,DeviceType,Kokkos::MemoryTraits<Kokkos::Unmanaged>>
    d_coeffi(d_coeffelem,ielem,Kokkos::ALL);

  for (int icoeff = 0; icoeff < ncoeff; icoeff++)
    d_beta(ii,icoeff) = d_coeffi[icoeff+1];

  if (quadraticflag) {
    int k = ncoeff+1;
    for (int icoeff = 0; icoeff < ncoeff; icoeff++) {
      double bveci = d_bispectrum(ii,icoeff);
      d_beta(ii,icoeff) += d_coeffi[k]*bveci;
      k++;
      for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) {
        double bvecj = d_bispectrum(ii,jcoeff);
        d_beta(ii,icoeff) += d_coeffi[k]*bvecj;
        d_beta(ii,jcoeff) += d_coeffi[k]*bveci;
        k++;
      }
    }
  }
}

/* ----------------------------------------------------------------------
   compute bispectrum
------------------------------------------------------------------------- */

template<class DeviceType>
void PairSNAPKokkos<DeviceType>::compute_bispectrum()
{
  int inum = list->inum;
  typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPBispectrum> policy(inum,team_size,vector_length);
  Kokkos::parallel_for(policy
      .set_scratch_size(1,Kokkos::PerThread(thread_scratch_size))
      .set_scratch_size(1,Kokkos::PerTeam(team_scratch_size))
    ,*this);
}

/* ---------------------------------------------------------------------- */

template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPBispectrum,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPBispectrum>::member_type& team) const {

  const int ii = team.league_rank();
  const int i = d_ilist[ii];
  SNAKokkos<DeviceType> my_sna(snaKK,team);
  const double xtmp = x(i,0);
  const double ytmp = x(i,1);
  const double ztmp = x(i,2);
  const int itype = type[i];
  const int ielem = d_map[itype];
  const double radi = d_radelem[ielem];

  const int num_neighs = d_numneigh[i];

  // rij[][3] = displacements between atom I and those neighbors
  // inside = indices of neighbors of I within cutoff
  // wj = weights for neighbors of I within cutoff
  // rcutij = cutoffs for neighbors of I within cutoff
  // note Rij sign convention => dU/dRij = dU/dRj = -dU/dRi

  int ninside = 0;
  Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team,num_neighs),
      [&] (const int jj, int& count) {
    Kokkos::single(Kokkos::PerThread(team), [&] (){
      T_INT j = d_neighbors(i,jj);
      const F_FLOAT dx = x(j,0) - xtmp;
      const F_FLOAT dy = x(j,1) - ytmp;
      const F_FLOAT dz = x(j,2) - ztmp;

      const int jtype = type(j);
      const F_FLOAT rsq = dx*dx + dy*dy + dz*dz;
      const int elem_j = d_map[jtype];

      if ( rsq < rnd_cutsq(itype,jtype) )
       count++;
    });
  },ninside);

  if (team.team_rank() == 0)
  Kokkos::parallel_scan(Kokkos::ThreadVectorRange(team,num_neighs),
      [&] (const int jj, int& offset, bool final) {
  //for (int jj = 0; jj < num_neighs; jj++) {
    T_INT j = d_neighbors(i,jj);
    const F_FLOAT dx = x(j,0) - xtmp;
    const F_FLOAT dy = x(j,1) - ytmp;
    const F_FLOAT dz = x(j,2) - ztmp;

    const int jtype = type(j);
    const F_FLOAT rsq = dx*dx + dy*dy + dz*dz;
    const int elem_j = d_map[jtype];

    if ( rsq < rnd_cutsq(itype,jtype) ) {
      if (final) {
        my_sna.rij(offset,0) = dx;
        my_sna.rij(offset,1) = dy;
        my_sna.rij(offset,2) = dz;
        my_sna.inside[offset] = j;
        my_sna.wj[offset] = d_wjelem[elem_j];
        my_sna.rcutij[offset] = (radi + d_radelem[elem_j])*rcutfac;
      }
      offset++;
    }
  });
  team.team_barrier();

  // compute Ui, Zi, and Bi for atom I

  my_sna.compute_ui(team,ninside);
  team.team_barrier();

  my_sna.compute_zi(team);
  team.team_barrier();

  my_sna.compute_bi(team);
  team.team_barrier();

  for (int icoeff = 0; icoeff < ncoeff; icoeff++)
    d_bispectrum(ii,icoeff) = my_sna.blist[icoeff];
}

/* ----------------------------------------------------------------------
   allocate all arrays
------------------------------------------------------------------------- */
@@ -354,7 +510,7 @@ void PairSNAPKokkos<DeviceType>::coeff(int narg, char **arg)
template<class DeviceType>
template<int NEIGHFLAG, int EVFLAG>
KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAP<NEIGHFLAG,EVFLAG>,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAP<NEIGHFLAG,EVFLAG> >::member_type& team, EV_FLOAT& ev) const {
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPCompute<NEIGHFLAG,EVFLAG>,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPCompute<NEIGHFLAG,EVFLAG> >::member_type& team, EV_FLOAT& ev) const {

  // The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial

@@ -364,12 +520,12 @@ void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAP<NEIGHFLAG,EVFLAG>,const
  const int ii = team.league_rank();
  const int i = d_ilist[ii];
  SNAKokkos<DeviceType> my_sna(snaKK,team);
  const double x_i = x(i,0);
  const double y_i = x(i,1);
  const double z_i = x(i,2);
  const int type_i = type[i];
  const int elem_i = d_map[type_i];
  const double radi = d_radelem[elem_i];
  const double xtmp = x(i,0);
  const double ytmp = x(i,1);
  const double ztmp = x(i,2);
  const int itype = type[i];
  const int ielem = d_map[itype];
  const double radi = d_radelem[ielem];

  const int num_neighs = d_numneigh[i];

@@ -379,41 +535,38 @@ void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAP<NEIGHFLAG,EVFLAG>,const
  // rcutij = cutoffs for neighbors of I within cutoff
  // note Rij sign convention => dU/dRij = dU/dRj = -dU/dRi

  //Kokkos::Timer timer;
  int ninside = 0;
  Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team,num_neighs),
      [&] (const int jj, int& count) {
    Kokkos::single(Kokkos::PerThread(team), [&] (){
      T_INT j = d_neighbors(i,jj);
      const F_FLOAT dx = x(j,0) - x_i;
      const F_FLOAT dy = x(j,1) - y_i;
      const F_FLOAT dz = x(j,2) - z_i;
      const F_FLOAT dx = x(j,0) - xtmp;
      const F_FLOAT dy = x(j,1) - ytmp;
      const F_FLOAT dz = x(j,2) - ztmp;

      const int type_j = type(j);
      const int jtype = type(j);
      const F_FLOAT rsq = dx*dx + dy*dy + dz*dz;
      const int elem_j = d_map[type_j];
      const int elem_j = d_map[jtype];

      if ( rsq < rnd_cutsq(type_i,type_j) )
      if ( rsq < rnd_cutsq(itype,jtype) )
       count++;
    });
  },ninside);

  //t1 += timer.seconds(); timer.reset();

  if (team.team_rank() == 0)
  Kokkos::parallel_scan(Kokkos::ThreadVectorRange(team,num_neighs),
      [&] (const int jj, int& offset, bool final) {
  //for (int jj = 0; jj < num_neighs; jj++) {
    T_INT j = d_neighbors(i,jj);
    const F_FLOAT dx = x(j,0) - x_i;
    const F_FLOAT dy = x(j,1) - y_i;
    const F_FLOAT dz = x(j,2) - z_i;
    const F_FLOAT dx = x(j,0) - xtmp;
    const F_FLOAT dy = x(j,1) - ytmp;
    const F_FLOAT dz = x(j,2) - ztmp;

    const int type_j = type(j);
    const int jtype = type(j);
    const F_FLOAT rsq = dx*dx + dy*dy + dz*dz;
    const int elem_j = d_map[type_j];
    const int elem_j = d_map[jtype];

    if ( rsq < rnd_cutsq(type_i,type_j) ) {
    if ( rsq < rnd_cutsq(itype,jtype) ) {
      if (final) {
        my_sna.rij(offset,0) = dx;
        my_sna.rij(offset,1) = dy;
@@ -425,97 +578,35 @@ void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAP<NEIGHFLAG,EVFLAG>,const
      offset++;
    }
  });
  team.team_barrier();

  //t2 += timer.seconds(); timer.reset();
  // compute Ui, Yi for atom I

  team.team_barrier();
  // compute Ui, Zi, and Bi for atom I
  my_sna.compute_ui(team,ninside);
  //t3 += timer.seconds(); timer.reset();
  team.team_barrier();
  my_sna.compute_zi(team);
  //t4 += timer.seconds(); timer.reset();
  team.team_barrier();

  if (quadraticflag) {
    my_sna.compute_bi(team);
    team.team_barrier();
    my_sna.copy_bi2bvec(team);
  team.team_barrier();
  }

  // for neighbors of I within cutoff:
  // compute dUi/drj and dBi/drj
  // Fij = dEi/dRj = -dEi/dRi => add to Fi, subtract from Fj
  // compute Fij = dEi/dRj = -dEi/dRi 
  // add to Fi, subtract from Fj

  my_sna.compute_yi(team,d_beta,ii);
  team.team_barrier();

  Kokkos::View<double*,Kokkos::LayoutRight,DeviceType,Kokkos::MemoryTraits<Kokkos::Unmanaged>>
    d_coeffi(d_coeffelem,elem_i,Kokkos::ALL);
    d_coeffi(d_coeffelem,ielem,Kokkos::ALL);

  Kokkos::parallel_for (Kokkos::TeamThreadRange(team,ninside),
      [&] (const int jj) {
  //for (int jj = 0; jj < ninside; jj++) {
    int j = my_sna.inside[jj];

    //Kokkos::Timer timer2;
    my_sna.compute_duidrj(team,&my_sna.rij(jj,0),
                           my_sna.wj[jj],my_sna.rcutij[jj]);
    //t6 += timer2.seconds(); timer2.reset();
    my_sna.compute_dbidrj(team);
    //t7 += timer2.seconds(); timer2.reset();
    my_sna.copy_dbi2dbvec(team);
                           my_sna.wj[jj],my_sna.rcutij[jj],jj);

    Kokkos::single(Kokkos::PerThread(team), [&] (){
    F_FLOAT fij[3];
    my_sna.compute_deidrj(team,fij);

    fij[0] = 0.0;
    fij[1] = 0.0;
    fij[2] = 0.0;

    // linear contributions

    for (int k = 1; k <= ncoeff; k++) {
      double bgb = d_coeffi[k];
      fij[0] += bgb*my_sna.dbvec(k-1,0);
      fij[1] += bgb*my_sna.dbvec(k-1,1);
      fij[2] += bgb*my_sna.dbvec(k-1,2);
    }

    if (quadraticflag) {

      int k = ncoeff+1;
      for (int icoeff = 0; icoeff < ncoeff; icoeff++) {
        double bveci = my_sna.bvec[icoeff];
        double fack = d_coeffi[k]*bveci;
        double dbvecix = my_sna.dbvec(icoeff,0);
        double dbveciy = my_sna.dbvec(icoeff,1);
        double dbveciz = my_sna.dbvec(icoeff,2);
        fij[0] += fack*dbvecix;
        fij[1] += fack*dbveciy;
        fij[2] += fack*dbveciz;
        k++;
        for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) {
          double facki = d_coeffi[k]*bveci;
          double fackj = d_coeffi[k]*my_sna.bvec[jcoeff];
          fij[0] += facki*my_sna.dbvec(jcoeff,0)+fackj*dbvecix;
          fij[1] += facki*my_sna.dbvec(jcoeff,1)+fackj*dbveciy;
          fij[2] += facki*my_sna.dbvec(jcoeff,2)+fackj*dbveciz;
          k++;
        }
      }
    }

    // Hard-coded ZBL potential
    //const double dx = my_sna.rij(jj,0);
    //const double dy = my_sna.rij(jj,1);
    //const double dz = my_sna.rij(jj,2);
    //const double fdivr = -1.5e6/pow(dx*dx + dy*dy + dz*dz,7.0);
    //fij[0] += dx*fdivr;
    //fij[1] += dy*fdivr;
    //fij[2] += dz*fdivr;

    //OK
    //printf("%lf %lf %lf %lf %lf %lf %lf %lf %lf SNAP-COMPARE: FIJ\n"
    //    ,x(i,0),x(i,1),x(i,2),x(j,0),x(j,1),x(j,2),fij[0],fij[1],fij[2] );
    Kokkos::single(Kokkos::PerThread(team), [&] (){
      a_f(i,0) += fij[0];
      a_f(i,1) += fij[1];
      a_f(i,2) += fij[2];
@@ -536,46 +627,35 @@ void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAP<NEIGHFLAG,EVFLAG>,const
      
    });
  });
  //t5 += timer.seconds(); timer.reset();

  // tally energy contribution

  if (EVFLAG) {
    if (eflag_either) {

      if (!quadraticflag) {
        my_sna.compute_bi(team);
        team.team_barrier();
        my_sna.copy_bi2bvec(team);
        team.team_barrier();
      }

      // E = beta.B + 0.5*B^t.alpha.B
      // coeff[k] = beta[k-1] or
      // coeff[k] = alpha_ii or
      // coeff[k] = alpha_ij = alpha_ji, j != i

      Kokkos::single(Kokkos::PerTeam(team), [&] () {

        // evdwl = energy of atom I, sum over coeffs_k * Bi_k

        double evdwl = d_coeffi[0];
        
        // E = beta.B + 0.5*B^t.alpha.B
        
        // linear contributions
        // could use thread vector range on this loop
        
        for (int k = 1; k <= ncoeff; k++)
          evdwl += d_coeffi[k]*my_sna.bvec[k-1];
        for (int icoeff = 0; icoeff < ncoeff; icoeff++)
          evdwl += d_coeffi[icoeff+1]*d_bispectrum(ii,icoeff);
        
        // quadratic contributions
        
        if (quadraticflag) {
          int k = ncoeff+1;
          for (int icoeff = 0; icoeff < ncoeff; icoeff++) {
            double bveci = my_sna.bvec[icoeff];
            double bveci = d_bispectrum(ii,icoeff);
            evdwl += 0.5*d_coeffi[k++]*bveci*bveci;
            for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) {
              evdwl += d_coeffi[k++]*bveci*my_sna.bvec[jcoeff];
              double bvecj = d_bispectrum(ii,jcoeff);
              evdwl += d_coeffi[k++]*bveci*bvecj;
            }
          }
        }
@@ -591,9 +671,9 @@ void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAP<NEIGHFLAG,EVFLAG>,const
template<class DeviceType>
template<int NEIGHFLAG, int EVFLAG>
KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAP<NEIGHFLAG,EVFLAG>,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAP<NEIGHFLAG,EVFLAG> >::member_type& team) const {
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPCompute<NEIGHFLAG,EVFLAG>,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPCompute<NEIGHFLAG,EVFLAG> >::member_type& team) const {
  EV_FLOAT ev;
  this->template operator()<NEIGHFLAG,EVFLAG>(TagPairSNAP<NEIGHFLAG,EVFLAG>(), team, ev);
  this->template operator()<NEIGHFLAG,EVFLAG>(TagPairSNAPCompute<NEIGHFLAG,EVFLAG>(), team, ev);
}

/* ---------------------------------------------------------------------- */
+35 −25
Original line number Diff line number Diff line
@@ -25,7 +25,11 @@

namespace LAMMPS_NS {

struct SNAKK_LOOPINDICES {
struct SNAKK_ZINDICES {
  int j1, j2, j, ma1min, ma2max, mb1min, mb2max, na, nb, jju;
};

struct SNAKK_BINDICES {
  int j1, j2, j;
};

@@ -35,9 +39,9 @@ class SNAKokkos {
public:
  typedef Kokkos::View<int*, DeviceType> t_sna_1i;
  typedef Kokkos::View<double*, DeviceType> t_sna_1d;
  typedef Kokkos::View<double*, Kokkos::LayoutRight, DeviceType, Kokkos::MemoryTraits<Kokkos::Atomic> > t_sna_1d_atomic;
  typedef Kokkos::View<double**, Kokkos::LayoutRight, DeviceType> t_sna_2d;
  typedef Kokkos::View<double***, Kokkos::LayoutRight, DeviceType> t_sna_3d;
  typedef Kokkos::View<double***, Kokkos::LayoutRight, DeviceType, Kokkos::MemoryTraits<Kokkos::Atomic> > t_sna_3d_atomic;
  typedef Kokkos::View<double***[3], Kokkos::LayoutRight, DeviceType> t_sna_4d;
  typedef Kokkos::View<double**[3], Kokkos::LayoutRight, DeviceType> t_sna_3d3;
  typedef Kokkos::View<double*****, Kokkos::LayoutRight, DeviceType> t_sna_5d;
@@ -76,18 +80,19 @@ inline
  KOKKOS_INLINE_FUNCTION
  void compute_zi(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team);    // ForceSNAP
  KOKKOS_INLINE_FUNCTION
  void compute_bi(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team);    // ForceSNAP
  void compute_yi(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team,
   const Kokkos::View<F_FLOAT**, DeviceType> &beta, const int ii); // ForceSNAP
  KOKKOS_INLINE_FUNCTION
  void copy_bi2bvec(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team); //ForceSNAP
  void compute_bi(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team);    // ForceSNAP

  // functions for derivatives

  KOKKOS_INLINE_FUNCTION
  void compute_duidrj(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, double*, double, double); //ForceSNAP
  void compute_duidrj(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, double*, double, double, int); //ForceSNAP
  KOKKOS_INLINE_FUNCTION
  void compute_dbidrj(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team); //ForceSNAP
  KOKKOS_INLINE_FUNCTION
  void copy_dbi2dbvec(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team); //ForceSNAP
  void compute_deidrj(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, double *); // ForceSNAP
  KOKKOS_INLINE_FUNCTION
  double compute_sfac(double, double); // add_uarraytot, compute_duarray
  KOKKOS_INLINE_FUNCTION
@@ -114,37 +119,42 @@ inline

  int twojmax, diagonalstyle;
  // Per InFlight Particle
  t_sna_3d barray;
  t_sna_3d uarraytot_r, uarraytot_i;
  t_sna_3d_atomic uarraytot_r_a, uarraytot_i_a;
  t_sna_5d zarray_r, zarray_i;
  t_sna_1d blist;
  t_sna_1d ulisttot_r, ulisttot_i;
  t_sna_1d_atomic ulisttot_r_a, ulisttot_i_a;
  t_sna_1d zlist_r, zlist_i;
  t_sna_2d ulist_r_ij, ulist_i_ij;

  // Per InFlight Interaction
  t_sna_3d uarray_r, uarray_i;

  Kokkos::View<double*, Kokkos::LayoutRight, DeviceType> bvec;
  t_sna_1d ulist_r, ulist_i;
  t_sna_1d_atomic ylist_r, ylist_i;

  // derivatives of data
  Kokkos::View<double*[3], Kokkos::LayoutRight, DeviceType> dbvec;
  t_sna_4d duarray_r, duarray_i;
  t_sna_4d dbarray;
  t_sna_2d dulist_r, dulist_i;
  t_sna_2d dblist;

private:
  double rmin0, rfac0;

  //use indexlist instead of loops, constructor generates these
  // Same accross all SNAKokkos
  Kokkos::View<SNAKK_LOOPINDICES*, DeviceType> idxj,idxj_full;
  int idxj_max,idxj_full_max;
  // Same across all SNAKokkos
  Kokkos::View<SNAKK_ZINDICES*, DeviceType> idxz;
  Kokkos::View<SNAKK_BINDICES*, DeviceType> idxb;
  int idxcg_max, idxu_max, idxz_max, idxb_max;
  Kokkos::View<int***, DeviceType> idxcg_block;
  Kokkos::View<int*, DeviceType> idxu_block;
  Kokkos::View<int***, DeviceType> idxz_block;
  Kokkos::View<int***, DeviceType> idxb_block;

  // data for bispectrum coefficients

  // Same accross all SNAKokkos
  t_sna_5d cgarray;
  t_sna_1d cglist;
  t_sna_2d rootpqarray;


  static const int nmaxfactorial = 167;
  KOKKOS_INLINE_FUNCTION
  static const double nfac_table[];
  inline
  double factorial(int);

  KOKKOS_INLINE_FUNCTION
@@ -162,13 +172,13 @@ inline
  KOKKOS_INLINE_FUNCTION
  void addself_uarraytot(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, double); // compute_ui
  KOKKOS_INLINE_FUNCTION
  void add_uarraytot(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, double, double, double); // compute_ui
  void add_uarraytot(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, double, double, double, int); // compute_ui

  KOKKOS_INLINE_FUNCTION
  void compute_uarray(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team,
                      double, double, double,
                      double, double); // compute_ui
  KOKKOS_INLINE_FUNCTION
  inline
  double deltacg(int, int, int);  // init_clebsch_gordan

inline
@@ -176,7 +186,7 @@ inline
  KOKKOS_INLINE_FUNCTION
  void compute_duarray(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team,
                       double, double, double, // compute_duidrj
                       double, double, double, double, double);
                       double, double, double, double, double, int);

  // Sets the style for the switching function
  // 0 = none
Loading