Unverified Commit 2fea8f88 authored by Axel Kohlmeyer's avatar Axel Kohlmeyer
Browse files

Merge branch 'master' into collected-small-changes

parents 3aee1b75 d63f3d87
Loading
Loading
Loading
Loading
+1 −1
Original line number Original line Diff line number Diff line
@@ -13,7 +13,7 @@ Syntax
Examples
Examples
""""""""
""""""""


.. code-blocK:: LAMMPS
.. code-block:: LAMMPS


   bond_style none
   bond_style none


+1 −1
Original line number Original line Diff line number Diff line
@@ -16,7 +16,7 @@ Syntax
Examples
Examples
""""""""
""""""""


.. code:: LAMMPS
.. code-block:: LAMMPS


   pair_style meam/spline
   pair_style meam/spline
   pair_coeff * * Ti.meam.spline Ti
   pair_coeff * * Ti.meam.spline Ti
+12 −1
Original line number Original line Diff line number Diff line
@@ -1058,7 +1058,7 @@ struct alignas(2*sizeof(real)) SNAComplex
{
{
  real re,im;
  real re,im;


  KOKKOS_FORCEINLINE_FUNCTION SNAComplex() = default;
  SNAComplex() = default;


  KOKKOS_FORCEINLINE_FUNCTION SNAComplex(real re)
  KOKKOS_FORCEINLINE_FUNCTION SNAComplex(real re)
   : re(re), im(static_cast<real>(0.)) { ; }
   : re(re), im(static_cast<real>(0.)) { ; }
@@ -1100,6 +1100,17 @@ KOKKOS_FORCEINLINE_FUNCTION SNAComplex<real> operator*(const real& r, const SNAC


typedef SNAComplex<SNAreal> SNAcomplex;
typedef SNAComplex<SNAreal> SNAcomplex;


// Cayley-Klein pack: bundles two complex values (a, b), two 3-element
// complex arrays (da, db), one real scalar (sfac) and a 3-element real
// array (dsfacu) into a single over-aligned record.
// NOTE(review): judging by the names, da/db and dsfacu look like
// per-direction derivatives of a/b and sfac -- confirm against the code
// that fills this struct.
// The 32-byte alignment matches the size of two SNAcomplex values only if
// SNAreal is 8 bytes wide (an SNAComplex stores two reals) -- TODO confirm
// against SNAreal's definition, which is not visible here.
struct alignas(32) CayleyKleinPack {

  SNAcomplex a, b;          // two complex scalars
  SNAcomplex da[3], db[3];  // three complex entries each
  SNAreal sfac;             // real scalar
  SNAreal dsfacu[3];        // three real entries

};



#if defined(KOKKOS_ENABLE_CXX11)
#if defined(KOKKOS_ENABLE_CXX11)
#undef ISFINITE
#undef ISFINITE
+5 −4
Original line number Original line Diff line number Diff line
@@ -50,6 +50,7 @@ struct TagPairSNAPComputeFusedDeidrj{};
// CPU backend only
// CPU backend only
struct TagPairSNAPPreUiCPU{};
struct TagPairSNAPPreUiCPU{};
struct TagPairSNAPComputeUiCPU{};
struct TagPairSNAPComputeUiCPU{};
struct TagPairSNAPTransformUiCPU{};
struct TagPairSNAPComputeZiCPU{};
struct TagPairSNAPComputeZiCPU{};
struct TagPairSNAPBetaCPU{};
struct TagPairSNAPBetaCPU{};
struct TagPairSNAPComputeBiCPU{};
struct TagPairSNAPComputeBiCPU{};
@@ -104,7 +105,7 @@ public:
  void operator() (TagPairSNAPComputeUi,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeUi>::member_type& team) const;
  void operator() (TagPairSNAPComputeUi,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeUi>::member_type& team) const;


  KOKKOS_INLINE_FUNCTION
  KOKKOS_INLINE_FUNCTION
  void operator() (TagPairSNAPTransformUi,const int iatom_mod, const int idxu, const int iatom_div) const;
  void operator() (TagPairSNAPTransformUi,const int iatom_mod, const int j, const int iatom_div) const;


  KOKKOS_INLINE_FUNCTION
  KOKKOS_INLINE_FUNCTION
  void operator() (TagPairSNAPComputeZi,const int iatom_mod, const int idxz, const int iatom_div) const;
  void operator() (TagPairSNAPComputeZi,const int iatom_mod, const int idxz, const int iatom_div) const;
@@ -135,13 +136,13 @@ public:
  void operator() (TagPairSNAPComputeUiCPU,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeUiCPU>::member_type& team) const;
  void operator() (TagPairSNAPComputeUiCPU,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeUiCPU>::member_type& team) const;


  KOKKOS_INLINE_FUNCTION
  KOKKOS_INLINE_FUNCTION
  void operator() (TagPairSNAPComputeZiCPU,const int& ii) const;
  void operator() (TagPairSNAPTransformUiCPU, const int j, const int iatom) const;


  KOKKOS_INLINE_FUNCTION
  KOKKOS_INLINE_FUNCTION
  void operator() (TagPairSNAPComputeBiCPU,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeBiCPU>::member_type& team) const;
  void operator() (TagPairSNAPComputeZiCPU,const int& ii) const;


  KOKKOS_INLINE_FUNCTION
  KOKKOS_INLINE_FUNCTION
  void operator() (TagPairSNAPZeroYiCPU,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPZeroYiCPU>::member_type& team) const;
  void operator() (TagPairSNAPComputeBiCPU,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeBiCPU>::member_type& team) const;


  KOKKOS_INLINE_FUNCTION
  KOKKOS_INLINE_FUNCTION
  void operator() (TagPairSNAPComputeYiCPU,const int& ii) const;
  void operator() (TagPairSNAPComputeYiCPU,const int& ii) const;
+112 −47
Original line number Original line Diff line number Diff line
@@ -206,8 +206,6 @@ void PairSNAPKokkos<DeviceType>::compute(int eflag_in, int vflag_in)


  EV_FLOAT ev;
  EV_FLOAT ev;


  int idxu_max = snaKK.idxu_max;

  while (chunk_offset < inum) { // chunk up loop to prevent running out of memory
  while (chunk_offset < inum) { // chunk up loop to prevent running out of memory


    EV_FLOAT ev_tmp;
    EV_FLOAT ev_tmp;
@@ -246,6 +244,13 @@ void PairSNAPKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
        Kokkos::parallel_for("ComputeUiCPU",policy_ui_cpu,*this);
        Kokkos::parallel_for("ComputeUiCPU",policy_ui_cpu,*this);
      }
      }


      {
        // Expand ulisttot -> ulisttot_full
        // Zero out ylist
        typename Kokkos::MDRangePolicy<DeviceType, Kokkos::IndexType<int>, Kokkos::Rank<2, Kokkos::Iterate::Left, Kokkos::Iterate::Left>, TagPairSNAPTransformUiCPU> policy_transform_ui_cpu({0,0},{twojmax+1,chunk_size});
        Kokkos::parallel_for("TransformUiCPU",policy_transform_ui_cpu,*this);
      }

      //Compute bispectrum
      //Compute bispectrum
      if (quadraticflag || eflag) {
      if (quadraticflag || eflag) {
        //ComputeZi
        //ComputeZi
@@ -261,20 +266,12 @@ void PairSNAPKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
        Kokkos::parallel_for("ComputeBiCPU",policy_bi_cpu,*this);
        Kokkos::parallel_for("ComputeBiCPU",policy_bi_cpu,*this);
      }
      }


      //ZeroYi,ComputeYi
      //ComputeYi
      {
      {
        int vector_length = vector_length_default;
        int team_size = team_size_default;

        //Compute beta = dE_i/dB_i for all i in list
        //Compute beta = dE_i/dB_i for all i in list
        typename Kokkos::RangePolicy<DeviceType,TagPairSNAPBetaCPU> policy_beta(0,chunk_size);
        typename Kokkos::RangePolicy<DeviceType,TagPairSNAPBetaCPU> policy_beta(0,chunk_size);
        Kokkos::parallel_for("ComputeBetaCPU",policy_beta,*this);
        Kokkos::parallel_for("ComputeBetaCPU",policy_beta,*this);


        //ZeroYi
        check_team_size_for<TagPairSNAPZeroYiCPU>(chunk_size,team_size,vector_length);
        typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPZeroYiCPU> policy_zero_yi(((idxu_max+team_size-1)/team_size)*chunk_size,team_size,vector_length);
        Kokkos::parallel_for("ZeroYiCPU",policy_zero_yi,*this);

        //ComputeYi
        //ComputeYi
        int idxz_max = snaKK.idxz_max;
        int idxz_max = snaKK.idxz_max;
        typename Kokkos::RangePolicy<DeviceType,TagPairSNAPComputeYiCPU> policy_yi_cpu(0,chunk_size*idxz_max);
        typename Kokkos::RangePolicy<DeviceType,TagPairSNAPComputeYiCPU> policy_yi_cpu(0,chunk_size*idxz_max);
@@ -294,6 +291,7 @@ void PairSNAPKokkos<DeviceType>::compute(int eflag_in, int vflag_in)


        Kokkos::parallel_for("ComputeDeidrjCPU",policy_deidrj_cpu,*this);
        Kokkos::parallel_for("ComputeDeidrjCPU",policy_deidrj_cpu,*this);
      }
      }

    } else { // GPU
    } else { // GPU


#ifdef LMP_KOKKOS_GPU
#ifdef LMP_KOKKOS_GPU
@@ -313,10 +311,10 @@ void PairSNAPKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
        int team_size = 4; // need to cap b/c of shared memory reqs
        int team_size = 4; // need to cap b/c of shared memory reqs
        check_team_size_for<TagPairSNAPComputeUi>(chunk_size,team_size,vector_length);
        check_team_size_for<TagPairSNAPComputeUi>(chunk_size,team_size,vector_length);


        // scratch size: 2 * team_size * (twojmax+1)^2, to cover all `m1`,`m2` values
        // scratch size: 2 * team_size * (twojmax+1)^2, to cover all `m1`,`m2` values, div 2 for symmetry
        //   2 is for double buffer
        //   2 is for double buffer


        const int tile_size = (twojmax+1)*(twojmax+1);
        const int tile_size = (twojmax+1)*(twojmax/2+1);
        typedef Kokkos::View< SNAcomplex*,
        typedef Kokkos::View< SNAcomplex*,
                              Kokkos::DefaultExecutionSpace::scratch_memory_space,
                              Kokkos::DefaultExecutionSpace::scratch_memory_space,
                              Kokkos::MemoryTraits<Kokkos::Unmanaged> >
                              Kokkos::MemoryTraits<Kokkos::Unmanaged> >
@@ -329,7 +327,7 @@ void PairSNAPKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
        Kokkos::parallel_for("ComputeUi",policy_ui,*this);
        Kokkos::parallel_for("ComputeUi",policy_ui,*this);


        //Transform data layout of ulisttot to AoSoA, zero ylist
        //Transform data layout of ulisttot to AoSoA, zero ylist
        typename Kokkos::MDRangePolicy<DeviceType, Kokkos::IndexType<int>, Kokkos::Rank<3, Kokkos::Iterate::Left, Kokkos::Iterate::Left>, TagPairSNAPTransformUi> policy_transform_ui({0,0,0},{32,idxu_max,(chunk_size + 32 - 1) / 32},{32,4,1});
        typename Kokkos::MDRangePolicy<DeviceType, Kokkos::IndexType<int>, Kokkos::Rank<3, Kokkos::Iterate::Left, Kokkos::Iterate::Left>, TagPairSNAPTransformUi> policy_transform_ui({0,0,0},{32,twojmax+1,(chunk_size + 32 - 1) / 32},{32,4,1});
        Kokkos::parallel_for("TransformUi",policy_transform_ui,*this);
        Kokkos::parallel_for("TransformUi",policy_transform_ui,*this);


      }
      }
@@ -367,7 +365,8 @@ void PairSNAPKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
        Kokkos::parallel_for("ComputeYi",policy_compute_yi,*this);
        Kokkos::parallel_for("ComputeYi",policy_compute_yi,*this);


        //Transform data layout of ylist out of AoSoA
        //Transform data layout of ylist out of AoSoA
        typename Kokkos::MDRangePolicy<DeviceType, Kokkos::IndexType<int>, Kokkos::Rank<3, Kokkos::Iterate::Left, Kokkos::Iterate::Left>, TagPairSNAPTransformYi> policy_transform_yi({0,0,0},{32,idxu_max,(chunk_size + 32 - 1) / 32},{32,4,1});
        const int idxu_half_max = snaKK.idxu_half_max;
        typename Kokkos::MDRangePolicy<DeviceType, Kokkos::IndexType<int>, Kokkos::Rank<3, Kokkos::Iterate::Left, Kokkos::Iterate::Left>, TagPairSNAPTransformYi> policy_transform_yi({0,0,0},{32,idxu_half_max,(chunk_size + 32 - 1) / 32},{32,4,1});
        Kokkos::parallel_for("TransformYi",policy_transform_yi,*this);
        Kokkos::parallel_for("TransformYi",policy_transform_yi,*this);


      }
      }
@@ -397,7 +396,7 @@ void PairSNAPKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
        }
        }
      }
      }


#endif // KOKKOS_ENABLE_CUDA
#endif // LMP_KOKKOS_GPU


    }
    }


@@ -608,12 +607,21 @@ void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPComputeNeigh,const typen


    if ( rsq < rnd_cutsq(itype,jtype) ) {
    if ( rsq < rnd_cutsq(itype,jtype) ) {
      if (final) {
      if (final) {
#ifdef LMP_KOKKOS_GPU
        if (std::is_same<DeviceType,Kokkos::Cuda>::value) {
          my_sna.compute_cayley_klein(ii, offset, dx, dy, dz, (radi + d_radelem[elem_j])*rcutfac,
                                      d_wjelem[elem_j]);
        } else {
#endif
          my_sna.rij(ii,offset,0) = dx;
          my_sna.rij(ii,offset,0) = dx;
          my_sna.rij(ii,offset,1) = dy;
          my_sna.rij(ii,offset,1) = dy;
          my_sna.rij(ii,offset,2) = dz;
          my_sna.rij(ii,offset,2) = dz;
        my_sna.inside(ii,offset) = j;
          my_sna.wj(ii,offset) = d_wjelem[elem_j];
          my_sna.wj(ii,offset) = d_wjelem[elem_j];
          my_sna.rcutij(ii,offset) = (radi + d_radelem[elem_j])*rcutfac;
          my_sna.rcutij(ii,offset) = (radi + d_radelem[elem_j])*rcutfac;
#ifdef LMP_KOKKOS_GPU
        }
#endif
        my_sna.inside(ii,offset) = j;
        if (chemflag)
        if (chemflag)
          my_sna.element(ii,offset) = elem_j;
          my_sna.element(ii,offset) = elem_j;
        else
        else
@@ -704,27 +712,54 @@ void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPComputeUi,const typename


template<class DeviceType>
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPTransformUi,const int iatom_mod, const int idxu, const int iatom_div) const {
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPTransformUi,const int iatom_mod, const int j, const int iatom_div) const {
  SNAKokkos<DeviceType> my_sna = snaKK;
  SNAKokkos<DeviceType> my_sna = snaKK;


  const int iatom = iatom_mod + iatom_div * 32;
  const int iatom = iatom_mod + iatom_div * 32;
  if (iatom >= chunk_size) return;
  if (iatom >= chunk_size) return;


  if (idxu >= my_sna.idxu_max) return;
  if (j > twojmax) return; 


  int elem_count = chemflag ? nelements : 1;
  int elem_count = chemflag ? nelements : 1;


  for (int ielem = 0; ielem < elem_count; ielem++) {
  for (int ielem = 0; ielem < elem_count; ielem++) {
    const int jju_half = my_sna.idxu_half_block(j);
    const int jju = my_sna.idxu_block(j);

    for (int mb = 0; 2*mb <= j; mb++) {
      for (int ma = 0; ma <= j; ma++) {
        // Extract top half


    const auto utot_re = my_sna.ulisttot_re(idxu, ielem, iatom);
        const int idxu_shift = mb * (j + 1) + ma;
    const auto utot_im = my_sna.ulisttot_im(idxu, ielem, iatom);
        const int idxu_half = jju_half + idxu_shift;
        const int idxu = jju + idxu_shift;


        auto utot_re = my_sna.ulisttot_re(idxu_half, ielem, iatom);
        auto utot_im = my_sna.ulisttot_im(idxu_half, ielem, iatom);

        // Store
        my_sna.ulisttot_pack(iatom_mod, idxu, ielem, iatom_div) = { utot_re, utot_im };
        my_sna.ulisttot_pack(iatom_mod, idxu, ielem, iatom_div) = { utot_re, utot_im };
          
          
    my_sna.ylist_pack_re(iatom_mod, idxu, ielem, iatom_div) = 0.;
        // Also zero yi
    my_sna.ylist_pack_im(iatom_mod, idxu, ielem, iatom_div) = 0.;
        my_sna.ylist_pack_re(iatom_mod, idxu_half, ielem, iatom_div) = 0.;
        my_sna.ylist_pack_im(iatom_mod, idxu_half, ielem, iatom_div) = 0.;

        // Symmetric term
        const int sign_factor = (((ma+mb)%2==0)?1:-1);
        const int idxu_flip = jju + (j + 1 - mb) * (j + 1) - (ma + 1);

        if (sign_factor == 1) {
          utot_im = -utot_im;
        } else {
          utot_re = -utot_re;
        }
        }


        my_sna.ulisttot_pack(iatom_mod, idxu_flip, ielem, iatom_div) = { utot_re, utot_im };

        // No need to zero symmetrized ylist
      }
    }
  }
}
}


template<class DeviceType>
template<class DeviceType>
@@ -742,20 +777,20 @@ void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPComputeYi,const int iato


template<class DeviceType>
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPTransformYi,const int iatom_mod, const int idxu, const int iatom_div) const {
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPTransformYi,const int iatom_mod, const int idxu_half, const int iatom_div) const {
  SNAKokkos<DeviceType> my_sna = snaKK;
  SNAKokkos<DeviceType> my_sna = snaKK;


  const int iatom = iatom_mod + iatom_div * 32;
  const int iatom = iatom_mod + iatom_div * 32;
  if (iatom >= chunk_size) return;
  if (iatom >= chunk_size) return;


  if (idxu >= my_sna.idxu_max) return;
  if (idxu_half >= my_sna.idxu_half_max) return;


  int elem_count = chemflag ? nelements : 1;
  int elem_count = chemflag ? nelements : 1;
  for (int ielem = 0; ielem < elem_count; ielem++) {
  for (int ielem = 0; ielem < elem_count; ielem++) {
    const auto y_re = my_sna.ylist_pack_re(iatom_mod, idxu, ielem, iatom_div);
    const auto y_re = my_sna.ylist_pack_re(iatom_mod, idxu_half, ielem, iatom_div);
    const auto y_im = my_sna.ylist_pack_im(iatom_mod, idxu, ielem, iatom_div);
    const auto y_im = my_sna.ylist_pack_im(iatom_mod, idxu_half, ielem, iatom_div);


    my_sna.ylist(idxu, ielem, iatom) = { y_re, y_im };
    my_sna.ylist(idxu_half, ielem, iatom) = { y_re, y_im };
  }
  }


}
}
@@ -904,22 +939,52 @@ void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPComputeUiCPU,const typen


template<class DeviceType>
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPZeroYiCPU,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPZeroYiCPU>::member_type& team) const {
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPTransformUiCPU, const int j, const int iatom) const {
  SNAKokkos<DeviceType> my_sna = snaKK;
  SNAKokkos<DeviceType> my_sna = snaKK;


  // Extract the quantum number
  if (iatom >= chunk_size) return;
  const int idx = team.team_rank() + team.team_size() * (team.league_rank() % ((my_sna.idxu_max+team.team_size()-1)/team.team_size()));
  if (idx >= my_sna.idxu_max) return;


  // Extract the atomic index
  if (j > twojmax) return; 
  const int ii = team.league_rank() / ((my_sna.idxu_max+team.team_size()-1)/team.team_size());
  if (ii >= chunk_size) return;


  if (chemflag)
  int elem_count = chemflag ? nelements : 1;
    for(int ielem = 0; ielem < nelements; ielem++)

      my_sna.zero_yi_cpu(idx,ii,ielem);
  // De-symmetrize ulisttot
  else
  for (int ielem = 0; ielem < elem_count; ielem++) {
    my_sna.zero_yi_cpu(idx,ii,0);

    const int jju_half = my_sna.idxu_half_block(j);
    const int jju = my_sna.idxu_block(j);

    for (int mb = 0; 2*mb <= j; mb++) {
      for (int ma = 0; ma <= j; ma++) {
        // Extract top half

        const int idxu_shift = mb * (j + 1) + ma;
        const int idxu_half = jju_half + idxu_shift;
        const int idxu = jju + idxu_shift;

        // Load ulist
        auto utot = my_sna.ulisttot(idxu_half, ielem, iatom);

        // Store
        my_sna.ulisttot_full(idxu, ielem, iatom) = utot;

        // Zero Yi
        my_sna.ylist(idxu_half, ielem, iatom) = {0., 0.};

        // Symmetric term
        const int sign_factor = (((ma+mb)%2==0)?1:-1);
        const int idxu_flip = jju + (j + 1 - mb) * (j + 1) - (ma + 1);

        if (sign_factor == 1) {
          utot.im = -utot.im;
        } else {
          utot.re = -utot.re;
        }

        my_sna.ulisttot_full(idxu_flip, ielem, iatom) = utot;
      }
    }
  }
}
}


template<class DeviceType>
template<class DeviceType>
Loading