Unverified commit 3fffe204, authored by Axel Kohlmeyer and committed by GitHub
Browse files

Merge pull request #2172 from weinbe2/fea-snap-yi-aosoa

SNAP optimizations - AoSoA data layouts, chem snap indexing conventions, misc perf improvements
parents f5a31fef b0dd93f0
Loading
Loading
Loading
Loading
+51 −12
Original line number Diff line number Diff line
@@ -30,19 +30,31 @@ PairStyle(snap/kk/host,PairSNAPKokkos<LMPHostType>)

namespace LAMMPS_NS {

// Kernel tag types. Each struct is empty; it is passed as the first argument
// of an operator() overload declared further down (and, for team-based
// kernels, also as the work tag of the Kokkos::TeamPolicy) so that the tag
// type alone selects which kernel body is invoked.
//
// NOTE(review): TagPairSNAPBeta and TagPairSNAPComputeUiCPU each appear twice
// in this listing. That looks like removed and added diff lines being shown
// together rather than a real redefinition -- confirm against the actual header.

// Routines for both the CPU and GPU backend
template<int NEIGHFLAG, int EVFLAG>
struct TagPairSNAPComputeForce{};

struct TagPairSNAPBeta{};
struct TagPairSNAPComputeNeigh{};

// GPU backend only
struct TagPairSNAPPreUi{};
struct TagPairSNAPComputeUi{};
struct TagPairSNAPComputeUiCPU{};
struct TagPairSNAPTransformUi{}; // re-order ulisttot from SoA to AoSoA, zero ylist
struct TagPairSNAPComputeZi{};
struct TagPairSNAPBeta{};
struct TagPairSNAPComputeBi{};
struct TagPairSNAPZeroYi{};
struct TagPairSNAPTransformBi{}; // re-order blist from AoSoA to AoS
struct TagPairSNAPComputeYi{};
struct TagPairSNAPTransformYi{}; // re-order ylist from AoSoA to AoS
struct TagPairSNAPComputeFusedDeidrj{};

// CPU backend only
struct TagPairSNAPPreUiCPU{};
struct TagPairSNAPComputeUiCPU{};
struct TagPairSNAPComputeZiCPU{};
struct TagPairSNAPBetaCPU{};
struct TagPairSNAPComputeBiCPU{};
struct TagPairSNAPZeroYiCPU{};
struct TagPairSNAPComputeYiCPU{};
struct TagPairSNAPComputeDuidrjCPU{};
struct TagPairSNAPComputeDeidrjCPU{};

@@ -81,6 +93,10 @@ public:
  KOKKOS_INLINE_FUNCTION
  void operator() (TagPairSNAPComputeNeigh,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeNeigh>::member_type& team) const;

  KOKKOS_INLINE_FUNCTION
  void operator() (TagPairSNAPBetaCPU,const int& ii) const;

  // GPU backend only
  KOKKOS_INLINE_FUNCTION
  void operator() (TagPairSNAPPreUi,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPPreUi>::member_type& team) const;

@@ -88,31 +104,53 @@ public:
  void operator() (TagPairSNAPComputeUi,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeUi>::member_type& team) const;

  KOKKOS_INLINE_FUNCTION
  void operator() (TagPairSNAPComputeUiCPU,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeUiCPU>::member_type& team) const;
  void operator() (TagPairSNAPTransformUi,const int iatom_mod, const int idxu, const int iatom_div) const;

  KOKKOS_INLINE_FUNCTION
  void operator() (TagPairSNAPComputeZi,const int iatom_mod, const int idxz, const int iatom_div) const;

  KOKKOS_INLINE_FUNCTION
  void operator() (TagPairSNAPComputeZi,const int& ii) const;
  void operator() (TagPairSNAPBeta, const int& ii) const;

  KOKKOS_INLINE_FUNCTION
  void operator() (TagPairSNAPComputeBi,const int iatom_mod, const int idxb, const int iatom_div) const;

  KOKKOS_INLINE_FUNCTION
  void operator() (TagPairSNAPComputeBi,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeBi>::member_type& team) const;
  void operator() (TagPairSNAPTransformBi,const int iatom_mod, const int idxb, const int iatom_div) const;

  KOKKOS_INLINE_FUNCTION
  void operator() (TagPairSNAPZeroYi,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPZeroYi>::member_type& team) const;
  void operator() (TagPairSNAPComputeYi,const int iatom_mod, const int idxz, const int iatom_div) const;

  KOKKOS_INLINE_FUNCTION
  void operator() (TagPairSNAPComputeYi,const int& ii) const;
  void operator() (TagPairSNAPTransformYi,const int iatom_mod, const int idxu, const int iatom_div) const;

  KOKKOS_INLINE_FUNCTION
  void operator() (TagPairSNAPComputeFusedDeidrj,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeFusedDeidrj>::member_type& team) const;

  // CPU backend only
  KOKKOS_INLINE_FUNCTION
  void operator() (TagPairSNAPComputeDuidrjCPU,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeDuidrjCPU>::member_type& team) const;
  void operator() (TagPairSNAPPreUiCPU,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPPreUiCPU>::member_type& team) const;

  KOKKOS_INLINE_FUNCTION
  void operator() (TagPairSNAPComputeDeidrjCPU,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeDeidrjCPU>::member_type& team) const;
  void operator() (TagPairSNAPComputeUiCPU,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeUiCPU>::member_type& team) const;

  KOKKOS_INLINE_FUNCTION
  void operator() (TagPairSNAPBeta,const int& ii) const;
  void operator() (TagPairSNAPComputeZiCPU,const int& ii) const;

  KOKKOS_INLINE_FUNCTION
  void operator() (TagPairSNAPComputeBiCPU,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeBiCPU>::member_type& team) const;

  KOKKOS_INLINE_FUNCTION
  void operator() (TagPairSNAPZeroYiCPU,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPZeroYiCPU>::member_type& team) const;

  KOKKOS_INLINE_FUNCTION
  void operator() (TagPairSNAPComputeYiCPU,const int& ii) const;

  KOKKOS_INLINE_FUNCTION
  void operator() (TagPairSNAPComputeDuidrjCPU,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeDuidrjCPU>::member_type& team) const;

  KOKKOS_INLINE_FUNCTION
  void operator() (TagPairSNAPComputeDeidrjCPU,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeDeidrjCPU>::member_type& team) const;

  template<int NEIGHFLAG>
  KOKKOS_INLINE_FUNCTION
@@ -175,6 +213,7 @@ inline double dist2(double* x,double* y);
  Kokkos::View<T_INT*, DeviceType> d_map;                    // mapping from atom types to elements
  Kokkos::View<T_INT*, DeviceType> d_ninside;                // ninside for all atoms in list
  Kokkos::View<F_FLOAT**, DeviceType> d_beta;                // betas for all atoms in list
  Kokkos::View<F_FLOAT***, Kokkos::LayoutLeft, DeviceType> d_beta_pack;          // betas for all atoms in list, GPU
  Kokkos::View<F_FLOAT**, DeviceType> d_bispectrum;          // bispectrum components for all atoms in list

  typedef Kokkos::DualView<F_FLOAT**, DeviceType> tdual_fparams;
+391 −131

File changed.

Preview size limit exceeded, changes collapsed.

+45 −19
Original line number Diff line number Diff line
@@ -36,7 +36,9 @@ public:
  // Kokkos::View aliases for double-precision data.
  // Naming: the digit is the view rank; a `_ll` suffix means the alias passes
  // Kokkos::LayoutLeft explicitly, the unsuffixed aliases leave the layout
  // unspecified; `3d3` is a rank-3 view whose last extent is fixed to 3.
  // Caution: t_sna_4d (double***[3]) has a compile-time last extent of 3,
  // whereas t_sna_4d_ll (double****) has four runtime extents -- the two are
  // NOT the same shape differing only in layout.
  typedef Kokkos::View<double**, DeviceType> t_sna_2d;
  typedef Kokkos::View<double**, Kokkos::LayoutLeft, DeviceType> t_sna_2d_ll;
  typedef Kokkos::View<double***, DeviceType> t_sna_3d;
  typedef Kokkos::View<double***, Kokkos::LayoutLeft, DeviceType> t_sna_3d_ll;
  typedef Kokkos::View<double***[3], DeviceType> t_sna_4d;
  typedef Kokkos::View<double****, Kokkos::LayoutLeft, DeviceType> t_sna_4d_ll;
  typedef Kokkos::View<double**[3], DeviceType> t_sna_3d3;
  typedef Kokkos::View<double*****, DeviceType> t_sna_5d;

@@ -48,7 +50,8 @@ public:
  // Kokkos::View aliases for SNAcomplex data; same naming scheme as the
  // double-precision aliases (`c` instead of `d`, `_ll` = explicit
  // Kokkos::LayoutLeft, a trailing `3` = last extent fixed to 3).
  // NOTE(review): t_sna_4c_ll is defined twice below with different types
  // (SNAcomplex***[3] and SNAcomplex****). As real code that is a conflicting
  // redefinition; it looks like the removed and the added diff line shown
  // together, with t_sna_4c3_ll taking over the ***[3] shape -- confirm
  // against the actual header.
  typedef Kokkos::View<SNAcomplex***, DeviceType> t_sna_3c;
  typedef Kokkos::View<SNAcomplex***, Kokkos::LayoutLeft, DeviceType> t_sna_3c_ll;
  typedef Kokkos::View<SNAcomplex***[3], DeviceType> t_sna_4c;
  typedef Kokkos::View<SNAcomplex***[3], Kokkos::LayoutLeft, DeviceType> t_sna_4c_ll;
  typedef Kokkos::View<SNAcomplex***[3], Kokkos::LayoutLeft, DeviceType> t_sna_4c3_ll;
  typedef Kokkos::View<SNAcomplex****, Kokkos::LayoutLeft, DeviceType> t_sna_4c_ll;
  typedef Kokkos::View<SNAcomplex**[3], DeviceType> t_sna_3c3;
  typedef Kokkos::View<SNAcomplex*****, DeviceType> t_sna_5c;

@@ -73,27 +76,39 @@ inline

  int ncoeff;

  // functions for bispectrum coefficients
  // functions for bispectrum coefficients, GPU only
  KOKKOS_INLINE_FUNCTION
  void pre_ui(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team,const int&, int); // ForceSNAP
  void pre_ui(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team,const int&,const int&); // ForceSNAP
  KOKKOS_INLINE_FUNCTION
  void compute_ui(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int, const int); // ForceSNAP
  KOKKOS_INLINE_FUNCTION
  void compute_zi(const int&, const int&, const int&);    // ForceSNAP
  KOKKOS_INLINE_FUNCTION
  void compute_yi(int,int,int,
   const Kokkos::View<F_FLOAT***, Kokkos::LayoutLeft, DeviceType> &beta_pack); // ForceSNAP
  KOKKOS_INLINE_FUNCTION
  void compute_bi(const int&, const int&, const int&);    // ForceSNAP

  // functions for bispectrum coefficients, CPU only
  KOKKOS_INLINE_FUNCTION
  void pre_ui_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team,const int&,const int&); // ForceSNAP
  KOKKOS_INLINE_FUNCTION
  void compute_ui_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int); // ForceSNAP
  KOKKOS_INLINE_FUNCTION
  void compute_zi(const int&);    // ForceSNAP
  void compute_zi_cpu(const int&);    // ForceSNAP
  KOKKOS_INLINE_FUNCTION
  void zero_yi(const int&, const int&, int); // ForceSNAP
  void zero_yi_cpu(const int&,const int&,const int&); // ForceSNAP
  KOKKOS_INLINE_FUNCTION
  void compute_yi(int,
  void compute_yi_cpu(int,
   const Kokkos::View<F_FLOAT**, DeviceType> &beta); // ForceSNAP
    KOKKOS_INLINE_FUNCTION
  void compute_bi(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int);    // ForceSNAP

  // functions for derivatives
  void compute_bi_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int);    // ForceSNAP

  // functions for derivatives, GPU only
  KOKKOS_INLINE_FUNCTION
  void compute_fused_deidrj(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int, const int); //ForceSNAP

  // functions for derivatives, CPU only
  KOKKOS_INLINE_FUNCTION
  void compute_duidrj_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int); //ForceSNAP
  KOKKOS_INLINE_FUNCTION
@@ -139,18 +154,32 @@ inline

  int twojmax, diagonalstyle;

  t_sna_2d_ll blist;
  t_sna_2c_ll ulisttot;
  t_sna_2c_ll zlist;
  t_sna_3d_ll blist;
  t_sna_3c_ll ulisttot;
  t_sna_3c_ll zlist;

  t_sna_3c_ll ulist;
  t_sna_2c_ll ylist;
  t_sna_3c_ll ylist;
  
  // derivatives of data
  t_sna_4c_ll dulist;
  t_sna_4c3_ll dulist;
  
  // Modified structures for GPU backend
  t_sna_3d_ll ulisttot_re; // ulisttot split into two real-valued views: real part,
  t_sna_3d_ll ulisttot_im; // and imaginary part
  t_sna_4c_ll ulisttot_pack; // AoSoA layout
  t_sna_4c_ll zlist_pack; // AoSoA layout
  t_sna_4d_ll blist_pack; // presumably AoSoA like the other *_pack views -- TODO confirm
  t_sna_4d_ll ylist_pack_re; // ylist split into two real-valued views: real part,
  t_sna_4d_ll ylist_pack_im; // and imaginary part; AoSoA layout

  int idxcg_max, idxu_max, idxz_max, idxb_max;

  // Chem snap counts
  int nelements;
  int ndoubles;
  int ntriples;

private:
  double rmin0, rfac0;

@@ -212,9 +241,6 @@ inline
  // Chem snap flags
  int chem_flag;
  int bnorm_flag;
  int nelements;
  int ndoubles;
  int ntriples;

  // Self-weight
  double wself;
+665 −331

File changed.

Preview size limit exceeded, changes collapsed.