Commit f7026491 authored by Stan Moore's avatar Stan Moore
Browse files

Code reformat

parent 708052dc
Loading
Loading
Loading
Loading
+216 −190
Original line number Diff line number Diff line
@@ -233,21 +233,19 @@ void FixQEqReaxKokkos<DeviceType>::pre_force(int vflag)
  int atoms_per_team = 4;
  int num_teams = inum / atoms_per_team + (inum % atoms_per_team ? 1 : 0);

  Kokkos::TeamPolicy <DeviceType> policy(num_teams, atoms_per_team, vector_length);
  if (neighflag == FULL){
      FixQEqReaxKokkosComputeHFunctor<DeviceType, FULL> computeH_functor(this,
									 atoms_per_team,
  Kokkos::TeamPolicy<DeviceType> policy(num_teams, atoms_per_team,
                                        vector_length);
  if (neighflag == FULL) {
    FixQEqReaxKokkosComputeHFunctor<DeviceType, FULL> computeH_functor(
        this, atoms_per_team, vector_length);
    Kokkos::parallel_for(policy, computeH_functor);
  } else if (neighflag == HALF) {
      FixQEqReaxKokkosComputeHFunctor<DeviceType, HALF> computeH_functor(this,
									 atoms_per_team,
									 vector_length);
    FixQEqReaxKokkosComputeHFunctor<DeviceType, HALF> computeH_functor(
        this, atoms_per_team, vector_length);
    Kokkos::parallel_for(policy, computeH_functor);
  } else {
      FixQEqReaxKokkosComputeHFunctor<DeviceType, HALFTHREAD> computeH_functor(this,
									       atoms_per_team,
									       vector_length);
    FixQEqReaxKokkosComputeHFunctor<DeviceType, HALFTHREAD> computeH_functor(
        this, atoms_per_team, vector_length);
    Kokkos::parallel_for(policy, computeH_functor);
  }

@@ -403,25 +401,38 @@ void FixQEqReaxKokkos<DeviceType>::zero_item(int ii) const
// d_numnbrs - d_numnbrs[i] contains the # of non-zero entries in the i-th row of H (which also represents the # of neighbor atoms with electrostatic interaction coefficients with atom-i)
// d_firstnbr- d_firstnbr[i] contains the starting index in d_val at which the H matrix entries corresponding to row-i are stored
// d_jlist   - contains the column index corresponding to each entry in d_val

template <class DeviceType>
template <int NEIGHFLAG>
void
FixQEqReaxKokkos<DeviceType>::compute_h_team(const typename Kokkos::TeamPolicy <DeviceType> ::member_type &team,
					     int atoms_per_team,
					     int vector_length) const{
void FixQEqReaxKokkos<DeviceType>::compute_h_team(
    const typename Kokkos::TeamPolicy<DeviceType>::member_type &team,
    int atoms_per_team, int vector_length) const {

  // scratch space setup
    Kokkos::View< int*,      Kokkos::ScratchMemorySpace<DeviceType>, Kokkos::MemoryTraits<Kokkos::Unmanaged> > s_ilist(team.team_shmem(), atoms_per_team);
    Kokkos::View< int*,      Kokkos::ScratchMemorySpace<DeviceType>, Kokkos::MemoryTraits<Kokkos::Unmanaged> > s_numnbrs(team.team_shmem(), atoms_per_team);
    Kokkos::View< int*,      Kokkos::ScratchMemorySpace<DeviceType>, Kokkos::MemoryTraits<Kokkos::Unmanaged> > s_firstnbr(team.team_shmem(), atoms_per_team);

    Kokkos::View< int**,     Kokkos::ScratchMemorySpace<DeviceType>, Kokkos::MemoryTraits<Kokkos::Unmanaged> > s_jtype(team.team_shmem(), atoms_per_team, vector_length);
    Kokkos::View< int**,     Kokkos::ScratchMemorySpace<DeviceType>, Kokkos::MemoryTraits<Kokkos::Unmanaged> > s_jlist(team.team_shmem(), atoms_per_team, vector_length);
    Kokkos::View< F_FLOAT**, Kokkos::ScratchMemorySpace<DeviceType>, Kokkos::MemoryTraits<Kokkos::Unmanaged> > s_r(team.team_shmem(), atoms_per_team, vector_length);
  Kokkos::View<int *, Kokkos::ScratchMemorySpace<DeviceType>,
               Kokkos::MemoryTraits<Kokkos::Unmanaged>>
      s_ilist(team.team_shmem(), atoms_per_team);
  Kokkos::View<int *, Kokkos::ScratchMemorySpace<DeviceType>,
               Kokkos::MemoryTraits<Kokkos::Unmanaged>>
      s_numnbrs(team.team_shmem(), atoms_per_team);
  Kokkos::View<int *, Kokkos::ScratchMemorySpace<DeviceType>,
               Kokkos::MemoryTraits<Kokkos::Unmanaged>>
      s_firstnbr(team.team_shmem(), atoms_per_team);

  Kokkos::View<int **, Kokkos::ScratchMemorySpace<DeviceType>,
               Kokkos::MemoryTraits<Kokkos::Unmanaged>>
      s_jtype(team.team_shmem(), atoms_per_team, vector_length);
  Kokkos::View<int **, Kokkos::ScratchMemorySpace<DeviceType>,
               Kokkos::MemoryTraits<Kokkos::Unmanaged>>
      s_jlist(team.team_shmem(), atoms_per_team, vector_length);
  Kokkos::View<F_FLOAT **, Kokkos::ScratchMemorySpace<DeviceType>,
               Kokkos::MemoryTraits<Kokkos::Unmanaged>>
      s_r(team.team_shmem(), atoms_per_team, vector_length);

  // team of threads work on atoms with index in [firstatom, lastatom)
  int firstatom = team.league_rank() * atoms_per_team;
    int lastatom  = ( firstatom + atoms_per_team < inum ) ? ( firstatom + atoms_per_team ) : inum;
  int lastatom =
      (firstatom + atoms_per_team < inum) ? (firstatom + atoms_per_team) : inum;

  // kokkos-thread-0 is used to load info from global memory into scratch space
  if (team.team_rank() == 0) {
@@ -431,7 +442,8 @@ FixQEqReaxKokkos<DeviceType>::compute_h_team(const typename Kokkos::TeamPolicy <
    // calculate total number of neighbor atoms for all atoms assigned to the current team of threads (Note - Total # of neighbor atoms here provides the
    // upper bound space requirement to store the H matrix values corresponding to the atoms with indices in d_ilist[firstatom:lastatom])

	Kokkos::parallel_scan( Kokkos::ThreadVectorRange(team, atoms_per_team), [&](const int &idx, int &totalnbrs, bool final) {
    Kokkos::parallel_scan(Kokkos::ThreadVectorRange(team, atoms_per_team),
                          [&](const int &idx, int &totalnbrs, bool final) {
                            int ii = firstatom + idx;

                            if (ii < inum) {
@@ -450,21 +462,26 @@ FixQEqReaxKokkos<DeviceType>::compute_h_team(const typename Kokkos::TeamPolicy <
                          });
  }


    // barrier ensures that the data moved to scratch space is visible to all the threads of the corresponding team
  // barrier ensures that the data moved to scratch space is visible to all the
  // threads of the corresponding team
  team.team_barrier();

    // calculate the global memory offset from where the H matrix values to be calculated by the current team will be stored in d_val
  // calculate the global memory offset from where the H matrix values to be
  // calculated by the current team will be stored in d_val
  int team_firstnbr_idx = 0;
    Kokkos::single (Kokkos::PerTeam (team), [=] (int &val) {
	    int totalnbrs = s_firstnbr[lastatom - firstatom - 1] + s_numnbrs[lastatom - firstatom - 1];
  Kokkos::single(Kokkos::PerTeam(team),
                 [=](int &val) {
                   int totalnbrs = s_firstnbr[lastatom - firstatom - 1] +
                                   s_numnbrs[lastatom - firstatom - 1];
                   val = Kokkos::atomic_fetch_add(&d_mfill_offset(), totalnbrs);
	}, team_firstnbr_idx);


    // map the H matrix computation of each atom to kokkos-thread (one atom per kokkos-thread)
    // neighbor computation for each atom is assigned to vector lanes of the corresponding thread
    Kokkos::parallel_for( Kokkos::TeamThreadRange(team, atoms_per_team), [&] (const int &idx) {
                 },
                 team_firstnbr_idx);

  // map the H matrix computation of each atom to kokkos-thread (one atom per
  // kokkos-thread); neighbor computation for each atom is assigned to vector
  // lanes of the corresponding thread
  Kokkos::parallel_for(
      Kokkos::TeamThreadRange(team, atoms_per_team), [&](const int &idx) {
        int ii = firstatom + idx;

        if (ii < inum) {
@@ -480,28 +497,33 @@ FixQEqReaxKokkos<DeviceType>::compute_h_team(const typename Kokkos::TeamPolicy <

            // calculate the write-offset for atom-i's first neighbor
            int atomi_firstnbr_idx = team_firstnbr_idx + s_firstnbr[idx];
		    Kokkos::single (Kokkos::PerThread (team), [&] () {
			    d_firstnbr[i] = atomi_firstnbr_idx;
			});
            Kokkos::single(Kokkos::PerThread(team),
                           [&]() { d_firstnbr[i] = atomi_firstnbr_idx; });


		    // current # of neighbor atoms with non-zero electrostatic interaction coefficients with atom-i
		    // which represents the # of non-zero elements in row-i of H matrix
            // current # of neighbor atoms with non-zero electrostatic
            // interaction coefficients with atom-i which represents the # of
            // non-zero elements in row-i of H matrix
            int atomi_nbrs_inH = 0;

		    // calculate H matrix values corresponding to atom-i where neighbors are processed in batches and the batch size is vector_length
            // calculate H matrix values corresponding to atom-i where neighbors
            // are processed in batches and the batch size is vector_length
            for (int jj_start = 0; jj_start < jnum; jj_start += vector_length) {

              int atomi_nbr_writeIdx = atomi_firstnbr_idx + atomi_nbrs_inH;

			// count the # of neighbor atoms with non-zero electrostatic interaction coefficients with atom-i in the current batch
              // count the # of neighbor atoms with non-zero electrostatic
              // interaction coefficients with atom-i in the current batch
              int atomi_nbrs_curbatch = 0;

			// compute rsq, jtype, j and store in scratch space which is reused later
			Kokkos::parallel_reduce( Kokkos::ThreadVectorRange(team, vector_length), [&](const int &idx, int &m_fill) {
              // compute rsq, jtype, j and store in scratch space which is
              // reused later
              Kokkos::parallel_reduce(
                  Kokkos::ThreadVectorRange(team, vector_length),
                  [&](const int &idx, int &m_fill) {
                    const int jj = jj_start + idx;

				// initialize: -1 represents no interaction with atom-j where j = d_neighbors(i,jj)
                    // initialize: -1 represents no interaction with atom-j
                    // where j = d_neighbors(i,jj)
                    s_jlist(team.team_rank(), idx) = -1;

                    if (jj < jnum) {
@@ -530,13 +552,15 @@ FixQEqReaxKokkos<DeviceType>::compute_h_team(const typename Kokkos::TeamPolicy <
                              valid = false;
                            if (x(j, 2) == ztmp && x(j, 1) < ytmp)
                              valid = false;
						if (x(j,2) == ztmp && x(j,1) == ytmp && x(j,0) < xtmp)
                            if (x(j, 2) == ztmp && x(j, 1) == ytmp &&
                                x(j, 0) < xtmp)
                              valid = false;
                          }
                        }
                      }

				    const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
                      const F_FLOAT rsq =
                          delx * delx + dely * dely + delz * delz;
                      if (rsq > cutsq)
                        valid = false;

@@ -547,10 +571,13 @@ FixQEqReaxKokkos<DeviceType>::compute_h_team(const typename Kokkos::TeamPolicy <
                        m_fill++;
                      }
                    }
			    }, atomi_nbrs_curbatch);
                  },
                  atomi_nbrs_curbatch);

              // write non-zero entries of H to global memory
			Kokkos::parallel_scan( Kokkos::ThreadVectorRange(team, vector_length), [&](const int &idx, int &m_fill, bool final) {
              Kokkos::parallel_scan(
                  Kokkos::ThreadVectorRange(team, vector_length),
                  [&](const int &idx, int &m_fill, bool final) {
                    int j = s_jlist(team.team_rank(), idx);
                    if (final) {
                      if (j != -1) {
@@ -559,7 +586,8 @@ FixQEqReaxKokkos<DeviceType>::compute_h_team(const typename Kokkos::TeamPolicy <
                        const F_FLOAT shldij = d_shield(itype, jtype);

                        d_jlist[atomi_nbr_writeIdx + m_fill] = j;
					d_val[atomi_nbr_writeIdx + m_fill]   = calculate_H_k(r, shldij);
                        d_val[atomi_nbr_writeIdx + m_fill] =
                            calculate_H_k(r, shldij);
                      }
                    }

@@ -570,13 +598,11 @@ FixQEqReaxKokkos<DeviceType>::compute_h_team(const typename Kokkos::TeamPolicy <
              atomi_nbrs_inH += atomi_nbrs_curbatch;
            }

		    Kokkos::single (Kokkos::PerThread (team), [&] () {
			    d_numnbrs[i] = atomi_nbrs_inH;
			});
            Kokkos::single(Kokkos::PerThread(team),
                           [&]() { d_numnbrs[i] = atomi_nbrs_inH; });
          }
        }
      });

}

/* ---------------------------------------------------------------------- */
+34 −25
Original line number Diff line number Diff line
@@ -265,24 +265,33 @@ struct FixQEqReaxKokkosComputeHFunctor {
  FixQEqReaxKokkos<DeviceType> c;

  FixQEqReaxKokkosComputeHFunctor(FixQEqReaxKokkos<DeviceType> *c_ptr,
				    int _atoms_per_team,
				    int _vector_length):
	c(*c_ptr), atoms_per_team(_atoms_per_team), vector_length(_vector_length) {
                                  int _atoms_per_team, int _vector_length)
      : c(*c_ptr), atoms_per_team(_atoms_per_team),
        vector_length(_vector_length) {
    c.cleanup_copy();
  };

  KOKKOS_INLINE_FUNCTION
    void operator()(const typename Kokkos::TeamPolicy <DeviceType> ::member_type &team) const {
  void operator()(
      const typename Kokkos::TeamPolicy<DeviceType>::member_type &team) const {
    c.template compute_h_team<NEIGHFLAG>(team, atoms_per_team, vector_length);
  }

  size_t team_shmem_size(int team_size) const {
	size_t shmem_size = Kokkos::View<int*, scratch_space, Kokkos::MemoryUnmanaged>::shmem_size(atoms_per_team) + // s_ilist
	    Kokkos::View<int*, scratch_space, Kokkos::MemoryUnmanaged>::shmem_size(atoms_per_team) + // s_numnbrs
	    Kokkos::View<int*, scratch_space, Kokkos::MemoryUnmanaged>::shmem_size(atoms_per_team) +  // s_firstnbr
	    Kokkos::View<int**, scratch_space, Kokkos::MemoryUnmanaged>::shmem_size(atoms_per_team, vector_length) + //s_jtype
	    Kokkos::View<int**, scratch_space, Kokkos::MemoryUnmanaged>::shmem_size(atoms_per_team, vector_length) + //s_j
	    Kokkos::View<F_FLOAT**, scratch_space, Kokkos::MemoryUnmanaged>::shmem_size(atoms_per_team, vector_length) ; //s_r
    size_t shmem_size =
        Kokkos::View<int *, scratch_space, Kokkos::MemoryUnmanaged>::shmem_size(
            atoms_per_team) + // s_ilist
        Kokkos::View<int *, scratch_space, Kokkos::MemoryUnmanaged>::shmem_size(
            atoms_per_team) + // s_numnbrs
        Kokkos::View<int *, scratch_space, Kokkos::MemoryUnmanaged>::shmem_size(
            atoms_per_team) + // s_firstnbr
        Kokkos::View<int **, scratch_space, Kokkos::MemoryUnmanaged>::
            shmem_size(atoms_per_team, vector_length) + // s_jtype
        Kokkos::View<int **, scratch_space, Kokkos::MemoryUnmanaged>::
            shmem_size(atoms_per_team, vector_length) + // s_j
        Kokkos::View<F_FLOAT **, scratch_space,
                     Kokkos::MemoryUnmanaged>::shmem_size(atoms_per_team,
                                                          vector_length); // s_r
    return shmem_size;
  }
};