Commit bdbb36e5 authored by Agilio Padua's avatar Agilio Padua
Browse files

Merge branch 'master' into core_soft

parents 012d5a55 bd3b7129
Loading
Loading
Loading
Loading
+24 −20
Original line number Diff line number Diff line
@@ -256,7 +256,6 @@ void PairSNAPKokkos<DeviceType>::compute(int eflag_in, int vflag_in)

  if (vflag_fdotr) pair_virial_fdotr_compute(this);


  if (eflag_atom) {
    k_eatom.template modify<DeviceType>();
    k_eatom.template sync<LMPHostType>();
@@ -453,6 +452,13 @@ void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAP<NEIGHFLAG,EVFLAG>,const
  //t4 += timer.seconds(); timer.reset();
  team.team_barrier();

  if (quadraticflag) {
    my_sna.compute_bi(team);
    team.team_barrier();
    my_sna.copy_bi2bvec(team);
    team.team_barrier();
  }

  // for neighbors of I within cutoff:
  // compute dUi/drj and dBi/drj
  // Fij = dEi/dRj = -dEi/dRi => add to Fi, subtract from Fj
@@ -472,10 +478,6 @@ void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAP<NEIGHFLAG,EVFLAG>,const
    my_sna.compute_dbidrj(team);
    //t7 += timer2.seconds(); timer2.reset();
    my_sna.copy_dbi2dbvec(team);
    if (quadraticflag) {
      my_sna.compute_bi(team);
      my_sna.copy_bi2bvec(team);
    }

    Kokkos::single(Kokkos::PerThread(team), [&] (){
    F_FLOAT fij[3];
@@ -536,10 +538,10 @@ void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAP<NEIGHFLAG,EVFLAG>,const
    a_f(j,1) -= fij[1];
    a_f(j,2) -= fij[2];

    // tally per-atom virial contribution
    // tally global and per-atom virial contribution

    if (EVFLAG) {
      if (vflag) {
      if (vflag_either) {
        v_tally_xyz<NEIGHFLAG>(ev,i,j,
          fij[0],fij[1],fij[2],
          -my_sna.rij(jj,0),-my_sna.rij(jj,1),
@@ -554,11 +556,13 @@ void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAP<NEIGHFLAG,EVFLAG>,const
  // tally energy contribution

  if (EVFLAG) {
    if (eflag) {
    if (eflag_either) {

      if (!quadraticflag) {
        my_sna.compute_bi(team);
        team.team_barrier();
        my_sna.copy_bi2bvec(team);
        team.team_barrier();
      }

      // E = beta.B + 0.5*B^t.alpha.B
@@ -566,14 +570,15 @@ void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAP<NEIGHFLAG,EVFLAG>,const
      // coeff[k] = alpha_ii or
      // coeff[k] = alpha_ij = alpha_ji, j != i

      if (team.team_rank() == 0)
      Kokkos::single(Kokkos::PerThread(team), [&] () {
      Kokkos::single(Kokkos::PerTeam(team), [&] () {

        // evdwl = energy of atom I, sum over coeffs_k * Bi_k

        double evdwl = d_coeffi[0];

        // linear contributions
        // could use thread vector range on this loop

        for (int k = 1; k <= ncoeff; k++)
          evdwl += d_coeffi[k]*my_sna.bvec[k-1];

@@ -589,11 +594,10 @@ void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAP<NEIGHFLAG,EVFLAG>,const
            }
          }
        }

        //ev_tally_full(i,2.0*evdwl,0.0,0.0,0.0,0.0,0.0);
        if (eflag_either) {
        if (eflag_global) ev.evdwl += evdwl;
        if (eflag_atom) d_eatom[i] += evdwl;
        }
      });
    }
  }
+33 −20
Original line number Diff line number Diff line
@@ -327,29 +327,40 @@ void SNAKokkos<DeviceType>::compute_bi(const typename Kokkos::TeamPolicy<DeviceT
  clock_gettime(CLOCK_REALTIME, &starttime);
#endif

  Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,idxj_max),
  Kokkos::parallel_for(Kokkos::TeamThreadRange(team,idxj_full_max),
      [&] (const int& idx) {
    const int j1 = idxj(idx).j1;
    const int j2 = idxj(idx).j2;
    const int j =  idxj(idx).j;
    double b_j1_j2_j = 0.0;
    const int j1 = idxj_full(idx).j1;
    const int j2 = idxj_full(idx).j2;
    const int j =  idxj_full(idx).j;

    for(int mb = 0; 2*mb < j; mb++)
      for(int ma = 0; ma <= j; ma++) {
        b_j1_j2_j +=
    const int bound = (j+2)/2;
    double b_j1_j2_j = 0.0;
    double b_j1_j2_j_temp = 0.0;
    Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team,(j+1)*bound),
        [&] (const int mbma, double& sum) {
        //for(int mb = 0; 2*mb <= j; mb++)
          //for(int ma = 0; ma <= j; ma++) {
        const int ma = mbma%(j+1);
        const int mb = mbma/(j+1);
        if (2*mb == j) return;
        sum +=
          uarraytot_r(j,ma,mb) * zarray_r(j1,j2,j,mb,ma) +
          uarraytot_i(j,ma,mb) * zarray_i(j1,j2,j,mb,ma);
      } // end loop over ma, mb
      },b_j1_j2_j_temp); // end loop over ma, mb
      b_j1_j2_j += b_j1_j2_j_temp;

    // For j even, special treatment for middle column

    if (j%2 == 0) {
      const int mb = j/2;
      for(int ma = 0; ma < mb; ma++) {
        b_j1_j2_j +=
      Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team, mb),
          [&] (const int ma, double& sum) {
      //for(int ma = 0; ma < mb; ma++) {
        sum +=
          uarraytot_r(j,ma,mb) * zarray_r(j1,j2,j,mb,ma) +
          uarraytot_i(j,ma,mb) * zarray_i(j1,j2,j,mb,ma);
      }
      },b_j1_j2_j_temp); // end loop over ma
      b_j1_j2_j += b_j1_j2_j_temp;

      const int ma = mb;
      b_j1_j2_j +=
@@ -357,11 +368,13 @@ void SNAKokkos<DeviceType>::compute_bi(const typename Kokkos::TeamPolicy<DeviceT
         uarraytot_i(j,ma,mb) * zarray_i(j1,j2,j,mb,ma))*0.5;
    }

    Kokkos::single(Kokkos::PerThread(team), [&] () {
      b_j1_j2_j *= 2.0;
      if (bzero_flag)
        b_j1_j2_j -= bzero[j];

      barray(j1,j2,j) = b_j1_j2_j;
    });
  });
      //} // end loop over j
    //} // end loop over j1, j2
@@ -1028,6 +1041,8 @@ void SNAKokkos<DeviceType>::create_team_scratch_arrays(const typename Kokkos::Te
  uarraytot_i_a = uarraytot_i = t_sna_3d(team.team_scratch(1),jdim,jdim,jdim);
  zarray_r = t_sna_5d(team.team_scratch(1),jdim,jdim,jdim,jdim,jdim);
  zarray_i = t_sna_5d(team.team_scratch(1),jdim,jdim,jdim,jdim,jdim);
  bvec = Kokkos::View<double*, Kokkos::LayoutRight, DeviceType>(team.team_scratch(1),ncoeff);
  barray = t_sna_3d(team.team_scratch(1),jdim,jdim,jdim);

  rij = t_sna_2d(team.team_scratch(1),nmax,3);
  rcutij = t_sna_1d(team.team_scratch(1),nmax);
@@ -1046,6 +1061,8 @@ T_INT SNAKokkos<DeviceType>::size_team_scratch_arrays() {
  size += t_sna_3d::shmem_size(jdim,jdim,jdim); // uarraytot_i_a
  size += t_sna_5d::shmem_size(jdim,jdim,jdim,jdim,jdim); // zarray_r
  size += t_sna_5d::shmem_size(jdim,jdim,jdim,jdim,jdim); // zarray_i
  size += Kokkos::View<double*, Kokkos::LayoutRight, DeviceType>::shmem_size(ncoeff); // bvec
  size += t_sna_3d::shmem_size(jdim,jdim,jdim); // barray

  size += t_sna_2d::shmem_size(nmax,3); // rij
  size += t_sna_1d::shmem_size(nmax); // rcutij
@@ -1062,8 +1079,6 @@ KOKKOS_INLINE_FUNCTION
void SNAKokkos<DeviceType>::create_thread_scratch_arrays(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team)
{
  int jdim = twojmax + 1;
  bvec = Kokkos::View<double*, Kokkos::LayoutRight, DeviceType>(team.thread_scratch(1),ncoeff);
  barray = t_sna_3d(team.thread_scratch(1),jdim,jdim,jdim);

  dbvec = Kokkos::View<double*[3], Kokkos::LayoutRight, DeviceType>(team.thread_scratch(1),ncoeff);
  dbarray = t_sna_4d(team.thread_scratch(1),jdim,jdim,jdim);
@@ -1079,8 +1094,6 @@ inline
T_INT SNAKokkos<DeviceType>::size_thread_scratch_arrays() {
  T_INT size = 0;
  int jdim = twojmax + 1;
  size += Kokkos::View<double*, Kokkos::LayoutRight, DeviceType>::shmem_size(ncoeff); // bvec
  size += t_sna_3d::shmem_size(jdim,jdim,jdim); // barray

  size += Kokkos::View<double*[3], Kokkos::LayoutRight, DeviceType>::shmem_size(ncoeff); // dbvec
  size += t_sna_4d::shmem_size(jdim,jdim,jdim); // dbarray
+22 −22
Original line number Diff line number Diff line
@@ -189,17 +189,17 @@ void ImproperUmbrella::compute(int eflag, int vflag)
    dahy = ary-c*hry;
    dahz = arz-c*hrz;

    f2[0] = (dhay*vb1z - dhaz*vb1y)*rar;
    f2[1] = (dhaz*vb1x - dhax*vb1z)*rar;
    f2[2] = (dhax*vb1y - dhay*vb1x)*rar;
    f2[0] = (dhay*vb1z - dhaz*vb1y)*rar*a;
    f2[1] = (dhaz*vb1x - dhax*vb1z)*rar*a;
    f2[2] = (dhax*vb1y - dhay*vb1x)*rar*a;

    f3[0] = (-dhay*vb2z + dhaz*vb2y)*rar;
    f3[1] = (-dhaz*vb2x + dhax*vb2z)*rar;
    f3[2] = (-dhax*vb2y + dhay*vb2x)*rar;
    f3[0] = (-dhay*vb2z + dhaz*vb2y)*rar*a;
    f3[1] = (-dhaz*vb2x + dhax*vb2z)*rar*a;
    f3[2] = (-dhax*vb2y + dhay*vb2x)*rar*a;

    f4[0] = dahx*rhr;
    f4[1] = dahy*rhr;
    f4[2] = dahz*rhr;
    f4[0] = dahx*rhr*a;
    f4[1] = dahy*rhr*a;
    f4[2] = dahz*rhr*a;

    f1[0] = -(f2[0] + f3[0] + f4[0]);
    f1[1] = -(f2[1] + f3[1] + f4[1]);
@@ -208,27 +208,27 @@ void ImproperUmbrella::compute(int eflag, int vflag)
    // apply force to each of 4 atoms

    if (newton_bond || i1 < nlocal) {
      f[i1][0] += f1[0]*a;
      f[i1][1] += f1[1]*a;
      f[i1][2] += f1[2]*a;
      f[i1][0] += f1[0];
      f[i1][1] += f1[1];
      f[i1][2] += f1[2];
    }

    if (newton_bond || i2 < nlocal) {
      f[i2][0] += f3[0]*a;
      f[i2][1] += f3[1]*a;
      f[i2][2] += f3[2]*a;
      f[i2][0] += f3[0];
      f[i2][1] += f3[1];
      f[i2][2] += f3[2];
    }

    if (newton_bond || i3 < nlocal) {
      f[i3][0] += f2[0]*a;
      f[i3][1] += f2[1]*a;
      f[i3][2] += f2[2]*a;
      f[i3][0] += f2[0];
      f[i3][1] += f2[1];
      f[i3][2] += f2[2];
    }

    if (newton_bond || i4 < nlocal) {
      f[i4][0] += f4[0]*a;
      f[i4][1] += f4[1]*a;
      f[i4][2] += f4[2]*a;
      f[i4][0] += f4[0];
      f[i4][1] += f4[1];
      f[i4][2] += f4[2];
    }

    if (evflag) {
@@ -247,7 +247,7 @@ void ImproperUmbrella::compute(int eflag, int vflag)
      vb3y = x[i4][1] - x[i3][1];
      vb3z = x[i4][2] - x[i3][2];

      ev_tally(i1,i2,i3,i4,nlocal,newton_bond,eimproper,f1,f3,f4,
      ev_tally(i1,i2,i3,i4,nlocal,newton_bond,eimproper,f1,f2,f4,
               vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z);
    }
  }
+65 −23
Original line number Diff line number Diff line
@@ -162,6 +162,8 @@ FixBondReact::FixBondReact(LAMMPS *lmp, int narg, char **arg) :
  memory->create(reacted_mol,nreacts,"bond/react:reacted_mol");
  memory->create(fraction,nreacts,"bond/react:fraction");
  memory->create(max_rxn,nreacts,"bond/react:max_rxn");
  memory->create(nlocalskips,nreacts,"bond/react:nlocalskips");
  memory->create(nghostlyskips,nreacts,"bond/react:nghostlyskips");
  memory->create(seed,nreacts,"bond/react:seed");
  memory->create(limit_duration,nreacts,"bond/react:limit_duration");
  memory->create(stabilize_steps_flag,nreacts,"bond/react:stabilize_steps_flag");
@@ -180,7 +182,7 @@ FixBondReact::FixBondReact(LAMMPS *lmp, int narg, char **arg) :
  for (int i = 0; i < nreacts; i++) {
    fraction[i] = 1;
    seed[i] = 12345;
    max_rxn[i] = BIG;
    max_rxn[i] = INT_MAX;
    stabilize_steps_flag[i] = 0;
    update_edges_flag[i] = 0;
    // set default limit duration to 60 timesteps
@@ -413,6 +415,8 @@ FixBondReact::~FixBondReact()
  memory->destroy(fraction);
  memory->destroy(seed);
  memory->destroy(max_rxn);
  memory->destroy(nlocalskips);
  memory->destroy(nghostlyskips);
  memory->destroy(limit_duration);
  memory->destroy(stabilize_steps_flag);
  memory->destroy(update_edges_flag);
@@ -684,6 +688,8 @@ void FixBondReact::post_integrate()
    reaction_count[i] = 0;
    local_rxn_count[i] = 0;
    ghostly_rxn_count[i] = 0;
    nlocalskips[i] = 0;
    nghostlyskips[i] = 0;
  }

  if (nevery_check) {
@@ -1153,16 +1159,44 @@ void FixBondReact::superimpose_algorithm()

  MPI_Allreduce(&local_rxn_count[0],&reaction_count[0],nreacts,MPI_INT,MPI_SUM,world);

  for (int i = 0; i < nreacts; i++)
    reaction_count_total[i] += reaction_count[i];

  if (me == 0)
    for (int i = 0; i < nreacts; i++)
      reaction_count_total[i] += ghostly_rxn_count[i];
      reaction_count_total[i] += reaction_count[i] + ghostly_rxn_count[i];

  //  bcast ghostly_rxn_count
  MPI_Bcast(&reaction_count_total[0], nreacts, MPI_INT, 0, world);

  // check if we overstepped our reaction limit
  for (int i = 0; i < nreacts; i++) {
    if (reaction_count_total[i] > max_rxn[i]) {
      // let's randomly choose rxns to skip, unbiasedly from local and ghostly
      int local_rxncounts[nprocs];
      int all_localskips[nprocs];
      MPI_Gather(&local_rxn_count[i],1,MPI_INT,local_rxncounts,1,MPI_INT,0,world);
      if (me == 0) {
        int overstep = reaction_count_total[i] - max_rxn[i];
        int delta_rxn = reaction_count[i] + ghostly_rxn_count[i];
        int rxn_by_proc[delta_rxn];
        for (int j = 0; j < delta_rxn; j++)
          rxn_by_proc[j] = -1; // corresponds to ghostly
        int itemp = 0;
        for (int j = 0; j < nprocs; j++)
          for (int k = 0; k < local_rxn_count[j]; k++)
            rxn_by_proc[itemp++] = j;
        std::random_shuffle(&rxn_by_proc[0],&rxn_by_proc[delta_rxn]);
        for (int j = 0; j < nprocs; j++)
          all_localskips[j] = 0;
        nghostlyskips[i] = 0;
        for (int j = 0; j < overstep; j++) {
          if (rxn_by_proc[j] == -1) nghostlyskips[i]++;
          else all_localskips[rxn_by_proc[j]]++;
        }
      }
      reaction_count_total[i] = max_rxn[i];
      MPI_Scatter(&all_localskips[0],1,MPI_INT,&nlocalskips[i],1,MPI_INT,0,world);
      MPI_Bcast(&nghostlyskips[i],1,MPI_INT,0,world);
    }
  }

  // this updates topology next step
  next_reneighbor = update->ntimestep;

@@ -1957,6 +1991,7 @@ void FixBondReact::glove_ghostcheck()
  // 'ghosts of another' indication taken from comm->sendlist

  int ghostly = 0;
  #if !defined(MPI_STUBS)
    if (comm->style == 0) {
      for (int i = 0; i < onemol->natoms; i++) {
        int ilocal = atom->map(glove[i][1]);
@@ -1966,10 +2001,9 @@ void FixBondReact::glove_ghostcheck()
        }
      }
    } else {
    #if !defined(MPI_STUBS)
      ghostly = 1;
    #endif
    }
  #endif

  if (ghostly == 1) {
    ghostly_mega_glove[0][ghostly_num_mega] = rxnID;
@@ -2104,18 +2138,26 @@ void FixBondReact::update_everything()
  memory->create(update_mega_glove,max_natoms+1,MAX(local_num_mega,global_megasize),"bond/react:update_mega_glove");

  for (int pass = 0; pass < 2; pass++) {

    update_num_mega = 0;
    int iskip[nreacts];
    for (int i = 0; i < nreacts; i++) iskip[i] = 0;
    if (pass == 0) {
      update_num_mega = local_num_mega;
      for (int i = 0; i < update_num_mega; i++) {
      for (int i = 0; i < local_num_mega; i++) {
        rxnID = local_mega_glove[0][i];
        // reactions already shuffled from dedup procedure, so can skip first N
        if (iskip[rxnID]++ < nlocalskips[rxnID]) continue;
        for (int j = 0; j < max_natoms+1; j++)
          update_mega_glove[j][i] = local_mega_glove[j][i];
          update_mega_glove[j][update_num_mega] = local_mega_glove[j][i];
        update_num_mega++;
      }
    } else if (pass == 1) {
      update_num_mega = global_megasize;
      for (int i = 0; i < global_megasize; i++) {
        rxnID = global_mega_glove[0][i];
        // reactions already shuffled from dedup procedure, so can skip first N
        if (iskip[rxnID]++ < nghostlyskips[rxnID]) continue;
        for (int j = 0; j < max_natoms+1; j++)
          update_mega_glove[j][i] = global_mega_glove[j][i];
          update_mega_glove[j][update_num_mega] = global_mega_glove[j][i];
        update_num_mega++;
      }
    }

+2 −1
Original line number Diff line number Diff line
@@ -54,7 +54,8 @@ class FixBondReact : public Fix {
  FILE *fp;
  int *iatomtype,*jatomtype;
  int *seed;
  double **cutsq,*fraction,*max_rxn;
  double **cutsq,*fraction;
  int *max_rxn,*nlocalskips,*nghostlyskips;
  tagint lastcheck;
  int stabilization_flag;
  int custom_exclude_flag;
Loading