Commit 73fa8d40 authored by Stan Moore
Browse files

Rename Kokkos variables

parent bd237a05
Loading
Loading
Loading
Loading
+16 −16
Original line number Diff line number Diff line
@@ -78,9 +78,9 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)

  // process any command-line args that invoke Kokkos settings

  ngpu = 0;
  ngpus = 0;
  int device = 0;
  num_threads = 1;
  nthreads = 1;
  numa = 1;

  int iarg = 0;
@@ -96,7 +96,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
      error->all(FLERR,"GPUs are requested but Kokkos has not been compiled for CUDA");
#endif
      if (iarg+2 > narg) error->all(FLERR,"Invalid Kokkos command-line args");
      ngpu = atoi(arg[iarg+1]);
      ngpus = atoi(arg[iarg+1]);

      int skip_gpu = 9999;
      if (iarg+2 < narg && isdigit(arg[iarg+2][0])) {
@@ -108,23 +108,23 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
      char *str;
      if ((str = getenv("SLURM_LOCALID"))) {
        int local_rank = atoi(str);
        device = local_rank % ngpu;
        device = local_rank % ngpus;
        if (device >= skip_gpu) device++;
      }
      if ((str = getenv("MV2_COMM_WORLD_LOCAL_RANK"))) {
        int local_rank = atoi(str);
        device = local_rank % ngpu;
        device = local_rank % ngpus;
        if (device >= skip_gpu) device++;
      }
      if ((str = getenv("OMPI_COMM_WORLD_LOCAL_RANK"))) {
        int local_rank = atoi(str);
        device = local_rank % ngpu;
        device = local_rank % ngpus;
        if (device >= skip_gpu) device++;
      }

    } else if (strcmp(arg[iarg],"t") == 0 ||
               strcmp(arg[iarg],"threads") == 0) {
      num_threads = atoi(arg[iarg+1]);
      nthreads = atoi(arg[iarg+1]);
      iarg += 2;

    } else if (strcmp(arg[iarg],"n") == 0 ||
@@ -138,12 +138,12 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
  // initialize Kokkos

  if (me == 0) {
    if (screen) fprintf(screen,"  will use up to %d GPU(s) per node\n",ngpu);
    if (logfile) fprintf(logfile,"  will use up to %d GPU(s) per node\n",ngpu);
    if (screen) fprintf(screen,"  will use up to %d GPU(s) per node\n",ngpus);
    if (logfile) fprintf(logfile,"  will use up to %d GPU(s) per node\n",ngpus);
  }

#ifdef KOKKOS_ENABLE_CUDA
  if (ngpu <= 0)
  if (ngpus <= 0)
    error->all(FLERR,"Kokkos has been compiled for CUDA but no GPUs are requested");

  // check and warn about GPU-direct availability when using multiple MPI tasks
@@ -167,14 +167,14 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
#endif

#ifndef KOKKOS_ENABLE_SERIAL
  if (num_threads == 1)
  if (nthreads == 1)
    error->warning(FLERR,"When using a single thread, the Kokkos Serial backend "
                         "(i.e. Makefile.kokkos_mpi_only) gives better performance "
                         "than the OpenMP backend");
#endif

  Kokkos::InitArguments args;
  args.num_threads = num_threads;
  args.num_threads = nthreads;
  args.num_numa = numa;
  args.device_id = device;

@@ -184,7 +184,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)

  binsize = 0.0;
  gpu_direct_flag = 1;
  if (ngpu > 0) {
  if (ngpus > 0) {
    neighflag = FULL;
    neighflag_qeq = FULL;
    neighflag_qeq_set = 0;
@@ -192,7 +192,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
    exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 0;
    exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0;
  } else {
    if (num_threads > 1) {
    if (nthreads > 1) {
      neighflag = HALFTHREAD;
      neighflag_qeq = HALFTHREAD;
    } else {
@@ -236,7 +236,7 @@ void KokkosLMP::accelerator(int narg, char **arg)
      if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command");
      if (strcmp(arg[iarg+1],"full") == 0) neighflag = FULL;
      else if (strcmp(arg[iarg+1],"half") == 0) {
        if (num_threads > 1 || ngpu > 0)
        if (nthreads > 1 || ngpus > 0)
          neighflag = HALFTHREAD;
        else
          neighflag = HALF;
@@ -248,7 +248,7 @@ void KokkosLMP::accelerator(int narg, char **arg)
      if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command");
      if (strcmp(arg[iarg+1],"full") == 0) neighflag_qeq = FULL;
      else if (strcmp(arg[iarg+1],"half") == 0) {
        if (num_threads > 1 || ngpu > 0)
        if (nthreads > 1 || ngpus > 0)
          neighflag_qeq = HALFTHREAD;
        else
          neighflag_qeq = HALF;
+1 −1
Original line number Diff line number Diff line
@@ -32,7 +32,7 @@ class KokkosLMP : protected Pointers {
  int exchange_comm_on_host;
  int forward_comm_on_host;
  int reverse_comm_on_host;
  int num_threads,ngpu;
  int nthreads,ngpus;
  int numa;
  int auto_sync;
  int gpu_direct_flag;
+1 −1
Original line number Diff line number Diff line
@@ -362,7 +362,7 @@ void NeighborKokkos::modify_mol_intra_grow_kokkos(){

/* ---------------------------------------------------------------------- */
void NeighborKokkos::set_binsize_kokkos() {
  if (!binsizeflag && lmp->kokkos->ngpu > 0) {
  if (!binsizeflag && lmp->kokkos->ngpus > 0) {
    binsize_user = cutneighmax;
    binsizeflag = 1;
  }
+7 −7
Original line number Diff line number Diff line
@@ -310,12 +310,12 @@ void PairExp6rxKokkos<DeviceType>::compute(int eflag_in, int vflag_in)

#else // No atomics

  num_threads = lmp->kokkos->num_threads;
  nthreads = lmp->kokkos->nthreads;
  int nmax = f.extent(0);
  if (nmax > t_f.extent(1)) {
    t_f = t_f_array_thread("pair_exp6_rx:t_f",num_threads,nmax);
    t_uCG = t_efloat_1d_thread("pair_exp6_rx:t_uCG",num_threads,nmax);
    t_uCGnew = t_efloat_1d_thread("pair_exp6_rx:t_UCGnew",num_threads,nmax);
    t_f = t_f_array_thread("pair_exp6_rx:t_f",nthreads,nmax);
    t_uCG = t_efloat_1d_thread("pair_exp6_rx:t_uCG",nthreads,nmax);
    t_uCGnew = t_efloat_1d_thread("pair_exp6_rx:t_UCGnew",nthreads,nmax);
  }

  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairExp6rxZeroDupViews>(0,nmax),*this);
@@ -1642,7 +1642,7 @@ void PairExp6rxKokkos<DeviceType>::operator()(TagPairExp6rxComputeNoAtomics<NEIG
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void PairExp6rxKokkos<DeviceType>::operator()(TagPairExp6rxCollapseDupViews, const int &i) const {
  for (int n = 0; n < num_threads; n++) {
  for (int n = 0; n < nthreads; n++) {
    f(i,0) += t_f(n,i,0);
    f(i,1) += t_f(n,i,1);
    f(i,2) += t_f(n,i,2);
@@ -1654,7 +1654,7 @@ void PairExp6rxKokkos<DeviceType>::operator()(TagPairExp6rxCollapseDupViews, con
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void PairExp6rxKokkos<DeviceType>::operator()(TagPairExp6rxZeroDupViews, const int &i) const {
  for (int n = 0; n < num_threads; n++) {
  for (int n = 0; n < nthreads; n++) {
    t_f(n,i,0) = 0.0;
    t_f(n,i,1) = 0.0;
    t_f(n,i,2) = 0.0;
@@ -2105,7 +2105,7 @@ void PairExp6rxKokkos<DeviceType>::getMixingWeights(int id,double &epsilon1,doub
void partition_range( const int begin, const int end, int &thread_begin, int &thread_end, const int chunkSize = 1)
{
   int threadId = omp_get_thread_num();
   int nThreads = omp_get_num_threads();
   int nThreads = omp_get_nthreads();

   const int len = end - begin;
   const int nBlocks = (len + (chunkSize - 1)) / chunkSize;
+1 −1
Original line number Diff line number Diff line
@@ -145,7 +145,7 @@ class PairExp6rxKokkos : public PairExp6rx {
  int eflag,vflag;
  int nlocal,newton_pair,neighflag;
  double special_lj[4];
  int num_threads,ntypes;
  int nthreads,ntypes;

  typename AT::t_x_array_randomread x;
  typename AT::t_f_array f;
Loading