Commit 326a8a12, authored by sjplimp and committed by GitHub
Browse files

Merge pull request #536 from akohlmey/fix-nvcc-openmp-conflicts

Implement workaround for NVCC incompatibilities with OpenMP directives
parents b5300724 164cedf3
Loading
Loading
Loading
Loading
+3 −3
Original line number Diff line number Diff line
@@ -484,7 +484,7 @@ double PairCombOMP::yasu_char(double *qf_fix, int &igroup)

        qfo_field(&params[iparam_ij],rsq1,iq,jq,fqji,fqjj);
        fqi   += jq * fqij + fqji;
#if defined(_OPENMP)
#if defined(_OPENMP) && !defined(__NVCC__)
#pragma omp atomic
#endif
        qf[j] += (iq * fqij + fqjj);
@@ -511,13 +511,13 @@ double PairCombOMP::yasu_char(double *qf_fix, int &igroup)

        qfo_short(&params[iparam_ij],i,nj,rsq1,iq,jq,fqij,fqjj);
        fqi += fqij;
#if defined(_OPENMP)
#if defined(_OPENMP) && !defined(__NVCC__)
#pragma omp atomic
#endif
        qf[j] += fqjj;
      }

#if defined(_OPENMP)
#if defined(_OPENMP) && !defined(__NVCC__)
#pragma omp atomic
#endif
      qf[i] += fqi;
+7 −1
Original line number Diff line number Diff line
@@ -69,7 +69,9 @@ void Torsion_AnglesOMP( reax_system *system, control_params *control,
  double total_Econ = 0;
  int  nthreads = control->nthreads;

#if defined(_OPENMP)
#pragma omp parallel default(shared) reduction(+: total_Etor, total_Econ)
#endif
  {
  int i, j, k, l, pi, pj, pk, pl, pij, plk;
  int type_i, type_j, type_k, type_l;
@@ -125,7 +127,9 @@ void Torsion_AnglesOMP( reax_system *system, control_params *control,
                                    system->N, system->pair_ptr->eatom,
                                    system->pair_ptr->vatom, thr);

#if defined(_OPENMP)
#pragma omp for schedule(static)
#endif
  for (j = 0; j < system->N; ++j) {
    start_j = Start_Index(j, bonds);
    end_j = End_Index(j, bonds);
@@ -137,7 +141,9 @@ void Torsion_AnglesOMP( reax_system *system, control_params *control,
    }
  }

#if defined(_OPENMP)
#pragma omp for schedule(dynamic,50)
#endif
  for (j = 0; j < natoms; ++j) {
    type_j = system->my_atoms[j].type;
    Delta_j = workspace->Delta_boc[j];
+12 −3
Original line number Diff line number Diff line
@@ -124,8 +124,9 @@ void Valence_AnglesOMP( reax_system *system, control_params *control,
  int  nthreads = control->nthreads;
  int  num_thb_intrs = 0;
  int  TWICE = 2;

#if defined(_OPENMP)
#pragma omp parallel default(shared) reduction(+:total_Eang, total_Epen, total_Ecoa, num_thb_intrs)
#endif
  {
    int i, j, pi, k, pk, t;
    int type_i, type_j, type_k;
@@ -180,7 +181,9 @@ void Valence_AnglesOMP( reax_system *system, control_params *control,

    const int per_thread = thb_intrs->num_intrs / nthreads;

#if defined(_OPENMP)
#pragma omp for schedule(dynamic,50)
#endif
    for (j = 0; j < system->N; ++j) {
      type_j = system->my_atoms[j].type;
      _my_offset[j] = 0;
@@ -251,11 +254,14 @@ void Valence_AnglesOMP( reax_system *system, control_params *control,
    } // for(j)

    // Wait for all threads to finish counting angles
#if defined(_OPENMP) && !defined(__NVCC__)
#pragma omp barrier

#endif
    // Master thread uses angle counts to compute offsets
    // This can be threaded
#if defined(_OPENMP) && !defined(__NVCC__)
#pragma omp master
#endif
    {
      int current_count = 0;
      int m = _my_offset[0];
@@ -269,12 +275,15 @@ void Valence_AnglesOMP( reax_system *system, control_params *control,
    }

    // All threads wait till master thread finished computing offsets
#if defined(_OPENMP) && !defined(__NVCC__)
#pragma omp barrier

#endif
    // Original loop, but now using precomputed offsets
    // Safe to use all threads available, regardless of threads tasked above
    // We also now skip over atoms that have no angles assigned
#if defined(_OPENMP)
#pragma omp for schedule(dynamic,50)//(dynamic,chunksize)//(guided)
#endif
    for (j = 0; j < system->N; ++j) {         // Ray: the first one with system->N
      type_j = system->my_atoms[j].type;
      if(type_j < 0) continue;