Commit e634c5a2 authored by Steve Plimpton's avatar Steve Plimpton
Browse files

memory allocation bugfix for USER-INTEL pppm from M Brown

parent 883b7aaa
Loading
Loading
Loading
Loading
+6 −0
Original line number Diff line number Diff line
@@ -106,6 +106,8 @@ $t should be 2 for Intel Xeon CPUs and 2 or 4 for Intel Xeon Phi :l
For some of the simple 2-body potentials without long-range
electrostatics, performance and scalability can be better with
the "newton off" setting added to the input script :l
For simulations on higher node counts, add "processors * * * grid 
numa" to the beginning of the input script for better scalability :l
If using {kspace_style pppm} in the input script, add
"kspace_modify diff ad" for better performance :l
:ule
@@ -392,6 +394,10 @@ hybrid intel omp"_suffix.html command can also be used within the
input script to automatically append the "omp" suffix to styles when
USER-INTEL styles are not available.

NOTE: For simulations on higher node counts, add "processors * * *
grid numa"_processors.html to the beginning of the input script for
better scalability.

When running on many nodes, performance might be better when using
fewer OpenMP threads and more MPI tasks. This will depend on the
simulation and the machine. Using the "verlet/split"_run_style.html
+1 −1
Original line number Diff line number Diff line
@@ -14,7 +14,7 @@ read_data file keyword args ... :pre

file = name of data file to read in :ulb,l
zero or more keyword/arg pairs may be appended :l
keyword = {add} or {offset} or {shift} or {extra/atom/types} or {extra/bond/types} or {extra/angle/types} or {extra/dihedral/types} or {extra/improper/types} or {group} or {nocoeff} or {fix} :l
keyword = {add} or {offset} or {shift} or {extra/atom/types} or {extra/bond/types} or {extra/angle/types} or {extra/dihedral/types} or {extra/improper/types} or {extra/bond/per/atom} or {extra/angle/per/atom} or {extra/dihedral/per/atom} or {extra/improper/per/atom} or {group} or {nocoeff} or {fix} :l
  {add} arg = {append} or {Nstart} or {merge}
    append = add new atoms with IDs appended to current IDs
    Nstart = add new atoms with IDs starting with Nstart
+8 −4
Original line number Diff line number Diff line
@@ -748,7 +748,8 @@ void FixIntel::add_oresults(const ft * _noalias const f_in,
      if (eatom) {
        double * _noalias const lmp_eatom = force->pair->eatom + out_offset;
        #if defined(LMP_SIMD_COMPILER)
        #pragma novector
        #pragma vector aligned
	#pragma ivdep
        #endif
        for (int i = ifrom; i < ito; i++) {
          f[i].x += f_in[ii].x;
@@ -762,7 +763,8 @@ void FixIntel::add_oresults(const ft * _noalias const f_in,
        }
      } else {
        #if defined(LMP_SIMD_COMPILER)
        #pragma novector
        #pragma vector aligned
	#pragma ivdep
        #endif
        for (int i = ifrom; i < ito; i++) {
          f[i].x += f_in[ii].x;
@@ -778,7 +780,8 @@ void FixIntel::add_oresults(const ft * _noalias const f_in,
      if (eatom) {
        double * _noalias const lmp_eatom = force->pair->eatom + out_offset;
        #if defined(LMP_SIMD_COMPILER)
        #pragma novector
        #pragma vector aligned
	#pragma ivdep
        #endif
        for (int i = ifrom; i < ito; i++) {
          f[i].x += f_in[i].x;
@@ -788,7 +791,8 @@ void FixIntel::add_oresults(const ft * _noalias const f_in,
        }
      } else {
        #if defined(LMP_SIMD_COMPILER)
        #pragma novector
        #pragma vector aligned
	#pragma ivdep
        #endif
        for (int i = ifrom; i < ito; i++) {
          f[i].x += f_in[i].x;
+24 −2
Original line number Diff line number Diff line
@@ -172,6 +172,10 @@ class IntelBuffers {

  inline void thr_pack(const int ifrom, const int ito, const int ago) {
    if (ago == 0) {
      #if defined(LMP_SIMD_COMPILER)
      #pragma vector aligned
      #pragma ivdep
      #endif
      for (int i = ifrom; i < ito; i++) {
        _x[i].x = lmp->atom->x[i][0];
        _x[i].y = lmp->atom->x[i][1];
@@ -179,9 +183,17 @@ class IntelBuffers {
        _x[i].w = lmp->atom->type[i];
      }
      if (lmp->atom->q != NULL)
        #if defined(LMP_SIMD_COMPILER)
        #pragma vector aligned
        #pragma ivdep
        #endif
        for (int i = ifrom; i < ito; i++)
          _q[i] = lmp->atom->q[i];
    } else {
      #if defined(LMP_SIMD_COMPILER)
      #pragma vector aligned
      #pragma ivdep
      #endif
      for (int i = ifrom; i < ito; i++) {
        _x[i].x = lmp->atom->x[i][0];
        _x[i].y = lmp->atom->x[i][1];
@@ -204,7 +216,10 @@ class IntelBuffers {
                           const int offset, const bool dotype = false) {
    double ** x = lmp->atom->x + offset;
    if (dotype == false) {
      #pragma vector nontemporal
      #if defined(LMP_SIMD_COMPILER)
      #pragma vector aligned
      #pragma ivdep
      #endif
      for (int i = ifrom; i < ito; i++) {
        _x[i].x = x[i][0];
        _x[i].y = x[i][1];
@@ -212,7 +227,10 @@ class IntelBuffers {
      }
    } else {
      int *type = lmp->atom->type + offset;
      #pragma vector nontemporal
      #if defined(LMP_SIMD_COMPILER)
      #pragma vector aligned
      #pragma ivdep
      #endif
      for (int i = ifrom; i < ito; i++) {
        _x[i].x = x[i][0];
        _x[i].y = x[i][1];
@@ -225,6 +243,10 @@ class IntelBuffers {
  inline void thr_pack_host(const int ifrom, const int ito,
                            const int offset) {
    double ** x = lmp->atom->x + offset;
    #if defined(LMP_SIMD_COMPILER)
    #pragma vector aligned
    #pragma ivdep
    #endif
    for (int i = ifrom; i < ito; i++) {
      _host_x[i].x = x[i][0];
      _host_x[i].y = x[i][1];
+16 −1
Original line number Diff line number Diff line
@@ -68,7 +68,7 @@ enum {TIME_PACK, TIME_HOST_NEIGHBOR, TIME_HOST_PAIR, TIME_OFFLOAD_NEIGHBOR,
#define INTEL_MAX_STENCIL 256
// INTEL_MAX_STENCIL * sqrt(INTEL_MAX_STENCIL)
#define INTEL_MAX_STENCIL_CHECK 4096
#define INTEL_P3M_MAXORDER 7
#define INTEL_P3M_MAXORDER 8
#define INTEL_P3M_ALIGNED_MAXORDER 8
// PRECOMPUTE VALUES IN TABLE (DOESN'T AFFECT ACCURACY)
#define INTEL_P3M_TABLE 1
@@ -248,6 +248,12 @@ enum {TIME_PACK, TIME_HOST_NEIGHBOR, TIME_HOST_PAIR, TIME_OFFLOAD_NEIGHBOR,

#else

// Fallback variant of IP_PRE_omp_range (this definition sits in the #else
// branch of a conditional whose condition is not visible in this excerpt).
// Assigns the whole index range to the caller: ifrom = 0 and ito = inum.
// The tid and nthreads arguments are accepted but not used by this variant.
// NOTE(review): expands to a bare brace block, so ifrom and ito must be
// assignable lvalues at the point of use.
#define IP_PRE_omp_range(ifrom, ito, tid, inum, nthreads)       \
  {                                                             \
    ifrom = 0;                                                  \
    ito = inum;                                                 \
  }

#define IP_PRE_omp_range_id(ifrom, ito, tid, inum, nthreads)    \
  {                                                             \
    tid = 0;                                                    \
@@ -293,6 +299,15 @@ enum {TIME_PACK, TIME_HOST_NEIGHBOR, TIME_HOST_PAIR, TIME_OFFLOAD_NEIGHBOR,
    ito = inum;                                                 \
  }

// Fallback variant of IP_PRE_omp_range_id_vec (defined just before the
// closing #endif of a conditional whose condition is not visible here).
// Sets tid = 0, assigns the whole range (ifrom = 0, ito = inum) and sets
// ip = vecsize. The nthreads argument is accepted but not used.
// NOTE(review): ip is presumably the loop stride/step used by callers;
// confirm against the other variant of this macro.
#define IP_PRE_omp_range_id_vec(ifrom, ip, ito, tid, inum,      \
                                nthreads, vecsize)              \
  {                                                             \
    tid = 0;                                                    \
    ifrom = 0;                                                  \
    ito = inum;                                                 \
    ip = vecsize;                                               \
  }

#endif

#define IP_PRE_fdotr_acc_force_l5(lf, lt, minlocal, nthreads, f_start,  \
Loading