Commit 9daf5799 authored by sjplimp's avatar sjplimp
Browse files

git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@16008 f3b2605a-c512-4ea7-a41b-209d697bcdaa
parent 515a68d6
Loading
Loading
Loading
Loading
+788 −0

File added.

Preview size limit exceeded, changes collapsed.

+107 −0
Original line number Diff line number Diff line
/* -*- c++ -*- ----------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

#ifdef PAIR_CLASS

// When PAIR_CLASS is defined, this header expands to nothing but the line
// below; the class declaration lives in the #else branch.
// NOTE(review): PairStyle is a macro defined outside this file; presumably it
// associates the style name eam/intel with class PairEAMIntel - confirm.
PairStyle(eam/intel,PairEAMIntel)

#else

#ifndef LMP_PAIR_EAM_INTEL_H
#define LMP_PAIR_EAM_INTEL_H

#include <stdio.h>
#include "pair_eam.h"
#include "fix_intel.h"

namespace LAMMPS_NS {


class PairEAMIntel : public PairEAM {
 public:
  friend class FixSemiGrandCanonicalMC;   // Alex Stukowski option

  PairEAMIntel(class LAMMPS *);
  virtual ~PairEAMIntel();
  virtual void compute(int, int);
  void init_style();
  int pack_forward_comm(int, int *, double *, int, int *);
  void unpack_forward_comm(int, int, double *);

 protected:

  FixIntel *fix;
  int _cop, _onetype;
  float *fp_float;

  template <class flt_t>
  int pack_forward_comm(int, int *, double *, flt_t *);
  template <class flt_t>
  void unpack_forward_comm(int, int, double *, flt_t *);

  template <class flt_t> class ForceConst;
  template <class flt_t, class acc_t>
  void compute(int eflag, int vflag, IntelBuffers<flt_t,acc_t> *buffers,
               const ForceConst<flt_t> &fc);
  template <int ONETYPE, int EVFLAG, int EFLAG, int NEWTON_PAIR, class flt_t, 
	    class acc_t>
  void eval(const int offload, const int vflag,
            IntelBuffers<flt_t,acc_t> * buffers,
            const ForceConst<flt_t> &fc, const int astart, const int aend);

  template <class flt_t, class acc_t>
  void pack_force_const(ForceConst<flt_t> &fc,
                        IntelBuffers<flt_t, acc_t> *buffers);

  // ----------------------------------------------------------------------

  template <class flt_t>
  class ForceConst {
  public:
    typedef struct { flt_t a, b, c, d; } fc_packed1;
    typedef struct { flt_t a, b, c, d, e, f, g, h; } fc_packed2;

    flt_t **scale_f;
    fc_packed1 *rhor_spline_f, *rhor_spline_e;
    fc_packed1 *frho_spline_f, *frho_spline_e;
    fc_packed2 *z2r_spline_t;

    ForceConst() : _ntypes(0), _nr(0)  {}
    ~ForceConst() { set_ntypes(0, 0, 0, NULL, _cop); }

    void set_ntypes(const int ntypes, const int nr, const int nrho, 
		    Memory *memory, const int cop);
    inline int rhor_jstride() const { return _nr; }
    inline int rhor_istride() const { return _nr * _ntypes; }
    inline int frho_stride() const { return _nrho; }

  private:
    int _ntypes, _nr, _nrho, _cop;
    Memory *_memory;
  };
  ForceConst<float> force_const_single;
  ForceConst<double> force_const_double;
};

}

#endif
#endif

/* ERROR/WARNING messages:

E: The 'package intel' command is required for /intel styles

Self-explanatory.

*/
+132 −96
Original line number Diff line number Diff line
@@ -171,6 +171,15 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag,
  const int ntypes = atom->ntypes + 1;
  const int eatom = this->eflag_atom;

  flt_t * _noalias const ccachex = buffers->get_ccachex();
  flt_t * _noalias const ccachey = buffers->get_ccachey();
  flt_t * _noalias const ccachez = buffers->get_ccachez();
  flt_t * _noalias const ccachew = buffers->get_ccachew();
  int * _noalias const ccachei = buffers->get_ccachei();
  int * _noalias const ccachej = buffers->get_ccachej();
  const int ccache_stride = _ccache_stride;


  // Determine how much data to transfer
  int x_size, q_size, f_stride, ev_size, separate_flag;
  IP_PRE_get_transfern(ago, NEWTON_PAIR, EVFLAG, EFLAG, vflag,
@@ -208,8 +217,10 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag,
    in(x:length(x_size) alloc_if(0) free_if(0)) \
    in(q:length(q_size) alloc_if(0) free_if(0)) \
    in(overflow:length(0) alloc_if(0) free_if(0)) \
    in(nthreads,qqrd2e,g_ewald,inum,nall,ntypes,cut_coulsq,vflag,eatom) \
    in(f_stride,separate_flag,offload) \
    in(ccachex,ccachey,ccachez,ccachew:length(0) alloc_if(0) free_if(0)) \
    in(ccachei,ccachej:length(0) alloc_if(0) free_if(0)) \
    in(ccache_stride,nthreads,qqrd2e,g_ewald,inum,nall,ntypes,cut_coulsq) \
    in(vflag,eatom,f_stride,separate_flag,offload) \
    in(astart,cut_ljsq,cut_lj_innersq,nlocal,inv_denom_lj,minlocal) \
    out(f_start:length(f_stride) alloc_if(0) free_if(0)) \
    out(ev_global:length(ev_size) alloc_if(0) free_if(0)) \
@@ -246,6 +257,14 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag,
      memset(f + minlocal, 0, f_stride * sizeof(FORCE_T));
      flt_t cutboth = cut_coulsq;

      const int toffs = tid * ccache_stride;
      flt_t * _noalias const tdelx = ccachex + toffs;
      flt_t * _noalias const tdely = ccachey + toffs;
      flt_t * _noalias const tdelz = ccachez + toffs;
      flt_t * _noalias const trsq = ccachew + toffs;
      int * _noalias const tj = ccachei + toffs;
      int * _noalias const tjtype = ccachej + toffs;

      for (int i = iifrom; i < iito; ++i) {
	//        const int i = ilist[ii];
        const int itype = x[i].w;
@@ -270,28 +289,44 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag,
	  if (vflag==1) sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0;
	}

	int ej = 0;
        #if defined(LMP_SIMD_COMPILER)
	#pragma vector aligned
	#pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, secoul, \
	                       sv0, sv1, sv2, sv3, sv4, sv5)
	#pragma ivdep
        #endif
        for (int jj = 0; jj < jnum; jj++) {
          flt_t forcecoul, forcelj, evdwl, ecoul;
          forcecoul = forcelj = evdwl = ecoul = (flt_t)0.0;

          const int sbindex = jlist[jj] >> SBBITS & 3;
          const int j = jlist[jj] & NEIGHMASK;

	  const flt_t delx = xtmp - x[j].x;
          const flt_t dely = ytmp - x[j].y;
          const flt_t delz = ztmp - x[j].z;
          const int jtype = x[j].w;
          const flt_t rsq = delx * delx + dely * dely + delz * delz;
          const flt_t r2inv = (flt_t)1.0 / rsq;

	  #ifdef INTEL_VMASK
	  if (rsq < cut_coulsq) {
	    trsq[ej]=rsq;
	    tdelx[ej]=delx;
	    tdely[ej]=dely;
	    tdelz[ej]=delz;
	    tjtype[ej]=x[j].w;
	    tj[ej]=jlist[jj];
	    ej++;
	  }
	}

        #if defined(LMP_SIMD_COMPILER)
	#pragma vector aligned
	#pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, secoul, \
	                       sv0, sv1, sv2, sv3, sv4, sv5)
        #endif
        for (int jj = 0; jj < ej; jj++) {
          flt_t forcecoul, forcelj, evdwl, ecoul;
          forcecoul = forcelj = evdwl = ecoul = (flt_t)0.0;

	  const int j = tj[jj] & NEIGHMASK;
          const int sbindex = tj[jj] >> SBBITS & 3;
	  const int jtype = tjtype[jj];
	  const flt_t rsq = trsq[jj];
          const flt_t r2inv = (flt_t)1.0 / rsq;

          #ifdef INTEL_ALLOW_TABLE
          if (!ncoultablebits || rsq <= tabinnersq) {
          #endif
@@ -341,9 +376,6 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag,
	    }
          }
          #endif
	  #ifdef INTEL_VMASK
	  }
	  #endif

	  #ifdef INTEL_VMASK
	  if (rsq < cut_ljsq) {
@@ -393,9 +425,9 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag,
	  if (rsq > cut_ljsq) { forcelj = (flt_t)0.0; evdwl = (flt_t)0.0; }
	  #endif

	  #ifdef INTEL_VMASK
	  if (rsq < cut_coulsq) {
	  #endif
	  const flt_t delx = tdelx[jj];
	  const flt_t dely = tdely[jj];
	  const flt_t delz = tdelz[jj];
	  const flt_t fpair = (forcecoul + forcelj) * r2inv;
	  fxtmp += delx * fpair;
	  fytmp += dely * fpair;
@@ -427,9 +459,6 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag,
	    IP_PRE_ev_tally_nbor(vflag, ev_pre, fpair,
				 delx, dely, delz);
	  }
	  #ifdef INTEL_VMASK
	  }
	  #endif
        } // for jj
        f[i].x += fxtmp;
        f[i].y += fytmp;
@@ -517,6 +546,13 @@ void PairLJCharmmCoulLongIntel::pack_force_const(ForceConst<flt_t> &fc,
  if (ncoultablebits)
    for (int i = 0; i < ncoultablebits; i++) ntable *= 2;

  int off_ccache = 0;
  #ifdef _LMP_INTEL_OFFLOAD
  if (_cop >= 0) off_ccache = 1;
  #endif
  buffers->grow_ccache(off_ccache, comm->nthreads, 1);
  _ccache_stride = buffers->ccache_stride();

  fc.set_ntypes(tp1, ntable, memory, _cop);
  buffers->set_ntypes(tp1);
  flt_t **cutneighsq = buffers->get_cutneighsq();
+1 −1
Original line number Diff line number Diff line
@@ -42,7 +42,7 @@ class PairLJCharmmCoulLongIntel : public PairLJCharmmCoulLong {

 private:
  FixIntel *fix;
  int _cop, _lrt;
  int _cop, _lrt, _ccache_stride;

  template <class flt_t> class ForceConst;
  template <class flt_t, class acc_t>