Commit d615b660 authored by sjplimp's avatar sjplimp
Browse files

git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@14596 f3b2605a-c512-4ea7-a41b-209d697bcdaa
parent e813620f
Loading
Loading
Loading
Loading
+13 −12
Original line number Diff line number Diff line
@@ -31,7 +31,8 @@
using namespace LAMMPS_NS;
using namespace MathConst;

#define SMALL (flt_t)0.001
#define SMALL2     (flt_t)0.000001
#define INVSMALL   (flt_t)1000.0
typedef struct { int a,b,c,t;  } int4_t;

/* ---------------------------------------------------------------------- */
@@ -162,7 +163,7 @@ void AngleCharmmIntel::eval(const int vflag,
      const flt_t delz1 = x[i1].z - x[i2].z;

      const flt_t rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1;
      const flt_t r1 = sqrt(rsq1);
      flt_t ir12 = (flt_t)1.0/sqrt(rsq1);

      // 2nd bond

@@ -171,7 +172,7 @@ void AngleCharmmIntel::eval(const int vflag,
      const flt_t delz2 = x[i3].z - x[i2].z;

      const flt_t rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
      const flt_t r2 = sqrt(rsq2);
      ir12 *= (flt_t)1.0/sqrt(rsq2);

      // Urey-Bradley bond

@@ -180,15 +181,15 @@ void AngleCharmmIntel::eval(const int vflag,
      const flt_t delzUB = x[i3].z - x[i1].z;

      const flt_t rsqUB = delxUB*delxUB + delyUB*delyUB + delzUB*delzUB;
      const flt_t rUB = sqrt(rsqUB);
      const flt_t irUB = (flt_t)1.0/sqrt(rsqUB);

      // Urey-Bradley force & energy

      const flt_t dr = rUB - fc.fc[type].r_ub;
      const flt_t dr = (flt_t)1.0/irUB - fc.fc[type].r_ub;
      const flt_t rk = fc.fc[type].k_ub * dr;

      flt_t forceUB;
      if (rUB > (flt_t)0.0) forceUB = (flt_t)-2.0*rk/rUB;
      if (rsqUB > (flt_t)0.0) forceUB = (flt_t)-2.0*rk*irUB;
      else forceUB = 0.0;

      flt_t eangle;
@@ -197,14 +198,14 @@ void AngleCharmmIntel::eval(const int vflag,
      // angle (cos and sin)

      flt_t c = delx1*delx2 + dely1*dely2 + delz1*delz2;
      c /= r1*r2;
      c *= ir12;

      if (c > (flt_t)1.0) c = (flt_t)1.0;
      if (c < (flt_t)-1.0) c = (flt_t)-1.0;

      flt_t s = sqrt((flt_t)1.0 - c*c);
      if (s < SMALL) s = SMALL;
      s = (flt_t)1.0/s;
      const flt_t sd = (flt_t)1.0 - c * c;
      flt_t s = (flt_t)1.0 / sqrt(sd);
      if (sd < SMALL2) s = INVSMALL;

      // harmonic force & energy

@@ -215,7 +216,7 @@ void AngleCharmmIntel::eval(const int vflag,

      const flt_t a = (flt_t)-2.0 * tk * s;
      const flt_t a11 = a*c / rsq1;
      const flt_t a12 = -a / (r1*r2);
      const flt_t a12 = -a * ir12;
      const flt_t a22 = a*c / rsq2;

      const flt_t f1x = a11*delx1 + a12*delx2 - delxUB*forceUB;
@@ -303,7 +304,7 @@ template <class flt_t, class acc_t>
void AngleCharmmIntel::pack_force_const(ForceConst<flt_t> &fc,
                                        IntelBuffers<flt_t,acc_t> *buffers)
{
  const int bp1 = atom->ndihedraltypes + 1;
  const int bp1 = atom->nangletypes + 1;
  fc.set_ntypes(bp1,memory);

  for (int i = 0; i < bp1; i++) {
+312 −0
Original line number Diff line number Diff line
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing author: W. Michael Brown (Intel)
------------------------------------------------------------------------- */

#include <math.h>
#include <stdlib.h>
#include "angle_harmonic_intel.h"
#include "atom.h"
#include "neighbor.h"
#include "domain.h"
#include "comm.h"
#include "force.h"
#include "math_const.h"
#include "memory.h"
#include "suffix.h"
#include "error.h"

using namespace LAMMPS_NS;
using namespace MathConst;

#define SMALL2 (flt_t)0.000001
#define INVSMALL (flt_t)1000.0
typedef struct { int a,b,c,t;  } int4_t;

/* ---------------------------------------------------------------------- */

AngleHarmonicIntel::AngleHarmonicIntel(LAMMPS *lmp) : AngleHarmonic(lmp) 
{
  suffix_flag |= Suffix::INTEL;
}

/* ---------------------------------------------------------------------- */

AngleHarmonicIntel::~AngleHarmonicIntel()
{
}

/* ---------------------------------------------------------------------- */

void AngleHarmonicIntel::compute(int eflag, int vflag)
{
  #ifdef _LMP_INTEL_OFFLOAD
  if (_use_base) {
    AngleHarmonic::compute(eflag, vflag);
    return;
  }
  #endif

  if (fix->precision() == FixIntel::PREC_MODE_MIXED)
    compute<float,double>(eflag, vflag, fix->get_mixed_buffers(),
                          force_const_single);
  else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE)
    compute<double,double>(eflag, vflag, fix->get_double_buffers(),
                           force_const_double);
  else
    compute<float,float>(eflag, vflag, fix->get_single_buffers(),
                         force_const_single);
}

/* ---------------------------------------------------------------------- */

template <class flt_t, class acc_t>
void AngleHarmonicIntel::compute(int eflag, int vflag,
			       IntelBuffers<flt_t,acc_t> *buffers,
			       const ForceConst<flt_t> &fc)
{
  if (eflag || vflag) ev_setup(eflag,vflag);
  else evflag = 0;

  if (evflag) {
    if (eflag) {
      if (force->newton_bond)
	eval<1,1,1>(vflag, buffers, fc);
      else
	eval<1,1,0>(vflag, buffers, fc);
    } else {
      if (force->newton_bond)
	eval<1,0,1>(vflag, buffers, fc);
      else
	eval<1,0,0>(vflag, buffers, fc);
    }
  } else {
    if (force->newton_bond)
      eval<0,0,1>(vflag, buffers, fc);
    else
      eval<0,0,0>(vflag, buffers, fc);
  }
}

/* ---------------------------------------------------------------------- */

template <int EVFLAG, int EFLAG, int NEWTON_BOND, class flt_t, class acc_t>
void AngleHarmonicIntel::eval(const int vflag, 
			    IntelBuffers<flt_t,acc_t> *buffers,
			    const ForceConst<flt_t> &fc)

{
  const int inum = neighbor->nanglelist;
  if (inum == 0) return;

  ATOM_T * _noalias const x = buffers->get_x(0);
  const int nlocal = atom->nlocal;
  const int nall = nlocal + atom->nghost;

  int f_stride;
  if (NEWTON_BOND) f_stride = buffers->get_stride(nall);
  else f_stride = buffers->get_stride(nlocal);

  int tc;
  FORCE_T * _noalias f_start;
  acc_t * _noalias ev_global;
  IP_PRE_get_buffers(0, buffers, fix, tc, f_start, ev_global);
  const int nthreads = tc;

  acc_t oeangle, ov0, ov1, ov2, ov3, ov4, ov5;
  if (EVFLAG) {
    if (EFLAG)
      oeangle = (acc_t)0.0;
    if (vflag) {
      ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0.0;
    }
  }

  #if defined(_OPENMP)
  #pragma omp parallel default(none) \
    shared(f_start,f_stride,fc)	\
    reduction(+:oeangle,ov0,ov1,ov2,ov3,ov4,ov5)
  #endif
  {
    int nfrom, nto, tid;
    IP_PRE_omp_range_id(nfrom, nto, tid, inum, nthreads);

    FORCE_T * _noalias const f = f_start + (tid * f_stride);
    if (fix->need_zero(tid))
      memset(f, 0, f_stride * sizeof(FORCE_T));

    const int4_t * _noalias const anglelist = 
      (int4_t *) neighbor->anglelist[0];

    for (int n = nfrom; n < nto; n++) {
      const int i1 = anglelist[n].a;
      const int i2 = anglelist[n].b;
      const int i3 = anglelist[n].c;
      const int type = anglelist[n].t;

      // 1st bond

      const flt_t delx1 = x[i1].x - x[i2].x;
      const flt_t dely1 = x[i1].y - x[i2].y;
      const flt_t delz1 = x[i1].z - x[i2].z;

      const flt_t rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1;
      const flt_t r1 = (flt_t)1.0/sqrt(rsq1);

      // 2nd bond

      const flt_t delx2 = x[i3].x - x[i2].x;
      const flt_t dely2 = x[i3].y - x[i2].y;
      const flt_t delz2 = x[i3].z - x[i2].z;

      const flt_t rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
      const flt_t r2 = (flt_t)1.0/sqrt(rsq2);

      // angle (cos and sin)

      flt_t c = delx1*delx2 + dely1*dely2 + delz1*delz2;
      const flt_t r1r2 = r1 * r2;
      c *= r1r2;

      if (c > (flt_t)1.0) c = (flt_t)1.0;
      if (c < (flt_t)-1.0) c = (flt_t)-1.0;

      const flt_t sd = (flt_t)1.0 - c * c;
      flt_t s = (flt_t)1.0/sqrt(sd);
      if (sd < SMALL2) s = INVSMALL;

      // harmonic force & energy

      const flt_t dtheta = acos(c) - fc.fc[type].theta0;
      const flt_t tk = fc.fc[type].k * dtheta;

      flt_t eangle;
      if (EFLAG) eangle = tk*dtheta;

      const flt_t a = (flt_t)-2.0 * tk * s;
      const flt_t ac = a*c;
      const flt_t a11 = ac / rsq1;
      const flt_t a12 = -a * (r1r2);
      const flt_t a22 = ac / rsq2;

      const flt_t f1x = a11*delx1 + a12*delx2;
      const flt_t f1y = a11*dely1 + a12*dely2;
      const flt_t f1z = a11*delz1 + a12*delz2;

      const flt_t f3x = a22*delx2 + a12*delx1;
      const flt_t f3y = a22*dely2 + a12*dely1;
      const flt_t f3z = a22*delz2 + a12*delz1;

      // apply force to each of 3 atoms

      if (NEWTON_BOND || i1 < nlocal) {
        f[i1].x += f1x;
	f[i1].y += f1y;
	f[i1].z += f1z;
      }

      if (NEWTON_BOND || i2 < nlocal) {
        f[i2].x -= f1x + f3x;
        f[i2].y -= f1y + f3y;
        f[i2].z -= f1z + f3z;
      }

      if (NEWTON_BOND || i3 < nlocal) {
        f[i3].x += f3x;
        f[i3].y += f3y;
        f[i3].z += f3z;
      }

      if (EVFLAG) {
	IP_PRE_ev_tally_angle(EFLAG, eatom, vflag, eangle, i1, i2, i3,f1x, 
                              f1y, f1z, f3x, f3y, f3z, delx1, dely1, delz1, 
                              delx2, dely2, delz2, oeangle, f, NEWTON_BOND, 
                              nlocal, ov0, ov1, ov2, ov3, ov4, ov5);
      }
    } // for n
  } // omp parallel

  if (EVFLAG) {
    if (EFLAG)
      energy += oeangle;
    if (vflag) {
      virial[0] += ov0; virial[1] += ov1; virial[2] += ov2;
      virial[3] += ov3; virial[4] += ov4; virial[5] += ov5; 
    }
  }

  fix->set_reduce_flag();
}

/* ---------------------------------------------------------------------- */

void AngleHarmonicIntel::init_style()
{
  AngleHarmonic::init_style();

  int ifix = modify->find_fix("package_intel");
  if (ifix < 0)
    error->all(FLERR,
               "The 'package intel' command is required for /intel styles");
  fix = static_cast<FixIntel *>(modify->fix[ifix]);

  #ifdef _LMP_INTEL_OFFLOAD
  _use_base = 0;
  if (fix->offload_balance() != 0.0) {
    _use_base = 1;
    return;
  }
  #endif

  fix->bond_init_check();

  if (fix->precision() == FixIntel::PREC_MODE_MIXED)
    pack_force_const(force_const_single, fix->get_mixed_buffers());
  else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE)
    pack_force_const(force_const_double, fix->get_double_buffers());
  else
    pack_force_const(force_const_single, fix->get_single_buffers());
}

/* ---------------------------------------------------------------------- */

template <class flt_t, class acc_t>
void AngleHarmonicIntel::pack_force_const(ForceConst<flt_t> &fc,
                                        IntelBuffers<flt_t,acc_t> *buffers)
{
  const int bp1 = atom->nangletypes + 1;
  fc.set_ntypes(bp1,memory);

  for (int i = 0; i < bp1; i++) {
    fc.fc[i].k = k[i];
    fc.fc[i].theta0 = theta0[i];
  }
}

/* ---------------------------------------------------------------------- */

template <class flt_t>
void AngleHarmonicIntel::ForceConst<flt_t>::set_ntypes(const int nangletypes,
	                                             Memory *memory) {
  if (nangletypes != _nangletypes) {
    if (_nangletypes > 0)
      _memory->destroy(fc);
    
    if (nangletypes > 0)
      _memory->create(fc,nangletypes,"anglecharmmintel.fc");
  }
  _nangletypes = nangletypes;
  _memory = memory;
}
+84 −0
Original line number Diff line number Diff line
/* -*- c++ -*- ----------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing author: W. Michael Brown (Intel)
------------------------------------------------------------------------- */

#ifdef ANGLE_CLASS

AngleStyle(harmonic/intel,AngleHarmonicIntel)

#else

#ifndef LMP_ANGLE_HARMONIC_INTEL_H
#define LMP_ANGLE_HARMONIC_INTEL_H

#include <stdio.h>
#include "angle_harmonic.h"
#include "fix_intel.h"

namespace LAMMPS_NS {

class AngleHarmonicIntel : public AngleHarmonic {
 public:
  AngleHarmonicIntel(class LAMMPS *);
  virtual ~AngleHarmonicIntel();
  virtual void compute(int, int);
  virtual void init_style();

 protected:
  FixIntel *fix;

  template <class flt_t> class ForceConst;
  template <class flt_t, class acc_t>
  void compute(int eflag, int vflag, IntelBuffers<flt_t,acc_t> *buffers,
               const ForceConst<flt_t> &fc);
  template <int EVFLAG, int EFLAG, int NEWTON_BOND, class flt_t, class acc_t>
  void eval(const int vflag, IntelBuffers<flt_t,acc_t> * buffers, 
	    const ForceConst<flt_t> &fc);
  template <class flt_t, class acc_t>
  void pack_force_const(ForceConst<flt_t> &fc,
                        IntelBuffers<flt_t, acc_t> *buffers);

  #ifdef _LMP_INTEL_OFFLOAD
  int _use_base;
  #endif

  template <class flt_t>
  class ForceConst {
   public:
    typedef struct { flt_t k, theta0; } fc_packed1;

    fc_packed1 *fc;
    ForceConst() : _nangletypes(0)  {}
    ~ForceConst() { set_ntypes(0, NULL); }

    void set_ntypes(const int nangletypes, Memory *memory);

   private:
    int _nangletypes;
    Memory *_memory;
  };
  ForceConst<float> force_const_single;
  ForceConst<double> force_const_double;
};

}

#endif
#endif

/* ERROR/WARNING messages:

*/
+411 −0

File added.

Preview size limit exceeded, changes collapsed.

+81 −0
Original line number Diff line number Diff line
/* -*- c++ -*- ----------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing author: W. Michael Brown (Intel)
------------------------------------------------------------------------- */

#ifdef DIHEDRAL_CLASS

DihedralStyle(harmonic/intel,DihedralHarmonicIntel)

#else

#ifndef LMP_DIHEDRAL_HARMONIC_INTEL_H
#define LMP_DIHEDRAL_HARMONIC_INTEL_H

#include "dihedral_harmonic.h"
#include "fix_intel.h"

namespace LAMMPS_NS {

class DihedralHarmonicIntel : public DihedralHarmonic {

 public:
  DihedralHarmonicIntel(class LAMMPS *lmp);
  virtual void compute(int, int);
  void init_style();

 private:
  FixIntel *fix;

  template <class flt_t> class ForceConst;
  template <class flt_t, class acc_t>
  void compute(int eflag, int vflag, IntelBuffers<flt_t,acc_t> *buffers,
               const ForceConst<flt_t> &fc);
  template <int EVFLAG, int EFLAG, int NEWTON_BOND, class flt_t, class acc_t>
  void eval(const int vflag, IntelBuffers<flt_t,acc_t> * buffers, 
	    const ForceConst<flt_t> &fc);
  template <class flt_t, class acc_t>
  void pack_force_const(ForceConst<flt_t> &fc,
                        IntelBuffers<flt_t, acc_t> *buffers);

  #ifdef _LMP_INTEL_OFFLOAD
  int _use_base;
  #endif

  template <class flt_t>
  class ForceConst {
   public:
    typedef struct { flt_t cos_shift, sin_shift, k; 
                     int multiplicity; } fc_packed1;

    fc_packed1 *bp;

    ForceConst() : _nbondtypes(0)  {}
    ~ForceConst() { set_ntypes(0, NULL); }

    void set_ntypes(const int nbondtypes, Memory *memory);

   private:
    int _nbondtypes;
    Memory *_memory;
  };
  ForceConst<float> force_const_single;
  ForceConst<double> force_const_double;
};

}

#endif
#endif
Loading