Commit d71e037c authored by Trung Nguyen
Browse files

Renamed dev_acc to dev_ilist for better description and updated the 3-body...

Renamed dev_acc to dev_ilist to better describe its purpose and updated the 3-body styles accordingly; also fixed bugs with accessing dev_packed from the three_end kernels of the tersoff/mod and tersoff/zbl GPU styles in OpenCL builds
parent 94da4be9
Loading
Loading
Loading
Loading
+6 −5
Original line number Diff line number Diff line
@@ -127,10 +127,11 @@ void Neighbor::alloc(bool &success) {
    dev_packed.clear();
    success=success && (dev_packed.alloc((_max_nbors+2)*_max_atoms,*dev,
                                         _packed_permissions)==UCL_SUCCESS);
    dev_acc.clear();
    success=success && (dev_acc.alloc(_max_atoms,*dev,
    dev_ilist.clear();
    success=success && (dev_ilist.alloc(_max_atoms,*dev,
                                      UCL_READ_WRITE)==UCL_SUCCESS);
    _c_bytes+=dev_packed.row_bytes()+dev_acc.row_bytes();
                                      dev_ilist.clear();
    _c_bytes+=dev_packed.row_bytes()+dev_ilist.row_bytes();
  }
  if (_max_host>0) {
    nbor_host.clear();
@@ -197,7 +198,7 @@ void Neighbor::clear() {

    host_packed.clear();
    host_acc.clear();
    dev_acc.clear();
    dev_ilist.clear();
    dev_nbor.clear();
    nbor_host.clear();
    dev_packed.clear();
@@ -289,7 +290,7 @@ void Neighbor::get_host(const int inum, int *ilist, int *numj,
    int i=ilist[ii];
    host_view[i] = ii;
  }
  ucl_copy(dev_acc,host_view,true);
  ucl_copy(dev_ilist,host_view,true);

  time_nbor.stop();

+1 −1
Original line number Diff line number Diff line
@@ -200,7 +200,7 @@ class Neighbor {
  /// Host storage for nbor counts (row 1) & accumulated neighbor counts (row2)
  UCL_H_Vec<int> host_acc;
  /// Device storage for accessing atom indices from the neighbor list (3-body)
  UCL_D_Vec<int> dev_acc;
  UCL_D_Vec<int> dev_ilist;

  // ----------------- Data for GPU Neighbor Calculation ---------------

+2 −2
Original line number Diff line number Diff line
@@ -243,7 +243,7 @@ void SWT::loop(const bool _eflag, const bool _vflag, const int evatom) {
    this->k_three_end_vatom.run(&this->atom->x, &sw1, &sw2, &sw3,
                          &map, &elem2param, &_nelements,
                          &this->nbor->dev_nbor, &this->_nbor_data->begin(),
                          &this->nbor->dev_acc, &this->dev_short_nbor,
                          &this->nbor->dev_ilist, &this->dev_short_nbor,
                          &end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum,
                          &nbor_pitch, &this->_threads_per_atom, &this->_gpu_nbor);

@@ -252,7 +252,7 @@ void SWT::loop(const bool _eflag, const bool _vflag, const int evatom) {
    this->k_three_end.run(&this->atom->x, &sw1, &sw2, &sw3,
                          &map, &elem2param, &_nelements,
                          &this->nbor->dev_nbor, &this->_nbor_data->begin(),
                          &this->nbor->dev_acc, &this->dev_short_nbor,
                          &this->nbor->dev_ilist, &this->dev_short_nbor,
                          &end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum,
                          &nbor_pitch, &this->_threads_per_atom, &this->_gpu_nbor);

+6 −6
Original line number Diff line number Diff line
@@ -544,7 +544,7 @@ __kernel void k_sw_three_end(const __global numtyp4 *restrict x_,
                             const int nelements,
                             const __global int * dev_nbor,
                             const __global int * dev_packed,
                             const __global int * dev_acc,
                             const __global int * dev_ilist,
                             const __global int * dev_short_nbor,
                             __global acctyp4 *restrict ans,
                             __global acctyp *restrict engv,
@@ -614,13 +614,13 @@ __kernel void k_sw_three_end(const __global numtyp4 *restrict x_,
      int nbor_k,numk;
      if (dev_nbor==dev_packed) {
        if (gpu_nbor) nbor_k=j+nbor_pitch;
        else nbor_k=dev_acc[j]+nbor_pitch;
        else nbor_k=dev_ilist[j]+nbor_pitch;
        numk=dev_nbor[nbor_k];
        nbor_k+=nbor_pitch+fast_mul(j,t_per_atom-1);
        k_end=nbor_k+fast_mul(numk/t_per_atom,n_stride)+(numk & (t_per_atom-1));
        nbor_k+=offset_k;
      } else {
        nbor_k=dev_acc[j]+nbor_pitch;
        nbor_k=dev_ilist[j]+nbor_pitch;
        numk=dev_nbor[nbor_k];
        nbor_k+=nbor_pitch;
        nbor_k=dev_nbor[nbor_k];
@@ -698,7 +698,7 @@ __kernel void k_sw_three_end_vatom(const __global numtyp4 *restrict x_,
                             const int nelements,
                             const __global int * dev_nbor,
                             const __global int * dev_packed,
                             const __global int * dev_acc,
                             const __global int * dev_ilist,
                             const __global int * dev_short_nbor,
                             __global acctyp4 *restrict ans,
                             __global acctyp *restrict engv,
@@ -768,13 +768,13 @@ __kernel void k_sw_three_end_vatom(const __global numtyp4 *restrict x_,
      int nbor_k,numk;
      if (dev_nbor==dev_packed) {
        if (gpu_nbor) nbor_k=j+nbor_pitch;
        else nbor_k=dev_acc[j]+nbor_pitch;
        else nbor_k=dev_ilist[j]+nbor_pitch;
        numk=dev_nbor[nbor_k];
        nbor_k+=nbor_pitch+fast_mul(j,t_per_atom-1);
        k_end=nbor_k+fast_mul(numk/t_per_atom,n_stride)+(numk & (t_per_atom-1));
        nbor_k+=offset_k;
      } else {
        nbor_k=dev_acc[j]+nbor_pitch;
        nbor_k=dev_ilist[j]+nbor_pitch;
        numk=dev_nbor[nbor_k];
        nbor_k+=nbor_pitch;
        nbor_k=dev_nbor[nbor_k];
+2 −2
Original line number Diff line number Diff line
@@ -311,7 +311,7 @@ void TersoffT::loop(const bool _eflag, const bool _vflag, const int evatom) {
    this->k_three_end_vatom.run(&this->atom->x, &ts1, &ts2, &ts4, &cutsq,
                          &map, &elem2param, &_nelements, &_nparams, &_zetaij,
                          &this->nbor->dev_nbor, &this->_nbor_data->begin(),
                          &this->nbor->dev_acc, &this->dev_short_nbor,
                          &this->nbor->dev_ilist, &this->dev_short_nbor,
                          &end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum,
                          &nbor_pitch, &this->_threads_per_atom, &this->_gpu_nbor);

@@ -320,7 +320,7 @@ void TersoffT::loop(const bool _eflag, const bool _vflag, const int evatom) {
    this->k_three_end.run(&this->atom->x, &ts1, &ts2, &ts4, &cutsq,
                          &map, &elem2param, &_nelements, &_nparams, &_zetaij,
                          &this->nbor->dev_nbor, &this->_nbor_data->begin(),
                          &this->nbor->dev_acc, &this->dev_short_nbor,
                          &this->nbor->dev_ilist, &this->dev_short_nbor,
                          &end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum,
                          &nbor_pitch, &this->_threads_per_atom, &this->_gpu_nbor);
  }
Loading