Commit f69a17dd authored by Vsevak
Browse files

Reduce unnecessary data exchanges

parent 868df1f6
Loading
Loading
Loading
Loading
+20 −21
Original line number Diff line number Diff line
@@ -198,8 +198,7 @@ void LJTIP4PLongT::loop(const bool _eflag, const bool _vflag) {
          &nall, &ainum,
          &nbor_pitch, &this->_threads_per_atom,
          &hneight, &m, &TypeO, &TypeH, &alpha,
          &this->atom->q, &tag, &map_array,
          &atom_sametag);
          &this->atom->q);

  GX=static_cast<int>(ceil(static_cast<double>(this->ans->inum())/
                               (BX/this->_threads_per_atom)));
@@ -217,8 +216,7 @@ void LJTIP4PLongT::loop(const bool _eflag, const bool _vflag) {
          &ainum, &nbor_pitch, &this->_threads_per_atom,
          &hneight, &m, &TypeO, &TypeH, &alpha,
          &this->atom->q, &cutsq, &_qqrd2e, &_g_ewald,
          &cut_coulsq, &cut_coulsqplus, &tag, &map_array,
          &atom_sametag, &this->ansO);
          &cut_coulsq, &cut_coulsqplus, &this->ansO);
  GX=static_cast<int>(ceil(static_cast<double>(this->ans->inum())/BX));
  this->k_pair_distrib.set_size(GX,BX);
  this->k_pair_distrib.run(&this->atom->x, &this->ans->force, &this->ans->engv,
@@ -234,11 +232,11 @@ void LJTIP4PLongT::copy_relations_data(int n, tagint *tag, int *map_array,
  int nall = n;
  const int hn_sz = n*4; // matrix size = col size * col number
  hneight.resize_ib(hn_sz);
  if (ago == 0)
    hneight.zero();

  m.resize_ib(n);
  m.zero();

  if (ago == 0) {
    hneight.zero();
    UCL_H_Vec<int> host_tag_write(nall,*(this->ucl_device),UCL_WRITE_ONLY);
    this->tag.resize_ib(nall);
    for(int i=0; i<nall; ++i) host_tag_write[i] = tag[i];
@@ -254,6 +252,7 @@ void LJTIP4PLongT::copy_relations_data(int n, tagint *tag, int *map_array,
    for(int i=0; i<map_size; ++i) host_tag_write[i] = map_array[i];
    ucl_copy(this->map_array, host_tag_write, map_size, false);
  }
}



+2 −5
Original line number Diff line number Diff line
@@ -226,9 +226,7 @@ __kernel void k_lj_tip4p_newsite(const __global numtyp4 *restrict x_,
    __global int *restrict hneigh,
    __global numtyp4 *restrict m,
    const int typeO, const int typeH,
    const numtyp alpha, const __global numtyp *restrict q_,
    const __global int *restrict tag, const __global int *restrict map,
    const __global int *restrict sametag) {
    const numtyp alpha, const __global numtyp *restrict q_) {
  int tid, ii, offset;
  atom_info(t_per_atom,ii,tid,offset);
  int i = BLOCK_ID_X*(BLOCK_SIZE_X)+tid;
@@ -268,8 +266,7 @@ __kernel void k_lj_tip4p_long(const __global numtyp4 *restrict x_,
    const __global numtyp *restrict cutsq,
    const numtyp qqrd2e, const numtyp g_ewald,
    const numtyp cut_coulsq, const numtyp cut_coulsqplus,
    const __global int *restrict tag, const __global int *restrict map,
    const __global int *restrict sametag, __global acctyp4 *restrict ansO) {
    __global acctyp4 *restrict ansO) {
  int tid, ii, offset;
  atom_info(t_per_atom,ii,tid,offset);