Unverified Commit c31d7c6f authored by Steve Plimpton's avatar Steve Plimpton Committed by GitHub
Browse files

Merge pull request #1009 from ndtrung81/gpu-maint

Fixed bugs in the Tersoff GPU styles for OpenCL builds and performed some maintenance
parents 45532b71 923ae041
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -260,6 +260,9 @@ class UCL_Device {
  /// List all devices along with all properties
  inline void print_all(std::ostream &out);

  /// Choose the platform containing accelerators.
  /** No-op compatibility shim for the CUDA back end; the OpenCL build
    * performs a real platform search. Always reports success here. **/
  inline int set_platform_accelerator(int pid=-1) { (void)pid; return UCL_SUCCESS; }

 private:
  int _device, _num_devices;
  std::vector<NVDProperties> _properties;
+4 −0
Original line number Diff line number Diff line
@@ -322,10 +322,12 @@ class Atom {

  // Asynchronously copy per-atom charges to the device (no-op when the
  // device copy is already current); transfer time is accumulated in time_q.
  inline void add_q_data() {
    time_q.start();
    const bool needs_copy = (_q_avail == false);
    if (needs_copy) {
      q.update_device(_nall, true);   // 'true' requests an asynchronous transfer
      _q_avail = true;                // mark device copy as up to date
    }
    time_q.stop();
  }

  // Cast quaternions to write buffer
@@ -347,10 +349,12 @@ class Atom {
  // Copy per-atom quaternions to the device when the device copy is stale.
  /** Transfers nall()*4 elements; timing accumulated in time_quat. **/
  inline void add_quat_data() {
    time_quat.start();
    const bool needs_copy = (_quat_avail == false);
    if (needs_copy) {
      quat.update_device(_nall * 4, true);   // 4 components per quaternion, async
      _quat_avail = true;                    // mark device copy as up to date
    }
    time_quat.stop();
  }

  /// Cast velocities and tags to write buffer
+8 −0
Original line number Diff line number Diff line
@@ -130,8 +130,16 @@ int DeviceT::init_device(MPI_Comm world, MPI_Comm replica, const int first_gpu,

  // Time on the device only if 1 proc per gpu
  _time_device=true;

#if 0
  // XXX: the following setting triggers a memory leak with OpenCL and MPI
  //      setting _time_device=true for all processes doesn't seem to be a
  //      problem with either (no segfault, no (large) memory leak.
  //      thus keeping this disabled for now. may need to review later.
  //      2018-07-23 <akohlmey@gmail.com>
  if (_procs_per_gpu>1)
    _time_device=false;
#endif

  // Set up a per device communicator
  MPI_Comm_split(node_comm,my_gpu,0,&_comm_gpu);
+7 −7
Original line number Diff line number Diff line
@@ -127,10 +127,10 @@ void Neighbor::alloc(bool &success) {
    dev_packed.clear();
    success=success && (dev_packed.alloc((_max_nbors+2)*_max_atoms,*dev,
                                         _packed_permissions)==UCL_SUCCESS);
    dev_acc.clear();
    success=success && (dev_acc.alloc(_max_atoms,*dev,
    dev_ilist.clear();
    success=success && (dev_ilist.alloc(_max_atoms,*dev,
                                      UCL_READ_WRITE)==UCL_SUCCESS);
    _c_bytes+=dev_packed.row_bytes()+dev_acc.row_bytes();
    _c_bytes+=dev_packed.row_bytes()+dev_ilist.row_bytes();
  }
  if (_max_host>0) {
    nbor_host.clear();
@@ -197,7 +197,7 @@ void Neighbor::clear() {

    host_packed.clear();
    host_acc.clear();
    dev_acc.clear();
    dev_ilist.clear();
    dev_nbor.clear();
    nbor_host.clear();
    dev_packed.clear();
@@ -281,7 +281,7 @@ void Neighbor::get_host(const int inum, int *ilist, int *numj,
  }
  UCL_D_Vec<int> acc_view;
  acc_view.view_offset(inum,dev_nbor,inum*2);
  ucl_copy(acc_view,host_acc,true);
  ucl_copy(acc_view,host_acc,inum*2,true);

  UCL_H_Vec<int> host_view;
  host_view.alloc(_max_atoms,*dev,UCL_READ_WRITE);
@@ -289,7 +289,7 @@ void Neighbor::get_host(const int inum, int *ilist, int *numj,
    int i=ilist[ii];
    host_view[i] = ii;
  }
  ucl_copy(dev_acc,host_view,true);
  ucl_copy(dev_ilist,host_view,true);

  time_nbor.stop();

@@ -364,7 +364,7 @@ void Neighbor::get_host3(const int inum, const int nlist, int *ilist, int *numj,
  }
  UCL_D_Vec<int> acc_view;
  acc_view.view_offset(inum,dev_nbor,inum*2);
  ucl_copy(acc_view,host_acc,true);
  ucl_copy(acc_view,host_acc,inum*2,true);
  time_nbor.stop();

  if (_use_packing==false) {
+2 −2
Original line number Diff line number Diff line
@@ -110,7 +110,7 @@ class Neighbor {
      }
      if (_time_device) {
        time_nbor.add_to_total();
        time_kernel.add_to_total();
        if (_use_packing==false) time_kernel.add_to_total();
        if (_gpu_nbor==2) {
          time_hybrid1.add_to_total();
          time_hybrid2.add_to_total();
@@ -200,7 +200,7 @@ class Neighbor {
  /// Host storage for nbor counts (row 1) & accumulated neighbor counts (row2)
  UCL_H_Vec<int> host_acc;
  /// Device storage for accessing atom indices from the neighbor list (3-body)
  UCL_D_Vec<int> dev_acc;
  UCL_D_Vec<int> dev_ilist;

  // ----------------- Data for GPU Neighbor Calculation ---------------

Loading