Add additional types and constraints (6669b523) · Commits · 钟慕尧 / deepchem

deepchem/models/tensorgraph/models/symmetry_function_regression.py

+16 −4

Original line number	Diff line number	Diff line
		@@ -171,7 +171,7 @@ class ANIRegression(TensorGraph):
		feed_dict[self.atom_feats] = np.array(X[:upper_lim, :, :], dtype=float)
		return self.session.run([self.grad], feed_dict=feed_dict)

		def pred_one(self, X, atomic_nums):
		def pred_one(self, X, atomic_nums, constraints=None):
		"""
		Makes an energy prediction for a set of atomic coordinates.

		@@ -182,6 +182,8 @@ class ANIRegression(TensorGraph):
		dtype is float-like
		atomic_nums: np.array
		numpy array of shape (a,) where a is the same as that of X.
		constraints: unused
		This parameter is mainly for compatibility purposes for scipy optimize

		Returns
		-------
		@@ -201,7 +203,7 @@ class ANIRegression(TensorGraph):
		dd = dc.data.NumpyDataset(np.array(X).reshape((1, self.max_atoms, 4)), np.array(0), np.array(1))
		return self.predict(dd)[0]

		def grad_one(self, X, atomic_nums):
		def grad_one(self, X, atomic_nums, constraints=None):
		"""
		Computes gradients for that of a single structure.

		@@ -212,6 +214,9 @@ class ANIRegression(TensorGraph):
		dtype is float-like
		atomic_nums: np.array
		numpy array of shape (a,) where a is the same as that of X.
		constraints: np.array
		numpy array of indices of X used for constraining a subset
		of the atoms of the molecule.

		Returns
		-------
		@@ -230,9 +235,16 @@ class ANIRegression(TensorGraph):
		dd = dc.data.NumpyDataset(inp, np.array([1]), np.array([1]))
		res = self.compute_grad(dd)[0][0][0]
		res = res[:num_atoms, 1:]

		if constraints is not None:
		for idx in constraints:
		res[idx][0] = 0
		res[idx][1] = 0
		res[idx][2] = 0

		return res.reshape((num_atoms*3,))

		def minimize_structure(self, X, atomic_nums):
		def minimize_structure(self, X, atomic_nums, constraints=None):
		"""
		Minimizes a structure, as defined by a set of coordinates and their atomic
		numbers.
		@@ -271,7 +283,7 @@ class ANIRegression(TensorGraph):
		res = scipy.optimize.minimize(
		self.pred_one,
		X,
		args=(atomic_nums,),
		args=(atomic_nums, constraints),
		jac=self.grad_one,
		method="BFGS",
		tol=1e-6,

deepchem/models/tensorgraph/models/test_symmetry_functions.py

+29 −0

Original line number	Diff line number	Diff line
		@@ -68,5 +68,34 @@ class TestANIRegression(unittest.TestCase):

		np.testing.assert_array_almost_equal(grad_approx, grad_exact, decimal=3)

		grad_exact_constrained = model.grad_one(new_x, new_atomic_nums, constraints=[0, 2])

		assert grad_exact_constrained[0] == 0
		assert grad_exact_constrained[1] == 0
		assert grad_exact_constrained[2] == 0

		assert grad_exact_constrained[3] == grad_exact[3]
		assert grad_exact_constrained[4] == grad_exact[4]
		assert grad_exact_constrained[5] == grad_exact[5]

		assert grad_exact_constrained[6] == 0
		assert grad_exact_constrained[7] == 0
		assert grad_exact_constrained[8] == 0

		min_coords = model.minimize_structure(new_x, new_atomic_nums, constraints=[0,2])

		assert min_coords[0][0] == new_x[0]
		assert min_coords[0][1] == new_x[1]
		assert min_coords[0][2] == new_x[2]

		# assert min_coords[1][0] != new_x[3]
		# assert min_coords[1][1] != new_x[4]
		# assert min_coords[1][2] != new_x[5]

		assert min_coords[2][0] == new_x[6]
		assert min_coords[2][1] == new_x[7]
		assert min_coords[2][2] == new_x[8]


		if __name__ == '__main__':
		unittest.main()
		No newline at end of file

examples/roitberg/app.py

+13 −2

Original line number	Diff line number	Diff line
		@@ -37,11 +37,22 @@ def minimize():
		if not content or not 'X' in content:
		abort(400)
		X = np.array(content['X'])

		constraints = None

		if 'constraints' in content:
		constraints = content['constraints']
		print('setting constraints')


		num_atoms = X.shape[0]
		x0 = X[:, 1:]
		a0 = X[:, :1]

		res = webapp.model.minimize_structure(x0, a0)


		res = webapp.model.minimize_structure(x0, a0, constraints)
		res = res.reshape((num_atoms, 3))
		y = webapp.model.pred_one(res, a0).tolist()[0]

		return flask.jsonify({'X': res.tolist()}), 200
		No newline at end of file
		return flask.jsonify({'X': res.tolist(), 'y': y}), 200
		No newline at end of file

examples/roitberg/roitberg.py

+50 −10

Original line number	Diff line number	Diff line
		@@ -12,8 +12,26 @@ def convert_species_to_atomic_nums(s):
		res.append(PERIODIC_TABLE[k])
		return np.array(res, dtype=np.float32)


		def load_roiterberg_ANI(relative=True):
		def load_roiterberg_ANI(mode="atomization"):
		"""
		Load the ANI dataset.

		Parameters
		----------
		mode: str
		Accepted modes are "relative", "atomization", or "absolute". These settings are used
		to adjust the dynamic range of the model, with absolute having the greatest and relative
		having the lowest. Note that for atomization we approximate the single atom energy
		using a different level of theory


		Returns
		-------
		tuples
		Elements returned are 3-tuple (a,b,c) where and b are the train and test datasets, respectively,
		and c is an array of indices denoting the group of each

		"""
		if "ROITBERG_ANI" not in os.environ:
		raise ValueError(
		"Please set environment variable ROITBERG_ANI to where the ani_dgb_s0x.h5 files are."
		@@ -28,9 +46,9 @@ def load_roiterberg_ANI(relative=True):
		'ani_gdb_s01.h5',
		'ani_gdb_s02.h5',
		'ani_gdb_s03.h5',
		# 'ani_gdb_s04.h5',
		# 'ani_gdb_s05.h5',
		# 'ani_gdb_s06.h5',
		'ani_gdb_s04.h5',
		'ani_gdb_s05.h5',
		'ani_gdb_s06.h5',
		# 'ani_gdb_s07.h5',
		# 'ani_gdb_s08.h5'
		]
		@@ -77,10 +95,29 @@ def load_roiterberg_ANI(relative=True):
		nonpadded = convert_species_to_atomic_nums(S)
		Z_padded[:nonpadded.shape[0]] = nonpadded

		if relative:
		if mode == "relative":
		offset = np.amin(E)
		else:
		elif mode == "atomization":

		# approximation via B3LYP/6-31G* in Jaguar
		# to shrink the dynamic range
		atomizationMapInHartrees = {
		0: 0,
		1: -0.50027278266,
		6: -37.77568079030,
		7: -54.47752723207,
		8: -74.95668301867,
		16: -398.04142581461
		}

		offset = 0

		for z in nonpadded:
		offset -= atomizationMapInHartrees[z]
		elif mode == "absolute":
		offset = 0
		else:
		raise Exception("Unsupported mode: ", mode)

		for k in range(len(E)):
		R_padded = np.zeros((23, 3), dtype=np.float32)
		@@ -88,7 +125,7 @@ def load_roiterberg_ANI(relative=True):

		X = np.concatenate([np.expand_dims(Z_padded, 1), R_padded], axis=1)

		y = E[k] - offset # offset is zero if we're not computing relative
		y = E[k] - offset

		if len(X_cache) == shard_size:

		@@ -149,7 +186,7 @@ if __name__ == "__main__":
		dc.metrics.Metric(dc.metrics.pearson_r2_score, mode="regression")
		]

		model_dir = "/tmp/ani.pkl"
		model_dir = "/tmp/ani3.pkl"

		if os.path.exists(model_dir):
		print("Restoring existing model...")
		@@ -157,7 +194,7 @@ if __name__ == "__main__":
		else:
		print("Fitting new model...")

		train_valid_dataset, test_dataset, all_groups = load_roiterberg_ANI(relative=True)
		train_valid_dataset, test_dataset, all_groups = load_roiterberg_ANI(mode="relative")

		splitter = dc.splits.RandomGroupSplitter(broadcast(train_valid_dataset, all_groups))

		@@ -172,6 +209,9 @@ if __name__ == "__main__":
		transform_y=True, dataset=train_dataset)
		]

		print("Total training set shape: ", train_dataset.get_shape())


		for transformer in transformers:
		train_dataset = transformer.transform(train_dataset)
		valid_dataset = transformer.transform(valid_dataset)

Admin message