Unverified Commit 7e034d6e authored by Bharath Ramsundar's avatar Bharath Ramsundar Committed by GitHub
Browse files

Merge pull request #1495 from VIGS25/make-estimator

#1142: make_estimator - ANIRegression, BPSymmetryFunctionRegression
parents 27a3000f 3906f2ec
Loading
Loading
Loading
Loading
+124 −62
Original line number Diff line number Diff line
@@ -16,7 +16,7 @@ import tensorflow as tf
import deepchem as dc

from deepchem.models.tensorgraph.layers import Dense, Concat, WeightedError, Stack, Layer, ANIFeat, Exp
from deepchem.models.tensorgraph.layers import L2Loss, Label, Weights, Feature, Dropout, WeightDecay
from deepchem.models.tensorgraph.layers import L2Loss, Label, Weights, Feature, Dropout, WeightDecay, ReduceSum, Reshape
from deepchem.models.tensorgraph.tensor_graph import TensorGraph
from deepchem.models.tensorgraph.graph_layers import DTNNEmbedding
from deepchem.models.tensorgraph.symmetry_functions import DistanceMatrix, \
@@ -49,12 +49,18 @@ class BPSymmetryFunctionRegression(TensorGraph):

    super(BPSymmetryFunctionRegression, self).__init__(**kwargs)

    self.build_graph()
    self._build_graph()

  def build_graph(self):
    self.atom_flags = Feature(shape=(None, self.max_atoms, self.max_atoms))
    self.atom_feats = Feature(shape=(None, self.max_atoms, self.n_feat))
    previous_layer = self.atom_feats
  def _build_graph(self):
    self.atom_flags = Feature(shape=(None, self.max_atoms * self.max_atoms))
    self.atom_feats = Feature(shape=(None, self.max_atoms * self.n_feat))

    reshaped_atom_feats = Reshape(
        in_layers=[self.atom_feats], shape=(-1, self.max_atoms, self.n_feat))
    reshaped_atom_flags = Reshape(
        in_layers=[self.atom_flags], shape=(-1, self.max_atoms, self.max_atoms))

    previous_layer = reshaped_atom_feats

    Hiddens = []
    for n_hidden in self.layer_structures:
@@ -65,23 +71,27 @@ class BPSymmetryFunctionRegression(TensorGraph):
      Hiddens.append(Hidden)
      previous_layer = Hiddens[-1]

    costs = []
    self.labels_fd = []
    for task in range(self.n_tasks):
    regression = Dense(
          out_channels=1, activation_fn=None, in_layers=[Hiddens[-1]])
      output = BPGather(self.max_atoms, in_layers=[regression, self.atom_flags])
        out_channels=1 * self.n_tasks,
        activation_fn=None,
        in_layers=[Hiddens[-1]])
    output = BPGather(
        self.max_atoms, in_layers=[regression, reshaped_atom_flags])
    self.add_output(output)

      label = Label(shape=(None, 1))
      self.labels_fd.append(label)
      cost = L2Loss(in_layers=[label, output])
      costs.append(cost)
    label = Label(shape=(None, self.n_tasks, 1))
    loss = ReduceSum(L2Loss(in_layers=[label, output]))
    weights = Weights(shape=(None, self.n_tasks))

    all_cost = Stack(in_layers=costs, axis=1)
    self.weights = Weights(shape=(None, self.n_tasks))
    loss = WeightedError(in_layers=[all_cost, self.weights])
    self.set_loss(loss)
    weighted_loss = WeightedError(in_layers=[loss, weights])
    self.set_loss(weighted_loss)

  def compute_features_on_batch(self, X_b):
    """Build the flattened-estimator inputs for one batch.

    Column 0 of each atom row is used as an occupancy indicator (its sign is
    taken, so 0 marks a padding slot); the outer product of that per-atom
    flag with itself yields a pairwise valid-atom mask of shape
    (batch, max_atoms, max_atoms). Columns 1: are the per-atom features.

    NOTE(review): assumes X_b has rank 3, (batch, max_atoms, 1 + n_feat) —
    confirm against the featurizer output.
    """
    # 1.0 where the atom slot is occupied, 0.0 where it is padding.
    flags = np.sign(np.array(X_b[:, :, 0]))
    # Pairwise mask: entry (i, j) is 1 only if atoms i AND j are both valid.
    atom_flags = np.stack([flags] * self.max_atoms, axis=2) * \
                 np.stack([flags] * self.max_atoms, axis=1)
    atom_feats = np.array(X_b[:, :, 1:], dtype=np.float32)
    return [atom_feats, atom_flags]

  def default_generator(self,
                        dataset,
@@ -99,17 +109,35 @@ class BPSymmetryFunctionRegression(TensorGraph):

        feed_dict = dict()
        if y_b is not None and not predict:
          for index, label in enumerate(self.labels_fd):
            feed_dict[label] = y_b[:, index:index + 1]
          feed_dict[self.labels[0]] = y_b
        if w_b is not None and not predict:
          feed_dict[self.weights] = w_b
          feed_dict[self.task_weights[0]] = w_b

        atom_feats, atom_flags = self.compute_features_on_batch(X_b)
        atom_feats = atom_feats.reshape(-1, self.max_atoms * self.n_feat)
        atom_flags = atom_flags.reshape(-1, self.max_atoms * self.max_atoms)
        feed_dict[self.atom_feats] = atom_feats
        feed_dict[self.atom_flags] = atom_flags

        flags = np.sign(np.array(X_b[:, :, 0]))
        feed_dict[self.atom_flags] = np.stack([flags]*self.max_atoms, axis=2)*\
            np.stack([flags]*self.max_atoms, axis=1)
        feed_dict[self.atom_feats] = np.array(X_b[:, :, 1:], dtype=float)
        yield feed_dict

  def create_estimator_inputs(self, feature_columns, weight_column, features,
                              labels, mode):
    """Map tf.estimator feature columns onto this graph's input layers.

    Pairs each Feature layer in ``self.features`` with the corresponding
    entry of ``feature_columns`` and returns a dict from graph input layers
    (features, task weights, labels) to their input tensors. ``mode`` is
    accepted for the estimator API but not used in this method.
    """
    tensors = dict()
    for layer, column in zip(self.features, feature_columns):
      feature_col = tf.feature_column.input_layer(features, [column])
      # Cast so the tensor matches the dtype the column declared
      # (e.g. integer-typed columns).
      if feature_col.dtype != column.dtype:
        feature_col = tf.cast(feature_col, column.dtype)
      tensors[layer] = feature_col

      # NOTE(review): the two blocks below are loop-invariant — they look as
      # if they were meant to sit outside the for loop. Harmless as written
      # (the same dict entries are just re-assigned each iteration), but
      # confirm the intended indentation.
      if weight_column is not None:
        tensors[self.task_weights[0]] = tf.feature_column.input_layer(
            features, [weight_column])
      if labels is not None:
        tensors[self.labels[0]] = labels

    return tensors


class ANIRegression(TensorGraph):

@@ -188,10 +216,14 @@ class ANIRegression(TensorGraph):
      feed_dict = dict()
      X = dataset.X
      flags = np.sign(np.array(X[:upper_lim, :, 0]))
      feed_dict[self.atom_flags] = np.stack([flags]*self.max_atoms, axis=2)*\
      atom_flags = np.stack([flags]*self.max_atoms, axis=2)*\
          np.stack([flags]*self.max_atoms, axis=1)
      feed_dict[self.atom_numbers] = np.array(X[:upper_lim, :, 0], dtype=int)
      feed_dict[self.atom_feats] = np.array(X[:upper_lim, :, :], dtype=float)
      feed_dict[self.atom_flags] = atom_flags.reshape(
          -1, self.max_atoms * self.max_atoms)
      atom_numbers = np.array(X[:upper_lim, :, 0], dtype=int)
      feed_dict[self.atom_numbers] = atom_numbers
      atom_feats = np.array(X[:upper_lim, :, :], dtype=float)
      feed_dict[self.atom_feats] = atom_feats.reshape(-1, self.max_atoms * 4)
      return self.session.run([self.grad], feed_dict=feed_dict)

  def pred_one(self, X, atomic_nums, constraints=None):
@@ -258,7 +290,8 @@ class ANIRegression(TensorGraph):
    X = Z
    inp = np.array(X).reshape((1, self.max_atoms, 4))
    dd = dc.data.NumpyDataset(inp, np.array([1]), np.array([1]))
    res = self.compute_grad(dd)[0][0][0]
    res = self.compute_grad(dd)[0][0]
    res = res.reshape(self.max_atoms, 4)
    res = res[:num_atoms, 1:]

    if constraints is not None:
@@ -304,11 +337,16 @@ class ANIRegression(TensorGraph):
  def build_graph(self):

    self.atom_numbers = Feature(shape=(None, self.max_atoms), dtype=tf.int32)
    self.atom_flags = Feature(shape=(None, self.max_atoms, self.max_atoms))
    self.atom_feats = Feature(shape=(None, self.max_atoms, 4))
    self.atom_flags = Feature(shape=(None, self.max_atoms * self.max_atoms))
    self.atom_feats = Feature(shape=(None, self.max_atoms * 4))

    reshaped_atom_flags = Reshape(
        in_layers=[self.atom_flags], shape=(-1, self.max_atoms, self.max_atoms))
    reshaped_atom_feats = Reshape(
        in_layers=[self.atom_feats], shape=(-1, self.max_atoms, 4))

    previous_layer = ANIFeat(
        in_layers=self.atom_feats, max_atoms=self.max_atoms)
        in_layers=reshaped_atom_feats, max_atoms=self.max_atoms)

    self.featurized = previous_layer

@@ -323,25 +361,31 @@ class ANIRegression(TensorGraph):
      Hiddens.append(Hidden)
      previous_layer = Hiddens[-1]

    costs = []
    self.labels_fd = []
    for task in range(self.n_tasks):
    regression = Dense(
          out_channels=1, activation_fn=None, in_layers=[Hiddens[-1]])
      output = BPGather(self.max_atoms, in_layers=[regression, self.atom_flags])
        out_channels=1 * self.n_tasks,
        activation_fn=None,
        in_layers=[Hiddens[-1]])
    output = BPGather(
        self.max_atoms, in_layers=[regression, reshaped_atom_flags])
    self.add_output(output)

      label = Label(shape=(None, 1))
      self.labels_fd.append(label)
      cost = L2Loss(in_layers=[label, output])
      costs.append(cost)
    label = Label(shape=(None, self.n_tasks, 1))
    loss = ReduceSum(L2Loss(in_layers=[label, output]))
    weights = Weights(shape=(None, self.n_tasks))

    all_cost = Stack(in_layers=costs, axis=1)
    self.weights = Weights(shape=(None, self.n_tasks))
    loss = WeightedError(in_layers=[all_cost, self.weights])
    weighted_loss = WeightedError(in_layers=[loss, weights])
    if self.exp_loss:
      loss = Exp(in_layers=[loss])
    self.set_loss(loss)
      weighted_loss = Exp(in_layers=[weighted_loss])
    self.set_loss(weighted_loss)

  def compute_features_on_batch(self, X_b):
    """Split one batch into the three ANI inputs.

    Column 0 of each atom row holds the atomic number (0 = padding slot);
    its sign gives a per-atom validity flag whose outer product forms the
    (batch, max_atoms, max_atoms) pairwise mask. The full array — including
    the atomic-number column — is passed through as the float feature tensor,
    and the atomic numbers are additionally returned as an int32 tensor.

    NOTE(review): assumes X_b is (batch, max_atoms, 4) with layout
    [Z, x, y, z] — confirm against the featurizer.
    """
    # 1.0 for occupied atom slots, 0.0 for padding.
    flags = np.sign(np.array(X_b[:, :, 0]))
    # Pairwise mask: (i, j) is 1 only when atoms i AND j are both valid.
    atom_flags = np.stack([flags]*self.max_atoms, axis=2)*\
            np.stack([flags]*self.max_atoms, axis=1)
    atom_numbers = np.array(X_b[:, :, 0], dtype=np.int32)
    atom_feats = np.array(X_b[:, :, :], dtype=np.float32)

    return [atom_feats, atom_numbers, atom_flags]

  def default_generator(self,
                        dataset,
@@ -359,18 +403,36 @@ class ANIRegression(TensorGraph):

        feed_dict = dict()
        if y_b is not None and not predict:
          for index, label in enumerate(self.labels_fd):
            feed_dict[label] = y_b[:, index:index + 1]
          feed_dict[self.labels[0]] = y_b
        if w_b is not None and not predict:
          feed_dict[self.weights] = w_b

        flags = np.sign(np.array(X_b[:, :, 0]))
        feed_dict[self.atom_flags] = np.stack([flags]*self.max_atoms, axis=2)*\
            np.stack([flags]*self.max_atoms, axis=1)
        feed_dict[self.atom_numbers] = np.array(X_b[:, :, 0], dtype=int)
        feed_dict[self.atom_feats] = np.array(X_b[:, :, :], dtype=float)
          feed_dict[self.task_weights[0]] = w_b

        atom_feats, atom_numbers, atom_flags = self.compute_features_on_batch(
            X_b)
        atom_feats = atom_feats.reshape(-1, self.max_atoms * 4)
        atom_flags = atom_flags.reshape(-1, self.max_atoms * self.max_atoms)
        feed_dict[self.atom_feats] = atom_feats
        feed_dict[self.atom_numbers] = atom_numbers
        feed_dict[self.atom_flags] = atom_flags
        yield feed_dict

  def create_estimator_inputs(self, feature_columns, weight_column, features,
                              labels, mode):
    """Map tf.estimator feature columns onto this graph's input layers.

    Pairs each Feature layer in ``self.features`` with the corresponding
    entry of ``feature_columns`` and returns a dict from graph input layers
    (features, task weights, labels) to their input tensors. ``mode`` is
    accepted for the estimator API but not used in this method.
    """
    tensors = dict()
    for layer, column in zip(self.features, feature_columns):
      feature_col = tf.feature_column.input_layer(features, [column])
      # Cast so the tensor matches the dtype the column declared
      # (e.g. the int32 atom-numbers column).
      if feature_col.dtype != column.dtype:
        feature_col = tf.cast(feature_col, column.dtype)
      tensors[layer] = feature_col

      # NOTE(review): the two blocks below are loop-invariant — they look as
      # if they were meant to sit outside the for loop. Harmless as written
      # (the same dict entries are just re-assigned each iteration), but
      # confirm the intended indentation.
      if weight_column is not None:
        tensors[self.task_weights[0]] = tf.feature_column.input_layer(
            features, [weight_column])
      if labels is not None:
        tensors[self.labels[0]] = labels

    return tensors

  def save_numpy(self):
    """
    Save to a portable numpy file. Note that this relies on the names to be consistent
+44 −9
Original line number Diff line number Diff line
@@ -46,7 +46,11 @@ class DistanceMatrix(Layer):
    # Calculate pairwise distance
    d = tf.sqrt(tf.reduce_sum(tf.square(tensor1 - tensor2), axis=3))
    # Masking for valid atom index
    self.out_tensor = d * tf.to_float(atom_flags)
    out_tensor = d * tf.to_float(atom_flags)
    if set_tensors:
      self.out_tensor = out_tensor

    return out_tensor


class DistanceCutoff(Layer):
@@ -79,7 +83,11 @@ class DistanceCutoff(Layer):
    d = 0.5 * (tf.cos(np.pi * d / self.Rc) + 1)
    out_tensor = d * d_flag
    out_tensor = out_tensor * tf.expand_dims((1 - tf.eye(self.max_atoms)), 0)
    out_tensor = out_tensor

    if set_tensors:
      self.out_tensor = out_tensor
    return out_tensor


class RadialSymmetry(Layer):
@@ -142,9 +150,14 @@ class RadialSymmetry(Layer):
            tf.expand_dims(atom_number_embedded[:, :, atom_type], axis=1),
            axis=3)
        out_tensors.append(tf.reduce_sum(out_tensor * selected_atoms, axis=2))
      self.out_tensor = tf.concat(out_tensors, axis=2)
      out_tensor = tf.concat(out_tensors, axis=2)
    else:
      self.out_tensor = tf.reduce_sum(out_tensor, axis=2)
      out_tensor = tf.reduce_sum(out_tensor, axis=2)

    if set_tensors:
      self.out_tensor = out_tensor

    return out_tensor


class AngularSymmetry(Layer):
@@ -227,9 +240,14 @@ class AngularSymmetry(Layer):
    out_tensor = tf.pow(1 + lambd * tf.cos(theta), zeta) * \
                 tf.exp(-ita * (tf.square(R_ij) + tf.square(R_ik) + tf.square(R_jk))) * \
                 f_R_ij * f_R_ik * f_R_jk
    self.out_tensor = tf.reduce_sum(out_tensor, axis=[2, 3]) * \
    out_tensor = tf.reduce_sum(out_tensor, axis=[2, 3]) * \
                      tf.pow(tf.constant(2.), 1 - tf.reshape(self.zeta, (1, 1, -1)))

    if set_tensors:
      self.out_tensor = out_tensor

    return out_tensor


class AngularSymmetryMod(Layer):
  """ Angular Symmetry Function """
@@ -345,9 +363,14 @@ class AngularSymmetryMod(Layer):
              tf.expand_dims(selected_atoms, axis=1), axis=4)
          out_tensors.append(
              tf.reduce_sum(out_tensor * selected_atoms, axis=[2, 3]))
      self.out_tensor = tf.concat(out_tensors, axis=2)
      out_tensor = tf.concat(out_tensors, axis=2)
    else:
      self.out_tensor = tf.reduce_sum(out_tensor, axis=[2, 3])
      out_tensor = tf.reduce_sum(out_tensor, axis=[2, 3])

    if set_tensors:
      self.out_tensor = out_tensor

    return out_tensor


class BPFeatureMerge(Layer):
@@ -369,7 +392,12 @@ class BPFeatureMerge(Layer):

    out_tensor = tf.concat(
        [atom_embedding, radial_symmetry, angular_symmetry], axis=2)
    self.out_tensor = out_tensor * atom_flags[:, :, 0:1]
    out_tensor = out_tensor * atom_flags[:, :, 0:1]

    if set_tensors:
      self.out_tensor = out_tensor

    return out_tensor


class BPGather(Layer):
@@ -390,6 +418,8 @@ class BPGather(Layer):
    out_tensor = tf.reduce_sum(out_tensor * flags[:, :, 0:1], axis=1)
    self.out_tensor = out_tensor

    return out_tensor


class AtomicDifferentiatedDense(Layer):
  """ Separate Dense module for different atoms """
@@ -453,7 +483,12 @@ class AtomicDifferentiatedDense(Layer):
      output = tf.reshape(output * tf.expand_dims(mask, 2),
                          (-1, self.max_atoms, self.out_channels))
      outputs.append(output)
    self.out_tensor = tf.add_n(outputs)
    out_tensor = tf.add_n(outputs)

    if set_tensors:
      self.out_tensor = out_tensor

    return out_tensor

  def none_tensors(self):
    w, b, out_tensor = self.W, self.b, self.out_tensor
+151 −2
Original line number Diff line number Diff line
@@ -467,9 +467,11 @@ class TestEstimators(unittest.TestCase):
    input_file = os.path.join(current_dir, "example_DTNN.mat")
    dataset = loadmat(input_file)

    num_vals_to_use = 20

    np.random.seed(123)
    X = dataset['X']
    y = dataset['T'].astype(np.float32)
    X = dataset['X'][:num_vals_to_use]
    y = dataset['T'][:num_vals_to_use].astype(np.float32)
    w = np.ones_like(y)
    dataset = dc.data.NumpyDataset(X, y, w, ids=None)
    n_tasks = y.shape[1]
@@ -527,3 +529,150 @@ class TestEstimators(unittest.TestCase):

    results = estimator.evaluate(input_fn=lambda: input_fn(n_samples, 1))
    assert results['error'] < 0.1

  def test_bpsymm_regression_model(self):
    """Test creating an estimator for BPSymmetry Regression model."""
    tasks, dataset, transformers = dc.molnet.load_qm7_from_mat(
        featurizer='BPSymmetryFunction', move_mean=False)

    # Keep the test fast: train on a tiny slice of the dataset.
    num_samples_to_use = 5
    train, _, _ = dataset
    X = train.X[:num_samples_to_use]
    y = train.y[:num_samples_to_use]
    w = train.w[:num_samples_to_use]
    ids = train.ids[:num_samples_to_use]

    dataset = dc.data.NumpyDataset(X, y, w, ids)

    max_atoms = 23
    batch_size = 16
    layer_structures = [128, 128, 64]

    ANItransformer = dc.trans.ANITransformer(
        max_atoms=max_atoms, atomic_number_differentiated=False)
    dataset = ANItransformer.transform(dataset)
    # NOTE(review): the -1 presumably drops the atomic-number column from
    # the feature count — confirm against ANITransformer.
    n_feat = ANItransformer.get_num_feats() - 1

    model = dc.models.BPSymmetryFunctionRegression(
        len(tasks),
        max_atoms,
        n_feat,
        layer_structures=layer_structures,
        batch_size=batch_size,
        learning_rate=0.001,
        use_queue=False,
        mode="regression")

    metrics = {'error': tf.metrics.mean_absolute_error}

    def input_fn(epochs):
      # Featurize inside the TF graph via py_func, then flatten to the 2-D
      # shapes the model's Feature layers / feature columns expect.
      X, y, w = dataset.make_iterator(
          batch_size=batch_size, epochs=epochs).get_next()
      atom_feats, atom_flags = tf.py_func(
          model.compute_features_on_batch, [X], Tout=[tf.float32, tf.float32])
      atom_feats = tf.reshape(
          atom_feats,
          shape=(tf.shape(atom_feats)[0], model.max_atoms * model.n_feat))
      atom_flags = tf.reshape(
          atom_flags,
          shape=(tf.shape(atom_flags)[0], model.max_atoms * model.max_atoms))

      features = dict()
      features['atom_feats'] = atom_feats
      features['atom_flags'] = atom_flags
      features['weights'] = w
      return features, y

    # Feature columns mirroring the flattened tensors built in input_fn.
    atom_feats = tf.feature_column.numeric_column(
        'atom_feats', shape=(max_atoms * n_feat,), dtype=tf.float32)
    atom_flags = tf.feature_column.numeric_column(
        'atom_flags', shape=(max_atoms * max_atoms), dtype=tf.float32)
    weight_col = tf.feature_column.numeric_column(
        'weights', shape=(len(tasks),), dtype=tf.float32)

    estimator = model.make_estimator(
        feature_columns=[atom_feats, atom_flags],
        weight_column=weight_col,
        metrics=metrics)
    estimator.train(input_fn=lambda: input_fn(100))
    results = estimator.evaluate(input_fn=lambda: input_fn(1))

    assert results['error'] < 0.1

  def test_ani_regression(self):
    """Test creating an estimator for ANI Regression."""

    max_atoms = 4

    # Two tiny molecules padded to max_atoms; each atom row is
    # [atomic number, x, y, z], with all-zero rows as padding.
    X = np.array(
        [[
            [1, 5.0, 3.2, 1.1],
            [6, 1.0, 3.4, -1.1],
            [1, 2.3, 3.4, 2.2],
            [0, 0, 0, 0],
        ], [
            [8, 2.0, -1.4, -1.1],
            [7, 6.3, 2.4, 3.2],
            [0, 0, 0, 0],
            [0, 0, 0, 0],
        ]],
        dtype=np.float32)

    y = np.array([[2.0], [1.1]], dtype=np.float32)

    layer_structures = [128, 128, 64]
    # Elements present in X: H, C, N, O.
    atom_number_cases = [1, 6, 7, 8]

    kwargs = {
        "n_tasks": 1,
        "max_atoms": max_atoms,
        "layer_structures": layer_structures,
        "atom_number_cases": atom_number_cases,
        "batch_size": 2,
        "learning_rate": 0.001,
        "use_queue": False,
        "mode": "regression"
    }

    model = dc.models.ANIRegression(**kwargs)
    dataset = dc.data.NumpyDataset(X, y, n_tasks=1)

    metrics = {'error': tf.metrics.mean_absolute_error}

    def input_fn(epochs):
      # Featurize inside the TF graph via py_func, then flatten to the 2-D
      # shapes the model's Feature layers / feature columns expect.
      X, y, w = dataset.make_iterator(batch_size=2, epochs=epochs).get_next()
      atom_feats, atom_numbers, atom_flags = tf.py_func(
          model.compute_features_on_batch, [X],
          Tout=[tf.float32, tf.int32, tf.float32])
      atom_feats = tf.reshape(
          atom_feats, shape=(tf.shape(atom_feats)[0], model.max_atoms * 4))
      atom_numbers = tf.reshape(
          atom_numbers, shape=(tf.shape(atom_numbers)[0], model.max_atoms))
      atom_flags = tf.reshape(
          atom_flags,
          shape=(tf.shape(atom_flags)[0], model.max_atoms * model.max_atoms))

      features = dict()
      features['atom_feats'] = atom_feats
      features['atom_numbers'] = atom_numbers
      features['atom_flags'] = atom_flags
      features['weights'] = w
      return features, y

    # Feature columns mirroring the flattened tensors built in input_fn.
    atom_feats = tf.feature_column.numeric_column(
        'atom_feats', shape=(max_atoms * 4,), dtype=tf.float32)
    atom_numbers = tf.feature_column.numeric_column(
        'atom_numbers', shape=(max_atoms,), dtype=tf.int32)
    atom_flags = tf.feature_column.numeric_column(
        'atom_flags', shape=(max_atoms * max_atoms), dtype=tf.float32)
    weight_col = tf.feature_column.numeric_column(
        'weights', shape=(kwargs["n_tasks"],), dtype=tf.float32)

    estimator = model.make_estimator(
        feature_columns=[atom_feats, atom_numbers, atom_flags],
        weight_column=weight_col,
        metrics=metrics)
    estimator.train(input_fn=lambda: input_fn(100))

    results = estimator.evaluate(input_fn=lambda: input_fn(1))
    assert results['error'] < 0.1
+1 −1
Original line number Diff line number Diff line
@@ -51,7 +51,7 @@ def load_qm7_from_mat(featurizer='CoulombMatrix',
      )
    dataset = scipy.io.loadmat(dataset_file)
    X = np.concatenate([np.expand_dims(dataset['Z'], 2), dataset['R']], axis=2)
    y = dataset['T']
    y = dataset['T'].reshape(-1, 1)  # scipy.io.loadmat puts samples on axis 1
    w = np.ones_like(y)
    dataset = deepchem.data.DiskDataset.from_numpy(X, y, w, ids=None)
  else: