Commit ec30df0f authored by Vignesh's avatar Vignesh
Browse files

Added make_estimator for BPSymmetry, ANIRegression

parent f8af3850
Loading
Loading
Loading
Loading
+110 −57
Original line number Diff line number Diff line
@@ -16,7 +16,7 @@ import tensorflow as tf
import deepchem as dc

from deepchem.models.tensorgraph.layers import Dense, Concat, WeightedError, Stack, Layer, ANIFeat, Exp
from deepchem.models.tensorgraph.layers import L2Loss, Label, Weights, Feature, Dropout, WeightDecay
from deepchem.models.tensorgraph.layers import L2Loss, Label, Weights, Feature, Dropout, WeightDecay, ReduceSum, Reshape
from deepchem.models.tensorgraph.tensor_graph import TensorGraph
from deepchem.models.tensorgraph.graph_layers import DTNNEmbedding
from deepchem.models.tensorgraph.symmetry_functions import DistanceMatrix, \
@@ -49,12 +49,18 @@ class BPSymmetryFunctionRegression(TensorGraph):

    super(BPSymmetryFunctionRegression, self).__init__(**kwargs)

    self.build_graph()
    self._build_graph()

  def build_graph(self):
    self.atom_flags = Feature(shape=(None, self.max_atoms, self.max_atoms))
    self.atom_feats = Feature(shape=(None, self.max_atoms, self.n_feat))
    previous_layer = self.atom_feats
  def _build_graph(self):
    self.atom_flags = Feature(shape=(None, self.max_atoms * self.max_atoms))
    self.atom_feats = Feature(shape=(None, self.max_atoms * self.n_feat))

    reshaped_atom_feats = Reshape(
        in_layers=[self.atom_feats], shape=(-1, self.max_atoms, self.n_feat))
    reshaped_atom_flags = Reshape(
        in_layers=[self.atom_flags], shape=(-1, self.max_atoms, self.max_atoms))

    previous_layer = reshaped_atom_feats

    Hiddens = []
    for n_hidden in self.layer_structures:
@@ -65,23 +71,27 @@ class BPSymmetryFunctionRegression(TensorGraph):
      Hiddens.append(Hidden)
      previous_layer = Hiddens[-1]

    costs = []
    self.labels_fd = []
    for task in range(self.n_tasks):
    regression = Dense(
          out_channels=1, activation_fn=None, in_layers=[Hiddens[-1]])
      output = BPGather(self.max_atoms, in_layers=[regression, self.atom_flags])
        out_channels=1 * self.n_tasks,
        activation_fn=None,
        in_layers=[Hiddens[-1]])
    output = BPGather(
        self.max_atoms, in_layers=[regression, reshaped_atom_flags])
    self.add_output(output)

      label = Label(shape=(None, 1))
      self.labels_fd.append(label)
      cost = L2Loss(in_layers=[label, output])
      costs.append(cost)
    label = Label(shape=(None, self.n_tasks, 1))
    loss = ReduceSum(L2Loss(in_layers=[label, output]))
    weights = Weights(shape=(None, self.n_tasks))

    weighted_loss = WeightedError(in_layers=[loss, weights])
    self.set_loss(weighted_loss)

    all_cost = Stack(in_layers=costs, axis=1)
    self.weights = Weights(shape=(None, self.n_tasks))
    loss = WeightedError(in_layers=[all_cost, self.weights])
    self.set_loss(loss)
  def compute_features_on_batch(self, X_b):
    """Split a raw batch into the arrays fed to the graph's inputs.

    Column 0 of X_b marks real atoms (nonzero) versus padding (zero);
    the remaining columns are the per-atom features.

    Returns [atom_feats, atom_flags], where atom_flags[b, i, j] is
    nonzero only when both atom i and atom j of sample b are real.
    """
    presence = np.sign(np.array(X_b[:, :, 0]))
    # Pairwise valid-atom mask: replicate the presence vector along rows
    # and columns, then multiply elementwise.
    row_mask = np.stack([presence] * self.max_atoms, axis=2)
    col_mask = np.stack([presence] * self.max_atoms, axis=1)
    pair_mask = row_mask * col_mask
    # Drop the presence column; everything after it is a feature.
    features = np.array(X_b[:, :, 1:], dtype=np.float32)
    return [features, pair_mask]

  def default_generator(self,
                        dataset,
@@ -99,17 +109,33 @@ class BPSymmetryFunctionRegression(TensorGraph):

        feed_dict = dict()
        if y_b is not None and not predict:
          for index, label in enumerate(self.labels_fd):
            feed_dict[label] = y_b[:, index:index + 1]
          feed_dict[self.labels[0]] = y_b
        if w_b is not None and not predict:
          feed_dict[self.weights] = w_b
          feed_dict[self.task_weights[0]] = w_b

        atom_feats, atom_flags = self.compute_features_on_batch(X_b)
        feed_dict[self.atom_feats] = atom_feats
        feed_dict[self.atom_flags] = atom_flags

        flags = np.sign(np.array(X_b[:, :, 0]))
        feed_dict[self.atom_flags] = np.stack([flags]*self.max_atoms, axis=2)*\
            np.stack([flags]*self.max_atoms, axis=1)
        feed_dict[self.atom_feats] = np.array(X_b[:, :, 1:], dtype=float)
        yield feed_dict

  def create_estimator_inputs(self, feature_columns, weight_column, features,
                              labels, mode):
    """Map Estimator `features`/`labels` tensors onto this graph's inputs.

    Parameters
    ----------
    feature_columns: list of tf.feature_column
      One column per entry in self.features, in the same order.
    weight_column: tf.feature_column or None
      Optional column holding per-task example weights.
    features: dict
      The Estimator input_fn feature dict.
    labels: Tensor or None
      The Estimator labels tensor (None at predict time).
    mode: tf.estimator.ModeKeys
      Unused here; accepted to match the make_estimator callback signature.

    Returns
    -------
    dict mapping graph input layers to tensors.
    """
    tensors = dict()
    for layer, column in zip(self.features, feature_columns):
      feature_col = tf.feature_column.input_layer(features, [column])
      # input_layer always emits float32; cast back when the column
      # declares a different dtype (e.g. int32 atom numbers).
      if feature_col.dtype != column.dtype:
        feature_col = tf.cast(feature_col, column.dtype)
      tensors[layer] = feature_col

    # Bug fix: these two blocks were nested inside the loop above, so
    # they ran redundantly once per feature column and never ran at all
    # for an empty feature_columns list.
    if weight_column is not None:
      tensors[self.task_weights[0]] = tf.feature_column.input_layer(
          features, [weight_column])
    if labels is not None:
      tensors[self.labels[0]] = labels

    return tensors


class ANIRegression(TensorGraph):

@@ -304,11 +330,16 @@ class ANIRegression(TensorGraph):
  def build_graph(self):

    self.atom_numbers = Feature(shape=(None, self.max_atoms), dtype=tf.int32)
    self.atom_flags = Feature(shape=(None, self.max_atoms, self.max_atoms))
    self.atom_feats = Feature(shape=(None, self.max_atoms, 4))
    self.atom_flags = Feature(shape=(None, self.max_atoms * self.max_atoms))
    self.atom_feats = Feature(shape=(None, self.max_atoms * 4))

    reshaped_atom_flags = Reshape(
        in_layers=[self.atom_flags], shape=(-1, self.max_atoms, self.max_atoms))
    reshaped_atom_feats = Reshape(
        in_layers=[self.atom_feats], shape=(-1, self.max_atoms, 4))

    previous_layer = ANIFeat(
        in_layers=self.atom_feats, max_atoms=self.max_atoms)
        in_layers=reshaped_atom_feats, max_atoms=self.max_atoms)

    self.featurized = previous_layer

@@ -323,25 +354,31 @@ class ANIRegression(TensorGraph):
      Hiddens.append(Hidden)
      previous_layer = Hiddens[-1]

    costs = []
    self.labels_fd = []
    for task in range(self.n_tasks):
    regression = Dense(
          out_channels=1, activation_fn=None, in_layers=[Hiddens[-1]])
      output = BPGather(self.max_atoms, in_layers=[regression, self.atom_flags])
        out_channels=1 * self.n_tasks,
        activation_fn=None,
        in_layers=[Hiddens[-1]])
    output = BPGather(
        self.max_atoms, in_layers=[regression, reshaped_atom_flags])
    self.add_output(output)

      label = Label(shape=(None, 1))
      self.labels_fd.append(label)
      cost = L2Loss(in_layers=[label, output])
      costs.append(cost)
    label = Label(shape=(None, self.n_tasks, 1))
    loss = ReduceSum(L2Loss(in_layers=[label, output]))
    weights = Weights(shape=(None, self.n_tasks))

    all_cost = Stack(in_layers=costs, axis=1)
    self.weights = Weights(shape=(None, self.n_tasks))
    loss = WeightedError(in_layers=[all_cost, self.weights])
    weighted_loss = WeightedError(in_layers=[loss, weights])
    if self.exp_loss:
      loss = Exp(in_layers=[loss])
    self.set_loss(loss)
      weighted_loss = Exp(in_layers=[weighted_loss])
    self.set_loss(weighted_loss)

  def compute_features_on_batch(self, X_b):
    """Split a raw batch into the arrays fed to the graph's inputs.

    Column 0 of X_b holds the atomic number (zero means padding); the
    full array, including that column, is passed along as atom_feats.

    Returns [atom_feats, atom_numbers, atom_flags], where
    atom_flags[b, i, j] is nonzero only when atoms i and j of sample b
    are both real atoms.
    """
    presence = np.sign(np.array(X_b[:, :, 0]))
    # Pairwise valid-atom mask built by replicating the presence vector
    # along rows and columns and multiplying elementwise.
    row_mask = np.stack([presence] * self.max_atoms, axis=2)
    col_mask = np.stack([presence] * self.max_atoms, axis=1)
    pair_mask = row_mask * col_mask
    numbers = np.array(X_b[:, :, 0], dtype=np.int32)
    features = np.array(X_b[:, :, :], dtype=np.float32)

    return [features, numbers, pair_mask]

  def default_generator(self,
                        dataset,
@@ -359,18 +396,34 @@ class ANIRegression(TensorGraph):

        feed_dict = dict()
        if y_b is not None and not predict:
          for index, label in enumerate(self.labels_fd):
            feed_dict[label] = y_b[:, index:index + 1]
          feed_dict[self.labels[0]] = y_b
        if w_b is not None and not predict:
          feed_dict[self.weights] = w_b
          feed_dict[self.task_weights[0]] = w_b

        flags = np.sign(np.array(X_b[:, :, 0]))
        feed_dict[self.atom_flags] = np.stack([flags]*self.max_atoms, axis=2)*\
            np.stack([flags]*self.max_atoms, axis=1)
        feed_dict[self.atom_numbers] = np.array(X_b[:, :, 0], dtype=int)
        feed_dict[self.atom_feats] = np.array(X_b[:, :, :], dtype=float)
        atom_feats, atom_numbers, atom_flags = self.compute_features_on_batch(
            X_b)
        feed_dict[self.atom_feats] = atom_feats
        feed_dict[self.atom_numbers] = atom_numbers
        feed_dict[self.atom_flags] = atom_flags
        yield feed_dict

  def create_estimator_inputs(self, feature_columns, weight_column, features,
                              labels, mode):
    """Map Estimator `features`/`labels` tensors onto this graph's inputs.

    Parameters
    ----------
    feature_columns: list of tf.feature_column
      One column per entry in self.features, in the same order.
    weight_column: tf.feature_column or None
      Optional column holding per-task example weights.
    features: dict
      The Estimator input_fn feature dict.
    labels: Tensor or None
      The Estimator labels tensor (None at predict time).
    mode: tf.estimator.ModeKeys
      Unused here; accepted to match the make_estimator callback signature.

    Returns
    -------
    dict mapping graph input layers to tensors.
    """
    tensors = dict()
    for layer, column in zip(self.features, feature_columns):
      feature_col = tf.feature_column.input_layer(features, [column])
      # input_layer always emits float32; cast back when the column
      # declares a different dtype (e.g. int32 atom numbers).
      if feature_col.dtype != column.dtype:
        feature_col = tf.cast(feature_col, column.dtype)
      tensors[layer] = feature_col

    # Bug fix: these two blocks were nested inside the loop above, so
    # they ran redundantly once per feature column and never ran at all
    # for an empty feature_columns list.
    if weight_column is not None:
      tensors[self.task_weights[0]] = tf.feature_column.input_layer(
          features, [weight_column])
    if labels is not None:
      tensors[self.labels[0]] = labels

    return tensors

  def save_numpy(self):
    """
    Save to a portable numpy file. Note that this relies on the names to be consistent
+151 −2
Original line number Diff line number Diff line
@@ -467,9 +467,11 @@ class TestEstimators(unittest.TestCase):
    input_file = os.path.join(current_dir, "example_DTNN.mat")
    dataset = loadmat(input_file)

    num_vals_to_use = 5

    np.random.seed(123)
    X = dataset['X']
    y = dataset['T'].astype(np.float32)
    X = dataset['X'][:num_vals_to_use]
    y = dataset['T'][:num_vals_to_use].astype(np.float32)
    w = np.ones_like(y)
    dataset = dc.data.NumpyDataset(X, y, w, ids=None)
    n_tasks = y.shape[1]
@@ -527,3 +529,150 @@ class TestEstimators(unittest.TestCase):

    results = estimator.evaluate(input_fn=lambda: input_fn(n_samples, 1))
    assert results['error'] < 0.1

  def test_bpsymm_regression_model(self):
    """Test creating an estimator for BPSymmetry Regression model."""
    tasks, dataset, transformers = dc.molnet.load_qm7_from_mat(
        featurizer='BPSymmetryFunction', move_mean=False)

    # Keep the test fast: only a handful of molecules.
    num_samples_to_use = 5
    train, _, _ = dataset
    X = train.X[:num_samples_to_use]
    y = train.y[:num_samples_to_use]
    w = train.w[:num_samples_to_use]
    ids = train.ids[:num_samples_to_use]

    dataset = dc.data.NumpyDataset(X, y, w, ids)

    max_atoms = 23
    batch_size = 16
    layer_structures = [128, 128, 64]

    ANItransformer = dc.trans.ANITransformer(
        max_atoms=max_atoms, atomic_number_differentiated=False)
    dataset = ANItransformer.transform(dataset)
    # The first feature column is the atomic number, which the model
    # strips off, so it sees one fewer feature than the transformer emits.
    n_feat = ANItransformer.get_num_feats() - 1

    model = dc.models.BPSymmetryFunctionRegression(
        len(tasks),
        max_atoms,
        n_feat,
        layer_structures=layer_structures,
        batch_size=batch_size,
        learning_rate=0.001,
        use_queue=False,
        mode="regression")

    metrics = {'error': tf.metrics.mean_absolute_error}

    def input_fn(epochs):
      # Estimator feature columns must be flat vectors; the model's
      # Reshape layers restore the (batch, max_atoms, ...) structure.
      X, y, w = dataset.make_iterator(
          batch_size=batch_size, epochs=epochs).get_next()
      atom_feats, atom_flags = tf.py_func(
          model.compute_features_on_batch, [X], Tout=[tf.float32, tf.float32])
      atom_feats = tf.reshape(
          atom_feats,
          shape=(tf.shape(atom_feats)[0], model.max_atoms * model.n_feat))
      atom_flags = tf.reshape(
          atom_flags,
          shape=(tf.shape(atom_flags)[0], model.max_atoms * model.max_atoms))

      features = dict()
      features['atom_feats'] = atom_feats
      features['atom_flags'] = atom_flags
      features['weights'] = w
      return features, y

    atom_feats = tf.feature_column.numeric_column(
        'atom_feats', shape=(max_atoms * n_feat,), dtype=tf.float32)
    # Fix: shape was written as (max_atoms * max_atoms) — a bare int, not
    # a tuple — inconsistent with the sibling columns; use a 1-tuple.
    atom_flags = tf.feature_column.numeric_column(
        'atom_flags', shape=(max_atoms * max_atoms,), dtype=tf.float32)
    weight_col = tf.feature_column.numeric_column(
        'weights', shape=(len(tasks),), dtype=tf.float32)

    estimator = model.make_estimator(
        feature_columns=[atom_feats, atom_flags],
        weight_column=weight_col,
        metrics=metrics)
    estimator.train(input_fn=lambda: input_fn(100))
    results = estimator.evaluate(input_fn=lambda: input_fn(1))

    assert results['error'] < 0.1

  def test_ani_regression(self):
    """Test creating an estimator for ANI Regression."""

    max_atoms = 4

    # Two tiny molecules, zero-padded to max_atoms rows. Column 0 is the
    # atomic number; columns 1-3 are Cartesian coordinates.
    X = np.array(
        [[
            [1, 5.0, 3.2, 1.1],
            [6, 1.0, 3.4, -1.1],
            [1, 2.3, 3.4, 2.2],
            [0, 0, 0, 0],
        ], [
            [8, 2.0, -1.4, -1.1],
            [7, 6.3, 2.4, 3.2],
            [0, 0, 0, 0],
            [0, 0, 0, 0],
        ]],
        dtype=np.float32)

    y = np.array([[2.0], [1.1]], dtype=np.float32)

    layer_structures = [128, 128, 64]
    atom_number_cases = [1, 6, 7, 8]

    kwargs = {
        "n_tasks": 1,
        "max_atoms": max_atoms,
        "layer_structures": layer_structures,
        "atom_number_cases": atom_number_cases,
        "batch_size": 2,
        "learning_rate": 0.001,
        "use_queue": False,
        "mode": "regression"
    }

    model = dc.models.ANIRegression(**kwargs)
    dataset = dc.data.NumpyDataset(X, y, n_tasks=1)

    metrics = {'error': tf.metrics.mean_absolute_error}

    def input_fn(epochs):
      # Estimator feature columns must be flat vectors; the model's
      # Reshape layers restore the (batch, max_atoms, ...) structure.
      X, y, w = dataset.make_iterator(batch_size=2, epochs=epochs).get_next()
      atom_feats, atom_numbers, atom_flags = tf.py_func(
          model.compute_features_on_batch, [X],
          Tout=[tf.float32, tf.int32, tf.float32])
      atom_feats = tf.reshape(
          atom_feats, shape=(tf.shape(atom_feats)[0], model.max_atoms * 4))
      atom_numbers = tf.reshape(
          atom_numbers, shape=(tf.shape(atom_numbers)[0], model.max_atoms))
      atom_flags = tf.reshape(
          atom_flags,
          shape=(tf.shape(atom_flags)[0], model.max_atoms * model.max_atoms))

      features = dict()
      features['atom_feats'] = atom_feats
      features['atom_numbers'] = atom_numbers
      features['atom_flags'] = atom_flags
      features['weights'] = w
      return features, y

    atom_feats = tf.feature_column.numeric_column(
        'atom_feats', shape=(max_atoms * 4,), dtype=tf.float32)
    atom_numbers = tf.feature_column.numeric_column(
        'atom_numbers', shape=(max_atoms,), dtype=tf.int32)
    # Fix: shape was written as (max_atoms * max_atoms) — a bare int, not
    # a tuple — inconsistent with the sibling columns; use a 1-tuple.
    atom_flags = tf.feature_column.numeric_column(
        'atom_flags', shape=(max_atoms * max_atoms,), dtype=tf.float32)
    weight_col = tf.feature_column.numeric_column(
        'weights', shape=(kwargs["n_tasks"],), dtype=tf.float32)

    estimator = model.make_estimator(
        feature_columns=[atom_feats, atom_numbers, atom_flags],
        weight_column=weight_col,
        metrics=metrics)
    estimator.train(input_fn=lambda: input_fn(100))

    results = estimator.evaluate(input_fn=lambda: input_fn(1))
    assert results['error'] < 0.1