Continuing debugging (5bc94ca6) · Commits · 钟慕尧 / deepchem

deepchem/models/graph_models.py

+29 −15

Original line number	Diff line number	Diff line
		@@ -351,9 +351,21 @@ class DAGModel(KerasModel):

		Lusci, Alessandro, Gianluca Pollastri, and Pierre Baldi. "Deep architectures and deep learning in chemoinformatics: the prediction of aqueous solubility for drug-like molecules." Journal of chemical information and modeling 53.7 (2013): 1563-1575.

		The basic idea for this paper is that a molecule is usually viewed as an undirected graph. However, you can convert it to a series of directed graphs. The idea is that for each atom, you make a DAG using that atom as the vertex of the DAG and edges pointing "inwards" to it. This transformation is implemented in dc.trans.transformers.DAGTransformer.UG_to_DAG.

		This model accepts ConvMols as input, just as GraphConvModel does, but these ConvMol objects must be transformed by dc.trans.DAGTransformer.
		The basic idea for this paper is that a molecule is usually
		viewed as an undirected graph. However, you can convert it to
		a series of directed graphs. The idea is that for each atom,
		you make a DAG using that atom as the vertex of the DAG and
		edges pointing "inwards" to it. This transformation is
		implemented in
		`dc.trans.transformers.DAGTransformer.UG_to_DAG`.

		This model accepts ConvMols as input, just as GraphConvModel
		does, but these ConvMol objects must be transformed by
		dc.trans.DAGTransformer.

		As a note, performance of this model can be a little
		sensitive to initialization. It might be worth training a few
		different instantiations to get a stable set of parameters.
		"""

		def __init__(self,
		@@ -415,9 +427,13 @@ class DAGModel(KerasModel):
		if uncertainty:
		if mode != "regression":
		raise ValueError("Uncertainty is only supported in regression mode")
		if dropout == 0.0:
		if dropout is None or dropout == 0.0:
		raise ValueError('Dropout must be included to predict uncertainty')

		############################################
		print("self.dropout")
		print(self.dropout)
		############################################
		# Build the model.

		atom_features = Input(shape=(self.n_atom_feat,))
		@@ -426,7 +442,6 @@ class DAGModel(KerasModel):
		calculation_masks = Input(shape=(self.max_atoms,), dtype=tf.bool)
		membership = Input(shape=tuple(), dtype=tf.int32)
		n_atoms = Input(shape=tuple(), dtype=tf.int32)
		dropout_switch = tf.keras.Input(shape=tuple())
		dag_layer1 = layers.DAGLayer(
		n_graph_feat=self.n_graph_feat,
		n_atom_feat=self.n_atom_feat,
		@@ -435,14 +450,14 @@ class DAGModel(KerasModel):
		dropout=self.dropout,
		batch_size=batch_size)([
		atom_features, parents, calculation_orders, calculation_masks,
		n_atoms, dropout_switch
		n_atoms
		])
		dag_gather = layers.DAGGather(
		n_graph_feat=self.n_graph_feat,
		n_outputs=self.n_outputs,
		max_atoms=self.max_atoms,
		layer_sizes=self.layer_sizes_gather,
		dropout=self.dropout)([dag_layer1, membership, dropout_switch])
		dropout=self.dropout)([dag_layer1, membership])
		n_tasks = self.n_tasks
		if self.mode == 'classification':
		n_classes = self.n_classes
		@@ -453,12 +468,7 @@ class DAGModel(KerasModel):
		output_types = ['prediction', 'loss']
		loss = SoftmaxCrossEntropy()
		else:
		fc_layer_size = 50
		inter = Dense(fc_layer_size)(dag_gather)
		if self.dropout is not None and self.dropout > 0.0:
		inter = Dropout(rate=self.dropout)(inter)
		#output = Dense(n_tasks)(dag_gather)
		output = Dense(n_tasks)(inter)
		output = Dense(n_tasks)(dag_gather)
		if self.uncertainty:
		log_var = Dense(n_tasks)(dag_gather)
		var = Activation(tf.exp)(log_var)
		@@ -474,8 +484,12 @@ class DAGModel(KerasModel):
		loss = L2Loss()
		model = tf.keras.Model(
		inputs=[
		atom_features, parents, calculation_orders, calculation_masks,
		membership, n_atoms, dropout_switch
		atom_features,
		parents,
		calculation_orders,
		calculation_masks,
		membership,
		n_atoms #, dropout_switch
		],
		outputs=outputs)
		super(DAGModel, self).__init__(

deepchem/models/keras_model.py

+68 −54

Original line number	Diff line number	Diff line
		@@ -56,8 +56,9 @@ class KerasModel(Model):

		You can optionally provide an output_types argument, which
		describes how to interpret the model's outputs. This should
		be a list of strings, one for each output. Each entry must
		have one of the following values:
		be a list of strings, one for each output. You can use an
		arbitrary output_type for an output, but some output_types are
		special and will undergo extra processing:

		- 'prediction': This is a normal output, and will be returned by predict().
		If output types are not specified, all outputs are assumed
		@@ -89,8 +90,9 @@ class KerasModel(Model):
		and dropout must be enabled during uncertainty prediction.
		Otherwise, the uncertainties it computes will be inaccurate.

		- 'embedding': This output is an embedding that the model
		generates internally which should be returned to users.
		- other: Arbitrary output_types can be used to extract outputs
		produced by the model, but will have no additional
		processing performed.
		"""

		def __init__(self,
		@@ -151,12 +153,12 @@ class KerasModel(Model):
		self._prediction_outputs = None
		self._loss_outputs = None
		self._variance_outputs = None
		self._embedding_outputs = None
		self._other_outputs = None
		else:
		self._prediction_outputs = []
		self._loss_outputs = []
		self._variance_outputs = []
		self._embedding_outputs = []
		self._other_outputs = []
		for i, type in enumerate(output_types):
		if type == 'prediction':
		self._prediction_outputs.append(i)
		@@ -164,10 +166,8 @@ class KerasModel(Model):
		self._loss_outputs.append(i)
		elif type == 'variance':
		self._variance_outputs.append(i)
		elif type == 'embedding':
		self._embedding_outputs.append(i)
		else:
		raise ValueError('Unknown output type "%s"' % type)
		self._other_outputs.append(i)
		if len(self._loss_outputs) == 0:
		self._loss_outputs = self._prediction_outputs
		self._built = False
		@@ -436,12 +436,14 @@ class KerasModel(Model):
		loss=loss,
		callbacks=callbacks)

		def _predict(self, generator, transformers, outputs, uncertainty, embedding):
		def _predict(self, generator, transformers, outputs, uncertainty,
		other_output_types):
		"""
		Predict outputs for data provided by a generator.

		This is the private implementation of prediction. Do not call it directly.
		Instead call one of the public prediction methods.
		This is the private implementation of prediction. Do not
		call it directly. Instead call one of the public prediction
		methods.

		Parameters
		----------
		@@ -460,18 +462,19 @@ class KerasModel(Model):
		specifies whether this is being called as part of estimating uncertainty.
		If True, it sets the training flag so that dropout will be enabled, and
		returns the values of the uncertainty outputs.
		embedding: bool
		specifies whether this is being called as part of generating embeddings.
		other_output_types: list, optional
		Provides a list of other outputs to predict from model.
		Each such output should have a unique output_type so it
		can be retrieved from the model.
		Returns:
		a NumPy array if the model produces a single output, or a list of arrays
		if it produces multiple outputs
		"""
		results = None
		variances = None
		embeddings = None
		if uncertainty and embedding:
		if uncertainty and other_output_types:
		raise ValueError(
		'This model cannot compute uncertainties and embeddings simultaneously. Please invoke one at a time.'
		'This model cannot compute uncertainties and other output types simultaneously. Please invoke one at a time.'
		)
		if uncertainty:
		assert outputs is None
		@@ -480,10 +483,10 @@ class KerasModel(Model):
		if len(self._variance_outputs) != len(self._prediction_outputs):
		raise ValueError(
		'The number of variances must exactly match the number of outputs')
		if embedding:
		if other_output_types:
		assert outputs is None
		if self._embedding_outputs is None or len(self._embedding_outputs) == 0:
		raise ValueError('This model cannot compute embeddings.')
		if self._other_outputs is None or len(self._other_outputs) == 0:
		raise ValueError('This model cannot compute other outputs.')
		if (outputs is not None and self.model.inputs is not None and
		len(self.model.inputs) == 0):
		raise ValueError(
		@@ -520,8 +523,9 @@ class KerasModel(Model):
		else:
		for i, t in enumerate(var):
		variances[i].append(t)
		if embedding:
		embeddings = [output_values[i] for i in self._embedding_outputs]
		# TODO(rbharath): You should be able to invoke both of these simultaneously...
		if self._other_outputs is not None:
		output_values = [output_values[i] for i in self._other_outputs]
		if self._prediction_outputs is not None:
		output_values = [output_values[i] for i in self._prediction_outputs]
		if len(transformers) > 0:
		@@ -539,19 +543,18 @@ class KerasModel(Model):
		# Concatenate arrays to create the final results.
		final_results = []
		final_variances = []
		final_embeddings = []
		for r in results:
		final_results.append(np.concatenate(r, axis=0))
		if uncertainty:
		for v in variances:
		final_variances.append(np.concatenate(v, axis=0))
		return zip(final_results, final_variances)
		if embedding:
		final_embeddings = embeddings
		if len(final_embeddings) == 1:
		return final_embeddings[0]
		else:
		return final_embeddings
		#if other_output_types:
		# final_other_outputs = embeddings
		# if len(final_embeddings) == 1:
		# return final_embeddings[0]
		# else:
		# return final_embeddings
		# If only one output, just return array
		if len(final_results) == 1:
		return final_results[0]
		@@ -563,7 +566,11 @@ class KerasModel(Model):
		"""Evaluate the model for a set of inputs."""
		return self.model(inputs, training=False)

		def predict_on_generator(self, generator, transformers=[], outputs=None):
		def predict_on_generator(self,
		generator,
		transformers=[],
		outputs=None,
		output_types=None):
		"""
		Parameters
		----------
		@@ -574,10 +581,13 @@ class KerasModel(Model):
		Transformers that the input data has been transformed by. The output
		is passed through these transformers to undo the transformations.
		outputs: Tensor or list of Tensors
		The outputs to return. If this is None, the model's standard prediction
		outputs will be returned. Alternatively one or more Tensors within the
		model may be specified, in which case the output of those Tensors will be
		returned.
		The outputs to return. If this is None, the model's
		standard prediction outputs will be returned.
		Alternatively one or more Tensors within the model may be
		specified, in which case the output of those Tensors will
		be returned.
		output_types: String or list of Strings

		Returns:
		a NumPy array if the model produces a single output, or a list of arrays
		if it produces multiple outputs
		@@ -635,7 +645,7 @@ class KerasModel(Model):
		dataset = NumpyDataset(X=X, y=None)
		return self.predict_uncertainty(dataset, masks)

		def predict(self, dataset, transformers=[], outputs=None):
		def predict(self, dataset, transformers=[], outputs=None, output_types=None):
		"""
		Uses self to make predictions on provided Dataset object.

		@@ -651,6 +661,8 @@ class KerasModel(Model):
		outputs will be returned. Alternatively one or more Tensors within the
		model may be specified, in which case the output of those Tensors will be
		returned.
		output_types: list of Strings
		The output types to return. Will retrieve all outputs of these types from the model.

		Returns
		-------
		@@ -659,25 +671,27 @@ class KerasModel(Model):
		"""
		generator = self.default_generator(
		dataset, mode='predict', pad_batches=False)
		return self.predict_on_generator(generator, transformers, outputs)

		def predict_embedding(self, dataset):
		"""
		Predicts embeddings created by underlying model

		Parameters
		----------
		dataset: dc.data.Dataset
		Dataset to make prediction on

		Returns
		-------
		a NumPy array of the embeddings model produces, or a list
		of arrays if it produces multiple embeddings
		"""
		generator = self.default_generator(
		dataset, mode='predict', pad_batches=False)
		return self._predict(generator, [], None, False, True)
		return self.predict_on_generator(generator, transformers, outputs,
		output_types)


		# def predict_embedding(self, dataset):
		# """
		# Predicts embeddings created by underlying model
		#
		# Parameters
		# ----------
		# dataset: dc.data.Dataset
		# Dataset to make prediction on
		#
		# Returns
		# -------
		# a NumPy array of the embeddings model produces, or a list
		# of arrays if it produces multiple embeddings
		# """
		# generator = self.default_generator(
		# dataset, mode='predict', pad_batches=False)
		# return self._predict(generator, [], None, False, True)

		def predict_uncertainty(self, dataset, masks=50):
		"""

deepchem/models/layers.py

+51 −14

Original line number	Diff line number	Diff line
		@@ -4,6 +4,7 @@ import tensorflow_probability as tfp
		import numpy as np
		import collections
		from tensorflow.keras import activations, initializers, backend
		from tensorflow.keras.layers import Dropout


		class InteratomicL2Distances(tf.keras.layers.Layer):
		@@ -2361,14 +2362,23 @@ class DTNNGather(tf.keras.layers.Layer):
		return tf.math.segment_sum(output, atom_membership)


		def _DAGgraph_step(batch_inputs, W_list, b_list, activation_fn, dropout,
		dropout_switch):
		def _DAGgraph_step(
		batch_inputs,
		W_list,
		b_list,
		activation_fn,
		#dropout,
		dropouts,
		#dropout_switch):
		training):
		outputs = batch_inputs
		for idw, W in enumerate(W_list):
		#for idw, W in enumerate(W_list):
		for idw, (dropout, W) in enumerate(zip(dropouts, W_list)):
		outputs = tf.nn.bias_add(tf.matmul(outputs, W), b_list[idw])
		outputs = activation_fn(outputs)
		if not dropout is None:
		outputs = tf.nn.dropout(outputs, rate=dropout * dropout_switch)
		if dropout is not None:
		#outputs = tf.nn.dropout(outputs, rate=dropout * dropout_switch)
		outputs = dropout(outputs, training=training)
		return outputs


		@@ -2454,6 +2464,7 @@ class DAGLayer(tf.keras.layers.Layer):
		""""Construct internal trainable weights."""
		self.W_list = []
		self.b_list = []
		self.dropouts = []
		init = initializers.get(self.init)
		prev_layer_size = self.n_inputs
		for layer_size in self.layer_sizes:
		@@ -2461,14 +2472,22 @@ class DAGLayer(tf.keras.layers.Layer):
		self.b_list.append(backend.zeros(shape=[
		layer_size,
		]))
		if self.dropout is not None and self.dropout > 0.0:
		self.dropouts.append(Dropout(rate=self.dropout))
		else:
		self.dropouts.append(None)
		prev_layer_size = layer_size
		self.W_list.append(init([prev_layer_size, self.n_outputs]))
		self.b_list.append(backend.zeros(shape=[
		self.n_outputs,
		]))
		if self.dropout is not None and self.dropout > 0.0:
		self.dropouts.append(Dropout(rate=self.dropout))
		else:
		self.dropouts.append(None)
		self.built = True

		def call(self, inputs):
		def call(self, inputs, training=True):
		"""
		parent layers: atom_features, parents, calculation_orders, calculation_masks, n_atoms
		"""
		@@ -2481,7 +2500,7 @@ class DAGLayer(tf.keras.layers.Layer):
		calculation_masks = inputs[3]

		n_atoms = tf.squeeze(inputs[4])
		dropout_switch = tf.squeeze(inputs[5])
		#dropout_switch = tf.squeeze(inputs[5])
		graph_features = tf.zeros((self.max_atoms * self.batch_size,
		self.max_atoms + 1, self.n_graph_feat))

		@@ -2513,8 +2532,8 @@ class DAGLayer(tf.keras.layers.Layer):
		# of shape: (batch_size*max_atoms) * n_graph_features
		# representing the graph features of target atoms in each graph
		batch_outputs = _DAGgraph_step(batch_inputs, self.W_list, self.b_list,
		self.activation_fn, self.dropout,
		dropout_switch)
		self.activation_fn, self.dropouts,
		training)

		# index for target atoms
		target_index = tf.stack([tf.range(n_atoms), parents[:, count, 0]], axis=1)
		@@ -2580,6 +2599,7 @@ class DAGGather(tf.keras.layers.Layer):
		def build(self, input_shape):
		self.W_list = []
		self.b_list = []
		self.dropouts = []
		init = initializers.get(self.init)
		prev_layer_size = self.n_graph_feat
		for layer_size in self.layer_sizes:
		@@ -2587,25 +2607,42 @@ class DAGGather(tf.keras.layers.Layer):
		self.b_list.append(backend.zeros(shape=[
		layer_size,
		]))
		if self.dropout is not None and self.dropout > 0.0:
		self.dropouts.append(Dropout(rate=self.dropout))
		else:
		self.dropouts.append(None)
		prev_layer_size = layer_size
		self.W_list.append(init([prev_layer_size, self.n_outputs]))
		self.b_list.append(backend.zeros(shape=[
		self.n_outputs,
		]))
		if self.dropout is not None and self.dropout > 0.0:
		self.dropouts.append(Dropout(rate=self.dropout))
		else:
		self.dropouts.append(None)
		self.built = True

		def call(self, inputs):
		def call(self, inputs, training=True):
		"""
		parent layers: atom_features, membership
		"""
		atom_features = inputs[0]
		membership = inputs[1]
		dropout_switch = tf.squeeze(inputs[2])
		#dropout_switch = tf.squeeze(inputs[2])
		# Extract atom_features
		graph_features = tf.math.segment_sum(atom_features, membership)
		# sum all graph outputs
		return _DAGgraph_step(graph_features, self.W_list, self.b_list,
		self.activation_fn, self.dropout, dropout_switch)
		#return _DAGgraph_step(graph_features, self.W_list, self.b_list,
		# self.activation_fn, self.dropout, dropout_switch)
		return _DAGgraph_step(
		graph_features,
		self.W_list,
		self.b_list,
		self.activation_fn,
		#self.dropout,
		self.dropouts,
		#dropout_switch)
		training)


		class MessagePassing(tf.keras.layers.Layer):

deepchem/models/tests/test_graph_models.py

+16 −8

Original line number	Diff line number	Diff line
		@@ -4,6 +4,7 @@ import numpy as np
		import scipy

		import deepchem as dc
		import tensorflow as tf
		from deepchem.data import NumpyDataset
		from deepchem.models import GraphConvModel, DAGModel, WeaveModel, MPNNModel
		from deepchem.molnet import load_bace_classification, load_delaney
		@@ -69,6 +70,7 @@ class TestGraphModels(unittest.TestCase):
		batch_size=batch_size,
		dense_layer_size=3,
		mode='classification')
		assert 0 == 1

		model.fit(dataset, nb_epoch=1)
		neural_fingerprints = model.predict_embedding(dataset)
		@@ -182,12 +184,14 @@ class TestGraphModels(unittest.TestCase):
		batch_size=batch_size,
		use_queue=False)

		model.fit(dataset, nb_epoch=30)
		model.fit(dataset, nb_epoch=40)
		scores = model.evaluate(dataset, [metric], transformers)
		assert scores['mean-roc_auc_score'] >= 0.9

		@attr("slow")
		def test_dag_regression_model(self):
		np.random.seed(1234)
		tf.random.set_seed(1234)
		tasks, dataset, transformers, metric = self.get_dataset(
		'regression', 'GraphConv')

		@@ -204,12 +208,14 @@ class TestGraphModels(unittest.TestCase):
		batch_size=batch_size,
		use_queue=False)

		model.fit(dataset, nb_epoch=400)
		model.fit(dataset, nb_epoch=1200)
		scores = model.evaluate(dataset, [metric], transformers)
		assert all(s < 0.15 for s in scores['mean_absolute_error'])

		@attr("slow")
		def test_dag_regression_uncertainty(self):
		np.random.seed(1234)
		tf.random.set_seed(1234)
		tasks, dataset, transformers, metric = self.get_dataset(
		'regression', 'GraphConv')

		@@ -222,13 +228,13 @@ class TestGraphModels(unittest.TestCase):
		len(tasks),
		max_atoms=max_atoms,
		mode='regression',
		learning_rate=0.03,
		learning_rate=0.003,
		batch_size=batch_size,
		use_queue=False,
		dropout=0.1,
		dropout=0.05,
		uncertainty=True)

		model.fit(dataset, nb_epoch=1000)
		model.fit(dataset, nb_epoch=750)

		# Predict the output and uncertainty.
		pred, std = model.predict_uncertainty(dataset)
		@@ -236,8 +242,10 @@ class TestGraphModels(unittest.TestCase):
		mean_value = np.mean(np.abs(dataset.y))
		mean_std = np.mean(std)
		# The DAG models have high error with dropout
		# Despite a lot of effort tweaking it, there appears to be
		# a limit to how low the error can go with dropout.
		#assert mean_error < 0.5 * mean_value
		assert mean_error < mean_value
		assert mean_error < .7 * mean_value
		assert mean_std > 0.5 * mean_error
		assert mean_std < mean_value

		@@ -257,7 +265,7 @@ class TestGraphModels(unittest.TestCase):
		M=1,
		batch_size=batch_size)

		model.fit(dataset, nb_epoch=60)
		model.fit(dataset, nb_epoch=30)
		scores = model.evaluate(dataset, [metric], transformers)
		assert scores['mean-roc_auc_score'] >= 0.9

		@@ -277,7 +285,7 @@ class TestGraphModels(unittest.TestCase):
		M=1,
		batch_size=batch_size)

		model.fit(dataset, nb_epoch=70)
		model.fit(dataset, nb_epoch=50)
		scores = model.evaluate(dataset, [metric], transformers)
		assert all(s < 0.1 for s in scores['mean_absolute_error'])

deepchem/models/tests/test_layers.py

+10 −5

Original line number	Diff line number	Diff line
		@@ -415,15 +415,19 @@ class TestLayers(test_util.TensorFlowTestCase):
		# molecules in the batch, just as it is for the graph conv.
		# This means that n_atoms is the batch-size
		n_atoms = batch_size
		dropout_switch = False
		#dropout_switch = False
		layer = layers.DAGLayer(
		n_graph_feat=n_graph_feat,
		n_atom_feat=n_atom_feat,
		max_atoms=max_atoms,
		layer_sizes=layer_sizes)
		outputs = layer([
		atom_features, parents, calculation_orders, calculation_masks, n_atoms,
		dropout_switch
		atom_features,
		parents,
		calculation_orders,
		calculation_masks,
		n_atoms,
		#dropout_switch
		])
		## TODO(rbharath): What is the shape of outputs supposed to be?
		## I'm getting (7, 30) here. Where does 7 come from??
		@@ -445,5 +449,6 @@ class TestLayers(test_util.TensorFlowTestCase):
		layer_sizes=layer_sizes)
		atom_features = np.random.rand(batch_size, n_atom_feat)
		membership = np.sort(np.random.randint(0, batch_size, size=(batch_size)))
		dropout_switch = False
		outputs = layer([atom_features, membership, dropout_switch])
		#dropout_switch = False
		#outputs = layer([atom_features, membership, dropout_switch])
		outputs = layer([atom_features, membership])

Admin message