Cleaning up tests (f70938eb) · Commits · 钟慕尧 / deepchem

deepchem/feat/graph_features.py

+79 −1

Original line number	Diff line number	Diff line
		@@ -70,11 +70,20 @@ reference_lists = [
		]

		intervals = get_intervals(reference_lists)
		# We use E-Z notation for stereochemistry
		# https://en.wikipedia.org/wiki/E%E2%80%93Z_notation
		possible_bond_stereo = ["STEREONONE", "STEREOANY", "STEREOZ", "STEREOE"]
		bond_fdim_base = 6


		def get_feature_list(atom):
		"""Returns a list of possible features for this atom.

		Parameters
		----------
		atom: RDKit.rdchem.Atom
		Atom to get features for
		"""
		features = 6 * [0]
		features[0] = safe_index(possible_atom_list, atom.GetSymbol())
		features[1] = safe_index(possible_numH_list, atom.GetTotalNumHs())
		@@ -113,7 +122,13 @@ def id_to_features(id, intervals):


		def atom_to_id(atom):
		"""Return a unique id corresponding to the atom type"""
		"""Return a unique id corresponding to the atom type

		Parameters
		----------
		atom: RDKit.rdchem.Atom
		Atom to convert to ids.
		"""
		features = get_feature_list(atom)
		return features_to_id(features, intervals)

		@@ -122,6 +137,19 @@ def atom_features(atom,
		bool_id_feat=False,
		explicit_H=False,
		use_chirality=False):
		"""Helper method used to compute per-atom feature vectors.

		Many different featurization methods compute per-atom features such as ConvMolFeaturizer, WeaveFeaturizer. This method computes such features.

		Parameters
		----------
		bool_id_feat: bool, optional
		Return an array of unique identifiers corresponding to atom type.
		explicit_H: bool, optional
		If true, model hydrogens explicitly
		use_chirality: bool, optional
		If true, use chirality information.
		"""
		if bool_id_feat:
		return np.array([atom_to_id(atom)])
		else:
		@@ -199,6 +227,16 @@ def atom_features(atom,


		def bond_features(bond, use_chirality=False):
		"""Helper method used to compute bond feature vectors.

		Many different featurization methods compute bond features
		such as WeaveFeaturizer. This method computes such features.

		Parameters
		----------
		use_chirality: bool, optional
		If true, use chirality information.
		"""
		from rdkit import Chem
		bt = bond.GetBondType()
		bond_feats = [
		@@ -215,6 +253,26 @@ def bond_features(bond, use_chirality=False):

		def pair_features(mol, edge_list, canon_adj_list, bt_len=6,
		graph_distance=True):
		"""Helper method used to compute atom pair feature vectors.

		Many different featurization methods compute atom pair features
		such as WeaveFeaturizer. Note that atom pair features could be
		for pairs of atoms which aren't necessarily bonded to one
		another.

		Parameters
		----------
		mol: TODO
		TODO
		edge_list: list
		List of edges to consider
		canon_adj_list: list
		TODO
		bt_len: int, optional
		TODO
		graph_distance: bool, optional
		TODO
		"""
		if graph_distance:
		max_distance = 7
		else:
		@@ -271,6 +329,10 @@ def find_distance(a1, num_atoms, canon_adj_list, max_distance=7):


		class ConvMolFeaturizer(Featurizer):
		"""This class implements the featurization used for graph convolutions in the Duvenaud graph convolution paper.

		Duvenaud, David K., et al. "Convolutional networks on graphs for learning molecular fingerprints." Advances in neural information processing systems. 2015.
		"""
		name = ['conv_mol']

		def __init__(self, master_atom=False, use_chirality=False,
		@@ -381,10 +443,26 @@ class ConvMolFeaturizer(Featurizer):


		class WeaveFeaturizer(Featurizer):
		"""This class implements the featurization used for Weave convolutions in the Google graph convolution paper.

		Kearnes, Steven, et al. "Molecular graph convolutions: moving beyond fingerprints." Journal of computer-aided molecular design 30.8 (2016): 595-608.
		"""

		name = ['weave_mol']

		def __init__(self, graph_distance=True, explicit_H=False,
		use_chirality=False):
		"""
		Parameters
		----------
		graph_distance: bool, optional
		If true, use graph distance. Otherwise, use Euclidean
		distance.
		explicit_H: bool, optional
		If true, model hydrogens in the molecule.
		use_chirality: bool, optional
		If true, use chiral information in the featurization
		"""
		# Distance is either graph distance(True) or Euclidean distance(False,
		# only support datasets providing Cartesian coordinates)
		self.graph_distance = graph_distance

deepchem/feat/mol_graphs.py

+6 −2

Original line number	Diff line number	Diff line
		@@ -386,8 +386,12 @@ class MultiConvMol(object):


		class WeaveMol(object):
		"""Holds information about a molecule
		Molecule struct used in weave models
		"""Molecular featurization object for weave convolutions.

		These objects are produced by WeaveFeaturizer, and feed into
		WeaveModel. The underlying implementation is inspired by:

		Kearnes, Steven, et al. "Molecular graph convolutions: moving beyond fingerprints." Journal of computer-aided molecular design 30.8 (2016): 595-608.
		"""

		def __init__(self, nodes, pairs):

deepchem/models/atomic_conv.py

+15 −10

Original line number	Diff line number	Diff line
		@@ -54,6 +54,7 @@ def initializeWeightsBiases(prev_layer_size,


		class AtomicConvScore(Layer):
		"""The scoring function used by the atomic convolution models."""

		def __init__(self, atom_types, layer_sizes, **kwargs):
		super(AtomicConvScore, self).__init__(**kwargs)
		@@ -145,6 +146,19 @@ class AtomicConvScore(Layer):


		class AtomicConvModel(KerasModel):
		"""Implements an Atomic Convolution Model.

		Implements the atomic convolutional networks as introduced in

		Gomes, Joseph, et al. "Atomic convolutional networks for predicting protein-ligand binding affinity." arXiv preprint arXiv:1703.10603 (2017).

		The atomic convolutional networks function as a variant of
		graph convolutions. The difference is that the "graph" here is
		the nearest neighbors graph in 3D space. The AtomicConvModel
		leverages these connections in 3D space to train models that
		learn to predict energetic state starting from the spatial
		geometry of the model.
		"""

		def __init__(self,
		frag1_num_atoms=70,
		@@ -163,16 +177,7 @@ class AtomicConvModel(KerasModel):
		layer_sizes=[32, 32, 16],
		learning_rate=0.001,
		**kwargs):
		"""Implements an Atomic Convolution Model.

		Implements the atomic convolutional networks as introduced in
		https://arxiv.org/abs/1703.10603. The atomic convolutional networks
		function as a variant of graph convolutions. The difference is that the
		"graph" here is the nearest neighbors graph in 3D space. The
		AtomicConvModel leverages these connections in 3D space to train models
		that learn to predict energetic state starting from the spatial
		geometry of the model.

		"""
		Parameters
		----------
		frag1_num_atoms: int

deepchem/models/graph_models.py

+92 −34

Original line number	Diff line number	Diff line
		@@ -30,6 +30,21 @@ class TrimGraphOutput(tf.keras.layers.Layer):


		class WeaveModel(KerasModel):
		"""Implements Google-style Weave Graph Convolutions

		This model implements the Weave style graph convolutions
		from the following paper.

		Kearnes, Steven, et al. "Molecular graph convolutions: moving beyond fingerprints." Journal of computer-aided molecular design 30.8 (2016): 595-608.

		The biggest difference between WeaveModel style convolutions
		and GraphConvModel style convolutions is that Weave
		convolutions model bond features explicitly. This has the
		side effect that it needs to construct a NxN matrix
		explicitly to model bond interactions. This may cause
		scaling issues, but may possibly allow for better modeling
		of subtle bond effects.
		"""

		def __init__(self,
		n_tasks,
		@@ -90,7 +105,9 @@ class WeaveModel(KerasModel):
		update_pair=False)(
		[weave_layer1A, weave_layer1P, pair_split, atom_to_pair])
		dense1 = Dense(self.n_graph_feat, activation=tf.nn.tanh)(weave_layer2A)
		batch_norm1 = BatchNormalization(epsilon=1e-5)(dense1)
		# Batch normalization causes issues, spitting out NaNs if
		# allowed to train
		batch_norm1 = BatchNormalization(epsilon=1e-5, trainable=False)(dense1)
		weave_gather = layers.WeaveGather(
		batch_size, n_input=self.n_graph_feat,
		gaussian_expand=True)([batch_norm1, atom_split])
		@@ -170,6 +187,12 @@ class WeaveModel(KerasModel):


		class DTNNModel(KerasModel):
		"""Deep Tensor Neural Networks

		This class implements deep tensor neural networks as first defined in

		Schütt, Kristof T., et al. "Quantum-chemical insights from deep tensor neural networks." Nature communications 8.1 (2017): 1-8.
		"""

		def __init__(self,
		n_tasks,
		@@ -322,6 +345,16 @@ class DTNNModel(KerasModel):


		class DAGModel(KerasModel):
		"""Directed Acyclic Graph models for molecular property prediction.

		This model is based on the following paper:

		Lusci, Alessandro, Gianluca Pollastri, and Pierre Baldi. "Deep architectures and deep learning in chemoinformatics: the prediction of aqueous solubility for drug-like molecules." Journal of chemical information and modeling 53.7 (2013): 1563-1575.

		The basic idea for this paper is that a molecule is usually viewed as an undirected graph. However, you can convert it to a series of directed graphs. The idea is that for each atom, you make a DAG using that atom as the vertex of the DAG and edges pointing "inwards" to it. This transformation is implemented in dc.trans.transformers.DAGTransformer.UG_to_DAG.

		This model accepts ConvMols as input, just as GraphConvModel does, but these ConvMol objects must be transformed by dc.trans.DAGTransformer.
		"""

		def __init__(self,
		n_tasks,
		@@ -337,16 +370,7 @@ class DAGModel(KerasModel):
		uncertainty=False,
		batch_size=100,
		**kwargs):
		"""Directed Acyclic Graph models for molecular property prediction.

		This model is based on the following paper:

		Lusci, Alessandro, Gianluca Pollastri, and Pierre Baldi. "Deep architectures and deep learning in chemoinformatics: the prediction of aqueous solubility for drug-like molecules." Journal of chemical information and modeling 53.7 (2013): 1563-1575.

		The basic idea for this paper is that a molecule is usually viewed as an undirected graph. However, you can convert it to a series of directed graphs. The idea is that for each atom, you make a DAG using that atom as the vertex of the DAG and edges pointing "inwards" to it. This transformation is implemented in dc.trans.transformers.DAGTransformer.UG_to_DAG.

		This model accepts ConvMols as input, just as GraphConvModel does, but these ConvMol objects must be transformed by dc.trans.DAGTransformer.

		"""
		Parameters
		----------
		n_tasks: int
		@@ -429,7 +453,12 @@ class DAGModel(KerasModel):
		output_types = ['prediction', 'loss']
		loss = SoftmaxCrossEntropy()
		else:
		output = Dense(n_tasks)(dag_gather)
		fc_layer_size = 50
		inter = Dense(fc_layer_size)(dag_gather)
		if self.dropout is not None and self.dropout > 0.0:
		inter = Dropout(rate=self.dropout)(inter)
		#output = Dense(n_tasks)(dag_gather)
		output = Dense(n_tasks)(inter)
		if self.uncertainty:
		log_var = Dense(n_tasks)(dag_gather)
		var = Activation(tf.exp)(log_var)
		@@ -514,6 +543,7 @@ class _GraphConvKerasModel(tf.keras.Model):
		mode="classification",
		number_atom_features=75,
		n_classes=2,
		batch_normalize=True,
		uncertainty=False,
		batch_size=100):
		"""An internal keras model class.
		@@ -548,12 +578,11 @@ class _GraphConvKerasModel(tf.keras.Model):
		for layer_size in graph_conv_layers
		]
		self.batch_norms = [
		BatchNormalization(fused=False)
		BatchNormalization(fused=False) if batch_normalize else None
		for _ in range(len(graph_conv_layers) + 1)
		]
		self.dropouts = [
		layers.SwitchedDropout(rate=rate) if rate > 0.0 else None
		for rate in dropout
		Dropout(rate=rate) if rate > 0.0 else None for rate in dropout
		]
		self.graph_pools = [layers.GraphPool() for _ in graph_conv_layers]
		self.dense = Dense(dense_layer_size, activation=tf.nn.relu)
		@@ -571,29 +600,30 @@ class _GraphConvKerasModel(tf.keras.Model):
		self.uncertainty_trim = TrimGraphOutput()
		self.uncertainty_activation = Activation(tf.exp)

		def call(self, inputs):
		def call(self, inputs, training=False):
		atom_features = inputs[0]
		degree_slice = tf.cast(inputs[1], dtype=tf.int32)
		membership = tf.cast(inputs[2], dtype=tf.int32)
		n_samples = tf.cast(inputs[3], dtype=tf.int32)
		dropout_switch = inputs[4]
		deg_adjs = [tf.cast(deg_adj, dtype=tf.int32) for deg_adj in inputs[5:]]
		deg_adjs = [tf.cast(deg_adj, dtype=tf.int32) for deg_adj in inputs[4:]]

		in_layer = atom_features
		for i in range(len(self.graph_convs)):
		gc_in = [in_layer, degree_slice, membership] + deg_adjs
		gc1 = self.graph_convs[i](gc_in)
		batch_norm1 = self.batch_norms[i](gc1)
		if self.dropouts[i] is not None:
		batch_norm1 = self.dropouts[i]([batch_norm1, dropout_switch])
		gp_in = [batch_norm1, degree_slice, membership] + deg_adjs
		if self.batch_norms[i] is not None:
		gc1 = self.batch_norms[i](gc1, training=training)
		if training and self.dropouts[i] is not None:
		gc1 = self.dropouts[i](gc1, training=training)
		gp_in = [gc1, degree_slice, membership] + deg_adjs
		in_layer = self.graph_pools[i](gp_in)
		dense = self.dense(in_layer)
		batch_norm3 = self.batch_norms[-1](dense)
		if self.dropouts[-1] is not None:
		batch_norm3 = self.dropouts[1]([batch_norm3, dropout_switch])
		neural_fingerprint = self.graph_gather(
		[batch_norm3, degree_slice, membership] + deg_adjs)
		if self.batch_norms[-1] is not None:
		dense = self.batch_norms[-1](dense, training=training)
		if training and self.dropouts[-1] is not None:
		dense = self.dropouts[1](dense, training=training)
		neural_fingerprint = self.graph_gather([dense, degree_slice, membership] +
		deg_adjs)
		if self.mode == 'classification':
		logits = self.reshape(self.reshape_dense(neural_fingerprint))
		logits = self.trim([logits, n_samples])
		@@ -614,6 +644,15 @@ class _GraphConvKerasModel(tf.keras.Model):


		class GraphConvModel(KerasModel):
		"""Graph Convolutional Models.

		This class implements the graph convolutional model from the
		following paper:


		Duvenaud, David K., et al. "Convolutional networks on graphs for learning molecular fingerprints." Advances in neural information processing systems. 2015.

		"""

		def __init__(self,
		n_tasks,
		@@ -624,6 +663,7 @@ class GraphConvModel(KerasModel):
		number_atom_features=75,
		n_classes=2,
		batch_size=100,
		batch_normalize=True,
		uncertainty=False,
		**kwargs):
		"""The wrapper class for graph convolutions.
		@@ -653,6 +693,8 @@ class GraphConvModel(KerasModel):
		function atom_features in graph_features
		n_classes: int
		the number of classes to predict (only used in classification mode)
		batch_normalize: bool
		if True, apply batch normalization to model
		uncertainty: bool
		if True, include extra outputs and loss terms to enable the uncertainty
		in outputs to be predicted
		@@ -670,6 +712,7 @@ class GraphConvModel(KerasModel):
		mode=mode,
		number_atom_features=number_atom_features,
		n_classes=n_classes,
		batch_normalize=batch_normalize,
		uncertainty=uncertainty,
		batch_size=batch_size)
		if mode == "classification":
		@@ -707,13 +750,16 @@ class GraphConvModel(KerasModel):
		-1, self.n_tasks, self.n_classes)
		multiConvMol = ConvMol.agglomerate_mols(X_b)
		n_samples = np.array(X_b.shape[0])
		if mode == 'predict':
		dropout = np.array(0.0)
		else:
		dropout = np.array(1.0)
		#if mode == 'predict':
		# dropout = np.array(0.0)
		#else:
		# dropout = np.array(1.0)
		inputs = [
		multiConvMol.get_atom_features(), multiConvMol.deg_slice,
		np.array(multiConvMol.membership), n_samples, dropout
		multiConvMol.get_atom_features(),
		multiConvMol.deg_slice,
		#np.array(multiConvMol.membership), n_samples, dropout
		np.array(multiConvMol.membership),
		n_samples
		]
		for i in range(1, len(multiConvMol.get_deg_adjacency_lists())):
		inputs.append(multiConvMol.get_deg_adjacency_lists()[i])
		@@ -722,7 +768,19 @@ class GraphConvModel(KerasModel):

		class MPNNModel(KerasModel):
		""" Message Passing Neural Network,
		default structures built according to https://arxiv.org/abs/1511.06391 """

		Message Passing Neural Networks treat graph convolutional
		operations as an instantiation of a more general message
		passing scheme. Recall that message passing in a graph is when
		nodes in a graph send each other "messages" and update their
		internal state as a consequence of these messages.

		Ordering structures in this model are built according to


		Vinyals, Oriol, Samy Bengio, and Manjunath Kudlur. "Order matters: Sequence to sequence for sets." arXiv preprint arXiv:1511.06391 (2015).

		"""

		def __init__(self,
		n_tasks,

deepchem/models/keras_model.py

+68 −54

Original line number	Diff line number	Diff line
		@@ -21,61 +21,74 @@ from deepchem.utils.evaluate import GeneratorEvaluator
		class KerasModel(Model):
		"""This is a DeepChem model implemented by a Keras model.

		This class provides several advantages over using the Keras model's fitting
		and prediction methods directly.

		1. It provides better integration with the rest of DeepChem, such as direct
		support for Datasets and Transformers.

		2. It defines the loss in a more flexible way. In particular, Keras does not
		support multidimensional weight matrices, which makes it impossible to
		implement most multitask models with Keras.

		3. It provides various additional features not found in the Keras Model class,
		such as uncertainty prediction and saliency mapping.

		The loss function for a model can be defined in two different ways. For
		models that have only a single output and use a standard loss function, you
		can simply provide a dc.models.losses.Loss object. This defines the loss for
		each sample or sample/task pair. The result is automatically multiplied by
		the weights and averaged over the batch. Any additional losses computed by
		model layers, such as weight decay penalties, are also added.

		For more complicated cases, you can instead provide a function that directly
		computes the total loss. It must be of the form f(outputs, labels, weights),
		taking the list of outputs from the model, the expected values, and any weight
		matrices. It should return a scalar equal to the value of the loss function
		for the batch. No additional processing is done to the result; it is up to
		you to do any weighting, averaging, adding of penalty terms, etc.

		You can optionally provide an output_types argument, which describes how to
		interpret the model's outputs. This should be a list of strings, one for each
		output. Each entry must have one of the following values:
		This class provides several advantages over using the Keras
		model's fitting and prediction methods directly.

		1. It provides better integration with the rest of DeepChem,
		such as direct support for Datasets and Transformers.

		2. It defines the loss in a more flexible way. In particular,
		Keras does not support multidimensional weight matrices,
		which makes it impossible to implement most multitask
		models with Keras.

		3. It provides various additional features not found in the
		Keras Model class, such as uncertainty prediction and
		saliency mapping.

		The loss function for a model can be defined in two different
		ways. For models that have only a single output and use a
		standard loss function, you can simply provide a
		dc.models.losses.Loss object. This defines the loss for each
		sample or sample/task pair. The result is automatically
		multiplied by the weights and averaged over the batch. Any
		additional losses computed by model layers, such as weight
		decay penalties, are also added.

		For more complicated cases, you can instead provide a function
		that directly computes the total loss. It must be of the form
		f(outputs, labels, weights), taking the list of outputs from
		the model, the expected values, and any weight matrices. It
		should return a scalar equal to the value of the loss function
		for the batch. No additional processing is done to the
		result; it is up to you to do any weighting, averaging, adding
		of penalty terms, etc.

		You can optionally provide an output_types argument, which
		describes how to interpret the model's outputs. This should
		be a list of strings, one for each output. Each entry must
		have one of the following values:

		- 'prediction': This is a normal output, and will be returned by predict().
		If output types are not specified, all outputs are assumed to be of this
		type.

		- 'loss': This output will be used in place of the normal outputs for
		computing the loss function. For example, models that output probability
		distributions usually do it by computing unbounded numbers (the logits),
		then passing them through a softmax function to turn them into
		probabilities. When computing the cross entropy, it is more numerically
		stable to use the logits directly rather than the probabilities. You can
		do this by having the model produce both probabilities and logits as
		outputs, then specifying output_types=['prediction', 'loss']. When
		predict() is called, only the first output (the probabilities) will be
		returned. But during training, it is the second output (the logits) that
		will be passed to the loss function.

		- 'variance': This output is used for estimating the uncertainty in another
		output. To create a model that can estimate uncertainty, there must be the
		same number of 'prediction' and 'variance' outputs. Each variance output
		must have the same shape as the corresponding prediction output, and each
		element is an estimate of the variance in the corresponding prediction.
		Also be aware that if a model supports uncertainty, it MUST use dropout on
		every layer, and dropout most be enabled during uncertainty prediction.
		If output types are not specified, all outputs are assumed
		to be of this type.

		- 'loss': This output will be used in place of the normal
		outputs for computing the loss function. For example,
		models that output probability distributions usually do it
		by computing unbounded numbers (the logits), then passing
		them through a softmax function to turn them into
		probabilities. When computing the cross entropy, it is more
		numerically stable to use the logits directly rather than
		the probabilities. You can do this by having the model
		produce both probabilities and logits as outputs, then
		specifying output_types=['prediction', 'loss']. When
		predict() is called, only the first output (the
		probabilities) will be returned. But during training, it is
		the second output (the logits) that will be passed to the
		loss function.

		- 'variance': This output is used for estimating the
		uncertainty in another output. To create a model that can
		estimate uncertainty, there must be the same number of
		'prediction' and 'variance' outputs. Each variance output
		must have the same shape as the corresponding prediction
		output, and each element is an estimate of the variance in
		the corresponding prediction. Also be aware that if a model
		supports uncertainty, it MUST use dropout on every layer,
		and dropout must be enabled during uncertainty prediction.
		Otherwise, the uncertainties it computes will be inaccurate.

		- 'embedding': This output is an embedding that the model
		generates internally which should be returned to users.
		"""
		@@ -374,6 +387,7 @@ class KerasModel(Model):
		def apply_gradient_for_batch(inputs, labels, weights, loss):
		with tf.GradientTape() as tape:
		outputs = self.model(inputs, training=True)
		#outputs = self.model(inputs)
		if isinstance(outputs, tf.Tensor):
		outputs = [outputs]
		if self._loss_outputs is not None:
		@@ -469,7 +483,7 @@ class KerasModel(Model):
		if embedding:
		assert outputs is None
		if self._embedding_outputs is None or len(self._embedding_outputs) == 0:
		raise ValueError('This model cannot compute embneddings.')
		raise ValueError('This model cannot compute embeddings.')
		if (outputs is not None and self.model.inputs is not None and
		len(self.model.inputs) == 0):
		raise ValueError(

Admin message