Commit 3596f6b3 authored by Bharath Ramsundar's avatar Bharath Ramsundar
Browse files

Changes

parent bd0c2434
Loading
Loading
Loading
Loading
+47 −21
Original line number Diff line number Diff line
@@ -4,9 +4,9 @@ import deepchem as dc
import numpy as np
import tensorflow as tf

from typing import List, Union
from typing import List, Union, Tuple, Iterable
from deepchem.utils.typing import OneOrMany, KerasLossFn
from deepchem.data import NumpyDataset, pad_features
from deepchem.data import Dataset, NumpyDataset, pad_features
from deepchem.feat.graph_features import ConvMolFeaturizer
from deepchem.feat.mol_graphs import ConvMol
from deepchem.metrics import to_one_hot
@@ -107,6 +107,7 @@ class WeaveModel(KerasModel):
    if not isinstance(n_pair_feat, collections.Sequence):
      n_pair_feat = [n_pair_feat] * n_weave

    self.n_tasks = n_tasks
    self.n_atom_feat = n_atom_feat
    self.n_pair_feat = n_pair_feat
    self.n_hidden = n_hidden
@@ -176,11 +177,31 @@ class WeaveModel(KerasModel):
        model, loss, output_types=output_types, batch_size=batch_size, **kwargs)

  def default_generator(self,
                        dataset,
                        epochs=1,
                        mode='fit',
                        dataset: Dataset,
                        epochs: int = 1,
                        mode: str = 'fit',
                        deterministic=True,
                        pad_batches=True):
                        pad_batches=True) -> Iterable[Tuple[List, List, List]]:
    """Convert a dataset into the tensors needed for learning.

    Parameters
    ----------
    dataset: `dc.data.Dataset`
      Dataset to convert
    epochs: int, optional (Default 1)
      Number of times to walk over `dataset`
    mode: str, optional (Default 'fit')
      Ignored in this implementation.
    deterministic: bool, optional (Default True)
      Whether the dataset should be walked in a deterministic fashion
    pad_batches: bool, optional (Default True)
      If true, each returned batch will have size `self.batch_size`.

    Returns
    -------
    Iterator which walks over the batches
    """

    for epoch in range(epochs):
      for (X_b, y_b, w_b, ids_b) in dataset.iterbatches(
          batch_size=self.batch_size,
@@ -215,7 +236,7 @@ class WeaveModel(KerasModel):
          # pair features
          pair_feat.append(
              np.reshape(mol.get_pair_features(),
                         (n_atoms * n_atoms, self.n_pair_feat)))
                         (n_atoms * n_atoms, self.n_pair_feat[0])))

        inputs = [
            np.concatenate(atom_feat, axis=0),
@@ -230,9 +251,12 @@ class WeaveModel(KerasModel):
class DTNNModel(KerasModel):
  """Deep Tensor Neural Networks

  This class implements deep tensor neural networks as first defined in
  This class implements deep tensor neural networks as first defined in [1]_

  Schütt, Kristof T., et al. "Quantum-chemical insights from deep tensor neural networks." Nature communications 8.1 (2017): 1-8.
  References
  ----------
  .. [1] Schütt, Kristof T., et al. "Quantum-chemical insights from deep
  tensor neural networks." Nature communications 8.1 (2017): 1-8.
  """

  def __init__(self,
@@ -538,7 +562,7 @@ class DAGModel(KerasModel):
                        mode='fit',
                        deterministic=True,
                        pad_batches=True):
    """TensorGraph style implementation"""
    """Convert a dataset into the tensors needed for learning"""
    for epoch in range(epochs):
      for (X_b, y_b, w_b, ids_b) in dataset.iterbatches(
          batch_size=self.batch_size,
@@ -738,10 +762,11 @@ class GraphConvModel(KerasModel):
    dense_layer_size: int
      Width of channels for Atom Level Dense Layer before GraphPool
    dropout: list or float
      the dropout probability to use for each layer.  The length of this list should equal
      len(graph_conv_layers)+1 (one value for each convolution layer, and one for the
      dense layer).  Alternatively this may be a single value instead of a list, in which
      case the same value is used for every layer.
      the dropout probability to use for each layer.  The length of this list
      should equal len(graph_conv_layers)+1 (one value for each convolution
      layer, and one for the dense layer).  Alternatively this may be a single
      value instead of a list, in which case the same value is used for every
      layer.
    mode: str
      Either "classification" or "regression"
    number_atom_features: int
@@ -822,11 +847,12 @@ class MPNNModel(KerasModel):
  nodes in a graph send each other "messages" and update their
  internal state as a consequence of these messages.

  Ordering structures in this model are built according to


Vinyals, Oriol, Samy Bengio, and Manjunath Kudlur. "Order matters: Sequence to sequence for sets." arXiv preprint arXiv:1511.06391 (2015).
  Ordering structures in this model are built according to [1]_

  References
  ----------
  .. [1] Vinyals, Oriol, Samy Bengio, and Manjunath Kudlur. "Order matters:
  Sequence to sequence for sets." arXiv preprint arXiv:1511.06391 (2015).
  """

  def __init__(self,
+107 −3
Original line number Diff line number Diff line
@@ -8,7 +8,24 @@ from tensorflow.keras.layers import Dropout


class InteratomicL2Distances(tf.keras.layers.Layer):
  """Compute (squared) L2 Distances between atoms given neighbors."""
  """Compute (squared) L2 Distances between atoms given neighbors.

  This class computes pairwise distances between its inputs.

  Examples
  --------
  >>> import numpy as np
  >>> import deepchem as dc
  >>> atoms = 5
  >>> neighbors = 2
  >>> coords = np.random.rand(atoms, 3)
  >>> neighbor_list = np.random.randint(0, atoms, size=(atoms, neighbors))
  >>> layer = InteratomicL2Distances(atoms, neighbors, 3)
  >>> result = np.array(layer([coords, neighbor_list]))
  >>> result.shape
  (5, 2)

  """

  def __init__(self, N_atoms: int, M_nbrs: int, ndim: int, **kwargs):
    """Constructor for this layer.
@@ -40,7 +57,12 @@ class InteratomicL2Distances(tf.keras.layers.Layer):
    Parameters
    ----------
    inputs: list
      Should be of form `inputs=[coords, nbr_list]` where `coords` is a tensor of shape `(None, N, 3)` and `nbr_list` is a list.
      Should be of form `inputs=[coords, nbr_list]` where `coords` is a
      tensor of shape `(None, N, 3)` and `nbr_list` is a list.

    Returns
    -------
    Tensor of shape `(N_atoms, M_nbrs)` with interatomic distances.
    """
    if len(inputs) != 2:
      raise ValueError("InteratomicDistances requires coords,nbr_list")
@@ -2062,6 +2084,88 @@ class WeaveLayer(tf.keras.layers.Layer):
  There are 2 types of transformation, atom->atom, atom->pair,
  pair->atom, pair->pair that this model implements.

  Examples
  --------
  This layer expects 4 inputs in a list of the form `[atom_features,
  pair_features, pair_split, atom_to_pair]`. We'll walk through the structure
  of these inputs. Let's start with some basic definitions.

  >>> import deepchem as dc
  >>> import numpy as np

  Suppose you have a batch of molecules

  >>> smiles = ["CCC", "C"]

  Note that there are 4 atoms in total in this system. This layer expects its
  input molecules to be batched together.

  >>> total_n_atoms = 4

  Let's suppose that we have a featurizer that computes `n_atom_feat` features
  per atom.

  >>> n_atom_feat = 75

  Then conceptually, `atom_feat` is the array of shape `(total_n_atoms,
  n_atom_feat)` of atomic features. For simplicity, let's just go with a
  random such matrix.

  >>> atom_feat = np.random.rand(total_n_atoms, n_atom_feat)

  Let's suppose we have `n_pair_feat` pairwise features

  >>> n_pair_feat = 14

  For each molecule, we compute a matrix of shape `(n_atoms*n_atoms,
  n_pair_feat)` of pairwise features for each pair of atoms in the molecule.
  Let's construct this conceptually for our example.

  >>> pair_feat = [np.random.rand(3*3, n_pair_feat), np.random.rand(1*1, n_pair_feat)]
  >>> pair_feat = np.concatenate(pair_feat, axis=0)
  >>> pair_feat.shape
  (10, 14)

  `pair_split` is an index into `pair_feat` which tells us which atom each row belongs to. In our case, we have

  >>> pair_split = np.array([0, 0, 0, 1, 1, 1, 2, 2, 2, 3])

  That is, the first 9 entries belong to "CCC" and the last entry to "C". The
  final entry `atom_to_pair` goes in a little more in-depth than `pair_split`
  and tells us the precise pair each pair feature belongs to. In our case

  >>> atom_to_pair = np.array([[0, 0],
  ...                          [0, 1],
  ...                          [0, 2],
  ...                          [1, 0],
  ...                          [1, 1],
  ...                          [1, 2],
  ...                          [2, 0],
  ...                          [2, 1],
  ...                          [2, 2],
  ...                          [3, 3]])

  Let's now define the actual layer

  >>> layer = WeaveLayer()

  And invoke it

  >>> [A, P] = layer([atom_feat, pair_feat, pair_split, atom_to_pair])

  The weave layer produces new atom/pair features. Let's check their shapes

  >>> A = np.array(A)
  >>> A.shape
  (4, 50)
  >>> P = np.array(P)
  >>> P.shape
  (10, 50)

  The 4 is `total_n_atoms` and the 10 is the total number of pairs. Where
  does `50` come from? It's from the default arguments `n_atom_output_feat` and
  `n_pair_output_feat`.

  References
  ----------
  .. [1] Kearnes, Steven, et al. "Molecular graph convolutions: moving beyond
@@ -2180,7 +2284,7 @@ class WeaveLayer(tf.keras.layers.Layer):
      ])
    self.built = True

  def call(self, inputs: List):
  def call(self, inputs: List) -> List:
    """Creates weave tensors.

    Parameters