Commit 3596f6b3 authored by Bharath Ramsundar's avatar Bharath Ramsundar
Browse files

Changes

parent bd0c2434
Loading
Loading
Loading
Loading
+47 −21
Original line number Diff line number Diff line
@@ -4,9 +4,9 @@ import deepchem as dc
import numpy as np
import tensorflow as tf

from typing import List, Union
from typing import List, Union, Tuple, Iterable
from deepchem.utils.typing import OneOrMany, KerasLossFn
from deepchem.data import NumpyDataset, pad_features
from deepchem.data import Dataset, NumpyDataset, pad_features
from deepchem.feat.graph_features import ConvMolFeaturizer
from deepchem.feat.mol_graphs import ConvMol
from deepchem.metrics import to_one_hot
@@ -107,6 +107,7 @@ class WeaveModel(KerasModel):
    if not isinstance(n_pair_feat, collections.Sequence):
      n_pair_feat = [n_pair_feat] * n_weave

    self.n_tasks = n_tasks
    self.n_atom_feat = n_atom_feat
    self.n_pair_feat = n_pair_feat
    self.n_hidden = n_hidden
@@ -176,11 +177,31 @@ class WeaveModel(KerasModel):
        model, loss, output_types=output_types, batch_size=batch_size, **kwargs)

  def default_generator(self,
                        dataset,
                        epochs=1,
                        mode='fit',
                        dataset: Dataset,
                        epochs: int = 1,
                        mode: str = 'fit',
                        deterministic=True,
                        pad_batches=True):
                        pad_batches=True) -> Iterable[Tuple[List, List, List]]:
    """Convert a dataset into the tensors needed for learning.

    Parameters
    ----------
    dataset: `dc.data.Dataset`
      Dataset to convert
    epochs: int, optional (Default 1)
      Number of times to walk over `dataset`
    mode: str, optional (Default 'fit')
      Ignored in this implementation.
    deterministic: bool, optional (Default True)
      Whether the dataset should be walked in a deterministic fashion
    pad_batches: bool, optional (Default True)
      If true, each returned batch will have size `self.batch_size`.

    Returns
    -------
    Iterator which walks over the batches
    """

    for epoch in range(epochs):
      for (X_b, y_b, w_b, ids_b) in dataset.iterbatches(
          batch_size=self.batch_size,
@@ -215,7 +236,7 @@ class WeaveModel(KerasModel):
          # pair features
          pair_feat.append(
              np.reshape(mol.get_pair_features(),
                         (n_atoms * n_atoms, self.n_pair_feat)))
                         (n_atoms * n_atoms, self.n_pair_feat[0])))

        inputs = [
            np.concatenate(atom_feat, axis=0),
@@ -230,9 +251,12 @@ class WeaveModel(KerasModel):
class DTNNModel(KerasModel):
  """Deep Tensor Neural Networks

  This class implements deep tensor neural networks as first defined in
  This class implements deep tensor neural networks as first defined in [1]_

  Schütt, Kristof T., et al. "Quantum-chemical insights from deep tensor neural networks." Nature communications 8.1 (2017): 1-8.
  References
  ----------
  .. [1] Schütt, Kristof T., et al. "Quantum-chemical insights from deep
  tensor neural networks." Nature communications 8.1 (2017): 1-8.
  """

  def __init__(self,
@@ -538,7 +562,7 @@ class DAGModel(KerasModel):
                        mode='fit',
                        deterministic=True,
                        pad_batches=True):
    """TensorGraph style implementation"""
    """Convert a dataset into the tensors needed for learning"""
    for epoch in range(epochs):
      for (X_b, y_b, w_b, ids_b) in dataset.iterbatches(
          batch_size=self.batch_size,
@@ -738,10 +762,11 @@ class GraphConvModel(KerasModel):
    dense_layer_size: int
      Width of channels for Atom Level Dense Layer before GraphPool
    dropout: list or float
      the dropout probability to use for each layer.  The length of this list should equal
      len(graph_conv_layers)+1 (one value for each convolution layer, and one for the
      dense layer).  Alternatively this may be a single value instead of a list, in which
      case the same value is used for every layer.
      the dropout probability to use for each layer.  The length of this list
      should equal len(graph_conv_layers)+1 (one value for each convolution
      layer, and one for the dense layer).  Alternatively this may be a single
      value instead of a list, in which case the same value is used for every
      layer.
    mode: str
      Either "classification" or "regression"
    number_atom_features: int
@@ -822,11 +847,12 @@ class MPNNModel(KerasModel):
  nodes in a graph send each other "messages" and update their
  internal state as a consequence of these messages.

  Ordering structures in this model are built according to


Vinyals, Oriol, Samy Bengio, and Manjunath Kudlur. "Order matters: Sequence to sequence for sets." arXiv preprint arXiv:1511.06391 (2015).
  Ordering structures in this model are built according to [1]_

  References
  ----------
  .. [1] Vinyals, Oriol, Samy Bengio, and Manjunath Kudlur. "Order matters:
  Sequence to sequence for sets." arXiv preprint arXiv:1511.06391 (2015).
  """

  def __init__(self,
+107 −3
Original line number Diff line number Diff line
@@ -8,7 +8,24 @@ from tensorflow.keras.layers import Dropout


class InteratomicL2Distances(tf.keras.layers.Layer):
  """Compute (squared) L2 Distances between atoms given neighbors."""
  """Compute (squared) L2 Distances between atoms given neighbors.

  This class computes pairwise distances between its inputs.

  Examples
  --------
  >>> import numpy as np
  >>> import deepchem as dc
  >>> atoms = 5
  >>> neighbors = 2
  >>> coords = np.random.rand(atoms, 3)
  >>> neighbor_list = np.random.randint(0, atoms, size=(atoms, neighbors))
  >>> layer = InteratomicL2Distances(atoms, neighbors, 3)
  >>> result = np.array(layer([coords, neighbor_list]))
  >>> result.shape
  (5, 2)

  """

  def __init__(self, N_atoms: int, M_nbrs: int, ndim: int, **kwargs):
    """Constructor for this layer.
@@ -40,7 +57,12 @@ class InteratomicL2Distances(tf.keras.layers.Layer):
    Parameters
    ----------
    inputs: list
      Should be of form `inputs=[coords, nbr_list]` where `coords` is a tensor of shape `(None, N, 3)` and `nbr_list` is a list.
      Should be of form `inputs=[coords, nbr_list]` where `coords` is a
      tensor of shape `(None, N, 3)` and `nbr_list` is a list.

    Returns
    -------
    Tensor of shape `(N_atoms, M_nbrs)` with interatomic distances.
    """
    if len(inputs) != 2:
      raise ValueError("InteratomicDistances requires coords,nbr_list")
@@ -2062,6 +2084,88 @@ class WeaveLayer(tf.keras.layers.Layer):
  There are 2 types of transformation, atom->atom, atom->pair,
  pair->atom, pair->pair that this model implements.

  Examples
  --------
  This layer expects 4 inputs in a list of the form `[atom_features,
  pair_features, pair_split, atom_to_pair]`. We'll walk through the structure
  of these inputs. Let's start with some basic definitions.

  >>> import deepchem as dc
  >>> import numpy as np

  Suppose you have a batch of molecules

  >>> smiles = ["CCC", "C"]

  Note that there are 4 atoms in total in this system. This layer expects its
  input molecules to be batched together.

  >>> total_n_atoms = 4

  Let's suppose that we have a featurizer that computes `n_atom_feat` features
  per atom.

  >>> n_atom_feat = 75

  Then conceptually, `atom_feat` is the array of shape `(total_n_atoms,
  n_atom_feat)` of atomic features. For simplicity, let's just go with a
  random such matrix.

  >>> atom_feat = np.random.rand(total_n_atoms, n_atom_feat)

  Let's suppose we have `n_pair_feat` pairwise features

  >>> n_pair_feat = 14

  For each molecule, we compute a matrix of shape `(n_atoms*n_atoms,
  n_pair_feat)` of pairwise features for each pair of atoms in the molecule.
  Let's construct this conceptually for our example.

  >>> pair_feat = [np.random.rand(3*3, n_pair_feat), np.random.rand(1*1, n_pair_feat)]
  >>> pair_feat = np.concatenate(pair_feat, axis=0)
  >>> pair_feat.shape
  (10, 14)

  `pair_split` is an index into `pair_feat` which tells us which atom each row belongs to. In our case, we have

  >>> pair_split = np.array([0, 0, 0, 1, 1, 1, 2, 2, 2, 3])

  That is, the first 9 entries belong to "CCC" and the last entry to "C". The
  final entry `atom_to_pair` goes in a little more in-depth than `pair_split`
  and tells us the precise pair each pair feature belongs to. In our case

  >>> atom_to_pair = np.array([[0, 0],
  ...                          [0, 1],
  ...                          [0, 2],
  ...                          [1, 0],
  ...                          [1, 1],
  ...                          [1, 2],
  ...                          [2, 0],
  ...                          [2, 1],
  ...                          [2, 2],
  ...                          [3, 3]])

  Let's now define the actual layer

  >>> layer = WeaveLayer()

  And invoke it

  >>> [A, P] = layer([atom_feat, pair_feat, pair_split, atom_to_pair])

  The weave layer produces new atom/pair features. Let's check their shapes

  >>> A = np.array(A)
  >>> A.shape
  (4, 50)
  >>> P = np.array(P)
  >>> P.shape
  (10, 50)

  The 4 is `total_n_atoms` and the 10 is the total number of pairs. Where
  does `50` come from? It's from the default arguments `n_atom_output_feat` and
  `n_pair_output_feat`.

  References
  ----------
  .. [1] Kearnes, Steven, et al. "Molecular graph convolutions: moving beyond
@@ -2180,7 +2284,7 @@ class WeaveLayer(tf.keras.layers.Layer):
      ])
    self.built = True

  def call(self, inputs: List):
  def call(self, inputs: List) -> List:
    """Creates weave tensors.

    Parameters