Commit f70938eb authored by Bharath Ramsundar's avatar Bharath Ramsundar
Browse files

Cleaning up tests

parent bac742c5
Loading
Loading
Loading
Loading
+79 −1
Original line number Diff line number Diff line
@@ -70,11 +70,20 @@ reference_lists = [
]

intervals = get_intervals(reference_lists)
# We use E-Z notation for stereochemistry
# https://en.wikipedia.org/wiki/E%E2%80%93Z_notation
possible_bond_stereo = ["STEREONONE", "STEREOANY", "STEREOZ", "STEREOE"]
bond_fdim_base = 6


def get_feature_list(atom):
  """Returns a list of possible features for this atom.

  Parameters
  ----------
  atom: RDKit.rdchem.Atom
    Atom to get features for 
  """
  features = 6 * [0]
  features[0] = safe_index(possible_atom_list, atom.GetSymbol())
  features[1] = safe_index(possible_numH_list, atom.GetTotalNumHs())
@@ -113,7 +122,13 @@ def id_to_features(id, intervals):


def atom_to_id(atom):
  """Return a unique id corresponding to the atom type"""
  """Return a unique id corresponding to the atom type

  Parameters
  ----------
  atom: RDKit.rdchem.Atom
    Atom to convert to ids.
  """
  features = get_feature_list(atom)
  return features_to_id(features, intervals)

@@ -122,6 +137,19 @@ def atom_features(atom,
                  bool_id_feat=False,
                  explicit_H=False,
                  use_chirality=False):
  """Helper method used to compute per-atom feature vectors.

  Many different featurization methods compute per-atom features such as ConvMolFeaturizer, WeaveFeaturizer. This method computes such features.

  Parameters
  ----------
  bool_id_feat: bool, optional
    Return an array of unique identifiers corresponding to atom type.
  explicit_H: bool, optional
    If true, model hydrogens explicitly
  use_chirality: bool, optional
    If true, use chirality information.
  """
  if bool_id_feat:
    return np.array([atom_to_id(atom)])
  else:
@@ -199,6 +227,16 @@ def atom_features(atom,


def bond_features(bond, use_chirality=False):
  """Helper method used to compute bond feature vectors.

  Many different featurization methods compute bond features
  such as WeaveFeaturizer. This method computes such features.

  Parameters
  ----------
  use_chirality: bool, optional
    If true, use chirality information.
  """
  from rdkit import Chem
  bt = bond.GetBondType()
  bond_feats = [
@@ -215,6 +253,26 @@ def bond_features(bond, use_chirality=False):

def pair_features(mol, edge_list, canon_adj_list, bt_len=6,
                  graph_distance=True):
  """Helper method used to compute atom pair feature vectors.

  Many different featurization methods compute atom pair features
  such as WeaveFeaturizer. Note that atom pair features could be
  for pairs of atoms which aren't necessarily bonded to one
  another. 

  Parameters
  ----------
  mol: TODO
    TODO
  edge_list: list
    List of edges to consider
  canon_adj_list: list
    TODO
  bt_len: int, optional
    TODO
  graph_distance: bool, optional
    TODO
  """
  if graph_distance:
    max_distance = 7
  else:
@@ -271,6 +329,10 @@ def find_distance(a1, num_atoms, canon_adj_list, max_distance=7):


class ConvMolFeaturizer(Featurizer):
  """This class implements the featurization to implement graph convolutions from the Duvenaud graph convolution paper.

Duvenaud, David K., et al. "Convolutional networks on graphs for learning molecular fingerprints." Advances in neural information processing systems. 2015.
  """
  name = ['conv_mol']

  def __init__(self, master_atom=False, use_chirality=False,
@@ -381,10 +443,26 @@ class ConvMolFeaturizer(Featurizer):


class WeaveFeaturizer(Featurizer):
  """This class implements the featurization to implement Weave convolutions from the Google graph convolution paper.

  Kearnes, Steven, et al. "Molecular graph convolutions: moving beyond fingerprints." Journal of computer-aided molecular design 30.8 (2016): 595-608.
  """

  name = ['weave_mol']

  def __init__(self, graph_distance=True, explicit_H=False,
               use_chirality=False):
    """
    Parameters
    ----------
    graph_distance: bool, optional
      If true, use graph distance. Otherwise, use Euclidean
      distance.
    explicit_H: bool, optional
      If true, model hydrogens in the molecule.
    use_chirality: bool, optional
      If true, use chiral information in the featurization
    """
    # Distance is either graph distance(True) or Euclidean distance(False,
    # only support datasets providing Cartesian coordinates)
    self.graph_distance = graph_distance
+6 −2
Original line number Diff line number Diff line
@@ -386,8 +386,12 @@ class MultiConvMol(object):


class WeaveMol(object):
  """Holds information about a molecule
  Molecule struct used in weave models
  """Molecular featurization object for weave convolutions.

  These objects are produced by WeaveFeaturizer, and feed into
  WeaveModel. The underlying implementation is inspired by:

  Kearnes, Steven, et al. "Molecular graph convolutions: moving beyond fingerprints." Journal of computer-aided molecular design 30.8 (2016): 595-608.
  """

  def __init__(self, nodes, pairs):
+15 −10
Original line number Diff line number Diff line
@@ -54,6 +54,7 @@ def initializeWeightsBiases(prev_layer_size,


class AtomicConvScore(Layer):
  """The scoring function used by the atomic convolution models."""

  def __init__(self, atom_types, layer_sizes, **kwargs):
    super(AtomicConvScore, self).__init__(**kwargs)
@@ -145,6 +146,19 @@ class AtomicConvScore(Layer):


class AtomicConvModel(KerasModel):
  """Implements an Atomic Convolution Model.

  Implements the atomic convolutional networks as introduced in

  Gomes, Joseph, et al. "Atomic convolutional networks for predicting protein-ligand binding affinity." arXiv preprint arXiv:1703.10603 (2017).

  The atomic convolutional networks function as a variant of
  graph convolutions. The difference is that the "graph" here is
  the nearest neighbors graph in 3D space. The AtomicConvModel
  leverages these connections in 3D space to train models that
  learn to predict energetic state starting from the spatial
  geometry of the model.
  """

  def __init__(self,
               frag1_num_atoms=70,
@@ -163,16 +177,7 @@ class AtomicConvModel(KerasModel):
               layer_sizes=[32, 32, 16],
               learning_rate=0.001,
               **kwargs):
    """Implements an Atomic Convolution Model.

    Implements the atomic convolutional networks as introduced in
    https://arxiv.org/abs/1703.10603. The atomic convolutional networks
    function as a variant of graph convolutions. The difference is that the
    "graph" here is the nearest neighbors graph in 3D space. The
    AtomicConvModel leverages these connections in 3D space to train models
    that learn to predict energetic state starting from the spatial
    geometry of the model.

    """   
    Parameters
    ----------
    frag1_num_atoms: int
+92 −34
Original line number Diff line number Diff line
@@ -30,6 +30,21 @@ class TrimGraphOutput(tf.keras.layers.Layer):


class WeaveModel(KerasModel):
  """Implements Google-style Weave Graph Convolutions

  This model implements the Weave style graph convolutions
  from the following paper.

  Kearnes, Steven, et al. "Molecular graph convolutions: moving beyond fingerprints." Journal of computer-aided molecular design 30.8 (2016): 595-608.

  The biggest difference between WeaveModel style convolutions
  and GraphConvModel style convolutions is that Weave
  convolutions model bond features explicitly. This has the
  side effect that it needs to construct a NxN matrix
  explicitly to model bond interactions. This may cause
  scaling issues, but may possibly allow for better modeling
  of subtle bond effects.
  """

  def __init__(self,
               n_tasks,
@@ -90,7 +105,9 @@ class WeaveModel(KerasModel):
        update_pair=False)(
            [weave_layer1A, weave_layer1P, pair_split, atom_to_pair])
    dense1 = Dense(self.n_graph_feat, activation=tf.nn.tanh)(weave_layer2A)
    batch_norm1 = BatchNormalization(epsilon=1e-5)(dense1)
    # Batch normalization causes issues, spitting out NaNs if
    # allowed to train
    batch_norm1 = BatchNormalization(epsilon=1e-5, trainable=False)(dense1)
    weave_gather = layers.WeaveGather(
        batch_size, n_input=self.n_graph_feat,
        gaussian_expand=True)([batch_norm1, atom_split])
@@ -170,6 +187,12 @@ class WeaveModel(KerasModel):


class DTNNModel(KerasModel):
  """Deep Tensor Neural Networks

  This class implements deep tensor neural networks as first defined in

  Schütt, Kristof T., et al. "Quantum-chemical insights from deep tensor neural networks." Nature communications 8.1 (2017): 1-8.
  """

  def __init__(self,
               n_tasks,
@@ -322,6 +345,16 @@ class DTNNModel(KerasModel):


class DAGModel(KerasModel):
  """Directed Acyclic Graph models for molecular property prediction.

    This model is based on the following paper: 

    Lusci, Alessandro, Gianluca Pollastri, and Pierre Baldi. "Deep architectures and deep learning in chemoinformatics: the prediction of aqueous solubility for drug-like molecules." Journal of chemical information and modeling 53.7 (2013): 1563-1575.

   The basic idea for this paper is that a molecule is usually viewed as an undirected graph. However, you can convert it to a series of directed graphs. The idea is that for each atom, you make a DAG using that atom as the vertex of the DAG and edges pointing "inwards" to it. This transformation is implemented in dc.trans.transformers.DAGTransformer.UG_to_DAG.

   This model accepts ConvMols as input, just as GraphConvModel does, but these ConvMol objects must be transformed by dc.trans.DAGTransformer. 
   """

  def __init__(self,
               n_tasks,
@@ -337,16 +370,7 @@ class DAGModel(KerasModel):
               uncertainty=False,
               batch_size=100,
               **kwargs):
    """Directed Acyclic Graph models for molecular property prediction.

    This model is based on the following paper: 

    Lusci, Alessandro, Gianluca Pollastri, and Pierre Baldi. "Deep architectures and deep learning in chemoinformatics: the prediction of aqueous solubility for drug-like molecules." Journal of chemical information and modeling 53.7 (2013): 1563-1575.

   The basic idea for this paper is that a molecule is usually viewed as an undirected graph. However, you can convert it to a series of directed graphs. The idea is that for each atom, you make a DAG using that atom as the vertex of the DAG and edges pointing "inwards" to it. This transformation is implemented in dc.trans.transformers.DAGTransformer.UG_to_DAG.

   This model accepts ConvMols as input, just as GraphConvModel does, but these ConvMol objects must be transformed by dc.trans.DAGTransformer. 

    """   
    Parameters
    ----------
    n_tasks: int
@@ -429,7 +453,12 @@ class DAGModel(KerasModel):
      output_types = ['prediction', 'loss']
      loss = SoftmaxCrossEntropy()
    else:
      output = Dense(n_tasks)(dag_gather)
      fc_layer_size = 50
      inter = Dense(fc_layer_size)(dag_gather)
      if self.dropout is not None and self.dropout > 0.0:
        inter = Dropout(rate=self.dropout)(inter)
      #output = Dense(n_tasks)(dag_gather)
      output = Dense(n_tasks)(inter)
      if self.uncertainty:
        log_var = Dense(n_tasks)(dag_gather)
        var = Activation(tf.exp)(log_var)
@@ -514,6 +543,7 @@ class _GraphConvKerasModel(tf.keras.Model):
               mode="classification",
               number_atom_features=75,
               n_classes=2,
               batch_normalize=True,
               uncertainty=False,
               batch_size=100):
    """An internal keras model class.
@@ -548,12 +578,11 @@ class _GraphConvKerasModel(tf.keras.Model):
        for layer_size in graph_conv_layers
    ]
    self.batch_norms = [
        BatchNormalization(fused=False)
        BatchNormalization(fused=False) if batch_normalize else None
        for _ in range(len(graph_conv_layers) + 1)
    ]
    self.dropouts = [
        layers.SwitchedDropout(rate=rate) if rate > 0.0 else None
        for rate in dropout
        Dropout(rate=rate) if rate > 0.0 else None for rate in dropout
    ]
    self.graph_pools = [layers.GraphPool() for _ in graph_conv_layers]
    self.dense = Dense(dense_layer_size, activation=tf.nn.relu)
@@ -571,29 +600,30 @@ class _GraphConvKerasModel(tf.keras.Model):
        self.uncertainty_trim = TrimGraphOutput()
        self.uncertainty_activation = Activation(tf.exp)

  def call(self, inputs):
  def call(self, inputs, training=False):
    atom_features = inputs[0]
    degree_slice = tf.cast(inputs[1], dtype=tf.int32)
    membership = tf.cast(inputs[2], dtype=tf.int32)
    n_samples = tf.cast(inputs[3], dtype=tf.int32)
    dropout_switch = inputs[4]
    deg_adjs = [tf.cast(deg_adj, dtype=tf.int32) for deg_adj in inputs[5:]]
    deg_adjs = [tf.cast(deg_adj, dtype=tf.int32) for deg_adj in inputs[4:]]

    in_layer = atom_features
    for i in range(len(self.graph_convs)):
      gc_in = [in_layer, degree_slice, membership] + deg_adjs
      gc1 = self.graph_convs[i](gc_in)
      batch_norm1 = self.batch_norms[i](gc1)
      if self.dropouts[i] is not None:
        batch_norm1 = self.dropouts[i]([batch_norm1, dropout_switch])
      gp_in = [batch_norm1, degree_slice, membership] + deg_adjs
      if self.batch_norms[i] is not None:
        gc1 = self.batch_norms[i](gc1, training=training)
      if training and self.dropouts[i] is not None:
        gc1 = self.dropouts[i](gc1, training=training)
      gp_in = [gc1, degree_slice, membership] + deg_adjs
      in_layer = self.graph_pools[i](gp_in)
    dense = self.dense(in_layer)
    batch_norm3 = self.batch_norms[-1](dense)
    if self.dropouts[-1] is not None:
      batch_norm3 = self.dropouts[1]([batch_norm3, dropout_switch])
    neural_fingerprint = self.graph_gather(
        [batch_norm3, degree_slice, membership] + deg_adjs)
    if self.batch_norms[-1] is not None:
      dense = self.batch_norms[-1](dense, training=training)
    if training and self.dropouts[-1] is not None:
      dense = self.dropouts[1](dense, training=training)
    neural_fingerprint = self.graph_gather([dense, degree_slice, membership] +
                                           deg_adjs)
    if self.mode == 'classification':
      logits = self.reshape(self.reshape_dense(neural_fingerprint))
      logits = self.trim([logits, n_samples])
@@ -614,6 +644,15 @@ class _GraphConvKerasModel(tf.keras.Model):


class GraphConvModel(KerasModel):
  """Graph Convolutional Models.

  This class implements the graph convolutional model from the
  following paper:


  Duvenaud, David K., et al. "Convolutional networks on graphs for learning molecular fingerprints." Advances in neural information processing systems. 2015.

  """

  def __init__(self,
               n_tasks,
@@ -624,6 +663,7 @@ class GraphConvModel(KerasModel):
               number_atom_features=75,
               n_classes=2,
               batch_size=100,
               batch_normalize=True,
               uncertainty=False,
               **kwargs):
    """The wrapper class for graph convolutions.
@@ -653,6 +693,8 @@ class GraphConvModel(KerasModel):
        function atom_features in graph_features
    n_classes: int
      the number of classes to predict (only used in classification mode)
    batch_normalize: bool
      if True, apply batch normalization to model
    uncertainty: bool
      if True, include extra outputs and loss terms to enable the uncertainty
      in outputs to be predicted
@@ -670,6 +712,7 @@ class GraphConvModel(KerasModel):
        mode=mode,
        number_atom_features=number_atom_features,
        n_classes=n_classes,
        batch_normalize=batch_normalize,
        uncertainty=uncertainty,
        batch_size=batch_size)
    if mode == "classification":
@@ -707,13 +750,16 @@ class GraphConvModel(KerasModel):
              -1, self.n_tasks, self.n_classes)
        multiConvMol = ConvMol.agglomerate_mols(X_b)
        n_samples = np.array(X_b.shape[0])
        if mode == 'predict':
          dropout = np.array(0.0)
        else:
          dropout = np.array(1.0)
        #if mode == 'predict':
        #  dropout = np.array(0.0)
        #else:
        #  dropout = np.array(1.0)
        inputs = [
            multiConvMol.get_atom_features(), multiConvMol.deg_slice,
            np.array(multiConvMol.membership), n_samples, dropout
            multiConvMol.get_atom_features(),
            multiConvMol.deg_slice,
            #np.array(multiConvMol.membership), n_samples, dropout
            np.array(multiConvMol.membership),
            n_samples
        ]
        for i in range(1, len(multiConvMol.get_deg_adjacency_lists())):
          inputs.append(multiConvMol.get_deg_adjacency_lists()[i])
@@ -722,7 +768,19 @@ class GraphConvModel(KerasModel):

class MPNNModel(KerasModel):
  """ Message Passing Neural Network,
      default structures built according to https://arxiv.org/abs/1511.06391 """

  Message Passing Neural Networks treat graph convolutional
  operations as an instantiation of a more general message
  passing scheme. Recall that message passing in a graph is when
  nodes in a graph send each other "messages" and update their
  internal state as a consequence of these messages.

  Ordering structures in this model are built according to


Vinyals, Oriol, Samy Bengio, and Manjunath Kudlur. "Order matters: Sequence to sequence for sets." arXiv preprint arXiv:1511.06391 (2015).

  """

  def __init__(self,
               n_tasks,
+68 −54
Original line number Diff line number Diff line
@@ -21,61 +21,74 @@ from deepchem.utils.evaluate import GeneratorEvaluator
class KerasModel(Model):
  """This is a DeepChem model implemented by a Keras model.

  This class provides several advantages over using the Keras model's fitting
  and prediction methods directly.

  1. It provides better integration with the rest of DeepChem, such as direct
     support for Datasets and Transformers.

  2. It defines the loss in a more flexible way.  In particular, Keras does not
     support multidimensional weight matrices, which makes it impossible to
     implement most multitask models with Keras.

  3. It provides various additional features not found in the Keras Model class,
     such as uncertainty prediction and saliency mapping.

  The loss function for a model can be defined in two different ways.  For
  models that have only a single output and use a standard loss function, you
  can simply provide a dc.models.losses.Loss object.  This defines the loss for
  each sample or sample/task pair.  The result is automatically multiplied by
  the weights and averaged over the batch.  Any additional losses computed by
  model layers, such as weight decay penalties, are also added.

  For more complicated cases, you can instead provide a function that directly
  computes the total loss.  It must be of the form f(outputs, labels, weights),
  taking the list of outputs from the model, the expected values, and any weight
  matrices.  It should return a scalar equal to the value of the loss function
  for the batch.  No additional processing is done to the result; it is up to
  you to do any weighting, averaging, adding of penalty terms, etc.

  You can optionally provide an output_types argument, which describes how to
  interpret the model's outputs.  This should be a list of strings, one for each
  output.  Each entry must have one of the following values:
  This class provides several advantages over using the Keras
  model's fitting and prediction methods directly.

  1. It provides better integration with the rest of DeepChem,
     such as direct support for Datasets and Transformers.

  2. It defines the loss in a more flexible way.  In particular,
     Keras does not support multidimensional weight matrices,
     which makes it impossible to implement most multitask
     models with Keras.

  3. It provides various additional features not found in the
     Keras Model class, such as uncertainty prediction and
     saliency mapping.

  The loss function for a model can be defined in two different
  ways.  For models that have only a single output and use a
  standard loss function, you can simply provide a
  dc.models.losses.Loss object.  This defines the loss for each
  sample or sample/task pair.  The result is automatically
  multiplied by the weights and averaged over the batch.  Any
  additional losses computed by model layers, such as weight
  decay penalties, are also added.

  For more complicated cases, you can instead provide a function
  that directly computes the total loss.  It must be of the form
  f(outputs, labels, weights), taking the list of outputs from
  the model, the expected values, and any weight matrices.  It
  should return a scalar equal to the value of the loss function
  for the batch.  No additional processing is done to the
  result; it is up to you to do any weighting, averaging, adding
  of penalty terms, etc.

  You can optionally provide an output_types argument, which
  describes how to interpret the model's outputs.  This should
  be a list of strings, one for each output.  Each entry must
  have one of the following values:

  - 'prediction': This is a normal output, and will be returned by predict().
    If output types are not specified, all outputs are assumed to be of this
    type.

  - 'loss': This output will be used in place of the normal outputs for
    computing the loss function.  For example, models that output probability
    distributions usually do it by computing unbounded numbers (the logits),
    then passing them through a softmax function to turn them into
    probabilities.  When computing the cross entropy, it is more numerically
    stable to use the logits directly rather than the probabilities.  You can
    do this by having the model produce both probabilities and logits as
    outputs, then specifying output_types=['prediction', 'loss'].  When
    predict() is called, only the first output (the probabilities) will be
    returned.  But during training, it is the second output (the logits) that
    will be passed to the loss function.

  - 'variance': This output is used for estimating the uncertainty in another
    output.  To create a model that can estimate uncertainty, there must be the
    same number of 'prediction' and 'variance' outputs.  Each variance output
    must have the same shape as the corresponding prediction output, and each
    element is an estimate of the variance in the corresponding prediction.
    Also be aware that if a model supports uncertainty, it MUST use dropout on
    every layer, and dropout must be enabled during uncertainty prediction.
    If output types are not specified, all outputs are assumed
    to be of this type.

  - 'loss': This output will be used in place of the normal
    outputs for computing the loss function.  For example,
    models that output probability distributions usually do it
    by computing unbounded numbers (the logits), then passing
    them through a softmax function to turn them into
    probabilities.  When computing the cross entropy, it is more
    numerically stable to use the logits directly rather than
    the probabilities.  You can do this by having the model
    produce both probabilities and logits as outputs, then
    specifying output_types=['prediction', 'loss'].  When
    predict() is called, only the first output (the
    probabilities) will be returned.  But during training, it is
    the second output (the logits) that will be passed to the
    loss function.

  - 'variance': This output is used for estimating the
    uncertainty in another output.  To create a model that can
    estimate uncertainty, there must be the same number of
    'prediction' and 'variance' outputs.  Each variance output
    must have the same shape as the corresponding prediction
    output, and each element is an estimate of the variance in
    the corresponding prediction.  Also be aware that if a model
    supports uncertainty, it MUST use dropout on every layer,
    and dropout must be enabled during uncertainty prediction.
    Otherwise, the uncertainties it computes will be inaccurate.
    
  - 'embedding': This output is an embedding that the model
    generates internally which should be returned to users.
  """
@@ -374,6 +387,7 @@ class KerasModel(Model):
    def apply_gradient_for_batch(inputs, labels, weights, loss):
      with tf.GradientTape() as tape:
        outputs = self.model(inputs, training=True)
        #outputs = self.model(inputs)
        if isinstance(outputs, tf.Tensor):
          outputs = [outputs]
        if self._loss_outputs is not None:
@@ -469,7 +483,7 @@ class KerasModel(Model):
    if embedding:
      assert outputs is None
      if self._embedding_outputs is None or len(self._embedding_outputs) == 0:
        raise ValueError('This model cannot compute embneddings.')
        raise ValueError('This model cannot compute embeddings.')
    if (outputs is not None and self.model.inputs is not None and
        len(self.model.inputs) == 0):
      raise ValueError(
Loading