Commit 87bdca6a authored by peastman

New mechanism for enabling dropout during uncertainty prediction

parent 0c241128
+37 −13
@@ -66,7 +66,8 @@ class KerasModel(Model):
    must have the same shape as the corresponding prediction output, and each
    element is an estimate of the variance in the corresponding prediction.
    Also be aware that if a model supports uncertainty, it MUST use dropout on
-    every layer.  Otherwise, the uncertainties it computes will be inaccurate.
+    every layer, and dropout must be enabled during uncertainty prediction.
+    Otherwise, the uncertainties it computes will be inaccurate.
  """

  def __init__(self,
@@ -196,13 +197,9 @@ class KerasModel(Model):
    if len(self._input_placeholders) == 1:
      self._output_tensors = self.model(
          self._input_placeholders[0], training=False)
-      self._uncertainty_tensors = self.model(
-          self._input_placeholders[0], training=True)
    else:
      self._output_tensors = self.model(
          self._input_placeholders, training=False)
-      self._uncertainty_tensors = self.model(
-          self._input_placeholders, training=True)
    if isinstance(self._output_tensors, tf.Tensor):
      self._output_tensors = [self._output_tensors]
    if self._prediction_outputs is None:
@@ -337,7 +334,9 @@ class KerasModel(Model):
        # In eager mode we execute the loss function, accumulating the gradients.

        with tf.GradientTape() as tape:
-          outputs = self.model(inputs[0])
+          if len(inputs) == 1:
+            inputs = inputs[0]
+          outputs = self.model(inputs)
          if isinstance(outputs, tf.Tensor):
            outputs = [outputs]
          if self._loss_outputs is not None:
@@ -469,15 +468,13 @@ class KerasModel(Model):
                self.model.inputs, outputs)
          output_values = self._output_functions[outputs](inputs)
        else:
-          output_values = self.model(inputs, training=uncertainty)
+          output_values = self.model(inputs, training=False)
          output_values = [t.numpy() for t in output_values]
      else:

        # In graph mode we execute the output tensors.

-        if uncertainty:
-          fetches = self._uncertainty_tensors
-        elif outputs is not None:
+        if outputs is not None:
          fetches = outputs
        else:
          fetches = self._output_tensors
@@ -618,7 +615,8 @@ class KerasModel(Model):
    a NumPy array if the model produces a single output, or a list of arrays
    if it produces multiple outputs
    """
-    generator = self.default_generator(dataset, predict=True, pad_batches=False)
+    generator = self.default_generator(
+        dataset, mode='predict', pad_batches=False)
    return self.predict_on_generator(generator, transformers, outputs)

  def predict_uncertainty(self, dataset, masks=50):
@@ -650,7 +648,7 @@ class KerasModel(Model):
    sum_var = []
    for i in range(masks):
      generator = self.default_generator(
-          dataset, predict=True, pad_batches=False)
+          dataset, mode='uncertainty', pad_batches=False)
      results = self._predict(generator, [], None, True)
      if len(sum_pred) == 0:
        for p, v in results:
@@ -808,9 +806,35 @@ class KerasModel(Model):
  def default_generator(self,
                        dataset,
                        epochs=1,
-                        predict=False,
+                        mode='fit',
                        deterministic=True,
                        pad_batches=True):
    """Create a generator that iterates batches for a dataset.

    Subclasses may override this method to customize how model inputs are
    generated from the data.

    Parameters
    ----------
    dataset: Dataset
      the data to iterate
    epochs: int
      the number of times to iterate over the full dataset
    mode: str
      allowed values are 'fit' (called during training), 'predict' (called
      during prediction), and 'uncertainty' (called during uncertainty
      prediction)
    deterministic: bool
      whether to iterate over the dataset in order, or randomly shuffle the
      data for each epoch
    pad_batches: bool
      whether to pad each batch up to this model's preferred batch size

    Returns
    -------
    a generator that iterates batches, each represented as a tuple of lists:
    ([inputs], [outputs], [weights])
    """
    for epoch in range(epochs):
      for (X_b, y_b, w_b, ids_b) in dataset.iterbatches(
          batch_size=self.batch_size,
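Taken together, the changes above replace the old `training=True` uncertainty path with generator modes: `fit` iterates `mode='fit'` batches, `predict` uses `mode='predict'`, and `predict_uncertainty` uses `mode='uncertainty'`. A minimal usage sketch, assuming hypothetical `train_dataset`/`test_dataset` objects and the usual `(prediction, uncertainty)` return from `predict_uncertainty`:

import deepchem as dc

# Uncertainty requires dropout on every layer, so a nonzero dropout rate
# is set explicitly here.
model = dc.models.MultitaskRegressor(
    n_tasks=1,
    n_features=50,
    layer_sizes=[200],
    dropouts=0.1,
    uncertainty=True)
model.fit(train_dataset, nb_epoch=10)

# Draws `masks` stochastic forward passes, each built from a
# mode='uncertainty' generator so the dropout switch stays at 1.0.
pred, std = model.predict_uncertainty(test_dataset, masks=50)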
+28 −7
@@ -507,6 +507,24 @@ class IterRefLSTMEmbedding(tf.keras.layers.Layer):
    return [x + p, xp + q]


+class SwitchedDropout(tf.keras.layers.Layer):
+  """Apply dropout based on an input.
+
+  This is required for uncertainty prediction.  The standard Keras Dropout
+  layer only performs dropout during training, but we sometimes need to do it
+  during prediction.  The second input to this layer should be a scalar equal to
+  0 or 1, indicating whether to perform dropout.
+  """
+
+  def __init__(self, rate, **kwargs):
+    self.rate = rate
+    super(SwitchedDropout, self).__init__(**kwargs)
+
+  def call(self, inputs):
+    rate = self.rate * tf.squeeze(inputs[1])
+    return tf.nn.dropout(inputs[0], rate=rate)
+
+
class WeightedLinearCombo(tf.keras.layers.Layer):
  """Computes a weighted linear combination of input layers, with the weights defined by trainable variables."""

@@ -2048,14 +2066,14 @@ class DTNNGather(tf.keras.layers.Layer):
    return tf.segment_sum(output, atom_membership)


-def _DAGgraph_step(batch_inputs, W_list, b_list, activation, dropout, training):
+def _DAGgraph_step(batch_inputs, W_list, b_list, activation, dropout,
+                   dropout_switch):
  outputs = batch_inputs
  for idw, W in enumerate(W_list):
    outputs = tf.nn.xw_plus_b(outputs, W, b_list[idw])
    outputs = activation(outputs)
-    rate_scale = 1.0 if training else 0.0
    if not dropout is None:
-      outputs = tf.nn.dropout(outputs, rate=dropout * rate_scale)
+      outputs = tf.nn.dropout(outputs, rate=dropout * dropout_switch)
  return outputs


@@ -2123,7 +2141,7 @@ class DAGLayer(tf.keras.layers.Layer):
    ]))
    self.built = True

-  def call(self, inputs, training=None):
+  def call(self, inputs):
    """
    parent layers: atom_features, parents, calculation_orders, calculation_masks, n_atoms
    """
@@ -2136,6 +2154,7 @@ class DAGLayer(tf.keras.layers.Layer):
    calculation_masks = inputs[3]

    n_atoms = tf.squeeze(inputs[4])
+    dropout_switch = tf.squeeze(inputs[5])
    # initialize graph features for each graph
    graph_features_initial = tf.zeros((self.max_atoms * self.batch_size,
                                       self.max_atoms + 1, self.n_graph_feat))
@@ -2174,7 +2193,8 @@ class DAGLayer(tf.keras.layers.Layer):
      # of shape: (batch_size*max_atoms) * n_graph_features
      # representing the graph features of target atoms in each graph
      batch_outputs = _DAGgraph_step(batch_inputs, self.W_list, self.b_list,
-                                     self.activation, self.dropout, training)
+                                     self.activation, self.dropout,
+                                     dropout_switch)

      # index for target atoms
      target_index = tf.stack([tf.range(n_atoms), parents[:, count, 0]], axis=1)
@@ -2241,17 +2261,18 @@ class DAGGather(tf.keras.layers.Layer):
    ]))
    self.built = True

-  def call(self, inputs, training=None):
+  def call(self, inputs):
    """
    parent layers: atom_features, membership
    """
    atom_features = inputs[0]
    membership = inputs[1]
+    dropout_switch = tf.squeeze(inputs[2])
    # Extract atom_features
    graph_features = tf.segment_sum(atom_features, membership)
    # sum all graph outputs
    return _DAGgraph_step(graph_features, self.W_list, self.b_list,
-                          self.activation, self.dropout, training)
+                          self.activation, self.dropout, dropout_switch)


class MessagePassing(tf.keras.layers.Layer):
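The `SwitchedDropout` layer above is what lets one graph serve both deterministic prediction and Monte Carlo sampling. A small eager-mode sketch (not part of this commit) of calling it directly; with the switch at 0.0 the effective rate is 0 and the input passes through unchanged, while 1.0 applies dropout at the configured rate regardless of any training flag:

import tensorflow as tf
from deepchem.models.layers import SwitchedDropout

tf.enable_eager_execution()  # only needed on TF 1.x

layer = SwitchedDropout(rate=0.5)
x = tf.ones((4, 8))

y_off = layer([x, tf.constant(0.0)])  # rate = 0.5 * 0.0, so y_off == x
y_on = layer([x, tf.constant(1.0)])   # rate = 0.5 * 1.0, stochastic output

The DAG layers follow the same convention without the wrapper class: `_DAGgraph_step` multiplies its `dropout` rate by the `dropout_switch` scalar that `DAGLayer` and `DAGGather` now take as an extra input.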
+39 −18
@@ -13,8 +13,10 @@ import collections

import deepchem as dc
from deepchem.models import KerasModel
+from deepchem.models.layers import SwitchedDropout
from deepchem.utils.save import log
from deepchem.metrics import to_one_hot, from_one_hot
+from tensorflow.keras.layers import Input, Dense, Reshape, Softmax, Dropout, Activation

logger = logging.getLogger(__name__)

@@ -94,7 +96,7 @@ class MultitaskClassifier(KerasModel):

    # Add the input features.

-    mol_features = tf.keras.Input(shape=(n_features,))
+    mol_features = Input(shape=(n_features,))
    prev_layer = mol_features

    # Add the dense layers
@@ -102,7 +104,7 @@ class MultitaskClassifier(KerasModel):
    for size, weight_stddev, bias_const, dropout, activation_fn in zip(
        layer_sizes, weight_init_stddevs, bias_init_consts, dropouts,
        activation_fns):
-      layer = tf.keras.layers.Dense(
+      layer = Dense(
          size,
          activation=activation_fn,
          kernel_initializer=tf.truncated_normal_initializer(
@@ -110,12 +112,12 @@ class MultitaskClassifier(KerasModel):
          bias_initializer=tf.constant_initializer(value=bias_const),
          kernel_regularizer=regularizer)(prev_layer)
      if dropout > 0.0:
-        layer = tf.keras.layers.Dropout(rate=dropout)(layer)
+        layer = Dropout(rate=dropout)(layer)
      prev_layer = layer
    self.neural_fingerprint = prev_layer
-    logits = tf.keras.layers.Reshape((n_tasks, n_classes))(
-        tf.keras.layers.Dense(n_tasks * n_classes)(prev_layer))
-    output = tf.keras.layers.Softmax()(logits)
+    logits = Reshape((n_tasks,
+                      n_classes))(Dense(n_tasks * n_classes)(prev_layer))
+    output = Softmax()(logits)
    model = tf.keras.Model(inputs=mol_features, outputs=[output, logits])
    super(MultitaskClassifier, self).__init__(
        model,
@@ -126,7 +128,7 @@ class MultitaskClassifier(KerasModel):
  def default_generator(self,
                        dataset,
                        epochs=1,
-                        predict=False,
+                        mode='fit',
                        deterministic=True,
                        pad_batches=True):
    for epoch in range(epochs):
@@ -215,7 +217,8 @@ class MultitaskRegressor(KerasModel):

    # Add the input features.

-    mol_features = tf.keras.Input(shape=(n_features,))
+    mol_features = Input(shape=(n_features,))
+    dropout_switch = Input(shape=tuple())
    prev_layer = mol_features

    # Add the dense layers
@@ -223,7 +226,7 @@ class MultitaskRegressor(KerasModel):
    for size, weight_stddev, bias_const, dropout, activation_fn in zip(
        layer_sizes, weight_init_stddevs, bias_init_consts, dropouts,
        activation_fns):
-      layer = tf.keras.layers.Dense(
+      layer = Dense(
          size,
          activation=activation_fn,
          kernel_initializer=tf.truncated_normal_initializer(
@@ -231,22 +234,22 @@ class MultitaskRegressor(KerasModel):
          bias_initializer=tf.constant_initializer(value=bias_const),
          kernel_regularizer=regularizer)(prev_layer)
      if dropout > 0.0:
-        layer = tf.keras.layers.Dropout(rate=dropout)(layer)
+        layer = SwitchedDropout(rate=dropout)([layer, dropout_switch])
      prev_layer = layer
    self.neural_fingerprint = prev_layer
-    output = tf.keras.layers.Reshape((n_tasks, 1))(tf.keras.layers.Dense(
+    output = Reshape((n_tasks, 1))(Dense(
        n_tasks,
        kernel_initializer=tf.truncated_normal_initializer(
            stddev=weight_init_stddevs[-1]),
        bias_initializer=tf.constant_initializer(
            value=bias_init_consts[-1]))(prev_layer))
    if uncertainty:
-      log_var = tf.keras.layers.Reshape((n_tasks, 1))(tf.keras.layers.Dense(
+      log_var = Reshape((n_tasks, 1))(Dense(
          n_tasks,
          kernel_initializer=tf.truncated_normal_initializer(
              stddev=weight_init_stddevs[-1]),
          bias_initializer=tf.constant_initializer(value=0.0))(prev_layer))
-      var = tf.keras.layers.Activation(tf.exp)(log_var)
+      var = Activation(tf.exp)(log_var)
      outputs = [output, var, output, log_var]
      output_types = ['prediction', 'variance', 'loss', 'loss']

@@ -257,10 +260,28 @@ class MultitaskRegressor(KerasModel):
      outputs = [output]
      output_types = ['prediction']
      loss = dc.models.losses.L2Loss()
-    model = tf.keras.Model(inputs=mol_features, outputs=outputs)
+    model = tf.keras.Model(
+        inputs=[mol_features, dropout_switch], outputs=outputs)
    super(MultitaskRegressor, self).__init__(
        model, loss, output_types=output_types, **kwargs)

+  def default_generator(self,
+                        dataset,
+                        epochs=1,
+                        mode='fit',
+                        deterministic=True,
+                        pad_batches=True):
+    for epoch in range(epochs):
+      for (X_b, y_b, w_b, ids_b) in dataset.iterbatches(
+          batch_size=self.batch_size,
+          deterministic=deterministic,
+          pad_batches=pad_batches):
+        if mode == 'predict':
+          dropout = np.array(0.0)
+        else:
+          dropout = np.array(1.0)
+        yield ([X_b, dropout], [y_b], [w_b])


class MultitaskFitTransformRegressor(MultitaskRegressor):
  """Implements a MultitaskRegressor that performs on-the-fly transformation during fit/predict.
@@ -327,7 +348,7 @@ class MultitaskFitTransformRegressor(MultitaskRegressor):
  def default_generator(self,
                        dataset,
                        epochs=1,
-                        predict=False,
+                        mode='fit',
                        deterministic=True,
                        pad_batches=True):
    for epoch in range(epochs):
@@ -338,12 +359,12 @@ class MultitaskFitTransformRegressor(MultitaskRegressor):
        if y_b is not None:
          y_b = y_b.reshape(-1, self.n_tasks, 1)
        if X_b is not None:
-          if not predict:
+          if mode == 'fit':
            for transformer in self.fit_transformers:
              X_b = transformer.X_transform(X_b)
        yield ([X_b], [y_b], [w_b])

-  def predict_on_generator(self, generator, transformers=[]):
+  def predict_on_generator(self, generator, transformers=[], outputs=None):

    def transform_generator():
      for inputs, labels, weights in generator:
@@ -354,4 +375,4 @@ class MultitaskFitTransformRegressor(MultitaskRegressor):
          yield ([X_t], labels, weights)

    return super(MultitaskFitTransformRegressor, self).predict_on_generator(
-        transform_generator(), transformers)
+        transform_generator(), transformers, outputs)
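A custom model can opt into the same mechanism by overriding `default_generator` and feeding the switch as an extra input, mirroring the `MultitaskRegressor` implementation above. A hedged sketch, where `MyUncertaintyModel` and its two-input Keras model are hypothetical:

import numpy as np
from deepchem.models import KerasModel

class MyUncertaintyModel(KerasModel):

  def default_generator(self,
                        dataset,
                        epochs=1,
                        mode='fit',
                        deterministic=True,
                        pad_batches=True):
    for epoch in range(epochs):
      for X_b, y_b, w_b, ids_b in dataset.iterbatches(
          batch_size=self.batch_size,
          deterministic=deterministic,
          pad_batches=pad_batches):
        # Dropout stays active for 'fit' and 'uncertainty'; only plain
        # prediction switches it off.
        dropout = np.array(0.0) if mode == 'predict' else np.array(1.0)
        yield ([X_b, dropout], [y_b], [w_b])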
+81 −74
(File changed; diff collapsed in the preview: size limit exceeded.)

+1 −1
@@ -167,7 +167,7 @@ class RobustMultitaskClassifier(KerasModel):
  def default_generator(self,
                        dataset,
                        epochs=1,
-                        predict=False,
+                        mode='fit',
                        deterministic=True,
                        pad_batches=True):
    for epoch in range(epochs):
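The remaining generator overrides, like this one, only need the `predict=False` to `mode='fit'` rename. For reference, the entry points now select modes roughly as follows (a condensed sketch; the `predict` and `predict_uncertainty` call sites appear verbatim in the keras_model.py diff above, while the `fit` line is inferred from the default argument):

# fit(): mode defaults to 'fit', so dropout switches stay at 1.0
generator = self.default_generator(dataset, epochs=nb_epoch)
# predict(): dropout disabled
generator = self.default_generator(dataset, mode='predict', pad_batches=False)
# predict_uncertainty(): dropout kept active for each of the `masks` passes
generator = self.default_generator(dataset, mode='uncertainty', pad_batches=False)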