Unverified Commit e2477458 authored by peastman's avatar peastman Committed by GitHub
Browse files

Merge pull request #1218 from peastman/uncertainty

Predict uncertainty in outputs
parents 776d1a68 07512da2
Loading
Loading
Loading
Loading
+30 −3
Original line number Diff line number Diff line
@@ -18,7 +18,7 @@ from deepchem.metrics import to_one_hot, from_one_hot
from deepchem.metrics import to_one_hot

from deepchem.models.tensorgraph.tensor_graph import TensorGraph, TFWrapper
from deepchem.models.tensorgraph.layers import Feature, Label, Weights, WeightedError, Dense, Dropout, WeightDecay, Reshape, SoftMax, SoftMaxCrossEntropy, L2Loss, ReduceSum
from deepchem.models.tensorgraph.layers import Feature, Label, Weights, WeightedError, Dense, Dropout, WeightDecay, Reshape, SoftMax, SoftMaxCrossEntropy, L2Loss, ReduceSum, ReduceMean, Exp

logger = logging.getLogger(__name__)

@@ -211,6 +211,7 @@ class MultiTaskRegressor(TensorGraph):
               weight_decay_penalty_type="l2",
               dropouts=0.5,
               activation_fns=tf.nn.relu,
               uncertainty=False,
               **kwargs):
    """Create a MultiTaskRegressor.

@@ -244,6 +245,9 @@ class MultiTaskRegressor(TensorGraph):
      the Tensorflow activation function to apply to each layer.  The length of this list should equal
      len(layer_sizes).  Alternatively this may be a single value instead of a list, in which case the
      same value is used for every layer.
    uncertainty: bool
      if True, include extra outputs and loss terms to enable the uncertainty
      in outputs to be predicted
    """
    super(MultiTaskRegressor, self).__init__(**kwargs)
    self.n_tasks = n_tasks
@@ -257,6 +261,10 @@ class MultiTaskRegressor(TensorGraph):
      dropouts = [dropouts] * n_layers
    if not isinstance(activation_fns, collections.Sequence):
      activation_fns = [activation_fns] * n_layers
    if uncertainty:
      if any(d == 0.0 for d in dropouts):
        raise ValueError(
            'Dropout must be included in every layer to predict uncertainty')

    # Add the input features.

@@ -296,7 +304,26 @@ class MultiTaskRegressor(TensorGraph):
        ])
    self.add_output(output)
    labels = Label(shape=(None, n_tasks, 1))
    weights = Weights(shape=(None, n_tasks))
    weights = Weights(shape=(None, n_tasks, 1))
    if uncertainty:
      log_var = Reshape(
          shape=(-1, n_tasks, 1),
          in_layers=[
              Dense(
                  in_layers=[prev_layer],
                  out_channels=n_tasks,
                  weights_initializer=TFWrapper(
                      tf.truncated_normal_initializer,
                      stddev=weight_init_stddevs[-1]),
                  biases_initializer=TFWrapper(
                      tf.constant_initializer, value=0.0))
          ])
      var = Exp(log_var)
      self.add_variance(var)
      diff = labels - output
      weighted_loss = weights * (diff * diff / var + log_var)
      weighted_loss = ReduceSum(ReduceMean(weighted_loss, axis=[1, 2]))
    else:
      weighted_loss = ReduceSum(L2Loss(in_layers=[labels, output, weights]))
    if weight_decay_penalty != 0.0:
      weighted_loss = WeightDecay(
+158 −13
Original line number Diff line number Diff line
@@ -61,6 +61,7 @@ class TensorGraph(Model):
    self.features = list()
    self.labels = list()
    self.outputs = list()
    self.variances = list()
    self.task_weights = list()
    self.submodels = list()
    self.loss = Constant(0)
@@ -363,8 +364,13 @@ class TensorGraph(Model):
      return results[0]
    return results

  def predict_on_generator(self, generator, transformers=[], outputs=None):
  def _predict(self, generator, transformers, outputs, uncertainty):
    """
    Predict outputs for data provided by a generator.

    This is the private implementation of prediction.  Do not call it directly.
    Instead call one of the public prediction methods.

    Parameters
    ----------
    generator: Generator
@@ -376,6 +382,10 @@ class TensorGraph(Model):
      If outputs is a Layer/Tensor, then will evaluate and return as a
      single ndarray. If outputs is a list of Layers/Tensors, will return a list
      of ndarrays.
    uncertainty: bool
      specifies whether this is being called as part of estimating uncertainty.
      If True, it sets the training flag so that dropout will be enabled, and
      returns the values of the uncertainty outputs.
    Returns:
      y_pred: numpy ndarray of shape (n_samples, n_classes*n_tasks)
    """
@@ -385,9 +395,19 @@ class TensorGraph(Model):
      outputs = self.outputs
    elif not isinstance(outputs, collections.Sequence):
      outputs = [outputs]
    if uncertainty:
      if len(self.variances) == 0:
        raise ValueError('This model cannot compute uncertainties')
      if len(self.variances) != len(outputs):
        raise ValueError(
            'The number of variances must exactly match the number of outputs')
      tensors = outputs + self.variances
    else:
      tensors = outputs

    with self._get_tf("Graph").as_default():
      # Gather results for each output
      results = [[] for out in outputs]
      results = [[] for out in tensors]
      n_samples = 0
      n_enqueued = [0]
      final_sample = [None]
@@ -397,7 +417,7 @@ class TensorGraph(Model):
            args=(self, generator, self._get_tf("Graph"), self.session,
                  n_enqueued, final_sample))
        enqueue_thread.start()
      for feed_dict in self._create_feed_dicts(generator, False):
      for feed_dict in self._create_feed_dicts(generator, uncertainty):
        if self.queue_installed:
          # Don't let this thread get ahead of the enqueue thread, since if
          # we try to read more batches than the total number that get queued,
@@ -409,7 +429,7 @@ class TensorGraph(Model):
          if n_samples == final_sample[0]:
            break
        n_samples += 1
        feed_results = self._run_graph(outputs, feed_dict, False)
        feed_results = self._run_graph(tensors, feed_dict, uncertainty)
        if tfe.in_eager_mode():
          feed_results = [f.numpy() for f in feed_results]
        if len(feed_results) > 1:
@@ -428,9 +448,29 @@ class TensorGraph(Model):
      # If only one output, just return array
      if len(final_results) == 1:
        return final_results[0]
      elif uncertainty:
        return zip(final_results[:len(outputs)], final_results[len(outputs):])
      else:
        return final_results

  def predict_on_generator(self, generator, transformers=[], outputs=None):
    """
    Predict outputs for data provided by a generator.

    This is a thin public wrapper: it forwards its arguments unchanged to the
    private _predict() implementation with the uncertainty flag set to False.

    Parameters
    ----------
    generator: Generator
      Generator that constructs feed dictionaries for TensorGraph.
    transformers: list
      List of dc.trans.Transformers.
    outputs: object
      If outputs is None, then will assume outputs = self.outputs.
      If outputs is a Layer/Tensor, then will evaluate and return as a
      single ndarray. If outputs is a list of Layers/Tensors, will return a list
      of ndarrays.

    Returns
    -------
    y_pred: numpy ndarray of shape (n_samples, n_classes*n_tasks)
    """
    # Plain prediction never asks _predict() for the variance outputs.
    return self._predict(generator, transformers, outputs, uncertainty=False)

  def predict_proba_on_generator(self, generator, transformers=[],
                                 outputs=None):
    """
@@ -457,6 +497,33 @@ class TensorGraph(Model):
    generator = self.default_generator(dataset, predict=True, pad_batches=False)
    return self.predict_on_generator(generator, transformers, outputs)

  def predict_uncertainty_on_batch(self, X, masks=50):
    """
    Predict outputs, and the uncertainty in each one, for a batch of inputs.

    The inputs are wrapped in a NumpyDataset without labels and passed on to
    predict_uncertainty(), which does the actual work.

    The uncertainty is computed as described in https://arxiv.org/abs/1703.04977.
    It involves repeating the prediction many times with different dropout masks.
    The prediction is computed as the average over all the predictions.  The
    uncertainty includes both the variation among the predicted values (epistemic
    uncertainty) and the model's own estimates for how well it fits the data
    (aleatoric uncertainty).  Not all models support uncertainty prediction.

    Parameters
    ----------
    X: ndarray
      the input data, as a Numpy array.
    masks: int
      the number of dropout masks to average over

    Returns
    -------
    for each output, a tuple (y_pred, y_std) where y_pred is the predicted
    value of the output, and each element of y_std estimates the standard
    deviation of the corresponding element of y_pred
    """
    return self.predict_uncertainty(NumpyDataset(X=X, y=None), masks)

  def predict_proba_on_batch(self, X, transformers=[], outputs=None):
    """Generates predictions for input samples, processing samples in a batch.

@@ -484,10 +551,9 @@ class TensorGraph(Model):
    transformers: list
      List of dc.trans.Transformers.
    outputs: object
      If outputs is None, then will assume outputs = self.outputs[0] (single
      output). If outputs is a Layer/Tensor, then will evaluate and return as a
      single ndarray. If outputs is a list of Layers/Tensors, will return a list
      of ndarrays.
      If outputs is None, then will assume outputs=self.outputs. If outputs is
      a Layer/Tensor, then will evaluate and return as a single ndarray. If
      outputs is a list of Layers/Tensors, will return a list of ndarrays.

    Returns
    -------
@@ -496,6 +562,58 @@ class TensorGraph(Model):
    generator = self.default_generator(dataset, predict=True, pad_batches=False)
    return self.predict_on_generator(generator, transformers, outputs)

  def predict_uncertainty(self, dataset, masks=50):
    """
    Predict the model's outputs, along with the uncertainty in each one.

    The uncertainty is computed as described in https://arxiv.org/abs/1703.04977.
    It involves repeating the prediction many times with different dropout masks.
    The prediction is computed as the average over all the predictions.  The
    uncertainty includes both the variation among the predicted values (epistemic
    uncertainty) and the model's own estimates for how well it fits the data
    (aleatoric uncertainty).  Not all models support uncertainty prediction.

    Parameters
    ----------
    dataset: dc.data.Dataset
      Dataset to make prediction on
    masks: int
      the number of dropout masks to average over.  Must be at least 1.

    Returns
    -------
    If the model has a single output, a tuple (y_pred, y_std).  Otherwise a
    list containing one such tuple for each output.  y_pred is the predicted
    value of the output, and each element of y_std estimates the standard
    deviation of the corresponding element of y_pred.

    Raises
    ------
    ValueError
      if masks is less than 1
    """
    if masks < 1:
      raise ValueError('masks must be at least 1')

    # Running sums over all dropout masks, one entry per model output.
    sum_pred = []
    sum_sq_pred = []
    sum_var = []
    for _ in range(masks):
      generator = self.default_generator(
          dataset, predict=True, pad_batches=False)
      results = self._predict(generator, [], self.outputs, True)
      if not sum_pred:
        for p, v in results:
          # Copy so the in-place accumulation below never modifies an array
          # that is still owned by the prediction code.
          sum_pred.append(np.array(p))
          sum_sq_pred.append(p * p)
          sum_var.append(np.array(v))
      else:
        for j, (p, v) in enumerate(results):
          sum_pred[j] += p
          sum_sq_pred[j] += p * p
          sum_var[j] += v

    output = []
    std = []
    for total, total_sq, total_var in zip(sum_pred, sum_sq_pred, sum_var):
      mean = total / masks
      # E[p^2] - E[p]^2 can come out slightly negative due to floating point
      # round-off; clamp it so the square root never produces NaN.
      epistemic = np.maximum(total_sq / masks - mean * mean, 0.0)
      aleatoric = total_var / masks
      output.append(mean)
      std.append(np.sqrt(epistemic + aleatoric))
    if len(output) == 1:
      return (output[0], std[0])
    # Return a real list rather than a one-shot zip iterator so the result can
    # be indexed and iterated more than once.
    return list(zip(output, std))

  def predict_proba(self, dataset, transformers=[], outputs=None):
    """
    Parameters
@@ -505,10 +623,9 @@ class TensorGraph(Model):
    transformers: list
      List of dc.trans.Transformers.
    outputs: object
      If outputs is None, then will assume outputs = self.outputs[0] (single
      output). If outputs is a Layer/Tensor, then will evaluate and return as a
      single ndarray. If outputs is a list of Layers/Tensors, will return a list
      of ndarrays.
      If outputs is None, then will assume outputs=self.outputs. If outputs is
      a Layer/Tensor, then will evaluate and return as a single ndarray. If
      outputs is a list of Layers/Tensors, will return a list of ndarrays.

    Returns
    -------
@@ -527,7 +644,7 @@ class TensorGraph(Model):
      sorted_layers.append(layer)

    sorted_layers = []
    for l in self.features + self.labels + self.task_weights + self.outputs:
    for l in self.features + self.labels + self.task_weights + self.outputs + self.variances:
      add_layers_to_list(l, sorted_layers)
    add_layers_to_list(self.loss, sorted_layers)
    for submodel in self.submodels:
@@ -565,6 +682,8 @@ class TensorGraph(Model):
        build_layers(self.loss, tensors)
        for output in self.outputs:
          build_layers(output, tensors)
        for variance in self.variances:
          build_layers(variance, tensors)
        for submodel in self.submodels:
          build_layers(submodel.loss, tensors)

@@ -666,9 +785,24 @@ class TensorGraph(Model):
    self.loss = layer

  def add_output(self, layer):
    """Register a layer as a model output that predict() can compute."""
    self._add_layer(layer)
    self.outputs.extend([layer])

  def add_variance(self, layer):
    """Register a layer that computes the variance in an output.

    A model that supports uncertainty must call add_variance() exactly once
    per output.  Every variance layer must have the same shape as the output
    it belongs to; each of its elements estimates the variance due to
    aleatoric uncertainty in the matching element of that output.

    A model that supports uncertainty MUST also use dropout on every layer.
    Without it, the uncertainties it computes will be inaccurate.
    """
    self._add_layer(layer)
    self.variances.extend([layer])

  def set_optimizer(self, optimizer):
    """Set the optimizer to use for fitting."""
    self.optimizer = optimizer
@@ -971,6 +1105,17 @@ class TensorGraph(Model):
        feed_dict = {}
        for key, value in d.items():
          if isinstance(key, Input):
            # Add or remove dimensions of size 1 to match the shape of the layer.
            value_dims = len(value.shape)
            layer_dims = len(key.shape)
            if value_dims < layer_dims:
              if all(i == 1 for i in key.shape[value_dims:]):
                value = tf.reshape(value,
                                   list(value.shape) + [1] *
                                   (layer_dims - value_dims))
            if value_dims > layer_dims:
              if all(i == 1 for i in value.shape[layer_dims:]):
                value = tf.reshape(value, value.shape[:layer_dims])
            feed_dict[key] = tf.cast(value, key.dtype)
          else:
            feed_dict[key] = value
+30 −0
Original line number Diff line number Diff line
@@ -939,3 +939,33 @@ class TestOverfit(test_util.TensorFlowTestCase):
    # Eval model on train
    scores = model.evaluate(dataset, [metric])
    assert scores[metric.name] < .2

  def test_multitask_regressor_uncertainty(self):
    """Test computing uncertainty for a MultiTaskRegressor.

    Fits a single-task regressor with uncertainty=True to noisy linear data,
    then checks that the mean absolute prediction error is below 1.0 and that
    the mean predicted standard deviation lies between the injected noise level
    and 1.0.
    """
    n_tasks = 1
    n_samples = 30
    n_features = 1
    noise = 0.1

    # Generate dummy dataset: y = 10 * X plus Gaussian noise whose standard
    # deviation is `noise`.
    X = np.random.rand(n_samples, n_features, 1)
    y = 10 * X + np.random.normal(scale=noise, size=(n_samples, n_tasks, 1))
    dataset = dc.data.NumpyDataset(X, y)

    # One hidden layer with nonzero dropout and uncertainty prediction enabled.
    model = dc.models.MultiTaskRegressor(
        n_tasks,
        n_features,
        layer_sizes=[200],
        weight_init_stddevs=[.1],
        batch_size=n_samples,
        dropouts=0.1,
        learning_rate=0.003,
        uncertainty=True)

    # Fit the model on the training data.
    model.fit(dataset, nb_epoch=2500)

    # Predict the output and uncertainty.
    pred, std = model.predict_uncertainty(dataset)
    # The predictions should be close to the labels on average.
    assert np.mean(np.abs(y - pred)) < 1.0
    # The estimated uncertainty should exceed the injected noise level while
    # staying below 1.0.
    assert noise < np.mean(std) < 1.0
+231 −0

File added.

Preview size limit exceeded, changes collapsed.

+5 −0
Original line number Diff line number Diff line
@@ -88,3 +88,8 @@ def test_seqtoseq_fingerprint():
def test_dataset_preparation():
  """Check that _notebook_read reports no errors for dataset_preparation.ipynb."""
  _, errors = _notebook_read("dataset_preparation.ipynb")
  assert errors == []


def test_uncertainty():
  """Check that _notebook_read reports no errors for Uncertainty.ipynb."""
  _, errors = _notebook_read("Uncertainty.ipynb")
  assert errors == []