Commit 53a8b7b7 authored by peastman's avatar peastman
Browse files

More features in KerasModel

parent e649d199
Loading
Loading
Loading
Loading
+83 −14
Original line number Diff line number Diff line
@@ -6,6 +6,7 @@ from deepchem.data import NumpyDataset
from deepchem.models.losses import Loss
from deepchem.models.models import Model
from deepchem.models.tensorgraph.optimizers import Adam
from deepchem.utils.evaluate import GeneratorEvaluator


class KerasModel(Model):
@@ -152,6 +153,13 @@ class KerasModel(Model):
      return
    self._ensure_built()
    self._inputs_built = True
    if len(self.model.inputs) > 0:
      self._input_dtypes = [t.dtype.as_numpy_dtype for t in self.model.inputs]
    else:
      self._input_dtypes = [
          np.float32 if x.dtype == np.float64 else x.dtype
          for x in example_inputs
      ]
    if tf.executing_eagerly():
      return
    if len(self.model.inputs) > 0:
@@ -161,7 +169,8 @@ class KerasModel(Model):
      # example batch.
      input_shapes = [(None,) + i.shape[1:] for i in example_inputs]
      self._input_placeholders = [
          tf.placeholder(dtype=tf.float32, shape=s) for s in input_shapes
          tf.placeholder(dtype=tf.as_dtype(t), shape=s)
          for s, t in zip(input_shapes, self._input_dtypes)
      ]
      if len(input_shapes) == 1:
        self.model.build(input_shapes[0])
@@ -190,15 +199,23 @@ class KerasModel(Model):
      return
    self._create_inputs(example_batch[0])
    self._training_ops_built = True
    self._label_dtypes = [
        np.float32 if x.dtype == np.float64 else x.dtype
        for x in example_batch[1]
    ]
    self._weights_dtypes = [
        np.float32 if x.dtype == np.float64 else x.dtype
        for x in example_batch[2]
    ]
    if tf.executing_eagerly():
      return
    self._label_placeholders = [
        tf.placeholder(dtype=tf.float32, shape=t.shape)
        for t in example_batch[1]
        tf.placeholder(dtype=tf.as_dtype(t), shape=x.shape)
        for x, t in zip(example_batch[1], self._label_dtypes)
    ]
    self._weights_placeholders = [
        tf.placeholder(dtype=tf.float32, shape=t.shape)
        for t in example_batch[2]
        tf.placeholder(dtype=tf.as_dtype(t), shape=x.shape)
        for x, t in zip(example_batch[2], self._weights_dtypes)
    ]
    self._loss_tensor = self._loss_fn(
        [self._output_tensors[i] for i in self._loss_outputs],
@@ -290,7 +307,7 @@ class KerasModel(Model):

    for batch in generator:
      self._create_training_ops(batch)
      inputs, labels, weights = batch
      inputs, labels, weights = self._prepare_batch(batch)
      if tf.executing_eagerly():

        # In eager mode we execute the loss function, accumulating the gradients.
@@ -371,8 +388,9 @@ class KerasModel(Model):
    generator: generator
      this should generate batches, each represented as a tuple of the form
      (inputs, labels, weights).
    transformers: list
      List of dc.trans.Transformers.
    transformers: list of dc.trans.Transformers
      Transformers that the input data has been transformed by.  The output
      is passed through these transformers to undo the transformations.
    uncertainty: bool
      specifies whether this is being called as part of estimating uncertainty.
      If True, it sets the training flag so that dropout will be enabled, and
@@ -392,6 +410,7 @@ class KerasModel(Model):
    for batch in generator:
      inputs, labels, weights = batch
      self._create_inputs(inputs)
      inputs, _, _ = self._prepare_batch((inputs, None, None))
      if tf.executing_eagerly():

        # In eager mode we invoke the model directly.
@@ -458,8 +477,9 @@ class KerasModel(Model):
    generator: generator
      this should generate batches, each represented as a tuple of the form
      (inputs, labels, weights).
    transformers: list
      List of dc.trans.Transformers.
    transformers: list of dc.trans.Transformers
      Transformers that the input data has been transformed by.  The output
      is passed through these transformers to undo the transformations.
    Returns:
      a NumPy array if the model produces a single output, or a list of arrays
      if it produces multiple outputs
@@ -473,8 +493,9 @@ class KerasModel(Model):
    ----------
    X: ndarray
      the input data, as a Numpy array.
    transformers: List
      List of dc.trans.Transformers
    transformers: list of dc.trans.Transformers
      Transformers that the input data has been transformed by.  The output
      is passed through these transformers to undo the transformations.

    Returns
    -------
@@ -519,8 +540,9 @@ class KerasModel(Model):
    ----------
    dataset: dc.data.Dataset
      Dataset to make prediction on
    transformers: list
      List of dc.trans.Transformers.
    transformers: list of dc.trans.Transformers
      Transformers that the input data has been transformed by.  The output
      is passed through these transformers to undo the transformations.

    Returns
    -------
@@ -582,6 +604,34 @@ class KerasModel(Model):
    else:
      return zip(output, std)

  def evaluate_generator(self,
                         generator,
                         metrics,
                         transformers=[],
                         per_task_metrics=False):
    """Score this model on the batches produced by a generator.

    Parameters
    ----------
    generator: generator
      this should generate batches, each represented as a tuple of the form
      (inputs, labels, weights).
    metrics: list of deepchem.metrics.Metric
      Evaluation metrics to compute.
    transformers: list of dc.trans.Transformers
      Transformers that the input data has been transformed by.  The output
      is passed through these transformers to undo the transformations.
    per_task_metrics: bool
      If True, return per-task scores.

    Returns
    -------
    dict
      Maps the name of each metric to the score computed for it.  The value
      is whatever GeneratorEvaluator.compute_model_performance() returns, so
      the exact form when per_task_metrics is True is defined there.
    """
    # labels=None tells GeneratorEvaluator to read the labels and weights
    # straight out of each (inputs, labels, weights) batch instead of looking
    # them up by key in a feed dict.
    return GeneratorEvaluator(
        self, generator, transformers, labels=None).compute_model_performance(
            metrics, per_task_metrics=per_task_metrics)

  def compute_saliency(self, X):
    """Compute the saliency map for an input sample.

@@ -607,6 +657,7 @@ class KerasModel(Model):
    input_shape = X.shape
    X = np.reshape(X, [1] + list(X.shape))
    self._create_inputs([X])
    X, _, _ = self._prepare_batch((X, None, None))
    if tf.executing_eagerly():
      # In eager mode we use a GradientTape to compute gradients.

@@ -659,6 +710,24 @@ class KerasModel(Model):
      return final_result[0]
    return final_result

  def _prepare_batch(self, batch):
    """Cast the arrays in a batch to the dtypes recorded for this model.

    Parameters
    ----------
    batch: tuple
      a tuple of the form (inputs, labels, weights).  inputs is a sequence of
      arrays.  labels and weights are each either a sequence of arrays or
      None.

    Returns
    -------
    a tuple (inputs, labels, weights) in which each array has the dtype found
    at the matching position of self._input_dtypes, self._label_dtypes, or
    self._weights_dtypes.  An array that already has the expected dtype is
    returned as is (no copy), and a labels or weights entry of None is
    returned as None.
    """

    def cast_each(arrays, dtypes):
      # Convert only the arrays whose dtype differs from the expected one.
      converted = []
      for array, dtype in zip(arrays, dtypes):
        if array.dtype == dtype:
          converted.append(array)
        else:
          converted.append(array.astype(dtype))
      return converted

    inputs, labels, weights = batch
    inputs = cast_each(inputs, self._input_dtypes)
    # The label and weight dtypes are looked up only when they are needed,
    # since callers that pass None for them may not have recorded any.
    if labels is not None:
      labels = cast_each(labels, self._label_dtypes)
    if weights is not None:
      weights = cast_each(weights, self._weights_dtypes)
    return (inputs, labels, weights)

  def default_generator(self,
                        dataset,
                        epochs=1,
+19 −11
Original line number Diff line number Diff line
@@ -942,27 +942,35 @@ class TensorGraph(Model):
    self.session = session

  def evaluate_generator(self,
                         feed_dict_generator,
                         generator,
                         metrics,
                         transformers=[],
                         labels=None,
                         outputs=None,
                         weights=[],
                         per_task_metrics=False):
    """Evaluate the performance of this model on the data produced by a generator.

    Parameters
    ----------
    generator: Generator
      Generator that constructs feed dicts for TensorGraph.
    metric: deepchem.metrics.Metric
      Evaluation metric
    transformers: list
      List of deepchem.transformers.Transformer
    per_task_metrics: bool
      If True, return per-task scores.

    Returns
    -------
    dict
      Maps tasks to scores under metric.
    """
    if labels is None:
      raise ValueError
    n_tasks = len(self.default_outputs)
    n_classes = self.default_outputs[0].out_tensor.get_shape()[-1].value
    evaluator = GeneratorEvaluator(
        self,
        feed_dict_generator,
        transformers,
        labels=labels,
        outputs=outputs,
        weights=weights,
        n_tasks=n_tasks,
        n_classes=n_classes)
        self, generator, transformers, labels=labels, weights=weights)
    if not per_task_metrics:
      scores = evaluator.compute_model_performance(metrics)
      return scores
+10 −10
Original line number Diff line number Diff line
@@ -11,7 +11,7 @@ class TestKerasModel(unittest.TestCase):
    """Test fitting a KerasModel defined as a graph."""
    n_data_points = 10
    n_features = 2
    X = np.random.rand(n_data_points, n_features).astype(np.float32)
    X = np.random.rand(n_data_points, n_features)
    y = (X[:, 0] > X[:, 1]).astype(np.float32)
    dataset = dc.data.NumpyDataset(X, y)
    inputs = tf.keras.Input(shape=(n_features,))
@@ -29,7 +29,7 @@ class TestKerasModel(unittest.TestCase):
    assert np.all(np.isclose(prediction, y.flatten(), atol=0.4))
    metric = dc.metrics.Metric(dc.metrics.roc_auc_score)
    scores = model.evaluate(dataset, [metric])
    assert scores[metric.name] > .9
    assert scores[metric.name] > 0.9

  def test_overfit_graph_model_eager(self):
    """Test fitting a KerasModel defined as a graph, in eager mode."""
@@ -40,7 +40,7 @@ class TestKerasModel(unittest.TestCase):
    """Test fitting a KerasModel defined as a sequential model."""
    n_data_points = 10
    n_features = 2
    X = np.random.rand(n_data_points, n_features).astype(np.float32)
    X = np.random.rand(n_data_points, n_features)
    y = (X[:, 0] > X[:, 1]).astype(np.float32)
    dataset = dc.data.NumpyDataset(X, y)
    keras_model = tf.keras.Sequential([
@@ -53,8 +53,9 @@ class TestKerasModel(unittest.TestCase):
    prediction = np.squeeze(model.predict_on_batch(X))
    assert np.all(np.isclose(prediction, y.flatten(), atol=0.4))
    metric = dc.metrics.Metric(dc.metrics.roc_auc_score)
    scores = model.evaluate(dataset, [metric])
    assert scores[metric.name] > .9
    generator = model.default_generator(dataset)
    scores = model.evaluate_generator(generator, [metric])
    assert scores[metric.name] > 0.9

  def test_overfit_sequential_model_eager(self):
    """Test fitting a KerasModel defined as a sequential model, in eager mode."""
@@ -98,9 +99,8 @@ class TestKerasModel(unittest.TestCase):
    n_samples = 30
    n_features = 1
    noise = 0.1
    X = np.random.rand(n_samples, n_features).astype(np.float32)
    y = (10 * X + np.random.normal(
        scale=noise, size=(n_samples, n_features))).astype(np.float32)
    X = np.random.rand(n_samples, n_features)
    y = (10 * X + np.random.normal(scale=noise, size=(n_samples, n_features)))
    dataset = dc.data.NumpyDataset(X, y)

    # Build a model that predicts uncertainty.
@@ -147,7 +147,7 @@ class TestKerasModel(unittest.TestCase):
        tf.keras.layers.Dense(n_tasks)
    ])
    model = dc.models.KerasModel(keras_model, dc.models.losses.L2Loss())
    x = np.random.random(n_features).astype(np.float32)
    x = np.random.random(n_features)
    s = model.compute_saliency(x)
    assert s.shape[0] == n_tasks
    assert s.shape[1] == n_features
@@ -179,7 +179,7 @@ class TestKerasModel(unittest.TestCase):
    output2 = tf.keras.layers.Reshape((1, 5))(tf.keras.layers.Dense(5)(flatten))
    keras_model = tf.keras.Model(inputs=inputs, outputs=[output1, output2])
    model = dc.models.KerasModel(keras_model, dc.models.losses.L2Loss())
    x = np.random.random((2, 3)).astype(np.float32)
    x = np.random.random((2, 3))
    s = model.compute_saliency(x)
    assert len(s) == 2
    assert s[0].shape == (4, 1, 2, 3)
+22 −32
Original line number Diff line number Diff line
@@ -136,15 +136,7 @@ class GeneratorEvaluator(object):
  Evaluate a Metric over a model and Generator.
  """

  def __init__(self,
               model,
               generator,
               transformers,
               labels,
               outputs=None,
               n_tasks=1,
               n_classes=2,
               weights=list()):
  def __init__(self, model, generator, transformers, labels, weights=list()):
    """
    Parameters
    ----------
@@ -156,29 +148,17 @@ class GeneratorEvaluator(object):
      Transformers to "undo" when applied to the model's outputs
    labels: list of Layer
      layers which are keys in the generator to compare to outputs
    outputs: list of Layer
      if None will use the outputs of the model
    weights: np.array
      Must be of the shape (n_samples, n_tasks)
      if weights[sample][task] is 0 that sample will not be used
      for computing the task metric
    weights: list of Layer
      layers which are keys in the generator for weight matrices
    """
    self.model = model
    self.generator = generator
    self.n_tasks = n_tasks
    self.n_classes = n_classes
    self.output_transformers = [
        transformer for transformer in transformers if transformer.transform_y
    ]
    if outputs is None:
      self.output_keys = model.outputs
    else:
      self.output_keys = outputs
    self.label_keys = labels
    self.weights = weights
    if len(self.label_keys) != len(self.output_keys):
      raise ValueError("Must have same number of labels and outputs")
    if len(self.label_keys) != 1:
    if labels is not None and len(labels) != 1:
      raise ValueError("GeneratorEvaluator currently only supports one label")

  def compute_model_performance(self, metrics, per_task_metrics=False):
@@ -192,11 +172,20 @@ class GeneratorEvaluator(object):
    per_task_metrics: bool, optional
      If true, return computed metric for each task on multitask dataset.
    """
    self.model.build()
    y = []
    w = []

    def generator_closure():
      if self.label_keys is None:
        # This is a KerasModel.
        for batch in self.generator:
          inputs, labels, weights = batch
          y.append(labels[0])
          if len(weights) > 0:
            w.append(weights[0])
          yield batch
      else:
        # This is a TensorGraph.
        for feed_dict in self.generator:
          y.append(feed_dict[self.label_keys[0]])
          if len(self.weights) > 0:
@@ -219,14 +208,15 @@ class GeneratorEvaluator(object):
      w = np.reshape(w, newshape=y.shape)

    # Compute multitask metrics
    n_classes = y.shape[-1]
    for metric in metrics:
      if per_task_metrics:
        multitask_scores[metric.name], computed_metrics = metric.compute_metric(
            y, y_pred, w, per_task_metrics=True, n_classes=self.n_classes)
            y, y_pred, w, per_task_metrics=True, n_classes=n_classes)
        all_task_scores[metric.name] = computed_metrics
      else:
        multitask_scores[metric.name] = metric.compute_metric(
            y, y_pred, w, per_task_metrics=False, n_classes=self.n_classes)
            y, y_pred, w, per_task_metrics=False, n_classes=n_classes)

    if not per_task_metrics:
      return multitask_scores