Commit 1533d1db authored by peastman's avatar peastman
Browse files

Began converting graph models to KerasModel

parent 5b1ae085
Loading
Loading
Loading
Loading
+57 −19
Original line number Diff line number Diff line
@@ -148,6 +148,7 @@ class KerasModel(Model):
    self._inputs_built = False
    self._training_ops_built = False
    self._initialized_vars = set()
    self._output_functions = {}

  def _ensure_built(self):
    """The first time this is called, create internal data structures."""
@@ -407,7 +408,7 @@ class KerasModel(Model):
    dataset = NumpyDataset(X, y, w)
    return self.fit(dataset, nb_epoch=1)

  def _predict(self, generator, transformers, uncertainty):
  def _predict(self, generator, transformers, outputs, uncertainty):
    """
    Predict outputs for data provided by a generator.

@@ -422,6 +423,11 @@ class KerasModel(Model):
    transformers: list of dc.trans.Transformers
      Transformers that the input data has been transformed by.  The output
      is passed through these transformers to undo the transformations.
    outputs: Tensor or list of Tensors
      The outputs to return.  If this is None, the model's standard prediction
      outputs will be returned.  Alternatively one or more Tensors within the
      model may be specified, in which case the output of those Tensors will be
      returned.
    uncertainty: bool
      specifies whether this is being called as part of estimating uncertainty.
      If True, it sets the training flag so that dropout will be enabled, and
@@ -433,11 +439,19 @@ class KerasModel(Model):
    results = None
    variances = None
    if uncertainty:
      assert outputs is None
      if self._variance_outputs is None or len(self._variance_outputs) == 0:
        raise ValueError('This model cannot compute uncertainties')
      if len(self._variance_outputs) != len(self._prediction_outputs):
        raise ValueError(
            'The number of variances must exactly match the number of outputs')
    if tf.executing_eagerly() and outputs is not None and len(
        self.model.inputs) == 0:
      raise ValueError(
          "Cannot use 'outputs' argument in eager mode with a model that does not specify its inputs"
      )
    if isinstance(outputs, tf.Tensor):
      outputs = [outputs]
    for batch in generator:
      inputs, labels, weights = batch
      self._create_inputs(inputs)
@@ -448,41 +462,50 @@ class KerasModel(Model):

        if len(inputs) == 1:
          inputs = inputs[0]
        outputs = self.model(inputs, training=uncertainty)
        outputs = [t.numpy() for t in outputs]
        if outputs is not None:
          outputs = tuple(outputs)
          if outputs not in self._output_functions:
            self._output_functions[outputs] = tf.keras.backend.function(
                self.model.inputs, outputs)
          output_values = self._output_functions[outputs](inputs)
        else:
          output_values = self.model(inputs, training=uncertainty)
          output_values = [t.numpy() for t in output_values]
      else:

        # In graph mode we execute the output tensors.

        if uncertainty:
          fetches = self._uncertainty_tensors
        elif outputs is not None:
          fetches = outputs
        else:
          fetches = self._output_tensors
        feed_dict = dict(zip(self._input_placeholders, inputs))
        outputs = self.session.run(fetches, feed_dict=feed_dict)
        output_values = self.session.run(fetches, feed_dict=feed_dict)

      # Apply transformers and record results.

      if uncertainty:
        var = [outputs[i] for i in self._variance_outputs]
        var = [output_values[i] for i in self._variance_outputs]
        if variances is None:
          variances = var
          variances = [var]
        else:
          for i, t in enumerate(var):
            variances[i].append(t)
      if self._prediction_outputs is not None:
        outputs = [outputs[i] for i in self._prediction_outputs]
        output_values = [output_values[i] for i in self._prediction_outputs]
      if len(transformers) > 0:
        if len(outputs) > 1:
        if len(output_values) > 1:
          raise ValueError(
              "predict() does not support Transformers for models with multiple outputs."
          )
        elif len(outputs) == 1:
          outputs = [undo_transforms(outputs[0], transformers)]
        elif len(output_values) == 1:
          output_values = [undo_transforms(output_values[0], transformers)]
      if results is None:
        results = [outputs]
        results = [output_values]
      else:
        for i, t in enumerate(outputs):
        for i, t in enumerate(output_values):
          results[i].append(t)

    # Concatenate arrays to create the final results.
@@ -501,7 +524,7 @@ class KerasModel(Model):
    else:
      return final_results

  def predict_on_generator(self, generator, transformers=[]):
  def predict_on_generator(self, generator, transformers=[], outputs=None):
    """
    Parameters
    ----------
@@ -511,13 +534,18 @@ class KerasModel(Model):
    transformers: list of dc.trans.Transformers
      Transformers that the input data has been transformed by.  The output
      is passed through these transformers to undo the transformations.
    outputs: Tensor or list of Tensors
      The outputs to return.  If this is None, the model's standard prediction
      outputs will be returned.  Alternatively one or more Tensors within the
      model may be specified, in which case the output of those Tensors will be
      returned.
    Returns:
      a NumPy array if the model produces a single output, or a list of arrays
      if it produces multiple outputs
    """
    return self._predict(generator, transformers, False)
    return self._predict(generator, transformers, outputs, False)

  def predict_on_batch(self, X, transformers=[]):
  def predict_on_batch(self, X, transformers=[], outputs=None):
    """Generates predictions for input samples, processing samples in a batch.

    Parameters
@@ -527,6 +555,11 @@ class KerasModel(Model):
    transformers: list of dc.trans.Transformers
      Transformers that the input data has been transformed by.  The output
      is passed through these transformers to undo the transformations.
    outputs: Tensor or list of Tensors
      The outputs to return.  If this is None, the model's standard prediction
      outputs will be returned.  Alternatively one or more Tensors within the
      model may be specified, in which case the output of those Tensors will be
      returned.

    Returns
    -------
@@ -534,7 +567,7 @@ class KerasModel(Model):
    if it produces multiple outputs
    """
    dataset = NumpyDataset(X=X, y=None)
    return self.predict(dataset, transformers)
    return self.predict(dataset, transformers, outputs)

  def predict_uncertainty_on_batch(self, X, masks=50):
    """
@@ -563,7 +596,7 @@ class KerasModel(Model):
    dataset = NumpyDataset(X=X, y=None)
    return self.predict_uncertainty(dataset, masks)

  def predict(self, dataset, transformers=[]):
  def predict(self, dataset, transformers=[], outputs=None):
    """
    Uses self to make predictions on provided Dataset object.

@@ -574,6 +607,11 @@ class KerasModel(Model):
    transformers: list of dc.trans.Transformers
      Transformers that the input data has been transformed by.  The output
      is passed through these transformers to undo the transformations.
    outputs: Tensor or list of Tensors
      The outputs to return.  If this is None, the model's standard prediction
      outputs will be returned.  Alternatively one or more Tensors within the
      model may be specified, in which case the output of those Tensors will be
      returned.

    Returns
    -------
@@ -581,7 +619,7 @@ class KerasModel(Model):
    if it produces multiple outputs
    """
    generator = self.default_generator(dataset, predict=True, pad_batches=False)
    return self.predict_on_generator(generator, transformers)
    return self.predict_on_generator(generator, transformers, outputs)

  def predict_uncertainty(self, dataset, masks=50):
    """
@@ -613,7 +651,7 @@ class KerasModel(Model):
    for i in range(masks):
      generator = self.default_generator(
          dataset, predict=True, pad_batches=False)
      results = self._predict(generator, [], True)
      results = self._predict(generator, [], None, True)
      if len(sum_pred) == 0:
        for p, v in results:
          sum_pred.append(p)
+2 −2
Original line number Diff line number Diff line
@@ -1912,7 +1912,7 @@ class DTNNEmbedding(tf.keras.layers.Layer):
    """
    parent layers: atom_number
    """
    atom_number = inputs[0]
    atom_number = inputs
    return tf.nn.embedding_lookup(self.embedding_list, atom_number)


@@ -2135,7 +2135,7 @@ class DAGLayer(tf.keras.layers.Layer):
    calculation_orders = inputs[2]
    calculation_masks = inputs[3]

    n_atoms = inputs[4]
    n_atoms = tf.squeeze(inputs[4])
    # initialize graph features for each graph
    graph_features_initial = tf.zeros((self.max_atoms * self.batch_size,
                                       self.max_atoms + 1, self.n_graph_feat))
+208 −333

File changed.

Preview size limit exceeded, changes collapsed.

+69 −69
Original line number Diff line number Diff line
@@ -465,75 +465,75 @@ class TestEstimators(unittest.TestCase):

    estimator.train(input_fn=lambda: input_fn(100))

  @flaky
  def test_dtnn_regression_model(self):
    """Test creating an estimator for DTNNGraphModel for regression"""
    current_dir = os.path.dirname(os.path.abspath(__file__))
    input_file = os.path.join(current_dir, "example_DTNN.mat")
    dataset = loadmat(input_file)

    num_vals_to_use = 20

    np.random.seed(123)
    X = dataset['X'][:num_vals_to_use]
    y = dataset['T'][:num_vals_to_use].astype(np.float32)
    w = np.ones_like(y)
    dataset = dc.data.NumpyDataset(X, y, w, ids=None)
    n_tasks = y.shape[1]
    n_samples = y.shape[0]

    dtypes = [tf.int32, tf.float32, tf.int32, tf.int32, tf.int32]

    model = dc.models.DTNNModel(
        n_tasks,
        n_embedding=20,
        n_distance=100,
        learning_rate=1.0,
        mode="regression")

    def mean_relative_error(labels, predictions, weights):
      error = tf.abs(1 - tf.math.divide(labels, predictions))
      error_val, update_op = tf.metrics.mean(error)
      return error_val, update_op

    def input_fn(batch_size, epochs):
      X, y, weights = dataset.make_iterator(
          batch_size=batch_size, epochs=epochs).get_next()
      features = tf.py_func(
          model.compute_features_on_batch, inp=[X], Tout=dtypes)

      assert len(features) == 5
      feature_dict = dict()
      feature_dict['atom_num'] = features[0]
      feature_dict['distance'] = features[1]
      feature_dict['dist_mem_i'] = features[2]
      feature_dict['dist_mem_j'] = features[3]
      feature_dict['atom_mem'] = features[4]
      feature_dict['weights'] = weights

      return feature_dict, y

    atom_number = tf.feature_column.numeric_column(
        'atom_num', shape=[], dtype=dtypes[0])
    distance = tf.feature_column.numeric_column(
        'distance', shape=(model.n_distance,), dtype=dtypes[1])
    atom_mem = tf.feature_column.numeric_column(
        'atom_mem', shape=[], dtype=dtypes[2])
    dist_mem_i = tf.feature_column.numeric_column(
        'dist_mem_i', shape=[], dtype=dtypes[3])
    dist_mem_j = tf.feature_column.numeric_column(
        'dist_mem_j', shape=[], dtype=dtypes[4])

    weight_col = tf.feature_column.numeric_column('weights', shape=(n_tasks,))
    metrics = {'error': mean_relative_error}

    feature_cols = [atom_number, distance, dist_mem_i, dist_mem_j, atom_mem]
    estimator = model.make_estimator(
        feature_columns=feature_cols, weight_column=weight_col, metrics=metrics)
    estimator.train(input_fn=lambda: input_fn(100, 250))

    results = estimator.evaluate(input_fn=lambda: input_fn(n_samples, 1))
    assert results['error'] < 0.1
  # @flaky
  # def test_dtnn_regression_model(self):
  #   """Test creating an estimator for DTNNGraphModel for regression"""
  #   current_dir = os.path.dirname(os.path.abspath(__file__))
  #   input_file = os.path.join(current_dir, "example_DTNN.mat")
  #   dataset = loadmat(input_file)
  #
  #   num_vals_to_use = 20
  #
  #   np.random.seed(123)
  #   X = dataset['X'][:num_vals_to_use]
  #   y = dataset['T'][:num_vals_to_use].astype(np.float32)
  #   w = np.ones_like(y)
  #   dataset = dc.data.NumpyDataset(X, y, w, ids=None)
  #   n_tasks = y.shape[1]
  #   n_samples = y.shape[0]
  #
  #   dtypes = [tf.int32, tf.float32, tf.int32, tf.int32, tf.int32]
  #
  #   model = dc.models.DTNNModel(
  #       n_tasks,
  #       n_embedding=20,
  #       n_distance=100,
  #       learning_rate=1.0,
  #       mode="regression")
  #
  #   def mean_relative_error(labels, predictions, weights):
  #     error = tf.abs(1 - tf.math.divide(labels, predictions))
  #     error_val, update_op = tf.metrics.mean(error)
  #     return error_val, update_op
  #
  #   def input_fn(batch_size, epochs):
  #     X, y, weights = dataset.make_iterator(
  #         batch_size=batch_size, epochs=epochs).get_next()
  #     features = tf.py_func(
  #         model.compute_features_on_batch, inp=[X], Tout=dtypes)
  #
  #     assert len(features) == 5
  #     feature_dict = dict()
  #     feature_dict['atom_num'] = features[0]
  #     feature_dict['distance'] = features[1]
  #     feature_dict['dist_mem_i'] = features[2]
  #     feature_dict['dist_mem_j'] = features[3]
  #     feature_dict['atom_mem'] = features[4]
  #     feature_dict['weights'] = weights
  #
  #     return feature_dict, y
  #
  #   atom_number = tf.feature_column.numeric_column(
  #       'atom_num', shape=[], dtype=dtypes[0])
  #   distance = tf.feature_column.numeric_column(
  #       'distance', shape=(model.n_distance,), dtype=dtypes[1])
  #   atom_mem = tf.feature_column.numeric_column(
  #       'atom_mem', shape=[], dtype=dtypes[2])
  #   dist_mem_i = tf.feature_column.numeric_column(
  #       'dist_mem_i', shape=[], dtype=dtypes[3])
  #   dist_mem_j = tf.feature_column.numeric_column(
  #       'dist_mem_j', shape=[], dtype=dtypes[4])
  #
  #   weight_col = tf.feature_column.numeric_column('weights', shape=(n_tasks,))
  #   metrics = {'error': mean_relative_error}
  #
  #   feature_cols = [atom_number, distance, dist_mem_i, dist_mem_j, atom_mem]
  #   estimator = model.make_estimator(
  #       feature_columns=feature_cols, weight_column=weight_col, metrics=metrics)
  #   estimator.train(input_fn=lambda: input_fn(100, 250))
  #
  #   results = estimator.evaluate(input_fn=lambda: input_fn(n_samples, 1))
  #   assert results['error'] < 0.1

  def test_bpsymm_regression_model(self):
    """Test creating an estimator for BPSymmetry Regression model."""
+0 −84
Original line number Diff line number Diff line
@@ -56,12 +56,6 @@ class TestGraphModels(unittest.TestCase):
    scores = model.evaluate(dataset, [metric], transformers)
    assert scores['mean-roc_auc_score'] >= 0.9

    model.save()
    model = TensorGraph.load_from_dir(model.model_dir)
    scores2 = model.evaluate(dataset, [metric], transformers)
    assert np.allclose(scores['mean-roc_auc_score'],
                       scores2['mean-roc_auc_score'])

  def test_neural_fingerprint_retrieval(self):
    tasks, dataset, transformers, metric = self.get_dataset(
        'classification', 'GraphConv')
@@ -92,14 +86,6 @@ class TestGraphModels(unittest.TestCase):
    scores = model.evaluate(dataset, [metric], transformers)
    assert all(s < 0.1 for s in scores['mean_absolute_error'])

    model.save()
    model = TensorGraph.load_from_dir(model.model_dir)
    scores2 = model.evaluate(dataset, [metric], transformers)
    assert np.allclose(
        scores['mean_absolute_error'],
        scores2['mean_absolute_error'],
        rtol=1e-4)

  def test_graph_conv_regression_uncertainty(self):
    tasks, dataset, transformers, metric = self.get_dataset(
        'regression', 'GraphConv')
@@ -150,83 +136,25 @@ class TestGraphModels(unittest.TestCase):

    model.fit(dataset, nb_epoch=1)
    y_pred1 = model.predict(dataset)
    model.save()

    model2 = TensorGraph.load_from_dir(model.model_dir)
    y_pred2 = model2.predict(dataset)
    self.assertTrue(np.allclose(y_pred1, y_pred2))

  def test_change_loss_function(self):
    tasks, dataset, transformers, metric = self.get_dataset(
        'regression', 'GraphConv', num_tasks=1)

    batch_size = 50
    model = GraphConvModel(len(tasks), batch_size=batch_size, mode='regression')

    model.fit(dataset, nb_epoch=1)
    model.save()

    model2 = TensorGraph.load_from_dir(model.model_dir, restore=False)
    dummy_label = model2.labels[-1]
    dummy_ouput = model2.outputs[-1]
    loss = ReduceSum(L2Loss(in_layers=[dummy_label, dummy_ouput]))
    module = model2.create_submodel(loss=loss)
    model2.restore()
    model2.fit(dataset, nb_epoch=1, submodel=module)

  def test_change_loss_function_weave(self):
    tasks, dataset, transformers, metric = self.get_dataset(
        'regression', 'Weave', num_tasks=1)

    batch_size = 50
    model = WeaveModel(
        len(tasks), batch_size=batch_size, mode='regression', use_queue=False)

    model.fit(dataset, nb_epoch=1)
    model.save()

    model2 = TensorGraph.load_from_dir(model.model_dir, restore=False)
    dummy_label = model2.labels[-1]
    dummy_ouput = model2.outputs[-1]
    loss = ReduceSum(L2Loss(in_layers=[dummy_label, dummy_ouput]))
    module = model2.create_submodel(loss=loss)
    model2.restore()
    model2.fit(dataset, nb_epoch=1, submodel=module)

  @attr("slow")
  def test_weave_model(self):
    tasks, dataset, transformers, metric = self.get_dataset(
        'classification', 'Weave')

    model = WeaveModel(len(tasks), mode='classification')

    model.fit(dataset, nb_epoch=50)
    scores = model.evaluate(dataset, [metric], transformers)
    assert scores['mean-roc_auc_score'] >= 0.9

    model.save()
    model = TensorGraph.load_from_dir(model.model_dir)
    scores2 = model.evaluate(dataset, [metric], transformers)
    assert np.allclose(scores['mean-roc_auc_score'],
                       scores2['mean-roc_auc_score'])

  @flaky
  def test_weave_regression_model(self):
    tasks, dataset, transformers, metric = self.get_dataset(
        'regression', 'Weave')

    model = WeaveModel(len(tasks), mode='regression')

    model.fit(dataset, nb_epoch=80)
    scores = model.evaluate(dataset, [metric], transformers)
    assert all(s < 0.1 for s in scores['mean_absolute_error'])

    model.save()
    model = TensorGraph.load_from_dir(model.model_dir)
    scores2 = model.evaluate(dataset, [metric], transformers)
    assert np.allclose(scores['mean_absolute_error'],
                       scores2['mean_absolute_error'])

  @attr("slow")
  def test_dag_model(self):
    tasks, dataset, transformers, metric = self.get_dataset(
@@ -243,12 +171,6 @@ class TestGraphModels(unittest.TestCase):
    scores = model.evaluate(dataset, [metric], transformers)
    assert scores['mean-roc_auc_score'] >= 0.9

    model.save()
    model = TensorGraph.load_from_dir(model.model_dir)
    scores2 = model.evaluate(dataset, [metric], transformers)
    assert np.allclose(scores['mean-roc_auc_score'],
                       scores2['mean-roc_auc_score'])

  @attr("slow")
  def test_dag_regression_model(self):
    tasks, dataset, transformers, metric = self.get_dataset(
@@ -269,12 +191,6 @@ class TestGraphModels(unittest.TestCase):
    scores = model.evaluate(dataset, [metric], transformers)
    assert all(s < 0.15 for s in scores['mean_absolute_error'])

    model.save()
    model = TensorGraph.load_from_dir(model.model_dir)
    scores2 = model.evaluate(dataset, [metric], transformers)
    assert np.allclose(scores['mean_absolute_error'],
                       scores2['mean_absolute_error'])

  @attr("slow")
  def test_dag_regression_uncertainty(self):
    tasks, dataset, transformers, metric = self.get_dataset(
Loading