Commit 898295dc authored by peastman
Browse files

make_estimator() works with MultiTaskClassifier and MultiTaskRegressor

parent a6d9b817
Loading
Loading
Loading
Loading
+14 −23
Original line number Diff line number Diff line
@@ -18,7 +18,7 @@ from deepchem.metrics import to_one_hot, from_one_hot
from deepchem.metrics import to_one_hot

from deepchem.models.tensorgraph.tensor_graph import TensorGraph, TFWrapper
from deepchem.models.tensorgraph.layers import Feature, Label, Weights, WeightedError, Dense, Dropout, WeightDecay, Reshape, SoftMaxCrossEntropy, L2Loss, ReduceSum
from deepchem.models.tensorgraph.layers import Feature, Label, Weights, WeightedError, Dense, Dropout, WeightDecay, Reshape, SoftMax, SoftMaxCrossEntropy, L2Loss, ReduceSum


class MultiTaskClassifier(TensorGraph):
@@ -112,15 +112,16 @@ class MultiTaskClassifier(TensorGraph):

    # Compute the loss function for each label.

    output = Reshape(
    logits = Reshape(
        shape=(-1, n_tasks, n_classes),
        in_layers=[
            Dense(in_layers=[prev_layer], out_channels=n_tasks * n_classes)
        ])
    output = SoftMax(logits)
    self.add_output(output)
    labels = Label(shape=(None, n_tasks, n_classes))
    weights = Weights(shape=(None, n_tasks))
    loss = SoftMaxCrossEntropy(in_layers=[labels, output])
    loss = SoftMaxCrossEntropy(in_layers=[labels, logits])
    weighted_loss = WeightedError(in_layers=[loss, weights])
    if weight_decay_penalty != 0.0:
      weighted_loss = WeightDecay(
@@ -152,6 +153,16 @@ class MultiTaskClassifier(TensorGraph):
          feed_dict[self.task_weights[0]] = w_b
        yield feed_dict

  def create_estimator_inputs(self, feature_columns, weight_column, features, labels, mode):
    tensors = {}
    for layer, column in zip(self.features, feature_columns):
      tensors[layer] = tf.feature_column.input_layer(features, [column])
    if weight_column is not None:
      tensors[self.task_weights[0]] = tf.feature_column.input_layer(features, [weight_column])
    if labels is not None:
      tensors[self.labels[0]] = tf.one_hot(tf.cast(labels, tf.int32), self.n_classes)
    return tensors

  def predict_proba(self, dataset, transformers=[], outputs=None):
    """Delegate to the parent class's predict() and return its result.

    dataset, transformers and outputs are forwarded positionally and
    unmodified; whatever the parent's predict() returns is returned as is.
    """
    parent = super(MultiTaskClassifier, self)
    return parent.predict(dataset, transformers, outputs)
@@ -289,26 +300,6 @@ class MultiTaskRegressor(TensorGraph):
          in_layers=[weighted_loss])
    self.set_loss(weighted_loss)

  def default_generator(self,
                        dataset,
                        epochs=1,
                        predict=False,
                        deterministic=True,
                        pad_batches=True):
    """Yield one feed dict per batch of `dataset`, for `epochs` passes.

    Parameters
    ----------
    dataset:
      Object providing iterbatches(batch_size, deterministic, pad_batches),
      which yields (X, y, w, ids) tuples.
    epochs: int
      Number of passes to make over the dataset.
    predict: bool
      If True, labels and weights are left out of the feed dicts.
    deterministic: bool
      Forwarded to dataset.iterbatches().
    pad_batches: bool
      Forwarded to dataset.iterbatches().

    Yields
    ------
    A dict mapping self.labels[0] to the labels reshaped to
    (-1, self.n_tasks, 1), self.features[0] to the features, and
    self.task_weights[0] to the weights.  Any entry whose batch value is None
    is omitted.
    """
    include_targets = not predict
    for _ in range(epochs):
      batches = dataset.iterbatches(
          batch_size=self.batch_size,
          deterministic=deterministic,
          pad_batches=pad_batches)
      for features_batch, labels_batch, weights_batch, _ids in batches:
        batch_feed = {}
        if include_targets and labels_batch is not None:
          # Labels get a trailing axis of size 1 after the task axis.
          batch_feed[self.labels[0]] = labels_batch.reshape(
              -1, self.n_tasks, 1)
        if features_batch is not None:
          batch_feed[self.features[0]] = features_batch
        if include_targets and weights_batch is not None:
          batch_feed[self.task_weights[0]] = weights_batch
        yield batch_feed


class MultiTaskFitTransformRegressor(MultiTaskRegressor):
  """Implements a MultiTaskRegressor that performs on-the-fly transformation during fit/predict.
+34 −7
Original line number Diff line number Diff line
@@ -902,12 +902,7 @@ class TensorGraph(Model):
    def model_fn(features, labels, mode):
      # Define the inputs.

      tensors = {}
      for layer, column in zip(self.features, feature_columns):
        tensors[layer] = tf.feature_column.input_layer(features, [column])
      if weight_column is not None:
        tensors[self.task_weights[0]] = tf.feature_column.input_layer(features, [weight_column])
      tensors[self.labels[0]] = labels
      tensors = self.create_estimator_inputs(feature_columns, weight_column, features, labels, mode)
      for layer, tensor in tensors.items():
        layer.add_summary_to_tg(tensor)

@@ -927,7 +922,7 @@ class TensorGraph(Model):
          weights = tensors[self.task_weights[0]]
        eval_metric_ops = {}
        for name, function in metrics.items():
          eval_metric_ops[name] = function(labels, predictions, weights)
          eval_metric_ops[name] = function(tensors[self.labels[0]], predictions, weights)
        return tf.estimator.EstimatorSpec(mode, loss=loss, eval_metric_ops=eval_metric_ops)
      if mode == tf.estimator.ModeKeys.TRAIN:
        loss = create_tensors(self.loss, tensors, 1)
@@ -941,6 +936,29 @@ class TensorGraph(Model):

    return tf.estimator.Estimator(model_fn=model_fn, model_dir=model_dir)

  def create_estimator_inputs(self, feature_columns, weight_column, features, labels, mode):
    """Build the input tensors needed by make_estimator().

    make_estimator() forwards its own feature_columns and weight_column
    arguments here, together with the features, labels, and mode arguments that
    the estimator passed to the model function.  The result is a dict that maps
    each Feature, Label, or Weights layer of the graph (the keys) to the tensor
    that should stand in for it (the values).

    A subclass that overrides default_generator() is required to override this
    method as well; otherwise a ValueError is raised.
    """
    overrides_generator = (
        self.__class__.default_generator is not TensorGraph.default_generator)
    if overrides_generator:
      raise ValueError("Class overrides default_generator() but not create_estimator_inputs()")
    inputs = {
        feature_layer: tf.feature_column.input_layer(features, [feature_column])
        for feature_layer, feature_column in zip(self.features, feature_columns)
    }
    if weight_column is not None:
      weight_tensor = tf.feature_column.input_layer(features, [weight_column])
      inputs[self.task_weights[0]] = weight_tensor
    if labels is not None:
      # Cast the labels to the dtype declared by the label layer.
      label_dtype = self.labels[0].dtype
      inputs[self.labels[0]] = tf.cast(labels, label_dtype)
    return inputs

def _enqueue_batch(tg, generator, graph, sess, n_enqueued, final_sample):
  """
  Function to load data into
@@ -963,6 +981,15 @@ def _enqueue_batch(tg, generator, graph, sess, n_enqueued, final_sample):
      for layer in tg.features + tg.labels + tg.task_weights:
        if layer in feed_dict:
          value = feed_dict[layer]
          # Add or remove dimensions of size 1 to match the shape of the layer.
          value_dims = len(value.shape)
          layer_dims = len(layer.shape)
          if value_dims < layer_dims:
            if all(i==1 for i in layer.shape[value_dims:]):
              value = value.reshape(list(value.shape)+[1]*(layer_dims-value_dims))
          if value_dims > layer_dims:
            if all(i==1 for i in value.shape[layer_dims:]):
              value = value.reshape(value.shape[:layer_dims])
        else:
          value = np.zeros(
              [0] + list(layer.shape[1:]), dtype=layer.dtype.as_numpy_dtype)