Unverified Commit 88fcd2d4 authored by Karl Leswing's avatar Karl Leswing Committed by GitHub
Browse files

Merge pull request #1259 from peastman/proba

Removed predict_proba()
parents 43814216 c3168479
Loading
Loading
Loading
Loading
+1 −51
Original line number Diff line number Diff line
@@ -80,18 +80,6 @@ class Model(BaseEstimator):
    raise NotImplementedError(
        "Each model is responsible for its own predict_on_batch method.")

  def predict_proba_on_batch(self, X):
    """
    Makes predictions of class probabilities on given batch of new data.

    This base implementation is a placeholder that always raises; a model
    that supports probability prediction has to provide its own version.

    Parameters
    ----------
    X: np.ndarray
      Features

    Raises
    ------
    NotImplementedError
      Always, in this base implementation.
    """
    # Name this method (not predict_on_batch) in the message so the error
    # points at the override that is actually missing.
    raise NotImplementedError(
        "Each model is responsible for its own predict_proba_on_batch method.")

  def reload(self):
    """
    Reload trained model from disk.
@@ -152,18 +140,9 @@ class Model(BaseEstimator):
      y_pred_batch = self.predict_on_batch(X_batch)
      # Discard any padded predictions
      y_pred_batch = y_pred_batch[:n_samples]
      y_pred_batch = np.reshape(y_pred_batch, (n_samples, n_tasks))
      y_pred_batch = undo_transforms(y_pred_batch, transformers)
      y_preds.append(y_pred_batch)
    y_pred = np.vstack(y_preds)

    # The iterbatches does padding with zero-weight examples on the last batch.
    # Remove padded examples.
    n_samples = len(dataset)
    y_pred = np.reshape(y_pred, (n_samples, n_tasks))
    # Special case to handle singletasks.
    if n_tasks == 1:
      y_pred = np.reshape(y_pred, (n_samples,))
    y_pred = np.concatenate(y_preds)
    return y_pred

  def evaluate(self, dataset, metrics, transformers=[], per_task_metrics=False):
@@ -195,35 +174,6 @@ class Model(BaseEstimator):
          metrics, per_task_metrics=per_task_metrics)
      return scores, per_task_scores

  def predict_proba(self,
                    dataset,
                    transformers=[],
                    batch_size=None,
                    n_classes=2):
    """
    Predicts class probabilities for every sample of a dataset.

    The dataset is walked in deterministic batches. Each batch goes through
    `predict_proba_on_batch`, is reshaped to (batch, n_tasks, n_classes) and
    passed to `undo_transforms`; the per-batch results are then stacked.

    TODO: Do transformers even make sense here?

    Parameters
    ----------
    dataset: object
      Must support `len()` and `iterbatches(batch_size, deterministic=True)`
      yielding 4-tuples whose first element is the feature batch.
    transformers: list
      Handed unchanged to `undo_transforms` for each batch.
    batch_size: int, optional
      Forwarded to `dataset.iterbatches`.
    n_classes: int
      Size of the last axis of the returned array.

    Returns:
      y_pred: numpy ndarray of shape (n_samples, n_tasks, n_classes)
    """
    task_count = self.get_num_tasks()
    batch_probas = []
    batches = dataset.iterbatches(batch_size, deterministic=True)
    # Only the features are needed; labels, weights and ids are ignored.
    for X_batch, _y, _w, _ids in batches:
      batch_len = len(X_batch)
      probas = self.predict_proba_on_batch(X_batch)[:batch_len]
      probas = np.reshape(probas, (batch_len, task_count, n_classes))
      batch_probas.append(undo_transforms(probas, transformers))
    stacked = np.vstack(batch_probas)
    # The iterbatches does padding with zero-weight examples on the last
    # batch, so keep only the first len(dataset) rows.
    total = len(dataset)
    return np.reshape(stacked[:total], (total, task_count, n_classes))

  def get_task_type(self):
    """
    Currently models can only be classifiers or regressors.
+6 −33
Original line number Diff line number Diff line
@@ -105,12 +105,13 @@ class SingletaskToMultitask(Model):
    """
    n_tasks = len(self.tasks)
    n_samples = X.shape[0]
    y_pred = np.zeros((n_samples, n_tasks))
    y_preds = []
    for ind, task in enumerate(self.tasks):
      task_model = self.model_builder(self.task_model_dirs[task])
      task_model.reload()

      y_pred[:, ind] = task_model.predict_on_batch(X)
      y_preds.append(task_model.predict_on_batch(X))
    y_pred = np.stack(y_preds, axis=1)
    return y_pred

  def predict(self, dataset, transformers=[]):
@@ -119,44 +120,16 @@ class SingletaskToMultitask(Model):
    """
    n_tasks = len(self.tasks)
    n_samples = len(dataset)
    y_pred = np.zeros((n_samples, n_tasks))
    y_preds = []
    for ind, task in enumerate(self.tasks):
      task_model = self.model_builder(self.task_model_dirs[task])
      task_model.reload()

      y_pred[:, ind] = task_model.predict(dataset, [])
      y_preds.append(task_model.predict(dataset, []))
    y_pred = np.stack(y_preds, axis=1)
    y_pred = undo_transforms(y_pred, transformers)
    return y_pred

  def predict_proba_on_batch(self, X, n_classes=2):
    """
    Collects per-task class probabilities for one batch.

    For every task, the singletask model is rebuilt from its directory,
    reloaded, and asked for probabilities on `X`; its result fills that
    task's slice of the output.

    Returns an array of shape (X.shape[0], len(self.tasks), n_classes).
    """
    task_total = len(self.tasks)
    proba = np.zeros((X.shape[0], task_total, n_classes))
    for column, name in enumerate(self.tasks):
      single_task_model = self.model_builder(self.task_model_dirs[name])
      single_task_model.reload()
      proba[:, column] = single_task_model.predict_proba_on_batch(X)
    return proba

  def predict_proba(self, dataset, transformers=[], n_classes=2):
    """
    Collects per-task class probabilities for a whole dataset.

    For every task, the singletask model is rebuilt from its directory,
    reloaded, and asked for probabilities on `dataset`; the squeezed result
    fills that task's slice of the output.

    Returns an array of shape (len(dataset), len(self.tasks), n_classes).
    """
    task_total = len(self.tasks)
    proba = np.zeros((len(dataset), task_total, n_classes))
    for column, name in enumerate(self.tasks):
      single_task_model = self.model_builder(self.task_model_dirs[name])
      single_task_model.reload()
      task_proba = single_task_model.predict_proba(dataset, transformers,
                                                   n_classes)
      # Squeeze drops length-1 axes so the result fits the per-task slice.
      proba[:, column] = np.squeeze(task_proba)
    return proba

  def save(self):
    """Save all models

+4 −15
Original line number Diff line number Diff line
@@ -77,21 +77,10 @@ class SklearnModel(Model):
      Ignored for Sklearn Model. Only used for Tensorflow models
      with rigid batch-size requirements.
    """
    return self.model_instance.predict(X)

  def predict_proba_on_batch(self, X, pad_batch=False):
    """
    Makes per-class predictions on batch of data.

    Tries the wrapped model's `predict_proba` first and falls back to its
    `predict` when an AttributeError is raised.

    Parameters
    ----------
    X: np.ndarray
      Features
    pad_batch: bool, optional
      Ignored for Sklearn Model. Only used for Tensorflow models
      with rigid batch-size requirements.
    """
    try:
      batch_output = self.model_instance.predict_proba(X)
    except AttributeError:
      # No usable predict_proba: use the plain predictions instead.
      batch_output = self.model_instance.predict(X)
    return batch_output

  def predict(self, X, transformers=[]):
    """
+1 −11
Original line number Diff line number Diff line
@@ -196,15 +196,5 @@ class TensorflowMultitaskIRVClassifier(TensorGraph):
        IRVRegularize(predictions, self.penalty, in_layers=[predictions])
    self.set_loss(loss)
    outputs = Stack(axis=1, in_layers=outputs)
    outputs = Concat(axis=2, in_layers=[1 - outputs, outputs])
    self.add_output(outputs)

  def predict(self, dataset, transformers=[], outputs=None):
    """
    Returns the parent class's predictions rounded to the nearest integer.

    All arguments are forwarded unchanged to the parent `predict`.
    """
    parent = super(TensorflowMultitaskIRVClassifier, self)
    raw = parent.predict(dataset, transformers=transformers, outputs=outputs)
    return np.round(raw).astype(int)

  def predict_proba(self, dataset, transformers=[], outputs=None):
    """
    Returns the parent's probabilities paired with their complements.

    The parent `predict_proba` result `p` is joined with `1 - p` along
    axis 2, complement first.
    """
    parent = super(TensorflowMultitaskIRVClassifier, self)
    positive = parent.predict_proba(
        dataset, transformers=transformers, outputs=outputs)
    negative = 1 - positive
    return np.concatenate([negative, positive], axis=2)
+0 −30
Original line number Diff line number Diff line
@@ -168,36 +168,6 @@ class MultitaskClassifier(TensorGraph):
          tf.cast(labels, tf.int32), self.n_classes)
    return tensors

  def predict_proba(self, dataset, transformers=[], outputs=None):
    """
    Returns the parent class's `predict` output unchanged.

    This deliberately bypasses this class's own `predict` by calling the
    parent implementation directly.
    """
    parent = super(MultitaskClassifier, self)
    return parent.predict(dataset, transformers, outputs)

  def predict(self, dataset, transformers=[], outputs=None):
    """
    Predicts a class index for each sample and task of a Dataset.

    Delegates to the parent class's `predict` and takes the argmax over
    axis 2 of whatever it returns.

    Parameters
    ----------
    dataset: dc.data.Dataset
      Dataset to make prediction on
    transformers: list
      List of dc.trans.Transformers.
    outputs: object
      Forwarded to the parent `predict`. If outputs is None, then will assume
      outputs = self.outputs[0] (single output). If outputs is a Layer/Tensor,
      then will evaluate and return as a single ndarray.
      NOTE(review): a list of Layers/Tensors is also passed through to argmax
      here; confirm that case is intended.

    Returns
    -------
    y_pred: numpy ndarray
      Result of `np.argmax` over axis 2 of the parent prediction.
    """
    parent = super(MultitaskClassifier, self)
    # The parent result is of shape (n_samples, n_tasks, n_classes)
    class_scores = parent.predict(dataset, transformers, outputs)
    # Taking the argmax over the class axis leaves (n_samples, n_tasks)
    return np.argmax(class_scores, axis=2)


class MultitaskRegressor(TensorGraph):

Loading