Commit 385b763b authored by Bharath Ramsundar's avatar Bharath Ramsundar
Browse files

Changes

parent f7a42ba0
Loading
Loading
Loading
Loading
+200 −114
Original line number Diff line number Diff line
@@ -19,47 +19,111 @@ from scipy.stats import pearsonr

logger = logging.getLogger(__name__)


def matthews_corrcoef(*args, **kwargs):
  """DEPRECATED. Thin wrapper around `sklearn.metrics.matthews_corrcoef`.

  Logs a deprecation warning, then forwards all arguments unchanged.
  Will be removed in a future version of DeepChem.
  """
  # A merge artifact had this warning emitted twice; log it exactly once.
  logger.warning(
      "matthews_corrcoef is deprecated. Use sklearn.metrics.matthews_corrcoef instead. dc.metrics.matthews_corrcoef will be removed in a future version of DeepChem."
  )
  return sklearn.metrics.matthews_corrcoef(*args, **kwargs)


def recall_score(*args, **kwargs):
  """DEPRECATED. Thin wrapper around `sklearn.metrics.recall_score`.

  Logs a deprecation warning, then forwards all arguments unchanged.
  Will be removed in a future version of DeepChem.
  """
  # A merge artifact had this warning emitted twice; log it exactly once.
  logger.warning(
      "recall_score is deprecated. Use sklearn.metrics.recall_score instead. dc.metrics.recall_score will be removed in a future version of DeepChem."
  )
  return sklearn.metrics.recall_score(*args, **kwargs)


def r2_score(*args, **kwargs):
  """DEPRECATED. Thin wrapper around `sklearn.metrics.r2_score`.

  Logs a deprecation warning, then forwards all arguments unchanged.
  Will be removed in a future version of DeepChem.
  """
  # A merge artifact had this warning emitted twice; log it exactly once.
  logger.warning(
      "r2_score is deprecated. Use sklearn.metrics.r2_score instead. dc.metrics.r2_score will be removed in a future version of DeepChem."
  )
  return sklearn.metrics.r2_score(*args, **kwargs)


def mean_squared_error(*args, **kwargs):
  """DEPRECATED. Thin wrapper around `sklearn.metrics.mean_squared_error`.

  Logs a deprecation warning, then forwards all arguments unchanged.
  Will be removed in a future version of DeepChem.
  """
  # A merge artifact had this warning emitted twice; log it exactly once.
  logger.warning(
      "mean_squared_error is deprecated. Use sklearn.metrics.mean_squared_error instead. dc.metrics.mean_squared_error will be removed in a future version of DeepChem."
  )
  return sklearn.metrics.mean_squared_error(*args, **kwargs)


def mean_absolute_error(*args, **kwargs):
  """DEPRECATED. Thin wrapper around `sklearn.metrics.mean_absolute_error`.

  Logs a deprecation warning, then forwards all arguments unchanged.
  Will be removed in a future version of DeepChem.
  """
  # A merge artifact had this warning emitted twice; log it exactly once.
  logger.warning(
      "mean_absolute_error is deprecated. Use sklearn.metrics.mean_absolute_error instead. dc.metrics.mean_absolute_error will be removed in a future version of DeepChem."
  )
  return sklearn.metrics.mean_absolute_error(*args, **kwargs)


def precision_score(*args, **kwargs):
  """DEPRECATED. Thin wrapper around `sklearn.metrics.precision_score`.

  Logs a deprecation warning, then forwards all arguments unchanged.
  Will be removed in a future version of DeepChem.
  """
  # A merge artifact had this warning emitted twice; log it exactly once.
  logger.warning(
      "precision_score is deprecated. Use sklearn.metrics.precision_score instead. dc.metrics.precision_score will be removed in a future version of DeepChem."
  )
  return sklearn.metrics.precision_score(*args, **kwargs)


def precision_recall_curve(*args, **kwargs):
  """DEPRECATED. Thin wrapper around `sklearn.metrics.precision_recall_curve`.

  Logs a deprecation warning, then forwards all arguments unchanged.
  Will be removed in a future version of DeepChem.
  """
  # A merge artifact had this warning emitted twice; log it exactly once.
  logger.warning(
      "precision_recall_curve is deprecated. Use sklearn.metrics.precision_recall_curve instead. dc.metrics.precision_recall_curve will be removed in a future version of DeepChem."
  )
  return sklearn.metrics.precision_recall_curve(*args, **kwargs)


def auc(*args, **kwargs):
  """DEPRECATED. Thin wrapper around `sklearn.metrics.auc`.

  Logs a deprecation warning, then forwards all arguments unchanged.
  Will be removed in a future version of DeepChem.
  """
  # A merge artifact had this warning emitted twice; log it exactly once.
  logger.warning(
      "auc is deprecated. Use sklearn.metrics.auc instead. dc.metrics.auc will be removed in a future version of DeepChem."
  )
  return sklearn.metrics.auc(*args, **kwargs)


def jaccard_score(*args, **kwargs):
  """DEPRECATED. Thin wrapper around `sklearn.metrics.jaccard_score`.

  Logs a deprecation warning, then forwards all arguments unchanged.
  Will be removed in a future version of DeepChem.
  """
  # A merge artifact had this warning emitted twice; log it exactly once.
  logger.warning(
      "jaccard_score is deprecated. Use sklearn.metrics.jaccard_score instead. dc.metrics.jaccard_score will be removed in a future version of DeepChem."
  )
  return sklearn.metrics.jaccard_score(*args, **kwargs)


def f1_score(*args, **kwargs):
  """DEPRECATED. Thin wrapper around `sklearn.metrics.f1_score`.

  Logs a deprecation warning, then forwards all arguments unchanged.
  Will be removed in a future version of DeepChem.
  """
  # A merge artifact had this warning emitted twice; log it exactly once.
  logger.warning(
      "f1_score is deprecated. Use sklearn.metrics.f1_score instead. dc.metrics.f1_score will be removed in a future version of DeepChem."
  )
  return sklearn.metrics.f1_score(*args, **kwargs)


def threshold_predictions(y, threshold=0.5):
  """Convert class probabilities into hard class predictions.

  Parameters
  ----------
  y: np.ndarray
    Class-probability matrix of shape `(N, n_classes)`; each row must
    sum to 1.
  threshold: float, optional (Default 0.5)
    Probability cutoff for the positive class. Only consulted when
    `n_classes == 2`; for multiclass input it is ignored and the
    highest-probability class is chosen.

  Returns
  -------
  y_out: np.ndarray
    Array of shape `(N,)` holding predicted class labels in the range
    `0` to `n_classes - 1`.

  Raises
  ------
  ValueError
    If `y` is not a 2D ndarray or its rows do not sum to 1.
  """
  if not isinstance(y, np.ndarray) or y.ndim != 2:
    raise ValueError("y must be a ndarray of shape (N, n_classes)")
  n_rows, n_cols = y.shape
  # Guard against inputs that are not valid probability distributions.
  if not np.allclose(np.sum(y, axis=1), np.ones(n_rows)):
    raise ValueError(
        "y must be a class probability matrix with rows summing to 1.")
  if n_cols == 2:
    # Binary case: threshold the positive-class column directly.
    return np.where(y[:, 1] >= threshold, np.ones(n_rows), np.zeros(n_rows))
  # Multiclass case: pick the most probable class per row.
  return np.argmax(y, axis=1)


def normalize_weight_shape(w, n_samples, n_tasks):
  """A utility function to correct the shape of the weight array.

@@ -97,8 +161,14 @@ def normalize_weight_shape(w, n_samples, n_tasks):
      # This is a little arcane but it repeats w across tasks.
      w_out = np.tile(w, (n_tasks, 1)).T
    elif len(w.shape) == 2:
      if w.shape != (n_samples, n_tasks):
      if w.shape == (n_samples, 1):
        # If w.shape == (n_samples, 1) handle it as 1D
        w = np.squeeze(w, axis=1)
        w_out = np.tile(w, (n_tasks, 1)).T
      elif w.shape != (n_samples, n_tasks):
        raise ValueError("Shape for w doens't match (n_samples, n_tasks)")
      else:
        # w.shape == (n_samples, n_tasks)
        w_out = w
    else:
      raise ValueError("w must be of dimension 1, 2, or 3")
@@ -108,7 +178,6 @@ def normalize_weight_shape(w, n_samples, n_tasks):
  return w_out



def normalize_prediction_shape(y, mode=None, n_classes=None):
  """A utility function to correct the shape of the input array.

@@ -175,7 +244,9 @@ def normalize_prediction_shape(y, mode=None, n_classes=None):
      elif len(y.shape) == 3:
        y_out = y
      else:
        raise ValueError("y must be an array of dimension 1, 2, or 3 for classification problems.")
        raise ValueError(
            "y must be an array of dimension 1, 2, or 3 for classification problems."
        )
    else:
      # In this clase, y is a scalar. We assume that `y` is binary
      # since it's hard to do anything else in this case.
@@ -193,16 +264,22 @@ def normalize_prediction_shape(y, mode=None, n_classes=None):
        y_out = y
      elif len(y.shape) == 3:
        if y.shape[-1] != 1:
          raise ValueError("y must a float sclar or a ndarray of shape `(N,)` or `(N, n_tasks)` or `(N, n_tasks, 1)` for regression problems.")
          raise ValueError(
              "y must a float sclar or a ndarray of shape `(N,)` or `(N, n_tasks)` or `(N, n_tasks, 1)` for regression problems."
          )
        y_out = np.squeeze(y, axis=-1)
      else:
        raise ValueError("y must a float sclar or a ndarray of shape `(N,)` or `(N, n_tasks)` or `(N, n_tasks, 1)` for regression problems.")
        raise ValueError(
            "y must a float sclar or a ndarray of shape `(N,)` or `(N, n_tasks)` or `(N, n_tasks, 1)` for regression problems."
        )
    else:
      # In this clase, y is a scalar.
      try:
        y = float(y)
      except TypeError:
        raise ValueError("y must a float sclar or a ndarray of shape `(N,)` or `(N, n_tasks)` or `(N, n_tasks, 1)` for regression problems.")
        raise ValueError(
            "y must a float sclar or a ndarray of shape `(N,)` or `(N, n_tasks)` or `(N, n_tasks, 1)` for regression problems."
        )
      y = np.array(y)
      y_out = np.reshape(y, (1, 1))
  else:
@@ -211,6 +288,7 @@ def normalize_prediction_shape(y, mode=None, n_classes=None):
    y_out = y
  return y_out


def to_one_hot(y, n_classes=2):
  """Transforms label vector into one-hot encoding.

@@ -447,13 +525,16 @@ class Metric(object):
  The `Metric` class provides a wrapper for standardizing the API
  around different classes of metrics that may be useful for DeepChem
  models. The implementation provides a few non-standard conveniences
  such as built-in support for multitask and multiclass metrics, and
  support for multidimensional outputs.
  such as built-in support for multitask and multiclass metrics.

  There are a variety of different metrics this class aims to support.
  At the most simple, metrics for classification and regression that
  assume that values to compare are scalars. More complicated, there
  may perhaps be two image arrays that need to be compared.
  Metrics for classification and regression that assume that values to
  compare are scalars are supported.

  At present, this class doesn't support metric computation on models
  which don't present scalar outputs. For example, if you have a
  generative model which predicts images or molecules, you will need
  to write a custom evaluation and metric setup.
  """

  def __init__(self,
@@ -467,46 +548,53 @@ class Metric(object):
    Parameters
    ----------
    metric: function
      function that takes args y_true, y_pred (in that order) and
      computes desired score.
    task_averager: function, optional
      Function that takes args y_true, y_pred (in that order) and
      computes desired score. If sample weights are to be considered,
      `metric` may take in an additional keyword argument
      `sample_weight`.
    task_averager: function, optional (default, np.mean)
      If not None, should be a function that averages metrics across
      tasks. For example, task_averager=np.mean. If task_averager is
      provided, this metric will be assumed to be multitask and
      `self.is_multitask` will be set to True. 
      tasks. 
    name: str, optional (default None)
      Name of this metric
    threshold: float, optional (default None)
    threshold: float, optional (default None) (DEPRECATED)
      Used for binary metrics and is the threshold for the positive
      class.
    mode: str, optional (default None)
      Should usually be "classification" or "regression."
    compute_energy_metric: bool, optional (default None)
    compute_energy_metric: bool, optional (default None) (DEPRECATED)
      Deprecated metric. Will be removed in a future version of
      DeepChem. Do not use.
    """
    if threshold is not None:
      logger.warn(
          "threshold is deprecated and will be removed in a future version of DeepChem. Set threshold in compute_metric instead"
      )
    if compute_energy_metric is not None:
      self.compute_energy_metric = compute_energy_metric
      logger.warn("compute_energy_metric is deprecated and will be removed in a future version of DeepChem.")
      logger.warn(
          "compute_energy_metric is deprecated and will be removed in a future version of DeepChem."
      )
    else:
      self.compute_energy_metric = False
    self.metric = metric
    if task_averager is None:
      self.task_averager = np.mean
    else:
      self.task_averager = task_averager
    self.is_multitask = (self.task_averager is not None)
    if name is None:
      if not self.is_multitask:
      if task_averager is None:
        if hasattr(self.metric, '__name__'):
          self.name = self.metric.__name__
        else:
          self.name = "unknown metric"
      else:
        if hasattr(self.metric, '__name__'):
          self.name = self.task_averager.__name__ + "-" + self.metric.__name__
          self.name = task_averager.__name__ + "-" + self.metric.__name__
        else:
          self.name = "unknown metric"
    else:
      self.name = name
    self.threshold = threshold
    if mode is None:
      # These are some smart defaults
      if self.metric.__name__ in [
@@ -521,14 +609,9 @@ class Metric(object):
      ]:
        mode = "regression"
      else:
        logger.info("Support for non classification/regression metrics is new. Check your results carefully.")
    # Attempts to set threshold defaults intelligently
    if self.metric.__name__ in [
        "accuracy_score", "balanced_accuracy_score", "recall_score",
        "matthews_corrcoef", "roc_auc_score", "precision_score",
        "f1_score"
    ] and threshold is None:
      self.threshold = 0.5
        logger.info(
            "Could not detect mode of classifier. Check your results carefully."
        )
      self.mode = mode

  def compute_metric(self,
@@ -536,8 +619,10 @@ class Metric(object):
                     y_pred,
                     w=None,
                     n_classes=2,
                     filter_nans=True,
                     per_task_metrics=False):
                     filter_nans=False,
                     per_task_metrics=False,
                     use_sample_weights=False,
                     threshold=None):
    """Compute a performance metric for each task.

    Parameters
@@ -555,18 +640,28 @@ class Metric(object):
      specified,  must be of shape `(N, n_tasks)`.
    n_classes: int, optional
      Number of classes in data for classification tasks.
    filter_nans: bool, optional
    filter_nans: bool, optional (default False) (DEPRECATED)
      Remove NaN values in computed metrics
    per_task_metrics: bool, optional
      If true, return computed metric for each task on multitask dataset.
    use_sample_weights: bool, optional (default False)
      If set, use per-sample weights `w`.
    threshold: float or bool, optional (default None)
      If set, apply a thresholding operation to values. This option isj
      only sensible on classification tasks. If float, this will be
      applied as a binary classification value. If bool, then
      thresholding will be applied to a multiclass prediction and will
      pick the maximum probability class.

    Returns
    -------
    A numpy nd.array containing metric values for each task.
    """
    # TODO: How about non standard shapes?
    y_true = normalize_prediction_shape(y_true, mode=self.mode, n_classes=n_classes)
    y_pred = normalize_prediction_shape(y_pred, mode=self.mode, n_classes=n_classes)
    y_true = normalize_prediction_shape(
        y_true, mode=self.mode, n_classes=n_classes)
    y_pred = normalize_prediction_shape(
        y_pred, mode=self.mode, n_classes=n_classes)
    # This is safe now because of normalization above
    n_samples = y_true.shape[0]
    n_tasks = y_pred.shape[1]
@@ -576,78 +671,69 @@ class Metric(object):
      y_task = y_true[:, task]
      y_pred_task = y_pred[:, task]
      w_task = w[:, task]

      metric_value = self.compute_singletask_metric(y_task, y_pred_task, w_task)
      if threshold is not None:
        y_task = threshold_predictions(y_task, threshold=threshold)
        y_task = to_one_hot(y_task, n_classes=n_classes)
        y_pred_task = threshold_predictions(y_pred_task, threshold=threshold)
        y_pred_task = to_one_hot(y_pred_task, n_classes=n_classes)

      metric_value = self.compute_singletask_metric(
          y_task,
          y_pred_task,
          w_task,
          n_samples=n_samples,
          use_sample_weights=use_sample_weights)
      computed_metrics.append(metric_value)
    ##################
    print("y_true.shape")
    print(y_true.shape)
    print("y_pred.shape")
    print(y_pred.shape)
    print("computed_metrics")
    print(computed_metrics)
    ##################
    logger.info("computed_metrics: %s" % str(computed_metrics))
    if n_tasks == 1:
      computed_metrics = computed_metrics[0]
    if not self.is_multitask:
      return computed_metrics
    else:

    if filter_nans:
      computed_metrics = np.array(computed_metrics)
      computed_metrics = computed_metrics[~np.isnan(computed_metrics)]
    # DEPRECATED. WILL BE REMOVED IN NEXT DEEPCHEM VERSION
    if self.compute_energy_metric:
      force_error = self.task_averager(computed_metrics[1:]) * 4961.47596096
        logger.info("Force error (metric: np.mean(%s)): %f kJ/mol/A" % (self.name,
                                                                  force_error))
      logger.info("Force error (metric: np.mean(%s)): %f kJ/mol/A" %
                  (self.name, force_error))
      return computed_metrics[0]
    elif not per_task_metrics:
      return self.task_averager(computed_metrics)
    else:
      return self.task_averager(computed_metrics), computed_metrics

  def compute_singletask_metric(self, y_true, y_pred, w):
  def compute_singletask_metric(self,
                                y_true,
                                y_pred,
                                w=None,
                                n_samples=None,
                                use_sample_weights=False):
    """Compute a metric value.

    Parameters
    ----------
    y_true: list
      A list of arrays containing true values for each task.
    y_pred: list
      A list of arrays containing predicted values for each task.
    y_true: `np.ndarray`
      True values array. This array must be of shape `(N,
      n_classes)` if classification and `(N,)` if regression.
    y_pred: `np.ndarray`
      Predictions array. This array must be of shape `(N, n_classes)`
      if classification and `(N,)` if regression.
    w: `np.ndarray`, optional (default None)
      Sample weight array. This array must be of shape `(N,)`
    n_samples: int, optional (default None)
      The number of samples in the dataset. This is `N`
    use_sample_weights: bool, optional (default False)
      If set, use per-sample weights `w`.

    Returns
    -------
    Float metric value.

    Raises
    ------
    NotImplementedError: If metric_str is not in METRICS.
    metric_value: float
      The computed value of the metric.
    """

    y_true = np.array(np.squeeze(y_true[w != 0]))
    y_pred = np.array(np.squeeze(y_pred[w != 0]))

    if len(y_true.shape) == 0:
      n_samples = 1
    if n_samples is None:
      n_samples = len(y_true)
    if use_sample_weights:
      metric_value = self.metric(y_true, y_pred, sample_weight=w)
    else:
      n_samples = y_true.shape[0]
    # If there are no nonzero examples, metric is ill-defined.
    if not y_true.size:
      return np.nan
    if self.threshold is not None and len(y_pred.shape) == 1:
      y_pred = np.expand_dims(y_pred, 0)
    if self.threshold is not None:
      y_pred = y_pred[:, 1]
      y_pred = np.greater(y_pred, self.threshold)
    if len(y_true.shape) == 0:
      y_true = np.expand_dims(y_true, 0)
    if len(y_pred.shape) == 0:
      y_pred = np.expand_dims(y_pred, 0)
    try:
      metric_value = self.metric(y_true, y_pred)
    except (AssertionError, ValueError) as e:
      warnings.warn("Error calculating metric %s: %s" % (self.name, e))
      metric_value = np.nan
    return metric_value
+45 −20
Original line number Diff line number Diff line
@@ -10,6 +10,7 @@ import os
import shutil
import tempfile
import sklearn
import logging
from sklearn.base import BaseEstimator

import logging
@@ -28,7 +29,7 @@ logger = logging.getLogger(__name__)

class Model(BaseEstimator):
  """
  Abstract base class for different ML models.
  Abstract base class for DeepChem models.
  """

  def __init__(self,
@@ -37,13 +38,21 @@ class Model(BaseEstimator):
               **kwargs) -> None:
    """Abstract class for all models.

    Parameters
    This is intended only for convenience of subclass implementations
    and should not be invoked directly.

    Parameters:
    -----------
    model_instance: object
      Wrapper around ScikitLearn/Keras/Tensorflow model object.
    model_dir: str
      Path to directory where model will be stored.
    """
    model_dir: str, optional (default None)
      Path to directory where model will be stored. If not specified,
      model will be stored in a temporary directory.
    """
    if self.__class__.__name__ == "Model":
      raise ValueError(
          "This constructor is for an abstract class and should never be called directly. Can only call from subclass constructors."
      )
    self.model_dir_is_temp = False
    if model_dir is not None:
      if not os.path.exists(model_dir):
@@ -185,30 +194,46 @@ class Model(BaseEstimator):
    """
    Evaluates the performance of this model on specified dataset.

    This function uses `Evaluator` under the hood to perform model
    evaluation. As a result, it inherits the same limitations of
    `Evaluator`. Namely, that only regression and classification
    models can be evaluated in this fashion. For generator models, you
    will need to overwrite this method to perform a custom evaluation.

    Keyword arguments specified here will be passed to
    `Evaluator.compute_model_performance`.

    Parameters
    ----------
    dataset: dc.data.Dataset
    dataset: `dc.data.Dataset`
      Dataset object.
    metric: deepchem.metrics.Metric
      Evaluation metric
    metrics: dc.metrics.Metric/list[dc.metrics.Metric]/function
      The set of metrics provided. This class attempts to do some
      intelligent handling of input. If a single `dc.metrics.Metric`
      object is provided or a list is provided, it will evaluate
      `self.model` on these metrics. If a function is provided, it is
      assumed to be a metric function that this method will attempt to
      wrap in a `dc.metrics.Metric` object. A metric function must
      accept two arguments, `y_true, y_pred` both of which are
      `np.ndarray` objects and return a floating point score. The
      metric function may also accept a keyword argument
      `sample_weight` to account for per-sample weights.
    transformers: list
      List of deepchem.transformers.Transformer
    per_task_metrics: bool
      If True, return per-task scores.
      List of `dc.trans.Transformer` objects. These transformations
      must have been applied to `dataset` previously. The dataset will
      be untransformed for metric evaluation.

    Returns
    -------
    dict
      Maps tasks to scores under metric.
    multitask_scores: dict
      Dictionary mapping names of metrics to metric scores.
    all_task_scores: dict, optional
      If `per_task_metrics == True` is passed as a keyword argument,
      then returns a second dictionary of scores for each task
      separately.
    """
    evaluator = Evaluator(self, dataset, transformers)
    if not per_task_metrics:
      scores = evaluator.compute_model_performance(metrics)
      return scores
    else:
      scores, per_task_scores = evaluator.compute_model_performance(
          metrics, per_task_metrics=per_task_metrics)
      return scores, per_task_scores
    return evaluator.compute_model_performance(metrics, **kwargs)

  def get_task_type(self) -> str:
    """
+16 −6
Original line number Diff line number Diff line
@@ -2,6 +2,7 @@
Code for processing datasets using scikit-learn.
"""
import numpy as np
import logging
from sklearn.cross_decomposition import PLSRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
@@ -21,20 +22,29 @@ NON_WEIGHTED_MODELS = [
    LassoCV, BayesianRidge
]

logger = logging.getLogger(__name__)


class SklearnModel(Model):
  """
  Abstract base class for different ML models.
  """Wrapper class that wraps scikit-learn models as DeepChem models.

  When you're working with scikit-learn and DeepChem, at times it can
  be useful to wrap a scikit-learn model as a DeepChem model. The
  reason for this might be that you want to do an apples-to-apples
  comparison of a scikit-learn model to another DeepChem model, or
  perhaps you want to use the hyperparameter tuning capabilities in
  `dc.hyper`. The `SklearnModel` class provides a
  """

  def __init__(self, model_instance=None, model_dir=None, **kwargs):
    """
    Parameters
    ----------
    model_instance: sklearn model
      Instance of model to wrap.
    model_dir: str
      If specified, the model will be saved in this directory.
    model_instance: `sklearn.base.BaseEstimator`
      Must be a scikit-learn `BaseEstimator Class`.
    model_dir: str, optional (default None)
      If specified the model will be stored in this directory. Else, a
      temporary directory will be used.
    kwargs: dict
      kwargs['use_weights'] is a bool which determines if we pass weights into
      self.model_instance.fit()
+3 −0
Original line number Diff line number Diff line
@@ -4,6 +4,7 @@ Scikit-learn wrapper interface of xgboost

import numpy as np
import os
import logging
from deepchem.models import Model
from deepchem.models.sklearn_models import SklearnModel
from deepchem.utils.save import load_from_disk
@@ -11,6 +12,8 @@ from deepchem.utils.save import save_to_disk
from sklearn.model_selection import train_test_split, GridSearchCV
import tempfile

logger = logging.getLogger(__name__)


class XGBoostModel(SklearnModel):
  """
+41 −33

File changed.

Preview size limit exceeded, changes collapsed.

Loading