Unverified Commit dcd8272f authored by Bharath Ramsundar, committed by GitHub

Merge pull request #1866 from deepchem/metrics_docs

Add more documentation to metrics
parents 43b48b08 a021cf19
+158 −86
@@ -2,8 +2,8 @@

import numpy as np
import warnings
import sklearn.metrics
import logging
from sklearn.metrics import matthews_corrcoef
from sklearn.metrics import recall_score
from sklearn.metrics import r2_score
@@ -16,14 +16,23 @@ from sklearn.metrics import jaccard_score
from sklearn.metrics import f1_score
from scipy.stats import pearsonr

logger = logging.getLogger(__name__)


def to_one_hot(y, n_classes=2):
  """Transforms label vector into one-hot encoding.

  Turns y into a vector of shape `(n_samples, n_classes)` with a one-hot
  encoding.

  Parameters
  ----------
  y: np.ndarray
    A vector of shape `(n_samples, 1)`

  Returns
  -------
  A numpy.ndarray of shape `(n_samples, n_classes)`.
  """
  n_samples = np.shape(y)[0]
  y_hot = np.zeros((n_samples, n_classes))
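
A quick usage sketch (illustrative, not part of this diff; it assumes `to_one_hot` is importable from `deepchem.metrics` as defined above):

    import numpy as np
    from deepchem.metrics import to_one_hot

    labels = np.array([0, 1, 1, 0])
    print(to_one_hot(labels, n_classes=2))
    # [[1. 0.]
    #  [0. 1.]
    #  [0. 1.]
    #  [1. 0.]]
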
@@ -34,8 +43,16 @@ def to_one_hot(y, n_classes=2):
def from_one_hot(y, axis=1):
  """Transorms label vector from one-hot encoding.

  Parameters
  ----------
  y: np.ndarray
    A vector of shape `(n_samples, num_classes)`
  axis: int, optional (default 1)
    The axis with one-hot encodings to reduce on.

  Returns
  -------
  A numpy.ndarray of shape `(n_samples,)`
  """
  return np.argmax(y, axis=axis)
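
A round-trip sketch (illustrative, not part of this diff): `from_one_hot` inverts `to_one_hot` by taking the argmax along the class axis:

    import numpy as np
    from deepchem.metrics import from_one_hot, to_one_hot

    labels = np.array([0, 2, 1])
    one_hot = to_one_hot(labels, n_classes=3)
    assert np.array_equal(from_one_hot(one_hot, axis=1), labels)
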

@@ -62,6 +79,24 @@ def roc_auc_score(y, y_pred):


def accuracy_score(y, y_pred):
  """Compute accuracy score

  Computes accuracy score for classification tasks. Works for both
  binary and multiclass classification.

  Parameters
  ----------
  y: np.ndarray
    Of shape `(n_samples,)`
  y_pred: np.ndarray
    Of shape `(n_samples,)`

  Returns
  -------
  score: float
    The fraction of correctly classified samples. A number between 0
    and 1.
  """
  y = _ensure_class_labels(y)
  y_pred = _ensure_class_labels(y_pred)
  return sklearn.metrics.accuracy_score(y, y_pred)
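
A usage sketch (illustrative, not part of this diff; `_ensure_class_labels` converts one-hot inputs to class labels, so plain label vectors also work):

    import numpy as np
    from deepchem.metrics import accuracy_score

    y_true = np.array([0, 1, 1, 0])
    y_pred = np.array([0, 1, 0, 0])
    print(accuracy_score(y_true, y_pred))  # 0.75: 3 of 4 samples correct
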
@@ -83,8 +118,7 @@ def pearson_r2_score(y, y_pred):


def jaccard_index(y, y_pred):
  """Computes Jaccard Index which is the Intersection Over Union metric
       which is commonly used in image segmentation tasks
  """Computes Jaccard Index which is the Intersection Over Union metric which is commonly used in image segmentation tasks

  Parameters
  ----------
@@ -95,13 +129,17 @@ def jaccard_index(y, y_pred):


def pixel_error(y, y_pred):
  """defined as 1 - the maximal F-score of pixel similarity,
       or squared Euclidean distance between the original and the result labels.
  """An error metric in case y, y_pred are images.

  Defined as 1 - the maximal F-score of pixel similarity, or squared
  Euclidean distance between the original and the result labels.

  Parameters
  ----------
  y: np.ndarray
    ground truth array
  y_pred: np.ndarray
    predicted array
  """
  return 1 - f1_score(y, y_pred)
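
A usage sketch for `pixel_error` (illustrative, not part of this diff). Since the underlying sklearn `f1_score` expects 1-D label arrays, 2-D masks should be flattened first:

    import numpy as np
    from deepchem.metrics import pixel_error

    y = np.array([1, 1, 0, 0, 1, 0])       # flattened ground-truth mask
    y_pred = np.array([1, 0, 0, 0, 1, 1])  # flattened predicted mask
    print(pixel_error(y, y_pred))  # 1 - F1 = 1 - 2/3 = 0.333...
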

@@ -133,16 +171,22 @@ def kappa_score(y_true, y_pred):

  Note that this implementation of Cohen's kappa expects binary labels.

  Parameters
  ----------
  y_true: np.ndarray
    Numpy array containing true values.
  y_pred: np.ndarray
    Numpy array containing predicted values.

  Returns
  -------
  kappa: np.ndarray
    Numpy array containing kappa for each classification task.

  Raises
  ------
  AssertionError: If y_true and y_pred are not the same size, or if
  class labels are not in [0, 1].
  """
  assert len(y_true) == len(y_pred), 'Number of examples does not match.'
  yt = np.asarray(y_true, dtype=int)
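
A usage sketch (illustrative, not part of this diff). As the docstring notes, only binary labels are supported; kappa corrects raw accuracy for chance agreement:

    import numpy as np
    from deepchem.metrics import kappa_score

    y_true = np.array([1, 0, 1, 1, 0, 1])
    y_pred = np.array([1, 0, 0, 1, 0, 1])
    print(kappa_score(y_true, y_pred))  # ~0.667 for this confusion matrix
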
@@ -165,11 +209,8 @@ def bedroc_score(y_true, y_pred, alpha=20.0):
  """BEDROC metric implemented according to Truchon and Bayley that modifies
  the ROC score by allowing for a factor of early recognition

  Parameters
  ----------
  y_true: array_like
    Binary class labels. 1 for positive class, 0 otherwise
  y_pred: array_like
@@ -177,9 +218,14 @@ def bedroc_score(y_true, y_pred, alpha=20.0):
  alpha: float, optional (default 20.0)
    Early recognition parameter

  Returns
  -------
  float: Value in [0, 1] that indicates the degree of early recognition

  Notes
  -----
  The original paper by Truchon et al. is located at
  https://pubs.acs.org/doi/pdf/10.1021/ci600426e
  """

  assert len(y_true) == len(y_pred), 'Number of examples does not match'
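
A usage sketch (illustrative, not part of this diff; it assumes `y_pred` is a 1-D array of scores for the positive class — check the function body for the exact expected shape):

    import numpy as np
    from deepchem.metrics import bedroc_score

    y_true = np.array([1, 1, 0, 0, 0, 0])
    y_pred = np.array([0.9, 0.8, 0.3, 0.2, 0.1, 0.05])  # actives ranked first
    print(bedroc_score(y_true, y_pred, alpha=20.0))  # near 1.0: strong early recognition
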
@@ -203,23 +249,45 @@ def bedroc_score(y_true, y_pred, alpha=20.0):


class Metric(object):
  """Wrapper class for computing user-defined metrics."""
  """Wrapper class for computing user-defined metrics.

  There are a variety of different metrics that this class aims to
  support. At the simplest level, there are metrics for classification
  and regression that assume the values to compare are scalars. In more
  complicated cases, there may be two image arrays that need to be
  compared.

  The `Metric` class provides a wrapper for standardizing the API
  around different classes of metrics that may be useful for DeepChem
  models. The implementation provides a few non-standard conveniences
  such as built-in support for multitask and multiclass metrics, and
  support for multidimensional outputs.
  """

  def __init__(self,
               metric,
               task_averager=None,
               name=None,
               threshold=None,
               verbose=True,
               mode=None,
               compute_energy_metric=False):
    """
    Parameters
    ----------
    metric: function
      Function that takes args y_true, y_pred (in that order) and
      computes the desired score.
    task_averager: function, optional
      If not None, should be a function that averages metrics across
      tasks. For example, task_averager=np.mean. If task_averager is
      provided, this metric will be treated as a multitask metric.
    name: str, optional
      Name of this metric
    threshold: float, optional
      Used for binary metrics and is the threshold for the positive
      class
    mode: str, optional
      Must be either classification or regression.
    compute_energy_metric: TODO(rbharath): Should this be removed? 
    """
    self.metric = metric
    self.task_averager = task_averager
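
A construction sketch (illustrative, not part of this diff), wrapping a regression metric with a task averager as documented above:

    import numpy as np
    from deepchem.metrics import Metric, pearson_r2_score

    metric = Metric(pearson_r2_score, task_averager=np.mean, mode="regression")
    print(metric.name)  # "mean-pearson_r2_score", assembled from the two function names
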
@@ -231,13 +299,12 @@ class Metric(object):
        self.name = self.task_averager.__name__ + "-" + self.metric.__name__
    else:
      self.name = name
    self.threshold = threshold
    if mode is None:
      if self.metric.__name__ in [
          "roc_auc_score", "matthews_corrcoef", "recall_score",
          "accuracy_score", "kappa_score", "precision_score",
          "balanced_accuracy_score", "prc_auc_score", "f1_score"
          "balanced_accuracy_score", "prc_auc_score", "f1_score", "bedroc_score"
      ]:
        mode = "classification"
      elif self.metric.__name__ in [
@@ -311,7 +378,7 @@ class Metric(object):

      metric_value = self.compute_singletask_metric(y_task, y_pred_task, w_task)
      computed_metrics.append(metric_value)
    log("computed_metrics: %s" % str(computed_metrics), self.verbose)
    logger.info("computed_metrics: %s" % str(computed_metrics))
    if n_tasks == 1:
      computed_metrics = computed_metrics[0]
    if not self.is_multitask:
@@ -334,14 +401,19 @@ class Metric(object):
  def compute_singletask_metric(self, y_true, y_pred, w):
    """Compute a metric value.

    Parameters
    ----------
    y_true: list
      A list of arrays containing true values for each task.
    y_pred: list
      A list of arrays containing predicted values for each task.

    Returns
    -------
    Float metric value.

    Raises
    ------
    NotImplementedError: If metric_str is not in METRICS.
    """