Commit 6b467aa2 authored by Bharath Ramsundar's avatar Bharath Ramsundar
Browse files

Changes

parent e649727c
Loading
Loading
Loading
Loading
+7 −77
Original line number Diff line number Diff line
@@ -4,7 +4,6 @@ import numpy as np
import warnings
import sklearn.metrics
import logging
# TODO: Imported metrics will be removed in a future version of DeepChem
from sklearn.metrics import matthews_corrcoef
from sklearn.metrics import recall_score
from sklearn.metrics import r2_score
@@ -20,76 +19,6 @@ from scipy.stats import pearsonr
logger = logging.getLogger(__name__)


def matthews_corrcoef(*args, **kwargs):
  """Deprecated alias that forwards to ``sklearn.metrics.matthews_corrcoef``.

  Logs a deprecation warning, then passes all arguments through unchanged.
  """
  msg = "matthews_corrcoef is deprecated. Use sklearn.metrics.matthews_corrcoef instead. dc.metrics.matthews_corrcoef will be removed in a future version of DeepChem."
  logger.warning(msg)
  return sklearn.metrics.matthews_corrcoef(*args, **kwargs)


def recall_score(*args, **kwargs):
  """Deprecated alias that forwards to ``sklearn.metrics.recall_score``.

  Logs a deprecation warning, then passes all arguments through unchanged.
  """
  msg = "recall_score is deprecated. Use sklearn.metrics.recall_score instead. dc.metrics.recall_score will be removed in a future version of DeepChem."
  logger.warning(msg)
  return sklearn.metrics.recall_score(*args, **kwargs)


def r2_score(*args, **kwargs):
  """Deprecated alias that forwards to ``sklearn.metrics.r2_score``.

  Logs a deprecation warning, then passes all arguments through unchanged.
  """
  msg = "r2_score is deprecated. Use sklearn.metrics.r2_score instead. dc.metrics.r2_score will be removed in a future version of DeepChem."
  logger.warning(msg)
  return sklearn.metrics.r2_score(*args, **kwargs)


def mean_squared_error(*args, **kwargs):
  """Deprecated alias that forwards to ``sklearn.metrics.mean_squared_error``.

  Logs a deprecation warning, then passes all arguments through unchanged.
  """
  msg = "mean_squared_error is deprecated. Use sklearn.metrics.mean_squared_error instead. dc.metrics.mean_squared_error will be removed in a future version of DeepChem."
  logger.warning(msg)
  return sklearn.metrics.mean_squared_error(*args, **kwargs)


def mean_absolute_error(*args, **kwargs):
  """Deprecated alias that forwards to ``sklearn.metrics.mean_absolute_error``.

  Logs a deprecation warning, then passes all arguments through unchanged.
  """
  msg = "mean_absolute_error is deprecated. Use sklearn.metrics.mean_absolute_error instead. dc.metrics.mean_absolute_error will be removed in a future version of DeepChem."
  logger.warning(msg)
  return sklearn.metrics.mean_absolute_error(*args, **kwargs)


def precision_score(*args, **kwargs):
  """Deprecated alias that forwards to ``sklearn.metrics.precision_score``.

  Logs a deprecation warning, then passes all arguments through unchanged.
  """
  msg = "precision_score is deprecated. Use sklearn.metrics.precision_score instead. dc.metrics.precision_score will be removed in a future version of DeepChem."
  logger.warning(msg)
  return sklearn.metrics.precision_score(*args, **kwargs)


def precision_recall_curve(*args, **kwargs):
  """Deprecated alias that forwards to ``sklearn.metrics.precision_recall_curve``.

  Logs a deprecation warning, then passes all arguments through unchanged.
  """
  msg = "precision_recall_curve is deprecated. Use sklearn.metrics.precision_recall_curve instead. dc.metrics.precision_recall_curve will be removed in a future version of DeepChem."
  logger.warning(msg)
  return sklearn.metrics.precision_recall_curve(*args, **kwargs)


def auc(*args, **kwargs):
  """Deprecated alias that forwards to ``sklearn.metrics.auc``.

  Logs a deprecation warning, then passes all arguments through unchanged.
  """
  msg = "auc is deprecated. Use sklearn.metrics.auc instead. dc.metrics.auc will be removed in a future version of DeepChem."
  logger.warning(msg)
  return sklearn.metrics.auc(*args, **kwargs)


def jaccard_score(*args, **kwargs):
  """Deprecated alias that forwards to ``sklearn.metrics.jaccard_score``.

  Logs a deprecation warning, then passes all arguments through unchanged.
  """
  msg = "jaccard_score is deprecated. Use sklearn.metrics.jaccard_score instead. dc.metrics.jaccard_score will be removed in a future version of DeepChem."
  logger.warning(msg)
  return sklearn.metrics.jaccard_score(*args, **kwargs)


def f1_score(*args, **kwargs):
  """Deprecated alias that forwards to ``sklearn.metrics.f1_score``.

  Logs a deprecation warning, then passes all arguments through unchanged.
  """
  msg = "f1_score is deprecated. Use sklearn.metrics.f1_score instead. dc.metrics.f1_score will be removed in a future version of DeepChem."
  logger.warning(msg)
  return sklearn.metrics.f1_score(*args, **kwargs)


def threshold_predictions(y, threshold=0.5):
  """Threshold predictions from classification model.

@@ -135,7 +64,7 @@ def normalize_weight_shape(w, n_samples, n_tasks):
  w: np.ndarray
    `w` can be `None` or a scalar or a `np.ndarray` of shape
    `(n_samples,)` or of shape `(n_samples, n_tasks)`. If `w` is a
    sclar, it's assumed to be the same weight for all samples/tasks.
    scalar, it's assumed to be the same weight for all samples/tasks.
  n_samples: int
    The number of samples in the dataset. If `w` is not None, we should
    have `n_samples = w.shape[0]` if `w` is a ndarray
@@ -198,7 +127,7 @@ def normalize_prediction_shape(y, mode=None, n_classes=None):
  ----------
  y: np.ndarray
    If `mode=="classification"`, `y` is an array of shape `(N,)` or
    `(N, n_classes)` or `(N, n_tasks, n_classes)`. If `y` is of shape
    `(N, n_classes)` or `(N, n_tasks, n_classes)`. If `y` is an array of shape
    `(N,)` in order to impute the number of classes correctly, `y`
    must take values from `0` to `n_classes-1` as integers. If
    `mode=="regression"`, `y` is an array of shape `(N,)` or `(N,
@@ -265,12 +194,12 @@ def normalize_prediction_shape(y, mode=None, n_classes=None):
      elif len(y.shape) == 3:
        if y.shape[-1] != 1:
          raise ValueError(
              "y must a float sclar or a ndarray of shape `(N,)` or `(N, n_tasks)` or `(N, n_tasks, 1)` for regression problems."
              "y must a float scalar or a ndarray of shape `(N,)` or `(N, n_tasks)` or `(N, n_tasks, 1)` for regression problems."
          )
        y_out = np.squeeze(y, axis=-1)
      else:
        raise ValueError(
            "y must a float sclar or a ndarray of shape `(N,)` or `(N, n_tasks)` or `(N, n_tasks, 1)` for regression problems."
            "y must a float scalar or a ndarray of shape `(N,)` or `(N, n_tasks)` or `(N, n_tasks, 1)` for regression problems."
        )
    else:
      # In this case, y is a scalar.
@@ -278,14 +207,14 @@ def normalize_prediction_shape(y, mode=None, n_classes=None):
        y = float(y)
      except TypeError:
        raise ValueError(
            "y must a float sclar or a ndarray of shape `(N,)` or `(N, n_tasks)` or `(N, n_tasks, 1)` for regression problems."
            "y must a float scalar or a ndarray of shape `(N,)` or `(N, n_tasks)` or `(N, n_tasks, 1)` for regression problems."
        )
      y = np.array(y)
      y_out = np.reshape(y, (1, 1))
  else:
    # If mode isn't classification or regression don't perform any
    # transformations.
    y_out = y
    raise ValueError("mode must be either classification or regression.")
  return y_out


@@ -688,6 +617,7 @@ class Metric(object):
    if n_tasks == 1:
      computed_metrics = computed_metrics[0]

    # DEPRECATED. WILL BE REMOVED IN NEXT DEEPCHEM VERSION
    if filter_nans:
      computed_metrics = np.array(computed_metrics)
      computed_metrics = computed_metrics[~np.isnan(computed_metrics)]
+3 −1
Original line number Diff line number Diff line
@@ -33,7 +33,9 @@ class SklearnModel(Model):
  reason for this might be that you want to do an apples-to-apples
  comparison of a scikit-learn model to another DeepChem model, or
  perhaps you want to use the hyperparameter tuning capabilities in
  `dc.hyper`. The `SklearnModel` class provides a
  `dc.hyper`. The `SklearnModel` class provides a wrapper around scikit-learn
  models that allows scikit-learn models to be trained on `Dataset` objects
  and evaluated with the same metrics as other DeepChem models.
  """

  def __init__(self, model_instance=None, model_dir=None, **kwargs):
+6 −6
Original line number Diff line number Diff line
@@ -51,14 +51,14 @@ def output_predictions(dataset, y_preds, csv_out):
  csv_out: str
    Name of file to write predictions to.
  """
  mol_ids = dataset.ids
  data_ids = dataset.ids
  n_tasks = len(dataset.get_task_names())
  y_preds = np.reshape(y_preds, (len(y_preds), n_tasks))
  assert len(y_preds) == len(mol_ids)
  assert len(y_preds) == len(data_ids)
  with open(csv_out, "w") as csvfile:
    csvwriter = csv.writer(csvfile)
    csvwriter.writerow(["ID"] + dataset.get_task_names())
    for mol_id, y_pred in zip(mol_ids, y_preds):
    for mol_id, y_pred in zip(data_ids, y_preds):
      csvwriter.writerow([mol_id] + list(y_pred))


@@ -218,14 +218,14 @@ class Evaluator(object):
    logger.warning(
        "Evaluator.output_predictions is deprecated. Please use dc.utils.evaluate.output_predictions instead. This method will be removed in a future version of DeepChem."
    )
    mol_ids = self.dataset.ids
    data_ids = self.dataset.ids
    n_tasks = len(self.dataset.get_task_names())
    y_preds = np.reshape(y_preds, (len(y_preds), n_tasks))
    assert len(y_preds) == len(mol_ids)
    assert len(y_preds) == len(data_ids)
    with open(csv_out, "w") as csvfile:
      csvwriter = csv.writer(csvfile)
      csvwriter.writerow(["ID"] + self.dataset.get_task_names())
      for mol_id, y_pred in zip(mol_ids, y_preds):
      for mol_id, y_pred in zip(data_ids, y_preds):
        csvwriter.writerow([mol_id] + list(y_pred))

  def compute_model_performance(self,
+24 −0
Original line number Diff line number Diff line
@@ -9,6 +9,8 @@ depending on the type of model at hand.

Metric Utilities
----------------
Metric utility functions allow for some common manipulations such as
switching to/from one-hot representations.

.. autofunction:: deepchem.metrics.to_one_hot

@@ -17,6 +19,28 @@ Metric Utilities

Metric Functions
----------------
DeepChem has a variety of different metrics which are useful for measuring model performance. A number (but not all) of these metrics are directly sourced from :code:`sklearn`.

.. autofunction:: deepchem.metrics.matthews_corrcoef

.. autofunction:: deepchem.metrics.recall_score

.. autofunction:: deepchem.metrics.r2_score

.. autofunction:: deepchem.metrics.mean_squared_error

.. autofunction:: deepchem.metrics.mean_absolute_error

.. autofunction:: deepchem.metrics.precision_score

.. autofunction:: deepchem.metrics.precision_recall_curve

.. autofunction:: deepchem.metrics.auc

.. autofunction:: deepchem.metrics.jaccard_score

.. autofunction:: deepchem.metrics.f1_score

.. autofunction:: deepchem.metrics.roc_auc_score

.. autofunction:: deepchem.metrics.accuracy_score