Unverified Commit eb3ab25e authored by Bharath Ramsundar's avatar Bharath Ramsundar Committed by GitHub
Browse files

Merge pull request #1928 from deepchem/metric_overhaul

Overhauling the Metrics
parents 14f0ff9a 0a441ace
Loading
Loading
Loading
Loading
+645 −222

File changed.

Preview size limit exceeded, changes collapsed.

+0 −89
Original line number Diff line number Diff line
"""
Tests for metrics.
"""
__author__ = "Bharath Ramsundar"
__copyright__ = "Copyright 2016, Stanford University"
__license__ = "MIT"

import numpy as np
import deepchem as dc
from tensorflow.python.platform import googletest
from deepchem import metrics


class MetricsTest(googletest.TestCase):
  """Sanity checks for the deepchem metrics utilities."""

  def test_kappa_score(self):
    """Cohen's kappa should match a value computed by hand."""
    labels = [1, 0, 1, 0]
    probs = [0.8, 0.2, 0.3, 0.4]  # thresholds to [1, 0, 0, 0] at 0.5
    kappa = dc.metrics.kappa_score(labels, np.greater(probs, 0.5))
    p_observed = 3.0 / 4.0
    p_expected = ((2 * 1) + (2 * 3)) / 4.0**2
    reference = np.true_divide(p_observed - p_expected, 1.0 - p_expected)
    self.assertAlmostEqual(kappa, reference)

  def test_one_sample(self):
    """Test that the metrics won't raise error even in an extreme condition
    where there is only one sample with w > 0.
    """
    np.random.seed(123)
    labels = np.array([0, 0])
    probs = np.random.rand(2, 2)
    weights = np.array([0, 1])
    for fn in (dc.metrics.recall_score, dc.metrics.matthews_corrcoef,
               dc.metrics.roc_auc_score):
      score = dc.metrics.Metric(fn).compute_singletask_metric(
          labels, probs, weights)
      # With a single effective sample the score is degenerate: nan or 0.
      self.assertTrue(np.isnan(score) or score == 0)

  def test_r2_score(self):
    """Test that R^2 metric passes basic sanity tests"""
    np.random.seed(123)
    truth = np.random.rand(10,)
    estimate = np.random.rand(10,)
    wrapped = dc.metrics.Metric(dc.metrics.r2_score)
    # The Metric wrapper must agree with calling r2_score directly.
    assert np.isclose(
        dc.metrics.r2_score(truth, estimate),
        wrapped.compute_metric(truth, estimate))

  def test_one_hot(self):
    """One-hot encoding must round-trip through from_one_hot."""
    labels = np.array([0, 0, 1, 0, 1, 1, 0])
    encoded = metrics.to_one_hot(labels)
    want = np.array([[1, 0], [1, 0], [0, 1], [1, 0], [0, 1], [0, 1],
                     [1, 0]])
    assert np.array_equal(want, encoded)
    assert np.array_equal(labels, metrics.from_one_hot(encoded))

  def test_bedroc_score(self):
    """BEDROC is 1.0 for a perfect ranking and ~0 for an inverted one."""
    n_actives = 20
    n_total = 400

    actives = np.ones(n_actives)
    inactives = np.zeros(n_total - n_actives)
    truth = np.concatenate([actives, inactives])

    # Perfect ranking: every active scored ahead of every inactive.
    perfect = dc.metrics.to_one_hot(np.concatenate([actives, inactives]))
    self.assertAlmostEqual(dc.metrics.bedroc_score(truth, perfect), 1.0)

    # Inverted ranking: every inactive scored ahead of every active.
    inverted = dc.metrics.to_one_hot(
        np.concatenate(
            [np.zeros(n_actives),
             np.ones(n_total - n_actives)]))
    self.assertAlmostEqual(dc.metrics.bedroc_score(truth, inverted), 0.0, 4)


# Run the MetricsTest suite when this file is executed as a script.
if __name__ == '__main__':
  googletest.main()
+72 −0
Original line number Diff line number Diff line
"""
Tests for metrics.
"""
import numpy as np
import deepchem as dc
import unittest
from deepchem import metrics


def test_kappa_score():
  """Cohen's kappa should match a value computed by hand."""
  labels = [1, 0, 1, 0]
  probs = [0.8, 0.2, 0.3, 0.4]  # thresholds to [1, 0, 0, 0] at 0.5
  kappa = dc.metrics.kappa_score(labels, np.greater(probs, 0.5))
  p_observed = 3.0 / 4.0
  p_expected = ((2 * 1) + (2 * 3)) / 4.0**2
  reference = np.true_divide(p_observed - p_expected, 1.0 - p_expected)
  np.testing.assert_almost_equal(kappa, reference)


def test_one_sample():
  """Test that the metrics won't raise error even in an extreme condition
  where there is only one sample with w > 0.
  """
  np.random.seed(123)
  labels = np.random.randint(2, size=(2,))
  predictions = np.random.randint(2, size=(2,))
  weights = np.array([0, 1])
  for fn in (dc.metrics.recall_score, dc.metrics.matthews_corrcoef,
             dc.metrics.roc_auc_score):
    # Computing the score is the whole test: no exception may escape.
    dc.metrics.Metric(fn).compute_singletask_metric(labels, predictions,
                                                    weights)


def test_r2_score():
  """Test that R^2 metric passes basic sanity tests"""
  np.random.seed(123)
  truth = np.random.rand(10,)
  estimate = np.random.rand(10,)
  wrapped = dc.metrics.Metric(dc.metrics.r2_score, n_tasks=1)
  # The Metric wrapper must agree with calling r2_score directly.
  direct = dc.metrics.r2_score(truth, estimate)
  assert np.isclose(direct, wrapped.compute_metric(truth, estimate))


def test_bedroc_score():
  """Test BEDROC."""
  n_actives = 20
  n_total = 400

  actives = np.ones(n_actives)
  inactives = np.zeros(n_total - n_actives)
  truth = np.concatenate([actives, inactives])

  # Perfect ranking: every active scored ahead of every inactive.
  perfect = dc.metrics.to_one_hot(np.concatenate([actives, inactives]))
  np.testing.assert_almost_equal(
      dc.metrics.bedroc_score(truth, perfect), 1.0)

  # Inverted ranking: every inactive scored ahead of every active.
  inverted = dc.metrics.to_one_hot(
      np.concatenate(
          [np.zeros(n_actives),
           np.ones(n_total - n_actives)]))
  np.testing.assert_almost_equal(
      dc.metrics.bedroc_score(truth, inverted), 0.0, 4)
+194 −0
Original line number Diff line number Diff line
"""Test normalization of input."""

import numpy as np
import unittest
import deepchem as dc
from deepchem.metrics import to_one_hot
from deepchem.metrics import from_one_hot
from deepchem.metrics import threshold_predictions
from deepchem.metrics import handle_classification_mode
from deepchem.metrics import normalize_prediction_shape
from deepchem.metrics import normalize_weight_shape


def test_one_hot():
  """Test the one hot encoding."""
  labels = np.array([0, 0, 1, 0, 1, 1, 0])
  encoded = to_one_hot(labels)
  want = np.array([[1, 0], [1, 0], [0, 1], [1, 0], [0, 1], [0, 1], [1, 0]])
  assert np.array_equal(want, encoded)
  # Decoding must round-trip back to the original label vector.
  assert np.array_equal(labels, from_one_hot(encoded))


def test_handle_classification_mode_none():
  """With mode None the prediction tensor passes through untouched."""
  probs = np.random.rand(10, 2)
  probs = probs / probs.sum(axis=1, keepdims=True)  # normalize rows
  probs = np.expand_dims(probs, 1)
  out = handle_classification_mode(probs, None)
  assert out.shape == (10, 1, 2)
  assert np.array_equal(out, probs)


def test_handle_classification_mode_threshold():
  """Thresholding at 0.5 reduces class probabilities to hard labels."""
  probs = np.random.rand(10, 2)
  probs = probs / probs.sum(axis=1, keepdims=True)  # normalize rows
  probs = np.expand_dims(probs, 1)
  want = np.argmax(np.squeeze(probs), axis=1)[:, np.newaxis]
  got = handle_classification_mode(probs, "threshold", threshold_value=0.5)
  assert got.shape == (10, 1)
  assert np.array_equal(got, want)


def test_handle_classification_mode_threshold_nonstandard():
  """A non-default threshold (0.3) shifts the positive-class decision."""
  probs = np.random.rand(10, 2)
  probs = probs / probs.sum(axis=1, keepdims=True)  # normalize rows
  # Expected labels computed before the task axis is added.
  want = np.where(probs[:, 1] >= 0.3, np.ones(10), np.zeros(10))[:, np.newaxis]
  probs = np.expand_dims(probs, 1)
  got = handle_classification_mode(probs, "threshold", threshold_value=0.3)
  assert got.shape == (10, 1)
  assert np.array_equal(got, want)


def test_handle_classification_mode_threshold_one_hot():
  """threshold-one-hot mode re-encodes the thresholded labels as one-hot."""
  probs = np.random.rand(10, 2)
  probs = probs / probs.sum(axis=1, keepdims=True)  # normalize rows
  probs = np.expand_dims(probs, 1)
  hard = np.argmax(np.squeeze(probs), axis=1)
  want = np.expand_dims(to_one_hot(hard, n_classes=2), 1)
  got = handle_classification_mode(
      probs, "threshold-one-hot", threshold_value=0.5)
  assert got.shape == (10, 1, 2)
  assert np.array_equal(got, want)


def test_threshold_predictions_binary():
  """Test thresholding of binary predictions."""
  # Random probability matrix with rows summing to 1.
  probs = np.random.rand(10, 2)
  probs = probs / probs.sum(axis=1, keepdims=True)
  hard = threshold_predictions(probs, 0.5)
  assert hard.shape == (10,)
  assert np.array_equal(hard, np.argmax(probs, axis=1))


def test_threshold_predictions_multiclass():
  """Test thresholding of multiclass predictions."""
  probs = np.random.rand(10, 5)
  probs = probs / probs.sum(axis=1, keepdims=True)  # normalize rows
  hard = threshold_predictions(probs)
  assert hard.shape == (10,)
  assert np.array_equal(hard, np.argmax(probs, axis=1))


def test_normalize_1d_classification_binary():
  """A 1-d binary label vector normalizes to (N, 1, 2) one-hots."""
  labels = np.array([0, 0, 1, 0, 1, 1, 0])
  want = np.array([[[1., 0.]], [[1., 0.]], [[0., 1.]], [[1., 0.]],
                   [[0., 1.]], [[0., 1.]], [[1., 0.]]])
  got = normalize_prediction_shape(
      labels, mode="classification", n_tasks=1, n_classes=2)
  assert got.shape == (7, 1, 2)
  assert np.array_equal(want, got)


def test_normalize_1d_classification_multiclass():
  """A 1-d multiclass label vector normalizes to (N, 1, n_classes)."""
  labels = np.random.randint(5, size=(200,))
  want = np.expand_dims(to_one_hot(labels, n_classes=5), 1)
  got = normalize_prediction_shape(
      labels, mode="classification", n_tasks=1, n_classes=5)
  assert got.shape == (200, 1, 5)
  assert np.array_equal(want, got)


def test_normalize_1d_classification_multiclass_explicit_nclasses():
  """An explicit n_classes larger than the label range is honored."""
  labels = np.random.randint(5, size=(10,))
  want = np.expand_dims(to_one_hot(labels, n_classes=10), 1)
  got = normalize_prediction_shape(
      labels, mode="classification", n_classes=10, n_tasks=1)
  assert got.shape == (10, 1, 10)
  assert np.array_equal(want, got)


def test_normalize_2d_classification_binary():
  """A (N, 1) binary label matrix normalizes to (N, 1, 2) one-hots."""
  labels = np.random.randint(2, size=(10, 1))
  want = np.expand_dims(dc.metrics.to_one_hot(np.squeeze(labels)), 1)
  got = normalize_prediction_shape(
      labels, mode="classification", n_tasks=1, n_classes=2)
  assert got.shape == (10, 1, 2)
  assert np.array_equal(want, got)


def test_normalize_3d_classification_binary():
  """An already-normalized (N, 1, n_classes) tensor passes through."""
  labels = np.random.randint(2, size=(10,))
  encoded = np.expand_dims(dc.metrics.to_one_hot(labels, n_classes=2), 1)
  got = normalize_prediction_shape(
      encoded, mode="classification", n_tasks=1, n_classes=2)
  assert got.shape == (10, 1, 2)
  assert np.array_equal(encoded, got)


def test_normalize_1d_regression():
  """A 1-d regression vector gains a trailing task axis."""
  values = np.random.rand(10)
  got = normalize_prediction_shape(values, mode="regression", n_tasks=1)
  assert got.shape == (10, 1)
  assert np.array_equal(values[:, np.newaxis], got)


def test_normalize_2d_regression():
  """A (N, n_tasks) regression matrix passes through unchanged."""
  values = np.random.rand(10, 5)
  got = normalize_prediction_shape(values, mode="regression", n_tasks=5)
  assert got.shape == (10, 5)
  assert np.array_equal(values, got)


def test_normalize_3d_regression():
  """A (N, n_tasks, 1) regression tensor loses the trailing axis."""
  values = np.random.rand(10, 5, 1)
  got = normalize_prediction_shape(values, mode="regression", n_tasks=5)
  assert got.shape == (10, 5)
  assert np.array_equal(np.squeeze(values), got)


def test_scalar_weight_normalization():
  """A scalar weight broadcasts to a full (n_samples, n_tasks) matrix."""
  got = normalize_weight_shape(w=5, n_samples=10, n_tasks=5)
  assert got.shape == (10, 5)
  assert (got == np.full((10, 5), 5)).all()


def test_1d_weight_normalization():
  """A per-sample weight vector is tiled across all tasks."""
  weights = np.random.rand(10)
  # Each of the 5 task columns carries the same per-sample weights.
  want = np.tile(weights[:, np.newaxis], (1, 5))
  got = normalize_weight_shape(weights, n_samples=10, n_tasks=5)
  assert got.shape == (10, 5)
  assert (got == want).all()


def test_2d_weight_normalization():
  """An already-(n_samples, n_tasks) weight matrix passes through."""
  weights = np.random.rand(10, 5)
  got = normalize_weight_shape(weights, n_samples=10, n_tasks=5)
  assert got.shape == (10, 5)
  assert (got == weights).all()
+9 −5
Original line number Diff line number Diff line
@@ -657,11 +657,15 @@ class GraphConvModel(KerasModel):
  """Graph Convolutional Models.

  This class implements the graph convolutional model from the
  following paper:


  Duvenaud, David K., et al. "Convolutional networks on graphs for learning molecular fingerprints." Advances in neural information processing systems. 2015.
  following paper [1]_. These graph convolutions start with a per-atom set of
  descriptors for each atom in a molecule, then combine and recombine these
  descriptors over convolutional layers.

  References
  ----------
  .. [1] Duvenaud, David K., et al. "Convolutional networks on graphs for
  learning molecular fingerprints." Advances in neural information processing
  systems. 2015.
  """

  def __init__(self,
Loading