Unverified Commit eb3ab25e authored by Bharath Ramsundar's avatar Bharath Ramsundar Committed by GitHub
Browse files

Merge pull request #1928 from deepchem/metric_overhaul

Overhauling the Metrics
parents 14f0ff9a 0a441ace
Loading
Loading
Loading
Loading
+645 −222

File changed.

Preview size limit exceeded, changes collapsed.

+0 −89
Original line number Diff line number Diff line
"""
Tests for metrics.
"""
__author__ = "Bharath Ramsundar"
__copyright__ = "Copyright 2016, Stanford University"
__license__ = "MIT"

import numpy as np
import deepchem as dc
from tensorflow.python.platform import googletest
from deepchem import metrics


class MetricsTest(googletest.TestCase):
  """Sanity checks for the deepchem metrics utilities."""

  def test_kappa_score(self):
    """Cohen's kappa should match a value computed by hand."""
    labels = [1, 0, 1, 0]
    probs = [0.8, 0.2, 0.3, 0.4]  # thresholds to [1, 0, 0, 0] at 0.5
    kappa = dc.metrics.kappa_score(labels, np.greater(probs, 0.5))
    p_observed = 3.0 / 4.0
    p_expected = ((2 * 1) + (2 * 3)) / 4.0**2
    reference = np.true_divide(p_observed - p_expected, 1.0 - p_expected)
    self.assertAlmostEqual(kappa, reference)

  def test_one_sample(self):
    """Test that the metrics won't raise error even in an extreme condition
    where there is only one sample with w > 0.
    """
    np.random.seed(123)
    labels = np.array([0, 0])
    probs = np.random.rand(2, 2)
    weights = np.array([0, 1])
    for fn in (dc.metrics.recall_score, dc.metrics.matthews_corrcoef,
               dc.metrics.roc_auc_score):
      score = dc.metrics.Metric(fn).compute_singletask_metric(
          labels, probs, weights)
      # With a single effective sample the score is degenerate: nan or 0.
      self.assertTrue(np.isnan(score) or score == 0)

  def test_r2_score(self):
    """Test that R^2 metric passes basic sanity tests"""
    np.random.seed(123)
    truth = np.random.rand(10,)
    estimate = np.random.rand(10,)
    wrapped = dc.metrics.Metric(dc.metrics.r2_score)
    # The Metric wrapper must agree with calling r2_score directly.
    assert np.isclose(
        dc.metrics.r2_score(truth, estimate),
        wrapped.compute_metric(truth, estimate))

  def test_one_hot(self):
    """One-hot encoding must round-trip through from_one_hot."""
    labels = np.array([0, 0, 1, 0, 1, 1, 0])
    encoded = metrics.to_one_hot(labels)
    want = np.array([[1, 0], [1, 0], [0, 1], [1, 0], [0, 1], [0, 1],
                     [1, 0]])
    assert np.array_equal(want, encoded)
    assert np.array_equal(labels, metrics.from_one_hot(encoded))

  def test_bedroc_score(self):
    """BEDROC is 1.0 for a perfect ranking and ~0 for an inverted one."""
    n_actives = 20
    n_total = 400

    actives = np.ones(n_actives)
    inactives = np.zeros(n_total - n_actives)
    truth = np.concatenate([actives, inactives])

    # Perfect ranking: every active scored ahead of every inactive.
    perfect = dc.metrics.to_one_hot(np.concatenate([actives, inactives]))
    self.assertAlmostEqual(dc.metrics.bedroc_score(truth, perfect), 1.0)

    # Inverted ranking: every inactive scored ahead of every active.
    inverted = dc.metrics.to_one_hot(
        np.concatenate(
            [np.zeros(n_actives),
             np.ones(n_total - n_actives)]))
    self.assertAlmostEqual(dc.metrics.bedroc_score(truth, inverted), 0.0, 4)


# Run the MetricsTest suite when this file is executed as a script.
if __name__ == '__main__':
  googletest.main()
+72 −0
Original line number Diff line number Diff line
"""
Tests for metrics.
"""
import numpy as np
import deepchem as dc
import unittest
from deepchem import metrics


def test_kappa_score():
  """Cohen's kappa should match a value computed by hand."""
  labels = [1, 0, 1, 0]
  probs = [0.8, 0.2, 0.3, 0.4]  # thresholds to [1, 0, 0, 0] at 0.5
  kappa = dc.metrics.kappa_score(labels, np.greater(probs, 0.5))
  p_observed = 3.0 / 4.0
  p_expected = ((2 * 1) + (2 * 3)) / 4.0**2
  reference = np.true_divide(p_observed - p_expected, 1.0 - p_expected)
  np.testing.assert_almost_equal(kappa, reference)


def test_one_sample():
  """Test that the metrics won't raise error even in an extreme condition
  where there is only one sample with w > 0.
  """
  np.random.seed(123)
  labels = np.random.randint(2, size=(2,))
  predictions = np.random.randint(2, size=(2,))
  weights = np.array([0, 1])
  for fn in (dc.metrics.recall_score, dc.metrics.matthews_corrcoef,
             dc.metrics.roc_auc_score):
    # Computing the score is the whole test: no exception may escape.
    dc.metrics.Metric(fn).compute_singletask_metric(labels, predictions,
                                                    weights)


def test_r2_score():
  """Test that R^2 metric passes basic sanity tests"""
  np.random.seed(123)
  truth = np.random.rand(10,)
  estimate = np.random.rand(10,)
  wrapped = dc.metrics.Metric(dc.metrics.r2_score, n_tasks=1)
  # The Metric wrapper must agree with calling r2_score directly.
  direct = dc.metrics.r2_score(truth, estimate)
  assert np.isclose(direct, wrapped.compute_metric(truth, estimate))


def test_bedroc_score():
  """Test BEDROC."""
  n_actives = 20
  n_total = 400

  actives = np.ones(n_actives)
  inactives = np.zeros(n_total - n_actives)
  truth = np.concatenate([actives, inactives])

  # Perfect ranking: every active scored ahead of every inactive.
  perfect = dc.metrics.to_one_hot(np.concatenate([actives, inactives]))
  np.testing.assert_almost_equal(
      dc.metrics.bedroc_score(truth, perfect), 1.0)

  # Inverted ranking: every inactive scored ahead of every active.
  inverted = dc.metrics.to_one_hot(
      np.concatenate(
          [np.zeros(n_actives),
           np.ones(n_total - n_actives)]))
  np.testing.assert_almost_equal(
      dc.metrics.bedroc_score(truth, inverted), 0.0, 4)
+194 −0
Original line number Diff line number Diff line
"""Test normalization of input."""

import numpy as np
import unittest
import deepchem as dc
from deepchem.metrics import to_one_hot
from deepchem.metrics import from_one_hot
from deepchem.metrics import threshold_predictions
from deepchem.metrics import handle_classification_mode
from deepchem.metrics import normalize_prediction_shape
from deepchem.metrics import normalize_weight_shape


def test_one_hot():
  """Test the one hot encoding."""
  labels = np.array([0, 0, 1, 0, 1, 1, 0])
  encoded = to_one_hot(labels)
  want = np.array([[1, 0], [1, 0], [0, 1], [1, 0], [0, 1], [0, 1], [1, 0]])
  assert np.array_equal(want, encoded)
  # Decoding must round-trip back to the original label vector.
  assert np.array_equal(labels, from_one_hot(encoded))


def test_handle_classification_mode_none():
  """With mode None the prediction tensor passes through untouched."""
  probs = np.random.rand(10, 2)
  probs = probs / probs.sum(axis=1, keepdims=True)  # normalize rows
  probs = np.expand_dims(probs, 1)
  out = handle_classification_mode(probs, None)
  assert out.shape == (10, 1, 2)
  assert np.array_equal(out, probs)


def test_handle_classification_mode_threshold():
  """Thresholding at 0.5 reduces class probabilities to hard labels."""
  probs = np.random.rand(10, 2)
  probs = probs / probs.sum(axis=1, keepdims=True)  # normalize rows
  probs = np.expand_dims(probs, 1)
  want = np.argmax(np.squeeze(probs), axis=1)[:, np.newaxis]
  got = handle_classification_mode(probs, "threshold", threshold_value=0.5)
  assert got.shape == (10, 1)
  assert np.array_equal(got, want)


def test_handle_classification_mode_threshold_nonstandard():
  """A non-default threshold (0.3) shifts the positive-class decision."""
  probs = np.random.rand(10, 2)
  probs = probs / probs.sum(axis=1, keepdims=True)  # normalize rows
  # Expected labels computed before the task axis is added.
  want = np.where(probs[:, 1] >= 0.3, np.ones(10), np.zeros(10))[:, np.newaxis]
  probs = np.expand_dims(probs, 1)
  got = handle_classification_mode(probs, "threshold", threshold_value=0.3)
  assert got.shape == (10, 1)
  assert np.array_equal(got, want)


def test_handle_classification_mode_threshold_one_hot():
  """threshold-one-hot mode re-encodes the thresholded labels as one-hot."""
  probs = np.random.rand(10, 2)
  probs = probs / probs.sum(axis=1, keepdims=True)  # normalize rows
  probs = np.expand_dims(probs, 1)
  hard = np.argmax(np.squeeze(probs), axis=1)
  want = np.expand_dims(to_one_hot(hard, n_classes=2), 1)
  got = handle_classification_mode(
      probs, "threshold-one-hot", threshold_value=0.5)
  assert got.shape == (10, 1, 2)
  assert np.array_equal(got, want)


def test_threshold_predictions_binary():
  """Test thresholding of binary predictions."""
  # Random probability matrix with rows summing to 1.
  probs = np.random.rand(10, 2)
  probs = probs / probs.sum(axis=1, keepdims=True)
  hard = threshold_predictions(probs, 0.5)
  assert hard.shape == (10,)
  assert np.array_equal(hard, np.argmax(probs, axis=1))


def test_threshold_predictions_multiclass():
  """Test thresholding of multiclass predictions."""
  probs = np.random.rand(10, 5)
  probs = probs / probs.sum(axis=1, keepdims=True)  # normalize rows
  hard = threshold_predictions(probs)
  assert hard.shape == (10,)
  assert np.array_equal(hard, np.argmax(probs, axis=1))


def test_normalize_1d_classification_binary():
  """A 1-d binary label vector normalizes to (N, 1, 2) one-hots."""
  labels = np.array([0, 0, 1, 0, 1, 1, 0])
  want = np.array([[[1., 0.]], [[1., 0.]], [[0., 1.]], [[1., 0.]],
                   [[0., 1.]], [[0., 1.]], [[1., 0.]]])
  got = normalize_prediction_shape(
      labels, mode="classification", n_tasks=1, n_classes=2)
  assert got.shape == (7, 1, 2)
  assert np.array_equal(want, got)


def test_normalize_1d_classification_multiclass():
  """A 1-d multiclass label vector normalizes to (N, 1, n_classes)."""
  labels = np.random.randint(5, size=(200,))
  want = np.expand_dims(to_one_hot(labels, n_classes=5), 1)
  got = normalize_prediction_shape(
      labels, mode="classification", n_tasks=1, n_classes=5)
  assert got.shape == (200, 1, 5)
  assert np.array_equal(want, got)


def test_normalize_1d_classification_multiclass_explicit_nclasses():
  """An explicit n_classes larger than the label range is honored."""
  labels = np.random.randint(5, size=(10,))
  want = np.expand_dims(to_one_hot(labels, n_classes=10), 1)
  got = normalize_prediction_shape(
      labels, mode="classification", n_classes=10, n_tasks=1)
  assert got.shape == (10, 1, 10)
  assert np.array_equal(want, got)


def test_normalize_2d_classification_binary():
  """A (N, 1) binary label matrix normalizes to (N, 1, 2) one-hots."""
  labels = np.random.randint(2, size=(10, 1))
  want = np.expand_dims(dc.metrics.to_one_hot(np.squeeze(labels)), 1)
  got = normalize_prediction_shape(
      labels, mode="classification", n_tasks=1, n_classes=2)
  assert got.shape == (10, 1, 2)
  assert np.array_equal(want, got)


def test_normalize_3d_classification_binary():
  """An already-normalized (N, 1, n_classes) tensor passes through."""
  labels = np.random.randint(2, size=(10,))
  encoded = np.expand_dims(dc.metrics.to_one_hot(labels, n_classes=2), 1)
  got = normalize_prediction_shape(
      encoded, mode="classification", n_tasks=1, n_classes=2)
  assert got.shape == (10, 1, 2)
  assert np.array_equal(encoded, got)


def test_normalize_1d_regression():
  """A 1-d regression vector gains a trailing task axis."""
  values = np.random.rand(10)
  got = normalize_prediction_shape(values, mode="regression", n_tasks=1)
  assert got.shape == (10, 1)
  assert np.array_equal(values[:, np.newaxis], got)


def test_normalize_2d_regression():
  """A (N, n_tasks) regression matrix passes through unchanged."""
  values = np.random.rand(10, 5)
  got = normalize_prediction_shape(values, mode="regression", n_tasks=5)
  assert got.shape == (10, 5)
  assert np.array_equal(values, got)


def test_normalize_3d_regression():
  """A (N, n_tasks, 1) regression tensor loses the trailing axis."""
  values = np.random.rand(10, 5, 1)
  got = normalize_prediction_shape(values, mode="regression", n_tasks=5)
  assert got.shape == (10, 5)
  assert np.array_equal(np.squeeze(values), got)


def test_scalar_weight_normalization():
  """A scalar weight broadcasts to a full (n_samples, n_tasks) matrix."""
  got = normalize_weight_shape(w=5, n_samples=10, n_tasks=5)
  assert got.shape == (10, 5)
  assert (got == np.full((10, 5), 5)).all()


def test_1d_weight_normalization():
  """A per-sample weight vector is tiled across all tasks."""
  weights = np.random.rand(10)
  # Each of the 5 task columns carries the same per-sample weights.
  want = np.tile(weights[:, np.newaxis], (1, 5))
  got = normalize_weight_shape(weights, n_samples=10, n_tasks=5)
  assert got.shape == (10, 5)
  assert (got == want).all()


def test_2d_weight_normalization():
  """An already-(n_samples, n_tasks) weight matrix passes through."""
  weights = np.random.rand(10, 5)
  got = normalize_weight_shape(weights, n_samples=10, n_tasks=5)
  assert got.shape == (10, 5)
  assert (got == weights).all()
+9 −5
Original line number Diff line number Diff line
@@ -657,11 +657,15 @@ class GraphConvModel(KerasModel):
  """Graph Convolutional Models.

  This class implements the graph convolutional model from the
  following paper:


  Duvenaud, David K., et al. "Convolutional networks on graphs for learning molecular fingerprints." Advances in neural information processing systems. 2015.
  following paper [1]_. These graph convolutions start with a per-atom set of
  descriptors for each atom in a molecule, then combine and recombine these
  descriptors over convolutional layers.

  References
  ----------
  .. [1] Duvenaud, David K., et al. "Convolutional networks on graphs for
  learning molecular fingerprints." Advances in neural information processing
  systems. 2015.
  """

  def __init__(self,
Loading