Changed BEDROC to use rdkit; Added tests (f3501670) · Commits · 钟慕尧 / deepchem

deepchem/metrics/__init__.py

+8 −20

Original line number	Diff line number	Diff line
		@@ -188,30 +188,18 @@ def bedroc_score(y_true, y_pred, alpha=20.0):
		np.unique(y_true).astype(int),
		[0, 1]), ('Class labels must be binary: %s' % np.unique(y_true))

		# Calculate ratio of actives to inactives
		N = len(y_true)
		n = sum(y_true == 1)
		R_a = n / N
		from rdkit.ML.Scoring.Scoring import CalcBEDROC

		# The expression for the rie_denominator
		rie_denom = R_a * (1 - np.exp(-alpha)) / (np.exp(alpha / N) - 1)
		yt = np.asarray(y_true)
		yp = np.asarray(y_pred)

		# Rank orders and rie_numerator
		order = np.argsort(y_pred)
		r_i = (y_true[order] == 1).nonzero()[0]
		rie_numerator = np.sum(np.exp(-alpha * r_i / N))
		yt = yt.flatten()
		yp = yp[:, 1].flatten() # Index 1 because one_hot predictions

		# Rie_score
		rie_score = rie_numerator / rie_denom
		scores = list(zip(yt, yp))
		scores = sorted(scores, key=lambda pair: pair[1], reverse=True)

		# Factor to be multipled
		factor = R_a * np.sinh(
		alpha / 2) / (np.cosh(alpha / 2) - np.cosh(alpha / 2 - (alpha * R_a)))

		bedroc_score = rie_score * factor
		bedroc_score += 1 / (1 - np.exp(alpha * (1 - R_a)))

		return bedroc_score
		return CalcBEDROC(scores, 0, alpha)


		class Metric(object):

+24 −1

Original line number	Diff line number	Diff line
		@@ -24,7 +24,7 @@ class MetricsTest(googletest.TestCase):
		expected_agreement = ((2 * 1) + (2 * 3)) / 4.0**2
		expected_kappa = np.true_divide(observed_agreement - expected_agreement,
		1.0 - expected_agreement)
		self.assertAlmostEquals(kappa, expected_kappa)
		self.assertAlmostEqual(kappa, expected_kappa)

		def test_r2_score(self):
		"""Test that R^2 metric passes basic sanity tests"""
		@@ -46,6 +46,29 @@ class MetricsTest(googletest.TestCase):
		assert np.array_equal(expected, y_hot)
		assert np.array_equal(y, yp)

		def test_bedroc_score(self):
		    """Check bedroc_score at its extremes: perfect and fully inverted ranking."""
		    n_active, n_total = 20, 400
		    n_inactive = n_total - n_active

		    # Ground truth: all actives (1.0) first, followed by all inactives (0.0).
		    labels = np.concatenate([np.ones(n_active), np.zeros(n_inactive)])

		    # Perfect ranking: the predicted classes coincide with the labels,
		    # so the score is expected to reach its maximum of 1.0.
		    perfect_pred = dc.metrics.to_one_hot(labels.copy())
		    self.assertAlmostEqual(dc.metrics.bedroc_score(labels, perfect_pred), 1.0)

		    # Inverted ranking: every active is predicted 0 and every inactive 1,
		    # so the score is expected to be 0.0 to four decimal places.
		    inverted_pred = dc.metrics.to_one_hot(1.0 - labels)
		    self.assertAlmostEqual(
		        dc.metrics.bedroc_score(labels, inverted_pred), 0.0, 4)


		# Script entry point: run this module's tests through googletest.main()
		# when the file is executed directly rather than imported.
		if __name__ == '__main__':
		googletest.main()