Commit f3501670 authored by Vignesh
Browse files

Changed BEDROC to use rdkit; Added tests

parent d951b038
Loading
Loading
Loading
Loading
+8 −20
Original line number Diff line number Diff line
@@ -188,30 +188,18 @@ def bedroc_score(y_true, y_pred, alpha=20.0):
      np.unique(y_true).astype(int),
      [0, 1]), ('Class labels must be binary: %s' % np.unique(y_true))

  # Calculate ratio of actives to inactives
  N = len(y_true)
  n = sum(y_true == 1)
  R_a = n / N
  from rdkit.ML.Scoring.Scoring import CalcBEDROC

  # The expression for the rie_denominator
  rie_denom = R_a * (1 - np.exp(-alpha)) / (np.exp(alpha / N) - 1)
  yt = np.asarray(y_true)
  yp = np.asarray(y_pred)

  # Rank orders and rie_numerator
  order = np.argsort(y_pred)
  r_i = (y_true[order] == 1).nonzero()[0]
  rie_numerator = np.sum(np.exp(-alpha * r_i / N))
  yt = yt.flatten()
  yp = yp[:, 1].flatten()  # Index 1 because one_hot predictions

  # Rie_score
  rie_score = rie_numerator / rie_denom
  scores = list(zip(yt, yp))
  scores = sorted(scores, key=lambda pair: pair[1], reverse=True)

  # Factor to be multipled
  factor = R_a * np.sinh(
      alpha / 2) / (np.cosh(alpha / 2) - np.cosh(alpha / 2 - (alpha * R_a)))

  bedroc_score = rie_score * factor
  bedroc_score += 1 / (1 - np.exp(alpha * (1 - R_a)))

  return bedroc_score
  return CalcBEDROC(scores, 0, alpha)


class Metric(object):
+24 −1
Original line number Diff line number Diff line
@@ -24,7 +24,7 @@ class MetricsTest(googletest.TestCase):
    expected_agreement = ((2 * 1) + (2 * 3)) / 4.0**2
    expected_kappa = np.true_divide(observed_agreement - expected_agreement,
                                    1.0 - expected_agreement)
    self.assertAlmostEquals(kappa, expected_kappa)
    self.assertAlmostEqual(kappa, expected_kappa)

  def test_r2_score(self):
    """Test that R^2 metric passes basic sanity tests"""
@@ -46,6 +46,29 @@ class MetricsTest(googletest.TestCase):
    assert np.array_equal(expected, y_hot)
    assert np.array_equal(y, yp)

  def test_bedroc_score(self):
    """Test BEDROC on a perfect ranking and on a fully inverted ranking."""
    n_total = 400
    n_active = 20
    n_inactive = n_total - n_active

    # Ground-truth labels: the actives come first, then the inactives.
    active_labels = np.ones(n_active)
    inactive_labels = np.zeros(n_inactive)
    y_true = np.concatenate([active_labels, inactive_labels])

    # Best case: the predicted classes equal the true labels, which is
    # expected to score 1.0.
    perfect_classes = np.concatenate([active_labels, inactive_labels])
    y_pred_perfect = dc.metrics.to_one_hot(perfect_classes)
    self.assertAlmostEqual(
        dc.metrics.bedroc_score(y_true, y_pred_perfect), 1.0)

    # Worst case: every label is flipped; expected score ~0.0 (4 places).
    flipped_classes = np.concatenate(
        [np.zeros(n_active), np.ones(n_inactive)])
    y_pred_flipped = dc.metrics.to_one_hot(flipped_classes)
    self.assertAlmostEqual(
        dc.metrics.bedroc_score(y_true, y_pred_flipped), 0.0, places=4)


# Run this module's tests through googletest when executed as a script.
if __name__ == '__main__':
  googletest.main()