Commit dc4eba99 authored by peastman
Browse files

Fixed more incorrect code in metrics and evaluator

parent 1fadab6e
Loading
Loading
Loading
Loading
+6 −6
Original line number Diff line number Diff line
@@ -222,14 +222,10 @@ class Metric(object):
    else:
      n_tasks = y_pred.shape[1]
    if w is None or len(w) == 0:
      w = np.ones_like(y_true)
    assert y_true.shape[0] == y_pred.shape[0] == w.shape[0]
      w = np.ones((n_samples, n_tasks))
    computed_metrics = []
    for task in range(n_tasks):
      y_task = y_true[:, task]
      if self.mode == "regression":
        y_pred_task = y_pred[:, task]
      else:
      y_pred_task = y_pred[:, task]
      w_task = w[:, task]

@@ -280,6 +276,10 @@ class Metric(object):
      return np.nan
    if self.threshold is not None:
      y_pred = np.greater(y_pred, threshold)
    if len(y_true.shape) == 0:
      y_true = np.expand_dims(y_true, 0)
    if len(y_pred.shape) == 0:
      y_pred = np.expand_dims(y_pred, 0)
    try:
      metric_value = self.metric(y_true, y_pred)
    except (AssertionError, ValueError) as e:
+7 −18
Original line number Diff line number Diff line
@@ -178,6 +178,8 @@ class GeneratorEvaluator(object):
    self.weights = weights
    if len(self.label_keys) != len(self.output_keys):
      raise ValueError("Must have same number of labels and outputs")
    if len(self.label_keys) != 1:
      raise ValueError("GeneratorEvaluator currently only supports one label")

  def compute_model_performance(self, metrics, per_task_metrics=False):
    """
@@ -196,30 +198,17 @@ class GeneratorEvaluator(object):

    def generator_closure():
      for feed_dict in self.generator:
        labels = []
        for layer in self.label_keys:
          labels.append(feed_dict[layer])
          del feed_dict[layer]
        for weight in self.weights:
          w.append(feed_dict[weight])
          del feed_dict[weight]
        y.append(np.array(labels))
        y.append(feed_dict[self.label_keys[0]])
        if len(self.weights) > 0:
          w.append(feed_dict[self.weights[0]])
        yield feed_dict

    if not len(metrics):
      return {}
    else:
      mode = metrics[0].mode
    if mode == "classification":
      y_pred = self.model.predict_on_generator(generator_closure())
      y = np.transpose(np.array(y), axes=[0, 2, 1, 3])
      y = np.reshape(y, newshape=(-1, self.n_tasks, self.n_classes))
      y = from_one_hot(y, axis=-1)
    else:
    y_pred = self.model.predict_on_generator(generator_closure())
      y = np.transpose(np.array(y), axes=[0, 2, 1, 3])
      y = np.reshape(y, newshape=(-1, self.n_tasks))
      y_pred = np.reshape(y_pred, newshape=(-1, self.n_tasks))
    y = np.concatenate(y, axis=0)
    multitask_scores = {}
    all_task_scores = {}

+44 −90
Original line number Diff line number Diff line
@@ -5,7 +5,7 @@ import numpy as np
from deepchem.data import NumpyDataset
from deepchem.data.datasets import Databag
from deepchem.models.tensorgraph.layers import Dense, ReduceMean, SoftMax, SoftMaxCrossEntropy
from deepchem.models.tensorgraph.layers import Feature, Label
from deepchem.models.tensorgraph.layers import Feature, Label, Reshape
from deepchem.models.tensorgraph.layers import ReduceSquareDifference
from nose.tools import assert_true
from flaky import flaky
@@ -17,6 +17,8 @@ class TestGeneratorEvaluator(TestCase):
  def test_compute_model_performance_multitask_classifier(self):
    n_data_points = 20
    n_features = 1
    n_tasks = 2
    n_classes = 2

    X = np.ones(shape=(n_data_points // 2, n_features)) * -1
    X1 = np.ones(shape=(n_data_points // 2, n_features))
@@ -25,43 +27,29 @@ class TestGeneratorEvaluator(TestCase):
    class_0 = np.array([[1.0, 0.0] for x in range(int(n_data_points / 2))])
    y1 = np.concatenate((class_0, class_1))
    y2 = np.concatenate((class_1, class_0))
    X = NumpyDataset(X)
    ys = [NumpyDataset(y1), NumpyDataset(y2)]

    databag = Databag()
    y = np.stack([y1, y2], axis=1)
    dataset = NumpyDataset(X, y)

    features = Feature(shape=(None, n_features))
    databag.add_dataset(features, X)

    outputs = []
    entropies = []
    labels = []
    for i in range(2):
      label = Label(shape=(None, 2))
      labels.append(label)
      dense = Dense(out_channels=2, in_layers=[features])
      output = SoftMax(in_layers=[dense])
      smce = SoftMaxCrossEntropy(in_layers=[label, dense])

      entropies.append(smce)
      outputs.append(output)
      databag.add_dataset(label, ys[i])

    total_loss = ReduceMean(in_layers=entropies)
    label = Label(shape=(None, n_tasks, n_classes))
    dense = Dense(out_channels=n_tasks * n_classes, in_layers=[features])
    logits = Reshape(shape=(None, n_tasks, n_classes), in_layers=dense)
    output = SoftMax(in_layers=[logits])
    smce = SoftMaxCrossEntropy(in_layers=[label, logits])
    total_loss = ReduceMean(in_layers=smce)

    tg = dc.models.TensorGraph(learning_rate=0.01, batch_size=n_data_points)
    for output in outputs:
    tg.add_output(output)
    tg.set_loss(total_loss)

    tg.fit_generator(
        databag.iterbatches(
            epochs=1000, batch_size=tg.batch_size, pad_batches=True))
    tg.fit(dataset, nb_epoch=1000)
    metric = dc.metrics.Metric(
        dc.metrics.roc_auc_score, np.mean, mode="classification")

    scores = tg.evaluate_generator(
        databag.iterbatches(), [metric], labels=labels, per_task_metrics=True)
        tg.default_generator(dataset), [metric],
        labels=[label],
        per_task_metrics=True)
    scores = list(scores[1].values())
    # Loosening atol to see if tests stop failing sporadically
    assert_true(np.all(np.isclose(scores, [1.0, 1.0], atol=0.50)))
@@ -75,44 +63,28 @@ class TestGeneratorEvaluator(TestCase):
    X = np.concatenate((X, X1))
    class_1 = np.array([[0.0, 1.0] for x in range(int(n_data_points / 2))])
    class_0 = np.array([[1.0, 0.0] for x in range(int(n_data_points / 2))])
    y1 = np.concatenate((class_0, class_1))
    X = NumpyDataset(X)
    ys = [NumpyDataset(y1)]

    databag = Databag()
    y = np.concatenate((class_0, class_1))
    dataset = NumpyDataset(X, y)

    features = Feature(shape=(None, n_features))
    databag.add_dataset(features, X)

    outputs = []
    entropies = []
    labels = []
    for i in range(1):
    label = Label(shape=(None, 2))
      labels.append(label)
    dense = Dense(out_channels=2, in_layers=[features])
    output = SoftMax(in_layers=[dense])
    smce = SoftMaxCrossEntropy(in_layers=[label, dense])

      entropies.append(smce)
      outputs.append(output)
      databag.add_dataset(label, ys[i])

    total_loss = ReduceMean(in_layers=entropies)
    total_loss = ReduceMean(in_layers=smce)

    tg = dc.models.TensorGraph(learning_rate=0.1)
    for output in outputs:
    tg.add_output(output)
    tg.set_loss(total_loss)

    tg.fit_generator(
        databag.iterbatches(
            epochs=1000, batch_size=tg.batch_size, pad_batches=True))
    tg.fit(dataset, nb_epoch=1000)
    metric = dc.metrics.Metric(
        dc.metrics.roc_auc_score, np.mean, mode="classification")

    scores = tg.evaluate_generator(
        databag.iterbatches(), [metric], labels=labels, per_task_metrics=True)
        tg.default_generator(dataset), [metric],
        labels=[label],
        per_task_metrics=True)
    scores = list(scores[1].values())
    assert_true(np.isclose(scores, [1.0], atol=0.05))

@@ -120,52 +92,34 @@ class TestGeneratorEvaluator(TestCase):
    random_seed = 42
    n_data_points = 20
    n_features = 2
    n_tasks = 2
    np.random.seed(seed=random_seed)

    X = np.random.rand(n_data_points, n_features)
    y1 = np.expand_dims(np.array([0.5 for x in range(n_data_points)]), axis=-1)
    y2 = np.expand_dims(np.array([-0.5 for x in range(n_data_points)]), axis=-1)
    X = NumpyDataset(X)
    ys = [NumpyDataset(y1), NumpyDataset(y2)]

    databag = Databag()
    y1 = np.array([0.5 for x in range(n_data_points)])
    y2 = np.array([-0.5 for x in range(n_data_points)])
    y = np.stack([y1, y2], axis=1)
    dataset = NumpyDataset(X, y)

    features = Feature(shape=(None, n_features))
    databag.add_dataset(features, X)

    outputs = []
    losses = []
    labels = []
    for i in range(2):
      label = Label(shape=(None, 1))
      dense = Dense(out_channels=1, in_layers=[features])
    label = Label(shape=(None, n_tasks))
    dense = Dense(out_channels=n_tasks, in_layers=[features])
    loss = ReduceSquareDifference(in_layers=[dense, label])

      outputs.append(dense)
      losses.append(loss)
      labels.append(label)
      databag.add_dataset(label, ys[i])

    total_loss = ReduceMean(in_layers=losses)

    tg = dc.models.TensorGraph(
        mode="regression",
        batch_size=20,
        random_seed=random_seed,
        learning_rate=0.1)
    for output in outputs:
      tg.add_output(output)
    tg.set_loss(total_loss)
    tg = dc.models.TensorGraph(random_seed=random_seed, learning_rate=0.1)
    tg.add_output(dense)
    tg.set_loss(loss)

    tg.fit_generator(
        databag.iterbatches(
            epochs=1000, batch_size=tg.batch_size, pad_batches=True))
    tg.fit(dataset, nb_epoch=1000)
    metric = [
        dc.metrics.Metric(
            dc.metrics.mean_absolute_error, np.mean, mode="regression"),
    ]
    scores = tg.evaluate_generator(
        databag.iterbatches(), metric, labels=labels, per_task_metrics=True)
        tg.default_generator(dataset),
        metric,
        labels=[label],
        per_task_metrics=True)
    scores = list(scores[1].values())
    assert_true(np.all(np.isclose(scores, [0.0, 0.0], atol=1.0)))

+1 −1
Original line number Diff line number Diff line
@@ -108,7 +108,7 @@ def compute_scores(optimize):
  print()
  print('Cross entropy loss:', np.mean(losses))
  print('Prediction accuracy:', accuracy_score(y_true, y_pred > 0.5))
  print('ROC AUC:', dc.metrics.compute_roc_auc_scores(y_true, y_pred))
  print('ROC AUC:', dc.metrics.roc_auc_scores(y_true, y_pred))
  print()