Commit de7eb3fe authored by Bharath Ramsundar

Multitask eval runs to completion. Need to fix csv saving still.

parent f4a665f8
+0 −3
@@ -324,9 +324,6 @@ def create_model(args):
   compute_aucs, compute_recall, compute_accuracy, compute_matthews_corrcoef = (
       False, False, False, False)
   compute_r2s, compute_rms = False, False
-  print("create_model()")
-  print("args.task_type")
-  print(args.task_type)
   if args.task_type == "classification":
     compute_aucs, compute_recall, compute_accuracy, compute_matthews_corrcoef = (
         True, True, True, True)
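
The flags above gate which metric families the evaluation step computes. A condensed, standalone sketch of that selection logic (select_metrics is an illustrative helper, not part of the codebase, and the regression branch is assumed symmetric to the classification branch shown in the hunk):

    def select_metrics(task_type):
      # Classification tasks enable AUC/recall/accuracy/MCC; regression
      # tasks (assumed) enable R^2/RMS, mirroring the flag assignments above.
      is_clf = task_type == "classification"
      is_reg = task_type == "regression"
      return {"aucs": is_clf, "recall": is_clf, "accuracy": is_clf,
              "mcc": is_clf, "r2s": is_reg, "rms": is_reg}

    print(select_metrics("classification"))  # r2s/rms come back False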
+85 −60
@@ -7,7 +7,6 @@ import csv
 import numpy as np
 import warnings
 import sys
-from deep_chem.utils.preprocess import labels_to_weights
 from deep_chem.utils.preprocess import undo_transform_outputs
 from sklearn.metrics import mean_squared_error
 from sklearn.metrics import roc_auc_score
@@ -36,41 +35,57 @@ def compute_model_performance(raw_test_data, test_data, task_types, models,
     print("Evaluating model %d" % index, file=print_file)
     print("Target %s" % target, file=print_file)
     (y_test, w_test) = test_data[target]
-    w_test = w_test.ravel()
-    task_X_test = X_test[w_test.nonzero()]
-    task_y_test = y_test[w_test.nonzero()]
-    task_y_test_raw = y_test[w_test.nonzero()]
+    # Find nonzero rows in weight matrix
+    # http://stackoverflow.com/questions/23726026/finding-which-rows-have-all-elements-as-zeros-in-a-matrix-with-numpy
+    nonzero_ws = np.where(w_test.any(axis=1))[0]
+    task_w_test = w_test[nonzero_ws]
+    task_X_test = X_test[nonzero_ws]
+    task_y_test = y_test[nonzero_ws]
+    task_y_test_raw = y_test[nonzero_ws]
     model = models[target]
     # TODO(rbharath): Multitask vs. singletask is explicitly cased here and
     # below. This is awkward. More elegant way of handling this?
+    if target == "all":
+      eval_task_types = task_types.copy()
+    else:
+      eval_task_types = {target: task_types[target]}
+
     results = eval_model(
         test_ids, task_X_test, task_y_test, task_y_test_raw, model,
-        {target: task_types[target]}, modeltype=modeltype,
+        eval_task_types, modeltype=modeltype,
         output_transforms=output_transforms)
+    if target == "all":
+      all_results.update(results)
+    else:
+      all_results[target] = results[target]
+    # Classification Metrics
     if aucs:
-      auc_vals.update(compute_roc_auc_scores(results, task_types))
+      auc_vals.update(compute_roc_auc_scores(results, task_w_test, task_types))
+    if mcc:
+      mcc_vals.update(compute_matthews_corr(results, task_w_test, task_types))
+    if recall:
+      recall_vals.update(compute_recall_score(results, task_w_test, task_types))
+    if accuracy:
+      accuracy_vals.update(compute_accuracy_score(results, task_w_test, task_types))
+    # Regression Metrics
     if r2s:
       r2_vals.update(compute_r2_scores(results, task_types))
     if rms:
       rms_vals.update(compute_rms_scores(results, task_types))
-    if mcc:
-      mcc_vals.update(compute_matthews_corr(results, task_types))
-    if recall:
-      recall_vals.update(compute_recall_score(results, task_types))
-    if accuracy:
-      accuracy_vals.update(compute_accuracy_score(results, task_types))

+  # Print classification metrics
   if aucs:
     print("Mean AUC: %f" % np.mean(np.array(auc_vals.values())), file=print_file)
-  if r2s:
-    print("Mean R^2: %f" % np.mean(np.array(r2_vals.values())), file=print_file)
-  if rms:
-    print("Mean RMS: %f" % np.mean(np.array(rms_vals.values())), file=print_file)
   if mcc:
     print("Mean MCC: %f" % np.mean(np.array(mcc_vals.values())), file=print_file)
   if recall:
     print("Mean Recall: %f" % np.mean(np.array(recall_vals.values())), file=print_file)
   if accuracy:
     print("Mean Accuracy: %f" % np.mean(np.array(accuracy_vals.values())), file=print_file)
+  # Print regression metrics
+  if r2s:
+    print("Mean R^2: %f" % np.mean(np.array(r2_vals.values())), file=print_file)
+  if rms:
+    print("Mean RMS: %f" % np.mean(np.array(rms_vals.values())), file=print_file)

   return all_results, aucs, r2s, rms
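
The core change in this hunk replaces flat `w_test.nonzero()` indexing with row-level filtering, so a compound is kept whenever it has a label for at least one task. A minimal standalone sketch of the idiom (toy arrays and illustrative names, not data from the repo):

    import numpy as np

    # One row per compound, one column per task; a zero weight means
    # "no measurement for this task".
    w_test = np.array([[1, 0],
                       [0, 0],
                       [0, 1]])
    X_test = np.arange(6).reshape(3, 2)

    # Keep rows where any task weight is nonzero, as in the hunk above.
    nonzero_ws = np.where(w_test.any(axis=1))[0]
    print(nonzero_ws)           # [0 2]
    print(X_test[nonzero_ws])   # row 1 (all-zero weights) is dropped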

@@ -184,20 +199,20 @@ def compute_r2_scores(results, task_types):
  Parameters
  ----------
  results: dict
    A dictionary of type produced by eval_regression_model which maps target-names to
    A dictionary of type produced by eval_regression_model which maps task-names to
    pairs of lists (ytrue, yscore).
  task_types: dict
    dict mapping target names to output type. Each output type must be either
    dict mapping task names to output type. Each output type must be either
    "classification" or "regression".
  """
  scores = {}
  for target in results:
    if task_types[target] != "regression":
  for task in results:
    if task_types[task] != "regression":
      continue
    _, ytrue, yscore = results[target]
    _, ytrue, yscore = results[task]
    score = r2_score(ytrue, yscore)
    print("Target %s: R^2 %f" % (target, score))
    scores[target] = score
    print("Target %s: R^2 %f" % (task, score))
    scores[task] = score
  return scores

def compute_rms_scores(results, task_types):
@@ -206,79 +221,89 @@ def compute_rms_scores(results, task_types):
   Parameters
   ----------
   results: dict
-    A dictionary of type produced by eval_regression_model which maps target-names to
+    A dictionary of type produced by eval_regression_model which maps task-names to
     pairs of lists (ytrue, yscore).
   task_types: dict
-    dict mapping target names to output type. Each output type must be either
+    dict mapping task names to output type. Each output type must be either
     "classification" or "regression".
   """
   scores = {}
-  for target in results:
-    if task_types[target] != "regression":
+  for task in results:
+    if task_types[task] != "regression":
       continue
-    _, ytrue, yscore = results[target]
+    _, ytrue, yscore = results[task]
     rms = np.sqrt(mean_squared_error(ytrue, yscore))
-    print("Target %s: RMS %f" % (target, rms))
-    scores[target] = rms
+    print("Target %s: RMS %f" % (task, rms))
+    scores[task] = rms
   return scores

-def compute_roc_auc_scores(results, task_types):
+# TODO(rbharath): The following functions have a lot of boilerplate. Refactor?
+def compute_roc_auc_scores(results, weights, task_types):
   """Transforms the results dict into roc-auc-scores and prints scores.

   Parameters
   ----------
   results: dict
   task_types: dict
-    dict mapping target names to output type. Each output type must be either
+    dict mapping task names to output type. Each output type must be either
     "classification" or "regression".
   """
   scores = {}
-  for target in results:
-    if task_types[target] != "classification":
+  for ind, task in enumerate(sorted(results.keys())):
+    if task_types[task] != "classification":
       continue
-    _, ytrue, yscore = results[target]
-    sample_weights = labels_to_weights(ytrue)
+    wtrue = weights[:, ind].ravel()
+    _, ytrue, yscore = results[task]
+    ytrue, yscore = ytrue[wtrue.nonzero()], yscore[wtrue.nonzero()]
     try:
-      score = roc_auc_score(ytrue, yscore[:, 1], sample_weight=sample_weights)
+      score = roc_auc_score(ytrue, yscore[:, 1], sample_weight=wtrue)
     except Exception:
       warnings.warn("ROC AUC score calculation failed.")
       score = 0.5
-    print("Target %s: AUC %f" % (target, score))
-    scores[target] = score
+    print("Target %s: AUC %f" % (task, score))
+    scores[task] = score
   return scores

-def compute_matthews_corr(results, task_types):
+def compute_matthews_corr(results, weights, task_types):
   """Computes Matthews Correlation Coefficients."""
   scores = {}
-  for target in results:
-    if task_types[target] != "classification":
+  for ind, task in enumerate(sorted(results.keys())):
+    if task_types[task] != "classification":
       continue
-    _, ytrue, ypred = results[target]
+    wtrue = weights[:, ind].ravel()
+    _, ytrue, ypred = results[task]
+    ytrue, ypred = ytrue[wtrue.nonzero()], ypred[wtrue.nonzero()]
+
     mcc = matthews_corrcoef(ytrue, np.around(ypred[:, 1]))
-    print("Target %s: MCC %f" % (target, mcc))
-    scores[target] = mcc
+    print("Target %s: MCC %f" % (task, mcc))
+    scores[task] = mcc
   return scores

-def compute_recall_score(results, task_types):
+def compute_recall_score(results, weights, task_types):
   """Computes recall score."""
   scores = {}
-  for target in results:
-    if task_types[target] != "classification":
+  for ind, task in enumerate(sorted(results.keys())):
+    if task_types[task] != "classification":
       continue
-    _, ytrue, ypred = results[target]
+    wtrue = weights[:, ind].ravel()
+    _, ytrue, ypred = results[task]
+    ytrue, ypred = ytrue[wtrue.nonzero()], ypred[wtrue.nonzero()]
+
     recall = recall_score(ytrue, np.around(ypred[:, 1]))
-    print("Target %s: Recall %f" % (target, recall))
-    scores[target] = recall
+    print("Target %s: Recall %f" % (task, recall))
+    scores[task] = recall
   return scores

-def compute_accuracy_score(results, task_types):
+def compute_accuracy_score(results, weights, task_types):
   """Computes accuracy score."""
   scores = {}
-  for target in results:
-    if task_types[target] != "classification":
+  for ind, task in enumerate(sorted(results.keys())):
+    if task_types[task] != "classification":
       continue
-    _, ytrue, ypred = results[target]
+    wtrue = weights[:, ind].ravel()
+    _, ytrue, ypred = results[task]
+    ytrue, ypred = ytrue[wtrue.nonzero()], ypred[wtrue.nonzero()]
     accuracy = accuracy_score(ytrue, np.around(ypred[:, 1]))
-    print("Target %s: Accuracy %f" % (target, accuracy))
-    scores[target] = accuracy
+    print("Target %s: Accuracy %f" % (task, accuracy))
+    scores[task] = accuracy
   return scores
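
As the new TODO notes, the classification metrics share the same mask-then-score boilerplate: take the task's weight column, drop zero-weight examples, threshold the positive-class probabilities, and score. One possible shape for that refactor, sketched under the assumptions that each results entry is an (ids, ytrue, ypred) triple of arrays and that weight columns follow sorted task order (masked_classification_scores is a hypothetical helper, not part of this commit):

    import numpy as np

    def masked_classification_scores(results, weights, task_types, metric):
      # Generic form of the thresholded metrics above (MCC, recall, accuracy);
      # the AUC variant differs in that it scores probabilities directly and
      # passes sample weights.
      scores = {}
      for ind, task in enumerate(sorted(results.keys())):
        if task_types[task] != "classification":
          continue
        wtrue = weights[:, ind].ravel()
        _, ytrue, ypred = results[task]
        ytrue, ypred = ytrue[wtrue.nonzero()], ypred[wtrue.nonzero()]
        scores[task] = metric(ytrue, np.around(ypred[:, 1]))
      return scores

    # e.g.: accuracy_vals = masked_classification_scores(
    #     results, task_w_test, task_types, sklearn.metrics.accuracy_score)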
+25 −22
@@ -346,25 +346,28 @@ def scaffold_separate(dataset):
   # Sort from largest to smallest scaffold sets
   return [elt for (scaffold, elt) in sorted(scaffolds.items(), key=lambda x: -len(x[1]))]

-def labels_to_weights(ytrue):
-  """Uses the true labels to compute and output sample weights.
-
-  Parameters
-  ----------
-  ytrue: list or np.ndarray
-    True labels.
-  """
-  n_total = np.shape(ytrue)[0]
-  n_positives = np.sum(ytrue)
-  n_negatives = n_total - n_positives
-  pos_weight = np.floor(n_negatives/n_positives)
-
-  sample_weights = np.zeros(np.shape(ytrue)[0])
-  for ind, entry in enumerate(ytrue):
-    if entry == 0:  # negative
-      sample_weights[ind] = 1
-    elif entry == 1:  # positive
-      sample_weights[ind] = pos_weight
-    else:
-      raise ValueError("ytrue can only contain 0s or 1s.")
-  return sample_weights
+#def labels_to_weights(ytrue):
+#  """Uses the true labels to compute and output sample weights.
+#
+#  Parameters
+#  ----------
+#  ytrue: list or np.ndarray
+#    True labels.
+#  """
+#  n_total = np.shape(ytrue)[0]
+#  n_positives = np.sum(ytrue)
+#  n_negatives = n_total - n_positives
+#  pos_weight = np.floor(n_negatives/n_positives)
+#
+#  sample_weights = np.zeros(np.shape(ytrue)[0])
+#  for ind, entry in enumerate(ytrue):
+#    if entry == 0:  # negative
+#      sample_weights[ind] = 1
+#    elif entry == 1:  # positive
+#      sample_weights[ind] = pos_weight
+#    else:
+#      print("labels_to_weights()")
+#      print("ytrue")
+#      print(ytrue)
+#      raise ValueError("ytrue can only contain 0s or 1s.")
+#  return sample_weights
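
For reference, the balancing scheme the now-commented-out labels_to_weights implemented (negatives weighted 1, positives weighted floor(n_negatives / n_positives)) can be written in vectorized form. A sketch assuming a binary 0/1 label vector (balanced_sample_weights is an illustrative name, not part of the codebase):

    import numpy as np

    def balanced_sample_weights(ytrue):
      # Vectorized restatement of the commented-out helper above:
      # weight negatives by 1 and positives by floor(n_neg / n_pos).
      ytrue = np.asarray(ytrue)
      if not np.isin(ytrue, [0, 1]).all():
        raise ValueError("ytrue can only contain 0s or 1s.")
      n_pos = ytrue.sum()
      pos_weight = np.floor((len(ytrue) - n_pos) / n_pos)
      return np.where(ytrue == 1, pos_weight, 1.0)

    print(balanced_sample_weights([0, 0, 0, 1]))  # [1. 1. 1. 3.]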