Commit 480514ff authored by Bharath Ramsundar's avatar Bharath Ramsundar
Browse files

Fixed issues that came up in analyzing BACE dataset.

parent 71e75033
Loading
Loading
Loading
Loading
+15 −8
Original line number Diff line number Diff line
@@ -52,6 +52,7 @@ def parse_args(input_args=None):
  featurize_cmd.add_argument("--id-endpoint", type=str, default=None,
                      help="Name of endpoint specifying unique identifier for molecule.\n"
                           "If none is specified, then smiles-endpoint is used as identifier.")
  # TODO(rbharath): This should be moved to train-tests-split
  featurize_cmd.add_argument("--threshold", type=float, default=None,
                      help="If specified, will be used to binarize real-valued prediction-endpoint.")
  featurize_cmd.add_argument("--name", required=1,
@@ -60,7 +61,7 @@ def parse_args(input_args=None):
                      help="Folder to generate processed dataset in.")
  featurize_cmd.set_defaults(func=featurize_input)

  # TRANSFORM FLAGS
  # Train/Test Splits flag 
  train_test_cmd = subparsers.add_parser("train-test-split",
                      help="Apply standard data transforms to raw features generated by featurize,\n"
                           "then split data into train/test and store data as (X,y) matrices.")
@@ -153,14 +154,23 @@ def parse_args(input_args=None):
  group.add_argument("--task-type", default="classification",
                      choices=["classification", "regression"],
                      help="Type of learning task.")
  group = eval_cmd.add_argument_group("metrics")
  group = eval_cmd.add_argument_group("Classification metrics")
  group.add_argument("--compute-aucs", action="store_true", default=False,
                      help="Compute AUC for trained models on test set.")
  group.add_argument("--compute-accuracy", action="store_true", default=False,
                      help="Compute accuracy for trained models on test set.")
  group.add_argument("--compute-recall", action="store_true", default=False,
                      help="Compute recall for trained models on test set.")
  group.add_argument("--compute-matthews-corrcoef", action="store_true", default=False,
                      help="Compute Matthews Correlation Coefficient for trained models on test set.")

  group = eval_cmd.add_argument_group("Regression metrics")
  group.add_argument("--compute-r2s", action="store_true", default=False,
                     help="Compute R^2 for trained models on test set.")
  group.add_argument("--compute-rms", action="store_true", default=False,
                     help="Compute RMS for trained models on test set.")
  group.add_argument("--csv-out", type=str, default=None,

  eval_cmd.add_argument("--csv-out", type=str, default=None,
                     help="Outputted predictions on the test set.")
  eval_cmd.set_defaults(func=eval_trained_model)

@@ -196,13 +206,9 @@ def train_test_input(args):
      args.input_transforms, output_transforms, feature_types=args.feature_types, 
      splittype=args.splittype, weight_positives=args.weight_positives,
      mode=args.mode)
  print "train_dict()"
  print train_dict
  trans_train_dict = transform_data(train_dict, args.input_transforms,
      args.output_transforms)
  trans_test_dict = transform_data(test_dict, args.input_transforms, args.output_transforms)
  print "train_dict()"
  print train_dict
  transforms = {"input_transforms": args.input_transforms,
                "output_transform": args.output_transforms}
  stored_train = {"raw": train_dict, "transformed": trans_train_dict, "transforms": transforms}
@@ -259,7 +265,8 @@ def eval_trained_model(args):

  results, aucs, r2s, rms = compute_model_performance(raw_test_dict, test_dict,
      task_types, model, args.modeltype, output_transforms, args.compute_aucs,
      args.compute_r2s, args.compute_rms) 
      args.compute_r2s, args.compute_rms, args.compute_recall,
      args.compute_accuracy, args.compute_matthews_corrcoef) 
  if args.csv_out is not None:
    results_to_csv(results, args.csv_out, task_type=args.task_type)

+82 −28
Original line number Diff line number Diff line
@@ -14,13 +14,16 @@ from deep_chem.utils.preprocess import undo_transform_outputs
from sklearn.metrics import mean_squared_error
from sklearn.metrics import roc_auc_score
from sklearn.metrics import r2_score
from sklearn.metrics import matthews_corrcoef
from sklearn.metrics import recall_score
from sklearn.metrics import accuracy_score
from rdkit import Chem
from rdkit.Chem.Descriptors import ExactMolWt

def compute_model_performance(raw_test_data, test_data, task_types, models, modeltype,
    output_transforms, aucs=True, r2s=False, rms=False):
    output_transforms, aucs=True, r2s=False, rms=False, recall=False, accuracy=False, mcc=False):
  """Computes statistics for model performance on test set."""
  all_results, auc_vals, r2_vals, rms_vals = {}, {}, {}, {}
  all_results, auc_vals, r2_vals, rms_vals, mcc_vals, recall_vals, accuracy_vals = {}, {}, {}, {}, {}, {}, {}
  for index, target in enumerate(sorted(test_data.keys())):
    print "Evaluating model %d" % index
    print "Target %s" % target
@@ -36,6 +39,12 @@ def compute_model_performance(raw_test_data, test_data, task_types, models, mode
      r2_vals.update(compute_r2_scores(results, task_types))
    if rms:
      rms_vals.update(compute_rms_scores(results, task_types))
    if mcc:
      mcc_vals.update(compute_matthews_corr(results, task_types))
    if recall:
      recall_vals.update(compute_recall_score(results, task_types))
    if accuracy:
      recall_vals.update(compute_accuracy_score(results, task_types))

  if aucs:
    print "Mean AUC: %f" % np.mean(np.array(auc_vals.values()))
@@ -43,6 +52,13 @@ def compute_model_performance(raw_test_data, test_data, task_types, models, mode
    print "Mean R^2: %f" % np.mean(np.array(r2_vals.values()))
  if rms:
    print "Mean RMS: %f" % np.mean(np.array(rms_vals.values()))
  if mcc:
    print "Mean MCC: %f" % np.mean(np.array(mcc_vals.values()))
  if recall:
    print "Mean Recall: %f" % np.mean(np.array(recall_vals.values()))
  if accuracy:
    print "Mean Accuracy: %f" % np.mean(np.array(accuracy_vals.values()))
    
  return all_results, aucs, r2s, rms

def model_predictions(X, model, n_targets, task_types, modeltype="sklearn"):
@@ -91,8 +107,10 @@ def model_predictions(X, model, n_targets, task_types, modeltype="sklearn"):
    ypreds = model.predict(X)
  else:
    raise ValueError("Improper modeltype.")
  if type(ypreds) == np.ndarray:
    ypreds = np.squeeze(ypreds)
  ypreds = np.reshape(ypreds, (len(ypreds), n_targets))
  if type(ypreds) != list:
    ypreds = [ypreds]
  return ypreds

def eval_model(ids, X, Ytrue, Ytrue_raw, W, model, task_types, output_transforms, modeltype="sklearn"):
@@ -121,7 +139,7 @@ def eval_model(ids, X, Ytrue, Ytrue_raw, W, model, task_types, output_transforms
      task_types, modeltype=modeltype)
  results = {}
  for target_ind, target in enumerate(sorted_targets):
    ytrue_raw, ytrue, ypred = Ytrue_raw[:, target_ind], Ytrue[:, target_ind], ypreds[:, target_ind]
    ytrue_raw, ytrue, ypred = Ytrue_raw[:, target_ind], Ytrue[:, target_ind], ypreds[target_ind]
    ypred = undo_transform_outputs(ytrue_raw, ypred, output_transforms)
    results[target] = (ids, np.squeeze(ytrue_raw), np.squeeze(ypred))
  return results
@@ -143,33 +161,30 @@ def results_to_csv(results, out, task_type="classification"):
    print "Writing results on test set for target %s to %s" % (target, out)
    

def compute_roc_auc_scores(results, task_types):
  """Transforms the results dict into roc-auc-scores and prints scores.
def compute_r2_scores(results, task_types):
  """Transforms the results dict into R^2 values and prints them.

  Parameters
  ----------
  results: dict
    A dictionary of type produced by eval_regression_model which maps target-names to
    pairs of lists (ytrue, yscore).
  task_types: dict 
    dict mapping target names to output type. Each output type must be either
    "classification" or "regression".
  """
  scores = {}
  for target in results:
    if task_types[target] != "classification":
    if task_types[target] != "regression":
      continue
    _, ytrue, yscore = results[target]
    sample_weights = labels_to_weights(ytrue)
    try:
      score = roc_auc_score(ytrue, yscore[:,1], sample_weight=sample_weights)
    except Exception as e:
      warnings.warn("ROC AUC score calculation failed.")
      score = 0.5
    print "Target %s: AUC %f" % (target, score)
    score = r2_score(ytrue, yscore)
    print "Target %s: R^2 %f" % (target, score)
    scores[target] = score
  return scores

def compute_r2_scores(results, task_types):
  """Transforms the results dict into R^2 values and prints them.
def compute_rms_scores(results, task_types):
  """Transforms the results dict into RMS values and prints them.

  Parameters
  ----------
@@ -185,29 +200,68 @@ def compute_r2_scores(results, task_types):
    if task_types[target] != "regression":
      continue
    _, ytrue, yscore = results[target]
    score = r2_score(ytrue, yscore)
    print "Target %s: R^2 %f" % (target, score)
    scores[target] = score
    rms = np.sqrt(mean_squared_error(ytrue, yscore))
    print "Target %s: RMS %f" % (target, rms)
    scores[target] = rms 
  return scores

def compute_rms_scores(results, task_types):
  """Transforms the results dict into RMS values and prints them.
def compute_roc_auc_scores(results, task_types):
  """Transforms the results dict into roc-auc-scores and prints scores.

  Parameters
  ----------
  results: dict
    A dictionary of type produced by eval_regression_model which maps target-names to
    pairs of lists (ytrue, yscore).
  task_types: dict 
    dict mapping target names to output type. Each output type must be either
    "classification" or "regression".
  """
  scores = {}
  for target in results:
    if task_types[target] != "regression":
    if task_types[target] != "classification":
      continue
    _, ytrue, yscore = results[target]
    rms = np.sqrt(mean_squared_error(ytrue, yscore))
    print "Target %s: RMS %f" % (target, rms)
    scores[target] = rms 
    sample_weights = labels_to_weights(ytrue)
    try:
      score = roc_auc_score(ytrue, yscore[:,1], sample_weight=sample_weights)
    except Exception as e:
      warnings.warn("ROC AUC score calculation failed.")
      score = 0.5
    print "Target %s: AUC %f" % (target, score)
    scores[target] = score
  return scores

def compute_matthews_corr(results, task_types):
  """Compute the Matthews correlation coefficient per classification target.

  Parameters
  ----------
  results: dict
    Maps target names to (ids, ytrue, ypred) triples; ypred is assumed to
    hold per-class scores, with column 1 rounded to a hard 0/1 label.
  task_types: dict
    Maps target names to "classification" or "regression". Non-classification
    targets are skipped.
  """
  scores = {}
  for target, entry in results.items():
    if task_types[target] != "classification":
      continue
    _, ytrue, ypred = entry
    # Round the column-1 score to a binary label before scoring.
    mcc = matthews_corrcoef(ytrue, np.around(ypred[:, 1]))
    print("Target %s: MCC %f" % (target, mcc))
    scores[target] = mcc
  return scores

def compute_recall_score(results, task_types):
  """Compute the recall for each classification target in results.

  Parameters
  ----------
  results: dict
    Maps target names to (ids, ytrue, ypred) triples; column 1 of ypred is
    rounded to a 0/1 label before scoring.
  task_types: dict
    Maps target names to "classification" or "regression". Only
    classification targets are scored.
  """
  scores = {}
  # Restrict to classification targets; regression entries are ignored.
  cls_targets = [t for t in results if task_types[t] == "classification"]
  for target in cls_targets:
    _, ytrue, ypred = results[target]
    value = recall_score(ytrue, np.around(ypred[:, 1]))
    print("Target %s: Recall %f" % (target, value))
    scores[target] = value
  return scores

def compute_accuracy_score(results, task_types):
  """Compute the accuracy for each classification target in results.

  Parameters
  ----------
  results: dict
    Maps target names to (ids, ytrue, ypred) triples; column 1 of ypred is
    rounded to a 0/1 label before scoring.
  task_types: dict
    Maps target names to "classification" or "regression". Non-classification
    targets contribute nothing to the returned dict.
  """
  scores = {}
  for target in results:
    if task_types[target] == "classification":
      _, ytrue, ypred = results[target]
      # Binarize the column-1 score, then compare against the true labels.
      acc = accuracy_score(ytrue, np.around(ypred[:, 1]))
      print("Target %s: Accuracy %f" % (target, acc))
      scores[target] = acc
  return scores
+2 −1
Original line number Diff line number Diff line
@@ -53,7 +53,8 @@ def process_datasets(paths, input_transforms, output_transforms,
  else:
    raise ValueError("Unsupported mode for process_datasets.")
  print "Shape of Xtrain"
  print np.shape(train_dict['CANVAS-BACE'][1])
  target = train_dict.itervalues().next()
  print np.shape(target[1])
  return train_dict, test_dict 

def load_molecules(paths, feature_types=["fingerprints"]):
+3 −2
Original line number Diff line number Diff line
@@ -44,7 +44,6 @@ def transform_inputs(X, input_transforms):
    Z[:, feature] = feature_data
  return Z


def undo_normalization(y_orig, y_pred):
  """Undo the applied normalization transform."""
  old_mean = np.mean(y_orig)
@@ -53,7 +52,9 @@ def undo_normalization(y_orig, y_pred):

def undo_transform_outputs(y_raw, y_pred, output_transforms):
  """Undo transforms on y_pred, W_pred."""
  if output_transforms == ["log"]:
  if output_transforms == []:
    return y_pred
  elif output_transforms == ["log"]:
    return np.exp(y_pred)
  elif output_transforms == ["normalize"]:
    return undo_normalization(y_raw, y_pred)
+2 −2
Original line number Diff line number Diff line
@@ -65,8 +65,8 @@ def load_keras_model(filename):
    targets = pickle.load(f)
  models = {}
  for target in targets:
    json_filename = filename + ".json"
    h5_filename = filename + ".h5"
    json_filename = "%s-%s.%s" % (filename, target, "json")
    h5_filename = "%s-%s.%s" % (filename, target, "h5")
  
    with open(json_filename) as f:
      model = model_from_json(f.read())