Commit 71e75033 authored by Bharath Ramsundar
Browse files

Some cleanup to remove cruft code.

parent 87e68078
Loading
Loading
Loading
Loading
+2 −3
Original line number Diff line number Diff line
@@ -63,11 +63,10 @@ def fit_singletask_models(train_data, modeltype, task_types):

# TODO(rbharath): I believe this is broken. Update it to work with the rest of
# the package.
def fit_multitask_rf(train_data, test_data, task_types):
def fit_multitask_rf(train_data, task_types):
  """Fits a multitask RF model to provided dataset.
  """
  (_, X_train, y_train, W_train), (test, X_train, y_train, W_train) = (
      train_data, test_data) 
  (_, X_train, y_train, _) = train_data
  model = RandomForestClassifier(n_estimators=100, n_jobs=-1,
      class_weight="auto")
  model.fit(X_train, y_train)
+0 −16
Original line number Diff line number Diff line
@@ -26,14 +26,9 @@ def compute_model_performance(raw_test_data, test_data, task_types, models, mode
    print "Target %s" % target
    (test_ids, Xtest, ytest, wtest) = test_data[target]
    (_, _, ytest_raw, _) = raw_test_data[target]
    print "ytest"
    print ytest
    print "ytest_raw"
    print ytest_raw
    model = models[target]
    results = eval_model(test_ids, Xtest, ytest, ytest_raw, wtest, model, {target: task_types[target]}, 
                         modeltype=modeltype, output_transforms=output_transforms)
    #print results
    all_results[target] = results[target]
    if aucs:
      auc_vals.update(compute_roc_auc_scores(results, task_types))
@@ -125,20 +120,9 @@ def eval_model(ids, X, Ytrue, Ytrue_raw, W, model, task_types, output_transforms
  ypreds = model_predictions(X, model, len(task_types),
      task_types, modeltype=modeltype)
  results = {}
  print "eval_model()"
  print "Ytrue"
  print Ytrue
  print "Ytrue_raw"
  print Ytrue_raw
  for target_ind, target in enumerate(sorted_targets):
    ytrue_raw, ytrue, ypred = Ytrue_raw[:, target_ind], Ytrue[:, target_ind], ypreds[:, target_ind]
    ypred = undo_transform_outputs(ytrue_raw, ypred, output_transforms)
    #ytrue_trans = undo_transform_outputs(ytrue_raw, ytrue, output_transforms)
    print "ytrue_raw"
    print ytrue_raw
    #print "ytrue_trans"
    #print ytrue_trans
    #results[target] = (ids, np.squeeze(ytrue), np.squeeze(ypred))
    results[target] = (ids, np.squeeze(ytrue_raw), np.squeeze(ypred))
  return results

+0 −9
Original line number Diff line number Diff line
@@ -56,15 +56,6 @@ def process_datasets(paths, input_transforms, output_transforms,
  print np.shape(train_dict['CANVAS-BACE'][1])
  return train_dict, test_dict 

#def transform_data(data_dict, input_transforms, output_transforms):
#  """Transforms data using specified transforms"""
#  trans_dict = {}
#  for target in data_dict:
#    data = data_dict[target]
#    trans_data = transform_data(data, input_transforms, output_transforms)
#    trans_dict[target] = trans_data
#  return trans_dict

def load_molecules(paths, feature_types=["fingerprints"]):
  """Load dataset fingerprints and return fingerprints.