Commit 9c3f6211 authored by Bharath Ramsundar, committed by GitHub

Merge pull request #409 from lilleswing/fix-benchmark.py

No More gdb7
parents c9be3663 fabd5aee
+276 −167
@@ -17,7 +17,7 @@ on datasets: muv, pcba, tox21, sider, toxcast
 Giving regression performances of:
     MultitaskDNN(tf_regression),
     Graph convolution regression(graphconvreg)
-on datasets: delaney, nci, kaggle, pdbbind, gdb7, chembl
+on datasets: delaney, nci, kaggle, pdbbind, chembl
 
 time estimation listed in README file
 
@@ -51,13 +51,17 @@ from delaney.delaney_datasets import load_delaney
 from nci.nci_datasets import load_nci
 from pdbbind.pdbbind_datasets import load_pdbbind_grid
 from chembl.chembl_datasets import load_chembl
-from gdb7.gdb7_datasets import load_gdb7
+from qm7.qm7_datasets import load_qm7
 from sampl.sampl_datasets import load_sampl
 from clintox.clintox_datasets import load_clintox
 
+
 def benchmark_loading_datasets(hyper_parameters,
-                               dataset='tox21', model='tf', split=None,
-                               reload=True, out_path='.'):
+                               dataset='tox21',
+                               model='tf',
+                               split=None,
+                               reload=True,
+                               out_path='.'):
   """
   Loading dataset for benchmark test
 
@@ -79,8 +83,7 @@ def benchmark_loading_datasets(hyper_parameters,
 
   if dataset in ['muv', 'pcba', 'tox21', 'sider', 'toxcast', 'clintox']:
     mode = 'classification'
-  elif dataset in ['kaggle', 'delaney', 'nci', 'pdbbind', 'chembl', 
-                   'gdb7', 'sampl']:
+  elif dataset in ['kaggle', 'delaney', 'nci', 'pdbbind', 'chembl', 'sampl']:
     mode = 'regression'
   else:
     raise ValueError('Dataset not supported')
@@ -112,29 +115,28 @@ def benchmark_loading_datasets(hyper_parameters,
     if not model in ['tf_regression']:
       return
 
-  if dataset in ['gdb7']:
-    featurizer = None
-    if split in ['scaffold']: # gdb7 supports index, random and indice splitting
-      return
-    if not model in ['tf_regression']:
-      return
-
   if split in ['year']:
     if not dataset in ['chembl']:
       return
-  elif split in ['indice']:
-    if not dataset in ['gdb7']:
-      return
   elif not split in [None, 'index', 'random', 'scaffold', 'butina']:
     raise ValueError('Splitter function not supported')
 
-  loading_functions = {'tox21': load_tox21, 'muv': load_muv,
-                       'pcba': load_pcba, 'nci': load_nci,
-                       'sider': load_sider, 'toxcast': load_toxcast,
-                       'kaggle': load_kaggle, 'delaney': load_delaney,
-                       'pdbbind': load_pdbbind_grid,
-                       'chembl': load_chembl, 'gdb7': load_gdb7,
-                       'sampl': load_sampl, 'clintox': load_clintox}
+  loading_functions = {
+      'tox21': load_tox21,
+      'muv': load_muv,
+      'pcba': load_pcba,
+      'nci': load_nci,
+      'sider': load_sider,
+      'toxcast': load_toxcast,
+      'kaggle': load_kaggle,
+      'delaney': load_delaney,
+      'pdbbind': load_pdbbind_grid,
+      'chembl': load_chembl,
+      'sampl': load_sampl,
+      'clintox': load_clintox
+  }
 
   print('-------------------------------------')
   print('Benchmark %s on dataset: %s' % (model, dataset))
@@ -152,7 +154,7 @@ def benchmark_loading_datasets(hyper_parameters,
   train_dataset, valid_dataset, test_dataset = all_dataset
   time_finish_loading = time.time()
   # time_finish_loading-time_start is the time(s) used for dataset loading
-  if dataset in ['kaggle', 'pdbbind', 'gdb7']:
+  if dataset in ['kaggle', 'pdbbind']:
     n_features = train_dataset.get_data_shape()[0]
     # dataset has customized features
 
@@ -162,48 +164,67 @@
     if mode == 'classification':
       metric = 'auc'
       train_score, valid_score = benchmark_classification(
-          train_dataset, valid_dataset, tasks, 
-          transformers, hp, n_features, metric=metric,
+          train_dataset,
+          valid_dataset,
+          tasks,
+          transformers,
+          hp,
+          n_features,
+          metric=metric,
           model=model)
     elif mode == 'regression':
       metric = 'r2'
       train_score, valid_score = benchmark_regression(
-          train_dataset, valid_dataset, tasks, 
-          transformers, hp, n_features, metric=metric,
+          train_dataset,
+          valid_dataset,
+          tasks,
+          transformers,
+          hp,
+          n_features,
+          metric=metric,
           model=model)
     time_finish_fitting = time.time()
 
-    
     with open(os.path.join(out_path, 'results.csv'), 'a') as f:
       writer = csv.writer(f)
       if mode == 'classification':
         for i in train_score:
-          output_line = [count, dataset, str(split), mode, 'train', i, 
+          output_line = [
+              count, dataset, str(split), mode, 'train', i,
               train_score[i]['mean-roc_auc_score'], 'valid', i,
-                         valid_score[i]['mean-roc_auc_score'],
-                         'time_for_running',
-                         time_finish_fitting-time_start_fitting]
+              valid_score[i]['mean-roc_auc_score'], 'time_for_running',
+              time_finish_fitting - time_start_fitting
+          ]
           writer.writerow(output_line)
       else:
         for i in train_score:
           if metric == 'r2':
-            output_line = [count, dataset, str(split), mode, 'train', i, 
+            output_line = [
+                count, dataset, str(split), mode, 'train', i,
                 train_score[i]['mean-pearson_r2_score'], 'valid', i,
-                           valid_score[i]['mean-pearson_r2_score'], 
-                           'time_for_running',
-                           time_finish_fitting-time_start_fitting]
+                valid_score[i]['mean-pearson_r2_score'], 'time_for_running',
+                time_finish_fitting - time_start_fitting
+            ]
           elif metric == 'mae':
-            output_line = [count, dataset, str(split), mode, 'train', i, 
+            output_line = [
+                count, dataset, str(split), mode, 'train', i,
                 train_score[i]['mean-mean_absolute_error'], 'valid', i,
-                           valid_score[i]['mean-mean_absolute_error'], 
-                           'time_for_running',
-                           time_finish_fitting-time_start_fitting]
+                valid_score[i]['mean-mean_absolute_error'], 'time_for_running',
+                time_finish_fitting - time_start_fitting
+            ]
 
           writer.writerow(output_line)
 
-def benchmark_classification(train_dataset, valid_dataset, tasks,
-                             transformers, hyper_parameters, 
-                             n_features, metric='auc', model='tf', seed=123):
+
+def benchmark_classification(train_dataset,
+                             valid_dataset,
+                             tasks,
+                             transformers,
+                             hyper_parameters,
+                             n_features,
+                             metric='auc',
+                             model='tf',
+                             seed=123):
   """
   Calculate performance of different models on the specific dataset & tasks
 
@@ -256,12 +277,18 @@ def benchmark_classification(train_dataset, valid_dataset, tasks,
     learning_rate = hyper_parameters['learning_rate']
 
     # Building tensorflow MultiTaskDNN model
-    model_tf = dc.models.TensorflowMultiTaskClassifier(len(tasks),
-        n_features, layer_sizes=layer_sizes, 
+    model_tf = dc.models.TensorflowMultiTaskClassifier(
+        len(tasks),
+        n_features,
+        layer_sizes=layer_sizes,
         weight_init_stddevs=weight_init_stddevs,
-        bias_init_consts=bias_init_consts, dropouts=dropouts, penalty=penalty, 
-        penalty_type=penalty_type, batch_size=batch_size, 
-        learning_rate=learning_rate, seed=seed)
+        bias_init_consts=bias_init_consts,
+        dropouts=dropouts,
+        penalty=penalty,
+        penalty_type=penalty_type,
+        batch_size=batch_size,
+        learning_rate=learning_rate,
+        seed=seed)
 
     print('-------------------------------------')
     print('Start fitting by multitask DNN')
@@ -293,16 +320,22 @@ def benchmark_classification(train_dataset, valid_dataset, tasks,
     learning_rate = hyper_parameters['learning_rate']
 
     # Building tensorflow robust MultiTaskDNN model
-    model_tf_robust = dc.models.RobustMultitaskClassifier(len(tasks),
-        n_features, layer_sizes=layer_sizes, 
+    model_tf_robust = dc.models.RobustMultitaskClassifier(
+        len(tasks),
+        n_features,
+        layer_sizes=layer_sizes,
         weight_init_stddevs=weight_init_stddevs,
-        bias_init_consts=bias_init_consts, dropouts=dropouts,
+        bias_init_consts=bias_init_consts,
+        dropouts=dropouts,
         bypass_layer_sizes=bypass_layer_sizes,
         bypass_weight_init_stddevs=bypass_weight_init_stddevs,
         bypass_bias_init_consts=bypass_bias_init_consts,
-        bypass_dropouts=bypass_dropouts, penalty=penalty, 
-        penalty_type=penalty_type, batch_size=batch_size,
-        learning_rate=learning_rate, seed=seed)
+        bypass_dropouts=bypass_dropouts,
+        penalty=penalty,
+        penalty_type=penalty_type,
+        batch_size=batch_size,
+        learning_rate=learning_rate,
+        seed=seed)
 
     print('--------------------------------------------')
     print('Start fitting by robust multitask DNN')
@@ -324,9 +357,13 @@ def benchmark_classification(train_dataset, valid_dataset, tasks,
     learning_rate = hyper_parameters['learning_rate']
 
     # Building tensorflow logistic regression model
-    model_logreg = dc.models.TensorflowLogisticRegression(len(tasks),
-        n_features, penalty=penalty, penalty_type=penalty_type, 
-        batch_size=batch_size, learning_rate=learning_rate, 
+    model_logreg = dc.models.TensorflowLogisticRegression(
+        len(tasks),
+        n_features,
+        penalty=penalty,
+        penalty_type=penalty_type,
+        batch_size=batch_size,
+        learning_rate=learning_rate,
         seed=seed)
 
     print('-------------------------------------')
@@ -364,15 +401,20 @@ def benchmark_classification(train_dataset, valid_dataset, tasks,
       graph_model.add(dc.nn.BatchNormalization(epsilon=1e-5, mode=1))
       graph_model.add(dc.nn.GraphPool())
       # Gather Projection
-      graph_model.add(dc.nn.Dense(int(n_fully_connected_nodes),
-                                  activation='relu'))
+      graph_model.add(
+          dc.nn.Dense(int(n_fully_connected_nodes), activation='relu'))
       graph_model.add(dc.nn.BatchNormalization(epsilon=1e-5, mode=1))
       graph_model.add(dc.nn.GraphGather(batch_size, activation="tanh"))
       with tf.Session() as sess:
         model_graphconv = dc.models.MultitaskGraphClassifier(
-          sess, graph_model, len(tasks), 
-          batch_size=batch_size, learning_rate=learning_rate,
-          optimizer_type="adam", beta1=.9, beta2=.999)
+            sess,
+            graph_model,
+            len(tasks),
+            batch_size=batch_size,
+            learning_rate=learning_rate,
+            optimizer_type="adam",
+            beta1=.9,
+            beta2=.999)
 
         print('-------------------------------------')
         print('Start fitting by graph convolution')
@@ -396,8 +438,8 @@ def benchmark_classification(train_dataset, valid_dataset, tasks,
       sklearn_model = RandomForestClassifier(
           class_weight="balanced", n_estimators=n_estimators, n_jobs=-1)
       return dc.models.sklearn_models.SklearnModel(sklearn_model, model_dir_rf)
-    model_rf = dc.models.multitask.SingletaskToMultitask(
-        tasks, model_builder)
+
+    model_rf = dc.models.multitask.SingletaskToMultitask(tasks, model_builder)
 
     print('-------------------------------------')
     print('Start fitting by random forest')
@@ -413,9 +455,15 @@ def benchmark_classification(train_dataset, valid_dataset, tasks,
   return train_scores, valid_scores
 
 
-def benchmark_regression(train_dataset, valid_dataset, tasks,
-                         transformers, hyper_parameters, n_features, 
-                         metric='r2', model='tf_regression', seed=123):
+def benchmark_regression(train_dataset,
+                         valid_dataset,
+                         tasks,
+                         transformers,
+                         hyper_parameters,
+                         n_features,
+                         metric='r2',
+                         model='tf_regression',
+                         seed=123):
   """
   Calculate performance of different models on the specific dataset & tasks
 
@@ -451,7 +499,8 @@ def benchmark_regression(train_dataset, valid_dataset, tasks,
   if metric == 'r2':
     regression_metric = dc.metrics.Metric(dc.metrics.pearson_r2_score, np.mean)
   elif metric == 'mae':
-    regression_metric = dc.metrics.Metric(dc.metrics.mean_absolute_error, np.mean)
+    regression_metric = dc.metrics.Metric(dc.metrics.mean_absolute_error,
+                                          np.mean)
 
   assert model in ['tf_regression', 'graphconvreg']
 
@@ -468,12 +517,18 @@ def benchmark_regression(train_dataset, valid_dataset, tasks,
     learning_rate = hyper_parameters['learning_rate']
 
     # Building tensorflow MultiTaskDNN model
-    model_tf_regression = dc.models.TensorflowMultiTaskRegressor(len(tasks),
-        n_features, layer_sizes=layer_sizes, 
+    model_tf_regression = dc.models.TensorflowMultiTaskRegressor(
+        len(tasks),
+        n_features,
+        layer_sizes=layer_sizes,
         weight_init_stddevs=weight_init_stddevs,
-        bias_init_consts=bias_init_consts, dropouts=dropouts, penalty=penalty, 
-        penalty_type=penalty_type, batch_size=batch_size, 
-        learning_rate=learning_rate, seed=seed)
+        bias_init_consts=bias_init_consts,
+        dropouts=dropouts,
+        penalty=penalty,
+        penalty_type=penalty_type,
+        batch_size=batch_size,
+        learning_rate=learning_rate,
+        seed=seed)
 
     print('-----------------------------------------')
     print('Start fitting by multitask DNN regression')
@@ -509,15 +564,20 @@ def benchmark_regression(train_dataset, valid_dataset, tasks,
       graph_model.add(dc.nn.BatchNormalization(epsilon=1e-5, mode=1))
      graph_model.add(dc.nn.GraphPool())
       # Gather Projection
-      graph_model.add(dc.nn.Dense(int(n_fully_connected_nodes),
-                                  activation='relu'))
+      graph_model.add(
+          dc.nn.Dense(int(n_fully_connected_nodes), activation='relu'))
       graph_model.add(dc.nn.BatchNormalization(epsilon=1e-5, mode=1))
       graph_model.add(dc.nn.GraphGather(batch_size, activation="tanh"))
       with tf.Session() as sess:
         model_graphconvreg = dc.models.MultitaskGraphRegressor(
-          sess, graph_model, len(tasks), 
-          batch_size=batch_size, learning_rate=learning_rate,
-          optimizer_type="adam", beta1=.9, beta2=.999)
+            sess,
+            graph_model,
+            len(tasks),
+            batch_size=batch_size,
+            learning_rate=learning_rate,
+            optimizer_type="adam",
+            beta1=.9,
+            beta2=.999)
 
         print('-------------------------------------')
         print('Start fitting by graph convolution')
@@ -537,16 +597,29 @@ if __name__ == '__main__':
   # Global variables
   np.random.seed(123)
 
-  parser = argparse.ArgumentParser(description='Deepchem benchmark: '+
+  parser = argparse.ArgumentParser(
+      description='Deepchem benchmark: ' +
       'giving performances of different learning models on datasets')
-  parser.add_argument('-s', action='append', dest='splitter_args', default=[],
+  parser.add_argument(
+      '-s',
+      action='append',
+      dest='splitter_args',
+      default=[],
       help='Choice of splitting function: index, random, scaffold')
-  parser.add_argument('-m', action='append', dest='model_args', default=[], 
+  parser.add_argument(
+      '-m',
+      action='append',
+      dest='model_args',
+      default=[],
       help='Choice of model: tf, tf_robust, logreg, graphconv, ' +
       'tf_regression, graphconvreg')
-  parser.add_argument('-d', action='append', dest='dataset_args', default=[], 
+  parser.add_argument(
+      '-d',
+      action='append',
+      dest='dataset_args',
+      default=[],
       help='Choice of dataset: tox21, sider, muv, toxcast, pcba, ' +
-           'kaggle, delaney, nci, pdbbindi, chembl, gdb7, clintox')
+      'kaggle, delaney, nci, pdbbindi, chembl, clintox')
   args = parser.parse_args()
   #Datasets and models used in the benchmark test
   splitters = args.splitter_args
@@ -556,52 +629,88 @@ if __name__ == '__main__':
   if len(splitters) == 0:
     splitters = ['index', 'random', 'scaffold']
   if len(models) == 0:
-    models = ['tf', 'tf_robust', 'logreg', 'graphconv', 
-              'tf_regression', 'graphconvreg']
+    models = [
+        'tf', 'tf_robust', 'logreg', 'graphconv', 'tf_regression',
+        'graphconvreg'
+    ]
   if len(datasets) == 0:
-    datasets = ['tox21', 'sider', 'muv', 'toxcast', 'pcba', 'clintox',
-                'delaney', 'nci', 'kaggle', 'pdbbind', 'chembl', 'gdb7']
+    datasets = [
+        'tox21', 'sider', 'muv', 'toxcast', 'pcba', 'clintox', 'delaney', 'nci',
+        'kaggle', 'pdbbind', 'chembl'
+    ]
 
   #input hyperparameters
   #tf: dropouts, learning rate, layer_sizes, weight initial stddev,penalty,
   #    batch_size
   hps = {}
-  hps['tf'] = [{'layer_sizes': [1500], 'weight_init_stddevs': [0.02], 
-                'bias_init_consts': [1.], 'dropouts': [0.5], 'penalty': 0.1, 
-                'penalty_type': 'l2', 'batch_size': 50, 'nb_epoch': 10, 
-                'learning_rate': 0.001}]
+  hps['tf'] = [{
+      'layer_sizes': [1500],
+      'weight_init_stddevs': [0.02],
+      'bias_init_consts': [1.],
+      'dropouts': [0.5],
+      'penalty': 0.1,
+      'penalty_type': 'l2',
+      'batch_size': 50,
+      'nb_epoch': 10,
+      'learning_rate': 0.001
+  }]
 
-  hps['tf_robust'] = [{'layer_sizes': [1500], 'weight_init_stddevs': [0.02], 
-                       'bias_init_consts': [1.], 'dropouts': [0.5], 
-                       'bypass_layer_sizes': [200],
-                       'bypass_weight_init_stddevs': [0.02],
-                       'bypass_bias_init_consts': [1.],
-                       'bypass_dropouts': [0.5], 'penalty': 0.1,
-                       'penalty_type': 'l2', 'batch_size': 50, 
-                       'nb_epoch': 10, 'learning_rate': 0.0005}]
+  hps['tf_robust'] = [{
+      'layer_sizes': [1500],
+      'weight_init_stddevs': [0.02],
+      'bias_init_consts': [1.],
+      'dropouts': [0.5],
+      'bypass_layer_sizes': [200],
+      'bypass_weight_init_stddevs': [0.02],
+      'bypass_bias_init_consts': [1.],
+      'bypass_dropouts': [0.5],
+      'penalty': 0.1,
+      'penalty_type': 'l2',
+      'batch_size': 50,
+      'nb_epoch': 10,
+      'learning_rate': 0.0005
+  }]
 
-  hps['logreg'] = [{'penalty': 0.1, 'penalty_type': 'l2', 'batch_size': 50, 
-                    'nb_epoch': 10, 'learning_rate': 0.005}]
+  hps['logreg'] = [{
+      'penalty': 0.1,
+      'penalty_type': 'l2',
+      'batch_size': 50,
+      'nb_epoch': 10,
+      'learning_rate': 0.005
+  }]
 
-  hps['graphconv'] = [{'batch_size': 50, 'nb_epoch': 15, 
-                       'learning_rate': 0.0005, 'n_filters': 64, 
-                       'n_fully_connected_nodes': 128, 'seed': 123}]
+  hps['graphconv'] = [{
+      'batch_size': 50,
+      'nb_epoch': 15,
+      'learning_rate': 0.0005,
+      'n_filters': 64,
+      'n_fully_connected_nodes': 128,
+      'seed': 123
+  }]
 
   hps['rf'] = [{'n_estimators': 500}]
 
-  hps['tf_regression'] = [{'layer_sizes': [1000, 1000], 
-                           'weight_init_stddevs': [0.02, 0.02],
-                           'bias_init_consts': [1., 1.],
-                           'dropouts': [0.25, 0.25],
-                           'penalty': 0.0005, 'penalty_type': 'l2', 
-                           'batch_size': 128, 'nb_epoch': 50, 
-                           'learning_rate': 0.0008}]
+  hps['tf_regression'] = [{
+      'layer_sizes': [1000, 1000],
+      'weight_init_stddevs': [0.02, 0.02],
+      'bias_init_consts': [1., 1.],
+      'dropouts': [0.25, 0.25],
+      'penalty': 0.0005,
+      'penalty_type': 'l2',
+      'batch_size': 128,
+      'nb_epoch': 50,
+      'learning_rate': 0.0008
+  }]
 
-  hps['graphconvreg'] = [{'batch_size': 128, 'nb_epoch': 20, 
-                          'learning_rate': 0.0005, 'n_filters': 128, 
-                          'n_fully_connected_nodes': 256, 'seed': 123}]
+  hps['graphconvreg'] = [{
+      'batch_size': 128,
+      'nb_epoch': 20,
+      'learning_rate': 0.0005,
+      'n_filters': 128,
+      'n_fully_connected_nodes': 256,
+      'seed': 123
+  }]
 
   for split in splitters:
     for dataset in datasets:
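
For anyone who wants to try the updated script on this branch, here is a minimal sketch of a single benchmark run. It mirrors the `-s`/`-m`/`-d` argparse flags and the `hps['tf']` entry from the diff above; the `from benchmark import ...` path and the presence of an installed deepchem with the example dataset directories are assumptions on my part, not part of this PR.

    # Roughly equivalent to: python benchmark.py -s index -m tf -d tox21
    # (assumes the working directory contains benchmark.py and the example
    # dataset folders, and that deepchem is installed)
    from benchmark import benchmark_loading_datasets

    # One candidate hyperparameter setting, copied from hps['tf'] above;
    # the hps entries in the diff are lists of such dicts, so we pass a
    # one-element list here.
    hp = {
        'layer_sizes': [1500],
        'weight_init_stddevs': [0.02],
        'bias_init_consts': [1.],
        'dropouts': [0.5],
        'penalty': 0.1,
        'penalty_type': 'l2',
        'batch_size': 50,
        'nb_epoch': 10,
        'learning_rate': 0.001
    }
    benchmark_loading_datasets(
        [hp], dataset='tox21', model='tf', split='index', reload=True,
        out_path='.')

Per the csv.writer block in the diff, train/valid scores and the fitting time are appended to results.csv under out_path, one row per task.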