Commit c3902bfa authored by Bharath Ramsundar's avatar Bharath Ramsundar Committed by GitHub
Browse files

Merge pull request #306 from miaecle/kaggle

Regression model and datasets merged into benchmark
parents c33ebc04 8de2474a
Loading
Loading
Loading
Loading
+37 −20
Original line number Diff line number Diff line
@@ -203,28 +203,30 @@ passed a ``Featurizer`` object. DeepChem provides a number of
different subclasses of ``Featurizer`` for convenience:

### Performances
* Classification

Index splitting

|Dataset    |Model               |Train score/ROC-AUC|Valid score/ROC-AUC|
|-----------|--------------------|-------------------|-------------------|
|tox21      |logistic regression |0.903              |0.705              |
|           |tensorflow(MT-NN)   |0.856              |0.763              |
|           |Multitask network   |0.856              |0.763              |
|           |robust MT-NN        |0.857              |0.767              |
|           |graph convolution   |0.872              |0.798              |
|muv        |logistic regression |0.963              |0.766              |
|           |tensorflow(MT-NN)   |0.904              |0.764              |
|           |Multitask network   |0.904              |0.764              |
|           |robust MT-NN        |0.934              |0.781              |
|           |graph convolution   |0.840              |0.823              |
|pcba       |logistic regression |0.809              |0.776              |
|           |tensorflow(MT-NN)   |0.826              |0.802              |
|           |Multitask network   |0.826              |0.802              |
|           |robust MT-NN        |0.809              |0.783              |
|           |graph convolution   |0.876              |0.852              |
|sider      |logistic regression |0.933              |0.620              |
|           |tensorflow(MT-NN)   |0.775              |0.634              |
|           |Multitask network   |0.775              |0.634              |
|           |robust MT-NN        |0.803              |0.632              |
|           |graph convolution   |0.708              |0.594              |
|toxcast    |logistic regression |0.721              |0.575              |
|           |tensorflow(MT-NN)   |0.830              |0.678              |
|           |Multitask network   |0.830              |0.678              |
|           |robust MT-NN        |0.825              |0.680              |
|           |graph convolution   |0.821              |0.720              |

@@ -233,23 +235,23 @@ Random splitting
|Dataset    |Model               |Train score/ROC-AUC|Valid score/ROC-AUC|
|-----------|--------------------|-------------------|-------------------|
|tox21      |logistic regression |0.903              |0.741              |
|           |tensorflow(MT-NN)   |0.846              |0.812              |
|           |Multitask network   |0.846              |0.812              |
|           |robust MT-NN        |0.844              |0.793              |
|           |graph convolution   |0.872              |0.816              |
|muv        |logistic regression |0.961              |0.696              |
|           |tensorflow(MT-NN)   |0.895              |0.740              |
|           |Multitask network   |0.895              |0.740              |
|           |robust MT-NN        |0.914              |0.667              |
|           |graph convolution   |0.846              |0.776              |
|pcba       |logistic regression |0.807        	     |0.772              |
|           |tensorflow(MT-NN)   |0.811        	     |0.787              |
|           |Multitask network   |0.811        	     |0.787              |
|           |robust MT-NN        |0.809              |0.778              |
|           |graph convolution   |0.875       	     |0.844              |
|sider      |logistic regression |0.932        	     |0.628              |
|           |tensorflow(MT-NN)   |0.779        	     |0.665              |
|           |Multitask network   |0.779        	     |0.665              |
|           |robust MT-NN        |0.761              |0.621              |
|           |graph convolution   |0.706        	     |0.638              |
|toxcast    |logistic regression |0.737        	     |0.543              |
|           |tensorflow(MT-NN)   |0.831        	     |0.684              |
|           |Multitask network   |0.831        	     |0.684              |
|           |robust MT-NN        |0.814              |0.692              |
|           |graph convolution   |0.820        	     |0.692              |

@@ -258,26 +260,37 @@ Scaffold splitting
|Dataset    |Model               |Train score/ROC-AUC|Valid score/ROC-AUC|
|-----------|--------------------|-------------------|-------------------|
|tox21      |logistic regression |0.900              |0.650              |
|           |tensorflow(MT-NN)   |0.863              |0.703              |
|           |Multitask network   |0.863              |0.703              |
|           |robust MT-NN        |0.861              |0.710              |
|           |graph convolution   |0.885              |0.732              |
|muv        |logistic regression |0.947              |0.767              |
|           |tensorflow(MT-NN)   |0.899              |0.762              |
|           |Multitask network   |0.899              |0.762              |
|           |robust MT-NN        |0.944              |0.726              |
|           |graph convolution   |0.872              |0.795              |
|pcba       |logistic regression |0.810              |0.742              |
|           |tensorflow(MT-NN)   |0.814              |0.760              |
|           |Multitask network   |0.814              |0.760              |
|           |robust MT-NN        |0.812              |0.756              |
|           |graph convolution   |0.874              |0.817              |
|sider      |logistic regression |0.926              |0.592              |
|           |tensorflow(MT-NN)   |0.776              |0.557              |
|           |Multitask network   |0.776              |0.557              |
|           |robust MT-NN        |0.797              |0.560              |
|           |graph convolution   |0.722              |0.583              |
|toxcast    |logistic regression |0.716              |0.492              |
|           |tensorflow(MT-NN)   |0.828              |0.617              |
|           |Multitask network   |0.828              |0.617              |
|           |robust MT-NN        |0.830              |0.614              |
|           |graph convolution   |0.832              |0.638              |

* Regression

|Dataset    |Model               |Splitting   |Train score/R2|Valid score/R2|
|-----------|--------------------|------------|--------------|--------------|
|delaney    |MT-NN regression    |Index       |0.773         |0.574         |
|           |MT-NN regression    |Random      |0.769         |0.591         |
|           |MT-NN regression    |Scaffold    |0.782         |0.426         |
|kaggle     |MT-NN regression    |User-defined|0.748         |0.452         |

* General features

Number of tasks and examples in the datasets

|Dataset    |N(tasks)	|N(samples) |
@@ -287,31 +300,35 @@ Number of tasks and examples in the datasets
|pcba       |128        |439863     |
|sider      |27         |1427       |
|toxcast    |617        |8615       |
|delaney    |1          |1128       |
|kaggle     |15         |173065     |

Time needed for benchmark test (~20h in total)

|Dataset    |Model               |Time(loading)/s |Time(running)/s|
|-----------|--------------------|----------------|---------------| 
|tox21      |logistic regression |30              |60             |
|           |tensorflow(MT-NN)   |30              |60             |
|           |Multitask network   |30              |60             |
|           |robust MT-NN        |30              |90             |
|           |graph convolution   |40              |160            |
|muv        |logistic regression |600             |450            |
|           |tensorflow(MT-NN)   |600             |400            |
|           |Multitask network   |600             |400            |
|           |robust MT-NN        |600             |550            |
|           |graph convolution   |800             |1800           |
|pcba       |logistic regression |1800            |10000          |
|           |tensorflow(MT-NN)	 |1800            |9000           |
|           |Multitask network 	 |1800            |9000           |
|           |robust MT-NN        |1800            |14000          |
|           |graph convolution   |2200            |14000          |
|sider      |logistic regression |15              |80             |
|           |tensorflow(MT-NN)	 |15              |75             |
|           |Multitask network 	 |15              |75             |
|           |robust MT-NN        |15              |150            |
|           |graph convolution   |20              |50             |
|toxcast    |logistic regression |80              |2600           |
|           |tensorflow(MT-NN)   |80              |2300           |
|           |Multitask network   |80              |2300           |
|           |robust MT-NN        |80              |4000           |
|           |graph convolution   |80              |900            |
|delaney    |MT-NN regression    |10              |40             |
|kaggle     |MT-NN regression    |2200            |3200           |


## Contributing to DeepChem
+176 −42
Original line number Diff line number Diff line
@@ -39,6 +39,8 @@ from pcba.pcba_datasets import load_pcba
from tox21.tox21_datasets import load_tox21
from toxcast.toxcast_datasets import load_toxcast
from sider.sider_datasets import load_sider
from kaggle.kaggle_datasets import load_kaggle
from delaney.delaney_datasets import load_delaney

def benchmark_loading_datasets(base_dir_o, hyper_parameters, 
                               dataset='tox21', model='tf', split=None,
@@ -70,15 +72,27 @@ def benchmark_loading_datasets(base_dir_o, hyper_parameters,
      path of result file
      
  """
  if not dataset in ['muv','nci','pcba','tox21','sider','toxcast']:
  
  if dataset in ['muv','nci','pcba','tox21','sider','toxcast']:
    mode = 'classification'
  elif dataset in ['kaggle', 'delaney']:
    mode = 'regression'
  else:
    raise ValueError('Dataset not supported')
  
  #assigning featurizer
  if model in ['graphconv']:
    featurizer = 'GraphConv'
    n_features = 71
  elif model in ['tf', 'tf_robust', 'logreg', 'rf']:
    featurizer = 'ECFP'
    n_features = 1024
  elif model in ['tf_regression']:
    if dataset in ['kaggle']:
      featurizer = None #kaggle dataset use its own features
      split = None #kaggle dataset is already splitted
    featurizer = 'ECFP'
    n_features = 1024
  else:
    raise ValueError('Model not supported')

@@ -87,7 +101,8 @@ def benchmark_loading_datasets(base_dir_o, hyper_parameters,
  
  loading_functions = {'tox21': load_tox21, 'muv': load_muv,
                       'pcba': load_pcba, 'nci': load_nci,
                       'sider': load_sider, 'toxcast': load_toxcast}
                       'sider': load_sider, 'toxcast': load_toxcast,
                       'kaggle': load_kaggle, 'delaney': load_delaney}
  
  print('-------------------------------------')
  print('Benchmark %s on dataset: %s' % (model, dataset))
@@ -106,11 +121,20 @@ def benchmark_loading_datasets(base_dir_o, hyper_parameters,
  train_dataset, valid_dataset, test_dataset = all_dataset
  time_finish_loading = time.time()
  #time_finish_loading-time_start is the time(s) used for dataset loading
  if dataset in ['kaggle']:
    n_features = train_dataset.get_data_shape()[0]
    #kaggle dataset has customized features
    
  #running model
  for count, hp in enumerate(hyper_parameters[model]):
    time_start_fitting = time.time()
    train_score,valid_score = benchmark_train_and_valid(base_dir,
    if mode == 'classification':
      train_score, valid_score = benchmark_classification(base_dir,
                                     train_dataset, valid_dataset, tasks, 
                                     transformers, hp, n_features,
                                     model=model, verbosity=verbosity)      
    elif mode == 'regression':
      train_score, valid_score = benchmark_regression(base_dir,
                                     train_dataset, valid_dataset, tasks, 
                                     transformers, hp, n_features,
                                     model=model, verbosity=verbosity)  
@@ -118,20 +142,25 @@ def benchmark_loading_datasets(base_dir_o, hyper_parameters,
    
    with open(os.path.join(out_path, 'results.csv'),'a') as f:
      f.write('\n'+str(count)+',')
      f.write(dataset+','+split+',train,')
      f.write(dataset+','+str(split)+','+mode+',train,')
      if mode == 'classification':
        for i in train_score:
          f.write(i+','+str(train_score[i]['mean-roc_auc_score'])+',')
        f.write('valid,')
        for i in valid_score:
          f.write(i+','+str(valid_score[i]['mean-roc_auc_score'])+',')
      else:
        for i in train_score:
          f.write(i+','+str(train_score[i]['mean-pearson_r2_score'])+',')
        f.write('valid,')
        for i in valid_score:
          f.write(i+','+str(valid_score[i]['mean-pearson_r2_score'])+',')
      f.write('time_for_running,'+
              str(time_finish_fitting-time_start_fitting)+',')
  #clear workspace         
  del tasks, all_dataset, transformers
  del train_dataset, valid_dataset, test_dataset

  return None

def benchmark_train_and_valid(base_dir, train_dataset, valid_dataset, tasks,
def benchmark_classification(base_dir, train_dataset, valid_dataset, tasks,
                            transformers, hyper_parameters, 
                            n_features, model='tf', seed=123, 
                            verbosity='high'):
@@ -155,7 +184,7 @@ def benchmark_train_and_valid(base_dir, train_dataset, valid_dataset, tasks,
  transformers : BalancingTransformer struct
      loaded properties of dataset from load_* function
  
  hyper_parameters : dict of list
  hyper_parameters : dict
      hyper parameters including dropout rate, learning rate, etc.
 
  n_features : integer
@@ -169,9 +198,9 @@ def benchmark_train_and_valid(base_dir, train_dataset, valid_dataset, tasks,
  Returns
  -------
  train_scores : dict
	predicting results(AUC, R2) on training set
	predicting results(AUC) on training set
  valid_scores : dict
	predicting results(AUC, R2) on valid set
	predicting results(AUC) on valid set

  """
  train_scores = {}
@@ -209,10 +238,10 @@ def benchmark_train_and_valid(base_dir, train_dataset, valid_dataset, tasks,
    model_tf.fit(train_dataset, nb_epoch=nb_epoch)
    
    # Evaluating tensorflow MultiTaskDNN model
    train_scores['tensorflow'] = model_tf.evaluate(
    train_scores['tf'] = model_tf.evaluate(
        train_dataset, [classification_metric], transformers)

    valid_scores['tensorflow'] = model_tf.evaluate(
    valid_scores['tf'] = model_tf.evaluate(
        valid_dataset, [classification_metric], transformers)

  if model == 'tf_robust':
@@ -234,7 +263,7 @@ def benchmark_train_and_valid(base_dir, train_dataset, valid_dataset, tasks,
    learning_rate = hyper_parameters['learning_rate']

    # Building tensorflow robust MultiTaskDNN model
    model_robust = dc.models.RobustMultitaskClassifier(len(tasks),
    model_tf_robust = dc.models.RobustMultitaskClassifier(len(tasks),
        n_features, layer_sizes=layer_sizes, 
        weight_init_stddevs=weight_init_stddevs,
        bias_init_consts=bias_init_consts, dropouts=dropouts,
@@ -247,13 +276,13 @@ def benchmark_train_and_valid(base_dir, train_dataset, valid_dataset, tasks,
 
    print('--------------------------------------------')
    print('Start fitting by robust multitask DNN')
    model_robust.fit(train_dataset, nb_epoch=nb_epoch)
    model_tf_robust.fit(train_dataset, nb_epoch=nb_epoch)

    # Evaluating tensorflow robust MultiTaskDNN model
    train_scores['tf_robust'] = model_robust.evaluate(
    train_scores['tf_robust'] = model_tf_robust.evaluate(
        train_dataset, [classification_metric], transformers)

    valid_scores['tf_robust'] = model_robust.evaluate(
    valid_scores['tf_robust'] = model_tf_robust.evaluate(
        valid_dataset, [classification_metric], transformers)

  if model == 'logreg':
@@ -317,7 +346,7 @@ def benchmark_train_and_valid(base_dir, train_dataset, valid_dataset, tasks,
          optimizer_type="adam", beta1=.9, beta2=.999, verbosity="high")
        
        print('-------------------------------------')
        print('Start fitting by logistic regression')
        print('Start fitting by graph convolution')
        # Fit trained model
        model_graphconv.fit(train_dataset, nb_epoch=nb_epoch)
        # Evaluating graph convolution model
@@ -347,14 +376,100 @@ def benchmark_train_and_valid(base_dir, train_dataset, valid_dataset, tasks,
    model_rf.fit(train_dataset)
    
    # Evaluating scikit random forest model
    train_scores['random_forest'] = model_rf.evaluate(
    train_scores['rf'] = model_rf.evaluate(
        train_dataset, [classification_metric], transformers)

    valid_scores['random_forest'] = model_rf.evaluate(
    valid_scores['rf'] = model_rf.evaluate(
        valid_dataset, [classification_metric], transformers)

  return train_scores, valid_scores

  
def benchmark_regression(base_dir, train_dataset, valid_dataset, tasks,
                         transformers, hyper_parameters, 
                         n_features, model='tf_regression', seed=123, 
                         verbosity='high'):
  """
  Fit a regression model on the training set and score it on the training
  and validation sets using the mean Pearson R2 across tasks.
  
  Parameters
  ----------
  base_dir : string
      path of working folder (accepted for signature parity with the
      classification benchmark; not used inside this function)
      
  train_dataset : dataset struct
      dataset the model is fitted on and then evaluated against
      
  valid_dataset : dataset struct
      dataset the fitted model is evaluated against
  
  tasks : list of string
      list of targets(tasks, datasets); only its length is used, as the
      number of tasks of the model
  
  transformers : transformers returned by the load_* function
      forwarded unchanged to the model's ``evaluate`` call
  
  hyper_parameters : dict
      hyper parameters; must contain the keys 'layer_sizes',
      'weight_init_stddevs', 'bias_init_consts', 'dropouts', 'penalty',
      'penalty_type', 'batch_size', 'nb_epoch' and 'learning_rate'
 
  n_features : integer
      number of features, or length of binary fingerprints
  
  model : string,  optional (default='tf_regression')
      choice of which model to use, should be: tf_regression
  
  seed : integer, optional (default=123)
      seed forwarded to the model constructor
  
  verbosity : string, optional (default='high')
      verbosity forwarded to the metric and to the model constructor

  Returns
  -------
  train_scores : dict
      maps the model name ('tf_regression') to the result of evaluating
      the fitted model on the training set
  valid_scores : dict
      maps the model name ('tf_regression') to the result of evaluating
      the fitted model on the valid set

  Raises
  ------
  ValueError
      if ``model`` is not a supported regression model
  KeyError
      if ``hyper_parameters`` lacks one of the required keys

  """
  # Validate before doing any work, and with a real exception rather than
  # ``assert``: assertions are stripped under ``python -O``, in which case an
  # unsupported model would silently produce two empty score dicts.
  if model not in ['tf_regression']:
    raise ValueError('Model not supported: %s' % model)

  train_scores = {}
  valid_scores = {}
  
  # Initialize metrics: Pearson R2 per task, averaged over tasks by np.mean
  regression_metric = dc.metrics.Metric(dc.metrics.pearson_r2_score, np.mean,
                                        verbosity=verbosity)

  if model == 'tf_regression':
    # Loading hyper parameters
    layer_sizes = hyper_parameters['layer_sizes']
    weight_init_stddevs = hyper_parameters['weight_init_stddevs']
    bias_init_consts = hyper_parameters['bias_init_consts']
    dropouts = hyper_parameters['dropouts']
    penalty = hyper_parameters['penalty']
    penalty_type = hyper_parameters['penalty_type']
    batch_size = hyper_parameters['batch_size']
    nb_epoch = hyper_parameters['nb_epoch']
    learning_rate = hyper_parameters['learning_rate']

    # Building tensorflow MultiTaskDNN model
    model_tf_regression = dc.models.TensorflowMultiTaskRegressor(len(tasks),
        n_features, layer_sizes=layer_sizes, 
        weight_init_stddevs=weight_init_stddevs,
        bias_init_consts=bias_init_consts, dropouts=dropouts, penalty=penalty, 
        penalty_type=penalty_type, batch_size=batch_size, 
        learning_rate=learning_rate, verbosity=verbosity, seed=seed)
 
    print('-----------------------------------------')
    print('Start fitting by multitask DNN regression')
    model_tf_regression.fit(train_dataset, nb_epoch=nb_epoch)
    
    # Evaluating tensorflow MultiTaskDNN model; results are stored under the
    # same key as the ``model`` argument so callers can look them up by name
    train_scores['tf_regression'] = model_tf_regression.evaluate(
        train_dataset, [regression_metric], transformers)

    valid_scores['tf_regression'] = model_tf_regression.evaluate(
        valid_dataset, [regression_metric], transformers)

  return train_scores, valid_scores

    
if __name__ == '__main__':
  # Global variables
  np.random.seed(123)
@@ -383,9 +498,10 @@ if __name__ == '__main__':
  if len(splitters) == 0:
    splitters = ['index', 'random', 'scaffold']
  if len(models) == 0:
    models = ['tf', 'tf_robust', 'logreg', 'graphconv']
    models = ['tf', 'tf_robust', 'logreg', 'graphconv', 'tf_regression']
  if len(datasets) == 0:
    datasets = ['tox21', 'sider', 'muv', 'toxcast', 'pcba']
    datasets = ['tox21', 'sider', 'muv', 'toxcast', 'pcba', 
                'kaggle', 'delaney']

  #input hyperparameters
  #tf: dropouts, learning rate, layer_sizes, weight initial stddev,penalty,
@@ -415,8 +531,26 @@ if __name__ == '__main__':

  hps['rf'] = [{'n_estimators': 500}]

  hps['tf_regression'] = [{'layer_sizes': [1000, 1000], 
                           'weight_init_stddevs': [0.02, 0.02], 
                           'bias_init_consts': [1., 1.], 
                           'dropouts': [0.25, 0.25], 
                           'penalty': 0.0005, 'penalty_type': 'l2', 
                           'batch_size': 128, 'nb_epoch': 50, 
                           'learning_rate': 0.00008}]
         
  for split in splitters:
    for model in models:
    for dataset in datasets:
        benchmark_loading_datasets(base_dir_o, hps, dataset=dataset, model=model, 
                                   split=split, verbosity='high', out_path='.')
      if dataset in ['tox21', 'sider', 'muv', 'toxcast', 'pcba']:
        for model in models:
          if model in ['tf', 'tf_robust', 'logreg', 'graphconv']:
            benchmark_loading_datasets(base_dir_o, hps, dataset=dataset, 
                                       model=model, split=split, 
                                       verbosity='high', out_path='.')
      else:
        for model in models:
          if model in ['tf_regression']:
             benchmark_loading_datasets(base_dir_o, hps, dataset=dataset, 
                                        model=model, split=split, 
                                        verbosity='high', out_path='.')
+0 −0

Empty file added.

+5 −2
Original line number Diff line number Diff line
@@ -10,7 +10,7 @@ import numpy as np
import shutil
import deepchem as dc

def load_delaney(featurizer='ECFP'):
def load_delaney(featurizer='ECFP', split='index'):
  """Load delaney datasets."""
  # Featurize Delaney dataset
  print("About to featurize Delaney dataset.")
@@ -36,7 +36,10 @@ def load_delaney(featurizer='ECFP'):
  for transformer in transformers:
      dataset = transformer.transform(dataset)

  splitter = dc.splits.IndexSplitter()
  splitters = {'index': dc.splits.IndexSplitter(),
               'random': dc.splits.RandomSplitter(),
               'scaffold': dc.splits.ScaffoldSplitter()}
  splitter = splitters[split]
  train, valid, test = splitter.train_valid_test_split(dataset,
      compute_feature_statistics=False)
  return delaney_tasks, (train, valid, test), transformers
Loading