Commit 608ef37a authored by Bharath Ramsundar

Finished

parent acb0c8c7
+6 −20
@@ -205,6 +205,8 @@ class GaussianProcessHyperparamOpt(HyperparamOpt):

    # Stores all results
    all_results = {}
    # Store all model references so we don't have to reload
    all_models = {}
    # Stores all model locations
    model_locations = {}
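
    # Editor's sketch, not part of the diff: the hunks in this file replace a
    # save/restore round-trip with an in-memory cache. Each fitted model is
    # stored in `all_models` under the same hyperparameter string used for
    # `all_results`, so the best model can later be fetched directly:
    #
    #   all_models[hp_str] = model        # store right after model.fit(...)
    #   ...
    #   best_model = all_models[hp_str]   # fetch once the best hp_str is known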

@@ -255,15 +257,8 @@ class GaussianProcessHyperparamOpt(HyperparamOpt):
        model_dir = tempfile.mkdtemp()
      # Add it on to the information needed for the constructor
      hyper_parameters["model_dir"] = model_dir
      ##########################################
      print("hyper_parameters")
      print(hyper_parameters)
      ##########################################
      model = self.model_builder(**hyper_parameters)
      model.fit(train_dataset)
      ##########################################
      print("SAVING MODEL")
      ##########################################
      try:
        model.save()
      # Some models autosave
@@ -280,6 +275,8 @@ class GaussianProcessHyperparamOpt(HyperparamOpt):
          f.write('\n')
      # Store all results
      all_results[hp_str] = score
      # Store reference to model
      all_models[hp_str] = model
      model_locations[hp_str] = model_dir
      # GPGO maximizes performance by default; negate the score for minimization
      if use_max:
@@ -310,19 +307,8 @@ class GaussianProcessHyperparamOpt(HyperparamOpt):
        hyper_parameters[hp] = float(hp_opt[hp])
    hp_str = _convert_hyperparam_dict_to_filename(hyper_parameters)

    # Let's reinitialize the model with the best parameters
    model_dir = model_locations[hp_str]
    hyper_parameters["model_dir"] = model_dir
    best_model = self.model_builder(**hyper_parameters)
    ##########################################
    print("RESTORING BEST MODEL")
    ##########################################
    # Some models need to be explicitly reloaded
    try:
      best_model.restore()
    # Some models auto reload
    except NotImplementedError:
      pass
    # Let's fetch the model with the best parameters
    best_model = all_models[hp_str]

    # Compare best model to default hyperparameters
    if log_file:
+107 −108
@@ -34,68 +34,68 @@ class TestGaussianHyperparamOpt(unittest.TestCase):
    self.valid_dataset = dc.data.NumpyDataset(
        X=np.random.rand(20, 5), y=np.random.rand(20, 1))

#  def test_rf_example(self):
#    """Test a simple example of optimizing a RF model with a gaussian process."""
#
#    optimizer = dc.hyper.GaussianProcessHyperparamOpt(self.rf_model_builder)
#    params_dict = {"n_estimators": 10}
#    transformers = []
#    metric = dc.metrics.Metric(dc.metrics.pearson_r2_score)
#
#    best_model, best_hyperparams, all_results = optimizer.hyperparam_search(
#        params_dict,
#        self.train_dataset,
#        self.valid_dataset,
#        transformers,
#        metric,
#        max_iter=2)
#
#    valid_score = best_model.evaluate(self.valid_dataset, [metric],
#                                      transformers)
#    assert valid_score["pearson_r2_score"] == max(all_results.values())
#    assert valid_score["pearson_r2_score"] > 0
#
#  def test_rf_example_min(self):
#    """Test a simple example of optimizing a RF model with a gaussian process looking for minimum score."""
#
#    optimizer = dc.hyper.GaussianProcessHyperparamOpt(self.rf_model_builder)
#    params_dict = {"n_estimators": 10}
#    transformers = []
#    metric = dc.metrics.Metric(dc.metrics.pearson_r2_score)
#
#    best_model, best_hyperparams, all_results = optimizer.hyperparam_search(
#        params_dict,
#        self.train_dataset,
#        self.valid_dataset,
#        transformers,
#        metric,
#        use_max=False,
#        max_iter=2)
#
#    valid_score = best_model.evaluate(self.valid_dataset, [metric],
#                                      transformers)
#    assert valid_score["pearson_r2_score"] == min(all_results.values())
#    assert valid_score["pearson_r2_score"] > 0
#
#  def test_rf_with_logdir(self):
#    """Test that using a logdir can work correctly."""
#    optimizer = dc.hyper.GaussianProcessHyperparamOpt(self.rf_model_builder)
#    params_dict = {"n_estimators": 10}
#    transformers = []
#    metric = dc.metrics.Metric(dc.metrics.pearson_r2_score)
#    with tempfile.TemporaryDirectory() as tmpdirname:
#      best_model, best_hyperparams, all_results = optimizer.hyperparam_search(
#          params_dict,
#          self.train_dataset,
#          self.valid_dataset,
#          transformers,
#          metric,
#          logdir=tmpdirname,
#          max_iter=2)
#    valid_score = best_model.evaluate(self.valid_dataset, [metric],
#                                      transformers)
#    assert valid_score["pearson_r2_score"] == max(all_results.values())
#    assert valid_score["pearson_r2_score"] > 0
  def test_rf_example(self):
    """Test a simple example of optimizing a RF model with a gaussian process."""

    optimizer = dc.hyper.GaussianProcessHyperparamOpt(self.rf_model_builder)
    params_dict = {"n_estimators": 10}
    transformers = []
    metric = dc.metrics.Metric(dc.metrics.pearson_r2_score)

    best_model, best_hyperparams, all_results = optimizer.hyperparam_search(
        params_dict,
        self.train_dataset,
        self.valid_dataset,
        transformers,
        metric,
        max_iter=2)

    valid_score = best_model.evaluate(self.valid_dataset, [metric],
                                      transformers)
    assert valid_score["pearson_r2_score"] == max(all_results.values())
    assert valid_score["pearson_r2_score"] > 0

  def test_rf_example_min(self):
    """Test a simple example of optimizing a RF model with a gaussian process looking for minimum score."""

    optimizer = dc.hyper.GaussianProcessHyperparamOpt(self.rf_model_builder)
    params_dict = {"n_estimators": 10}
    transformers = []
    metric = dc.metrics.Metric(dc.metrics.pearson_r2_score)

    best_model, best_hyperparams, all_results = optimizer.hyperparam_search(
        params_dict,
        self.train_dataset,
        self.valid_dataset,
        transformers,
        metric,
        use_max=False,
        max_iter=2)

    valid_score = best_model.evaluate(self.valid_dataset, [metric],
                                      transformers)
    assert valid_score["pearson_r2_score"] == min(all_results.values())
    assert valid_score["pearson_r2_score"] > 0

  def test_rf_with_logdir(self):
    """Test that using a logdir can work correctly."""
    optimizer = dc.hyper.GaussianProcessHyperparamOpt(self.rf_model_builder)
    params_dict = {"n_estimators": 10}
    transformers = []
    metric = dc.metrics.Metric(dc.metrics.pearson_r2_score)
    with tempfile.TemporaryDirectory() as tmpdirname:
      best_model, best_hyperparams, all_results = optimizer.hyperparam_search(
          params_dict,
          self.train_dataset,
          self.valid_dataset,
          transformers,
          metric,
          logdir=tmpdirname,
          max_iter=2)
    valid_score = best_model.evaluate(self.valid_dataset, [metric],
                                      transformers)
    assert valid_score["pearson_r2_score"] == max(all_results.values())
    assert valid_score["pearson_r2_score"] > 0

  @flaky
  def test_multitask_example(self):
@@ -132,50 +132,49 @@ class TestGaussianHyperparamOpt(unittest.TestCase):
    assert valid_score["mean-mean_squared_error"] == min(all_results.values())
    assert valid_score["mean-mean_squared_error"] > 0

  @flaky
  def test_multitask_example_different_search_range(self):
    """Test a simple example of optimizing a multitask model with a gaussian process search with per-parameter search range."""
    # Generate dummy dataset
    np.random.seed(123)
    train_dataset = dc.data.NumpyDataset(
        np.random.rand(10, 3), np.zeros((10, 2)), np.ones((10, 2)),
        np.arange(10))
    valid_dataset = dc.data.NumpyDataset(
        np.random.rand(5, 3), np.zeros((5, 2)), np.ones((5, 2)), np.arange(5))

    optimizer = dc.hyper.GaussianProcessHyperparamOpt(
        lambda **p: dc.models.MultitaskRegressor(
            n_tasks=2,
            n_features=3,
            dropouts=[0.],
            weight_init_stddevs=[np.sqrt(6) / np.sqrt(1000)],
            #learning_rate=0.003, **p))
            **p))

    params_dict = {"learning_rate": 0.003, "batch_size": 10}
    # These are per-parameter multipliers
    search_range = {"learning_rate": 10, "batch_size": 4}
    transformers = []
    metric = dc.metrics.Metric(
        dc.metrics.mean_squared_error, task_averager=np.mean)

#  @flaky
#  def test_multitask_example_different_search_range(self):
#    """Test a simple example of optimizing a multitask model with a gaussian process search with per-parameter search range."""
#    # Generate dummy dataset
#    np.random.seed(123)
#    train_dataset = dc.data.NumpyDataset(
#        np.random.rand(10, 3), np.zeros((10, 2)), np.ones((10, 2)),
#        np.arange(10))
#    valid_dataset = dc.data.NumpyDataset(
#        np.random.rand(5, 3), np.zeros((5, 2)), np.ones((5, 2)), np.arange(5))
#
#    optimizer = dc.hyper.GaussianProcessHyperparamOpt(
#        lambda **p: dc.models.MultitaskRegressor(
#            n_tasks=2,
#            n_features=3,
#            dropouts=[0.],
#            weight_init_stddevs=[np.sqrt(6) / np.sqrt(1000)],
#            #learning_rate=0.003, **p))
#            **p))
#
#    params_dict = {"learning_rate": 0.003, "batch_size": 10}
#    # These are per-example multiplier
#    search_range = {"learning_rate": 10, "batch_size": 4}
#    transformers = []
#    metric = dc.metrics.Metric(
#        dc.metrics.mean_squared_error, task_averager=np.mean)
#
#    with tempfile.TemporaryDirectory() as tmpdirname:
#      best_model, best_hyperparams, all_results = optimizer.hyperparam_search(
#          params_dict,
#          train_dataset,
#          valid_dataset,
#          transformers,
#          metric,
#          max_iter=2,
#          logdir=tmpdirname,
#          search_range=search_range,
#          use_max=False)
#      valid_score = best_model.evaluate(valid_dataset, [metric])
#    # Test that 2 parameters were optimized
#    for hp_str in all_results.keys():
#      # Recall that the key is a string of the form _batch_size_39_learning_rate_0.01 for example
#      assert "batch_size" in hp_str
#      assert "learning_rate" in hp_str
#    assert valid_score["mean-mean_squared_error"] == min(all_results.values())
#    assert valid_score["mean-mean_squared_error"] > 0
    with tempfile.TemporaryDirectory() as tmpdirname:
      best_model, best_hyperparams, all_results = optimizer.hyperparam_search(
          params_dict,
          train_dataset,
          valid_dataset,
          transformers,
          metric,
          max_iter=2,
          logdir=tmpdirname,
          search_range=search_range,
          use_max=False)
      valid_score = best_model.evaluate(valid_dataset, [metric])
    # Test that 2 parameters were optimized
    for hp_str in all_results.keys():
      # Recall that the key is a string of the form _batch_size_39_learning_rate_0.01 for example
      assert "batch_size" in hp_str
      assert "learning_rate" in hp_str
    assert valid_score["mean-mean_squared_error"] == min(all_results.values())
    assert valid_score["mean-mean_squared_error"] > 0
+1 −5
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Tue Mar  7 00:07:10 2017

@author: zqwu
This file holds the current best set of hyperparameters for the Molnet benchmark.
"""
import deepchem

+59 −160
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 06 14:25:40 2017

@author: Zhenqin Wu
This file provides utilities to run the MoleculeNet benchmark suite.
"""
import os
import time
import csv
import logging
import numpy as np
import tensorflow as tf
import deepchem
@@ -15,6 +13,43 @@ from deepchem.molnet.run_benchmark_models import benchmark_classification, bench
from deepchem.molnet.check_availability import CheckFeaturizer, CheckSplit
from deepchem.molnet.preset_hyper_parameters import hps

logger = logging.getLogger(__name__)
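
# Editor's sketch, not part of the diff: benchmark progress now goes through
# `logger` rather than print(), so a caller can surface it with the standard
# library's logging configuration, e.g.:
#
#   import logging
#   logging.basicConfig(level=logging.INFO)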

# Loading functions available
loading_functions = {
    'bace_c': deepchem.molnet.load_bace_classification,
    'bace_r': deepchem.molnet.load_bace_regression,
    'bbbp': deepchem.molnet.load_bbbp,
    'chembl': deepchem.molnet.load_chembl,
    'clearance': deepchem.molnet.load_clearance,
    'clintox': deepchem.molnet.load_clintox,
    'delaney': deepchem.molnet.load_delaney,
    'factors': deepchem.molnet.load_factors,
    'hiv': deepchem.molnet.load_hiv,
    'hopv': deepchem.molnet.load_hopv,
    'hppb': deepchem.molnet.load_hppb,
    'kaggle': deepchem.molnet.load_kaggle,
    'kinase': deepchem.molnet.load_kinase,
    'lipo': deepchem.molnet.load_lipo,
    'muv': deepchem.molnet.load_muv,
    'nci': deepchem.molnet.load_nci,
    'pcba': deepchem.molnet.load_pcba,
    'pcba_146': deepchem.molnet.load_pcba_146,
    'pcba_2475': deepchem.molnet.load_pcba_2475,
    'pdbbind': deepchem.molnet.load_pdbbind_grid,
    'ppb': deepchem.molnet.load_ppb,
    'qm7': deepchem.molnet.load_qm7_from_mat,
    'qm7b': deepchem.molnet.load_qm7b_from_mat,
    'qm8': deepchem.molnet.load_qm8,
    'qm9': deepchem.molnet.load_qm9,
    'sampl': deepchem.molnet.load_sampl,
    'sider': deepchem.molnet.load_sider,
    'thermosol': deepchem.molnet.load_thermosol,
    'tox21': deepchem.molnet.load_tox21,
    'toxcast': deepchem.molnet.load_toxcast,
    'uv': deepchem.molnet.load_uv,
}
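
# Editor's sketch, not part of the diff: every value in `loading_functions`
# is a MoleculeNet loader with a shared call signature, so a dataset can be
# loaded by key (the 'ECFP' featurizer string is an assumed example):
#
#   tasks, (train, valid, test), transformers = loading_functions['tox21'](
#       featurizer='ECFP', split='random', reload=True)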


def run_benchmark(datasets,
                  model,
@@ -31,16 +66,21 @@ def run_benchmark(datasets,
                  test=False,
                  reload=True,
                  seed=123):
  """
  Run benchmark test on designated datasets with deepchem(or user-defined) model
  """Run MoleculeNet benchmark suite.

  This is a utility function to help run the MoleculeNet benchmark
  suite on a specified model and a specified dataset.

  Run benchmark tests on designated datasets with a deepchem (or
  user-defined) model.

  Parameters
  ----------
  datasets: list of string
      choice of which datasets to use, should be: bace_c, bace_r, bbbp, chembl,
      clearance, clintox, delaney, hiv, hopv, kaggle, lipo, muv, nci, pcba,
      pdbbind, ppb, qm7, qm7b, qm8, qm9, sampl, sider, tox21, toxcast, uv, factors,
      kinase
      choice of which datasets to use, should be one of: bace_c,
      bace_r, bbbp, chembl, clearance, clintox, delaney, hiv, hopv,
      kaggle, lipo, muv, nci, pcba, pdbbind, ppb, qm7, qm7b, qm8, qm9,
      sampl, sider, tox21, toxcast, uv, factors, kinase
  model: string or user-defined model structure
      choice of which model to use; deepchem provides implementations of
      logistic regression, random forest, multitask network,
@@ -49,10 +89,10 @@ def run_benchmark(datasets,
  split: string,  optional (default=None)
      choice of splitter function, None = using the default splitter
  metric: string, optional (default=None)
      choice of evaluation metrics, None = using the default metrics(AUC & R2)
  direction: bool, optional(default=True)
      Optimization direction when doing hyperparameter search
      Maximization(True) or minimization(False)
      Choice of evaluation metrics, None = using the default metrics (AUC & R2)
  use_max: bool, optional (default=True)
      Specifies whether to maximize or minimize `metric`:
      maximization (True) or minimization (False)
  featurizer: string or dc.feat.Featurizer,  optional (default=None)
      choice of featurization, None = using the default corresponding to model
      (string only applicable to deepchem models)
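
  # Editor's sketch, not part of the diff: `use_max` works because GPGO always
  # maximizes its objective, so the optimizer negates the score when a metric
  # should be minimized (see the hyperparameter-opt diff above). The helper
  # name `gpgo_objective` is hypothetical:
  #
  #   def gpgo_objective(score, use_max):
  #     # GPGO maximizes; flipping the sign turns it into minimization.
  #     return score if use_max else -score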
@@ -110,46 +150,12 @@ def run_benchmark(datasets,
    if not split in [None] + CheckSplit[dataset]:
      continue

    loading_functions = {
        'bace_c': deepchem.molnet.load_bace_classification,
        'bace_r': deepchem.molnet.load_bace_regression,
        'bbbp': deepchem.molnet.load_bbbp,
        'chembl': deepchem.molnet.load_chembl,
        'clearance': deepchem.molnet.load_clearance,
        'clintox': deepchem.molnet.load_clintox,
        'delaney': deepchem.molnet.load_delaney,
        'factors': deepchem.molnet.load_factors,
        'hiv': deepchem.molnet.load_hiv,
        'hopv': deepchem.molnet.load_hopv,
        'hppb': deepchem.molnet.load_hppb,
        'kaggle': deepchem.molnet.load_kaggle,
        'kinase': deepchem.molnet.load_kinase,
        'lipo': deepchem.molnet.load_lipo,
        'muv': deepchem.molnet.load_muv,
        'nci': deepchem.molnet.load_nci,
        'pcba': deepchem.molnet.load_pcba,
        'pcba_146': deepchem.molnet.load_pcba_146,
        'pcba_2475': deepchem.molnet.load_pcba_2475,
        'pdbbind': deepchem.molnet.load_pdbbind_grid,
        'ppb': deepchem.molnet.load_ppb,
        'qm7': deepchem.molnet.load_qm7_from_mat,
        'qm7b': deepchem.molnet.load_qm7b_from_mat,
        'qm8': deepchem.molnet.load_qm8,
        'qm9': deepchem.molnet.load_qm9,
        'sampl': deepchem.molnet.load_sampl,
        'sider': deepchem.molnet.load_sider,
        'thermosol': deepchem.molnet.load_thermosol,
        'tox21': deepchem.molnet.load_tox21,
        'toxcast': deepchem.molnet.load_toxcast,
        'uv': deepchem.molnet.load_uv,
    }

    print('-------------------------------------')
    print('Benchmark on dataset: %s' % dataset)
    print('-------------------------------------')
    logger.info('-------------------------------------')
    logger.info('Benchmark on dataset: %s' % dataset)
    logger.info('-------------------------------------')
    # loading datasets
    if split is not None:
      print('Splitting function: %s' % split)
      logger.info('Splitting function: %s' % split)
      tasks, all_dataset, transformers = loading_functions[dataset](
          featurizer=featurizer, split=split, reload=reload)
    else:
@@ -173,8 +179,7 @@ def run_benchmark(datasets,
          valid_dataset,
          transformers,
          metric,
          direction=direction,
          n_features=n_features,
          use_max=use_max,
          n_tasks=len(tasks),
          max_iter=max_iter,
          search_range=search_range)
@@ -187,7 +192,6 @@ def run_benchmark(datasets,
            test_dataset,
            tasks,
            transformers,
            n_features,
            metric,
            model,
            test=test,
@@ -235,108 +239,3 @@ def run_benchmark(datasets,
    if hyper_param_search:
      # pickle needs a binary-mode file handle
      with open(os.path.join(out_path, dataset + model + '.pkl'), 'wb') as f:
        pickle.dump(hyper_parameters, f)


#
# Note by @XericZephyr. Reason why I spun off this function:
#   1. Some models need dataset information.
#   2. It offers us the possibility to **cache** the dataset
#      if the featurizer runs very slowly, e.g., GraphConv.
#   2+. The cache can even happen at Travis CI to accelerate
#       CI testing.
#
def load_dataset(dataset, featurizer, split='random'):
  """
  Load specific dataset for benchmark.

  Parameters
  ----------
  dataset: string
      choice of which datasets to use, should be: tox21, muv, sider,
      toxcast, pcba, delaney, factors, hiv, hopv, kaggle, kinase, nci,
      clintox, pcba_128, pcba_146, pdbbind, chembl, qm7, qm7b, qm9,
      sampl, uv
  featurizer: string or dc.feat.Featurizer.
      choice of featurization.
  split: string, optional (default='random')
      choice of splitter function, None = using the default splitter
  """
  dataset_loading_functions = {
      'bace_c': deepchem.molnet.load_bace_classification,
      'bace_r': deepchem.molnet.load_bace_regression,
      'bbbp': deepchem.molnet.load_bbbp,
      'chembl': deepchem.molnet.load_chembl,
      'clearance': deepchem.molnet.load_clearance,
      'clintox': deepchem.molnet.load_clintox,
      'delaney': deepchem.molnet.load_delaney,
      'factors': deepchem.molnet.load_factors,
      'hiv': deepchem.molnet.load_hiv,
      'hopv': deepchem.molnet.load_hopv,
      'hppb': deepchem.molnet.load_hppb,
      'kaggle': deepchem.molnet.load_kaggle,
      'kinase': deepchem.molnet.load_kinase,
      'lipo': deepchem.molnet.load_lipo,
      'muv': deepchem.molnet.load_muv,
      'nci': deepchem.molnet.load_nci,
      'pcba': deepchem.molnet.load_pcba,
      'pcba_128': deepchem.molnet.load_pcba_128,
      'pcba_146': deepchem.molnet.load_pcba_146,
      'pcba_2475': deepchem.molnet.load_pcba_2475,
      'pdbbind': deepchem.molnet.load_pdbbind_grid,
      'ppb': deepchem.molnet.load_ppb,
      'qm7': deepchem.molnet.load_qm7_from_mat,
      'qm7b': deepchem.molnet.load_qm7b_from_mat,
      'qm8': deepchem.molnet.load_qm8,
      'qm9': deepchem.molnet.load_qm9,
      'sampl': deepchem.molnet.load_sampl,
      'sider': deepchem.molnet.load_sider,
      'thermosol': deepchem.molnet.load_thermosol,
      'tox21': deepchem.molnet.load_tox21,
      'toxcast': deepchem.molnet.load_toxcast,
      'uv': deepchem.molnet.load_uv
  }
  print('-------------------------------------')
  print('Loading dataset: %s' % dataset)
  print('-------------------------------------')
  # loading datasets
  if split is not None:
    print('Splitting function: %s' % split)
  tasks, all_dataset, transformers = dataset_loading_functions[dataset](
      featurizer=featurizer, split=split)
  return tasks, all_dataset, transformers


def benchmark_model(model, all_dataset, transformers, metric, test=False):
  """
  Benchmark custom model.

  model: user-defined model stucture
    For user define model, it should include function: fit, evaluate.

  all_dataset: (train, test, val) data tuple.
    Returned by `load_dataset` function.

  transformers

  metric: string
    choice of evaluation metrics.


  """
  time_start_fitting = time.time()
  train_score = .0
  valid_score = .0
  test_score = .0

  train_dataset, valid_dataset, test_dataset = all_dataset

  model.fit(train_dataset)
  train_score = model.evaluate(train_dataset, metric, transformers)
  valid_score = model.evaluate(valid_dataset, metric, transformers)
  if test:
    test_score = model.evaluate(test_dataset, metric, transformers)

  time_finish_fitting = time.time()
  time_for_running = time_finish_fitting - time_start_fitting

  return train_score, valid_score, test_score, time_for_running
+0 −2
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Mon Mar  6 23:41:26 2017
