Commit cd01f619 authored by Bharath Ramsundar's avatar Bharath Ramsundar
Browse files

Partial fixes to hyperparameter opt

parent f9fc6644
Loading
Loading
Loading
Loading
+3 −14
Original line number Diff line number Diff line
@@ -16,12 +16,8 @@ class HyperparamOpt(object):
  Provides simple hyperparameter search capabilities.
  """

  def __init__(self, model_class, tasks, task_types, fit_transformers=None,
               verbosity=None):
  def __init__(self, model_class, verbosity=None):
    self.model_class = model_class
    self.tasks = tasks
    self.task_types = task_types
    self.fit_transformers = fit_transformers
    assert verbosity in [None, "low", "high"]
    self.verbosity = verbosity

@@ -73,15 +69,8 @@ class HyperparamOpt(object):
            model_dir = tempfile.mkdtemp()
      else:
        model_dir = tempfile.mkdtemp()
      #TODO(JG) Fit transformers for TF models
      if self.fit_transformers:
        model = self.model_class(
            self.tasks, self.task_types, model_params, model_dir,
            fit_transformers=self.fit_transformers, verbosity=self.verbosity)
      else:
        model = self.model_class(
            self.tasks, self.task_types, model_params, model_dir,
            verbosity=self.verbosity)

      model = self.model_class(model_params, model_dir)
      model.fit(train_dataset)
      model.save()
    
+32 −70
Original line number Diff line number Diff line
@@ -27,6 +27,7 @@ from sklearn.ensemble import RandomForestRegressor
from deepchem.datasets import Dataset
from deepchem.hyperparameters import HyperparamOpt
from deepchem.models.keras_models.fcnet import MultiTaskDNN
from deepchem.models.keras_models import KerasModel
from deepchem.models.tensorflow_models import TensorflowModel
from deepchem.models.tensorflow_models.fcnet import TensorflowMultiTaskClassifier
from deepchem.splits import ScaffoldSplitter
@@ -146,9 +147,9 @@ class TestHyperparamOptAPI(TestAPI):
    tasks = ["task0", "task1", "task2", "task3", "task4", "task5", "task6",
             "task7", "task8", "task9", "task10", "task11", "task12",
             "task13", "task14", "task15", "task16"]
    task_types = {task: task_type for task in tasks}

    featurizer = CircularFingerprint(size=1024)
    n_features = 1024
    featurizer = CircularFingerprint(size=n_features)
    loader = DataLoader(tasks=tasks,
                        smiles_field=self.smiles_field,
                        featurizer=featurizer,
@@ -161,30 +162,13 @@ class TestHyperparamOptAPI(TestAPI):

    transformers = []
    metric = Metric(metrics.matthews_corrcoef, np.mean, mode="classification")
    params_dict= {"nb_hidden": [5, 10],
                  "activation": ["relu"],
                  "dropout": [.5],
                  "learning_rate": [.01],
                  "momentum": [.9],
                  "nesterov": [False],
                  "decay": [1e-4],
                  "batch_size": [5],
                  "nb_epoch": [2],
                  "init": ["glorot_uniform"],
                  "nb_layers": [1],
                  "batchnorm": [False],
                  "data_shape": [train_dataset.get_data_shape()]}
    params_dict= {"n_hidden": [5, 10]}
      
    def model_builder(tasks, task_types, model_params, task_model_dir,
                      verbosity=None):
      g = tf.Graph()
      sess = tf.Session(graph=g)
      K.set_session(sess)
      with g.as_default():
        return MultiTaskDNN(tasks, task_types, model_params, task_model_dir,
                            model_instance=LogisticRegression())
    optimizer = HyperparamOpt(MultiTaskDNN, tasks, task_types,
                              verbosity="low")
    def model_builder(model_params, model_dir):
      keras_model = MultiTaskDNN(
          len(tasks), n_features, task_type, dropout=0., **model_params)
      return KerasModel(keras_model, self.model_dir)
    optimizer = HyperparamOpt(model_builder, verbosity="low")
    best_model, best_hyperparams, all_results = optimizer.hyperparam_search(
      params_dict, train_dataset, valid_dataset, transformers,
      metric, logdir=None)
@@ -198,9 +182,9 @@ class TestHyperparamOptAPI(TestAPI):
    tasks = ["task0", "task1", "task2", "task3", "task4", "task5", "task6",
             "task7", "task8", "task9", "task10", "task11", "task12",
             "task13", "task14", "task15", "task16"]
    task_types = {task: task_type for task in tasks}

    featurizer = CircularFingerprint(size=1024)
    n_features = 1024
    featurizer = CircularFingerprint(size=n_features)

    loader = DataLoader(tasks=tasks,
                        smiles_field=self.smiles_field,
@@ -212,38 +196,16 @@ class TestHyperparamOptAPI(TestAPI):
    train_dataset, valid_dataset, test_dataset = splitter.train_valid_test_split(
        dataset, self.train_dir, self.valid_dir, self.test_dir)

    transformers = []

    transformers = []
    metric = Metric(metrics.matthews_corrcoef, np.mean, mode="classification")
    params_dict = {"activation": ["relu"],
                    "momentum": [.9],
                    "batch_size": [50],
                    "init": ["glorot_uniform"],
                    "data_shape": [train_dataset.get_data_shape()],
                    "learning_rate": [1e-3],
                    "decay": [1e-6],
                    "nb_hidden": [1000], 
                    "nb_epoch": [1],
                    "nesterov": [False],
                    "dropouts": [(.5,)],
                    "nb_layers": [1],
                    "batchnorm": [False],
                    "layer_sizes": [(1000,)],
                    "weight_init_stddevs": [(.1,)],
                    "bias_init_consts": [(1.,)],
                    "num_classes": [2],
                    "penalty": [0.], 
                    "optimizer": ["sgd"],
                    "num_classification_tasks": [len(task_types)]
                  }
    params_dict = {"layer_sizes": [(10,), (100,)]}

    def model_builder(tasks, task_types, params_dict, logdir, verbosity=None):
        return TensorflowModel(
            tasks, task_types, params_dict, logdir, 
            tf_class=TensorflowMultiTaskClassifier,
            verbosity=verbosity)
    optimizer = HyperparamOpt(model_builder, tasks, task_types,
                              verbosity="low")
    def model_builder(model_params, model_dir):
        tensorflow_model = TensorflowMultiTaskClassifier(
            len(tasks), n_features, model_dir, **model_params)
        return TensorflowModel(tensorflow_model, model_dir)
    optimizer = HyperparamOpt(model_builder, verbosity="low")
    best_model, best_hyperparams, all_results = optimizer.hyperparam_search(
      params_dict, train_dataset, valid_dataset, transformers,
      metric, logdir=None)
      params_dict, train_dataset, valid_dataset, transformers, metric,
      logdir=None)