Commit eb2bf860 authored by Bharath Ramsundar's avatar Bharath Ramsundar Committed by GitHub
Browse files

Merge pull request #227 from rbharath/keras

Support Latest Keras Versions
parents 301609b8 b567fa17
Loading
Loading
Loading
Loading
+3 −2
Original line number Diff line number Diff line
@@ -16,8 +16,9 @@ install:
- conda install -c omnia boost=1.59.0
- conda install -c omnia openbabel
- conda install joblib
- conda install -c omnia theano
- conda install -c omnia keras
- conda install h5py
- pip install keras
- export KERAS_BACKEND=tensorflow
- conda install seaborn
- conda install six
- conda install dill
+11 −1
Original line number Diff line number Diff line
@@ -30,6 +30,8 @@ from deepchem.models.keras_models.fcnet import MultiTaskDNN
from deepchem.models.tensorflow_models import TensorflowModel
from deepchem.models.tensorflow_models.fcnet import TensorflowMultiTaskClassifier
from deepchem.splits import ScaffoldSplitter
import tensorflow as tf
from keras import backend as K

def rf_model_builder(tasks, task_types, params_dict, model_dir, verbosity=None):
    """Builds random forests given hyperparameters.
@@ -173,6 +175,14 @@ class TestHyperparamOptAPI(TestAPI):
                  "batchnorm": [False],
                  "data_shape": [train_dataset.get_data_shape()]}
      
    def model_builder(tasks, task_types, model_params, task_model_dir,
                      verbosity=None):
      # Construct a Keras MultiTaskDNN inside its own dedicated TF graph and
      # session, so repeated hyperparameter-search trials don't accumulate ops
      # in one shared default graph.
      g = tf.Graph()
      sess = tf.Session(graph=g)
      # Point the Keras backend at the fresh session before any layers are built.
      K.set_session(sess)
      with g.as_default():
        # NOTE(review): model_instance=LogisticRegression() looks copy/pasted
        # from a sklearn model builder — confirm MultiTaskDNN actually accepts
        # (and uses) this kwarg. Also, HyperparamOpt below is constructed with
        # MultiTaskDNN directly rather than this builder, so model_builder
        # appears unused — verify intent.
        return MultiTaskDNN(tasks, task_types, model_params, task_model_dir,
                            model_instance=LogisticRegression())
    optimizer = HyperparamOpt(MultiTaskDNN, tasks, task_types,
                              verbosity="low")
    best_model, best_hyperparams, all_results = optimizer.hyperparam_search(
+55 −50
Original line number Diff line number Diff line
@@ -33,6 +33,9 @@ from deepchem.models.tensorflow_models import TensorflowModel
from deepchem.models.tensorflow_models.fcnet import TensorflowMultiTaskClassifier
from deepchem.splits import ScaffoldSplitter
from deepchem.splits import SpecifiedSplitter
from deepchem.models.keras_models.fcnet import MultiTaskDNN
import tensorflow as tf
from keras import backend as K

class TestModelAPI(TestAPI):
  """
@@ -277,8 +280,6 @@ class TestModelAPI(TestAPI):
  #                        Metric(metrics.mean_squared_error),
  #                        Metric(metrics.mean_absolute_error)]

  #  model = SingleTaskDNN(tasks, task_types, model_params, self.model_dir)

  #  # Fit trained model
  #  model.fit(train_dataset)
  #  model.save()
@@ -294,12 +295,16 @@ class TestModelAPI(TestAPI):

  def test_multitask_keras_mlp_ECFP_classification_API(self):
    """Straightforward test of Keras multitask deepchem classification API."""
    from deepchem.models.keras_models.fcnet import MultiTaskDNN
    g = tf.Graph()
    sess = tf.Session(graph=g)
    K.set_session(sess)
    with g.as_default():
      task_type = "classification"
      # TODO(rbharath): There should be some automatic check to ensure that all
      # required model_params are specified.
      # TODO(rbharath): Turning off dropout to make tests behave.
      model_params = {"nb_hidden": 10, "activation": "relu",
                    "dropout": .5, "learning_rate": .01,
                      "dropout": .0, "learning_rate": .01,
                      "momentum": .9, "nesterov": False,
                      "decay": 1e-4, "batch_size": 5,
                      "nb_epoch": 2, "init": "glorot_uniform",
+249 −235
Original line number Diff line number Diff line
@@ -26,6 +26,8 @@ from deepchem.models.tensorflow_models import TensorflowModel
from deepchem.models.tensorflow_models.fcnet import TensorflowMultiTaskRegressor
from deepchem.models.tensorflow_models.fcnet import TensorflowMultiTaskClassifier
from deepchem.models.multitask import SingletaskToMultitask
import tensorflow as tf
from keras import backend as K

class TestOverfitAPI(TestAPI):
  """
@@ -152,6 +154,10 @@ class TestOverfitAPI(TestAPI):

  def test_keras_regression_overfit(self):
    """Test that keras models can overfit simple regression datasets."""
    g = tf.Graph()
    sess = tf.Session(graph=g)
    K.set_session(sess)
    with g.as_default():
      tasks = ["task0"]
      task_types = {task: "regression" for task in tasks}
      n_samples = 10
@@ -252,6 +258,10 @@ class TestOverfitAPI(TestAPI):

  def test_keras_classification_overfit(self):
    """Test that keras models can overfit simple classification datasets."""
    g = tf.Graph()
    sess = tf.Session(graph=g)
    K.set_session(sess)
    with g.as_default():
      tasks = ["task0"]
      task_types = {task: "classification" for task in tasks}
      n_samples = 10
@@ -301,6 +311,10 @@ class TestOverfitAPI(TestAPI):

  def test_keras_skewed_classification_overfit(self):
    """Test keras models can overfit 0/1 datasets with few actives."""
    g = tf.Graph()
    sess = tf.Session(graph=g)
    K.set_session(sess)
    with g.as_default():
      tasks = ["task0"]
      task_types = {task: "classification" for task in tasks}
      n_samples = 100
@@ -480,20 +494,12 @@ class TestOverfitAPI(TestAPI):
    X = np.random.rand(n_samples, n_features)
    y = np.random.binomial(1, p, size=(n_samples, n_tasks))
    w = np.ones((n_samples, n_tasks))
    print("np.count_nonzero(y)")
    print(np.count_nonzero(y))
    ##### DEBUG
    y_flat, w_flat = np.squeeze(y), np.squeeze(w)
    y_nonzero = y_flat[w_flat != 0]
    num_nonzero = np.count_nonzero(y_nonzero)
    weight_nonzero = len(y_nonzero)/num_nonzero
    print("weight_nonzero")
    print(weight_nonzero)
    w_flat[y_flat != 0] = weight_nonzero
    w = np.reshape(w_flat, (n_samples, n_tasks))
    print("np.amin(w), np.amax(w)")
    print(np.amin(w), np.amax(w))
    ##### DEBUG
  
    dataset = Dataset.from_numpy(self.train_dir, X, y, w, ids, tasks)

@@ -577,6 +583,10 @@ class TestOverfitAPI(TestAPI):

  def test_keras_multitask_classification_overfit(self):
    """Test keras multitask overfits tiny data."""
    g = tf.Graph()
    sess = tf.Session(graph=g)
    K.set_session(sess)
    with g.as_default():
      n_tasks = 10
      tasks = ["task%d" % task for task in range(n_tasks)]
      task_types = {task: "classification" for task in tasks}
@@ -723,6 +733,10 @@ class TestOverfitAPI(TestAPI):

  def test_keras_multitask_regression_overfit(self):
    """Test keras multitask overfits tiny data."""
    g = tf.Graph()
    sess = tf.Session(graph=g)
    K.set_session(sess)
    with g.as_default():
      n_tasks = 10
      tasks = ["task%d" % task for task in range(n_tasks)]
      task_types = {task: "regression" for task in tasks}
+57 −51
Original line number Diff line number Diff line
@@ -20,6 +20,8 @@ from deepchem.models.sklearn_models import SklearnModel
from deepchem.models.keras_models.fcnet import MultiTaskDNN
from deepchem.models.tensorflow_models import TensorflowModel
from deepchem.models.tensorflow_models.fcnet import TensorflowMultiTaskClassifier
import tensorflow as tf
from keras import backend as K

class TestModelReload(TestAPI):

@@ -69,6 +71,10 @@ class TestModelReload(TestAPI):

  def test_keras_reload(self):
    """Test that trained keras models can be reloaded correctly."""
    g = tf.Graph()
    sess = tf.Session(graph=g)
    K.set_session(sess)
    with g.as_default():
      tasks = ["task0"]
      task_types = {task: "classification" for task in tasks}
      n_samples = 10