Commit 643b4b70 authored by casey
Browse files

Added nb_epoch unittest

parent 7219f7df
Loading
Loading
Loading
Loading
+37 −0
Original line number Diff line number Diff line
@@ -7,6 +7,7 @@ valuable test suite so leaving it in despite the flakiness.
"""
import numpy as np
import sklearn
import sklearn.ensemble
import deepchem as dc
import unittest
import tempfile
@@ -171,3 +172,39 @@ class TestGaussianHyperparamOpt(unittest.TestCase):
      assert "learning_rate" in hp_str
    assert valid_score["mean-mean_squared_error"] == min(all_results.values())
    assert valid_score["mean-mean_squared_error"] > 0

  @flaky
  def test_multitask_example_nb_epoch(self):
    """Run a gaussian process search on a multitask model with `nb_epoch` set.

    Builds small random train/valid datasets, searches over `batch_size`
    with `nb_epoch=3` and `max_iter=1` passed to `hyperparam_search`, and
    checks that the returned best model's validation score equals the
    minimum of the recorded results and is positive.
    """
    n_tasks = 2
    n_features = 3
    score_key = "mean-mean_squared_error"

    # Seed before drawing any features; train features are drawn first,
    # validation features second.
    np.random.seed(123)
    train_X = np.random.rand(10, n_features)
    train_dataset = dc.data.NumpyDataset(train_X, np.zeros((10, n_tasks)),
                                         np.ones((10, n_tasks)),
                                         np.arange(10))
    valid_X = np.random.rand(5, n_features)
    valid_dataset = dc.data.NumpyDataset(valid_X, np.zeros((5, n_tasks)),
                                         np.ones((5, n_tasks)), np.arange(5))
    transformers = []

    def build_model(**params):
      # Everything except the searched hyperparameters is fixed here.
      return dc.models.MultitaskRegressor(
          n_tasks=n_tasks,
          n_features=n_features,
          dropouts=[0.],
          weight_init_stddevs=[np.sqrt(6) / np.sqrt(1000)],
          learning_rate=0.003,
          **params)

    optimizer = dc.hyper.GaussianProcessHyperparamOpt(build_model)

    metric = dc.metrics.Metric(
        dc.metrics.mean_squared_error, task_averager=np.mean)
    params_dict = {"batch_size": 10}

    search_output = optimizer.hyperparam_search(
        params_dict,
        train_dataset,
        valid_dataset,
        metric,
        transformers,
        nb_epoch=3,
        max_iter=1,
        use_max=False)
    best_model, best_hyperparams, all_results = search_output

    valid_score = best_model.evaluate(valid_dataset, [metric], transformers)
    best_mse = valid_score[score_key]
    # use_max=False was requested, so the best score is compared against
    # the smallest recorded result.
    assert best_mse == min(all_results.values())
    assert best_mse > 0
+35 −0
Original line number Diff line number Diff line
@@ -6,6 +6,7 @@ import tempfile
import numpy as np
import deepchem as dc
import sklearn
import sklearn.ensemble


class TestGridHyperparamOpt(unittest.TestCase):
@@ -158,3 +159,37 @@ class TestGridHyperparamOpt(unittest.TestCase):

    assert valid_score["mean-mean_squared_error"] == min(all_results.values())
    assert valid_score["mean-mean_squared_error"] > 0

  def test_multitask_nb_epoch(self):
    """Run a grid search on a multitask model with `nb_epoch` set.

    Builds small random train/valid datasets, grid-searches over two
    `batch_size` values with `nb_epoch=3` passed to `hyperparam_search`,
    and checks that the returned best model's validation score equals the
    minimum of the recorded results and is positive.
    """
    # Generate dummy dataset. Seed first; train features are drawn before
    # validation features.
    np.random.seed(123)
    train_dataset = dc.data.NumpyDataset(
        np.random.rand(10, 3), np.zeros((10, 2)), np.ones((10, 2)),
        np.arange(10))
    valid_dataset = dc.data.NumpyDataset(
        np.random.rand(5, 3), np.zeros((5, 2)), np.ones((5, 2)), np.arange(5))

    def build_model(**params):
      # Everything except the searched hyperparameters is fixed here.
      return dc.models.MultitaskRegressor(
          n_tasks=2,
          n_features=3,
          dropouts=[0.],
          weight_init_stddevs=[np.sqrt(6) / np.sqrt(1000)],
          learning_rate=0.003,
          **params)

    optimizer = dc.hyper.GridHyperparamOpt(build_model)

    params_dict = {"batch_size": [10, 20]}
    transformers = []
    metric = dc.metrics.Metric(
        dc.metrics.mean_squared_error, task_averager=np.mean)

    best_model, best_hyperparams, all_results = optimizer.hyperparam_search(
        params_dict,
        train_dataset,
        valid_dataset,
        metric,
        transformers,
        nb_epoch=3,
        use_max=False)

    # Evaluate with the same transformers that were given to
    # hyperparam_search so the comparison below uses matching arguments.
    valid_score = best_model.evaluate(valid_dataset, [metric], transformers)
    # use_max=False was requested, so the best score is compared against
    # the smallest recorded result.
    assert valid_score["mean-mean_squared_error"] == min(all_results.values())
    assert valid_score["mean-mean_squared_error"] > 0
 No newline at end of file