Unverified Commit 596c2142 authored by Bharath Ramsundar's avatar Bharath Ramsundar Committed by GitHub
Browse files

Merge pull request #1846 from peastman/slow

Fix failing slow tests
parents 4cbaac01 e6fc1e33
Loading
Loading
Loading
Loading
+2 −22
Original line number Diff line number Diff line
@@ -2,8 +2,6 @@
Tests to make sure deepchem models can fit models on easy datasets.
"""

from nose.plugins.attrib import attr

__author__ = "Bharath Ramsundar"
__copyright__ = "Copyright 2016, Stanford University"
__license__ = "MIT"
@@ -189,15 +187,7 @@ class TestGeneralize(unittest.TestCase):
  #  for score in scores[classification_metric.name]:
  #    assert score > .5

  @attr('slow')
  def test_xgboost_regression(self):
    """
    This test is not actually slow -- but cannot currently run
    on Ubuntu 14.04 with Tensorflow 1.4.0

    See Discussion Here
    https://github.com/deepchem/deepchem/issues/960
    """
    import xgboost
    np.random.seed(123)

@@ -215,7 +205,7 @@ class TestGeneralize(unittest.TestCase):
    # Set early stopping round = n_estimators so that esr won't work
    esr = {'early_stopping_rounds': 50}

    xgb_model = xgboost.XGBRegressor(n_estimators=50, seed=123)
    xgb_model = xgboost.XGBRegressor(n_estimators=50, random_state=123)
    model = dc.models.XGBoostModel(xgb_model, verbose=False, **esr)

    # Fit trained model
@@ -224,18 +214,9 @@ class TestGeneralize(unittest.TestCase):

    # Eval model on test
    scores = model.evaluate(test_dataset, [regression_metric])
    assert scores[regression_metric.name] < 50
    assert scores[regression_metric.name] < 55

  @attr('slow')
  def test_xgboost_multitask_regression(self):
    """
    Test that xgboost models can learn on simple multitask regression.
    This test is not actually slow -- but cannot currently run
    on Ubuntu 14.04 with Tensorflow 1.4.0

    See Discussion Here
    https://github.com/deepchem/deepchem/issues/960
    """
    import xgboost
    np.random.seed(123)
    n_tasks = 4
@@ -271,7 +252,6 @@ class TestGeneralize(unittest.TestCase):
    for score in scores[regression_metric.name]:
      assert score < 50

  @attr('slow')
  def test_xgboost_classification(self):
    """Test that sklearn models can learn on simple classification datasets."""
    import xgboost
+0 −40
Original line number Diff line number Diff line
@@ -573,46 +573,6 @@ class TestOverfit(test_util.TensorFlowTestCase):
    scores = model.evaluate(dataset, [regression_metric])
    assert scores[regression_metric.name] < .2

  @attr('slow')
  def test_ANI_multitask_regression_overfit(self):
    """Test ANI-1 regression overfits tiny data."""
    input_file = os.path.join(self.current_dir, "example_DTNN.mat")
    np.random.seed(123)
    tf.random.set_seed(123)
    dataset = scipy.io.loadmat(input_file)
    X = np.concatenate([np.expand_dims(dataset['Z'], 2), dataset['R']], axis=2)
    X = X[:, :13, :]
    y = dataset['T']
    w = np.ones_like(y)
    dataset = dc.data.DiskDataset.from_numpy(X, y, w, ids=None)
    regression_metric = dc.metrics.Metric(
        dc.metrics.pearson_r2_score, mode="regression")
    n_tasks = y.shape[1]
    batch_size = 10

    transformers = [
        dc.trans.NormalizationTransformer(transform_y=True, dataset=dataset),
    ]

    for transformer in transformers:
      dataset = transformer.transform(dataset)

    model = dc.models.ANIRegression(
        n_tasks,
        13,
        atom_number_cases=[1, 6, 7, 8],
        batch_size=batch_size,
        learning_rate=ExponentialDecay(0.01, 0.7, 100),
        mode="regression")

    # Fit trained model
    model.fit(dataset, nb_epoch=500)

    # Eval model on train
    scores = model.evaluate(dataset, [regression_metric], transformers[0:1])

    assert scores[regression_metric.name] > .7

  @attr('slow')
  def test_DAG_singletask_regression_overfit(self):
    """Test DAG regressor multitask overfits tiny data."""
+19 −8
Original line number Diff line number Diff line
@@ -52,7 +52,7 @@ class XGBoostModel(SklearnModel):
    X = dataset.X
    y = np.squeeze(dataset.y)
    w = np.squeeze(dataset.w)
    seed = self.model_instance.seed
    seed = self.model_instance.random_state
    import xgboost as xgb
    if isinstance(self.model_instance, xgb.XGBClassifier):
      xgb_metric = "auc"
@@ -88,15 +88,26 @@ class XGBoostModel(SklearnModel):
    '''
    Find best potential parameters set using few n_estimators
    '''

    # Make sure user specified params are in the grid.
    max_depth_grid = list(np.unique([self.model_instance.max_depth, 5, 7]))
    colsample_bytree_grid = list(
        np.unique([self.model_instance.colsample_bytree, 0.66, 0.9]))
    reg_lambda_grid = list(np.unique([self.model_instance.reg_lambda, 1, 5]))

    def unique_not_none(values):
      return list(np.unique([x for x in values if x is not None]))

    max_depth_grid = unique_not_none([self.model_instance.max_depth, 5, 7])
    colsample_bytree_grid = unique_not_none(
        [self.model_instance.colsample_bytree, 0.66, 0.9])
    reg_lambda_grid = unique_not_none([self.model_instance.reg_lambda, 1, 5])
    learning_rate = 0.3
    if self.model_instance.learning_rate is not None:
      learning_rate = max(learning_rate, self.model_instance.learning_rate)
    n_estimators = 60
    if self.model_instance.n_estimators is not None:
      n_estimators = min(n_estimators, self.model_instance.n_estimators)
    param_grid = {
        'max_depth': max_depth_grid,
        'learning_rate': [max(self.model_instance.learning_rate, 0.3)],
        'n_estimators': [min(self.model_instance.n_estimators, 60)],
        'learning_rate': [learning_rate],
        'n_estimators': [n_estimators],
        'gamma': [self.model_instance.gamma],
        'min_child_weight': [self.model_instance.min_child_weight],
        'max_delta_step': [self.model_instance.max_delta_step],
@@ -107,7 +118,7 @@ class XGBoostModel(SklearnModel):
        'reg_lambda': reg_lambda_grid,
        'scale_pos_weight': [self.model_instance.scale_pos_weight],
        'base_score': [self.model_instance.base_score],
        'seed': [self.model_instance.seed]
        'seed': [self.model_instance.random_state]
    }
    grid_search = GridSearchCV(
        self.model_instance, param_grid, cv=2, refit=False, scoring=metric)
+3 −2
Original line number Diff line number Diff line
@@ -110,7 +110,8 @@ class A2C(object):
  The method receives the list of states generated during the rollout, the action taken for each one,
  and a new goal state.  It should generate a new list of states that are identical to the input ones,
  except specifying the new goal.  It should return that list of states, and the rewards that would
  have been received for taking the specified actions from those states.
  have been received for taking the specified actions from those states.  The output arrays may be
  shorter than the input ones, if the modified rollout would have terminated sooner.
  """

  def __init__(self,
@@ -488,7 +489,7 @@ class A2C(object):
    outputs = self._compute_model(inputs)
    values = outputs[self._value_index].numpy()
    values = np.append(values.flatten(), 0.0)
    self._process_rollout(hindsight_states, actions,
    self._process_rollout(hindsight_states, actions[:len(rewards)],
                          np.array(rewards, dtype=np.float32),
                          np.array(values, dtype=np.float32),
                          initial_rnn_states)
+3 −1
Original line number Diff line number Diff line
@@ -84,7 +84,8 @@ class PPO(object):
  The method receives the list of states generated during the rollout, the action taken for each one,
  and a new goal state.  It should generate a new list of states that are identical to the input ones,
  except specifying the new goal.  It should return that list of states, and the rewards that would
  have been received for taking the specified actions from those states.
  have been received for taking the specified actions from those states.  The output arrays may be
  shorter than the input ones, if the modified rollout would have terminated sooner.
  """

  def __init__(self,
@@ -543,6 +544,7 @@ class _Worker(object):
    values = outputs[self.ppo._value_index].numpy()
    values = np.append(values.flatten(), 0.0)
    probabilities = outputs[self.ppo._action_prob_index].numpy()
    actions = actions[:len(rewards)]
    action_prob = probabilities[np.arange(len(actions)), actions]
    return self.process_rollout(hindsight_states, actions, action_prob,
                                np.array(rewards, dtype=np.float32),
Loading