Commit c348b7f1 authored by peastman's avatar peastman
Browse files

Fixed errors when using xgboost

parent 7e0f1771
Loading
Loading
Loading
Loading
+1 −21
Original line number Diff line number Diff line
@@ -2,8 +2,6 @@
Tests to make sure deepchem models can fit models on easy datasets.
"""

from nose.plugins.attrib import attr

__author__ = "Bharath Ramsundar"
__copyright__ = "Copyright 2016, Stanford University"
__license__ = "MIT"
@@ -189,15 +187,7 @@ class TestGeneralize(unittest.TestCase):
  #  for score in scores[classification_metric.name]:
  #    assert score > .5

  @attr('slow')
  def test_xgboost_regression(self):
    """
    This test is not actually slow -- but cannot currently run
    on Ubuntu 14.04 with Tensorflow 1.4.0

    See Discussion Here
    https://github.com/deepchem/deepchem/issues/960
    """
    import xgboost
    np.random.seed(123)

@@ -215,7 +205,7 @@ class TestGeneralize(unittest.TestCase):
    # Set early stopping round = n_estimators so that esr won't work
    esr = {'early_stopping_rounds': 50}

    xgb_model = xgboost.XGBRegressor(n_estimators=50, seed=123)
    xgb_model = xgboost.XGBRegressor(n_estimators=50, random_state=123)
    model = dc.models.XGBoostModel(xgb_model, verbose=False, **esr)

    # Fit trained model
@@ -226,16 +216,7 @@ class TestGeneralize(unittest.TestCase):
    scores = model.evaluate(test_dataset, [regression_metric])
    assert scores[regression_metric.name] < 50

  @attr('slow')
  def test_xgboost_multitask_regression(self):
    """
    Test that xgboost models can learn on simple multitask regression.
    This test is not actually slow -- but cannot currently run
    on Ubuntu 14.04 with Tensorflow 1.4.0

    See Discussion Here
    https://github.com/deepchem/deepchem/issues/960
    """
    import xgboost
    np.random.seed(123)
    n_tasks = 4
@@ -271,7 +252,6 @@ class TestGeneralize(unittest.TestCase):
    for score in scores[regression_metric.name]:
      assert score < 50

  @attr('slow')
  def test_xgboost_classification(self):
    """Test that sklearn models can learn on simple classification datasets."""
    import xgboost
+19 −8
Original line number Diff line number Diff line
@@ -52,7 +52,7 @@ class XGBoostModel(SklearnModel):
    X = dataset.X
    y = np.squeeze(dataset.y)
    w = np.squeeze(dataset.w)
    seed = self.model_instance.seed
    seed = self.model_instance.random_state
    import xgboost as xgb
    if isinstance(self.model_instance, xgb.XGBClassifier):
      xgb_metric = "auc"
@@ -88,15 +88,26 @@ class XGBoostModel(SklearnModel):
    '''
    Find best potential parameters set using few n_estimators
    '''

    # Make sure user specified params are in the grid.
    max_depth_grid = list(np.unique([self.model_instance.max_depth, 5, 7]))
    colsample_bytree_grid = list(
        np.unique([self.model_instance.colsample_bytree, 0.66, 0.9]))
    reg_lambda_grid = list(np.unique([self.model_instance.reg_lambda, 1, 5]))

    def unique_not_none(values):
      # Keep only non-None entries, then de-duplicate via np.unique
      # (which also sorts the result) and hand back a plain list.
      present = [v for v in values if v is not None]
      return list(np.unique(present))

    max_depth_grid = unique_not_none([self.model_instance.max_depth, 5, 7])
    colsample_bytree_grid = unique_not_none(
        [self.model_instance.colsample_bytree, 0.66, 0.9])
    reg_lambda_grid = unique_not_none([self.model_instance.reg_lambda, 1, 5])
    learning_rate = 0.3
    if self.model_instance.learning_rate is not None:
      learning_rate = max(learning_rate, self.model_instance.learning_rate)
    n_estimators = 60
    if self.model_instance.n_estimators is not None:
      n_estimators = min(n_estimators, self.model_instance.n_estimators)
    param_grid = {
        'max_depth': max_depth_grid,
        'learning_rate': [max(self.model_instance.learning_rate, 0.3)],
        'n_estimators': [min(self.model_instance.n_estimators, 60)],
        'learning_rate': [learning_rate],
        'n_estimators': [n_estimators],
        'gamma': [self.model_instance.gamma],
        'min_child_weight': [self.model_instance.min_child_weight],
        'max_delta_step': [self.model_instance.max_delta_step],
@@ -107,7 +118,7 @@ class XGBoostModel(SklearnModel):
        'reg_lambda': reg_lambda_grid,
        'scale_pos_weight': [self.model_instance.scale_pos_weight],
        'base_score': [self.model_instance.base_score],
        'seed': [self.model_instance.seed]
        'seed': [self.model_instance.random_state]
    }
    grid_search = GridSearchCV(
        self.model_instance, param_grid, cv=2, refit=False, scoring=metric)