Commit c348b7f1 authored by peastman's avatar peastman
Browse files

Fixed errors when using xgboost

parent 7e0f1771
Loading
Loading
Loading
Loading
+1 −21
Original line number Diff line number Diff line
@@ -2,8 +2,6 @@
Tests to make sure deepchem models can fit models on easy datasets.
"""

from nose.plugins.attrib import attr

__author__ = "Bharath Ramsundar"
__copyright__ = "Copyright 2016, Stanford University"
__license__ = "MIT"
@@ -189,15 +187,7 @@ class TestGeneralize(unittest.TestCase):
  #  for score in scores[classification_metric.name]:
  #    assert score > .5

  @attr('slow')
  def test_xgboost_regression(self):
    """
    This test is not actually slow -- but cannot currently run
    on Ubuntu 14.04 with Tensorflow 1.4.0

    See Discussion Here
    https://github.com/deepchem/deepchem/issues/960
    """
    import xgboost
    np.random.seed(123)

@@ -215,7 +205,7 @@ class TestGeneralize(unittest.TestCase):
    # Set early stopping round = n_estimators so that esr won't work
    esr = {'early_stopping_rounds': 50}

    xgb_model = xgboost.XGBRegressor(n_estimators=50, seed=123)
    xgb_model = xgboost.XGBRegressor(n_estimators=50, random_state=123)
    model = dc.models.XGBoostModel(xgb_model, verbose=False, **esr)

    # Fit trained model
@@ -226,16 +216,7 @@ class TestGeneralize(unittest.TestCase):
    scores = model.evaluate(test_dataset, [regression_metric])
    assert scores[regression_metric.name] < 50

  @attr('slow')
  def test_xgboost_multitask_regression(self):
    """
    Test that xgboost models can learn on simple multitask regression.
    This test is not actually slow -- but cannot currently run
    on Ubuntu 14.04 with Tensorflow 1.4.0

    See Discussion Here
    https://github.com/deepchem/deepchem/issues/960
    """
    import xgboost
    np.random.seed(123)
    n_tasks = 4
@@ -271,7 +252,6 @@ class TestGeneralize(unittest.TestCase):
    for score in scores[regression_metric.name]:
      assert score < 50

  @attr('slow')
  def test_xgboost_classification(self):
    """Test that sklearn models can learn on simple classification datasets."""
    import xgboost
+19 −8
Original line number Diff line number Diff line
@@ -52,7 +52,7 @@ class XGBoostModel(SklearnModel):
    X = dataset.X
    y = np.squeeze(dataset.y)
    w = np.squeeze(dataset.w)
    seed = self.model_instance.seed
    seed = self.model_instance.random_state
    import xgboost as xgb
    if isinstance(self.model_instance, xgb.XGBClassifier):
      xgb_metric = "auc"
@@ -88,15 +88,26 @@ class XGBoostModel(SklearnModel):
    '''
    Find best potential parameters set using few n_estimators
    '''

    # Make sure user specified params are in the grid.
    max_depth_grid = list(np.unique([self.model_instance.max_depth, 5, 7]))
    colsample_bytree_grid = list(
        np.unique([self.model_instance.colsample_bytree, 0.66, 0.9]))
    reg_lambda_grid = list(np.unique([self.model_instance.reg_lambda, 1, 5]))

    def unique_not_none(values):
      # Keep only non-None entries, then de-duplicate via np.unique
      # (which also sorts the result) and hand back a plain list.
      present = [v for v in values if v is not None]
      return list(np.unique(present))

    max_depth_grid = unique_not_none([self.model_instance.max_depth, 5, 7])
    colsample_bytree_grid = unique_not_none(
        [self.model_instance.colsample_bytree, 0.66, 0.9])
    reg_lambda_grid = unique_not_none([self.model_instance.reg_lambda, 1, 5])
    learning_rate = 0.3
    if self.model_instance.learning_rate is not None:
      learning_rate = max(learning_rate, self.model_instance.learning_rate)
    n_estimators = 60
    if self.model_instance.n_estimators is not None:
      n_estimators = min(n_estimators, self.model_instance.n_estimators)
    param_grid = {
        'max_depth': max_depth_grid,
        'learning_rate': [max(self.model_instance.learning_rate, 0.3)],
        'n_estimators': [min(self.model_instance.n_estimators, 60)],
        'learning_rate': [learning_rate],
        'n_estimators': [n_estimators],
        'gamma': [self.model_instance.gamma],
        'min_child_weight': [self.model_instance.min_child_weight],
        'max_delta_step': [self.model_instance.max_delta_step],
@@ -107,7 +118,7 @@ class XGBoostModel(SklearnModel):
        'reg_lambda': reg_lambda_grid,
        'scale_pos_weight': [self.model_instance.scale_pos_weight],
        'base_score': [self.model_instance.base_score],
        'seed': [self.model_instance.seed]
        'seed': [self.model_instance.random_state]
    }
    grid_search = GridSearchCV(
        self.model_instance, param_grid, cv=2, refit=False, scoring=metric)