Merge pull request #1846 from peastman/slow (596c2142) · Commits · 钟慕尧 / deepchem

deepchem/models/tests/test_generalize.py

+2 −22

Original line number	Diff line number	Diff line
		@@ -2,8 +2,6 @@
		Tests to make sure deepchem models can fit models on easy datasets.
		"""

		from nose.plugins.attrib import attr

		__author__ = "Bharath Ramsundar"
		__copyright__ = "Copyright 2016, Stanford University"
		__license__ = "MIT"
		@@ -189,15 +187,7 @@ class TestGeneralize(unittest.TestCase):
		# for score in scores[classification_metric.name]:
		# assert score > .5

		@attr('slow')
		def test_xgboost_regression(self):
		"""
		This test is not actually slow -- but cannot currently run
		on Ubuntu 14.04 with Tensorflow 1.4.0

		See Discussion Here
		https://github.com/deepchem/deepchem/issues/960
		"""
		import xgboost
		np.random.seed(123)

		@@ -215,7 +205,7 @@ class TestGeneralize(unittest.TestCase):
		# Set early stopping round = n_estimators so that esr won't work
		esr = {'early_stopping_rounds': 50}

		xgb_model = xgboost.XGBRegressor(n_estimators=50, seed=123)
		xgb_model = xgboost.XGBRegressor(n_estimators=50, random_state=123)
		model = dc.models.XGBoostModel(xgb_model, verbose=False, **esr)

		# Fit trained model
		@@ -224,18 +214,9 @@ class TestGeneralize(unittest.TestCase):

		# Eval model on test
		scores = model.evaluate(test_dataset, [regression_metric])
		assert scores[regression_metric.name] < 50
		assert scores[regression_metric.name] < 55

		@attr('slow')
		def test_xgboost_multitask_regression(self):
		"""
		Test that xgboost models can learn on simple multitask regression.
		This test is not actually slow -- but cannot currently run
		on Ubuntu 14.04 with Tensorflow 1.4.0

		See Discussion Here
		https://github.com/deepchem/deepchem/issues/960
		"""
		import xgboost
		np.random.seed(123)
		n_tasks = 4
		@@ -271,7 +252,6 @@ class TestGeneralize(unittest.TestCase):
		for score in scores[regression_metric.name]:
		assert score < 50

		@attr('slow')
		def test_xgboost_classification(self):
		"""Test that sklearn models can learn on simple classification datasets."""
		import xgboost

deepchem/models/tests/test_overfit.py

+0 −40

Original line number	Diff line number	Diff line
		@@ -573,46 +573,6 @@ class TestOverfit(test_util.TensorFlowTestCase):
		scores = model.evaluate(dataset, [regression_metric])
		assert scores[regression_metric.name] < .2

		@attr('slow')
		def test_ANI_multitask_regression_overfit(self):
		"""Test ANI-1 regression overfits tiny data."""
		input_file = os.path.join(self.current_dir, "example_DTNN.mat")
		np.random.seed(123)
		tf.random.set_seed(123)
		dataset = scipy.io.loadmat(input_file)
		X = np.concatenate([np.expand_dims(dataset['Z'], 2), dataset['R']], axis=2)
		X = X[:, :13, :]
		y = dataset['T']
		w = np.ones_like(y)
		dataset = dc.data.DiskDataset.from_numpy(X, y, w, ids=None)
		regression_metric = dc.metrics.Metric(
		dc.metrics.pearson_r2_score, mode="regression")
		n_tasks = y.shape[1]
		batch_size = 10

		transformers = [
		dc.trans.NormalizationTransformer(transform_y=True, dataset=dataset),
		]

		for transformer in transformers:
		dataset = transformer.transform(dataset)

		model = dc.models.ANIRegression(
		n_tasks,
		13,
		atom_number_cases=[1, 6, 7, 8],
		batch_size=batch_size,
		learning_rate=ExponentialDecay(0.01, 0.7, 100),
		mode="regression")

		# Fit trained model
		model.fit(dataset, nb_epoch=500)

		# Eval model on train
		scores = model.evaluate(dataset, [regression_metric], transformers[0:1])

		assert scores[regression_metric.name] > .7

		@attr('slow')
		def test_DAG_singletask_regression_overfit(self):
		"""Test DAG regressor multitask overfits tiny data."""

deepchem/models/xgboost_models/init.py

+19 −8

Original line number	Diff line number	Diff line
		@@ -52,7 +52,7 @@ class XGBoostModel(SklearnModel):
		X = dataset.X
		y = np.squeeze(dataset.y)
		w = np.squeeze(dataset.w)
		seed = self.model_instance.seed
		seed = self.model_instance.random_state
		import xgboost as xgb
		if isinstance(self.model_instance, xgb.XGBClassifier):
		xgb_metric = "auc"
		@@ -88,15 +88,26 @@ class XGBoostModel(SklearnModel):
		'''
		Find best potential parameters set using few n_estimators
		'''

		# Make sure user specified params are in the grid.
		max_depth_grid = list(np.unique([self.model_instance.max_depth, 5, 7]))
		colsample_bytree_grid = list(
		np.unique([self.model_instance.colsample_bytree, 0.66, 0.9]))
		reg_lambda_grid = list(np.unique([self.model_instance.reg_lambda, 1, 5]))

		def unique_not_none(values):
		return list(np.unique([x for x in values if x is not None]))

		max_depth_grid = unique_not_none([self.model_instance.max_depth, 5, 7])
		colsample_bytree_grid = unique_not_none(
		[self.model_instance.colsample_bytree, 0.66, 0.9])
		reg_lambda_grid = unique_not_none([self.model_instance.reg_lambda, 1, 5])
		learning_rate = 0.3
		if self.model_instance.learning_rate is not None:
		learning_rate = max(learning_rate, self.model_instance.learning_rate)
		n_estimators = 60
		if self.model_instance.n_estimators is not None:
		n_estimators = min(n_estimators, self.model_instance.n_estimators)
		param_grid = {
		'max_depth': max_depth_grid,
		'learning_rate': [max(self.model_instance.learning_rate, 0.3)],
		'n_estimators': [min(self.model_instance.n_estimators, 60)],
		'learning_rate': [learning_rate],
		'n_estimators': [n_estimators],
		'gamma': [self.model_instance.gamma],
		'min_child_weight': [self.model_instance.min_child_weight],
		'max_delta_step': [self.model_instance.max_delta_step],
		@@ -107,7 +118,7 @@ class XGBoostModel(SklearnModel):
		'reg_lambda': reg_lambda_grid,
		'scale_pos_weight': [self.model_instance.scale_pos_weight],
		'base_score': [self.model_instance.base_score],
		'seed': [self.model_instance.seed]
		'seed': [self.model_instance.random_state]
		}
		grid_search = GridSearchCV(
		self.model_instance, param_grid, cv=2, refit=False, scoring=metric)

deepchem/rl/a2c.py

+3 −2

Original line number	Diff line number	Diff line
		@@ -110,7 +110,8 @@ class A2C(object):
		The method receives the list of states generated during the rollout, the action taken for each one,
		and a new goal state. It should generate a new list of states that are identical to the input ones,
		except specifying the new goal. It should return that list of states, and the rewards that would
		have been received for taking the specified actions from those states.
		have been received for taking the specified actions from those states. The output arrays may be
		shorter than the input ones, if the modified rollout would have terminated sooner.
		"""

		def __init__(self,
		@@ -488,7 +489,7 @@ class A2C(object):
		outputs = self._compute_model(inputs)
		values = outputs[self._value_index].numpy()
		values = np.append(values.flatten(), 0.0)
		self._process_rollout(hindsight_states, actions,
		self._process_rollout(hindsight_states, actions[:len(rewards)],
		np.array(rewards, dtype=np.float32),
		np.array(values, dtype=np.float32),
		initial_rnn_states)

deepchem/rl/ppo.py

+3 −1

Original line number	Diff line number	Diff line
		@@ -84,7 +84,8 @@ class PPO(object):
		The method receives the list of states generated during the rollout, the action taken for each one,
		and a new goal state. It should generate a new list of states that are identical to the input ones,
		except specifying the new goal. It should return that list of states, and the rewards that would
		have been received for taking the specified actions from those states.
		have been received for taking the specified actions from those states. The output arrays may be
		shorter than the input ones, if the modified rollout would have terminated sooner.
		"""

		def __init__(self,
		@@ -543,6 +544,7 @@ class _Worker(object):
		values = outputs[self.ppo._value_index].numpy()
		values = np.append(values.flatten(), 0.0)
		probabilities = outputs[self.ppo._action_prob_index].numpy()
		actions = actions[:len(rewards)]
		action_prob = probabilities[np.arange(len(actions)), actions]
		return self.process_rollout(hindsight_states, actions, action_prob,
		np.array(rewards, dtype=np.float32),

Admin message