Commit eb2bf860 authored by Bharath Ramsundar's avatar Bharath Ramsundar Committed by GitHub
Browse files

Merge pull request #227 from rbharath/keras

Support Latest Keras Versions
parents 301609b8 b567fa17
Loading
Loading
Loading
Loading
+3 −2
Original line number Diff line number Diff line
@@ -16,8 +16,9 @@ install:
- conda install -c omnia boost=1.59.0
- conda install -c omnia openbabel
- conda install joblib
- conda install -c omnia theano
- conda install -c omnia keras
- conda install h5py
- pip install keras
- export KERAS_BACKEND=tensorflow
- conda install seaborn
- conda install six
- conda install dill
+11 −1
Original line number Diff line number Diff line
@@ -30,6 +30,8 @@ from deepchem.models.keras_models.fcnet import MultiTaskDNN
from deepchem.models.tensorflow_models import TensorflowModel
from deepchem.models.tensorflow_models.fcnet import TensorflowMultiTaskClassifier
from deepchem.splits import ScaffoldSplitter
import tensorflow as tf
from keras import backend as K

def rf_model_builder(tasks, task_types, params_dict, model_dir, verbosity=None):
    """Builds random forests given hyperparameters.
@@ -173,6 +175,14 @@ class TestHyperparamOptAPI(TestAPI):
                  "batchnorm": [False],
                  "data_shape": [train_dataset.get_data_shape()]}
      
    def model_builder(tasks, task_types, model_params, task_model_dir,
                      verbosity=None):
      # Construct a Keras MultiTaskDNN inside its own dedicated TF graph and
      # session, so repeated hyperparameter-search trials don't accumulate ops
      # in one shared default graph.
      g = tf.Graph()
      sess = tf.Session(graph=g)
      # Point the Keras backend at the fresh session before any layers are built.
      K.set_session(sess)
      with g.as_default():
        # NOTE(review): model_instance=LogisticRegression() looks copy/pasted
        # from a sklearn model builder — confirm MultiTaskDNN actually accepts
        # (and uses) this kwarg. Also, HyperparamOpt below is constructed with
        # MultiTaskDNN directly rather than this builder, so model_builder
        # appears unused — verify intent.
        return MultiTaskDNN(tasks, task_types, model_params, task_model_dir,
                            model_instance=LogisticRegression())
    optimizer = HyperparamOpt(MultiTaskDNN, tasks, task_types,
                              verbosity="low")
    best_model, best_hyperparams, all_results = optimizer.hyperparam_search(
+55 −50
Original line number Diff line number Diff line
@@ -33,6 +33,9 @@ from deepchem.models.tensorflow_models import TensorflowModel
from deepchem.models.tensorflow_models.fcnet import TensorflowMultiTaskClassifier
from deepchem.splits import ScaffoldSplitter
from deepchem.splits import SpecifiedSplitter
from deepchem.models.keras_models.fcnet import MultiTaskDNN
import tensorflow as tf
from keras import backend as K

class TestModelAPI(TestAPI):
  """
@@ -277,8 +280,6 @@ class TestModelAPI(TestAPI):
  #                        Metric(metrics.mean_squared_error),
  #                        Metric(metrics.mean_absolute_error)]

  #  model = SingleTaskDNN(tasks, task_types, model_params, self.model_dir)

  #  # Fit trained model
  #  model.fit(train_dataset)
  #  model.save()
@@ -294,12 +295,16 @@ class TestModelAPI(TestAPI):

  def test_multitask_keras_mlp_ECFP_classification_API(self):
    """Straightforward test of Keras multitask deepchem classification API."""
    from deepchem.models.keras_models.fcnet import MultiTaskDNN
    g = tf.Graph()
    sess = tf.Session(graph=g)
    K.set_session(sess)
    with g.as_default():
      task_type = "classification"
      # TODO(rbharath): There should be some automatic check to ensure that all
      # required model_params are specified.
      # TODO(rbharath): Turning off dropout to make tests behave.
      model_params = {"nb_hidden": 10, "activation": "relu",
                    "dropout": .5, "learning_rate": .01,
                      "dropout": .0, "learning_rate": .01,
                      "momentum": .9, "nesterov": False,
                      "decay": 1e-4, "batch_size": 5,
                      "nb_epoch": 2, "init": "glorot_uniform",
+249 −235
Original line number Diff line number Diff line
@@ -26,6 +26,8 @@ from deepchem.models.tensorflow_models import TensorflowModel
from deepchem.models.tensorflow_models.fcnet import TensorflowMultiTaskRegressor
from deepchem.models.tensorflow_models.fcnet import TensorflowMultiTaskClassifier
from deepchem.models.multitask import SingletaskToMultitask
import tensorflow as tf
from keras import backend as K

class TestOverfitAPI(TestAPI):
  """
@@ -152,6 +154,10 @@ class TestOverfitAPI(TestAPI):

  def test_keras_regression_overfit(self):
    """Test that keras models can overfit simple regression datasets."""
    g = tf.Graph()
    sess = tf.Session(graph=g)
    K.set_session(sess)
    with g.as_default():
      tasks = ["task0"]
      task_types = {task: "regression" for task in tasks}
      n_samples = 10
@@ -252,6 +258,10 @@ class TestOverfitAPI(TestAPI):

  def test_keras_classification_overfit(self):
    """Test that keras models can overfit simple classification datasets."""
    g = tf.Graph()
    sess = tf.Session(graph=g)
    K.set_session(sess)
    with g.as_default():
      tasks = ["task0"]
      task_types = {task: "classification" for task in tasks}
      n_samples = 10
@@ -301,6 +311,10 @@ class TestOverfitAPI(TestAPI):

  def test_keras_skewed_classification_overfit(self):
    """Test keras models can overfit 0/1 datasets with few actives."""
    g = tf.Graph()
    sess = tf.Session(graph=g)
    K.set_session(sess)
    with g.as_default():
      tasks = ["task0"]
      task_types = {task: "classification" for task in tasks}
      n_samples = 100
@@ -480,20 +494,12 @@ class TestOverfitAPI(TestAPI):
    X = np.random.rand(n_samples, n_features)
    y = np.random.binomial(1, p, size=(n_samples, n_tasks))
    w = np.ones((n_samples, n_tasks))
    print("np.count_nonzero(y)")
    print(np.count_nonzero(y))
    ##### DEBUG
    y_flat, w_flat = np.squeeze(y), np.squeeze(w)
    y_nonzero = y_flat[w_flat != 0]
    num_nonzero = np.count_nonzero(y_nonzero)
    weight_nonzero = len(y_nonzero)/num_nonzero
    print("weight_nonzero")
    print(weight_nonzero)
    w_flat[y_flat != 0] = weight_nonzero
    w = np.reshape(w_flat, (n_samples, n_tasks))
    print("np.amin(w), np.amax(w)")
    print(np.amin(w), np.amax(w))
    ##### DEBUG
  
    dataset = Dataset.from_numpy(self.train_dir, X, y, w, ids, tasks)

@@ -577,6 +583,10 @@ class TestOverfitAPI(TestAPI):

  def test_keras_multitask_classification_overfit(self):
    """Test keras multitask overfits tiny data."""
    g = tf.Graph()
    sess = tf.Session(graph=g)
    K.set_session(sess)
    with g.as_default():
      n_tasks = 10
      tasks = ["task%d" % task for task in range(n_tasks)]
      task_types = {task: "classification" for task in tasks}
@@ -723,6 +733,10 @@ class TestOverfitAPI(TestAPI):

  def test_keras_multitask_regression_overfit(self):
    """Test keras multitask overfits tiny data."""
    g = tf.Graph()
    sess = tf.Session(graph=g)
    K.set_session(sess)
    with g.as_default():
      n_tasks = 10
      tasks = ["task%d" % task for task in range(n_tasks)]
      task_types = {task: "regression" for task in tasks}
+57 −51
Original line number Diff line number Diff line
@@ -20,6 +20,8 @@ from deepchem.models.sklearn_models import SklearnModel
from deepchem.models.keras_models.fcnet import MultiTaskDNN
from deepchem.models.tensorflow_models import TensorflowModel
from deepchem.models.tensorflow_models.fcnet import TensorflowMultiTaskClassifier
import tensorflow as tf
from keras import backend as K

class TestModelReload(TestAPI):

@@ -69,6 +71,10 @@ class TestModelReload(TestAPI):

  def test_keras_reload(self):
    """Test that trained keras models can be reloaded correctly."""
    g = tf.Graph()
    sess = tf.Session(graph=g)
    K.set_session(sess)
    with g.as_default():
      tasks = ["task0"]
      task_types = {task: "classification" for task in tasks}
      n_samples = 10