Commit ce4a5ca7 authored by Bharath Ramsundar's avatar Bharath Ramsundar
Browse files

Ported overfit tests

parent 31b60db1
Loading
Loading
Loading
Loading
+8 −7
Original line number Diff line number Diff line
@@ -17,6 +17,7 @@ from deepchem.metrics import from_one_hot
from deepchem.models.tensorflow_models import model_ops
from deepchem.models.tensorflow_models import utils as tf_utils
from deepchem.utils.save import log
from deepchem.datasets import pad_features

def softmax(x):
  """Simple numpy softmax implementation
@@ -574,9 +575,9 @@ class TensorflowRegressor(TensorflowGraphModel):
  def get_task_type(self):
    """Return the task-type tag for this model class.

    Always the literal string "regressor"; callers appear to use this tag to
    distinguish regression models from classifiers (the classifier counterpart
    is not visible in this diff — TODO confirm against the full file).
    """
    return "regressor"

  def add_output_ops(self, graph):
  def add_output_ops(self, graph, output):
    """No-op for regression models since no softmax."""
    pass
    return output

  def cost(self, output, labels, weights):
    """Calculate single-task training cost for a batch of examples.
@@ -605,8 +606,8 @@ class TensorflowRegressor(TensorflowGraphModel):
    with graph.as_default():
      batch_size = self.batch_size
      labels = []
      with placeholder_scope:
        for task in xrange(self.n_tasks):
        with tf.name_scope(placeholder_scope):
          labels.append(tf.identity(
              tf.placeholder(tf.float32, shape=[None],
                             name='labels_%d' % task)))
@@ -647,7 +648,7 @@ class TensorflowRegressor(TensorflowGraphModel):
        X = pad_features(self.batch_size, X)
        feed_dict = self.construct_feed_dict(X)
        data = self._get_shared_session(train=False).run(
            self.output, feed_dict=feed_dict)
            self.eval_graph.output, feed_dict=feed_dict)
        batch_outputs = np.asarray(data[:n_tasks], dtype=float)
        # reshape to batch_size x n_tasks x ...
        if batch_outputs.ndim == 3:
@@ -683,11 +684,11 @@ class TensorflowModel(Model):
    self.model_instance = model
    self.fit_transformers = None

  def fit(self, dataset, shuffle=False):
  def fit(self, dataset, **kwargs):
    """
    Fits TensorflowGraph to data.
    """
    self.model_instance.fit(dataset, shuffle=shuffle)
    self.model_instance.fit(dataset, **kwargs)

  def predict_on_batch(self, X):
    """
+3 −4
Original line number Diff line number Diff line
@@ -14,7 +14,6 @@ from deepchem.models.tensorflow_models import TensorflowClassifier
from deepchem.models.tensorflow_models import TensorflowRegressor
from deepchem.models.tensorflow_models import model_ops
from deepchem.metrics import to_one_hot
from deepchem.datasets import pad_features

class TensorflowMultiTaskClassifier(TensorflowClassifier):
  """Implements an icml model as configured in a model_config.proto."""
@@ -108,11 +107,11 @@ class TensorflowMultiTaskRegressor(TensorflowRegressor):
      mol_features: Molecule descriptor (e.g. fingerprint) tensor with shape
        batch_size x n_features.
    """
    n_features = self.n_inputs
    n_features = self.n_features
    placeholder_scope = TensorflowGraph.get_placeholder_scope(
        graph, name_scopes)
    with graph.as_default():
      with tf.name_scope(placeholder_scope):
      with placeholder_scope:
        self.mol_features = tf.placeholder(
            tf.float32,
            shape=[None, n_features],
@@ -149,7 +148,7 @@ class TensorflowMultiTaskRegressor(TensorflowRegressor):

      output = []
      for task in range(self.n_tasks):
        self.output.append(tf.squeeze(
        output.append(tf.squeeze(
            model_ops.FullyConnectedLayer(
                tensor=prev_layer,
                size=layer_sizes[i],
+0 −15
Original line number Diff line number Diff line
@@ -25,25 +25,10 @@ class TestMultitaskData(TestAPI):
  """
  def test_multitask_order(self):
    """Test that order of tasks in multitask datasets is preserved."""
    from deepchem.models.keras_models.fcnet import MultiTaskDNN
    splittype = "scaffold"
    output_transformers = []
    input_transformers = []
    task_type = "classification"
    # TODO(rbharath): There should be some automatic check to ensure that all
    # required model_params are specified.
    model_params = {"nb_hidden": 10, "activation": "relu",
                    "dropout": .5, "learning_rate": .01,
                    "momentum": .9, "nesterov": False,
                    "decay": 1e-4, "batch_size": 5,
                    "nb_epoch": 2, "init": "glorot_uniform",
                    "nb_layers": 1, "batchnorm": False}

    input_file = os.path.join(self.current_dir, "multitask_example.csv")
    tasks = ["task0", "task1", "task2", "task3", "task4", "task5", "task6",
             "task7", "task8", "task9", "task10", "task11", "task12",
             "task13", "task14", "task15", "task16"]
    task_types = {task: task_type for task in tasks}

    featurizer = CircularFingerprint(size=1024)

+107 −328

File changed.

Preview size limit exceeded, changes collapsed.

+14 −28
Original line number Diff line number Diff line
@@ -25,27 +25,17 @@ class TestSingletasktoMultitaskAPI(TestAPI):
  Test top-level API for singletask_to_multitask ML models.
  """
  def test_singletask_to_multitask_classification(self):
    splittype = "scaffold"
    compound_featurizers = [CircularFingerprint(size=1024)]
    complex_featurizers = []
    output_transformers = []
    tasks = ["task0", "task1", "task2", "task3", "task4", "task5", "task6",
             "task7", "task8", "task9", "task10", "task11", "task12",
             "task13", "task14", "task15", "task16"]
    task_types = {task: "classification" for task in tasks}
    input_file = "multitask_example.csv"

    n_features = 10
    n_tasks = len(tasks)
    n_tasks = 17
    tasks = range(n_tasks)
    # Define train dataset
    n_train = 100
    X_train = np.random.rand(n_train, n_features)
    y_train = np.random.randint(2, size=(n_train, n_tasks))
    w_train = np.ones_like(y_train)
    ids_train = ["C"] * n_train
    train_dataset = Dataset.from_numpy(self.train_dir,
                                       X_train, y_train, w_train, ids_train,
                                       tasks)
    train_dataset = Dataset.from_numpy(
        self.train_dir, X_train, y_train, w_train, ids_train)

    # Define test dataset
    n_test = 10
@@ -53,31 +43,27 @@ class TestSingletasktoMultitaskAPI(TestAPI):
    y_test = np.random.randint(2, size=(n_test, n_tasks))
    w_test = np.ones_like(y_test)
    ids_test = ["C"] * n_test
    test_dataset = Dataset.from_numpy(self.test_dir,
                                      X_test, y_test, w_test, ids_test,
                                      tasks)
    test_dataset = Dataset.from_numpy(
        self.test_dir, X_test, y_test, w_test, ids_test)

    params_dict = {
        "batch_size": 32,
        "data_shape": train_dataset.get_data_shape()
    }
    transformers = []
    classification_metrics = [Metric(metrics.roc_auc_score)]
    def model_builder(tasks, task_types, model_params, model_builder, verbosity=None):
      return SklearnModel(tasks, task_types, model_params, model_builder,
                          model_instance=LogisticRegression())
    multitask_model = SingletaskToMultitask(tasks, task_types, params_dict,
                                            self.model_dir, model_builder)
    def model_builder(model_dir):
      sklearn_model = LogisticRegression()
      return SklearnModel(sklearn_model, model_dir)
    multitask_model = SingletaskToMultitask(
        tasks, model_builder, self.model_dir)

    # Fit trained model
    multitask_model.fit(train_dataset)
    multitask_model.save()

    # Eval multitask_model on train
    evaluator = Evaluator(multitask_model, train_dataset, output_transformers,
    evaluator = Evaluator(multitask_model, train_dataset, transformers,
                          verbosity=True)
    _ = evaluator.compute_model_performance(classification_metrics)

    # Eval multitask_model on test
    evaluator = Evaluator(multitask_model, test_dataset, output_transformers,
    evaluator = Evaluator(multitask_model, test_dataset, transformers,
                          verbosity=True)
    _ = evaluator.compute_model_performance(classification_metrics)