make_estimator() works with MultiTaskClassifier and MultiTaskRegressor (898295dc) · Commits · 钟慕尧 / deepchem

deepchem/models/tensorgraph/fcnet.py

+14 −23

Original line number	Diff line number	Diff line
		@@ -18,7 +18,7 @@ from deepchem.metrics import to_one_hot, from_one_hot
		from deepchem.metrics import to_one_hot

		from deepchem.models.tensorgraph.tensor_graph import TensorGraph, TFWrapper
		from deepchem.models.tensorgraph.layers import Feature, Label, Weights, WeightedError, Dense, Dropout, WeightDecay, Reshape, SoftMaxCrossEntropy, L2Loss, ReduceSum
		from deepchem.models.tensorgraph.layers import Feature, Label, Weights, WeightedError, Dense, Dropout, WeightDecay, Reshape, SoftMax, SoftMaxCrossEntropy, L2Loss, ReduceSum


		class MultiTaskClassifier(TensorGraph):
		@@ -112,15 +112,16 @@ class MultiTaskClassifier(TensorGraph):

		# Compute the loss function for each label.

		output = Reshape(
		logits = Reshape(
		shape=(-1, n_tasks, n_classes),
		in_layers=[
		Dense(in_layers=[prev_layer], out_channels=n_tasks * n_classes)
		])
		output = SoftMax(logits)
		self.add_output(output)
		labels = Label(shape=(None, n_tasks, n_classes))
		weights = Weights(shape=(None, n_tasks))
		loss = SoftMaxCrossEntropy(in_layers=[labels, output])
		loss = SoftMaxCrossEntropy(in_layers=[labels, logits])
		weighted_loss = WeightedError(in_layers=[loss, weights])
		if weight_decay_penalty != 0.0:
		weighted_loss = WeightDecay(
		@@ -152,6 +153,16 @@ class MultiTaskClassifier(TensorGraph):
		feed_dict[self.task_weights[0]] = w_b
		yield feed_dict

		def create_estimator_inputs(self, feature_columns, weight_column, features, labels, mode):
		    """Map this model's input layers to the tensors an Estimator supplies.

		    Each layer in self.features is paired, in order, with the matching entry
		    of feature_columns and bound to the tensor produced by
		    tf.feature_column.input_layer(). If weight_column is given, the first
		    task-weights layer is bound the same way. If labels is given, it is cast
		    to int32 and one-hot encoded with self.n_classes classes before being
		    bound to the first label layer.

		    The mode argument is accepted for interface compatibility but is not
		    used by this implementation.

		    Returns a dict whose keys are layers and whose values are tensors.
		    """
		    inputs = {
		        feature_layer: tf.feature_column.input_layer(features, [feature_column])
		        for feature_layer, feature_column in zip(self.features, feature_columns)
		    }
		    if weight_column is not None:
		        weight_tensor = tf.feature_column.input_layer(features, [weight_column])
		        inputs[self.task_weights[0]] = weight_tensor
		    if labels is not None:
		        # Labels arrive as class indices; the Label layer expects one-hot rows.
		        class_indices = tf.cast(labels, tf.int32)
		        inputs[self.labels[0]] = tf.one_hot(class_indices, self.n_classes)
		    return inputs

		def predict_proba(self, dataset, transformers=None, outputs=None):
		    """Run prediction by delegating to the parent class's predict().

		    Parameters
		    ----------
		    dataset:
		      Passed through unchanged to the parent predict().
		    transformers: list, optional
		      Passed through to the parent predict(). When omitted (None), a new
		      empty list is used for this call.
		    outputs:
		      Passed through unchanged to the parent predict().

		    Returns
		    -------
		    Whatever the parent class's predict() returns for these arguments.
		    """
		    # Build the default list per call: a `[]` default in the signature would
		    # be one shared object handed to downstream code on every call.
		    if transformers is None:
		        transformers = []
		    return super(MultiTaskClassifier, self).predict(dataset, transformers,
		                                                    outputs)
		@@ -289,26 +300,6 @@ class MultiTaskRegressor(TensorGraph):
		in_layers=[weighted_loss])
		self.set_loss(weighted_loss)

		def default_generator(self,
		                      dataset,
		                      epochs=1,
		                      predict=False,
		                      deterministic=True,
		                      pad_batches=True):
		    """Yield one feed dict per batch of dataset, repeated for each epoch.

		    Batches are drawn with dataset.iterbatches() using self.batch_size and
		    the given deterministic / pad_batches flags. Each yielded dict maps:

		    - self.labels[0] to y_b reshaped to (-1, self.n_tasks, 1), when y_b is
		      present and predict is false;
		    - self.features[0] to X_b, when X_b is present;
		    - self.task_weights[0] to w_b, when w_b is present and predict is false.
		    """
		    include_targets = not predict
		    for _ in range(epochs):
		        batch_iter = dataset.iterbatches(
		            batch_size=self.batch_size,
		            deterministic=deterministic,
		            pad_batches=pad_batches)
		        for X_b, y_b, w_b, _ids_b in batch_iter:
		            feed = {}
		            if include_targets and y_b is not None:
		                # Append a trailing axis of size 1 after the task axis.
		                feed[self.labels[0]] = y_b.reshape(-1, self.n_tasks, 1)
		            if X_b is not None:
		                feed[self.features[0]] = X_b
		            if include_targets and w_b is not None:
		                feed[self.task_weights[0]] = w_b
		            yield feed


		class MultiTaskFitTransformRegressor(MultiTaskRegressor):
		"""Implements a MultiTaskRegressor that performs on-the-fly transformation during fit/predict.

deepchem/models/tensorgraph/tensor_graph.py

+34 −7

Original line number	Diff line number	Diff line
		@@ -902,12 +902,7 @@ class TensorGraph(Model):
		def model_fn(features, labels, mode):
		# Define the inputs.

		tensors = {}
		for layer, column in zip(self.features, feature_columns):
		tensors[layer] = tf.feature_column.input_layer(features, [column])
		if weight_column is not None:
		tensors[self.task_weights[0]] = tf.feature_column.input_layer(features, [weight_column])
		tensors[self.labels[0]] = labels
		tensors = self.create_estimator_inputs(feature_columns, weight_column, features, labels, mode)
		for layer, tensor in tensors.items():
		layer.add_summary_to_tg(tensor)

		@@ -927,7 +922,7 @@ class TensorGraph(Model):
		weights = tensors[self.task_weights[0]]
		eval_metric_ops = {}
		for name, function in metrics.items():
		eval_metric_ops[name] = function(labels, predictions, weights)
		eval_metric_ops[name] = function(tensors[self.labels[0]], predictions, weights)
		return tf.estimator.EstimatorSpec(mode, loss=loss, eval_metric_ops=eval_metric_ops)
		if mode == tf.estimator.ModeKeys.TRAIN:
		loss = create_tensors(self.loss, tensors, 1)
		@@ -941,6 +936,29 @@ class TensorGraph(Model):

		return tf.estimator.Estimator(model_fn=model_fn, model_dir=model_dir)

		def create_estimator_inputs(self, feature_columns, weight_column, features, labels, mode):
		    """This is called by make_estimator() to create tensors for the inputs.

		    feature_columns and weight_column are the arguments passed to
		    make_estimator(). features, labels, and mode are the arguments passed to
		    the estimator's model function. This method creates and returns a dict with
		    one entry for every Feature, Label, or Weights layer in the graph. The keys
		    are the layers, and the values are the tensors that correspond to them.

		    Any subclass that overrides default_generator() must also override this
		    method.

		    In this default implementation, each layer in self.features is paired in
		    order with an entry of feature_columns; the weights entry is added only
		    when weight_column is not None; and the labels entry is added only when
		    labels is not None, cast to the dtype of the first label layer. The mode
		    argument is not used here.

		    Raises
		    ------
		    ValueError
		      If the instance's class provides its own default_generator() but has
		      not overridden this method.
		    """
		    # Compare with != rather than identity. On Python 2, looking up a method
		    # on a class builds a new unbound-method object on every access, so an
		    # `is not` test would be true even for classes that do not override
		    # default_generator(). Equality compares the underlying function, and on
		    # Python 3 (where these are plain functions) it matches the identity test.
		    if self.__class__.default_generator != TensorGraph.default_generator:
		        raise ValueError("Class overrides default_generator() but not create_estimator_inputs()")
		    tensors = {}
		    for layer, column in zip(self.features, feature_columns):
		        tensors[layer] = tf.feature_column.input_layer(features, [column])
		    if weight_column is not None:
		        tensors[self.task_weights[0]] = tf.feature_column.input_layer(features, [weight_column])
		    if labels is not None:
		        # Match the label layer's declared dtype so downstream ops see the
		        # type they were built for.
		        tensors[self.labels[0]] = tf.cast(labels, self.labels[0].dtype)
		    return tensors

		def _enqueue_batch(tg, generator, graph, sess, n_enqueued, final_sample):
		"""
		Function to load data into
		@@ -963,6 +981,15 @@ def _enqueue_batch(tg, generator, graph, sess, n_enqueued, final_sample):
		for layer in tg.features + tg.labels + tg.task_weights:
		if layer in feed_dict:
		value = feed_dict[layer]
		# Add or remove dimensions of size 1 to match the shape of the layer.
		value_dims = len(value.shape)
		layer_dims = len(layer.shape)
		if value_dims < layer_dims:
		if all(i==1 for i in layer.shape[value_dims:]):
		value = value.reshape(list(value.shape)+[1]*(layer_dims-value_dims))
		if value_dims > layer_dims:
		if all(i==1 for i in value.shape[layer_dims:]):
		value = value.reshape(value.shape[:layer_dims])
		else:
		value = np.zeros(
		[0] + list(layer.shape[1:]), dtype=layer.dtype.as_numpy_dtype)

Admin message